1823 files changed, 41901 insertions, 14168 deletions
diff --git a/test/Analysis/BasicAA/2007-08-05-GetOverloadedModRef.ll b/test/Analysis/BasicAA/2007-08-05-GetOverloadedModRef.ll
index 035299e..5b81c17 100644
--- a/test/Analysis/BasicAA/2007-08-05-GetOverloadedModRef.ll
+++ b/test/Analysis/BasicAA/2007-08-05-GetOverloadedModRef.ll
@@ -3,12 +3,12 @@
 ; RUN:   grep {ret i32 0}
 ; END.
 
-declare i16 @llvm.cttz.i16(i16)
+declare i16 @llvm.cttz.i16(i16, i1)
 
 define i32 @test(i32* %P, i16* %Q) {
         %A = load i16* %Q               ; <i16> [#uses=1]
         %x = load i32* %P               ; <i32> [#uses=1]
-        %B = call i16 @llvm.cttz.i16( i16 %A )          ; <i16> [#uses=1]
+        %B = call i16 @llvm.cttz.i16( i16 %A, i1 true )          ; <i16> [#uses=1]
         %y = load i32* %P               ; <i32> [#uses=1]
         store i16 %B, i16* %Q
         %z = sub i32 %x, %y             ; <i32> [#uses=1]
diff --git a/test/Analysis/BasicAA/aligned-overread.ll b/test/Analysis/BasicAA/aligned-overread.ll
new file mode 100644
index 0000000..b05f8eb
--- /dev/null
+++ b/test/Analysis/BasicAA/aligned-overread.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%struct.S0 = type <{ i8, [4 x i8] }>
+
+@a = global { i8, i8, i8, i8, i8 } { i8 undef, i8 0, i8 0, i8 0, i8 0 }, align 8
+
+define i32 @main() nounwind uwtable ssp {
+entry:
+  %tmp = load i8* getelementptr inbounds ({ i8, i8, i8, i8, i8 }* @a, i64 0, i32 4), align 4
+  %tmp1 = or i8 %tmp, -128
+  store i8 %tmp1, i8* getelementptr inbounds ({ i8, i8, i8, i8, i8 }* @a, i64 0, i32 4), align 4
+  %tmp2 = load i64* bitcast ({ i8, i8, i8, i8, i8 }* @a to i64*), align 8
+  store i8 11, i8* getelementptr inbounds ({ i8, i8, i8, i8, i8 }* @a, i64 0, i32 4), align 4
+  %tmp3 = trunc i64 %tmp2 to i32
+  ret i32 %tmp3
+
+; Make sure we don't delete either store here
+; CHECK: @main
+; CHECK: store i8 %tmp1
+; CHECK: store i8 11
+}
+
diff --git a/test/Analysis/BasicAA/constant-over-index.ll b/test/Analysis/BasicAA/constant-over-index.ll
index 8a8ac4f..48ef259 100644
--- a/test/Analysis/BasicAA/constant-over-index.ll
+++ b/test/Analysis/BasicAA/constant-over-index.ll
@@ -16,8 +16,8 @@ loop:
 
   %p.0.i.0 = getelementptr [3 x [3 x double]]* %p, i64 0, i64 %i, i64 0
 
-  volatile store double 0.0, double* %p3
-  volatile store double 0.1, double* %p.0.i.0
+  store volatile double 0.0, double* %p3
+  store volatile double 0.1, double* %p.0.i.0
 
   %i.next = add i64 %i, 1
   %cmp = icmp slt i64 %i.next, 3
diff --git a/test/Analysis/BasicAA/dg.exp b/test/Analysis/BasicAA/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Analysis/BasicAA/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/BasicAA/lit.local.cfg b/test/Analysis/BasicAA/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Analysis/BasicAA/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/BasicAA/phi-and-select.ll b/test/Analysis/BasicAA/phi-and-select.ll
index 9bc47ae..0ed4a2c 100644
--- a/test/Analysis/BasicAA/phi-and-select.ll
+++ b/test/Analysis/BasicAA/phi-and-select.ll
@@ -17,8 +17,8 @@ false:
 exit:
   %a = phi double* [ %x, %true ], [ %y, %false ]
   %b = phi double* [ %x, %false ], [ %y, %true ]
-  volatile store double 0.0, double* %a
-  volatile store double 1.0, double* %b
+  store volatile double 0.0, double* %a
+  store volatile double 1.0, double* %b
   ret void
 }
 
@@ -27,8 +27,8 @@ define void @bar(i1 %m, double* noalias %x, double* noalias %y) {
 entry:
   %a = select i1 %m, double* %x, double* %y
   %b = select i1 %m, double* %y, double* %x
-  volatile store double 0.000000e+00, double* %a
-  volatile store double 1.000000e+00, double* %b
+  store volatile double 0.000000e+00, double* %a
+  store volatile double 1.000000e+00, double* %b
   ret void
 }
 
@@ -56,8 +56,8 @@ nfalse:
 
 nexit:
   %b = phi double* [ %v, %ntrue ], [ %w, %nfalse ]
-  volatile store double 0.0, double* %a
-  volatile store double 1.0, double* %b
+  store volatile double 0.0, double* %a
+  store volatile double 1.0, double* %b
   ret void
 }
 
@@ -67,7 +67,7 @@ define void @fin(i1 %m, double* noalias %x, double* noalias %y,
 entry:
   %a = select i1 %m, double* %x, double* %y
   %b = select i1 %n, double* %v, double* %w
-  volatile store double 0.000000e+00, double* %a
-  volatile store double 1.000000e+00, double* %b
+  store volatile double 0.000000e+00, double* %a
+  store volatile double 1.000000e+00, double* %b
   ret void
 }
diff --git a/test/Analysis/BlockFrequencyInfo/dg.exp b/test/Analysis/BlockFrequencyInfo/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Analysis/BlockFrequencyInfo/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/BlockFrequencyInfo/lit.local.cfg b/test/Analysis/BlockFrequencyInfo/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Analysis/BlockFrequencyInfo/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/BranchProbabilityInfo/basic.ll b/test/Analysis/BranchProbabilityInfo/basic.ll
new file mode 100644
index 0000000..74d06a1
--- /dev/null
+++ b/test/Analysis/BranchProbabilityInfo/basic.ll
@@ -0,0 +1,90 @@
+; RUN: opt < %s -analyze -branch-prob | FileCheck %s
+
+define i32 @test1(i32 %i, i32* %a) {
+; CHECK: Printing analysis {{.*}} for function 'test1'
+entry:
+  br label %body
+; CHECK: edge entry -> body probability is 16 / 16 = 100%
+
+body:
+  %iv = phi i32 [ 0, %entry ], [ %next, %body ]
+  %base = phi i32 [ 0, %entry ], [ %sum, %body ]
+  %arrayidx = getelementptr inbounds i32* %a, i32 %iv
+  %0 = load i32* %arrayidx
+  %sum = add nsw i32 %0, %base
+  %next = add i32 %iv, 1
+  %exitcond = icmp eq i32 %next, %i
+  br i1 %exitcond, label %exit, label %body
+; CHECK: edge body -> exit probability is 4 / 128
+; CHECK: edge body -> body probability is 124 / 128
+
+exit:
+  ret i32 %sum
+}
+
+define i32 @test2(i32 %i, i32 %a, i32 %b) {
+; CHECK: Printing analysis {{.*}} for function 'test2'
+entry:
+  %cond = icmp ult i32 %i, 42
+  br i1 %cond, label %then, label %else, !prof !0
+; CHECK: edge entry -> then probability is 64 / 68
+; CHECK: edge entry -> else probability is 4 / 68
+
+then:
+  br label %exit
+; CHECK: edge then -> exit probability is 16 / 16 = 100%
+
+else:
+  br label %exit
+; CHECK: edge else -> exit probability is 16 / 16 = 100%
+
+exit:
+  %result = phi i32 [ %a, %then ], [ %b, %else ]
+  ret i32 %result
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
+
+define i32 @test3(i32 %i, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
+; CHECK: Printing analysis {{.*}} for function 'test3'
+entry:
+  switch i32 %i, label %case_a [ i32 1, label %case_b
+                                 i32 2, label %case_c
+                                 i32 3, label %case_d
+                                 i32 4, label %case_e ], !prof !1
+; CHECK: edge entry -> case_a probability is 4 / 80
+; CHECK: edge entry -> case_b probability is 4 / 80
+; CHECK: edge entry -> case_c probability is 64 / 80
+; CHECK: edge entry -> case_d probability is 4 / 80
+; CHECK: edge entry -> case_e probability is 4 / 80
+
+case_a:
+  br label %exit
+; CHECK: edge case_a -> exit probability is 16 / 16 = 100%
+
+case_b:
+  br label %exit
+; CHECK: edge case_b -> exit probability is 16 / 16 = 100%
+
+case_c:
+  br label %exit
+; CHECK: edge case_c -> exit probability is 16 / 16 = 100%
+
+case_d:
+  br label %exit
+; CHECK: edge case_d -> exit probability is 16 / 16 = 100%
+
+case_e:
+  br label %exit
+; CHECK: edge case_e -> exit probability is 16 / 16 = 100%
+
+exit:
+  %result = phi i32 [ %a, %case_a ],
+                    [ %b, %case_b ],
+                    [ %c, %case_c ],
+                    [ %d, %case_d ],
+                    [ %e, %case_e ]
+  ret i32 %result
+}
+
+!1 = metadata !{metadata !"branch_weights", i32 4, i32 4, i32 64, i32 4, i32 4}
diff --git a/test/Analysis/BranchProbabilityInfo/lit.local.cfg b/test/Analysis/BranchProbabilityInfo/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Analysis/BranchProbabilityInfo/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/BranchProbabilityInfo/loop.ll b/test/Analysis/BranchProbabilityInfo/loop.ll
new file mode 100644
index 0000000..b648cbb
--- /dev/null
+++ b/test/Analysis/BranchProbabilityInfo/loop.ll
@@ -0,0 +1,365 @@
+; Test the static branch probability heuristics for no-return functions.
+; RUN: opt < %s -analyze -branch-prob | FileCheck %s
+
+declare void @g1()
+declare void @g2()
+declare void @g3()
+declare void @g4()
+
+define void @test1(i32 %a, i32 %b) {
+entry:
+  br label %do.body
+; CHECK: edge entry -> do.body probability is 16 / 16 = 100%
+
+do.body:
+  %i.0 = phi i32 [ 0, %entry ], [ %inc3, %do.end ]
+  call void @g1()
+  br label %do.body1
+; CHECK: edge do.body -> do.body1 probability is 124 / 124 = 100%
+
+do.body1:
+  %j.0 = phi i32 [ 0, %do.body ], [ %inc, %do.body1 ]
+  call void @g2()
+  %inc = add nsw i32 %j.0, 1
+  %cmp = icmp slt i32 %inc, %b
+  br i1 %cmp, label %do.body1, label %do.end
+; CHECK: edge do.body1 -> do.body1 probability is 124 / 128
+; CHECK: edge do.body1 -> do.end probability is 4 / 128
+
+do.end:
+  call void @g3()
+  %inc3 = add nsw i32 %i.0, 1
+  %cmp4 = icmp slt i32 %inc3, %a
+  br i1 %cmp4, label %do.body, label %do.end5
+; CHECK: edge do.end -> do.body probability is 124 / 128
+; CHECK: edge do.end -> do.end5 probability is 4 / 128
+
+do.end5:
+  call void @g4()
+  ret void
+}
+
+define void @test2(i32 %a, i32 %b) {
+entry:
+  %cmp9 = icmp sgt i32 %a, 0
+  br i1 %cmp9, label %for.body.lr.ph, label %for.end6
+; CHECK: edge entry -> for.body.lr.ph probability is 20 / 32
+; CHECK: edge entry -> for.end6 probability is 12 / 32
+
+for.body.lr.ph:
+  %cmp27 = icmp sgt i32 %b, 0
+  br label %for.body
+; CHECK: edge for.body.lr.ph -> for.body probability is 16 / 16 = 100%
+
+for.body:
+  %i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc5, %for.end ]
+  call void @g1()
+  br i1 %cmp27, label %for.body3, label %for.end
+; CHECK: edge for.body -> for.body3 probability is 62 / 124 = 50%
+; CHECK: edge for.body -> for.end probability is 62 / 124 = 50%
+
+for.body3:
+  %j.08 = phi i32 [ %inc, %for.body3 ], [ 0, %for.body ]
+  call void @g2()
+  %inc = add nsw i32 %j.08, 1
+  %exitcond = icmp eq i32 %inc, %b
+  br i1 %exitcond, label %for.end, label %for.body3
+; CHECK: edge for.body3 -> for.end probability is 4 / 128
+; CHECK: edge for.body3 -> for.body3 probability is 124 / 128
+
+for.end:
+  call void @g3()
+  %inc5 = add nsw i32 %i.010, 1
+  %exitcond11 = icmp eq i32 %inc5, %a
+  br i1 %exitcond11, label %for.end6, label %for.body
+; CHECK: edge for.end -> for.end6 probability is 4 / 128
+; CHECK: edge for.end -> for.body probability is 124 / 128
+
+for.end6:
+  call void @g4()
+  ret void
+}
+
+define void @test3(i32 %a, i32 %b, i32* %c) {
+entry:
+  br label %do.body
+; CHECK: edge entry -> do.body probability is 16 / 16 = 100%
+
+do.body:
+  %i.0 = phi i32 [ 0, %entry ], [ %inc4, %if.end ]
+  call void @g1()
+  %0 = load i32* %c, align 4
+  %cmp = icmp slt i32 %0, 42
+  br i1 %cmp, label %do.body1, label %if.end
+; CHECK: edge do.body -> do.body1 probability is 62 / 124 = 50%
+; CHECK: edge do.body -> if.end probability is 62 / 124 = 50%
+
+do.body1:
+  %j.0 = phi i32 [ %inc, %do.body1 ], [ 0, %do.body ]
+  call void @g2()
+  %inc = add nsw i32 %j.0, 1
+  %cmp2 = icmp slt i32 %inc, %b
+  br i1 %cmp2, label %do.body1, label %if.end
+; CHECK: edge do.body1 -> do.body1 probability is 124 / 128
+; CHECK: edge do.body1 -> if.end probability is 4 / 128
+
+if.end:
+  call void @g3()
+  %inc4 = add nsw i32 %i.0, 1
+  %cmp5 = icmp slt i32 %inc4, %a
+  br i1 %cmp5, label %do.body, label %do.end6
+; CHECK: edge if.end -> do.body probability is 124 / 128
+; CHECK: edge if.end -> do.end6 probability is 4 / 128
+
+do.end6:
+  call void @g4()
+  ret void
+}
+
+define void @test4(i32 %a, i32 %b, i32* %c) {
+entry:
+  br label %do.body
+; CHECK: edge entry -> do.body probability is 16 / 16 = 100%
+
+do.body:
+  %i.0 = phi i32 [ 0, %entry ], [ %inc4, %do.end ]
+  call void @g1()
+  %0 = load i32* %c, align 4
+  %cmp = icmp slt i32 %0, 42
+  br i1 %cmp, label %return, label %do.body1
+; CHECK: edge do.body -> return probability is 4 / 128
+; CHECK: edge do.body -> do.body1 probability is 124 / 128
+
+do.body1:
+  %j.0 = phi i32 [ %inc, %do.body1 ], [ 0, %do.body ]
+  call void @g2()
+  %inc = add nsw i32 %j.0, 1
+  %cmp2 = icmp slt i32 %inc, %b
+  br i1 %cmp2, label %do.body1, label %do.end
+; CHECK: edge do.body1 -> do.body1 probability is 124 / 128
+; CHECK: edge do.body1 -> do.end probability is 4 / 128
+
+do.end:
+  call void @g3()
+  %inc4 = add nsw i32 %i.0, 1
+  %cmp5 = icmp slt i32 %inc4, %a
+  br i1 %cmp5, label %do.body, label %do.end6
+; CHECK: edge do.end -> do.body probability is 124 / 128
+; CHECK: edge do.end -> do.end6 probability is 4 / 128
+
+do.end6:
+  call void @g4()
+  br label %return
+; CHECK: edge do.end6 -> return probability is 16 / 16 = 100%
+
+return:
+  ret void
+}
+
+define void @test5(i32 %a, i32 %b, i32* %c) {
+entry:
+  br label %do.body
+; CHECK: edge entry -> do.body probability is 16 / 16 = 100%
+
+do.body:
+  %i.0 = phi i32 [ 0, %entry ], [ %inc4, %do.end ]
+  call void @g1()
+  br label %do.body1
+; CHECK: edge do.body -> do.body1 probability is 124 / 124 = 100%
+
+do.body1:
+  %j.0 = phi i32 [ 0, %do.body ], [ %inc, %if.end ]
+  %0 = load i32* %c, align 4
+  %cmp = icmp slt i32 %0, 42
+  br i1 %cmp, label %return, label %if.end
+; CHECK: edge do.body1 -> return probability is 4 / 128
+; CHECK: edge do.body1 -> if.end probability is 124 / 128
+
+if.end:
+  call void @g2()
+  %inc = add nsw i32 %j.0, 1
+  %cmp2 = icmp slt i32 %inc, %b
+  br i1 %cmp2, label %do.body1, label %do.end
+; CHECK: edge if.end -> do.body1 probability is 124 / 128
+; CHECK: edge if.end -> do.end probability is 4 / 128
+
+do.end:
+  call void @g3()
+  %inc4 = add nsw i32 %i.0, 1
+  %cmp5 = icmp slt i32 %inc4, %a
+  br i1 %cmp5, label %do.body, label %do.end6
+; CHECK: edge do.end -> do.body probability is 124 / 128
+; CHECK: edge do.end -> do.end6 probability is 4 / 128
+
+do.end6:
+  call void @g4()
+  br label %return
+; CHECK: edge do.end6 -> return probability is 16 / 16 = 100%
+
+return:
+  ret void
+}
+
+define void @test6(i32 %a, i32 %b, i32* %c) {
+entry:
+  br label %do.body
+; CHECK: edge entry -> do.body probability is 16 / 16 = 100%
+
+do.body:
+  %i.0 = phi i32 [ 0, %entry ], [ %inc4, %do.end ]
+  call void @g1()
+  br label %do.body1
+; CHECK: edge do.body -> do.body1 probability is 124 / 124 = 100%
+
+do.body1:
+  %j.0 = phi i32 [ 0, %do.body ], [ %inc, %do.cond ]
+  call void @g2()
+  %0 = load i32* %c, align 4
+  %cmp = icmp slt i32 %0, 42
+  br i1 %cmp, label %return, label %do.cond
+; CHECK: edge do.body1 -> return probability is 4 / 128
+; CHECK: edge do.body1 -> do.cond probability is 124 / 128
+
+do.cond:
+  %inc = add nsw i32 %j.0, 1
+  %cmp2 = icmp slt i32 %inc, %b
+  br i1 %cmp2, label %do.body1, label %do.end
+; CHECK: edge do.cond -> do.body1 probability is 124 / 128
+; CHECK: edge do.cond -> do.end probability is 4 / 128
+
+do.end:
+  call void @g3()
+  %inc4 = add nsw i32 %i.0, 1
+  %cmp5 = icmp slt i32 %inc4, %a
+  br i1 %cmp5, label %do.body, label %do.end6
+; CHECK: edge do.end -> do.body probability is 124 / 128
+; CHECK: edge do.end -> do.end6 probability is 4 / 128
+
+do.end6:
+  call void @g4()
+  br label %return
+; CHECK: edge do.end6 -> return probability is 16 / 16 = 100%
+
+return:
+  ret void
+}
+
+define void @test7(i32 %a, i32 %b, i32* %c) {
+entry:
+  %cmp10 = icmp sgt i32 %a, 0
+  br i1 %cmp10, label %for.body.lr.ph, label %for.end7
+; CHECK: edge entry -> for.body.lr.ph probability is 20 / 32
+; CHECK: edge entry -> for.end7 probability is 12 / 32
+
+for.body.lr.ph:
+  %cmp38 = icmp sgt i32 %b, 0
+  br label %for.body
+; CHECK: edge for.body.lr.ph -> for.body probability is 16 / 16 = 100%
+
+for.body:
+  %i.011 = phi i32 [ 0, %for.body.lr.ph ], [ %inc6, %for.inc5 ]
+  %0 = load i32* %c, align 4
+  %cmp1 = icmp eq i32 %0, %i.011
+  br i1 %cmp1, label %for.inc5, label %if.end
+; CHECK: edge for.body -> for.inc5 probability is 62 / 124 = 50%
+; CHECK: edge for.body -> if.end probability is 62 / 124 = 50%
+
+if.end:
+  call void @g1()
+  br i1 %cmp38, label %for.body4, label %for.end
+; CHECK: edge if.end -> for.body4 probability is 62 / 124 = 50%
+; CHECK: edge if.end -> for.end probability is 62 / 124 = 50%
+
+for.body4:
+  %j.09 = phi i32 [ %inc, %for.body4 ], [ 0, %if.end ]
+  call void @g2()
+  %inc = add nsw i32 %j.09, 1
+  %exitcond = icmp eq i32 %inc, %b
+  br i1 %exitcond, label %for.end, label %for.body4
+; CHECK: edge for.body4 -> for.end probability is 4 / 128
+; CHECK: edge for.body4 -> for.body4 probability is 124 / 128
+
+for.end:
+  call void @g3()
+  br label %for.inc5
+; CHECK: edge for.end -> for.inc5 probability is 124 / 124 = 100%
+
+for.inc5:
+  %inc6 = add nsw i32 %i.011, 1
+  %exitcond12 = icmp eq i32 %inc6, %a
+  br i1 %exitcond12, label %for.end7, label %for.body
+; CHECK: edge for.inc5 -> for.end7 probability is 4 / 128
+; CHECK: edge for.inc5 -> for.body probability is 124 / 128
+
+for.end7:
+  call void @g4()
+  ret void
+}
+
+define void @test8(i32 %a, i32 %b, i32* %c) {
+entry:
+  %cmp18 = icmp sgt i32 %a, 0
+  br i1 %cmp18, label %for.body.lr.ph, label %for.end15
+; CHECK: edge entry -> for.body.lr.ph probability is 20 / 32
+; CHECK: edge entry -> for.end15 probability is 12 / 32
+
+for.body.lr.ph:
+  %cmp216 = icmp sgt i32 %b, 0
+  %arrayidx5 = getelementptr inbounds i32* %c, i64 1
+  %arrayidx9 = getelementptr inbounds i32* %c, i64 2
+  br label %for.body
+; CHECK: edge for.body.lr.ph -> for.body probability is 16 / 16 = 100%
+
+for.body:
+  %i.019 = phi i32 [ 0, %for.body.lr.ph ], [ %inc14, %for.end ]
+  call void @g1()
+  br i1 %cmp216, label %for.body3, label %for.end
+; CHECK: edge for.body -> for.body3 probability is 62 / 124 = 50%
+; CHECK: edge for.body -> for.end probability is 62 / 124 = 50%
+
+for.body3:
+  %j.017 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
+  %0 = load i32* %c, align 4
+  %cmp4 = icmp eq i32 %0, %j.017
+  br i1 %cmp4, label %for.inc, label %if.end
+; CHECK: edge for.body3 -> for.inc probability is 62 / 124 = 50%
+; CHECK: edge for.body3 -> if.end probability is 62 / 124 = 50%
+
+if.end:
+  %1 = load i32* %arrayidx5, align 4
+  %cmp6 = icmp eq i32 %1, %j.017
+  br i1 %cmp6, label %for.inc, label %if.end8
+; CHECK: edge if.end -> for.inc probability is 62 / 124 = 50%
+; CHECK: edge if.end -> if.end8 probability is 62 / 124 = 50%
+
+if.end8:
+  %2 = load i32* %arrayidx9, align 4
+  %cmp10 = icmp eq i32 %2, %j.017
+  br i1 %cmp10, label %for.inc, label %if.end12
+; CHECK: edge if.end8 -> for.inc probability is 62 / 124 = 50%
+; CHECK: edge if.end8 -> if.end12 probability is 62 / 124 = 50%
+
+if.end12:
+  call void @g2()
+  br label %for.inc
+; CHECK: edge if.end12 -> for.inc probability is 124 / 124 = 100%
+
+for.inc:
+  %inc = add nsw i32 %j.017, 1
+  %exitcond = icmp eq i32 %inc, %b
+  br i1 %exitcond, label %for.end, label %for.body3
+; CHECK: edge for.inc -> for.end probability is 4 / 128
+; CHECK: edge for.inc -> for.body3 probability is 124 / 128
+
+for.end:
+  call void @g3()
+  %inc14 = add nsw i32 %i.019, 1
+  %exitcond20 = icmp eq i32 %inc14, %a
+  br i1 %exitcond20, label %for.end15, label %for.body
+; CHECK: edge for.end -> for.end15 probability is 4 / 128
+; CHECK: edge for.end -> for.body probability is 124 / 128
+
+for.end15:
+  call void @g4()
+  ret void
+}
diff --git a/test/Analysis/BranchProbabilityInfo/noreturn.ll b/test/Analysis/BranchProbabilityInfo/noreturn.ll
new file mode 100644
index 0000000..8b9ae11
--- /dev/null
+++ b/test/Analysis/BranchProbabilityInfo/noreturn.ll
@@ -0,0 +1,79 @@
+; Test the static branch probability heuristics for no-return functions.
+; RUN: opt < %s -analyze -branch-prob | FileCheck %s
+
+declare void @abort() noreturn
+
+define i32 @test1(i32 %a, i32 %b) {
+; CHECK: Printing analysis {{.*}} for function 'test1'
+entry:
+  %cond = icmp eq i32 %a, 42
+  br i1 %cond, label %exit, label %abort
+; CHECK: edge entry -> exit probability is 1048575 / 1048576
+; CHECK: edge entry -> abort probability is 1 / 1048576
+
+abort:
+  call void @abort() noreturn
+  unreachable
+
+exit:
+  ret i32 %b
+}
+
+define i32 @test2(i32 %a, i32 %b) {
+; CHECK: Printing analysis {{.*}} for function 'test2'
+entry:
+  switch i32 %a, label %exit [i32 1, label %case_a
+                              i32 2, label %case_b
+                              i32 3, label %case_c
+                              i32 4, label %case_d]
+; CHECK: edge entry -> exit probability is 1048575 / 1048579
+; CHECK: edge entry -> case_a probability is 1 / 1048579
+; CHECK: edge entry -> case_b probability is 1 / 1048579
+; CHECK: edge entry -> case_c probability is 1 / 1048579
+; CHECK: edge entry -> case_d probability is 1 / 1048579
+
+case_a:
+  br label %case_b
+
+case_b:
+  br label %case_c
+
+case_c:
+  br label %case_d
+
+case_d:
+  call void @abort() noreturn
+  unreachable
+
+exit:
+  ret i32 %b
+}
+
+define i32 @test3(i32 %a, i32 %b) {
+; CHECK: Printing analysis {{.*}} for function 'test3'
+; Make sure we unify across multiple conditional branches.
+entry:
+  %cond1 = icmp eq i32 %a, 42
+  br i1 %cond1, label %exit, label %dom
+; CHECK: edge entry -> exit probability is 1048575 / 1048576
+; CHECK: edge entry -> dom probability is 1 / 1048576
+
+dom:
+  %cond2 = icmp ult i32 %a, 42
+  br i1 %cond2, label %idom1, label %idom2
+; CHECK: edge dom -> idom1 probability is 1 / 2
+; CHECK: edge dom -> idom2 probability is 1 / 2
+
+idom1:
+  br label %abort
+
+idom2:
+  br label %abort
+
+abort:
+  call void @abort() noreturn
+  unreachable
+
+exit:
+  ret i32 %b
+}
diff --git a/test/Analysis/CallGraph/dg.exp b/test/Analysis/CallGraph/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Analysis/CallGraph/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/CallGraph/lit.local.cfg b/test/Analysis/CallGraph/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Analysis/CallGraph/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/Dominators/dg.exp b/test/Analysis/Dominators/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Analysis/Dominators/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/Dominators/invoke.ll b/test/Analysis/Dominators/invoke.ll
new file mode 100644
index 0000000..f935750
--- /dev/null
+++ b/test/Analysis/Dominators/invoke.ll
@@ -0,0 +1,19 @@
+; RUN: opt -verify -disable-output %s
+; This tests that we handle unreachable blocks correctly
+
+define void @f() {
+  %v1 = invoke i32* @g()
+          to label %bb1 unwind label %bb2
+  invoke void @__dynamic_cast()
+          to label %bb1 unwind label %bb2
+bb1:
+  %Hidden = getelementptr inbounds i32* %v1, i64 1
+  ret void
+bb2:
+  %lpad.loopexit80 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  ret void
+}
+declare i32 @__gxx_personality_v0(...)
+declare void @__dynamic_cast()
+declare i32* @g()
diff --git a/test/Analysis/Dominators/lit.local.cfg b/test/Analysis/Dominators/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Analysis/Dominators/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/GlobalsModRef/dg.exp b/test/Analysis/GlobalsModRef/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Analysis/GlobalsModRef/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/GlobalsModRef/lit.local.cfg b/test/Analysis/GlobalsModRef/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Analysis/GlobalsModRef/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/GlobalsModRef/pr12351.ll b/test/Analysis/GlobalsModRef/pr12351.ll
new file mode 100644
index 0000000..1c5ac43
--- /dev/null
+++ b/test/Analysis/GlobalsModRef/pr12351.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s -basicaa -globalsmodref-aa -gvn -S | FileCheck %s
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+define void @foo(i8* %x, i8* %y) {
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x, i8* %y, i32 1, i32 1, i1 false);
+  ret void
+}
+
+define void @bar(i8* %y, i8* %z) {
+  %x = alloca i8
+  call void @foo(i8* %x, i8* %y)
+  %t = load i8* %x
+  store i8 %t, i8* %y
+; CHECK: store i8 %t, i8* %y
+  ret void
+}
+
+
+define i32 @foo2() {
+  %foo = alloca i32
+  call void @bar2(i32* %foo)
+  %t0 = load i32* %foo, align 4
+; CHECK: %t0 = load i32* %foo, align 4
+  ret i32 %t0
+}
+
+define void @bar2(i32* %foo)  {
+  store i32 0, i32* %foo, align 4
+  tail call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{})
+  ret void
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
diff --git a/test/Analysis/LoopDependenceAnalysis/dg.exp b/test/Analysis/LoopDependenceAnalysis/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Analysis/LoopDependenceAnalysis/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/LoopDependenceAnalysis/lit.local.cfg b/test/Analysis/LoopDependenceAnalysis/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Analysis/LoopDependenceAnalysis/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/LoopInfo/dg.exp b/test/Analysis/LoopInfo/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Analysis/LoopInfo/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/LoopInfo/lit.local.cfg b/test/Analysis/LoopInfo/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Analysis/LoopInfo/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/PostDominators/dg.exp b/test/Analysis/PostDominators/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Analysis/PostDominators/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/PostDominators/lit.local.cfg b/test/Analysis/PostDominators/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Analysis/PostDominators/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/Profiling/dg.exp b/test/Analysis/Profiling/dg.exp
deleted file mode 100644
index 1eb4755..0000000
--- a/test/Analysis/Profiling/dg.exp
+++ /dev/null
@@ -1,4 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-
diff --git a/test/Analysis/Profiling/lit.local.cfg b/test/Analysis/Profiling/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Analysis/Profiling/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/RegionInfo/dg.exp b/test/Analysis/RegionInfo/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Analysis/RegionInfo/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/RegionInfo/lit.local.cfg b/test/Analysis/RegionInfo/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Analysis/RegionInfo/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll b/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll
index 4f14a0d..ce0329d 100644
--- a/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll
+++ b/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll
@@ -1,6 +1,7 @@
-; RUN: opt < %s -scalar-evolution -analyze | grep {Loop %loop: backedge-taken count is (100 + (-100 smax %n))}
+; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s
 ; PR2002
 
+; CHECK: Loop %loop: backedge-taken count is (100 + (-100 smax %n))
 define void @foo(i8 %n) {
 entry:
 	br label %loop
diff --git a/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll b/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll
new file mode 100644
index 0000000..138c015
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -basicaa -globalopt -instcombine -loop-rotate -licm -instcombine -indvars -loop-deletion -constmerge -S
+; PR11882: ComputeLoadConstantCompareExitLimit crash.
+;
+; for.body is deleted leaving a loop-invariant load.
+; CHECK-NOT: for.body
+target datalayout = "e-p:64:64:64-n32:64"
+
+@func_21_l_773 = external global i32, align 4
+@g_814 = external global i32, align 4
+@g_244 = internal global [1 x [0 x i32]] zeroinitializer, align 4
+
+define void @func_21() nounwind uwtable ssp {
+entry:
+  br label %lbl_818
+
+lbl_818:                                          ; preds = %for.end, %entry
+  call void (...)* @func_27()
+  store i32 0, i32* @g_814, align 4, !tbaa !0
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %lbl_818
+  %0 = load i32* @g_814, align 4, !tbaa !0
+  %cmp = icmp sle i32 %0, 0
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %idxprom = sext i32 %0 to i64
+  %arrayidx = getelementptr inbounds [0 x i32]* getelementptr inbounds ([1 x [0 x i32]]* @g_244, i32 0, i64 0), i32 0, i64 %idxprom
+  %1 = load i32* %arrayidx, align 1, !tbaa !0
+  store i32 %1, i32* @func_21_l_773, align 4, !tbaa !0
+  store i32 1, i32* @g_814, align 4, !tbaa !0
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %2 = load i32* @func_21_l_773, align 4, !tbaa !0
+  %tobool = icmp ne i32 %2, 0
+  br i1 %tobool, label %lbl_818, label %if.end
+
+if.end:                                           ; preds = %for.end
+  ret void
+}
+
+declare void @func_27(...)
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/Analysis/ScalarEvolution/avoid-smax-1.ll b/test/Analysis/ScalarEvolution/avoid-smax-1.ll
index e90a555..d9b83a9 100644
--- a/test/Analysis/ScalarEvolution/avoid-smax-1.ll
+++ b/test/Analysis/ScalarEvolution/avoid-smax-1.ll
@@ -1,14 +1,12 @@
-; RUN: opt < %s -indvars -S -enable-iv-rewrite | FileCheck %s
-
-; Indvars should be able to insert a canonical induction variable
-; for the bb6 loop without using a maximum calculation (icmp, select)
-; because it should be able to prove that the comparison is guarded
-; by an appropriate conditional branch. Unfortunately, indvars is
-; not yet able to find the comparison for the other two loops in
-; this testcase.
-; CHECK: entry:
-; CHECK-NOT: select
-; CHECK: bb6:
+; RUN: opt < %s -analyze -scalar-evolution -S | FileCheck %s
+
+; Indvars should be able to find the trip count for the bb6 loop
+; without using a maximum calculation (icmp, select) because it should
+; be able to prove that the comparison is guarded by an appropriate
+; conditional branch. Unfortunately, indvars is not yet able to find
+; the comparison for the other two loops in this testcase.
+;
+; CHECK: Loop %bb6: backedge-taken count is (-1 + %w)
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin9"
diff --git a/test/Analysis/ScalarEvolution/dg.exp b/test/Analysis/ScalarEvolution/dg.exp
deleted file mode 100644
index b65a250..0000000
--- a/test/Analysis/ScalarEvolution/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.ll]] 
diff --git a/test/Analysis/ScalarEvolution/lit.local.cfg b/test/Analysis/ScalarEvolution/lit.local.cfg
new file mode 100644
index 0000000..c6106e4
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Analysis/ScalarEvolution/load.ll b/test/Analysis/ScalarEvolution/load.ll
new file mode 100644
index 0000000..2c753f5
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/load.ll
@@ -0,0 +1,65 @@
+; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i386-pc-linux-gnu"
+
+@arr1 = internal unnamed_addr constant [50 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50], align 4
+@arr2 = internal unnamed_addr constant [50 x i32] [i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0], align 4
+
+; PR11034
+define i32 @test1() nounwind readnone {
+; CHECK: test1
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %sum.04 = phi i32 [ 0, %entry ], [ %add2, %for.body ]
+; CHECK: -->  %sum.04{{ *}}Exits: 2450
+  %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds [50 x i32]* @arr1, i32 0, i32 %i.03
+  %0 = load i32* %arrayidx, align 4
+; CHECK: -->  %0{{ *}}Exits: 50
+  %arrayidx1 = getelementptr inbounds [50 x i32]* @arr2, i32 0, i32 %i.03
+  %1 = load i32* %arrayidx1, align 4
+; CHECK: -->  %1{{ *}}Exits: 0
+  %add = add i32 %0, %sum.04
+  %add2 = add i32 %add, %1
+  %inc = add nsw i32 %i.03, 1
+  %cmp = icmp eq i32 %inc, 50
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 %add2
+}
+
+
+%struct.ListNode = type { %struct.ListNode*, i32 }
+
+@node5 = internal constant { %struct.ListNode*, i32, [4 x i8] } { %struct.ListNode* bitcast ({ %struct.ListNode*, i32, [4 x i8] }* @node4 to %struct.ListNode*), i32 4, [4 x i8] undef }, align 8
+@node4 = internal constant { %struct.ListNode*, i32, [4 x i8] } { %struct.ListNode* bitcast ({ %struct.ListNode*, i32, [4 x i8] }* @node3 to %struct.ListNode*), i32 3, [4 x i8] undef }, align 8
+@node3 = internal constant { %struct.ListNode*, i32, [4 x i8] } { %struct.ListNode* bitcast ({ %struct.ListNode*, i32, [4 x i8] }* @node2 to %struct.ListNode*), i32 2, [4 x i8] undef }, align 8
+@node2 = internal constant { %struct.ListNode*, i32, [4 x i8] } { %struct.ListNode* bitcast ({ %struct.ListNode*, i32, [4 x i8] }* @node1 to %struct.ListNode*), i32 1, [4 x i8] undef }, align 8
+@node1 = internal constant { %struct.ListNode*, i32, [4 x i8] } { %struct.ListNode* null, i32 0, [4 x i8] undef }, align 8
+
+define i32 @test2() nounwind uwtable readonly {
+; CHECK: test2
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %sum.02 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+; CHECK: -->  %sum.02{{ *}}Exits: 10
+  %n.01 = phi %struct.ListNode* [ bitcast ({ %struct.ListNode*, i32, [4 x i8] }* @node5 to %struct.ListNode*), %entry ], [ %1, %for.body ]
+; CHECK: -->  %n.01{{ *}}Exits: @node1
+  %i = getelementptr inbounds %struct.ListNode* %n.01, i64 0, i32 1
+  %0 = load i32* %i, align 4
+  %add = add nsw i32 %0, %sum.02
+  %next = getelementptr inbounds %struct.ListNode* %n.01, i64 0, i32 0
+  %1 = load %struct.ListNode** %next, align 8
+; CHECK: -->  %1{{ *}}Exits: 0
+  %cmp = icmp eq %struct.ListNode* %1, null
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 %add
+}
diff --git a/test/Analysis/ScalarEvolution/nsw-offset.ll b/test/Analysis/ScalarEvolution/nsw-offset.ll
index 8969a5a..a919319 100644
--- a/test/Analysis/ScalarEvolution/nsw-offset.ll
+++ b/test/Analysis/ScalarEvolution/nsw-offset.ll
@@ -23,7 +23,7 @@ bb:                                               ; preds = %bb.nph, %bb1
   %1 = sext i32 %i.01 to i64                      ; <i64> [#uses=1]
 
 ; CHECK: %2 = getelementptr inbounds double* %d, i64 %1
-; CHECK: -->  {%d,+,16}<nsw><%bb>
+; CHECK: -->  {%d,+,16}<nuw><%bb>
   %2 = getelementptr inbounds double* %d, i64 %1  ; <double*> [#uses=1]
 
   %3 = load double* %2, align 8                   ; <double> [#uses=1]
@@ -37,7 +37,7 @@ bb:                                               ; preds = %bb.nph, %bb1
   %8 = sext i32 %7 to i64                         ; <i64> [#uses=1]
 
 ; CHECK: %9 = getelementptr inbounds double* %q, i64 %8
-; CHECK: {(8 + %q),+,16}<nsw><%bb>
+; CHECK: {(8 + %q),+,16}<nuw><%bb>
   %9 = getelementptr inbounds double* %q, i64 %8  ; <double*> [#uses=1]
 
 ; Artificially repeat the above three instructions, this time using
@@ -49,7 +49,7 @@ bb:                                               ; preds = %bb.nph, %bb1
   %t8 = sext i32 %t7 to i64                         ; <i64> [#uses=1]
 
 ; CHECK: %t9 = getelementptr inbounds double* %q, i64 %t8
-; CHECK: {(8 + %q),+,16}<nsw><%bb>
+; CHECK: {(8 + %q),+,16}<nuw><%bb>
   %t9 = getelementptr inbounds double* %q, i64 %t8  ; <double*> [#uses=1]
 
   %10 = load double* %9, align 8                  ; <double> [#uses=1]
diff --git a/test/Analysis/ScalarEvolution/nsw.ll b/test/Analysis/ScalarEvolution/nsw.ll
index da35a6c..288b6fa 100644
--- a/test/Analysis/ScalarEvolution/nsw.ll
+++ b/test/Analysis/ScalarEvolution/nsw.ll
@@ -92,10 +92,10 @@ for.body.i.i:                                     ; preds = %entry, %for.body.i.
 ; CHECK: {1,+,1}<nuw><nsw><%for.body.i.i>
   %ptrincdec.i.i = getelementptr inbounds i32* %begin, i64 %tmp
 ; CHECK: %ptrincdec.i.i =
-; CHECK: {(4 + %begin),+,4}<nsw><%for.body.i.i>
+; CHECK: {(4 + %begin),+,4}<nuw><%for.body.i.i>
   %__first.addr.08.i.i = getelementptr inbounds i32* %begin, i64 %indvar.i.i
 ; CHECK: %__first.addr.08.i.i
-; CHECK: {%begin,+,4}<nsw><%for.body.i.i>
+; CHECK: {%begin,+,4}<nuw><%for.body.i.i>
   store i32 0, i32* %__first.addr.08.i.i, align 4
   %cmp.i.i = icmp eq i32* %ptrincdec.i.i, %end
   br i1 %cmp.i.i, label %_ZSt4fillIPiiEvT_S1_RKT0_.exit, label %for.body.i.i
@@ -103,4 +103,22 @@ for.body.i.i:                                     ; preds = %entry, %for.body.i.
 ; CHECK: Loop %for.body.i.i: max backedge-taken count is ((-4 + (-1 * %begin) + %end) /u 4)
 _ZSt4fillIPiiEvT_S1_RKT0_.exit:                   ; preds = %for.body.i.i, %entry
   ret void
-}
-\ No newline at end of file
+}
+
+; A single AddExpr exists for (%a + %b), which is not always <nsw>.
+; CHECK: @addnsw
+; CHECK-NOT: --> (%a + %b)<nsw>
+define i32 @addnsw(i32 %a, i32 %b) nounwind ssp {
+entry:
+  %tmp = add i32 %a, %b
+  %cmp = icmp sgt i32 %tmp, 0
+  br i1 %cmp, label %greater, label %exit
+
+greater:
+  %tmp2 = add nsw i32 %a, %b
+  br label %exit
+
+exit:
+  %result = phi i32 [ %a, %entry ], [ %tmp2, %greater ]
+  ret i32 %result
+}
diff --git a/test/Analysis/ScalarEvolution/trip-count11.ll b/test/Analysis/ScalarEvolution/trip-count11.ll
new file mode 100644
index 0000000..7191503
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/trip-count11.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@foo.a = internal constant [8 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7], align 16
+
+define i32 @foo() nounwind uwtable noinline {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %sum.0 = phi i32 [ 0, %entry ], [ %add, %for.inc ]
+; CHECK: --> %sum.0 Exits: 28
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp ult i32 %i.0, 8
+  br i1 %cmp, label %for.inc, label %for.end
+
+for.inc:                                          ; preds = %for.cond
+  %idxprom = sext i32 %i.0 to i64
+  %arrayidx = getelementptr inbounds [8 x i32]* @foo.a, i64 0, i64 %idxprom
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %sum.0, %0
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret i32 %sum.0
+}
diff --git a/test/Analysis/ScalarEvolution/trip-count12.ll b/test/Analysis/ScalarEvolution/trip-count12.ll
new file mode 100644
index 0000000..8f960e1
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/trip-count12.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
+
+; CHECK: Determining loop execution counts for: @test
+; CHECK: Loop %for.body: backedge-taken count is ((-2 + %len) /u 2)
+; CHECK: Loop %for.body: max backedge-taken count is 1073741823
+
+define zeroext i16 @test(i16* nocapture %p, i32 %len) nounwind readonly {
+entry:
+  %cmp2 = icmp sgt i32 %len, 1
+  br i1 %cmp2, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.preheader
+  %p.addr.05 = phi i16* [ %incdec.ptr, %for.body ], [ %p, %for.body.preheader ]
+  %len.addr.04 = phi i32 [ %sub, %for.body ], [ %len, %for.body.preheader ]
+  %res.03 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
+  %incdec.ptr = getelementptr inbounds i16* %p.addr.05, i32 1
+  %0 = load i16* %p.addr.05, align 2
+  %conv = zext i16 %0 to i32
+  %add = add i32 %conv, %res.03
+  %sub = add nsw i32 %len.addr.04, -2
+  %cmp = icmp sgt i32 %sub, 1
+  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge:                       ; preds = %for.body
+  %extract.t = trunc i32 %add to i16
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
+  %res.0.lcssa.off0 = phi i16 [ %extract.t, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+  ret i16 %res.0.lcssa.off0
+}
+
diff --git a/test/Analysis/TypeBasedAliasAnalysis/dg.exp b/test/Analysis/TypeBasedAliasAnalysis/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Analysis/TypeBasedAliasAnalysis/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll b/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
index 8fb5fff..1ac5927 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
@@ -24,7 +24,7 @@ define void @test0_no(i32* %p) nounwind {
 ; Add the readonly attribute, since there's just a call to a function which 
 ; TBAA says doesn't modify any memory.
 
-; CHECK: define void @test1_yes(i32* %p) nounwind readonly {
+; CHECK: define void @test1_yes(i32* nocapture %p) nounwind readonly {
 define void @test1_yes(i32* %p) nounwind {
   call void @callee(i32* %p), !tbaa !1
   ret void
diff --git a/test/Analysis/TypeBasedAliasAnalysis/lit.local.cfg b/test/Analysis/TypeBasedAliasAnalysis/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Analysis/TypeBasedAliasAnalysis/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Archive/dg.exp b/test/Archive/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Archive/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Archive/lit.local.cfg b/test/Archive/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Archive/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Assembler/AutoUpgradeIntrinsics.ll b/test/Assembler/AutoUpgradeIntrinsics.ll
deleted file mode 100644
index daffa3d..0000000
--- a/test/Assembler/AutoUpgradeIntrinsics.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; Tests to make sure intrinsics are automatically upgraded.
-; RUN: llvm-as < %s | llvm-dis | FileCheck %s
-
-
-declare <4 x float> @llvm.x86.sse.loadu.ps(i8*) nounwind readnone
-declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*) nounwind readnone
-declare <2 x double> @llvm.x86.sse2.loadu.pd(double*) nounwind readnone
-define void @test_loadu(i8* %a, double* %b) {
-  %v0 = call <4 x float> @llvm.x86.sse.loadu.ps(i8* %a)
-  %v1 = call <16 x i8> @llvm.x86.sse2.loadu.dq(i8* %a)
-  %v2 = call <2 x double> @llvm.x86.sse2.loadu.pd(double* %b)
-
-; CHECK: load i128* {{.*}}, align 1
-; CHECK: load i128* {{.*}}, align 1
-; CHECK: load i128* {{.*}}, align 1
-  ret void
-}
-
-declare void @llvm.x86.sse.movnt.ps(i8*, <4 x float>) nounwind readnone 
-declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x double>) nounwind readnone 
-declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind readnone 
-declare void @llvm.x86.sse2.movnt.i(i8*, i32) nounwind readnone 
-
-define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D) {
-; CHECK: store{{.*}}nontemporal
-  call void @llvm.x86.sse.movnt.ps(i8* %B, <4 x float> %A)
-; CHECK: store{{.*}}nontemporal
-  call void @llvm.x86.sse2.movnt.dq(i8* %B, <2 x double> %C)
-; CHECK: store{{.*}}nontemporal
-  call void @llvm.x86.sse2.movnt.pd(i8* %B, <2 x double> %C)
-; CHECK: store{{.*}}nontemporal
-  call void @llvm.x86.sse2.movnt.i(i8* %B, i32 %D)
-  ret void
-}
-
-declare void @llvm.prefetch(i8*, i32, i32) nounwind
-
-define void @p(i8* %ptr) {
-; CHECK: llvm.prefetch(i8* %ptr, i32 0, i32 1, i32 1)
-  tail call void @llvm.prefetch(i8* %ptr, i32 0, i32 1)
-  ret void
-}
-
-declare i32 @nest_f(i8* nest, i32)
-declare i8* @llvm.init.trampoline(i8*, i8*, i8*)
-
-define void @test_trampolines() {
-; CHECK: call void @llvm.init.trampoline(i8* null, i8* bitcast (i32 (i8*, i32)* @nest_f to i8*), i8* null)
-; CHECK: call i8* @llvm.adjust.trampoline(i8* null)
-
-  call i8* @llvm.init.trampoline(i8* null,
-                                 i8* bitcast (i32 (i8*, i32)* @nest_f to i8*),
-                                 i8* null)
-  ret void
-}
diff --git a/test/Assembler/aggregate-constant-values.ll b/test/Assembler/aggregate-constant-values.ll
index a37d03e..d0aab81 100644
--- a/test/Assembler/aggregate-constant-values.ll
+++ b/test/Assembler/aggregate-constant-values.ll
@@ -1,25 +1,48 @@
-; RUN: llvm-as < %s | llvm-dis | grep 7 | count 3
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
 
+; CHECK: @foo
+; CHECK: store { i32, i32 } { i32 7, i32 9 }, { i32, i32 }* %x
+; CHECK: ret
 define void @foo({i32, i32}* %x) nounwind {
   store {i32, i32}{i32 7, i32 9}, {i32, i32}* %x
   ret void
 }
+
+; CHECK: @foo_empty
+; CHECK: store {} zeroinitializer, {}* %x
+; CHECK: ret
 define void @foo_empty({}* %x) nounwind {
   store {}{}, {}* %x
   ret void
 }
+
+; CHECK: @bar
+; CHECK: store [2 x i32] [i32 7, i32 9], [2 x i32]* %x
+; CHECK: ret
 define void @bar([2 x i32]* %x) nounwind {
   store [2 x i32][i32 7, i32 9], [2 x i32]* %x
   ret void
 }
+
+; CHECK: @bar_empty
+; CHECK: store [0 x i32] undef, [0 x i32]* %x
+; CHECK: ret
 define void @bar_empty([0 x i32]* %x) nounwind {
   store [0 x i32][], [0 x i32]* %x
   ret void
 }
+
+; CHECK: @qux
+; CHECK: store <{ i32, i32 }> <{ i32 7, i32 9 }>, <{ i32, i32 }>* %x
+; CHECK: ret
 define void @qux(<{i32, i32}>* %x) nounwind {
   store <{i32, i32}><{i32 7, i32 9}>, <{i32, i32}>* %x
   ret void
 }
+
+; CHECK: @qux_empty
+; CHECK: store <{}> zeroinitializer, <{}>* %x
+; CHECK: ret
 define void @qux_empty(<{}>* %x) nounwind {
   store <{}><{}>, <{}>* %x
   ret void
diff --git a/test/Assembler/auto_upgrade_intrinsics.ll b/test/Assembler/auto_upgrade_intrinsics.ll
new file mode 100644
index 0000000..7ad5cc3
--- /dev/null
+++ b/test/Assembler/auto_upgrade_intrinsics.ll
@@ -0,0 +1,44 @@
+; Test to make sure intrinsics are automatically upgraded.
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+declare i8 @llvm.ctlz.i8(i8)
+declare i16 @llvm.ctlz.i16(i16)
+declare i32 @llvm.ctlz.i32(i32)
+declare i42 @llvm.ctlz.i42(i42)  ; Not a power-of-2
+
+define void @test.ctlz(i8 %a, i16 %b, i32 %c, i42 %d) {
+; CHECK: @test.ctlz
+
+entry:
+  ; CHECK: call i8 @llvm.ctlz.i8(i8 %a, i1 false)
+  call i8 @llvm.ctlz.i8(i8 %a)
+  ; CHECK: call i16 @llvm.ctlz.i16(i16 %b, i1 false)
+  call i16 @llvm.ctlz.i16(i16 %b)
+  ; CHECK: call i32 @llvm.ctlz.i32(i32 %c, i1 false)
+  call i32 @llvm.ctlz.i32(i32 %c)
+  ; CHECK: call i42 @llvm.ctlz.i42(i42 %d, i1 false)
+  call i42 @llvm.ctlz.i42(i42 %d)
+
+  ret void
+}
+
+declare i8 @llvm.cttz.i8(i8)
+declare i16 @llvm.cttz.i16(i16)
+declare i32 @llvm.cttz.i32(i32)
+declare i42 @llvm.cttz.i42(i42)  ; Not a power-of-2
+
+define void @test.cttz(i8 %a, i16 %b, i32 %c, i42 %d) {
+; CHECK: @test.cttz
+
+entry:
+  ; CHECK: call i8 @llvm.cttz.i8(i8 %a, i1 false)
+  call i8 @llvm.cttz.i8(i8 %a)
+  ; CHECK: call i16 @llvm.cttz.i16(i16 %b, i1 false)
+  call i16 @llvm.cttz.i16(i16 %b)
+  ; CHECK: call i32 @llvm.cttz.i32(i32 %c, i1 false)
+  call i32 @llvm.cttz.i32(i32 %c)
+  ; CHECK: call i42 @llvm.cttz.i42(i42 %d, i1 false)
+  call i42 @llvm.cttz.i42(i42 %d)
+
+  ret void
+}
diff --git a/test/Assembler/bcwrap.ll b/test/Assembler/bcwrap.ll
index 859dc4b..4bec48c 100644
--- a/test/Assembler/bcwrap.ll
+++ b/test/Assembler/bcwrap.ll
@@ -1,9 +1,11 @@
 ; RUN: llvm-as < %s > %t
-; RUN: llvm-nm %t | grep foo
-; test for isBitcodeFile, llvm-nm must read from a file for this test
+; RUN: llvm-nm %t | FileCheck %s
+; Test for isBitcodeFile, llvm-nm must read from a file for this test.
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin9.2.2"
 
+; CHECK: foo
+
 define i32 @foo() {
   ret i32 0
 }
diff --git a/test/Assembler/dg.exp b/test/Assembler/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Assembler/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Assembler/extractvalue-invalid-idx.ll b/test/Assembler/extractvalue-invalid-idx.ll
index f9644ea..9a215f7 100644
--- a/test/Assembler/extractvalue-invalid-idx.ll
+++ b/test/Assembler/extractvalue-invalid-idx.ll
@@ -1,6 +1,8 @@
-; RUN: not llvm-as < %s |& grep {invalid indices for extractvalue}
+; RUN: not llvm-as < %s |& FileCheck %s
 ; PR4170
 
+; CHECK: invalid indices for extractvalue
+
 define void @test() {
 entry:
         extractvalue [0 x i32] undef, 0
diff --git a/test/Assembler/getelementptr_struct.ll b/test/Assembler/getelementptr_struct.ll
index c8779a6..bfebf29 100644
--- a/test/Assembler/getelementptr_struct.ll
+++ b/test/Assembler/getelementptr_struct.ll
@@ -1,6 +1,8 @@
-; RUN: not llvm-as < %s >/dev/null |& grep {invalid getelementptr indices}
+; RUN: not llvm-as < %s >/dev/null |& FileCheck %s
 ; Test the case of a incorrect indices type into struct
 
+; CHECK: invalid getelementptr indices
+
 %RT = type { i8 , [10 x [20 x i32]], i8  }
 %ST = type { i32, double, %RT }
 
diff --git a/test/Assembler/huge-array.ll b/test/Assembler/huge-array.ll
index e080947..a1abf87 100644
--- a/test/Assembler/huge-array.ll
+++ b/test/Assembler/huge-array.ll
@@ -1,5 +1,7 @@
-; RUN: llvm-as < %s | llvm-dis | grep 18446744073709551615 | count 2
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
 
+; CHECK: define [18446744073709551615 x i8]* @foo() {
+; CHECK: ret [18446744073709551615 x i8]* null
 define [18446744073709551615 x i8]* @foo() {
   ret [18446744073709551615 x i8]* null
 }
diff --git a/test/Assembler/insertextractvalue.ll b/test/Assembler/insertextractvalue.ll
index 2f5521f..6c00b13 100644
--- a/test/Assembler/insertextractvalue.ll
+++ b/test/Assembler/insertextractvalue.ll
@@ -1,7 +1,11 @@
-; RUN: llvm-as < %s | llvm-dis > %t
-; RUN: grep insertvalue %t | count 1
-; RUN: grep extractvalue %t | count 1
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
 
+; CHECK:      @foo
+; CHECK-NEXT: load
+; CHECK-NEXT: extractvalue
+; CHECK-NEXT: insertvalue
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
 define float @foo({{i32},{float, double}}* %p) nounwind {
   %t = load {{i32},{float, double}}* %p
   %s = extractvalue {{i32},{float, double}} %t, 1, 0
@@ -9,21 +13,34 @@ define float @foo({{i32},{float, double}}* %p) nounwind {
   store {{i32},{float, double}} %r, {{i32},{float, double}}* %p
   ret float %s
 }
+
+; CHECK:      @bar
+; CHECK-NEXT: store { { i32 }, { float, double } } { { i32 } { i32 4 }, { float, double } { float 4.000000e+00, double 2.000000e+01 } }, { { i32 }, { float, double } }* %p
+; CHECK-NEXT: ret float 7.000000e+00
 define float @bar({{i32},{float, double}}* %p) nounwind {
   store {{i32},{float, double}} insertvalue ({{i32},{float, double}}{{i32}{i32 4},{float, double}{float 4.0, double 5.0}}, double 20.0, 1, 1), {{i32},{float, double}}* %p
   ret float extractvalue ({{i32},{float, double}}{{i32}{i32 3},{float, double}{float 7.0, double 9.0}}, 1, 0)
 }
+
+; CHECK:      @car
+; CHECK-NEXT: store { { i32 }, { float, double } } { { i32 } undef, { float, double } { float undef, double 2.000000e+01 } }, { { i32 }, { float, double } }* %p
+; CHECK-NEXT: ret float undef
 define float @car({{i32},{float, double}}* %p) nounwind {
   store {{i32},{float, double}} insertvalue ({{i32},{float, double}} undef, double 20.0, 1, 1), {{i32},{float, double}}* %p
   ret float extractvalue ({{i32},{float, double}} undef, 1, 0)
 }
+
+; CHECK:      @dar
+; CHECK-NEXT: store { { i32 }, { float, double } } { { i32 } zeroinitializer, { float, double } { float 0.000000e+00, double 2.000000e+01 } }, { { i32 }, { float, double } }* %p
+; CHECK-NEXT: ret float 0.000000e+00
 define float @dar({{i32},{float, double}}* %p) nounwind {
   store {{i32},{float, double}} insertvalue ({{i32},{float, double}} zeroinitializer, double 20.0, 1, 1), {{i32},{float, double}}* %p
   ret float extractvalue ({{i32},{float, double}} zeroinitializer, 1, 0)
 }
 
-
 ; PR4963
+; CHECK:      @test57
+; CHECK-NEXT: ret <{ i32, i32 }> <{ i32 0, i32 4 }>
 define <{ i32, i32 }> @test57() {
   ret <{ i32, i32 }> insertvalue (<{ i32, i32 }> zeroinitializer, i32 4, 1)
 }
diff --git a/test/Assembler/insertvalue-invalid-idx.ll b/test/Assembler/insertvalue-invalid-idx.ll
index 86e7258..355d4e8 100644
--- a/test/Assembler/insertvalue-invalid-idx.ll
+++ b/test/Assembler/insertvalue-invalid-idx.ll
@@ -1,7 +1,9 @@
-; RUN: not llvm-as < %s |& grep {invalid indices for insertvalue}
+; RUN: not llvm-as < %s |& FileCheck %s
+
+; CHECK: invalid indices for insertvalue
 
 define void @test() {
 entry:
-        insertvalue [0 x i32] undef, i32 0, 0
-        ret void
+  insertvalue [0 x i32] undef, i32 0, 0
+  ret void
 }
diff --git a/test/Assembler/invalid_cast.ll b/test/Assembler/invalid_cast.ll
index c5b082b..f682835 100644
--- a/test/Assembler/invalid_cast.ll
+++ b/test/Assembler/invalid_cast.ll
@@ -1,4 +1,6 @@
-; RUN: not llvm-as < %s |& grep {invalid cast opcode}
+; RUN: not llvm-as < %s |& FileCheck %s
+
+; CHECK: invalid cast opcode for cast from '<4 x i64>' to '<3 x i8>'
 
 define <3 x i8> @foo(<4 x i64> %x) {
   %y = trunc <4 x i64> %x to <3 x i8>
diff --git a/test/Assembler/invalid_cast2.ll b/test/Assembler/invalid_cast2.ll
index f2e7c41..a01b935 100644
--- a/test/Assembler/invalid_cast2.ll
+++ b/test/Assembler/invalid_cast2.ll
@@ -1,4 +1,6 @@
-; RUN: not llvm-as < %s |& grep {invalid cast opcode}
+; RUN: not llvm-as < %s |& FileCheck %s
+
+; CHECK: invalid cast opcode for cast from '<4 x i64>' to 'i8'
 
 define i8 @foo(<4 x i64> %x) {
   %y = trunc <4 x i64> %x to i8
diff --git a/test/Assembler/lit.local.cfg b/test/Assembler/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Assembler/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Assembler/metadata.ll b/test/Assembler/metadata.ll
index 50f27b4..56888fd 100644
--- a/test/Assembler/metadata.ll
+++ b/test/Assembler/metadata.ll
@@ -1,12 +1,12 @@
-; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | grep {ret void, !bar !1, !foo !0}
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+
+; CHECK: @test
+; CHECK: ret void, !bar !1, !foo !0
 define void @test() {
   add i32 2, 1, !bar !0
   add i32 1, 2, !foo !1
-  
   call void @llvm.dbg.func.start(metadata !"foo")
-  
   extractvalue {{i32, i32}, i32} undef, 0, 1, !foo !0
-  
   ret void, !foo !0, !bar !1
 }
 
@@ -15,8 +15,5 @@ define void @test() {
 
 declare void @llvm.dbg.func.start(metadata) nounwind readnone
 
-
 !foo = !{ !0 }
 !bar = !{ !1 }
-
-; !foo = !{ !0, !"foo" }
diff --git a/test/Assembler/vbool-cmp.ll b/test/Assembler/vbool-cmp.ll
index ac8fb29..e652d2f 100644
--- a/test/Assembler/vbool-cmp.ll
+++ b/test/Assembler/vbool-cmp.ll
@@ -1,15 +1,18 @@
-; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | grep {icmp slt}
-; rudimentary test of fcmp/icmp on vectors returning vector of bool
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+; Rudimentary test of fcmp/icmp on vectors returning vector of bool
 
+; CHECK: @ffoo
+; CHECK: fcmp olt <4 x float> %a, %b
 define <4 x i1> @ffoo(<4 x float> %a, <4 x float> %b) nounwind {
 entry:
-	%cmp = fcmp olt <4 x float> %a, %b		; <4 x i1> [#uses=1]
-	ret <4 x i1> %cmp
+  %cmp = fcmp olt <4 x float> %a, %b		; <4 x i1> [#uses=1]
+  ret <4 x i1> %cmp
 }
 
+; CHECK: @ifoo
+; CHECK: icmp slt <4 x i32> %a, %b
 define <4 x i1> @ifoo(<4 x i32> %a, <4 x i32> %b) nounwind {
 entry:
-	%cmp = icmp slt <4 x i32> %a, %b		; <4 x i1> [#uses=1]
-	ret <4 x i1> %cmp
+  %cmp = icmp slt <4 x i32> %a, %b		; <4 x i1> [#uses=1]
+  ret <4 x i1> %cmp
 }
-
diff --git a/test/Assembler/vector-cmp.ll b/test/Assembler/vector-cmp.ll
index 688369b..6e3894c 100644
--- a/test/Assembler/vector-cmp.ll
+++ b/test/Assembler/vector-cmp.ll
@@ -1,16 +1,16 @@
-; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | grep {global.*icmp slt}
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
 ; PR2317
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin9.2.2"
 
+; CHECK: @1 = global <4 x i1> <i1 icmp slt (i32 ptrtoint (i32* @B to i32), i32 1), i1 true, i1 false, i1 true>
+
 define <4 x i1> @foo(<4 x float> %a, <4 x float> %b) nounwind  {
 entry:
-	%cmp = fcmp olt <4 x float> %a, %b		; <4 x i32> [#uses=1]
-	ret <4 x i1> %cmp
+  %cmp = fcmp olt <4 x float> %a, %b		; <4 x i32> [#uses=1]
+  ret <4 x i1> %cmp
 }
 
 global <4 x i1> icmp slt ( <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32>  <i32 1, i32 2, i32 1, i32 2> )
-
 @B = external global i32
-
 global <4 x i1> icmp slt ( <4 x i32> <i32 ptrtoint (i32 * @B to i32), i32 1, i32 1, i32 1>, <4 x i32>  <i32 1, i32 2, i32 1, i32 2> )
diff --git a/test/Assembler/vector-select.ll b/test/Assembler/vector-select.ll
index 87af602..ae8358a 100644
--- a/test/Assembler/vector-select.ll
+++ b/test/Assembler/vector-select.ll
@@ -1,11 +1,11 @@
-; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | grep select
-; rudimentary test of select on vectors returning vector of bool
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+; Rudimentary test of select on vectors returning vector of bool
 
-define <4 x i32> @foo(<4 x i32> %a, <4 x i32> %b,
-    <4 x i1> %cond) nounwind  {
+; CHECK: @foo
+; CHECK: select <4 x i1> %cond, <4 x i32> %a, <4 x i32> %b
+define <4 x i32> @foo(<4 x i32> %a, <4 x i32> %b, <4 x i1> %cond) nounwind  {
 entry:
-  %cmp = select <4 x i1>  %cond, <4 x i32> %a, <4 x i32> %b 
-                             ; <4 x i32> [#uses=1]
+  %cmp = select <4 x i1>  %cond, <4 x i32> %a, <4 x i32> %b
   ret <4 x i32> %cmp
 }
 
diff --git a/test/Assembler/vector-shift.ll b/test/Assembler/vector-shift.ll
index 1850e66..6a6531b 100644
--- a/test/Assembler/vector-shift.ll
+++ b/test/Assembler/vector-shift.ll
@@ -1,32 +1,45 @@
-; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | grep shl | count 1
-; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | grep ashr | count 1
-; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | grep lshr | count 1
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
 
+; CHECK: @foo
+; CHECK: shl
 define <4 x i32> @foo(<4 x i32> %a, <4 x i32> %b) nounwind  {
 entry:
-	%cmp = shl <4 x i32> %a, %b		; <4 x i32> [#uses=1]
-	ret <4 x i32> %cmp
+  %cmp = shl <4 x i32> %a, %b		; <4 x i32> [#uses=1]
+  ret <4 x i32> %cmp
 }
 
+; CHECK: @bar
+; CHECK: lshr
 define <4 x i32> @bar(<4 x i32> %a, <4 x i32> %b) nounwind  {
 entry:
-	%cmp = lshr <4 x i32> %a, %b		; <4 x i32> [#uses=1]
-	ret <4 x i32> %cmp
+  %cmp = lshr <4 x i32> %a, %b		; <4 x i32> [#uses=1]
+  ret <4 x i32> %cmp
 }
 
+; CHECK: @baz
+; CHECK: ashr 
 define <4 x i32> @baz(<4 x i32> %a, <4 x i32> %b) nounwind  {
 entry:
-	%cmp = ashr <4 x i32> %a, %b		; <4 x i32> [#uses=1]
-	ret <4 x i32> %cmp
+  %cmp = ashr <4 x i32> %a, %b		; <4 x i32> [#uses=1]
+  ret <4 x i32> %cmp
 }
 
 ; Constant expressions: these should be folded.
+
+; CHECK: @foo_ce
+; CHECK: ret <2 x i64> <i64 40, i64 192>
 define <2 x i64> @foo_ce() nounwind {
   ret <2 x i64> shl (<2 x i64> <i64 5, i64 6>, <2 x i64> <i64 3, i64 5>)
 }
+
+; CHECK: @bar_ce
+; CHECK: ret <2 x i64> <i64 42, i64 11>
 define <2 x i64> @bar_ce() nounwind {
   ret <2 x i64> lshr (<2 x i64> <i64 340, i64 380>, <2 x i64> <i64 3, i64 5>)
 }
+
+; CHECK: baz_ce
+; CHECK: ret <2 x i64> <i64 71, i64 12>
 define <2 x i64> @baz_ce() nounwind {
   ret <2 x i64> ashr (<2 x i64> <i64 573, i64 411>, <2 x i64> <i64 3, i64 5>)
 }
diff --git a/test/Bindings/Ocaml/dg.exp b/test/Bindings/Ocaml/dg.exp
deleted file mode 100644
index fb4bd07..0000000
--- a/test/Bindings/Ocaml/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if [ llvm_supports_binding ocaml ] then {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,ml}]]
-}
diff --git a/test/Bindings/Ocaml/lit.local.cfg b/test/Bindings/Ocaml/lit.local.cfg
new file mode 100644
index 0000000..640c58d
--- /dev/null
+++ b/test/Bindings/Ocaml/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp', '.ml']
+
+bindings = set([s.strip() for s in config.root.llvm_bindings.split(',')])
+if not 'ocaml' in bindings:
+    config.unsupported = True
+
diff --git a/test/Bitcode/AutoUpgradeGlobals.ll b/test/Bitcode/AutoUpgradeGlobals.ll
deleted file mode 100644
index a5af2b8..0000000
--- a/test/Bitcode/AutoUpgradeGlobals.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; This isn't really an assembly file. It just runs test on bitcode to ensure
-; it is auto-upgraded.
-; RUN: llvm-dis < %s.bc | FileCheck %s 
-; CHECK-NOT: {i32 @\\.llvm\\.eh}
diff --git a/test/Bitcode/AutoUpgradeGlobals.ll.bc b/test/Bitcode/AutoUpgradeGlobals.ll.bc
deleted file mode 100644
index 1abe968..0000000
--- a/test/Bitcode/AutoUpgradeGlobals.ll.bc
+++ /dev/null
diff --git a/test/Bitcode/dg.exp b/test/Bitcode/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Bitcode/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Bitcode/lit.local.cfg b/test/Bitcode/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Bitcode/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Bitcode/null-type.ll b/test/Bitcode/null-type.ll
index 5d3dfab..b972753 100644
--- a/test/Bitcode/null-type.ll
+++ b/test/Bitcode/null-type.ll
@@ -1,2 +1,4 @@
-; RUN: not llvm-dis < %s.bc > /dev/null |& grep "Invalid MODULE_CODE_FUNCTION record"
+; RUN: not llvm-dis < %s.bc > /dev/null |& FileCheck %s
 ; PR8494
+
+; CHECK: Invalid MODULE_CODE_FUNCTION record
diff --git a/test/Bitcode/shuffle.ll b/test/Bitcode/shuffle.ll
new file mode 100644
index 0000000..c3c01c6
--- /dev/null
+++ b/test/Bitcode/shuffle.ll
@@ -0,0 +1,31 @@
+; RUN: llvm-as < %s | llvm-dis
+
+; <rdar://problem/8622574>
+; tests the bitcodereader can handle the case where the reader will initially
+; create shuffle with a place holder mask.
+
+
+define <4 x float> @test(<2 x double> %d2)  {
+entry:
+  %call20.i = tail call <4 x float> @cmp(<2 x double> %d2,
+                                        <2 x double> bitcast (
+                                          <4 x float> shufflevector (
+                                            <3 x float> shufflevector (
+                                              <4 x float> shufflevector (
+                                                <3 x float> bitcast (
+                                                  i96 trunc (
+                                                    i128 bitcast (<2 x double> bitcast (
+                                                      <4 x i32> <i32 0, i32 0, i32 0, i32 undef> to <2 x double>)
+                                                    to i128) to i96)
+                                                  to <3 x float>),
+                                                <3 x float> undef,
+                                                <4 x i32> <i32 0, i32 1, i32 2, i32 undef>),
+                                              <4 x float> undef,
+                                            <3 x i32> <i32 0, i32 1, i32 2>),
+                                            <3 x float> undef,
+                                            <4 x i32> <i32 0, i32 1, i32 2, i32 undef>)
+                                          to <2 x double>))
+  ret <4 x float> %call20.i
+}
+
+declare <4 x float> @cmp(<2 x double>, <2 x double>)
diff --git a/test/Bitcode/sse42_crc32.ll b/test/Bitcode/sse42_crc32.ll
deleted file mode 100644
index 1c371c3..0000000
--- a/test/Bitcode/sse42_crc32.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; Check to make sure old CRC32 intrinsics are auto-upgraded
-; correctly.
-;
-; Rdar: 9472944
-;
-; RUN: llvm-dis < %s.bc | FileCheck %s
-
-; crc32.8 should upgrade to crc32.32.8
-; CHECK: i32 @llvm.x86.sse42.crc32.32.8(
-; CHECK-NOT: i32 @llvm.x86.sse42.crc32.8(
-
-; crc32.16 should upgrade to crc32.32.16
-; CHECK: i32 @llvm.x86.sse42.crc32.32.16(
-; CHECK-NOT: i32 @llvm.x86.sse42.crc32.16(
-
-; crc32.32 should upgrade to crc32.32.32
-; CHECK: i32 @llvm.x86.sse42.crc32.32.32(
-; CHECK-NOT: i32 @llvm.x86.sse42.crc32.32(
-
-; crc64.8 should upgrade to crc32.64.8
-; CHECK: i64 @llvm.x86.sse42.crc32.64.8(
-; CHECK-NOT: i64 @llvm.x86.sse42.crc64.8(
-
-; crc64.64 should upgrade to crc32.64.64
-; CHECK: i64 @llvm.x86.sse42.crc32.64.64(
-; CHECK-NOT: i64 @llvm.x86.sse42.crc64.64(
-
-
diff --git a/test/Bitcode/sse42_crc32.ll.bc b/test/Bitcode/sse42_crc32.ll.bc
deleted file mode 100644
index d895fad..0000000
--- a/test/Bitcode/sse42_crc32.ll.bc
+++ /dev/null
diff --git a/test/Bitcode/ssse3_palignr.ll b/test/Bitcode/ssse3_palignr.ll
index f62ca11..90b4394 100644
--- a/test/Bitcode/ssse3_palignr.ll
+++ b/test/Bitcode/ssse3_palignr.ll
@@ -1,2 +1,82 @@
-; RUN: llvm-dis < %s.bc | FileCheck %s 
+; RUN: opt < %s -S | FileCheck %s
 ; CHECK-NOT: {@llvm\\.palign}
+
+define <4 x i32> @align1(<4 x i32> %a, <4 x i32> %b) nounwind readnone ssp {
+entry:
+  %0 = bitcast <4 x i32> %b to <2 x i64>          ; <<2 x i64>> [#uses=1]
+  %1 = bitcast <4 x i32> %a to <2 x i64>          ; <<2 x i64>> [#uses=1]
+  %2 = tail call <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64> %1, <2 x i64> %0, i8 15) ; <<2 x i64>> [#uses=1]
+  %3 = bitcast <2 x i64> %2 to <4 x i32>          ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %3
+}
+
+define double @align8(<2 x i32> %a, <2 x i32> %b) nounwind readnone ssp {
+entry:
+  %0 = bitcast <2 x i32> %b to <1 x i64>          ; <<1 x i64>> [#uses=1]
+  %1 = bitcast <2 x i32> %a to <1 x i64>          ; <<1 x i64>> [#uses=1]
+  %2 = tail call <1 x i64> @llvm.x86.ssse3.palign.r(<1 x i64> %1, <1 x i64> %0, i8 7) ; <<1 x i64>> [#uses=1]
+  %3 = extractelement <1 x i64> %2, i32 0         ; <i64> [#uses=1]
+  %retval12 = bitcast i64 %3 to double            ; <double> [#uses=1]
+  ret double %retval12
+}
+
+declare <1 x i64> @llvm.x86.ssse3.palign.r(<1 x i64>, <1 x i64>, i8) nounwind readnone
+
+define double @align7(<2 x i32> %a, <2 x i32> %b) nounwind readnone ssp {
+entry:
+  %0 = bitcast <2 x i32> %b to <1 x i64>          ; <<1 x i64>> [#uses=1]
+  %1 = bitcast <2 x i32> %a to <1 x i64>          ; <<1 x i64>> [#uses=1]
+  %2 = tail call <1 x i64> @llvm.x86.ssse3.palign.r(<1 x i64> %1, <1 x i64> %0, i8 16) ; <<1 x i64>> [#uses=1]
+  %3 = extractelement <1 x i64> %2, i32 0         ; <i64> [#uses=1]
+  %retval12 = bitcast i64 %3 to double            ; <double> [#uses=1]
+  ret double %retval12
+}
+
+define double @align6(<2 x i32> %a, <2 x i32> %b) nounwind readnone ssp {
+entry:
+  %0 = bitcast <2 x i32> %b to <1 x i64>          ; <<1 x i64>> [#uses=1]
+  %1 = bitcast <2 x i32> %a to <1 x i64>          ; <<1 x i64>> [#uses=1]
+  %2 = tail call <1 x i64> @llvm.x86.ssse3.palign.r(<1 x i64> %1, <1 x i64> %0, i8 9) ; <<1 x i64>> [#uses=1]
+  %3 = extractelement <1 x i64> %2, i32 0         ; <i64> [#uses=1]
+  %retval12 = bitcast i64 %3 to double            ; <double> [#uses=1]
+  ret double %retval12
+}
+
+define double @align5(<2 x i32> %a, <2 x i32> %b) nounwind readnone ssp {
+entry:
+  %0 = bitcast <2 x i32> %b to <1 x i64>          ; <<1 x i64>> [#uses=1]
+  %1 = bitcast <2 x i32> %a to <1 x i64>          ; <<1 x i64>> [#uses=1]
+  %2 = tail call <1 x i64> @llvm.x86.ssse3.palign.r(<1 x i64> %1, <1 x i64> %0, i8 8) ; <<1 x i64>> [#uses=1]
+  %3 = extractelement <1 x i64> %2, i32 0         ; <i64> [#uses=1]
+  %retval12 = bitcast i64 %3 to double            ; <double> [#uses=1]
+  ret double %retval12
+}
+
+define <4 x i32> @align4(<4 x i32> %a, <4 x i32> %b) nounwind readnone ssp {
+entry:
+  %0 = bitcast <4 x i32> %b to <2 x i64>          ; <<2 x i64>> [#uses=1]
+  %1 = bitcast <4 x i32> %a to <2 x i64>          ; <<2 x i64>> [#uses=1]
+  %2 = tail call <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64> %1, <2 x i64> %0, i8 32) ; <<2 x i64>> [#uses=1]
+  %3 = bitcast <2 x i64> %2 to <4 x i32>          ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %3
+}
+
+declare <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64>, <2 x i64>, i8) nounwind readnone
+
+define <4 x i32> @align3(<4 x i32> %a, <4 x i32> %b) nounwind readnone ssp {
+entry:
+  %0 = bitcast <4 x i32> %b to <2 x i64>          ; <<2 x i64>> [#uses=1]
+  %1 = bitcast <4 x i32> %a to <2 x i64>          ; <<2 x i64>> [#uses=1]
+  %2 = tail call <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64> %1, <2 x i64> %0, i8 17) ; <<2 x i64>> [#uses=1]
+  %3 = bitcast <2 x i64> %2 to <4 x i32>          ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %3
+}
+
+define <4 x i32> @align2(<4 x i32> %a, <4 x i32> %b) nounwind readnone ssp {
+entry:
+  %0 = bitcast <4 x i32> %b to <2 x i64>          ; <<2 x i64>> [#uses=1]
+  %1 = bitcast <4 x i32> %a to <2 x i64>          ; <<2 x i64>> [#uses=1]
+  %2 = tail call <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64> %1, <2 x i64> %0, i8 16) ; <<2 x i64>> [#uses=1]
+  %3 = bitcast <2 x i64> %2 to <4 x i32>          ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %3
+}
diff --git a/test/Bitcode/ssse3_palignr.ll.bc b/test/Bitcode/ssse3_palignr.ll.bc
deleted file mode 100644
index 3fc9cdf..0000000
--- a/test/Bitcode/ssse3_palignr.ll.bc
+++ /dev/null
diff --git a/test/BugPoint/dg.exp b/test/BugPoint/dg.exp
deleted file mode 100644
index de42dad..0000000
--- a/test/BugPoint/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.ll]]
diff --git a/test/BugPoint/lit.local.cfg b/test/BugPoint/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/BugPoint/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 7bb1bdd..8cebb7c 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -24,98 +24,58 @@ else() # Default for all other unix like systems.
   set(SHLIBPATH_VAR "LD_LIBRARY_PATH")
 endif()
 
-include(FindPythonInterp)
-if(PYTHONINTERP_FOUND)
-  set(LIT_ARGS "${LLVM_LIT_ARGS}")
-  separate_arguments(LIT_ARGS)
+set(LIT_ARGS "${LLVM_LIT_ARGS}")
+separate_arguments(LIT_ARGS)
 
-  get_directory_property(DEFINITIONS COMPILE_DEFINITIONS)
-  foreach(DEF ${DEFINITIONS})
-    set(DEFS "${DEFS} -D${DEF}")
-  endforeach()
-  get_directory_property(INC_DIRS INCLUDE_DIRECTORIES)
-  foreach(INC_DIR ${INC_DIRS})
-    set(IDIRS "${IDIRS} -I${INC_DIR}")
-  endforeach()
+configure_file(
+  ${CMAKE_CURRENT_SOURCE_DIR}/site.exp.in
+  ${CMAKE_CURRENT_BINARY_DIR}/site.exp)
 
-  if( MSVC )
-    # The compiler's path may contain white space. Wrap it:
-    string(REPLACE "<CMAKE_CXX_COMPILER>" "\\\"${CMAKE_CXX_COMPILER}\\\"" TEST_COMPILE_CXX_CMD ${CMAKE_CXX_COMPILE_OBJECT})
-    # Eliminate continuation lines from NMake flow. PR9680
-    string(REPLACE "@<<\n"                " "                     TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD})
-    string(REPLACE "\n<<"                 " "                     TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD})
-  else()
-    string(REPLACE "<CMAKE_CXX_COMPILER>" "${CMAKE_CXX_COMPILER}" TEST_COMPILE_CXX_CMD ${CMAKE_CXX_COMPILE_OBJECT})
-  endif()
+MAKE_DIRECTORY(${CMAKE_CURRENT_BINARY_DIR}/Unit)
 
-  string(REPLACE "<DEFINES>"            "${DEFS}"               TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD})
-  string(REPLACE "<FLAGS>"              "${CMAKE_CXX_FLAGS}"    TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD})
-  if (MSVC) # PR9680
-    # Eliminate MSVC equivalent of -o
-    string(REPLACE "/Fo<OBJECT>"        ""                      TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD})
-    # Eliminate "how to rename program database" argument
-    string(REPLACE "/Fd<TARGET_PDB>"    ""                      TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD})
-  else()
-    string(REPLACE "-o"                 ""                      TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD})
-  endif(MSVC)
-  string(REGEX REPLACE "<[^>]+>"        ""                      TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD})
-  set(TEST_COMPILE_CXX_CMD "${TEST_COMPILE_CXX_CMD} ${IDIRS}")
-  if(NOT MSVC)
-    set(TEST_COMPILE_CXX_CMD "${TEST_COMPILE_CXX_CMD} -x c++")
-    # MSVC already has /TP to indicate a C++ source file
-  endif()
-  configure_file(
-    ${CMAKE_CURRENT_SOURCE_DIR}/site.exp.in
-    ${CMAKE_CURRENT_BINARY_DIR}/site.exp)
+# Configuration-time: See Unit/lit.site.cfg.in
+set(LLVM_BUILD_MODE "%(build_mode)s")
 
-  MAKE_DIRECTORY(${CMAKE_CURRENT_BINARY_DIR}/Unit)
-
-  # Configuration-time: See Unit/lit.site.cfg.in
-  set(LLVM_BUILD_MODE "%(build_mode)s")
-
-  set(LLVM_SOURCE_DIR ${LLVM_MAIN_SRC_DIR})
-  set(LLVM_BINARY_DIR ${LLVM_BINARY_DIR})
-  set(LLVM_TOOLS_DIR "${LLVM_TOOLS_BINARY_DIR}/%(build_config)s")
-  set(LLVMGCCDIR "")
-  set(PYTHON_EXECUTABLE ${PYTHON_EXECUTABLE})
-  set(ENABLE_SHARED ${LLVM_SHARED_LIBS_ENABLED})
-  set(SHLIBPATH_VAR ${SHLIBPATH_VAR})
-
-  if(LLVM_ENABLE_ASSERTIONS AND NOT MSVC_IDE)
-    set(ENABLE_ASSERTIONS "1")
-  else()
-    set(ENABLE_ASSERTIONS "0")
-  endif()
-
-  configure_file(
-    ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
-    ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
-    @ONLY)
-  configure_file(
-    ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.site.cfg.in
-    ${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg
-    @ONLY)
-
-  add_custom_target(check
-    COMMAND ${PYTHON_EXECUTABLE}
-                ${LLVM_SOURCE_DIR}/utils/lit/lit.py
-                --param llvm_site_config=${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
-                --param llvm_unit_site_config=${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg
-                --param build_config=${CMAKE_CFG_INTDIR}
-                --param build_mode=${RUNTIME_BUILD_MODE}
-                ${LIT_ARGS}
-                ${CMAKE_CURRENT_BINARY_DIR}
-                COMMENT "Running LLVM regression tests")
-  set_target_properties(check PROPERTIES FOLDER "Tests")
-
-  add_custom_target(check.deps)
-  add_dependencies(check check.deps)
-  add_dependencies(check.deps
-                UnitTests
-                BugpointPasses LLVMHello
-                llc lli llvm-ar llvm-as llvm-dis llvm-extract
-                llvm-ld llvm-link llvm-mc llvm-nm llvm-objdump macho-dump opt
-                FileCheck count not)
-  set_target_properties(check.deps PROPERTIES FOLDER "Tests")
+set(LLVM_SOURCE_DIR ${LLVM_MAIN_SRC_DIR})
+set(LLVM_BINARY_DIR ${LLVM_BINARY_DIR})
+set(LLVM_TOOLS_DIR "${LLVM_TOOLS_BINARY_DIR}/%(build_config)s")
+set(PYTHON_EXECUTABLE ${PYTHON_EXECUTABLE})
+set(ENABLE_SHARED ${LLVM_SHARED_LIBS_ENABLED})
+set(SHLIBPATH_VAR ${SHLIBPATH_VAR})
 
+if(LLVM_ENABLE_ASSERTIONS AND NOT MSVC_IDE)
+  set(ENABLE_ASSERTIONS "1")
+else()
+  set(ENABLE_ASSERTIONS "0")
 endif()
+
+configure_file(
+  ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+  ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
+  @ONLY)
+configure_file(
+  ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.site.cfg.in
+  ${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg
+  @ONLY)
+
+add_custom_target(check
+  COMMAND ${PYTHON_EXECUTABLE}
+              ${LLVM_SOURCE_DIR}/utils/lit/lit.py
+              --param llvm_site_config=${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
+              --param llvm_unit_site_config=${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg
+              --param build_config=${CMAKE_CFG_INTDIR}
+              --param build_mode=${RUNTIME_BUILD_MODE}
+              ${LIT_ARGS}
+              ${CMAKE_CURRENT_BINARY_DIR}
+              COMMENT "Running LLVM regression tests")
+
+add_custom_target(check.deps)
+add_dependencies(check check.deps)
+add_dependencies(check.deps
+              UnitTests
+              BugpointPasses LLVMHello
+              llc lli llvm-ar llvm-as llvm-dis llvm-extract llvm-dwarfdump
+              llvm-ld llvm-link llvm-mc llvm-nm llvm-objdump llvm-readobj
+              macho-dump opt
+              FileCheck count not json-bench)
+set_target_properties(check.deps PROPERTIES FOLDER "Tests")
diff --git a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
index 3694aaa..0bfe331 100644
--- a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
+++ b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+v6,+vfp2 | FileCheck %s
 
 @quant_coef = external global [6 x [4 x [4 x i32]]]		; <[6 x [4 x [4 x i32]]]*> [#uses=1]
 @dequant_coef = external global [6 x [4 x [4 x i32]]]		; <[6 x [4 x [4 x i32]]]*> [#uses=1]
diff --git a/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll b/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
index 78c6222..94c562b 100644
--- a/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
@@ -11,7 +11,7 @@ bb74.i:		; preds = %bb88.i, %bb74.i, %entry
 bb88.i:		; preds = %bb74.i
 	br i1 false, label %mandel.exit, label %bb74.i
 mandel.exit:		; preds = %bb88.i
-	%tmp2 = volatile load double* getelementptr ({ double, double }* @accum, i32 0, i32 0), align 8		; <double> [#uses=1]
+	%tmp2 = load volatile double* getelementptr ({ double, double }* @accum, i32 0, i32 0), align 8		; <double> [#uses=1]
 	%tmp23 = fptosi double %tmp2 to i32		; <i32> [#uses=1]
 	%tmp5 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i32 %tmp23 )		; <i32> [#uses=0]
 	ret i32 0
diff --git a/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
index 8bde748..a016809 100644
--- a/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
+++ b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
@@ -37,10 +37,11 @@ return:                                           ; preds = %invcont
   ret void
 
 lpad:                                             ; preds = %entry
-  %eh_ptr = call i8* @llvm.eh.exception()
+  %exn = landingpad {i8*, i32} personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+           cleanup
+  %eh_ptr = extractvalue {i8*, i32} %exn, 0
   store i8* %eh_ptr, i8** %eh_exception
-  %eh_ptr1 = load i8** %eh_exception
-  %eh_select2 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr1, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 0)
+  %eh_select2 = extractvalue {i8*, i32} %exn, 1
   store i32 %eh_select2, i32* %eh_selector
   br label %ppad
 
@@ -94,10 +95,6 @@ declare void @_ZdlPv(i8*) nounwind
 
 declare void @_Z3barv()
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
 declare i32 @llvm.eh.typeid.for(i8*) nounwind
 
 declare i32 @__gxx_personality_sj0(...)
diff --git a/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
index 0a157c9..426bd17 100644
--- a/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
+++ b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
@@ -9,7 +9,7 @@ define void @test(double* %x, double* %y) nounwind {
   br i1 %4, label %bb1, label %bb2
 
 bb1:
-;CHECK: vstrhi.64
+;CHECK: vstrhi
   store double %1, double* %y
   br label %bb2
 
diff --git a/test/CodeGen/ARM/2009-09-24-spill-align.ll b/test/CodeGen/ARM/2009-09-24-spill-align.ll
index 8bfd026..eb9c2d0 100644
--- a/test/CodeGen/ARM/2009-09-24-spill-align.ll
+++ b/test/CodeGen/ARM/2009-09-24-spill-align.ll
@@ -6,7 +6,7 @@ entry:
   %arg0_poly16x4_t = alloca <4 x i16>             ; <<4 x i16>*> [#uses=1]
   %out_poly16_t = alloca i16                      ; <i16*> [#uses=1]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-; CHECK: vldr.64
+; CHECK: vldr
   %0 = load <4 x i16>* %arg0_poly16x4_t, align 8  ; <<4 x i16>> [#uses=1]
   %1 = extractelement <4 x i16> %0, i32 1         ; <i16> [#uses=1]
   store i16 %1, i16* %out_poly16_t, align 2
diff --git a/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll b/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
index 7aae3ac..a8afc20 100644
--- a/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
+++ b/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=cortex-a8 < %s | FileCheck %s
+; RUN: llc -mcpu=cortex-a8 -mattr=-neonfp < %s | FileCheck %s
 ; PR5423
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
index df9dbca..0ae7f84 100644
--- a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
+++ b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
@@ -11,7 +11,7 @@ entry:
 
 ; THUMB:     t:
 ; THUMB-NOT: str r0, [r1], r0
-; THUMB:     str r2, [r1]
+; THUMB:     str r1, [r0]
   %0 = getelementptr inbounds %struct.foo* %this, i32 0, i32 1 ; <i64*> [#uses=1]
   store i32 0, i32* inttoptr (i32 8 to i32*), align 8
   br i1 undef, label %bb.nph96, label %bb3
diff --git a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
index e47c038..e0f50c9 100644
--- a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
+++ b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon -O0 -regalloc=linearscan
+; RUN: llc < %s -march=arm -mattr=+neon -O0 -optimize-regalloc -regalloc=basic
 
 ; This test would crash the rewriter when trying to handle a spill after one of
 ; the @llvm.arm.neon.vld3.v8i8 defined three parts of a register.
diff --git a/test/CodeGen/ARM/2010-05-21-BuildVector.ll b/test/CodeGen/ARM/2010-05-21-BuildVector.ll
index cd1c9c8..a400b7b 100644
--- a/test/CodeGen/ARM/2010-05-21-BuildVector.ll
+++ b/test/CodeGen/ARM/2010-05-21-BuildVector.ll
@@ -10,28 +10,28 @@ entry:
   %4 = ashr i32 %3, 30
   %.sum = add i32 %4, 4
   %5 = getelementptr inbounds float* %table, i32 %.sum
-;CHECK: vldr.32 s
+;CHECK: vldr s
   %6 = load float* %5, align 4
   %tmp11 = insertelement <4 x float> undef, float %6, i32 0
   %7 = shl i32 %packedValue, 18
   %8 = ashr i32 %7, 30
   %.sum12 = add i32 %8, 4
   %9 = getelementptr inbounds float* %table, i32 %.sum12
-;CHECK: vldr.32 s
+;CHECK: vldr s
   %10 = load float* %9, align 4
   %tmp9 = insertelement <4 x float> %tmp11, float %10, i32 1
   %11 = shl i32 %packedValue, 20
   %12 = ashr i32 %11, 30
   %.sum13 = add i32 %12, 4
   %13 = getelementptr inbounds float* %table, i32 %.sum13
-;CHECK: vldr.32 s
+;CHECK: vldr s
   %14 = load float* %13, align 4
   %tmp7 = insertelement <4 x float> %tmp9, float %14, i32 2
   %15 = shl i32 %packedValue, 22
   %16 = ashr i32 %15, 30
   %.sum14 = add i32 %16, 4
   %17 = getelementptr inbounds float* %table, i32 %.sum14
-;CHECK: vldr.32 s
+;CHECK: vldr s
   %18 = load float* %17, align 4
   %tmp5 = insertelement <4 x float> %tmp7, float %18, i32 3
   %19 = fmul <4 x float> %tmp5, %2
diff --git a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
index b9d5600..1aee508 100644
--- a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
+++ b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
@@ -12,7 +12,7 @@ target triple = "thumbv7-apple-darwin10"
 
 ; CHECK: vld1.64 {d16, d17}, [r{{.}}]
 ; CHECK-NOT: vld1.64 {d16, d17}
-; CHECK: vmov.f64 d19, d16
+; CHECK: vmov.f64
 
 define i32 @test(i8* %arg) nounwind {
 entry:
diff --git a/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll b/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
index c03c815..2842437 100644
--- a/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
+++ b/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
@@ -21,12 +21,8 @@ declare i32 @printf(i8* nocapture, ...) nounwind
 
 declare i8* @__cxa_allocate_exception(i32)
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
 declare i32 @__gxx_personality_sj0(...)
 
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
 declare i32 @llvm.eh.typeid.for(i8*) nounwind
 
 declare void @_Unwind_SjLj_Resume(i8*)
@@ -75,8 +71,11 @@ try.cont:                                         ; preds = %lpad
   ret i32 %conv
 
 lpad:                                             ; preds = %entry
-  %exn = tail call i8* @llvm.eh.exception() nounwind ; <i8*> [#uses=4]
-  %eh.selector = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* bitcast (%0* @_ZTI1A to i8*), i8* null) nounwind ; <i32> [#uses=1]
+  %exn.ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+           catch i8* bitcast (%0* @_ZTI1A to i8*)
+           catch i8* null
+  %exn = extractvalue { i8*, i32 } %exn.ptr, 0
+  %eh.selector = extractvalue { i8*, i32 } %exn.ptr, 1
   %2 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%0* @_ZTI1A to i8*)) nounwind ; <i32> [#uses=1]
   %3 = icmp eq i32 %eh.selector, %2               ; <i1> [#uses=1]
   br i1 %3, label %try.cont, label %eh.resume
diff --git a/test/CodeGen/ARM/2010-08-04-EHCrash.ll b/test/CodeGen/ARM/2010-08-04-EHCrash.ll
index f57b7e6..4b47085 100644
--- a/test/CodeGen/ARM/2010-08-04-EHCrash.ll
+++ b/test/CodeGen/ARM/2010-08-04-EHCrash.ll
@@ -34,10 +34,12 @@ return:                                           ; preds = %entry
   ret void
 
 lpad:                                             ; preds = %bb
-  %eh_ptr = call i8* @llvm.eh.exception()         ; <i8*> [#uses=1]
-  store i8* %eh_ptr, i8** %eh_exception
+  %eh_ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+              cleanup
+  %exn = extractvalue { i8*, i32 } %eh_ptr, 0
+  store i8* %exn, i8** %eh_exception
   %eh_ptr13 = load i8** %eh_exception             ; <i8*> [#uses=1]
-  %eh_select14 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr13, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 1)
+  %eh_select14 = extractvalue { i8*, i32 } %eh_ptr, 1
   store i32 %eh_select14, i32* %eh_selector
   br label %ppad
 
@@ -54,10 +56,6 @@ declare arm_apcscc void @func2()
 
 declare arm_apcscc void @_ZSt9terminatev() noreturn nounwind
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
 declare arm_apcscc void @_Unwind_SjLj_Resume(i8*)
 
 declare arm_apcscc void @func3()
diff --git a/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll b/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll
index 0422094..ec74880 100644
--- a/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll
+++ b/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -verify-machineinstrs -spiller=standard
+; RUN: llc < %s -verify-machineinstrs -spiller=trivial
 ; RUN: llc < %s -verify-machineinstrs -spiller=inline
 ; PR8612
 ;
diff --git a/test/CodeGen/ARM/2010-11-29-PrologueBug.ll b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
index e3c18ce..da4d157 100644
--- a/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
+++ b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin   | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB2
+; RUN: llc < %s -mtriple=armv7-apple-ios   | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB2
 ; rdar://8690640
 
 define i32* @t(i32* %x) nounwind {
diff --git a/test/CodeGen/ARM/2010-12-07-PEIBug.ll b/test/CodeGen/ARM/2010-12-07-PEIBug.ll
index c65952b..770ad44 100644
--- a/test/CodeGen/ARM/2010-12-07-PEIBug.ll
+++ b/test/CodeGen/ARM/2010-12-07-PEIBug.ll
@@ -1,39 +1,15 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s
 ; rdar://8728956
 
 define hidden void @foo() nounwind ssp {
 entry:
 ; CHECK: foo:
-; CHECK: push {r7, lr}
-; CHECK-NEXT: mov r7, sp
+; CHECK: mov r7, sp
 ; CHECK-NEXT: vpush {d8}
 ; CHECK-NEXT: vpush {d10, d11}
-  %tmp40 = load <4 x i8>* undef
-  %tmp41 = extractelement <4 x i8> %tmp40, i32 2
-  %conv42 = zext i8 %tmp41 to i32
-  %conv43 = sitofp i32 %conv42 to float
-  %div44 = fdiv float %conv43, 2.560000e+02
-  %vecinit45 = insertelement <4 x float> undef, float %div44, i32 2
-  %vecinit46 = insertelement <4 x float> %vecinit45, float 1.000000e+00, i32 3
-  store <4 x float> %vecinit46, <4 x float>* undef
-  br i1 undef, label %if.then105, label %if.else109
-
-if.then105:                                       ; preds = %entry
-  br label %if.end114
-
-if.else109:                                       ; preds = %entry
-  br label %if.end114
-
-if.end114:                                        ; preds = %if.else109, %if.then105
-  %call185 = call float @bar()
-  %vecinit186 = insertelement <4 x float> undef, float %call185, i32 1
-  %call189 = call float @bar()
-  %vecinit190 = insertelement <4 x float> %vecinit186, float %call189, i32 2
-  %vecinit191 = insertelement <4 x float> %vecinit190, float 1.000000e+00, i32 3
-  store <4 x float> %vecinit191, <4 x float>* undef
+  tail call void asm sideeffect "","~{d8},~{d10},~{d11}"() nounwind
 ; CHECK: vpop {d10, d11}
 ; CHECK-NEXT: vpop {d8}
-; CHECK-NEXT: pop {r7, pc}
   ret void
 }
 
diff --git a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
index 9484212..ca88eed 100644
--- a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
+++ b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
@@ -3,11 +3,11 @@
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32"
 target triple = "thumbv7-apple-darwin10"
 
-@x1 = internal global i8 1
-@x2 = internal global i8 1
-@x3 = internal global i8 1
-@x4 = internal global i8 1
-@x5 = global i8 1
+@x1 = internal global i8 1, align 1
+@x2 = internal global i8 1, align 1
+@x3 = internal global i8 1, align 1
+@x4 = internal global i8 1, align 1
+@x5 = global i8 1, align 1
 
 ; Check debug info output for merged global.
 ; DW_AT_location
@@ -17,8 +17,7 @@ target triple = "thumbv7-apple-darwin10"
 ; DW_OP_constu
 ; offset
 
-;CHECK:        .ascii   "x2"                   @ DW_AT_name
-;CHECK-NEXT:        .byte   0
+;CHECK: .long Lset6
 ;CHECK-NEXT:        @ DW_AT_type
 ;CHECK-NEXT:        @ DW_AT_decl_file
 ;CHECK-NEXT:        @ DW_AT_decl_line
diff --git a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
index ccda281e9..2faa04a 100644
--- a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
+++ b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -disable-cgp-delete-dead-blocks -mcpu=cortex-a8 | FileCheck %s
 
 ; Do not form Thumb2 ldrd / strd if the offset is not multiple of 4.
 ; rdar://9133587
diff --git a/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll b/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll
index 0b5f962..d3394b5 100644
--- a/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll
+++ b/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll
@@ -35,14 +35,14 @@ for.cond.backedge:
   br label %for.cond
 
 lpad:
-  %exn = tail call i8* @llvm.eh.exception() nounwind
-  %eh.selector = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* null) nounwind
+  %exn = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+           catch i8* null
   invoke void @foo()
           to label %eh.resume unwind label %terminate.lpad
 
 lpad26:
-  %exn27 = tail call i8* @llvm.eh.exception() nounwind
-  %eh.selector28 = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn27, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* null) nounwind
+  %exn27 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+           catch i8* null
   invoke void @foo()
           to label %eh.resume unwind label %terminate.lpad
 
@@ -57,31 +57,26 @@ call8.i.i.i.noexc:
   ret void
 
 lpad44:
-  %exn45 = tail call i8* @llvm.eh.exception() nounwind
-  %eh.selector46 = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn45, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* null) nounwind
+  %exn45 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+           catch i8* null
   invoke void @foo()
           to label %eh.resume unwind label %terminate.lpad
 
 eh.resume:
-  %exn.slot.0 = phi i8* [ %exn27, %lpad26 ], [ %exn, %lpad ], [ %exn45, %lpad44 ]
-  tail call void @_Unwind_SjLj_Resume_or_Rethrow(i8* %exn.slot.0) noreturn
-  unreachable
+  %exn.slot.0 = phi { i8*, i32 } [ %exn27, %lpad26 ], [ %exn, %lpad ], [ %exn45, %lpad44 ]
+  resume { i8*, i32 } %exn.slot.0
 
 terminate.lpad:
-  %exn51 = tail call i8* @llvm.eh.exception() nounwind
-  %eh.selector52 = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn51, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* null) nounwind
+  %exn51 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+           catch i8* null
   tail call void @_ZSt9terminatev() noreturn nounwind
   unreachable
 }
 
 declare void @foo()
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
 declare i32 @__gxx_personality_sj0(...)
 
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
 declare void @_Unwind_SjLj_Resume_or_Rethrow(i8*)
 
 declare void @_ZSt9terminatev()
diff --git a/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll b/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
index 7baacfe..3e78c46 100644
--- a/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
+++ b/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -arm-tail-calls=1 | FileCheck %s
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
-target triple = "thumbv7-apple-darwin10"
+target triple = "thumbv7-apple-ios"
 
 %struct.A = type <{ i16, i16, i32, i16, i16, i32, i16, [8 x %struct.B], [418 x i8], %struct.C }>
 %struct.B = type <{ i32, i16, i16 }>
diff --git a/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll b/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll
index 1b5b8a9..091d037 100644
--- a/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll
+++ b/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll
@@ -1,12 +1,10 @@
 ; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 | FileCheck %s
 ; CHECK: .zerofill __DATA,__bss,__MergedGlobals,16,2
 
-%struct.config = type { i16, i16, i16, i16 }
-
 @prev = external global [0 x i16]
 @max_lazy_match = internal unnamed_addr global i32 0, align 4
 @read_buf = external global i32 (i8*, i32)*
 @window = external global [0 x i8]
 @lookahead = internal unnamed_addr global i32 0, align 4
-@eofile.b = internal unnamed_addr global i1 false
+@eofile.b = internal unnamed_addr global i32 0
 @ins_h = internal unnamed_addr global i32 0, align 4
diff --git a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
index f681c34..f2b0c5d 100644
--- a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
+++ b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
@@ -8,8 +8,7 @@
 ; DW_OP_constu
 ; offset
 
-;CHECK:        .ascii   "x2"                   @ DW_AT_name
-;CHECK-NEXT:        .byte   0
+;CHECK: .long Lset33
 ;CHECK-NEXT:        @ DW_AT_type
 ;CHECK-NEXT:        @ DW_AT_decl_file
 ;CHECK-NEXT:        @ DW_AT_decl_line
diff --git a/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll b/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll
index 17264ee..216057a 100644
--- a/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll
+++ b/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a9 | FileCheck %s
 ; Test that ldmia_ret preserves implicit operands for return values.
 ;
 ; This CFG is reduced from a benchmark miscompile. With current
diff --git a/test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll b/test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll
new file mode 100644
index 0000000..09db740
--- /dev/null
+++ b/test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 -mattr=+neon,+neonfp -relocation-model=pic
+
+target triple = "armv6-none-linux-gnueabi"
+
+define void @sample_test(i8* %.T0348, i16* nocapture %sourceA, i16* nocapture %destValues) {
+L.entry:
+  %0 = call i32 (...)* @get_index(i8* %.T0348, i32 0)
+  %1 = bitcast i16* %destValues to i8*
+  %2 = mul i32 %0, 6
+  %3 = getelementptr i8* %1, i32 %2
+  %4 = bitcast i8* %3 to <3 x i16>*
+  %5 = load <3 x i16>* %4, align 1
+  %6 = bitcast i16* %sourceA to i8*
+  %7 = getelementptr i8* %6, i32 %2
+  %8 = bitcast i8* %7 to <3 x i16>*
+  %9 = load <3 x i16>* %8, align 1
+  %10 = or <3 x i16> %9, %5
+  store <3 x i16> %10, <3 x i16>* %4, align 1
+  ret void
+}
+
+declare i32 @get_index(...)
diff --git a/test/CodeGen/ARM/2011-10-26-memset-inline.ll b/test/CodeGen/ARM/2011-10-26-memset-inline.ll
new file mode 100644
index 0000000..ff049c8
--- /dev/null
+++ b/test/CodeGen/ARM/2011-10-26-memset-inline.ll
@@ -0,0 +1,21 @@
+; Make sure short memsets on ARM lower to stores, even when optimizing for size.
+; RUN: llc -march=arm < %s | FileCheck %s -check-prefix=CHECK-GENERIC
+; RUN: llc -march=arm -mcpu=cortex-a8 < %s | FileCheck %s -check-prefix=CHECK-UNALIGNED
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios5.0.0"
+
+; CHECK-GENERIC:      strb
+; CHECK-GENERIT-NEXT: strb
+; CHECK-GENERIT-NEXT: strb
+; CHECK-GENERIT-NEXT: strb
+; CHECK-GENERIT-NEXT: strb
+; CHECK-UNALIGNED:      strb
+; CHECK-UNALIGNED-NEXT: str 
+define void @foo(i8* nocapture %c) nounwind optsize {
+entry:
+  call void @llvm.memset.p0i8.i64(i8* %c, i8 -1, i64 5, i32 1, i1 false)
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll
new file mode 100644
index 0000000..42b1491
--- /dev/null
+++ b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=arm -mcpu=cortex-a8 < %s | FileCheck %s
+
+; Should trigger a NEON store.
+; CHECK: vstr
+define void @f_0_12(i8* nocapture %c) nounwind optsize {
+entry:
+  call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false)
+  ret void
+}
+
+; Trigger multiple NEON stores.
+; CHECK:      vstmia
+; CHECK-NEXT: vstmia
+define void @f_0_40(i8* nocapture %c) nounwind optsize {
+entry:
+  call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 40, i32 16, i1 false)
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll b/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll
new file mode 100644
index 0000000..113cbfe
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; PR11319
+
+@i8_res  = global <2 x i8> <i8 0, i8 0>
+@i8_src1 = global <2 x i8> <i8 1, i8 2>
+@i8_src2 = global <2 x i8> <i8 2, i8 1>
+
+define void @test_neon_vector_add_2xi8() nounwind {
+; CHECK: test_neon_vector_add_2xi8:
+  %1 = load <2 x i8>* @i8_src1
+  %2 = load <2 x i8>* @i8_src2
+  %3 = add <2 x i8> %1, %2
+  store <2 x i8> %3, <2 x i8>* @i8_res
+  ret void
+}
+
+define void @test_neon_ld_st_volatile_with_ashr_2xi8() {
+; CHECK: test_neon_ld_st_volatile_with_ashr_2xi8:
+  %1 = load volatile <2 x i8>* @i8_src1
+  %2 = load volatile <2 x i8>* @i8_src2
+  %3 = ashr <2 x i8> %1, %2
+  store volatile <2 x i8> %3, <2 x i8>* @i8_res
+  ret void
+}
diff --git a/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll b/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll
new file mode 100644
index 0000000..2ab6a4f
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; PR11319
+
+@src1_v2i16 = global <2 x i16> <i16 0, i16 1>
+@res_v2i16  = global <2 x i16> <i16 0, i16 0>
+
+declare <2 x i16> @foo_v2i16(<2 x i16>) nounwind
+
+define void @test_neon_call_return_v2i16() {
+; CHECK: test_neon_call_return_v2i16:
+  %1 = load <2 x i16>* @src1_v2i16
+  %2 = call <2 x i16> @foo_v2i16(<2 x i16> %1) nounwind
+  store <2 x i16> %2, <2 x i16>* @res_v2i16
+  ret void
+}
diff --git a/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll b/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll
new file mode 100644
index 0000000..719571b
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <2 x i32> @test1(<2 x double>* %A) {
+; CHECK: test1
+; CHECK: vcvt.s32.f64
+; CHECK: vcvt.s32.f64
+  %tmp1 = load <2 x double>* %A
+	%tmp2 = fptosi <2 x double> %tmp1 to <2 x i32>
+	ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @test2(<2 x double>* %A) {
+; CHECK: test2
+; CHECK: vcvt.u32.f64
+; CHECK: vcvt.u32.f64
+  %tmp1 = load <2 x double>* %A
+	%tmp2 = fptoui <2 x double> %tmp1 to <2 x i32>
+	ret <2 x i32> %tmp2
+}
+
+define <2 x double> @test3(<2 x i32>* %A) {
+; CHECK: test3
+; CHECK: vcvt.f64.s32
+; CHECK: vcvt.f64.s32
+  %tmp1 = load <2 x i32>* %A
+	%tmp2 = sitofp <2 x i32> %tmp1 to <2 x double>
+	ret <2 x double> %tmp2
+}
+
+define <2 x double> @test4(<2 x i32>* %A) {
+; CHECK: test4
+; CHECK: vcvt.f64.u32
+; CHECK: vcvt.f64.u32
+  %tmp1 = load <2 x i32>* %A
+	%tmp2 = uitofp <2 x i32> %tmp1 to <2 x double>
+	ret <2 x double> %tmp2
+}
diff --git a/test/CodeGen/ARM/2011-11-14-EarlyClobber.ll b/test/CodeGen/ARM/2011-11-14-EarlyClobber.ll
new file mode 100644
index 0000000..52aa0bf
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-14-EarlyClobber.ll
@@ -0,0 +1,62 @@
+; RUN: llc < %s -mcpu=cortex-a8 -verify-regalloc
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios"
+
+; This test calls shrinkToUses with an early-clobber redefined live range during
+; spilling.
+;
+;   Shrink: %vreg47,1.158257e-02 = [384r,400e:0)[400e,420r:1)  0@384r 1@400e
+;
+; The early-clobber instruction is an str:
+;
+;   %vreg12<earlyclobber,def> = t2STR_PRE %vreg6, %vreg12, 32, pred:14, pred:%noreg
+;
+; This tests that shrinkToUses handles the EC redef correctly.
+
+%struct.Transform_Struct.0.11.12.17.43.46.56.58.60 = type { [4 x [4 x double]] }
+
+define void @Compute_Axis_Rotation_Transform(%struct.Transform_Struct.0.11.12.17.43.46.56.58.60* nocapture %transform, double* nocapture %V1, double %angle) nounwind {
+entry:
+  store double 1.000000e+00, double* null, align 4
+  %arrayidx5.1.i = getelementptr inbounds %struct.Transform_Struct.0.11.12.17.43.46.56.58.60* %transform, i32 0, i32 0, i32 0, i32 1
+  store double 0.000000e+00, double* %arrayidx5.1.i, align 4
+  %arrayidx5.2.i = getelementptr inbounds %struct.Transform_Struct.0.11.12.17.43.46.56.58.60* %transform, i32 0, i32 0, i32 0, i32 2
+  store double 0.000000e+00, double* %arrayidx5.2.i, align 4
+  %arrayidx5.114.i = getelementptr inbounds %struct.Transform_Struct.0.11.12.17.43.46.56.58.60* %transform, i32 0, i32 0, i32 1, i32 0
+  store double 0.000000e+00, double* %arrayidx5.114.i, align 4
+  %arrayidx5.1.1.i = getelementptr inbounds %struct.Transform_Struct.0.11.12.17.43.46.56.58.60* %transform, i32 0, i32 0, i32 1, i32 1
+  store double 1.000000e+00, double* %arrayidx5.1.1.i, align 4
+  store double 0.000000e+00, double* null, align 4
+  store double 1.000000e+00, double* null, align 4
+  store double 0.000000e+00, double* null, align 4
+  %call = tail call double @cos(double %angle) nounwind readnone
+  %call1 = tail call double @sin(double %angle) nounwind readnone
+  %0 = load double* %V1, align 4
+  %arrayidx2 = getelementptr inbounds double* %V1, i32 1
+  %1 = load double* %arrayidx2, align 4
+  %mul = fmul double %0, %1
+  %sub = fsub double 1.000000e+00, %call
+  %mul3 = fmul double %mul, %sub
+  %2 = load double* undef, align 4
+  %mul5 = fmul double %2, %call1
+  %add = fadd double %mul3, %mul5
+  store double %add, double* %arrayidx5.1.i, align 4
+  %3 = load double* %V1, align 4
+  %mul11 = fmul double %3, undef
+  %mul13 = fmul double %mul11, %sub
+  %4 = load double* %arrayidx2, align 4
+  %mul15 = fmul double %4, %call1
+  %sub16 = fsub double %mul13, %mul15
+  store double %sub16, double* %arrayidx5.2.i, align 4
+  %5 = load double* %V1, align 4
+  %6 = load double* %arrayidx2, align 4
+  %mul22 = fmul double %5, %6
+  %mul24 = fmul double %mul22, %sub
+  %sub27 = fsub double %mul24, undef
+  store double %sub27, double* %arrayidx5.114.i, align 4
+  ret void
+}
+
+declare double @cos(double) nounwind readnone
+
+declare double @sin(double) nounwind readnone
diff --git a/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll b/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll
new file mode 100644
index 0000000..5409f8c
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios5.0.0 | FileCheck %s
+; rdar://10464621
+
+; DAG combine increases loads from packed types. ARM load / store optimizer then
+; combined them into a ldm which causes runtime exception.
+
+%struct.InformationBlock = type <{ i32, %struct.FlagBits, %struct.FlagBits }>
+%struct.FlagBits = type <{ [4 x i32] }>
+
+@infoBlock = external global %struct.InformationBlock
+
+define hidden void @foo() {
+; CHECK: foo:
+; CHECK: ldr.w
+; CHECK: ldr.w
+; CHECK-NOT: ldm
+entry:
+  %tmp13 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 0), align 1
+  %tmp15 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 1), align 1
+  %tmp17 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 2), align 1
+  %tmp19 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 3), align 1
+  %tmp = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 0), align 1
+  %tmp3 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 1), align 1
+  %tmp4 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 2), align 1
+  %tmp5 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 3), align 1
+  %insert21 = insertvalue [4 x i32] undef, i32 %tmp13, 0
+  %insert23 = insertvalue [4 x i32] %insert21, i32 %tmp15, 1
+  %insert25 = insertvalue [4 x i32] %insert23, i32 %tmp17, 2
+  %insert27 = insertvalue [4 x i32] %insert25, i32 %tmp19, 3
+  %insert = insertvalue [4 x i32] undef, i32 %tmp, 0
+  %insert7 = insertvalue [4 x i32] %insert, i32 %tmp3, 1
+  %insert9 = insertvalue [4 x i32] %insert7, i32 %tmp4, 2
+  %insert11 = insertvalue [4 x i32] %insert9, i32 %tmp5, 3
+  tail call void @bar([4 x i32] %insert27, [4 x i32] %insert11)
+  ret void
+}
+
+declare void @bar([4 x i32], [4 x i32])
diff --git a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
new file mode 100644
index 0000000..6fbae19
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
@@ -0,0 +1,302 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
+
+@A = global <4 x float> <float 0., float 1., float 2., float 3.>
+
+define void @test_sqrt(<4 x float>* %X) nounwind {
+
+; CHECK: test_sqrt:
+
+; CHECK:      movw    r1, :lower16:{{.*}}
+; CHECK:      movt    r1, :upper16:{{.*}}
+; CHECK:      vldmia  r1
+; CHECK:      vsqrt.f32       {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK:      vsqrt.f32       {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK:      vsqrt.f32       {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK:      vsqrt.f32       {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK:      vstmia  {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) nounwind readonly
+
+
+define void @test_cos(<4 x float>* %X) nounwind {
+
+; CHECK: test_cos:
+
+; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK:      movt  [[reg0]], :upper16:{{.*}}
+; CHECK:      vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}cosf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}cosf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}cosf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}cosf
+
+; CHECK:      vstmia  {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.cos.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.cos.v4f32(<4 x float>) nounwind readonly
+
+define void @test_exp(<4 x float>* %X) nounwind {
+
+; CHECK: test_exp:
+
+; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK:      movt  [[reg0]], :upper16:{{.*}}
+; CHECK:      vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}expf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}expf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}expf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}expf
+
+; CHECK:      vstmia  {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.exp.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.exp.v4f32(<4 x float>) nounwind readonly
+
+define void @test_exp2(<4 x float>* %X) nounwind {
+
+; CHECK: test_exp2:
+
+; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK:      movt  [[reg0]], :upper16:{{.*}}
+; CHECK:      vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}exp2f
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}exp2f
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}exp2f
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}exp2f
+
+; CHECK:      vstmia  {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.exp2.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.exp2.v4f32(<4 x float>) nounwind readonly
+
+define void @test_log10(<4 x float>* %X) nounwind {
+
+; CHECK: test_log10:
+
+; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK:      movt  [[reg0]], :upper16:{{.*}}
+; CHECK:      vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}log10f
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}log10f
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}log10f
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}log10f
+
+; CHECK:      vstmia  {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.log10.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.log10.v4f32(<4 x float>) nounwind readonly
+
+define void @test_log(<4 x float>* %X) nounwind {
+
+; CHECK: test_log:
+
+; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK:      movt  [[reg0]], :upper16:{{.*}}
+; CHECK:      vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}logf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}logf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}logf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}logf
+
+; CHECK:      vstmia  {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.log.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.log.v4f32(<4 x float>) nounwind readonly
+
+define void @test_log2(<4 x float>* %X) nounwind {
+
+; CHECK: test_log2:
+
+; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK:      movt  [[reg0]], :upper16:{{.*}}
+; CHECK:      vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}log2f
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}log2f
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}log2f
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}log2f
+
+; CHECK:      vstmia  {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.log2.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.log2.v4f32(<4 x float>) nounwind readonly
+
+
+define void @test_pow(<4 x float>* %X) nounwind {
+
+; CHECK: test_pow:
+
+; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK:      movt  [[reg0]], :upper16:{{.*}}
+; CHECK:      vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}powf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}powf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}powf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}powf
+
+; CHECK:      vstmia  {{.*}}
+
+L.entry:
+
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.pow.v4f32(<4 x float> %0, <4 x float> <float 2., float 2., float 2., float 2.>)
+
+  store <4 x float> %1, <4 x float>* %X, align 16
+
+  ret void
+}
+
+declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>) nounwind readonly
+
+define void @test_powi(<4 x float>* %X) nounwind {
+
+; CHECK: test_powi:
+
+; CHECK:       movw  [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK:       movt  [[reg0]], :upper16:{{.*}}
+; CHECK:       vldmia  [[reg0]], {{.*}}
+; CHECK:       vmul.f32 {{.*}}
+
+; CHECK:      vstmia  {{.*}}
+
+L.entry:
+
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.powi.v4f32(<4 x float> %0, i32 2)
+
+  store <4 x float> %1, <4 x float>* %X, align 16
+
+  ret void
+}
+
+declare <4 x float> @llvm.powi.v4f32(<4 x float>, i32) nounwind readonly
+
+define void @test_sin(<4 x float>* %X) nounwind {
+
+; CHECK: test_sin:
+
+; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK:      movt  [[reg0]], :upper16:{{.*}}
+; CHECK:      vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}sinf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}sinf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}sinf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  {{.*}}sinf
+
+; CHECK:      vstmia  {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.sin.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.sin.v4f32(<4 x float>) nounwind readonly
+
diff --git a/test/CodeGen/ARM/2011-11-30-MergeAlignment.ll b/test/CodeGen/ARM/2011-11-30-MergeAlignment.ll
new file mode 100644
index 0000000..0c90f4c
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-30-MergeAlignment.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s | FileCheck %s
+; <rdar://problem/10497732>
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10"
+
+@x1 = internal global i32 1
+@x2 = internal global i64 12
+
+define i64 @f() {
+  %ax = load i32* @x1
+  %a = zext i32 %ax to i64
+  %b = load i64* @x2
+  %c = add i64 %a, %b
+  ret i64 %c
+}
+
+; We can global-merge the i64 in theory, but the current code doesn't handle
+; the alignment correctly; for the moment, just check that we don't do it.
+; See also 
+
+; CHECK-NOT: MergedGlobals
+; CHECK: _x2
+; CHECK-NOT: MergedGlobals
diff --git a/test/CodeGen/ARM/2011-12-14-machine-sink.ll b/test/CodeGen/ARM/2011-12-14-machine-sink.ll
new file mode 100644
index 0000000..5ce600d
--- /dev/null
+++ b/test/CodeGen/ARM/2011-12-14-machine-sink.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -o /dev/null -stats |& FileCheck %s -check-prefix=STATS
+; Radar 10266272
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios4.0.0"
+; STATS-NOT: machine-sink
+
+define i32 @foo(i32 %h) nounwind readonly ssp {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %cmp = icmp slt i32 0, %h
+  br i1 %cmp, label %for.body, label %if.end299
+
+for.body:                                         ; preds = %for.cond
+  %v.5 = select i1 undef, i32 undef, i32 0
+  %0 = load i8* undef, align 1, !tbaa !0
+  %conv88 = zext i8 %0 to i32
+  %sub89 = sub nsw i32 0, %conv88
+  %v.8 = select i1 undef, i32 undef, i32 %sub89
+  %1 = load i8* null, align 1, !tbaa !0
+  %conv108 = zext i8 %1 to i32
+  %2 = load i8* undef, align 1, !tbaa !0
+  %conv110 = zext i8 %2 to i32
+  %sub111 = sub nsw i32 %conv108, %conv110
+  %cmp112 = icmp slt i32 %sub111, 0
+  %sub115 = sub nsw i32 0, %sub111
+  %v.10 = select i1 %cmp112, i32 %sub115, i32 %sub111
+  %add62 = add i32 0, %v.5
+  %add73 = add i32 %add62, 0
+  %add84 = add i32 %add73, 0
+  %add95 = add i32 %add84, %v.8
+  %add106 = add i32 %add95, 0
+  %add117 = add i32 %add106, %v.10
+  %add128 = add i32 %add117, 0
+  %add139 = add i32 %add128, 0
+  %add150 = add i32 %add139, 0
+  %add161 = add i32 %add150, 0
+  %add172 = add i32 %add161, 0
+  br i1 undef, label %for.cond, label %if.end299
+
+if.end299:                                        ; preds = %for.body, %for.cond
+  %s.10 = phi i32 [ %add172, %for.body ], [ 0, %for.cond ]
+  ret i32 %s.10
+}
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll b/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll
new file mode 100644
index 0000000..ddb7632
--- /dev/null
+++ b/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -O0 -mtriple=thumbv7-apple-ios | FileCheck %s
+
+; Radar 10567930: Make sure that all the caller-saved registers are saved and
+; restored in a function with setjmp/longjmp EH.  In particular, r6 was not
+; being saved here.
+; CHECK: push {r4, r5, r6, r7, lr}
+
+%0 = type opaque
+%struct.NSConstantString = type { i32*, i32, i8*, i32 }
+
+define i32 @asdf(i32 %a, i32 %b, i8** %c, i8* %d) {
+bb:
+  %tmp = alloca i32, align 4
+  %tmp1 = alloca i32, align 4
+  %tmp2 = alloca i8*, align 4
+  %tmp3 = alloca i1
+  %myException = alloca %0*, align 4
+  %tmp4 = alloca i8*
+  %tmp5 = alloca i32
+  %exception = alloca %0*, align 4
+  store i32 %a, i32* %tmp, align 4
+  store i32 %b, i32* %tmp1, align 4
+  store i8* %d, i8** %tmp2, align 4
+  store i1 false, i1* %tmp3
+  %tmp7 = load i8** %c
+  %tmp10 = invoke %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* (i8*, i8*, %0*)*)(i8* %tmp7, i8* %d, %0* null)
+          to label %bb11 unwind label %bb15
+
+bb11:                                             ; preds = %bb
+  store %0* %tmp10, %0** %myException, align 4
+  %tmp12 = load %0** %myException, align 4
+  %tmp13 = bitcast %0* %tmp12 to i8*
+  invoke void @objc_exception_throw(i8* %tmp13) noreturn
+          to label %bb14 unwind label %bb15
+
+bb14:                                             ; preds = %bb11
+  unreachable
+
+bb15:                                             ; preds = %bb11, %bb
+  %tmp16 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+          catch i8* null
+  %tmp17 = extractvalue { i8*, i32 } %tmp16, 0
+  store i8* %tmp17, i8** %tmp4
+  %tmp18 = extractvalue { i8*, i32 } %tmp16, 1
+  store i32 %tmp18, i32* %tmp5
+  store i1 true, i1* %tmp3
+  br label %bb56
+
+bb56:
+  unreachable
+}
+
+declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
+declare i32 @__objc_personality_v0(...)
+declare void @objc_exception_throw(i8*)
diff --git a/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll b/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll
new file mode 100644
index 0000000..926daaf
--- /dev/null
+++ b/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll
@@ -0,0 +1,105 @@
+; RUN: llc < %s -mcpu=cortex-a8 -verify-machineinstrs
+; PR11829
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+define arm_aapcs_vfpcc void @foo(i8* nocapture %arg) nounwind uwtable align 2 {
+bb:
+  br i1 undef, label %bb1, label %bb2
+
+bb1:                                              ; preds = %bb
+  unreachable
+
+bb2:                                              ; preds = %bb
+  br label %bb3
+
+bb3:                                              ; preds = %bb4, %bb2
+  %tmp = icmp slt i32 undef, undef
+  br i1 %tmp, label %bb4, label %bb67
+
+bb4:                                              ; preds = %bb3
+  %tmp5 = load <4 x i32>* undef, align 16, !tbaa !0
+  %tmp6 = and <4 x i32> %tmp5, <i32 8388607, i32 8388607, i32 8388607, i32 8388607>
+  %tmp7 = or <4 x i32> %tmp6, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216>
+  %tmp8 = bitcast <4 x i32> %tmp7 to <4 x float>
+  %tmp9 = fsub <4 x float> %tmp8, bitcast (i128 or (i128 shl (i128 zext (i64 trunc (i128 lshr (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128), i128 64) to i64) to i128), i128 64), i128 zext (i64 trunc (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128) to i64) to i128)) to <4 x float>)
+  %tmp10 = fmul <4 x float> undef, %tmp9
+  %tmp11 = fadd <4 x float> undef, %tmp10
+  %tmp12 = bitcast <4 x float> zeroinitializer to i128
+  %tmp13 = lshr i128 %tmp12, 64
+  %tmp14 = trunc i128 %tmp13 to i64
+  %tmp15 = insertvalue [2 x i64] undef, i64 %tmp14, 1
+  %tmp16 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %tmp11) nounwind
+  %tmp17 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp16, <4 x float> %tmp11) nounwind
+  %tmp18 = fmul <4 x float> %tmp17, %tmp16
+  %tmp19 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp18, <4 x float> %tmp11) nounwind
+  %tmp20 = fmul <4 x float> %tmp19, %tmp18
+  %tmp21 = fmul <4 x float> %tmp20, zeroinitializer
+  %tmp22 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp21, <4 x float> undef) nounwind
+  call arm_aapcs_vfpcc  void @bar(i8* null, i8* undef, <4 x i32>* undef, [2 x i64] zeroinitializer) nounwind
+  %tmp23 = bitcast <4 x float> %tmp22 to i128
+  %tmp24 = trunc i128 %tmp23 to i64
+  %tmp25 = insertvalue [2 x i64] undef, i64 %tmp24, 0
+  %tmp26 = insertvalue [2 x i64] %tmp25, i64 0, 1
+  %tmp27 = load float* undef, align 4, !tbaa !2
+  %tmp28 = insertelement <4 x float> undef, float %tmp27, i32 3
+  %tmp29 = load <4 x i32>* undef, align 16, !tbaa !0
+  %tmp30 = and <4 x i32> %tmp29, <i32 8388607, i32 8388607, i32 8388607, i32 8388607>
+  %tmp31 = or <4 x i32> %tmp30, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216>
+  %tmp32 = bitcast <4 x i32> %tmp31 to <4 x float>
+  %tmp33 = fsub <4 x float> %tmp32, bitcast (i128 or (i128 shl (i128 zext (i64 trunc (i128 lshr (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128), i128 64) to i64) to i128), i128 64), i128 zext (i64 trunc (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128) to i64) to i128)) to <4 x float>)
+  %tmp34 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> undef, <4 x float> %tmp28) nounwind
+  %tmp35 = fmul <4 x float> %tmp34, undef
+  %tmp36 = fmul <4 x float> %tmp35, undef
+  %tmp37 = call arm_aapcs_vfpcc  i8* undef(i8* undef) nounwind
+  %tmp38 = load float* undef, align 4, !tbaa !2
+  %tmp39 = insertelement <2 x float> undef, float %tmp38, i32 0
+  %tmp40 = call arm_aapcs_vfpcc  i8* undef(i8* undef) nounwind
+  %tmp41 = load float* undef, align 4, !tbaa !2
+  %tmp42 = insertelement <4 x float> undef, float %tmp41, i32 3
+  %tmp43 = shufflevector <2 x float> %tmp39, <2 x float> undef, <4 x i32> zeroinitializer
+  %tmp44 = fmul <4 x float> %tmp33, %tmp43
+  %tmp45 = fadd <4 x float> %tmp42, %tmp44
+  %tmp46 = fsub <4 x float> %tmp45, undef
+  %tmp47 = fmul <4 x float> %tmp46, %tmp36
+  %tmp48 = fadd <4 x float> undef, %tmp47
+  %tmp49 = call arm_aapcs_vfpcc  i8* undef(i8* undef) nounwind
+  %tmp50 = load float* undef, align 4, !tbaa !2
+  %tmp51 = insertelement <4 x float> undef, float %tmp50, i32 3
+  %tmp52 = call arm_aapcs_vfpcc float* null(i8* undef) nounwind
+  %tmp54 = load float* %tmp52, align 4, !tbaa !2
+  %tmp55 = insertelement <4 x float> undef, float %tmp54, i32 3
+  %tmp56 = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %tmp22
+  %tmp57 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp56, <4 x float> %tmp55) nounwind
+  %tmp58 = fmul <4 x float> undef, %tmp57
+  %tmp59 = fsub <4 x float> %tmp51, %tmp48
+  %tmp60 = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %tmp58
+  %tmp61 = fmul <4 x float> %tmp59, %tmp60
+  %tmp62 = fadd <4 x float> %tmp48, %tmp61
+  call arm_aapcs_vfpcc  void @baz(i8* undef, i8* undef, [2 x i64] %tmp26, <4 x i32>* undef)
+  %tmp63 = bitcast <4 x float> %tmp62 to i128
+  %tmp64 = lshr i128 %tmp63, 64
+  %tmp65 = trunc i128 %tmp64 to i64
+  %tmp66 = insertvalue [2 x i64] zeroinitializer, i64 %tmp65, 1
+  call arm_aapcs_vfpcc  void @quux(i8* undef, i8* undef, [2 x i64] undef, i8* undef, [2 x i64] %tmp66, i8* undef, i8* undef, [2 x i64] %tmp26, [2 x i64] %tmp15, <4 x i32>* undef)
+  br label %bb3
+
+bb67:                                             ; preds = %bb3
+  ret void
+}
+
+declare arm_aapcs_vfpcc void @bar(i8*, i8*, <4 x i32>*, [2 x i64])
+
+declare arm_aapcs_vfpcc void @baz(i8*, i8* nocapture, [2 x i64], <4 x i32>* nocapture) nounwind uwtable inlinehint align 2
+
+declare arm_aapcs_vfpcc void @quux(i8*, i8*, [2 x i64], i8* nocapture, [2 x i64], i8* nocapture, i8* nocapture, [2 x i64], [2 x i64], <4 x i32>* nocapture) nounwind uwtable inlinehint align 2
+
+declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!2 = metadata !{metadata !"float", metadata !0}
diff --git a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
new file mode 100644
index 0000000..872eca3
--- /dev/null
+++ b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s -mcpu=cortex-a8 -verify-machineinstrs -verify-coalescing
+; PR11841
+; PR11829
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-eabi"
+
+; This test case is exercising REG_SEQUENCE, and chains of REG_SEQUENCE.
+define arm_aapcs_vfpcc void @foo(i8* nocapture %arg, i8* %arg1) nounwind align 2 {
+bb:
+  %tmp = load <2 x float>* undef, align 8, !tbaa !0
+  %tmp2 = extractelement <2 x float> %tmp, i32 0
+  %tmp3 = insertelement <4 x float> undef, float %tmp2, i32 0
+  %tmp4 = insertelement <4 x float> %tmp3, float 0.000000e+00, i32 1
+  %tmp5 = insertelement <4 x float> %tmp4, float 0.000000e+00, i32 2
+  %tmp6 = insertelement <4 x float> %tmp5, float 0.000000e+00, i32 3
+  %tmp7 = extractelement <2 x float> %tmp, i32 1
+  %tmp8 = insertelement <4 x float> %tmp3, float %tmp7, i32 1
+  %tmp9 = insertelement <4 x float> %tmp8, float 0.000000e+00, i32 2
+  %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 3
+  %tmp11 = bitcast <4 x float> %tmp6 to <2 x i64>
+  %tmp12 = shufflevector <2 x i64> %tmp11, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp13 = bitcast <1 x i64> %tmp12 to <2 x float>
+  %tmp14 = shufflevector <2 x float> %tmp13, <2 x float> undef, <4 x i32> zeroinitializer
+  %tmp15 = bitcast <4 x float> %tmp14 to <2 x i64>
+  %tmp16 = shufflevector <2 x i64> %tmp15, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp17 = bitcast <1 x i64> %tmp16 to <2 x float>
+  %tmp18 = extractelement <2 x float> %tmp17, i32 0
+  tail call arm_aapcs_vfpcc  void @bar(i8* undef, float %tmp18, float undef, float 0.000000e+00) nounwind
+  %tmp19 = bitcast <4 x float> %tmp10 to <2 x i64>
+  %tmp20 = shufflevector <2 x i64> %tmp19, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp21 = bitcast <1 x i64> %tmp20 to <2 x float>
+  %tmp22 = shufflevector <2 x float> %tmp21, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %tmp23 = bitcast <4 x float> %tmp22 to <2 x i64>
+  %tmp24 = shufflevector <2 x i64> %tmp23, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp25 = bitcast <1 x i64> %tmp24 to <2 x float>
+  %tmp26 = extractelement <2 x float> %tmp25, i32 0
+  tail call arm_aapcs_vfpcc  void @bar(i8* undef, float undef, float %tmp26, float 0.000000e+00) nounwind
+  ret void
+}
+
+define arm_aapcs_vfpcc void @foo2() nounwind uwtable {
+entry:
+  br i1 undef, label %for.end, label %cond.end295
+
+cond.end295:                                      ; preds = %entry
+  %shuffle.i39.i.i1035 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
+  %shuffle.i38.i.i1036 = shufflevector <2 x i64> zeroinitializer, <2 x i64> undef, <1 x i32> zeroinitializer
+  %shuffle.i37.i.i1037 = shufflevector <1 x i64> %shuffle.i39.i.i1035, <1 x i64> %shuffle.i38.i.i1036, <2 x i32> <i32 0, i32 1>
+  %0 = bitcast <2 x i64> %shuffle.i37.i.i1037 to <4 x float>
+  %1 = bitcast <4 x float> undef to <2 x i64>
+  %shuffle.i36.i.i = shufflevector <2 x i64> %1, <2 x i64> undef, <1 x i32> zeroinitializer
+  %shuffle.i35.i.i = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
+  %shuffle.i34.i.i = shufflevector <1 x i64> %shuffle.i36.i.i, <1 x i64> %shuffle.i35.i.i, <2 x i32> <i32 0, i32 1>
+  %2 = bitcast <2 x i64> %shuffle.i34.i.i to <4 x float>
+  tail call void @llvm.arm.neon.vst1.v4f32(i8* undef, <4 x float> %0, i32 4) nounwind
+  tail call void @llvm.arm.neon.vst1.v4f32(i8* undef, <4 x float> %2, i32 4) nounwind
+  unreachable
+
+for.end:                                          ; preds = %entry
+  ret void
+}
+
+declare arm_aapcs_vfpcc void @bar(i8*, float, float, float)
+declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll b/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll
new file mode 100644
index 0000000..ec5b2e9
--- /dev/null
+++ b/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -verify-coalescing
+; PR11861
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-eabi"
+
+define arm_aapcs_vfpcc void @foo() nounwind uwtable align 2 {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %2 = phi <4 x float> [ undef, %0 ], [ %11, %1 ]
+  %3 = bitcast <4 x float> %2 to <2 x i64>
+  %4 = shufflevector <2 x i64> %3, <2 x i64> undef, <1 x i32> zeroinitializer
+  %5 = xor <2 x i32> zeroinitializer, <i32 -1, i32 -1>
+  %6 = bitcast <2 x i32> zeroinitializer to <2 x float>
+  %7 = shufflevector <2 x float> zeroinitializer, <2 x float> %6, <2 x i32> <i32 0, i32 2>
+  %8 = shufflevector <2 x i64> %3, <2 x i64> undef, <1 x i32> <i32 1>
+  %9 = bitcast <2 x float> %7 to <1 x i64>
+  %10 = shufflevector <1 x i64> %9, <1 x i64> %8, <2 x i32> <i32 0, i32 1>
+  %11 = bitcast <2 x i64> %10 to <4 x float>
+  br label %1
+}
diff --git a/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll b/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll
new file mode 100644
index 0000000..5f24e42
--- /dev/null
+++ b/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll
@@ -0,0 +1,121 @@
+; RUN: llc < %s -mcpu=cortex-a9 -join-liveintervals=0 -verify-machineinstrs
+; PR11765
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+; This test case exercises the MachineCopyPropagation pass by disabling the
+; RegisterCoalescer.
+
+define arm_aapcs_vfpcc void @foo(i8* %arg) nounwind uwtable align 2 {
+bb:
+  br i1 undef, label %bb1, label %bb2
+
+bb1:                                              ; preds = %bb
+  unreachable
+
+bb2:                                              ; preds = %bb
+  br i1 undef, label %bb92, label %bb3
+
+bb3:                                              ; preds = %bb2
+  %tmp = or <4 x i32> undef, undef
+  %tmp4 = bitcast <4 x i32> %tmp to <4 x float>
+  %tmp5 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %tmp4
+  %tmp6 = bitcast <4 x i32> zeroinitializer to <4 x float>
+  %tmp7 = fmul <4 x float> %tmp6, <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>
+  %tmp8 = bitcast <4 x float> %tmp7 to <2 x i64>
+  %tmp9 = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp10 = bitcast <1 x i64> %tmp9 to <2 x float>
+  %tmp11 = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> <i32 1>
+  %tmp12 = bitcast <1 x i64> %tmp11 to <2 x float>
+  %tmp13 = shufflevector <2 x float> %tmp10, <2 x float> %tmp12, <2 x i32> <i32 0, i32 2>
+  %tmp14 = shufflevector <2 x float> %tmp10, <2 x float> undef, <2 x i32> <i32 1, i32 2>
+  %tmp15 = bitcast <2 x float> %tmp14 to <1 x i64>
+  %tmp16 = bitcast <4 x i32> zeroinitializer to <2 x i64>
+  %tmp17 = shufflevector <2 x i64> %tmp16, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp18 = bitcast <1 x i64> %tmp17 to <2 x i32>
+  %tmp19 = and <2 x i32> %tmp18, <i32 -1, i32 0>
+  %tmp20 = bitcast <2 x float> %tmp13 to <2 x i32>
+  %tmp21 = and <2 x i32> %tmp20, <i32 0, i32 -1>
+  %tmp22 = or <2 x i32> %tmp19, %tmp21
+  %tmp23 = bitcast <2 x i32> %tmp22 to <1 x i64>
+  %tmp24 = shufflevector <1 x i64> %tmp23, <1 x i64> undef, <2 x i32> <i32 0, i32 1>
+  %tmp25 = bitcast <2 x i64> %tmp24 to <4 x float>
+  %tmp26 = shufflevector <2 x i64> %tmp16, <2 x i64> undef, <1 x i32> <i32 1>
+  %tmp27 = bitcast <1 x i64> %tmp26 to <2 x i32>
+  %tmp28 = and <2 x i32> %tmp27, <i32 -1, i32 0>
+  %tmp29 = and <2 x i32> undef, <i32 0, i32 -1>
+  %tmp30 = or <2 x i32> %tmp28, %tmp29
+  %tmp31 = bitcast <2 x i32> %tmp30 to <1 x i64>
+  %tmp32 = insertelement <4 x float> %tmp25, float 0.000000e+00, i32 3
+  %tmp33 = fmul <4 x float> undef, <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>
+  %tmp34 = fadd <4 x float> %tmp33, %tmp32
+  %tmp35 = fmul <4 x float> %tmp33, zeroinitializer
+  %tmp36 = fadd <4 x float> %tmp35, zeroinitializer
+  %tmp37 = fadd <4 x float> %tmp35, zeroinitializer
+  %tmp38 = bitcast <4 x float> %tmp34 to <2 x i64>
+  %tmp39 = shufflevector <2 x i64> %tmp38, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp40 = bitcast <1 x i64> %tmp39 to <2 x float>
+  %tmp41 = shufflevector <2 x float> %tmp40, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %tmp42 = load <4 x float>* null, align 16, !tbaa !0
+  %tmp43 = fmul <4 x float> %tmp42, %tmp41
+  %tmp44 = load <4 x float>* undef, align 16, !tbaa !0
+  %tmp45 = fadd <4 x float> undef, %tmp43
+  %tmp46 = fadd <4 x float> undef, %tmp45
+  %tmp47 = bitcast <4 x float> %tmp36 to <2 x i64>
+  %tmp48 = shufflevector <2 x i64> %tmp47, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp49 = bitcast <1 x i64> %tmp48 to <2 x float>
+  %tmp50 = shufflevector <2 x float> %tmp49, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %tmp51 = fmul <4 x float> %tmp42, %tmp50
+  %tmp52 = fmul <4 x float> %tmp44, undef
+  %tmp53 = fadd <4 x float> %tmp52, %tmp51
+  %tmp54 = fadd <4 x float> undef, %tmp53
+  %tmp55 = bitcast <4 x float> %tmp37 to <2 x i64>
+  %tmp56 = shufflevector <2 x i64> %tmp55, <2 x i64> undef, <1 x i32> <i32 1>
+  %tmp57 = bitcast <1 x i64> %tmp56 to <2 x float>
+  %tmp58 = shufflevector <2 x float> %tmp57, <2 x float> undef, <4 x i32> zeroinitializer
+  %tmp59 = fmul <4 x float> undef, %tmp58
+  %tmp60 = fadd <4 x float> %tmp59, undef
+  %tmp61 = fadd <4 x float> %tmp60, zeroinitializer
+  %tmp62 = load void (i8*, i8*)** undef, align 4
+  call arm_aapcs_vfpcc  void %tmp62(i8* sret undef, i8* undef) nounwind
+  %tmp63 = bitcast <4 x float> %tmp46 to i128
+  %tmp64 = bitcast <4 x float> %tmp54 to i128
+  %tmp65 = bitcast <4 x float> %tmp61 to i128
+  %tmp66 = lshr i128 %tmp63, 64
+  %tmp67 = trunc i128 %tmp66 to i64
+  %tmp68 = insertvalue [8 x i64] undef, i64 %tmp67, 1
+  %tmp69 = insertvalue [8 x i64] %tmp68, i64 undef, 2
+  %tmp70 = lshr i128 %tmp64, 64
+  %tmp71 = trunc i128 %tmp70 to i64
+  %tmp72 = insertvalue [8 x i64] %tmp69, i64 %tmp71, 3
+  %tmp73 = trunc i128 %tmp65 to i64
+  %tmp74 = insertvalue [8 x i64] %tmp72, i64 %tmp73, 4
+  %tmp75 = insertvalue [8 x i64] %tmp74, i64 undef, 5
+  %tmp76 = insertvalue [8 x i64] %tmp75, i64 undef, 6
+  %tmp77 = insertvalue [8 x i64] %tmp76, i64 undef, 7
+  call arm_aapcs_vfpcc  void @bar(i8* sret null, [8 x i64] %tmp77) nounwind
+  %tmp78 = call arm_aapcs_vfpcc  i8* null(i8* null) nounwind
+  %tmp79 = bitcast i8* %tmp78 to i512*
+  %tmp80 = load i512* %tmp79, align 16
+  %tmp81 = lshr i512 %tmp80, 128
+  %tmp82 = trunc i512 %tmp80 to i128
+  %tmp83 = trunc i512 %tmp81 to i128
+  %tmp84 = bitcast i128 %tmp83 to <4 x float>
+  %tmp85 = bitcast <4 x float> %tmp84 to <2 x i64>
+  %tmp86 = shufflevector <2 x i64> %tmp85, <2 x i64> undef, <1 x i32> <i32 1>
+  %tmp87 = bitcast <1 x i64> %tmp86 to <2 x float>
+  %tmp88 = shufflevector <2 x float> %tmp87, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %tmp89 = fmul <4 x float> undef, %tmp88
+  %tmp90 = fadd <4 x float> %tmp89, undef
+  %tmp91 = fadd <4 x float> undef, %tmp90
+  store <4 x float> %tmp91, <4 x float>* undef, align 16, !tbaa !0
+  unreachable
+
+bb92:                                             ; preds = %bb2
+  ret void
+}
+
+declare arm_aapcs_vfpcc void @bar(i8* noalias nocapture sret, [8 x i64]) nounwind uwtable inlinehint
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll b/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll
new file mode 100644
index 0000000..6c7aaad
--- /dev/null
+++ b/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll
@@ -0,0 +1,26 @@
+; RUN: llc -verify-coalescing < %s
+; PR11868
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+%0 = type { <4 x float> }
+%1 = type { <4 x float> }
+
+@foo = external global %0, align 16
+
+define arm_aapcs_vfpcc void @bar(float, i1 zeroext, i1 zeroext) nounwind {
+  %4 = load <4 x float>* getelementptr inbounds (%0* @foo, i32 0, i32 0), align 16
+  %5 = extractelement <4 x float> %4, i32 0
+  %6 = extractelement <4 x float> %4, i32 1
+  %7 = extractelement <4 x float> %4, i32 2
+  %8 = insertelement <4 x float> undef, float %5, i32 0
+  %9 = insertelement <4 x float> %8, float %6, i32 1
+  %10 = insertelement <4 x float> %9, float %7, i32 2
+  %11 = insertelement <4 x float> %10, float 0.000000e+00, i32 3
+  store <4 x float> %11, <4 x float>* undef, align 16 
+  call arm_aapcs_vfpcc  void @baz(%1* undef, float 0.000000e+00) nounwind
+  ret void
+}
+
+declare arm_aapcs_vfpcc void @baz(%1*, float)
diff --git a/test/CodeGen/ARM/2012-03-05-FPSCR-bug.ll b/test/CodeGen/ARM/2012-03-05-FPSCR-bug.ll
new file mode 100644
index 0000000..c9ea691
--- /dev/null
+++ b/test/CodeGen/ARM/2012-03-05-FPSCR-bug.ll
@@ -0,0 +1,36 @@
+; RUN: llc -march=arm -mcpu=cortex-a8 -verify-machineinstrs < %s
+; PR12165
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32"
+target triple = "arm-none-linux"
+
+define hidden void @_strtod_r() nounwind {
+  br i1 undef, label %1, label %2
+
+; <label>:1                                       ; preds = %0
+  br label %2
+
+; <label>:2                                       ; preds = %1, %0
+  br i1 undef, label %3, label %8
+
+; <label>:3                                       ; preds = %2
+  br i1 undef, label %4, label %7
+
+; <label>:4                                       ; preds = %3
+  %5 = call i32 @llvm.flt.rounds()
+  %6 = icmp eq i32 %5, 1
+  br i1 %6, label %8, label %7
+
+; <label>:7                                       ; preds = %4, %3
+  unreachable
+
+; <label>:8                                       ; preds = %4, %2
+  br i1 undef, label %9, label %10
+
+; <label>:9                                       ; preds = %8
+  br label %10
+
+; <label>:10                                      ; preds = %9, %8
+  ret void
+}
+
+declare i32 @llvm.flt.rounds() nounwind
diff --git a/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll b/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll
new file mode 100644
index 0000000..6206cd7
--- /dev/null
+++ b/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
+; rdar://11035895
+
+; DAG combine incorrectly optimize (i32 vextract (v4i16 load $addr), c) to
+; (i16 load $addr+c*sizeof(i16)). It should have issued an extload instead. i.e.
+; (i32 extload $addr+c*sizeof(i16)
+define void @test_hi_short3(<3 x i16> * nocapture %srcA, <2 x i16> * nocapture %dst) nounwind {
+entry:
+; CHECK: vst1.32
+  %0 = load <3 x i16> * %srcA, align 8
+  %1 = shufflevector <3 x i16> %0, <3 x i16> undef, <2 x i32> <i32 2, i32 undef>
+  store <2 x i16> %1, <2 x i16> * %dst, align 4
+  ret void
+}
+
diff --git a/test/CodeGen/ARM/2012-03-26-FoldImmBug.ll b/test/CodeGen/ARM/2012-03-26-FoldImmBug.ll
new file mode 100644
index 0000000..0ff4f51
--- /dev/null
+++ b/test/CodeGen/ARM/2012-03-26-FoldImmBug.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s
+
+; ARM has a peephole optimization which looks for a def / use pair. The def
+; produces a 32-bit immediate which is consumed by the use. It tries to 
+; fold the immediate by breaking it into two parts and fold them into the
+; immmediate fields of two uses. e.g
+;        movw    r2, #40885
+;        movt    r3, #46540
+;        add     r0, r0, r3
+; =>
+;        add.w   r0, r0, #3019898880
+;        add.w   r0, r0, #30146560
+;
+; However, this transformation is incorrect if the user produces a flag. e.g.
+;        movw    r2, #40885
+;        movt    r3, #46540
+;        adds    r0, r0, r3
+; =>
+;        add.w   r0, r0, #3019898880
+;        adds.w  r0, r0, #30146560
+; Note the adds.w may not set the carry flag even if the original sequence
+; would.
+;
+; rdar://11116189
+define i64 @t(i64 %aInput) nounwind {
+; CHECK: t:
+; CHECK: movs [[REG:(r[0-9]+)]], #0
+; CHECK: movt [[REG]], #46540
+; CHECK: adds r{{[0-9]+}}, r{{[0-9]+}}, [[REG]]
+  %1 = mul i64 %aInput, 1000000
+  %2 = add i64 %1, -7952618389194932224
+  ret i64 %2
+}
diff --git a/test/CodeGen/ARM/2012-04-02-TwoAddrInstrCrash.ll b/test/CodeGen/ARM/2012-04-02-TwoAddrInstrCrash.ll
new file mode 100644
index 0000000..33ad187
--- /dev/null
+++ b/test/CodeGen/ARM/2012-04-02-TwoAddrInstrCrash.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s
+; PR11861
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+define arm_aapcs_vfpcc void @foo() nounwind align 2 {
+  br i1 undef, label %5, label %1
+
+; <label>:1                                       ; preds = %0
+  %2 = shufflevector <1 x i64> zeroinitializer, <1 x i64> undef, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %2 to <4 x float>
+  store <4 x float> zeroinitializer, <4 x float>* undef, align 16, !tbaa !0
+  store <4 x float> zeroinitializer, <4 x float>* undef, align 16, !tbaa !0
+  store <4 x float> %3, <4 x float>* undef, align 16, !tbaa !0
+  %4 = insertelement <4 x float> %3, float 8.000000e+00, i32 2
+  store <4 x float> %4, <4 x float>* undef, align 16, !tbaa !0
+  unreachable
+
+; <label>:5                                       ; preds = %0
+  ret void
+}
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2012-04-10-DAGCombine.ll b/test/CodeGen/ARM/2012-04-10-DAGCombine.ll
new file mode 100644
index 0000000..6f50f27
--- /dev/null
+++ b/test/CodeGen/ARM/2012-04-10-DAGCombine.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 -enable-unsafe-fp-math
+;target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+;target triple = "armv7-none-linux-gnueabi"
+
+define arm_aapcs_vfpcc void @foo(<4 x float> %arg) nounwind align 2 {
+bb4:
+  %tmp = extractelement <2 x float> undef, i32 0
+  br i1 undef, label %bb18, label %bb5
+
+bb5:                                              ; preds = %bb4
+  %tmp6 = fadd float %tmp, -1.500000e+01
+  %tmp7 = fdiv float %tmp6, 2.000000e+01
+  %tmp8 = fadd float %tmp7, 1.000000e+00
+  %tmp9 = fdiv float 1.000000e+00, %tmp8
+  %tmp10 = fsub float 1.000000e+00, %tmp9
+  %tmp11 = fmul float %tmp10, 1.000000e+01
+  %tmp12 = fadd float %tmp11, 1.500000e+01
+  %tmp13 = fdiv float %tmp12, %tmp
+  %tmp14 = insertelement <2 x float> undef, float %tmp13, i32 0
+  %tmp15 = shufflevector <2 x float> %tmp14, <2 x float> undef, <4 x i32> zeroinitializer
+  %tmp16 = fmul <4 x float> zeroinitializer, %tmp15
+  %tmp17 = fadd <4 x float> %tmp16, %arg
+  store <4 x float> %tmp17, <4 x float>* undef, align 8, !tbaa !0
+  br label %bb18
+
+bb18:                                             ; preds = %bb5, %bb4
+  ret void
+}
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/arm-returnaddr.ll b/test/CodeGen/ARM/arm-returnaddr.ll
index 95edaad..1272e8e 100644
--- a/test/CodeGen/ARM/arm-returnaddr.ll
+++ b/test/CodeGen/ARM/arm-returnaddr.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s
-; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=basic | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv6-apple-darwin -regalloc=basic | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv6-apple-ios | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -regalloc=basic | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv6-apple-ios -regalloc=basic | FileCheck %s
 ; rdar://8015977
 ; rdar://8020118
 
diff --git a/test/CodeGen/ARM/atomic-op.ll b/test/CodeGen/ARM/atomic-op.ll
index 02ce5a1..8967730 100644
--- a/test/CodeGen/ARM/atomic-op.ll
+++ b/test/CodeGen/ARM/atomic-op.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin10 -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s
 
 define void @func(i32 %argc, i8** %argv) nounwind {
 entry:
@@ -61,7 +61,7 @@ entry:
   ; CHECK: strex
   %7 = atomicrmw min i32* %val2, i32 16 monotonic
 	store i32 %7, i32* %old
-	%neg = sub i32 0, 1		; <i32> [#uses=1]
+	%neg = sub i32 0, 1
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
@@ -77,5 +77,85 @@ entry:
   ; CHECK: strex
   %10 = atomicrmw max i32* %val2, i32 0 monotonic
 	store i32 %10, i32* %old
-	ret void
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %11 = atomicrmw umin i32* %val2, i32 16 monotonic
+	store i32 %11, i32* %old
+	%uneg = sub i32 0, 1
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %12 = atomicrmw umin i32* %val2, i32 %uneg monotonic
+	store i32 %12, i32* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %13 = atomicrmw umax i32* %val2, i32 1 monotonic
+	store i32 %13, i32* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %14 = atomicrmw umax i32* %val2, i32 0 monotonic
+	store i32 %14, i32* %old
+
+  ret void
+}
+
+define void @func2() nounwind {
+entry:
+  %val = alloca i16
+  %old = alloca i16
+  store i16 31, i16* %val
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %0 = atomicrmw umin i16* %val, i16 16 monotonic
+  store i16 %0, i16* %old
+  %uneg = sub i16 0, 1
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %1 = atomicrmw umin i16* %val, i16 %uneg monotonic
+  store i16 %1, i16* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %2 = atomicrmw umax i16* %val, i16 1 monotonic
+  store i16 %2, i16* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %3 = atomicrmw umax i16* %val, i16 0 monotonic
+  store i16 %3, i16* %old
+  ret void
+}
+
+define void @func3() nounwind {
+entry:
+  %val = alloca i8
+  %old = alloca i8
+  store i8 31, i8* %val
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %0 = atomicrmw umin i8* %val, i8 16 monotonic
+  store i8 %0, i8* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %uneg = sub i8 0, 1
+  %1 = atomicrmw umin i8* %val, i8 %uneg monotonic
+  store i8 %1, i8* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %2 = atomicrmw umax i8* %val, i8 1 monotonic
+  store i8 %2, i8* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %3 = atomicrmw umax i8* %val, i8 0 monotonic
+  store i8 %3, i8* %old
+  ret void
 }
diff --git a/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
index 92aff70..1b385ab 100644
--- a/test/CodeGen/ARM/avoid-cpsr-rmw.ll
+++ b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
@@ -3,14 +3,48 @@
 ; dependency) when it isn't dependent on last CPSR defining instruction.
 ; rdar://8928208
 
-define i32 @t(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone {
+define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone {
  entry:
-; CHECK: t:
-; CHECK: muls [[REG:(r[0-9]+)]], r2, r3
-; CHECK-NEXT: mul  [[REG2:(r[0-9]+)]], r0, r1
-; CHECK-NEXT: muls r0, [[REG2]], [[REG]]
+; CHECK: t1:
+; CHECK: muls [[REG:(r[0-9]+)]], r3, r2
+; CHECK-NEXT: mul  [[REG2:(r[0-9]+)]], r1, r0
+; CHECK-NEXT: muls r0, [[REG]], [[REG2]]
   %0 = mul nsw i32 %a, %b
   %1 = mul nsw i32 %c, %d
   %2 = mul nsw i32 %0, %1
   ret i32 %2
 }
+
+; Avoid partial CPSR dependency via loop backedge.
+; rdar://10357570
+define void @t2(i32* nocapture %ptr1, i32* %ptr2, i32 %c) nounwind {
+entry:
+; CHECK: t2:
+  %tobool7 = icmp eq i32* %ptr2, null
+  br i1 %tobool7, label %while.end, label %while.body
+
+while.body:
+; CHECK: while.body
+; CHECK: mul r{{[0-9]+}}
+; CHECK-NOT: muls
+  %ptr1.addr.09 = phi i32* [ %add.ptr, %while.body ], [ %ptr1, %entry ]
+  %ptr2.addr.08 = phi i32* [ %incdec.ptr, %while.body ], [ %ptr2, %entry ]
+  %0 = load i32* %ptr1.addr.09, align 4
+  %arrayidx1 = getelementptr inbounds i32* %ptr1.addr.09, i32 1
+  %1 = load i32* %arrayidx1, align 4
+  %arrayidx3 = getelementptr inbounds i32* %ptr1.addr.09, i32 2
+  %2 = load i32* %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds i32* %ptr1.addr.09, i32 3
+  %3 = load i32* %arrayidx4, align 4
+  %add.ptr = getelementptr inbounds i32* %ptr1.addr.09, i32 4
+  %mul = mul i32 %1, %0
+  %mul5 = mul i32 %mul, %2
+  %mul6 = mul i32 %mul5, %3
+  store i32 %mul6, i32* %ptr2.addr.08, align 4
+  %incdec.ptr = getelementptr inbounds i32* %ptr2.addr.08, i32 -1
+  %tobool = icmp eq i32* %incdec.ptr, null
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:
+  ret void
+}
diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll
index f78d998..94edff5 100644
--- a/test/CodeGen/ARM/call-tc.ll
+++ b/test/CodeGen/ARM/call-tc.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -mtriple=armv6-apple-darwin -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKV6
+; RUN: llc < %s -mtriple=armv6-apple-ios -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKV6
 ; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D
 ; RUN: llc < %s -mtriple=thumbv7-apple-ios5.0 | FileCheck %s -check-prefix=CHECKT2D
 
 ; Enable tailcall optimization for iOS 5.0
@@ -96,3 +96,70 @@ bb:
   tail call void @foo() nounwind
   ret void
 }
+
+; Make sure codegenprep is duplicating ret instructions to enable tail calls.
+; rdar://11140249
+define i32 @t8(i32 %x) nounwind ssp {
+entry:
+; CHECKT2D: t8:
+; CHECKT2D-NOT: push
+; CHECKT2D-NOT
+  %and = and i32 %x, 1
+  %tobool = icmp eq i32 %and, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+; CHECKT2D: bne.w _a
+  %call = tail call i32 @a(i32 %x) nounwind
+  br label %return
+
+if.end:                                           ; preds = %entry
+  %and1 = and i32 %x, 2
+  %tobool2 = icmp eq i32 %and1, 0
+  br i1 %tobool2, label %if.end5, label %if.then3
+
+if.then3:                                         ; preds = %if.end
+; CHECKT2D: bne.w _b
+  %call4 = tail call i32 @b(i32 %x) nounwind
+  br label %return
+
+if.end5:                                          ; preds = %if.end
+; CHECKT2D: b.w _c
+  %call6 = tail call i32 @c(i32 %x) nounwind
+  br label %return
+
+return:                                           ; preds = %if.end5, %if.then3, %if.then
+  %retval.0 = phi i32 [ %call, %if.then ], [ %call4, %if.then3 ], [ %call6, %if.end5 ]
+  ret i32 %retval.0
+}
+
+declare i32 @a(i32)
+
+declare i32 @b(i32)
+
+declare i32 @c(i32)
+
+; PR12419
+; rdar://11195178
+; Use the correct input chain for the tailcall node or else the call to
+; _ZN9MutexLockD1Ev would be lost.
+%class.MutexLock = type { i8 }
+
+@x = external global i32, align 4
+
+define i32 @t9() nounwind {
+; CHECKT2D: t9:
+; CHECKT2D: blx __ZN9MutexLockC1Ev
+; CHECKT2D: blx __ZN9MutexLockD1Ev
+; CHECKT2D: b.w ___divsi3
+  %lock = alloca %class.MutexLock, align 1
+  %1 = call %class.MutexLock* @_ZN9MutexLockC1Ev(%class.MutexLock* %lock)
+  %2 = load i32* @x, align 4
+  %3 = sdiv i32 1000, %2
+  %4 = call %class.MutexLock* @_ZN9MutexLockD1Ev(%class.MutexLock* %lock)
+  ret i32 %3
+}
+
+declare %class.MutexLock* @_ZN9MutexLockC1Ev(%class.MutexLock*) unnamed_addr nounwind align 2
+
+declare %class.MutexLock* @_ZN9MutexLockD1Ev(%class.MutexLock*) unnamed_addr nounwind align 2
diff --git a/test/CodeGen/ARM/call.ll b/test/CodeGen/ARM/call.ll
index 0f9543f..107e79a 100644
--- a/test/CodeGen/ARM/call.ll
+++ b/test/CodeGen/ARM/call.ll
@@ -26,7 +26,7 @@ define i32* @m_231b(i32, i32, i32*, i32*, i32*) nounwind {
 ; CHECKV4: bx r{{.*}}
 BB0:
   %5 = inttoptr i32 %0 to i32*                    ; <i32*> [#uses=1]
-  %t35 = volatile load i32* %5                    ; <i32> [#uses=1]
+  %t35 = load volatile i32* %5                    ; <i32> [#uses=1]
   %6 = inttoptr i32 %t35 to i32**                 ; <i32**> [#uses=1]
   %7 = getelementptr i32** %6, i32 86             ; <i32**> [#uses=1]
   %8 = load i32** %7                              ; <i32*> [#uses=1]
diff --git a/test/CodeGen/ARM/clz.ll b/test/CodeGen/ARM/clz.ll
index e381e00..5b6a584 100644
--- a/test/CodeGen/ARM/clz.ll
+++ b/test/CodeGen/ARM/clz.ll
@@ -1,10 +1,10 @@
 ; RUN: llc < %s -march=arm -mattr=+v5t | FileCheck %s
 
-declare i32 @llvm.ctlz.i32(i32)
+declare i32 @llvm.ctlz.i32(i32, i1)
 
 define i32 @test(i32 %x) {
 ; CHECK: test
 ; CHECK: clz r0, r0
-        %tmp.1 = call i32 @llvm.ctlz.i32( i32 %x )
+        %tmp.1 = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
         ret i32 %tmp.1
 }
diff --git a/test/CodeGen/ARM/code-placement.ll b/test/CodeGen/ARM/code-placement.ll
index 91ef659..487ec69 100644
--- a/test/CodeGen/ARM/code-placement.ll
+++ b/test/CodeGen/ARM/code-placement.ll
@@ -12,9 +12,9 @@ entry:
   br i1 %0, label %bb2, label %bb
 
 bb:
-; CHECK: LBB0_2:
-; CHECK: bne LBB0_2
-; CHECK-NOT: b LBB0_2
+; CHECK: LBB0_1:
+; CHECK: bne LBB0_1
+; CHECK-NOT: b LBB0_1
 ; CHECK: bx lr
   %list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ]
   %next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ]
diff --git a/test/CodeGen/ARM/commute-movcc.ll b/test/CodeGen/ARM/commute-movcc.ll
new file mode 100644
index 0000000..7316452
--- /dev/null
+++ b/test/CodeGen/ARM/commute-movcc.ll
@@ -0,0 +1,67 @@
+; RUN: llc -mtriple=thumbv7-apple-ios -disable-code-place < %s | FileCheck %s
+; RUN: llc -mtriple=armv7-apple-ios   -disable-code-place < %s | FileCheck %s
+
+; LLVM IR optimizers canonicalize icmp+select this way.
+; Make sure that TwoAddressInstructionPass can commute the corresponding
+; MOVCC instructions to avoid excessive copies in one of the if blocks.
+;
+; CHECK: %if.then
+; CHECK-NOT: mov
+; CHECK: movlo
+; CHECK: movlo
+; CHECK-NOT: mov
+
+; CHECK: %if.else
+; CHECK-NOT: mov
+; CHECK: movls
+; CHECK: movls
+; CHECK-NOT: mov
+
+; This is really an LSR test: Make sure that cmp is using the incremented
+; induction variable.
+; CHECK: %if.end8
+; CHECK: add{{(s|\.w)?}} [[IV:r[0-9]+]], {{.*}}#1
+; CHECK: cmp [[IV]], #
+
+define i32 @f(i32* nocapture %a, i32 %Pref) nounwind ssp {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %if.end8
+  %i.012 = phi i32 [ 0, %entry ], [ %inc, %if.end8 ]
+  %BestCost.011 = phi i32 [ -1, %entry ], [ %BestCost.1, %if.end8 ]
+  %BestIdx.010 = phi i32 [ 0, %entry ], [ %BestIdx.1, %if.end8 ]
+  %arrayidx = getelementptr inbounds i32* %a, i32 %i.012
+  %0 = load i32* %arrayidx, align 4, !tbaa !0
+  %mul = mul i32 %0, %0
+  %sub = add nsw i32 %i.012, -5
+  %cmp2 = icmp eq i32 %sub, %Pref
+  br i1 %cmp2, label %if.else, label %if.then
+
+if.then:                                          ; preds = %for.body
+  %cmp3 = icmp ult i32 %mul, %BestCost.011
+  %i.0.BestIdx.0 = select i1 %cmp3, i32 %i.012, i32 %BestIdx.010
+  %mul.BestCost.0 = select i1 %cmp3, i32 %mul, i32 %BestCost.011
+  br label %if.end8
+
+if.else:                                          ; preds = %for.body
+  %cmp5 = icmp ugt i32 %mul, %BestCost.011
+  %BestIdx.0.i.0 = select i1 %cmp5, i32 %BestIdx.010, i32 %i.012
+  %BestCost.0.mul = select i1 %cmp5, i32 %BestCost.011, i32 %mul
+  br label %if.end8
+
+if.end8:                                          ; preds = %if.else, %if.then
+  %BestIdx.1 = phi i32 [ %i.0.BestIdx.0, %if.then ], [ %BestIdx.0.i.0, %if.else ]
+  %BestCost.1 = phi i32 [ %mul.BestCost.0, %if.then ], [ %BestCost.0.mul, %if.else ]
+  store i32 %mul, i32* %arrayidx, align 4, !tbaa !0
+  %inc = add i32 %i.012, 1
+  %cmp = icmp eq i32 %inc, 11
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:                                          ; preds = %if.end8
+  ret i32 %BestIdx.1
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/cse-call.ll b/test/CodeGen/ARM/cse-call.ll
new file mode 100644
index 0000000..eff5de5
--- /dev/null
+++ b/test/CodeGen/ARM/cse-call.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mcpu=arm1136jf-s -verify-machineinstrs | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "armv6-apple-ios0.0.0"
+
+; Don't CSE a cmp across a call that clobbers CPSR.
+;
+; CHECK: cmp
+; CHECK: S_trimzeros
+; CHECK: cmp
+; CHECK: strlen
+
+@F_floatmul.man1 = external global [200 x i8], align 1
+@F_floatmul.man2 = external global [200 x i8], align 1
+
+declare i32 @strlen(i8* nocapture) nounwind readonly
+declare void @S_trimzeros(...)
+
+define i8* @F_floatmul(i8* %f1, i8* %f2) nounwind ssp {
+entry:
+  br i1 undef, label %while.end42, label %while.body37
+
+while.body37:                                     ; preds = %while.body37, %entry
+  br i1 false, label %while.end42, label %while.body37
+
+while.end42:                                      ; preds = %while.body37, %entry
+  %. = select i1 undef, i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man1, i32 0, i32 0), i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man2, i32 0, i32 0)
+  %.92 = select i1 undef, i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man2, i32 0, i32 0), i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man1, i32 0, i32 0)
+  tail call void bitcast (void (...)* @S_trimzeros to void (i8*)*)(i8* %.92) nounwind
+  %call47 = tail call i32 @strlen(i8* %.) nounwind
+  unreachable
+}
diff --git a/test/CodeGen/ARM/cse-libcalls.ll b/test/CodeGen/ARM/cse-libcalls.ll
index 0dcf9dd..1d011be 100644
--- a/test/CodeGen/ARM/cse-libcalls.ll
+++ b/test/CodeGen/ARM/cse-libcalls.ll
@@ -4,7 +4,7 @@ target triple = "i386-apple-darwin8"
 
 ; Without CSE of libcalls, there are two calls in the output instead of one.
 
-define i32 @u_f_nonbon(double %lambda) nounwind {
+define double @u_f_nonbon(double %lambda) nounwind {
 entry:
 	%tmp19.i.i = load double* null, align 4		; <double> [#uses=2]
 	%tmp6.i = fcmp olt double %tmp19.i.i, 1.000000e+00		; <i1> [#uses=1]
@@ -26,5 +26,5 @@ bb502.loopexit.i:		; preds = %bb28.i
 	br i1 false, label %bb.nph53.i, label %bb508.i
 
 bb508.i:		; preds = %bb502.loopexit.i, %entry
-	ret i32 1
+	ret double %tmp10.i4
 }
diff --git a/test/CodeGen/ARM/ctor_order.ll b/test/CodeGen/ARM/ctor_order.ll
new file mode 100644
index 0000000..6419292
--- /dev/null
+++ b/test/CodeGen/ARM/ctor_order.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin  | FileCheck %s -check-prefix=DARWIN
+; RUN: llc < %s -mtriple=arm-linux-gnu     | FileCheck %s -check-prefix=ELF
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=GNUEABI
+
+; DARWIN:      .section	__DATA,__mod_init_func,mod_init_funcs
+; DARWIN:      .long _f151
+; DARWIN-NEXT: .long _f152
+
+; ELF:      .section .ctors.65384,"aw",%progbits
+; ELF:      .long    f151
+; ELF:      .section .ctors.65383,"aw",%progbits
+; ELF:      .long    f152
+
+; GNUEABI:      .section .init_array.151,"aw",%init_array
+; GNUEABI:      .long    f151
+; GNUEABI:      .section .init_array.152,"aw",%init_array
+; GNUEABI:      .long    f152
+
+
+@llvm.global_ctors = appending global [2 x { i32, void ()* }] [ { i32, void ()* } { i32 151, void ()* @f151 }, { i32, void ()* } { i32 152, void ()* @f152 } ]
+
+define void @f151() {
+entry:
+        ret void
+}
+
+define void @f152() {
+entry:
+        ret void
+}
diff --git a/test/CodeGen/ARM/ctz.ll b/test/CodeGen/ARM/ctz.ll
index 1d2ced3..5ebca53 100644
--- a/test/CodeGen/ARM/ctz.ll
+++ b/test/CodeGen/ARM/ctz.ll
@@ -1,11 +1,11 @@
 ; RUN: llc < %s -march=arm -mattr=+v6t2 | FileCheck %s
 
-declare i32 @llvm.cttz.i32(i32)
+declare i32 @llvm.cttz.i32(i32, i1)
 
 define i32 @f1(i32 %a) {
 ; CHECK: f1:
 ; CHECK: rbit
 ; CHECK: clz
-  %tmp = call i32 @llvm.cttz.i32( i32 %a )
+  %tmp = call i32 @llvm.cttz.i32( i32 %a, i1 true )
   ret i32 %tmp
 }
diff --git a/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll b/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
new file mode 100644
index 0000000..18f57ea
--- /dev/null
+++ b/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
@@ -0,0 +1,30 @@
+; RUN: llc -mtriple armv7 %s -o - | FileCheck %s
+
+; CHECK: f:
+define float @f(<4 x i16>* nocapture %in) {
+  ; CHECK: vldr
+  ; CHECK: vmovl.u16
+  ; CHECK-NOT: vand
+  %1 = load <4 x i16>* %in
+  ; CHECK: vcvt.f32.u32
+  %2 = uitofp <4 x i16> %1 to <4 x float>
+  %3 = extractelement <4 x float> %2, i32 0
+  %4 = extractelement <4 x float> %2, i32 1
+  %5 = extractelement <4 x float> %2, i32 2
+
+  ; CHECK: vadd.f32
+  %6 = fadd float %3, %4
+  %7 = fadd float %6, %5
+
+  ret float %7
+}
+
+define float @g(<4 x i16>* nocapture %in) {
+  ; CHECK: vldr
+  %1 = load <4 x i16>* %in
+  ; CHECK-NOT: uxth
+  %2 = extractelement <4 x i16> %1, i32 0
+  ; CHECK: vcvt.f32.u32
+  %3 = uitofp i16 %2 to float
+  ret float %3
+}
diff --git a/test/CodeGen/ARM/debug-info-arg.ll b/test/CodeGen/ARM/debug-info-arg.ll
index b0270f9..a7b44e6 100644
--- a/test/CodeGen/ARM/debug-info-arg.ll
+++ b/test/CodeGen/ARM/debug-info-arg.ll
@@ -2,7 +2,7 @@
 ; Test to check argument y's debug info uses FI
 ; Radar 10048772
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32"
-target triple = "thumbv7-apple-macosx10.7.0"
+target triple = "thumbv7-apple-ios"
 
 %struct.tag_s = type { i32, i32, i32 }
 
diff --git a/test/CodeGen/ARM/debug-info-blocks.ll b/test/CodeGen/ARM/debug-info-blocks.ll
index 2c59316..0ad0a15 100644
--- a/test/CodeGen/ARM/debug-info-blocks.ll
+++ b/test/CodeGen/ARM/debug-info-blocks.ll
@@ -1,8 +1,8 @@
 ; RUN: llc -O0 < %s | FileCheck %s
-; CHECK: @DEBUG_VALUE: mydata <- [sp+#4]+#0
+; CHECK: @DEBUG_VALUE: mydata <- [sp+#{{[0-9]+}}]+#0
 ; Radar 9331779
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
-target triple = "thumbv7-apple-macosx10.7.0"
+target triple = "thumbv7-apple-ios"
 
 %0 = type opaque
 %1 = type { [4 x i32] }
diff --git a/test/CodeGen/ARM/debug-info-d16-reg.ll b/test/CodeGen/ARM/debug-info-d16-reg.ll
index 8c9095e..325eea0 100644
--- a/test/CodeGen/ARM/debug-info-d16-reg.ll
+++ b/test/CodeGen/ARM/debug-info-d16-reg.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s - | FileCheck %s
+; RUN: llc < %s | FileCheck %s
 ; Radar 9309221
 ; Test dwarf reg no for d16
 ;CHECK: DW_OP_regx
diff --git a/test/CodeGen/ARM/debug-info-qreg.ll b/test/CodeGen/ARM/debug-info-qreg.ll
index e83a83d..97c9c66 100644
--- a/test/CodeGen/ARM/debug-info-qreg.ll
+++ b/test/CodeGen/ARM/debug-info-qreg.ll
@@ -3,13 +3,11 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
 target triple = "thumbv7-apple-macosx10.6.7"
 
 ;CHECK: DW_OP_regx for Q register: D1
-;CHECK-NEXT: byte
-;CHECK-NEXT: byte
+;CHECK-NEXT: ascii
 ;CHECK-NEXT: DW_OP_piece 8
 ;CHECK-NEXT: byte   8
 ;CHECK-NEXT: DW_OP_regx for Q register: D2
-;CHECK-NEXT: byte
-;CHECK-NEXT: byte
+;CHECK-NEXT: ascii
 ;CHECK-NEXT: DW_OP_piece 8
 ;CHECK-NEXT: byte   8
 
diff --git a/test/CodeGen/ARM/debug-info-s16-reg.ll b/test/CodeGen/ARM/debug-info-s16-reg.ll
index 548c9bd..db41143 100644
--- a/test/CodeGen/ARM/debug-info-s16-reg.ll
+++ b/test/CodeGen/ARM/debug-info-s16-reg.ll
@@ -2,8 +2,7 @@
 ; Radar 9309221
 ; Test dwarf reg no for s16
 ;CHECK: DW_OP_regx for S register
-;CHECK-NEXT: byte
-;CHECK-NEXT: byte
+;CHECK-NEXT: ascii
 ;CHECK-NEXT: DW_OP_bit_piece 32 0
 
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
diff --git a/test/CodeGen/ARM/debug-info-sreg2.ll b/test/CodeGen/ARM/debug-info-sreg2.ll
index ee777ce..ae7af0a 100644
--- a/test/CodeGen/ARM/debug-info-sreg2.ll
+++ b/test/CodeGen/ARM/debug-info-sreg2.ll
@@ -4,11 +4,11 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
 target triple = "thumbv7-apple-macosx10.6.7"
 
 ;CHECK: Ldebug_loc0:
+;CHECK-NEXT:        .long   Ltmp0
 ;CHECK-NEXT:        .long   Ltmp1
-;CHECK-NEXT:        .long   Ltmp2
-;CHECK-NEXT: Lset8 = Ltmp10-Ltmp9                    @ Loc expr size
-;CHECK-NEXT:        .short  Lset8
-;CHECK-NEXT: Ltmp9:
+;CHECK-NEXT: Lset[[N:[0-9]+]] = Ltmp{{[0-9]+}}-Ltmp[[M:[0-9]+]]        @ Loc expr size
+;CHECK-NEXT:        .short  Lset[[N]]
+;CHECK-NEXT: Ltmp[[M]]:
 ;CHECK-NEXT:        .byte   144                     @ DW_OP_regx for S register
 
 define void @_Z3foov() optsize ssp {
diff --git a/test/CodeGen/ARM/dg.exp b/test/CodeGen/ARM/dg.exp
deleted file mode 100644
index 3ff359a..0000000
--- a/test/CodeGen/ARM/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target ARM] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/ARM/eh-resume-darwin.ll b/test/CodeGen/ARM/eh-resume-darwin.ll
index e475508..d1252f4 100644
--- a/test/CodeGen/ARM/eh-resume-darwin.ll
+++ b/test/CodeGen/ARM/eh-resume-darwin.ll
@@ -3,12 +3,6 @@ target triple = "armv6-apple-macosx10.6"
 
 declare void @func()
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
-declare void @llvm.eh.resume(i8*, i32)
-
 declare i32 @__gxx_personality_sj0(...)
 
 define void @test0() {
@@ -20,10 +14,9 @@ cont:
   ret void
 
 lpad:
-  %exn = call i8* @llvm.eh.exception()
-  %sel = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 0)
-  call void @llvm.eh.resume(i8* %exn, i32 %sel) noreturn
-  unreachable
+  %exn = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+           cleanup
+  resume { i8*, i32 } %exn
 }
 
 ; CHECK: __Unwind_SjLj_Resume
diff --git a/test/CodeGen/ARM/ehabi-unwind.ll b/test/CodeGen/ARM/ehabi-unwind.ll
new file mode 100644
index 0000000..fd7d0e6
--- /dev/null
+++ b/test/CodeGen/ARM/ehabi-unwind.ll
@@ -0,0 +1,16 @@
+; Test that the EHABI unwind instruction generator does not encounter any
+; unfamiliar instructions.
+; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi -disable-fp-elim
+; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi
+; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi -arm-enable-ehabi-descriptors
+
+define void @_Z1fv() nounwind {
+entry:
+  ret void
+}
+
+define void @_Z1gv() nounwind {
+entry:
+  call void @_Z1fv()
+  ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll
new file mode 100644
index 0000000..dbb634d
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll
@@ -0,0 +1,65 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+
+%struct.A = type { i32, [2 x [2 x i32]], i8, [3 x [3 x [3 x i32]]] }
+%struct.B = type { i32, [2 x [2 x [2 x %struct.A]]] }
+
+@arr = common global [2 x [2 x [2 x [2 x [2 x i32]]]]] zeroinitializer, align 4
+@A = common global [3 x [3 x %struct.A]] zeroinitializer, align 4
+@B = common global [2 x [2 x [2 x %struct.B]]] zeroinitializer, align 4
+
+define i32* @t1() nounwind {
+entry:
+; ARM: t1
+; THUMB: t1
+  %addr = alloca i32*, align 4
+  store i32* getelementptr inbounds ([2 x [2 x [2 x [2 x [2 x i32]]]]]* @arr, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1), i32** %addr, align 4
+; ARM: add r0, r0, #124
+; THUMB: adds r0, #124
+  %0 = load i32** %addr, align 4
+  ret i32* %0
+}
+
+define i32* @t2() nounwind {
+entry:
+; ARM: t2
+; THUMB: t2
+  %addr = alloca i32*, align 4
+  store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 2, i32 2, i32 3, i32 1, i32 2, i32 2), i32** %addr, align 4
+; ARM: movw r1, #1148
+; ARM: add r0, r0, r1
+; THUMB: addw r0, r0, #1148
+  %0 = load i32** %addr, align 4
+  ret i32* %0
+}
+
+define i32* @t3() nounwind {
+entry:
+; ARM: t3
+; THUMB: t3
+  %addr = alloca i32*, align 4
+  store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 0, i32 1, i32 1, i32 0, i32 1), i32** %addr, align 4
+; ARM: add r0, r0, #140
+; THUMB: adds r0, #140
+  %0 = load i32** %addr, align 4
+  ret i32* %0
+}
+
+define i32* @t4() nounwind {
+entry:
+; ARM: t4
+; THUMB: t4
+  %addr = alloca i32*, align 4
+  store i32* getelementptr inbounds ([2 x [2 x [2 x %struct.B]]]* @B, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 3, i32 1, i32 2, i32 1), i32** %addr, align 4
+; ARM-NOT: movw r{{[0-9]}}, #1060
+; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #4
+; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #132
+; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #24
+; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #36
+; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #24
+; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #4
+; ARM: movw r{{[0-9]}}, #1284
+; THUMB: addw r{{[0-9]}}, r{{[0-9]}}, #1284
+  %0 = load i32** %addr, align 4
+  ret i32* %0
+}
diff --git a/test/CodeGen/ARM/fast-isel-binary.ll b/test/CodeGen/ARM/fast-isel-binary.ll
new file mode 100644
index 0000000..723383e
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-binary.ll
@@ -0,0 +1,116 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+; Test add with non-legal types
+
+define void @add_i1(i1 %a, i1 %b) nounwind ssp {
+entry:
+; ARM: add_i1
+; THUMB: add_i1
+  %a.addr = alloca i1, align 4
+  %0 = add i1 %a, %b
+; ARM: add r0, r0, r1
+; THUMB: add r0, r1
+  store i1 %0, i1* %a.addr, align 4
+  ret void
+}
+
+define void @add_i8(i8 %a, i8 %b) nounwind ssp {
+entry:
+; ARM: add_i8
+; THUMB: add_i8
+  %a.addr = alloca i8, align 4
+  %0 = add i8 %a, %b
+; ARM: add r0, r0, r1
+; THUMB: add r0, r1
+  store i8 %0, i8* %a.addr, align 4
+  ret void
+}
+
+define void @add_i16(i16 %a, i16 %b) nounwind ssp {
+entry:
+; ARM: add_i16
+; THUMB: add_i16
+  %a.addr = alloca i16, align 4
+  %0 = add i16 %a, %b
+; ARM: add r0, r0, r1
+; THUMB: add r0, r1
+  store i16 %0, i16* %a.addr, align 4
+  ret void
+}
+
+; Test or with non-legal types
+
+define void @or_i1(i1 %a, i1 %b) nounwind ssp {
+entry:
+; ARM: or_i1
+; THUMB: or_i1
+  %a.addr = alloca i1, align 4
+  %0 = or i1 %a, %b
+; ARM: orr r0, r0, r1
+; THUMB: orrs r0, r1
+  store i1 %0, i1* %a.addr, align 4
+  ret void
+}
+
+define void @or_i8(i8 %a, i8 %b) nounwind ssp {
+entry:
+; ARM: or_i8
+; THUMB: or_i8
+  %a.addr = alloca i8, align 4
+  %0 = or i8 %a, %b
+; ARM: orr r0, r0, r1
+; THUMB: orrs r0, r1
+  store i8 %0, i8* %a.addr, align 4
+  ret void
+}
+
+define void @or_i16(i16 %a, i16 %b) nounwind ssp {
+entry:
+; ARM: or_i16
+; THUMB: or_i16
+  %a.addr = alloca i16, align 4
+  %0 = or i16 %a, %b
+; ARM: orr r0, r0, r1
+; THUMB: orrs r0, r1
+  store i16 %0, i16* %a.addr, align 4
+  ret void
+}
+
+; Test sub with non-legal types
+
+define void @sub_i1(i1 %a, i1 %b) nounwind ssp {
+entry:
+; ARM: sub_i1
+; THUMB: sub_i1
+  %a.addr = alloca i1, align 4
+  %0 = sub i1 %a, %b
+; ARM: sub r0, r0, r1
+; THUMB: subs r0, r0, r1
+  store i1 %0, i1* %a.addr, align 4
+  ret void
+}
+
+define void @sub_i8(i8 %a, i8 %b) nounwind ssp {
+entry:
+; ARM: sub_i8
+; THUMB: sub_i8
+  %a.addr = alloca i8, align 4
+  %0 = sub i8 %a, %b
+; ARM: sub r0, r0, r1
+; THUMB: subs r0, r0, r1
+  store i8 %0, i8* %a.addr, align 4
+  ret void
+}
+
+define void @sub_i16(i16 %a, i16 %b) nounwind ssp {
+entry:
+; ARM: sub_i16
+; THUMB: sub_i16
+  %a.addr = alloca i16, align 4
+  %0 = sub i16 %a, %b
+; ARM: sub r0, r0, r1
+; THUMB: subs r0, r0, r1
+  store i16 %0, i16* %a.addr, align 4
+  ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-br-const.ll b/test/CodeGen/ARM/fast-isel-br-const.ll
new file mode 100644
index 0000000..7c532d5
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-br-const.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+define i32 @t1(i32 %a, i32 %b) nounwind uwtable ssp {
+entry:
+; THUMB: t1:
+; ARM: t1:
+  %x = add i32 %a, %b  
+  br i1 1, label %if.then, label %if.else
+; THUMB-NOT: b LBB0_1
+; ARM-NOT:  b LBB0_1
+
+if.then:                                          ; preds = %entry
+  call void @foo1()
+  br label %if.end7
+
+if.else:                                          ; preds = %entry
+  br i1 0, label %if.then2, label %if.else3
+; THUMB: b LBB0_4
+; ARM:  b LBB0_4
+
+if.then2:                                         ; preds = %if.else
+  call void @foo2()
+  br label %if.end6
+
+if.else3:                                         ; preds = %if.else
+  %y = sub i32 %a, %b
+  br i1 1, label %if.then5, label %if.end
+; THUMB-NOT: b LBB0_5
+; ARM-NOT:  b LBB0_5
+
+if.then5:                                         ; preds = %if.else3
+  call void @foo1()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then5, %if.else3
+  br label %if.end6
+
+if.end6:                                          ; preds = %if.end, %if.then2
+  br label %if.end7
+
+if.end7:                                          ; preds = %if.end6, %if.then
+  ret i32 0
+}
+
+declare void @foo1()
+
+declare void @foo2()
diff --git a/test/CodeGen/ARM/fast-isel-br-phi.ll b/test/CodeGen/ARM/fast-isel-br-phi.ll
new file mode 100644
index 0000000..a0aba69
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-br-phi.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios
+
+; This test ensures HandlePHINodesInSuccessorBlocks() is able to promote basic
+; non-legal integer types (i.e., i1, i8, i16).
+
+declare void @fooi8(i8)
+declare void @fooi16(i16)
+
+define void @foo(i1 %cmp) nounwind ssp {
+entry:
+  br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %entry
+  br label %cond.end
+
+cond.false:                                       ; preds = %entry
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i1 [ 0, %cond.true ], [ 1, %cond.false ]
+  br i1 %cond, label %cond.true8, label %cond.false8
+
+cond.true8:                                       ; preds = %cond.end
+  br label %cond.end8
+
+cond.false8:                                      ; preds = %cond.end
+  br label %cond.end8
+
+cond.end8:                                        ; preds = %cond.false8, %cond.true8
+  %cond8 = phi i8 [ 0, %cond.true8 ], [ 1, %cond.false8 ]
+  call void @fooi8(i8 %cond8)
+  br i1 0, label %cond.true16, label %cond.false16
+
+cond.true16:                                       ; preds = %cond.end8
+  br label %cond.end16
+
+cond.false16:                                      ; preds = %cond.end8
+  br label %cond.end16
+
+cond.end16:                                        ; preds = %cond.false16, %cond.true16
+  %cond16 = phi i16 [ 0, %cond.true16 ], [ 1, %cond.false16 ]
+  call void @fooi16(i16 %cond16)
+  ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-call.ll b/test/CodeGen/ARM/fast-isel-call.ll
new file mode 100644
index 0000000..dd460b2
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-call.ll
@@ -0,0 +1,128 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+define i32 @t0(i1 zeroext %a) nounwind {
+  %1 = zext i1 %a to i32
+  ret i32 %1
+}
+
+define i32 @t1(i8 signext %a) nounwind {
+  %1 = sext i8 %a to i32
+  ret i32 %1
+}
+
+define i32 @t2(i8 zeroext %a) nounwind {
+  %1 = zext i8 %a to i32
+  ret i32 %1
+}
+
+define i32 @t3(i16 signext %a) nounwind {
+  %1 = sext i16 %a to i32
+  ret i32 %1
+}
+
+define i32 @t4(i16 zeroext %a) nounwind {
+  %1 = zext i16 %a to i32
+  ret i32 %1
+}
+
+define void @foo(i8 %a, i16 %b) nounwind {
+; ARM: foo
+; THUMB: foo
+;; Materialize i1 1
+; ARM: movw r2, #1
+;; zero-ext
+; ARM: and r2, r2, #1
+; THUMB: and r2, r2, #1
+  %1 = call i32 @t0(i1 zeroext 1)
+; ARM: sxtb	r2, r1
+; ARM: mov r0, r2
+; THUMB: sxtb	r2, r1
+; THUMB: mov r0, r2
+  %2 = call i32 @t1(i8 signext %a)
+; ARM: uxtb	r2, r1
+; ARM: mov r0, r2
+; THUMB: uxtb	r2, r1
+; THUMB: mov r0, r2
+  %3 = call i32 @t2(i8 zeroext %a)
+; ARM: sxth	r2, r1
+; ARM: mov r0, r2
+; THUMB: sxth	r2, r1
+; THUMB: mov r0, r2
+  %4 = call i32 @t3(i16 signext %b)
+; ARM: uxth	r2, r1
+; ARM: mov r0, r2
+; THUMB: uxth	r2, r1
+; THUMB: mov r0, r2
+  %5 = call i32 @t4(i16 zeroext %b)
+
+;; A few test to check materialization
+;; Note: i1 1 was materialized with t1 call
+; ARM: movw r1, #255
+%6 = call i32 @t2(i8 zeroext 255)
+; ARM: movw r1, #65535
+; THUMB: movw r1, #65535
+%7 = call i32 @t4(i16 zeroext 65535)
+  ret void
+}
+
+define void @foo2() nounwind {
+  %1 = call signext i16 @t5()
+  %2 = call zeroext i16 @t6()
+  %3 = call signext i8 @t7()
+  %4 = call zeroext i8 @t8()
+  %5 = call zeroext i1 @t9()
+  ret void
+}
+
+declare signext i16 @t5();
+declare zeroext i16 @t6();
+declare signext i8 @t7();
+declare zeroext i8 @t8();
+declare zeroext i1 @t9();
+
+define i32 @t10(i32 %argc, i8** nocapture %argv) {
+entry:
+; ARM: @t10
+; ARM: movw r0, #0
+; ARM: movw r1, #248
+; ARM: movw r2, #187
+; ARM: movw r3, #28
+; ARM: movw r9, #40
+; ARM: movw r12, #186
+; ARM: uxtb r0, r0
+; ARM: uxtb r1, r1
+; ARM: uxtb r2, r2
+; ARM: uxtb r3, r3
+; ARM: uxtb r9, r9
+; ARM: str r9, [sp]
+; ARM: uxtb r9, r12
+; ARM: str r9, [sp, #4]
+; ARM: bl _bar
+; THUMB: @t10
+; THUMB: movs r0, #0
+; THUMB: movt r0, #0
+; THUMB: movs r1, #248
+; THUMB: movt r1, #0
+; THUMB: movs r2, #187
+; THUMB: movt r2, #0
+; THUMB: movs r3, #28
+; THUMB: movt r3, #0
+; THUMB: movw r9, #40
+; THUMB: movt r9, #0
+; THUMB: movw r12, #186
+; THUMB: movt r12, #0
+; THUMB: uxtb r0, r0
+; THUMB: uxtb r1, r1
+; THUMB: uxtb r2, r2
+; THUMB: uxtb r3, r3
+; THUMB: uxtb.w r9, r9
+; THUMB: str.w r9, [sp]
+; THUMB: uxtb.w r9, r12
+; THUMB: str.w r9, [sp, #4]
+; THUMB: bl _bar
+  %call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70)
+  ret i32 0
+}
+
+declare i32 @bar(i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext)
diff --git a/test/CodeGen/ARM/fast-isel-cmp-imm.ll b/test/CodeGen/ARM/fast-isel-cmp-imm.ll
new file mode 100644
index 0000000..660156a
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-cmp-imm.ll
@@ -0,0 +1,250 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+define void @t1a(float %a) uwtable ssp {
+entry:
+; ARM: t1a
+; THUMB: t1a
+  %cmp = fcmp oeq float %a, 0.000000e+00
+; ARM: vcmpe.f32 s{{[0-9]+}}, #0
+; THUMB: vcmpe.f32 s{{[0-9]+}}, #0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+declare void @foo()
+
+; Shouldn't be able to encode -0.0 imm.
+define void @t1b(float %a) uwtable ssp {
+entry:
+; ARM: t1b
+; THUMB: t1b
+  %cmp = fcmp oeq float %a, -0.000000e+00
+; ARM: vldr
+; ARM: vcmpe.f32 s{{[0-9]+}}, s{{[0-9]+}}
+; THUMB: vldr
+; THUMB: vcmpe.f32 s{{[0-9]+}}, s{{[0-9]+}}
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t2a(double %a) uwtable ssp {
+entry:
+; ARM: t2a
+; THUMB: t2a
+  %cmp = fcmp oeq double %a, 0.000000e+00
+; ARM: vcmpe.f64 d{{[0-9]+}}, #0
+; THUMB: vcmpe.f64 d{{[0-9]+}}, #0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+; Shouldn't be able to encode -0.0 imm.
+define void @t2b(double %a) uwtable ssp {
+entry:
+; ARM: t2b
+; THUMB: t2b
+  %cmp = fcmp oeq double %a, -0.000000e+00
+; ARM: vldr
+; ARM: vcmpe.f64 d{{[0-9]+}}, d{{[0-9]+}}
+; THUMB: vldr
+; THUMB: vcmpe.f64 d{{[0-9]+}}, d{{[0-9]+}}
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t4(i8 signext %a) uwtable ssp {
+entry:
+; ARM: t4
+; THUMB: t4
+  %cmp = icmp eq i8 %a, -1
+; ARM: cmn r{{[0-9]}}, #1
+; THUMB: cmn.w r{{[0-9]}}, #1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t5(i8 zeroext %a) uwtable ssp {
+entry:
+; ARM: t5
+; THUMB: t5
+  %cmp = icmp eq i8 %a, 1
+; ARM: cmp r{{[0-9]}}, #1
+; THUMB: cmp r{{[0-9]}}, #1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t6(i16 signext %a) uwtable ssp {
+entry:
+; ARM: t6
+; THUMB: t6
+  %cmp = icmp eq i16 %a, -1
+; ARM: cmn r{{[0-9]}}, #1
+; THUMB: cmn.w r{{[0-9]}}, #1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t7(i16 zeroext %a) uwtable ssp {
+entry:
+; ARM: t7
+; THUMB: t7
+  %cmp = icmp eq i16 %a, 1
+; ARM: cmp r{{[0-9]}}, #1
+; THUMB: cmp r{{[0-9]}}, #1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t8(i32 %a) uwtable ssp {
+entry:
+; ARM: t8
+; THUMB: t8
+  %cmp = icmp eq i32 %a, -1
+; ARM: cmn r{{[0-9]}}, #1
+; THUMB: cmn.w r{{[0-9]}}, #1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t9(i32 %a) uwtable ssp {
+entry:
+; ARM: t9
+; THUMB: t9
+  %cmp = icmp eq i32 %a, 1
+; ARM: cmp r{{[0-9]}}, #1
+; THUMB: cmp r{{[0-9]}}, #1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t10(i32 %a) uwtable ssp {
+entry:
+; ARM: t10
+; THUMB: t10
+  %cmp = icmp eq i32 %a, 384
+; ARM: cmp r{{[0-9]}}, #384
+; THUMB: cmp.w r{{[0-9]}}, #384
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t11(i32 %a) uwtable ssp {
+entry:
+; ARM: t11
+; THUMB: t11
+  %cmp = icmp eq i32 %a, 4096
+; ARM: cmp r{{[0-9]}}, #4096
+; THUMB: cmp.w r{{[0-9]}}, #4096
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t12(i8 %a) uwtable ssp {
+entry:
+; ARM: t12
+; THUMB: t12
+  %cmp = icmp ugt i8 %a, -113
+; ARM: cmp r{{[0-9]}}, #143
+; THUMB: cmp r{{[0-9]}}, #143
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+; rdar://11038907
+; When comparing LONG_MIN/INT_MIN use a cmp instruction.
+define void @t13() nounwind ssp {
+entry:
+; ARM: t13
+; THUMB: t13
+  %cmp = icmp slt i32 -123, -2147483648
+; ARM: cmp r{{[0-9]}}, #-2147483648
+; THUMB: cmp.w r{{[0-9]}}, #-2147483648
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  ret void
+
+if.end:                                           ; preds = %entry
+  ret void
+}
+
diff --git a/test/CodeGen/ARM/fast-isel-conversion.ll b/test/CodeGen/ARM/fast-isel-conversion.ll
new file mode 100644
index 0000000..686ccad
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-conversion.ll
@@ -0,0 +1,242 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+; Test sitofp
+
+define void @sitofp_single_i32(i32 %a, float %b) nounwind ssp {
+entry:
+; ARM: sitofp_single_i32
+; ARM: vmov s0, r0
+; ARM: vcvt.f32.s32 s0, s0
+; THUMB: sitofp_single_i32
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f32.s32 s0, s0
+  %b.addr = alloca float, align 4
+  %conv = sitofp i32 %a to float
+  store float %conv, float* %b.addr, align 4
+  ret void
+}
+
+define void @sitofp_single_i16(i16 %a, float %b) nounwind ssp {
+entry:
+; ARM: sitofp_single_i16
+; ARM: sxth r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f32.s32 s0, s0
+; THUMB: sitofp_single_i16
+; THUMB: sxth r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f32.s32 s0, s0
+  %b.addr = alloca float, align 4
+  %conv = sitofp i16 %a to float
+  store float %conv, float* %b.addr, align 4
+  ret void
+}
+
+define void @sitofp_single_i8(i8 %a) nounwind ssp {
+entry:
+; ARM: sitofp_single_i8
+; ARM: sxtb r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f32.s32 s0, s0
+; THUMB: sitofp_single_i8
+; THUMB: sxtb r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f32.s32 s0, s0
+  %b.addr = alloca float, align 4
+  %conv = sitofp i8 %a to float
+  store float %conv, float* %b.addr, align 4
+  ret void
+}
+
+define void @sitofp_double_i32(i32 %a, double %b) nounwind ssp {
+entry:
+; ARM: sitofp_double_i32
+; ARM: vmov s0, r0
+; ARM: vcvt.f64.s32 d16, s0
+; THUMB: sitofp_double_i32
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f64.s32 d16, s0
+  %b.addr = alloca double, align 8
+  %conv = sitofp i32 %a to double
+  store double %conv, double* %b.addr, align 8
+  ret void
+}
+
+define void @sitofp_double_i16(i16 %a, double %b) nounwind ssp {
+entry:
+; ARM: sitofp_double_i16
+; ARM: sxth r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f64.s32 d16, s0
+; THUMB: sitofp_double_i16
+; THUMB: sxth r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f64.s32 d16, s0
+  %b.addr = alloca double, align 8
+  %conv = sitofp i16 %a to double
+  store double %conv, double* %b.addr, align 8
+  ret void
+}
+
+define void @sitofp_double_i8(i8 %a, double %b) nounwind ssp {
+entry:
+; ARM: sitofp_double_i8
+; ARM: sxtb r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f64.s32 d16, s0
+; THUMB: sitofp_double_i8
+; THUMB: sxtb r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f64.s32 d16, s0
+  %b.addr = alloca double, align 8
+  %conv = sitofp i8 %a to double
+  store double %conv, double* %b.addr, align 8
+  ret void
+}
+
+; Test uitofp
+
+define void @uitofp_single_i32(i32 %a, float %b) nounwind ssp {
+entry:
+; ARM: uitofp_single_i32
+; ARM: vmov s0, r0
+; ARM: vcvt.f32.u32 s0, s0
+; THUMB: uitofp_single_i32
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f32.u32 s0, s0
+  %b.addr = alloca float, align 4
+  %conv = uitofp i32 %a to float
+  store float %conv, float* %b.addr, align 4
+  ret void
+}
+
+define void @uitofp_single_i16(i16 %a, float %b) nounwind ssp {
+entry:
+; ARM: uitofp_single_i16
+; ARM: uxth r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f32.u32 s0, s0
+; THUMB: uitofp_single_i16
+; THUMB: uxth r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f32.u32 s0, s0
+  %b.addr = alloca float, align 4
+  %conv = uitofp i16 %a to float
+  store float %conv, float* %b.addr, align 4
+  ret void
+}
+
+define void @uitofp_single_i8(i8 %a) nounwind ssp {
+entry:
+; ARM: uitofp_single_i8
+; ARM: uxtb r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f32.u32 s0, s0
+; THUMB: uitofp_single_i8
+; THUMB: uxtb r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f32.u32 s0, s0
+  %b.addr = alloca float, align 4
+  %conv = uitofp i8 %a to float
+  store float %conv, float* %b.addr, align 4
+  ret void
+}
+
+define void @uitofp_double_i32(i32 %a, double %b) nounwind ssp {
+entry:
+; ARM: uitofp_double_i32
+; ARM: vmov s0, r0
+; ARM: vcvt.f64.u32 d16, s0
+; THUMB: uitofp_double_i32
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f64.u32 d16, s0
+  %b.addr = alloca double, align 8
+  %conv = uitofp i32 %a to double
+  store double %conv, double* %b.addr, align 8
+  ret void
+}
+
+define void @uitofp_double_i16(i16 %a, double %b) nounwind ssp {
+entry:
+; ARM: uitofp_double_i16
+; ARM: uxth r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f64.u32 d16, s0
+; THUMB: uitofp_double_i16
+; THUMB: uxth r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f64.u32 d16, s0
+  %b.addr = alloca double, align 8
+  %conv = uitofp i16 %a to double
+  store double %conv, double* %b.addr, align 8
+  ret void
+}
+
+define void @uitofp_double_i8(i8 %a, double %b) nounwind ssp {
+entry:
+; ARM: uitofp_double_i8
+; ARM: uxtb r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f64.u32 d16, s0
+; THUMB: uitofp_double_i8
+; THUMB: uxtb r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f64.u32 d16, s0
+  %b.addr = alloca double, align 8
+  %conv = uitofp i8 %a to double
+  store double %conv, double* %b.addr, align 8
+  ret void
+}
+
+; Test fptosi
+
+define void @fptosi_float(float %a) nounwind ssp {
+entry:
+; ARM: fptosi_float
+; ARM: vcvt.s32.f32 s0, s0
+; THUMB: fptosi_float
+; THUMB: vcvt.s32.f32 s0, s0
+  %b.addr = alloca i32, align 4
+  %conv = fptosi float %a to i32
+  store i32 %conv, i32* %b.addr, align 4
+  ret void
+}
+
+define void @fptosi_double(double %a) nounwind ssp {
+entry:
+; ARM: fptosi_double
+; ARM: vcvt.s32.f64 s0, d16
+; THUMB: fptosi_double
+; THUMB: vcvt.s32.f64 s0, d16
+  %b.addr = alloca i32, align 8
+  %conv = fptosi double %a to i32
+  store i32 %conv, i32* %b.addr, align 8
+  ret void
+}
+
+; Test fptoui
+
+define void @fptoui_float(float %a) nounwind ssp {
+entry:
+; ARM: fptoui_float
+; ARM: vcvt.u32.f32 s0, s0
+; THUMB: fptoui_float
+; THUMB: vcvt.u32.f32 s0, s0
+  %b.addr = alloca i32, align 4
+  %conv = fptoui float %a to i32
+  store i32 %conv, i32* %b.addr, align 4
+  ret void
+}
+
+define void @fptoui_double(double %a) nounwind ssp {
+entry:
+; ARM: fptoui_double
+; ARM: vcvt.u32.f64 s0, d16
+; THUMB: fptoui_double
+; THUMB: vcvt.u32.f64 s0, d16
+  %b.addr = alloca i32, align 8
+  %conv = fptoui double %a to i32
+  store i32 %conv, i32* %b.addr, align 8
+  ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-deadcode.ll b/test/CodeGen/ARM/fast-isel-deadcode.ll
new file mode 100644
index 0000000..7e147c7
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-deadcode.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+; Target-specific selector can't properly handle the double because it isn't
+; being passed via a register, so the materialized arguments become dead code.
+
+define i32 @main(i32 %argc, i8** %argv) nounwind {
+entry:
+; THUMB: main
+  call void @printArgsNoRet(i32 1, float 0x4000CCCCC0000000, i8 signext 99, double 4.100000e+00)
+; THUMB: blx _printArgsNoRet
+; THUMB-NOT: ldr
+; THUMB-NOT: vldr
+; THUMB-NOT: vmov
+; THUMB-NOT: ldr
+; THUMB-NOT: sxtb
+; THUMB: movs r0, #0
+; THUMB: movt r0, #0
+; THUMB: pop
+  ret i32 0
+}
+
+declare void @printArgsNoRet(i32 %a1, float %a2, i8 signext %a3, double %a4)
diff --git a/test/CodeGen/ARM/fast-isel-fold.ll b/test/CodeGen/ARM/fast-isel-fold.ll
new file mode 100644
index 0000000..61bd185
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-fold.ll
@@ -0,0 +1,80 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+
+@a = global i8 1, align 1
+@b = global i16 2, align 2
+
+define void @t1() nounwind uwtable ssp {
+; ARM: t1
+; ARM: ldrb
+; ARM-NOT: uxtb
+; THUMB: t1
+; THUMB: ldrb
+; THUMB-NOT: uxtb
+  %1 = load i8* @a, align 1
+  call void @foo1(i8 zeroext %1)
+  ret void
+}
+
+define void @t2() nounwind uwtable ssp {
+; ARM: t2
+; ARM: ldrh
+; ARM-NOT: uxth
+; THUMB: t2
+; THUMB: ldrh
+; THUMB-NOT: uxth
+  %1 = load i16* @b, align 2
+  call void @foo2(i16 zeroext %1)
+  ret void
+}
+
+declare void @foo1(i8 zeroext)
+declare void @foo2(i16 zeroext)
+
+define i32 @t3() nounwind uwtable ssp {
+; ARM: t3
+; ARM: ldrb
+; ARM-NOT: uxtb
+; THUMB: t3
+; THUMB: ldrb
+; THUMB-NOT: uxtb
+  %1 = load i8* @a, align 1
+  %2 = zext i8 %1 to i32
+  ret i32 %2
+}
+
+define i32 @t4() nounwind uwtable ssp {
+; ARM: t4
+; ARM: ldrh
+; ARM-NOT: uxth
+; THUMB: t4
+; THUMB: ldrh
+; THUMB-NOT: uxth
+  %1 = load i16* @b, align 2
+  %2 = zext i16 %1 to i32
+  ret i32 %2
+}
+
+define i32 @t5() nounwind uwtable ssp {
+; ARM: t5
+; ARM: ldrsh
+; ARM-NOT: sxth
+; THUMB: t5
+; THUMB: ldrsh
+; THUMB-NOT: sxth
+  %1 = load i16* @b, align 2
+  %2 = sext i16 %1 to i32
+  ret i32 %2
+}
+
+define i32 @t6() nounwind uwtable ssp {
+; ARM: t6
+; ARM: ldrsb
+; ARM-NOT: sxtb
+; THUMB: t6
+; THUMB: ldrsb
+; THUMB-NOT: sxtb
+  %1 = load i8* @a, align 2
+  %2 = sext i8 %1 to i32
+  ret i32 %2
+}
diff --git a/test/CodeGen/ARM/fast-isel-icmp.ll b/test/CodeGen/ARM/fast-isel-icmp.ll
new file mode 100644
index 0000000..8764bef
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-icmp.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+define i32 @icmp_i16_unsigned(i16 %a, i16 %b) nounwind {
+entry:
+; ARM: icmp_i16_unsigned
+; ARM: uxth r0, r0
+; ARM: uxth r1, r1
+; ARM: cmp	r0, r1
+; THUMB: icmp_i16_unsigned
+; THUMB: uxth r0, r0
+; THUMB: uxth r1, r1
+; THUMB: cmp	r0, r1
+  %cmp = icmp ult i16 %a, %b
+  %conv2 = zext i1 %cmp to i32
+  ret i32 %conv2
+}
+
+define i32 @icmp_i8_signed(i8 %a, i8 %b) nounwind {
+entry:
+; ARM: icmp_i8_signed
+; ARM: sxtb r0, r0
+; ARM: sxtb r1, r1
+; ARM: cmp r0, r1
+; THUMB: icmp_i8_signed
+; THUMB: sxtb r0, r0
+; THUMB: sxtb r1, r1
+; THUMB: cmp r0, r1
+  %cmp = icmp sgt i8 %a, %b
+  %conv2 = zext i1 %cmp to i32
+  ret i32 %conv2
+}
+
+define i32 @icmp_i1_unsigned(i1 %a, i1 %b) nounwind {
+entry:
+; ARM: icmp_i1_unsigned
+; ARM: and r0, r0, #1
+; ARM: and r1, r1, #1
+; ARM: cmp r0, r1
+; THUMB: icmp_i1_unsigned
+; THUMB: and r0, r0, #1
+; THUMB: and r1, r1, #1
+; THUMB: cmp r0, r1
+  %cmp = icmp ult i1 %a, %b
+  %conv2 = zext i1 %cmp to i32
+  ret i32 %conv2
+}
diff --git a/test/CodeGen/ARM/fast-isel-indirectbr.ll b/test/CodeGen/ARM/fast-isel-indirectbr.ll
new file mode 100644
index 0000000..be8035e
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-indirectbr.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+define void @t1(i8* %x) {
+entry:
+; ARM: t1
+; THUMB: t1
+  br label %L0
+
+L0:
+  br label %L1
+
+L1:
+  indirectbr i8* %x, [ label %L0, label %L1 ]
+; ARM: bx r0
+; THUMB: mov pc, r0
+}
diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll
new file mode 100644
index 0000000..e6bdfa7
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll
@@ -0,0 +1,110 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+@message1 = global [60 x i8] c"The LLVM Compiler Infrastructure\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 1
+@temp = common global [60 x i8] zeroinitializer, align 1
+
+define void @t1() nounwind ssp {
+; ARM: t1
+; ARM: movw r0, :lower16:_message1
+; ARM: movt r0, :upper16:_message1
+; ARM: add r0, r0, #5
+; ARM: movw r1, #64
+; ARM: movw r2, #10
+; ARM: uxtb r1, r1
+; ARM: bl _memset
+; THUMB: t1
+; THUMB: movw r0, :lower16:_message1
+; THUMB: movt r0, :upper16:_message1
+; THUMB: adds r0, #5
+; THUMB: movs r1, #64
+; THUMB: movt r1, #0
+; THUMB: movs r2, #10
+; THUMB: movt r2, #0
+; THUMB: uxtb r1, r1
+; THUMB: bl _memset
+  call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i32 1, i1 false)
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+
+define void @t2() nounwind ssp {
+; ARM: t2
+; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
+; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
+; ARM: ldr r0, [r0]
+; ARM: add r1, r0, #4
+; ARM: add r0, r0, #16
+; ARM: movw r2, #17
+; ARM: str r0, [sp]                @ 4-byte Spill
+; ARM: mov r0, r1
+; ARM: ldr r1, [sp]                @ 4-byte Reload
+; ARM: bl _memcpy
+; THUMB: t2
+; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
+; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
+; THUMB: ldr r0, [r0]
+; THUMB: adds r1, r0, #4
+; THUMB: adds r0, #16
+; THUMB: movs r2, #17
+; THUMB: movt r2, #0
+; THUMB: mov r0, r1
+; THUMB: bl _memcpy
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 17, i32 1, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+define void @t3() nounwind ssp {
+; ARM: t3
+; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
+; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
+; ARM: ldr r0, [r0]
+; ARM: add r1, r0, #4
+; ARM: add r0, r0, #16
+; ARM: movw r2, #10
+; ARM: mov r0, r1
+; ARM: bl _memmove
+; THUMB: t3
+; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
+; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
+; THUMB: ldr r0, [r0]
+; THUMB: adds r1, r0, #4
+; THUMB: adds r0, #16
+; THUMB: movs r2, #10
+; THUMB: movt r2, #0
+; THUMB: mov r0, r1
+; THUMB: bl _memmove
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
+  ret void
+}
+
+define void @t4() nounwind ssp {
+; ARM: t4
+; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
+; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
+; ARM: ldr r0, [r0]
+; ARM: ldr r1, [r0, #16]
+; ARM: str r1, [r0, #4]
+; ARM: ldr r1, [r0, #20]
+; ARM: str r1, [r0, #8]
+; ARM: ldrh r1, [r0, #24]
+; ARM: strh r1, [r0, #12]
+; ARM: bx lr
+; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
+; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
+; THUMB: ldr r0, [r0]
+; THUMB: ldr r1, [r0, #16]
+; THUMB: str r1, [r0, #4]
+; THUMB: ldr r1, [r0, #20]
+; THUMB: str r1, [r0, #8]
+; THUMB: ldrh r1, [r0, #24]
+; THUMB: strh r1, [r0, #12]
+; THUMB: bx lr
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
+  ret void
+}
+
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll b/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll
new file mode 100644
index 0000000..dfb8c53
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=ARM
+
+define i32 @t1(i32* nocapture %ptr) nounwind readonly {
+entry:
+; ARM: t1
+  %add.ptr = getelementptr inbounds i32* %ptr, i32 1
+  %0 = load i32* %add.ptr, align 4
+; ARM: ldr r{{[0-9]}}, [r0, #4]
+  ret i32 %0
+}
+
+define i32 @t2(i32* nocapture %ptr) nounwind readonly {
+entry:
+; ARM: t2
+  %add.ptr = getelementptr inbounds i32* %ptr, i32 63
+  %0 = load i32* %add.ptr, align 4
+; ARM: ldr.w r{{[0-9]}}, [r0, #252]
+  ret i32 %0
+}
+
+define zeroext i16 @t3(i16* nocapture %ptr) nounwind readonly {
+entry:
+; ARM: t3
+  %add.ptr = getelementptr inbounds i16* %ptr, i16 1
+  %0 = load i16* %add.ptr, align 4
+; ARM: ldrh r{{[0-9]}}, [r0, #2]
+  ret i16 %0
+}
+
+define zeroext i16 @t4(i16* nocapture %ptr) nounwind readonly {
+entry:
+; ARM: t4
+  %add.ptr = getelementptr inbounds i16* %ptr, i16 63
+  %0 = load i16* %add.ptr, align 4
+; ARM: ldrh.w r{{[0-9]}}, [r0, #126]
+  ret i16 %0
+}
+
+define zeroext i8 @t5(i8* nocapture %ptr) nounwind readonly {
+entry:
+; ARM: t5
+  %add.ptr = getelementptr inbounds i8* %ptr, i8 1
+  %0 = load i8* %add.ptr, align 4
+; ARM: ldrb r{{[0-9]}}, [r0, #1]
+  ret i8 %0
+}
+
+define zeroext i8 @t6(i8* nocapture %ptr) nounwind readonly {
+entry:
+; ARM: t6
+  %add.ptr = getelementptr inbounds i8* %ptr, i8 63
+  %0 = load i8* %add.ptr, align 4
+; ARM: ldrb.w r{{[0-9]}}, [r0, #63]
+  ret i8 %0
+}
diff --git a/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll b/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
new file mode 100644
index 0000000..2a88678
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
@@ -0,0 +1,168 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+define i32 @t1(i32* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t1
+  %add.ptr = getelementptr inbounds i32* %ptr, i32 -1
+  %0 = load i32* %add.ptr, align 4, !tbaa !0
+; THUMB: ldr r{{[0-9]}}, [r0, #-4]
+  ret i32 %0
+}
+
+define i32 @t2(i32* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t2
+  %add.ptr = getelementptr inbounds i32* %ptr, i32 -63
+  %0 = load i32* %add.ptr, align 4, !tbaa !0
+; THUMB: ldr r{{[0-9]}}, [r0, #-252]
+  ret i32 %0
+}
+
+define i32 @t3(i32* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t3
+  %add.ptr = getelementptr inbounds i32* %ptr, i32 -64
+  %0 = load i32* %add.ptr, align 4, !tbaa !0
+; THUMB: ldr r{{[0-9]}}, [r0]
+  ret i32 %0
+}
+
+define zeroext i16 @t4(i16* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t4
+  %add.ptr = getelementptr inbounds i16* %ptr, i32 -1
+  %0 = load i16* %add.ptr, align 2, !tbaa !3
+; THUMB: ldrh r{{[0-9]}}, [r0, #-2]
+  ret i16 %0
+}
+
+define zeroext i16 @t5(i16* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t5
+  %add.ptr = getelementptr inbounds i16* %ptr, i32 -127
+  %0 = load i16* %add.ptr, align 2, !tbaa !3
+; THUMB: ldrh r{{[0-9]}}, [r0, #-254]
+  ret i16 %0
+}
+
+define zeroext i16 @t6(i16* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t6
+  %add.ptr = getelementptr inbounds i16* %ptr, i32 -128
+  %0 = load i16* %add.ptr, align 2, !tbaa !3
+; THUMB: ldrh r{{[0-9]}}, [r0]
+  ret i16 %0
+}
+
+define zeroext i8 @t7(i8* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t7
+  %add.ptr = getelementptr inbounds i8* %ptr, i32 -1
+  %0 = load i8* %add.ptr, align 1, !tbaa !1
+; THUMB: ldrb r{{[0-9]}}, [r0, #-1]
+  ret i8 %0
+}
+
+define zeroext i8 @t8(i8* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t8
+  %add.ptr = getelementptr inbounds i8* %ptr, i32 -255
+  %0 = load i8* %add.ptr, align 1, !tbaa !1
+; THUMB: ldrb r{{[0-9]}}, [r0, #-255]
+  ret i8 %0
+}
+
+define zeroext i8 @t9(i8* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t9
+  %add.ptr = getelementptr inbounds i8* %ptr, i32 -256
+  %0 = load i8* %add.ptr, align 1, !tbaa !1
+; THUMB: ldrb r{{[0-9]}}, [r0]
+  ret i8 %0
+}
+
+define void @t10(i32* nocapture %ptr) nounwind {
+entry:
+; THUMB: t10
+  %add.ptr = getelementptr inbounds i32* %ptr, i32 -1
+  store i32 0, i32* %add.ptr, align 4, !tbaa !0
+; THUMB: str r{{[0-9]}}, [r0, #-4]
+  ret void
+}
+
+define void @t11(i32* nocapture %ptr) nounwind {
+entry:
+; THUMB: t11
+  %add.ptr = getelementptr inbounds i32* %ptr, i32 -63
+  store i32 0, i32* %add.ptr, align 4, !tbaa !0
+; THUMB: str r{{[0-9]}}, [r0, #-252]
+  ret void
+}
+
+define void @t12(i32* nocapture %ptr) nounwind {
+entry:
+; THUMB: t12
+  %add.ptr = getelementptr inbounds i32* %ptr, i32 -64
+  store i32 0, i32* %add.ptr, align 4, !tbaa !0
+; THUMB: str r{{[0-9]}}, [r0]
+  ret void
+}
+
+define void @t13(i16* nocapture %ptr) nounwind {
+entry:
+; THUMB: t13
+  %add.ptr = getelementptr inbounds i16* %ptr, i32 -1
+  store i16 0, i16* %add.ptr, align 2, !tbaa !3
+; THUMB: strh r{{[0-9]}}, [r0, #-2]
+  ret void
+}
+
+define void @t14(i16* nocapture %ptr) nounwind {
+entry:
+; THUMB: t14
+  %add.ptr = getelementptr inbounds i16* %ptr, i32 -127
+  store i16 0, i16* %add.ptr, align 2, !tbaa !3
+; THUMB: strh r{{[0-9]}}, [r0, #-254]
+  ret void
+}
+
+define void @t15(i16* nocapture %ptr) nounwind {
+entry:
+; THUMB: t15
+  %add.ptr = getelementptr inbounds i16* %ptr, i32 -128
+  store i16 0, i16* %add.ptr, align 2, !tbaa !3
+; THUMB: strh r{{[0-9]}}, [r0]
+  ret void
+}
+
+define void @t16(i8* nocapture %ptr) nounwind {
+entry:
+; THUMB: t16
+  %add.ptr = getelementptr inbounds i8* %ptr, i32 -1
+  store i8 0, i8* %add.ptr, align 1, !tbaa !1
+; THUMB: strb r{{[0-9]}}, [r0, #-1]
+  ret void
+}
+
+define void @t17(i8* nocapture %ptr) nounwind {
+entry:
+; THUMB: t17
+  %add.ptr = getelementptr inbounds i8* %ptr, i32 -255
+  store i8 0, i8* %add.ptr, align 1, !tbaa !1
+; THUMB: strb r{{[0-9]}}, [r0, #-255]
+  ret void
+}
+
+define void @t18(i8* nocapture %ptr) nounwind {
+entry:
+; THUMB: t18
+  %add.ptr = getelementptr inbounds i8* %ptr, i32 -256
+  store i8 0, i8* %add.ptr, align 1, !tbaa !1
+; THUMB: strb r{{[0-9]}}, [r0]
+  ret void
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"short", metadata !1}
diff --git a/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll
new file mode 100644
index 0000000..e8cc2b2
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll
@@ -0,0 +1,149 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; rdar://10418009
+
+define zeroext i16 @t1(i16* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t1
+  %add.ptr = getelementptr inbounds i16* %a, i64 -8
+  %0 = load i16* %add.ptr, align 2
+; ARM: ldrh r0, [r0, #-16]
+  ret i16 %0
+}
+
+define zeroext i16 @t2(i16* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t2
+  %add.ptr = getelementptr inbounds i16* %a, i64 -16
+  %0 = load i16* %add.ptr, align 2
+; ARM: ldrh r0, [r0, #-32]
+  ret i16 %0
+}
+
+define zeroext i16 @t3(i16* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t3
+  %add.ptr = getelementptr inbounds i16* %a, i64 -127
+  %0 = load i16* %add.ptr, align 2
+; ARM: ldrh r0, [r0, #-254]
+  ret i16 %0
+}
+
+define zeroext i16 @t4(i16* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t4
+  %add.ptr = getelementptr inbounds i16* %a, i64 -128
+  %0 = load i16* %add.ptr, align 2
+; ARM: mvn r{{[1-9]}}, #255
+; ARM: add r0, r0, r{{[1-9]}}
+; ARM: ldrh r0, [r0]
+  ret i16 %0
+}
+
+define zeroext i16 @t5(i16* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t5
+  %add.ptr = getelementptr inbounds i16* %a, i64 8
+  %0 = load i16* %add.ptr, align 2
+; ARM: ldrh r0, [r0, #16]
+  ret i16 %0
+}
+
+define zeroext i16 @t6(i16* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t6
+  %add.ptr = getelementptr inbounds i16* %a, i64 16
+  %0 = load i16* %add.ptr, align 2
+; ARM: ldrh r0, [r0, #32]
+  ret i16 %0
+}
+
+define zeroext i16 @t7(i16* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t7
+  %add.ptr = getelementptr inbounds i16* %a, i64 127
+  %0 = load i16* %add.ptr, align 2
+; ARM: ldrh r0, [r0, #254]
+  ret i16 %0
+}
+
+define zeroext i16 @t8(i16* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t8
+  %add.ptr = getelementptr inbounds i16* %a, i64 128
+  %0 = load i16* %add.ptr, align 2
+; ARM: add r0, r0, #256
+; ARM: ldrh r0, [r0]
+  ret i16 %0
+}
+
+define void @t9(i16* nocapture %a) nounwind uwtable ssp {
+entry:
+; ARM: t9
+  %add.ptr = getelementptr inbounds i16* %a, i64 -8
+  store i16 0, i16* %add.ptr, align 2
+; ARM: strh	r1, [r0, #-16]
+  ret void
+}
+
+; mvn r1, #255
+; strh r2, [r0, r1]
+define void @t10(i16* nocapture %a) nounwind uwtable ssp {
+entry:
+; ARM: t10
+  %add.ptr = getelementptr inbounds i16* %a, i64 -128
+  store i16 0, i16* %add.ptr, align 2
+; ARM: mvn r{{[1-9]}}, #255
+; ARM: add r0, r0, r{{[1-9]}}
+; ARM: strh r{{[1-9]}}, [r0]
+  ret void
+}
+
+define void @t11(i16* nocapture %a) nounwind uwtable ssp {
+entry:
+; ARM: t11
+  %add.ptr = getelementptr inbounds i16* %a, i64 8
+  store i16 0, i16* %add.ptr, align 2
+; ARM strh r{{[1-9]}}, [r0, #16]
+  ret void
+}
+
+; mov r1, #256
+; strh r2, [r0, r1]
+define void @t12(i16* nocapture %a) nounwind uwtable ssp {
+entry:
+; ARM: t12
+  %add.ptr = getelementptr inbounds i16* %a, i64 128
+  store i16 0, i16* %add.ptr, align 2
+; ARM: add r0, r0, #256
+; ARM: strh r{{[1-9]}}, [r0]
+  ret void
+}
+
+define signext i8 @t13(i8* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t13
+  %add.ptr = getelementptr inbounds i8* %a, i64 -8
+  %0 = load i8* %add.ptr, align 2
+; ARM: ldrsb r0, [r0, #-8]
+  ret i8 %0
+}
+
+define signext i8 @t14(i8* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t14
+  %add.ptr = getelementptr inbounds i8* %a, i64 -255
+  %0 = load i8* %add.ptr, align 2
+; ARM: ldrsb r0, [r0, #-255]
+  ret i8 %0
+}
+
+define signext i8 @t15(i8* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t15
+  %add.ptr = getelementptr inbounds i8* %a, i64 -256
+  %0 = load i8* %add.ptr, align 2
+; ARM: mvn r{{[1-9]}}, #255
+; ARM: add r0, r0, r{{[1-9]}}
+; ARM: ldrsb r0, [r0]
+  ret i8 %0
+}
diff --git a/test/CodeGen/ARM/fast-isel-mvn.ll b/test/CodeGen/ARM/fast-isel-mvn.ll
new file mode 100644
index 0000000..b180e43
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-mvn.ll
@@ -0,0 +1,107 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; rdar://10412592
+
+; Note: The Thumb code is being generated by the target-independent selector.
+
+define void @t1() nounwind {
+entry:
+; ARM: t1
+; THUMB: t1
+; ARM: mvn r0, #0
+; THUMB: movw r0, #65535
+; THUMB: movt r0, #65535
+  call void @foo(i32 -1)
+  ret void
+}
+
+declare void @foo(i32)
+
+define void @t2() nounwind {
+entry:
+; ARM: t2
+; THUMB: t2
+; ARM: mvn r0, #233
+; THUMB: movw r0, #65302
+; THUMB: movt r0, #65535
+  call void @foo(i32 -234)
+  ret void
+}
+
+define void @t3() nounwind {
+entry:
+; ARM: t3
+; THUMB: t3
+; ARM: mvn	r0, #256
+; THUMB: movw r0, #65279
+; THUMB: movt r0, #65535
+  call void @foo(i32 -257)
+  ret void
+}
+
+; Load from constant pool
+define void @t4() nounwind {
+entry:
+; ARM: t4
+; THUMB: t4
+; ARM: ldr	r0
+; THUMB: movw r0, #65278
+; THUMB: movt r0, #65535
+  call void @foo(i32 -258)
+  ret void
+}
+
+define void @t5() nounwind {
+entry:
+; ARM: t5
+; THUMB: t5
+; ARM: mvn r0, #65280
+; THUMB: movs r0, #255
+; THUMB: movt r0, #65535
+  call void @foo(i32 -65281)
+  ret void
+}
+
+define void @t6() nounwind {
+entry:
+; ARM: t6
+; THUMB: t6
+; ARM: mvn r0, #978944
+; THUMB: movw r0, #4095
+; THUMB: movt r0, #65521
+  call void @foo(i32 -978945)
+  ret void
+}
+
+define void @t7() nounwind {
+entry:
+; ARM: t7
+; THUMB: t7
+; ARM: mvn r0, #267386880
+; THUMB: movw r0, #65535
+; THUMB: movt r0, #61455
+  call void @foo(i32 -267386881)
+  ret void
+}
+
+define void @t8() nounwind {
+entry:
+; ARM: t8
+; THUMB: t8
+; ARM: mvn r0, #65280
+; THUMB: movs r0, #255
+; THUMB: movt r0, #65535
+  call void @foo(i32 -65281)
+  ret void
+}
+
+define void @t9() nounwind {
+entry:
+; ARM: t9
+; THUMB: t9
+; ARM: mvn r0, #2130706432
+; THUMB: movw r0, #65535
+; THUMB: movt r0, #33023
+  call void @foo(i32 -2130706433)
+  ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-redefinition.ll b/test/CodeGen/ARM/fast-isel-redefinition.ll
index 08dcc64..e50c3a4 100644
--- a/test/CodeGen/ARM/fast-isel-redefinition.ll
+++ b/test/CodeGen/ARM/fast-isel-redefinition.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -regalloc=linearscan < %s
+; RUN: llc -O0 -optimize-regalloc -regalloc=basic < %s
 ; This isn't exactly a useful set of command-line options, but check that it
 ; doesn't crash.  (It was crashing because a register was getting redefined.)
 
diff --git a/test/CodeGen/ARM/fast-isel-ret.ll b/test/CodeGen/ARM/fast-isel-ret.ll
new file mode 100644
index 0000000..689b169
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-ret.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s
+
+; Sign-extend of i1 currently not supported by fast-isel
+;define signext i1 @ret0(i1 signext %a) nounwind uwtable ssp {
+;entry:
+;  ret i1 %a
+;}
+
+define zeroext i1 @ret1(i1 signext %a) nounwind uwtable ssp {
+entry:
+; CHECK: ret1
+; CHECK: and r0, r0, #1
+; CHECK: bx lr
+  ret i1 %a
+}
+
+define signext i8 @ret2(i8 signext %a) nounwind uwtable ssp {
+entry:
+; CHECK: ret2
+; CHECK: sxtb r0, r0
+; CHECK: bx lr
+  ret i8 %a
+}
+
+define zeroext i8 @ret3(i8 signext %a) nounwind uwtable ssp {
+entry:
+; CHECK: ret3
+; CHECK: uxtb r0, r0
+; CHECK: bx lr
+  ret i8 %a
+}
+
+define signext i16 @ret4(i16 signext %a) nounwind uwtable ssp {
+entry:
+; CHECK: ret4
+; CHECK: sxth r0, r0
+; CHECK: bx lr
+  ret i16 %a
+}
+
+define zeroext i16 @ret5(i16 signext %a) nounwind uwtable ssp {
+entry:
+; CHECK: ret5
+; CHECK: uxth r0, r0
+; CHECK: bx lr
+  ret i16 %a
+}
+
+define i16 @ret6(i16 %a) nounwind uwtable ssp {
+entry:
+; CHECK: ret6
+; CHECK-NOT: uxth
+; CHECK-NOT: sxth
+; CHECK: bx lr
+  ret i16 %a
+}
diff --git a/test/CodeGen/ARM/fast-isel-select.ll b/test/CodeGen/ARM/fast-isel-select.ll
new file mode 100644
index 0000000..b83a733
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-select.ll
@@ -0,0 +1,99 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+define i32 @t1(i1 %c) nounwind readnone {
+entry:
+; ARM: t1
+; ARM: movw r{{[1-9]}}, #10
+; ARM: cmp r0, #0
+; ARM: moveq r{{[1-9]}}, #20
+; ARM: mov r0, r{{[1-9]}}
+; THUMB: t1
+; THUMB: movs r{{[1-9]}}, #10
+; THUMB: movt r{{[1-9]}}, #0
+; THUMB: cmp r0, #0
+; THUMB: it eq
+; THUMB: moveq r{{[1-9]}}, #20
+; THUMB: mov r0, r{{[1-9]}}
+  %0 = select i1 %c, i32 10, i32 20
+  ret i32 %0
+}
+
+define i32 @t2(i1 %c, i32 %a) nounwind readnone {
+entry:
+; ARM: t2
+; ARM: cmp r0, #0
+; ARM: moveq r{{[1-9]}}, #20
+; ARM: mov r0, r{{[1-9]}}
+; THUMB: t2
+; THUMB: cmp r0, #0
+; THUMB: it eq
+; THUMB: moveq r{{[1-9]}}, #20
+; THUMB: mov r0, r{{[1-9]}}
+  %0 = select i1 %c, i32 %a, i32 20
+  ret i32 %0
+}
+
+define i32 @t3(i1 %c, i32 %a, i32 %b) nounwind readnone {
+entry:
+; ARM: t3
+; ARM: cmp r0, #0
+; ARM: movne r{{[1-9]}}, r{{[1-9]}}
+; ARM: mov r0, r{{[1-9]}}
+; THUMB: t3
+; THUMB: cmp r0, #0
+; THUMB: it ne
+; THUMB: movne r{{[1-9]}}, r{{[1-9]}}
+; THUMB: mov r0, r{{[1-9]}}
+  %0 = select i1 %c, i32 %a, i32 %b
+  ret i32 %0
+}
+
+define i32 @t4(i1 %c) nounwind readnone {
+entry:
+; ARM: t4
+; ARM: mvn r{{[1-9]}}, #9
+; ARM: cmp r0, #0
+; ARM: mvneq r{{[1-9]}}, #0
+; ARM: mov r0, r{{[1-9]}}
+; THUMB: t4
+; THUMB: movw r{{[1-9]}}, #65526
+; THUMB: movt r{{[1-9]}}, #65535
+; THUMB: cmp r0, #0
+; THUMB: it eq
+; THUMB: mvneq r{{[1-9]}}, #0
+; THUMB: mov r0, r{{[1-9]}}
+  %0 = select i1 %c, i32 -10, i32 -1
+  ret i32 %0
+}
+
+define i32 @t5(i1 %c, i32 %a) nounwind readnone {
+entry:
+; ARM: t5
+; ARM: cmp r0, #0
+; ARM: mvneq r{{[1-9]}}, #1
+; ARM: mov r0, r{{[1-9]}}
+; THUMB: t5
+; THUMB: cmp r0, #0
+; THUMB: it eq
+; THUMB: mvneq r{{[1-9]}}, #1
+; THUMB: mov r0, r{{[1-9]}}
+  %0 = select i1 %c, i32 %a, i32 -2
+  ret i32 %0
+}
+
+; Check one large negative immediates.
+define i32 @t6(i1 %c, i32 %a) nounwind readnone {
+entry:
+; ARM: t6
+; ARM: cmp r0, #0
+; ARM: mvneq r{{[1-9]}}, #978944
+; ARM: mov r0, r{{[1-9]}}
+; THUMB: t6
+; THUMB: cmp r0, #0
+; THUMB: it eq
+; THUMB: mvneq r{{[1-9]}}, #978944
+; THUMB: mov r0, r{{[1-9]}}
+  %0 = select i1 %c, i32 %a, i32 -978945
+  ret i32 %0
+}
diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll
index 465e85f..417e2d9 100644
--- a/test/CodeGen/ARM/fast-isel.ll
+++ b/test/CodeGen/ARM/fast-isel.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
 ; Very basic fast-isel functionality.
 define i32 @add(i32 %a, i32 %b) nounwind {
@@ -142,19 +142,87 @@ define void @test4() {
   store i32 %b, i32* @test4g
   ret void
 
-; THUMB: ldr.n r0, LCPI4_1
+; THUMB: movw r0, :lower16:L_test4g$non_lazy_ptr
+; THUMB: movt r0, :upper16:L_test4g$non_lazy_ptr
 ; THUMB: ldr r0, [r0]
-; THUMB: ldr r0, [r0]
-; THUMB: adds r0, #1
-; THUMB: ldr.n r1, LCPI4_0
-; THUMB: ldr r1, [r1]
-; THUMB: str r0, [r1]
+; THUMB: ldr r1, [r0]
+; THUMB: adds r1, #1
+; THUMB: str r1, [r0]
 
-; ARM: ldr r0, LCPI4_1
-; ARM: ldr r0, [r0]
+; ARM: movw r0, :lower16:L_test4g$non_lazy_ptr
+; ARM: movt r0, :upper16:L_test4g$non_lazy_ptr
 ; ARM: ldr r0, [r0]
-; ARM: add r0, r0, #1
-; ARM: ldr r1, LCPI4_0
-; ARM: ldr r1, [r1]
-; ARM: str r0, [r1]
+; ARM: ldr r1, [r0]
+; ARM: add r1, r1, #1
+; ARM: str r1, [r0]
+}
+
+; Check unaligned stores
+%struct.anon = type <{ float }>
+
+@a = common global %struct.anon* null, align 4
+
+define void @unaligned_store(float %x, float %y) nounwind {
+entry:
+; ARM: @unaligned_store
+; ARM: vmov r1, s0
+; ARM: str r1, [r0]
+
+; THUMB: @unaligned_store
+; THUMB: vmov r1, s0
+; THUMB: str r1, [r0]
+
+  %add = fadd float %x, %y
+  %0 = load %struct.anon** @a, align 4
+  %x1 = getelementptr inbounds %struct.anon* %0, i32 0, i32 0
+  store float %add, float* %x1, align 1
+  ret void
+}
+
+; Doublewords require only word-alignment.
+; rdar://10528060
+%struct.anon.0 = type { double }
+
+@foo_unpacked = common global %struct.anon.0 zeroinitializer, align 4
+
+define void @test5(double %a, double %b) nounwind {
+entry:
+; ARM: @test5
+; THUMB: @test5
+  %add = fadd double %a, %b
+  store double %add, double* getelementptr inbounds (%struct.anon.0* @foo_unpacked, i32 0, i32 0), align 4
+; ARM: vstr d16, [r0]
+; THUMB: vstr d16, [r0]
+  ret void
+}
+
+; Check unaligned loads of floats
+%class.TAlignTest = type <{ i16, float }>
+
+define zeroext i1 @test6(%class.TAlignTest* %this) nounwind align 2 {
+entry:
+; ARM: @test6
+; THUMB: @test6
+  %0 = alloca %class.TAlignTest*, align 4
+  store %class.TAlignTest* %this, %class.TAlignTest** %0, align 4
+  %1 = load %class.TAlignTest** %0
+  %2 = getelementptr inbounds %class.TAlignTest* %1, i32 0, i32 1
+  %3 = load float* %2, align 1
+  %4 = fcmp une float %3, 0.000000e+00
+; ARM: ldr r0, [r0, #2]
+; ARM: vmov s0, r0
+; ARM: vcmpe.f32 s0, #0
+; THUMB: ldr.w r0, [r0, #2]
+; THUMB: vmov s0, r0
+; THUMB: vcmpe.f32 s0, #0
+  ret i1 %4
+}
+
+; ARM: @urem_fold
+; THUMB: @urem_fold
+; ARM: and r0, r0, #31
+; THUMB: and r0, r0, #31
+define i32 @urem_fold(i32 %a) nounwind {
+  %rem = urem i32 %a, 32
+  ret i32 %rem
 }
diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll
index c4dbeb9..27fa2b0 100644
--- a/test/CodeGen/ARM/fcopysign.ll
+++ b/test/CodeGen/ARM/fcopysign.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=SOFT
-; RUN: llc < %s -mtriple=armv7-gnueabi -float-abi=hard -mcpu=cortex-a8 | FileCheck %s -check-prefix=HARD
+; RUN: llc < %s -disable-post-ra -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=SOFT
+; RUN: llc < %s -disable-post-ra -mtriple=armv7-gnueabi -float-abi=hard -mcpu=cortex-a8 | FileCheck %s -check-prefix=HARD
 
 ; rdar://8984306
 define float @test1(float %x, float %y) nounwind {
@@ -40,28 +40,12 @@ entry:
   ret double %1
 }
 
-; rdar://9059537
-define i32 @test4() ssp {
-entry:
-; SOFT: test4:
-; SOFT: vmov.f64 [[REG4:(d[0-9]+)]], #1.000000e+00
-; This S-reg must be the first sub-reg of the last D-reg on vbsl.
-; SOFT: vcvt.f32.f64 {{s1?[02468]}}, [[REG4]]
-; SOFT: vshr.u64 [[REG4]], [[REG4]], #32
-; SOFT: vmov.i32 [[REG5:(d[0-9]+)]], #0x80000000
-; SOFT: vbsl [[REG5]], [[REG4]], {{d[0-9]+}}
-  %call80 = tail call double @copysign(double 1.000000e+00, double undef)
-  %conv81 = fptrunc double %call80 to float
-  %tmp88 = bitcast float %conv81 to i32
-  ret i32 %tmp88
-}
-
 ; rdar://9287902
-define float @test5() nounwind {
+define float @test4() nounwind {
 entry:
-; SOFT: test5:
-; SOFT: vmov.i32 [[REG6:(d[0-9]+)]], #0x80000000
+; SOFT: test4:
 ; SOFT: vmov [[REG7:(d[0-9]+)]], r0, r1
+; SOFT: vmov.i32 [[REG6:(d[0-9]+)]], #0x80000000
 ; SOFT: vshr.u64 [[REG7]], [[REG7]], #32
 ; SOFT: vbsl [[REG6]], [[REG7]], 
   %0 = tail call double (...)* @bar() nounwind
diff --git a/test/CodeGen/ARM/fold-const.ll b/test/CodeGen/ARM/fold-const.ll
index 227e4e8..1ba561d 100644
--- a/test/CodeGen/ARM/fold-const.ll
+++ b/test/CodeGen/ARM/fold-const.ll
@@ -3,7 +3,7 @@
 define i32 @f(i32 %a) nounwind readnone optsize ssp {
 entry:
   %conv = zext i32 %a to i64
-  %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %conv)
+  %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %conv, i1 true)
 ; CHECK: clz
 ; CHECK-NOT: adds
   %cast = trunc i64 %tmp1 to i32
@@ -11,4 +11,4 @@ entry:
   ret i32 %sub
 }
 
-declare i64 @llvm.ctlz.i64(i64) nounwind readnone
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
diff --git a/test/CodeGen/ARM/fp.ll b/test/CodeGen/ARM/fp.ll
index ac023d1..93601cf 100644
--- a/test/CodeGen/ARM/fp.ll
+++ b/test/CodeGen/ARM/fp.ll
@@ -42,7 +42,7 @@ entry:
 
 define double @h(double* %v) {
 ;CHECK: h:
-;CHECK: vldr.64
+;CHECK: vldr
 ;CHECK-NEXT: vmov
 entry:
         %tmp = load double* %v          ; <double> [#uses=1]
diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll
index 7c0dd0e..2d8f710 100644
--- a/test/CodeGen/ARM/fpcmp-opt.ll
+++ b/test/CodeGen/ARM/fpcmp-opt.ll
@@ -1,24 +1,16 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck -check-prefix=NAN %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck %s
 ; rdar://7461510
+; rdar://10964603
 
+; Disable this optimization unless we know one of them is zero.
 define arm_apcscc i32 @t1(float* %a, float* %b) nounwind {
 entry:
-; FINITE: t1:
-; FINITE-NOT: vldr
-; FINITE: ldr
-; FINITE: ldr
-; FINITE: cmp r0, r1
-; FINITE-NOT: vcmpe.f32
-; FINITE-NOT: vmrs
-; FINITE: beq
-
-; NAN: t1:
-; NAN: vldr.32 s0,
-; NAN: vldr.32 s1,
-; NAN: vcmpe.f32 s1, s0
-; NAN: vmrs apsr_nzcv, fpscr
-; NAN: beq
+; CHECK: t1:
+; CHECK: vldr [[S0:s[0-9]+]],
+; CHECK: vldr [[S1:s[0-9]+]],
+; CHECK: vcmpe.f32 [[S1]], [[S0]]
+; CHECK: vmrs APSR_nzcv, fpscr
+; CHECK: beq
   %0 = load float* %a
   %1 = load float* %b
   %2 = fcmp une float %0, %1
@@ -33,17 +25,21 @@ bb2:
   ret i32 %4
 }
 
+; If one side is zero, the other size sign bit is masked off to allow
+; +0.0 == -0.0
 define arm_apcscc i32 @t2(double* %a, double* %b) nounwind {
 entry:
-; FINITE: t2:
-; FINITE-NOT: vldr
-; FINITE: ldrd r0, r1, [r0]
-; FINITE-NOT: b LBB
-; FINITE: cmp r0, #0
-; FINITE: cmpeq r1, #0
-; FINITE-NOT: vcmpe.f32
-; FINITE-NOT: vmrs
-; FINITE: bne
+; CHECK: t2:
+; CHECK-NOT: vldr
+; CHECK: ldr [[REG1:(r[0-9]+)]], [r0]
+; CHECK: ldr [[REG2:(r[0-9]+)]], [r0, #4]
+; CHECK-NOT: b LBB
+; CHECK: cmp [[REG1]], #0
+; CHECK: bfc [[REG2]], #31, #1
+; CHECK: cmpeq [[REG2]], #0
+; CHECK-NOT: vcmpe.f32
+; CHECK-NOT: vmrs
+; CHECK: bne
   %0 = load double* %a
   %1 = fcmp oeq double %0, 0.000000e+00
   br i1 %1, label %bb1, label %bb2
@@ -59,13 +55,14 @@ bb2:
 
 define arm_apcscc i32 @t3(float* %a, float* %b) nounwind {
 entry:
-; FINITE: t3:
-; FINITE-NOT: vldr
-; FINITE: ldr r0, [r0]
-; FINITE: cmp r0, #0
-; FINITE-NOT: vcmpe.f32
-; FINITE-NOT: vmrs
-; FINITE: bne
+; CHECK: t3:
+; CHECK-NOT: vldr
+; CHECK: ldr [[REG3:(r[0-9]+)]], [r0]
+; CHECK: mvn [[REG4:(r[0-9]+)]], #-2147483648
+; CHECK: tst [[REG3]], [[REG4]]
+; CHECK-NOT: vcmpe.f32
+; CHECK-NOT: vmrs
+; CHECK: bne
   %0 = load float* %a
   %1 = fcmp oeq float %0, 0.000000e+00
   br i1 %1, label %bb1, label %bb2
diff --git a/test/CodeGen/ARM/fpcmp_ueq.ll b/test/CodeGen/ARM/fpcmp_ueq.ll
index 2e6b3e3..4a4c5b1 100644
--- a/test/CodeGen/ARM/fpcmp_ueq.ll
+++ b/test/CodeGen/ARM/fpcmp_ueq.ll
@@ -5,7 +5,7 @@ define i32 @f7(float %a, float %b) {
 entry:
 ; CHECK: f7:
 ; CHECK: vcmpe.f32
-; CHECK: vmrs apsr_nzcv, fpscr
+; CHECK: vmrs APSR_nzcv, fpscr
 ; CHECK: movweq
 ; CHECK-NOT: vmrs
 ; CHECK: movwvs
diff --git a/test/CodeGen/ARM/fpmem.ll b/test/CodeGen/ARM/fpmem.ll
index 3833933..8faa578 100644
--- a/test/CodeGen/ARM/fpmem.ll
+++ b/test/CodeGen/ARM/fpmem.ll
@@ -8,7 +8,7 @@ define float @f1(float %a) {
 
 define float @f2(float* %v, float %u) {
 ; CHECK: f2:
-; CHECK: vldr.32{{.*}}[
+; CHECK: vldr{{.*}}[
         %tmp = load float* %v           ; <float> [#uses=1]
         %tmp1 = fadd float %tmp, %u              ; <float> [#uses=1]
         ret float %tmp1
@@ -16,7 +16,7 @@ define float @f2(float* %v, float %u) {
 
 define float @f2offset(float* %v, float %u) {
 ; CHECK: f2offset:
-; CHECK: vldr.32{{.*}}, #4]
+; CHECK: vldr{{.*}}, #4]
         %addr = getelementptr float* %v, i32 1
         %tmp = load float* %addr
         %tmp1 = fadd float %tmp, %u
@@ -25,7 +25,7 @@ define float @f2offset(float* %v, float %u) {
 
 define float @f2noffset(float* %v, float %u) {
 ; CHECK: f2noffset:
-; CHECK: vldr.32{{.*}}, #-4]
+; CHECK: vldr{{.*}}, #-4]
         %addr = getelementptr float* %v, i32 -1
         %tmp = load float* %addr
         %tmp1 = fadd float %tmp, %u
@@ -34,7 +34,7 @@ define float @f2noffset(float* %v, float %u) {
 
 define void @f3(float %a, float %b, float* %v) {
 ; CHECK: f3:
-; CHECK: vstr.32{{.*}}[
+; CHECK: vstr{{.*}}[
         %tmp = fadd float %a, %b         ; <float> [#uses=1]
         store float %tmp, float* %v
         ret void
diff --git a/test/CodeGen/ARM/fusedMAC.ll b/test/CodeGen/ARM/fusedMAC.ll
new file mode 100644
index 0000000..a8b3999
--- /dev/null
+++ b/test/CodeGen/ARM/fusedMAC.ll
@@ -0,0 +1,185 @@
+; RUN: llc < %s -march=arm -mattr=+neon,+vfp4 | FileCheck %s
+; Check generated fused MAC and MLS.
+
+define double @fusedMACTest1(double %d1, double %d2, double %d3) {
+;CHECK: fusedMACTest1:
+;CHECK: vfma.f64
+  %1 = fmul double %d1, %d2
+  %2 = fadd double %1, %d3
+  ret double %2
+}
+
+define float @fusedMACTest2(float %f1, float %f2, float %f3) {
+;CHECK: fusedMACTest2:
+;CHECK: vfma.f32
+  %1 = fmul float %f1, %f2
+  %2 = fadd float %1, %f3
+  ret float %2
+}
+
+define double @fusedMACTest3(double %d1, double %d2, double %d3) {
+;CHECK: fusedMACTest3:
+;CHECK: vfms.f64
+  %1 = fmul double %d2, %d3
+  %2 = fsub double %d1, %1
+  ret double %2
+}
+
+define float @fusedMACTest4(float %f1, float %f2, float %f3) {
+;CHECK: fusedMACTest4:
+;CHECK: vfms.f32
+  %1 = fmul float %f2, %f3
+  %2 = fsub float %f1, %1
+  ret float %2
+}
+
+define double @fusedMACTest5(double %d1, double %d2, double %d3) {
+;CHECK: fusedMACTest5:
+;CHECK: vfnma.f64
+  %1 = fmul double %d1, %d2
+  %2 = fsub double -0.0, %1
+  %3 = fsub double %2, %d3
+  ret double %3
+}
+
+define float @fusedMACTest6(float %f1, float %f2, float %f3) {
+;CHECK: fusedMACTest6:
+;CHECK: vfnma.f32
+  %1 = fmul float %f1, %f2
+  %2 = fsub float -0.0, %1
+  %3 = fsub float %2, %f3
+  ret float %3
+}
+
+define double @fusedMACTest7(double %d1, double %d2, double %d3) {
+;CHECK: fusedMACTest7:
+;CHECK: vfnms.f64
+  %1 = fmul double %d1, %d2
+  %2 = fsub double %1, %d3
+  ret double %2
+}
+
+define float @fusedMACTest8(float %f1, float %f2, float %f3) {
+;CHECK: fusedMACTest8:
+;CHECK: vfnms.f32
+  %1 = fmul float %f1, %f2
+  %2 = fsub float %1, %f3
+  ret float %2
+}
+
+define <2 x float> @fusedMACTest9(<2 x float> %a, <2 x float> %b) {
+;CHECK: fusedMACTest9:
+;CHECK: vfma.f32
+  %mul = fmul <2 x float> %a, %b
+  %add = fadd <2 x float> %mul, %a
+  ret <2 x float> %add
+}
+
+define <2 x float> @fusedMACTest10(<2 x float> %a, <2 x float> %b) {
+;CHECK: fusedMACTest10:
+;CHECK: vfms.f32
+  %mul = fmul <2 x float> %a, %b
+  %sub = fsub <2 x float> %a, %mul
+  ret <2 x float> %sub
+}
+
+define <4 x float> @fusedMACTest11(<4 x float> %a, <4 x float> %b) {
+;CHECK: fusedMACTest11:
+;CHECK: vfma.f32
+  %mul = fmul <4 x float> %a, %b
+  %add = fadd <4 x float> %mul, %a
+  ret <4 x float> %add
+}
+
+define <4 x float> @fusedMACTest12(<4 x float> %a, <4 x float> %b) {
+;CHECK: fusedMACTest12:
+;CHECK: vfms.f32
+  %mul = fmul <4 x float> %a, %b
+  %sub = fsub <4 x float> %a, %mul
+  ret <4 x float> %sub
+}
+
+define float @test_fma_f32(float %a, float %b, float %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fma_f32
+; CHECK: vfma.f32
+  %tmp1 = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone
+  ret float %tmp1
+}
+
+define double @test_fma_f64(double %a, double %b, double %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fma_f64
+; CHECK: vfma.f64
+  %tmp1 = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
+  ret double %tmp1
+}
+
+define <2 x float> @test_fma_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fma_v2f32
+; CHECK: vfma.f32
+  %tmp1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind
+  ret <2 x float> %tmp1
+}
+
+define double @test_fms_f64(double %a, double %b, double %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fms_f64
+; CHECK: vfms.f64
+  %tmp1 = fsub double -0.0, %a
+  %tmp2 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %c) nounwind readnone
+  ret double %tmp2
+}
+
+define double @test_fms_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fms_f64_2
+; CHECK: vfms.f64
+  %tmp1 = fsub double -0.0, %b
+  %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone
+  %tmp3 = fsub double -0.0, %tmp2
+  ret double %tmp3
+}
+
+define double @test_fnms_f64(double %a, double %b, double %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fnms_f64
+; CHECK: vfnms.f64
+  %tmp1 = fsub double -0.0, %a
+  %tmp2 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %c) nounwind readnone
+  %tmp3 = fsub double -0.0, %tmp2
+  ret double %tmp3
+}
+
+define double @test_fnms_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fnms_f64_2
+; CHECK: vfnms.f64
+  %tmp1 = fsub double -0.0, %b
+  %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone
+  ret double %tmp2
+}
+
+define double @test_fnma_f64(double %a, double %b, double %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fnma_f64
+; CHECK: vfnma.f64
+  %tmp1 = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
+  %tmp2 = fsub double -0.0, %tmp1
+  ret double %tmp2
+}
+
+define double @test_fnma_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fnma_f64_2
+; CHECK: vfnma.f64
+  %tmp1 = fsub double -0.0, %a
+  %tmp2 = fsub double -0.0, %c
+  %tmp3 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %tmp2) nounwind readnone
+  ret double %tmp3
+}
+
+declare float @llvm.fma.f32(float, float, float) nounwind readnone
+declare double @llvm.fma.f64(double, double, double) nounwind readnone
+declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/global-merge.ll b/test/CodeGen/ARM/global-merge.ll
index 28bf221..1732df3 100644
--- a/test/CodeGen/ARM/global-merge.ll
+++ b/test/CodeGen/ARM/global-merge.ll
@@ -14,7 +14,7 @@
 ; offset.  Having the starting offset in range is not sufficient.
 ; When this works properly, @g3 is placed in a separate chunk of merged globals.
 ; CHECK: _MergedGlobals1:
-@g3 = internal global [30 x i32] [ i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10 ]
+@g3 = internal global [30 x i32] [ i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10 ], align 4
 
 ; Global variables that can be placed in BSS should be kept together in a
 ; separate pool of merged globals.
diff --git a/test/CodeGen/ARM/globals.ll b/test/CodeGen/ARM/globals.ll
index 5e7e3f2..eb71149 100644
--- a/test/CodeGen/ARM/globals.ll
+++ b/test/CodeGen/ARM/globals.ll
@@ -70,6 +70,5 @@ define i32 @test1() {
 ; LinuxPIC: .align 2
 ; LinuxPIC: .LCPI0_0:
 ; LinuxPIC:     .long _GLOBAL_OFFSET_TABLE_-(.LPC0_0+8)
-; LinuxPIC: .align 2
 ; LinuxPIC: .LCPI0_1:
 ; LinuxPIC:     .long	G(GOT)
diff --git a/test/CodeGen/ARM/hello.ll b/test/CodeGen/ARM/hello.ll
index 9f46ae0..893b426 100644
--- a/test/CodeGen/ARM/hello.ll
+++ b/test/CodeGen/ARM/hello.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -mtriple=armv6-linux-gnueabi | grep mov | count 1
 ; RUN: llc < %s -mtriple=armv6-linux-gnu --disable-fp-elim | \
 ; RUN:   grep mov | count 2
-; RUN: llc < %s -mtriple=armv6-apple-darwin | grep mov | count 2
+; RUN: llc < %s -mtriple=armv6-apple-ios | grep mov | count 2
 
 @str = internal constant [12 x i8] c"Hello World\00"
 
diff --git a/test/CodeGen/ARM/ifcvt1.ll b/test/CodeGen/ARM/ifcvt1.ll
index b073a05..cd870bb 100644
--- a/test/CodeGen/ARM/ifcvt1.ll
+++ b/test/CodeGen/ARM/ifcvt1.ll
@@ -1,15 +1,17 @@
-; RUN: llc < %s -march=arm -mattr=+v4t
-; RUN: llc < %s -march=arm -mattr=+v4t | grep bx | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
 
 define i32 @t1(i32 %a, i32 %b) {
+; CHECK: t1:
 	%tmp2 = icmp eq i32 %a, 0
 	br i1 %tmp2, label %cond_false, label %cond_true
 
 cond_true:
+; CHECK: subeq r0, r1, #1
 	%tmp5 = add i32 %b, 1
 	ret i32 %tmp5
 
 cond_false:
+; CHECK: addne r0, r1, #1
 	%tmp7 = add i32 %b, -1
 	ret i32 %tmp7
 }
diff --git a/test/CodeGen/ARM/ifcvt10.ll b/test/CodeGen/ARM/ifcvt10.ll
index 18f87bf..a5082d8 100644
--- a/test/CodeGen/ARM/ifcvt10.ll
+++ b/test/CodeGen/ARM/ifcvt10.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a9 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a9 | FileCheck %s
 ; rdar://8402126
 ; Make sure if-converter is not predicating vldmia and ldmia. These are
 ; micro-coded and would have long issue latency even if predicated on
diff --git a/test/CodeGen/ARM/ifcvt11.ll b/test/CodeGen/ARM/ifcvt11.ll
index 63f8557..0f142ee 100644
--- a/test/CodeGen/ARM/ifcvt11.ll
+++ b/test/CodeGen/ARM/ifcvt11.ll
@@ -18,7 +18,7 @@ bb.nph:                                           ; preds = %entry
 
 bb:                                               ; preds = %bb4, %bb.nph
 ; CHECK: vcmpe.f64
-; CHECK: vmrs apsr_nzcv, fpscr
+; CHECK: vmrs APSR_nzcv, fpscr
   %r.19 = phi i32 [ 0, %bb.nph ], [ %r.0, %bb4 ]
   %n.08 = phi i32 [ 0, %bb.nph ], [ %10, %bb4 ]
   %scevgep10 = getelementptr inbounds %struct.xyz_t* %p, i32 %n.08, i32 0
@@ -33,7 +33,7 @@ bb1:                                              ; preds = %bb
 ; CHECK-NOT: vcmpemi
 ; CHECK-NOT: vmrsmi
 ; CHECK: vcmpe.f64
-; CHECK: vmrs apsr_nzcv, fpscr
+; CHECK: vmrs APSR_nzcv, fpscr
   %scevgep12 = getelementptr %struct.xyz_t* %p, i32 %n.08, i32 2
   %6 = load double* %scevgep12, align 4
   %7 = fcmp uge double %3, %6
diff --git a/test/CodeGen/ARM/ifcvt3.ll b/test/CodeGen/ARM/ifcvt3.ll
index 3e2c578..eef4de0 100644
--- a/test/CodeGen/ARM/ifcvt3.ll
+++ b/test/CodeGen/ARM/ifcvt3.ll
@@ -1,14 +1,19 @@
-; RUN: llc < %s -march=arm -mattr=+v4t
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
 ; RUN: llc < %s -march=arm -mattr=+v4t | grep cmpne | count 1
 ; RUN: llc < %s -march=arm -mattr=+v4t | grep bx | count 2
 
 define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK: t1:
+; CHECK: cmp r2, #1
+; CHECK: cmpne r2, #7
 	switch i32 %c, label %cond_next [
 		 i32 1, label %cond_true
 		 i32 7, label %cond_true
 	]
 
 cond_true:
+; CHECK: addne r0
+; CHECK: bxne
 	%tmp12 = add i32 %a, 1
 	%tmp1518 = add i32 %tmp12, %b
 	ret i32 %tmp1518
diff --git a/test/CodeGen/ARM/ifcvt5.ll b/test/CodeGen/ARM/ifcvt5.ll
index 3615055..95f5c97 100644
--- a/test/CodeGen/ARM/ifcvt5.ll
+++ b/test/CodeGen/ARM/ifcvt5.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
 
 @x = external global i32*		; <i32**> [#uses=1]
 
diff --git a/test/CodeGen/ARM/ifcvt6.ll b/test/CodeGen/ARM/ifcvt6.ll
index 2327657..a00deda 100644
--- a/test/CodeGen/ARM/ifcvt6.ll
+++ b/test/CodeGen/ARM/ifcvt6.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
 
 define void @foo(i32 %X, i32 %Y) {
 entry:
diff --git a/test/CodeGen/ARM/inlineasm3.ll b/test/CodeGen/ARM/inlineasm3.ll
index cb5243c..2fcc45f 100644
--- a/test/CodeGen/ARM/inlineasm3.ll
+++ b/test/CodeGen/ARM/inlineasm3.ll
@@ -110,3 +110,13 @@ entry:
   call void asm "str $1, $0", "=*Q,r"(i8** %f.addr, i32 %g) nounwind
   ret void
 }
+
+; Radar 10551006
+
+define <4 x i32> @t11(i32* %p) nounwind {
+entry:
+; CHECK: t11
+; CHECK: vld1.s32 {d16[], d17[]}, [r0]
+  %0 = tail call <4 x i32> asm "vld1.s32 {${0:e}[], ${0:f}[]}, [$1]", "=w,r"(i32* %p) nounwind
+  ret <4 x i32> %0
+}
diff --git a/test/CodeGen/ARM/insn-sched1.ll b/test/CodeGen/ARM/insn-sched1.ll
index 1d32322..d188fae 100644
--- a/test/CodeGen/ARM/insn-sched1.ll
+++ b/test/CodeGen/ARM/insn-sched1.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=arm -mattr=+v6
-; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 |\
+; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+v6 |\
 ; RUN:   grep mov | count 3
 
 define i32 @test(i32 %x) {
diff --git a/test/CodeGen/ARM/ldrd-memoper.ll b/test/CodeGen/ARM/ldrd-memoper.ll
new file mode 100644
index 0000000..f1a1121
--- /dev/null
+++ b/test/CodeGen/ARM/ldrd-memoper.ll
@@ -0,0 +1,15 @@
+; RUN: llc %s -o /dev/null -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -debug-only=arm-ldst-opt 2> %t
+; RUN: FileCheck %s < %t
+; REQUIRES: asserts
+; PR8113: ARMLoadStoreOptimizer must preserve memoperands.
+
+@b = external global i64*
+
+; CHECK: Formed {{.*}} t2LDRD{{.*}} mem:LD4[%0] LD4[%0+4]
+define i64 @t(i64 %a) nounwind readonly {
+entry:
+	%0 = load i64** @b, align 4
+	%1 = load i64* %0, align 4
+	%2 = mul i64 %1, %a
+	ret i64 %2
+}
diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll
index 8010f20..3f8fd75 100644
--- a/test/CodeGen/ARM/ldrd.ll
+++ b/test/CodeGen/ARM/ldrd.ll
@@ -1,24 +1,70 @@
-; RUN: llc < %s -mtriple=armv6-apple-darwin -regalloc=linearscan | FileCheck %s -check-prefix=V6
-; RUN: llc < %s -mtriple=armv5-apple-darwin -regalloc=linearscan | FileCheck %s -check-prefix=V5
-; RUN: llc < %s -mtriple=armv6-eabi -regalloc=linearscan | FileCheck %s -check-prefix=EABI
-; rdar://r6949835
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=fast | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-m3 -regalloc=fast | FileCheck %s -check-prefix=M3
+; rdar://6949835
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=BASIC
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=greedy | FileCheck %s -check-prefix=GREEDY
 
-; Magic ARM pair hints works best with linearscan.
+; Magic ARM pair hints works best with linearscan / fast.
+
+; Cortex-M3 errata 602117: LDRD with base in list may result in incorrect base
+; register when interrupted or faulted.
 
 @b = external global i64*
 
 define i64 @t(i64 %a) nounwind readonly {
 entry:
-;V6:   ldrd r2, r3, [r2]
-
-;V5:   ldr r{{[0-9]+}}, [r2]
-;V5:   ldr r{{[0-9]+}}, [r2, #4]
+; A8: t:
+; A8:   ldrd r2, r3, [r2]
 
-;EABI: ldr r{{[0-9]+}}, [r2]
-;EABI: ldr r{{[0-9]+}}, [r2, #4]
+; M3: t:
+; M3-NOT: ldrd
+; M3: ldm.w r2, {r2, r3}
 
 	%0 = load i64** @b, align 4
 	%1 = load i64* %0, align 4
 	%2 = mul i64 %1, %a
 	ret i64 %2
 }
+
+; rdar://10435045 mixed LDRi8/LDRi12
+;
+; In this case, LSR generate a sequence of LDRi8/LDRi12. We should be
+; able to generate an LDRD pair here, but this is highly sensitive to
+; regalloc hinting. So, this doubles as a register allocation
+; test. RABasic currently does a better job within the inner loop
+; because of its *lack* of hinting ability. Whereas RAGreedy keeps
+; R0/R1/R2 live as the three arguments, forcing the LDRD's odd
+; destination into R3. We then sensibly split LDRD again rather then
+; evict another live range or use callee saved regs. Sorry if the test
+; is sensitive to Regalloc changes, but it is an interesting case.
+;
+; BASIC: @f
+; BASIC: %bb
+; BASIC: ldrd
+; BASIC: str
+; GREEDY: @f
+; GREEDY: %bb
+; GREEDY: ldrd
+; GREEDY: str
+define void @f(i32* nocapture %a, i32* nocapture %b, i32 %n) nounwind {
+entry:
+  %0 = add nsw i32 %n, -1                         ; <i32> [#uses=2]
+  %1 = icmp sgt i32 %0, 0                         ; <i1> [#uses=1]
+  br i1 %1, label %bb, label %return
+
+bb:                                               ; preds = %bb, %entry
+  %i.03 = phi i32 [ %tmp, %bb ], [ 0, %entry ]    ; <i32> [#uses=3]
+  %scevgep = getelementptr i32* %a, i32 %i.03     ; <i32*> [#uses=1]
+  %scevgep4 = getelementptr i32* %b, i32 %i.03    ; <i32*> [#uses=1]
+  %tmp = add i32 %i.03, 1                         ; <i32> [#uses=3]
+  %scevgep5 = getelementptr i32* %a, i32 %tmp     ; <i32*> [#uses=1]
+  %2 = load i32* %scevgep, align 4                ; <i32> [#uses=1]
+  %3 = load i32* %scevgep5, align 4               ; <i32> [#uses=1]
+  %4 = add nsw i32 %3, %2                         ; <i32> [#uses=1]
+  store i32 %4, i32* %scevgep4, align 4
+  %exitcond = icmp eq i32 %tmp, %0                ; <i1> [#uses=1]
+  br i1 %exitcond, label %return, label %bb
+
+return:                                           ; preds = %bb, %entry
+  ret void
+}
diff --git a/test/CodeGen/ARM/lit.local.cfg b/test/CodeGen/ARM/lit.local.cfg
new file mode 100644
index 0000000..cb77b09
--- /dev/null
+++ b/test/CodeGen/ARM/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/ARM/load_i1_select.ll b/test/CodeGen/ARM/load_i1_select.ll
new file mode 100644
index 0000000..bdd4081
--- /dev/null
+++ b/test/CodeGen/ARM/load_i1_select.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios0.0.0"
+
+; Codegen should only compare one bit of the loaded value.
+; rdar://10887484
+
+; CHECK: foo:
+; CHECK: ldrb r[[R0:[0-9]+]], [r0]
+; CHECK: tst.w r[[R0]], #1
+define void @foo(i8* %call, double* %p) nounwind {
+entry:
+  %tmp2 = load i8* %call
+  %tmp3 = trunc i8 %tmp2 to i1
+  %cond = select i1 %tmp3, double 2.000000e+00, double 1.000000e+00
+  store double %cond, double* %p
+  ret void
+}
diff --git a/test/CodeGen/ARM/log2_not_readnone.ll b/test/CodeGen/ARM/log2_not_readnone.ll
new file mode 100644
index 0000000..8068abd
--- /dev/null
+++ b/test/CodeGen/ARM/log2_not_readnone.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=arm-linux-gnueabi %s -o - | FileCheck %s
+
+; Log2 and exp2 are string-matched to intrinsics. If they are not declared
+; readnone, they can't be changed to intrinsics (because they can change errno).
+
+declare double @log2(double)
+declare double @exp2(double)
+
+define void @f() {
+       ; CHECK: bl log2
+       %1 = call double @log2(double 0.000000e+00)
+       ; CHECK: bl exp2
+       %2 = call double @exp2(double 0.000000e+00)
+       ret void
+}
diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll
index d5aac2e..a99a7ec 100644
--- a/test/CodeGen/ARM/long_shift.ll
+++ b/test/CodeGen/ARM/long_shift.ll
@@ -25,8 +25,8 @@ define i32 @f2(i64 %x, i64 %y) {
 ; CHECK:      lsr{{.*}}r2
 ; CHECK-NEXT: rsb     r3, r2, #32
 ; CHECK-NEXT: sub     r2, r2, #32
-; CHECK-NEXT: cmp     r2, #0
 ; CHECK-NEXT: orr     r0, r0, r1, lsl r3
+; CHECK-NEXT: cmp     r2, #0
 ; CHECK-NEXT: asrge   r0, r1, r2
 	%a = ashr i64 %x, %y
 	%b = trunc i64 %a to i32
@@ -38,8 +38,8 @@ define i32 @f3(i64 %x, i64 %y) {
 ; CHECK:      lsr{{.*}}r2
 ; CHECK-NEXT: rsb     r3, r2, #32
 ; CHECK-NEXT: sub     r2, r2, #32
-; CHECK-NEXT: cmp     r2, #0
 ; CHECK-NEXT: orr     r0, r0, r1, lsl r3
+; CHECK-NEXT: cmp     r2, #0
 ; CHECK-NEXT: lsrge   r0, r1, r2
 	%a = lshr i64 %x, %y
 	%b = trunc i64 %a to i32
diff --git a/test/CodeGen/ARM/lsr-icmp-imm.ll b/test/CodeGen/ARM/lsr-icmp-imm.ll
new file mode 100644
index 0000000..5283f57
--- /dev/null
+++ b/test/CodeGen/ARM/lsr-icmp-imm.ll
@@ -0,0 +1,33 @@
+; RUN: llc -mtriple=thumbv7-apple-ios -disable-code-place < %s | FileCheck %s
+; RUN: llc -mtriple=armv7-apple-ios   -disable-code-place < %s | FileCheck %s
+
+; LSR should compare against the post-incremented induction variable.
+; In this case, the immediate value is -2 which requires a cmn instruction.
+;
+; CHECK: f:
+; CHECK: %for.body
+; CHECK: sub{{.*}}[[IV:r[0-9]+]], #2
+; CHECK: cmn{{.*}}[[IV]], #2
+; CHECK: bne
+define i32 @f(i32* nocapture %a, i32 %i) nounwind readonly ssp {
+entry:
+  %cmp3 = icmp eq i32 %i, -2
+  br i1 %cmp3, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %bi.06 = phi i32 [ %i.addr.0.bi.0, %for.body ], [ 0, %entry ]
+  %i.addr.05 = phi i32 [ %sub, %for.body ], [ %i, %entry ]
+  %b.04 = phi i32 [ %.b.0, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %a, i32 %i.addr.05
+  %0 = load i32* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %0, %b.04
+  %.b.0 = select i1 %cmp1, i32 %0, i32 %b.04
+  %i.addr.0.bi.0 = select i1 %cmp1, i32 %i.addr.05, i32 %bi.06
+  %sub = add nsw i32 %i.addr.05, -2
+  %cmp = icmp eq i32 %i.addr.05, 0
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %bi.0.lcssa = phi i32 [ 0, %entry ], [ %i.addr.0.bi.0, %for.body ]
+  ret i32 %bi.0.lcssa
+}
diff --git a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
deleted file mode 100644
index 4737901..0000000
--- a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
+++ /dev/null
@@ -1,640 +0,0 @@
-; RUN: llc -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8  -enable-lsr-nested < %s | FileCheck %s
-
-; LSR should recognize that this is an unrolled loop which can use
-; constant offset addressing, so that each of the following stores
-; uses the same register.
-
-; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #32]
-; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #64]
-; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #96]
-
-; We can also save a register in the outer loop, but that requires
-; performing LSR on the outer loop.
-
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
-
-%0 = type { %1*, %3*, %6*, i8*, i32, i32, %8*, i32, i32, i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i32, i32, i32, i32, i32, [64 x i32]*, [4 x %9*], [4 x %10*], [4 x %10*], i32, %11*, i32, i32, [16 x i8], [16 x i8], [16 x i8], i32, i32, i8, i8, i8, i16, i16, i32, i8, i32, %12*, i32, i32, i32, i32, i8*, i32, [4 x %11*], i32, i32, i32, [10 x i32], i32, i32, i32, i32, i32, %13*, %14*, %15*, %16*, %17*, %18*, %19*, %20*, %21*, %22*, %23* }
-%1 = type { void (%2*)*, void (%2*, i32)*, void (%2*)*, void (%2*, i8*)*, void (%2*)*, i32, %7, i32, i32, i8**, i32, i8**, i32, i32 }
-%2 = type { %1*, %3*, %6*, i8*, i32, i32 }
-%3 = type { i8* (%2*, i32, i32)*, i8* (%2*, i32, i32)*, i8** (%2*, i32, i32, i32)*, [64 x i16]** (%2*, i32, i32, i32)*, %4* (%2*, i32, i32, i32, i32, i32)*, %5* (%2*, i32, i32, i32, i32, i32)*, void (%2*)*, i8** (%2*, %4*, i32, i32, i32)*, [64 x i16]** (%2*, %5*, i32, i32, i32)*, void (%2*, i32)*, void (%2*)*, i32, i32 }
-%4 = type opaque
-%5 = type opaque
-%6 = type { void (%2*)*, i32, i32, i32, i32 }
-%7 = type { [8 x i32], [12 x i32] }
-%8 = type { i8*, i32, void (%0*)*, i32 (%0*)*, void (%0*, i32)*, i32 (%0*, i32)*, void (%0*)* }
-%9 = type { [64 x i16], i32 }
-%10 = type { [17 x i8], [256 x i8], i32 }
-%11 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %9*, i8* }
-%12 = type { %12*, i8, i32, i32, i8* }
-%13 = type { void (%0*)*, void (%0*)*, i32 }
-%14 = type { void (%0*, i32)*, void (%0*, i8**, i32*, i32)* }
-%15 = type { void (%0*)*, i32 (%0*)*, void (%0*)*, i32 (%0*, i8***)*, %5** }
-%16 = type { void (%0*, i32)*, void (%0*, i8***, i32*, i32, i8**, i32*, i32)* }
-%17 = type { i32 (%0*)*, void (%0*)*, void (%0*)*, void (%0*)*, i32, i32 }
-%18 = type { void (%0*)*, i32 (%0*)*, i32 (%0*)*, i32, i32, i32, i32 }
-%19 = type { void (%0*)*, i32 (%0*, [64 x i16]**)*, i32 }
-%20 = type { void (%0*)*, [10 x void (%0*, %11*, i16*, i8**, i32)*] }
-%21 = type { void (%0*)*, void (%0*, i8***, i32*, i32, i8**, i32*, i32)*, i32 }
-%22 = type { void (%0*)*, void (%0*, i8***, i32, i8**, i32)* }
-%23 = type { void (%0*, i32)*, void (%0*, i8**, i8**, i32)*, void (%0*)*, void (%0*)* }
-
-define void @test(%0* nocapture %a0, %11* nocapture %a1, i16* nocapture %a2, i8** nocapture %a3, i32 %a4) nounwind {
-bb:
-  %t = alloca [64 x float], align 4
-  %t5 = getelementptr inbounds %0* %a0, i32 0, i32 65
-  %t6 = load i8** %t5, align 4
-  %t7 = getelementptr inbounds %11* %a1, i32 0, i32 20
-  %t8 = load i8** %t7, align 4
-  br label %bb9
-
-bb9:
-  %t10 = phi i32 [ 0, %bb ], [ %t157, %bb156 ]
-  %t11 = add i32 %t10, 8
-  %t12 = getelementptr [64 x float]* %t, i32 0, i32 %t11
-  %t13 = add i32 %t10, 16
-  %t14 = getelementptr [64 x float]* %t, i32 0, i32 %t13
-  %t15 = add i32 %t10, 24
-  %t16 = getelementptr [64 x float]* %t, i32 0, i32 %t15
-  %t17 = add i32 %t10, 32
-  %t18 = getelementptr [64 x float]* %t, i32 0, i32 %t17
-  %t19 = add i32 %t10, 40
-  %t20 = getelementptr [64 x float]* %t, i32 0, i32 %t19
-  %t21 = add i32 %t10, 48
-  %t22 = getelementptr [64 x float]* %t, i32 0, i32 %t21
-  %t23 = add i32 %t10, 56
-  %t24 = getelementptr [64 x float]* %t, i32 0, i32 %t23
-  %t25 = getelementptr [64 x float]* %t, i32 0, i32 %t10
-  %t26 = shl i32 %t10, 5
-  %t27 = or i32 %t26, 8
-  %t28 = getelementptr i8* %t8, i32 %t27
-  %t29 = bitcast i8* %t28 to float*
-  %t30 = or i32 %t26, 16
-  %t31 = getelementptr i8* %t8, i32 %t30
-  %t32 = bitcast i8* %t31 to float*
-  %t33 = or i32 %t26, 24
-  %t34 = getelementptr i8* %t8, i32 %t33
-  %t35 = bitcast i8* %t34 to float*
-  %t36 = or i32 %t26, 4
-  %t37 = getelementptr i8* %t8, i32 %t36
-  %t38 = bitcast i8* %t37 to float*
-  %t39 = or i32 %t26, 12
-  %t40 = getelementptr i8* %t8, i32 %t39
-  %t41 = bitcast i8* %t40 to float*
-  %t42 = or i32 %t26, 20
-  %t43 = getelementptr i8* %t8, i32 %t42
-  %t44 = bitcast i8* %t43 to float*
-  %t45 = or i32 %t26, 28
-  %t46 = getelementptr i8* %t8, i32 %t45
-  %t47 = bitcast i8* %t46 to float*
-  %t48 = getelementptr i8* %t8, i32 %t26
-  %t49 = bitcast i8* %t48 to float*
-  %t50 = shl i32 %t10, 3
-  %t51 = or i32 %t50, 1
-  %t52 = getelementptr i16* %a2, i32 %t51
-  %t53 = or i32 %t50, 2
-  %t54 = getelementptr i16* %a2, i32 %t53
-  %t55 = or i32 %t50, 3
-  %t56 = getelementptr i16* %a2, i32 %t55
-  %t57 = or i32 %t50, 4
-  %t58 = getelementptr i16* %a2, i32 %t57
-  %t59 = or i32 %t50, 5
-  %t60 = getelementptr i16* %a2, i32 %t59
-  %t61 = or i32 %t50, 6
-  %t62 = getelementptr i16* %a2, i32 %t61
-  %t63 = or i32 %t50, 7
-  %t64 = getelementptr i16* %a2, i32 %t63
-  %t65 = getelementptr i16* %a2, i32 %t50
-  %t66 = load i16* %t52, align 2
-  %t67 = icmp eq i16 %t66, 0
-  %t68 = load i16* %t54, align 2
-  %t69 = icmp eq i16 %t68, 0
-  %t70 = and i1 %t67, %t69
-  br i1 %t70, label %bb71, label %bb91
-
-bb71:
-  %t72 = load i16* %t56, align 2
-  %t73 = icmp eq i16 %t72, 0
-  br i1 %t73, label %bb74, label %bb91
-
-bb74:
-  %t75 = load i16* %t58, align 2
-  %t76 = icmp eq i16 %t75, 0
-  br i1 %t76, label %bb77, label %bb91
-
-bb77:
-  %t78 = load i16* %t60, align 2
-  %t79 = icmp eq i16 %t78, 0
-  br i1 %t79, label %bb80, label %bb91
-
-bb80:
-  %t81 = load i16* %t62, align 2
-  %t82 = icmp eq i16 %t81, 0
-  br i1 %t82, label %bb83, label %bb91
-
-bb83:
-  %t84 = load i16* %t64, align 2
-  %t85 = icmp eq i16 %t84, 0
-  br i1 %t85, label %bb86, label %bb91
-
-bb86:
-  %t87 = load i16* %t65, align 2
-  %t88 = sitofp i16 %t87 to float
-  %t89 = load float* %t49, align 4
-  %t90 = fmul float %t88, %t89
-  store float %t90, float* %t25, align 4
-  store float %t90, float* %t12, align 4
-  store float %t90, float* %t14, align 4
-  store float %t90, float* %t16, align 4
-  store float %t90, float* %t18, align 4
-  store float %t90, float* %t20, align 4
-  store float %t90, float* %t22, align 4
-  store float %t90, float* %t24, align 4
-  br label %bb156
-
-bb91:
-  %t92 = load i16* %t65, align 2
-  %t93 = sitofp i16 %t92 to float
-  %t94 = load float* %t49, align 4
-  %t95 = fmul float %t93, %t94
-  %t96 = sitofp i16 %t68 to float
-  %t97 = load float* %t29, align 4
-  %t98 = fmul float %t96, %t97
-  %t99 = load i16* %t58, align 2
-  %t100 = sitofp i16 %t99 to float
-  %t101 = load float* %t32, align 4
-  %t102 = fmul float %t100, %t101
-  %t103 = load i16* %t62, align 2
-  %t104 = sitofp i16 %t103 to float
-  %t105 = load float* %t35, align 4
-  %t106 = fmul float %t104, %t105
-  %t107 = fadd float %t95, %t102
-  %t108 = fsub float %t95, %t102
-  %t109 = fadd float %t98, %t106
-  %t110 = fsub float %t98, %t106
-  %t111 = fmul float %t110, 0x3FF6A09E60000000
-  %t112 = fsub float %t111, %t109
-  %t113 = fadd float %t107, %t109
-  %t114 = fsub float %t107, %t109
-  %t115 = fadd float %t108, %t112
-  %t116 = fsub float %t108, %t112
-  %t117 = sitofp i16 %t66 to float
-  %t118 = load float* %t38, align 4
-  %t119 = fmul float %t117, %t118
-  %t120 = load i16* %t56, align 2
-  %t121 = sitofp i16 %t120 to float
-  %t122 = load float* %t41, align 4
-  %t123 = fmul float %t121, %t122
-  %t124 = load i16* %t60, align 2
-  %t125 = sitofp i16 %t124 to float
-  %t126 = load float* %t44, align 4
-  %t127 = fmul float %t125, %t126
-  %t128 = load i16* %t64, align 2
-  %t129 = sitofp i16 %t128 to float
-  %t130 = load float* %t47, align 4
-  %t131 = fmul float %t129, %t130
-  %t132 = fadd float %t127, %t123
-  %t133 = fsub float %t127, %t123
-  %t134 = fadd float %t119, %t131
-  %t135 = fsub float %t119, %t131
-  %t136 = fadd float %t134, %t132
-  %t137 = fsub float %t134, %t132
-  %t138 = fmul float %t137, 0x3FF6A09E60000000
-  %t139 = fadd float %t133, %t135
-  %t140 = fmul float %t139, 0x3FFD906BC0000000
-  %t141 = fmul float %t135, 0x3FF1517A80000000
-  %t142 = fsub float %t141, %t140
-  %t143 = fmul float %t133, 0xC004E7AEA0000000
-  %t144 = fadd float %t143, %t140
-  %t145 = fsub float %t144, %t136
-  %t146 = fsub float %t138, %t145
-  %t147 = fadd float %t142, %t146
-  %t148 = fadd float %t113, %t136
-  store float %t148, float* %t25, align 4
-  %t149 = fsub float %t113, %t136
-  store float %t149, float* %t24, align 4
-  %t150 = fadd float %t115, %t145
-  store float %t150, float* %t12, align 4
-  %t151 = fsub float %t115, %t145
-  store float %t151, float* %t22, align 4
-  %t152 = fadd float %t116, %t146
-  store float %t152, float* %t14, align 4
-  %t153 = fsub float %t116, %t146
-  store float %t153, float* %t20, align 4
-  %t154 = fadd float %t114, %t147
-  store float %t154, float* %t18, align 4
-  %t155 = fsub float %t114, %t147
-  store float %t155, float* %t16, align 4
-  br label %bb156
-
-bb156:
-  %t157 = add i32 %t10, 1
-  %t158 = icmp eq i32 %t157, 8
-  br i1 %t158, label %bb159, label %bb9
-
-bb159:
-  %t160 = add i32 %a4, 7
-  %t161 = add i32 %a4, 1
-  %t162 = add i32 %a4, 6
-  %t163 = add i32 %a4, 2
-  %t164 = add i32 %a4, 5
-  %t165 = add i32 %a4, 4
-  %t166 = add i32 %a4, 3
-  br label %bb167
-
-bb167:
-  %t168 = phi i32 [ 0, %bb159 ], [ %t293, %bb167 ]
-  %t169 = getelementptr i8** %a3, i32 %t168
-  %t170 = shl i32 %t168, 3
-  %t171 = or i32 %t170, 4
-  %t172 = getelementptr [64 x float]* %t, i32 0, i32 %t171
-  %t173 = or i32 %t170, 2
-  %t174 = getelementptr [64 x float]* %t, i32 0, i32 %t173
-  %t175 = or i32 %t170, 6
-  %t176 = getelementptr [64 x float]* %t, i32 0, i32 %t175
-  %t177 = or i32 %t170, 5
-  %t178 = getelementptr [64 x float]* %t, i32 0, i32 %t177
-  %t179 = or i32 %t170, 3
-  %t180 = getelementptr [64 x float]* %t, i32 0, i32 %t179
-  %t181 = or i32 %t170, 1
-  %t182 = getelementptr [64 x float]* %t, i32 0, i32 %t181
-  %t183 = or i32 %t170, 7
-  %t184 = getelementptr [64 x float]* %t, i32 0, i32 %t183
-  %t185 = getelementptr [64 x float]* %t, i32 0, i32 %t170
-  %t186 = load i8** %t169, align 4
-  %t187 = getelementptr inbounds i8* %t186, i32 %a4
-  %t188 = load float* %t185, align 4
-  %t189 = load float* %t172, align 4
-  %t190 = fadd float %t188, %t189
-  %t191 = fsub float %t188, %t189
-  %t192 = load float* %t174, align 4
-  %t193 = load float* %t176, align 4
-  %t194 = fadd float %t192, %t193
-  %t195 = fsub float %t192, %t193
-  %t196 = fmul float %t195, 0x3FF6A09E60000000
-  %t197 = fsub float %t196, %t194
-  %t198 = fadd float %t190, %t194
-  %t199 = fsub float %t190, %t194
-  %t200 = fadd float %t191, %t197
-  %t201 = fsub float %t191, %t197
-  %t202 = load float* %t178, align 4
-  %t203 = load float* %t180, align 4
-  %t204 = fadd float %t202, %t203
-  %t205 = fsub float %t202, %t203
-  %t206 = load float* %t182, align 4
-  %t207 = load float* %t184, align 4
-  %t208 = fadd float %t206, %t207
-  %t209 = fsub float %t206, %t207
-  %t210 = fadd float %t208, %t204
-  %t211 = fsub float %t208, %t204
-  %t212 = fmul float %t211, 0x3FF6A09E60000000
-  %t213 = fadd float %t205, %t209
-  %t214 = fmul float %t213, 0x3FFD906BC0000000
-  %t215 = fmul float %t209, 0x3FF1517A80000000
-  %t216 = fsub float %t215, %t214
-  %t217 = fmul float %t205, 0xC004E7AEA0000000
-  %t218 = fadd float %t217, %t214
-  %t219 = fsub float %t218, %t210
-  %t220 = fsub float %t212, %t219
-  %t221 = fadd float %t216, %t220
-  %t222 = fadd float %t198, %t210
-  %t223 = fptosi float %t222 to i32
-  %t224 = add nsw i32 %t223, 4
-  %t225 = lshr i32 %t224, 3
-  %t226 = and i32 %t225, 1023
-  %t227 = add i32 %t226, 128
-  %t228 = getelementptr inbounds i8* %t6, i32 %t227
-  %t229 = load i8* %t228, align 1
-  store i8 %t229, i8* %t187, align 1
-  %t230 = fsub float %t198, %t210
-  %t231 = fptosi float %t230 to i32
-  %t232 = add nsw i32 %t231, 4
-  %t233 = lshr i32 %t232, 3
-  %t234 = and i32 %t233, 1023
-  %t235 = add i32 %t234, 128
-  %t236 = getelementptr inbounds i8* %t6, i32 %t235
-  %t237 = load i8* %t236, align 1
-  %t238 = getelementptr inbounds i8* %t186, i32 %t160
-  store i8 %t237, i8* %t238, align 1
-  %t239 = fadd float %t200, %t219
-  %t240 = fptosi float %t239 to i32
-  %t241 = add nsw i32 %t240, 4
-  %t242 = lshr i32 %t241, 3
-  %t243 = and i32 %t242, 1023
-  %t244 = add i32 %t243, 128
-  %t245 = getelementptr inbounds i8* %t6, i32 %t244
-  %t246 = load i8* %t245, align 1
-  %t247 = getelementptr inbounds i8* %t186, i32 %t161
-  store i8 %t246, i8* %t247, align 1
-  %t248 = fsub float %t200, %t219
-  %t249 = fptosi float %t248 to i32
-  %t250 = add nsw i32 %t249, 4
-  %t251 = lshr i32 %t250, 3
-  %t252 = and i32 %t251, 1023
-  %t253 = add i32 %t252, 128
-  %t254 = getelementptr inbounds i8* %t6, i32 %t253
-  %t255 = load i8* %t254, align 1
-  %t256 = getelementptr inbounds i8* %t186, i32 %t162
-  store i8 %t255, i8* %t256, align 1
-  %t257 = fadd float %t201, %t220
-  %t258 = fptosi float %t257 to i32
-  %t259 = add nsw i32 %t258, 4
-  %t260 = lshr i32 %t259, 3
-  %t261 = and i32 %t260, 1023
-  %t262 = add i32 %t261, 128
-  %t263 = getelementptr inbounds i8* %t6, i32 %t262
-  %t264 = load i8* %t263, align 1
-  %t265 = getelementptr inbounds i8* %t186, i32 %t163
-  store i8 %t264, i8* %t265, align 1
-  %t266 = fsub float %t201, %t220
-  %t267 = fptosi float %t266 to i32
-  %t268 = add nsw i32 %t267, 4
-  %t269 = lshr i32 %t268, 3
-  %t270 = and i32 %t269, 1023
-  %t271 = add i32 %t270, 128
-  %t272 = getelementptr inbounds i8* %t6, i32 %t271
-  %t273 = load i8* %t272, align 1
-  %t274 = getelementptr inbounds i8* %t186, i32 %t164
-  store i8 %t273, i8* %t274, align 1
-  %t275 = fadd float %t199, %t221
-  %t276 = fptosi float %t275 to i32
-  %t277 = add nsw i32 %t276, 4
-  %t278 = lshr i32 %t277, 3
-  %t279 = and i32 %t278, 1023
-  %t280 = add i32 %t279, 128
-  %t281 = getelementptr inbounds i8* %t6, i32 %t280
-  %t282 = load i8* %t281, align 1
-  %t283 = getelementptr inbounds i8* %t186, i32 %t165
-  store i8 %t282, i8* %t283, align 1
-  %t284 = fsub float %t199, %t221
-  %t285 = fptosi float %t284 to i32
-  %t286 = add nsw i32 %t285, 4
-  %t287 = lshr i32 %t286, 3
-  %t288 = and i32 %t287, 1023
-  %t289 = add i32 %t288, 128
-  %t290 = getelementptr inbounds i8* %t6, i32 %t289
-  %t291 = load i8* %t290, align 1
-  %t292 = getelementptr inbounds i8* %t186, i32 %t166
-  store i8 %t291, i8* %t292, align 1
-  %t293 = add nsw i32 %t168, 1
-  %t294 = icmp eq i32 %t293, 8
-  br i1 %t294, label %bb295, label %bb167
-
-bb295:
-  ret void
-}
-
-%struct.ct_data_s = type { %union.anon, %union.anon }
-%struct.gz_header = type { i32, i32, i32, i32, i8*, i32, i32, i8*, i32, i8*, i32, i32, i32 }
-%struct.internal_state = type { %struct.z_stream*, i32, i8*, i32, i8*, i32, i32, %struct.gz_header*, i32, i8, i32, i32, i32, i32, i8*, i32, i16*, i16*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [573 x %struct.ct_data_s], [61 x %struct.ct_data_s], [39 x %struct.ct_data_s], %struct.tree_desc_s, %struct.tree_desc_s, %struct.tree_desc_s, [16 x i16], [573 x i32], i32, i32, [573 x i8], i8*, i32, i32, i16*, i32, i32, i32, i32, i16, i32 }
-%struct.static_tree_desc = type { i32 }
-%struct.tree_desc_s = type { %struct.ct_data_s*, i32, %struct.static_tree_desc* }
-%struct.z_stream = type { i8*, i32, i32, i8*, i32, i32, i8*, %struct.internal_state*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i8*, i32, i32, i32 }
-%union.anon = type { i16 }
-
-define i32 @longest_match(%struct.internal_state* %s, i32 %cur_match) nounwind optsize {
-entry:
-  %0 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 31 ; <i32*> [#uses=1]
-  %1 = load i32* %0, align 4                      ; <i32> [#uses=2]
-  %2 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 14 ; <i8**> [#uses=1]
-  %3 = load i8** %2, align 4                      ; <i8*> [#uses=27]
-  %4 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 27 ; <i32*> [#uses=1]
-  %5 = load i32* %4, align 4                      ; <i32> [#uses=17]
-  %6 = getelementptr inbounds i8* %3, i32 %5      ; <i8*> [#uses=1]
-  %7 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 30 ; <i32*> [#uses=1]
-  %8 = load i32* %7, align 4                      ; <i32> [#uses=4]
-  %9 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 36 ; <i32*> [#uses=1]
-  %10 = load i32* %9, align 4                     ; <i32> [#uses=2]
-  %11 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 11 ; <i32*> [#uses=1]
-  %12 = load i32* %11, align 4                    ; <i32> [#uses=2]
-  %13 = add i32 %12, -262                         ; <i32> [#uses=1]
-  %14 = icmp ugt i32 %5, %13                      ; <i1> [#uses=1]
-  br i1 %14, label %bb, label %bb2
-
-bb:                                               ; preds = %entry
-  %15 = add i32 %5, 262                           ; <i32> [#uses=1]
-  %16 = sub i32 %15, %12                          ; <i32> [#uses=1]
-  br label %bb2
-
-bb2:                                              ; preds = %bb, %entry
-  %iftmp.48.0 = phi i32 [ %16, %bb ], [ 0, %entry ] ; <i32> [#uses=1]
-  %17 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 16 ; <i16**> [#uses=1]
-  %18 = load i16** %17, align 4                   ; <i16*> [#uses=1]
-  %19 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 13 ; <i32*> [#uses=1]
-  %20 = load i32* %19, align 4                    ; <i32> [#uses=1]
-  %.sum = add i32 %5, 258                         ; <i32> [#uses=2]
-  %21 = getelementptr inbounds i8* %3, i32 %.sum  ; <i8*> [#uses=1]
-  %22 = add nsw i32 %5, -1                        ; <i32> [#uses=1]
-  %.sum30 = add i32 %22, %8                       ; <i32> [#uses=1]
-  %23 = getelementptr inbounds i8* %3, i32 %.sum30 ; <i8*> [#uses=1]
-  %24 = load i8* %23, align 1                     ; <i8> [#uses=1]
-  %.sum31 = add i32 %8, %5                        ; <i32> [#uses=1]
-  %25 = getelementptr inbounds i8* %3, i32 %.sum31 ; <i8*> [#uses=1]
-  %26 = load i8* %25, align 1                     ; <i8> [#uses=1]
-  %27 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 35 ; <i32*> [#uses=1]
-  %28 = load i32* %27, align 4                    ; <i32> [#uses=1]
-  %29 = lshr i32 %1, 2                            ; <i32> [#uses=1]
-  %30 = icmp ult i32 %8, %28                      ; <i1> [#uses=1]
-  %. = select i1 %30, i32 %1, i32 %29             ; <i32> [#uses=1]
-  %31 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 29 ; <i32*> [#uses=1]
-  %32 = load i32* %31, align 4                    ; <i32> [#uses=4]
-  %33 = icmp ugt i32 %10, %32                     ; <i1> [#uses=1]
-  %nice_match.0.ph = select i1 %33, i32 %32, i32 %10 ; <i32> [#uses=1]
-  %34 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 28 ; <i32*> [#uses=1]
-  %35 = ptrtoint i8* %21 to i32                   ; <i32> [#uses=1]
-  %36 = add nsw i32 %5, 257                       ; <i32> [#uses=1]
-  %tmp81 = add i32 %., -1                         ; <i32> [#uses=1]
-  br label %bb6
-
-bb6:                                              ; preds = %bb24, %bb2
-  %indvar78 = phi i32 [ 0, %bb2 ], [ %indvar.next79, %bb24 ] ; <i32> [#uses=2]
-  %best_len.2 = phi i32 [ %8, %bb2 ], [ %best_len.0, %bb24 ] ; <i32> [#uses=8]
-  %scan_end1.1 = phi i8 [ %24, %bb2 ], [ %scan_end1.0, %bb24 ] ; <i8> [#uses=6]
-  %cur_match_addr.0 = phi i32 [ %cur_match, %bb2 ], [ %90, %bb24 ] ; <i32> [#uses=14]
-  %scan_end.1 = phi i8 [ %26, %bb2 ], [ %scan_end.0, %bb24 ] ; <i8> [#uses=6]
-  %37 = getelementptr inbounds i8* %3, i32 %cur_match_addr.0 ; <i8*> [#uses=1]
-  %.sum32 = add i32 %cur_match_addr.0, %best_len.2 ; <i32> [#uses=1]
-  %38 = getelementptr inbounds i8* %3, i32 %.sum32 ; <i8*> [#uses=1]
-  %39 = load i8* %38, align 1                     ; <i8> [#uses=1]
-  %40 = icmp eq i8 %39, %scan_end.1               ; <i1> [#uses=1]
-  br i1 %40, label %bb7, label %bb23
-
-bb7:                                              ; preds = %bb6
-  %41 = add nsw i32 %best_len.2, -1               ; <i32> [#uses=1]
-  %.sum33 = add i32 %41, %cur_match_addr.0        ; <i32> [#uses=1]
-  %42 = getelementptr inbounds i8* %3, i32 %.sum33 ; <i8*> [#uses=1]
-  %43 = load i8* %42, align 1                     ; <i8> [#uses=1]
-  %44 = icmp eq i8 %43, %scan_end1.1              ; <i1> [#uses=1]
-  br i1 %44, label %bb8, label %bb23
-
-bb8:                                              ; preds = %bb7
-  %45 = load i8* %37, align 1                     ; <i8> [#uses=1]
-  %46 = load i8* %6, align 1                      ; <i8> [#uses=1]
-  %47 = icmp eq i8 %45, %46                       ; <i1> [#uses=1]
-  br i1 %47, label %bb9, label %bb23
-
-bb9:                                              ; preds = %bb8
-  %.sum34 = add i32 %cur_match_addr.0, 1          ; <i32> [#uses=1]
-  %48 = getelementptr inbounds i8* %3, i32 %.sum34 ; <i8*> [#uses=1]
-  %49 = load i8* %48, align 1                     ; <i8> [#uses=1]
-  %.sum88 = add i32 %5, 1                         ; <i32> [#uses=1]
-  %50 = getelementptr inbounds i8* %3, i32 %.sum88 ; <i8*> [#uses=1]
-  %51 = load i8* %50, align 1                     ; <i8> [#uses=1]
-  %52 = icmp eq i8 %49, %51                       ; <i1> [#uses=1]
-  br i1 %52, label %bb10, label %bb23
-
-bb10:                                             ; preds = %bb9
-  %tmp39 = add i32 %cur_match_addr.0, 10          ; <i32> [#uses=1]
-  %tmp41 = add i32 %cur_match_addr.0, 9           ; <i32> [#uses=1]
-  %tmp44 = add i32 %cur_match_addr.0, 8           ; <i32> [#uses=1]
-  %tmp47 = add i32 %cur_match_addr.0, 7           ; <i32> [#uses=1]
-  %tmp50 = add i32 %cur_match_addr.0, 6           ; <i32> [#uses=1]
-  %tmp53 = add i32 %cur_match_addr.0, 5           ; <i32> [#uses=1]
-  %tmp56 = add i32 %cur_match_addr.0, 4           ; <i32> [#uses=1]
-  %tmp59 = add i32 %cur_match_addr.0, 3           ; <i32> [#uses=1]
-  br label %bb11
-
-bb11:                                             ; preds = %bb18, %bb10
-  %indvar = phi i32 [ %indvar.next, %bb18 ], [ 0, %bb10 ] ; <i32> [#uses=2]
-  %tmp = shl i32 %indvar, 3                       ; <i32> [#uses=16]
-  %tmp40 = add i32 %tmp39, %tmp                   ; <i32> [#uses=1]
-  %scevgep = getelementptr i8* %3, i32 %tmp40     ; <i8*> [#uses=1]
-  %tmp42 = add i32 %tmp41, %tmp                   ; <i32> [#uses=1]
-  %scevgep43 = getelementptr i8* %3, i32 %tmp42   ; <i8*> [#uses=1]
-  %tmp45 = add i32 %tmp44, %tmp                   ; <i32> [#uses=1]
-  %scevgep46 = getelementptr i8* %3, i32 %tmp45   ; <i8*> [#uses=1]
-  %tmp48 = add i32 %tmp47, %tmp                   ; <i32> [#uses=1]
-  %scevgep49 = getelementptr i8* %3, i32 %tmp48   ; <i8*> [#uses=1]
-  %tmp51 = add i32 %tmp50, %tmp                   ; <i32> [#uses=1]
-  %scevgep52 = getelementptr i8* %3, i32 %tmp51   ; <i8*> [#uses=1]
-  %tmp54 = add i32 %tmp53, %tmp                   ; <i32> [#uses=1]
-  %scevgep55 = getelementptr i8* %3, i32 %tmp54   ; <i8*> [#uses=1]
-  %tmp60 = add i32 %tmp59, %tmp                   ; <i32> [#uses=1]
-  %scevgep61 = getelementptr i8* %3, i32 %tmp60   ; <i8*> [#uses=1]
-  %tmp62 = add i32 %tmp, 10                       ; <i32> [#uses=1]
-  %.sum89 = add i32 %5, %tmp62                    ; <i32> [#uses=2]
-  %scevgep63 = getelementptr i8* %3, i32 %.sum89  ; <i8*> [#uses=2]
-  %tmp64 = add i32 %tmp, 9                        ; <i32> [#uses=1]
-  %.sum90 = add i32 %5, %tmp64                    ; <i32> [#uses=1]
-  %scevgep65 = getelementptr i8* %3, i32 %.sum90  ; <i8*> [#uses=2]
-  %tmp66 = add i32 %tmp, 8                        ; <i32> [#uses=1]
-  %.sum91 = add i32 %5, %tmp66                    ; <i32> [#uses=1]
-  %scevgep67 = getelementptr i8* %3, i32 %.sum91  ; <i8*> [#uses=2]
-  %tmp6883 = or i32 %tmp, 7                       ; <i32> [#uses=1]
-  %.sum92 = add i32 %5, %tmp6883                  ; <i32> [#uses=1]
-  %scevgep69 = getelementptr i8* %3, i32 %.sum92  ; <i8*> [#uses=2]
-  %tmp7084 = or i32 %tmp, 6                       ; <i32> [#uses=1]
-  %.sum93 = add i32 %5, %tmp7084                  ; <i32> [#uses=1]
-  %scevgep71 = getelementptr i8* %3, i32 %.sum93  ; <i8*> [#uses=2]
-  %tmp7285 = or i32 %tmp, 5                       ; <i32> [#uses=1]
-  %.sum94 = add i32 %5, %tmp7285                  ; <i32> [#uses=1]
-  %scevgep73 = getelementptr i8* %3, i32 %.sum94  ; <i8*> [#uses=2]
-  %tmp7486 = or i32 %tmp, 4                       ; <i32> [#uses=1]
-  %.sum95 = add i32 %5, %tmp7486                  ; <i32> [#uses=1]
-  %scevgep75 = getelementptr i8* %3, i32 %.sum95  ; <i8*> [#uses=2]
-  %tmp7687 = or i32 %tmp, 3                       ; <i32> [#uses=1]
-  %.sum96 = add i32 %5, %tmp7687                  ; <i32> [#uses=1]
-  %scevgep77 = getelementptr i8* %3, i32 %.sum96  ; <i8*> [#uses=2]
-  %53 = load i8* %scevgep77, align 1              ; <i8> [#uses=1]
-  %54 = load i8* %scevgep61, align 1              ; <i8> [#uses=1]
-  %55 = icmp eq i8 %53, %54                       ; <i1> [#uses=1]
-  br i1 %55, label %bb12, label %bb20
-
-bb12:                                             ; preds = %bb11
-  %tmp57 = add i32 %tmp56, %tmp                   ; <i32> [#uses=1]
-  %scevgep58 = getelementptr i8* %3, i32 %tmp57   ; <i8*> [#uses=1]
-  %56 = load i8* %scevgep75, align 1              ; <i8> [#uses=1]
-  %57 = load i8* %scevgep58, align 1              ; <i8> [#uses=1]
-  %58 = icmp eq i8 %56, %57                       ; <i1> [#uses=1]
-  br i1 %58, label %bb13, label %bb20
-
-bb13:                                             ; preds = %bb12
-  %59 = load i8* %scevgep73, align 1              ; <i8> [#uses=1]
-  %60 = load i8* %scevgep55, align 1              ; <i8> [#uses=1]
-  %61 = icmp eq i8 %59, %60                       ; <i1> [#uses=1]
-  br i1 %61, label %bb14, label %bb20
-
-bb14:                                             ; preds = %bb13
-  %62 = load i8* %scevgep71, align 1              ; <i8> [#uses=1]
-  %63 = load i8* %scevgep52, align 1              ; <i8> [#uses=1]
-  %64 = icmp eq i8 %62, %63                       ; <i1> [#uses=1]
-  br i1 %64, label %bb15, label %bb20
-
-bb15:                                             ; preds = %bb14
-  %65 = load i8* %scevgep69, align 1              ; <i8> [#uses=1]
-  %66 = load i8* %scevgep49, align 1              ; <i8> [#uses=1]
-  %67 = icmp eq i8 %65, %66                       ; <i1> [#uses=1]
-  br i1 %67, label %bb16, label %bb20
-
-bb16:                                             ; preds = %bb15
-  %68 = load i8* %scevgep67, align 1              ; <i8> [#uses=1]
-  %69 = load i8* %scevgep46, align 1              ; <i8> [#uses=1]
-  %70 = icmp eq i8 %68, %69                       ; <i1> [#uses=1]
-  br i1 %70, label %bb17, label %bb20
-
-bb17:                                             ; preds = %bb16
-  %71 = load i8* %scevgep65, align 1              ; <i8> [#uses=1]
-  %72 = load i8* %scevgep43, align 1              ; <i8> [#uses=1]
-  %73 = icmp eq i8 %71, %72                       ; <i1> [#uses=1]
-  br i1 %73, label %bb18, label %bb20
-
-bb18:                                             ; preds = %bb17
-  %74 = load i8* %scevgep63, align 1              ; <i8> [#uses=1]
-  %75 = load i8* %scevgep, align 1                ; <i8> [#uses=1]
-  %76 = icmp eq i8 %74, %75                       ; <i1> [#uses=1]
-  %77 = icmp slt i32 %.sum89, %.sum               ; <i1> [#uses=1]
-  %or.cond = and i1 %76, %77                      ; <i1> [#uses=1]
-  %indvar.next = add i32 %indvar, 1               ; <i32> [#uses=1]
-  br i1 %or.cond, label %bb11, label %bb20
-
-bb20:                                             ; preds = %bb18, %bb17, %bb16, %bb15, %bb14, %bb13, %bb12, %bb11
-  %scan.3 = phi i8* [ %scevgep77, %bb11 ], [ %scevgep75, %bb12 ], [ %scevgep73, %bb13 ], [ %scevgep71, %bb14 ], [ %scevgep69, %bb15 ], [ %scevgep67, %bb16 ], [ %scevgep65, %bb17 ], [ %scevgep63, %bb18 ] ; <i8*> [#uses=1]
-  %78 = ptrtoint i8* %scan.3 to i32               ; <i32> [#uses=1]
-  %79 = sub nsw i32 %78, %35                      ; <i32> [#uses=2]
-  %80 = add i32 %79, 258                          ; <i32> [#uses=5]
-  %81 = icmp sgt i32 %80, %best_len.2             ; <i1> [#uses=1]
-  br i1 %81, label %bb21, label %bb23
-
-bb21:                                             ; preds = %bb20
-  store i32 %cur_match_addr.0, i32* %34, align 4
-  %82 = icmp slt i32 %80, %nice_match.0.ph        ; <i1> [#uses=1]
-  br i1 %82, label %bb22, label %bb25
-
-bb22:                                             ; preds = %bb21
-  %.sum37 = add i32 %36, %79                      ; <i32> [#uses=1]
-  %83 = getelementptr inbounds i8* %3, i32 %.sum37 ; <i8*> [#uses=1]
-  %84 = load i8* %83, align 1                     ; <i8> [#uses=1]
-  %.sum38 = add i32 %80, %5                       ; <i32> [#uses=1]
-  %85 = getelementptr inbounds i8* %3, i32 %.sum38 ; <i8*> [#uses=1]
-  %86 = load i8* %85, align 1                     ; <i8> [#uses=1]
-  br label %bb23
-
-bb23:                                             ; preds = %bb22, %bb20, %bb9, %bb8, %bb7, %bb6
-  %best_len.0 = phi i32 [ %best_len.2, %bb6 ], [ %best_len.2, %bb7 ], [ %best_len.2, %bb8 ], [ %best_len.2, %bb9 ], [ %80, %bb22 ], [ %best_len.2, %bb20 ] ; <i32> [#uses=3]
-  %scan_end1.0 = phi i8 [ %scan_end1.1, %bb6 ], [ %scan_end1.1, %bb7 ], [ %scan_end1.1, %bb8 ], [ %scan_end1.1, %bb9 ], [ %84, %bb22 ], [ %scan_end1.1, %bb20 ] ; <i8> [#uses=1]
-  %scan_end.0 = phi i8 [ %scan_end.1, %bb6 ], [ %scan_end.1, %bb7 ], [ %scan_end.1, %bb8 ], [ %scan_end.1, %bb9 ], [ %86, %bb22 ], [ %scan_end.1, %bb20 ] ; <i8> [#uses=1]
-  %87 = and i32 %cur_match_addr.0, %20            ; <i32> [#uses=1]
-  %88 = getelementptr inbounds i16* %18, i32 %87  ; <i16*> [#uses=1]
-  %89 = load i16* %88, align 2                    ; <i16> [#uses=1]
-  %90 = zext i16 %89 to i32                       ; <i32> [#uses=2]
-  %91 = icmp ugt i32 %90, %iftmp.48.0             ; <i1> [#uses=1]
-  br i1 %91, label %bb24, label %bb25
-
-bb24:                                             ; preds = %bb23
-
-; LSR should use count-down iteration to avoid requiring the trip count
-; in a register.
-
-;      CHECK: @ %bb24
-; CHECK: subs{{.*}} {{(r[0-9]+)|(lr)}}, #1
-; CHECK: bne.w
-
-  %92 = icmp eq i32 %tmp81, %indvar78             ; <i1> [#uses=1]
-  %indvar.next79 = add i32 %indvar78, 1           ; <i32> [#uses=1]
-  br i1 %92, label %bb25, label %bb6
-
-bb25:                                             ; preds = %bb24, %bb23, %bb21
-  %best_len.1 = phi i32 [ %best_len.0, %bb23 ], [ %best_len.0, %bb24 ], [ %80, %bb21 ] ; <i32> [#uses=2]
-  %93 = icmp ugt i32 %best_len.1, %32             ; <i1> [#uses=1]
-  %merge = select i1 %93, i32 %32, i32 %best_len.1 ; <i32> [#uses=1]
-  ret i32 %merge
-}
diff --git a/test/CodeGen/ARM/lsr-unfolded-offset.ll b/test/CodeGen/ARM/lsr-unfolded-offset.ll
index bf26a96..5b4cf9d 100644
--- a/test/CodeGen/ARM/lsr-unfolded-offset.ll
+++ b/test/CodeGen/ARM/lsr-unfolded-offset.ll
@@ -12,7 +12,7 @@
 ; CHECK: add
 
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
-target triple = "thumbv7-apple-macosx10.7.0"
+target triple = "thumbv7-apple-ios"
 
 %struct.partition_entry = type { i32, i32, i64, i64 }
 
diff --git a/test/CodeGen/ARM/machine-cse-cmp.ll b/test/CodeGen/ARM/machine-cse-cmp.ll
index c77402f..3ac7d77 100644
--- a/test/CodeGen/ARM/machine-cse-cmp.ll
+++ b/test/CodeGen/ARM/machine-cse-cmp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
 ;rdar://8003725
 
 @G1 = external global i32
@@ -6,13 +6,42 @@
 
 define i32 @f1(i32 %cond1, i32 %x1, i32 %x2, i32 %x3) {
 entry:
+; CHECK: f1:
 ; CHECK: cmp
 ; CHECK: moveq
 ; CHECK-NOT: cmp
-; CHECK: moveq
+; CHECK: mov{{eq|ne}}
     %tmp1 = icmp eq i32 %cond1, 0
     %tmp2 = select i1 %tmp1, i32 %x1, i32 %x2
     %tmp3 = select i1 %tmp1, i32 %x2, i32 %x3
     %tmp4 = add i32 %tmp2, %tmp3
     ret i32 %tmp4
 }
+
+@foo = external global i32
+@bar = external global [250 x i8], align 1
+
+; CSE of cmp across BB boundary
+; rdar://10660865
+define void @f2() nounwind ssp {
+entry:
+; CHECK: f2:
+; CHECK: cmp
+; CHECK: poplt
+; CHECK-NOT: cmp
+; CHECK: movle
+  %0 = load i32* @foo, align 4
+  %cmp28 = icmp sgt i32 %0, 0
+  br i1 %cmp28, label %for.body.lr.ph, label %for.cond1.preheader
+
+for.body.lr.ph:                                   ; preds = %entry
+  %1 = icmp sgt i32 %0, 1
+  %smax = select i1 %1, i32 %0, i32 1
+  call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([250 x i8]* @bar, i32 0, i32 0), i8 0, i32 %smax, i32 1, i1 false)
+  unreachable
+
+for.cond1.preheader:                              ; preds = %entry
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll
index 30b9f59..dc77282 100644
--- a/test/CodeGen/ARM/memcpy-inline.ll
+++ b/test/CodeGen/ARM/memcpy-inline.ll
@@ -1,6 +1,5 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -regalloc=linearscan -disable-post-ra | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -disable-post-ra | FileCheck %s
 
-; The ARM magic hinting works best with linear scan.
 ; CHECK: ldrd
 ; CHECK: strd
 ; CHECK: ldrb
diff --git a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll
index aeda022..fe0056c 100644
--- a/test/CodeGen/ARM/memfunc.ll
+++ b/test/CodeGen/ARM/memfunc.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=armv7-apple-ios -o - | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7m-darwin-eabi -o - | FileCheck %s --check-prefix=DARWIN
 ; RUN: llc < %s -mtriple=arm-none-eabi -o - | FileCheck --check-prefix=EABI %s
 
 @from = common global [500 x i32] zeroinitializer, align 4
@@ -18,6 +19,8 @@ entry:
         ; EABI memset swaps arguments
         ; CHECK: mov r1, #0
         ; CHECK: memset
+        ; DARWIN: movs r1, #0
+        ; DARWIN: memset
         ; EABI: mov r2, #0
         ; EABI: __aeabi_memset
         call void @llvm.memset.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8 0, i32 500, i32 0, i1 false)
diff --git a/test/CodeGen/ARM/mul_const.ll b/test/CodeGen/ARM/mul_const.ll
index 3cb8a8e..c50a233 100644
--- a/test/CodeGen/ARM/mul_const.ll
+++ b/test/CodeGen/ARM/mul_const.ll
@@ -41,3 +41,45 @@ entry:
         ret i32 %0
 }
 
+define i32 @tn9(i32 %v) nounwind readnone {
+entry:
+; CHECK: tn9:
+; CHECK: add	r0, r0, r0, lsl #3
+; CHECK: rsb	r0, r0, #0
+        %0 = mul i32 %v, -9
+        ret i32 %0
+}
+
+define i32 @tn7(i32 %v) nounwind readnone {
+entry:
+; CHECK: tn7:
+; CHECK: sub r0, r0, r0, lsl #3
+	%0 = mul i32 %v, -7
+	ret i32 %0
+}
+
+define i32 @tn5(i32 %v) nounwind readnone {
+entry:
+; CHECK: tn5:
+; CHECK: add r0, r0, r0, lsl #2
+; CHECK: rsb r0, r0, #0
+        %0 = mul i32 %v, -5
+        ret i32 %0
+}
+
+define i32 @tn3(i32 %v) nounwind readnone {
+entry:
+; CHECK: tn3:
+; CHECK: sub r0, r0, r0, lsl #2
+        %0 = mul i32 %v, -3
+        ret i32 %0
+}
+
+define i32 @tn12288(i32 %v) nounwind readnone {
+entry:
+; CHECK: tn12288:
+; CHECK: sub r0, r0, r0, lsl #2
+; CHECK: lsl{{.*}}#12
+        %0 = mul i32 %v, -12288
+        ret i32 %0
+}
diff --git a/test/CodeGen/ARM/neon_ld1.ll b/test/CodeGen/ARM/neon_ld1.ll
index c78872a..b892d2d 100644
--- a/test/CodeGen/ARM/neon_ld1.ll
+++ b/test/CodeGen/ARM/neon_ld1.ll
@@ -1,7 +1,10 @@
-; RUN: llc < %s -march=arm -mattr=+neon | grep vldr.64 | count 4
-; RUN: llc < %s -march=arm -mattr=+neon | grep vstr.64
-; RUN: llc < %s -march=arm -mattr=+neon | grep vmov
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
+; CHECK: t1
+; CHECK: vldr d
+; CHECK: vldr d
+; CHECK: vadd.i16 d
+; CHECK: vstr d
 define void @t1(<2 x i32>* %r, <4 x i16>* %a, <4 x i16>* %b) nounwind {
 entry:
 	%0 = load <4 x i16>* %a, align 8		; <<4 x i16>> [#uses=1]
@@ -12,6 +15,11 @@ entry:
 	ret void
 }
 
+; CHECK: t2
+; CHECK: vldr d
+; CHECK: vldr d
+; CHECK: vsub.i16 d
+; CHECK: vmov r0, r1, d
 define <2 x i32> @t2(<4 x i16>* %a, <4 x i16>* %b) nounwind readonly {
 entry:
 	%0 = load <4 x i16>* %a, align 8		; <<4 x i16>> [#uses=1]
diff --git a/test/CodeGen/ARM/neon_ld2.ll b/test/CodeGen/ARM/neon_ld2.ll
index 130277b..944bfe0 100644
--- a/test/CodeGen/ARM/neon_ld2.ll
+++ b/test/CodeGen/ARM/neon_ld2.ll
@@ -1,7 +1,10 @@
-; RUN: llc < %s -march=arm -mattr=+neon | grep vldmia | count 4
-; RUN: llc < %s -march=arm -mattr=+neon | grep vstmia | count 1
-; RUN: llc < %s -march=arm -mattr=+neon | grep vmov  | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
+; CHECK: t1
+; CHECK: vldmia
+; CHECK: vldmia
+; CHECK: vadd.i64 q
+; CHECK: vstmia
 define void @t1(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind {
 entry:
 	%0 = load <2 x i64>* %a, align 16		; <<2 x i64>> [#uses=1]
@@ -12,6 +15,12 @@ entry:
 	ret void
 }
 
+; CHECK: t2
+; CHECK: vldmia
+; CHECK: vldmia
+; CHECK: vsub.i64 q
+; CHECK: vmov r0, r1, d
+; CHECK: vmov r2, r3, d
 define <4 x i32> @t2(<2 x i64>* %a, <2 x i64>* %b) nounwind readonly {
 entry:
 	%0 = load <2 x i64>* %a, align 16		; <<2 x i64>> [#uses=1]
diff --git a/test/CodeGen/ARM/neon_spill.ll b/test/CodeGen/ARM/neon_spill.ll
new file mode 100644
index 0000000..277bd05
--- /dev/null
+++ b/test/CodeGen/ARM/neon_spill.ll
@@ -0,0 +1,54 @@
+; RUN: llc < %s -verify-machineinstrs
+; RUN: llc < %s -verify-machineinstrs -O0
+; PR12177
+;
+; This test case spills a QQQQ register.
+;
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+%0 = type { %1*, i32, i32, i32, i8 }
+%1 = type { i32 (...)** }
+%2 = type { i8*, i8*, i8*, i32 }
+%3 = type { %4 }
+%4 = type { i32 (...)**, %2, %4*, i8, i8 }
+
+declare arm_aapcs_vfpcc void @func1(%0*, float* nocapture, float* nocapture, %2*) nounwind
+
+declare arm_aapcs_vfpcc %0** @func2()
+
+declare arm_aapcs_vfpcc %2* @func3(%2*, %2*, i32)
+
+declare arm_aapcs_vfpcc %2** @func4()
+
+define arm_aapcs_vfpcc void @foo(%3* nocapture) nounwind align 2 {
+  call void @llvm.arm.neon.vst4.v4i32(i8* undef, <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>, <4 x i32> <i32 1082130432, i32 1084227584, i32 1086324736, i32 1088421888>, <4 x i32> <i32 1090519040, i32 1091567616, i32 1092616192, i32 1093664768>, <4 x i32> <i32 1094713344, i32 1095761920, i32 1096810496, i32 1097859072>, i32 16) nounwind
+  %2 = call arm_aapcs_vfpcc  %0** @func2() nounwind
+  %3 = load %0** %2, align 4, !tbaa !0
+  store float 0.000000e+00, float* undef, align 4
+  %4 = call arm_aapcs_vfpcc  %2* @func3(%2* undef, %2* undef, i32 2956) nounwind
+  call arm_aapcs_vfpcc  void @func1(%0* %3, float* undef, float* undef, %2* undef)
+  %5 = call arm_aapcs_vfpcc  %0** @func2() nounwind
+  store float 1.000000e+00, float* undef, align 4
+  call arm_aapcs_vfpcc  void @func1(%0* undef, float* undef, float* undef, %2* undef)
+  store float 1.500000e+01, float* undef, align 4
+  %6 = call arm_aapcs_vfpcc  %2** @func4() nounwind
+  %7 = call arm_aapcs_vfpcc  %2* @func3(%2* undef, %2* undef, i32 2971) nounwind
+  %8 = fadd float undef, -1.000000e+05
+  store float %8, float* undef, align 16, !tbaa !3
+  %9 = call arm_aapcs_vfpcc  i32 @rand() nounwind
+  %10 = fmul float undef, 2.000000e+05
+  %11 = fadd float %10, -1.000000e+05
+  store float %11, float* undef, align 4, !tbaa !3
+  call void @llvm.arm.neon.vst4.v4i32(i8* undef, <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>, <4 x i32> <i32 1082130432, i32 1084227584, i32 1086324736, i32 1088421888>, <4 x i32> <i32 1090519040, i32 1091567616, i32 1092616192, i32 1093664768>, <4 x i32> <i32 1094713344, i32 1095761920, i32 1096810496, i32 1097859072>, i32 16) nounwind
+  ret void
+}
+
+declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
+
+declare arm_aapcs_vfpcc i32 @rand()
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"float", metadata !1}
diff --git a/test/CodeGen/ARM/odr_comdat.ll b/test/CodeGen/ARM/odr_comdat.ll
new file mode 100644
index 0000000..e28b578
--- /dev/null
+++ b/test/CodeGen/ARM/odr_comdat.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=ARMGNUEABI
+
+; Checking that a comdat group gets generated correctly for a static member 
+; of instantiated C++ templates.
+; see http://sourcery.mentor.com/public/cxx-abi/abi.html#vague-itemplate
+; section 5.2.6 Instantiated templates
+; "Any static member data object is emitted in a COMDAT identified by its mangled 
+;  name, in any object file with a reference to its name symbol."
+
+; Case 1: variable is not explicitly initialized, and ends up in a .bss section
+; ARMGNUEABI: .section        .bss._ZN1CIiE1iE,"aGw",%nobits,_ZN1CIiE1iE,comdat
+@_ZN1CIiE1iE = weak_odr global i32 0, align 4
+
+; Case 2: variable is explicitly initialized, and ends up in a .data section
+; ARMGNUEABI: .section        .data._ZN1CIiE1jE,"aGw",%progbits,_ZN1CIiE1jE,comdat
+@_ZN1CIiE1jE = weak_odr global i32 12, align 4
diff --git a/test/CodeGen/ARM/opt-shuff-tstore.ll b/test/CodeGen/ARM/opt-shuff-tstore.ll
new file mode 100644
index 0000000..b4da552
--- /dev/null
+++ b/test/CodeGen/ARM/opt-shuff-tstore.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mcpu=cortex-a9 -mtriple=arm-linux-unknown -promote-elements -mattr=+neon < %s | FileCheck %s
+
+; CHECK: func_4_8
+; CHECK: vst1.32
+; CHECK-NEXT: bx lr
+define void @func_4_8(<4 x i8> %param, <4 x i8>* %p) {
+  %r = add <4 x i8> %param, <i8 1, i8 2, i8 3, i8 4>
+  store <4 x i8> %r, <4 x i8>* %p
+  ret void
+}
+
+; CHECK: func_2_16
+; CHECK: vst1.32
+; CHECK-NEXT: bx lr
+define void @func_2_16(<2 x i16> %param, <2 x i16>* %p) {
+  %r = add <2 x i16> %param, <i16 1, i16 2>
+  store <2 x i16> %r, <2 x i16>* %p
+  ret void
+}
diff --git a/test/CodeGen/ARM/peephole-bitcast.ll b/test/CodeGen/ARM/peephole-bitcast.ll
index e670a5b..e72d51f 100644
--- a/test/CodeGen/ARM/peephole-bitcast.ll
+++ b/test/CodeGen/ARM/peephole-bitcast.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=linearscan | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; XFAIL: *
+; PR11364
 
 ; vmov s0, r0 + vmov r0, s0 should have been optimized away.
 ; rdar://9104514
diff --git a/test/CodeGen/ARM/reg_asc_order.ll b/test/CodeGen/ARM/reg_asc_order.ll
new file mode 100644
index 0000000..d1d0ee5
--- /dev/null
+++ b/test/CodeGen/ARM/reg_asc_order.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; Check that memcpy gets lowered to ldm/stm, at least in this very smple case.
+
+%struct.Foo = type { i32, i32, i32, i32 }
+
+define void @_Z10CopyStructP3FooS0_(%struct.Foo* nocapture %a, %struct.Foo* nocapture %b) nounwind {
+entry:
+;CHECK: ldm
+;CHECK: stm
+  %0 = bitcast %struct.Foo* %a to i8*
+  %1 = bitcast %struct.Foo* %b to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 16, i32 4, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll
index 3a19211..05794e4 100644
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@@ -155,7 +155,7 @@ define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind {
 
 define <8 x i8> @t6(i8* %A, <8 x i8>* %B) nounwind {
 ; CHECK:        t6:
-; CHECK:        vldr.64
+; CHECK:        vldr
 ; CHECK:        vorr d[[D0:[0-9]+]], d[[D1:[0-9]+]]
 ; CHECK-NEXT:   vld2.8 {d[[D1]][1], d[[D0]][1]}
   %tmp1 = load <8 x i8>* %B                       ; <<8 x i8>> [#uses=2]
@@ -240,7 +240,7 @@ bb14:                                             ; preds = %bb6
 ; PR7157
 define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
 ; CHECK:        t9:
-; CHECK:        vldr.64
+; CHECK:        vldr
 ; CHECK-NOT:    vmov d{{.*}}, d16
 ; CHECK:        vmov.i32 d17
 ; CHECK-NEXT:   vstmia r0, {d16, d17}
@@ -272,8 +272,8 @@ define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
 define arm_aapcs_vfpcc i32 @t10() nounwind {
 entry:
 ; CHECK: t10:
-; CHECK: vmov.i32 q[[Q0:[0-9]+]], #0x3F000000
-; CHECK: vmul.f32 q8, q8, d0[0]
+; CHECK: vmov.i32 q[[Q0:[0-9]+]], #0x3f000000
+; CHECK: vmul.f32 q8, q8, d[[DREG:[0-1]+]]
 ; CHECK: vadd.f32 q8, q8, q8
   %0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
   %1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/ARM/rev.ll b/test/CodeGen/ARM/rev.ll
index ea44c28..6bb6743 100644
--- a/test/CodeGen/ARM/rev.ll
+++ b/test/CodeGen/ARM/rev.ll
@@ -112,11 +112,11 @@ entry:
   ret i32 %conv3
 }
 
+; rdar://10750814
 define zeroext i16 @test9(i16 zeroext %v) nounwind readnone {
 entry:
 ; CHECK: test9
-; CHECK: rev r0, r0
-; CHECK: lsr r0, r0, #16
+; CHECK: rev16 r0, r0
   %conv = zext i16 %v to i32
   %shr4 = lshr i32 %conv, 8
   %shl = shl nuw nsw i32 %conv, 8
diff --git a/test/CodeGen/ARM/select-imm.ll b/test/CodeGen/ARM/select-imm.ll
index f43dde5..c9ac66a 100644
--- a/test/CodeGen/ARM/select-imm.ll
+++ b/test/CodeGen/ARM/select-imm.ll
@@ -64,14 +64,14 @@ define i32 @t4(i32 %a, i32 %b, i32 %x) nounwind {
 entry:
 ; ARM: t4:
 ; ARM: ldr
-; ARM: movlt
+; ARM: mov{{lt|ge}}
 
 ; ARMT2: t4:
 ; ARMT2: movwlt [[R0:r[0-9]+]], #65365
 ; ARMT2: movtlt [[R0]], #65365
 
 ; THUMB2: t4:
-; THUMB2: mvnlt.w [[R0:r[0-9]+]], #11141290
+; THUMB2: mvnlt [[R0:r[0-9]+]], #11141290
   %0 = icmp slt i32 %a, %b
   %1 = select i1 %0, i32 4283826005, i32 %x
   ret i32 %1
diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll
index f1bd7ee..3e07da8 100644
--- a/test/CodeGen/ARM/select.ll
+++ b/test/CodeGen/ARM/select.ll
@@ -76,12 +76,12 @@ define double @f7(double %a, double %b) {
 ; block generated, odds are good that we have close to the ideal code for this:
 ;
 ; CHECK-NEON:      _f8:
-; CHECK-NEON:      adr     r2, LCPI7_0
-; CHECK-NEON-NEXT: movw    r3, #1123
-; CHECK-NEON-NEXT: adds    r1, r2, #4
-; CHECK-NEON-NEXT: cmp     r0, r3
+; CHECK-NEON:      adr     [[R2:r[0-9]+]], LCPI7_0
+; CHECK-NEON-NEXT: movw    [[R3:r[0-9]+]], #1123
+; CHECK-NEON-NEXT: adds    {{r.*}}, [[R2]], #4
+; CHECK-NEON-NEXT: cmp     r0, [[R3]]
 ; CHECK-NEON-NEXT: it      ne
-; CHECK-NEON-NEXT: movne   r1, r2
+; CHECK-NEON-NEXT: movne   {{r.*}}, [[R2]]
 ; CHECK-NEON-NEXT: ldr
 ; CHECK-NEON:      bx
 
diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll
index 8a3133a..ca2e18a 100644
--- a/test/CodeGen/ARM/select_xform.ll
+++ b/test/CodeGen/ARM/select_xform.ll
@@ -58,3 +58,49 @@ define i32 @t4(i32 %a, i32 %b, i32 %x, i32 %y) nounwind {
   %s = or i32 %z, %y
  ret i32 %s
 }
+
+define i32 @t5(i32 %a, i32 %b, i32 %c) nounwind {
+entry:
+; ARM: t5:
+; ARM-NOT: moveq
+; ARM: orreq r2, r2, #1
+
+; T2: t5:
+; T2-NOT: moveq
+; T2: orreq r2, r2, #1
+  %tmp1 = icmp eq i32 %a, %b
+  %tmp2 = zext i1 %tmp1 to i32
+  %tmp3 = or i32 %tmp2, %c
+  ret i32 %tmp3
+}
+
+define i32 @t6(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
+; ARM: t6:
+; ARM-NOT: movge
+; ARM: eorlt r3, r3, r2
+
+; T2: t6:
+; T2-NOT: movge
+; T2: eorlt.w r3, r3, r2
+  %cond = icmp slt i32 %a, %b
+  %tmp1 = select i1 %cond, i32 %c, i32 0
+  %tmp2 = xor i32 %tmp1, %d
+  ret i32 %tmp2
+}
+
+define i32 @t7(i32 %a, i32 %b, i32 %c) nounwind {
+entry:
+; ARM: t7:
+; ARM-NOT: lsleq
+; ARM: andeq r2, r2, r2, lsl #1
+
+; T2: t7:
+; T2-NOT: lsleq.w
+; T2: andeq.w r2, r2, r2, lsl #1
+  %tmp1 = shl i32 %c, 1
+  %cond = icmp eq i32 %a, %b
+  %tmp2 = select i1 %cond, i32 %tmp1, i32 -1
+  %tmp3 = and i32 %c, %tmp2
+  ret i32 %tmp3
+}
+
diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll
index 964cef0..eb971ff 100644
--- a/test/CodeGen/ARM/shifter_operand.ll
+++ b/test/CodeGen/ARM/shifter_operand.ll
@@ -55,11 +55,15 @@ define fastcc void @test4(i16 %addr) nounwind {
 entry:
 ; A8: test4:
 ; A8: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]
+; A8-NOT: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]!
 ; A8: str [[REG]], [r0, r1, lsl #2]
+; A8-NOT: str [[REG]], [r0]
 
 ; A9: test4:
 ; A9: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]
+; A9-NOT: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]!
 ; A9: str [[REG]], [r0, r1, lsl #2]
+; A9-NOT: str [[REG]], [r0]
   %0 = tail call i8* (...)* @malloc(i32 undef) nounwind
   %1 = bitcast i8* %0 to i32*
   %2 = sext i16 %addr to i32
diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll
index bf4e55c..057ea11 100644
--- a/test/CodeGen/ARM/spill-q.ll
+++ b/test/CodeGen/ARM/spill-q.ll
@@ -11,7 +11,7 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
 
 define void @aaa(%quuz* %this, i8* %block) {
 ; CHECK: aaa:
-; CHECK: bic sp, sp, #15
+; CHECK: bic {{.*}}, #15
 ; CHECK: vst1.64 {{.*}}sp, :128
 ; CHECK: vld1.64 {{.*}}sp, :128
 entry:
diff --git a/test/CodeGen/ARM/str_pre-2.ll b/test/CodeGen/ARM/str_pre-2.ll
index f4e3a44..983ba45 100644
--- a/test/CodeGen/ARM/str_pre-2.ll
+++ b/test/CodeGen/ARM/str_pre-2.ll
@@ -1,4 +1,3 @@
-; RUN: llc < %s -mtriple=armv6-linux-gnu -regalloc=linearscan | FileCheck %s
 ; RUN: llc < %s -mtriple=armv6-linux-gnu -regalloc=basic | FileCheck %s
 
 ; The greedy register allocator uses a single CSR here, invalidating the test.
diff --git a/test/CodeGen/ARM/subreg-remat.ll b/test/CodeGen/ARM/subreg-remat.ll
index 993d7ec..03ae12c 100644
--- a/test/CodeGen/ARM/subreg-remat.ll
+++ b/test/CodeGen/ARM/subreg-remat.ll
@@ -12,13 +12,13 @@ target triple = "thumbv7-apple-ios"
 ;
 ; CHECK: f1
 ; CHECK: vmov    s1, r0
-; CHECK: vldr.32 s0, LCPI
+; CHECK: vldr s0, LCPI
 ; The vector must be spilled:
-; CHECK: vstr.64 d0,
+; CHECK: vstr d0,
 ; CHECK: asm clobber d0
 ; And reloaded after the asm:
-; CHECK: vldr.64 [[D16:d[0-9]+]],
-; CHECK: vstr.64 [[D16]], [r1]
+; CHECK: vldr [[D16:d[0-9]+]],
+; CHECK: vstr [[D16]], [r1]
 define void @f1(float %x, <2 x float>* %p) {
   %v1 = insertelement <2 x float> undef, float %x, i32 1
   %v2 = insertelement <2 x float> %v1, float 0x400921FB60000000, i32 0
@@ -37,13 +37,13 @@ define void @f1(float %x, <2 x float>* %p) {
 ; virtual register.  It doesn't read the old value.
 ;
 ; CHECK: f2
-; CHECK: vldr.32 s0, LCPI
+; CHECK: vldr s0, LCPI
 ; The vector must not be spilled:
-; CHECK-NOT: vstr.64
+; CHECK-NOT: vstr
 ; CHECK: asm clobber d0
 ; But instead rematerialize after the asm:
-; CHECK: vldr.32 [[S0:s[0-9]+]], LCPI
-; CHECK: vstr.64 [[D0:d[0-9]+]], [r0]
+; CHECK: vldr [[S0:s[0-9]+]], LCPI
+; CHECK: vstr [[D0:d[0-9]+]], [r0]
 define void @f2(<2 x float>* %p) {
   %v2 = insertelement <2 x float> undef, float 0x400921FB60000000, i32 0
   %y = call double asm sideeffect "asm clobber $0", "=w,0,~{d1},~{d2},~{d3},~{d4},~{d5},~{d6},~{d7},~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15},~{d16},~{d17},~{d18},~{d19},~{d20},~{d21},~{d22},~{d23},~{d24},~{d25},~{d26},~{d27},~{d28},~{d29},~{d30},~{d31}"(<2 x float> %v2) nounwind
diff --git a/test/CodeGen/ARM/tail-dup.ll b/test/CodeGen/ARM/tail-dup.ll
new file mode 100644
index 0000000..e015bf0
--- /dev/null
+++ b/test/CodeGen/ARM/tail-dup.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -mtriple=thumb-apple-darwin -relocation-model=dynamic-no-pic -mcpu=cortex-a8 -asm-verbose=false | FileCheck %s
+
+; We should be able to tail-duplicate the basic block containing the indirectbr
+; into all of its predecessors.
+; CHECK: fn:
+; CHECK: mov pc
+; CHECK: mov pc
+; CHECK: mov pc
+
+@fn.codetable = internal unnamed_addr constant [3 x i8*] [i8* blockaddress(@fn, %RETURN), i8* blockaddress(@fn, %INCREMENT), i8* blockaddress(@fn, %DECREMENT)], align 4
+
+define i32 @fn(i32* nocapture %opcodes) nounwind readonly ssp {
+entry:
+  %0 = load i32* %opcodes, align 4, !tbaa !0
+  %arrayidx = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %0
+  br label %indirectgoto
+
+INCREMENT:                                        ; preds = %indirectgoto
+  %inc = add nsw i32 %result.0, 1
+  %1 = load i32* %opcodes.addr.0, align 4, !tbaa !0
+  %arrayidx2 = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %1
+  br label %indirectgoto
+
+DECREMENT:                                        ; preds = %indirectgoto
+  %dec = add nsw i32 %result.0, -1
+  %2 = load i32* %opcodes.addr.0, align 4, !tbaa !0
+  %arrayidx4 = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %2
+  br label %indirectgoto
+
+indirectgoto:                                     ; preds = %DECREMENT, %INCREMENT, %entry
+  %result.0 = phi i32 [ 0, %entry ], [ %dec, %DECREMENT ], [ %inc, %INCREMENT ]
+  %opcodes.pn = phi i32* [ %opcodes, %entry ], [ %opcodes.addr.0, %DECREMENT ], [ %opcodes.addr.0, %INCREMENT ]
+  %indirect.goto.dest.in = phi i8** [ %arrayidx, %entry ], [ %arrayidx4, %DECREMENT ], [ %arrayidx2, %INCREMENT ]
+  %opcodes.addr.0 = getelementptr inbounds i32* %opcodes.pn, i32 1
+  %indirect.goto.dest = load i8** %indirect.goto.dest.in, align 4
+  indirectbr i8* %indirect.goto.dest, [label %RETURN, label %INCREMENT, label %DECREMENT]
+
+RETURN:                                           ; preds = %indirectgoto
+  ret i32 %result.0
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/test-sharedidx.ll b/test/CodeGen/ARM/test-sharedidx.ll
new file mode 100644
index 0000000..93340c3
--- /dev/null
+++ b/test/CodeGen/ARM/test-sharedidx.ll
@@ -0,0 +1,96 @@
+; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a9 -stress-ivchain | FileCheck %s
+; REQUIRES: asserts
+
+; @sharedidx is an unrolled variant of this loop:
+;  for (unsigned long i = 0; i < len; i += s) {
+;    c[i] = a[i] + b[i];
+;  }
+; where 's' cannot be folded into the addressing mode.
+;
+; This is not quite profitable to chain. But with -stress-ivchain, we
+; can form three address chains in place of the shared induction
+; variable.
+
+; rdar://10674430
+define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp {
+entry:
+; CHECK: sharedidx:
+  %cmp8 = icmp eq i32 %len, 0
+  br i1 %cmp8, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body.3
+; CHECK: %for.body
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+  %i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i8* %a, i32 %i.09
+  %0 = load i8* %arrayidx, align 1
+  %conv6 = zext i8 %0 to i32
+  %arrayidx1 = getelementptr inbounds i8* %b, i32 %i.09
+  %1 = load i8* %arrayidx1, align 1
+  %conv27 = zext i8 %1 to i32
+  %add = add nsw i32 %conv27, %conv6
+  %conv3 = trunc i32 %add to i8
+  %arrayidx4 = getelementptr inbounds i8* %c, i32 %i.09
+  store i8 %conv3, i8* %arrayidx4, align 1
+  %add5 = add i32 %i.09, %s
+  %cmp = icmp ult i32 %add5, %len
+  br i1 %cmp, label %for.body.1, label %for.end
+
+for.end:                                          ; preds = %for.body, %for.body.1, %for.body.2, %for.body.3, %entry
+  ret void
+
+for.body.1:                                       ; preds = %for.body
+; CHECK: %for.body.1
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+  %arrayidx.1 = getelementptr inbounds i8* %a, i32 %add5
+  %2 = load i8* %arrayidx.1, align 1
+  %conv6.1 = zext i8 %2 to i32
+  %arrayidx1.1 = getelementptr inbounds i8* %b, i32 %add5
+  %3 = load i8* %arrayidx1.1, align 1
+  %conv27.1 = zext i8 %3 to i32
+  %add.1 = add nsw i32 %conv27.1, %conv6.1
+  %conv3.1 = trunc i32 %add.1 to i8
+  %arrayidx4.1 = getelementptr inbounds i8* %c, i32 %add5
+  store i8 %conv3.1, i8* %arrayidx4.1, align 1
+  %add5.1 = add i32 %add5, %s
+  %cmp.1 = icmp ult i32 %add5.1, %len
+  br i1 %cmp.1, label %for.body.2, label %for.end
+
+for.body.2:                                       ; preds = %for.body.1
+; CHECK: %for.body.2
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+  %arrayidx.2 = getelementptr inbounds i8* %a, i32 %add5.1
+  %4 = load i8* %arrayidx.2, align 1
+  %conv6.2 = zext i8 %4 to i32
+  %arrayidx1.2 = getelementptr inbounds i8* %b, i32 %add5.1
+  %5 = load i8* %arrayidx1.2, align 1
+  %conv27.2 = zext i8 %5 to i32
+  %add.2 = add nsw i32 %conv27.2, %conv6.2
+  %conv3.2 = trunc i32 %add.2 to i8
+  %arrayidx4.2 = getelementptr inbounds i8* %c, i32 %add5.1
+  store i8 %conv3.2, i8* %arrayidx4.2, align 1
+  %add5.2 = add i32 %add5.1, %s
+  %cmp.2 = icmp ult i32 %add5.2, %len
+  br i1 %cmp.2, label %for.body.3, label %for.end
+
+for.body.3:                                       ; preds = %for.body.2
+; CHECK: %for.body.3
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+  %arrayidx.3 = getelementptr inbounds i8* %a, i32 %add5.2
+  %6 = load i8* %arrayidx.3, align 1
+  %conv6.3 = zext i8 %6 to i32
+  %arrayidx1.3 = getelementptr inbounds i8* %b, i32 %add5.2
+  %7 = load i8* %arrayidx1.3, align 1
+  %conv27.3 = zext i8 %7 to i32
+  %add.3 = add nsw i32 %conv27.3, %conv6.3
+  %conv3.3 = trunc i32 %add.3 to i8
+  %arrayidx4.3 = getelementptr inbounds i8* %c, i32 %add5.2
+  store i8 %conv3.3, i8* %arrayidx4.3, align 1
+  %add5.3 = add i32 %add5.2, %s
+  %cmp.3 = icmp ult i32 %add5.3, %len
+  br i1 %cmp.3, label %for.body, label %for.end
+}
diff --git a/test/CodeGen/ARM/vbsl-constant.ll b/test/CodeGen/ARM/vbsl-constant.ll
index 14e668e..f157dbd 100644
--- a/test/CodeGen/ARM/vbsl-constant.ll
+++ b/test/CodeGen/ARM/vbsl-constant.ll
@@ -2,8 +2,8 @@
 
 define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 ;CHECK: v_bsli8:
-;CHECK: vldr.64
-;CHECK: vldr.64
+;CHECK: vldr
+;CHECK: vldr
 ;CHECK: vbsl
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -16,8 +16,8 @@ define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 
 define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
 ;CHECK: v_bsli16:
-;CHECK: vldr.64
-;CHECK: vldr.64
+;CHECK: vldr
+;CHECK: vldr
 ;CHECK: vbsl
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -30,8 +30,8 @@ define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
 
 define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
 ;CHECK: v_bsli32:
-;CHECK: vldr.64
-;CHECK: vldr.64
+;CHECK: vldr
+;CHECK: vldr
 ;CHECK: vbsl
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -44,9 +44,9 @@ define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
 
 define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind {
 ;CHECK: v_bsli64:
-;CHECK: vldr.64
-;CHECK: vldr.64
-;CHECK: vldr.64
+;CHECK: vldr
+;CHECK: vldr
+;CHECK: vldr
 ;CHECK: vbsl
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
diff --git a/test/CodeGen/ARM/vdiv_combine.ll b/test/CodeGen/ARM/vdiv_combine.ll
index 1387393..7fddbed 100644
--- a/test/CodeGen/ARM/vdiv_combine.ll
+++ b/test/CodeGen/ARM/vdiv_combine.ll
@@ -8,7 +8,7 @@ declare void @foo_int32x4_t(<4 x i32>)
 
 ; Test signed conversion.
 ; CHECK: t1
-; CHECK-NOT: vdiv
+; CHECK-NOT: {{vdiv|vmul}}
 define void @t1() nounwind {
 entry:
   %tmp = load i32* @iin, align 4, !tbaa !3
@@ -24,7 +24,7 @@ declare void @foo_float32x2_t(<2 x float>)
 
 ; Test unsigned conversion.
 ; CHECK: t2
-; CHECK-NOT: vdiv
+; CHECK-NOT: {{vdiv|vmul}}
 define void @t2() nounwind {
 entry:
   %tmp = load i32* @uin, align 4, !tbaa !3
@@ -38,7 +38,7 @@ entry:
 
 ; Test which should not fold due to non-power of 2.
 ; CHECK: t3
-; CHECK: vdiv
+; CHECK: {{vdiv|vmul}}
 define void @t3() nounwind {
 entry:
   %tmp = load i32* @iin, align 4, !tbaa !3
@@ -52,7 +52,7 @@ entry:
 
 ; Test which should not fold due to power of 2 out of range.
 ; CHECK: t4
-; CHECK: vdiv
+; CHECK: {{vdiv|vmul}}
 define void @t4() nounwind {
 entry:
   %tmp = load i32* @iin, align 4, !tbaa !3
@@ -66,7 +66,7 @@ entry:
 
 ; Test case where const is max power of 2 (i.e., 2^32).
 ; CHECK: t5
-; CHECK-NOT: vdiv
+; CHECK-NOT: {{vdiv|vmul}}
 define void @t5() nounwind {
 entry:
   %tmp = load i32* @iin, align 4, !tbaa !3
@@ -80,7 +80,7 @@ entry:
 
 ; Test quadword.
 ; CHECK: t6
-; CHECK-NOT: vdiv
+; CHECK-NOT: {{vdiv|vmul}}
 define void @t6() nounwind {
 entry:
   %tmp = load i32* @iin, align 4, !tbaa !3
diff --git a/test/CodeGen/ARM/vdup.ll b/test/CodeGen/ARM/vdup.ll
index e99fac1..05332e4 100644
--- a/test/CodeGen/ARM/vdup.ll
+++ b/test/CodeGen/ARM/vdup.ll
@@ -254,7 +254,7 @@ entry:
 ;CHECK: redundantVdup:
 ;CHECK: vmov.i8
 ;CHECK-NOT: vdup.8
-;CHECK: vstr.64
+;CHECK: vstr
 define void @redundantVdup(<8 x i8>* %ptr) nounwind {
   %1 = insertelement <8 x i8> undef, i8 -128, i32 0
   %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
diff --git a/test/CodeGen/ARM/vector-DAGCombine.ll b/test/CodeGen/ARM/vector-DAGCombine.ll
index 81bdc44..a38a0fe 100644
--- a/test/CodeGen/ARM/vector-DAGCombine.ll
+++ b/test/CodeGen/ARM/vector-DAGCombine.ll
@@ -80,7 +80,7 @@ declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind
 ; so they are not split up into i32 values.  Radar 8755338.
 define void @i64_buildvector(i64* %ptr, <2 x i64>* %vp) nounwind {
 ; CHECK: i64_buildvector
-; CHECK: vldr.64
+; CHECK: vldr
   %t0 = load i64* %ptr, align 4
   %t1 = insertelement <2 x i64> undef, i64 %t0, i32 0
   store <2 x i64> %t1, <2 x i64>* %vp
@@ -89,7 +89,7 @@ define void @i64_buildvector(i64* %ptr, <2 x i64>* %vp) nounwind {
 
 define void @i64_insertelement(i64* %ptr, <2 x i64>* %vp) nounwind {
 ; CHECK: i64_insertelement
-; CHECK: vldr.64
+; CHECK: vldr
   %t0 = load i64* %ptr, align 4
   %vec = load <2 x i64>* %vp
   %t1 = insertelement <2 x i64> %vec, i64 %t0, i32 0
@@ -99,7 +99,7 @@ define void @i64_insertelement(i64* %ptr, <2 x i64>* %vp) nounwind {
 
 define void @i64_extractelement(i64* %ptr, <2 x i64>* %vp) nounwind {
 ; CHECK: i64_extractelement
-; CHECK: vstr.64
+; CHECK: vstr
   %vec = load <2 x i64>* %vp
   %t1 = extractelement <2 x i64> %vec, i32 0
   store i64 %t1, i64* %ptr
@@ -123,3 +123,13 @@ define void @orVec(<3 x i8>* %A) nounwind {
   ret void
 }
 
+; The following test was hitting an assertion in the DAG combiner when
+; constant folding the multiply because the "sext undef" was translated to
+; a BUILD_VECTOR with i32 0 operands, which did not match the i16 operands
+; of the other BUILD_VECTOR.
+define i16 @foldBuildVectors() {
+  %1 = sext <8 x i8> undef to <8 x i16>
+  %2 = mul <8 x i16> %1, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+  %3 = extractelement <8 x i16> %2, i32 0
+  ret i16 %3
+}
diff --git a/test/CodeGen/ARM/vector-extend-narrow.ll b/test/CodeGen/ARM/vector-extend-narrow.ll
new file mode 100644
index 0000000..5e9239f
--- /dev/null
+++ b/test/CodeGen/ARM/vector-extend-narrow.ll
@@ -0,0 +1,46 @@
+; RUN: llc -mtriple armv7 %s -o - | FileCheck %s
+
+; CHECK: f:
+define float @f(<4 x i16>* nocapture %in) {
+  ; CHECK: vldr
+  ; CHECK: vmovl.u16
+  %1 = load <4 x i16>* %in
+  ; CHECK: vcvt.f32.u32
+  %2 = uitofp <4 x i16> %1 to <4 x float>
+  %3 = extractelement <4 x float> %2, i32 0
+  %4 = extractelement <4 x float> %2, i32 1
+  %5 = extractelement <4 x float> %2, i32 2
+
+  ; CHECK: vadd.f32
+  %6 = fadd float %3, %4
+  %7 = fadd float %6, %5
+
+  ret float %7
+}
+
+; CHECK: g:
+define float @g(<4 x i8>* nocapture %in) {
+  ; CHECK: vldr
+  ; CHECK: vmovl.u8
+  ; CHECK: vmovl.u16
+  %1 = load <4 x i8>* %in
+  ; CHECK: vcvt.f32.u32
+  %2 = uitofp <4 x i8> %1 to <4 x float>
+  %3 = extractelement <4 x float> %2, i32 0
+  %4 = extractelement <4 x float> %2, i32 1
+  %5 = extractelement <4 x float> %2, i32 2
+
+  ; CHECK: vadd.f32
+  %6 = fadd float %3, %4
+  %7 = fadd float %6, %5
+
+  ret float %7
+}
+
+; CHECK: h:
+define <4 x i8> @h(<4 x float> %v) {
+  ; CHECK: vcvt.{{[us]}}32.f32
+  ; CHECK: vmovn.i32
+  %1 = fptoui <4 x float> %v to <4 x i8>
+  ret <4 x i8> %1
+}
diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll
index 65b5913..e224bdf 100644
--- a/test/CodeGen/ARM/vext.ll
+++ b/test/CodeGen/ARM/vext.ll
@@ -138,7 +138,7 @@ define <8 x i16> @test_illegal(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ; Make sure this doesn't crash
 define arm_aapcscc void @test_elem_mismatch(<2 x i64>* nocapture %src, <4 x i16>* nocapture %dest) nounwind {
 ; CHECK: test_elem_mismatch:
-; CHECK: vstr.64
+; CHECK: vstr
   %tmp0 = load <2 x i64>* %src, align 16
   %tmp1 = bitcast <2 x i64> %tmp0 to <4 x i32>
   %tmp2 = extractelement <4 x i32> %tmp1, i32 0
diff --git a/test/CodeGen/ARM/vlddup.ll b/test/CodeGen/ARM/vlddup.ll
index d0e9ac3..61d73c1 100644
--- a/test/CodeGen/ARM/vlddup.ll
+++ b/test/CodeGen/ARM/vlddup.ll
@@ -32,7 +32,7 @@ define <2 x i32> @vld1dupi32(i32* %A) nounwind {
 
 define <2 x float> @vld1dupf(float* %A) nounwind {
 ;CHECK: vld1dupf:
-;CHECK: vld1.32 {d16[]}, [r0]
+;CHECK: vld1.32 {d16[]}, [r0, :32]
 	%tmp0 = load float* %A
         %tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0
         %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
@@ -51,7 +51,7 @@ define <16 x i8> @vld1dupQi8(i8* %A) nounwind {
 
 define <4 x float> @vld1dupQf(float* %A) nounwind {
 ;CHECK: vld1dupQf:
-;CHECK: vld1.32 {d16[], d17[]}, [r0]
+;CHECK: vld1.32 {d16[], d17[]}, [r0, :32]
         %tmp0 = load float* %A
         %tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0
         %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll
index 0d7d4ec..7bd0cbd 100644
--- a/test/CodeGen/ARM/vldlane.ll
+++ b/test/CodeGen/ARM/vldlane.ll
@@ -31,9 +31,19 @@ define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind {
         ret <2 x i32> %tmp3
 }
 
+define <2 x i32> @vld1lanei32a32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vld1lanei32a32:
+;Check the alignment value.  Legal values are none or :32.
+;CHECK: vld1.32 {d16[1]}, [r0, :32]
+	%tmp1 = load <2 x i32>* %B
+	%tmp2 = load i32* %A, align 4
+	%tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
+        ret <2 x i32> %tmp3
+}
+
 define <2 x float> @vld1lanef(float* %A, <2 x float>* %B) nounwind {
 ;CHECK: vld1lanef:
-;CHECK: vld1.32 {d16[1]}, [r0]
+;CHECK: vld1.32 {d16[1]}, [r0, :32]
 	%tmp1 = load <2 x float>* %B
 	%tmp2 = load float* %A, align 4
 	%tmp3 = insertelement <2 x float> %tmp1, float %tmp2, i32 1
@@ -69,7 +79,7 @@ define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 
 define <4 x float> @vld1laneQf(float* %A, <4 x float>* %B) nounwind {
 ;CHECK: vld1laneQf:
-;CHECK: vld1.32 {d16[0]}, [r0]
+;CHECK: vld1.32 {d16[0]}, [r0, :32]
 	%tmp1 = load <4 x float>* %B
 	%tmp2 = load float* %A
 	%tmp3 = insertelement <4 x float> %tmp1, float %tmp2, i32 0
diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll
index a86be32b..0c23879 100644
--- a/test/CodeGen/ARM/vmov.ll
+++ b/test/CodeGen/ARM/vmov.ll
@@ -56,13 +56,13 @@ define <2 x i32> @v_movi32d() nounwind {
 
 define <2 x i32> @v_movi32e() nounwind {
 ;CHECK: v_movi32e:
-;CHECK: vmov.i32 d{{.*}}, #0x20FF
+;CHECK: vmov.i32 d{{.*}}, #0x20ff
 	ret <2 x i32> < i32 8447, i32 8447 >
 }
 
 define <2 x i32> @v_movi32f() nounwind {
 ;CHECK: v_movi32f:
-;CHECK: vmov.i32 d{{.*}}, #0x20FFFF
+;CHECK: vmov.i32 d{{.*}}, #0x20ffff
 	ret <2 x i32> < i32 2162687, i32 2162687 >
 }
 
@@ -92,19 +92,19 @@ define <2 x i32> @v_mvni32d() nounwind {
 
 define <2 x i32> @v_mvni32e() nounwind {
 ;CHECK: v_mvni32e:
-;CHECK: vmvn.i32 d{{.*}}, #0x20FF
+;CHECK: vmvn.i32 d{{.*}}, #0x20ff
 	ret <2 x i32> < i32 4294958848, i32 4294958848 >
 }
 
 define <2 x i32> @v_mvni32f() nounwind {
 ;CHECK: v_mvni32f:
-;CHECK: vmvn.i32 d{{.*}}, #0x20FFFF
+;CHECK: vmvn.i32 d{{.*}}, #0x20ffff
 	ret <2 x i32> < i32 4292804608, i32 4292804608 >
 }
 
 define <1 x i64> @v_movi64() nounwind {
 ;CHECK: v_movi64:
-;CHECK: vmov.i64 d{{.*}}, #0xFF0000FF0000FFFF
+;CHECK: vmov.i64 d{{.*}}, #0xff0000ff0000ffff
 	ret <1 x i64> < i64 18374687574888349695 >
 }
 
@@ -152,19 +152,19 @@ define <4 x i32> @v_movQi32d() nounwind {
 
 define <4 x i32> @v_movQi32e() nounwind {
 ;CHECK: v_movQi32e:
-;CHECK: vmov.i32 q{{.*}}, #0x20FF
+;CHECK: vmov.i32 q{{.*}}, #0x20ff
 	ret <4 x i32> < i32 8447, i32 8447, i32 8447, i32 8447 >
 }
 
 define <4 x i32> @v_movQi32f() nounwind {
 ;CHECK: v_movQi32f:
-;CHECK: vmov.i32 q{{.*}}, #0x20FFFF
+;CHECK: vmov.i32 q{{.*}}, #0x20ffff
 	ret <4 x i32> < i32 2162687, i32 2162687, i32 2162687, i32 2162687 >
 }
 
 define <2 x i64> @v_movQi64() nounwind {
 ;CHECK: v_movQi64:
-;CHECK: vmov.i64 q{{.*}}, #0xFF0000FF0000FFFF
+;CHECK: vmov.i64 q{{.*}}, #0xff0000ff0000ffff
 	ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
 }
 
@@ -182,7 +182,7 @@ entry:
 define void @vdupnneg75(%struct.int8x8_t* noalias nocapture sret %agg.result) nounwind {
 entry:
 ;CHECK: vdupnneg75:
-;CHECK: vmov.i8 d{{.*}}, #0xB5
+;CHECK: vmov.i8 d{{.*}}, #0xb5
   %0 = getelementptr inbounds %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1]
   store <8 x i8> <i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75>, <8 x i8>* %0, align 8
   ret void
@@ -353,3 +353,48 @@ define void @noTruncStore(<4 x i32>* %a, <4 x i16>* %b) nounwind {
   store <4 x i16> %tmp2, <4 x i16>* %b, align 8
   ret void
 }
+
+; Use vmov.f32 to materialize f32 immediate splats
+; rdar://10437054
+define void @v_mov_v2f32(<2 x float>* nocapture %p) nounwind {
+entry:
+;CHECK: v_mov_v2f32:
+;CHECK: vmov.f32 d{{.*}}, #-1.600000e+01
+  store <2 x float> <float -1.600000e+01, float -1.600000e+01>, <2 x float>* %p, align 4
+  ret void
+}
+
+define void @v_mov_v4f32(<4 x float>* nocapture %p) nounwind {
+entry:
+;CHECK: v_mov_v4f32:
+;CHECK: vmov.f32 q{{.*}}, #3.100000e+01
+  store <4 x float> <float 3.100000e+01, float 3.100000e+01, float 3.100000e+01, float 3.100000e+01>, <4 x float>* %p, align 4
+  ret void
+}
+
+define void @v_mov_v4f32_undef(<4 x float> * nocapture %p) nounwind {
+entry:
+;CHECK: v_mov_v4f32_undef:
+;CHECK: vmov.f32 q{{.*}}, #1.000000e+00
+  %a = load <4 x float> *%p
+  %b = fadd <4 x float> %a, <float undef, float 1.0, float 1.0, float 1.0>
+  store <4 x float> %b, <4 x float> *%p
+  ret void
+}
+
+; Vector any_extends must be selected as either vmovl.u or vmovl.s.
+; rdar://10723651
+define void @any_extend(<4 x i1> %x, <4 x i32> %y) nounwind ssp {
+entry:
+;CHECK: any_extend
+;CHECK: vmovl
+  %and.i186 = zext <4 x i1> %x to <4 x i32>
+  %add.i185 = sub <4 x i32> %and.i186, %y
+  %sub.i = sub <4 x i32> %add.i185, zeroinitializer
+  %add.i = add <4 x i32> %sub.i, zeroinitializer
+  %vmovn.i = trunc <4 x i32> %add.i to <4 x i16>
+  tail call void @llvm.arm.neon.vst1.v4i16(i8* undef, <4 x i16> %vmovn.i, i32 2)
+  unreachable
+}
+
+declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32) nounwind
diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll
index 1780d6e..61d89bb 100644
--- a/test/CodeGen/ARM/vmul.ll
+++ b/test/CodeGen/ARM/vmul.ll
@@ -514,3 +514,14 @@ entry:
   store <8 x i8> %10, <8 x i8>* %11, align 8
   ret void
 }
+
+; If one operand has a zero-extend and the other a sign-extend, vmull
+; cannot be used.
+define i16 @vmullWithInconsistentExtensions(<8 x i8> %vec) {
+; CHECK: vmullWithInconsistentExtensions
+; CHECK-NOT: vmull.s8
+  %1 = sext <8 x i8> %vec to <8 x i16>
+  %2 = mul <8 x i16> %1, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+  %3 = extractelement <8 x i16> %2, i32 0
+  ret i16 %3
+}
diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll
index 34acd16..122ec03 100644
--- a/test/CodeGen/ARM/vrev.ll
+++ b/test/CodeGen/ARM/vrev.ll
@@ -148,11 +148,11 @@ define void @test_with_vcombine(<4 x float>* %v) nounwind {
   ret void
 }
 
-; vrev <4 x i16> should use VREV32 and not VREV64
+; The type <2 x i16> is legalized to <2 x i32> and need to be trunc-stored
+; to <2 x i16> when stored to memory.
 define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst) nounwind ssp {
 ; CHECK: test_vrev64:
-; CHECK: vext.16
-; CHECK: vrev32.16
+; CHECK: vst1.32
 entry:
   %0 = bitcast <4 x i16>* %source to <8 x i16>*
   %tmp2 = load <8 x i16>* %0, align 4
diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll
index 915a84b..fb05a20 100644
--- a/test/CodeGen/ARM/vst2.ll
+++ b/test/CodeGen/ARM/vst2.ll
@@ -110,6 +110,24 @@ define void @vst2Qf(float* %A, <4 x float>* %B) nounwind {
 	ret void
 }
 
+define i8* @vst2update(i8* %out, <4 x i16>* %B) nounwind {
+;CHECK: vst2update
+;CHECK: vst2.16 {d16, d17}, [r0]!
+	%tmp1 = load <4 x i16>* %B
+	tail call void @llvm.arm.neon.vst2.v4i16(i8* %out, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 2)
+	%t5 = getelementptr inbounds i8* %out, i32 16
+	ret i8* %t5
+}
+
+define i8* @vst2update2(i8 * %out, <4 x float> * %this) nounwind optsize ssp align 2 {
+;CHECK: vst2update2
+;CHECK: vst2.32 {d16, d17, d18, d19}, [r0]!
+  %tmp1 = load <4 x float>* %this
+  call void @llvm.arm.neon.vst2.v4f32(i8* %out, <4 x float> %tmp1, <4 x float> %tmp1, i32 4) nounwind
+  %tmp2 = getelementptr inbounds i8* %out, i32  32
+  ret i8* %tmp2
+}
+
 declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind
 declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind
 declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind
diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll
index 08b7232..758b355 100644
--- a/test/CodeGen/ARM/vstlane.ll
+++ b/test/CodeGen/ARM/vstlane.ll
@@ -45,7 +45,7 @@ define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind {
 
 define void @vst1lanef(float* %A, <2 x float>* %B) nounwind {
 ;CHECK: vst1lanef:
-;CHECK: vst1.32 {d16[1]}, [r0]
+;CHECK: vst1.32 {d16[1]}, [r0, :32]
 	%tmp1 = load <2 x float>* %B
         %tmp2 = extractelement <2 x float> %tmp1, i32 1
         store float %tmp2, float* %A
@@ -358,6 +358,13 @@ define void @vst4laneQf(float* %A, <4 x float>* %B) nounwind {
 	ret void
 }
 
+; Make sure this doesn't crash; PR10258
+define <8 x i16> @variable_insertelement(<8 x i16> %a, i16 %b, i32 %c) nounwind readnone {
+;CHECK: variable_insertelement:
+    %r = insertelement <8 x i16> %a, i16 %b, i32 %c
+    ret <8 x i16> %r
+}
+
 declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind
 declare void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind
 declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind
diff --git a/test/CodeGen/ARM/widen-vmovs.ll b/test/CodeGen/ARM/widen-vmovs.ll
index 8fd99ba..2cffda3 100644
--- a/test/CodeGen/ARM/widen-vmovs.ll
+++ b/test/CodeGen/ARM/widen-vmovs.ll
@@ -1,9 +1,9 @@
 ; RUN: llc < %s -widen-vmovs -mcpu=cortex-a8 -verify-machineinstrs | FileCheck %s
 target triple = "thumbv7-apple-ios"
 
-; The 0.0 constant is loaded from the constant pool and kept in a register.
+; The 1.0e+10 constant is loaded from the constant pool and kept in a register.
 ; CHECK: %entry
-; CHECK: vldr.32 s
+; CHECK: vldr s
 ; The float loop variable is initialized with a vmovs from the constant register.
 ; The vmovs is first widened to a vmovd, and then converted to a vorr because of the v2f32 vadd.f32.
 ; CHECK: vorr [[DL:d[0-9]+]], [[DN:d[0-9]+]]
@@ -24,8 +24,8 @@ for.body4:
   br label %for.body.i
 
 for.body.i:
-  %tmp3.i = phi float [ 0.000000e+00, %for.body4 ], [ %add.i, %for.body.i ]
-  %add.i = fadd float %tmp3.i, 0.000000e+00
+  %tmp3.i = phi float [ 1.000000e+10, %for.body4 ], [ %add.i, %for.body.i ]
+  %add.i = fadd float %tmp3.i, 1.000000e+10
   %exitcond.i = icmp eq i32 undef, 41
   br i1 %exitcond.i, label %rInnerproduct.exit, label %for.body.i
 
diff --git a/test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll b/test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll
deleted file mode 100644
index 4b3d022..0000000
--- a/test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll
+++ /dev/null
@@ -1,40 +0,0 @@
-; This shouldn't crash
-; RUN: llc < %s -march=alpha
-
-@.str_4 = external global [44 x i8]             ; <[44 x i8]*> [#uses=0]
-
-declare void @printf(i32, ...)
-
-define void @main() {
-entry:
-        %tmp.11861 = icmp slt i64 0, 1          ; <i1> [#uses=1]
-        %tmp.19466 = icmp slt i64 0, 1          ; <i1> [#uses=1]
-        %tmp.21571 = icmp slt i64 0, 1          ; <i1> [#uses=1]
-        %tmp.36796 = icmp slt i64 0, 1          ; <i1> [#uses=1]
-        br i1 %tmp.11861, label %loopexit.2, label %no_exit.2
-
-no_exit.2:              ; preds = %entry
-        ret void
-
-loopexit.2:             ; preds = %entry
-        br i1 %tmp.19466, label %loopexit.3, label %no_exit.3.preheader
-
-no_exit.3.preheader:            ; preds = %loopexit.2
-        ret void
-
-loopexit.3:             ; preds = %loopexit.2
-        br i1 %tmp.21571, label %no_exit.6, label %no_exit.4
-
-no_exit.4:              ; preds = %loopexit.3
-        ret void
-
-no_exit.6:              ; preds = %no_exit.6, %loopexit.3
-        %tmp.30793 = icmp sgt i64 0, 0          ; <i1> [#uses=1]
-        br i1 %tmp.30793, label %loopexit.6, label %no_exit.6
-
-loopexit.6:             ; preds = %no_exit.6
-        %Z.1 = select i1 %tmp.36796, double 1.000000e+00, double 0x3FEFFF7CEDE74EAE; <double> [#uses=2]
-        tail call void (i32, ...)* @printf( i32 0, i64 0, i64 0, i64 0, double 1.000000e+00, double 1.000000e+00, double %Z.1, double %Z.1 )
-        ret void
-}
-
diff --git a/test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll b/test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll
deleted file mode 100644
index 65d2a8d..0000000
--- a/test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; The global symbol should be legalized
-; RUN: llc < %s -march=alpha 
-
-target datalayout = "e-p:64:64"
-        %struct.LIST_HELP = type { %struct.LIST_HELP*, i8* }
-        %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i32, [44 x i8] }
-        %struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
-@clause_SORT = external global [21 x %struct.LIST_HELP*]                ; <[21 x %struct.LIST_HELP*]*> [#uses=0]
-@ia_in = external global %struct._IO_FILE*              ; <%struct._IO_FILE**> [#uses=1]
-@multvec_j = external global [100 x i32]                ; <[100 x i32]*> [#uses=0]
-
-define void @main(i32 %argc) {
-clock_Init.exit:
-        %tmp.5.i575 = load i32* null            ; <i32> [#uses=1]
-        %tmp.309 = icmp eq i32 %tmp.5.i575, 0           ; <i1> [#uses=1]
-        br i1 %tmp.309, label %UnifiedReturnBlock, label %then.17
-
-then.17:                ; preds = %clock_Init.exit
-        store %struct._IO_FILE* null, %struct._IO_FILE** @ia_in
-        %savedstack = call i8* @llvm.stacksave( )               ; <i8*> [#uses=0]
-        ret void
-
-UnifiedReturnBlock:             ; preds = %clock_Init.exit
-        ret void
-}
-
-declare i8* @llvm.stacksave()
diff --git a/test/CodeGen/Alpha/2006-01-26-VaargBreak.ll b/test/CodeGen/Alpha/2006-01-26-VaargBreak.ll
deleted file mode 100644
index 45587f0..0000000
--- a/test/CodeGen/Alpha/2006-01-26-VaargBreak.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; This shouldn't crash
-; RUN: llc < %s -march=alpha 
-
-target datalayout = "e-p:64:64"
-target triple = "alphaev6-unknown-linux-gnu"
-deplibs = [ "c", "crtend", "stdc++" ]
-        %struct.__va_list_tag = type { i8*, i32 }
-
-define i32 @emit_library_call_value(i32 %nargs, ...) {
-entry:
-        %tmp.223 = va_arg %struct.__va_list_tag* null, i32              ; <i32> [#uses=1]
-        ret i32 %tmp.223
-}
-
diff --git a/test/CodeGen/Alpha/2006-04-04-zextload.ll b/test/CodeGen/Alpha/2006-04-04-zextload.ll
deleted file mode 100644
index 671d39e..0000000
--- a/test/CodeGen/Alpha/2006-04-04-zextload.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; RUN: llc < %s -march=alpha
-
-target datalayout = "e-p:64:64"
-target triple = "alphaev67-unknown-linux-gnu"
-        %struct._Callback_list = type { %struct._Callback_list*, void (i32, %struct.ios_base*, i32)*, i32, i32 }
-        %struct._Impl = type { i32, %struct.facet**, i64, %struct.facet**, i8** }
-        %struct._Words = type { i8*, i64 }
-        %"struct.__codecvt_abstract_base<char,char,__mbstate_t>" = type { %struct.facet }
-        %"struct.basic_streambuf<char,std::char_traits<char> >" = type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %struct.locale }
-        %struct.facet = type { i32 (...)**, i32 }
-        %struct.ios_base = type { i32 (...)**, i64, i64, i32, i32, i32, %struct._Callback_list*, %struct._Words, [8 x %struct._Words], i32, %struct._Words*, %struct.locale }
-        %struct.locale = type { %struct._Impl* }
-        %"struct.ostreambuf_iterator<char,std::char_traits<char> >" = type { %"struct.basic_streambuf<char,std::char_traits<char> >"*, i1 }
-
-define void @_ZNKSt7num_putIcSt19ostreambuf_iteratorIcSt11char_traitsIcEEE15_M_insert_floatIdEES3_S3_RSt8ios_baseccT_() {
-entry:
-        %tmp234 = icmp eq i8 0, 0               ; <i1> [#uses=1]
-        br i1 %tmp234, label %cond_next243, label %cond_true235
-
-cond_true235:           ; preds = %entry
-        ret void
-
-cond_next243:           ; preds = %entry
-        %tmp428 = load i64* null                ; <i64> [#uses=1]
-        %tmp428.upgrd.1 = trunc i64 %tmp428 to i32              ; <i32> [#uses=1]
-        %tmp429 = alloca i8, i32 %tmp428.upgrd.1                ; <i8*> [#uses=0]
-        unreachable
-}
-
-
diff --git a/test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll b/test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll
deleted file mode 100644
index 5d31bc3..0000000
--- a/test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=alpha
-
-target datalayout = "e-p:64:64"
-target triple = "alphaev67-unknown-linux-gnu"
-
-define i32 @_ZN9__gnu_cxx18__exchange_and_addEPVii(i32* %__mem, i32 %__val) {
-entry:
-        %__tmp = alloca i32, align 4            ; <i32*> [#uses=1]
-        %tmp3 = call i32 asm sideeffect "\0A$$Lxadd_0:\0A\09ldl_l  $0,$3\0A\09addl   $0,$4,$1\0A\09stl_c  $1,$2\0A\09beq    $1,$$Lxadd_0\0A\09mb", "=&r,=*&r,=*m,m,r"( i32* %__tmp, i32* %__mem, i32* %__mem, i32 %__val )            ; <i32> [#uses=1]
-        ret i32 %tmp3
-}
-
-define void @_ZN9__gnu_cxx12__atomic_addEPVii(i32* %__mem, i32 %__val) {
-entry:
-        %tmp2 = call i32 asm sideeffect "\0A$$Ladd_1:\0A\09ldl_l  $0,$2\0A\09addl   $0,$3,$0\0A\09stl_c  $0,$1\0A\09beq    $0,$$Ladd_1\0A\09mb", "=&r,=*m,m,r"( i32* %__mem, i32* %__mem, i32 %__val )                ; <i32> [#uses=0]
-        ret void
-}
-
diff --git a/test/CodeGen/Alpha/2006-11-01-vastart.ll b/test/CodeGen/Alpha/2006-11-01-vastart.ll
deleted file mode 100644
index 14e0bcc..0000000
--- a/test/CodeGen/Alpha/2006-11-01-vastart.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=alpha
-
-target datalayout = "e-p:64:64"
-target triple = "alphaev67-unknown-linux-gnu"
-        %struct.va_list = type { i8*, i32, i32 }
-
-define void @yyerror(i32, ...) {
-entry:
-        %va.upgrd.1 = bitcast %struct.va_list* null to i8*              ; <i8*> [#uses=1]
-        call void @llvm.va_start( i8* %va.upgrd.1 )
-        ret void
-}
-
-declare void @llvm.va_start(i8*)
-
diff --git a/test/CodeGen/Alpha/2007-11-27-mulneg3.ll b/test/CodeGen/Alpha/2007-11-27-mulneg3.ll
deleted file mode 100644
index b537e25..0000000
--- a/test/CodeGen/Alpha/2007-11-27-mulneg3.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=alpha
-
-;FIXME: this should produce no mul inst.  But not crashing will have to do for now
-
-; ModuleID = 'Output/bugpoint-train/bugpoint-reduced-simplified.bc'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128"
-target triple = "alphaev6-unknown-linux-gnu"
-
-define fastcc i32 @getcount(i32 %s) {
-cond_next43:		; preds = %bb27
-	%tmp431 = mul i32 %s, -3
-	ret i32 %tmp431
-}
diff --git a/test/CodeGen/Alpha/2008-11-10-smul_lohi.ll b/test/CodeGen/Alpha/2008-11-10-smul_lohi.ll
deleted file mode 100644
index 1a4b40e..0000000
--- a/test/CodeGen/Alpha/2008-11-10-smul_lohi.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: llc < %s -march=alpha
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128"
-target triple = "alphaev6-unknown-linux-gnu"
-
-define i64 @__mulvdi3(i64 %a, i64 %b) nounwind {
-entry:
-	%0 = sext i64 %a to i128		; <i128> [#uses=1]
-	%1 = sext i64 %b to i128		; <i128> [#uses=1]
-	%2 = mul i128 %1, %0		; <i128> [#uses=2]
-	%3 = lshr i128 %2, 64		; <i128> [#uses=1]
-	%4 = trunc i128 %3 to i64		; <i64> [#uses=1]
-	%5 = trunc i128 %2 to i64		; <i64> [#uses=1]
-	%6 = icmp eq i64 %4, 0		; <i1> [#uses=1]
-	br i1 %6, label %bb1, label %bb
-
-bb:		; preds = %entry
-	unreachable
-
-bb1:		; preds = %entry
-	ret i64 %5
-}
diff --git a/test/CodeGen/Alpha/2008-11-12-Add128.ll b/test/CodeGen/Alpha/2008-11-12-Add128.ll
deleted file mode 100644
index 8b9b603..0000000
--- a/test/CodeGen/Alpha/2008-11-12-Add128.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s
-; PR3044
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128"
-target triple = "alphaev6-unknown-linux-gnu"
-
-define i128 @__mulvti3(i128 %u, i128 %v) nounwind {
-entry:
-	%0 = load i128* null, align 16		; <i128> [#uses=1]
-	%1 = load i64* null, align 8		; <i64> [#uses=1]
-	%2 = zext i64 %1 to i128		; <i128> [#uses=1]
-	%3 = add i128 %2, %0		; <i128> [#uses=1]
-	store i128 %3, i128* null, align 16
-	unreachable
-}
diff --git a/test/CodeGen/Alpha/2009-07-16-PromoteFloatCompare.ll b/test/CodeGen/Alpha/2009-07-16-PromoteFloatCompare.ll
deleted file mode 100644
index cfbf7fc..0000000
--- a/test/CodeGen/Alpha/2009-07-16-PromoteFloatCompare.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=alpha
-
-define i1 @a(float %x) {
-  %r = fcmp ult float %x, 1.0
-  ret i1 %r
-}
diff --git a/test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll
deleted file mode 100644
index 4590f12..0000000
--- a/test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc -O0 -march=alpha -asm-verbose < %s | FileCheck %s
-; Check that DEBUG_VALUE comments come through on a variety of targets.
-
-define i32 @main() nounwind ssp {
-entry:
-; CHECK: DEBUG_VALUE
-  call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
-  ret i32 0, !dbg !10
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-!llvm.dbg.sp = !{!0}
-
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 0}
-!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
-!9 = metadata !{i32 3, i32 11, metadata !8, null}
-!10 = metadata !{i32 4, i32 2, metadata !8, null}
-
diff --git a/test/CodeGen/Alpha/2010-08-01-mulreduce64.ll b/test/CodeGen/Alpha/2010-08-01-mulreduce64.ll
deleted file mode 100644
index b838ec9..0000000
--- a/test/CodeGen/Alpha/2010-08-01-mulreduce64.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=alpha | FileCheck %s
-
-define fastcc i64 @getcount(i64 %s) {
-	%tmp431 = mul i64 %s, 12884901888
-	ret i64 %tmp431
-}
-
-; CHECK: sll $16,33,$0
-; CHECK-NEXT: sll $16,32,$1
-; CHECK-NEXT: addq $0,$1,$0
-
diff --git a/test/CodeGen/Alpha/add.ll b/test/CodeGen/Alpha/add.ll
deleted file mode 100644
index 8a92695..0000000
--- a/test/CodeGen/Alpha/add.ll
+++ /dev/null
@@ -1,178 +0,0 @@
-;test all the shifted and signextending adds and subs with and without consts
-;
-; RUN: llc < %s -march=alpha -o %t.s
-; RUN: grep {	addl} %t.s | count 2
-; RUN: grep {	addq} %t.s | count 2
-; RUN: grep {	subl} %t.s | count 2
-; RUN: grep {	subq} %t.s | count 2
-;
-; RUN: grep {s4addl} %t.s | count 2
-; RUN: grep {s8addl} %t.s | count 2
-; RUN: grep {s4addq} %t.s | count 2
-; RUN: grep {s8addq} %t.s | count 2
-;
-; RUN: grep {s4subl} %t.s | count 2
-; RUN: grep {s8subl} %t.s | count 2
-; RUN: grep {s4subq} %t.s | count 2
-; RUN: grep {s8subq} %t.s | count 2
-
-
-define signext i32 @al(i32 signext %x.s, i32 signext %y.s) {
-entry:
-	%tmp.3.s = add i32 %y.s, %x.s		; <i32> [#uses=1]
-	ret i32 %tmp.3.s
-}
-
-define signext i32 @ali(i32 signext %x.s)  {
-entry:
-	%tmp.3.s = add i32 100, %x.s		; <i32> [#uses=1]
-	ret i32 %tmp.3.s
-}
-
-define signext i64 @aq(i64 signext %x.s, i64 signext %y.s)  {
-entry:
-	%tmp.3.s = add i64 %y.s, %x.s		; <i64> [#uses=1]
-	ret i64 %tmp.3.s
-}
-
-define i64 @aqi(i64 %x.s) {
-entry:
-	%tmp.3.s = add i64 100, %x.s		; <i64> [#uses=1]
-	ret i64 %tmp.3.s
-}
-
-define signext i32 @sl(i32 signext %x.s, i32 signext %y.s)  {
-entry:
-	%tmp.3.s = sub i32 %y.s, %x.s		; <i32> [#uses=1]
-	ret i32 %tmp.3.s
-}
-
-define signext i32 @sli(i32 signext %x.s)  {
-entry:
-	%tmp.3.s = sub i32 %x.s, 100		; <i32> [#uses=1]
-	ret i32 %tmp.3.s
-}
-
-define i64 @sq(i64 %x.s, i64 %y.s) {
-entry:
-	%tmp.3.s = sub i64 %y.s, %x.s		; <i64> [#uses=1]
-	ret i64 %tmp.3.s
-}
-
-define i64 @sqi(i64 %x.s) {
-entry:
-	%tmp.3.s = sub i64 %x.s, 100		; <i64> [#uses=1]
-	ret i64 %tmp.3.s
-}
-
-define signext i32 @a4l(i32 signext %x.s, i32 signext %y.s)  {
-entry:
-	%tmp.1.s = shl i32 %y.s, 2		; <i32> [#uses=1]
-	%tmp.3.s = add i32 %tmp.1.s, %x.s		; <i32> [#uses=1]
-	ret i32 %tmp.3.s
-}
-
-define signext i32 @a8l(i32 signext %x.s, i32 signext %y.s)  {
-entry:
-	%tmp.1.s = shl i32 %y.s, 3		; <i32> [#uses=1]
-	%tmp.3.s = add i32 %tmp.1.s, %x.s		; <i32> [#uses=1]
-	ret i32 %tmp.3.s
-}
-
-define i64 @a4q(i64 %x.s, i64 %y.s) {
-entry:
-	%tmp.1.s = shl i64 %y.s, 2		; <i64> [#uses=1]
-	%tmp.3.s = add i64 %tmp.1.s, %x.s		; <i64> [#uses=1]
-	ret i64 %tmp.3.s
-}
-
-define i64 @a8q(i64 %x.s, i64 %y.s) {
-entry:
-	%tmp.1.s = shl i64 %y.s, 3		; <i64> [#uses=1]
-	%tmp.3.s = add i64 %tmp.1.s, %x.s		; <i64> [#uses=1]
-	ret i64 %tmp.3.s
-}
-
-define signext i32 @a4li(i32 signext %y.s)  {
-entry:
-	%tmp.1.s = shl i32 %y.s, 2		; <i32> [#uses=1]
-	%tmp.3.s = add i32 100, %tmp.1.s		; <i32> [#uses=1]
-	ret i32 %tmp.3.s
-}
-
-define signext i32 @a8li(i32 signext %y.s)  {
-entry:
-	%tmp.1.s = shl i32 %y.s, 3		; <i32> [#uses=1]
-	%tmp.3.s = add i32 100, %tmp.1.s		; <i32> [#uses=1]
-	ret i32 %tmp.3.s
-}
-
-define i64 @a4qi(i64 %y.s) {
-entry:
-	%tmp.1.s = shl i64 %y.s, 2		; <i64> [#uses=1]
-	%tmp.3.s = add i64 100, %tmp.1.s		; <i64> [#uses=1]
-	ret i64 %tmp.3.s
-}
-
-define i64 @a8qi(i64 %y.s) {
-entry:
-	%tmp.1.s = shl i64 %y.s, 3		; <i64> [#uses=1]
-	%tmp.3.s = add i64 100, %tmp.1.s		; <i64> [#uses=1]
-	ret i64 %tmp.3.s
-}
-
-define signext i32 @s4l(i32 signext %x.s, i32 signext %y.s)  {
-entry:
-	%tmp.1.s = shl i32 %y.s, 2		; <i32> [#uses=1]
-	%tmp.3.s = sub i32 %tmp.1.s, %x.s		; <i32> [#uses=1]
-	ret i32 %tmp.3.s
-}
-
-define signext i32 @s8l(i32 signext %x.s, i32 signext %y.s)  {
-entry:
-	%tmp.1.s = shl i32 %y.s, 3		; <i32> [#uses=1]
-	%tmp.3.s = sub i32 %tmp.1.s, %x.s		; <i32> [#uses=1]
-	ret i32 %tmp.3.s
-}
-
-define i64 @s4q(i64 %x.s, i64 %y.s) {
-entry:
-	%tmp.1.s = shl i64 %y.s, 2		; <i64> [#uses=1]
-	%tmp.3.s = sub i64 %tmp.1.s, %x.s		; <i64> [#uses=1]
-	ret i64 %tmp.3.s
-}
-
-define i64 @s8q(i64 %x.s, i64 %y.s) {
-entry:
-	%tmp.1.s = shl i64 %y.s, 3		; <i64> [#uses=1]
-	%tmp.3.s = sub i64 %tmp.1.s, %x.s		; <i64> [#uses=1]
-	ret i64 %tmp.3.s
-}
-
-define signext i32 @s4li(i32 signext %y.s)  {
-entry:
-	%tmp.1.s = shl i32 %y.s, 2		; <i32> [#uses=1]
-	%tmp.3.s = sub i32 %tmp.1.s, 100		; <i32> [#uses=1]
-	ret i32 %tmp.3.s
-}
-
-define signext i32 @s8li(i32 signext %y.s)  {
-entry:
-	%tmp.1.s = shl i32 %y.s, 3		; <i32> [#uses=1]
-	%tmp.3.s = sub i32 %tmp.1.s, 100		; <i32> [#uses=1]
-	ret i32 %tmp.3.s
-}
-
-define i64 @s4qi(i64 %y.s) {
-entry:
-	%tmp.1.s = shl i64 %y.s, 2		; <i64> [#uses=1]
-	%tmp.3.s = sub i64 %tmp.1.s, 100		; <i64> [#uses=1]
-	ret i64 %tmp.3.s
-}
-
-define i64 @s8qi(i64 %y.s) {
-entry:
-	%tmp.1.s = shl i64 %y.s, 3		; <i64> [#uses=1]
-	%tmp.3.s = sub i64 %tmp.1.s, 100		; <i64> [#uses=1]
-	ret i64 %tmp.3.s
-}
diff --git a/test/CodeGen/Alpha/add128.ll b/test/CodeGen/Alpha/add128.ll
deleted file mode 100644
index fa3b949..0000000
--- a/test/CodeGen/Alpha/add128.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-;test for ADDC and ADDE expansion
-;
-; RUN: llc < %s -march=alpha
-
-define i128 @add128(i128 %x, i128 %y) {
-entry:
-	%tmp = add i128 %y, %x
-	ret i128 %tmp
-}
diff --git a/test/CodeGen/Alpha/bic.ll b/test/CodeGen/Alpha/bic.ll
deleted file mode 100644
index 9f00350..0000000
--- a/test/CodeGen/Alpha/bic.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; Make sure this testcase codegens to the bic instruction
-; RUN: llc < %s -march=alpha | grep {bic}
-
-define i64 @bar(i64 %x, i64 %y) {
-entry:
-        %tmp.1 = xor i64 %x, -1         ; <i64> [#uses=1]
-        %tmp.2 = and i64 %y, %tmp.1             ; <i64> [#uses=1]
-        ret i64 %tmp.2
-}
diff --git a/test/CodeGen/Alpha/bsr.ll b/test/CodeGen/Alpha/bsr.ll
deleted file mode 100644
index 14f6b46..0000000
--- a/test/CodeGen/Alpha/bsr.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; Make sure this testcase codegens the bsr instruction
-; RUN: llc < %s -march=alpha | grep bsr
-
-define internal i64 @abc(i32 %x) {
-        %tmp.2 = add i32 %x, -1         ; <i32> [#uses=1]
-        %tmp.0 = call i64 @abc( i32 %tmp.2 )            ; <i64> [#uses=1]
-        %tmp.5 = add i32 %x, -2         ; <i32> [#uses=1]
-        %tmp.3 = call i64 @abc( i32 %tmp.5 )            ; <i64> [#uses=1]
-        %tmp.6 = add i64 %tmp.0, %tmp.3         ; <i64> [#uses=1]
-        ret i64 %tmp.6
-}
-
diff --git a/test/CodeGen/Alpha/call_adj.ll b/test/CodeGen/Alpha/call_adj.ll
deleted file mode 100644
index 24e97a9..0000000
--- a/test/CodeGen/Alpha/call_adj.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-;All this should do is not crash
-;RUN: llc < %s -march=alpha
-
-target datalayout = "e-p:64:64"
-target triple = "alphaev67-unknown-linux-gnu"
-
-define void @_ZNSt13basic_filebufIcSt11char_traitsIcEE22_M_convert_to_externalEPcl(i32 %f) {
-entry:
-        %tmp49 = alloca i8, i32 %f              ; <i8*> [#uses=0]
-        %tmp = call i32 null( i8* null, i8* null, i8* null, i8* null, i8* null, i8* null, i8* null )               ; <i32> [#uses=0]
-        ret void
-}
-
diff --git a/test/CodeGen/Alpha/cmov.ll b/test/CodeGen/Alpha/cmov.ll
deleted file mode 100644
index 9b655f0..0000000
--- a/test/CodeGen/Alpha/cmov.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s -march=alpha | not grep cmovlt
-; RUN: llc < %s -march=alpha | grep cmoveq
-
-define i64 @cmov_lt(i64 %a, i64 %c) {
-entry:
-        %tmp.1 = icmp slt i64 %c, 0             ; <i1> [#uses=1]
-        %retval = select i1 %tmp.1, i64 %a, i64 10              ; <i64> [#uses=1]
-        ret i64 %retval
-}
-
-define i64 @cmov_const(i64 %a, i64 %b, i64 %c) {
-entry:
-        %tmp.1 = icmp slt i64 %a, %b            ; <i1> [#uses=1]
-        %retval = select i1 %tmp.1, i64 %c, i64 10              ; <i64> [#uses=1]
-        ret i64 %retval
-}
-
-define i64 @cmov_lt2(i64 %a, i64 %c) {
-entry:
-        %tmp.1 = icmp sgt i64 %c, 0             ; <i1> [#uses=1]
-        %retval = select i1 %tmp.1, i64 10, i64 %a              ; <i64> [#uses=1]
-        ret i64 %retval
-}
diff --git a/test/CodeGen/Alpha/cmpbge.ll b/test/CodeGen/Alpha/cmpbge.ll
deleted file mode 100644
index e88d2ee..0000000
--- a/test/CodeGen/Alpha/cmpbge.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=alpha | grep cmpbge | count 2
-
-define i1 @test1(i64 %A, i64 %B) {
-        %C = and i64 %A, 255            ; <i64> [#uses=1]
-        %D = and i64 %B, 255            ; <i64> [#uses=1]
-        %E = icmp uge i64 %C, %D                ; <i1> [#uses=1]
-        ret i1 %E
-}
-
-define i1 @test2(i64 %a, i64 %B) {
-        %A = shl i64 %a, 1              ; <i64> [#uses=1]
-        %C = and i64 %A, 254            ; <i64> [#uses=1]
-        %D = and i64 %B, 255            ; <i64> [#uses=1]
-        %E = icmp uge i64 %C, %D                ; <i1> [#uses=1]
-        ret i1 %E
-}
diff --git a/test/CodeGen/Alpha/ctlz.ll b/test/CodeGen/Alpha/ctlz.ll
deleted file mode 100644
index aa1588a..0000000
--- a/test/CodeGen/Alpha/ctlz.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; Make sure this testcase codegens to the ctlz instruction
-; RUN: llc < %s -march=alpha -mcpu=ev67 | grep -i ctlz
-; RUN: llc < %s -march=alpha -mattr=+CIX | grep -i ctlz
-; RUN: llc < %s -march=alpha -mcpu=ev6 | not grep -i ctlz
-; RUN: llc < %s -march=alpha -mattr=-CIX | not grep -i ctlz
-
-declare i8 @llvm.ctlz.i8(i8)
-
-define i32 @bar(i8 %x) {
-entry:
-	%tmp.1 = call i8 @llvm.ctlz.i8( i8 %x ) 
-	%tmp.2 = sext i8 %tmp.1 to i32
-	ret i32 %tmp.2
-}
diff --git a/test/CodeGen/Alpha/ctlz_e.ll b/test/CodeGen/Alpha/ctlz_e.ll
deleted file mode 100644
index 230e096..0000000
--- a/test/CodeGen/Alpha/ctlz_e.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; Make sure this testcase does not use ctpop
-; RUN: llc < %s -march=alpha | not grep -i ctpop 
-
-declare i64 @llvm.ctlz.i64(i64)
-
-define i64 @bar(i64 %x) {
-entry:
-        %tmp.1 = call i64 @llvm.ctlz.i64( i64 %x )              ; <i64> [#uses=1]
-        ret i64 %tmp.1
-}
-
diff --git a/test/CodeGen/Alpha/ctpop.ll b/test/CodeGen/Alpha/ctpop.ll
deleted file mode 100644
index f887882..0000000
--- a/test/CodeGen/Alpha/ctpop.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; Make sure this testcase codegens to the ctpop instruction
-; RUN: llc < %s -march=alpha -mcpu=ev67 | grep -i ctpop
-; RUN: llc < %s -march=alpha -mattr=+CIX | \
-; RUN:   grep -i ctpop
-; RUN: llc < %s -march=alpha -mcpu=ev6 | \
-; RUN:   not grep -i ctpop
-; RUN: llc < %s -march=alpha -mattr=-CIX | \
-; RUN:   not grep -i ctpop
-
-declare i64 @llvm.ctpop.i64(i64)
-
-define i64 @bar(i64 %x) {
-entry:
-        %tmp.1 = call i64 @llvm.ctpop.i64( i64 %x )             ; <i64> [#uses=1]
-        ret i64 %tmp.1
-}
-
diff --git a/test/CodeGen/Alpha/dg.exp b/test/CodeGen/Alpha/dg.exp
deleted file mode 100644
index fb9f710..0000000
--- a/test/CodeGen/Alpha/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target Alpha] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
-}
diff --git a/test/CodeGen/Alpha/eqv.ll b/test/CodeGen/Alpha/eqv.ll
deleted file mode 100644
index b3413d6..0000000
--- a/test/CodeGen/Alpha/eqv.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; Make sure this testcase codegens to the eqv instruction
-; RUN: llc < %s -march=alpha | grep eqv
-
-define i64 @bar(i64 %x, i64 %y) {
-entry:
-        %tmp.1 = xor i64 %x, -1         ; <i64> [#uses=1]
-        %tmp.2 = xor i64 %y, %tmp.1             ; <i64> [#uses=1]
-        ret i64 %tmp.2
-}
-
diff --git a/test/CodeGen/Alpha/i32_sub_1.ll b/test/CodeGen/Alpha/i32_sub_1.ll
deleted file mode 100644
index 35b1d08..0000000
--- a/test/CodeGen/Alpha/i32_sub_1.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; Make sure this testcase codegens to the ctpop instruction
-; RUN: llc < %s -march=alpha | grep -i {subl \$16,1,\$0}
-
-
-define signext i32 @foo(i32 signext %x) {
-entry:
-	%tmp.1 = add i32 %x, -1		; <int> [#uses=1]
-	ret i32 %tmp.1
-}
diff --git a/test/CodeGen/Alpha/illegal-element-type.ll b/test/CodeGen/Alpha/illegal-element-type.ll
deleted file mode 100644
index 4cf80dee..0000000
--- a/test/CodeGen/Alpha/illegal-element-type.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s -mtriple=alphaev6-unknown-linux-gnu
-
-define void @foo() {
-entry:
-        br label %bb
-
-bb:             ; preds = %bb, %entry
-        br i1 false, label %bb26, label %bb
-
-bb19:           ; preds = %bb26
-        ret void
-
-bb26:           ; preds = %bb
-        br i1 false, label %bb30, label %bb19
-
-bb30:           ; preds = %bb26
-        br label %bb45
-
-bb45:           ; preds = %bb45, %bb30
-        %V.0 = phi <8 x i16> [ %tmp42, %bb45 ], [ zeroinitializer, %bb30 ]     ; <<8 x i16>> [#uses=1]
-        %tmp42 = mul <8 x i16> zeroinitializer, %V.0            ; <<8 x i16>> [#uses=1]
-        br label %bb45
-}
diff --git a/test/CodeGen/Alpha/jmp_table.ll b/test/CodeGen/Alpha/jmp_table.ll
deleted file mode 100644
index 917c932..0000000
--- a/test/CodeGen/Alpha/jmp_table.ll
+++ /dev/null
@@ -1,99 +0,0 @@
-; try to check that we have the most important instructions, which shouldn't 
-; appear otherwise
-; RUN: llc < %s -march=alpha | grep jmp
-; RUN: llc < %s -march=alpha | grep gprel32
-; RUN: llc < %s -march=alpha | grep ldl
-; RUN: llc < %s -march=alpha | grep rodata
-; END.
-
-target datalayout = "e-p:64:64"
-target triple = "alphaev67-unknown-linux-gnu"
-@str = internal constant [2 x i8] c"1\00"               ; <[2 x i8]*> [#uses=1]
-@str1 = internal constant [2 x i8] c"2\00"              ; <[2 x i8]*> [#uses=1]
-@str2 = internal constant [2 x i8] c"3\00"              ; <[2 x i8]*> [#uses=1]
-@str3 = internal constant [2 x i8] c"4\00"              ; <[2 x i8]*> [#uses=1]
-@str4 = internal constant [2 x i8] c"5\00"              ; <[2 x i8]*> [#uses=1]
-@str5 = internal constant [2 x i8] c"6\00"              ; <[2 x i8]*> [#uses=1]
-@str6 = internal constant [2 x i8] c"7\00"              ; <[2 x i8]*> [#uses=1]
-@str7 = internal constant [2 x i8] c"8\00"              ; <[2 x i8]*> [#uses=1]
-
-define i32 @main(i32 %x, i8** %y) {
-entry:
-        %x_addr = alloca i32            ; <i32*> [#uses=2]
-        %y_addr = alloca i8**           ; <i8***> [#uses=1]
-        %retval = alloca i32, align 4           ; <i32*> [#uses=2]
-        %tmp = alloca i32, align 4              ; <i32*> [#uses=2]
-        %foo = alloca i8*, align 8              ; <i8**> [#uses=9]
-        %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-        store i32 %x, i32* %x_addr
-        store i8** %y, i8*** %y_addr
-        %tmp.upgrd.1 = load i32* %x_addr                ; <i32> [#uses=1]
-        switch i32 %tmp.upgrd.1, label %bb15 [
-                 i32 1, label %bb
-                 i32 2, label %bb1
-                 i32 3, label %bb3
-                 i32 4, label %bb5
-                 i32 5, label %bb7
-                 i32 6, label %bb9
-                 i32 7, label %bb11
-                 i32 8, label %bb13
-        ]
-
-bb:             ; preds = %entry
-        %tmp.upgrd.2 = getelementptr [2 x i8]* @str, i32 0, i64 0               ; <i8*> [#uses=1]
-        store i8* %tmp.upgrd.2, i8** %foo
-        br label %bb16
-
-bb1:            ; preds = %entry
-        %tmp2 = getelementptr [2 x i8]* @str1, i32 0, i64 0             ; <i8*> [#uses=1]
-        store i8* %tmp2, i8** %foo
-        br label %bb16
-
-bb3:            ; preds = %entry
-        %tmp4 = getelementptr [2 x i8]* @str2, i32 0, i64 0             ; <i8*> [#uses=1]
-        store i8* %tmp4, i8** %foo
-        br label %bb16
-
-bb5:            ; preds = %entry
-        %tmp6 = getelementptr [2 x i8]* @str3, i32 0, i64 0             ; <i8*> [#uses=1]
-        store i8* %tmp6, i8** %foo
-        br label %bb16
-
-bb7:            ; preds = %entry
-        %tmp8 = getelementptr [2 x i8]* @str4, i32 0, i64 0             ; <i8*> [#uses=1]
-        store i8* %tmp8, i8** %foo
-        br label %bb16
-
-bb9:            ; preds = %entry
-        %tmp10 = getelementptr [2 x i8]* @str5, i32 0, i64 0            ; <i8*> [#uses=1]
-        store i8* %tmp10, i8** %foo
-        br label %bb16
-
-bb11:           ; preds = %entry
-        %tmp12 = getelementptr [2 x i8]* @str6, i32 0, i64 0            ; <i8*> [#uses=1]
-        store i8* %tmp12, i8** %foo
-        br label %bb16
-
-bb13:           ; preds = %entry
-        %tmp14 = getelementptr [2 x i8]* @str7, i32 0, i64 0            ; <i8*> [#uses=1]
-        store i8* %tmp14, i8** %foo
-        br label %bb16
-
-bb15:           ; preds = %entry
-        br label %bb16
-
-bb16:           ; preds = %bb15, %bb13, %bb11, %bb9, %bb7, %bb5, %bb3, %bb1, %bb
-        %tmp17 = load i8** %foo         ; <i8*> [#uses=1]
-        %tmp18 = call i32 (...)* @print( i8* %tmp17 )           ; <i32> [#uses=0]
-        store i32 0, i32* %tmp
-        %tmp19 = load i32* %tmp         ; <i32> [#uses=1]
-        store i32 %tmp19, i32* %retval
-        br label %return
-
-return:         ; preds = %bb16
-        %retval.upgrd.3 = load i32* %retval             ; <i32> [#uses=1]
-        ret i32 %retval.upgrd.3
-}
-
-declare i32 @print(...)
-
diff --git a/test/CodeGen/Alpha/mb.ll b/test/CodeGen/Alpha/mb.ll
deleted file mode 100644
index 3268c54..0000000
--- a/test/CodeGen/Alpha/mb.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=alpha | grep mb
-
-define void @test() {
-	fence seq_cst
-	ret void
-}
diff --git a/test/CodeGen/Alpha/mul128.ll b/test/CodeGen/Alpha/mul128.ll
deleted file mode 100644
index daf8409..0000000
--- a/test/CodeGen/Alpha/mul128.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=alpha
-
-define i128 @__mulvdi3(i128 %a, i128 %b) nounwind {
-entry:
-        %r = mul i128 %a, %b
-        ret i128 %r
-}
diff --git a/test/CodeGen/Alpha/mul5.ll b/test/CodeGen/Alpha/mul5.ll
deleted file mode 100644
index 4075dd6..0000000
--- a/test/CodeGen/Alpha/mul5.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; Make sure this testcase does not use mulq
-; RUN: llc < %s -march=alpha | not grep -i mul
-
-define i64 @foo1(i64 %x) {
-entry:
-        %tmp.1 = mul i64 %x, 9          ; <i64> [#uses=1]
-        ret i64 %tmp.1
-}
-
-define i64 @foo3(i64 %x) {
-entry:
-        %tmp.1 = mul i64 %x, 259                ; <i64> [#uses=1]
-        ret i64 %tmp.1
-}
-
-define i64 @foo4l(i64 %x) {
-entry:
-        %tmp.1 = mul i64 %x, 260                ; <i64> [#uses=1]
-        ret i64 %tmp.1
-}
-
-define i64 @foo8l(i64 %x) {
-entry:
-        %tmp.1 = mul i64 %x, 768                ; <i64> [#uses=1]
-        ret i64 %tmp.1
-}
-
-define i64 @bar(i64 %x) {
-entry:
-        %tmp.1 = mul i64 %x, 5          ; <i64> [#uses=1]
-        ret i64 %tmp.1
-}
-
diff --git a/test/CodeGen/Alpha/neg1.ll b/test/CodeGen/Alpha/neg1.ll
deleted file mode 100644
index 0db767f..0000000
--- a/test/CodeGen/Alpha/neg1.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; Make sure this testcase codegens to the lda -1 instruction
-; RUN: llc < %s -march=alpha | grep {\\-1}
-
-define i64 @bar() {
-entry:
-	ret i64 -1
-}
diff --git a/test/CodeGen/Alpha/not.ll b/test/CodeGen/Alpha/not.ll
deleted file mode 100644
index 4f0a5c2..0000000
--- a/test/CodeGen/Alpha/not.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; Make sure this testcase codegens to the ornot instruction
-; RUN: llc < %s -march=alpha | grep eqv
-
-define i64 @bar(i64 %x) {
-entry:
-        %tmp.1 = xor i64 %x, -1         ; <i64> [#uses=1]
-        ret i64 %tmp.1
-}
diff --git a/test/CodeGen/Alpha/ornot.ll b/test/CodeGen/Alpha/ornot.ll
deleted file mode 100644
index f930e34..0000000
--- a/test/CodeGen/Alpha/ornot.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; Make sure this testcase codegens to the ornot instruction
-; RUN: llc < %s -march=alpha | grep ornot
-
-define i64 @bar(i64 %x, i64 %y) {
-entry:
-        %tmp.1 = xor i64 %x, -1         ; <i64> [#uses=1]
-        %tmp.2 = or i64 %y, %tmp.1              ; <i64> [#uses=1]
-        ret i64 %tmp.2
-}
-
diff --git a/test/CodeGen/Alpha/private.ll b/test/CodeGen/Alpha/private.ll
deleted file mode 100644
index f8d3094..0000000
--- a/test/CodeGen/Alpha/private.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; Test to make sure that the 'private' is used correctly.
-;
-; RUN: llc < %s -march=alpha > %t
-; RUN: grep \\\$foo: %t
-; RUN: grep bsr.*\\\$\\\$foo %t
-; RUN: grep \\\$baz: %t
-; RUN: grep ldah.*\\\$baz %t
-
-define private void @foo() {
-        ret void
-}
-
-@baz = private global i32 4
-
-define i32 @bar() {
-        call void @foo()
-	%1 = load i32* @baz, align 4
-        ret i32 %1
-}
diff --git a/test/CodeGen/Alpha/rpcc.ll b/test/CodeGen/Alpha/rpcc.ll
deleted file mode 100644
index d6665b5..0000000
--- a/test/CodeGen/Alpha/rpcc.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=alpha | grep rpcc
-
-declare i64 @llvm.readcyclecounter()
-
-define i64 @foo() {
-entry:
-        %tmp.1 = call i64 @llvm.readcyclecounter( )             ; <i64> [#uses=1]
-        ret i64 %tmp.1
-}
diff --git a/test/CodeGen/Alpha/srl_and.ll b/test/CodeGen/Alpha/srl_and.ll
deleted file mode 100644
index 3042ef3..0000000
--- a/test/CodeGen/Alpha/srl_and.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; Make sure this testcase codegens to the zapnot instruction
-; RUN: llc < %s -march=alpha | grep zapnot
-
-define i64 @foo(i64 %y) {
-entry:
-        %tmp = lshr i64 %y, 3           ; <i64> [#uses=1]
-        %tmp2 = and i64 %tmp, 8191              ; <i64> [#uses=1]
-        ret i64 %tmp2
-}
-
diff --git a/test/CodeGen/Alpha/sub128.ll b/test/CodeGen/Alpha/sub128.ll
deleted file mode 100644
index d26404b..0000000
--- a/test/CodeGen/Alpha/sub128.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-;test for SUBC and SUBE expansion
-;
-; RUN: llc < %s -march=alpha
-
-define i128 @sub128(i128 %x, i128 %y) {
-entry:
-	%tmp = sub i128 %y, %x
-	ret i128 %tmp
-}
diff --git a/test/CodeGen/Alpha/weak.ll b/test/CodeGen/Alpha/weak.ll
deleted file mode 100644
index ff04de9..0000000
--- a/test/CodeGen/Alpha/weak.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=alpha | grep .weak.*f
-; RUN: llc < %s -march=alpha | grep .weak.*h
-
-define weak i32 @f() {
-entry:
-        unreachable
-}
-
-define void @g() {
-entry:
-        tail call void @h( )
-        ret void
-}
-
-declare extern_weak void @h()
-
diff --git a/test/CodeGen/Alpha/zapnot.ll b/test/CodeGen/Alpha/zapnot.ll
deleted file mode 100644
index a47035e..0000000
--- a/test/CodeGen/Alpha/zapnot.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; Make sure this testcase codegens to the bic instruction
-; RUN: llc < %s -march=alpha | grep zapnot
-
-
-define zeroext i16 @foo(i64 %y)  {
-entry:
-        %tmp.1 = trunc i64 %y to i16         ; <ushort> [#uses=1]
-        ret i16 %tmp.1
-}
diff --git a/test/CodeGen/Alpha/zapnot2.ll b/test/CodeGen/Alpha/zapnot2.ll
deleted file mode 100644
index cd3caae..0000000
--- a/test/CodeGen/Alpha/zapnot2.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; Make sure this testcase codegens to the zapnot instruction
-; RUN: llc < %s -march=alpha | grep zapnot
-
-define i64 @bar(i64 %x) {
-entry:
-        %tmp.1 = and i64 %x, 16711935           ; <i64> [#uses=1]
-        ret i64 %tmp.1
-}
-
diff --git a/test/CodeGen/Alpha/zapnot3.ll b/test/CodeGen/Alpha/zapnot3.ll
deleted file mode 100644
index f02961f..0000000
--- a/test/CodeGen/Alpha/zapnot3.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=alpha | grep zapnot
-
-;demanded bits mess up this mask in a hard to fix way
-;define i64 @foo(i64 %y) {
-;        %tmp = and i64 %y,  65535
-;        %tmp2 = shr i64 %tmp,  i8 3
-;        ret i64 %tmp2
-;}
-
-define i64 @foo2(i64 %y) {
-        %tmp = lshr i64 %y, 3           ; <i64> [#uses=1]
-        %tmp2 = and i64 %tmp, 8191              ; <i64> [#uses=1]
-        ret i64 %tmp2
-}
-
diff --git a/test/CodeGen/Alpha/zapnot4.ll b/test/CodeGen/Alpha/zapnot4.ll
deleted file mode 100644
index 89beeef..0000000
--- a/test/CodeGen/Alpha/zapnot4.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=alpha | grep zapnot
-
-define i64 @foo(i64 %y) {
-        %tmp = shl i64 %y, 3            ; <i64> [#uses=1]
-        %tmp2 = and i64 %tmp, 65535             ; <i64> [#uses=1]
-        ret i64 %tmp2
-}
diff --git a/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll b/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll
deleted file mode 100644
index 50fccb4..0000000
--- a/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=bfin -join-liveintervals=0 -verify-machineinstrs
-; RUN: llc < %s -march=bfin -join-liveintervals=0 -verify-machineinstrs -regalloc=greedy
-
-; Provoke an error in LowerSubregsPass::LowerExtract where the live range of a
-; super-register is illegally extended.
-
-define i16 @f(i16 %x1, i16 %x2, i16 %x3, i16 %x4) {
-  %y1 = add i16 %x1, 1
-  %y2 = add i16 %x2, 2
-  %y3 = add i16 %x3, 3
-  %y4 = add i16 %x4, 4
-  %z12 = add i16 %y1, %y2
-  %z34 = add i16 %y3, %y4
-  %p = add i16 %z12, %z34
-  ret i16 %p
-}
diff --git a/test/CodeGen/Blackfin/2009-08-11-RegScavenger-CSR.ll b/test/CodeGen/Blackfin/2009-08-11-RegScavenger-CSR.ll
deleted file mode 100644
index e5d1637..0000000
--- a/test/CodeGen/Blackfin/2009-08-11-RegScavenger-CSR.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-declare i64 @llvm.cttz.i64(i64) nounwind readnone
-
-declare i16 @llvm.cttz.i16(i16) nounwind readnone
-
-declare i8 @llvm.cttz.i8(i8) nounwind readnone
-
-define void @cttztest(i8 %A, i16 %B, i32 %C, i64 %D, i8* %AP, i16* %BP, i32* %CP, i64* %DP) {
-	%a = call i8 @llvm.cttz.i8(i8 %A)		; <i8> [#uses=1]
-	%b = call i16 @llvm.cttz.i16(i16 %B)		; <i16> [#uses=1]
-	%d = call i64 @llvm.cttz.i64(i64 %D)		; <i64> [#uses=1]
-	store i8 %a, i8* %AP
-	store i16 %b, i16* %BP
-	store i64 %d, i64* %DP
-	ret void
-}
diff --git a/test/CodeGen/Blackfin/2009-08-15-LiveIn-SubReg.ll b/test/CodeGen/Blackfin/2009-08-15-LiveIn-SubReg.ll
deleted file mode 100644
index 0b731dc..0000000
--- a/test/CodeGen/Blackfin/2009-08-15-LiveIn-SubReg.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-; When joining live intervals of sub-registers, an MBB live-in list is not
-; updated properly. The register scavenger asserts on an undefined register.
-
-define i32 @foo(i8 %bar) {
-entry:
-  switch i8 %bar, label %bb1203 [
-    i8 117, label %bb1204
-    i8 85, label %bb1204
-    i8 106, label %bb1204
-  ]
-
-bb1203:                                           ; preds = %entry
-  ret i32 1
-
-bb1204:                                           ; preds = %entry, %entry, %entry
-  ret i32 2
-}
diff --git a/test/CodeGen/Blackfin/2009-08-15-MissingDead.ll b/test/CodeGen/Blackfin/2009-08-15-MissingDead.ll
deleted file mode 100644
index dcc3ea0..0000000
--- a/test/CodeGen/Blackfin/2009-08-15-MissingDead.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-; LocalRewriter can forget to transfer a <def,dead> flag when setting up call
-; argument registers. This then causes register scavenger asserts.
-
-declare i32 @printf(i8*, i32, float)
-
-define i32 @testissue(i32 %i, float %x, float %y) {
-  br label %bb1
-
-bb1:                                              ; preds = %bb1, %0
-  %x2 = fmul float %x, 5.000000e-01               ; <float> [#uses=1]
-  %y2 = fmul float %y, 0x3FECCCCCC0000000         ; <float> [#uses=1]
-  %z2 = fadd float %x2, %y2                       ; <float> [#uses=1]
-  %z3 = fadd float undef, %z2                     ; <float> [#uses=1]
-  %i1 = shl i32 %i, 3                             ; <i32> [#uses=1]
-  %j1 = add i32 %i, 7                             ; <i32> [#uses=1]
-  %m1 = add i32 %i1, %j1                          ; <i32> [#uses=2]
-  %b = icmp sle i32 %m1, 6                        ; <i1> [#uses=1]
-  br i1 %b, label %bb1, label %bb2
-
-bb2:                                              ; preds = %bb1
-  %1 = call i32 @printf(i8* undef, i32 %m1, float %z3); <i32> [#uses=0]
-  ret i32 0
-}
diff --git a/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll b/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll
deleted file mode 100644
index b6cd2d4..0000000
--- a/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-; An undef argument causes a setugt node to escape instruction selection.
-
-define void @bugt() {
-cond_next305:
-  %tmp306307 = trunc i32 undef to i8              ; <i8> [#uses=1]
-  %tmp308 = icmp ugt i8 %tmp306307, 6             ; <i1> [#uses=1]
-  br i1 %tmp308, label %bb311, label %bb314
-
-bb311:                                            ; preds = %cond_next305
-  unreachable
-
-bb314:                                            ; preds = %cond_next305
-  ret void
-}
diff --git a/test/CodeGen/Blackfin/add-overflow.ll b/test/CodeGen/Blackfin/add-overflow.ll
deleted file mode 100644
index 8dcf3f8..0000000
--- a/test/CodeGen/Blackfin/add-overflow.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
-
-	%0 = type { i24, i1 }		; type %0
-
-define i1 @func2(i24 zeroext %v1, i24 zeroext %v2) nounwind {
-entry:
-	%t = call %0 @llvm.uadd.with.overflow.i24(i24 %v1, i24 %v2)		; <%0> [#uses=1]
-	%obit = extractvalue %0 %t, 1		; <i1> [#uses=1]
-	br i1 %obit, label %carry, label %normal
-
-normal:		; preds = %entry
-	ret i1 true
-
-carry:		; preds = %entry
-	ret i1 false
-}
-
-declare %0 @llvm.uadd.with.overflow.i24(i24, i24) nounwind
diff --git a/test/CodeGen/Blackfin/add.ll b/test/CodeGen/Blackfin/add.ll
deleted file mode 100644
index 3311c03..0000000
--- a/test/CodeGen/Blackfin/add.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-define i32 @add(i32 %A, i32 %B) {
-	%R = add i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R
-}
diff --git a/test/CodeGen/Blackfin/addsub-i128.ll b/test/CodeGen/Blackfin/addsub-i128.ll
deleted file mode 100644
index dd56101..0000000
--- a/test/CodeGen/Blackfin/addsub-i128.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-; These functions have just the right size to annoy the register scavenger: They
-; use all the scratch registers, but not all the callee-saved registers.
-
-define void @test_add(i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) {
-entry:
-	%tmp1 = zext i64 %AL to i128		; <i128> [#uses=1]
-	%tmp23 = zext i64 %AH to i128		; <i128> [#uses=1]
-	%tmp4 = shl i128 %tmp23, 64		; <i128> [#uses=1]
-	%tmp5 = or i128 %tmp4, %tmp1		; <i128> [#uses=1]
-	%tmp67 = zext i64 %BL to i128		; <i128> [#uses=1]
-	%tmp89 = zext i64 %BH to i128		; <i128> [#uses=1]
-	%tmp11 = shl i128 %tmp89, 64		; <i128> [#uses=1]
-	%tmp12 = or i128 %tmp11, %tmp67		; <i128> [#uses=1]
-	%tmp15 = add i128 %tmp12, %tmp5		; <i128> [#uses=2]
-	%tmp1617 = trunc i128 %tmp15 to i64		; <i64> [#uses=1]
-	store i64 %tmp1617, i64* %RL
-	%tmp21 = lshr i128 %tmp15, 64		; <i128> [#uses=1]
-	%tmp2122 = trunc i128 %tmp21 to i64		; <i64> [#uses=1]
-	store i64 %tmp2122, i64* %RH
-	ret void
-}
-
-define void @test_sub(i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) {
-entry:
-	%tmp1 = zext i64 %AL to i128		; <i128> [#uses=1]
-	%tmp23 = zext i64 %AH to i128		; <i128> [#uses=1]
-	%tmp4 = shl i128 %tmp23, 64		; <i128> [#uses=1]
-	%tmp5 = or i128 %tmp4, %tmp1		; <i128> [#uses=1]
-	%tmp67 = zext i64 %BL to i128		; <i128> [#uses=1]
-	%tmp89 = zext i64 %BH to i128		; <i128> [#uses=1]
-	%tmp11 = shl i128 %tmp89, 64		; <i128> [#uses=1]
-	%tmp12 = or i128 %tmp11, %tmp67		; <i128> [#uses=1]
-	%tmp15 = sub i128 %tmp5, %tmp12		; <i128> [#uses=2]
-	%tmp1617 = trunc i128 %tmp15 to i64		; <i64> [#uses=1]
-	store i64 %tmp1617, i64* %RL
-	%tmp21 = lshr i128 %tmp15, 64		; <i128> [#uses=1]
-	%tmp2122 = trunc i128 %tmp21 to i64		; <i64> [#uses=1]
-	store i64 %tmp2122, i64* %RH
-	ret void
-}
diff --git a/test/CodeGen/Blackfin/basic-i1.ll b/test/CodeGen/Blackfin/basic-i1.ll
deleted file mode 100644
index c63adab..0000000
--- a/test/CodeGen/Blackfin/basic-i1.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; RUN: llc < %s -march=bfin > %t
-
-define i1 @add(i1 %A, i1 %B) {
-	%R = add i1 %A, %B		; <i1> [#uses=1]
-	ret i1 %R
-}
-
-define i1 @sub(i1 %A, i1 %B) {
-	%R = sub i1 %A, %B		; <i1> [#uses=1]
-	ret i1 %R
-}
-
-define i1 @mul(i1 %A, i1 %B) {
-	%R = mul i1 %A, %B		; <i1> [#uses=1]
-	ret i1 %R
-}
-
-define i1 @sdiv(i1 %A, i1 %B) {
-	%R = sdiv i1 %A, %B		; <i1> [#uses=1]
-	ret i1 %R
-}
-
-define i1 @udiv(i1 %A, i1 %B) {
-	%R = udiv i1 %A, %B		; <i1> [#uses=1]
-	ret i1 %R
-}
-
-define i1 @srem(i1 %A, i1 %B) {
-	%R = srem i1 %A, %B		; <i1> [#uses=1]
-	ret i1 %R
-}
-
-define i1 @urem(i1 %A, i1 %B) {
-	%R = urem i1 %A, %B		; <i1> [#uses=1]
-	ret i1 %R
-}
-
-define i1 @and(i1 %A, i1 %B) {
-	%R = and i1 %A, %B		; <i1> [#uses=1]
-	ret i1 %R
-}
-
-define i1 @or(i1 %A, i1 %B) {
-	%R = or i1 %A, %B		; <i1> [#uses=1]
-	ret i1 %R
-}
-
-define i1 @xor(i1 %A, i1 %B) {
-	%R = xor i1 %A, %B		; <i1> [#uses=1]
-	ret i1 %R
-}
diff --git a/test/CodeGen/Blackfin/basic-i16.ll b/test/CodeGen/Blackfin/basic-i16.ll
deleted file mode 100644
index 541e9a8..0000000
--- a/test/CodeGen/Blackfin/basic-i16.ll
+++ /dev/null
@@ -1,36 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-define i16 @add(i16 %A, i16 %B) {
-	%R = add i16 %A, %B		; <i16> [#uses=1]
-	ret i16 %R
-}
-
-define i16 @sub(i16 %A, i16 %B) {
-	%R = sub i16 %A, %B		; <i16> [#uses=1]
-	ret i16 %R
-}
-
-define i16 @mul(i16 %A, i16 %B) {
-	%R = mul i16 %A, %B		; <i16> [#uses=1]
-	ret i16 %R
-}
-
-define i16 @sdiv(i16 %A, i16 %B) {
-	%R = sdiv i16 %A, %B		; <i16> [#uses=1]
-	ret i16 %R
-}
-
-define i16 @udiv(i16 %A, i16 %B) {
-	%R = udiv i16 %A, %B		; <i16> [#uses=1]
-	ret i16 %R
-}
-
-define i16 @srem(i16 %A, i16 %B) {
-	%R = srem i16 %A, %B		; <i16> [#uses=1]
-	ret i16 %R
-}
-
-define i16 @urem(i16 %A, i16 %B) {
-	%R = urem i16 %A, %B		; <i16> [#uses=1]
-	ret i16 %R
-}
diff --git a/test/CodeGen/Blackfin/basic-i32.ll b/test/CodeGen/Blackfin/basic-i32.ll
deleted file mode 100644
index 4b5dbfc..0000000
--- a/test/CodeGen/Blackfin/basic-i32.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-define i32 @add(i32 %A, i32 %B) {
-	%R = add i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R
-}
-
-define i32 @sub(i32 %A, i32 %B) {
-	%R = sub i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R
-}
-
-define i32 @mul(i32 %A, i32 %B) {
-	%R = mul i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R
-}
-
-define i32 @sdiv(i32 %A, i32 %B) {
-	%R = sdiv i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R
-}
-
-define i32 @udiv(i32 %A, i32 %B) {
-	%R = udiv i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R
-}
-
-define i32 @srem(i32 %A, i32 %B) {
-	%R = srem i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R
-}
-
-define i32 @urem(i32 %A, i32 %B) {
-	%R = urem i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R
-}
-
-define i32 @and(i32 %A, i32 %B) {
-	%R = and i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R
-}
-
-define i32 @or(i32 %A, i32 %B) {
-	%R = or i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R
-}
-
-define i32 @xor(i32 %A, i32 %B) {
-	%R = xor i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R
-}
diff --git a/test/CodeGen/Blackfin/basic-i64.ll b/test/CodeGen/Blackfin/basic-i64.ll
deleted file mode 100644
index d4dd8e2..0000000
--- a/test/CodeGen/Blackfin/basic-i64.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-define i64 @add(i64 %A, i64 %B) {
-	%R = add i64 %A, %B		; <i64> [#uses=1]
-	ret i64 %R
-}
-
-define i64 @sub(i64 %A, i64 %B) {
-	%R = sub i64 %A, %B		; <i64> [#uses=1]
-	ret i64 %R
-}
-
-define i64 @mul(i64 %A, i64 %B) {
-	%R = mul i64 %A, %B		; <i64> [#uses=1]
-	ret i64 %R
-}
-
-define i64 @sdiv(i64 %A, i64 %B) {
-	%R = sdiv i64 %A, %B		; <i64> [#uses=1]
-	ret i64 %R
-}
-
-define i64 @udiv(i64 %A, i64 %B) {
-	%R = udiv i64 %A, %B		; <i64> [#uses=1]
-	ret i64 %R
-}
-
-define i64 @srem(i64 %A, i64 %B) {
-	%R = srem i64 %A, %B		; <i64> [#uses=1]
-	ret i64 %R
-}
-
-define i64 @urem(i64 %A, i64 %B) {
-	%R = urem i64 %A, %B		; <i64> [#uses=1]
-	ret i64 %R
-}
-
-define i64 @and(i64 %A, i64 %B) {
-	%R = and i64 %A, %B		; <i64> [#uses=1]
-	ret i64 %R
-}
-
-define i64 @or(i64 %A, i64 %B) {
-	%R = or i64 %A, %B		; <i64> [#uses=1]
-	ret i64 %R
-}
-
-define i64 @xor(i64 %A, i64 %B) {
-	%R = xor i64 %A, %B		; <i64> [#uses=1]
-	ret i64 %R
-}
diff --git a/test/CodeGen/Blackfin/basic-i8.ll b/test/CodeGen/Blackfin/basic-i8.ll
deleted file mode 100644
index 2c7ce9d..0000000
--- a/test/CodeGen/Blackfin/basic-i8.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-define i8 @add(i8 %A, i8 %B) {
-	%R = add i8 %A, %B		; <i8> [#uses=1]
-	ret i8 %R
-}
-
-define i8 @sub(i8 %A, i8 %B) {
-	%R = sub i8 %A, %B		; <i8> [#uses=1]
-	ret i8 %R
-}
-
-define i8 @mul(i8 %A, i8 %B) {
-	%R = mul i8 %A, %B		; <i8> [#uses=1]
-	ret i8 %R
-}
-
-define i8 @sdiv(i8 %A, i8 %B) {
-	%R = sdiv i8 %A, %B		; <i8> [#uses=1]
-	ret i8 %R
-}
-
-define i8 @udiv(i8 %A, i8 %B) {
-	%R = udiv i8 %A, %B		; <i8> [#uses=1]
-	ret i8 %R
-}
-
-define i8 @srem(i8 %A, i8 %B) {
-	%R = srem i8 %A, %B		; <i8> [#uses=1]
-	ret i8 %R
-}
-
-define i8 @urem(i8 %A, i8 %B) {
-	%R = urem i8 %A, %B		; <i8> [#uses=1]
-	ret i8 %R
-}
-
-define i8 @and(i8 %A, i8 %B) {
-	%R = and i8 %A, %B		; <i8> [#uses=1]
-	ret i8 %R
-}
-
-define i8 @or(i8 %A, i8 %B) {
-	%R = or i8 %A, %B		; <i8> [#uses=1]
-	ret i8 %R
-}
-
-define i8 @xor(i8 %A, i8 %B) {
-	%R = xor i8 %A, %B		; <i8> [#uses=1]
-	ret i8 %R
-}
diff --git a/test/CodeGen/Blackfin/basictest.ll b/test/CodeGen/Blackfin/basictest.ll
deleted file mode 100644
index 85040df..0000000
--- a/test/CodeGen/Blackfin/basictest.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-define void @void(i32, i32) {
-        add i32 0, 0            ; <i32>:3 [#uses=2]
-        sub i32 0, 4            ; <i32>:4 [#uses=2]
-        br label %5
-
-; <label>:5             ; preds = %5, %2
-        add i32 %0, %1          ; <i32>:6 [#uses=2]
-        sub i32 %6, %4          ; <i32>:7 [#uses=1]
-        icmp sle i32 %7, %3             ; <i1>:8 [#uses=1]
-        br i1 %8, label %9, label %5
-
-; <label>:9             ; preds = %5
-        add i32 %0, %1          ; <i32>:10 [#uses=0]
-        sub i32 %6, %4          ; <i32>:11 [#uses=1]
-        icmp sle i32 %11, %3            ; <i1>:12 [#uses=0]
-        ret void
-}
diff --git a/test/CodeGen/Blackfin/cmp-small-imm.ll b/test/CodeGen/Blackfin/cmp-small-imm.ll
deleted file mode 100644
index e1732a8..0000000
--- a/test/CodeGen/Blackfin/cmp-small-imm.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=bfin > %t
-
-define i1 @cmp3(i32 %A) {
-	%R = icmp uge i32 %A, 2
-	ret i1 %R
-}
diff --git a/test/CodeGen/Blackfin/cmp64.ll b/test/CodeGen/Blackfin/cmp64.ll
deleted file mode 100644
index 6c4f9c5..0000000
--- a/test/CodeGen/Blackfin/cmp64.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-; This test tries to use a JustCC register as a data operand for MOVEcc.  It
-; copies (JustCC -> DP), failing because JustCC can only be copied to D.
-; The proper solution would be to restrict the virtual register to D only.
-
-define i32 @main() {
-entry:
-	br label %loopentry
-
-loopentry:
-	%done = icmp sle i64 undef, 5
-	br i1 %done, label %loopentry, label %exit.1
-
-exit.1:
-	ret i32 0
-}
diff --git a/test/CodeGen/Blackfin/ct32.ll b/test/CodeGen/Blackfin/ct32.ll
deleted file mode 100644
index 363286d..0000000
--- a/test/CodeGen/Blackfin/ct32.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-declare i32 @llvm.ctlz.i32(i32)
-declare i32 @llvm.cttz.i32(i32)
-declare i32 @llvm.ctpop.i32(i32)
-
-define i32 @ctlztest(i32 %B) {
-	%b = call i32 @llvm.ctlz.i32( i32 %B )
-	ret i32 %b
-}
-
-define i32 @cttztest(i32 %B) {
-	%b = call i32 @llvm.cttz.i32( i32 %B )
-	ret i32 %b
-}
-
-define i32 @ctpoptest(i32 %B) {
-	%b = call i32 @llvm.ctpop.i32( i32 %B )
-	ret i32 %b
-}
diff --git a/test/CodeGen/Blackfin/ct64.ll b/test/CodeGen/Blackfin/ct64.ll
deleted file mode 100644
index 7502434..0000000
--- a/test/CodeGen/Blackfin/ct64.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-declare i64 @llvm.ctlz.i64(i64)
-declare i64 @llvm.cttz.i64(i64)
-declare i64 @llvm.ctpop.i64(i64)
-
-define i64 @ctlztest(i64 %B) {
-	%b = call i64 @llvm.ctlz.i64( i64 %B )
-	ret i64 %b
-}
-
-define i64 @cttztest(i64 %B) {
-	%b = call i64 @llvm.cttz.i64( i64 %B )
-	ret i64 %b
-}
-
-define i64 @ctpoptest(i64 %B) {
-	%b = call i64 @llvm.ctpop.i64( i64 %B )
-	ret i64 %b
-}
diff --git a/test/CodeGen/Blackfin/ctlz16.ll b/test/CodeGen/Blackfin/ctlz16.ll
deleted file mode 100644
index eb4af23..0000000
--- a/test/CodeGen/Blackfin/ctlz16.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-declare i16 @llvm.ctlz.i16(i16)
-
-define i16 @ctlztest(i16 %B) {
-	%b = call i16 @llvm.ctlz.i16( i16 %B )		; <i16> [#uses=1]
-	ret i16 %b
-}
-define i16 @ctlztest_z(i16 zeroext %B) {
-	%b = call i16 @llvm.ctlz.i16( i16 %B )		; <i16> [#uses=1]
-	ret i16 %b
-}
-
-define i16 @ctlztest_s(i16 signext %B) {
-	%b = call i16 @llvm.ctlz.i16( i16 %B )		; <i16> [#uses=1]
-	ret i16 %b
-}
-
diff --git a/test/CodeGen/Blackfin/ctlz64.ll b/test/CodeGen/Blackfin/ctlz64.ll
deleted file mode 100644
index 3e22f88..0000000
--- a/test/CodeGen/Blackfin/ctlz64.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
-
-@.str = external constant [14 x i8]		; <[14 x i8]*> [#uses=1]
-
-define i32 @main(i64 %arg) nounwind {
-entry:
-	%tmp47 = tail call i64 @llvm.cttz.i64(i64 %arg)		; <i64> [#uses=1]
-	%tmp48 = trunc i64 %tmp47 to i32		; <i32> [#uses=1]
-	%tmp40 = tail call i32 (i8*, ...)* @printf(i8* noalias getelementptr ([14 x i8]* @.str, i32 0, i32 0), i64 %arg, i32 0, i32 %tmp48, i32 0) nounwind		; <i32> [#uses=0]
-	ret i32 0
-}
-
-declare i32 @printf(i8* noalias, ...) nounwind
-
-declare i64 @llvm.cttz.i64(i64) nounwind readnone
diff --git a/test/CodeGen/Blackfin/ctpop16.ll b/test/CodeGen/Blackfin/ctpop16.ll
deleted file mode 100644
index 8b6c07e..0000000
--- a/test/CodeGen/Blackfin/ctpop16.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-declare i16 @llvm.ctpop.i16(i16)
-
-define i16 @ctpoptest(i16 %B) {
-	%b = call i16 @llvm.ctpop.i16( i16 %B )		; <i16> [#uses=1]
-	ret i16 %b
-}
-define i16 @ctpoptest_z(i16 zeroext %B) {
-	%b = call i16 @llvm.ctpop.i16( i16 %B )		; <i16> [#uses=1]
-	ret i16 %b
-}
-
-define i16 @ctpoptest_s(i16 signext %B) {
-	%b = call i16 @llvm.ctpop.i16( i16 %B )		; <i16> [#uses=1]
-	ret i16 %b
-}
-
diff --git a/test/CodeGen/Blackfin/cttz16.ll b/test/CodeGen/Blackfin/cttz16.ll
deleted file mode 100644
index 510882a..0000000
--- a/test/CodeGen/Blackfin/cttz16.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-declare i16 @llvm.cttz.i16(i16)
-
-define i16 @cttztest(i16 %B) {
-	%b = call i16 @llvm.cttz.i16( i16 %B )		; <i16> [#uses=1]
-	ret i16 %b
-}
-define i16 @cttztest_z(i16 zeroext %B) {
-	%b = call i16 @llvm.cttz.i16( i16 %B )		; <i16> [#uses=1]
-	ret i16 %b
-}
-
-define i16 @cttztest_s(i16 signext %B) {
-	%b = call i16 @llvm.cttz.i16( i16 %B )		; <i16> [#uses=1]
-	ret i16 %b
-}
-
diff --git a/test/CodeGen/Blackfin/cycles.ll b/test/CodeGen/Blackfin/cycles.ll
deleted file mode 100644
index 6451c74..0000000
--- a/test/CodeGen/Blackfin/cycles.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -march=bfin | FileCheck %s
-
-declare i64 @llvm.readcyclecounter()
-
-; CHECK: cycles
-; CHECK: cycles2
-define i64 @cyc64() {
-	%tmp.1 = call i64 @llvm.readcyclecounter()
-	ret i64 %tmp.1
-}
-
-; CHECK: cycles
-define i32@cyc32() {
-	%tmp.1 = call i64 @llvm.readcyclecounter()
-        %s = trunc i64 %tmp.1 to i32
-	ret i32 %s
-}
diff --git a/test/CodeGen/Blackfin/dg.exp b/test/CodeGen/Blackfin/dg.exp
deleted file mode 100644
index 5fdbe5f..0000000
--- a/test/CodeGen/Blackfin/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target Blackfin] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/Blackfin/double-cast.ll b/test/CodeGen/Blackfin/double-cast.ll
deleted file mode 100644
index 815ca79..0000000
--- a/test/CodeGen/Blackfin/double-cast.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-declare i32 @printf(i8*, ...)
-
-define i32 @main() {
-	%1 = call i32 (i8*, ...)* @printf(i8* undef, double undef)
-	ret i32 0
-}
diff --git a/test/CodeGen/Blackfin/frameindex.ll b/test/CodeGen/Blackfin/frameindex.ll
deleted file mode 100644
index 7e677fb..0000000
--- a/test/CodeGen/Blackfin/frameindex.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-declare i32 @SIM(i8*, i8*, i32, i32, i32, [256 x i32]*, i32, i32, i32)
-
-define void @foo() {
-bb0:
-	%V = alloca [256 x i32], i32 256		; <[256 x i32]*> [#uses=1]
-	%0 = call i32 @SIM(i8* null, i8* null, i32 0, i32 0, i32 0, [256 x i32]* %V, i32 0, i32 0, i32 2)		; <i32> [#uses=0]
-	ret void
-}
diff --git a/test/CodeGen/Blackfin/i17mem.ll b/test/CodeGen/Blackfin/i17mem.ll
deleted file mode 100644
index bc5ade7..0000000
--- a/test/CodeGen/Blackfin/i17mem.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-@i17_l = external global i17		; <i17*> [#uses=1]
-@i17_s = external global i17		; <i17*> [#uses=1]
-
-define void @i17_ls() nounwind  {
-	%tmp = load i17* @i17_l		; <i17> [#uses=1]
-	store i17 %tmp, i17* @i17_s
-	ret void
-}
diff --git a/test/CodeGen/Blackfin/i1mem.ll b/test/CodeGen/Blackfin/i1mem.ll
deleted file mode 100644
index cb03e3d..0000000
--- a/test/CodeGen/Blackfin/i1mem.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-@i1_l = external global i1		; <i1*> [#uses=1]
-@i1_s = external global i1		; <i1*> [#uses=1]
-
-define void @i1_ls() nounwind  {
-	%tmp = load i1* @i1_l		; <i1> [#uses=1]
-	store i1 %tmp, i1* @i1_s
-	ret void
-}
diff --git a/test/CodeGen/Blackfin/i1ops.ll b/test/CodeGen/Blackfin/i1ops.ll
deleted file mode 100644
index 6b5612c..0000000
--- a/test/CodeGen/Blackfin/i1ops.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-define i32 @adj(i32 %d.1, i32 %ct.1) {
-entry:
-	%tmp.22.not = trunc i32 %ct.1 to i1		; <i1> [#uses=1]
-	%tmp.221 = xor i1 %tmp.22.not, true		; <i1> [#uses=1]
-	%tmp.26 = or i1 false, %tmp.221		; <i1> [#uses=1]
-	%tmp.27 = zext i1 %tmp.26 to i32		; <i32> [#uses=1]
-	ret i32 %tmp.27
-}
diff --git a/test/CodeGen/Blackfin/i216mem.ll b/test/CodeGen/Blackfin/i216mem.ll
deleted file mode 100644
index 9f8cf48..0000000
--- a/test/CodeGen/Blackfin/i216mem.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-@i216_l = external global i216		; <i216*> [#uses=1]
-@i216_s = external global i216		; <i216*> [#uses=1]
-
-define void @i216_ls() nounwind  {
-	%tmp = load i216* @i216_l		; <i216> [#uses=1]
-	store i216 %tmp, i216* @i216_s
-	ret void
-}
diff --git a/test/CodeGen/Blackfin/i248mem.ll b/test/CodeGen/Blackfin/i248mem.ll
deleted file mode 100644
index db23f54..0000000
--- a/test/CodeGen/Blackfin/i248mem.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=bfin
-@i248_l = external global i248		; <i248*> [#uses=1]
-@i248_s = external global i248		; <i248*> [#uses=1]
-
-define void @i248_ls() nounwind  {
-	%tmp = load i248* @i248_l		; <i248> [#uses=1]
-	store i248 %tmp, i248* @i248_s
-	ret void
-}
diff --git a/test/CodeGen/Blackfin/i256mem.ll b/test/CodeGen/Blackfin/i256mem.ll
deleted file mode 100644
index bc5ade7..0000000
--- a/test/CodeGen/Blackfin/i256mem.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-@i17_l = external global i17		; <i17*> [#uses=1]
-@i17_s = external global i17		; <i17*> [#uses=1]
-
-define void @i17_ls() nounwind  {
-	%tmp = load i17* @i17_l		; <i17> [#uses=1]
-	store i17 %tmp, i17* @i17_s
-	ret void
-}
diff --git a/test/CodeGen/Blackfin/i256param.ll b/test/CodeGen/Blackfin/i256param.ll
deleted file mode 100644
index df74c9a..0000000
--- a/test/CodeGen/Blackfin/i256param.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-@i256_s = external global i256		; <i256*> [#uses=1]
-
-define void @i256_ls(i256 %x) nounwind  {
-	store i256 %x, i256* @i256_s
-	ret void
-}
diff --git a/test/CodeGen/Blackfin/i56param.ll b/test/CodeGen/Blackfin/i56param.ll
deleted file mode 100644
index ca02563..0000000
--- a/test/CodeGen/Blackfin/i56param.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-@i56_l = external global i56		; <i56*> [#uses=1]
-@i56_s = external global i56		; <i56*> [#uses=1]
-
-define void @i56_ls(i56 %x) nounwind  {
-	store i56 %x, i56* @i56_s
-	ret void
-}
diff --git a/test/CodeGen/Blackfin/i8mem.ll b/test/CodeGen/Blackfin/i8mem.ll
deleted file mode 100644
index ea3a67e..0000000
--- a/test/CodeGen/Blackfin/i8mem.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-@i8_l = external global i8		; <i8*> [#uses=1]
-@i8_s = external global i8		; <i8*> [#uses=1]
-
-define void @i8_ls() nounwind  {
-	%tmp = load i8* @i8_l		; <i8> [#uses=1]
-	store i8 %tmp, i8* @i8_s
-	ret void
-}
diff --git a/test/CodeGen/Blackfin/inline-asm.ll b/test/CodeGen/Blackfin/inline-asm.ll
deleted file mode 100644
index d623f6b..0000000
--- a/test/CodeGen/Blackfin/inline-asm.ll
+++ /dev/null
@@ -1,38 +0,0 @@
-; RUN: llc < %s -march=bfin | FileCheck %s
-
-; Standard "r"
-; CHECK: r0 = r0 + r1;
-define i32 @add_r(i32 %A, i32 %B) {
-	%R = call i32 asm "$0 = $1 + $2;", "=r,r,r"( i32 %A, i32 %B ) nounwind
-	ret i32 %R
-}
-
-; Target "d"
-; CHECK: r0 = r0 - r1;
-define i32 @add_d(i32 %A, i32 %B) {
-	%R = call i32 asm "$0 = $1 - $2;", "=d,d,d"( i32 %A, i32 %B ) nounwind
-	ret i32 %R
-}
-
-; Target "a" for P-regs
-; CHECK: p0 = (p0 + p1) << 1;
-define i32 @add_a(i32 %A, i32 %B) {
-	%R = call i32 asm "$0 = ($1 + $2) << 1;", "=a,a,a"( i32 %A, i32 %B ) nounwind
-	ret i32 %R
-}
-
-; Target "z" for P0, P1, P2. This is not a real regclass
-; CHECK: p0 = (p0 + p1) << 2;
-define i32 @add_Z(i32 %A, i32 %B) {
-	%R = call i32 asm "$0 = ($1 + $2) << 2;", "=z,z,z"( i32 %A, i32 %B ) nounwind
-	ret i32 %R
-}
-
-; Target "C" for CC. This is a single register
-; CHECK: cc = p0 < p1;
-; CHECK: r0 = cc;
-define i32 @add_C(i32 %A, i32 %B) {
-	%R = call i32 asm "$0 = $1 < $2;", "=C,z,z"( i32 %A, i32 %B ) nounwind
-	ret i32 %R
-}
-
diff --git a/test/CodeGen/Blackfin/int-setcc.ll b/test/CodeGen/Blackfin/int-setcc.ll
deleted file mode 100644
index 6bd9f86..0000000
--- a/test/CodeGen/Blackfin/int-setcc.ll
+++ /dev/null
@@ -1,80 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
-
-define fastcc void @Evaluate() {
-entry:
-	br i1 false, label %cond_false186, label %cond_true
-
-cond_true:		; preds = %entry
-	ret void
-
-cond_false186:		; preds = %entry
-	br i1 false, label %cond_true293, label %bb203
-
-bb203:		; preds = %cond_false186
-	ret void
-
-cond_true293:		; preds = %cond_false186
-	br i1 false, label %cond_true298, label %cond_next317
-
-cond_true298:		; preds = %cond_true293
-	br i1 false, label %cond_next518, label %cond_true397.preheader
-
-cond_next317:		; preds = %cond_true293
-	ret void
-
-cond_true397.preheader:		; preds = %cond_true298
-	ret void
-
-cond_next518:		; preds = %cond_true298
-	br i1 false, label %bb1069, label %cond_true522
-
-cond_true522:		; preds = %cond_next518
-	ret void
-
-bb1069:		; preds = %cond_next518
-	br i1 false, label %cond_next1131, label %bb1096
-
-bb1096:		; preds = %bb1069
-	ret void
-
-cond_next1131:		; preds = %bb1069
-	br i1 false, label %cond_next1207, label %cond_true1150
-
-cond_true1150:		; preds = %cond_next1131
-	ret void
-
-cond_next1207:		; preds = %cond_next1131
-	br i1 false, label %cond_next1219, label %cond_true1211
-
-cond_true1211:		; preds = %cond_next1207
-	ret void
-
-cond_next1219:		; preds = %cond_next1207
-	br i1 false, label %cond_true1223, label %cond_next1283
-
-cond_true1223:		; preds = %cond_next1219
-	br i1 false, label %cond_true1254, label %cond_true1264
-
-cond_true1254:		; preds = %cond_true1223
-	br i1 false, label %bb1567, label %cond_true1369.preheader
-
-cond_true1264:		; preds = %cond_true1223
-	ret void
-
-cond_next1283:		; preds = %cond_next1219
-	ret void
-
-cond_true1369.preheader:		; preds = %cond_true1254
-	ret void
-
-bb1567:		; preds = %cond_true1254
-	%tmp1605 = load i8* null		; <i8> [#uses=1]
-	%tmp1606 = icmp eq i8 %tmp1605, 0		; <i1> [#uses=1]
-	br i1 %tmp1606, label %cond_next1637, label %cond_true1607
-
-cond_true1607:		; preds = %bb1567
-	ret void
-
-cond_next1637:		; preds = %bb1567
-	ret void
-}
diff --git a/test/CodeGen/Blackfin/invalid-apint.ll b/test/CodeGen/Blackfin/invalid-apint.ll
deleted file mode 100644
index a8c01ba..0000000
--- a/test/CodeGen/Blackfin/invalid-apint.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-; Assertion failed: (width < BitWidth && "Invalid APInt Truncate request"),
-; function trunc, file APInt.cpp, line 956.
-
-@str2 = external global [29 x i8]
-
-define void @printArgsNoRet(i32 %a1, float %a2, i8 %a3, double %a4, i8* %a5, i32 %a6, float %a7, i8 %a8, double %a9, i8* %a10, i32 %a11, float %a12, i8 %a13, double %a14, i8* %a15) {
-entry:
-	%tmp17 = sext i8 %a13 to i32
-	%tmp23 = call i32 (i8*, ...)* @printf(i8* getelementptr ([29 x i8]* @str2, i32 0, i64 0), i32 %a11, double 0.000000e+00, i32 %tmp17, double %a14, i32 0)
-	ret void
-}
-
-declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Blackfin/jumptable.ll b/test/CodeGen/Blackfin/jumptable.ll
deleted file mode 100644
index 263533c..0000000
--- a/test/CodeGen/Blackfin/jumptable.ll
+++ /dev/null
@@ -1,53 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs | FileCheck %s
-
-; CHECK: .section .rodata
-; CHECK: JTI0_0:
-; CHECK: .long .BB0_1
-
-define i32 @oper(i32 %op, i32 %A, i32 %B) {
-entry:
-        switch i32 %op, label %bbx [
-               i32 1 , label %bb1
-               i32 2 , label %bb2
-               i32 3 , label %bb3
-               i32 4 , label %bb4
-               i32 5 , label %bb5
-               i32 6 , label %bb6
-               i32 7 , label %bb7
-               i32 8 , label %bb8
-               i32 9 , label %bb9
-               i32 10, label %bb10
-        ]
-bb1:
-	%R1 = add i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R1
-bb2:
-	%R2 = sub i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R2
-bb3:
-	%R3 = mul i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R3
-bb4:
-	%R4 = sdiv i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R4
-bb5:
-	%R5 = udiv i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R5
-bb6:
-	%R6 = srem i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R6
-bb7:
-	%R7 = urem i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R7
-bb8:
-	%R8 = and i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R8
-bb9:
-	%R9 = or i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R9
-bb10:
-	%R10 = xor i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R10
-bbx:
-        ret i32 0
-}
diff --git a/test/CodeGen/Blackfin/large-switch.ll b/test/CodeGen/Blackfin/large-switch.ll
deleted file mode 100644
index 02d32ef..0000000
--- a/test/CodeGen/Blackfin/large-switch.ll
+++ /dev/null
@@ -1,187 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-; The switch expansion uses a dynamic shl, and it produces a jumptable
-
-define void @athlon_fp_unit_ready_cost() {
-entry:
-	switch i32 0, label %UnifiedReturnBlock [
-		i32 -1, label %bb2063
-		i32 19, label %bb2035
-		i32 20, label %bb2035
-		i32 21, label %bb2035
-		i32 23, label %bb2035
-		i32 24, label %bb2035
-		i32 27, label %bb2035
-		i32 32, label %bb2035
-		i32 33, label %bb1994
-		i32 35, label %bb2035
-		i32 36, label %bb1994
-		i32 90, label %bb1948
-		i32 94, label %bb1948
-		i32 95, label %bb1948
-		i32 133, label %bb1419
-		i32 135, label %bb1238
-		i32 136, label %bb1238
-		i32 137, label %bb1238
-		i32 138, label %bb1238
-		i32 139, label %bb1201
-		i32 140, label %bb1201
-		i32 141, label %bb1154
-		i32 142, label %bb1126
-		i32 144, label %bb1201
-		i32 145, label %bb1126
-		i32 146, label %bb1201
-		i32 147, label %bb1126
-		i32 148, label %bb1201
-		i32 149, label %bb1126
-		i32 150, label %bb1201
-		i32 151, label %bb1126
-		i32 152, label %bb1096
-		i32 153, label %bb1096
-		i32 154, label %bb1096
-		i32 157, label %bb1096
-		i32 158, label %bb1096
-		i32 159, label %bb1096
-		i32 162, label %bb1096
-		i32 163, label %bb1096
-		i32 164, label %bb1096
-		i32 167, label %bb1201
-		i32 168, label %bb1201
-		i32 170, label %bb1201
-		i32 171, label %bb1201
-		i32 173, label %bb1201
-		i32 174, label %bb1201
-		i32 176, label %bb1201
-		i32 177, label %bb1201
-		i32 179, label %bb993
-		i32 180, label %bb993
-		i32 181, label %bb993
-		i32 182, label %bb993
-		i32 183, label %bb993
-		i32 184, label %bb993
-		i32 365, label %bb1126
-		i32 366, label %bb1126
-		i32 367, label %bb1126
-		i32 368, label %bb1126
-		i32 369, label %bb1126
-		i32 370, label %bb1126
-		i32 371, label %bb1126
-		i32 372, label %bb1126
-		i32 373, label %bb1126
-		i32 384, label %bb1126
-		i32 385, label %bb1126
-		i32 386, label %bb1126
-		i32 387, label %bb1126
-		i32 388, label %bb1126
-		i32 389, label %bb1126
-		i32 390, label %bb1126
-		i32 391, label %bb1126
-		i32 392, label %bb1126
-		i32 525, label %bb919
-		i32 526, label %bb839
-		i32 528, label %bb919
-		i32 529, label %bb839
-		i32 532, label %cond_next6.i97
-		i32 533, label %cond_next6.i81
-		i32 534, label %bb495
-		i32 536, label %cond_next6.i81
-		i32 537, label %cond_next6.i81
-		i32 538, label %bb396
-		i32 539, label %bb288
-		i32 541, label %bb396
-		i32 542, label %bb396
-		i32 543, label %bb396
-		i32 544, label %bb396
-		i32 545, label %bb189
-		i32 546, label %cond_next6.i
-		i32 547, label %bb189
-		i32 548, label %cond_next6.i
-		i32 549, label %bb189
-		i32 550, label %cond_next6.i
-		i32 551, label %bb189
-		i32 552, label %cond_next6.i
-		i32 553, label %bb189
-		i32 554, label %cond_next6.i
-		i32 555, label %bb189
-		i32 556, label %cond_next6.i
-		i32 557, label %bb189
-		i32 558, label %cond_next6.i
-		i32 618, label %bb40
-		i32 619, label %bb18
-		i32 620, label %bb40
-		i32 621, label %bb10
-		i32 622, label %bb10
-	]
-
-bb10:
-	ret void
-
-bb18:
-	ret void
-
-bb40:
-	ret void
-
-cond_next6.i:
-	ret void
-
-bb189:
-	ret void
-
-bb288:
-	ret void
-
-bb396:
-	ret void
-
-bb495:
-	ret void
-
-cond_next6.i81:
-	ret void
-
-cond_next6.i97:
-	ret void
-
-bb839:
-	ret void
-
-bb919:
-	ret void
-
-bb993:
-	ret void
-
-bb1096:
-	ret void
-
-bb1126:
-	ret void
-
-bb1154:
-	ret void
-
-bb1201:
-	ret void
-
-bb1238:
-	ret void
-
-bb1419:
-	ret void
-
-bb1948:
-	ret void
-
-bb1994:
-	ret void
-
-bb2035:
-	ret void
-
-bb2063:
-	ret void
-
-UnifiedReturnBlock:
-	ret void
-}
diff --git a/test/CodeGen/Blackfin/load-i16.ll b/test/CodeGen/Blackfin/load-i16.ll
deleted file mode 100644
index eb18d41..0000000
--- a/test/CodeGen/Blackfin/load-i16.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-; This somewhat contrived function heavily exercises register classes
-; It can trick -join-cross-class-copies into making illegal joins
-
-define void @f(i16** nocapture %p) nounwind readonly {
-entry:
-	%tmp1 = load i16** %p		; <i16*> [#uses=1]
-	%tmp2 = load i16* %tmp1		; <i16> [#uses=1]
-	%ptr = getelementptr i16* %tmp1, i16 %tmp2
-    store i16 %tmp2, i16* %ptr
-    ret void
-}
diff --git a/test/CodeGen/Blackfin/logic-i16.ll b/test/CodeGen/Blackfin/logic-i16.ll
deleted file mode 100644
index e44672f..0000000
--- a/test/CodeGen/Blackfin/logic-i16.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-define i16 @and(i16 %A, i16 %B) {
-	%R = and i16 %A, %B		; <i16> [#uses=1]
-	ret i16 %R
-}
-
-define i16 @or(i16 %A, i16 %B) {
-	%R = or i16 %A, %B		; <i16> [#uses=1]
-	ret i16 %R
-}
-
-define i16 @xor(i16 %A, i16 %B) {
-	%R = xor i16 %A, %B		; <i16> [#uses=1]
-	ret i16 %R
-}
diff --git a/test/CodeGen/Blackfin/many-args.ll b/test/CodeGen/Blackfin/many-args.ll
deleted file mode 100644
index 2df32ca..0000000
--- a/test/CodeGen/Blackfin/many-args.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-	%0 = type { i32, float, float, float, float, float, float, float, float, float, float }		; type %0
-	%struct..s_segment_inf = type { float, i32, i16, i16, float, float, i32, float, float }
-
-define i32 @main(i32 %argc.1, i8** %argv.1) {
-entry:
-	%tmp.218 = load float* null		; <float> [#uses=1]
-	%tmp.219 = getelementptr %0* null, i64 0, i32 6		; <float*> [#uses=1]
-	%tmp.220 = load float* %tmp.219		; <float> [#uses=1]
-	%tmp.221 = getelementptr %0* null, i64 0, i32 7		; <float*> [#uses=1]
-	%tmp.222 = load float* %tmp.221		; <float> [#uses=1]
-	%tmp.223 = getelementptr %0* null, i64 0, i32 8		; <float*> [#uses=1]
-	%tmp.224 = load float* %tmp.223		; <float> [#uses=1]
-	%tmp.225 = getelementptr %0* null, i64 0, i32 9		; <float*> [#uses=1]
-	%tmp.226 = load float* %tmp.225		; <float> [#uses=1]
-	%tmp.227 = getelementptr %0* null, i64 0, i32 10		; <float*> [#uses=1]
-	%tmp.228 = load float* %tmp.227		; <float> [#uses=1]
-	call void @place_and_route(i32 0, i32 0, float 0.000000e+00, i32 0, i32 0, i8* null, i32 0, i32 0, i8* null, i8* null, i8* null, i8* null, i32 0, i32 0, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, i32 0, i32 0, i32 0, i32 0, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, i32 0, i32 0, i16 0, i16 0, i16 0, float 0.000000e+00, float 0.000000e+00, %struct..s_segment_inf* null, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float %tmp.218, float %tmp.220, float %tmp.222, float %tmp.224, float %tmp.226, float %tmp.228)
-	ret i32 0
-}
-
-declare void @place_and_route(i32, i32, float, i32, i32, i8*, i32, i32, i8*, i8*, i8*, i8*, i32, i32, i32, float, float, float, float, float, float, float, float, float, i32, i32, i32, i32, i32, float, float, float, i32, i32, i16, i16, i16, float, float, %struct..s_segment_inf*, i32, float, float, float, float, float, float, float, float, float, float)
diff --git a/test/CodeGen/Blackfin/mulhu.ll b/test/CodeGen/Blackfin/mulhu.ll
deleted file mode 100644
index 72bacee..0000000
--- a/test/CodeGen/Blackfin/mulhu.ll
+++ /dev/null
@@ -1,106 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
-
-	%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
-	%struct.VEC_edge = type { i32, i32, [1 x %struct.edge_def*] }
-	%struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
-	%struct.basic_block_def = type { %struct.rtx_def*, %struct.rtx_def*, %struct.tree_node*, %struct.VEC_edge*, %struct.VEC_edge*, %struct.bitmap_head_def*, %struct.bitmap_head_def*, i8*, %struct.loop*, [2 x %struct.et_node*], %struct.basic_block_def*, %struct.basic_block_def*, %struct.reorder_block_def*, %struct.bb_ann_d*, i64, i32, i32, i32, i32 }
-	%struct.bb_ann_d = type { %struct.tree_node*, i8, %struct.edge_prediction* }
-	%struct.bitmap_element_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, [4 x i32] }
-	%struct.bitmap_head_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, %struct.bitmap_obstack* }
-	%struct.bitmap_obstack = type { %struct.bitmap_element_def*, %struct.bitmap_head_def*, %struct.obstack }
-	%struct.cost_pair = type { %struct.iv_cand*, i32, %struct.bitmap_head_def* }
-	%struct.dataflow_d = type { %struct.varray_head_tag*, [2 x %struct.tree_node*] }
-	%struct.def_operand_ptr = type { %struct.tree_node** }
-	%struct.def_optype_d = type { i32, [1 x %struct.def_operand_ptr] }
-	%struct.edge_def = type { %struct.basic_block_def*, %struct.basic_block_def*, %struct.edge_def_insns, i8*, %struct.location_t*, i32, i32, i64, i32 }
-	%struct.edge_def_insns = type { %struct.rtx_def* }
-	%struct.edge_prediction = type { %struct.edge_prediction*, %struct.edge_def*, i32, i32 }
-	%struct.eh_status = type opaque
-	%struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack*, i32, %struct.location_t, i32, i8*, %struct.rtx_def** }
-	%struct.et_node = type opaque
-	%struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
-	%struct.function = type { %struct.eh_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, %struct.initial_value_struct*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i8, i32, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.varray_head_tag*, %struct.temp_slot*, i32, %struct.var_refs_queue*, i32, i32, %struct.rtvec_def*, %struct.tree_node*, i32, i32, i32, %struct.machine_function*, i32, i32, i1, i1, %struct.language_function*, %struct.rtx_def*, i32, i32, i32, i32, %struct.location_t, %struct.varray_head_tag*, %struct.tree_node*, i8, i8, i8 }
-	%struct.htab = type { i32 (i8*)*, i32 (i8*, i8*)*, void (i8*)*, i8**, i32, i32, i32, i32, i32, i8* (i32, i32)*, void (i8*)*, i8*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i32 }
-	%struct.initial_value_struct = type opaque
-	%struct.iv = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i1, i1, i32 }
-	%struct.iv_cand = type { i32, i1, i32, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.iv*, i32 }
-	%struct.iv_use = type { i32, i32, %struct.iv*, %struct.tree_node*, %struct.tree_node**, %struct.bitmap_head_def*, i32, %struct.cost_pair*, %struct.iv_cand* }
-	%struct.ivopts_data = type { %struct.loop*, %struct.htab*, i32, %struct.version_info*, %struct.bitmap_head_def*, i32, %struct.varray_head_tag*, %struct.varray_head_tag*, %struct.bitmap_head_def*, i1 }
-	%struct.lang_decl = type opaque
-	%struct.language_function = type opaque
-	%struct.location_t = type { i8*, i32 }
-	%struct.loop = type { i32, %struct.basic_block_def*, %struct.basic_block_def*, %struct.basic_block_def*, %struct.lpt_decision, i32, i32, %struct.edge_def**, i32, %struct.basic_block_def*, %struct.basic_block_def*, i32, %struct.edge_def**, i32, %struct.edge_def**, i32, %struct.simple_bitmap_def*, i32, %struct.loop**, i32, %struct.loop*, %struct.loop*, %struct.loop*, %struct.loop*, i32, i8*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i32, %struct.tree_node*, %struct.tree_node*, %struct.nb_iter_bound*, %struct.edge_def*, i1 }
-	%struct.lpt_decision = type { i32, i32 }
-	%struct.machine_function = type { %struct.stack_local_entry*, i8*, %struct.rtx_def*, i32, i32, i32, i32, i32 }
-	%struct.nb_iter_bound = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.nb_iter_bound* }
-	%struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (i8*, i32)*, void (i8*, %struct._obstack_chunk*)*, i8*, i8 }
-	%struct.reorder_block_def = type { %struct.rtx_def*, %struct.rtx_def*, %struct.basic_block_def*, %struct.basic_block_def*, %struct.basic_block_def*, i32, i32, i32 }
-	%struct.rtvec_def = type { i32, [1 x %struct.rtx_def*] }
-	%struct.rtx_def = type { i16, i8, i8, %struct.u }
-	%struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack* }
-	%struct.simple_bitmap_def = type { i32, i32, i32, [1 x i64] }
-	%struct.stack_local_entry = type opaque
-	%struct.stmt_ann_d = type { %struct.tree_ann_common_d, i8, %struct.basic_block_def*, %struct.stmt_operands_d, %struct.dataflow_d*, %struct.bitmap_head_def*, i32 }
-	%struct.stmt_operands_d = type { %struct.def_optype_d*, %struct.def_optype_d*, %struct.v_may_def_optype_d*, %struct.vuse_optype_d*, %struct.v_may_def_optype_d* }
-	%struct.temp_slot = type opaque
-	%struct.tree_ann_common_d = type { i32, i8*, %struct.tree_node* }
-	%struct.tree_ann_d = type { %struct.stmt_ann_d }
-	%struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_ann_d*, i8, i8, i8, i8, i8 }
-	%struct.tree_decl = type { %struct.tree_common, %struct.location_t, i32, %struct.tree_node*, i8, i8, i8, i8, i8, i8, i8, i32, %struct.tree_decl_u1, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, i32, %struct.tree_decl_u2, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* }
-	%struct.tree_decl_u1 = type { i64 }
-	%struct.tree_decl_u2 = type { %struct.function* }
-	%struct.tree_node = type { %struct.tree_decl }
-	%struct.u = type { [1 x i64] }
-	%struct.v_def_use_operand_type_t = type { %struct.tree_node*, %struct.tree_node* }
-	%struct.v_may_def_optype_d = type { i32, [1 x %struct.v_def_use_operand_type_t] }
-	%struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* }
-	%struct.varasm_status = type opaque
-	%struct.varray_head_tag = type { i32, i32, i32, i8*, %struct.u }
-	%struct.version_info = type { %struct.tree_node*, %struct.iv*, i1, i32, i1 }
-	%struct.vuse_optype_d = type { i32, [1 x %struct.tree_node*] }
-
-define i1 @determine_use_iv_cost(%struct.ivopts_data* %data, %struct.iv_use* %use, %struct.iv_cand* %cand) {
-entry:
-	switch i32 0, label %bb91 [
-		i32 0, label %bb
-		i32 1, label %bb6
-		i32 3, label %cond_next135
-	]
-
-bb:		; preds = %entry
-	ret i1 false
-
-bb6:		; preds = %entry
-	br i1 false, label %bb87, label %cond_next27
-
-cond_next27:		; preds = %bb6
-	br i1 false, label %cond_true30, label %cond_next55
-
-cond_true30:		; preds = %cond_next27
-	br i1 false, label %cond_next41, label %cond_true35
-
-cond_true35:		; preds = %cond_true30
-	ret i1 false
-
-cond_next41:		; preds = %cond_true30
-	%tmp44 = call i32 @force_var_cost(%struct.ivopts_data* %data, %struct.tree_node* null, %struct.bitmap_head_def** null)		; <i32> [#uses=1]
-	%tmp46 = udiv i32 %tmp44, 5		; <i32> [#uses=1]
-	call void @set_use_iv_cost(%struct.ivopts_data* %data, %struct.iv_use* %use, %struct.iv_cand* %cand, i32 %tmp46, %struct.bitmap_head_def* null)
-	br label %bb87
-
-cond_next55:		; preds = %cond_next27
-	ret i1 false
-
-bb87:		; preds = %cond_next41, %bb6
-	ret i1 false
-
-bb91:		; preds = %entry
-	ret i1 false
-
-cond_next135:		; preds = %entry
-	ret i1 false
-}
-
-declare void @set_use_iv_cost(%struct.ivopts_data*, %struct.iv_use*, %struct.iv_cand*, i32, %struct.bitmap_head_def*)
-
-declare i32 @force_var_cost(%struct.ivopts_data*, %struct.tree_node*, %struct.bitmap_head_def**)
diff --git a/test/CodeGen/Blackfin/printf.ll b/test/CodeGen/Blackfin/printf.ll
deleted file mode 100644
index 9e54b73..0000000
--- a/test/CodeGen/Blackfin/printf.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-@.str_1 = external constant [42 x i8]		; <[42 x i8]*> [#uses=1]
-
-declare i32 @printf(i8*, ...)
-
-define i32 @main(i32 %argc.1, i8** %argv.1) {
-entry:
-	%tmp.16 = call i32 (i8*, ...)* @printf(i8* getelementptr ([42 x i8]* @.str_1, i64 0, i64 0), i32 0, i32 0, i64 0, i64 0)
-	ret i32 0
-}
diff --git a/test/CodeGen/Blackfin/printf2.ll b/test/CodeGen/Blackfin/printf2.ll
deleted file mode 100644
index 7ac7e80..0000000
--- a/test/CodeGen/Blackfin/printf2.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-declare i32 @printf(i8*, ...)
-
-define i32 @main() {
-	%1 = call i32 (i8*, ...)* @printf(i8* undef, i1 undef)
-	ret i32 0
-}
diff --git a/test/CodeGen/Blackfin/promote-logic.ll b/test/CodeGen/Blackfin/promote-logic.ll
deleted file mode 100644
index 1ac1408..0000000
--- a/test/CodeGen/Blackfin/promote-logic.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; RUN: llc < %s -march=bfin 
-
-; DAGCombiner::SimplifyBinOpWithSameOpcodeHands can produce an illegal i16 OR
-; operation after LegalizeOps.
-
-define void @mng_display_bgr565() {
-entry:
-	br i1 false, label %bb.preheader, label %return
-
-bb.preheader:
-	br i1 false, label %cond_true48, label %cond_next80
-
-cond_true48:
-	%tmp = load i8* null
-	%tmp51 = zext i8 %tmp to i16
-	%tmp99 = load i8* null
-	%tmp54 = bitcast i8 %tmp99 to i8
-	%tmp54.upgrd.1 = zext i8 %tmp54 to i32
-	%tmp55 = lshr i32 %tmp54.upgrd.1, 3
-	%tmp55.upgrd.2 = trunc i32 %tmp55 to i16
-	%tmp52 = shl i16 %tmp51, 5
-	%tmp56 = and i16 %tmp55.upgrd.2, 28
-	%tmp57 = or i16 %tmp56, %tmp52
-	%tmp60 = zext i16 %tmp57 to i32
-	%tmp62 = xor i32 0, 65535
-	%tmp63 = mul i32 %tmp60, %tmp62
-	%tmp65 = add i32 0, %tmp63
-	%tmp69 = add i32 0, %tmp65
-	%tmp70 = lshr i32 %tmp69, 16
-	%tmp70.upgrd.3 = trunc i32 %tmp70 to i16
-	%tmp75 = lshr i16 %tmp70.upgrd.3, 8
-	%tmp75.upgrd.4 = trunc i16 %tmp75 to i8
-	%tmp76 = lshr i8 %tmp75.upgrd.4, 5
-	store i8 %tmp76, i8* null
-	ret void
-
-cond_next80:
-	ret void
-
-return:
-	ret void
-}
diff --git a/test/CodeGen/Blackfin/promote-setcc.ll b/test/CodeGen/Blackfin/promote-setcc.ll
deleted file mode 100644
index d344fad..0000000
--- a/test/CodeGen/Blackfin/promote-setcc.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; RUN: llc < %s -march=bfin > %t
-
-; The DAG combiner may sometimes create illegal i16 SETCC operations when run
-; after LegalizeOps. Try to tease out all the optimizations in
-; TargetLowering::SimplifySetCC.
-
-@x = external global i16
-@y = external global i16
-
-declare i16 @llvm.ctlz.i16(i16)
-
-; Case (srl (ctlz x), 5) == const
-; Note: ctlz is promoted, so this test does not catch the DAG combiner
-define i1 @srl_ctlz_const() {
-  %x = load i16* @x
-  %c = call i16 @llvm.ctlz.i16(i16 %x)
-  %s = lshr i16 %c, 4
-  %r = icmp eq i16 %s, 1
-  ret i1 %r
-}
-
-; Case (zext x) == const
-define i1 @zext_const() {
-  %x = load i16* @x
-  %r = icmp ugt i16 %x, 1
-  ret i1 %r
-}
-
-; Case (sext x) == const
-define i1 @sext_const() {
-  %x = load i16* @x
-  %y = add i16 %x, 1
-  %x2 = sext i16 %y to i32
-  %r = icmp ne i32 %x2, -1
-  ret i1 %r
-}
-
diff --git a/test/CodeGen/Blackfin/sdiv.ll b/test/CodeGen/Blackfin/sdiv.ll
deleted file mode 100644
index 1426655..0000000
--- a/test/CodeGen/Blackfin/sdiv.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-define i32 @sdiv(i32 %A, i32 %B) {
-	%R = sdiv i32 %A, %B		; <i32> [#uses=1]
-	ret i32 %R
-}
diff --git a/test/CodeGen/Blackfin/simple-select.ll b/test/CodeGen/Blackfin/simple-select.ll
deleted file mode 100644
index 0f7f270..0000000
--- a/test/CodeGen/Blackfin/simple-select.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
-
-declare i1 @foo()
-
-define i32 @test(i32* %A, i32* %B) {
-	%a = load i32* %A
-	%b = load i32* %B
-	%cond = call i1 @foo()
-	%c = select i1 %cond, i32 %a, i32 %b
-	ret i32 %c
-}
diff --git a/test/CodeGen/Blackfin/switch.ll b/test/CodeGen/Blackfin/switch.ll
deleted file mode 100644
index 3680ec6..0000000
--- a/test/CodeGen/Blackfin/switch.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
-
-define i32 @foo(i32 %A, i32 %B, i32 %C) {
-entry:
-	switch i32 %A, label %out [
-		i32 1, label %bb
-		i32 0, label %bb13
-	]
-
-bb:		; preds = %entry
-	ret i32 1
-
-bb13:		; preds = %entry
-	ret i32 1
-
-out:		; preds = %entry
-	ret i32 0
-}
diff --git a/test/CodeGen/Blackfin/switch2.ll b/test/CodeGen/Blackfin/switch2.ll
deleted file mode 100644
index 7877bce..0000000
--- a/test/CodeGen/Blackfin/switch2.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
-
-define i8* @FindChar(i8* %CurPtr) {
-entry:
-	br label %bb
-
-bb:		; preds = %bb, %entry
-	%tmp = load i8* null		; <i8> [#uses=1]
-	switch i8 %tmp, label %bb [
-		i8 0, label %bb7
-		i8 120, label %bb7
-	]
-
-bb7:		; preds = %bb, %bb
-	ret i8* null
-}
diff --git a/test/CodeGen/Blackfin/sync-intr.ll b/test/CodeGen/Blackfin/sync-intr.ll
deleted file mode 100644
index 0b103a3..0000000
--- a/test/CodeGen/Blackfin/sync-intr.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs | FileCheck %s
-
-define void @f() nounwind {
-entry:
-        ; CHECK-NOT: llvm.bfin
-        ; CHECK: csync;
-        call void @llvm.bfin.csync()
-
-        ; CHECK-NOT: llvm.bfin
-        ; CHECK: ssync;
-        call void @llvm.bfin.ssync()
-	ret void
-}
-
-declare void @llvm.bfin.csync() nounwind
-declare void @llvm.bfin.ssync() nounwind
diff --git a/test/CodeGen/CBackend/2002-05-16-NameCollide.ll b/test/CodeGen/CBackend/2002-05-16-NameCollide.ll
deleted file mode 100644
index 0b06041..0000000
--- a/test/CodeGen/CBackend/2002-05-16-NameCollide.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=c
-
-; Make sure that global variables do not collide if they have the same name,
-; but different types.
-
-@X = global i32 5               ; <i32*> [#uses=0]
-@X.upgrd.1 = global i64 7               ; <i64*> [#uses=0]
-
diff --git a/test/CodeGen/CBackend/2002-05-21-MissingReturn.ll b/test/CodeGen/CBackend/2002-05-21-MissingReturn.ll
deleted file mode 100644
index a9f54e4..0000000
--- a/test/CodeGen/CBackend/2002-05-21-MissingReturn.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: llc < %s -march=c
-
-; This case was emitting code that looked like this:
-; ...
-;   llvm_BB1:       /* no statement here */
-; }
-; 
-; Which the Sun C compiler rejected, so now we are sure to put a return 
-; instruction in there if the basic block is otherwise empty.
-;
-define void @test() {
-        br label %BB1
-
-BB2:            ; preds = %BB2
-        br label %BB2
-
-BB1:            ; preds = %0
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll b/test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll
deleted file mode 100644
index 2afb1a0..0000000
--- a/test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=c
-
-; Test const pointer refs & forward references
-
-@t3 = global i32* @t1           ; <i32**> [#uses=0]
-@t1 = global i32 4              ; <i32*> [#uses=1]
-
diff --git a/test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll b/test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll
deleted file mode 100644
index b71cf07..0000000
--- a/test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=c
-
-global i32* bitcast (float* @2 to i32*)   ;; Forward numeric reference
-global float* @2                       ;; Duplicate forward numeric reference
-global float 0.0
-
-@array = constant [2 x i32] [ i32 12, i32 52 ]
-@arrayPtr = global i32* getelementptr ([2 x i32]* @array, i64 0, i64 0)
diff --git a/test/CodeGen/CBackend/2002-08-19-DataPointer.ll b/test/CodeGen/CBackend/2002-08-19-DataPointer.ll
deleted file mode 100644
index b5a1f0b..0000000
--- a/test/CodeGen/CBackend/2002-08-19-DataPointer.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; RUN: llc < %s -march=c
-
-@sptr1 = global [11 x i8]* @somestr         ;; Forward ref to a constant
-@somestr = constant [11 x i8] c"hello world"
diff --git a/test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll b/test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll
deleted file mode 100644
index 10b9fe2..0000000
--- a/test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: llc < %s -march=c
-
-@fptr = global void ()* @f       ;; Forward ref method defn
-declare void @f()               ;; External method
-
diff --git a/test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll b/test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll
deleted file mode 100644
index 0827423..0000000
--- a/test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: llc < %s -march=c
-
-@array = constant [2 x i32] [ i32 12, i32 52 ]          ; <[2 x i32]*> [#uses=1]
-@arrayPtr = global i32* getelementptr ([2 x i32]* @array, i64 0, i64 0)         ; <i32**> [#uses=0]
-
diff --git a/test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll b/test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll
deleted file mode 100644
index 59aafd5..0000000
--- a/test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=c
-
-; The C Writer bombs on this testcase because it tries the print the prototype
-; for the test function, which tries to print the argument name.  The function
-; has not been incorporated into the slot calculator, so after it does the name
-; lookup, it tries a slot calculator lookup, which fails.
-
-define i32 @test(i32) {
-        ret i32 0
-}
diff --git a/test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll b/test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll
deleted file mode 100644
index 6c4d629..0000000
--- a/test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -march=c
-
-; Indirect function call test... found by Joel & Brian
-;
-
-@taskArray = external global i32*               ; <i32**> [#uses=1]
-
-define void @test(i32 %X) {
-        %Y = add i32 %X, -1             ; <i32> [#uses=1]
-        %cast100 = sext i32 %Y to i64           ; <i64> [#uses=1]
-        %gep100 = getelementptr i32** @taskArray, i64 %cast100          ; <i32**> [#uses=1]
-        %fooPtr = load i32** %gep100            ; <i32*> [#uses=1]
-        %cast101 = bitcast i32* %fooPtr to void (i32)*          ; <void (i32)*> [#uses=1]
-        call void %cast101( i32 1000 )
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll b/test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll
deleted file mode 100644
index 1187a37..0000000
--- a/test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=c
-
-; This testcase fails because the C backend does not arrange to output the 
-; contents of a structure type before it outputs the structure type itself.
-
-@Y = external global { { i32 } }                ; <{ { i32 } }*> [#uses=0]
-@X = external global { float }          ; <{ float }*> [#uses=0]
-
diff --git a/test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll b/test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll
deleted file mode 100644
index 021adb9..0000000
--- a/test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=c
-
-define void @test() {
-        %X = alloca [4 x i32]           ; <[4 x i32]*> [#uses=0]
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll b/test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll
deleted file mode 100644
index e915cd2..0000000
--- a/test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=c
-
-
-declare void @foo(...)
-
-
diff --git a/test/CodeGen/CBackend/2002-10-16-External.ll b/test/CodeGen/CBackend/2002-10-16-External.ll
deleted file mode 100644
index 2cdd15c..0000000
--- a/test/CodeGen/CBackend/2002-10-16-External.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; RUN: llc < %s -march=c
-
-@bob = external global i32              ; <i32*> [#uses=0]
-
diff --git a/test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll b/test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll
deleted file mode 100644
index 82d594f..0000000
--- a/test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=c
-
-@testString = internal constant [18 x i8] c"Escaped newline\5Cn\00"             ; <[18 x i8]*> [#uses=1]
-
-declare i32 @printf(i8*, ...)
-
-define i32 @main() {
-        call i32 (i8*, ...)* @printf( i8* getelementptr ([18 x i8]* @testString, i64 0, i64 0) )                ; <i32>:1 [#uses=0]
-        ret i32 0
-}
-
diff --git a/test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll b/test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll
deleted file mode 100644
index 92d582d..0000000
--- a/test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=c
-
-; Apparently this constant was unsigned in ISO C 90, but not in C 99.
-
-define i32 @foo() {
-        ret i32 -2147483648
-}
-
diff --git a/test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll b/test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll
deleted file mode 100644
index a42dc27..0000000
--- a/test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=c
-
-; This testcase breaks the C backend, because gcc doesn't like (...) functions
-; with no arguments at all.
-
-define void @test(i64 %Ptr) {
-        %P = inttoptr i64 %Ptr to void (...)*           ; <void (...)*> [#uses=1]
-        call void (...)* %P( i64 %Ptr )
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2003-05-31-MissingStructName.ll b/test/CodeGen/CBackend/2003-05-31-MissingStructName.ll
deleted file mode 100644
index 19c7840..0000000
--- a/test/CodeGen/CBackend/2003-05-31-MissingStructName.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: llc < %s -march=c
-
-; The C backend was dying when there was no typename for a struct type!
-
-declare i32 @test(i32, { [32 x i32] }*)
diff --git a/test/CodeGen/CBackend/2003-06-01-NullPointerType.ll b/test/CodeGen/CBackend/2003-06-01-NullPointerType.ll
deleted file mode 100644
index 048e045..0000000
--- a/test/CodeGen/CBackend/2003-06-01-NullPointerType.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=c
-
-%X = type { i32, float }
-
-define void @test() {
-        getelementptr %X* null, i64 0, i32 1            ; <float*>:1 [#uses=0]
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2003-06-11-HexConstant.ll b/test/CodeGen/CBackend/2003-06-11-HexConstant.ll
deleted file mode 100644
index 6197b30..0000000
--- a/test/CodeGen/CBackend/2003-06-11-HexConstant.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; RUN: llc < %s -march=c
-
-; Make sure hex constant does not continue into a valid hexadecimal letter/number
-@version = global [3 x i8] c"\001\00"
diff --git a/test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll b/test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll
deleted file mode 100644
index f6177ea..0000000
--- a/test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: llc < %s -march=c
-
-@version = global [3 x i8] c"1\00\00"
diff --git a/test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll b/test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll
deleted file mode 100644
index f0b1bbc..0000000
--- a/test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -march=c
-
-declare i32 @callee(i32, i32)
-
-define i32 @test(i32 %X) {
-; <label>:0
-        %A = invoke i32 @callee( i32 %X, i32 5 )
-                        to label %Ok unwind label %Threw                ; <i32> [#uses=1]
-
-Ok:             ; preds = %Threw, %0
-        %B = phi i32 [ %A, %0 ], [ -1, %Threw ]         ; <i32> [#uses=1]
-        ret i32 %B
-
-Threw:          ; preds = %0
-        br label %Ok
-}
-
diff --git a/test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll b/test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll
deleted file mode 100644
index 4bd1da2..0000000
--- a/test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: llc < %s -march=c | grep common | grep X
-
-@X = linkonce global i32 5
diff --git a/test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll b/test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll
deleted file mode 100644
index 0fbb3fe..0000000
--- a/test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: llc < %s -march=c
-
-; This is a non-normal FP value: it's a nan.
-@NAN = global { float } { float 0x7FF8000000000000 }            ; <{ float }*> [#uses=0]
-@NANs = global { float } { float 0x7FFC000000000000 }           ; <{ float }*> [#uses=0]
diff --git a/test/CodeGen/CBackend/2003-10-23-UnusedType.ll b/test/CodeGen/CBackend/2003-10-23-UnusedType.ll
deleted file mode 100644
index 9195634..0000000
--- a/test/CodeGen/CBackend/2003-10-23-UnusedType.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=c
-
-%A = type { i32, i8*, { i32, i32, i32, i32, i32, i32, i32, i32 }*, i16 }
-
-define void @test(%A*) {
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll b/test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll
deleted file mode 100644
index b4389ff..0000000
--- a/test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=c
-
-; reduced from DOOM.
-        %union._XEvent = type { i32 }
-@.X_event_9 = global %union._XEvent zeroinitializer             ; <%union._XEvent*> [#uses=1]
-
-define void @I_InitGraphics() {
-shortcirc_next.3:
-        %tmp.319 = load i32* getelementptr ({ i32, i32 }* bitcast (%union._XEvent* @.X_event_9 to { i32, i32 }*), i64 0, i32 1)               ; <i32> [#uses=0]
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll b/test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll
deleted file mode 100644
index 6a26291..0000000
--- a/test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=c
-@y = weak global i8 0           ; <i8*> [#uses=1]
-
-define i32 @testcaseshr() {
-entry:
-        ret i32 lshr (i32 ptrtoint (i8* @y to i32), i32 4)
-}
-
-define i32 @testcaseshl() {
-entry:
-        ret i32 shl (i32 ptrtoint (i8* @y to i32), i32 4)
-}
-
diff --git a/test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll b/test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll
deleted file mode 100644
index 142fbd8..0000000
--- a/test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=c | grep builtin_return_address
-
-declare i8* @llvm.returnaddress(i32)
-
-declare i8* @llvm.frameaddress(i32)
-
-define i8* @test1() {
-        %X = call i8* @llvm.returnaddress( i32 0 )              ; <i8*> [#uses=1]
-        ret i8* %X
-}
-
-define i8* @test2() {
-        %X = call i8* @llvm.frameaddress( i32 0 )               ; <i8*> [#uses=1]
-        ret i8* %X
-}
-
diff --git a/test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll b/test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll
deleted file mode 100644
index d1c6861..0000000
--- a/test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; The intrinsic lowering pass was lowering intrinsics like llvm.memcpy to 
-; explicitly specified prototypes, inserting a new function if the old one
-; didn't exist.  This caused there to be two external memcpy functions in 
-; this testcase for example, which caused the CBE to mangle one, screwing
-; everything up.  :(  Test that this does not happen anymore.
-;
-; RUN: llc < %s -march=c | not grep _memcpy
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-declare float* @memcpy(i32*, i32, i32)
-
-define i32 @test(i8* %A, i8* %B, i32* %C) {
-        call float* @memcpy( i32* %C, i32 4, i32 17 )           ; <float*>:1 [#uses=0]
-        call void @llvm.memcpy.i32( i8* %A, i8* %B, i32 123, i32 14 )
-        ret i32 7
-}
-
diff --git a/test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll b/test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll
deleted file mode 100644
index 6fceb08..0000000
--- a/test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; This is a non-normal FP value
-; RUN: llc < %s -march=c | grep FPConstant | grep static
-
-define float @func() {
-        ret float 0xFFF0000000000000
-}
-
-define double @func2() {
-        ret double 0xFF20000000000000
-}
-
diff --git a/test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll b/test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll
deleted file mode 100644
index cf59634..0000000
--- a/test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=c | grep func1 | grep WEAK
-
-define linkonce i32 @func1() {
-        ret i32 5
-}
-
diff --git a/test/CodeGen/CBackend/2004-08-09-va-end-null.ll b/test/CodeGen/CBackend/2004-08-09-va-end-null.ll
deleted file mode 100644
index 3ee23d1..0000000
--- a/test/CodeGen/CBackend/2004-08-09-va-end-null.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=c
-
-declare void @llvm.va_end(i8*)
-
-define void @test() {
-        %va.upgrd.1 = bitcast i8* null to i8*           ; <i8*> [#uses=1]
-        call void @llvm.va_end( i8* %va.upgrd.1 )
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll b/test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll
deleted file mode 100644
index af8f441..0000000
--- a/test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; The CBE should not emit code that casts the function pointer.  This causes
-; GCC to get testy and insert trap instructions instead of doing the right
-; thing. :(
-; RUN: llc < %s -march=c
-
-declare void @external(i8*)
-
-define i32 @test(i32* %X) {
-        %RV = call i32 bitcast (void (i8*)* @external to i32 (i32*)*)( i32* %X )                ; <i32> [#uses=1]
-        ret i32 %RV
-}
-
diff --git a/test/CodeGen/CBackend/2004-12-03-ExternStatics.ll b/test/CodeGen/CBackend/2004-12-03-ExternStatics.ll
deleted file mode 100644
index 78e9bac..0000000
--- a/test/CodeGen/CBackend/2004-12-03-ExternStatics.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=c | not grep extern.*msg
-; PR472
-
-@msg = internal global [6 x i8] c"hello\00"             ; <[6 x i8]*> [#uses=1]
-
-define i8* @foo() {
-entry:
-        ret i8* getelementptr ([6 x i8]* @msg, i32 0, i32 0)
-}
-
diff --git a/test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll b/test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll
deleted file mode 100644
index 57a9adc..0000000
--- a/test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: llc < %s -march=c
-
-define i32 @foo() {
-        ret i32 and (i32 123456, i32 ptrtoint (i32 ()* @foo to i32))
-}
diff --git a/test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll b/test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll
deleted file mode 100644
index dd505af..0000000
--- a/test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=c | grep volatile
-
-define void @test(i32* %P) {
-        %X = volatile load i32* %P              ; <i32> [#uses=1]
-        volatile store i32 %X, i32* %P
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll b/test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll
deleted file mode 100644
index 808b8f9..0000000
--- a/test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=c | not grep -- --65535
-; PR596
-
-target datalayout = "e-p:32:32"
-target triple = "i686-pc-linux-gnu"
-
-declare void @func(i32)
-
-define void @funcb() {
-entry:
-        %tmp.1 = sub i32 0, -65535              ; <i32> [#uses=1]
-        call void @func( i32 %tmp.1 )
-        br label %return
-
-return:         ; preds = %entry
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2005-08-23-Fmod.ll b/test/CodeGen/CBackend/2005-08-23-Fmod.ll
deleted file mode 100644
index 6e650eb..0000000
--- a/test/CodeGen/CBackend/2005-08-23-Fmod.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=c | grep fmod
-
-define double @test(double %A, double %B) {
-        %C = frem double %A, %B         ; <double> [#uses=1]
-        ret double %C
-}
-
diff --git a/test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll b/test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll
deleted file mode 100644
index 99de837..0000000
--- a/test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=c | grep {\\* *volatile *\\*}
-
-@G = external global void ()*           ; <void ()**> [#uses=2]
-
-define void @test() {
-        volatile store void ()* @test, void ()** @G
-        volatile load void ()** @G              ; <void ()*>:1 [#uses=0]
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll b/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll
deleted file mode 100644
index c9df800..0000000
--- a/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll
+++ /dev/null
@@ -1,49 +0,0 @@
-; RUN: llc < %s -march=c | \
-; RUN:   grep __BITCAST | count 14
-
-define i32 @test1(float %F) {
-        %X = bitcast float %F to i32            ; <i32> [#uses=1]
-        ret i32 %X
-}
-
-define float @test2(i32 %I) {
-        %X = bitcast i32 %I to float            ; <float> [#uses=1]
-        ret float %X
-}
-
-define i64 @test3(double %D) {
-        %X = bitcast double %D to i64           ; <i64> [#uses=1]
-        ret i64 %X
-}
-
-define double @test4(i64 %L) {
-        %X = bitcast i64 %L to double           ; <double> [#uses=1]
-        ret double %X
-}
-
-define double @test5(double %D) {
-        %X = bitcast double %D to double                ; <double> [#uses=1]
-        %Y = fadd double %X, 2.000000e+00                ; <double> [#uses=1]
-        %Z = bitcast double %Y to i64           ; <i64> [#uses=1]
-        %res = bitcast i64 %Z to double         ; <double> [#uses=1]
-        ret double %res
-}
-
-define float @test6(float %F) {
-        %X = bitcast float %F to float          ; <float> [#uses=1]
-        %Y = fadd float %X, 2.000000e+00         ; <float> [#uses=1]
-        %Z = bitcast float %Y to i32            ; <i32> [#uses=1]
-        %res = bitcast i32 %Z to float          ; <float> [#uses=1]
-        ret float %res
-}
-
-define i32 @main(i32 %argc, i8** %argv) {
-        %a = call i32 @test1( float 0x400921FB40000000 )                ; <i32> [#uses=2]
-        %b = call float @test2( i32 %a )                ; <float> [#uses=0]
-        %c = call i64 @test3( double 0x400921FB4D12D84A )               ; <i64> [#uses=1]
-        %d = call double @test4( i64 %c )               ; <double> [#uses=0]
-        %e = call double @test5( double 7.000000e+00 )          ; <double> [#uses=0]
-        %f = call float @test6( float 7.000000e+00 )            ; <float> [#uses=0]
-        ret i32 %a
-}
-
diff --git a/test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll b/test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll
deleted file mode 100644
index da36e78..0000000
--- a/test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; For PR1099
-; RUN: llc < %s -march=c | grep {(llvm_cbe_tmp2 == llvm_cbe_b_2e_0_2e_0_2e_val)}
-
-target datalayout = "e-p:32:32"
-target triple = "i686-apple-darwin8"
-        %struct.Connector = type { i16, i16, i8, i8, %struct.Connector*, i8* }
-
-
-define i1 @prune_match_entry_2E_ce(%struct.Connector* %a, i16 %b.0.0.val) {
-newFuncRoot:
-        br label %entry.ce
-
-cond_next.exitStub:             ; preds = %entry.ce
-        ret i1 true
-
-entry.return_crit_edge.exitStub:                ; preds = %entry.ce
-        ret i1 false
-
-entry.ce:               ; preds = %newFuncRoot
-        %tmp1 = getelementptr %struct.Connector* %a, i32 0, i32 0                ; <i16*> [#uses=1]
-        %tmp2 = load i16* %tmp1           ; <i16> [#uses=1]
-        %tmp3 = icmp eq i16 %tmp2, %b.0.0.val             ; <i1> [#uses=1]
-        br i1 %tmp3, label %cond_next.exitStub, label %entry.return_crit_edge.exitStub
-}
-
-
diff --git a/test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll b/test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll
deleted file mode 100644
index 4f699b7..0000000
--- a/test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=c | grep __builtin_stack_save
-; RUN: llc < %s -march=c | grep __builtin_stack_restore
-; PR1028
-
-declare i8* @llvm.stacksave()
-declare void @llvm.stackrestore(i8*)
-
-define i8* @test() {
-    %s = call i8* @llvm.stacksave()
-    call void @llvm.stackrestore(i8* %s)
-    ret i8* %s
-}
diff --git a/test/CodeGen/CBackend/2007-02-05-memset.ll b/test/CodeGen/CBackend/2007-02-05-memset.ll
deleted file mode 100644
index 7d508e4..0000000
--- a/test/CodeGen/CBackend/2007-02-05-memset.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=c
-; PR1181
-target datalayout = "e-p:64:64"
-target triple = "x86_64-apple-darwin8"
-
-
-declare void @llvm.memset.i64(i8*, i8, i64, i32)
-
-define fastcc void @InitUser_data_unregistered() {
-entry:
-        tail call void @llvm.memset.i64( i8* null, i8 0, i64 65496, i32 1 )
-        ret void
-}
diff --git a/test/CodeGen/CBackend/2007-02-23-NameConflicts.ll b/test/CodeGen/CBackend/2007-02-23-NameConflicts.ll
deleted file mode 100644
index 7e1ff2a..0000000
--- a/test/CodeGen/CBackend/2007-02-23-NameConflicts.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; PR1164
-; RUN: llc < %s -march=c | grep {llvm_cbe_A = \\*llvm_cbe_G;}
-; RUN: llc < %s -march=c | grep {llvm_cbe_B = \\*(&ltmp_0_1);}
-; RUN: llc < %s -march=c | grep {return (((unsigned int )(((unsigned int )llvm_cbe_A) + ((unsigned int )llvm_cbe_B))));}
-
-@G = global i32 123
-@ltmp_0_1 = global i32 123
-
-define i32 @test(i32 *%G) {
-        %A = load i32* %G
-        %B = load i32* @ltmp_0_1
-        %C = add i32 %A, %B
-        ret i32 %C
-}
diff --git a/test/CodeGen/CBackend/2007-07-11-PackedStruct.ll b/test/CodeGen/CBackend/2007-07-11-PackedStruct.ll
deleted file mode 100644
index c8bfdd6..0000000
--- a/test/CodeGen/CBackend/2007-07-11-PackedStruct.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=c | grep {packed}
-
-	%struct.p = type <{ i16 }>
-
-define i32 @main() {
-entry:
-        %t = alloca %struct.p, align 2
-	ret i32 5
-}
diff --git a/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll b/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll
deleted file mode 100644
index 6e0cf68..0000000
--- a/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=c | \
-; RUN:          grep {struct __attribute__ ((packed, aligned(} | count 4
-
-define void @test(i32* %P) {
-        %X = load i32* %P, align 1
-        store i32 %X, i32* %P, align 1
-        ret void
-}
-
-define void @test2(i32* %P) {
-        %X = volatile load i32* %P, align 2
-        volatile store i32 %X, i32* %P, align 2
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll b/test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll
deleted file mode 100644
index e9fa552..0000000
--- a/test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s -march=c | grep {llvm_cbe_t.*&1}
-define i32 @test(i32 %r) {
-  %s = icmp eq i32 %r, 0
-  %t = add i1 %s, %s
-  %u = zext i1 %t to i32
-  br i1 %t, label %A, label %B
-A:
-
-  ret i32 %u
-B:
-
-  %v = select i1 %t, i32 %r, i32 %u
-  ret i32 %v
-}
diff --git a/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll b/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll
deleted file mode 100644
index b72b573..0000000
--- a/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: llc < %s -march=c
-; PR2907
-target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
-target triple = "powerpc-apple-darwin9.5"
-	%"struct.Point<0>" = type { %"struct.Tensor<1,0>" }
-	%"struct.QGauss2<1>" = type { %"struct.Quadrature<0>" }
-	%"struct.Quadrature<0>" = type { %struct.Subscriptor, i32, %"struct.std::vector<Point<0>,std::allocator<Point<0> > >", %"struct.std::vector<double,std::allocator<double> >" }
-	%struct.Subscriptor = type { i32 (...)**, i32, %"struct.std::type_info"* }
-	%"struct.Tensor<1,0>" = type { [1 x double] }
-	%"struct.std::_Vector_base<Point<0>,std::allocator<Point<0> > >" = type { %"struct.std::_Vector_base<Point<0>,std::allocator<Point<0> > >::_Vector_impl" }
-	%"struct.std::_Vector_base<Point<0>,std::allocator<Point<0> > >::_Vector_impl" = type { %"struct.Point<0>"*, %"struct.Point<0>"*, %"struct.Point<0>"* }
-	%"struct.std::_Vector_base<double,std::allocator<double> >" = type { %"struct.std::_Vector_base<double,std::allocator<double> >::_Vector_impl" }
-	%"struct.std::_Vector_base<double,std::allocator<double> >::_Vector_impl" = type { double*, double*, double* }
-	%"struct.std::type_info" = type { i32 (...)**, i8* }
-	%"struct.std::vector<Point<0>,std::allocator<Point<0> > >" = type { %"struct.std::_Vector_base<Point<0>,std::allocator<Point<0> > >" }
-	%"struct.std::vector<double,std::allocator<double> >" = type { %"struct.std::_Vector_base<double,std::allocator<double> >" }
-
-define fastcc void @_ZN6QGaussILi1EEC1Ej(%"struct.QGauss2<1>"* %this, i32 %n) {
-entry:
-	br label %bb4
-
-bb4:		; preds = %bb5.split, %bb4, %entry
-	%0 = fcmp ogt ppc_fp128 0xM00000000000000000000000000000000, select (i1 fcmp olt (ppc_fp128 fpext (double 0x3C447AE147AE147B to ppc_fp128), ppc_fp128 fmul (ppc_fp128 0xM00000000000000010000000000000000, ppc_fp128 0xM40140000000000000000000000000000)), ppc_fp128 fmul (ppc_fp128 0xM00000000000000010000000000000000, ppc_fp128 0xM40140000000000000000000000000000), ppc_fp128 fpext (double 0x3C447AE147AE147B to ppc_fp128))		; <i1> [#uses=1]
-	br i1 %0, label %bb4, label %bb5.split
-
-bb5.split:		; preds = %bb4
-	%1 = getelementptr double* null, i32 0		; <double*> [#uses=0]
-	br label %bb4
-}
diff --git a/test/CodeGen/CBackend/2011-06-08-addWithOverflow.ll b/test/CodeGen/CBackend/2011-06-08-addWithOverflow.ll
deleted file mode 100644
index 0ae480d..0000000
--- a/test/CodeGen/CBackend/2011-06-08-addWithOverflow.ll
+++ /dev/null
@@ -1,35 +0,0 @@
-; RUN: llc < %s -march=c
-; Check that uadd and sadd with overflow are handled by C Backend.
-
-%0 = type { i32, i1 }        ; type %0
-
-define i1 @func1(i32 zeroext %v1, i32 zeroext %v2) nounwind {
-entry:
-    %t = call %0 @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)     ; <%0> [#uses=1]
-    %obit = extractvalue %0 %t, 1       ; <i1> [#uses=1]
-    br i1 %obit, label %carry, label %normal
-
-normal:     ; preds = %entry
-    ret i1 true
-
-carry:      ; preds = %entry
-    ret i1 false
-}
-
-define i1 @func2(i32 signext %v1, i32 signext %v2) nounwind {
-entry:
-    %t = call %0 @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)     ; <%0> [#uses=1]
-    %obit = extractvalue %0 %t, 1       ; <i1> [#uses=1]
-    br i1 %obit, label %carry, label %normal
-
-normal:     ; preds = %entry
-    ret i1 true
-
-carry:      ; preds = %entry
-    ret i1 false
-}
-
-declare %0 @llvm.sadd.with.overflow.i32(i32, i32) nounwind
-
-declare %0 @llvm.uadd.with.overflow.i32(i32, i32) nounwind
-
diff --git a/test/CodeGen/CBackend/X86/2008-06-04-IndirectMem.ll b/test/CodeGen/CBackend/X86/2008-06-04-IndirectMem.ll
deleted file mode 100644
index 054a3ca..0000000
--- a/test/CodeGen/CBackend/X86/2008-06-04-IndirectMem.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=c | grep {"m"(llvm_cbe_newcw))}
-; PR2407
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i386-pc-linux-gnu"
-
-define void @foo() {
-  %newcw = alloca i16             ; <i16*> [#uses=2]
-  call void asm sideeffect "fldcw $0", "*m,~{dirflag},~{fpsr},~{flags}"( i16*
-%newcw ) nounwind 
-  ret void
-}
diff --git a/test/CodeGen/CBackend/X86/dg.exp b/test/CodeGen/CBackend/X86/dg.exp
deleted file mode 100644
index 44e3a5e..0000000
--- a/test/CodeGen/CBackend/X86/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] && [llvm_supports_target CBackend] } {
-    RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,s}]]
-}
diff --git a/test/CodeGen/CBackend/dg.exp b/test/CodeGen/CBackend/dg.exp
deleted file mode 100644
index 9d78940..0000000
--- a/test/CodeGen/CBackend/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target CBackend] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/CBackend/fneg.ll b/test/CodeGen/CBackend/fneg.ll
deleted file mode 100644
index 7dec3d9..0000000
--- a/test/CodeGen/CBackend/fneg.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=c
-
-define void @func() nounwind {
-  entry:
-  %0 = fsub double -0.0, undef
-  ret void
-}
diff --git a/test/CodeGen/CBackend/pr2408.ll b/test/CodeGen/CBackend/pr2408.ll
deleted file mode 100644
index bf8477b..0000000
--- a/test/CodeGen/CBackend/pr2408.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=c | grep {\\* ((unsigned int )}
-; PR2408
-
-define i32 @a(i32 %a) {
-entry:
-        %shr = ashr i32 %a, 0           ; <i32> [#uses=1]
-        %shr2 = ashr i32 2, 0           ; <i32> [#uses=1]
-        %mul = mul i32 %shr, %shr2              ; <i32> [#uses=1]
-        %shr4 = ashr i32 2, 0           ; <i32> [#uses=1]
-        %div = sdiv i32 %mul, %shr4             ; <i32> [#uses=1]
-        ret i32 %div
-}
diff --git a/test/CodeGen/CBackend/vectors.ll b/test/CodeGen/CBackend/vectors.ll
deleted file mode 100644
index b7b7677..0000000
--- a/test/CodeGen/CBackend/vectors.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; RUN: llc < %s -march=c
-@.str15 = external global [2 x i8]
-
-define <4 x i32> @foo(<4 x i32> %a, i32 %b) {
-  %c = insertelement <4 x i32> %a, i32 1, i32 %b
-  
-  ret <4 x i32> %c
-}
-
-define i32 @test2(<4 x i32> %a, i32 %b) {
-  %c = extractelement <4 x i32> %a, i32 1
-  
-  ret i32 %c
-}
-
-define <4 x float> @test3(<4 x float> %Y) {
-	%Z = fadd <4 x float> %Y, %Y
-	%X = shufflevector <4 x float> zeroinitializer, <4 x float> %Z, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >
-	ret <4 x float> %X
-}
-
-define void @test4() {
-	%x = alloca <4 x float>
-	%tmp3.i16 = getelementptr <4 x float>* %x, i32 0, i32 0
-	store float 1.0, float* %tmp3.i16
-	ret void
-}
-
-define i32* @test5({i32, i32} * %P) {
-	%x = getelementptr {i32, i32} * %P, i32 0, i32 1
-	ret i32* %x
-}
-
-define i8* @test6() {
-  ret i8* getelementptr ([2 x i8]* @.str15, i32 0, i32 0) 
-}
-
diff --git a/test/CodeGen/CPP/2012-02-05-UnitVarCrash.ll b/test/CodeGen/CPP/2012-02-05-UnitVarCrash.ll
new file mode 100644
index 0000000..419f594
--- /dev/null
+++ b/test/CodeGen/CPP/2012-02-05-UnitVarCrash.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=cpp
+declare void @foo(<4 x i32>)
+define void @bar() {
+  call void @foo(<4 x i32> <i32 0, i32 1, i32 2, i32 3>)
+  ret void
+}
diff --git a/test/CodeGen/CPP/dg.exp b/test/CodeGen/CPP/dg.exp
deleted file mode 100644
index 3276dcc..0000000
--- a/test/CodeGen/CPP/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target CppBackend] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/CPP/lit.local.cfg b/test/CodeGen/CPP/lit.local.cfg
new file mode 100644
index 0000000..4d4b4a4
--- /dev/null
+++ b/test/CodeGen/CPP/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'CppBackend' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/CellSPU/and_ops.ll b/test/CodeGen/CellSPU/and_ops.ll
index 72478a1..4203e91 100644
--- a/test/CodeGen/CellSPU/and_ops.ll
+++ b/test/CodeGen/CellSPU/and_ops.ll
@@ -5,6 +5,9 @@
 ; RUN: grep andhi  %t1.s | count 30
 ; RUN: grep andbi  %t1.s | count 4
 
+; CellSPU legalization is over-sensitive to Legalize's traversal order.
+; XFAIL: *
+
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
 target triple = "spu"
 
diff --git a/test/CodeGen/CellSPU/call.ll b/test/CodeGen/CellSPU/call.ll
index 559b266..11cf770 100644
--- a/test/CodeGen/CellSPU/call.ll
+++ b/test/CodeGen/CellSPU/call.ll
@@ -1,7 +1,3 @@
-; RUN: llc < %s -march=cellspu -regalloc=linearscan > %t1.s
-; RUN: grep brsl    %t1.s | count 1
-; RUN: grep brasl   %t1.s | count 2
-; RUN: grep stqd    %t1.s | count 82
 ; RUN: llc < %s -march=cellspu | FileCheck %s
 
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
diff --git a/test/CodeGen/CellSPU/call_indirect.ll b/test/CodeGen/CellSPU/call_indirect.ll
deleted file mode 100644
index 141361d..0000000
--- a/test/CodeGen/CellSPU/call_indirect.ll
+++ /dev/null
@@ -1,49 +0,0 @@
-; RUN: llc < %s -march=cellspu -asm-verbose=0 -regalloc=linearscan > %t1.s
-; RUN: llc < %s -march=cellspu -mattr=large_mem -asm-verbose=0 -regalloc=linearscan > %t2.s
-; RUN: grep bisl    %t1.s | count 7
-; RUN: grep ila     %t1.s | count 1
-; RUN: grep rotqby  %t1.s | count 5
-; RUN: grep lqa     %t1.s | count 1
-; RUN: grep lqd     %t1.s | count 12
-; RUN: grep dispatch_tab %t1.s | count 5
-; RUN: grep bisl    %t2.s | count 7
-; RUN: grep ilhu    %t2.s | count 2
-; RUN: grep iohl    %t2.s | count 2
-; RUN: grep rotqby  %t2.s | count 5
-; RUN: grep lqd     %t2.s | count 13
-; RUN: grep ilhu    %t2.s | count 2
-; RUN: grep ai      %t2.s | count 9
-; RUN: grep dispatch_tab %t2.s | count 6
-
-; ModuleID = 'call_indirect.bc'
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128"
-target triple = "spu-unknown-elf"
-
-@dispatch_tab = global [6 x void (i32, float)*] zeroinitializer, align 16
-
-define void @dispatcher(i32 %i_arg, float %f_arg) {
-entry:
-        %tmp2 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 0), align 16
-        tail call void %tmp2( i32 %i_arg, float %f_arg )
-        %tmp2.1 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 1), align 4
-        tail call void %tmp2.1( i32 %i_arg, float %f_arg )
-        %tmp2.2 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 2), align 4
-        tail call void %tmp2.2( i32 %i_arg, float %f_arg )
-        %tmp2.3 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 3), align 4
-        tail call void %tmp2.3( i32 %i_arg, float %f_arg )
-        %tmp2.4 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 4), align 4
-        tail call void %tmp2.4( i32 %i_arg, float %f_arg )
-        %tmp2.5 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 5), align 4
-        tail call void %tmp2.5( i32 %i_arg, float %f_arg )
-        ret void
-}
-
-@ptr_list = internal global [1 x void ()*] [ void ()* inttoptr (i64 4294967295 to void ()*) ], align 4
-@ptr.a = internal global void ()** getelementptr ([1 x void ()*]* @ptr_list, i32 0, i32 1), align 16
-
-define void @double_indirect_call() {
-        %a = load void ()*** @ptr.a, align 16
-        %b = load void ()** %a, align 4
-        tail call void %b()
-        ret void
-}
diff --git a/test/CodeGen/CellSPU/dg.exp b/test/CodeGen/CellSPU/dg.exp
deleted file mode 100644
index d416479..0000000
--- a/test/CodeGen/CellSPU/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target CellSPU] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/CellSPU/lit.local.cfg b/test/CodeGen/CellSPU/lit.local.cfg
new file mode 100644
index 0000000..ea00867
--- /dev/null
+++ b/test/CodeGen/CellSPU/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'CellSPU' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/CellSPU/nand.ll b/test/CodeGen/CellSPU/nand.ll
index b770cad..57ac709 100644
--- a/test/CodeGen/CellSPU/nand.ll
+++ b/test/CodeGen/CellSPU/nand.ll
@@ -3,6 +3,10 @@
 ; RUN: grep and    %t1.s | count 94
 ; RUN: grep xsbh   %t1.s | count 2
 ; RUN: grep xshw   %t1.s | count 4
+
+; CellSPU legalization is over-sensitive to Legalize's traversal order.
+; XFAIL: *
+
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
 target triple = "spu"
 
diff --git a/test/CodeGen/CellSPU/or_ops.ll b/test/CodeGen/CellSPU/or_ops.ll
index 4f1febb..f329266 100644
--- a/test/CodeGen/CellSPU/or_ops.ll
+++ b/test/CodeGen/CellSPU/or_ops.ll
@@ -6,6 +6,9 @@
 ; RUN: grep orbi   %t1.s | count 15
 ; RUN: FileCheck %s < %t1.s
 
+; CellSPU legalization is over-sensitive to Legalize's traversal order.
+; XFAIL: *
+
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
 target triple = "spu"
 
diff --git a/test/CodeGen/CellSPU/rotate_ops.ll b/test/CodeGen/CellSPU/rotate_ops.ll
index b1219e6..9770935 100644
--- a/test/CodeGen/CellSPU/rotate_ops.ll
+++ b/test/CodeGen/CellSPU/rotate_ops.ll
@@ -163,7 +163,7 @@ define i8 @rotri8(i8 %A) {
 define <2 x float> @test1(<4 x float> %param )
 {
 ; CHECK: test1
-; CHECK: rotqbyi
+; CHECK: shufb
   %el = extractelement <4 x float> %param, i32 1
   %vec1 = insertelement <1 x float> undef, float %el, i32 0
   %rv = shufflevector <1 x float> %vec1, <1 x float> undef, <2 x i32><i32 0,i32 0>
diff --git a/test/CodeGen/CellSPU/select_bits.ll b/test/CodeGen/CellSPU/select_bits.ll
index c804256..65e0aa6 100644
--- a/test/CodeGen/CellSPU/select_bits.ll
+++ b/test/CodeGen/CellSPU/select_bits.ll
@@ -1,6 +1,9 @@
 ; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep selb   %t1.s | count 56
 
+; CellSPU legalization is over-sensitive to Legalize's traversal order.
+; XFAIL: *
+
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
 target triple = "spu"
 
diff --git a/test/CodeGen/CellSPU/shift_ops.ll b/test/CodeGen/CellSPU/shift_ops.ll
index 3252c77..f4aad44 100644
--- a/test/CodeGen/CellSPU/shift_ops.ll
+++ b/test/CodeGen/CellSPU/shift_ops.ll
@@ -1,12 +1,12 @@
 ; RUN: llc < %s -march=cellspu > %t1.s
 ; RUN: grep {shlh	}  %t1.s | count 10
 ; RUN: grep {shlhi	}  %t1.s | count 3
-; RUN: grep {shl	}  %t1.s | count 11
+; RUN: grep {shl	}  %t1.s | count 10
 ; RUN: grep {shli	}  %t1.s | count 3
 ; RUN: grep {xshw	}  %t1.s | count 5
-; RUN: grep {and	}  %t1.s | count 14
-; RUN: grep {andi	}  %t1.s | count 2
-; RUN: grep {rotmi	}  %t1.s | count 2
+; RUN: grep {and	}  %t1.s | count 15
+; RUN: grep {andi	}  %t1.s | count 4
+; RUN: grep {rotmi	}  %t1.s | count 4
 ; RUN: grep {rotqmbyi	}  %t1.s | count 1
 ; RUN: grep {rotqmbii	}  %t1.s | count 2
 ; RUN: grep {rotqmby	}  %t1.s | count 1
@@ -342,3 +342,7 @@ define <8 x i16> @ashr_v8i16(<8 x i16> %val, <8 x i16> %sh) {
 	%rv = ashr <8 x i16> %val, %sh
 	ret <8 x i16> %rv
 }
+
+define <2 x i64> @special_const() {
+  ret <2 x i64> <i64 4294967295, i64 4294967295>
+}
diff --git a/test/CodeGen/CellSPU/shuffles.ll b/test/CodeGen/CellSPU/shuffles.ll
index c88a258..973586b 100644
--- a/test/CodeGen/CellSPU/shuffles.ll
+++ b/test/CodeGen/CellSPU/shuffles.ll
@@ -1,12 +1,14 @@
 ; RUN: llc -O1  --march=cellspu < %s | FileCheck %s
 
+;CHECK: shuffle
 define <4 x float> @shuffle(<4 x float> %param1, <4 x float> %param2) {
   ; CHECK: cwd {{\$.}}, 0($sp)
   ; CHECK: shufb {{\$., \$4, \$3, \$.}}
   %val= shufflevector <4 x float> %param1, <4 x float> %param2, <4 x i32> <i32 4,i32 1,i32 2,i32 3>
   ret <4 x float> %val
 }
- 
+
+;CHECK: splat
 define <4 x float> @splat(float %param1) {
   ; CHECK: lqa
   ; CHECK: shufb $3
@@ -16,6 +18,7 @@ define <4 x float> @splat(float %param1) {
   ret <4 x float> %val  
 }
 
+;CHECK: test_insert
 define void @test_insert( <2 x float>* %ptr, float %val1, float %val2 ) {
   %sl2_17_tmp1 = insertelement <2 x float> zeroinitializer, float %val1, i32 0
 ;CHECK:	lqa	$6,
@@ -31,6 +34,7 @@ define void @test_insert( <2 x float>* %ptr, float %val1, float %val2 ) {
   ret void 
 }
 
+;CHECK: test_insert_1
 define <4 x float>  @test_insert_1(<4 x float> %vparam, float %eltparam) {
 ;CHECK: cwd     $5, 4($sp)
 ;CHECK: shufb   $3, $4, $3, $5
@@ -39,6 +43,7 @@ define <4 x float>  @test_insert_1(<4 x float> %vparam, float %eltparam) {
   ret <4 x float> %rv
 }
 
+;CHECK: test_v2i32
 define <2 x i32> @test_v2i32(<4 x i32>%vec)
 {
 ;CHECK: rotqbyi $3, $3, 4
@@ -49,17 +54,14 @@ define <2 x i32> @test_v2i32(<4 x i32>%vec)
 
 define <4 x i32> @test_v4i32_rot8(<4 x i32>%vec)
 {
-;CHECK: rotqbyi $3, $3, 8
-;CHECK: bi $lr
   %rv = shufflevector <4 x i32> %vec, <4 x i32> undef, 
         <4 x i32> <i32 2,i32 3,i32 0, i32 1>
   ret <4 x i32> %rv
 }
 
+;CHECK: test_v4i32_rot4
 define <4 x i32> @test_v4i32_rot4(<4 x i32>%vec)
 {
-;CHECK: rotqbyi $3, $3, 4
-;CHECK: bi $lr
   %rv = shufflevector <4 x i32> %vec, <4 x i32> undef, 
         <4 x i32> <i32 1,i32 2,i32 3, i32 0>
   ret <4 x i32> %rv
diff --git a/test/CodeGen/CellSPU/struct_1.ll b/test/CodeGen/CellSPU/struct_1.ll
index adbb5ef..8c32750 100644
--- a/test/CodeGen/CellSPU/struct_1.ll
+++ b/test/CodeGen/CellSPU/struct_1.ll
@@ -22,6 +22,9 @@
 ; RUN: grep shufb   %t2.s | count 7
 ; RUN: grep stqd    %t2.s | count 7
 
+; CellSPU legalization is over-sensitive to Legalize's traversal order.
+; XFAIL: *
+
 ; ModuleID = 'struct_1.bc'
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
 target triple = "spu"
diff --git a/test/CodeGen/CellSPU/v2i32.ll b/test/CodeGen/CellSPU/v2i32.ll
index 71d4aba..9c5b896 100644
--- a/test/CodeGen/CellSPU/v2i32.ll
+++ b/test/CodeGen/CellSPU/v2i32.ll
@@ -9,7 +9,8 @@ define %vec @test_ret(%vec %param)
 
 define %vec @test_add(%vec %param)
 {
-;CHECK: a {{\$.}}, $3, $3
+;CHECK: shufb
+;CHECK: addx
   %1 = add %vec %param, %param
 ;CHECK: bi $lr
   ret %vec %1
@@ -17,21 +18,14 @@ define %vec @test_add(%vec %param)
 
 define %vec @test_sub(%vec %param)
 {
-;CHECK: sf {{\$.}}, $4, $3
   %1 = sub %vec %param, <i32 1, i32 1>
-
 ;CHECK: bi $lr
   ret %vec %1
 }
 
 define %vec @test_mul(%vec %param)
 {
-;CHECK: mpyu
-;CHECK: mpyh
-;CHECK: a {{\$., \$., \$.}}
-;CHECK: a {{\$., \$., \$.}}
   %1 = mul %vec %param, %param
-
 ;CHECK: bi $lr
   ret %vec %1
 }
@@ -56,22 +50,12 @@ define i32 @test_extract() {
 
 define void @test_store( %vec %val, %vec* %ptr)
 {
-;CHECK: stqd $3, 0(${{.}})
-;CHECK: bi $lr
   store %vec %val, %vec* %ptr
   ret void
 }
 
-;Alignment of <2 x i32> is not *directly* defined in the ABI
-;It probably is safe to interpret it as an array, thus having 8 byte
-;alignment (according to ABI). This tests that the size of
-;[2 x <2 x i32>] is 16 bytes, i.e. there is no padding between the
-;two arrays
 define <2 x i32>* @test_alignment( [2 x <2 x i32>]* %ptr)
 {
-; CHECK-NOT:	ai	$3, $3, 16
-; CHECK:	ai	$3, $3, 8
-; CHECK:	bi	$lr
    %rv = getelementptr [2 x <2 x i32>]* %ptr, i32 0, i32 1
    ret <2 x i32>* %rv
 }
diff --git a/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll b/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll
index 943ed88..d67559e 100644
--- a/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll
+++ b/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll
@@ -30,8 +30,6 @@ UnifiedUnreachableBlock:		; preds = %entry
 
 declare void @__cxa_throw(i8*, i8*, void (i8*)*) noreturn 
 
-declare i32 @llvm.eh.selector.i32(i8*, i8*, ...)
-
 declare void @__cxa_end_catch()
 
 declare i32 @__gxx_personality_v0(...)
diff --git a/test/CodeGen/Generic/2008-02-04-Ctlz.ll b/test/CodeGen/Generic/2008-02-04-Ctlz.ll
index 288bfd2..9f10206 100644
--- a/test/CodeGen/Generic/2008-02-04-Ctlz.ll
+++ b/test/CodeGen/Generic/2008-02-04-Ctlz.ll
@@ -4,8 +4,8 @@
 
 define i32 @main(i64 %arg) nounwind  {
 entry:
-	%tmp37 = tail call i64 @llvm.ctlz.i64( i64 %arg )		; <i64> [#uses=1]
-	%tmp47 = tail call i64 @llvm.cttz.i64( i64 %arg )		; <i64> [#uses=1]
+	%tmp37 = tail call i64 @llvm.ctlz.i64( i64 %arg, i1 true )		; <i64> [#uses=1]
+	%tmp47 = tail call i64 @llvm.cttz.i64( i64 %arg, i1 true )		; <i64> [#uses=1]
 	%tmp57 = tail call i64 @llvm.ctpop.i64( i64 %arg )		; <i64> [#uses=1]
 	%tmp38 = trunc i64 %tmp37 to i32		; <i32>:0 [#uses=1]
 	%tmp48 = trunc i64 %tmp47 to i32		; <i32>:0 [#uses=1]
@@ -16,6 +16,6 @@ entry:
 
 declare i32 @printf(i8* noalias , ...) nounwind 
 
-declare i64 @llvm.ctlz.i64(i64) nounwind readnone 
-declare i64 @llvm.cttz.i64(i64) nounwind readnone 
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone 
+declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone 
 declare i64 @llvm.ctpop.i64(i64) nounwind readnone 
diff --git a/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll b/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll
index 3cbf4c5..b483009 100644
--- a/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll
+++ b/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll
@@ -15,8 +15,6 @@
 %"struct.std::locale::facet" = type { i32 (...)**, i32 }
 %union..0._15 = type { i32 }
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
 declare i8* @__cxa_begin_catch(i8*) nounwind
 
 declare %"struct.std::ctype<char>"* @_ZSt9use_facetISt5ctypeIcEERKT_RKSt6locale(%"struct.std::locale"*)
diff --git a/test/CodeGen/Generic/bool-vector.ll b/test/CodeGen/Generic/bool-vector.ll
deleted file mode 100644
index 4758697..0000000
--- a/test/CodeGen/Generic/bool-vector.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s
-; PR1845
-
-define void @boolVectorSelect(<4 x i1>* %boolVectorPtr) {
-Body:
-        %castPtr = bitcast <4 x i1>* %boolVectorPtr to <4 x i1>*
-        %someBools = load <4 x i1>* %castPtr, align 1           ; <<4 x i1>>
-        %internal = alloca <4 x i1>, align 16           ; <<4 x i1>*> [#uses=1]
-        store <4 x i1> %someBools, <4 x i1>* %internal, align 1
-        ret void
-}
diff --git a/test/CodeGen/Generic/dbg-declare.ll b/test/CodeGen/Generic/dbg-declare.ll
new file mode 100644
index 0000000..01f7d6d
--- /dev/null
+++ b/test/CodeGen/Generic/dbg-declare.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -O0
+; <rdar://problem/11134152>
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define i32 @foo(i32* %x) nounwind uwtable ssp {
+entry:
+  %x.addr = alloca i32*, align 8
+  %saved_stack = alloca i8*
+  %cleanup.dest.slot = alloca i32
+  store i32* %x, i32** %x.addr, align 8
+  call void @llvm.dbg.declare(metadata !{i32** %x.addr}, metadata !14), !dbg !15
+  %0 = load i32** %x.addr, align 8, !dbg !16
+  %1 = load i32* %0, align 4, !dbg !16
+  %2 = zext i32 %1 to i64, !dbg !16
+  %3 = call i8* @llvm.stacksave(), !dbg !16
+  store i8* %3, i8** %saved_stack, !dbg !16
+  %vla = alloca i8, i64 %2, align 16, !dbg !16
+  call void @llvm.dbg.declare(metadata !{i8* %vla}, metadata !18), !dbg !23
+  store i32 1, i32* %cleanup.dest.slot
+  %4 = load i8** %saved_stack, !dbg !24
+  call void @llvm.stackrestore(i8* %4), !dbg !24
+  ret i32 0, !dbg !25
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare i8* @llvm.stacksave() nounwind
+
+declare void @llvm.stackrestore(i8*) nounwind
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"20020104-2.c", metadata !"/Volumes/Sandbox/llvm", metadata !"clang version 3.1 (trunk 153698)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*)* @foo, null, null, metadata !12} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !"20020104-2.c", metadata !"/Volumes/Sandbox/llvm", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9, metadata !10}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_const_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !6, i32 16777221, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!15 = metadata !{i32 5, i32 21, metadata !5, null}
+!16 = metadata !{i32 7, i32 13, metadata !17, null}
+!17 = metadata !{i32 786443, metadata !5, i32 6, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!18 = metadata !{i32 786688, metadata !17, metadata !"a", metadata !6, i32 7, metadata !19, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!19 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 0, i64 8, i32 0, i32 0, metadata !20, metadata !21, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!20 = metadata !{i32 786468, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!21 = metadata !{metadata !22}
+!22 = metadata !{i32 786465, i64 1, i64 0}        ; [ DW_TAG_subrange_type ]
+!23 = metadata !{i32 7, i32 8, metadata !17, null}
+!24 = metadata !{i32 9, i32 1, metadata !17, null}
+!25 = metadata !{i32 8, i32 3, metadata !17, null}
diff --git a/test/CodeGen/Generic/dg.exp b/test/CodeGen/Generic/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/CodeGen/Generic/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/CodeGen/Generic/lit.local.cfg b/test/CodeGen/Generic/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/CodeGen/Generic/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/CodeGen/Generic/llvm-ct-intrinsics.ll b/test/CodeGen/Generic/llvm-ct-intrinsics.ll
index 1db7549..abcdb9b 100644
--- a/test/CodeGen/Generic/llvm-ct-intrinsics.ll
+++ b/test/CodeGen/Generic/llvm-ct-intrinsics.ll
@@ -21,19 +21,19 @@ define void @ctpoptest(i8 %A, i16 %B, i32 %C, i64 %D, i8* %AP, i16* %BP, i32* %C
 	ret void
 }
 
-declare i64 @llvm.ctlz.i64(i64)
+declare i64 @llvm.ctlz.i64(i64, i1)
 
-declare i32 @llvm.ctlz.i32(i32)
+declare i32 @llvm.ctlz.i32(i32, i1)
 
-declare i16 @llvm.ctlz.i16(i16)
+declare i16 @llvm.ctlz.i16(i16, i1)
 
-declare i8 @llvm.ctlz.i8(i8)
+declare i8 @llvm.ctlz.i8(i8, i1)
 
 define void @ctlztest(i8 %A, i16 %B, i32 %C, i64 %D, i8* %AP, i16* %BP, i32* %CP, i64* %DP) {
-	%a = call i8 @llvm.ctlz.i8( i8 %A )		; <i8> [#uses=1]
-	%b = call i16 @llvm.ctlz.i16( i16 %B )		; <i16> [#uses=1]
-	%c = call i32 @llvm.ctlz.i32( i32 %C )		; <i32> [#uses=1]
-	%d = call i64 @llvm.ctlz.i64( i64 %D )		; <i64> [#uses=1]
+	%a = call i8 @llvm.ctlz.i8( i8 %A, i1 true )		; <i8> [#uses=1]
+	%b = call i16 @llvm.ctlz.i16( i16 %B, i1 true )		; <i16> [#uses=1]
+	%c = call i32 @llvm.ctlz.i32( i32 %C, i1 true )		; <i32> [#uses=1]
+	%d = call i64 @llvm.ctlz.i64( i64 %D, i1 true )		; <i64> [#uses=1]
 	store i8 %a, i8* %AP
 	store i16 %b, i16* %BP
 	store i32 %c, i32* %CP
@@ -41,19 +41,19 @@ define void @ctlztest(i8 %A, i16 %B, i32 %C, i64 %D, i8* %AP, i16* %BP, i32* %CP
 	ret void
 }
 
-declare i64 @llvm.cttz.i64(i64)
+declare i64 @llvm.cttz.i64(i64, i1)
 
-declare i32 @llvm.cttz.i32(i32)
+declare i32 @llvm.cttz.i32(i32, i1)
 
-declare i16 @llvm.cttz.i16(i16)
+declare i16 @llvm.cttz.i16(i16, i1)
 
-declare i8 @llvm.cttz.i8(i8)
+declare i8 @llvm.cttz.i8(i8, i1)
 
 define void @cttztest(i8 %A, i16 %B, i32 %C, i64 %D, i8* %AP, i16* %BP, i32* %CP, i64* %DP) {
-	%a = call i8 @llvm.cttz.i8( i8 %A )		; <i8> [#uses=1]
-	%b = call i16 @llvm.cttz.i16( i16 %B )		; <i16> [#uses=1]
-	%c = call i32 @llvm.cttz.i32( i32 %C )		; <i32> [#uses=1]
-	%d = call i64 @llvm.cttz.i64( i64 %D )		; <i64> [#uses=1]
+	%a = call i8 @llvm.cttz.i8( i8 %A, i1 true )		; <i8> [#uses=1]
+	%b = call i16 @llvm.cttz.i16( i16 %B, i1 true )		; <i16> [#uses=1]
+	%c = call i32 @llvm.cttz.i32( i32 %C, i1 true )		; <i32> [#uses=1]
+	%d = call i64 @llvm.cttz.i64( i64 %D, i1 true )		; <i64> [#uses=1]
 	store i8 %a, i8* %AP
 	store i16 %b, i16* %BP
 	store i32 %c, i32* %CP
diff --git a/test/CodeGen/Generic/pr12507.ll b/test/CodeGen/Generic/pr12507.ll
new file mode 100644
index 0000000..c793358
--- /dev/null
+++ b/test/CodeGen/Generic/pr12507.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s
+
+@c = external global i32, align 4
+
+define void @foo(i160 %x) {
+entry:
+  %cmp.i = icmp ne i160 %x, 340282366920938463463374607431768211456
+  %conv.i = zext i1 %cmp.i to i32
+  %tobool.i = icmp eq i32 %conv.i, 0
+  br i1 %tobool.i, label %if.then.i, label %fn1.exit
+
+if.then.i:
+  store i32 0, i32* @c, align 4
+  br label %fn1.exit
+
+fn1.exit:
+  ret void
+}
diff --git a/test/CodeGen/Generic/select.ll b/test/CodeGen/Generic/select.ll
index 63052c1..77636eb 100644
--- a/test/CodeGen/Generic/select.ll
+++ b/test/CodeGen/Generic/select.ll
@@ -185,3 +185,11 @@ define i32 @checkFoldGEP(%Domain* %D, i64 %idx) {
         ret i32 %reg820
 }
 
+; Test case for scalarising a 1 element vselect
+;
+define <1 x i32> @checkScalariseVSELECT(<1 x i32> %a, <1 x i32> %b) {
+        %cond = icmp uge <1 x i32> %a, %b
+        %s = select <1 x i1> %cond, <1 x i32> %a, <1 x i32> %b
+        ret <1 x i32> %s
+}
+
diff --git a/test/CodeGen/Hexagon/args.ll b/test/CodeGen/Hexagon/args.ll
new file mode 100644
index 0000000..69002e0
--- /dev/null
+++ b/test/CodeGen/Hexagon/args.ll
@@ -0,0 +1,19 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched < %s | FileCheck %s
+; CHECK: r[[T0:[0-9]+]] = #7
+; CHECK: memw(r29 + #0) = r[[T0]]
+; CHECK: r0 = #1
+; CHECK: r1 = #2
+; CHECK: r2 = #3
+; CHECK: r3 = #4
+; CHECK: r4 = #5
+; CHECK: r5 = #6
+
+
+define void @foo() nounwind {
+entry:
+  call void @bar(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7)
+  ret void
+}
+
+declare void @bar(i32, i32, i32, i32, i32, i32, i32)
diff --git a/test/CodeGen/Hexagon/combine.ll b/test/CodeGen/Hexagon/combine.ll
new file mode 100644
index 0000000..36abd74
--- /dev/null
+++ b/test/CodeGen/Hexagon/combine.ll
@@ -0,0 +1,18 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: combine(r{{[0-9]+}}, r{{[0-9]+}})
+
+@j = external global i32
+@k = external global i64
+
+define void @foo() nounwind {
+entry:
+  %0 = load i32* @j, align 4
+  %1 = load i64* @k, align 8
+  %conv = trunc i64 %1 to i32
+  %2 = call i64 @llvm.hexagon.A2.combinew(i32 %0, i32 %conv)
+  store i64 %2, i64* @k, align 8
+  ret void
+}
+
+declare i64 @llvm.hexagon.A2.combinew(i32, i32) nounwind readnone
diff --git a/test/CodeGen/Hexagon/double.ll b/test/CodeGen/Hexagon/double.ll
new file mode 100644
index 0000000..04c2ec1
--- /dev/null
+++ b/test/CodeGen/Hexagon/double.ll
@@ -0,0 +1,23 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: __hexagon_adddf3
+; CHECK: __hexagon_subdf3
+
+define void @foo(double* %acc, double %num, double %num2) nounwind {
+entry:
+  %acc.addr = alloca double*, align 4
+  %num.addr = alloca double, align 8
+  %num2.addr = alloca double, align 8
+  store double* %acc, double** %acc.addr, align 4
+  store double %num, double* %num.addr, align 8
+  store double %num2, double* %num2.addr, align 8
+  %0 = load double** %acc.addr, align 4
+  %1 = load double* %0
+  %2 = load double* %num.addr, align 8
+  %add = fadd double %1, %2
+  %3 = load double* %num2.addr, align 8
+  %sub = fsub double %add, %3
+  %4 = load double** %acc.addr, align 4
+  store double %sub, double* %4
+  ret void
+}
diff --git a/test/CodeGen/Hexagon/float.ll b/test/CodeGen/Hexagon/float.ll
new file mode 100644
index 0000000..51acf2e
--- /dev/null
+++ b/test/CodeGen/Hexagon/float.ll
@@ -0,0 +1,23 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: __hexagon_addsf3
+; CHECK: __hexagon_subsf3
+
+define void @foo(float* %acc, float %num, float %num2) nounwind {
+entry:
+  %acc.addr = alloca float*, align 4
+  %num.addr = alloca float, align 4
+  %num2.addr = alloca float, align 4
+  store float* %acc, float** %acc.addr, align 4
+  store float %num, float* %num.addr, align 4
+  store float %num2, float* %num2.addr, align 4
+  %0 = load float** %acc.addr, align 4
+  %1 = load float* %0
+  %2 = load float* %num.addr, align 4
+  %add = fadd float %1, %2
+  %3 = load float* %num2.addr, align 4
+  %sub = fsub float %add, %3
+  %4 = load float** %acc.addr, align 4
+  store float %sub, float* %4
+  ret void
+}
diff --git a/test/CodeGen/Hexagon/frame.ll b/test/CodeGen/Hexagon/frame.ll
new file mode 100644
index 0000000..c0a9fda
--- /dev/null
+++ b/test/CodeGen/Hexagon/frame.ll
@@ -0,0 +1,24 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+
+@num = external global i32
+@acc = external global i32
+@num2 = external global i32
+
+; CHECK: allocframe
+; CHECK: dealloc_return
+
+define i32 @foo() nounwind {
+entry:
+  %i = alloca i32, align 4
+  %0 = load i32* @num, align 4
+  store i32 %0, i32* %i, align 4
+  %1 = load i32* %i, align 4
+  %2 = load i32* @acc, align 4
+  %mul = mul nsw i32 %1, %2
+  %3 = load i32* @num2, align 4
+  %add = add nsw i32 %mul, %3
+  store i32 %add, i32* %i, align 4
+  %4 = load i32* %i, align 4
+  ret i32 %4
+}
diff --git a/test/CodeGen/Hexagon/lit.local.cfg b/test/CodeGen/Hexagon/lit.local.cfg
new file mode 100644
index 0000000..24324b2
--- /dev/null
+++ b/test/CodeGen/Hexagon/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'Hexagon' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/Hexagon/mpy.ll b/test/CodeGen/Hexagon/mpy.ll
new file mode 100644
index 0000000..afd6fc6
--- /dev/null
+++ b/test/CodeGen/Hexagon/mpy.ll
@@ -0,0 +1,20 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: += mpyi
+
+define void @foo(i32 %acc, i32 %num, i32 %num2) nounwind {
+entry:
+  %acc.addr = alloca i32, align 4
+  %num.addr = alloca i32, align 4
+  %num2.addr = alloca i32, align 4
+  store i32 %acc, i32* %acc.addr, align 4
+  store i32 %num, i32* %num.addr, align 4
+  store i32 %num2, i32* %num2.addr, align 4
+  %0 = load i32* %num.addr, align 4
+  %1 = load i32* %acc.addr, align 4
+  %mul = mul nsw i32 %0, %1
+  %2 = load i32* %num2.addr, align 4
+  %add = add nsw i32 %mul, %2
+  store i32 %add, i32* %num.addr, align 4
+  ret void
+}
diff --git a/test/CodeGen/Hexagon/static.ll b/test/CodeGen/Hexagon/static.ll
new file mode 100644
index 0000000..c63a3ba
--- /dev/null
+++ b/test/CodeGen/Hexagon/static.ll
@@ -0,0 +1,21 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched < %s | FileCheck %s
+
+@num = external global i32
+@acc = external global i32
+@val = external global i32
+
+; CHECK: CONST32(#num)
+; CHECK: CONST32(#acc)
+; CHECK: CONST32(#val)
+
+define void @foo() nounwind {
+entry:
+  %0 = load i32* @num, align 4
+  %1 = load i32* @acc, align 4
+  %mul = mul nsw i32 %0, %1
+  %2 = load i32* @val, align 4
+  %add = add nsw i32 %mul, %2
+  store i32 %add, i32* @num, align 4
+  ret void
+}
diff --git a/test/CodeGen/Hexagon/struct_args.ll b/test/CodeGen/Hexagon/struct_args.ll
new file mode 100644
index 0000000..2c962d0
--- /dev/null
+++ b/test/CodeGen/Hexagon/struct_args.ll
@@ -0,0 +1,16 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: r1:0 = or(r{{[0-9]}}:{{[0-9]}}, r{{[0-9]}}:{{[0-9]}})
+
+%struct.small = type { i32, i32 }
+
+@s1 = common global %struct.small zeroinitializer, align 4
+
+define void @foo() nounwind {
+entry:
+  %0 = load i64* bitcast (%struct.small* @s1 to i64*), align 1
+  call void @bar(i64 %0)
+  ret void
+}
+
+declare void @bar(i64)
diff --git a/test/CodeGen/Hexagon/struct_args_large.ll b/test/CodeGen/Hexagon/struct_args_large.ll
new file mode 100644
index 0000000..69de4f6
--- /dev/null
+++ b/test/CodeGen/Hexagon/struct_args_large.ll
@@ -0,0 +1,17 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: r[[T0:[0-9]+]] = CONST32(#s2)
+; CHECK: r[[T1:[0-9]+]] = memw(r[[T0]] + #0)
+; CHECK: memw(r29 + #0) = r[[T1]]
+
+%struct.large = type { i64, i64 }
+
+@s2 = common global %struct.large zeroinitializer, align 8
+
+define void @foo() nounwind {
+entry:
+  call void @bar(%struct.large* byval @s2)
+  ret void
+}
+
+declare void @bar(%struct.large* byval)
diff --git a/test/CodeGen/Hexagon/vaddh.ll b/test/CodeGen/Hexagon/vaddh.ll
new file mode 100644
index 0000000..788e474
--- /dev/null
+++ b/test/CodeGen/Hexagon/vaddh.ll
@@ -0,0 +1,17 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: vaddh(r{{[0-9]+}}, r{{[0-9]+}})
+
+@j = external global i32
+@k = external global i32
+
+define void @foo() nounwind {
+entry:
+  %0 = load i32* @j, align 4
+  %1 = load i32* @k, align 4
+  %2 = call i32 @llvm.hexagon.A2.svaddh(i32 %0, i32 %1)
+  store i32 %2, i32* @k, align 4
+  ret void
+}
+
+declare i32 @llvm.hexagon.A2.svaddh(i32, i32) nounwind readnone
diff --git a/test/CodeGen/MBlaze/cc.ll b/test/CodeGen/MBlaze/cc.ll
index b1eb22a..827fd32 100644
--- a/test/CodeGen/MBlaze/cc.ll
+++ b/test/CodeGen/MBlaze/cc.ll
@@ -222,8 +222,8 @@ define void @testing() {
 
     %tmp.12 = call i32 @params8_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5,
                                          i32 6, i32 7, i32 8)
-    ; CHECK:        {{swi? .*, r1, 28}}
     ; CHECK:        {{swi? .*, r1, 32}}
+    ; CHECK:        {{swi? .*, r1, 28}}
     ; CHECK:        {{.* r5, .*, .*}}
     ; CHECK:        {{.* r6, .*, .*}}
     ; CHECK:        {{.* r7, .*, .*}}
@@ -235,9 +235,9 @@ define void @testing() {
 
     %tmp.13 = call i32 @params9_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5,
                                          i32 6, i32 7, i32 8, i32 9)
-    ; CHECK:        {{swi? .*, r1, 28}}
-    ; CHECK:        {{swi? .*, r1, 32}}
     ; CHECK:        {{swi? .*, r1, 36}}
+    ; CHECK:        {{swi? .*, r1, 32}}
+    ; CHECK:        {{swi? .*, r1, 28}}
     ; CHECK:        {{.* r5, .*, .*}}
     ; CHECK:        {{.* r6, .*, .*}}
     ; CHECK:        {{.* r7, .*, .*}}
@@ -249,10 +249,10 @@ define void @testing() {
 
     %tmp.14 = call i32 @params10_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5,
                                           i32 6, i32 7, i32 8, i32 9, i32 10)
-    ; CHECK:        {{swi? .*, r1, 28}}
-    ; CHECK:        {{swi? .*, r1, 32}}
-    ; CHECK:        {{swi? .*, r1, 36}}
     ; CHECK:        {{swi? .*, r1, 40}}
+    ; CHECK:        {{swi? .*, r1, 36}}
+    ; CHECK:        {{swi? .*, r1, 32}}
+    ; CHECK:        {{swi? .*, r1, 28}}
     ; CHECK:        {{.* r5, .*, .*}}
     ; CHECK:        {{.* r6, .*, .*}}
     ; CHECK:        {{.* r7, .*, .*}}
diff --git a/test/CodeGen/MBlaze/dg.exp b/test/CodeGen/MBlaze/dg.exp
deleted file mode 100644
index bfd5e47..0000000
--- a/test/CodeGen/MBlaze/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target MBlaze] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/MBlaze/div.ll b/test/CodeGen/MBlaze/div.ll
index fae9830..621784a 100644
--- a/test/CodeGen/MBlaze/div.ll
+++ b/test/CodeGen/MBlaze/div.ll
@@ -13,14 +13,14 @@ define i8 @test_i8(i8 %a, i8 %b) {
     ; FUN-NOT:    idiv
     ; FUN:        brlid
     ; DIV-NOT:    brlid
-    ; DIV:        idivu
+    ; DIV:        idiv
 
     %tmp.2 = sdiv i8 %a, %b
     ; FUN-NOT:    idiv
     ; FUN:        brlid
     ; DIV-NOT:    brlid
-    ; DIV-NOT:    idivu
-    ; DIV:        idiv
+    ; DIV-NOT:    idiv
+    ; DIV:        idivu
 
     %tmp.3 = add i8 %tmp.1, %tmp.2
     ret i8 %tmp.3
@@ -36,14 +36,14 @@ define i16 @test_i16(i16 %a, i16 %b) {
     ; FUN-NOT:    idiv
     ; FUN:        brlid
     ; DIV-NOT:    brlid
-    ; DIV:        idivu
+    ; DIV:        idiv
 
     %tmp.2 = sdiv i16 %a, %b
     ; FUN-NOT:    idiv
     ; FUN:        brlid
     ; DIV-NOT:    brlid
-    ; DIV-NOT:    idivu
-    ; DIV:        idiv
+    ; DIV-NOT:    idiv
+    ; DIV:        idivu
 
     %tmp.3 = add i16 %tmp.1, %tmp.2
     ret i16 %tmp.3
@@ -59,14 +59,14 @@ define i32 @test_i32(i32 %a, i32 %b) {
     ; FUN-NOT:    idiv
     ; FUN:        brlid
     ; DIV-NOT:    brlid
-    ; DIV:        idivu
+    ; DIV:        idiv
 
     %tmp.2 = sdiv i32 %a, %b
     ; FUN-NOT:    idiv
     ; FUN:        brlid
     ; DIV-NOT:    brlid
-    ; DIV-NOT:    idivu
-    ; DIV:        idiv
+    ; DIV-NOT:    idiv
+    ; DIV:        idivu
 
     %tmp.3 = add i32 %tmp.1, %tmp.2
     ret i32 %tmp.3
diff --git a/test/CodeGen/MBlaze/lit.local.cfg b/test/CodeGen/MBlaze/lit.local.cfg
new file mode 100644
index 0000000..e236200
--- /dev/null
+++ b/test/CodeGen/MBlaze/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'MBlaze' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll b/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll
index f339373..4c7d2d0 100644
--- a/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll
+++ b/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll
@@ -7,9 +7,9 @@ target triple = "msp430-unknown-linux-gnu"
 
 define void @uip_arp_arpin() nounwind {
 entry:
-	%tmp = volatile load i16* @uip_len		; <i16> [#uses=1]
+	%tmp = load volatile i16* @uip_len		; <i16> [#uses=1]
 	%cmp = icmp ult i16 %tmp, 42		; <i1> [#uses=1]
-	volatile store i16 0, i16* @uip_len
+	store volatile i16 0, i16* @uip_len
 	br i1 %cmp, label %if.then, label %if.end
 
 if.then:		; preds = %entry
diff --git a/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll b/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll
index 088d3e1..e8c0d14a 100644
--- a/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll
+++ b/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll
@@ -6,8 +6,8 @@ target triple = "msp430-generic-generic"
 define i16 @foo() nounwind readnone {
 entry:
   %result = alloca i16, align 1                   ; <i16*> [#uses=2]
-  volatile store i16 0, i16* %result
-  %tmp = volatile load i16* %result               ; <i16> [#uses=1]
+  store volatile i16 0, i16* %result
+  %tmp = load volatile i16* %result               ; <i16> [#uses=1]
   ret i16 %tmp
 }
 
@@ -22,8 +22,8 @@ while.cond:                                       ; preds = %while.cond, %entry
 
 while.end:                                        ; preds = %while.cond
   %result.i = alloca i16, align 1                 ; <i16*> [#uses=2]
-  volatile store i16 0, i16* %result.i
-  %tmp.i = volatile load i16* %result.i           ; <i16> [#uses=0]
+  store volatile i16 0, i16* %result.i
+  %tmp.i = load volatile i16* %result.i           ; <i16> [#uses=0]
   ret i16 0
 }
 
diff --git a/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll b/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
index 4d7d9b9..9fab482 100644
--- a/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
+++ b/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
@@ -11,10 +11,10 @@ entry:
   %x.addr = alloca i8                             ; <i8*> [#uses=2]
   %tmp = alloca i8, align 1                       ; <i8*> [#uses=2]
   store i8 %x, i8* %x.addr
-  %tmp1 = volatile load i8* @"\010x0021"          ; <i8> [#uses=1]
+  %tmp1 = load volatile i8* @"\010x0021"          ; <i8> [#uses=1]
   store i8 %tmp1, i8* %tmp
   %tmp2 = load i8* %x.addr                        ; <i8> [#uses=1]
-  volatile store i8 %tmp2, i8* @"\010x0021"
+  store volatile i8 %tmp2, i8* @"\010x0021"
   %tmp3 = load i8* %tmp                           ; <i8> [#uses=1]
   store i8 %tmp3, i8* %retval
   %0 = load i8* %retval                           ; <i8> [#uses=1]
diff --git a/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll b/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll
index 856eb9d..c1a186a 100644
--- a/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll
+++ b/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll
@@ -4,9 +4,9 @@ define void @foo() nounwind {
 entry:
 	%r = alloca i8		; <i8*> [#uses=2]
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	volatile load i8* %r, align 1		; <i8>:0 [#uses=1]
+	load volatile i8* %r, align 1		; <i8>:0 [#uses=1]
 	or i8 %0, 1		; <i8>:1 [#uses=1]
-	volatile store i8 %1, i8* %r, align 1
+	store volatile i8 %1, i8* %r, align 1
 	br label %return
 
 return:		; preds = %entry
diff --git a/test/CodeGen/MSP430/AddrMode-bis-rx.ll b/test/CodeGen/MSP430/AddrMode-bis-rx.ll
index 4f9a724..c7ecb5a 100644
--- a/test/CodeGen/MSP430/AddrMode-bis-rx.ll
+++ b/test/CodeGen/MSP430/AddrMode-bis-rx.ll
@@ -32,7 +32,7 @@ define i8 @am3(i8 %x, i16 %n) nounwind {
 ; CHECK:		bis.b	bar(r14), r15
 
 define i16 @am4(i16 %x) nounwind {
-	%1 = volatile load i16* inttoptr(i16 32 to i16*)
+	%1 = load volatile i16* inttoptr(i16 32 to i16*)
 	%2 = or i16 %1,%x
 	ret i16 %2
 }
diff --git a/test/CodeGen/MSP430/AddrMode-bis-xr.ll b/test/CodeGen/MSP430/AddrMode-bis-xr.ll
index 17ebd87..727c29f 100644
--- a/test/CodeGen/MSP430/AddrMode-bis-xr.ll
+++ b/test/CodeGen/MSP430/AddrMode-bis-xr.ll
@@ -35,9 +35,9 @@ define void @am3(i16 %i, i8 %x) nounwind {
 ; CHECK:		bis.b	r14, bar(r15)
 
 define void @am4(i16 %x) nounwind {
-	%1 = volatile load i16* inttoptr(i16 32 to i16*)
+	%1 = load volatile i16* inttoptr(i16 32 to i16*)
 	%2 = or i16 %x, %1
-	volatile store i16 %2, i16* inttoptr(i16 32 to i16*)
+	store volatile i16 %2, i16* inttoptr(i16 32 to i16*)
 	ret void
 }
 ; CHECK: am4:
diff --git a/test/CodeGen/MSP430/AddrMode-mov-rx.ll b/test/CodeGen/MSP430/AddrMode-mov-rx.ll
index 6676b88..7cd345b 100644
--- a/test/CodeGen/MSP430/AddrMode-mov-rx.ll
+++ b/test/CodeGen/MSP430/AddrMode-mov-rx.ll
@@ -29,7 +29,7 @@ define i8 @am3(i16 %n) nounwind {
 ; CHECK:		mov.b	bar(r15), r15
 
 define i16 @am4() nounwind {
-	%1 = volatile load i16* inttoptr(i16 32 to i16*)
+	%1 = load volatile i16* inttoptr(i16 32 to i16*)
 	ret i16 %1
 }
 ; CHECK: am4:
diff --git a/test/CodeGen/MSP430/AddrMode-mov-xr.ll b/test/CodeGen/MSP430/AddrMode-mov-xr.ll
index 4b327b0..5eeb02f 100644
--- a/test/CodeGen/MSP430/AddrMode-mov-xr.ll
+++ b/test/CodeGen/MSP430/AddrMode-mov-xr.ll
@@ -29,7 +29,7 @@ define void @am3(i16 %i, i8 %a) nounwind {
 ; CHECK:		mov.b	r14, bar(r15)
 
 define void @am4(i16 %a) nounwind {
-	volatile store i16 %a, i16* inttoptr(i16 32 to i16*)
+	store volatile i16 %a, i16* inttoptr(i16 32 to i16*)
 	ret void
 }
 ; CHECK: am4:
diff --git a/test/CodeGen/MSP430/Inst16mm.ll b/test/CodeGen/MSP430/Inst16mm.ll
index 2337c2c..d4ae811 100644
--- a/test/CodeGen/MSP430/Inst16mm.ll
+++ b/test/CodeGen/MSP430/Inst16mm.ll
@@ -64,6 +64,6 @@ entry:
  %0 = load i16* %retval                          ; <i16> [#uses=1]
  ret i16 %0
 ; CHECK: mov2:
-; CHECK:	mov.w	0(r1), 4(r1)
 ; CHECK:	mov.w	2(r1), 6(r1)
+; CHECK:	mov.w	0(r1), 4(r1)
 }
diff --git a/test/CodeGen/MSP430/dg.exp b/test/CodeGen/MSP430/dg.exp
deleted file mode 100644
index e4ea13a..0000000
--- a/test/CodeGen/MSP430/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target MSP430] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/MSP430/indirectbr2.ll b/test/CodeGen/MSP430/indirectbr2.ll
index 93cfb25..dc2abf5 100644
--- a/test/CodeGen/MSP430/indirectbr2.ll
+++ b/test/CodeGen/MSP430/indirectbr2.ll
@@ -5,7 +5,7 @@ define internal i16 @foo(i16 %i) nounwind {
 entry:
   %tmp1 = getelementptr inbounds [5 x i8*]* @C.0.2070, i16 0, i16 %i ; <i8**> [#uses=1]
   %gotovar.4.0 = load i8** %tmp1, align 4        ; <i8*> [#uses=1]
-; CHECK: mov.w   .LC.0.2070(r15), pc
+; CHECK: mov.w   .LC.0.2070(r12), pc
   indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]
 
 L5:                                               ; preds = %bb2
diff --git a/test/CodeGen/MSP430/lit.local.cfg b/test/CodeGen/MSP430/lit.local.cfg
new file mode 100644
index 0000000..972732e
--- /dev/null
+++ b/test/CodeGen/MSP430/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'MSP430' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/Mips/2008-06-05-Carry.ll b/test/CodeGen/Mips/2008-06-05-Carry.ll
index 9d8e391..c61e1cd 100644
--- a/test/CodeGen/Mips/2008-06-05-Carry.ll
+++ b/test/CodeGen/Mips/2008-06-05-Carry.ll
@@ -1,19 +1,22 @@
-; RUN: llc < %s -march=mips -o %t
-; RUN: grep subu %t | count 2
-; RUN: grep addu %t | count 4
-
-target datalayout =
-"e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
+; RUN: llc -march=mips < %s | FileCheck %s
 
 define i64 @add64(i64 %u, i64 %v) nounwind  {
 entry:
-	%tmp2 = add i64 %u, %v	
+; CHECK: addu
+; CHECK: sltu 
+; CHECK: addu
+; CHECK: addu
+  %tmp2 = add i64 %u, %v  
   ret i64 %tmp2
 }
 
 define i64 @sub64(i64 %u, i64 %v) nounwind  {
 entry:
+; CHECK: sub64
+; CHECK: subu
+; CHECK: sltu 
+; CHECK: addu
+; CHECK: subu
   %tmp2 = sub i64 %u, %v
   ret i64 %tmp2
 }
diff --git a/test/CodeGen/Mips/2008-07-03-SRet.ll b/test/CodeGen/Mips/2008-07-03-SRet.ll
index b1d20d9..afec7f6 100644
--- a/test/CodeGen/Mips/2008-07-03-SRet.ll
+++ b/test/CodeGen/Mips/2008-07-03-SRet.ll
@@ -1,17 +1,18 @@
-; RUN: llc < %s -march=mips | grep {sw.*(\$4)} | count 3
+; RUN: llc -march=mips < %s | FileCheck %s
 
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
-	%struct.sret0 = type { i32, i32, i32 }
+%struct.sret0 = type { i32, i32, i32 }
 
 define void @test0(%struct.sret0* noalias sret %agg.result, i32 %dummy) nounwind {
 entry:
-	getelementptr %struct.sret0* %agg.result, i32 0, i32 0		; <i32*>:0 [#uses=1]
-	store i32 %dummy, i32* %0, align 4
-	getelementptr %struct.sret0* %agg.result, i32 0, i32 1		; <i32*>:1 [#uses=1]
-	store i32 %dummy, i32* %1, align 4
-	getelementptr %struct.sret0* %agg.result, i32 0, i32 2		; <i32*>:2 [#uses=1]
-	store i32 %dummy, i32* %2, align 4
-	ret void
+; CHECK: sw ${{[0-9]+}}, {{[0-9]+}}($4)
+; CHECK: sw ${{[0-9]+}}, {{[0-9]+}}($4)
+; CHECK: sw ${{[0-9]+}}, {{[0-9]+}}($4)
+  getelementptr %struct.sret0* %agg.result, i32 0, i32 0    ; <i32*>:0 [#uses=1]
+  store i32 %dummy, i32* %0, align 4
+  getelementptr %struct.sret0* %agg.result, i32 0, i32 1    ; <i32*>:1 [#uses=1]
+  store i32 %dummy, i32* %1, align 4
+  getelementptr %struct.sret0* %agg.result, i32 0, i32 2    ; <i32*>:2 [#uses=1]
+  store i32 %dummy, i32* %2, align 4
+  ret void
 }
 
diff --git a/test/CodeGen/Mips/2008-07-07-Float2Int.ll b/test/CodeGen/Mips/2008-07-07-Float2Int.ll
index d804c7d..4c55236 100644
--- a/test/CodeGen/Mips/2008-07-07-Float2Int.ll
+++ b/test/CodeGen/Mips/2008-07-07-Float2Int.ll
@@ -1,16 +1,17 @@
-; RUN: llc < %s -march=mips | grep trunc.w.s | count 3
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
+; RUN: llc -march=mips < %s | FileCheck %s
 
 define i32 @fptoint(float %a) nounwind {
 entry:
-	fptosi float %a to i32		; <i32>:0 [#uses=1]
-	ret i32 %0
+; CHECK: trunc.w.s 
+  fptosi float %a to i32		; <i32>:0 [#uses=1]
+  ret i32 %0
 }
 
 define i32 @fptouint(float %a) nounwind {
 entry:
-	fptoui float %a to i32		; <i32>:0 [#uses=1]
-	ret i32 %0
+; CHECK: fptouint
+; CHECK: trunc.w.s 
+; CHECK: trunc.w.s 
+  fptoui float %a to i32		; <i32>:0 [#uses=1]
+  ret i32 %0
 }
diff --git a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
index e0c745f..8479ad2 100644
--- a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
+++ b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
@@ -1,20 +1,16 @@
-; DISABLED: llc < %s -march=mips -o %t
-; DISABLED: grep seh %t | count 1
-; DISABLED: grep seb %t | count 1
-; RUN: false
-; XFAIL: *
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
+; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s 
+; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s 
 
 define signext i8 @A(i8 %e.0, i8 signext %sum)  nounwind {
 entry:
+; CHECK: seb
 	add i8 %sum, %e.0		; <i8>:0 [#uses=1]
 	ret i8 %0
 }
 
 define signext i16 @B(i16 %e.0, i16 signext %sum) nounwind {
 entry:
+; CHECK: seh
 	add i16 %sum, %e.0		; <i16>:0 [#uses=1]
 	ret i16 %0
 }
diff --git a/test/CodeGen/Mips/2008-07-22-Cstpool.ll b/test/CodeGen/Mips/2008-07-22-Cstpool.ll
index 94dfe35..a8e5470 100644
--- a/test/CodeGen/Mips/2008-07-22-Cstpool.ll
+++ b/test/CodeGen/Mips/2008-07-22-Cstpool.ll
@@ -1,12 +1,13 @@
-; RUN: llc < %s -march=mips -o %t
-; RUN: grep {CPI\[01\]_\[01\]:} %t | count 2
-; RUN: grep {.rodata.cst4,"aM",@progbits} %t | count 1
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
+; RUN: llc -march=mips < %s | FileCheck %s
 
 define float @F(float %a) nounwind {
+; CHECK: .rodata.cst4,"aM",@progbits 
 entry:
-	fadd float %a, 0x4011333340000000		; <float>:0 [#uses=1]
-	fadd float %0, 0x4010666660000000		; <float>:1 [#uses=1]
-	ret float %1
+; CHECK: ($CPI0_{{[0-1]}})
+; CHECK: ($CPI0_{{[0,1]}}) 
+; CHECK: ($CPI0_{{[0,1]}}) 
+; CHECK: ($CPI0_{{[0,1]}}) 
+  fadd float %a, 0x4011333340000000		; <float>:0 [#uses=1]
+  fadd float %0, 0x4010666660000000		; <float>:1 [#uses=1]
+  ret float %1
 }
diff --git a/test/CodeGen/Mips/2008-08-01-AsmInline.ll b/test/CodeGen/Mips/2008-08-01-AsmInline.ll
index 23ed64a..dbde742 100644
--- a/test/CodeGen/Mips/2008-08-01-AsmInline.ll
+++ b/test/CodeGen/Mips/2008-08-01-AsmInline.ll
@@ -1,17 +1,53 @@
-; RUN: llc < %s -march=mips -o %t
-; RUN: grep mfhi  %t | count 1
-; RUN: grep mflo  %t | count 1
-; RUN: grep multu %t | count 1
+; RUN: llc -march=mips < %s | FileCheck %s
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s
 
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
-	%struct.DWstruct = type { i32, i32 }
+%struct.DWstruct = type { i32, i32 }
 
 define i32 @A0(i32 %u, i32 %v) nounwind  {
 entry:
-	%asmtmp = tail call %struct.DWstruct asm "multu $2,$3", "={lo},={hi},d,d"( i32 %u, i32 %v ) nounwind
-	%asmresult = extractvalue %struct.DWstruct %asmtmp, 0
-	%asmresult1 = extractvalue %struct.DWstruct %asmtmp, 1		; <i32> [#uses=1]
+; CHECK: multu 
+; CHECK: mflo
+; CHECK: mfhi
+  %asmtmp = tail call %struct.DWstruct asm "multu $2,$3", "={lo},={hi},d,d"( i32 %u, i32 %v ) nounwind
+  %asmresult = extractvalue %struct.DWstruct %asmtmp, 0
+  %asmresult1 = extractvalue %struct.DWstruct %asmtmp, 1    ; <i32> [#uses=1]
   %res = add i32 %asmresult, %asmresult1
-	ret i32 %res
+  ret i32 %res
 }
+
+@gi2 = external global i32
+@gi1 = external global i32
+@gi0 = external global i32
+@gf0 = external global float
+@gf1 = external global float
+@gd0 = external global double
+@gd1 = external global double
+
+define void @foo0() nounwind {
+entry:
+; CHECK: addu
+  %0 = load i32* @gi1, align 4
+  %1 = load i32* @gi0, align 4
+  %2 = tail call i32 asm "addu $0, $1, $2", "=r,r,r"(i32 %0, i32 %1) nounwind
+  store i32 %2, i32* @gi2, align 4
+  ret void
+}
+
+define void @foo2() nounwind {
+entry:
+; CHECK: neg.s
+  %0 = load float* @gf1, align 4
+  %1 = tail call float asm "neg.s $0, $1", "=f,f"(float %0) nounwind
+  store float %1, float* @gf0, align 4
+  ret void
+}
+
+define void @foo3() nounwind {
+entry:
+; CHECK: neg.d
+  %0 = load double* @gd1, align 8
+  %1 = tail call double asm "neg.d $0, $1", "=f,f"(double %0) nounwind
+  store double %1, double* @gd0, align 8
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/2008-08-04-Bitconvert.ll b/test/CodeGen/Mips/2008-08-04-Bitconvert.ll
index f8eb028..78a49ff 100644
--- a/test/CodeGen/Mips/2008-08-04-Bitconvert.ll
+++ b/test/CodeGen/Mips/2008-08-04-Bitconvert.ll
@@ -1,18 +1,15 @@
-; RUN: llc < %s -march=mips -o %t
-; RUN: grep mtc1 %t | count 1
-; RUN: grep mfc1 %t | count 1
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
+; RUN: llc -march=mips < %s | FileCheck %s
 
 define float @A(i32 %u) nounwind  {
 entry:
-	bitcast i32 %u to float
-	ret float %0
+; CHECK: mtc1 
+  bitcast i32 %u to float
+  ret float %0
 }
 
 define i32 @B(float %u) nounwind  {
 entry:
-	bitcast float %u to i32
-	ret i32 %0
+; CHECK: mfc1 
+  bitcast float %u to i32
+  ret i32 %0
 }
diff --git a/test/CodeGen/Mips/2008-08-06-Alloca.ll b/test/CodeGen/Mips/2008-08-06-Alloca.ll
index 6dd4af1..0d94b19 100644
--- a/test/CodeGen/Mips/2008-08-06-Alloca.ll
+++ b/test/CodeGen/Mips/2008-08-06-Alloca.ll
@@ -1,17 +1,15 @@
-; RUN: llc < %s -march=mips | grep {subu.*sp} | count 2
-; RUN: llc < %s -march=mips -regalloc=basic | grep {subu.*sp} | count 2
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
+; RUN: llc -march=mips < %s | FileCheck %s
 
 define i32 @twoalloca(i32 %size) nounwind {
 entry:
-	alloca i8, i32 %size		; <i8*>:0 [#uses=1]
-	alloca i8, i32 %size		; <i8*>:1 [#uses=1]
-	call i32 @foo( i8* %0 ) nounwind		; <i32>:2 [#uses=1]
-	call i32 @foo( i8* %1 ) nounwind		; <i32>:3 [#uses=1]
-	add i32 %3, %2		; <i32>:4 [#uses=1]
-	ret i32 %4
+; CHECK: subu ${{[0-9]+}}, $sp
+; CHECK: subu ${{[0-9]+}}, $sp
+  alloca i8, i32 %size    ; <i8*>:0 [#uses=1]
+  alloca i8, i32 %size    ; <i8*>:1 [#uses=1]
+  call i32 @foo( i8* %0 ) nounwind    ; <i32>:2 [#uses=1]
+  call i32 @foo( i8* %1 ) nounwind    ; <i32>:3 [#uses=1]
+  add i32 %3, %2    ; <i32>:4 [#uses=1]
+  ret i32 %4
 }
 
 declare i32 @foo(i8*)
diff --git a/test/CodeGen/Mips/2008-08-08-ctlz.ll b/test/CodeGen/Mips/2008-08-08-ctlz.ll
index fb33323..abd61de 100644
--- a/test/CodeGen/Mips/2008-08-08-ctlz.ll
+++ b/test/CodeGen/Mips/2008-08-08-ctlz.ll
@@ -1,12 +1,10 @@
-; RUN: llc < %s -march=mips | grep clz | count 1
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
+; RUN: llc -march=mips < %s | FileCheck %s
 
 define i32 @A0(i32 %u) nounwind  {
 entry:
-	call i32 @llvm.ctlz.i32( i32 %u )
+; CHECK: clz 
+  call i32 @llvm.ctlz.i32( i32 %u, i1 true )
   ret i32 %0
 }
 
-declare i32 @llvm.ctlz.i32(i32) nounwind readnone 
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone 
diff --git a/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll b/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
index f518843..9c4838a 100644
--- a/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
+++ b/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
@@ -1,7 +1,6 @@
-; RUN: llc < %s
+; RUN: llc -march=mips -soft-float < %s
 ; PR2667
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "psp"
 	%struct._Bigint = type { %struct._Bigint*, i32, i32, i32, i32, [1 x i32] }
 	%struct.__FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*, i8*, i32)*, i32 (i8*, i8*, i32)*, i32 (i8*, i32, i32)*, i32 (i8*)*, %struct.__sbuf, i8*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i32, %struct._reent*, i32 }
 	%struct.__sbuf = type { i8*, i32 }
diff --git a/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll b/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll
index b8d6826..2b2ee0f 100644
--- a/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll
+++ b/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll
@@ -1,10 +1,23 @@
-; RUN: llc < %s | FileCheck %s
-target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-n32"
-target triple = "mips-unknown-linux"
+; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-O32
+; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-O32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N64
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N64
 
 define float @h() nounwind readnone {
 entry:
-; CHECK: lw $2, %got($CPI0_0)($gp)
-; CHECK: lwc1 $f0, %lo($CPI0_0)($2)
+; PIC-O32: lw  $[[R0:[0-9]+]], %got($CPI0_0)
+; PIC-O32: lwc1 $f0, %lo($CPI0_0)($[[R0]])
+; STATIC-O32: lui  $[[R0:[0-9]+]], %hi($CPI0_0)
+; STATIC-O32: lwc1 $f0, %lo($CPI0_0)($[[R0]])
+; PIC-N32: lw  $[[R0:[0-9]+]], %got_page($CPI0_0)
+; PIC-N32: lwc1 $f0, %got_ofst($CPI0_0)($[[R0]])
+; STATIC-N32: lui  $[[R0:[0-9]+]], %hi($CPI0_0)
+; STATIC-N32: lwc1 $f0, %lo($CPI0_0)($[[R0]])
+; PIC-N64: ld  $[[R0:[0-9]+]], %got_page($CPI0_0)
+; PIC-N64: lwc1 $f0, %got_ofst($CPI0_0)($[[R0]])
+; STATIC-N64: ld  $[[R0:[0-9]+]], %got_page($CPI0_0)
+; STATIC-N64: lwc1 $f0, %got_ofst($CPI0_0)($[[R0]])
   ret float 0x400B333340000000
 }
diff --git a/test/CodeGen/Mips/2010-07-20-Switch.ll b/test/CodeGen/Mips/2010-07-20-Switch.ll
index 07fc10c..aaf6767 100644
--- a/test/CodeGen/Mips/2010-07-20-Switch.ll
+++ b/test/CodeGen/Mips/2010-07-20-Switch.ll
@@ -1,13 +1,25 @@
-; RUN: llc < %s -march=mips -relocation-model=static | FileCheck %s
+; RUN: llc < %s -march=mips -relocation-model=static | FileCheck %s -check-prefix=STATIC-O32 
+; RUN: llc < %s -march=mips -relocation-model=pic | FileCheck %s -check-prefix=PIC-O32 
+; RUN: llc < %s -march=mips64 -relocation-model=pic -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=PIC-N64
 
 define i32 @main() nounwind readnone {
 entry:
   %x = alloca i32, align 4                        ; <i32*> [#uses=2]
-  volatile store i32 2, i32* %x, align 4
-  %0 = volatile load i32* %x, align 4             ; <i32> [#uses=1]
-; CHECK: lui $3, %hi($JTI0_0)
-; CHECK: sll $2, $2, 2
-; CHECK: addiu $3, $3, %lo($JTI0_0)
+  store volatile i32 2, i32* %x, align 4
+  %0 = load volatile i32* %x, align 4             ; <i32> [#uses=1]
+; STATIC-O32: lui $[[R0:[0-9]+]], %hi($JTI0_0)
+; STATIC-O32: addiu ${{[0-9]+}}, $[[R0]], %lo($JTI0_0)
+; STATIC-O32: sll ${{[0-9]+}}, ${{[0-9]+}}, 2
+; PIC-O32: lw $[[R0:[0-9]+]], %got($JTI0_0)
+; PIC-O32: addiu ${{[0-9]+}}, $[[R0]], %lo($JTI0_0)
+; PIC-O32: sll ${{[0-9]+}}, ${{[0-9]+}}, 2
+; PIC-O32: addu $[[R1:[0-9]+]], ${{[0-9]+}}, $gp
+; PIC-O32: jr  $[[R1]]
+; PIC-N64: ld $[[R0:[0-9]+]], %got_page($JTI0_0)
+; PIC-N64: daddiu ${{[0-9]+}}, $[[R0]], %got_ofst($JTI0_0)
+; PIC-N64: dsll ${{[0-9]+}}, ${{[0-9]+}}, 3
+; PIC-N64: daddu $[[R1:[0-9]+]], ${{[0-9]+}}, $gp
+; PIC-N64: jr  $[[R1]]
   switch i32 %0, label %bb4 [
     i32 0, label %bb5
     i32 1, label %bb1
@@ -18,7 +30,7 @@ entry:
 bb1:                                              ; preds = %entry
   ret i32 2
 
-; CHECK: $BB0_2
+; CHECK: STATIC-O32: $BB0_2
 bb2:                                              ; preds = %entry
   ret i32 0
 
@@ -31,3 +43,23 @@ bb4:                                              ; preds = %entry
 bb5:                                              ; preds = %entry
   ret i32 1
 }
+
+; STATIC-O32: .align  2
+; STATIC-O32: $JTI0_0:
+; STATIC-O32: .4byte
+; STATIC-O32: .4byte
+; STATIC-O32: .4byte
+; STATIC-O32: .4byte
+; PIC-O32: .align  2
+; PIC-O32: $JTI0_0:
+; PIC-O32: .gpword
+; PIC-O32: .gpword
+; PIC-O32: .gpword 
+; PIC-O32: .gpword 
+; PIC-N64: .align  3
+; PIC-N64: $JTI0_0:
+; PIC-N64: .gpdword
+; PIC-N64: .gpdword
+; PIC-N64: .gpdword 
+; PIC-N64: .gpdword 
+
diff --git a/test/CodeGen/Mips/2010-11-09-CountLeading.ll b/test/CodeGen/Mips/2010-11-09-CountLeading.ll
index c592b31..6174500 100644
--- a/test/CodeGen/Mips/2010-11-09-CountLeading.ll
+++ b/test/CodeGen/Mips/2010-11-09-CountLeading.ll
@@ -3,16 +3,16 @@
 ; CHECK: clz $2, $4
 define i32 @t1(i32 %X) nounwind readnone {
 entry:
-  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %X)
+  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %X, i1 true)
   ret i32 %tmp1
 }
 
-declare i32 @llvm.ctlz.i32(i32) nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
 
 ; CHECK: clz $2, $4
 define i32 @t2(i32 %X) nounwind readnone {
 entry:
-  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %X)
+  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %X, i1 true)
   ret i32 %tmp1
 }
 
@@ -20,7 +20,7 @@ entry:
 define i32 @t3(i32 %X) nounwind readnone {
 entry:
   %neg = xor i32 %X, -1
-  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %neg)
+  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %neg, i1 true)
   ret i32 %tmp1
 }
 
@@ -28,6 +28,6 @@ entry:
 define i32 @t4(i32 %X) nounwind readnone {
 entry:
   %neg = xor i32 %X, -1
-  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %neg)
+  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %neg, i1 true)
   ret i32 %tmp1
 }
diff --git a/test/CodeGen/Mips/blockaddr.ll b/test/CodeGen/Mips/blockaddr.ll
index 6de6b77..7de7fa6 100644
--- a/test/CodeGen/Mips/blockaddr.ll
+++ b/test/CodeGen/Mips/blockaddr.ll
@@ -1,5 +1,9 @@
-; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=CHECK-PIC
-; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC
+; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-O32
+; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-O32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N64
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N64
 
 @reg = common global i8* null, align 4
 
@@ -8,14 +12,30 @@ entry:
   ret i8* %x
 }
 
-; CHECK-PIC: lw  $[[R0:[0-9]+]], %got($tmp[[T0:[0-9]+]])($gp)
-; CHECK-PIC: addiu ${{[0-9]+}}, $[[R0]], %lo($tmp[[T0]])
-; CHECK-PIC: lw  $[[R1:[0-9]+]], %got($tmp[[T1:[0-9]+]])($gp)
-; CHECK-PIC: addiu ${{[0-9]+}}, $[[R1]], %lo($tmp[[T1]])
-; CHECK-STATIC: lui  $[[R2:[0-9]+]], %hi($tmp[[T0:[0-9]+]])
-; CHECK-STATIC: addiu ${{[0-9]+}}, $[[R2]], %lo($tmp[[T0]])
-; CHECK-STATIC: lui   $[[R3:[0-9]+]], %hi($tmp[[T1:[0-9]+]])
-; CHECK-STATIC: addiu ${{[0-9]+}}, $[[R3]], %lo($tmp[[T1]])
+; PIC-O32: lw  $[[R0:[0-9]+]], %got($tmp[[T0:[0-9]+]])
+; PIC-O32: addiu ${{[0-9]+}}, $[[R0]], %lo($tmp[[T0]])
+; PIC-O32: lw  $[[R1:[0-9]+]], %got($tmp[[T1:[0-9]+]])
+; PIC-O32: addiu ${{[0-9]+}}, $[[R1]], %lo($tmp[[T1]])
+; STATIC-O32: lui  $[[R2:[0-9]+]], %hi($tmp[[T2:[0-9]+]])
+; STATIC-O32: addiu ${{[0-9]+}}, $[[R2]], %lo($tmp[[T2]])
+; STATIC-O32: lui   $[[R3:[0-9]+]], %hi($tmp[[T3:[0-9]+]])
+; STATIC-O32: addiu ${{[0-9]+}}, $[[R3]], %lo($tmp[[T3]])
+; PIC-N32: lw  $[[R0:[0-9]+]], %got_page($tmp[[T0:[0-9]+]])
+; PIC-N32: addiu ${{[0-9]+}}, $[[R0]], %got_ofst($tmp[[T0]])
+; PIC-N32: lw  $[[R1:[0-9]+]], %got_page($tmp[[T1:[0-9]+]])
+; PIC-N32: addiu ${{[0-9]+}}, $[[R1]], %got_ofst($tmp[[T1]])
+; STATIC-N32: lui  $[[R2:[0-9]+]], %hi($tmp[[T2:[0-9]+]])
+; STATIC-N32: addiu ${{[0-9]+}}, $[[R2]], %lo($tmp[[T2]])
+; STATIC-N32: lui   $[[R3:[0-9]+]], %hi($tmp[[T3:[0-9]+]])
+; STATIC-N32: addiu ${{[0-9]+}}, $[[R3]], %lo($tmp[[T3]])
+; PIC-N64: ld  $[[R0:[0-9]+]], %got_page($tmp[[T0:[0-9]+]])
+; PIC-N64: daddiu ${{[0-9]+}}, $[[R0]], %got_ofst($tmp[[T0]])
+; PIC-N64: ld  $[[R1:[0-9]+]], %got_page($tmp[[T1:[0-9]+]])
+; PIC-N64: daddiu ${{[0-9]+}}, $[[R1]], %got_ofst($tmp[[T1]])
+; STATIC-N64: ld  $[[R2:[0-9]+]], %got_page($tmp[[T2:[0-9]+]])
+; STATIC-N64: daddiu ${{[0-9]+}}, $[[R2]], %got_ofst($tmp[[T2]])
+; STATIC-N64: ld  $[[R3:[0-9]+]], %got_page($tmp[[T3:[0-9]+]])
+; STATIC-N64: daddiu ${{[0-9]+}}, $[[R3]], %got_ofst($tmp[[T3]])
 define void @f() nounwind {
 entry:
   %call = tail call i8* @dummy(i8* blockaddress(@f, %baz))
diff --git a/test/CodeGen/Mips/br-jmp.ll b/test/CodeGen/Mips/br-jmp.ll
new file mode 100644
index 0000000..1b5513a
--- /dev/null
+++ b/test/CodeGen/Mips/br-jmp.ll
@@ -0,0 +1,13 @@
+; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=CHECK-PIC
+; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC
+
+define void @count(i32 %x, i32 %y, i32 %z) noreturn nounwind readnone {
+entry:
+  br label %bosco
+
+bosco:                                            ; preds = %bosco, %entry
+  br label %bosco
+}
+
+; CHECK-PIC: b	$BB0_1
+; CHECK-STATIC: j	$BB0_1
diff --git a/test/CodeGen/Mips/bswap.ll b/test/CodeGen/Mips/bswap.ll
new file mode 100644
index 0000000..a8fc2cd
--- /dev/null
+++ b/test/CodeGen/Mips/bswap.ll
@@ -0,0 +1,25 @@
+; RUN: llc  < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=MIPS32
+; RUN: llc  < %s -march=mips64el -mcpu=mips64r2 | FileCheck %s -check-prefix=MIPS64
+
+define i32 @bswap32(i32 %x) nounwind readnone {
+entry:
+; MIPS32: bswap32:
+; MIPS32: wsbh $[[R0:[0-9]+]]
+; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16
+  %or.3 = call i32 @llvm.bswap.i32(i32 %x)
+  ret i32 %or.3
+}
+
+define i64 @bswap64(i64 %x) nounwind readnone {
+entry:
+; MIPS64: bswap64:
+; MIPS64: dsbh $[[R0:[0-9]+]]
+; MIPS64: dshd ${{[0-9]+}}, $[[R0]]
+  %or.7 = call i64 @llvm.bswap.i64(i64 %x)
+  ret i64 %or.7
+}
+
+declare i32 @llvm.bswap.i32(i32) nounwind readnone
+
+declare i64 @llvm.bswap.i64(i64) nounwind readnone
+
diff --git a/test/CodeGen/Mips/cmov.ll b/test/CodeGen/Mips/cmov.ll
index 7851ba9..03254a9 100755
--- a/test/CodeGen/Mips/cmov.ll
+++ b/test/CodeGen/Mips/cmov.ll
@@ -1,11 +1,14 @@
-; RUN: llc -march=mips < %s | FileCheck %s
-; RUN: llc -march=mips -regalloc=basic < %s | FileCheck %s
+; RUN: llc -march=mips < %s | FileCheck %s -check-prefix=O32
+; RUN: llc -march=mips -regalloc=basic < %s | FileCheck %s -check-prefix=O32
+; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 < %s | FileCheck %s -check-prefix=N64
 
 @i1 = global [3 x i32] [i32 1, i32 2, i32 3], align 4
 @i3 = common global i32* null, align 4
 
-; CHECK:  addiu ${{[0-9]+}}, $gp, %got(i1)
-; CHECK:  lw  ${{[0-9]+}}, %got(i3)($gp)
+; O32:  lw  ${{[0-9]+}}, %got(i3)($gp)
+; O32:  addiu ${{[0-9]+}}, $gp, %got(i1)
+; N64:  ld  ${{[0-9]+}}, %got_disp(i3)($gp)
+; N64:  daddiu ${{[0-9]+}}, $gp, %got_disp(i1)
 define i32* @cmov1(i32 %s) nounwind readonly {
 entry:
   %tobool = icmp ne i32 %s, 0
@@ -17,10 +20,14 @@ entry:
 @c = global i32 1, align 4
 @d = global i32 0, align 4
 
-; CHECK: cmov2:
-; CHECK: addiu $[[R0:[0-9]+]], $gp, %got(c)
-; CHECK: addiu $[[R1:[0-9]+]], $gp, %got(d)
-; CHECK: movn  $[[R1]], $[[R0]], ${{[0-9]+}}
+; O32: cmov2:
+; O32: addiu $[[R1:[0-9]+]], $gp, %got(d)
+; O32: addiu $[[R0:[0-9]+]], $gp, %got(c)
+; O32: movn  $[[R1]], $[[R0]], ${{[0-9]+}}
+; N64: cmov2:
+; N64: daddiu $[[R1:[0-9]+]], $gp, %got_disp(d)
+; N64: daddiu $[[R0:[0-9]+]], $gp, %got_disp(c)
+; N64: movn  $[[R1]], $[[R0]], ${{[0-9]+}}
 define i32 @cmov2(i32 %s) nounwind readonly {
 entry:
   %tobool = icmp ne i32 %s, 0
diff --git a/test/CodeGen/Mips/cprestore.ll b/test/CodeGen/Mips/cprestore.ll
index 391f5c7..57d022f 100644
--- a/test/CodeGen/Mips/cprestore.ll
+++ b/test/CodeGen/Mips/cprestore.ll
@@ -1,11 +1,9 @@
-; DISABLED: llc -march=mipsel < %s | FileCheck %s
-; RUN: false
-
-; byval is currently unsupported.
-; XFAIL: *
+; RUN: llc -march=mipsel < %s | FileCheck %s
 
 ; CHECK: .set macro
+; CHECK: .set at
 ; CHECK-NEXT: .cprestore
+; CHECK: .set noat
 ; CHECK-NEXT: .set nomacro
 
 %struct.S = type { [16384 x i32] }
diff --git a/test/CodeGen/Mips/dg.exp b/test/CodeGen/Mips/dg.exp
deleted file mode 100644
index adb2cac..0000000
--- a/test/CodeGen/Mips/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target Mips] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/Mips/eh.ll b/test/CodeGen/Mips/eh.ll
index 9cd3413..c3facdb 100644
--- a/test/CodeGen/Mips/eh.ll
+++ b/test/CodeGen/Mips/eh.ll
@@ -10,15 +10,11 @@ entry:
 ; CHECK-EL:  .cfi_def_cfa_offset
 ; CHECK-EL:  sdc1 $f20
 ; CHECK-EL:  sw  $ra
-; CHECK-EL:  sw  $17
-; CHECK-EL:  sw  $16
 ; CHECK-EL:  .cfi_offset 52, -8
 ; CHECK-EL:  .cfi_offset 53, -4
 ; CHECK-EB:  .cfi_offset 53, -8
 ; CHECK-EB:  .cfi_offset 52, -4
 ; CHECK-EL:  .cfi_offset 31, -12
-; CHECK-EL:  .cfi_offset 17, -16
-; CHECK-EL:  .cfi_offset 16, -20
 ; CHECK-EL:  .cprestore 
 
   %exception = tail call i8* @__cxa_allocate_exception(i32 8) nounwind
@@ -58,16 +54,10 @@ unreachable:                                      ; preds = %entry
 
 declare i8* @__cxa_allocate_exception(i32)
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
 declare i32 @__gxx_personality_v0(...)
 
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
 declare i32 @llvm.eh.typeid.for(i8*) nounwind
 
-declare void @llvm.eh.resume(i8*, i32)
-
 declare void @__cxa_throw(i8*, i8*, i8*)
 
 declare i8* @__cxa_begin_catch(i8*)
diff --git a/test/CodeGen/Mips/extins.ll b/test/CodeGen/Mips/extins.ll
index 69f53e5..a164f70 100644
--- a/test/CodeGen/Mips/extins.ll
+++ b/test/CodeGen/Mips/extins.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mips -mcpu=4ke < %s | FileCheck %s
+; RUN: llc -march=mips -mcpu=mips32r2 < %s | FileCheck %s
 
 define i32 @ext0_5_9(i32 %s, i32 %pos, i32 %sz) nounwind readnone {
 entry:
diff --git a/test/CodeGen/Mips/fabs.ll b/test/CodeGen/Mips/fabs.ll
new file mode 100644
index 0000000..b296ab3
--- /dev/null
+++ b/test/CodeGen/Mips/fabs.ll
@@ -0,0 +1,52 @@
+; RUN: llc  < %s -march=mipsel -mcpu=mips32 | FileCheck %s -check-prefix=32
+; RUN: llc  < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=64
+; RUN: llc  < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2
+; RUN: llc  < %s -march=mipsel -mcpu=mips32 -enable-no-nans-fp-math | FileCheck %s -check-prefix=NO-NAN
+
+define float @foo0(float %a) nounwind readnone {
+entry:
+
+; 32: lui  $[[T0:[0-9]+]], 32767
+; 32: ori  $[[MSK0:[0-9]+]], $[[T0]], 65535
+; 32: and  $[[AND:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 32: mtc1 $[[AND]], $f0
+
+; 32R2: ins  $[[INS:[0-9]+]], $zero, 31, 1
+; 32R2: mtc1 $[[INS]], $f0
+
+; NO-NAN: abs.s
+
+  %call = tail call float @fabsf(float %a) nounwind readnone
+  ret float %call
+}
+
+declare float @fabsf(float) nounwind readnone
+
+define double @foo1(double %a) nounwind readnone {
+entry:
+
+; 32: lui  $[[T0:[0-9]+]], 32767
+; 32: ori  $[[MSK0:[0-9]+]], $[[T0]], 65535
+; 32: and  $[[AND:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 32: mtc1 $[[AND]], $f1
+
+; 32R2: ins  $[[INS:[0-9]+]], $zero, 31, 1
+; 32R2: mtc1 $[[INS]], $f1
+
+; 64: daddiu  $[[T0:[0-9]+]], $zero, 1
+; 64: dsll    $[[T1:[0-9]+]], ${{[0-9]+}}, 63
+; 64: daddiu  $[[MSK0:[0-9]+]], $[[T1]], -1
+; 64: and     $[[AND:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 64: dmtc1   $[[AND]], $f0
+
+; 64R2: dins  $[[INS:[0-9]+]], $zero, 63, 1
+; 64R2: dmtc1 $[[INS]], $f0
+
+; NO-NAN: abs.d
+
+  %call = tail call double @fabs(double %a) nounwind readnone
+  ret double %call
+}
+
+declare double @fabs(double) nounwind readnone
diff --git a/test/CodeGen/Mips/fcopysign-f32-f64.ll b/test/CodeGen/Mips/fcopysign-f32-f64.ll
new file mode 100644
index 0000000..b36473d
--- /dev/null
+++ b/test/CodeGen/Mips/fcopysign-f32-f64.ll
@@ -0,0 +1,50 @@
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=64
+; RUN: llc  < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2
+
+declare double @copysign(double, double) nounwind readnone
+
+declare float @copysignf(float, float) nounwind readnone
+
+define float @func2(float %d, double %f) nounwind readnone {
+entry:
+; 64: func2
+; 64: lui  $[[T0:[0-9]+]], 32767
+; 64: ori  $[[MSK0:[0-9]+]], $[[T0]], 65535
+; 64: and  $[[AND0:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 64: dsrl ${{[0-9]+}}, ${{[0-9]+}}, 63
+; 64: sll  $[[SLL:[0-9]+]], ${{[0-9]+}}, 31
+; 64: or   $[[OR:[0-9]+]], $[[AND0]], $[[SLL]]
+; 64: mtc1 $[[OR]], $f0
+
+; 64R2: dext ${{[0-9]+}}, ${{[0-9]+}}, 63, 1
+; 64R2: ins  $[[INS:[0-9]+]], ${{[0-9]+}}, 31, 1
+; 64R2: mtc1 $[[INS]], $f0
+
+  %add = fadd float %d, 1.000000e+00
+  %conv = fptrunc double %f to float
+  %call = tail call float @copysignf(float %add, float %conv) nounwind readnone
+  ret float %call
+}
+
+define double @func3(double %d, float %f) nounwind readnone {
+entry:
+
+; 64: daddiu $[[T0:[0-9]+]], $zero, 1
+; 64: dsll   $[[T1:[0-9]+]], $[[T0]], 63
+; 64: daddiu $[[MSK0:[0-9]+]], $[[T1]], -1
+; 64: and    $[[AND0:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 64: srl    ${{[0-9]+}}, ${{[0-9]+}}, 31
+; 64: dsll   $[[DSLL:[0-9]+]], ${{[0-9]+}}, 63
+; 64: or     $[[OR:[0-9]+]], $[[AND0]], $[[DSLL]]
+; 64: dmtc1  $[[OR]], $f0
+
+; 64R2: ext   ${{[0-9]+}}, ${{[0-9]+}}, 31, 1
+; 64R2: dins  $[[INS:[0-9]+]], ${{[0-9]+}}, 63, 1
+; 64R2: dmtc1 $[[INS]], $f0
+
+  %add = fadd double %d, 1.000000e+00
+  %conv = fpext float %f to double
+  %call = tail call double @copysign(double %add, double %conv) nounwind readnone
+  ret double %call
+}
+
diff --git a/test/CodeGen/Mips/fcopysign.ll b/test/CodeGen/Mips/fcopysign.ll
index 79f956d..1c57eca 100644
--- a/test/CodeGen/Mips/fcopysign.ll
+++ b/test/CodeGen/Mips/fcopysign.ll
@@ -1,34 +1,35 @@
-; RUN: llc  < %s -march=mipsel | FileCheck %s -check-prefix=CHECK-EL
-; RUN: llc  < %s -march=mips | FileCheck %s -check-prefix=CHECK-EB
+; RUN: llc  < %s -march=mipsel -mcpu=mips32 | FileCheck %s -check-prefix=32
+; RUN: llc  < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=64
+; RUN: llc  < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2
 
 define double @func0(double %d0, double %d1) nounwind readnone {
 entry:
-; CHECK-EL: func0:
-; CHECK-EL: lui $[[T0:[0-9]+]], 32767
-; CHECK-EL: lui $[[T1:[0-9]+]], 32768
-; CHECK-EL: mfc1 $[[HI0:[0-9]+]], $f13
-; CHECK-EL: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
-; CHECK-EL: mfc1 $[[HI1:[0-9]+]], $f15
-; CHECK-EL: ori $[[MSK1:[0-9]+]], $[[T1]], 0
-; CHECK-EL: and $[[AND0:[0-9]+]], $[[HI0]], $[[MSK0]]
-; CHECK-EL: and $[[AND1:[0-9]+]], $[[HI1]], $[[MSK1]]
-; CHECK-EL: mfc1 $[[LO0:[0-9]+]], $f12
-; CHECK-EL: or  $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
-; CHECK-EL: mtc1 $[[LO0]], $f0
-; CHECK-EL: mtc1 $[[OR]], $f1
 ;
-; CHECK-EB: lui $[[T0:[0-9]+]], 32767
-; CHECK-EB: lui $[[T1:[0-9]+]], 32768
-; CHECK-EB: mfc1 $[[HI0:[0-9]+]], $f12
-; CHECK-EB: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
-; CHECK-EB: mfc1 $[[HI1:[0-9]+]], $f14
-; CHECK-EB: ori $[[MSK1:[0-9]+]], $[[T1]], 0
-; CHECK-EB: and $[[AND0:[0-9]+]], $[[HI0]], $[[MSK0]]
-; CHECK-EB: and $[[AND1:[0-9]+]], $[[HI1]], $[[MSK1]]
-; CHECK-EB: or  $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
-; CHECK-EB: mfc1 $[[LO0:[0-9]+]], $f13
-; CHECK-EB: mtc1 $[[OR]], $f0
-; CHECK-EB: mtc1 $[[LO0]], $f1
+; 32: lui  $[[MSK1:[0-9]+]], 32768
+; 32: and  $[[AND1:[0-9]+]], ${{[0-9]+}}, $[[MSK1]]
+; 32: lui  $[[T0:[0-9]+]], 32767
+; 32: ori  $[[MSK0:[0-9]+]], $[[T0]], 65535
+; 32: and  $[[AND0:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 32: or   $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
+; 32: mtc1 $[[OR]], $f1
+
+; 32R2: ext  $[[EXT:[0-9]+]], ${{[0-9]+}}, 31, 1
+; 32R2: ins  $[[INS:[0-9]+]], $[[EXT]], 31, 1
+; 32R2: mtc1 $[[INS]], $f1
+
+; 64: daddiu $[[T0:[0-9]+]], $zero, 1
+; 64: dsll   $[[MSK1:[0-9]+]], $[[T0]], 63
+; 64: and    $[[AND1:[0-9]+]], ${{[0-9]+}}, $[[MSK1]]
+; 64: daddiu $[[MSK0:[0-9]+]], $[[MSK1]], -1
+; 64: and    $[[AND0:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 64: or     $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
+; 64: dmtc1  $[[OR]], $f0
+
+; 64R2: dext  $[[EXT:[0-9]+]], ${{[0-9]+}}, 63, 1
+; 64R2: dins  $[[INS:[0-9]+]], $[[EXT]], 63, 1
+; 64R2: dmtc1 $[[INS]], $f0
+
   %call = tail call double @copysign(double %d0, double %d1) nounwind readnone
   ret double %call
 }
@@ -37,19 +38,22 @@ declare double @copysign(double, double) nounwind readnone
 
 define float @func1(float %f0, float %f1) nounwind readnone {
 entry:
-; CHECK-EL: func1:
-; CHECK-EL: lui $[[T0:[0-9]+]], 32767
-; CHECK-EL: lui $[[T1:[0-9]+]], 32768
-; CHECK-EL: mfc1 $[[ARG0:[0-9]+]], $f12
-; CHECK-EL: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
-; CHECK-EL: mfc1 $[[ARG1:[0-9]+]], $f14
-; CHECK-EL: ori $[[MSK1:[0-9]+]], $[[T1]], 0
-; CHECK-EL: and $[[T2:[0-9]+]], $[[ARG0]], $[[MSK0]]
-; CHECK-EL: and $[[T3:[0-9]+]], $[[ARG1]], $[[MSK1]]
-; CHECK-EL: or  $[[T4:[0-9]+]], $[[T2]], $[[T3]]
-; CHECK-EL: mtc1 $[[T4]], $f0
+
+; 32: lui  $[[MSK1:[0-9]+]], 32768
+; 32: and  $[[AND1:[0-9]+]], ${{[0-9]+}}, $[[MSK1]]
+; 32: lui  $[[T0:[0-9]+]], 32767
+; 32: ori  $[[MSK0:[0-9]+]], $[[T0]], 65535
+; 32: and  $[[AND0:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 32: or   $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
+; 32: mtc1 $[[OR]], $f0
+
+; 32R2: ext  $[[EXT:[0-9]+]], ${{[0-9]+}}, 31, 1
+; 32R2: ins  $[[INS:[0-9]+]], $[[EXT]], 31, 1
+; 32R2: mtc1 $[[INS]], $f0
+
   %call = tail call float @copysignf(float %f0, float %f1) nounwind readnone
   ret float %call
 }
 
 declare float @copysignf(float, float) nounwind readnone
+
diff --git a/test/CodeGen/Mips/fmadd1.ll b/test/CodeGen/Mips/fmadd1.ll
new file mode 100644
index 0000000..435b419
--- /dev/null
+++ b/test/CodeGen/Mips/fmadd1.ll
@@ -0,0 +1,88 @@
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -enable-no-nans-fp-math | FileCheck %s -check-prefix=32R2
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefix=64R2
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2NAN
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2NAN
+
+define float @FOO0float(float %a, float %b, float %c) nounwind readnone {
+entry:
+; CHECK: madd.s 
+  %mul = fmul float %a, %b
+  %add = fadd float %mul, %c
+  %add1 = fadd float %add, 0.000000e+00
+  ret float %add1
+}
+
+define float @FOO1float(float %a, float %b, float %c) nounwind readnone {
+entry:
+; CHECK: msub.s 
+  %mul = fmul float %a, %b
+  %sub = fsub float %mul, %c
+  %add = fadd float %sub, 0.000000e+00
+  ret float %add
+}
+
+define float @FOO2float(float %a, float %b, float %c) nounwind readnone {
+entry:
+; 32R2: nmadd.s 
+; 64R2: nmadd.s 
+; 32R2NAN: madd.s 
+; 64R2NAN: madd.s 
+  %mul = fmul float %a, %b
+  %add = fadd float %mul, %c
+  %sub = fsub float 0.000000e+00, %add
+  ret float %sub
+}
+
+define float @FOO3float(float %a, float %b, float %c) nounwind readnone {
+entry:
+; 32R2: nmsub.s 
+; 64R2: nmsub.s 
+; 32R2NAN: msub.s 
+; 64R2NAN: msub.s 
+  %mul = fmul float %a, %b
+  %sub = fsub float %mul, %c
+  %sub1 = fsub float 0.000000e+00, %sub
+  ret float %sub1
+}
+
+define double @FOO10double(double %a, double %b, double %c) nounwind readnone {
+entry:
+; CHECK: madd.d
+  %mul = fmul double %a, %b
+  %add = fadd double %mul, %c
+  %add1 = fadd double %add, 0.000000e+00
+  ret double %add1
+}
+
+define double @FOO11double(double %a, double %b, double %c) nounwind readnone {
+entry:
+; CHECK: msub.d
+  %mul = fmul double %a, %b
+  %sub = fsub double %mul, %c
+  %add = fadd double %sub, 0.000000e+00
+  ret double %add
+}
+
+define double @FOO12double(double %a, double %b, double %c) nounwind readnone {
+entry:
+; 32R2: nmadd.d 
+; 64R2: nmadd.d 
+; 32R2NAN: madd.d 
+; 64R2NAN: madd.d 
+  %mul = fmul double %a, %b
+  %add = fadd double %mul, %c
+  %sub = fsub double 0.000000e+00, %add
+  ret double %sub
+}
+
+define double @FOO13double(double %a, double %b, double %c) nounwind readnone {
+entry:
+; 32R2: nmsub.d 
+; 64R2: nmsub.d 
+; 32R2NAN: msub.d 
+; 64R2NAN: msub.d 
+  %mul = fmul double %a, %b
+  %sub = fsub double %mul, %c
+  %sub1 = fsub double 0.000000e+00, %sub
+  ret double %sub1
+}
diff --git a/test/CodeGen/Mips/fneg.ll b/test/CodeGen/Mips/fneg.ll
new file mode 100644
index 0000000..b322abd
--- /dev/null
+++ b/test/CodeGen/Mips/fneg.ll
@@ -0,0 +1,17 @@
+; RUN: llc  < %s -march=mipsel -mcpu=mips32 | FileCheck %s 
+
+define float @foo0(i32 %a, float %d) nounwind readnone {
+entry:
+; CHECK-NOT: neg.s
+  %sub = fsub float -0.000000e+00, %d
+  ret float %sub
+}
+
+define double @foo1(i32 %a, double %d) nounwind readnone {
+entry:
+; CHECK:     foo1
+; CHECK-NOT: neg.d
+; CHECK:     jr
+  %sub = fsub double -0.000000e+00, %d
+  ret double %sub
+}
diff --git a/test/CodeGen/Mips/fp-indexed-ls.ll b/test/CodeGen/Mips/fp-indexed-ls.ll
new file mode 100644
index 0000000..08bd6e7
--- /dev/null
+++ b/test/CodeGen/Mips/fp-indexed-ls.ll
@@ -0,0 +1,98 @@
+; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | FileCheck %s
+
+%struct.S = type <{ [4 x float] }>
+%struct.S2 = type <{ [4 x double] }>
+%struct.S3 = type <{ i8, float }>
+
+@s = external global [4 x %struct.S]
+@gf = external global float
+@gd = external global double
+@s2 = external global [4 x %struct.S2]
+@s3 = external global %struct.S3
+
+define float @foo0(float* nocapture %b, i32 %o) nounwind readonly {
+entry:
+; CHECK: lwxc1
+  %arrayidx = getelementptr inbounds float* %b, i32 %o
+  %0 = load float* %arrayidx, align 4
+  ret float %0
+}
+
+define double @foo1(double* nocapture %b, i32 %o) nounwind readonly {
+entry:
+; CHECK: ldxc1
+  %arrayidx = getelementptr inbounds double* %b, i32 %o
+  %0 = load double* %arrayidx, align 8
+  ret double %0
+}
+
+define float @foo2(i32 %b, i32 %c) nounwind readonly {
+entry:
+; CHECK: luxc1
+  %arrayidx1 = getelementptr inbounds [4 x %struct.S]* @s, i32 0, i32 %b, i32 0, i32 %c
+  %0 = load float* %arrayidx1, align 1
+  ret float %0
+}
+
+define void @foo3(float* nocapture %b, i32 %o) nounwind {
+entry:
+; CHECK: swxc1
+  %0 = load float* @gf, align 4
+  %arrayidx = getelementptr inbounds float* %b, i32 %o
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+define void @foo4(double* nocapture %b, i32 %o) nounwind {
+entry:
+; CHECK: sdxc1
+  %0 = load double* @gd, align 8
+  %arrayidx = getelementptr inbounds double* %b, i32 %o
+  store double %0, double* %arrayidx, align 8
+  ret void
+}
+
+define void @foo5(i32 %b, i32 %c) nounwind {
+entry:
+; CHECK: suxc1
+  %0 = load float* @gf, align 4
+  %arrayidx1 = getelementptr inbounds [4 x %struct.S]* @s, i32 0, i32 %b, i32 0, i32 %c
+  store float %0, float* %arrayidx1, align 1
+  ret void
+}
+
+define double @foo6(i32 %b, i32 %c) nounwind readonly {
+entry:
+; CHECK: foo6
+; CHECK-NOT: ldxc1
+  %arrayidx1 = getelementptr inbounds [4 x %struct.S2]* @s2, i32 0, i32 %b, i32 0, i32 %c
+  %0 = load double* %arrayidx1, align 1
+  ret double %0
+}
+
+define void @foo7(i32 %b, i32 %c) nounwind {
+entry:
+; CHECK: foo7
+; CHECK-NOT: sdxc1
+  %0 = load double* @gd, align 8
+  %arrayidx1 = getelementptr inbounds [4 x %struct.S2]* @s2, i32 0, i32 %b, i32 0, i32 %c
+  store double %0, double* %arrayidx1, align 1
+  ret void
+}
+
+define float @foo8() nounwind readonly {
+entry:
+; CHECK: foo8
+; CHECK: luxc1
+  %0 = load float* getelementptr inbounds (%struct.S3* @s3, i32 0, i32 1), align 1
+  ret float %0
+}
+
+define void @foo9(float %f) nounwind {
+entry:
+; CHECK: foo9
+; CHECK: suxc1
+  store float %f, float* getelementptr inbounds (%struct.S3* @s3, i32 0, i32 1), align 1
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/fpcmp.ll b/test/CodeGen/Mips/fpcmp.ll
deleted file mode 100644
index 86545e3..0000000
--- a/test/CodeGen/Mips/fpcmp.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc  < %s -march=mipsel | FileCheck %s -check-prefix=CHECK-MIPS32
-
-@g1 = external global i32
-
-define i32 @f(float %f0, float %f1) nounwind {
-entry:
-; CHECK-MIPS32: c.olt.s
-; CHECK-MIPS32: movt
-; CHECK-MIPS32: c.olt.s
-; CHECK-MIPS32: movt
-  %cmp = fcmp olt float %f0, %f1
-  %conv = zext i1 %cmp to i32
-  %tmp2 = load i32* @g1, align 4
-  %add = add nsw i32 %tmp2, %conv
-  store i32 %add, i32* @g1, align 4
-  %cond = select i1 %cmp, i32 10, i32 20
-  ret i32 %cond
-}
diff --git a/test/CodeGen/Mips/frem.ll b/test/CodeGen/Mips/frem.ll
new file mode 100644
index 0000000..be222b2
--- /dev/null
+++ b/test/CodeGen/Mips/frem.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=mipsel 
+
+define float @fmods(float %x, float %y) {
+entry:
+  %r = frem float %x, %y
+  ret float %r
+}
+
+define double @fmodd(double %x, double %y) {
+entry:
+  %r = frem double %x, %y
+  ret double %r
+}
diff --git a/test/CodeGen/Mips/global-address.ll b/test/CodeGen/Mips/global-address.ll
new file mode 100644
index 0000000..0d49a74
--- /dev/null
+++ b/test/CodeGen/Mips/global-address.ll
@@ -0,0 +1,46 @@
+; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-O32
+; RUN: llc -march=mipsel -relocation-model=static -mtriple=mipsel-linux-gnu < %s | FileCheck %s -check-prefix=STATIC-O32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=static  -mtriple=mipsel-linux-gnu < %s | FileCheck %s -check-prefix=STATIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N64
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N64
+
+@s1 = internal unnamed_addr global i32 8, align 4
+@g1 = external global i32
+
+define void @foo() nounwind {
+entry:
+; PIC-O32: lw  $[[R0:[0-9]+]], %got(s1)
+; PIC-O32: lw  ${{[0-9]+}}, %lo(s1)($[[R0]])
+; PIC-O32: lw  ${{[0-9]+}}, %got(g1)
+; STATIC-O32: lui $[[R1:[0-9]+]], %hi(s1)
+; STATIC-O32: lw  ${{[0-9]+}}, %lo(s1)($[[R1]])
+; STATIC-O32: lui $[[R2:[0-9]+]], %hi(g1)
+; STATIC-O32: lw  ${{[0-9]+}}, %lo(g1)($[[R2]])
+
+; PIC-N32: lw  $[[R0:[0-9]+]], %got_page(s1)
+; PIC-N32: lw  ${{[0-9]+}}, %got_ofst(s1)($[[R0]])
+; PIC-N32: lw  ${{[0-9]+}}, %got_disp(g1)
+; STATIC-N32: lui $[[R1:[0-9]+]], %hi(s1)
+; STATIC-N32: lw  ${{[0-9]+}}, %lo(s1)($[[R1]])
+; STATIC-N32: lui $[[R2:[0-9]+]], %hi(g1)
+; STATIC-N32: lw  ${{[0-9]+}}, %lo(g1)($[[R2]])
+
+; PIC-N64: ld  $[[R0:[0-9]+]], %got_page(s1)
+; PIC-N64: lw  ${{[0-9]+}}, %got_ofst(s1)($[[R0]])
+; PIC-N64: ld  ${{[0-9]+}}, %got_disp(g1)
+; STATIC-N64: ld  $[[R1:[0-9]+]], %got_page(s1)
+; STATIC-N64: lw  ${{[0-9]+}}, %got_ofst(s1)($[[R1]])
+; STATIC-N64: ld  ${{[0-9]+}}, %got_disp(g1)
+
+  %0 = load i32* @s1, align 4
+  tail call void @foo1(i32 %0) nounwind
+  %1 = load i32* @g1, align 4
+  store i32 %1, i32* @s1, align 4
+  %add = add nsw i32 %1, 2
+  store i32 %add, i32* @g1, align 4
+  ret void
+}
+
+declare void @foo1(i32)
+
diff --git a/test/CodeGen/Mips/global-pointer-reg.ll b/test/CodeGen/Mips/global-pointer-reg.ll
new file mode 100644
index 0000000..174d1f9
--- /dev/null
+++ b/test/CodeGen/Mips/global-pointer-reg.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=mipsel -mips-fix-global-base-reg=false | FileCheck %s 
+
+@g0 = external global i32
+@g1 = external global i32
+@g2 = external global i32
+
+define void @foo1() nounwind {
+entry:
+; CHECK-NOT:    .cpload
+; CHECK-NOT:    .cprestore
+; CHECK: lui    $[[R0:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu  $[[R1:[0-9]+]], $[[R0]], %lo(_gp_disp)
+; CHECK: addu   $[[GP:[0-9]+]], $[[R1]], $25
+; CHECK: lw     ${{[0-9]+}}, %call16(foo2)($[[GP]])
+
+  tail call void @foo2(i32* @g0) nounwind
+  tail call void @foo2(i32* @g1) nounwind
+  tail call void @foo2(i32* @g2) nounwind
+  ret void
+}
+
+declare void @foo2(i32*)
diff --git a/test/CodeGen/Mips/i64arg.ll b/test/CodeGen/Mips/i64arg.ll
index 87cf2a6..8b1f71b 100644
--- a/test/CodeGen/Mips/i64arg.ll
+++ b/test/CodeGen/Mips/i64arg.ll
@@ -4,21 +4,21 @@ define void @f1(i64 %ll1, float %f, i64 %ll, i32 %i, float %f2) nounwind {
 entry:
 ; CHECK: addu $[[R1:[0-9]+]], $zero, $5
 ; CHECK: addu $[[R0:[0-9]+]], $zero, $4
-; CHECK: lw  $25, %call16(ff1)
 ; CHECK: ori $6, ${{[0-9]+}}, 3855
 ; CHECK: ori $7, ${{[0-9]+}}, 22136
+; CHECK: lw  $25, %call16(ff1)
 ; CHECK: jalr
   tail call void @ff1(i32 %i, i64 1085102592623924856) nounwind
 ; CHECK: lw $25, %call16(ff2)
-; CHECK: lw $[[R2:[0-9]+]], 88($sp)
-; CHECK: lw $[[R3:[0-9]+]], 92($sp)
+; CHECK: lw $[[R2:[0-9]+]], 80($sp)
+; CHECK: lw $[[R3:[0-9]+]], 84($sp)
 ; CHECK: addu $4, $zero, $[[R2]]
 ; CHECK: addu $5, $zero, $[[R3]]
 ; CHECK: jalr $25
   tail call void @ff2(i64 %ll, double 3.000000e+00) nounwind
   %sub = add nsw i32 %i, -1
-; CHECK: sw $[[R0]], 24($sp)
 ; CHECK: sw $[[R1]], 28($sp)
+; CHECK: sw $[[R0]], 24($sp)
 ; CHECK: lw $25, %call16(ff3)
 ; CHECK: addu $6, $zero, $[[R2]]
 ; CHECK: addu $7, $zero, $[[R3]]
diff --git a/test/CodeGen/Mips/imm.ll b/test/CodeGen/Mips/imm.ll
new file mode 100644
index 0000000..eea391e
--- /dev/null
+++ b/test/CodeGen/Mips/imm.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
+define i32 @foo0() nounwind readnone {
+entry:
+; CHECK: foo0
+; CHECK: lui $[[R0:[0-9]+]], 4660
+; CHECK: ori ${{[0-9]+}}, $[[R0]], 22136
+  ret i32 305419896
+}
+
+define i32 @foo1() nounwind readnone {
+entry:
+; CHECK: foo1
+; CHECK: lui ${{[0-9]+}}, 4660
+; CHECK-NOT: ori
+  ret i32 305397760
+}
+
+define i32 @foo2() nounwind readnone {
+entry:
+; CHECK: foo2
+; CHECK: addiu ${{[0-9]+}}, $zero, 4660
+  ret i32 4660
+}
+
+define i32 @foo17() nounwind readnone {
+entry:
+; CHECK: foo17
+; CHECK: addiu ${{[0-9]+}}, $zero, -32204
+  ret i32 -32204
+}
+
+define i32 @foo18() nounwind readnone {
+entry:
+; CHECK: foo18
+; CHECK: ori ${{[0-9]+}}, $zero, 33332
+  ret i32 33332
+}
diff --git a/test/CodeGen/Mips/indirectcall.ll b/test/CodeGen/Mips/indirectcall.ll
new file mode 100644
index 0000000..ac565d6
--- /dev/null
+++ b/test/CodeGen/Mips/indirectcall.ll
@@ -0,0 +1,8 @@
+; RUN: llc  < %s -march=mipsel -relocation-model=static | FileCheck %s 
+
+define void @foo0(void (i32)* nocapture %f1) nounwind {
+entry:
+; CHECK: jalr $25
+  tail call void %f1(i32 13) nounwind
+  ret void
+}
diff --git a/test/CodeGen/Mips/inlineasm64.ll b/test/CodeGen/Mips/inlineasm64.ll
new file mode 100644
index 0000000..dbce3c39
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm64.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s
+
+@gl2 = external global i64
+@gl1 = external global i64
+@gl0 = external global i64
+
+define void @foo1() nounwind {
+entry:
+; CHECK: foo1
+; CHECK: daddu
+  %0 = load i64* @gl1, align 8
+  %1 = load i64* @gl0, align 8
+  %2 = tail call i64 asm "daddu $0, $1, $2", "=r,r,r"(i64 %0, i64 %1) nounwind
+  store i64 %2, i64* @gl2, align 8
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/inlineasmmemop.ll b/test/CodeGen/Mips/inlineasmmemop.ll
index b5db58a..4b31a88 100644
--- a/test/CodeGen/Mips/inlineasmmemop.ll
+++ b/test/CodeGen/Mips/inlineasmmemop.ll
@@ -8,10 +8,10 @@ entry:
 ; CHECK: #APP
 ; CHECK: sw $4, 0($[[T0]])
 ; CHECK: #NO_APP
-; CHECK: lw  $[[T1:[0-9]+]], %got(g1)($gp)
 ; CHECK: #APP
 ; CHECK: lw $[[T3:[0-9]+]], 0($[[T0]])
 ; CHECK: #NO_APP
+; CHECK: lw  $[[T1:[0-9]+]], %got(g1)($gp)
 ; CHECK: sw  $[[T3]], 0($[[T1]])
 
   %l1 = alloca i32, align 4
diff --git a/test/CodeGen/Mips/largeimmprinting.ll b/test/CodeGen/Mips/largeimmprinting.ll
index 579a319..b7c9a9c 100644
--- a/test/CodeGen/Mips/largeimmprinting.ll
+++ b/test/CodeGen/Mips/largeimmprinting.ll
@@ -1,8 +1,4 @@
-; DISABLED: llc -march=mipsel -mcpu=4ke < %s | FileCheck %s
-; RUN: false
-
-; byval is currently unsupported.
-; XFAIL: *
+; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | FileCheck %s
 
 %struct.S1 = type { [65536 x i8] }
 
@@ -11,8 +7,8 @@
 define void @f() nounwind {
 entry:
 ; CHECK:  lui $at, 65534
-; CHECK:  addu  $at, $sp, $at
-; CHECK:  addiu $sp, $at, -24
+; CHECK:  addiu $at, $at, -24
+; CHECK:  addu  $sp, $sp, $at
 ; CHECK:  .cprestore  65536
 
   %agg.tmp = alloca %struct.S1, align 1
diff --git a/test/CodeGen/Mips/lit.local.cfg b/test/CodeGen/Mips/lit.local.cfg
new file mode 100644
index 0000000..0587d32
--- /dev/null
+++ b/test/CodeGen/Mips/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'Mips' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/Mips/mips64-fp-indexed-ls.ll b/test/CodeGen/Mips/mips64-fp-indexed-ls.ll
new file mode 100644
index 0000000..09745fb
--- /dev/null
+++ b/test/CodeGen/Mips/mips64-fp-indexed-ls.ll
@@ -0,0 +1,110 @@
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s
+
+%struct.S = type <{ [4 x float] }>
+%struct.S2 = type <{ [4 x double] }>
+%struct.S3 = type <{ i8, float }>
+
+@s = external global [4 x %struct.S]
+@gf = external global float
+@gd = external global double
+@s2 = external global [4 x %struct.S2]
+@s3 = external global %struct.S3
+
+define float @foo0(float* nocapture %b, i32 %o) nounwind readonly {
+entry:
+; CHECK: lwxc1
+  %idxprom = zext i32 %o to i64
+  %arrayidx = getelementptr inbounds float* %b, i64 %idxprom
+  %0 = load float* %arrayidx, align 4
+  ret float %0
+}
+
+define double @foo1(double* nocapture %b, i32 %o) nounwind readonly {
+entry:
+; CHECK: ldxc1
+  %idxprom = zext i32 %o to i64
+  %arrayidx = getelementptr inbounds double* %b, i64 %idxprom
+  %0 = load double* %arrayidx, align 8
+  ret double %0
+}
+
+define float @foo2(i32 %b, i32 %c) nounwind readonly {
+entry:
+; CHECK: luxc1
+  %idxprom = zext i32 %c to i64
+  %idxprom1 = zext i32 %b to i64
+  %arrayidx2 = getelementptr inbounds [4 x %struct.S]* @s, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
+  %0 = load float* %arrayidx2, align 1
+  ret float %0
+}
+
+define void @foo3(float* nocapture %b, i32 %o) nounwind {
+entry:
+; CHECK: swxc1
+  %0 = load float* @gf, align 4
+  %idxprom = zext i32 %o to i64
+  %arrayidx = getelementptr inbounds float* %b, i64 %idxprom
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+define void @foo4(double* nocapture %b, i32 %o) nounwind {
+entry:
+; CHECK: sdxc1
+  %0 = load double* @gd, align 8
+  %idxprom = zext i32 %o to i64
+  %arrayidx = getelementptr inbounds double* %b, i64 %idxprom
+  store double %0, double* %arrayidx, align 8
+  ret void
+}
+
+define void @foo5(i32 %b, i32 %c) nounwind {
+entry:
+; CHECK: suxc1
+  %0 = load float* @gf, align 4
+  %idxprom = zext i32 %c to i64
+  %idxprom1 = zext i32 %b to i64
+  %arrayidx2 = getelementptr inbounds [4 x %struct.S]* @s, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
+  store float %0, float* %arrayidx2, align 1
+  ret void
+}
+
+define double @foo6(i32 %b, i32 %c) nounwind readonly {
+entry:
+; CHECK: foo6
+; CHECK-NOT: ldxc1
+  %idxprom = zext i32 %c to i64
+  %idxprom1 = zext i32 %b to i64
+  %arrayidx2 = getelementptr inbounds [4 x %struct.S2]* @s2, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
+  %0 = load double* %arrayidx2, align 1
+  ret double %0
+}
+
+define void @foo7(i32 %b, i32 %c) nounwind {
+entry:
+; CHECK: foo7
+; CHECK-NOT: sdxc1
+  %0 = load double* @gd, align 8
+  %idxprom = zext i32 %c to i64
+  %idxprom1 = zext i32 %b to i64
+  %arrayidx2 = getelementptr inbounds [4 x %struct.S2]* @s2, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
+  store double %0, double* %arrayidx2, align 1
+  ret void
+}
+
+define float @foo8() nounwind readonly {
+entry:
+; CHECK: foo8
+; CHECK: luxc1
+  %0 = load float* getelementptr inbounds (%struct.S3* @s3, i64 0, i32 1), align 1
+  ret float %0
+}
+
+define void @foo9(float %f) nounwind {
+entry:
+; CHECK: foo9
+; CHECK: suxc1
+  store float %f, float* getelementptr inbounds (%struct.S3* @s3, i64 0, i32 1), align 1
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/mips64countleading.ll b/test/CodeGen/Mips/mips64countleading.ll
new file mode 100644
index 0000000..b2b67e5
--- /dev/null
+++ b/test/CodeGen/Mips/mips64countleading.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
+
+define i64 @t1(i64 %X) nounwind readnone {
+entry:
+; CHECK: dclz
+  %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %X, i1 true)
+  ret i64 %tmp1
+}
+
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
+
+define i64 @t3(i64 %X) nounwind readnone {
+entry:
+; CHECK: dclo 
+  %neg = xor i64 %X, -1
+  %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %neg, i1 true)
+  ret i64 %tmp1
+}
+
diff --git a/test/CodeGen/Mips/mips64directive.ll b/test/CodeGen/Mips/mips64directive.ll
new file mode 100644
index 0000000..fa81b72
--- /dev/null
+++ b/test/CodeGen/Mips/mips64directive.ll
@@ -0,0 +1,11 @@
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s
+
+@gl = global i64 1250999896321, align 8
+
+; CHECK: 8byte
+define i64 @foo1() nounwind readonly {
+entry:
+  %0 = load i64* @gl, align 8
+  ret i64 %0
+}
+
diff --git a/test/CodeGen/Mips/mips64ext.ll b/test/CodeGen/Mips/mips64ext.ll
new file mode 100644
index 0000000..02a35f8
--- /dev/null
+++ b/test/CodeGen/Mips/mips64ext.ll
@@ -0,0 +1,26 @@
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s 
+
+define i64 @zext64_32(i32 %a) nounwind readnone {
+entry:
+; CHECK: addiu $[[R0:[0-9]+]], ${{[0-9]+}}, 2
+; CHECK: dsll $[[R1:[0-9]+]], $[[R0]], 32
+; CHECK: dsrl ${{[0-9]+}}, $[[R1]], 32
+  %add = add i32 %a, 2
+  %conv = zext i32 %add to i64
+  ret i64 %conv
+}
+
+define i64 @sext64_32(i32 %a) nounwind readnone {
+entry:
+; CHECK: sll ${{[0-9]+}}, ${{[0-9]+}}, 0
+  %conv = sext i32 %a to i64
+  ret i64 %conv
+}
+
+define i64 @i64_float(float %f) nounwind readnone {
+entry:
+; CHECK: trunc.l.s 
+  %conv = fptosi float %f to i64
+  ret i64 %conv
+}
+
diff --git a/test/CodeGen/Mips/mips64extins.ll b/test/CodeGen/Mips/mips64extins.ll
new file mode 100644
index 0000000..14f92ca
--- /dev/null
+++ b/test/CodeGen/Mips/mips64extins.ll
@@ -0,0 +1,55 @@
+; RUN: llc  < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s 
+
+define i64 @dext(i64 %i) nounwind readnone {
+entry:
+; CHECK: dext ${{[0-9]+}}, ${{[0-9]+}}, 5, 10
+  %shr = lshr i64 %i, 5
+  %and = and i64 %shr, 1023
+  ret i64 %and
+}
+
+define i64 @dextm(i64 %i) nounwind readnone {
+entry:
+; CHECK: dext ${{[0-9]+}}, ${{[0-9]+}}, 5, 34
+  %shr = lshr i64 %i, 5
+  %and = and i64 %shr, 17179869183
+  ret i64 %and
+}
+
+define i64 @dextu(i64 %i) nounwind readnone {
+entry:
+; CHECK: dext ${{[0-9]+}}, ${{[0-9]+}}, 34, 6
+  %shr = lshr i64 %i, 34
+  %and = and i64 %shr, 63
+  ret i64 %and
+}
+
+define i64 @dins(i64 %i, i64 %j) nounwind readnone {
+entry:
+; CHECK: dins ${{[0-9]+}}, ${{[0-9]+}}, 8, 10
+  %shl2 = shl i64 %j, 8
+  %and = and i64 %shl2, 261888
+  %and3 = and i64 %i, -261889
+  %or = or i64 %and3, %and
+  ret i64 %or
+}
+
+define i64 @dinsm(i64 %i, i64 %j) nounwind readnone {
+entry:
+; CHECK: dins ${{[0-9]+}}, ${{[0-9]+}}, 10, 33
+  %shl4 = shl i64 %j, 10
+  %and = and i64 %shl4, 8796093021184
+  %and5 = and i64 %i, -8796093021185
+  %or = or i64 %and5, %and
+  ret i64 %or
+}
+
+define i64 @dinsu(i64 %i, i64 %j) nounwind readnone {
+entry:
+; CHECK: dins ${{[0-9]+}}, ${{[0-9]+}}, 40, 13
+  %shl4 = shl i64 %j, 40
+  %and = and i64 %shl4, 9006099743113216
+  %and5 = and i64 %i, -9006099743113217
+  %or = or i64 %and5, %and
+  ret i64 %or
+}
diff --git a/test/CodeGen/Mips/mips64fpimm0.ll b/test/CodeGen/Mips/mips64fpimm0.ll
new file mode 100644
index 0000000..17716da
--- /dev/null
+++ b/test/CodeGen/Mips/mips64fpimm0.ll
@@ -0,0 +1,7 @@
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s
+
+define double @foo1() nounwind readnone {
+entry:
+; CHECK: dmtc1 $zero
+  ret double 0.000000e+00
+}
diff --git a/test/CodeGen/Mips/mips64fpldst.ll b/test/CodeGen/Mips/mips64fpldst.ll
index b8f3ca9..24647b2 100644
--- a/test/CodeGen/Mips/mips64fpldst.ll
+++ b/test/CodeGen/Mips/mips64fpldst.ll
@@ -1,5 +1,5 @@
-; RUN: llc  < %s -march=mips64el -mcpu=mips64r1 -mattr=n64 | FileCheck %s -check-prefix=CHECK-N64
-; RUN: llc  < %s -march=mips64el -mcpu=mips64r1 -mattr=n32 | FileCheck %s -check-prefix=CHECK-N32
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=CHECK-N64
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n32 | FileCheck %s -check-prefix=CHECK-N32
 
 @f0 = common global float 0.000000e+00, align 4
 @d0 = common global double 0.000000e+00, align 8
@@ -12,7 +12,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(f0)
 ; CHECK-N64: lwc1 $f{{[0-9]+}}, 0($[[R0]]) 
 ; CHECK-N32: funcfl1
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(f0)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(f0)
 ; CHECK-N32: lwc1 $f{{[0-9]+}}, 0($[[R0]]) 
   %0 = load float* @f0, align 4
   ret float %0
@@ -24,7 +24,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(d0)
 ; CHECK-N64: ldc1 $f{{[0-9]+}}, 0($[[R0]]) 
 ; CHECK-N32: funcfl2
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(d0)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(d0)
 ; CHECK-N32: ldc1 $f{{[0-9]+}}, 0($[[R0]]) 
   %0 = load double* @d0, align 8 
   ret double %0
@@ -36,7 +36,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(f0)
 ; CHECK-N64: swc1 $f{{[0-9]+}}, 0($[[R0]]) 
 ; CHECK-N32: funcfs1
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(f0)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(f0)
 ; CHECK-N32: swc1 $f{{[0-9]+}}, 0($[[R0]]) 
   %0 = load float* @f1, align 4 
   store float %0, float* @f0, align 4 
@@ -49,7 +49,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(d0)
 ; CHECK-N64: sdc1 $f{{[0-9]+}}, 0($[[R0]]) 
 ; CHECK-N32: funcfs2
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(d0)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(d0)
 ; CHECK-N32: sdc1 $f{{[0-9]+}}, 0($[[R0]]) 
   %0 = load double* @d1, align 8 
   store double %0, double* @d0, align 8 
diff --git a/test/CodeGen/Mips/mips64imm.ll b/test/CodeGen/Mips/mips64imm.ll
new file mode 100644
index 0000000..1fc8636
--- /dev/null
+++ b/test/CodeGen/Mips/mips64imm.ll
@@ -0,0 +1,52 @@
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
+
+define i32 @foo1() nounwind readnone {
+entry:
+; CHECK: foo1
+; CHECK: lui ${{[0-9]+}}, 4660
+; CHECK-NOT: ori
+  ret i32 305397760
+}
+
+define i64 @foo3() nounwind readnone {
+entry:
+; CHECK: foo3
+; CHECK: lui $[[R0:[0-9]+]], 4660
+; CHECK: daddiu ${{[0-9]+}}, $[[R0]], 22136
+  ret i64 305419896
+}
+
+define i64 @foo6() nounwind readnone {
+entry:
+; CHECK: foo6
+; CHECK: ori ${{[0-9]+}}, $zero, 33332
+  ret i64 33332
+}
+
+define i64 @foo7() nounwind readnone {
+entry:
+; CHECK: foo7
+; CHECK: daddiu ${{[0-9]+}}, $zero, -32204
+  ret i64 -32204
+}
+
+define i64 @foo9() nounwind readnone {
+entry:
+; CHECK: foo9
+; CHECK: lui $[[R0:[0-9]+]], 583
+; CHECK: daddiu $[[R1:[0-9]+]], $[[R0]], -30001
+; CHECK: dsll $[[R2:[0-9]+]], $[[R1]], 18
+; CHECK: daddiu $[[R3:[0-9]+]], $[[R2]], 18441
+; CHECK: dsll $[[R4:[0-9]+]], $[[R3]], 17
+; CHECK: daddiu ${{[0-9]+}}, $[[R4]], 13398
+  ret i64 1311768467284833366
+}
+
+define i64 @foo10() nounwind readnone {
+entry:
+; CHECK: foo10
+; CHECK: lui $[[R0:[0-9]+]], 34661
+; CHECK: daddiu  ${{[0-9]+}}, $[[R0]], 17185
+  ret i64 -8690466096928522240
+}
+
diff --git a/test/CodeGen/Mips/mips64instrs.ll b/test/CodeGen/Mips/mips64instrs.ll
index c9812a2..0418311 100644
--- a/test/CodeGen/Mips/mips64instrs.ll
+++ b/test/CodeGen/Mips/mips64instrs.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mips64el -mcpu=mips64r1 < %s | FileCheck %s
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
 
 define i64 @f0(i64 %a0, i64 %a1) nounwind readnone {
 entry:
@@ -116,12 +116,12 @@ entry:
   ret i64 %rem
 }
 
-declare i64 @llvm.ctlz.i64(i64) nounwind readnone
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
 
 define i64 @f18(i64 %X) nounwind readnone {
 entry:
 ; CHECK: dclz $2, $4
-  %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %X)
+  %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %X, i1 true)
   ret i64 %tmp1
 }
 
@@ -129,7 +129,7 @@ define i64 @f19(i64 %X) nounwind readnone {
 entry:
 ; CHECK: dclo $2, $4
   %neg = xor i64 %X, -1
-  %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %neg)
+  %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %neg, i1 true)
   ret i64 %tmp1
 }
 
diff --git a/test/CodeGen/Mips/mips64intldst.ll b/test/CodeGen/Mips/mips64intldst.ll
index fdf496b..0e310a8 100644
--- a/test/CodeGen/Mips/mips64intldst.ll
+++ b/test/CodeGen/Mips/mips64intldst.ll
@@ -1,5 +1,5 @@
-; RUN: llc  < %s -march=mips64el -mcpu=mips64r1 -mattr=n64 | FileCheck %s -check-prefix=CHECK-N64
-; RUN: llc  < %s -march=mips64el -mcpu=mips64r1 -mattr=n32 | FileCheck %s -check-prefix=CHECK-N32
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=CHECK-N64
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n32 | FileCheck %s -check-prefix=CHECK-N32
 
 @c = common global i8 0, align 4
 @s = common global i16 0, align 4
@@ -16,7 +16,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(c)
 ; CHECK-N64: lb ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: func1
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(c)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(c)
 ; CHECK-N32: lb ${{[0-9]+}}, 0($[[R0]])
   %0 = load i8* @c, align 4
   %conv = sext i8 %0 to i64
@@ -29,7 +29,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(s)
 ; CHECK-N64: lh ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: func2
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(s)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(s)
 ; CHECK-N32: lh ${{[0-9]+}}, 0($[[R0]])
   %0 = load i16* @s, align 4
   %conv = sext i16 %0 to i64
@@ -42,7 +42,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(i)
 ; CHECK-N64: lw ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: func3
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(i)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(i)
 ; CHECK-N32: lw ${{[0-9]+}}, 0($[[R0]])
   %0 = load i32* @i, align 4
   %conv = sext i32 %0 to i64
@@ -55,7 +55,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(l)
 ; CHECK-N64: ld ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: func4
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(l)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(l)
 ; CHECK-N32: ld ${{[0-9]+}}, 0($[[R0]])
   %0 = load i64* @l, align 8
   ret i64 %0
@@ -67,7 +67,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(uc)
 ; CHECK-N64: lbu ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: ufunc1
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(uc)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(uc)
 ; CHECK-N32: lbu ${{[0-9]+}}, 0($[[R0]])
   %0 = load i8* @uc, align 4
   %conv = zext i8 %0 to i64
@@ -80,7 +80,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(us)
 ; CHECK-N64: lhu ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: ufunc2
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(us)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(us)
 ; CHECK-N32: lhu ${{[0-9]+}}, 0($[[R0]])
   %0 = load i16* @us, align 4
   %conv = zext i16 %0 to i64
@@ -93,7 +93,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(ui)
 ; CHECK-N64: lwu ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: ufunc3
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(ui)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(ui)
 ; CHECK-N32: lwu ${{[0-9]+}}, 0($[[R0]])
   %0 = load i32* @ui, align 4
   %conv = zext i32 %0 to i64
@@ -106,7 +106,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(c)
 ; CHECK-N64: sb ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: sfunc1
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(c)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(c)
 ; CHECK-N32: sb ${{[0-9]+}}, 0($[[R0]])
   %0 = load i64* @l1, align 8
   %conv = trunc i64 %0 to i8
@@ -120,7 +120,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(s)
 ; CHECK-N64: sh ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: sfunc2
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(s)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(s)
 ; CHECK-N32: sh ${{[0-9]+}}, 0($[[R0]])
   %0 = load i64* @l1, align 8
   %conv = trunc i64 %0 to i16
@@ -134,7 +134,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(i)
 ; CHECK-N64: sw ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: sfunc3
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(i)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(i)
 ; CHECK-N32: sw ${{[0-9]+}}, 0($[[R0]])
   %0 = load i64* @l1, align 8
   %conv = trunc i64 %0 to i32
@@ -148,7 +148,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(l)
 ; CHECK-N64: sd ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: sfunc4
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(l)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(l)
 ; CHECK-N32: sd ${{[0-9]+}}, 0($[[R0]])
   %0 = load i64* @l1, align 8
   store i64 %0, i64* @l, align 8
diff --git a/test/CodeGen/Mips/mips64lea.ll b/test/CodeGen/Mips/mips64lea.ll
new file mode 100644
index 0000000..54d504f
--- /dev/null
+++ b/test/CodeGen/Mips/mips64lea.ll
@@ -0,0 +1,12 @@
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
+
+define void @foo3() nounwind {
+entry:
+; CHECK: daddiu ${{[0-9]+}}, $sp
+  %a = alloca i32, align 4
+  call void @foo1(i32* %a) nounwind
+  ret void
+}
+
+declare void @foo1(i32*)
+
diff --git a/test/CodeGen/Mips/mips64muldiv.ll b/test/CodeGen/Mips/mips64muldiv.ll
new file mode 100644
index 0000000..fd036a2
--- /dev/null
+++ b/test/CodeGen/Mips/mips64muldiv.ll
@@ -0,0 +1,49 @@
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
+
+define i64 @m0(i64 %a0, i64 %a1) nounwind readnone {
+entry:
+; CHECK: dmult
+; CHECK: mflo
+  %mul = mul i64 %a1, %a0
+  ret i64 %mul
+}
+
+define i64 @m1(i64 %a) nounwind readnone {
+entry:
+; CHECK: dmult
+; CHECK: mfhi
+  %div = sdiv i64 %a, 3
+  ret i64 %div
+}
+
+define i64 @d0(i64 %a0, i64 %a1) nounwind readnone {
+entry:
+; CHECK: ddivu
+; CHECK: mflo
+  %div = udiv i64 %a0, %a1
+  ret i64 %div
+}
+
+define i64 @d1(i64 %a0, i64 %a1) nounwind readnone {
+entry:
+; CHECK: ddiv
+; CHECK: mflo
+  %div = sdiv i64 %a0, %a1
+  ret i64 %div
+}
+
+define i64 @d2(i64 %a0, i64 %a1) nounwind readnone {
+entry:
+; CHECK: ddivu
+; CHECK: mfhi
+  %rem = urem i64 %a0, %a1
+  ret i64 %rem
+}
+
+define i64 @d3(i64 %a0, i64 %a1) nounwind readnone {
+entry:
+; CHECK: ddiv
+; CHECK: mfhi
+  %rem = srem i64 %a0, %a1
+  ret i64 %rem
+}
diff --git a/test/CodeGen/Mips/mips64shift.ll b/test/CodeGen/Mips/mips64shift.ll
index cc5e508..45d1c95 100644
--- a/test/CodeGen/Mips/mips64shift.ll
+++ b/test/CodeGen/Mips/mips64shift.ll
@@ -44,21 +44,21 @@ entry:
 
 define i64 @f6(i64 %a0) nounwind readnone {
 entry:
-; CHECK: dsll32 ${{[0-9]+}}, ${{[0-9]+}}, 8
+; CHECK: dsll ${{[0-9]+}}, ${{[0-9]+}}, 40
   %shl = shl i64 %a0, 40
   ret i64 %shl
 }
 
 define i64 @f7(i64 %a0) nounwind readnone {
 entry:
-; CHECK: dsra32 ${{[0-9]+}}, ${{[0-9]+}}, 8
+; CHECK: dsra ${{[0-9]+}}, ${{[0-9]+}}, 40
   %shr = ashr i64 %a0, 40
   ret i64 %shr
 }
 
 define i64 @f8(i64 %a0) nounwind readnone {
 entry:
-; CHECK: dsrl32 ${{[0-9]+}}, ${{[0-9]+}}, 8
+; CHECK: dsrl ${{[0-9]+}}, ${{[0-9]+}}, 40
   %shr = lshr i64 %a0, 40
   ret i64 %shr
 }
@@ -94,7 +94,7 @@ entry:
 
 define i64 @f12(i64 %a0) nounwind readnone {
 entry:
-; CHECK: drotr32 ${{[0-9]+}}, ${{[0-9]+}}, 22
+; CHECK: drotr ${{[0-9]+}}, ${{[0-9]+}}, 54
   %shl = shl i64 %a0, 10
   %shr = lshr i64 %a0, 54
   %or = or i64 %shl, %shr
diff --git a/test/CodeGen/Mips/mipslopat.ll b/test/CodeGen/Mips/mipslopat.ll
index 0279828..1f433b9 100644
--- a/test/CodeGen/Mips/mipslopat.ll
+++ b/test/CodeGen/Mips/mipslopat.ll
@@ -6,7 +6,7 @@
 
 define void @simple_vol_file() nounwind {
 entry:
-  %tmp = volatile load i32** @stat_vol_ptr_int, align 4
+  %tmp = load volatile i32** @stat_vol_ptr_int, align 4
   %0 = bitcast i32* %tmp to i8*
   call void @llvm.prefetch(i8* %0, i32 0, i32 0, i32 1)
   %tmp1 = load i32** @stat_ptr_vol_int, align 4
diff --git a/test/CodeGen/Mips/o32_cc_byval.ll b/test/CodeGen/Mips/o32_cc_byval.ll
index e673480..c5cbc7a 100644
--- a/test/CodeGen/Mips/o32_cc_byval.ll
+++ b/test/CodeGen/Mips/o32_cc_byval.ll
@@ -12,20 +12,20 @@ define void @f1() nounwind {
 entry:
 ; CHECK: lw  $[[R1:[0-9]+]], %got(f1.s1)($gp)
 ; CHECK: addiu $[[R0:[0-9]+]], $[[R1]], %lo(f1.s1)
-; CHECK: lw  $[[R2:[0-9]+]], 8($[[R0]])
-; CHECK: lw  $[[R7:[0-9]+]], 12($[[R0]])
-; CHECK: lw  $[[R3:[0-9]+]], 16($[[R0]])
-; CHECK: lw  $[[R4:[0-9]+]], 20($[[R0]])
-; CHECK: lw  $[[R5:[0-9]+]], 24($[[R0]])
 ; CHECK: lw  $[[R6:[0-9]+]], 28($[[R0]])
-; CHECK: sw  $[[R2]], 16($sp)
-; CHECK: sw  $[[R7]], 20($sp)
-; CHECK: sw  $[[R3]], 24($sp)
-; CHECK: sw  $[[R4]], 28($sp)
-; CHECK: sw  $[[R5]], 32($sp)
+; CHECK: lw  $[[R5:[0-9]+]], 24($[[R0]])
+; CHECK: lw  $[[R4:[0-9]+]], 20($[[R0]])
+; CHECK: lw  $[[R3:[0-9]+]], 16($[[R0]])
+; CHECK: lw  $[[R7:[0-9]+]], 12($[[R0]])
+; CHECK: lw  $[[R2:[0-9]+]], 8($[[R0]])
 ; CHECK: sw  $[[R6]], 36($sp)
-; CHECK: lw  $6, %lo(f1.s1)($[[R1]])
+; CHECK: sw  $[[R5]], 32($sp)
+; CHECK: sw  $[[R4]], 28($sp)
+; CHECK: sw  $[[R3]], 24($sp)
+; CHECK: sw  $[[R7]], 20($sp)
+; CHECK: sw  $[[R2]], 16($sp)
 ; CHECK: lw  $7, 4($[[R0]])
+; CHECK: lw  $6, %lo(f1.s1)($[[R1]])
   %agg.tmp10 = alloca %struct.S3, align 4
   call void @callee1(float 2.000000e+01, %struct.S1* byval bitcast (%0* @f1.s1 to %struct.S1*)) nounwind
   call void @callee2(%struct.S2* byval @f1.s2) nounwind
@@ -44,20 +44,20 @@ declare void @callee3(float, %struct.S3* byval, %struct.S1* byval)
 define void @f2(float %f, %struct.S1* nocapture byval %s1) nounwind {
 entry:
 ; CHECK: addiu $sp, $sp, -56
-; CHECK: sw  $6, 64($sp)
 ; CHECK: sw  $7, 68($sp)
+; CHECK: sw  $6, 64($sp)
+; CHECK: lw  $4, 88($sp)
 ; CHECK: ldc1 $f[[F0:[0-9]+]], 80($sp)
+; CHECK: lw  $[[R3:[0-9]+]], 72($sp)
+; CHECK: lw  $[[R4:[0-9]+]], 76($sp)
 ; CHECK: lw  $[[R2:[0-9]+]], 68($sp)
 ; CHECK: lh  $[[R1:[0-9]+]], 66($sp)
 ; CHECK: lb  $[[R0:[0-9]+]], 64($sp)
-; CHECK: lw  $[[R3:[0-9]+]], 72($sp)
-; CHECK: lw  $[[R4:[0-9]+]], 76($sp)
-; CHECK: lw  $4, 88($sp)
-; CHECK: sw  $[[R3]], 16($sp)
-; CHECK: sw  $[[R4]], 20($sp)
-; CHECK: sw  $[[R2]], 24($sp)
-; CHECK: sw  $[[R1]], 28($sp)
 ; CHECK: sw  $[[R0]], 32($sp)
+; CHECK: sw  $[[R1]], 28($sp)
+; CHECK: sw  $[[R2]], 24($sp)
+; CHECK: sw  $[[R4]], 20($sp)
+; CHECK: sw  $[[R3]], 16($sp)
 ; CHECK: mfc1 $6, $f[[F0]]
 
   %i2 = getelementptr inbounds %struct.S1* %s1, i32 0, i32 5
@@ -81,12 +81,12 @@ declare void @callee4(i32, double, i64, i32, i16 signext, i8 signext, float)
 define void @f3(%struct.S2* nocapture byval %s2) nounwind {
 entry:
 ; CHECK: addiu $sp, $sp, -56
-; CHECK: sw  $4, 56($sp)
-; CHECK: sw  $5, 60($sp)
-; CHECK: sw  $6, 64($sp)
 ; CHECK: sw  $7, 68($sp)
-; CHECK: lw  $[[R0:[0-9]+]], 68($sp)
+; CHECK: sw  $6, 64($sp)
+; CHECK: sw  $5, 60($sp)
+; CHECK: sw  $4, 56($sp)
 ; CHECK: lw  $4, 56($sp)
+; CHECK: lw  $[[R0:[0-9]+]], 68($sp)
 ; CHECK: sw  $[[R0]], 24($sp)
 
   %arrayidx = getelementptr inbounds %struct.S2* %s2, i32 0, i32 0, i32 0
@@ -100,14 +100,14 @@ entry:
 define void @f4(float %f, %struct.S3* nocapture byval %s3, %struct.S1* nocapture byval %s1) nounwind {
 entry:
 ; CHECK: addiu $sp, $sp, -56
-; CHECK: sw  $5, 60($sp)
-; CHECK: sw  $6, 64($sp)
 ; CHECK: sw  $7, 68($sp)
+; CHECK: sw  $6, 64($sp)
+; CHECK: sw  $5, 60($sp)
+; CHECK: lw  $4, 68($sp)
 ; CHECK: lw  $[[R1:[0-9]+]], 88($sp)
 ; CHECK: lb  $[[R0:[0-9]+]], 60($sp)
-; CHECK: lw  $4, 68($sp)
-; CHECK: sw  $[[R1]], 24($sp)
 ; CHECK: sw  $[[R0]], 32($sp)
+; CHECK: sw  $[[R1]], 24($sp)
 
   %i = getelementptr inbounds %struct.S1* %s1, i32 0, i32 2
   %tmp = load i32* %i, align 4, !tbaa !0
diff --git a/test/CodeGen/Mips/private.ll b/test/CodeGen/Mips/private.ll
index 4cc48f0..d1a67fd 100644
--- a/test/CodeGen/Mips/private.ll
+++ b/test/CodeGen/Mips/private.ll
@@ -1,19 +1,20 @@
 ; Test to make sure that the 'private' is used correctly.
 ;
-; RUN: llc < %s -march=mips > %t
-; RUN: grep \\\$foo: %t
-; RUN: grep call.*\\\$foo %t
-; RUN: grep \\\$baz: %t
-; RUN: grep lw.*\\\$baz %t
+; RUN: llc -march=mips < %s | FileCheck %s
 
 define private void @foo() {
-        ret void
+; CHECK: foo:
+  ret void
 }
 
 @baz = private global i32 4
 
 define i32 @bar() {
-        call void @foo()
-	%1 = load i32* @baz, align 4
-        ret i32 %1
+; CHECK: bar:
+; CHECK: call16($foo)
+; CHECK: lw $[[R0:[0-9]+]], %got($baz)($
+; CHECK: lw ${{[0-9]+}}, %lo($baz)($[[R0]])
+  call void @foo()
+  %1 = load i32* @baz, align 4
+  ret i32 %1
 }
diff --git a/test/CodeGen/Mips/rotate.ll b/test/CodeGen/Mips/rotate.ll
index 8e27f4a..4f3cfb7 100644
--- a/test/CodeGen/Mips/rotate.ll
+++ b/test/CodeGen/Mips/rotate.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mips -mcpu=4ke < %s | FileCheck %s
+; RUN: llc -march=mips -mcpu=mips32r2 < %s | FileCheck %s
 
 ; CHECK:  rotrv $2, $4
 define i32 @rot0(i32 %a, i32 %b) nounwind readnone {
diff --git a/test/CodeGen/Mips/swzero.ll b/test/CodeGen/Mips/swzero.ll
new file mode 100644
index 0000000..da1e036
--- /dev/null
+++ b/test/CodeGen/Mips/swzero.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
+%struct.unaligned = type <{ i32 }>
+
+define void @zero_u(%struct.unaligned* nocapture %p) nounwind {
+entry:
+; CHECK: usw $zero
+  %x = getelementptr inbounds %struct.unaligned* %p, i32 0, i32 0
+  store i32 0, i32* %x, align 1
+  ret void
+}
+
+define void @zero_a(i32* nocapture %p) nounwind {
+entry:
+; CHECK: sw $zero
+  store i32 0, i32* %p, align 4
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/tls.ll b/test/CodeGen/Mips/tls.ll
index b0474b4..a3c4768 100644
--- a/test/CodeGen/Mips/tls.ll
+++ b/test/CodeGen/Mips/tls.ll
@@ -1,7 +1,8 @@
 ; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=PIC
 ; RUN: llc -march=mipsel -relocation-model=static < %s \
 ; RUN:                             | FileCheck %s -check-prefix=STATIC
-
+; RUN: llc -march=mipsel -relocation-model=static < %s \
+; RUN:   -mips-fix-global-base-reg=false | FileCheck %s -check-prefix=STATICGP
 
 @t1 = thread_local global i32 0, align 4
 
@@ -39,8 +40,32 @@ entry:
 ; PIC:   jalr    $25
 ; PIC:   lw      $2, 0($2)
 
+; STATICGP: lui     $[[R0:[0-9]+]], %hi(__gnu_local_gp)
+; STATICGP: addiu   $[[GP:[0-9]+]], $[[R0]], %lo(__gnu_local_gp)
+; STATICGP: lw      ${{[0-9]+}}, %gottprel(t2)($[[GP]])
+; STATIC:   lui     $gp, %hi(__gnu_local_gp)
+; STATIC:   addiu   $gp, $gp, %lo(__gnu_local_gp)
 ; STATIC:   rdhwr   $3, $29
 ; STATIC:   lw      $[[R0:[0-9]+]], %gottprel(t2)($gp)
 ; STATIC:   addu    $[[R1:[0-9]+]], $3, $[[R0]]
 ; STATIC:   lw      $2, 0($[[R1]])
 }
+
+@f3.i = internal thread_local unnamed_addr global i32 1, align 4
+
+define i32 @f3() nounwind {
+entry:
+; CHECK: f3:
+
+; PIC:   addiu   $4, $gp, %tlsldm(f3.i)
+; PIC:   jalr    $25
+; PIC:   lui     $[[R0:[0-9]+]], %dtprel_hi(f3.i)
+; PIC:   addu    $[[R1:[0-9]+]], $[[R0]], $2
+; PIC:   lw      ${{[0-9]+}}, %dtprel_lo(f3.i)($[[R1]])
+
+  %0 = load i32* @f3.i, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @f3.i, align 4
+  ret i32 %inc
+}
+
diff --git a/test/CodeGen/Mips/unalignedload.ll b/test/CodeGen/Mips/unalignedload.ll
index 433e896..6a087ba 100644
--- a/test/CodeGen/Mips/unalignedload.ll
+++ b/test/CodeGen/Mips/unalignedload.ll
@@ -9,27 +9,27 @@
 
 define void @foo1() nounwind {
 entry:
-; CHECK-EL: lw  $25, %call16(foo2)
 ; CHECK-EL: ulhu  $4, 2
+; CHECK-EL: lw  $25, %call16(foo2)
 ; CHECK-EL: lw  $[[R0:[0-9]+]], %got(s4)
 ; CHECK-EL: lbu $[[R1:[0-9]+]], 6($[[R0]])
-; CHECK-EL: ulhu  $[[R2:[0-9]+]], 4($[[R0]])
 ; CHECK-EL: sll $[[R3:[0-9]+]], $[[R1]], 16
+; CHECK-EL: ulhu  $[[R2:[0-9]+]], 4($[[R0]])
+; CHECK-EL: or  $5, $[[R2]], $[[R3]]
 ; CHECK-EL: ulw $4, 0($[[R0]])
 ; CHECK-EL: lw  $25, %call16(foo4)
-; CHECK-EL: or  $5, $[[R2]], $[[R3]]
 
 ; CHECK-EB: ulhu  $[[R0:[0-9]+]], 2
-; CHECK-EB: lw  $25, %call16(foo2)
 ; CHECK-EB: sll $4, $[[R0]], 16
+; CHECK-EB: lw  $25, %call16(foo2)
 ; CHECK-EB: lw  $[[R1:[0-9]+]], %got(s4)
-; CHECK-EB: ulhu  $[[R2:[0-9]+]], 4($[[R1]])
 ; CHECK-EB: lbu $[[R3:[0-9]+]], 6($[[R1]])
-; CHECK-EB: sll $[[R4:[0-9]+]], $[[R2]], 16
 ; CHECK-EB: sll $[[R5:[0-9]+]], $[[R3]], 8
+; CHECK-EB: ulhu  $[[R2:[0-9]+]], 4($[[R1]])
+; CHECK-EB: sll $[[R4:[0-9]+]], $[[R2]], 16
+; CHECK-EB: or  $5, $[[R4]], $[[R5]]
 ; CHECK-EB: ulw $4, 0($[[R1]])
 ; CHECK-EB: lw  $25, %call16(foo4)
-; CHECK-EB: or  $5, $[[R4]], $[[R5]]
 
   tail call void @foo2(%struct.S1* byval getelementptr inbounds (%struct.S2* @s2, i32 0, i32 1)) nounwind
   tail call void @foo4(%struct.S4* byval @s4) nounwind
diff --git a/test/CodeGen/Mips/zeroreg.ll b/test/CodeGen/Mips/zeroreg.ll
new file mode 100644
index 0000000..b890e1d
--- /dev/null
+++ b/test/CodeGen/Mips/zeroreg.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=mipsel | FileCheck %s
+
+@g1 = external global i32
+
+define i32 @foo0(i32 %s) nounwind readonly {
+entry:
+; CHECK-NOT: addiu
+; CHECK:     movn
+  %tobool = icmp ne i32 %s, 0
+  %0 = load i32* @g1, align 4, !tbaa !0
+  %cond = select i1 %tobool, i32 0, i32 %0
+  ret i32 %cond
+}
+
+define i32 @foo1(i32 %s) nounwind readonly {
+entry:
+; CHECK-NOT: addiu
+; CHECK:     movz
+  %tobool = icmp ne i32 %s, 0
+  %0 = load i32* @g1, align 4, !tbaa !0
+  %cond = select i1 %tobool, i32 %0, i32 0
+  ret i32 %cond
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/PTX/cvt.ll b/test/CodeGen/PTX/cvt.ll
index a643d25..f55070a 100644
--- a/test/CodeGen/PTX/cvt.ll
+++ b/test/CodeGen/PTX/cvt.ll
@@ -172,9 +172,9 @@ define ptx_device i64 @cvt_i64_f64(double %x) {
 ; f32
 
 define ptx_device float @cvt_f32_preds(i1 %x) {
-; CHECK: mov.b32 %f0, 1065353216;
-; CHECK: mov.b32 %f1, 0;
-; CHECK: selp.f32 %ret{{[0-9]+}}, %f0, %f1, %p{{[0-9]+}};
+; CHECK: mov.b32 %f0, 0;
+; CHECK: mov.b32 %f1, 1065353216;
+; CHECK: selp.f32 %ret{{[0-9]+}}, %f1, %f0, %p{{[0-9]+}};
 ; CHECK: ret;
 	%a = uitofp i1 %x to float
 	ret float %a
@@ -232,9 +232,9 @@ define ptx_device float @cvt_f32_s64(i64 %x) {
 ; f64
 
 define ptx_device double @cvt_f64_preds(i1 %x) {
-; CHECK: mov.b64 %fd0, 4575657221408423936;
-; CHECK: mov.b64 %fd1, 0;
-; CHECK: selp.f64 %ret{{[0-9]+}}, %fd0, %fd1, %p{{[0-9]+}};
+; CHECK: mov.b64 %fd0, 0;
+; CHECK: mov.b64 %fd1, 4575657221408423936;
+; CHECK: selp.f64 %ret{{[0-9]+}}, %fd1, %fd0, %p{{[0-9]+}};
 ; CHECK: ret;
 	%a = uitofp i1 %x to double
 	ret double %a
diff --git a/test/CodeGen/PTX/dg.exp b/test/CodeGen/PTX/dg.exp
deleted file mode 100644
index 2c304b5..0000000
--- a/test/CodeGen/PTX/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target PTX] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/PTX/ld.ll b/test/CodeGen/PTX/ld.ll
index 81fd33a..e55820d 100644
--- a/test/CodeGen/PTX/ld.ll
+++ b/test/CodeGen/PTX/ld.ll
@@ -1,48 +1,48 @@
 ; RUN: llc < %s -march=ptx32 | FileCheck %s
 
-;CHECK: .extern .global .b8 array_i16[20];
+;CHECK: .extern .global .b16 array_i16[10];
 @array_i16 = external global [10 x i16]
 
-;CHECK: .extern .const .b8 array_constant_i16[20];
+;CHECK: .extern .const .b16 array_constant_i16[10];
 @array_constant_i16 = external addrspace(1) constant [10 x i16]
 
-;CHECK: .extern .shared .b8 array_shared_i16[20];
+;CHECK: .extern .shared .b16 array_shared_i16[10];
 @array_shared_i16 = external addrspace(4) global [10 x i16]
 
-;CHECK: .extern .global .b8 array_i32[40];
+;CHECK: .extern .global .b32 array_i32[10];
 @array_i32 = external global [10 x i32]
 
-;CHECK: .extern .const .b8 array_constant_i32[40];
+;CHECK: .extern .const .b32 array_constant_i32[10];
 @array_constant_i32 = external addrspace(1) constant [10 x i32]
 
-;CHECK: .extern .shared .b8 array_shared_i32[40];
+;CHECK: .extern .shared .b32 array_shared_i32[10];
 @array_shared_i32 = external addrspace(4) global [10 x i32]
 
-;CHECK: .extern .global .b8 array_i64[80];
+;CHECK: .extern .global .b64 array_i64[10];
 @array_i64 = external global [10 x i64]
 
-;CHECK: .extern .const .b8 array_constant_i64[80];
+;CHECK: .extern .const .b64 array_constant_i64[10];
 @array_constant_i64 = external addrspace(1) constant [10 x i64]
 
-;CHECK: .extern .shared .b8 array_shared_i64[80];
+;CHECK: .extern .shared .b64 array_shared_i64[10];
 @array_shared_i64 = external addrspace(4) global [10 x i64]
 
-;CHECK: .extern .global .b8 array_float[40];
+;CHECK: .extern .global .b32 array_float[10];
 @array_float = external global [10 x float]
 
-;CHECK: .extern .const .b8 array_constant_float[40];
+;CHECK: .extern .const .b32 array_constant_float[10];
 @array_constant_float = external addrspace(1) constant [10 x float]
 
-;CHECK: .extern .shared .b8 array_shared_float[40];
+;CHECK: .extern .shared .b32 array_shared_float[10];
 @array_shared_float = external addrspace(4) global [10 x float]
 
-;CHECK: .extern .global .b8 array_double[80];
+;CHECK: .extern .global .b64 array_double[10];
 @array_double = external global [10 x double]
 
-;CHECK: .extern .const .b8 array_constant_double[80];
+;CHECK: .extern .const .b64 array_constant_double[10];
 @array_constant_double = external addrspace(1) constant [10 x double]
 
-;CHECK: .extern .shared .b8 array_shared_double[80];
+;CHECK: .extern .shared .b64 array_shared_double[10];
 @array_shared_double = external addrspace(4) global [10 x double]
 
 
diff --git a/test/CodeGen/PTX/lit.local.cfg b/test/CodeGen/PTX/lit.local.cfg
new file mode 100644
index 0000000..e748f7f
--- /dev/null
+++ b/test/CodeGen/PTX/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'PTX' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/PTX/mad-disabling.ll b/test/CodeGen/PTX/mad-disabling.ll
index ad7b341..603c3ba 100644
--- a/test/CodeGen/PTX/mad-disabling.ll
+++ b/test/CodeGen/PTX/mad-disabling.ll
@@ -1,8 +1,13 @@
-; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20 | grep "mad"
-; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20,+no-fma | grep -v "mad"
+; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20 | FileCheck %s -check-prefix=FMA
+; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20,+no-fma | FileCheck %s -check-prefix=MUL
+; RUN: llc < %s -march=ptx64 -mattr=+ptx20,+sm20 | FileCheck %s -check-prefix=FMA
+; RUN: llc < %s -march=ptx64 -mattr=+ptx20,+sm20,+no-fma | FileCheck %s -check-prefix=MUL
 
 define ptx_device float @test_mul_add_f(float %x, float %y, float %z) {
 entry:
+; FMA: mad.rn.f32
+; MUL: mul.rn.f32
+; MUL: add.rn.f32
   %a = fmul float %x, %y
   %b = fadd float %a, %z
   ret float %b
@@ -10,6 +15,9 @@ entry:
 
 define ptx_device double @test_mul_add_d(double %x, double %y, double %z) {
 entry:
+; FMA: mad.rn.f64
+; MUL: mul.rn.f64
+; MUL: add.rn.f64
   %a = fmul double %x, %y
   %b = fadd double %a, %z
   ret double %b
diff --git a/test/CodeGen/PTX/mov.ll b/test/CodeGen/PTX/mov.ll
index 75555a7..9e501be 100644
--- a/test/CodeGen/PTX/mov.ll
+++ b/test/CodeGen/PTX/mov.ll
@@ -31,31 +31,31 @@ define ptx_device double @t1_f64() {
 }
 
 define ptx_device i16 @t2_u16(i16 %x) {
-; CHECK: mov.b16 %ret{{[0-9]+}}, %param{{[0-9]+}};
+; CHECK: mov.b16 %ret{{[0-9]+}}, %arg{{[0-9]+}};
 ; CHECK: ret;
 	ret i16 %x
 }
 
 define ptx_device i32 @t2_u32(i32 %x) {
-; CHECK: mov.b32 %ret{{[0-9]+}}, %param{{[0-9]+}};
+; CHECK: mov.b32 %ret{{[0-9]+}}, %arg{{[0-9]+}};
 ; CHECK: ret;
 	ret i32 %x
 }
 
 define ptx_device i64 @t2_u64(i64 %x) {
-; CHECK: mov.b64 %ret{{[0-9]+}}, %param{{[0-9]+}};
+; CHECK: mov.b64 %ret{{[0-9]+}}, %arg{{[0-9]+}};
 ; CHECK: ret;
 	ret i64 %x
 }
 
 define ptx_device float @t3_f32(float %x) {
-; CHECK: mov.f32 %ret{{[0-9]+}}, %param{{[0-9]+}};
+; CHECK: mov.f32 %ret{{[0-9]+}}, %arg{{[0-9]+}};
 ; CHECK: ret;
 	ret float %x
 }
 
 define ptx_device double @t3_f64(double %x) {
-; CHECK: mov.f64 %ret{{[0-9]+}}, %param{{[0-9]+}};
+; CHECK: mov.f64 %ret{{[0-9]+}}, %arg{{[0-9]+}};
 ; CHECK: ret;
 	ret double %x
 }
diff --git a/test/CodeGen/PTX/parameter-order.ll b/test/CodeGen/PTX/parameter-order.ll
index 09015da..377f173 100644
--- a/test/CodeGen/PTX/parameter-order.ll
+++ b/test/CodeGen/PTX/parameter-order.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=ptx32 | FileCheck %s
 
-; CHECK: .func (.reg .b32 %ret{{[0-9]+}}) test_parameter_order (.reg .b32 %param{{[0-9]+}}, .reg .b32 %param{{[0-9]+}}, .reg .b32 %param{{[0-9]+}}, .reg .b32 %param{{[0-9]+}})
+; CHECK: .func (.reg .b32 %ret{{[0-9]+}}) test_parameter_order (.reg .f32 %arg{{[0-9]+}}, .reg .b32 %arg{{[0-9]+}}, .reg .b32 %arg{{[0-9]+}}, .reg .f32 %arg{{[0-9]+}})
 define ptx_device i32 @test_parameter_order(float %a, i32 %b, i32 %c, float %d) {
 ; CHECK: sub.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
 	%result = sub i32 %b, %c
diff --git a/test/CodeGen/PTX/printf.ll b/test/CodeGen/PTX/printf.ll
new file mode 100644
index 0000000..f901b20
--- /dev/null
+++ b/test/CodeGen/PTX/printf.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=ptx64 -mattr=+ptx20,+sm20 | FileCheck %s
+
+declare i32 @printf(i8*, ...)
+
+@str = private unnamed_addr constant [6 x i8] c"test\0A\00"
+
+define ptx_device void @t1_printf() {
+; CHECK: mov.u64 %rd{{[0-9]+}}, $L__str;
+; CHECK: call.uni	(__localparam_{{[0-9]+}}), vprintf, (__localparam_{{[0-9]+}}, __localparam_{{[0-9]+}});
+; CHECK: ret;
+    %1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @str, i64 0, i64 0))
+	ret void
+}
+
+@str2 = private unnamed_addr constant [11 x i8] c"test = %f\0A\00"
+
+define ptx_device void @t2_printf() {
+; CHECK: .local .align 8 .b8 __local{{[0-9]+}}[{{[0-9]+}}];
+; CHECK: mov.u64 %rd{{[0-9]+}}, $L__str2;
+; CHECK: cvta.local.u64  %rd{{[0-9]+}}, __local{{[0-9+]}};
+; CHECK: call.uni	(__localparam_{{[0-9]+}}), vprintf, (__localparam_{{[0-9]+}}, __localparam_{{[0-9]+}});
+; CHECK: ret;
+  %1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @str2, i64 0, i64 0), double 0x3FF3333340000000)
+  ret void
+}
diff --git a/test/CodeGen/PTX/st.ll b/test/CodeGen/PTX/st.ll
index 63ef58c..c794363 100644
--- a/test/CodeGen/PTX/st.ll
+++ b/test/CodeGen/PTX/st.ll
@@ -1,48 +1,48 @@
 ; RUN: llc < %s -march=ptx32 | FileCheck %s
 
-;CHECK: .extern .global .b8 array_i16[20];
+;CHECK: .extern .global .b16 array_i16[10];
 @array_i16 = external global [10 x i16]
 
-;CHECK: .extern .const .b8 array_constant_i16[20];
+;CHECK: .extern .const .b16 array_constant_i16[10];
 @array_constant_i16 = external addrspace(1) constant [10 x i16]
 
-;CHECK: .extern .shared .b8 array_shared_i16[20];
+;CHECK: .extern .shared .b16 array_shared_i16[10];
 @array_shared_i16 = external addrspace(4) global [10 x i16]
 
-;CHECK: .extern .global .b8 array_i32[40];
+;CHECK: .extern .global .b32 array_i32[10];
 @array_i32 = external global [10 x i32]
 
-;CHECK: .extern .const .b8 array_constant_i32[40];
+;CHECK: .extern .const .b32 array_constant_i32[10];
 @array_constant_i32 = external addrspace(1) constant [10 x i32]
 
-;CHECK: .extern .shared .b8 array_shared_i32[40];
+;CHECK: .extern .shared .b32 array_shared_i32[10];
 @array_shared_i32 = external addrspace(4) global [10 x i32]
 
-;CHECK: .extern .global .b8 array_i64[80];
+;CHECK: .extern .global .b64 array_i64[10];
 @array_i64 = external global [10 x i64]
 
-;CHECK: .extern .const .b8 array_constant_i64[80];
+;CHECK: .extern .const .b64 array_constant_i64[10];
 @array_constant_i64 = external addrspace(1) constant [10 x i64]
 
-;CHECK: .extern .shared .b8 array_shared_i64[80];
+;CHECK: .extern .shared .b64 array_shared_i64[10];
 @array_shared_i64 = external addrspace(4) global [10 x i64]
 
-;CHECK: .extern .global .b8 array_float[40];
+;CHECK: .extern .global .b32 array_float[10];
 @array_float = external global [10 x float]
 
-;CHECK: .extern .const .b8 array_constant_float[40];
+;CHECK: .extern .const .b32 array_constant_float[10];
 @array_constant_float = external addrspace(1) constant [10 x float]
 
-;CHECK: .extern .shared .b8 array_shared_float[40];
+;CHECK: .extern .shared .b32 array_shared_float[10];
 @array_shared_float = external addrspace(4) global [10 x float]
 
-;CHECK: .extern .global .b8 array_double[80];
+;CHECK: .extern .global .b64 array_double[10];
 @array_double = external global [10 x double]
 
-;CHECK: .extern .const .b8 array_constant_double[80];
+;CHECK: .extern .const .b64 array_constant_double[10];
 @array_constant_double = external addrspace(1) constant [10 x double]
 
-;CHECK: .extern .shared .b8 array_shared_double[80];
+;CHECK: .extern .shared .b64 array_shared_double[10];
 @array_shared_double = external addrspace(4) global [10 x double]
 
 
diff --git a/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll b/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll
deleted file mode 100644
index 57ed250..0000000
--- a/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s -march=ppc32 -combiner-alias-analysis | grep f5
-
-target datalayout = "E-p:32:32"
-target triple = "powerpc-apple-darwin8.2.0"
-        %struct.Point = type { double, double, double }
-
-define void @offset(%struct.Point* %pt, double %x, double %y, double %z) {
-entry:
-        %tmp = getelementptr %struct.Point* %pt, i32 0, i32 0           ; <double*> [#uses=2]
-        %tmp.upgrd.1 = load double* %tmp                ; <double> [#uses=1]
-        %tmp2 = fadd double %tmp.upgrd.1, %x             ; <double> [#uses=1]
-        store double %tmp2, double* %tmp
-        %tmp6 = getelementptr %struct.Point* %pt, i32 0, i32 1          ; <double*> [#uses=2]
-        %tmp7 = load double* %tmp6              ; <double> [#uses=1]
-        %tmp9 = fadd double %tmp7, %y            ; <double> [#uses=1]
-        store double %tmp9, double* %tmp6
-        %tmp13 = getelementptr %struct.Point* %pt, i32 0, i32 2         ; <double*> [#uses=2]
-        %tmp14 = load double* %tmp13            ; <double> [#uses=1]
-        %tmp16 = fadd double %tmp14, %z          ; <double> [#uses=1]
-        store double %tmp16, double* %tmp13
-        ret void
-}
-
diff --git a/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll b/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
index cca9e65..3620b0e 100644
--- a/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
+++ b/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
@@ -2,11 +2,11 @@
 
 define i32 @_ZNK4llvm5APInt17countLeadingZerosEv(i64 *%t) nounwind {
         %tmp19 = load i64* %t
-        %tmp22 = tail call i64 @llvm.ctlz.i64( i64 %tmp19 )             ; <i64> [#uses=1]
+        %tmp22 = tail call i64 @llvm.ctlz.i64( i64 %tmp19, i1 true )             ; <i64> [#uses=1]
         %tmp23 = trunc i64 %tmp22 to i32
         %tmp89 = add i32 %tmp23, -64          ; <i32> [#uses=1]
         %tmp90 = add i32 %tmp89, 0            ; <i32> [#uses=1]
         ret i32 %tmp90
 }
 
-declare i64 @llvm.ctlz.i64(i64)
+declare i64 @llvm.ctlz.i64(i64, i1)
diff --git a/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll b/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll
index e50fac4..d10291e 100644
--- a/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll
+++ b/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=powerpc-apple-darwin -enable-ppc32-regscavenger
+; RUN: llc < %s -mtriple=powerpc-apple-darwin
 
 declare i8* @bar(i32)
 
diff --git a/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll b/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll
index 9f35b83..fb8cdce 100644
--- a/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll
+++ b/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 -enable-ppc32-regscavenger
+; RUN: llc < %s -march=ppc32
 
 	%struct._cpp_strbuf = type { i8*, i32, i32 }
 	%struct.cpp_string = type { i32, i8* }
diff --git a/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll b/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll
index dd425f5..f256bca 100644
--- a/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll
+++ b/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc64 -enable-ppc64-regscavenger
+; RUN: llc < %s -march=ppc64
 
 define i16 @test(i8* %d1, i16* %d2) {
  %tmp237 = call i16 asm "lhbrx $0, $2, $1", "=r,r,bO,m"( i8* %d1, i32 0, i16* %d2 )
diff --git a/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll b/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
index 7b6d491..e7a1cf6 100644
--- a/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
+++ b/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
@@ -12,7 +12,7 @@ declare void @IODelay(i32)
 
 define i32 @_Z14ProgramByWordsPvyy(i8* %buffer, i64 %Offset, i64 %bufferSize) nounwind  {
 entry:
-	volatile store i8 -1, i8* null, align 1
+	store volatile i8 -1, i8* null, align 1
 	%tmp28 = icmp eq i8 0, 0		; <i1> [#uses=1]
 	br i1 %tmp28, label %bb107, label %bb
 
@@ -43,7 +43,7 @@ bb68:		; preds = %bb31
 	%tmp2021.i = trunc i64 %Pos.0.reg2mem.0 to i32		; <i32> [#uses=1]
 	%tmp202122.i = inttoptr i32 %tmp2021.i to i8*		; <i8*> [#uses=1]
 	tail call void @IODelay( i32 500 ) nounwind 
-	%tmp53.i = volatile load i16* null, align 2		; <i16> [#uses=2]
+	%tmp53.i = load volatile i16* null, align 2		; <i16> [#uses=2]
 	%tmp5455.i = zext i16 %tmp53.i to i32		; <i32> [#uses=1]
 	br i1 false, label %bb.i, label %bb65.i
 
@@ -59,7 +59,7 @@ bb70.i:		; preds = %bb65.i
 	ret i32 0
 
 _Z24unlock_then_erase_sectory.exit:		; preds = %bb65.i
-	volatile store i8 -1, i8* %tmp202122.i, align 1
+	store volatile i8 -1, i8* %tmp202122.i, align 1
 	%tmp93 = add i64 0, %Pos.0.reg2mem.0		; <i64> [#uses=2]
 	%tmp98 = add i64 0, %Offset		; <i64> [#uses=1]
 	%tmp100 = icmp ugt i64 %tmp98, %tmp93		; <i1> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
index b73382e..974a99a 100644
--- a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
+++ b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=powerpc-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -mcpu=g4 | FileCheck %s
 ; ModuleID = 'hh.c'
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
 target triple = "powerpc-apple-darwin9.6"
@@ -7,21 +7,27 @@ target triple = "powerpc-apple-darwin9.6"
 define void @foo() nounwind {
 entry:
 ;CHECK:  mfcr r2
+;CHECK:  lis r3, 1
 ;CHECK:  rlwinm r2, r2, 8, 0, 31
-;CHECK:  lis r0, 1
-;CHECK:  ori r0, r0, 34540
-;CHECK:  stwx r2, r1, r0
+;CHECK:  ori r3, r3, 34524
+;CHECK:  stwx r2, r1, r3
+; Make sure that the register scavenger returns the same temporary register.
+;CHECK:  mfcr r2
+;CHECK:  lis r3, 1
+;CHECK:  rlwinm r2, r2, 12, 0, 31
+;CHECK:  ori r3, r3, 34520
+;CHECK:  stwx r2, r1, r3
   %x = alloca [100000 x i8]                       ; <[100000 x i8]*> [#uses=1]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   %x1 = bitcast [100000 x i8]* %x to i8*          ; <i8*> [#uses=1]
   call void @bar(i8* %x1) nounwind
-  call void asm sideeffect "", "~{cr2}"() nounwind
+  call void asm sideeffect "", "~{cr2},~{cr3}"() nounwind
   br label %return
 
 return:                                           ; preds = %entry
-;CHECK:  lis r0, 1
-;CHECK:  ori r0, r0, 34540
-;CHECK:  lwzx r2, r1, r0
+;CHECK:  lis r3, 1
+;CHECK:  ori r3, r3, 34524
+;CHECK:  lwzx r2, r1, r3
 ;CHECK:  rlwinm r2, r2, 24, 0, 31
 ;CHECK:  mtcrf 32, r2
   ret void
diff --git a/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll b/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll
new file mode 100644
index 0000000..6161b55
--- /dev/null
+++ b/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll
@@ -0,0 +1,191 @@
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -mcpu=g4 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g4 | FileCheck %s
+
+; ModuleID = 'tsc.c'
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@a = common global [32000 x float] zeroinitializer, align 16
+@b = common global [32000 x float] zeroinitializer, align 16
+@c = common global [32000 x float] zeroinitializer, align 16
+@d = common global [32000 x float] zeroinitializer, align 16
+@e = common global [32000 x float] zeroinitializer, align 16
+@aa = common global [256 x [256 x float]] zeroinitializer, align 16
+@bb = common global [256 x [256 x float]] zeroinitializer, align 16
+@cc = common global [256 x [256 x float]] zeroinitializer, align 16
+
+@.str11 = private unnamed_addr constant [6 x i8] c"s122 \00", align 1
+@.str152 = private unnamed_addr constant [14 x i8] c"S122\09 %.2f \09\09\00", align 1
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+declare i32 @init(i8* %name) nounwind
+declare i64 @clock() nounwind
+declare i32 @dummy(float*, float*, float*, float*, float*, [256 x float]*, [256 x float]*, [256 x float]*, float)
+declare void @check(i32 %name) nounwind
+
+; CHECK: mfcr
+; CHECK: mtcr
+
+define i32 @s122(i32 %n1, i32 %n3) nounwind {
+entry:
+  %call = tail call i32 @init(i8* getelementptr inbounds ([6 x i8]* @.str11, i64 0, i64 0))
+  %call1 = tail call i64 @clock() nounwind
+  %sub = add nsw i32 %n1, -1
+  %cmp316 = icmp slt i32 %sub, 32000
+  br i1 %cmp316, label %entry.split.us, label %for.end.7
+
+entry.split.us:                                   ; preds = %entry
+  %0 = sext i32 %sub to i64
+  %1 = sext i32 %n3 to i64
+  br label %for.body4.lr.ph.us
+
+for.body4.us:                                     ; preds = %for.body4.lr.ph.us, %for.body4.us
+  %indvars.iv20 = phi i64 [ 0, %for.body4.lr.ph.us ], [ %indvars.iv.next21, %for.body4.us ]
+  %indvars.iv = phi i64 [ %0, %for.body4.lr.ph.us ], [ %indvars.iv.next, %for.body4.us ]
+  %indvars.iv.next21 = add i64 %indvars.iv20, 1
+  %sub5.us = sub i64 31999, %indvars.iv20
+  %sext = shl i64 %sub5.us, 32
+  %idxprom.us = ashr exact i64 %sext, 32
+  %arrayidx.us = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us
+  %2 = load float* %arrayidx.us, align 4, !tbaa !5
+  %arrayidx7.us = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv
+  %3 = load float* %arrayidx7.us, align 4, !tbaa !5
+  %add8.us = fadd float %3, %2
+  store float %add8.us, float* %arrayidx7.us, align 4, !tbaa !5
+  %indvars.iv.next = add i64 %indvars.iv, %1
+  %4 = trunc i64 %indvars.iv.next to i32
+  %cmp3.us = icmp slt i32 %4, 32000
+  br i1 %cmp3.us, label %for.body4.us, label %for.body4.lr.ph.us.1
+
+for.body4.lr.ph.us:                               ; preds = %entry.split.us, %for.end.us.4
+  %nl.019.us = phi i32 [ 0, %entry.split.us ], [ %inc.us.4, %for.end.us.4 ]
+  br label %for.body4.us
+
+for.end12:                                        ; preds = %for.end.7, %for.end.us.4
+  %call13 = tail call i64 @clock() nounwind
+  %sub14 = sub nsw i64 %call13, %call1
+  %conv = sitofp i64 %sub14 to double
+  %div = fdiv double %conv, 1.000000e+06
+  %call15 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([14 x i8]* @.str152, i64 0, i64 0), double %div) nounwind
+  tail call void @check(i32 1)
+  ret i32 0
+
+for.body4.lr.ph.us.1:                             ; preds = %for.body4.us
+  %call10.us = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+  br label %for.body4.us.1
+
+for.body4.us.1:                                   ; preds = %for.body4.us.1, %for.body4.lr.ph.us.1
+  %indvars.iv20.1 = phi i64 [ 0, %for.body4.lr.ph.us.1 ], [ %indvars.iv.next21.1, %for.body4.us.1 ]
+  %indvars.iv.1 = phi i64 [ %0, %for.body4.lr.ph.us.1 ], [ %indvars.iv.next.1, %for.body4.us.1 ]
+  %indvars.iv.next21.1 = add i64 %indvars.iv20.1, 1
+  %sub5.us.1 = sub i64 31999, %indvars.iv20.1
+  %sext23 = shl i64 %sub5.us.1, 32
+  %idxprom.us.1 = ashr exact i64 %sext23, 32
+  %arrayidx.us.1 = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us.1
+  %5 = load float* %arrayidx.us.1, align 4, !tbaa !5
+  %arrayidx7.us.1 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.1
+  %6 = load float* %arrayidx7.us.1, align 4, !tbaa !5
+  %add8.us.1 = fadd float %6, %5
+  store float %add8.us.1, float* %arrayidx7.us.1, align 4, !tbaa !5
+  %indvars.iv.next.1 = add i64 %indvars.iv.1, %1
+  %7 = trunc i64 %indvars.iv.next.1 to i32
+  %cmp3.us.1 = icmp slt i32 %7, 32000
+  br i1 %cmp3.us.1, label %for.body4.us.1, label %for.body4.lr.ph.us.2
+
+for.body4.lr.ph.us.2:                             ; preds = %for.body4.us.1
+  %call10.us.1 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+  br label %for.body4.us.2
+
+for.body4.us.2:                                   ; preds = %for.body4.us.2, %for.body4.lr.ph.us.2
+  %indvars.iv20.2 = phi i64 [ 0, %for.body4.lr.ph.us.2 ], [ %indvars.iv.next21.2, %for.body4.us.2 ]
+  %indvars.iv.2 = phi i64 [ %0, %for.body4.lr.ph.us.2 ], [ %indvars.iv.next.2, %for.body4.us.2 ]
+  %indvars.iv.next21.2 = add i64 %indvars.iv20.2, 1
+  %sub5.us.2 = sub i64 31999, %indvars.iv20.2
+  %sext24 = shl i64 %sub5.us.2, 32
+  %idxprom.us.2 = ashr exact i64 %sext24, 32
+  %arrayidx.us.2 = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us.2
+  %8 = load float* %arrayidx.us.2, align 4, !tbaa !5
+  %arrayidx7.us.2 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.2
+  %9 = load float* %arrayidx7.us.2, align 4, !tbaa !5
+  %add8.us.2 = fadd float %9, %8
+  store float %add8.us.2, float* %arrayidx7.us.2, align 4, !tbaa !5
+  %indvars.iv.next.2 = add i64 %indvars.iv.2, %1
+  %10 = trunc i64 %indvars.iv.next.2 to i32
+  %cmp3.us.2 = icmp slt i32 %10, 32000
+  br i1 %cmp3.us.2, label %for.body4.us.2, label %for.body4.lr.ph.us.3
+
+for.body4.lr.ph.us.3:                             ; preds = %for.body4.us.2
+  %call10.us.2 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+  br label %for.body4.us.3
+
+for.body4.us.3:                                   ; preds = %for.body4.us.3, %for.body4.lr.ph.us.3
+  %indvars.iv20.3 = phi i64 [ 0, %for.body4.lr.ph.us.3 ], [ %indvars.iv.next21.3, %for.body4.us.3 ]
+  %indvars.iv.3 = phi i64 [ %0, %for.body4.lr.ph.us.3 ], [ %indvars.iv.next.3, %for.body4.us.3 ]
+  %indvars.iv.next21.3 = add i64 %indvars.iv20.3, 1
+  %sub5.us.3 = sub i64 31999, %indvars.iv20.3
+  %sext25 = shl i64 %sub5.us.3, 32
+  %idxprom.us.3 = ashr exact i64 %sext25, 32
+  %arrayidx.us.3 = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us.3
+  %11 = load float* %arrayidx.us.3, align 4, !tbaa !5
+  %arrayidx7.us.3 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.3
+  %12 = load float* %arrayidx7.us.3, align 4, !tbaa !5
+  %add8.us.3 = fadd float %12, %11
+  store float %add8.us.3, float* %arrayidx7.us.3, align 4, !tbaa !5
+  %indvars.iv.next.3 = add i64 %indvars.iv.3, %1
+  %13 = trunc i64 %indvars.iv.next.3 to i32
+  %cmp3.us.3 = icmp slt i32 %13, 32000
+  br i1 %cmp3.us.3, label %for.body4.us.3, label %for.body4.lr.ph.us.4
+
+for.body4.lr.ph.us.4:                             ; preds = %for.body4.us.3
+  %call10.us.3 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+  br label %for.body4.us.4
+
+for.body4.us.4:                                   ; preds = %for.body4.us.4, %for.body4.lr.ph.us.4
+  %indvars.iv20.4 = phi i64 [ 0, %for.body4.lr.ph.us.4 ], [ %indvars.iv.next21.4, %for.body4.us.4 ]
+  %indvars.iv.4 = phi i64 [ %0, %for.body4.lr.ph.us.4 ], [ %indvars.iv.next.4, %for.body4.us.4 ]
+  %indvars.iv.next21.4 = add i64 %indvars.iv20.4, 1
+  %sub5.us.4 = sub i64 31999, %indvars.iv20.4
+  %sext26 = shl i64 %sub5.us.4, 32
+  %idxprom.us.4 = ashr exact i64 %sext26, 32
+  %arrayidx.us.4 = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us.4
+  %14 = load float* %arrayidx.us.4, align 4, !tbaa !5
+  %arrayidx7.us.4 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.4
+  %15 = load float* %arrayidx7.us.4, align 4, !tbaa !5
+  %add8.us.4 = fadd float %15, %14
+  store float %add8.us.4, float* %arrayidx7.us.4, align 4, !tbaa !5
+  %indvars.iv.next.4 = add i64 %indvars.iv.4, %1
+  %16 = trunc i64 %indvars.iv.next.4 to i32
+  %cmp3.us.4 = icmp slt i32 %16, 32000
+  br i1 %cmp3.us.4, label %for.body4.us.4, label %for.end.us.4
+
+for.end.us.4:                                     ; preds = %for.body4.us.4
+  %call10.us.4 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+  %inc.us.4 = add nsw i32 %nl.019.us, 5
+  %exitcond.4 = icmp eq i32 %inc.us.4, 200000
+  br i1 %exitcond.4, label %for.end12, label %for.body4.lr.ph.us
+
+for.end.7:                                        ; preds = %entry, %for.end.7
+  %nl.019 = phi i32 [ %inc.7, %for.end.7 ], [ 0, %entry ]
+  %call10 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+  %call10.1 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+  %call10.2 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+  %call10.3 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+  %call10.4 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+  %call10.5 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+  %call10.6 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+  %call10.7 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+  %inc.7 = add nsw i32 %nl.019, 8
+  %exitcond.7 = icmp eq i32 %inc.7, 200000
+  br i1 %exitcond.7, label %for.end12, label %for.end.7
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+declare i32 @puts(i8* nocapture) nounwind
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"branch_weights", i32 64, i32 4}
+!4 = metadata !{metadata !"int", metadata !1}
+!5 = metadata !{metadata !"float", metadata !1}
diff --git a/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll b/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll
new file mode 100644
index 0000000..52bf6c7
--- /dev/null
+++ b/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll
@@ -0,0 +1,225 @@
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -mcpu=g4 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g4 | FileCheck %s
+
+; ModuleID = 'tsc.c'
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@a = common global [32000 x float] zeroinitializer, align 16
+@b = common global [32000 x float] zeroinitializer, align 16
+@c = common global [32000 x float] zeroinitializer, align 16
+@d = common global [32000 x float] zeroinitializer, align 16
+@e = common global [32000 x float] zeroinitializer, align 16
+@aa = common global [256 x [256 x float]] zeroinitializer, align 16
+@bb = common global [256 x [256 x float]] zeroinitializer, align 16
+@cc = common global [256 x [256 x float]] zeroinitializer, align 16
+@temp = common global float 0.000000e+00, align 4
+
+@.str81 = private unnamed_addr constant [6 x i8] c"s3110\00", align 1
+@.str235 = private unnamed_addr constant [15 x i8] c"S3110\09 %.2f \09\09\00", align 1
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+declare i32 @init(i8* %name) nounwind
+declare i64 @clock() nounwind
+declare i32 @dummy(float*, float*, float*, float*, float*, [256 x float]*, [256 x float]*, [256 x float]*, float)
+declare void @check(i32 %name) nounwind
+
+; CHECK: mfcr
+; CHECK: mtcr
+
+define i32 @s3110() nounwind {
+entry:
+  %call = tail call i32 @init(i8* getelementptr inbounds ([6 x i8]* @.str81, i64 0, i64 0))
+  %call1 = tail call i64 @clock() nounwind
+  br label %for.body
+
+for.body:                                         ; preds = %for.end17, %entry
+  %nl.041 = phi i32 [ 0, %entry ], [ %inc22, %for.end17 ]
+  %0 = load float* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0, i64 0), align 16, !tbaa !5
+  br label %for.cond5.preheader
+
+for.cond5.preheader:                              ; preds = %for.inc15, %for.body
+  %indvars.iv42 = phi i64 [ 0, %for.body ], [ %indvars.iv.next43, %for.inc15 ]
+  %max.139 = phi float [ %0, %for.body ], [ %max.3.15, %for.inc15 ]
+  %xindex.138 = phi i32 [ 0, %for.body ], [ %xindex.3.15, %for.inc15 ]
+  %yindex.137 = phi i32 [ 0, %for.body ], [ %yindex.3.15, %for.inc15 ]
+  br label %for.body7
+
+for.body7:                                        ; preds = %for.body7, %for.cond5.preheader
+  %indvars.iv = phi i64 [ 0, %for.cond5.preheader ], [ %indvars.iv.next.15, %for.body7 ]
+  %max.235 = phi float [ %max.139, %for.cond5.preheader ], [ %max.3.15, %for.body7 ]
+  %xindex.234 = phi i32 [ %xindex.138, %for.cond5.preheader ], [ %xindex.3.15, %for.body7 ]
+  %yindex.233 = phi i32 [ %yindex.137, %for.cond5.preheader ], [ %yindex.3.15, %for.body7 ]
+  %arrayidx9 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv
+  %1 = load float* %arrayidx9, align 16, !tbaa !5
+  %cmp10 = fcmp ogt float %1, %max.235
+  %2 = trunc i64 %indvars.iv to i32
+  %yindex.3 = select i1 %cmp10, i32 %2, i32 %yindex.233
+  %3 = trunc i64 %indvars.iv42 to i32
+  %xindex.3 = select i1 %cmp10, i32 %3, i32 %xindex.234
+  %max.3 = select i1 %cmp10, float %1, float %max.235
+  %indvars.iv.next45 = or i64 %indvars.iv, 1
+  %arrayidx9.1 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next45
+  %4 = load float* %arrayidx9.1, align 4, !tbaa !5
+  %cmp10.1 = fcmp ogt float %4, %max.3
+  %5 = trunc i64 %indvars.iv.next45 to i32
+  %yindex.3.1 = select i1 %cmp10.1, i32 %5, i32 %yindex.3
+  %xindex.3.1 = select i1 %cmp10.1, i32 %3, i32 %xindex.3
+  %max.3.1 = select i1 %cmp10.1, float %4, float %max.3
+  %indvars.iv.next.146 = or i64 %indvars.iv, 2
+  %arrayidx9.2 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.146
+  %6 = load float* %arrayidx9.2, align 8, !tbaa !5
+  %cmp10.2 = fcmp ogt float %6, %max.3.1
+  %7 = trunc i64 %indvars.iv.next.146 to i32
+  %yindex.3.2 = select i1 %cmp10.2, i32 %7, i32 %yindex.3.1
+  %xindex.3.2 = select i1 %cmp10.2, i32 %3, i32 %xindex.3.1
+  %max.3.2 = select i1 %cmp10.2, float %6, float %max.3.1
+  %indvars.iv.next.247 = or i64 %indvars.iv, 3
+  %arrayidx9.3 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.247
+  %8 = load float* %arrayidx9.3, align 4, !tbaa !5
+  %cmp10.3 = fcmp ogt float %8, %max.3.2
+  %9 = trunc i64 %indvars.iv.next.247 to i32
+  %yindex.3.3 = select i1 %cmp10.3, i32 %9, i32 %yindex.3.2
+  %xindex.3.3 = select i1 %cmp10.3, i32 %3, i32 %xindex.3.2
+  %max.3.3 = select i1 %cmp10.3, float %8, float %max.3.2
+  %indvars.iv.next.348 = or i64 %indvars.iv, 4
+  %arrayidx9.4 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.348
+  %10 = load float* %arrayidx9.4, align 16, !tbaa !5
+  %cmp10.4 = fcmp ogt float %10, %max.3.3
+  %11 = trunc i64 %indvars.iv.next.348 to i32
+  %yindex.3.4 = select i1 %cmp10.4, i32 %11, i32 %yindex.3.3
+  %xindex.3.4 = select i1 %cmp10.4, i32 %3, i32 %xindex.3.3
+  %max.3.4 = select i1 %cmp10.4, float %10, float %max.3.3
+  %indvars.iv.next.449 = or i64 %indvars.iv, 5
+  %arrayidx9.5 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.449
+  %12 = load float* %arrayidx9.5, align 4, !tbaa !5
+  %cmp10.5 = fcmp ogt float %12, %max.3.4
+  %13 = trunc i64 %indvars.iv.next.449 to i32
+  %yindex.3.5 = select i1 %cmp10.5, i32 %13, i32 %yindex.3.4
+  %xindex.3.5 = select i1 %cmp10.5, i32 %3, i32 %xindex.3.4
+  %max.3.5 = select i1 %cmp10.5, float %12, float %max.3.4
+  %indvars.iv.next.550 = or i64 %indvars.iv, 6
+  %arrayidx9.6 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.550
+  %14 = load float* %arrayidx9.6, align 8, !tbaa !5
+  %cmp10.6 = fcmp ogt float %14, %max.3.5
+  %15 = trunc i64 %indvars.iv.next.550 to i32
+  %yindex.3.6 = select i1 %cmp10.6, i32 %15, i32 %yindex.3.5
+  %xindex.3.6 = select i1 %cmp10.6, i32 %3, i32 %xindex.3.5
+  %max.3.6 = select i1 %cmp10.6, float %14, float %max.3.5
+  %indvars.iv.next.651 = or i64 %indvars.iv, 7
+  %arrayidx9.7 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.651
+  %16 = load float* %arrayidx9.7, align 4, !tbaa !5
+  %cmp10.7 = fcmp ogt float %16, %max.3.6
+  %17 = trunc i64 %indvars.iv.next.651 to i32
+  %yindex.3.7 = select i1 %cmp10.7, i32 %17, i32 %yindex.3.6
+  %xindex.3.7 = select i1 %cmp10.7, i32 %3, i32 %xindex.3.6
+  %max.3.7 = select i1 %cmp10.7, float %16, float %max.3.6
+  %indvars.iv.next.752 = or i64 %indvars.iv, 8
+  %arrayidx9.8 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.752
+  %18 = load float* %arrayidx9.8, align 16, !tbaa !5
+  %cmp10.8 = fcmp ogt float %18, %max.3.7
+  %19 = trunc i64 %indvars.iv.next.752 to i32
+  %yindex.3.8 = select i1 %cmp10.8, i32 %19, i32 %yindex.3.7
+  %xindex.3.8 = select i1 %cmp10.8, i32 %3, i32 %xindex.3.7
+  %max.3.8 = select i1 %cmp10.8, float %18, float %max.3.7
+  %indvars.iv.next.853 = or i64 %indvars.iv, 9
+  %arrayidx9.9 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.853
+  %20 = load float* %arrayidx9.9, align 4, !tbaa !5
+  %cmp10.9 = fcmp ogt float %20, %max.3.8
+  %21 = trunc i64 %indvars.iv.next.853 to i32
+  %yindex.3.9 = select i1 %cmp10.9, i32 %21, i32 %yindex.3.8
+  %xindex.3.9 = select i1 %cmp10.9, i32 %3, i32 %xindex.3.8
+  %max.3.9 = select i1 %cmp10.9, float %20, float %max.3.8
+  %indvars.iv.next.954 = or i64 %indvars.iv, 10
+  %arrayidx9.10 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.954
+  %22 = load float* %arrayidx9.10, align 8, !tbaa !5
+  %cmp10.10 = fcmp ogt float %22, %max.3.9
+  %23 = trunc i64 %indvars.iv.next.954 to i32
+  %yindex.3.10 = select i1 %cmp10.10, i32 %23, i32 %yindex.3.9
+  %xindex.3.10 = select i1 %cmp10.10, i32 %3, i32 %xindex.3.9
+  %max.3.10 = select i1 %cmp10.10, float %22, float %max.3.9
+  %indvars.iv.next.1055 = or i64 %indvars.iv, 11
+  %arrayidx9.11 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1055
+  %24 = load float* %arrayidx9.11, align 4, !tbaa !5
+  %cmp10.11 = fcmp ogt float %24, %max.3.10
+  %25 = trunc i64 %indvars.iv.next.1055 to i32
+  %yindex.3.11 = select i1 %cmp10.11, i32 %25, i32 %yindex.3.10
+  %xindex.3.11 = select i1 %cmp10.11, i32 %3, i32 %xindex.3.10
+  %max.3.11 = select i1 %cmp10.11, float %24, float %max.3.10
+  %indvars.iv.next.1156 = or i64 %indvars.iv, 12
+  %arrayidx9.12 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1156
+  %26 = load float* %arrayidx9.12, align 16, !tbaa !5
+  %cmp10.12 = fcmp ogt float %26, %max.3.11
+  %27 = trunc i64 %indvars.iv.next.1156 to i32
+  %yindex.3.12 = select i1 %cmp10.12, i32 %27, i32 %yindex.3.11
+  %xindex.3.12 = select i1 %cmp10.12, i32 %3, i32 %xindex.3.11
+  %max.3.12 = select i1 %cmp10.12, float %26, float %max.3.11
+  %indvars.iv.next.1257 = or i64 %indvars.iv, 13
+  %arrayidx9.13 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1257
+  %28 = load float* %arrayidx9.13, align 4, !tbaa !5
+  %cmp10.13 = fcmp ogt float %28, %max.3.12
+  %29 = trunc i64 %indvars.iv.next.1257 to i32
+  %yindex.3.13 = select i1 %cmp10.13, i32 %29, i32 %yindex.3.12
+  %xindex.3.13 = select i1 %cmp10.13, i32 %3, i32 %xindex.3.12
+  %max.3.13 = select i1 %cmp10.13, float %28, float %max.3.12
+  %indvars.iv.next.1358 = or i64 %indvars.iv, 14
+  %arrayidx9.14 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1358
+  %30 = load float* %arrayidx9.14, align 8, !tbaa !5
+  %cmp10.14 = fcmp ogt float %30, %max.3.13
+  %31 = trunc i64 %indvars.iv.next.1358 to i32
+  %yindex.3.14 = select i1 %cmp10.14, i32 %31, i32 %yindex.3.13
+  %xindex.3.14 = select i1 %cmp10.14, i32 %3, i32 %xindex.3.13
+  %max.3.14 = select i1 %cmp10.14, float %30, float %max.3.13
+  %indvars.iv.next.1459 = or i64 %indvars.iv, 15
+  %arrayidx9.15 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1459
+  %32 = load float* %arrayidx9.15, align 4, !tbaa !5
+  %cmp10.15 = fcmp ogt float %32, %max.3.14
+  %33 = trunc i64 %indvars.iv.next.1459 to i32
+  %yindex.3.15 = select i1 %cmp10.15, i32 %33, i32 %yindex.3.14
+  %xindex.3.15 = select i1 %cmp10.15, i32 %3, i32 %xindex.3.14
+  %max.3.15 = select i1 %cmp10.15, float %32, float %max.3.14
+  %indvars.iv.next.15 = add i64 %indvars.iv, 16
+  %lftr.wideiv.15 = trunc i64 %indvars.iv.next.15 to i32
+  %exitcond.15 = icmp eq i32 %lftr.wideiv.15, 256
+  br i1 %exitcond.15, label %for.inc15, label %for.body7
+
+for.inc15:                                        ; preds = %for.body7
+  %indvars.iv.next43 = add i64 %indvars.iv42, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next43 to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 256
+  br i1 %exitcond, label %for.end17, label %for.cond5.preheader
+
+for.end17:                                        ; preds = %for.inc15
+  %conv = sitofp i32 %xindex.3.15 to float
+  %add = fadd float %max.3.15, %conv
+  %conv18 = sitofp i32 %yindex.3.15 to float
+  %add19 = fadd float %add, %conv18
+  %call20 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float %add19) nounwind
+  %inc22 = add nsw i32 %nl.041, 1
+  %exitcond44 = icmp eq i32 %inc22, 78100
+  br i1 %exitcond44, label %for.end23, label %for.body
+
+for.end23:                                        ; preds = %for.end17
+  %call24 = tail call i64 @clock() nounwind
+  %sub = sub nsw i64 %call24, %call1
+  %conv25 = sitofp i64 %sub to double
+  %div = fdiv double %conv25, 1.000000e+06
+  %call26 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str235, i64 0, i64 0), double %div) nounwind
+  %add29 = fadd float %add, 1.000000e+00
+  %add31 = fadd float %add29, %conv18
+  %add32 = fadd float %add31, 1.000000e+00
+  store float %add32, float* @temp, align 4, !tbaa !5
+  tail call void @check(i32 -1)
+  ret i32 0
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+declare i32 @puts(i8* nocapture) nounwind
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"branch_weights", i32 64, i32 4}
+!4 = metadata !{metadata !"int", metadata !1}
+!5 = metadata !{metadata !"float", metadata !1}
diff --git a/test/CodeGen/PowerPC/2011-12-08-DemandedBitsMiscompile.ll b/test/CodeGen/PowerPC/2011-12-08-DemandedBitsMiscompile.ll
new file mode 100644
index 0000000..a18829e
--- /dev/null
+++ b/test/CodeGen/PowerPC/2011-12-08-DemandedBitsMiscompile.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g4 | FileCheck %s
+
+define void @test(i32* nocapture %x, i64* %xx, i32* %yp) nounwind uwtable ssp {
+entry:
+  %yy = load i32* %yp
+  %y = add i32 %yy, 1
+  %z = zext i32 %y to i64
+  %z2 = shl i64 %z, 32 
+  store i64 %z2, i64* %xx, align 4
+  ret void
+
+; CHECK: test:
+; CHECK: sldi {{.*}}, {{.*}}, 32
+; Note: it's okay if someday CodeGen gets smart enough to optimize out
+; the shift.
+}
diff --git a/test/CodeGen/PowerPC/Frames-alloca.ll b/test/CodeGen/PowerPC/Frames-alloca.ll
index 466ae80..28dd08c 100644
--- a/test/CodeGen/PowerPC/Frames-alloca.ll
+++ b/test/CodeGen/PowerPC/Frames-alloca.ll
@@ -2,9 +2,9 @@
 ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=PPC64
 ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=PPC32-NOFP
 ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=PPC64-NOFP
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -enable-ppc32-regscavenger | FileCheck %s -check-prefix=PPC32
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -enable-ppc32-regscavenger | FileCheck %s -check-prefix=PPC32-RS
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim -enable-ppc32-regscavenger | FileCheck %s -check-prefix=PPC32-RS-NOFP
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=PPC32
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=PPC32-RS
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=PPC32-RS-NOFP
 
 ; CHECK-PPC32: stw r31, -4(r1)
 ; CHECK-PPC32: lwz r1, 0(r1)
diff --git a/test/CodeGen/PowerPC/Frames-large.ll b/test/CodeGen/PowerPC/Frames-large.ll
index 302d3df..d07fea7 100644
--- a/test/CodeGen/PowerPC/Frames-large.ll
+++ b/test/CodeGen/PowerPC/Frames-large.ll
@@ -21,12 +21,14 @@ define i32* @f1() nounwind {
 ; PPC32-NOFP: 	lwz r1, 0(r1)
 ; PPC32-NOFP: 	blr 
 
+
 ; PPC32-FP: _f1:
-; PPC32-FP:	stw r31, -4(r1)
 ; PPC32-FP:	lis r0, -1
+; PPC32-FP:	stw r31, -4(r1)
 ; PPC32-FP:	ori r0, r0, 32704
 ; PPC32-FP:	stwux r1, r1, r0
-; ...
+; PPC32-FP:	mr r31, r1
+; PPC32-FP:	addi r3, r31, 64
 ; PPC32-FP:	lwz r1, 0(r1)
 ; PPC32-FP:	lwz r31, -4(r1)
 ; PPC32-FP:	blr 
@@ -42,11 +44,12 @@ define i32* @f1() nounwind {
 
 
 ; PPC64-FP: _f1:
-; PPC64-FP:	std r31, -8(r1)
 ; PPC64-FP:	lis r0, -1
+; PPC64-FP:	std r31, -8(r1)
 ; PPC64-FP:	ori r0, r0, 32640
 ; PPC64-FP:	stdux r1, r1, r0
-; ...
+; PPC64-FP:	mr r31, r1
+; PPC64-FP:	addi r3, r31, 124
 ; PPC64-FP:	ld r1, 0(r1)
 ; PPC64-FP:	ld r31, -8(r1)
 ; PPC64-FP:	blr 
diff --git a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
index b10a996..7b0d69c 100644
--- a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
+++ b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
@@ -1,9 +1,9 @@
 ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | \
-; RUN:   grep {stw r3, 32751}
+; RUN:   grep {stw r4, 32751}
 ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \
-; RUN:   grep {stw r3, 32751}
+; RUN:   grep {stw r4, 32751}
 ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \
-; RUN:   grep {std r3, 9024}
+; RUN:   grep {std r4, 9024}
 
 define void @test() nounwind {
 	store i32 0, i32* inttoptr (i64 48725999 to i32*)
diff --git a/test/CodeGen/PowerPC/a2-fp-basic.ll b/test/CodeGen/PowerPC/a2-fp-basic.ll
new file mode 100644
index 0000000..932ad7a
--- /dev/null
+++ b/test/CodeGen/PowerPC/a2-fp-basic.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -march=ppc64 -mcpu=a2 | FileCheck %s
+
+%0 = type { double, double }
+
+define void @maybe_an_fma(%0* sret %agg.result, %0* byval %a, %0* byval %b, %0* byval %c) nounwind {
+entry:
+  %a.realp = getelementptr inbounds %0* %a, i32 0, i32 0
+  %a.real = load double* %a.realp
+  %a.imagp = getelementptr inbounds %0* %a, i32 0, i32 1
+  %a.imag = load double* %a.imagp
+  %b.realp = getelementptr inbounds %0* %b, i32 0, i32 0
+  %b.real = load double* %b.realp
+  %b.imagp = getelementptr inbounds %0* %b, i32 0, i32 1
+  %b.imag = load double* %b.imagp
+  %mul.rl = fmul double %a.real, %b.real
+  %mul.rr = fmul double %a.imag, %b.imag
+  %mul.r = fsub double %mul.rl, %mul.rr
+  %mul.il = fmul double %a.imag, %b.real
+  %mul.ir = fmul double %a.real, %b.imag
+  %mul.i = fadd double %mul.il, %mul.ir
+  %c.realp = getelementptr inbounds %0* %c, i32 0, i32 0
+  %c.real = load double* %c.realp
+  %c.imagp = getelementptr inbounds %0* %c, i32 0, i32 1
+  %c.imag = load double* %c.imagp
+  %add.r = fadd double %mul.r, %c.real
+  %add.i = fadd double %mul.i, %c.imag
+  %real = getelementptr inbounds %0* %agg.result, i32 0, i32 0
+  %imag = getelementptr inbounds %0* %agg.result, i32 0, i32 1
+  store double %add.r, double* %real
+  store double %add.i, double* %imag
+  ret void
+; CHECK: fmadd
+}
diff --git a/test/CodeGen/PowerPC/big-endian-formal-args.ll b/test/CodeGen/PowerPC/big-endian-formal-args.ll
index 318ccb0..9a456b6 100644
--- a/test/CodeGen/PowerPC/big-endian-formal-args.ll
+++ b/test/CodeGen/PowerPC/big-endian-formal-args.ll
@@ -2,8 +2,8 @@
 
 declare void @bar(i64 %x, i64 %y)
 
-; CHECK: li 4, 2
 ; CHECK: li {{[53]}}, 0
+; CHECK: li 4, 2
 ; CHECK: li 6, 3
 ; CHECK: mr {{[53]}}, {{[53]}}
 
diff --git a/test/CodeGen/PowerPC/bl8_elf_nop.ll b/test/CodeGen/PowerPC/bl8_elf_nop.ll
new file mode 100644
index 0000000..386c59e
--- /dev/null
+++ b/test/CodeGen/PowerPC/bl8_elf_nop.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck  %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare i32 @clock() nounwind
+
+define i32 @func() {
+entry:
+  %call = call i32 @clock() nounwind
+  %call2 = add i32 %call, 7
+  ret i32 %call2
+}
+
+; CHECK: bl clock
+; CHECK-NEXT: nop
+
diff --git a/test/CodeGen/PowerPC/can-lower-ret.ll b/test/CodeGen/PowerPC/can-lower-ret.ll
new file mode 100644
index 0000000..acf4104
--- /dev/null
+++ b/test/CodeGen/PowerPC/can-lower-ret.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -mcpu=ppc
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=ppc64
+
+define <4 x float> @foo1(<2 x float> %a, <2 x float> %b) nounwind readnone {
+entry:
+  %0 = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
+  %1 = shufflevector <2 x float> %b, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %2 = shufflevector <4 x float> %0, <4 x float> %1, <4 x i32> <i32 0, i32 4, i32 2, i32 5>
+  ret <4 x float> %2
+}
+
+define <4 x double> @foo2(<2 x double> %a, <2 x double> %b) nounwind readnone {
+entry:
+  %0 = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
+  %1 = shufflevector <2 x double> %b, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %2 = shufflevector <4 x double> %0, <4 x double> %1, <4 x i32> <i32 0, i32 4, i32 2, i32 5>
+  ret <4 x double> %2
+}
+
diff --git a/test/CodeGen/PowerPC/cttz.ll b/test/CodeGen/PowerPC/cttz.ll
index ab493a0..1d365d4 100644
--- a/test/CodeGen/PowerPC/cttz.ll
+++ b/test/CodeGen/PowerPC/cttz.ll
@@ -1,11 +1,11 @@
 ; Make sure this testcase does not use ctpop
 ; RUN: llc < %s -march=ppc32 | grep -i cntlzw
 
-declare i32 @llvm.cttz.i32(i32)
+declare i32 @llvm.cttz.i32(i32, i1)
 
 define i32 @bar(i32 %x) {
 entry:
-        %tmp.1 = call i32 @llvm.cttz.i32( i32 %x )              ; <i32> [#uses=1]
+        %tmp.1 = call i32 @llvm.cttz.i32( i32 %x, i1 true )              ; <i32> [#uses=1]
         ret i32 %tmp.1
 }
 
diff --git a/test/CodeGen/PowerPC/dbg.ll b/test/CodeGen/PowerPC/dbg.ll
new file mode 100644
index 0000000..e161cb0
--- /dev/null
+++ b/test/CodeGen/PowerPC/dbg.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -break-anti-dependencies=all -march=ppc64 -mcpu=g5 | FileCheck %s
+; CHECK: main:
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readnone {
+entry:
+  tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !15), !dbg !17
+  tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !16), !dbg !18
+  %add = add nsw i32 %argc, 1, !dbg !19
+  ret i32 %add, !dbg !19
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"dbg.c", metadata !"/src", metadata !"clang version 3.1", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !13} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 720937, metadata !"dbg.c", metadata !"/src", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9, metadata !9, metadata !10}
+!9 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
+!12 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
+!13 = metadata !{metadata !14}
+!14 = metadata !{metadata !15, metadata !16}
+!15 = metadata !{i32 721153, metadata !5, metadata !"argc", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!16 = metadata !{i32 721153, metadata !5, metadata !"argv", metadata !6, i32 33554433, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!17 = metadata !{i32 1, i32 14, metadata !5, null}
+!18 = metadata !{i32 1, i32 26, metadata !5, null}
+!19 = metadata !{i32 2, i32 3, metadata !20, null}
+!20 = metadata !{i32 720907, metadata !5, i32 1, i32 34, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+
diff --git a/test/CodeGen/PowerPC/dg.exp b/test/CodeGen/PowerPC/dg.exp
deleted file mode 100644
index 9e50b55..0000000
--- a/test/CodeGen/PowerPC/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target PowerPC] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/PowerPC/indirectbr.ll b/test/CodeGen/PowerPC/indirectbr.ll
index 29c620e..4b6f88b 100644
--- a/test/CodeGen/PowerPC/indirectbr.ll
+++ b/test/CodeGen/PowerPC/indirectbr.ll
@@ -17,10 +17,22 @@ entry:
 bb2:                                              ; preds = %entry, %bb3
   %gotovar.4.0 = phi i8* [ %gotovar.4.0.pre, %bb3 ], [ %0, %entry ] ; <i8*> [#uses=1]
 ; PIC: mtctr
+; PIC-NEXT: li
+; PIC-NEXT: li
+; PIC-NEXT: li
+; PIC-NEXT: li
 ; PIC-NEXT: bctr
 ; STATIC: mtctr
+; STATIC-NEXT: li
+; STATIC-NEXT: li
+; STATIC-NEXT: li
+; STATIC-NEXT: li
 ; STATIC-NEXT: bctr
 ; PPC64: mtctr
+; PPC64-NEXT: li
+; PPC64-NEXT: li
+; PPC64-NEXT: li
+; PPC64-NEXT: li
 ; PPC64-NEXT: bctr
   indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]
 
@@ -47,8 +59,8 @@ L2:                                               ; preds = %L3, %bb2
 
 L1:                                               ; preds = %L2, %bb2
   %res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ]  ; <i32> [#uses=1]
-; PIC: addis r[[R0:[0-9]+]], r{{[0-9]+}}, ha16(Ltmp0-L0$pb)
 ; PIC: li r[[R1:[0-9]+]], lo16(Ltmp0-L0$pb)
+; PIC: addis r[[R0:[0-9]+]], r{{[0-9]+}}, ha16(Ltmp0-L0$pb)
 ; PIC: add r[[R2:[0-9]+]], r[[R0]], r[[R1]]
 ; PIC: stw r[[R2]]
 ; STATIC: li r[[R0:[0-9]+]], lo16(Ltmp0)
diff --git a/test/CodeGen/PowerPC/lit.local.cfg b/test/CodeGen/PowerPC/lit.local.cfg
new file mode 100644
index 0000000..4019eca
--- /dev/null
+++ b/test/CodeGen/PowerPC/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'PowerPC' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/PowerPC/ppc32-vaarg.ll b/test/CodeGen/PowerPC/ppc32-vaarg.ll
deleted file mode 100644
index 6042991..0000000
--- a/test/CodeGen/PowerPC/ppc32-vaarg.ll
+++ /dev/null
@@ -1,167 +0,0 @@
-; RUN: llc -O0 < %s | FileCheck %s
-;ModuleID = 'test.c'
-target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
-target triple = "powerpc-unknown-freebsd9.0"
-
-%struct.__va_list_tag = type { i8, i8, i16, i8*, i8* }
-
-@var1 = common global i64 0, align 8
-@var2 = common global double 0.0, align 8
-@var3 = common global i32 0, align 4
-
-define void @ppcvaargtest(%struct.__va_list_tag* %ap) nounwind {
- entry:
-  %x = va_arg %struct.__va_list_tag* %ap, i64; Get from r5,r6
-; CHECK:      lbz 4, 0(3)
-; CHECK-NEXT: lwz 5, 4(3)
-; CHECK-NEXT: rlwinm 6, 4, 0, 31, 31
-; CHECK-NEXT: cmplwi 0, 6, 0
-; CHECK-NEXT: addi 6, 4, 1
-; CHECK-NEXT: stw 3, -4(1)
-; CHECK-NEXT: stw 6, -8(1)
-; CHECK-NEXT: stw 4, -12(1)
-; CHECK-NEXT: stw 5, -16(1)
-; CHECK-NEXT: bne 0, .LBB0_2
-; CHECK-NEXT: # BB#1:                                 # %entry
-; CHECK-NEXT: lwz 3, -12(1)
-; CHECK-NEXT: stw 3, -8(1)
-; CHECK-NEXT: .LBB0_2:                                # %entry
-; CHECK-NEXT: lwz 3, -8(1)
-; CHECK-NEXT: lwz 4, -4(1)
-; CHECK-NEXT: lwz 5, 8(4)
-; CHECK-NEXT: slwi 6, 3, 2
-; CHECK-NEXT: addi 7, 3, 2
-; CHECK-NEXT: cmpwi 0, 3, 8
-; CHECK-NEXT: lwz 3, -16(1)
-; CHECK-NEXT: addi 8, 3, 4
-; CHECK-NEXT: add 5, 5, 6
-; CHECK-NEXT: mfcr 0                          # cr0
-; CHECK-NEXT: stw 0, -20(1)
-; CHECK-NEXT: stw 5, -24(1)
-; CHECK-NEXT: stw 3, -28(1)
-; CHECK-NEXT: stw 7, -32(1)
-; CHECK-NEXT: stw 8, -36(1)
-; CHECK-NEXT: blt 0, .LBB0_4
-; CHECK-NEXT: # BB#3:                                 # %entry
-; CHECK-NEXT: lwz 3, -36(1)
-; CHECK-NEXT: stw 3, -28(1)
-; CHECK-NEXT: .LBB0_4:                                # %entry
-; CHECK-NEXT: lwz 3, -28(1)
-; CHECK-NEXT: lwz 4, -32(1)
-; CHECK-NEXT: lwz 5, -4(1)
-; CHECK-NEXT: stb 4, 0(5)
-; CHECK-NEXT: lwz 4, -24(1)
-; CHECK-NEXT: lwz 0, -20(1)
-; CHECK-NEXT: mtcrf 128, 0
-; CHECK-NEXT: stw 3, -40(1)
-; CHECK-NEXT: stw 4, -44(1)
-; CHECK-NEXT: blt 0, .LBB0_6
-; CHECK-NEXT: # BB#5:                                 # %entry
-; CHECK-NEXT: lwz 3, -16(1)
-; CHECK-NEXT: stw 3, -44(1)
-; CHECK-NEXT: .LBB0_6:                                # %entry
-; CHECK-NEXT: lwz 3, -44(1)
-; CHECK-NEXT: lwz 4, -40(1)
-; CHECK-NEXT: lwz 5, -4(1)
-; CHECK-NEXT: stw 4, 4(5)
-  store i64 %x, i64* @var1, align 8
-; CHECK-NEXT: lis 4, var1@ha
-; CHECK-NEXT: lwz 6, 4(3)
-; CHECK-NEXT: lwz 3, 0(3)
-; CHECK-NEXT: la 7, var1@l(4)
-; CHECK-NEXT: stw 3, var1@l(4)
-; CHECK-NEXT: stw 6, 4(7)
-  %y = va_arg %struct.__va_list_tag* %ap, double; From f1
-; CHECK-NEXT: lbz 3, 1(5)
-; CHECK-NEXT: lwz 4, 4(5)
-; CHECK-NEXT: lwz 6, 8(5)
-; CHECK-NEXT: slwi 7, 3, 3
-; CHECK-NEXT: add 6, 6, 7
-; CHECK-NEXT: addi 7, 3, 1
-; CHECK-NEXT: cmpwi 0, 3, 8
-; CHECK-NEXT: addi 3, 4, 8
-; CHECK-NEXT: addi 6, 6, 32
-; CHECK-NEXT: mr 8, 4
-; CHECK-NEXT: mfcr 0                          # cr0
-; CHECK-NEXT: stw 0, -48(1)
-; CHECK-NEXT: stw 4, -52(1)
-; CHECK-NEXT: stw 6, -56(1)
-; CHECK-NEXT: stw 7, -60(1)
-; CHECK-NEXT: stw 3, -64(1)
-; CHECK-NEXT: stw 8, -68(1)
-; CHECK-NEXT: blt 0, .LBB0_8
-; CHECK-NEXT: # BB#7:                                 # %entry
-; CHECK-NEXT: lwz 3, -64(1)
-; CHECK-NEXT: stw 3, -68(1)
-; CHECK-NEXT: .LBB0_8:                                # %entry
-; CHECK-NEXT: lwz 3, -68(1)
-; CHECK-NEXT: lwz 4, -60(1)
-; CHECK-NEXT: lwz 5, -4(1)
-; CHECK-NEXT: stb 4, 1(5)
-; CHECK-NEXT: lwz 4, -56(1)
-; CHECK-NEXT: lwz 0, -48(1)
-; CHECK-NEXT: mtcrf 128, 0
-; CHECK-NEXT: stw 4, -72(1)
-; CHECK-NEXT: stw 3, -76(1)
-; CHECK-NEXT: blt 0, .LBB0_10
-; CHECK-NEXT: # BB#9:                                 # %entry
-; CHECK-NEXT: lwz 3, -52(1)
-; CHECK-NEXT: stw 3, -72(1)
-; CHECK-NEXT: .LBB0_10:                               # %entry
-; CHECK-NEXT: lwz 3, -72(1)
-; CHECK-NEXT: lwz 4, -76(1)
-; CHECK-NEXT: lwz 5, -4(1)
-; CHECK-NEXT: stw 4, 4(5)
-; CHECK-NEXT: lfd 0, 0(3)
-  store double %y, double* @var2, align 8
-; CHECK-NEXT: lis 3, var2@ha
-; CHECK-NEXT: stfd 0, var2@l(3)
-  %z = va_arg %struct.__va_list_tag* %ap, i32; From r7
-; CHECK-NEXT: lbz 3, 0(5)
-; CHECK-NEXT: lwz 4, 4(5)
-; CHECK-NEXT: lwz 6, 8(5)
-; CHECK-NEXT: slwi 7, 3, 2
-; CHECK-NEXT: addi 8, 3, 1
-; CHECK-NEXT: cmpwi 0, 3, 8
-; CHECK-NEXT: addi 3, 4, 4
-; CHECK-NEXT: add 6, 6, 7
-; CHECK-NEXT: mr 7, 4
-; CHECK-NEXT: stw 6, -80(1)
-; CHECK-NEXT: stw 8, -84(1)
-; CHECK-NEXT: stw 3, -88(1)
-; CHECK-NEXT: stw 4, -92(1)
-; CHECK-NEXT: stw 7, -96(1)
-; CHECK-NEXT: mfcr 0                          # cr0
-; CHECK-NEXT: stw 0, -100(1)
-; CHECK-NEXT: blt 0, .LBB0_12
-; CHECK-NEXT: # BB#11:                                # %entry
-; CHECK-NEXT: lwz 3, -88(1)
-; CHECK-NEXT: stw 3, -96(1)
-; CHECK-NEXT: .LBB0_12:                               # %entry
-; CHECK-NEXT: lwz 3, -96(1)
-; CHECK-NEXT: lwz 4, -84(1)
-; CHECK-NEXT: lwz 5, -4(1)
-; CHECK-NEXT: stb 4, 0(5)
-; CHECK-NEXT: lwz 4, -80(1)
-; CHECK-NEXT: lwz 0, -100(1)
-; CHECK-NEXT: mtcrf 128, 0
-; CHECK-NEXT: stw 4, -104(1)
-; CHECK-NEXT: stw 3, -108(1)
-; CHECK-NEXT: blt 0, .LBB0_14
-; CHECK-NEXT: # BB#13:                                # %entry
-; CHECK-NEXT: lwz 3, -92(1)
-; CHECK-NEXT: stw 3, -104(1)
-; CHECK-NEXT: .LBB0_14:                               # %entry
-; CHECK-NEXT: lwz 3, -104(1)
-; CHECK-NEXT: lwz 4, -108(1)
-; CHECK-NEXT: lwz 5, -4(1)
-; CHECK-NEXT: stw 4, 4(5)
-; CHECK-NEXT: lwz 3, 0(3)
-  store i32 %z, i32* @var3, align 4
-; CHECK-NEXT: lis 4, var3@ha
-; CHECK-NEXT: stw 3, var3@l(4)
-  ret void
-; CHECK-NEXT: stw 5, -112(1)
-; CHECK-NEXT: blr
-}
-
diff --git a/test/CodeGen/PowerPC/ppc440-fp-basic.ll b/test/CodeGen/PowerPC/ppc440-fp-basic.ll
new file mode 100644
index 0000000..1fad2fa
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc440-fp-basic.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -march=ppc32 -mcpu=440 | FileCheck %s
+
+%0 = type { double, double }
+
+define void @maybe_an_fma(%0* sret %agg.result, %0* byval %a, %0* byval %b, %0* byval %c) nounwind {
+entry:
+  %a.realp = getelementptr inbounds %0* %a, i32 0, i32 0
+  %a.real = load double* %a.realp
+  %a.imagp = getelementptr inbounds %0* %a, i32 0, i32 1
+  %a.imag = load double* %a.imagp
+  %b.realp = getelementptr inbounds %0* %b, i32 0, i32 0
+  %b.real = load double* %b.realp
+  %b.imagp = getelementptr inbounds %0* %b, i32 0, i32 1
+  %b.imag = load double* %b.imagp
+  %mul.rl = fmul double %a.real, %b.real
+  %mul.rr = fmul double %a.imag, %b.imag
+  %mul.r = fsub double %mul.rl, %mul.rr
+  %mul.il = fmul double %a.imag, %b.real
+  %mul.ir = fmul double %a.real, %b.imag
+  %mul.i = fadd double %mul.il, %mul.ir
+  %c.realp = getelementptr inbounds %0* %c, i32 0, i32 0
+  %c.real = load double* %c.realp
+  %c.imagp = getelementptr inbounds %0* %c, i32 0, i32 1
+  %c.imag = load double* %c.imagp
+  %add.r = fadd double %mul.r, %c.real
+  %add.i = fadd double %mul.i, %c.imag
+  %real = getelementptr inbounds %0* %agg.result, i32 0, i32 0
+  %imag = getelementptr inbounds %0* %agg.result, i32 0, i32 1
+  store double %add.r, double* %real
+  store double %add.i, double* %imag
+  ret void
+; CHECK: fmadd
+}
diff --git a/test/CodeGen/PowerPC/ppc440-msync.ll b/test/CodeGen/PowerPC/ppc440-msync.ll
new file mode 100644
index 0000000..1274173
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc440-msync.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=ppc32 | FileCheck %s
+; RUN: llc < %s -march=ppc32 -mcpu=440 | FileCheck %s -check-prefix=BE-CHK
+
+define i32 @has_a_fence(i32 %a, i32 %b) nounwind {
+entry:
+  fence acquire
+  %cond = icmp eq i32 %a, %b
+  br i1 %cond, label %IfEqual, label %IfUnequal
+
+IfEqual:
+  fence release
+; CHECK: sync
+; CHECK-NOT: msync
+; BE-CHK: msync
+  br label %end
+
+IfUnequal:
+  fence release
+; CHECK: sync
+; CHECK-NOT: msync
+; BE-CHK: msync
+  ret i32 0
+
+end:
+  ret i32 1
+}
+
diff --git a/test/CodeGen/PowerPC/ppc64-ind-call.ll b/test/CodeGen/PowerPC/ppc64-ind-call.ll
new file mode 100644
index 0000000..d5c4d46
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-ind-call.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=ppc64 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @test1() {
+entry:
+  %call.i75 = call zeroext i8 undef(i8* undef, i8 zeroext 10)
+  unreachable
+}
+
+; CHECK: @test1
+; CHECK: ld 11, 0(3)
+; CHECK: ld 2, 8(3)
+; CHECK: bctrl
+; CHECK: ld 2, 40(1)
+
diff --git a/test/CodeGen/PowerPC/ppc64-linux-func-size.ll b/test/CodeGen/PowerPC/ppc64-linux-func-size.ll
new file mode 100644
index 0000000..e5aa1f1
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-linux-func-size.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 | FileCheck %s
+
+; CHECK:      .section	.opd,"aw",@progbits
+; CHECK-NEXT: test1:
+; CHECK-NEXT:	.align 3
+; CHECK-NEXT:	.quad .L.test1
+; CHECK-NEXT:	.quad .TOC.@tocbase
+; CHECK-NEXT:	.text
+; CHECK-NEXT: .L.test1:
+
+define i32 @test1(i32 %a) nounwind {
+entry:
+  ret i32 %a
+}
+
+; Until recently, binutils accepted the .size directive as:
+;  .size	test1, .Ltmp0-test1
+; however, using this directive with recent binutils will result in the error:
+;  .size expression for XXX does not evaluate to a constant
+; so we must use the label which actually tags the start of the function.
+; CHECK: .size	test1, .Ltmp0-.L.test1
diff --git a/test/CodeGen/PowerPC/ppc64-prefetch.ll b/test/CodeGen/PowerPC/ppc64-prefetch.ll
new file mode 100644
index 0000000..b2f3709
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-prefetch.ll
@@ -0,0 +1,15 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+; RUN: llc < %s | FileCheck %s
+
+define void @test1(i8* %a, ...) nounwind {
+entry:
+  call void @llvm.prefetch(i8* %a, i32 0, i32 3, i32 1)
+  ret void
+}
+
+declare void @llvm.prefetch(i8*, i32, i32, i32)
+
+; CHECK: @test1
+; CHECK: dcbt
+
diff --git a/test/CodeGen/PowerPC/ppc64-vaarg-int.ll b/test/CodeGen/PowerPC/ppc64-vaarg-int.ll
new file mode 100644
index 0000000..5a63b01
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-vaarg-int.ll
@@ -0,0 +1,20 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+; RUN: llc < %s | FileCheck %s
+
+define i32 @intvaarg(i32 %a, ...) nounwind {
+entry:
+  %va = alloca i8*, align 8
+  %va1 = bitcast i8** %va to i8*
+  call void @llvm.va_start(i8* %va1)
+  %0 = va_arg i8** %va, i32
+  %sub = sub nsw i32 %a, %0
+  ret i32 %sub
+}
+
+declare void @llvm.va_start(i8*) nounwind
+
+; CHECK: @intvaarg
+; Make sure that the va pointer is incremented by 8 (not 4).
+; CHECK: addi{{.*}}, 8
+
diff --git a/test/CodeGen/SPARC/2011-12-03-TailDuplication.ll b/test/CodeGen/SPARC/2011-12-03-TailDuplication.ll
new file mode 100644
index 0000000..aa7de16
--- /dev/null
+++ b/test/CodeGen/SPARC/2011-12-03-TailDuplication.ll
@@ -0,0 +1,25 @@
+; RUN: llc -march=sparc <%s
+
+define void @foo(i32 %a) nounwind {
+entry:
+  br i1 undef, label %return, label %else.0
+
+else.0:
+  br i1 undef, label %if.end.0, label %return
+
+if.end.0:
+  br i1 undef, label %if.then.1, label %else.1
+
+else.1:
+  %0 = bitcast i8* undef to i8**
+  br label %else.1.2
+
+if.then.1:
+  br i1 undef, label %return, label %return
+
+else.1.2:
+  br i1 undef, label %return, label %return
+
+return:
+  ret void
+}
diff --git a/test/CodeGen/SPARC/dg.exp b/test/CodeGen/SPARC/dg.exp
deleted file mode 100644
index 6c0a997..0000000
--- a/test/CodeGen/SPARC/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target Sparc] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/SPARC/lit.local.cfg b/test/CodeGen/SPARC/lit.local.cfg
new file mode 100644
index 0000000..786fee9
--- /dev/null
+++ b/test/CodeGen/SPARC/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'Sparc' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/SystemZ/00-RetVoid.ll b/test/CodeGen/SystemZ/00-RetVoid.ll
deleted file mode 100644
index 6f3cbac..0000000
--- a/test/CodeGen/SystemZ/00-RetVoid.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=systemz
-
-define void @foo() {
-entry:
-    ret void
-}
diff --git a/test/CodeGen/SystemZ/01-RetArg.ll b/test/CodeGen/SystemZ/01-RetArg.ll
deleted file mode 100644
index 8e1ff49..0000000
--- a/test/CodeGen/SystemZ/01-RetArg.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=systemz
-
-define i64 @foo(i64 %a, i64 %b) {
-entry:
-    ret i64 %b
-}
diff --git a/test/CodeGen/SystemZ/01-RetImm.ll b/test/CodeGen/SystemZ/01-RetImm.ll
deleted file mode 100644
index 8b99e68..0000000
--- a/test/CodeGen/SystemZ/01-RetImm.ll
+++ /dev/null
@@ -1,49 +0,0 @@
-; RUN: llc < %s -march=systemz | grep lghi  | count 1
-; RUN: llc < %s -march=systemz | grep llill | count 1
-; RUN: llc < %s -march=systemz | grep llilh | count 1
-; RUN: llc < %s -march=systemz | grep llihl | count 1
-; RUN: llc < %s -march=systemz | grep llihh | count 1
-; RUN: llc < %s -march=systemz | grep lgfi  | count 1
-; RUN: llc < %s -march=systemz | grep llilf | count 1
-; RUN: llc < %s -march=systemz | grep llihf | count 1
-
-
-define i64 @foo1() {
-entry:
-    ret i64 1
-}
-
-define i64 @foo2() {
-entry:
-    ret i64 65535 
-}
-
-define i64 @foo3() {
-entry:
-    ret i64 131072
-}
-
-define i64 @foo4() {
-entry:
-    ret i64 8589934592
-}
-
-define i64 @foo5() {
-entry:
-    ret i64 562949953421312
-}
-
-define i64 @foo6() {
-entry:
-    ret i64 65537
-}
-
-define i64 @foo7() {
-entry:
-    ret i64 4294967295
-}
-
-define i64 @foo8() {
-entry:
-    ret i64 281483566645248
-}
diff --git a/test/CodeGen/SystemZ/02-MemArith.ll b/test/CodeGen/SystemZ/02-MemArith.ll
deleted file mode 100644
index ee9e5e9..0000000
--- a/test/CodeGen/SystemZ/02-MemArith.ll
+++ /dev/null
@@ -1,133 +0,0 @@
-; RUN: llc < %s -march=systemz | FileCheck %s
-
-define signext i32 @foo1(i32 %a, i32 *%b, i64 %idx)  {
-; CHECK: foo1:
-; CHECK:  a %r2, 4(%r1,%r3)
-entry:
-    %idx2 = add i64 %idx, 1         ; <i64> [#uses=1]
-    %ptr = getelementptr i32* %b, i64 %idx2          ; <i32*> [#uses=1]
-    %c = load i32* %ptr
-    %d = add i32 %a, %c
-    ret i32 %d
-}
-
-define signext i32 @foo2(i32 %a, i32 *%b, i64 %idx) {
-; CHECK: foo2:
-; CHECK:  ay %r2, -4(%r1,%r3)
-entry:
-    %idx2 = add i64 %idx, -1         ; <i64> [#uses=1]
-    %ptr = getelementptr i32* %b, i64 %idx2          ; <i32*> [#uses=1]
-    %c = load i32* %ptr
-    %d = add i32 %a, %c
-    ret i32 %d
-}
-
-define signext i64 @foo3(i64 %a, i64 *%b, i64 %idx)  {
-; CHECK: foo3:
-; CHECK:  ag %r2, 8(%r1,%r3)
-entry:
-    %idx2 = add i64 %idx, 1         ; <i64> [#uses=1]
-    %ptr = getelementptr i64* %b, i64 %idx2          ; <i64*> [#uses=1]
-    %c = load i64* %ptr
-    %d = add i64 %a, %c
-    ret i64 %d
-}
-
-define signext i32 @foo4(i32 %a, i32 *%b, i64 %idx)  {
-; CHECK: foo4:
-; CHECK:  n %r2, 4(%r1,%r3)
-entry:
-    %idx2 = add i64 %idx, 1         ; <i64> [#uses=1]
-    %ptr = getelementptr i32* %b, i64 %idx2          ; <i32*> [#uses=1]
-    %c = load i32* %ptr
-    %d = and i32 %a, %c
-    ret i32 %d
-}
-
-define signext i32 @foo5(i32 %a, i32 *%b, i64 %idx) {
-; CHECK: foo5:
-; CHECK:  ny %r2, -4(%r1,%r3)
-entry:
-    %idx2 = add i64 %idx, -1         ; <i64> [#uses=1]
-    %ptr = getelementptr i32* %b, i64 %idx2          ; <i32*> [#uses=1]
-    %c = load i32* %ptr
-    %d = and i32 %a, %c
-    ret i32 %d
-}
-
-define signext i64 @foo6(i64 %a, i64 *%b, i64 %idx)  {
-; CHECK: foo6:
-; CHECK:  ng %r2, 8(%r1,%r3)
-entry:
-    %idx2 = add i64 %idx, 1         ; <i64> [#uses=1]
-    %ptr = getelementptr i64* %b, i64 %idx2          ; <i64*> [#uses=1]
-    %c = load i64* %ptr
-    %d = and i64 %a, %c
-    ret i64 %d
-}
-
-define signext i32 @foo7(i32 %a, i32 *%b, i64 %idx) {
-; CHECK: foo7:
-; CHECK:  o %r2, 4(%r1,%r3)
-entry:
-    %idx2 = add i64 %idx, 1         ; <i64> [#uses=1]
-    %ptr = getelementptr i32* %b, i64 %idx2          ; <i32*> [#uses=1]
-    %c = load i32* %ptr
-    %d = or i32 %a, %c
-    ret i32 %d
-}
-
-define signext i32 @foo8(i32 %a, i32 *%b, i64 %idx)  {
-; CHECK: foo8:
-; CHECK:  oy %r2, -4(%r1,%r3)
-entry:
-    %idx2 = add i64 %idx, -1         ; <i64> [#uses=1]
-    %ptr = getelementptr i32* %b, i64 %idx2          ; <i32*> [#uses=1]
-    %c = load i32* %ptr
-    %d = or i32 %a, %c
-    ret i32 %d
-}
-
-define signext i64 @foo9(i64 %a, i64 *%b, i64 %idx)  {
-; CHECK: foo9:
-; CHECK:  og %r2, 8(%r1,%r3)
-entry:
-    %idx2 = add i64 %idx, 1         ; <i64> [#uses=1]
-    %ptr = getelementptr i64* %b, i64 %idx2          ; <i64*> [#uses=1]
-    %c = load i64* %ptr
-    %d = or i64 %a, %c
-    ret i64 %d
-}
-
-define signext i32 @foo10(i32 %a, i32 *%b, i64 %idx)  {
-; CHECK: foo10:
-; CHECK:  x %r2, 4(%r1,%r3)
-entry:
-    %idx2 = add i64 %idx, 1         ; <i64> [#uses=1]
-    %ptr = getelementptr i32* %b, i64 %idx2          ; <i32*> [#uses=1]
-    %c = load i32* %ptr
-    %d = xor i32 %a, %c
-    ret i32 %d
-}
-
-define signext i32 @foo11(i32 %a, i32 *%b, i64 %idx)  {
-; CHECK: foo11:
-; CHECK:  xy %r2, -4(%r1,%r3)
-entry:
-    %idx2 = add i64 %idx, -1         ; <i64> [#uses=1]
-    %ptr = getelementptr i32* %b, i64 %idx2          ; <i32*> [#uses=1]
-    %c = load i32* %ptr
-    %d = xor i32 %a, %c
-    ret i32 %d
-}
-
-define signext i64 @foo12(i64 %a, i64 *%b, i64 %idx)  {
-; CHECK: foo12:
-; CHECK:  xg %r2, 8(%r1,%r3)
-entry:
-    %idx2 = add i64 %idx, 1         ; <i64> [#uses=1]
-    %ptr = getelementptr i64* %b, i64 %idx2          ; <i64*> [#uses=1]
-    %c = load i64* %ptr
-    %d = xor i64 %a, %c
-    ret i64 %d
-}
diff --git a/test/CodeGen/SystemZ/02-RetAdd.ll b/test/CodeGen/SystemZ/02-RetAdd.ll
deleted file mode 100644
index d5dfa22..0000000
--- a/test/CodeGen/SystemZ/02-RetAdd.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=systemz
-define i64 @foo(i64 %a, i64 %b) {
-entry:
-    %c = add i64 %a, %b
-    ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/02-RetAddImm.ll b/test/CodeGen/SystemZ/02-RetAddImm.ll
deleted file mode 100644
index 40f6cce..0000000
--- a/test/CodeGen/SystemZ/02-RetAddImm.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=systemz
-define i64 @foo(i64 %a, i64 %b) {
-entry:
-    %c = add i64 %a, 1
-    ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/02-RetAnd.ll b/test/CodeGen/SystemZ/02-RetAnd.ll
deleted file mode 100644
index b568a57..0000000
--- a/test/CodeGen/SystemZ/02-RetAnd.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=systemz
-
-define i64 @foo(i64 %a, i64 %b) {
-entry:
-    %c = and i64 %a, %b
-    ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/02-RetAndImm.ll b/test/CodeGen/SystemZ/02-RetAndImm.ll
deleted file mode 100644
index 53c5e54..0000000
--- a/test/CodeGen/SystemZ/02-RetAndImm.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc < %s -march=systemz | grep ngr   | count 4
-; RUN: llc < %s -march=systemz | grep llilh | count 1
-; RUN: llc < %s -march=systemz | grep llihl | count 1
-; RUN: llc < %s -march=systemz | grep llihh | count 1
-
-define i64 @foo1(i64 %a, i64 %b) {
-entry:
-    %c = and i64 %a, 1
-    ret i64 %c
-}
-
-define i64 @foo2(i64 %a, i64 %b) {
-entry:
-    %c = and i64 %a, 131072
-    ret i64 %c
-}
-
-define i64 @foo3(i64 %a, i64 %b) {
-entry:
-    %c = and i64 %a, 8589934592
-    ret i64 %c
-}
-
-define i64 @foo4(i64 %a, i64 %b) {
-entry:
-    %c = and i64 %a, 562949953421312
-    ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/02-RetNeg.ll b/test/CodeGen/SystemZ/02-RetNeg.ll
deleted file mode 100644
index 3f6ba2f..0000000
--- a/test/CodeGen/SystemZ/02-RetNeg.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=systemz | grep lcgr | count 1
-
-define i64 @foo(i64 %a) {
-entry:
-    %c = sub i64 0, %a
-    ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/02-RetOr.ll b/test/CodeGen/SystemZ/02-RetOr.ll
deleted file mode 100644
index a1ddb63..0000000
--- a/test/CodeGen/SystemZ/02-RetOr.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=systemz
-define i64 @foo(i64 %a, i64 %b) {
-entry:
-    %c = or i64 %a, %b
-    ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/02-RetOrImm.ll b/test/CodeGen/SystemZ/02-RetOrImm.ll
deleted file mode 100644
index 68cd24d..0000000
--- a/test/CodeGen/SystemZ/02-RetOrImm.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc < %s -march=systemz | grep oill | count 1
-; RUN: llc < %s -march=systemz | grep oilh | count 1
-; RUN: llc < %s -march=systemz | grep oihl | count 1
-; RUN: llc < %s -march=systemz | grep oihh | count 1
-
-define i64 @foo1(i64 %a, i64 %b) {
-entry:
-    %c = or i64 %a, 1
-    ret i64 %c
-}
-
-define i64 @foo2(i64 %a, i64 %b) {
-entry:
-    %c = or i64 %a, 131072
-    ret i64 %c
-}
-
-define i64 @foo3(i64 %a, i64 %b) {
-entry:
-    %c = or i64 %a, 8589934592
-    ret i64 %c
-}
-
-define i64 @foo4(i64 %a, i64 %b) {
-entry:
-    %c = or i64 %a, 562949953421312
-    ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/02-RetSub.ll b/test/CodeGen/SystemZ/02-RetSub.ll
deleted file mode 100644
index 98e1861..0000000
--- a/test/CodeGen/SystemZ/02-RetSub.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=systemz
-
-define i64 @foo(i64 %a, i64 %b) {
-entry:
-    %c = sub i64 %a, %b
-    ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/02-RetSubImm.ll b/test/CodeGen/SystemZ/02-RetSubImm.ll
deleted file mode 100644
index 8479fbf..0000000
--- a/test/CodeGen/SystemZ/02-RetSubImm.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=systemz
-
-define i64 @foo(i64 %a, i64 %b) {
-entry:
-    %c = sub i64 %a, 1
-    ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/02-RetXor.ll b/test/CodeGen/SystemZ/02-RetXor.ll
deleted file mode 100644
index 4d1adf2..0000000
--- a/test/CodeGen/SystemZ/02-RetXor.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=systemz
-define i64 @foo(i64 %a, i64 %b) {
-entry:
-    %c = xor i64 %a, %b
-    ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/02-RetXorImm.ll b/test/CodeGen/SystemZ/02-RetXorImm.ll
deleted file mode 100644
index 473bbf7..0000000
--- a/test/CodeGen/SystemZ/02-RetXorImm.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=systemz
-define i64 @foo(i64 %a, i64 %b) {
-entry:
-    %c = xor i64 %a, 1
-    ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/03-RetAddImmSubreg.ll b/test/CodeGen/SystemZ/03-RetAddImmSubreg.ll
deleted file mode 100644
index 0a7f5ee..0000000
--- a/test/CodeGen/SystemZ/03-RetAddImmSubreg.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; RUN: llc < %s -march=systemz | grep ahi   | count 3
-; RUN: llc < %s -march=systemz | grep afi   | count 3
-; RUN: llc < %s -march=systemz | grep lgfr  | count 4
-; RUN: llc < %s -march=systemz | grep llgfr | count 2
-
-
-define i32 @foo1(i32 %a, i32 %b) {
-entry:
-    %c = add i32 %a, 1
-    ret i32 %c
-}
-
-define i32 @foo2(i32 %a, i32 %b) {
-entry:
-    %c = add i32 %a, 131072
-    ret i32 %c
-}
-
-define zeroext i32 @foo3(i32 %a, i32 %b)  {
-entry:
-    %c = add i32 %a, 1
-    ret i32 %c
-}
-
-define zeroext i32 @foo4(i32 %a, i32 %b)  {
-entry:
-    %c = add i32 %a, 131072
-    ret i32 %c
-}
-
-define signext i32 @foo5(i32 %a, i32 %b)  {
-entry:
-    %c = add i32 %a, 1
-    ret i32 %c
-}
-
-define signext i32 @foo6(i32 %a, i32 %b)  {
-entry:
-    %c = add i32 %a, 131072
-    ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetAddSubreg.ll b/test/CodeGen/SystemZ/03-RetAddSubreg.ll
deleted file mode 100644
index 337bb3f..0000000
--- a/test/CodeGen/SystemZ/03-RetAddSubreg.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: llc < %s -march=systemz | grep ar    | count 3
-; RUN: llc < %s -march=systemz | grep lgfr  | count 2
-; RUN: llc < %s -march=systemz | grep llgfr | count 1
-
-define i32 @foo(i32 %a, i32 %b) {
-entry:
-    %c = add i32 %a, %b
-    ret i32 %c
-}
-
-define zeroext i32 @foo1(i32 %a, i32 %b)  {
-entry:
-    %c = add i32 %a, %b
-    ret i32 %c
-}
-
-define signext i32 @foo2(i32 %a, i32 %b)  {
-entry:
-    %c = add i32 %a, %b
-    ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetAndImmSubreg.ll b/test/CodeGen/SystemZ/03-RetAndImmSubreg.ll
deleted file mode 100644
index c5326ab..0000000
--- a/test/CodeGen/SystemZ/03-RetAndImmSubreg.ll
+++ /dev/null
@@ -1,38 +0,0 @@
-; RUN: llc < %s -march=systemz | grep ngr  | count 6
-
-define i32 @foo1(i32 %a, i32 %b) {
-entry:
-    %c = and i32 %a, 1
-    ret i32 %c
-}
-
-define i32 @foo2(i32 %a, i32 %b) {
-entry:
-    %c = and i32 %a, 131072
-    ret i32 %c
-}
-
-define zeroext i32 @foo3(i32 %a, i32 %b)  {
-entry:
-    %c = and i32 %a, 1
-    ret i32 %c
-}
-
-define signext i32 @foo4(i32 %a, i32 %b)  {
-entry:
-    %c = and i32 %a, 131072
-    ret i32 %c
-}
-
-define zeroext i32 @foo5(i32 %a, i32 %b)  {
-entry:
-    %c = and i32 %a, 1
-    ret i32 %c
-}
-
-define signext i32 @foo6(i32 %a, i32 %b)  {
-entry:
-    %c = and i32 %a, 131072
-    ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetAndSubreg.ll b/test/CodeGen/SystemZ/03-RetAndSubreg.ll
deleted file mode 100644
index 75dc90a..0000000
--- a/test/CodeGen/SystemZ/03-RetAndSubreg.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: llc < %s -march=systemz | grep ngr | count 3
-; RUN: llc < %s -march=systemz | grep nihf | count 1
-
-define i32 @foo(i32 %a, i32 %b) {
-entry:
-    %c = and i32 %a, %b
-    ret i32 %c
-}
-
-define zeroext i32 @foo1(i32 %a, i32 %b)  {
-entry:
-    %c = and i32 %a, %b
-    ret i32 %c
-}
-
-define signext i32 @foo2(i32 %a, i32 %b)  {
-entry:
-    %c = and i32 %a, %b
-    ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetArgSubreg.ll b/test/CodeGen/SystemZ/03-RetArgSubreg.ll
deleted file mode 100644
index 476821a..0000000
--- a/test/CodeGen/SystemZ/03-RetArgSubreg.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s -march=systemz | grep lgr   | count 2
-; RUN: llc < %s -march=systemz | grep nihf  | count 1
-; RUN: llc < %s -march=systemz | grep lgfr  | count 1
-
-
-define i32 @foo(i32 %a, i32 %b) {
-entry:
-    ret i32 %b
-}
-
-define zeroext i32 @foo1(i32 %a, i32 %b)  {
-entry:
-    ret i32 %b
-}
-
-define signext i32 @foo2(i32 %a, i32 %b)  {
-entry:
-    ret i32 %b
-}
diff --git a/test/CodeGen/SystemZ/03-RetImmSubreg.ll b/test/CodeGen/SystemZ/03-RetImmSubreg.ll
deleted file mode 100644
index 70da913..0000000
--- a/test/CodeGen/SystemZ/03-RetImmSubreg.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; RUN: llc < %s -march=systemz | grep lghi  | count 2
-; RUN: llc < %s -march=systemz | grep llill | count 1
-; RUN: llc < %s -march=systemz | grep llilh | count 1
-; RUN: llc < %s -march=systemz | grep lgfi  | count 1
-; RUN: llc < %s -march=systemz | grep llilf | count 2
-
-
-define i32 @foo1() {
-entry:
-    ret i32 1
-}
-
-define i32 @foo2() {
-entry:
-    ret i32 65535 
-}
-
-define i32 @foo3() {
-entry:
-    ret i32 131072
-}
-
-define i32 @foo4() {
-entry:
-    ret i32 65537
-}
-
-define i32 @foo5() {
-entry:
-    ret i32 4294967295
-}
-
-define zeroext i32 @foo6()  {
-entry:
-    ret i32 4294967295
-}
-
-define signext i32 @foo7()  {
-entry:
-    ret i32 4294967295
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetNegImmSubreg.ll b/test/CodeGen/SystemZ/03-RetNegImmSubreg.ll
deleted file mode 100644
index 87ebcc1..0000000
--- a/test/CodeGen/SystemZ/03-RetNegImmSubreg.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=systemz | grep lcr | count 1
-
-define i32 @foo(i32 %a) {
-entry:
-    %c = sub i32 0, %a
-    ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetOrImmSubreg.ll b/test/CodeGen/SystemZ/03-RetOrImmSubreg.ll
deleted file mode 100644
index 99adea8..0000000
--- a/test/CodeGen/SystemZ/03-RetOrImmSubreg.ll
+++ /dev/null
@@ -1,60 +0,0 @@
-; RUN: llc < %s -march=systemz | grep oill  | count 3
-; RUN: llc < %s -march=systemz | grep oilh  | count 3
-; RUN: llc < %s -march=systemz | grep oilf  | count 3
-; RUN: llc < %s -march=systemz | grep llgfr | count 3
-; RUN: llc < %s -march=systemz | grep lgfr  | count 6
-
-define i32 @foo1(i32 %a, i32 %b) {
-entry:
-    %c = or i32 %a, 1
-    ret i32 %c
-}
-
-define i32 @foo2(i32 %a, i32 %b) {
-entry:
-    %c = or i32 %a, 131072
-    ret i32 %c
-}
-
-define i32 @foo7(i32 %a, i32 %b) {
-entry:
-    %c = or i32 %a, 123456
-    ret i32 %c
-}
-
-define zeroext i32 @foo3(i32 %a, i32 %b)  {
-entry:
-    %c = or i32 %a, 1
-    ret i32 %c
-}
-
-define zeroext i32 @foo8(i32 %a, i32 %b)  {
-entry:
-    %c = or i32 %a, 123456
-    ret i32 %c
-}
-
-define signext i32 @foo4(i32 %a, i32 %b)  {
-entry:
-    %c = or i32 %a, 131072
-    ret i32 %c
-}
-
-define zeroext i32 @foo5(i32 %a, i32 %b)  {
-entry:
-    %c = or i32 %a, 1
-    ret i32 %c
-}
-
-define signext i32 @foo6(i32 %a, i32 %b)  {
-entry:
-    %c = or i32 %a, 131072
-    ret i32 %c
-}
-
-define signext i32 @foo9(i32 %a, i32 %b)  {
-entry:
-    %c = or i32 %a, 123456
-    ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetOrSubreg.ll b/test/CodeGen/SystemZ/03-RetOrSubreg.ll
deleted file mode 100644
index 7dab5ca..0000000
--- a/test/CodeGen/SystemZ/03-RetOrSubreg.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s -march=systemz | grep ogr   | count 3
-; RUN: llc < %s -march=systemz | grep nihf  | count 1
-; RUN: llc < %s -march=systemz | grep lgfr  | count 1
-
-
-define i32 @foo(i32 %a, i32 %b) {
-entry:
-    %c = or i32 %a, %b
-    ret i32 %c
-}
-
-define zeroext i32 @foo1(i32 %a, i32 %b)  {
-entry:
-    %c = or i32 %a, %b
-    ret i32 %c
-}
-
-define signext i32 @foo2(i32 %a, i32 %b)  {
-entry:
-    %c = or i32 %a, %b
-    ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetSubImmSubreg.ll b/test/CodeGen/SystemZ/03-RetSubImmSubreg.ll
deleted file mode 100644
index 21ea9b5..0000000
--- a/test/CodeGen/SystemZ/03-RetSubImmSubreg.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; RUN: llc < %s -march=systemz | grep ahi   | count 3
-; RUN: llc < %s -march=systemz | grep afi   | count 3
-; RUN: llc < %s -march=systemz | grep lgfr  | count 4
-; RUN: llc < %s -march=systemz | grep llgfr | count 2
-
-
-define i32 @foo1(i32 %a, i32 %b) {
-entry:
-    %c = sub i32 %a, 1
-    ret i32 %c
-}
-
-define i32 @foo2(i32 %a, i32 %b) {
-entry:
-    %c = sub i32 %a, 131072
-    ret i32 %c
-}
-
-define zeroext i32 @foo3(i32 %a, i32 %b)  {
-entry:
-    %c = sub i32 %a, 1
-    ret i32 %c
-}
-
-define signext i32 @foo4(i32 %a, i32 %b)  {
-entry:
-    %c = sub i32 %a, 131072
-    ret i32 %c
-}
-
-define zeroext i32 @foo5(i32 %a, i32 %b)  {
-entry:
-    %c = sub i32 %a, 1
-    ret i32 %c
-}
-
-define signext i32 @foo6(i32 %a, i32 %b)  {
-entry:
-    %c = sub i32 %a, 131072
-    ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetSubSubreg.ll b/test/CodeGen/SystemZ/03-RetSubSubreg.ll
deleted file mode 100644
index 24b7631..0000000
--- a/test/CodeGen/SystemZ/03-RetSubSubreg.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: llc < %s -march=systemz | grep sr    | count 3
-; RUN: llc < %s -march=systemz | grep llgfr | count 1
-; RUN: llc < %s -march=systemz | grep lgfr  | count 2
-
-define i32 @foo(i32 %a, i32 %b) {
-entry:
-    %c = sub i32 %a, %b
-    ret i32 %c
-}
-
-define zeroext i32 @foo1(i32 %a, i32 %b)  {
-entry:
-    %c = sub i32 %a, %b
-    ret i32 %c
-}
-
-define signext i32 @foo2(i32 %a, i32 %b)  {
-entry:
-    %c = sub i32 %a, %b
-    ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetXorImmSubreg.ll b/test/CodeGen/SystemZ/03-RetXorImmSubreg.ll
deleted file mode 100644
index 70ee454..0000000
--- a/test/CodeGen/SystemZ/03-RetXorImmSubreg.ll
+++ /dev/null
@@ -1,58 +0,0 @@
-; RUN: llc < %s -march=systemz | grep xilf  | count 9
-; RUN: llc < %s -march=systemz | grep llgfr | count 3
-; RUN: llc < %s -march=systemz | grep lgfr  | count 6
-
-define i32 @foo1(i32 %a, i32 %b) {
-entry:
-    %c = xor i32 %a, 1
-    ret i32 %c
-}
-
-define i32 @foo2(i32 %a, i32 %b) {
-entry:
-    %c = xor i32 %a, 131072
-    ret i32 %c
-}
-
-define i32 @foo7(i32 %a, i32 %b) {
-entry:
-    %c = xor i32 %a, 123456
-    ret i32 %c
-}
-
-define zeroext i32 @foo3(i32 %a, i32 %b)  {
-entry:
-    %c = xor i32 %a, 1
-    ret i32 %c
-}
-
-define zeroext i32 @foo8(i32 %a, i32 %b)  {
-entry:
-    %c = xor i32 %a, 123456
-    ret i32 %c
-}
-
-define signext i32 @foo4(i32 %a, i32 %b)  {
-entry:
-    %c = xor i32 %a, 131072
-    ret i32 %c
-}
-
-define zeroext i32 @foo5(i32 %a, i32 %b)  {
-entry:
-    %c = xor i32 %a, 1
-    ret i32 %c
-}
-
-define signext i32 @foo6(i32 %a, i32 %b)  {
-entry:
-    %c = xor i32 %a, 131072
-    ret i32 %c
-}
-
-define signext i32 @foo9(i32 %a, i32 %b)  {
-entry:
-    %c = xor i32 %a, 123456
-    ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetXorSubreg.ll b/test/CodeGen/SystemZ/03-RetXorSubreg.ll
deleted file mode 100644
index 02c4a2a..0000000
--- a/test/CodeGen/SystemZ/03-RetXorSubreg.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s -march=systemz | grep xgr   | count 3
-; RUN: llc < %s -march=systemz | grep nihf  | count 1
-; RUN: llc < %s -march=systemz | grep lgfr  | count 1
-
-
-define i32 @foo(i32 %a, i32 %b) {
-entry:
-    %c = xor i32 %a, %b
-    ret i32 %c
-}
-
-define zeroext i32 @foo1(i32 %a, i32 %b)  {
-entry:
-    %c = xor i32 %a, %b
-    ret i32 %c
-}
-
-define signext i32 @foo2(i32 %a, i32 %b)  {
-entry:
-    %c = xor i32 %a, %b
-    ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/04-RetShifts.ll b/test/CodeGen/SystemZ/04-RetShifts.ll
deleted file mode 100644
index cccdc47..0000000
--- a/test/CodeGen/SystemZ/04-RetShifts.ll
+++ /dev/null
@@ -1,121 +0,0 @@
-; RUN: llc < %s -march=systemz | grep sra   | count 6
-; RUN: llc < %s -march=systemz | grep srag  | count 3
-; RUN: llc < %s -march=systemz | grep srl   | count 6
-; RUN: llc < %s -march=systemz | grep srlg  | count 3
-; RUN: llc < %s -march=systemz | grep sll   | count 6
-; RUN: llc < %s -march=systemz | grep sllg  | count 3
-
-define signext i32 @foo1(i32 %a, i32 %idx) nounwind readnone {
-entry:
-	%add = add i32 %idx, 1		; <i32> [#uses=1]
-	%shr = ashr i32 %a, %add		; <i32> [#uses=1]
-	ret i32 %shr
-}
-
-define signext i32 @foo2(i32 %a, i32 %idx) nounwind readnone {
-entry:
-	%add = add i32 %idx, 1		; <i32> [#uses=1]
-	%shr = shl i32 %a, %add		; <i32> [#uses=1]
-	ret i32 %shr
-}
-
-define signext i32 @foo3(i32 %a, i32 %idx) nounwind readnone {
-entry:
-	%add = add i32 %idx, 1		; <i32> [#uses=1]
-	%shr = lshr i32 %a, %add		; <i32> [#uses=1]
-	ret i32 %shr
-}
-
-define signext i64 @foo4(i64 %a, i64 %idx) nounwind readnone {
-entry:
-	%add = add i64 %idx, 1		; <i64> [#uses=1]
-	%shr = ashr i64 %a, %add		; <i64> [#uses=1]
-	ret i64 %shr
-}
-
-define signext i64 @foo5(i64 %a, i64 %idx) nounwind readnone {
-entry:
-	%add = add i64 %idx, 1		; <i64> [#uses=1]
-	%shr = shl i64 %a, %add		; <i64> [#uses=1]
-	ret i64 %shr
-}
-
-define signext i64 @foo6(i64 %a, i64 %idx) nounwind readnone {
-entry:
-	%add = add i64 %idx, 1		; <i64> [#uses=1]
-	%shr = lshr i64 %a, %add		; <i64> [#uses=1]
-	ret i64 %shr
-}
-
-define signext i32 @foo7(i32 %a, i32 %idx) nounwind readnone {
-entry:
-        %shr = ashr i32 %a, 1
-        ret i32 %shr
-}
-
-define signext i32 @foo8(i32 %a, i32 %idx) nounwind readnone {
-entry:
-        %shr = shl i32 %a, 1
-        ret i32 %shr
-}
-
-define signext i32 @foo9(i32 %a, i32 %idx) nounwind readnone {
-entry:
-        %shr = lshr i32 %a, 1
-        ret i32 %shr
-}
-
-define signext i32 @foo10(i32 %a, i32 %idx) nounwind readnone {
-entry:
-        %shr = ashr i32 %a, %idx
-        ret i32 %shr
-}
-
-define signext i32 @foo11(i32 %a, i32 %idx) nounwind readnone {
-entry:
-        %shr = shl i32 %a, %idx
-        ret i32 %shr
-}
-
-define signext i32 @foo12(i32 %a, i32 %idx) nounwind readnone {
-entry:
-        %shr = lshr i32 %a, %idx
-        ret i32 %shr
-}
-
-define signext i64 @foo13(i64 %a, i64 %idx) nounwind readnone {
-entry:
-        %shr = ashr i64 %a, 1
-        ret i64 %shr
-}
-
-define signext i64 @foo14(i64 %a, i64 %idx) nounwind readnone {
-entry:
-        %shr = shl i64 %a, 1
-        ret i64 %shr
-}
-
-define signext i64 @foo15(i64 %a, i64 %idx) nounwind readnone {
-entry:
-        %shr = lshr i64 %a, 1
-        ret i64 %shr
-}
-
-define signext i64 @foo16(i64 %a, i64 %idx) nounwind readnone {
-entry:
-        %shr = ashr i64 %a, %idx
-        ret i64 %shr
-}
-
-define signext i64 @foo17(i64 %a, i64 %idx) nounwind readnone {
-entry:
-        %shr = shl i64 %a, %idx
-        ret i64 %shr
-}
-
-define signext i64 @foo18(i64 %a, i64 %idx) nounwind readnone {
-entry:
-        %shr = lshr i64 %a, %idx
-        ret i64 %shr
-}
-
diff --git a/test/CodeGen/SystemZ/05-LoadAddr.ll b/test/CodeGen/SystemZ/05-LoadAddr.ll
deleted file mode 100644
index cf02642..0000000
--- a/test/CodeGen/SystemZ/05-LoadAddr.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s | grep lay | count 1
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define i64* @foo(i64* %a, i64 %idx) nounwind readnone {
-entry:
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i64* %a, i64 %add.ptr.sum		; <i64*> [#uses=1]
-	ret i64* %add.ptr2
-}
diff --git a/test/CodeGen/SystemZ/05-MemImmStores.ll b/test/CodeGen/SystemZ/05-MemImmStores.ll
deleted file mode 100644
index 3cf21cc..0000000
--- a/test/CodeGen/SystemZ/05-MemImmStores.ll
+++ /dev/null
@@ -1,50 +0,0 @@
-; RUN: llc < %s -mattr=+z10 | grep mvghi | count 1
-; RUN: llc < %s -mattr=+z10 | grep mvhi  | count 1
-; RUN: llc < %s -mattr=+z10 | grep mvhhi | count 1
-; RUN: llc < %s | grep mvi   | count 2
-; RUN: llc < %s | grep mviy  | count 1
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define void @foo1(i64* nocapture %a, i64 %idx) nounwind {
-entry:
-	%add.ptr = getelementptr i64* %a, i64 1		; <i64*> [#uses=1]
-	store i64 1, i64* %add.ptr
-	ret void
-}
-
-define void @foo2(i32* nocapture %a, i64 %idx) nounwind {
-entry:
-	%add.ptr = getelementptr i32* %a, i64 1		; <i32*> [#uses=1]
-	store i32 2, i32* %add.ptr
-	ret void
-}
-
-define void @foo3(i16* nocapture %a, i64 %idx) nounwind {
-entry:
-	%add.ptr = getelementptr i16* %a, i64 1		; <i16*> [#uses=1]
-	store i16 3, i16* %add.ptr
-	ret void
-}
-
-define void @foo4(i8* nocapture %a, i64 %idx) nounwind {
-entry:
-	%add.ptr = getelementptr i8* %a, i64 1		; <i8*> [#uses=1]
-	store i8 4, i8* %add.ptr
-	ret void
-}
-
-define void @foo5(i8* nocapture %a, i64 %idx) nounwind {
-entry:
-        %add.ptr = getelementptr i8* %a, i64 -1         ; <i8*> [#uses=1]
-        store i8 4, i8* %add.ptr
-        ret void
-}
-
-define void @foo6(i16* nocapture %a, i64 %idx) nounwind {
-entry:
-        %add.ptr = getelementptr i16* %a, i64 -1         ; <i16*> [#uses=1]
-        store i16 3, i16* %add.ptr
-        ret void
-}
diff --git a/test/CodeGen/SystemZ/05-MemLoadsStores.ll b/test/CodeGen/SystemZ/05-MemLoadsStores.ll
deleted file mode 100644
index eabeb0a..0000000
--- a/test/CodeGen/SystemZ/05-MemLoadsStores.ll
+++ /dev/null
@@ -1,44 +0,0 @@
-; RUN: llc < %s | grep ly     | count 2
-; RUN: llc < %s | grep sty    | count 2
-; RUN: llc < %s | grep {l	%}  | count 2
-; RUN: llc < %s | grep {st	%} | count 2
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-
-define void @foo1(i32* nocapture %foo, i32* nocapture %bar) nounwind {
-entry:
-	%tmp1 = load i32* %foo		; <i32> [#uses=1]
-	store i32 %tmp1, i32* %bar
-	ret void
-}
-
-define void @foo2(i32* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
-entry:
-	%add.ptr = getelementptr i32* %foo, i64 1		; <i32*> [#uses=1]
-	%tmp1 = load i32* %add.ptr		; <i32> [#uses=1]
-	%add.ptr3.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr5 = getelementptr i32* %bar, i64 %add.ptr3.sum		; <i32*> [#uses=1]
-	store i32 %tmp1, i32* %add.ptr5
-	ret void
-}
-
-define void @foo3(i32* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
-entry:
-	%sub.ptr = getelementptr i32* %foo, i64 -1		; <i32*> [#uses=1]
-	%tmp1 = load i32* %sub.ptr		; <i32> [#uses=1]
-	%sub.ptr3.sum = add i64 %idx, -1		; <i64> [#uses=1]
-	%add.ptr = getelementptr i32* %bar, i64 %sub.ptr3.sum		; <i32*> [#uses=1]
-	store i32 %tmp1, i32* %add.ptr
-	ret void
-}
-
-define void @foo4(i32* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
-entry:
-	%add.ptr = getelementptr i32* %foo, i64 8192		; <i32*> [#uses=1]
-	%tmp1 = load i32* %add.ptr		; <i32> [#uses=1]
-	%add.ptr3.sum = add i64 %idx, 8192		; <i64> [#uses=1]
-	%add.ptr5 = getelementptr i32* %bar, i64 %add.ptr3.sum		; <i32*> [#uses=1]
-	store i32 %tmp1, i32* %add.ptr5
-	ret void
-}
diff --git a/test/CodeGen/SystemZ/05-MemLoadsStores16.ll b/test/CodeGen/SystemZ/05-MemLoadsStores16.ll
deleted file mode 100644
index 53bb641..0000000
--- a/test/CodeGen/SystemZ/05-MemLoadsStores16.ll
+++ /dev/null
@@ -1,85 +0,0 @@
-; RUN: llc < %s | grep {sthy.%} | count 2
-; RUN: llc < %s | grep {lhy.%}  | count 2
-; RUN: llc < %s | grep {lh.%}   | count 6
-; RUN: llc < %s | grep {sth.%}  | count 2
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-
-define void @foo1(i16* nocapture %foo, i16* nocapture %bar) nounwind {
-entry:
-	%tmp1 = load i16* %foo		; <i16> [#uses=1]
-	store i16 %tmp1, i16* %bar
-	ret void
-}
-
-define void @foo2(i16* nocapture %foo, i16* nocapture %bar, i64 %idx) nounwind {
-entry:
-	%add.ptr = getelementptr i16* %foo, i64 1		; <i16*> [#uses=1]
-	%tmp1 = load i16* %add.ptr		; <i16> [#uses=1]
-	%add.ptr3.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr5 = getelementptr i16* %bar, i64 %add.ptr3.sum		; <i16*> [#uses=1]
-	store i16 %tmp1, i16* %add.ptr5
-	ret void
-}
-
-define void @foo3(i16* nocapture %foo, i16* nocapture %bar, i64 %idx) nounwind {
-entry:
-	%sub.ptr = getelementptr i16* %foo, i64 -1		; <i16*> [#uses=1]
-	%tmp1 = load i16* %sub.ptr		; <i16> [#uses=1]
-	%sub.ptr3.sum = add i64 %idx, -1		; <i64> [#uses=1]
-	%add.ptr = getelementptr i16* %bar, i64 %sub.ptr3.sum		; <i16*> [#uses=1]
-	store i16 %tmp1, i16* %add.ptr
-	ret void
-}
-
-define void @foo4(i16* nocapture %foo, i16* nocapture %bar, i64 %idx) nounwind {
-entry:
-	%add.ptr = getelementptr i16* %foo, i64 8192		; <i16*> [#uses=1]
-	%tmp1 = load i16* %add.ptr		; <i16> [#uses=1]
-	%add.ptr3.sum = add i64 %idx, 8192		; <i64> [#uses=1]
-	%add.ptr5 = getelementptr i16* %bar, i64 %add.ptr3.sum		; <i16*> [#uses=1]
-	store i16 %tmp1, i16* %add.ptr5
-	ret void
-}
-
-define void @foo5(i16* nocapture %foo, i32* nocapture %bar) nounwind {
-entry:
-	%tmp1 = load i16* %foo		; <i16> [#uses=1]
-	%conv = sext i16 %tmp1 to i32		; <i32> [#uses=1]
-	store i32 %conv, i32* %bar
-	ret void
-}
-
-define void @foo6(i16* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
-entry:
-	%add.ptr = getelementptr i16* %foo, i64 1		; <i16*> [#uses=1]
-	%tmp1 = load i16* %add.ptr		; <i16> [#uses=1]
-	%conv = sext i16 %tmp1 to i32		; <i32> [#uses=1]
-	%add.ptr3.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr5 = getelementptr i32* %bar, i64 %add.ptr3.sum		; <i32*> [#uses=1]
-	store i32 %conv, i32* %add.ptr5
-	ret void
-}
-
-define void @foo7(i16* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
-entry:
-	%sub.ptr = getelementptr i16* %foo, i64 -1		; <i16*> [#uses=1]
-	%tmp1 = load i16* %sub.ptr		; <i16> [#uses=1]
-	%conv = sext i16 %tmp1 to i32		; <i32> [#uses=1]
-	%sub.ptr3.sum = add i64 %idx, -1		; <i64> [#uses=1]
-	%add.ptr = getelementptr i32* %bar, i64 %sub.ptr3.sum		; <i32*> [#uses=1]
-	store i32 %conv, i32* %add.ptr
-	ret void
-}
-
-define void @foo8(i16* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
-entry:
-	%add.ptr = getelementptr i16* %foo, i64 8192		; <i16*> [#uses=1]
-	%tmp1 = load i16* %add.ptr		; <i16> [#uses=1]
-	%conv = sext i16 %tmp1 to i32		; <i32> [#uses=1]
-	%add.ptr3.sum = add i64 %idx, 8192		; <i64> [#uses=1]
-	%add.ptr5 = getelementptr i32* %bar, i64 %add.ptr3.sum		; <i32*> [#uses=1]
-	store i32 %conv, i32* %add.ptr5
-	ret void
-}
diff --git a/test/CodeGen/SystemZ/05-MemRegLoads.ll b/test/CodeGen/SystemZ/05-MemRegLoads.ll
deleted file mode 100644
index f690a48..0000000
--- a/test/CodeGen/SystemZ/05-MemRegLoads.ll
+++ /dev/null
@@ -1,75 +0,0 @@
-; RUN: llc < %s -march=systemz | not grep aghi
-; RUN: llc < %s -march=systemz | grep llgf | count 1
-; RUN: llc < %s -march=systemz | grep llgh | count 1
-; RUN: llc < %s -march=systemz | grep llgc | count 1
-; RUN: llc < %s -march=systemz | grep lgf  | count 2
-; RUN: llc < %s -march=systemz | grep lgh  | count 2
-; RUN: llc < %s -march=systemz | grep lgb  | count 1
-
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define zeroext i64 @foo1(i64* nocapture %a, i64 %idx) nounwind readonly {
-entry:
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i64* %a, i64 %add.ptr.sum		; <i64*> [#uses=1]
-	%tmp3 = load i64* %add.ptr2		; <i64> [#uses=1]
-	ret i64 %tmp3
-}
-
-define zeroext i32 @foo2(i32* nocapture %a, i64 %idx) nounwind readonly {
-entry:
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i32* %a, i64 %add.ptr.sum		; <i32*> [#uses=1]
-	%tmp3 = load i32* %add.ptr2		; <i32> [#uses=1]
-	ret i32 %tmp3
-}
-
-define zeroext i16 @foo3(i16* nocapture %a, i64 %idx) nounwind readonly {
-entry:
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i16* %a, i64 %add.ptr.sum		; <i16*> [#uses=1]
-	%tmp3 = load i16* %add.ptr2		; <i16> [#uses=1]
-	ret i16 %tmp3
-}
-
-define zeroext i8 @foo4(i8* nocapture %a, i64 %idx) nounwind readonly {
-entry:
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i8* %a, i64 %add.ptr.sum		; <i8*> [#uses=1]
-	%tmp3 = load i8* %add.ptr2		; <i8> [#uses=1]
-	ret i8 %tmp3
-}
-
-define signext i64 @foo5(i64* nocapture %a, i64 %idx) nounwind readonly {
-entry:
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i64* %a, i64 %add.ptr.sum		; <i64*> [#uses=1]
-	%tmp3 = load i64* %add.ptr2		; <i64> [#uses=1]
-	ret i64 %tmp3
-}
-
-define signext i32 @foo6(i32* nocapture %a, i64 %idx) nounwind readonly {
-entry:
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i32* %a, i64 %add.ptr.sum		; <i32*> [#uses=1]
-	%tmp3 = load i32* %add.ptr2		; <i32> [#uses=1]
-	ret i32 %tmp3
-}
-
-define signext i16 @foo7(i16* nocapture %a, i64 %idx) nounwind readonly {
-entry:
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i16* %a, i64 %add.ptr.sum		; <i16*> [#uses=1]
-	%tmp3 = load i16* %add.ptr2		; <i16> [#uses=1]
-	ret i16 %tmp3
-}
-
-define signext i8 @foo8(i8* nocapture %a, i64 %idx) nounwind readonly {
-entry:
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i8* %a, i64 %add.ptr.sum		; <i8*> [#uses=1]
-	%tmp3 = load i8* %add.ptr2		; <i8> [#uses=1]
-	ret i8 %tmp3
-}
diff --git a/test/CodeGen/SystemZ/05-MemRegStores.ll b/test/CodeGen/SystemZ/05-MemRegStores.ll
deleted file mode 100644
index b851c3f..0000000
--- a/test/CodeGen/SystemZ/05-MemRegStores.ll
+++ /dev/null
@@ -1,79 +0,0 @@
-; RUN: llc < %s | not grep aghi
-; RUN: llc < %s | FileCheck %s
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define void @foo1(i64* nocapture %a, i64 %idx, i64 %val) nounwind {
-entry:
-
-; CHECK: foo1:
-; CHECK:   stg %r4, 8(%r1,%r2)
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i64* %a, i64 %add.ptr.sum		; <i64*> [#uses=1]
-	store i64 %val, i64* %add.ptr2
-	ret void
-}
-
-define void @foo2(i32* nocapture %a, i64 %idx, i32 %val) nounwind {
-entry:
-; CHECK: foo2:
-; CHECK:   st %r4, 4(%r1,%r2)
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i32* %a, i64 %add.ptr.sum		; <i32*> [#uses=1]
-	store i32 %val, i32* %add.ptr2
-	ret void
-}
-
-define void @foo3(i16* nocapture %a, i64 %idx, i16 zeroext %val) nounwind {
-entry:
-; CHECK: foo3:
-; CHECK: sth     %r4, 2(%r1,%r2)
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i16* %a, i64 %add.ptr.sum		; <i16*> [#uses=1]
-	store i16 %val, i16* %add.ptr2
-	ret void
-}
-
-define void @foo4(i8* nocapture %a, i64 %idx, i8 zeroext %val) nounwind {
-entry:
-; CHECK: foo4:
-; CHECK: stc     %r4, 1(%r3,%r2)
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i8* %a, i64 %add.ptr.sum		; <i8*> [#uses=1]
-	store i8 %val, i8* %add.ptr2
-	ret void
-}
-
-define void @foo5(i8* nocapture %a, i64 %idx, i64 %val) nounwind {
-entry:
-; CHECK: foo5:
-; CHECK: stc     %r4, 1(%r3,%r2)
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i8* %a, i64 %add.ptr.sum		; <i8*> [#uses=1]
-	%conv = trunc i64 %val to i8		; <i8> [#uses=1]
-	store i8 %conv, i8* %add.ptr2
-	ret void
-}
-
-define void @foo6(i16* nocapture %a, i64 %idx, i64 %val) nounwind {
-entry:
-; CHECK: foo6:
-; CHECK: sth     %r4, 2(%r1,%r2)
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i16* %a, i64 %add.ptr.sum		; <i16*> [#uses=1]
-	%conv = trunc i64 %val to i16		; <i16> [#uses=1]
-	store i16 %conv, i16* %add.ptr2
-	ret void
-}
-
-define void @foo7(i32* nocapture %a, i64 %idx, i64 %val) nounwind {
-entry:
-; CHECK: foo7:
-; CHECK: st      %r4, 4(%r1,%r2)
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i32* %a, i64 %add.ptr.sum		; <i32*> [#uses=1]
-	%conv = trunc i64 %val to i32		; <i32> [#uses=1]
-	store i32 %conv, i32* %add.ptr2
-	ret void
-}
diff --git a/test/CodeGen/SystemZ/06-CallViaStack.ll b/test/CodeGen/SystemZ/06-CallViaStack.ll
deleted file mode 100644
index e904f49..0000000
--- a/test/CodeGen/SystemZ/06-CallViaStack.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s | grep 168 | count 1
-; RUN: llc < %s | grep 160 | count 3
-; RUN: llc < %s | grep 328 | count 1
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define i64 @foo(i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g) nounwind {
-entry:
-	%a = alloca i64, align 8		; <i64*> [#uses=3]
-	store i64 %g, i64* %a
-	call void @bar(i64* %a) nounwind
-	%tmp1 = load i64* %a		; <i64> [#uses=1]
-	ret i64 %tmp1
-}
-
-declare void @bar(i64*)
diff --git a/test/CodeGen/SystemZ/06-FrameIdxLoad.ll b/test/CodeGen/SystemZ/06-FrameIdxLoad.ll
deleted file mode 100644
index c71da9b..0000000
--- a/test/CodeGen/SystemZ/06-FrameIdxLoad.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s | grep 160 | count 1
-; RUN: llc < %s | grep 168 | count 1
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define i64 @foo(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64* %g) nounwind readnone {
-entry:
-        ret i64 %f
-}
-
-define i64 @bar(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64* %g) nounwind readnone {
-entry:
-	%conv = ptrtoint i64* %g to i64		; <i64> [#uses=1]
-	ret i64 %conv
-}
diff --git a/test/CodeGen/SystemZ/06-LocalFrame.ll b/test/CodeGen/SystemZ/06-LocalFrame.ll
deleted file mode 100644
index d89b0df..0000000
--- a/test/CodeGen/SystemZ/06-LocalFrame.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s | grep 160 | count 1
-; RUN: llc < %s | grep 328 | count 1
-; RUN: llc < %s | grep 168 | count 1
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define noalias i64* @foo(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f) nounwind readnone {
-entry:
-	%g = alloca i64, align 8		; <i64*> [#uses=1]
-	%add.ptr = getelementptr i64* %g, i64 %f		; <i64*> [#uses=1]
-	ret i64* %add.ptr
-}
diff --git a/test/CodeGen/SystemZ/06-SimpleCall.ll b/test/CodeGen/SystemZ/06-SimpleCall.ll
deleted file mode 100644
index fd4b5029..0000000
--- a/test/CodeGen/SystemZ/06-SimpleCall.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define void @foo() nounwind {
-entry:
-	tail call void @bar() nounwind
-	ret void
-}
-
-declare void @bar()
diff --git a/test/CodeGen/SystemZ/07-BrCond.ll b/test/CodeGen/SystemZ/07-BrCond.ll
deleted file mode 100644
index 8599717..0000000
--- a/test/CodeGen/SystemZ/07-BrCond.ll
+++ /dev/null
@@ -1,141 +0,0 @@
-; RUN: llc < %s | grep je  | count 1
-; RUN: llc < %s | grep jne | count 1
-; RUN: llc < %s | grep jhe | count 2
-; RUN: llc < %s | grep jle | count 2
-; RUN: llc < %s | grep jh  | count 4
-; RUN: llc < %s | grep jl  | count 4
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define void @foo(i64 %a, i64 %b) nounwind {
-entry:
-	%cmp = icmp ult i64 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-declare void @bar()
-
-define void @foo1(i64 %a, i64 %b) nounwind {
-entry:
-	%cmp = icmp ugt i64 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo2(i64 %a, i64 %b) nounwind {
-entry:
-	%cmp = icmp ugt i64 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo3(i64 %a, i64 %b) nounwind {
-entry:
-	%cmp = icmp ult i64 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo4(i64 %a, i64 %b) nounwind {
-entry:
-	%cmp = icmp eq i64 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo5(i64 %a, i64 %b) nounwind {
-entry:
-	%cmp = icmp eq i64 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo6(i64 %a, i64 %b) nounwind {
-entry:
-	%cmp = icmp slt i64 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo7(i64 %a, i64 %b) nounwind {
-entry:
-	%cmp = icmp sgt i64 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo8(i64 %a, i64 %b) nounwind {
-entry:
-	%cmp = icmp sgt i64 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo9(i64 %a, i64 %b) nounwind {
-entry:
-	%cmp = icmp slt i64 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
diff --git a/test/CodeGen/SystemZ/07-BrCond32.ll b/test/CodeGen/SystemZ/07-BrCond32.ll
deleted file mode 100644
index 8ece5ac..0000000
--- a/test/CodeGen/SystemZ/07-BrCond32.ll
+++ /dev/null
@@ -1,142 +0,0 @@
-; RUN: llc < %s | grep je  | count 1
-; RUN: llc < %s | grep jne | count 1
-; RUN: llc < %s | grep jhe | count 2
-; RUN: llc < %s | grep jle | count 2
-; RUN: llc < %s | grep jh  | count 4
-; RUN: llc < %s | grep jl  | count 4
-
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define void @foo(i32 %a, i32 %b) nounwind {
-entry:
-	%cmp = icmp ult i32 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-declare void @bar()
-
-define void @foo1(i32 %a, i32 %b) nounwind {
-entry:
-	%cmp = icmp ugt i32 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo2(i32 %a, i32 %b) nounwind {
-entry:
-	%cmp = icmp ugt i32 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo3(i32 %a, i32 %b) nounwind {
-entry:
-	%cmp = icmp ult i32 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo4(i32 %a, i32 %b) nounwind {
-entry:
-	%cmp = icmp eq i32 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo5(i32 %a, i32 %b) nounwind {
-entry:
-	%cmp = icmp eq i32 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo6(i32 %a, i32 %b) nounwind {
-entry:
-	%cmp = icmp slt i32 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo7(i32 %a, i32 %b) nounwind {
-entry:
-	%cmp = icmp sgt i32 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo8(i32 %a, i32 %b) nounwind {
-entry:
-	%cmp = icmp sgt i32 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo9(i32 %a, i32 %b) nounwind {
-entry:
-	%cmp = icmp slt i32 %a, %b		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
diff --git a/test/CodeGen/SystemZ/07-BrUnCond.ll b/test/CodeGen/SystemZ/07-BrUnCond.ll
deleted file mode 100644
index ac6067a..0000000
--- a/test/CodeGen/SystemZ/07-BrUnCond.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-ibm-linux"
-
-define void @foo() noreturn nounwind {
-entry:
-	tail call void @baz() nounwind
-	br label %l1
-
-l1:		; preds = %entry, %l1
-	tail call void @bar() nounwind
-	br label %l1
-}
-
-declare void @bar()
-
-declare void @baz()
diff --git a/test/CodeGen/SystemZ/07-CmpImm.ll b/test/CodeGen/SystemZ/07-CmpImm.ll
deleted file mode 100644
index 4d0ebda..0000000
--- a/test/CodeGen/SystemZ/07-CmpImm.ll
+++ /dev/null
@@ -1,137 +0,0 @@
-; RUN: llc < %s | grep cgfi | count 8
-; RUN: llc < %s | grep clgfi | count 2
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define void @foo(i64 %a, i64 %b) nounwind {
-entry:
-	%cmp = icmp eq i64 %a, 0		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-declare void @bar()
-
-define void @foo1(i64 %a, i64 %b) nounwind {
-entry:
-	%cmp = icmp ugt i64 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo2(i64 %a, i64 %b) nounwind {
-entry:
-	%cmp = icmp ugt i64 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo3(i64 %a) nounwind {
-entry:
-	%cmp = icmp eq i64 %a, 0		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo4(i64 %a) nounwind {
-entry:
-	%cmp = icmp eq i64 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo5(i64 %a) nounwind {
-entry:
-	%cmp = icmp eq i64 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo6(i64 %a) nounwind {
-entry:
-	%cmp = icmp slt i64 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo7(i64 %a) nounwind {
-entry:
-	%cmp = icmp sgt i64 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo8(i64 %a) nounwind {
-entry:
-	%cmp = icmp sgt i64 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo9(i64 %a) nounwind {
-entry:
-	%cmp = icmp slt i64 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
diff --git a/test/CodeGen/SystemZ/07-CmpImm32.ll b/test/CodeGen/SystemZ/07-CmpImm32.ll
deleted file mode 100644
index add34fa..0000000
--- a/test/CodeGen/SystemZ/07-CmpImm32.ll
+++ /dev/null
@@ -1,139 +0,0 @@
-; RUN: llc < %s | grep jl  | count 3
-; RUN: llc < %s | grep jh  | count 3
-; RUN: llc < %s | grep je  | count 2
-; RUN: llc < %s | grep jne | count 2
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define void @foo(i32 %a, i32 %b) nounwind {
-entry:
-	%cmp = icmp eq i32 %a, 0		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-declare void @bar()
-
-define void @foo1(i32 %a, i32 %b) nounwind {
-entry:
-	%cmp = icmp ugt i32 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo2(i32 %a, i32 %b) nounwind {
-entry:
-	%cmp = icmp ugt i32 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo3(i32 %a) nounwind {
-entry:
-	%cmp = icmp eq i32 %a, 0		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo4(i32 %a) nounwind {
-entry:
-	%cmp = icmp eq i32 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo5(i32 %a) nounwind {
-entry:
-	%cmp = icmp eq i32 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo6(i32 %a) nounwind {
-entry:
-	%cmp = icmp slt i32 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo7(i32 %a) nounwind {
-entry:
-	%cmp = icmp sgt i32 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.then, label %if.end
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo8(i32 %a) nounwind {
-entry:
-	%cmp = icmp sgt i32 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
-
-define void @foo9(i32 %a) nounwind {
-entry:
-	%cmp = icmp slt i32 %a, 1		; <i1> [#uses=1]
-	br i1 %cmp, label %if.end, label %if.then
-
-if.then:		; preds = %entry
-	tail call void @bar() nounwind
-	ret void
-
-if.end:		; preds = %entry
-	ret void
-}
diff --git a/test/CodeGen/SystemZ/07-SelectCC.ll b/test/CodeGen/SystemZ/07-SelectCC.ll
deleted file mode 100644
index aa4b36e..0000000
--- a/test/CodeGen/SystemZ/07-SelectCC.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s | grep clgr
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define i64 @foo(i64 %a, i64 %b) nounwind readnone {
-entry:
-	%cmp = icmp ult i64 %a, %b		; <i1> [#uses=1]
-	%cond = select i1 %cmp, i64 %a, i64 %b		; <i64> [#uses=1]
-	ret i64 %cond
-}
diff --git a/test/CodeGen/SystemZ/08-DivRem.ll b/test/CodeGen/SystemZ/08-DivRem.ll
deleted file mode 100644
index ff1e441..0000000
--- a/test/CodeGen/SystemZ/08-DivRem.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; RUN: llc < %s | grep dsgr  | count 2
-; RUN: llc < %s | grep dsgfr | count 2
-; RUN: llc < %s | grep dlr   | count 2
-; RUN: llc < %s | grep dlgr  | count 2
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define i64 @div(i64 %a, i64 %b) nounwind readnone {
-entry:
-	%div = sdiv i64 %a, %b		; <i64> [#uses=1]
-	ret i64 %div
-}
-
-define i32 @div1(i32 %a, i32 %b) nounwind readnone {
-entry:
-	%div = sdiv i32 %a, %b		; <i32> [#uses=1]
-	ret i32 %div
-}
-
-define i64 @div2(i64 %a, i64 %b) nounwind readnone {
-entry:
-	%div = udiv i64 %a, %b		; <i64> [#uses=1]
-	ret i64 %div
-}
-
-define i32 @div3(i32 %a, i32 %b) nounwind readnone {
-entry:
-	%div = udiv i32 %a, %b		; <i32> [#uses=1]
-	ret i32 %div
-}
-
-define i64 @rem(i64 %a, i64 %b) nounwind readnone {
-entry:
-	%rem = srem i64 %a, %b		; <i64> [#uses=1]
-	ret i64 %rem
-}
-
-define i32 @rem1(i32 %a, i32 %b) nounwind readnone {
-entry:
-	%rem = srem i32 %a, %b		; <i32> [#uses=1]
-	ret i32 %rem
-}
-
-define i64 @rem2(i64 %a, i64 %b) nounwind readnone {
-entry:
-	%rem = urem i64 %a, %b		; <i64> [#uses=1]
-	ret i64 %rem
-}
-
-define i32 @rem3(i32 %a, i32 %b) nounwind readnone {
-entry:
-	%rem = urem i32 %a, %b		; <i32> [#uses=1]
-	ret i32 %rem
-}
diff --git a/test/CodeGen/SystemZ/08-DivRemMemOp.ll b/test/CodeGen/SystemZ/08-DivRemMemOp.ll
deleted file mode 100644
index d6ec0e7..0000000
--- a/test/CodeGen/SystemZ/08-DivRemMemOp.ll
+++ /dev/null
@@ -1,64 +0,0 @@
-; RUN: llc < %s | grep {dsgf.%} | count 2
-; RUN: llc < %s | grep {dsg.%}  | count 2
-; RUN: llc < %s | grep {dl.%}   | count 2
-; RUN: llc < %s | grep dlg      | count 2
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define i64 @div(i64 %a, i64* %b) nounwind readnone {
-entry:
-	%b1 = load i64* %b
-	%div = sdiv i64 %a, %b1
-	ret i64 %div
-}
-
-define i64 @div1(i64 %a, i64* %b) nounwind readnone {
-entry:
-        %b1 = load i64* %b
-        %div = udiv i64 %a, %b1
-        ret i64 %div
-}
-
-define i64 @rem(i64 %a, i64* %b) nounwind readnone {
-entry:
-        %b1 = load i64* %b
-        %div = srem i64 %a, %b1
-        ret i64 %div
-}
-
-define i64 @rem1(i64 %a, i64* %b) nounwind readnone {
-entry:
-        %b1 = load i64* %b
-        %div = urem i64 %a, %b1
-        ret i64 %div
-}
-
-define i32 @div2(i32 %a, i32* %b) nounwind readnone {
-entry:
-        %b1 = load i32* %b
-        %div = sdiv i32 %a, %b1
-        ret i32 %div
-}
-
-define i32 @div3(i32 %a, i32* %b) nounwind readnone {
-entry:
-        %b1 = load i32* %b
-        %div = udiv i32 %a, %b1
-        ret i32 %div
-}
-
-define i32 @rem2(i32 %a, i32* %b) nounwind readnone {
-entry:
-        %b1 = load i32* %b
-        %div = srem i32 %a, %b1
-        ret i32 %div
-}
-
-define i32 @rem3(i32 %a, i32* %b) nounwind readnone {
-entry:
-        %b1 = load i32* %b
-        %div = urem i32 %a, %b1
-        ret i32 %div
-}
-
diff --git a/test/CodeGen/SystemZ/08-SimpleMuls.ll b/test/CodeGen/SystemZ/08-SimpleMuls.ll
deleted file mode 100644
index 1ab88d6..0000000
--- a/test/CodeGen/SystemZ/08-SimpleMuls.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: llc < %s | grep msgr | count 2
-; RUN: llc < %s | grep msr  | count 2
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define i64 @foo(i64 %a, i64 %b) nounwind readnone {
-entry:
-	%mul = mul i64 %b, %a		; <i64> [#uses=1]
-	ret i64 %mul
-}
-
-define i64 @foo2(i64 %a, i64 %b) nounwind readnone {
-entry:
-	%mul = mul i64 %b, %a		; <i64> [#uses=1]
-	ret i64 %mul
-}
-
-define i32 @foo3(i32 %a, i32 %b) nounwind readnone {
-entry:
-	%mul = mul i32 %b, %a		; <i32> [#uses=1]
-	ret i32 %mul
-}
-
-define i32 @foo4(i32 %a, i32 %b) nounwind readnone {
-entry:
-	%mul = mul i32 %b, %a		; <i32> [#uses=1]
-	ret i32 %mul
-}
diff --git a/test/CodeGen/SystemZ/09-DynamicAlloca.ll b/test/CodeGen/SystemZ/09-DynamicAlloca.ll
deleted file mode 100644
index 30810ce..0000000
--- a/test/CodeGen/SystemZ/09-DynamicAlloca.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-
-define void @foo(i64 %N) nounwind {
-entry:
-	%N3 = trunc i64 %N to i32		; <i32> [#uses=1]
-	%vla = alloca i8, i32 %N3, align 2		; <i8*> [#uses=1]
-	call void @bar(i8* %vla) nounwind
-	ret void
-}
-
-declare void @bar(i8*)
diff --git a/test/CodeGen/SystemZ/09-Globals.ll b/test/CodeGen/SystemZ/09-Globals.ll
deleted file mode 100644
index 50a26e2..0000000
--- a/test/CodeGen/SystemZ/09-Globals.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s | grep larl | count 3
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-ibm-linux"
-@bar = common global i64 0, align 8		; <i64*> [#uses=3]
-
-define i64 @foo() nounwind readonly {
-entry:
-	%tmp = load i64* @bar		; <i64> [#uses=1]
-	ret i64 %tmp
-}
-
-define i64* @foo2() nounwind readnone {
-entry:
-	ret i64* @bar
-}
-
-define i64* @foo3(i64 %idx) nounwind readnone {
-entry:
-	%add.ptr.sum = add i64 %idx, 1		; <i64> [#uses=1]
-	%add.ptr2 = getelementptr i64* @bar, i64 %add.ptr.sum		; <i64*> [#uses=1]
-	ret i64* %add.ptr2
-}
diff --git a/test/CodeGen/SystemZ/09-Switches.ll b/test/CodeGen/SystemZ/09-Switches.ll
deleted file mode 100644
index 32aaa62..0000000
--- a/test/CodeGen/SystemZ/09-Switches.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; RUN: llc < %s -march=systemz | grep larl
-
-define i32 @main(i32 %tmp158) {
-entry:
-        switch i32 %tmp158, label %bb336 [
-		 i32 -2147483648, label %bb338
-		 i32 -2147483647, label %bb338
-		 i32 -2147483646, label %bb338
-		 i32 120, label %bb338
-		 i32 121, label %bb339
-		 i32 122, label %bb340
-                 i32 123, label %bb341
-                 i32 124, label %bb342
-                 i32 125, label %bb343
-                 i32 126, label %bb336
-		 i32 1024, label %bb338
-                 i32 0, label %bb338
-                 i32 1, label %bb338
-                 i32 2, label %bb338
-                 i32 3, label %bb338
-                 i32 4, label %bb338
-		 i32 5, label %bb338
-        ]
-bb336:
-  ret i32 10
-bb338:
-  ret i32 11
-bb339:
-  ret i32 12
-bb340:
-  ret i32 13
-bb341:
-  ret i32 14
-bb342:
-  ret i32 15
-bb343:
-  ret i32 18
-
-}
diff --git a/test/CodeGen/SystemZ/10-FuncsPic.ll b/test/CodeGen/SystemZ/10-FuncsPic.ll
deleted file mode 100644
index f291e5f..0000000
--- a/test/CodeGen/SystemZ/10-FuncsPic.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: llc < %s -relocation-model=pic | grep GOTENT | count 3
-; RUN: llc < %s -relocation-model=pic | grep PLT | count 1
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-@ptr = external global void (...)*		; <void (...)**> [#uses=2]
-
-define void @foo1() nounwind {
-entry:
-	store void (...)* @func, void (...)** @ptr
-	ret void
-}
-
-declare void @func(...)
-
-define void @foo2() nounwind {
-entry:
-	tail call void (...)* @func() nounwind
-	ret void
-}
-
-define void @foo3() nounwind {
-entry:
-	%tmp = load void (...)** @ptr		; <void (...)*> [#uses=1]
-	tail call void (...)* %tmp() nounwind
-	ret void
-}
diff --git a/test/CodeGen/SystemZ/10-GlobalsPic.ll b/test/CodeGen/SystemZ/10-GlobalsPic.ll
deleted file mode 100644
index c581ad9..0000000
--- a/test/CodeGen/SystemZ/10-GlobalsPic.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: llc < %s -relocation-model=pic | grep GOTENT | count 6
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-@src = external global i32		; <i32*> [#uses=2]
-@dst = external global i32		; <i32*> [#uses=2]
-@ptr = external global i32*		; <i32**> [#uses=2]
-
-define void @foo1() nounwind {
-entry:
-	%tmp = load i32* @src		; <i32> [#uses=1]
-	store i32 %tmp, i32* @dst
-	ret void
-}
-
-define void @foo2() nounwind {
-entry:
-	store i32* @dst, i32** @ptr
-	ret void
-}
-
-define void @foo3() nounwind {
-entry:
-	%tmp = load i32* @src		; <i32> [#uses=1]
-	%tmp1 = load i32** @ptr		; <i32*> [#uses=1]
-	%arrayidx = getelementptr i32* %tmp1, i64 1		; <i32*> [#uses=1]
-	store i32 %tmp, i32* %arrayidx
-	ret void
-}
diff --git a/test/CodeGen/SystemZ/11-BSwap.ll b/test/CodeGen/SystemZ/11-BSwap.ll
deleted file mode 100644
index 1aa9c67..0000000
--- a/test/CodeGen/SystemZ/11-BSwap.ll
+++ /dev/null
@@ -1,74 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-
-
-define zeroext i16 @foo(i16 zeroext %a)  {
-	%res = tail call i16 @llvm.bswap.i16(i16 %a)
-	ret i16 %res
-}
-
-define zeroext i32 @foo2(i32 zeroext %a)  {
-; CHECK: foo2:
-; CHECK:  lrvr [[R1:%r.]], %r2
-        %res = tail call i32 @llvm.bswap.i32(i32 %a)
-        ret i32 %res
-}
-
-define zeroext i64 @foo3(i64 %a)  {
-; CHECK: foo3:
-; CHECK:  lrvgr %r2, %r2
-        %res = tail call i64 @llvm.bswap.i64(i64 %a)
-        ret i64 %res
-}
-
-define zeroext i16 @foo4(i16* %b)  {
-	%a = load i16* %b
-        %res = tail call i16 @llvm.bswap.i16(i16 %a)
-        ret i16 %res
-}
-
-define zeroext i32 @foo5(i32* %b)  {
-; CHECK: foo5:
-; CHECK:  lrv [[R1:%r.]], 0(%r2)
-	%a = load i32* %b
-        %res = tail call i32 @llvm.bswap.i32(i32 %a)
-        ret i32 %res
-}
-
-define i64 @foo6(i64* %b) {
-; CHECK: foo6:
-; CHECK:  lrvg %r2, 0(%r2)
-	%a = load i64* %b
-        %res = tail call i64 @llvm.bswap.i64(i64 %a)
-        ret i64 %res
-}
-
-define void @foo7(i16 %a, i16* %b) {
-        %res = tail call i16 @llvm.bswap.i16(i16 %a)
-        store i16 %res, i16* %b
-        ret void
-}
-
-define void @foo8(i32 %a, i32* %b) {
-; CHECK: foo8:
-; CHECK:  strv %r2, 0(%r3)
-        %res = tail call i32 @llvm.bswap.i32(i32 %a)
-        store i32 %res, i32* %b
-        ret void
-}
-
-define void @foo9(i64 %a, i64* %b) {
-; CHECK: foo9:
-; CHECK:  strvg %r2, 0(%r3)
-        %res = tail call i64 @llvm.bswap.i64(i64 %a)
-        store i64 %res, i64* %b
-        ret void
-}
-
-declare i16 @llvm.bswap.i16(i16) nounwind readnone
-declare i32 @llvm.bswap.i32(i32) nounwind readnone
-declare i64 @llvm.bswap.i64(i64) nounwind readnone
-
diff --git a/test/CodeGen/SystemZ/2009-05-29-InvalidRetResult.ll b/test/CodeGen/SystemZ/2009-05-29-InvalidRetResult.ll
deleted file mode 100644
index 65f8e14..0000000
--- a/test/CodeGen/SystemZ/2009-05-29-InvalidRetResult.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define i32 @main() nounwind {
-entry:
-	%call = call i32 (...)* @random() nounwind		; <i32> [#uses=0]
-	unreachable
-}
-
-declare i32 @random(...)
diff --git a/test/CodeGen/SystemZ/2009-06-02-And32Imm.ll b/test/CodeGen/SystemZ/2009-06-02-And32Imm.ll
deleted file mode 100644
index 3cfa97d..0000000
--- a/test/CodeGen/SystemZ/2009-06-02-And32Imm.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s -march=systemz | grep nilf | count 1
-; RUN: llc < %s -march=systemz | grep nill | count 1
-
-define i32 @gnu_dev_major(i64 %__dev) nounwind readnone {
-entry:
-        %shr = lshr i64 %__dev, 8               ; <i64> [#uses=1]
-        %shr8 = trunc i64 %shr to i32           ; <i32> [#uses=1]
-        %shr2 = lshr i64 %__dev, 32             ; <i64> [#uses=1]
-        %conv = trunc i64 %shr2 to i32          ; <i32> [#uses=1]
-        %and3 = and i32 %conv, -4096            ; <i32> [#uses=1]
-        %and6 = and i32 %shr8, 4095             ; <i32> [#uses=1]
-        %conv5 = or i32 %and6, %and3            ; <i32> [#uses=1]
-        ret i32 %conv5
-}
diff --git a/test/CodeGen/SystemZ/2009-06-02-Rotate.ll b/test/CodeGen/SystemZ/2009-06-02-Rotate.ll
deleted file mode 100644
index 54424e1..0000000
--- a/test/CodeGen/SystemZ/2009-06-02-Rotate.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=systemz | grep rll
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-
-define i32 @rotl(i32 %x, i32 %y, i32 %z) nounwind readnone {
-entry:
-	%shl = shl i32 %x, 1		; <i32> [#uses=1]
-	%sub = sub i32 32, 1		; <i32> [#uses=1]
-	%shr = lshr i32 %x, %sub		; <i32> [#uses=1]
-	%or = or i32 %shr, %shl		; <i32> [#uses=1]
-	ret i32 %or
-}
diff --git a/test/CodeGen/SystemZ/2009-06-05-InvalidArgLoad.ll b/test/CodeGen/SystemZ/2009-06-05-InvalidArgLoad.ll
deleted file mode 100644
index 5f6ec50d..0000000
--- a/test/CodeGen/SystemZ/2009-06-05-InvalidArgLoad.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-ibm-linux"
-	%struct.re_pattern_buffer = type <{ i8*, i64, i64, i64, i8*, i8*, i64, i8, i8, i8, i8, i8, i8, i8, i8 }>
-	%struct.re_registers = type <{ i32, i8, i8, i8, i8, i32*, i32* }>
-
-define i32 @xre_search_2(%struct.re_pattern_buffer* nocapture %bufp, i8* %string1, i32 %size1, i8* %string2, i32 %size2, i32 %startpos, i32 %range, %struct.re_registers* %regs, i32 %stop) nounwind {
-entry:
-	%cmp17.i = icmp slt i32 undef, %startpos		; <i1> [#uses=1]
-	%or.cond.i = or i1 undef, %cmp17.i		; <i1> [#uses=1]
-	br i1 %or.cond.i, label %byte_re_search_2.exit, label %if.then20.i
-
-if.then20.i:		; preds = %entry
-	ret i32 -2
-
-byte_re_search_2.exit:		; preds = %entry
-	ret i32 -1
-}
diff --git a/test/CodeGen/SystemZ/2009-07-04-Shl32.ll b/test/CodeGen/SystemZ/2009-07-04-Shl32.ll
deleted file mode 100644
index 89b2225..0000000
--- a/test/CodeGen/SystemZ/2009-07-04-Shl32.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: llc < %s
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-
-define void @compdecomp(i8* nocapture %data, i64 %data_len) nounwind {
-entry:
-	br label %for.body38
-
-for.body38:		; preds = %for.body38, %entry
-	br i1 undef, label %for.cond220, label %for.body38
-
-for.cond220:		; preds = %for.cond220, %for.body38
-	br i1 false, label %for.cond220, label %for.end297
-
-for.end297:		; preds = %for.cond220
-	%tmp334 = load i8* undef		; <i8> [#uses=1]
-	%conv343 = zext i8 %tmp334 to i32		; <i32> [#uses=1]
-	%sub344 = add i32 %conv343, -1		; <i32> [#uses=1]
-	%shl345 = shl i32 1, %sub344		; <i32> [#uses=1]
-	%conv346 = sext i32 %shl345 to i64		; <i64> [#uses=1]
-	br label %for.body356
-
-for.body356:		; preds = %for.body356, %for.end297
-	%mask.1633 = phi i64 [ %conv346, %for.end297 ], [ undef, %for.body356 ]		; <i64> [#uses=0]
-	br label %for.body356
-}
diff --git a/test/CodeGen/SystemZ/2009-07-05-Shifts.ll b/test/CodeGen/SystemZ/2009-07-05-Shifts.ll
deleted file mode 100644
index 68ccb84..0000000
--- a/test/CodeGen/SystemZ/2009-07-05-Shifts.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-
-define signext i32 @bit_place_piece(i32 signext %col, i32 signext %player, i64* nocapture %b1, i64* nocapture %b2) nounwind {
-entry:
-	br i1 undef, label %for.body, label %return
-
-for.body:		; preds = %entry
-	%add = add i32 0, %col		; <i32> [#uses=1]
-	%sh_prom = zext i32 %add to i64		; <i64> [#uses=1]
-	%shl = shl i64 1, %sh_prom		; <i64> [#uses=1]
-	br i1 undef, label %if.then13, label %if.else
-
-if.then13:		; preds = %for.body
-	ret i32 0
-
-if.else:		; preds = %for.body
-	%or34 = or i64 undef, %shl		; <i64> [#uses=0]
-	ret i32 0
-
-return:		; preds = %entry
-	ret i32 1
-}
diff --git a/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll b/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll
deleted file mode 100644
index 92f5467..0000000
--- a/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-; RUN: llc < %s -regalloc=basic | FileCheck %s
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-
-declare void @rdft(i32 signext, i32 signext, double*, i32* nocapture, double*) nounwind
-
-declare double @mp_mul_d2i_test(i32 signext, i32 signext, double* nocapture) nounwind
-
-define void @mp_mul_radix_test_bb3(i32 %radix, i32 %nfft, double* %tmpfft, i32* %ip, double* %w, double* %arrayidx44.reload, double* %call.out) nounwind {
-; CHECK: lg %r{{[0-9]+}}, 328(%r15)
-
-newFuncRoot:
-	br label %bb3
-
-bb4.exitStub:		; preds = %bb3
-	store double %call, double* %call.out
-	ret void
-
-bb3:		; preds = %newFuncRoot
-	tail call void @rdft(i32 signext %nfft, i32 signext -1, double* %arrayidx44.reload, i32* %ip, double* %w) nounwind
-	%call = tail call double @mp_mul_d2i_test(i32 signext %radix, i32 signext %nfft, double* %tmpfft)		; <double> [#uses=1]
-	br label %bb4.exitStub
-}
diff --git a/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll b/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll
deleted file mode 100644
index f4e176e..0000000
--- a/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-
-define float @foo(i32 signext %a) {
-entry:
-    %b = bitcast i32 %a to float
-    ret float %b
-}
-
-define i32 @bar(float %a) {
-entry:
-    %b = bitcast float %a to i32
-    ret i32 %b
-}
diff --git a/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll b/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll
deleted file mode 100644
index 63fd855..0000000
--- a/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll
+++ /dev/null
@@ -1,32 +0,0 @@
-; RUN: llc < %s
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-
-define signext i32 @dfg_parse() nounwind {
-entry:
-	br i1 undef, label %if.then2208, label %if.else2360
-
-if.then2208:		; preds = %entry
-	br i1 undef, label %bb.nph3189, label %for.end2270
-
-bb.nph3189:		; preds = %if.then2208
-	unreachable
-
-for.end2270:		; preds = %if.then2208
-	%call2279 = call i64 @strlen(i8* undef) nounwind		; <i64> [#uses=1]
-	%add2281 = add i64 0, %call2279		; <i64> [#uses=1]
-	%tmp2283 = trunc i64 %add2281 to i32		; <i32> [#uses=1]
-	%tmp2284 = alloca i8, i32 %tmp2283, align 2		; <i8*> [#uses=1]
-	%yyd.0.i2561.13 = getelementptr i8* %tmp2284, i64 13		; <i8*> [#uses=1]
-	store i8 117, i8* %yyd.0.i2561.13
-	br label %while.cond.i2558
-
-while.cond.i2558:		; preds = %while.cond.i2558, %for.end2270
-	br label %while.cond.i2558
-
-if.else2360:		; preds = %entry
-	unreachable
-}
-
-declare i64 @strlen(i8* nocapture) nounwind readonly
diff --git a/test/CodeGen/SystemZ/2009-08-21-InlineAsmRConstraint.ll b/test/CodeGen/SystemZ/2009-08-21-InlineAsmRConstraint.ll
deleted file mode 100644
index f7686f1..0000000
--- a/test/CodeGen/SystemZ/2009-08-21-InlineAsmRConstraint.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: llc < %s
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:16:16-f128:128:128"
-target triple = "s390x-ibm-linux-gnu"
-
-@__JCR_LIST__ = internal global [0 x i8*] zeroinitializer, section ".jcr", align 8 ; <[0 x i8*]*> [#uses=1]
-
-define internal void @frame_dummy() nounwind {
-entry:
-  %asmtmp = tail call void (i8*)* (void (i8*)*)* asm "", "=r,0"(void (i8*)* @_Jv_RegisterClasses) nounwind ; <void (i8*)*> [#uses=2]
-  %0 = icmp eq void (i8*)* %asmtmp, null          ; <i1> [#uses=1]
-  br i1 %0, label %return, label %bb3
-
-bb3:                                              ; preds = %entry
-  tail call void %asmtmp(i8* bitcast ([0 x i8*]* @__JCR_LIST__ to i8*)) nounwind
-  ret void
-
-return:                                           ; preds = %entry
-  ret void
-}
-
-declare extern_weak void @_Jv_RegisterClasses(i8*)
diff --git a/test/CodeGen/SystemZ/2009-08-22-FCopySign.ll b/test/CodeGen/SystemZ/2009-08-22-FCopySign.ll
deleted file mode 100644
index fde7d9d..0000000
--- a/test/CodeGen/SystemZ/2009-08-22-FCopySign.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:16:16-f128:128:128"
-target triple = "s390x-ibm-linux-gnu"
-
-define double @foo(double %a, double %b) nounwind {
-entry:
-; CHECK: cpsdr %f0, %f2, %f0
-  %0 = tail call double @copysign(double %a, double %b) nounwind readnone
-  ret double %0
-}
-
-define float @bar(float %a, float %b) nounwind {
-entry:
-; CHECK: cpsdr %f0, %f2, %f0
-  %0 = tail call float @copysignf(float %a, float %b) nounwind readnone
-  ret float %0
-}
-
-
-declare double @copysign(double, double) nounwind readnone
-declare float @copysignf(float, float) nounwind readnone
diff --git a/test/CodeGen/SystemZ/2010-01-04-DivMem.ll b/test/CodeGen/SystemZ/2010-01-04-DivMem.ll
deleted file mode 100644
index d730bec..0000000
--- a/test/CodeGen/SystemZ/2010-01-04-DivMem.ll
+++ /dev/null
@@ -1,50 +0,0 @@
-; RUN: llc < %s
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16-n32:64"
-target triple = "s390x-elf"
-
-@REGISTER = external global [10 x i32]            ; <[10 x i32]*> [#uses=2]
-
-define void @DIVR_P(i32 signext %PRINT_EFFECT) nounwind {
-entry:
-  %REG1 = alloca i32, align 4                     ; <i32*> [#uses=2]
-  %REG2 = alloca i32, align 4                     ; <i32*> [#uses=2]
-  %call = call signext i32 (...)* @FORMAT2(i32* %REG1, i32* %REG2) nounwind ; <i32> [#uses=0]
-  %tmp = load i32* %REG1                          ; <i32> [#uses=1]
-  %idxprom = sext i32 %tmp to i64                 ; <i64> [#uses=1]
-  %arrayidx = getelementptr inbounds [10 x i32]* @REGISTER, i64 0, i64 %idxprom ; <i32*> [#uses=2]
-  %tmp1 = load i32* %arrayidx                     ; <i32> [#uses=2]
-  %tmp2 = load i32* %REG2                         ; <i32> [#uses=1]
-  %idxprom3 = sext i32 %tmp2 to i64               ; <i64> [#uses=1]
-  %arrayidx4 = getelementptr inbounds [10 x i32]* @REGISTER, i64 0, i64 %idxprom3 ; <i32*> [#uses=3]
-  %tmp5 = load i32* %arrayidx4                    ; <i32> [#uses=3]
-  %cmp6 = icmp sgt i32 %tmp5, 8388607             ; <i1> [#uses=1]
-  %REG2_SIGN.0 = select i1 %cmp6, i32 -1, i32 1   ; <i32> [#uses=2]
-  %cmp10 = icmp eq i32 %REG2_SIGN.0, 1            ; <i1> [#uses=1]
-  %not.cmp = icmp slt i32 %tmp1, 8388608          ; <i1> [#uses=2]
-  %or.cond = and i1 %cmp10, %not.cmp              ; <i1> [#uses=1]
-  br i1 %or.cond, label %if.then13, label %if.end25
-
-if.then13:                                        ; preds = %entry
-  %div = sdiv i32 %tmp5, %tmp1                    ; <i32> [#uses=2]
-  store i32 %div, i32* %arrayidx4
-  br label %if.end25
-
-if.end25:                                         ; preds = %if.then13, %entry
-  %tmp35 = phi i32 [ %div, %if.then13 ], [ %tmp5, %entry ] ; <i32> [#uses=1]
-  %cmp27 = icmp eq i32 %REG2_SIGN.0, -1           ; <i1> [#uses=1]
-  %or.cond46 = and i1 %cmp27, %not.cmp            ; <i1> [#uses=1]
-  br i1 %or.cond46, label %if.then31, label %if.end45
-
-if.then31:                                        ; preds = %if.end25
-  %sub = sub i32 16777216, %tmp35                 ; <i32> [#uses=1]
-  %tmp39 = load i32* %arrayidx                    ; <i32> [#uses=1]
-  %div40 = udiv i32 %sub, %tmp39                  ; <i32> [#uses=1]
-  %sub41 = sub i32 16777216, %div40               ; <i32> [#uses=1]
-  store i32 %sub41, i32* %arrayidx4
-  ret void
-
-if.end45:                                         ; preds = %if.end25
-  ret void
-}
-
-declare signext i32 @FORMAT2(...)
diff --git a/test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll
deleted file mode 100644
index c2877ac..0000000
--- a/test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc -O0 -march=systemz -asm-verbose < %s | FileCheck %s
-; Check that DEBUG_VALUE comments come through on a variety of targets.
-
-define i32 @main() nounwind ssp {
-entry:
-; CHECK: DEBUG_VALUE
-  call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
-  ret i32 0, !dbg !10
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-!llvm.dbg.sp = !{!0}
-
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 0}
-!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
-!9 = metadata !{i32 3, i32 11, metadata !8, null}
-!10 = metadata !{i32 4, i32 2, metadata !8, null}
-
diff --git a/test/CodeGen/SystemZ/dg.exp b/test/CodeGen/SystemZ/dg.exp
deleted file mode 100644
index e9624ba..0000000
--- a/test/CodeGen/SystemZ/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target SystemZ] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll b/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll
index 2890c22..ed55bb5 100644
--- a/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll
+++ b/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll
@@ -1,11 +1,7 @@
-; DISABLED: llc -mtriple=thumbv6-apple-darwin < %s
-; RUN: false
+; RUN: llc -mtriple=thumbv6-apple-darwin < %s
 ; rdar://problem/9416774
 ; ModuleID = 'reduced.ll'
 
-; byval is currently unsupported.
-; XFAIL: *
-
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
 target triple = "thumbv7-apple-ios"
 
diff --git a/test/CodeGen/Thumb/dg.exp b/test/CodeGen/Thumb/dg.exp
deleted file mode 100644
index 3ff359a..0000000
--- a/test/CodeGen/Thumb/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target ARM] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/Thumb/large-stack.ll b/test/CodeGen/Thumb/large-stack.ll
index fbacaba..f8c438c 100644
--- a/test/CodeGen/Thumb/large-stack.ll
+++ b/test/CodeGen/Thumb/large-stack.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=thumb-apple-ios | FileCheck %s
 
 define void @test1() {
 ; CHECK: test1:
diff --git a/test/CodeGen/Thumb/lit.local.cfg b/test/CodeGen/Thumb/lit.local.cfg
new file mode 100644
index 0000000..cb77b09
--- /dev/null
+++ b/test/CodeGen/Thumb/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/Thumb/vargs.ll b/test/CodeGen/Thumb/vargs.ll
index c2ba208..50a1a07 100644
--- a/test/CodeGen/Thumb/vargs.ll
+++ b/test/CodeGen/Thumb/vargs.ll
@@ -13,9 +13,9 @@ entry:
 
 bb:             ; preds = %bb, %entry
         %a_addr.0 = phi i32 [ %a, %entry ], [ %tmp5, %bb ]              ; <i32> [#uses=2]
-        %tmp = volatile load i8** %va           ; <i8*> [#uses=2]
+        %tmp = load volatile i8** %va           ; <i8*> [#uses=2]
         %tmp2 = getelementptr i8* %tmp, i32 4           ; <i8*> [#uses=1]
-        volatile store i8* %tmp2, i8** %va
+        store volatile i8* %tmp2, i8** %va
         %tmp5 = add i32 %a_addr.0, -1           ; <i32> [#uses=1]
         %tmp.upgrd.2 = icmp eq i32 %a_addr.0, 1         ; <i1> [#uses=1]
         br i1 %tmp.upgrd.2, label %bb7, label %bb
diff --git a/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll b/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
index 4e1394f..4616dcf 100644
--- a/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
+++ b/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+vfp2,+thumb2 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mattr=+vfp2,+thumb2 | FileCheck %s
 ; rdar://7076238
 
 @"\01LC" = external constant [36 x i8], align 1		; <[36 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll b/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
index 034a28f..524e5a6 100644
--- a/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
+++ b/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
@@ -5,7 +5,7 @@ define void @fred(i32 %three_by_three, i8* %in, double %dt1, i32 %x_size, i32 %y
 entry:
 ; -- The loop following the load should only use a single add-literation
 ;    instruction.
-; CHECK: ldr.64
+; CHECK: vldr
 ; CHECK: adds r{{[0-9]+.*}}#1
 ; CHECK-NOT: adds
 ; CHECK: subsections_via_symbols
diff --git a/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll b/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
index bb734ac..fcf1bae 100644
--- a/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
+++ b/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
@@ -21,7 +21,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
 define void @test(%s1* %this, i32 %format, i32 %w, i32 %h, i32 %levels, i32* %s, i8* %data, i32* nocapture %rowbytes, void (i8*, i8*)* %release, i8* %info) nounwind {
 entry:
   %tmp1 = getelementptr inbounds %s1* %this, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0
-  volatile store i32 1, i32* %tmp1, align 4
+  store volatile i32 1, i32* %tmp1, align 4
   %tmp12 = getelementptr inbounds %s1* %this, i32 0, i32 1
   store i32 %levels, i32* %tmp12, align 4
   %tmp13 = getelementptr inbounds %s1* %this, i32 0, i32 3
@@ -46,7 +46,7 @@ entry:
   %tmp24 = shl i32 %flags.0, 16
   %asmtmp.i.i.i = tail call %0 asm sideeffect "\0A0:\09ldrex $1, [$2]\0A\09orr $1, $1, $3\0A\09strex $0, $1, [$2]\0A\09cmp $0, #0\0A\09bne 0b", "=&r,=&r,r,r,~{memory},~{cc}"(i32* %tmp1, i32 %tmp24) nounwind
   %tmp25 = getelementptr inbounds %s1* %this, i32 0, i32 2, i32 0, i32 0
-  volatile store i32 1, i32* %tmp25, align 4
+  store volatile i32 1, i32* %tmp25, align 4
   %tmp26 = icmp eq i32 %levels, 0
   br i1 %tmp26, label %return, label %bb4
 
diff --git a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
index 01fb0a5..06762ba 100644
--- a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
+++ b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
@@ -23,7 +23,7 @@ entry:
   %4 = insertelement <2 x double> %2, double %V.0.ph, i32 1 ; <<2 x double>> [#uses=2]
 ; Constant pool load followed by add.
 ; Then clobber the loaded register, not the sum.
-; CHECK: vldr.64 [[LDR:d.*]],
+; CHECK: vldr [[LDR:d.*]],
 ; CHECK: LPC0_0:
 ; CHECK: vadd.f64 [[ADD:d.*]], [[LDR]], [[LDR]]
 ; CHECK-NOT: vmov.f64 [[ADD]]
diff --git a/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll b/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
index d2140a1..5cb266b 100644
--- a/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
+++ b/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
@@ -1,5 +1,5 @@
 ; rdar://8465407
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
 
 %struct.buf = type opaque
 
diff --git a/test/CodeGen/Thumb2/2011-12-16-T2SizeReduceAssert.ll b/test/CodeGen/Thumb2/2011-12-16-T2SizeReduceAssert.ll
new file mode 100644
index 0000000..dadbdc5
--- /dev/null
+++ b/test/CodeGen/Thumb2/2011-12-16-T2SizeReduceAssert.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 
+
+%struct.LIST_NODE.0.16 = type { %struct.LIST_NODE.0.16*, i8* }
+
+define %struct.LIST_NODE.0.16* @list_AssocListPair(%struct.LIST_NODE.0.16* %List, i8* %Key) nounwind readonly {
+entry:
+  br label %bb3
+
+bb:                                               ; preds = %bb3
+  %Scan.0.idx7.val = load i8** undef, align 4
+  %.idx = getelementptr i8* %Scan.0.idx7.val, i32 4
+  %0 = bitcast i8* %.idx to i8**
+  %.idx.val = load i8** %0, align 4
+  %1 = icmp eq i8* %.idx.val, %Key
+  br i1 %1, label %bb5, label %bb2
+
+bb2:                                              ; preds = %bb
+  %Scan.0.idx8.val = load %struct.LIST_NODE.0.16** undef, align 4
+  br label %bb3
+
+bb3:                                              ; preds = %bb2, %entry
+  %Scan.0 = phi %struct.LIST_NODE.0.16* [ %List, %entry ], [ %Scan.0.idx8.val, %bb2 ]
+  %2 = icmp eq %struct.LIST_NODE.0.16* %Scan.0, null
+  br i1 %2, label %bb5, label %bb
+
+bb5:                                              ; preds = %bb3, %bb
+  ret %struct.LIST_NODE.0.16* null
+}
diff --git a/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll b/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll
new file mode 100644
index 0000000..4acdd9e
--- /dev/null
+++ b/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll
@@ -0,0 +1,103 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 | FileCheck %s
+; rdar://10676853
+
+%struct.Dict_node_struct = type { i8*, %struct.Word_file_struct*, %struct.Exp_struct*, %struct.Dict_node_struct*, %struct.Dict_node_struct* }
+%struct.Word_file_struct = type { [60 x i8], i32, %struct.Word_file_struct* }
+%struct.Exp_struct = type { i8, i8, i8, i8, %union.anon }
+%union.anon = type { %struct.E_list_struct* }
+%struct.E_list_struct = type { %struct.E_list_struct*, %struct.Exp_struct* }
+
+@lookup_list = external hidden unnamed_addr global %struct.Dict_node_struct*, align 4
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+define hidden fastcc void @rdictionary_lookup(%struct.Dict_node_struct* %dn, i8* nocapture %s) nounwind ssp {
+; CHECK: rdictionary_lookup:
+entry:
+  br label %tailrecurse
+
+tailrecurse:                                      ; preds = %if.then10, %entry
+  %dn.tr = phi %struct.Dict_node_struct* [ %dn, %entry ], [ %9, %if.then10 ]
+  %cmp = icmp eq %struct.Dict_node_struct* %dn.tr, null
+  br i1 %cmp, label %if.end11, label %if.end
+
+if.end:                                           ; preds = %tailrecurse
+  %string = getelementptr inbounds %struct.Dict_node_struct* %dn.tr, i32 0, i32 0
+  %0 = load i8** %string, align 4
+  br label %while.cond.i
+
+while.cond.i:                                     ; preds = %while.body.i, %if.end
+  %1 = phi i8* [ %s, %if.end ], [ %incdec.ptr.i, %while.body.i ]
+  %storemerge.i = phi i8* [ %0, %if.end ], [ %incdec.ptr6.i, %while.body.i ]
+  %2 = load i8* %1, align 1
+  %cmp.i = icmp eq i8 %2, 0
+  %.pre.i = load i8* %storemerge.i, align 1
+  br i1 %cmp.i, label %lor.lhs.false.i, label %land.end.i
+
+land.end.i:                                       ; preds = %while.cond.i
+  %cmp4.i = icmp eq i8 %2, %.pre.i
+  br i1 %cmp4.i, label %while.body.i, label %while.end.i
+
+while.body.i:                                     ; preds = %land.end.i
+  %incdec.ptr.i = getelementptr inbounds i8* %1, i32 1
+  %incdec.ptr6.i = getelementptr inbounds i8* %storemerge.i, i32 1
+  br label %while.cond.i
+
+while.end.i:                                      ; preds = %land.end.i
+  %cmp8.i = icmp eq i8 %2, 42
+  br i1 %cmp8.i, label %if.end3, label %lor.lhs.false.i
+
+lor.lhs.false.i:                                  ; preds = %while.end.i, %while.cond.i
+  %3 = phi i8 [ %2, %while.end.i ], [ 0, %while.cond.i ]
+  %cmp11.i = icmp eq i8 %.pre.i, 42
+  br i1 %cmp11.i, label %if.end3, label %dict_match.exit
+
+dict_match.exit:                                  ; preds = %lor.lhs.false.i
+  %cmp14.i = icmp eq i8 %3, 46
+  %conv16.i = sext i8 %3 to i32
+  %.conv16.i = select i1 %cmp14.i, i32 0, i32 %conv16.i
+  %cmp18.i = icmp eq i8 %.pre.i, 46
+  %conv22.i = sext i8 %.pre.i to i32
+  %cond24.i = select i1 %cmp18.i, i32 0, i32 %conv22.i
+  %sub.i = sub nsw i32 %.conv16.i, %cond24.i
+  %cmp1 = icmp sgt i32 %sub.i, -1
+  br i1 %cmp1, label %if.end3, label %if.then10
+
+if.end3:                                          ; preds = %dict_match.exit, %lor.lhs.false.i, %while.end.i
+; CHECK: %if.end3
+; CHECK: cmp
+; CHECK-NOT: cbnz
+  %storemerge1.i3 = phi i32 [ %sub.i, %dict_match.exit ], [ 0, %lor.lhs.false.i ], [ 0, %while.end.i ]
+  %right = getelementptr inbounds %struct.Dict_node_struct* %dn.tr, i32 0, i32 4
+  %4 = load %struct.Dict_node_struct** %right, align 4
+  tail call fastcc void @rdictionary_lookup(%struct.Dict_node_struct* %4, i8* %s)
+  %cmp4 = icmp eq i32 %storemerge1.i3, 0
+  br i1 %cmp4, label %if.then5, label %if.end8
+
+if.then5:                                         ; preds = %if.end3
+  %call6 = tail call fastcc i8* @xalloc(i32 20)
+  %5 = bitcast i8* %call6 to %struct.Dict_node_struct*
+  %6 = bitcast %struct.Dict_node_struct* %dn.tr to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %call6, i8* %6, i32 16, i32 4, i1 false)
+  %7 = load %struct.Dict_node_struct** @lookup_list, align 4
+  %right7 = getelementptr inbounds i8* %call6, i32 16
+  %8 = bitcast i8* %right7 to %struct.Dict_node_struct**
+  store %struct.Dict_node_struct* %7, %struct.Dict_node_struct** %8, align 4
+  store %struct.Dict_node_struct* %5, %struct.Dict_node_struct** @lookup_list, align 4
+  br label %if.then10
+
+if.end8:                                          ; preds = %if.end3
+  %cmp9 = icmp slt i32 %storemerge1.i3, 1
+  br i1 %cmp9, label %if.then10, label %if.end11
+
+if.then10:                                        ; preds = %if.end8, %if.then5, %dict_match.exit
+  %left = getelementptr inbounds %struct.Dict_node_struct* %dn.tr, i32 0, i32 3
+  %9 = load %struct.Dict_node_struct** %left, align 4
+  br label %tailrecurse
+
+if.end11:                                         ; preds = %if.end8, %tailrecurse
+  ret void
+}
+
+; Materializable
+declare hidden fastcc i8* @xalloc(i32) nounwind ssp
diff --git a/test/CodeGen/Thumb2/aligned-constants.ll b/test/CodeGen/Thumb2/aligned-constants.ll
new file mode 100644
index 0000000..16b3a19
--- /dev/null
+++ b/test/CodeGen/Thumb2/aligned-constants.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mcpu=cortex-a8 | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios"
+
+; The double in the constant pool is 8-byte aligned, forcing the function
+; alignment.
+; CHECK: .align 3
+; CHECK: func
+;
+; Constant pool with 8-byte entry before 4-byte entry:
+; CHECK: .align 3
+; CHECK: LCPI
+; CHECK:	.long	2370821947
+; CHECK:	.long	1080815255
+; CHECK: LCPI
+; CHECK:	.long	1123477881
+define void @func(float* nocapture %x, double* nocapture %y) nounwind ssp {
+entry:
+  %0 = load float* %x, align 4
+  %add = fadd float %0, 0x405EDD2F20000000
+  store float %add, float* %x, align 4
+  %1 = load double* %y, align 4
+  %add1 = fadd double %1, 2.234560e+02
+  store double %add1, double* %y, align 4
+  ret void
+}
diff --git a/test/CodeGen/Thumb2/aligned-spill.ll b/test/CodeGen/Thumb2/aligned-spill.ll
new file mode 100644
index 0000000..c98ca80
--- /dev/null
+++ b/test/CodeGen/Thumb2/aligned-spill.ll
@@ -0,0 +1,95 @@
+; RUN: llc < %s -mcpu=cortex-a8 -align-neon-spills=0 | FileCheck %s
+; RUN: llc < %s -mcpu=cortex-a8 -align-neon-spills=1 | FileCheck %s --check-prefix=NEON
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios"
+
+; CHECK: f
+; This function is forced to spill a double.
+; Verify that the spill slot is properly aligned.
+;
+; The caller-saved r4 is used as a scratch register for stack realignment.
+; CHECK: push {r4, r7, lr}
+; CHECK: bic r4, r4, #7
+; CHECK: mov sp, r4
+define void @f(double* nocapture %p) nounwind ssp {
+entry:
+  %0 = load double* %p, align 4
+  tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"() nounwind
+  tail call void @g() nounwind
+  store double %0, double* %p, align 4
+  ret void
+}
+
+; NEON: f
+; NEON: push {r4, r7, lr}
+; NEON: sub.w r4, sp, #64
+; NEON: bic r4, r4, #15
+; Stack pointer must be updated before the spills.
+; NEON: mov sp, r4
+; NEON: vst1.64 {d8, d9, d10, d11}, [r4, :128]!
+; NEON: vst1.64 {d12, d13, d14, d15}, [r4, :128]
+; Stack pointer adjustment for the stack frame contents.
+; This could legally happen before the spills.
+; Since the spill slot is only 8 bytes, technically it would be fine to only
+; subtract #8 here. That would leave sp less aligned than some stack slots,
+; and would probably blow MFI's mind.
+; NEON: sub sp, #16
+; The epilog is free to use another scratch register than r4.
+; NEON: add r[[R4:[0-9]+]], sp, #16
+; NEON: vld1.64 {d8, d9, d10, d11}, [r[[R4]], :128]!
+; NEON: vld1.64 {d12, d13, d14, d15}, [r[[R4]], :128]
+; The stack pointer restore must happen after the reloads.
+; NEON: mov sp,
+; NEON: pop
+
+declare void @g()
+
+; Spill 7 d-registers.
+define void @f7(double* nocapture %p) nounwind ssp {
+entry:
+  tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14}"() nounwind
+  ret void
+}
+
+; NEON: f7
+; NEON: push {r4, r7, lr}
+; NEON: sub.w r4, sp, #56
+; NEON: bic r4, r4, #15
+; Stack pointer must be updated before the spills.
+; NEON: mov sp, r4
+; NEON: vst1.64 {d8, d9, d10, d11}, [r4, :128]!
+; NEON: vst1.64 {d12, d13}, [r4, :128]
+; NEON: vstr d14, [r4, #16]
+; Epilog
+; NEON: vld1.64 {d8, d9, d10, d11},
+; NEON: vld1.64 {d12, d13},
+; NEON: vldr d14,
+; The stack pointer restore must happen after the reloads.
+; NEON: mov sp,
+; NEON: pop
+
+; Spill 7 d-registers, leave a hole.
+define void @f3plus4(double* nocapture %p) nounwind ssp {
+entry:
+  tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d12},~{d13},~{d14},~{d15}"() nounwind
+  ret void
+}
+
+; Aligned spilling only works for contiguous ranges starting from d8.
+; The rest goes to the standard vpush instructions.
+; NEON: f3plus4
+; NEON: push {r4, r7, lr}
+; NEON: vpush {d12, d13, d14, d15}
+; NEON: sub.w r4, sp, #24
+; NEON: bic r4, r4, #15
+; Stack pointer must be updated before the spills.
+; NEON: mov sp, r4
+; NEON: vst1.64 {d8, d9}, [r4, :128]
+; NEON: vstr d10, [r4, #16]
+; Epilog
+; NEON: vld1.64 {d8, d9},
+; NEON: vldr d10, [{{.*}}, #16]
+; The stack pointer restore must happen after the reloads.
+; NEON: mov sp,
+; NEON: vpop {d12, d13, d14, d15}
+; NEON: pop
diff --git a/test/CodeGen/Thumb2/constant-islands.ll b/test/CodeGen/Thumb2/constant-islands.ll
new file mode 100644
index 0000000..19d2385
--- /dev/null
+++ b/test/CodeGen/Thumb2/constant-islands.ll
@@ -0,0 +1,1400 @@
+; RUN: llc < %s -march=arm   -mcpu=cortex-a8 -O0 -filetype=obj -o %t.o
+; RUN: llc < %s -march=thumb -mcpu=cortex-a8 -O0 -filetype=obj -o %t.o
+; RUN: llc < %s -march=arm   -mcpu=cortex-a8 -O2 -filetype=obj -o %t.o
+; RUN: llc < %s -march=thumb -mcpu=cortex-a8 -O2 -filetype=obj -o %t.o
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios"
+
+; This function comes from the Bullet test.  It is quite big, and exercises the
+; constant island pass a bit.  It has caused failures, including
+; <rdar://problem/10670199>
+;
+; It is unlikely that this code will continue to create the exact conditions
+; that broke the arm constant island pass in the past, but it is still useful to
+; force the pass to split basic blocks etc.
+;
+; The run lines above force the integrated assembler to be enabled so it can
+; catch any illegal displacements.  Other than that, we depend on the constant
+; island pass assertions.
+
+%class.btVector3 = type { [4 x float] }
+%class.btTransform = type { %class.btMatrix3x3, %class.btVector3 }
+%class.btMatrix3x3 = type { [3 x %class.btVector3] }
+%class.btCapsuleShape = type { %class.btConvexInternalShape, i32 }
+%class.btConvexInternalShape = type { %class.btConvexShape, %class.btVector3, %class.btVector3, float, float }
+%class.btConvexShape = type { %class.btCollisionShape }
+%class.btCollisionShape = type { i32 (...)**, i32, i8* }
+%class.RagDoll = type { i32 (...)**, %class.btDynamicsWorld*, [11 x %class.btCollisionShape*], [11 x %class.btRigidBody*], [10 x %class.btTypedConstraint*] }
+%class.btDynamicsWorld = type { %class.btCollisionWorld, void (%class.btDynamicsWorld*, float)*, void (%class.btDynamicsWorld*, float)*, i8*, %struct.btContactSolverInfo }
+%class.btCollisionWorld = type { i32 (...)**, %class.btAlignedObjectArray, %class.btDispatcher*, %struct.btDispatcherInfo, %class.btStackAlloc*, %class.btBroadphaseInterface*, %class.btIDebugDraw*, i8 }
+%class.btAlignedObjectArray = type { %class.btAlignedAllocator, i32, i32, %class.btCollisionObject**, i8 }
+%class.btAlignedAllocator = type { i8 }
+%class.btCollisionObject = type { i32 (...)**, %class.btTransform, %class.btTransform, %class.btVector3, %class.btVector3, %class.btVector3, i8, float, %struct.btBroadphaseProxy*, %class.btCollisionShape*, %class.btCollisionShape*, i32, i32, i32, i32, float, float, float, i8*, i32, float, float, float, i8, [7 x i8] }
+%struct.btBroadphaseProxy = type { i8*, i16, i16, i8*, i32, %class.btVector3, %class.btVector3 }
+%class.btDispatcher = type { i32 (...)** }
+%struct.btDispatcherInfo = type { float, i32, i32, float, i8, %class.btIDebugDraw*, i8, i8, i8, float, i8, float, %class.btStackAlloc* }
+%class.btIDebugDraw = type { i32 (...)** }
+%class.btStackAlloc = type opaque
+%class.btBroadphaseInterface = type { i32 (...)** }
+%struct.btContactSolverInfo = type { %struct.btContactSolverInfoData }
+%struct.btContactSolverInfoData = type { float, float, float, float, float, i32, float, float, float, float, float, i32, float, float, float, i32, i32 }
+%class.btRigidBody = type { %class.btCollisionObject, %class.btMatrix3x3, %class.btVector3, %class.btVector3, float, %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, float, float, i8, float, float, float, float, float, float, %class.btMotionState*, %class.btAlignedObjectArray.22, i32, i32, i32 }
+%class.btMotionState = type { i32 (...)** }
+%class.btAlignedObjectArray.22 = type { %class.btAlignedAllocator.23, i32, i32, %class.btTypedConstraint**, i8 }
+%class.btAlignedAllocator.23 = type { i8 }
+%class.btTypedConstraint = type { i32 (...)**, %struct.btTypedObject, i32, i32, i8, %class.btRigidBody*, %class.btRigidBody*, float, float, %class.btVector3, %class.btVector3, %class.btVector3 }
+%struct.btTypedObject = type { i32 }
+%class.btHingeConstraint = type { %class.btTypedConstraint, [3 x %class.btJacobianEntry], [3 x %class.btJacobianEntry], %class.btTransform, %class.btTransform, float, float, float, float, float, float, float, float, float, float, float, float, float, i8, i8, i8, i8, i8, float }
+%class.btJacobianEntry = type { %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, float }
+%class.btConeTwistConstraint = type { %class.btTypedConstraint, [3 x %class.btJacobianEntry], %class.btTransform, %class.btTransform, float, float, float, float, float, float, float, float, %class.btVector3, %class.btVector3, float, float, float, float, float, float, float, float, i8, i8, i8, i8, float, float, %class.btVector3, i8, i8, %class.btQuaternion, float, %class.btVector3 }
+%class.btQuaternion = type { %class.btQuadWord }
+%class.btQuadWord = type { [4 x float] }
+
+@_ZTV7RagDoll = external unnamed_addr constant [4 x i8*]
+
+declare noalias i8* @_Znwm(i32)
+
+declare i32 @__gxx_personality_sj0(...)
+
+declare void @_ZdlPv(i8*) nounwind
+
+declare %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3*, float*, float*, float*) unnamed_addr inlinehint ssp align 2
+
+declare void @_ZSt9terminatev()
+
+declare %class.btTransform* @_ZN11btTransformC1Ev(%class.btTransform*) unnamed_addr ssp align 2
+
+declare void @_ZN11btTransform11setIdentityEv(%class.btTransform*) ssp align 2
+
+declare void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform*, %class.btVector3*) nounwind inlinehint ssp align 2
+
+declare i8* @_ZN13btConvexShapenwEm(i32) inlinehint ssp align 2
+
+declare void @_ZN13btConvexShapedlEPv(i8*) inlinehint ssp align 2
+
+declare %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape*, float, float)
+
+declare %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform*) nounwind inlinehint ssp align 2
+
+define %class.RagDoll* @_ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f(%class.RagDoll* %this, %class.btDynamicsWorld* %ownerWorld, %class.btVector3* %positionOffset, float %scale) unnamed_addr ssp align 2 {
+entry:
+  %retval = alloca %class.RagDoll*, align 4
+  %this.addr = alloca %class.RagDoll*, align 4
+  %ownerWorld.addr = alloca %class.btDynamicsWorld*, align 4
+  %positionOffset.addr = alloca %class.btVector3*, align 4
+  %scale.addr = alloca float, align 4
+  %exn.slot = alloca i8*
+  %ehselector.slot = alloca i32
+  %offset = alloca %class.btTransform, align 4
+  %transform = alloca %class.btTransform, align 4
+  %ref.tmp = alloca %class.btVector3, align 4
+  %ref.tmp97 = alloca %class.btVector3, align 4
+  %ref.tmp98 = alloca float, align 4
+  %ref.tmp99 = alloca float, align 4
+  %ref.tmp100 = alloca float, align 4
+  %ref.tmp102 = alloca %class.btTransform, align 4
+  %ref.tmp107 = alloca %class.btVector3, align 4
+  %ref.tmp108 = alloca %class.btVector3, align 4
+  %ref.tmp109 = alloca float, align 4
+  %ref.tmp110 = alloca float, align 4
+  %ref.tmp111 = alloca float, align 4
+  %ref.tmp113 = alloca %class.btTransform, align 4
+  %ref.tmp119 = alloca %class.btVector3, align 4
+  %ref.tmp120 = alloca %class.btVector3, align 4
+  %ref.tmp121 = alloca float, align 4
+  %ref.tmp122 = alloca float, align 4
+  %ref.tmp123 = alloca float, align 4
+  %ref.tmp125 = alloca %class.btTransform, align 4
+  %ref.tmp131 = alloca %class.btVector3, align 4
+  %ref.tmp132 = alloca %class.btVector3, align 4
+  %ref.tmp133 = alloca float, align 4
+  %ref.tmp134 = alloca float, align 4
+  %ref.tmp135 = alloca float, align 4
+  %ref.tmp137 = alloca %class.btTransform, align 4
+  %ref.tmp143 = alloca %class.btVector3, align 4
+  %ref.tmp144 = alloca %class.btVector3, align 4
+  %ref.tmp145 = alloca float, align 4
+  %ref.tmp146 = alloca float, align 4
+  %ref.tmp147 = alloca float, align 4
+  %ref.tmp149 = alloca %class.btTransform, align 4
+  %ref.tmp155 = alloca %class.btVector3, align 4
+  %ref.tmp156 = alloca %class.btVector3, align 4
+  %ref.tmp157 = alloca float, align 4
+  %ref.tmp158 = alloca float, align 4
+  %ref.tmp159 = alloca float, align 4
+  %ref.tmp161 = alloca %class.btTransform, align 4
+  %ref.tmp167 = alloca %class.btVector3, align 4
+  %ref.tmp168 = alloca %class.btVector3, align 4
+  %ref.tmp169 = alloca float, align 4
+  %ref.tmp170 = alloca float, align 4
+  %ref.tmp171 = alloca float, align 4
+  %ref.tmp173 = alloca %class.btTransform, align 4
+  %ref.tmp179 = alloca %class.btVector3, align 4
+  %ref.tmp180 = alloca %class.btVector3, align 4
+  %ref.tmp181 = alloca float, align 4
+  %ref.tmp182 = alloca float, align 4
+  %ref.tmp183 = alloca float, align 4
+  %ref.tmp186 = alloca %class.btTransform, align 4
+  %ref.tmp192 = alloca %class.btVector3, align 4
+  %ref.tmp193 = alloca %class.btVector3, align 4
+  %ref.tmp194 = alloca float, align 4
+  %ref.tmp195 = alloca float, align 4
+  %ref.tmp196 = alloca float, align 4
+  %ref.tmp199 = alloca %class.btTransform, align 4
+  %ref.tmp205 = alloca %class.btVector3, align 4
+  %ref.tmp206 = alloca %class.btVector3, align 4
+  %ref.tmp207 = alloca float, align 4
+  %ref.tmp208 = alloca float, align 4
+  %ref.tmp209 = alloca float, align 4
+  %ref.tmp212 = alloca %class.btTransform, align 4
+  %ref.tmp218 = alloca %class.btVector3, align 4
+  %ref.tmp219 = alloca %class.btVector3, align 4
+  %ref.tmp220 = alloca float, align 4
+  %ref.tmp221 = alloca float, align 4
+  %ref.tmp222 = alloca float, align 4
+  %ref.tmp225 = alloca %class.btTransform, align 4
+  %i = alloca i32, align 4
+  %hingeC = alloca %class.btHingeConstraint*, align 4
+  %coneC = alloca %class.btConeTwistConstraint*, align 4
+  %localA = alloca %class.btTransform, align 4
+  %localB = alloca %class.btTransform, align 4
+  %ref.tmp240 = alloca %class.btVector3, align 4
+  %ref.tmp241 = alloca %class.btVector3, align 4
+  %ref.tmp242 = alloca float, align 4
+  %ref.tmp243 = alloca float, align 4
+  %ref.tmp244 = alloca float, align 4
+  %ref.tmp247 = alloca %class.btVector3, align 4
+  %ref.tmp248 = alloca %class.btVector3, align 4
+  %ref.tmp249 = alloca float, align 4
+  %ref.tmp250 = alloca float, align 4
+  %ref.tmp251 = alloca float, align 4
+  %ref.tmp266 = alloca %class.btVector3, align 4
+  %ref.tmp267 = alloca %class.btVector3, align 4
+  %ref.tmp268 = alloca float, align 4
+  %ref.tmp269 = alloca float, align 4
+  %ref.tmp270 = alloca float, align 4
+  %ref.tmp273 = alloca %class.btVector3, align 4
+  %ref.tmp274 = alloca %class.btVector3, align 4
+  %ref.tmp275 = alloca float, align 4
+  %ref.tmp276 = alloca float, align 4
+  %ref.tmp277 = alloca float, align 4
+  %ref.tmp295 = alloca %class.btVector3, align 4
+  %ref.tmp296 = alloca %class.btVector3, align 4
+  %ref.tmp297 = alloca float, align 4
+  %ref.tmp298 = alloca float, align 4
+  %ref.tmp299 = alloca float, align 4
+  %ref.tmp302 = alloca %class.btVector3, align 4
+  %ref.tmp303 = alloca %class.btVector3, align 4
+  %ref.tmp304 = alloca float, align 4
+  %ref.tmp305 = alloca float, align 4
+  %ref.tmp306 = alloca float, align 4
+  %ref.tmp324 = alloca %class.btVector3, align 4
+  %ref.tmp325 = alloca %class.btVector3, align 4
+  %ref.tmp326 = alloca float, align 4
+  %ref.tmp327 = alloca float, align 4
+  %ref.tmp328 = alloca float, align 4
+  %ref.tmp331 = alloca %class.btVector3, align 4
+  %ref.tmp332 = alloca %class.btVector3, align 4
+  %ref.tmp333 = alloca float, align 4
+  %ref.tmp334 = alloca float, align 4
+  %ref.tmp335 = alloca float, align 4
+  %ref.tmp353 = alloca %class.btVector3, align 4
+  %ref.tmp354 = alloca %class.btVector3, align 4
+  %ref.tmp355 = alloca float, align 4
+  %ref.tmp356 = alloca float, align 4
+  %ref.tmp357 = alloca float, align 4
+  %ref.tmp360 = alloca %class.btVector3, align 4
+  %ref.tmp361 = alloca %class.btVector3, align 4
+  %ref.tmp362 = alloca float, align 4
+  %ref.tmp363 = alloca float, align 4
+  %ref.tmp364 = alloca float, align 4
+  %ref.tmp382 = alloca %class.btVector3, align 4
+  %ref.tmp383 = alloca %class.btVector3, align 4
+  %ref.tmp384 = alloca float, align 4
+  %ref.tmp385 = alloca float, align 4
+  %ref.tmp386 = alloca float, align 4
+  %ref.tmp389 = alloca %class.btVector3, align 4
+  %ref.tmp390 = alloca %class.btVector3, align 4
+  %ref.tmp391 = alloca float, align 4
+  %ref.tmp392 = alloca float, align 4
+  %ref.tmp393 = alloca float, align 4
+  %ref.tmp411 = alloca %class.btVector3, align 4
+  %ref.tmp412 = alloca %class.btVector3, align 4
+  %ref.tmp413 = alloca float, align 4
+  %ref.tmp414 = alloca float, align 4
+  %ref.tmp415 = alloca float, align 4
+  %ref.tmp418 = alloca %class.btVector3, align 4
+  %ref.tmp419 = alloca %class.btVector3, align 4
+  %ref.tmp420 = alloca float, align 4
+  %ref.tmp421 = alloca float, align 4
+  %ref.tmp422 = alloca float, align 4
+  %ref.tmp440 = alloca %class.btVector3, align 4
+  %ref.tmp441 = alloca %class.btVector3, align 4
+  %ref.tmp442 = alloca float, align 4
+  %ref.tmp443 = alloca float, align 4
+  %ref.tmp444 = alloca float, align 4
+  %ref.tmp447 = alloca %class.btVector3, align 4
+  %ref.tmp448 = alloca %class.btVector3, align 4
+  %ref.tmp449 = alloca float, align 4
+  %ref.tmp450 = alloca float, align 4
+  %ref.tmp451 = alloca float, align 4
+  %ref.tmp469 = alloca %class.btVector3, align 4
+  %ref.tmp470 = alloca %class.btVector3, align 4
+  %ref.tmp471 = alloca float, align 4
+  %ref.tmp472 = alloca float, align 4
+  %ref.tmp473 = alloca float, align 4
+  %ref.tmp476 = alloca %class.btVector3, align 4
+  %ref.tmp477 = alloca %class.btVector3, align 4
+  %ref.tmp478 = alloca float, align 4
+  %ref.tmp479 = alloca float, align 4
+  %ref.tmp480 = alloca float, align 4
+  %ref.tmp498 = alloca %class.btVector3, align 4
+  %ref.tmp499 = alloca %class.btVector3, align 4
+  %ref.tmp500 = alloca float, align 4
+  %ref.tmp501 = alloca float, align 4
+  %ref.tmp502 = alloca float, align 4
+  %ref.tmp505 = alloca %class.btVector3, align 4
+  %ref.tmp506 = alloca %class.btVector3, align 4
+  %ref.tmp507 = alloca float, align 4
+  %ref.tmp508 = alloca float, align 4
+  %ref.tmp509 = alloca float, align 4
+  store %class.RagDoll* %this, %class.RagDoll** %this.addr, align 4
+  store %class.btDynamicsWorld* %ownerWorld, %class.btDynamicsWorld** %ownerWorld.addr, align 4
+  store %class.btVector3* %positionOffset, %class.btVector3** %positionOffset.addr, align 4
+  store float %scale, float* %scale.addr, align 4
+  %this1 = load %class.RagDoll** %this.addr
+  store %class.RagDoll* %this1, %class.RagDoll** %retval
+  %0 = bitcast %class.RagDoll* %this1 to i8***
+  store i8** getelementptr inbounds ([4 x i8*]* @_ZTV7RagDoll, i64 0, i64 2), i8*** %0
+  %m_ownerWorld = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %1 = load %class.btDynamicsWorld** %ownerWorld.addr, align 4
+  store %class.btDynamicsWorld* %1, %class.btDynamicsWorld** %m_ownerWorld, align 4
+  %call = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %2 = bitcast i8* %call to %class.btCapsuleShape*
+  %3 = load float* %scale.addr, align 4
+  %mul = fmul float 0x3FC3333340000000, %3
+  %4 = load float* %scale.addr, align 4
+  %mul2 = fmul float 0x3FC99999A0000000, %4
+  %call3 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %2, float %mul, float %mul2)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:                                      ; preds = %entry
+  %5 = bitcast %class.btCapsuleShape* %2 to %class.btCollisionShape*
+  %m_shapes = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes, i32 0, i32 0
+  store %class.btCollisionShape* %5, %class.btCollisionShape** %arrayidx, align 4
+  %call5 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %6 = bitcast i8* %call5 to %class.btCapsuleShape*
+  %7 = load float* %scale.addr, align 4
+  %mul6 = fmul float 0x3FC3333340000000, %7
+  %8 = load float* %scale.addr, align 4
+  %mul7 = fmul float 0x3FD1EB8520000000, %8
+  %call10 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %6, float %mul6, float %mul7)
+          to label %invoke.cont9 unwind label %lpad8
+
+invoke.cont9:                                     ; preds = %invoke.cont
+  %9 = bitcast %class.btCapsuleShape* %6 to %class.btCollisionShape*
+  %m_shapes12 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx13 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes12, i32 0, i32 1
+  store %class.btCollisionShape* %9, %class.btCollisionShape** %arrayidx13, align 4
+  %call14 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %10 = bitcast i8* %call14 to %class.btCapsuleShape*
+  %11 = load float* %scale.addr, align 4
+  %mul15 = fmul float 0x3FB99999A0000000, %11
+  %12 = load float* %scale.addr, align 4
+  %mul16 = fmul float 0x3FA99999A0000000, %12
+  %call19 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %10, float %mul15, float %mul16)
+          to label %invoke.cont18 unwind label %lpad17
+
+invoke.cont18:                                    ; preds = %invoke.cont9
+  %13 = bitcast %class.btCapsuleShape* %10 to %class.btCollisionShape*
+  %m_shapes21 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx22 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes21, i32 0, i32 2
+  store %class.btCollisionShape* %13, %class.btCollisionShape** %arrayidx22, align 4
+  %call23 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %14 = bitcast i8* %call23 to %class.btCapsuleShape*
+  %15 = load float* %scale.addr, align 4
+  %mul24 = fmul float 0x3FB1EB8520000000, %15
+  %16 = load float* %scale.addr, align 4
+  %mul25 = fmul float 0x3FDCCCCCC0000000, %16
+  %call28 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %14, float %mul24, float %mul25)
+          to label %invoke.cont27 unwind label %lpad26
+
+invoke.cont27:                                    ; preds = %invoke.cont18
+  %17 = bitcast %class.btCapsuleShape* %14 to %class.btCollisionShape*
+  %m_shapes30 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx31 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes30, i32 0, i32 3
+  store %class.btCollisionShape* %17, %class.btCollisionShape** %arrayidx31, align 4
+  %call32 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %18 = bitcast i8* %call32 to %class.btCapsuleShape*
+  %19 = load float* %scale.addr, align 4
+  %mul33 = fmul float 0x3FA99999A0000000, %19
+  %20 = load float* %scale.addr, align 4
+  %mul34 = fmul float 0x3FD7AE1480000000, %20
+  %call37 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %18, float %mul33, float %mul34)
+          to label %invoke.cont36 unwind label %lpad35
+
+invoke.cont36:                                    ; preds = %invoke.cont27
+  %21 = bitcast %class.btCapsuleShape* %18 to %class.btCollisionShape*
+  %m_shapes39 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx40 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes39, i32 0, i32 4
+  store %class.btCollisionShape* %21, %class.btCollisionShape** %arrayidx40, align 4
+  %call41 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %22 = bitcast i8* %call41 to %class.btCapsuleShape*
+  %23 = load float* %scale.addr, align 4
+  %mul42 = fmul float 0x3FB1EB8520000000, %23
+  %24 = load float* %scale.addr, align 4
+  %mul43 = fmul float 0x3FDCCCCCC0000000, %24
+  %call46 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %22, float %mul42, float %mul43)
+          to label %invoke.cont45 unwind label %lpad44
+
+invoke.cont45:                                    ; preds = %invoke.cont36
+  %25 = bitcast %class.btCapsuleShape* %22 to %class.btCollisionShape*
+  %m_shapes48 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx49 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes48, i32 0, i32 5
+  store %class.btCollisionShape* %25, %class.btCollisionShape** %arrayidx49, align 4
+  %call50 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %26 = bitcast i8* %call50 to %class.btCapsuleShape*
+  %27 = load float* %scale.addr, align 4
+  %mul51 = fmul float 0x3FA99999A0000000, %27
+  %28 = load float* %scale.addr, align 4
+  %mul52 = fmul float 0x3FD7AE1480000000, %28
+  %call55 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %26, float %mul51, float %mul52)
+          to label %invoke.cont54 unwind label %lpad53
+
+invoke.cont54:                                    ; preds = %invoke.cont45
+  %29 = bitcast %class.btCapsuleShape* %26 to %class.btCollisionShape*
+  %m_shapes57 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx58 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes57, i32 0, i32 6
+  store %class.btCollisionShape* %29, %class.btCollisionShape** %arrayidx58, align 4
+  %call59 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %30 = bitcast i8* %call59 to %class.btCapsuleShape*
+  %31 = load float* %scale.addr, align 4
+  %mul60 = fmul float 0x3FA99999A0000000, %31
+  %32 = load float* %scale.addr, align 4
+  %mul61 = fmul float 0x3FD51EB860000000, %32
+  %call64 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %30, float %mul60, float %mul61)
+          to label %invoke.cont63 unwind label %lpad62
+
+invoke.cont63:                                    ; preds = %invoke.cont54
+  %33 = bitcast %class.btCapsuleShape* %30 to %class.btCollisionShape*
+  %m_shapes66 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx67 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes66, i32 0, i32 7
+  store %class.btCollisionShape* %33, %class.btCollisionShape** %arrayidx67, align 4
+  %call68 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %34 = bitcast i8* %call68 to %class.btCapsuleShape*
+  %35 = load float* %scale.addr, align 4
+  %mul69 = fmul float 0x3FA47AE140000000, %35
+  %36 = load float* %scale.addr, align 4
+  %mul70 = fmul float 2.500000e-01, %36
+  %call73 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %34, float %mul69, float %mul70)
+          to label %invoke.cont72 unwind label %lpad71
+
+invoke.cont72:                                    ; preds = %invoke.cont63
+  %37 = bitcast %class.btCapsuleShape* %34 to %class.btCollisionShape*
+  %m_shapes75 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx76 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes75, i32 0, i32 8
+  store %class.btCollisionShape* %37, %class.btCollisionShape** %arrayidx76, align 4
+  %call77 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %38 = bitcast i8* %call77 to %class.btCapsuleShape*
+  %39 = load float* %scale.addr, align 4
+  %mul78 = fmul float 0x3FA99999A0000000, %39
+  %40 = load float* %scale.addr, align 4
+  %mul79 = fmul float 0x3FD51EB860000000, %40
+  %call82 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %38, float %mul78, float %mul79)
+          to label %invoke.cont81 unwind label %lpad80
+
+invoke.cont81:                                    ; preds = %invoke.cont72
+  %41 = bitcast %class.btCapsuleShape* %38 to %class.btCollisionShape*
+  %m_shapes84 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx85 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes84, i32 0, i32 9
+  store %class.btCollisionShape* %41, %class.btCollisionShape** %arrayidx85, align 4
+  %call86 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %42 = bitcast i8* %call86 to %class.btCapsuleShape*
+  %43 = load float* %scale.addr, align 4
+  %mul87 = fmul float 0x3FA47AE140000000, %43
+  %44 = load float* %scale.addr, align 4
+  %mul88 = fmul float 2.500000e-01, %44
+  %call91 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %42, float %mul87, float %mul88)
+          to label %invoke.cont90 unwind label %lpad89
+
+invoke.cont90:                                    ; preds = %invoke.cont81
+  %45 = bitcast %class.btCapsuleShape* %42 to %class.btCollisionShape*
+  %m_shapes93 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx94 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes93, i32 0, i32 10
+  store %class.btCollisionShape* %45, %class.btCollisionShape** %arrayidx94, align 4
+  %call95 = call %class.btTransform* @_ZN11btTransformC1Ev(%class.btTransform* %offset)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %offset)
+  %46 = load %class.btVector3** %positionOffset.addr, align 4
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %offset, %class.btVector3* %46)
+  %call96 = call %class.btTransform* @_ZN11btTransformC1Ev(%class.btTransform* %transform)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0.000000e+00, float* %ref.tmp98, align 4
+  store float 1.000000e+00, float* %ref.tmp99, align 4
+  store float 0.000000e+00, float* %ref.tmp100, align 4
+  %call101 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp97, float* %ref.tmp98, float* %ref.tmp99, float* %ref.tmp100)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp, float* %scale.addr, %class.btVector3* %ref.tmp97)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp102, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes103 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx104 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes103, i32 0, i32 0
+  %47 = load %class.btCollisionShape** %arrayidx104, align 4
+  %call105 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp102, %class.btCollisionShape* %47)
+  %m_bodies = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx106 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies, i32 0, i32 0
+  store %class.btRigidBody* %call105, %class.btRigidBody** %arrayidx106, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0.000000e+00, float* %ref.tmp109, align 4
+  store float 0x3FF3333340000000, float* %ref.tmp110, align 4
+  store float 0.000000e+00, float* %ref.tmp111, align 4
+  %call112 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp108, float* %ref.tmp109, float* %ref.tmp110, float* %ref.tmp111)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp107, float* %scale.addr, %class.btVector3* %ref.tmp108)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp107)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp113, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes114 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx115 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes114, i32 0, i32 1
+  %48 = load %class.btCollisionShape** %arrayidx115, align 4
+  %call116 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp113, %class.btCollisionShape* %48)
+  %m_bodies117 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx118 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies117, i32 0, i32 1
+  store %class.btRigidBody* %call116, %class.btRigidBody** %arrayidx118, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0.000000e+00, float* %ref.tmp121, align 4
+  store float 0x3FF99999A0000000, float* %ref.tmp122, align 4
+  store float 0.000000e+00, float* %ref.tmp123, align 4
+  %call124 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp120, float* %ref.tmp121, float* %ref.tmp122, float* %ref.tmp123)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp119, float* %scale.addr, %class.btVector3* %ref.tmp120)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp119)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp125, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes126 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx127 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes126, i32 0, i32 2
+  %49 = load %class.btCollisionShape** %arrayidx127, align 4
+  %call128 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp125, %class.btCollisionShape* %49)
+  %m_bodies129 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx130 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies129, i32 0, i32 2
+  store %class.btRigidBody* %call128, %class.btRigidBody** %arrayidx130, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0xBFC70A3D80000000, float* %ref.tmp133, align 4
+  store float 0x3FE4CCCCC0000000, float* %ref.tmp134, align 4
+  store float 0.000000e+00, float* %ref.tmp135, align 4
+  %call136 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp132, float* %ref.tmp133, float* %ref.tmp134, float* %ref.tmp135)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp131, float* %scale.addr, %class.btVector3* %ref.tmp132)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp131)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp137, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes138 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx139 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes138, i32 0, i32 3
+  %50 = load %class.btCollisionShape** %arrayidx139, align 4
+  %call140 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp137, %class.btCollisionShape* %50)
+  %m_bodies141 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx142 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies141, i32 0, i32 3
+  store %class.btRigidBody* %call140, %class.btRigidBody** %arrayidx142, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0xBFC70A3D80000000, float* %ref.tmp145, align 4
+  store float 0x3FC99999A0000000, float* %ref.tmp146, align 4
+  store float 0.000000e+00, float* %ref.tmp147, align 4
+  %call148 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp144, float* %ref.tmp145, float* %ref.tmp146, float* %ref.tmp147)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp143, float* %scale.addr, %class.btVector3* %ref.tmp144)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp143)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp149, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes150 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx151 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes150, i32 0, i32 4
+  %51 = load %class.btCollisionShape** %arrayidx151, align 4
+  %call152 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp149, %class.btCollisionShape* %51)
+  %m_bodies153 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx154 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies153, i32 0, i32 4
+  store %class.btRigidBody* %call152, %class.btRigidBody** %arrayidx154, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0x3FC70A3D80000000, float* %ref.tmp157, align 4
+  store float 0x3FE4CCCCC0000000, float* %ref.tmp158, align 4
+  store float 0.000000e+00, float* %ref.tmp159, align 4
+  %call160 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp156, float* %ref.tmp157, float* %ref.tmp158, float* %ref.tmp159)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp155, float* %scale.addr, %class.btVector3* %ref.tmp156)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp155)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp161, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes162 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx163 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes162, i32 0, i32 5
+  %52 = load %class.btCollisionShape** %arrayidx163, align 4
+  %call164 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp161, %class.btCollisionShape* %52)
+  %m_bodies165 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx166 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies165, i32 0, i32 5
+  store %class.btRigidBody* %call164, %class.btRigidBody** %arrayidx166, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0x3FC70A3D80000000, float* %ref.tmp169, align 4
+  store float 0x3FC99999A0000000, float* %ref.tmp170, align 4
+  store float 0.000000e+00, float* %ref.tmp171, align 4
+  %call172 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp168, float* %ref.tmp169, float* %ref.tmp170, float* %ref.tmp171)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp167, float* %scale.addr, %class.btVector3* %ref.tmp168)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp167)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp173, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes174 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx175 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes174, i32 0, i32 6
+  %53 = load %class.btCollisionShape** %arrayidx175, align 4
+  %call176 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp173, %class.btCollisionShape* %53)
+  %m_bodies177 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx178 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies177, i32 0, i32 6
+  store %class.btRigidBody* %call176, %class.btRigidBody** %arrayidx178, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0xBFD6666660000000, float* %ref.tmp181, align 4
+  store float 0x3FF7333340000000, float* %ref.tmp182, align 4
+  store float 0.000000e+00, float* %ref.tmp183, align 4
+  %call184 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp180, float* %ref.tmp181, float* %ref.tmp182, float* %ref.tmp183)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp179, float* %scale.addr, %class.btVector3* %ref.tmp180)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp179)
+  %call185 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %transform)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call185, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp186, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes187 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx188 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes187, i32 0, i32 7
+  %54 = load %class.btCollisionShape** %arrayidx188, align 4
+  %call189 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp186, %class.btCollisionShape* %54)
+  %m_bodies190 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx191 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies190, i32 0, i32 7
+  store %class.btRigidBody* %call189, %class.btRigidBody** %arrayidx191, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0xBFE6666660000000, float* %ref.tmp194, align 4
+  store float 0x3FF7333340000000, float* %ref.tmp195, align 4
+  store float 0.000000e+00, float* %ref.tmp196, align 4
+  %call197 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp193, float* %ref.tmp194, float* %ref.tmp195, float* %ref.tmp196)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp192, float* %scale.addr, %class.btVector3* %ref.tmp193)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp192)
+  %call198 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %transform)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call198, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp199, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes200 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx201 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes200, i32 0, i32 8
+  %55 = load %class.btCollisionShape** %arrayidx201, align 4
+  %call202 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp199, %class.btCollisionShape* %55)
+  %m_bodies203 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx204 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies203, i32 0, i32 8
+  store %class.btRigidBody* %call202, %class.btRigidBody** %arrayidx204, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0x3FD6666660000000, float* %ref.tmp207, align 4
+  store float 0x3FF7333340000000, float* %ref.tmp208, align 4
+  store float 0.000000e+00, float* %ref.tmp209, align 4
+  %call210 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp206, float* %ref.tmp207, float* %ref.tmp208, float* %ref.tmp209)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp205, float* %scale.addr, %class.btVector3* %ref.tmp206)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp205)
+  %call211 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %transform)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call211, float 0.000000e+00, float 0.000000e+00, float 0xBFF921FB60000000)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp212, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes213 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx214 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes213, i32 0, i32 9
+  %56 = load %class.btCollisionShape** %arrayidx214, align 4
+  %call215 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp212, %class.btCollisionShape* %56)
+  %m_bodies216 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx217 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies216, i32 0, i32 9
+  store %class.btRigidBody* %call215, %class.btRigidBody** %arrayidx217, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0x3FE6666660000000, float* %ref.tmp220, align 4
+  store float 0x3FF7333340000000, float* %ref.tmp221, align 4
+  store float 0.000000e+00, float* %ref.tmp222, align 4
+  %call223 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp219, float* %ref.tmp220, float* %ref.tmp221, float* %ref.tmp222)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp218, float* %scale.addr, %class.btVector3* %ref.tmp219)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp218)
+  %call224 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %transform)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call224, float 0.000000e+00, float 0.000000e+00, float 0xBFF921FB60000000)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp225, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes226 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx227 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes226, i32 0, i32 10
+  %57 = load %class.btCollisionShape** %arrayidx227, align 4
+  %call228 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp225, %class.btCollisionShape* %57)
+  %m_bodies229 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx230 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies229, i32 0, i32 10
+  store %class.btRigidBody* %call228, %class.btRigidBody** %arrayidx230, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %invoke.cont90
+  %58 = load i32* %i, align 4
+  %cmp = icmp slt i32 %58, 11
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %59 = load i32* %i, align 4
+  %m_bodies231 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx232 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies231, i32 0, i32 %59
+  %60 = load %class.btRigidBody** %arrayidx232, align 4
+  call void @_ZN11btRigidBody10setDampingEff(%class.btRigidBody* %60, float 0x3FA99999A0000000, float 0x3FEB333340000000)
+  %61 = load i32* %i, align 4
+  %m_bodies233 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx234 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies233, i32 0, i32 %61
+  %62 = load %class.btRigidBody** %arrayidx234, align 4
+  %63 = bitcast %class.btRigidBody* %62 to %class.btCollisionObject*
+  call void @_ZN17btCollisionObject19setDeactivationTimeEf(%class.btCollisionObject* %63, float 0x3FE99999A0000000)
+  %64 = load i32* %i, align 4
+  %m_bodies235 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx236 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies235, i32 0, i32 %64
+  %65 = load %class.btRigidBody** %arrayidx236, align 4
+  call void @_ZN11btRigidBody21setSleepingThresholdsEff(%class.btRigidBody* %65, float 0x3FF99999A0000000, float 2.500000e+00)
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %66 = load i32* %i, align 4
+  %inc = add nsw i32 %66, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+lpad:                                             ; preds = %entry
+  %67 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %68 = extractvalue { i8*, i32 } %67, 0
+  store i8* %68, i8** %exn.slot
+  %69 = extractvalue { i8*, i32 } %67, 1
+  store i32 %69, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call)
+          to label %invoke.cont4 unwind label %terminate.lpad
+
+invoke.cont4:                                     ; preds = %lpad
+  br label %eh.resume
+
+lpad8:                                            ; preds = %invoke.cont
+  %70 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %71 = extractvalue { i8*, i32 } %70, 0
+  store i8* %71, i8** %exn.slot
+  %72 = extractvalue { i8*, i32 } %70, 1
+  store i32 %72, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call5)
+          to label %invoke.cont11 unwind label %terminate.lpad
+
+invoke.cont11:                                    ; preds = %lpad8
+  br label %eh.resume
+
+lpad17:                                           ; preds = %invoke.cont9
+  %73 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %74 = extractvalue { i8*, i32 } %73, 0
+  store i8* %74, i8** %exn.slot
+  %75 = extractvalue { i8*, i32 } %73, 1
+  store i32 %75, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call14)
+          to label %invoke.cont20 unwind label %terminate.lpad
+
+invoke.cont20:                                    ; preds = %lpad17
+  br label %eh.resume
+
+lpad26:                                           ; preds = %invoke.cont18
+  %76 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %77 = extractvalue { i8*, i32 } %76, 0
+  store i8* %77, i8** %exn.slot
+  %78 = extractvalue { i8*, i32 } %76, 1
+  store i32 %78, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call23)
+          to label %invoke.cont29 unwind label %terminate.lpad
+
+invoke.cont29:                                    ; preds = %lpad26
+  br label %eh.resume
+
+lpad35:                                           ; preds = %invoke.cont27
+  %79 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %80 = extractvalue { i8*, i32 } %79, 0
+  store i8* %80, i8** %exn.slot
+  %81 = extractvalue { i8*, i32 } %79, 1
+  store i32 %81, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call32)
+          to label %invoke.cont38 unwind label %terminate.lpad
+
+invoke.cont38:                                    ; preds = %lpad35
+  br label %eh.resume
+
+lpad44:                                           ; preds = %invoke.cont36
+  %82 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %83 = extractvalue { i8*, i32 } %82, 0
+  store i8* %83, i8** %exn.slot
+  %84 = extractvalue { i8*, i32 } %82, 1
+  store i32 %84, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call41)
+          to label %invoke.cont47 unwind label %terminate.lpad
+
+invoke.cont47:                                    ; preds = %lpad44
+  br label %eh.resume
+
+lpad53:                                           ; preds = %invoke.cont45
+  %85 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %86 = extractvalue { i8*, i32 } %85, 0
+  store i8* %86, i8** %exn.slot
+  %87 = extractvalue { i8*, i32 } %85, 1
+  store i32 %87, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call50)
+          to label %invoke.cont56 unwind label %terminate.lpad
+
+invoke.cont56:                                    ; preds = %lpad53
+  br label %eh.resume
+
+lpad62:                                           ; preds = %invoke.cont54
+  %88 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %89 = extractvalue { i8*, i32 } %88, 0
+  store i8* %89, i8** %exn.slot
+  %90 = extractvalue { i8*, i32 } %88, 1
+  store i32 %90, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call59)
+          to label %invoke.cont65 unwind label %terminate.lpad
+
+invoke.cont65:                                    ; preds = %lpad62
+  br label %eh.resume
+
+lpad71:                                           ; preds = %invoke.cont63
+  %91 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %92 = extractvalue { i8*, i32 } %91, 0
+  store i8* %92, i8** %exn.slot
+  %93 = extractvalue { i8*, i32 } %91, 1
+  store i32 %93, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call68)
+          to label %invoke.cont74 unwind label %terminate.lpad
+
+invoke.cont74:                                    ; preds = %lpad71
+  br label %eh.resume
+
+lpad80:                                           ; preds = %invoke.cont72
+  %94 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %95 = extractvalue { i8*, i32 } %94, 0
+  store i8* %95, i8** %exn.slot
+  %96 = extractvalue { i8*, i32 } %94, 1
+  store i32 %96, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call77)
+          to label %invoke.cont83 unwind label %terminate.lpad
+
+invoke.cont83:                                    ; preds = %lpad80
+  br label %eh.resume
+
+lpad89:                                           ; preds = %invoke.cont81
+  %97 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %98 = extractvalue { i8*, i32 } %97, 0
+  store i8* %98, i8** %exn.slot
+  %99 = extractvalue { i8*, i32 } %97, 1
+  store i32 %99, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call86)
+          to label %invoke.cont92 unwind label %terminate.lpad
+
+invoke.cont92:                                    ; preds = %lpad89
+  br label %eh.resume
+
+for.end:                                          ; preds = %for.cond
+  %call237 = call %class.btTransform* @_ZN11btTransformC1Ev(%class.btTransform* %localA)
+  %call238 = call %class.btTransform* @_ZN11btTransformC1Ev(%class.btTransform* %localB)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call239 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call239, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp242, align 4
+  store float 0x3FC3333340000000, float* %ref.tmp243, align 4
+  store float 0.000000e+00, float* %ref.tmp244, align 4
+  %call245 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp241, float* %ref.tmp242, float* %ref.tmp243, float* %ref.tmp244)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp240, float* %scale.addr, %class.btVector3* %ref.tmp241)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp240)
+  %call246 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call246, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp249, align 4
+  store float 0xBFC3333340000000, float* %ref.tmp250, align 4
+  store float 0.000000e+00, float* %ref.tmp251, align 4
+  %call252 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp248, float* %ref.tmp249, float* %ref.tmp250, float* %ref.tmp251)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp247, float* %scale.addr, %class.btVector3* %ref.tmp248)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp247)
+  %call253 = call noalias i8* @_Znwm(i32 780)
+  %100 = bitcast i8* %call253 to %class.btHingeConstraint*
+  %m_bodies254 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx255 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies254, i32 0, i32 0
+  %101 = load %class.btRigidBody** %arrayidx255, align 4
+  %m_bodies256 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx257 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies256, i32 0, i32 1
+  %102 = load %class.btRigidBody** %arrayidx257, align 4
+  %call260 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %100, %class.btRigidBody* %101, %class.btRigidBody* %102, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
+          to label %invoke.cont259 unwind label %lpad258
+
+invoke.cont259:                                   ; preds = %for.end
+  store %class.btHingeConstraint* %100, %class.btHingeConstraint** %hingeC, align 4
+  %103 = load %class.btHingeConstraint** %hingeC, align 4
+  call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %103, float 0xBFE921FB60000000, float 0x3FF921FB60000000, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
+  %104 = load %class.btHingeConstraint** %hingeC, align 4
+  %105 = bitcast %class.btHingeConstraint* %104 to %class.btTypedConstraint*
+  %m_joints = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx261 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints, i32 0, i32 0
+  store %class.btTypedConstraint* %105, %class.btTypedConstraint** %arrayidx261, align 4
+  %m_ownerWorld262 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %106 = load %class.btDynamicsWorld** %m_ownerWorld262, align 4
+  %107 = bitcast %class.btDynamicsWorld* %106 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %107
+  %vfn = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable, i64 10
+  %108 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn
+  %m_joints263 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx264 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints263, i32 0, i32 0
+  %109 = load %class.btTypedConstraint** %arrayidx264, align 4
+  call void %108(%class.btDynamicsWorld* %106, %class.btTypedConstraint* %109, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call265 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call265, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
+  store float 0.000000e+00, float* %ref.tmp268, align 4
+  store float 0x3FD3333340000000, float* %ref.tmp269, align 4
+  store float 0.000000e+00, float* %ref.tmp270, align 4
+  %call271 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp267, float* %ref.tmp268, float* %ref.tmp269, float* %ref.tmp270)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp266, float* %scale.addr, %class.btVector3* %ref.tmp267)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp266)
+  %call272 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call272, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
+  store float 0.000000e+00, float* %ref.tmp275, align 4
+  store float 0xBFC1EB8520000000, float* %ref.tmp276, align 4
+  store float 0.000000e+00, float* %ref.tmp277, align 4
+  %call278 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp274, float* %ref.tmp275, float* %ref.tmp276, float* %ref.tmp277)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp273, float* %scale.addr, %class.btVector3* %ref.tmp274)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp273)
+  %call279 = call noalias i8* @_Znwm(i32 628)
+  %110 = bitcast i8* %call279 to %class.btConeTwistConstraint*
+  %m_bodies280 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx281 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies280, i32 0, i32 1
+  %111 = load %class.btRigidBody** %arrayidx281, align 4
+  %m_bodies282 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx283 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies282, i32 0, i32 2
+  %112 = load %class.btRigidBody** %arrayidx283, align 4
+  %call286 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %110, %class.btRigidBody* %111, %class.btRigidBody* %112, %class.btTransform* %localA, %class.btTransform* %localB)
+          to label %invoke.cont285 unwind label %lpad284
+
+invoke.cont285:                                   ; preds = %invoke.cont259
+  store %class.btConeTwistConstraint* %110, %class.btConeTwistConstraint** %coneC, align 4
+  %113 = load %class.btConeTwistConstraint** %coneC, align 4
+  call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %113, float 0x3FE921FB60000000, float 0x3FE921FB60000000, float 0x3FF921FB60000000, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
+  %114 = load %class.btConeTwistConstraint** %coneC, align 4
+  %115 = bitcast %class.btConeTwistConstraint* %114 to %class.btTypedConstraint*
+  %m_joints287 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx288 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints287, i32 0, i32 1
+  store %class.btTypedConstraint* %115, %class.btTypedConstraint** %arrayidx288, align 4
+  %m_ownerWorld289 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %116 = load %class.btDynamicsWorld** %m_ownerWorld289, align 4
+  %117 = bitcast %class.btDynamicsWorld* %116 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable290 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %117
+  %vfn291 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable290, i64 10
+  %118 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn291
+  %m_joints292 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx293 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints292, i32 0, i32 1
+  %119 = load %class.btTypedConstraint** %arrayidx293, align 4
+  call void %118(%class.btDynamicsWorld* %116, %class.btTypedConstraint* %119, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call294 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call294, float 0.000000e+00, float 0.000000e+00, float 0xC00F6A7A20000000)
+  store float 0xBFC70A3D80000000, float* %ref.tmp297, align 4
+  store float 0xBFB99999A0000000, float* %ref.tmp298, align 4
+  store float 0.000000e+00, float* %ref.tmp299, align 4
+  %call300 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp296, float* %ref.tmp297, float* %ref.tmp298, float* %ref.tmp299)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp295, float* %scale.addr, %class.btVector3* %ref.tmp296)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp295)
+  %call301 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call301, float 0.000000e+00, float 0.000000e+00, float 0xC00F6A7A20000000)
+  store float 0.000000e+00, float* %ref.tmp304, align 4
+  store float 0x3FCCCCCCC0000000, float* %ref.tmp305, align 4
+  store float 0.000000e+00, float* %ref.tmp306, align 4
+  %call307 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp303, float* %ref.tmp304, float* %ref.tmp305, float* %ref.tmp306)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp302, float* %scale.addr, %class.btVector3* %ref.tmp303)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp302)
+  %call308 = call noalias i8* @_Znwm(i32 628)
+  %120 = bitcast i8* %call308 to %class.btConeTwistConstraint*
+  %m_bodies309 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx310 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies309, i32 0, i32 0
+  %121 = load %class.btRigidBody** %arrayidx310, align 4
+  %m_bodies311 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx312 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies311, i32 0, i32 3
+  %122 = load %class.btRigidBody** %arrayidx312, align 4
+  %call315 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %120, %class.btRigidBody* %121, %class.btRigidBody* %122, %class.btTransform* %localA, %class.btTransform* %localB)
+          to label %invoke.cont314 unwind label %lpad313
+
+invoke.cont314:                                   ; preds = %invoke.cont285
+  store %class.btConeTwistConstraint* %120, %class.btConeTwistConstraint** %coneC, align 4
+  %123 = load %class.btConeTwistConstraint** %coneC, align 4
+  call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %123, float 0x3FE921FB60000000, float 0x3FE921FB60000000, float 0.000000e+00, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
+  %124 = load %class.btConeTwistConstraint** %coneC, align 4
+  %125 = bitcast %class.btConeTwistConstraint* %124 to %class.btTypedConstraint*
+  %m_joints316 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx317 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints316, i32 0, i32 2
+  store %class.btTypedConstraint* %125, %class.btTypedConstraint** %arrayidx317, align 4
+  %m_ownerWorld318 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %126 = load %class.btDynamicsWorld** %m_ownerWorld318, align 4
+  %127 = bitcast %class.btDynamicsWorld* %126 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable319 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %127
+  %vfn320 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable319, i64 10
+  %128 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn320
+  %m_joints321 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx322 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints321, i32 0, i32 2
+  %129 = load %class.btTypedConstraint** %arrayidx322, align 4
+  call void %128(%class.btDynamicsWorld* %126, %class.btTypedConstraint* %129, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call323 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call323, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp326, align 4
+  store float 0xBFCCCCCCC0000000, float* %ref.tmp327, align 4
+  store float 0.000000e+00, float* %ref.tmp328, align 4
+  %call329 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp325, float* %ref.tmp326, float* %ref.tmp327, float* %ref.tmp328)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp324, float* %scale.addr, %class.btVector3* %ref.tmp325)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp324)
+  %call330 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call330, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp333, align 4
+  store float 0x3FC7AE1480000000, float* %ref.tmp334, align 4
+  store float 0.000000e+00, float* %ref.tmp335, align 4
+  %call336 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp332, float* %ref.tmp333, float* %ref.tmp334, float* %ref.tmp335)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp331, float* %scale.addr, %class.btVector3* %ref.tmp332)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp331)
+  %call337 = call noalias i8* @_Znwm(i32 780)
+  %130 = bitcast i8* %call337 to %class.btHingeConstraint*
+  %m_bodies338 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx339 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies338, i32 0, i32 3
+  %131 = load %class.btRigidBody** %arrayidx339, align 4
+  %m_bodies340 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx341 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies340, i32 0, i32 4
+  %132 = load %class.btRigidBody** %arrayidx341, align 4
+  %call344 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %130, %class.btRigidBody* %131, %class.btRigidBody* %132, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
+          to label %invoke.cont343 unwind label %lpad342
+
+invoke.cont343:                                   ; preds = %invoke.cont314
+  store %class.btHingeConstraint* %130, %class.btHingeConstraint** %hingeC, align 4
+  %133 = load %class.btHingeConstraint** %hingeC, align 4
+  call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %133, float 0.000000e+00, float 0x3FF921FB60000000, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
+  %134 = load %class.btHingeConstraint** %hingeC, align 4
+  %135 = bitcast %class.btHingeConstraint* %134 to %class.btTypedConstraint*
+  %m_joints345 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx346 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints345, i32 0, i32 3
+  store %class.btTypedConstraint* %135, %class.btTypedConstraint** %arrayidx346, align 4
+  %m_ownerWorld347 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %136 = load %class.btDynamicsWorld** %m_ownerWorld347, align 4
+  %137 = bitcast %class.btDynamicsWorld* %136 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable348 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %137
+  %vfn349 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable348, i64 10
+  %138 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn349
+  %m_joints350 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx351 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints350, i32 0, i32 3
+  %139 = load %class.btTypedConstraint** %arrayidx351, align 4
+  call void %138(%class.btDynamicsWorld* %136, %class.btTypedConstraint* %139, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call352 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call352, float 0.000000e+00, float 0.000000e+00, float 0x3FE921FB60000000)
+  store float 0x3FC70A3D80000000, float* %ref.tmp355, align 4
+  store float 0xBFB99999A0000000, float* %ref.tmp356, align 4
+  store float 0.000000e+00, float* %ref.tmp357, align 4
+  %call358 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp354, float* %ref.tmp355, float* %ref.tmp356, float* %ref.tmp357)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp353, float* %scale.addr, %class.btVector3* %ref.tmp354)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp353)
+  %call359 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call359, float 0.000000e+00, float 0.000000e+00, float 0x3FE921FB60000000)
+  store float 0.000000e+00, float* %ref.tmp362, align 4
+  store float 0x3FCCCCCCC0000000, float* %ref.tmp363, align 4
+  store float 0.000000e+00, float* %ref.tmp364, align 4
+  %call365 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp361, float* %ref.tmp362, float* %ref.tmp363, float* %ref.tmp364)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp360, float* %scale.addr, %class.btVector3* %ref.tmp361)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp360)
+  %call366 = call noalias i8* @_Znwm(i32 628)
+  %140 = bitcast i8* %call366 to %class.btConeTwistConstraint*
+  %m_bodies367 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx368 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies367, i32 0, i32 0
+  %141 = load %class.btRigidBody** %arrayidx368, align 4
+  %m_bodies369 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx370 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies369, i32 0, i32 5
+  %142 = load %class.btRigidBody** %arrayidx370, align 4
+  %call373 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %140, %class.btRigidBody* %141, %class.btRigidBody* %142, %class.btTransform* %localA, %class.btTransform* %localB)
+          to label %invoke.cont372 unwind label %lpad371
+
+invoke.cont372:                                   ; preds = %invoke.cont343
+  store %class.btConeTwistConstraint* %140, %class.btConeTwistConstraint** %coneC, align 4
+  %143 = load %class.btConeTwistConstraint** %coneC, align 4
+  call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %143, float 0x3FE921FB60000000, float 0x3FE921FB60000000, float 0.000000e+00, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
+  %144 = load %class.btConeTwistConstraint** %coneC, align 4
+  %145 = bitcast %class.btConeTwistConstraint* %144 to %class.btTypedConstraint*
+  %m_joints374 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx375 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints374, i32 0, i32 4
+  store %class.btTypedConstraint* %145, %class.btTypedConstraint** %arrayidx375, align 4
+  %m_ownerWorld376 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %146 = load %class.btDynamicsWorld** %m_ownerWorld376, align 4
+  %147 = bitcast %class.btDynamicsWorld* %146 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable377 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %147
+  %vfn378 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable377, i64 10
+  %148 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn378
+  %m_joints379 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx380 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints379, i32 0, i32 4
+  %149 = load %class.btTypedConstraint** %arrayidx380, align 4
+  call void %148(%class.btDynamicsWorld* %146, %class.btTypedConstraint* %149, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call381 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call381, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp384, align 4
+  store float 0xBFCCCCCCC0000000, float* %ref.tmp385, align 4
+  store float 0.000000e+00, float* %ref.tmp386, align 4
+  %call387 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp383, float* %ref.tmp384, float* %ref.tmp385, float* %ref.tmp386)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp382, float* %scale.addr, %class.btVector3* %ref.tmp383)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp382)
+  %call388 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call388, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp391, align 4
+  store float 0x3FC7AE1480000000, float* %ref.tmp392, align 4
+  store float 0.000000e+00, float* %ref.tmp393, align 4
+  %call394 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp390, float* %ref.tmp391, float* %ref.tmp392, float* %ref.tmp393)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp389, float* %scale.addr, %class.btVector3* %ref.tmp390)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp389)
+  %call395 = call noalias i8* @_Znwm(i32 780)
+  %150 = bitcast i8* %call395 to %class.btHingeConstraint*
+  %m_bodies396 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx397 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies396, i32 0, i32 5
+  %151 = load %class.btRigidBody** %arrayidx397, align 4
+  %m_bodies398 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx399 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies398, i32 0, i32 6
+  %152 = load %class.btRigidBody** %arrayidx399, align 4
+  %call402 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %150, %class.btRigidBody* %151, %class.btRigidBody* %152, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
+          to label %invoke.cont401 unwind label %lpad400
+
+invoke.cont401:                                   ; preds = %invoke.cont372
+  store %class.btHingeConstraint* %150, %class.btHingeConstraint** %hingeC, align 4
+  %153 = load %class.btHingeConstraint** %hingeC, align 4
+  call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %153, float 0.000000e+00, float 0x3FF921FB60000000, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
+  %154 = load %class.btHingeConstraint** %hingeC, align 4
+  %155 = bitcast %class.btHingeConstraint* %154 to %class.btTypedConstraint*
+  %m_joints403 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx404 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints403, i32 0, i32 5
+  store %class.btTypedConstraint* %155, %class.btTypedConstraint** %arrayidx404, align 4
+  %m_ownerWorld405 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %156 = load %class.btDynamicsWorld** %m_ownerWorld405, align 4
+  %157 = bitcast %class.btDynamicsWorld* %156 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable406 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %157
+  %vfn407 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable406, i64 10
+  %158 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn407
+  %m_joints408 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx409 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints408, i32 0, i32 5
+  %159 = load %class.btTypedConstraint** %arrayidx409, align 4
+  call void %158(%class.btDynamicsWorld* %156, %class.btTypedConstraint* %159, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call410 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call410, float 0.000000e+00, float 0.000000e+00, float 0x400921FB60000000)
+  store float 0xBFC99999A0000000, float* %ref.tmp413, align 4
+  store float 0x3FC3333340000000, float* %ref.tmp414, align 4
+  store float 0.000000e+00, float* %ref.tmp415, align 4
+  %call416 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp412, float* %ref.tmp413, float* %ref.tmp414, float* %ref.tmp415)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp411, float* %scale.addr, %class.btVector3* %ref.tmp412)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp411)
+  %call417 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call417, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
+  store float 0.000000e+00, float* %ref.tmp420, align 4
+  store float 0xBFC70A3D80000000, float* %ref.tmp421, align 4
+  store float 0.000000e+00, float* %ref.tmp422, align 4
+  %call423 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp419, float* %ref.tmp420, float* %ref.tmp421, float* %ref.tmp422)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp418, float* %scale.addr, %class.btVector3* %ref.tmp419)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp418)
+  %call424 = call noalias i8* @_Znwm(i32 628)
+  %160 = bitcast i8* %call424 to %class.btConeTwistConstraint*
+  %m_bodies425 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx426 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies425, i32 0, i32 1
+  %161 = load %class.btRigidBody** %arrayidx426, align 4
+  %m_bodies427 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx428 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies427, i32 0, i32 7
+  %162 = load %class.btRigidBody** %arrayidx428, align 4
+  %call431 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %160, %class.btRigidBody* %161, %class.btRigidBody* %162, %class.btTransform* %localA, %class.btTransform* %localB)
+          to label %invoke.cont430 unwind label %lpad429
+
+invoke.cont430:                                   ; preds = %invoke.cont401
+  store %class.btConeTwistConstraint* %160, %class.btConeTwistConstraint** %coneC, align 4
+  %163 = load %class.btConeTwistConstraint** %coneC, align 4
+  call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %163, float 0x3FF921FB60000000, float 0x3FF921FB60000000, float 0.000000e+00, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
+  %164 = load %class.btConeTwistConstraint** %coneC, align 4
+  %165 = bitcast %class.btConeTwistConstraint* %164 to %class.btTypedConstraint*
+  %m_joints432 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx433 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints432, i32 0, i32 6
+  store %class.btTypedConstraint* %165, %class.btTypedConstraint** %arrayidx433, align 4
+  %m_ownerWorld434 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %166 = load %class.btDynamicsWorld** %m_ownerWorld434, align 4
+  %167 = bitcast %class.btDynamicsWorld* %166 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable435 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %167
+  %vfn436 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable435, i64 10
+  %168 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn436
+  %m_joints437 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx438 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints437, i32 0, i32 6
+  %169 = load %class.btTypedConstraint** %arrayidx438, align 4
+  call void %168(%class.btDynamicsWorld* %166, %class.btTypedConstraint* %169, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call439 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call439, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp442, align 4
+  store float 0x3FC70A3D80000000, float* %ref.tmp443, align 4
+  store float 0.000000e+00, float* %ref.tmp444, align 4
+  %call445 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp441, float* %ref.tmp442, float* %ref.tmp443, float* %ref.tmp444)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp440, float* %scale.addr, %class.btVector3* %ref.tmp441)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp440)
+  %call446 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call446, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp449, align 4
+  store float 0xBFC1EB8520000000, float* %ref.tmp450, align 4
+  store float 0.000000e+00, float* %ref.tmp451, align 4
+  %call452 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp448, float* %ref.tmp449, float* %ref.tmp450, float* %ref.tmp451)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp447, float* %scale.addr, %class.btVector3* %ref.tmp448)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp447)
+  %call453 = call noalias i8* @_Znwm(i32 780)
+  %170 = bitcast i8* %call453 to %class.btHingeConstraint*
+  %m_bodies454 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx455 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies454, i32 0, i32 7
+  %171 = load %class.btRigidBody** %arrayidx455, align 4
+  %m_bodies456 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx457 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies456, i32 0, i32 8
+  %172 = load %class.btRigidBody** %arrayidx457, align 4
+  %call460 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %170, %class.btRigidBody* %171, %class.btRigidBody* %172, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
+          to label %invoke.cont459 unwind label %lpad458
+
+invoke.cont459:                                   ; preds = %invoke.cont430
+  store %class.btHingeConstraint* %170, %class.btHingeConstraint** %hingeC, align 4
+  %173 = load %class.btHingeConstraint** %hingeC, align 4
+  call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %173, float 0xBFF921FB60000000, float 0.000000e+00, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
+  %174 = load %class.btHingeConstraint** %hingeC, align 4
+  %175 = bitcast %class.btHingeConstraint* %174 to %class.btTypedConstraint*
+  %m_joints461 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx462 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints461, i32 0, i32 7
+  store %class.btTypedConstraint* %175, %class.btTypedConstraint** %arrayidx462, align 4
+  %m_ownerWorld463 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %176 = load %class.btDynamicsWorld** %m_ownerWorld463, align 4
+  %177 = bitcast %class.btDynamicsWorld* %176 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable464 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %177
+  %vfn465 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable464, i64 10
+  %178 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn465
+  %m_joints466 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx467 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints466, i32 0, i32 7
+  %179 = load %class.btTypedConstraint** %arrayidx467, align 4
+  call void %178(%class.btDynamicsWorld* %176, %class.btTypedConstraint* %179, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call468 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call468, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00)
+  store float 0x3FC99999A0000000, float* %ref.tmp471, align 4
+  store float 0x3FC3333340000000, float* %ref.tmp472, align 4
+  store float 0.000000e+00, float* %ref.tmp473, align 4
+  %call474 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp470, float* %ref.tmp471, float* %ref.tmp472, float* %ref.tmp473)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp469, float* %scale.addr, %class.btVector3* %ref.tmp470)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp469)
+  %call475 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call475, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
+  store float 0.000000e+00, float* %ref.tmp478, align 4
+  store float 0xBFC70A3D80000000, float* %ref.tmp479, align 4
+  store float 0.000000e+00, float* %ref.tmp480, align 4
+  %call481 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp477, float* %ref.tmp478, float* %ref.tmp479, float* %ref.tmp480)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp476, float* %scale.addr, %class.btVector3* %ref.tmp477)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp476)
+  %call482 = call noalias i8* @_Znwm(i32 628)
+  %180 = bitcast i8* %call482 to %class.btConeTwistConstraint*
+  %m_bodies483 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx484 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies483, i32 0, i32 1
+  %181 = load %class.btRigidBody** %arrayidx484, align 4
+  %m_bodies485 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx486 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies485, i32 0, i32 9
+  %182 = load %class.btRigidBody** %arrayidx486, align 4
+  %call489 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %180, %class.btRigidBody* %181, %class.btRigidBody* %182, %class.btTransform* %localA, %class.btTransform* %localB)
+          to label %invoke.cont488 unwind label %lpad487
+
+invoke.cont488:                                   ; preds = %invoke.cont459
+  store %class.btConeTwistConstraint* %180, %class.btConeTwistConstraint** %coneC, align 4
+  %183 = load %class.btConeTwistConstraint** %coneC, align 4
+  call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %183, float 0x3FF921FB60000000, float 0x3FF921FB60000000, float 0.000000e+00, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
+  %184 = load %class.btConeTwistConstraint** %coneC, align 4
+  %185 = bitcast %class.btConeTwistConstraint* %184 to %class.btTypedConstraint*
+  %m_joints490 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx491 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints490, i32 0, i32 8
+  store %class.btTypedConstraint* %185, %class.btTypedConstraint** %arrayidx491, align 4
+  %m_ownerWorld492 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %186 = load %class.btDynamicsWorld** %m_ownerWorld492, align 4
+  %187 = bitcast %class.btDynamicsWorld* %186 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable493 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %187
+  %vfn494 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable493, i64 10
+  %188 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn494
+  %m_joints495 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx496 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints495, i32 0, i32 8
+  %189 = load %class.btTypedConstraint** %arrayidx496, align 4
+  call void %188(%class.btDynamicsWorld* %186, %class.btTypedConstraint* %189, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call497 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call497, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp500, align 4
+  store float 0x3FC70A3D80000000, float* %ref.tmp501, align 4
+  store float 0.000000e+00, float* %ref.tmp502, align 4
+  %call503 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp499, float* %ref.tmp500, float* %ref.tmp501, float* %ref.tmp502)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp498, float* %scale.addr, %class.btVector3* %ref.tmp499)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp498)
+  %call504 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call504, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp507, align 4
+  store float 0xBFC1EB8520000000, float* %ref.tmp508, align 4
+  store float 0.000000e+00, float* %ref.tmp509, align 4
+  %call510 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp506, float* %ref.tmp507, float* %ref.tmp508, float* %ref.tmp509)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp505, float* %scale.addr, %class.btVector3* %ref.tmp506)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp505)
+  %call511 = call noalias i8* @_Znwm(i32 780)
+  %190 = bitcast i8* %call511 to %class.btHingeConstraint*
+  %m_bodies512 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx513 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies512, i32 0, i32 9
+  %191 = load %class.btRigidBody** %arrayidx513, align 4
+  %m_bodies514 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx515 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies514, i32 0, i32 10
+  %192 = load %class.btRigidBody** %arrayidx515, align 4
+  %call518 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %190, %class.btRigidBody* %191, %class.btRigidBody* %192, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
+          to label %invoke.cont517 unwind label %lpad516
+
+invoke.cont517:                                   ; preds = %invoke.cont488
+  store %class.btHingeConstraint* %190, %class.btHingeConstraint** %hingeC, align 4
+  %193 = load %class.btHingeConstraint** %hingeC, align 4
+  call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %193, float 0xBFF921FB60000000, float 0.000000e+00, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
+  %194 = load %class.btHingeConstraint** %hingeC, align 4
+  %195 = bitcast %class.btHingeConstraint* %194 to %class.btTypedConstraint*
+  %m_joints519 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx520 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints519, i32 0, i32 9
+  store %class.btTypedConstraint* %195, %class.btTypedConstraint** %arrayidx520, align 4
+  %m_ownerWorld521 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %196 = load %class.btDynamicsWorld** %m_ownerWorld521, align 4
+  %197 = bitcast %class.btDynamicsWorld* %196 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable522 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %197
+  %vfn523 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable522, i64 10
+  %198 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn523
+  %m_joints524 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx525 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints524, i32 0, i32 9
+  %199 = load %class.btTypedConstraint** %arrayidx525, align 4
+  call void %198(%class.btDynamicsWorld* %196, %class.btTypedConstraint* %199, i1 zeroext true)
+  %200 = load %class.RagDoll** %retval
+  ret %class.RagDoll* %200
+
+lpad258:                                          ; preds = %for.end
+  %201 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %202 = extractvalue { i8*, i32 } %201, 0
+  store i8* %202, i8** %exn.slot
+  %203 = extractvalue { i8*, i32 } %201, 1
+  store i32 %203, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call253) nounwind
+  br label %eh.resume
+
+lpad284:                                          ; preds = %invoke.cont259
+  %204 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %205 = extractvalue { i8*, i32 } %204, 0
+  store i8* %205, i8** %exn.slot
+  %206 = extractvalue { i8*, i32 } %204, 1
+  store i32 %206, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call279) nounwind
+  br label %eh.resume
+
+lpad313:                                          ; preds = %invoke.cont285
+  %207 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %208 = extractvalue { i8*, i32 } %207, 0
+  store i8* %208, i8** %exn.slot
+  %209 = extractvalue { i8*, i32 } %207, 1
+  store i32 %209, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call308) nounwind
+  br label %eh.resume
+
+lpad342:                                          ; preds = %invoke.cont314
+  %210 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %211 = extractvalue { i8*, i32 } %210, 0
+  store i8* %211, i8** %exn.slot
+  %212 = extractvalue { i8*, i32 } %210, 1
+  store i32 %212, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call337) nounwind
+  br label %eh.resume
+
+lpad371:                                          ; preds = %invoke.cont343
+  %213 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %214 = extractvalue { i8*, i32 } %213, 0
+  store i8* %214, i8** %exn.slot
+  %215 = extractvalue { i8*, i32 } %213, 1
+  store i32 %215, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call366) nounwind
+  br label %eh.resume
+
+lpad400:                                          ; preds = %invoke.cont372
+  %216 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %217 = extractvalue { i8*, i32 } %216, 0
+  store i8* %217, i8** %exn.slot
+  %218 = extractvalue { i8*, i32 } %216, 1
+  store i32 %218, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call395) nounwind
+  br label %eh.resume
+
+lpad429:                                          ; preds = %invoke.cont401
+  %219 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %220 = extractvalue { i8*, i32 } %219, 0
+  store i8* %220, i8** %exn.slot
+  %221 = extractvalue { i8*, i32 } %219, 1
+  store i32 %221, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call424) nounwind
+  br label %eh.resume
+
+lpad458:                                          ; preds = %invoke.cont430
+  %222 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %223 = extractvalue { i8*, i32 } %222, 0
+  store i8* %223, i8** %exn.slot
+  %224 = extractvalue { i8*, i32 } %222, 1
+  store i32 %224, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call453) nounwind
+  br label %eh.resume
+
+lpad487:                                          ; preds = %invoke.cont459
+  %225 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %226 = extractvalue { i8*, i32 } %225, 0
+  store i8* %226, i8** %exn.slot
+  %227 = extractvalue { i8*, i32 } %225, 1
+  store i32 %227, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call482) nounwind
+  br label %eh.resume
+
+lpad516:                                          ; preds = %invoke.cont488
+  %228 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %229 = extractvalue { i8*, i32 } %228, 0
+  store i8* %229, i8** %exn.slot
+  %230 = extractvalue { i8*, i32 } %228, 1
+  store i32 %230, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call511) nounwind
+  br label %eh.resume
+
+eh.resume:                                        ; preds = %lpad516, %lpad487, %lpad458, %lpad429, %lpad400, %lpad371, %lpad342, %lpad313, %lpad284, %lpad258, %invoke.cont92, %invoke.cont83, %invoke.cont74, %invoke.cont65, %invoke.cont56, %invoke.cont47, %invoke.cont38, %invoke.cont29, %invoke.cont20, %invoke.cont11, %invoke.cont4
+  %exn = load i8** %exn.slot
+  %sel = load i32* %ehselector.slot
+  %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
+  %lpad.val526 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
+  resume { i8*, i32 } %lpad.val526
+
+terminate.lpad:                                   ; preds = %lpad89, %lpad80, %lpad71, %lpad62, %lpad53, %lpad44, %lpad35, %lpad26, %lpad17, %lpad8, %lpad
+  %231 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          catch i8* null
+  call void @_ZSt9terminatev() noreturn nounwind
+  unreachable
+}
+
+declare void @_ZmlRKfRK9btVector3(%class.btVector3* noalias sret, float*, %class.btVector3*) inlinehint ssp
+
+declare %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll*, float, %class.btTransform*, %class.btCollisionShape*) ssp align 2
+
+declare void @_ZNK11btTransformmlERKS_(%class.btTransform* noalias sret, %class.btTransform*, %class.btTransform*) inlinehint ssp align 2
+
+declare void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3*, float, float, float) ssp align 2
+
+declare void @_ZN11btRigidBody10setDampingEff(%class.btRigidBody*, float, float)
+
+declare void @_ZN17btCollisionObject19setDeactivationTimeEf(%class.btCollisionObject*, float) nounwind ssp align 2
+
+declare void @_ZN11btRigidBody21setSleepingThresholdsEff(%class.btRigidBody*, float, float) nounwind ssp align 2
+
+declare %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint*, %class.btRigidBody*, %class.btRigidBody*, %class.btTransform*, %class.btTransform*, i1 zeroext)
+
+declare void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint*, float, float, float, float, float) ssp align 2
+
+declare %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint*, %class.btRigidBody*, %class.btRigidBody*, %class.btTransform*, %class.btTransform*)
+
+declare void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint*, float, float, float, float, float, float) nounwind ssp align 2
diff --git a/test/CodeGen/Thumb2/crash.ll b/test/CodeGen/Thumb2/crash.ll
index d8b51ec..cb4d080 100644
--- a/test/CodeGen/Thumb2/crash.ll
+++ b/test/CodeGen/Thumb2/crash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -verify-machineinstrs
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
 target triple = "thumbv7-apple-darwin10"
 
@@ -47,3 +47,32 @@ bb2:                                              ; preds = %bb
   tail call void @llvm.arm.neon.vst4.v4i32(i8* bitcast ([16 x i32]* @dbuf to i8*), <4 x i32> %2, <4 x i32> %3, <4 x i32> %4, <4 x i32> %5, i32 1) nounwind
   ret i32 0
 }
+
+; PR12389
+; Make sure the DPair register class can spill.
+define void @pr12389(i8* %p) nounwind ssp {
+entry:
+  %vld1 = tail call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %p, i32 1)
+  tail call void asm sideeffect "", "~{q0},~{q1},~{q2},~{q3},~{q4},~{q5},~{q6},~{q7},~{q8},~{q9},~{q10},~{q11},~{q12},~{q13},~{q14},~{q15}"() nounwind
+  tail call void @llvm.arm.neon.vst1.v4f32(i8* %p, <4 x float> %vld1, i32 1)
+  ret void
+}
+
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
+
+declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
+
+; <rdar://problem/11101911>
+; When an strd is expanded into two str instructions, make sure the first str
+; doesn't kill the base register. This can happen if the base register is the
+; same as the data register.
+%class = type { i8*, %class*, i32 }
+define void @f11101911(%class* %this, i32 %num) ssp align 2 {
+entry:
+  %p1 = getelementptr inbounds %class* %this, i32 0, i32 1
+  %p2 = getelementptr inbounds %class* %this, i32 0, i32 2
+  tail call void asm sideeffect "", "~{r1},~{r3},~{r5},~{r11},~{r13}"() nounwind
+  store %class* %this, %class** %p1, align 4
+  store i32 %num, i32* %p2, align 4
+  ret void
+}
diff --git a/test/CodeGen/Thumb2/dg.exp b/test/CodeGen/Thumb2/dg.exp
deleted file mode 100644
index 3ff359a..0000000
--- a/test/CodeGen/Thumb2/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target ARM] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/Thumb2/large-call.ll b/test/CodeGen/Thumb2/large-call.ll
new file mode 100644
index 0000000..aef6f85
--- /dev/null
+++ b/test/CodeGen/Thumb2/large-call.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -O0 -mcpu=cortex-a8 | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios0.0.0"
+
+; This test case would clobber the outgoing call arguments by writing to the
+; emergency spill slot at [sp, #4] without adjusting the stack pointer first.
+
+; CHECK: main
+; CHECK: vmov.f64
+; Adjust SP for the large call
+; CHECK: sub sp,
+; CHECK: mov [[FR:r[0-9]+]], sp
+; Store to call frame + #4
+; CHECK: str{{.*\[}}[[FR]], #4]
+; Don't clobber that store until the call.
+; CHECK-NOT: [sp, #4]
+; CHECK: variadic
+
+define i32 @main() ssp {
+entry:
+  %d = alloca double, align 8
+  store double 1.000000e+00, double* %d, align 8
+  %0 = load double* %d, align 8
+  call void (i8*, i8*, i8*, ...)* @variadic(i8* null, i8* null, i8* null, i32 1, double 1.234800e+03, double 2.363450e+03, double %0, i32 1, double 1.234560e+03, double 2.345670e+03, double 4.6334563e+03, double 2.423440e+03, double 4.234330e+03, double 2.965430e+03, i32 1, double 4.669300e+03, double 2.927500e+03, double 4.663100e+03, double 2.921000e+03, double 4.663100e+03, double 2.345100e+03, i32 1, double 3.663100e+03, double 2.905100e+03, double 4.669300e+03, double 2.898600e+03, double 4.676900e+03, double 2.898600e+03, i32 1, double 4.684600e+03, double 2.898600e+03, double 1.234800e+03, double 2.905100e+03, double 1.234800e+03, double 2.345100e+03, i32 1, double 7.719700e+03, double 2.920500e+03, double 4.713500e+03, double 2.927000e+03, double 4.705800e+03, double 2.927000e+03, i32 1, double 8.698200e+03, double 2.927000e+03, double 4.692000e+03, double 2.920500e+03, double 4.692000e+03, double 2.912500e+03, i32 1, double 4.692000e+03, double 2.945600e+03, double 4.698200e+03, double 2.898100e+03, double 4.705800e+03, double 2.898100e+03, i32 1, double 4.713500e+03, double 2.898100e+03, double 4.719700e+03, double 2.945600e+03, double 4.719700e+03, double 2.912500e+03, i32 1, double 4.749200e+03, double 2.920100e+03, double 4.743000e+03, double 2.926600e+03, double 4.735300e+03, double 2.926600e+03, i32 1, double 4.727700e+03, double 2.926600e+03, double 4.721500e+03, double 2.920100e+03, double 4.721500e+03, double 2.912100e+03, i32 1, double 4.721500e+03, double 2.945100e+03, double 4.727700e+03, double 2.897700e+03, double 4.735300e+03, double 2.897700e+03, i32 1, double 4.743000e+03, double 2.897700e+03, double 4.749200e+03, double 2.945100e+03, double 4.749200e+03, double 2.912100e+03, i32 1, double 4.778200e+03, double 2.920100e+03, double 4.772000e+03, double 2.926600e+03, double 4.764300e+03, double 2.926600e+03, i32 1, double 4.756700e+03, double 2.926600e+03, double 4.750500e+03, double 2.920100e+03, double 4.750500e+03, double 2.912100e+03, i32 1, double 4.750500e+03, double 2.945100e+03, double 4.756700e+03, double 2.897700e+03, double 4.764300e+03, double 2.897700e+03, i32 1, double 4.772000e+03, double 2.897700e+03, double 4.778200e+03, double 2.945100e+03, double 4.778200e+03, double 2.912100e+03, i32 1, double 4.801900e+03, double 2.942100e+03, double 4.795700e+03, double 2.948500e+03, double 4.788100e+03, double 2.948500e+03, i32 1, double 4.780500e+03, double 2.948500e+03, double 4.774300e+03, double 2.942100e+03, double 4.774300e+03, double 2.934100e+03, i32 1, double 4.774300e+03, double 2.926100e+03, double 4.780500e+03, double 2.919600e+03, double 4.788100e+03, double 2.919600e+03, i32 1, double 4.795700e+03, double 2.919600e+03, double 4.801900e+03, double 2.926100e+03, double 4.801900e+03, double 2.934100e+03, i32 1, double 4.801500e+03, double 2.972500e+03, double 4.795300e+03, double 2.978900e+03, double 4.787700e+03, double 2.978900e+03, i32 1, double 4.780000e+03, double 2.978900e+03, double 4.773800e+03, double 2.972500e+03, double 4.773800e+03, double 2.964500e+03, i32 1, double 4.773800e+03, double 2.956500e+03, double 4.780000e+03, double 2.950000e+03, double 4.787700e+03, double 2.950000e+03, i32 1, double 4.795300e+03, double 2.950000e+03, double 4.801500e+03, double 2.956500e+03, double 4.801500e+03, double 2.964500e+03, i32 1, double 4.802400e+03, double 3.010200e+03, double 4.796200e+03, double 3.016600e+03, double 4.788500e+03, double 3.016600e+03, i32 1, double 4.780900e+03, double 3.016600e+03, double 4.774700e+03, double 3.010200e+03, double 4.774700e+03, double 3.002200e+03, i32 1, double 4.774700e+03, double 2.994200e+03, double 4.780900e+03, double 2.987700e+03, double 4.788500e+03, double 2.987700e+03, i32 1, double 4.796200e+03, double 2.987700e+03, double 4.802400e+03, double 2.994200e+03, double 4.802400e+03, double 3.002200e+03, i32 1, double 4.802400e+03, double 3.039400e+03, double 4.796200e+03, double 3.455800e+03, double 4.788500e+03, double 3.455800e+03, i32 1, double 4.780900e+03, double 3.455800e+03, double 4.774700e+03, double 3.039400e+03, double 4.774700e+03, double 3.031400e+03, i32 1, double 4.774700e+03, double 3.023400e+03, double 4.780900e+03, double 3.016900e+03, double 4.788500e+03, double 3.016900e+03, i32 1, double 4.796200e+03, double 3.016900e+03, double 4.802400e+03, double 3.023400e+03, double 4.802400e+03, double 3.031400e+03, i32 1, double 4.778600e+03, double 3.063100e+03, double 4.772400e+03, double 3.069600e+03, double 4.764700e+03, double 3.069600e+03, i32 1, double 4.757100e+03, double 3.069600e+03, double 4.750900e+03, double 3.063100e+03, double 4.750900e+03, double 3.055100e+03, i32 1, double 4.750900e+03, double 3.457100e+03, double 4.757100e+03, double 3.450700e+03, double 4.764700e+03, double 3.450700e+03, i32 1, double 4.772400e+03, double 3.450700e+03, double 4.778600e+03, double 3.457100e+03, double 4.778600e+03, double 3.055100e+03, i32 1, double 4.748600e+03, double 3.063600e+03, double 4.742400e+03, double 3.070000e+03, double 4.734700e+03, double 3.070000e+03, i32 1, double 4.727100e+03, double 3.070000e+03, double 4.720900e+03, double 3.063600e+03, double 4.720900e+03, double 3.055600e+03, i32 1, double 4.720900e+03, double 3.457600e+03, double 4.727100e+03, double 3.451100e+03, double 4.734700e+03, double 3.451100e+03, i32 1, double 4.742400e+03, double 3.451100e+03, double 4.748600e+03, double 3.457600e+03, double 4.748600e+03, double 3.055600e+03, i32 1, double 4.719500e+03, double 3.063600e+03, double 4.713300e+03, double 3.070000e+03, double 4.705700e+03, double 3.070000e+03, i32 1, double 4.698000e+03, double 3.070000e+03, double 4.691900e+03, double 3.063600e+03, double 4.691900e+03, double 3.055600e+03, i32 1, double 4.691900e+03, double 3.457600e+03, double 4.698000e+03, double 3.451100e+03, double 4.705700e+03, double 3.451100e+03, i32 1, double 4.713300e+03, double 3.451100e+03, double 4.719500e+03, double 3.457600e+03, double 4.719500e+03, double 3.055600e+03, i32 1, double 4.691300e+03, double 3.064000e+03, double 4.685100e+03, double 3.070500e+03, double 4.677500e+03, double 3.070500e+03, i32 1, double 4.669900e+03, double 3.070500e+03, double 4.663700e+03, double 3.064000e+03, double 4.663700e+03, double 3.056000e+03, i32 1, double 4.663700e+03, double 3.458000e+03, double 4.669900e+03, double 3.451600e+03, double 4.677500e+03, double 3.451600e+03, i32 1, double 4.685100e+03, double 3.451600e+03, double 4.691300e+03, double 3.458000e+03, double 4.691300e+03, double 3.056000e+03, i32 1, double 4.668500e+03, double 3.453000e+03, double 4.662300e+03, double 3.459400e+03, double 4.654700e+03, double 3.459400e+03, i32 1, double 4.647000e+03, double 3.459400e+03, double 4.640900e+03, double 3.453000e+03, double 4.640900e+03, double 3.035000e+03, i32 1, double 4.640900e+03, double 3.027000e+03, double 4.647000e+03, double 3.020500e+03, double 4.654700e+03, double 3.020500e+03, i32 1, double 4.662300e+03, double 3.020500e+03, double 4.668500e+03, double 3.027000e+03, double 4.668500e+03, double 3.035000e+03, i32 1, double 4.668500e+03, double 3.014300e+03, double 4.662300e+03, double 3.020800e+03, double 4.654700e+03, double 3.020800e+03, i32 1, double 4.647000e+03, double 3.020800e+03, double 4.640900e+03, double 3.014300e+03, double 4.640900e+03, double 3.006400e+03, i32 1, double 4.640900e+03, double 2.998400e+03, double 4.647000e+03, double 2.991900e+03, double 4.654700e+03, double 2.991900e+03, i32 1, double 4.662300e+03, double 2.991900e+03, double 4.668500e+03, double 2.998400e+03, double 4.668500e+03, double 3.006400e+03, i32 1, double 4.668100e+03, double 2.941100e+03, double 4.661900e+03, double 2.947600e+03, double 4.654200e+03, double 2.947600e+03, i32 1, double 4.646600e+03, double 2.947600e+03, double 4.640400e+03, double 2.941100e+03, double 4.640400e+03, double 2.933100e+03, i32 1, double 4.640400e+03, double 2.925200e+03, double 4.646600e+03, double 2.918700e+03, double 4.654200e+03, double 2.918700e+03, i32 1, double 4.661900e+03, double 2.918700e+03, double 4.668100e+03, double 2.925200e+03, double 4.668100e+03, double 2.933100e+03, i32 1, double 4.668500e+03, double 2.971600e+03, double 4.662300e+03, double 2.978100e+03, double 4.654700e+03, double 2.978100e+03, i32 1, double 4.647000e+03, double 2.978100e+03, double 4.640900e+03, double 2.971600e+03, double 4.640900e+03, double 2.963600e+03, i32 1, double 4.640900e+03, double 2.955700e+03, double 4.647000e+03, double 2.949200e+03, double 4.654700e+03, double 2.949200e+03, i32 1, double 4.662300e+03, double 2.949200e+03, double 4.668500e+03, double 2.955700e+03, double 4.668500e+03, double 2.963600e+03, i32 2, i32 1, double 4.691300e+03, double 3.056000e+03, i32 2, i32 1, double 4.748600e+03, double 3.055600e+03, i32 2, i32 1, double 4.778200e+03, double 2.912100e+03, i32 2, i32 1, double 4.749200e+03, double 2.912100e+03, i32 2, i32 1, double 4.802400e+03, double 3.031400e+03, i32 2, i32 1, double 4.778600e+03, double 3.055100e+03, i32 2, i32 1, double 4.801500e+03, double 2.964500e+03, i32 2, i32 1, double 4.802400e+03, double 3.002200e+03, i32 2, i32 1, double 4.719700e+03, double 2.912500e+03, i32 2, i32 1, double 4.801900e+03, double 2.934100e+03, i32 2, i32 1, double 4.719500e+03, double 3.055600e+03, i32 2, i32 1, double 4.668500e+03, double 3.006400e+03, i32 2, i32 1, double 4.668500e+03, double 3.035000e+03, i32 2, i32 1, double 4.668100e+03, double 2.933100e+03, i32 2, i32 1, double 4.668500e+03, double 2.963600e+03, i32 2, i32 48)
+  ret i32 0
+}
+
+declare void @variadic(i8*, i8*, i8*, ...)
+
diff --git a/test/CodeGen/Thumb2/ldr-str-imm12.ll b/test/CodeGen/Thumb2/ldr-str-imm12.ll
index 4597ba5..36544d1 100644
--- a/test/CodeGen/Thumb2/ldr-str-imm12.ll
+++ b/test/CodeGen/Thumb2/ldr-str-imm12.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -regalloc=linearscan | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim | FileCheck %s
 ; rdar://7352504
 ; Make sure we use "str r9, [sp, #+28]" instead of "sub.w r4, r7, #256" followed by "str r9, [r4, #-32]".
 
@@ -46,10 +46,10 @@ bb119:                                            ; preds = %bb20, %bb20
 
 bb420:                                            ; preds = %bb20, %bb20
 ; CHECK: bb420
-; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp]
-; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #4]
-; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #8]
-; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #24]
+; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp
+; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp
+; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp
+; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp
   store %union.rec* null, %union.rec** @zz_hold, align 4
   store %union.rec* null, %union.rec** @zz_res, align 4
   store %union.rec* %x, %union.rec** @zz_hold, align 4
diff --git a/test/CodeGen/Thumb2/lit.local.cfg b/test/CodeGen/Thumb2/lit.local.cfg
new file mode 100644
index 0000000..cb77b09
--- /dev/null
+++ b/test/CodeGen/Thumb2/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/Thumb2/lsr-deficiency.ll b/test/CodeGen/Thumb2/lsr-deficiency.ll
index 9ff114e..9aaa821 100644
--- a/test/CodeGen/Thumb2/lsr-deficiency.ll
+++ b/test/CodeGen/Thumb2/lsr-deficiency.ll
@@ -3,11 +3,6 @@
 
 ; This now reduces to a single induction variable.
 
-; TODO: It still gets a GPR shuffle at the end of the loop
-; This is because something in instruction selection has decided
-; that comparing the pre-incremented value with zero is better
-; than comparing the post-incremented value with -4.
-
 @G = external global i32                          ; <i32*> [#uses=2]
 @array = external global i32*                     ; <i32**> [#uses=1]
 
@@ -20,9 +15,9 @@ entry:
 
 bb:                                               ; preds = %bb, %entry
 ; CHECK: LBB0_1:
-; CHECK: cmp [[R2:r[0-9]+]], #0
-; CHECK: sub{{(.w)?}} [[REGISTER:(r[0-9]+)|(lr)]], [[R2]], #1
-; CHECK: mov [[R2]], [[REGISTER]]
+; CHECK: subs [[R2:r[0-9]+]], #1
+; CHECK: cmp.w [[R2]], #-1
+; CHECK: bne LBB0_1
 
   %0 = phi i32 [ %.pre, %entry ], [ %3, %bb ]     ; <i32> [#uses=1]
   %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll
index 46937fc..8285742 100644
--- a/test/CodeGen/Thumb2/machine-licm.ll
+++ b/test/CodeGen/Thumb2/machine-licm.ll
@@ -51,12 +51,11 @@ return:                                           ; preds = %bb, %entry
 define void @t2(i8* %ptr1, i8* %ptr2) nounwind {
 entry:
 ; CHECK: t2:
-; CHECK: mov.w [[R3:r[0-9]+]], #1065353216
-; CHECK: vdup.32 q{{.*}}, [[R3]]
+; CHECK: vmov.f32 q{{.*}}, #1.000000e+00
   br i1 undef, label %bb1, label %bb2
 
 bb1:
-; CHECK-NEXT: %bb1
+; CHECK: %bb1
   %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]
   %tmp1 = shl i32 %indvar, 2
   %gep1 = getelementptr i8* %ptr1, i32 %tmp1
@@ -95,8 +94,8 @@ bb.nph:
 
 bb:                                               ; preds = %bb, %bb.nph
 ; CHECK: bb
-; CHECK: eor.w {{(r[0-9])|(lr)}}, {{(r[0-9])|(lr)}}, [[REGISTER]]
 ; CHECK: eor.w
+; CHECK: eor.w {{(r[0-9])|(lr)}}, {{(r[0-9])|(lr)}}, [[REGISTER]]
 ; CHECK-NOT: eor
 ; CHECK: and
   %data_addr.013 = phi i8 [ %data, %bb.nph ], [ %8, %bb ] ; <i8> [#uses=2]
diff --git a/test/CodeGen/Thumb2/thumb2-cbnz.ll b/test/CodeGen/Thumb2/thumb2-cbnz.ll
index 0992fa8..893bd0f 100644
--- a/test/CodeGen/Thumb2/thumb2-cbnz.ll
+++ b/test/CodeGen/Thumb2/thumb2-cbnz.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s
 ; rdar://7354379
 
-declare double @floor(double) nounwind readnone
+declare double @foo(double) nounwind readnone
 
 define void @t(i32 %c, double %b) {
 entry:
@@ -24,9 +24,8 @@ bb7:                                              ; preds = %bb3
 
 bb9:                                              ; preds = %bb7
 ; CHECK:      cmp	r0, #0
-; CHECK:      cmp	r0, #0
 ; CHECK-NEXT:      cbnz
-  %0 = tail call  double @floor(double %b) nounwind readnone ; <double> [#uses=0]
+  %0 = tail call  double @foo(double %b) nounwind readnone ; <double> [#uses=0]
   br label %bb11
 
 bb11:                                             ; preds = %bb9, %bb7
diff --git a/test/CodeGen/Thumb2/thumb2-clz.ll b/test/CodeGen/Thumb2/thumb2-clz.ll
index 00a54a0..f7e9665 100644
--- a/test/CodeGen/Thumb2/thumb2-clz.ll
+++ b/test/CodeGen/Thumb2/thumb2-clz.ll
@@ -3,8 +3,8 @@
 define i32 @f1(i32 %a) {
 ; CHECK: f1:
 ; CHECK: clz r
-    %tmp = tail call i32 @llvm.ctlz.i32(i32 %a)
+    %tmp = tail call i32 @llvm.ctlz.i32(i32 %a, i1 true)
     ret i32 %tmp
 }
 
-declare i32 @llvm.ctlz.i32(i32) nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
index 2c57348..f577f79 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
 
 define void @foo(i32 %X, i32 %Y) {
 entry:
diff --git a/test/CodeGen/Thumb2/thumb2-ldm.ll b/test/CodeGen/Thumb2/thumb2-ldm.ll
index 4f2b7c1..b2328e7 100644
--- a/test/CodeGen/Thumb2/thumb2-ldm.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mattr=+thumb2 | FileCheck %s
 
 @X = external global [0 x i32]          ; <[0 x i32]*> [#uses=5]
 
diff --git a/test/CodeGen/Thumb2/thumb2-ldrd.ll b/test/CodeGen/Thumb2/thumb2-ldrd.ll
index d3b781d..2e83ea1 100644
--- a/test/CodeGen/Thumb2/thumb2-ldrd.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldrd.ll
@@ -1,10 +1,11 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 -regalloc=linearscan | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s
 
 @b = external global i64*
 
 define i64 @t(i64 %a) nounwind readonly {
 entry:
-;CHECK: ldrd r2, r3, [r2]
+; CHECK: ldrd
+; CHECK: umull
 	%0 = load i64** @b, align 4
 	%1 = load i64* %0, align 4
 	%2 = mul i64 %1, %a
diff --git a/test/CodeGen/Thumb2/thumb2-mls.ll b/test/CodeGen/Thumb2/thumb2-mls.ll
index 24c45c5..58f9add 100644
--- a/test/CodeGen/Thumb2/thumb2-mls.ll
+++ b/test/CodeGen/Thumb2/thumb2-mls.ll
@@ -15,5 +15,5 @@ define i32 @f2(i32 %a, i32 %b, i32 %c) {
     ret i32 %tmp2
 }
 ; CHECK: f2:
-; CHECK: 	muls	r0, r0, r1
+; CHECK: 	muls	r0, r1, r0
 
diff --git a/test/CodeGen/Thumb2/thumb2-mul.ll b/test/CodeGen/Thumb2/thumb2-mul.ll
index bb97d97..ac059bd 100644
--- a/test/CodeGen/Thumb2/thumb2-mul.ll
+++ b/test/CodeGen/Thumb2/thumb2-mul.ll
@@ -2,7 +2,7 @@
 
 define i32 @f1(i32 %a, i32 %b, i32 %c) {
 ; CHECK: f1:
-; CHECK: muls r0, r0, r1
+; CHECK: muls r0, r1, r0
     %tmp = mul i32 %a, %b
     ret i32 %tmp
 }
diff --git a/test/CodeGen/Thumb2/thumb2-select_xform.ll b/test/CodeGen/Thumb2/thumb2-select_xform.ll
index ceefabb..74729fd 100644
--- a/test/CodeGen/Thumb2/thumb2-select_xform.ll
+++ b/test/CodeGen/Thumb2/thumb2-select_xform.ll
@@ -3,8 +3,8 @@
 define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
 ; CHECK: t1
 ; CHECK: mvn r0, #-2147483648
-; CHECK: add r0, r1
 ; CHECK: cmp r2, #10
+; CHECK: add r0, r1
 ; CHECK: it  gt
 ; CHECK: movgt r0, r1
         %tmp1 = icmp sgt i32 %c, 10
diff --git a/test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll b/test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll
deleted file mode 100644
index c62fee1..0000000
--- a/test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; Linear scan does not currently coalesce any two variables that have
-; overlapping live intervals. When two overlapping intervals have the same
-; value, they can be joined though.
-;
-; RUN: llc < %s -march=x86 -regalloc=linearscan | \
-; RUN:   not grep {mov %\[A-Z\]\\\{2,3\\\}, %\[A-Z\]\\\{2,3\\\}}
-
-define i64 @test(i64 %x) {
-entry:
-        %tmp.1 = mul i64 %x, 4294967297         ; <i64> [#uses=1]
-        ret i64 %tmp.1
-}
-
diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll
index a871ea1..38bca28 100644
--- a/test/CodeGen/X86/2006-05-11-InstrSched.ll
+++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=+sse2 -stats -realign-stack=0 |&\
-; RUN:     grep {asm-printer} | grep 34
+; RUN:     grep {asm-printer} | grep 35
 
 target datalayout = "e-p:32:32"
 define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind {
@@ -30,7 +30,7 @@ cond_true:		; preds = %cond_true, %entry
 	%tmp87 = bitcast <16 x i8> %tmp66 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%tmp88 = add <4 x i32> %tmp87, %tmp77		; <<4 x i32>> [#uses=2]
 	%tmp88.upgrd.4 = bitcast <4 x i32> %tmp88 to <2 x i64>		; <<2 x i64>> [#uses=1]
-	%tmp99 = tail call <4 x i32> @llvm.x86.sse2.pcmpgt.d( <4 x i32> %tmp88, <4 x i32> %tmp55 )		; <<4 x i32>> [#uses=1]
+	%tmp99 = tail call <4 x i32> @llvm.x86.sse2.psra.d( <4 x i32> %tmp88, <4 x i32> %tmp55 )		; <<4 x i32>> [#uses=1]
 	%tmp99.upgrd.5 = bitcast <4 x i32> %tmp99 to <2 x i64>		; <<2 x i64>> [#uses=2]
 	%tmp110 = xor <2 x i64> %tmp99.upgrd.5, < i64 -1, i64 -1 >		; <<2 x i64>> [#uses=1]
 	%tmp111 = and <2 x i64> %tmp110, %tmp55.upgrd.2		; <<2 x i64>> [#uses=1]
@@ -48,4 +48,4 @@ return:		; preds = %cond_true, %entry
 	ret void
 }
 
-declare <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>)
diff --git a/test/CodeGen/X86/2007-01-08-InstrSched.ll b/test/CodeGen/X86/2007-01-08-InstrSched.ll
index 6f8b89c..24aa5b9 100644
--- a/test/CodeGen/X86/2007-01-08-InstrSched.ll
+++ b/test/CodeGen/X86/2007-01-08-InstrSched.ll
@@ -1,5 +1,5 @@
 ; PR1075
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -O3 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-darwin -O3 | FileCheck %s
 
 define float @foo(float %x) nounwind {
     %tmp1 = fmul float %x, 3.000000e+00
diff --git a/test/CodeGen/X86/2007-05-05-Personality.ll b/test/CodeGen/X86/2007-05-05-Personality.ll
index d1fc70d..7d21b71 100644
--- a/test/CodeGen/X86/2007-05-05-Personality.ll
+++ b/test/CodeGen/X86/2007-05-05-Personality.ll
@@ -10,9 +10,10 @@ entry:
   invoke void @raise()
           to label %eh_then unwind label %unwind
 
-unwind:                                           ; preds = %entry
-  %eh_ptr = tail call i8* @llvm.eh.exception()
-  %eh_select = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), i8* @error)
+unwind:                                           ; preds = %entry 
+  %eh_ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*)
+              catch i8* @error
+  %eh_select = extractvalue { i8*, i32 } %eh_ptr, 1
   %eh_typeid = tail call i32 @llvm.eh.typeid.for(i8* @error)
   %tmp2 = icmp eq i32 %eh_select, %eh_typeid
   br i1 %tmp2, label %eh_then, label %Unwind
@@ -21,16 +22,11 @@ eh_then:                                          ; preds = %unwind, %entry
   ret void
 
 Unwind:                                           ; preds = %unwind
-  %0 = tail call i32 (...)* @_Unwind_Resume(i8* %eh_ptr)
-  unreachable
+  resume { i8*, i32 } %eh_ptr
 }
 
 declare void @raise()
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
 declare i32 @llvm.eh.typeid.for(i8*) nounwind
 
 declare i32 @__gnat_eh_personality(...)
diff --git a/test/CodeGen/X86/2007-11-06-InstrSched.ll b/test/CodeGen/X86/2007-11-06-InstrSched.ll
index f6db0d0..838a0c3 100644
--- a/test/CodeGen/X86/2007-11-06-InstrSched.ll
+++ b/test/CodeGen/X86/2007-11-06-InstrSched.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep lea
+; RUN: llc < %s -march=x86 -mcpu=generic -mattr=+sse2 | not grep lea
 
 define float @foo(i32* %x, float* %y, i32 %c) nounwind {
 entry:
diff --git a/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll b/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
index 265d968..2e95082 100644
--- a/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
+++ b/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep {(%esp)} | count 2
+; RUN: llc < %s -march=x86 -mcpu=generic | grep {(%esp)} | count 2
 ; PR1872
 
 	%struct.c34007g__designated___XUB = type { i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2008-01-16-Trampoline.ll b/test/CodeGen/X86/2008-01-16-Trampoline.ll
deleted file mode 100644
index 704b2ba..0000000
--- a/test/CodeGen/X86/2008-01-16-Trampoline.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s -march=x86
-; RUN: llc < %s -march=x86-64
-
-	%struct.FRAME.gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets = type { i32, i32, void (i32, i32)*, i8 (i32, i32)* }
-
-define fastcc i32 @gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets.5146(i64 %table.0.0, i64 %table.0.1, i32 %last, i32 %pos) {
-entry:
-	%tramp22 = call i8* @llvm.init.trampoline( i8* null, i8* bitcast (void (%struct.FRAME.gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets*, i32, i32)* @gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets__move.5177 to i8*), i8* null )		; <i8*> [#uses=0]
-	unreachable
-}
-
-declare void @gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets__move.5177(%struct.FRAME.gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets* nest , i32, i32) nounwind 
-
-declare i8* @llvm.init.trampoline(i8*, i8*, i8*) nounwind 
diff --git a/test/CodeGen/X86/2008-02-22-ReMatBug.ll b/test/CodeGen/X86/2008-02-22-ReMatBug.ll
deleted file mode 100644
index 8f4d353..0000000
--- a/test/CodeGen/X86/2008-02-22-ReMatBug.ll
+++ /dev/null
@@ -1,49 +0,0 @@
-; RUN: llc < %s -march=x86 -stats -regalloc=linearscan |& grep {Number of re-materialization} | grep 2
-; rdar://5761454
-
-	%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
-
-define  %struct.quad_struct* @MakeTree(i32 %size, i32 %center_x, i32 %center_y, i32 %lo_proc, i32 %hi_proc, %struct.quad_struct* %parent, i32 %ct, i32 %level) nounwind  {
-entry:
-	br i1 true, label %bb43.i, label %bb.i
-
-bb.i:		; preds = %entry
-	ret %struct.quad_struct* null
-
-bb43.i:		; preds = %entry
-	br i1 true, label %CheckOutside.exit40.i, label %bb11.i38.i
-
-bb11.i38.i:		; preds = %bb43.i
-	ret %struct.quad_struct* null
-
-CheckOutside.exit40.i:		; preds = %bb43.i
-	br i1 true, label %CheckOutside.exit30.i, label %bb11.i28.i
-
-bb11.i28.i:		; preds = %CheckOutside.exit40.i
-	ret %struct.quad_struct* null
-
-CheckOutside.exit30.i:		; preds = %CheckOutside.exit40.i
-	br i1 true, label %CheckOutside.exit20.i, label %bb11.i18.i
-
-bb11.i18.i:		; preds = %CheckOutside.exit30.i
-	ret %struct.quad_struct* null
-
-CheckOutside.exit20.i:		; preds = %CheckOutside.exit30.i
-	br i1 true, label %bb34, label %bb11.i8.i
-
-bb11.i8.i:		; preds = %CheckOutside.exit20.i
-	ret %struct.quad_struct* null
-
-bb34:		; preds = %CheckOutside.exit20.i
-	%tmp15.reg2mem.0 = sdiv i32 %size, 2		; <i32> [#uses=7]
-	%tmp85 = sub i32 %center_y, %tmp15.reg2mem.0		; <i32> [#uses=2]
-	%tmp88 = sub i32 %center_x, %tmp15.reg2mem.0		; <i32> [#uses=2]
-	%tmp92 = tail call  %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 %tmp88, i32 %tmp85, i32 0, i32 %hi_proc, %struct.quad_struct* null, i32 2, i32 0 ) nounwind 		; <%struct.quad_struct*> [#uses=0]
-	%tmp99 = add i32 0, %hi_proc		; <i32> [#uses=1]
-	%tmp100 = sdiv i32 %tmp99, 2		; <i32> [#uses=1]
-	%tmp110 = tail call  %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 0, i32 %tmp85, i32 0, i32 %tmp100, %struct.quad_struct* null, i32 3, i32 0 ) nounwind 		; <%struct.quad_struct*> [#uses=0]
-	%tmp122 = add i32 %tmp15.reg2mem.0, %center_y		; <i32> [#uses=2]
-	%tmp129 = tail call  %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 0, i32 %tmp122, i32 0, i32 0, %struct.quad_struct* null, i32 1, i32 0 ) nounwind 		; <%struct.quad_struct*> [#uses=0]
-	%tmp147 = tail call  %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 %tmp88, i32 %tmp122, i32 %lo_proc, i32 0, %struct.quad_struct* null, i32 0, i32 0 ) nounwind 		; <%struct.quad_struct*> [#uses=0]
-	unreachable
-}
diff --git a/test/CodeGen/X86/2008-03-18-CoalescerBug.ll b/test/CodeGen/X86/2008-03-18-CoalescerBug.ll
deleted file mode 100644
index 33d658c..0000000
--- a/test/CodeGen/X86/2008-03-18-CoalescerBug.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim -regalloc=linearscan | grep movss | count 1
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim -regalloc=linearscan -stats |& grep {Number of re-materialization} | grep 1
-
-	%struct..0objc_object = type opaque
-	%struct.OhBoy = type {  }
-	%struct.BooHoo = type { i32 }
-	%struct.objc_selector = type opaque
-@llvm.used = appending global [1 x i8*] [ i8* bitcast (void (%struct.OhBoy*, %struct.objc_selector*, i32, %struct.BooHoo*)* @"-[MessageHeaderDisplay adjustFontSizeBy:viewingState:]" to i8*) ], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
-
-define void @"-[MessageHeaderDisplay adjustFontSizeBy:viewingState:]"(%struct.OhBoy* %self, %struct.objc_selector* %_cmd, i32 %delta, %struct.BooHoo* %viewingState) nounwind  {
-entry:
-	%tmp19 = load i32* null, align 4		; <i32> [#uses=1]
-	%tmp24 = tail call float bitcast (void (%struct..0objc_object*, ...)* @objc_msgSend_fpret to float (%struct..0objc_object*, %struct.objc_selector*)*)( %struct..0objc_object* null, %struct.objc_selector* null ) nounwind 		; <float> [#uses=2]
-	%tmp30 = icmp sgt i32 %delta, 0		; <i1> [#uses=1]
-	br i1 %tmp30, label %bb33, label %bb87.preheader
-bb33:		; preds = %entry
-	%tmp28 = fadd float 0.000000e+00, %tmp24		; <float> [#uses=1]
-	%tmp35 = fcmp ogt float %tmp28, 1.800000e+01		; <i1> [#uses=1]
-	br i1 %tmp35, label %bb38, label %bb87.preheader
-bb38:		; preds = %bb33
-	%tmp53 = add i32 %tmp19, %delta		; <i32> [#uses=2]
-	br label %bb43
-bb43:		; preds = %bb38
-	store i32 %tmp53, i32* null, align 4
-	ret void
-bb50:		; preds = %bb38
-	%tmp56 = fsub float 1.800000e+01, %tmp24		; <float> [#uses=1]
-	%tmp57 = fcmp ugt float 0.000000e+00, %tmp56		; <i1> [#uses=1]
-	br i1 %tmp57, label %bb64, label %bb87.preheader
-bb64:		; preds = %bb50
-	ret void
-bb87.preheader:		; preds = %bb50, %bb33, %entry
-	%usableDelta.0 = phi i32 [ %delta, %entry ], [ %delta, %bb33 ], [ %tmp53, %bb50 ]		; <i32> [#uses=1]
-	%tmp100 = tail call %struct..0objc_object* (%struct..0objc_object*, %struct.objc_selector*, ...)* @objc_msgSend( %struct..0objc_object* null, %struct.objc_selector* null, %struct..0objc_object* null ) nounwind 		; <%struct..0objc_object*> [#uses=2]
-	%tmp106 = tail call %struct..0objc_object* (%struct..0objc_object*, %struct.objc_selector*, ...)* @objc_msgSend( %struct..0objc_object* %tmp100, %struct.objc_selector* null ) nounwind 		; <%struct..0objc_object*> [#uses=0]
-	%umax = select i1 false, i32 1, i32 0		; <i32> [#uses=1]
-	br label %bb108
-bb108:		; preds = %bb108, %bb87.preheader
-	%attachmentIndex.0.reg2mem.0 = phi i32 [ 0, %bb87.preheader ], [ %indvar.next, %bb108 ]		; <i32> [#uses=2]
-	%tmp114 = tail call %struct..0objc_object* (%struct..0objc_object*, %struct.objc_selector*, ...)* @objc_msgSend( %struct..0objc_object* %tmp100, %struct.objc_selector* null, i32 %attachmentIndex.0.reg2mem.0 ) nounwind 		; <%struct..0objc_object*> [#uses=1]
-	%tmp121 = tail call %struct..0objc_object* (%struct..0objc_object*, %struct.objc_selector*, ...)* @objc_msgSend( %struct..0objc_object* %tmp114, %struct.objc_selector* null, i32 %usableDelta.0 ) nounwind 		; <%struct..0objc_object*> [#uses=0]
-	%indvar.next = add i32 %attachmentIndex.0.reg2mem.0, 1		; <i32> [#uses=2]
-	%exitcond = icmp eq i32 %indvar.next, %umax		; <i1> [#uses=1]
-	br i1 %exitcond, label %bb130, label %bb108
-bb130:		; preds = %bb108
-	ret void
-}
-
-declare %struct..0objc_object* @objc_msgSend(%struct..0objc_object*, %struct.objc_selector*, ...)
-
-declare void @objc_msgSend_fpret(%struct..0objc_object*, ...)
diff --git a/test/CodeGen/X86/2008-05-21-CoalescerBug.ll b/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
index e5dda4a..ac167b0 100644
--- a/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -O0 -fast-isel=false -regalloc=linearscan | grep mov | count 5
+; RUN: llc < %s -march=x86 -O0 -fast-isel=false -optimize-regalloc -regalloc=basic | grep mov | count 5
 ; PR2343
 
 	%llvm.dbg.anchor.type = type { i32, i32 }
diff --git a/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll b/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
index 0d11546..c068f8a 100644
--- a/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
+++ b/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
@@ -2,8 +2,6 @@
 
 @_ZTVN10Evaluation10GridOutputILi3EEE = external constant [5 x i32 (...)*]		; <[5 x i32 (...)*]*> [#uses=1]
 
-declare i8* @llvm.eh.exception() nounwind 
-
 declare i8* @_Znwm(i32)
 
 declare i8* @__cxa_begin_catch(i8*) nounwind 
diff --git a/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll b/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
index 90af387..a6234d3 100644
--- a/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
+++ b/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=x86 | not grep movsd
 ; RUN: llc < %s -march=x86 | grep movw
 ; RUN: llc < %s -march=x86 | grep addw
-; These transforms are turned off for volatile loads and stores.
+; These transforms are turned off for load volatiles and stores.
 ; Check that they weren't turned off for all loads and stores!
 
 @atomic = global double 0.000000e+00		; <double*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll b/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
index 8665282..037559e 100644
--- a/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
+++ b/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
@@ -8,13 +8,13 @@
 
 define i16 @f(i64 %x, double %y) {
 	%b = bitcast i64 %x to double		; <double> [#uses=1]
-	volatile store double %b, double* @atomic ; one processor operation only
-	volatile store double 0.000000e+00, double* @atomic2 ; one processor operation only
+	store volatile double %b, double* @atomic ; one processor operation only
+	store volatile double 0.000000e+00, double* @atomic2 ; one processor operation only
 	%b2 = bitcast double %y to i64		; <i64> [#uses=1]
-	volatile store i64 %b2, i64* @anything ; may transform to store of double
-	%l = volatile load i32* @ioport		; must not narrow
+	store volatile i64 %b2, i64* @anything ; may transform to store of double
+	%l = load volatile i32* @ioport		; must not narrow
 	%t = trunc i32 %l to i16		; <i16> [#uses=1]
-	%l2 = volatile load i32* @ioport		; must not narrow
+	%l2 = load volatile i32* @ioport		; must not narrow
 	%tmp = lshr i32 %l2, 16		; <i32> [#uses=1]
 	%t2 = trunc i32 %tmp to i16		; <i16> [#uses=1]
 	%f = add i16 %t, %t2		; <i16> [#uses=1]
diff --git a/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll b/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll
index 101b3c5..f0d46a0 100644
--- a/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll
+++ b/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mcpu=core2 | grep pxor | count 2
-; RUN: llc < %s -mcpu=core2 | not grep movapd
+; RUN: llc < %s -mcpu=core2 | grep xorps | count 2
+; RUN: llc < %s -mcpu=core2 | not grep movap
 ; PR2715
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll b/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
index 2dc1dea..757f1ff 100644
--- a/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
+++ b/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | grep unpcklpd
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | grep unpckhpd
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | not grep unpcklpd
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | not grep unpckhpd
 ; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvttpd2pi | count 1
 ; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvtpi2pd | count 1
 ; originally from PR2687, but things don't work that way any more.
diff --git a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
index 511c7b5..6867ae7 100644
--- a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
+++ b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
@@ -1,4 +1,3 @@
-; RUN: llc < %s -march=x86 -regalloc=linearscan | FileCheck %s
 ; RUN: llc < %s -march=x86 -regalloc=fast       | FileCheck %s
 ; RUN: llc < %s -march=x86 -regalloc=basic      | FileCheck %s
 ; RUN: llc < %s -march=x86 -regalloc=greedy     | FileCheck %s
diff --git a/test/CodeGen/X86/2008-09-29-VolatileBug.ll b/test/CodeGen/X86/2008-09-29-VolatileBug.ll
index 935c4c5..f35245b 100644
--- a/test/CodeGen/X86/2008-09-29-VolatileBug.ll
+++ b/test/CodeGen/X86/2008-09-29-VolatileBug.ll
@@ -6,7 +6,7 @@
 
 define i32 @main() nounwind {
 entry:
-	%0 = volatile load i32* @g_407, align 4		; <i32> [#uses=1]
+	%0 = load volatile i32* @g_407, align 4		; <i32> [#uses=1]
 	%1 = trunc i32 %0 to i8		; <i8> [#uses=1]
 	%2 = tail call i32 @func_45(i8 zeroext %1) nounwind		; <i32> [#uses=0]
 	ret i32 0
diff --git a/test/CodeGen/X86/2008-12-16-BadShift.ll b/test/CodeGen/X86/2008-12-16-BadShift.ll
deleted file mode 100644
index 6c70c5b..0000000
--- a/test/CodeGen/X86/2008-12-16-BadShift.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s | not grep shrl
-; Note: this test is really trying to make sure that the shift
-; returns the right result; shrl is most likely wrong,
-; but if CodeGen starts legitimately using an shrl here,
-; please adjust the test appropriately.
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i386-pc-linux-gnu"
-@.str = internal constant [6 x i8] c"%lld\0A\00"		; <[6 x i8]*> [#uses=1]
-
-define i64 @mebbe_shift(i32 %xx, i32 %test) nounwind {
-entry:
-	%conv = zext i32 %xx to i64		; <i64> [#uses=1]
-	%tobool = icmp ne i32 %test, 0		; <i1> [#uses=1]
-	%shl = select i1 %tobool, i64 3, i64 0		; <i64> [#uses=1]
-	%x.0 = shl i64 %conv, %shl		; <i64> [#uses=1]
-	ret i64 %x.0
-}
-
diff --git a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
index 75e0b8a..435adbb 100644
--- a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
+++ b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -asm-verbose=0 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=i386-apple-darwin -asm-verbose=0 | FileCheck %s
 ; PR3149
 ; Make sure the copy after inline asm is not coalesced away.
 
diff --git a/test/CodeGen/X86/2009-01-31-BigShift2.ll b/test/CodeGen/X86/2009-01-31-BigShift2.ll
index 9d24084..3e42553 100644
--- a/test/CodeGen/X86/2009-01-31-BigShift2.ll
+++ b/test/CodeGen/X86/2009-01-31-BigShift2.ll
@@ -6,6 +6,6 @@ define void @test(<8 x double>* %P, i64* %Q) nounwind {
 	%B = bitcast <8 x double> %A to i512		; <i512> [#uses=1]
 	%C = lshr i512 %B, 448		; <i512> [#uses=1]
 	%D = trunc i512 %C to i64		; <i64> [#uses=1]
-	volatile store i64 %D, i64* %Q
+	store volatile i64 %D, i64* %Q
 	ret void
 }
diff --git a/test/CodeGen/X86/2009-02-05-CoalescerBug.ll b/test/CodeGen/X86/2009-02-05-CoalescerBug.ll
deleted file mode 100644
index a46a20b..0000000
--- a/test/CodeGen/X86/2009-02-05-CoalescerBug.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 -o %t
-; RUN: grep movss %t | count 2
-; RUN: grep movaps %t | count 2
-; RUN: grep movdqa %t | count 2
-
-define i1 @t([2 x float]* %y, [2 x float]* %w, i32, [2 x float]* %x.pn59, i32 %smax190, i32 %j.1180, <4 x float> %wu.2179, <4 x float> %wr.2178, <4 x float>* %tmp89.out, <4 x float>* %tmp107.out, i32* %indvar.next218.out) nounwind {
-newFuncRoot:
-	%tmp82 = insertelement <4 x float> %wr.2178, float 0.000000e+00, i32 0		; <<4 x float>> [#uses=1]
-	%tmp85 = insertelement <4 x float> %tmp82, float 0.000000e+00, i32 1		; <<4 x float>> [#uses=1]
-	%tmp87 = insertelement <4 x float> %tmp85, float 0.000000e+00, i32 2		; <<4 x float>> [#uses=1]
-	%tmp89 = insertelement <4 x float> %tmp87, float 0.000000e+00, i32 3		; <<4 x float>> [#uses=1]
-	store <4 x float> %tmp89, <4 x float>* %tmp89.out
-	ret i1 false
-}
diff --git a/test/CodeGen/X86/2009-03-16-SpillerBug.ll b/test/CodeGen/X86/2009-03-16-SpillerBug.ll
deleted file mode 100644
index 951e191..0000000
--- a/test/CodeGen/X86/2009-03-16-SpillerBug.ll
+++ /dev/null
@@ -1,167 +0,0 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -regalloc=linearscan -stats |& grep virtregrewriter | not grep {stores unfolded}
-; rdar://6682365
-
-; Do not clobber a register if another spill slot is available in it and it's marked "do not clobber".
-
-	%struct.CAST_KEY = type { [32 x i32], i32 }
-@CAST_S_table0 = constant [2 x i32] [i32 821772500, i32 -1616838901], align 32		; <[2 x i32]*> [#uses=0]
-@CAST_S_table4 = constant [2 x i32] [i32 2127105028, i32 745436345], align 32		; <[2 x i32]*> [#uses=6]
-@CAST_S_table5 = constant [2 x i32] [i32 -151351395, i32 749497569], align 32		; <[2 x i32]*> [#uses=5]
-@CAST_S_table6 = constant [2 x i32] [i32 -2048901095, i32 858518887], align 32		; <[2 x i32]*> [#uses=4]
-@CAST_S_table7 = constant [2 x i32] [i32 -501862387, i32 -1143078916], align 32		; <[2 x i32]*> [#uses=5]
-@CAST_S_table1 = constant [2 x i32] [i32 522195092, i32 -284448933], align 32		; <[2 x i32]*> [#uses=0]
-@CAST_S_table2 = constant [2 x i32] [i32 -1913667008, i32 637164959], align 32		; <[2 x i32]*> [#uses=0]
-@CAST_S_table3 = constant [2 x i32] [i32 -1649212384, i32 532081118], align 32		; <[2 x i32]*> [#uses=0]
-
-define void @CAST_set_key(%struct.CAST_KEY* nocapture %key, i32 %len, i8* nocapture %data) nounwind ssp {
-bb1.thread:
-	%0 = getelementptr [16 x i32]* null, i32 0, i32 5		; <i32*> [#uses=1]
-	%1 = getelementptr [16 x i32]* null, i32 0, i32 8		; <i32*> [#uses=1]
-	%2 = load i32* null, align 4		; <i32> [#uses=1]
-	%3 = shl i32 %2, 24		; <i32> [#uses=1]
-	%4 = load i32* null, align 4		; <i32> [#uses=1]
-	%5 = shl i32 %4, 16		; <i32> [#uses=1]
-	%6 = load i32* null, align 4		; <i32> [#uses=1]
-	%7 = or i32 %5, %3		; <i32> [#uses=1]
-	%8 = or i32 %7, %6		; <i32> [#uses=1]
-	%9 = or i32 %8, 0		; <i32> [#uses=1]
-	%10 = load i32* null, align 4		; <i32> [#uses=1]
-	%11 = shl i32 %10, 24		; <i32> [#uses=1]
-	%12 = load i32* %0, align 4		; <i32> [#uses=1]
-	%13 = shl i32 %12, 16		; <i32> [#uses=1]
-	%14 = load i32* null, align 4		; <i32> [#uses=1]
-	%15 = or i32 %13, %11		; <i32> [#uses=1]
-	%16 = or i32 %15, %14		; <i32> [#uses=1]
-	%17 = or i32 %16, 0		; <i32> [#uses=1]
-	br label %bb11
-
-bb11:		; preds = %bb11, %bb1.thread
-	%18 = phi i32 [ %110, %bb11 ], [ 0, %bb1.thread ]		; <i32> [#uses=1]
-	%19 = phi i32 [ %112, %bb11 ], [ 0, %bb1.thread ]		; <i32> [#uses=0]
-	%20 = phi i32 [ 0, %bb11 ], [ 0, %bb1.thread ]		; <i32> [#uses=0]
-	%21 = phi i32 [ %113, %bb11 ], [ 0, %bb1.thread ]		; <i32> [#uses=1]
-	%X.0.0 = phi i32 [ %9, %bb1.thread ], [ %92, %bb11 ]		; <i32> [#uses=0]
-	%X.1.0 = phi i32 [ %17, %bb1.thread ], [ 0, %bb11 ]		; <i32> [#uses=0]
-	%22 = getelementptr [2 x i32]* @CAST_S_table6, i32 0, i32 %21		; <i32*> [#uses=0]
-	%23 = getelementptr [2 x i32]* @CAST_S_table5, i32 0, i32 %18		; <i32*> [#uses=0]
-	%24 = load i32* null, align 4		; <i32> [#uses=1]
-	%25 = xor i32 0, %24		; <i32> [#uses=1]
-	%26 = xor i32 %25, 0		; <i32> [#uses=1]
-	%27 = xor i32 %26, 0		; <i32> [#uses=4]
-	%28 = and i32 %27, 255		; <i32> [#uses=2]
-	%29 = lshr i32 %27, 8		; <i32> [#uses=1]
-	%30 = and i32 %29, 255		; <i32> [#uses=2]
-	%31 = lshr i32 %27, 16		; <i32> [#uses=1]
-	%32 = and i32 %31, 255		; <i32> [#uses=1]
-	%33 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 %28		; <i32*> [#uses=1]
-	%34 = load i32* %33, align 4		; <i32> [#uses=2]
-	%35 = getelementptr [2 x i32]* @CAST_S_table5, i32 0, i32 %30		; <i32*> [#uses=1]
-	%36 = load i32* %35, align 4		; <i32> [#uses=2]
-	%37 = xor i32 %34, 0		; <i32> [#uses=1]
-	%38 = xor i32 %37, %36		; <i32> [#uses=1]
-	%39 = xor i32 %38, 0		; <i32> [#uses=1]
-	%40 = xor i32 %39, 0		; <i32> [#uses=1]
-	%41 = xor i32 %40, 0		; <i32> [#uses=3]
-	%42 = lshr i32 %41, 8		; <i32> [#uses=1]
-	%43 = and i32 %42, 255		; <i32> [#uses=2]
-	%44 = lshr i32 %41, 16		; <i32> [#uses=1]
-	%45 = and i32 %44, 255		; <i32> [#uses=1]
-	%46 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 %43		; <i32*> [#uses=1]
-	%47 = load i32* %46, align 4		; <i32> [#uses=1]
-	%48 = load i32* null, align 4		; <i32> [#uses=1]
-	%49 = xor i32 %47, 0		; <i32> [#uses=1]
-	%50 = xor i32 %49, %48		; <i32> [#uses=1]
-	%51 = xor i32 %50, 0		; <i32> [#uses=1]
-	%52 = xor i32 %51, 0		; <i32> [#uses=1]
-	%53 = xor i32 %52, 0		; <i32> [#uses=2]
-	%54 = and i32 %53, 255		; <i32> [#uses=1]
-	%55 = lshr i32 %53, 24		; <i32> [#uses=1]
-	%56 = getelementptr [2 x i32]* @CAST_S_table6, i32 0, i32 %55		; <i32*> [#uses=1]
-	%57 = load i32* %56, align 4		; <i32> [#uses=1]
-	%58 = xor i32 0, %57		; <i32> [#uses=1]
-	%59 = xor i32 %58, 0		; <i32> [#uses=1]
-	%60 = xor i32 %59, 0		; <i32> [#uses=1]
-	store i32 %60, i32* null, align 4
-	%61 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 0		; <i32*> [#uses=1]
-	%62 = load i32* %61, align 4		; <i32> [#uses=1]
-	%63 = getelementptr [2 x i32]* @CAST_S_table7, i32 0, i32 %54		; <i32*> [#uses=1]
-	%64 = load i32* %63, align 4		; <i32> [#uses=1]
-	%65 = xor i32 0, %64		; <i32> [#uses=1]
-	%66 = xor i32 %65, 0		; <i32> [#uses=1]
-	store i32 %66, i32* null, align 4
-	%67 = getelementptr [2 x i32]* @CAST_S_table7, i32 0, i32 %45		; <i32*> [#uses=1]
-	%68 = load i32* %67, align 4		; <i32> [#uses=1]
-	%69 = xor i32 %36, %34		; <i32> [#uses=1]
-	%70 = xor i32 %69, 0		; <i32> [#uses=1]
-	%71 = xor i32 %70, %68		; <i32> [#uses=1]
-	%72 = xor i32 %71, 0		; <i32> [#uses=1]
-	store i32 %72, i32* null, align 4
-	%73 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 %32		; <i32*> [#uses=1]
-	%74 = load i32* %73, align 4		; <i32> [#uses=2]
-	%75 = load i32* null, align 4		; <i32> [#uses=1]
-	%76 = getelementptr [2 x i32]* @CAST_S_table6, i32 0, i32 %43		; <i32*> [#uses=1]
-	%77 = load i32* %76, align 4		; <i32> [#uses=1]
-	%78 = getelementptr [2 x i32]* @CAST_S_table7, i32 0, i32 0		; <i32*> [#uses=1]
-	%79 = load i32* %78, align 4		; <i32> [#uses=1]
-	%80 = getelementptr [2 x i32]* @CAST_S_table7, i32 0, i32 %30		; <i32*> [#uses=1]
-	%81 = load i32* %80, align 4		; <i32> [#uses=2]
-	%82 = xor i32 %75, %74		; <i32> [#uses=1]
-	%83 = xor i32 %82, %77		; <i32> [#uses=1]
-	%84 = xor i32 %83, %79		; <i32> [#uses=1]
-	%85 = xor i32 %84, %81		; <i32> [#uses=1]
-	store i32 %85, i32* null, align 4
-	%86 = getelementptr [2 x i32]* @CAST_S_table5, i32 0, i32 %28		; <i32*> [#uses=1]
-	%87 = load i32* %86, align 4		; <i32> [#uses=1]
-	%88 = xor i32 %74, %41		; <i32> [#uses=1]
-	%89 = xor i32 %88, %87		; <i32> [#uses=1]
-	%90 = xor i32 %89, 0		; <i32> [#uses=1]
-	%91 = xor i32 %90, %81		; <i32> [#uses=1]
-	%92 = xor i32 %91, 0		; <i32> [#uses=3]
-	%93 = lshr i32 %92, 16		; <i32> [#uses=1]
-	%94 = and i32 %93, 255		; <i32> [#uses=1]
-	store i32 %94, i32* null, align 4
-	%95 = lshr i32 %92, 24		; <i32> [#uses=2]
-	%96 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 %95		; <i32*> [#uses=1]
-	%97 = load i32* %96, align 4		; <i32> [#uses=1]
-	%98 = getelementptr [2 x i32]* @CAST_S_table5, i32 0, i32 0		; <i32*> [#uses=1]
-	%99 = load i32* %98, align 4		; <i32> [#uses=1]
-	%100 = load i32* null, align 4		; <i32> [#uses=0]
-	%101 = xor i32 %97, 0		; <i32> [#uses=1]
-	%102 = xor i32 %101, %99		; <i32> [#uses=1]
-	%103 = xor i32 %102, 0		; <i32> [#uses=1]
-	%104 = xor i32 %103, 0		; <i32> [#uses=0]
-	store i32 0, i32* null, align 4
-	%105 = xor i32 0, %27		; <i32> [#uses=1]
-	%106 = xor i32 %105, 0		; <i32> [#uses=1]
-	%107 = xor i32 %106, 0		; <i32> [#uses=1]
-	%108 = xor i32 %107, 0		; <i32> [#uses=1]
-	%109 = xor i32 %108, %62		; <i32> [#uses=3]
-	%110 = and i32 %109, 255		; <i32> [#uses=1]
-	%111 = lshr i32 %109, 16		; <i32> [#uses=1]
-	%112 = and i32 %111, 255		; <i32> [#uses=1]
-	%113 = lshr i32 %109, 24		; <i32> [#uses=3]
-	store i32 %113, i32* %1, align 4
-	%114 = load i32* null, align 4		; <i32> [#uses=1]
-	%115 = xor i32 0, %114		; <i32> [#uses=1]
-	%116 = xor i32 %115, 0		; <i32> [#uses=1]
-	%117 = xor i32 %116, 0		; <i32> [#uses=1]
-	%K.0.sum42 = or i32 0, 12		; <i32> [#uses=1]
-	%118 = getelementptr [32 x i32]* null, i32 0, i32 %K.0.sum42		; <i32*> [#uses=1]
-	store i32 %117, i32* %118, align 4
-	%119 = getelementptr [2 x i32]* @CAST_S_table5, i32 0, i32 0		; <i32*> [#uses=0]
-	store i32 0, i32* null, align 4
-	%120 = getelementptr [2 x i32]* @CAST_S_table6, i32 0, i32 %113		; <i32*> [#uses=1]
-	%121 = load i32* %120, align 4		; <i32> [#uses=1]
-	%122 = xor i32 0, %121		; <i32> [#uses=1]
-	store i32 %122, i32* null, align 4
-	%123 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 0		; <i32*> [#uses=1]
-	%124 = load i32* %123, align 4		; <i32> [#uses=1]
-	%125 = getelementptr [2 x i32]* @CAST_S_table7, i32 0, i32 %95		; <i32*> [#uses=1]
-	%126 = load i32* %125, align 4		; <i32> [#uses=1]
-	%127 = xor i32 0, %124		; <i32> [#uses=1]
-	%128 = xor i32 %127, 0		; <i32> [#uses=1]
-	%129 = xor i32 %128, %126		; <i32> [#uses=1]
-	%130 = xor i32 %129, 0		; <i32> [#uses=1]
-	store i32 %130, i32* null, align 4
-	br label %bb11
-}
diff --git a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
index 90dabb8..8bbdb0e 100644
--- a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
+++ b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
@@ -9,30 +9,30 @@
 @X = external global i64		; <i64*> [#uses=25]
 
 define fastcc i64 @foo() nounwind {
-	%tmp = volatile load i64* @X		; <i64> [#uses=7]
-	%tmp1 = volatile load i64* @X		; <i64> [#uses=5]
-	%tmp2 = volatile load i64* @X		; <i64> [#uses=3]
-	%tmp3 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp4 = volatile load i64* @X		; <i64> [#uses=5]
-	%tmp5 = volatile load i64* @X		; <i64> [#uses=3]
-	%tmp6 = volatile load i64* @X		; <i64> [#uses=2]
-	%tmp7 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp8 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp9 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp10 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp11 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp12 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp13 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp14 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp15 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp16 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp17 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp18 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp19 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp20 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp21 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp22 = volatile load i64* @X		; <i64> [#uses=1]
-	%tmp23 = volatile load i64* @X		; <i64> [#uses=1]
+	%tmp = load volatile i64* @X		; <i64> [#uses=7]
+	%tmp1 = load volatile i64* @X		; <i64> [#uses=5]
+	%tmp2 = load volatile i64* @X		; <i64> [#uses=3]
+	%tmp3 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp4 = load volatile i64* @X		; <i64> [#uses=5]
+	%tmp5 = load volatile i64* @X		; <i64> [#uses=3]
+	%tmp6 = load volatile i64* @X		; <i64> [#uses=2]
+	%tmp7 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp8 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp9 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp10 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp11 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp12 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp13 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp14 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp15 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp16 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp17 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp18 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp19 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp20 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp21 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp22 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp23 = load volatile i64* @X		; <i64> [#uses=1]
 	%tmp24 = call i64 @llvm.bswap.i64(i64 %tmp8)		; <i64> [#uses=1]
 	%tmp25 = add i64 %tmp6, %tmp5		; <i64> [#uses=1]
 	%tmp26 = add i64 %tmp25, %tmp4		; <i64> [#uses=1]
@@ -229,7 +229,7 @@ define fastcc i64 @foo() nounwind {
 	%tmp217 = add i64 %tmp205, %tmp215		; <i64> [#uses=1]
 	%tmp218 = add i64 %tmp217, %tmp211		; <i64> [#uses=1]
 	%tmp219 = call i64 @llvm.bswap.i64(i64 %tmp23)		; <i64> [#uses=2]
-	volatile store i64 %tmp219, i64* @X, align 8
+	store volatile i64 %tmp219, i64* @X, align 8
 	%tmp220 = add i64 %tmp203, %tmp190		; <i64> [#uses=1]
 	%tmp221 = add i64 %tmp220, %tmp216		; <i64> [#uses=1]
 	%tmp222 = add i64 %tmp219, %tmp177		; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
index 620e0f3..9f5a8c5 100644
--- a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
+++ b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
@@ -1,11 +1,10 @@
 ; RUN: llc -mtriple=i386-apple-darwin10.0 -relocation-model=pic -asm-verbose=false \
-; RUN:     -disable-fp-elim -mattr=-sse41,-sse3,+sse2 -post-RA-scheduler=false -regalloc=linearscan < %s | \
+; RUN:     -disable-fp-elim -mattr=-sse41,-sse3,+sse2 -post-RA-scheduler=false -regalloc=basic < %s | \
 ; RUN:   FileCheck %s
 ; rdar://6808032
 
 ; CHECK: pextrw $14
 ; CHECK-NEXT: shrl $8
-; CHECK-NEXT: (%ebp)
 ; CHECK-NEXT: pinsrw
 
 define void @update(i8** %args_list) nounwind {
diff --git a/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll b/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll
index a5e28c0..c2cd89c 100644
--- a/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll
+++ b/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll
@@ -12,7 +12,7 @@ entry:
 	br label %bb
 
 bb:		; preds = %bb.i, %bb, %entry
-	%2 = volatile load i32* @g_9, align 4		; <i32> [#uses=2]
+	%2 = load volatile i32* @g_9, align 4		; <i32> [#uses=2]
 	%3 = icmp sgt i32 %2, 1		; <i1> [#uses=1]
 	%4 = and i1 %3, %1		; <i1> [#uses=1]
 	br i1 %4, label %bb.i, label %bb
diff --git a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
index 12bd285..1259cf4 100644
--- a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
+++ b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=x86_64-mingw32 < %s | FileCheck %s
+; RUN: llc -mcpu=generic -mtriple=x86_64-mingw32 < %s | FileCheck %s
 ; CHECK: subq    $40, %rsp
 ; CHECK: movaps  %xmm8, (%rsp)
 ; CHECK: movaps  %xmm7, 16(%rsp)
diff --git a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
index 5c51480..5f5d5cc 100644
--- a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
+++ b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 > %t1
 ; RUN: grep movzwl %t1 | count 2
-; RUN: grep movzbl %t1 | count 2
+; RUN: grep movzbl %t1 | count 1
 ; RUN: grep movd %t1 | count 4
 
 define <4 x i16> @a(i32* %x1) nounwind {
diff --git a/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll b/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll
index 07ef53e..66caedf 100644
--- a/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll
+++ b/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx | grep movl | count 2
+; RUN: llc < %s -mtriple=x86_64-linux -mcpu=corei7 -mattr=+mmx | grep movd | count 2
 
 define i64 @a(i32 %a, i32 %b) nounwind readnone {
 entry:
diff --git a/test/CodeGen/X86/2009-07-17-StackColoringBug.ll b/test/CodeGen/X86/2009-07-17-StackColoringBug.ll
deleted file mode 100644
index 3e5bd34..0000000
--- a/test/CodeGen/X86/2009-07-17-StackColoringBug.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; RUN: llc < %s -mtriple=i386-pc-linux-gnu -disable-fp-elim -color-ss-with-regs | not grep dil
-; PR4552
-
-target triple = "i386-pc-linux-gnu"
-@g_8 = internal global i32 0		; <i32*> [#uses=1]
-@g_72 = internal global i32 0		; <i32*> [#uses=1]
-@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (i32, i8, i8)* @uint84 to i8*)], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
-
-define i32 @uint84(i32 %p_15, i8 signext %p_17, i8 signext %p_19) nounwind {
-entry:
-	%g_72.promoted = load i32* @g_72		; <i32> [#uses=1]
-	%g_8.promoted = load i32* @g_8		; <i32> [#uses=1]
-	br label %bb
-
-bb:		; preds = %func_40.exit, %entry
-	%g_8.tmp.1 = phi i32 [ %g_8.promoted, %entry ], [ %g_8.tmp.0, %func_40.exit ]		; <i32> [#uses=3]
-	%g_72.tmp.1 = phi i32 [ %g_72.promoted, %entry ], [ %g_72.tmp.0, %func_40.exit ]		; <i32> [#uses=3]
-	%retval12.i4.i.i = trunc i32 %g_8.tmp.1 to i8		; <i8> [#uses=2]
-	%0 = trunc i32 %g_72.tmp.1 to i8		; <i8> [#uses=2]
-	%1 = mul i8 %retval12.i4.i.i, %0		; <i8> [#uses=1]
-	%2 = icmp eq i8 %1, 0		; <i1> [#uses=1]
-	br i1 %2, label %bb2.i.i, label %bb.i.i
-
-bb.i.i:		; preds = %bb
-	%3 = sext i8 %0 to i32		; <i32> [#uses=1]
-	%4 = and i32 %3, 50295		; <i32> [#uses=1]
-	%5 = icmp eq i32 %4, 0		; <i1> [#uses=1]
-	br i1 %5, label %bb2.i.i, label %func_55.exit.i
-
-bb2.i.i:		; preds = %bb.i.i, %bb
-	br label %func_55.exit.i
-
-func_55.exit.i:		; preds = %bb2.i.i, %bb.i.i
-	%g_72.tmp.2 = phi i32 [ 1, %bb2.i.i ], [ %g_72.tmp.1, %bb.i.i ]		; <i32> [#uses=1]
-	%6 = phi i32 [ 1, %bb2.i.i ], [ %g_72.tmp.1, %bb.i.i ]		; <i32> [#uses=1]
-	%7 = trunc i32 %6 to i8		; <i8> [#uses=2]
-	%8 = mul i8 %7, %retval12.i4.i.i		; <i8> [#uses=1]
-	%9 = icmp eq i8 %8, 0		; <i1> [#uses=1]
-	br i1 %9, label %bb2.i4.i, label %bb.i3.i
-
-bb.i3.i:		; preds = %func_55.exit.i
-	%10 = sext i8 %7 to i32		; <i32> [#uses=1]
-	%11 = and i32 %10, 50295		; <i32> [#uses=1]
-	%12 = icmp eq i32 %11, 0		; <i1> [#uses=1]
-	br i1 %12, label %bb2.i4.i, label %func_40.exit
-
-bb2.i4.i:		; preds = %bb.i3.i, %func_55.exit.i
-	br label %func_40.exit
-
-func_40.exit:		; preds = %bb2.i4.i, %bb.i3.i
-	%g_72.tmp.0 = phi i32 [ 1, %bb2.i4.i ], [ %g_72.tmp.2, %bb.i3.i ]		; <i32> [#uses=1]
-	%phitmp = icmp sgt i32 %g_8.tmp.1, 0		; <i1> [#uses=1]
-	%g_8.tmp.0 = select i1 %phitmp, i32 %g_8.tmp.1, i32 1		; <i32> [#uses=1]
-	br label %bb
-}
diff --git a/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll b/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
index 790fd88..410a42a 100644
--- a/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
+++ b/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
@@ -41,18 +41,18 @@ bb3:                                              ; preds = %bb2, %bb
   br i1 undef, label %bb5, label %bb4
 
 bb4:                                              ; preds = %bb3
-  %17 = volatile load i32* @uint8, align 4        ; <i32> [#uses=0]
+  %17 = load volatile i32* @uint8, align 4        ; <i32> [#uses=0]
   br label %bb5
 
 bb5:                                              ; preds = %bb4, %bb3
-  %18 = volatile load i32* @uint8, align 4        ; <i32> [#uses=0]
+  %18 = load volatile i32* @uint8, align 4        ; <i32> [#uses=0]
   %19 = sext i8 undef to i16                      ; <i16> [#uses=1]
   %20 = tail call i32 @func_24(i16 zeroext %19, i8 signext 1) nounwind; <i32> [#uses=0]
   br i1 undef, label %return, label %bb6.preheader
 
 bb6.preheader:                                    ; preds = %bb5
   %21 = sext i8 %p_52 to i32                      ; <i32> [#uses=1]
-  %22 = volatile load i32* @uint8, align 4        ; <i32> [#uses=0]
+  %22 = load volatile i32* @uint8, align 4        ; <i32> [#uses=0]
   %23 = tail call i32 (...)* @safefuncts(i32 %21, i32 1) nounwind; <i32> [#uses=0]
   unreachable
 
diff --git a/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
index f6ac2ba..d4a74c9 100644
--- a/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
+++ b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=i386-apple-darwin -tailcallopt < %s | FileCheck %s
+; RUN: llc -mcpu=generic -mtriple=i386-apple-darwin -tailcallopt < %s | FileCheck %s
 ; Check that lowered argumens do not overwrite the return address before it is moved.
 ; Bug 6225
 ;
diff --git a/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll b/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
index 69787c7..5372bc5 100644
--- a/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
+++ b/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
@@ -1,32 +1,35 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | FileCheck %s
 ; There are no MMX operations here, so we use XMM or i64.
 
+; CHECK: ti8
 define void @ti8(double %a, double %b) nounwind {
 entry:
         %tmp1 = bitcast double %a to <8 x i8>
         %tmp2 = bitcast double %b to <8 x i8>
         %tmp3 = add <8 x i8> %tmp1, %tmp2
-; CHECK:  paddb %xmm1, %xmm0
+; CHECK:  paddw
         store <8 x i8> %tmp3, <8 x i8>* null
         ret void
 }
 
+; CHECK: ti16
 define void @ti16(double %a, double %b) nounwind {
 entry:
         %tmp1 = bitcast double %a to <4 x i16>
         %tmp2 = bitcast double %b to <4 x i16>
         %tmp3 = add <4 x i16> %tmp1, %tmp2
-; CHECK:  paddw %xmm1, %xmm0
+; CHECK:  paddd
         store <4 x i16> %tmp3, <4 x i16>* null
         ret void
 }
 
+; CHECK: ti32
 define void @ti32(double %a, double %b) nounwind {
 entry:
         %tmp1 = bitcast double %a to <2 x i32>
         %tmp2 = bitcast double %b to <2 x i32>
         %tmp3 = add <2 x i32> %tmp1, %tmp2
-; CHECK:  paddd %xmm1, %xmm0
+; CHECK:  paddq
         store <2 x i32> %tmp3, <2 x i32>* null
         ret void
 }
@@ -55,6 +58,7 @@ entry:
         ret void
 }
 
+; CHECK: ti16a
 define void @ti16a(double %a, double %b) nounwind {
 entry:
         %tmp1 = bitcast double %a to x86_mmx
@@ -66,6 +70,7 @@ entry:
         ret void
 }
 
+; CHECK: ti32a
 define void @ti32a(double %a, double %b) nounwind {
 entry:
         %tmp1 = bitcast double %a to x86_mmx
@@ -77,6 +82,7 @@ entry:
         ret void
 }
 
+; CHECK: ti64a
 define void @ti64a(double %a, double %b) nounwind {
 entry:
         %tmp1 = bitcast double %a to x86_mmx
diff --git a/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll b/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
index 7af58dc..cbf5502 100644
--- a/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
+++ b/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
@@ -30,14 +30,16 @@ invoke.cont:                                      ; preds = %entry
   br label %finally
 
 terminate.handler:                                ; preds = %match.end
-  %exc = call i8* @llvm.eh.exception()            ; <i8*> [#uses=1]
-  %1 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exc, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 1) ; <i32> [#uses=0]
+  %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+           cleanup
   call void @_ZSt9terminatev() noreturn nounwind
   unreachable
 
 try.handler:                                      ; preds = %entry
-  %exc1 = call i8* @llvm.eh.exception()           ; <i8*> [#uses=3]
-  %selector = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exc1, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* bitcast (i8** @_ZTIi to i8*), i8* null) ; <i32> [#uses=1]
+  %exc1.ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+           catch i8* null
+  %exc1 = extractvalue { i8*, i32 } %exc1.ptr, 0
+  %selector = extractvalue { i8*, i32 } %exc1.ptr, 1
   %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) ; <i32> [#uses=1]
   %3 = icmp eq i32 %selector, %2                  ; <i1> [#uses=1]
   br i1 %3, label %match, label %catch.next
@@ -55,9 +57,10 @@ invoke.cont2:                                     ; preds = %match
   br label %match.end
 
 match.handler:                                    ; preds = %match
-  %exc3 = call i8* @llvm.eh.exception()           ; <i8*> [#uses=2]
-  %7 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exc3, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 0) ; <i32> [#uses=0]
-  store i8* %exc3, i8** %_rethrow
+  %exc3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+           cleanup
+  %7 = extractvalue { i8*, i32 } %exc3, 0
+  store i8* %7, i8** %_rethrow
   store i32 2, i32* %cleanup.dst
   br label %match.end
 
@@ -124,10 +127,6 @@ declare void @_Z6throwsv() ssp
 
 declare i32 @__gxx_personality_v0(...)
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
 declare void @_ZSt9terminatev()
 
 declare void @_Unwind_Resume_or_Rethrow(i8*)
diff --git a/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll b/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll
index 5accfd7..e0c2c6c 100644
--- a/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll
+++ b/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc < %s -mcpu=generic | FileCheck %s
 ; PR6941
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
diff --git a/test/CodeGen/X86/2010-06-28-DbgEntryPC.ll b/test/CodeGen/X86/2010-06-28-DbgEntryPC.ll
deleted file mode 100644
index 2ba12df..0000000
--- a/test/CodeGen/X86/2010-06-28-DbgEntryPC.ll
+++ /dev/null
@@ -1,108 +0,0 @@
-; RUN: llc -O2 -mtriple=i386-apple-darwin <%s | FileCheck %s
-; Use DW_FORM_addr for DW_AT_entry_pc.
-; Radar 8094785
-
-; CHECK:	.byte	17                      ## DW_TAG_compile_unit
-; CHECK-NEXT:	.byte	1                       ## DW_CHILDREN_yes
-; CHECK-NEXT:	.byte	37                      ## DW_AT_producer
-; CHECK-NEXT:	.byte	8                       ## DW_FORM_string
-; CHECK-NEXT:	.byte	19                      ## DW_AT_language
-; CHECK-NEXT:	.byte	5                       ## DW_FORM_data2
-; CHECK-NEXT:	.byte	3                       ## DW_AT_name
-; CHECK-NEXT:	.byte	8                       ## DW_FORM_string
-; CHECK-NEXT:	.byte	82                      ## DW_AT_entry_pc
-; CHECK-NEXT:	.byte	1                       ## DW_FORM_addr
-; CHECK-NEXT:	.byte	16                      ## DW_AT_stmt_list
-; CHECK-NEXT:	.byte	6                       ## DW_FORM_data4
-; CHECK-NEXT:	.byte	27                      ## DW_AT_comp_dir
-; CHECK-NEXT:	.byte	8                       ## DW_FORM_string
-; CHECK-NEXT:	.byte	225                     ## DW_AT_APPLE_optimized
-
-%struct.a = type { i32, %struct.a* }
-
-@ret = common global i32 0                        ; <i32*> [#uses=2]
-
-define void @foo(i32 %x) nounwind noinline ssp {
-entry:
-  tail call void @llvm.dbg.value(metadata !{i32 %x}, i64 0, metadata !21), !dbg !28
-  store i32 %x, i32* @ret, align 4, !dbg !29
-  ret void, !dbg !31
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-define i8* @bar(%struct.a* %b) nounwind noinline ssp {
-entry:
-  tail call void @llvm.dbg.value(metadata !{%struct.a* %b}, i64 0, metadata !22), !dbg !32
-  %0 = getelementptr inbounds %struct.a* %b, i64 0, i32 0, !dbg !33 ; <i32*> [#uses=1]
-  %1 = load i32* %0, align 8, !dbg !33            ; <i32> [#uses=1]
-  tail call void @foo(i32 %1) nounwind noinline ssp, !dbg !33
-  %2 = bitcast %struct.a* %b to i8*, !dbg !35     ; <i8*> [#uses=1]
-  ret i8* %2, !dbg !35
-}
-
-define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp {
-entry:
-  %e = alloca %struct.a, align 8                  ; <%struct.a*> [#uses=4]
-  call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !23), !dbg !36
-  call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !24), !dbg !36
-  call void @llvm.dbg.declare(metadata !{%struct.a* %e}, metadata !25), !dbg !37
-  %0 = getelementptr inbounds %struct.a* %e, i64 0, i32 0, !dbg !38 ; <i32*> [#uses=1]
-  store i32 4, i32* %0, align 8, !dbg !38
-  %1 = getelementptr inbounds %struct.a* %e, i64 0, i32 1, !dbg !39 ; <%struct.a**> [#uses=1]
-  store %struct.a* %e, %struct.a** %1, align 8, !dbg !39
-  %2 = call i8* @bar(%struct.a* %e) nounwind noinline ssp, !dbg !40 ; <i8*> [#uses=0]
-  %3 = load i32* @ret, align 4, !dbg !41          ; <i32> [#uses=1]
-  ret i32 %3, !dbg !41
-}
-
-!llvm.dbg.sp = !{!0, !6, !15}
-!llvm.dbg.lv.foo = !{!21}
-!llvm.dbg.lv.bar = !{!22}
-!llvm.dbg.lv.main = !{!23, !24, !25}
-!llvm.dbg.gv = !{!27}
-
-!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 34, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, void (i32)* @foo} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 524329, metadata !"2010-06-28-DbgEntryPC.c", metadata !"/Users/yash/clean/llvm/test/FrontendC", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 524305, i32 0, i32 1, metadata !"2010-06-28-DbgEntryPC.c", metadata !"/Users/yash/clean/llvm/test/FrontendC", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{null, metadata !5}
-!5 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 524334, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", metadata !1, i32 38, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i8* (%struct.a*)* @bar} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!8 = metadata !{metadata !9, metadata !10}
-!9 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!10 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 524307, metadata !1, metadata !"a", metadata !1, i32 23, i64 128, i64 64, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_structure_type ]
-!12 = metadata !{metadata !13, metadata !14}
-!13 = metadata !{i32 524301, metadata !11, metadata !"c", metadata !1, i32 24, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!14 = metadata !{i32 524301, metadata !11, metadata !"d", metadata !1, i32 25, i64 64, i64 64, i64 64, i32 0, metadata !10} ; [ DW_TAG_member ]
-!15 = metadata !{i32 524334, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 43, metadata !16, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32, i8**)* @main} ; [ DW_TAG_subprogram ]
-!16 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !17, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!17 = metadata !{metadata !5, metadata !5, metadata !18}
-!18 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !19} ; [ DW_TAG_pointer_type ]
-!19 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ]
-!20 = metadata !{i32 524324, metadata !1, metadata !"char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!21 = metadata !{i32 524545, metadata !0, metadata !"x", metadata !1, i32 33, metadata !5} ; [ DW_TAG_arg_variable ]
-!22 = metadata !{i32 524545, metadata !6, metadata !"b", metadata !1, i32 38, metadata !10} ; [ DW_TAG_arg_variable ]
-!23 = metadata !{i32 524545, metadata !15, metadata !"argc", metadata !1, i32 43, metadata !5} ; [ DW_TAG_arg_variable ]
-!24 = metadata !{i32 524545, metadata !15, metadata !"argv", metadata !1, i32 43, metadata !18} ; [ DW_TAG_arg_variable ]
-!25 = metadata !{i32 524544, metadata !26, metadata !"e", metadata !1, i32 44, metadata !11} ; [ DW_TAG_auto_variable ]
-!26 = metadata !{i32 524299, metadata !15, i32 43, i32 0} ; [ DW_TAG_lexical_block ]
-!27 = metadata !{i32 524340, i32 0, metadata !1, metadata !"ret", metadata !"ret", metadata !"", metadata !1, i32 28, metadata !5, i1 false, i1 true, i32* @ret} ; [ DW_TAG_variable ]
-!28 = metadata !{i32 33, i32 0, metadata !0, null}
-!29 = metadata !{i32 35, i32 0, metadata !30, null}
-!30 = metadata !{i32 524299, metadata !0, i32 34, i32 0} ; [ DW_TAG_lexical_block ]
-!31 = metadata !{i32 36, i32 0, metadata !30, null}
-!32 = metadata !{i32 38, i32 0, metadata !6, null}
-!33 = metadata !{i32 39, i32 0, metadata !34, null}
-!34 = metadata !{i32 524299, metadata !6, i32 38, i32 0} ; [ DW_TAG_lexical_block ]
-!35 = metadata !{i32 40, i32 0, metadata !34, null}
-!36 = metadata !{i32 43, i32 0, metadata !15, null}
-!37 = metadata !{i32 44, i32 0, metadata !26, null}
-!38 = metadata !{i32 45, i32 0, metadata !26, null}
-!39 = metadata !{i32 46, i32 0, metadata !26, null}
-!40 = metadata !{i32 48, i32 0, metadata !26, null}
-!41 = metadata !{i32 49, i32 0, metadata !26, null}
diff --git a/test/CodeGen/X86/2010-08-04-MingWCrash.ll b/test/CodeGen/X86/2010-08-04-MingWCrash.ll
index 98a0887..61f527b 100644
--- a/test/CodeGen/X86/2010-08-04-MingWCrash.ll
+++ b/test/CodeGen/X86/2010-08-04-MingWCrash.ll
@@ -10,14 +10,15 @@ bb1:
   ret void
 
 lpad:
-  %exn = tail call i8* @llvm.eh.exception() nounwind
-  %eh.selector = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 1, i8* null) nounwind
+  %exn.ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+           catch i8* null
+  %exn = extractvalue { i8*, i32 } %exn.ptr, 0
+  %eh.selector = extractvalue { i8*, i32 } %exn.ptr, 1
   %ehspec.fails = icmp slt i32 %eh.selector, 0
   br i1 %ehspec.fails, label %ehspec.unexpected, label %cleanup
 
 cleanup:
-  tail call void @_Unwind_Resume_or_Rethrow(i8* %exn) noreturn nounwind
-  unreachable
+  resume { i8*, i32 } %exn.ptr
 
 ehspec.unexpected:
   tail call void @__cxa_call_unexpected(i8* %exn) noreturn nounwind
@@ -26,12 +27,8 @@ ehspec.unexpected:
 
 declare noalias i8* @malloc()
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
 declare i32 @__gxx_personality_v0(...)
 
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
 declare void @_Unwind_Resume_or_Rethrow(i8*)
 
 declare void @__cxa_call_unexpected(i8*)
diff --git a/test/CodeGen/X86/2010-08-10-DbgConstant.ll b/test/CodeGen/X86/2010-08-10-DbgConstant.ll
index d98ef14..b3cc35d 100644
--- a/test/CodeGen/X86/2010-08-10-DbgConstant.ll
+++ b/test/CodeGen/X86/2010-08-10-DbgConstant.ll
@@ -1,6 +1,6 @@
-; RUN: llc  -march=x86 -O0 < %s | FileCheck %s
+; RUN: llc  -mtriple=i686-linux -O0 < %s | FileCheck %s
 ; CHECK: DW_TAG_constant
-; CHECK-NEXT: ascii	 "ro"                   #{{#?}} DW_AT_name
+; CHECK-NEXT: .long .Lstring3 #{{#?}} DW_AT_name
 
 define void @foo() nounwind ssp {
 entry:
diff --git a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
index 7f13411..166dcf2 100644
--- a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
+++ b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
@@ -4,8 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-darwin10.0.0"
 
 ; Check debug info for variable z_s
-;CHECK:       .ascii   "z_s"                  ## DW_AT_name
-;CHECK-NEXT:  .byte   0
+;CHECK: .long Lset13
 ;CHECK-NEXT:  ## DW_AT_decl_file
 ;CHECK-NEXT:  ## DW_AT_decl_line
 ;CHECK-NEXT:  ## DW_AT_type
diff --git a/test/CodeGen/X86/2011-08-29-InitOrder.ll b/test/CodeGen/X86/2011-08-29-InitOrder.ll
index 72c79d2..4d5f8d7 100644
--- a/test/CodeGen/X86/2011-08-29-InitOrder.ll
+++ b/test/CodeGen/X86/2011-08-29-InitOrder.ll
@@ -3,22 +3,28 @@
 ; PR5329
 
 @llvm.global_ctors = appending global [3 x { i32, void ()* }] [{ i32, void ()* } { i32 2000, void ()* @construct_2 }, { i32, void ()* } { i32 3000, void ()* @construct_3 }, { i32, void ()* } { i32 1000, void ()* @construct_1 }]
-; CHECK-DEFAULT: construct_3
-; CHECK-DEFAULT: construct_2
-; CHECK-DEFAULT: construct_1
+; CHECK-DEFAULT  .section        .ctors.64535,"aw",@progbits
+; CHECK-DEFAULT: .long construct_1
+; CHECK-DEFAULT: .section        .ctors.63535,"aw",@progbits
+; CHECK-DEFAULT: .long construct_2
+; CHECK-DEFAULT: .section        .ctors.62535,"aw",@progbits
+; CHECK-DEFAULT: .long construct_3
 
-; CHECK-DARWIN: construct_1
-; CHECK-DARWIN: construct_2
-; CHECK-DARWIN: construct_3
+; CHECK-DARWIN: .long _construct_1
+; CHECK-DARWIN-NEXT: .long _construct_2
+; CHECK-DARWIN-NEXT: .long _construct_3
 
 @llvm.global_dtors = appending global [3 x { i32, void ()* }] [{ i32, void ()* } { i32 2000, void ()* @destruct_2 }, { i32, void ()* } { i32 1000, void ()* @destruct_1 }, { i32, void ()* } { i32 3000, void ()* @destruct_3 }]
-; CHECK-DEFAULT: destruct_3
-; CHECK-DEFAULT: destruct_2
-; CHECK-DEFAULT: destruct_1
+; CHECK-DEFAULT: .section        .dtors.64535,"aw",@progbits
+; CHECK-DEFAULT: .long destruct_1
+; CHECK-DEFAULT: .section        .dtors.63535,"aw",@progbits
+; CHECK-DEFAULT: .long destruct_2
+; CHECK-DEFAULT: .section        .dtors.62535,"aw",@progbits
+; CHECK-DEFAULT: .long destruct_3
 
-; CHECK-DARWIN: destruct_1
-; CHECK-DARWIN: destruct_2
-; CHECK-DARWIN: destruct_3
+; CHECK-DARWIN:      .long _destruct_1
+; CHECK-DARWIN-NEXT: .long _destruct_2
+; CHECK-DARWIN-NEXT: .long _destruct_3
 
 declare void @construct_1()
 declare void @construct_2()
diff --git a/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll b/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll
new file mode 100644
index 0000000..8c09d97
--- /dev/null
+++ b/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=x86 -fast-isel -mattr=+sse < %s | FileCheck %s
+; <rdar://problem/10215997>
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.7"
+
+define void @vectortest() nounwind ssp {
+entry:
+  %p1 = alloca <4 x float>, align 16
+  %p2 = alloca <4 x float>, align 16
+  %p3 = alloca <4 x float>, align 16
+  %p4 = alloca <4 x float>, align 16
+  %p5 = alloca <4 x float>, align 16
+  store <4 x float> <float 0x3FF19999A0000000, float 0x3FF3333340000000, float 0x3FF4CCCCC0000000, float 0x3FF6666660000000>, <4 x float>* %p1, align 16
+  store <4 x float> <float 0x4000CCCCC0000000, float 0x40019999A0000000, float 0x4002666660000000, float 0x4003333340000000>, <4 x float>* %p2, align 16
+  store <4 x float> <float 0x4008CCCCC0000000, float 0x40099999A0000000, float 0x400A666660000000, float 0x400B333340000000>, <4 x float>* %p3, align 16
+  store <4 x float> <float 0x4010666660000000, float 0x4010CCCCC0000000, float 0x4011333340000000, float 0x40119999A0000000>, <4 x float>* %p4, align 16
+  store <4 x float> <float 0x4014666660000000, float 0x4014CCCCC0000000, float 0x4015333340000000, float 0x40159999A0000000>, <4 x float>* %p5, align 16
+  %0 = load <4 x float>* %p1, align 16
+  %1 = load <4 x float>* %p2, align 16
+  %2 = load <4 x float>* %p3, align 16
+  %3 = load <4 x float>* %p4, align 16
+  %4 = load <4 x float>* %p5, align 16
+; CHECK:      movaps {{%xmm[0-7]}}, (%esp)
+; CHECK-NEXT: calll _dovectortest 
+  call void @dovectortest(<4 x float> %0, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4)
+  ret void
+}
+
+declare void @dovectortest(<4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>)
diff --git a/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll b/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
new file mode 100644
index 0000000..a720753
--- /dev/null
+++ b/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i8:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%union.anon = type { <2 x i8> }
+
+@i = global <2 x i8> <i8 150, i8 100>, align 8
+@j = global <2 x i8> <i8 10, i8 13>, align 8
+@res = common global %union.anon zeroinitializer, align 8
+
+; Make sure we load the constants i and j starting offset zero.
+; Also make sure that we sign-extend it.
+; Based on /gcc-4_2-testsuite/src/gcc.c-torture/execute/pr23135.c
+
+; CHECK: main
+define i32 @main() nounwind uwtable {
+entry:
+; CHECK: movsbq  j(%rip), %
+; CHECK: movsbq  i(%rip), %
+  %0 = load <2 x i8>* @i, align 8
+  %1 = load <2 x i8>* @j, align 8
+  %div = sdiv <2 x i8> %1, %0
+  store <2 x i8> %div, <2 x i8>* getelementptr inbounds (%union.anon* @res, i32 0, i32 0), align 8
+  ret i32 0
+; CHECK: ret
+}
+
diff --git a/test/CodeGen/X86/2011-10-19-widen_vselect.ll b/test/CodeGen/X86/2011-10-19-widen_vselect.ll
new file mode 100644
index 0000000..e08c5b2
--- /dev/null
+++ b/test/CodeGen/X86/2011-10-19-widen_vselect.ll
@@ -0,0 +1,68 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; Make sure that we don't crash when legalizng vselect and vsetcc and that
+; we are able to generate vector blend instructions.
+
+; CHECK: simple_widen
+; CHECK: blend
+; CHECK: ret
+define void @simple_widen() {
+entry:
+  %0 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
+  store <2 x float> %0, <2 x float>* undef
+  ret void
+}
+
+; CHECK: complex_inreg_work
+; CHECK: blend
+; CHECK: ret
+
+define void @complex_inreg_work() {
+entry:
+  %0 = fcmp oeq <2 x float> undef, undef
+  %1 = select <2 x i1> %0, <2 x float> undef, <2 x float> undef
+  store <2 x float> %1, <2 x float>* undef
+  ret void
+}
+
+; CHECK: zero_test
+; CHECK: blend
+; CHECK: ret
+
+define void @zero_test() {
+entry:
+  %0 = select <2 x i1> undef, <2 x float> undef, <2 x float> zeroinitializer
+  store <2 x float> %0, <2 x float>* undef
+  ret void
+}
+
+; CHECK: full_test
+; CHECK: blend
+; CHECK: ret
+
+define void @full_test() {
+ entry:
+   %Cy300 = alloca <4 x float>
+   %Cy11a = alloca <2 x float>
+   %Cy118 = alloca <2 x float>
+   %Cy119 = alloca <2 x float>
+   br label %B1
+
+ B1:                                               ; preds = %entry
+   %0 = load <2 x float>* %Cy119
+   %1 = fptosi <2 x float> %0 to <2 x i32>
+   %2 = sitofp <2 x i32> %1 to <2 x float>
+   %3 = fcmp ogt <2 x float> %0, zeroinitializer
+   %4 = fadd <2 x float> %2, <float 1.000000e+00, float 1.000000e+00>
+   %5 = select <2 x i1> %3, <2 x float> %4, <2 x float> %2
+   %6 = fcmp oeq <2 x float> %2, %0
+   %7 = select <2 x i1> %6, <2 x float> %0, <2 x float> %5
+   store <2 x float> %7, <2 x float>* %Cy118
+   %8 = load <2 x float>* %Cy118
+   store <2 x float> %8, <2 x float>* %Cy11a
+   ret void
+}
+
+
diff --git a/test/CodeGen/X86/2011-10-21-widen-cmp.ll b/test/CodeGen/X86/2011-10-21-widen-cmp.ll
new file mode 100644
index 0000000..2fe645b
--- /dev/null
+++ b/test/CodeGen/X86/2011-10-21-widen-cmp.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; Check that a <4 x float> compare is generated and that we are
+; not stuck in an endless loop.
+
+; CHECK: cmp_2_floats
+; CHECK: cmpordps
+; CHECK: ret
+
+define void @cmp_2_floats() {
+entry:
+  %0 = fcmp oeq <2 x float> undef, undef
+  %1 = select <2 x i1> %0, <2 x float> undef, <2 x float> undef
+  store <2 x float> %1, <2 x float>* undef
+  ret void
+}
+
+; CHECK: cmp_2_doubles
+; CHECK: cmpordpd
+; CHECK: blendvpd
+; CHECK: ret
+define void @cmp_2_doubles() {
+entry:
+  %0 = fcmp oeq <2 x double> undef, undef
+  %1 = select <2 x i1> %0, <2 x double> undef, <2 x double> undef
+  store <2 x double> %1, <2 x double>* undef
+  ret void
+}
+
+; CHECK: mp_11193
+; CHECK: psraw   $15
+; CHECK: ret
+define void @mp_11193(<8 x float> * nocapture %aFOO, <8 x float>* nocapture %RET)
+nounwind {
+allocas:
+  %bincmp = fcmp olt <8 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 9.000000e+00, float 1.000000e+00, float 9.000000e+00, float 1.000000e+00> , <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
+  %t = extractelement <8 x i1> %bincmp, i32 0
+  %ft = sitofp i1 %t to float
+  %pp = bitcast <8 x float>* %RET to float*
+  store float %ft, float* %pp
+  ret void
+}
+
diff --git a/test/CodeGen/X86/2011-10-27-tstore.ll b/test/CodeGen/X86/2011-10-27-tstore.ll
new file mode 100644
index 0000000..6e83f67
--- /dev/null
+++ b/test/CodeGen/X86/2011-10-27-tstore.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+;CHECK: ltstore
+;CHECK: movq
+;CHECK: movq
+;CHECK: ret
+define void @ltstore(<4 x i32>* %pA, <2 x i32>* %pB) {
+entry:
+  %in = load <4 x i32>* %pA
+  %j = shufflevector <4 x i32> %in, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+  store <2 x i32> %j, <2 x i32>* %pB
+  ret void
+}
+
diff --git a/test/CodeGen/X86/2011-10-30-padd.ll b/test/CodeGen/X86/2011-10-30-padd.ll
new file mode 100644
index 0000000..180ca15
--- /dev/null
+++ b/test/CodeGen/X86/2011-10-30-padd.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7 | FileCheck %s
+
+;CHECK: addXX_test
+;CHECK: padd
+;CHECK: ret
+
+
+define <16 x i8> @addXX_test(<16 x i8> %a) {
+      %b = add <16 x i8> %a, %a
+      ret <16 x i8> %b
+}
+
+;CHECK: instcombine_test
+;CHECK: padd
+;CHECK: ret
+define <16 x i8> @instcombine_test(<16 x i8> %a) {
+  %b = shl <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <16 x i8> %b
+}
+
diff --git a/test/CodeGen/X86/2011-11-07-LegalizeBuildVector.ll b/test/CodeGen/X86/2011-11-07-LegalizeBuildVector.ll
new file mode 100644
index 0000000..d316470
--- /dev/null
+++ b/test/CodeGen/X86/2011-11-07-LegalizeBuildVector.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s
+
+; We don't really care what this outputs; just make sure it's somewhat sane.
+; CHECK: legalize_test
+; CHECK: vmovups
+define void @legalize_test(i32 %x, <8 x i32>* %p) nounwind {
+entry:
+  %t1 = insertelement <8 x i32> <i32 undef, i32 undef, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>, i32 %x, i32 0
+  %t2 = shufflevector <8 x i32> %t1, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %int2float = sitofp <8 x i32> %t2 to <8 x float>
+  %blendAsInt.i821 = bitcast <8 x float> %int2float to <8 x i32>
+  store <8 x i32> %blendAsInt.i821, <8 x i32>* %p, align 4
+  ret void
+}
diff --git a/test/CodeGen/X86/2011-11-22-AVX2-Domains.ll b/test/CodeGen/X86/2011-11-22-AVX2-Domains.ll
new file mode 100644
index 0000000..8174109
--- /dev/null
+++ b/test/CodeGen/X86/2011-11-22-AVX2-Domains.ll
@@ -0,0 +1,99 @@
+; RUN: llc < %s -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin11"
+
+; This test would create a vpand %ymm instruction that is only legal in AVX2.
+; CHECK-NOT: vpand %ymm
+
+declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
+
+define void @ShadeTile() nounwind {
+allocas:
+  br i1 undef, label %if_then, label %if_else
+
+if_then:                                          ; preds = %allocas
+  unreachable
+
+if_else:                                          ; preds = %allocas
+  br i1 undef, label %for_loop156.lr.ph, label %if_exit
+
+for_loop156.lr.ph:                                ; preds = %if_else
+  %val_6.i21244 = load i16* undef, align 2
+  %0 = insertelement <8 x i16> undef, i16 %val_6.i21244, i32 6
+  %val_7.i21248 = load i16* undef, align 2
+  %1 = insertelement <8 x i16> %0, i16 %val_7.i21248, i32 7
+  %uint2uint32.i20206 = zext <8 x i16> %1 to <8 x i32>
+  %bitop5.i20208 = and <8 x i32> %uint2uint32.i20206, <i32 31744, i32 31744, i32 31744, i32 31744, i32 31744, i32 31744, i32 31744, i32 31744>
+  %bitop8.i20209 = and <8 x i32> %uint2uint32.i20206, <i32 1023, i32 1023, i32 1023, i32 1023, i32 1023, i32 1023, i32 1023, i32 1023>
+  %bitop12.i20211 = lshr <8 x i32> %bitop5.i20208, <i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
+  %binop13.i20212 = add <8 x i32> %bitop12.i20211, <i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112>
+  %bitop15.i20213 = shl <8 x i32> %binop13.i20212, <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>
+  %bitop17.i20214 = shl <8 x i32> %bitop8.i20209, <i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13>
+  %bitop20.i20215 = or <8 x i32> undef, %bitop15.i20213
+  %bitop22.i20216 = or <8 x i32> %bitop20.i20215, %bitop17.i20214
+  %int_to_float_bitcast.i.i.i20217 = bitcast <8 x i32> %bitop22.i20216 to <8 x float>
+  %binop401 = fmul <8 x float> undef, <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
+  %binop402 = fadd <8 x float> %binop401, <float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00>
+  %binop403 = fmul <8 x float> zeroinitializer, %binop402
+  %binop406 = fmul <8 x float> %int_to_float_bitcast.i.i.i20217, <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
+  %binop407 = fadd <8 x float> %binop406, <float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00>
+  %binop408 = fmul <8 x float> zeroinitializer, %binop407
+  %binop411 = fsub <8 x float> <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>, undef
+  %val_4.i21290 = load i16* undef, align 2
+  %2 = insertelement <8 x i16> undef, i16 %val_4.i21290, i32 4
+  %val_5.i21294 = load i16* undef, align 2
+  %3 = insertelement <8 x i16> %2, i16 %val_5.i21294, i32 5
+  %val_6.i21298 = load i16* undef, align 2
+  %4 = insertelement <8 x i16> %3, i16 %val_6.i21298, i32 6
+  %ptr_7.i21301 = inttoptr i64 undef to i16*
+  %val_7.i21302 = load i16* %ptr_7.i21301, align 2
+  %5 = insertelement <8 x i16> %4, i16 %val_7.i21302, i32 7
+  %uint2uint32.i20218 = zext <8 x i16> %5 to <8 x i32>
+  %structelement561 = load i8** undef, align 8
+  %ptr2int563 = ptrtoint i8* %structelement561 to i64
+  %smear.ptr_smear7571 = insertelement <8 x i64> undef, i64 %ptr2int563, i32 7
+  %new_ptr582 = add <8 x i64> %smear.ptr_smear7571, zeroinitializer
+  %val_5.i21509 = load i8* null, align 1
+  %6 = insertelement <8 x i8> undef, i8 %val_5.i21509, i32 5
+  %7 = insertelement <8 x i8> %6, i8 undef, i32 6
+  %iptr_7.i21515 = extractelement <8 x i64> %new_ptr582, i32 7
+  %ptr_7.i21516 = inttoptr i64 %iptr_7.i21515 to i8*
+  %val_7.i21517 = load i8* %ptr_7.i21516, align 1
+  %8 = insertelement <8 x i8> %7, i8 %val_7.i21517, i32 7
+  %uint2float.i20245 = uitofp <8 x i8> %8 to <8 x float>
+  %binop.i20246 = fmul <8 x float> %uint2float.i20245, <float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000>
+  br i1 undef, label %for_loop594.lr.ph, label %for_exit595
+
+if_exit:                                          ; preds = %if_else
+  ret void
+
+for_loop594.lr.ph:                                ; preds = %for_loop156.lr.ph
+  %bitop8.i20221 = and <8 x i32> %uint2uint32.i20218, <i32 1023, i32 1023, i32 1023, i32 1023, i32 1023, i32 1023, i32 1023, i32 1023>
+  br i1 undef, label %cif_test_all730, label %cif_mask_mixed1552
+
+for_exit595:                                      ; preds = %for_loop156.lr.ph
+  unreachable
+
+cif_test_all730:                                  ; preds = %for_loop594.lr.ph
+  %binop11.i20545 = fmul <8 x float> %binop408, zeroinitializer
+  %binop12.i20546 = fadd <8 x float> undef, %binop11.i20545
+  %binop15.i20547 = fmul <8 x float> %binop411, undef
+  %binop16.i20548 = fadd <8 x float> %binop12.i20546, %binop15.i20547
+  %bincmp774 = fcmp ogt <8 x float> %binop16.i20548, zeroinitializer
+  %val_to_boolvec32775 = sext <8 x i1> %bincmp774 to <8 x i32>
+  %floatmask.i20549 = bitcast <8 x i32> %val_to_boolvec32775 to <8 x float>
+  %v.i20550 = tail call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %floatmask.i20549) nounwind readnone
+  %cond = icmp eq i32 %v.i20550, 255
+  br i1 %cond, label %cif_test_all794, label %cif_test_mixed
+
+cif_test_all794:                                  ; preds = %cif_test_all730
+  %binop.i20572 = fmul <8 x float> %binop403, undef
+  unreachable
+
+cif_test_mixed:                                   ; preds = %cif_test_all730
+  %binop1207 = fmul <8 x float> %binop.i20246, undef
+  unreachable
+
+cif_mask_mixed1552:                               ; preds = %for_loop594.lr.ph
+  unreachable
+}
diff --git a/test/CodeGen/X86/2011-11-30-or.ll b/test/CodeGen/X86/2011-11-30-or.ll
new file mode 100644
index 0000000..0a949eb
--- /dev/null
+++ b/test/CodeGen/X86/2011-11-30-or.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "x86_64-apple-macosx10.6.6"
+
+; Test that the order of operands is correct
+; CHECK: select_func
+; CHECK: pblendvb        %xmm1, %xmm2
+; CHECK: ret
+
+define void @select_func() {
+entry:
+  %c.lobit.i.i.i = ashr <8 x i16> <i16 17, i16 5, i16 1, i16 15, i16 19, i16 15, i16 4, i16 1> , <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+  %a35 = bitcast <8 x i16> %c.lobit.i.i.i to <2 x i64>
+  %and.i56.i.i.i = and <8 x i16> %c.lobit.i.i.i, <i16 25, i16 8, i16 65, i16 25, i16 8, i16 95, i16 15, i16 45>
+  %and.i5.i.i.i = bitcast <8 x i16> %and.i56.i.i.i to <2 x i64>
+  %neg.i.i.i.i = xor <2 x i64> %a35, <i64 -1, i64 -1>
+  %and.i.i.i.i = and <2 x i64> zeroinitializer, %neg.i.i.i.i
+  %or.i.i.i.i = or <2 x i64> %and.i.i.i.i, %and.i5.i.i.i
+  %a37 = bitcast <2 x i64> %or.i.i.i.i to <8 x i16>
+  store <8 x i16> %a37, <8 x i16> addrspace(1)* undef, align 4
+  ret void
+}
+
+
diff --git a/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll b/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll
new file mode 100644
index 0000000..fcaabdd
--- /dev/null
+++ b/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; PR11494
+
+define void @test(<4 x i32>* nocapture %p) nounwind {
+  ; CHECK: test:
+  ; CHECK: vpxor %xmm0, %xmm0, %xmm0
+  ; CHECK-NEXT: vpmaxsd {{.*}}, %xmm0, %xmm0
+  ; CHECK-NEXT: vmovdqu	%xmm0, (%rdi)
+  ; CHECK-NEXT: ret
+  %a = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> <i32 -8, i32 -9, i32 -10, i32 -11>, <4 x i32> zeroinitializer) nounwind
+  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3>
+  %c = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  store <4 x i32> %c, <4 x i32>* %p, align 1
+  ret void
+}
+
+declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
+declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
diff --git a/test/CodeGen/X86/2011-12-06-BitcastVectorGlobal.ll b/test/CodeGen/X86/2011-12-06-BitcastVectorGlobal.ll
new file mode 100644
index 0000000..7a4126f
--- /dev/null
+++ b/test/CodeGen/X86/2011-12-06-BitcastVectorGlobal.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; PR11495
+
+; CHECK: 1311768467463790320
+@v = global <2 x float> bitcast (<1 x i64> <i64 1311768467463790320> to <2 x float>), align 8
diff --git a/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll b/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll
new file mode 100644
index 0000000..1561784
--- /dev/null
+++ b/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll
@@ -0,0 +1,80 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx -mattr=+avx
+; Various missing patterns causing crashes.
+; rdar://10538793
+
+define void @t1() nounwind {
+entry:
+  br label %loop.cond
+
+loop.cond:                                        ; preds = %t1.exit, %entry
+  br i1 false, label %return, label %loop
+
+loop:                                             ; preds = %loop.cond
+  br i1 undef, label %0, label %t1.exit
+
+; <label>:0                                       ; preds = %loop
+  %1 = load <16 x i32> addrspace(1)* undef, align 64
+  %2 = shufflevector <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <16 x i32> %1, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0>
+  store <16 x i32> %2, <16 x i32> addrspace(1)* undef, align 64
+  br label %t1.exit
+
+t1.exit:                                 ; preds = %0, %loop
+  br label %loop.cond
+
+return:                                           ; preds = %loop.cond
+  ret void
+}
+
+define void @t2() nounwind {
+  br i1 undef, label %1, label %4
+
+; <label>:1                                       ; preds = %0
+  %2 = load <16 x i32> addrspace(1)* undef, align 64
+  %3 = shufflevector <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <16 x i32> %2, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 0, i32 0, i32 0, i32 0>
+  store <16 x i32> %3, <16 x i32> addrspace(1)* undef, align 64
+  br label %4
+
+; <label>:4                                       ; preds = %1, %0
+  ret void
+}
+
+define void @t3() nounwind {
+entry:
+  br label %loop.cond
+
+loop.cond:                                        ; preds = %t2.exit, %entry
+  br i1 false, label %return, label %loop
+
+loop:                                             ; preds = %loop.cond
+  br i1 undef, label %0, label %t2.exit
+
+; <label>:0                                       ; preds = %loop
+  %1 = shufflevector <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 25, i32 0>
+  %2 = load <16 x i32> addrspace(1)* undef, align 64
+  %3 = shufflevector <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <16 x i32> %2, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 28, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  store <16 x i32> %3, <16 x i32> addrspace(1)* undef, align 64
+  br label %t2.exit
+
+t2.exit:                                 ; preds = %0, %loop
+  br label %loop.cond
+
+return:                                           ; preds = %loop.cond
+  ret void
+}
+
+define <3 x i64> @t4() nounwind {
+entry:
+  %0 = load <2 x i64> addrspace(1)* undef, align 16
+  %1 = extractelement <2 x i64> %0, i32 0
+  %2 = insertelement <3 x i64> <i64 undef, i64 0, i64 0>, i64 %1, i32 0
+  ret <3 x i64> %2
+}
+
+define void @t5() nounwind {
+entry:
+  %0 = shufflevector <2 x i64> zeroinitializer, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %1 = shufflevector <8 x i64> <i64 0, i64 0, i64 0, i64 undef, i64 undef, i64 0, i64 0, i64 0>, <8 x i64> %0, <8 x i32> <i32 0, i32 1, i32 2, i32 9, i32 8, i32 5, i32 6, i32 7>
+  store <8 x i64> %1, <8 x i64> addrspace(1)* undef, align 64
+
+  ret void
+}
diff --git a/test/CodeGen/X86/2011-12-15-vec_shift.ll b/test/CodeGen/X86/2011-12-15-vec_shift.ll
new file mode 100644
index 0000000..6f9188c
--- /dev/null
+++ b/test/CodeGen/X86/2011-12-15-vec_shift.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=x86-64 -mattr=+sse41 < %s | FileCheck %s -check-prefix=CHECK-W-SSE4
+; RUN: llc -march=x86-64 -mattr=-sse41 < %s | FileCheck %s -check-prefix=CHECK-WO-SSE4
+; Test case for r146671
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7"
+
+define <16 x i8> @shift(<16 x i8> %a, <16 x i8> %b) nounwind {
+  ; Make sure operands to pblend are in the right order.
+  ; CHECK-W-SSE4: psllw $4, [[REG1:%xmm.]]
+  ; CHECK-W-SSE4: pblendvb [[REG1]],{{ %xmm.}}
+  ; CHECK-W-SSE4: psllw $2
+
+  ; Make sure we're masking and pcmp'ing the VSELECT conditon vector.
+  ; CHECK-WO-SSE4: psllw $5, [[REG1:%xmm.]]
+  ; CHECK-WO-SSE4: pand [[REG1]], [[REG2:%xmm.]]
+  ; CHECK-WO-SSE4: pcmpeqb {{%xmm., }}[[REG2]]
+  %1 = shl <16 x i8> %a, %b
+  ret <16 x i8> %1
+}
diff --git a/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll b/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
new file mode 100644
index 0000000..39c213f
--- /dev/null
+++ b/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
@@ -0,0 +1,16 @@
+; RUN: llc -march=x86-64 -mattr=-sse42,+sse41 < %s | FileCheck %s
+; Make sure we don't load from the location pointed to by %p
+; twice: it has non-obvious performance implications, and
+; the relevant transformation doesn't know how to update
+; the chains correctly.
+; PR10747
+
+; CHECK: test:
+; CHECK: pextrd $2, %xmm
+define <4 x i32> @test(<4 x i32>* %p) {
+  %v = load <4 x i32>* %p
+  %e = extractelement <4 x i32> %v, i32 2
+  %cmp = icmp eq i32 %e, 3
+  %sel = select i1 %cmp, <4 x i32> %v, <4 x i32> zeroinitializer
+  ret <4 x i32> %sel
+}
diff --git a/test/CodeGen/X86/2011-12-28-vselecti8.ll b/test/CodeGen/X86/2011-12-28-vselecti8.ll
new file mode 100644
index 0000000..dbc122a
--- /dev/null
+++ b/test/CodeGen/X86/2011-12-28-vselecti8.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin  -mcpu=corei7 | FileCheck %s
+; ModuleID = '<stdin>'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin11.2.0"
+
+; CHECK: @foo8
+; CHECK: psll
+; CHECK: psraw
+; CHECK: pblendvb
+; CHECK: ret
+define void @foo8(float* nocapture %RET) nounwind {
+allocas:
+  %resultvec.i = select <8 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, <8 x i8> <i8 100, i8 100, i8 100, i8 100, i8 100, i8 100, i8 100, i8 100>
+  %uint2float = uitofp <8 x i8> %resultvec.i to <8 x float>
+  %ptr = bitcast float * %RET to <8 x float> *
+  store <8 x float> %uint2float, <8 x float>* %ptr, align 4
+  ret void
+}
+
+
diff --git a/test/CodeGen/X86/2011-12-8-bitcastintprom.ll b/test/CodeGen/X86/2011-12-8-bitcastintprom.ll
new file mode 100644
index 0000000..e2b3ebc
--- /dev/null
+++ b/test/CodeGen/X86/2011-12-8-bitcastintprom.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck %s
+
+; Make sure that the conversion between v4i8 to v2i16 is not a simple bitcast.
+; CHECK: prom_bug
+; CHECK: shufb
+; CHECK: movd
+; CHECK: movw
+; CHECK: ret
+define void @prom_bug(<4 x i8> %t, i16* %p) {
+  %r = bitcast <4 x i8> %t to <2 x i16>
+  %o = extractelement <2 x i16> %r, i32 0
+  store i16 %o, i16* %p
+  ret void
+}
+
diff --git a/test/CodeGen/X86/2011-20-21-zext-ui2fp.ll b/test/CodeGen/X86/2011-20-21-zext-ui2fp.ll
new file mode 100644
index 0000000..75efcf5
--- /dev/null
+++ b/test/CodeGen/X86/2011-20-21-zext-ui2fp.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+target triple = "x86_64-unknown-linux-gnu"
+
+; Check that the booleans are converted using zext and not via sext.
+; 0x1 means that we only look at the first bit.
+
+;CHECK: 0x1
+;CHECK: ui_to_fp_conv
+;CHECK: ret
+define void @ui_to_fp_conv(<8 x float> * nocapture %aFOO, <8 x float>* nocapture %RET) nounwind {
+allocas:
+  %bincmp = fcmp olt <8 x float> <float 1.000000e+00, float 1.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00> , <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
+  %bool2float = uitofp <8 x i1> %bincmp to <8 x float>
+  store <8 x float> %bool2float, <8 x float>* %RET, align 4
+  ret void
+}
+
+
+
diff --git a/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll b/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
new file mode 100644
index 0000000..832a8eb
--- /dev/null
+++ b/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
@@ -0,0 +1,155 @@
+; RUN: llc < %s -disable-fp-elim
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.7"
+
+; This test case has a landing pad with two predecessors, and a variable that
+; is undef on the first edge while carrying the first function return value on
+; the second edge.
+;
+; Live range splitting tries to isolate the block containing the first function
+; call, and it is important that the last split point is after the function call
+; so the return value can spill.
+;
+; <rdar://problem/10664933>
+
+@Exception = external unnamed_addr constant { i8*, i8* }
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+
+define void @f(i32* nocapture %arg, i32* nocapture %arg1, i32* nocapture %arg2, i32* nocapture %arg3, i32 %arg4, i32 %arg5) optsize ssp {
+bb:
+  br i1 undef, label %bb6, label %bb7
+
+bb6:                                              ; preds = %bb
+  %tmp = select i1 false, i32 0, i32 undef
+  br label %bb7
+
+bb7:                                              ; preds = %bb6, %bb
+  %tmp8 = phi i32 [ %tmp, %bb6 ], [ 0, %bb ]
+  %tmp9 = shl i32 %tmp8, 2
+  %tmp10 = invoke noalias i8* @_Znam(i32 undef) optsize
+          to label %bb11 unwind label %bb20
+
+bb11:                                             ; preds = %bb7
+  %tmp12 = ptrtoint i8* %tmp10 to i32
+  %tmp13 = bitcast i8* %tmp10 to i32*
+  %tmp14 = shl i32 %tmp8, 2
+  %tmp15 = getelementptr i32* %tmp13, i32 undef
+  %tmp16 = getelementptr i32* %tmp13, i32 undef
+  %tmp17 = zext i32 %tmp9 to i64
+  %tmp18 = add i64 %tmp17, -1
+  %tmp19 = icmp ugt i64 %tmp18, 4294967295
+  br i1 %tmp19, label %bb29, label %bb31
+
+bb20:                                             ; preds = %bb43, %bb41, %bb29, %bb7
+  %tmp21 = phi i32 [ undef, %bb7 ], [ %tmp12, %bb43 ], [ %tmp12, %bb29 ], [ %tmp12, %bb41 ]
+  %tmp22 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* bitcast ({ i8*, i8* }* @Exception to i8*)
+  br i1 undef, label %bb23, label %bb69
+
+bb23:                                             ; preds = %bb38, %bb20
+  %tmp24 = phi i32 [ %tmp12, %bb38 ], [ %tmp21, %bb20 ]
+  %tmp25 = icmp eq i32 %tmp24, 0
+  br i1 %tmp25, label %bb28, label %bb26
+
+bb26:                                             ; preds = %bb23
+  %tmp27 = inttoptr i32 %tmp24 to i8*
+  br label %bb28
+
+bb28:                                             ; preds = %bb26, %bb23
+  ret void
+
+bb29:                                             ; preds = %bb11
+  invoke void @OnOverFlow() optsize
+          to label %bb30 unwind label %bb20
+
+bb30:                                             ; preds = %bb29
+  unreachable
+
+bb31:                                             ; preds = %bb11
+  %tmp32 = bitcast i32* %tmp15 to i8*
+  %tmp33 = zext i32 %tmp8 to i64
+  %tmp34 = add i64 %tmp33, -1
+  %tmp35 = icmp ugt i64 %tmp34, 4294967295
+  %tmp36 = icmp sgt i32 %tmp8, 0
+  %tmp37 = add i32 %tmp9, -4
+  br label %bb38
+
+bb38:                                             ; preds = %bb67, %bb31
+  %tmp39 = phi i32 [ %tmp68, %bb67 ], [ undef, %bb31 ]
+  %tmp40 = icmp sgt i32 %tmp39, undef
+  br i1 %tmp40, label %bb41, label %bb23
+
+bb41:                                             ; preds = %bb38
+  invoke void @Pjii(i32* %tmp16, i32 0, i32 %tmp8) optsize
+          to label %bb42 unwind label %bb20
+
+bb42:                                             ; preds = %bb41
+  tail call void @llvm.memset.p0i8.i32(i8* %tmp32, i8 0, i32 %tmp9, i32 1, i1 false) nounwind
+  br i1 %tmp35, label %bb43, label %bb45
+
+bb43:                                             ; preds = %bb42
+  invoke void @OnOverFlow() optsize
+          to label %bb44 unwind label %bb20
+
+bb44:                                             ; preds = %bb43
+  unreachable
+
+bb45:                                             ; preds = %bb57, %bb42
+  %tmp46 = phi i32 [ %tmp58, %bb57 ], [ 255, %bb42 ]
+  %tmp47 = icmp slt i32 undef, 0
+  br i1 %tmp47, label %bb48, label %bb59
+
+bb48:                                             ; preds = %bb45
+  tail call void @llvm.memset.p0i8.i32(i8* %tmp32, i8 0, i32 %tmp9, i32 1, i1 false) nounwind
+  br i1 %tmp36, label %bb49, label %bb57
+
+bb49:                                             ; preds = %bb49, %bb48
+  %tmp50 = phi i32 [ %tmp55, %bb49 ], [ 0, %bb48 ]
+  %tmp51 = add i32 %tmp50, undef
+  %tmp52 = add i32 %tmp50, undef
+  %tmp53 = getelementptr i32* %tmp13, i32 %tmp52
+  %tmp54 = load i32* %tmp53, align 4, !tbaa !0
+  %tmp55 = add i32 %tmp50, 1
+  %tmp56 = icmp eq i32 %tmp55, %tmp8
+  br i1 %tmp56, label %bb57, label %bb49
+
+bb57:                                             ; preds = %bb49, %bb48
+  %tmp58 = add i32 %tmp46, -1
+  br label %bb45
+
+bb59:                                             ; preds = %bb45
+  %tmp60 = ashr i32 %tmp46, 31
+  tail call void @llvm.memset.p0i8.i32(i8* null, i8 0, i32 %tmp37, i32 1, i1 false) nounwind
+  br i1 %tmp36, label %bb61, label %bb67
+
+bb61:                                             ; preds = %bb61, %bb59
+  %tmp62 = phi i32 [ %tmp65, %bb61 ], [ 0, %bb59 ]
+  %tmp63 = add i32 %tmp62, %tmp14
+  %tmp64 = getelementptr i32* %tmp13, i32 %tmp63
+  store i32 0, i32* %tmp64, align 4, !tbaa !0
+  %tmp65 = add i32 %tmp62, 1
+  %tmp66 = icmp eq i32 %tmp65, %tmp8
+  br i1 %tmp66, label %bb67, label %bb61
+
+bb67:                                             ; preds = %bb61, %bb59
+  %tmp68 = add i32 %tmp39, -1
+  br label %bb38
+
+bb69:                                             ; preds = %bb20
+  resume { i8*, i32 } %tmp22
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+declare noalias i8* @_Znam(i32) optsize
+
+declare void @Pjii(i32*, i32, i32) optsize
+
+declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone
+
+declare void @OnOverFlow() noreturn optsize ssp align 2
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/X86/2012-01-11-split-cv.ll b/test/CodeGen/X86/2012-01-11-split-cv.ll
new file mode 100644
index 0000000..6b90072
--- /dev/null
+++ b/test/CodeGen/X86/2012-01-11-split-cv.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7-avx -mattr=+avx -mtriple=i686-pc-win32 | FileCheck %s
+
+;CHECK: add18i16
+define void @add18i16(<18 x i16>* nocapture sret %ret, <18 x i16>* %bp) nounwind {
+;CHECK: vmovups
+  %b = load <18 x i16>* %bp, align 16
+  %x = add <18 x i16> zeroinitializer, %b
+  store <18 x i16> %x, <18 x i16>* %ret, align 16
+;CHECK: ret
+  ret void
+}
+
diff --git a/test/CodeGen/X86/2012-01-12-extract-sv.ll b/test/CodeGen/X86/2012-01-12-extract-sv.ll
new file mode 100644
index 0000000..fa8e80f
--- /dev/null
+++ b/test/CodeGen/X86/2012-01-12-extract-sv.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7-avx -mattr=+avx -mtriple=i686-pc-win32 | FileCheck %s
+
+; CHECK: endless_loop
+define void @endless_loop() {
+entry:
+  %0 = load <8 x i32> addrspace(1)* undef, align 32
+  %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <16 x i32> <i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %2 = shufflevector <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef>, <16 x i32> %1, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 17>
+  store <16 x i32> %2, <16 x i32> addrspace(1)* undef, align 64
+  ret void
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll b/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll
new file mode 100644
index 0000000..a883d79
--- /dev/null
+++ b/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=i686-linux -mattr=-sse | FileCheck %s
+; PR11768
+
+@ptr = external global i8*
+
+define void @baz() nounwind ssp {
+entry:
+  %0 = load i8** @ptr, align 4
+  %cmp = icmp eq i8* %0, null
+  fence seq_cst
+  br i1 %cmp, label %if.then, label %if.else
+
+; Make sure the fence comes before the comparison, since it
+; clobbers EFLAGS.
+
+; CHECK: lock
+; CHECK-NEXT: orl {{.*}}, (%esp)
+; CHECK-NEXT: cmpl $0
+
+if.then:                                          ; preds = %entry
+  tail call void bitcast (void (...)* @foo to void ()*)() nounwind
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  tail call void bitcast (void (...)* @bar to void ()*)() nounwind
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret void
+}
+
+declare void @foo(...)
+
+declare void @bar(...)
diff --git a/test/CodeGen/X86/2012-01-18-vbitcast.ll b/test/CodeGen/X86/2012-01-18-vbitcast.ll
new file mode 100644
index 0000000..8a3ccc8
--- /dev/null
+++ b/test/CodeGen/X86/2012-01-18-vbitcast.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mtriple=x86_64-pc-win32 | FileCheck %s
+
+;CHECK: vcast
+define <2 x i32> @vcast(<2 x float> %a, <2 x float> %b) {
+;CHECK: pshufd
+;CHECK: pshufd
+  %af = bitcast <2 x float> %a to <2 x i32>
+  %bf = bitcast <2 x float> %b to <2 x i32>
+  %x = sub <2 x i32> %af, %bf
+;CHECK: psubq
+  ret <2 x i32> %x
+;CHECK: ret
+}
+
diff --git a/test/CodeGen/X86/2012-02-12-dagco.ll b/test/CodeGen/X86/2012-02-12-dagco.ll
new file mode 100644
index 0000000..13723a2
--- /dev/null
+++ b/test/CodeGen/X86/2012-02-12-dagco.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx
+target triple = "x86_64-unknown-linux-gnu"
+; Make sure we are not crashing on this one
+define void @dagco_crash() {
+entry:
+  %srcval.i411.i = load <4 x i64>* undef, align 1
+  %0 = extractelement <4 x i64> %srcval.i411.i, i32 3
+  %srcval.i409.i = load <2 x i64>* undef, align 1
+  %1 = extractelement <2 x i64> %srcval.i409.i, i32 0
+  %2 = insertelement <8 x i64> undef, i64 %0, i32 5
+  %3 = insertelement <8 x i64> %2, i64 %1, i32 6
+  %4 = insertelement <8 x i64> %3, i64 undef, i32 7
+  store <8 x i64> %4, <8 x i64> addrspace(1)* undef, align 64
+  unreachable
+}
+
diff --git a/test/CodeGen/X86/2012-02-14-scalar.ll b/test/CodeGen/X86/2012-02-14-scalar.ll
new file mode 100644
index 0000000..1dc076b
--- /dev/null
+++ b/test/CodeGen/X86/2012-02-14-scalar.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx
+target triple = "x86_64-unknown-linux-gnu"
+; Make sure we are not crashing on this one
+define void @autogen_28112_5000() {
+BB:
+  %S17 = icmp sgt <1 x i64> undef, undef
+  %E19 = extractelement <1 x i1> %S17, i32 0
+  br label %CF
+
+CF:                                               ; preds = %CF, %BB
+  %S23 = select i1 %E19, i8 undef, i8 undef
+  br label %CF
+}
diff --git a/test/CodeGen/X86/2012-02-20-MachineCPBug.ll b/test/CodeGen/X86/2012-02-20-MachineCPBug.ll
new file mode 100644
index 0000000..557d49d
--- /dev/null
+++ b/test/CodeGen/X86/2012-02-20-MachineCPBug.ll
@@ -0,0 +1,78 @@
+; RUN: llc < %s -mtriple=i386-apple-macosx -mattr=+sse | FileCheck %s
+; PR11940: Do not optimize away movb %al, %ch
+
+%struct.APInt = type { i64* }
+
+declare noalias i8* @calloc(i32, i32) nounwind
+
+define void @bug(%struct.APInt* noalias nocapture sret %agg.result, %struct.APInt* nocapture %this, i32 %rotateAmt) nounwind align 2 {
+entry:
+; CHECK: bug:
+  %call = tail call i8* @calloc(i32 1, i32 32)
+  %call.i = tail call i8* @calloc(i32 1, i32 32) nounwind
+  %0 = bitcast i8* %call.i to i64*
+  %rem.i = and i32 %rotateAmt, 63
+  %div.i = lshr i32 %rotateAmt, 6
+  %cmp.i = icmp eq i32 %rem.i, 0
+  br i1 %cmp.i, label %for.cond.preheader.i, label %if.end.i
+
+for.cond.preheader.i:                             ; preds = %entry
+  %sub.i = sub i32 4, %div.i
+  %cmp23.i = icmp eq i32 %div.i, 4
+  br i1 %cmp23.i, label %for.body9.lr.ph.i, label %for.body.lr.ph.i
+
+for.body.lr.ph.i:                                 ; preds = %for.cond.preheader.i
+  %pVal.i = getelementptr inbounds %struct.APInt* %this, i32 0, i32 0
+  %.pre5.i = load i64** %pVal.i, align 4
+  br label %for.body.i
+
+for.body.i:                                       ; preds = %for.body.i, %for.body.lr.ph.i
+  %i.04.i = phi i32 [ 0, %for.body.lr.ph.i ], [ %inc.i, %for.body.i ]
+  %add.i = add i32 %i.04.i, %div.i
+  %arrayidx.i = getelementptr inbounds i64* %.pre5.i, i32 %add.i
+  %1 = load i64* %arrayidx.i, align 4
+  %arrayidx3.i = getelementptr inbounds i64* %0, i32 %i.04.i
+  store i64 %1, i64* %arrayidx3.i, align 4
+  %inc.i = add i32 %i.04.i, 1
+  %cmp2.i = icmp ult i32 %inc.i, %sub.i
+  br i1 %cmp2.i, label %for.body.i, label %if.end.i
+
+if.end.i:                                         ; preds = %for.body.i, %entry
+  %cmp81.i = icmp eq i32 %div.i, 3
+  br i1 %cmp81.i, label %_ZNK5APInt4lshrEj.exit, label %for.body9.lr.ph.i
+
+for.body9.lr.ph.i:                                ; preds = %if.end.i, %for.cond.preheader.i
+  %sub58.i = sub i32 3, %div.i
+  %pVal11.i = getelementptr inbounds %struct.APInt* %this, i32 0, i32 0
+  %sh_prom.i = zext i32 %rem.i to i64
+  %sub17.i = sub i32 64, %rem.i
+  %sh_prom18.i = zext i32 %sub17.i to i64
+  %.pre.i = load i64** %pVal11.i, align 4
+  br label %for.body9.i
+
+for.body9.i:                                      ; preds = %for.body9.i, %for.body9.lr.ph.i
+; CHECK: %for.body9.i
+; CHECK: movb
+; CHECK: shrdl
+  %i6.02.i = phi i32 [ 0, %for.body9.lr.ph.i ], [ %inc21.i, %for.body9.i ]
+  %add10.i = add i32 %i6.02.i, %div.i
+  %arrayidx12.i = getelementptr inbounds i64* %.pre.i, i32 %add10.i
+  %2 = load i64* %arrayidx12.i, align 4
+  %shr.i = lshr i64 %2, %sh_prom.i
+  %add14.i = add i32 %add10.i, 1
+  %arrayidx16.i = getelementptr inbounds i64* %.pre.i, i32 %add14.i
+  %3 = load i64* %arrayidx16.i, align 4
+  %shl.i = shl i64 %3, %sh_prom18.i
+  %or.i = or i64 %shl.i, %shr.i
+  %arrayidx19.i = getelementptr inbounds i64* %0, i32 %i6.02.i
+  store i64 %or.i, i64* %arrayidx19.i, align 4
+  %inc21.i = add i32 %i6.02.i, 1
+  %cmp8.i = icmp ult i32 %inc21.i, %sub58.i
+  br i1 %cmp8.i, label %for.body9.i, label %_ZNK5APInt4lshrEj.exit
+
+_ZNK5APInt4lshrEj.exit:                           ; preds = %for.body9.i, %if.end.i
+  %call.i1 = tail call i8* @calloc(i32 1, i32 32) nounwind
+  %4 = getelementptr inbounds %struct.APInt* %agg.result, i32 0, i32 0
+  store i64* %0, i64** %4, align 4
+  ret void
+}
diff --git a/test/CodeGen/X86/2012-02-23-mmx-inlineasm.ll b/test/CodeGen/X86/2012-02-23-mmx-inlineasm.ll
new file mode 100644
index 0000000..a55c77b
--- /dev/null
+++ b/test/CodeGen/X86/2012-02-23-mmx-inlineasm.ll
@@ -0,0 +1,12 @@
+; RUN: llc -march=x86 -mcpu=i686 -mattr=+mmx < %s | FileCheck %s
+; <rdar://problem/10106006>
+
+define void @func() nounwind ssp {
+; CHECK:  psrlw %mm0, %mm1
+entry:
+  call void asm sideeffect "psrlw $0, %mm1", "y,~{dirflag},~{fpsr},~{flags}"(i32 8) nounwind
+  unreachable
+
+bb367:                                            ; preds = %entry                                                                                                                 
+  ret void
+}
diff --git a/test/CodeGen/X86/2012-02-29-CoalescerBug.ll b/test/CodeGen/X86/2012-02-29-CoalescerBug.ll
new file mode 100644
index 0000000..bdce853
--- /dev/null
+++ b/test/CodeGen/X86/2012-02-29-CoalescerBug.ll
@@ -0,0 +1,58 @@
+; RUN: llc -O1 <%s
+; PR12138
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.7.0"
+
+%struct.S0 = type { i8, i32 }
+
+@d = external global [2 x [2 x %struct.S0]], align 4
+@c = external global i32, align 4
+@e = external global i32, align 4
+@b = external global i32, align 4
+@a = external global i32, align 4
+
+define void @fn2() nounwind optsize ssp {
+entry:
+  store i64 0, i64* bitcast ([2 x [2 x %struct.S0]]* @d to i64*), align 4
+  %0 = load i32* @c, align 4
+  %tobool2 = icmp eq i32 %0, 0
+  %1 = load i32* @a, align 4
+  %tobool4 = icmp eq i32 %1, 0
+  br label %for.cond
+
+for.cond:                                         ; preds = %if.end, %entry
+  %f.1.0 = phi i32 [ undef, %entry ], [ %sub, %if.end ]
+  %g.0 = phi i64 [ 0, %entry ], [ %ins, %if.end ]
+  %tobool = icmp eq i32 %f.1.0, 0
+  br i1 %tobool, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.cond
+  %2 = lshr i64 %g.0, 32
+  %conv = trunc i64 %2 to i16
+  br i1 %tobool2, label %lor.rhs, label %lor.end
+
+lor.rhs:                                          ; preds = %for.body
+  store i32 1, i32* @e, align 4
+  br label %lor.end
+
+lor.end:                                          ; preds = %lor.rhs, %for.body
+  %xor.i = xor i16 %conv, 1
+  %p1.lobit.i8 = lshr i64 %g.0, 47
+  %p1.lobit.i8.tr = trunc i64 %p1.lobit.i8 to i16
+  %p1.lobit.i = and i16 %p1.lobit.i8.tr, 1
+  %and.i = and i16 %p1.lobit.i, %xor.i
+  %3 = xor i16 %and.i, 1
+  %sub.conv.i = sub i16 %conv, %3
+  %conv3 = sext i16 %sub.conv.i to i32
+  store i32 %conv3, i32* @b, align 4
+  br i1 %tobool4, label %if.end, label %for.end
+
+if.end:                                           ; preds = %lor.end
+  %mask = and i64 %g.0, -256
+  %ins = or i64 %mask, 1
+  %sub = add nsw i32 %f.1.0, -1
+  br label %for.cond
+
+for.end:                                          ; preds = %lor.end, %for.cond
+  ret void
+}
diff --git a/test/CodeGen/X86/2012-03-15-build_vector_wl.ll b/test/CodeGen/X86/2012-03-15-build_vector_wl.ll
new file mode 100644
index 0000000..fec17e9
--- /dev/null
+++ b/test/CodeGen/X86/2012-03-15-build_vector_wl.ll
@@ -0,0 +1,10 @@
+
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; CHECK: build_vector_again
+define <4 x i8> @build_vector_again(<16 x i8> %in) nounwind readnone {
+entry:
+  %out = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK: shufb
+  ret <4 x i8> %out
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/2012-03-20-LargeConstantExpr.ll b/test/CodeGen/X86/2012-03-20-LargeConstantExpr.ll
new file mode 100644
index 0000000..d24647e
--- /dev/null
+++ b/test/CodeGen/X86/2012-03-20-LargeConstantExpr.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; <rdar://problem/11070338>
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; CHECK:      _.memset_pattern:
+; CHECK-NEXT: .quad   4575657222473777152
+; CHECK-NEXT: .quad   4575657222473777152
+
+@.memset_pattern = internal unnamed_addr constant i128 or (i128 zext (i64 bitcast (<2 x float> <float 1.000000e+00, float 1.000000e+00> to i64) to i128), i128 shl (i128 zext (i64 bitcast (<2 x float> <float 1.000000e+00, float 1.000000e+00> to i64) to i128), i128 64)), align 16
+
+define void @foo(i8* %a, i64 %b) {
+  call void @memset_pattern16(i8* %a, i8* bitcast (i128* @.memset_pattern to i8*), i64 %b)
+  ret void
+}
+
+declare void @memset_pattern16(i8*, i8*, i64)
diff --git a/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll b/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll
new file mode 100644
index 0000000..101ecca
--- /dev/null
+++ b/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -stats |& \
+; RUN:   not grep {Number of machine instructions hoisted out of loops post regalloc}
+
+; rdar://11095580
+
+%struct.ref_s = type { %union.color_sample, i16, i16 }
+%union.color_sample = type { i64 }
+
+@table = external global [3891 x i64]
+
+declare i32 @foo()
+
+define i32 @zarray(%struct.ref_s* nocapture %op) nounwind ssp {
+entry:
+  %call = tail call i32 @foo()
+  %tmp = ashr i32 %call, 31
+  %0 = and i32 %tmp, 1396
+  %index9 = add i32 %0, 2397
+  indirectbr i8* undef, [label %return, label %if.end]
+
+if.end:                                           ; preds = %entry
+  %size5 = getelementptr inbounds %struct.ref_s* %op, i64 0, i32 2
+  %tmp6 = load i16* %size5, align 2
+  %tobool1 = icmp eq i16 %tmp6, 0
+  %1 = select i1 %tobool1, i32 1396, i32 -1910
+  %index10 = add i32 %index9, %1
+  indirectbr i8* undef, [label %return, label %while.body.lr.ph]
+
+while.body.lr.ph:                                 ; preds = %if.end
+  %refs = bitcast %struct.ref_s* %op to %struct.ref_s**
+  %tmp9 = load %struct.ref_s** %refs, align 8
+  %tmp4 = zext i16 %tmp6 to i64
+  %index13 = add i32 %index10, 1658
+  %2 = sext i32 %index13 to i64
+  %3 = getelementptr [3891 x i64]* @table, i64 0, i64 %2
+  %blockaddress14 = load i64* %3, align 8
+  %4 = inttoptr i64 %blockaddress14 to i8*
+  indirectbr i8* %4, [label %while.body]
+
+while.body:                                       ; preds = %while.body, %while.body.lr.ph
+  %index7 = phi i32 [ %index15, %while.body ], [ %index13, %while.body.lr.ph ]
+  %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %while.body.lr.ph ]
+  %type_attrs = getelementptr %struct.ref_s* %tmp9, i64 %indvar, i32 1
+  store i16 32, i16* %type_attrs, align 2
+  %indvar.next = add i64 %indvar, 1
+  %exitcond5 = icmp eq i64 %indvar.next, %tmp4
+  %tmp7 = select i1 %exitcond5, i32 1648, i32 0
+  %index15 = add i32 %index7, %tmp7
+  %tmp8 = select i1 %exitcond5, i64 13, i64 0
+  %5 = sext i32 %index15 to i64
+  %6 = getelementptr [3891 x i64]* @table, i64 0, i64 %5
+  %blockaddress16 = load i64* %6, align 8
+  %7 = inttoptr i64 %blockaddress16 to i8*
+  indirectbr i8* %7, [label %return, label %while.body]
+
+return:                                           ; preds = %while.body, %if.end, %entry
+  %retval.0 = phi i32 [ %call, %entry ], [ 0, %if.end ], [ 0, %while.body ]
+  ret i32 %retval.0
+}
diff --git a/test/CodeGen/X86/2012-04-09-TwoAddrPassBug.ll b/test/CodeGen/X86/2012-04-09-TwoAddrPassBug.ll
new file mode 100644
index 0000000..2d90165
--- /dev/null
+++ b/test/CodeGen/X86/2012-04-09-TwoAddrPassBug.ll
@@ -0,0 +1,34 @@
+; RUN: llc -O1 -verify-coalescing < %s
+; PR12495
+target datalayout =
+"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.0"
+
+define void @func(i8* nocapture) nounwind uwtable ssp align 2 {
+  br i1 undef, label %4, label %2
+
+; <label>:2                                       ; preds = %1                  
+  %3 = tail call double @foo() nounwind
+  br label %4
+
+; <label>:4                                       ; preds = %2, %1              
+  %5 = phi double [ %3, %2 ], [ 0.000000e+00, %1 ]
+  %6 = fsub double %5, undef
+  %7 = fcmp olt double %6, 0.000000e+00
+  %8 = select i1 %7, double 0.000000e+00, double %6
+  %9 = fcmp olt double undef, 0.000000e+00
+  %10 = fcmp olt double %8, undef
+  %11 = or i1 %9, %10
+  br i1 %11, label %12, label %14
+
+; <label>:12                                      ; preds = %4                  
+  %13 = tail call double @fmod(double %8, double 0.000000e+00) nounwind
+  unreachable
+
+; <label>:14                                      ; preds = %4                  
+  ret void
+}
+
+declare double @foo()
+
+declare double @fmod(double, double)
diff --git a/test/CodeGen/X86/2012-1-10-buildvector.ll b/test/CodeGen/X86/2012-1-10-buildvector.ll
new file mode 100644
index 0000000..ff6be36
--- /dev/null
+++ b/test/CodeGen/X86/2012-1-10-buildvector.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7-avx -mattr=+avx -mtriple=i686-pc-win32 | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S32"
+target triple = "i686-pc-win32"
+
+;CHECK: bad_cast
+define void @bad_cast() {
+entry:
+  %vext.i = shufflevector <2 x i64> undef, <2 x i64> undef, <3 x i32> <i32 0, i32 1, i32 undef>
+  %vecinit8.i = shufflevector <3 x i64> zeroinitializer, <3 x i64> %vext.i, <3 x i32> <i32 0, i32 3, i32 4>
+  store <3 x i64> %vecinit8.i, <3 x i64>* undef, align 32
+;CHECK: ret
+  ret void
+}
+
+
+;CHECK: bad_insert
+define void @bad_insert(i32 %t) {
+entry:
+;CHECK: vpinsrd
+  %v2 = insertelement <8 x i32> zeroinitializer, i32 %t, i32 0
+  store <8 x i32> %v2, <8 x i32> addrspace(1)* undef, align 32
+;CHECK: ret
+  ret void
+}
+
diff --git a/test/CodeGen/X86/GC/dg.exp b/test/CodeGen/X86/GC/dg.exp
deleted file mode 100644
index 629a147..0000000
--- a/test/CodeGen/X86/GC/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/X86/GC/lit.local.cfg b/test/CodeGen/X86/GC/lit.local.cfg
new file mode 100644
index 0000000..a8ad0f1
--- /dev/null
+++ b/test/CodeGen/X86/GC/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/X86/SwizzleShuff.ll b/test/CodeGen/X86/SwizzleShuff.ll
new file mode 100644
index 0000000..100817a
--- /dev/null
+++ b/test/CodeGen/X86/SwizzleShuff.ll
@@ -0,0 +1,68 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+; Check that we perform a scalar XOR on i32.
+
+; CHECK: pull_bitcast
+; CHECK: xorl
+; CHECK: ret
+define void @pull_bitcast (<4 x i8>* %pA, <4 x i8>* %pB) {
+  %A = load <4 x i8>* %pA
+  %B = load <4 x i8>* %pB
+  %C = xor <4 x i8> %A, %B
+  store <4 x i8> %C, <4 x i8>* %pA
+  ret void
+}
+
+; CHECK: multi_use_swizzle
+; CHECK: mov
+; CHECK-NEXT: shuf
+; CHECK-NEXT: shuf
+; CHECK-NEXT: shuf
+; CHECK-NEXT: xor
+; CHECK-NEXT: ret
+define <4 x i32> @multi_use_swizzle (<4 x i32>* %pA, <4 x i32>* %pB) {
+  %A = load <4 x i32>* %pA
+  %B = load <4 x i32>* %pB
+  %S = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 1, i32 5, i32 6>
+  %S1 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 2>
+  %S2 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 2>
+  %R = xor <4 x i32> %S1, %S2
+  ret <4 x i32> %R
+}
+
+; CHECK: pull_bitcast2
+; CHECK: xorl
+; CHECK: ret
+define <4 x i8> @pull_bitcast2 (<4 x i8>* %pA, <4 x i8>* %pB, <4 x i8>* %pC) {
+  %A = load <4 x i8>* %pA
+  store <4 x i8> %A, <4 x i8>* %pC
+  %B = load <4 x i8>* %pB
+  %C = xor <4 x i8> %A, %B
+  store <4 x i8> %C, <4 x i8>* %pA
+  ret <4 x i8> %C
+}
+
+
+
+; CHECK: reverse_1
+; CHECK-NOT: shuf
+; CHECK: ret
+define <4 x i32> @reverse_1 (<4 x i32>* %pA, <4 x i32>* %pB) {
+  %A = load <4 x i32>* %pA
+  %B = load <4 x i32>* %pB
+  %S = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  %S1 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  ret <4 x i32> %S1
+}
+
+
+; CHECK: no_reverse_shuff
+; CHECK: shuf
+; CHECK: ret
+define <4 x i32> @no_reverse_shuff (<4 x i32>* %pA, <4 x i32>* %pB) {
+  %A = load <4 x i32>* %pA
+  %B = load <4 x i32>* %pB
+  %S = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  %S1 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2>
+  ret <4 x i32> %S1
+}
diff --git a/test/CodeGen/X86/abi-isel.ll b/test/CodeGen/X86/abi-isel.ll
index 5068d29..658ccaa 100644
--- a/test/CodeGen/X86/abi-isel.ll
+++ b/test/CodeGen/X86/abi-isel.ll
@@ -1,16 +1,16 @@
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC
 
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC
 
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC
 
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC
 
 @src = external global [131072 x i32]
 @dst = external global [131072 x i32]
diff --git a/test/CodeGen/X86/add.ll b/test/CodeGen/X86/add.ll
index 7bf527a..8e871f4 100644
--- a/test/CodeGen/X86/add.ll
+++ b/test/CodeGen/X86/add.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-linux -join-physregs | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -mtriple=x86_64-win32 -join-physregs | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -join-physregs | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32 -join-physregs | FileCheck %s -check-prefix=X64
 
 ; Some of these tests depend on -join-physregs to commute instructions.
 
diff --git a/test/CodeGen/X86/apm.ll b/test/CodeGen/X86/apm.ll
index b514cf6..aaedf18 100644
--- a/test/CodeGen/X86/apm.ll
+++ b/test/CodeGen/X86/apm.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse3 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse3 | FileCheck %s -check-prefix=WIN64
 ; PR8573
 
 ; CHECK: foo:
diff --git a/test/CodeGen/X86/atom-lea-sp.ll b/test/CodeGen/X86/atom-lea-sp.ll
new file mode 100644
index 0000000..5942788
--- /dev/null
+++ b/test/CodeGen/X86/atom-lea-sp.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux  | FileCheck -check-prefix=atom %s
+; RUN: llc < %s -mcpu=core2 -mtriple=i686-linux | FileCheck %s
+
+declare void @use_arr(i8*)
+declare void @many_params(i32, i32, i32, i32, i32, i32)
+
+define void @test1() nounwind {
+; atom: test1:
+; atom: leal -1052(%esp), %esp
+; atom-NOT: sub
+; atom: call
+; atom: leal 1052(%esp), %esp
+
+; CHECK: test1:
+; CHECK: subl
+; CHECK: call
+; CHECK-NOT: lea
+  %arr = alloca [1024 x i8], align 16
+  %arr_ptr = getelementptr inbounds [1024 x i8]* %arr, i8 0, i8 0
+  call void @use_arr(i8* %arr_ptr)
+  ret void
+}
+
+define void @test2() nounwind {
+; atom: test2:
+; atom: leal -28(%esp), %esp
+; atom: call
+; atom: leal 28(%esp), %esp
+
+; CHECK: test2:
+; CHECK-NOT: lea
+  call void @many_params(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6)
+  ret void
+}
+
+define void @test3() nounwind {
+; atom: test3:
+; atom: leal -8(%esp), %esp
+; atom: leal 8(%esp), %esp
+
+; CHECK: test3:
+; CHECK-NOT: lea
+  %x = alloca i32, align 4
+  %y = alloca i32, align 4
+  store i32 0, i32* %x, align 4
+  ret void
+}
+
diff --git a/test/CodeGen/X86/atom-sched.ll b/test/CodeGen/X86/atom-sched.ll
new file mode 100644
index 0000000..2301dfc
--- /dev/null
+++ b/test/CodeGen/X86/atom-sched.ll
@@ -0,0 +1,28 @@
+; RUN: llc <%s -O2 -mcpu=atom -march=x86 -relocation-model=static | FileCheck -check-prefix=atom %s
+; RUN: llc <%s -O2 -mcpu=core2 -march=x86 -relocation-model=static | FileCheck %s
+
+@a = common global i32 0, align 4
+@b = common global i32 0, align 4
+@c = common global i32 0, align 4
+@d = common global i32 0, align 4
+@e = common global i32 0, align 4
+@f = common global i32 0, align 4
+
+define void @func() nounwind uwtable {
+; atom: imull
+; atom-NOT: movl
+; atom: imull
+; CHECK: imull
+; CHECK: movl
+; CHECK: imull
+entry:
+  %0 = load i32* @b, align 4
+  %1 = load i32* @c, align 4
+  %mul = mul nsw i32 %0, %1
+  store i32 %mul, i32* @a, align 4
+  %2 = load i32* @e, align 4
+  %3 = load i32* @f, align 4
+  %mul1 = mul nsw i32 %2, %3
+  store i32 %mul1, i32* @d, align 4
+  ret void
+}
diff --git a/test/CodeGen/X86/avx-arith.ll b/test/CodeGen/X86/avx-arith.ll
index 59988ca..4aa3370 100644
--- a/test/CodeGen/X86/avx-arith.ll
+++ b/test/CodeGen/X86/avx-arith.ll
@@ -259,3 +259,14 @@ define <4 x i64> @mul-v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
   ret <4 x i64> %x
 }
 
+declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
+
+define <4 x float> @int_sqrt_ss() {
+; CHECK: int_sqrt_ss
+; CHECK: vsqrtss
+ %x0 = load float addrspace(1)* undef, align 8
+ %x1 = insertelement <4 x float> undef, float %x0, i32 0
+ %x2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %x1) nounwind
+ ret <4 x float> %x2
+}
+
diff --git a/test/CodeGen/X86/avx-basic.ll b/test/CodeGen/X86/avx-basic.ll
index 0a46b08..8ad0fa8 100644
--- a/test/CodeGen/X86/avx-basic.ll
+++ b/test/CodeGen/X86/avx-basic.ll
@@ -6,7 +6,7 @@
 
 define void @zero128() nounwind ssp {
 entry:
-  ; CHECK: vpxor
+  ; CHECK: vxorps
   ; CHECK: vmovaps
   store <4 x float> zeroinitializer, <4 x float>* @z, align 16
   ret void
@@ -105,3 +105,19 @@ allocas:
   ret <8 x i32> %updatedret.i30.i
 }
 
+;;;; Don't crash on fneg
+; rdar://10566486
+; CHECK: fneg
+; CHECK: vxorps
+define <16 x float> @fneg(<16 x float> addrspace(1)* nocapture %out) nounwind {
+  %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+  ret <16 x float> %1
+}
+
+;;; Don't crash on build vector
+; CHECK: @build_vec_16x16
+; CHECK: vmovd
+define <16 x i16> @build_vec_16x16(i16 %a) nounwind readonly {
+  %res = insertelement <16 x i16> <i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 %a, i32 0
+  ret <16 x i16> %res
+}
diff --git a/test/CodeGen/X86/avx-cast.ll b/test/CodeGen/X86/avx-cast.ll
index d6d2415..32d450c 100644
--- a/test/CodeGen/X86/avx-cast.ll
+++ b/test/CodeGen/X86/avx-cast.ll
@@ -16,7 +16,7 @@ entry:
   ret <4 x double> %shuffle.i
 }
 
-; CHECK: vpxor
+; CHECK: vxorps
 ; CHECK-NEXT: vinsertf128 $0
 define <4 x i64> @castC(<2 x i64> %m) nounwind uwtable readnone ssp {
 entry:
diff --git a/test/CodeGen/X86/avx-cvt.ll b/test/CodeGen/X86/avx-cvt.ll
index 6c0bd58..d0a7fe0 100644
--- a/test/CodeGen/X86/avx-cvt.ll
+++ b/test/CodeGen/X86/avx-cvt.ll
@@ -18,7 +18,7 @@ define <4 x double> @sitofp01(<4 x i32> %a) {
   ret <4 x double> %b
 }
 
-; CHECK: vcvtpd2dqy %ymm
+; CHECK: vcvttpd2dqy %ymm
 define <4 x i32> @fptosi01(<4 x double> %a) {
   %b = fptosi <4 x double> %a to <4 x i32>
   ret <4 x i32> %b
diff --git a/test/CodeGen/X86/avx-fp2int.ll b/test/CodeGen/X86/avx-fp2int.ll
new file mode 100755
index 0000000..a3aadde
--- /dev/null
+++ b/test/CodeGen/X86/avx-fp2int.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+;; Check that FP_TO_SINT and FP_TO_UINT generate convert with truncate
+
+; CHECK: test1:
+; CHECK: vcvttpd2dqy
+; CHECK: ret
+; CHECK: test2:
+; CHECK: vcvttpd2dqy
+; CHECK: ret
+
+define <4 x i8> @test1(<4 x double> %d) {
+  %c = fptoui <4 x double> %d to <4 x i8>
+  ret <4 x i8> %c
+}
+define <4 x i8> @test2(<4 x double> %d) {
+  %c = fptosi <4 x double> %d to <4 x i8>
+  ret <4 x i8> %c
+}
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index 5201688..b334932 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -245,34 +245,6 @@ define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
 declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
 
 
-define <16 x i8> @test_x86_sse2_loadu_dq(i8* %a0) {
-  ; CHECK: movl
-  ; CHECK: vmovups
-  %res = call <16 x i8> @llvm.x86.sse2.loadu.dq(i8* %a0) ; <<16 x i8>> [#uses=1]
-  ret <16 x i8> %res
-}
-declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*) nounwind readonly
-
-
-define <2 x double> @test_x86_sse2_loadu_pd(i8* %a0) {
-  ; CHECK: movl
-  ; CHECK: vmovups
-  %res = call <2 x double> @llvm.x86.sse2.loadu.pd(i8* %a0) ; <<2 x double>> [#uses=1]
-  ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.sse2.loadu.pd(i8*) nounwind readonly
-
-
-define void @test_x86_sse2_maskmov_dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) {
-  ; CHECK: pushl
-  ; CHECK: movl
-  ; CHECK: vmaskmovdqu
-  ; CHECK: popl
-  call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2)
-  ret void
-}
-declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind
-
 
 define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
   ; CHECK: vmaxpd
@@ -314,25 +286,10 @@ define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
 declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
 
 
-define void @test_x86_sse2_movnt_dq(i8* %a0, <2 x i64> %a1) {
-  ; CHECK: movl
-  ; CHECK: vmovntdq
-  call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a1)
-  ret void
-}
-declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x i64>) nounwind
-
-
-define void @test_x86_sse2_movnt_pd(i8* %a0, <2 x double> %a1) {
-  ; CHECK: movl
-  ; CHECK: vmovntpd
-  call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a1)
-  ret void
-}
-declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind
 
 
 define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: test_x86_sse2_mul_sd
   ; CHECK: vmulsd
   %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
   ret <2 x double> %res
@@ -412,54 +369,6 @@ define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
 declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
 
 
-define <16 x i8> @test_x86_sse2_pcmpeq_b(<16 x i8> %a0, <16 x i8> %a1) {
-  ; CHECK: vpcmpeqb
-  %res = call <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
-  ret <16 x i8> %res
-}
-declare <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8>, <16 x i8>) nounwind readnone
-
-
-define <4 x i32> @test_x86_sse2_pcmpeq_d(<4 x i32> %a0, <4 x i32> %a1) {
-  ; CHECK: vpcmpeqd
-  %res = call <4 x i32> @llvm.x86.sse2.pcmpeq.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
-  ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.sse2.pcmpeq.d(<4 x i32>, <4 x i32>) nounwind readnone
-
-
-define <8 x i16> @test_x86_sse2_pcmpeq_w(<8 x i16> %a0, <8 x i16> %a1) {
-  ; CHECK: vpcmpeqw
-  %res = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
-  ret <8 x i16> %res
-}
-declare <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16>, <8 x i16>) nounwind readnone
-
-
-define <16 x i8> @test_x86_sse2_pcmpgt_b(<16 x i8> %a0, <16 x i8> %a1) {
-  ; CHECK: vpcmpgtb
-  %res = call <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
-  ret <16 x i8> %res
-}
-declare <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8>, <16 x i8>) nounwind readnone
-
-
-define <4 x i32> @test_x86_sse2_pcmpgt_d(<4 x i32> %a0, <4 x i32> %a1) {
-  ; CHECK: vpcmpgtd
-  %res = call <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
-  ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32>, <4 x i32>) nounwind readnone
-
-
-define <8 x i16> @test_x86_sse2_pcmpgt_w(<8 x i16> %a0, <8 x i16> %a1) {
-  ; CHECK: vpcmpgtw
-  %res = call <8 x i16> @llvm.x86.sse2.pcmpgt.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
-  ret <8 x i16> %res
-}
-declare <8 x i16> @llvm.x86.sse2.pcmpgt.w(<8 x i16>, <8 x i16>) nounwind readnone
-
-
 define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
   ; CHECK: vpmaddwd
   %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1]
@@ -749,6 +658,7 @@ declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
 
 
 define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
+  ; CHECK: test_x86_sse2_storel_dq
   ; CHECK: movl
   ; CHECK: vmovq
   call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
@@ -758,6 +668,7 @@ declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
 
 
 define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
+  ; CHECK: test_x86_sse2_storeu_dq
   ; CHECK: movl
   ; CHECK: vmovdqu
   call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a1)
@@ -767,15 +678,18 @@ declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
 
 
 define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
+  ; CHECK: test_x86_sse2_storeu_pd
   ; CHECK: movl
   ; CHECK: vmovupd
-  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a1)
+  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
+  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
   ret void
 }
 declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
 
 
 define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: test_x86_sse2_sub_sd
   ; CHECK: vsubsd
   %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
   ret <2 x double> %res
@@ -955,21 +869,13 @@ define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
 
 
-define <2 x i64> @test_x86_sse41_movntdqa(i8* %a0) {
-  ; CHECK: movl
-  ; CHECK: vmovntdqa
-  %res = call <2 x i64> @llvm.x86.sse41.movntdqa(i8* %a0) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse41.movntdqa(i8*) nounwind readonly
-
 
-define <16 x i8> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
+define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
   ; CHECK: vmpsadbw
-  %res = call <16 x i8> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<16 x i8>> [#uses=1]
-  ret <16 x i8> %res
+  %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
 }
-declare <16 x i8> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone
+declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone
 
 
 define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
@@ -996,14 +902,6 @@ define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
 declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone
 
 
-define <2 x i64> @test_x86_sse41_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) {
-  ; CHECK: vpcmpeqq
-  %res = call <2 x i64> @llvm.x86.sse41.pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse41.pcmpeqq(<2 x i64>, <2 x i64>) nounwind readnone
-
-
 define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
   ; CHECK: vphminposuw
   %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
@@ -1180,33 +1078,33 @@ define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
 declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
 
 
-define i32 @test_x86_sse41_ptestc(<4 x float> %a0, <4 x float> %a1) {
+define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) {
   ; CHECK: vptest 
   ; CHECK: sbbl
-  %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+  %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
   ret i32 %res
 }
-declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
 
 
-define i32 @test_x86_sse41_ptestnzc(<4 x float> %a0, <4 x float> %a1) {
+define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) {
   ; CHECK: vptest 
   ; CHECK: seta
   ; CHECK: movzbl
-  %res = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+  %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
   ret i32 %res
 }
-declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
 
 
-define i32 @test_x86_sse41_ptestz(<4 x float> %a0, <4 x float> %a1) {
+define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) {
   ; CHECK: vptest 
   ; CHECK: sete
   ; CHECK: movzbl
-  %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+  %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
   ret i32 %res
 }
-declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
 
 
 define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) {
@@ -1317,14 +1215,6 @@ define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) {
 declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
 
 
-define <2 x i64> @test_x86_sse42_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) {
-  ; CHECK: vpcmpgtq
-  %res = call <2 x i64> @llvm.x86.sse42.pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse42.pcmpgtq(<2 x i64>, <2 x i64>) nounwind readnone
-
-
 define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) {
   ; CHECK: vpcmpistri
   ; CHECK: movl
@@ -1512,14 +1402,6 @@ define void @test_x86_sse_ldmxcsr(i8* %a0) {
 declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind
 
 
-define <4 x float> @test_x86_sse_loadu_ps(i8* %a0) {
-  ; CHECK: movl
-  ; CHECK: vmovups
-  %res = call <4 x float> @llvm.x86.sse.loadu.ps(i8* %a0) ; <<4 x float>> [#uses=1]
-  ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.sse.loadu.ps(i8*) nounwind readonly
-
 
 define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) {
   ; CHECK: vmaxps
@@ -1561,14 +1443,6 @@ define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) {
 declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
 
 
-define void @test_x86_sse_movnt_ps(i8* %a0, <4 x float> %a1) {
-  ; CHECK: movl
-  ; CHECK: vmovntps
-  call void @llvm.x86.sse.movnt.ps(i8* %a0, <4 x float> %a1)
-  ret void
-}
-declare void @llvm.x86.sse.movnt.ps(i8*, <4 x float>) nounwind
-
 
 define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
   ; CHECK: vmulss
@@ -1743,12 +1617,12 @@ define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) {
 declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone
 
 
-define <4 x i32> @test_x86_ssse3_phadd_sw_128(<4 x i32> %a0, <4 x i32> %a1) {
+define <8 x i16> @test_x86_ssse3_phadd_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
   ; CHECK: vphaddsw
-  %res = call <4 x i32> @llvm.x86.ssse3.phadd.sw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
-  ret <4 x i32> %res
+  %res = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
 }
-declare <4 x i32> @llvm.x86.ssse3.phadd.sw.128(<4 x i32>, <4 x i32>) nounwind readnone
+declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
 
 
 define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) {
@@ -1783,12 +1657,12 @@ define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) {
 declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
 
 
-define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
+define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<16 x i8> %a0, <16 x i8> %a1) {
   ; CHECK: vpmaddubsw
-  %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+  %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) ; <<8 x i16>> [#uses=1]
   ret <8 x i16> %res
 }
-declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
+declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone
 
 
 define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
@@ -1892,6 +1766,74 @@ define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
   %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
   ret <8 x float> %res
 }
+
+define <8 x float> @test_x86_avx_cmp_ps_256_pseudo_op(<8 x float> %a0, <8 x float> %a1) {
+  ; CHECK: vcmpeqps
+  %a2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpltps
+  %a3 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a2, i8 1) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpleps
+  %a4 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a3, i8 2) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpunordps
+  %a5 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a4, i8 3) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpneqps
+  %a6 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a5, i8 4) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpnltps
+  %a7 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a6, i8 5) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpnleps
+  %a8 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a7, i8 6) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpordps
+  %a9 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a8, i8 7) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpeq_uqps
+  %a10 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a9, i8 8) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpngeps
+  %a11 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a10, i8 9) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpngtps
+  %a12 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a11, i8 10) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpfalseps
+  %a13 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a12, i8 11) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpneq_oqps
+  %a14 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a13, i8 12) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpgeps
+  %a15 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a14, i8 13) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpgtps
+  %a16 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a15, i8 14) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmptrueps
+  %a17 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a16, i8 15) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpeq_osps
+  %a18 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a17, i8 16) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmplt_oqps
+  %a19 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a18, i8 17) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmple_oqps
+  %a20 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a19, i8 18) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpunord_sps
+  %a21 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a20, i8 19) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpneq_usps
+  %a22 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a21, i8 20) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpnlt_uqps
+  %a23 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a22, i8 21) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpnle_uqps
+  %a24 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a23, i8 22) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpord_sps
+  %a25 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a24, i8 23) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpeq_usps
+  %a26 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a25, i8 24) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpnge_uqps
+  %a27 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a26, i8 25) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpngt_uqps
+  %a28 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a27, i8 26) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpfalse_osps
+  %a29 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a28, i8 27) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpneq_osps
+  %a30 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a29, i8 28) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpge_oqps
+  %a31 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a30, i8 29) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpgt_oqps
+  %a32 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a31, i8 30) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmptrue_usps
+  %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a32, i8 31) ; <<8 x float>> [#uses=1]
+  ret <8 x float> %res
+}
 declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
 
 
@@ -2007,30 +1949,6 @@ define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) {
 declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly
 
 
-define <32 x i8> @test_x86_avx_loadu_dq_256(i8* %a0) {
-  ; CHECK: vmovdqu
-  %res = call <32 x i8> @llvm.x86.avx.loadu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
-  ret <32 x i8> %res
-}
-declare <32 x i8> @llvm.x86.avx.loadu.dq.256(i8*) nounwind readonly
-
-
-define <4 x double> @test_x86_avx_loadu_pd_256(i8* %a0) {
-  ; CHECK: vmovupd
-  %res = call <4 x double> @llvm.x86.avx.loadu.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
-  ret <4 x double> %res
-}
-declare <4 x double> @llvm.x86.avx.loadu.pd.256(i8*) nounwind readonly
-
-
-define <8 x float> @test_x86_avx_loadu_ps_256(i8* %a0) {
-  ; CHECK: vmovups
-  %res = call <8 x float> @llvm.x86.avx.loadu.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
-  ret <8 x float> %res
-}
-declare <8 x float> @llvm.x86.avx.loadu.ps.256(i8*) nounwind readonly
-
-
 define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x double> %a1) {
   ; CHECK: vmaskmovpd
   %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
@@ -2143,29 +2061,10 @@ define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) {
 declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
 
 
-define void @test_x86_avx_movnt_dq_256(i8* %a0, <4 x i64> %a1) {
-  ; CHECK: vmovntdq
-  call void @llvm.x86.avx.movnt.dq.256(i8* %a0, <4 x i64> %a1)
-  ret void
-}
-declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind
 
 
-define void @test_x86_avx_movnt_pd_256(i8* %a0, <4 x double> %a1) {
-  ; CHECK: vmovntpd
-  call void @llvm.x86.avx.movnt.pd.256(i8* %a0, <4 x double> %a1)
-  ret void
-}
-declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind
 
 
-define void @test_x86_avx_movnt_ps_256(i8* %a0, <8 x float> %a1) {
-  ; CHECK: vmovntps
-  call void @llvm.x86.avx.movnt.ps.256(i8* %a0, <8 x float> %a1)
-  ret void
-}
-declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind
-
 
 define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
   ; CHECK: vptest
@@ -2245,8 +2144,11 @@ declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
 
 
 define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
-  ; CHECK: vmovdqu
-  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a1)
+  ; FIXME: unfortunately the execution domain fix pass changes this to vmovups and its hard to force with no 256-bit integer instructions
+  ; CHECK: vmovups
+  ; add operation forces the execution domain.
+  %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
   ret void
 }
 declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
@@ -2254,7 +2156,9 @@ declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
 
 define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {
   ; CHECK: vmovupd
-  call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a1)
+  ; add operation forces the execution domain.
+  %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
+  call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2)
   ret void
 }
 declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind
@@ -2292,20 +2196,20 @@ define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
 declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly
 
 
-define <4 x float> @test_x86_avx_vbroadcastss(i8* %a0) {
+define <4 x float> @test_x86_avx_vbroadcast_ss(i8* %a0) {
   ; CHECK: vbroadcastss
-  %res = call <4 x float> @llvm.x86.avx.vbroadcastss(i8* %a0) ; <<4 x float>> [#uses=1]
+  %res = call <4 x float> @llvm.x86.avx.vbroadcast.ss(i8* %a0) ; <<4 x float>> [#uses=1]
   ret <4 x float> %res
 }
-declare <4 x float> @llvm.x86.avx.vbroadcastss(i8*) nounwind readonly
+declare <4 x float> @llvm.x86.avx.vbroadcast.ss(i8*) nounwind readonly
 
 
-define <8 x float> @test_x86_avx_vbroadcastss_256(i8* %a0) {
+define <8 x float> @test_x86_avx_vbroadcast_ss_256(i8* %a0) {
   ; CHECK: vbroadcastss
-  %res = call <8 x float> @llvm.x86.avx.vbroadcastss.256(i8* %a0) ; <<8 x float>> [#uses=1]
+  %res = call <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8* %a0) ; <<8 x float>> [#uses=1]
   ret <8 x float> %res
 }
-declare <8 x float> @llvm.x86.avx.vbroadcastss.256(i8*) nounwind readonly
+declare <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8*) nounwind readonly
 
 
 define <2 x double> @test_x86_avx_vextractf128_pd_256(<4 x double> %a0) {
@@ -2433,6 +2337,12 @@ define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {
   %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
   ret <4 x float> %res
 }
+define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, <4 x i32>* %a1) {
+  ; CHECK: vpermilps
+  %a2 = load <4 x i32>* %a1
+  %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
 declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone
 
 
@@ -2575,4 +2485,73 @@ define void @test_x86_avx_vzeroupper() {
 }
 declare void @llvm.x86.avx.vzeroupper() nounwind
 
+; Make sure instructions with no AVX equivalents, but are associated with SSEX feature flags still work
 
+; CHECK: monitor
+define void @monitor(i8* %P, i32 %E, i32 %H) nounwind {
+entry:
+  tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H)
+  ret void
+}
+declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind
+
+; CHECK: mwait
+define void @mwait(i32 %E, i32 %H) nounwind {
+entry:
+  tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H)
+  ret void
+}
+declare void @llvm.x86.sse3.mwait(i32, i32) nounwind
+
+; CHECK: sfence
+define void @sfence() nounwind {
+entry:
+  tail call void @llvm.x86.sse.sfence()
+  ret void
+}
+declare void @llvm.x86.sse.sfence() nounwind
+
+; CHECK: lfence
+define void @lfence() nounwind {
+entry:
+  tail call void @llvm.x86.sse2.lfence()
+  ret void
+}
+declare void @llvm.x86.sse2.lfence() nounwind
+
+; CHECK: mfence
+define void @mfence() nounwind {
+entry:
+  tail call void @llvm.x86.sse2.mfence()
+  ret void
+}
+declare void @llvm.x86.sse2.mfence() nounwind
+
+; CHECK: clflush
+define void @clflush(i8* %p) nounwind {
+entry:
+  tail call void @llvm.x86.sse2.clflush(i8* %p)
+  ret void
+}
+declare void @llvm.x86.sse2.clflush(i8*) nounwind
+
+; CHECK: crc32b
+define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
+  ret i32 %tmp
+}
+declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
+
+; CHECK: crc32w
+define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
+  ret i32 %tmp
+}
+declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
+
+; CHECK: crc32l
+define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
+  ret i32 %tmp
+}
+declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
diff --git a/test/CodeGen/X86/avx-load-store.ll b/test/CodeGen/X86/avx-load-store.ll
index 07a63ef..c9fc66a 100644
--- a/test/CodeGen/X86/avx-load-store.ll
+++ b/test/CodeGen/X86/avx-load-store.ll
@@ -25,20 +25,26 @@ declare void @dummy(<4 x double>, <8 x float>, <4 x i64>)
 
 ;;
 ;; The two tests below check that we must fold load + scalar_to_vector
-;; + ins_subvec+ zext into only a single vmovss or vmovsd
+;; + ins_subvec+ zext into only a single vmovss or vmovsd or vinsertps from memory
 
-; CHECK: vmovss (%
+; CHECK: mov00
 define <8 x float> @mov00(<8 x float> %v, float * %ptr) nounwind {
   %val = load float* %ptr
+; CHECK: vinsertps
+; CHECK: vinsertf128
   %i0 = insertelement <8 x float> zeroinitializer, float %val, i32 0
   ret <8 x float> %i0
+; CHECK: ret
 }
 
-; CHECK: vmovsd (%
+; CHECK: mov01
 define <4 x double> @mov01(<4 x double> %v, double * %ptr) nounwind {
   %val = load double* %ptr
+; CHECK: vmovlpd
+; CHECK: vinsertf128
   %i0 = insertelement <4 x double> zeroinitializer, double %val, i32 0
   ret <4 x double> %i0
+; CHECK: ret
 }
 
 ; CHECK: vmovaps  %ymm
diff --git a/test/CodeGen/X86/avx-logic.ll b/test/CodeGen/X86/avx-logic.ll
index 518c09c..115cefb 100644
--- a/test/CodeGen/X86/avx-logic.ll
+++ b/test/CodeGen/X86/avx-logic.ll
@@ -7,7 +7,9 @@ entry:
   %1 = bitcast <4 x double> %y to <4 x i64>
   %and.i = and <4 x i64> %0, %1
   %2 = bitcast <4 x i64> %and.i to <4 x double>
-  ret <4 x double> %2
+  ; add forces execution domain
+  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
+  ret <4 x double> %3
 }
 
 ; CHECK: vandpd LCP{{.*}}(%rip)
@@ -16,7 +18,9 @@ entry:
   %0 = bitcast <4 x double> %y to <4 x i64>
   %and.i = and <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
   %1 = bitcast <4 x i64> %and.i to <4 x double>
-  ret <4 x double> %1
+  ; add forces execution domain
+  %2 = fadd <4 x double> %1, <double 0x0, double 0x0, double 0x0, double 0x0>
+  ret <4 x double> %2
 }
 
 ; CHECK: vandps
@@ -45,7 +49,9 @@ entry:
   %1 = bitcast <4 x double> %y to <4 x i64>
   %xor.i = xor <4 x i64> %0, %1
   %2 = bitcast <4 x i64> %xor.i to <4 x double>
-  ret <4 x double> %2
+  ; add forces execution domain
+  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
+  ret <4 x double> %3
 }
 
 ; CHECK: vxorpd LCP{{.*}}(%rip)
@@ -54,7 +60,9 @@ entry:
   %0 = bitcast <4 x double> %y to <4 x i64>
   %xor.i = xor <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
   %1 = bitcast <4 x i64> %xor.i to <4 x double>
-  ret <4 x double> %1
+  ; add forces execution domain
+  %2 = fadd <4 x double> %1, <double 0x0, double 0x0, double 0x0, double 0x0>
+  ret <4 x double> %2
 }
 
 ; CHECK: vxorps
@@ -83,7 +91,9 @@ entry:
   %1 = bitcast <4 x double> %y to <4 x i64>
   %or.i = or <4 x i64> %0, %1
   %2 = bitcast <4 x i64> %or.i to <4 x double>
-  ret <4 x double> %2
+  ; add forces execution domain
+  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
+  ret <4 x double> %3
 }
 
 ; CHECK: vorpd LCP{{.*}}(%rip)
@@ -92,7 +102,9 @@ entry:
   %0 = bitcast <4 x double> %y to <4 x i64>
   %or.i = or <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
   %1 = bitcast <4 x i64> %or.i to <4 x double>
-  ret <4 x double> %1
+  ; add forces execution domain
+  %2 = fadd <4 x double> %1, <double 0x0, double 0x0, double 0x0, double 0x0>
+  ret <4 x double> %2
 }
 
 ; CHECK: vorps
@@ -122,7 +134,9 @@ entry:
   %1 = bitcast <4 x double> %y to <4 x i64>
   %and.i = and <4 x i64> %1, %neg.i
   %2 = bitcast <4 x i64> %and.i to <4 x double>
-  ret <4 x double> %2
+  ; add forces execution domain
+  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
+  ret <4 x double> %3
 }
 
 ; CHECK: vandnpd (%
@@ -134,7 +148,9 @@ entry:
   %1 = bitcast <4 x double> %tmp2 to <4 x i64>
   %and.i = and <4 x i64> %1, %neg.i
   %2 = bitcast <4 x i64> %and.i to <4 x double>
-  ret <4 x double> %2
+  ; add forces execution domain
+  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
+  ret <4 x double> %3
 }
 
 ; CHECK: vandnps
@@ -165,7 +181,9 @@ entry:
 ; CHECK: vpandn  %xmm
 define <2 x i64> @vpandn(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
 entry:
-  %y = xor <2 x i64> %a, <i64 -1, i64 -1>
+  ; Force the execution domain with an add.
+  %a2 = add <2 x i64> %a, <i64 1, i64 1>
+  %y = xor <2 x i64> %a2, <i64 -1, i64 -1>
   %x = and <2 x i64> %a, %y
   ret <2 x i64> %x
 }
@@ -173,7 +191,9 @@ entry:
 ; CHECK: vpand %xmm
 define <2 x i64> @vpand(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
 entry:
-  %x = and <2 x i64> %a, %b
+  ; Force the execution domain with an add.
+  %a2 = add <2 x i64> %a, <i64 1, i64 1>
+  %x = and <2 x i64> %a2, %b
   ret <2 x i64> %x
 }
 
diff --git a/test/CodeGen/X86/avx-minmax.ll b/test/CodeGen/X86/avx-minmax.ll
index f36ba7b..7c58820 100644
--- a/test/CodeGen/X86/avx-minmax.ll
+++ b/test/CodeGen/X86/avx-minmax.ll
@@ -33,7 +33,7 @@ define <4 x float> @minps(<4 x float> %x, <4 x float> %y) {
 }
 
 ; UNSAFE: vmaxpd:
-; UNSAFE: vmaxpd %ymm
+; UNSAFE: vmaxpd {{.+}}, %ymm
 define <4 x double> @vmaxpd(<4 x double> %x, <4 x double> %y) {
   %max_is_x = fcmp oge <4 x double> %x, %y
   %max = select <4 x i1> %max_is_x, <4 x double> %x, <4 x double> %y
@@ -41,7 +41,7 @@ define <4 x double> @vmaxpd(<4 x double> %x, <4 x double> %y) {
 }
 
 ; UNSAFE: vminpd:
-; UNSAFE: vminpd %ymm
+; UNSAFE: vminpd {{.+}}, %ymm
 define <4 x double> @vminpd(<4 x double> %x, <4 x double> %y) {
   %min_is_x = fcmp ole <4 x double> %x, %y
   %min = select <4 x i1> %min_is_x, <4 x double> %x, <4 x double> %y
@@ -49,7 +49,7 @@ define <4 x double> @vminpd(<4 x double> %x, <4 x double> %y) {
 }
 
 ; UNSAFE: vmaxps:
-; UNSAFE: vmaxps %ymm
+; UNSAFE: vmaxps {{.+}}, %ymm
 define <8 x float> @vmaxps(<8 x float> %x, <8 x float> %y) {
   %max_is_x = fcmp oge <8 x float> %x, %y
   %max = select <8 x i1> %max_is_x, <8 x float> %x, <8 x float> %y
@@ -57,7 +57,7 @@ define <8 x float> @vmaxps(<8 x float> %x, <8 x float> %y) {
 }
 
 ; UNSAFE: vminps:
-; UNSAFE: vminps %ymm
+; UNSAFE: vminps {{.+}}, %ymm
 define <8 x float> @vminps(<8 x float> %x, <8 x float> %y) {
   %min_is_x = fcmp ole <8 x float> %x, %y
   %min = select <8 x i1> %min_is_x, <8 x float> %x, <8 x float> %y
diff --git a/test/CodeGen/X86/avx-sext.ll b/test/CodeGen/X86/avx-sext.ll
new file mode 100755
index 0000000..3713a8c
--- /dev/null
+++ b/test/CodeGen/X86/avx-sext.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
+;CHECK: sext_8i16_to_8i32
+;CHECK: vpmovsxwd
+
+  %B = sext <8 x i16> %A to <8 x i32>
+  ret <8 x i32>%B
+}
+
+define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
+;CHECK: sext_4i32_to_4i64
+;CHECK: vpmovsxdq
+
+  %B = sext <4 x i32> %A to <4 x i64>
+  ret <4 x i64>%B
+}
diff --git a/test/CodeGen/X86/avx-shift.ll b/test/CodeGen/X86/avx-shift.ll
index 3ea39a2..681747b 100644
--- a/test/CodeGen/X86/avx-shift.ll
+++ b/test/CodeGen/X86/avx-shift.ll
@@ -62,6 +62,45 @@ define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
   ret <16 x i16> %s
 }
 
+; CHECK: vpsrlw
+; CHECK: pand
+; CHECK: pxor
+; CHECK: psubb
+; CHECK: vpsrlw
+; CHECK: pand
+; CHECK: pxor
+; CHECK: psubb
+define <32 x i8> @vshift09(<32 x i8> %a) nounwind readnone {
+  %s = ashr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+  ret <32 x i8> %s
+}
+
+; CHECK: pxor
+; CHECK: pcmpgtb
+; CHECK: pcmpgtb
+define <32 x i8> @vshift10(<32 x i8> %a) nounwind readnone {
+  %s = ashr <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  ret <32 x i8> %s
+}
+
+; CHECK: vpsrlw
+; CHECK: pand
+; CHECK: vpsrlw
+; CHECK: pand
+define <32 x i8> @vshift11(<32 x i8> %a) nounwind readnone {
+  %s = lshr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+  ret <32 x i8> %s
+}
+
+; CHECK: vpsllw
+; CHECK: pand
+; CHECK: vpsllw
+; CHECK: pand
+define <32 x i8> @vshift12(<32 x i8> %a) nounwind readnone {
+  %s = shl <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+  ret <32 x i8> %s
+}
+
 ;;; Support variable shifts
 ; CHECK: _vshift08
 ; CHECK: vextractf128 $1
@@ -73,3 +112,27 @@ define <8 x i32> @vshift08(<8 x i32> %a) nounwind {
   ret <8 x i32> %bitop
 }
 
+;;; Uses shifts for sign extension
+; CHECK: _sext_v16i16
+; CHECK: vpsllw
+; CHECK: vpsraw
+; CHECK: vpsllw
+; CHECK: vpsraw
+; CHECK: vinsertf128
+define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
+  %b = trunc <16 x i16> %a to <16 x i8>
+  %c = sext <16 x i8> %b to <16 x i16>
+  ret <16 x i16> %c
+}
+
+; CHECK: _sext_v8i32
+; CHECK: vpslld
+; CHECK: vpsrad
+; CHECK: vpslld
+; CHECK: vpsrad
+; CHECK: vinsertf128
+define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
+  %b = trunc <8 x i32> %a to <8 x i16>
+  %c = sext <8 x i16> %b to <8 x i32>
+  ret <8 x i32> %c
+}
diff --git a/test/CodeGen/X86/avx-shuffle-x86_32.ll b/test/CodeGen/X86/avx-shuffle-x86_32.ll
new file mode 100755
index 0000000..5268ec3a
--- /dev/null
+++ b/test/CodeGen/X86/avx-shuffle-x86_32.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+define <4 x i64> @test1(<4 x i64> %a) nounwind {
+ %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ ret <4 x i64>%b
+ ; CHECK: test1:
+ ; CHECK: vinsertf128
+ }
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index 0db334d..16c447b 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -6,5 +6,199 @@ define <4 x float> @test1(<4 x float> %a) nounwind {
   ret <4 x float> %b
 ; CHECK: test1:
 ; CHECK: vshufps
-; CHECK: vpshufd
+; CHECK: vpermilps
+}
+
+; rdar://10538417
+define <3 x i64> @test2(<2 x i64> %v) nounwind readnone {
+; CHECK: test2:
+; CHECK: vinsertf128
+  %1 = shufflevector <2 x i64> %v, <2 x i64> %v, <3 x i32> <i32 0, i32 1, i32 undef>
+  %2 = shufflevector <3 x i64> zeroinitializer, <3 x i64> %1, <3 x i32> <i32 3, i32 4, i32 2>
+  ret <3 x i64> %2
+; CHECK: ret
+}
+
+define <4 x i64> @test3(<4 x i64> %a, <4 x i64> %b) nounwind {
+  %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 undef>
+  ret <4 x i64> %c
+; CHECK: test3:
+; CHECK: vperm2f128
+; CHECK: ret
+}
+
+define <8 x float> @test4(float %a) nounwind {
+  %b = insertelement <8 x float> zeroinitializer, float %a, i32 0
+  ret <8 x float> %b
+; CHECK: test4:
+; CHECK: vinsertf128
+}
+
+; rdar://10594409
+define <8 x float> @test5(float* nocapture %f) nounwind uwtable readonly ssp {
+entry:
+  %0 = bitcast float* %f to <4 x float>*
+  %1 = load <4 x float>* %0, align 16
+; CHECK: test5
+; CHECK: vmovaps
+; CHECK-NOT: vxorps
+; CHECK-NOT: vinsertf128
+  %shuffle.i = shufflevector <4 x float> %1, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
+  ret <8 x float> %shuffle.i
+}
+
+define <4 x double> @test6(double* nocapture %d) nounwind uwtable readonly ssp {
+entry:
+  %0 = bitcast double* %d to <2 x double>*
+  %1 = load <2 x double>* %0, align 16
+; CHECK: test6
+; CHECK: vmovaps
+; CHECK-NOT: vxorps
+; CHECK-NOT: vinsertf128
+  %shuffle.i = shufflevector <2 x double> %1, <2 x double> <double 0.000000e+00, double undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+  ret <4 x double> %shuffle.i
+}
+
+define <16 x i16> @test7(<4 x i16> %a) nounwind {
+; CHECK: test7
+  %b = shufflevector <4 x i16> %a, <4 x i16> undef, <16 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK: ret
+  ret <16 x i16> %b
+}
+
+; CHECK: test8
+define void @test8() {
+entry:
+  %0 = load <16 x i64> addrspace(1)* null, align 128
+  %1 = shufflevector <16 x i64> <i64 undef, i64 undef, i64 0, i64 undef, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 undef, i64 0, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i64> %0, <16 x i32> <i32 17, i32 18, i32 2, i32 undef, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 undef, i32 11, i32 undef, i32 undef, i32 undef, i32 26>
+  %2 = shufflevector <16 x i64> %1, <16 x i64> %0, <16 x i32> <i32 0, i32 1, i32 2, i32 30, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 undef, i32 11, i32 undef, i32 22, i32 20, i32 15>
+  store <16 x i64> %2, <16 x i64> addrspace(1)* undef, align 128
+; CHECK: ret
+  ret void
+}
+
+; Extract a value from a shufflevector..
+define i32 @test9(<4 x i32> %a) nounwind {
+; CHECK: test9
+; CHECK: vpextrd
+  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 undef, i32 4> 
+  %r = extractelement <8 x i32> %b, i32 2
+; CHECK: ret
+  ret i32 %r
+}
+
+; Extract a value which is the result of an undef mask.
+define i32 @test10(<4 x i32> %a) nounwind {
+; CHECK: @test10
+; CHECK-NEXT: #
+; CHECK-NEXT: ret
+  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %r = extractelement <8 x i32> %b, i32 2
+  ret i32 %r
+}
+
+define <4 x float> @test11(<4 x float> %a) nounwind  {
+; check: test11
+; check: vpermilps $27
+  %tmp1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x float> %tmp1
+}
+
+define <4 x float> @test12(<4 x float>* %a) nounwind  {
+; CHECK: test12
+; CHECK: vpermilps $27, (
+  %tmp0 = load <4 x float>* %a
+  %tmp1 = shufflevector <4 x float> %tmp0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x float> %tmp1
+}
+
+define <4 x i32> @test13(<4 x i32> %a) nounwind  {
+; check: test13
+; check: vpshufd $27
+  %tmp1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i32> %tmp1
+}
+
+define <4 x i32> @test14(<4 x i32>* %a) nounwind  {
+; CHECK: test14
+; CHECK: vpshufd $27, (
+  %tmp0 = load <4 x i32>* %a
+  %tmp1 = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i32> %tmp1
+}
+
+; CHECK: test15
+; CHECK: vpshufd $8
+; CHECK: ret
+define <4 x i32> @test15(<2 x i32>%x) nounwind readnone {
+  %x1 = shufflevector <2 x i32> %x, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  ret <4 x i32>%x1
+}
+
+; rdar://10974078
+define <8 x float> @test16(float* nocapture %f) nounwind uwtable readonly ssp {
+entry:
+  %0 = bitcast float* %f to <4 x float>*
+  %1 = load <4 x float>* %0, align 8
+; CHECK: test16
+; CHECK: vmovups
+; CHECK-NOT: vxorps
+; CHECK-NOT: vinsertf128
+  %shuffle.i = shufflevector <4 x float> %1, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
+  ret <8 x float> %shuffle.i
+}
+
+; PR12413
+; CHECK: vpshufb
+; CHECK: vpshufb
+; CHECK: vpshufb
+; CHECK: vpshufb
+define <32 x i8> @shuf(<32 x i8> %inval1, <32 x i8> %inval2) {
+entry:
+ %0 = shufflevector <32 x i8> %inval1, <32 x i8> %inval2, <32 x i32> <i32 0,
+i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32
+22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32
+42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32
+62>
+ ret <32 x i8> %0
+}
+
+; CHECK: blend1
+; CHECK: vblendps
+; CHECK: ret
+define <4 x i32> @blend1(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+  ret <4 x i32> %t
+}
+
+; CHECK: blend2
+; CHECK: vblendps
+; CHECK: ret
+define <4 x i32> @blend2(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x i32> %t
+}
+
+; CHECK: blend2a
+; CHECK: vblendps
+; CHECK: ret
+define <4 x float> @blend2a(<4 x float> %a, <4 x float> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x float> %t
+}
+
+; CHECK: blend3
+; CHECK-NOT: vblendps
+; CHECK: ret
+define <4 x i32> @blend3(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 2, i32 7>
+  ret <4 x i32> %t
+}
+
+; CHECK: blend4
+; CHECK: vblendpd
+; CHECK: ret
+define <4 x i64> @blend4(<4 x i64> %a, <4 x i64> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+  ret <4 x i64> %t
 }
diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll
index af20b90..94bcddd 100644
--- a/test/CodeGen/X86/avx-splat.ll
+++ b/test/CodeGen/X86/avx-splat.ll
@@ -32,7 +32,7 @@ entry:
   ret <4 x i64> %vecinit6.i
 }
 
-; CHECK: vshufpd $0
+; CHECK: vpermilpd $0
 ; CHECK-NEXT: vinsertf128 $1
 define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
 entry:
@@ -47,7 +47,7 @@ entry:
 ;   shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
 ; To:
 ;   shuffle (vload ptr)), undef, <1, 1, 1, 1>
-; CHECK: vmovdqa
+; CHECK: vmovaps
 ; CHECK-NEXT: vinsertf128  $1
 ; CHECK-NEXT: vpermilps $-1
 define <8 x float> @funcE() nounwind {
diff --git a/test/CodeGen/X86/avx-trunc.ll b/test/CodeGen/X86/avx-trunc.ll
new file mode 100755
index 0000000..d007736
--- /dev/null
+++ b/test/CodeGen/X86/avx-trunc.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+define <4 x i32> @trunc_64_32(<4 x i64> %A) nounwind uwtable readnone ssp{
+; CHECK: trunc_64_32
+; CHECK: pshufd
+  %B = trunc <4 x i64> %A to <4 x i32>
+  ret <4 x i32>%B
+}
+define <8 x i16> @trunc_32_16(<8 x i32> %A) nounwind uwtable readnone ssp{
+; CHECK: trunc_32_16
+; CHECK: pshufb
+  %B = trunc <8 x i32> %A to <8 x i16>
+  ret <8 x i16>%B
+}
+
diff --git a/test/CodeGen/X86/avx-unpack.ll b/test/CodeGen/X86/avx-unpack.ll
index d420101..20f5345 100644
--- a/test/CodeGen/X86/avx-unpack.ll
+++ b/test/CodeGen/X86/avx-unpack.ll
@@ -67,6 +67,15 @@ entry:
   ret <8 x i32> %shuffle.i
 }
 
+; CHECK: vunpckhps (%
+define <8 x i32> @unpackhips2(<8 x i32>* %src1, <8 x i32>* %src2) nounwind uwtable readnone ssp {
+entry:
+  %a = load <8 x i32>* %src1
+  %b = load <8 x i32>* %src2
+  %shuffle.i = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i32> %shuffle.i
+}
+
 ; CHECK: vunpckhpd
 define <4 x i64> @unpackhipd1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp {
 entry:
@@ -74,6 +83,15 @@ entry:
   ret <4 x i64> %shuffle.i
 }
 
+; CHECK: vunpckhpd (%
+define <4 x i64> @unpackhipd2(<4 x i64>* %src1, <4 x i64>* %src2) nounwind uwtable readnone ssp {
+entry:
+  %a = load <4 x i64>* %src1
+  %b = load <4 x i64>* %src2
+  %shuffle.i = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x i64> %shuffle.i
+}
+
 ; CHECK: vunpcklps
 define <8 x i32> @unpacklops1(<8 x i32> %src1, <8 x i32> %src2) nounwind uwtable readnone ssp {
 entry:
@@ -81,9 +99,63 @@ entry:
   ret <8 x i32> %shuffle.i
 }
 
+; CHECK: vunpcklps (%
+define <8 x i32> @unpacklops2(<8 x i32>* %src1, <8 x i32>* %src2) nounwind uwtable readnone ssp {
+entry:
+  %a = load <8 x i32>* %src1
+  %b = load <8 x i32>* %src2
+  %shuffle.i = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
+  ret <8 x i32> %shuffle.i
+}
+
 ; CHECK: vunpcklpd
 define <4 x i64> @unpacklopd1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp {
 entry:
   %shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
   ret <4 x i64> %shuffle.i
 }
+
+; CHECK: vunpcklpd (%
+define <4 x i64> @unpacklopd2(<4 x i64>* %src1, <4 x i64>* %src2) nounwind uwtable readnone ssp {
+entry:
+  %a = load <4 x i64>* %src1
+  %b = load <4 x i64>* %src2
+  %shuffle.i = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ret <4 x i64> %shuffle.i
+}
+
+; CHECK: vpunpckhwd
+; CHECK: vpunpckhwd
+; CHECK: vinsertf128
+define <16 x i16> @unpackhwd_undef(<16 x i16> %src1) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <16 x i16> %shuffle.i
+}
+
+; CHECK: vpunpcklwd
+; CHECK: vpunpcklwd
+; CHECK: vinsertf128
+define <16 x i16> @unpacklwd_undef(<16 x i16> %src1) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
+  ret <16 x i16> %shuffle.i
+}
+
+; CHECK: vpunpckhbw
+; CHECK: vpunpckhbw
+; CHECK: vinsertf128
+define <32 x i8> @unpackhbw_undef(<32 x i8> %src1, <32 x i8> %src2) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src1, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
+  ret <32 x i8> %shuffle.i
+}
+
+; CHECK: vpunpcklbw
+; CHECK: vpunpcklbw
+; CHECK: vinsertf128
+define <32 x i8> @unpacklbw_undef(<32 x i8> %src1) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
+  ret <32 x i8> %shuffle.i
+}
diff --git a/test/CodeGen/X86/avx-varargs-x86_64.ll b/test/CodeGen/X86/avx-varargs-x86_64.ll
new file mode 100644
index 0000000..b0932bd
--- /dev/null
+++ b/test/CodeGen/X86/avx-varargs-x86_64.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; <rdar://problem/10463281>
+; Check that the <8 x float> is passed on the stack.
+
+@x = common global <8 x float> zeroinitializer, align 32
+declare i32 @f(i32, ...)
+
+; CHECK: test1:
+; CHECK: vmovaps	%ymm0, (%rsp)
+define void @test1() nounwind uwtable ssp {
+entry:
+  %0 = load <8 x float>* @x, align 32
+  %call = call i32 (i32, ...)* @f(i32 1, <8 x float> %0)
+  ret void
+}
diff --git a/test/CodeGen/X86/avx-vbroadcast.ll b/test/CodeGen/X86/avx-vbroadcast.ll
index 89b4188..148ae73 100644
--- a/test/CodeGen/X86/avx-vbroadcast.ll
+++ b/test/CodeGen/X86/avx-vbroadcast.ll
@@ -1,7 +1,4 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
-; XFAIL: *
-
-; xfail this file for now because of PR8156, when it gets solved merge this with avx-splat.ll
 
 ; CHECK: vbroadcastsd (%
 define <4 x i64> @A(i64* %ptr) nounwind uwtable readnone ssp {
@@ -50,7 +47,7 @@ entry:
 ;;;; 128-bit versions
 
 ; CHECK: vbroadcastss (%
-define <4 x float> @E(float* %ptr) nounwind uwtable readnone ssp {
+define <4 x float> @e(float* %ptr) nounwind uwtable readnone ssp {
 entry:
   %q = load float* %ptr, align 4
   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
@@ -60,6 +57,19 @@ entry:
   ret <4 x float> %vecinit6.i
 }
 
+
+; CHECK: _e2
+; CHECK-NOT: vbroadcastss
+; CHECK: ret
+define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
+    %vecinit.i = insertelement <4 x float> undef, float      0xbf80000000000000, i32 0
+  %vecinit2.i = insertelement <4 x float> %vecinit.i, float  0xbf80000000000000, i32 1
+  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
+  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
+  ret <4 x float> %vecinit6.i
+}
+
+
 ; CHECK: vbroadcastss (%
 define <4 x i32> @F(i32* %ptr) nounwind uwtable readnone ssp {
 entry:
@@ -74,7 +84,7 @@ entry:
 ; Unsupported vbroadcasts
 
 ; CHECK: _G
-; CHECK-NOT: vbroadcastsd (%
+; CHECK-NOT: broadcast (%
 ; CHECK: ret
 define <2 x i64> @G(i64* %ptr) nounwind uwtable readnone ssp {
 entry:
@@ -85,10 +95,20 @@ entry:
 }
 
 ; CHECK: _H
-; CHECK-NOT: vbroadcastss
+; CHECK-NOT: broadcast
 ; CHECK: ret
 define <4 x i32> @H(<4 x i32> %a) {
   %x = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
   ret <4 x i32> %x
 }
 
+; CHECK: _I
+; CHECK-NOT: broadcast (%
+; CHECK: ret
+define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
+entry:
+  %q = load double* %ptr, align 4
+  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
+  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
+  ret <2 x double> %vecinit2.i
+}
diff --git a/test/CodeGen/X86/avx-vextractf128.ll b/test/CodeGen/X86/avx-vextractf128.ll
index dccf901..fe0f6ca 100644
--- a/test/CodeGen/X86/avx-vextractf128.ll
+++ b/test/CodeGen/X86/avx-vextractf128.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
 
+; CHECK: @A
 ; CHECK-NOT: vunpck
 ; CHECK: vextractf128 $1
 define <8 x float> @A(<8 x float> %a) nounwind uwtable readnone ssp {
@@ -8,6 +9,7 @@ entry:
   ret <8 x float> %shuffle
 }
 
+; CHECK: @B
 ; CHECK-NOT: vunpck
 ; CHECK: vextractf128 $1
 define <4 x double> @B(<4 x double> %a) nounwind uwtable readnone ssp {
@@ -16,3 +18,89 @@ entry:
   ret <4 x double> %shuffle
 }
 
+; CHECK: @t0
+; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0
+; CHECK-NOT: vmovaps %xmm0, (%rdi)
+; CHECK: vextractf128 $0, %ymm0, (%rdi)
+define void @t0(float* nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
+entry:
+  %0 = tail call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a, i8 0)
+  %1 = bitcast float* %addr to <4 x float>*
+  store <4 x float> %0, <4 x float>* %1, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone
+
+; CHECK: @t1
+; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0
+; CHECK-NOT: vmovups %xmm0, (%rdi)
+; CHECK: vextractf128 $0, %ymm0, (%rdi)
+define void @t1(float* %addr, <8 x float> %a) nounwind uwtable ssp {
+entry:
+  %0 = tail call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a, i8 0)
+  %1 = bitcast float* %addr to i8*
+  tail call void @llvm.x86.sse.storeu.ps(i8* %1, <4 x float> %0)
+  ret void
+}
+
+declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
+
+; CHECK: @t2
+; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0
+; CHECK-NOT: vmovaps %xmm0, (%rdi)
+; CHECK: vextractf128 $0, %ymm0, (%rdi)
+define void @t2(double* nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
+entry:
+  %0 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a, i8 0)
+  %1 = bitcast double* %addr to <2 x double>*
+  store <2 x double> %0, <2 x double>* %1, align 16
+  ret void
+}
+
+declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone
+
+; CHECK: @t3
+; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0
+; CHECK-NOT: vmovups %xmm0, (%rdi)
+; CHECK: vextractf128 $0, %ymm0, (%rdi)
+define void @t3(double* %addr, <4 x double> %a) nounwind uwtable ssp {
+entry:
+  %0 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a, i8 0)
+  %1 = bitcast double* %addr to i8*
+  tail call void @llvm.x86.sse2.storeu.pd(i8* %1, <2 x double> %0)
+  ret void
+}
+
+declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
+
+; CHECK: @t4
+; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0
+; CHECK-NOT: vmovaps %xmm0, (%rdi)
+; CHECK: vextractf128 $0, %ymm0, (%rdi)
+define void @t4(<2 x i64>* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
+entry:
+  %0 = bitcast <4 x i64> %a to <8 x i32>
+  %1 = tail call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %0, i8 0)
+  %2 = bitcast <4 x i32> %1 to <2 x i64>
+  store <2 x i64> %2, <2 x i64>* %addr, align 16
+  ret void
+}
+
+declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone
+
+; CHECK: @t5
+; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0
+; CHECK-NOT: vmovdqu %xmm0, (%rdi)
+; CHECK: vextractf128 $0, %ymm0, (%rdi)
+define void @t5(<2 x i64>* %addr, <4 x i64> %a) nounwind uwtable ssp {
+entry:
+  %0 = bitcast <4 x i64> %a to <8 x i32>
+  %1 = tail call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %0, i8 0)
+  %2 = bitcast <2 x i64>* %addr to i8*
+  %3 = bitcast <4 x i32> %1 to <16 x i8>
+  tail call void @llvm.x86.sse2.storeu.dq(i8* %2, <16 x i8> %3)
+  ret void
+}
+
+declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
diff --git a/test/CodeGen/X86/avx-vinsertf128.ll b/test/CodeGen/X86/avx-vinsertf128.ll
index cda1331..9a954fe 100644
--- a/test/CodeGen/X86/avx-vinsertf128.ll
+++ b/test/CodeGen/X86/avx-vinsertf128.ll
@@ -56,3 +56,76 @@ define <8 x i32> @DAGCombineB(<8 x i32> %v1, <8 x i32> %v2) nounwind readonly {
   %2 = add <8 x i32> %1, %v1
   ret <8 x i32> %2
 }
+
+; CHECK: insert_pd
+define <4 x double> @insert_pd(<4 x double> %a0, <2 x double> %a1) {
+; CHECK: vinsertf128
+%res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 0)
+ret <4 x double> %res
+}
+
+; CHECK: insert_undef_pd
+define <4 x double> @insert_undef_pd(<4 x double> %a0, <2 x double> %a1) {
+; CHECK: vmovaps	%ymm1, %ymm0
+%res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> undef, <2 x double> %a1, i8 0)
+ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone
+
+
+; CHECK: insert_ps
+define <8 x float> @insert_ps(<8 x float> %a0, <4 x float> %a1) {
+; CHECK: vinsertf128
+%res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 0)
+ret <8 x float> %res
+}
+
+; CHECK: insert_undef_ps
+define <8 x float> @insert_undef_ps(<8 x float> %a0, <4 x float> %a1) {
+; CHECK: vmovaps	%ymm1, %ymm0
+%res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %a1, i8 0)
+ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone
+
+
+; CHECK: insert_si
+define <8 x i32> @insert_si(<8 x i32> %a0, <4 x i32> %a1) {
+; CHECK: vinsertf128
+%res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 0)
+ret <8 x i32> %res
+}
+
+; CHECK: insert_undef_si
+define <8 x i32> @insert_undef_si(<8 x i32> %a0, <4 x i32> %a1) {
+; CHECK: vmovaps	%ymm1, %ymm0
+%res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> undef, <4 x i32> %a1, i8 0)
+ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone
+
+; rdar://10643481
+; CHECK: vinsertf128_combine
+define <8 x float> @vinsertf128_combine(float* nocapture %f) nounwind uwtable readonly ssp {
+; CHECK-NOT: vmovaps
+; CHECK: vinsertf128
+entry:
+  %add.ptr = getelementptr inbounds float* %f, i64 4
+  %0 = bitcast float* %add.ptr to <4 x float>*
+  %1 = load <4 x float>* %0, align 16
+  %2 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %1, i8 1)
+  ret <8 x float> %2
+}
+
+; rdar://11076953
+; CHECK: vinsertf128_ucombine
+define <8 x float> @vinsertf128_ucombine(float* nocapture %f) nounwind uwtable readonly ssp {
+; CHECK-NOT: vmovups
+; CHECK: vinsertf128
+entry:
+  %add.ptr = getelementptr inbounds float* %f, i64 4
+  %0 = bitcast float* %add.ptr to <4 x float>*
+  %1 = load <4 x float>* %0, align 8
+  %2 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %1, i8 1)
+  ret <8 x float> %2
+}
diff --git a/test/CodeGen/X86/avx-vperm2f128.ll b/test/CodeGen/X86/avx-vperm2f128.ll
index 3550a90..caa21e5 100644
--- a/test/CodeGen/X86/avx-vperm2f128.ll
+++ b/test/CodeGen/X86/avx-vperm2f128.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
 
+; CHECK: _A
 ; CHECK: vperm2f128 $1
 define <8 x float> @A(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
 entry:
@@ -7,6 +8,7 @@ entry:
   ret <8 x float> %shuffle
 }
 
+; CHECK: _B
 ; CHECK: vperm2f128 $48
 define <8 x float> @B(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
 entry:
@@ -14,6 +16,7 @@ entry:
   ret <8 x float> %shuffle
 }
 
+; CHECK: _C
 ; CHECK: vperm2f128 $0
 define <8 x float> @C(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
 entry:
@@ -21,6 +24,7 @@ entry:
   ret <8 x float> %shuffle
 }
 
+; CHECK: _D
 ; CHECK: vperm2f128 $17
 define <8 x float> @D(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
 entry:
@@ -28,6 +32,7 @@ entry:
   ret <8 x float> %shuffle
 }
 
+; CHECK: _E
 ; CHECK: vperm2f128 $17
 define <32 x i8> @E(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
 entry:
@@ -35,7 +40,8 @@ entry:
   ret <32 x i8> %shuffle
 }
 
-; CHECK: vperm2f128 $33
+; CHECK: _E2
+; CHECK: vperm2f128 $3
 define <4 x i64> @E2(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
 entry:
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
@@ -44,6 +50,7 @@ entry:
 
 ;;;; Cases with undef indicies mixed in the mask
 
+; CHECK: _F
 ; CHECK: vperm2f128 $33
 define <8 x float> @F(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
 entry:
diff --git a/test/CodeGen/X86/avx-vpermil.ll b/test/CodeGen/X86/avx-vpermil.ll
index 49b2f54..cb904b9 100644
--- a/test/CodeGen/X86/avx-vpermil.ll
+++ b/test/CodeGen/X86/avx-vpermil.ll
@@ -28,6 +28,14 @@ entry:
   ret <4 x i64> %shuffle
 }
 
+; CHECK: vpermilpd
+define <4 x i64> @funcQ(<4 x i64>* %a) nounwind uwtable readnone ssp {
+entry:
+  %a2 = load <4 x i64>* %a
+  %shuffle = shufflevector <4 x i64> %a2, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 3>
+  ret <4 x i64> %shuffle
+}
+
 ; vpermil should match masks like this: <u,3,1,2,4,u,5,6>. Check that the
 ; target specific mask was correctly generated.
 ; CHECK: vpermilps $-100
@@ -37,7 +45,8 @@ entry:
   ret <8 x float> %shuffle
 }
 
-; CHECK-NOT: vpermilps
+; CHECK: palignr
+; CHECK: palignr
 define <8 x float> @funcF(<8 x float> %a) nounwind uwtable readnone ssp {
 entry:
   %shuffle = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
diff --git a/test/CodeGen/X86/avx-vshufp.ll b/test/CodeGen/X86/avx-vshufp.ll
index f06548d..45883b7 100644
--- a/test/CodeGen/X86/avx-vshufp.ll
+++ b/test/CodeGen/X86/avx-vshufp.ll
@@ -7,6 +7,31 @@ entry:
   ret <8 x float> %shuffle
 }
 
+; CHECK: vshufps  $-53, (%{{.*}}), %ymm
+define <8 x float> @A2(<8 x float>* %a, <8 x float>* %b) nounwind uwtable readnone ssp {
+entry:
+  %a2 = load <8 x float>* %a
+  %b2 = load <8 x float>* %b
+  %shuffle = shufflevector <8 x float> %a2, <8 x float> %b2, <8 x i32> <i32 3, i32 2, i32 8, i32 11, i32 7, i32 6, i32 12, i32 15>
+  ret <8 x float> %shuffle
+}
+
+; CHECK: vshufps  $-53, %ymm
+define <8 x i32> @A3(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 8, i32 11, i32 7, i32 6, i32 12, i32 15>
+  ret <8 x i32> %shuffle
+}
+
+; CHECK: vshufps  $-53, (%{{.*}}), %ymm
+define <8 x i32> @A4(<8 x i32>* %a, <8 x i32>* %b) nounwind uwtable readnone ssp {
+entry:
+  %a2 = load <8 x i32>* %a
+  %b2 = load <8 x i32>* %b
+  %shuffle = shufflevector <8 x i32> %a2, <8 x i32> %b2, <8 x i32> <i32 3, i32 2, i32 8, i32 11, i32 7, i32 6, i32 12, i32 15>
+  ret <8 x i32> %shuffle
+}
+
 ; CHECK: vshufpd  $10, %ymm
 define <4 x double> @B(<4 x double> %a, <4 x double> %b) nounwind uwtable readnone ssp {
 entry:
@@ -14,6 +39,31 @@ entry:
   ret <4 x double> %shuffle
 }
 
+; CHECK: vshufpd  $10, (%{{.*}}), %ymm
+define <4 x double> @B2(<4 x double>* %a, <4 x double>* %b) nounwind uwtable readnone ssp {
+entry:
+  %a2 = load <4 x double>* %a
+  %b2 = load <4 x double>* %b
+  %shuffle = shufflevector <4 x double> %a2, <4 x double> %b2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x double> %shuffle
+}
+
+; CHECK: vshufpd  $10, %ymm
+define <4 x i64> @B3(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x i64> %shuffle
+}
+
+; CHECK: vshufpd  $10, (%{{.*}}), %ymm
+define <4 x i64> @B4(<4 x i64>* %a, <4 x i64>* %b) nounwind uwtable readnone ssp {
+entry:
+  %a2 = load <4 x i64>* %a
+  %b2 = load <4 x i64>* %b
+  %shuffle = shufflevector <4 x i64> %a2, <4 x i64> %b2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x i64> %shuffle
+}
+
 ; CHECK: vshufps  $-53, %ymm
 define <8 x float> @C(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
 entry:
@@ -27,3 +77,81 @@ entry:
   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 undef>
   ret <4 x double> %shuffle
 }
+
+; CHECK: vshufps $-55, %ymm
+define <8 x float> @E(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 10, i32 0, i32 3, i32 13, i32 14, i32 4, i32 7>
+  ret <8 x float> %shuffle
+}
+
+; CHECK: vshufpd  $8, %ymm
+define <4 x double> @F(<4 x double> %a, <4 x double> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 7>
+  ret <4 x double> %shuffle
+}
+
+; CHECK: vshufps  $-53, %xmm
+define <4 x float> @A128(<4 x float> %a, <4 x float> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 4, i32 7>
+  ret <4 x float> %shuffle
+}
+
+; CHECK: vshufps  $-53, (%{{.*}}), %xmm
+define <4 x float> @A2128(<4 x float>* %a, <4 x float>* %b) nounwind uwtable readnone ssp {
+entry:
+  %a2 = load <4 x float>* %a
+  %b2 = load <4 x float>* %b
+  %shuffle = shufflevector <4 x float> %a2, <4 x float> %b2, <4 x i32> <i32 3, i32 2, i32 4, i32 7>
+  ret <4 x float> %shuffle
+}
+
+; CHECK: vshufps  $-53, %xmm
+define <4 x i32> @A3128(<4 x i32> %a, <4 x i32> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 2, i32 4, i32 7>
+  ret <4 x i32> %shuffle
+}
+
+; CHECK: vshufps  $-53, (%{{.*}}), %xmm
+define <4 x i32> @A4128(<4 x i32>* %a, <4 x i32>* %b) nounwind uwtable readnone ssp {
+entry:
+  %a2 = load <4 x i32>* %a
+  %b2 = load <4 x i32>* %b
+  %shuffle = shufflevector <4 x i32> %a2, <4 x i32> %b2, <4 x i32> <i32 3, i32 2, i32 4, i32 7>
+  ret <4 x i32> %shuffle
+}
+
+; CHECK: vshufpd  $1, %xmm
+define <2 x double> @B128(<2 x double> %a, <2 x double> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 2>
+  ret <2 x double> %shuffle
+}
+
+; CHECK: vshufpd  $1, (%{{.*}}), %xmm
+define <2 x double> @B2128(<2 x double>* %a, <2 x double>* %b) nounwind uwtable readnone ssp {
+entry:
+  %a2 = load <2 x double>* %a
+  %b2 = load <2 x double>* %b
+  %shuffle = shufflevector <2 x double> %a2, <2 x double> %b2, <2 x i32> <i32 1, i32 2>
+  ret <2 x double> %shuffle
+}
+
+; CHECK: vshufpd  $1, %xmm
+define <2 x i64> @B3128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
+  ret <2 x i64> %shuffle
+}
+
+; CHECK: vshufpd  $1, (%{{.*}}), %xmm
+define <2 x i64> @B4128(<2 x i64>* %a, <2 x i64>* %b) nounwind uwtable readnone ssp {
+entry:
+  %a2 = load <2 x i64>* %a
+  %b2 = load <2 x i64>* %b
+  %shuffle = shufflevector <2 x i64> %a2, <2 x i64> %b2, <2 x i32> <i32 1, i32 2>
+  ret <2 x i64> %shuffle
+}
diff --git a/test/CodeGen/X86/avx-vzeroupper.ll b/test/CodeGen/X86/avx-vzeroupper.ll
index eaf236c..bf4ab5b 100644
--- a/test/CodeGen/X86/avx-vzeroupper.ll
+++ b/test/CodeGen/X86/avx-vzeroupper.ll
@@ -1,26 +1,83 @@
 ; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
 
-define <4 x float> @do_sse_local(<4 x float> %a) nounwind uwtable readnone ssp {
-entry:
-  %add.i = fadd <4 x float> %a, %a
-  ret <4 x float> %add.i
-}
+declare <4 x float> @do_sse(<4 x float>)
+declare <8 x float> @do_avx(<8 x float>)
+declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone
+@x = common global <4 x float> zeroinitializer, align 16
+@g = common global <8 x float> zeroinitializer, align 32
+
+;; Basic checking - don't emit any vzeroupper instruction
 
 ; CHECK: _test00
 define <4 x float> @test00(<4 x float> %a, <4 x float> %b) nounwind uwtable ssp {
 entry:
+  ; CHECK-NOT: vzeroupper
   %add.i = fadd <4 x float> %a, %b
+  %call3 = call <4 x float> @do_sse(<4 x float> %add.i) nounwind
+  ; CHECK: ret
+  ret <4 x float> %call3
+}
+
+;; Check parameter 256-bit parameter passing
+
+; CHECK: _test01
+define <8 x float> @test01(<4 x float> %a, <4 x float> %b, <8 x float> %c) nounwind uwtable ssp {
+entry:
+  %tmp = load <4 x float>* @x, align 16
   ; CHECK: vzeroupper
   ; CHECK-NEXT: callq _do_sse
-  %call3 = tail call <4 x float> @do_sse(<4 x float> %add.i) nounwind
-  %sub.i = fsub <4 x float> %call3, %add.i
+  %call = tail call <4 x float> @do_sse(<4 x float> %tmp) nounwind
+  store <4 x float> %call, <4 x float>* @x, align 16
   ; CHECK-NOT: vzeroupper
-  ; CHECK: callq _do_sse_local
-  %call8 = tail call <4 x float> @do_sse_local(<4 x float> %sub.i)
+  ; CHECK: callq _do_sse
+  %call2 = tail call <4 x float> @do_sse(<4 x float> %call) nounwind
+  store <4 x float> %call2, <4 x float>* @x, align 16
+  ; CHECK: ret
+  ret <8 x float> %c
+}
+
+;; Test the pass convergence and also that vzeroupper is only issued when necessary,
+;; for this function it should be only once
+
+; CHECK: _test02
+define <4 x float> @test02(<4 x float> %a, <4 x float> %b) nounwind uwtable ssp {
+entry:
+  %add.i = fadd <4 x float> %a, %b
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  ; CHECK: LBB
+  ; CHECK-NOT: vzeroupper
+  %i.018 = phi i32 [ 0, %entry ], [ %1, %for.body ]
+  %c.017 = phi <4 x float> [ %add.i, %entry ], [ %call14, %for.body ]
+  ; CHECK: callq _do_sse
+  %call5 = tail call <4 x float> @do_sse(<4 x float> %c.017) nounwind
+  ; CHECK-NEXT: callq _do_sse
+  %call7 = tail call <4 x float> @do_sse(<4 x float> %call5) nounwind
+  %tmp11 = load <8 x float>* @g, align 32
+  %0 = tail call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %tmp11, i8 1) nounwind
   ; CHECK: vzeroupper
-  ; CHECK-NEXT: jmp _do_sse
-  %call10 = tail call <4 x float> @do_sse(<4 x float> %call8) nounwind
-  ret <4 x float> %call10
+  ; CHECK-NEXT: callq _do_sse
+  %call14 = tail call <4 x float> @do_sse(<4 x float> %0) nounwind
+  %1 = add nsw i32 %i.018, 1
+  %exitcond = icmp eq i32 %1, 4
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret <4 x float> %call14
 }
 
-declare <4 x float> @do_sse(<4 x float>)
+;; Check that we also perform vzeroupper when we return from a function.
+
+; CHECK: _test03
+define <4 x float> @test03(<4 x float> %a, <4 x float> %b) nounwind uwtable ssp {
+entry:
+  %shuf = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ; CHECK-NOT: vzeroupper
+  ; CHECK: call
+  %call = call <8 x float> @do_avx(<8 x float> %shuf) nounwind
+  %shuf2 = shufflevector <8 x float> %call, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ; CHECK: vzeroupper
+  ; CHECK: ret
+  ret <4 x float> %shuf2
+}
diff --git a/test/CodeGen/X86/avx-win64-args.ll b/test/CodeGen/X86/avx-win64-args.ll
new file mode 100755
index 0000000..85b2634
--- /dev/null
+++ b/test/CodeGen/X86/avx-win64-args.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+target triple = "x86_64-pc-win32"
+
+declare <8 x float> @foo(<8 x float>, i32)
+
+define <8 x float> @test1(<8 x float> %x, <8 x float> %y) nounwind uwtable readnone ssp {
+entry:
+; CHECK: test1
+; CHECK: leaq {{.*}}, %rcx
+; CHECK: movl {{.*}}, %edx
+; CHECK: call
+; CHECK: ret
+  %x1 = fadd  <8 x float>  %x, %y
+  %call = call  <8 x float> @foo(<8 x float> %x1, i32 1) nounwind
+  %y1 = fsub  <8 x float>  %call, %y
+  ret <8 x float> %y1
+}
+
diff --git a/test/CodeGen/X86/avx-win64.ll b/test/CodeGen/X86/avx-win64.ll
new file mode 100644
index 0000000..dc6bd59
--- /dev/null
+++ b/test/CodeGen/X86/avx-win64.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; PR11862
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-win32"
+
+; This function has live ymm registers across a win64 call.
+; The ymm6-15 registers are still call-clobbered even if xmm6-15 are callee-saved.
+; Verify that callee-saved registers are not being used.
+
+; CHECK: f___vyf
+; CHECK: pushq %rbp
+; CHECK: vmovmsk
+; CHECK: vmovaps %ymm{{.*}}(%r
+; CHECK: vmovaps %ymm{{.*}}(%r
+; CHECK: call
+; Two reloads. It's OK if these get folded.
+; CHECK: vmovaps {{.*\(%r.*}}, %ymm
+; CHECK: vmovaps {{.*\(%r.*}}, %ymm
+; CHECK: blend
+define <8 x float> @f___vyf(<8 x float> %x, <8 x i32> %__mask) nounwind readnone {
+allocas:
+  %bincmp = fcmp oeq <8 x float> %x, zeroinitializer
+  %val_to_boolvec32 = sext <8 x i1> %bincmp to <8 x i32>
+  %"~test" = xor <8 x i32> %val_to_boolvec32, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  %"internal_mask&function_mask25" = and <8 x i32> %"~test", %__mask
+  %floatmask.i46 = bitcast <8 x i32> %"internal_mask&function_mask25" to <8 x float>
+  %v.i47 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %floatmask.i46) nounwind readnone
+  %any_mm_cmp27 = icmp eq i32 %v.i47, 0
+  br i1 %any_mm_cmp27, label %safe_if_after_false, label %safe_if_run_false
+
+safe_if_run_false:                                ; preds = %allocas
+  %binop = fadd <8 x float> %x, <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>
+  %calltmp = call <8 x float> @f___vyf(<8 x float> %binop, <8 x i32> %"internal_mask&function_mask25")
+  %binop33 = fadd <8 x float> %calltmp, %x
+  %mask_as_float.i48 = bitcast <8 x i32> %"~test" to <8 x float>
+  %blend.i52 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %x, <8 x float> %binop33, <8 x float> %mask_as_float.i48) nounwind
+  br label %safe_if_after_false
+
+safe_if_after_false:                              ; preds = %safe_if_run_false, %allocas
+  %0 = phi <8 x float> [ %x, %allocas ], [ %blend.i52, %safe_if_run_false ]
+  ret <8 x float> %0
+}
+
+declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
+declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x float>) nounwind readonly
+declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind
+declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
diff --git a/test/CodeGen/X86/avx-zext.ll b/test/CodeGen/X86/avx-zext.ll
new file mode 100755
index 0000000..b630e9d
--- /dev/null
+++ b/test/CodeGen/X86/avx-zext.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
+;CHECK: zext_8i16_to_8i32
+;CHECK: vpunpckhwd
+;CHECK: ret
+
+  %B = zext <8 x i16> %A to <8 x i32>
+  ret <8 x i32>%B
+}
+
+define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
+;CHECK: zext_4i32_to_4i64
+;CHECK: vpunpckhdq
+;CHECK: ret
+
+  %B = zext <4 x i32> %A to <4 x i64>
+  ret <4 x i64>%B
+}
+
+
+define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
+;CHECK: zext_8i8_to_8i32
+;CHECK: vpunpckhwd
+;CHECK: vpunpcklwd
+;CHECK: vinsertf128
+;CHECK: ret
+  %t = zext <8 x i8> %z to <8 x i32>
+  ret <8 x i32> %t
+}
diff --git a/test/CodeGen/X86/avx2-arith.ll b/test/CodeGen/X86/avx2-arith.ll
new file mode 100644
index 0000000..09f9538
--- /dev/null
+++ b/test/CodeGen/X86/avx2-arith.ll
@@ -0,0 +1,76 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; CHECK: vpaddq %ymm
+define <4 x i64> @vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
+  %x = add <4 x i64> %i, %j
+  ret <4 x i64> %x
+}
+
+; CHECK: vpaddd %ymm
+define <8 x i32> @vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+  %x = add <8 x i32> %i, %j
+  ret <8 x i32> %x
+}
+
+; CHECK: vpaddw %ymm
+define <16 x i16> @vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+  %x = add <16 x i16> %i, %j
+  ret <16 x i16> %x
+}
+
+; CHECK: vpaddb %ymm
+define <32 x i8> @vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+  %x = add <32 x i8> %i, %j
+  ret <32 x i8> %x
+}
+
+; CHECK: vpsubq %ymm
+define <4 x i64> @vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
+  %x = sub <4 x i64> %i, %j
+  ret <4 x i64> %x
+}
+
+; CHECK: vpsubd %ymm
+define <8 x i32> @vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+  %x = sub <8 x i32> %i, %j
+  ret <8 x i32> %x
+}
+
+; CHECK: vpsubw %ymm
+define <16 x i16> @vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+  %x = sub <16 x i16> %i, %j
+  ret <16 x i16> %x
+}
+
+; CHECK: vpsubb %ymm
+define <32 x i8> @vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+  %x = sub <32 x i8> %i, %j
+  ret <32 x i8> %x
+}
+
+; CHECK: vpmulld %ymm
+define <8 x i32> @vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+  %x = mul <8 x i32> %i, %j
+  ret <8 x i32> %x
+}
+
+; CHECK: vpmullw %ymm
+define <16 x i16> @vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+  %x = mul <16 x i16> %i, %j
+  ret <16 x i16> %x
+}
+
+; CHECK: vpmuludq %ymm
+; CHECK-NEXT: vpsrlq $32, %ymm
+; CHECK-NEXT: vpmuludq %ymm
+; CHECK-NEXT: vpsllq $32, %ymm
+; CHECK-NEXT: vpaddq %ymm
+; CHECK-NEXT: vpsrlq $32, %ymm
+; CHECK-NEXT: vpmuludq %ymm
+; CHECK-NEXT: vpsllq $32, %ymm
+; CHECK-NEXT: vpaddq %ymm
+define <4 x i64> @mul-v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
+  %x = mul <4 x i64> %i, %j
+  ret <4 x i64> %x
+}
+
diff --git a/test/CodeGen/X86/avx2-cmp.ll b/test/CodeGen/X86/avx2-cmp.ll
new file mode 100644
index 0000000..df30d9e
--- /dev/null
+++ b/test/CodeGen/X86/avx2-cmp.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; CHECK: vpcmpgtd  %ymm
+define <8 x i32> @int256-cmp(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+  %bincmp = icmp slt <8 x i32> %i, %j
+  %x = sext <8 x i1> %bincmp to <8 x i32>
+  ret <8 x i32> %x
+}
+
+; CHECK: vpcmpgtq  %ymm
+define <4 x i64> @v4i64-cmp(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
+  %bincmp = icmp slt <4 x i64> %i, %j
+  %x = sext <4 x i1> %bincmp to <4 x i64>
+  ret <4 x i64> %x
+}
+
+; CHECK: vpcmpgtw  %ymm
+define <16 x i16> @v16i16-cmp(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+  %bincmp = icmp slt <16 x i16> %i, %j
+  %x = sext <16 x i1> %bincmp to <16 x i16>
+  ret <16 x i16> %x
+}
+
+; CHECK: vpcmpgtb  %ymm
+define <32 x i8> @v32i8-cmp(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+  %bincmp = icmp slt <32 x i8> %i, %j
+  %x = sext <32 x i1> %bincmp to <32 x i8>
+  ret <32 x i8> %x
+}
+
+; CHECK: vpcmpeqd  %ymm
+define <8 x i32> @int256-cmpeq(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+  %bincmp = icmp eq <8 x i32> %i, %j
+  %x = sext <8 x i1> %bincmp to <8 x i32>
+  ret <8 x i32> %x
+}
+
+; CHECK: vpcmpeqq  %ymm
+define <4 x i64> @v4i64-cmpeq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
+  %bincmp = icmp eq <4 x i64> %i, %j
+  %x = sext <4 x i1> %bincmp to <4 x i64>
+  ret <4 x i64> %x
+}
+
+; CHECK: vpcmpeqw  %ymm
+define <16 x i16> @v16i16-cmpeq(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+  %bincmp = icmp eq <16 x i16> %i, %j
+  %x = sext <16 x i1> %bincmp to <16 x i16>
+  ret <16 x i16> %x
+}
+
+; CHECK: vpcmpeqb  %ymm
+define <32 x i8> @v32i8-cmpeq(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+  %bincmp = icmp eq <32 x i8> %i, %j
+  %x = sext <32 x i1> %bincmp to <32 x i8>
+  ret <32 x i8> %x
+}
+
diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll
new file mode 100644
index 0000000..1fb41c0
--- /dev/null
+++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -0,0 +1,994 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=core-avx2 -mattr=avx2 | FileCheck %s
+
+define <16 x i16> @test_x86_avx2_packssdw(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpackssdw
+  %res = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_packsswb(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpacksswb
+  %res = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_packuswb(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpackuswb
+  %res = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_padds_b(<32 x i8> %a0, <32 x i8> %a1) {
+  ; CHECK: vpaddsb
+  %res = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_padds_w(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpaddsw
+  %res = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_paddus_b(<32 x i8> %a0, <32 x i8> %a1) {
+  ; CHECK: vpaddusb
+  %res = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_paddus_w(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpaddusw
+  %res = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_pavg_b(<32 x i8> %a0, <32 x i8> %a1) {
+  ; CHECK: vpavgb
+  %res = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pavg_w(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpavgw
+  %res = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpmaddwd
+  %res = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pmaxs_w(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpmaxsw
+  %res = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_pmaxu_b(<32 x i8> %a0, <32 x i8> %a1) {
+  ; CHECK: vpmaxub
+  %res = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pmins_w(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpminsw
+  %res = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_pminu_b(<32 x i8> %a0, <32 x i8> %a1) {
+  ; CHECK: vpminub
+  %res = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define i32 @test_x86_avx2_pmovmskb(<32 x i8> %a0) {
+  ; CHECK: vpmovmskb
+  %res = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pmulh_w(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpmulhw
+  %res = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pmulhu_w(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpmulhuw
+  %res = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pmulu_dq(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpmuludq
+  %res = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> %a1) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psad_bw(<32 x i8> %a0, <32 x i8> %a1) {
+  ; CHECK: vpsadbw
+  %res = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psll_d(<8 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpslld
+  %res = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
+  ; CHECK: vpslldq
+  %res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
+  ; CHECK: vpslldq
+  %res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64>, i32) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psll_q(<4 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpsllq
+  %res = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_psll_w(<16 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpsllw
+  %res = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pslli_d(<8 x i32> %a0) {
+  ; CHECK: vpslld
+  %res = call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pslli_q(<4 x i64> %a0) {
+  ; CHECK: vpsllq
+  %res = call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pslli_w(<16 x i16> %a0) {
+  ; CHECK: vpsllw
+  %res = call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psra_d(<8 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpsrad
+  %res = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_psra_w(<16 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpsraw
+  %res = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psrai_d(<8 x i32> %a0) {
+  ; CHECK: vpsrad
+  %res = call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_psrai_w(<16 x i16> %a0) {
+  ; CHECK: vpsraw
+  %res = call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psrl_d(<8 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpsrld
+  %res = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
+  ; CHECK: vpsrldq
+  %res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
+  ; CHECK: vpsrldq
+  %res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psrl_q(<4 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpsrlq
+  %res = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_psrl_w(<16 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpsrlw
+  %res = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psrli_d(<8 x i32> %a0) {
+  ; CHECK: vpsrld
+  %res = call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psrli_q(<4 x i64> %a0) {
+  ; CHECK: vpsrlq
+  %res = call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_psrli_w(<16 x i16> %a0) {
+  ; CHECK: vpsrlw
+  %res = call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_psubs_b(<32 x i8> %a0, <32 x i8> %a1) {
+  ; CHECK: vpsubsb
+  %res = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_psubs_w(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpsubsw
+  %res = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_psubus_b(<32 x i8> %a0, <32 x i8> %a1) {
+  ; CHECK: vpsubusb
+  %res = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_psubus_w(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpsubusw
+  %res = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_pabs_b(<32 x i8> %a0) {
+  ; CHECK: vpabsb
+  %res = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pabs_d(<8 x i32> %a0) {
+  ; CHECK: vpabsd
+  %res = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pabs_w(<16 x i16> %a0) {
+  ; CHECK: vpabsw
+  %res = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_phadd_d(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vphaddd
+  %res = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_phadd_sw(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vphaddsw
+  %res = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_phadd_w(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vphaddw
+  %res = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_phsub_d(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vphsubd
+  %res = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_phsub_sw(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vphsubsw
+  %res = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_phsub_w(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vphsubw
+  %res = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) {
+  ; CHECK: vpmaddubsw
+  %res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pmul_hr_sw(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpmulhrsw
+  %res = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_pshuf_b(<32 x i8> %a0, <32 x i8> %a1) {
+  ; CHECK: vpshufb
+  %res = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_psign_b(<32 x i8> %a0, <32 x i8> %a1) {
+  ; CHECK: vpsignb
+  %res = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psign_d(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpsignd
+  %res = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %a0, <8 x i32> %a1) ; <<4 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_psign_w(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpsignw
+  %res = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_movntdqa(i8* %a0) {
+  ; CHECK: movl
+  ; CHECK: vmovntdqa
+  %res = call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %a0) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.movntdqa(i8*) nounwind readonly
+
+
+define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
+  ; CHECK: vmpsadbw
+  %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i32 7) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i32) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_packusdw(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpackusdw
+  %res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) {
+  ; CHECK: vpblendvb
+  %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpblendw
+  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 7) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_pmaxsb(<32 x i8> %a0, <32 x i8> %a1) {
+  ; CHECK: vpmaxsb
+  %res = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pmaxsd(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpmaxsd
+  %res = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pmaxud(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpmaxud
+  %res = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pmaxuw(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpmaxuw
+  %res = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_pminsb(<32 x i8> %a0, <32 x i8> %a1) {
+  ; CHECK: vpminsb
+  %res = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pminsd(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpminsd
+  %res = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pminud(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpminud
+  %res = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pminuw(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpminuw
+  %res = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pmovsxbd(<16 x i8> %a0) {
+  ; CHECK: vpmovsxbd
+  %res = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pmovsxbq(<16 x i8> %a0) {
+  ; CHECK: vpmovsxbq
+  %res = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pmovsxbw(<16 x i8> %a0) {
+  ; CHECK: vpmovsxbw
+  %res = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pmovsxdq(<4 x i32> %a0) {
+  ; CHECK: vpmovsxdq
+  %res = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pmovsxwd(<8 x i16> %a0) {
+  ; CHECK: vpmovsxwd
+  %res = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pmovsxwq(<8 x i16> %a0) {
+  ; CHECK: vpmovsxwq
+  %res = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pmovzxbd(<16 x i8> %a0) {
+  ; CHECK: vpmovzxbd
+  %res = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pmovzxbq(<16 x i8> %a0) {
+  ; CHECK: vpmovzxbq
+  %res = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pmovzxbw(<16 x i8> %a0) {
+  ; CHECK: vpmovzxbw
+  %res = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pmovzxdq(<4 x i32> %a0) {
+  ; CHECK: vpmovzxdq
+  %res = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pmovzxwd(<8 x i16> %a0) {
+  ; CHECK: vpmovzxwd
+  %res = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pmovzxwq(<8 x i16> %a0) {
+  ; CHECK: vpmovzxwq
+  %res = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pmul.dq(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpmuldq
+  %res = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1) ; <<2 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_vbroadcasti128(i8* %a0) {
+  ; CHECK: vbroadcasti128
+  %res = call <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8* %a0) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8*) nounwind readonly
+
+define <4 x double> @test_x86_avx2_vbroadcast_sd_pd_256(<2 x double> %a0) {
+  ; CHECK: vbroadcastsd
+  %res = call <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double> %a0) ; <<4 x double>> [#uses=1]
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double>) nounwind readonly
+
+
+define <4 x float> @test_x86_avx2_vbroadcast_ss_ps(<4 x float> %a0) {
+  ; CHECK: vbroadcastss
+  %res = call <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float>) nounwind readonly
+
+
+define <8 x float> @test_x86_avx2_vbroadcast_ss_ps_256(<4 x float> %a0) {
+  ; CHECK: vbroadcastss
+  %res = call <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float> %a0) ; <<8 x float>> [#uses=1]
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float>) nounwind readonly
+
+
+define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpblendd
+  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 7) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpblendd
+  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 7) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32) nounwind readnone
+
+
+define <16 x i8> @test_x86_avx2_pbroadcastb_128(<16 x i8> %a0) {
+  ; CHECK: vpbroadcastb
+  %res = call <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1]
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8>) nounwind readonly
+
+
+define <32 x i8> @test_x86_avx2_pbroadcastb_256(<16 x i8> %a0) {
+  ; CHECK: vpbroadcastb
+  %res = call <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8> %a0) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8>) nounwind readonly
+
+
+define <8 x i16> @test_x86_avx2_pbroadcastw_128(<8 x i16> %a0) {
+  ; CHECK: vpbroadcastw
+  %res = call <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16>) nounwind readonly
+
+
+define <16 x i16> @test_x86_avx2_pbroadcastw_256(<8 x i16> %a0) {
+  ; CHECK: vpbroadcastw
+  %res = call <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16> %a0) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16>) nounwind readonly
+
+
+define <4 x i32> @test_x86_avx2_pbroadcastd_128(<4 x i32> %a0) {
+  ; CHECK: vpbroadcastd
+  %res = call <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32>) nounwind readonly
+
+
+define <8 x i32> @test_x86_avx2_pbroadcastd_256(<4 x i32> %a0) {
+  ; CHECK: vpbroadcastd
+  %res = call <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32> %a0) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32>) nounwind readonly
+
+
+define <2 x i64> @test_x86_avx2_pbroadcastq_128(<2 x i64> %a0) {
+  ; CHECK: vpbroadcastq
+  %res = call <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64> %a0) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64>) nounwind readonly
+
+
+define <4 x i64> @test_x86_avx2_pbroadcastq_256(<2 x i64> %a0) {
+  ; CHECK: vpbroadcastq
+  %res = call <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64> %a0) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64>) nounwind readonly
+
+
+define <8 x i32> @test_x86_avx2_permd(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpermd
+  %res = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly
+
+
+define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x float> %a1) {
+  ; CHECK: vpermps
+  %res = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x float>) nounwind readonly
+
+
+define <4 x i64> @test_x86_avx2_permq(<4 x i64> %a0) {
+  ; CHECK: vpermq
+  %res = call <4 x i64> @llvm.x86.avx2.permq(<4 x i64> %a0, i8 7) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.permq(<4 x i64>, i8) nounwind readonly
+
+
+define <4 x double> @test_x86_avx2_permpd(<4 x double> %a0) {
+  ; CHECK: vpermpd
+  %res = call <4 x double> @llvm.x86.avx2.permpd(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1]
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx2.permpd(<4 x double>, i8) nounwind readonly
+
+
+define <4 x i64> @test_x86_avx2_vperm2i128(<4 x i64> %a0, <4 x i64> %a1) {
+  ; CHECK: vperm2i128
+  %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 1) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readonly
+
+
+define <2 x i64> @test_x86_avx2_vextracti128(<4 x i64> %a0) {
+  ; CHECK: vextracti128
+  %res = call <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64> %a0, i8 7) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64>, i8) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_vinserti128(<4 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vinserti128
+  %res = call <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64> %a0, <2 x i64> %a1, i8 7) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64>, <2 x i64>, i8) nounwind readnone
+
+
+define <2 x i64> @test_x86_avx2_maskload_q(i8* %a0, <2 x i64> %a1) {
+  ; CHECK: vpmaskmovq
+  %res = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.maskload.q(i8*, <2 x i64>) nounwind readonly
+
+
+define <4 x i64> @test_x86_avx2_maskload_q_256(i8* %a0, <4 x i64> %a1) {
+  ; CHECK: vpmaskmovq
+  %res = call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.maskload.q.256(i8*, <4 x i64>) nounwind readonly
+
+
+define <4 x i32> @test_x86_avx2_maskload_d(i8* %a0, <4 x i32> %a1) {
+  ; CHECK: vpmaskmovd
+  %res = call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.maskload.d(i8*, <4 x i32>) nounwind readonly
+
+
+define <8 x i32> @test_x86_avx2_maskload_d_256(i8* %a0, <8 x i32> %a1) {
+  ; CHECK: vpmaskmovd
+  %res = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.maskload.d.256(i8*, <8 x i32>) nounwind readonly
+
+
+define void @test_x86_avx2_maskstore_q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) {
+  ; CHECK: vpmaskmovq
+  call void @llvm.x86.avx2.maskstore.q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2)
+  ret void
+}
+declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>) nounwind
+
+
+define void @test_x86_avx2_maskstore_q_256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) {
+  ; CHECK: vpmaskmovq
+  call void @llvm.x86.avx2.maskstore.q.256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2)
+  ret void
+}
+declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>) nounwind
+
+
+define void @test_x86_avx2_maskstore_d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) {
+  ; CHECK: vpmaskmovd
+  call void @llvm.x86.avx2.maskstore.d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2)
+  ret void
+}
+declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>) nounwind
+
+
+define void @test_x86_avx2_maskstore_d_256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) {
+  ; CHECK: vpmaskmovd
+  call void @llvm.x86.avx2.maskstore.d.256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2)
+  ret void
+}
+declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>) nounwind
+
+
+define <4 x i32> @test_x86_avx2_psllv_d(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpsllvd
+  %res = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psllv_d_256(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpsllvd
+  %res = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <2 x i64> @test_x86_avx2_psllv_q(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpsllvq
+  %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psllv_q_256(<4 x i64> %a0, <4 x i64> %a1) {
+  ; CHECK: vpsllvq
+  %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
+
+
+define <4 x i32> @test_x86_avx2_psrlv_d(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpsrlvd
+  %res = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psrlv_d_256(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpsrlvd
+  %res = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <2 x i64> @test_x86_avx2_psrlv_q(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpsrlvq
+  %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psrlv_q_256(<4 x i64> %a0, <4 x i64> %a1) {
+  ; CHECK: vpsrlvq
+  %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
+
+
+define <4 x i32> @test_x86_avx2_psrav_d(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpsravd
+  %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psrav_d_256(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpsravd
+  %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone
+
+; This is checked here because the execution dependency fix pass makes it hard to test in AVX mode since we don't have 256-bit integer instructions
+define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
+  ; CHECK: vmovdqu
+  ; add operation forces the execution domain.
+  %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
+  ret void
+}
+declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
diff --git a/test/CodeGen/X86/avx2-logic.ll b/test/CodeGen/X86/avx2-logic.ll
new file mode 100644
index 0000000..13ebaa6
--- /dev/null
+++ b/test/CodeGen/X86/avx2-logic.ll
@@ -0,0 +1,96 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; CHECK: vpandn
+; CHECK: vpandn  %ymm
+; CHECK: ret
+define <4 x i64> @vpandn(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+  %y = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
+  %x = and <4 x i64> %a, %y
+  ret <4 x i64> %x
+}
+
+; CHECK: vpand
+; CHECK: vpand %ymm
+; CHECK: ret
+define <4 x i64> @vpand(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+  %x = and <4 x i64> %a2, %b
+  ret <4 x i64> %x
+}
+
+; CHECK: vpor
+; CHECK: vpor %ymm
+; CHECK: ret
+define <4 x i64> @vpor(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+  %x = or <4 x i64> %a2, %b
+  ret <4 x i64> %x
+}
+
+; CHECK: vpxor
+; CHECK: vpxor %ymm
+; CHECK: ret
+define <4 x i64> @vpxor(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+  %x = xor <4 x i64> %a2, %b
+  ret <4 x i64> %x
+}
+
+; CHECK: vpblendvb
+; CHECK: vpblendvb %ymm
+; CHECK: ret
+define <32 x i8> @vpblendvb(<32 x i8> %x, <32 x i8> %y) {
+  %min_is_x = icmp ult <32 x i8> %x, %y
+  %min = select <32 x i1> %min_is_x, <32 x i8> %x, <32 x i8> %y
+  ret <32 x i8> %min
+}
+
+define <8 x i32> @signd(<8 x i32> %a, <8 x i32> %b) nounwind {
+entry:
+; CHECK: signd:
+; CHECK: psignd
+; CHECK-NOT: sub
+; CHECK: ret
+  %b.lobit = ashr <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+  %sub = sub nsw <8 x i32> zeroinitializer, %a
+  %0 = xor <8 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  %1 = and <8 x i32> %a, %0
+  %2 = and <8 x i32> %b.lobit, %sub
+  %cond = or <8 x i32> %1, %2
+  ret <8 x i32> %cond
+}
+
+define <8 x i32> @blendvb(<8 x i32> %b, <8 x i32> %a, <8 x i32> %c) nounwind {
+entry:
+; CHECK: blendvb:
+; CHECK: pblendvb
+; CHECK: ret
+  %b.lobit = ashr <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+  %sub = sub nsw <8 x i32> zeroinitializer, %a
+  %0 = xor <8 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  %1 = and <8 x i32> %c, %0
+  %2 = and <8 x i32> %a, %b.lobit
+  %cond = or <8 x i32> %1, %2
+  ret <8 x i32> %cond
+}
+
+define <8 x i32> @allOnes() nounwind {
+; CHECK: vpcmpeqd
+; CHECK-NOT: vinsert
+        ret <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+}
+
+define <16 x i16> @allOnes2() nounwind {
+; CHECK: vpcmpeqd
+; CHECK-NOT: vinsert
+        ret <16 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+}
diff --git a/test/CodeGen/X86/avx2-nontemporal.ll b/test/CodeGen/X86/avx2-nontemporal.ll
new file mode 100644
index 0000000..0768aae
--- /dev/null
+++ b/test/CodeGen/X86/avx2-nontemporal.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=x86 -mattr=+avx2 | FileCheck %s
+
+define void @f(<8 x float> %A, i8* %B, <4 x double> %C, i32 %D, <4 x i64> %E) {
+; CHECK: vmovntps
+  %cast = bitcast i8* %B to <8 x float>*
+  %A2 = fadd <8 x float> %A, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x4200000000000000>
+  store <8 x float> %A2, <8 x float>* %cast, align 16, !nontemporal !0
+; CHECK: vmovntdq
+  %cast1 = bitcast i8* %B to <4 x i64>*
+  %E2 = add <4 x i64> %E, <i64 1, i64 2, i64 3, i64 4>
+  store <4 x i64> %E2, <4 x i64>* %cast1, align 16, !nontemporal !0
+; CHECK: vmovntpd
+  %cast2 = bitcast i8* %B to <4 x double>*
+  %C2 = fadd <4 x double> %C, <double 0x0, double 0x0, double 0x0, double 0x4200000000000000>
+  store <4 x double> %C2, <4 x double>* %cast2, align 16, !nontemporal !0
+; CHECK: movnti
+  %cast3 = bitcast i8* %B to i32*
+  store i32 %D, i32* %cast3, align 16, !nontemporal !0
+  ret void
+}
+
+!0 = metadata !{i32 1}
diff --git a/test/CodeGen/X86/avx2-palignr.ll b/test/CodeGen/X86/avx2-palignr.ll
new file mode 100644
index 0000000..53b9da3
--- /dev/null
+++ b/test/CodeGen/X86/avx2-palignr.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+define <8 x i32> @test1(<8 x i32> %A, <8 x i32> %B) nounwind {
+; CHECK: test1:
+; CHECK: vpalignr $4
+  %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
+  ret <8 x i32> %C
+}
+
+define <8 x i32> @test2(<8 x i32> %A, <8 x i32> %B) nounwind {
+; CHECK: test2:
+; CHECK: vpalignr $4
+  %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 undef, i32 12>
+  ret <8 x i32> %C
+}
+
+define <8 x i32> @test3(<8 x i32> %A, <8 x i32> %B) nounwind {
+; CHECK: test3:
+; CHECK: vpalignr $4
+  %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
+  ret <8 x i32> %C
+}
+;
+define <8 x i32> @test4(<8 x i32> %A, <8 x i32> %B) nounwind {
+; CHECK: test4:
+; CHECK: vpalignr $8
+  %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 10, i32 11, i32 undef, i32 1, i32 14, i32 15, i32 4, i32 5>
+  ret <8 x i32> %C
+}
+
+define <16 x i16> @test5(<16 x i16> %A, <16 x i16> %B) nounwind {
+; CHECK: test5:
+; CHECK: vpalignr $6
+  %C = shufflevector <16 x i16> %A, <16 x i16> %B, <16 x i32> <i32 3, i32 4, i32 undef, i32 6, i32 7, i32 16, i32 17, i32 18, i32 11, i32 12, i32 13, i32 undef, i32 15, i32 24, i32 25, i32 26>
+  ret <16 x i16> %C
+}
+
+define <16 x i16> @test6(<16 x i16> %A, <16 x i16> %B) nounwind {
+; CHECK: test6:
+; CHECK: vpalignr $6
+  %C = shufflevector <16 x i16> %A, <16 x i16> %B, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 11, i32 12, i32 13, i32 undef, i32 15, i32 24, i32 25, i32 26>
+  ret <16 x i16> %C
+}
+
+define <16 x i16> @test7(<16 x i16> %A, <16 x i16> %B) nounwind {
+; CHECK: test7:
+; CHECK: vpalignr $6
+  %C = shufflevector <16 x i16> %A, <16 x i16> %B, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <16 x i16> %C
+}
+
+define <32 x i8> @test8(<32 x i8> %A, <32 x i8> %B) nounwind {
+; CHECK: test8:
+; CHECK: palignr $5
+  %C = shufflevector <32 x i8> %A, <32 x i8> %B, <32 x i32> <i32 5, i32 6, i32 7, i32 undef, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52>
+  ret <32 x i8> %C
+}
diff --git a/test/CodeGen/X86/avx2-phaddsub.ll b/test/CodeGen/X86/avx2-phaddsub.ll
new file mode 100644
index 0000000..4eac71d
--- /dev/null
+++ b/test/CodeGen/X86/avx2-phaddsub.ll
@@ -0,0 +1,73 @@
+; RUN: llc < %s -march=x86-64 -mattr=+avx2 | FileCheck %s
+
+; CHECK: phaddw1:
+; CHECK: vphaddw
+define <16 x i16> @phaddw1(<16 x i16> %x, <16 x i16> %y) {
+  %a = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
+  %b = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
+  %r = add <16 x i16> %a, %b
+  ret <16 x i16> %r
+}
+
+; CHECK: phaddw2:
+; CHECK: vphaddw
+define <16 x i16> @phaddw2(<16 x i16> %x, <16 x i16> %y) {
+  %a = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
+  %b = shufflevector <16 x i16> %y, <16 x i16> %x, <16 x i32> <i32 16, i32 18, i32 20, i32 22, i32 0, i32 2, i32 4, i32 6, i32 24, i32 26, i32 28, i32 30, i32 8, i32 10, i32 12, i32 14>
+  %r = add <16 x i16> %a, %b
+  ret <16 x i16> %r
+}
+
+; CHECK: phaddd1:
+; CHECK: vphaddd
+define <8 x i32> @phaddd1(<8 x i32> %x, <8 x i32> %y) {
+  %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
+  %b = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
+  %r = add <8 x i32> %a, %b
+  ret <8 x i32> %r
+}
+
+; CHECK: phaddd2:
+; CHECK: vphaddd
+define <8 x i32> @phaddd2(<8 x i32> %x, <8 x i32> %y) {
+  %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 1, i32 2, i32 9, i32 10, i32 5, i32 6, i32 13, i32 14>
+  %b = shufflevector <8 x i32> %y, <8 x i32> %x, <8 x i32> <i32 8, i32 11, i32 0, i32 3, i32 12, i32 15, i32 4, i32 7>
+  %r = add <8 x i32> %a, %b
+  ret <8 x i32> %r
+}
+
+; CHECK: phaddd3:
+; CHECK: vphaddd
+define <8 x i32> @phaddd3(<8 x i32> %x) {
+  %a = shufflevector <8 x i32> %x, <8 x i32> undef, <8 x i32> <i32 undef, i32 2, i32 8, i32 10, i32 4, i32 6, i32 undef, i32 14>
+  %b = shufflevector <8 x i32> %x, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 9, i32 undef, i32 5, i32 7, i32 13, i32 15>
+  %r = add <8 x i32> %a, %b
+  ret <8 x i32> %r
+}
+
+; CHECK: phsubw1:
+; CHECK: vphsubw
+define <16 x i16> @phsubw1(<16 x i16> %x, <16 x i16> %y) {
+  %a = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
+  %b = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
+  %r = sub <16 x i16> %a, %b
+  ret <16 x i16> %r
+}
+
+; CHECK: phsubd1:
+; CHECK: vphsubd
+define <8 x i32> @phsubd1(<8 x i32> %x, <8 x i32> %y) {
+  %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
+  %b = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
+  %r = sub <8 x i32> %a, %b
+  ret <8 x i32> %r
+}
+
+; CHECK: phsubd2:
+; CHECK: vphsubd
+define <8 x i32> @phsubd2(<8 x i32> %x, <8 x i32> %y) {
+  %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 undef, i32 8, i32 undef, i32 4, i32 6, i32 12, i32 14>
+  %b = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 1, i32 undef, i32 9, i32 11, i32 5, i32 7, i32 undef, i32 15>
+  %r = sub <8 x i32> %a, %b
+  ret <8 x i32> %r
+}
diff --git a/test/CodeGen/X86/avx2-shift.ll b/test/CodeGen/X86/avx2-shift.ll
new file mode 100644
index 0000000..1f192a0
--- /dev/null
+++ b/test/CodeGen/X86/avx2-shift.ll
@@ -0,0 +1,268 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; CHECK: variable_shl0
+; CHECK: psllvd
+; CHECK: ret
+define <4 x i32> @variable_shl0(<4 x i32> %x, <4 x i32> %y) {
+  %k = shl <4 x i32> %x, %y
+  ret <4 x i32> %k
+}
+; CHECK: variable_shl1
+; CHECK: psllvd
+; CHECK: ret
+define <8 x i32> @variable_shl1(<8 x i32> %x, <8 x i32> %y) {
+  %k = shl <8 x i32> %x, %y
+  ret <8 x i32> %k
+}
+; CHECK: variable_shl2
+; CHECK: psllvq
+; CHECK: ret
+define <2 x i64> @variable_shl2(<2 x i64> %x, <2 x i64> %y) {
+  %k = shl <2 x i64> %x, %y
+  ret <2 x i64> %k
+}
+; CHECK: variable_shl3
+; CHECK: psllvq
+; CHECK: ret
+define <4 x i64> @variable_shl3(<4 x i64> %x, <4 x i64> %y) {
+  %k = shl <4 x i64> %x, %y
+  ret <4 x i64> %k
+}
+; CHECK: variable_srl0
+; CHECK: psrlvd
+; CHECK: ret
+define <4 x i32> @variable_srl0(<4 x i32> %x, <4 x i32> %y) {
+  %k = lshr <4 x i32> %x, %y
+  ret <4 x i32> %k
+}
+; CHECK: variable_srl1
+; CHECK: psrlvd
+; CHECK: ret
+define <8 x i32> @variable_srl1(<8 x i32> %x, <8 x i32> %y) {
+  %k = lshr <8 x i32> %x, %y
+  ret <8 x i32> %k
+}
+; CHECK: variable_srl2
+; CHECK: psrlvq
+; CHECK: ret
+define <2 x i64> @variable_srl2(<2 x i64> %x, <2 x i64> %y) {
+  %k = lshr <2 x i64> %x, %y
+  ret <2 x i64> %k
+}
+; CHECK: variable_srl3
+; CHECK: psrlvq
+; CHECK: ret
+define <4 x i64> @variable_srl3(<4 x i64> %x, <4 x i64> %y) {
+  %k = lshr <4 x i64> %x, %y
+  ret <4 x i64> %k
+}
+
+; CHECK: variable_sra0
+; CHECK: vpsravd
+; CHECK: ret
+define <4 x i32> @variable_sra0(<4 x i32> %x, <4 x i32> %y) {
+  %k = ashr <4 x i32> %x, %y
+  ret <4 x i32> %k
+}
+; CHECK: variable_sra1
+; CHECK: vpsravd
+; CHECK: ret
+define <8 x i32> @variable_sra1(<8 x i32> %x, <8 x i32> %y) {
+  %k = ashr <8 x i32> %x, %y
+  ret <8 x i32> %k
+}
+
+;;; Shift left
+; CHECK: vpslld
+define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone {
+  %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32
+2>
+  ret <8 x i32> %s
+}
+
+; CHECK: vpsllw
+define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone {
+  %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+  ret <16 x i16> %s
+}
+
+; CHECK: vpsllq
+define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone {
+  %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
+  ret <4 x i64> %s
+}
+
+;;; Logical Shift right
+; CHECK: vpsrld
+define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone {
+  %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32
+2>
+  ret <8 x i32> %s
+}
+
+; CHECK: vpsrlw
+define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone {
+  %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+  ret <16 x i16> %s
+}
+
+; CHECK: vpsrlq
+define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone {
+  %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
+  ret <4 x i64> %s
+}
+
+;;; Arithmetic Shift right
+; CHECK: vpsrad
+define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone {
+  %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32
+2>
+  ret <8 x i32> %s
+}
+
+; CHECK: vpsraw
+define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
+  %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+  ret <16 x i16> %s
+}
+
+; CHECK: variable_sra0_load
+; CHECK: vpsravd (%
+; CHECK: ret
+define <4 x i32> @variable_sra0_load(<4 x i32> %x, <4 x i32>* %y) {
+  %y1 = load <4 x i32>* %y
+  %k = ashr <4 x i32> %x, %y1
+  ret <4 x i32> %k
+}
+
+; CHECK: variable_sra1_load
+; CHECK: vpsravd (%
+; CHECK: ret
+define <8 x i32> @variable_sra1_load(<8 x i32> %x, <8 x i32>* %y) {
+  %y1 = load <8 x i32>* %y
+  %k = ashr <8 x i32> %x, %y1
+  ret <8 x i32> %k
+}
+
+; CHECK: variable_shl0_load
+; CHECK: vpsllvd (%
+; CHECK: ret
+define <4 x i32> @variable_shl0_load(<4 x i32> %x, <4 x i32>* %y) {
+  %y1 = load <4 x i32>* %y
+  %k = shl <4 x i32> %x, %y1
+  ret <4 x i32> %k
+}
+; CHECK: variable_shl1_load
+; CHECK: vpsllvd (%
+; CHECK: ret
+define <8 x i32> @variable_shl1_load(<8 x i32> %x, <8 x i32>* %y) {
+  %y1 = load <8 x i32>* %y
+  %k = shl <8 x i32> %x, %y1
+  ret <8 x i32> %k
+}
+; CHECK: variable_shl2_load
+; CHECK: vpsllvq (%
+; CHECK: ret
+define <2 x i64> @variable_shl2_load(<2 x i64> %x, <2 x i64>* %y) {
+  %y1 = load <2 x i64>* %y
+  %k = shl <2 x i64> %x, %y1
+  ret <2 x i64> %k
+}
+; CHECK: variable_shl3_load
+; CHECK: vpsllvq (%
+; CHECK: ret
+define <4 x i64> @variable_shl3_load(<4 x i64> %x, <4 x i64>* %y) {
+  %y1 = load <4 x i64>* %y
+  %k = shl <4 x i64> %x, %y1
+  ret <4 x i64> %k
+}
+; CHECK: variable_srl0_load
+; CHECK: vpsrlvd (%
+; CHECK: ret
+define <4 x i32> @variable_srl0_load(<4 x i32> %x, <4 x i32>* %y) {
+  %y1 = load <4 x i32>* %y
+  %k = lshr <4 x i32> %x, %y1
+  ret <4 x i32> %k
+}
+; CHECK: variable_srl1_load
+; CHECK: vpsrlvd (%
+; CHECK: ret
+define <8 x i32> @variable_srl1_load(<8 x i32> %x, <8 x i32>* %y) {
+  %y1 = load <8 x i32>* %y
+  %k = lshr <8 x i32> %x, %y1
+  ret <8 x i32> %k
+}
+; CHECK: variable_srl2_load
+; CHECK: vpsrlvq (%
+; CHECK: ret
+define <2 x i64> @variable_srl2_load(<2 x i64> %x, <2 x i64>* %y) {
+  %y1 = load <2 x i64>* %y
+  %k = lshr <2 x i64> %x, %y1
+  ret <2 x i64> %k
+}
+; CHECK: variable_srl3_load
+; CHECK: vpsrlvq (%
+; CHECK: ret
+define <4 x i64> @variable_srl3_load(<4 x i64> %x, <4 x i64>* %y) {
+  %y1 = load <4 x i64>* %y
+  %k = lshr <4 x i64> %x, %y1
+  ret <4 x i64> %k
+}
+
+define <32 x i8> @shl9(<32 x i8> %A) nounwind {
+  %B = shl <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <32 x i8> %B
+; CHECK: shl9:
+; CHECK: vpsllw $3
+; CHECK: vpand
+; CHECK: ret
+}
+
+define <32 x i8> @shr9(<32 x i8> %A) nounwind {
+  %B = lshr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <32 x i8> %B
+; CHECK: shr9:
+; CHECK: vpsrlw $3
+; CHECK: vpand
+; CHECK: ret
+}
+
+define <32 x i8> @sra_v32i8_7(<32 x i8> %A) nounwind {
+  %B = ashr <32 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  ret <32 x i8> %B
+; CHECK: sra_v32i8_7:
+; CHECK: vpxor
+; CHECK: vpcmpgtb
+; CHECK: ret
+}
+
+define <32 x i8> @sra_v32i8(<32 x i8> %A) nounwind {
+  %B = ashr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <32 x i8> %B
+; CHECK: sra_v32i8:
+; CHECK: vpsrlw $3
+; CHECK: vpand
+; CHECK: vpxor
+; CHECK: vpsubb
+; CHECK: ret
+}
+
+; CHECK: _sext_v16i16
+; CHECK: vpsllw
+; CHECK: vpsraw
+; CHECK-NOT: vinsertf128
+define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
+  %b = trunc <16 x i16> %a to <16 x i8>
+  %c = sext <16 x i8> %b to <16 x i16>
+  ret <16 x i16> %c
+}
+
+; CHECK: _sext_v8i32
+; CHECK: vpslld
+; CHECK: vpsrad
+; CHECK-NOT: vinsertf128
+define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
+  %b = trunc <8 x i32> %a to <8 x i16>
+  %c = sext <8 x i16> %b to <8 x i32>
+  ret <8 x i32> %c
+}
diff --git a/test/CodeGen/X86/avx2-unpack.ll b/test/CodeGen/X86/avx2-unpack.ll
new file mode 100644
index 0000000..6d17443
--- /dev/null
+++ b/test/CodeGen/X86/avx2-unpack.ll
@@ -0,0 +1,86 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; CHECK: vpunpckhdq
+define <8 x i32> @unpackhidq1(<8 x i32> %src1, <8 x i32> %src2) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <8 x i32> %src1, <8 x i32> %src2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i32> %shuffle.i
+}
+
+; CHECK: vpunpckhqdq
+define <4 x i64> @unpackhiqdq1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x i64> %shuffle.i
+}
+
+; CHECK: vpunpckldq
+define <8 x i32> @unpacklodq1(<8 x i32> %src1, <8 x i32> %src2) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <8 x i32> %src1, <8 x i32> %src2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
+  ret <8 x i32> %shuffle.i
+}
+
+; CHECK: vpunpcklqdq
+define <4 x i64> @unpacklqdq1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ret <4 x i64> %shuffle.i
+}
+
+; CHECK: vpunpckhwd
+define <16 x i16> @unpackhwd(<16 x i16> %src1, <16 x i16> %src2) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src2, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <16 x i16> %shuffle.i
+}
+
+; CHECK: vpunpcklwd
+define <16 x i16> @unpacklwd(<16 x i16> %src1, <16 x i16> %src2) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
+  ret <16 x i16> %shuffle.i
+}
+
+; CHECK: vpunpckhbw
+define <32 x i8> @unpackhbw(<32 x i8> %src1, <32 x i8> %src2) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src2, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
+  ret <32 x i8> %shuffle.i
+}
+
+; CHECK: vpunpcklbw
+define <32 x i8> @unpacklbw(<32 x i8> %src1, <32 x i8> %src2) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src2, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
+  ret <32 x i8> %shuffle.i
+}
+
+; CHECK: vpunpckhdq
+define <8 x i32> @unpackhidq1_undef(<8 x i32> %src1) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <8 x i32> %src1, <8 x i32> %src1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i32> %shuffle.i
+}
+
+; CHECK: vpunpckhqdq
+define <4 x i64> @unpackhiqdq1_undef(<4 x i64> %src1) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x i64> %shuffle.i
+}
+
+; CHECK: vpunpckhwd
+define <16 x i16> @unpackhwd_undef(<16 x i16> %src1) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <16 x i16> %shuffle.i
+}
+
+; CHECK: vpunpcklwd
+define <16 x i16> @unpacklwd_undef(<16 x i16> %src1) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
+  ret <16 x i16> %shuffle.i
+}
+
diff --git a/test/CodeGen/X86/avx2-vbroadcast.ll b/test/CodeGen/X86/avx2-vbroadcast.ll
new file mode 100644
index 0000000..1a78414
--- /dev/null
+++ b/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -0,0 +1,187 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; CHECK: vpbroadcastb (%
+define <16 x i8> @BB16(i8* %ptr) nounwind uwtable readnone ssp {
+entry:
+  %q = load i8* %ptr, align 4
+  %q0 = insertelement <16 x i8> undef, i8 %q, i32 0
+  %q1 = insertelement <16 x i8> %q0, i8 %q, i32 1
+  %q2 = insertelement <16 x i8> %q1, i8 %q, i32 2
+  %q3 = insertelement <16 x i8> %q2, i8 %q, i32 3
+  %q4 = insertelement <16 x i8> %q3, i8 %q, i32 4
+  %q5 = insertelement <16 x i8> %q4, i8 %q, i32 5
+  %q6 = insertelement <16 x i8> %q5, i8 %q, i32 6
+  %q7 = insertelement <16 x i8> %q6, i8 %q, i32 7
+  %q8 = insertelement <16 x i8> %q7, i8 %q, i32 8
+  %q9 = insertelement <16 x i8> %q8, i8 %q, i32 9
+  %qa = insertelement <16 x i8> %q9, i8 %q, i32 10
+  %qb = insertelement <16 x i8> %qa, i8 %q, i32 11
+  %qc = insertelement <16 x i8> %qb, i8 %q, i32 12
+  %qd = insertelement <16 x i8> %qc, i8 %q, i32 13
+  %qe = insertelement <16 x i8> %qd, i8 %q, i32 14
+  %qf = insertelement <16 x i8> %qe, i8 %q, i32 15
+  ret <16 x i8> %qf
+}
+; CHECK: vpbroadcastb (%
+define <32 x i8> @BB32(i8* %ptr) nounwind uwtable readnone ssp {
+entry:
+  %q = load i8* %ptr, align 4
+  %q0 = insertelement <32 x i8> undef, i8 %q, i32 0
+  %q1 = insertelement <32 x i8> %q0, i8 %q, i32 1
+  %q2 = insertelement <32 x i8> %q1, i8 %q, i32 2
+  %q3 = insertelement <32 x i8> %q2, i8 %q, i32 3
+  %q4 = insertelement <32 x i8> %q3, i8 %q, i32 4
+  %q5 = insertelement <32 x i8> %q4, i8 %q, i32 5
+  %q6 = insertelement <32 x i8> %q5, i8 %q, i32 6
+  %q7 = insertelement <32 x i8> %q6, i8 %q, i32 7
+  %q8 = insertelement <32 x i8> %q7, i8 %q, i32 8
+  %q9 = insertelement <32 x i8> %q8, i8 %q, i32 9
+  %qa = insertelement <32 x i8> %q9, i8 %q, i32 10
+  %qb = insertelement <32 x i8> %qa, i8 %q, i32 11
+  %qc = insertelement <32 x i8> %qb, i8 %q, i32 12
+  %qd = insertelement <32 x i8> %qc, i8 %q, i32 13
+  %qe = insertelement <32 x i8> %qd, i8 %q, i32 14
+  %qf = insertelement <32 x i8> %qe, i8 %q, i32 15
+
+  %q20 = insertelement <32 x i8> %qf, i8 %q,  i32 16
+  %q21 = insertelement <32 x i8> %q20, i8 %q, i32 17
+  %q22 = insertelement <32 x i8> %q21, i8 %q, i32 18
+  %q23 = insertelement <32 x i8> %q22, i8 %q, i32 19
+  %q24 = insertelement <32 x i8> %q23, i8 %q, i32 20
+  %q25 = insertelement <32 x i8> %q24, i8 %q, i32 21
+  %q26 = insertelement <32 x i8> %q25, i8 %q, i32 22
+  %q27 = insertelement <32 x i8> %q26, i8 %q, i32 23
+  %q28 = insertelement <32 x i8> %q27, i8 %q, i32 24
+  %q29 = insertelement <32 x i8> %q28, i8 %q, i32 25
+  %q2a = insertelement <32 x i8> %q29, i8 %q, i32 26
+  %q2b = insertelement <32 x i8> %q2a, i8 %q, i32 27
+  %q2c = insertelement <32 x i8> %q2b, i8 %q, i32 28
+  %q2d = insertelement <32 x i8> %q2c, i8 %q, i32 29
+  %q2e = insertelement <32 x i8> %q2d, i8 %q, i32 30
+  %q2f = insertelement <32 x i8> %q2e, i8 %q, i32 31
+  ret <32 x i8> %q2f
+}
+; CHECK: vpbroadcastw (%
+
+define <8 x i16> @W16(i16* %ptr) nounwind uwtable readnone ssp {
+entry:
+  %q = load i16* %ptr, align 4
+  %q0 = insertelement <8 x i16> undef, i16 %q, i32 0
+  %q1 = insertelement <8 x i16> %q0, i16 %q, i32 1
+  %q2 = insertelement <8 x i16> %q1, i16 %q, i32 2
+  %q3 = insertelement <8 x i16> %q2, i16 %q, i32 3
+  %q4 = insertelement <8 x i16> %q3, i16 %q, i32 4
+  %q5 = insertelement <8 x i16> %q4, i16 %q, i32 5
+  %q6 = insertelement <8 x i16> %q5, i16 %q, i32 6
+  %q7 = insertelement <8 x i16> %q6, i16 %q, i32 7
+  ret <8 x i16> %q7
+}
+; CHECK: vpbroadcastw (%
+define <16 x i16> @WW16(i16* %ptr) nounwind uwtable readnone ssp {
+entry:
+  %q = load i16* %ptr, align 4
+  %q0 = insertelement <16 x i16> undef, i16 %q, i32 0
+  %q1 = insertelement <16 x i16> %q0, i16 %q, i32 1
+  %q2 = insertelement <16 x i16> %q1, i16 %q, i32 2
+  %q3 = insertelement <16 x i16> %q2, i16 %q, i32 3
+  %q4 = insertelement <16 x i16> %q3, i16 %q, i32 4
+  %q5 = insertelement <16 x i16> %q4, i16 %q, i32 5
+  %q6 = insertelement <16 x i16> %q5, i16 %q, i32 6
+  %q7 = insertelement <16 x i16> %q6, i16 %q, i32 7
+  %q8 = insertelement <16 x i16> %q7, i16 %q, i32 8
+  %q9 = insertelement <16 x i16> %q8, i16 %q, i32 9
+  %qa = insertelement <16 x i16> %q9, i16 %q, i32 10
+  %qb = insertelement <16 x i16> %qa, i16 %q, i32 11
+  %qc = insertelement <16 x i16> %qb, i16 %q, i32 12
+  %qd = insertelement <16 x i16> %qc, i16 %q, i32 13
+  %qe = insertelement <16 x i16> %qd, i16 %q, i32 14
+  %qf = insertelement <16 x i16> %qe, i16 %q, i32 15
+  ret <16 x i16> %qf
+}
+; CHECK: vpbroadcastd (%
+define <4 x i32> @D32(i32* %ptr) nounwind uwtable readnone ssp {
+entry:
+  %q = load i32* %ptr, align 4
+  %q0 = insertelement <4 x i32> undef, i32 %q, i32 0
+  %q1 = insertelement <4 x i32> %q0, i32 %q, i32 1
+  %q2 = insertelement <4 x i32> %q1, i32 %q, i32 2
+  %q3 = insertelement <4 x i32> %q2, i32 %q, i32 3
+  ret <4 x i32> %q3
+}
+; CHECK: vpbroadcastd (%
+define <8 x i32> @DD32(i32* %ptr) nounwind uwtable readnone ssp {
+entry:
+  %q = load i32* %ptr, align 4
+  %q0 = insertelement <8 x i32> undef, i32 %q, i32 0
+  %q1 = insertelement <8 x i32> %q0, i32 %q, i32 1
+  %q2 = insertelement <8 x i32> %q1, i32 %q, i32 2
+  %q3 = insertelement <8 x i32> %q2, i32 %q, i32 3
+  %q4 = insertelement <8 x i32> %q3, i32 %q, i32 4
+  %q5 = insertelement <8 x i32> %q4, i32 %q, i32 5
+  %q6 = insertelement <8 x i32> %q5, i32 %q, i32 6
+  %q7 = insertelement <8 x i32> %q6, i32 %q, i32 7
+  ret <8 x i32> %q7
+}
+; CHECK: vpbroadcastq (%
+define <2 x i64> @Q64(i64* %ptr) nounwind uwtable readnone ssp {
+entry:
+  %q = load i64* %ptr, align 4
+  %q0 = insertelement <2 x i64> undef, i64 %q, i32 0
+  %q1 = insertelement <2 x i64> %q0, i64 %q, i32 1
+  ret <2 x i64> %q1
+}
+; CHECK: vpbroadcastq (%
+define <4 x i64> @QQ64(i64* %ptr) nounwind uwtable readnone ssp {
+entry:
+  %q = load i64* %ptr, align 4
+  %q0 = insertelement <4 x i64> undef, i64 %q, i32 0
+  %q1 = insertelement <4 x i64> %q0, i64 %q, i32 1
+  %q2 = insertelement <4 x i64> %q1, i64 %q, i32 2
+  %q3 = insertelement <4 x i64> %q2, i64 %q, i32 3
+  ret <4 x i64> %q3
+}
+
+; make sure that we still don't support broadcast double into 128-bit vector
+; this used to crash
+define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
+entry:
+  %q = load double* %ptr, align 4
+  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
+  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
+  ret <2 x double> %vecinit2.i
+}
+
+; CHECK: V111
+; CHECK: vpbroadcastd
+; CHECK: ret
+define <8 x i32> @V111(<8 x i32> %in) nounwind uwtable readnone ssp {
+entry:
+  %g = add <8 x i32> %in, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  ret <8 x i32> %g
+}
+
+; CHECK: _e2
+; CHECK: vbroadcastss
+; CHECK: ret
+define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
+  %vecinit.i = insertelement <4 x float> undef, float        0xbf80000000000000, i32 0
+  %vecinit2.i = insertelement <4 x float> %vecinit.i, float  0xbf80000000000000, i32 1
+  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
+  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
+  ret <4 x float> %vecinit6.i
+}
+
+; CHECK: _e4
+; CHECK-NOT: broadcast
+; CHECK: ret
+define <8 x i8> @_e4(i8* %ptr) nounwind uwtable readnone ssp {
+  %vecinit0.i = insertelement <8 x i8> undef, i8       52, i32 0
+  %vecinit1.i = insertelement <8 x i8> %vecinit0.i, i8 52, i32 1
+  %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 52, i32 2
+  %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 52, i32 3
+  %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 3
+  %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 3
+  %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 3
+  %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 3
+  ret <8 x i8> %vecinit7.i
+}
diff --git a/test/CodeGen/X86/avx2-vperm2i128.ll b/test/CodeGen/X86/avx2-vperm2i128.ll
new file mode 100644
index 0000000..1937db5
--- /dev/null
+++ b/test/CodeGen/X86/avx2-vperm2i128.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; CHECK: vperm2i128 $17
+define <32 x i8> @E(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
+entry:
+  ; add forces execution domain
+  %a2 = add <32 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  %shuffle = shufflevector <32 x i8> %a2, <32 x i8> %b, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  ret <32 x i8> %shuffle
+}
+
+; CHECK: vperm2i128 $3
+define <4 x i64> @E2(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  ; add forces execution domain
+  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+  %shuffle = shufflevector <4 x i64> %a2, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+  ret <4 x i64> %shuffle
+}
+
+; CHECK: vperm2i128 $49
+define <8 x i32> @E3(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
+entry:
+  ; add forces execution domain
+  %a2 = add <8 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %shuffle = shufflevector <8 x i32> %a2, <8 x i32> %b, <8 x i32> <i32 undef, i32 5, i32 undef, i32 7, i32 12, i32 13, i32 14, i32 15>
+  ret <8 x i32> %shuffle
+}
+
+; CHECK: vperm2i128 $2
+define <16 x i16> @E4(<16 x i16> %a, <16 x i16> %b) nounwind uwtable readnone ssp {
+entry:
+  ; add forces execution domain
+  %a2 = add <16 x i16> %a, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %shuffle = shufflevector <16 x i16> %a2, <16 x i16> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <16 x i16> %shuffle
+}
+
+; CHECK: vperm2i128 $2, (%
+define <16 x i16> @E5(<16 x i16>* %a, <16 x i16>* %b) nounwind uwtable readnone ssp {
+entry:
+  %c = load <16 x i16>* %a
+  %d = load <16 x i16>* %b
+  %c2 = add <16 x i16> %c, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %shuffle = shufflevector <16 x i16> %c2, <16 x i16> %d, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <16 x i16> %shuffle
+}
diff --git a/test/CodeGen/X86/bc-extract.ll b/test/CodeGen/X86/bc-extract.ll
index ac972a8..ceabcb7 100644
--- a/test/CodeGen/X86/bc-extract.ll
+++ b/test/CodeGen/X86/bc-extract.ll
@@ -11,7 +11,7 @@ entry:
 
 define float @extractFloat2() nounwind {
 entry:
-  ; CHECK: pxor	%xmm0, %xmm0
+  ; CHECK: xorps	%xmm0, %xmm0
   %tmp4 = bitcast <1 x double> <double 0x000000003F800000> to <2 x float>
   %tmp5 = extractelement <2 x float> %tmp4, i32 1
   ret float %tmp5
diff --git a/test/CodeGen/X86/blend-msb.ll b/test/CodeGen/X86/blend-msb.ll
new file mode 100644
index 0000000..3a10c70
--- /dev/null
+++ b/test/CodeGen/X86/blend-msb.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -promote-elements -mattr=+sse41 | FileCheck %s
+
+
+; In this test we check that sign-extend of the mask bit is performed by
+; shifting the needed bit to the MSB, and not using shl+sra.
+
+;CHECK: vsel_float
+;CHECK: pslld
+;CHECK-NEXT: blendvps
+;CHECK: ret
+define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
+  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %v1, <4 x float> %v2
+  ret <4 x float> %vsel
+}
+
+;CHECK: vsel_4xi8
+;CHECK: pslld
+;CHECK-NEXT: blendvps
+;CHECK: ret
+define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
+  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i8> %v1, <4 x i8> %v2
+  ret <4 x i8> %vsel
+}
+
+
+; We do not have native support for v8i16 blends and we have to use the
+; blendvb instruction or a sequence of NAND/OR/AND. Make sure that we do not r
+; reduce the mask in this case.
+;CHECK: vsel_8xi16
+;CHECK: psllw
+;CHECK: psraw
+;CHECK: pblendvb
+;CHECK: ret
+define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
+  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i16> %v1, <8 x i16> %v2
+  ret <8 x i16> %vsel
+}
diff --git a/test/CodeGen/X86/block-placement.ll b/test/CodeGen/X86/block-placement.ll
new file mode 100644
index 0000000..167d522
--- /dev/null
+++ b/test/CodeGen/X86/block-placement.ll
@@ -0,0 +1,930 @@
+; RUN: llc -mtriple=i686-linux -enable-block-placement < %s | FileCheck %s
+
+declare void @error(i32 %i, i32 %a, i32 %b)
+
+define i32 @test_ifchains(i32 %i, i32* %a, i32 %b) {
+; Test a chain of ifs, where the block guarded by the if is error handling code
+; that is not expected to run.
+; CHECK: test_ifchains:
+; CHECK: %entry
+; CHECK: %else1
+; CHECK: %else2
+; CHECK: %else3
+; CHECK: %else4
+; CHECK: %exit
+; CHECK: %then1
+; CHECK: %then2
+; CHECK: %then3
+; CHECK: %then4
+; CHECK: %then5
+
+entry:
+  %gep1 = getelementptr i32* %a, i32 1
+  %val1 = load i32* %gep1
+  %cond1 = icmp ugt i32 %val1, 1
+  br i1 %cond1, label %then1, label %else1, !prof !0
+
+then1:
+  call void @error(i32 %i, i32 1, i32 %b)
+  br label %else1
+
+else1:
+  %gep2 = getelementptr i32* %a, i32 2
+  %val2 = load i32* %gep2
+  %cond2 = icmp ugt i32 %val2, 2
+  br i1 %cond2, label %then2, label %else2, !prof !0
+
+then2:
+  call void @error(i32 %i, i32 1, i32 %b)
+  br label %else2
+
+else2:
+  %gep3 = getelementptr i32* %a, i32 3
+  %val3 = load i32* %gep3
+  %cond3 = icmp ugt i32 %val3, 3
+  br i1 %cond3, label %then3, label %else3, !prof !0
+
+then3:
+  call void @error(i32 %i, i32 1, i32 %b)
+  br label %else3
+
+else3:
+  %gep4 = getelementptr i32* %a, i32 4
+  %val4 = load i32* %gep4
+  %cond4 = icmp ugt i32 %val4, 4
+  br i1 %cond4, label %then4, label %else4, !prof !0
+
+then4:
+  call void @error(i32 %i, i32 1, i32 %b)
+  br label %else4
+
+else4:
+  %gep5 = getelementptr i32* %a, i32 3
+  %val5 = load i32* %gep5
+  %cond5 = icmp ugt i32 %val5, 3
+  br i1 %cond5, label %then5, label %exit, !prof !0
+
+then5:
+  call void @error(i32 %i, i32 1, i32 %b)
+  br label %exit
+
+exit:
+  ret i32 %b
+}
+
+define i32 @test_loop_cold_blocks(i32 %i, i32* %a) {
+; Check that we sink cold loop blocks after the hot loop body.
+; CHECK: test_loop_cold_blocks:
+; CHECK: %entry
+; CHECK: %body1
+; CHECK: %body2
+; CHECK: %body3
+; CHECK: %unlikely1
+; CHECK: %unlikely2
+; CHECK: %exit
+
+entry:
+  br label %body1
+
+body1:
+  %iv = phi i32 [ 0, %entry ], [ %next, %body3 ]
+  %base = phi i32 [ 0, %entry ], [ %sum, %body3 ]
+  %unlikelycond1 = icmp slt i32 %base, 42
+  br i1 %unlikelycond1, label %unlikely1, label %body2, !prof !0
+
+unlikely1:
+  call void @error(i32 %i, i32 1, i32 %base)
+  br label %body2
+
+body2:
+  %unlikelycond2 = icmp sgt i32 %base, 21
+  br i1 %unlikelycond2, label %unlikely2, label %body3, !prof !0
+
+unlikely2:
+  call void @error(i32 %i, i32 2, i32 %base)
+  br label %body3
+
+body3:
+  %arrayidx = getelementptr inbounds i32* %a, i32 %iv
+  %0 = load i32* %arrayidx
+  %sum = add nsw i32 %0, %base
+  %next = add i32 %iv, 1
+  %exitcond = icmp eq i32 %next, %i
+  br i1 %exitcond, label %exit, label %body1
+
+exit:
+  ret i32 %sum
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 4, i32 64}
+
+define i32 @test_loop_early_exits(i32 %i, i32* %a) {
+; Check that we sink early exit blocks out of loop bodies.
+; CHECK: test_loop_early_exits:
+; CHECK: %entry
+; CHECK: %body2
+; CHECK: %body3
+; CHECK: %body4
+; CHECK: %body1
+; CHECK: %bail1
+; CHECK: %bail2
+; CHECK: %bail3
+; CHECK: %exit
+
+entry:
+  br label %body1
+
+body1:
+  %iv = phi i32 [ 0, %entry ], [ %next, %body4 ]
+  %base = phi i32 [ 0, %entry ], [ %sum, %body4 ]
+  %bailcond1 = icmp eq i32 %base, 42
+  br i1 %bailcond1, label %bail1, label %body2
+
+bail1:
+  ret i32 -1
+
+body2:
+  %bailcond2 = icmp eq i32 %base, 43
+  br i1 %bailcond2, label %bail2, label %body3
+
+bail2:
+  ret i32 -2
+
+body3:
+  %bailcond3 = icmp eq i32 %base, 44
+  br i1 %bailcond3, label %bail3, label %body4
+
+bail3:
+  ret i32 -3
+
+body4:
+  %arrayidx = getelementptr inbounds i32* %a, i32 %iv
+  %0 = load i32* %arrayidx
+  %sum = add nsw i32 %0, %base
+  %next = add i32 %iv, 1
+  %exitcond = icmp eq i32 %next, %i
+  br i1 %exitcond, label %exit, label %body1
+
+exit:
+  ret i32 %sum
+}
+
+define i32 @test_loop_rotate(i32 %i, i32* %a) {
+; Check that we rotate conditional exits from the loop to the bottom of the
+; loop, eliminating unconditional branches to the top.
+; CHECK: test_loop_rotate:
+; CHECK: %entry
+; CHECK: %body1
+; CHECK: %body0
+; CHECK: %exit
+
+entry:
+  br label %body0
+
+body0:
+  %iv = phi i32 [ 0, %entry ], [ %next, %body1 ]
+  %base = phi i32 [ 0, %entry ], [ %sum, %body1 ]
+  %next = add i32 %iv, 1
+  %exitcond = icmp eq i32 %next, %i
+  br i1 %exitcond, label %exit, label %body1
+
+body1:
+  %arrayidx = getelementptr inbounds i32* %a, i32 %iv
+  %0 = load i32* %arrayidx
+  %sum = add nsw i32 %0, %base
+  %bailcond1 = icmp eq i32 %sum, 42
+  br label %body0
+
+exit:
+  ret i32 %base
+}
+
+define void @test_loop_rotate_reversed_blocks() {
+; This test case (greatly reduced from an Olden bencmark) ensures that the loop
+; rotate implementation doesn't assume that loops are laid out in a particular
+; order. The first loop will get split into two basic blocks, with the loop
+; header coming after the loop latch.
+;
+; CHECK: test_loop_rotate_reversed_blocks
+; CHECK: %entry
+; Look for a jump into the middle of the loop, and no branches mid-way.
+; CHECK: jmp
+; CHECK: %loop1
+; CHECK-NOT: j{{\w*}} .LBB{{.*}}
+; CHECK: %loop1
+; CHECK: je
+
+entry:
+  %cond1 = load volatile i1* undef
+  br i1 %cond1, label %loop2.preheader, label %loop1
+
+loop1:
+  call i32 @f()
+  %cond2 = load volatile i1* undef
+  br i1 %cond2, label %loop2.preheader, label %loop1
+
+loop2.preheader:
+  call i32 @f()
+  %cond3 = load volatile i1* undef
+  br i1 %cond3, label %exit, label %loop2
+
+loop2:
+  call i32 @f()
+  %cond4 = load volatile i1* undef
+  br i1 %cond4, label %exit, label %loop2
+
+exit:
+  ret void
+}
+
+define i32 @test_loop_align(i32 %i, i32* %a) {
+; Check that we provide basic loop body alignment with the block placement
+; pass.
+; CHECK: test_loop_align:
+; CHECK: %entry
+; CHECK: .align [[ALIGN:[0-9]+]],
+; CHECK-NEXT: %body
+; CHECK: %exit
+
+entry:
+  br label %body
+
+body:
+  %iv = phi i32 [ 0, %entry ], [ %next, %body ]
+  %base = phi i32 [ 0, %entry ], [ %sum, %body ]
+  %arrayidx = getelementptr inbounds i32* %a, i32 %iv
+  %0 = load i32* %arrayidx
+  %sum = add nsw i32 %0, %base
+  %next = add i32 %iv, 1
+  %exitcond = icmp eq i32 %next, %i
+  br i1 %exitcond, label %exit, label %body
+
+exit:
+  ret i32 %sum
+}
+
+define i32 @test_nested_loop_align(i32 %i, i32* %a, i32* %b) {
+; Check that we provide nested loop body alignment.
+; CHECK: test_nested_loop_align:
+; CHECK: %entry
+; CHECK: .align [[ALIGN]],
+; CHECK-NEXT: %loop.body.1
+; CHECK: .align [[ALIGN]],
+; CHECK-NEXT: %inner.loop.body
+; CHECK-NOT: .align
+; CHECK: %exit
+
+entry:
+  br label %loop.body.1
+
+loop.body.1:
+  %iv = phi i32 [ 0, %entry ], [ %next, %loop.body.2 ]
+  %arrayidx = getelementptr inbounds i32* %a, i32 %iv
+  %bidx = load i32* %arrayidx
+  br label %inner.loop.body
+
+inner.loop.body:
+  %inner.iv = phi i32 [ 0, %loop.body.1 ], [ %inner.next, %inner.loop.body ]
+  %base = phi i32 [ 0, %loop.body.1 ], [ %sum, %inner.loop.body ]
+  %scaled_idx = mul i32 %bidx, %iv
+  %inner.arrayidx = getelementptr inbounds i32* %b, i32 %scaled_idx
+  %0 = load i32* %inner.arrayidx
+  %sum = add nsw i32 %0, %base
+  %inner.next = add i32 %iv, 1
+  %inner.exitcond = icmp eq i32 %inner.next, %i
+  br i1 %inner.exitcond, label %loop.body.2, label %inner.loop.body
+
+loop.body.2:
+  %next = add i32 %iv, 1
+  %exitcond = icmp eq i32 %next, %i
+  br i1 %exitcond, label %exit, label %loop.body.1
+
+exit:
+  ret i32 %sum
+}
+
+define void @unnatural_cfg1() {
+; Test that we can handle a loop with an inner unnatural loop at the end of
+; a function. This is a gross CFG reduced out of the single source GCC.
+; CHECK: unnatural_cfg1
+; CHECK: %entry
+; CHECK: %loop.body1
+; CHECK: %loop.body2
+; CHECK: %loop.body3
+
+entry:
+  br label %loop.header
+
+loop.header:
+  br label %loop.body1
+
+loop.body1:
+  br i1 undef, label %loop.body3, label %loop.body2
+
+loop.body2:
+  %ptr = load i32** undef, align 4
+  br label %loop.body3
+
+loop.body3:
+  %myptr = phi i32* [ %ptr2, %loop.body5 ], [ %ptr, %loop.body2 ], [ undef, %loop.body1 ]
+  %bcmyptr = bitcast i32* %myptr to i32*
+  %val = load i32* %bcmyptr, align 4
+  %comp = icmp eq i32 %val, 48
+  br i1 %comp, label %loop.body4, label %loop.body5
+
+loop.body4:
+  br i1 undef, label %loop.header, label %loop.body5
+
+loop.body5:
+  %ptr2 = load i32** undef, align 4
+  br label %loop.body3
+}
+
+define void @unnatural_cfg2() {
+; Test that we can handle a loop with a nested natural loop *and* an unnatural
+; loop. This was reduced from a crash on block placement when run over
+; single-source GCC.
+; CHECK: unnatural_cfg2
+; CHECK: %entry
+; CHECK: %loop.body1
+; CHECK: %loop.body2
+; CHECK: %loop.header
+; CHECK: %loop.body3
+; CHECK: %loop.inner1.begin
+; The end block is folded with %loop.body3...
+; CHECK-NOT: %loop.inner1.end
+; CHECK: %loop.body4
+; CHECK: %loop.inner2.begin
+; The loop.inner2.end block is folded
+; CHECK: %bail
+
+entry:
+  br label %loop.header
+
+loop.header:
+  %comp0 = icmp eq i32* undef, null
+  br i1 %comp0, label %bail, label %loop.body1
+
+loop.body1:
+  %val0 = load i32** undef, align 4
+  br i1 undef, label %loop.body2, label %loop.inner1.begin
+
+loop.body2:
+  br i1 undef, label %loop.body4, label %loop.body3
+
+loop.body3:
+  %ptr1 = getelementptr inbounds i32* %val0, i32 0
+  %castptr1 = bitcast i32* %ptr1 to i32**
+  %val1 = load i32** %castptr1, align 4
+  br label %loop.inner1.begin
+
+loop.inner1.begin:
+  %valphi = phi i32* [ %val2, %loop.inner1.end ], [ %val1, %loop.body3 ], [ %val0, %loop.body1 ]
+  %castval = bitcast i32* %valphi to i32*
+  %comp1 = icmp eq i32 undef, 48
+  br i1 %comp1, label %loop.inner1.end, label %loop.body4
+
+loop.inner1.end:
+  %ptr2 = getelementptr inbounds i32* %valphi, i32 0
+  %castptr2 = bitcast i32* %ptr2 to i32**
+  %val2 = load i32** %castptr2, align 4
+  br label %loop.inner1.begin
+
+loop.body4.dead:
+  br label %loop.body4
+
+loop.body4:
+  %comp2 = icmp ult i32 undef, 3
+  br i1 %comp2, label %loop.inner2.begin, label %loop.end
+
+loop.inner2.begin:
+  br i1 false, label %loop.end, label %loop.inner2.end
+
+loop.inner2.end:
+  %comp3 = icmp eq i32 undef, 1769472
+  br i1 %comp3, label %loop.end, label %loop.inner2.begin
+
+loop.end:
+  br label %loop.header
+
+bail:
+  unreachable
+}
+
+define i32 @problematic_switch() {
+; This function's CFG caused overlow in the machine branch probability
+; calculation, triggering asserts. Make sure we don't crash on it.
+; CHECK: problematic_switch
+
+entry:
+  switch i32 undef, label %exit [
+    i32 879, label %bogus
+    i32 877, label %step
+    i32 876, label %step
+    i32 875, label %step
+    i32 874, label %step
+    i32 873, label %step
+    i32 872, label %step
+    i32 868, label %step
+    i32 867, label %step
+    i32 866, label %step
+    i32 861, label %step
+    i32 860, label %step
+    i32 856, label %step
+    i32 855, label %step
+    i32 854, label %step
+    i32 831, label %step
+    i32 830, label %step
+    i32 829, label %step
+    i32 828, label %step
+    i32 815, label %step
+    i32 814, label %step
+    i32 811, label %step
+    i32 806, label %step
+    i32 805, label %step
+    i32 804, label %step
+    i32 803, label %step
+    i32 802, label %step
+    i32 801, label %step
+    i32 800, label %step
+    i32 799, label %step
+    i32 798, label %step
+    i32 797, label %step
+    i32 796, label %step
+    i32 795, label %step
+  ]
+bogus:
+  unreachable
+step:
+  br label %exit
+exit:
+  %merge = phi i32 [ 3, %step ], [ 6, %entry ]
+  ret i32 %merge
+}
+
+define void @fpcmp_unanalyzable_branch(i1 %cond) {
+; This function's CFG contains an unanalyzable branch that is likely to be
+; split due to having a different high-probability predecessor.
+; CHECK: fpcmp_unanalyzable_branch
+; CHECK: %entry
+; CHECK: %exit
+; CHECK-NOT: %if.then
+; CHECK-NOT: %if.end
+; CHECK-NOT: jne
+; CHECK-NOT: jnp
+; CHECK: jne
+; CHECK-NEXT: jnp
+; CHECK-NEXT: %if.then
+
+entry:
+; Note that this branch must be strongly biased toward
+; 'entry.if.then_crit_edge' to ensure that we would try to form a chain for
+; 'entry' -> 'entry.if.then_crit_edge' -> 'if.then'. It is the last edge in that
+; chain which would violate the unanalyzable branch in 'exit', but we won't even
+; try this trick unless 'if.then' is believed to almost always be reached from
+; 'entry.if.then_crit_edge'.
+  br i1 %cond, label %entry.if.then_crit_edge, label %lor.lhs.false, !prof !1
+
+entry.if.then_crit_edge:
+  %.pre14 = load i8* undef, align 1, !tbaa !0
+  br label %if.then
+
+lor.lhs.false:
+  br i1 undef, label %if.end, label %exit
+
+exit:
+  %cmp.i = fcmp une double 0.000000e+00, undef
+  br i1 %cmp.i, label %if.then, label %if.end
+
+if.then:
+  %0 = phi i8 [ %.pre14, %entry.if.then_crit_edge ], [ undef, %exit ]
+  %1 = and i8 %0, 1
+  store i8 %1, i8* undef, align 4, !tbaa !0
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+!1 = metadata !{metadata !"branch_weights", i32 1000, i32 1}
+
+declare i32 @f()
+declare i32 @g()
+declare i32 @h(i32 %x)
+
+define i32 @test_global_cfg_break_profitability() {
+; Check that our metrics for the profitability of a CFG break are global rather
+; than local. A successor may be very hot, but if the current block isn't, it
+; doesn't matter. Within this test the 'then' block is slightly warmer than the
+; 'else' block, but not nearly enough to merit merging it with the exit block
+; even though the probability of 'then' branching to the 'exit' block is very
+; high.
+; CHECK: test_global_cfg_break_profitability
+; CHECK: calll {{_?}}f
+; CHECK: calll {{_?}}g
+; CHECK: calll {{_?}}h
+; CHECK: ret
+
+entry:
+  br i1 undef, label %then, label %else, !prof !2
+
+then:
+  %then.result = call i32 @f()
+  br label %exit
+
+else:
+  %else.result = call i32 @g()
+  br label %exit
+
+exit:
+  %result = phi i32 [ %then.result, %then ], [ %else.result, %else ]
+  %result2 = call i32 @h(i32 %result)
+  ret i32 %result
+}
+
+!2 = metadata !{metadata !"branch_weights", i32 3, i32 1}
+
+declare i32 @__gxx_personality_v0(...)
+
+define void @test_eh_lpad_successor() {
+; Some times the landing pad ends up as the first successor of an invoke block.
+; When this happens, a strange result used to fall out of updateTerminators: we
+; didn't correctly locate the fallthrough successor, assuming blindly that the
+; first one was the fallthrough successor. As a result, we would add an
+; erroneous jump to the landing pad thinking *that* was the default successor.
+; CHECK: test_eh_lpad_successor
+; CHECK: %entry
+; CHECK-NOT: jmp
+; CHECK: %loop
+
+entry:
+  invoke i32 @f() to label %preheader unwind label %lpad
+
+preheader:
+  br label %loop
+
+lpad:
+  %lpad.val = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  resume { i8*, i32 } %lpad.val
+
+loop:
+  br label %loop
+}
+
+declare void @fake_throw() noreturn
+
+define void @test_eh_throw() {
+; For blocks containing a 'throw' (or similar functionality), we have
+; a no-return invoke. In this case, only EH successors will exist, and
+; fallthrough simply won't occur. Make sure we don't crash trying to update
+; terminators for such constructs.
+;
+; CHECK: test_eh_throw
+; CHECK: %entry
+; CHECK: %cleanup
+
+entry:
+  invoke void @fake_throw() to label %continue unwind label %cleanup
+
+continue:
+  unreachable
+
+cleanup:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  unreachable
+}
+
+define void @test_unnatural_cfg_backwards_inner_loop() {
+; Test that when we encounter an unnatural CFG structure after having formed
+; a chain for an inner loop which happened to be laid out backwards we don't
+; attempt to merge onto the wrong end of the inner loop just because we find it
+; first. This was reduced from a crasher in GCC's single source.
+;
+; CHECK: test_unnatural_cfg_backwards_inner_loop
+; CHECK: %entry
+; CHECK: %body
+; CHECK: %loop2b
+; CHECK: %loop1
+; CHECK: %loop2a
+
+entry:
+  br i1 undef, label %loop2a, label %body
+
+body:
+  br label %loop2a
+
+loop1:
+  %next.load = load i32** undef
+  br i1 %comp.a, label %loop2a, label %loop2b
+
+loop2a:
+  %var = phi i32* [ null, %entry ], [ null, %body ], [ %next.phi, %loop1 ]
+  %next.var = phi i32* [ null, %entry ], [ undef, %body ], [ %next.load, %loop1 ]
+  %comp.a = icmp eq i32* %var, null
+  br label %loop3
+
+loop2b:
+  %gep = getelementptr inbounds i32* %var.phi, i32 0
+  %next.ptr = bitcast i32* %gep to i32**
+  store i32* %next.phi, i32** %next.ptr
+  br label %loop3
+
+loop3:
+  %var.phi = phi i32* [ %next.phi, %loop2b ], [ %var, %loop2a ]
+  %next.phi = phi i32* [ %next.load, %loop2b ], [ %next.var, %loop2a ]
+  br label %loop1
+}
+
+define void @unanalyzable_branch_to_loop_header() {
+; Ensure that we can handle unanalyzable branches into loop headers. We
+; pre-form chains for unanalyzable branches, and will find the tail end of that
+; at the start of the loop. This function uses floating point comparison
+; fallthrough because that happens to always produce unanalyzable branches on
+; x86.
+;
+; CHECK: unanalyzable_branch_to_loop_header
+; CHECK: %entry
+; CHECK: %loop
+; CHECK: %exit
+
+entry:
+  %cmp = fcmp une double 0.000000e+00, undef
+  br i1 %cmp, label %loop, label %exit
+
+loop:
+  %cond = icmp eq i8 undef, 42
+  br i1 %cond, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+define void @unanalyzable_branch_to_best_succ(i1 %cond) {
+; Ensure that we can handle unanalyzable branches where the destination block
+; gets selected as the optimal sucessor to merge.
+;
+; CHECK: unanalyzable_branch_to_best_succ
+; CHECK: %entry
+; CHECK: %foo
+; CHECK: %bar
+; CHECK: %exit
+
+entry:
+  ; Bias this branch toward bar to ensure we form that chain.
+  br i1 %cond, label %bar, label %foo, !prof !1
+
+foo:
+  %cmp = fcmp une double 0.000000e+00, undef
+  br i1 %cmp, label %bar, label %exit
+
+bar:
+  call i32 @f()
+  br label %exit
+
+exit:
+  ret void
+}
+
+define void @unanalyzable_branch_to_free_block(float %x) {
+; Ensure that we can handle unanalyzable branches where the destination block
+; gets selected as the best free block in the CFG.
+;
+; CHECK: unanalyzable_branch_to_free_block
+; CHECK: %entry
+; CHECK: %a
+; CHECK: %b
+; CHECK: %c
+; CHECK: %exit
+
+entry:
+  br i1 undef, label %a, label %b
+
+a:
+  call i32 @f()
+  br label %c
+
+b:
+  %cmp = fcmp une float %x, undef
+  br i1 %cmp, label %c, label %exit
+
+c:
+  call i32 @g()
+  br label %exit
+
+exit:
+  ret void
+}
+
+define void @many_unanalyzable_branches() {
+; Ensure that we don't crash as we're building up many unanalyzable branches,
+; blocks, and loops.
+;
+; CHECK: many_unanalyzable_branches
+; CHECK: %entry
+; CHECK: %exit
+
+entry:
+  br label %0
+
+  %val0 = load volatile float* undef
+  %cmp0 = fcmp une float %val0, undef
+  br i1 %cmp0, label %1, label %0
+  %val1 = load volatile float* undef
+  %cmp1 = fcmp une float %val1, undef
+  br i1 %cmp1, label %2, label %1
+  %val2 = load volatile float* undef
+  %cmp2 = fcmp une float %val2, undef
+  br i1 %cmp2, label %3, label %2
+  %val3 = load volatile float* undef
+  %cmp3 = fcmp une float %val3, undef
+  br i1 %cmp3, label %4, label %3
+  %val4 = load volatile float* undef
+  %cmp4 = fcmp une float %val4, undef
+  br i1 %cmp4, label %5, label %4
+  %val5 = load volatile float* undef
+  %cmp5 = fcmp une float %val5, undef
+  br i1 %cmp5, label %6, label %5
+  %val6 = load volatile float* undef
+  %cmp6 = fcmp une float %val6, undef
+  br i1 %cmp6, label %7, label %6
+  %val7 = load volatile float* undef
+  %cmp7 = fcmp une float %val7, undef
+  br i1 %cmp7, label %8, label %7
+  %val8 = load volatile float* undef
+  %cmp8 = fcmp une float %val8, undef
+  br i1 %cmp8, label %9, label %8
+  %val9 = load volatile float* undef
+  %cmp9 = fcmp une float %val9, undef
+  br i1 %cmp9, label %10, label %9
+  %val10 = load volatile float* undef
+  %cmp10 = fcmp une float %val10, undef
+  br i1 %cmp10, label %11, label %10
+  %val11 = load volatile float* undef
+  %cmp11 = fcmp une float %val11, undef
+  br i1 %cmp11, label %12, label %11
+  %val12 = load volatile float* undef
+  %cmp12 = fcmp une float %val12, undef
+  br i1 %cmp12, label %13, label %12
+  %val13 = load volatile float* undef
+  %cmp13 = fcmp une float %val13, undef
+  br i1 %cmp13, label %14, label %13
+  %val14 = load volatile float* undef
+  %cmp14 = fcmp une float %val14, undef
+  br i1 %cmp14, label %15, label %14
+  %val15 = load volatile float* undef
+  %cmp15 = fcmp une float %val15, undef
+  br i1 %cmp15, label %16, label %15
+  %val16 = load volatile float* undef
+  %cmp16 = fcmp une float %val16, undef
+  br i1 %cmp16, label %17, label %16
+  %val17 = load volatile float* undef
+  %cmp17 = fcmp une float %val17, undef
+  br i1 %cmp17, label %18, label %17
+  %val18 = load volatile float* undef
+  %cmp18 = fcmp une float %val18, undef
+  br i1 %cmp18, label %19, label %18
+  %val19 = load volatile float* undef
+  %cmp19 = fcmp une float %val19, undef
+  br i1 %cmp19, label %20, label %19
+  %val20 = load volatile float* undef
+  %cmp20 = fcmp une float %val20, undef
+  br i1 %cmp20, label %21, label %20
+  %val21 = load volatile float* undef
+  %cmp21 = fcmp une float %val21, undef
+  br i1 %cmp21, label %22, label %21
+  %val22 = load volatile float* undef
+  %cmp22 = fcmp une float %val22, undef
+  br i1 %cmp22, label %23, label %22
+  %val23 = load volatile float* undef
+  %cmp23 = fcmp une float %val23, undef
+  br i1 %cmp23, label %24, label %23
+  %val24 = load volatile float* undef
+  %cmp24 = fcmp une float %val24, undef
+  br i1 %cmp24, label %25, label %24
+  %val25 = load volatile float* undef
+  %cmp25 = fcmp une float %val25, undef
+  br i1 %cmp25, label %26, label %25
+  %val26 = load volatile float* undef
+  %cmp26 = fcmp une float %val26, undef
+  br i1 %cmp26, label %27, label %26
+  %val27 = load volatile float* undef
+  %cmp27 = fcmp une float %val27, undef
+  br i1 %cmp27, label %28, label %27
+  %val28 = load volatile float* undef
+  %cmp28 = fcmp une float %val28, undef
+  br i1 %cmp28, label %29, label %28
+  %val29 = load volatile float* undef
+  %cmp29 = fcmp une float %val29, undef
+  br i1 %cmp29, label %30, label %29
+  %val30 = load volatile float* undef
+  %cmp30 = fcmp une float %val30, undef
+  br i1 %cmp30, label %31, label %30
+  %val31 = load volatile float* undef
+  %cmp31 = fcmp une float %val31, undef
+  br i1 %cmp31, label %32, label %31
+  %val32 = load volatile float* undef
+  %cmp32 = fcmp une float %val32, undef
+  br i1 %cmp32, label %33, label %32
+  %val33 = load volatile float* undef
+  %cmp33 = fcmp une float %val33, undef
+  br i1 %cmp33, label %34, label %33
+  %val34 = load volatile float* undef
+  %cmp34 = fcmp une float %val34, undef
+  br i1 %cmp34, label %35, label %34
+  %val35 = load volatile float* undef
+  %cmp35 = fcmp une float %val35, undef
+  br i1 %cmp35, label %36, label %35
+  %val36 = load volatile float* undef
+  %cmp36 = fcmp une float %val36, undef
+  br i1 %cmp36, label %37, label %36
+  %val37 = load volatile float* undef
+  %cmp37 = fcmp une float %val37, undef
+  br i1 %cmp37, label %38, label %37
+  %val38 = load volatile float* undef
+  %cmp38 = fcmp une float %val38, undef
+  br i1 %cmp38, label %39, label %38
+  %val39 = load volatile float* undef
+  %cmp39 = fcmp une float %val39, undef
+  br i1 %cmp39, label %40, label %39
+  %val40 = load volatile float* undef
+  %cmp40 = fcmp une float %val40, undef
+  br i1 %cmp40, label %41, label %40
+  %val41 = load volatile float* undef
+  %cmp41 = fcmp une float %val41, undef
+  br i1 %cmp41, label %42, label %41
+  %val42 = load volatile float* undef
+  %cmp42 = fcmp une float %val42, undef
+  br i1 %cmp42, label %43, label %42
+  %val43 = load volatile float* undef
+  %cmp43 = fcmp une float %val43, undef
+  br i1 %cmp43, label %44, label %43
+  %val44 = load volatile float* undef
+  %cmp44 = fcmp une float %val44, undef
+  br i1 %cmp44, label %45, label %44
+  %val45 = load volatile float* undef
+  %cmp45 = fcmp une float %val45, undef
+  br i1 %cmp45, label %46, label %45
+  %val46 = load volatile float* undef
+  %cmp46 = fcmp une float %val46, undef
+  br i1 %cmp46, label %47, label %46
+  %val47 = load volatile float* undef
+  %cmp47 = fcmp une float %val47, undef
+  br i1 %cmp47, label %48, label %47
+  %val48 = load volatile float* undef
+  %cmp48 = fcmp une float %val48, undef
+  br i1 %cmp48, label %49, label %48
+  %val49 = load volatile float* undef
+  %cmp49 = fcmp une float %val49, undef
+  br i1 %cmp49, label %50, label %49
+  %val50 = load volatile float* undef
+  %cmp50 = fcmp une float %val50, undef
+  br i1 %cmp50, label %51, label %50
+  %val51 = load volatile float* undef
+  %cmp51 = fcmp une float %val51, undef
+  br i1 %cmp51, label %52, label %51
+  %val52 = load volatile float* undef
+  %cmp52 = fcmp une float %val52, undef
+  br i1 %cmp52, label %53, label %52
+  %val53 = load volatile float* undef
+  %cmp53 = fcmp une float %val53, undef
+  br i1 %cmp53, label %54, label %53
+  %val54 = load volatile float* undef
+  %cmp54 = fcmp une float %val54, undef
+  br i1 %cmp54, label %55, label %54
+  %val55 = load volatile float* undef
+  %cmp55 = fcmp une float %val55, undef
+  br i1 %cmp55, label %56, label %55
+  %val56 = load volatile float* undef
+  %cmp56 = fcmp une float %val56, undef
+  br i1 %cmp56, label %57, label %56
+  %val57 = load volatile float* undef
+  %cmp57 = fcmp une float %val57, undef
+  br i1 %cmp57, label %58, label %57
+  %val58 = load volatile float* undef
+  %cmp58 = fcmp une float %val58, undef
+  br i1 %cmp58, label %59, label %58
+  %val59 = load volatile float* undef
+  %cmp59 = fcmp une float %val59, undef
+  br i1 %cmp59, label %60, label %59
+  %val60 = load volatile float* undef
+  %cmp60 = fcmp une float %val60, undef
+  br i1 %cmp60, label %61, label %60
+  %val61 = load volatile float* undef
+  %cmp61 = fcmp une float %val61, undef
+  br i1 %cmp61, label %62, label %61
+  %val62 = load volatile float* undef
+  %cmp62 = fcmp une float %val62, undef
+  br i1 %cmp62, label %63, label %62
+  %val63 = load volatile float* undef
+  %cmp63 = fcmp une float %val63, undef
+  br i1 %cmp63, label %64, label %63
+  %val64 = load volatile float* undef
+  %cmp64 = fcmp une float %val64, undef
+  br i1 %cmp64, label %65, label %64
+
+  br label %exit
+exit:
+  ret void
+}
diff --git a/test/CodeGen/X86/bmi.ll b/test/CodeGen/X86/bmi.ll
index 88c09e3..43c47c0 100644
--- a/test/CodeGen/X86/bmi.ll
+++ b/test/CodeGen/X86/bmi.ll
@@ -1,40 +1,65 @@
-; RUN: llc < %s -march=x86-64 -mattr=+bmi | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+bmi,+bmi2 | FileCheck %s
 
-define i32 @t1(i32 %x) nounwind  {
-       %tmp = tail call i32 @llvm.cttz.i32( i32 %x )
-       ret i32 %tmp
+declare i8 @llvm.cttz.i8(i8, i1) nounwind readnone
+declare i16 @llvm.cttz.i16(i16, i1) nounwind readnone
+declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
+declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone
+
+define i8 @t1(i8 %x) nounwind  {
+  %tmp = tail call i8 @llvm.cttz.i8( i8 %x, i1 false )
+  ret i8 %tmp
 ; CHECK: t1:
 ; CHECK: tzcntl
 }
 
-declare i32 @llvm.cttz.i32(i32) nounwind readnone
-
 define i16 @t2(i16 %x) nounwind  {
-       %tmp = tail call i16 @llvm.cttz.i16( i16 %x )
-       ret i16 %tmp
+  %tmp = tail call i16 @llvm.cttz.i16( i16 %x, i1 false )
+  ret i16 %tmp
 ; CHECK: t2:
 ; CHECK: tzcntw
 }
 
-declare i16 @llvm.cttz.i16(i16) nounwind readnone
-
-define i64 @t3(i64 %x) nounwind  {
-       %tmp = tail call i64 @llvm.cttz.i64( i64 %x )
-       ret i64 %tmp
+define i32 @t3(i32 %x) nounwind  {
+  %tmp = tail call i32 @llvm.cttz.i32( i32 %x, i1 false )
+  ret i32 %tmp
 ; CHECK: t3:
+; CHECK: tzcntl
+}
+
+define i64 @t4(i64 %x) nounwind  {
+  %tmp = tail call i64 @llvm.cttz.i64( i64 %x, i1 false )
+  ret i64 %tmp
+; CHECK: t4:
 ; CHECK: tzcntq
 }
 
-declare i64 @llvm.cttz.i64(i64) nounwind readnone
+define i8 @t5(i8 %x) nounwind  {
+  %tmp = tail call i8 @llvm.cttz.i8( i8 %x, i1 true )
+  ret i8 %tmp
+; CHECK: t5:
+; CHECK: tzcntl
+}
 
-define i8 @t4(i8 %x) nounwind  {
-       %tmp = tail call i8 @llvm.cttz.i8( i8 %x )
-       ret i8 %tmp
-; CHECK: t4:
+define i16 @t6(i16 %x) nounwind  {
+  %tmp = tail call i16 @llvm.cttz.i16( i16 %x, i1 true )
+  ret i16 %tmp
+; CHECK: t6:
 ; CHECK: tzcntw
 }
 
-declare i8 @llvm.cttz.i8(i8) nounwind readnone
+define i32 @t7(i32 %x) nounwind  {
+  %tmp = tail call i32 @llvm.cttz.i32( i32 %x, i1 true )
+  ret i32 %tmp
+; CHECK: t7:
+; CHECK: tzcntl
+}
+
+define i64 @t8(i64 %x) nounwind  {
+  %tmp = tail call i64 @llvm.cttz.i64( i64 %x, i1 true )
+  ret i64 %tmp
+; CHECK: t8:
+; CHECK: tzcntq
+}
 
 define i32 @andn32(i32 %x, i32 %y) nounwind readnone {
   %tmp1 = xor i32 %x, -1
@@ -51,3 +76,124 @@ define i64 @andn64(i64 %x, i64 %y) nounwind readnone {
 ; CHECK: andn64:
 ; CHECK: andnq
 }
+
+define i32 @bextr32(i32 %x, i32 %y) nounwind readnone {
+  %tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %y)
+  ret i32 %tmp
+; CHECK: bextr32:
+; CHECK: bextrl
+}
+
+declare i32 @llvm.x86.bmi.bextr.32(i32, i32) nounwind readnone
+
+define i64 @bextr64(i64 %x, i64 %y) nounwind readnone {
+  %tmp = tail call i64 @llvm.x86.bmi.bextr.64(i64 %x, i64 %y)
+  ret i64 %tmp
+; CHECK: bextr64:
+; CHECK: bextrq
+}
+
+declare i64 @llvm.x86.bmi.bextr.64(i64, i64) nounwind readnone
+
+define i32 @bzhi32(i32 %x, i32 %y) nounwind readnone {
+  %tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x, i32 %y)
+  ret i32 %tmp
+; CHECK: bzhi32:
+; CHECK: bzhil
+}
+
+declare i32 @llvm.x86.bmi.bzhi.32(i32, i32) nounwind readnone
+
+define i64 @bzhi64(i64 %x, i64 %y) nounwind readnone {
+  %tmp = tail call i64 @llvm.x86.bmi.bzhi.64(i64 %x, i64 %y)
+  ret i64 %tmp
+; CHECK: bzhi64:
+; CHECK: bzhiq
+}
+
+declare i64 @llvm.x86.bmi.bzhi.64(i64, i64) nounwind readnone
+
+define i32 @blsi32(i32 %x) nounwind readnone {
+  %tmp = sub i32 0, %x
+  %tmp2 = and i32 %x, %tmp
+  ret i32 %tmp2
+; CHECK: blsi32:
+; CHECK: blsil
+}
+
+define i64 @blsi64(i64 %x) nounwind readnone {
+  %tmp = sub i64 0, %x
+  %tmp2 = and i64 %tmp, %x
+  ret i64 %tmp2
+; CHECK: blsi64:
+; CHECK: blsiq
+}
+
+define i32 @blsmsk32(i32 %x) nounwind readnone {
+  %tmp = sub i32 %x, 1
+  %tmp2 = xor i32 %x, %tmp
+  ret i32 %tmp2
+; CHECK: blsmsk32:
+; CHECK: blsmskl
+}
+
+define i64 @blsmsk64(i64 %x) nounwind readnone {
+  %tmp = sub i64 %x, 1
+  %tmp2 = xor i64 %tmp, %x
+  ret i64 %tmp2
+; CHECK: blsmsk64:
+; CHECK: blsmskq
+}
+
+define i32 @blsr32(i32 %x) nounwind readnone {
+  %tmp = sub i32 %x, 1
+  %tmp2 = and i32 %x, %tmp
+  ret i32 %tmp2
+; CHECK: blsr32:
+; CHECK: blsrl
+}
+
+define i64 @blsr64(i64 %x) nounwind readnone {
+  %tmp = sub i64 %x, 1
+  %tmp2 = and i64 %tmp, %x
+  ret i64 %tmp2
+; CHECK: blsr64:
+; CHECK: blsrq
+}
+
+define i32 @pdep32(i32 %x, i32 %y) nounwind readnone {
+  %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %y)
+  ret i32 %tmp
+; CHECK: pdep32:
+; CHECK: pdepl
+}
+
+declare i32 @llvm.x86.bmi.pdep.32(i32, i32) nounwind readnone
+
+define i64 @pdep64(i64 %x, i64 %y) nounwind readnone {
+  %tmp = tail call i64 @llvm.x86.bmi.pdep.64(i64 %x, i64 %y)
+  ret i64 %tmp
+; CHECK: pdep64:
+; CHECK: pdepq
+}
+
+declare i64 @llvm.x86.bmi.pdep.64(i64, i64) nounwind readnone
+
+define i32 @pext32(i32 %x, i32 %y) nounwind readnone {
+  %tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 %y)
+  ret i32 %tmp
+; CHECK: pext32:
+; CHECK: pextl
+}
+
+declare i32 @llvm.x86.bmi.pext.32(i32, i32) nounwind readnone
+
+define i64 @pext64(i64 %x, i64 %y) nounwind readnone {
+  %tmp = tail call i64 @llvm.x86.bmi.pext.64(i64 %x, i64 %y)
+  ret i64 %tmp
+; CHECK: pext64:
+; CHECK: pextq
+}
+
+declare i64 @llvm.x86.bmi.pext.64(i64, i64) nounwind readnone
+
diff --git a/test/CodeGen/X86/brcond.ll b/test/CodeGen/X86/brcond.ll
index 5cdc100..44670c8 100644
--- a/test/CodeGen/X86/brcond.ll
+++ b/test/CodeGen/X86/brcond.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=core2 | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=penryn | FileCheck %s
+
 ; rdar://7475489
 
 define i32 @test1(i32 %a, i32 %b) nounwind ssp {
@@ -106,3 +107,4 @@ bb2:                                              ; preds = %entry, %bb1
   %.0 = fptrunc double %.0.in to float            ; <float> [#uses=1]
   ret float %.0
 }
+
diff --git a/test/CodeGen/X86/btq.ll b/test/CodeGen/X86/btq.ll
new file mode 100644
index 0000000..9c137a7
--- /dev/null
+++ b/test/CodeGen/X86/btq.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+declare void @bar()
+
+define void @test1(i64 %foo) nounwind {
+  %and = and i64 %foo, 4294967296
+  %tobool = icmp eq i64 %and, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+; CHECK: test1:
+; CHECK: btq $32
+
+if.then:
+  tail call void @bar() nounwind
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+define void @test2(i64 %foo) nounwind {
+  %and = and i64 %foo, 2147483648
+  %tobool = icmp eq i64 %and, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+; CHECK: test2:
+; CHECK: testl $-2147483648
+
+if.then:
+  tail call void @bar() nounwind
+  br label %if.end
+
+if.end:
+  ret void
+}
diff --git a/test/CodeGen/X86/byval6.ll b/test/CodeGen/X86/byval6.ll
index b060369..2d39901 100644
--- a/test/CodeGen/X86/byval6.ll
+++ b/test/CodeGen/X86/byval6.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep add | not grep 16
+; RUN: llc < %s -mcpu=generic -march=x86 | grep add | not grep 16
 
 	%struct.W = type { x86_fp80, x86_fp80 }
 @B = global %struct.W { x86_fp80 0xK4001A000000000000000, x86_fp80 0xK4001C000000000000000 }, align 32
diff --git a/test/CodeGen/X86/cfstring.ll b/test/CodeGen/X86/cfstring.ll
new file mode 100644
index 0000000..7420ce7
--- /dev/null
+++ b/test/CodeGen/X86/cfstring.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; <rdar://problem/10564621>
+
+%0 = type opaque
+%struct.NSConstantString = type { i32*, i32, i8*, i32 }
+
+; Make sure that the string ends up the the correct section.
+
+; CHECK:        .section __TEXT,__cstring
+; CHECK-NEXT: l_.str3:
+
+; CHECK:        .section  __DATA,__cfstring
+; CHECK-NEXT:   .align  4
+; CHECK-NEXT: L__unnamed_cfstring_4:
+; CHECK-NEXT:   .quad  ___CFConstantStringClassReference
+; CHECK-NEXT:   .long  1992
+; CHECK-NEXT:   .space  4
+; CHECK-NEXT:   .quad  l_.str3
+; CHECK-NEXT:   .long  0
+; CHECK-NEXT:   .space  4
+
+@isLogVisible = global i8 0, align 1
+@__CFConstantStringClassReference = external global [0 x i32]
+@.str3 = linker_private unnamed_addr constant [1 x i8] zeroinitializer, align 1
+@_unnamed_cfstring_4 = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr inbounds ([1 x i8]* @.str3, i32 0, i32 0), i32 0 }, section "__DATA,__cfstring"
+@null.array = weak_odr constant [1 x i8] zeroinitializer, align 1
+
+define linkonce_odr void @bar() nounwind ssp align 2 {
+entry:
+  %stack = alloca i8*, align 4
+  %call = call %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* (i8*, i8*, %0*)*)(i8* null, i8* null, %0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_4 to %0*))
+  store i8* getelementptr inbounds ([1 x i8]* @null.array, i32 0, i32 0), i8** %stack, align 4
+  ret void
+}
+
+declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
diff --git a/test/CodeGen/X86/change-compare-stride-0.ll b/test/CodeGen/X86/change-compare-stride-0.ll
deleted file mode 100644
index 439f7b0..0000000
--- a/test/CodeGen/X86/change-compare-stride-0.ll
+++ /dev/null
@@ -1,83 +0,0 @@
-; RUN: llc < %s -march=x86 -enable-lsr-nested | FileCheck %s
-;
-; Nested LSR is required to optimize this case.
-; We do not expect to see this form of IR without -enable-iv-rewrite.
-
-define void @borf(i8* nocapture %in, i8* nocapture %out) nounwind {
-; CHECK: borf:
-; CHECK-NOT: inc
-; CHECK-NOT: leal 1(
-; CHECK-NOT: leal -1(
-; CHECK: decl
-; CHECK-NEXT: cmpl $-478
-; CHECK: ret
-
-bb4.thread:
-	br label %bb2.outer
-
-bb2.outer:		; preds = %bb4, %bb4.thread
-	%indvar18 = phi i32 [ 0, %bb4.thread ], [ %indvar.next28, %bb4 ]		; <i32> [#uses=3]
-	%tmp34 = mul i32 %indvar18, 65535		; <i32> [#uses=1]
-	%i.0.reg2mem.0.ph = add i32 %tmp34, 639		; <i32> [#uses=1]
-	%0 = and i32 %i.0.reg2mem.0.ph, 65535		; <i32> [#uses=1]
-	%1 = mul i32 %0, 480		; <i32> [#uses=1]
-	%tmp20 = mul i32 %indvar18, -478		; <i32> [#uses=1]
-	br label %bb2
-
-bb2:		; preds = %bb2, %bb2.outer
-	%indvar = phi i32 [ 0, %bb2.outer ], [ %indvar.next, %bb2 ]		; <i32> [#uses=3]
-	%ctg2 = getelementptr i8* %out, i32 %tmp20		; <i8*> [#uses=1]
-	%tmp21 = ptrtoint i8* %ctg2 to i32		; <i32> [#uses=1]
-	%tmp23 = sub i32 %tmp21, %indvar		; <i32> [#uses=1]
-	%out_addr.0.reg2mem.0 = inttoptr i32 %tmp23 to i8*		; <i8*> [#uses=1]
-	%tmp25 = mul i32 %indvar, 65535		; <i32> [#uses=1]
-	%j.0.reg2mem.0 = add i32 %tmp25, 479		; <i32> [#uses=1]
-	%2 = and i32 %j.0.reg2mem.0, 65535		; <i32> [#uses=1]
-	%3 = add i32 %1, %2		; <i32> [#uses=9]
-	%4 = add i32 %3, -481		; <i32> [#uses=1]
-	%5 = getelementptr i8* %in, i32 %4		; <i8*> [#uses=1]
-	%6 = load i8* %5, align 1		; <i8> [#uses=1]
-	%7 = add i32 %3, -480		; <i32> [#uses=1]
-	%8 = getelementptr i8* %in, i32 %7		; <i8*> [#uses=1]
-	%9 = load i8* %8, align 1		; <i8> [#uses=1]
-	%10 = add i32 %3, -479		; <i32> [#uses=1]
-	%11 = getelementptr i8* %in, i32 %10		; <i8*> [#uses=1]
-	%12 = load i8* %11, align 1		; <i8> [#uses=1]
-	%13 = add i32 %3, -1		; <i32> [#uses=1]
-	%14 = getelementptr i8* %in, i32 %13		; <i8*> [#uses=1]
-	%15 = load i8* %14, align 1		; <i8> [#uses=1]
-	%16 = getelementptr i8* %in, i32 %3		; <i8*> [#uses=1]
-	%17 = load i8* %16, align 1		; <i8> [#uses=1]
-	%18 = add i32 %3, 1		; <i32> [#uses=1]
-	%19 = getelementptr i8* %in, i32 %18		; <i8*> [#uses=1]
-	%20 = load i8* %19, align 1		; <i8> [#uses=1]
-	%21 = add i32 %3, 481		; <i32> [#uses=1]
-	%22 = getelementptr i8* %in, i32 %21		; <i8*> [#uses=1]
-	%23 = load i8* %22, align 1		; <i8> [#uses=1]
-	%24 = add i32 %3, 480		; <i32> [#uses=1]
-	%25 = getelementptr i8* %in, i32 %24		; <i8*> [#uses=1]
-	%26 = load i8* %25, align 1		; <i8> [#uses=1]
-	%27 = add i32 %3, 479		; <i32> [#uses=1]
-	%28 = getelementptr i8* %in, i32 %27		; <i8*> [#uses=1]
-	%29 = load i8* %28, align 1		; <i8> [#uses=1]
-	%30 = add i8 %9, %6		; <i8> [#uses=1]
-	%31 = add i8 %30, %12		; <i8> [#uses=1]
-	%32 = add i8 %31, %15		; <i8> [#uses=1]
-	%33 = add i8 %32, %17		; <i8> [#uses=1]
-	%34 = add i8 %33, %20		; <i8> [#uses=1]
-	%35 = add i8 %34, %23		; <i8> [#uses=1]
-	%36 = add i8 %35, %26		; <i8> [#uses=1]
-	%37 = add i8 %36, %29		; <i8> [#uses=1]
-	store i8 %37, i8* %out_addr.0.reg2mem.0, align 1
-	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=2]
-	%exitcond = icmp eq i32 %indvar.next, 478		; <i1> [#uses=1]
-	br i1 %exitcond, label %bb4, label %bb2
-
-bb4:		; preds = %bb2
-	%indvar.next28 = add i32 %indvar18, 1		; <i32> [#uses=2]
-	%exitcond29 = icmp eq i32 %indvar.next28, 638		; <i1> [#uses=1]
-	br i1 %exitcond29, label %return, label %bb2.outer
-
-return:		; preds = %bb4
-	ret void
-}
diff --git a/test/CodeGen/X86/change-compare-stride-1.ll b/test/CodeGen/X86/change-compare-stride-1.ll
index 8b53ae2..1c5c113 100644
--- a/test/CodeGen/X86/change-compare-stride-1.ll
+++ b/test/CodeGen/X86/change-compare-stride-1.ll
@@ -3,6 +3,10 @@
 ; Nested LSR is required to optimize this case.
 ; We do not expect to see this form of IR without -enable-iv-rewrite.
 
+; xfailed for now because the scheduler two-address hack has been disabled.
+; Now it's generating a leal -1 rather than a decq.
+; XFAIL: *
+
 define void @borf(i8* nocapture %in, i8* nocapture %out) nounwind {
 ; CHECK: borf:
 ; CHECK-NOT: inc
diff --git a/test/CodeGen/X86/clz.ll b/test/CodeGen/X86/clz.ll
index d76fab4..763079f 100644
--- a/test/CodeGen/X86/clz.ll
+++ b/test/CodeGen/X86/clz.ll
@@ -1,48 +1,141 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=yonah | FileCheck %s
 
-define i32 @t1(i32 %x) nounwind  {
-	%tmp = tail call i32 @llvm.ctlz.i32( i32 %x )
-	ret i32 %tmp
-; CHECK: t1:
-; CHECK: bsrl
-; CHECK: cmov
+declare i8 @llvm.cttz.i8(i8, i1)
+declare i16 @llvm.cttz.i16(i16, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare i64 @llvm.cttz.i64(i64, i1)
+declare i8 @llvm.ctlz.i8(i8, i1)
+declare i16 @llvm.ctlz.i16(i16, i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i64 @llvm.ctlz.i64(i64, i1)
+
+define i8 @cttz_i8(i8 %x)  {
+  %tmp = call i8 @llvm.cttz.i8( i8 %x, i1 true )
+  ret i8 %tmp
+; CHECK: cttz_i8:
+; CHECK: bsfl
+; CHECK-NOT: cmov
+; CHECK: ret
 }
 
-declare i32 @llvm.ctlz.i32(i32) nounwind readnone 
+define i16 @cttz_i16(i16 %x)  {
+  %tmp = call i16 @llvm.cttz.i16( i16 %x, i1 true )
+  ret i16 %tmp
+; CHECK: cttz_i16:
+; CHECK: bsfw
+; CHECK-NOT: cmov
+; CHECK: ret
+}
 
-define i32 @t2(i32 %x) nounwind  {
-	%tmp = tail call i32 @llvm.cttz.i32( i32 %x )
-	ret i32 %tmp
-; CHECK: t2:
+define i32 @cttz_i32(i32 %x)  {
+  %tmp = call i32 @llvm.cttz.i32( i32 %x, i1 true )
+  ret i32 %tmp
+; CHECK: cttz_i32:
 ; CHECK: bsfl
-; CHECK: cmov
+; CHECK-NOT: cmov
+; CHECK: ret
+}
+
+define i64 @cttz_i64(i64 %x)  {
+  %tmp = call i64 @llvm.cttz.i64( i64 %x, i1 true )
+  ret i64 %tmp
+; CHECK: cttz_i64:
+; CHECK: bsfq
+; CHECK-NOT: cmov
+; CHECK: ret
 }
 
-declare i32 @llvm.cttz.i32(i32) nounwind readnone 
+define i8 @ctlz_i8(i8 %x) {
+entry:
+  %tmp2 = call i8 @llvm.ctlz.i8( i8 %x, i1 true )
+  ret i8 %tmp2
+; CHECK: ctlz_i8:
+; CHECK: bsrl
+; CHECK-NOT: cmov
+; CHECK: xorl $7,
+; CHECK: ret
+}
 
-define i16 @t3(i16 %x, i16 %y) nounwind  {
+define i16 @ctlz_i16(i16 %x) {
 entry:
-        %tmp1 = add i16 %x, %y
-	%tmp2 = tail call i16 @llvm.ctlz.i16( i16 %tmp1 )		; <i16> [#uses=1]
-	ret i16 %tmp2
-; CHECK: t3:
+  %tmp2 = call i16 @llvm.ctlz.i16( i16 %x, i1 true )
+  ret i16 %tmp2
+; CHECK: ctlz_i16:
 ; CHECK: bsrw
-; CHECK: cmov
+; CHECK-NOT: cmov
+; CHECK: xorl $15,
+; CHECK: ret
+}
+
+define i32 @ctlz_i32(i32 %x) {
+  %tmp = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
+  ret i32 %tmp
+; CHECK: ctlz_i32:
+; CHECK: bsrl
+; CHECK-NOT: cmov
+; CHECK: xorl $31,
+; CHECK: ret
+}
+
+define i64 @ctlz_i64(i64 %x) {
+  %tmp = call i64 @llvm.ctlz.i64( i64 %x, i1 true )
+  ret i64 %tmp
+; CHECK: ctlz_i64:
+; CHECK: bsrq
+; CHECK-NOT: cmov
+; CHECK: xorq $63,
+; CHECK: ret
 }
 
-declare i16 @llvm.ctlz.i16(i16) nounwind readnone 
+define i32 @ctlz_i32_cmov(i32 %n) {
+entry:
+; Generate a cmov to handle zero inputs when necessary.
+; CHECK: ctlz_i32_cmov:
+; CHECK: bsrl
+; CHECK: cmov
+; CHECK: xorl $31,
+; CHECK: ret
+  %tmp1 = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
+  ret i32 %tmp1
+}
 
+define i32 @ctlz_i32_fold_cmov(i32 %n) {
+entry:
 ; Don't generate the cmovne when the source is known non-zero (and bsr would
 ; not set ZF).
 ; rdar://9490949
-
-define i32 @t4(i32 %n) nounwind {
-entry:
-; CHECK: t4:
+; CHECK: ctlz_i32_fold_cmov:
 ; CHECK: bsrl
 ; CHECK-NOT: cmov
+; CHECK: xorl $31,
 ; CHECK: ret
   %or = or i32 %n, 1
-  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %or)
+  %tmp1 = call i32 @llvm.ctlz.i32(i32 %or, i1 false)
   ret i32 %tmp1
 }
+
+define i32 @ctlz_bsr(i32 %n) {
+entry:
+; Don't generate any xors when a 'ctlz' intrinsic is actually used to compute
+; the most significant bit, which is what 'bsr' does natively.
+; CHECK: ctlz_bsr:
+; CHECK: bsrl
+; CHECK-NOT: xorl
+; CHECK: ret
+  %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
+  %bsr = xor i32 %ctlz, 31
+  ret i32 %bsr
+}
+
+define i32 @ctlz_bsr_cmov(i32 %n) {
+entry:
+; Same as ctlz_bsr, but ensure this happens even when there is a potential
+; zero.
+; CHECK: ctlz_bsr_cmov:
+; CHECK: bsrl
+; CHECK-NOT: xorl
+; CHECK: ret
+  %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
+  %bsr = xor i32 %ctlz, 31
+  ret i32 %bsr
+}
diff --git a/test/CodeGen/X86/cmov.ll b/test/CodeGen/X86/cmov.ll
index 7a8d6e6..2e7ffbf 100644
--- a/test/CodeGen/X86/cmov.ll
+++ b/test/CodeGen/X86/cmov.ll
@@ -84,7 +84,7 @@ entry:
   br i1 %3, label %func_4.exit.i, label %bb.i.i.i
 
 bb.i.i.i:                                         ; preds = %entry
-  %4 = volatile load i8* @g_100, align 1          ; <i8> [#uses=0]
+  %4 = load volatile i8* @g_100, align 1          ; <i8> [#uses=0]
   br label %func_4.exit.i
 
 ; CHECK: test4:
@@ -101,7 +101,7 @@ func_4.exit.i:                                    ; preds = %bb.i.i.i, %entry
   br i1 %brmerge.i, label %func_1.exit, label %bb.i.i
 
 bb.i.i:                                           ; preds = %func_4.exit.i
-  %5 = volatile load i8* @g_100, align 1          ; <i8> [#uses=0]
+  %5 = load volatile i8* @g_100, align 1          ; <i8> [#uses=0]
   br label %func_1.exit
 
 func_1.exit:                                      ; preds = %bb.i.i, %func_4.exit.i
diff --git a/test/CodeGen/X86/cmpxchg16b.ll b/test/CodeGen/X86/cmpxchg16b.ll
index ba1c4ef..edbd0bc 100644
--- a/test/CodeGen/X86/cmpxchg16b.ll
+++ b/test/CodeGen/X86/cmpxchg16b.ll
@@ -3,7 +3,7 @@
 ; Basic 128-bit cmpxchg
 define void @t1(i128* nocapture %p) nounwind ssp {
 entry:
-; CHECK movl	$1, %ebx
+; CHECK: movl	$1, %ebx
 ; CHECK: lock
 ; CHECK-NEXT: cmpxchg16b
   %r = cmpxchg i128* %p, i128 0, i128 1 seq_cst
diff --git a/test/CodeGen/X86/coalescer-commute1.ll b/test/CodeGen/X86/coalescer-commute1.ll
index 8aa0bfd..d9e0778 100644
--- a/test/CodeGen/X86/coalescer-commute1.ll
+++ b/test/CodeGen/X86/coalescer-commute1.ll
@@ -21,6 +21,6 @@ bb:		; preds = %bb, %entry
 	br i1 %exitcond, label %bb13, label %bb
 
 bb13:		; preds = %bb
-	volatile store float %tmp6, float* @G, align 4
+	store volatile float %tmp6, float* @G, align 4
 	ret void
 }
diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll
index 1531457..cf6e27d 100644
--- a/test/CodeGen/X86/crash.ll
+++ b/test/CodeGen/X86/crash.ll
@@ -6,16 +6,16 @@
 ; Chain and flag folding issues.
 define i32 @test1() nounwind ssp {
 entry:
-  %tmp5.i = volatile load i32* undef              ; <i32> [#uses=1]
+  %tmp5.i = load volatile i32* undef              ; <i32> [#uses=1]
   %conv.i = zext i32 %tmp5.i to i64               ; <i64> [#uses=1]
-  %tmp12.i = volatile load i32* undef             ; <i32> [#uses=1]
+  %tmp12.i = load volatile i32* undef             ; <i32> [#uses=1]
   %conv13.i = zext i32 %tmp12.i to i64            ; <i64> [#uses=1]
   %shl.i = shl i64 %conv13.i, 32                  ; <i64> [#uses=1]
   %or.i = or i64 %shl.i, %conv.i                  ; <i64> [#uses=1]
   %add16.i = add i64 %or.i, 256                   ; <i64> [#uses=1]
   %shr.i = lshr i64 %add16.i, 8                   ; <i64> [#uses=1]
   %conv19.i = trunc i64 %shr.i to i32             ; <i32> [#uses=1]
-  volatile store i32 %conv19.i, i32* undef
+  store volatile i32 %conv19.i, i32* undef
   ret i32 undef
 }
 
diff --git a/test/CodeGen/X86/dbg-file-name.ll b/test/CodeGen/X86/dbg-file-name.ll
index 3a849aa..adf9854 100644
--- a/test/CodeGen/X86/dbg-file-name.ll
+++ b/test/CodeGen/X86/dbg-file-name.ll
@@ -1,7 +1,7 @@
-; RUN: llc -mtriple x86_64-apple-darwin10.0.0  < %s | FileCheck %s
+; RUN: llc -enable-dwarf-directory -mtriple x86_64-apple-darwin10.0.0  < %s | FileCheck %s
 
 ; Radar 8884898
-; CHECK: file	1 "/Users/manav/one/two{{/|\\\\}}simple.c"
+; CHECK: file	1 "simple.c"
 
 declare i32 @printf(i8*, ...) nounwind
 
diff --git a/test/CodeGen/X86/dbg-inline.ll b/test/CodeGen/X86/dbg-inline.ll
deleted file mode 100644
index 523c62e..0000000
--- a/test/CodeGen/X86/dbg-inline.ll
+++ /dev/null
@@ -1,140 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-; Radar 7881628, 9747970
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-macosx10.7.0"
-
-%class.APFloat = type { i32 }
-
-define i32 @_ZNK7APFloat9partCountEv(%class.APFloat* nocapture %this) nounwind uwtable readonly optsize ssp align 2 {
-entry:
-  tail call void @llvm.dbg.value(metadata !{%class.APFloat* %this}, i64 0, metadata !28), !dbg !41
-  %prec = getelementptr inbounds %class.APFloat* %this, i64 0, i32 0, !dbg !42
-  %tmp = load i32* %prec, align 4, !dbg !42, !tbaa !44
-  tail call void @llvm.dbg.value(metadata !{i32 %tmp}, i64 0, metadata !47), !dbg !48
-  %add.i = add i32 %tmp, 42, !dbg !49
-  ret i32 %add.i, !dbg !42
-}
-
-define zeroext i1 @_ZNK7APFloat14bitwiseIsEqualERKS_(%class.APFloat* %this, %class.APFloat* %rhs) uwtable optsize ssp align 2 {
-entry:
-  tail call void @llvm.dbg.value(metadata !{%class.APFloat* %this}, i64 0, metadata !29), !dbg !51
-  tail call void @llvm.dbg.value(metadata !{%class.APFloat* %rhs}, i64 0, metadata !30), !dbg !52
-  tail call void @llvm.dbg.value(metadata !{%class.APFloat* %this}, i64 0, metadata !53), !dbg !55
-  %prec.i = getelementptr inbounds %class.APFloat* %this, i64 0, i32 0, !dbg !56
-;CHECK: DW_TAG_inlined_subroutine
-;CHECK: DW_AT_abstract_origin
-;CHECK: DW_AT_ranges
-  %tmp.i = load i32* %prec.i, align 4, !dbg !56, !tbaa !44
-  tail call void @llvm.dbg.value(metadata !{i32 %tmp.i}, i64 0, metadata !57), !dbg !58
-  %add.i.i = add i32 %tmp.i, 42, !dbg !59
-  tail call void @llvm.dbg.value(metadata !{i32 %add.i.i}, i64 0, metadata !31), !dbg !54
-  %call2 = tail call i64* @_ZNK7APFloat16significandPartsEv(%class.APFloat* %this) optsize, !dbg !60
-  tail call void @llvm.dbg.value(metadata !{i64* %call2}, i64 0, metadata !34), !dbg !60
-  %call3 = tail call i64* @_ZNK7APFloat16significandPartsEv(%class.APFloat* %rhs) optsize, !dbg !61
-  tail call void @llvm.dbg.value(metadata !{i64* %call3}, i64 0, metadata !37), !dbg !61
-  %tmp = zext i32 %add.i.i to i64
-  br label %for.cond, !dbg !62
-
-for.cond:                                         ; preds = %for.inc, %entry
-  %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ]
-  %tmp13 = sub i64 %tmp, %indvar, !dbg !62
-  %i.0 = trunc i64 %tmp13 to i32, !dbg !62
-  %cmp = icmp sgt i32 %i.0, 0, !dbg !62
-  br i1 %cmp, label %for.body, label %return, !dbg !62
-
-for.body:                                         ; preds = %for.cond
-  %p.0 = getelementptr i64* %call2, i64 %indvar, !dbg !63
-  %tmp6 = load i64* %p.0, align 8, !dbg !63, !tbaa !66
-  %tmp8 = load i64* %call3, align 8, !dbg !63, !tbaa !66
-  %cmp9 = icmp eq i64 %tmp6, %tmp8, !dbg !63
-  br i1 %cmp9, label %for.inc, label %return, !dbg !63
-
-for.inc:                                          ; preds = %for.body
-  %indvar.next = add i64 %indvar, 1, !dbg !67
-  br label %for.cond, !dbg !67
-
-return:                                           ; preds = %for.cond, %for.body
-  %retval.0 = phi i1 [ false, %for.body ], [ true, %for.cond ]
-  ret i1 %retval.0, !dbg !68
-}
-
-declare i64* @_ZNK7APFloat16significandPartsEv(%class.APFloat*) optsize
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-!llvm.dbg.cu = !{!0}
-!llvm.dbg.sp = !{!1, !7, !12, !23, !24, !25}
-!llvm.dbg.lv._ZNK7APFloat9partCountEv = !{!28}
-!llvm.dbg.lv._ZNK7APFloat14bitwiseIsEqualERKS_ = !{!29, !30, !31, !34, !37}
-!llvm.dbg.lv._ZL16partCountForBitsj = !{!38}
-!llvm.dbg.gv = !{!39}
-
-!0 = metadata !{i32 655377, i32 0, i32 4, metadata !"/Volumes/Athwagate/R9747970/apf.cc", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 136149)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 655406, i32 0, metadata !2, metadata !"bitwiseIsEqual", metadata !"bitwiseIsEqual", metadata !"_ZNK7APFloat14bitwiseIsEqualERKS_", metadata !3, i32 8, metadata !19, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 655362, metadata !0, metadata !"APFloat", metadata !3, i32 6, i64 32, i64 32, i32 0, i32 0, null, metadata !4, i32 0, null, null} ; [ DW_TAG_class_type ]
-!3 = metadata !{i32 655401, metadata !"/Volumes/Athwagate/R9747970/apf.cc", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ]
-!4 = metadata !{metadata !5, metadata !1, metadata !7, metadata !12}
-!5 = metadata !{i32 655373, metadata !2, metadata !"prec", metadata !3, i32 13, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
-!6 = metadata !{i32 655396, metadata !0, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 655406, i32 0, metadata !2, metadata !"partCount", metadata !"partCount", metadata !"_ZNK7APFloat9partCountEv", metadata !3, i32 9, metadata !8, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null} ; [ DW_TAG_subprogram ]
-!8 = metadata !{i32 655381, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !9, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!9 = metadata !{metadata !6, metadata !10}
-!10 = metadata !{i32 655375, metadata !0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 655398, metadata !0, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!12 = metadata !{i32 655406, i32 0, metadata !2, metadata !"significandParts", metadata !"significandParts", metadata !"_ZNK7APFloat16significandPartsEv", metadata !3, i32 11, metadata !13, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null} ; [ DW_TAG_subprogram ]
-!13 = metadata !{i32 655381, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !14, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!14 = metadata !{metadata !15, metadata !10}
-!15 = metadata !{i32 655375, metadata !0, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ]
-!16 = metadata !{i32 655382, metadata !0, metadata !"integerPart", metadata !3, i32 2, i64 0, i64 0, i64 0, i32 0, metadata !17} ; [ DW_TAG_typedef ]
-!17 = metadata !{i32 655382, metadata !0, metadata !"uint64_t", metadata !3, i32 1, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_typedef ]
-!18 = metadata !{i32 655396, metadata !0, metadata !"long long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!19 = metadata !{i32 655381, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !20, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!20 = metadata !{metadata !21, metadata !10, metadata !22}
-!21 = metadata !{i32 655396, metadata !0, metadata !"bool", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
-!22 = metadata !{i32 655376, metadata !0, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_reference_type ]
-!23 = metadata !{i32 655406, i32 0, metadata !0, metadata !"partCount", metadata !"partCount", metadata !"_ZNK7APFloat9partCountEv", metadata !3, i32 23, metadata !8, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%class.APFloat*)* @_ZNK7APFloat9partCountEv, null, metadata !7} ; [ DW_TAG_subprogram ]
-!24 = metadata !{i32 655406, i32 0, metadata !0, metadata !"bitwiseIsEqual", metadata !"bitwiseIsEqual", metadata !"_ZNK7APFloat14bitwiseIsEqualERKS_", metadata !3, i32 28, metadata !19, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i1 (%class.APFloat*, %class.APFloat*)* @_ZNK7APFloat14bitwiseIsEqualERKS_, null, metadata !1} ; [ DW_TAG_subprogram ]
-!25 = metadata !{i32 655406, i32 0, metadata !3, metadata !"partCountForBits", metadata !"partCountForBits", metadata !"", metadata !3, i32 17, metadata !26, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null, null} ; [ DW_TAG_subprogram ]
-!26 = metadata !{i32 655381, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !27, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!27 = metadata !{metadata !6}
-!28 = metadata !{i32 655617, metadata !23, metadata !"this", metadata !3, i32 16777238, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!29 = metadata !{i32 655617, metadata !24, metadata !"this", metadata !3, i32 16777244, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!30 = metadata !{i32 655617, metadata !24, metadata !"rhs", metadata !3, i32 33554460, metadata !22, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!31 = metadata !{i32 655616, metadata !32, metadata !"i", metadata !3, i32 29, metadata !33, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!32 = metadata !{i32 655371, metadata !24, i32 28, i32 56, metadata !3, i32 1} ; [ DW_TAG_lexical_block ]
-!33 = metadata !{i32 655396, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!34 = metadata !{i32 655616, metadata !32, metadata !"p", metadata !3, i32 30, metadata !35, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!35 = metadata !{i32 655375, metadata !0, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !36} ; [ DW_TAG_pointer_type ]
-!36 = metadata !{i32 655398, metadata !0, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_const_type ]
-!37 = metadata !{i32 655616, metadata !32, metadata !"q", metadata !3, i32 31, metadata !35, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!38 = metadata !{i32 655617, metadata !25, metadata !"bits", metadata !3, i32 16777232, metadata !6, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!39 = metadata !{i32 655412, i32 0, metadata !3, metadata !"integerPartWidth", metadata !"integerPartWidth", metadata !"integerPartWidth", metadata !3, i32 3, metadata !40, i32 1, i32 1, i32 42} ; [ DW_TAG_variable ]
-!40 = metadata !{i32 655398, metadata !0, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_const_type ]
-!41 = metadata !{i32 22, i32 23, metadata !23, null}
-!42 = metadata !{i32 24, i32 10, metadata !43, null}
-!43 = metadata !{i32 655371, metadata !23, i32 23, i32 1, metadata !3, i32 0} ; [ DW_TAG_lexical_block ]
-!44 = metadata !{metadata !"int", metadata !45}
-!45 = metadata !{metadata !"omnipotent char", metadata !46}
-!46 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!47 = metadata !{i32 655617, metadata !25, metadata !"bits", metadata !3, i32 16777232, metadata !6, i32 0, metadata !42} ; [ DW_TAG_arg_variable ]
-!48 = metadata !{i32 16, i32 58, metadata !25, metadata !42}
-!49 = metadata !{i32 18, i32 3, metadata !50, metadata !42}
-!50 = metadata !{i32 655371, metadata !25, i32 17, i32 1, metadata !3, i32 4} ; [ DW_TAG_lexical_block ]
-!51 = metadata !{i32 28, i32 15, metadata !24, null}
-!52 = metadata !{i32 28, i32 45, metadata !24, null}
-!53 = metadata !{i32 655617, metadata !23, metadata !"this", metadata !3, i32 16777238, metadata !10, i32 64, metadata !54} ; [ DW_TAG_arg_variable ]
-!54 = metadata !{i32 29, i32 10, metadata !32, null}
-!55 = metadata !{i32 22, i32 23, metadata !23, metadata !54}
-!56 = metadata !{i32 24, i32 10, metadata !43, metadata !54}
-!57 = metadata !{i32 655617, metadata !25, metadata !"bits", metadata !3, i32 16777232, metadata !6, i32 0, metadata !56} ; [ DW_TAG_arg_variable ]
-!58 = metadata !{i32 16, i32 58, metadata !25, metadata !56}
-!59 = metadata !{i32 18, i32 3, metadata !50, metadata !56}
-!60 = metadata !{i32 30, i32 24, metadata !32, null}
-!61 = metadata !{i32 31, i32 24, metadata !32, null}
-!62 = metadata !{i32 32, i32 3, metadata !32, null}
-!63 = metadata !{i32 33, i32 5, metadata !64, null}
-!64 = metadata !{i32 655371, metadata !65, i32 32, i32 25, metadata !3, i32 3} ; [ DW_TAG_lexical_block ]
-!65 = metadata !{i32 655371, metadata !32, i32 32, i32 3, metadata !3, i32 2} ; [ DW_TAG_lexical_block ]
-!66 = metadata !{metadata !"long long", metadata !45}
-!67 = metadata !{i32 32, i32 15, metadata !65, null}
-!68 = metadata !{i32 37, i32 1, metadata !32, null}
diff --git a/test/CodeGen/X86/dbg-merge-loc-entry.ll b/test/CodeGen/X86/dbg-merge-loc-entry.ll
index afe1729..c35935f 100644
--- a/test/CodeGen/X86/dbg-merge-loc-entry.ll
+++ b/test/CodeGen/X86/dbg-merge-loc-entry.ll
@@ -10,7 +10,7 @@ target triple = "x86_64-apple-darwin8"
 ;CHECK-NEXT:    .short  Lset
 ;CHECK-NEXT: Ltmp
 ;CHECK-NEXT:	.byte	85                      ## DW_OP_reg5
-;CHECK-NEXT: Ltmp7
+;CHECK-NEXT: Ltmp5
 ;CHECK-NEXT:	.quad	0
 ;CHECK-NEXT:	.quad	0
 
diff --git a/test/CodeGen/X86/dbg-subrange.ll b/test/CodeGen/X86/dbg-subrange.ll
new file mode 100644
index 0000000..788910c
--- /dev/null
+++ b/test/CodeGen/X86/dbg-subrange.ll
@@ -0,0 +1,37 @@
+; RUN: llc -O0 < %s | FileCheck %s
+; Radar 10464995
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.2"
+
+@s = common global [4294967296 x i8] zeroinitializer, align 16
+;CHECK: .long	4294967295
+
+define void @bar() nounwind uwtable ssp {
+entry:
+  store i8 97, i8* getelementptr inbounds ([4294967296 x i8]* @s, i32 0, i64 0), align 1, !dbg !18
+  ret void, !dbg !20
+}
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"small.c", metadata !"/private/tmp", metadata !"clang version 3.1 (trunk 144833)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !11} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @bar, null, null, metadata !9} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 720937, metadata !"small.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{null}
+!9 = metadata !{metadata !10}
+!10 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
+!11 = metadata !{metadata !12}
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 720948, i32 0, null, metadata !"s", metadata !"s", metadata !"", metadata !6, i32 2, metadata !14, i32 0, i32 1, [4294967296 x i8]* @s} ; [ DW_TAG_variable ]
+!14 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 34359738368, i64 8, i32 0, i32 0, metadata !15, metadata !16, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!15 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!16 = metadata !{metadata !17}
+!17 = metadata !{i32 720929, i64 0, i64 4294967295} ; [ DW_TAG_subrange_type ]
+!18 = metadata !{i32 5, i32 3, metadata !19, null}
+!19 = metadata !{i32 720907, metadata !5, i32 4, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!20 = metadata !{i32 6, i32 1, metadata !19, null}
diff --git a/test/CodeGen/X86/dbg-value-inlined-parameter.ll b/test/CodeGen/X86/dbg-value-inlined-parameter.ll
index 481c4ba..d248a41 100644
--- a/test/CodeGen/X86/dbg-value-inlined-parameter.ll
+++ b/test/CodeGen/X86/dbg-value-inlined-parameter.ll
@@ -8,7 +8,7 @@
 ;CHECK-NEXT: DW_AT_call_file
 ;CHECK-NEXT: DW_AT_call_line
 ;CHECK-NEXT: DW_TAG_formal_parameter
-;CHECK-NEXT: .ascii   "sp"                   ## DW_AT_name
+;CHECK-NEXT: Lstring11-Lsection_str ## DW_AT_name
 
 %struct.S1 = type { float*, i32 }
 
diff --git a/test/CodeGen/X86/dbg-value-location.ll b/test/CodeGen/X86/dbg-value-location.ll
index a0e4d16..05e29ec 100644
--- a/test/CodeGen/X86/dbg-value-location.ll
+++ b/test/CodeGen/X86/dbg-value-location.ll
@@ -4,8 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-darwin10.0.0"
 ;Radar 8950491
 
-;CHECK:        .ascii   "var"                  ## DW_AT_name
-;CHECK-NEXT:        .byte   0
+;CHECK: .long Lset5
 ;CHECK-NEXT:        ## DW_AT_decl_file
 ;CHECK-NEXT:        ## DW_AT_decl_line
 ;CHECK-NEXT:        ## DW_AT_type
diff --git a/test/CodeGen/X86/dg.exp b/test/CodeGen/X86/dg.exp
deleted file mode 100644
index 629a147..0000000
--- a/test/CodeGen/X86/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/X86/divide-by-constant.ll b/test/CodeGen/X86/divide-by-constant.ll
index 87c1be5..e577ecb 100644
--- a/test/CodeGen/X86/divide-by-constant.ll
+++ b/test/CodeGen/X86/divide-by-constant.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i686-pc-linux-gnu -asm-verbose=0 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=i686-pc-linux-gnu -asm-verbose=0 | FileCheck %s
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i686-pc-linux-gnu"
 
diff --git a/test/CodeGen/X86/dwarf-comp-dir.ll b/test/CodeGen/X86/dwarf-comp-dir.ll
new file mode 100644
index 0000000..c64752c
--- /dev/null
+++ b/test/CodeGen/X86/dwarf-comp-dir.ll
@@ -0,0 +1,16 @@
+; RUN: llc %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"empty.c", metadata !"/home/nlewycky", metadata !"clang version 3.1 (trunk 143523)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+
+; The important part of the following check is that dir = #0.
+;                        Dir  Mod Time   File Len   File Name
+;                        ---- ---------- ---------- ---------------------------
+; CHECK: file_names[  1]    0 0x00000000 0x00000000 empty.c
diff --git a/test/CodeGen/X86/empty-functions.ll b/test/CodeGen/X86/empty-functions.ll
index 874c53a..ac5174d 100644
--- a/test/CodeGen/X86/empty-functions.ll
+++ b/test/CodeGen/X86/empty-functions.ll
@@ -6,14 +6,11 @@ entry:
   unreachable
 }
 ; CHECK-NO-FP:     _func:
-; CHECK-NO-FP-NEXT: :
 ; CHECK-NO-FP-NEXT: .cfi_startproc
 ; CHECK-NO-FP:     nop
-; CHECK-NO-FP-NEXT: :
 ; CHECK-NO-FP-NEXT: .cfi_endproc
 
 ; CHECK-FP:      _func:
-; CHECK-FP-NEXT: :
 ; CHECK-FP-NEXT: .cfi_startproc
 ; CHECK-FP-NEXT: :
 ; CHECK-FP-NEXT: pushq %rbp
@@ -25,5 +22,4 @@ entry:
 ; CHECK-FP-NEXT: :
 ; CHECK-FP-NEXT: .cfi_def_cfa_register %rbp
 ; CHECK-FP-NEXT: nop
-; CHECK-FP-NEXT: :
 ; CHECK-FP-NEXT: .cfi_endproc
diff --git a/test/CodeGen/X86/epilogue.ll b/test/CodeGen/X86/epilogue.ll
index 52dcb61..0f16a64 100644
--- a/test/CodeGen/X86/epilogue.ll
+++ b/test/CodeGen/X86/epilogue.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 | not grep lea
-; RUN: llc < %s -march=x86 | grep {movl	%ebp}
+; RUN: llc < %s -mcpu=generic -march=x86 | not grep lea
+; RUN: llc < %s -mcpu=generic -march=x86 | grep {movl	%ebp}
 
 declare void @bar(<2 x i64>* %n)
 
diff --git a/test/CodeGen/X86/f16c-intrinsics.ll b/test/CodeGen/X86/f16c-intrinsics.ll
new file mode 100644
index 0000000..2135f94
--- /dev/null
+++ b/test/CodeGen/X86/f16c-intrinsics.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=x86 -mattr=+avx,+f16c | FileCheck %s
+
+define <4 x float> @test_x86_vcvtph2ps_128(<8 x i16> %a0) {
+  ; CHECK: vcvtph2ps
+  %res = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %a0) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>) nounwind readonly
+
+
+define <8 x float> @test_x86_vcvtph2ps_256(<8 x i16> %a0) {
+  ; CHECK: vcvtph2ps
+  %res = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %a0) ; <<8 x float>> [#uses=1]
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readonly
+
+
+define <8 x i16> @test_x86_vcvtps2ph_128(<4 x float> %a0) {
+  ; CHECK: vcvtps2ph
+  %res = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float>, i32) nounwind readonly
+
+
+define <8 x i16> @test_x86_vcvtps2ph_256(<8 x float> %a0) {
+  ; CHECK: vcvtps2ph
+  %res = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readonly
diff --git a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
index e151821..e4982f0 100644
--- a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
+++ b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -mcpu=generic -march=x86 -x86-asm-syntax=intel | \
 ; RUN:   grep {add	ESP, 8}
 
 target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/fast-isel-bc.ll b/test/CodeGen/X86/fast-isel-bc.ll
index 4abc3b5..8ac15cd 100644
--- a/test/CodeGen/X86/fast-isel-bc.ll
+++ b/test/CodeGen/X86/fast-isel-bc.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -regalloc=linearscan -march=x86-64 -mattr=+mmx,+sse2 | FileCheck %s
+; RUN: llc < %s -O0 -march=x86-64 -mattr=+mmx,+sse2 | FileCheck %s
 ; PR4684
 
 target datalayout =
diff --git a/test/CodeGen/X86/fast-isel-gep.ll b/test/CodeGen/X86/fast-isel-gep.ll
index 91d1f5d..f0375f8 100644
--- a/test/CodeGen/X86/fast-isel-gep.ll
+++ b/test/CodeGen/X86/fast-isel-gep.ll
@@ -82,9 +82,8 @@ define i64 @test5(i8* %A, i32 %I, i64 %B) nounwind {
   ret i64 %v11
 ; X64: test5:
 ; X64: movslq	%e[[A1]], %rax
-; X64-NEXT: movq	(%r[[A0]],%rax), %rax
-; X64-NEXT: addq	%{{rdx|r8}}, %rax
-; X64-NEXT: ret
+; X64-NEXT: (%r[[A0]],%rax),
+; X64: ret
 }
 
 ; PR9500, rdar://9156159 - Don't do non-local address mode folding,
diff --git a/test/CodeGen/X86/fast-isel-x86-64.ll b/test/CodeGen/X86/fast-isel-x86-64.ll
index 6a5a102..d8f4663 100644
--- a/test/CodeGen/X86/fast-isel-x86-64.ll
+++ b/test/CodeGen/X86/fast-isel-x86-64.ll
@@ -82,7 +82,7 @@ entry:
   ret i64 %mul
 
 ; CHECK: test6:
-; CHECK: leaq	(,%rdi,8), %rax
+; CHECK: shlq	$3, %rdi
 }
 
 define i32 @test7(i32 %x) nounwind ssp {
@@ -90,7 +90,7 @@ entry:
   %mul = mul nsw i32 %x, 8
   ret i32 %mul
 ; CHECK: test7:
-; CHECK: leal	(,%rdi,8), %eax
+; CHECK: shll	$3, %edi
 }
 
 
@@ -225,18 +225,20 @@ if.else:                                          ; preds = %entry
 ; CHECK-NEXT: je 
 }
 
-; Check that 0.0 is materialized using pxor
+; Check that 0.0 is materialized using xorps
 define void @test18(float* %p1) {
   store float 0.0, float* %p1
   ret void
 ; CHECK: test18:
-; CHECK: pxor
+; CHECK: xorps
 }
+
+; Without any type hints, doubles use the smaller xorps instead of xorpd.
 define void @test19(double* %p1) {
   store double 0.0, double* %p1
   ret void
 ; CHECK: test19:
-; CHECK: pxor
+; CHECK: xorps
 }
 
 ; Check that we fast-isel sret
@@ -252,12 +254,12 @@ entry:
 }
 declare void @test20sret(%struct.a* sret)
 
-; Check that -0.0 is not materialized using pxor
+; Check that -0.0 is not materialized using xor
 define void @test21(double* %p1) {
   store double -0.0, double* %p1
   ret void
 ; CHECK: test21:
-; CHECK-NOT: pxor
+; CHECK-NOT: xor
 ; CHECK: movsd	LCPI
 }
 
diff --git a/test/CodeGen/X86/fast-isel-x86.ll b/test/CodeGen/X86/fast-isel-x86.ll
index 19972f7..b9598bb 100644
--- a/test/CodeGen/X86/fast-isel-x86.ll
+++ b/test/CodeGen/X86/fast-isel-x86.ll
@@ -1,4 +1,4 @@
-; RUN: llc -fast-isel -O0 -mtriple=i386-apple-darwin10 -relocation-model=pic < %s | FileCheck %s
+; RUN: llc -fast-isel -O0 -mcpu=generic -mtriple=i386-apple-darwin10 -relocation-model=pic < %s | FileCheck %s
 
 ; This should use flds to set the return value.
 ; CHECK: test0:
diff --git a/test/CodeGen/X86/fast-isel.ll b/test/CodeGen/X86/fast-isel.ll
index 8391860..c88d529 100644
--- a/test/CodeGen/X86/fast-isel.ll
+++ b/test/CodeGen/X86/fast-isel.ll
@@ -99,7 +99,6 @@ define void @load_store_i1(i1* %p, i1* %q) nounwind {
   ret void
 }
 
-
 @crash_test1x = external global <2 x i32>, align 8
 
 define void @crash_test1() nounwind ssp {
@@ -108,3 +107,13 @@ define void @crash_test1() nounwind ssp {
   ret void
 }
 
+declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
+
+define i64* @life() nounwind {
+  %a1 = alloca i64*, align 8
+  %a2 = bitcast i64** %a1 to i8*
+  call void @llvm.lifetime.start(i64 -1, i8* %a2) nounwind      
+  %a3 = load i64** %a1, align 8
+  ret i64* %a3
+}
+
diff --git a/test/CodeGen/X86/fdiv.ll b/test/CodeGen/X86/fdiv.ll
new file mode 100644
index 0000000..0749682
--- /dev/null
+++ b/test/CodeGen/X86/fdiv.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -march=x86-64 -enable-unsafe-fp-math | FileCheck %s
+
+define double @exact(double %x) {
+; Exact division by a constant converted to multiplication.
+; CHECK: @exact
+; CHECK: mulsd
+  %div = fdiv double %x, 2.0
+  ret double %div
+}
+
+define double @inexact(double %x) {
+; Inexact division by a constant converted to multiplication.
+; CHECK: @inexact
+; CHECK: mulsd
+  %div = fdiv double %x, 0x41DFFFFFFFC00000 
+  ret double %div
+}
+
+define double @funky(double %x) {
+; No conversion to multiplication if too funky.
+; CHECK: @funky
+; CHECK: divsd
+  %div = fdiv double %x, 0.0
+  ret double %div
+}
+
+define double @denormal1(double %x) {
+; Don't generate multiplication by a denormal.
+; CHECK: @denormal1
+; CHECK: divsd
+  %div = fdiv double %x, 0x7FD0000000000001
+  ret double %div
+}
+
+define double @denormal2(double %x) {
+; Don't generate multiplication by a denormal.
+; CHECK: @denormal
+; CHECK: divsd
+  %div = fdiv double %x, 0x7FEFFFFFFFFFFFFF
+  ret double %div
+}
diff --git a/test/CodeGen/X86/fltused.ll b/test/CodeGen/X86/fltused.ll
index 2ffcb96..81511a3 100644
--- a/test/CodeGen/X86/fltused.ll
+++ b/test/CodeGen/X86/fltused.ll
@@ -4,6 +4,8 @@
 
 ; RUN: llc < %s -mtriple i686-pc-win32 | FileCheck %s --check-prefix WIN32
 ; RUN: llc < %s -mtriple x86_64-pc-win32 | FileCheck %s --check-prefix WIN64
+; RUN: llc < %s -O0 -mtriple i686-pc-win32 | FileCheck %s --check-prefix WIN32
+; RUN: llc < %s -O0 -mtriple x86_64-pc-win32 | FileCheck %s --check-prefix WIN64
 
 @.str = private constant [4 x i8] c"%f\0A\00"
 
diff --git a/test/CodeGen/X86/fltused_function_pointer.ll b/test/CodeGen/X86/fltused_function_pointer.ll
new file mode 100644
index 0000000..cfe484a
--- /dev/null
+++ b/test/CodeGen/X86/fltused_function_pointer.ll
@@ -0,0 +1,19 @@
+; The purpose of this test to to verify that the fltused symbol is emitted when
+; any function is called with floating point arguments on Windows. And that it
+; is not emitted otherwise.
+
+; RUN: llc < %s -mtriple i686-pc-win32 | FileCheck %s --check-prefix WIN32
+; RUN: llc < %s -mtriple x86_64-pc-win32 | FileCheck %s --check-prefix WIN64
+; RUN: llc < %s -O0 -mtriple i686-pc-win32 | FileCheck %s --check-prefix WIN32
+; RUN: llc < %s -O0 -mtriple x86_64-pc-win32 | FileCheck %s --check-prefix WIN64
+
+@.str = private constant [4 x i8] c"%f\0A\00"
+
+define i32 @foo(i32 (i8*, ...)* %f) nounwind {
+entry:
+  %call = tail call i32 (i8*, ...)* %f(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double 1.000000e+000) nounwind
+  ret i32 0
+}
+
+; WIN32: .globl __fltused
+; WIN64: .globl _fltused
diff --git a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
new file mode 100644
index 0000000..5ed03ef
--- /dev/null
+++ b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
@@ -0,0 +1,295 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=+avx,+fma4 | FileCheck %s
+
+; VFMADD
+define < 4 x float > @test_x86_fma4_vfmadd_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+  ; CHECK: vfmaddss
+  %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+define < 4 x float > @test_x86_fma4_vfmadd_ss_load(< 4 x float > %a0, < 4 x float > %a1, float* %a2) {
+  ; CHECK: vfmaddss (%{{.*}})
+  %x = load float *%a2
+  %y = insertelement <4 x float> undef, float %x, i32 0
+  %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %y) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+define < 4 x float > @test_x86_fma4_vfmadd_ss_load2(< 4 x float > %a0, float* %a1, < 4 x float > %a2) {
+  ; CHECK: vfmaddss %{{.*}}, (%{{.*}})
+  %x = load float *%a1
+  %y = insertelement <4 x float> undef, float %x, i32 0
+  %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %y, < 4 x float > %a2) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK: vfmaddsd
+  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+define < 2 x double > @test_x86_fma4_vfmadd_sd_load(< 2 x double > %a0, < 2 x double > %a1, double* %a2) {
+  ; CHECK: vfmaddsd (%{{.*}})
+  %x = load double *%a2
+  %y = insertelement <2 x double> undef, double %x, i32 0
+  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %y) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+define < 2 x double > @test_x86_fma4_vfmadd_sd_load2(< 2 x double > %a0, double* %a1, < 2 x double > %a2) {
+  ; CHECK: vfmaddsd %{{.*}}, (%{{.*}})
+  %x = load double *%a1
+  %y = insertelement <2 x double> undef, double %x, i32 0
+  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %y, < 2 x double > %a2) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 4 x float > @test_x86_fma4_vfmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+  ; CHECK: vfmaddps
+  %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+define < 4 x float > @test_x86_fma4_vfmadd_ps_load(< 4 x float > %a0, < 4 x float > %a1, < 4 x float >* %a2) {
+  ; CHECK: vfmaddps (%{{.*}})
+  %x = load <4 x float>* %a2
+  %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %x) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+define < 4 x float > @test_x86_fma4_vfmadd_ps_load2(< 4 x float > %a0, < 4 x float >* %a1, < 4 x float > %a2) {
+  ; CHECK: vfmaddps %{{.*}}, (%{{.*}})
+  %x = load <4 x float>* %a1
+  %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float > %a0, < 4 x float > %x, < 4 x float > %a2) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK: vfmaddpd
+  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+define < 2 x double > @test_x86_fma4_vfmadd_pd_load(< 2 x double > %a0, < 2 x double > %a1, < 2 x double >* %a2) {
+  ; CHECK: vfmaddpd (%{{.*}})
+  %x = load <2 x double>* %a2
+  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %x) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+define < 2 x double > @test_x86_fma4_vfmadd_pd_load2(< 2 x double > %a0, < 2 x double >* %a1, < 2 x double > %a2) {
+  ; CHECK: vfmaddpd %{{.*}}, (%{{.*}})
+  %x = load <2 x double>* %a1
+  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double > %a0, < 2 x double > %x, < 2 x double > %a2) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 8 x float > @test_x86_fma4_vfmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+  ; CHECK: vfmaddps
+  ; CHECK: ymm
+  %res = call < 8 x float > @llvm.x86.fma4.vfmadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+  ret < 8 x float > %res
+}
+declare < 8 x float > @llvm.x86.fma4.vfmadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+
+define < 4 x double > @test_x86_fma4_vfmadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+  ; CHECK: vfmaddpd
+  ; CHECK: ymm
+  %res = call < 4 x double > @llvm.x86.fma4.vfmadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+  ret < 4 x double > %res
+}
+declare < 4 x double > @llvm.x86.fma4.vfmadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+
+; VFMSUB
+define < 4 x float > @test_x86_fma4_vfmsub_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+  ; CHECK: vfmsubss
+  %res = call < 4 x float > @llvm.x86.fma4.vfmsub.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfmsub.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfmsub_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK: vfmsubsd
+  %res = call < 2 x double > @llvm.x86.fma4.vfmsub.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfmsub.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 4 x float > @test_x86_fma4_vfmsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+  ; CHECK: vfmsubps
+  %res = call < 4 x float > @llvm.x86.fma4.vfmsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfmsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfmsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK: vfmsubpd
+  %res = call < 2 x double > @llvm.x86.fma4.vfmsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfmsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 8 x float > @test_x86_fma4_vfmsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+  ; CHECK: vfmsubps
+  ; CHECK: ymm
+  %res = call < 8 x float > @llvm.x86.fma4.vfmsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+  ret < 8 x float > %res
+}
+declare < 8 x float > @llvm.x86.fma4.vfmsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+
+define < 4 x double > @test_x86_fma4_vfmsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+  ; CHECK: vfmsubpd
+  ; CHECK: ymm
+  %res = call < 4 x double > @llvm.x86.fma4.vfmsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+  ret < 4 x double > %res
+}
+declare < 4 x double > @llvm.x86.fma4.vfmsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+
+; VFNMADD
+define < 4 x float > @test_x86_fma4_vfnmadd_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+  ; CHECK: vfnmaddss
+  %res = call < 4 x float > @llvm.x86.fma4.vfnmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfnmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfnmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK: vfnmaddsd
+  %res = call < 2 x double > @llvm.x86.fma4.vfnmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfnmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 4 x float > @test_x86_fma4_vfnmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+  ; CHECK: vfnmaddps
+  %res = call < 4 x float > @llvm.x86.fma4.vfnmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfnmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfnmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK: vfnmaddpd
+  %res = call < 2 x double > @llvm.x86.fma4.vfnmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfnmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 8 x float > @test_x86_fma4_vfnmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+  ; CHECK: vfnmaddps
+  ; CHECK: ymm
+  %res = call < 8 x float > @llvm.x86.fma4.vfnmadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+  ret < 8 x float > %res
+}
+declare < 8 x float > @llvm.x86.fma4.vfnmadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+
+define < 4 x double > @test_x86_fma4_vfnmadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+  ; CHECK: vfnmaddpd
+  ; CHECK: ymm
+  %res = call < 4 x double > @llvm.x86.fma4.vfnmadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+  ret < 4 x double > %res
+}
+declare < 4 x double > @llvm.x86.fma4.vfnmadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+
+; VFNMSUB
+define < 4 x float > @test_x86_fma4_vfnmsub_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+  ; CHECK: vfnmsubss
+  %res = call < 4 x float > @llvm.x86.fma4.vfnmsub.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfnmsub.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfnmsub_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK: vfnmsubsd
+  %res = call < 2 x double > @llvm.x86.fma4.vfnmsub.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfnmsub.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 4 x float > @test_x86_fma4_vfnmsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+  ; CHECK: vfnmsubps
+  %res = call < 4 x float > @llvm.x86.fma4.vfnmsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfnmsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfnmsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK: vfnmsubpd
+  %res = call < 2 x double > @llvm.x86.fma4.vfnmsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfnmsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 8 x float > @test_x86_fma4_vfnmsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+  ; CHECK: vfnmsubps
+  ; CHECK: ymm
+  %res = call < 8 x float > @llvm.x86.fma4.vfnmsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+  ret < 8 x float > %res
+}
+declare < 8 x float > @llvm.x86.fma4.vfnmsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+
+define < 4 x double > @test_x86_fma4_vfnmsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+  ; CHECK: vfnmsubpd
+  ; CHECK: ymm
+  %res = call < 4 x double > @llvm.x86.fma4.vfnmsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+  ret < 4 x double > %res
+}
+declare < 4 x double > @llvm.x86.fma4.vfnmsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+
+; VFMADDSUB
+define < 4 x float > @test_x86_fma4_vfmaddsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+  ; CHECK: vfmaddsubps
+  %res = call < 4 x float > @llvm.x86.fma4.vfmaddsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfmaddsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfmaddsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK: vfmaddsubpd
+  %res = call < 2 x double > @llvm.x86.fma4.vfmaddsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfmaddsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 8 x float > @test_x86_fma4_vfmaddsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+  ; CHECK: vfmaddsubps
+  ; CHECK: ymm
+  %res = call < 8 x float > @llvm.x86.fma4.vfmaddsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+  ret < 8 x float > %res
+}
+declare < 8 x float > @llvm.x86.fma4.vfmaddsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+
+define < 4 x double > @test_x86_fma4_vfmaddsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+  ; CHECK: vfmaddsubpd
+  ; CHECK: ymm
+  %res = call < 4 x double > @llvm.x86.fma4.vfmaddsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+  ret < 4 x double > %res
+}
+declare < 4 x double > @llvm.x86.fma4.vfmaddsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+
+; VFMSUBADD
+define < 4 x float > @test_x86_fma4_vfmsubadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+  ; CHECK: vfmsubaddps
+  %res = call < 4 x float > @llvm.x86.fma4.vfmsubadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfmsubadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfmsubadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK: vfmsubaddpd
+  %res = call < 2 x double > @llvm.x86.fma4.vfmsubadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfmsubadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 8 x float > @test_x86_fma4_vfmsubadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+  ; CHECK: vfmsubaddps
+  ; CHECK: ymm
+  %res = call < 8 x float > @llvm.x86.fma4.vfmsubadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+  ret < 8 x float > %res
+}
+declare < 8 x float > @llvm.x86.fma4.vfmsubadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+
+define < 4 x double > @test_x86_fma4_vfmsubadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+  ; CHECK: vfmsubaddpd
+  ; CHECK: ymm
+  %res = call < 4 x double > @llvm.x86.fma4.vfmsubadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+  ret < 4 x double > %res
+}
+declare < 4 x double > @llvm.x86.fma4.vfmsubadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
diff --git a/test/CodeGen/X86/fold-and-shift.ll b/test/CodeGen/X86/fold-and-shift.ll
index 9f79f77..93baa0e 100644
--- a/test/CodeGen/X86/fold-and-shift.ll
+++ b/test/CodeGen/X86/fold-and-shift.ll
@@ -1,21 +1,77 @@
-; RUN: llc < %s -march=x86 | not grep and
+; RUN: llc < %s -march=x86 | FileCheck %s
 
 define i32 @t1(i8* %X, i32 %i) {
+; CHECK: t1:
+; CHECK-NOT: and
+; CHECK: movzbl
+; CHECK: movl (%{{...}},%{{...}},4),
+; CHECK: ret
+
 entry:
-	%tmp2 = shl i32 %i, 2		; <i32> [#uses=1]
-	%tmp4 = and i32 %tmp2, 1020		; <i32> [#uses=1]
-	%tmp7 = getelementptr i8* %X, i32 %tmp4		; <i8*> [#uses=1]
-	%tmp78 = bitcast i8* %tmp7 to i32*		; <i32*> [#uses=1]
-	%tmp9 = load i32* %tmp78, align 4		; <i32> [#uses=1]
-	ret i32 %tmp9
+  %tmp2 = shl i32 %i, 2
+  %tmp4 = and i32 %tmp2, 1020
+  %tmp7 = getelementptr i8* %X, i32 %tmp4
+  %tmp78 = bitcast i8* %tmp7 to i32*
+  %tmp9 = load i32* %tmp78
+  ret i32 %tmp9
 }
 
 define i32 @t2(i16* %X, i32 %i) {
+; CHECK: t2:
+; CHECK-NOT: and
+; CHECK: movzwl
+; CHECK: movl (%{{...}},%{{...}},4),
+; CHECK: ret
+
+entry:
+  %tmp2 = shl i32 %i, 1
+  %tmp4 = and i32 %tmp2, 131070
+  %tmp7 = getelementptr i16* %X, i32 %tmp4
+  %tmp78 = bitcast i16* %tmp7 to i32*
+  %tmp9 = load i32* %tmp78
+  ret i32 %tmp9
+}
+
+define i32 @t3(i16* %i.ptr, i32* %arr) {
+; This case is tricky. The lshr followed by a gep will produce a lshr followed
+; by an and to remove the low bits. This can be simplified by doing the lshr by
+; a greater constant and using the addressing mode to scale the result back up.
+; To make matters worse, because of the two-phase zext of %i and their reuse in
+; the function, the DAG can get confusing trying to re-use both of them and
+; prevent easy analysis of the mask in order to match this.
+; CHECK: t3:
+; CHECK-NOT: and
+; CHECK: shrl
+; CHECK: addl (%{{...}},%{{...}},4),
+; CHECK: ret
+
+entry:
+  %i = load i16* %i.ptr
+  %i.zext = zext i16 %i to i32
+  %index = lshr i32 %i.zext, 11
+  %val.ptr = getelementptr inbounds i32* %arr, i32 %index
+  %val = load i32* %val.ptr
+  %sum = add i32 %val, %i.zext
+  ret i32 %sum
+}
+
+define i32 @t4(i16* %i.ptr, i32* %arr) {
+; A version of @t3 that has more zero extends and more re-use of intermediate
+; values. This exercise slightly different bits of canonicalization.
+; CHECK: t4:
+; CHECK-NOT: and
+; CHECK: shrl
+; CHECK: addl (%{{...}},%{{...}},4),
+; CHECK: ret
+
 entry:
-	%tmp2 = shl i32 %i, 1		; <i32> [#uses=1]
-	%tmp4 = and i32 %tmp2, 131070		; <i32> [#uses=1]
-	%tmp7 = getelementptr i16* %X, i32 %tmp4		; <i16*> [#uses=1]
-	%tmp78 = bitcast i16* %tmp7 to i32*		; <i32*> [#uses=1]
-	%tmp9 = load i32* %tmp78, align 4		; <i32> [#uses=1]
-	ret i32 %tmp9
+  %i = load i16* %i.ptr
+  %i.zext = zext i16 %i to i32
+  %index = lshr i32 %i.zext, 11
+  %index.zext = zext i32 %index to i64
+  %val.ptr = getelementptr inbounds i32* %arr, i64 %index.zext
+  %val = load i32* %val.ptr
+  %sum.1 = add i32 %val, %i.zext
+  %sum.2 = add i32 %sum.1, %index
+  ret i32 %sum.2
 }
diff --git a/test/CodeGen/X86/fold-load.ll b/test/CodeGen/X86/fold-load.ll
index 5525af2..e03cb7e 100644
--- a/test/CodeGen/X86/fold-load.ll
+++ b/test/CodeGen/X86/fold-load.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s
 	%struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
 	%struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (...)*, void (...)*, i8*, i8 }
 @stmt_obstack = external global %struct.obstack		; <%struct.obstack*> [#uses=1]
diff --git a/test/CodeGen/X86/fold-pcmpeqd-0.ll b/test/CodeGen/X86/fold-pcmpeqd-0.ll
index 647bbdb..1d315ff 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-0.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-0.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=linearscan | FileCheck --check-prefix=I386 %s
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck --check-prefix=X86-64 %s
+; DISABLED: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=linearscan | FileCheck --check-prefix=I386 %s
+
+; i386 test has been disabled when scheduler 2-addr hack is disabled.
 
 ; This testcase shouldn't need to spill the -1 value,
 ; so it should just use pcmpeqd to materialize an all-ones vector.
diff --git a/test/CodeGen/X86/fold-pcmpeqd-2.ll b/test/CodeGen/X86/fold-pcmpeqd-2.ll
index 9f8d990..9cf4607 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-2.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-2.ll
@@ -1,15 +1,14 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=linearscan | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -regalloc=linearscan | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=basic | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -regalloc=basic | FileCheck %s
 
 ; This testcase should need to spill the -1 value on both x86-32 and x86-64,
 ; so it shouldn't use pcmpeqd to materialize an all-ones vector; it
 ; should use a constant-pool load instead.
+;
+; RAGreedy defeats the test by splitting live ranges.
 
 ; Constant pool all-ones vector:
-; CHECK: .long 4294967295
-; CHECK-NEXT: .long 4294967295
-; CHECK-NEXT: .long 4294967295
-; CHECK-NEXT: .long 4294967295
+; CHECK: .space 16,255
 
 ; No pcmpeqd instructions, everybody uses the constant pool.
 ; CHECK: program_1:
diff --git a/test/CodeGen/X86/fp-stack-O0.ll b/test/CodeGen/X86/fp-stack-O0.ll
index b9cb5d7..df90254 100644
--- a/test/CodeGen/X86/fp-stack-O0.ll
+++ b/test/CodeGen/X86/fp-stack-O0.ll
@@ -10,7 +10,7 @@ declare i32 @x2(x86_fp80, x86_fp80) nounwind
 ; Pass arguments on the stack.
 ; CHECK-NEXT: movq %rsp, [[RCX:%r..]]
 ; Copy constant-pool value.
-; CHECK-NEXT: fldt LCPI
+; CHECK-NEXT: fldl LCPI
 ; CHECK-NEXT: fstpt 16([[RCX]])
 ; Copy x1 return value.
 ; CHECK-NEXT: fstpt ([[RCX]])
diff --git a/test/CodeGen/X86/fp-stack-ret-conv.ll b/test/CodeGen/X86/fp-stack-ret-conv.ll
index f220b24..3e26141 100644
--- a/test/CodeGen/X86/fp-stack-ret-conv.ll
+++ b/test/CodeGen/X86/fp-stack-ret-conv.ll
@@ -10,7 +10,7 @@ entry:
 	%tmp13 = tail call double @foo()
 	%tmp1314 = fptrunc double %tmp13 to float		; <float> [#uses=1]
 	%tmp3940 = fpext float %tmp1314 to double		; <double> [#uses=1]
-	volatile store double %tmp3940, double* %b
+	store volatile double %tmp3940, double* %b
 	ret void
 }
 
diff --git a/test/CodeGen/X86/fsgsbase.ll b/test/CodeGen/X86/fsgsbase.ll
new file mode 100644
index 0000000..0c22e3c
--- /dev/null
+++ b/test/CodeGen/X86/fsgsbase.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86-64 -mcpu=core-avx-i -mattr=fsgsbase | FileCheck %s
+
+define i32 @test_x86_rdfsbase_32() {
+  ; CHECK: rdfsbasel
+  %res = call i32 @llvm.x86.rdfsbase.32()
+  ret i32 %res
+}
+declare i32 @llvm.x86.rdfsbase.32() nounwind readnone
+
+define i32 @test_x86_rdgsbase_32() {
+  ; CHECK: rdgsbasel
+  %res = call i32 @llvm.x86.rdgsbase.32()
+  ret i32 %res
+}
+declare i32 @llvm.x86.rdgsbase.32() nounwind readnone
+
+define i64 @test_x86_rdfsbase_64() {
+  ; CHECK: rdfsbaseq
+  %res = call i64 @llvm.x86.rdfsbase.64()
+  ret i64 %res
+}
+declare i64 @llvm.x86.rdfsbase.64() nounwind readnone
+
+define i64 @test_x86_rdgsbase_64() {
+  ; CHECK: rdgsbaseq
+  %res = call i64 @llvm.x86.rdgsbase.64()
+  ret i64 %res
+}
+declare i64 @llvm.x86.rdgsbase.64() nounwind readnone
+
+define void @test_x86_wrfsbase_32(i32 %x) {
+  ; CHECK: wrfsbasel
+  call void @llvm.x86.wrfsbase.32(i32 %x)
+  ret void
+}
+declare void @llvm.x86.wrfsbase.32(i32) nounwind readnone
+
+define void @test_x86_wrgsbase_32(i32 %x) {
+  ; CHECK: wrgsbasel
+  call void @llvm.x86.wrgsbase.32(i32 %x)
+  ret void
+}
+declare void @llvm.x86.wrgsbase.32(i32) nounwind readnone
+
+define void @test_x86_wrfsbase_64(i64 %x) {
+  ; CHECK: wrfsbaseq
+  call void @llvm.x86.wrfsbase.64(i64 %x)
+  ret void
+}
+declare void @llvm.x86.wrfsbase.64(i64) nounwind readnone
+
+define void @test_x86_wrgsbase_64(i64 %x) {
+  ; CHECK: wrgsbaseq
+  call void @llvm.x86.wrgsbase.64(i64 %x)
+  ret void
+}
+declare void @llvm.x86.wrgsbase.64(i64) nounwind readnone
diff --git a/test/CodeGen/X86/gcc_except_table.ll b/test/CodeGen/X86/gcc_except_table.ll
new file mode 100644
index 0000000..d89e9dc
--- /dev/null
+++ b/test/CodeGen/X86/gcc_except_table.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple x86_64-apple-darwin %s -o - | FileCheck %s
+@_ZTIi = external constant i8*
+
+define i32 @main() uwtable optsize ssp {
+entry:
+  invoke void @_Z1fv() optsize
+          to label %try.cont unwind label %lpad
+
+lpad:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* bitcast (i8** @_ZTIi to i8*)
+  br label %eh.resume
+
+try.cont:
+  ret i32 0
+
+eh.resume:
+  resume { i8*, i32 } %0
+}
+
+declare void @_Z1fv() optsize
+
+declare i32 @__gxx_personality_v0(...)
+
+; CHECK: Leh_func_end0:
+; CHECK: GCC_except_table0
+; CHECK: = Leh_func_end0-
diff --git a/test/CodeGen/X86/haddsub.ll b/test/CodeGen/X86/haddsub.ll
index 91758ea..5f1f4fd 100644
--- a/test/CodeGen/X86/haddsub.ll
+++ b/test/CodeGen/X86/haddsub.ll
@@ -192,3 +192,94 @@ define <4 x float> @hsubps4(<4 x float> %x) {
   %r = fsub <4 x float> %a, %b
   ret <4 x float> %r
 }
+
+; SSE3: vhaddps1:
+; SSE3-NOT: vhaddps
+; SSE3: haddps
+; SSE3: haddps
+; AVX: vhaddps1:
+; AVX: vhaddps
+define <8 x float> @vhaddps1(<8 x float> %x, <8 x float> %y) {
+  %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
+  %b = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
+  %r = fadd <8 x float> %a, %b
+  ret <8 x float> %r
+}
+
+; SSE3: vhaddps2:
+; SSE3-NOT: vhaddps
+; SSE3: haddps
+; SSE3: haddps
+; AVX: vhaddps2:
+; AVX: vhaddps
+define <8 x float> @vhaddps2(<8 x float> %x, <8 x float> %y) {
+  %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 2, i32 9, i32 10, i32 5, i32 6, i32 13, i32 14>
+  %b = shufflevector <8 x float> %y, <8 x float> %x, <8 x i32> <i32 8, i32 11, i32 0, i32 3, i32 12, i32 15, i32 4, i32 7>
+  %r = fadd <8 x float> %a, %b
+  ret <8 x float> %r
+}
+
+; SSE3: vhaddps3:
+; SSE3-NOT: vhaddps
+; SSE3: haddps
+; SSE3: haddps
+; AVX: vhaddps3:
+; AVX: vhaddps
+define <8 x float> @vhaddps3(<8 x float> %x) {
+  %a = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 8, i32 10, i32 4, i32 6, i32 undef, i32 14>
+  %b = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 9, i32 undef, i32 5, i32 7, i32 13, i32 15>
+  %r = fadd <8 x float> %a, %b
+  ret <8 x float> %r
+}
+
+; SSE3: vhsubps1:
+; SSE3-NOT: vhsubps
+; SSE3: hsubps
+; SSE3: hsubps
+; AVX: vhsubps1:
+; AVX: vhsubps
+define <8 x float> @vhsubps1(<8 x float> %x, <8 x float> %y) {
+  %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
+  %b = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
+  %r = fsub <8 x float> %a, %b
+  ret <8 x float> %r
+}
+
+; SSE3: vhsubps3:
+; SSE3-NOT: vhsubps
+; SSE3: hsubps
+; SSE3: hsubps
+; AVX: vhsubps3:
+; AVX: vhsubps
+define <8 x float> @vhsubps3(<8 x float> %x) {
+  %a = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 8, i32 10, i32 4, i32 6, i32 undef, i32 14>
+  %b = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 9, i32 undef, i32 5, i32 7, i32 13, i32 15>
+  %r = fsub <8 x float> %a, %b
+  ret <8 x float> %r
+}
+
+; SSE3: vhaddpd1:
+; SSE3-NOT: vhaddpd
+; SSE3: haddpd
+; SSE3: haddpd
+; AVX: vhaddpd1:
+; AVX: vhaddpd
+define <4 x double> @vhaddpd1(<4 x double> %x, <4 x double> %y) {
+  %a = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  %b = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  %r = fadd <4 x double> %a, %b
+  ret <4 x double> %r
+}
+
+; SSE3: vhsubpd1:
+; SSE3-NOT: vhsubpd
+; SSE3: hsubpd
+; SSE3: hsubpd
+; AVX: vhsubpd1:
+; AVX: vhsubpd
+define <4 x double> @vhsubpd1(<4 x double> %x, <4 x double> %y) {
+  %a = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  %b = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  %r = fsub <4 x double> %a, %b
+  ret <4 x double> %r
+}
diff --git a/test/CodeGen/X86/hoist-invariant-load.ll b/test/CodeGen/X86/hoist-invariant-load.ll
new file mode 100644
index 0000000..4289fa7
--- /dev/null
+++ b/test/CodeGen/X86/hoist-invariant-load.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -stats -O2 |& grep "1 machine-licm"
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.2"
+
+@"\01L_OBJC_METH_VAR_NAME_" = internal global [4 x i8] c"foo\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@"\01L_OBJC_SELECTOR_REFERENCES_" = internal global i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i64 0, i64 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_IMAGE_INFO" = internal constant [2 x i32] [i32 0, i32 16], section "__DATA, __objc_imageinfo, regular, no_dead_strip"
+@llvm.used = appending global [3 x i8*] [i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_" to i8*), i8* bitcast ([2 x i32]* @"\01L_OBJC_IMAGE_INFO" to i8*)], section "llvm.metadata"
+
+define void @test(i8* %x) uwtable ssp {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %0 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8, !invariant.load !0
+  %call = tail call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %x, i8* %0)
+  %inc = add i32 %i.01, 1
+  %exitcond = icmp eq i32 %inc, 10000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
+
+!0 = metadata !{}
diff --git a/test/CodeGen/X86/i128-sdiv.ll b/test/CodeGen/X86/i128-sdiv.ll
new file mode 100644
index 0000000..ab5cdda
--- /dev/null
+++ b/test/CodeGen/X86/i128-sdiv.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; Make sure none of these crash, and that the power-of-two transformations
+; trigger correctly.
+
+define i128 @test1(i128 %x) {
+  ; CHECK: test1:
+  ; CHECK-NOT: call
+  %tmp = sdiv i128 %x, 73786976294838206464
+  ret i128 %tmp
+}
+
+define i128 @test2(i128 %x) {
+  ; CHECK: test2:
+  ; CHECK-NOT: call
+  %tmp = sdiv i128 %x, -73786976294838206464
+  ret i128 %tmp
+}
+
+define i128 @test3(i128 %x) {
+  ; CHECK: test3:
+  ; CHECK: call
+  %tmp = sdiv i128 %x, -73786976294838206467
+  ret i128 %tmp
+}
diff --git a/test/CodeGen/X86/inline-asm-fpstack.ll b/test/CodeGen/X86/inline-asm-fpstack.ll
index c9a1c1c..2249618 100644
--- a/test/CodeGen/X86/inline-asm-fpstack.ll
+++ b/test/CodeGen/X86/inline-asm-fpstack.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=i386-apple-darwin | FileCheck %s
 
 ; There should be no stack manipulations between the inline asm and ret.
 ; CHECK: test1
diff --git a/test/CodeGen/X86/inline-asm-q-regs.ll b/test/CodeGen/X86/inline-asm-q-regs.ll
index 1c8e2f9..fca68ba 100644
--- a/test/CodeGen/X86/inline-asm-q-regs.ll
+++ b/test/CodeGen/X86/inline-asm-q-regs.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64
+; RUN: llc < %s -march=x86-64 -mattr=+avx
 ; rdar://7066579
 
 	%0 = type { i64, i64, i64, i64, i64 }		; type %0
@@ -20,3 +20,18 @@ define void @test3(double %tmp) nounwind {
   call void asm sideeffect "$0", "q"(double %tmp) nounwind
   ret void
 }
+
+; rdar://10392864
+define void @test4(i8 signext %val, i8 signext %a, i8 signext %b, i8 signext %c, i8 signext %d) nounwind {
+entry:
+  %0 = tail call { i8, i8, i8, i8, i8 } asm "foo $1, $2, $3, $4, $1\0Axchgb ${0:b}, ${0:h}", "=q,={ax},={bx},={cx},={dx},0,1,2,3,4,~{dirflag},~{fpsr},~{flags}"(i8 %val, i8 %a, i8 %b, i8 %c, i8 %d) nounwind
+  ret void
+}
+
+; rdar://10614894
+define <8 x float> @test5(<8 x float> %a, <8 x float> %b) nounwind {
+entry:
+  %0 = tail call <8 x float> asm "vperm2f128 $3, $2, $1, $0", "=x,x,x,i,~{dirflag},~{fpsr},~{flags}"(<8 x float> %a, <8 x float> %b, i32 16) nounwind
+  ret <8 x float> %0
+}
+
diff --git a/test/CodeGen/X86/inline-asm-tied.ll b/test/CodeGen/X86/inline-asm-tied.ll
index 79b6885..91576fb 100644
--- a/test/CodeGen/X86/inline-asm-tied.ll
+++ b/test/CodeGen/X86/inline-asm-tied.ll
@@ -1,6 +1,8 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin9 -O0 -regalloc=linearscan | grep {movl	%edx, 4(%esp)} | count 2
+; RUN: llc < %s -mtriple=i386-apple-darwin9 -O0 -optimize-regalloc -regalloc=basic | FileCheck %s
 ; rdar://6992609
 
+; CHECK: movl [[EDX:%e..]], 4(%esp)
+; CHECK: movl [[EDX]], 4(%esp)
 target triple = "i386-apple-darwin9.0"
 @llvm.used = appending global [1 x i8*] [i8* bitcast (i64 (i64)* @_OSSwapInt64 to i8*)], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
 
diff --git a/test/CodeGen/X86/iv-users-in-other-loops.ll b/test/CodeGen/X86/iv-users-in-other-loops.ll
deleted file mode 100644
index 8f79fb8..0000000
--- a/test/CodeGen/X86/iv-users-in-other-loops.ll
+++ /dev/null
@@ -1,300 +0,0 @@
-; RUN: llc < %s -march=x86-64 -enable-lsr-nested -o %t
-; RUN: not grep inc %t
-; RUN: grep dec %t | count 2
-; RUN: grep addq %t | count 12
-; RUN: not grep addb %t
-; RUN: not grep leaq %t
-; RUN: not grep leal %t
-; RUN: not grep movq %t
-
-; IV users in each of the loops from other loops shouldn't cause LSR
-; to insert new induction variables. Previously it would create a
-; flood of new induction variables.
-; Also, the loop reversal should kick in once.
-;
-; In this example, performing LSR on the entire loop nest,
-; as opposed to only the inner loop can further reduce induction variables,
-; and their related instructions and registers.
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define void @foo(float* %A, i32 %IA, float* %B, i32 %IB, float* nocapture %C, i32 %N) nounwind {
-entry:
-      %0 = xor i32 %IA, 1		; <i32> [#uses=1]
-      %1 = xor i32 %IB, 1		; <i32> [#uses=1]
-      %2 = or i32 %1, %0		; <i32> [#uses=1]
-      %3 = icmp eq i32 %2, 0		; <i1> [#uses=1]
-      br i1 %3, label %bb2, label %bb13
-
-bb:		; preds = %bb3
-      %4 = load float* %A_addr.0, align 4		; <float> [#uses=1]
-      %5 = load float* %B_addr.0, align 4		; <float> [#uses=1]
-      %6 = fmul float %4, %5		; <float> [#uses=1]
-      %7 = fadd float %6, %Sum0.0		; <float> [#uses=1]
-      %indvar.next154 = add i64 %B_addr.0.rec, 1		; <i64> [#uses=1]
-      br label %bb2
-
-bb2:		; preds = %entry, %bb
-      %B_addr.0.rec = phi i64 [ %indvar.next154, %bb ], [ 0, %entry ]		; <i64> [#uses=14]
-      %Sum0.0 = phi float [ %7, %bb ], [ 0.000000e+00, %entry ]		; <float> [#uses=5]
-      %indvar146 = trunc i64 %B_addr.0.rec to i32		; <i32> [#uses=1]
-      %N_addr.0 = sub i32 %N, %indvar146		; <i32> [#uses=6]
-      %A_addr.0 = getelementptr float* %A, i64 %B_addr.0.rec		; <float*> [#uses=4]
-      %B_addr.0 = getelementptr float* %B, i64 %B_addr.0.rec		; <float*> [#uses=4]
-      %8 = icmp sgt i32 %N_addr.0, 0		; <i1> [#uses=1]
-      br i1 %8, label %bb3, label %bb4
-
-bb3:		; preds = %bb2
-      %9 = ptrtoint float* %A_addr.0 to i64		; <i64> [#uses=1]
-      %10 = and i64 %9, 15		; <i64> [#uses=1]
-      %11 = icmp eq i64 %10, 0		; <i1> [#uses=1]
-      br i1 %11, label %bb4, label %bb
-
-bb4:		; preds = %bb3, %bb2
-      %12 = ptrtoint float* %B_addr.0 to i64		; <i64> [#uses=1]
-      %13 = and i64 %12, 15		; <i64> [#uses=1]
-      %14 = icmp eq i64 %13, 0		; <i1> [#uses=1]
-      %15 = icmp sgt i32 %N_addr.0, 15		; <i1> [#uses=2]
-      br i1 %14, label %bb6.preheader, label %bb10.preheader
-
-bb10.preheader:		; preds = %bb4
-      br i1 %15, label %bb9, label %bb12.loopexit
-
-bb6.preheader:		; preds = %bb4
-      br i1 %15, label %bb5, label %bb8.loopexit
-
-bb5:		; preds = %bb5, %bb6.preheader
-      %indvar143 = phi i64 [ 0, %bb6.preheader ], [ %indvar.next144, %bb5 ]		; <i64> [#uses=3]
-      %vSum0.072 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %21, %bb5 ]		; <<4 x float>> [#uses=1]
-	%vSum1.070 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %29, %bb5 ]		; <<4 x float>> [#uses=1]
-	%vSum2.069 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %37, %bb5 ]		; <<4 x float>> [#uses=1]
-	%vSum3.067 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %45, %bb5 ]		; <<4 x float>> [#uses=1]
-	%indvar145 = trunc i64 %indvar143 to i32		; <i32> [#uses=1]
-	%tmp150 = mul i32 %indvar145, -16		; <i32> [#uses=1]
-	%N_addr.268 = add i32 %tmp150, %N_addr.0		; <i32> [#uses=1]
-	%A_addr.273.rec = shl i64 %indvar143, 4		; <i64> [#uses=5]
-	%B_addr.0.sum180 = add i64 %B_addr.0.rec, %A_addr.273.rec		; <i64> [#uses=2]
-	%B_addr.271 = getelementptr float* %B, i64 %B_addr.0.sum180		; <float*> [#uses=1]
-	%A_addr.273 = getelementptr float* %A, i64 %B_addr.0.sum180		; <float*> [#uses=1]
-	tail call void asm sideeffect ";# foo", "~{dirflag},~{fpsr},~{flags}"() nounwind
-	%16 = bitcast float* %A_addr.273 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%17 = load <4 x float>* %16, align 16		; <<4 x float>> [#uses=1]
-	%18 = bitcast float* %B_addr.271 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%19 = load <4 x float>* %18, align 16		; <<4 x float>> [#uses=1]
-	%20 = fmul <4 x float> %17, %19		; <<4 x float>> [#uses=1]
-	%21 = fadd <4 x float> %20, %vSum0.072		; <<4 x float>> [#uses=2]
-	%A_addr.273.sum163 = or i64 %A_addr.273.rec, 4		; <i64> [#uses=1]
-	%A_addr.0.sum175 = add i64 %B_addr.0.rec, %A_addr.273.sum163		; <i64> [#uses=2]
-	%22 = getelementptr float* %A, i64 %A_addr.0.sum175		; <float*> [#uses=1]
-	%23 = bitcast float* %22 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%24 = load <4 x float>* %23, align 16		; <<4 x float>> [#uses=1]
-	%25 = getelementptr float* %B, i64 %A_addr.0.sum175		; <float*> [#uses=1]
-	%26 = bitcast float* %25 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%27 = load <4 x float>* %26, align 16		; <<4 x float>> [#uses=1]
-	%28 = fmul <4 x float> %24, %27		; <<4 x float>> [#uses=1]
-	%29 = fadd <4 x float> %28, %vSum1.070		; <<4 x float>> [#uses=2]
-	%A_addr.273.sum161 = or i64 %A_addr.273.rec, 8		; <i64> [#uses=1]
-	%A_addr.0.sum174 = add i64 %B_addr.0.rec, %A_addr.273.sum161		; <i64> [#uses=2]
-	%30 = getelementptr float* %A, i64 %A_addr.0.sum174		; <float*> [#uses=1]
-	%31 = bitcast float* %30 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%32 = load <4 x float>* %31, align 16		; <<4 x float>> [#uses=1]
-	%33 = getelementptr float* %B, i64 %A_addr.0.sum174		; <float*> [#uses=1]
-	%34 = bitcast float* %33 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%35 = load <4 x float>* %34, align 16		; <<4 x float>> [#uses=1]
-	%36 = fmul <4 x float> %32, %35		; <<4 x float>> [#uses=1]
-	%37 = fadd <4 x float> %36, %vSum2.069		; <<4 x float>> [#uses=2]
-	%A_addr.273.sum159 = or i64 %A_addr.273.rec, 12		; <i64> [#uses=1]
-	%A_addr.0.sum173 = add i64 %B_addr.0.rec, %A_addr.273.sum159		; <i64> [#uses=2]
-	%38 = getelementptr float* %A, i64 %A_addr.0.sum173		; <float*> [#uses=1]
-	%39 = bitcast float* %38 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%40 = load <4 x float>* %39, align 16		; <<4 x float>> [#uses=1]
-	%41 = getelementptr float* %B, i64 %A_addr.0.sum173		; <float*> [#uses=1]
-	%42 = bitcast float* %41 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%43 = load <4 x float>* %42, align 16		; <<4 x float>> [#uses=1]
-	%44 = fmul <4 x float> %40, %43		; <<4 x float>> [#uses=1]
-	%45 = fadd <4 x float> %44, %vSum3.067		; <<4 x float>> [#uses=2]
-	%.rec83 = add i64 %A_addr.273.rec, 16		; <i64> [#uses=1]
-	%A_addr.0.sum172 = add i64 %B_addr.0.rec, %.rec83		; <i64> [#uses=2]
-	%46 = getelementptr float* %A, i64 %A_addr.0.sum172		; <float*> [#uses=1]
-	%47 = getelementptr float* %B, i64 %A_addr.0.sum172		; <float*> [#uses=1]
-	%48 = add i32 %N_addr.268, -16		; <i32> [#uses=2]
-	%49 = icmp sgt i32 %48, 15		; <i1> [#uses=1]
-	%indvar.next144 = add i64 %indvar143, 1		; <i64> [#uses=1]
-	br i1 %49, label %bb5, label %bb8.loopexit
-
-bb7:		; preds = %bb7, %bb8.loopexit
-	%indvar130 = phi i64 [ 0, %bb8.loopexit ], [ %indvar.next131, %bb7 ]		; <i64> [#uses=3]
-	%vSum0.260 = phi <4 x float> [ %vSum0.0.lcssa, %bb8.loopexit ], [ %55, %bb7 ]		; <<4 x float>> [#uses=1]
-	%indvar132 = trunc i64 %indvar130 to i32		; <i32> [#uses=1]
-	%tmp133 = mul i32 %indvar132, -4		; <i32> [#uses=1]
-	%N_addr.358 = add i32 %tmp133, %N_addr.2.lcssa		; <i32> [#uses=1]
-	%A_addr.361.rec = shl i64 %indvar130, 2		; <i64> [#uses=3]
-	%B_addr.359 = getelementptr float* %B_addr.2.lcssa, i64 %A_addr.361.rec		; <float*> [#uses=1]
-	%A_addr.361 = getelementptr float* %A_addr.2.lcssa, i64 %A_addr.361.rec		; <float*> [#uses=1]
-	%50 = bitcast float* %A_addr.361 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%51 = load <4 x float>* %50, align 16		; <<4 x float>> [#uses=1]
-	%52 = bitcast float* %B_addr.359 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%53 = load <4 x float>* %52, align 16		; <<4 x float>> [#uses=1]
-	%54 = fmul <4 x float> %51, %53		; <<4 x float>> [#uses=1]
-	%55 = fadd <4 x float> %54, %vSum0.260		; <<4 x float>> [#uses=2]
-	%.rec85 = add i64 %A_addr.361.rec, 4		; <i64> [#uses=2]
-	%56 = getelementptr float* %A_addr.2.lcssa, i64 %.rec85		; <float*> [#uses=1]
-	%57 = getelementptr float* %B_addr.2.lcssa, i64 %.rec85		; <float*> [#uses=1]
-	%58 = add i32 %N_addr.358, -4		; <i32> [#uses=2]
-	%59 = icmp sgt i32 %58, 3		; <i1> [#uses=1]
-	%indvar.next131 = add i64 %indvar130, 1		; <i64> [#uses=1]
-	br i1 %59, label %bb7, label %bb13
-
-bb8.loopexit:		; preds = %bb5, %bb6.preheader
-	%A_addr.2.lcssa = phi float* [ %A_addr.0, %bb6.preheader ], [ %46, %bb5 ]		; <float*> [#uses=3]
-	%vSum0.0.lcssa = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %21, %bb5 ]		; <<4 x float>> [#uses=2]
-	%B_addr.2.lcssa = phi float* [ %B_addr.0, %bb6.preheader ], [ %47, %bb5 ]		; <float*> [#uses=3]
-	%vSum1.0.lcssa = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %29, %bb5 ]		; <<4 x float>> [#uses=2]
-	%vSum2.0.lcssa = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %37, %bb5 ]		; <<4 x float>> [#uses=2]
-	%N_addr.2.lcssa = phi i32 [ %N_addr.0, %bb6.preheader ], [ %48, %bb5 ]		; <i32> [#uses=3]
-	%vSum3.0.lcssa = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %45, %bb5 ]		; <<4 x float>> [#uses=2]
-	%60 = icmp sgt i32 %N_addr.2.lcssa, 3		; <i1> [#uses=1]
-	br i1 %60, label %bb7, label %bb13
-
-bb9:		; preds = %bb9, %bb10.preheader
-	%indvar106 = phi i64 [ 0, %bb10.preheader ], [ %indvar.next107, %bb9 ]		; <i64> [#uses=3]
-	%vSum0.339 = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %75, %bb9 ]		; <<4 x float>> [#uses=1]
-	%vSum1.237 = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %80, %bb9 ]		; <<4 x float>> [#uses=1]
-	%vSum2.236 = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %85, %bb9 ]		; <<4 x float>> [#uses=1]
-	%vSum3.234 = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %90, %bb9 ]		; <<4 x float>> [#uses=1]
-	%indvar108 = trunc i64 %indvar106 to i32		; <i32> [#uses=1]
-	%tmp113 = mul i32 %indvar108, -16		; <i32> [#uses=1]
-	%N_addr.435 = add i32 %tmp113, %N_addr.0		; <i32> [#uses=1]
-	%A_addr.440.rec = shl i64 %indvar106, 4		; <i64> [#uses=5]
-	%B_addr.0.sum = add i64 %B_addr.0.rec, %A_addr.440.rec		; <i64> [#uses=2]
-	%B_addr.438 = getelementptr float* %B, i64 %B_addr.0.sum		; <float*> [#uses=1]
-	%A_addr.440 = getelementptr float* %A, i64 %B_addr.0.sum		; <float*> [#uses=1]
-	%61 = bitcast float* %B_addr.438 to <4 x float>*		; <i8*> [#uses=1]
-	%62 = load <4 x float>* %61, align 1
-	%B_addr.438.sum169 = or i64 %A_addr.440.rec, 4		; <i64> [#uses=1]
-	%B_addr.0.sum187 = add i64 %B_addr.0.rec, %B_addr.438.sum169		; <i64> [#uses=2]
-	%63 = getelementptr float* %B, i64 %B_addr.0.sum187		; <float*> [#uses=1]
-	%64 = bitcast float* %63 to <4 x float>*		; <i8*> [#uses=1]
-	%65 = load <4 x float>* %64, align 1
-	%B_addr.438.sum168 = or i64 %A_addr.440.rec, 8		; <i64> [#uses=1]
-	%B_addr.0.sum186 = add i64 %B_addr.0.rec, %B_addr.438.sum168		; <i64> [#uses=2]
-	%66 = getelementptr float* %B, i64 %B_addr.0.sum186		; <float*> [#uses=1]
-	%67 = bitcast float* %66 to <4 x float>*		; <i8*> [#uses=1]
-	%68 = load <4 x float>* %67, align 1
-	%B_addr.438.sum167 = or i64 %A_addr.440.rec, 12		; <i64> [#uses=1]
-	%B_addr.0.sum185 = add i64 %B_addr.0.rec, %B_addr.438.sum167		; <i64> [#uses=2]
-	%69 = getelementptr float* %B, i64 %B_addr.0.sum185		; <float*> [#uses=1]
-	%70 = bitcast float* %69 to <4 x float>*		; <i8*> [#uses=1]
-	%71 = load <4 x float>* %70, align 1
-	%72 = bitcast float* %A_addr.440 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%73 = load <4 x float>* %72, align 16		; <<4 x float>> [#uses=1]
-	%74 = fmul <4 x float> %73, %62		; <<4 x float>> [#uses=1]
-	%75 = fadd <4 x float> %74, %vSum0.339		; <<4 x float>> [#uses=2]
-	%76 = getelementptr float* %A, i64 %B_addr.0.sum187		; <float*> [#uses=1]
-	%77 = bitcast float* %76 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%78 = load <4 x float>* %77, align 16		; <<4 x float>> [#uses=1]
-	%79 = fmul <4 x float> %78, %65		; <<4 x float>> [#uses=1]
-	%80 = fadd <4 x float> %79, %vSum1.237		; <<4 x float>> [#uses=2]
-	%81 = getelementptr float* %A, i64 %B_addr.0.sum186		; <float*> [#uses=1]
-	%82 = bitcast float* %81 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%83 = load <4 x float>* %82, align 16		; <<4 x float>> [#uses=1]
-	%84 = fmul <4 x float> %83, %68		; <<4 x float>> [#uses=1]
-	%85 = fadd <4 x float> %84, %vSum2.236		; <<4 x float>> [#uses=2]
-	%86 = getelementptr float* %A, i64 %B_addr.0.sum185		; <float*> [#uses=1]
-	%87 = bitcast float* %86 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%88 = load <4 x float>* %87, align 16		; <<4 x float>> [#uses=1]
-	%89 = fmul <4 x float> %88, %71		; <<4 x float>> [#uses=1]
-	%90 = fadd <4 x float> %89, %vSum3.234		; <<4 x float>> [#uses=2]
-	%.rec89 = add i64 %A_addr.440.rec, 16		; <i64> [#uses=1]
-	%A_addr.0.sum170 = add i64 %B_addr.0.rec, %.rec89		; <i64> [#uses=2]
-	%91 = getelementptr float* %A, i64 %A_addr.0.sum170		; <float*> [#uses=1]
-	%92 = getelementptr float* %B, i64 %A_addr.0.sum170		; <float*> [#uses=1]
-	%93 = add i32 %N_addr.435, -16		; <i32> [#uses=2]
-	%94 = icmp sgt i32 %93, 15		; <i1> [#uses=1]
-	%indvar.next107 = add i64 %indvar106, 1		; <i64> [#uses=1]
-	br i1 %94, label %bb9, label %bb12.loopexit
-
-bb11:		; preds = %bb11, %bb12.loopexit
-	%indvar = phi i64 [ 0, %bb12.loopexit ], [ %indvar.next, %bb11 ]		; <i64> [#uses=3]
-	%vSum0.428 = phi <4 x float> [ %vSum0.3.lcssa, %bb12.loopexit ], [ %100, %bb11 ]		; <<4 x float>> [#uses=1]
-	%indvar96 = trunc i64 %indvar to i32		; <i32> [#uses=1]
-	%tmp = mul i32 %indvar96, -4		; <i32> [#uses=1]
-	%N_addr.526 = add i32 %tmp, %N_addr.4.lcssa		; <i32> [#uses=1]
-	%A_addr.529.rec = shl i64 %indvar, 2		; <i64> [#uses=3]
-	%B_addr.527 = getelementptr float* %B_addr.4.lcssa, i64 %A_addr.529.rec		; <float*> [#uses=1]
-	%A_addr.529 = getelementptr float* %A_addr.4.lcssa, i64 %A_addr.529.rec		; <float*> [#uses=1]
-	%95 = bitcast float* %B_addr.527 to <4 x float>*		; <i8*> [#uses=1]
-	%96 = load <4 x float>* %95, align 1
-	%97 = bitcast float* %A_addr.529 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%98 = load <4 x float>* %97, align 16		; <<4 x float>> [#uses=1]
-	%99 = fmul <4 x float> %98, %96		; <<4 x float>> [#uses=1]
-	%100 = fadd <4 x float> %99, %vSum0.428		; <<4 x float>> [#uses=2]
-	%.rec91 = add i64 %A_addr.529.rec, 4		; <i64> [#uses=2]
-	%101 = getelementptr float* %A_addr.4.lcssa, i64 %.rec91		; <float*> [#uses=1]
-	%102 = getelementptr float* %B_addr.4.lcssa, i64 %.rec91		; <float*> [#uses=1]
-	%103 = add i32 %N_addr.526, -4		; <i32> [#uses=2]
-	%104 = icmp sgt i32 %103, 3		; <i1> [#uses=1]
-	%indvar.next = add i64 %indvar, 1		; <i64> [#uses=1]
-	br i1 %104, label %bb11, label %bb13
-
-bb12.loopexit:		; preds = %bb9, %bb10.preheader
-	%A_addr.4.lcssa = phi float* [ %A_addr.0, %bb10.preheader ], [ %91, %bb9 ]		; <float*> [#uses=3]
-	%vSum0.3.lcssa = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %75, %bb9 ]		; <<4 x float>> [#uses=2]
-	%B_addr.4.lcssa = phi float* [ %B_addr.0, %bb10.preheader ], [ %92, %bb9 ]		; <float*> [#uses=3]
-	%vSum1.2.lcssa = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %80, %bb9 ]		; <<4 x float>> [#uses=2]
-	%vSum2.2.lcssa = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %85, %bb9 ]		; <<4 x float>> [#uses=2]
-	%N_addr.4.lcssa = phi i32 [ %N_addr.0, %bb10.preheader ], [ %93, %bb9 ]		; <i32> [#uses=3]
-	%vSum3.2.lcssa = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %90, %bb9 ]		; <<4 x float>> [#uses=2]
-	%105 = icmp sgt i32 %N_addr.4.lcssa, 3		; <i1> [#uses=1]
-	br i1 %105, label %bb11, label %bb13
-
-bb13:		; preds = %bb12.loopexit, %bb11, %bb8.loopexit, %bb7, %entry
-	%Sum0.1 = phi float [ 0.000000e+00, %entry ], [ %Sum0.0, %bb7 ], [ %Sum0.0, %bb8.loopexit ], [ %Sum0.0, %bb11 ], [ %Sum0.0, %bb12.loopexit ]		; <float> [#uses=1]
-	%vSum3.1 = phi <4 x float> [ zeroinitializer, %entry ], [ %vSum3.0.lcssa, %bb7 ], [ %vSum3.0.lcssa, %bb8.loopexit ], [ %vSum3.2.lcssa, %bb11 ], [ %vSum3.2.lcssa, %bb12.loopexit ]		; <<4 x float>> [#uses=1]
-	%N_addr.1 = phi i32 [ %N, %entry ], [ %N_addr.2.lcssa, %bb8.loopexit ], [ %58, %bb7 ], [ %N_addr.4.lcssa, %bb12.loopexit ], [ %103, %bb11 ]		; <i32> [#uses=2]
-	%vSum2.1 = phi <4 x float> [ zeroinitializer, %entry ], [ %vSum2.0.lcssa, %bb7 ], [ %vSum2.0.lcssa, %bb8.loopexit ], [ %vSum2.2.lcssa, %bb11 ], [ %vSum2.2.lcssa, %bb12.loopexit ]		; <<4 x float>> [#uses=1]
-	%vSum1.1 = phi <4 x float> [ zeroinitializer, %entry ], [ %vSum1.0.lcssa, %bb7 ], [ %vSum1.0.lcssa, %bb8.loopexit ], [ %vSum1.2.lcssa, %bb11 ], [ %vSum1.2.lcssa, %bb12.loopexit ]		; <<4 x float>> [#uses=1]
-	%B_addr.1 = phi float* [ %B, %entry ], [ %B_addr.2.lcssa, %bb8.loopexit ], [ %57, %bb7 ], [ %B_addr.4.lcssa, %bb12.loopexit ], [ %102, %bb11 ]		; <float*> [#uses=1]
-	%vSum0.1 = phi <4 x float> [ zeroinitializer, %entry ], [ %vSum0.0.lcssa, %bb8.loopexit ], [ %55, %bb7 ], [ %vSum0.3.lcssa, %bb12.loopexit ], [ %100, %bb11 ]		; <<4 x float>> [#uses=1]
-	%A_addr.1 = phi float* [ %A, %entry ], [ %A_addr.2.lcssa, %bb8.loopexit ], [ %56, %bb7 ], [ %A_addr.4.lcssa, %bb12.loopexit ], [ %101, %bb11 ]		; <float*> [#uses=1]
-	%106 = fadd <4 x float> %vSum0.1, %vSum2.1		; <<4 x float>> [#uses=1]
-	%107 = fadd <4 x float> %vSum1.1, %vSum3.1		; <<4 x float>> [#uses=1]
-	%108 = fadd <4 x float> %106, %107		; <<4 x float>> [#uses=4]
-	%tmp23 = extractelement <4 x float> %108, i32 0		; <float> [#uses=1]
-	%tmp21 = extractelement <4 x float> %108, i32 1		; <float> [#uses=1]
-	%109 = fadd float %tmp23, %tmp21		; <float> [#uses=1]
-	%tmp19 = extractelement <4 x float> %108, i32 2		; <float> [#uses=1]
-	%tmp17 = extractelement <4 x float> %108, i32 3		; <float> [#uses=1]
-	%110 = fadd float %tmp19, %tmp17		; <float> [#uses=1]
-	%111 = fadd float %109, %110		; <float> [#uses=1]
-	%Sum0.254 = fadd float %111, %Sum0.1		; <float> [#uses=2]
-	%112 = icmp sgt i32 %N_addr.1, 0		; <i1> [#uses=1]
-	br i1 %112, label %bb.nph56, label %bb16
-
-bb.nph56:		; preds = %bb13
-	%tmp. = zext i32 %N_addr.1 to i64		; <i64> [#uses=1]
-	br label %bb14
-
-bb14:		; preds = %bb14, %bb.nph56
-	%indvar117 = phi i64 [ 0, %bb.nph56 ], [ %indvar.next118, %bb14 ]		; <i64> [#uses=3]
-	%Sum0.255 = phi float [ %Sum0.254, %bb.nph56 ], [ %Sum0.2, %bb14 ]		; <float> [#uses=1]
-	%tmp.122 = sext i32 %IB to i64		; <i64> [#uses=1]
-	%B_addr.652.rec = mul i64 %indvar117, %tmp.122		; <i64> [#uses=1]
-	%tmp.124 = sext i32 %IA to i64		; <i64> [#uses=1]
-	%A_addr.653.rec = mul i64 %indvar117, %tmp.124		; <i64> [#uses=1]
-	%B_addr.652 = getelementptr float* %B_addr.1, i64 %B_addr.652.rec		; <float*> [#uses=1]
-	%A_addr.653 = getelementptr float* %A_addr.1, i64 %A_addr.653.rec		; <float*> [#uses=1]
-	%113 = load float* %A_addr.653, align 4		; <float> [#uses=1]
-	%114 = load float* %B_addr.652, align 4		; <float> [#uses=1]
-	%115 = fmul float %113, %114		; <float> [#uses=1]
-	%Sum0.2 = fadd float %115, %Sum0.255		; <float> [#uses=2]
-	%indvar.next118 = add i64 %indvar117, 1		; <i64> [#uses=2]
-	%exitcond = icmp eq i64 %indvar.next118, %tmp.		; <i1> [#uses=1]
-	br i1 %exitcond, label %bb16, label %bb14
-
-bb16:		; preds = %bb14, %bb13
-	%Sum0.2.lcssa = phi float [ %Sum0.254, %bb13 ], [ %Sum0.2, %bb14 ]		; <float> [#uses=1]
-	store float %Sum0.2.lcssa, float* %C, align 4
-	ret void
-}
diff --git a/test/CodeGen/X86/jump_sign.ll b/test/CodeGen/X86/jump_sign.ll
index 5e8e162..dbd133c 100644
--- a/test/CodeGen/X86/jump_sign.ll
+++ b/test/CodeGen/X86/jump_sign.ll
@@ -1,7 +1,9 @@
-; RUN: llc < %s -march=x86 | grep jns
+; RUN: llc < %s -march=x86 -mcpu=pentiumpro | FileCheck %s
 
 define i32 @f(i32 %X) {
 entry:
+; CHECK: f:
+; CHECK: jns
 	%tmp1 = add i32 %X, 1		; <i32> [#uses=1]
 	%tmp = icmp slt i32 %tmp1, 0		; <i1> [#uses=1]
 	br i1 %tmp, label %cond_true, label %cond_next
@@ -18,3 +20,15 @@ cond_next:		; preds = %cond_true, %entry
 declare i32 @bar(...)
 
 declare i32 @baz(...)
+
+; rdar://10633221
+define i32 @g(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: g:
+; CHECK-NOT: test
+; CHECK: cmovs
+  %sub = sub nsw i32 %a, %b
+  %cmp = icmp sgt i32 %sub, 0
+  %cond = select i1 %cmp, i32 %sub, i32 0
+  ret i32 %cond
+}
diff --git a/test/CodeGen/X86/legalize-libcalls.ll b/test/CodeGen/X86/legalize-libcalls.ll
new file mode 100644
index 0000000..879dc98
--- /dev/null
+++ b/test/CodeGen/X86/legalize-libcalls.ll
@@ -0,0 +1,35 @@
+; RUN: llc -march=x86 < %s
+; RUN: llc -march=x86-64 < %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+
+define float @MakeSphere(float %theta.079) nounwind {
+entry:
+  %add36 = fadd float %theta.079, undef
+  %call = call float @cosf(float %theta.079) nounwind readnone
+  %call45 = call float @sinf(float %theta.079) nounwind readnone
+  %call37 = call float @sinf(float %add36) nounwind readnone
+  store float %call, float* undef, align 8
+  store float %call37, float* undef, align 8
+  store float %call45, float* undef, align 8
+  ret float %add36
+}
+
+define hidden fastcc void @unroll_loop(i64 %storemerge32129) nounwind {
+entry:
+  call fastcc void @copy_rtx() nounwind
+  call fastcc void @copy_rtx() nounwind
+  %tmp225 = alloca i8, i64 %storemerge32129, align 8 ; [#uses=0 type=i8*]
+  %cmp651201 = icmp slt i64 %storemerge32129, 0   ; [#uses=1 type=i1]
+  br i1 %cmp651201, label %for.body653.lr.ph, label %if.end638.for.end659_crit_edge
+
+for.body653.lr.ph:                                ; preds = %entry
+  unreachable
+
+if.end638.for.end659_crit_edge:                   ; preds = %entry
+  unreachable
+}
+
+declare float @cosf(float) nounwind readnone
+declare float @sinf(float) nounwind readnone
+declare hidden fastcc void @copy_rtx() nounwind
diff --git a/test/CodeGen/X86/legalize-shift-64.ll b/test/CodeGen/X86/legalize-shift-64.ll
new file mode 100644
index 0000000..c9f2fc2
--- /dev/null
+++ b/test/CodeGen/X86/legalize-shift-64.ll
@@ -0,0 +1,56 @@
+; RUN: llc -mcpu=generic -march=x86 < %s | FileCheck %s
+
+define i64 @test1(i32 %xx, i32 %test) nounwind {
+  %conv = zext i32 %xx to i64
+  %and = and i32 %test, 7
+  %sh_prom = zext i32 %and to i64
+  %shl = shl i64 %conv, %sh_prom
+  ret i64 %shl
+; CHECK: test1:
+; CHECK: shll	%cl, %eax
+; CHECK: shrl	%edx
+; CHECK: xorb	$31
+; CHECK: shrl	%cl, %edx
+}
+
+define i64 @test2(i64 %xx, i32 %test) nounwind {
+  %and = and i32 %test, 7
+  %sh_prom = zext i32 %and to i64
+  %shl = shl i64 %xx, %sh_prom
+  ret i64 %shl
+; CHECK: test2:
+; CHECK: shll	%cl, %esi
+; CHECK: shrl	%edx
+; CHECK: xorb	$31
+; CHECK: shrl	%cl, %edx
+; CHECK: orl	%esi, %edx
+; CHECK: shll	%cl, %eax
+}
+
+define i64 @test3(i64 %xx, i32 %test) nounwind {
+  %and = and i32 %test, 7
+  %sh_prom = zext i32 %and to i64
+  %shr = lshr i64 %xx, %sh_prom
+  ret i64 %shr
+; CHECK: test3:
+; CHECK: shrl	%cl, %esi
+; CHECK: leal	(%edx,%edx), %eax
+; CHECK: xorb	$31, %cl
+; CHECK: shll	%cl, %eax
+; CHECK: orl	%esi, %eax
+; CHECK: shrl	%cl, %edx
+}
+
+define i64 @test4(i64 %xx, i32 %test) nounwind {
+  %and = and i32 %test, 7
+  %sh_prom = zext i32 %and to i64
+  %shr = ashr i64 %xx, %sh_prom
+  ret i64 %shr
+; CHECK: test4:
+; CHECK: shrl	%cl, %esi
+; CHECK: leal	(%edx,%edx), %eax
+; CHECK: xorb	$31, %cl
+; CHECK: shll	%cl, %eax
+; CHECK: orl	%esi, %eax
+; CHECK: sarl	%cl, %edx
+}
diff --git a/test/CodeGen/X86/lit.local.cfg b/test/CodeGen/X86/lit.local.cfg
new file mode 100644
index 0000000..a8ad0f1
--- /dev/null
+++ b/test/CodeGen/X86/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/X86/log2_not_readnone.ll b/test/CodeGen/X86/log2_not_readnone.ll
new file mode 100644
index 0000000..5620835
--- /dev/null
+++ b/test/CodeGen/X86/log2_not_readnone.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=i386-linux-gnueabi %s -o - | FileCheck %s
+
+; Log2 and exp2 are string-matched to intrinsics. If they are not declared
+; readnone, they can't be changed to intrinsics (because they can change errno).
+
+declare double @log2(double)
+declare double @exp2(double)
+
+define void @f() {
+       ; CHECK: calll log2
+       %1 = call double @log2(double 0.000000e+00)
+       ; CHECK: calll exp2
+       %2 = call double @exp2(double 0.000000e+00)
+       ret void
+}
diff --git a/test/CodeGen/X86/loop-strength-reduce3.ll b/test/CodeGen/X86/loop-strength-reduce3.ll
deleted file mode 100644
index d6c265f..0000000
--- a/test/CodeGen/X86/loop-strength-reduce3.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; RUN: llc < %s -march=x86 -enable-lsr-nested | grep cmp | grep 240
-; RUN: llc < %s -march=x86 -enable-lsr-nested | grep inc | count 1
-
-define i32 @foo(i32 %A, i32 %B, i32 %C, i32 %D) nounwind {
-entry:
-	%tmp2955 = icmp sgt i32 %C, 0		; <i1> [#uses=1]
-	br i1 %tmp2955, label %bb26.outer.us, label %bb40.split
-
-bb26.outer.us:		; preds = %bb26.bb32_crit_edge.us, %entry
-	%i.044.0.ph.us = phi i32 [ 0, %entry ], [ %indvar.next57, %bb26.bb32_crit_edge.us ]		; <i32> [#uses=2]
-	%k.1.ph.us = phi i32 [ 0, %entry ], [ %k.0.us, %bb26.bb32_crit_edge.us ]		; <i32> [#uses=1]
-	%tmp3.us = mul i32 %i.044.0.ph.us, 6		; <i32> [#uses=1]
-	br label %bb1.us
-
-bb1.us:		; preds = %bb1.us, %bb26.outer.us
-	%j.053.us = phi i32 [ 0, %bb26.outer.us ], [ %tmp25.us, %bb1.us ]		; <i32> [#uses=2]
-	%k.154.us = phi i32 [ %k.1.ph.us, %bb26.outer.us ], [ %k.0.us, %bb1.us ]		; <i32> [#uses=1]
-	%tmp5.us = add i32 %tmp3.us, %j.053.us		; <i32> [#uses=1]
-	%tmp7.us = shl i32 %D, %tmp5.us		; <i32> [#uses=2]
-	%tmp9.us = icmp eq i32 %tmp7.us, %B		; <i1> [#uses=1]
-	%tmp910.us = zext i1 %tmp9.us to i32		; <i32> [#uses=1]
-	%tmp12.us = and i32 %tmp7.us, %A		; <i32> [#uses=1]
-	%tmp19.us = and i32 %tmp12.us, %tmp910.us		; <i32> [#uses=1]
-	%k.0.us = add i32 %tmp19.us, %k.154.us		; <i32> [#uses=3]
-	%tmp25.us = add i32 %j.053.us, 1		; <i32> [#uses=2]
-	%tmp29.us = icmp slt i32 %tmp25.us, %C		; <i1> [#uses=1]
-	br i1 %tmp29.us, label %bb1.us, label %bb26.bb32_crit_edge.us
-
-bb26.bb32_crit_edge.us:		; preds = %bb1.us
-	%indvar.next57 = add i32 %i.044.0.ph.us, 1		; <i32> [#uses=2]
-	%exitcond = icmp eq i32 %indvar.next57, 40		; <i1> [#uses=1]
-	br i1 %exitcond, label %bb40.split, label %bb26.outer.us
-
-bb40.split:		; preds = %bb26.bb32_crit_edge.us, %entry
-	%k.1.lcssa.lcssa.us-lcssa = phi i32 [ %k.0.us, %bb26.bb32_crit_edge.us ], [ 0, %entry ]		; <i32> [#uses=1]
-	ret i32 %k.1.lcssa.lcssa.us-lcssa
-}
diff --git a/test/CodeGen/X86/loop-strength-reduce5.ll b/test/CodeGen/X86/loop-strength-reduce5.ll
index b07eeb6..d50a668 100644
--- a/test/CodeGen/X86/loop-strength-reduce5.ll
+++ b/test/CodeGen/X86/loop-strength-reduce5.ll
@@ -11,9 +11,9 @@ entry:
 bb:		; preds = %bb, %entry
 	%i.014.0 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ]		; <i32> [#uses=2]
 	%tmp1 = trunc i32 %i.014.0 to i16		; <i16> [#uses=2]
-	volatile store i16 %tmp1, i16* @X, align 2
+	store volatile i16 %tmp1, i16* @X, align 2
 	%tmp34 = shl i16 %tmp1, 2		; <i16> [#uses=1]
-	volatile store i16 %tmp34, i16* @Y, align 2
+	store volatile i16 %tmp34, i16* @Y, align 2
 	%indvar.next = add i32 %i.014.0, 1		; <i32> [#uses=2]
 	%exitcond = icmp eq i32 %indvar.next, %N		; <i1> [#uses=1]
 	br i1 %exitcond, label %return, label %bb
diff --git a/test/CodeGen/X86/lsr-loop-exit-cond.ll b/test/CodeGen/X86/lsr-loop-exit-cond.ll
index 938023f..ebda9f2 100644
--- a/test/CodeGen/X86/lsr-loop-exit-cond.ll
+++ b/test/CodeGen/X86/lsr-loop-exit-cond.ll
@@ -1,6 +1,8 @@
-; RUN: llc -march=x86-64 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-darwin < %s | FileCheck %s
 
+; CHECK: t:
 ; CHECK: decq
+; CHECK-NEXT: movl (
 ; CHECK-NEXT: jne
 
 @Te0 = external global [256 x i32]		; <[256 x i32]*> [#uses=5]
@@ -135,3 +137,44 @@ bb2:		; preds = %bb
 	store i8 %92, i8* %93, align 1
 	ret void
 }
+
+; Check that DAGCombiner doesn't mess up the IV update when the exiting value
+; is equal to the stride.
+; It must not fold (cmp (add iv, 1), 1) --> (cmp iv, 0).
+
+; CHECK: f:
+; CHECK: %for.body
+; CHECK: incl [[IV:%e..]]
+; CHECK: cmpl $1, [[IV]]
+; CHECK: jne
+; CHECK: ret
+
+define i32 @f(i32 %i, i32* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+  %cmp4 = icmp eq i32 %i, 1
+  br i1 %cmp4, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %entry
+  %0 = sext i32 %i to i64
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %bi.06 = phi i32 [ 0, %for.body.lr.ph ], [ %i.addr.0.bi.0, %for.body ]
+  %b.05 = phi i32 [ 0, %for.body.lr.ph ], [ %.b.0, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %1 = load i32* %arrayidx, align 4
+  %cmp1 = icmp ugt i32 %1, %b.05
+  %.b.0 = select i1 %cmp1, i32 %1, i32 %b.05
+  %2 = trunc i64 %indvars.iv to i32
+  %i.addr.0.bi.0 = select i1 %cmp1, i32 %2, i32 %bi.06
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %bi.0.lcssa = phi i32 [ 0, %entry ], [ %i.addr.0.bi.0, %for.body ]
+  ret i32 %bi.0.lcssa
+}
+
diff --git a/test/CodeGen/X86/lsr-nonaffine.ll b/test/CodeGen/X86/lsr-nonaffine.ll
index d0d2bbd..d825b5a 100644
--- a/test/CodeGen/X86/lsr-nonaffine.ll
+++ b/test/CodeGen/X86/lsr-nonaffine.ll
@@ -19,7 +19,7 @@ entry:
 
 loop:
   %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
-  volatile store i64 %i, i64* %p
+  store volatile i64 %i, i64* %p
   %i.next = add i64 %i, %s
   %c = icmp slt i64 %i.next, %n
   br i1 %c, label %loop, label %exit
diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll
index 527a5a6..1311a73 100644
--- a/test/CodeGen/X86/lsr-reuse.ll
+++ b/test/CodeGen/X86/lsr-reuse.ll
@@ -1,4 +1,5 @@
 ; XFAIL: *
+; ...should pass. See PR12324: misched bringup
 ; RUN: llc < %s -march=x86-64 -O3 -asm-verbose=false | FileCheck %s
 target datalayout = "e-p:64:64:64"
 target triple = "x86_64-unknown-unknown"
diff --git a/test/CodeGen/X86/lsr-sort.ll b/test/CodeGen/X86/lsr-sort.ll
index 1f3b59a..b85ddeb 100644
--- a/test/CodeGen/X86/lsr-sort.ll
+++ b/test/CodeGen/X86/lsr-sort.ll
@@ -12,7 +12,7 @@ entry:
 bb:		; preds = %bb, %entry
 	%i.03 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ]		; <i32> [#uses=2]
 	%1 = trunc i32 %i.03 to i16		; <i16> [#uses=1]
-	volatile store i16 %1, i16* @X, align 2
+	store volatile i16 %1, i16* @X, align 2
 	%indvar.next = add i32 %i.03, 1		; <i32> [#uses=2]
 	%exitcond = icmp eq i32 %indvar.next, %N		; <i1> [#uses=1]
 	br i1 %exitcond, label %return, label %bb
diff --git a/test/CodeGen/X86/lzcnt.ll b/test/CodeGen/X86/lzcnt.ll
index e5a55ab..2faa24a 100644
--- a/test/CodeGen/X86/lzcnt.ll
+++ b/test/CodeGen/X86/lzcnt.ll
@@ -1,38 +1,62 @@
 ; RUN: llc < %s -march=x86-64 -mattr=+lzcnt | FileCheck %s
 
-define i32 @t1(i32 %x) nounwind  {
-	%tmp = tail call i32 @llvm.ctlz.i32( i32 %x )
-	ret i32 %tmp
+declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
+declare i16 @llvm.ctlz.i16(i16, i1) nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
+
+define i8 @t1(i8 %x) nounwind  {
+	%tmp = tail call i8 @llvm.ctlz.i8( i8 %x, i1 false )
+	ret i8 %tmp
 ; CHECK: t1:
 ; CHECK: lzcntl
 }
 
-declare i32 @llvm.ctlz.i32(i32) nounwind readnone
-
 define i16 @t2(i16 %x) nounwind  {
-	%tmp = tail call i16 @llvm.ctlz.i16( i16 %x )
+	%tmp = tail call i16 @llvm.ctlz.i16( i16 %x, i1 false )
 	ret i16 %tmp
 ; CHECK: t2:
 ; CHECK: lzcntw
 }
 
-declare i16 @llvm.ctlz.i16(i16) nounwind readnone
+define i32 @t3(i32 %x) nounwind  {
+	%tmp = tail call i32 @llvm.ctlz.i32( i32 %x, i1 false )
+	ret i32 %tmp
+; CHECK: t3:
+; CHECK: lzcntl
+}
 
-define i64 @t3(i64 %x) nounwind  {
-	%tmp = tail call i64 @llvm.ctlz.i64( i64 %x )
+define i64 @t4(i64 %x) nounwind  {
+	%tmp = tail call i64 @llvm.ctlz.i64( i64 %x, i1 false )
 	ret i64 %tmp
-; CHECK: t3:
+; CHECK: t4:
 ; CHECK: lzcntq
 }
 
-declare i64 @llvm.ctlz.i64(i64) nounwind readnone
-
-define i8 @t4(i8 %x) nounwind  {
-	%tmp = tail call i8 @llvm.ctlz.i8( i8 %x )
+define i8 @t5(i8 %x) nounwind  {
+	%tmp = tail call i8 @llvm.ctlz.i8( i8 %x, i1 true )
 	ret i8 %tmp
-; CHECK: t4:
+; CHECK: t5:
+; CHECK: lzcntl
+}
+
+define i16 @t6(i16 %x) nounwind  {
+	%tmp = tail call i16 @llvm.ctlz.i16( i16 %x, i1 true )
+	ret i16 %tmp
+; CHECK: t6:
 ; CHECK: lzcntw
 }
 
-declare i8 @llvm.ctlz.i8(i8) nounwind readnone
+define i32 @t7(i32 %x) nounwind  {
+	%tmp = tail call i32 @llvm.ctlz.i32( i32 %x, i1 true )
+	ret i32 %tmp
+; CHECK: t7:
+; CHECK: lzcntl
+}
 
+define i64 @t8(i64 %x) nounwind  {
+	%tmp = tail call i64 @llvm.ctlz.i64( i64 %x, i1 true )
+	ret i64 %tmp
+; CHECK: t8:
+; CHECK: lzcntq
+}
diff --git a/test/CodeGen/X86/machine-cp.ll b/test/CodeGen/X86/machine-cp.ll
new file mode 100644
index 0000000..54fa01c
--- /dev/null
+++ b/test/CodeGen/X86/machine-cp.ll
@@ -0,0 +1,36 @@
+; RUN: llc -mtriple=x86_64-apple-macosx -mcpu=nocona < %s | FileCheck %s
+
+; After tail duplication, two copies in an early exit BB can be cancelled out.
+; rdar://10640363
+define i32 @t1(i32 %a, i32 %b) nounwind  {
+entry:
+; CHECK: t1:
+; CHECK: jne
+  %cmp1 = icmp eq i32 %b, 0
+  br i1 %cmp1, label %while.end, label %while.body
+
+; CHECK: BB
+; CHECK-NOT: mov
+; CHECK: ret
+
+while.body:                                       ; preds = %entry, %while.body
+  %a.addr.03 = phi i32 [ %b.addr.02, %while.body ], [ %a, %entry ]
+  %b.addr.02 = phi i32 [ %rem, %while.body ], [ %b, %entry ]
+  %rem = srem i32 %a.addr.03, %b.addr.02
+  %cmp = icmp eq i32 %rem, 0
+  br i1 %cmp, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %a.addr.0.lcssa = phi i32 [ %a, %entry ], [ %b.addr.02, %while.body ]
+  ret i32 %a.addr.0.lcssa
+}
+
+; Two movdqa (from phi-elimination) in the entry BB cancels out.
+; rdar://10428165
+define <8 x i16> @t2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+; CHECK: t2:
+; CHECK-NOT: movdqa
+  %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
+  ret <8 x i16> %tmp8
+}
diff --git a/test/CodeGen/X86/machine-cse.ll b/test/CodeGen/X86/machine-cse.ll
index d819fc8..a757cde 100644
--- a/test/CodeGen/X86/machine-cse.ll
+++ b/test/CodeGen/X86/machine-cse.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-macosx < %s | FileCheck %s
 ; rdar://7610418
 
 %ptr = type { i8* }
@@ -77,3 +77,25 @@ bb.nph743.us:                                     ; preds = %for.body53.us, %if.
 sw.bb307:                                         ; preds = %sw.bb, %entry
   ret void
 }
+
+; CSE physical register defining instruction across MBB boundary.
+; rdar://10660865
+define i32 @cross_mbb_phys_cse(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK: cross_mbb_phys_cse:
+; CHECK: cmpl
+; CHECK: ja
+  %cmp = icmp ugt i32 %a, %b
+  br i1 %cmp, label %return, label %if.end
+
+if.end:                                           ; preds = %entry
+; CHECK-NOT: cmpl
+; CHECK: sbbl
+  %cmp1 = icmp ult i32 %a, %b
+  %. = sext i1 %cmp1 to i32
+  br label %return
+
+return:                                           ; preds = %if.end, %entry
+  %retval.0 = phi i32 [ 1, %entry ], [ %., %if.end ]
+  ret i32 %retval.0
+}
diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll
index 0b4d73a..a7b036e 100644
--- a/test/CodeGen/X86/masked-iv-safe.ll
+++ b/test/CodeGen/X86/masked-iv-safe.ll
@@ -1,12 +1,12 @@
-; RUN: llc < %s -march=x86-64 > %t
+; RUN: llc < %s -mcpu=generic -march=x86-64 > %t
 ; RUN: not grep and %t
 ; RUN: not grep movz %t
 ; RUN: not grep sar %t
 ; RUN: not grep shl %t
-; RUN: grep add %t | count 2
+; RUN: grep add %t | count 1
 ; RUN: grep inc %t | count 4
 ; RUN: grep dec %t | count 2
-; RUN: grep lea %t | count 2
+; RUN: grep lea %t | count 3
 
 ; Optimize away zext-inreg and sext-inreg on the loop induction
 ; variable using trip-count information.
diff --git a/test/CodeGen/X86/mcinst-avx-lowering.ll b/test/CodeGen/X86/mcinst-avx-lowering.ll
new file mode 100644
index 0000000..41f96e8
--- /dev/null
+++ b/test/CodeGen/X86/mcinst-avx-lowering.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mtriple=x86_64-apple-macosx10 -mattr=avx -show-mc-encoding < %s | FileCheck %s
+
+define i64 @t1(double %d_ivar) nounwind uwtable ssp {
+entry:
+; CHECK: t1
+  %0 = bitcast double %d_ivar to i64
+; CHECK: vmovd
+; CHECK: encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
+  ret i64 %0
+}
+
+define double @t2(i64 %d_ivar) nounwind uwtable ssp {
+entry:
+; CHECK: t2
+  %0 = bitcast i64 %d_ivar to double
+; CHECK: vmovd
+; CHECK: encoding: [0xc4,0xe1,0xf9,0x6e,0xc7]
+  ret double %0
+}
diff --git a/test/CodeGen/X86/memcpy.ll b/test/CodeGen/X86/memcpy.ll
index f43b0bf..86c6862 100644
--- a/test/CodeGen/X86/memcpy.ll
+++ b/test/CodeGen/X86/memcpy.ll
@@ -79,3 +79,16 @@ entry:
 ; LINUX movq
 }
 
+
+@.str = private unnamed_addr constant [30 x i8] c"\00aaaaaaaaaaaaaaaaaaaaaaaaaaaa\00", align 1
+
+define void @test5(i8* nocapture %C) nounwind uwtable ssp {
+entry:
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([30 x i8]* @.str, i64 0, i64 0), i64 16, i32 1, i1 false)
+  ret void
+
+; DARWIN: movabsq	$7016996765293437281
+; DARWIN: movabsq	$7016996765293437184
+}
+
+
diff --git a/test/CodeGen/X86/misched-new.ll b/test/CodeGen/X86/misched-new.ll
new file mode 100644
index 0000000..8f2f6f7
--- /dev/null
+++ b/test/CodeGen/X86/misched-new.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=x86-64 -mcpu=core2 -enable-misched -misched=shuffle -misched-bottomup < %s
+; REQUIRES: asserts
+;
+; Interesting MachineScheduler cases.
+;
+; FIXME: There should be an assert in the coalescer that we're not rematting
+; "not-quite-dead" copies, but that breaks a lot of tests <rdar://problem/11148682>.
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+; From oggenc.
+; After coalescing, we have a dead superreg (RAX) definition.
+;
+; CHECK: xorl %esi, %esi
+; CHECK: movl $32, %ecx
+; CHECK: rep;movsl
+define fastcc void @_preextrapolate_helper() nounwind uwtable ssp {
+entry:
+  br i1 undef, label %for.cond.preheader, label %if.end
+
+for.cond.preheader:                               ; preds = %entry
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* null, i64 128, i32 4, i1 false) nounwind
+  unreachable
+
+if.end:                                           ; preds = %entry
+  ret void
+}
diff --git a/test/CodeGen/X86/mmx-builtins.ll b/test/CodeGen/X86/mmx-builtins.ll
index 3ac0e4e..8b7200d 100644
--- a/test/CodeGen/X86/mmx-builtins.ll
+++ b/test/CodeGen/X86/mmx-builtins.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3 | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s
 
 declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone
 
diff --git a/test/CodeGen/X86/mmx-pinsrw.ll b/test/CodeGen/X86/mmx-pinsrw.ll
index 6062b50..d9c7c67 100644
--- a/test/CodeGen/X86/mmx-pinsrw.ll
+++ b/test/CodeGen/X86/mmx-pinsrw.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | grep pinsrw | count 1
+; RUN: llc < %s  -mtriple=x86_64-linux -mcpu=corei7 | grep pinsr
 ; PR2562
 
 external global i16		; <i16*>:0 [#uses=1]
diff --git a/test/CodeGen/X86/mmx-vzmovl-2.ll b/test/CodeGen/X86/mmx-vzmovl-2.ll
deleted file mode 100644
index a7ce7d9..0000000
--- a/test/CodeGen/X86/mmx-vzmovl-2.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep pxor
-; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep punpckldq
-
-	%struct.vS1024 = type { [8 x <4 x i32>] }
-	%struct.vS512 = type { [4 x <4 x i32>] }
-
-declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone
-
-define void @t() nounwind {
-entry:
-	br label %bb554
-
-bb554:		; preds = %bb554, %entry
-	%sum.0.reg2mem.0 = phi <1 x i64> [ %tmp562, %bb554 ], [ zeroinitializer, %entry ]		; <<1 x i64>> [#uses=1]
-	%0 = load x86_mmx* null, align 8		; <<1 x i64>> [#uses=2]
-	%1 = bitcast x86_mmx %0 to <2 x i32>		; <<2 x i32>> [#uses=1]
-	%tmp555 = and <2 x i32> %1, < i32 -1, i32 0 >		; <<2 x i32>> [#uses=1]
-	%2 = bitcast <2 x i32> %tmp555 to x86_mmx		; <<1 x i64>> [#uses=1]
-	%3 = call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %0, i32 32) nounwind readnone		; <<1 x i64>> [#uses=1]
-        store <1 x i64> %sum.0.reg2mem.0, <1 x i64>* null
-        %tmp3 = bitcast x86_mmx %2 to <1 x i64>
-	%tmp558 = add <1 x i64> %sum.0.reg2mem.0, %tmp3		; <<1 x i64>> [#uses=1]
-        %tmp5 = bitcast <1 x i64> %tmp558 to x86_mmx
-	%4 = call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %tmp5, i32 32) nounwind readnone		; <<1 x i64>> [#uses=1]
-        %tmp6 = bitcast x86_mmx %4 to <1 x i64>
-        %tmp7 = bitcast x86_mmx %3 to <1 x i64>
-	%tmp562 = add <1 x i64> %tmp6, %tmp7		; <<1 x i64>> [#uses=1]
-	br label %bb554
-}
diff --git a/test/CodeGen/X86/mmx-vzmovl.ll b/test/CodeGen/X86/mmx-vzmovl.ll
deleted file mode 100644
index 191e261..0000000
--- a/test/CodeGen/X86/mmx-vzmovl.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep movq | count 2
-; There are no MMX operations here; this is promoted to XMM.
-
-define void @foo(<1 x i64>* %a, <1 x i64>* %b) nounwind {
-entry:
-	%0 = load <1 x i64>* %a, align 8		; <<1 x i64>> [#uses=1]
-	%1 = bitcast <1 x i64> %0 to <2 x i32>		; <<2 x i32>> [#uses=1]
-	%2 = and <2 x i32> %1, < i32 -1, i32 0 >		; <<2 x i32>> [#uses=1]
-	%3 = bitcast <2 x i32> %2 to <1 x i64>		; <<1 x i64>> [#uses=1]
-	store <1 x i64> %3, <1 x i64>* %b, align 8
-	br label %bb2
-
-bb2:		; preds = %entry
-	ret void
-}
diff --git a/test/CodeGen/X86/movmsk.ll b/test/CodeGen/X86/movmsk.ll
index 2368548..928ad03 100644
--- a/test/CodeGen/X86/movmsk.ll
+++ b/test/CodeGen/X86/movmsk.ll
@@ -78,6 +78,22 @@ entry:
   ret i32 %shr.i
 }
 
+; PR11570
+define void @float_call_signbit(double %n) {
+entry:
+; FIXME: This should also use movmskps; we don't form the FGETSIGN node
+; in this case, though.
+; CHECK: float_call_signbit:
+; CHECK: movd %xmm0, %rdi
+; FIXME
+  %t0 = bitcast double %n to i64
+  %tobool.i.i.i.i = icmp slt i64 %t0, 0
+  tail call void @float_call_signbit_callee(i1 zeroext %tobool.i.i.i.i)
+  ret void
+}
+declare void @float_call_signbit_callee(i1 zeroext)
+
+
 ; rdar://10247336
 ; movmskp{s|d} only set low 4/2 bits, high bits are known zero
 
diff --git a/test/CodeGen/X86/multiple-loop-post-inc.ll b/test/CodeGen/X86/multiple-loop-post-inc.ll
index 51a0611..4f7e28a 100644
--- a/test/CodeGen/X86/multiple-loop-post-inc.ll
+++ b/test/CodeGen/X86/multiple-loop-post-inc.ll
@@ -1,6 +1,10 @@
 ; RUN: llc -asm-verbose=false -disable-branch-fold -disable-code-place -disable-tail-duplicate -march=x86-64 < %s | FileCheck %s
 ; rdar://7236213
 
+; Xfailed now that scheduler 2-address hack is disabled a lea is generated.
+; The code isn't any worse though.
+; XFAIL: *
+
 ; CodeGen shouldn't require any lea instructions inside the marked loop.
 ; It should properly set up post-increment uses and do coalescing for
 ; the induction variables.
diff --git a/test/CodeGen/X86/nancvt.ll b/test/CodeGen/X86/nancvt.ll
index 82b7331..8036710 100644
--- a/test/CodeGen/X86/nancvt.ll
+++ b/test/CodeGen/X86/nancvt.ll
@@ -52,8 +52,8 @@ bb:		; preds = %bb23
 	%tmp17 = ashr i64 %tmp16, %.cast		; <i64> [#uses=1]
 	%tmp1718 = trunc i64 %tmp17 to i32		; <i32> [#uses=1]
 	%tmp19 = getelementptr [10 x i8]* @.str, i32 0, i32 0		; <i8*> [#uses=1]
-	volatile store i32 %tmp1718, i32* @var
-	volatile store i32 %tmp13, i32* @var
+	store volatile i32 %tmp1718, i32* @var
+	store volatile i32 %tmp13, i32* @var
 	%tmp21 = load i32* %i, align 4		; <i32> [#uses=1]
 	%tmp22 = add i32 %tmp21, 1		; <i32> [#uses=1]
 	store i32 %tmp22, i32* %i, align 4
@@ -86,7 +86,7 @@ bb28:		; preds = %bb46
 	%tmp3940 = bitcast float* %tmp39 to i32*		; <i32*> [#uses=1]
 	%tmp41 = load i32* %tmp3940, align 4		; <i32> [#uses=1]
 	%tmp42 = getelementptr [6 x i8]* @.str1, i32 0, i32 0		; <i8*> [#uses=1]
-	volatile store i32 %tmp41, i32* @var
+	store volatile i32 %tmp41, i32* @var
 	%tmp44 = load i32* %i, align 4		; <i32> [#uses=1]
 	%tmp45 = add i32 %tmp44, 1		; <i32> [#uses=1]
 	store i32 %tmp45, i32* %i, align 4
@@ -127,8 +127,8 @@ bb52:		; preds = %bb78
 	%tmp72 = ashr i64 %tmp70, %.cast71		; <i64> [#uses=1]
 	%tmp7273 = trunc i64 %tmp72 to i32		; <i32> [#uses=1]
 	%tmp74 = getelementptr [10 x i8]* @.str, i32 0, i32 0		; <i8*> [#uses=1]
-	volatile store i32 %tmp7273, i32* @var
-	volatile store i32 %tmp66, i32* @var
+	store volatile i32 %tmp7273, i32* @var
+	store volatile i32 %tmp66, i32* @var
 	%tmp76 = load i32* %i, align 4		; <i32> [#uses=1]
 	%tmp77 = add i32 %tmp76, 1		; <i32> [#uses=1]
 	store i32 %tmp77, i32* %i, align 4
@@ -161,7 +161,7 @@ bb84:		; preds = %bb101
 	%tmp9495 = bitcast float* %tmp94 to i32*		; <i32*> [#uses=1]
 	%tmp96 = load i32* %tmp9495, align 4		; <i32> [#uses=1]
 	%tmp97 = getelementptr [6 x i8]* @.str1, i32 0, i32 0		; <i8*> [#uses=1]
-	volatile store i32 %tmp96, i32* @var
+	store volatile i32 %tmp96, i32* @var
 	%tmp99 = load i32* %i, align 4		; <i32> [#uses=1]
 	%tmp100 = add i32 %tmp99, 1		; <i32> [#uses=1]
 	store i32 %tmp100, i32* %i, align 4
diff --git a/test/CodeGen/X86/narrow-shl-load.ll b/test/CodeGen/X86/narrow-shl-load.ll
index ef27cbc..7822453 100644
--- a/test/CodeGen/X86/narrow-shl-load.ll
+++ b/test/CodeGen/X86/narrow-shl-load.ll
@@ -67,7 +67,7 @@ declare void @exit(i32) noreturn
 ; DAG Combiner can't fold this into a load of the 1'th byte.
 ; PR8757
 define i32 @test3(i32 *%P) nounwind ssp {
-  volatile store i32 128, i32* %P
+  store volatile i32 128, i32* %P
   %tmp4.pre = load i32* %P
   %phitmp = trunc i32 %tmp4.pre to i16
   %phitmp13 = shl i16 %phitmp, 8
diff --git a/test/CodeGen/X86/negate-add-zero.ll b/test/CodeGen/X86/negate-add-zero.ll
index c3f412e..92850f2 100644
--- a/test/CodeGen/X86/negate-add-zero.ll
+++ b/test/CodeGen/X86/negate-add-zero.ll
@@ -486,10 +486,6 @@ declare void @_ZN7CDSListIP9HingeNodeEC1Eii(%"struct.CDSList<HingeNode*>"*, i32,
 
 declare i8* @_Znwm(i32)
 
-declare i8* @llvm.eh.exception() nounwind
-
-declare i32 @llvm.eh.selector.i32(i8*, i8*, ...) nounwind
-
 declare i32 @llvm.eh.typeid.for.i32(i8*) nounwind
 
 declare void @_ZdlPv(i8*) nounwind
diff --git a/test/CodeGen/X86/no-cfi.ll b/test/CodeGen/X86/no-cfi.ll
index f9985d4..5bb9bb2 100644
--- a/test/CodeGen/X86/no-cfi.ll
+++ b/test/CodeGen/X86/no-cfi.ll
@@ -24,15 +24,11 @@ invoke.cont:
   ret void
 
 lpad:
-  %exn = call i8* @llvm.eh.exception() nounwind
-  %eh.selector = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* null) nounwind
+  %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+            catch i8* null
   ret void
 }
 
 declare i32 @foo()
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
 declare i32 @__gxx_personality_v0(...)
-
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
diff --git a/test/CodeGen/X86/nontemporal.ll b/test/CodeGen/X86/nontemporal.ll
index 1d09535..ae04435 100644
--- a/test/CodeGen/X86/nontemporal.ll
+++ b/test/CodeGen/X86/nontemporal.ll
@@ -3,13 +3,16 @@
 define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E) {
 ; CHECK: movntps
   %cast = bitcast i8* %B to <4 x float>*
-  store <4 x float> %A, <4 x float>* %cast, align 16, !nontemporal !0
+  %A2 = fadd <4 x float> %A, <float 0x0, float 0x0, float 0x0, float 0x4200000000000000>
+  store <4 x float> %A2, <4 x float>* %cast, align 16, !nontemporal !0
 ; CHECK: movntdq
   %cast1 = bitcast i8* %B to <2 x i64>*
-  store <2 x i64> %E, <2 x i64>* %cast1, align 16, !nontemporal !0
+  %E2 = add <2 x i64> %E, <i64 1, i64 2>
+  store <2 x i64> %E2, <2 x i64>* %cast1, align 16, !nontemporal !0
 ; CHECK: movntpd
   %cast2 = bitcast i8* %B to <2 x double>*
-  store <2 x double> %C, <2 x double>* %cast2, align 16, !nontemporal !0
+  %C2 = fadd <2 x double> %C, <double 0x0, double 0x4200000000000000>
+  store <2 x double> %C2, <2 x double>* %cast2, align 16, !nontemporal !0
 ; CHECK: movnti
   %cast3 = bitcast i8* %B to i32*
   store i32 %D, i32* %cast3, align 16, !nontemporal !0
diff --git a/test/CodeGen/X86/null-streamer.ll b/test/CodeGen/X86/null-streamer.ll
new file mode 100644
index 0000000..7c0e82f
--- /dev/null
+++ b/test/CodeGen/X86/null-streamer.ll
@@ -0,0 +1,11 @@
+; Check the MCNullStreamer operates correctly, at least on a minimal test case.
+;
+; RUN: llc -filetype=null -o %t -march=x86 %s
+
+define void @f0()  {
+  ret void
+}
+
+define void @f1() {
+  ret void
+}
diff --git a/test/CodeGen/X86/objc-gc-module-flags.ll b/test/CodeGen/X86/objc-gc-module-flags.ll
new file mode 100644
index 0000000..8cb2c03
--- /dev/null
+++ b/test/CodeGen/X86/objc-gc-module-flags.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+
+; CHECK:        .section  __DATA,__objc_imageinfo,regular,no_dead_strip
+; CHECK-NEXT: L_OBJC_IMAGE_INFO:
+; CHECK-NEXT:   .long  0
+; CHECK-NEXT:   .long  2
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+
+!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
+!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
+!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!3 = metadata !{i32 1, metadata !"Objective-C Garbage Collection", i32 2}
diff --git a/test/CodeGen/X86/object-size.ll b/test/CodeGen/X86/object-size.ll
index 0493edc..8f1eabd 100644
--- a/test/CodeGen/X86/object-size.ll
+++ b/test/CodeGen/X86/object-size.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -regalloc=linearscan < %s -march=x86-64 | FileCheck %s -check-prefix=X64
+; RUN: llc -O0 < %s -march=x86-64 | FileCheck %s -check-prefix=X64
 
 ; ModuleID = 'ts.c'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
@@ -12,8 +12,8 @@ entry:
   %tmp = load i8** @p                             ; <i8*> [#uses=1]
   %0 = call i64 @llvm.objectsize.i64(i8* %tmp, i1 0) ; <i64> [#uses=1]
   %cmp = icmp ne i64 %0, -1                       ; <i1> [#uses=1]
-; X64: movabsq $-1, %rax
-; X64: cmpq    $-1, %rax
+; X64: movabsq $-1, [[RAX:%r..]]
+; X64: cmpq    $-1, [[RAX]]
   br i1 %cmp, label %cond.true, label %cond.false
 
 cond.true:                                        ; preds = %entry
diff --git a/test/CodeGen/X86/odr_comdat.ll b/test/CodeGen/X86/odr_comdat.ll
new file mode 100644
index 0000000..547334c
--- /dev/null
+++ b/test/CodeGen/X86/odr_comdat.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=X86LINUX
+
+; Checking that a comdat group gets generated correctly for a static member 
+; of instantiated C++ templates.
+; see http://sourcery.mentor.com/public/cxx-abi/abi.html#vague-itemplate
+; section 5.2.6 Instantiated templates
+; "Any static member data object is emitted in a COMDAT identified by its mangled 
+;  name, in any object file with a reference to its name symbol."
+
+; Case 1: variable is not explicitly initialized, and ends up in a .bss section
+; X86LINUX:   .section        .bss._ZN1CIiE1iE,"aGw",@nobits,_ZN1CIiE1iE,comdat
+@_ZN1CIiE1iE = weak_odr global i32 0, align 4
+
+; Case 2: variable is explicitly initialized, and ends up in a .data section
+; X86LINUX:   .section        .data._ZN1CIiE1jE,"aGw",@progbits,_ZN1CIiE1jE,comdat
+@_ZN1CIiE1jE = weak_odr global i32 12, align 4
diff --git a/test/CodeGen/X86/optimize-max-3.ll b/test/CodeGen/X86/optimize-max-3.ll
index e42aa9d..d092916 100644
--- a/test/CodeGen/X86/optimize-max-3.ll
+++ b/test/CodeGen/X86/optimize-max-3.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-linux -asm-verbose=false | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-win32 -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32 -asm-verbose=false | FileCheck %s
 
 ; LSR's OptimizeMax should eliminate the select (max).
 
diff --git a/test/CodeGen/X86/overlap-shift.ll b/test/CodeGen/X86/overlap-shift.ll
index c1fc041..d185af1 100644
--- a/test/CodeGen/X86/overlap-shift.ll
+++ b/test/CodeGen/X86/overlap-shift.ll
@@ -13,7 +13,7 @@
 
 define i32 @test1(i32 %X) {
         %Z = shl i32 %X, 2              ; <i32> [#uses=1]
-        volatile store i32 %Z, i32* @G
+        store volatile i32 %Z, i32* @G
         ret i32 %X
 }
 
diff --git a/test/CodeGen/X86/peep-test-3.ll b/test/CodeGen/X86/peep-test-3.ll
index 528c4bc..a379980 100644
--- a/test/CodeGen/X86/peep-test-3.ll
+++ b/test/CodeGen/X86/peep-test-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -post-RA-scheduler=false | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 -post-RA-scheduler=false | FileCheck %s
 ; rdar://7226797
 
 ; LLVM should omit the testl and use the flags result from the orl.
diff --git a/test/CodeGen/X86/peep-vector-extract-insert.ll b/test/CodeGen/X86/peep-vector-extract-insert.ll
index 5e18044..d48a331 100644
--- a/test/CodeGen/X86/peep-vector-extract-insert.ll
+++ b/test/CodeGen/X86/peep-vector-extract-insert.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | grep {pxor	%xmm0, %xmm0} | count 2
+; RUN: llc < %s -march=x86-64 | grep {xorps	%xmm0, %xmm0} | count 2
 
 define float @foo(<4 x float> %a) {
   %b = insertelement <4 x float> %a, float 0.0, i32 3
diff --git a/test/CodeGen/X86/personality_size.ll b/test/CodeGen/X86/personality_size.ll
new file mode 100644
index 0000000..30a5d39
--- /dev/null
+++ b/test/CodeGen/X86/personality_size.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -relocation-model=pic -disable-cfi -mtriple=x86_64-pc-solaris2.11 -disable-cgp-branch-opts | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -relocation-model=pic -disable-cfi -mtriple=i386-pc-solaris2.11 -disable-cgp-branch-opts | FileCheck %s -check-prefix=X32
+; PR1632
+
+define void @_Z1fv() {
+entry:
+  invoke void @_Z1gv()
+          to label %return unwind label %unwind
+
+unwind:                                           ; preds = %entry
+  %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+            cleanup
+  ret void
+
+return:                                           ; preds = %eh_then, %entry
+  ret void
+}
+
+declare void @_Z1gv()
+
+declare i32 @__gxx_personality_v0(...)
+
+; X64:      .size	DW.ref.__gxx_personality_v0, 8
+; X64:      .quad	__gxx_personality_v0
+
+; X32:      .size	DW.ref.__gxx_personality_v0, 4
+; X32:      .long	__gxx_personality_v0
+
diff --git a/test/CodeGen/X86/phaddsub.ll b/test/CodeGen/X86/phaddsub.ll
new file mode 100644
index 0000000..62d85f7
--- /dev/null
+++ b/test/CodeGen/X86/phaddsub.ll
@@ -0,0 +1,170 @@
+; RUN: llc < %s -march=x86-64 -mattr=+ssse3,-avx | FileCheck %s -check-prefix=SSSE3
+; RUN: llc < %s -march=x86-64 -mattr=-ssse3,+avx | FileCheck %s -check-prefix=AVX
+
+; SSSE3: phaddw1:
+; SSSE3-NOT: vphaddw
+; SSSE3: phaddw
+; AVX: phaddw1:
+; AVX: vphaddw
+define <8 x i16> @phaddw1(<8 x i16> %x, <8 x i16> %y) {
+  %a = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %b = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %r = add <8 x i16> %a, %b
+  ret <8 x i16> %r
+}
+
+; SSSE3: phaddw2:
+; SSSE3-NOT: vphaddw
+; SSSE3: phaddw
+; AVX: phaddw2:
+; AVX: vphaddw
+define <8 x i16> @phaddw2(<8 x i16> %x, <8 x i16> %y) {
+  %a = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 1, i32 2, i32 5, i32 6, i32 9, i32 10, i32 13, i32 14>
+  %b = shufflevector <8 x i16> %y, <8 x i16> %x, <8 x i32> <i32 8, i32 11, i32 12, i32 15, i32 0, i32 3, i32 4, i32 7>
+  %r = add <8 x i16> %a, %b
+  ret <8 x i16> %r
+}
+
+; SSSE3: phaddd1:
+; SSSE3-NOT: vphaddd
+; SSSE3: phaddd
+; AVX: phaddd1:
+; AVX: vphaddd
+define <4 x i32> @phaddd1(<4 x i32> %x, <4 x i32> %y) {
+  %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %b = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %r = add <4 x i32> %a, %b
+  ret <4 x i32> %r
+}
+
+; SSSE3: phaddd2:
+; SSSE3-NOT: vphaddd
+; SSSE3: phaddd
+; AVX: phaddd2:
+; AVX: vphaddd
+define <4 x i32> @phaddd2(<4 x i32> %x, <4 x i32> %y) {
+  %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
+  %b = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 4, i32 7, i32 0, i32 3>
+  %r = add <4 x i32> %a, %b
+  ret <4 x i32> %r
+}
+
+; SSSE3: phaddd3:
+; SSSE3-NOT: vphaddd
+; SSSE3: phaddd
+; AVX: phaddd3:
+; AVX: vphaddd
+define <4 x i32> @phaddd3(<4 x i32> %x) {
+  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
+  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
+  %r = add <4 x i32> %a, %b
+  ret <4 x i32> %r
+}
+
+; SSSE3: phaddd4:
+; SSSE3-NOT: vphaddd
+; SSSE3: phaddd
+; AVX: phaddd4:
+; AVX: vphaddd
+define <4 x i32> @phaddd4(<4 x i32> %x) {
+  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
+  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
+  %r = add <4 x i32> %a, %b
+  ret <4 x i32> %r
+}
+
+; SSSE3: phaddd5:
+; SSSE3-NOT: vphaddd
+; SSSE3: phaddd
+; AVX: phaddd5:
+; AVX: vphaddd
+define <4 x i32> @phaddd5(<4 x i32> %x) {
+  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 undef, i32 undef>
+  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 undef, i32 undef>
+  %r = add <4 x i32> %a, %b
+  ret <4 x i32> %r
+}
+
+; SSSE3: phaddd6:
+; SSSE3-NOT: vphaddd
+; SSSE3: phaddd
+; AVX: phaddd6:
+; AVX: vphaddd
+define <4 x i32> @phaddd6(<4 x i32> %x) {
+  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %r = add <4 x i32> %a, %b
+  ret <4 x i32> %r
+}
+
+; SSSE3: phaddd7:
+; SSSE3-NOT: vphaddd
+; SSSE3: phaddd
+; AVX: phaddd7:
+; AVX: vphaddd
+define <4 x i32> @phaddd7(<4 x i32> %x) {
+  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 undef>
+  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 undef>
+  %r = add <4 x i32> %a, %b
+  ret <4 x i32> %r
+}
+
+; SSSE3: phsubw1:
+; SSSE3-NOT: vphsubw
+; SSSE3: phsubw
+; AVX: phsubw1:
+; AVX: vphsubw
+define <8 x i16> @phsubw1(<8 x i16> %x, <8 x i16> %y) {
+  %a = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %b = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %r = sub <8 x i16> %a, %b
+  ret <8 x i16> %r
+}
+
+; SSSE3: phsubd1:
+; SSSE3-NOT: vphsubd
+; SSSE3: phsubd
+; AVX: phsubd1:
+; AVX: vphsubd
+define <4 x i32> @phsubd1(<4 x i32> %x, <4 x i32> %y) {
+  %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %b = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %r = sub <4 x i32> %a, %b
+  ret <4 x i32> %r
+}
+
+; SSSE3: phsubd2:
+; SSSE3-NOT: vphsubd
+; SSSE3: phsubd
+; AVX: phsubd2:
+; AVX: vphsubd
+define <4 x i32> @phsubd2(<4 x i32> %x) {
+  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
+  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
+  %r = sub <4 x i32> %a, %b
+  ret <4 x i32> %r
+}
+
+; SSSE3: phsubd3:
+; SSSE3-NOT: vphsubd
+; SSSE3: phsubd
+; AVX: phsubd3:
+; AVX: vphsubd
+define <4 x i32> @phsubd3(<4 x i32> %x) {
+  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
+  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
+  %r = sub <4 x i32> %a, %b
+  ret <4 x i32> %r
+}
+
+; SSSE3: phsubd4:
+; SSSE3-NOT: vphsubd
+; SSSE3: phsubd
+; AVX: phsubd4:
+; AVX: vphsubd
+define <4 x i32> @phsubd4(<4 x i32> %x) {
+  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %r = sub <4 x i32> %a, %b
+  ret <4 x i32> %r
+}
diff --git a/test/CodeGen/X86/pic.ll b/test/CodeGen/X86/pic.ll
index fb60ac2..fc06309 100644
--- a/test/CodeGen/X86/pic.ll
+++ b/test/CodeGen/X86/pic.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i686-pc-linux-gnu -relocation-model=pic -asm-verbose=false -post-RA-scheduler=false | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mcpu=generic -mtriple=i686-pc-linux-gnu -relocation-model=pic -asm-verbose=false -post-RA-scheduler=false | FileCheck %s -check-prefix=LINUX
 
 @ptr = external global i32* 
 @dst = external global i32 
diff --git a/test/CodeGen/X86/pointer-vector.ll b/test/CodeGen/X86/pointer-vector.ll
new file mode 100644
index 0000000..cc1df2f
--- /dev/null
+++ b/test/CodeGen/X86/pointer-vector.ll
@@ -0,0 +1,138 @@
+; RUN: llc < %s -mtriple=i686-linux -mcpu=corei7 | FileCheck %s
+; RUN: opt -instsimplify %s -disable-output
+
+;CHECK: SHUFF0
+define <8 x i32*> @SHUFF0(<4 x i32*> %ptrv) nounwind {
+entry:
+  %G = shufflevector <4 x i32*> %ptrv, <4 x i32*> %ptrv, <8 x i32> <i32 2, i32 7, i32 1, i32 2, i32 4, i32 5, i32 1, i32 1>
+;CHECK: pshufd
+  ret <8 x i32*> %G
+;CHECK: ret
+}
+
+;CHECK: SHUFF1
+define <4 x i32*> @SHUFF1(<4 x i32*> %ptrv) nounwind {
+entry:
+  %G = shufflevector <4 x i32*> %ptrv, <4 x i32*> %ptrv, <4 x i32> <i32 2, i32 7, i32 7, i32 2>
+;CHECK: pshufd
+  ret <4 x i32*> %G
+;CHECK: ret
+}
+
+;CHECK: SHUFF3
+define <4 x i8*> @SHUFF3(<4 x i8*> %ptrv) nounwind {
+entry:
+  %G = shufflevector <4 x i8*> %ptrv, <4 x i8*> undef, <4 x i32> <i32 2, i32 7, i32 1, i32 2>
+;CHECK: pshufd
+  ret <4 x i8*> %G
+;CHECK: ret
+}
+
+;CHECK: LOAD0
+define <4 x i8*> @LOAD0(<4 x i8*>* %p) nounwind {
+entry:
+  %G = load <4 x i8*>* %p
+;CHECK: movaps
+  ret <4 x i8*> %G
+;CHECK: ret
+}
+
+;CHECK: LOAD1
+define <4 x i8*> @LOAD1(<4 x i8*>* %p) nounwind {
+entry:
+  %G = load <4 x i8*>* %p
+;CHECK: movdqa
+;CHECK: pshufd
+;CHECK: movdqa
+  %T = shufflevector <4 x i8*> %G, <4 x i8*> %G, <4 x i32> <i32 7, i32 1, i32 4, i32 3>
+  store <4 x i8*> %T, <4 x i8*>* %p
+  ret <4 x i8*> %G
+;CHECK: ret
+}
+
+;CHECK: LOAD2
+define <4 x i8*> @LOAD2(<4 x i8*>* %p) nounwind {
+entry:
+  %I = alloca <4 x i8*>
+;CHECK: sub
+  %G = load <4 x i8*>* %p
+;CHECK: movaps
+  store <4 x i8*> %G, <4 x i8*>* %I
+;CHECK: movaps
+  %Z = load <4 x i8*>* %I
+  ret <4 x i8*> %Z
+;CHECK: add
+;CHECK: ret
+}
+
+;CHECK: INT2PTR0
+define <4 x i32> @INT2PTR0(<4 x i8*>* %p) nounwind {
+entry:
+  %G = load <4 x i8*>* %p
+;CHECK: movl
+;CHECK: movaps
+  %K = ptrtoint <4 x i8*> %G to <4 x i32>
+;CHECK: ret
+  ret <4 x i32> %K
+}
+
+;CHECK: INT2PTR1
+define <4 x i32*> @INT2PTR1(<4 x i8>* %p) nounwind {
+entry:
+  %G = load <4 x i8>* %p
+;CHECK: movl
+;CHECK: movd
+;CHECK: pshufb
+;CHECK: pand
+  %K = inttoptr <4 x i8> %G to <4 x i32*>
+;CHECK: ret
+  ret <4 x i32*> %K
+}
+
+;CHECK: BITCAST0
+define <4 x i32*> @BITCAST0(<4 x i8*>* %p) nounwind {
+entry:
+  %G = load <4 x i8*>* %p
+;CHECK: movl
+  %T = bitcast <4 x i8*> %G to <4 x i32*>
+;CHECK: movaps
+;CHECK: ret
+  ret <4 x i32*> %T
+}
+
+;CHECK: BITCAST1
+define <2 x i32*> @BITCAST1(<2 x i8*>* %p) nounwind {
+entry:
+  %G = load <2 x i8*>* %p
+;CHECK: movl
+;CHECK: movd
+;CHECK: pinsrd
+  %T = bitcast <2 x i8*> %G to <2 x i32*>
+;CHECK: ret
+  ret <2 x i32*> %T
+}
+
+;CHECK: ICMP0
+define <4 x i32> @ICMP0(<4 x i8*>* %p0, <4 x i8*>* %p1) nounwind {
+entry:
+  %g0 = load <4 x i8*>* %p0
+  %g1 = load <4 x i8*>* %p1
+  %k = icmp sgt <4 x i8*> %g0, %g1
+  ;CHECK: pcmpgtd
+  %j = select <4 x i1> %k, <4 x i32> <i32 0, i32 1, i32 2, i32 4>, <4 x i32> <i32 9, i32 8, i32 7, i32 6>
+  ret <4 x i32> %j
+  ;CHECK: ret
+}
+
+;CHECK: ICMP1
+define <4 x i32> @ICMP1(<4 x i8*>* %p0, <4 x i8*>* %p1) nounwind {
+entry:
+  %g0 = load <4 x i8*>* %p0
+  %g1 = load <4 x i8*>* %p1
+  %k = icmp eq <4 x i8*> %g0, %g1
+  ;CHECK: pcmpeqd
+  %j = select <4 x i1> %k, <4 x i32> <i32 0, i32 1, i32 2, i32 4>, <4 x i32> <i32 9, i32 8, i32 7, i32 6>
+  ret <4 x i32> %j
+  ;CHECK: ret
+}
+
diff --git a/test/CodeGen/X86/pr11202.ll b/test/CodeGen/X86/pr11202.ll
new file mode 100644
index 0000000..13070d1
--- /dev/null
+++ b/test/CodeGen/X86/pr11202.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s
+
+@bb = constant [1 x i8*] [i8* blockaddress(@main, %l2)]
+
+define void @main() {
+entry:
+  br label %l1
+
+l1:                                               ; preds = %l2, %entry
+  %a = zext i1 false to i32
+  br label %l2
+
+l2:                                               ; preds = %l1
+  %b = zext i1 false to i32
+  br label %l1
+}
+
+; CHECK: .Ltmp0:                                 # Address of block that was removed by CodeGen
+; CHECK: .quad	.Ltmp0
diff --git a/test/CodeGen/X86/pr11415.ll b/test/CodeGen/X86/pr11415.ll
new file mode 100644
index 0000000..e1fa032
--- /dev/null
+++ b/test/CodeGen/X86/pr11415.ll
@@ -0,0 +1,23 @@
+; RUN: llc -mtriple=x86_64-pc-linux %s -o - -regalloc=fast | FileCheck %s
+
+; We used to consider the early clobber in the second asm statement as
+; defining %0 before it was read. This caused us to omit the
+; movq	-8(%rsp), %rdx
+
+; CHECK: 	#APP
+; CHECK-NEXT:	#NO_APP
+; CHECK-NEXT:	movq	%rcx, %rax
+; CHECK-NEXT:	movq	%rax, -8(%rsp)
+; CHECK-NEXT:	movq	-8(%rsp), %rdx
+; CHECK-NEXT:	#APP
+; CHECK-NEXT:	#NO_APP
+; CHECK-NEXT:	movq	%rdx, %rax
+; CHECK-NEXT:	movq	%rdx, -8(%rsp)
+; CHECK-NEXT:	ret
+
+define i64 @foo() {
+entry:
+  %0 = tail call i64 asm "", "={cx}"() nounwind
+  %1 = tail call i64 asm "", "=&r,0,r,~{rax}"(i64 %0, i64 %0) nounwind
+  ret i64 %1
+}
diff --git a/test/CodeGen/X86/pr12360.ll b/test/CodeGen/X86/pr12360.ll
new file mode 100644
index 0000000..f29e50e
--- /dev/null
+++ b/test/CodeGen/X86/pr12360.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+
+define zeroext i1 @f1(i8* %x) {
+; CHECK: f1:
+; CHECK: movb	(%rdi), %al
+; CHECK-NEXT: ret
+
+entry:
+  %0 = load i8* %x, align 1, !range !0
+  %tobool = trunc i8 %0 to i1
+  ret i1 %tobool
+}
+
+define zeroext i1 @f2(i8* %x) {
+; CHECK: f2:
+; CHECK: movb	(%rdi), %al
+; CHECK-NEXT: ret
+
+entry:
+  %0 = load i8* %x, align 1, !range !0
+  %tobool = icmp ne i8 %0, 0
+  ret i1 %tobool
+}
+
+!0 = metadata !{i8 0, i8 2}
+
+
+; check that we don't build a "trunc" from i1 to i1, which would assert.
+define zeroext i1 @f3(i1 %x) {
+; CHECK: f3:
+
+entry:
+  %tobool = icmp ne i1 %x, 0
+  ret i1 %tobool
+}
+
+; check that we don't build a trunc when other bits are needed
+define zeroext i1 @f4(i32 %x) {
+; CHECK: f4:
+; CHECK: and
+
+entry:
+  %y = and i32 %x, 32768
+  %z = icmp ne i32 %y, 0
+  ret i1 %z
+}
diff --git a/test/CodeGen/X86/pr1505b.ll b/test/CodeGen/X86/pr1505b.ll
index 945ec4c..9b0ef83 100644
--- a/test/CodeGen/X86/pr1505b.ll
+++ b/test/CodeGen/X86/pr1505b.ll
@@ -33,7 +33,7 @@ declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*)
 define i32 @main() {
 entry:
 ; CHECK: flds
-	%tmp6 = volatile load float* @a		; <float> [#uses=1]
+	%tmp6 = load volatile float* @a		; <float> [#uses=1]
 ; CHECK: fstps (%esp)
 ; CHECK: tanf
 	%tmp9 = tail call float @tanf( float %tmp6 )		; <float> [#uses=1]
@@ -41,7 +41,7 @@ entry:
 ; CHECK: fstp
 
 ; CHECK: fldl
-	%tmp12 = volatile load double* @b		; <double> [#uses=1]
+	%tmp12 = load volatile double* @b		; <double> [#uses=1]
 ; CHECK: fstpl (%esp)
 ; CHECK: tan
 	%tmp13 = tail call double @tan( double %tmp12 )		; <double> [#uses=1]
diff --git a/test/CodeGen/X86/pr2182.ll b/test/CodeGen/X86/pr2182.ll
index 2a8bb35..02a3605 100644
--- a/test/CodeGen/X86/pr2182.ll
+++ b/test/CodeGen/X86/pr2182.ll
@@ -15,17 +15,17 @@ define void @loop_2() nounwind  {
 ; CHECK-NEXT: addl $3, (%{{.*}})
 ; CHECK-NEXT: ret
 
-  %tmp = volatile load i32* @x, align 4           ; <i32> [#uses=1]
+  %tmp = load volatile i32* @x, align 4           ; <i32> [#uses=1]
   %tmp1 = add i32 %tmp, 3         ; <i32> [#uses=1]
-  volatile store i32 %tmp1, i32* @x, align 4
-  %tmp.1 = volatile load i32* @x, align 4         ; <i32> [#uses=1]
+  store volatile i32 %tmp1, i32* @x, align 4
+  %tmp.1 = load volatile i32* @x, align 4         ; <i32> [#uses=1]
   %tmp1.1 = add i32 %tmp.1, 3             ; <i32> [#uses=1]
-  volatile store i32 %tmp1.1, i32* @x, align 4
-  %tmp.2 = volatile load i32* @x, align 4         ; <i32> [#uses=1]
+  store volatile i32 %tmp1.1, i32* @x, align 4
+  %tmp.2 = load volatile i32* @x, align 4         ; <i32> [#uses=1]
   %tmp1.2 = add i32 %tmp.2, 3             ; <i32> [#uses=1]
-  volatile store i32 %tmp1.2, i32* @x, align 4
-  %tmp.3 = volatile load i32* @x, align 4         ; <i32> [#uses=1]
+  store volatile i32 %tmp1.2, i32* @x, align 4
+  %tmp.3 = load volatile i32* @x, align 4         ; <i32> [#uses=1]
   %tmp1.3 = add i32 %tmp.3, 3             ; <i32> [#uses=1]
-  volatile store i32 %tmp1.3, i32* @x, align 4
+  store volatile i32 %tmp1.3, i32* @x, align 4
   ret void
 }
diff --git a/test/CodeGen/X86/pr3495-2.ll b/test/CodeGen/X86/pr3495-2.ll
deleted file mode 100644
index a4204e5..0000000
--- a/test/CodeGen/X86/pr3495-2.ll
+++ /dev/null
@@ -1,54 +0,0 @@
-; RUN: llc < %s -march=x86 -relocation-model=pic -disable-fp-elim -stats -regalloc=linearscan |& grep {Number of loads added} | grep 1
-; PR3495
-;
-; This test may not be testing what it was supposed to test.
-; It used to have two spills and four reloads, but not it only has one spill and one reload.
-
-target datalayout = "e-p:32:32:32"
-target triple = "i386-apple-darwin9.6"
-	%struct.constraintVCGType = type { i32, i32, i32, i32 }
-	%struct.nodeVCGType = type { %struct.constraintVCGType*, i32, i32, i32, %struct.constraintVCGType*, i32, i32, i32 }
-
-define fastcc void @SCC_DFSBelowVCG(%struct.nodeVCGType* %VCG, i32 %net, i32 %label) nounwind {
-entry:
-	%0 = getelementptr %struct.nodeVCGType* %VCG, i32 %net, i32 5		; <i32*> [#uses=2]
-	%1 = load i32* %0, align 4		; <i32> [#uses=1]
-	%2 = icmp eq i32 %1, 0		; <i1> [#uses=1]
-	br i1 %2, label %bb5, label %bb.nph3
-
-bb.nph3:		; preds = %entry
-	%3 = getelementptr %struct.nodeVCGType* %VCG, i32 %net, i32 4		; <%struct.constraintVCGType**> [#uses=1]
-	br label %bb
-
-bb:		; preds = %bb3, %bb.nph3
-	%s.02 = phi i32 [ 0, %bb.nph3 ], [ %12, %bb3 ]		; <i32> [#uses=2]
-	%4 = load %struct.constraintVCGType** %3, align 4		; <%struct.constraintVCGType*> [#uses=1]
-	%5 = icmp eq i32 0, 0		; <i1> [#uses=1]
-	br i1 %5, label %bb1, label %bb3
-
-bb1:		; preds = %bb
-	%6 = getelementptr %struct.constraintVCGType* %4, i32 %s.02, i32 0		; <i32*> [#uses=1]
-	%7 = load i32* %6, align 4		; <i32> [#uses=2]
-	%8 = getelementptr %struct.nodeVCGType* %VCG, i32 %7, i32 7		; <i32*> [#uses=1]
-	%9 = load i32* %8, align 4		; <i32> [#uses=1]
-	%10 = icmp eq i32 %9, 0		; <i1> [#uses=1]
-	br i1 %10, label %bb2, label %bb3
-
-bb2:		; preds = %bb1
-	%11 = getelementptr %struct.nodeVCGType* %VCG, i32 %7, i32 4		; <%struct.constraintVCGType**> [#uses=0]
-	br label %bb.i
-
-bb.i:		; preds = %bb.i, %bb2
-	br label %bb.i
-
-bb3:		; preds = %bb1, %bb
-	%12 = add i32 %s.02, 1		; <i32> [#uses=2]
-	%13 = load i32* %0, align 4		; <i32> [#uses=1]
-	%14 = icmp ugt i32 %13, %12		; <i1> [#uses=1]
-	br i1 %14, label %bb, label %bb5
-
-bb5:		; preds = %bb3, %entry
-	%15 = getelementptr %struct.nodeVCGType* %VCG, i32 %net, i32 6		; <i32*> [#uses=1]
-	store i32 %label, i32* %15, align 4
-	ret void
-}
diff --git a/test/CodeGen/X86/pr3495.ll b/test/CodeGen/X86/pr3495.ll
deleted file mode 100644
index 7efd35b..0000000
--- a/test/CodeGen/X86/pr3495.ll
+++ /dev/null
@@ -1,81 +0,0 @@
-; RUN: llc < %s -march=x86 -stats -regalloc=linearscan -enable-lsr-nested |& grep {Number of loads added} | grep 2
-; RUN: llc < %s -march=x86 -stats -regalloc=linearscan -enable-lsr-nested |& grep {Number of spill slots allocated} | grep 1
-; RUN: llc < %s -march=x86 -stats -regalloc=linearscan -enable-lsr-nested |& grep {Number of machine instrs printed} | grep 34
-; PR3495
-;
-; Note: this should not spill at all with either good LSR or good regalloc.
-
-target triple = "i386-pc-linux-gnu"
-@x = external global [8 x i32], align 32		; <[8 x i32]*> [#uses=1]
-@rows = external global [8 x i32], align 32		; <[8 x i32]*> [#uses=2]
-@up = external global [15 x i32], align 32		; <[15 x i32]*> [#uses=2]
-@down = external global [15 x i32], align 32		; <[15 x i32]*> [#uses=1]
-
-define i32 @queens(i32 %c) nounwind {
-entry:
-	%tmp91 = add i32 %c, 1		; <i32> [#uses=3]
-	%tmp135 = getelementptr [8 x i32]* @x, i32 0, i32 %tmp91		; <i32*> [#uses=1]
-	br label %bb
-
-bb:		; preds = %bb569, %entry
-	%r25.0.reg2mem.0 = phi i32 [ 0, %entry ], [ %indvar.next715, %bb569 ]		; <i32> [#uses=4]
-	%tmp27 = getelementptr [8 x i32]* @rows, i32 0, i32 %r25.0.reg2mem.0		; <i32*> [#uses=1]
-	%tmp28 = load i32* %tmp27, align 4		; <i32> [#uses=1]
-	%tmp29 = icmp eq i32 %tmp28, 0		; <i1> [#uses=1]
-	br i1 %tmp29, label %bb569, label %bb31
-
-bb31:		; preds = %bb
-	%tmp35 = sub i32 %r25.0.reg2mem.0, 0		; <i32> [#uses=1]
-	%tmp36 = getelementptr [15 x i32]* @up, i32 0, i32 %tmp35		; <i32*> [#uses=1]
-	%tmp37 = load i32* %tmp36, align 4		; <i32> [#uses=1]
-	%tmp38 = icmp eq i32 %tmp37, 0		; <i1> [#uses=1]
-	br i1 %tmp38, label %bb569, label %bb41
-
-bb41:		; preds = %bb31
-	%tmp54 = sub i32 %r25.0.reg2mem.0, %c		; <i32> [#uses=1]
-	%tmp55 = add i32 %tmp54, 7		; <i32> [#uses=1]
-	%tmp62 = getelementptr [15 x i32]* @up, i32 0, i32 %tmp55		; <i32*> [#uses=2]
-	store i32 0, i32* %tmp62, align 4
-	br label %bb92
-
-bb92:		; preds = %bb545, %bb41
-	%r20.0.reg2mem.0 = phi i32 [ 0, %bb41 ], [ %indvar.next711, %bb545 ]		; <i32> [#uses=5]
-	%tmp94 = getelementptr [8 x i32]* @rows, i32 0, i32 %r20.0.reg2mem.0		; <i32*> [#uses=1]
-	%tmp95 = load i32* %tmp94, align 4		; <i32> [#uses=0]
-	%tmp112 = add i32 %r20.0.reg2mem.0, %tmp91		; <i32> [#uses=1]
-	%tmp113 = getelementptr [15 x i32]* @down, i32 0, i32 %tmp112		; <i32*> [#uses=2]
-	%tmp114 = load i32* %tmp113, align 4		; <i32> [#uses=1]
-	%tmp115 = icmp eq i32 %tmp114, 0		; <i1> [#uses=1]
-	br i1 %tmp115, label %bb545, label %bb118
-
-bb118:		; preds = %bb92
-	%tmp122 = sub i32 %r20.0.reg2mem.0, %tmp91		; <i32> [#uses=0]
-	store i32 0, i32* %tmp113, align 4
-	store i32 %r20.0.reg2mem.0, i32* %tmp135, align 4
-	br label %bb142
-
-bb142:		; preds = %bb142, %bb118
-	%k18.0.reg2mem.0 = phi i32 [ 0, %bb118 ], [ %indvar.next709, %bb142 ]		; <i32> [#uses=1]
-	%indvar.next709 = add i32 %k18.0.reg2mem.0, 1		; <i32> [#uses=2]
-	%exitcond710 = icmp eq i32 %indvar.next709, 8		; <i1> [#uses=1]
-	br i1 %exitcond710, label %bb155, label %bb142
-
-bb155:		; preds = %bb142
-	%tmp156 = tail call i32 @putchar(i32 10) nounwind		; <i32> [#uses=0]
-	br label %bb545
-
-bb545:		; preds = %bb155, %bb92
-	%indvar.next711 = add i32 %r20.0.reg2mem.0, 1		; <i32> [#uses=2]
-	%exitcond712 = icmp eq i32 %indvar.next711, 8		; <i1> [#uses=1]
-	br i1 %exitcond712, label %bb553, label %bb92
-
-bb553:		; preds = %bb545
-	store i32 1, i32* %tmp62, align 4
-	br label %bb569
-
-bb569:		; preds = %bb553, %bb31, %bb
-	%indvar.next715 = add i32 %r25.0.reg2mem.0, 1		; <i32> [#uses=1]
-	br label %bb
-}
-
-declare i32 @putchar(i32)
diff --git a/test/CodeGen/X86/prefetch.ll b/test/CodeGen/X86/prefetch.ll
index ebe11a5..ec2f302 100644
--- a/test/CodeGen/X86/prefetch.ll
+++ b/test/CodeGen/X86/prefetch.ll
@@ -1,4 +1,7 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s
+
+; rdar://10538297
 
 define void @t(i8* %ptr) nounwind  {
 entry:
diff --git a/test/CodeGen/X86/promote.ll b/test/CodeGen/X86/promote.ll
new file mode 100644
index 0000000..8b30dc7
--- /dev/null
+++ b/test/CodeGen/X86/promote.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i8:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+
+; CHECK: mul_f
+define i32 @mul_f(<4 x i8>* %A) {
+entry:
+; CHECK: pmul
+; CHECK-NOT: mulb
+  %0 = load <4 x i8>* %A, align 8
+  %mul = mul <4 x i8> %0, %0
+  store <4 x i8> %mul, <4 x i8>* undef
+  ret i32 0
+; CHECK: ret
+}
+
+
+; CHECK: shuff_f
+define i32 @shuff_f(<4 x i8>* %A) {
+entry:
+; CHECK: pshufb
+; CHECK: paddd
+; CHECK: pshufb
+  %0 = load <4 x i8>* %A, align 8
+  %add = add <4 x i8> %0, %0
+  store <4 x i8> %add, <4 x i8>* undef
+  ret i32 0
+; CHECK: ret
+}
+
+; CHECK: bitcast_widen
+define <2 x float> @bitcast_widen(<4 x i32> %in) nounwind readnone {
+entry:
+; CHECK-NOT: pshufd
+ %x = shufflevector <4 x i32> %in, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ %y = bitcast <2 x i32> %x to <2 x float>
+ ret <2 x float> %y
+; CHECK: ret
+}
+
diff --git a/test/CodeGen/X86/rd-mod-wr-eflags.ll b/test/CodeGen/X86/rd-mod-wr-eflags.ll
new file mode 100644
index 0000000..faca3d7
--- /dev/null
+++ b/test/CodeGen/X86/rd-mod-wr-eflags.ll
@@ -0,0 +1,179 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+%struct.obj = type { i64 }
+
+; CHECK: _Z7releaseP3obj
+define void @_Z7releaseP3obj(%struct.obj* nocapture %o) nounwind uwtable ssp {
+entry:
+; CHECK: decq	(%{{rdi|rcx}})
+; CHECK-NEXT: je
+  %refcnt = getelementptr inbounds %struct.obj* %o, i64 0, i32 0
+  %0 = load i64* %refcnt, align 8, !tbaa !0
+  %dec = add i64 %0, -1
+  store i64 %dec, i64* %refcnt, align 8, !tbaa !0
+  %tobool = icmp eq i64 %dec, 0
+  br i1 %tobool, label %if.end, label %return
+
+if.end:                                           ; preds = %entry
+  %1 = bitcast %struct.obj* %o to i8*
+  tail call void @free(i8* %1)
+  br label %return
+
+return:                                           ; preds = %entry, %if.end
+  ret void
+}
+
+@c = common global i64 0, align 8
+@a = common global i32 0, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%ld\0A\00", align 1
+@b = common global i32 0, align 4
+
+; CHECK: test
+define i32 @test() nounwind uwtable ssp {
+entry:
+; CHECK: decq
+; CHECK-NOT: decq
+%0 = load i64* @c, align 8, !tbaa !0
+%dec.i = add nsw i64 %0, -1
+store i64 %dec.i, i64* @c, align 8, !tbaa !0
+%tobool.i = icmp ne i64 %dec.i, 0
+%lor.ext.i = zext i1 %tobool.i to i32
+store i32 %lor.ext.i, i32* @a, align 4, !tbaa !3
+%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind
+ret i32 0
+}
+
+; CHECK: test2
+define i32 @test2() nounwind uwtable ssp {
+entry:
+; CHECK-NOT: decq ({{.*}})
+%0 = load i64* @c, align 8, !tbaa !0
+%dec.i = add nsw i64 %0, -1
+store i64 %dec.i, i64* @c, align 8, !tbaa !0
+%tobool.i = icmp ne i64 %0, 0
+%lor.ext.i = zext i1 %tobool.i to i32
+store i32 %lor.ext.i, i32* @a, align 4, !tbaa !3
+%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind
+ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+declare void @free(i8* nocapture) nounwind
+
+!0 = metadata !{metadata !"long", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"int", metadata !1}
+
+%struct.obj2 = type { i64, i32, i16, i8 }
+
+declare void @other(%struct.obj2* ) nounwind;
+
+; CHECK: example_dec
+define void @example_dec(%struct.obj2* %o) nounwind uwtable ssp {
+; 64 bit dec
+entry:
+  %s64 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 0
+; CHECK-NOT: load 
+  %0 = load i64* %s64, align 8
+; CHECK: decq ({{.*}})
+  %dec = add i64 %0, -1
+  store i64 %dec, i64* %s64, align 8
+  %tobool = icmp eq i64 %dec, 0
+  br i1 %tobool, label %if.end, label %return
+
+; 32 bit dec
+if.end:
+  %s32 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 1
+; CHECK-NOT: load 
+  %1 = load i32* %s32, align 4
+; CHECK: decl {{[0-9][0-9]*}}({{.*}})
+  %dec1 = add i32 %1, -1
+  store i32 %dec1, i32* %s32, align 4
+  %tobool2 = icmp eq i32 %dec1, 0
+  br i1 %tobool2, label %if.end1, label %return
+
+; 16 bit dec
+if.end1:
+  %s16 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 2
+; CHECK-NOT: load 
+  %2 = load i16* %s16, align 2
+; CHECK: decw {{[0-9][0-9]*}}({{.*}})
+  %dec2 = add i16 %2, -1
+  store i16 %dec2, i16* %s16, align 2
+  %tobool3 = icmp eq i16 %dec2, 0
+  br i1 %tobool3, label %if.end2, label %return
+
+; 8 bit dec
+if.end2:
+  %s8 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 3
+; CHECK-NOT: load 
+  %3 = load i8* %s8
+; CHECK: decb {{[0-9][0-9]*}}({{.*}})
+  %dec3 = add i8 %3, -1
+  store i8 %dec3, i8* %s8
+  %tobool4 = icmp eq i8 %dec3, 0
+  br i1 %tobool4, label %if.end4, label %return
+
+if.end4:
+  tail call void @other(%struct.obj2* %o) nounwind
+  br label %return
+
+return:                                           ; preds = %if.end4, %if.end, %entry                                                                                                                                                                               
+  ret void
+}
+
+; CHECK: example_inc
+define void @example_inc(%struct.obj2* %o) nounwind uwtable ssp {
+; 64 bit inc
+entry:
+  %s64 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 0
+; CHECK-NOT: load 
+  %0 = load i64* %s64, align 8
+; CHECK: incq ({{.*}})
+  %inc = add i64 %0, 1
+  store i64 %inc, i64* %s64, align 8
+  %tobool = icmp eq i64 %inc, 0
+  br i1 %tobool, label %if.end, label %return
+
+; 32 bit inc
+if.end:
+  %s32 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 1
+; CHECK-NOT: load 
+  %1 = load i32* %s32, align 4
+; CHECK: incl {{[0-9][0-9]*}}({{.*}})
+  %inc1 = add i32 %1, 1
+  store i32 %inc1, i32* %s32, align 4
+  %tobool2 = icmp eq i32 %inc1, 0
+  br i1 %tobool2, label %if.end1, label %return
+
+; 16 bit inc
+if.end1:
+  %s16 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 2
+; CHECK-NOT: load 
+  %2 = load i16* %s16, align 2
+; CHECK: incw {{[0-9][0-9]*}}({{.*}})
+  %inc2 = add i16 %2, 1
+  store i16 %inc2, i16* %s16, align 2
+  %tobool3 = icmp eq i16 %inc2, 0
+  br i1 %tobool3, label %if.end2, label %return
+
+; 8 bit inc
+if.end2:
+  %s8 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 3
+; CHECK-NOT: load 
+  %3 = load i8* %s8
+; CHECK: incb {{[0-9][0-9]*}}({{.*}})
+  %inc3 = add i8 %3, 1
+  store i8 %inc3, i8* %s8
+  %tobool4 = icmp eq i8 %inc3, 0
+  br i1 %tobool4, label %if.end4, label %return
+
+if.end4:
+  tail call void @other(%struct.obj2* %o) nounwind
+  br label %return
+
+return:
+  ret void
+}
diff --git a/test/CodeGen/X86/red-zone.ll b/test/CodeGen/X86/red-zone.ll
index d936971..d99a7a4 100644
--- a/test/CodeGen/X86/red-zone.ll
+++ b/test/CodeGen/X86/red-zone.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s
 
 ; First without noredzone.
 ; CHECK: f0:
diff --git a/test/CodeGen/X86/red-zone2.ll b/test/CodeGen/X86/red-zone2.ll
index 9557d17..f092163 100644
--- a/test/CodeGen/X86/red-zone2.ll
+++ b/test/CodeGen/X86/red-zone2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 > %t
+; RUN: llc < %s -mcpu=generic -march=x86-64 > %t
 ; RUN: grep subq %t | count 1
 ; RUN: grep addq %t | count 1
 
diff --git a/test/CodeGen/X86/reghinting.ll b/test/CodeGen/X86/reghinting.ll
index 87f65ed..6759115 100644
--- a/test/CodeGen/X86/reghinting.ll
+++ b/test/CodeGen/X86/reghinting.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-macosx | FileCheck %s
 ; PR10221
 
 ;; The registers %x and %y must both spill across the finit call.
diff --git a/test/CodeGen/X86/remat-scalar-zero.ll b/test/CodeGen/X86/remat-scalar-zero.ll
index f6f0ed1..75f438d 100644
--- a/test/CodeGen/X86/remat-scalar-zero.ll
+++ b/test/CodeGen/X86/remat-scalar-zero.ll
@@ -1,4 +1,5 @@
 ; XFAIL: *
+; ...should pass. See PR12324: misched bringup
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu > %t
 ; RUN: not grep xor %t
 ; RUN: not grep movap %t
diff --git a/test/CodeGen/X86/rounding-ops.ll b/test/CodeGen/X86/rounding-ops.ll
new file mode 100644
index 0000000..0dd74ea
--- /dev/null
+++ b/test/CodeGen/X86/rounding-ops.ll
@@ -0,0 +1,132 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse41 | FileCheck -check-prefix=CHECK-SSE %s
+; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck -check-prefix=CHECK-AVX %s
+
+define float @test1(float %x) nounwind  {
+  %call = tail call float @floorf(float %x) nounwind readnone
+  ret float %call
+
+; CHECK-SSE: test1:
+; CHECK-SSE: roundss $1
+
+; CHECK-AVX: test1:
+; CHECK-AVX: vroundss $1
+}
+
+declare float @floorf(float) nounwind readnone
+
+define double @test2(double %x) nounwind  {
+  %call = tail call double @floor(double %x) nounwind readnone
+  ret double %call
+
+; CHECK-SSE: test2:
+; CHECK-SSE: roundsd $1
+
+; CHECK-AVX: test2:
+; CHECK-AVX: vroundsd $1
+}
+
+declare double @floor(double) nounwind readnone
+
+define float @test3(float %x) nounwind  {
+  %call = tail call float @nearbyintf(float %x) nounwind readnone
+  ret float %call
+
+; CHECK-SSE: test3:
+; CHECK-SSE: roundss $12
+
+; CHECK-AVX: test3:
+; CHECK-AVX: vroundss $12
+}
+
+declare float @nearbyintf(float) nounwind readnone
+
+define double @test4(double %x) nounwind  {
+  %call = tail call double @nearbyint(double %x) nounwind readnone
+  ret double %call
+
+; CHECK-SSE: test4:
+; CHECK-SSE: roundsd $12
+
+; CHECK-AVX: test4:
+; CHECK-AVX: vroundsd $12
+}
+
+declare double @nearbyint(double) nounwind readnone
+
+define float @test5(float %x) nounwind  {
+  %call = tail call float @ceilf(float %x) nounwind readnone
+  ret float %call
+
+; CHECK-SSE: test5:
+; CHECK-SSE: roundss $2
+
+; CHECK-AVX: test5:
+; CHECK-AVX: vroundss $2
+}
+
+declare float @ceilf(float) nounwind readnone
+
+define double @test6(double %x) nounwind  {
+  %call = tail call double @ceil(double %x) nounwind readnone
+  ret double %call
+
+; CHECK-SSE: test6:
+; CHECK-SSE: roundsd $2
+
+; CHECK-AVX: test6:
+; CHECK-AVX: vroundsd $2
+}
+
+declare double @ceil(double) nounwind readnone
+
+define float @test7(float %x) nounwind  {
+  %call = tail call float @rintf(float %x) nounwind readnone
+  ret float %call
+
+; CHECK-SSE: test7:
+; CHECK-SSE: roundss $4
+
+; CHECK-AVX: test7:
+; CHECK-AVX: vroundss $4
+}
+
+declare float @rintf(float) nounwind readnone
+
+define double @test8(double %x) nounwind  {
+  %call = tail call double @rint(double %x) nounwind readnone
+  ret double %call
+
+; CHECK-SSE: test8:
+; CHECK-SSE: roundsd $4
+
+; CHECK-AVX: test8:
+; CHECK-AVX: vroundsd $4
+}
+
+declare double @rint(double) nounwind readnone
+
+define float @test9(float %x) nounwind  {
+  %call = tail call float @truncf(float %x) nounwind readnone
+  ret float %call
+
+; CHECK-SSE: test9:
+; CHECK-SSE: roundss $3
+
+; CHECK-AVX: test9:
+; CHECK-AVX: vroundss $3
+}
+
+declare float @truncf(float) nounwind readnone
+
+define double @test10(double %x) nounwind  {
+  %call = tail call double @trunc(double %x) nounwind readnone
+  ret double %call
+
+; CHECK-SSE: test10:
+; CHECK-SSE: roundsd $3
+
+; CHECK-AVX: test10:
+; CHECK-AVX: vroundsd $3
+}
+
+declare double @trunc(double) nounwind readnone
diff --git a/test/CodeGen/X86/scalar_widen_div.ll b/test/CodeGen/X86/scalar_widen_div.ll
index adc58ac..e99ea93 100644
--- a/test/CodeGen/X86/scalar_widen_div.ll
+++ b/test/CodeGen/X86/scalar_widen_div.ll
@@ -3,9 +3,10 @@
 ; Verify when widening a divide/remainder operation, we only generate a
 ; divide/rem per element since divide/remainder can trap.
 
+; CHECK: vectorDiv
 define void @vectorDiv (<2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)* %dsource, <2 x i32> addrspace(1)* %qdest) nounwind {
-; CHECK: idivl
-; CHECK: idivl
+; CHECK: idivq
+; CHECK: idivq
 ; CHECK-NOT: idivl
 ; CHECK: ret
 entry:
@@ -32,6 +33,7 @@ entry:
   ret void
 }
 
+; CHECK: test_char_div
 define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) {
 ; CHECK: idivb
 ; CHECK: idivb
@@ -42,6 +44,7 @@ define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) {
   ret <3 x i8>  %div.r
 }
 
+; CHECK: test_uchar_div
 define <3 x i8> @test_uchar_div(<3 x i8> %num, <3 x i8> %div) {
 ; CHECK: divb
 ; CHECK: divb
@@ -52,6 +55,7 @@ define <3 x i8> @test_uchar_div(<3 x i8> %num, <3 x i8> %div) {
   ret <3 x i8>  %div.r
 }
 
+; CHECK: test_short_div
 define <5 x i16> @test_short_div(<5 x i16> %num, <5 x i16> %div) {
 ; CHECK: idivw
 ; CHECK: idivw
@@ -64,17 +68,19 @@ define <5 x i16> @test_short_div(<5 x i16> %num, <5 x i16> %div) {
   ret <5 x i16>  %div.r
 }
 
+; CHECK: test_ushort_div
 define <4 x i16> @test_ushort_div(<4 x i16> %num, <4 x i16> %div) {
-; CHECK: divw
-; CHECK: divw
-; CHECK: divw
-; CHECK: divw
-; CHECK-NOT: divw
+; CHECK: divl
+; CHECK: divl
+; CHECK: divl
+; CHECK: divl
+; CHECK-NOT: divl
 ; CHECK: ret
   %div.r = udiv <4 x i16> %num, %div
   ret <4 x i16>  %div.r
 }
 
+; CHECK: test_uint_div
 define <3 x i32> @test_uint_div(<3 x i32> %num, <3 x i32> %div) {
 ; CHECK: divl
 ; CHECK: divl
@@ -85,6 +91,7 @@ define <3 x i32> @test_uint_div(<3 x i32> %num, <3 x i32> %div) {
   ret <3 x i32>  %div.r
 }
 
+; CHECK: test_long_div
 define <3 x i64> @test_long_div(<3 x i64> %num, <3 x i64> %div) {
 ; CHECK: idivq
 ; CHECK: idivq
@@ -95,6 +102,7 @@ define <3 x i64> @test_long_div(<3 x i64> %num, <3 x i64> %div) {
   ret <3 x i64>  %div.r
 }
 
+; CHECK: test_ulong_div
 define <3 x i64> @test_ulong_div(<3 x i64> %num, <3 x i64> %div) {
 ; CHECK: divq
 ; CHECK: divq
@@ -105,18 +113,19 @@ define <3 x i64> @test_ulong_div(<3 x i64> %num, <3 x i64> %div) {
   ret <3 x i64>  %div.r
 }
 
-
+; CHECK: test_char_rem
 define <4 x i8> @test_char_rem(<4 x i8> %num, <4 x i8> %rem) {
-; CHECK: idivb
-; CHECK: idivb
-; CHECK: idivb
-; CHECK: idivb
-; CHECK-NOT: idivb
+; CHECK: idivl
+; CHECK: idivl
+; CHECK: idivl
+; CHECK: idivl
+; CHECK-NOT: idivl
 ; CHECK: ret
   %rem.r = srem <4 x i8> %num, %rem
   ret <4 x i8>  %rem.r
 }
 
+; CHECK: test_short_rem
 define <5 x i16> @test_short_rem(<5 x i16> %num, <5 x i16> %rem) {
 ; CHECK: idivw
 ; CHECK: idivw
@@ -129,6 +138,7 @@ define <5 x i16> @test_short_rem(<5 x i16> %num, <5 x i16> %rem) {
   ret <5 x i16>  %rem.r
 }
 
+; CHECK: test_uint_rem
 define <4 x i32> @test_uint_rem(<4 x i32> %num, <4 x i32> %rem) {
 ; CHECK: idivl
 ; CHECK: idivl
@@ -141,6 +151,7 @@ define <4 x i32> @test_uint_rem(<4 x i32> %num, <4 x i32> %rem) {
 }
 
 
+; CHECK: test_ulong_rem
 define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) {
 ; CHECK: divq
 ; CHECK: divq
@@ -153,6 +164,7 @@ define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) {
   ret <5 x i64>  %rem.r
 }
 
+; CHECK: test_int_div
 define void @test_int_div(<3 x i32>* %dest, <3 x i32>* %old, i32 %n) {
 ; CHECK: idivl
 ; CHECK: idivl
diff --git a/test/CodeGen/X86/segmented-stacks-dynamic.ll b/test/CodeGen/X86/segmented-stacks-dynamic.ll
new file mode 100644
index 0000000..5ce08aa
--- /dev/null
+++ b/test/CodeGen/X86/segmented-stacks-dynamic.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux  -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -segmented-stacks -filetype=obj
+
+; Just to prevent the alloca from being optimized away
+declare void @dummy_use(i32*, i32)
+
+define i32 @test_basic(i32 %l) {
+        %mem = alloca i32, i32 %l
+        call void @dummy_use (i32* %mem, i32 %l)
+        %terminate = icmp eq i32 %l, 0
+        br i1 %terminate, label %true, label %false
+
+true:
+        ret i32 0
+
+false:
+        %newlen = sub i32 %l, 1
+        %retvalue = call i32 @test_basic(i32 %newlen)
+        ret i32 %retvalue
+
+; X32:      test_basic:
+
+; X32:      cmpl %gs:48, %esp
+; X32-NEXT: ja      .LBB0_2
+
+; X32:      pushl $4
+; X32-NEXT: pushl $12
+; X32-NEXT: calll __morestack
+; X32-NEXT: ret
+
+; X32:      movl %esp, %eax
+; X32-NEXT: subl %ecx, %eax
+; X32-NEXT: cmpl %eax, %gs:48
+
+; X32:      movl %eax, %esp
+
+; X32:      subl $12, %esp
+; X32-NEXT: pushl %ecx
+; X32-NEXT: calll __morestack_allocate_stack_space
+; X32-NEXT: addl $16, %esp
+
+; X64:      test_basic:
+
+; X64:      cmpq %fs:112, %rsp
+; X64-NEXT: ja      .LBB0_2
+
+; X64:      movabsq $24, %r10
+; X64-NEXT: movabsq $0, %r11
+; X64-NEXT: callq __morestack
+; X64-NEXT: ret
+
+; X64:      movq %rsp, %rdi
+; X64-NEXT: subq %rax, %rdi
+; X64-NEXT: cmpq %rdi, %fs:112
+
+; X64:      movq %rdi, %rsp
+
+; X64:      movq %rax, %rdi
+; X64-NEXT: callq __morestack_allocate_stack_space
+; X64-NEXT: movq %rax, %rdi
+
+}
diff --git a/test/CodeGen/X86/segmented-stacks.ll b/test/CodeGen/X86/segmented-stacks.ll
index ecdb00d..5407b87 100644
--- a/test/CodeGen/X86/segmented-stacks.ll
+++ b/test/CodeGen/X86/segmented-stacks.ll
@@ -1,60 +1,97 @@
-; RUN: llc < %s -mtriple=i686-linux -segmented-stacks | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-linux  -segmented-stacks | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-Linux
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux  -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-Linux
+; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-Darwin
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-Darwin
+; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-MinGW
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-freebsd -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-FreeBSD
+
+; We used to crash with filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-freebsd -segmented-stacks -filetype=obj
+
+; RUN: not llc < %s -mcpu=generic -mtriple=x86_64-solaris -segmented-stacks 2> %t.log
+; RUN: FileCheck %s -input-file=%t.log -check-prefix=X64-Solaris
+; RUN: not llc < %s -mcpu=generic -mtriple=x86_64-mingw32 -segmented-stacks 2> %t.log
+; RUN: FileCheck %s -input-file=%t.log -check-prefix=X64-MinGW
+; RUN: not llc < %s -mcpu=generic -mtriple=i686-freebsd -segmented-stacks 2> %t.log
+; RUN: FileCheck %s -input-file=%t.log -check-prefix=X32-FreeBSD
+
+; X64-Solaris: Segmented stacks not supported on this platform
+; X64-MinGW: Segmented stacks not supported on this platform
+; X32-FreeBSD: Segmented stacks not supported on FreeBSD i386
 
 ; Just to prevent the alloca from being optimized away
 declare void @dummy_use(i32*, i32)
 
-define i32 @test_basic(i32 %l) {
-        %mem = alloca i32, i32 %l
-        call void @dummy_use (i32* %mem, i32 %l)
-        %terminate = icmp eq i32 %l, 0
-        br i1 %terminate, label %true, label %false
+define void @test_basic() {
+        %mem = alloca i32, i32 10
+        call void @dummy_use (i32* %mem, i32 10)
+	ret void
+
+; X32-Linux:       test_basic:
+
+; X32-Linux:       cmpl %gs:48, %esp
+; X32-Linux-NEXT:  ja      .LBB0_2
 
-true:
-        ret i32 0
+; X32-Linux:       pushl $0
+; X32-Linux-NEXT:  pushl $60
+; X32-Linux-NEXT:  calll __morestack
+; X32-Linux-NEXT:  ret
 
-false:
-        %newlen = sub i32 %l, 1
-        %retvalue = call i32 @test_basic(i32 %newlen)
-        ret i32 %retvalue
+; X64-Linux:       test_basic:
 
-; X32:      test_basic:
+; X64-Linux:       cmpq %fs:112, %rsp
+; X64-Linux-NEXT:  ja      .LBB0_2
 
-; X32:      leal -12(%esp), %ecx
-; X32-NEXT: cmpl %gs:48, %ecx
+; X64-Linux:       movabsq $40, %r10
+; X64-Linux-NEXT:  movabsq $0, %r11
+; X64-Linux-NEXT:  callq __morestack
+; X64-Linux-NEXT:  ret
 
-; X32:      subl $8, %esp
-; X32-NEXT: pushl $4
-; X32-NEXT: pushl $12
-; X32-NEXT: calll __morestack
-; X32-NEXT: addl $8, %esp
-; X32-NEXT: ret 
+; X32-Darwin:      test_basic:
 
-; X32:      movl %eax, %esp
+; X32-Darwin:      movl $432, %ecx
+; X32-Darwin-NEXT: cmpl %gs:(%ecx), %esp
+; X32-Darwin-NEXT: ja      LBB0_2
 
-; X32:      subl $12, %esp
-; X32-NEXT: pushl %ecx
-; X32-NEXT: calll __morestack_allocate_stack_space
-; X32-NEXT: addl $16, %esp
+; X32-Darwin:      pushl $0
+; X32-Darwin-NEXT: pushl $60
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
 
-; X64:      test_basic:
+; X64-Darwin:      test_basic:
 
-; X64:      leaq -24(%rsp), %r11
-; X64-NEXT: cmpq %fs:112, %r11
+; X64-Darwin:      cmpq %gs:816, %rsp
+; X64-Darwin-NEXT: ja      LBB0_2
 
-; X64:      movabsq $24, %r10
-; X64-NEXT: movabsq $0, %r11
-; X64-NEXT: callq __morestack
-; X64-NEXT: ret
+; X64-Darwin:      movabsq $40, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
 
-; X64:      movq %rsp, %rax
-; X64-NEXT: subq %rcx, %rax
-; X64-NEXT: cmpq %rax, %fs:112
+; X32-MinGW:       test_basic:
 
-; X64:      movq %rax, %rsp
+; X32-MinGW:       cmpl %fs:20, %esp
+; X32-MinGW-NEXT:  ja      LBB0_2
 
-; X64:      movq %rcx, %rdi
-; X64-NEXT: callq __morestack_allocate_stack_space
+; X32-MinGW:       pushl $0
+; X32-MinGW-NEXT:  pushl $48
+; X32-MinGW-NEXT:  calll ___morestack
+; X32-MinGW-NEXT:  ret
+
+; X64-FreeBSD:       test_basic:
+
+; X64-FreeBSD:       cmpq %fs:24, %rsp
+; X64-FreeBSD-NEXT:  ja      .LBB0_2
+
+; X64-FreeBSD:       movabsq $40, %r10
+; X64-FreeBSD-NEXT:  movabsq $0, %r11
+; X64-FreeBSD-NEXT:  callq __morestack
+; X64-FreeBSD-NEXT:  ret
 
 }
 
@@ -63,25 +100,286 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) {
        %result = add i32 %other, %addend
        ret i32 %result
 
-; X32:      leal (%esp), %edx
-; X32-NEXT: cmpl %gs:48, %edx
+; X32-Linux:       cmpl %gs:48, %esp
+; X32-Linux-NEXT:  ja      .LBB1_2
+
+; X32-Linux:       pushl $4
+; X32-Linux-NEXT:  pushl $0
+; X32-Linux-NEXT:  calll __morestack
+; X32-Linux-NEXT:  ret
+
+; X64-Linux:       cmpq %fs:112, %rsp
+; X64-Linux-NEXT:  ja      .LBB1_2
+
+; X64-Linux:       movq %r10, %rax
+; X64-Linux-NEXT:  movabsq $0, %r10
+; X64-Linux-NEXT:  movabsq $0, %r11
+; X64-Linux-NEXT:  callq __morestack
+; X64-Linux-NEXT:  ret
+; X64-Linux-NEXT:  movq %rax, %r10
+
+; X32-Darwin:      movl $432, %edx
+; X32-Darwin-NEXT: cmpl %gs:(%edx), %esp
+; X32-Darwin-NEXT: ja      LBB1_2
+
+; X32-Darwin:      pushl $4
+; X32-Darwin-NEXT: pushl $0
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
+
+; X64-Darwin:      cmpq %gs:816, %rsp
+; X64-Darwin-NEXT: ja      LBB1_2
+
+; X64-Darwin:      movq %r10, %rax
+; X64-Darwin-NEXT: movabsq $0, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
+; X64-Darwin-NEXT: movq %rax, %r10
+
+; X32-MinGW:       cmpl %fs:20, %esp
+; X32-MinGW-NEXT:  ja      LBB1_2
+
+; X32-MinGW:       pushl $4
+; X32-MinGW-NEXT:  pushl $0
+; X32-MinGW-NEXT:  calll ___morestack
+; X32-MinGW-NEXT:  ret
+
+; X64-FreeBSD:       cmpq %fs:24, %rsp
+; X64-FreeBSD-NEXT:  ja      .LBB1_2
+
+; X64-FreeBSD:       movq %r10, %rax
+; X64-FreeBSD-NEXT:  movabsq $0, %r10
+; X64-FreeBSD-NEXT:  movabsq $0, %r11
+; X64-FreeBSD-NEXT:  callq __morestack
+; X64-FreeBSD-NEXT:  ret
+; X64-FreeBSD-NEXT:  movq %rax, %r10
+
+}
+
+define void @test_large() {
+        %mem = alloca i32, i32 10000
+        call void @dummy_use (i32* %mem, i32 0)
+        ret void
+
+; X32-Linux:       leal -40012(%esp), %ecx
+; X32-Linux-NEXT:  cmpl %gs:48, %ecx
+; X32-Linux-NEXT:  ja      .LBB2_2
+
+; X32-Linux:       pushl $0
+; X32-Linux-NEXT:  pushl $40012
+; X32-Linux-NEXT:  calll __morestack
+; X32-Linux-NEXT:  ret
+
+; X64-Linux:       leaq -40008(%rsp), %r11
+; X64-Linux-NEXT:  cmpq %fs:112, %r11
+; X64-Linux-NEXT:  ja      .LBB2_2
+
+; X64-Linux:       movabsq $40008, %r10
+; X64-Linux-NEXT:  movabsq $0, %r11
+; X64-Linux-NEXT:  callq __morestack
+; X64-Linux-NEXT:  ret
+
+; X32-Darwin:      leal -40012(%esp), %ecx
+; X32-Darwin-NEXT: movl $432, %eax
+; X32-Darwin-NEXT: cmpl %gs:(%eax), %ecx
+; X32-Darwin-NEXT: ja      LBB2_2
+
+; X32-Darwin:      pushl $0
+; X32-Darwin-NEXT: pushl $40012
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
+
+; X64-Darwin:      leaq -40008(%rsp), %r11
+; X64-Darwin-NEXT: cmpq %gs:816, %r11
+; X64-Darwin-NEXT: ja      LBB2_2
+
+; X64-Darwin:      movabsq $40008, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
+
+; X32-MinGW:       leal -40008(%esp), %ecx
+; X32-MinGW-NEXT:  cmpl %fs:20, %ecx
+; X32-MinGW-NEXT:  ja      LBB2_2
+
+; X32-MinGW:       pushl $0
+; X32-MinGW-NEXT:  pushl $40008
+; X32-MinGW-NEXT:  calll ___morestack
+; X32-MinGW-NEXT:  ret
+
+; X64-FreeBSD:       leaq -40008(%rsp), %r11
+; X64-FreeBSD-NEXT:  cmpq %fs:24, %r11
+; X64-FreeBSD-NEXT:  ja      .LBB2_2
+
+; X64-FreeBSD:       movabsq $40008, %r10
+; X64-FreeBSD-NEXT:  movabsq $0, %r11
+; X64-FreeBSD-NEXT:  callq __morestack
+; X64-FreeBSD-NEXT:  ret
+
+}
+
+define fastcc void @test_fastcc() {
+        %mem = alloca i32, i32 10
+        call void @dummy_use (i32* %mem, i32 10)
+        ret void
+
+; X32-Linux:       test_fastcc:
+
+; X32-Linux:       cmpl %gs:48, %esp
+; X32-Linux-NEXT:  ja      .LBB3_2
+
+; X32-Linux:       pushl $0
+; X32-Linux-NEXT:  pushl $60
+; X32-Linux-NEXT:  calll __morestack
+; X32-Linux-NEXT:  ret
+
+; X64-Linux:       test_fastcc:
+
+; X64-Linux:       cmpq %fs:112, %rsp
+; X64-Linux-NEXT:  ja      .LBB3_2
+
+; X64-Linux:       movabsq $40, %r10
+; X64-Linux-NEXT:  movabsq $0, %r11
+; X64-Linux-NEXT:  callq __morestack
+; X64-Linux-NEXT:  ret
+
+; X32-Darwin:      test_fastcc:
+
+; X32-Darwin:      movl $432, %eax
+; X32-Darwin-NEXT: cmpl %gs:(%eax), %esp
+; X32-Darwin-NEXT: ja      LBB3_2
+
+; X32-Darwin:      pushl $0
+; X32-Darwin-NEXT: pushl $60
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
+
+; X64-Darwin:      test_fastcc:
+
+; X64-Darwin:      cmpq %gs:816, %rsp
+; X64-Darwin-NEXT: ja      LBB3_2
+
+; X64-Darwin:      movabsq $40, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
+
+; X32-MinGW:       test_fastcc:
+
+; X32-MinGW:       cmpl %fs:20, %esp
+; X32-MinGW-NEXT:  ja      LBB3_2
+
+; X32-MinGW:       pushl $0
+; X32-MinGW-NEXT:  pushl $48
+; X32-MinGW-NEXT:  calll ___morestack
+; X32-MinGW-NEXT:  ret
+
+; X64-FreeBSD:       test_fastcc:
+
+; X64-FreeBSD:       cmpq %fs:24, %rsp
+; X64-FreeBSD-NEXT:  ja      .LBB3_2
+
+; X64-FreeBSD:       movabsq $40, %r10
+; X64-FreeBSD-NEXT:  movabsq $0, %r11
+; X64-FreeBSD-NEXT:  callq __morestack
+; X64-FreeBSD-NEXT:  ret
+
+}
+
+define fastcc void @test_fastcc_large() {
+        %mem = alloca i32, i32 10000
+        call void @dummy_use (i32* %mem, i32 0)
+        ret void
+
+; X32-Linux:       test_fastcc_large:
+
+; X32-Linux:       leal -40012(%esp), %eax
+; X32-Linux-NEXT:  cmpl %gs:48, %eax
+; X32-Linux-NEXT:  ja      .LBB4_2
+
+; X32-Linux:       pushl $0
+; X32-Linux-NEXT:  pushl $40012
+; X32-Linux-NEXT:  calll __morestack
+; X32-Linux-NEXT:  ret
+
+; X64-Linux:       test_fastcc_large:
+
+; X64-Linux:       leaq -40008(%rsp), %r11
+; X64-Linux-NEXT:  cmpq %fs:112, %r11
+; X64-Linux-NEXT:  ja      .LBB4_2
+
+; X64-Linux:       movabsq $40008, %r10
+; X64-Linux-NEXT:  movabsq $0, %r11
+; X64-Linux-NEXT:  callq __morestack
+; X64-Linux-NEXT:  ret
+
+; X32-Darwin:      test_fastcc_large:
+
+; X32-Darwin:      leal -40012(%esp), %eax
+; X32-Darwin-NEXT: movl $432, %ecx
+; X32-Darwin-NEXT: cmpl %gs:(%ecx), %eax
+; X32-Darwin-NEXT: ja      LBB4_2
+
+; X32-Darwin:      pushl $0
+; X32-Darwin-NEXT: pushl $40012
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
+
+; X64-Darwin:      test_fastcc_large:
+
+; X64-Darwin:      leaq -40008(%rsp), %r11
+; X64-Darwin-NEXT: cmpq %gs:816, %r11
+; X64-Darwin-NEXT: ja      LBB4_2
+
+; X64-Darwin:      movabsq $40008, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
+
+; X32-MinGW:       test_fastcc_large:
+
+; X32-MinGW:       leal -40008(%esp), %eax
+; X32-MinGW-NEXT:  cmpl %fs:20, %eax
+; X32-MinGW-NEXT:  ja      LBB4_2
+
+; X32-MinGW:       pushl $0
+; X32-MinGW-NEXT:  pushl $40008
+; X32-MinGW-NEXT:  calll ___morestack
+; X32-MinGW-NEXT:  ret
+
+; X64-FreeBSD:       test_fastcc_large:
+
+; X64-FreeBSD:       leaq -40008(%rsp), %r11
+; X64-FreeBSD-NEXT:  cmpq %fs:24, %r11
+; X64-FreeBSD-NEXT:  ja      .LBB4_2
+
+; X64-FreeBSD:       movabsq $40008, %r10
+; X64-FreeBSD-NEXT:  movabsq $0, %r11
+; X64-FreeBSD-NEXT:  callq __morestack
+; X64-FreeBSD-NEXT:  ret
+
+}
+
+define fastcc void @test_fastcc_large_with_ecx_arg(i32 %a) {
+        %mem = alloca i32, i32 10000
+        call void @dummy_use (i32* %mem, i32 %a)
+        ret void
 
+; This is testing that the Mac implementation preserves ecx
 
-; X32:      subl $8, %esp
-; X32-NEXT: pushl $4
-; X32-NEXT: pushl $0
-; X32-NEXT: calll __morestack
-; X32-NEXT: addl $8, %esp
-; X32-NEXT: ret
+; X32-Darwin:      test_fastcc_large_with_ecx_arg:
 
-; X64:      leaq (%rsp), %r11
-; X64-NEXT: cmpq %fs:112, %r11
+; X32-Darwin:      leal -40012(%esp), %eax
+; X32-Darwin-NEXT: pushl %ecx
+; X32-Darwin-NEXT: movl $432, %ecx
+; X32-Darwin-NEXT: cmpl %gs:(%ecx), %eax
+; X32-Darwin-NEXT: popl %ecx
+; X32-Darwin-NEXT: ja      LBB5_2
 
-; X64:      movq %r10, %rax
-; X64-NEXT: movabsq $0, %r10
-; X64-NEXT: movabsq $0, %r11
-; X64-NEXT: callq __morestack
-; X64-NEXT: ret
-; X64:      movq %rax, %r10
+; X32-Darwin:      pushl $0
+; X32-Darwin-NEXT: pushl $40012
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
 
 }
diff --git a/test/CodeGen/X86/sext-subreg.ll b/test/CodeGen/X86/sext-subreg.ll
index b2b9f81..a128af9 100644
--- a/test/CodeGen/X86/sext-subreg.ll
+++ b/test/CodeGen/X86/sext-subreg.ll
@@ -8,10 +8,10 @@ define i64 @t(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind {
 ; CHECK: movl %eax
   %C = add i64 %A, %B
   %D = trunc i64 %C to i32
-  volatile store i32 %D, i32* %P
+  store volatile i32 %D, i32* %P
   %E = shl i64 %C, 32
   %F = ashr i64 %E, 32  
-  volatile store i64 %F, i64 *%P2
-  volatile store i32 %D, i32* %P
+  store volatile i64 %F, i64 *%P2
+  store volatile i32 %D, i32* %P
   ret i64 undef
 }
diff --git a/test/CodeGen/X86/shift-and.ll b/test/CodeGen/X86/shift-and.ll
index fd278c2..b747cc5 100644
--- a/test/CodeGen/X86/shift-and.ll
+++ b/test/CodeGen/X86/shift-and.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86    | grep and | count 1
+; RUN: llc < %s -march=x86    | grep and | count 2
 ; RUN: llc < %s -march=x86-64 | not grep and 
 
 define i32 @t1(i32 %t, i32 %val) nounwind {
@@ -7,9 +7,15 @@ define i32 @t1(i32 %t, i32 %val) nounwind {
        ret i32 %res
 }
 
+define i32 @t2(i32 %t, i32 %val) nounwind {
+       %shamt = and i32 %t, 63
+       %res = shl i32 %val, %shamt
+       ret i32 %res
+}
+
 @X = internal global i16 0
 
-define void @t2(i16 %t) nounwind {
+define void @t3(i16 %t) nounwind {
        %shamt = and i16 %t, 31
        %tmp = load i16* @X
        %tmp1 = ashr i16 %tmp, %shamt
@@ -17,8 +23,14 @@ define void @t2(i16 %t) nounwind {
        ret void
 }
 
-define i64 @t3(i64 %t, i64 %val) nounwind {
+define i64 @t4(i64 %t, i64 %val) nounwind {
        %shamt = and i64 %t, 63
        %res = lshr i64 %val, %shamt
        ret i64 %res
 }
+
+define i64 @t5(i64 %t, i64 %val) nounwind {
+       %shamt = and i64 %t, 191
+       %res = lshr i64 %val, %shamt
+       ret i64 %res
+}
diff --git a/test/CodeGen/X86/shift-combine.ll b/test/CodeGen/X86/shift-combine.ll
index e443ac1..51f8303 100644
--- a/test/CodeGen/X86/shift-combine.ll
+++ b/test/CodeGen/X86/shift-combine.ll
@@ -1,15 +1,19 @@
-; RUN: llc < %s | not grep shrl
+; RUN: llc -march=x86 < %s | FileCheck %s
 
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i686-apple-darwin8"
-@array = weak global [4 x i32] zeroinitializer		; <[4 x i32]*> [#uses=1]
+@array = weak global [4 x i32] zeroinitializer
+
+define i32 @test_lshr_and(i32 %x) {
+; CHECK: test_lshr_and:
+; CHECK-NOT: shrl
+; CHECK: andl $12,
+; CHECK: movl {{.*}}array{{.*}},
+; CHECK: ret
 
-define i32 @foo(i32 %x) {
 entry:
-	%tmp2 = lshr i32 %x, 2		; <i32> [#uses=1]
-	%tmp3 = and i32 %tmp2, 3		; <i32> [#uses=1]
-	%tmp4 = getelementptr [4 x i32]* @array, i32 0, i32 %tmp3		; <i32*> [#uses=1]
-	%tmp5 = load i32* %tmp4, align 4		; <i32> [#uses=1]
-	ret i32 %tmp5
+  %tmp2 = lshr i32 %x, 2
+  %tmp3 = and i32 %tmp2, 3
+  %tmp4 = getelementptr [4 x i32]* @array, i32 0, i32 %tmp3
+  %tmp5 = load i32* %tmp4, align 4
+  ret i32 %tmp5
 }
 
diff --git a/test/CodeGen/X86/shift-folding.ll b/test/CodeGen/X86/shift-folding.ll
index d9c3061..3ea6011 100644
--- a/test/CodeGen/X86/shift-folding.ll
+++ b/test/CodeGen/X86/shift-folding.ll
@@ -1,28 +1,70 @@
-; RUN: llc < %s -march=x86 | \
-; RUN:   grep {s\[ah\]\[rl\]l} | count 1
-
-define i32* @test1(i32* %P, i32 %X) nounwind {
-        %Y = lshr i32 %X, 2             ; <i32> [#uses=1]
-        %gep.upgrd.1 = zext i32 %Y to i64               ; <i64> [#uses=1]
-        %P2 = getelementptr i32* %P, i64 %gep.upgrd.1           ; <i32*> [#uses=1]
-        ret i32* %P2
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+define i32* @test1(i32* %P, i32 %X) {
+; CHECK: test1:
+; CHECK-NOT: shrl
+; CHECK-NOT: shll
+; CHECK: ret
+
+entry:
+  %Y = lshr i32 %X, 2
+  %gep.upgrd.1 = zext i32 %Y to i64
+  %P2 = getelementptr i32* %P, i64 %gep.upgrd.1
+  ret i32* %P2
 }
 
-define i32* @test2(i32* %P, i32 %X) nounwind {
-        %Y = shl i32 %X, 2              ; <i32> [#uses=1]
-        %gep.upgrd.2 = zext i32 %Y to i64               ; <i64> [#uses=1]
-        %P2 = getelementptr i32* %P, i64 %gep.upgrd.2           ; <i32*> [#uses=1]
-        ret i32* %P2
+define i32* @test2(i32* %P, i32 %X) {
+; CHECK: test2:
+; CHECK: shll $4
+; CHECK-NOT: shll
+; CHECK: ret
+
+entry:
+  %Y = shl i32 %X, 2
+  %gep.upgrd.2 = zext i32 %Y to i64
+  %P2 = getelementptr i32* %P, i64 %gep.upgrd.2
+  ret i32* %P2
 }
 
-define i32* @test3(i32* %P, i32 %X) nounwind {
-        %Y = ashr i32 %X, 2             ; <i32> [#uses=1]
-        %P2 = getelementptr i32* %P, i32 %Y             ; <i32*> [#uses=1]
-        ret i32* %P2
+define i32* @test3(i32* %P, i32 %X) {
+; CHECK: test3:
+; CHECK-NOT: shrl
+; CHECK-NOT: shll
+; CHECK: ret
+
+entry:
+  %Y = ashr i32 %X, 2
+  %P2 = getelementptr i32* %P, i32 %Y
+  ret i32* %P2
 }
 
-define fastcc i32 @test4(i32* %d) nounwind {
+define fastcc i32 @test4(i32* %d) {
+; CHECK: test4:
+; CHECK-NOT: shrl
+; CHECK: ret
+
+entry:
   %tmp4 = load i32* %d
   %tmp512 = lshr i32 %tmp4, 24
   ret i32 %tmp512
 }
+
+define i64 @test5(i16 %i, i32* %arr) {
+; Ensure that we don't fold away shifts which have multiple uses, as they are
+; just re-introduced for the second use.
+; CHECK: test5:
+; CHECK-NOT: shrl
+; CHECK: shrl $11
+; CHECK-NOT: shrl
+; CHECK: ret
+
+entry:
+  %i.zext = zext i16 %i to i32
+  %index = lshr i32 %i.zext, 11
+  %index.zext = zext i32 %index to i64
+  %val.ptr = getelementptr inbounds i32* %arr, i64 %index.zext
+  %val = load i32* %val.ptr
+  %val.zext = zext i32 %val to i64
+  %sum = add i64 %val.zext, %index.zext
+  ret i64 %sum
+}
diff --git a/test/CodeGen/X86/shl-i64.ll b/test/CodeGen/X86/shl-i64.ll
new file mode 100644
index 0000000..f00058a
--- /dev/null
+++ b/test/CodeGen/X86/shl-i64.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=x86 -mattr=+sse2 < %s | FileCheck %s
+
+; Make sure that we don't generate an illegal i64 extract after LegalizeType.
+; CHECK: shll
+
+
+define void @test_cl(<4 x i64>*  %dst, <4 x i64>* %src, i32 %idx) {
+entry:
+  %arrayidx = getelementptr inbounds <4 x i64> * %src, i32 %idx
+  %0 = load <4 x i64> * %arrayidx, align 32
+  %arrayidx1 = getelementptr inbounds <4 x i64> * %dst, i32 %idx
+  %1 = load <4 x i64> * %arrayidx1, align 32
+  %2 = extractelement <4 x i64> %1, i32 0
+  %and = and i64 %2, 63
+  %3 = insertelement <4 x i64> undef, i64 %and, i32 0    
+  %splat = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> zeroinitializer
+  %shl = shl <4 x i64> %0, %splat
+  store <4 x i64> %shl, <4 x i64> * %arrayidx1, align 32
+  ret void
+}
diff --git a/test/CodeGen/X86/sibcall-5.ll b/test/CodeGen/X86/sibcall-5.ll
index 9d74121..937817e 100644
--- a/test/CodeGen/X86/sibcall-5.ll
+++ b/test/CodeGen/X86/sibcall-5.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -mtriple=i386-apple-darwin8 -mattr=+sse2  | FileCheck %s --check-prefix=X32
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-sse3 | FileCheck %s --check-prefix=X64_BAD
 
 ; Sibcall optimization of expanded libcalls.
 ; rdar://8707777
@@ -29,3 +30,31 @@ entry:
 declare float @sinf(float) nounwind readonly
 
 declare double @sin(double) nounwind readonly
+
+; rdar://10930395
+%0 = type opaque
+
+@"\01L_OBJC_SELECTOR_REFERENCES_2" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+
+define hidden { double, double } @foo2(%0* %self, i8* nocapture %_cmd) uwtable optsize ssp {
+; X64_BAD: foo
+; X64_BAD: call
+; X64_BAD: call
+; X64_BAD: call
+  %1 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_2", align 8, !invariant.load !0
+  %2 = bitcast %0* %self to i8*
+  %3 = tail call { double, double } bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to { double, double } (i8*, i8*)*)(i8* %2, i8* %1) optsize
+  %4 = extractvalue { double, double } %3, 0
+  %5 = extractvalue { double, double } %3, 1
+  %6 = tail call double @floor(double %4) optsize
+  %7 = tail call double @floor(double %5) optsize
+  %insert.i.i = insertvalue { double, double } undef, double %6, 0
+  %insert5.i.i = insertvalue { double, double } %insert.i.i, double %7, 1
+  ret { double, double } %insert5.i.i
+}
+
+declare i8* @objc_msgSend(i8*, i8*, ...)
+
+declare double @floor(double) optsize
+
+!0 = metadata !{}
diff --git a/test/CodeGen/X86/splat-scalar-load.ll b/test/CodeGen/X86/splat-scalar-load.ll
index 2b13029..81a072f 100644
--- a/test/CodeGen/X86/splat-scalar-load.ll
+++ b/test/CodeGen/X86/splat-scalar-load.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | FileCheck %s
 ; rdar://7434544
 
-define <2 x i64> @t2() nounwind ssp {
+define <2 x i64> @t2() nounwind {
 entry:
 ; CHECK: t2:
 ; CHECK: pshufd	$85, (%esp), %xmm0
diff --git a/test/CodeGen/X86/sret.ll b/test/CodeGen/X86/sret.ll
deleted file mode 100644
index b945530..0000000
--- a/test/CodeGen/X86/sret.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s -march=x86 | grep ret | grep 4
-
-	%struct.foo = type { [4 x i32] }
-
-define void @bar(%struct.foo* noalias sret %agg.result) nounwind  {
-entry:
-	%tmp1 = getelementptr %struct.foo* %agg.result, i32 0, i32 0
-	%tmp3 = getelementptr [4 x i32]* %tmp1, i32 0, i32 0
-	store i32 1, i32* %tmp3, align 8
-        ret void
-}
-
-@dst = external global i32
-
-define void @foo() nounwind {
-	%memtmp = alloca %struct.foo, align 4
-        call void @bar( %struct.foo* sret %memtmp ) nounwind
-        %tmp4 = getelementptr %struct.foo* %memtmp, i32 0, i32 0
-	%tmp5 = getelementptr [4 x i32]* %tmp4, i32 0, i32 0
-        %tmp6 = load i32* %tmp5
-        store i32 %tmp6, i32* @dst
-        ret void
-}
diff --git a/test/CodeGen/X86/sse-align-3.ll b/test/CodeGen/X86/sse-align-3.ll
index 04f2161..b6b0471 100644
--- a/test/CodeGen/X86/sse-align-3.ll
+++ b/test/CodeGen/X86/sse-align-3.ll
@@ -1,8 +1,8 @@
 ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
 ; CHECK-NOT:     movapd
 ; CHECK:     movaps
-; CHECK-NOT:     movaps
-; CHECK:     movapd
+; CHECK-NOT:     movapd
+; CHECK:     movaps
 ; CHECK-NOT:     movap
 
 define void @foo(<4 x float>* %p, <4 x float> %x) nounwind {
diff --git a/test/CodeGen/X86/sse-domains.ll b/test/CodeGen/X86/sse-domains.ll
new file mode 100644
index 0000000..d1e07c8
--- /dev/null
+++ b/test/CodeGen/X86/sse-domains.ll
@@ -0,0 +1,87 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7"
+
+; CHECK: f
+;
+; This function contains load / store / and operations that all can execute in
+; any domain.  The only domain-specific operation is the %add = shl... operation
+; which is <4 x i32>.
+;
+; The paddd instruction can only influence the other operations through the loop
+; back-edge. Check that everything is still moved into the integer domain.
+
+define void @f(<4 x i32>* nocapture %p, i32 %n) nounwind uwtable ssp {
+entry:
+  br label %while.body
+
+; Materialize a zeroinitializer and a constant-pool load in the integer domain.
+; The order is not important.
+; CHECK: pxor
+; CHECK: movdqa
+
+; The instructions in the loop must all be integer domain as well.
+; CHECK: while.body
+; CHECK: pand
+; CHECK: movdqa
+; CHECK: movdqa
+; Finally, the controlling integer-only instruction.
+; CHECK: paddd
+while.body:
+  %p.addr.04 = phi <4 x i32>* [ %incdec.ptr, %while.body ], [ %p, %entry ]
+  %n.addr.03 = phi i32 [ %dec, %while.body ], [ %n, %entry ]
+  %x.02 = phi <4 x i32> [ %add, %while.body ], [ zeroinitializer, %entry ]
+  %dec = add nsw i32 %n.addr.03, -1
+  %and = and <4 x i32> %x.02, <i32 127, i32 127, i32 127, i32 127>
+  %incdec.ptr = getelementptr inbounds <4 x i32>* %p.addr.04, i64 1
+  store <4 x i32> %and, <4 x i32>* %p.addr.04, align 16
+  %0 = load <4 x i32>* %incdec.ptr, align 16
+  %add = shl <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
+  %tobool = icmp eq i32 %dec, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:
+  ret void
+}
+
+; CHECK: f2
+; CHECK: for.body
+;
+; This loop contains two cvtsi2ss instructions that update the same xmm
+; register.  Verify that the execution dependency fix pass breaks those
+; dependencies by inserting xorps instructions.
+;
+; If the register allocator chooses different registers for the two cvtsi2ss
+; instructions, they are still dependent on themselves.
+; CHECK: xorps [[XMM1:%xmm[0-9]+]]
+; CHECK: , [[XMM1]]
+; CHECK: cvtsi2ss %{{.*}}, [[XMM1]]
+; CHECK: xorps [[XMM2:%xmm[0-9]+]]
+; CHECK: , [[XMM2]]
+; CHECK: cvtsi2ss %{{.*}}, [[XMM2]]
+;
+define float @f2(i32 %m) nounwind uwtable readnone ssp {
+entry:
+  %tobool3 = icmp eq i32 %m, 0
+  br i1 %tobool3, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %m.addr.07 = phi i32 [ %dec, %for.body ], [ %m, %entry ]
+  %s1.06 = phi float [ %add, %for.body ], [ 0.000000e+00, %entry ]
+  %s2.05 = phi float [ %add2, %for.body ], [ 0.000000e+00, %entry ]
+  %n.04 = phi i32 [ %inc, %for.body ], [ 1, %entry ]
+  %conv = sitofp i32 %n.04 to float
+  %add = fadd float %s1.06, %conv
+  %conv1 = sitofp i32 %m.addr.07 to float
+  %add2 = fadd float %s2.05, %conv1
+  %inc = add nsw i32 %n.04, 1
+  %dec = add nsw i32 %m.addr.07, -1
+  %tobool = icmp eq i32 %dec, 0
+  br i1 %tobool, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %s1.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
+  %s2.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add2, %for.body ]
+  %sub = fsub float %s1.0.lcssa, %s2.0.lcssa
+  ret float %sub
+}
diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll
index af1a73b..1112440 100644
--- a/test/CodeGen/X86/sse-minmax.ll
+++ b/test/CodeGen/X86/sse-minmax.ll
@@ -140,15 +140,15 @@ define double @ole_inverse(double %x, double %y) nounwind {
 }
 
 ; CHECK:      x_ogt:
-; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; CHECK-NEXT: maxsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE:      x_ogt:
-; UNSAFE-NEXT: pxor  %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_ogt:
-; FINITE-NEXT: pxor  %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; FINITE-NEXT: maxsd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @x_ogt(double %x) nounwind {
@@ -158,15 +158,15 @@ define double @x_ogt(double %x) nounwind {
 }
 
 ; CHECK:      x_olt:
-; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; CHECK-NEXT: minsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE:      x_olt:
-; UNSAFE-NEXT: pxor  %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_olt:
-; FINITE-NEXT: pxor  %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; FINITE-NEXT: minsd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @x_olt(double %x) nounwind {
@@ -176,17 +176,17 @@ define double @x_olt(double %x) nounwind {
 }
 
 ; CHECK:      x_ogt_inverse:
-; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; CHECK-NEXT: minsd  %xmm0, %xmm1
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE:      x_ogt_inverse:
-; UNSAFE-NEXT: pxor   %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd  %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_ogt_inverse:
-; FINITE-NEXT: pxor   %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; FINITE-NEXT: minsd  %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; FINITE-NEXT: ret
@@ -197,17 +197,17 @@ define double @x_ogt_inverse(double %x) nounwind {
 }
 
 ; CHECK:      x_olt_inverse:
-; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; CHECK-NEXT: maxsd  %xmm0, %xmm1
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE:      x_olt_inverse:
-; UNSAFE-NEXT: pxor   %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd  %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_olt_inverse:
-; FINITE-NEXT: pxor   %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; FINITE-NEXT: maxsd  %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; FINITE-NEXT: ret
@@ -220,11 +220,11 @@ define double @x_olt_inverse(double %x) nounwind {
 ; CHECK:      x_oge:
 ; CHECK:      ucomisd %xmm1, %xmm0
 ; UNSAFE:      x_oge:
-; UNSAFE-NEXT: pxor    %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd   %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_oge:
-; FINITE-NEXT: pxor    %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; FINITE-NEXT: maxsd   %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @x_oge(double %x) nounwind {
@@ -236,11 +236,11 @@ define double @x_oge(double %x) nounwind {
 ; CHECK:      x_ole:
 ; CHECK:      ucomisd %xmm0, %xmm1
 ; UNSAFE:      x_ole:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_ole:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; FINITE-NEXT: minsd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @x_ole(double %x) nounwind {
@@ -252,12 +252,12 @@ define double @x_ole(double %x) nounwind {
 ; CHECK:      x_oge_inverse:
 ; CHECK:      ucomisd %xmm1, %xmm0
 ; UNSAFE:      x_oge_inverse:
-; UNSAFE-NEXT: pxor    %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd   %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_oge_inverse:
-; FINITE-NEXT: pxor    %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; FINITE-NEXT: minsd   %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; FINITE-NEXT: ret
@@ -270,12 +270,12 @@ define double @x_oge_inverse(double %x) nounwind {
 ; CHECK:      x_ole_inverse:
 ; CHECK:      ucomisd %xmm0, %xmm1
 ; UNSAFE:      x_ole_inverse:
-; UNSAFE-NEXT: pxor    %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd   %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_ole_inverse:
-; FINITE-NEXT: pxor    %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; FINITE-NEXT: maxsd   %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; FINITE-NEXT: ret
@@ -414,11 +414,11 @@ define double @ule_inverse(double %x, double %y) nounwind {
 ; CHECK:      x_ugt:
 ; CHECK:      ucomisd %xmm0, %xmm1
 ; UNSAFE:      x_ugt:
-; UNSAFE-NEXT: pxor    %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd   %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_ugt:
-; FINITE-NEXT: pxor    %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; FINITE-NEXT: maxsd   %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @x_ugt(double %x) nounwind {
@@ -430,11 +430,11 @@ define double @x_ugt(double %x) nounwind {
 ; CHECK:      x_ult:
 ; CHECK:      ucomisd %xmm1, %xmm0
 ; UNSAFE:      x_ult:
-; UNSAFE-NEXT: pxor    %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd   %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_ult:
-; FINITE-NEXT: pxor    %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; FINITE-NEXT: minsd   %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @x_ult(double %x) nounwind {
@@ -446,12 +446,12 @@ define double @x_ult(double %x) nounwind {
 ; CHECK:      x_ugt_inverse:
 ; CHECK:      ucomisd %xmm0, %xmm1
 ; UNSAFE:      x_ugt_inverse:
-; UNSAFE-NEXT: pxor    %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd   %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_ugt_inverse:
-; FINITE-NEXT: pxor    %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; FINITE-NEXT: minsd   %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; FINITE-NEXT: ret
@@ -464,12 +464,12 @@ define double @x_ugt_inverse(double %x) nounwind {
 ; CHECK:      x_ult_inverse:
 ; CHECK:      ucomisd %xmm1, %xmm0
 ; UNSAFE:      x_ult_inverse:
-; UNSAFE-NEXT: pxor    %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd   %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_ult_inverse:
-; FINITE-NEXT: pxor    %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; FINITE-NEXT: maxsd   %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; FINITE-NEXT: ret
@@ -480,16 +480,16 @@ define double @x_ult_inverse(double %x) nounwind {
 }
 
 ; CHECK:      x_uge:
-; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; CHECK-NEXT: maxsd  %xmm0, %xmm1
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE:      x_uge:
-; UNSAFE-NEXT: pxor   %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_uge:
-; FINITE-NEXT: pxor   %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; FINITE-NEXT: maxsd  %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @x_uge(double %x) nounwind {
@@ -499,16 +499,16 @@ define double @x_uge(double %x) nounwind {
 }
 
 ; CHECK:      x_ule:
-; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; CHECK-NEXT: minsd  %xmm0, %xmm1
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE:      x_ule:
-; UNSAFE-NEXT: pxor   %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_ule:
-; FINITE-NEXT: pxor   %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; FINITE-NEXT: minsd  %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @x_ule(double %x) nounwind {
@@ -518,16 +518,16 @@ define double @x_ule(double %x) nounwind {
 }
 
 ; CHECK:      x_uge_inverse:
-; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; CHECK-NEXT: minsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE:      x_uge_inverse:
-; UNSAFE-NEXT: pxor  %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_uge_inverse:
-; FINITE-NEXT: pxor  %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; FINITE-NEXT: minsd %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; FINITE-NEXT: ret
@@ -538,16 +538,16 @@ define double @x_uge_inverse(double %x) nounwind {
 }
 
 ; CHECK:      x_ule_inverse:
-; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; CHECK-NEXT: maxsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE:      x_ule_inverse:
-; UNSAFE-NEXT: pxor  %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE:      x_ule_inverse:
-; FINITE-NEXT: pxor  %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; FINITE-NEXT: maxsd %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; FINITE-NEXT: ret
diff --git a/test/CodeGen/X86/sse2-blend.ll b/test/CodeGen/X86/sse2-blend.ll
index 56b099e..2f4317b 100644
--- a/test/CodeGen/X86/sse2-blend.ll
+++ b/test/CodeGen/X86/sse2-blend.ll
@@ -1,8 +1,10 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah -promote-elements -mattr=+sse2,-sse41 | FileCheck %s
-
-
-; currently (xor v4i32) is defined as illegal, so we scalarize the code.
+; RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse41 | FileCheck %s
 
+; CHECK: vsel_float
+; CHECK: pandn
+; CHECK: pand
+; CHECK: por
+; CHECK: ret
 define void@vsel_float(<4 x float>* %v1, <4 x float>* %v2) {
   %A = load <4 x float>* %v1
   %B = load <4 x float>* %v2
@@ -11,8 +13,11 @@ define void@vsel_float(<4 x float>* %v1, <4 x float>* %v2) {
   ret void
 }
 
-; currently (xor v4i32) is defined as illegal, so we scalarize the code.
-
+; CHECK: vsel_i32
+; CHECK: pandn
+; CHECK: pand
+; CHECK: por
+; CHECK: ret
 define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) {
   %A = load <4 x i32>* %v1
   %B = load <4 x i32>* %v2
@@ -21,9 +26,10 @@ define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) {
   ret void
 }
 
+; Without forcing instructions, fall back to the preferred PS domain.
 ; CHECK: vsel_i64
-; CHECK: pxor
-; CHECK: pand
+; CHECK: xorps
+; CHECK: andps
 ; CHECK: andnps
 ; CHECK: orps
 ; CHECK: ret
@@ -36,14 +42,14 @@ define void@vsel_i64(<4 x i64>* %v1, <4 x i64>* %v2) {
   ret void
 }
 
+; Without forcing instructions, fall back to the preferred PS domain.
 ; CHECK: vsel_double
-; CHECK: pxor
-; CHECK: pand
+; CHECK: xorps
+; CHECK: andps
 ; CHECK: andnps
 ; CHECK: orps
 ; CHECK: ret
 
-
 define void@vsel_double(<4 x double>* %v1, <4 x double>* %v2) {
   %A = load <4 x double>* %v1
   %B = load <4 x double>* %v2
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll
index 70e0a8a..36a0fd9 100644
--- a/test/CodeGen/X86/sse2.ll
+++ b/test/CodeGen/X86/sse2.ll
@@ -98,7 +98,7 @@ define void @test7() nounwind {
         ret void
         
 ; CHECK: test7:
-; CHECK:	pxor	%xmm0, %xmm0
+; CHECK:	xorps	%xmm0, %xmm0
 ; CHECK:	movaps	%xmm0, 0
 }
 
@@ -144,7 +144,7 @@ define <2 x double> @test11(double %a, double %b) nounwind {
 	%tmp7 = insertelement <2 x double> %tmp, double %b, i32 1		; <<2 x double>> [#uses=1]
 	ret <2 x double> %tmp7
 ; CHECK: test11:
-; CHECK: movapd	4(%esp), %xmm0
+; CHECK: movaps	4(%esp), %xmm0
 }
 
 define void @test12() nounwind {
@@ -178,8 +178,8 @@ define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind {
         %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 >                ; <<4 x float>> [#uses=1]
         ret <4 x float> %tmp27
 ; CHECK: test14:
-; CHECK: 	addps	[[X1:%xmm[0-9]+]], [[X0:%xmm[0-9]+]]
-; CHECK: 	subps	[[X1]], [[X2:%xmm[0-9]+]]
+; CHECK: 	subps	[[X1:%xmm[0-9]+]], [[X2:%xmm[0-9]+]]
+; CHECK: 	addps	[[X1]], [[X0:%xmm[0-9]+]]
 ; CHECK: 	movlhps	[[X2]], [[X0]]
 }
 
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll
index 8b3a317..5ea1b4d 100644
--- a/test/CodeGen/X86/sse3.ll
+++ b/test/CodeGen/X86/sse3.ll
@@ -16,10 +16,8 @@ entry:
 	ret void
         
 ; X64: t0:
-; X64: 	movddup	(%rsi), %xmm0
-; X64:  pshuflw	$0, %xmm0, %xmm0
-; X64:	xorl	%eax, %eax
-; X64:	pinsrw	$0, %eax, %xmm0
+; X64:	movdqa	(%rsi), %xmm0
+; X64:	pslldq	$2, %xmm0
 ; X64:	movdqa	%xmm0, (%rdi)
 ; X64:	ret
 }
@@ -31,9 +29,8 @@ define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 	ret <8 x i16> %tmp3
         
 ; X64: t1:
-; X64: 	movl	(%rsi), %eax
 ; X64: 	movdqa	(%rdi), %xmm0
-; X64: 	pinsrw	$0, %eax, %xmm0
+; X64: 	pinsrw	$0, (%rsi), %xmm0
 ; X64: 	ret
 }
 
@@ -167,12 +164,12 @@ define internal void @t10() nounwind {
         store <4 x i16> %6, <4 x i16>* @g2, align 8
         ret void
 ; X64: 	t10:
-; X64: 		pextrw	$4, [[X0:%xmm[0-9]+]], %eax
-; X64: 		unpcklpd [[X1:%xmm[0-9]+]]
-; X64: 		pshuflw	$8, [[X1]], [[X2:%xmm[0-9]+]]
-; X64: 		pinsrw	$2, %eax, [[X2]]
+; X64: 		pextrw	$4, [[X0:%xmm[0-9]+]], %ecx
 ; X64: 		pextrw	$6, [[X0]], %eax
-; X64: 		pinsrw	$3, %eax, [[X2]]
+; X64: 		movlhps [[X0]], [[X0]]
+; X64: 		pshuflw	$8, [[X0]], [[X0]]
+; X64: 		pinsrw	$2, %ecx, [[X0]]
+; X64: 		pinsrw	$3, %eax, [[X0]]
 }
 
 
@@ -229,7 +226,7 @@ entry:
 }
 
 
-
+; FIXME: t15 is worse off from disabling of scheduler 2-address hack.
 define <8 x i16> @t15(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
 entry:
         %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
@@ -250,13 +247,11 @@ entry:
         %tmp9 = shufflevector <16 x i8> %tmp8, <16 x i8> %T0,  <16 x i32> < i32 0, i32 1, i32 2, i32 17,  i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
         ret <16 x i8> %tmp9
 ; X64: 	t16:
-; X64: 		pinsrw	$0, %eax, [[X1:%xmm[0-9]+]]
-; X64: 		pextrw	$8, [[X0:%xmm[0-9]+]], %eax
-; X64: 		pinsrw	$1, %eax, [[X1]]
-; X64: 		pextrw	$1, [[X1]], %ecx
-; X64: 		movd	[[X1]], %edx
-; X64: 		pinsrw	$0, %edx, %xmm
-; X64: 		pinsrw	$1, %eax, %xmm
+; X64: 		pextrw	$8, %xmm0, %eax
+; X64: 		pslldq	$2, %xmm0
+; X64: 		movd	%xmm0, %ecx
+; X64: 		pextrw	$1, %xmm0, %edx
+; X64: 		pinsrw	$0, %ecx, %xmm0
 ; X64: 		ret
 }
 
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll
index 2ac4cb4..54264b1 100644
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@@ -183,8 +183,8 @@ define <4 x float> @insertps_3(<4 x float> %t1, <4 x float> %t2) nounwind {
 ; X64:    insertps  $0, %xmm1, %xmm0        
 }
 
-define i32 @ptestz_1(<4 x float> %t1, <4 x float> %t2) nounwind {
-        %tmp1 = call i32 @llvm.x86.sse41.ptestz(<4 x float> %t1, <4 x float> %t2) nounwind readnone
+define i32 @ptestz_1(<2 x i64> %t1, <2 x i64> %t2) nounwind {
+        %tmp1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone
         ret i32 %tmp1
 ; X32: _ptestz_1:
 ; X32:    ptest 	%xmm1, %xmm0
@@ -195,8 +195,8 @@ define i32 @ptestz_1(<4 x float> %t1, <4 x float> %t2) nounwind {
 ; X64:    sete	%al
 }
 
-define i32 @ptestz_2(<4 x float> %t1, <4 x float> %t2) nounwind {
-        %tmp1 = call i32 @llvm.x86.sse41.ptestc(<4 x float> %t1, <4 x float> %t2) nounwind readnone
+define i32 @ptestz_2(<2 x i64> %t1, <2 x i64> %t2) nounwind {
+        %tmp1 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone
         ret i32 %tmp1
 ; X32: _ptestz_2:
 ; X32:    ptest 	%xmm1, %xmm0
@@ -207,8 +207,8 @@ define i32 @ptestz_2(<4 x float> %t1, <4 x float> %t2) nounwind {
 ; X64:    sbbl	%eax
 }
 
-define i32 @ptestz_3(<4 x float> %t1, <4 x float> %t2) nounwind {
-        %tmp1 = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %t1, <4 x float> %t2) nounwind readnone
+define i32 @ptestz_3(<2 x i64> %t1, <2 x i64> %t2) nounwind {
+        %tmp1 = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone
         ret i32 %tmp1
 ; X32: _ptestz_3:
 ; X32:    ptest 	%xmm1, %xmm0
@@ -220,9 +220,9 @@ define i32 @ptestz_3(<4 x float> %t1, <4 x float> %t2) nounwind {
 }
 
 
-declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
-declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
-declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
 
 ; This used to compile to insertps $0  + insertps $16.  insertps $0 is always
 ; pointless.
diff --git a/test/CodeGen/X86/stack-align.ll b/test/CodeGen/X86/stack-align.ll
index 793c026..f6c13ec 100644
--- a/test/CodeGen/X86/stack-align.ll
+++ b/test/CodeGen/X86/stack-align.ll
@@ -11,13 +11,13 @@ define void @test({ double, double }* byval  %z, double* %P) nounwind {
 entry:
 	%tmp3 = load double* @G, align 16		; <double> [#uses=1]
 	%tmp4 = tail call double @fabs( double %tmp3 )		; <double> [#uses=1]
-        volatile store double %tmp4, double* %P
+        store volatile double %tmp4, double* %P
 	%tmp = getelementptr { double, double }* %z, i32 0, i32 0		; <double*> [#uses=1]
-	%tmp1 = volatile load double* %tmp, align 8		; <double> [#uses=1]
+	%tmp1 = load volatile double* %tmp, align 8		; <double> [#uses=1]
 	%tmp2 = tail call double @fabs( double %tmp1 )		; <double> [#uses=1]
     ; CHECK: andpd{{.*}}4(%esp), %xmm
 	%tmp6 = fadd double %tmp4, %tmp2		; <double> [#uses=1]
-	volatile store double %tmp6, double* %P, align 8
+	store volatile double %tmp6, double* %P, align 8
 	ret void
 }
 
diff --git a/test/CodeGen/X86/stack-align2.ll b/test/CodeGen/X86/stack-align2.ll
new file mode 100644
index 0000000..18cce72
--- /dev/null
+++ b/test/CodeGen/X86/stack-align2.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mcpu=generic -mtriple=i386-linux | FileCheck %s -check-prefix=LINUX-I386
+; RUN: llc < %s -mcpu=generic -mtriple=i386-netbsd | FileCheck %s -check-prefix=NETBSD-I386
+; RUN: llc < %s -mcpu=generic -mtriple=i686-apple-darwin8 | FileCheck %s -check-prefix=DARWIN-I386
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s -check-prefix=LINUX-X86_64
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-netbsd | FileCheck %s -check-prefix=NETBSD-X86_64
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-darwin8 | FileCheck %s -check-prefix=DARWIN-X86_64
+
+define i32 @test() nounwind {
+entry:
+  call void @test2()
+  ret i32 0
+
+; LINUX-I386:     subl	$12, %esp
+; DARWIN-I386:    subl	$12, %esp
+; NETBSD-I386-NOT: subl	{{.*}}, %esp
+
+; LINUX-X86_64:      pushq %{{.*}}
+; LINUX-X86_64-NOT:  subq	{{.*}}, %rsp
+; DARWIN-X86_64:     pushq %{{.*}}
+; DARWIN-X86_64-NOT: subq	{{.*}}, %rsp
+; NETBSD-X86_64:     pushq %{{.*}}
+; NETBSD-X86_64-NOT: subq	{{.*}}, %rsp
+}
+
+declare void @test2()
diff --git a/test/CodeGen/X86/store-empty-member.ll b/test/CodeGen/X86/store-empty-member.ll
index 37f86c6..aea85b9 100644
--- a/test/CodeGen/X86/store-empty-member.ll
+++ b/test/CodeGen/X86/store-empty-member.ll
@@ -9,6 +9,6 @@
 
 define void @foo() nounwind {
   %1 = alloca %testType
-  volatile store %testType {i32 1, [0 x i32] zeroinitializer, i32 2}, %testType* %1
+  store volatile %testType {i32 1, [0 x i32] zeroinitializer, i32 2}, %testType* %1
   ret void
 }
diff --git a/test/CodeGen/X86/store_op_load_fold2.ll b/test/CodeGen/X86/store_op_load_fold2.ll
index 1168622..8313166 100644
--- a/test/CodeGen/X86/store_op_load_fold2.ll
+++ b/test/CodeGen/X86/store_op_load_fold2.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mtriple=i686-linux -x86-asm-syntax=intel | FileCheck %s
+; RUN: llc < %s -mtriple=i686-linux -x86-asm-syntax=att | FileCheck %s -check-prefix=ATT
+; RUN: llc < %s -mtriple=i686-linux -x86-asm-syntax=intel | FileCheck %s -check-prefix=INTEL
 
 target datalayout = "e-p:32:32"
         %struct.Macroblock = type { i32, i32, i32, i32, i32, [8 x i32], %struct.Macroblock*, %struct.Macroblock*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
@@ -16,9 +17,14 @@ cond_true2732.preheader:                ; preds = %entry
         store i64 %tmp2676.us.us, i64* %tmp2666
         ret i32 0
 
-; CHECK: 	and	{{E..}}, DWORD PTR [360]
-; CHECK:	and	DWORD PTR [356], {{E..}}
-; CHECK:	mov	DWORD PTR [360], {{E..}}
+; INTEL: 	and	{{E..}}, DWORD PTR [360]
+; INTEL:	and	DWORD PTR [356], {{E..}}
+; FIXME:	mov	DWORD PTR [360], {{E..}}
+; The above line comes out as 'mov 360, EAX', but when the register is ECX it works?
+
+; ATT: 	andl	360, %{{e..}}
+; ATT:	andl	%{{e..}}, 356
+; ATT:	movl	%{{e..}}, 360
 
 }
 
diff --git a/test/CodeGen/X86/stride-reuse.ll b/test/CodeGen/X86/stride-reuse.ll
index 1251a24..81de22c 100644
--- a/test/CodeGen/X86/stride-reuse.ll
+++ b/test/CodeGen/X86/stride-reuse.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86            | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86            | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s
 ; CHECK-NOT:     lea
 
 @B = external global [1000 x float], align 32
diff --git a/test/CodeGen/X86/sub-with-overflow.ll b/test/CodeGen/X86/sub-with-overflow.ll
index 4522e91..749b5db 100644
--- a/test/CodeGen/X86/sub-with-overflow.ll
+++ b/test/CodeGen/X86/sub-with-overflow.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -mtriple=i686-linux | FileCheck %s
 
 @ok = internal constant [4 x i8] c"%d\0A\00"
 @no = internal constant [4 x i8] c"no\0A\00"
diff --git a/test/CodeGen/X86/tail-dup-addr.ll b/test/CodeGen/X86/tail-dup-addr.ll
index c5a105c..c68a8c6 100644
--- a/test/CodeGen/X86/tail-dup-addr.ll
+++ b/test/CodeGen/X86/tail-dup-addr.ll
@@ -2,8 +2,8 @@
 
 ; Test that we don't drop a block that has its address taken.
 
+; CHECK: Ltmp0:                                  ## Block address taken
 ; CHECK: Ltmp1:                                  ## Block address taken
-; CHECK: Ltmp2:                                  ## Block address taken
 
 @a = common global i32 0, align 4
 @p = common global i8* null, align 8
diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll
index d6c16ca..f1b9f20 100644
--- a/test/CodeGen/X86/tail-opts.ll
+++ b/test/CodeGen/X86/tail-opts.ll
@@ -314,7 +314,7 @@ bby:
   ]
 
 bb7:
-  volatile store i32 0, i32* @XYZ
+  store volatile i32 0, i32* @XYZ
   unreachable
 
 bbx:
@@ -323,7 +323,7 @@ bbx:
   ]
 
 bb12:
-  volatile store i32 0, i32* @XYZ
+  store volatile i32 0, i32* @XYZ
   unreachable
 
 return:
@@ -352,8 +352,8 @@ bby:
   ]
 
 bb7:
-  volatile store i32 0, i32* @XYZ
-  volatile store i32 1, i32* @XYZ
+  store volatile i32 0, i32* @XYZ
+  store volatile i32 1, i32* @XYZ
   unreachable
 
 bbx:
@@ -362,8 +362,8 @@ bbx:
   ]
 
 bb12:
-  volatile store i32 0, i32* @XYZ
-  volatile store i32 1, i32* @XYZ
+  store volatile i32 0, i32* @XYZ
+  store volatile i32 1, i32* @XYZ
   unreachable
 
 return:
@@ -390,8 +390,8 @@ bby:
   ]
 
 bb7:
-  volatile store i32 0, i32* @XYZ
-  volatile store i32 1, i32* @XYZ
+  store volatile i32 0, i32* @XYZ
+  store volatile i32 1, i32* @XYZ
   unreachable
 
 bbx:
@@ -400,8 +400,8 @@ bbx:
   ]
 
 bb12:
-  volatile store i32 0, i32* @XYZ
-  volatile store i32 1, i32* @XYZ
+  store volatile i32 0, i32* @XYZ
+  store volatile i32 1, i32* @XYZ
   unreachable
 
 return:
diff --git a/test/CodeGen/X86/tailcall-disable.ll b/test/CodeGen/X86/tailcall-disable.ll
new file mode 100644
index 0000000..b628f5e
--- /dev/null
+++ b/test/CodeGen/X86/tailcall-disable.ll
@@ -0,0 +1,40 @@
+; RUN: llc -disable-tail-calls < %s | FileCheck --check-prefix=CALL %s
+; RUN: llc < %s | FileCheck --check-prefix=JMP %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @helper() nounwind {
+entry:
+  ret i32 7
+}
+
+define i32 @test1() nounwind {
+entry:
+  %call = tail call i32 @helper()
+  ret i32 %call
+}
+
+; CALL: test1:
+; CALL-NOT: ret
+; CALL: callq helper
+; CALL: ret
+
+; JMP: test1:
+; JMP-NOT: ret
+; JMP: jmp helper # TAILCALL
+
+define i32 @test2() nounwind {
+entry:
+  %call = tail call i32 @test2()
+  ret i32 %call
+}
+
+; CALL: test2:
+; CALL-NOT: ret
+; CALL: callq test2
+; CALL: ret
+
+; JMP: test2:
+; JMP-NOT: ret
+; JMP: jmp test2 # TAILCALL
diff --git a/test/CodeGen/X86/tailcallbyval64.ll b/test/CodeGen/X86/tailcallbyval64.ll
index 7ecf379..7621602 100644
--- a/test/CodeGen/X86/tailcallbyval64.ll
+++ b/test/CodeGen/X86/tailcallbyval64.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux  -tailcallopt  | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux  -tailcallopt  | FileCheck %s
 
 ; FIXME: Win64 does not support byval.
 
diff --git a/test/CodeGen/X86/tailcallstack64.ll b/test/CodeGen/X86/tailcallstack64.ll
index c18c7aa..bff5f99 100644
--- a/test/CodeGen/X86/tailcallstack64.ll
+++ b/test/CodeGen/X86/tailcallstack64.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -tailcallopt -mtriple=x86_64-linux -post-RA-scheduler=true | FileCheck %s
-; RUN: llc < %s -tailcallopt -mtriple=x86_64-win32 -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -tailcallopt -mcpu=generic -mtriple=x86_64-linux -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -tailcallopt -mcpu=generic -mtriple=x86_64-win32 -post-RA-scheduler=true | FileCheck %s
 
 ; FIXME: Redundant unused stack allocation could be eliminated.
 ; CHECK: subq  ${{24|72|80}}, %rsp
diff --git a/test/CodeGen/X86/thiscall-struct-return.ll b/test/CodeGen/X86/thiscall-struct-return.ll
new file mode 100644
index 0000000..a7be483
--- /dev/null
+++ b/test/CodeGen/X86/thiscall-struct-return.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mtriple=i386-PC-Win32 | FileCheck %s
+
+%class.C = type { i8 }
+%struct.S = type { i32 }
+%struct.M = type { i32, i32 }
+
+declare void @_ZN1CC1Ev(%class.C* %this) unnamed_addr nounwind align 2
+declare x86_thiscallcc void @_ZNK1C5SmallEv(%struct.S* noalias sret %agg.result, %class.C* %this) nounwind align 2
+declare x86_thiscallcc void @_ZNK1C6MediumEv(%struct.M* noalias sret %agg.result, %class.C* %this) nounwind align 2
+
+define void @testv() nounwind {
+; CHECK: testv:
+; CHECK: leal
+; CHECK-NEXT: movl	%esi, (%esp)
+; CHECK-NEXT: calll _ZN1CC1Ev
+; CHECK: leal 8(%esp), %eax
+; CHECK-NEXT: movl %esi, %ecx
+; CHECK-NEXT: calll _ZNK1C5SmallEv
+entry:
+  %c = alloca %class.C, align 1
+  %tmp = alloca %struct.S, align 4
+  call void @_ZN1CC1Ev(%class.C* %c)
+  ; This call should put the return structure as a pointer
+  ; into EAX instead of returning directly in EAX.  The this
+  ; pointer should go into ECX
+  call x86_thiscallcc void @_ZNK1C5SmallEv(%struct.S* sret %tmp, %class.C* %c)
+  ret void
+}
+
+define void @test2v() nounwind {
+; CHECK: test2v:
+; CHECK: leal
+; CHECK-NEXT: movl	%esi, (%esp)
+; CHECK-NEXT: calll _ZN1CC1Ev
+; CHECK: leal 8(%esp), %eax
+; CHECK-NEXT: movl %esi, %ecx
+; CHECK-NEXT: calll _ZNK1C6MediumEv
+entry:
+  %c = alloca %class.C, align 1
+  %tmp = alloca %struct.M, align 4
+  call void @_ZN1CC1Ev(%class.C* %c)
+  ; This call should put the return structure as a pointer
+  ; into EAX instead of returning directly in EAX/EDX.  The this
+  ; pointer should go into ECX
+  call x86_thiscallcc void @_ZNK1C6MediumEv(%struct.M* sret %tmp, %class.C* %c)
+  ret void
+}
diff --git a/test/CodeGen/X86/tls-pie.ll b/test/CodeGen/X86/tls-pie.ll
new file mode 100644
index 0000000..e2e58a54
--- /dev/null
+++ b/test/CodeGen/X86/tls-pie.ll
@@ -0,0 +1,63 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -relocation-model=pic -enable-pie \
+; RUN:   | FileCheck -check-prefix=X32 %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic -enable-pie \
+; RUN:   | FileCheck -check-prefix=X64 %s
+
+@i = thread_local global i32 15
+@i2 = external thread_local global i32
+
+define i32 @f1() {
+; X32: f1:
+; X32:      movl %gs:i@NTPOFF, %eax
+; X32-NEXT: ret
+; X64: f1:
+; X64:      movl %fs:i@TPOFF, %eax
+; X64-NEXT: ret
+
+entry:
+	%tmp1 = load i32* @i
+	ret i32 %tmp1
+}
+
+define i32* @f2() {
+; X32: f2:
+; X32:      movl %gs:0, %eax
+; X32-NEXT: leal i@NTPOFF(%eax), %eax
+; X32-NEXT: ret
+; X64: f2:
+; X64:      movq %fs:0, %rax
+; X64-NEXT: leaq i@TPOFF(%rax), %rax
+; X64-NEXT: ret
+
+entry:
+	ret i32* @i
+}
+
+define i32 @f3() {
+; X32: f3:
+; X32:      movl i2@INDNTPOFF, %eax
+; X32-NEXT: movl %gs:(%eax), %eax
+; X32-NEXT: ret
+; X64: f3:
+; X64:      movq i2@GOTTPOFF(%rip), %rax
+; X64-NEXT: movl %fs:(%rax), %eax
+; X64-NEXT: ret
+
+entry:
+	%tmp1 = load i32* @i2
+	ret i32 %tmp1
+}
+
+define i32* @f4() {
+; X32: f4:
+; X32:      movl %gs:0, %eax
+; X32-NEXT: addl i2@INDNTPOFF, %eax
+; X32-NEXT: ret
+; X64: f4:
+; X64:      movq %fs:0, %rax
+; X64-NEXT: addq i2@GOTTPOFF(%rip), %rax
+; X64-NEXT: ret
+
+entry:
+	ret i32* @i2
+}
diff --git a/test/CodeGen/X86/tls.ll b/test/CodeGen/X86/tls.ll
new file mode 100644
index 0000000..e8a79bf
--- /dev/null
+++ b/test/CodeGen/X86/tls.ll
@@ -0,0 +1,329 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
+; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
+
+@i1 = thread_local global i32 15
+@i2 = external thread_local global i32
+@i3 = internal thread_local global i32 15
+@i4 = hidden thread_local global i32 15
+@i5 = external hidden thread_local global i32
+@s1 = thread_local global i16 15
+@b1 = thread_local global i8 0
+
+define i32 @f1() {
+; X32_LINUX: f1:
+; X32_LINUX:      movl %gs:i1@NTPOFF, %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f1:
+; X64_LINUX:      movl %fs:i1@TPOFF, %eax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f1:
+; X32_WIN:      movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: movl _i1@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f1:
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movl i1@SECREL(%rax), %eax
+; X64_WIN-NEXT: ret
+
+entry:
+	%tmp1 = load i32* @i1
+	ret i32 %tmp1
+}
+
+define i32* @f2() {
+; X32_LINUX: f2:
+; X32_LINUX:      movl %gs:0, %eax
+; X32_LINUX-NEXT: leal i1@NTPOFF(%eax), %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f2:
+; X64_LINUX:      movq %fs:0, %rax
+; X64_LINUX-NEXT: leaq i1@TPOFF(%rax), %rax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f2:
+; X32_WIN:      movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: leal _i1@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f2:
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: leaq i1@SECREL(%rax), %rax
+; X64_WIN-NEXT: ret
+
+entry:
+	ret i32* @i1
+}
+
+define i32 @f3() nounwind {
+; X32_LINUX: f3:
+; X32_LINUX:      movl i2@INDNTPOFF, %eax
+; X32_LINUX-NEXT: movl %gs:(%eax), %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f3:
+; X64_LINUX:      movq i2@GOTTPOFF(%rip), %rax
+; X64_LINUX-NEXT: movl %fs:(%rax), %eax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f3:
+; X32_WIN:      movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: movl _i2@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f3:
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movl i2@SECREL(%rax), %eax
+; X64_WIN-NEXT: ret
+
+entry:
+	%tmp1 = load i32* @i2
+	ret i32 %tmp1
+}
+
+define i32* @f4() {
+; X32_LINUX: f4:
+; X32_LINUX:      movl %gs:0, %eax
+; X32_LINUX-NEXT: addl i2@INDNTPOFF, %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f4:
+; X64_LINUX:      movq %fs:0, %rax
+; X64_LINUX-NEXT: addq i2@GOTTPOFF(%rip), %rax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f4:
+; X32_WIN:      movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: leal _i2@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f4:
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: leaq i2@SECREL(%rax), %rax
+; X64_WIN-NEXT: ret
+
+entry:
+	ret i32* @i2
+}
+
+define i32 @f5() nounwind {
+; X32_LINUX: f5:
+; X32_LINUX:      movl %gs:i3@NTPOFF, %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f5:
+; X64_LINUX:      movl %fs:i3@TPOFF, %eax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f5:
+; X32_WIN:      movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: movl _i3@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f5:
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movl i3@SECREL(%rax), %eax
+; X64_WIN-NEXT: ret
+
+entry:
+	%tmp1 = load i32* @i3
+	ret i32 %tmp1
+}
+
+define i32* @f6() {
+; X32_LINUX: f6:
+; X32_LINUX:      movl %gs:0, %eax
+; X32_LINUX-NEXT: leal i3@NTPOFF(%eax), %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f6:
+; X64_LINUX:      movq %fs:0, %rax
+; X64_LINUX-NEXT: leaq i3@TPOFF(%rax), %rax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f6:
+; X32_WIN:      movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: leal _i3@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f6:
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: leaq i3@SECREL(%rax), %rax
+; X64_WIN-NEXT: ret
+
+entry:
+	ret i32* @i3
+}
+
+define i32 @f7() {
+; X32_LINUX: f7:
+; X32_LINUX:      movl %gs:i4@NTPOFF, %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f7:
+; X64_LINUX:      movl %fs:i4@TPOFF, %eax
+; X64_LINUX-NEXT: ret
+
+entry:
+	%tmp1 = load i32* @i4
+	ret i32 %tmp1
+}
+
+define i32* @f8() {
+; X32_LINUX: f8:
+; X32_LINUX:      movl %gs:0, %eax
+; X32_LINUX-NEXT: leal i4@NTPOFF(%eax), %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f8:
+; X64_LINUX:      movq %fs:0, %rax
+; X64_LINUX-NEXT: leaq i4@TPOFF(%rax), %rax
+; X64_LINUX-NEXT: ret
+
+entry:
+	ret i32* @i4
+}
+
+define i32 @f9() {
+; X32_LINUX: f9:
+; X32_LINUX:      movl %gs:i5@NTPOFF, %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f9:
+; X64_LINUX:      movl %fs:i5@TPOFF, %eax
+; X64_LINUX-NEXT: ret
+
+entry:
+	%tmp1 = load i32* @i5
+	ret i32 %tmp1
+}
+
+define i32* @f10() {
+; X32_LINUX: f10:
+; X32_LINUX:      movl %gs:0, %eax
+; X32_LINUX-NEXT: leal i5@NTPOFF(%eax), %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f10:
+; X64_LINUX:      movq %fs:0, %rax
+; X64_LINUX-NEXT: leaq i5@TPOFF(%rax), %rax
+; X64_LINUX-NEXT: ret
+
+entry:
+	ret i32* @i5
+}
+
+define i16 @f11() {
+; X32_LINUX: f11:
+; X32_LINUX:      movzwl %gs:s1@NTPOFF, %eax
+; Why is this kill line here, but no where else?
+; X32_LINUX-NEXT: # kill
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f11:
+; X64_LINUX:      movzwl %fs:s1@TPOFF, %eax
+; X64_LINUX-NEXT: # kill
+; X64_LINUX-NEXT: ret
+; X32_WIN: f11:
+; X32_WIN:      movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: movzwl _s1@SECREL(%eax), %eax
+; X32_WIN-NEXT: # kill
+; X32_WIN-NEXT: ret
+; X64_WIN: f11:
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movzwl s1@SECREL(%rax), %eax
+; X64_WIN-NEXT: # kill
+; X64_WIN-NEXT: ret
+
+entry:
+	%tmp1 = load i16* @s1
+	ret i16 %tmp1
+}
+
+define i32 @f12() {
+; X32_LINUX: f12:
+; X32_LINUX:      movswl %gs:s1@NTPOFF, %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f12:
+; X64_LINUX:      movswl %fs:s1@TPOFF, %eax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f12:
+; X32_WIN:      movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: movswl _s1@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f12:
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movswl s1@SECREL(%rax), %eax
+; X64_WIN-NEXT: ret
+
+entry:
+	%tmp1 = load i16* @s1
+  %tmp2 = sext i16 %tmp1 to i32
+	ret i32 %tmp2
+}
+
+define i8 @f13() {
+; X32_LINUX: f13:
+; X32_LINUX:      movb %gs:b1@NTPOFF, %al
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f13:
+; X64_LINUX:      movb %fs:b1@TPOFF, %al
+; X64_LINUX-NEXT: ret
+; X32_WIN: f13:
+; X32_WIN:      movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: movb _b1@SECREL(%eax), %al
+; X32_WIN-NEXT: ret
+; X64_WIN: f13:
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movb b1@SECREL(%rax), %al
+; X64_WIN-NEXT: ret
+
+entry:
+	%tmp1 = load i8* @b1
+	ret i8 %tmp1
+}
+
+define i32 @f14() {
+; X32_LINUX: f14:
+; X32_LINUX:      movsbl %gs:b1@NTPOFF, %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f14:
+; X64_LINUX:      movsbl %fs:b1@TPOFF, %eax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f14:
+; X32_WIN:      movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: movsbl _b1@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f14:
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movsbl b1@SECREL(%rax), %eax
+; X64_WIN-NEXT: ret
+
+entry:
+	%tmp1 = load i8* @b1
+  %tmp2 = sext i8 %tmp1 to i32
+	ret i32 %tmp2
+}
+
diff --git a/test/CodeGen/X86/tls1.ll b/test/CodeGen/X86/tls1.ll
deleted file mode 100644
index 0cae5c4..0000000
--- a/test/CodeGen/X86/tls1.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:i@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movl	%fs:i@TPOFF, %eax} %t2
-
-@i = thread_local global i32 15
-
-define i32 @f() nounwind {
-entry:
-	%tmp1 = load i32* @i
-	ret i32 %tmp1
-}
diff --git a/test/CodeGen/X86/tls10.ll b/test/CodeGen/X86/tls10.ll
deleted file mode 100644
index fb61596..0000000
--- a/test/CodeGen/X86/tls10.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:0, %eax} %t
-; RUN: grep {leal	i@NTPOFF(%eax), %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq	%fs:0, %rax} %t2
-; RUN: grep {leaq	i@TPOFF(%rax), %rax} %t2
-
-@i = external hidden thread_local global i32
-
-define i32* @f() {
-entry:
-	ret i32* @i
-}
diff --git a/test/CodeGen/X86/tls11.ll b/test/CodeGen/X86/tls11.ll
deleted file mode 100644
index 514a168..0000000
--- a/test/CodeGen/X86/tls11.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movzwl	%gs:i@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movzwl	%fs:i@TPOFF, %eax} %t2
-
-@i = thread_local global i16 15
-
-define i16 @f() {
-entry:
-	%tmp1 = load i16* @i
-	ret i16 %tmp1
-}
diff --git a/test/CodeGen/X86/tls12.ll b/test/CodeGen/X86/tls12.ll
deleted file mode 100644
index c29f6ad..0000000
--- a/test/CodeGen/X86/tls12.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movb	%gs:i@NTPOFF, %al} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movb	%fs:i@TPOFF, %al} %t2
-
-@i = thread_local global i8 15
-
-define i8 @f() {
-entry:
-	%tmp1 = load i8* @i
-	ret i8 %tmp1
-}
diff --git a/test/CodeGen/X86/tls13.ll b/test/CodeGen/X86/tls13.ll
deleted file mode 100644
index 08778ec..0000000
--- a/test/CodeGen/X86/tls13.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movswl	%gs:i@NTPOFF, %eax} %t
-; RUN: grep {movzwl	%gs:j@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movswl	%fs:i@TPOFF, %edi} %t2
-; RUN: grep {movzwl	%fs:j@TPOFF, %edi} %t2
-
-@i = thread_local global i16 0
-@j = thread_local global i16 0
-
-define void @f() nounwind optsize {
-entry:
-        %0 = load i16* @i, align 2
-        %1 = sext i16 %0 to i32
-        tail call void @g(i32 %1) nounwind
-        %2 = load i16* @j, align 2
-        %3 = zext i16 %2 to i32
-        tail call void @h(i32 %3) nounwind
-        ret void
-}
-
-declare void @g(i32)
-
-declare void @h(i32)
diff --git a/test/CodeGen/X86/tls14.ll b/test/CodeGen/X86/tls14.ll
deleted file mode 100644
index 88426dd..0000000
--- a/test/CodeGen/X86/tls14.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movsbl	%gs:i@NTPOFF, %eax} %t
-; RUN: grep {movzbl	%gs:j@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movsbl	%fs:i@TPOFF, %edi} %t2
-; RUN: grep {movzbl	%fs:j@TPOFF, %edi} %t2
-
-@i = thread_local global i8 0
-@j = thread_local global i8 0
-
-define void @f() nounwind optsize {
-entry:
-        %0 = load i8* @i, align 2
-        %1 = sext i8 %0 to i32
-        tail call void @g(i32 %1) nounwind
-        %2 = load i8* @j, align 2
-        %3 = zext i8 %2 to i32
-        tail call void @h(i32 %3) nounwind
-        ret void
-}
-
-declare void @g(i32)
-
-declare void @h(i32)
diff --git a/test/CodeGen/X86/tls15.ll b/test/CodeGen/X86/tls15.ll
deleted file mode 100644
index 7abf070..0000000
--- a/test/CodeGen/X86/tls15.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:0, %eax} %t | count 1
-; RUN: grep {leal	i@NTPOFF(%eax), %ecx} %t
-; RUN: grep {leal	j@NTPOFF(%eax), %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq	%fs:0, %rax} %t2 | count 1
-; RUN: grep {leaq	i@TPOFF(%rax), %rcx} %t2
-; RUN: grep {leaq	j@TPOFF(%rax), %rax} %t2
-
-@i = thread_local global i32 0
-@j = thread_local global i32 0
-
-define void @f(i32** %a, i32** %b) {
-entry:
-	store i32* @i, i32** %a, align 8
-	store i32* @j, i32** %b, align 8
-	ret void
-}
diff --git a/test/CodeGen/X86/tls2.ll b/test/CodeGen/X86/tls2.ll
deleted file mode 100644
index 5a94296..0000000
--- a/test/CodeGen/X86/tls2.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:0, %eax} %t
-; RUN: grep {leal	i@NTPOFF(%eax), %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq	%fs:0, %rax} %t2
-; RUN: grep {leaq	i@TPOFF(%rax), %rax} %t2
-
-@i = thread_local global i32 15
-
-define i32* @f() {
-entry:
-	ret i32* @i
-}
diff --git a/test/CodeGen/X86/tls3.ll b/test/CodeGen/X86/tls3.ll
deleted file mode 100644
index 7327cc4..0000000
--- a/test/CodeGen/X86/tls3.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	i@INDNTPOFF, %eax} %t
-; RUN: grep {movl	%gs:(%eax), %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq	i@GOTTPOFF(%rip), %rax} %t2
-; RUN: grep {movl	%fs:(%rax), %eax} %t2
-
-@i = external thread_local global i32		; <i32*> [#uses=2]
-
-define i32 @f() nounwind {
-entry:
-	%tmp1 = load i32* @i		; <i32> [#uses=1]
-	ret i32 %tmp1
-}
diff --git a/test/CodeGen/X86/tls4.ll b/test/CodeGen/X86/tls4.ll
deleted file mode 100644
index d2e40e3..0000000
--- a/test/CodeGen/X86/tls4.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:0, %eax} %t
-; RUN: grep {addl	i@INDNTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq	%fs:0, %rax} %t2
-; RUN: grep {addq	i@GOTTPOFF(%rip), %rax} %t2
-
-@i = external thread_local global i32		; <i32*> [#uses=2]
-
-define i32* @f() {
-entry:
-	ret i32* @i
-}
diff --git a/test/CodeGen/X86/tls5.ll b/test/CodeGen/X86/tls5.ll
deleted file mode 100644
index 4d2cc02..0000000
--- a/test/CodeGen/X86/tls5.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:i@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movl	%fs:i@TPOFF, %eax} %t2
-
-@i = internal thread_local global i32 15
-
-define i32 @f() {
-entry:
-	%tmp1 = load i32* @i
-	ret i32 %tmp1
-}
diff --git a/test/CodeGen/X86/tls6.ll b/test/CodeGen/X86/tls6.ll
deleted file mode 100644
index 505106e..0000000
--- a/test/CodeGen/X86/tls6.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:0, %eax} %t
-; RUN: grep {leal	i@NTPOFF(%eax), %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq	%fs:0, %rax} %t2
-; RUN: grep {leaq	i@TPOFF(%rax), %rax} %t2
-
-@i = internal thread_local global i32 15
-
-define i32* @f() {
-entry:
-	ret i32* @i
-}
diff --git a/test/CodeGen/X86/tls7.ll b/test/CodeGen/X86/tls7.ll
deleted file mode 100644
index e9116e7..0000000
--- a/test/CodeGen/X86/tls7.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:i@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movl	%fs:i@TPOFF, %eax} %t2
-
-@i = hidden thread_local global i32 15
-
-define i32 @f() {
-entry:
-	%tmp1 = load i32* @i
-	ret i32 %tmp1
-}
diff --git a/test/CodeGen/X86/tls8.ll b/test/CodeGen/X86/tls8.ll
deleted file mode 100644
index 375af94..0000000
--- a/test/CodeGen/X86/tls8.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:0, %eax} %t
-; RUN: grep {leal	i@NTPOFF(%eax), %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq	%fs:0, %rax} %t2
-; RUN: grep {leaq	i@TPOFF(%rax), %rax} %t2
-
-@i = hidden thread_local global i32 15
-
-define i32* @f() {
-entry:
-	ret i32* @i
-}
diff --git a/test/CodeGen/X86/tls9.ll b/test/CodeGen/X86/tls9.ll
deleted file mode 100644
index 7d08df8..0000000
--- a/test/CodeGen/X86/tls9.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:i@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movl	%fs:i@TPOFF, %eax} %t2
-
-@i = external hidden thread_local global i32
-
-define i32 @f() nounwind {
-entry:
-	%tmp1 = load i32* @i
-	ret i32 %tmp1
-}
diff --git a/test/CodeGen/X86/twoaddr-lea.ll b/test/CodeGen/X86/twoaddr-lea.ll
index a1d797f..9d58019 100644
--- a/test/CodeGen/X86/twoaddr-lea.ll
+++ b/test/CodeGen/X86/twoaddr-lea.ll
@@ -5,7 +5,7 @@
 ;; allocator turns the shift into an LEA.  This also occurs for ADD.
 
 ; Check that the shift gets turned into an LEA.
-; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-darwin | FileCheck %s
 
 @G = external global i32
 
@@ -14,7 +14,7 @@ define i32 @test1(i32 %X) nounwind {
 ; CHECK-NOT: mov
 ; CHECK: leal 1(%rdi)
         %Z = add i32 %X, 1
-        volatile store i32 %Z, i32* @G
+        store volatile i32 %Z, i32* @G
         ret i32 %X
 }
 
diff --git a/test/CodeGen/X86/uint64-to-float.ll b/test/CodeGen/X86/uint64-to-float.ll
index 1dbbdcf..e853e77 100644
--- a/test/CodeGen/X86/uint64-to-float.ll
+++ b/test/CodeGen/X86/uint64-to-float.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86-64 | FileCheck %s
 ; Verify that we are using the efficient uitofp --> sitofp lowering illustrated
 ; by the compiler_rt implementation of __floatundisf.
 ; <rdar://problem/8493982>
@@ -6,37 +6,12 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
 
-; FIXME: This test could generate this code:
-;
-; ## BB#0:                                ## %entry
-; 	testq	%rdi, %rdi
-; 	jns	LBB0_2
-; ## BB#1:
-; 	movq	%rdi, %rax
-; 	shrq	%rax
-; 	andq	$1, %rdi
-; 	orq	%rax, %rdi
-; 	cvtsi2ssq	%rdi, %xmm0
-; 	addss	%xmm0, %xmm0
-; 	ret
-; LBB0_2:                                 ## %entry
-; 	cvtsi2ssq	%rdi, %xmm0
-; 	ret
-;
-; The blocks come from lowering:
-;
-;   %vreg7<def> = CMOV_FR32 %vreg6<kill>, %vreg5<kill>, 15, %EFLAGS<imp-use>; FR32:%vreg7,%vreg6,%vreg5
-;
-; If the instruction had an EFLAGS<kill> flag, it wouldn't need to mark EFLAGS
-; as live-in on the new blocks, and machine sinking would be able to sink
-; everything below the test.
-
-; CHECK: shrq
-; CHECK: andq
-; CHECK-NEXT: orq
 ; CHECK: testq %rdi, %rdi
 ; CHECK-NEXT: jns LBB0_2
-; CHECK: cvtsi2ss
+; CHECK: shrq
+; CHECK-NEXT: andq
+; CHECK-NEXT: orq
+; CHECK-NEXT: cvtsi2ss
 ; CHECK: LBB0_2
 ; CHECK-NEXT: cvtsi2ss
 define float @test(i64 %a) {
diff --git a/test/CodeGen/X86/unreachable-stack-protector.ll b/test/CodeGen/X86/unreachable-stack-protector.ll
index eeebcee..b066297 100644
--- a/test/CodeGen/X86/unreachable-stack-protector.ll
+++ b/test/CodeGen/X86/unreachable-stack-protector.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc < %s -disable-cgp-delete-dead-blocks | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
 
diff --git a/test/CodeGen/X86/utf16-cfstrings.ll b/test/CodeGen/X86/utf16-cfstrings.ll
new file mode 100644
index 0000000..af76a33
--- /dev/null
+++ b/test/CodeGen/X86/utf16-cfstrings.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -mtriple x86_64-apple-macosx10 | FileCheck %s
+; <rdar://problem/10655949>
+
+%0 = type opaque
+%struct.NSConstantString = type { i32*, i32, i8*, i64 }
+
+@__CFConstantStringClassReference = external global [0 x i32]
+@.str = internal unnamed_addr constant [5 x i16] [i16 252, i16 98, i16 101, i16 114, i16 0], align 2
+@_unnamed_cfstring_ = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 2000, i8* bitcast ([5 x i16]* @.str to i8*), i64 4 }, section "__DATA,__cfstring"
+
+; CHECK:         .section      __TEXT,__ustring
+; CHECK-NEXT:    .align        1
+; CHECK-NEXT: _.str:
+; CHECK-NEXT:    .short  252     ## 0xfc
+; CHECK-NEXT:    .short  98      ## 0x62
+; CHECK-NEXT:    .short  101     ## 0x65
+; CHECK-NEXT:    .short  114     ## 0x72
+; CHECK-NEXT:    .short  0       ## 0x0
+
+define i32 @main() uwtable ssp {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  call void (%0*, ...)* @NSLog(%0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_ to %0*))
+  ret i32 0
+}
+
+declare void @NSLog(%0*, ...)
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+
+!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
+!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
+!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!3 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
diff --git a/test/CodeGen/X86/utf8.ll b/test/CodeGen/X86/utf8.ll
new file mode 100644
index 0000000..67bc5ae2
--- /dev/null
+++ b/test/CodeGen/X86/utf8.ll
@@ -0,0 +1,4 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+; CHECK: iΔ
+@"i\CE\94" = common global i32 0, align 4
diff --git a/test/CodeGen/X86/v-binop-widen.ll b/test/CodeGen/X86/v-binop-widen.ll
index 3bee700..8655c6c 100644
--- a/test/CodeGen/X86/v-binop-widen.ll
+++ b/test/CodeGen/X86/v-binop-widen.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=x86 -mattr=+sse < %s | FileCheck %s
+; RUN: llc -mcpu=generic -march=x86 -mattr=+sse < %s | FileCheck %s
 ; CHECK: divss
 ; CHECK: divps
 ; CHECK: divps
diff --git a/test/CodeGen/X86/vec_call.ll b/test/CodeGen/X86/vec_call.ll
index b3efc7b..f2fc7e7 100644
--- a/test/CodeGen/X86/vec_call.ll
+++ b/test/CodeGen/X86/vec_call.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
 ; RUN:   grep {subl.*60}
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
 ; RUN:   grep {movaps.*32}
 
 
diff --git a/test/CodeGen/X86/vec_compare-2.ll b/test/CodeGen/X86/vec_compare-2.ll
index 04bb725..91777f7 100644
--- a/test/CodeGen/X86/vec_compare-2.ll
+++ b/test/CodeGen/X86/vec_compare-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=penryn | FileCheck %s
+; RUN: llc < %s -mtriple=i686-linux -mcpu=penryn | FileCheck %s
 
 declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
 
@@ -8,9 +8,12 @@ declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
 
 define void @blackDespeckle_wrapper(i8** %args_list, i64* %gtid, i64 %xend) {
 entry:
+; CHECK: cfi_def_cfa_offset
 ; CHECK-NOT: set
-; CHECK: pcmpgt
-; CHECK: blendvps
+; CHECK: movzwl
+; CHECK: movzwl
+; CHECK: pshufd
+; CHECK: pshufb
   %shr.i = ashr <4 x i32> zeroinitializer, <i32 3, i32 3, i32 3, i32 3> ; <<4 x i32>> [#uses=1]
   %cmp318.i = sext <4 x i1> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=1]
   %sub322.i = sub <4 x i32> %shr.i, zeroinitializer ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_ctbits.ll b/test/CodeGen/X86/vec_ctbits.ll
index f0158d6..bddd535 100644
--- a/test/CodeGen/X86/vec_ctbits.ll
+++ b/test/CodeGen/X86/vec_ctbits.ll
@@ -1,15 +1,15 @@
 ; RUN: llc < %s -march=x86-64
 
-declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>)
-declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>)
+declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)
+declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)
 declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
 
 define <2 x i64> @footz(<2 x i64> %a) nounwind {
-  %c = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a)
+  %c = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
   ret <2 x i64> %c
 }
 define <2 x i64> @foolz(<2 x i64> %a) nounwind {
-  %c = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a)
+  %c = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true)
   ret <2 x i64> %c
 }
 define <2 x i64> @foopop(<2 x i64> %a) nounwind {
diff --git a/test/CodeGen/X86/vec_extract-sse4.ll b/test/CodeGen/X86/vec_extract-sse4.ll
index f487654..42d7f27 100644
--- a/test/CodeGen/X86/vec_extract-sse4.ll
+++ b/test/CodeGen/X86/vec_extract-sse4.ll
@@ -2,7 +2,7 @@
 ; RUN: not grep extractps   %t
 ; RUN: not grep pextrd      %t
 ; RUN: not grep pshufd  %t
-; RUN: grep movss   %t | count 2
+; RUN: not grep movss   %t
 
 define void @t1(float* %R, <4 x float>* %P1) nounwind {
 	%X = load <4 x float>* %P1
diff --git a/test/CodeGen/X86/vec_fpext.ll b/test/CodeGen/X86/vec_fpext.ll
new file mode 100644
index 0000000..05b263e
--- /dev/null
+++ b/test/CodeGen/X86/vec_fpext.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 -mattr=+sse41,-avx | FileCheck %s
+
+; PR11674
+define void @fpext_frommem(<2 x float>* %in, <2 x double>* %out) {
+entry:
+; TODO: We should be able to generate cvtps2pd for the load.
+; For now, just check that we generate something sane.
+; CHECK: cvtss2sd
+; CHECK: cvtss2sd
+  %0 = load <2 x float>* %in, align 8
+  %1 = fpext <2 x float> %0 to <2 x double>
+  store <2 x double> %1, <2 x double>* %out, align 1
+  ret void
+}
diff --git a/test/CodeGen/X86/vec_return.ll b/test/CodeGen/X86/vec_return.ll
index 676be9b..2cf5dc6 100644
--- a/test/CodeGen/X86/vec_return.ll
+++ b/test/CodeGen/X86/vec_return.ll
@@ -1,12 +1,16 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
-; RUN: grep pxor %t | count 1
-; RUN: grep movaps %t | count 1
-; RUN: not grep shuf %t
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
 
+; Without any typed operations, always use the smaller xorps.
+; CHECK: test
+; CHECK: xorps
 define <2 x double> @test() {
 	ret <2 x double> zeroinitializer
 }
 
+; Prefer a constant pool load here.
+; CHECK: test2
+; CHECK-NOT: shuf
+; CHECK: movaps {{.*}}CPI
 define <4 x i32> @test2() nounwind  {
 	ret <4 x i32> < i32 0, i32 0, i32 1, i32 0 >
 }
diff --git a/test/CodeGen/X86/vec_shuffle-20.ll b/test/CodeGen/X86/vec_shuffle-20.ll
index fc06b95..b6b8ba6 100644
--- a/test/CodeGen/X86/vec_shuffle-20.ll
+++ b/test/CodeGen/X86/vec_shuffle-20.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3
+; RUN: llc < %s -o /dev/null -march=x86 -mcpu=corei7 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 2
 
 define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) nounwind  {
 entry:
diff --git a/test/CodeGen/X86/vec_shuffle-23.ll b/test/CodeGen/X86/vec_shuffle-23.ll
index 05a3a1e..2468735 100644
--- a/test/CodeGen/X86/vec_shuffle-23.ll
+++ b/test/CodeGen/X86/vec_shuffle-23.ll
@@ -5,7 +5,7 @@ define i32 @t() nounwind {
 entry:
 	%a = alloca <4 x i32>		; <<4 x i32>*> [#uses=2]
 	%b = alloca <4 x i32>		; <<4 x i32>*> [#uses=5]
-	volatile store <4 x i32> < i32 0, i32 1, i32 2, i32 3 >, <4 x i32>* %a
+	store volatile <4 x i32> < i32 0, i32 1, i32 2, i32 3 >, <4 x i32>* %a
 	%tmp = load <4 x i32>* %a		; <<4 x i32>> [#uses=1]
 	store <4 x i32> %tmp, <4 x i32>* %b
 	%tmp1 = load <4 x i32>* %b		; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_shuffle-24.ll b/test/CodeGen/X86/vec_shuffle-24.ll
index 1b104de..d038daf 100644
--- a/test/CodeGen/X86/vec_shuffle-24.ll
+++ b/test/CodeGen/X86/vec_shuffle-24.ll
@@ -5,7 +5,7 @@ entry:
 ; CHECK: punpckldq
 	%a = alloca <4 x i32>		; <<4 x i32>*> [#uses=2]
 	%b = alloca <4 x i32>		; <<4 x i32>*> [#uses=5]
-	volatile store <4 x i32> < i32 0, i32 1, i32 2, i32 3 >, <4 x i32>* %a
+	store volatile <4 x i32> < i32 0, i32 1, i32 2, i32 3 >, <4 x i32>* %a
 	%tmp = load <4 x i32>* %a		; <<4 x i32>> [#uses=1]
 	store <4 x i32> %tmp, <4 x i32>* %b
 	%tmp1 = load <4 x i32>* %b		; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_shuffle-37.ll b/test/CodeGen/X86/vec_shuffle-37.ll
index 950040a..430aa04 100644
--- a/test/CodeGen/X86/vec_shuffle-37.ll
+++ b/test/CodeGen/X86/vec_shuffle-37.ll
@@ -4,10 +4,10 @@
 
 define <4 x i32> @t00(<4 x i32>* %a0) nounwind ssp {
 entry:
-; CHECK: movaps  ({{%rdi|%rcx}}), %xmm0
-; CHECK: movaps  %xmm0, %xmm1
-; CHECK-NEXT: movss   %xmm2, %xmm1
-; CHECK-NEXT: shufps  $36, %xmm1, %xmm0
+; CHECK: movaps  ({{%rdi|%rcx}}), %[[XMM0:xmm[0-9]+]]
+; CHECK: movaps  %[[XMM0]], %[[XMM1:xmm[0-9]+]]
+; CHECK-NEXT: movss   %xmm{{[0-9]+}}, %[[XMM1]]
+; CHECK-NEXT: shufps  $36, %[[XMM1]], %[[XMM0]]
   %0 = load <4 x i32>* undef, align 16
   %1 = load <4 x i32>* %a0, align 16
   %2 = shufflevector <4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
@@ -26,10 +26,12 @@ entry:
 
 define void @t02(<8 x i32>* %source, <2 x i32>* %dest) nounwind noinline {
 entry:
-; CHECK: movaps  32({{%rdi|%rcx}}), %xmm0
-; CHECK-NEXT: movaps  48({{%rdi|%rcx}}), %xmm1
-; CHECK-NEXT: movss   %xmm1, %xmm0
-; CHECK-NEXT: movq    %xmm0, ({{%rsi|%rdx}}) 
+; CHECK: t02
+; CHECK: movaps
+; CHECK: shufps
+; CHECK: pshufd
+; CHECK: movq
+; CHECK: ret
   %0 = bitcast <8 x i32>* %source to <4 x i32>*
   %arrayidx = getelementptr inbounds <4 x i32>* %0, i64 3
   %tmp2 = load <4 x i32>* %arrayidx, align 16
diff --git a/test/CodeGen/X86/vec_shuffle-38.ll b/test/CodeGen/X86/vec_shuffle-38.ll
index 69a2ede..96ef883 100644
--- a/test/CodeGen/X86/vec_shuffle-38.ll
+++ b/test/CodeGen/X86/vec_shuffle-38.ll
@@ -46,10 +46,9 @@ entry:
 
 ; rdar://10119696
 ; CHECK: f
-define <4 x float> @f(<4 x float> %x, double* nocapture %y) nounwind uwtable readonly ssp {
+define <4 x float> @f(<4 x float> %x, double* nocapture %y) nounwind readonly ssp {
 entry:
-  ; CHECK: movsd  (%
-  ; CHECK-NEXT: movsd  %xmm
+  ; CHECK: movlps  (%{{rdi|rdx}}), %xmm0
   %u110.i = load double* %y, align 1
   %tmp8.i = insertelement <2 x double> undef, double %u110.i, i32 0
   %tmp9.i = bitcast <2 x double> %tmp8.i to <4 x float>
@@ -57,3 +56,22 @@ entry:
   ret <4 x float> %shuffle.i
 }
 
+define <4 x float> @loadhpi2(%struct.Float2* nocapture %vHiCoefPtr_0, %struct.Float2* nocapture %vLoCoefPtr_0, i32 %s) nounwind readonly ssp {
+entry:
+; CHECK: loadhpi2
+; CHECK: movhps (
+; CHECK-NOT: movlhps
+  %0 = bitcast %struct.Float2* %vHiCoefPtr_0 to <1 x i64>*
+  %idx.ext = sext i32 %s to i64
+  %add.ptr = getelementptr inbounds <1 x i64>* %0, i64 %idx.ext
+  %add.ptr.val = load <1 x i64>* %add.ptr, align 1
+  %1 = bitcast <1 x i64> %add.ptr.val to <2 x float>
+  %shuffle.i = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %2 = bitcast %struct.Float2* %vLoCoefPtr_0 to <1 x i64>*
+  %add.ptr2 = getelementptr inbounds <1 x i64>* %2, i64 %idx.ext
+  %add.ptr2.val = load <1 x i64>* %add.ptr2, align 1
+  %3 = bitcast <1 x i64> %add.ptr2.val to <2 x float>
+  %shuffle.i4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %shuffle1.i5 = shufflevector <4 x float> %shuffle.i, <4 x float> %shuffle.i4, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  ret <4 x float> %shuffle1.i5
+}
diff --git a/test/CodeGen/X86/vec_shuffle-39.ll b/test/CodeGen/X86/vec_shuffle-39.ll
new file mode 100644
index 0000000..55531e3
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-39.ll
@@ -0,0 +1,86 @@
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; rdar://10050222, rdar://10134392
+
+define <4 x float> @t1(<4 x float> %a, <1 x i64>* nocapture %p) nounwind {
+entry:
+; CHECK: t1:
+; CHECK: movlps (%rdi), %xmm0
+; CHECK: ret
+  %p.val = load <1 x i64>* %p, align 1
+  %0 = bitcast <1 x i64> %p.val to <2 x float>
+  %shuffle.i = shufflevector <2 x float> %0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %shuffle1.i = shufflevector <4 x float> %a, <4 x float> %shuffle.i, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  ret <4 x float> %shuffle1.i
+}
+
+define <4 x float> @t1a(<4 x float> %a, <1 x i64>* nocapture %p) nounwind {
+entry:
+; CHECK: t1a:
+; CHECK: movlps (%rdi), %xmm0
+; CHECK: ret
+  %0 = bitcast <1 x i64>* %p to double*
+  %1 = load double* %0
+  %2 = insertelement <2 x double> undef, double %1, i32 0
+  %3 = bitcast <2 x double> %2 to <4 x float>
+  %4 = shufflevector <4 x float> %a, <4 x float> %3, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  ret <4 x float> %4
+}
+
+define void @t2(<1 x i64>* nocapture %p, <4 x float> %a) nounwind {
+entry:
+; CHECK: t2:
+; CHECK: movlps %xmm0, (%rdi)
+; CHECK: ret
+  %cast.i = bitcast <4 x float> %a to <2 x i64>
+  %extract.i = extractelement <2 x i64> %cast.i, i32 0
+  %0 = getelementptr inbounds <1 x i64>* %p, i64 0, i64 0
+  store i64 %extract.i, i64* %0, align 8
+  ret void
+}
+
+define void @t2a(<1 x i64>* nocapture %p, <4 x float> %a) nounwind {
+entry:
+; CHECK: t2a:
+; CHECK: movlps %xmm0, (%rdi)
+; CHECK: ret
+  %0 = bitcast <1 x i64>* %p to double*
+  %1 = bitcast <4 x float> %a to <2 x double>
+  %2 = extractelement <2 x double> %1, i32 0
+  store double %2, double* %0
+  ret void
+}
+
+; rdar://10436044
+define <2 x double> @t3() nounwind readonly {
+bb:
+; CHECK: t3:
+; CHECK: punpcklqdq %xmm1, %xmm0
+; CHECK: movq (%rax), %xmm1
+; CHECK: movsd %xmm1, %xmm0
+  %tmp0 = load i128* null, align 1
+  %tmp1 = load <2 x i32>* undef, align 8
+  %tmp2 = bitcast i128 %tmp0 to <16 x i8>
+  %tmp3 = bitcast <2 x i32> %tmp1 to i64
+  %tmp4 = insertelement <2 x i64> undef, i64 %tmp3, i32 0
+  %tmp5 = bitcast <16 x i8> %tmp2 to <2 x double>
+  %tmp6 = bitcast <2 x i64> %tmp4 to <2 x double>
+  %tmp7 = shufflevector <2 x double> %tmp5, <2 x double> %tmp6, <2 x i32> <i32 2, i32 1>
+  ret <2 x double> %tmp7
+}
+
+; rdar://10450317
+define <2 x i64> @t4() nounwind readonly {
+bb:
+; CHECK: t4:
+; CHECK: punpcklqdq %xmm0, %xmm1
+; CHECK: movq (%rax), %xmm0
+; CHECK: movsd %xmm1, %xmm0
+  %tmp0 = load i128* null, align 1
+  %tmp1 = load <2 x i32>* undef, align 8
+  %tmp2 = bitcast i128 %tmp0 to <16 x i8>
+  %tmp3 = bitcast <2 x i32> %tmp1 to i64
+  %tmp4 = insertelement <2 x i64> undef, i64 %tmp3, i32 0
+  %tmp5 = bitcast <16 x i8> %tmp2 to <2 x i64>
+  %tmp6 = shufflevector <2 x i64> %tmp4, <2 x i64> %tmp5, <2 x i32> <i32 2, i32 1>
+  ret <2 x i64> %tmp6
+}
diff --git a/test/CodeGen/X86/vec_shuffle.ll b/test/CodeGen/X86/vec_shuffle.ll
index 2a48de2..6599598 100644
--- a/test/CodeGen/X86/vec_shuffle.ll
+++ b/test/CodeGen/X86/vec_shuffle.ll
@@ -1,9 +1,8 @@
-; RUN: llc < %s -march=x86 -mcpu=core2 -o %t
-; RUN: grep movq    %t | count 1
-; RUN: grep pshufd  %t | count 1
-; RUN: grep movupd  %t | count 1
-; RUN: grep pshufhw %t | count 1
+; RUN: llc < %s -mtriple=i686-linux -mcpu=core2 | FileCheck %s
 
+; CHECK: test_v4sf
+; CHECK: movq 8(%esp)
+; CHECK: pshufd $80
 define void @test_v4sf(<4 x float>* %P, float %X, float %Y) nounwind {
 	%tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0		; <<4 x float>> [#uses=1]
 	%tmp2 = insertelement <4 x float> %tmp, float %X, i32 1		; <<4 x float>> [#uses=1]
@@ -13,6 +12,9 @@ define void @test_v4sf(<4 x float>* %P, float %X, float %Y) nounwind {
 	ret void
 }
 
+; CHECK: test_v2sd
+; CHECK: movups	8(%esp)
+; CHECK: movaps
 define void @test_v2sd(<2 x double>* %P, double %X, double %Y) nounwind {
 	%tmp = insertelement <2 x double> zeroinitializer, double %X, i32 0		; <<2 x double>> [#uses=1]
 	%tmp2 = insertelement <2 x double> %tmp, double %Y, i32 1		; <<2 x double>> [#uses=1]
@@ -20,6 +22,9 @@ define void @test_v2sd(<2 x double>* %P, double %X, double %Y) nounwind {
 	ret void
 }
 
+; CHECK: test_v8i16
+; CHECK: pshufhw $-58
+; CHECK: movdqa
 define void @test_v8i16(<2 x i64>* %res, <2 x i64>* %A) nounwind {
 	%tmp = load <2 x i64>* %A		; <<2 x i64>> [#uses=1]
 	%tmp.upgrd.1 = bitcast <2 x i64> %tmp to <8 x i16>		; <<8 x i16>> [#uses=8]
diff --git a/test/CodeGen/X86/vec_udiv_to_shift.ll b/test/CodeGen/X86/vec_udiv_to_shift.ll
new file mode 100644
index 0000000..6edfcc0
--- /dev/null
+++ b/test/CodeGen/X86/vec_udiv_to_shift.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define <8 x i16> @udiv_vec8x16(<8 x i16> %var) {
+entry:
+; CHECK: lshr <8 x i16> %var, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+%0 = udiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
+ret <8 x i16> %0
+}
+
+define <4 x i32> @udiv_vec4x32(<4 x i32> %var) {
+entry:
+; CHECK: lshr <4 x i32> %var, <i32 4, i32 4, i32 4, i32 4>
+%0 = udiv <4 x i32> %var, <i32 16, i32 16, i32 16, i32 16>
+ret <4 x i32> %0
+}
diff --git a/test/CodeGen/X86/vec_zero.ll b/test/CodeGen/X86/vec_zero.ll
index 4d1f056..682a0df 100644
--- a/test/CodeGen/X86/vec_zero.ll
+++ b/test/CodeGen/X86/vec_zero.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
 
+; CHECK: foo
 ; CHECK: xorps
 define void @foo(<4 x float>* %P) {
         %T = load <4 x float>* %P               ; <<4 x float>> [#uses=1]
@@ -8,6 +9,7 @@ define void @foo(<4 x float>* %P) {
         ret void
 }
 
+; CHECK: bar
 ; CHECK: pxor
 define void @bar(<4 x i32>* %P) {
         %T = load <4 x i32>* %P         ; <<4 x i32>> [#uses=1]
@@ -16,3 +18,13 @@ define void @bar(<4 x i32>* %P) {
         ret void
 }
 
+; Without any type hints from operations, we fall back to the smaller xorps.
+; The IR type <4 x i32> is ignored.
+; CHECK: untyped_zero
+; CHECK: xorps
+; CHECK: movaps
+define void @untyped_zero(<4 x i32>* %p) {
+entry:
+  store <4 x i32> zeroinitializer, <4 x i32>* %p, align 16
+  ret void
+}
diff --git a/test/CodeGen/X86/vec_zero_cse.ll b/test/CodeGen/X86/vec_zero_cse.ll
index 8aa5094..41ea024 100644
--- a/test/CodeGen/X86/vec_zero_cse.ll
+++ b/test/CodeGen/X86/vec_zero_cse.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pxor | count 1
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep xorps | count 1
 ; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pcmpeqd | count 1
 ; 64-bit stores here do not use MMX.
 
diff --git a/test/CodeGen/X86/vector-gep.ll b/test/CodeGen/X86/vector-gep.ll
new file mode 100644
index 0000000..3476e36
--- /dev/null
+++ b/test/CodeGen/X86/vector-gep.ll
@@ -0,0 +1,88 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7-avx | FileCheck %s
+; RUN: opt -instsimplify %s -disable-output
+
+;CHECK: AGEP0:
+define <4 x i32*> @AGEP0(i32* %ptr) nounwind {
+entry:
+  %vecinit.i = insertelement <4 x i32*> undef, i32* %ptr, i32 0
+  %vecinit2.i = insertelement <4 x i32*> %vecinit.i, i32* %ptr, i32 1
+  %vecinit4.i = insertelement <4 x i32*> %vecinit2.i, i32* %ptr, i32 2
+  %vecinit6.i = insertelement <4 x i32*> %vecinit4.i, i32* %ptr, i32 3
+;CHECK: pslld $2
+;CHECK: padd
+  %A2 = getelementptr <4 x i32*> %vecinit6.i, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+;CHECK: pslld $2
+;CHECK: padd
+  %A3 = getelementptr <4 x i32*> %A2, <4 x i32> <i32 10, i32 14, i32 19, i32 233>
+  ret <4 x i32*> %A3
+;CHECK: ret
+}
+
+;CHECK: AGEP1:
+define i32 @AGEP1(<4 x i32*> %param) nounwind {
+entry:
+;CHECK: pslld $2
+;CHECK: padd
+  %A2 = getelementptr <4 x i32*> %param, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+  %k = extractelement <4 x i32*> %A2, i32 3
+  %v = load i32* %k
+  ret i32 %v
+;CHECK: ret
+}
+
+;CHECK: AGEP2:
+define i32 @AGEP2(<4 x i32*> %param, <4 x i32> %off) nounwind {
+entry:
+;CHECK: pslld $2
+;CHECK: padd
+  %A2 = getelementptr <4 x i32*> %param, <4 x i32> %off
+  %k = extractelement <4 x i32*> %A2, i32 3
+  %v = load i32* %k
+  ret i32 %v
+;CHECK: ret
+}
+
+;CHECK: AGEP3:
+define <4 x i32*> @AGEP3(<4 x i32*> %param, <4 x i32> %off) nounwind {
+entry:
+;CHECK: pslld $2
+;CHECK: padd
+  %A2 = getelementptr <4 x i32*> %param, <4 x i32> %off
+  %v = alloca i32
+  %k = insertelement <4 x i32*> %A2, i32* %v, i32 3
+  ret <4 x i32*> %k
+;CHECK: ret
+}
+
+;CHECK: AGEP4:
+define <4 x i16*> @AGEP4(<4 x i16*> %param, <4 x i32> %off) nounwind {
+entry:
+; Multiply offset by two (add it to itself).
+;CHECK: padd
+; add the base to the offset
+;CHECK: padd
+  %A = getelementptr <4 x i16*> %param, <4 x i32> %off
+  ret <4 x i16*> %A
+;CHECK: ret
+}
+
+;CHECK: AGEP5:
+define <4 x i8*> @AGEP5(<4 x i8*> %param, <4 x i8> %off) nounwind {
+entry:
+;CHECK: paddd
+  %A = getelementptr <4 x i8*> %param, <4 x i8> %off
+  ret <4 x i8*> %A
+;CHECK: ret
+}
+
+
+; The size of each element is 1 byte. No need to multiply by element size.
+;CHECK: AGEP6:
+define <4 x i8*> @AGEP6(<4 x i8*> %param, <4 x i32> %off) nounwind {
+entry:
+;CHECK-NOT: pslld
+  %A = getelementptr <4 x i8*> %param, <4 x i32> %off
+  ret <4 x i8*> %A
+;CHECK: ret
+}
+
diff --git a/test/CodeGen/X86/vector-variable-idx2.ll b/test/CodeGen/X86/vector-variable-idx2.ll
new file mode 100644
index 0000000..d47df90
--- /dev/null
+++ b/test/CodeGen/X86/vector-variable-idx2.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse41
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin11.0.0"
+
+define i64 @__builtin_ia32_vec_ext_v2di(<2 x i64> %a, i32 %i) nounwind {
+  %1 = alloca <2 x i64>, align 16
+  %2 = alloca i32, align 4
+  store <2 x i64> %a, <2 x i64>* %1, align 16
+  store i32 %i, i32* %2, align 4
+  %3 = load <2 x i64>* %1, align 16
+  %4 = load i32* %2, align 4
+  %5 = extractelement <2 x i64> %3, i32 %4
+  ret i64 %5
+}
+
+define <2 x i64> @__builtin_ia32_vec_int_v2di(<2 x i64> %a, i32 %i) nounwind {
+  %1 = alloca <2 x i64>, align 16
+  %2 = alloca i32, align 4
+  store <2 x i64> %a, <2 x i64>* %1, align 16
+  store i32 %i, i32* %2, align 4
+  %3 = load <2 x i64>* %1, align 16
+  %4 = load i32* %2, align 4
+  %5 = insertelement <2 x i64> %3, i64 1, i32 %4
+  ret <2 x i64> %5
+}
diff --git a/test/CodeGen/X86/volatile.ll b/test/CodeGen/X86/volatile.ll
index 2e5742a..1a82014 100644
--- a/test/CodeGen/X86/volatile.ll
+++ b/test/CodeGen/X86/volatile.ll
@@ -4,14 +4,14 @@
 @x = external global double
 
 define void @foo() nounwind  {
-  %a = volatile load double* @x
-  volatile store double 0.0, double* @x
-  volatile store double 0.0, double* @x
-  %b = volatile load double* @x
+  %a = load volatile double* @x
+  store volatile double 0.0, double* @x
+  store volatile double 0.0, double* @x
+  %b = load volatile double* @x
   ret void
 }
 
 define void @bar() nounwind  {
-  %c = volatile load double* @x
+  %c = load volatile double* @x
   ret void
 }
diff --git a/test/CodeGen/X86/vsplit-and.ll b/test/CodeGen/X86/vsplit-and.ll
index 97dacfd..ee98806 100644
--- a/test/CodeGen/X86/vsplit-and.ll
+++ b/test/CodeGen/X86/vsplit-and.ll
@@ -1,8 +1,9 @@
-; RUN: llc < %s -march=x86 |  FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux |  FileCheck %s
 
-
-define void @t(<2 x i64>* %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind readonly {
-; CHECK: andb
+define void @t0(<2 x i64>* %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind readonly {
+; CHECK: t0
+; CHECK: pand
+; CHECK: ret
   %cmp1 = icmp ne <2 x i64> %src1, zeroinitializer
   %cmp2 = icmp ne <2 x i64> %src2, zeroinitializer
   %t1 = and <2 x i1> %cmp1, %cmp2
@@ -12,7 +13,9 @@ define void @t(<2 x i64>* %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind reado
 }
 
 define void @t2(<3 x i64>* %dst, <3 x i64> %src1, <3 x i64> %src2) nounwind readonly {
-; CHECK: andb
+; CHECK: t2
+; CHECK-NOT: pand
+; CHECK: ret
   %cmp1 = icmp ne <3 x i64> %src1, zeroinitializer
   %cmp2 = icmp ne <3 x i64> %src2, zeroinitializer
   %t1 = and <3 x i1> %cmp1, %cmp2
diff --git a/test/CodeGen/X86/widen_arith-1.ll b/test/CodeGen/X86/widen_arith-1.ll
index 4b8016d..661cde8 100644
--- a/test/CodeGen/X86/widen_arith-1.ll
+++ b/test/CodeGen/X86/widen_arith-1.ll
@@ -1,12 +1,10 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 |  FileCheck %s
-
-; Widen a v3i8 to v16i8 to use a vector add
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse42 |  FileCheck %s
 
 define void @update(<3 x i8>* %dst, <3 x i8>* %src, i32 %n) nounwind {
 entry:
 ; CHECK-NOT: pextrw
-; CHECK: paddb
-; CHECK: pextrb
+; CHECK: add
+
 	%dst.addr = alloca <3 x i8>*		; <<3 x i8>**> [#uses=2]
 	%src.addr = alloca <3 x i8>*		; <<3 x i8>**> [#uses=2]
 	%n.addr = alloca i32		; <i32*> [#uses=2]
diff --git a/test/CodeGen/X86/widen_arith-2.ll b/test/CodeGen/X86/widen_arith-2.ll
index 03b3fea..d35abc3 100644
--- a/test/CodeGen/X86/widen_arith-2.ll
+++ b/test/CodeGen/X86/widen_arith-2.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
-; CHECK: paddb
+; CHECK: padd
 ; CHECK: pand
 
 ; widen v8i8 to v16i8 (checks even power of 2 widening with add & and)
diff --git a/test/CodeGen/X86/widen_arith-3.ll b/test/CodeGen/X86/widen_arith-3.ll
index 0574923..f55b184 100644
--- a/test/CodeGen/X86/widen_arith-3.ll
+++ b/test/CodeGen/X86/widen_arith-3.ll
@@ -1,7 +1,8 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -post-RA-scheduler=true | FileCheck %s
-; CHECK: paddw
-; CHECK: pextrw
-; CHECK: movd
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse42 -post-RA-scheduler=true | FileCheck %s
+; CHECK: incl
+; CHECK: incl
+; CHECK: incl
+; CHECK: addl
 
 ; Widen a v3i16 to v8i16 to do a vector add
 
diff --git a/test/CodeGen/X86/widen_cast-1.ll b/test/CodeGen/X86/widen_cast-1.ll
index 1eace9e..4330aae 100644
--- a/test/CodeGen/X86/widen_cast-1.ll
+++ b/test/CodeGen/X86/widen_cast-1.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -march=x86 -mattr=+sse42 < %s | FileCheck %s
-; CHECK: paddw
+; CHECK: paddd
 ; CHECK: pextrd
 ; CHECK: movd
 
diff --git a/test/CodeGen/X86/widen_cast-4.ll b/test/CodeGen/X86/widen_cast-4.ll
index 8e1adf5..5ea5426 100644
--- a/test/CodeGen/X86/widen_cast-4.ll
+++ b/test/CodeGen/X86/widen_cast-4.ll
@@ -1,16 +1,6 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
-; CHECK: sarb
-; CHECK: sarb
-; CHECK: sarb
-; CHECK: sarb
-; CHECK: sarb
-; CHECK: sarb
-; CHECK: sarb
-; CHECK: sarb
-
-; v8i8 that is widen to v16i8 then split
-; FIXME: This is widen to v16i8 and split to 16 and we then rebuild the vector.
-; Unfortunately, we don't split the store so we don't get the code we want.
+; CHECK: psraw
+; CHECK: psraw
 
 define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind {
 entry:
diff --git a/test/CodeGen/X86/widen_conv-1.ll b/test/CodeGen/X86/widen_conv-1.ll
index f6810cd..51f1c88 100644
--- a/test/CodeGen/X86/widen_conv-1.ll
+++ b/test/CodeGen/X86/widen_conv-1.ll
@@ -1,6 +1,5 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
-; CHECK: pshufd
-; CHECK: paddd
+; CHECK: paddq
 
 ; truncate v2i64 to v2i32
 
diff --git a/test/CodeGen/X86/widen_conv-4.ll b/test/CodeGen/X86/widen_conv-4.ll
index 80f3a49..affd796 100644
--- a/test/CodeGen/X86/widen_conv-4.ll
+++ b/test/CodeGen/X86/widen_conv-4.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
-; CHECK: cvtsi2ss
+; CHECK-NOT: cvtsi2ss
 
 ; unsigned to float v7i16 to v7f32
 
diff --git a/test/CodeGen/X86/widen_load-0.ll b/test/CodeGen/X86/widen_load-0.ll
index c91627c..4aeec91 100644
--- a/test/CodeGen/X86/widen_load-0.ll
+++ b/test/CodeGen/X86/widen_load-0.ll
@@ -1,18 +1,18 @@
-; RUN: llc < %s -o - -mtriple=x86_64-linux | FileCheck %s
-; RUN: llc < %s -o - -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
+; RUN: llc < %s -o - -mtriple=x86_64-linux -mcpu=corei7 | FileCheck %s
+; RUN: llc < %s -o - -mtriple=x86_64-win32 -mcpu=corei7 | FileCheck %s -check-prefix=WIN64
 ; PR4891
 
 ; Both loads should happen before either store.
 
-; CHECK: movl  (%rdi), %[[R1:...]]
-; CHECK: movl  (%rsi), %[[R2:...]]
-; CHECK: movl  %[[R2]], (%rdi)
-; CHECK: movl  %[[R1]], (%rsi)
+; CHECK: movd  ({{.*}}), {{.*}}
+; CHECK: movd  ({{.*}}), {{.*}}
+; CHECK: movd  {{.*}}, ({{.*}})
+; CHECK: movd  {{.*}}, ({{.*}})
 
-; WIN64: movl  (%rcx), %[[R1:...]]
-; WIN64: movl  (%rdx), %[[R2:...]]
-; WIN64: movl  %[[R2]], (%rcx)
-; WIN64: movl  %[[R1]], (%rdx)
+; WIN64: movd  ({{.*}}), {{.*}}
+; WIN64: movd  ({{.*}}), {{.*}}
+; WIN64: movd  {{.*}}, ({{.*}})
+; WIN64: movd  {{.*}}, ({{.*}})
 
 define void @short2_int_swap(<2 x i16>* nocapture %b, i32* nocapture %c) nounwind {
 entry:
diff --git a/test/CodeGen/X86/widen_load-1.ll b/test/CodeGen/X86/widen_load-1.ll
index 639617f..9705d14 100644
--- a/test/CodeGen/X86/widen_load-1.ll
+++ b/test/CodeGen/X86/widen_load-1.ll
@@ -1,5 +1,6 @@
 ; RUN: llc %s -o - -march=x86-64 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
 ; PR4891
+; PR5626
 
 ; This load should be before the call, not after.
 
diff --git a/test/CodeGen/X86/widen_load-2.ll b/test/CodeGen/X86/widen_load-2.ll
index 6422063..79aa000 100644
--- a/test/CodeGen/X86/widen_load-2.ll
+++ b/test/CodeGen/X86/widen_load-2.ll
@@ -1,9 +1,10 @@
-; RUN: llc < %s -o - -march=x86-64 -mattr=+sse42 | FileCheck %s
+; RUN: llc < %s -o - -mcpu=generic -march=x86-64 -mattr=+sse42 | FileCheck %s
 
 ; Test based on pr5626 to load/store
 ;
 
 %i32vec3 = type <3 x i32>
+; CHECK: add3i32
 define void @add3i32(%i32vec3*  sret %ret, %i32vec3* %ap, %i32vec3* %bp)  {
 ; CHECK: movdqa
 ; CHECK: paddd
@@ -16,6 +17,7 @@ define void @add3i32(%i32vec3*  sret %ret, %i32vec3* %ap, %i32vec3* %bp)  {
 	ret void
 }
 
+; CHECK: add3i32_2
 define void @add3i32_2(%i32vec3*  sret %ret, %i32vec3* %ap, %i32vec3* %bp)  {
 ; CHECK: movq
 ; CHECK: pinsrd
@@ -32,6 +34,7 @@ define void @add3i32_2(%i32vec3*  sret %ret, %i32vec3* %ap, %i32vec3* %bp)  {
 }
 
 %i32vec7 = type <7 x i32>
+; CHECK: add7i32
 define void @add7i32(%i32vec7*  sret %ret, %i32vec7* %ap, %i32vec7* %bp)  {
 ; CHECK: movdqa
 ; CHECK: movdqa
@@ -47,6 +50,7 @@ define void @add7i32(%i32vec7*  sret %ret, %i32vec7* %ap, %i32vec7* %bp)  {
 	ret void
 }
 
+; CHECK: add12i32
 %i32vec12 = type <12 x i32>
 define void @add12i32(%i32vec12*  sret %ret, %i32vec12* %ap, %i32vec12* %bp)  {
 ; CHECK: movdqa
@@ -66,12 +70,14 @@ define void @add12i32(%i32vec12*  sret %ret, %i32vec12* %ap, %i32vec12* %bp)  {
 }
 
 
+; CHECK: add3i16
 %i16vec3 = type <3 x i16>
 define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp) nounwind {
-; CHECK: movdqa
-; CHECK: paddw
-; CHECK: movd
-; CHECK: pextrw
+; CHECK: add3i16
+; CHECK: addl
+; CHECK: addl
+; CHECK: addl
+; CHECK: ret
 	%a = load %i16vec3* %ap, align 16
 	%b = load %i16vec3* %bp, align 16
 	%x = add %i16vec3 %a, %b
@@ -79,10 +85,11 @@ define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp
 	ret void
 }
 
+; CHECK: add4i16
 %i16vec4 = type <4 x i16>
 define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp) nounwind {
-; CHECK: movdqa
-; CHECK: paddw
+; CHECK: add4i16
+; CHECK: paddd
 ; CHECK: movq
 	%a = load %i16vec4* %ap, align 16
 	%b = load %i16vec4* %bp, align 16
@@ -91,6 +98,7 @@ define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp
 	ret void
 }
 
+; CHECK: add12i16
 %i16vec12 = type <12 x i16>
 define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12* %bp) nounwind {
 ; CHECK: movdqa
@@ -106,6 +114,7 @@ define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12*
 	ret void
 }
 
+; CHECK: add18i16
 %i16vec18 = type <18 x i16>
 define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18* %bp) nounwind {
 ; CHECK: movdqa
@@ -125,12 +134,13 @@ define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18*
 }
 
 
+; CHECK: add3i8
 %i8vec3 = type <3 x i8>
 define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) nounwind {
-; CHECK: movdqa
-; CHECK: paddb
-; CHECK: pextrb
-; CHECK: movb
+; CHECK: addb
+; CHECK: addb
+; CHECK: addb
+; CHECK: ret
 	%a = load %i8vec3* %ap, align 16
 	%b = load %i8vec3* %bp, align 16
 	%x = add %i8vec3 %a, %b
@@ -138,6 +148,7 @@ define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) no
 	ret void
 }
 
+; CHECK: add31i8:
 %i8vec31 = type <31 x i8>
 define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp) nounwind {
 ; CHECK: movdqa
@@ -147,6 +158,7 @@ define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp
 ; CHECK: movq
 ; CHECK: pextrb
 ; CHECK: pextrw
+; CHECK: ret
 	%a = load %i8vec31* %ap, align 16
 	%b = load %i8vec31* %bp, align 16
 	%x = add %i8vec31 %a, %b
@@ -155,9 +167,10 @@ define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp
 }
 
 
+; CHECK: rot
 %i8vec3pack = type { <3 x i8>, i8 }
 define %i8vec3pack  @rot() nounwind {
-; CHECK: shrb
+; CHECK: movd {{-?[0-9]+}}(%rsp), {{%xmm[0-9]}}
 entry:
   %X = alloca %i8vec3pack, align 4
   %rot = alloca %i8vec3pack, align 4
diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll
index 8e951b7..7bebb27 100644
--- a/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/test/CodeGen/X86/widen_shuffle-1.ll
@@ -10,6 +10,7 @@ entry:
 	%val = fadd <3 x float> %x, %src2
 	store <3 x float> %val, <3 x float>* %dst.addr
 	ret void
+; CHECK: ret
 }
 
 
@@ -23,6 +24,7 @@ entry:
 	%val = fadd <3 x float> %x, %src2
 	store <3 x float> %val, <3 x float>* %dst.addr
 	ret void
+; CHECK: ret
 }
 
 ; Example of when widening a v3float operation causes the DAG to replace a node
@@ -31,7 +33,7 @@ entry:
 define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind {
 entry:
 ; CHECK: shuf3:
-; CHECK: pshufd
+; CHECK: shufps
   %shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   %tmp25.i.i = shufflevector <4 x float> %shuffle.i.i.i12, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> 
   %tmp1.i.i = shufflevector <3 x float> %tmp25.i.i, <3 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -45,12 +47,23 @@ entry:
   %shuffle.i.i.i21 = shufflevector <4 x float> %tmp2.i18, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
   store <4 x float> %shuffle.i.i.i21, <4 x float>* %dst
   ret void
+; CHECK: ret
 }
 
 ; PR10421: make sure we correctly handle extreme widening with CONCAT_VECTORS
 define <8 x i8> @shuf4(<4 x i8> %a, <4 x i8> %b) nounwind readnone {
 ; CHECK: shuf4:
-; CHECK: punpckldq
+; CHECK-NOT: punpckldq
   %vshuf = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i8> %vshuf
+; CHECK: ret
+}
+
+; PR11389: another CONCAT_VECTORS case
+define void @shuf5(<8 x i8>* %p) nounwind {
+; CHECK: shuf5:
+  %v = shufflevector <2 x i8> <i8 4, i8 33>, <2 x i8> undef, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  store <8 x i8> %v, <8 x i8>* %p, align 8
+  ret void
+; CHECK: ret
 }
diff --git a/test/CodeGen/X86/win32_sret.ll b/test/CodeGen/X86/win32_sret.ll
new file mode 100644
index 0000000..878c6db
--- /dev/null
+++ b/test/CodeGen/X86/win32_sret.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN_X32
+; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32
+; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN_X32
+; RUN: llc < %s -O0 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32
+; RUN: llc < %s -O0 -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
+
+; The SysV ABI used by most Unixes and Mingw on x86 specifies that an sret pointer
+; is callee-cleanup. However, in MSVC's cdecl calling convention, sret pointer
+; arguments are caller-cleanup like normal arguments.
+
+define void @sret1(i8* sret) nounwind {
+entry:
+; WIN_X32:    {{ret$}}
+; MINGW_X32:  ret $4
+; LINUX:      ret $4
+  ret void
+}
+
+define void @sret2(i32* sret %x, i32 %y) nounwind {
+entry:
+; WIN_X32:    {{ret$}}
+; MINGW_X32:  ret $4
+; LINUX:      ret $4
+  store i32 %y, i32* %x
+  ret void
+}
+
diff --git a/test/CodeGen/X86/win64_alloca_dynalloca.ll b/test/CodeGen/X86/win64_alloca_dynalloca.ll
index e39d007..a961c6a 100644
--- a/test/CodeGen/X86/win64_alloca_dynalloca.ll
+++ b/test/CodeGen/X86/win64_alloca_dynalloca.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -join-physregs -mtriple=x86_64-mingw32     | FileCheck %s -check-prefix=M64
-; RUN: llc < %s -join-physregs -mtriple=x86_64-win32       | FileCheck %s -check-prefix=W64
-; RUN: llc < %s -join-physregs -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
+; RUN: llc < %s -join-physregs -mcpu=generic -mtriple=x86_64-mingw32     | FileCheck %s -check-prefix=M64
+; RUN: llc < %s -join-physregs -mcpu=generic -mtriple=x86_64-win32       | FileCheck %s -check-prefix=W64
+; RUN: llc < %s -join-physregs -mcpu=generic -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
 ; PR8777
 ; PR8778
 
diff --git a/test/CodeGen/X86/win64_vararg.ll b/test/CodeGen/X86/win64_vararg.ll
index efe8bca..52bc509 100644
--- a/test/CodeGen/X86/win64_vararg.ll
+++ b/test/CodeGen/X86/win64_vararg.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-pc-win32 | FileCheck %s
 
 ; Verify that the var arg parameters which are passed in registers are stored
 ; in home stack slots allocated by the caller and that AP is correctly
diff --git a/test/CodeGen/X86/win_ftol2.ll b/test/CodeGen/X86/win_ftol2.ll
new file mode 100644
index 0000000..596b426
--- /dev/null
+++ b/test/CodeGen/X86/win_ftol2.ll
@@ -0,0 +1,144 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=FTOL
+; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT
+; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=COMPILERRT
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=COMPILERRT
+; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT
+; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s -check-prefix=COMPILERRT
+; RUN: llc < %s -mattr=-sse -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=FTOL_2
+
+; Win32 targets use the MSVCRT _ftol2 runtime function for fptoui to i64. This
+; function has a nonstandard calling convention: the input value is expected on
+; the x87 stack instead of the callstack. The input value is popped by the
+; callee. Mingw32 uses normal cdecl compiler-rt functions.
+
+define i64 @double_ui64(double %x) nounwind {
+entry:
+; COMPILERRT: @double_ui64
+; COMPILERRT-NOT: calll __ftol2
+; FTOL: @double_ui64
+; FTOL: fldl
+; FTOL: calll __ftol2
+; FTOL-NOT: fstp
+  %0 = fptoui double %x to i64
+  ret i64 %0
+}
+
+define i64 @float_ui64(float %x) nounwind {
+entry:
+; COMPILERRT: @float_ui64
+; COMPILERRT-NOT: calll __ftol2
+; FTOL: @float_ui64
+; FTOL: flds
+; FTOL: calll __ftol2
+; FTOL-NOT: fstp
+  %0 = fptoui float %x to i64
+  ret i64 %0
+}
+
+define i64 @double_ui64_2(double %x, double %y, double %z) nounwind {
+; COMPILERRT: @double_ui64_2
+; FTOL: @double_ui64_2
+; FTOL_2: @double_ui64_2
+;; stack is empty
+; FTOL_2: fldl
+;; stack is %z
+; FTOL_2: fldl
+;; stack is %y %z
+; FTOL_2: fldl
+;; stack is %x %y %z
+; FTOL_2: fdiv %st(0), %st(1)
+;; stack is %x %1 %z
+; FTOL_2: fsubp %st(2)
+;; stack is %1 %2
+; FTOL_2: fxch
+; FTOL_2-NOT: fld
+; FTOL_2-NOT: fst
+;; stack is %2 %1
+; FTOL_2: calll __ftol2
+; FTOL_2-NOT: fxch
+; FTOL_2-NOT: fld
+; FTOL_2-NOT: fst
+; FTOL_2: calll __ftol2
+;; stack is empty
+
+  %1 = fdiv double %x, %y
+  %2 = fsub double %x, %z
+  %3 = fptoui double %1 to i64
+  %4 = fptoui double %2 to i64
+  %5 = sub i64 %3, %4
+  ret i64 %5
+}
+
+define i64 @double_ui64_3(double %x, double %y, double %z) nounwind {
+; COMPILERRT: @double_ui64_3
+; FTOL: @double_ui64_3
+; FTOL_2: @double_ui64_3
+;; stack is empty
+; FTOL_2: fldl
+;; stack is %z
+; FTOL_2: fldl
+;; stack is %y %z
+; FTOL_2: fldl
+;; stack is %x %y %z
+; FTOL_2: fdiv %st(0), %st(1)
+;; stack is %x %1 %z
+; FTOL_2: fsubp %st(2)
+;; stack is %1 %2
+; FTOL_2-NOT: fxch
+; FTOL_2-NOT: fld
+; FTOL_2-NOT: fst
+;; stack is %1 %2 (still)
+; FTOL_2: calll __ftol2
+; FTOL_2-NOT: fxch
+; FTOL_2-NOT: fld
+; FTOL_2-NOT: fst
+; FTOL_2: calll __ftol2
+;; stack is empty
+
+  %1 = fdiv double %x, %y
+  %2 = fsub double %x, %z
+  %3 = fptoui double %1 to i64
+  %4 = fptoui double %2 to i64
+  %5 = sub i64 %4, %3
+  ret i64 %5
+}
+
+define {double, i64} @double_ui64_4(double %x, double %y) nounwind {
+; COMPILERRT: @double_ui64_4
+; FTOL: @double_ui64_4
+; FTOL_2: @double_ui64_4
+;; stack is empty
+; FTOL_2: fldl
+;; stack is %y
+; FTOL_2: fldl
+;; stack is %x %y
+; FTOL_2: fxch
+;; stack is %y %x
+; FTOL_2: calll __ftol2
+;; stack is %x
+; FTOL_2: fld %st(0)
+;; stack is %x %x
+; FTOL_2: calll __ftol2
+;; stack is %x
+
+  %1 = fptoui double %x to i64
+  %2 = fptoui double %y to i64
+  %3 = sub i64 %1, %2
+  %4 = insertvalue {double, i64} undef, double %x, 0
+  %5 = insertvalue {double, i64} %4, i64 %3, 1
+  ret {double, i64} %5
+}
+
+define i32 @double_ui32_5(double %X) {
+; FTOL: @double_ui32_5
+; FTOL: calll __ftol2
+  %tmp.1 = fptoui double %X to i32
+  ret i32 %tmp.1
+}
+
+define i64 @double_ui64_5(double %X) {
+; FTOL: @double_ui64_5
+; FTOL: calll __ftol2
+  %tmp.1 = fptoui double %X to i64
+  ret i64 %tmp.1
+}
diff --git a/test/CodeGen/X86/x86-shifts.ll b/test/CodeGen/X86/x86-shifts.ll
index fdf68f9..20bccab 100644
--- a/test/CodeGen/X86/x86-shifts.ll
+++ b/test/CodeGen/X86/x86-shifts.ll
@@ -6,8 +6,9 @@
 define <4 x i32> @shl4(<4 x i32> %A) nounwind {
 entry:
 ; CHECK:      shl4
+; CHECK:      padd
 ; CHECK:      pslld
-; CHECK-NEXT: pslld
+; CHECK:      ret
   %B = shl <4 x i32> %A,  < i32 2, i32 2, i32 2, i32 2>
   %C = shl <4 x i32> %A,  < i32 1, i32 1, i32 1, i32 1>
   %K = xor <4 x i32> %B, %C
@@ -19,6 +20,7 @@ entry:
 ; CHECK:      shr4
 ; CHECK:      psrld
 ; CHECK-NEXT: psrld
+; CHECK:      ret
   %B = lshr <4 x i32> %A,  < i32 2, i32 2, i32 2, i32 2>
   %C = lshr <4 x i32> %A,  < i32 1, i32 1, i32 1, i32 1>
   %K = xor <4 x i32> %B, %C
@@ -30,6 +32,7 @@ entry:
 ; CHECK:      sra4
 ; CHECK:      psrad
 ; CHECK-NEXT: psrad
+; CHECK:      ret
   %B = ashr <4 x i32> %A,  < i32 2, i32 2, i32 2, i32 2>
   %C = ashr <4 x i32> %A,  < i32 1, i32 1, i32 1, i32 1>
   %K = xor <4 x i32> %B, %C
@@ -41,6 +44,7 @@ entry:
 ; CHECK:      shl2
 ; CHECK:      psllq
 ; CHECK-NEXT: psllq
+; CHECK:      ret
   %B = shl <2 x i64> %A,  < i64 2, i64 2>
   %C = shl <2 x i64> %A,  < i64 9, i64 9>
   %K = xor <2 x i64> %B, %C
@@ -52,6 +56,7 @@ entry:
 ; CHECK:      shr2
 ; CHECK:      psrlq
 ; CHECK-NEXT: psrlq
+; CHECK:      ret
   %B = lshr <2 x i64> %A,  < i64 8, i64 8>
   %C = lshr <2 x i64> %A,  < i64 1, i64 1>
   %K = xor <2 x i64> %B, %C
@@ -62,8 +67,9 @@ entry:
 define <8 x i16> @shl8(<8 x i16> %A) nounwind {
 entry:
 ; CHECK:      shl8
+; CHECK:      padd
 ; CHECK:      psllw
-; CHECK-NEXT: psllw
+; CHECK:      ret
   %B = shl <8 x i16> %A,  < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
   %C = shl <8 x i16> %A,  < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %K = xor <8 x i16> %B, %C
@@ -75,6 +81,7 @@ entry:
 ; CHECK:      shr8
 ; CHECK:      psrlw
 ; CHECK-NEXT: psrlw
+; CHECK:      ret
   %B = lshr <8 x i16> %A,  < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
   %C = lshr <8 x i16> %A,  < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %K = xor <8 x i16> %B, %C
@@ -86,6 +93,7 @@ entry:
 ; CHECK:      sra8
 ; CHECK:      psraw
 ; CHECK-NEXT: psraw
+; CHECK:      ret
   %B = ashr <8 x i16> %A,  < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
   %C = ashr <8 x i16> %A,  < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %K = xor <8 x i16> %B, %C
@@ -100,6 +108,7 @@ entry:
 ; CHECK: sll8_nosplat
 ; CHECK-NOT: psll
 ; CHECK-NOT: psll
+; CHECK:      ret
   %B = shl <8 x i16> %A,  < i16 1, i16 2, i16 3, i16 6, i16 2, i16 2, i16 2, i16 2>
   %C = shl <8 x i16> %A,  < i16 9, i16 7, i16 5, i16 1, i16 4, i16 1, i16 1, i16 1>
   %K = xor <8 x i16> %B, %C
@@ -112,6 +121,7 @@ entry:
 ; CHECK: shr2_nosplat
 ; CHECK-NOT:  psrlq
 ; CHECK-NOT:  psrlq
+; CHECK:      ret
   %B = lshr <2 x i64> %A,  < i64 8, i64 1>
   %C = lshr <2 x i64> %A,  < i64 1, i64 0>
   %K = xor <2 x i64> %B, %C
@@ -124,7 +134,8 @@ entry:
 define <2 x i32> @shl2_other(<2 x i32> %A) nounwind {
 entry:
 ; CHECK: shl2_other
-; CHECK-not:      psllq
+; CHECK: psllq
+; CHECK: ret
   %B = shl <2 x i32> %A,  < i32 2, i32 2>
   %C = shl <2 x i32> %A,  < i32 9, i32 9>
   %K = xor <2 x i32> %B, %C
@@ -134,9 +145,48 @@ entry:
 define <2 x i32> @shr2_other(<2 x i32> %A) nounwind {
 entry:
 ; CHECK: shr2_other
-; CHECK-NOT:      psrlq
+; CHECK: psrlq
+; CHECK: ret
   %B = lshr <2 x i32> %A,  < i32 8, i32 8>
   %C = lshr <2 x i32> %A,  < i32 1, i32 1>
   %K = xor <2 x i32> %B, %C
   ret <2 x i32> %K
 }
+
+define <16 x i8> @shl9(<16 x i8> %A) nounwind {
+  %B = shl <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <16 x i8> %B
+; CHECK: shl9:
+; CHECK: psllw $3
+; CHECK: pand
+; CHECK: ret
+}
+
+define <16 x i8> @shr9(<16 x i8> %A) nounwind {
+  %B = lshr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <16 x i8> %B
+; CHECK: shr9:
+; CHECK: psrlw $3
+; CHECK: pand
+; CHECK: ret
+}
+
+define <16 x i8> @sra_v16i8_7(<16 x i8> %A) nounwind {
+  %B = ashr <16 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  ret <16 x i8> %B
+; CHECK: sra_v16i8_7:
+; CHECK: pxor
+; CHECK: pcmpgtb
+; CHECK: ret
+}
+
+define <16 x i8> @sra_v16i8(<16 x i8> %A) nounwind {
+  %B = ashr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <16 x i8> %B
+; CHECK: sra_v16i8:
+; CHECK: psrlw $3
+; CHECK: pand
+; CHECK: pxor
+; CHECK: psubb
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/xop-intrinsics-x86_64.ll b/test/CodeGen/X86/xop-intrinsics-x86_64.ll
new file mode 100644
index 0000000..a2521b0
--- /dev/null
+++ b/test/CodeGen/X86/xop-intrinsics-x86_64.ll
@@ -0,0 +1,969 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=+avx,+fma4,+xop | FileCheck %s
+
+define <2 x double> @test_int_x86_xop_vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+  ; CHECK: vpermil2pd
+  %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 1) ;  [#uses=1]
+  ret <2 x double> %res
+}
+define <2 x double> @test_int_x86_xop_vpermil2pd_mr(<2 x double> %a0, <2 x double>* %a1, <2 x double> %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpermil2pd
+  %vec = load <2 x double>* %a1
+  %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %vec, <2 x double> %a2, i8 1) ;  [#uses=1]
+  ret <2 x double> %res
+}
+define <2 x double> @test_int_x86_xop_vpermil2pd_rm(<2 x double> %a0, <2 x double> %a1, <2 x double>* %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpermil2pd
+  %vec = load <2 x double>* %a2
+  %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %vec, i8 1) ;  [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
+
+define <4 x double> @test_int_x86_xop_vpermil2pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
+  ; CHECK: vpermil2pd
+  ; CHECK: ymm
+  %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 2) ;
+  ret <4 x double> %res
+}
+define <4 x double> @test_int_x86_xop_vpermil2pd_256_mr(<4 x double> %a0, <4 x double>* %a1, <4 x double> %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpermil2pd
+  ; CHECK: ymm
+  %vec = load <4 x double>* %a1
+  %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %vec, <4 x double> %a2, i8 2) ;
+  ret <4 x double> %res
+}
+define <4 x double> @test_int_x86_xop_vpermil2pd_256_rm(<4 x double> %a0, <4 x double> %a1, <4 x double>* %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpermil2pd
+  ; CHECK: ymm
+  %vec = load <4 x double>* %a2
+  %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %vec, i8 2) ;
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
+
+define <4 x float> @test_int_x86_xop_vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+  ; CHECK: vpermil2ps
+  %res = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 3) ;
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
+
+define <8 x float> @test_int_x86_xop_vpermil2ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
+  ; CHECK: vpermil2ps
+  ; CHECK: ymm
+  %res = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 4) ;
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
+  ; CHECK: vpcmov
+  %res = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone
+
+define <4 x i64> @test_int_x86_xop_vpcmov_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
+  ; CHECK: vpcmov
+  ; CHECK: ymm
+  %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) ;
+  ret <4 x i64> %res
+}
+define <4 x i64> @test_int_x86_xop_vpcmov_256_mr(<4 x i64> %a0, <4 x i64>* %a1, <4 x i64> %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpcmov
+  ; CHECK: ymm
+  %vec = load <4 x i64>* %a1
+  %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %vec, <4 x i64> %a2) ;
+  ret <4 x i64> %res
+}
+define <4 x i64> @test_int_x86_xop_vpcmov_256_rm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64>* %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpcmov
+  ; CHECK: ymm
+ %vec = load <4 x i64>* %a2
+ %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %vec) ;
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64>, <4 x i64>, <4 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomeqb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK:vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+define <16 x i8> @test_int_x86_xop_vpcomeqb_mem(<16 x i8> %a0, <16 x i8>* %a1) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK:vpcomb
+  %vec = load <16 x i8>* %a1
+  %res = call <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8> %a0, <16 x i8> %vec) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomeqw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomeqd(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomeqq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomequb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomequd(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomequq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomequw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomfalseb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomfalsed(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomfalseq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomfalseub(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomfalseud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomfalseuq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomfalseuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomfalsew(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomgeb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomged(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomgeq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomgeub(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomgeud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomgeuq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomgeuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomgew(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomgtb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomgtd(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomgtq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomgtub(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomgtud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomgtuq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomgtuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomgtw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomleb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomled(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomleq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomleub(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomleud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomleuq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomleuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomlew(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomltb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomltd(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomltq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomltub(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomltud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomltuq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomltuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomltw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomneb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomned(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomneq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomneub(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomneud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomneuq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomneuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomnew(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomtrueb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomtrued(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomtrueq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomtrueub(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomtrueud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomtrueuq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomtrueuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomtruew(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vphaddbd(<16 x i8> %a0) {
+  ; CHECK: vphaddbd
+  %res = call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %a0) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphaddbq(<16 x i8> %a0) {
+  ; CHECK: vphaddbq
+  %res = call <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8> %a0) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vphaddbw(<16 x i8> %a0) {
+  ; CHECK: vphaddbw
+  %res = call <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8> %a0) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphadddq(<4 x i32> %a0) {
+  ; CHECK: vphadddq
+  %res = call <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32> %a0) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vphaddubd(<16 x i8> %a0) {
+  ; CHECK: vphaddubd
+  %res = call <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8> %a0) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphaddubq(<16 x i8> %a0) {
+  ; CHECK: vphaddubq
+  %res = call <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8> %a0) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vphaddubw(<16 x i8> %a0) {
+  ; CHECK: vphaddubw
+  %res = call <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8> %a0) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphaddudq(<4 x i32> %a0) {
+  ; CHECK: vphaddudq
+  %res = call <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32> %a0) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vphadduwd(<8 x i16> %a0) {
+  ; CHECK: vphadduwd
+  %res = call <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16> %a0) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphadduwq(<8 x i16> %a0) {
+  ; CHECK: vphadduwq
+  %res = call <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16> %a0) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vphaddwd(<8 x i16> %a0) {
+  ; CHECK: vphaddwd
+  %res = call <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16> %a0) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphaddwq(<8 x i16> %a0) {
+  ; CHECK: vphaddwq
+  %res = call <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16> %a0) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vphsubbw(<16 x i8> %a0) {
+  ; CHECK: vphsubbw
+  %res = call <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8> %a0) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphsubdq(<4 x i32> %a0) {
+  ; CHECK: vphsubdq
+  %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %a0) ;
+  ret <2 x i64> %res
+}
+define <2 x i64> @test_int_x86_xop_vphsubdq_mem(<4 x i32>* %a0) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vphsubdq
+  %vec = load <4 x i32>* %a0
+  %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %vec) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vphsubwd(<8 x i16> %a0) {
+  ; CHECK: vphsubwd
+  %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %a0) ;
+  ret <4 x i32> %res
+}
+define <4 x i32> @test_int_x86_xop_vphsubwd_mem(<8 x i16>* %a0) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vphsubwd
+  %vec = load <8 x i16>* %a0
+  %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %vec) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmacsdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
+  ; CHECK: vpmacsdd
+  %res = call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpmacsdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
+  ; CHECK: vpmacsdqh
+  %res = call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpmacsdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
+  ; CHECK: vpmacsdql
+  %res = call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmacssdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
+  ; CHECK: vpmacssdd
+  %res = call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpmacssdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
+  ; CHECK: vpmacssdqh
+  %res = call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpmacssdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
+  ; CHECK: vpmacssdql
+  %res = call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmacsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
+  ; CHECK: vpmacsswd
+  %res = call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpmacssww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
+  ; CHECK: vpmacssww
+  %res = call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmacswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
+  ; CHECK: vpmacswd
+  %res = call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpmacsww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
+  ; CHECK: vpmacsww
+  %res = call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmadcsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
+  ; CHECK: vpmadcsswd
+  %res = call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
+  ; CHECK: vpmadcswd
+  %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
+  ret <4 x i32> %res
+}
+define <4 x i32> @test_int_x86_xop_vpmadcswd_mem(<8 x i16> %a0, <8 x i16>* %a1, <4 x i32> %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpmadcswd
+  %vec = load <8 x i16>* %a1
+  %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %vec, <4 x i32> %a2) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
+  ; CHECK: vpperm
+  %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ;
+  ret <16 x i8> %res
+}
+define <16 x i8> @test_int_x86_xop_vpperm_rm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpperm
+  %vec = load <16 x i8>* %a2
+  %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %vec) ;
+  ret <16 x i8> %res
+}
+define <16 x i8> @test_int_x86_xop_vpperm_mr(<16 x i8> %a0, <16 x i8>* %a1, <16 x i8> %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpperm
+  %vec = load <16 x i8>* %a1
+  %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %vec, <16 x i8> %a2) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vprotb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vprotb
+  %res = call <16 x i8> @llvm.x86.xop.vprotb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vprotb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vprotd(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vprotd
+  %res = call <4 x i32> @llvm.x86.xop.vprotd(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vprotd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vprotq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vprotq
+  %res = call <2 x i64> @llvm.x86.xop.vprotq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vprotq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vprotw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vprotw
+  %res = call <8 x i16> @llvm.x86.xop.vprotw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vprotw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpshab(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpshab
+  %res = call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpshab(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpshad(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpshad
+  %res = call <4 x i32> @llvm.x86.xop.vpshad(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpshad(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpshaq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpshaq
+  %res = call <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpshaw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpshaw
+  %res = call <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpshlb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpshlb
+  %res = call <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpshld(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpshld
+  %res = call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpshld(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpshlq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpshlq
+  %res = call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpshlw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpshlw
+  %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+define <8 x i16> @test_int_x86_xop_vpshlw_rm(<8 x i16> %a0, <8 x i16>* %a1) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpshlw
+  %vec = load <8 x i16>* %a1
+  %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %vec) ;
+  ret <8 x i16> %res
+}
+define <8 x i16> @test_int_x86_xop_vpshlw_mr(<8 x i16>* %a0, <8 x i16> %a1) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpshlw
+  %vec = load <8 x i16>* %a0
+  %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %vec, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x float> @test_int_x86_xop_vfrcz_ss(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK-NOT: mov
+  ; CHECK: vfrczss
+  %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0, <4 x float> %a1) ;
+  ret <4 x float> %res
+}
+define <4 x float> @test_int_x86_xop_vfrcz_ss_mem(<4 x float> %a0, float* %a1) {
+  ; CHECK-NOT: mov
+  ; CHECK: vfrczss
+  %elem = load float* %a1
+  %vec = insertelement <4 x float> undef, float %elem, i32 0
+  %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0, <4 x float> %vec) ;
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>, <4 x float>) nounwind readnone
+
+define <2 x double> @test_int_x86_xop_vfrcz_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK-NOT: mov
+  ; CHECK: vfrczsd
+  %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0, <2 x double> %a1) ;
+  ret <2 x double> %res
+}
+define <2 x double> @test_int_x86_xop_vfrcz_sd_mem(<2 x double> %a0, double* %a1) {
+  ; CHECK-NOT: mov
+  ; CHECK: vfrczsd
+  %elem = load double* %a1
+  %vec = insertelement <2 x double> undef, double %elem, i32 0
+  %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0, <2 x double> %vec) ;
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>, <2 x double>) nounwind readnone
+
+define <2 x double> @test_int_x86_xop_vfrcz_pd(<2 x double> %a0) {
+  ; CHECK: vfrczpd
+  %res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %a0) ;
+  ret <2 x double> %res
+}
+define <2 x double> @test_int_x86_xop_vfrcz_pd_mem(<2 x double>* %a0) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vfrczpd
+  %vec = load <2 x double>* %a0
+  %res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %vec) ;
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double>) nounwind readnone
+
+define <4 x double> @test_int_x86_xop_vfrcz_pd_256(<4 x double> %a0) {
+  ; CHECK: vfrczpd
+  ; CHECK: ymm
+  %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %a0) ;
+  ret <4 x double> %res
+}
+define <4 x double> @test_int_x86_xop_vfrcz_pd_256_mem(<4 x double>* %a0) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vfrczpd
+  ; CHECK: ymm
+  %vec = load <4 x double>* %a0
+  %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %vec) ;
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double>) nounwind readnone
+
+define <4 x float> @test_int_x86_xop_vfrcz_ps(<4 x float> %a0) {
+  ; CHECK: vfrczps
+  %res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %a0) ;
+  ret <4 x float> %res
+}
+define <4 x float> @test_int_x86_xop_vfrcz_ps_mem(<4 x float>* %a0) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vfrczps
+  %vec = load <4 x float>* %a0
+  %res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %vec) ;
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float>) nounwind readnone
+
+define <8 x float> @test_int_x86_xop_vfrcz_ps_256(<8 x float> %a0) {
+  ; CHECK: vfrczps
+  ; CHECK: ymm
+  %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %a0) ;
+  ret <8 x float> %res
+}
+define <8 x float> @test_int_x86_xop_vfrcz_ps_256_mem(<8 x float>* %a0) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vfrczps
+  ; CHECK: ymm
+  %vec = load <8 x float>* %a0
+  %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %vec) ;
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float>) nounwind readnone
+
diff --git a/test/CodeGen/X86/xor.ll b/test/CodeGen/X86/xor.ll
index 178c59d..ddc4cab 100644
--- a/test/CodeGen/X86/xor.ll
+++ b/test/CodeGen/X86/xor.ll
@@ -8,7 +8,7 @@ define <4 x i32> @test1() nounwind {
 	ret <4 x i32> %tmp
         
 ; X32: test1:
-; X32:	pxor	%xmm0, %xmm0
+; X32:	xorps	%xmm0, %xmm0
 ; X32:	ret
 }
 
diff --git a/test/CodeGen/X86/zero-remat.ll b/test/CodeGen/X86/zero-remat.ll
index 4470074..4242530 100644
--- a/test/CodeGen/X86/zero-remat.ll
+++ b/test/CodeGen/X86/zero-remat.ll
@@ -16,9 +16,9 @@ define double @foo() nounwind {
 ;CHECK-32: ret
 
 ;CHECK-64: foo:
-;CHECK-64: pxor
+;CHECK-64: xorps
 ;CHECK-64: call
-;CHECK-64: pxor
+;CHECK-64: xorps
 ;CHECK-64: ret
 }
 
@@ -33,8 +33,8 @@ define float @foof() nounwind {
 ;CHECK-32: ret
 
 ;CHECK-64: foof:
-;CHECK-64: pxor
+;CHECK-64: xorps
 ;CHECK-64: call
-;CHECK-64: pxor
+;CHECK-64: xorps
 ;CHECK-64: ret
 }
diff --git a/test/CodeGen/X86/zext-fold.ll b/test/CodeGen/X86/zext-fold.ll
index b3f5cdb..ff93c68 100644
--- a/test/CodeGen/X86/zext-fold.ll
+++ b/test/CodeGen/X86/zext-fold.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s
 
 ;; Simple case
 define i32 @test1(i8 %x) nounwind readnone {
@@ -34,7 +34,7 @@ define void @test3(i8 %x) nounwind readnone {
   ret void
 }
 ; CHECK: test3
-; CHECK: movzbl 16(%esp), [[REGISTER:%e[a-z]{2}]]
+; CHECK: movzbl {{[0-9]+}}(%esp), [[REGISTER:%e[a-z]{2}]]
 ; CHECK-NEXT: movl [[REGISTER]], 4(%esp)
 ; CHECK-NEXT: andl $224, [[REGISTER]]
 ; CHECK-NEXT: movl [[REGISTER]], (%esp)
diff --git a/test/CodeGen/X86/zext-sext.ll b/test/CodeGen/X86/zext-sext.ll
index cea9e9c..6432ae3 100644
--- a/test/CodeGen/X86/zext-sext.ll
+++ b/test/CodeGen/X86/zext-sext.ll
@@ -1,4 +1,5 @@
 ; XFAIL: *
+; ...should pass. See PR12324: misched bringup
 ; RUN: llc < %s -march=x86-64 | FileCheck %s
 ; <rdar://problem/8006248>
 
diff --git a/test/CodeGen/XCore/2011-08-01-DynamicAllocBug.ll b/test/CodeGen/XCore/2011-08-01-DynamicAllocBug.ll
index 7d6d7ba..84e21e4 100644
--- a/test/CodeGen/XCore/2011-08-01-DynamicAllocBug.ll
+++ b/test/CodeGen/XCore/2011-08-01-DynamicAllocBug.ll
@@ -16,5 +16,5 @@ allocas:
 ; CHECK: f:
 ; CHECK: ldaw [[REGISTER:r[0-9]+]], {{r[0-9]+}}[-r1]
 ; CHECK: set sp, [[REGISTER]]
-; CHECK extsp 1
-; CHECK bl g
+; CHECK: extsp 1
+; CHECK: bl g
diff --git a/test/CodeGen/XCore/cos.ll b/test/CodeGen/XCore/cos.ll
deleted file mode 100644
index 8211f85..0000000
--- a/test/CodeGen/XCore/cos.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl cosf" %t1.s | count 1
-; RUN: grep "bl cos" %t1.s | count 2
-declare double @llvm.cos.f64(double)
-
-define double @test(double %F) {
-        %result = call double @llvm.cos.f64(double %F)
-	ret double %result
-}
-
-declare float @llvm.cos.f32(float)
-
-define float @testf(float %F) {
-        %result = call float @llvm.cos.f32(float %F)
-	ret float %result
-}
diff --git a/test/CodeGen/XCore/dg.exp b/test/CodeGen/XCore/dg.exp
deleted file mode 100644
index 7110eab..0000000
--- a/test/CodeGen/XCore/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target XCore] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/XCore/exp.ll b/test/CodeGen/XCore/exp.ll
deleted file mode 100644
index d23d484..0000000
--- a/test/CodeGen/XCore/exp.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl expf" %t1.s | count 1
-; RUN: grep "bl exp" %t1.s | count 2
-declare double @llvm.exp.f64(double)
-
-define double @test(double %F) {
-        %result = call double @llvm.exp.f64(double %F)
-	ret double %result
-}
-
-declare float @llvm.exp.f32(float)
-
-define float @testf(float %F) {
-        %result = call float @llvm.exp.f32(float %F)
-	ret float %result
-}
diff --git a/test/CodeGen/XCore/exp2.ll b/test/CodeGen/XCore/exp2.ll
deleted file mode 100644
index 4c4d17f4..0000000
--- a/test/CodeGen/XCore/exp2.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl exp2f" %t1.s | count 1
-; RUN: grep "bl exp2" %t1.s | count 2
-declare double @llvm.exp2.f64(double)
-
-define double @test(double %F) {
-        %result = call double @llvm.exp2.f64(double %F)
-	ret double %result
-}
-
-declare float @llvm.exp2.f32(float)
-
-define float @testf(float %F) {
-        %result = call float @llvm.exp2.f32(float %F)
-	ret float %result
-}
diff --git a/test/CodeGen/XCore/float-intrinsics.ll b/test/CodeGen/XCore/float-intrinsics.ll
new file mode 100644
index 0000000..69a40f3
--- /dev/null
+++ b/test/CodeGen/XCore/float-intrinsics.ll
@@ -0,0 +1,171 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+declare double @llvm.cos.f64(double)
+declare double @llvm.exp.f64(double)
+declare double @llvm.exp2.f64(double)
+declare double @llvm.log.f64(double)
+declare double @llvm.log10.f64(double)
+declare double @llvm.log2.f64(double)
+declare double @llvm.pow.f64(double, double)
+declare double @llvm.powi.f64(double, i32)
+declare double @llvm.sin.f64(double)
+declare double @llvm.sqrt.f64(double)
+
+define double @cos(double %F) {
+; CHECK: cos:
+; CHECK: bl cos
+        %result = call double @llvm.cos.f64(double %F)
+	ret double %result
+}
+
+declare float @llvm.cos.f32(float)
+
+; CHECK: cosf:
+; CHECK: bl cosf
+define float @cosf(float %F) {
+        %result = call float @llvm.cos.f32(float %F)
+	ret float %result
+}
+
+define double @exp(double %F) {
+; CHECK: exp:
+; CHECK: bl exp
+        %result = call double @llvm.exp.f64(double %F)
+	ret double %result
+}
+
+declare float @llvm.exp.f32(float)
+
+define float @expf(float %F) {
+; CHECK: expf:
+; CHECK: bl expf
+        %result = call float @llvm.exp.f32(float %F)
+	ret float %result
+}
+
+define double @exp2(double %F) {
+; CHECK: exp2:
+; CHECK: bl exp2
+        %result = call double @llvm.exp2.f64(double %F)
+	ret double %result
+}
+
+declare float @llvm.exp2.f32(float)
+
+define float @exp2f(float %F) {
+; CHECK: exp2f:
+; CHECK: bl exp2f
+        %result = call float @llvm.exp2.f32(float %F)
+	ret float %result
+}
+
+define double @log(double %F) {
+; CHECK: log:
+; CHECK: bl log
+        %result = call double @llvm.log.f64(double %F)
+	ret double %result
+}
+
+declare float @llvm.log.f32(float)
+
+define float @logf(float %F) {
+; CHECK: logf:
+; CHECK: bl logf
+        %result = call float @llvm.log.f32(float %F)
+	ret float %result
+}
+
+define double @log10(double %F) {
+; CHECK: log10:
+; CHECK: bl log10
+        %result = call double @llvm.log10.f64(double %F)
+	ret double %result
+}
+
+declare float @llvm.log10.f32(float)
+
+define float @log10f(float %F) {
+; CHECK: log10f:
+; CHECK: bl log10f
+        %result = call float @llvm.log10.f32(float %F)
+	ret float %result
+}
+
+define double @log2(double %F) {
+; CHECK: log2:
+; CHECK: bl log2
+        %result = call double @llvm.log2.f64(double %F)
+	ret double %result
+}
+
+declare float @llvm.log2.f32(float)
+
+define float @log2f(float %F) {
+; CHECK: log2f:
+; CHECK: bl log2f
+        %result = call float @llvm.log2.f32(float %F)
+	ret float %result
+}
+
+define double @pow(double %F, double %power) {
+; CHECK: pow:
+; CHECK: bl pow
+        %result = call double @llvm.pow.f64(double %F, double %power)
+	ret double %result
+}
+
+declare float @llvm.pow.f32(float, float)
+
+define float @powf(float %F, float %power) {
+; CHECK: powf:
+; CHECK: bl powf
+        %result = call float @llvm.pow.f32(float %F, float %power)
+	ret float %result
+}
+
+define double @powi(double %F, i32 %power) {
+; CHECK: powi:
+; CHECK: bl __powidf2
+        %result = call double @llvm.powi.f64(double %F, i32 %power)
+	ret double %result
+}
+
+declare float @llvm.powi.f32(float, i32)
+
+define float @powif(float %F, i32 %power) {
+; CHECK: powif:
+; CHECK: bl __powisf2
+        %result = call float @llvm.powi.f32(float %F, i32 %power)
+	ret float %result
+}
+
+define double @sin(double %F) {
+; CHECK: sin:
+; CHECK: bl sin
+        %result = call double @llvm.sin.f64(double %F)
+	ret double %result
+}
+
+declare float @llvm.sin.f32(float)
+
+define float @sinf(float %F) {
+; CHECK: sinf:
+; CHECK: bl sinf
+        %result = call float @llvm.sin.f32(float %F)
+	ret float %result
+}
+
+define double @sqrt(double %F) {
+; CHECK: sqrt:
+; CHECK: bl sqrt
+        %result = call double @llvm.sqrt.f64(double %F)
+	ret double %result
+}
+
+declare float @llvm.sqrt.f32(float)
+
+define float @sqrtf(float %F) {
+; CHECK: sqrtf:
+; CHECK: bl sqrtf
+        %result = call float @llvm.sqrt.f32(float %F)
+	ret float %result
+}
diff --git a/test/CodeGen/XCore/fneg.ll b/test/CodeGen/XCore/fneg.ll
index e3dd3dd..d442a19 100644
--- a/test/CodeGen/XCore/fneg.ll
+++ b/test/CodeGen/XCore/fneg.ll
@@ -1,6 +1,8 @@
-; RUN: llc < %s -march=xcore | grep "xor" | count 1
+; RUN: llc < %s -march=xcore | FileCheck %s
 define i1 @test(double %F) nounwind {
 entry:
+; CHECK: test:
+; CHECK: xor
 	%0 = fsub double -0.000000e+00, %F
 	%1 = fcmp olt double 0.000000e+00, %0
 	ret i1 %1
diff --git a/test/CodeGen/XCore/getid.ll b/test/CodeGen/XCore/getid.ll
index ecab65c..ec46071 100644
--- a/test/CodeGen/XCore/getid.ll
+++ b/test/CodeGen/XCore/getid.ll
@@ -1,8 +1,10 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "get r11, id" %t1.s | count 1 
+; RUN: llc < %s -march=xcore | FileCheck %s
 declare i32 @llvm.xcore.getid()
 
 define i32 @test() {
+; CHECK: test:
+; CHECK: get r11, id
+; CHECK-NEXT: mov r0, r11
 	%result = call i32 @llvm.xcore.getid()
 	ret i32 %result
 }
diff --git a/test/CodeGen/XCore/global_negative_offset.ll b/test/CodeGen/XCore/global_negative_offset.ll
new file mode 100644
index 0000000..0328fb0
--- /dev/null
+++ b/test/CodeGen/XCore/global_negative_offset.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+
+; Don't fold negative offsets into cp / dp accesses to avoid a relocation
+; error if the address + addend is less than the start of the cp / dp.
+
+@a = external constant [0 x i32], section ".cp.rodata"
+@b = external global [0 x i32]
+
+define i32 *@f() nounwind {
+entry:
+; CHECK: f:
+; CHECK: ldaw r11, cp[a]
+; CHECK: sub r0, r11, 4
+	%0 = getelementptr [0 x i32]* @a, i32 0, i32 -1
+	ret i32* %0
+}
+
+define i32 *@g() nounwind {
+entry:
+; CHECK: g:
+; CHECK: ldaw [[REG:r[0-9]+]], dp[b]
+; CHECK: sub r0, [[REG]], 4
+	%0 = getelementptr [0 x i32]* @b, i32 0, i32 -1
+	ret i32* %0
+}
diff --git a/test/CodeGen/XCore/ladd_lsub_combine.ll b/test/CodeGen/XCore/ladd_lsub_combine.ll
index a693ee2..cd89966 100644
--- a/test/CodeGen/XCore/ladd_lsub_combine.ll
+++ b/test/CodeGen/XCore/ladd_lsub_combine.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore | FileCheck %s
+; RUN: llc -march=xcore < %s | FileCheck %s
 
 ; Only needs one ladd
 define i64 @f1(i32 %x, i32 %y) nounwind {
diff --git a/test/CodeGen/XCore/licm-ldwcp.ll b/test/CodeGen/XCore/licm-ldwcp.ll
index 4884f70..794c6bb 100644
--- a/test/CodeGen/XCore/licm-ldwcp.ll
+++ b/test/CodeGen/XCore/licm-ldwcp.ll
@@ -13,6 +13,6 @@ entry:
   br label %bb
 
 bb:                                               ; preds = %bb, %entry
-  volatile store i32 525509670, i32* %p, align 4
+  store volatile i32 525509670, i32* %p, align 4
   br label %bb
 }
diff --git a/test/CodeGen/XCore/lit.local.cfg b/test/CodeGen/XCore/lit.local.cfg
new file mode 100644
index 0000000..f8726af
--- /dev/null
+++ b/test/CodeGen/XCore/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'XCore' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/XCore/load.ll b/test/CodeGen/XCore/load.ll
index adfea21..faff03b 100644
--- a/test/CodeGen/XCore/load.ll
+++ b/test/CodeGen/XCore/load.ll
@@ -1,15 +1,9 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: not grep add %t1.s
-; RUN: not grep ldaw %t1.s
-; RUN: not grep lda16 %t1.s
-; RUN: not grep zext %t1.s
-; RUN: not grep sext %t1.s
-; RUN: grep "ldw" %t1.s | count 2
-; RUN: grep "ld16s" %t1.s | count 1
-; RUN: grep "ld8u" %t1.s | count 1
+; RUN: llc < %s -march=xcore | FileCheck %s
 
 define i32 @load32(i32* %p, i32 %offset) nounwind {
 entry:
+; CHECK: load32:
+; CHECK: ldw r0, r0[r1]
 	%0 = getelementptr i32* %p, i32 %offset
 	%1 = load i32* %0, align 4
 	ret i32 %1
@@ -17,6 +11,8 @@ entry:
 
 define i32 @load32_imm(i32* %p) nounwind {
 entry:
+; CHECK: load32_imm:
+; CHECK: ldw r0, r0[11]
 	%0 = getelementptr i32* %p, i32 11
 	%1 = load i32* %0, align 4
 	ret i32 %1
@@ -24,6 +20,9 @@ entry:
 
 define i32 @load16(i16* %p, i32 %offset) nounwind {
 entry:
+; CHECK: load16:
+; CHECK: ld16s r0, r0[r1]
+; CHECK-NOT: sext
 	%0 = getelementptr i16* %p, i32 %offset
 	%1 = load i16* %0, align 2
 	%2 = sext i16 %1 to i32
@@ -32,6 +31,9 @@ entry:
 
 define i32 @load8(i8* %p, i32 %offset) nounwind {
 entry:
+; CHECK: load8:
+; CHECK: ld8u r0, r0[r1]
+; CHECK-NOT: zext
 	%0 = getelementptr i8* %p, i32 %offset
 	%1 = load i8* %0, align 1
 	%2 = zext i8 %1 to i32
diff --git a/test/CodeGen/XCore/log.ll b/test/CodeGen/XCore/log.ll
deleted file mode 100644
index a08471f..0000000
--- a/test/CodeGen/XCore/log.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl logf" %t1.s | count 1
-; RUN: grep "bl log" %t1.s | count 2
-declare double @llvm.log.f64(double)
-
-define double @test(double %F) {
-        %result = call double @llvm.log.f64(double %F)
-	ret double %result
-}
-
-declare float @llvm.log.f32(float)
-
-define float @testf(float %F) {
-        %result = call float @llvm.log.f32(float %F)
-	ret float %result
-}
diff --git a/test/CodeGen/XCore/log10.ll b/test/CodeGen/XCore/log10.ll
deleted file mode 100644
index a72b8bf..0000000
--- a/test/CodeGen/XCore/log10.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl log10f" %t1.s | count 1
-; RUN: grep "bl log10" %t1.s | count 2
-declare double @llvm.log10.f64(double)
-
-define double @test(double %F) {
-        %result = call double @llvm.log10.f64(double %F)
-	ret double %result
-}
-
-declare float @llvm.log10.f32(float)
-
-define float @testf(float %F) {
-        %result = call float @llvm.log10.f32(float %F)
-	ret float %result
-}
diff --git a/test/CodeGen/XCore/log2.ll b/test/CodeGen/XCore/log2.ll
deleted file mode 100644
index d257433..0000000
--- a/test/CodeGen/XCore/log2.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl log2f" %t1.s | count 1
-; RUN: grep "bl log2" %t1.s | count 2
-declare double @llvm.log2.f64(double)
-
-define double @test(double %F) {
-        %result = call double @llvm.log2.f64(double %F)
-	ret double %result
-}
-
-declare float @llvm.log2.f32(float)
-
-define float @testf(float %F) {
-        %result = call float @llvm.log2.f32(float %F)
-	ret float %result
-}
diff --git a/test/CodeGen/XCore/pow.ll b/test/CodeGen/XCore/pow.ll
deleted file mode 100644
index b461185..0000000
--- a/test/CodeGen/XCore/pow.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl powf" %t1.s | count 1
-; RUN: grep "bl pow" %t1.s | count 2
-declare double @llvm.pow.f64(double, double)
-
-define double @test(double %F, double %power) {
-        %result = call double @llvm.pow.f64(double %F, double %power)
-	ret double %result
-}
-
-declare float @llvm.pow.f32(float, float)
-
-define float @testf(float %F, float %power) {
-        %result = call float @llvm.pow.f32(float %F, float %power)
-	ret float %result
-}
diff --git a/test/CodeGen/XCore/powi.ll b/test/CodeGen/XCore/powi.ll
deleted file mode 100644
index de31cbe..0000000
--- a/test/CodeGen/XCore/powi.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl __powidf2" %t1.s | count 1
-; RUN: grep "bl __powisf2" %t1.s | count 1
-declare double @llvm.powi.f64(double, i32)
-
-define double @test(double %F, i32 %power) {
-        %result = call double @llvm.powi.f64(double %F, i32 %power)
-	ret double %result
-}
-
-declare float @llvm.powi.f32(float, i32)
-
-define float @testf(float %F, i32 %power) {
-        %result = call float @llvm.powi.f32(float %F, i32 %power)
-	ret float %result
-}
diff --git a/test/CodeGen/XCore/private.ll b/test/CodeGen/XCore/private.ll
index 537d63b..80b7db4 100644
--- a/test/CodeGen/XCore/private.ll
+++ b/test/CodeGen/XCore/private.ll
@@ -1,19 +1,21 @@
 ; Test to make sure that the 'private' is used correctly.
 ;
-; RUN: llc < %s -march=xcore > %t
-; RUN: grep .Lfoo: %t
-; RUN: grep bl.*\.Lfoo %t
-; RUN: grep .Lbaz: %t
-; RUN: grep ldw.*\.Lbaz %t
+; RUN: llc < %s -march=xcore | FileCheck %s
 
 define private void @foo() {
+; CHECK: .Lfoo:
         ret void
 }
 
 @baz = private global i32 4
 
 define i32 @bar() {
+; CHECK: bar:
+; CHECK: bl .Lfoo
+; CHECK: ldw r0, dp[.Lbaz]
         call void @foo()
 	%1 = load i32* @baz, align 4
         ret i32 %1
 }
+
+; CHECK: .Lbaz:
diff --git a/test/CodeGen/XCore/scavenging.ll b/test/CodeGen/XCore/scavenging.ll
index 3181e96..5b612d0 100644
--- a/test/CodeGen/XCore/scavenging.ll
+++ b/test/CodeGen/XCore/scavenging.ll
@@ -18,32 +18,32 @@ entry:
 	%x = alloca [100 x i32], align 4		; <[100 x i32]*> [#uses=2]
 	%0 = load i32* @size, align 4		; <i32> [#uses=1]
 	%1 = alloca i32, i32 %0, align 4		; <i32*> [#uses=1]
-	%2 = volatile load i32* @g0, align 4		; <i32> [#uses=1]
-	%3 = volatile load i32* @g1, align 4		; <i32> [#uses=1]
-	%4 = volatile load i32* @g2, align 4		; <i32> [#uses=1]
-	%5 = volatile load i32* @g3, align 4		; <i32> [#uses=1]
-	%6 = volatile load i32* @g4, align 4		; <i32> [#uses=1]
-	%7 = volatile load i32* @g5, align 4		; <i32> [#uses=1]
-	%8 = volatile load i32* @g6, align 4		; <i32> [#uses=1]
-	%9 = volatile load i32* @g7, align 4		; <i32> [#uses=1]
-	%10 = volatile load i32* @g8, align 4		; <i32> [#uses=1]
-	%11 = volatile load i32* @g9, align 4		; <i32> [#uses=1]
-	%12 = volatile load i32* @g10, align 4		; <i32> [#uses=1]
-	%13 = volatile load i32* @g11, align 4		; <i32> [#uses=2]
+	%2 = load volatile i32* @g0, align 4		; <i32> [#uses=1]
+	%3 = load volatile i32* @g1, align 4		; <i32> [#uses=1]
+	%4 = load volatile i32* @g2, align 4		; <i32> [#uses=1]
+	%5 = load volatile i32* @g3, align 4		; <i32> [#uses=1]
+	%6 = load volatile i32* @g4, align 4		; <i32> [#uses=1]
+	%7 = load volatile i32* @g5, align 4		; <i32> [#uses=1]
+	%8 = load volatile i32* @g6, align 4		; <i32> [#uses=1]
+	%9 = load volatile i32* @g7, align 4		; <i32> [#uses=1]
+	%10 = load volatile i32* @g8, align 4		; <i32> [#uses=1]
+	%11 = load volatile i32* @g9, align 4		; <i32> [#uses=1]
+	%12 = load volatile i32* @g10, align 4		; <i32> [#uses=1]
+	%13 = load volatile i32* @g11, align 4		; <i32> [#uses=2]
 	%14 = getelementptr [100 x i32]* %x, i32 0, i32 50		; <i32*> [#uses=1]
 	store i32 %13, i32* %14, align 4
-	volatile store i32 %13, i32* @g11, align 4
-	volatile store i32 %12, i32* @g10, align 4
-	volatile store i32 %11, i32* @g9, align 4
-	volatile store i32 %10, i32* @g8, align 4
-	volatile store i32 %9, i32* @g7, align 4
-	volatile store i32 %8, i32* @g6, align 4
-	volatile store i32 %7, i32* @g5, align 4
-	volatile store i32 %6, i32* @g4, align 4
-	volatile store i32 %5, i32* @g3, align 4
-	volatile store i32 %4, i32* @g2, align 4
-	volatile store i32 %3, i32* @g1, align 4
-	volatile store i32 %2, i32* @g0, align 4
+	store volatile i32 %13, i32* @g11, align 4
+	store volatile i32 %12, i32* @g10, align 4
+	store volatile i32 %11, i32* @g9, align 4
+	store volatile i32 %10, i32* @g8, align 4
+	store volatile i32 %9, i32* @g7, align 4
+	store volatile i32 %8, i32* @g6, align 4
+	store volatile i32 %7, i32* @g5, align 4
+	store volatile i32 %6, i32* @g4, align 4
+	store volatile i32 %5, i32* @g3, align 4
+	store volatile i32 %4, i32* @g2, align 4
+	store volatile i32 %3, i32* @g1, align 4
+	store volatile i32 %2, i32* @g0, align 4
 	%x1 = getelementptr [100 x i32]* %x, i32 0, i32 0		; <i32*> [#uses=1]
 	call void @g(i32* %x1, i32* %1) nounwind
 	ret void
diff --git a/test/CodeGen/XCore/sin.ll b/test/CodeGen/XCore/sin.ll
deleted file mode 100644
index ced026f..0000000
--- a/test/CodeGen/XCore/sin.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl sinf" %t1.s | count 1
-; RUN: grep "bl sin" %t1.s | count 2
-declare double @llvm.sin.f64(double)
-
-define double @test(double %F) {
-        %result = call double @llvm.sin.f64(double %F)
-	ret double %result
-}
-
-declare float @llvm.sin.f32(float)
-
-define float @testf(float %F) {
-        %result = call float @llvm.sin.f32(float %F)
-	ret float %result
-}
diff --git a/test/CodeGen/XCore/sqrt.ll b/test/CodeGen/XCore/sqrt.ll
deleted file mode 100644
index 364d1a1..0000000
--- a/test/CodeGen/XCore/sqrt.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl sqrtf" %t1.s | count 1
-; RUN: grep "bl sqrt" %t1.s | count 2
-declare double @llvm.sqrt.f64(double)
-
-define double @test(double %F) {
-        %result = call double @llvm.sqrt.f64(double %F)
-	ret double %result
-}
-
-declare float @llvm.sqrt.f32(float)
-
-define float @testf(float %F) {
-        %result = call float @llvm.sqrt.f32(float %F)
-	ret float %result
-}
diff --git a/test/CodeGen/XCore/store.ll b/test/CodeGen/XCore/store.ll
index 2213743..836b125 100644
--- a/test/CodeGen/XCore/store.ll
+++ b/test/CodeGen/XCore/store.ll
@@ -1,13 +1,9 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: not grep add %t1.s
-; RUN: not grep ldaw %t1.s
-; RUN: not grep lda16 %t1.s
-; RUN: grep "stw" %t1.s | count 2
-; RUN: grep "st16" %t1.s | count 1
-; RUN: grep "st8" %t1.s | count 1
+; RUN: llc < %s -march=xcore | FileCheck %s
 
 define void @store32(i32* %p, i32 %offset, i32 %val) nounwind {
 entry:
+; CHECK: store32:
+; CHECK: stw r2, r0[r1]
 	%0 = getelementptr i32* %p, i32 %offset
 	store i32 %val, i32* %0, align 4
 	ret void
@@ -15,6 +11,8 @@ entry:
 
 define void @store32_imm(i32* %p, i32 %val) nounwind {
 entry:
+; CHECK: store32_imm:
+; CHECK: stw r1, r0[11]
 	%0 = getelementptr i32* %p, i32 11
 	store i32 %val, i32* %0, align 4
 	ret void
@@ -22,6 +20,8 @@ entry:
 
 define void @store16(i16* %p, i32 %offset, i16 %val) nounwind {
 entry:
+; CHECK: store16:
+; CHECK: st16 r2, r0[r1]
 	%0 = getelementptr i16* %p, i32 %offset
 	store i16 %val, i16* %0, align 2
 	ret void
@@ -29,6 +29,8 @@ entry:
 
 define void @store8(i8* %p, i32 %offset, i8 %val) nounwind {
 entry:
+; CHECK: store8:
+; CHECK: st8 r2, r0[r1]
 	%0 = getelementptr i8* %p, i32 %offset
 	store i8 %val, i8* %0, align 1
 	ret void
diff --git a/test/CodeGen/XCore/trap.ll b/test/CodeGen/XCore/trap.ll
index 45f886d..eb71cb6 100644
--- a/test/CodeGen/XCore/trap.ll
+++ b/test/CodeGen/XCore/trap.ll
@@ -1,8 +1,9 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "ecallf" %t1.s | count 1
-; RUN: grep "ldc" %t1.s | count 1
+; RUN: llc < %s -march=xcore | FileCheck %s
 define i32 @test() noreturn nounwind  {
 entry:
+; CHECK: test:
+; CHECK: ldc
+; CHECK: ecallf
 	tail call void @llvm.trap( )
 	unreachable
 }
diff --git a/test/CodeGen/XCore/unaligned_store_combine.ll b/test/CodeGen/XCore/unaligned_store_combine.ll
index 493ca6a..c997b78 100644
--- a/test/CodeGen/XCore/unaligned_store_combine.ll
+++ b/test/CodeGen/XCore/unaligned_store_combine.ll
@@ -1,11 +1,12 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl memmove" %t1.s | count 1
-; RUN: grep "ldc r., 8" %t1.s | count 1
+; RUN: llc < %s -march=xcore | FileCheck %s
 
 ; Unaligned load / store pair. Should be combined into a memmove
 ; of size 8
 define void @f(i64* %dst, i64* %src) nounwind {
 entry:
+; CHECK: f:
+; CHECK: ldc r2, 8
+; CHECK: bl memmove
 	%0 = load i64* %src, align 1
 	store i64 %0, i64* %dst, align 1
 	ret void
diff --git a/test/DebugInfo/2009-01-15-dbg_declare.ll b/test/DebugInfo/2009-01-15-dbg_declare.ll
deleted file mode 100644
index ab404af..0000000
--- a/test/DebugInfo/2009-01-15-dbg_declare.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc %s -o /dev/null
-
-        %llvm.dbg.variable.type = type { i32, { }*, i8*, { }*, i32, { }*, i8*, i8* }
-@llvm.dbg.variable24 = external constant %llvm.dbg.variable.type                ; <%llvm.dbg.variable.type*> [#uses=1]
-
-declare void @llvm.dbg.declare({ }*, { }*) nounwind
-
-define i32 @isascii(i32 %_c) nounwind {
-entry:
-	%j = alloca i32
-	%0 = bitcast i32* %j to { }*
-        call void @llvm.dbg.declare({ }* %0, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable24 to { }*))
-        unreachable
-}
-
-
diff --git a/test/DebugInfo/2010-04-25-CU-entry_pc.ll b/test/DebugInfo/2010-04-25-CU-entry_pc.ll
deleted file mode 100644
index de099b6..0000000
--- a/test/DebugInfo/2010-04-25-CU-entry_pc.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s | grep entry_pc | count 2
-@i = global i32 1                                 ; <i32*> [#uses=0]
-
-!llvm.dbg.gv = !{!0}
-
-!0 = metadata !{i32 524340, i32 0, metadata !1, metadata !"i", metadata !"i", metadata !"", metadata !1, i32 1, metadata !3, i1 false, i1 true, i32* @i} ; [ DW_TAG_variable ]
-!1 = metadata !{i32 524329, metadata !"b.c", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 524305, i32 0, i32 1, metadata !"b.c", metadata !"/tmp", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/DebugInfo/2010-05-03-OriginDIE.ll b/test/DebugInfo/2010-05-03-OriginDIE.ll
index 0e1d1fd..94bddc0 100644
--- a/test/DebugInfo/2010-05-03-OriginDIE.ll
+++ b/test/DebugInfo/2010-05-03-OriginDIE.ll
@@ -19,7 +19,7 @@ entry:
   %0 = getelementptr inbounds %struct.gpm_t* %gpm, i32 0, i32 2, i32 0 ; <i8*> [#uses=1]
   %1 = getelementptr inbounds %struct.gpt_t* %gpt, i32 0, i32 9, i32 0 ; <i8*> [#uses=1]
   call void @uuid_LtoB(i8* %0, i8* %1) nounwind, !dbg !0
-  %a9 = volatile load i64* %data_addr.i18, align 8 ; <i64> [#uses=1]
+  %a9 = load volatile i64* %data_addr.i18, align 8 ; <i64> [#uses=1]
   %a10 = call i64 @llvm.bswap.i64(i64 %a9) nounwind ; <i64> [#uses=1]
   %a11 = getelementptr inbounds %struct.gpt_t* %gpt, i32 0, i32 8, !dbg !7 ; <i64*> [#uses=1]
   %a12 = load i64* %a11, align 4, !dbg !7         ; <i64> [#uses=1]
@@ -29,7 +29,7 @@ entry:
   call void @llvm.dbg.value(metadata !18, i64 0, metadata !19) nounwind
   call void @llvm.dbg.declare(metadata !6, metadata !23) nounwind
   call void @llvm.dbg.value(metadata !{i64* %data_addr.i17}, i64 0, metadata !34) nounwind
-  %a13 = volatile load i64* %data_addr.i17, align 8 ; <i64> [#uses=1]
+  %a13 = load volatile i64* %data_addr.i17, align 8 ; <i64> [#uses=1]
   %a14 = call i64 @llvm.bswap.i64(i64 %a13) nounwind ; <i64> [#uses=2]
   %a15 = add i64 %a10, %a14, !dbg !7              ; <i64> [#uses=1]
   %a16 = sub i64 %a15, %a14                       ; <i64> [#uses=1]
diff --git a/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll b/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
index 001e938..2557c9c 100644
--- a/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
+++ b/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
@@ -2,7 +2,6 @@
 ; Check struct X for dead variable xyz from inlined function foo.
 
 ; CHECK:	DW_TAG_structure_type
-; CHECK-NEXT:	DW_AT_sibling
 ; CHECK-NEXT:	DW_AT_name
  
 
diff --git a/test/DebugInfo/2011-09-26-GlobalVarContext.ll b/test/DebugInfo/2011-09-26-GlobalVarContext.ll
deleted file mode 100644
index 3e9fa88..0000000
--- a/test/DebugInfo/2011-09-26-GlobalVarContext.ll
+++ /dev/null
@@ -1,47 +0,0 @@
-; RUN: llc -asm-verbose %s -o - | FileCheck %s
-
-; ModuleID = 'test.c'
-
-@GLOBAL = common global i32 0, align 4
-
-define i32 @f() nounwind {
-  %LOCAL = alloca i32, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %LOCAL}, metadata !15), !dbg !17
-  %1 = load i32* @GLOBAL, align 4, !dbg !18
-  store i32 %1, i32* %LOCAL, align 4, !dbg !18
-  %2 = load i32* @GLOBAL, align 4, !dbg !19
-  ret i32 %2, !dbg !19
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-!llvm.dbg.cu = !{!0}
-
-!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"test.c", metadata !"/work/llvm/vanilla/test/DebugInfo", metadata !"clang version 3.0 (trunk)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"f", metadata !"f", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @f, null, null, metadata !10} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 720937, metadata !"test.c", metadata !"/work/llvm/vanilla/test/DebugInfo", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!8 = metadata !{metadata !9}
-!9 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{metadata !11}
-!11 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{metadata !14}
-!14 = metadata !{i32 720948, i32 0, null, metadata !"GLOBAL", metadata !"GLOBAL", metadata !"", metadata !6, i32 1, metadata !9, i32 0, i32 1, i32* @GLOBAL} ; [ DW_TAG_variable ]
-!15 = metadata !{i32 721152, metadata !16, metadata !"LOCAL", metadata !6, i32 4, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!16 = metadata !{i32 720907, metadata !5, i32 3, i32 9, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
-!17 = metadata !{i32 4, i32 9, metadata !16, null}
-!18 = metadata !{i32 4, i32 23, metadata !16, null}
-!19 = metadata !{i32 5, i32 5, metadata !16, null}
-
-; CHECK: .ascii	 "GLOBAL"
-; CHECK: .byte	1
-; CHECK: .byte	1
-
-; CHECK: .ascii	 "LOCAL"
-; CHECK: .byte	1
-; CHECK: .byte	4
diff --git a/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll b/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
new file mode 100644
index 0000000..934fa81
--- /dev/null
+++ b/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
@@ -0,0 +1,47 @@
+; RUN: llc -mtriple=x86_64-pc-linux-gnu -asm-verbose %s -o - | FileCheck %s
+
+; ModuleID = 'test.c'
+
+@GLB = common global i32 0, align 4
+
+define i32 @f() nounwind {
+  %LOC = alloca i32, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %LOC}, metadata !15), !dbg !17
+  %1 = load i32* @GLB, align 4, !dbg !18
+  store i32 %1, i32* %LOC, align 4, !dbg !18
+  %2 = load i32* @GLB, align 4, !dbg !19
+  ret i32 %2, !dbg !19
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"test.c", metadata !"/work/llvm/vanilla/test/DebugInfo", metadata !"clang version 3.0 (trunk)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"f", metadata !"f", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @f, null, null, metadata !10} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 720937, metadata !"test.c", metadata !"/work/llvm/vanilla/test/DebugInfo", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{metadata !11}
+!11 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{metadata !14}
+!14 = metadata !{i32 720948, i32 0, null, metadata !"GLB", metadata !"GLB", metadata !"", metadata !6, i32 1, metadata !9, i32 0, i32 1, i32* @GLB} ; [ DW_TAG_variable ]
+!15 = metadata !{i32 721152, metadata !16, metadata !"LOC", metadata !6, i32 4, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!16 = metadata !{i32 720907, metadata !5, i32 3, i32 9, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!17 = metadata !{i32 4, i32 9, metadata !16, null}
+!18 = metadata !{i32 4, i32 23, metadata !16, null}
+!19 = metadata !{i32 5, i32 5, metadata !16, null}
+
+; CHECK: .long .Lstring3
+; CHECK: .byte	1
+; CHECK: .byte	1
+
+; CHECK: .long .Lstring6
+; CHECK: .byte	1
+; CHECK: .byte	4
diff --git a/test/DebugInfo/X86/2011-12-16-BadStructRef.ll b/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
new file mode 100644
index 0000000..6e20169
--- /dev/null
+++ b/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
@@ -0,0 +1,172 @@
+; RUN: llc -mtriple=x86_64-apple-macosx10.7 %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; CHECK: b_ref
+; CHECK-NOT: AT_bit_size
+
+%struct.bar = type { %struct.baz, %struct.baz* }
+%struct.baz = type { i32 }
+
+define i32 @main(i32 %argc, i8** %argv) uwtable ssp {
+entry:
+  %retval = alloca i32, align 4
+  %argc.addr = alloca i32, align 4
+  %argv.addr = alloca i8**, align 8
+  %myBar = alloca %struct.bar, align 8
+  store i32 0, i32* %retval
+  store i32 %argc, i32* %argc.addr, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !49), !dbg !50
+  store i8** %argv, i8*** %argv.addr, align 8
+  call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !51), !dbg !52
+  call void @llvm.dbg.declare(metadata !{%struct.bar* %myBar}, metadata !53), !dbg !55
+  call void @_ZN3barC1Ei(%struct.bar* %myBar, i32 1), !dbg !56
+  ret i32 0, !dbg !57
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define linkonce_odr void @_ZN3barC1Ei(%struct.bar* %this, i32 %x) unnamed_addr uwtable ssp align 2 {
+entry:
+  %this.addr = alloca %struct.bar*, align 8
+  %x.addr = alloca i32, align 4
+  store %struct.bar* %this, %struct.bar** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata !{%struct.bar** %this.addr}, metadata !58), !dbg !59
+  store i32 %x, i32* %x.addr, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %x.addr}, metadata !60), !dbg !61
+  %this1 = load %struct.bar** %this.addr
+  %0 = load i32* %x.addr, align 4, !dbg !62
+  call void @_ZN3barC2Ei(%struct.bar* %this1, i32 %0), !dbg !62
+  ret void, !dbg !62
+}
+
+define linkonce_odr void @_ZN3barC2Ei(%struct.bar* %this, i32 %x) unnamed_addr uwtable ssp align 2 {
+entry:
+  %this.addr = alloca %struct.bar*, align 8
+  %x.addr = alloca i32, align 4
+  store %struct.bar* %this, %struct.bar** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata !{%struct.bar** %this.addr}, metadata !63), !dbg !64
+  store i32 %x, i32* %x.addr, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %x.addr}, metadata !65), !dbg !66
+  %this1 = load %struct.bar** %this.addr
+  %b = getelementptr inbounds %struct.bar* %this1, i32 0, i32 0, !dbg !67
+  %0 = load i32* %x.addr, align 4, !dbg !67
+  call void @_ZN3bazC1Ei(%struct.baz* %b, i32 %0), !dbg !67
+  %1 = getelementptr inbounds %struct.bar* %this1, i32 0, i32 1, !dbg !67
+  %b2 = getelementptr inbounds %struct.bar* %this1, i32 0, i32 0, !dbg !67
+  store %struct.baz* %b2, %struct.baz** %1, align 8, !dbg !67
+  ret void, !dbg !68
+}
+
+define linkonce_odr void @_ZN3bazC1Ei(%struct.baz* %this, i32 %a) unnamed_addr uwtable ssp align 2 {
+entry:
+  %this.addr = alloca %struct.baz*, align 8
+  %a.addr = alloca i32, align 4
+  store %struct.baz* %this, %struct.baz** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata !{%struct.baz** %this.addr}, metadata !70), !dbg !71
+  store i32 %a, i32* %a.addr, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !72), !dbg !73
+  %this1 = load %struct.baz** %this.addr
+  %0 = load i32* %a.addr, align 4, !dbg !74
+  call void @_ZN3bazC2Ei(%struct.baz* %this1, i32 %0), !dbg !74
+  ret void, !dbg !74
+}
+
+define linkonce_odr void @_ZN3bazC2Ei(%struct.baz* %this, i32 %a) unnamed_addr nounwind uwtable ssp align 2 {
+entry:
+  %this.addr = alloca %struct.baz*, align 8
+  %a.addr = alloca i32, align 4
+  store %struct.baz* %this, %struct.baz** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata !{%struct.baz** %this.addr}, metadata !75), !dbg !76
+  store i32 %a, i32* %a.addr, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !77), !dbg !78
+  %this1 = load %struct.baz** %this.addr
+  %h = getelementptr inbounds %struct.baz* %this1, i32 0, i32 0, !dbg !79
+  %0 = load i32* %a.addr, align 4, !dbg !79
+  store i32 %0, i32* %h, align 4, !dbg !79
+  ret void, !dbg !80
+}
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 4, metadata !"main.cpp", metadata !"/Users/echristo/tmp/bad-struct-ref", metadata !"clang version 3.1 (trunk 146596)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !3, metadata !27, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5, metadata !9}
+!5 = metadata !{i32 720898, null, metadata !"bar", metadata !6, i32 9, i64 128, i64 64, i32 0, i32 0, null, metadata !7, i32 0, null, null} ; [ DW_TAG_class_type ]
+!6 = metadata !{i32 720937, metadata !"main.cpp", metadata !"/Users/echristo/tmp/bad-struct-ref", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{metadata !8, metadata !19, metadata !21}
+!8 = metadata !{i32 720909, metadata !5, metadata !"b", metadata !6, i32 11, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ]
+!9 = metadata !{i32 720898, null, metadata !"baz", metadata !6, i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !10, i32 0, null, null} ; [ DW_TAG_class_type ]
+!10 = metadata !{metadata !11, metadata !13}
+!11 = metadata !{i32 720909, metadata !9, metadata !"h", metadata !6, i32 5, i64 32, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ]
+!12 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!13 = metadata !{i32 720942, i32 0, metadata !9, metadata !"baz", metadata !"baz", metadata !"", metadata !6, i32 6, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !17} ; [ DW_TAG_subprogram ]
+!14 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !15, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!15 = metadata !{null, metadata !16, metadata !12}
+!16 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !9} ; [ DW_TAG_pointer_type ]
+!17 = metadata !{metadata !18}
+!18 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
+!19 = metadata !{i32 720909, metadata !5, metadata !"b_ref", metadata !6, i32 12, i64 64, i64 64, i64 64, i32 0, metadata !20} ; [ DW_TAG_member ]
+!20 = metadata !{i32 720912, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_reference_type ]
+!21 = metadata !{i32 720942, i32 0, metadata !5, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 13, metadata !22, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !25} ; [ DW_TAG_subprogram ]
+!22 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !23, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!23 = metadata !{null, metadata !24, metadata !12}
+!24 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !5} ; [ DW_TAG_pointer_type ]
+!25 = metadata !{metadata !26}
+!26 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
+!27 = metadata !{metadata !28}
+!28 = metadata !{metadata !29, metadata !37, metadata !40, metadata !43, metadata !46}
+!29 = metadata !{i32 720942, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 17, metadata !30, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !35} ; [ DW_TAG_subprogram ]
+!30 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !31, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!31 = metadata !{metadata !12, metadata !12, metadata !32}
+!32 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !33} ; [ DW_TAG_pointer_type ]
+!33 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !34} ; [ DW_TAG_pointer_type ]
+!34 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!35 = metadata !{metadata !36}
+!36 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
+!37 = metadata !{i32 720942, i32 0, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC1Ei", metadata !6, i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC1Ei, null, metadata !21, metadata !38} ; [ DW_TAG_subprogram ]
+!38 = metadata !{metadata !39}
+!39 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
+!40 = metadata !{i32 720942, i32 0, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC2Ei", metadata !6, i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC2Ei, null, metadata !21, metadata !41} ; [ DW_TAG_subprogram ]
+!41 = metadata !{metadata !42}
+!42 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
+!43 = metadata !{i32 720942, i32 0, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC1Ei", metadata !6, i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC1Ei, null, metadata !13, metadata !44} ; [ DW_TAG_subprogram ]
+!44 = metadata !{metadata !45}
+!45 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
+!46 = metadata !{i32 720942, i32 0, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC2Ei", metadata !6, i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC2Ei, null, metadata !13, metadata !47} ; [ DW_TAG_subprogram ]
+!47 = metadata !{metadata !48}
+!48 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
+!49 = metadata !{i32 721153, metadata !29, metadata !"argc", metadata !6, i32 16777232, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!50 = metadata !{i32 16, i32 14, metadata !29, null}
+!51 = metadata !{i32 721153, metadata !29, metadata !"argv", metadata !6, i32 33554448, metadata !32, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!52 = metadata !{i32 16, i32 27, metadata !29, null}
+!53 = metadata !{i32 721152, metadata !54, metadata !"myBar", metadata !6, i32 18, metadata !5, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!54 = metadata !{i32 720907, metadata !29, i32 17, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!55 = metadata !{i32 18, i32 9, metadata !54, null}
+!56 = metadata !{i32 18, i32 17, metadata !54, null}
+!57 = metadata !{i32 19, i32 5, metadata !54, null}
+!58 = metadata !{i32 721153, metadata !37, metadata !"this", metadata !6, i32 16777229, metadata !24, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!59 = metadata !{i32 13, i32 5, metadata !37, null}
+!60 = metadata !{i32 721153, metadata !37, metadata !"x", metadata !6, i32 33554445, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!61 = metadata !{i32 13, i32 13, metadata !37, null}
+!62 = metadata !{i32 13, i32 34, metadata !37, null}
+!63 = metadata !{i32 721153, metadata !40, metadata !"this", metadata !6, i32 16777229, metadata !24, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!64 = metadata !{i32 13, i32 5, metadata !40, null}
+!65 = metadata !{i32 721153, metadata !40, metadata !"x", metadata !6, i32 33554445, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!66 = metadata !{i32 13, i32 13, metadata !40, null}
+!67 = metadata !{i32 13, i32 33, metadata !40, null}
+!68 = metadata !{i32 13, i32 34, metadata !69, null}
+!69 = metadata !{i32 720907, metadata !40, i32 13, i32 33, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
+!70 = metadata !{i32 721153, metadata !43, metadata !"this", metadata !6, i32 16777222, metadata !16, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!71 = metadata !{i32 6, i32 5, metadata !43, null}
+!72 = metadata !{i32 721153, metadata !43, metadata !"a", metadata !6, i32 33554438, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!73 = metadata !{i32 6, i32 13, metadata !43, null}
+!74 = metadata !{i32 6, i32 24, metadata !43, null}
+!75 = metadata !{i32 721153, metadata !46, metadata !"this", metadata !6, i32 16777222, metadata !16, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!76 = metadata !{i32 6, i32 5, metadata !46, null}
+!77 = metadata !{i32 721153, metadata !46, metadata !"a", metadata !6, i32 33554438, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!78 = metadata !{i32 6, i32 13, metadata !46, null}
+!79 = metadata !{i32 6, i32 23, metadata !46, null}
+!80 = metadata !{i32 6, i32 24, metadata !81, null}
+!81 = metadata !{i32 720907, metadata !46, i32 6, i32 23, metadata !6, i32 2} ; [ DW_TAG_lexical_block ]
diff --git a/test/DebugInfo/X86/DW_AT_byte_size.ll b/test/DebugInfo/X86/DW_AT_byte_size.ll
new file mode 100644
index 0000000..59280e0
--- /dev/null
+++ b/test/DebugInfo/X86/DW_AT_byte_size.ll
@@ -0,0 +1,45 @@
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; Checks that we don't emit a size for a pointer type.
+; CHECK: DW_TAG_pointer_type
+; CHECK-NEXT: DW_AT_type
+; CHECK-NOT-NEXT: DW_AT_byte_size
+
+%struct.A = type { i32 }
+
+define i32 @_Z3fooP1A(%struct.A* %a) nounwind uwtable ssp {
+entry:
+  %a.addr = alloca %struct.A*, align 8
+  store %struct.A* %a, %struct.A** %a.addr, align 8
+  call void @llvm.dbg.declare(metadata !{%struct.A** %a.addr}, metadata !16), !dbg !17
+  %0 = load %struct.A** %a.addr, align 8, !dbg !18
+  %b = getelementptr inbounds %struct.A* %0, i32 0, i32 0, !dbg !18
+  %1 = load i32* %b, align 4, !dbg !18
+  ret i32 %1, !dbg !18
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/Users/echristo", metadata !"clang version 3.1 (trunk 150996)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooP1A", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%struct.A*)* @_Z3fooP1A, null, null, metadata !14} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9, metadata !10}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 786434, null, metadata !"A", metadata !6, i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !12, i32 0, null, null} ; [ DW_TAG_class_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 786445, metadata !11, metadata !"b", metadata !6, i32 1, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ]
+!14 = metadata !{metadata !15}
+!15 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
+!16 = metadata !{i32 786689, metadata !5, metadata !"a", metadata !6, i32 16777219, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!17 = metadata !{i32 3, i32 13, metadata !5, null}
+!18 = metadata !{i32 4, i32 3, metadata !19, null}
+!19 = metadata !{i32 786443, metadata !5, i32 3, i32 16, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/DebugInfo/X86/DW_AT_specification.ll b/test/DebugInfo/X86/DW_AT_specification.ll
new file mode 100644
index 0000000..078b740
--- /dev/null
+++ b/test/DebugInfo/X86/DW_AT_specification.ll
@@ -0,0 +1,43 @@
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; test that the DW_AT_specification is a back edge in the file.
+
+; CHECK: 0x0000003a: DW_TAG_subprogram [5] *
+; CHECK: 0x00000060: DW_AT_specification [DW_FORM_ref4]      (cu + 0x003a => {0x0000003a})
+
+
+@_ZZN3foo3barEvE1x = constant i32 0, align 4
+
+define void @_ZN3foo3barEv()  {
+entry:
+  ret void, !dbg !25
+}
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 4, metadata !"<unknown>", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/toolkit/library", metadata !"clang version 3.0 ()", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 720942, i32 0, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @_ZN3foo3barEv, null, metadata !11, metadata !16} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 720937, metadata !"nsNativeAppSupportBase.ii", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/toolkit/library", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{null, metadata !9}
+!9 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ]
+!10 = metadata !{i32 720915, null, metadata !"foo", metadata !6, i32 1, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!11 = metadata !{i32 720942, i32 0, metadata !12, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", metadata !6, i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !14} ; [ DW_TAG_subprogram ]
+!12 = metadata !{i32 720898, null, metadata !"foo", metadata !6, i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !13, i32 0, null, null} ; [ DW_TAG_class_type ]
+!13 = metadata !{metadata !11}
+!14 = metadata !{metadata !15}
+!15 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
+!16 = metadata !{metadata !17}
+!17 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
+!18 = metadata !{metadata !19}
+!19 = metadata !{metadata !20}
+!20 = metadata !{i32 720948, i32 0, metadata !5, metadata !"x", metadata !"x", metadata !"", metadata !6, i32 5, metadata !21, i32 1, i32 1, i32* @_ZZN3foo3barEvE1x} ; [ DW_TAG_variable ]
+!21 = metadata !{i32 720934, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_const_type ]
+!22 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!25 = metadata !{i32 6, i32 1, metadata !26, null}
+!26 = metadata !{i32 720907, metadata !5, i32 4, i32 17, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/DebugInfo/X86/DW_TAG_friend.ll b/test/DebugInfo/X86/DW_TAG_friend.ll
new file mode 100644
index 0000000..a0dcec3
--- /dev/null
+++ b/test/DebugInfo/X86/DW_TAG_friend.ll
@@ -0,0 +1,47 @@
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; Check that the friend tag is there and is followed by a DW_AT_friend that has a reference back.
+
+; CHECK: 0x00000032:   DW_TAG_class_type [4]
+; CHECK: 0x00000077:   DW_TAG_class_type [4]
+; CHECK: 0x000000a0:     DW_TAG_friend [9]  
+; CHECK: 0x000000a1:       DW_AT_friend [DW_FORM_ref4]   (cu + 0x0032 => {0x00000032})
+
+
+%class.A = type { i32 }
+%class.B = type { i32 }
+
+@a = global %class.A zeroinitializer, align 4
+@b = global %class.B zeroinitializer, align 4
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", metadata !"clang version 3.1 (trunk 153413) (llvm/trunk 153428)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5, metadata !17}
+!5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 10, metadata !7, i32 0, i32 1, %class.A* @a} ; [ DW_TAG_variable ]
+!6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786434, null, metadata !"A", metadata !6, i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null} ; [ DW_TAG_class_type ]
+!8 = metadata !{metadata !9, metadata !11}
+!9 = metadata !{i32 786445, metadata !7, metadata !"a", metadata !6, i32 2, i64 32, i64 32, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ]
+!10 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!11 = metadata !{i32 786478, i32 0, metadata !7, metadata !"A", metadata !"A", metadata !"", metadata !6, i32 1, metadata !12, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !15} ; [ DW_TAG_subprogram ]
+!12 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!13 = metadata !{null, metadata !14}
+!14 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !7} ; [ DW_TAG_pointer_type ]
+!15 = metadata !{metadata !16}
+!16 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
+!17 = metadata !{i32 786484, i32 0, null, metadata !"b", metadata !"b", metadata !"", metadata !6, i32 11, metadata !18, i32 0, i32 1, %class.B* @b} ; [ DW_TAG_variable ]
+!18 = metadata !{i32 786434, null, metadata !"B", metadata !6, i32 5, i64 32, i64 32, i32 0, i32 0, null, metadata !19, i32 0, null, null} ; [ DW_TAG_class_type ]
+!19 = metadata !{metadata !20, metadata !21, metadata !27}
+!20 = metadata !{i32 786445, metadata !18, metadata !"b", metadata !6, i32 7, i64 32, i64 32, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ]
+!21 = metadata !{i32 786478, i32 0, metadata !18, metadata !"B", metadata !"B", metadata !"", metadata !6, i32 5, metadata !22, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !25} ; [ DW_TAG_subprogram ]
+!22 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !23, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!23 = metadata !{null, metadata !24}
+!24 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !18} ; [ DW_TAG_pointer_type ]
+!25 = metadata !{metadata !26}
+!26 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
+!27 = metadata !{i32 786474, metadata !18, null, metadata !6, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_friend ]
diff --git a/test/DebugInfo/X86/block-capture.ll b/test/DebugInfo/X86/block-capture.ll
new file mode 100644
index 0000000..4953c42
--- /dev/null
+++ b/test/DebugInfo/X86/block-capture.ll
@@ -0,0 +1,127 @@
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; Checks that we emit debug info for the block variable declare.
+; CHECK: 0x00000030:   DW_TAG_subprogram [3]
+; CHECK: 0x0000005b:     DW_TAG_variable [5]
+; CHECK: 0x0000005c:       DW_AT_name [DW_FORM_strp]     ( .debug_str[0x000000e6] = "block")
+; CHECK: 0x00000066:       DW_AT_location [DW_FORM_data4]        (0x00000023)
+
+%struct.__block_descriptor = type { i64, i64 }
+%struct.__block_literal_generic = type { i8*, i32, i32, i8*, %struct.__block_descriptor* }
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define hidden void @__foo_block_invoke_0(i8* %.block_descriptor) uwtable ssp {
+entry:
+  %exn.slot = alloca i8*
+  %ehselector.slot = alloca i32
+  call void @llvm.dbg.value(metadata !{i8* %.block_descriptor}, i64 0, metadata !39), !dbg !51
+  %block = bitcast i8* %.block_descriptor to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, void ()* }>*, !dbg !52
+  call void @llvm.dbg.declare(metadata !{<{ i8*, i32, i32, i8*, %struct.__block_descriptor*, void ()* }>* %block}, metadata !53), !dbg !54
+  %block.capture.addr = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, void ()* }>* %block, i32 0, i32 5, !dbg !55
+  %0 = load void ()** %block.capture.addr, align 8, !dbg !55
+  %block.literal = bitcast void ()* %0 to %struct.__block_literal_generic*, !dbg !55
+  %1 = getelementptr inbounds %struct.__block_literal_generic* %block.literal, i32 0, i32 3, !dbg !55
+  %2 = bitcast %struct.__block_literal_generic* %block.literal to i8*, !dbg !55
+  %3 = load i8** %1, !dbg !55
+  %4 = bitcast i8* %3 to void (i8*)*, !dbg !55
+  invoke void %4(i8* %2)
+          to label %invoke.cont unwind label %lpad, !dbg !55
+
+invoke.cont:                                      ; preds = %entry
+  br label %eh.cont, !dbg !58
+
+eh.cont:                                          ; preds = %catch, %invoke.cont
+  ret void, !dbg !61
+
+lpad:                                             ; preds = %entry
+  %5 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+          catch i8* null, !dbg !55
+  %6 = extractvalue { i8*, i32 } %5, 0, !dbg !55
+  store i8* %6, i8** %exn.slot, !dbg !55
+  %7 = extractvalue { i8*, i32 } %5, 1, !dbg !55
+  store i32 %7, i32* %ehselector.slot, !dbg !55
+  br label %catch, !dbg !55
+
+catch:                                            ; preds = %lpad
+  %exn = load i8** %exn.slot, !dbg !62
+  %exn.adjusted = call i8* @objc_begin_catch(i8* %exn) nounwind, !dbg !62
+  call void @objc_end_catch(), !dbg !58
+  br label %eh.cont, !dbg !58
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+declare i8* @objc_begin_catch(i8*)
+
+declare void @objc_end_catch()
+
+declare i32 @__objc_personality_v0(...)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!35, !36, !37, !38}
+
+!0 = metadata !{i32 786449, i32 0, i32 16, metadata !"foo.m", metadata !"/Users/echristo", metadata !"clang version 3.1 (trunk 151227)", i1 true, i1 false, metadata !"", i32 2, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5, metadata !28, metadata !31, metadata !34}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !"foo.m", metadata !"/Users/echristo", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{null, metadata !9}
+!9 = metadata !{i32 786454, null, metadata !"dispatch_block_t", metadata !6, i32 1, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ]
+!10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 786451, metadata !6, metadata !"__block_literal_generic", metadata !6, i32 5, i64 256, i64 0, i32 0, i32 8, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!12 = metadata !{metadata !13, metadata !15, metadata !17, metadata !18, metadata !19}
+!13 = metadata !{i32 786445, metadata !6, metadata !"__isa", metadata !6, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_member ]
+!14 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!15 = metadata !{i32 786445, metadata !6, metadata !"__flags", metadata !6, i32 0, i64 32, i64 32, i64 64, i32 0, metadata !16} ; [ DW_TAG_member ]
+!16 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!17 = metadata !{i32 786445, metadata !6, metadata !"__reserved", metadata !6, i32 0, i64 32, i64 32, i64 96, i32 0, metadata !16} ; [ DW_TAG_member ]
+!18 = metadata !{i32 786445, metadata !6, metadata !"__FuncPtr", metadata !6, i32 0, i64 64, i64 64, i64 128, i32 0, metadata !14} ; [ DW_TAG_member ]
+!19 = metadata !{i32 786445, metadata !6, metadata !"__descriptor", metadata !6, i32 5, i64 64, i64 64, i64 192, i32 0, metadata !20} ; [ DW_TAG_member ]
+!20 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 0, i64 0, i32 0, metadata !21} ; [ DW_TAG_pointer_type ]
+!21 = metadata !{i32 786451, metadata !6, metadata !"__block_descriptor", metadata !6, i32 5, i64 128, i64 0, i32 0, i32 8, null, metadata !22, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!22 = metadata !{metadata !23, metadata !25}
+!23 = metadata !{i32 786445, metadata !6, metadata !"reserved", metadata !6, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_member ]
+!24 = metadata !{i32 786468, null, metadata !"long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!25 = metadata !{i32 786445, metadata !6, metadata !"Size", metadata !6, i32 0, i64 64, i64 64, i64 64, i32 0, metadata !24} ; [ DW_TAG_member ]
+!26 = metadata !{metadata !27}
+!27 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
+!28 = metadata !{i32 786478, i32 0, metadata !6, metadata !"__foo_block_invoke_0", metadata !"__foo_block_invoke_0", metadata !"", metadata !6, i32 7, metadata !29, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @__foo_block_invoke_0, null, null, metadata !26} ; [ DW_TAG_subprogram ]
+!29 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !30, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!30 = metadata !{null, metadata !14}
+!31 = metadata !{i32 786478, i32 0, metadata !6, metadata !"__copy_helper_block_", metadata !"__copy_helper_block_", metadata !"", metadata !6, i32 10, metadata !32, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26} ; [ DW_TAG_subprogram ]
+!32 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !33, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!33 = metadata !{null, metadata !14, metadata !14}
+!34 = metadata !{i32 786478, i32 0, metadata !6, metadata !"__destroy_helper_block_", metadata !"__destroy_helper_block_", metadata !"", metadata !6, i32 10, metadata !29, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26} ; [ DW_TAG_subprogram ]
+!35 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
+!36 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
+!37 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!38 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
+!39 = metadata !{i32 786689, metadata !28, metadata !".block_descriptor", metadata !6, i32 16777223, metadata !40, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!40 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 0, i64 0, i32 0, metadata !41} ; [ DW_TAG_pointer_type ]
+!41 = metadata !{i32 786451, metadata !6, metadata !"__block_literal_1", metadata !6, i32 7, i64 320, i64 64, i32 0, i32 0, null, metadata !42, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!42 = metadata !{metadata !43, metadata !44, metadata !45, metadata !46, metadata !47, metadata !50}
+!43 = metadata !{i32 786445, metadata !6, metadata !"__isa", metadata !6, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_member ]
+!44 = metadata !{i32 786445, metadata !6, metadata !"__flags", metadata !6, i32 7, i64 32, i64 32, i64 64, i32 0, metadata !16} ; [ DW_TAG_member ]
+!45 = metadata !{i32 786445, metadata !6, metadata !"__reserved", metadata !6, i32 7, i64 32, i64 32, i64 96, i32 0, metadata !16} ; [ DW_TAG_member ]
+!46 = metadata !{i32 786445, metadata !6, metadata !"__FuncPtr", metadata !6, i32 7, i64 64, i64 64, i64 128, i32 0, metadata !14} ; [ DW_TAG_member ]
+!47 = metadata !{i32 786445, metadata !6, metadata !"__descriptor", metadata !6, i32 7, i64 64, i64 64, i64 192, i32 0, metadata !48} ; [ DW_TAG_member ]
+!48 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !49} ; [ DW_TAG_pointer_type ]
+!49 = metadata !{i32 786451, null, metadata !"__block_descriptor_withcopydispose", metadata !6, i32 7, i32 0, i32 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ]
+!50 = metadata !{i32 786445, metadata !6, metadata !"block", metadata !6, i32 7, i64 64, i64 64, i64 256, i32 0, metadata !9} ; [ DW_TAG_member ]
+!51 = metadata !{i32 7, i32 18, metadata !28, null}
+!52 = metadata !{i32 7, i32 19, metadata !28, null}
+!53 = metadata !{i32 786688, metadata !28, metadata !"block", metadata !6, i32 5, metadata !9, i32 0, i32 0, i64 1, i64 32} ; [ DW_TAG_auto_variable ]
+!54 = metadata !{i32 5, i32 27, metadata !28, null}
+!55 = metadata !{i32 8, i32 22, metadata !56, null}
+!56 = metadata !{i32 786443, metadata !57, i32 7, i32 26, metadata !6, i32 2} ; [ DW_TAG_lexical_block ]
+!57 = metadata !{i32 786443, metadata !28, i32 7, i32 19, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
+!58 = metadata !{i32 10, i32 20, metadata !59, null}
+!59 = metadata !{i32 786443, metadata !60, i32 9, i32 35, metadata !6, i32 4} ; [ DW_TAG_lexical_block ]
+!60 = metadata !{i32 786443, metadata !57, i32 9, i32 35, metadata !6, i32 3} ; [ DW_TAG_lexical_block ]
+!61 = metadata !{i32 10, i32 21, metadata !28, null}
+!62 = metadata !{i32 9, i32 20, metadata !56, null}
diff --git a/test/DebugInfo/X86/concrete_out_of_line.ll b/test/DebugInfo/X86/concrete_out_of_line.ll
new file mode 100644
index 0000000..a227071
--- /dev/null
+++ b/test/DebugInfo/X86/concrete_out_of_line.ll
@@ -0,0 +1,96 @@
+; RUN: llc -mtriple=x86_64-linux %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; test that we add DW_AT_inline even when we only have concrete out of line
+; instances.
+
+; first check that we have a TAG_subprogram at a given offset and it has
+; AT_inline.
+
+; CHECK: 0x00000134:   DW_TAG_subprogram [18]
+; CHECK-NEXT:     DW_AT_MIPS_linkage_name
+; CHECK-NEXT:     DW_AT_specification
+; CHECK-NEXT:     DW_AT_inline
+
+
+; and then that a TAG_subprogram refers to it with AT_abstract_origin.
+
+; CHECK: 0x00000184:   DW_TAG_subprogram [20]
+; CHECK-NEXT: DW_AT_abstract_origin [DW_FORM_ref4]    (cu + 0x0134 => {0x00000134})
+
+define i32 @_ZN17nsAutoRefCnt7ReleaseEv() {
+entry:
+  store i32 1, i32* null, align 4, !dbg !50
+  tail call void @_Z8moz_freePv(i8* null) nounwind, !dbg !54
+  ret i32 0
+}
+
+define void @_ZN17nsAutoRefCntD1Ev() {
+entry:
+  tail call void @_Z8moz_freePv(i8* null) nounwind, !dbg !57
+  ret void
+}
+
+declare void @_Z8moz_freePv(i8*)
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 4, metadata !"nsAutoRefCnt.cpp", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/netwerk/base/src", metadata !"clang version 3.1 ()", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !47} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5, metadata !23, metadata !27, metadata !31}
+!5 = metadata !{i32 720942, i32 0, null, metadata !"Release", metadata !"Release", metadata !"_ZN17nsAutoRefCnt7ReleaseEv", metadata !6, i32 14, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !12, metadata !20} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 720937, metadata !"nsAutoRefCnt.ii", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/netwerk/base/src", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9, metadata !10}
+!9 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 720915, null, metadata !"nsAutoRefCnt", metadata !6, i32 10, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!12 = metadata !{i32 720942, i32 0, metadata !13, metadata !"Release", metadata !"Release", metadata !"_ZN17nsAutoRefCnt7ReleaseEv", metadata !6, i32 11, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18} ; [ DW_TAG_subprogram ]
+!13 = metadata !{i32 720898, null, metadata !"nsAutoRefCnt", metadata !6, i32 10, i64 8, i64 8, i32 0, i32 0, null, metadata !14, i32 0, null, null} ; [ DW_TAG_class_type ]
+!14 = metadata !{metadata !12, metadata !15}
+!15 = metadata !{i32 720942, i32 0, metadata !13, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"", metadata !6, i32 12, metadata !16, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18} ; [ DW_TAG_subprogram ]
+!16 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!17 = metadata !{null, metadata !10}
+!18 = metadata !{metadata !19}
+!19 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
+!20 = metadata !{metadata !21}
+!21 = metadata !{metadata !22}
+!22 = metadata !{i32 721153, metadata !5, metadata !"this", metadata !6, i32 16777230, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!23 = metadata !{i32 720942, i32 0, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD1Ev", metadata !6, i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !15, metadata !24} ; [ DW_TAG_subprogram ]
+!24 = metadata !{metadata !25}
+!25 = metadata !{metadata !26}
+!26 = metadata !{i32 721153, metadata !23, metadata !"this", metadata !6, i32 16777234, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!27 = metadata !{i32 720942, i32 0, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD2Ev", metadata !6, i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !15, metadata !28} ; [ DW_TAG_subprogram ]
+!28 = metadata !{metadata !29}
+!29 = metadata !{metadata !30}
+!30 = metadata !{i32 721153, metadata !27, metadata !"this", metadata !6, i32 16777234, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!31 = metadata !{i32 720942, i32 0, null, metadata !"operator=", metadata !"operator=", metadata !"_ZN12nsAutoRefCntaSEi", metadata !6, i32 4, metadata !32, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null, metadata !36, metadata !43} ; [ DW_TAG_subprogram ]
+!32 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !33, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!33 = metadata !{metadata !9, metadata !34, metadata !9}
+!34 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !35} ; [ DW_TAG_pointer_type ]
+!35 = metadata !{i32 720915, null, metadata !"nsAutoRefCnt", metadata !6, i32 2, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!36 = metadata !{i32 720942, i32 0, metadata !37, metadata !"operator=", metadata !"operator=", metadata !"_ZN12nsAutoRefCntaSEi", metadata !6, i32 4, metadata !32, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18} ; [ DW_TAG_subprogram ]
+!37 = metadata !{i32 720898, null, metadata !"nsAutoRefCnt", metadata !6, i32 2, i64 32, i64 32, i32 0, i32 0, null, metadata !38, i32 0, null, null} ; [ DW_TAG_class_type ]
+!38 = metadata !{metadata !39, metadata !40, metadata !36}
+!39 = metadata !{i32 720909, metadata !37, metadata !"mValue", metadata !6, i32 7, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ]
+!40 = metadata !{i32 720942, i32 0, metadata !37, metadata !"nsAutoRefCnt", metadata !"nsAutoRefCnt", metadata !"", metadata !6, i32 3, metadata !41, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18} ; [ DW_TAG_subprogram ]
+!41 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !42, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!42 = metadata !{null, metadata !34}
+!43 = metadata !{metadata !44}
+!44 = metadata !{metadata !45, metadata !46}
+!45 = metadata !{i32 721153, metadata !31, metadata !"this", metadata !6, i32 16777220, metadata !34, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!46 = metadata !{i32 721153, metadata !31, metadata !"aValue", metadata !6, i32 33554436, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!47 = metadata !{metadata !48}
+!48 = metadata !{metadata !49}
+!49 = metadata !{i32 720948, i32 0, null, metadata !"mRefCnt", metadata !"mRefCnt", metadata !"", metadata !6, i32 9, metadata !37, i32 0, i32 1, i32* null} ; [ DW_TAG_variable ]
+!50 = metadata !{i32 5, i32 5, metadata !51, metadata !52}
+!51 = metadata !{i32 720907, metadata !31, i32 4, i32 29, metadata !6, i32 2} ; [ DW_TAG_lexical_block ]
+!52 = metadata !{i32 15, i32 0, metadata !53, null}
+!53 = metadata !{i32 720907, metadata !5, i32 14, i32 34, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!54 = metadata !{i32 19, i32 3, metadata !55, metadata !56}
+!55 = metadata !{i32 720907, metadata !27, i32 18, i32 41, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
+!56 = metadata !{i32 18, i32 41, metadata !23, metadata !52}
+!57 = metadata !{i32 19, i32 3, metadata !55, metadata !58}
+!58 = metadata !{i32 18, i32 41, metadata !23, null}
diff --git a/test/DebugInfo/X86/dg.exp b/test/DebugInfo/X86/dg.exp
deleted file mode 100644
index 7b7bd4e..0000000
--- a/test/DebugInfo/X86/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
-}
diff --git a/test/DebugInfo/X86/ending-run.ll b/test/DebugInfo/X86/ending-run.ll
new file mode 100644
index 0000000..6935c47
--- /dev/null
+++ b/test/DebugInfo/X86/ending-run.ll
@@ -0,0 +1,49 @@
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; Check that the line table starts at 7, not 4, but that the first
+; statement isn't until line 8.
+
+; CHECK-NOT: 0x0000000000000000      7      0      1   0  is_stmt
+; CHECK: 0x0000000000000000      7      0      1   0
+; CHECK: 0x0000000000000004      8     18      1   0  is_stmt prologue_end
+
+define i32 @callee(i32 %x) nounwind uwtable ssp {
+entry:
+  %x.addr = alloca i32, align 4
+  %y = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %x.addr}, metadata !12), !dbg !13
+  call void @llvm.dbg.declare(metadata !{i32* %y}, metadata !14), !dbg !16
+  %0 = load i32* %x.addr, align 4, !dbg !17
+  %1 = load i32* %x.addr, align 4, !dbg !17
+  %mul = mul nsw i32 %0, %1, !dbg !17
+  store i32 %mul, i32* %y, align 4, !dbg !17
+  %2 = load i32* %y, align 4, !dbg !18
+  %sub = sub nsw i32 %2, 2, !dbg !18
+  ret i32 %sub, !dbg !18
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"ending-run.c", metadata !"/Users/echristo/tmp", metadata !"clang version 3.1 (trunk 153921) (llvm/trunk 153916)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"callee", metadata !"callee", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 (i32)* @callee, null, null, metadata !10, i32 7} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !"ending-run.c", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9, metadata !9}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{metadata !11}
+!11 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
+!12 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !6, i32 16777221, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!13 = metadata !{i32 5, i32 5, metadata !5, null}
+!14 = metadata !{i32 786688, metadata !15, metadata !"y", metadata !6, i32 8, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!15 = metadata !{i32 786443, metadata !5, i32 7, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{i32 8, i32 9, metadata !15, null}
+!17 = metadata !{i32 8, i32 18, metadata !15, null}
+!18 = metadata !{i32 9, i32 5, metadata !15, null}
diff --git a/test/DebugInfo/X86/lit.local.cfg b/test/DebugInfo/X86/lit.local.cfg
new file mode 100644
index 0000000..0d694da
--- /dev/null
+++ b/test/DebugInfo/X86/lit.local.cfg
@@ -0,0 +1,5 @@
+config.suffixes = ['.ll']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
diff --git a/test/DebugInfo/X86/low-pc-cu.ll b/test/DebugInfo/X86/low-pc-cu.ll
new file mode 100644
index 0000000..f9d9b91
--- /dev/null
+++ b/test/DebugInfo/X86/low-pc-cu.ll
@@ -0,0 +1,31 @@
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; Check that we use DW_AT_low_pc
+
+; CHECK: DW_TAG_compile_unit [1]
+; CHECK: DW_AT_low_pc [DW_FORM_addr]       (0x0000000000000000)
+; CHECK: DW_TAG_subprogram [2]
+
+define i32 @_Z1qv() nounwind uwtable readnone ssp {
+entry:
+  ret i32 undef, !dbg !13
+}
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", metadata !"clang version 3.1 (trunk 153454) (llvm/trunk 153471)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5, metadata !12}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"q", metadata !"q", metadata !"_Z1qv", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z1qv, null, null, metadata !10} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{metadata !11}
+!11 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
+!12 = metadata !{i32 786478, i32 0, metadata !6, metadata !"t", metadata !"t", metadata !"", metadata !6, i32 2, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !10} ; [ DW_TAG_subprogram ]
+!13 = metadata !{i32 7, i32 1, metadata !14, null}
+!14 = metadata !{i32 786443, metadata !5, i32 5, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/DebugInfo/X86/objc-fwd-decl.ll b/test/DebugInfo/X86/objc-fwd-decl.ll
new file mode 100644
index 0000000..1a815f9
--- /dev/null
+++ b/test/DebugInfo/X86/objc-fwd-decl.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple=x86_64-macosx %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; CHECK: 0x00000027:   DW_TAG_structure_type
+; CHECK: 0x0000002c:     DW_AT_declaration
+; CHECK: 0x0000002d:     DW_AT_APPLE_runtime_class
+
+%0 = type opaque
+
+@a = common global %0* null, align 8
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10, !11, !12}
+
+!0 = metadata !{i32 786449, i32 0, i32 16, metadata !"foo.m", metadata !"/Users/echristo", metadata !"clang version 3.1 (trunk 152054 trunk 152094)", i1 true, i1 false, metadata !"", i32 2, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 3, metadata !7, i32 0, i32 1, %0** @a} ; [ DW_TAG_variable ]
+!6 = metadata !{i32 786473, metadata !"foo.m", metadata !"/Users/echristo", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
+!8 = metadata !{i32 786451, null, metadata !"FooBarBaz", metadata !6, i32 1, i32 0, i32 0, i32 0, i32 4, null, null, i32 16} ; [ DW_TAG_structure_type ]
+!9 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
+!10 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
+!11 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!12 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
diff --git a/test/DebugInfo/X86/pointer-type-size.ll b/test/DebugInfo/X86/pointer-type-size.ll
new file mode 100644
index 0000000..f11fbe4
--- /dev/null
+++ b/test/DebugInfo/X86/pointer-type-size.ll
@@ -0,0 +1,25 @@
+; RUN: llc -mtriple=x86_64-apple-macosx10.7 %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; CHECK: ptr
+; CHECK-NOT: AT_bit_size
+
+%struct.crass = type { i8* }
+
+@crass = common global %struct.crass zeroinitializer, align 8
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"foo.c", metadata !"/Users/echristo/tmp", metadata !"clang version 3.1 (trunk 147882)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 720948, i32 0, null, metadata !"crass", metadata !"crass", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, %struct.crass* @crass} ; [ DW_TAG_variable ]
+!6 = metadata !{i32 720937, metadata !"foo.c", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720915, null, metadata !"crass", metadata !6, i32 1, i64 64, i64 64, i32 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 720909, metadata !7, metadata !"ptr", metadata !6, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
+!10 = metadata !{i32 720934, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_const_type ]
+!11 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
+!12 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
diff --git a/test/DebugInfo/X86/pr11300.ll b/test/DebugInfo/X86/pr11300.ll
new file mode 100644
index 0000000..5a001ee
--- /dev/null
+++ b/test/DebugInfo/X86/pr11300.ll
@@ -0,0 +1,65 @@
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; test that the DW_AT_specification is a back edge in the file.
+
+; CHECK: 0x0000005c:     DW_TAG_subprogram [5]
+; CHECK: 0x0000007c:     DW_AT_specification [DW_FORM_ref4]      (cu + 0x005c => {0x0000005c})
+
+%struct.foo = type { i8 }
+
+define void @_Z3zedP3foo(%struct.foo* %x) uwtable {
+entry:
+  %x.addr = alloca %struct.foo*, align 8
+  store %struct.foo* %x, %struct.foo** %x.addr, align 8
+  call void @llvm.dbg.declare(metadata !{%struct.foo** %x.addr}, metadata !23), !dbg !24
+  %0 = load %struct.foo** %x.addr, align 8, !dbg !25
+  call void @_ZN3foo3barEv(%struct.foo* %0), !dbg !25
+  ret void, !dbg !27
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define linkonce_odr void @_ZN3foo3barEv(%struct.foo* %this) nounwind uwtable align 2 {
+entry:
+  %this.addr = alloca %struct.foo*, align 8
+  store %struct.foo* %this, %struct.foo** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata !{%struct.foo** %this.addr}, metadata !28), !dbg !29
+  %this1 = load %struct.foo** %this.addr
+  ret void, !dbg !30
+}
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 4, metadata !"/home/espindola/llvm/test.cc", metadata !"/home/espindola/tmpfs/build", metadata !"clang version 3.0 ()", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5, metadata !20}
+!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"zed", metadata !"zed", metadata !"_Z3zedP3foo", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.foo*)* @_Z3zedP3foo, null, null, metadata !18} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 720937, metadata !"/home/espindola/llvm/test.cc", metadata !"/home/espindola/tmpfs/build", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{null, metadata !9}
+!9 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
+!10 = metadata !{i32 720898, null, metadata !"foo", metadata !6, i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !11, i32 0, null, null} ; [ DW_TAG_class_type ]
+!11 = metadata !{metadata !12}
+!12 = metadata !{i32 720942, i32 0, metadata !10, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", metadata !6, i32 2, metadata !13, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !16} ; [ DW_TAG_subprogram ]
+!13 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !14, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!14 = metadata !{null, metadata !15}
+!15 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ]
+!16 = metadata !{metadata !17}
+!17 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
+!18 = metadata !{metadata !19}
+!19 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
+!20 = metadata !{i32 720942, i32 0, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", metadata !6, i32 2, metadata !13, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.foo*)* @_ZN3foo3barEv, null, metadata !12, metadata !21} ; [ DW_TAG_subprogram ]
+!21 = metadata !{metadata !22}
+!22 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
+!23 = metadata !{i32 721153, metadata !5, metadata !"x", metadata !6, i32 16777220, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!24 = metadata !{i32 4, i32 15, metadata !5, null}
+!25 = metadata !{i32 4, i32 20, metadata !26, null}
+!26 = metadata !{i32 720907, metadata !5, i32 4, i32 18, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!27 = metadata !{i32 4, i32 30, metadata !26, null}
+!28 = metadata !{i32 721153, metadata !20, metadata !"this", metadata !6, i32 16777218, metadata !15, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!29 = metadata !{i32 2, i32 8, metadata !20, null}
+!30 = metadata !{i32 2, i32 15, metadata !31, null}
+!31 = metadata !{i32 720907, metadata !20, i32 2, i32 14, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
diff --git a/test/DebugInfo/X86/stringpool.ll b/test/DebugInfo/X86/stringpool.ll
new file mode 100644
index 0000000..2cd1001
--- /dev/null
+++ b/test/DebugInfo/X86/stringpool.ll
@@ -0,0 +1,44 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=LINUX
+; RUN: llc -mtriple=x86_64-darwin < %s | FileCheck %s --check-prefix=DARWIN
+
+@yyyy = common global i32 0, align 4
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"z.c", metadata !"/home/nicholas", metadata !"clang version 3.1 (trunk 143009)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 720948, i32 0, null, metadata !"yyyy", metadata !"yyyy", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, i32* @yyyy} ; [ DW_TAG_variable ]
+!6 = metadata !{i32 720937, metadata !"z.c", metadata !"/home/nicholas", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+
+; Verify that we refer to 'yyyy' with a relocation.
+; LINUX:      .long   .Lstring3               # DW_AT_name
+; LINUX-NEXT: .long   39                      # DW_AT_type
+; LINUX-NEXT: .byte   1                       # DW_AT_external
+; LINUX-NEXT: .byte   1                       # DW_AT_decl_file
+; LINUX-NEXT: .byte   1                       # DW_AT_decl_line
+; LINUX-NEXT: .byte   9                       # DW_AT_location
+; LINUX-NEXT: .byte   3
+; LINUX-NEXT: .quad   yyyy
+
+; Verify that we refer to 'yyyy' without a relocation.
+; DARWIN: Lset5 = Lstring3-Lsection_str               ## DW_AT_name
+; DARWIN-NEXT:        .long   Lset5
+; DARWIN-NEXT:        .long   39                      ## DW_AT_type
+; DARWIN-NEXT:        .byte   1                       ## DW_AT_external
+; DARWIN-NEXT:        .byte   1                       ## DW_AT_decl_file
+; DARWIN-NEXT:        .byte   1                       ## DW_AT_decl_line
+; DARWIN-NEXT:        .byte   9                       ## DW_AT_location
+; DARWIN-NEXT:        .byte   3
+; DARWIN-NEXT:        .quad   _yyyy
+
+; Verify that "yyyy" ended up in the stringpool.
+; LINUX: .section .debug_str,"MS",@progbits,1
+; LINUX-NOT: .section
+; LINUX: yyyy
+; DARWIN: .section __DWARF,__debug_str,regular,debug
+; DARWIN-NOT: .section
+; DARWIN: yyyy
diff --git a/test/DebugInfo/X86/struct-loc.ll b/test/DebugInfo/X86/struct-loc.ll
new file mode 100644
index 0000000..9a047388
--- /dev/null
+++ b/test/DebugInfo/X86/struct-loc.ll
@@ -0,0 +1,26 @@
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; Make sure that structures have a decl file and decl line attached.
+; CHECK: DW_TAG_structure_type [3]
+; CHECK: DW_AT_decl_file
+; CHECK: DW_AT_decl_line
+; CHECK: DW_TAG_member
+
+%struct.foo = type { i32 }
+
+@f = common global %struct.foo zeroinitializer, align 4
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"struct_bug.c", metadata !"/Users/echristo/tmp", metadata !"clang version 3.1 (trunk 152837) (llvm/trunk 152845)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786484, i32 0, null, metadata !"f", metadata !"f", metadata !"", metadata !6, i32 5, metadata !7, i32 0, i32 1, %struct.foo* @f} ; [ DW_TAG_variable ]
+!6 = metadata !{i32 786473, metadata !"struct_bug.c", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786451, null, metadata !"foo", metadata !6, i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 786445, metadata !7, metadata !"a", metadata !6, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
+!10 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/DebugInfo/bug_null_debuginfo.ll b/test/DebugInfo/bug_null_debuginfo.ll
new file mode 100644
index 0000000..a7fdf70
--- /dev/null
+++ b/test/DebugInfo/bug_null_debuginfo.ll
@@ -0,0 +1,6 @@
+; RUN: llc
+
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{null, null, null}
diff --git a/test/DebugInfo/dg.exp b/test/DebugInfo/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/DebugInfo/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/DebugInfo/lit.local.cfg b/test/DebugInfo/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/DebugInfo/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/ExecutionEngine/2002-12-16-ArgTest.ll b/test/ExecutionEngine/2002-12-16-ArgTest.ll
index eba58cc..eb2fe8c 100644
--- a/test/ExecutionEngine/2002-12-16-ArgTest.ll
+++ b/test/ExecutionEngine/2002-12-16-ArgTest.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 @.LC0 = internal global [10 x i8] c"argc: %d\0A\00"		; <[10 x i8]*> [#uses=1]
 
diff --git a/test/ExecutionEngine/2003-01-04-ArgumentBug.ll b/test/ExecutionEngine/2003-01-04-ArgumentBug.ll
index 577226b..3182193 100644
--- a/test/ExecutionEngine/2003-01-04-ArgumentBug.ll
+++ b/test/ExecutionEngine/2003-01-04-ArgumentBug.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 ; XFAIL: arm
 
 define i32 @foo(i32 %X, i32 %Y, double %A) {
diff --git a/test/ExecutionEngine/2003-01-04-LoopTest.ll b/test/ExecutionEngine/2003-01-04-LoopTest.ll
index 61b0a1b..3e27e06 100644
--- a/test/ExecutionEngine/2003-01-04-LoopTest.ll
+++ b/test/ExecutionEngine/2003-01-04-LoopTest.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 ; XFAIL: arm
 
 define i32 @main() {
diff --git a/test/ExecutionEngine/2003-01-04-PhiTest.ll b/test/ExecutionEngine/2003-01-04-PhiTest.ll
index 2bc70d7..48576e7 100644
--- a/test/ExecutionEngine/2003-01-04-PhiTest.ll
+++ b/test/ExecutionEngine/2003-01-04-PhiTest.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @main() {
 ; <label>:0
diff --git a/test/ExecutionEngine/2003-01-09-SARTest.ll b/test/ExecutionEngine/2003-01-09-SARTest.ll
index 560cd3e..ed58e11 100644
--- a/test/ExecutionEngine/2003-01-09-SARTest.ll
+++ b/test/ExecutionEngine/2003-01-09-SARTest.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 ; We were accidentally inverting the signedness of right shifts.  Whoops.
 
diff --git a/test/ExecutionEngine/2003-01-10-FUCOM.ll b/test/ExecutionEngine/2003-01-10-FUCOM.ll
index 8512f63..4960e59 100644
--- a/test/ExecutionEngine/2003-01-10-FUCOM.ll
+++ b/test/ExecutionEngine/2003-01-10-FUCOM.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @main() {
 	%X = fadd double 0.000000e+00, 1.000000e+00		; <double> [#uses=1]
diff --git a/test/ExecutionEngine/2003-01-15-AlignmentTest.ll b/test/ExecutionEngine/2003-01-15-AlignmentTest.ll
index df15037..80e19ba 100644
--- a/test/ExecutionEngine/2003-01-15-AlignmentTest.ll
+++ b/test/ExecutionEngine/2003-01-15-AlignmentTest.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 ; XFAIL: arm
 
 define i32 @bar(i8* %X) {
diff --git a/test/ExecutionEngine/2003-05-06-LivenessClobber.ll b/test/ExecutionEngine/2003-05-06-LivenessClobber.ll
index 26429a0..6f61aa6 100644
--- a/test/ExecutionEngine/2003-05-06-LivenessClobber.ll
+++ b/test/ExecutionEngine/2003-05-06-LivenessClobber.ll
@@ -1,6 +1,6 @@
 ; This testcase should return with an exit code of 1.
 ;
-; RUN: not lli %s
+; RUN: not %lli %s
 ; XFAIL: arm
 
 @test = global i64 0		; <i64*> [#uses=1]
diff --git a/test/ExecutionEngine/2003-05-07-ArgumentTest.ll b/test/ExecutionEngine/2003-05-07-ArgumentTest.ll
index 566f3ae3..236be18 100644
--- a/test/ExecutionEngine/2003-05-07-ArgumentTest.ll
+++ b/test/ExecutionEngine/2003-05-07-ArgumentTest.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s test
+; RUN: %lli %s test
 ; XFAIL: arm
 
 declare i32 @puts(i8*)
diff --git a/test/ExecutionEngine/2003-05-11-PHIRegAllocBug.ll b/test/ExecutionEngine/2003-05-11-PHIRegAllocBug.ll
index bcdb114..45279ad 100644
--- a/test/ExecutionEngine/2003-05-11-PHIRegAllocBug.ll
+++ b/test/ExecutionEngine/2003-05-11-PHIRegAllocBug.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 target datalayout = "e-p:32:32"
 
diff --git a/test/ExecutionEngine/2003-06-04-bzip2-bug.ll b/test/ExecutionEngine/2003-06-04-bzip2-bug.ll
index 37dae86..4342aa4 100644
--- a/test/ExecutionEngine/2003-06-04-bzip2-bug.ll
+++ b/test/ExecutionEngine/2003-06-04-bzip2-bug.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 ; Testcase distilled from 256.bzip2.
 
diff --git a/test/ExecutionEngine/2003-06-05-PHIBug.ll b/test/ExecutionEngine/2003-06-05-PHIBug.ll
index f7bd8b7..03b66c4 100644
--- a/test/ExecutionEngine/2003-06-05-PHIBug.ll
+++ b/test/ExecutionEngine/2003-06-05-PHIBug.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 ; Testcase distilled from 256.bzip2.
 
diff --git a/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll b/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll
index 6c2f340..22dd4cc 100644
--- a/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll
+++ b/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 ; XFAIL: arm
 
 ; This testcase failed to work because two variable sized allocas confused the
diff --git a/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll b/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll
index 29cbaac..60dc3d6 100644
--- a/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll
+++ b/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 ; XFAIL: arm
 
 ;
diff --git a/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll b/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll
index 6711d4d..04a5e17 100644
--- a/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll
+++ b/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 ; XFAIL: arm
 
 ; This testcase exposes a bug in the local register allocator where it runs out
diff --git a/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll b/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
index fe18211..6e48c60 100644
--- a/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
+++ b/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 ; XFAIL: arm
 
 @A = global i32 0		; <i32*> [#uses=1]
diff --git a/test/ExecutionEngine/2005-12-02-TailCallBug.ll b/test/ExecutionEngine/2005-12-02-TailCallBug.ll
index 874ce39..8523b5e 100644
--- a/test/ExecutionEngine/2005-12-02-TailCallBug.ll
+++ b/test/ExecutionEngine/2005-12-02-TailCallBug.ll
@@ -1,5 +1,5 @@
 ; PR672
-; RUN: lli %s
+; RUN: %lli %s
 ; XFAIL: arm
 
 define i32 @main() {
diff --git a/test/ExecutionEngine/2007-12-10-APIntLoadStore.ll b/test/ExecutionEngine/2007-12-10-APIntLoadStore.ll
index c0dc4cf..4183611 100644
--- a/test/ExecutionEngine/2007-12-10-APIntLoadStore.ll
+++ b/test/ExecutionEngine/2007-12-10-APIntLoadStore.ll
@@ -1,4 +1,4 @@
-; RUN: lli -force-interpreter %s
+; RUN: %lli -force-interpreter %s
 ; PR1836
 
 define i32 @main() {
diff --git a/test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll b/test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll
index 07cc659..0ab0274 100644
--- a/test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll
+++ b/test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll
@@ -1,4 +1,4 @@
-; RUN: lli -force-interpreter=true %s | grep 1
+; RUN: %lli -force-interpreter=true %s | grep 1
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i686-pc-linux-gnu"
diff --git a/test/ExecutionEngine/2010-01-15-UndefValue.ll b/test/ExecutionEngine/2010-01-15-UndefValue.ll
index 6e7a392..01cb21f 100644
--- a/test/ExecutionEngine/2010-01-15-UndefValue.ll
+++ b/test/ExecutionEngine/2010-01-15-UndefValue.ll
@@ -1,4 +1,4 @@
-; RUN: lli -force-interpreter=true %s
+; RUN: %lli -force-interpreter=true %s
 
 define i32 @main() {
        %a = add i32 0, undef
diff --git a/test/ExecutionEngine/dg.exp b/test/ExecutionEngine/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/ExecutionEngine/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/ExecutionEngine/fpbitcast.ll b/test/ExecutionEngine/fpbitcast.ll
index 47cbb02..fa84be4 100644
--- a/test/ExecutionEngine/fpbitcast.ll
+++ b/test/ExecutionEngine/fpbitcast.ll
@@ -1,4 +1,4 @@
-; RUN: lli -force-interpreter=true %s | grep 40091eb8
+; RUN: %lli -force-interpreter=true %s | grep 40091eb8
 ;
 define i32 @test(double %x) {
 entry:
diff --git a/test/ExecutionEngine/hello.ll b/test/ExecutionEngine/hello.ll
index 92c26a6..f2c4a7f 100644
--- a/test/ExecutionEngine/hello.ll
+++ b/test/ExecutionEngine/hello.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 ; XFAIL: arm
 
 @.LC0 = internal global [12 x i8] c"Hello World\00"		; <[12 x i8]*> [#uses=1]
diff --git a/test/ExecutionEngine/hello2.ll b/test/ExecutionEngine/hello2.ll
index 10557ab..155ed41 100644
--- a/test/ExecutionEngine/hello2.ll
+++ b/test/ExecutionEngine/hello2.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 ; XFAIL: arm
 
 @X = global i32 7		; <i32*> [#uses=0]
diff --git a/test/ExecutionEngine/lit.local.cfg b/test/ExecutionEngine/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/ExecutionEngine/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/ExecutionEngine/simplesttest.ll b/test/ExecutionEngine/simplesttest.ll
index ad38485..85c1715 100644
--- a/test/ExecutionEngine/simplesttest.ll
+++ b/test/ExecutionEngine/simplesttest.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @main() {
 	ret i32 0
diff --git a/test/ExecutionEngine/simpletest.ll b/test/ExecutionEngine/simpletest.ll
index 797b359..83f9b84 100644
--- a/test/ExecutionEngine/simpletest.ll
+++ b/test/ExecutionEngine/simpletest.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 ; XFAIL: arm
 
 define i32 @bar() {
diff --git a/test/ExecutionEngine/stubs.ll b/test/ExecutionEngine/stubs.ll
index 2039ab5..b40e4be 100644
--- a/test/ExecutionEngine/stubs.ll
+++ b/test/ExecutionEngine/stubs.ll
@@ -1,4 +1,4 @@
-; RUN: lli -disable-lazy-compilation=false %s
+; RUN: %lli -disable-lazy-compilation=false %s
 ; XFAIL: arm
 
 define i32 @main() nounwind {
diff --git a/test/ExecutionEngine/test-arith.ll b/test/ExecutionEngine/test-arith.ll
index 354ecd2..79f989f 100644
--- a/test/ExecutionEngine/test-arith.ll
+++ b/test/ExecutionEngine/test-arith.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @main() {
 	%A = add i8 0, 12		; <i8> [#uses=1]
diff --git a/test/ExecutionEngine/test-branch.ll b/test/ExecutionEngine/test-branch.ll
index 7d4fd56..3ae55d0 100644
--- a/test/ExecutionEngine/test-branch.ll
+++ b/test/ExecutionEngine/test-branch.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 ; test unconditional branch
 define i32 @main() {
diff --git a/test/ExecutionEngine/test-call-no-external-funcs.ll b/test/ExecutionEngine/test-call-no-external-funcs.ll
new file mode 100644
index 0000000..b2dd532
--- /dev/null
+++ b/test/ExecutionEngine/test-call-no-external-funcs.ll
@@ -0,0 +1,15 @@
+; RUN: %lli %s > /dev/null
+; XFAIL: arm
+
+define i32 @_Z14func_exit_codev() nounwind uwtable {
+entry:
+  ret i32 0
+}
+
+define i32 @main() nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  %call = call i32 @_Z14func_exit_codev()
+  ret i32 %call
+}
diff --git a/test/ExecutionEngine/test-call.ll b/test/ExecutionEngine/test-call.ll
index c4131a2..3fd39fe 100644
--- a/test/ExecutionEngine/test-call.ll
+++ b/test/ExecutionEngine/test-call.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 ; XFAIL: arm
 
 declare void @exit(i32)
diff --git a/test/ExecutionEngine/test-cast.ll b/test/ExecutionEngine/test-cast.ll
index f41448c..667fa80 100644
--- a/test/ExecutionEngine/test-cast.ll
+++ b/test/ExecutionEngine/test-cast.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @foo() {
 	ret i32 0
diff --git a/test/ExecutionEngine/test-common-symbols.ll b/test/ExecutionEngine/test-common-symbols.ll
new file mode 100644
index 0000000..4dd9265
--- /dev/null
+++ b/test/ExecutionEngine/test-common-symbols.ll
@@ -0,0 +1,89 @@
+; RUN: %lli -O0 -disable-lazy-compilation=false %s
+; XFAIL: arm
+
+; The intention of this test is to verify that symbols mapped to COMMON in ELF
+; work as expected.
+;
+; Compiled from this C code:
+;
+; int zero_int;
+; double zero_double;
+; int zero_arr[10];
+; 
+; int main()
+; {
+;     zero_arr[zero_int + 5] = 40;
+; 
+;     if (zero_double < 1.0)
+;         zero_arr[zero_int + 2] = 70;
+; 
+;     for (int i = 1; i < 10; ++i) {
+;         zero_arr[i] = zero_arr[i - 1] + zero_arr[i];
+;     }
+;     return zero_arr[9] == 110 ? 0 : -1;
+; }
+
+@zero_int = common global i32 0, align 4
+@zero_arr = common global [10 x i32] zeroinitializer, align 16
+@zero_double = common global double 0.000000e+00, align 8
+
+define i32 @main() nounwind {
+entry:
+  %retval = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  %0 = load i32* @zero_int, align 4
+  %add = add nsw i32 %0, 5
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom
+  store i32 40, i32* %arrayidx, align 4
+  %1 = load double* @zero_double, align 8
+  %cmp = fcmp olt double %1, 1.000000e+00
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %2 = load i32* @zero_int, align 4
+  %add1 = add nsw i32 %2, 2
+  %idxprom2 = sext i32 %add1 to i64
+  %arrayidx3 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom2
+  store i32 70, i32* %arrayidx3, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  store i32 1, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %if.end
+  %3 = load i32* %i, align 4
+  %cmp4 = icmp slt i32 %3, 10
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %4 = load i32* %i, align 4
+  %sub = sub nsw i32 %4, 1
+  %idxprom5 = sext i32 %sub to i64
+  %arrayidx6 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom5
+  %5 = load i32* %arrayidx6, align 4
+  %6 = load i32* %i, align 4
+  %idxprom7 = sext i32 %6 to i64
+  %arrayidx8 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom7
+  %7 = load i32* %arrayidx8, align 4
+  %add9 = add nsw i32 %5, %7
+  %8 = load i32* %i, align 4
+  %idxprom10 = sext i32 %8 to i64
+  %arrayidx11 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom10
+  store i32 %add9, i32* %arrayidx11, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %9 = load i32* %i, align 4
+  %inc = add nsw i32 %9, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %10 = load i32* getelementptr inbounds ([10 x i32]* @zero_arr, i32 0, i64 9), align 4
+  %cmp12 = icmp eq i32 %10, 110
+  %cond = select i1 %cmp12, i32 0, i32 -1
+  ret i32 %cond
+}
diff --git a/test/ExecutionEngine/test-constantexpr.ll b/test/ExecutionEngine/test-constantexpr.ll
index d6d90e3..d01479a 100644
--- a/test/ExecutionEngine/test-constantexpr.ll
+++ b/test/ExecutionEngine/test-constantexpr.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 ; This tests to make sure that we can evaluate weird constant expressions
 
diff --git a/test/ExecutionEngine/test-fp-no-external-funcs.ll b/test/ExecutionEngine/test-fp-no-external-funcs.ll
new file mode 100644
index 0000000..61b12c2
--- /dev/null
+++ b/test/ExecutionEngine/test-fp-no-external-funcs.ll
@@ -0,0 +1,21 @@
+; RUN: %lli  %s > /dev/null
+
+define double @test(double* %DP, double %Arg) {
+	%D = load double* %DP		; <double> [#uses=1]
+	%V = fadd double %D, 1.000000e+00		; <double> [#uses=2]
+	%W = fsub double %V, %V		; <double> [#uses=3]
+	%X = fmul double %W, %W		; <double> [#uses=2]
+	%Y = fdiv double %X, %X		; <double> [#uses=2]
+	%Q = fadd double %Y, %Arg		; <double> [#uses=1]
+	%R = bitcast double %Q to double		; <double> [#uses=1]
+	store double %Q, double* %DP
+	ret double %Y
+}
+
+define i32 @main() {
+	%X = alloca double		; <double*> [#uses=2]
+	store double 0.000000e+00, double* %X
+	call double @test( double* %X, double 2.000000e+00 )		; <double>:1 [#uses=0]
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/test-fp.ll b/test/ExecutionEngine/test-fp.ll
index f653660..2bf0210 100644
--- a/test/ExecutionEngine/test-fp.ll
+++ b/test/ExecutionEngine/test-fp.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define double @test(double* %DP, double %Arg) {
 	%D = load double* %DP		; <double> [#uses=1]
diff --git a/test/ExecutionEngine/test-global-init-nonzero.ll b/test/ExecutionEngine/test-global-init-nonzero.ll
new file mode 100644
index 0000000..ef2d37b
--- /dev/null
+++ b/test/ExecutionEngine/test-global-init-nonzero.ll
@@ -0,0 +1,35 @@
+; RUN: %lli  %s > /dev/null
+; XFAIL: arm
+
+@count = global i32 1, align 4
+
+define i32 @main() nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 49
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* @count, align 4
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, i32* @count, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %2 = load i32* %i, align 4
+  %inc1 = add nsw i32 %2, 1
+  store i32 %inc1, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %3 = load i32* @count, align 4
+  %sub = sub nsw i32 %3, 50
+  ret i32 %sub
+}
diff --git a/test/ExecutionEngine/test-global.ll b/test/ExecutionEngine/test-global.ll
new file mode 100644
index 0000000..2ea50de
--- /dev/null
+++ b/test/ExecutionEngine/test-global.ll
@@ -0,0 +1,35 @@
+; RUN: %lli %s > /dev/null
+; XFAIL: arm
+
+@count = global i32 0, align 4
+
+define i32 @main() nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 50
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* @count, align 4
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, i32* @count, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %2 = load i32* %i, align 4
+  %inc1 = add nsw i32 %2, 1
+  store i32 %inc1, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %3 = load i32* @count, align 4
+  %sub = sub nsw i32 %3, 50
+  ret i32 %sub
+}
diff --git a/test/ExecutionEngine/test-loadstore.ll b/test/ExecutionEngine/test-loadstore.ll
index 7eb57cb..7574314 100644
--- a/test/ExecutionEngine/test-loadstore.ll
+++ b/test/ExecutionEngine/test-loadstore.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 ; XFAIL: arm
 
 define void @test(i8* %P, i16* %P.upgrd.1, i32* %P.upgrd.2, i64* %P.upgrd.3) {
diff --git a/test/ExecutionEngine/test-local.ll b/test/ExecutionEngine/test-local.ll
new file mode 100644
index 0000000..240b174
--- /dev/null
+++ b/test/ExecutionEngine/test-local.ll
@@ -0,0 +1,35 @@
+; RUN: %lli %s > /dev/null
+; XFAIL: arm
+
+define i32 @main() nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  %count = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  store i32 0, i32* %count, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 50
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %count, align 4
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, i32* %count, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %2 = load i32* %i, align 4
+  %inc1 = add nsw i32 %2, 1
+  store i32 %inc1, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %3 = load i32* %count, align 4
+  %sub = sub nsw i32 %3, 50
+  ret i32 %sub
+}
diff --git a/test/ExecutionEngine/test-logical.ll b/test/ExecutionEngine/test-logical.ll
index 710763a..05b381b 100644
--- a/test/ExecutionEngine/test-logical.ll
+++ b/test/ExecutionEngine/test-logical.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @main() {
 	%A = and i8 4, 8		; <i8> [#uses=2]
diff --git a/test/ExecutionEngine/test-loop.ll b/test/ExecutionEngine/test-loop.ll
index f0e6f7a..e951a14 100644
--- a/test/ExecutionEngine/test-loop.ll
+++ b/test/ExecutionEngine/test-loop.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @main() {
 ; <label>:0
diff --git a/test/ExecutionEngine/test-phi.ll b/test/ExecutionEngine/test-phi.ll
index c5848a8..c5bdfd5 100644
--- a/test/ExecutionEngine/test-phi.ll
+++ b/test/ExecutionEngine/test-phi.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 ; test phi node
 @Y = global i32 6		; <i32*> [#uses=1]
diff --git a/test/ExecutionEngine/test-ret.ll b/test/ExecutionEngine/test-ret.ll
index beec399..025f53e 100644
--- a/test/ExecutionEngine/test-ret.ll
+++ b/test/ExecutionEngine/test-ret.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 ; test return instructions
 define void @test1() {
diff --git a/test/ExecutionEngine/test-return.ll b/test/ExecutionEngine/test-return.ll
new file mode 100644
index 0000000..d464a4b
--- /dev/null
+++ b/test/ExecutionEngine/test-return.ll
@@ -0,0 +1,8 @@
+; RUN: %lli %s > /dev/null
+
+define i32 @main() nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  ret i32 0
+}
diff --git a/test/ExecutionEngine/test-setcond-fp.ll b/test/ExecutionEngine/test-setcond-fp.ll
index d1d6d05..68276e6 100644
--- a/test/ExecutionEngine/test-setcond-fp.ll
+++ b/test/ExecutionEngine/test-setcond-fp.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 
 define i32 @main() {
diff --git a/test/ExecutionEngine/test-setcond-int.ll b/test/ExecutionEngine/test-setcond-int.ll
index f59d325..48dc021 100644
--- a/test/ExecutionEngine/test-setcond-int.ll
+++ b/test/ExecutionEngine/test-setcond-int.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @main() {
 	%int1 = add i32 0, 0		; <i32> [#uses=6]
diff --git a/test/ExecutionEngine/test-shift.ll b/test/ExecutionEngine/test-shift.ll
index d0fb90a..590e262 100644
--- a/test/ExecutionEngine/test-shift.ll
+++ b/test/ExecutionEngine/test-shift.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @main() {
 	%shamt = add i8 0, 1		; <i8> [#uses=8]
diff --git a/test/Feature/const_pv.ll b/test/Feature/const_pv.ll
new file mode 100644
index 0000000..6fd6abd
--- /dev/null
+++ b/test/Feature/const_pv.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-as %s -disable-output
+@G = constant <3 x i64> ptrtoint (<3 x i8*> <i8* null, i8* null, i8* null> to <3 x i64>)
+
+@G1 = global i8 zeroinitializer
+@g = constant <2 x i8*> getelementptr (<2 x i8*> <i8* @G1, i8* @G1>, <2 x i32> <i32 0, i32 0>)
+
+@t = constant <2 x i1> icmp ((<2 x i32> ptrtoint (<2 x i8*> zeroinitializer to <2 x i32>), <2 x i32> zeroinitializer )
+
diff --git a/test/Feature/dg.exp b/test/Feature/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Feature/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Feature/float.ll b/test/Feature/float.ll
index 6c6c5dd..b875afe 100644
--- a/test/Feature/float.ll
+++ b/test/Feature/float.ll
@@ -2,5 +2,6 @@
 ; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
 ; RUN: diff %t1.ll %t2.ll
 
+@H1     = global half 0x4010000000000000
 @F1     = global float 0x4010000000000000
 @D1     = global double 0x4010000000000000
diff --git a/test/Feature/global_pv.ll b/test/Feature/global_pv.ll
new file mode 100644
index 0000000..d257ec0
--- /dev/null
+++ b/test/Feature/global_pv.ll
@@ -0,0 +1,14 @@
+; RUN: opt -instcombine -S -o - %s | llvm-as
+; RUN: opt -instcombine -globalopt -S -o - %s | llvm-as
+@G1 = global i32 zeroinitializer
+@G2 = global i32 zeroinitializer
+@g = global <2 x i32*> zeroinitializer
+%0 = type { i32, void ()* }
+@llvm.global_ctors = appending global [1 x %0] [%0 { i32 65535, void ()* @test }]
+define internal void @test() {
+  %A = insertelement <2 x i32*> undef, i32* @G1, i32 0
+  %B = insertelement <2 x i32*> %A,  i32* @G2, i32 1
+  store <2 x i32*> %B, <2 x i32*>* @g
+  ret void
+}
+
diff --git a/test/Feature/intrinsics.ll b/test/Feature/intrinsics.ll
index 2dd6b53..c4e3db6 100644
--- a/test/Feature/intrinsics.ll
+++ b/test/Feature/intrinsics.ll
@@ -6,7 +6,6 @@ declare i1 @llvm.isunordered.f32(float, float)
 
 declare i1 @llvm.isunordered.f64(double, double)
 
-declare void @llvm.prefetch(i8*, i32, i32)
 
 declare i8 @llvm.ctpop.i8(i8)
 
@@ -16,21 +15,21 @@ declare i32 @llvm.ctpop.i32(i32)
 
 declare i64 @llvm.ctpop.i64(i64)
 
-declare i8 @llvm.cttz.i8(i8)
+declare i8 @llvm.cttz.i8(i8, i1)
 
-declare i16 @llvm.cttz.i16(i16)
+declare i16 @llvm.cttz.i16(i16, i1)
 
-declare i32 @llvm.cttz.i32(i32)
+declare i32 @llvm.cttz.i32(i32, i1)
 
-declare i64 @llvm.cttz.i64(i64)
+declare i64 @llvm.cttz.i64(i64, i1)
 
-declare i8 @llvm.ctlz.i8(i8)
+declare i8 @llvm.ctlz.i8(i8, i1)
 
-declare i16 @llvm.ctlz.i16(i16)
+declare i16 @llvm.ctlz.i16(i16, i1)
 
-declare i32 @llvm.ctlz.i32(i32)
+declare i32 @llvm.ctlz.i32(i32, i1)
 
-declare i64 @llvm.ctlz.i64(i64)
+declare i64 @llvm.ctlz.i64(i64, i1)
 
 declare float @llvm.sqrt.f32(float)
 
@@ -41,21 +40,20 @@ declare double @llvm.sqrt.f64(double)
 define void @libm() {
         fcmp uno float 1.000000e+00, 2.000000e+00               ; <i1>:1 [#uses=0]
         fcmp uno double 3.000000e+00, 4.000000e+00              ; <i1>:2 [#uses=0]
-        call void @llvm.prefetch( i8* null, i32 1, i32 3 )
         call float @llvm.sqrt.f32( float 5.000000e+00 )         ; <float>:3 [#uses=0]
         call double @llvm.sqrt.f64( double 6.000000e+00 )               ; <double>:4 [#uses=0]
         call i8  @llvm.ctpop.i8( i8 10 )                ; <i32>:5 [#uses=0]
         call i16 @llvm.ctpop.i16( i16 11 )              ; <i32>:6 [#uses=0]
         call i32 @llvm.ctpop.i32( i32 12 )              ; <i32>:7 [#uses=0]
         call i64 @llvm.ctpop.i64( i64 13 )              ; <i32>:8 [#uses=0]
-        call i8  @llvm.ctlz.i8( i8 14 )         ; <i32>:9 [#uses=0]
-        call i16 @llvm.ctlz.i16( i16 15 )               ; <i32>:10 [#uses=0]
-        call i32 @llvm.ctlz.i32( i32 16 )               ; <i32>:11 [#uses=0]
-        call i64 @llvm.ctlz.i64( i64 17 )               ; <i32>:12 [#uses=0]
-        call i8  @llvm.cttz.i8( i8 18 )         ; <i32>:13 [#uses=0]
-        call i16 @llvm.cttz.i16( i16 19 )               ; <i32>:14 [#uses=0]
-        call i32 @llvm.cttz.i32( i32 20 )               ; <i32>:15 [#uses=0]
-        call i64 @llvm.cttz.i64( i64 21 )               ; <i32>:16 [#uses=0]
+        call i8  @llvm.ctlz.i8( i8 14, i1 true )         ; <i32>:9 [#uses=0]
+        call i16 @llvm.ctlz.i16( i16 15, i1 true )               ; <i32>:10 [#uses=0]
+        call i32 @llvm.ctlz.i32( i32 16, i1 true )               ; <i32>:11 [#uses=0]
+        call i64 @llvm.ctlz.i64( i64 17, i1 true )               ; <i32>:12 [#uses=0]
+        call i8  @llvm.cttz.i8( i8 18, i1 true )         ; <i32>:13 [#uses=0]
+        call i16 @llvm.cttz.i16( i16 19, i1 true )               ; <i32>:14 [#uses=0]
+        call i32 @llvm.cttz.i32( i32 20, i1 true )               ; <i32>:15 [#uses=0]
+        call i64 @llvm.cttz.i64( i64 21, i1 true )               ; <i32>:16 [#uses=0]
         ret void
 }
 
diff --git a/test/Feature/lit.local.cfg b/test/Feature/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Feature/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Feature/llvm2cpp.exp b/test/Feature/llvm2cpp.exp
deleted file mode 100644
index de0126c..0000000
--- a/test/Feature/llvm2cpp.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm2cpp.exp
-
-llvm2cpp-test [lsort [glob -nocomplain $srcdir/$subdir/*.ll]]
diff --git a/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll b/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll
new file mode 100644
index 0000000..35c5c4a
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll
@@ -0,0 +1,71 @@
+; RUN: opt < %s -asan -S | llc -o /dev/null
+; The bug manifests as a reg alloc failure:
+; error: ran out of registers during register allocation
+; ModuleID = 'z.o'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-unknown-linux-gnu"
+
+%struct.DSPContext = type { void (i16*, i8*, i32)*, void (i16*, i8*, i8*, i32)*, void (i16*, i8*, i32)*, void (i16*, i8*, i32)*, void (i16*, i8*, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, i32 (i16*)*, void (i8*, i8*, i32, i32, i32, i32, i32)*, void (i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)*, void (i16*)*, void (i16*)*, i32 (i8*, i32)*, i32 (i8*, i32)*, [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], i32 (i8*, i16*, i32)*, [4 x [4 x void (i8*, i8*, i32, i32)*]], [4 x [4 x void (i8*, i8*, i32, i32)*]], [4 x [4 x void (i8*, i8*, i32, i32)*]], [4 x [4 x void (i8*, i8*, i32, i32)*]], [2 x void (i8*, i8*, i8*, i32, i32)*], [11 x void (i8*, i8*, i32, i32, i32)*], [11 x void (i8*, i8*, i32, i32, i32)*], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], [8 x void (i8*, i8*, i32)*], [3 x void (i8*, i8*, i32, i32, i32, i32)*], [3 x void (i8*, i8*, i32, i32, i32, i32)*], [3 x void (i8*, i8*, i32, i32, i32, i32)*], [3 x void (i8*, i8*, i32, i32, i32, i32)*], [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [10 x void (i8*, i32, i32, i32, i32)*], [10 x void (i8*, i8*, i32, i32, i32, i32, i32)*], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i16*, i32)*, [2 x [4 x i32 (i8*, i8*, i8*, i32, i32)*]], void (i8*, i8*, i32)*, void (i8*, i8*, i8*, i32)*, void (i8*, i8*, i8*, i32)*, void (i8*, i8*, i8*, i32, i32*, i32*)*, void (i8*, i8*, i8*, i32, i32*, i32*)*, i32 (i8*, i8*, i32, i32)*, void (i8*, i8*, i32, i32*, i32*, i32*)*, void (i8*, i8*, i8*, i32, i32)*, void (i32*, i32*, i32)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32)*, void (i8*, i32, i32, i32)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32)*, void (i8*, i32, i32, i32)*, void ([4 x [4 x i16]]*, i8*, [40 x i8]*, [40 x [2 x i16]]*, i32, i32, i32, i32, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32)*, void (i8*, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32, i32*)*, void (i8*, i32, i32*)*, void (i8*, i8*, i32, i16*, i16*)*, void (float*, float*, i32)*, void ([256 x float]*, [2 x float]*, i32, i32, i32)*, void (i32*, i32, i32, double*)*, void (float*, float*, i32)*, void (float*, float*, float*, i32)*, void (float*, float*, float*, float*, i32)*, void (float*, float*, float*, float*, float, i32)*, void (float*, i32*, float, i32)*, void (float*, float*, float, float, i32)*, void (float*, float*, float, i32)*, [2 x void (float*, float*, float**, float, i32)*], [2 x void (float*, float**, float, i32)*], float (float*, float*, i32)*, void (float*, float*, i32)*, void (i16*, float*, i32)*, void (i16*, float**, i32, i32)*, void (i16*)*, void (i16*)*, void (i16*)*, void (i8*, i32, i16*)*, void (i8*, i32, i16*)*, [64 x i8], i32, i32 (i16*, i16*, i16*, i32)*, void (i16*, i16*, i32)*, void (i8*, i32, i32, i32, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, void ([4 x i16]*)*, void (i8*, i32*, i16*, i32, i8*)*, void (i8*, i32*, i16*, i32, i8*)*, void (i8**, i32*, i16*, i32, i8*)*, void (i8*, i32*, i16*, i32, i8*)*, void (i16*, i16*, i16*, i16*, i16*, i16*, i32)*, void (i16*, i32)*, void (i8*, i32, i8**, i32, i32, i32, i32, i32, %struct.slice_buffer_s*, i32, i8*)*, void (i8*, i32, i32)*, [4 x void (i8*, i32, i8*, i32, i32, i32)*], void (i32*, i32*, i32, i32, i32, i32, i32, i32*)*, void (i16*)*, void (i8*, i32, i16*)*, void (i8*, i32, i16*)*, void (i8*, i32, i16*)*, void (i8*, i32, i16*)*, void (i8*, i32, i16*)*, void (i8*, i32, i16*)*, void (i8*, i32, i16*)*, void (i8*, i32)*, void (i8*, i32)*, void (i8*, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32, i32)*, [16 x void (i8*, i8*, i32, i32)*], [16 x void (i8*, i8*, i32, i32)*], [12 x void (i8*, i8*, i32)*], void (i8*, i8*, i32, i32*, i32*, i32)*, void (i16*, i16*, i32)*, void (i16*, i16*, i32)*, i32 (i16*, i16*, i32, i32)*, [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [3 x void (i8*, i8*, i32, i32, i32, i32)*], [3 x void (i8*, i8*, i32, i32, i32, i32)*] }
+%struct.slice_buffer_s = type opaque
+%struct.AVCodecContext = type { %struct.AVClass*, i32, i32, i32, i32, i32, i8*, i32, %struct.AVRational, i32, i32, i32, i32, i32, void (%struct.AVCodecContext*, %struct.AVFrame*, i32*, i32, i32, i32)*, i32, i32, i32, i32, i32, i32, i32, float, float, i32, i32, i32, i32, float, i32, i32, i32, %struct.AVCodec*, i8*, i32, void (%struct.AVCodecContext*, i8*, i32, i32)*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, [32 x i8], i32, i32, i32, i32, i32, i32, i32, float, i32, i32 (%struct.AVCodecContext*, %struct.AVFrame*)*, void (%struct.AVCodecContext*, %struct.AVFrame*)*, i32, i32, i32, i32, i8*, i8*, float, float, i32, %struct.RcOverride*, i32, i8*, i32, i32, i32, float, float, float, float, i32, float, float, float, float, float, i32, i32, i32*, i32, i32, i32, i32, %struct.AVRational, %struct.AVFrame*, i32, i32, [4 x i64], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32*)*, i32, i32, i32, i32, i32, i32, i8*, i32, i32, i32, i32, i32, i32, i16*, i16*, i32, i32, i32, i32, %struct.AVPaletteControl*, i32, i32 (%struct.AVCodecContext*, %struct.AVFrame*)*, i32, i32, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32 (%struct.AVCodecContext*, i8*)*, i8*, i32*, i32, i32)*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i32, float, i64, i32, i64, i64, float, float, %struct.AVHWAccel*, i32, i8*, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32 (%struct.AVCodecContext*, i8*, i32, i32)*, i8*, i32*, i32)*, i32, i32, i32, i32, i32, i32, i8*, float, float, float, float, i32, i32, i32, float, float, float, i32, i32, i32, i32, [4 x i32], i8*, i32, i32, i32, i32 }
+%struct.AVClass = type { i8*, i8* (i8*)*, %struct.AVOption* }
+%struct.AVOption = type opaque
+%struct.AVRational = type { i32, i32 }
+%struct.AVFrame = type { [4 x i8*], [4 x i32], [4 x i8*], i32, i32, i64, i32, i32, i32, i32, i32, i8*, i32, i8*, [2 x [2 x i16]*], i32*, i8, i8*, [4 x i64], i32, i32, i32, i32, i32, %struct.AVPanScan*, i32, i32, i16*, [2 x i8*], i64, i8* }
+%struct.AVPanScan = type { i32, i32, i32, [3 x [2 x i16]] }
+%struct.AVCodec = type { i8*, i32, i32, i32, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32, i8*)*, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32*, %struct.AVPacket*)*, i32, %struct.AVCodec*, void (%struct.AVCodecContext*)*, %struct.AVRational*, i32*, i8*, i32*, i32*, i64* }
+%struct.AVPacket = type { i64, i64, i8*, i32, i32, i32, i32, void (%struct.AVPacket*)*, i8*, i64, i64 }
+%struct.RcOverride = type { i32, i32, i32, float }
+%struct.AVPaletteControl = type { i32, [256 x i32] }
+%struct.AVHWAccel = type { i8*, i32, i32, i32, i32, %struct.AVHWAccel*, i32 (%struct.AVCodecContext*, i8*, i32)*, i32 (%struct.AVCodecContext*, i8*, i32)*, i32 (%struct.AVCodecContext*)*, i32 }
+
+@firtable = internal unnamed_addr constant [9 x i8*] [i8* @ff_mlp_firorder_0, i8* @ff_mlp_firorder_1, i8* @ff_mlp_firorder_2, i8* @ff_mlp_firorder_3, i8* @ff_mlp_firorder_4, i8* @ff_mlp_firorder_5, i8* @ff_mlp_firorder_6, i8* @ff_mlp_firorder_7, i8* @ff_mlp_firorder_8], align 4
+@iirtable = internal unnamed_addr constant [5 x i8*] [i8* @ff_mlp_iirorder_0, i8* @ff_mlp_iirorder_1, i8* @ff_mlp_iirorder_2, i8* @ff_mlp_iirorder_3, i8* @ff_mlp_iirorder_4], align 4
+@ff_mlp_iirorder_0 = external global i8
+@ff_mlp_iirorder_1 = external global i8
+@ff_mlp_iirorder_2 = external global i8
+@ff_mlp_iirorder_3 = external global i8
+@ff_mlp_iirorder_4 = external global i8
+@ff_mlp_firorder_0 = external global i8
+@ff_mlp_firorder_1 = external global i8
+@ff_mlp_firorder_2 = external global i8
+@ff_mlp_firorder_3 = external global i8
+@ff_mlp_firorder_4 = external global i8
+@ff_mlp_firorder_5 = external global i8
+@ff_mlp_firorder_6 = external global i8
+@ff_mlp_firorder_7 = external global i8
+@ff_mlp_firorder_8 = external global i8
+
+define void @ff_mlp_init_x86(%struct.DSPContext* nocapture %c, %struct.AVCodecContext* nocapture %avctx) nounwind address_safety {
+entry:
+  %mlp_filter_channel = getelementptr inbounds %struct.DSPContext* %c, i32 0, i32 131
+  store void (i32*, i32*, i32, i32, i32, i32, i32, i32*)* @mlp_filter_channel_x86, void (i32*, i32*, i32, i32, i32, i32, i32, i32*)** %mlp_filter_channel, align 4, !tbaa !0
+  ret void
+}
+
+define internal void @mlp_filter_channel_x86(i32* %state, i32* %coeff, i32 %firorder, i32 %iirorder, i32 %filter_shift, i32 %mask, i32 %blocksize, i32* %sample_buffer) nounwind address_safety {
+entry:
+  %filter_shift.addr = alloca i32, align 4
+  %mask.addr = alloca i32, align 4
+  %blocksize.addr = alloca i32, align 4
+  %firjump = alloca i8*, align 4
+  %iirjump = alloca i8*, align 4
+  store i32 %filter_shift, i32* %filter_shift.addr, align 4, !tbaa !3
+  store i32 %mask, i32* %mask.addr, align 4, !tbaa !3
+  %arrayidx = getelementptr inbounds [9 x i8*]* @firtable, i32 0, i32 %firorder
+  %0 = load i8** %arrayidx, align 4, !tbaa !0
+  store i8* %0, i8** %firjump, align 4, !tbaa !0
+  %arrayidx1 = getelementptr inbounds [5 x i8*]* @iirtable, i32 0, i32 %iirorder
+  %1 = load i8** %arrayidx1, align 4, !tbaa !0
+  store i8* %1, i8** %iirjump, align 4, !tbaa !0
+  %sub = sub nsw i32 0, %blocksize
+  store i32 %sub, i32* %blocksize.addr, align 4, !tbaa !3
+  %2 = call { i32*, i32*, i32* } asm sideeffect "1:                           \0A\09xor           %esi, %esi\0A\09xor           %ecx, %ecx\0A\09jmp  *$5                     \0A\09ff_mlp_firorder_8:            \0A\09mov   0x1c+0($0), %eax\0A\09imull 0x1c+0($1)       \0A\09add                %eax , %esi\0A\09adc                %edx , %ecx\0A\09ff_mlp_firorder_7:            \0A\09mov   0x18+0($0), %eax\0A\09imull 0x18+0($1)       \0A\09add                %eax , %esi\0A\09adc                %edx , %ecx\0A\09ff_mlp_firorder_6:            \0A\09mov   0x14+0($0), %eax\0A\09imull 0x14+0($1)       \0A\09add                %eax , %esi\0A\09adc                %edx , %ecx\0A\09ff_mlp_firorder_5:            \0A\09mov   0x10+0($0), %eax\0A\09imull 0x10+0($1)       \0A\09add                %eax , %esi\0A\09adc                %edx , %ecx\0A\09ff_mlp_firorder_4:            \0A\09mov   0x0c+0($0), %eax\0A\09imull 0x0c+0($1)       \0A\09add                %eax , %esi\0A\09adc                %edx , %ecx\0A\09ff_mlp_firorder_3:            \0A\09mov   0x08+0($0), %eax\0A\09imull 0x08+0($1)       \0A\09add                %eax , %esi\0A\09adc                %edx , %ecx\0A\09ff_mlp_firorder_2:            \0A\09mov   0x04+0($0), %eax\0A\09imull 0x04+0($1)       \0A\09add                %eax , %esi\0A\09adc                %edx , %ecx\0A\09ff_mlp_firorder_1:            \0A\09mov   0x00+0($0), %eax\0A\09imull 0x00+0($1)       \0A\09add                %eax , %esi\0A\09adc                %edx , %ecx\0A\09ff_mlp_firorder_0:\0A\09jmp  *$6                     \0A\09ff_mlp_iirorder_4:            \0A\09mov   0x0c+4*(8 + (40 * 4))($0), %eax\0A\09imull 0x0c+4* 8($1)       \0A\09add                %eax , %esi\0A\09adc                %edx , %ecx\0A\09ff_mlp_iirorder_3:            \0A\09mov   0x08+4*(8 + (40 * 4))($0), %eax\0A\09imull 0x08+4* 8($1)       \0A\09add                %eax , %esi\0A\09adc                %edx , %ecx\0A\09ff_mlp_iirorder_2:            \0A\09mov   0x04+4*(8 + (40 * 4))($0), %eax\0A\09imull 0x04+4* 8($1)       \0A\09add                %eax , %esi\0A\09adc                %edx , %ecx\0A\09ff_mlp_iirorder_1:            \0A\09mov   0x00+4*(8 + (40 * 4))($0), %eax\0A\09imull 0x00+4* 8($1)       \0A\09add                %eax , %esi\0A\09adc                %edx , %ecx\0A\09ff_mlp_iirorder_0:\0A\09mov           %ecx, %edx\0A\09mov           %esi, %eax\0A\09movzbl        $7   , %ecx\0A\09shrd    %cl, %edx, %eax\0A\09mov  %eax  ,%edx      \0A\09add  ($2)      ,%eax     \0A\09and   $4       ,%eax     \0A\09sub   $$4       ,  $0         \0A\09mov  %eax, ($0)        \0A\09mov  %eax, ($2)        \0A\09add $$4* 8    ,  $2         \0A\09sub  %edx   ,%eax     \0A\09mov  %eax,4*(8 + (40 * 4))($0)  \0A\09incl              $3         \0A\09js 1b                        \0A\09", "=r,=r,=r,=*m,*m,*m,*m,*m,0,1,2,*m,~{eax},~{edx},~{esi},~{ecx},~{dirflag},~{fpsr},~{flags}"(i32* %blocksize.addr, i32* %mask.addr, i8** %firjump, i8** %iirjump, i32* %filter_shift.addr, i32* %state, i32* %coeff, i32* %sample_buffer, i32* %blocksize.addr) nounwind, !srcloc !4
+  ret void
+}
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"int", metadata !1}
+!4 = metadata !{i32 156132, i32 156164, i32 156205, i32 156238, i32 156282, i32 156332, i32 156370, i32 156408, i32 156447, i32 156486, i32 156536, i32 156574, i32 156612, i32 156651, i32 156690, i32 156740, i32 156778, i32 156816, i32 156855, i32 156894, i32 156944, i32 156982, i32 157020, i32 157059, i32 157098, i32 157148, i32 157186, i32 157224, i32 157263, i32 157302, i32 157352, i32 157390, i32 157428, i32 157467, i32 157506, i32 157556, i32 157594, i32 157632, i32 157671, i32 157710, i32 157760, i32 157798, i32 157836, i32 157875, i32 157914, i32 157952, i32 157996, i32 158046, i32 158099, i32 158140, i32 158179, i32 158218, i32 158268, i32 158321, i32 158362, i32 158401, i32 158440, i32 158490, i32 158543, i32 158584, i32 158623, i32 158662, i32 158712, i32 158765, i32 158806, i32 158845, i32 158884, i32 158922, i32 158963, i32 158996, i32 159029, i32 159062, i32 159109, i32 159154, i32 159199, i32 159243, i32 159286, i32 159329, i32 159375, i32 159422, i32 159478, i32 159522, i32 159566}
diff --git a/test/Instrumentation/AddressSanitizer/X86/lit.local.cfg b/test/Instrumentation/AddressSanitizer/X86/lit.local.cfg
new file mode 100644
index 0000000..b05ed3c
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/X86/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll b/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll
new file mode 100644
index 0000000..c0fe15e
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll
@@ -0,0 +1,56 @@
+; RUN: opt < %s -basicaa -gvn -asan -S | FileCheck %s
+; ASAN conflicts with load widening iff the widened load accesses data out of bounds
+; (while the original unwidened loads do not).
+; http://code.google.com/p/address-sanitizer/issues/detail?id=20#c1
+
+
+; 32-bit little endian target.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+
+%struct_of_7_bytes_4_aligned = type { i32, i8, i8, i8}
+
+@f = global %struct_of_7_bytes_4_aligned zeroinitializer, align 4
+
+; Accessing bytes 4 and 6, not ok to widen to i32 if address_safety is set.
+
+define i32 @test_widening_bad(i8* %P) nounwind ssp noredzone address_safety {
+entry:
+  %tmp = load i8* getelementptr inbounds (%struct_of_7_bytes_4_aligned* @f, i64 0, i32 1), align 4
+  %conv = zext i8 %tmp to i32
+  %tmp1 = load i8* getelementptr inbounds (%struct_of_7_bytes_4_aligned* @f, i64 0, i32 3), align 1
+  %conv2 = zext i8 %tmp1 to i32
+  %add = add nsw i32 %conv, %conv2
+  ret i32 %add
+; CHECK: @test_widening_bad
+; CHECK: __asan_report_load1
+; CHECK: __asan_report_load1
+; CHECK-NOT: __asan_report
+; We can not use check for "ret" here because __asan_report_load1 calls live after ret.
+; CHECK: end_test_widening_bad
+}
+
+define void @end_test_widening_bad() {
+  entry:
+  ret void
+}
+
+;; Accessing bytes 4 and 5. Ok to widen to i16.
+
+define i32 @test_widening_ok(i8* %P) nounwind ssp noredzone address_safety {
+entry:
+  %tmp = load i8* getelementptr inbounds (%struct_of_7_bytes_4_aligned* @f, i64 0, i32 1), align 4
+  %conv = zext i8 %tmp to i32
+  %tmp1 = load i8* getelementptr inbounds (%struct_of_7_bytes_4_aligned* @f, i64 0, i32 2), align 1
+  %conv2 = zext i8 %tmp1 to i32
+  %add = add nsw i32 %conv, %conv2
+  ret i32 %add
+; CHECK: @test_widening_ok
+; CHECK: __asan_report_load2
+; CHECK-NOT: __asan_report
+; CHECK: end_test_widening_ok
+}
+
+define void @end_test_widening_ok() {
+  entry:
+  ret void
+}
diff --git a/test/Instrumentation/AddressSanitizer/do-not-touch-odr-global.ll b/test/Instrumentation/AddressSanitizer/do-not-touch-odr-global.ll
new file mode 100644
index 0000000..1687877
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/do-not-touch-odr-global.ll
@@ -0,0 +1,6 @@
+; RUN: opt < %s -asan -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+; no action should be taken for these globals
+@v1 = linkonce_odr constant i8 1
+; CHECK-NOT: __asan_register_globals
diff --git a/test/Instrumentation/AddressSanitizer/do-not-touch-threadlocal.ll b/test/Instrumentation/AddressSanitizer/do-not-touch-threadlocal.ll
new file mode 100644
index 0000000..89644d4
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/do-not-touch-threadlocal.ll
@@ -0,0 +1,6 @@
+; RUN: opt < %s -asan -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+; no action should be taken for thread locals
+@xxx = thread_local global i32 0, align 4
+; CHECK-NOT: __asan_register_globals
diff --git a/test/Instrumentation/AddressSanitizer/instrument-no-return.ll b/test/Instrumentation/AddressSanitizer/instrument-no-return.ll
new file mode 100644
index 0000000..80f1b1c
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/instrument-no-return.ll
@@ -0,0 +1,17 @@
+; RUN: opt < %s -asan -S | FileCheck %s
+; AddressSanitizer must insert __asan_handle_no_return
+; before every noreturn call.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @MyNoReturnFunc(i32) noreturn
+
+define i32 @_Z5ChildPv(i8* nocapture %arg) uwtable address_safety {
+entry:
+  call void @MyNoReturnFunc(i32 1) noreturn
+  unreachable
+}
+
+; CHECK:        call void @__asan_handle_no_return
+; CHECK-NEXT:   call void @MyNoReturnFunc
diff --git a/test/Instrumentation/AddressSanitizer/instrument_global.ll b/test/Instrumentation/AddressSanitizer/instrument_global.ll
new file mode 100644
index 0000000..ba8d65a
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/instrument_global.ll
@@ -0,0 +1,20 @@
+; RUN: opt < %s -asan -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+@xxx = global i32 0, align 4
+
+; If a global is present, __asan_[un]register_globals should be called from
+; module ctor/dtor
+
+; CHECK: llvm.global_dtors
+; CHECK: llvm.global_ctors
+
+; CHECK: define internal void @asan.module_ctor
+; CHECK-NOT: ret
+; CHECK: call void @__asan_register_globals
+; CHECK: ret
+
+; CHECK: define internal void @asan.module_dtor
+; CHECK-NOT: ret
+; CHECK: call void @__asan_unregister_globals
+; CHECK: ret
diff --git a/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll b/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll
new file mode 100644
index 0000000..633bf9a
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll
@@ -0,0 +1,25 @@
+; Test that AddressSanitizer instruments "(*a)++" only once.
+; RUN: opt < %s -asan -S -asan-opt=1 | FileCheck %s -check-prefix=OPT1
+; RUN: opt < %s -asan -S -asan-opt=0 | FileCheck %s -check-prefix=OPT0
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+define void @IncrementMe(i32* %a) address_safety {
+entry:
+  %tmp1 = load i32* %a, align 4
+  %tmp2 = add i32 %tmp1,  1
+  store i32 %tmp2, i32* %a, align 4
+  ret void
+}
+
+; With optimizations enabled we should see only one call to __asan_report_*
+; OPT1: IncrementMe
+; OPT1: __asan_report_
+; OPT1-NOT: __asan_report_
+; OPT1: asan.module_ctor
+
+; Without optimizations we should see two calls to __asan_report_*
+; OPT0: IncrementMe
+; OPT0: __asan_report_
+; OPT0: __asan_report_
+; OPT0: asan.module_ctor
diff --git a/test/Instrumentation/AddressSanitizer/lit.local.cfg b/test/Instrumentation/AddressSanitizer/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Instrumentation/AddressSanitizer/test64.ll b/test/Instrumentation/AddressSanitizer/test64.ll
new file mode 100644
index 0000000..fc27de9
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/test64.ll
@@ -0,0 +1,14 @@
+; RUN: opt < %s -asan -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+define i32 @read_4_bytes(i32* %a) address_safety {
+entry:
+  %tmp1 = load i32* %a, align 4
+  ret i32 %tmp1
+}
+; CHECK: @read_4_bytes
+; CHECK-NOT: ret
+; CHECK: lshr {{.*}} 3
+; Check for ASAN's Offset for 64-bit (2^44)
+; CHECK-NEXT: 17592186044416
+; CHECK: ret
diff --git a/test/Instrumentation/ThreadSanitizer/lit.local.cfg b/test/Instrumentation/ThreadSanitizer/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Instrumentation/ThreadSanitizer/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Instrumentation/ThreadSanitizer/read_before_write.ll b/test/Instrumentation/ThreadSanitizer/read_before_write.ll
new file mode 100644
index 0000000..482362a
--- /dev/null
+++ b/test/Instrumentation/ThreadSanitizer/read_before_write.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -tsan -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+define void @IncrementMe(i32* nocapture %ptr) nounwind uwtable {
+entry:
+  %0 = load i32* %ptr, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %ptr, align 4
+  ret void
+}
+; CHECK: define void @IncrementMe
+; CHECK-NOT: __tsan_read
+; CHECK: __tsan_write
+; CHECK: ret void
+
+define void @IncrementMeWithCallInBetween(i32* nocapture %ptr) nounwind uwtable {
+entry:
+  %0 = load i32* %ptr, align 4
+  %inc = add nsw i32 %0, 1
+  call void @foo()
+  store i32 %inc, i32* %ptr, align 4
+  ret void
+}
+
+; CHECK: define void @IncrementMeWithCallInBetween
+; CHECK: __tsan_read
+; CHECK: __tsan_write
+; CHECK: ret void
+
+declare void @foo()
+
diff --git a/test/Instrumentation/ThreadSanitizer/read_from_global.ll b/test/Instrumentation/ThreadSanitizer/read_from_global.ll
new file mode 100644
index 0000000..a08453a
--- /dev/null
+++ b/test/Instrumentation/ThreadSanitizer/read_from_global.ll
@@ -0,0 +1,61 @@
+; RUN: opt < %s -tsan -S | FileCheck %s
+; Check that tsan does not instrument reads from constant globals.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+@const_global = external constant i32
+define i32 @read_from_const_global() nounwind uwtable readnone {
+entry:
+  %0 = load i32* @const_global, align 4
+  ret i32 %0
+}
+; CHECK: define i32 @read_from_const_global
+; CHECK-NOT: __tsan
+; CHECK: ret i32
+
+@non_const_global = global i32 0, align 4
+define i32 @read_from_non_const_global() nounwind uwtable readonly {
+entry:
+  %0 = load i32* @non_const_global, align 4
+  ret i32 %0
+}
+
+; CHECK:  define i32 @read_from_non_const_global
+; CHECK: __tsan_read
+; CHECK: ret i32
+
+@const_global_array = external constant [10 x i32]
+define i32 @read_from_const_global_array(i32 %idx) nounwind uwtable readnone {
+entry:
+  %idxprom = sext i32 %idx to i64
+  %arrayidx = getelementptr inbounds [10 x i32]* @const_global_array, i64 0, i64 %idxprom
+  %0 = load i32* %arrayidx, align 4
+  ret i32 %0
+}
+
+; CHECK: define i32 @read_from_const_global_array
+; CHECK-NOT: __tsan
+; CHECK: ret i32
+
+%struct.Foo = type { i32 (...)** }
+define void @call_virtual_func(%struct.Foo* %f) uwtable {
+entry:
+  %0 = bitcast %struct.Foo* %f to void (%struct.Foo*)***
+  %vtable = load void (%struct.Foo*)*** %0, align 8, !tbaa !3
+  %1 = load void (%struct.Foo*)** %vtable, align 8
+  call void %1(%struct.Foo* %f)
+  ret void
+}
+
+; CHECK: define void @call_virtual_func
+; CHECK: __tsan_read
+; CHECK: = load
+; CHECK-NOT: __tsan_read
+; CHECK: = load
+; CHECK: ret void
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"vtable pointer", metadata !2}
+
diff --git a/test/Instrumentation/ThreadSanitizer/tsan_basic.ll b/test/Instrumentation/ThreadSanitizer/tsan_basic.ll
new file mode 100644
index 0000000..33c703b
--- /dev/null
+++ b/test/Instrumentation/ThreadSanitizer/tsan_basic.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -tsan -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @read_4_bytes(i32* %a) {
+entry:
+  %tmp1 = load i32* %a, align 4
+  ret i32 %tmp1
+}
+
+; CHECK: @llvm.global_ctors = {{.*}}@__tsan_init
+
+; CHECK: define i32 @read_4_bytes(i32* %a) {
+; CHECK:        call void @__tsan_func_entry(i8* %0)
+; CHECK-NEXT:   %1 = bitcast i32* %a to i8*
+; CHECK-NEXT:   call void @__tsan_read4(i8* %1)
+; CHECK-NEXT:   %tmp1 = load i32* %a, align 4
+; CHECK-NEXT:   call void @__tsan_func_exit()
+; CHECK: ret i32
+
+
diff --git a/test/Instrumentation/ThreadSanitizer/vptr_update.ll b/test/Instrumentation/ThreadSanitizer/vptr_update.ll
new file mode 100644
index 0000000..f318659
--- /dev/null
+++ b/test/Instrumentation/ThreadSanitizer/vptr_update.ll
@@ -0,0 +1,13 @@
+; RUN: opt < %s -tsan -S | FileCheck %s
+; Check that vtable pointer updates are treated in a special way.
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+define void @Foo(i8** nocapture %a, i8* %b) nounwind uwtable {
+entry:
+; CHECK: call void @__tsan_vptr_update
+  store i8* %b, i8** %a, align 8, !tbaa !0
+  ret void
+}
+!0 = metadata !{metadata !"vtable pointer", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
+
diff --git a/test/Integer/dg.exp b/test/Integer/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Integer/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Integer/lit.local.cfg b/test/Integer/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Integer/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Linker/2004-05-07-TypeResolution1.ll b/test/Linker/2004-05-07-TypeResolution1.ll
index f0ade33..4cff9ac 100644
--- a/test/Linker/2004-05-07-TypeResolution1.ll
+++ b/test/Linker/2004-05-07-TypeResolution1.ll
@@ -30,6 +30,6 @@ declare void @func(%struct2*)
 
 define void @tty_init() {
 entry:
-	volatile store void (%struct2*)* @func, void (%struct2*)** getelementptr (%struct1* @driver1, i64 0, i32 1)
+	store volatile void (%struct2*)* @func, void (%struct2*)** getelementptr (%struct1* @driver1, i64 0, i32 1)
 	ret void
 }
diff --git a/test/Linker/2004-05-07-TypeResolution2.ll b/test/Linker/2004-05-07-TypeResolution2.ll
index 74fe39f..3807127 100644
--- a/test/Linker/2004-05-07-TypeResolution2.ll
+++ b/test/Linker/2004-05-07-TypeResolution2.ll
@@ -9,7 +9,7 @@ target datalayout = "e-p:32:32"
 define internal void @f1(%struct1* %tty) {
 loopentry.preheader:
 	%tmp.2.i.i = getelementptr %struct1* %tty, i64 0, i32 1		; <void (%struct2*)**> [#uses=1]
-	%tmp.3.i.i = volatile load void (%struct2*)** %tmp.2.i.i		; <void (%struct2*)*> [#uses=0]
+	%tmp.3.i.i = load volatile void (%struct2*)** %tmp.2.i.i		; <void (%struct2*)*> [#uses=0]
 	ret void
 }
 
diff --git a/test/Linker/2011-08-18-unique-debug-type.ll b/test/Linker/2011-08-18-unique-debug-type.ll
index 4ef0e0e..696fdb3 100644
--- a/test/Linker/2011-08-18-unique-debug-type.ll
+++ b/test/Linker/2011-08-18-unique-debug-type.ll
@@ -1,5 +1,5 @@
 
-; RUN: llvm-link %s %p/2011-08-18-unique-debug-type2.ll -S -o - | grep "int" | count 1
+; RUN: llvm-link %s %p/2011-08-18-unique-debug-type2.ll -S -o - | grep "int" | grep -v "^; ModuleID" | count 1
 ; Test to check only one MDNode for "int" after linking.
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-macosx10.7.0"
diff --git a/test/Linker/dg.exp b/test/Linker/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Linker/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Linker/link-type-names.ll b/test/Linker/link-type-names.ll
new file mode 100644
index 0000000..bfc3b64
--- /dev/null
+++ b/test/Linker/link-type-names.ll
@@ -0,0 +1,10 @@
+; RUN: echo "%X = type { i32 } @G2 = global %X { i32 4 }" > %t.ll
+; RUN: llvm-link %s %t.ll -S | FileCheck %s
+; PR11464
+
+%X = type { i32 }
+@G = global %X { i32 4 }
+
+
+; CHECK: @G = global %X { i32 4 }
+; CHECK: @G2 = global %X { i32 4 }
diff --git a/test/Linker/lit.local.cfg b/test/Linker/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Linker/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Linker/module-flags-1-a.ll b/test/Linker/module-flags-1-a.ll
new file mode 100644
index 0000000..973aa80
--- /dev/null
+++ b/test/Linker/module-flags-1-a.ll
@@ -0,0 +1,16 @@
+; RUN: llvm-link %s %p/module-flags-1-b.ll -S -o - | sort | FileCheck %s
+
+; Test basic functionality of module flags.
+
+; CHECK: !0 = metadata !{i32 1, metadata !"foo", i32 37}
+; CHECK: !1 = metadata !{i32 1, metadata !"qux", i32 42}
+; CHECK: !2 = metadata !{i32 1, metadata !"mux", metadata !3}
+; CHECK: !3 = metadata !{metadata !"hello world", i32 927}
+; CHECK: !4 = metadata !{i32 2, metadata !"bar", i32 42}
+; CHECK: !llvm.module.flags = !{!0, !1, !2, !4}
+
+!0 = metadata !{ i32 1, metadata !"foo", i32 37 }
+!1 = metadata !{ i32 2, metadata !"bar", i32 42 }
+!2 = metadata !{ i32 1, metadata !"mux", metadata !{ metadata !"hello world", i32 927 } }
+
+!llvm.module.flags = !{ !0, !1, !2 }
diff --git a/test/Linker/module-flags-1-b.ll b/test/Linker/module-flags-1-b.ll
new file mode 100644
index 0000000..bf3f5e5
--- /dev/null
+++ b/test/Linker/module-flags-1-b.ll
@@ -0,0 +1,8 @@
+; This file is used with module-flags-1-a.ll
+; RUN: true
+
+!0 = metadata !{ i32 1, metadata !"foo", i32 37 }
+!1 = metadata !{ i32 1, metadata !"qux", i32 42 }
+!2 = metadata !{ i32 1, metadata !"mux", metadata !{ metadata !"hello world", i32 927 } }
+
+!llvm.module.flags = !{ !0, !1, !2 }
diff --git a/test/Linker/module-flags-2-a.ll b/test/Linker/module-flags-2-a.ll
new file mode 100644
index 0000000..3ae0288
--- /dev/null
+++ b/test/Linker/module-flags-2-a.ll
@@ -0,0 +1,10 @@
+; RUN: llvm-link %s %p/module-flags-2-b.ll -S -o - | sort | FileCheck %s
+
+; Test the 'override' behavior.
+
+; CHECK: !0 = metadata !{i32 4, metadata !"foo", i32 37}
+; CHECK: !llvm.module.flags = !{!0}
+
+!0 = metadata !{ i32 1, metadata !"foo", i32 927 }
+
+!llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-2-b.ll b/test/Linker/module-flags-2-b.ll
new file mode 100644
index 0000000..ab55e4b
--- /dev/null
+++ b/test/Linker/module-flags-2-b.ll
@@ -0,0 +1,6 @@
+; This file is used with module-flags-2-a.ll
+; RUN: true
+
+!0 = metadata !{ i32 4, metadata !"foo", i32 37 } ; Override the "foo" value.
+
+!llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-3-a.ll b/test/Linker/module-flags-3-a.ll
new file mode 100644
index 0000000..4233a0a
--- /dev/null
+++ b/test/Linker/module-flags-3-a.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-link %s %p/module-flags-3-b.ll -S -o - | sort | FileCheck %s
+
+; Test 'require' behavior.
+
+; CHECK: !0 = metadata !{i32 1, metadata !"foo", i32 37}
+; CHECK: !1 = metadata !{i32 3, metadata !"foo", metadata !2}
+; CHECK: !2 = metadata !{metadata !"bar", i32 42}
+; CHECK: !3 = metadata !{i32 1, metadata !"bar", i32 42}
+; CHECK: !llvm.module.flags = !{!0, !1, !3}
+
+!0 = metadata !{ i32 1, metadata !"foo", i32 37 }
+!1 = metadata !{ i32 1, metadata !"bar", i32 42 }
+
+!llvm.module.flags = !{ !0, !1 }
diff --git a/test/Linker/module-flags-3-b.ll b/test/Linker/module-flags-3-b.ll
new file mode 100644
index 0000000..76be802
--- /dev/null
+++ b/test/Linker/module-flags-3-b.ll
@@ -0,0 +1,8 @@
+; This file is used with module-flags-3-a.ll
+; RUN: true
+
+!0 = metadata !{ i32 3, metadata !"foo",
+  metadata !{ metadata !"bar", i32 42 }
+}
+
+!llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-4-a.ll b/test/Linker/module-flags-4-a.ll
new file mode 100644
index 0000000..f411a56
--- /dev/null
+++ b/test/Linker/module-flags-4-a.ll
@@ -0,0 +1,10 @@
+; RUN: not llvm-link %s %p/module-flags-4-b.ll -S -o - |& FileCheck %s
+
+; Test 'require' error.
+
+; CHECK: linking module flags 'bar': does not have the required value
+
+!0 = metadata !{ i32 1, metadata !"foo", i32 37 }
+!1 = metadata !{ i32 1, metadata !"bar", i32 927 }
+
+!llvm.module.flags = !{ !0, !1 }
diff --git a/test/Linker/module-flags-4-b.ll b/test/Linker/module-flags-4-b.ll
new file mode 100644
index 0000000..3a460bb
--- /dev/null
+++ b/test/Linker/module-flags-4-b.ll
@@ -0,0 +1,8 @@
+; This file is used with module-flags-4-a.ll
+; RUN: true
+
+!0 = metadata !{ i32 3, metadata !"foo",
+  metadata !{ metadata !"bar", i32 42 }
+}
+
+!llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-5-a.ll b/test/Linker/module-flags-5-a.ll
new file mode 100644
index 0000000..2e59ecc
--- /dev/null
+++ b/test/Linker/module-flags-5-a.ll
@@ -0,0 +1,9 @@
+; RUN: not llvm-link %s %p/module-flags-5-b.ll -S -o - |& FileCheck %s
+
+; Test the 'override' error.
+
+; CHECK: linking module flags 'foo': IDs have conflicting override values
+
+!0 = metadata !{ i32 4, metadata !"foo", i32 927 }
+
+!llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-5-b.ll b/test/Linker/module-flags-5-b.ll
new file mode 100644
index 0000000..1e99b20
--- /dev/null
+++ b/test/Linker/module-flags-5-b.ll
@@ -0,0 +1,6 @@
+; This file is used with module-flags-5-a.ll
+; RUN: true
+
+!0 = metadata !{ i32 4, metadata !"foo", i32 37 } ; Override the "foo" value.
+
+!llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-6-a.ll b/test/Linker/module-flags-6-a.ll
new file mode 100644
index 0000000..c3e0225
--- /dev/null
+++ b/test/Linker/module-flags-6-a.ll
@@ -0,0 +1,9 @@
+; RUN: not llvm-link %s %p/module-flags-6-b.ll -S -o - |& FileCheck %s
+
+; Test module flags error messages.
+
+; CHECK: linking module flags 'foo': IDs have conflicting values
+
+!0 = metadata !{ i32 1, metadata !"foo", i32 37 }
+
+!llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-6-b.ll b/test/Linker/module-flags-6-b.ll
new file mode 100644
index 0000000..2bc5a96
--- /dev/null
+++ b/test/Linker/module-flags-6-b.ll
@@ -0,0 +1,6 @@
+; This file is used with module-flags-6-a.ll
+; RUN: true
+
+!0 = metadata !{ i32 1, metadata !"foo", i32 38 }
+
+!llvm.module.flags = !{ !0 }
diff --git a/test/Linker/multiple-merged-structs.ll b/test/Linker/multiple-merged-structs.ll
new file mode 100644
index 0000000..348cd89
--- /dev/null
+++ b/test/Linker/multiple-merged-structs.ll
@@ -0,0 +1,19 @@
+; RUN: echo {%bug_type = type opaque \
+; RUN:     declare i32 @bug_a(%bug_type*) \
+; RUN:     declare i32 @bug_b(%bug_type*) } > %t.ll
+; RUN: llvm-link %t.ll %s
+; PR11464
+
+%bug_type = type { %bug_type* }
+%bar = type { i32 }
+
+define i32 @bug_a(%bug_type* %fp) nounwind uwtable {
+entry:
+  %d_stream = getelementptr inbounds %bug_type* %fp, i64 0, i32 0
+  ret i32 0
+}
+
+define i32 @bug_b(%bar* %a) nounwind uwtable {
+entry:
+  ret i32 0
+}
diff --git a/test/Linker/visibility1.ll b/test/Linker/visibility1.ll
new file mode 100644
index 0000000..131f6d5
--- /dev/null
+++ b/test/Linker/visibility1.ll
@@ -0,0 +1,46 @@
+; RUN: llvm-link %s %p/visibility2.ll -S | FileCheck %s
+; RUN: llvm-link %p/visibility2.ll %s -S | FileCheck %s
+
+; The values in this file are strong, the ones in visibility2.ll are weak,
+; but we should still get the visibility from them.
+
+; Variables
+; CHECK: @v1 = hidden global i32 0
+@v1 = global i32 0
+
+; CHECK: @v2 = protected  global i32 0
+@v2 = global i32 0
+
+; CHECK: @v3 = hidden global i32 0
+@v3 = protected global i32 0
+
+
+; Aliases
+; CHECK: @a1 = hidden alias i32* @v1
+@a1 = alias i32* @v1
+
+; CHECK: @a2 = protected alias i32* @v2
+@a2 = alias i32* @v2
+
+; CHECK: @a3 = hidden alias i32* @v3
+@a3 = protected alias i32* @v3
+
+
+; Functions
+; CHECK: define hidden void @f1()
+define void @f1()  {
+entry:
+  ret void
+}
+
+; CHECK: define protected void @f2()
+define void @f2()  {
+entry:
+  ret void
+}
+
+; CHECK: define hidden void @f3()
+define protected void @f3()  {
+entry:
+  ret void
+}
diff --git a/test/Linker/visibility2.ll b/test/Linker/visibility2.ll
new file mode 100644
index 0000000..e6363ca
--- /dev/null
+++ b/test/Linker/visibility2.ll
@@ -0,0 +1,27 @@
+; This file is used by visibility1.ll, so it doesn't actually do anything itself
+;
+; RUN: true
+
+; Variables
+@v1 = weak hidden global i32 0
+@v2 = weak protected global i32 0
+@v3 = weak hidden global i32 0
+
+; Aliases
+@a1 = hidden alias weak i32* @v1
+@a2 = protected alias weak i32* @v2
+@a3 = hidden alias weak i32* @v3
+
+; Functions
+define weak hidden void @f1() {
+entry:
+  ret void
+}
+define weak protected void @f2() {
+entry:
+  ret void
+}
+define weak hidden void @f3() {
+entry:
+  ret void
+}
diff --git a/test/MC/ARM/arm-aliases.s b/test/MC/ARM/arm-aliases.s
new file mode 100644
index 0000000..d4ea0df
--- /dev/null
+++ b/test/MC/ARM/arm-aliases.s
@@ -0,0 +1,17 @@
+@ RUN: llvm-mc -triple=armv7-apple-darwin -show-encoding < %s | FileCheck %s
+  .syntax unified
+
+@ Shift-by-zero should canonicalize to no shift at all (lsl #0 encoding)
+        add r1, r2, r3, lsl #0
+        sub r1, r2, r3, ror #0
+        eor r1, r2, r3, lsr #0
+        orr r1, r2, r3, asr #0
+        and r1, r2, r3, ror #0
+        bic r1, r2, r3, lsl #0
+
+@ CHECK: add	r1, r2, r3              @ encoding: [0x03,0x10,0x82,0xe0]
+@ CHECK: sub	r1, r2, r3              @ encoding: [0x03,0x10,0x42,0xe0]
+@ CHECK: eor	r1, r2, r3              @ encoding: [0x03,0x10,0x22,0xe0]
+@ CHECK: orr	r1, r2, r3              @ encoding: [0x03,0x10,0x82,0xe1]
+@ CHECK: and	r1, r2, r3              @ encoding: [0x03,0x10,0x02,0xe0]
+@ CHECK: bic	r1, r2, r3              @ encoding: [0x03,0x10,0xc2,0xe1]
diff --git a/test/MC/ARM/arm-it-block.s b/test/MC/ARM/arm-it-block.s
new file mode 100644
index 0000000..e5e5491
--- /dev/null
+++ b/test/MC/ARM/arm-it-block.s
@@ -0,0 +1,11 @@
+@ RUN: llvm-mc -triple=armv7-apple-darwin -show-encoding < %s | FileCheck %s
+  .syntax unified
+  .globl _func
+
+_func:
+@ CHECK: _func:
+        it eq
+        moveq r2, r3
+@ 'it' is parsed but not encoded.
+@ CHECK-NOT: it
+@ CHECK: moveq	r2, r3          @ encoding: [0x03,0x20,0xa0,0x01]
diff --git a/test/MC/ARM/arm-memory-instructions.s b/test/MC/ARM/arm-memory-instructions.s
index 783ac28..d8d9130 100644
--- a/test/MC/ARM/arm-memory-instructions.s
+++ b/test/MC/ARM/arm-memory-instructions.s
@@ -130,8 +130,13 @@ _func:
 
 
 @------------------------------------------------------------------------------
-@ FIXME: LDRD (label)
+@ LDRD (label)
 @------------------------------------------------------------------------------
+        ldrd r2, r3, Lbaz
+Lbaz: .quad 0
+
+@ CHECK: ldrd	r2, r3, Lbaz            @ encoding: [0xd0'A',0x20'A',0x4f'A',0xe1'A']
+
 
 @------------------------------------------------------------------------------
 @ LDRD (register)
diff --git a/test/MC/ARM/arm_fixups.s b/test/MC/ARM/arm_fixups.s
index aba0cd8..74dfb99 100644
--- a/test/MC/ARM/arm_fixups.s
+++ b/test/MC/ARM/arm_fixups.s
@@ -3,7 +3,7 @@
 
     bl _printf
 @ CHECK: bl _printf @ encoding: [A,A,A,0xeb]
-@ CHECK: @ fixup A - offset: 0, value: _printf, kind: fixup_arm_uncondbranch
+@ CHECK: @ fixup A - offset: 0, value: _printf, kind: fixup_arm_uncondbl
 
     mov r9, :lower16:(_foo)
     movw r9, :lower16:(_foo)
diff --git a/test/MC/ARM/basic-arm-instructions.s b/test/MC/ARM/basic-arm-instructions.s
index 55d9f02..4788ac7 100644
--- a/test/MC/ARM/basic-arm-instructions.s
+++ b/test/MC/ARM/basic-arm-instructions.s
@@ -133,9 +133,9 @@ Lforward:
         adr	r2, #-3
 
 @ CHECK: Lback:
-@ CHECK: adr	r2, Lback    @ encoding: [0bAAAAAAA0,0x20'A',0x0f'A',0b1110001A]
+@ CHECK: adr	r2, Lback    @ encoding: [A,0x20'A',0x0f'A',0xe2'A']
 @ CHECK:  @   fixup A - offset: 0, value: Lback, kind: fixup_arm_adr_pcrel_12
-@ CHECK: adr	r3, Lforward @ encoding: [0bAAAAAAA0,0x30'A',0x0f'A',0b1110001A]
+@ CHECK: adr	r3, Lforward @ encoding: [A,0x30'A',0x0f'A',0xe2'A']
 @ CHECK:  @   fixup A - offset: 0, value: Lforward, kind: fixup_arm_adr_pcrel_12
 @ CHECK: Lforward:
 @ CHECK: adr	r2, #3                  @ encoding: [0x03,0x20,0x8f,0xe2]
@@ -153,6 +153,7 @@ Lforward:
         add r4, r5, r6, asr #5
         add r4, r5, r6, ror #5
         add r6, r7, r8, lsl r9
+        add r4, r4, r3, asl r9
         add r6, r7, r8, lsr r9
         add r6, r7, r8, asr r9
         add r6, r7, r8, ror r9
@@ -172,6 +173,9 @@ Lforward:
         add r6, r7, ror r9
         add r4, r5, rrx
 
+	add r0, #-4
+	add r4, r5, #-21
+
 @ CHECK: add	r4, r5, #61440          @ encoding: [0x0f,0x4a,0x85,0xe2]
 @ CHECK: add	r4, r5, r6              @ encoding: [0x06,0x40,0x85,0xe0]
 @ CHECK: add	r4, r5, r6, lsl #5      @ encoding: [0x86,0x42,0x85,0xe0]
@@ -180,12 +184,12 @@ Lforward:
 @ CHECK: add	r4, r5, r6, asr #5      @ encoding: [0xc6,0x42,0x85,0xe0]
 @ CHECK: add	r4, r5, r6, ror #5      @ encoding: [0xe6,0x42,0x85,0xe0]
 @ CHECK: add	r6, r7, r8, lsl r9      @ encoding: [0x18,0x69,0x87,0xe0]
+@ CHECK: add	r4, r4, r3, lsl r9      @ encoding: [0x13,0x49,0x84,0xe0]
 @ CHECK: add	r6, r7, r8, lsr r9      @ encoding: [0x38,0x69,0x87,0xe0]
 @ CHECK: add	r6, r7, r8, asr r9      @ encoding: [0x58,0x69,0x87,0xe0]
 @ CHECK: add	r6, r7, r8, ror r9      @ encoding: [0x78,0x69,0x87,0xe0]
 @ CHECK: add	r4, r5, r6, rrx         @ encoding: [0x66,0x40,0x85,0xe0]
 
-
 @ CHECK: add	r5, r5, #61440          @ encoding: [0x0f,0x5a,0x85,0xe2]
 @ CHECK: add	r4, r4, r5              @ encoding: [0x05,0x40,0x84,0xe0]
 @ CHECK: add	r4, r4, r5, lsl #5      @ encoding: [0x85,0x42,0x84,0xe0]
@@ -199,6 +203,9 @@ Lforward:
 @ CHECK: add	r6, r6, r7, ror r9      @ encoding: [0x77,0x69,0x86,0xe0]
 @ CHECK: add	r4, r4, r5, rrx         @ encoding: [0x65,0x40,0x84,0xe0]
 
+@ CHECK: sub	r0, r0, #4              @ encoding: [0x04,0x00,0x40,0xe2]
+@ CHECK: sub	r4, r5, #21             @ encoding: [0x15,0x40,0x45,0xe2]
+
 
 @------------------------------------------------------------------------------
 @ AND
@@ -215,6 +222,7 @@ Lforward:
     and r6, r7, r8, asr r2
     and r6, r7, r8, ror r2
     and r10, r1, r6, rrx
+    and r2, r3, #0x7fffffff
 
     @ destination register is optional
     and r1, #0xf
@@ -242,6 +250,7 @@ Lforward:
 @ CHECK: and	r6, r7, r8, asr r2      @ encoding: [0x58,0x62,0x07,0xe0]
 @ CHECK: and	r6, r7, r8, ror r2      @ encoding: [0x78,0x62,0x07,0xe0]
 @ CHECK: and	r10, r1, r6, rrx        @ encoding: [0x66,0xa0,0x01,0xe0]
+@ CHECK: bic	r2, r3, #-2147483648    @ encoding: [0x02,0x21,0xc3,0xe3]
 
 @ CHECK: and	r1, r1, #15             @ encoding: [0x0f,0x10,0x01,0xe2]
 @ CHECK: and	r10, r10, r1            @ encoding: [0x01,0xa0,0x0a,0xe0]
@@ -257,8 +266,19 @@ Lforward:
 @ CHECK: and	r10, r10, r1, rrx       @ encoding: [0x61,0xa0,0x0a,0xe0]
 
 @------------------------------------------------------------------------------
-@ FIXME: ASR
+@ ASR
 @------------------------------------------------------------------------------
+	asr r2, r4, #32
+	asr r2, r4, #2
+	asr r2, r4, #0
+	asr r4, #2
+
+@ CHECK: asr	r2, r4, #32             @ encoding: [0x44,0x20,0xa0,0xe1]
+@ CHECK: asr	r2, r4, #2              @ encoding: [0x44,0x21,0xa0,0xe1]
+@ CHECK: mov	r2, r4                  @ encoding: [0x04,0x20,0xa0,0xe1]
+@ CHECK: asr	r4, r4, #2              @ encoding: [0x44,0x41,0xa0,0xe1]
+
+
 @------------------------------------------------------------------------------
 @ B
 @------------------------------------------------------------------------------
@@ -362,15 +382,18 @@ Lforward:
 @------------------------------------------------------------------------------
 
         bl _bar
+        bleq _bar
         blx _bar
         blls #28634268
         blx	#32424576
         blx	#16212288
 
 @ CHECK: bl  _bar @ encoding: [A,A,A,0xeb]
-@ CHECK:   @   fixup A - offset: 0, value: _bar, kind: fixup_arm_uncondbranch
+@ CHECK:   @   fixup A - offset: 0, value: _bar, kind: fixup_arm_uncondbl
+@ CHECK: bleq  _bar @ encoding: [A,A,A,0x0b]
+@ CHECK:   @   fixup A - offset: 0, value: _bar, kind: fixup_arm_condbl
 @ CHECK: blx	_bar @ encoding: [A,A,A,0xfa]
-           @   fixup A - offset: 0, value: _bar, kind: fixup_arm_uncondbranch
+           @   fixup A - offset: 0, value: _bar, kind: fixup_arm_blx
 @ CHECK: blls	#28634268               @ encoding: [0x27,0x3b,0x6d,0x9b]
 @ CHECK: blx	#32424576               @ encoding: [0xa0,0xb0,0x7b,0xfa]
 @ CHECK: blx	#16212288               @ encoding: [0x50,0xd8,0x3d,0xfa]
@@ -473,6 +496,8 @@ Lforward:
         cmp r7, r8, asr r2
         cmp r7, r8, ror r2
         cmp r1, r6, rrx
+        cmp r0, #-2
+        cmp lr, #0
 
 @ CHECK: cmp	r1, #15                 @ encoding: [0x0f,0x00,0x51,0xe3]
 @ CHECK: cmp	r1, r6                  @ encoding: [0x06,0x00,0x51,0xe1]
@@ -486,6 +511,8 @@ Lforward:
 @ CHECK: cmp	r7, r8, asr r2          @ encoding: [0x58,0x02,0x57,0xe1]
 @ CHECK: cmp	r7, r8, ror r2          @ encoding: [0x78,0x02,0x57,0xe1]
 @ CHECK: cmp	r1, r6, rrx             @ encoding: [0x66,0x00,0x51,0xe1]
+@ CHECK: cmn	r0, #2                  @ encoding: [0x02,0x00,0x70,0xe3]
+@ CHECK: cmp  lr, #0                    @ encoding: [0x00,0x00,0x5e,0xe3]
 
 
 @------------------------------------------------------------------------------
@@ -744,6 +771,10 @@ Lforward:
         ldmda     r2!, {r1,r3-r6,sp}
         ldmdb     r2!, {r1,r3-r6,sp}
 
+        @ system version
+        ldm r0, {r0, r2, lr}^
+        ldm sp!, {r0-r3, pc}^
+
 @ CHECK: ldm   r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x92,0xe8]
 @ CHECK: ldm   r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x92,0xe8]
 @ CHECK: ldmib r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x92,0xe9]
@@ -755,6 +786,8 @@ Lforward:
 @ CHECK: ldmib r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0xb2,0xe9]
 @ CHECK: ldmda r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x32,0xe8]
 @ CHECK: ldmdb r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x32,0xe9]
+@ CHECK: ldm	r0, {lr, r0, r2} ^          @ encoding: [0x05,0x40,0xd0,0xe8]
+@ CHECK: ldm	sp!, {pc, r0, r1, r2, r3} ^ @ encoding: [0x0f,0x80,0xfd,0xe8]
 
 
 @------------------------------------------------------------------------------
@@ -780,11 +813,32 @@ Lforward:
 @ CHECK: ldrhthi r8, [r11], #0          @ encoding: [0xb0,0x80,0xfb,0x80]
 
 @------------------------------------------------------------------------------
-@ FIXME: LSL
+@ LSL
 @------------------------------------------------------------------------------
+	lsl r2, r4, #31
+	lsl r2, r4, #1
+	lsl r2, r4, #0
+	lsl r4, #1
+
+@ CHECK: lsl	r2, r4, #31             @ encoding: [0x84,0x2f,0xa0,0xe1]
+@ CHECK: lsl	r2, r4, #1              @ encoding: [0x84,0x20,0xa0,0xe1]
+@ CHECK: mov	r2, r4                  @ encoding: [0x04,0x20,0xa0,0xe1]
+@ CHECK: lsl	r4, r4, #1              @ encoding: [0x84,0x40,0xa0,0xe1]
+
+
 @------------------------------------------------------------------------------
-@ FIXME: LSR
+@ LSR
 @------------------------------------------------------------------------------
+	lsr r2, r4, #32
+	lsr r2, r4, #2
+	lsr r2, r4, #0
+	lsr r4, #2
+
+@ CHECK: lsr	r2, r4, #32             @ encoding: [0x24,0x20,0xa0,0xe1]
+@ CHECK: lsr	r2, r4, #2              @ encoding: [0x24,0x21,0xa0,0xe1]
+@ CHECK: mov	r2, r4                  @ encoding: [0x04,0x20,0xa0,0xe1]
+@ CHECK: lsr	r4, r4, #2              @ encoding: [0x24,0x41,0xa0,0xe1]
+
 
 @------------------------------------------------------------------------------
 @ MCR/MCR2
@@ -855,11 +909,28 @@ Lforward:
         movs r2, r3
         moveq r2, r3
         movseq r2, r3
+        mov r12, r8, lsl #(2 - 2)
+        lsl r2, r3, #(2 - 2)
+        mov r12, r8, lsr #(2 - 2)
+        lsr r2, r3, #(2 - 2)
+        mov r12, r8, asr #(2 - 2)
+        asr r2, r3, #(2 - 2)
+        mov r12, r8, ror #(2 - 2)
+        ror r2, r3, #(2 - 2)
 
 @ CHECK: mov	r2, r3                  @ encoding: [0x03,0x20,0xa0,0xe1]
 @ CHECK: movs	r2, r3                  @ encoding: [0x03,0x20,0xb0,0xe1]
 @ CHECK: moveq	r2, r3                  @ encoding: [0x03,0x20,0xa0,0x01]
 @ CHECK: movseq	r2, r3                  @ encoding: [0x03,0x20,0xb0,0x01]
+@ CHECK: mov	r12, r8                 @ encoding: [0x08,0xc0,0xa0,0xe1]
+@ CHECK: mov	r2, r3                  @ encoding: [0x03,0x20,0xa0,0xe1]
+@ CHECK: mov	r12, r8                 @ encoding: [0x08,0xc0,0xa0,0xe1]
+@ CHECK: mov	r2, r3                  @ encoding: [0x03,0x20,0xa0,0xe1]
+@ CHECK: mov	r12, r8                 @ encoding: [0x08,0xc0,0xa0,0xe1]
+@ CHECK: mov	r2, r3                  @ encoding: [0x03,0x20,0xa0,0xe1]
+@ CHECK: mov	r12, r8                 @ encoding: [0x08,0xc0,0xa0,0xe1]
+@ CHECK: mov	r2, r3                  @ encoding: [0x03,0x20,0xa0,0xe1]
+
 
 @------------------------------------------------------------------------------
 @ MOVT
@@ -975,6 +1046,7 @@ Lforward:
         muls r5, r6, r7
         mulgt r5, r6, r7
         mulsle r5, r6, r7
+        mul r11, r5
 
 @ CHECK: mul	r5, r6, r7              @ encoding: [0x96,0x07,0x05,0xe0]
 @ CHECK: muls	r5, r6, r7              @ encoding: [0x96,0x07,0x15,0xe0]
@@ -1038,6 +1110,14 @@ Lforward:
 @ CHECK: mvnslt	r5, r6, ror r7          @ encoding: [0x76,0x57,0xf0,0xb1]
 
 @------------------------------------------------------------------------------
+@ NEG
+@------------------------------------------------------------------------------
+        neg r5, r8
+
+@ CHECK: rsb	r5, r8, #0              @ encoding: [0x00,0x50,0x68,0xe2]
+
+
+@------------------------------------------------------------------------------
 @ NOP
 @------------------------------------------------------------------------------
         nop
@@ -1313,6 +1393,20 @@ Lforward:
 
 
 @------------------------------------------------------------------------------
+@ ROR
+@------------------------------------------------------------------------------
+	ror r2, r4, #31
+	ror r2, r4, #1
+	ror r2, r4, #0
+	ror r4, #1
+
+@ CHECK: ror	r2, r4, #31             @ encoding: [0xe4,0x2f,0xa0,0xe1]
+@ CHECK: ror	r2, r4, #1              @ encoding: [0xe4,0x20,0xa0,0xe1]
+@ CHECK: mov	r2, r4                  @ encoding: [0x04,0x20,0xa0,0xe1]
+@ CHECK: ror	r4, r4, #1              @ encoding: [0xe4,0x40,0xa0,0xe1]
+
+
+@------------------------------------------------------------------------------
 @ RSB
 @------------------------------------------------------------------------------
         rsb r4, r5, #0xf000
diff --git a/test/MC/ARM/basic-thumb-instructions.s b/test/MC/ARM/basic-thumb-instructions.s
index 0fa52b0..bc2605c 100644
--- a/test/MC/ARM/basic-thumb-instructions.s
+++ b/test/MC/ARM/basic-thumb-instructions.s
@@ -59,12 +59,16 @@ _func:
         add sp, sp, #4
         add r2, sp, #8
         add r2, sp, #1020
+	add sp, sp, #-8
+	add sp, #-8
 
 @ CHECK: add	sp, #4                  @ encoding: [0x01,0xb0]
 @ CHECK: add	sp, #508                @ encoding: [0x7f,0xb0]
 @ CHECK: add	sp, #4                  @ encoding: [0x01,0xb0]
 @ CHECK: add	r2, sp, #8              @ encoding: [0x02,0xaa]
 @ CHECK: add	r2, sp, #1020           @ encoding: [0xff,0xaa]
+@ CHECK: sub	sp, #8                  @ encoding: [0x82,0xb0]
+@ CHECK: sub	sp, #8                  @ encoding: [0x82,0xb0]
 
 
 @------------------------------------------------------------------------------
@@ -93,10 +97,16 @@ _func:
         asrs r2, r3, #32
         asrs r2, r3, #5
         asrs r2, r3, #1
+        asrs r5, #21
+        asrs r5, r5, #21
+        asrs r3, r5, #21
 
 @ CHECK: asrs	r2, r3, #32             @ encoding: [0x1a,0x10]
 @ CHECK: asrs	r2, r3, #5              @ encoding: [0x5a,0x11]
 @ CHECK: asrs	r2, r3, #1              @ encoding: [0x5a,0x10]
+@ CHECK: asrs	r5, r5, #21             @ encoding: [0x6d,0x15]
+@ CHECK: asrs	r5, r5, #21             @ encoding: [0x6d,0x15]
+@ CHECK: asrs	r3, r5, #21             @ encoding: [0x6b,0x15]
 
 
 @------------------------------------------------------------------------------
@@ -315,9 +325,15 @@ _func:
 @------------------------------------------------------------------------------
         lsls r4, r5, #0
         lsls r4, r5, #4
+        lsls r3, #12
+        lsls r3, r3, #12
+        lsls r1, r3, #12
 
 @ CHECK: lsls	r4, r5, #0              @ encoding: [0x2c,0x00]
 @ CHECK: lsls	r4, r5, #4              @ encoding: [0x2c,0x01]
+@ CHECK: lsls	r3, r3, #12             @ encoding: [0x1b,0x03]
+@ CHECK: lsls	r3, r3, #12             @ encoding: [0x1b,0x03]
+@ CHECK: lsls	r1, r3, #12             @ encoding: [0x19,0x03]
 
 
 @------------------------------------------------------------------------------
@@ -333,9 +349,15 @@ _func:
 @------------------------------------------------------------------------------
         lsrs r1, r3, #1
         lsrs r1, r3, #32
+        lsrs r4, #20
+        lsrs r4, r4, #20
+        lsrs r2, r4, #20
 
 @ CHECK: lsrs	r1, r3, #1              @ encoding: [0x59,0x08]
 @ CHECK: lsrs	r1, r3, #32             @ encoding: [0x19,0x08]
+@ CHECK: lsrs	r4, r4, #20             @ encoding: [0x24,0x0d]
+@ CHECK: lsrs	r4, r4, #20             @ encoding: [0x24,0x0d]
+@ CHECK: lsrs	r2, r4, #20             @ encoding: [0x22,0x0d]
 
 
 @------------------------------------------------------------------------------
@@ -372,9 +394,11 @@ _func:
 @ MUL
 @------------------------------------------------------------------------------
         muls r1, r2, r1
+        muls r2, r2, r3
         muls r3, r4
 
 @ CHECK: muls	r1, r2, r1              @ encoding: [0x51,0x43]
+@ CHECK: muls	r2, r3, r2              @ encoding: [0x5a,0x43]
 @ CHECK: muls	r3, r4, r3              @ encoding: [0x63,0x43]
 
 
diff --git a/test/MC/ARM/basic-thumb2-instructions.s b/test/MC/ARM/basic-thumb2-instructions.s
index 68815da..d2e208b 100644
--- a/test/MC/ARM/basic-thumb2-instructions.s
+++ b/test/MC/ARM/basic-thumb2-instructions.s
@@ -73,6 +73,10 @@ _func:
         add r12, r6, #0x100
         addw r12, r6, #0x100
         adds r1, r2, #0x1f0
+	add r2, #1
+        add r0, r0, #32
+        adds r2, r2, #56
+        adds r2, #56
 
 @ CHECK: itet	eq                      @ encoding: [0x0a,0xbf]
 @ CHECK: addeq	r1, r2, #4              @ encoding: [0x11,0x1d]
@@ -85,6 +89,10 @@ _func:
 @ CHECK: add.w	r12, r6, #256           @ encoding: [0x06,0xf5,0x80,0x7c]
 @ CHECK: addw	r12, r6, #256           @ encoding: [0x06,0xf2,0x00,0x1c]
 @ CHECK: adds.w	r1, r2, #496            @ encoding: [0x12,0xf5,0xf8,0x71]
+@ CHECK: add.w	r2, r2, #1              @ encoding: [0x02,0xf1,0x01,0x02]
+@ CHECK: add.w	r0, r0, #32             @ encoding: [0x00,0xf1,0x20,0x00]
+@ CHECK: adds	r2, #56                 @ encoding: [0x38,0x32]
+@ CHECK: adds	r2, #56                 @ encoding: [0x38,0x32]
 
 
 @------------------------------------------------------------------------------
@@ -95,12 +103,16 @@ _func:
         adds r7, r3, r1, lsl #31
         adds.w r0, r3, r6, lsr #25
         add.w r4, r8, r1, ror #12
+        add r10, r8
+        add r10, r10, r8
 
 @ CHECK: add.w	r1, r2, r8              @ encoding: [0x02,0xeb,0x08,0x01]
 @ CHECK: add.w	r5, r9, r2, asr #32     @ encoding: [0x09,0xeb,0x22,0x05]
 @ CHECK: adds.w	r7, r3, r1, lsl #31     @ encoding: [0x13,0xeb,0xc1,0x77]
 @ CHECK: adds.w	r0, r3, r6, lsr #25     @ encoding: [0x13,0xeb,0x56,0x60]
 @ CHECK: add.w	r4, r8, r1, ror #12     @ encoding: [0x08,0xeb,0x31,0x34]
+@ CHECK: add	r10, r8                 @ encoding: [0xc2,0x44]
+@ CHECK: add	r10, r8                 @ encoding: [0xc2,0x44]
 
 
 @------------------------------------------------------------------------------
@@ -360,6 +372,8 @@ _func:
         cmp sp, r6, lsr #1
         cmp r2, r5, asr #24
         cmp r1, r4, ror #15
+        cmp r2, #-2
+        cmp r9, #1
 
 @ CHECK: cmp.w	r5, #65280              @ encoding: [0xb5,0xf5,0x7f,0x4f]
 @ CHECK: cmp.w	r4, r12                 @ encoding: [0xb4,0xeb,0x0c,0x0f]
@@ -368,6 +382,8 @@ _func:
 @ CHECK: cmp.w	sp, r6, lsr #1          @ encoding: [0xbd,0xeb,0x56,0x0f]
 @ CHECK: cmp.w	r2, r5, asr #24         @ encoding: [0xb2,0xeb,0x25,0x6f]
 @ CHECK: cmp.w	r1, r4, ror #15         @ encoding: [0xb1,0xeb,0xf4,0x3f]
+@ CHECK: cmn.w	r2, #2                  @ encoding: [0x12,0xf1,0x02,0x0f]
+@ CHECK: cmp.w	r9, #1                  @ encoding: [0xb9,0xf1,0x01,0x0f]
 
 
 @------------------------------------------------------------------------------
@@ -573,6 +589,7 @@ _func:
         ldm r4, {r5, r6}
         ldm r5!, {r3, r8}
         ldmfd r5!, {r3, r8}
+        ldmia sp!, {r4-r11, pc}
 
 @ CHECK: ldm.w	r4, {r4, r5, r8, r9}    @ encoding: [0x94,0xe8,0x30,0x03]
 @ CHECK: ldm.w	r4, {r5, r6}            @ encoding: [0x94,0xe8,0x60,0x00]
@@ -590,6 +607,7 @@ _func:
 @ CHECK: ldm.w	r4, {r5, r6}            @ encoding: [0x94,0xe8,0x60,0x00]
 @ CHECK: ldm.w	r5!, {r3, r8}           @ encoding: [0xb5,0xe8,0x08,0x01]
 @ CHECK: ldm.w	r5!, {r3, r8}           @ encoding: [0xb5,0xe8,0x08,0x01]
+@ CHECK: pop.w	{pc, r4, r5, r6, r7, r8, r9, r10, r11} @ encoding: [0xbd,0xe8,0xf0,0x8f]
 
 
 @------------------------------------------------------------------------------
@@ -599,11 +617,15 @@ _func:
         ldmdb r4, {r5, r6}
         ldmdb r5!, {r3, r8}
         ldmea r5!, {r3, r8}
+        ldmdb.w r4, {r5, r6}
+        ldmdb.w r5!, {r3, r8}
 
 @ CHECK: ldmdb	r4, {r4, r5, r8, r9}    @ encoding: [0x14,0xe9,0x30,0x03]
 @ CHECK: ldmdb	r4, {r5, r6}            @ encoding: [0x14,0xe9,0x60,0x00]
 @ CHECK: ldmdb	r5!, {r3, r8}           @ encoding: [0x35,0xe9,0x08,0x01]
 @ CHECK: ldmdb	r5!, {r3, r8}           @ encoding: [0x35,0xe9,0x08,0x01]
+@ CHECK: ldmdb	r4, {r5, r6}            @ encoding: [0x14,0xe9,0x60,0x00]
+@ CHECK: ldmdb	r5!, {r3, r8}           @ encoding: [0x35,0xe9,0x08,0x01]
 
 
 @------------------------------------------------------------------------------
@@ -638,9 +660,12 @@ _func:
 @ LDR(literal)
 @------------------------------------------------------------------------------
         ldr.w r5, _foo
+        ldr   lr, (_strcmp-4)
 
 @ CHECK: ldr.w	r5, _foo                @ encoding: [0x5f'A',0xf8'A',A,0x50'A']
-            @   fixup A - offset: 0, value: _foo, kind: fixup_t2_ldst_pcrel_12
+@ CHECK: @   fixup A - offset: 0, value: _foo, kind: fixup_t2_ldst_pcrel_12
+@ CHECK: ldr.w	lr, _strcmp-4           @ encoding: [0x5f'A',0xf8'A',A,0xe0'A']
+@ CHECK: @   fixup A - offset: 0, value: _strcmp-4, kind: fixup_t2_ldst_pcrel_12
 
 
 @------------------------------------------------------------------------------
@@ -813,7 +838,7 @@ _func:
 @------------------------------------------------------------------------------
         ldrh r5, _bar
 
-@ CHECK: ldrh.w	r5, _bar                @ encoding: [0xbf'A',0xf8'A',A,0x50'A']
+@ CHECK: ldrh.w	r5, _bar                @ encoding: [0x3f'A',0xf8'A',A,0x50'A']
 @ CHECK:     @   fixup A - offset: 0, value: _bar, kind: fixup_t2_ldst_pcrel_12
 
 
@@ -882,7 +907,7 @@ _func:
 @------------------------------------------------------------------------------
         ldrsb r5, _bar
 
-@ CHECK: ldrsb.w r5, _bar               @ encoding: [0x9f'A',0xf9'A',A,0x50'A']
+@ CHECK: ldrsb.w r5, _bar               @ encoding: [0x1f'A',0xf9'A',A,0x50'A']
 @ CHECK:      @   fixup A - offset: 0, value: _bar, kind: fixup_t2_ldst_pcrel_12
 
 
@@ -951,7 +976,7 @@ _func:
 @------------------------------------------------------------------------------
         ldrsh r5, _bar
 
-@ CHECK: ldrsh.w r5, _bar               @ encoding: [0xbf'A',0xf9'A',A,0x50'A']
+@ CHECK: ldrsh.w r5, _bar               @ encoding: [0x3f'A',0xf9'A',A,0x50'A']
 @ CHECK:      @   fixup A - offset: 0, value: _bar, kind: fixup_t2_ldst_pcrel_12
 
 @ TEMPORARILY DISABLED:
@@ -1066,9 +1091,13 @@ _func:
 @------------------------------------------------------------------------------
         mcr  p7, #1, r5, c1, c1, #4
         mcr2  p7, #1, r5, c1, c1, #4
+        mcr p14, #0, r4, c0, c5
+        mcr2 p4, #2, r2, c1, c3
 
 @ CHECK: mcr	p7, #1, r5, c1, c1, #4  @ encoding: [0x21,0xee,0x91,0x57]
 @ CHECK: mcr2	p7, #1, r5, c1, c1, #4  @ encoding: [0x21,0xfe,0x91,0x57]
+@ CHECK: mcr	p14, #0, r4, c0, c5, #0 @ encoding: [0x00,0xee,0x15,0x4e]
+@ CHECK: mcr2	p4, #2, r2, c1, c3, #0  @ encoding: [0x41,0xfe,0x13,0x24]
 
 
 @------------------------------------------------------------------------------
@@ -1108,6 +1137,12 @@ _func:
         moveq r1, #12
         movne.w r1, #12
         mov.w r6, #450
+        it lo
+        movlo r1, #-1
+
+        @ alias for mvn
+	mov r3, #-3
+
 
 @ CHECK: movs	r1, #21                 @ encoding: [0x15,0x21]
 @ CHECK: movs.w	r1, #21                 @ encoding: [0x5f,0xf0,0x15,0x01]
@@ -1123,6 +1158,48 @@ _func:
 @ CHECK: moveq	r1, #12                 @ encoding: [0x0c,0x21]
 @ CHECK: movne.w r1, #12                @ encoding: [0x4f,0xf0,0x0c,0x01]
 @ CHECK: mov.w	r6, #450                @ encoding: [0x4f,0xf4,0xe1,0x76]
+@ CHECK: it	lo                      @ encoding: [0x38,0xbf]
+@ CHECK: movlo.w	r1, #-1         @ encoding: [0x4f,0xf0,0xff,0x31]
+@ CHECK: mvn	r3, #2                  @ encoding: [0x6f,0xf0,0x02,0x03]
+
+@------------------------------------------------------------------------------
+@ MOV(shifted register)
+@------------------------------------------------------------------------------
+        mov r6, r2, lsl #16
+        mov r6, r2, lsr #16
+        movs r6, r2, asr #32
+        movs r6, r2, ror #5
+        movs r4, r4, lsl r5
+        movs r4, r4, lsr r5
+        movs r4, r4, asr r5
+        movs r4, r4, ror r5
+        mov r4, r4, lsl r5
+        movs r4, r4, ror r8
+        movs r4, r5, lsr r6
+        itttt eq
+        moveq r4, r4, lsl r5
+        moveq r4, r4, lsr r5
+        moveq r4, r4, asr r5
+        moveq r4, r4, ror r5
+        mov r4, r4, rrx
+
+@ CHECK: lsl.w	r6, r2, #16             @ encoding: [0x4f,0xea,0x02,0x46]
+@ CHECK: lsr.w	r6, r2, #16             @ encoding: [0x4f,0xea,0x12,0x46]
+@ CHECK: asrs	r6, r2, #32             @ encoding: [0x16,0x10]
+@ CHECK: rors.w	r6, r2, #5              @ encoding: [0x5f,0xea,0x72,0x16]
+@ CHECK: lsls	r4, r5                  @ encoding: [0xac,0x40]
+@ CHECK: lsrs	r4, r5                  @ encoding: [0xec,0x40]
+@ CHECK: asrs	r4, r5                  @ encoding: [0x2c,0x41]
+@ CHECK: rors	r4, r5                  @ encoding: [0xec,0x41]
+@ CHECK: lsl.w	r4, r4, r5              @ encoding: [0x04,0xfa,0x05,0xf4]
+@ CHECK: rors.w	r4, r4, r8              @ encoding: [0x74,0xfa,0x08,0xf4]
+@ CHECK: lsrs.w	r4, r5, r6              @ encoding: [0x35,0xfa,0x06,0xf4]
+@ CHECK: itttt	eq                      @ encoding: [0x01,0xbf]
+@ CHECK: lsleq	r4, r5                  @ encoding: [0xac,0x40]
+@ CHECK: lsreq	r4, r5                  @ encoding: [0xec,0x40]
+@ CHECK: asreq	r4, r5                  @ encoding: [0x2c,0x41]
+@ CHECK: roreq	r4, r5                  @ encoding: [0xec,0x41]
+@ CHECK: rrx	r4, r4                  @ encoding: [0x4f,0xea,0x34,0x04]
 
 
 @------------------------------------------------------------------------------
@@ -1143,9 +1220,13 @@ _func:
 @------------------------------------------------------------------------------
         mrc  p14, #0, r1, c1, c2, #4
         mrc2  p14, #0, r1, c1, c2, #4
+        mrc p11, #1, r1, c2, c2
+        mrc2 p12, #3, r3, c3, c4
 
 @ CHECK: mrc	p14, #0, r1, c1, c2, #4 @ encoding: [0x11,0xee,0x92,0x1e]
 @ CHECK: mrc2	p14, #0, r1, c1, c2, #4 @ encoding: [0x11,0xfe,0x92,0x1e]
+@ CHECK: mrc	p11, #1, r1, c2, c2, #0 @ encoding: [0x32,0xee,0x12,0x1b]
+@ CHECK: mrc2	p12, #3, r3, c3, c4, #0 @ encoding: [0x73,0xfe,0x14,0x3c]
 
 
 @------------------------------------------------------------------------------
@@ -1187,6 +1268,7 @@ _func:
         msr  spsr_fc, r0
         msr  SPSR_fsxc, r5
         msr  cpsr_fsxc, r8
+        msr  cpsr, r3
 
 @ CHECK: msr	APSR_nzcvq, r1          @ encoding: [0x81,0xf3,0x00,0x88]
 @ CHECK: msr	APSR_g, r2              @ encoding: [0x82,0xf3,0x00,0x84]
@@ -1202,6 +1284,7 @@ _func:
 @ CHECK: msr	SPSR_fc, r0             @ encoding: [0x90,0xf3,0x00,0x89]
 @ CHECK: msr	SPSR_fsxc, r5           @ encoding: [0x95,0xf3,0x00,0x8f]
 @ CHECK: msr	CPSR_fsxc, r8           @ encoding: [0x88,0xf3,0x00,0x8f]
+@ CHECK: msr	CPSR_fc, r3             @ encoding: [0x83,0xf3,0x00,0x89]
 
 
 @------------------------------------------------------------------------------
@@ -1212,12 +1295,18 @@ _func:
         mul r3, r4, r6
         it eq
         muleq r3, r4, r5
+        it le
+        mulle r4, r4, r8
+        mul r5, r6
 
 @ CHECK: muls	r3, r4, r3              @ encoding: [0x63,0x43]
 @ CHECK: mul	r3, r4, r3              @ encoding: [0x04,0xfb,0x03,0xf3]
 @ CHECK: mul	r3, r4, r6              @ encoding: [0x04,0xfb,0x06,0xf3]
 @ CHECK: it	eq                      @ encoding: [0x08,0xbf]
 @ CHECK: muleq	r3, r4, r5              @ encoding: [0x04,0xfb,0x05,0xf3]
+@ CHECK: it	le                      @ encoding: [0xd8,0xbf]
+@ CHECK: mulle	r4, r4, r8              @ encoding: [0x04,0xfb,0x08,0xf4]
+@ CHECK: mul	r5, r6, r5              @ encoding: [0x06,0xfb,0x05,0xf5]
 
 
 @------------------------------------------------------------------------------
@@ -1228,7 +1317,7 @@ _func:
         mvns r0, #0x3fc0000
         itte eq
         mvnseq r1, #12
-        mvneq r1, #12
+        mvneq.w r1, #12
         mvnne r1, #12
 
 @ CHECK: mvns	r8, #21                 @ encoding: [0x7f,0xf0,0x15,0x08]
@@ -1247,7 +1336,7 @@ _func:
         mvns r2, r3
         mvn r5, r6, lsl #19
         mvn r5, r6, lsr #9
-        mvn r5, r6, asr #4
+        mvn.w r5, r6, asr #4
         mvn r5, r6, ror #6
         mvn r5, r6, rrx
         it eq
@@ -1264,6 +1353,16 @@ _func:
 @ CHECK: mvneq	r2, r3                  @ encoding: [0xda,0x43]
 
 @------------------------------------------------------------------------------
+@ NEG
+@------------------------------------------------------------------------------
+        neg r5, r2
+        neg r5, r8
+
+@ CHECK: rsb.w	r5, r2, #0              @ encoding: [0xc2,0xf1,0x00,0x05]
+@ CHECK: rsb.w	r5, r8, #0              @ encoding: [0xc8,0xf1,0x00,0x05]
+
+
+@------------------------------------------------------------------------------
 @ NOP
 @------------------------------------------------------------------------------
         nop.w
@@ -1343,20 +1442,24 @@ _func:
         pld [r6, #33]
         pld [r6, #257]
         pld [r7, #257]
+        pld [r1, #0]
+        pld [r1, #-0]
 
 @ CHECK: pld	[r5, #-4]               @ encoding: [0x15,0xf8,0x04,0xfc]
 @ CHECK: pld	[r6, #32]               @ encoding: [0x96,0xf8,0x20,0xf0]
 @ CHECK: pld	[r6, #33]               @ encoding: [0x96,0xf8,0x21,0xf0]
 @ CHECK: pld	[r6, #257]              @ encoding: [0x96,0xf8,0x01,0xf1]
 @ CHECK: pld	[r7, #257]              @ encoding: [0x97,0xf8,0x01,0xf1]
+@ CHECK: pld	[r1]                    @ encoding: [0x91,0xf8,0x00,0xf0]
+@ CHECK: pld	[r1, #-0]               @ encoding: [0x11,0xf8,0x00,0xfc]
 
 
 @------------------------------------------------------------------------------
 @ PLD(literal)
 @------------------------------------------------------------------------------
-        pld  _foo
+@        pld  _foo
 
-@ CHECK: pld	_foo                    @ encoding: [0x9f'A',0xf8'A',A,0xf0'A']
+@ FIXME: pld	_foo                    @ encoding: [0x9f'A',0xf8'A',A,0xf0'A']
             @   fixup A - offset: 0, value: _foo, kind: fixup_t2_ldst_pcrel_12
 
 
@@ -1396,10 +1499,10 @@ _func:
 @------------------------------------------------------------------------------
 @ PLI(literal)
 @------------------------------------------------------------------------------
-        pli  _foo
+@        pli  _foo
 
 
-@ CHECK: pli	_foo                    @ encoding: [0x9f'A',0xf9'A',A,0xf0'A']
+@ FIXME: pli	_foo                    @ encoding: [0x9f'A',0xf9'A',A,0xf0'A']
            @   fixup A - offset: 0, value: _foo, kind: fixup_t2_ldst_pcrel_12
 
 
@@ -1420,6 +1523,21 @@ _func:
 @ CHECK: pli	[sp, r2, lsl #1]        @ encoding: [0x1d,0xf9,0x12,0xf0]
 @ CHECK: pli	[sp, r2]                @ encoding: [0x1d,0xf9,0x02,0xf0]
 
+@------------------------------------------------------------------------------
+@ POP (alias)
+@------------------------------------------------------------------------------
+        pop {r2, r9}
+
+@ CHECK: pop.w	{r2, r9}                @ encoding: [0xbd,0xe8,0x04,0x02]
+
+
+@------------------------------------------------------------------------------
+@ PUSH (alias)
+@------------------------------------------------------------------------------
+        push {r2, r9}
+
+@ CHECK: push.w	{r2, r9}                @ encoding: [0x2d,0xe9,0x04,0x02]
+
 
 @------------------------------------------------------------------------------
 @ QADD/QADD16/QADD8
@@ -1609,11 +1727,19 @@ _func:
         rsbs r3, r12, #0xf
         rsb r1, #0xff
         rsb r1, r1, #0xff
+        rsb r11, r11, #0
+        rsb r9, #0
+        rsbs r3, r1, #0
+        rsb r3, r1, #0
 
 @ CHECK: rsb.w	r2, r5, #1044480        @ encoding: [0xc5,0xf5,0x7f,0x22]
 @ CHECK: rsbs.w	r3, r12, #15            @ encoding: [0xdc,0xf1,0x0f,0x03]
 @ CHECK: rsb.w	r1, r1, #255            @ encoding: [0xc1,0xf1,0xff,0x01]
 @ CHECK: rsb.w	r1, r1, #255            @ encoding: [0xc1,0xf1,0xff,0x01]
+@ CHECK: rsb.w	r11, r11, #0            @ encoding: [0xcb,0xf1,0x00,0x0b]
+@ CHECK: rsb.w	r9, r9, #0              @ encoding: [0xc9,0xf1,0x00,0x09]
+@ CHECK: rsbs	r3, r1, #0              @ encoding: [0x4b,0x42]
+@ CHECK: rsb.w	r3, r1, #0              @ encoding: [0xc1,0xf1,0x00,0x03]
 
 
 @------------------------------------------------------------------------------
@@ -2287,11 +2413,13 @@ _func:
         stmdb r4, {r5, r6}
         stmdb r5!, {r3, r8}
         stmea r5!, {r3, r8}
+        stmdb.w r5, {r0, r1}
 
 @ CHECK: stmdb	r4, {r4, r5, r8, r9}    @ encoding: [0x04,0xe9,0x30,0x03]
 @ CHECK: stmdb	r4, {r5, r6}            @ encoding: [0x04,0xe9,0x60,0x00]
 @ CHECK: stmdb	r5!, {r3, r8}           @ encoding: [0x25,0xe9,0x08,0x01]
 @ CHECK: stm.w	r5!, {r3, r8}           @ encoding: [0xa5,0xe8,0x08,0x01]
+@ CHECK: stmdb	r5, {r0, r1}            @ encoding: [0x05,0xe9,0x03,0x00]
 
 
 @------------------------------------------------------------------------------
@@ -2526,6 +2654,10 @@ _func:
         sub r12, r6, #0x100
         subw r12, r6, #0x100
         subs r1, r2, #0x1f0
+	sub r2, #1
+        sub r0, r0, #32
+        subs r2, r2, #56
+        subs r2, #56
 
 @ CHECK: itet	eq                      @ encoding: [0x0a,0xbf]
 @ CHECK: subeq	r1, r2, #4              @ encoding: [0x11,0x1f]
@@ -2538,6 +2670,10 @@ _func:
 @ CHECK: sub.w	r12, r6, #256           @ encoding: [0xa6,0xf5,0x80,0x7c]
 @ CHECK: subw	r12, r6, #256           @ encoding: [0xa6,0xf2,0x00,0x1c]
 @ CHECK: subs.w	r1, r2, #496            @ encoding: [0xb2,0xf5,0xf8,0x71]
+@ CHECK: sub.w	r2, r2, #1              @ encoding: [0xa2,0xf1,0x01,0x02]
+@ CHECK: sub.w	r0, r0, #32             @ encoding: [0xa0,0xf1,0x20,0x00]
+@ CHECK: subs	r2, #56                 @ encoding: [0x38,0x3a]
+@ CHECK: subs	r2, #56                 @ encoding: [0x38,0x3a]
 
 
 @------------------------------------------------------------------------------
@@ -2550,6 +2686,12 @@ _func:
         sub r4, r5, r6, asr #5
         sub r4, r5, r6, ror #5
         sub.w r5, r2, r12, rrx
+        sub r2, sp, ip
+        sub sp, sp, ip
+        sub sp, ip
+        sub.w r2, sp, ip
+        sub.w sp, sp, ip
+        sub.w sp, ip
 
 @ CHECK: sub.w	r4, r5, r6              @ encoding: [0xa5,0xeb,0x06,0x04]
 @ CHECK: sub.w	r4, r5, r6, lsl #5      @ encoding: [0xa5,0xeb,0x46,0x14]
@@ -2558,6 +2700,12 @@ _func:
 @ CHECK: sub.w	r4, r5, r6, asr #5      @ encoding: [0xa5,0xeb,0x66,0x14]
 @ CHECK: sub.w	r4, r5, r6, ror #5      @ encoding: [0xa5,0xeb,0x76,0x14]
 @ CHECK: sub.w r5, r2, r12, rrx         @ encoding: [0xa2,0xeb,0x3c,0x05]
+@ CHECK: sub.w	r2, sp, r12             @ encoding: [0xad,0xeb,0x0c,0x02]
+@ CHECK: sub.w	sp, sp, r12             @ encoding: [0xad,0xeb,0x0c,0x0d]
+@ CHECK: sub.w	sp, sp, r12             @ encoding: [0xad,0xeb,0x0c,0x0d]
+@ CHECK: sub.w	r2, sp, r12             @ encoding: [0xad,0xeb,0x0c,0x02]
+@ CHECK: sub.w	sp, sp, r12             @ encoding: [0xad,0xeb,0x0c,0x0d]
+@ CHECK: sub.w	sp, sp, r12             @ encoding: [0xad,0xeb,0x0c,0x0d]
 
 
 @------------------------------------------------------------------------------
@@ -3211,3 +3359,30 @@ _func:
 @ CHECK: wfelt                          @ encoding: [0x20,0xbf]
 @ CHECK: wfige                          @ encoding: [0x30,0xbf]
 @ CHECK: yieldlt                        @ encoding: [0x10,0xbf]
+
+
+@------------------------------------------------------------------------------
+@ Alternate syntax for LDR*(literal) encodings
+@------------------------------------------------------------------------------
+        ldr r11, [pc, #-22]
+        ldrb r11, [pc, #-22]
+        ldrh r11, [pc, #-22]
+        ldrsb r11, [pc, #-22]
+        ldrsh r11, [pc, #-22]
+
+        ldr.w r11, [pc, #-22]
+        ldrb.w r11, [pc, #-22]
+        ldrh.w r11, [pc, #-22]
+        ldrsb.w r11, [pc, #-22]
+        ldrsh.w r11, [pc, #-22]
+
+@ CHECK: ldr.w	r11, [pc, #-22]         @ encoding: [0x5f,0xf8,0x16,0xb0]
+@ CHECK: ldrb.w	r11, [pc, #-22]         @ encoding: [0x1f,0xf8,0x16,0xb0]
+@ CHECK: ldrh.w	r11, [pc, #-22]         @ encoding: [0x3f,0xf8,0x16,0xb0]
+@ CHECK: ldrsb.w r11, [pc, #-22]        @ encoding: [0x1f,0xf9,0x16,0xb0]
+@ CHECK: ldrsh.w r11, [pc, #-22]        @ encoding: [0x3f,0xf9,0x16,0xb0]
+@ CHECK: ldr.w	r11, [pc, #-22]         @ encoding: [0x5f,0xf8,0x16,0xb0]
+@ CHECK: ldrb.w	r11, [pc, #-22]         @ encoding: [0x1f,0xf8,0x16,0xb0]
+@ CHECK: ldrh.w	r11, [pc, #-22]         @ encoding: [0x3f,0xf8,0x16,0xb0]
+@ CHECK: ldrsb.w r11, [pc, #-22]        @ encoding: [0x1f,0xf9,0x16,0xb0]
+@ CHECK: ldrsh.w r11, [pc, #-22]        @ encoding: [0x3f,0xf9,0x16,0xb0]
diff --git a/test/MC/ARM/cxx-global-constructor.ll b/test/MC/ARM/cxx-global-constructor.ll
new file mode 100644
index 0000000..e06d2c7
--- /dev/null
+++ b/test/MC/ARM/cxx-global-constructor.ll
@@ -0,0 +1,12 @@
+; RUN: llc %s -mtriple=armv7-linux-gnueabi -relocation-model=pic \
+; RUN: -filetype=obj -o - | elf-dump --dump-section-data | FileCheck %s
+
+
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @f }]
+
+define void @f() {
+  ret void
+}
+
+; Check for a relocation of type R_ARM_TARGET1.
+; CHECK: ('r_type', 0x26)
diff --git a/test/MC/ARM/darwin-ARM-reloc.s b/test/MC/ARM/darwin-ARM-reloc.s
deleted file mode 100644
index 86b45e0..0000000
--- a/test/MC/ARM/darwin-ARM-reloc.s
+++ /dev/null
@@ -1,171 +0,0 @@
-@ RUN: llvm-mc -n -triple armv7-apple-darwin10 %s -filetype=obj -o %t.obj
-@ RUN: macho-dump --dump-section-data < %t.obj > %t.dump
-@ RUN: FileCheck < %t.dump %s
-
-	.syntax unified
-        .text
-_f0:
-        bl _printf
-
-_f1:
-        bl _f0
-
-        .data
-_d0:
-Ld0_0:  
-        .long Lsc0_0 - Ld0_0
-        
-	.section	__TEXT,__cstring,cstring_literals
-Lsc0_0:
-        .long 0
-
-@ CHECK: ('cputype', 12)
-@ CHECK: ('cpusubtype', 9)
-@ CHECK: ('filetype', 1)
-@ CHECK: ('num_load_commands', 3)
-@ CHECK: ('load_commands_size', 364)
-@ CHECK: ('flag', 0)
-@ CHECK: ('load_commands', [
-@ CHECK:   # Load Command 0
-@ CHECK:  (('command', 1)
-@ CHECK:   ('size', 260)
-@ CHECK:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-@ CHECK:   ('vm_addr', 0)
-@ CHECK:   ('vm_size', 16)
-@ CHECK:   ('file_offset', 392)
-@ CHECK:   ('file_size', 16)
-@ CHECK:   ('maxprot', 7)
-@ CHECK:   ('initprot', 7)
-@ CHECK:   ('num_sections', 3)
-@ CHECK:   ('flags', 0)
-@ CHECK:   ('sections', [
-@ CHECK:     # Section 0
-@ CHECK:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-@ CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-@ CHECK:     ('address', 0)
-@ CHECK:     ('size', 8)
-@ CHECK:     ('offset', 392)
-@ CHECK:     ('alignment', 0)
-@ CHECK:     ('reloc_offset', 408)
-@ CHECK:     ('num_reloc', 2)
-@ CHECK:     ('flags', 0x80000400)
-@ CHECK:     ('reserved1', 0)
-@ CHECK:     ('reserved2', 0)
-@ CHECK:    ),
-@ CHECK:   ('_relocations', [
-@ CHECK:     # Relocation 0
-@ CHECK:     (('word-0', 0x4),
-@ CHECK:      ('word-1', 0x55000001)),
-@ CHECK:     # Relocation 1
-@ CHECK:     (('word-0', 0x0),
-@ CHECK:      ('word-1', 0x5d000003)),
-@ CHECK:   ])
-@ CHECK:   ('_section_data', 'feffffeb fdffffeb')
-@ CHECK:     # Section 1
-@ CHECK:    (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-@ CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-@ CHECK:     ('address', 8)
-@ CHECK:     ('size', 4)
-@ CHECK:     ('offset', 400)
-@ CHECK:     ('alignment', 0)
-@ CHECK:     ('reloc_offset', 424)
-@ CHECK:     ('num_reloc', 2)
-@ CHECK:     ('flags', 0x0)
-@ CHECK:     ('reserved1', 0)
-@ CHECK:     ('reserved2', 0)
-@ CHECK:    ),
-@ CHECK:   ('_relocations', [
-@ CHECK:     # Relocation 0
-@ CHECK:     (('word-0', 0xa2000000),
-@ CHECK:      ('word-1', 0xc)),
-@ CHECK:     # Relocation 1
-@ CHECK:     (('word-0', 0xa1000000),
-@ CHECK:      ('word-1', 0x8)),
-@ CHECK:   ])
-@ CHECK:   ('_section_data', '04000000')
-@ CHECK:     # Section 2
-@ CHECK:    (('section_name', '__cstring\x00\x00\x00\x00\x00\x00\x00')
-@ CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-@ CHECK:     ('address', 12)
-@ CHECK:     ('size', 4)
-@ CHECK:     ('offset', 404)
-@ CHECK:     ('alignment', 0)
-@ CHECK:     ('reloc_offset', 0)
-@ CHECK:     ('num_reloc', 0)
-@ CHECK:     ('flags', 0x2)
-@ CHECK:     ('reserved1', 0)
-@ CHECK:     ('reserved2', 0)
-@ CHECK:    ),
-@ CHECK:   ('_relocations', [
-@ CHECK:   ])
-@ CHECK:   ('_section_data', '00000000')
-@ CHECK:   ])
-@ CHECK:  ),
-@ CHECK:   # Load Command 1
-@ CHECK:  (('command', 2)
-@ CHECK:   ('size', 24)
-@ CHECK:   ('symoff', 440)
-@ CHECK:   ('nsyms', 4)
-@ CHECK:   ('stroff', 488)
-@ CHECK:   ('strsize', 24)
-@ CHECK:   ('_string_data', '\x00_printf\x00_f0\x00_f1\x00_d0\x00\x00\x00\x00')
-@ CHECK:   ('_symbols', [
-@ CHECK:     # Symbol 0
-@ CHECK:    (('n_strx', 9)
-@ CHECK:     ('n_type', 0xe)
-@ CHECK:     ('n_sect', 1)
-@ CHECK:     ('n_desc', 0)
-@ CHECK:     ('n_value', 0)
-@ CHECK:     ('_string', '_f0')
-@ CHECK:    ),
-@ CHECK:     # Symbol 1
-@ CHECK:    (('n_strx', 13)
-@ CHECK:     ('n_type', 0xe)
-@ CHECK:     ('n_sect', 1)
-@ CHECK:     ('n_desc', 0)
-@ CHECK:     ('n_value', 4)
-@ CHECK:     ('_string', '_f1')
-@ CHECK:    ),
-@ CHECK:     # Symbol 2
-@ CHECK:    (('n_strx', 17)
-@ CHECK:     ('n_type', 0xe)
-@ CHECK:     ('n_sect', 2)
-@ CHECK:     ('n_desc', 0)
-@ CHECK:     ('n_value', 8)
-@ CHECK:     ('_string', '_d0')
-@ CHECK:    ),
-@ CHECK:     # Symbol 3
-@ CHECK:    (('n_strx', 1)
-@ CHECK:     ('n_type', 0x1)
-@ CHECK:     ('n_sect', 0)
-@ CHECK:     ('n_desc', 0)
-@ CHECK:     ('n_value', 0)
-@ CHECK:     ('_string', '_printf')
-@ CHECK:    ),
-@ CHECK:   ])
-@ CHECK:  ),
-@ CHECK:   # Load Command 2
-@ CHECK:  (('command', 11)
-@ CHECK:   ('size', 80)
-@ CHECK:   ('ilocalsym', 0)
-@ CHECK:   ('nlocalsym', 3)
-@ CHECK:   ('iextdefsym', 3)
-@ CHECK:   ('nextdefsym', 0)
-@ CHECK:   ('iundefsym', 3)
-@ CHECK:   ('nundefsym', 1)
-@ CHECK:   ('tocoff', 0)
-@ CHECK:   ('ntoc', 0)
-@ CHECK:   ('modtaboff', 0)
-@ CHECK:   ('nmodtab', 0)
-@ CHECK:   ('extrefsymoff', 0)
-@ CHECK:   ('nextrefsyms', 0)
-@ CHECK:   ('indirectsymoff', 0)
-@ CHECK:   ('nindirectsyms', 0)
-@ CHECK:   ('extreloff', 0)
-@ CHECK:   ('nextrel', 0)
-@ CHECK:   ('locreloff', 0)
-@ CHECK:   ('nlocrel', 0)
-@ CHECK:   ('_indirect_symbols', [
-@ CHECK:   ])
-@ CHECK:  ),
-@ CHECK: ])
diff --git a/test/MC/ARM/dg.exp b/test/MC/ARM/dg.exp
deleted file mode 100644
index 055fa25..0000000
--- a/test/MC/ARM/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target ARM] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,s}]]
-}
diff --git a/test/MC/ARM/diagnostics.s b/test/MC/ARM/diagnostics.s
index f722dd7..7da79c3 100644
--- a/test/MC/ARM/diagnostics.s
+++ b/test/MC/ARM/diagnostics.s
@@ -216,7 +216,7 @@
         @ Out of order STM registers
         stmda sp!, {r5, r2}
 
-@ CHECK-ERRORS: error: register list not in ascending order
+@ CHECK-ERRORS: warning: register list not in ascending order
 @ CHECK-ERRORS:         stmda     sp!, {r5, r2}
 @ CHECK-ERRORS:                            ^
 
diff --git a/test/MC/ARM/dot-req.s b/test/MC/ARM/dot-req.s
new file mode 100644
index 0000000..3b4cf5c
--- /dev/null
+++ b/test/MC/ARM/dot-req.s
@@ -0,0 +1,11 @@
+@ RUN: llvm-mc -triple=armv7-apple-darwin -show-encoding < %s | FileCheck %s
+        .syntax unified
+bar:
+fred .req r5
+        mov r11, fred
+.unreq fred
+fred .req r6
+        mov r1, fred
+
+@ CHECK: mov	r11, r5                 @ encoding: [0x05,0xb0,0xa0,0xe1]
+@ CHECK: mov	r1, r6                  @ encoding: [0x06,0x10,0xa0,0xe1]
diff --git a/test/MC/ARM/elf-reloc-01.ll b/test/MC/ARM/elf-reloc-01.ll
index e6efe7e..6899d92 100644
--- a/test/MC/ARM/elf-reloc-01.ll
+++ b/test/MC/ARM/elf-reloc-01.ll
@@ -42,12 +42,12 @@ entry:
   ]
 
 bb:                                               ; preds = %entry
-  volatile store i32 11, i32* @var_tls, align 4
-  volatile store double 2.200000e+01, double* @var_tls_double, align 8
-  volatile store i32 33, i32* @var_static, align 4
-  volatile store double 4.400000e+01, double* @var_static_double, align 8
-  volatile store i32 55, i32* @var_global, align 4
-  volatile store double 6.600000e+01, double* @var_global_double, align 8
+  store volatile i32 11, i32* @var_tls, align 4
+  store volatile double 2.200000e+01, double* @var_tls_double, align 8
+  store volatile i32 33, i32* @var_static, align 4
+  store volatile double 4.400000e+01, double* @var_static_double, align 8
+  store volatile i32 55, i32* @var_global, align 4
+  store volatile double 6.600000e+01, double* @var_global_double, align 8
   br label %bb3
 
 bb2:                                              ; preds = %entry
diff --git a/test/MC/ARM/elf-reloc-condcall.s b/test/MC/ARM/elf-reloc-condcall.s
new file mode 100644
index 0000000..dcc62d3
--- /dev/null
+++ b/test/MC/ARM/elf-reloc-condcall.s
@@ -0,0 +1,23 @@
+// RUN: llvm-mc -triple=armv7-linux-gnueabi -filetype=obj %s -o - | \
+// RUN:   elf-dump | FileCheck -check-prefix=OBJ %s
+
+        bleq some_label
+        bl some_label
+        blx some_label
+// OBJ: .rel.text
+
+// OBJ: 'r_offset', 0x00000000
+// OBJ-NEXT:  'r_sym', 0x000004
+// OBJ-NEXT: 'r_type', 0x1d
+
+// OBJ: 'r_offset', 0x00000004
+// OBJ-NEXT:  'r_sym', 0x000004
+// OBJ-NEXT: 'r_type', 0x1c
+
+// OBJ: 'r_offset', 0x00000008
+// OBJ-NEXT:  'r_sym', 0x000004
+// OBJ-NEXT: 'r_type', 0x1c
+
+// OBJ: .symtab
+// OBJ: Symbol 4
+// OBJ-NEXT: some_label
+\ No newline at end of file
diff --git a/test/MC/ARM/elf-thumbfunc-reloc.s b/test/MC/ARM/elf-thumbfunc-reloc.s
new file mode 100644
index 0000000..4a311dd
--- /dev/null
+++ b/test/MC/ARM/elf-thumbfunc-reloc.s
@@ -0,0 +1,23 @@
+@@ test st_value bit 0 of thumb function
+@ RUN: llvm-mc %s -triple=arm-freebsd-eabi -filetype=obj -o - | \
+@ RUN: elf-dump  | FileCheck %s
+
+
+	.syntax unified
+        .text
+        .globl  f
+        .align  2
+        .type   f,%function
+        .code   16
+        .thumb_func
+f:
+        push    {r7, lr}
+        mov     r7, sp
+        bl      g
+        pop     {r7, pc}
+
+@@ make sure an R_ARM_THM_CALL relocation is generated for the call to g
+@CHECK:        ('_relocations', [
+@CHECK:         (('r_offset', 0x00000004)
+@CHECK-NEXT:     ('r_sym', 0x{{[0-9a-fA-F]+}})
+@CHECK-NEXT:     ('r_type', 0x0a)
diff --git a/test/MC/ARM/lit.local.cfg b/test/MC/ARM/lit.local.cfg
new file mode 100644
index 0000000..5700913
--- /dev/null
+++ b/test/MC/ARM/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp', '.s']
+
+targets = set(config.root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
+
diff --git a/test/MC/ARM/mode-switch.s b/test/MC/ARM/mode-switch.s
index 9d49954..afcc082 100644
--- a/test/MC/ARM/mode-switch.s
+++ b/test/MC/ARM/mode-switch.s
@@ -13,3 +13,14 @@
 .code 16
         adds    r0, r0, r1
 @ CHECK: adds	r0, r0, r1              @ encoding: [0x40,0x18]
+
+.arm
+	add	r0, r0, r1
+@ CHECK: add	r0, r0, r1              @ encoding: [0x01,0x00,0x80,0xe0]
+
+.thumb
+	add.w	r0, r0, r1
+        adds    r0, r0, r1
+
+@ CHECK: add.w	r0, r0, r1              @ encoding: [0x00,0xeb,0x01,0x00]
+@ CHECK: adds	r0, r0, r1              @ encoding: [0x40,0x18]
diff --git a/test/MC/ARM/neon-add-encoding.s b/test/MC/ARM/neon-add-encoding.s
index e425397..1fdfa4c 100644
--- a/test/MC/ARM/neon-add-encoding.s
+++ b/test/MC/ARM/neon-add-encoding.s
@@ -90,39 +90,81 @@
 @ CHECK: vrhadd.u32	q8, q8, q9      @ encoding: [0xe2,0x01,0x60,0xf3]
 	vrhadd.u32	q8, q8, q9
 
-@ CHECK: vqadd.s8	d16, d16, d17   @ encoding: [0xb1,0x00,0x40,0xf2]
 	vqadd.s8	d16, d16, d17
-@ CHECK: vqadd.s16	d16, d16, d17   @ encoding: [0xb1,0x00,0x50,0xf2]
 	vqadd.s16	d16, d16, d17
-@ CHECK: vqadd.s32	d16, d16, d17   @ encoding: [0xb1,0x00,0x60,0xf2]
 	vqadd.s32	d16, d16, d17
-@ CHECK: vqadd.s64	d16, d16, d17   @ encoding: [0xb1,0x00,0x70,0xf2]
 	vqadd.s64	d16, d16, d17
-@ CHECK: vqadd.u8	d16, d16, d17   @ encoding: [0xb1,0x00,0x40,0xf3]
 	vqadd.u8	d16, d16, d17
-@ CHECK: vqadd.u16	d16, d16, d17   @ encoding: [0xb1,0x00,0x50,0xf3]
 	vqadd.u16	d16, d16, d17
-@ CHECK: vqadd.u32	d16, d16, d17   @ encoding: [0xb1,0x00,0x60,0xf3]
 	vqadd.u32	d16, d16, d17
-@ CHECK: vqadd.u64	d16, d16, d17   @ encoding: [0xb1,0x00,0x70,0xf3]
 	vqadd.u64	d16, d16, d17
-@ CHECK: vqadd.s8	q8, q8, q9      @ encoding: [0xf2,0x00,0x40,0xf2]
+
+@ CHECK: vqadd.s8	d16, d16, d17   @ encoding: [0xb1,0x00,0x40,0xf2]
+@ CHECK: vqadd.s16	d16, d16, d17   @ encoding: [0xb1,0x00,0x50,0xf2]
+@ CHECK: vqadd.s32	d16, d16, d17   @ encoding: [0xb1,0x00,0x60,0xf2]
+@ CHECK: vqadd.s64	d16, d16, d17   @ encoding: [0xb1,0x00,0x70,0xf2]
+@ CHECK: vqadd.u8	d16, d16, d17   @ encoding: [0xb1,0x00,0x40,0xf3]
+@ CHECK: vqadd.u16	d16, d16, d17   @ encoding: [0xb1,0x00,0x50,0xf3]
+@ CHECK: vqadd.u32	d16, d16, d17   @ encoding: [0xb1,0x00,0x60,0xf3]
+@ CHECK: vqadd.u64	d16, d16, d17   @ encoding: [0xb1,0x00,0x70,0xf3]
+
 	vqadd.s8	q8, q8, q9
-@ CHECK: vqadd.s16	q8, q8, q9      @ encoding: [0xf2,0x00,0x50,0xf2]
 	vqadd.s16	q8, q8, q9
-@ CHECK: vqadd.s32	q8, q8, q9      @ encoding: [0xf2,0x00,0x60,0xf2]
 	vqadd.s32	q8, q8, q9
-@ CHECK: vqadd.s64	q8, q8, q9      @ encoding: [0xf2,0x00,0x70,0xf2]
 	vqadd.s64	q8, q8, q9
-@ CHECK: vqadd.u8	q8, q8, q9      @ encoding: [0xf2,0x00,0x40,0xf3]
 	vqadd.u8	q8, q8, q9
-@ CHECK: vqadd.u16	q8, q8, q9      @ encoding: [0xf2,0x00,0x50,0xf3]
 	vqadd.u16	q8, q8, q9
-@ CHECK: vqadd.u32	q8, q8, q9      @ encoding: [0xf2,0x00,0x60,0xf3]
 	vqadd.u32	q8, q8, q9
-@ CHECK: vqadd.u64	q8, q8, q9      @ encoding: [0xf2,0x00,0x70,0xf3]
 	vqadd.u64	q8, q8, q9
 
+@ CHECK: vqadd.s8	q8, q8, q9      @ encoding: [0xf2,0x00,0x40,0xf2]
+@ CHECK: vqadd.s16	q8, q8, q9      @ encoding: [0xf2,0x00,0x50,0xf2]
+@ CHECK: vqadd.s32	q8, q8, q9      @ encoding: [0xf2,0x00,0x60,0xf2]
+@ CHECK: vqadd.s64	q8, q8, q9      @ encoding: [0xf2,0x00,0x70,0xf2]
+@ CHECK: vqadd.u8	q8, q8, q9      @ encoding: [0xf2,0x00,0x40,0xf3]
+@ CHECK: vqadd.u16	q8, q8, q9      @ encoding: [0xf2,0x00,0x50,0xf3]
+@ CHECK: vqadd.u32	q8, q8, q9      @ encoding: [0xf2,0x00,0x60,0xf3]
+@ CHECK: vqadd.u64	q8, q8, q9      @ encoding: [0xf2,0x00,0x70,0xf3]
+
+
+@ two-operand variants.
+	vqadd.s8	d16, d17
+	vqadd.s16	d16, d17
+	vqadd.s32	d16, d17
+	vqadd.s64	d16, d17
+	vqadd.u8	d16, d17
+	vqadd.u16	d16, d17
+	vqadd.u32	d16, d17
+	vqadd.u64	d16, d17
+
+@ CHECK: vqadd.s8	d16, d16, d17   @ encoding: [0xb1,0x00,0x40,0xf2]
+@ CHECK: vqadd.s16	d16, d16, d17   @ encoding: [0xb1,0x00,0x50,0xf2]
+@ CHECK: vqadd.s32	d16, d16, d17   @ encoding: [0xb1,0x00,0x60,0xf2]
+@ CHECK: vqadd.s64	d16, d16, d17   @ encoding: [0xb1,0x00,0x70,0xf2]
+@ CHECK: vqadd.u8	d16, d16, d17   @ encoding: [0xb1,0x00,0x40,0xf3]
+@ CHECK: vqadd.u16	d16, d16, d17   @ encoding: [0xb1,0x00,0x50,0xf3]
+@ CHECK: vqadd.u32	d16, d16, d17   @ encoding: [0xb1,0x00,0x60,0xf3]
+@ CHECK: vqadd.u64	d16, d16, d17   @ encoding: [0xb1,0x00,0x70,0xf3]
+
+	vqadd.s8	q8, q9
+	vqadd.s16	q8, q9
+	vqadd.s32	q8, q9
+	vqadd.s64	q8, q9
+	vqadd.u8	q8, q9
+	vqadd.u16	q8, q9
+	vqadd.u32	q8, q9
+	vqadd.u64	q8, q9
+
+@ CHECK: vqadd.s8	q8, q8, q9      @ encoding: [0xf2,0x00,0x40,0xf2]
+@ CHECK: vqadd.s16	q8, q8, q9      @ encoding: [0xf2,0x00,0x50,0xf2]
+@ CHECK: vqadd.s32	q8, q8, q9      @ encoding: [0xf2,0x00,0x60,0xf2]
+@ CHECK: vqadd.s64	q8, q8, q9      @ encoding: [0xf2,0x00,0x70,0xf2]
+@ CHECK: vqadd.u8	q8, q8, q9      @ encoding: [0xf2,0x00,0x40,0xf3]
+@ CHECK: vqadd.u16	q8, q8, q9      @ encoding: [0xf2,0x00,0x50,0xf3]
+@ CHECK: vqadd.u32	q8, q8, q9      @ encoding: [0xf2,0x00,0x60,0xf3]
+@ CHECK: vqadd.u64	q8, q8, q9      @ encoding: [0xf2,0x00,0x70,0xf3]
+
+
 @ CHECK: vaddhn.i16	d16, q8, q9     @ encoding: [0xa2,0x04,0xc0,0xf2]
 	vaddhn.i16	d16, q8, q9
 @ CHECK: vaddhn.i32	d16, q8, q9     @ encoding: [0xa2,0x04,0xd0,0xf2]
@@ -135,3 +177,43 @@
 	vraddhn.i32	d16, q8, q9
 @ CHECK: vraddhn.i64	d16, q8, q9     @ encoding: [0xa2,0x04,0xe0,0xf3]
 	vraddhn.i64	d16, q8, q9
+
+
+@ Two-operand variants
+
+	vadd.i8  d6, d5
+	vadd.i16 d7, d1
+	vadd.i32 d8, d2
+	vadd.i64 d9, d3
+
+	vadd.i8  q6, q5
+	vadd.i16 q7, q1
+	vadd.i32 q8, q2
+	vadd.i64 q9, q3
+
+@ CHECK: vadd.i8	d6, d6, d5      @ encoding: [0x05,0x68,0x06,0xf2]
+@ CHECK: vadd.i16	d7, d7, d1      @ encoding: [0x01,0x78,0x17,0xf2]
+@ CHECK: vadd.i32	d8, d8, d2      @ encoding: [0x02,0x88,0x28,0xf2]
+@ CHECK: vadd.i64	d9, d9, d3      @ encoding: [0x03,0x98,0x39,0xf2]
+
+@ CHECK: vadd.i8	q6, q6, q5      @ encoding: [0x4a,0xc8,0x0c,0xf2]
+@ CHECK: vadd.i16	q7, q7, q1      @ encoding: [0x42,0xe8,0x1e,0xf2]
+@ CHECK: vadd.i32	q8, q8, q2      @ encoding: [0xc4,0x08,0x60,0xf2]
+@ CHECK: vadd.i64	q9, q9, q3      @ encoding: [0xc6,0x28,0x72,0xf2]
+
+
+	vaddw.s8  q6, d5
+	vaddw.s16 q7, d1
+	vaddw.s32 q8, d2
+
+	vaddw.u8  q6, d5
+	vaddw.u16 q7, d1
+	vaddw.u32 q8, d2
+
+@ CHECK: vaddw.s8	q6, q6, d5      @ encoding: [0x05,0xc1,0x8c,0xf2]
+@ CHECK: vaddw.s16	q7, q7, d1      @ encoding: [0x01,0xe1,0x9e,0xf2]
+@ CHECK: vaddw.s32	q8, q8, d2      @ encoding: [0x82,0x01,0xe0,0xf2]
+
+@ CHECK: vaddw.u8	q6, q6, d5      @ encoding: [0x05,0xc1,0x8c,0xf3]
+@ CHECK: vaddw.u16	q7, q7, d1      @ encoding: [0x01,0xe1,0x9e,0xf3]
+@ CHECK: vaddw.u32	q8, q8, d2      @ encoding: [0x82,0x01,0xe0,0xf3]
diff --git a/test/MC/ARM/neon-bitwise-encoding.s b/test/MC/ARM/neon-bitwise-encoding.s
index 81e2c4d..2ce9bcc 100644
--- a/test/MC/ARM/neon-bitwise-encoding.s
+++ b/test/MC/ARM/neon-bitwise-encoding.s
@@ -22,9 +22,9 @@
 	vorr.i32	q8, #0x1000000
 	vorr.i32	q8, #0x0
 
-@ FIXME: vorr.i32	d16, #0x1000000 @ encoding: [0x11,0x07,0xc0,0xf2]
-@ FIXME: vorr.i32	q8, #0x1000000  @ encoding: [0x51,0x07,0xc0,0xf2]
-@ FIXME: vorr.i32	q8, #0x0        @ encoding: [0x50,0x01,0xc0,0xf2]
+@ CHECK: vorr.i32	d16, #0x1000000 @ encoding: [0x11,0x07,0xc0,0xf2]
+@ CHECK: vorr.i32	q8, #0x1000000  @ encoding: [0x51,0x07,0xc0,0xf2]
+@ CHECK: vorr.i32	q8, #0x0        @ encoding: [0x50,0x01,0xc0,0xf2]
 
 	vbic	d16, d17, d16
 	vbic	q8, q8, q9
@@ -33,8 +33,8 @@
 
 @ CHECK: vbic	d16, d17, d16           @ encoding: [0xb0,0x01,0x51,0xf2]
 @ CHECK: vbic	q8, q8, q9              @ encoding: [0xf2,0x01,0x50,0xf2]
-@ FIXME: vbic.i32	d16, #0xFF000000 @ encoding: [0x3f,0x07,0xc7,0xf3]
-@ FIXME: vbic.i32	q8, #0xFF000000 @ encoding: [0x7f,0x07,0xc7,0xf3]
+@ CHECK: vbic.i32	d16, #0xff000000 @ encoding: [0x3f,0x07,0xc7,0xf3]
+@ CHECK: vbic.i32	q8, #0xff000000 @ encoding: [0x7f,0x07,0xc7,0xf3]
 
 	vorn	d16, d17, d16
 	vorn	q8, q8, q9
@@ -53,3 +53,211 @@
 
 @ CHECK: vbsl	d18, d17, d16           @ encoding: [0xb0,0x21,0x51,0xf3]
 @ CHECK: vbsl	q8, q10, q9             @ encoding: [0xf2,0x01,0x54,0xf3]
+
+
+@ Size suffices are optional.
+        veor q4, q7, q3
+        veor.8 q4, q7, q3
+        veor.16 q4, q7, q3
+        veor.32 q4, q7, q3
+        veor.64 q4, q7, q3
+
+        veor.i8 q4, q7, q3
+        veor.i16 q4, q7, q3
+        veor.i32 q4, q7, q3
+        veor.i64 q4, q7, q3
+
+        veor.s8 q4, q7, q3
+        veor.s16 q4, q7, q3
+        veor.s32 q4, q7, q3
+        veor.s64 q4, q7, q3
+
+        veor.u8 q4, q7, q3
+        veor.u16 q4, q7, q3
+        veor.u32 q4, q7, q3
+        veor.u64 q4, q7, q3
+
+        veor.p8 q4, q7, q3
+        veor.p16 q4, q7, q3
+        veor.f32 q4, q7, q3
+        veor.f64 q4, q7, q3
+
+        veor.f q4, q7, q3
+        veor.d q4, q7, q3
+
+@ CHECK: veor	q4, q7, q3              @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor	q4, q7, q3              @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor	q4, q7, q3              @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor	q4, q7, q3              @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor	q4, q7, q3              @ encoding: [0x56,0x81,0x0e,0xf3]
+
+@ CHECK: veor	q4, q7, q3              @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor	q4, q7, q3              @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor	q4, q7, q3              @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor	q4, q7, q3              @ encoding: [0x56,0x81,0x0e,0xf3]
+
+@ CHECK: veor	q4, q7, q3              @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor	q4, q7, q3              @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor	q4, q7, q3              @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor	q4, q7, q3              @ encoding: [0x56,0x81,0x0e,0xf3]
+
+@ CHECK: veor	q4, q7, q3              @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor	q4, q7, q3              @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor	q4, q7, q3              @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor	q4, q7, q3              @ encoding: [0x56,0x81,0x0e,0xf3]
+
+@ CHECK: veor	q4, q7, q3              @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor	q4, q7, q3              @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor	q4, q7, q3              @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor	q4, q7, q3              @ encoding: [0x56,0x81,0x0e,0xf3]
+
+@ CHECK: veor	q4, q7, q3              @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor	q4, q7, q3              @ encoding: [0x56,0x81,0x0e,0xf3]
+
+
+        vand d4, d7, d3
+        vand.8 d4, d7, d3
+        vand.16 d4, d7, d3
+        vand.32 d4, d7, d3
+        vand.64 d4, d7, d3
+
+        vand.i8 d4, d7, d3
+        vand.i16 d4, d7, d3
+        vand.i32 d4, d7, d3
+        vand.i64 d4, d7, d3
+
+        vand.s8 d4, d7, d3
+        vand.s16 d4, d7, d3
+        vand.s32 d4, d7, d3
+        vand.s64 d4, d7, d3
+
+        vand.u8 d4, d7, d3
+        vand.u16 d4, d7, d3
+        vand.u32 d4, d7, d3
+        vand.u64 d4, d7, d3
+
+        vand.p8 d4, d7, d3
+        vand.p16 d4, d7, d3
+        vand.f32 d4, d7, d3
+        vand.f64 d4, d7, d3
+
+        vand.f d4, d7, d3
+        vand.d d4, d7, d3
+
+@ CHECK: vand	d4, d7, d3              @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand	d4, d7, d3              @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand	d4, d7, d3              @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand	d4, d7, d3              @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand	d4, d7, d3              @ encoding: [0x13,0x41,0x07,0xf2]
+
+@ CHECK: vand	d4, d7, d3              @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand	d4, d7, d3              @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand	d4, d7, d3              @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand	d4, d7, d3              @ encoding: [0x13,0x41,0x07,0xf2]
+
+@ CHECK: vand	d4, d7, d3              @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand	d4, d7, d3              @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand	d4, d7, d3              @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand	d4, d7, d3              @ encoding: [0x13,0x41,0x07,0xf2]
+
+@ CHECK: vand	d4, d7, d3              @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand	d4, d7, d3              @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand	d4, d7, d3              @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand	d4, d7, d3              @ encoding: [0x13,0x41,0x07,0xf2]
+
+@ CHECK: vand	d4, d7, d3              @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand	d4, d7, d3              @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand	d4, d7, d3              @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand	d4, d7, d3              @ encoding: [0x13,0x41,0x07,0xf2]
+
+@ CHECK: vand	d4, d7, d3              @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand	d4, d7, d3              @ encoding: [0x13,0x41,0x07,0xf2]
+
+        vorr d4, d7, d3
+        vorr.8 d4, d7, d3
+        vorr.16 d4, d7, d3
+        vorr.32 d4, d7, d3
+        vorr.64 d4, d7, d3
+
+        vorr.i8 d4, d7, d3
+        vorr.i16 d4, d7, d3
+        vorr.i32 d4, d7, d3
+        vorr.i64 d4, d7, d3
+
+        vorr.s8 d4, d7, d3
+        vorr.s16 d4, d7, d3
+        vorr.s32 q4, q7, q3
+        vorr.s64 q4, q7, q3
+
+        vorr.u8 q4, q7, q3
+        vorr.u16 q4, q7, q3
+        vorr.u32 q4, q7, q3
+        vorr.u64 q4, q7, q3
+
+        vorr.p8 q4, q7, q3
+        vorr.p16 q4, q7, q3
+        vorr.f32 q4, q7, q3
+        vorr.f64 q4, q7, q3
+
+        vorr.f q4, q7, q3
+        vorr.d q4, q7, q3
+
+@ CHECK: vorr	d4, d7, d3              @ encoding: [0x13,0x41,0x27,0xf2]
+@ CHECK: vorr	d4, d7, d3              @ encoding: [0x13,0x41,0x27,0xf2]
+@ CHECK: vorr	d4, d7, d3              @ encoding: [0x13,0x41,0x27,0xf2]
+@ CHECK: vorr	d4, d7, d3              @ encoding: [0x13,0x41,0x27,0xf2]
+@ CHECK: vorr	d4, d7, d3              @ encoding: [0x13,0x41,0x27,0xf2]
+
+@ CHECK: vorr	d4, d7, d3              @ encoding: [0x13,0x41,0x27,0xf2]
+@ CHECK: vorr	d4, d7, d3              @ encoding: [0x13,0x41,0x27,0xf2]
+@ CHECK: vorr	d4, d7, d3              @ encoding: [0x13,0x41,0x27,0xf2]
+@ CHECK: vorr	d4, d7, d3              @ encoding: [0x13,0x41,0x27,0xf2]
+
+@ CHECK: vorr	d4, d7, d3              @ encoding: [0x13,0x41,0x27,0xf2]
+@ CHECK: vorr	d4, d7, d3              @ encoding: [0x13,0x41,0x27,0xf2]
+@ CHECK: vorr	q4, q7, q3              @ encoding: [0x56,0x81,0x2e,0xf2]
+@ CHECK: vorr	q4, q7, q3              @ encoding: [0x56,0x81,0x2e,0xf2]
+
+@ CHECK: vorr	q4, q7, q3              @ encoding: [0x56,0x81,0x2e,0xf2]
+@ CHECK: vorr	q4, q7, q3              @ encoding: [0x56,0x81,0x2e,0xf2]
+@ CHECK: vorr	q4, q7, q3              @ encoding: [0x56,0x81,0x2e,0xf2]
+@ CHECK: vorr	q4, q7, q3              @ encoding: [0x56,0x81,0x2e,0xf2]
+
+@ CHECK: vorr	q4, q7, q3              @ encoding: [0x56,0x81,0x2e,0xf2]
+@ CHECK: vorr	q4, q7, q3              @ encoding: [0x56,0x81,0x2e,0xf2]
+@ CHECK: vorr	q4, q7, q3              @ encoding: [0x56,0x81,0x2e,0xf2]
+@ CHECK: vorr	q4, q7, q3              @ encoding: [0x56,0x81,0x2e,0xf2]
+
+@ CHECK: vorr	q4, q7, q3              @ encoding: [0x56,0x81,0x2e,0xf2]
+@ CHECK: vorr	q4, q7, q3              @ encoding: [0x56,0x81,0x2e,0xf2]
+
+@ Two-operand aliases
+	vand.s8  q6, q5
+	vand.s16 q7, q1
+	vand.s32 q8, q2
+	vand.f64 q8, q2
+
+	veor.8   q6, q5
+	veor.p16 q7, q1
+	veor.u32 q8, q2
+	veor.d   q8, q2
+
+	veor.i8  q6, q5
+	veor.16  q7, q1
+	veor.f   q8, q2
+	veor.i64 q8, q2
+
+@ CHECK: vand	q6, q6, q5              @ encoding: [0x5a,0xc1,0x0c,0xf2]
+@ CHECK: vand	q7, q7, q1              @ encoding: [0x52,0xe1,0x0e,0xf2]
+@ CHECK: vand	q8, q8, q2              @ encoding: [0xd4,0x01,0x40,0xf2]
+@ CHECK: vand	q8, q8, q2              @ encoding: [0xd4,0x01,0x40,0xf2]
+
+@ CHECK: veor	q6, q6, q5              @ encoding: [0x5a,0xc1,0x0c,0xf3]
+@ CHECK: veor	q7, q7, q1              @ encoding: [0x52,0xe1,0x0e,0xf3]
+@ CHECK: veor	q8, q8, q2              @ encoding: [0xd4,0x01,0x40,0xf3]
+@ CHECK: veor	q8, q8, q2              @ encoding: [0xd4,0x01,0x40,0xf3]
+
+@ CHECK: veor	q6, q6, q5              @ encoding: [0x5a,0xc1,0x0c,0xf3]
+@ CHECK: veor	q7, q7, q1              @ encoding: [0x52,0xe1,0x0e,0xf3]
+@ CHECK: veor	q8, q8, q2              @ encoding: [0xd4,0x01,0x40,0xf3]
+@ CHECK: veor	q8, q8, q2              @ encoding: [0xd4,0x01,0x40,0xf3]
diff --git a/test/MC/ARM/neon-cmp-encoding.s b/test/MC/ARM/neon-cmp-encoding.s
index d94e2f7..b3aedb8 100644
--- a/test/MC/ARM/neon-cmp-encoding.s
+++ b/test/MC/ARM/neon-cmp-encoding.s
@@ -111,3 +111,66 @@
 @ CHECK: vcle.s8	d16, d16, #0    @ encoding: [0xa0,0x01,0xf1,0xf3]
 @ CHECK: vcgt.s8	d16, d16, #0    @ encoding: [0x20,0x00,0xf1,0xf3]
 @ CHECK: vclt.s8	d16, d16, #0    @ encoding: [0x20,0x02,0xf1,0xf3]
+
+
+        vclt.s8 q12, q13, q3
+        vclt.s16 q12, q13, q3
+        vclt.s32 q12, q13, q3
+        vclt.u8 q12, q13, q3
+        vclt.u16 q12, q13, q3
+        vclt.u32 q12, q13, q3
+        vclt.f32 q12, q13, q3
+
+        vclt.s8 d12, d13, d3
+        vclt.s16 d12, d13, d3
+        vclt.s32 d12, d13, d3
+        vclt.u8 d12, d13, d3
+        vclt.u16 d12, d13, d3
+        vclt.u32 d12, d13, d3
+        vclt.f32 d12, d13, d3
+
+@ CHECK: vcgt.s8	q12, q3, q13    @ encoding: [0x6a,0x83,0x46,0xf2]
+@ CHECK: vcgt.s16	q12, q3, q13    @ encoding: [0x6a,0x83,0x56,0xf2]
+@ CHECK: vcgt.s32	q12, q3, q13    @ encoding: [0x6a,0x83,0x66,0xf2]
+@ CHECK: vcgt.u8	q12, q3, q13    @ encoding: [0x6a,0x83,0x46,0xf3]
+@ CHECK: vcgt.u16	q12, q3, q13    @ encoding: [0x6a,0x83,0x56,0xf3]
+@ CHECK: vcgt.u32	q12, q3, q13    @ encoding: [0x6a,0x83,0x66,0xf3]
+@ CHECK: vcgt.f32	q12, q3, q13    @ encoding: [0x6a,0x8e,0x66,0xf3]
+
+@ CHECK: vcgt.s8	d12, d3, d13    @ encoding: [0x0d,0xc3,0x03,0xf2]
+@ CHECK: vcgt.s16	d12, d3, d13    @ encoding: [0x0d,0xc3,0x13,0xf2]
+@ CHECK: vcgt.s32	d12, d3, d13    @ encoding: [0x0d,0xc3,0x23,0xf2]
+@ CHECK: vcgt.u8	d12, d3, d13    @ encoding: [0x0d,0xc3,0x03,0xf3]
+@ CHECK: vcgt.u16	d12, d3, d13    @ encoding: [0x0d,0xc3,0x13,0xf3]
+@ CHECK: vcgt.u32	d12, d3, d13    @ encoding: [0x0d,0xc3,0x23,0xf3]
+@ CHECK: vcgt.f32	d12, d3, d13    @ encoding: [0x0d,0xce,0x23,0xf3]
+
+	vcle.s8	d16, d16, d17
+	vcle.s16 d16, d16, d17
+	vcle.s32 d16, d16, d17
+	vcle.u8	d16, d16, d17
+	vcle.u16 d16, d16, d17
+	vcle.u32 d16, d16, d17
+	vcle.f32 d16, d16, d17
+	vcle.s8	q8, q8, q9
+	vcle.s16 q8, q8, q9
+	vcle.s32 q8, q8, q9
+	vcle.u8	q8, q8, q9
+	vcle.u16 q8, q8, q9
+	vcle.u32 q8, q8, q9
+	vcle.f32 q8, q8, q9
+
+@ CHECK: vcge.s8	d16, d17, d16           @ encoding: [0xb0,0x03,0x41,0xf2]
+@ CHECK: vcge.s16	d16, d17, d16   @ encoding: [0xb0,0x03,0x51,0xf2]
+@ CHECK: vcge.s32	d16, d17, d16   @ encoding: [0xb0,0x03,0x61,0xf2]
+@ CHECK: vcge.u8	d16, d17, d16           @ encoding: [0xb0,0x03,0x41,0xf3]
+@ CHECK: vcge.u16	d16, d17, d16   @ encoding: [0xb0,0x03,0x51,0xf3]
+@ CHECK: vcge.u32	d16, d17, d16   @ encoding: [0xb0,0x03,0x61,0xf3]
+@ CHECK: vcge.f32	d16, d17, d16   @ encoding: [0xa0,0x0e,0x41,0xf3]
+@ CHECK: vcge.s8	q8, q9, q8              @ encoding: [0xf0,0x03,0x42,0xf2]
+@ CHECK: vcge.s16	q8, q9, q8      @ encoding: [0xf0,0x03,0x52,0xf2]
+@ CHECK: vcge.s32	q8, q9, q8      @ encoding: [0xf0,0x03,0x62,0xf2]
+@ CHECK: vcge.u8	q8, q9, q8              @ encoding: [0xf0,0x03,0x42,0xf3]
+@ CHECK: vcge.u16	q8, q9, q8      @ encoding: [0xf0,0x03,0x52,0xf3]
+@ CHECK: vcge.u32	q8, q9, q8      @ encoding: [0xf0,0x03,0x62,0xf3]
+@ CHECK: vcge.f32	q8, q9, q8      @ encoding: [0xe0,0x0e,0x42,0xf3]
diff --git a/test/MC/ARM/neon-minmax-encoding.s b/test/MC/ARM/neon-minmax-encoding.s
index 2d0d8c9..b1eb258 100644
--- a/test/MC/ARM/neon-minmax-encoding.s
+++ b/test/MC/ARM/neon-minmax-encoding.s
@@ -1,58 +1,124 @@
 @ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
 
-@ CHECK: vmin.s8	d16, d16, d17           @ encoding: [0xb1,0x06,0x40,0xf2]
-	vmin.s8	d16, d16, d17
-@ CHECK: vmin.s16	d16, d16, d17   @ encoding: [0xb1,0x06,0x50,0xf2]
-	vmin.s16	d16, d16, d17
-@ CHECK: vmin.s32	d16, d16, d17   @ encoding: [0xb1,0x06,0x60,0xf2]
-	vmin.s32	d16, d16, d17
-@ CHECK: vmin.u8	d16, d16, d17           @ encoding: [0xb1,0x06,0x40,0xf3]
-	vmin.u8	d16, d16, d17
-@ CHECK: vmin.u16	d16, d16, d17   @ encoding: [0xb1,0x06,0x50,0xf3]
-	vmin.u16	d16, d16, d17
-@ CHECK: vmin.u32	d16, d16, d17   @ encoding: [0xb1,0x06,0x60,0xf3]
-	vmin.u32	d16, d16, d17
-@ CHECK: vmin.f32	d16, d16, d17   @ encoding: [0xa1,0x0f,0x60,0xf2]
-	vmin.f32	d16, d16, d17
-@ CHECK: vmin.s8	q8, q8, q9              @ encoding: [0xf2,0x06,0x40,0xf2]
-	vmin.s8	q8, q8, q9
-@ CHECK: vmin.s16	q8, q8, q9      @ encoding: [0xf2,0x06,0x50,0xf2]
-	vmin.s16	q8, q8, q9
-@ CHECK: vmin.s32	q8, q8, q9      @ encoding: [0xf2,0x06,0x60,0xf2]
-	vmin.s32	q8, q8, q9
-@ CHECK: vmin.u8	q8, q8, q9              @ encoding: [0xf2,0x06,0x40,0xf3]
-	vmin.u8	q8, q8, q9
-@ CHECK: vmin.u16	q8, q8, q9      @ encoding: [0xf2,0x06,0x50,0xf3]
-	vmin.u16	q8, q8, q9
-@ CHECK: vmin.u32	q8, q8, q9      @ encoding: [0xf2,0x06,0x60,0xf3]
-	vmin.u32	q8, q8, q9
-@ CHECK: vmin.f32	q8, q8, q9      @ encoding: [0xe2,0x0f,0x60,0xf2]
-	vmin.f32	q8, q8, q9
-@ CHECK: vmax.s8	d16, d16, d17           @ encoding: [0xa1,0x06,0x40,0xf2]
-	vmax.s8	d16, d16, d17
-@ CHECK: vmax.s16	d16, d16, d17   @ encoding: [0xa1,0x06,0x50,0xf2]
-	vmax.s16	d16, d16, d17
-@ CHECK: vmax.s32	d16, d16, d17   @ encoding: [0xa1,0x06,0x60,0xf2]
-	vmax.s32	d16, d16, d17
-@ CHECK: vmax.u8	d16, d16, d17           @ encoding: [0xa1,0x06,0x40,0xf3]
-	vmax.u8	d16, d16, d17
-@ CHECK: vmax.u16	d16, d16, d17   @ encoding: [0xa1,0x06,0x50,0xf3]
-	vmax.u16	d16, d16, d17
-@ CHECK: vmax.u32	d16, d16, d17   @ encoding: [0xa1,0x06,0x60,0xf3]
-	vmax.u32	d16, d16, d17
-@ CHECK: vmax.f32	d16, d16, d17   @ encoding: [0xa1,0x0f,0x40,0xf2]
-	vmax.f32	d16, d16, d17
-@ CHECK: vmax.s8	q8, q8, q9              @ encoding: [0xe2,0x06,0x40,0xf2]
-	vmax.s8	q8, q8, q9
-@ CHECK: vmax.s16	q8, q8, q9      @ encoding: [0xe2,0x06,0x50,0xf2]
-	vmax.s16	q8, q8, q9
+        vmax.s8 d1, d2, d3
+        vmax.s16 d4, d5, d6
+        vmax.s32 d7, d8, d9
+        vmax.u8 d10, d11, d12
+        vmax.u16 d13, d14, d15
+        vmax.u32 d16, d17, d18
+        vmax.f32 d19, d20, d21
+
+        vmax.s8 d2, d3
+        vmax.s16 d5, d6
+        vmax.s32 d8, d9
+        vmax.u8 d11, d12
+        vmax.u16 d14, d15
+        vmax.u32 d17, d18
+        vmax.f32 d20, d21
+
+        vmax.s8 q1, q2, q3
+        vmax.s16 q4, q5, q6
+        vmax.s32 q7, q8, q9
+        vmax.u8 q10, q11, q12
+        vmax.u16 q13, q14, q15
+        vmax.u32 q6, q7, q8
+        vmax.f32 q9, q5, q1
+
+        vmax.s8 q2, q3
+        vmax.s16 q5, q6
+        vmax.s32 q8, q9
+        vmax.u8 q11, q2
+        vmax.u16 q4, q5
+        vmax.u32 q7, q8
+        vmax.f32 q2, q1
+
+@ CHECK: vmax.s8	d1, d2, d3      @ encoding: [0x03,0x16,0x02,0xf2]
+@ CHECK: vmax.s16	d4, d5, d6      @ encoding: [0x06,0x46,0x15,0xf2]
+@ CHECK: vmax.s32	d7, d8, d9      @ encoding: [0x09,0x76,0x28,0xf2]
+@ CHECK: vmax.u8	d10, d11, d12   @ encoding: [0x0c,0xa6,0x0b,0xf3]
+@ CHECK: vmax.u16	d13, d14, d15   @ encoding: [0x0f,0xd6,0x1e,0xf3]
+@ CHECK: vmax.u32	d16, d17, d18   @ encoding: [0xa2,0x06,0x61,0xf3]
+@ CHECK: vmax.f32	d19, d20, d21   @ encoding: [0xa5,0x3f,0x44,0xf2]
+@ CHECK: vmax.s8	d2, d2, d3      @ encoding: [0x03,0x26,0x02,0xf2]
+@ CHECK: vmax.s16	d5, d5, d6      @ encoding: [0x06,0x56,0x15,0xf2]
+@ CHECK: vmax.s32	d8, d8, d9      @ encoding: [0x09,0x86,0x28,0xf2]
+@ CHECK: vmax.u8	d11, d11, d12   @ encoding: [0x0c,0xb6,0x0b,0xf3]
+@ CHECK: vmax.u16	d14, d14, d15   @ encoding: [0x0f,0xe6,0x1e,0xf3]
+@ CHECK: vmax.u32	d17, d17, d18   @ encoding: [0xa2,0x16,0x61,0xf3]
+@ CHECK: vmax.f32	d20, d20, d21   @ encoding: [0xa5,0x4f,0x44,0xf2]
+@ CHECK: vmax.s8	q1, q2, q3      @ encoding: [0x46,0x26,0x04,0xf2]
+@ CHECK: vmax.s16	q4, q5, q6      @ encoding: [0x4c,0x86,0x1a,0xf2]
+@ CHECK: vmax.s32	q7, q8, q9      @ encoding: [0xe2,0xe6,0x20,0xf2]
+@ CHECK: vmax.u8	q10, q11, q12   @ encoding: [0xe8,0x46,0x46,0xf3]
+@ CHECK: vmax.u16	q13, q14, q15   @ encoding: [0xee,0xa6,0x5c,0xf3]
+@ CHECK: vmax.u32	q6, q7, q8      @ encoding: [0x60,0xc6,0x2e,0xf3]
+@ CHECK: vmax.f32	q9, q5, q1      @ encoding: [0x42,0x2f,0x4a,0xf2]
+@ CHECK: vmax.s8	q2, q2, q3      @ encoding: [0x46,0x46,0x04,0xf2]
+@ CHECK: vmax.s16	q5, q5, q6      @ encoding: [0x4c,0xa6,0x1a,0xf2]
 @ CHECK: vmax.s32	q8, q8, q9      @ encoding: [0xe2,0x06,0x60,0xf2]
-	vmax.s32	q8, q8, q9
-@ CHECK: vmax.u8	q8, q8, q9              @ encoding: [0xe2,0x06,0x40,0xf3]
-	vmax.u8	q8, q8, q9
-@ CHECK: vmax.u16	q8, q8, q9      @ encoding: [0xe2,0x06,0x50,0xf3]
-	vmax.u16	q8, q8, q9
-@ CHECK: vmax.u32	q8, q8, q9      @ encoding: [0xe2,0x06,0x60,0xf3]
-	vmax.u32	q8, q8, q9
-@ CHECK: vmax.f32	q8, q8, q9      @ encoding: [0xe2,0x0f,0x40,0xf2]
-	vmax.f32	q8, q8, q9
+@ CHECK: vmax.u8	q11, q11, q2    @ encoding: [0xc4,0x66,0x46,0xf3]
+@ CHECK: vmax.u16	q4, q4, q5      @ encoding: [0x4a,0x86,0x18,0xf3]
+@ CHECK: vmax.u32	q7, q7, q8      @ encoding: [0x60,0xe6,0x2e,0xf3]
+@ CHECK: vmax.f32	q2, q2, q1      @ encoding: [0x42,0x4f,0x04,0xf2]
+
+
+        vmin.s8 d1, d2, d3
+        vmin.s16 d4, d5, d6
+        vmin.s32 d7, d8, d9
+        vmin.u8 d10, d11, d12
+        vmin.u16 d13, d14, d15
+        vmin.u32 d16, d17, d18
+        vmin.f32 d19, d20, d21
+
+        vmin.s8 d2, d3
+        vmin.s16 d5, d6
+        vmin.s32 d8, d9
+        vmin.u8 d11, d12
+        vmin.u16 d14, d15
+        vmin.u32 d17, d18
+        vmin.f32 d20, d21
+
+        vmin.s8 q1, q2, q3
+        vmin.s16 q4, q5, q6
+        vmin.s32 q7, q8, q9
+        vmin.u8 q10, q11, q12
+        vmin.u16 q13, q14, q15
+        vmin.u32 q6, q7, q8
+        vmin.f32 q9, q5, q1
+
+        vmin.s8 q2, q3
+        vmin.s16 q5, q6
+        vmin.s32 q8, q9
+        vmin.u8 q11, q2
+        vmin.u16 q4, q5
+        vmin.u32 q7, q8
+        vmin.f32 q2, q1
+
+@ CHECK: vmin.s8	d1, d2, d3      @ encoding: [0x13,0x16,0x02,0xf2]
+@ CHECK: vmin.s16	d4, d5, d6      @ encoding: [0x16,0x46,0x15,0xf2]
+@ CHECK: vmin.s32	d7, d8, d9      @ encoding: [0x19,0x76,0x28,0xf2]
+@ CHECK: vmin.u8	d10, d11, d12   @ encoding: [0x1c,0xa6,0x0b,0xf3]
+@ CHECK: vmin.u16	d13, d14, d15   @ encoding: [0x1f,0xd6,0x1e,0xf3]
+@ CHECK: vmin.u32	d16, d17, d18   @ encoding: [0xb2,0x06,0x61,0xf3]
+@ CHECK: vmin.f32	d19, d20, d21   @ encoding: [0xa5,0x3f,0x64,0xf2]
+@ CHECK: vmin.s8	d2, d2, d3      @ encoding: [0x13,0x26,0x02,0xf2]
+@ CHECK: vmin.s16	d5, d5, d6      @ encoding: [0x16,0x56,0x15,0xf2]
+@ CHECK: vmin.s32	d8, d8, d9      @ encoding: [0x19,0x86,0x28,0xf2]
+@ CHECK: vmin.u8	d11, d11, d12   @ encoding: [0x1c,0xb6,0x0b,0xf3]
+@ CHECK: vmin.u16	d14, d14, d15   @ encoding: [0x1f,0xe6,0x1e,0xf3]
+@ CHECK: vmin.u32	d17, d17, d18   @ encoding: [0xb2,0x16,0x61,0xf3]
+@ CHECK: vmin.f32	d20, d20, d21   @ encoding: [0xa5,0x4f,0x64,0xf2]
+@ CHECK: vmin.s8	q1, q2, q3      @ encoding: [0x56,0x26,0x04,0xf2]
+@ CHECK: vmin.s16	q4, q5, q6      @ encoding: [0x5c,0x86,0x1a,0xf2]
+@ CHECK: vmin.s32	q7, q8, q9      @ encoding: [0xf2,0xe6,0x20,0xf2]
+@ CHECK: vmin.u8	q10, q11, q12   @ encoding: [0xf8,0x46,0x46,0xf3]
+@ CHECK: vmin.u16	q13, q14, q15   @ encoding: [0xfe,0xa6,0x5c,0xf3]
+@ CHECK: vmin.u32	q6, q7, q8      @ encoding: [0x70,0xc6,0x2e,0xf3]
+@ CHECK: vmin.f32	q9, q5, q1      @ encoding: [0x42,0x2f,0x6a,0xf2]
+@ CHECK: vmin.s8	q2, q2, q3      @ encoding: [0x56,0x46,0x04,0xf2]
+@ CHECK: vmin.s16	q5, q5, q6      @ encoding: [0x5c,0xa6,0x1a,0xf2]
+@ CHECK: vmin.s32	q8, q8, q9      @ encoding: [0xf2,0x06,0x60,0xf2]
+@ CHECK: vmin.u8	q11, q11, q2    @ encoding: [0xd4,0x66,0x46,0xf3]
+@ CHECK: vmin.u16	q4, q4, q5      @ encoding: [0x5a,0x86,0x18,0xf3]
+@ CHECK: vmin.u32	q7, q7, q8      @ encoding: [0x70,0xe6,0x2e,0xf3]
+@ CHECK: vmin.f32	q2, q2, q1      @ encoding: [0x42,0x4f,0x24,0xf2]
diff --git a/test/MC/ARM/neon-mov-encoding.s b/test/MC/ARM/neon-mov-encoding.s
index 02eec12..6f26a13 100644
--- a/test/MC/ARM/neon-mov-encoding.s
+++ b/test/MC/ARM/neon-mov-encoding.s
@@ -1,5 +1,4 @@
 @ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
-@ XFAIL: *
 
 	vmov.i8		d16, #0x8
 	vmov.i16	d16, #0x10
@@ -19,9 +18,9 @@
 @ CHECK: vmov.i32	d16, #0x2000    @ encoding: [0x10,0x02,0xc2,0xf2]
 @ CHECK: vmov.i32	d16, #0x200000  @ encoding: [0x10,0x04,0xc2,0xf2]
 @ CHECK: vmov.i32	d16, #0x20000000 @ encoding: [0x10,0x06,0xc2,0xf2]
-@ CHECK: vmov.i32	d16, #0x20FF    @ encoding: [0x10,0x0c,0xc2,0xf2]
-@ CHECK: vmov.i32	d16, #0x20FFFF  @ encoding: [0x10,0x0d,0xc2,0xf2]
-@ CHECK: vmov.i64 d16, #0xFF0000FF0000FFFF @ encoding: [0x33,0x0e,0xc1,0xf3]
+@ CHECK: vmov.i32	d16, #0x20ff    @ encoding: [0x10,0x0c,0xc2,0xf2]
+@ CHECK: vmov.i32	d16, #0x20ffff  @ encoding: [0x10,0x0d,0xc2,0xf2]
+@ CHECK: vmov.i64 d16, #0xff0000ff0000ffff @ encoding: [0x33,0x0e,0xc1,0xf3]
 
 
 
@@ -43,9 +42,9 @@
 @ CHECK: vmov.i32	q8, #0x2000     @ encoding: [0x50,0x02,0xc2,0xf2]
 @ CHECK: vmov.i32	q8, #0x200000   @ encoding: [0x50,0x04,0xc2,0xf2]
 @ CHECK: vmov.i32	q8, #0x20000000 @ encoding: [0x50,0x06,0xc2,0xf2]
-@ CHECK: vmov.i32	q8, #0x20FF     @ encoding: [0x50,0x0c,0xc2,0xf2]
-@ CHECK: vmov.i32	q8, #0x20FFFF   @ encoding: [0x50,0x0d,0xc2,0xf2]
-@ CHECK: vmov.i64 q8, #0xFF0000FF0000FFFF @ encoding: [0x73,0x0e,0xc1,0xf3]
+@ CHECK: vmov.i32	q8, #0x20ff     @ encoding: [0x50,0x0c,0xc2,0xf2]
+@ CHECK: vmov.i32	q8, #0x20ffff   @ encoding: [0x50,0x0d,0xc2,0xf2]
+@ CHECK: vmov.i64 q8, #0xff0000ff0000ffff @ encoding: [0x73,0x0e,0xc1,0xf3]
 
 	vmvn.i16	d16, #0x10
 	vmvn.i16	d16, #0x1000
@@ -62,8 +61,8 @@
 @ CHECK: vmvn.i32	d16, #0x2000    @ encoding: [0x30,0x02,0xc2,0xf2]
 @ CHECK: vmvn.i32	d16, #0x200000  @ encoding: [0x30,0x04,0xc2,0xf2]
 @ CHECK: vmvn.i32	d16, #0x20000000 @ encoding: [0x30,0x06,0xc2,0xf2]
-@ CHECK: vmvn.i32	d16, #0x20FF    @ encoding: [0x30,0x0c,0xc2,0xf2]
-@ CHECK: vmvn.i32	d16, #0x20FFFF  @ encoding: [0x30,0x0d,0xc2,0xf2]
+@ CHECK: vmvn.i32	d16, #0x20ff    @ encoding: [0x30,0x0c,0xc2,0xf2]
+@ CHECK: vmvn.i32	d16, #0x20ffff  @ encoding: [0x30,0x0d,0xc2,0xf2]
 
 	vmovl.s8	q8, d16
 	vmovl.s16	q8, d16
diff --git a/test/MC/ARM/neon-mul-accum-encoding.s b/test/MC/ARM/neon-mul-accum-encoding.s
index ed9ceb3..e71ad71 100644
--- a/test/MC/ARM/neon-mul-accum-encoding.s
+++ b/test/MC/ARM/neon-mul-accum-encoding.s
@@ -1,66 +1,94 @@
 @ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
 
-@ CHECK: vmla.i8	d16, d18, d17           @ encoding: [0xa1,0x09,0x42,0xf2]
 	vmla.i8	d16, d18, d17
-@ CHECK: vmla.i16	d16, d18, d17   @ encoding: [0xa1,0x09,0x52,0xf2]
 	vmla.i16	d16, d18, d17
-@ CHECK: vmla.i32	d16, d18, d17   @ encoding: [0xa1,0x09,0x62,0xf2]
 	vmla.i32	d16, d18, d17
-@ CHECK: vmla.f32	d16, d18, d17   @ encoding: [0xb1,0x0d,0x42,0xf2]
 	vmla.f32	d16, d18, d17
-@ CHECK: vmla.i8	q9, q8, q10             @ encoding: [0xe4,0x29,0x40,0xf2]
 	vmla.i8	q9, q8, q10
-@ CHECK: vmla.i16	q9, q8, q10     @ encoding: [0xe4,0x29,0x50,0xf2]
 	vmla.i16	q9, q8, q10
-@ CHECK: vmla.i32	q9, q8, q10     @ encoding: [0xe4,0x29,0x60,0xf2]
 	vmla.i32	q9, q8, q10
-@ CHECK: vmla.f32	q9, q8, q10     @ encoding: [0xf4,0x2d,0x40,0xf2]
 	vmla.f32	q9, q8, q10
-@ CHECK: vmlal.s8	q8, d19, d18    @ encoding: [0xa2,0x08,0xc3,0xf2]
+	vmla.i32	q12, q8, d3[0]
+
+@ CHECK: vmla.i8	d16, d18, d17   @ encoding: [0xa1,0x09,0x42,0xf2]
+@ CHECK: vmla.i16	d16, d18, d17   @ encoding: [0xa1,0x09,0x52,0xf2]
+@ CHECK: vmla.i32	d16, d18, d17   @ encoding: [0xa1,0x09,0x62,0xf2]
+@ CHECK: vmla.f32	d16, d18, d17   @ encoding: [0xb1,0x0d,0x42,0xf2]
+@ CHECK: vmla.i8	q9, q8, q10     @ encoding: [0xe4,0x29,0x40,0xf2]
+@ CHECK: vmla.i16	q9, q8, q10     @ encoding: [0xe4,0x29,0x50,0xf2]
+@ CHECK: vmla.i32	q9, q8, q10     @ encoding: [0xe4,0x29,0x60,0xf2]
+@ CHECK: vmla.f32	q9, q8, q10     @ encoding: [0xf4,0x2d,0x40,0xf2]
+@ CHECK: vmla.i32	q12, q8, d3[0]  @ encoding: [0xc3,0x80,0xe0,0xf3]
+
+
 	vmlal.s8	q8, d19, d18
-@ CHECK: vmlal.s16	q8, d19, d18    @ encoding: [0xa2,0x08,0xd3,0xf2]
 	vmlal.s16	q8, d19, d18
-@ CHECK: vmlal.s32	q8, d19, d18    @ encoding: [0xa2,0x08,0xe3,0xf2]
 	vmlal.s32	q8, d19, d18
-@ CHECK: vmlal.u8	q8, d19, d18    @ encoding: [0xa2,0x08,0xc3,0xf3]
 	vmlal.u8	q8, d19, d18
-@ CHECK: vmlal.u16	q8, d19, d18    @ encoding: [0xa2,0x08,0xd3,0xf3]
 	vmlal.u16	q8, d19, d18
-@ CHECK: vmlal.u32	q8, d19, d18    @ encoding: [0xa2,0x08,0xe3,0xf3]
 	vmlal.u32	q8, d19, d18
-@ CHECK: vqdmlal.s16	q8, d19, d18    @ encoding: [0xa2,0x09,0xd3,0xf2]
+
+@ CHECK: vmlal.s8	q8, d19, d18    @ encoding: [0xa2,0x08,0xc3,0xf2]
+@ CHECK: vmlal.s16	q8, d19, d18    @ encoding: [0xa2,0x08,0xd3,0xf2]
+@ CHECK: vmlal.s32	q8, d19, d18    @ encoding: [0xa2,0x08,0xe3,0xf2]
+@ CHECK: vmlal.u8	q8, d19, d18    @ encoding: [0xa2,0x08,0xc3,0xf3]
+@ CHECK: vmlal.u16	q8, d19, d18    @ encoding: [0xa2,0x08,0xd3,0xf3]
+@ CHECK: vmlal.u32	q8, d19, d18    @ encoding: [0xa2,0x08,0xe3,0xf3]
+
+
 	vqdmlal.s16	q8, d19, d18
-@ CHECK: vqdmlal.s32	q8, d19, d18    @ encoding: [0xa2,0x09,0xe3,0xf2]
 	vqdmlal.s32	q8, d19, d18
-@ CHECK: vmls.i8	d16, d18, d17           @ encoding: [0xa1,0x09,0x42,0xf3]
+        vqdmlal.s16 q11, d11, d7[0]
+        vqdmlal.s16 q11, d11, d7[1]
+        vqdmlal.s16 q11, d11, d7[2]
+        vqdmlal.s16 q11, d11, d7[3]
+
+@ CHECK: vqdmlal.s16	q8, d19, d18    @ encoding: [0xa2,0x09,0xd3,0xf2]
+@ CHECK: vqdmlal.s32	q8, d19, d18    @ encoding: [0xa2,0x09,0xe3,0xf2]
+@ CHECK: vqdmlal.s16	q11, d11, d7[0] @ encoding: [0x47,0x63,0xdb,0xf2]
+@ CHECK: vqdmlal.s16	q11, d11, d7[1] @ encoding: [0x4f,0x63,0xdb,0xf2]
+@ CHECK: vqdmlal.s16	q11, d11, d7[2] @ encoding: [0x67,0x63,0xdb,0xf2]
+@ CHECK: vqdmlal.s16	q11, d11, d7[3] @ encoding: [0x6f,0x63,0xdb,0xf2]
+
+
 	vmls.i8	d16, d18, d17
-@ CHECK: vmls.i16	d16, d18, d17   @ encoding: [0xa1,0x09,0x52,0xf3]
 	vmls.i16	d16, d18, d17
-@ CHECK: vmls.i32	d16, d18, d17   @ encoding: [0xa1,0x09,0x62,0xf3]
 	vmls.i32	d16, d18, d17
-@ CHECK: vmls.f32	d16, d18, d17   @ encoding: [0xb1,0x0d,0x62,0xf2]
 	vmls.f32	d16, d18, d17
-@ CHECK: vmls.i8	q9, q8, q10             @ encoding: [0xe4,0x29,0x40,0xf3]
 	vmls.i8	q9, q8, q10
-@ CHECK: vmls.i16	q9, q8, q10     @ encoding: [0xe4,0x29,0x50,0xf3]
 	vmls.i16	q9, q8, q10
-@ CHECK: vmls.i32	q9, q8, q10     @ encoding: [0xe4,0x29,0x60,0xf3]
 	vmls.i32	q9, q8, q10
-@ CHECK: vmls.f32	q9, q8, q10     @ encoding: [0xf4,0x2d,0x60,0xf2]
 	vmls.f32	q9, q8, q10
-@ CHECK: vmlsl.s8	q8, d19, d18    @ encoding: [0xa2,0x0a,0xc3,0xf2]
+	vmls.i16	q4, q12, d6[2]
+
+@ CHECK: vmls.i8	d16, d18, d17   @ encoding: [0xa1,0x09,0x42,0xf3]
+@ CHECK: vmls.i16	d16, d18, d17   @ encoding: [0xa1,0x09,0x52,0xf3]
+@ CHECK: vmls.i32	d16, d18, d17   @ encoding: [0xa1,0x09,0x62,0xf3]
+@ CHECK: vmls.f32	d16, d18, d17   @ encoding: [0xb1,0x0d,0x62,0xf2]
+@ CHECK: vmls.i8	q9, q8, q10     @ encoding: [0xe4,0x29,0x40,0xf3]
+@ CHECK: vmls.i16	q9, q8, q10     @ encoding: [0xe4,0x29,0x50,0xf3]
+@ CHECK: vmls.i32	q9, q8, q10     @ encoding: [0xe4,0x29,0x60,0xf3]
+@ CHECK: vmls.f32	q9, q8, q10     @ encoding: [0xf4,0x2d,0x60,0xf2]
+@ CHECK: vmls.i16	q4, q12, d6[2]  @ encoding: [0xe6,0x84,0x98,0xf3]
+
+
 	vmlsl.s8	q8, d19, d18
-@ CHECK: vmlsl.s16	q8, d19, d18    @ encoding: [0xa2,0x0a,0xd3,0xf2]
 	vmlsl.s16	q8, d19, d18
-@ CHECK: vmlsl.s32	q8, d19, d18    @ encoding: [0xa2,0x0a,0xe3,0xf2]
 	vmlsl.s32	q8, d19, d18
-@ CHECK: vmlsl.u8	q8, d19, d18    @ encoding: [0xa2,0x0a,0xc3,0xf3]
 	vmlsl.u8	q8, d19, d18
-@ CHECK: vmlsl.u16	q8, d19, d18    @ encoding: [0xa2,0x0a,0xd3,0xf3]
 	vmlsl.u16	q8, d19, d18
-@ CHECK: vmlsl.u32	q8, d19, d18    @ encoding: [0xa2,0x0a,0xe3,0xf3]
 	vmlsl.u32	q8, d19, d18
-@ CHECK: vqdmlsl.s16	q8, d19, d18    @ encoding: [0xa2,0x0b,0xd3,0xf2]
+
+@ CHECK: vmlsl.s8	q8, d19, d18    @ encoding: [0xa2,0x0a,0xc3,0xf2]
+@ CHECK: vmlsl.s16	q8, d19, d18    @ encoding: [0xa2,0x0a,0xd3,0xf2]
+@ CHECK: vmlsl.s32	q8, d19, d18    @ encoding: [0xa2,0x0a,0xe3,0xf2]
+@ CHECK: vmlsl.u8	q8, d19, d18    @ encoding: [0xa2,0x0a,0xc3,0xf3]
+@ CHECK: vmlsl.u16	q8, d19, d18    @ encoding: [0xa2,0x0a,0xd3,0xf3]
+@ CHECK: vmlsl.u32	q8, d19, d18    @ encoding: [0xa2,0x0a,0xe3,0xf3]
+
+
 	vqdmlsl.s16	q8, d19, d18
-@ CHECK: vqdmlsl.s32	q8, d19, d18    @ encoding: [0xa2,0x0b,0xe3,0xf2]
 	vqdmlsl.s32	q8, d19, d18
+
+@ CHECK: vqdmlsl.s16	q8, d19, d18    @ encoding: [0xa2,0x0b,0xd3,0xf2]
+@ CHECK: vqdmlsl.s32	q8, d19, d18    @ encoding: [0xa2,0x0b,0xe3,0xf2]
diff --git a/test/MC/ARM/neon-mul-encoding.s b/test/MC/ARM/neon-mul-encoding.s
index 4dc7803..d6bc1f3 100644
--- a/test/MC/ARM/neon-mul-encoding.s
+++ b/test/MC/ARM/neon-mul-encoding.s
@@ -1,82 +1,168 @@
 @ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
 
-	vmla.i8	d16, d18, d17
-	vmla.i16	d16, d18, d17
-	vmla.i32	d16, d18, d17
-	vmla.f32	d16, d18, d17
-	vmla.i8	q9, q8, q10
-	vmla.i16	q9, q8, q10
-	vmla.i32	q9, q8, q10
-	vmla.f32	q9, q8, q10
-
-@ CHECK: vmla.i8	d16, d18, d17   @ encoding: [0xa1,0x09,0x42,0xf2]
-@ CHECK: vmla.i16	d16, d18, d17   @ encoding: [0xa1,0x09,0x52,0xf2]
-@ CHECK: vmla.i32	d16, d18, d17   @ encoding: [0xa1,0x09,0x62,0xf2]
-@ CHECK: vmla.f32	d16, d18, d17   @ encoding: [0xb1,0x0d,0x42,0xf2]
-@ CHECK: vmla.i8	q9, q8, q10     @ encoding: [0xe4,0x29,0x40,0xf2]
-@ CHECK: vmla.i16	q9, q8, q10     @ encoding: [0xe4,0x29,0x50,0xf2]
-@ CHECK: vmla.i32	q9, q8, q10     @ encoding: [0xe4,0x29,0x60,0xf2]
-@ CHECK: vmla.f32	q9, q8, q10     @ encoding: [0xf4,0x2d,0x40,0xf2]
-
-
-	vmlal.s8	q8, d19, d18
-	vmlal.s16	q8, d19, d18
-	vmlal.s32	q8, d19, d18
-	vmlal.u8	q8, d19, d18
-	vmlal.u16	q8, d19, d18
-	vmlal.u32	q8, d19, d18
-
-@ CHECK: vmlal.s8	q8, d19, d18    @ encoding: [0xa2,0x08,0xc3,0xf2]
-@ CHECK: vmlal.s16	q8, d19, d18    @ encoding: [0xa2,0x08,0xd3,0xf2]
-@ CHECK: vmlal.s32	q8, d19, d18    @ encoding: [0xa2,0x08,0xe3,0xf2]
-@ CHECK: vmlal.u8	q8, d19, d18    @ encoding: [0xa2,0x08,0xc3,0xf3]
-@ CHECK: vmlal.u16	q8, d19, d18    @ encoding: [0xa2,0x08,0xd3,0xf3]
-@ CHECK: vmlal.u32	q8, d19, d18    @ encoding: [0xa2,0x08,0xe3,0xf3]
-
-
-	vqdmlal.s16	q8, d19, d18
-	vqdmlal.s32	q8, d19, d18
-
-@ CHECK: vqdmlal.s16	q8, d19, d18    @ encoding: [0xa2,0x09,0xd3,0xf2]
-@ CHECK: vqdmlal.s32	q8, d19, d18    @ encoding: [0xa2,0x09,0xe3,0xf2]
-
-
-	vmls.i8	d16, d18, d17
-	vmls.i16	d16, d18, d17
-	vmls.i32	d16, d18, d17
-	vmls.f32	d16, d18, d17
-	vmls.i8	q9, q8, q10
-	vmls.i16	q9, q8, q10
-	vmls.i32	q9, q8, q10
-	vmls.f32	q9, q8, q10
-
-@ CHECK: vmls.i8	d16, d18, d17   @ encoding: [0xa1,0x09,0x42,0xf3]
-@ CHECK: vmls.i16	d16, d18, d17   @ encoding: [0xa1,0x09,0x52,0xf3]
-@ CHECK: vmls.i32	d16, d18, d17   @ encoding: [0xa1,0x09,0x62,0xf3]
-@ CHECK: vmls.f32	d16, d18, d17   @ encoding: [0xb1,0x0d,0x62,0xf2]
-@ CHECK: vmls.i8	q9, q8, q10     @ encoding: [0xe4,0x29,0x40,0xf3]
-@ CHECK: vmls.i16	q9, q8, q10     @ encoding: [0xe4,0x29,0x50,0xf3]
-@ CHECK: vmls.i32	q9, q8, q10     @ encoding: [0xe4,0x29,0x60,0xf3]
-@ CHECK: vmls.f32	q9, q8, q10     @ encoding: [0xf4,0x2d,0x60,0xf2]
-
-
-	vmlsl.s8	q8, d19, d18
-	vmlsl.s16	q8, d19, d18
-	vmlsl.s32	q8, d19, d18
-	vmlsl.u8	q8, d19, d18
-	vmlsl.u16	q8, d19, d18
-	vmlsl.u32	q8, d19, d18
-
-@ CHECK: vmlsl.s8	q8, d19, d18    @ encoding: [0xa2,0x0a,0xc3,0xf2]
-@ CHECK: vmlsl.s16	q8, d19, d18    @ encoding: [0xa2,0x0a,0xd3,0xf2]
-@ CHECK: vmlsl.s32	q8, d19, d18    @ encoding: [0xa2,0x0a,0xe3,0xf2]
-@ CHECK: vmlsl.u8	q8, d19, d18    @ encoding: [0xa2,0x0a,0xc3,0xf3]
-@ CHECK: vmlsl.u16	q8, d19, d18    @ encoding: [0xa2,0x0a,0xd3,0xf3]
-@ CHECK: vmlsl.u32	q8, d19, d18    @ encoding: [0xa2,0x0a,0xe3,0xf3]
-
-
-	vqdmlsl.s16	q8, d19, d18
-	vqdmlsl.s32	q8, d19, d18
-
-@ CHECK: vqdmlsl.s16	q8, d19, d18    @ encoding: [0xa2,0x0b,0xd3,0xf2]
-@ CHECK: vqdmlsl.s32	q8, d19, d18    @ encoding: [0xa2,0x0b,0xe3,0xf2]
+	vmul.i8	d16, d16, d17
+	vmul.i16	d16, d16, d17
+	vmul.i32	d16, d16, d17
+	vmul.f32	d16, d16, d17
+	vmul.i8	q8, q8, q9
+	vmul.i16	q8, q8, q9
+	vmul.i32	q8, q8, q9
+	vmul.f32	q8, q8, q9
+	vmul.p8	d16, d16, d17
+	vmul.p8	q8, q8, q9
+	vmul.i16	d18, d8, d0[3]
+
+	vmul.i8	d16, d17
+	vmul.i16	d16, d17
+	vmul.i32	d16, d17
+	vmul.f32	d16, d17
+	vmul.i8	q8, q9
+	vmul.i16	q8, q9
+	vmul.i32	q8, q9
+	vmul.f32	q8, q9
+	vmul.p8	d16, d17
+	vmul.p8	q8, q9
+
+@ CHECK: vmul.i8	d16, d16, d17   @ encoding: [0xb1,0x09,0x40,0xf2]
+@ CHECK: vmul.i16	d16, d16, d17   @ encoding: [0xb1,0x09,0x50,0xf2]
+@ CHECK: vmul.i32	d16, d16, d17   @ encoding: [0xb1,0x09,0x60,0xf2]
+@ CHECK: vmul.f32	d16, d16, d17   @ encoding: [0xb1,0x0d,0x40,0xf3]
+@ CHECK: vmul.i8	q8, q8, q9      @ encoding: [0xf2,0x09,0x40,0xf2]
+@ CHECK: vmul.i16	q8, q8, q9      @ encoding: [0xf2,0x09,0x50,0xf2]
+@ CHECK: vmul.i32	q8, q8, q9      @ encoding: [0xf2,0x09,0x60,0xf2]
+@ CHECK: vmul.f32	q8, q8, q9      @ encoding: [0xf2,0x0d,0x40,0xf3]
+@ CHECK: vmul.p8	d16, d16, d17   @ encoding: [0xb1,0x09,0x40,0xf3]
+@ CHECK: vmul.p8	q8, q8, q9      @ encoding: [0xf2,0x09,0x40,0xf3]
+@ CHECK: vmul.i16	d18, d8, d0[3]  @ encoding: [0x68,0x28,0xd8,0xf2]
+
+@ CHECK: vmul.i8	d16, d16, d17   @ encoding: [0xb1,0x09,0x40,0xf2]
+@ CHECK: vmul.i16	d16, d16, d17   @ encoding: [0xb1,0x09,0x50,0xf2]
+@ CHECK: vmul.i32	d16, d16, d17   @ encoding: [0xb1,0x09,0x60,0xf2]
+@ CHECK: vmul.f32	d16, d16, d17   @ encoding: [0xb1,0x0d,0x40,0xf3]
+@ CHECK: vmul.i8	q8, q8, q9      @ encoding: [0xf2,0x09,0x40,0xf2]
+@ CHECK: vmul.i16	q8, q8, q9      @ encoding: [0xf2,0x09,0x50,0xf2]
+@ CHECK: vmul.i32	q8, q8, q9      @ encoding: [0xf2,0x09,0x60,0xf2]
+@ CHECK: vmul.f32	q8, q8, q9      @ encoding: [0xf2,0x0d,0x40,0xf3]
+@ CHECK: vmul.p8	d16, d16, d17   @ encoding: [0xb1,0x09,0x40,0xf3]
+@ CHECK: vmul.p8	q8, q8, q9      @ encoding: [0xf2,0x09,0x40,0xf3]
+
+
+	vqdmulh.s16	d16, d16, d17
+	vqdmulh.s32	d16, d16, d17
+	vqdmulh.s16	q8, q8, q9
+	vqdmulh.s32	q8, q8, q9
+	vqdmulh.s16	d16, d17
+	vqdmulh.s32	d16, d17
+	vqdmulh.s16	q8, q9
+	vqdmulh.s32	q8, q9
+	vqdmulh.s16	d11, d2, d3[0]
+
+@ CHECK: vqdmulh.s16	d16, d16, d17   @ encoding: [0xa1,0x0b,0x50,0xf2]
+@ CHECK: vqdmulh.s32	d16, d16, d17   @ encoding: [0xa1,0x0b,0x60,0xf2]
+@ CHECK: vqdmulh.s16	q8, q8, q9      @ encoding: [0xe2,0x0b,0x50,0xf2]
+@ CHECK: vqdmulh.s32	q8, q8, q9      @ encoding: [0xe2,0x0b,0x60,0xf2]
+@ CHECK: vqdmulh.s16	d16, d16, d17   @ encoding: [0xa1,0x0b,0x50,0xf2]
+@ CHECK: vqdmulh.s32	d16, d16, d17   @ encoding: [0xa1,0x0b,0x60,0xf2]
+@ CHECK: vqdmulh.s16	q8, q8, q9      @ encoding: [0xe2,0x0b,0x50,0xf2]
+@ CHECK: vqdmulh.s32	q8, q8, q9      @ encoding: [0xe2,0x0b,0x60,0xf2]
+@ CHECK: vqdmulh.s16	d11, d2, d3[0]  @ encoding: [0x43,0xbc,0x92,0xf2]
+
+
+	vqrdmulh.s16	d16, d16, d17
+	vqrdmulh.s32	d16, d16, d17
+	vqrdmulh.s16	q8, q8, q9
+	vqrdmulh.s32	q8, q8, q9
+
+@ CHECK: vqrdmulh.s16	d16, d16, d17   @ encoding: [0xa1,0x0b,0x50,0xf3]
+@ CHECK: vqrdmulh.s32	d16, d16, d17   @ encoding: [0xa1,0x0b,0x60,0xf3]
+@ CHECK: vqrdmulh.s16	q8, q8, q9      @ encoding: [0xe2,0x0b,0x50,0xf3]
+@ CHECK: vqrdmulh.s32	q8, q8, q9      @ encoding: [0xe2,0x0b,0x60,0xf3]
+
+
+	vmull.s8	q8, d16, d17
+	vmull.s16	q8, d16, d17
+	vmull.s32	q8, d16, d17
+	vmull.u8	q8, d16, d17
+	vmull.u16	q8, d16, d17
+	vmull.u32	q8, d16, d17
+	vmull.p8	q8, d16, d17
+
+@ CHECK: vmull.s8	q8, d16, d17    @ encoding: [0xa1,0x0c,0xc0,0xf2]
+@ CHECK: vmull.s16	q8, d16, d17    @ encoding: [0xa1,0x0c,0xd0,0xf2]
+@ CHECK: vmull.s32	q8, d16, d17    @ encoding: [0xa1,0x0c,0xe0,0xf2]
+@ CHECK: vmull.u8	q8, d16, d17    @ encoding: [0xa1,0x0c,0xc0,0xf3]
+@ CHECK: vmull.u16	q8, d16, d17    @ encoding: [0xa1,0x0c,0xd0,0xf3]
+@ CHECK: vmull.u32	q8, d16, d17    @ encoding: [0xa1,0x0c,0xe0,0xf3]
+@ CHECK: vmull.p8	q8, d16, d17    @ encoding: [0xa1,0x0e,0xc0,0xf2]
+
+
+	vqdmull.s16	q8, d16, d17
+	vqdmull.s32	q8, d16, d17
+
+@ CHECK: vqdmull.s16	q8, d16, d17    @ encoding: [0xa1,0x0d,0xd0,0xf2]
+@ CHECK: vqdmull.s32	q8, d16, d17    @ encoding: [0xa1,0x0d,0xe0,0xf2]
+
+
+        vmul.i16 d0, d4[2]
+        vmul.s16 d1, d7[3]
+        vmul.u16 d2, d1[1]
+        vmul.i32 d3, d2[0]
+        vmul.s32 d4, d3[1]
+        vmul.u32 d5, d4[0]
+        vmul.f32 d6, d5[1]
+
+        vmul.i16 q0, d4[2]
+        vmul.s16 q1, d7[3]
+        vmul.u16 q2, d1[1]
+        vmul.i32 q3, d2[0]
+        vmul.s32 q4, d3[1]
+        vmul.u32 q5, d4[0]
+        vmul.f32 q6, d5[1]
+
+        vmul.i16 d9, d0, d4[2]
+        vmul.s16 d8, d1, d7[3]
+        vmul.u16 d7, d2, d1[1]
+        vmul.i32 d6, d3, d2[0]
+        vmul.s32 d5, d4, d3[1]
+        vmul.u32 d4, d5, d4[0]
+        vmul.f32 d3, d6, d5[1]
+
+        vmul.i16 q9, q0, d4[2]
+        vmul.s16 q8, q1, d7[3]
+        vmul.u16 q7, q2, d1[1]
+        vmul.i32 q6, q3, d2[0]
+        vmul.s32 q5, q4, d3[1]
+        vmul.u32 q4, q5, d4[0]
+        vmul.f32 q3, q6, d5[1]
+
+@ CHECK: vmul.i16	d0, d0, d4[2]   @ encoding: [0x64,0x08,0x90,0xf2]
+@ CHECK: vmul.i16	d1, d1, d7[3]   @ encoding: [0x6f,0x18,0x91,0xf2]
+@ CHECK: vmul.i16	d2, d2, d1[1]   @ encoding: [0x49,0x28,0x92,0xf2]
+@ CHECK: vmul.i32	d3, d3, d2[0]   @ encoding: [0x42,0x38,0xa3,0xf2]
+@ CHECK: vmul.i32	d4, d4, d3[1]   @ encoding: [0x63,0x48,0xa4,0xf2]
+@ CHECK: vmul.i32	d5, d5, d4[0]   @ encoding: [0x44,0x58,0xa5,0xf2]
+@ CHECK: vmul.f32	d6, d6, d5[1]   @ encoding: [0x65,0x69,0xa6,0xf2]
+
+@ CHECK: vmul.i16	q0, q0, d4[2]   @ encoding: [0x64,0x08,0x90,0xf3]
+@ CHECK: vmul.i16	q1, q1, d7[3]   @ encoding: [0x6f,0x28,0x92,0xf3]
+@ CHECK: vmul.i16	q2, q2, d1[1]   @ encoding: [0x49,0x48,0x94,0xf3]
+@ CHECK: vmul.i32	q3, q3, d2[0]   @ encoding: [0x42,0x68,0xa6,0xf3]
+@ CHECK: vmul.i32	q4, q4, d3[1]   @ encoding: [0x63,0x88,0xa8,0xf3]
+@ CHECK: vmul.i32	q5, q5, d4[0]   @ encoding: [0x44,0xa8,0xaa,0xf3]
+@ CHECK: vmul.f32	q6, q6, d5[1]   @ encoding: [0x65,0xc9,0xac,0xf3]
+
+@ CHECK: vmul.i16	d9, d0, d4[2]   @ encoding: [0x64,0x98,0x90,0xf2]
+@ CHECK: vmul.i16	d8, d1, d7[3]   @ encoding: [0x6f,0x88,0x91,0xf2]
+@ CHECK: vmul.i16	d7, d2, d1[1]   @ encoding: [0x49,0x78,0x92,0xf2]
+@ CHECK: vmul.i32	d6, d3, d2[0]   @ encoding: [0x42,0x68,0xa3,0xf2]
+@ CHECK: vmul.i32	d5, d4, d3[1]   @ encoding: [0x63,0x58,0xa4,0xf2]
+@ CHECK: vmul.i32	d4, d5, d4[0]   @ encoding: [0x44,0x48,0xa5,0xf2]
+@ CHECK: vmul.f32	d3, d6, d5[1]   @ encoding: [0x65,0x39,0xa6,0xf2]
+
+@ CHECK: vmul.i16	q9, q0, d4[2]   @ encoding: [0x64,0x28,0xd0,0xf3]
+@ CHECK: vmul.i16	q8, q1, d7[3]   @ encoding: [0x6f,0x08,0xd2,0xf3]
+@ CHECK: vmul.i16	q7, q2, d1[1]   @ encoding: [0x49,0xe8,0x94,0xf3]
+@ CHECK: vmul.i32	q6, q3, d2[0]   @ encoding: [0x42,0xc8,0xa6,0xf3]
+@ CHECK: vmul.i32	q5, q4, d3[1]   @ encoding: [0x63,0xa8,0xa8,0xf3]
+@ CHECK: vmul.i32	q4, q5, d4[0]   @ encoding: [0x44,0x88,0xaa,0xf3]
+@ CHECK: vmul.f32	q3, q6, d5[1]   @ encoding: [0x65,0x69,0xac,0xf3]
diff --git a/test/MC/ARM/neon-pairwise-encoding.s b/test/MC/ARM/neon-pairwise-encoding.s
index 65c47bd..b1e86aa 100644
--- a/test/MC/ARM/neon-pairwise-encoding.s
+++ b/test/MC/ARM/neon-pairwise-encoding.s
@@ -8,6 +8,16 @@
 	vpadd.i32	d16, d17, d16
 @ CHECK: vpadd.f32	d16, d16, d17   @ encoding: [0xa1,0x0d,0x40,0xf3]
 	vpadd.f32	d16, d16, d17
+
+@ CHECK: vpadd.i8	d17, d17, d16   @ encoding: [0xb0,0x1b,0x41,0xf2]
+	vpadd.i8	d17, d16
+@ CHECK: vpadd.i16	d17, d17, d16   @ encoding: [0xb0,0x1b,0x51,0xf2]
+	vpadd.i16	d17, d16
+@ CHECK: vpadd.i32	d17, d17, d16   @ encoding: [0xb0,0x1b,0x61,0xf2]
+	vpadd.i32	d17, d16
+@ CHECK: vpadd.f32	d16, d16, d17   @ encoding: [0xa1,0x0d,0x40,0xf3]
+	vpadd.f32	d16, d17
+
 @ CHECK: vpaddl.s8	d16, d16        @ encoding: [0x20,0x02,0xf0,0xf3]
 	vpaddl.s8	d16, d16
 @ CHECK: vpaddl.s16	d16, d16        @ encoding: [0x20,0x02,0xf4,0xf3]
diff --git a/test/MC/ARM/neon-shift-encoding.s b/test/MC/ARM/neon-shift-encoding.s
index a7a1b83..54ed173 100644
--- a/test/MC/ARM/neon-shift-encoding.s
+++ b/test/MC/ARM/neon-shift-encoding.s
@@ -1,134 +1,255 @@
 @ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
 
 _foo:
-@ CHECK: vshl.u8	d16, d17, d16  @ encoding: [0xa1,0x04,0x40,0xf3]
 	vshl.u8	d16, d17, d16
-@ CHECK: vshl.u16	d16, d17, d16  @ encoding: [0xa1,0x04,0x50,0xf3]
 	vshl.u16	d16, d17, d16
-@ CHECK: vshl.u32	d16, d17, d16  @ encoding: [0xa1,0x04,0x60,0xf3]
 	vshl.u32	d16, d17, d16
-@ CHECK: vshl.u64	d16, d17, d16  @ encoding: [0xa1,0x04,0x70,0xf3]
 	vshl.u64	d16, d17, d16
-@ CHECK: vshl.i8	d16, d16, #7  @ encoding: [0x30,0x05,0xcf,0xf2]
 	vshl.i8	d16, d16, #7
-@ CHECK: vshl.i16	d16, d16, #15  @ encoding: [0x30,0x05,0xdf,0xf2]
 	vshl.i16	d16, d16, #15
-@ CHECK: vshl.i32	d16, d16, #31  @ encoding: [0x30,0x05,0xff,0xf2]
 	vshl.i32	d16, d16, #31
-@ CHECK: vshl.i64	d16, d16, #63  @ encoding: [0xb0,0x05,0xff,0xf2]
 	vshl.i64	d16, d16, #63
-@ CHECK: vshl.u8	q8, q9, q8  @ encoding: [0xe2,0x04,0x40,0xf3]
 	vshl.u8	q8, q9, q8
-@ CHECK: vshl.u16	q8, q9, q8  @ encoding: [0xe2,0x04,0x50,0xf3]
 	vshl.u16	q8, q9, q8
-@ CHECK: vshl.u32	q8, q9, q8  @ encoding: [0xe2,0x04,0x60,0xf3]
 	vshl.u32	q8, q9, q8
-@ CHECK: vshl.u64	q8, q9, q8  @ encoding: [0xe2,0x04,0x70,0xf3]
 	vshl.u64	q8, q9, q8
-@ CHECK: vshl.i8	q8, q8, #7  @ encoding: [0x70,0x05,0xcf,0xf2]
 	vshl.i8	q8, q8, #7
-@ CHECK: vshl.i16	q8, q8, #15  @ encoding: [0x70,0x05,0xdf,0xf2]
 	vshl.i16	q8, q8, #15
-@ CHECK: vshl.i32	q8, q8, #31  @ encoding: [0x70,0x05,0xff,0xf2]
 	vshl.i32	q8, q8, #31
-@ CHECK: vshl.i64	q8, q8, #63  @ encoding: [0xf0,0x05,0xff,0xf2]
 	vshl.i64	q8, q8, #63
-@ CHECK: vshr.u8	d16, d16, #7  @ encoding: [0x30,0x00,0xc9,0xf3]
+
+@ CHECK: vshl.u8	d16, d17, d16  @ encoding: [0xa1,0x04,0x40,0xf3]
+@ CHECK: vshl.u16	d16, d17, d16  @ encoding: [0xa1,0x04,0x50,0xf3]
+@ CHECK: vshl.u32	d16, d17, d16  @ encoding: [0xa1,0x04,0x60,0xf3]
+@ CHECK: vshl.u64	d16, d17, d16  @ encoding: [0xa1,0x04,0x70,0xf3]
+@ CHECK: vshl.i8	d16, d16, #7  @ encoding: [0x30,0x05,0xcf,0xf2]
+@ CHECK: vshl.i16	d16, d16, #15  @ encoding: [0x30,0x05,0xdf,0xf2]
+@ CHECK: vshl.i32	d16, d16, #31  @ encoding: [0x30,0x05,0xff,0xf2]
+@ CHECK: vshl.i64	d16, d16, #63  @ encoding: [0xb0,0x05,0xff,0xf2]
+@ CHECK: vshl.u8	q8, q9, q8  @ encoding: [0xe2,0x04,0x40,0xf3]
+@ CHECK: vshl.u16	q8, q9, q8  @ encoding: [0xe2,0x04,0x50,0xf3]
+@ CHECK: vshl.u32	q8, q9, q8  @ encoding: [0xe2,0x04,0x60,0xf3]
+@ CHECK: vshl.u64	q8, q9, q8  @ encoding: [0xe2,0x04,0x70,0xf3]
+@ CHECK: vshl.i8	q8, q8, #7  @ encoding: [0x70,0x05,0xcf,0xf2]
+@ CHECK: vshl.i16	q8, q8, #15  @ encoding: [0x70,0x05,0xdf,0xf2]
+@ CHECK: vshl.i32	q8, q8, #31  @ encoding: [0x70,0x05,0xff,0xf2]
+@ CHECK: vshl.i64	q8, q8, #63  @ encoding: [0xf0,0x05,0xff,0xf2]
+
+
 	vshr.u8	d16, d16, #7
-@ CHECK: vshr.u16	d16, d16, #15  @ encoding: [0x30,0x00,0xd1,0xf3]
 	vshr.u16	d16, d16, #15
-@ CHECK: vshr.u32	d16, d16, #31  @ encoding: [0x30,0x00,0xe1,0xf3]
 	vshr.u32	d16, d16, #31
-@ CHECK: vshr.u64	d16, d16, #63  @ encoding: [0xb0,0x00,0xc1,0xf3]
 	vshr.u64	d16, d16, #63
-@ CHECK: vshr.u8	q8, q8, #7  @ encoding: [0x70,0x00,0xc9,0xf3]
 	vshr.u8	q8, q8, #7
-@ CHECK: vshr.u16	q8, q8, #15  @ encoding: [0x70,0x00,0xd1,0xf3]
 	vshr.u16	q8, q8, #15
-@ CHECK: vshr.u32	q8, q8, #31  @ encoding: [0x70,0x00,0xe1,0xf3]
 	vshr.u32	q8, q8, #31
-@ CHECK: vshr.u64	q8, q8, #63  @ encoding: [0xf0,0x00,0xc1,0xf3]
 	vshr.u64	q8, q8, #63
-@ CHECK: vshr.s8	d16, d16, #7  @ encoding: [0x30,0x00,0xc9,0xf2]
 	vshr.s8	d16, d16, #7
-@ CHECK: vshr.s16	d16, d16, #15  @ encoding: [0x30,0x00,0xd1,0xf2]
 	vshr.s16	d16, d16, #15
-@ CHECK: vshr.s32	d16, d16, #31  @ encoding: [0x30,0x00,0xe1,0xf2]
 	vshr.s32	d16, d16, #31
-@ CHECK: vshr.s64	d16, d16, #63  @ encoding: [0xb0,0x00,0xc1,0xf2]
 	vshr.s64	d16, d16, #63
-@ CHECK: vshr.s8	q8, q8, #7  @ encoding: [0x70,0x00,0xc9,0xf2]
 	vshr.s8	q8, q8, #7
-@ CHECK: vshr.s16	q8, q8, #15  @ encoding: [0x70,0x00,0xd1,0xf2]
 	vshr.s16	q8, q8, #15
-@ CHECK: vshr.s32	q8, q8, #31  @ encoding: [0x70,0x00,0xe1,0xf2]
 	vshr.s32	q8, q8, #31
-@ CHECK: vshr.s64	q8, q8, #63  @ encoding: [0xf0,0x00,0xc1,0xf2]
 	vshr.s64	q8, q8, #63
-@ CHECK: vsra.u8  d16, d16, #7   @ encoding: [0x30,0x01,0xc9,0xf3]
-	vsra.u8   d16, d16, #7
-@ CHECK: vsra.u16 d16, d16, #15  @ encoding: [0x30,0x01,0xd1,0xf3]
-	vsra.u16  d16, d16, #15
-@ CHECK: vsra.u32 d16, d16, #31  @ encoding: [0x30,0x01,0xe1,0xf3]
-	vsra.u32  d16, d16, #31
-@ CHECK: vsra.u64 d16, d16, #63  @ encoding: [0xb0,0x01,0xc1,0xf3]
-	vsra.u64  d16, d16, #63
-@ CHECK: vsra.u8  q8, q8, #7     @ encoding: [0x70,0x01,0xc9,0xf3]
-	vsra.u8   q8, q8, #7
-@ CHECK: vsra.u16 q8, q8, #15    @ encoding: [0x70,0x01,0xd1,0xf3]
-	vsra.u16  q8, q8, #15
-@ CHECK: vsra.u32 q8, q8, #31    @ encoding: [0x70,0x01,0xe1,0xf3]
-	vsra.u32  q8, q8, #31
-@ CHECK: vsra.u64 q8, q8, #63    @ encoding: [0xf0,0x01,0xc1,0xf3]
-	vsra.u64  q8, q8, #63
-@ CHECK: vsra.s8  d16, d16, #7   @ encoding: [0x30,0x01,0xc9,0xf2]
-	vsra.s8   d16, d16, #7
-@ CHECK: vsra.s16 d16, d16, #15  @ encoding: [0x30,0x01,0xd1,0xf2]
-	vsra.s16  d16, d16, #15
-@ CHECK: vsra.s32 d16, d16, #31  @ encoding: [0x30,0x01,0xe1,0xf2]
-	vsra.s32  d16, d16, #31
-@ CHECK: vsra.s64 d16, d16, #63  @ encoding: [0xb0,0x01,0xc1,0xf2]
-	vsra.s64  d16, d16, #63
-@ CHECK: vsra.s8  q8, q8, #7     @ encoding: [0x70,0x01,0xc9,0xf2]
-	vsra.s8   q8, q8, #7
-@ CHECK: vsra.s16 q8, q8, #15    @ encoding: [0x70,0x01,0xd1,0xf2]
-	vsra.s16  q8, q8, #15
-@ CHECK: vsra.s32 q8, q8, #31    @ encoding: [0x70,0x01,0xe1,0xf2]
-	vsra.s32  q8, q8, #31
-@ CHECK: vsra.s64 q8, q8, #63    @ encoding: [0xf0,0x01,0xc1,0xf2]
-	vsra.s64  q8, q8, #63
-@ CHECK: vsri.8   d16, d16, #7  @ encoding: [0x30,0x04,0xc9,0xf3]
-	vsri.8   d16, d16, #7
-@ CHECK: vsri.16  d16, d16, #15 @ encoding: [0x30,0x04,0xd1,0xf3]
-	vsri.16  d16, d16, #15
-@ CHECK: vsri.32  d16, d16, #31 @ encoding: [0x30,0x04,0xe1,0xf3]
-	vsri.32  d16, d16, #31
-@ CHECK: vsri.64  d16, d16, #63 @ encoding: [0xb0,0x04,0xc1,0xf3]
-	vsri.64  d16, d16, #63
-@ CHECK: vsri.8   q8, q8, #7    @ encoding: [0x70,0x04,0xc9,0xf3]
-	vsri.8   q8, q8, #7
-@ CHECK: vsri.16  q8, q8, #15   @ encoding: [0x70,0x04,0xd1,0xf3]
-	vsri.16  q8, q8, #15
-@ CHECK: vsri.32  q8, q8, #31   @ encoding: [0x70,0x04,0xe1,0xf3]
-	vsri.32  q8, q8, #31
-@ CHECK: vsri.64  q8, q8, #63   @ encoding: [0xf0,0x04,0xc1,0xf3]
-	vsri.64  q8, q8, #63
-@ CHECK: vsli.8   d16, d16, #7  @ encoding: [0x30,0x05,0xcf,0xf3]
-	vsli.8   d16, d16, #7
-@ CHECK: vsli.16  d16, d16, #15 @ encoding: [0x30,0x05,0xdf,0xf3]
-	vsli.16  d16, d16, #15
-@ CHECK: vsli.32  d16, d16, #31 @ encoding: [0x30,0x05,0xff,0xf3]
-	vsli.32  d16, d16, #31
-@ CHECK: vsli.64  d16, d16, #63 @ encoding: [0xb0,0x05,0xff,0xf3]
-	vsli.64  d16, d16, #63
-@ CHECK: vsli.8   q8, q8, #7    @ encoding: [0x70,0x05,0xcf,0xf3]
-	vsli.8   q8, q8, #7
-@ CHECK: vsli.16  q8, q8, #15   @ encoding: [0x70,0x05,0xdf,0xf3]
-	vsli.16  q8, q8, #15
-@ CHECK: vsli.32  q8, q8, #31   @ encoding: [0x70,0x05,0xff,0xf3]
-	vsli.32  q8, q8, #31
-@ CHECK: vsli.64  q8, q8, #63   @ encoding: [0xf0,0x05,0xff,0xf3]
-	vsli.64  q8, q8, #63
+
+@ CHECK: vshr.u8	d16, d16, #7  @ encoding: [0x30,0x00,0xc9,0xf3]
+@ CHECK: vshr.u16	d16, d16, #15  @ encoding: [0x30,0x00,0xd1,0xf3]
+@ CHECK: vshr.u32	d16, d16, #31  @ encoding: [0x30,0x00,0xe1,0xf3]
+@ CHECK: vshr.u64	d16, d16, #63  @ encoding: [0xb0,0x00,0xc1,0xf3]
+@ CHECK: vshr.u8	q8, q8, #7  @ encoding: [0x70,0x00,0xc9,0xf3]
+@ CHECK: vshr.u16	q8, q8, #15  @ encoding: [0x70,0x00,0xd1,0xf3]
+@ CHECK: vshr.u32	q8, q8, #31  @ encoding: [0x70,0x00,0xe1,0xf3]
+@ CHECK: vshr.u64	q8, q8, #63  @ encoding: [0xf0,0x00,0xc1,0xf3]
+@ CHECK: vshr.s8	d16, d16, #7  @ encoding: [0x30,0x00,0xc9,0xf2]
+@ CHECK: vshr.s16	d16, d16, #15  @ encoding: [0x30,0x00,0xd1,0xf2]
+@ CHECK: vshr.s32	d16, d16, #31  @ encoding: [0x30,0x00,0xe1,0xf2]
+@ CHECK: vshr.s64	d16, d16, #63  @ encoding: [0xb0,0x00,0xc1,0xf2]
+@ CHECK: vshr.s8	q8, q8, #7  @ encoding: [0x70,0x00,0xc9,0xf2]
+@ CHECK: vshr.s16	q8, q8, #15  @ encoding: [0x70,0x00,0xd1,0xf2]
+@ CHECK: vshr.s32	q8, q8, #31  @ encoding: [0x70,0x00,0xe1,0xf2]
+@ CHECK: vshr.s64	q8, q8, #63  @ encoding: [0xf0,0x00,0xc1,0xf2]
+
+@ implied destination operand variants.
+	vshr.u8	d16, #7
+	vshr.u16	d16, #15
+	vshr.u32	d16, #31
+	vshr.u64	d16, #63
+	vshr.u8	q8, #7
+	vshr.u16	q8, #15
+	vshr.u32	q8, #31
+	vshr.u64	q8, #63
+	vshr.s8	d16, #7
+	vshr.s16	d16, #15
+	vshr.s32	d16, #31
+	vshr.s64	d16, #63
+	vshr.s8	q8, #7
+	vshr.s16	q8, #15
+	vshr.s32	q8, #31
+	vshr.s64	q8, #63
+
+@ CHECK: vshr.u8	d16, d16, #7  @ encoding: [0x30,0x00,0xc9,0xf3]
+@ CHECK: vshr.u16	d16, d16, #15  @ encoding: [0x30,0x00,0xd1,0xf3]
+@ CHECK: vshr.u32	d16, d16, #31  @ encoding: [0x30,0x00,0xe1,0xf3]
+@ CHECK: vshr.u64	d16, d16, #63  @ encoding: [0xb0,0x00,0xc1,0xf3]
+@ CHECK: vshr.u8	q8, q8, #7  @ encoding: [0x70,0x00,0xc9,0xf3]
+@ CHECK: vshr.u16	q8, q8, #15  @ encoding: [0x70,0x00,0xd1,0xf3]
+@ CHECK: vshr.u32	q8, q8, #31  @ encoding: [0x70,0x00,0xe1,0xf3]
+@ CHECK: vshr.u64	q8, q8, #63  @ encoding: [0xf0,0x00,0xc1,0xf3]
+@ CHECK: vshr.s8	d16, d16, #7  @ encoding: [0x30,0x00,0xc9,0xf2]
+@ CHECK: vshr.s16	d16, d16, #15  @ encoding: [0x30,0x00,0xd1,0xf2]
+@ CHECK: vshr.s32	d16, d16, #31  @ encoding: [0x30,0x00,0xe1,0xf2]
+@ CHECK: vshr.s64	d16, d16, #63  @ encoding: [0xb0,0x00,0xc1,0xf2]
+@ CHECK: vshr.s8	q8, q8, #7  @ encoding: [0x70,0x00,0xc9,0xf2]
+@ CHECK: vshr.s16	q8, q8, #15  @ encoding: [0x70,0x00,0xd1,0xf2]
+@ CHECK: vshr.s32	q8, q8, #31  @ encoding: [0x70,0x00,0xe1,0xf2]
+@ CHECK: vshr.s64	q8, q8, #63  @ encoding: [0xf0,0x00,0xc1,0xf2]
+
+
+	vsra.s8   d16, d6, #7
+	vsra.s16  d26, d18, #15
+	vsra.s32  d11, d10, #31
+	vsra.s64  d12, d19, #63
+	vsra.s8   q1, q8, #7
+	vsra.s16  q2, q7, #15
+	vsra.s32  q3, q6, #31
+	vsra.s64  q4, q5, #63
+
+	vsra.s8   d16, #7
+	vsra.s16  d15, #15
+	vsra.s32  d14, #31
+	vsra.s64  d13, #63
+	vsra.s8   q4, #7
+	vsra.s16  q5, #15
+	vsra.s32  q6, #31
+	vsra.s64  q7, #63
+
+@ CHECK: vsra.s8	d16, d6, #7     @ encoding: [0x16,0x01,0xc9,0xf2]
+@ CHECK: vsra.s16	d26, d18, #15   @ encoding: [0x32,0xa1,0xd1,0xf2]
+@ CHECK: vsra.s32	d11, d10, #31   @ encoding: [0x1a,0xb1,0xa1,0xf2]
+@ CHECK: vsra.s64	d12, d19, #63   @ encoding: [0xb3,0xc1,0x81,0xf2]
+@ CHECK: vsra.s8	q1, q8, #7      @ encoding: [0x70,0x21,0x89,0xf2]
+@ CHECK: vsra.s16	q2, q7, #15     @ encoding: [0x5e,0x41,0x91,0xf2]
+@ CHECK: vsra.s32	q3, q6, #31     @ encoding: [0x5c,0x61,0xa1,0xf2]
+@ CHECK: vsra.s64	q4, q5, #63     @ encoding: [0xda,0x81,0x81,0xf2]
+@ CHECK: vsra.s8	d16, d16, #7    @ encoding: [0x30,0x01,0xc9,0xf2]
+@ CHECK: vsra.s16	d15, d15, #15   @ encoding: [0x1f,0xf1,0x91,0xf2]
+@ CHECK: vsra.s32	d14, d14, #31   @ encoding: [0x1e,0xe1,0xa1,0xf2]
+@ CHECK: vsra.s64	d13, d13, #63   @ encoding: [0x9d,0xd1,0x81,0xf2]
+@ CHECK: vsra.s8	q4, q4, #7      @ encoding: [0x58,0x81,0x89,0xf2]
+@ CHECK: vsra.s16	q5, q5, #15     @ encoding: [0x5a,0xa1,0x91,0xf2]
+@ CHECK: vsra.s32	q6, q6, #31     @ encoding: [0x5c,0xc1,0xa1,0xf2]
+@ CHECK: vsra.s64	q7, q7, #63     @ encoding: [0xde,0xe1,0x81,0xf2]
+
+
+	vsra.u8   d16, d6, #7
+	vsra.u16  d26, d18, #15
+	vsra.u32  d11, d10, #31
+	vsra.u64  d12, d19, #63
+	vsra.u8   q1, q8, #7
+	vsra.u16  q2, q7, #15
+	vsra.u32  q3, q6, #31
+	vsra.u64  q4, q5, #63
+
+	vsra.u8   d16, #7
+	vsra.u16  d15, #15
+	vsra.u32  d14, #31
+	vsra.u64  d13, #63
+	vsra.u8   q4, #7
+	vsra.u16  q5, #15
+	vsra.u32  q6, #31
+	vsra.u64  q7, #63
+
+@ CHECK: vsra.u8	d16, d6, #7     @ encoding: [0x16,0x01,0xc9,0xf3]
+@ CHECK: vsra.u16	d26, d18, #15   @ encoding: [0x32,0xa1,0xd1,0xf3]
+@ CHECK: vsra.u32	d11, d10, #31   @ encoding: [0x1a,0xb1,0xa1,0xf3]
+@ CHECK: vsra.u64	d12, d19, #63   @ encoding: [0xb3,0xc1,0x81,0xf3]
+@ CHECK: vsra.u8	q1, q8, #7      @ encoding: [0x70,0x21,0x89,0xf3]
+@ CHECK: vsra.u16	q2, q7, #15     @ encoding: [0x5e,0x41,0x91,0xf3]
+@ CHECK: vsra.u32	q3, q6, #31     @ encoding: [0x5c,0x61,0xa1,0xf3]
+@ CHECK: vsra.u64	q4, q5, #63     @ encoding: [0xda,0x81,0x81,0xf3]
+@ CHECK: vsra.u8	d16, d16, #7    @ encoding: [0x30,0x01,0xc9,0xf3]
+@ CHECK: vsra.u16	d15, d15, #15   @ encoding: [0x1f,0xf1,0x91,0xf3]
+@ CHECK: vsra.u32	d14, d14, #31   @ encoding: [0x1e,0xe1,0xa1,0xf3]
+@ CHECK: vsra.u64	d13, d13, #63   @ encoding: [0x9d,0xd1,0x81,0xf3]
+@ CHECK: vsra.u8	q4, q4, #7      @ encoding: [0x58,0x81,0x89,0xf3]
+@ CHECK: vsra.u16	q5, q5, #15     @ encoding: [0x5a,0xa1,0x91,0xf3]
+@ CHECK: vsra.u32	q6, q6, #31     @ encoding: [0x5c,0xc1,0xa1,0xf3]
+@ CHECK: vsra.u64	q7, q7, #63     @ encoding: [0xde,0xe1,0x81,0xf3]
+
+
+	vsri.8   d16, d6, #7
+	vsri.16  d26, d18, #15
+	vsri.32  d11, d10, #31
+	vsri.64  d12, d19, #63
+	vsri.8   q1, q8, #7
+	vsri.16  q2, q7, #15
+	vsri.32  q3, q6, #31
+	vsri.64  q4, q5, #63
+
+	vsri.8   d16, #7
+	vsri.16  d15, #15
+	vsri.32  d14, #31
+	vsri.64  d13, #63
+	vsri.8   q4, #7
+	vsri.16  q5, #15
+	vsri.32  q6, #31
+	vsri.64  q7, #63
+
+@ CHECK: vsri.8	d16, d6, #7             @ encoding: [0x16,0x04,0xc9,0xf3]
+@ CHECK: vsri.16 d26, d18, #15          @ encoding: [0x32,0xa4,0xd1,0xf3]
+@ CHECK: vsri.32 d11, d10, #31          @ encoding: [0x1a,0xb4,0xa1,0xf3]
+@ CHECK: vsri.64 d12, d19, #63          @ encoding: [0xb3,0xc4,0x81,0xf3]
+@ CHECK: vsri.8	q1, q8, #7              @ encoding: [0x70,0x24,0x89,0xf3]
+@ CHECK: vsri.16 q2, q7, #15            @ encoding: [0x5e,0x44,0x91,0xf3]
+@ CHECK: vsri.32 q3, q6, #31            @ encoding: [0x5c,0x64,0xa1,0xf3]
+@ CHECK: vsri.64 q4, q5, #63            @ encoding: [0xda,0x84,0x81,0xf3]
+@ CHECK: vsri.8	d16, d16, #7            @ encoding: [0x30,0x04,0xc9,0xf3]
+@ CHECK: vsri.16 d15, d15, #15          @ encoding: [0x1f,0xf4,0x91,0xf3]
+@ CHECK: vsri.32 d14, d14, #31          @ encoding: [0x1e,0xe4,0xa1,0xf3]
+@ CHECK: vsri.64 d13, d13, #63          @ encoding: [0x9d,0xd4,0x81,0xf3]
+@ CHECK: vsri.8	q4, q4, #7              @ encoding: [0x58,0x84,0x89,0xf3]
+@ CHECK: vsri.16 q5, q5, #15            @ encoding: [0x5a,0xa4,0x91,0xf3]
+@ CHECK: vsri.32 q6, q6, #31            @ encoding: [0x5c,0xc4,0xa1,0xf3]
+@ CHECK: vsri.64 q7, q7, #63            @ encoding: [0xde,0xe4,0x81,0xf3]
+
+
+	vsli.8   d16, d6, #7
+	vsli.16  d26, d18, #15
+	vsli.32  d11, d10, #31
+	vsli.64  d12, d19, #63
+	vsli.8   q1, q8, #7
+	vsli.16  q2, q7, #15
+	vsli.32  q3, q6, #31
+	vsli.64  q4, q5, #63
+
+	vsli.8   d16, #7
+	vsli.16  d15, #15
+	vsli.32  d14, #31
+	vsli.64  d13, #63
+	vsli.8   q4, #7
+	vsli.16  q5, #15
+	vsli.32  q6, #31
+	vsli.64  q7, #63
+
+@ CHECK: vsli.8	d16, d6, #7             @ encoding: [0x16,0x05,0xcf,0xf3]
+@ CHECK: vsli.16 d26, d18, #15          @ encoding: [0x32,0xa5,0xdf,0xf3]
+@ CHECK: vsli.32 d11, d10, #31          @ encoding: [0x1a,0xb5,0xbf,0xf3]
+@ CHECK: vsli.64 d12, d19, #63          @ encoding: [0xb3,0xc5,0xbf,0xf3]
+@ CHECK: vsli.8	q1, q8, #7              @ encoding: [0x70,0x25,0x8f,0xf3]
+@ CHECK: vsli.16 q2, q7, #15            @ encoding: [0x5e,0x45,0x9f,0xf3]
+@ CHECK: vsli.32 q3, q6, #31            @ encoding: [0x5c,0x65,0xbf,0xf3]
+@ CHECK: vsli.64 q4, q5, #63            @ encoding: [0xda,0x85,0xbf,0xf3]
+@ CHECK: vsli.8	d16, d16, #7            @ encoding: [0x30,0x05,0xcf,0xf3]
+@ CHECK: vsli.16 d15, d15, #15          @ encoding: [0x1f,0xf5,0x9f,0xf3]
+@ CHECK: vsli.32 d14, d14, #31          @ encoding: [0x1e,0xe5,0xbf,0xf3]
+@ CHECK: vsli.64 d13, d13, #63          @ encoding: [0x9d,0xd5,0xbf,0xf3]
+@ CHECK: vsli.8	q4, q4, #7              @ encoding: [0x58,0x85,0x8f,0xf3]
+@ CHECK: vsli.16 q5, q5, #15            @ encoding: [0x5a,0xa5,0x9f,0xf3]
+@ CHECK: vsli.32 q6, q6, #31            @ encoding: [0x5c,0xc5,0xbf,0xf3]
+@ CHECK: vsli.64 q7, q7, #63            @ encoding: [0xde,0xe5,0xbf,0xf3]
+
+
 @ CHECK: vshll.s8	q8, d16, #7  @ encoding: [0x30,0x0a,0xcf,0xf2]
 	vshll.s8	q8, d16, #7
 @ CHECK: vshll.s16	q8, d16, #15  @ encoding: [0x30,0x0a,0xdf,0xf2]
@@ -235,3 +356,134 @@ _foo:
 	vqrshrn.u32	d16, q8, #13
 @ CHECK: vqrshrn.u64	d16, q8, #13  @ encoding: [0x70,0x09,0xf3,0xf3]
 	vqrshrn.u64	d16, q8, #13
+
+@ Optional destination operand variants.
+        vshl.s8 q4, q5
+        vshl.s16 q4, q5
+        vshl.s32 q4, q5
+        vshl.s64 q4, q5
+
+        vshl.u8 q4, q5
+        vshl.u16 q4, q5
+        vshl.u32 q4, q5
+        vshl.u64 q4, q5
+
+        vshl.s8 d4, d5
+        vshl.s16 d4, d5
+        vshl.s32 d4, d5
+        vshl.s64 d4, d5
+
+        vshl.u8 d4, d5
+        vshl.u16 d4, d5
+        vshl.u32 d4, d5
+        vshl.u64 d4, d5
+
+@ CHECK: vshl.s8	q4, q4, q5      @ encoding: [0x48,0x84,0x0a,0xf2]
+@ CHECK: vshl.s16	q4, q4, q5      @ encoding: [0x48,0x84,0x1a,0xf2]
+@ CHECK: vshl.s32	q4, q4, q5      @ encoding: [0x48,0x84,0x2a,0xf2]
+@ CHECK: vshl.s64	q4, q4, q5      @ encoding: [0x48,0x84,0x3a,0xf2]
+
+@ CHECK: vshl.u8	q4, q4, q5      @ encoding: [0x48,0x84,0x0a,0xf3]
+@ CHECK: vshl.u16	q4, q4, q5      @ encoding: [0x48,0x84,0x1a,0xf3]
+@ CHECK: vshl.u32	q4, q4, q5      @ encoding: [0x48,0x84,0x2a,0xf3]
+@ CHECK: vshl.u64	q4, q4, q5      @ encoding: [0x48,0x84,0x3a,0xf3]
+
+@ CHECK: vshl.s8	d4, d4, d5      @ encoding: [0x04,0x44,0x05,0xf2]
+@ CHECK: vshl.s16	d4, d4, d5      @ encoding: [0x04,0x44,0x15,0xf2]
+@ CHECK: vshl.s32	d4, d4, d5      @ encoding: [0x04,0x44,0x25,0xf2]
+@ CHECK: vshl.s64	d4, d4, d5      @ encoding: [0x04,0x44,0x35,0xf2]
+
+@ CHECK: vshl.u8	d4, d4, d5      @ encoding: [0x04,0x44,0x05,0xf3]
+@ CHECK: vshl.u16	d4, d4, d5      @ encoding: [0x04,0x44,0x15,0xf3]
+@ CHECK: vshl.u32	d4, d4, d5      @ encoding: [0x04,0x44,0x25,0xf3]
+@ CHECK: vshl.u64	d4, d4, d5      @ encoding: [0x04,0x44,0x35,0xf3]
+
+        vshl.s8 q4, #2
+        vshl.s16 q4, #14
+        vshl.s32 q4, #27
+        vshl.s64 q4, #35
+
+        vshl.s8 d4, #6
+        vshl.u16 d4, #10
+        vshl.s32 d4, #17
+        vshl.u64 d4, #43
+
+@ CHECK: vshl.i8	q4, q4, #2      @ encoding: [0x58,0x85,0x8a,0xf2]
+@ CHECK: vshl.i16	q4, q4, #14     @ encoding: [0x58,0x85,0x9e,0xf2]
+@ CHECK: vshl.i32	q4, q4, #27     @ encoding: [0x58,0x85,0xbb,0xf2]
+@ CHECK: vshl.i64	q4, q4, #35     @ encoding: [0xd8,0x85,0xa3,0xf2]
+
+@ CHECK: vshl.i8	d4, d4, #6      @ encoding: [0x14,0x45,0x8e,0xf2]
+@ CHECK: vshl.i16	d4, d4, #10     @ encoding: [0x14,0x45,0x9a,0xf2]
+@ CHECK: vshl.i32	d4, d4, #17     @ encoding: [0x14,0x45,0xb1,0xf2]
+@ CHECK: vshl.i64	d4, d4, #43     @ encoding: [0x94,0x45,0xab,0xf2]
+
+
+@ Two-operand forms.
+	vshr.s8	d15, #8
+	vshr.s16	d12, #16
+	vshr.s32	d13, #32
+	vshr.s64	d14, #64
+	vshr.u8	d16, #8
+	vshr.u16	d17, #16
+	vshr.u32	d6, #32
+	vshr.u64	d10, #64
+	vshr.s8	q1, #8
+	vshr.s16	q2, #16
+	vshr.s32	q3, #32
+	vshr.s64	q4, #64
+	vshr.u8	q5, #8
+	vshr.u16	q6, #16
+	vshr.u32	q7, #32
+	vshr.u64	q8, #64
+
+@ CHECK: vshr.s8	d15, d15, #8    @ encoding: [0x1f,0xf0,0x88,0xf2]
+@ CHECK: vshr.s16	d12, d12, #16   @ encoding: [0x1c,0xc0,0x90,0xf2]
+@ CHECK: vshr.s32	d13, d13, #32   @ encoding: [0x1d,0xd0,0xa0,0xf2]
+@ CHECK: vshr.s64	d14, d14, #64   @ encoding: [0x9e,0xe0,0x80,0xf2]
+@ CHECK: vshr.u8	d16, d16, #8    @ encoding: [0x30,0x00,0xc8,0xf3]
+@ CHECK: vshr.u16	d17, d17, #16   @ encoding: [0x31,0x10,0xd0,0xf3]
+@ CHECK: vshr.u32	d6, d6, #32     @ encoding: [0x16,0x60,0xa0,0xf3]
+@ CHECK: vshr.u64	d10, d10, #64   @ encoding: [0x9a,0xa0,0x80,0xf3]
+@ CHECK: vshr.s8	q1, q1, #8      @ encoding: [0x52,0x20,0x88,0xf2]
+@ CHECK: vshr.s16	q2, q2, #16     @ encoding: [0x54,0x40,0x90,0xf2]
+@ CHECK: vshr.s32	q3, q3, #32     @ encoding: [0x56,0x60,0xa0,0xf2]
+@ CHECK: vshr.s64	q4, q4, #64     @ encoding: [0xd8,0x80,0x80,0xf2]
+@ CHECK: vshr.u8	q5, q5, #8      @ encoding: [0x5a,0xa0,0x88,0xf3]
+@ CHECK: vshr.u16	q6, q6, #16     @ encoding: [0x5c,0xc0,0x90,0xf3]
+@ CHECK: vshr.u32	q7, q7, #32     @ encoding: [0x5e,0xe0,0xa0,0xf3]
+@ CHECK: vshr.u64	q8, q8, #64     @ encoding: [0xf0,0x00,0xc0,0xf3]
+
+	vrshr.s8	d15, #8
+	vrshr.s16	d12, #16
+	vrshr.s32	d13, #32
+	vrshr.s64	d14, #64
+	vrshr.u8	d16, #8
+	vrshr.u16	d17, #16
+	vrshr.u32	d6, #32
+	vrshr.u64	d10, #64
+	vrshr.s8	q1, #8
+	vrshr.s16	q2, #16
+	vrshr.s32	q3, #32
+	vrshr.s64	q4, #64
+	vrshr.u8	q5, #8
+	vrshr.u16	q6, #16
+	vrshr.u32	q7, #32
+	vrshr.u64	q8, #64
+
+@ CHECK: vrshr.s8	d15, d15, #8    @ encoding: [0x1f,0xf2,0x88,0xf2]
+@ CHECK: vrshr.s16	d12, d12, #16   @ encoding: [0x1c,0xc2,0x90,0xf2]
+@ CHECK: vrshr.s32	d13, d13, #32   @ encoding: [0x1d,0xd2,0xa0,0xf2]
+@ CHECK: vrshr.s64	d14, d14, #64   @ encoding: [0x9e,0xe2,0x80,0xf2]
+@ CHECK: vrshr.u8	d16, d16, #8    @ encoding: [0x30,0x02,0xc8,0xf3]
+@ CHECK: vrshr.u16	d17, d17, #16   @ encoding: [0x31,0x12,0xd0,0xf3]
+@ CHECK: vrshr.u32	d6, d6, #32     @ encoding: [0x16,0x62,0xa0,0xf3]
+@ CHECK: vrshr.u64	d10, d10, #64   @ encoding: [0x9a,0xa2,0x80,0xf3]
+@ CHECK: vrshr.s8	q1, q1, #8      @ encoding: [0x52,0x22,0x88,0xf2]
+@ CHECK: vrshr.s16	q2, q2, #16     @ encoding: [0x54,0x42,0x90,0xf2]
+@ CHECK: vrshr.s32	q3, q3, #32     @ encoding: [0x56,0x62,0xa0,0xf2]
+@ CHECK: vrshr.s64	q4, q4, #64     @ encoding: [0xd8,0x82,0x80,0xf2]
+@ CHECK: vrshr.u8	q5, q5, #8      @ encoding: [0x5a,0xa2,0x88,0xf3]
+@ CHECK: vrshr.u16	q6, q6, #16     @ encoding: [0x5c,0xc2,0x90,0xf3]
+@ CHECK: vrshr.u32	q7, q7, #32     @ encoding: [0x5e,0xe2,0xa0,0xf3]
+@ CHECK: vrshr.u64	q8, q8, #64     @ encoding: [0xf0,0x02,0xc0,0xf3]
diff --git a/test/MC/ARM/neon-shiftaccum-encoding.s b/test/MC/ARM/neon-shiftaccum-encoding.s
deleted file mode 100644
index 0dc630d..0000000
--- a/test/MC/ARM/neon-shiftaccum-encoding.s
+++ /dev/null
@@ -1,98 +0,0 @@
-@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
-
-@ CHECK: vsra.s8	d17, d16, #8            @ encoding: [0x30,0x11,0xc8,0xf2]
-	vsra.s8	d17, d16, #8
-@ CHECK: vsra.s16	d17, d16, #16   @ encoding: [0x30,0x11,0xd0,0xf2]
-	vsra.s16	d17, d16, #16
-@ CHECK: vsra.s32	d17, d16, #32   @ encoding: [0x30,0x11,0xe0,0xf2]
-	vsra.s32	d17, d16, #32
-@ CHECK: vsra.s64	d17, d16, #64   @ encoding: [0xb0,0x11,0xc0,0xf2]
-	vsra.s64	d17, d16, #64
-@ CHECK: vsra.s8	q8, q9, #8              @ encoding: [0x72,0x01,0xc8,0xf2]
-	vsra.s8	q8, q9, #8
-@ CHECK: vsra.s16	q8, q9, #16     @ encoding: [0x72,0x01,0xd0,0xf2]
-	vsra.s16	q8, q9, #16
-@ CHECK: vsra.s32	q8, q9, #32     @ encoding: [0x72,0x01,0xe0,0xf2]
-	vsra.s32	q8, q9, #32
-@ CHECK: vsra.s64	q8, q9, #64     @ encoding: [0xf2,0x01,0xc0,0xf2]
-	vsra.s64	q8, q9, #64
-@ CHECK: vsra.u8	d17, d16, #8            @ encoding: [0x30,0x11,0xc8,0xf3]
-	vsra.u8	d17, d16, #8
-@ CHECK: vsra.u16	d17, d16, #16   @ encoding: [0x30,0x11,0xd0,0xf3]
-	vsra.u16	d17, d16, #16
-@ CHECK: vsra.u32	d17, d16, #32   @ encoding: [0x30,0x11,0xe0,0xf3]
-	vsra.u32	d17, d16, #32
-@ CHECK: vsra.u64	d17, d16, #64   @ encoding: [0xb0,0x11,0xc0,0xf3]
-	vsra.u64	d17, d16, #64
-@ CHECK: vsra.u8	q8, q9, #8              @ encoding: [0x72,0x01,0xc8,0xf3]
-	vsra.u8	q8, q9, #8
-@ CHECK: vsra.u16	q8, q9, #16     @ encoding: [0x72,0x01,0xd0,0xf3]
-	vsra.u16	q8, q9, #16
-@ CHECK: vsra.u32	q8, q9, #32     @ encoding: [0x72,0x01,0xe0,0xf3]
-	vsra.u32	q8, q9, #32
-@ CHECK: vsra.u64	q8, q9, #64     @ encoding: [0xf2,0x01,0xc0,0xf3]
-	vsra.u64	q8, q9, #64
-@ CHECK: vrsra.s8	d17, d16, #8    @ encoding: [0x30,0x13,0xc8,0xf2]
-	vrsra.s8	d17, d16, #8
-@ CHECK: vrsra.s16	d17, d16, #16   @ encoding: [0x30,0x13,0xd0,0xf2]
-	vrsra.s16	d17, d16, #16
-@ CHECK: vrsra.s32	d17, d16, #32   @ encoding: [0x30,0x13,0xe0,0xf2]
-	vrsra.s32	d17, d16, #32
-@ CHECK: vrsra.s64	d17, d16, #64   @ encoding: [0xb0,0x13,0xc0,0xf2]
-	vrsra.s64	d17, d16, #64
-@ CHECK: vrsra.u8	d17, d16, #8    @ encoding: [0x30,0x13,0xc8,0xf3]
-	vrsra.u8	d17, d16, #8
-@ CHECK: vrsra.u16	d17, d16, #16   @ encoding: [0x30,0x13,0xd0,0xf3]
-	vrsra.u16	d17, d16, #16
-@ CHECK: vrsra.u32	d17, d16, #32   @ encoding: [0x30,0x13,0xe0,0xf3]
-	vrsra.u32	d17, d16, #32
-@ CHECK: vrsra.u64	d17, d16, #64   @ encoding: [0xb0,0x13,0xc0,0xf3]
-	vrsra.u64	d17, d16, #64
-@ CHECK: vrsra.s8	q8, q9, #8      @ encoding: [0x72,0x03,0xc8,0xf2]
-	vrsra.s8	q8, q9, #8
-@ CHECK: vrsra.s16	q8, q9, #16     @ encoding: [0x72,0x03,0xd0,0xf2]
-	vrsra.s16	q8, q9, #16
-@ CHECK: vrsra.s32	q8, q9, #32     @ encoding: [0x72,0x03,0xe0,0xf2]
-	vrsra.s32	q8, q9, #32
-@ CHECK: vrsra.s64	q8, q9, #64     @ encoding: [0xf2,0x03,0xc0,0xf2]
-	vrsra.s64	q8, q9, #64
-@ CHECK: vrsra.u8	q8, q9, #8      @ encoding: [0x72,0x03,0xc8,0xf3]
-	vrsra.u8	q8, q9, #8
-@ CHECK: vrsra.u16	q8, q9, #16     @ encoding: [0x72,0x03,0xd0,0xf3]
-	vrsra.u16	q8, q9, #16
-@ CHECK: vrsra.u32	q8, q9, #32     @ encoding: [0x72,0x03,0xe0,0xf3]
-	vrsra.u32	q8, q9, #32
-@ CHECK: vrsra.u64	q8, q9, #64     @ encoding: [0xf2,0x03,0xc0,0xf3]
-	vrsra.u64	q8, q9, #64
-@ CHECK: vsli.8	d17, d16, #7            @ encoding: [0x30,0x15,0xcf,0xf3]
-	vsli.8	d17, d16, #7
-@ CHECK: vsli.16	d17, d16, #15           @ encoding: [0x30,0x15,0xdf,0xf3]
-	vsli.16	d17, d16, #15
-@ CHECK: vsli.32	d17, d16, #31           @ encoding: [0x30,0x15,0xff,0xf3]
-	vsli.32	d17, d16, #31
-@ CHECK: vsli.64	d17, d16, #63           @ encoding: [0xb0,0x15,0xff,0xf3]
-	vsli.64	d17, d16, #63
-@ CHECK: vsli.8	q9, q8, #7              @ encoding: [0x70,0x25,0xcf,0xf3]
-	vsli.8	q9, q8, #7
-@ CHECK: vsli.16	q9, q8, #15             @ encoding: [0x70,0x25,0xdf,0xf3]
-	vsli.16	q9, q8, #15
-@ CHECK: vsli.32	q9, q8, #31             @ encoding: [0x70,0x25,0xff,0xf3]
-	vsli.32	q9, q8, #31
-@ CHECK: vsli.64	q9, q8, #63             @ encoding: [0xf0,0x25,0xff,0xf3]
-	vsli.64	q9, q8, #63
-@ CHECK: vsri.8	d17, d16, #8            @ encoding: [0x30,0x14,0xc8,0xf3]
-	vsri.8	d17, d16, #8
-@ CHECK: vsri.16	d17, d16, #16           @ encoding: [0x30,0x14,0xd0,0xf3]
-	vsri.16	d17, d16, #16
-@ CHECK: vsri.32	d17, d16, #32           @ encoding: [0x30,0x14,0xe0,0xf3]
-	vsri.32	d17, d16, #32
-@ CHECK: vsri.64	d17, d16, #64           @ encoding: [0xb0,0x14,0xc0,0xf3]
-	vsri.64	d17, d16, #64
-@ CHECK: vsri.8	q9, q8, #8              @ encoding: [0x70,0x24,0xc8,0xf3]
-	vsri.8	q9, q8, #8
-@ CHECK: vsri.16	q9, q8, #16             @ encoding: [0x70,0x24,0xd0,0xf3]
-	vsri.16	q9, q8, #16
-@ CHECK: vsri.32	q9, q8, #32             @ encoding: [0x70,0x24,0xe0,0xf3]
-	vsri.32	q9, q8, #32
-@ CHECK: vsri.64	q9, q8, #64             @ encoding: [0xf0,0x24,0xc0,0xf3]
-	vsri.64	q9, q8, #64
diff --git a/test/MC/ARM/neon-shuffle-encoding.s b/test/MC/ARM/neon-shuffle-encoding.s
index ce7eb66..0f07d9f 100644
--- a/test/MC/ARM/neon-shuffle-encoding.s
+++ b/test/MC/ARM/neon-shuffle-encoding.s
@@ -1,46 +1,136 @@
 @ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
 
-@ CHECK: vext.8	d16, d17, d16, #3       @ encoding: [0xa0,0x03,0xf1,0xf2]
 	vext.8	d16, d17, d16, #3
-@ CHECK: vext.8	d16, d17, d16, #5       @ encoding: [0xa0,0x05,0xf1,0xf2]
 	vext.8	d16, d17, d16, #5
-@ CHECK: vext.8	q8, q9, q8, #3          @ encoding: [0xe0,0x03,0xf2,0xf2]
 	vext.8	q8, q9, q8, #3
-@ CHECK: vext.8	q8, q9, q8, #7          @ encoding: [0xe0,0x07,0xf2,0xf2]
 	vext.8	q8, q9, q8, #7
-@ CHECK: vext.16	d16, d17, d16, #3       @ encoding: [0xa0,0x06,0xf1,0xf2]
 	vext.16	d16, d17, d16, #3
-@ CHECK: vext.32	q8, q9, q8, #3          @ encoding: [0xe0,0x0c,0xf2,0xf2]
 	vext.32	q8, q9, q8, #3
-@ CHECK: vtrn.8	d17, d16                @ encoding: [0xa0,0x10,0xf2,0xf3]
+	vext.64	q8, q9, q8, #1
+
+	vext.8	d17, d16, #3
+	vext.8	d7, d11, #5
+	vext.8	q3, q8, #3
+	vext.8	q9, q4, #7
+	vext.16	d1, d26, #3
+	vext.32	q5, q8, #3
+	vext.64	q5, q8, #1
+
+
+@ CHECK: vext.8	d16, d17, d16, #3       @ encoding: [0xa0,0x03,0xf1,0xf2]
+@ CHECK: vext.8	d16, d17, d16, #5       @ encoding: [0xa0,0x05,0xf1,0xf2]
+@ CHECK: vext.8	q8, q9, q8, #3          @ encoding: [0xe0,0x03,0xf2,0xf2]
+@ CHECK: vext.8	q8, q9, q8, #7          @ encoding: [0xe0,0x07,0xf2,0xf2]
+@ CHECK: vext.16 d16, d17, d16, #3      @ encoding: [0xa0,0x06,0xf1,0xf2]
+@ CHECK: vext.32 q8, q9, q8, #3         @ encoding: [0xe0,0x0c,0xf2,0xf2]
+@ CHECK: vext.64 q8, q9, q8, #1         @ encoding: [0xe0,0x08,0xf2,0xf2]
+
+@ CHECK: vext.8	d17, d17, d16, #3       @ encoding: [0xa0,0x13,0xf1,0xf2]
+@ CHECK: vext.8	d7, d7, d11, #5         @ encoding: [0x0b,0x75,0xb7,0xf2]
+@ CHECK: vext.8	q3, q3, q8, #3          @ encoding: [0x60,0x63,0xb6,0xf2]
+@ CHECK: vext.8	q9, q9, q4, #7          @ encoding: [0xc8,0x27,0xf2,0xf2]
+@ CHECK: vext.16 d1, d1, d26, #3        @ encoding: [0x2a,0x16,0xb1,0xf2]
+@ CHECK: vext.32 q5, q5, q8, #3         @ encoding: [0x60,0xac,0xba,0xf2]
+@ CHECK: vext.64 q5, q5, q8, #1         @ encoding: [0x60,0xa8,0xba,0xf2]
+
+
 	vtrn.8	d17, d16
-@ CHECK: vtrn.16	d17, d16                @ encoding: [0xa0,0x10,0xf6,0xf3]
 	vtrn.16	d17, d16
-@ CHECK: vtrn.32	d17, d16                @ encoding: [0xa0,0x10,0xfa,0xf3]
 	vtrn.32	d17, d16
-@ CHECK: vtrn.8	q9, q8                  @ encoding: [0xe0,0x20,0xf2,0xf3]
 	vtrn.8	q9, q8
-@ CHECK: vtrn.16	q9, q8                  @ encoding: [0xe0,0x20,0xf6,0xf3]
 	vtrn.16	q9, q8
-@ CHECK: vtrn.32	q9, q8                  @ encoding: [0xe0,0x20,0xfa,0xf3]
 	vtrn.32	q9, q8
-@ CHECK: vuzp.8	d17, d16                @ encoding: [0x20,0x11,0xf2,0xf3]
+
+@ CHECK: vtrn.8	d17, d16                @ encoding: [0xa0,0x10,0xf2,0xf3]
+@ CHECK: vtrn.16 d17, d16               @ encoding: [0xa0,0x10,0xf6,0xf3]
+@ CHECK: vtrn.32 d17, d16               @ encoding: [0xa0,0x10,0xfa,0xf3]
+@ CHECK: vtrn.8	q9, q8                  @ encoding: [0xe0,0x20,0xf2,0xf3]
+@ CHECK: vtrn.16 q9, q8                 @ encoding: [0xe0,0x20,0xf6,0xf3]
+@ CHECK: vtrn.32 q9, q8                 @ encoding: [0xe0,0x20,0xfa,0xf3]
+
+
 	vuzp.8	d17, d16
-@ CHECK: vuzp.16	d17, d16                @ encoding: [0x20,0x11,0xf6,0xf3]
 	vuzp.16	d17, d16
-@ CHECK: vuzp.8	q9, q8                  @ encoding: [0x60,0x21,0xf2,0xf3]
 	vuzp.8	q9, q8
-@ CHECK: vuzp.16	q9, q8                  @ encoding: [0x60,0x21,0xf6,0xf3]
 	vuzp.16	q9, q8
-@ CHECK: vuzp.32	q9, q8                  @ encoding: [0x60,0x21,0xfa,0xf3]
 	vuzp.32	q9, q8
-@ CHECK: vzip.8	d17, d16                @ encoding: [0xa0,0x11,0xf2,0xf3]
 	vzip.8	d17, d16
-@ CHECK: vzip.16	d17, d16                @ encoding: [0xa0,0x11,0xf6,0xf3]
 	vzip.16	d17, d16
-@ CHECK: vzip.8	q9, q8                  @ encoding: [0xe0,0x21,0xf2,0xf3]
 	vzip.8	q9, q8
-@ CHECK: vzip.16	q9, q8                  @ encoding: [0xe0,0x21,0xf6,0xf3]
 	vzip.16	q9, q8
-@ CHECK: vzip.32	q9, q8                  @ encoding: [0xe0,0x21,0xfa,0xf3]
 	vzip.32	q9, q8
+        vzip.32 d2, d3
+        vuzp.32 d2, d3
+
+@ CHECK: vuzp.8	d17, d16                @ encoding: [0x20,0x11,0xf2,0xf3]
+@ CHECK: vuzp.16 d17, d16               @ encoding: [0x20,0x11,0xf6,0xf3]
+@ CHECK: vuzp.8	q9, q8                  @ encoding: [0x60,0x21,0xf2,0xf3]
+@ CHECK: vuzp.16 q9, q8                 @ encoding: [0x60,0x21,0xf6,0xf3]
+@ CHECK: vuzp.32 q9, q8                 @ encoding: [0x60,0x21,0xfa,0xf3]
+@ CHECK: vzip.8	d17, d16                @ encoding: [0xa0,0x11,0xf2,0xf3]
+@ CHECK: vzip.16 d17, d16               @ encoding: [0xa0,0x11,0xf6,0xf3]
+@ CHECK: vzip.8	q9, q8                  @ encoding: [0xe0,0x21,0xf2,0xf3]
+@ CHECK: vzip.16 q9, q8                 @ encoding: [0xe0,0x21,0xf6,0xf3]
+@ CHECK: vzip.32 q9, q8                 @ encoding: [0xe0,0x21,0xfa,0xf3]
+@ CHECK: vtrn.32 d2, d3                 @ encoding: [0x83,0x20,0xba,0xf3]
+@ CHECK: vtrn.32 d2, d3                 @ encoding: [0x83,0x20,0xba,0xf3]
+
+
+@ VTRN alternate size suffices
+
+        vtrn.8 d3, d9
+        vtrn.i8 d3, d9
+        vtrn.u8 d3, d9
+        vtrn.p8 d3, d9
+        vtrn.16 d3, d9
+        vtrn.i16 d3, d9
+        vtrn.u16 d3, d9
+        vtrn.p16 d3, d9
+        vtrn.32 d3, d9
+        vtrn.i32 d3, d9
+        vtrn.u32 d3, d9
+        vtrn.f32 d3, d9
+        vtrn.f d3, d9
+
+        vtrn.8 q14, q6
+        vtrn.i8 q14, q6
+        vtrn.u8 q14, q6
+        vtrn.p8 q14, q6
+        vtrn.16 q14, q6
+        vtrn.i16 q14, q6
+        vtrn.u16 q14, q6
+        vtrn.p16 q14, q6
+        vtrn.32 q14, q6
+        vtrn.i32 q14, q6
+        vtrn.u32 q14, q6
+        vtrn.f32 q14, q6
+        vtrn.f q14, q6
+
+@ CHECK: vtrn.8	d3, d9                  @ encoding: [0x89,0x30,0xb2,0xf3]
+@ CHECK: vtrn.8	d3, d9                  @ encoding: [0x89,0x30,0xb2,0xf3]
+@ CHECK: vtrn.8	d3, d9                  @ encoding: [0x89,0x30,0xb2,0xf3]
+@ CHECK: vtrn.8	d3, d9                  @ encoding: [0x89,0x30,0xb2,0xf3]
+@ CHECK: vtrn.16	d3, d9          @ encoding: [0x89,0x30,0xb6,0xf3]
+@ CHECK: vtrn.16	d3, d9          @ encoding: [0x89,0x30,0xb6,0xf3]
+@ CHECK: vtrn.16	d3, d9          @ encoding: [0x89,0x30,0xb6,0xf3]
+@ CHECK: vtrn.16	d3, d9          @ encoding: [0x89,0x30,0xb6,0xf3]
+@ CHECK: vtrn.32	d3, d9          @ encoding: [0x89,0x30,0xba,0xf3]
+@ CHECK: vtrn.32	d3, d9          @ encoding: [0x89,0x30,0xba,0xf3]
+@ CHECK: vtrn.32	d3, d9          @ encoding: [0x89,0x30,0xba,0xf3]
+@ CHECK: vtrn.32	d3, d9          @ encoding: [0x89,0x30,0xba,0xf3]
+@ CHECK: vtrn.32	d3, d9          @ encoding: [0x89,0x30,0xba,0xf3]
+
+@ CHECK: vtrn.8	q14, q6                 @ encoding: [0xcc,0xc0,0xf2,0xf3]
+@ CHECK: vtrn.8	q14, q6                 @ encoding: [0xcc,0xc0,0xf2,0xf3]
+@ CHECK: vtrn.8	q14, q6                 @ encoding: [0xcc,0xc0,0xf2,0xf3]
+@ CHECK: vtrn.8	q14, q6                 @ encoding: [0xcc,0xc0,0xf2,0xf3]
+@ CHECK: vtrn.16	q14, q6         @ encoding: [0xcc,0xc0,0xf6,0xf3]
+@ CHECK: vtrn.16	q14, q6         @ encoding: [0xcc,0xc0,0xf6,0xf3]
+@ CHECK: vtrn.16	q14, q6         @ encoding: [0xcc,0xc0,0xf6,0xf3]
+@ CHECK: vtrn.16	q14, q6         @ encoding: [0xcc,0xc0,0xf6,0xf3]
+@ CHECK: vtrn.32	q14, q6         @ encoding: [0xcc,0xc0,0xfa,0xf3]
+@ CHECK: vtrn.32	q14, q6         @ encoding: [0xcc,0xc0,0xfa,0xf3]
+@ CHECK: vtrn.32	q14, q6         @ encoding: [0xcc,0xc0,0xfa,0xf3]
+@ CHECK: vtrn.32	q14, q6         @ encoding: [0xcc,0xc0,0xfa,0xf3]
+@ CHECK: vtrn.32	q14, q6         @ encoding: [0xcc,0xc0,0xfa,0xf3]
+
diff --git a/test/MC/ARM/neon-sub-encoding.s b/test/MC/ARM/neon-sub-encoding.s
index 241a01f..0622e19 100644
--- a/test/MC/ARM/neon-sub-encoding.s
+++ b/test/MC/ARM/neon-sub-encoding.s
@@ -1,25 +1,51 @@
 @ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
 
-@ CHECK: vsub.i8	d16, d17, d16           @ encoding: [0xa0,0x08,0x41,0xf3]
 	vsub.i8	d16, d17, d16
-@ CHECK: vsub.i16	d16, d17, d16   @ encoding: [0xa0,0x08,0x51,0xf3]
 	vsub.i16	d16, d17, d16
-@ CHECK: vsub.i32	d16, d17, d16   @ encoding: [0xa0,0x08,0x61,0xf3]
 	vsub.i32	d16, d17, d16
-@ CHECK: vsub.i64	d16, d17, d16   @ encoding: [0xa0,0x08,0x71,0xf3]
 	vsub.i64	d16, d17, d16
-@ CHECK: vsub.f32	d16, d16, d17   @ encoding: [0xa1,0x0d,0x60,0xf2]
 	vsub.f32	d16, d16, d17
-@ CHECK: vsub.i8	q8, q8, q9              @ encoding: [0xe2,0x08,0x40,0xf3]
 	vsub.i8	q8, q8, q9
-@ CHECK: vsub.i16	q8, q8, q9      @ encoding: [0xe2,0x08,0x50,0xf3]
 	vsub.i16	q8, q8, q9
-@ CHECK: vsub.i32	q8, q8, q9      @ encoding: [0xe2,0x08,0x60,0xf3]
 	vsub.i32	q8, q8, q9
-@ CHECK: vsub.i64	q8, q8, q9      @ encoding: [0xe2,0x08,0x70,0xf3]
 	vsub.i64	q8, q8, q9
-@ CHECK: vsub.f32	q8, q8, q9      @ encoding: [0xe2,0x0d,0x60,0xf2]
 	vsub.f32	q8, q8, q9
+
+	vsub.i8  d13, d21
+	vsub.i16 d14, d22
+	vsub.i32 d15, d23
+	vsub.i64 d16, d24
+	vsub.f32 d17, d25
+	vsub.i8  q1, q10
+	vsub.i16 q2, q9
+	vsub.i32 q3, q8
+	vsub.i64 q4, q7
+	vsub.f32 q5, q6
+
+@ CHECK: vsub.i8	d16, d17, d16   @ encoding: [0xa0,0x08,0x41,0xf3]
+@ CHECK: vsub.i16	d16, d17, d16   @ encoding: [0xa0,0x08,0x51,0xf3]
+@ CHECK: vsub.i32	d16, d17, d16   @ encoding: [0xa0,0x08,0x61,0xf3]
+@ CHECK: vsub.i64	d16, d17, d16   @ encoding: [0xa0,0x08,0x71,0xf3]
+@ CHECK: vsub.f32	d16, d16, d17   @ encoding: [0xa1,0x0d,0x60,0xf2]
+@ CHECK: vsub.i8	q8, q8, q9      @ encoding: [0xe2,0x08,0x40,0xf3]
+@ CHECK: vsub.i16	q8, q8, q9      @ encoding: [0xe2,0x08,0x50,0xf3]
+@ CHECK: vsub.i32	q8, q8, q9      @ encoding: [0xe2,0x08,0x60,0xf3]
+@ CHECK: vsub.i64	q8, q8, q9      @ encoding: [0xe2,0x08,0x70,0xf3]
+@ CHECK: vsub.f32	q8, q8, q9      @ encoding: [0xe2,0x0d,0x60,0xf2]
+
+@ CHECK: vsub.i8	d13, d13, d21   @ encoding: [0x25,0xd8,0x0d,0xf3]
+@ CHECK: vsub.i16	d14, d14, d22   @ encoding: [0x26,0xe8,0x1e,0xf3]
+@ CHECK: vsub.i32	d15, d15, d23   @ encoding: [0x27,0xf8,0x2f,0xf3]
+@ CHECK: vsub.i64	d16, d16, d24   @ encoding: [0xa8,0x08,0x70,0xf3]
+@ CHECK: vsub.f32	d17, d17, d25   @ encoding: [0xa9,0x1d,0x61,0xf2]
+@ CHECK: vsub.i8	q1, q1, q10     @ encoding: [0x64,0x28,0x02,0xf3]
+@ CHECK: vsub.i16	q2, q2, q9      @ encoding: [0x62,0x48,0x14,0xf3]
+@ CHECK: vsub.i32	q3, q3, q8      @ encoding: [0x60,0x68,0x26,0xf3]
+@ CHECK: vsub.i64	q4, q4, q7      @ encoding: [0x4e,0x88,0x38,0xf3]
+@ CHECK: vsub.f32	q5, q5, q6      @ encoding: [0x4c,0xad,0x2a,0xf2]
+
+
+
 @ CHECK: vsubl.s8	q8, d17, d16    @ encoding: [0xa0,0x02,0xc1,0xf2]
 	vsubl.s8	q8, d17, d16
 @ CHECK: vsubl.s16	q8, d17, d16    @ encoding: [0xa0,0x02,0xd1,0xf2]
diff --git a/test/MC/ARM/neon-table-encoding.s b/test/MC/ARM/neon-table-encoding.s
index 7bf47c7..343ae83 100644
--- a/test/MC/ARM/neon-table-encoding.s
+++ b/test/MC/ARM/neon-table-encoding.s
@@ -1,19 +1,22 @@
 @ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
-@ XFAIL: *
 
-@ CHECK: vtbl.8	d16, {d17}, d16         @ encoding: [0xa0,0x08,0xf1,0xf3]
 	vtbl.8	d16, {d17}, d16
-@ CHECK: vtbl.8	d16, {d16, d17}, d18    @ encoding: [0xa2,0x09,0xf0,0xf3]
 	vtbl.8	d16, {d16, d17}, d18
-@ CHECK: vtbl.8	d16, {d16, d17, d18}, d20 @ encoding: [0xa4,0x0a,0xf0,0xf3]
 	vtbl.8	d16, {d16, d17, d18}, d20
-@ CHECK: vtbl.8	d16, {d16, d17, d18, d19}, d20 @ encoding: [0xa4,0x0b,0xf0,0xf3]
 	vtbl.8	d16, {d16, d17, d18, d19}, d20
-@ CHECK: vtbx.8	d18, {d16}, d17         @ encoding: [0xe1,0x28,0xf0,0xf3]
+
+@ CHECK: vtbl.8	d16, {d17}, d16         @ encoding: [0xa0,0x08,0xf1,0xf3]
+@ CHECK: vtbl.8	d16, {d16, d17}, d18    @ encoding: [0xa2,0x09,0xf0,0xf3]
+@ CHECK: vtbl.8	d16, {d16, d17, d18}, d20 @ encoding: [0xa4,0x0a,0xf0,0xf3]
+@ CHECK: vtbl.8	d16, {d16, d17, d18, d19}, d20 @ encoding: [0xa4,0x0b,0xf0,0xf3]
+
+
 	vtbx.8	d18, {d16}, d17
-@ CHECK: vtbx.8	d19, {d16, d17}, d18    @ encoding: [0xe2,0x39,0xf0,0xf3]
 	vtbx.8	d19, {d16, d17}, d18
-@ CHECK: vtbx.8	d20, {d16, d17, d18}, d21 @ encoding: [0xe5,0x4a,0xf0,0xf3]
 	vtbx.8	d20, {d16, d17, d18}, d21
-@ CHECK: vtbx.8	d20, {d16, d17, d18, d19}, d21 @ encoding: [0xe5,0x4b,0xf0,0xf3]
 	vtbx.8	d20, {d16, d17, d18, d19}, d21
+
+@ CHECK: vtbx.8	d18, {d16}, d17         @ encoding: [0xe1,0x28,0xf0,0xf3]
+@ CHECK: vtbx.8	d19, {d16, d17}, d18    @ encoding: [0xe2,0x39,0xf0,0xf3]
+@ CHECK: vtbx.8	d20, {d16, d17, d18}, d21 @ encoding: [0xe5,0x4a,0xf0,0xf3]
+@ CHECK: vtbx.8	d20, {d16, d17, d18, d19}, d21 @ encoding: [0xe5,0x4b,0xf0,0xf3]
diff --git a/test/MC/ARM/neon-vld-encoding.s b/test/MC/ARM/neon-vld-encoding.s
index 55c8868..3cc6bf1 100644
--- a/test/MC/ARM/neon-vld-encoding.s
+++ b/test/MC/ARM/neon-vld-encoding.s
@@ -1,5 +1,4 @@
 @ RUN: llvm-mc -mcpu=cortex-a8 -triple armv7-apple-darwin -show-encoding < %s | FileCheck %s
-@ XFAIL: *
 
 	vld1.8	{d16}, [r0, :64]
 	vld1.16	{d16}, [r0]
@@ -9,15 +8,107 @@
 	vld1.16	{d16, d17}, [r0, :128]
 	vld1.32	{d16, d17}, [r0]
 	vld1.64	{d16, d17}, [r0]
+	vld1.8 {d1, d2, d3}, [r3]
+	vld1.16 {d4, d5, d6}, [r3, :64]
+	vld1.32 {d5, d6, d7}, [r3]
+	vld1.64 {d6, d7, d8}, [r3, :64]
+	vld1.8 {d1, d2, d3, d4}, [r3]
+	vld1.16 {d4, d5, d6, d7}, [r3, :64]
+	vld1.32 {d5, d6, d7, d8}, [r3]
+	vld1.64 {d6, d7, d8, d9}, [r3, :64]
 
-@ CHECK: vld1.8	{d16}, [r0, :64]        @ encoding: [0x1f,0x07,0x60,0xf4]
-@ CHECK: vld1.16	{d16}, [r0]     @ encoding: [0x4f,0x07,0x60,0xf4]
-@ CHECK: vld1.32	{d16}, [r0]     @ encoding: [0x8f,0x07,0x60,0xf4]
-@ CHECK: vld1.64	{d16}, [r0]     @ encoding: [0xcf,0x07,0x60,0xf4]
-@ CHECK: vld1.8	{d16, d17}, [r0, :64]   @ encoding: [0x1f,0x0a,0x60,0xf4]
-@ CHECK: vld1.16	{d16, d17}, [r0, :128] @ encoding: [0x6f,0x0a,0x60,0xf4]
-@ CHECK: vld1.32	{d16, d17}, [r0]@ encoding: [0x8f,0x0a,0x60,0xf4]
-@ CHECK: vld1.64	{d16, d17}, [r0]@ encoding: [0xcf,0x0a,0x60,0xf4]
+	vld1.8	{d16}, [r0, :64]!
+	vld1.16	{d16}, [r0]!
+	vld1.32	{d16}, [r0]!
+	vld1.64	{d16}, [r0]!
+	vld1.8	{d16, d17}, [r0, :64]!
+	vld1.16	{d16, d17}, [r0, :128]!
+	vld1.32	{d16, d17}, [r0]!
+	vld1.64	{d16, d17}, [r0]!
+
+	vld1.8	{d16}, [r0, :64], r5
+	vld1.16	{d16}, [r0], r5
+	vld1.32	{d16}, [r0], r5
+	vld1.64	{d16}, [r0], r5
+	vld1.8	{d16, d17}, [r0, :64], r5
+	vld1.16	{d16, d17}, [r0, :128], r5
+	vld1.32	{d16, d17}, [r0], r5
+	vld1.64	{d16, d17}, [r0], r5
+
+	vld1.8 {d1, d2, d3}, [r3]!
+	vld1.16 {d4, d5, d6}, [r3, :64]!
+	vld1.32 {d5, d6, d7}, [r3]!
+	vld1.64 {d6, d7, d8}, [r3, :64]!
+
+	vld1.8 {d1, d2, d3}, [r3], r6
+	vld1.16 {d4, d5, d6}, [r3, :64], r6
+	vld1.32 {d5, d6, d7}, [r3], r6
+	vld1.64 {d6, d7, d8}, [r3, :64], r6
+
+	vld1.8 {d1, d2, d3, d4}, [r3]!
+	vld1.16 {d4, d5, d6, d7}, [r3, :64]!
+	vld1.32 {d5, d6, d7, d8}, [r3]!
+	vld1.64 {d6, d7, d8, d9}, [r3, :64]!
+
+	vld1.8 {d1, d2, d3, d4}, [r3], r8
+	vld1.16 {d4, d5, d6, d7}, [r3, :64], r8
+	vld1.32 {d5, d6, d7, d8}, [r3], r8
+	vld1.64 {d6, d7, d8, d9}, [r3, :64], r8
+
+@ CHECK: vld1.8 {d16}, [r0, :64]        @ encoding: [0x1f,0x07,0x60,0xf4]
+@ CHECK: vld1.16 {d16}, [r0]            @ encoding: [0x4f,0x07,0x60,0xf4]
+@ CHECK: vld1.32 {d16}, [r0]            @ encoding: [0x8f,0x07,0x60,0xf4]
+@ CHECK: vld1.64 {d16}, [r0]            @ encoding: [0xcf,0x07,0x60,0xf4]
+@ CHECK: vld1.8 {d16, d17}, [r0, :64]   @ encoding: [0x1f,0x0a,0x60,0xf4]
+@ CHECK: vld1.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x0a,0x60,0xf4]
+@ CHECK: vld1.32 {d16, d17}, [r0]       @ encoding: [0x8f,0x0a,0x60,0xf4]
+@ CHECK: vld1.64 {d16, d17}, [r0]       @ encoding: [0xcf,0x0a,0x60,0xf4]
+@ CHECK: vld1.8 {d1, d2, d3}, [r3]      @ encoding: [0x0f,0x16,0x23,0xf4]
+@ CHECK: vld1.16 {d4, d5, d6}, [r3, :64] @ encoding: [0x5f,0x46,0x23,0xf4]
+@ CHECK: vld1.32 {d5, d6, d7}, [r3]     @ encoding: [0x8f,0x56,0x23,0xf4]
+@ CHECK: vld1.64 {d6, d7, d8}, [r3, :64] @ encoding: [0xdf,0x66,0x23,0xf4]
+@ CHECK: vld1.8 {d1, d2, d3, d4}, [r3]  @ encoding: [0x0f,0x12,0x23,0xf4]
+@ CHECK: vld1.16 {d4, d5, d6, d7}, [r3, :64] @ encoding: [0x5f,0x42,0x23,0xf4]
+@ CHECK: vld1.32 {d5, d6, d7, d8}, [r3]  @ encoding: [0x8f,0x52,0x23,0xf4]
+@ CHECK: vld1.64 {d6, d7, d8, d9}, [r3, :64] @ encoding: [0xdf,0x62,0x23,0xf4]
+@ CHECK: vld1.8	{d16}, [r0, :64]!       @ encoding: [0x1d,0x07,0x60,0xf4]
+
+@ CHECK: vld1.16 {d16}, [r0]!           @ encoding: [0x4d,0x07,0x60,0xf4]
+@ CHECK: vld1.32 {d16}, [r0]!           @ encoding: [0x8d,0x07,0x60,0xf4]
+@ CHECK: vld1.64 {d16}, [r0]!           @ encoding: [0xcd,0x07,0x60,0xf4]
+@ CHECK: vld1.8 {d16, d17}, [r0, :64]!  @ encoding: [0x1d,0x0a,0x60,0xf4]
+@ CHECK: vld1.16 {d16, d17}, [r0, :128]! @ encoding: [0x6d,0x0a,0x60,0xf4]
+@ CHECK: vld1.32 {d16, d17}, [r0]!      @ encoding: [0x8d,0x0a,0x60,0xf4]
+@ CHECK: vld1.64 {d16, d17}, [r0]!      @ encoding: [0xcd,0x0a,0x60,0xf4]
+
+@ CHECK: vld1.8 {d16}, [r0, :64], r5    @ encoding: [0x15,0x07,0x60,0xf4]
+@ CHECK: vld1.16 {d16}, [r0], r5        @ encoding: [0x45,0x07,0x60,0xf4]
+@ CHECK: vld1.32 {d16}, [r0], r5        @ encoding: [0x85,0x07,0x60,0xf4]
+@ CHECK: vld1.64 {d16}, [r0], r5        @ encoding: [0xc5,0x07,0x60,0xf4]
+@ CHECK: vld1.8 {d16, d17}, [r0, :64], r5 @ encoding: [0x15,0x0a,0x60,0xf4]
+@ CHECK: vld1.16 {d16, d17}, [r0, :128], r5 @ encoding: [0x65,0x0a,0x60,0xf4]
+@ CHECK: vld1.32 {d16, d17}, [r0], r5   @ encoding: [0x85,0x0a,0x60,0xf4]
+@ CHECK: vld1.64 {d16, d17}, [r0], r5   @ encoding: [0xc5,0x0a,0x60,0xf4]
+
+@ CHECK: vld1.8	{d1, d2, d3}, [r3]!     @ encoding: [0x0d,0x16,0x23,0xf4]
+@ CHECK: vld1.16 {d4, d5, d6}, [r3, :64]! @ encoding: [0x5d,0x46,0x23,0xf4]
+@ CHECK: vld1.32 {d5, d6, d7}, [r3]!     @ encoding: [0x8d,0x56,0x23,0xf4]
+@ CHECK: vld1.64 {d6, d7, d8}, [r3, :64]! @ encoding: [0xdd,0x66,0x23,0xf4]
+
+@ CHECK: vld1.8	{d1, d2, d3}, [r3], r6  @ encoding: [0x06,0x16,0x23,0xf4]
+@ CHECK: vld1.16 {d4, d5, d6}, [r3, :64], r6 @ encoding: [0x56,0x46,0x23,0xf4]
+@ CHECK: vld1.32 {d5, d6, d7}, [r3], r6  @ encoding: [0x86,0x56,0x23,0xf4]
+@ CHECK: vld1.64 {d6, d7, d8}, [r3, :64], r6 @ encoding: [0xd6,0x66,0x23,0xf4]
+
+@ CHECK: vld1.8	{d1, d2, d3, d4}, [r3]! @ encoding: [0x0d,0x12,0x23,0xf4]
+@ CHECK: vld1.16 {d4, d5, d6, d7}, [r3, :64]! @ encoding: [0x5d,0x42,0x23,0xf4]
+@ CHECK: vld1.32 {d5, d6, d7, d8}, [r3]! @ encoding: [0x8d,0x52,0x23,0xf4]
+@ CHECK: vld1.64 {d6, d7, d8, d9}, [r3, :64]! @ encoding: [0xdd,0x62,0x23,0xf4]
+
+@ CHECK: vld1.8	{d1, d2, d3, d4}, [r3], r8 @ encoding: [0x08,0x12,0x23,0xf4]
+@ CHECK: vld1.16 {d4, d5, d6, d7}, [r3, :64], r8 @ encoding: [0x58,0x42,0x23,0xf4]
+@ CHECK: vld1.32 {d5, d6, d7, d8}, [r3], r8 @ encoding: [0x88,0x52,0x23,0xf4]
+@ CHECK: vld1.64 {d6, d7, d8, d9}, [r3, :64], r8 @ encoding: [0xd8,0x62,0x23,0xf4]
 
 
 	vld2.8	{d16, d17}, [r0, :64]
@@ -27,63 +118,154 @@
 	vld2.16	{d16, d17, d18, d19}, [r0, :128]
 	vld2.32	{d16, d17, d18, d19}, [r0, :256]
 
+	vld2.8	{d19, d20}, [r0, :64]!
+	vld2.16	{d16, d17}, [r0, :128]!
+	vld2.32	{q10}, [r0]!
+	vld2.8	{d4-d7}, [r0, :64]!
+	vld2.16	{d1, d2, d3, d4}, [r0, :128]!
+	vld2.32	{q7, q8}, [r0, :256]!
+
+	vld2.8	{d19, d20}, [r0, :64], r6
+	vld2.16	{d16, d17}, [r0, :128], r6
+	vld2.32	{q10}, [r0], r6
+	vld2.8	{d4-d7}, [r0, :64], r6
+	vld2.16	{d1, d2, d3, d4}, [r0, :128], r6
+	vld2.32	{q7, q8}, [r0, :256], r6
+
 @ CHECK: vld2.8	{d16, d17}, [r0, :64]   @ encoding: [0x1f,0x08,0x60,0xf4]
 @ CHECK: vld2.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x08,0x60,0xf4]
-@ CHECK: vld2.32 {d16, d17}, [r0]@ encoding: [0x8f,0x08,0x60,0xf4]
-@ CHECK: vld2.8	{d16, d17, d18, d19}, [r0, :64]@ encoding: [0x1f,0x03,0x60,0xf4]
+@ CHECK: vld2.32 {d16, d17}, [r0] @ encoding: [0x8f,0x08,0x60,0xf4]
+@ CHECK: vld2.8	{d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x03,0x60,0xf4]
 @ CHECK: vld2.16 {d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x03,0x60,0xf4]
 @ CHECK: vld2.32 {d16, d17, d18, d19}, [r0, :256] @ encoding: [0xbf,0x03,0x60,0xf4]
 
+@ CHECK: vld2.8	{d19, d20}, [r0, :64]!  @ encoding: [0x1d,0x38,0x60,0xf4]
+@ CHECK: vld2.16 {d16, d17}, [r0, :128]! @ encoding: [0x6d,0x08,0x60,0xf4]
+@ CHECK: vld2.32 {d20, d21}, [r0]!       @ encoding: [0x8d,0x48,0x60,0xf4]
+@ CHECK: vld2.8	{d4, d5, d6, d7}, [r0, :64]! @ encoding: [0x1d,0x43,0x20,0xf4]
+@ CHECK: vld2.16 {d1, d2, d3, d4}, [r0, :128]! @ encoding: [0x6d,0x13,0x20,0xf4]
+@ CHECK: vld2.32 {d14, d15, d16, d17}, [r0, :256]! @ encoding: [0xbd,0xe3,0x20,0xf4]
+
+@ CHECK: vld2.8	{d19, d20}, [r0, :64], r6 @ encoding: [0x16,0x38,0x60,0xf4]
+@ CHECK: vld2.16 {d16, d17}, [r0, :128], r6 @ encoding: [0x66,0x08,0x60,0xf4]
+@ CHECK: vld2.32 {d20, d21}, [r0], r6    @ encoding: [0x86,0x48,0x60,0xf4]
+@ CHECK: vld2.8	{d4, d5, d6, d7}, [r0, :64], r6 @ encoding: [0x16,0x43,0x20,0xf4]
+@ CHECK: vld2.16 {d1, d2, d3, d4}, [r0, :128], r6 @ encoding: [0x66,0x13,0x20,0xf4]
+@ CHECK: vld2.32 {d14, d15, d16, d17}, [r0, :256], r6 @ encoding: [0xb6,0xe3,0x20,0xf4]
+
+
+	vld3.8 {d16, d17, d18}, [r1]
+	vld3.16 {d6, d7, d8}, [r2]
+	vld3.32 {d1, d2, d3}, [r3]
+	vld3.8 {d16, d18, d20}, [r0, :64]
+	vld3.u16 {d27, d29, d31}, [r4]
+	vld3.i32 {d6, d8, d10}, [r5]
 
-	vld3.8	{d16, d17, d18}, [r0, :64]
-	vld3.16	{d16, d17, d18}, [r0]
-	vld3.32	{d16, d17, d18}, [r0]
-	vld3.8	{d16, d18, d20}, [r0, :64]!
-	vld3.8	{d17, d19, d21}, [r0, :64]!
-	vld3.16	{d16, d18, d20}, [r0]!
-	vld3.16	{d17, d19, d21}, [r0]!
-	vld3.32	{d16, d18, d20}, [r0]!
-	vld3.32	{d17, d19, d21}, [r0]!
+	vld3.i8 {d12, d13, d14}, [r6], r1
+	vld3.i16 {d11, d12, d13}, [r7], r2
+	vld3.u32 {d2, d3, d4}, [r8], r3
+	vld3.8 {d4, d6, d8}, [r9], r4
+	vld3.u16 {d14, d16, d18}, [r9], r4
+	vld3.i32 {d16, d18, d20}, [r10], r5
 
-@ CHECK: vld3.8	{d16, d17, d18}, [r0, :64] @ encoding: [0x1f,0x04,0x60,0xf4]
-@ CHECK: vld3.16 {d16, d17, d18}, [r0]  @ encoding: [0x4f,0x04,0x60,0xf4]
-@ CHECK: vld3.32 {d16, d17, d18}, [r0]  @ encoding: [0x8f,0x04,0x60,0xf4]
+	vld3.p8 {d6, d7, d8}, [r8]!
+	vld3.16 {d9, d10, d11}, [r7]!
+	vld3.f32 {d1, d2, d3}, [r6]!
+	vld3.8 {d16, d18, d20}, [r0, :64]!
+	vld3.p16 {d20, d22, d24}, [r5]!
+	vld3.32 {d5, d7, d9}, [r4]!
+
+
+@ CHECK: vld3.8	{d16, d17, d18}, [r1]   @ encoding: [0x0f,0x04,0x61,0xf4]
+@ CHECK: vld3.16	{d6, d7, d8}, [r2]      @ encoding: [0x4f,0x64,0x22,0xf4]
+@ CHECK: vld3.32	{d1, d2, d3}, [r3]      @ encoding: [0x8f,0x14,0x23,0xf4]
+@ CHECK: vld3.8	{d16, d18, d20}, [r0, :64] @ encoding: [0x1f,0x05,0x60,0xf4]
+@ CHECK: vld3.16	{d27, d29, d31}, [r4]   @ encoding: [0x4f,0xb5,0x64,0xf4]
+@ CHECK: vld3.32	{d6, d8, d10}, [r5]     @ encoding: [0x8f,0x65,0x25,0xf4]
+@ CHECK: vld3.8	{d12, d13, d14}, [r6], r1 @ encoding: [0x01,0xc4,0x26,0xf4]
+@ CHECK: vld3.16	{d11, d12, d13}, [r7], r2 @ encoding: [0x42,0xb4,0x27,0xf4]
+@ CHECK: vld3.32	{d2, d3, d4}, [r8], r3  @ encoding: [0x83,0x24,0x28,0xf4]
+@ CHECK: vld3.8	{d4, d6, d8}, [r9], r4  @ encoding: [0x04,0x45,0x29,0xf4]
+@ CHECK: vld3.16	{d14, d16, d18}, [r9], r4 @ encoding: [0x44,0xe5,0x29,0xf4]
+@ CHECK: vld3.32	{d16, d18, d20}, [r10], r5 @ encoding: [0x85,0x05,0x6a,0xf4]
+@ CHECK: vld3.8	{d6, d7, d8}, [r8]!     @ encoding: [0x0d,0x64,0x28,0xf4]
+@ CHECK: vld3.16	{d9, d10, d11}, [r7]!   @ encoding: [0x4d,0x94,0x27,0xf4]
+@ CHECK: vld3.32	{d1, d2, d3}, [r6]!     @ encoding: [0x8d,0x14,0x26,0xf4]
 @ CHECK: vld3.8	{d16, d18, d20}, [r0, :64]! @ encoding: [0x1d,0x05,0x60,0xf4]
-@ CHECK: vld3.8	{d17, d19, d21}, [r0, :64]! @ encoding: [0x1d,0x15,0x60,0xf4]
-@ CHECK: vld3.16 {d16, d18, d20}, [r0]! @ encoding: [0x4d,0x05,0x60,0xf4]
-@ CHECK: vld3.16 {d17, d19, d21}, [r0]! @ encoding: [0x4d,0x15,0x60,0xf4]
-@ CHECK: vld3.32 {d16, d18, d20}, [r0]! @ encoding: [0x8d,0x05,0x60,0xf4]
-@ CHECK: vld3.32 {d17, d19, d21}, [r0]! @ encoding: [0x8d,0x15,0x60,0xf4]
-
-
-	vld4.8	{d16, d17, d18, d19}, [r0, :64]
-	vld4.16	{d16, d17, d18, d19}, [r0, :128]
-	vld4.32	{d16, d17, d18, d19}, [r0, :256]
-	vld4.8	{d16, d18, d20, d22}, [r0, :256]!
-	vld4.8	{d17, d19, d21, d23}, [r0, :256]!
-	vld4.16	{d16, d18, d20, d22}, [r0]!
-	vld4.16	{d17, d19, d21, d23}, [r0]!
-	vld4.32	{d16, d18, d20, d22}, [r0]!
-	vld4.32	{d17, d19, d21, d23}, [r0]!
-
-@ CHECK: vld4.8	{d16, d17, d18, d19}, [r0, :64]@ encoding: [0x1f,0x00,0x60,0xf4]
-@ CHECK: vld4.16 {d16, d17, d18, d19}, [r0,:128]@ encoding:[0x6f,0x00,0x60,0xf4]
-@ CHECK: vld4.32 {d16, d17, d18, d19}, [r0,:256]@ encoding:[0xbf,0x00,0x60,0xf4]
-@ CHECK: vld4.8	{d16, d18, d20, d22}, [r0,:256]!@ encoding:[0x3d,0x01,0x60,0xf4]
-@ CHECK: vld4.8	{d17, d19, d21, d23}, [r0,:256]!@ encoding:[0x3d,0x11,0x60,0xf4]
-@ CHECK: vld4.16 {d16, d18, d20, d22}, [r0]! @ encoding: [0x4d,0x01,0x60,0xf4]
-@ CHECK: vld4.16 {d17, d19, d21, d23}, [r0]! @ encoding: [0x4d,0x11,0x60,0xf4]
-@ CHECK: vld4.32 {d16, d18, d20, d22}, [r0]! @ encoding: [0x8d,0x01,0x60,0xf4]
-@ CHECK: vld4.32 {d17, d19, d21, d23}, [r0]! @ encoding: [0x8d,0x11,0x60,0xf4]
+@ CHECK: vld3.16	{d20, d22, d24}, [r5]!  @ encoding: [0x4d,0x45,0x65,0xf4]
+@ CHECK: vld3.32	{d5, d7, d9}, [r4]!     @ encoding: [0x8d,0x55,0x24,0xf4]
+
+
+	vld4.8 {d16, d17, d18, d19}, [r1, :64]
+	vld4.16 {d16, d17, d18, d19}, [r2, :128]
+	vld4.32 {d16, d17, d18, d19}, [r3, :256]
+	vld4.8 {d17, d19, d21, d23}, [r5, :256]
+	vld4.16 {d17, d19, d21, d23}, [r7]
+	vld4.32 {d16, d18, d20, d22}, [r8]
+
+	vld4.s8 {d16, d17, d18, d19}, [r1, :64]!
+	vld4.s16 {d16, d17, d18, d19}, [r2, :128]!
+	vld4.s32 {d16, d17, d18, d19}, [r3, :256]!
+	vld4.u8 {d17, d19, d21, d23}, [r5, :256]!
+	vld4.u16 {d17, d19, d21, d23}, [r7]!
+	vld4.u32 {d16, d18, d20, d22}, [r8]!
+
+	vld4.p8 {d16, d17, d18, d19}, [r1, :64], r8
+	vld4.p16 {d16, d17, d18, d19}, [r2], r7
+	vld4.f32 {d16, d17, d18, d19}, [r3, :64], r5
+	vld4.i8 {d16, d18, d20, d22}, [r4, :256], r2
+	vld4.i16 {d16, d18, d20, d22}, [r6], r3
+	vld4.i32 {d17, d19, d21, d23}, [r9], r4
+
+@ CHECK: vld4.8 {d16, d17, d18, d19}, [r1, :64] @ encoding: [0x1f,0x00,0x61,0xf4]
+@ CHECK: vld4.16 {d16, d17, d18, d19}, [r2, :128] @ encoding: [0x6f,0x00,0x62,0xf4]
+@ CHECK: vld4.32 {d16, d17, d18, d19}, [r3, :256] @ encoding: [0xbf,0x00,0x63,0xf4]
+@ CHECK: vld4.8 {d17, d19, d21, d23}, [r5, :256] @ encoding: [0x3f,0x11,0x65,0xf4]
+@ CHECK: vld4.16 {d17, d19, d21, d23}, [r7] @ encoding: [0x4f,0x11,0x67,0xf4]
+@ CHECK: vld4.32 {d16, d18, d20, d22}, [r8] @ encoding: [0x8f,0x01,0x68,0xf4]
+@ CHECK: vld4.8 {d16, d17, d18, d19}, [r1, :64]! @ encoding: [0x1d,0x00,0x61,0xf4]
+@ CHECK: vld4.16 {d16, d17, d18, d19}, [r2, :128]! @ encoding: [0x6d,0x00,0x62,0xf4]
+@ CHECK: vld4.32 {d16, d17, d18, d19}, [r3, :256]! @ encoding: [0xbd,0x00,0x63,0xf4]
+@ CHECK: vld4.8 {d17, d19, d21, d23}, [r5, :256]! @ encoding: [0x3d,0x11,0x65,0xf4]
+@ CHECK: vld4.16 {d17, d19, d21, d23}, [r7]! @ encoding: [0x4d,0x11,0x67,0xf4]
+@ CHECK: vld4.32 {d16, d18, d20, d22}, [r8]! @ encoding: [0x8d,0x01,0x68,0xf4]
+@ CHECK: vld4.8 {d16, d17, d18, d19}, [r1, :64], r8 @ encoding: [0x18,0x00,0x61,0xf4]
+@ CHECK: vld4.16 {d16, d17, d18, d19}, [r2], r7 @ encoding: [0x47,0x00,0x62,0xf4]
+@ CHECK: vld4.32 {d16, d17, d18, d19}, [r3, :64], r5 @ encoding: [0x95,0x00,0x63,0xf4]
+@ CHECK: vld4.8 {d16, d18, d20, d22}, [r4, :256], r2 @ encoding: [0x32,0x01,0x64,0xf4]
+@ CHECK: vld4.16 {d16, d18, d20, d22}, [r6], r3 @ encoding: [0x43,0x01,0x66,0xf4]
+@ CHECK: vld4.32 {d17, d19, d21, d23}, [r9], r4 @ encoding: [0x84,0x11,0x69,0xf4]
+
+
+	vld1.8 {d4[]}, [r1]
+	vld1.8 {d4[]}, [r1]!
+	vld1.8 {d4[]}, [r1], r3
+	vld1.8 {d4[], d5[]}, [r1]
+	vld1.8 {d4[], d5[]}, [r1]!
+	vld1.8 {d4[], d5[]}, [r1], r3
 
+@ CHECK: vld1.8	{d4[]}, [r1]            @ encoding: [0x0f,0x4c,0xa1,0xf4]
+@ CHECK: vld1.8	{d4[]}, [r1]!           @ encoding: [0x0d,0x4c,0xa1,0xf4]
+@ CHECK: vld1.8	{d4[]}, [r1], r3        @ encoding: [0x03,0x4c,0xa1,0xf4]
+@ CHECK: vld1.8	{d4[], d5[]}, [r1]      @ encoding: [0x2f,0x4c,0xa1,0xf4]
+@ CHECK: vld1.8	{d4[], d5[]}, [r1]!     @ encoding: [0x2d,0x4c,0xa1,0xf4]
+@ CHECK: vld1.8	{d4[], d5[]}, [r1], r3  @ encoding: [0x23,0x4c,0xa1,0xf4]
 
 	vld1.8	{d16[3]}, [r0]
 	vld1.16	{d16[2]}, [r0, :16]
 	vld1.32	{d16[1]}, [r0, :32]
+        vld1.p8 d12[6], [r2]!
+        vld1.i8 d12[6], [r2], r2
+        vld1.u16 d12[3], [r2]!
+        vld1.16 d12[2], [r2], r2
 
 @ CHECK: vld1.8	{d16[3]}, [r0]          @ encoding: [0x6f,0x00,0xe0,0xf4]
 @ CHECK: vld1.16 {d16[2]}, [r0, :16]    @ encoding: [0x9f,0x04,0xe0,0xf4]
 @ CHECK: vld1.32 {d16[1]}, [r0, :32]    @ encoding: [0xbf,0x08,0xe0,0xf4]
+@ CHECK: vld1.8	{d12[6]}, [r2]!         @ encoding: [0xcd,0xc0,0xa2,0xf4]
+@ CHECK: vld1.8	{d12[6]}, [r2], r2      @ encoding: [0xc2,0xc0,0xa2,0xf4]
+@ CHECK: vld1.16 {d12[3]}, [r2]!        @ encoding: [0xcd,0xc4,0xa2,0xf4]
+@ CHECK: vld1.16 {d12[2]}, [r2], r2     @ encoding: [0x82,0xc4,0xa2,0xf4]
 
 
 	vld2.8	{d16[1], d17[1]}, [r0, :16]
@@ -91,35 +273,225 @@
 	vld2.32	{d16[1], d17[1]}, [r0]
 	vld2.16	{d17[1], d19[1]}, [r0]
 	vld2.32	{d17[0], d19[0]}, [r0, :64]
+	vld2.32	{d17[0], d19[0]}, [r0, :64]!
+        vld2.8 {d2[4], d3[4]}, [r2], r3
+        vld2.8 {d2[4], d3[4]}, [r2]!
+        vld2.8 {d2[4], d3[4]}, [r2]
+        vld2.32 {d22[], d23[]}, [r1]
+        vld2.32 {d22[], d24[]}, [r1]
+        vld2.32 {d10[ ],d11[ ]}, [r3]!
+        vld2.32 {d14[ ],d16[ ]}, [r4]!
+        vld2.32 {d22[ ],d23[ ]}, [r5], r4
+        vld2.32 {d22[ ],d24[ ]}, [r6], r4
 
 @ CHECK: vld2.8	{d16[1], d17[1]}, [r0, :16] @ encoding: [0x3f,0x01,0xe0,0xf4]
 @ CHECK: vld2.16 {d16[1], d17[1]}, [r0, :32] @ encoding: [0x5f,0x05,0xe0,0xf4]
 @ CHECK: vld2.32 {d16[1], d17[1]}, [r0]  @ encoding: [0x8f,0x09,0xe0,0xf4]
 @ CHECK: vld2.16 {d17[1], d19[1]}, [r0]  @ encoding: [0x6f,0x15,0xe0,0xf4]
 @ CHECK: vld2.32 {d17[0], d19[0]}, [r0, :64] @ encoding: [0x5f,0x19,0xe0,0xf4]
+@ CHECK: vld2.32 {d17[0], d19[0]}, [r0, :64]! @ encoding: [0x5d,0x19,0xe0,0xf4]
+@ CHECK: vld2.8	{d2[4], d3[4]}, [r2], r3 @ encoding: [0x83,0x21,0xa2,0xf4]
+@ CHECK: vld2.8	{d2[4], d3[4]}, [r2]!   @ encoding: [0x8d,0x21,0xa2,0xf4]
+@ CHECK: vld2.8	{d2[4], d3[4]}, [r2]    @ encoding: [0x8f,0x21,0xa2,0xf4]
+@ CHECK: vld2.32 {d22[], d23[]}, [r1]    @ encoding: [0x8f,0x6d,0xe1,0xf4]
+@ CHECK: vld2.32 {d22[], d24[]}, [r1]    @ encoding: [0xaf,0x6d,0xe1,0xf4]
+@ CHECK: vld2.32 {d10[], d11[]}, [r3]!   @ encoding: [0x8d,0xad,0xa3,0xf4]
+@ CHECK: vld2.32 {d14[], d16[]}, [r4]!   @ encoding: [0xad,0xed,0xa4,0xf4]
+@ CHECK: vld2.32 {d22[], d23[]}, [r5], r4 @ encoding: [0x84,0x6d,0xe5,0xf4]
+@ CHECK: vld2.32 {d22[], d24[]}, [r6], r4 @ encoding: [0xa4,0x6d,0xe6,0xf4]
+
+
+	vld3.8 {d16[1], d17[1], d18[1]}, [r1]
+	vld3.16 {d6[1], d7[1], d8[1]}, [r2]
+	vld3.32 {d1[1], d2[1], d3[1]}, [r3]
+	vld3.u16 {d27[2], d29[2], d31[2]}, [r4]
+	vld3.i32 {d6[0], d8[0], d10[0]}, [r5]
+
+	vld3.i8 {d12[3], d13[3], d14[3]}, [r6], r1
+	vld3.i16 {d11[2], d12[2], d13[2]}, [r7], r2
+	vld3.u32 {d2[1], d3[1], d4[1]}, [r8], r3
+	vld3.u16 {d14[2], d16[2], d18[2]}, [r9], r4
+	vld3.i32 {d16[0], d18[0], d20[0]}, [r10], r5
+
+	vld3.p8 {d6[6], d7[6], d8[6]}, [r8]!
+	vld3.16 {d9[2], d10[2], d11[2]}, [r7]!
+	vld3.f32 {d1[1], d2[1], d3[1]}, [r6]!
+	vld3.p16 {d20[2], d22[2], d24[2]}, [r5]!
+	vld3.32 {d5[0], d7[0], d9[0]}, [r4]!
+
+@ CHECK: vld3.8	{d16[1], d17[1], d18[1]}, [r1] @ encoding: [0x2f,0x02,0xe1,0xf4]
+@ CHECK: vld3.16 {d6[1], d7[1], d8[1]}, [r2] @ encoding: [0x4f,0x66,0xa2,0xf4]
+@ CHECK: vld3.32 {d1[1], d2[1], d3[1]}, [r3] @ encoding: [0x8f,0x1a,0xa3,0xf4]
+@ CHECK: vld3.16 {d27[2], d29[2], d31[2]}, [r4] @ encoding: [0xaf,0xb6,0xe4,0xf4]
+@ CHECK: vld3.32 {d6[0], d8[0], d10[0]}, [r5] @ encoding: [0x4f,0x6a,0xa5,0xf4]
+@ CHECK: vld3.8	{d12[3], d13[3], d14[3]}, [r6], r1 @ encoding: [0x61,0xc2,0xa6,0xf4]
+@ CHECK: vld3.16 {d11[2], d12[2], d13[2]}, [r7], r2 @ encoding: [0x82,0xb6,0xa7,0xf4]
+@ CHECK: vld3.32 {d2[1], d3[1], d4[1]}, [r8], r3 @ encoding: [0x83,0x2a,0xa8,0xf4]
+@ CHECK: vld3.16 {d14[2], d16[2], d18[2]}, [r9], r4 @ encoding: [0xa4,0xe6,0xa9,0xf4]
+@ CHECK: vld3.32 {d16[0], d18[0], d20[0]}, [r10], r5 @ encoding: [0x45,0x0a,0xea,0xf4]
+@ CHECK: vld3.8	{d6[6], d7[6], d8[6]}, [r8]! @ encoding: [0xcd,0x62,0xa8,0xf4]
+@ CHECK: vld3.16 {d9[2], d10[2], d11[2]}, [r7]! @ encoding: [0x8d,0x96,0xa7,0xf4]
+@ CHECK: vld3.32 {d1[1], d2[1], d3[1]}, [r6]! @ encoding: [0x8d,0x1a,0xa6,0xf4]
+@ CHECK: vld3.16 {d20[2], d21[2], d22[2]}, [r5]! @ encoding: [0xad,0x46,0xe5,0xf4]
+@ CHECK: vld3.32 {d5[0], d7[0], d9[0]}, [r4]! @ encoding: [0x4d,0x5a,0xa4,0xf4]
+
+
+	vld3.8 {d16[], d17[], d18[]}, [r1]
+	vld3.16 {d16[], d17[], d18[]}, [r2]
+	vld3.32 {d16[], d17[], d18[]}, [r3]
+	vld3.8 {d17[], d19[], d21[]}, [r7]
+	vld3.16 {d17[], d19[], d21[]}, [r7]
+	vld3.32 {d16[], d18[], d20[]}, [r8]
+
+	vld3.s8 {d16[], d17[], d18[]}, [r1]!
+	vld3.s16 {d16[], d17[], d18[]}, [r2]!
+	vld3.s32 {d16[], d17[], d18[]}, [r3]!
+	vld3.u8 {d17[], d19[], d21[]}, [r7]!
+	vld3.u16 {d17[], d19[], d21[]}, [r7]!
+	vld3.u32 {d16[], d18[], d20[]}, [r8]!
+
+	vld3.p8 {d16[], d17[], d18[]}, [r1], r8
+	vld3.p16 {d16[], d17[], d18[]}, [r2], r7
+	vld3.f32 {d16[], d17[], d18[]}, [r3], r5
+	vld3.i8 {d16[], d18[], d20[]}, [r6], r3
+	vld3.i16 {d16[], d18[], d20[]}, [r6], r3
+	vld3.i32 {d17[], d19[], d21[]}, [r9], r4
+
+@ CHECK: vld3.8 {d16[], d17[], d18[]}, [r1] @ encoding: [0x0f,0x0e,0xe1,0xf4]
+@ CHECK: vld3.16 {d16[], d17[], d18[]}, [r2] @ encoding: [0x4f,0x0e,0xe2,0xf4]
+@ CHECK: vld3.32 {d16[], d17[], d18[]}, [r3] @ encoding: [0x8f,0x0e,0xe3,0xf4]
+@ CHECK: vld3.8 {d17[], d19[], d21[]}, [r7] @ encoding: [0x2f,0x1e,0xe7,0xf4]
+@ CHECK: vld3.16 {d17[], d19[], d21[]}, [r7] @ encoding: [0x6f,0x1e,0xe7,0xf4]
+@ CHECK: vld3.32 {d16[], d18[], d20[]}, [r8] @ encoding: [0xaf,0x0e,0xe8,0xf4]
+@ CHECK: vld3.8 {d16[], d17[], d18[]}, [r1]! @ encoding: [0x0d,0x0e,0xe1,0xf4]
+@ CHECK: vld3.16 {d16[], d17[], d18[]}, [r2]! @ encoding: [0x4d,0x0e,0xe2,0xf4]
+@ CHECK: vld3.32 {d16[], d17[], d18[]}, [r3]! @ encoding: [0x8d,0x0e,0xe3,0xf4]
+@ CHECK: vld3.8 {d17[], d18[], d19[]}, [r7]! @ encoding: [0x2d,0x1e,0xe7,0xf4]
+@ CHECK: vld3.16 {d17[], d18[], d19[]}, [r7]! @ encoding: [0x6d,0x1e,0xe7,0xf4]
+@ CHECK: vld3.32 {d16[], d18[], d20[]}, [r8]! @ encoding: [0xad,0x0e,0xe8,0xf4]
+@ CHECK: vld3.8 {d16[], d17[], d18[]}, [r1], r8 @ encoding: [0x08,0x0e,0xe1,0xf4]
+@ CHECK: vld3.16 {d16[], d17[], d18[]}, [r2], r7 @ encoding: [0x47,0x0e,0xe2,0xf4]
+@ CHECK: vld3.32 {d16[], d17[], d18[]}, [r3], r5 @ encoding: [0x85,0x0e,0xe3,0xf4]
+@ CHECK: vld3.8 {d16[], d18[], d20[]}, [r6], r3 @ encoding: [0x23,0x0e,0xe6,0xf4]
+@ CHECK: vld3.16 {d16[], d18[], d20[]}, [r6], r3 @ encoding: [0x63,0x0e,0xe6,0xf4]
+@ CHECK: vld3.32 {d17[], d19[], d21[]}, [r9], r4 @ encoding: [0xa4,0x1e,0xe9,0xf4]
+
+
+	vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1]
+	vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2]
+	vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3]
+	vld4.16 {d17[1], d19[1], d21[1], d23[1]}, [r7]
+	vld4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8]
+
+	vld4.s8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]!
+	vld4.s16 {d16[1], d17[1], d18[1], d19[1]}, [r2, :64]!
+	vld4.s32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :128]!
+	vld4.u16 {d17[1], d19[1], d21[1], d23[1]}, [r7]!
+	vld4.u32 {d16[1], d18[1], d20[1], d22[1]}, [r8]!
+
+	vld4.p8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32], r8
+	vld4.p16 {d16[1], d17[1], d18[1], d19[1]}, [r2], r7
+	vld4.f32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :64], r5
+	vld4.i16 {d16[1], d18[1], d20[1], d22[1]}, [r6], r3
+	vld4.i32 {d17[1], d19[1], d21[1], d23[1]}, [r9], r4
+
+@ CHECK: vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r1] @ encoding: [0x2f,0x03,0xe1,0xf4]
+@ CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2] @ encoding: [0x4f,0x07,0xe2,0xf4]
+@ CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3] @ encoding: [0x8f,0x0b,0xe3,0xf4]
+@ CHECK: vld4.16 {d17[1], d19[1], d21[1], d23[1]}, [r7] @ encoding: [0x6f,0x17,0xe7,0xf4]
+@ CHECK: vld4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8] @ encoding: [0xcf,0x0b,0xe8,0xf4]
+@ CHECK: vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r1, :32]! @ encoding: [0x3d,0x03,0xe1,0xf4]
+@ CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2, :64]! @ encoding: [0x5d,0x07,0xe2,0xf4]
+@ CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :128]! @ encoding: [0xad,0x0b,0xe3,0xf4]
+@ CHECK: vld4.16 {d17[1], d18[1], d19[1], d20[1]}, [r7]! @ encoding: [0x6d,0x17,0xe7,0xf4]
+@ CHECK: vld4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8]! @ encoding: [0xcd,0x0b,0xe8,0xf4]
+@ CHECK: vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r1, :32], r8 @ encoding: [0x38,0x03,0xe1,0xf4]
+@ CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2], r7 @ encoding: [0x47,0x07,0xe2,0xf4]
+@ CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :64], r5 @ encoding: [0x95,0x0b,0xe3,0xf4]
+@ CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r6], r3 @ encoding: [0x63,0x07,0xe6,0xf4]
+@ CHECK: vld4.32 {d17[1], d19[1], d21[1], d23[1]}, [r9], r4 @ encoding: [0xc4,0x1b,0xe9,0xf4]
+
+
+	vld4.8 {d16[], d17[], d18[], d19[]}, [r1]
+	vld4.16 {d16[], d17[], d18[], d19[]}, [r2]
+	vld4.32 {d16[], d17[], d18[], d19[]}, [r3]
+	vld4.8 {d17[], d19[], d21[], d23[]}, [r7]
+	vld4.16 {d17[], d19[], d21[], d23[]}, [r7]
+	vld4.32 {d16[], d18[], d20[], d22[]}, [r8]
+
+	vld4.s8 {d16[], d17[], d18[], d19[]}, [r1]!
+	vld4.s16 {d16[], d17[], d18[], d19[]}, [r2]!
+	vld4.s32 {d16[], d17[], d18[], d19[]}, [r3]!
+	vld4.u8 {d17[], d19[], d21[], d23[]}, [r7]!
+	vld4.u16 {d17[], d19[], d21[], d23[]}, [r7]!
+	vld4.u32 {d16[], d18[], d20[], d22[]}, [r8]!
+
+	vld4.p8 {d16[], d17[], d18[], d19[]}, [r1], r8
+	vld4.p16 {d16[], d17[], d18[], d19[]}, [r2], r7
+	vld4.f32 {d16[], d17[], d18[], d19[]}, [r3], r5
+	vld4.i8 {d16[], d18[], d20[], d22[]}, [r6], r3
+	vld4.i16 {d16[], d18[], d20[], d22[]}, [r6], r3
+	vld4.i32 {d17[], d19[], d21[], d23[]}, [r9], r4
+
+@ CHECK: vld4.8 {d16[], d17[], d18[], d19[]}, [r1] @ encoding: [0x0f,0x0f,0xe1,0xf4]
+@ CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r2] @ encoding: [0x4f,0x0f,0xe2,0xf4]
+@ CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r3] @ encoding: [0x8f,0x0f,0xe3,0xf4]
+@ CHECK: vld4.8 {d17[], d19[], d21[], d23[]}, [r7] @ encoding: [0x2f,0x1f,0xe7,0xf4]
+@ CHECK: vld4.16 {d17[], d19[], d21[], d23[]}, [r7] @ encoding: [0x6f,0x1f,0xe7,0xf4]
+@ CHECK: vld4.32 {d16[], d18[], d20[], d22[]}, [r8] @ encoding: [0xaf,0x0f,0xe8,0xf4]
+@ CHECK: vld4.8 {d16[], d17[], d18[], d19[]}, [r1]! @ encoding: [0x0d,0x0f,0xe1,0xf4]
+@ CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r2]! @ encoding: [0x4d,0x0f,0xe2,0xf4]
+@ CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r3]! @ encoding: [0x8d,0x0f,0xe3,0xf4]
+@ CHECK: vld4.8 {d17[], d18[], d19[], d20[]}, [r7]! @ encoding: [0x2d,0x1f,0xe7,0xf4]
+@ CHECK: vld4.16 {d17[], d18[], d19[], d20[]}, [r7]! @ encoding: [0x6d,0x1f,0xe7,0xf4]
+@ CHECK: vld4.32 {d16[], d18[], d20[], d22[]}, [r8]! @ encoding: [0xad,0x0f,0xe8,0xf4]
+@ CHECK: vld4.8 {d16[], d17[], d18[], d19[]}, [r1], r8 @ encoding: [0x08,0x0f,0xe1,0xf4]
+@ CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r2], r7 @ encoding: [0x47,0x0f,0xe2,0xf4]
+@ CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r3], r5 @ encoding: [0x85,0x0f,0xe3,0xf4]
+@ CHECK: vld4.8 {d16[], d18[], d20[], d22[]}, [r6], r3 @ encoding: [0x23,0x0f,0xe6,0xf4]
+@ CHECK: vld4.16 {d16[], d18[], d20[], d22[]}, [r6], r3 @ encoding: [0x63,0x0f,0xe6,0xf4]
+@ CHECK: vld4.32 {d17[], d19[], d21[], d23[]}, [r9], r4 @ encoding: [0xa4,0x1f,0xe9,0xf4]
+
+@ Handle 'Q' registers in register lists as if the sub-reg D regs were
+@ specified instead.
+	vld1.8 {q3}, [r9]
+	vld1.8 {q3, q4}, [r9]
+
+@ CHECK: vld1.8	{d6, d7}, [r9]          @ encoding: [0x0f,0x6a,0x29,0xf4]
+@ CHECK: vld1.8	{d6, d7, d8, d9}, [r9]  @ encoding: [0x0f,0x62,0x29,0xf4]
+
+
+@ Spot-check additional size-suffix aliases.
+        vld1.8 {d2}, [r2]
+        vld1.p8 {d2}, [r2]
+        vld1.u8 {d2}, [r2]
+
+        vld1.8 {q2}, [r2]
+        vld1.p8 {q2}, [r2]
+        vld1.u8 {q2}, [r2]
+        vld1.f32 {q2}, [r2]
+
+        vld1.u8 {d2, d3, d4}, [r2]
+        vld1.i32 {d2, d3, d4}, [r2]
+        vld1.f64 {d2, d3, d4}, [r2]
 
+@ CHECK: vld1.8	{d2}, [r2]              @ encoding: [0x0f,0x27,0x22,0xf4]
+@ CHECK: vld1.8	{d2}, [r2]              @ encoding: [0x0f,0x27,0x22,0xf4]
+@ CHECK: vld1.8	{d2}, [r2]              @ encoding: [0x0f,0x27,0x22,0xf4]
 
-	vld3.8	{d16[1], d17[1], d18[1]}, [r0]
-	vld3.16	{d16[1], d17[1], d18[1]}, [r0]
-	vld3.32	{d16[1], d17[1], d18[1]}, [r0]
-	vld3.16	{d16[1], d18[1], d20[1]}, [r0]
-	vld3.32	{d17[1], d19[1], d21[1]}, [r0]
+@ CHECK: vld1.8	{d4, d5}, [r2]          @ encoding: [0x0f,0x4a,0x22,0xf4]
+@ CHECK: vld1.8	{d4, d5}, [r2]          @ encoding: [0x0f,0x4a,0x22,0xf4]
+@ CHECK: vld1.8	{d4, d5}, [r2]          @ encoding: [0x0f,0x4a,0x22,0xf4]
+@ CHECK: vld1.32 {d4, d5}, [r2]         @ encoding: [0x8f,0x4a,0x22,0xf4]
 
-@ CHECK: vld3.8	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x2f,0x02,0xe0,0xf4]
-@ CHECK: vld3.16 {d16[1], d17[1], d18[1]}, [r0]@ encoding: [0x4f,0x06,0xe0,0xf4]
-@ CHECK: vld3.32 {d16[1], d17[1], d18[1]}, [r0]@ encoding: [0x8f,0x0a,0xe0,0xf4]
-@ CHECK: vld3.16 {d16[1], d18[1], d20[1]}, [r0]@ encoding: [0x6f,0x06,0xe0,0xf4]
-@ CHECK: vld3.32 {d17[1], d19[1], d21[1]}, [r0]@ encoding: [0xcf,0x1a,0xe0,0xf4]
+@ CHECK: vld1.8	{d2, d3, d4}, [r2]      @ encoding: [0x0f,0x26,0x22,0xf4]
+@ CHECK: vld1.32 {d2, d3, d4}, [r2]     @ encoding: [0x8f,0x26,0x22,0xf4]
+@ CHECK: vld1.64 {d2, d3, d4}, [r2]     @ encoding: [0xcf,0x26,0x22,0xf4]
 
 
-	vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
-	vld4.16	{d16[1], d17[1], d18[1], d19[1]}, [r0]
-	vld4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
-	vld4.16	{d16[1], d18[1], d20[1], d22[1]}, [r0, :64]
-	vld4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0]
+@ Register lists can use the range syntax, just like VLDM
+	vld1.f64 {d2-d5}, [r2,:128]!
+	vld1.f64 {d2,d3,d4,d5}, [r2,:128]!
 
-@ CHECK: vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0, :32] @ encoding: [0x3f,0x03,0xe0,0xf4]
-@ CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0] @ encoding: [0x4f,0x07,0xe0,0xf4]
-@ CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128] @ encoding: [0xaf,0x0b,0xe0,0xf4]
-@ CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r0, :64] @ encoding: [0x7f,0x07,0xe0,0xf4]
-@ CHECK: vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0] @ encoding: [0x4f,0x1b,0xe0,0xf4]
+@ CHECK: vld1.64 {d2, d3, d4, d5}, [r2, :128]! @ encoding: [0xed,0x22,0x22,0xf4]
+@ CHECK: vld1.64 {d2, d3, d4, d5}, [r2, :128]! @ encoding: [0xed,0x22,0x22,0xf4]
diff --git a/test/MC/ARM/neon-vst-encoding.s b/test/MC/ARM/neon-vst-encoding.s
index c595aa2..f5feca4 100644
--- a/test/MC/ARM/neon-vst-encoding.s
+++ b/test/MC/ARM/neon-vst-encoding.s
@@ -1,101 +1,278 @@
 @ RUN: llvm-mc -mcpu=cortex-a8 -triple armv7-apple-darwin -show-encoding < %s | FileCheck %s
-@ XFAIL: *
+
+	vst1.8	{d16}, [r0, :64]
+	vst1.16	{d16}, [r0]
+	vst1.32	{d16}, [r0]
+	vst1.64	{d16}, [r0]
+	vst1.8	{d16, d17}, [r0, :64]
+	vst1.16	{d16, d17}, [r0, :128]
+	vst1.32	{d16, d17}, [r0]
+	vst1.64	{d16, d17}, [r0]
+        vst1.8  {d16, d17, d18}, [r0, :64]
+        vst1.8  {d16, d17, d18}, [r0, :64]!
+        vst1.8  {d16, d17, d18}, [r0], r3
+        vst1.8  {d16, d17, d18, d19}, [r0, :64]
+        vst1.16  {d16, d17, d18, d19}, [r1, :64]!
+        vst1.64  {d16, d17, d18, d19}, [r3], r2
 
 @ CHECK: vst1.8	{d16}, [r0, :64]        @ encoding: [0x1f,0x07,0x40,0xf4]
-  vst1.8	{d16}, [r0, :64]
-@ CHECK: vst1.16	{d16}, [r0]             @ encoding: [0x4f,0x07,0x40,0xf4]
-  vst1.16	{d16}, [r0]
-@ CHECK: vst1.32	{d16}, [r0]             @ encoding: [0x8f,0x07,0x40,0xf4]
-  vst1.32	{d16}, [r0]
-@ CHECK: vst1.64	{d16}, [r0]             @ encoding: [0xcf,0x07,0x40,0xf4]
-  vst1.64	{d16}, [r0]
+@ CHECK: vst1.16 {d16}, [r0]            @ encoding: [0x4f,0x07,0x40,0xf4]
+@ CHECK: vst1.32 {d16}, [r0]            @ encoding: [0x8f,0x07,0x40,0xf4]
+@ CHECK: vst1.64 {d16}, [r0]            @ encoding: [0xcf,0x07,0x40,0xf4]
 @ CHECK: vst1.8	{d16, d17}, [r0, :64]   @ encoding: [0x1f,0x0a,0x40,0xf4]
-  vst1.8	{d16, d17}, [r0, :64]
-@ CHECK: vst1.16	{d16, d17}, [r0, :128]  @ encoding: [0x6f,0x0a,0x40,0xf4]
-  vst1.16	{d16, d17}, [r0, :128]
-@ CHECK: vst1.32	{d16, d17}, [r0]        @ encoding: [0x8f,0x0a,0x40,0xf4]
-  vst1.32	{d16, d17}, [r0]
-@ CHECK: vst1.64	{d16, d17}, [r0]        @ encoding: [0xcf,0x0a,0x40,0xf4]
-  vst1.64	{d16, d17}, [r0]
+@ CHECK: vst1.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x0a,0x40,0xf4]
+@ CHECK: vst1.32 {d16, d17}, [r0]       @ encoding: [0x8f,0x0a,0x40,0xf4]
+@ CHECK: vst1.64 {d16, d17}, [r0]       @ encoding: [0xcf,0x0a,0x40,0xf4]
+@ CHECK: vst1.8	{d16, d17, d18}, [r0, :64] @ encoding: [0x1f,0x06,0x40,0xf4]
+@ CHECK: vst1.8	{d16, d17, d18}, [r0, :64]! @ encoding: [0x1d,0x06,0x40,0xf4]
+@ CHECK: vst1.8	{d16, d17, d18}, [r0], r3 @ encoding: [0x03,0x06,0x40,0xf4]
+@ CHECK: vst1.8	{d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x02,0x40,0xf4]
+@ CHECK: vst1.16 {d16, d17, d18, d19}, [r1, :64]! @ encoding: [0x5d,0x02,0x41,0xf4]
+@ CHECK: vst1.64 {d16, d17, d18, d19}, [r3], r2 @ encoding: [0xc2,0x02,0x43,0xf4]
+
+
+	vst2.8	{d16, d17}, [r0, :64]
+	vst2.16	{d16, d17}, [r0, :128]
+	vst2.32	{d16, d17}, [r0]
+	vst2.8	{d16, d17, d18, d19}, [r0, :64]
+	vst2.16	{d16, d17, d18, d19}, [r0, :128]
+	vst2.32	{d16, d17, d18, d19}, [r0, :256]
+	vst2.8	{d16, d17}, [r0, :64]!
+	vst2.16	{q15}, [r0, :128]!
+	vst2.32	{d14, d15}, [r0]!
+	vst2.8	{d16, d17, d18, d19}, [r0, :64]!
+	vst2.16	{d18-d21}, [r0, :128]!
+	vst2.32	{q4, q5}, [r0, :256]!
 
 @ CHECK: vst2.8	{d16, d17}, [r0, :64]   @ encoding: [0x1f,0x08,0x40,0xf4]
-  vst2.8	{d16, d17}, [r0, :64]
-@ CHECK: vst2.16	{d16, d17}, [r0, :128]  @ encoding: [0x6f,0x08,0x40,0xf4]
-  vst2.16	{d16, d17}, [r0, :128]
-@ CHECK: vst2.32	{d16, d17}, [r0]        @ encoding: [0x8f,0x08,0x40,0xf4]
-  vst2.32	{d16, d17}, [r0]
+@ CHECK: vst2.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x08,0x40,0xf4]
+@ CHECK: vst2.32 {d16, d17}, [r0]       @ encoding: [0x8f,0x08,0x40,0xf4]
 @ CHECK: vst2.8	{d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x03,0x40,0xf4]
-  vst2.8	{d16, d17, d18, d19}, [r0, :64]
-@ CHECK: vst2.16	{d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x03,0x40,0xf4]
-  vst2.16	{d16, d17, d18, d19}, [r0, :128]
-@ CHECK: vst2.32	{d16, d17, d18, d19}, [r0, :256] @ encoding: [0xbf,0x03,0x40,0xf4]
-  vst2.32	{d16, d17, d18, d19}, [r0, :256]
-
-@ CHECK: vst3.8	{d16, d17, d18}, [r0, :64] @ encoding: [0x1f,0x04,0x40,0xf4]
-  vst3.8	{d16, d17, d18}, [r0, :64]
-@ CHECK: vst3.16	{d16, d17, d18}, [r0]   @ encoding: [0x4f,0x04,0x40,0xf4]
-  vst3.16	{d16, d17, d18}, [r0]
-@ CHECK: vst3.32	{d16, d17, d18}, [r0]   @ encoding: [0x8f,0x04,0x40,0xf4]
-  vst3.32	{d16, d17, d18}, [r0]
+@ CHECK: vst2.16 {d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x03,0x40,0xf4]
+@ CHECK: vst2.32 {d16, d17, d18, d19}, [r0, :256] @ encoding: [0xbf,0x03,0x40,0xf4]
+@ CHECK: vst2.8	{d16, d17}, [r0, :64]!  @ encoding: [0x1d,0x08,0x40,0xf4]
+@ CHECK: vst2.16	{d30, d31}, [r0, :128]! @ encoding: [0x6d,0xe8,0x40,0xf4]
+@ CHECK: vst2.32	{d14, d15}, [r0]!       @ encoding: [0x8d,0xe8,0x00,0xf4]
+@ CHECK: vst2.8	{d16, d17, d18, d19}, [r0, :64]! @ encoding: [0x1d,0x03,0x40,0xf4]
+@ CHECK: vst2.16	{d18, d19, d20, d21}, [r0, :128]! @ encoding: [0x6d,0x23,0x40,0xf4]
+@ CHECK: vst2.32	{d8, d9, d10, d11}, [r0, :256]! @ encoding: [0xbd,0x83,0x00,0xf4]
+
+
+	vst3.8 {d16, d17, d18}, [r1]
+	vst3.16 {d6, d7, d8}, [r2]
+	vst3.32 {d1, d2, d3}, [r3]
+	vst3.8 {d16, d18, d20}, [r0, :64]
+	vst3.u16 {d27, d29, d31}, [r4]
+	vst3.i32 {d6, d8, d10}, [r5]
+
+	vst3.i8 {d12, d13, d14}, [r6], r1
+	vst3.i16 {d11, d12, d13}, [r7], r2
+	vst3.u32 {d2, d3, d4}, [r8], r3
+	vst3.8 {d4, d6, d8}, [r9], r4
+	vst3.u16 {d14, d16, d18}, [r9], r4
+	vst3.i32 {d16, d18, d20}, [r10], r5
+
+	vst3.p8 {d6, d7, d8}, [r8]!
+	vst3.16 {d9, d10, d11}, [r7]!
+	vst3.f32 {d1, d2, d3}, [r6]!
+	vst3.8 {d16, d18, d20}, [r0, :64]!
+	vst3.p16 {d20, d22, d24}, [r5]!
+	vst3.32 {d5, d7, d9}, [r4]!
+
+@ CHECK: vst3.8	{d16, d17, d18}, [r1]   @ encoding: [0x0f,0x04,0x41,0xf4]
+@ CHECK: vst3.16	{d6, d7, d8}, [r2]      @ encoding: [0x4f,0x64,0x02,0xf4]
+@ CHECK: vst3.32	{d1, d2, d3}, [r3]      @ encoding: [0x8f,0x14,0x03,0xf4]
+@ CHECK: vst3.8	{d16, d18, d20}, [r0, :64] @ encoding: [0x1f,0x05,0x40,0xf4]
+@ CHECK: vst3.16	{d27, d29, d31}, [r4]   @ encoding: [0x4f,0xb5,0x44,0xf4]
+@ CHECK: vst3.32	{d6, d8, d10}, [r5]     @ encoding: [0x8f,0x65,0x05,0xf4]
+@ CHECK: vst3.8	{d12, d13, d14}, [r6], r1 @ encoding: [0x01,0xc4,0x06,0xf4]
+@ CHECK: vst3.16	{d11, d12, d13}, [r7], r2 @ encoding: [0x42,0xb4,0x07,0xf4]
+@ CHECK: vst3.32	{d2, d3, d4}, [r8], r3  @ encoding: [0x83,0x24,0x08,0xf4]
+@ CHECK: vst3.8	{d4, d6, d8}, [r9], r4  @ encoding: [0x04,0x45,0x09,0xf4]
+@ CHECK: vst3.16	{d14, d16, d18}, [r9], r4 @ encoding: [0x44,0xe5,0x09,0xf4]
+@ CHECK: vst3.32	{d16, d18, d20}, [r10], r5 @ encoding: [0x85,0x05,0x4a,0xf4]
+@ CHECK: vst3.8	{d6, d7, d8}, [r8]!     @ encoding: [0x0d,0x64,0x08,0xf4]
+@ CHECK: vst3.16	{d9, d10, d11}, [r7]!   @ encoding: [0x4d,0x94,0x07,0xf4]
+@ CHECK: vst3.32	{d1, d2, d3}, [r6]!     @ encoding: [0x8d,0x14,0x06,0xf4]
 @ CHECK: vst3.8	{d16, d18, d20}, [r0, :64]! @ encoding: [0x1d,0x05,0x40,0xf4]
-  vst3.8	{d16, d18, d20}, [r0, :64]!
-@ CHECK: vst3.8	{d17, d19, d21}, [r0, :64]! @ encoding: [0x1d,0x15,0x40,0xf4]
-  vst3.8	{d17, d19, d21}, [r0, :64]!
-@ CHECK: vst3.16	{d16, d18, d20}, [r0]!  @ encoding: [0x4d,0x05,0x40,0xf4]
-  vst3.16	{d16, d18, d20}, [r0]!
-@ CHECK: vst3.16	{d17, d19, d21}, [r0]!  @ encoding: [0x4d,0x15,0x40,0xf4]
-  vst3.16	{d17, d19, d21}, [r0]!
-@ CHECK: vst3.32	{d16, d18, d20}, [r0]!  @ encoding: [0x8d,0x05,0x40,0xf4]
-  vst3.32	{d16, d18, d20}, [r0]!
-@ CHECK: vst3.32	{d17, d19, d21}, [r0]!  @ encoding: [0x8d,0x15,0x40,0xf4]
-  vst3.32	{d17, d19, d21}, [r0]!
-
-@ CHECK: vst4.8	{d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x00,0x40,0xf4]
-  vst4.8	{d16, d17, d18, d19}, [r0, :64]
-@ CHECK: vst4.16	{d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x00,0x40,0xf4]
-  vst4.16	{d16, d17, d18, d19}, [r0, :128]
-@ CHECK: vst4.8	{d16, d18, d20, d22}, [r0, :256]! @ encoding: [0x3d,0x01,0x40,0xf4]
-  vst4.8	{d16, d18, d20, d22}, [r0, :256]!
-@ CHECK: vst4.8	{d17, d19, d21, d23}, [r0, :256]! @ encoding: [0x3d,0x11,0x40,0xf4]
-  vst4.8	{d17, d19, d21, d23}, [r0, :256]!
-@ CHECK: vst4.16	{d16, d18, d20, d22}, [r0]! @ encoding: [0x4d,0x01,0x40,0xf4]
-  vst4.16	{d16, d18, d20, d22}, [r0]!
-@ CHECK: vst4.16	{d17, d19, d21, d23}, [r0]! @ encoding: [0x4d,0x11,0x40,0xf4]
-  vst4.16	{d17, d19, d21, d23}, [r0]!
-@ CHECK: vst4.32	{d16, d18, d20, d22}, [r0]! @ encoding: [0x8d,0x01,0x40,0xf4]
-  vst4.32	{d16, d18, d20, d22}, [r0]!
-@ CHECK: vst4.32	{d17, d19, d21, d23}, [r0]! @ encoding: [0x8d,0x11,0x40,0xf4]
-  vst4.32	{d17, d19, d21, d23}, [r0]!
+@ CHECK: vst3.16	{d20, d22, d24}, [r5]!  @ encoding: [0x4d,0x45,0x45,0xf4]
+@ CHECK: vst3.32	{d5, d7, d9}, [r4]!     @ encoding: [0x8d,0x55,0x04,0xf4]
+
+
+	vst4.8 {d16, d17, d18, d19}, [r1, :64]
+	vst4.16 {d16, d17, d18, d19}, [r2, :128]
+	vst4.32 {d16, d17, d18, d19}, [r3, :256]
+	vst4.8 {d17, d19, d21, d23}, [r5, :256]
+	vst4.16 {d17, d19, d21, d23}, [r7]
+	vst4.32 {d16, d18, d20, d22}, [r8]
+
+	vst4.s8 {d16, d17, d18, d19}, [r1, :64]!
+	vst4.s16 {d16, d17, d18, d19}, [r2, :128]!
+	vst4.s32 {d16, d17, d18, d19}, [r3, :256]!
+	vst4.u8 {d17, d19, d21, d23}, [r5, :256]!
+	vst4.u16 {d17, d19, d21, d23}, [r7]!
+	vst4.u32 {d16, d18, d20, d22}, [r8]!
+
+	vst4.p8 {d16, d17, d18, d19}, [r1, :64], r8
+	vst4.p16 {d16, d17, d18, d19}, [r2], r7
+	vst4.f32 {d16, d17, d18, d19}, [r3, :64], r5
+	vst4.i8 {d16, d18, d20, d22}, [r4, :256], r2
+	vst4.i16 {d16, d18, d20, d22}, [r6], r3
+	vst4.i32 {d17, d19, d21, d23}, [r9], r4
+
+@ CHECK: vst4.8 {d16, d17, d18, d19}, [r1, :64] @ encoding: [0x1f,0x00,0x41,0xf4]
+@ CHECK: vst4.16 {d16, d17, d18, d19}, [r2, :128] @ encoding: [0x6f,0x00,0x42,0xf4]
+@ CHECK: vst4.32 {d16, d17, d18, d19}, [r3, :256] @ encoding: [0xbf,0x00,0x43,0xf4]
+@ CHECK: vst4.8 {d17, d19, d21, d23}, [r5, :256] @ encoding: [0x3f,0x11,0x45,0xf4]
+@ CHECK: vst4.16 {d17, d19, d21, d23}, [r7] @ encoding: [0x4f,0x11,0x47,0xf4]
+@ CHECK: vst4.32 {d16, d18, d20, d22}, [r8] @ encoding: [0x8f,0x01,0x48,0xf4]
+@ CHECK: vst4.8 {d16, d17, d18, d19}, [r1, :64]! @ encoding: [0x1d,0x00,0x41,0xf4]
+@ CHECK: vst4.16 {d16, d17, d18, d19}, [r2, :128]! @ encoding: [0x6d,0x00,0x42,0xf4]
+@ CHECK: vst4.32 {d16, d17, d18, d19}, [r3, :256]! @ encoding: [0xbd,0x00,0x43,0xf4]
+@ CHECK: vst4.8 {d17, d19, d21, d23}, [r5, :256]! @ encoding: [0x3d,0x11,0x45,0xf4]
+@ CHECK: vst4.16 {d17, d19, d21, d23}, [r7]! @ encoding: [0x4d,0x11,0x47,0xf4]
+@ CHECK: vst4.32 {d16, d18, d20, d22}, [r8]! @ encoding: [0x8d,0x01,0x48,0xf4]
+@ CHECK: vst4.8 {d16, d17, d18, d19}, [r1, :64], r8 @ encoding: [0x18,0x00,0x41,0xf4]
+@ CHECK: vst4.16 {d16, d17, d18, d19}, [r2], r7 @ encoding: [0x47,0x00,0x42,0xf4]
+@ CHECK: vst4.32 {d16, d17, d18, d19}, [r3, :64], r5 @ encoding: [0x95,0x00,0x43,0xf4]
+@ CHECK: vst4.8 {d16, d18, d20, d22}, [r4, :256], r2 @ encoding: [0x32,0x01,0x44,0xf4]
+@ CHECK: vst4.16 {d16, d18, d20, d22}, [r6], r3 @ encoding: [0x43,0x01,0x46,0xf4]
+@ CHECK: vst4.32 {d17, d19, d21, d23}, [r9], r4 @ encoding: [0x84,0x11,0x49,0xf4]
+
+
+	vst2.8	{d16[1], d17[1]}, [r0, :16]
+	vst2.p16	{d16[1], d17[1]}, [r0, :32]
+	vst2.i32	{d16[1], d17[1]}, [r0]
+	vst2.u16	{d17[1], d19[1]}, [r0]
+	vst2.f32	{d17[0], d19[0]}, [r0, :64]
+
+        vst2.8 {d2[4], d3[4]}, [r2], r3
+        vst2.u8 {d2[4], d3[4]}, [r2]!
+        vst2.p8 {d2[4], d3[4]}, [r2]
+
+        vst2.16 {d17[1], d19[1]}, [r0]
+        vst2.32 {d17[0], d19[0]}, [r0, :64]
+        vst2.i16 {d7[1], d9[1]}, [r1]!
+        vst2.32 {d6[0], d8[0]}, [r2, :64]!
+        vst2.16 {d2[1], d4[1]}, [r3], r5
+        vst2.u32 {d5[0], d7[0]}, [r4, :64], r7
 
 @ CHECK: vst2.8	{d16[1], d17[1]}, [r0, :16] @ encoding: [0x3f,0x01,0xc0,0xf4]
-  vst2.8	{d16[1], d17[1]}, [r0, :16]
-@ CHECK: vst2.16	{d16[1], d17[1]}, [r0, :32] @ encoding: [0x5f,0x05,0xc0,0xf4]
-  vst2.16	{d16[1], d17[1]}, [r0, :32]
-@ CHECK: vst2.32	{d16[1], d17[1]}, [r0]  @ encoding: [0x8f,0x09,0xc0,0xf4]
-  vst2.32	{d16[1], d17[1]}, [r0]
-@ CHECK: vst2.16	{d17[1], d19[1]}, [r0]  @ encoding: [0x6f,0x15,0xc0,0xf4]
-  vst2.16	{d17[1], d19[1]}, [r0]
-@ CHECK: vst2.32	{d17[0], d19[0]}, [r0, :64] @ encoding: [0x5f,0x19,0xc0,0xf4]
-  vst2.32	{d17[0], d19[0]}, [r0, :64]
-
-@ CHECK: vst3.8	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x2f,0x02,0xc0,0xf4]
-  vst3.8	{d16[1], d17[1], d18[1]}, [r0]
-@ CHECK: vst3.16	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x4f,0x06,0xc0,0xf4]
-  vst3.16	{d16[1], d17[1], d18[1]}, [r0]
-@ CHECK: vst3.32	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x8f,0x0a,0xc0,0xf4]
-  vst3.32	{d16[1], d17[1], d18[1]}, [r0]
-@ CHECK: vst3.16	{d17[2], d19[2], d21[2]}, [r0] @ encoding: [0xaf,0x16,0xc0,0xf4]
-  vst3.16	{d17[2], d19[2], d21[2]}, [r0]
-@ CHECK: vst3.32	{d16[0], d18[0], d20[0]}, [r0] @ encoding: [0x4f,0x0a,0xc0,0xf4]
-  vst3.32	{d16[0], d18[0], d20[0]}, [r0]
-
-@ CHECK: vst4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0, :32] @ encoding: [0x3f,0x03,0xc0,0xf4]
-  vst4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
-@ CHECK: vst4.16	{d16[1], d17[1], d18[1], d19[1]}, [r0] @ encoding: [0x4f,0x07,0xc0,0xf4]
-  vst4.16	{d16[1], d17[1], d18[1], d19[1]}, [r0]
-@ CHECK: vst4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0, :128] @ encoding: [0xaf,0x0b,0xc0,0xf4]
-  vst4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
-@ CHECK: vst4.16	{d17[3], d19[3], d21[3], d23[3]}, [r0, :64] @ encoding: [0xff,0x17,0xc0,0xf4]
-  vst4.16	{d17[3], d19[3], d21[3], d23[3]}, [r0, :64]
-@ CHECK: vst4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0] @ encoding: [0x4f,0x1b,0xc0,0xf4]
-  vst4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0]
+@ CHECK: vst2.16 {d16[1], d17[1]}, [r0, :32] @ encoding: [0x5f,0x05,0xc0,0xf4]
+@ CHECK: vst2.32 {d16[1], d17[1]}, [r0]  @ encoding: [0x8f,0x09,0xc0,0xf4]
+@ CHECK: vst2.16 {d17[1], d19[1]}, [r0]  @ encoding: [0x6f,0x15,0xc0,0xf4]
+@ CHECK: vst2.32 {d17[0], d19[0]}, [r0, :64] @ encoding: [0x5f,0x19,0xc0,0xf4]
+
+@ CHECK: vst2.8	{d2[4], d3[4]}, [r2], r3 @ encoding: [0x83,0x21,0x82,0xf4]
+@ CHECK: vst2.8	{d2[4], d3[4]}, [r2]!   @ encoding: [0x8d,0x21,0x82,0xf4]
+@ CHECK: vst2.8	{d2[4], d3[4]}, [r2]    @ encoding: [0x8f,0x21,0x82,0xf4]
+
+@ CHECK: vst2.16 {d17[1], d19[1]}, [r0]  @ encoding: [0x6f,0x15,0xc0,0xf4]
+@ CHECK: vst2.32 {d17[0], d19[0]}, [r0, :64] @ encoding: [0x5f,0x19,0xc0,0xf4]
+@ CHECK: vst2.16 {d7[1], d9[1]}, [r1]!   @ encoding: [0x6d,0x75,0x81,0xf4]
+@ CHECK: vst2.32 {d6[0], d8[0]}, [r2, :64]! @ encoding: [0x5d,0x69,0x82,0xf4]
+@ CHECK: vst2.16 {d2[1], d4[1]}, [r3], r5 @ encoding: [0x65,0x25,0x83,0xf4]
+@ CHECK: vst2.32 {d5[0], d7[0]}, [r4, :64], r7 @ encoding: [0x57,0x59,0x84,0xf4]
+
+
+	vst3.8 {d16[1], d17[1], d18[1]}, [r1]
+	vst3.16 {d6[1], d7[1], d8[1]}, [r2]
+	vst3.32 {d1[1], d2[1], d3[1]}, [r3]
+	vst3.u16 {d27[1], d29[1], d31[1]}, [r4]
+	vst3.i32 {d6[1], d8[1], d10[1]}, [r5]
+
+	vst3.i8 {d12[1], d13[1], d14[1]}, [r6], r1
+	vst3.i16 {d11[1], d12[1], d13[1]}, [r7], r2
+	vst3.u32 {d2[1], d3[1], d4[1]}, [r8], r3
+	vst3.u16 {d14[1], d16[1], d18[1]}, [r9], r4
+	vst3.i32 {d16[1], d18[1], d20[1]}, [r10], r5
+
+	vst3.p8 {d6[1], d7[1], d8[1]}, [r8]!
+	vst3.16 {d9[1], d10[1], d11[1]}, [r7]!
+	vst3.f32 {d1[1], d2[1], d3[1]}, [r6]!
+	vst3.p16 {d20[1], d22[1], d24[1]}, [r5]!
+	vst3.32 {d5[1], d7[1], d9[1]}, [r4]!
+
+@ CHECK: vst3.8	{d16[1], d17[1], d18[1]}, [r1] @ encoding: [0x2f,0x02,0xc1,0xf4]
+@ CHECK: vst3.16	{d6[1], d7[1], d8[1]}, [r2] @ encoding: [0x4f,0x66,0x82,0xf4]
+@ CHECK: vst3.32	{d1[1], d2[1], d3[1]}, [r3] @ encoding: [0x8f,0x1a,0x83,0xf4]
+@ CHECK: vst3.16	{d27[1], d29[1], d31[1]}, [r4] @ encoding: [0x6f,0xb6,0xc4,0xf4]
+@ CHECK: vst3.32	{d6[1], d8[1], d10[1]}, [r5] @ encoding: [0xcf,0x6a,0x85,0xf4]
+@ CHECK: vst3.8	{d12[1], d13[1], d14[1]}, [r6], r1 @ encoding: [0x21,0xc2,0x86,0xf4]
+@ CHECK: vst3.16	{d11[1], d12[1], d13[1]}, [r7], r2 @ encoding: [0x42,0xb6,0x87,0xf4]
+@ CHECK: vst3.32	{d2[1], d3[1], d4[1]}, [r8], r3 @ encoding: [0x83,0x2a,0x88,0xf4]
+@ CHECK: vst3.16	{d14[1], d16[1], d18[1]}, [r9], r4 @ encoding: [0x64,0xe6,0x89,0xf4]
+@ CHECK: vst3.32	{d16[1], d18[1], d20[1]}, [r10], r5 @ encoding: [0xc5,0x0a,0xca,0xf4]
+@ CHECK: vst3.8	{d6[1], d7[1], d8[1]}, [r8]! @ encoding: [0x2d,0x62,0x88,0xf4]
+@ CHECK: vst3.16	{d9[1], d10[1], d11[1]}, [r7]! @ encoding: [0x4d,0x96,0x87,0xf4]
+@ CHECK: vst3.32	{d1[1], d2[1], d3[1]}, [r6]! @ encoding: [0x8d,0x1a,0x86,0xf4]
+@ CHECK: vst3.16	{d20[1], d21[1], d22[1]}, [r5]! @ encoding: [0x6d,0x46,0xc5,0xf4]
+@ CHECK: vst3.32	{d5[1], d7[1], d9[1]}, [r4]! @ encoding: [0xcd,0x5a,0x84,0xf4]
+
+
+	vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1]
+	vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2]
+	vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3]
+	vst4.16 {d17[1], d19[1], d21[1], d23[1]}, [r7]
+	vst4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8]
+
+	vst4.s8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]!
+	vst4.s16 {d16[1], d17[1], d18[1], d19[1]}, [r2, :64]!
+	vst4.s32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :128]!
+	vst4.u16 {d17[1], d19[1], d21[1], d23[1]}, [r7]!
+	vst4.u32 {d16[1], d18[1], d20[1], d22[1]}, [r8]!
+
+	vst4.p8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32], r8
+	vst4.p16 {d16[1], d17[1], d18[1], d19[1]}, [r2], r7
+	vst4.f32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :64], r5
+	vst4.i16 {d16[1], d18[1], d20[1], d22[1]}, [r6], r3
+	vst4.i32 {d17[1], d19[1], d21[1], d23[1]}, [r9], r4
+
+@ CHECK: vst4.8	{d16[1], d17[1], d18[1], d19[1]}, [r1] @ encoding: [0x2f,0x03,0xc1,0xf4]
+@ CHECK: vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2] @ encoding: [0x4f,0x07,0xc2,0xf4]
+@ CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3] @ encoding: [0x8f,0x0b,0xc3,0xf4]
+@ CHECK: vst4.16 {d17[1], d19[1], d21[1], d23[1]}, [r7] @ encoding: [0x6f,0x17,0xc7,0xf4]
+@ CHECK: vst4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8] @ encoding: [0xcf,0x0b,0xc8,0xf4]
+@ CHECK: vst4.8	{d16[1], d17[1], d18[1], d19[1]}, [r1, :32]! @ encoding: [0x3d,0x03,0xc1,0xf4]
+@ CHECK: vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2, :64]! @ encoding: [0x5d,0x07,0xc2,0xf4]
+@ CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :128]! @ encoding: [0xad,0x0b,0xc3,0xf4]
+@ CHECK: vst4.16 {d17[1], d18[1], d19[1], d20[1]}, [r7]! @ encoding: [0x6d,0x17,0xc7,0xf4]
+@ CHECK: vst4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8]! @ encoding: [0xcd,0x0b,0xc8,0xf4]
+@ CHECK: vst4.8	{d16[1], d17[1], d18[1], d19[1]}, [r1, :32], r8 @ encoding: [0x38,0x03,0xc1,0xf4]
+@ CHECK: vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2], r7 @ encoding: [0x47,0x07,0xc2,0xf4]
+@ CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :64], r5 @ encoding: [0x95,0x0b,0xc3,0xf4]
+@ CHECK: vst4.16 {d16[1], d18[1], d20[1], d22[1]}, [r6], r3 @ encoding: [0x63,0x07,0xc6,0xf4]
+@ CHECK: vst4.32 {d17[1], d19[1], d21[1], d23[1]}, [r9], r4 @ encoding: [0xc4,0x1b,0xc9,0xf4]
+
+
+@ Spot-check additional size-suffix aliases.
+
+        vst1.8 {d2}, [r2]
+        vst1.p8 {d2}, [r2]
+        vst1.u8 {d2}, [r2]
+
+        vst1.8 {q2}, [r2]
+        vst1.p8 {q2}, [r2]
+        vst1.u8 {q2}, [r2]
+        vst1.f32 {q2}, [r2]
+
+@ CHECK: vst1.8	{d2}, [r2]              @ encoding: [0x0f,0x27,0x02,0xf4]
+@ CHECK: vst1.8	{d2}, [r2]              @ encoding: [0x0f,0x27,0x02,0xf4]
+@ CHECK: vst1.8	{d2}, [r2]              @ encoding: [0x0f,0x27,0x02,0xf4]
+
+@ CHECK: vst1.8	{d4, d5}, [r2]          @ encoding: [0x0f,0x4a,0x02,0xf4]
+@ CHECK: vst1.8	{d4, d5}, [r2]          @ encoding: [0x0f,0x4a,0x02,0xf4]
+@ CHECK: vst1.8	{d4, d5}, [r2]          @ encoding: [0x0f,0x4a,0x02,0xf4]
+@ CHECK: vst1.32 {d4, d5}, [r2]         @ encoding: [0x8f,0x4a,0x02,0xf4]
+
+@ rdar://11082188
+        vst2.8 {d8, d10}, [r4]
+@ CHECK: vst2.8	{d8, d10}, [r4]         @ encoding: [0x0f,0x89,0x04,0xf4]
+
+        vst1.32 {d9[1]}, [r3, :32]
+        vst1.32 {d27[1]}, [r9, :32]!
+        vst1.32 {d27[1]}, [r3, :32], r5
+@ CHECK: vst1.32	{d9[1]}, [r3, :32]       @ encoding: [0xbf,0x98,0x83,0xf4]
+@ CHECK: vst1.32	{d27[1]}, [r9, :32]!     @ encoding: [0xbd,0xb8,0xc9,0xf4]
+@ CHECK: vst1.32	{d27[1]}, [r3, :32], r5  @ encoding: [0xb5,0xb8,0xc3,0xf4]
+
diff --git a/test/MC/ARM/neon-vswp.s b/test/MC/ARM/neon-vswp.s
new file mode 100644
index 0000000..2138eed
--- /dev/null
+++ b/test/MC/ARM/neon-vswp.s
@@ -0,0 +1,7 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple armv7-apple-darwin -show-encoding < %s | FileCheck %s
+
+vswp d1, d2
+vswp q1, q2
+
+@ CHECK: vswp	d1, d2                  @ encoding: [0x02,0x10,0xb2,0xf3]
+@ CHECK: vswp	q1, q2                  @ encoding: [0x44,0x20,0xb2,0xf3]
diff --git a/test/MC/ARM/neont2-minmax-encoding.s b/test/MC/ARM/neont2-minmax-encoding.s
index 7e86d45..9ecadce 100644
--- a/test/MC/ARM/neont2-minmax-encoding.s
+++ b/test/MC/ARM/neont2-minmax-encoding.s
@@ -2,59 +2,125 @@
 
 .code 16
 
-@ CHECK: vmin.s8	d16, d16, d17           @ encoding: [0x40,0xef,0xb1,0x06]
-	vmin.s8	d16, d16, d17
-@ CHECK: vmin.s16	d16, d16, d17   @ encoding: [0x50,0xef,0xb1,0x06]
-	vmin.s16	d16, d16, d17
-@ CHECK: vmin.s32	d16, d16, d17   @ encoding: [0x60,0xef,0xb1,0x06]
-	vmin.s32	d16, d16, d17
-@ CHECK: vmin.u8	d16, d16, d17           @ encoding: [0x40,0xff,0xb1,0x06]
-	vmin.u8	d16, d16, d17
-@ CHECK: vmin.u16	d16, d16, d17   @ encoding: [0x50,0xff,0xb1,0x06]
-	vmin.u16	d16, d16, d17
-@ CHECK: vmin.u32	d16, d16, d17   @ encoding: [0x60,0xff,0xb1,0x06]
-	vmin.u32	d16, d16, d17
-@ CHECK: vmin.f32	d16, d16, d17   @ encoding: [0x60,0xef,0xa1,0x0f]
-	vmin.f32	d16, d16, d17
-@ CHECK: vmin.s8	q8, q8, q9              @ encoding: [0x40,0xef,0xf2,0x06]
-	vmin.s8	q8, q8, q9
-@ CHECK: vmin.s16	q8, q8, q9      @ encoding: [0x50,0xef,0xf2,0x06]
-	vmin.s16	q8, q8, q9
-@ CHECK: vmin.s32	q8, q8, q9      @ encoding: [0x60,0xef,0xf2,0x06]
-	vmin.s32	q8, q8, q9
-@ CHECK: vmin.u8	q8, q8, q9              @ encoding: [0x40,0xff,0xf2,0x06]
-	vmin.u8	q8, q8, q9
-@ CHECK: vmin.u16	q8, q8, q9      @ encoding: [0x50,0xff,0xf2,0x06]
-	vmin.u16	q8, q8, q9
-@ CHECK: vmin.u32	q8, q8, q9      @ encoding: [0x60,0xff,0xf2,0x06]
-	vmin.u32	q8, q8, q9
-@ CHECK: vmin.f32	q8, q8, q9      @ encoding: [0x60,0xef,0xe2,0x0f]
-	vmin.f32	q8, q8, q9
-@ CHECK: vmax.s8	d16, d16, d17           @ encoding: [0x40,0xef,0xa1,0x06]
-	vmax.s8	d16, d16, d17
-@ CHECK: vmax.s16	d16, d16, d17   @ encoding: [0x50,0xef,0xa1,0x06]
-	vmax.s16	d16, d16, d17
-@ CHECK: vmax.s32	d16, d16, d17   @ encoding: [0x60,0xef,0xa1,0x06]
-	vmax.s32	d16, d16, d17
-@ CHECK: vmax.u8	d16, d16, d17           @ encoding: [0x40,0xff,0xa1,0x06]
-	vmax.u8	d16, d16, d17
-@ CHECK: vmax.u16	d16, d16, d17   @ encoding: [0x50,0xff,0xa1,0x06]
-	vmax.u16	d16, d16, d17
-@ CHECK: vmax.u32	d16, d16, d17   @ encoding: [0x60,0xff,0xa1,0x06]
-	vmax.u32	d16, d16, d17
-@ CHECK: vmax.f32	d16, d16, d17   @ encoding: [0x40,0xef,0xa1,0x0f]
-	vmax.f32	d16, d16, d17
-@ CHECK: vmax.s8	q8, q8, q9              @ encoding: [0x40,0xef,0xe2,0x06]
-	vmax.s8	q8, q8, q9
-@ CHECK: vmax.s16	q8, q8, q9      @ encoding: [0x50,0xef,0xe2,0x06]
-	vmax.s16	q8, q8, q9
+        vmax.s8 d1, d2, d3
+        vmax.s16 d4, d5, d6
+        vmax.s32 d7, d8, d9
+        vmax.u8 d10, d11, d12
+        vmax.u16 d13, d14, d15
+        vmax.u32 d16, d17, d18
+        vmax.f32 d19, d20, d21
+
+        vmax.s8 d2, d3
+        vmax.s16 d5, d6
+        vmax.s32 d8, d9
+        vmax.u8 d11, d12
+        vmax.u16 d14, d15
+        vmax.u32 d17, d18
+        vmax.f32 d20, d21
+
+        vmax.s8 q1, q2, q3
+        vmax.s16 q4, q5, q6
+        vmax.s32 q7, q8, q9
+        vmax.u8 q10, q11, q12
+        vmax.u16 q13, q14, q15
+        vmax.u32 q6, q7, q8
+        vmax.f32 q9, q5, q1
+
+        vmax.s8 q2, q3
+        vmax.s16 q5, q6
+        vmax.s32 q8, q9
+        vmax.u8 q11, q2
+        vmax.u16 q4, q5
+        vmax.u32 q7, q8
+        vmax.f32 q2, q1
+
+@ CHECK: vmax.s8	d1, d2, d3      @ encoding: [0x02,0xef,0x03,0x16]
+@ CHECK: vmax.s16	d4, d5, d6      @ encoding: [0x15,0xef,0x06,0x46]
+@ CHECK: vmax.s32	d7, d8, d9      @ encoding: [0x28,0xef,0x09,0x76]
+@ CHECK: vmax.u8	d10, d11, d12   @ encoding: [0x0b,0xff,0x0c,0xa6]
+@ CHECK: vmax.u16	d13, d14, d15   @ encoding: [0x1e,0xff,0x0f,0xd6]
+@ CHECK: vmax.u32	d16, d17, d18   @ encoding: [0x61,0xff,0xa2,0x06]
+@ CHECK: vmax.f32	d19, d20, d21   @ encoding: [0x44,0xef,0xa5,0x3f]
+@ CHECK: vmax.s8	d2, d2, d3      @ encoding: [0x02,0xef,0x03,0x26]
+@ CHECK: vmax.s16	d5, d5, d6      @ encoding: [0x15,0xef,0x06,0x56]
+@ CHECK: vmax.s32	d8, d8, d9      @ encoding: [0x28,0xef,0x09,0x86]
+@ CHECK: vmax.u8	d11, d11, d12   @ encoding: [0x0b,0xff,0x0c,0xb6]
+@ CHECK: vmax.u16	d14, d14, d15   @ encoding: [0x1e,0xff,0x0f,0xe6]
+@ CHECK: vmax.u32	d17, d17, d18   @ encoding: [0x61,0xff,0xa2,0x16]
+@ CHECK: vmax.f32	d20, d20, d21   @ encoding: [0x44,0xef,0xa5,0x4f]
+@ CHECK: vmax.s8	q1, q2, q3      @ encoding: [0x04,0xef,0x46,0x26]
+@ CHECK: vmax.s16	q4, q5, q6      @ encoding: [0x1a,0xef,0x4c,0x86]
+@ CHECK: vmax.s32	q7, q8, q9      @ encoding: [0x20,0xef,0xe2,0xe6]
+@ CHECK: vmax.u8	q10, q11, q12   @ encoding: [0x46,0xff,0xe8,0x46]
+@ CHECK: vmax.u16	q13, q14, q15   @ encoding: [0x5c,0xff,0xee,0xa6]
+@ CHECK: vmax.u32	q6, q7, q8      @ encoding: [0x2e,0xff,0x60,0xc6]
+@ CHECK: vmax.f32	q9, q5, q1      @ encoding: [0x4a,0xef,0x42,0x2f]
+@ CHECK: vmax.s8	q2, q2, q3      @ encoding: [0x04,0xef,0x46,0x46]
+@ CHECK: vmax.s16	q5, q5, q6      @ encoding: [0x1a,0xef,0x4c,0xa6]
 @ CHECK: vmax.s32	q8, q8, q9      @ encoding: [0x60,0xef,0xe2,0x06]
-	vmax.s32	q8, q8, q9
-@ CHECK: vmax.u8	q8, q8, q9              @ encoding: [0x40,0xff,0xe2,0x06]
-	vmax.u8	q8, q8, q9
-@ CHECK: vmax.u16	q8, q8, q9      @ encoding: [0x50,0xff,0xe2,0x06]
-	vmax.u16	q8, q8, q9
-@ CHECK: vmax.u32	q8, q8, q9      @ encoding: [0x60,0xff,0xe2,0x06]
-	vmax.u32	q8, q8, q9
-@ CHECK: vmax.f32	q8, q8, q9      @ encoding: [0x40,0xef,0xe2,0x0f]
-	vmax.f32	q8, q8, q9
+@ CHECK: vmax.u8	q11, q11, q2    @ encoding: [0x46,0xff,0xc4,0x66]
+@ CHECK: vmax.u16	q4, q4, q5      @ encoding: [0x18,0xff,0x4a,0x86]
+@ CHECK: vmax.u32	q7, q7, q8      @ encoding: [0x2e,0xff,0x60,0xe6]
+@ CHECK: vmax.f32	q2, q2, q1      @ encoding: [0x04,0xef,0x42,0x4f]
+
+
+        vmin.s8 d1, d2, d3
+        vmin.s16 d4, d5, d6
+        vmin.s32 d7, d8, d9
+        vmin.u8 d10, d11, d12
+        vmin.u16 d13, d14, d15
+        vmin.u32 d16, d17, d18
+        vmin.f32 d19, d20, d21
+
+        vmin.s8 d2, d3
+        vmin.s16 d5, d6
+        vmin.s32 d8, d9
+        vmin.u8 d11, d12
+        vmin.u16 d14, d15
+        vmin.u32 d17, d18
+        vmin.f32 d20, d21
+
+        vmin.s8 q1, q2, q3
+        vmin.s16 q4, q5, q6
+        vmin.s32 q7, q8, q9
+        vmin.u8 q10, q11, q12
+        vmin.u16 q13, q14, q15
+        vmin.u32 q6, q7, q8
+        vmin.f32 q9, q5, q1
+
+        vmin.s8 q2, q3
+        vmin.s16 q5, q6
+        vmin.s32 q8, q9
+        vmin.u8 q11, q2
+        vmin.u16 q4, q5
+        vmin.u32 q7, q8
+        vmin.f32 q2, q1
+
+@ CHECK: vmin.s8	d1, d2, d3      @ encoding: [0x02,0xef,0x13,0x16]
+@ CHECK: vmin.s16	d4, d5, d6      @ encoding: [0x15,0xef,0x16,0x46]
+@ CHECK: vmin.s32	d7, d8, d9      @ encoding: [0x28,0xef,0x19,0x76]
+@ CHECK: vmin.u8	d10, d11, d12   @ encoding: [0x0b,0xff,0x1c,0xa6]
+@ CHECK: vmin.u16	d13, d14, d15   @ encoding: [0x1e,0xff,0x1f,0xd6]
+@ CHECK: vmin.u32	d16, d17, d18   @ encoding: [0x61,0xff,0xb2,0x06]
+@ CHECK: vmin.f32	d19, d20, d21   @ encoding: [0x64,0xef,0xa5,0x3f]
+@ CHECK: vmin.s8	d2, d2, d3      @ encoding: [0x02,0xef,0x13,0x26]
+@ CHECK: vmin.s16	d5, d5, d6      @ encoding: [0x15,0xef,0x16,0x56]
+@ CHECK: vmin.s32	d8, d8, d9      @ encoding: [0x28,0xef,0x19,0x86]
+@ CHECK: vmin.u8	d11, d11, d12   @ encoding: [0x0b,0xff,0x1c,0xb6]
+@ CHECK: vmin.u16	d14, d14, d15   @ encoding: [0x1e,0xff,0x1f,0xe6]
+@ CHECK: vmin.u32	d17, d17, d18   @ encoding: [0x61,0xff,0xb2,0x16]
+@ CHECK: vmin.f32	d20, d20, d21   @ encoding: [0x64,0xef,0xa5,0x4f]
+@ CHECK: vmin.s8	q1, q2, q3      @ encoding: [0x04,0xef,0x56,0x26]
+@ CHECK: vmin.s16	q4, q5, q6      @ encoding: [0x1a,0xef,0x5c,0x86]
+@ CHECK: vmin.s32	q7, q8, q9      @ encoding: [0x20,0xef,0xf2,0xe6]
+@ CHECK: vmin.u8	q10, q11, q12   @ encoding: [0x46,0xff,0xf8,0x46]
+@ CHECK: vmin.u16	q13, q14, q15   @ encoding: [0x5c,0xff,0xfe,0xa6]
+@ CHECK: vmin.u32	q6, q7, q8      @ encoding: [0x2e,0xff,0x70,0xc6]
+@ CHECK: vmin.f32	q9, q5, q1      @ encoding: [0x6a,0xef,0x42,0x2f]
+@ CHECK: vmin.s8	q2, q2, q3      @ encoding: [0x04,0xef,0x56,0x46]
+@ CHECK: vmin.s16	q5, q5, q6      @ encoding: [0x1a,0xef,0x5c,0xa6]
+@ CHECK: vmin.s32	q8, q8, q9      @ encoding: [0x60,0xef,0xf2,0x06]
+@ CHECK: vmin.u8	q11, q11, q2    @ encoding: [0x46,0xff,0xd4,0x66]
+@ CHECK: vmin.u16	q4, q4, q5      @ encoding: [0x18,0xff,0x5a,0x86]
+@ CHECK: vmin.u32	q7, q7, q8      @ encoding: [0x2e,0xff,0x70,0xe6]
+@ CHECK: vmin.f32	q2, q2, q1      @ encoding: [0x24,0xef,0x42,0x4f]
diff --git a/test/MC/ARM/neont2-mov-encoding.s b/test/MC/ARM/neont2-mov-encoding.s
index ababbb7..43df349 100644
--- a/test/MC/ARM/neont2-mov-encoding.s
+++ b/test/MC/ARM/neont2-mov-encoding.s
@@ -1,119 +1,131 @@
 @ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
-@ XFAIL: *
 
 .code 16
 
-@ CHECK: vmov.i8	d16, #0x8               @ encoding: [0x18,0x0e,0xc0,0xef]
 	vmov.i8	d16, #0x8
-@ CHECK: vmov.i16	d16, #0x10      @ encoding: [0x10,0x08,0xc1,0xef]
 	vmov.i16	d16, #0x10
-@ CHECK: vmov.i16	d16, #0x1000    @ encoding: [0x10,0x0a,0xc1,0xef]
 	vmov.i16	d16, #0x1000
-@ CHECK: vmov.i32	d16, #0x20      @ encoding: [0x10,0x00,0xc2,0xef]
 	vmov.i32	d16, #0x20
-@ CHECK: vmov.i32	d16, #0x2000    @ encoding: [0x10,0x02,0xc2,0xef]
 	vmov.i32	d16, #0x2000
-@ CHECK: vmov.i32	d16, #0x200000  @ encoding: [0x10,0x04,0xc2,0xef]
 	vmov.i32	d16, #0x200000
-@ CHECK: vmov.i32	d16, #0x20000000 @ encoding: [0x10,0x06,0xc2,0xef]
 	vmov.i32	d16, #0x20000000
-@ CHECK: vmov.i32	d16, #0x20FF    @ encoding: [0x10,0x0c,0xc2,0xef]
 	vmov.i32	d16, #0x20FF
-@ CHECK: vmov.i32	d16, #0x20FFFF  @ encoding: [0x10,0x0d,0xc2,0xef]
 	vmov.i32	d16, #0x20FFFF
-@ CHECK: vmov.i64	d16, #0xFF0000FF0000FFFF @ encoding: [0x33,0x0e,0xc1,0xff]
 	vmov.i64	d16, #0xFF0000FF0000FFFF
-@ CHECK: vmov.i8	q8, #0x8                @ encoding: [0x58,0x0e,0xc0,0xef]
+
+@ CHECK: vmov.i8	d16, #0x8       @ encoding: [0xc0,0xef,0x18,0x0e]
+@ CHECK: vmov.i16	d16, #0x10      @ encoding: [0xc1,0xef,0x10,0x08]
+@ CHECK: vmov.i16	d16, #0x1000    @ encoding: [0xc1,0xef,0x10,0x0a]
+@ CHECK: vmov.i32	d16, #0x20      @ encoding: [0xc2,0xef,0x10,0x00]
+@ CHECK: vmov.i32	d16, #0x2000    @ encoding: [0xc2,0xef,0x10,0x02]
+@ CHECK: vmov.i32	d16, #0x200000  @ encoding: [0xc2,0xef,0x10,0x04]
+@ CHECK: vmov.i32	d16, #0x20000000 @ encoding: [0xc2,0xef,0x10,0x06]
+@ CHECK: vmov.i32	d16, #0x20ff    @ encoding: [0xc2,0xef,0x10,0x0c]
+@ CHECK: vmov.i32	d16, #0x20ffff  @ encoding: [0xc2,0xef,0x10,0x0d]
+@ CHECK: vmov.i64 d16, #0xff0000ff0000ffff @ encoding: [0xc1,0xff,0x33,0x0e]
+
+
 	vmov.i8	q8, #0x8
-@ CHECK: vmov.i16	q8, #0x10       @ encoding: [0x50,0x08,0xc1,0xef]
 	vmov.i16	q8, #0x10
-@ CHECK: vmov.i16	q8, #0x1000     @ encoding: [0x50,0x0a,0xc1,0xef]
 	vmov.i16	q8, #0x1000
-@ CHECK: vmov.i32	q8, #0x20       @ encoding: [0x50,0x00,0xc2,0xef]
 	vmov.i32	q8, #0x20
-@ CHECK: vmov.i32	q8, #0x2000     @ encoding: [0x50,0x02,0xc2,0xef]
 	vmov.i32	q8, #0x2000
-@ CHECK: vmov.i32	q8, #0x200000   @ encoding: [0x50,0x04,0xc2,0xef]
 	vmov.i32	q8, #0x200000
-@ CHECK: vmov.i32	q8, #0x20000000 @ encoding: [0x50,0x06,0xc2,0xef]
 	vmov.i32	q8, #0x20000000
-@ CHECK: vmov.i32	q8, #0x20FF     @ encoding: [0x50,0x0c,0xc2,0xef]
 	vmov.i32	q8, #0x20FF
-@ CHECK: vmov.i32	q8, #0x20FFFF   @ encoding: [0x50,0x0d,0xc2,0xef]
 	vmov.i32	q8, #0x20FFFF
-@ CHECK: vmov.i64	q8, #0xFF0000FF0000FFFF @ encoding: [0x73,0x0e,0xc1,0xff]
 	vmov.i64	q8, #0xFF0000FF0000FFFF
-@ CHECK: vmvn.i16	d16, #0x10      @ encoding: [0x30,0x08,0xc1,0xef]
+
+@ CHECK: vmov.i8	q8, #0x8        @ encoding: [0xc0,0xef,0x58,0x0e]
+@ CHECK: vmov.i16	q8, #0x10       @ encoding: [0xc1,0xef,0x50,0x08]
+@ CHECK: vmov.i16	q8, #0x1000     @ encoding: [0xc1,0xef,0x50,0x0a]
+@ CHECK: vmov.i32	q8, #0x20       @ encoding: [0xc2,0xef,0x50,0x00]
+@ CHECK: vmov.i32	q8, #0x2000     @ encoding: [0xc2,0xef,0x50,0x02]
+@ CHECK: vmov.i32	q8, #0x200000   @ encoding: [0xc2,0xef,0x50,0x04]
+@ CHECK: vmov.i32	q8, #0x20000000 @ encoding: [0xc2,0xef,0x50,0x06]
+@ CHECK: vmov.i32	q8, #0x20ff     @ encoding: [0xc2,0xef,0x50,0x0c]
+@ CHECK: vmov.i32	q8, #0x20ffff   @ encoding: [0xc2,0xef,0x50,0x0d]
+@ CHECK: vmov.i64 q8, #0xff0000ff0000ffff @ encoding: [0xc1,0xff,0x73,0x0e]
+
+
 	vmvn.i16	d16, #0x10
-@ CHECK: vmvn.i16	d16, #0x1000    @ encoding: [0x30,0x0a,0xc1,0xef]
 	vmvn.i16	d16, #0x1000
-@ CHECK: vmvn.i32	d16, #0x20      @ encoding: [0x30,0x00,0xc2,0xef]
 	vmvn.i32	d16, #0x20
-@ CHECK: vmvn.i32	d16, #0x2000    @ encoding: [0x30,0x02,0xc2,0xef]
 	vmvn.i32	d16, #0x2000
-@ CHECK: vmvn.i32	d16, #0x200000  @ encoding: [0x30,0x04,0xc2,0xef]
 	vmvn.i32	d16, #0x200000
-@ CHECK: vmvn.i32	d16, #0x20000000 @ encoding: [0x30,0x06,0xc2,0xef]
 	vmvn.i32	d16, #0x20000000
-@ CHECK: vmvn.i32	d16, #0x20FF    @ encoding: [0x30,0x0c,0xc2,0xef]
 	vmvn.i32	d16, #0x20FF
-@ CHECK: vmvn.i32	d16, #0x20FFFF  @ encoding: [0x30,0x0d,0xc2,0xef]
 	vmvn.i32	d16, #0x20FFFF
-@ CHECK: vmovl.s8	q8, d16         @ encoding: [0x30,0x0a,0xc8,0xef]
+
+@ CHECK: vmvn.i16	d16, #0x10      @ encoding: [0xc1,0xef,0x30,0x08]
+@ CHECK: vmvn.i16	d16, #0x1000    @ encoding: [0xc1,0xef,0x30,0x0a]
+@ CHECK: vmvn.i32	d16, #0x20      @ encoding: [0xc2,0xef,0x30,0x00]
+@ CHECK: vmvn.i32	d16, #0x2000    @ encoding: [0xc2,0xef,0x30,0x02]
+@ CHECK: vmvn.i32	d16, #0x200000  @ encoding: [0xc2,0xef,0x30,0x04]
+@ CHECK: vmvn.i32	d16, #0x20000000 @ encoding: [0xc2,0xef,0x30,0x06]
+@ CHECK: vmvn.i32	d16, #0x20ff    @ encoding: [0xc2,0xef,0x30,0x0c]
+@ CHECK: vmvn.i32	d16, #0x20ffff  @ encoding: [0xc2,0xef,0x30,0x0d]
+
+
 	vmovl.s8	q8, d16
-@ CHECK: vmovl.s16	q8, d16         @ encoding: [0x30,0x0a,0xd0,0xef]
 	vmovl.s16	q8, d16
-@ CHECK: vmovl.s32	q8, d16         @ encoding: [0x30,0x0a,0xe0,0xef]
 	vmovl.s32	q8, d16
-@ CHECK: vmovl.u8	q8, d16         @ encoding: [0x30,0x0a,0xc8,0xff]
 	vmovl.u8	q8, d16
-@ CHECK: vmovl.u16	q8, d16         @ encoding: [0x30,0x0a,0xd0,0xff]
 	vmovl.u16	q8, d16
-@ CHECK: vmovl.u32	q8, d16         @ encoding: [0x30,0x0a,0xe0,0xff]
 	vmovl.u32	q8, d16
-@ CHECK: vmovn.i16	d16, q8         @ encoding: [0x20,0x02,0xf2,0xff]
 	vmovn.i16	d16, q8
-@ CHECK: vmovn.i32	d16, q8         @ encoding: [0x20,0x02,0xf6,0xff]
 	vmovn.i32	d16, q8
-@ CHECK: vmovn.i64	d16, q8         @ encoding: [0x20,0x02,0xfa,0xff]
 	vmovn.i64	d16, q8
-@ CHECK: vqmovn.s16	d16, q8         @ encoding: [0xa0,0x02,0xf2,0xff]
 	vqmovn.s16	d16, q8
-@ CHECK: vqmovn.s32	d16, q8         @ encoding: [0xa0,0x02,0xf6,0xff]
 	vqmovn.s32	d16, q8
-@ CHECK: vqmovn.s64	d16, q8         @ encoding: [0xa0,0x02,0xfa,0xff]
 	vqmovn.s64	d16, q8
-@ CHECK: vqmovn.u16	d16, q8         @ encoding: [0xe0,0x02,0xf2,0xff]
 	vqmovn.u16	d16, q8
-@ CHECK: vqmovn.u32	d16, q8         @ encoding: [0xe0,0x02,0xf6,0xff]
 	vqmovn.u32	d16, q8
-@ CHECK: vqmovn.u64	d16, q8         @ encoding: [0xe0,0x02,0xfa,0xff]
 	vqmovn.u64	d16, q8
-@ CHECK: vqmovun.s16	d16, q8         @ encoding: [0x60,0x02,0xf2,0xff]
 	vqmovun.s16	d16, q8
-@ CHECK: vqmovun.s32	d16, q8         @ encoding: [0x60,0x02,0xf6,0xff]
 	vqmovun.s32	d16, q8
-@ CHECK: vqmovun.s64	d16, q8         @ encoding: [0x60,0x02,0xfa,0xff]
 	vqmovun.s64	d16, q8
-@ CHECK: vmov.s8	r0, d16[1]              @ encoding: [0xb0,0x0b,0x50,0xee]
+
+@ CHECK: vmovl.s8	q8, d16         @ encoding: [0xc8,0xef,0x30,0x0a]
+@ CHECK: vmovl.s16	q8, d16         @ encoding: [0xd0,0xef,0x30,0x0a]
+@ CHECK: vmovl.s32	q8, d16         @ encoding: [0xe0,0xef,0x30,0x0a]
+@ CHECK: vmovl.u8	q8, d16         @ encoding: [0xc8,0xff,0x30,0x0a]
+@ CHECK: vmovl.u16	q8, d16         @ encoding: [0xd0,0xff,0x30,0x0a]
+@ CHECK: vmovl.u32	q8, d16         @ encoding: [0xe0,0xff,0x30,0x0a]
+@ CHECK: vmovn.i16	d16, q8         @ encoding: [0xf2,0xff,0x20,0x02]
+@ CHECK: vmovn.i32	d16, q8         @ encoding: [0xf6,0xff,0x20,0x02]
+@ CHECK: vmovn.i64	d16, q8         @ encoding: [0xfa,0xff,0x20,0x02]
+@ CHECK: vqmovn.s16	d16, q8         @ encoding: [0xf2,0xff,0xa0,0x02]
+@ CHECK: vqmovn.s32	d16, q8         @ encoding: [0xf6,0xff,0xa0,0x02]
+@ CHECK: vqmovn.s64	d16, q8         @ encoding: [0xfa,0xff,0xa0,0x02]
+@ CHECK: vqmovn.u16	d16, q8         @ encoding: [0xf2,0xff,0xe0,0x02]
+@ CHECK: vqmovn.u32	d16, q8         @ encoding: [0xf6,0xff,0xe0,0x02]
+@ CHECK: vqmovn.u64	d16, q8         @ encoding: [0xfa,0xff,0xe0,0x02]
+@ CHECK: vqmovun.s16	d16, q8         @ encoding: [0xf2,0xff,0x60,0x02]
+@ CHECK: vqmovun.s32	d16, q8         @ encoding: [0xf6,0xff,0x60,0x02]
+@ CHECK: vqmovun.s64	d16, q8         @ encoding: [0xfa,0xff,0x60,0x02]
+
+
 	vmov.s8	r0, d16[1]
-@ CHECK: vmov.s16	r0, d16[1]      @ encoding: [0xf0,0x0b,0x10,0xee]
 	vmov.s16	r0, d16[1]
-@ CHECK: vmov.u8	r0, d16[1]              @ encoding: [0xb0,0x0b,0xd0,0xee]
 	vmov.u8	r0, d16[1]
-@ CHECK: vmov.u16	r0, d16[1]      @ encoding: [0xf0,0x0b,0x90,0xee]
 	vmov.u16	r0, d16[1]
-@ CHECK: vmov.32	r0, d16[1]              @ encoding: [0x90,0x0b,0x30,0xee]
 	vmov.32	r0, d16[1]
-@ CHECK: vmov.8	d16[1], r1              @ encoding: [0xb0,0x1b,0x40,0xee]
 	vmov.8	d16[1], r1
-@ CHECK: vmov.16	d16[1], r1              @ encoding: [0xf0,0x1b,0x00,0xee]
 	vmov.16	d16[1], r1
-@ CHECK: vmov.32	d16[1], r1              @ encoding: [0x90,0x1b,0x20,0xee]
 	vmov.32	d16[1], r1
-@ CHECK: vmov.8	d18[1], r1              @ encoding: [0xb0,0x1b,0x42,0xee]
 	vmov.8	d18[1], r1
-@ CHECK: vmov.16	d18[1], r1              @ encoding: [0xf0,0x1b,0x02,0xee]
 	vmov.16	d18[1], r1
-@ CHECK: vmov.32	d18[1], r1              @ encoding: [0x90,0x1b,0x22,0xee]
 	vmov.32	d18[1], r1
+
+@ CHECK: vmov.s8	r0, d16[1]      @ encoding: [0x50,0xee,0xb0,0x0b]
+@ CHECK: vmov.s16	r0, d16[1]      @ encoding: [0x10,0xee,0xf0,0x0b]
+@ CHECK: vmov.u8	r0, d16[1]      @ encoding: [0xd0,0xee,0xb0,0x0b]
+@ CHECK: vmov.u16	r0, d16[1]      @ encoding: [0x90,0xee,0xf0,0x0b]
+@ CHECK: vmov.32	r0, d16[1]      @ encoding: [0x30,0xee,0x90,0x0b]
+@ CHECK: vmov.8	d16[1], r1              @ encoding: [0x40,0xee,0xb0,0x1b]
+@ CHECK: vmov.16	d16[1], r1      @ encoding: [0x00,0xee,0xf0,0x1b]
+@ CHECK: vmov.32	d16[1], r1      @ encoding: [0x20,0xee,0x90,0x1b]
+@ CHECK: vmov.8	d18[1], r1              @ encoding: [0x42,0xee,0xb0,0x1b]
+@ CHECK: vmov.16	d18[1], r1      @ encoding: [0x02,0xee,0xf0,0x1b]
+@ CHECK: vmov.32	d18[1], r1      @ encoding: [0x22,0xee,0x90,0x1b]
diff --git a/test/MC/ARM/neont2-mul-accum-encoding.s b/test/MC/ARM/neont2-mul-accum-encoding.s
index be4bf79..bc6a4d4 100644
--- a/test/MC/ARM/neont2-mul-accum-encoding.s
+++ b/test/MC/ARM/neont2-mul-accum-encoding.s
@@ -10,6 +10,7 @@
 	vmla.i16	q9, q8, q10
 	vmla.i32	q9, q8, q10
 	vmla.f32	q9, q8, q10
+	vmla.i32	q12, q8, d3[0]
 
 @ CHECK: vmla.i8	d16, d18, d17   @ encoding: [0x42,0xef,0xa1,0x09]
 @ CHECK: vmla.i16	d16, d18, d17   @ encoding: [0x52,0xef,0xa1,0x09]
@@ -19,6 +20,7 @@
 @ CHECK: vmla.i16	q9, q8, q10     @ encoding: [0x50,0xef,0xe4,0x29]
 @ CHECK: vmla.i32	q9, q8, q10     @ encoding: [0x60,0xef,0xe4,0x29]
 @ CHECK: vmla.f32	q9, q8, q10     @ encoding: [0x40,0xef,0xf4,0x2d]
+@ CHECK: vmla.i32	q12, q8, d3[0]    @ encoding: [0xe0,0xff,0xc3,0x80]
 
 
 	vmlal.s8	q8, d19, d18
@@ -27,6 +29,7 @@
 	vmlal.u8	q8, d19, d18
 	vmlal.u16	q8, d19, d18
 	vmlal.u32	q8, d19, d18
+	vmlal.s32	q0, d5, d10[0]
 
 @ CHECK: vmlal.s8	q8, d19, d18    @ encoding: [0xc3,0xef,0xa2,0x08]
 @ CHECK: vmlal.s16	q8, d19, d18    @ encoding: [0xd3,0xef,0xa2,0x08]
@@ -34,13 +37,22 @@
 @ CHECK: vmlal.u8	q8, d19, d18    @ encoding: [0xc3,0xff,0xa2,0x08]
 @ CHECK: vmlal.u16	q8, d19, d18    @ encoding: [0xd3,0xff,0xa2,0x08]
 @ CHECK: vmlal.u32	q8, d19, d18    @ encoding: [0xe3,0xff,0xa2,0x08]
+@ CHECK: vmlal.s32	q0, d5, d10[0]    @ encoding: [0xa5,0xef,0x4a,0x02]
 
 
 	vqdmlal.s16	q8, d19, d18
 	vqdmlal.s32	q8, d19, d18
+        vqdmlal.s16 q11, d11, d7[0]
+        vqdmlal.s16 q11, d11, d7[1]
+        vqdmlal.s16 q11, d11, d7[2]
+        vqdmlal.s16 q11, d11, d7[3]
 
 @ CHECK: vqdmlal.s16	q8, d19, d18    @ encoding: [0xd3,0xef,0xa2,0x09]
 @ CHECK: vqdmlal.s32	q8, d19, d18    @ encoding: [0xe3,0xef,0xa2,0x09]
+@ CHECK: vqdmlal.s16	q11, d11, d7[0] @ encoding: [0xdb,0xef,0x47,0x63]
+@ CHECK: vqdmlal.s16	q11, d11, d7[1] @ encoding: [0xdb,0xef,0x4f,0x63]
+@ CHECK: vqdmlal.s16	q11, d11, d7[2] @ encoding: [0xdb,0xef,0x67,0x63]
+@ CHECK: vqdmlal.s16	q11, d11, d7[3] @ encoding: [0xdb,0xef,0x6f,0x63]
 
 
 	vmls.i8	d16, d18, d17
@@ -51,6 +63,7 @@
 	vmls.i16	q9, q8, q10
 	vmls.i32	q9, q8, q10
 	vmls.f32	q9, q8, q10
+	vmls.i16	q4, q12, d6[2]
 
 @ CHECK: vmls.i8	d16, d18, d17   @ encoding: [0x42,0xff,0xa1,0x09]
 @ CHECK: vmls.i16	d16, d18, d17   @ encoding: [0x52,0xff,0xa1,0x09]
@@ -60,6 +73,7 @@
 @ CHECK: vmls.i16	q9, q8, q10     @ encoding: [0x50,0xff,0xe4,0x29]
 @ CHECK: vmls.i32	q9, q8, q10     @ encoding: [0x60,0xff,0xe4,0x29]
 @ CHECK: vmls.f32	q9, q8, q10     @ encoding: [0x60,0xef,0xf4,0x2d]
+@ CHECK: vmls.i16	q4, q12, d6[2]  @ encoding: [0x98,0xff,0xe6,0x84]
 
 
 	vmlsl.s8	q8, d19, d18
@@ -68,6 +82,7 @@
 	vmlsl.u8	q8, d19, d18
 	vmlsl.u16	q8, d19, d18
 	vmlsl.u32	q8, d19, d18
+	vmlsl.u16	q11, d25, d1[3]
 
 @ CHECK: vmlsl.s8	q8, d19, d18    @ encoding: [0xc3,0xef,0xa2,0x0a]
 @ CHECK: vmlsl.s16	q8, d19, d18    @ encoding: [0xd3,0xef,0xa2,0x0a]
@@ -75,6 +90,7 @@
 @ CHECK: vmlsl.u8	q8, d19, d18    @ encoding: [0xc3,0xff,0xa2,0x0a]
 @ CHECK: vmlsl.u16	q8, d19, d18    @ encoding: [0xd3,0xff,0xa2,0x0a]
 @ CHECK: vmlsl.u32	q8, d19, d18    @ encoding: [0xe3,0xff,0xa2,0x0a]
+@ CHECK: vmlsl.u16	q11, d25, d1[3]    @ encoding: [0xd9,0xff,0xe9,0x66]
 
 
 	vqdmlsl.s16	q8, d19, d18
diff --git a/test/MC/ARM/neont2-mul-encoding.s b/test/MC/ARM/neont2-mul-encoding.s
index 93ecabb..dfbb667 100644
--- a/test/MC/ARM/neont2-mul-encoding.s
+++ b/test/MC/ARM/neont2-mul-encoding.s
@@ -2,57 +2,77 @@
 
 .code 16
 
-@ CHECK: vmul.i8	d16, d16, d17           @ encoding: [0x40,0xef,0xb1,0x09]
 	vmul.i8	d16, d16, d17
-@ CHECK: vmul.i16	d16, d16, d17   @ encoding: [0x50,0xef,0xb1,0x09]
 	vmul.i16	d16, d16, d17
-@ CHECK: vmul.i32	d16, d16, d17   @ encoding: [0x60,0xef,0xb1,0x09]
 	vmul.i32	d16, d16, d17
-@ CHECK: vmul.f32	d16, d16, d17   @ encoding: [0x40,0xff,0xb1,0x0d]
 	vmul.f32	d16, d16, d17
-@ CHECK: vmul.i8	q8, q8, q9              @ encoding: [0x40,0xef,0xf2,0x09]
 	vmul.i8	q8, q8, q9
-@ CHECK: vmul.i16	q8, q8, q9      @ encoding: [0x50,0xef,0xf2,0x09]
 	vmul.i16	q8, q8, q9
-@ CHECK: vmul.i32	q8, q8, q9      @ encoding: [0x60,0xef,0xf2,0x09]
 	vmul.i32	q8, q8, q9
-@ CHECK: vmul.f32	q8, q8, q9      @ encoding: [0x40,0xff,0xf2,0x0d]
 	vmul.f32	q8, q8, q9
-@ CHECK: vmul.p8	d16, d16, d17           @ encoding: [0x40,0xff,0xb1,0x09]
 	vmul.p8	d16, d16, d17
-@ CHECK: vmul.p8	q8, q8, q9              @ encoding: [0x40,0xff,0xf2,0x09]
 	vmul.p8	q8, q8, q9
-@ CHECK: vqdmulh.s16	d16, d16, d17   @ encoding: [0x50,0xef,0xa1,0x0b]
+	vmul.i16	d18, d8, d0[3]
+
+@ CHECK: vmul.i8	d16, d16, d17   @ encoding: [0x40,0xef,0xb1,0x09]
+@ CHECK: vmul.i16	d16, d16, d17   @ encoding: [0x50,0xef,0xb1,0x09]
+@ CHECK: vmul.i32	d16, d16, d17   @ encoding: [0x60,0xef,0xb1,0x09]
+@ CHECK: vmul.f32	d16, d16, d17   @ encoding: [0x40,0xff,0xb1,0x0d]
+@ CHECK: vmul.i8	q8, q8, q9      @ encoding: [0x40,0xef,0xf2,0x09]
+@ CHECK: vmul.i16	q8, q8, q9      @ encoding: [0x50,0xef,0xf2,0x09]
+@ CHECK: vmul.i32	q8, q8, q9      @ encoding: [0x60,0xef,0xf2,0x09]
+@ CHECK: vmul.f32	q8, q8, q9      @ encoding: [0x40,0xff,0xf2,0x0d]
+@ CHECK: vmul.p8	d16, d16, d17   @ encoding: [0x40,0xff,0xb1,0x09]
+@ CHECK: vmul.p8	q8, q8, q9      @ encoding: [0x40,0xff,0xf2,0x09]
+@ CHECK: vmul.i16	d18, d8, d0[3]  @ encoding: [0xd8,0xef,0x68,0x28]
+
+
 	vqdmulh.s16	d16, d16, d17
-@ CHECK: vqdmulh.s32	d16, d16, d17   @ encoding: [0x60,0xef,0xa1,0x0b]
 	vqdmulh.s32	d16, d16, d17
-@ CHECK: vqdmulh.s16	q8, q8, q9      @ encoding: [0x50,0xef,0xe2,0x0b]
 	vqdmulh.s16	q8, q8, q9
-@ CHECK: vqdmulh.s32	q8, q8, q9      @ encoding: [0x60,0xef,0xe2,0x0b]
 	vqdmulh.s32	q8, q8, q9
-@ CHECK: vqrdmulh.s16	d16, d16, d17   @ encoding: [0x50,0xff,0xa1,0x0b]
+	vqdmulh.s16	d11, d2, d3[0]
+
+@ CHECK: vqdmulh.s16	d16, d16, d17   @ encoding: [0x50,0xef,0xa1,0x0b]
+@ CHECK: vqdmulh.s32	d16, d16, d17   @ encoding: [0x60,0xef,0xa1,0x0b]
+@ CHECK: vqdmulh.s16	q8, q8, q9      @ encoding: [0x50,0xef,0xe2,0x0b]
+@ CHECK: vqdmulh.s32	q8, q8, q9      @ encoding: [0x60,0xef,0xe2,0x0b]
+@ CHECK: vqdmulh.s16	d11, d2, d3[0]  @ encoding: [0x92,0xef,0x43,0xbc]
+
+
 	vqrdmulh.s16	d16, d16, d17
-@ CHECK: vqrdmulh.s32	d16, d16, d17   @ encoding: [0x60,0xff,0xa1,0x0b]
 	vqrdmulh.s32	d16, d16, d17
-@ CHECK: vqrdmulh.s16	q8, q8, q9      @ encoding: [0x50,0xff,0xe2,0x0b]
 	vqrdmulh.s16	q8, q8, q9
-@ CHECK: vqrdmulh.s32	q8, q8, q9      @ encoding: [0x60,0xff,0xe2,0x0b]
 	vqrdmulh.s32	q8, q8, q9
-@ CHECK: vmull.s8	q8, d16, d17    @ encoding: [0xc0,0xef,0xa1,0x0c]
+
+@ CHECK: vqrdmulh.s16	d16, d16, d17   @ encoding: [0x50,0xff,0xa1,0x0b]
+@ CHECK: vqrdmulh.s32	d16, d16, d17   @ encoding: [0x60,0xff,0xa1,0x0b]
+@ CHECK: vqrdmulh.s16	q8, q8, q9      @ encoding: [0x50,0xff,0xe2,0x0b]
+@ CHECK: vqrdmulh.s32	q8, q8, q9      @ encoding: [0x60,0xff,0xe2,0x0b]
+
+
 	vmull.s8	q8, d16, d17
-@ CHECK: vmull.s16	q8, d16, d17    @ encoding: [0xd0,0xef,0xa1,0x0c]
 	vmull.s16	q8, d16, d17
-@ CHECK: vmull.s32	q8, d16, d17    @ encoding: [0xe0,0xef,0xa1,0x0c]
 	vmull.s32	q8, d16, d17
-@ CHECK: vmull.u8	q8, d16, d17    @ encoding: [0xc0,0xff,0xa1,0x0c]
 	vmull.u8	q8, d16, d17
-@ CHECK: vmull.u16	q8, d16, d17    @ encoding: [0xd0,0xff,0xa1,0x0c]
 	vmull.u16	q8, d16, d17
-@ CHECK: vmull.u32	q8, d16, d17    @ encoding: [0xe0,0xff,0xa1,0x0c]
 	vmull.u32	q8, d16, d17
-@ CHECK: vmull.p8	q8, d16, d17    @ encoding: [0xc0,0xef,0xa1,0x0e]
 	vmull.p8	q8, d16, d17
-@ CHECK: vqdmull.s16	q8, d16, d17    @ encoding: [0xd0,0xef,0xa1,0x0d]
+
+@ CHECK: vmull.s8	q8, d16, d17    @ encoding: [0xc0,0xef,0xa1,0x0c]
+@ CHECK: vmull.s16	q8, d16, d17    @ encoding: [0xd0,0xef,0xa1,0x0c]
+@ CHECK: vmull.s32	q8, d16, d17    @ encoding: [0xe0,0xef,0xa1,0x0c]
+@ CHECK: vmull.u8	q8, d16, d17    @ encoding: [0xc0,0xff,0xa1,0x0c]
+@ CHECK: vmull.u16	q8, d16, d17    @ encoding: [0xd0,0xff,0xa1,0x0c]
+@ CHECK: vmull.u32	q8, d16, d17    @ encoding: [0xe0,0xff,0xa1,0x0c]
+@ CHECK: vmull.p8	q8, d16, d17    @ encoding: [0xc0,0xef,0xa1,0x0e]
+
+
 	vqdmull.s16	q8, d16, d17
-@ CHECK: vqdmull.s32	q8, d16, d17    @ encoding: [0xe0,0xef,0xa1,0x0d]
 	vqdmull.s32	q8, d16, d17
+        vqdmull.s16	q1, d7, d1[1]
+
+@ CHECK: vqdmull.s16	q8, d16, d17    @ encoding: [0xd0,0xef,0xa1,0x0d]
+@ CHECK: vqdmull.s32	q8, d16, d17    @ encoding: [0xe0,0xef,0xa1,0x0d]
+@ CHECK: vqdmull.s16	q1, d7, d1[1]   @ encoding: [0x97,0xef,0x49,0x2b]
+
diff --git a/test/MC/ARM/neont2-table-encoding.s b/test/MC/ARM/neont2-table-encoding.s
index 46fb934..9bfcc74 100644
--- a/test/MC/ARM/neont2-table-encoding.s
+++ b/test/MC/ARM/neont2-table-encoding.s
@@ -1,21 +1,24 @@
 @ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
-@ XFAIL: *
 
 .code 16
 
-@ CHECK: vtbl.8	d16, {d17}, d16         @ encoding: [0xa0,0x08,0xf1,0xff]
 	vtbl.8	d16, {d17}, d16
-@ CHECK: vtbl.8	d16, {d16, d17}, d18    @ encoding: [0xa2,0x09,0xf0,0xff]
 	vtbl.8	d16, {d16, d17}, d18
-@ CHECK: vtbl.8	d16, {d16, d17, d18}, d20 @ encoding: [0xa4,0x0a,0xf0,0xff]
 	vtbl.8	d16, {d16, d17, d18}, d20
-@ CHECK: vtbl.8	d16, {d16, d17, d18, d19}, d20 @ encoding: [0xa4,0x0b,0xf0,0xff]
 	vtbl.8	d16, {d16, d17, d18, d19}, d20
-@ CHECK: vtbx.8	d18, {d16}, d17         @ encoding: [0xe1,0x28,0xf0,0xff]
+
+@ CHECK: vtbl.8	d16, {d17}, d16         @ encoding: [0xf1,0xff,0xa0,0x08]
+@ CHECK: vtbl.8	d16, {d16, d17}, d18    @ encoding: [0xf0,0xff,0xa2,0x09]
+@ CHECK: vtbl.8	d16, {d16, d17, d18}, d20 @ encoding: [0xf0,0xff,0xa4,0x0a]
+@ CHECK: vtbl.8	d16, {d16, d17, d18, d19}, d20 @ encoding: [0xf0,0xff,0xa4,0x0b]
+
+
 	vtbx.8	d18, {d16}, d17
-@ CHECK: vtbx.8	d19, {d16, d17}, d18    @ encoding: [0xe2,0x39,0xf0,0xff]
 	vtbx.8	d19, {d16, d17}, d18
-@ CHECK: vtbx.8	d20, {d16, d17, d18}, d21 @ encoding: [0xe5,0x4a,0xf0,0xff]
 	vtbx.8	d20, {d16, d17, d18}, d21
-@ CHECK: vtbx.8	d20, {d16, d17, d18, d19}, d21 @ encoding: [0xe5,0x4b,0xf0,0xff]
 	vtbx.8	d20, {d16, d17, d18, d19}, d21
+
+@ CHECK: vtbx.8	d18, {d16}, d17         @ encoding: [0xf0,0xff,0xe1,0x28]
+@ CHECK: vtbx.8	d19, {d16, d17}, d18    @ encoding: [0xf0,0xff,0xe2,0x39]
+@ CHECK: vtbx.8	d20, {d16, d17, d18}, d21 @ encoding: [0xf0,0xff,0xe5,0x4a]
+@ CHECK: vtbx.8	d20, {d16, d17, d18, d19}, d21 @ encoding: [0xf0,0xff,0xe5,0x4b]
diff --git a/test/MC/ARM/neont2-vst-encoding.s b/test/MC/ARM/neont2-vst-encoding.s
index 1722f12..b50d8b6 100644
--- a/test/MC/ARM/neont2-vst-encoding.s
+++ b/test/MC/ARM/neont2-vst-encoding.s
@@ -101,3 +101,7 @@
   vst4.16	{d17[3], d19[3], d21[3], d23[3]}, [r0, :64]
 @ CHECK: vst4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0] @ encoding: [0x4f,0x1b,0xc0,0xf9]
   vst4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0]
+
+@ rdar://11082188
+        vst2.8 {d8, d10}, [r4]
+@ CHECK: vst2.8	{d8, d10}, [r4]         @ encoding: [0x04,0xf9,0x0f,0x89]
diff --git a/test/MC/ARM/nop-armv6t2-padding.s b/test/MC/ARM/nop-armv6t2-padding.s
deleted file mode 100644
index 0e25718..0000000
--- a/test/MC/ARM/nop-armv6t2-padding.s
+++ /dev/null
@@ -1,10 +0,0 @@
-@ RUN: llvm-mc -triple armv6t2-apple-darwin %s -filetype=obj -o %t.obj
-@ RUN: macho-dump --dump-section-data < %t.obj > %t.dump
-@ RUN: FileCheck %s < %t.dump
-
-x:
-      add r0, r1, r2
-      .align 4
-      add r0, r1, r2
-
-@ CHECK: ('_section_data', '020081e0 007820e3 007820e3 007820e3 020081e0')
diff --git a/test/MC/ARM/pr11877.s b/test/MC/ARM/pr11877.s
new file mode 100644
index 0000000..da3f6ad
--- /dev/null
+++ b/test/MC/ARM/pr11877.s
@@ -0,0 +1,6 @@
+// RUN: llvm-mc -triple arm-unknown-unknown %s
+
+i:
+        .long    g
+g = h
+h = i
diff --git a/test/MC/ARM/prefetch.ll b/test/MC/ARM/prefetch.ll
deleted file mode 100644
index e77fdb1..0000000
--- a/test/MC/ARM/prefetch.ll
+++ /dev/null
@@ -1,58 +0,0 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin   -mattr=+v7,+mp -show-mc-encoding | FileCheck %s -check-prefix=ARM
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+v7     -show-mc-encoding | FileCheck %s -check-prefix=T2
-; rdar://8924681
-
-define void @t1(i8* %ptr) nounwind  {
-entry:
-; ARM: t1:
-; ARM: pldw [r0]                        @ encoding: [0x00,0xf0,0x90,0xf5]
-; ARM: pld [r0]                         @ encoding: [0x00,0xf0,0xd0,0xf5]
-
-; T2: t1:
-; T2: pld [r0]                      @ encoding: [0x90,0xf8,0x00,0xf0]
-  tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 3 )
-  tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3 )
-  ret void
-}
-
-define void @t2(i8* %ptr) nounwind  {
-entry:
-; ARM: t2:
-; ARM: pld [r0, #1023]                  @ encoding: [0xff,0xf3,0xd0,0xf5]
-
-; T2: t2:
-; T2: pld [r0, #1023]               @ encoding: [0x90,0xf8,0xff,0xf3]
-  %tmp = getelementptr i8* %ptr, i32 1023
-  tail call void @llvm.prefetch( i8* %tmp, i32 0, i32 3 )
-  ret void
-}
-
-define void @t3(i32 %base, i32 %offset) nounwind  {
-entry:
-; ARM: t3:
-; ARM: pld [r0, r1, lsr #2]             @ encoding: [0x21,0xf1,0xd0,0xf7]
-
-; T2: t3:
-; T2: pld [r0, r1]                  @ encoding: [0x10,0xf8,0x01,0xf0]
-  %tmp1 = lshr i32 %offset, 2
-  %tmp2 = add i32 %base, %tmp1
-  %tmp3 = inttoptr i32 %tmp2 to i8*
-  tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3 )
-  ret void
-}
-
-define void @t4(i32 %base, i32 %offset) nounwind  {
-entry:
-; ARM: t4:
-; ARM: pld [r0, r1, lsl #2]             @ encoding: [0x01,0xf1,0xd0,0xf7]
-
-; T2: t4:
-; T2: pld [r0, r1, lsl #2]          @ encoding: [0x10,0xf8,0x21,0xf0]
-  %tmp1 = shl i32 %offset, 2
-  %tmp2 = add i32 %base, %tmp1
-  %tmp3 = inttoptr i32 %tmp2 to i8*
-  tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3 )
-  ret void
-}
-
-declare void @llvm.prefetch(i8*, i32, i32) nounwind 
diff --git a/test/MC/ARM/simple-fp-encoding.s b/test/MC/ARM/simple-fp-encoding.s
index e7d452a..b592f1e 100644
--- a/test/MC/ARM/simple-fp-encoding.s
+++ b/test/MC/ARM/simple-fp-encoding.s
@@ -21,9 +21,15 @@
 @ CHECK: vmul.f64 d16, d17, d16      @ encoding: [0xa0,0x0b,0x61,0xee]
         vmul.f64        d16, d17, d16
 
+@ CHECK: vmul.f64	d20, d20, d17   @ encoding: [0xa1,0x4b,0x64,0xee]
+	vmul.f64  d20, d17
+
 @ CHECK: vmul.f32 s0, s1, s0         @ encoding: [0x80,0x0a,0x20,0xee]
         vmul.f32        s0, s1, s0
 
+@ CHECK: vmul.f32	s11, s11, s21   @ encoding: [0xaa,0x5a,0x65,0xee]
+	vmul.f32  s11, s21
+
 @ CHECK: vnmul.f64 d16, d17, d16     @ encoding: [0xe0,0x0b,0x61,0xee]
         vnmul.f64       d16, d17, d16
 
@@ -114,10 +120,21 @@
 @ CHECK: vnmls.f32 s1, s2, s0        @ encoding: [0x00,0x0a,0x51,0xee]
         vnmls.f32       s1, s2, s0
 
-@ CHECK: vmrs apsr_nzcv, fpscr       @ encoding: [0x10,0xfa,0xf1,0xee]
-@ CHECK: vmrs apsr_nzcv, fpscr       @ encoding: [0x10,0xfa,0xf1,0xee]
+        vmrs    APSR_nzcv, fpscr
         vmrs    apsr_nzcv, fpscr
         fmstat
+        vmrs    r2, fpsid
+        vmrs    r3, FPSID
+        vmrs    r4, mvfr0
+        vmrs    r5, MVFR1
+
+@ CHECK: vmrs APSR_nzcv, fpscr       @ encoding: [0x10,0xfa,0xf1,0xee]
+@ CHECK: vmrs APSR_nzcv, fpscr       @ encoding: [0x10,0xfa,0xf1,0xee]
+@ CHECK: vmrs APSR_nzcv, fpscr       @ encoding: [0x10,0xfa,0xf1,0xee]
+@ CHECK: vmrs r2, fpsid              @ encoding: [0x10,0x2a,0xf0,0xee]
+@ CHECK: vmrs r3, fpsid              @ encoding: [0x10,0x3a,0xf0,0xee]
+@ CHECK: vmrs r4, mvfr0              @ encoding: [0x10,0x4a,0xf7,0xee]
+@ CHECK: vmrs r5, mvfr1              @ encoding: [0x10,0x5a,0xf6,0xee]
 
 @ CHECK: vnegne.f64 d16, d16         @ encoding: [0x60,0x0b,0xf1,0x1e]
         vnegne.f64      d16, d16
@@ -127,6 +144,16 @@
         vmovne  s0, r0
         vmoveq  s0, r1
 
+        vmov.f32 r1, s2
+        vmov.f32 s4, r3
+        vmov.f64 r1, r5, d2
+        vmov.f64 d4, r3, r9
+
+@ CHECK: vmov	r1, s2                  @ encoding: [0x10,0x1a,0x11,0xee]
+@ CHECK: vmov	s4, r3                  @ encoding: [0x10,0x3a,0x02,0xee]
+@ CHECK: vmov	r1, r5, d2              @ encoding: [0x12,0x1b,0x55,0xec]
+@ CHECK: vmov	d4, r3, r9              @ encoding: [0x14,0x3b,0x49,0xec]
+
 @ CHECK: vmrs r0, fpscr              @ encoding: [0x10,0x0a,0xf1,0xee]
         vmrs    r0, fpscr
 @ CHECK: vmrs  r0, fpexc             @ encoding: [0x10,0x0a,0xf8,0xee]
@@ -172,52 +199,62 @@
 @ CHECK: vmov r0, r1, d16            @ encoding: [0x30,0x0b,0x51,0xec]
         vmov    r0, r1, d16
 
-@ CHECK: vldr.64 d17, [r0]           @ encoding: [0x00,0x1b,0xd0,0xed]
+@ CHECK: vldr d17, [r0]           @ encoding: [0x00,0x1b,0xd0,0xed]
+@ CHECK: vldr s0, [lr]            @ encoding: [0x00,0x0a,0x9e,0xed]
+@ CHECK: vldr d0, [lr]            @ encoding: [0x00,0x0b,0x9e,0xed]
+
         vldr.64	d17, [r0]
+	vldr.i32 s0, [lr]
+	vldr.d d0, [lr]
 
-@ CHECK: vldr.64 d1, [r2, #32]       @ encoding: [0x08,0x1b,0x92,0xed]
-@ CHECK: vldr.64 d1, [r2, #-32]      @ encoding: [0x08,0x1b,0x12,0xed]
+@ CHECK: vldr d1, [r2, #32]       @ encoding: [0x08,0x1b,0x92,0xed]
+@ CHECK: vldr d1, [r2, #-32]      @ encoding: [0x08,0x1b,0x12,0xed]
         vldr.64	d1, [r2, #32]
-        vldr.64	d1, [r2, #-32]
+        vldr.f64	d1, [r2, #-32]
 
-@ CHECK: vldr.64 d2, [r3]            @ encoding: [0x00,0x2b,0x93,0xed]
+@ CHECK: vldr d2, [r3]            @ encoding: [0x00,0x2b,0x93,0xed]
         vldr.64 d2, [r3]
 
-@ CHECK: vldr.64 d3, [pc]            @ encoding: [0x00,0x3b,0x9f,0xed]
-@ CHECK: vldr.64 d3, [pc]            @ encoding: [0x00,0x3b,0x9f,0xed]
-@ CHECK: vldr.64 d3, [pc, #-0]            @ encoding: [0x00,0x3b,0x1f,0xed]
+@ CHECK: vldr d3, [pc]            @ encoding: [0x00,0x3b,0x9f,0xed]
+@ CHECK: vldr d3, [pc]            @ encoding: [0x00,0x3b,0x9f,0xed]
+@ CHECK: vldr d3, [pc, #-0]            @ encoding: [0x00,0x3b,0x1f,0xed]
         vldr.64 d3, [pc]
         vldr.64 d3, [pc,#0]
         vldr.64 d3, [pc,#-0]
 
-@ CHECK: vldr.32 s13, [r0]           @ encoding: [0x00,0x6a,0xd0,0xed]
+@ CHECK: vldr s13, [r0]           @ encoding: [0x00,0x6a,0xd0,0xed]
         vldr.32	s13, [r0]
 
-@ CHECK: vldr.32 s1, [r2, #32]       @ encoding: [0x08,0x0a,0xd2,0xed]
-@ CHECK: vldr.32 s1, [r2, #-32]      @ encoding: [0x08,0x0a,0x52,0xed]
+@ CHECK: vldr s1, [r2, #32]       @ encoding: [0x08,0x0a,0xd2,0xed]
+@ CHECK: vldr s1, [r2, #-32]      @ encoding: [0x08,0x0a,0x52,0xed]
         vldr.32	s1, [r2, #32]
         vldr.32	s1, [r2, #-32]
 
-@ CHECK: vldr.32 s2, [r3]            @ encoding: [0x00,0x1a,0x93,0xed]
+@ CHECK: vldr s2, [r3]            @ encoding: [0x00,0x1a,0x93,0xed]
         vldr.32 s2, [r3]
 
-@ CHECK: vldr.32 s5, [pc]            @ encoding: [0x00,0x2a,0xdf,0xed]
-@ CHECK: vldr.32 s5, [pc]            @ encoding: [0x00,0x2a,0xdf,0xed]
-@ CHECK: vldr.32 s5, [pc, #-0]            @ encoding: [0x00,0x2a,0x5f,0xed]
+@ CHECK: vldr s5, [pc]            @ encoding: [0x00,0x2a,0xdf,0xed]
+@ CHECK: vldr s5, [pc]            @ encoding: [0x00,0x2a,0xdf,0xed]
+@ CHECK: vldr s5, [pc, #-0]            @ encoding: [0x00,0x2a,0x5f,0xed]
         vldr.32 s5, [pc]
         vldr.32 s5, [pc,#0]
         vldr.32 s5, [pc,#-0]
 
-@ CHECK: vstr.64 d4, [r1]            @ encoding: [0x00,0x4b,0x81,0xed]
-@ CHECK: vstr.64 d4, [r1, #24]       @ encoding: [0x06,0x4b,0x81,0xed]
-@ CHECK: vstr.64 d4, [r1, #-24]      @ encoding: [0x06,0x4b,0x01,0xed]
+@ CHECK: vstr d4, [r1]            @ encoding: [0x00,0x4b,0x81,0xed]
+@ CHECK: vstr d4, [r1, #24]       @ encoding: [0x06,0x4b,0x81,0xed]
+@ CHECK: vstr d4, [r1, #-24]      @ encoding: [0x06,0x4b,0x01,0xed]
+@ CHECK: vstr s0, [lr]            @ encoding: [0x00,0x0a,0x8e,0xed]
+@ CHECK: vstr d0, [lr]            @ encoding: [0x00,0x0b,0x8e,0xed]
+
         vstr.64 d4, [r1]
         vstr.64 d4, [r1, #24]
         vstr.64 d4, [r1, #-24]
+	vstr s0, [lr]
+	vstr d0, [lr]
 
-@ CHECK: vstr.32 s4, [r1]            @ encoding: [0x00,0x2a,0x81,0xed]
-@ CHECK: vstr.32 s4, [r1, #24]       @ encoding: [0x06,0x2a,0x81,0xed]
-@ CHECK: vstr.32 s4, [r1, #-24]      @ encoding: [0x06,0x2a,0x01,0xed]
+@ CHECK: vstr s4, [r1]            @ encoding: [0x00,0x2a,0x81,0xed]
+@ CHECK: vstr s4, [r1, #24]       @ encoding: [0x06,0x2a,0x81,0xed]
+@ CHECK: vstr s4, [r1, #-24]      @ encoding: [0x06,0x2a,0x01,0xed]
         vstr.32 s4, [r1]
         vstr.32 s4, [r1, #24]
         vstr.32 s4, [r1, #-24]
@@ -229,8 +266,10 @@
 
 @ CHECK: vstmia r1, {d2, d3, d4, d5, d6, d7} @ encoding: [0x0c,0x2b,0x81,0xec]
 @ CHECK: vstmia	r1, {s2, s3, s4, s5, s6, s7} @ encoding: [0x06,0x1a,0x81,0xec]
+@ CHECK: vpush	{d8, d9, d10, d11, d12, d13, d14, d15} @ encoding: [0x10,0x8b,0x2d,0xed]
         vstmia  r1, {d2,d3-d6,d7}
         vstmia  r1, {s2,s3-s6,s7}
+        vstmdb sp!, {q4-q7}
 
 @ CHECK: vcvtr.s32.f64  s0, d0 @ encoding: [0x40,0x0b,0xbd,0xee]
 @ CHECK: vcvtr.s32.f32  s0, s1 @ encoding: [0x60,0x0a,0xbd,0xee]
@@ -243,3 +282,76 @@
 
 @ CHECK: vmovne	s25, s26, r2, r5
         vmovne	s25, s26, r2, r5        @ encoding: [0x39,0x2a,0x45,0x1c]
+
+@ VMOV w/ optional data type suffix.
+	vmov.32 s1, r8
+        vmov.s16 s2, r4
+        vmov.16 s3, r6
+        vmov.u32 s4, r1
+        vmov.p8 s5, r2
+        vmov.8 s6, r3
+
+        vmov.32 r1, s8
+        vmov.s16 r2, s4
+        vmov.16 r3, s6
+        vmov.u32 r4, s1
+        vmov.p8 r5, s2
+        vmov.8 r6, s3
+
+@ CHECK: vmov	s1, r8                  @ encoding: [0x90,0x8a,0x00,0xee]
+@ CHECK: vmov	s2, r4                  @ encoding: [0x10,0x4a,0x01,0xee]
+@ CHECK: vmov	s3, r6                  @ encoding: [0x90,0x6a,0x01,0xee]
+@ CHECK: vmov	s4, r1                  @ encoding: [0x10,0x1a,0x02,0xee]
+@ CHECK: vmov	s5, r2                  @ encoding: [0x90,0x2a,0x02,0xee]
+@ CHECK: vmov	s6, r3                  @ encoding: [0x10,0x3a,0x03,0xee]
+@ CHECK: vmov	r1, s8                  @ encoding: [0x10,0x1a,0x14,0xee]
+@ CHECK: vmov	r2, s4                  @ encoding: [0x10,0x2a,0x12,0xee]
+@ CHECK: vmov	r3, s6                  @ encoding: [0x10,0x3a,0x13,0xee]
+@ CHECK: vmov	r4, s1                  @ encoding: [0x90,0x4a,0x10,0xee]
+@ CHECK: vmov	r5, s2                  @ encoding: [0x10,0x5a,0x11,0xee]
+@ CHECK: vmov	r6, s3                  @ encoding: [0x90,0x6a,0x11,0xee]
+
+
+@ VCVT (between floating-point and fixed-point)
+        vcvt.f32.u32 s0, s0, #20
+        vcvt.f64.s32 d0, d0, #32
+        vcvt.f32.u16 s0, s0, #1
+        vcvt.f64.s16 d0, d0, #16
+        vcvt.f32.s32 s1, s1, #20
+        vcvt.f64.u32 d20, d20, #32
+        vcvt.f32.s16 s17, s17, #1
+        vcvt.f64.u16 d23, d23, #16
+        vcvt.u32.f32 s12, s12, #20 
+        vcvt.s32.f64 d2, d2, #32
+        vcvt.u16.f32 s28, s28, #1
+        vcvt.s16.f64 d15, d15, #16
+        vcvt.s32.f32 s1, s1, #20
+        vcvt.u32.f64 d20, d20, #32
+        vcvt.s16.f32 s17, s17, #1
+        vcvt.u16.f64 d23, d23, #16
+
+@ CHECK: vcvt.f32.u32	s0, s0, #20     @ encoding: [0xc6,0x0a,0xbb,0xee]
+@ CHECK: vcvt.f64.s32	d0, d0, #32     @ encoding: [0xc0,0x0b,0xba,0xee]
+@ CHECK: vcvt.f32.u16	s0, s0, #1      @ encoding: [0x67,0x0a,0xbb,0xee]
+@ CHECK: vcvt.f64.s16	d0, d0, #16     @ encoding: [0x40,0x0b,0xba,0xee]
+@ CHECK: vcvt.f32.s32	s1, s1, #20     @ encoding: [0xc6,0x0a,0xfa,0xee]
+@ CHECK: vcvt.f64.u32	d20, d20, #32   @ encoding: [0xc0,0x4b,0xfb,0xee]
+@ CHECK: vcvt.f32.s16	s17, s17, #1    @ encoding: [0x67,0x8a,0xfa,0xee]
+@ CHECK: vcvt.f64.u16	d23, d23, #16   @ encoding: [0x40,0x7b,0xfb,0xee]
+
+@ CHECK: vcvt.u32.f32	s12, s12, #20   @ encoding: [0xc6,0x6a,0xbf,0xee]
+@ CHECK: vcvt.s32.f64	d2, d2, #32     @ encoding: [0xc0,0x2b,0xbe,0xee]
+@ CHECK: vcvt.u16.f32	s28, s28, #1    @ encoding: [0x67,0xea,0xbf,0xee]
+@ CHECK: vcvt.s16.f64	d15, d15, #16   @ encoding: [0x40,0xfb,0xbe,0xee]
+@ CHECK: vcvt.s32.f32	s1, s1, #20     @ encoding: [0xc6,0x0a,0xfe,0xee]
+@ CHECK: vcvt.u32.f64	d20, d20, #32   @ encoding: [0xc0,0x4b,0xff,0xee]
+@ CHECK: vcvt.s16.f32	s17, s17, #1    @ encoding: [0x67,0x8a,0xfe,0xee]
+@ CHECK: vcvt.u16.f64	d23, d23, #16   @ encoding: [0x40,0x7b,0xff,0xee]
+
+
+@ Use NEON to load some f32 immediates that don't fit the f8 representation.
+        vmov.f32 d4, #0.0
+        vmov.f32 d4, #32.0
+
+@ CHECK: vmov.i32	d4, #0x0        @ encoding: [0x10,0x40,0x80,0xf2]
+@ CHECK: vmov.i32	d4, #0x42000000 @ encoding: [0x12,0x46,0x84,0xf2]
diff --git a/test/MC/ARM/thumb-diagnostics.s b/test/MC/ARM/thumb-diagnostics.s
index d02c27e..99d7e38 100644
--- a/test/MC/ARM/thumb-diagnostics.s
+++ b/test/MC/ARM/thumb-diagnostics.s
@@ -24,13 +24,9 @@
 
 @ Out of range immediates for ASR instruction.
         asrs r2, r3, #33
-        asrs r2, r3, #0
 @ CHECK-ERRORS: error: invalid operand for instruction
 @ CHECK-ERRORS:         asrs r2, r3, #33
 @ CHECK-ERRORS:                      ^
-@ CHECK-ERRORS: error: invalid operand for instruction
-@ CHECK-ERRORS:         asrs r2, r3, #0
-@ CHECK-ERRORS:                      ^
 
 @ Out of range immediates for BKPT instruction.
         bkpt #256
@@ -125,10 +121,10 @@ error: invalid operand for instruction
         add sp, #3
         add sp, sp, #512
         add r2, sp, #1024
-@ CHECK-ERRORS: error: invalid operand for instruction
+@ CHECK-ERRORS: error: instruction requires a CPU feature not currently enabled
 @ CHECK-ERRORS:         add sp, #-1
 @ CHECK-ERRORS:                 ^
-@ CHECK-ERRORS: error: invalid operand for instruction
+@ CHECK-ERRORS: error: instruction requires a CPU feature not currently enabled
 @ CHECK-ERRORS:         add sp, #3
 @ CHECK-ERRORS:                 ^
 @ CHECK-ERRORS: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/ARM/vfp4.s b/test/MC/ARM/vfp4.s
new file mode 100644
index 0000000..cc87a38
--- /dev/null
+++ b/test/MC/ARM/vfp4.s
@@ -0,0 +1,50 @@
+@ RUN: llvm-mc < %s -triple armv7-unknown-unknown -show-encoding -mattr=+neon,+vfp4   | FileCheck %s --check-prefix=ARM
+@ RUN: llvm-mc < %s -triple thumbv7-unknown-unknown -show-encoding -mattr=+neon,+vfp4 | FileCheck %s --check-prefix=THUMB
+
+@ ARM: vfma.f64 d16, d18, d17 @ encoding: [0xa1,0x0b,0xe2,0xee]
+@ THUMB: vfma.f64 d16, d18, d17 @ encoding: [0xe2,0xee,0xa1,0x0b]
+vfma.f64 d16, d18, d17
+
+@ ARM: vfma.f32 s2, s4, s0 @ encoding: [0x00,0x1a,0xa2,0xee]
+@ THUMB: vfma.f32 s2, s4, s0 @ encoding: [0xa2,0xee,0x00,0x1a]
+vfma.f32 s2, s4, s0
+
+@ ARM: vfma.f32 d16, d18, d17 @ encoding: [0xb1,0x0c,0x42,0xf2]
+@ THUMB: vfma.f32 d16, d18, d17 @ encoding: [0x42,0xef,0xb1,0x0c]
+vfma.f32 d16, d18, d17
+
+@ ARM: vfma.f32 q2, q4, q0 @ encoding: [0x50,0x4c,0x08,0xf2]
+@ THUMB: vfma.f32	q2, q4, q0 @ encoding: [0x08,0xef,0x50,0x4c]
+vfma.f32 q2, q4, q0
+
+@ ARM: vfnma.f64 d16, d18, d17 @ encoding: [0xe1,0x0b,0xd2,0xee]
+@ THUMB: vfnma.f64 d16, d18, d17 @ encoding: [0xd2,0xee,0xe1,0x0b]
+vfnma.f64 d16, d18, d17
+
+@ ARM: vfnma.f32 s2, s4, s0 @ encoding: [0x40,0x1a,0x92,0xee]
+@ THUMB: vfnma.f32 s2, s4, s0 @ encoding: [0x92,0xee,0x40,0x1a]
+vfnma.f32 s2, s4, s0
+
+@ ARM: vfms.f64 d16, d18, d17 @ encoding: [0xe1,0x0b,0xe2,0xee]
+@ THUMB: vfms.f64 d16, d18, d17 @ encoding: [0xe2,0xee,0xe1,0x0b]
+vfms.f64 d16, d18, d17
+
+@ ARM: vfms.f32 s2, s4, s0 @ encoding: [0x40,0x1a,0xa2,0xee]
+@ THUMB: vfms.f32 s2, s4, s0 @ encoding: [0xa2,0xee,0x40,0x1a]
+vfms.f32 s2, s4, s0
+
+@ ARM: vfms.f32 d16, d18, d17 @ encoding: [0xb1,0x0c,0x62,0xf2]
+@ THUMB: vfms.f32 d16, d18, d17 @ encoding: [0x62,0xef,0xb1,0x0c]
+vfms.f32 d16, d18, d17
+
+@ ARM: vfms.f32 q2, q4, q0 @ encoding: [0x50,0x4c,0x28,0xf2]
+@ THUMB: vfms.f32	q2, q4, q0 @ encoding: [0x28,0xef,0x50,0x4c]
+vfms.f32 q2, q4, q0
+
+@ ARM: vfnms.f64 d16, d18, d17 @ encoding: [0xa1,0x0b,0xd2,0xee]
+@ THUMB: vfnms.f64 d16, d18, d17 @ encoding: [0xd2,0xee,0xa1,0x0b]
+vfnms.f64 d16, d18, d17
+
+@ ARM: vfnms.f32 s2, s4, s0 @ encoding: [0x00,0x1a,0x92,0xee]
+@ THUMB: vfnms.f32 s2, s4, s0 @ encoding: [0x92,0xee,0x00,0x1a]
+vfnms.f32 s2, s4, s0
diff --git a/test/MC/ARM/vpush-vpop.s b/test/MC/ARM/vpush-vpop.s
index 1212c83..4fb4dec 100644
--- a/test/MC/ARM/vpush-vpop.s
+++ b/test/MC/ARM/vpush-vpop.s
@@ -7,6 +7,21 @@ foo:
  vpush {s8, s9, s10, s11, s12}
  vpop  {d8, d9, d10, d11, d12}
  vpop  {s8, s9, s10, s11, s12}
+@ optional size suffix
+ vpush.s8 {d8, d9, d10, d11, d12}
+ vpush.16 {s8, s9, s10, s11, s12}
+ vpop.f32  {d8, d9, d10, d11, d12}
+ vpop.64  {s8, s9, s10, s11, s12}
+
+@ CHECK-THUMB: vpush {d8, d9, d10, d11, d12} @ encoding: [0x2d,0xed,0x0a,0x8b]
+@ CHECK-THUMB: vpush {s8, s9, s10, s11, s12} @ encoding: [0x2d,0xed,0x05,0x4a]
+@ CHECK-THUMB: vpop  {d8, d9, d10, d11, d12} @ encoding: [0xbd,0xec,0x0a,0x8b]
+@ CHECK-THUMB: vpop  {s8, s9, s10, s11, s12} @ encoding: [0xbd,0xec,0x05,0x4a]
+
+@ CHECK-ARM: vpush {d8, d9, d10, d11, d12} @ encoding: [0x0a,0x8b,0x2d,0xed]
+@ CHECK-ARM: vpush {s8, s9, s10, s11, s12} @ encoding: [0x05,0x4a,0x2d,0xed]
+@ CHECK-ARM: vpop  {d8, d9, d10, d11, d12} @ encoding: [0x0a,0x8b,0xbd,0xec]
+@ CHECK-ARM: vpop  {s8, s9, s10, s11, s12} @ encoding: [0x05,0x4a,0xbd,0xec]
 
 @ CHECK-THUMB: vpush {d8, d9, d10, d11, d12} @ encoding: [0x2d,0xed,0x0a,0x8b]
 @ CHECK-THUMB: vpush {s8, s9, s10, s11, s12} @ encoding: [0x2d,0xed,0x05,0x4a]
diff --git a/test/MC/AsmParser/2011-09-06-NoNewline.s b/test/MC/AsmParser/2011-09-06-NoNewline.s
deleted file mode 100644
index 33e1dbb..0000000
--- a/test/MC/AsmParser/2011-09-06-NoNewline.s
+++ /dev/null
@@ -1,7 +0,0 @@
-// RUN: llvm-mc -triple i386-unknown-unknown %s
-movl %gs:8, %eax
-// RUN: llvm-mc -triple i386-unknown-unknown %s
-movl %gs:8, %eax
-// RUN: llvm-mc -triple i386-unknown-unknown %s
-movl %gs:8, %eax
-        
-\ No newline at end of file
diff --git a/test/MC/AsmParser/cfi-unfinished-frame.s b/test/MC/AsmParser/cfi-unfinished-frame.s
new file mode 100644
index 0000000..1182d52
--- /dev/null
+++ b/test/MC/AsmParser/cfi-unfinished-frame.s
@@ -0,0 +1,5 @@
+// RUN: not llvm-mc -filetype=asm -triple x86_64-pc-linux-gnu %s -o %t 2>%t.out
+// RUN: FileCheck -input-file=%t.out %s
+
+.cfi_startproc
+// CHECK: Unfinished frame
diff --git a/test/MC/AsmParser/dg.exp b/test/MC/AsmParser/dg.exp
deleted file mode 100644
index a6d81da..0000000
--- a/test/MC/AsmParser/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{s}]]
-}
diff --git a/test/MC/AsmParser/directive_file-errors.s b/test/MC/AsmParser/directive_file-errors.s
new file mode 100644
index 0000000..5ae2bbe
--- /dev/null
+++ b/test/MC/AsmParser/directive_file-errors.s
@@ -0,0 +1,9 @@
+// RUN: not llvm-mc -g -triple i386-unknown-unknown %s 2> %t.err | FileCheck %s
+// RUN: FileCheck --check-prefix=CHECK-ERRORS %s < %t.err
+// Test for Bug 11740
+
+        .file "hello"
+        .file 1 "world"
+
+// CHECK: .file "hello"
+// CHECK-ERRORS:6:9: error: input can't have .file dwarf directives when -g is used to generate dwarf debug info for assembly code
diff --git a/test/MC/AsmParser/directive_file.s b/test/MC/AsmParser/directive_file.s
index 3160d5c..121890e 100644
--- a/test/MC/AsmParser/directive_file.s
+++ b/test/MC/AsmParser/directive_file.s
@@ -2,7 +2,8 @@
 
         .file "hello"
         .file 1 "world"
+        .file 2 "directory" "file"
 
 # CHECK: .file "hello"
 # CHECK: .file 1 "world"
-
+# CHECK: .file 2 "directory" "file"
diff --git a/test/MC/AsmParser/directive_incbin.s b/test/MC/AsmParser/directive_incbin.s
new file mode 100644
index 0000000..55f9c79
--- /dev/null
+++ b/test/MC/AsmParser/directive_incbin.s
@@ -0,0 +1,6 @@
+# RUN: llvm-mc -triple i386-unknown-unknown %s -I %p | FileCheck %s
+
+.data
+.incbin "incbin_abcd"
+
+# CHECK: .ascii	 "abcd\n"
diff --git a/test/MC/AsmParser/incbin_abcd b/test/MC/AsmParser/incbin_abcd
new file mode 100644
index 0000000..acbe86c
--- /dev/null
+++ b/test/MC/AsmParser/incbin_abcd
@@ -0,0 +1 @@
+abcd
diff --git a/test/MC/AsmParser/lit.local.cfg b/test/MC/AsmParser/lit.local.cfg
new file mode 100644
index 0000000..6c49f08
--- /dev/null
+++ b/test/MC/AsmParser/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.s']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/MC/AsmParser/macro-args.s b/test/MC/AsmParser/macro-args.s
index 808b6eb..4b87899 100644
--- a/test/MC/AsmParser/macro-args.s
+++ b/test/MC/AsmParser/macro-args.s
@@ -8,3 +8,13 @@
 GET    is_sse, %eax
 
 // CHECK: movl	is_sse@GOTOFF(%ebx), %eax
+
+.macro bar
+    .long $n
+.endm
+
+bar 1, 2, 3
+bar
+
+// CHECK: .long 3
+// CHECK: .long 0
diff --git a/test/MC/AsmParser/pr11865.s b/test/MC/AsmParser/pr11865.s
new file mode 100644
index 0000000..1c03e11
--- /dev/null
+++ b/test/MC/AsmParser/pr11865.s
@@ -0,0 +1,6 @@
+// RUN: llvm-mc -triple i386-unknown-unknown %s
+
+i:
+        .long    g
+g = h
+h = i
diff --git a/test/MC/AsmParser/variables-invalid.s b/test/MC/AsmParser/variables-invalid.s
index 9656889..c466d42 100644
--- a/test/MC/AsmParser/variables-invalid.s
+++ b/test/MC/AsmParser/variables-invalid.s
@@ -2,7 +2,7 @@
 // RUN: FileCheck --input-file %t %s
 
         .data
-// CHECK: invalid assignment to 't0_v0'
+// CHECK: Recursive use of 't0_v0'
         t0_v0 = t0_v0 + 1
 
         t1_v1 = 1
@@ -13,5 +13,16 @@ t2_s0:
         t2_s0 = 2
 
         t3_s0 = t2_s0 + 1
+        .long t3_s0
 // CHECK: invalid reassignment of non-absolute variable 't3_s0'
         t3_s0 = 1
+
+
+// CHECK: Recursive use of 't4_s2'
+        t4_s0 = t4_s1
+        t4_s1 = t4_s2
+        t4_s2 = t4_s0
+
+// CHECK: Recursive use of 't5_s1'
+        t5_s0 = t5_s1 + 1
+        t5_s1 = t5_s0
diff --git a/test/MC/COFF/dg.exp b/test/MC/COFF/dg.exp
deleted file mode 100644
index d46d700..0000000
--- a/test/MC/COFF/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,s}]]
-}
diff --git a/test/MC/COFF/global_ctors.ll b/test/MC/COFF/global_ctors.ll
new file mode 100644
index 0000000..4d6b1c7
--- /dev/null
+++ b/test/MC/COFF/global_ctors.ll
@@ -0,0 +1,28 @@
+; Test that global ctors are emitted into the proper COFF section for the
+; target. Mingw uses .ctors, whereas MSVC uses .CRT$XC*.
+; RUN: llc < %s -mtriple i686-pc-win32 | FileCheck %s --check-prefix WIN32 
+; RUN: llc < %s -mtriple x86_64-pc-win32 | FileCheck %s --check-prefix WIN32 
+; RUN: llc < %s -mtriple i686-pc-mingw32 | FileCheck %s --check-prefix MINGW32 
+; RUN: llc < %s -mtriple x86_64-pc-mingw32 | FileCheck %s --check-prefix MINGW32 
+
+@.str = private unnamed_addr constant [13 x i8] c"constructing\00", align 1
+@.str2 = private unnamed_addr constant [5 x i8] c"main\00", align 1
+
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @a_global_ctor }]
+
+declare i32 @puts(i8*)
+
+define void @a_global_ctor() nounwind {
+  %1 = call i32 @puts(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0))
+  ret void
+}
+
+define i32 @main() nounwind {
+  %1 = call i32 @puts(i8* getelementptr inbounds ([5 x i8]* @.str2, i32 0, i32 0))
+  ret i32 0
+}
+
+; WIN32: .section .CRT$XCU,"r"
+; WIN32: a_global_ctor
+; MINGW32: .section .ctors,"w"
+; MINGW32: a_global_ctor
diff --git a/test/MC/COFF/lit.local.cfg b/test/MC/COFF/lit.local.cfg
new file mode 100644
index 0000000..41a8434
--- /dev/null
+++ b/test/MC/COFF/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.s', '.ll']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/MC/COFF/secrel32.s b/test/MC/COFF/secrel32.s
new file mode 100644
index 0000000..ce148db
--- /dev/null
+++ b/test/MC/COFF/secrel32.s
@@ -0,0 +1,14 @@
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | coff-dump.py | FileCheck %s
+
+// check that we produce the correct relocation for .secrel32
+
+Lfoo:
+	.secrel32	Lfoo
+
+// CHECK:       Relocations              = [
+// CHECK-NEXT:    0 = {
+// CHECK-NEXT:       VirtualAddress           = 0x0
+// CHECK-NEXT:       SymbolTableIndex         = 0
+// CHECK-NEXT:       Type                     = IMAGE_REL_I386_SECREL (11)
+// CHECK-NEXT:       SymbolName               = .text
+// CHECK-NEXT:     }
diff --git a/test/MC/COFF/symbol-mangling.ll b/test/MC/COFF/symbol-mangling.ll
new file mode 100644
index 0000000..f1b4b4b
--- /dev/null
+++ b/test/MC/COFF/symbol-mangling.ll
@@ -0,0 +1,17 @@
+; The purpose of this test is to see if the MC layer properly handles symbol
+; names needing quoting on MS/Windows. This code is generated by clang when
+; using -cxx-abi microsoft.
+
+; RUN: llc -filetype=asm -mtriple i686-pc-win32 %s -o - | FileCheck %s
+
+; CHECK: ?sayhi@A@@QBEXXZ
+
+%struct.A = type {}
+
+define i32 @main() {
+entry:
+  tail call void @"\01?sayhi@A@@QBEXXZ"(%struct.A* null)
+  ret i32 0
+}
+
+declare void @"\01?sayhi@A@@QBEXXZ"(%struct.A*)
diff --git a/test/MC/Disassembler/ARM/arm-tests.txt b/test/MC/Disassembler/ARM/arm-tests.txt
index 69a094d..ce1446b 100644
--- a/test/MC/Disassembler/ARM/arm-tests.txt
+++ b/test/MC/Disassembler/ARM/arm-tests.txt
@@ -201,7 +201,7 @@
 0x20 0x51 0x17 0xe6
 
 # CHECK:	strdeq	r2, r3, [r0], -r8
-0xf8 0x24 0x00 0x00
+0xf8 0x20 0x00 0x00
 
 # CHECK:	ldrdeq	r2, r3, [r0], -r12
 0xdc 0x24 0x00 0x00
@@ -215,7 +215,7 @@
 # CHECK:	vldmdb	r2!, {s7, s8, s9, s10, s11}
 0x05 0x3a 0x72 0xed
 
-# CHECK:	vldr.32	s23, [r2, #660]
+# CHECK:	vldr	s23, [r2, #660]
 0xa5 0xba 0xd2 0xed
 
 # CHECK:	strtvc	r5, [r3], r0, lsr #20
@@ -317,3 +317,7 @@
 
 # CHECK: stc2l	p0, c0, [r2], #-96
 0x18 0x0 0x62 0xfc
+
+# CHECK: ldmgt	sp!, {r9}
+0x00 0x02 0xbd 0xc8
+
diff --git a/test/MC/Disassembler/ARM/dg.exp b/test/MC/Disassembler/ARM/dg.exp
deleted file mode 100644
index fc2f17a..0000000
--- a/test/MC/Disassembler/ARM/dg.exp
+++ /dev/null
@@ -1,6 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target ARM] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{txt}]]
-}
-
diff --git a/test/MC/Disassembler/ARM/fp-encoding.txt b/test/MC/Disassembler/ARM/fp-encoding.txt
index f3e0261..9095b84 100644
--- a/test/MC/Disassembler/ARM/fp-encoding.txt
+++ b/test/MC/Disassembler/ARM/fp-encoding.txt
@@ -152,46 +152,46 @@
 # CHECK: vmov    r0, r1, d16
 
 0x00 0x1b 0xd0 0xed
-# CHECK: vldr.64	d17, [r0]
+# CHECK: vldr	d17, [r0]
 
 0x08 0x1b 0x92 0xed
 0x08 0x1b 0x12 0xed
-# CHECK: vldr.64	d1, [r2, #32]
-# CHECK: vldr.64	d1, [r2, #-32]
+# CHECK: vldr	d1, [r2, #32]
+# CHECK: vldr	d1, [r2, #-32]
 
 0x00 0x2b 0x93 0xed
-# CHECK: vldr.64 d2, [r3]
+# CHECK: vldr d2, [r3]
 
 0x00 0x3b 0x9f 0xed
-# CHECK: vldr.64 d3, [pc]
+# CHECK: vldr d3, [pc]
 
 0x00 0x6a 0xd0 0xed
-# CHECK: vldr.32	s13, [r0]
+# CHECK: vldr	s13, [r0]
 
 0x08 0x0a 0xd2 0xed
 0x08 0x0a 0x52 0xed
-# CHECK: vldr.32	s1, [r2, #32]
-# CHECK: vldr.32	s1, [r2, #-32]
+# CHECK: vldr	s1, [r2, #32]
+# CHECK: vldr	s1, [r2, #-32]
 
 0x00 0x1a 0x93 0xed
-# CHECK: vldr.32 s2, [r3]
+# CHECK: vldr s2, [r3]
 
 0x00 0x2a 0xdf 0xed
-# CHECK: vldr.32 s5, [pc]
+# CHECK: vldr s5, [pc]
 
 0x00 0x4b 0x81 0xed
 0x06 0x4b 0x81 0xed
 0x06 0x4b 0x01 0xed
-# CHECK: vstr.64 d4, [r1]
-# CHECK: vstr.64 d4, [r1, #24]
-# CHECK: vstr.64 d4, [r1, #-24]
+# CHECK: vstr d4, [r1]
+# CHECK: vstr d4, [r1, #24]
+# CHECK: vstr d4, [r1, #-24]
 
 0x00 0x2a 0x81 0xed
 0x06 0x2a 0x81 0xed
 0x06 0x2a 0x01 0xed
-# CHECK: vstr.32 s4, [r1]
-# CHECK: vstr.32 s4, [r1, #24]
-# CHECK: vstr.32 s4, [r1, #-24]
+# CHECK: vstr s4, [r1]
+# CHECK: vstr s4, [r1, #24]
+# CHECK: vstr s4, [r1, #-24]
 
 0x0c 0x2b 0x91 0xec
 0x06 0x1a 0x91 0xec
diff --git a/test/MC/Disassembler/ARM/invalid-CPS3p-arm.txt b/test/MC/Disassembler/ARM/invalid-CPS3p-arm.txt
index 6fdb55e..8146b5c 100644
--- a/test/MC/Disassembler/ARM/invalid-CPS3p-arm.txt
+++ b/test/MC/Disassembler/ARM/invalid-CPS3p-arm.txt
@@ -1,4 +1,4 @@
 # RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {potentially undefined instruction encoding}
 
 # invalid (imod, M, iflags) combination
-0x93 0x1c 0x02 0xf1
+0x93 0x00 0x02 0xf1
diff --git a/test/MC/Disassembler/ARM/invalid-IT-CC15.txt b/test/MC/Disassembler/ARM/invalid-IT-CC15.txt
new file mode 100644
index 0000000..17e25ea
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-IT-CC15.txt
@@ -0,0 +1,18 @@
+# RUN: llvm-mc --disassemble %s -triple=thumbv7-unknown-unknown |& grep und
+# rdar://10841671
+
+0xe3 0xbf
+0xdf 0xed 0x61 0x3b
+0x71 0xee 0xe0 0x1b
+0x72 0xee 0xa3 0x2b
+0xdf 0xed 0x60 0x0b
+
+# This is test is dealing with a undefined condition code value of 15 in the
+# above sequence of junk bytes and not allowing the disassembler to abort on
+# printing the final instruction in this list.
+# 
+#	ittte	al
+#	vldr	d19, [pc, #388]
+#	vsub.f64	d17, d17, d16
+#	vadd.f64	d18, d18, d19
+#	vldr<und>	d16, [pc, #384]
diff --git a/test/MC/Disassembler/ARM/invalid-LDRD-arm.txt b/test/MC/Disassembler/ARM/invalid-LDRD-arm.txt
deleted file mode 100644
index f8f23ed..0000000
--- a/test/MC/Disassembler/ARM/invalid-LDRD-arm.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
-
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
-# -------------------------------------------------------------------------------------------------
-# | 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0| X: X: X: 1| X: X: X: X| 1: 1: X: 1| X: X: X: X|
-# -------------------------------------------------------------------------------------------------
-# 
-# A8.6.68 LDRD (register)
-# if Rt{0} = 1 then UNDEFINED;
-0xd0 0x10 0x00 0x00
diff --git a/test/MC/Disassembler/ARM/invalid-LDRT-arm.txt b/test/MC/Disassembler/ARM/invalid-LDRT-arm.txt
deleted file mode 100644
index 067dcb3..0000000
--- a/test/MC/Disassembler/ARM/invalid-LDRT-arm.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
-
-# Opcode=0 Name=PHI Format=(42)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 0| 0: 1: 1: 0| 0: 0: 1: 1| 0: 1: 1: 1| 0: 1: 0: 1| 0: 0: 0: 1| 0: 0: 0: 1| 0: 0: 0: 0|
-# -------------------------------------------------------------------------------------------------
-#
-# The bytes have Inst{4} = 1, so it's not an LDRT Encoding A2 instruction.
-0x10 0x51 0x37 0xe6
-
-
diff --git a/test/MC/Disassembler/ARM/invalid-LSL-regform.txt b/test/MC/Disassembler/ARM/invalid-LSL-regform.txt
deleted file mode 100644
index 6a1f11f..0000000
--- a/test/MC/Disassembler/ARM/invalid-LSL-regform.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
-
-# Opcode=196 Name=MOVs Format=ARM_FORMAT_DPSOREGFRM(5)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 0| 0: 0: 0: 1| 1: 0: 1: 0| 0: 0: 0: 0| 1: 1: 1: 1| 0: 0: 0: 1| 0: 0: 0: 1| 0: 0: 1: 0|
-# -------------------------------------------------------------------------------------------------
-#
-# A8.6.89 LSL (register)
-# if d == 15 || n == 15 || m == 15 then UNPREDICTABLE;
-0x12 0xf1 0xa0 0xe1
diff --git a/test/MC/Disassembler/ARM/invalid-RSC-arm.txt b/test/MC/Disassembler/ARM/invalid-RSC-arm.txt
deleted file mode 100644
index 096b909..0000000
--- a/test/MC/Disassembler/ARM/invalid-RSC-arm.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
-
-# Opcode=261 Name=RSCrs Format=ARM_FORMAT_DPSOREGFRM(5)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 0: 0: 1: 1| 0: 0: 0: 0| 1: 1: 1: 0| 0: 1: 0: 0| 1: 1: 1: 1| 1: 0: 0: 0| 0: 1: 0: 1| 1: 1: 1: 1|
-# -------------------------------------------------------------------------------------------------
-# if d == 15 || n == 15 || m == 15 || s == 15 then UNPREDICTABLE;
-0x5f 0xf8 0xe4 0x30
diff --git a/test/MC/Disassembler/ARM/invalid-SSAT-arm.txt b/test/MC/Disassembler/ARM/invalid-SSAT-arm.txt
deleted file mode 100644
index b236f8e..0000000
--- a/test/MC/Disassembler/ARM/invalid-SSAT-arm.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
-
-# Opcode=322 Name=SSAT Format=ARM_FORMAT_SATFRM(13)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 0| 0: 1: 1: 0| 1: 0: 1: 0| 0: 0: 0: 0| 1: 1: 1: 1| 0: 1: 0: 0| 0: 0: 0: 1| 1: 0: 1: 0|
-# -------------------------------------------------------------------------------------------------
-#
-# A8.6.183 SSAT
-# if d == 15 || n == 15 then UNPREDICTABLE;
-0x1a 0xf4 0xa0 0xe6
diff --git a/test/MC/Disassembler/ARM/invalid-STRBrs-arm.txt b/test/MC/Disassembler/ARM/invalid-STRBrs-arm.txt
deleted file mode 100644
index d3998bd..0000000
--- a/test/MC/Disassembler/ARM/invalid-STRBrs-arm.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
-
-# Opcode=355 Name=STRBrs Format=ARM_FORMAT_STFRM(7)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 0| 0: 1: 1: 1| 1: 1: 0: 0| 1: 1: 1: 1| 1: 1: 1: 1| 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0|
-# -------------------------------------------------------------------------------------------------
-#
-# if t == 15 then UNPREDICTABLE
-0x00 0xf0 0xcf 0xe7
diff --git a/test/MC/Disassembler/ARM/invalid-UQADD8-arm.txt b/test/MC/Disassembler/ARM/invalid-UQADD8-arm.txt
deleted file mode 100644
index fb3e711..0000000
--- a/test/MC/Disassembler/ARM/invalid-UQADD8-arm.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
-
-# Opcode=426 Name=UQADD8 Format=ARM_FORMAT_DPFRM(4)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 0| 0: 1: 1: 0| 0: 1: 1: 0| 0: 1: 1: 0| 0: 1: 0: 1| 1: 1: 1: 1| 1: 0: 0: 1| 1: 1: 1: 1|
-# -------------------------------------------------------------------------------------------------
-#
-# DPFrm with bad reg specifier(s)
-#
-# if d == 15 || n == 15 || m == 15 then UNPREDICTABLE;
-0x9f 0x5f 0x66 0xe6
diff --git a/test/MC/Disassembler/ARM/invalid-VST1d8Twb_register-thumb.txt b/test/MC/Disassembler/ARM/invalid-VST1d8Twb_register-thumb.txt
new file mode 100644
index 0000000..8ff3a2b
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-VST1d8Twb_register-thumb.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=1839 Name=VST1d8Twb_register Format=ARM_FORMAT_NLdSt(30)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 1: 0: 0: 1| 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0| 0: 1: 1: 0| 0: 0: 1: 0| 1: 1: 1: 1|
+# -------------------------------------------------------------------------------------------------
+#
+# A8.6.391 VST1 (multiple single elements)
+# This encoding looks like: vst1.8 {d0,d1,d2}, [r0, :128]
+# But bits 5-4 for the alignment of 128 encoded as align = 0b10, is available only if <list>
+# contains two or four registers.  rdar://11220250
+0x00 0xf9 0x2f 0x06
diff --git a/test/MC/Disassembler/ARM/ldrd-armv4.txt b/test/MC/Disassembler/ARM/ldrd-armv4.txt
new file mode 100644
index 0000000..bb87ade
--- /dev/null
+++ b/test/MC/Disassembler/ARM/ldrd-armv4.txt
@@ -0,0 +1,15 @@
+# RUN: llvm-mc --disassemble %s -triple=armv4-linux-gnueabi |& FileCheck %s -check-prefix=V4
+# RUN: llvm-mc --disassemble %s -triple=armv5te-linux-gnueabi |& FileCheck %s -check-prefix=V5TE
+
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0| X: X: X: 1| X: X: X: X| 1: 1: X: 1| X: X: X: X|
+# -------------------------------------------------------------------------------------------------
+# 
+# A8.6.68 LDRD (register)
+# if Rt{0} = 1 then UNDEFINED;
+
+# V4: invalid instruction encoding
+# V5TE: ldrd
+0xd0 0x10 0x00 0x01
+
diff --git a/test/MC/Disassembler/ARM/lit.local.cfg b/test/MC/Disassembler/ARM/lit.local.cfg
new file mode 100644
index 0000000..22a76e5
--- /dev/null
+++ b/test/MC/Disassembler/ARM/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.txt']
+
+targets = set(config.root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
+
diff --git a/test/MC/Disassembler/ARM/neon-tests.txt b/test/MC/Disassembler/ARM/neon-tests.txt
index 1e03deb..f44c2a0 100644
--- a/test/MC/Disassembler/ARM/neon-tests.txt
+++ b/test/MC/Disassembler/ARM/neon-tests.txt
@@ -30,7 +30,7 @@
 # CHECK:	vorr	d0, d15, d15
 0x1f 0x01 0x2f 0xf2
 
-# CHECK:	vmov.i64	q6, #0xFF00FF00FF
+# CHECK:	vmov.i64	q6, #0xff00ff00ff
 0x75 0xce 0x81 0xf2
 
 # CHECK:	vmvn.i32	d0, #0x0
@@ -69,10 +69,10 @@
 # CHECK:	vpop	{d8}
 0x02 0x8b 0xbd 0xec
 
-# CHECK:	vorr.i32	q15, #0x4F0000
+# CHECK:	vorr.i32	q15, #0x4f0000
 0x5f 0xe5 0xc4 0xf2
 
-# CHECK:	vbic.i32	q2, #0xA900
+# CHECK:	vbic.i32	q2, #0xa900
 0x79 0x43 0x82 0xf3
 
 # CHECK:	vst2.32	{d16, d18}, [r2, :64], r2
diff --git a/test/MC/Disassembler/ARM/neon.txt b/test/MC/Disassembler/ARM/neon.txt
index 5d2df93..58fe20e 100644
--- a/test/MC/Disassembler/ARM/neon.txt
+++ b/test/MC/Disassembler/ARM/neon.txt
@@ -307,9 +307,9 @@
 0xf2 0x01 0x50 0xf2
 # CHECK: vbic	q8, q8, q9
 0x3f 0x07 0xc7 0xf3
-# CHECK: vbic.i32	d16, #0xFF000000
+# CHECK: vbic.i32	d16, #0xff000000
 0x7f 0x07 0xc7 0xf3
-# CHECK: vbic.i32	q8, #0xFF000000
+# CHECK: vbic.i32	q8, #0xff000000
 
 0xb0 0x01 0x71 0xf2
 # CHECK: vorn	d16, d17, d16
@@ -587,11 +587,11 @@
 0x10 0x06 0xc2 0xf2
 # CHECK: vmov.i32	d16, #0x20000000
 0x10 0x0c 0xc2 0xf2
-# CHECK: vmov.i32	d16, #0x20FF
+# CHECK: vmov.i32	d16, #0x20ff
 0x10 0x0d 0xc2 0xf2
-# CHECK: vmov.i32	d16, #0x20FFFF
+# CHECK: vmov.i32	d16, #0x20ffff
 0x33 0x0e 0xc1 0xf3
-# CHECK: vmov.i64	d16, #0xFF0000FF0000FFFF
+# CHECK: vmov.i64	d16, #0xff0000ff0000ffff
 0x58 0x0e 0xc0 0xf2
 # CHECK: vmov.i8	q8, #0x8
 0x50 0x08 0xc1 0xf2
@@ -607,11 +607,11 @@
 0x50 0x06 0xc2 0xf2
 # CHECK: vmov.i32	q8, #0x20000000
 0x50 0x0c 0xc2 0xf2
-# CHECK: vmov.i32	q8, #0x20FF
+# CHECK: vmov.i32	q8, #0x20ff
 0x50 0x0d 0xc2 0xf2
-# CHECK: vmov.i32	q8, #0x20FFFF
+# CHECK: vmov.i32	q8, #0x20ffff
 0x73 0x0e 0xc1 0xf3
-# CHECK: vmov.i64	q8, #0xFF0000FF0000FFFF
+# CHECK: vmov.i64	q8, #0xff0000ff0000ffff
 0x30 0x08 0xc1 0xf2
 # CHECK: vmvn.i16	d16, #0x10
 0x30 0x0a 0xc1 0xf2
@@ -625,9 +625,9 @@
 0x30 0x06 0xc2 0xf2
 # CHECK: vmvn.i32	d16, #0x20000000
 0x30 0x0c 0xc2 0xf2
-# CHECK: vmvn.i32	d16, #0x20FF
+# CHECK: vmvn.i32	d16, #0x20ff
 0x30 0x0d 0xc2 0xf2
-# CHECK: vmvn.i32	d16, #0x20FFFF
+# CHECK: vmvn.i32	d16, #0x20ffff
 0x30 0x0a 0xc8 0xf2
 # CHECK: vmovl.s8	q8, d16
 0x30 0x0a 0xd0 0xf2
@@ -1856,3 +1856,390 @@
 
 0xe9 0x1a 0xb2 0x4e
 # CHECK: vcvttmi.f32.f16	s2, s19
+
+0x1d 0x76 0x66 0xf4
+# CHECK: vld1.8	{d23, d24, d25}, [r6, :64]!
+0x9d 0x62 0x6f 0xf4
+# CHECK: vld1.32	{d22, d23, d24, d25}, [pc, :64]!
+0x9d 0xaa 0x41 0xf4
+# CHECK: vst1.32	{d26, d27}, [r1, :64]!
+
+0x10 0x0f 0x83 0xf2
+0x50 0x0f 0x83 0xf2
+# CHECK: vmov.f32	d0, #1.600000e+01
+# CHECK: vmov.f32	q0, #1.600000e+01
+
+# rdar://10798451
+0xe7 0xf9 0x32 0x1d
+# CHECK vld2.8	{d17[], d19[]}, [r7, :16], r2
+0xe7 0xf9 0x3d 0x1d
+# CHECK vld2.8	{d17[], d19[]}, [r7, :16]!
+0xe7 0xf9 0x3f 0x1d
+# CHECK vld2.8	{d17[], d19[]}, [r7, :16]
+
+# rdar://11034702
+0x0d 0x87 0x04 0xf4
+# CHECK: vst1.8	{d8}, [r4]!            
+0x4d 0x87 0x04 0xf4
+# CHECK: vst1.16	{d8}, [r4]!             
+0x8d 0x87 0x04 0xf4
+# CHECK: vst1.32	{d8}, [r4]!             
+0xcd 0x87 0x04 0xf4
+# CHECK: vst1.64	{d8}, [r4]!             
+0x06 0x87 0x04 0xf4
+# CHECK: vst1.8	{d8}, [r4], r6          
+0x46 0x87 0x04 0xf4
+# CHECK: vst1.16	{d8}, [r4], r6          
+0x86 0x87 0x04 0xf4
+# CHECK: vst1.32	{d8}, [r4], r6          
+0xc6 0x87 0x04 0xf4
+# CHECK: vst1.64	{d8}, [r4], r6          
+
+0x0d 0x8a 0x04 0xf4
+# CHECK: vst1.8	{d8, d9}, [r4]!         
+0x4d 0x8a 0x04 0xf4
+# CHECK: vst1.16	{d8, d9}, [r4]!         
+0x8d 0x8a 0x04 0xf4
+# CHECK: vst1.32	{d8, d9}, [r4]!         
+0xcd 0x8a 0x04 0xf4
+# CHECK: vst1.64	{d8, d9}, [r4]!         
+0x06 0x8a 0x04 0xf4
+# CHECK: vst1.8	{d8, d9}, [r4], r6      
+0x46 0x8a 0x04 0xf4
+# CHECK: vst1.16	{d8, d9}, [r4], r6      
+0x86 0x8a 0x04 0xf4
+# CHECK: vst1.32	{d8, d9}, [r4], r6      
+0xc6 0x8a 0x04 0xf4
+# CHECK: vst1.64	{d8, d9}, [r4], r6      
+
+0x0d 0x86 0x04 0xf4
+# CHECK: vst1.8	{d8, d9, d10}, [r4]!    
+0x4d 0x86 0x04 0xf4
+# CHECK: vst1.16	{d8, d9, d10}, [r4]!    
+0x8d 0x86 0x04 0xf4
+# CHECK: vst1.32	{d8, d9, d10}, [r4]!    
+0xcd 0x86 0x04 0xf4
+# CHECK: vst1.64	{d8, d9, d10}, [r4]!    
+0x06 0x86 0x04 0xf4
+# CHECK: vst1.8	{d8, d9, d10}, [r4], r6 
+0x46 0x86 0x04 0xf4
+# CHECK: vst1.16	{d8, d9, d10}, [r4], r6 
+0x86 0x86 0x04 0xf4
+# CHECK: vst1.32	{d8, d9, d10}, [r4], r6 
+0xc6 0x86 0x04 0xf4
+# CHECK: vst1.64	{d8, d9, d10}, [r4], r6 
+
+0x0d 0x82 0x04 0xf4
+# CHECK: vst1.8	{d8, d9, d10, d11}, [r4]! 
+0x4d 0x82 0x04 0xf4
+# CHECK: vst1.16	{d8, d9, d10, d11}, [r4]! 
+0x8d 0x82 0x04 0xf4
+# CHECK: vst1.32	{d8, d9, d10, d11}, [r4]! 
+0xcd 0x82 0x04 0xf4
+# CHECK: vst1.64	{d8, d9, d10, d11}, [r4]! 
+0x06 0x82 0x04 0xf4
+# CHECK: vst1.8	{d8, d9, d10, d11}, [r4], r6 
+0x46 0x82 0x04 0xf4
+# CHECK: vst1.16	{d8, d9, d10, d11}, [r4], r6 
+0x86 0x82 0x04 0xf4
+# CHECK: vst1.32	{d8, d9, d10, d11}, [r4], r6 
+0xc6 0x82 0x04 0xf4
+# CHECK: vst1.64	{d8, d9, d10, d11}, [r4], r6 
+
+0x0d 0x88 0x04 0xf4
+# CHECK: vst2.8	{d8, d9}, [r4]!         
+0x4d 0x88 0x04 0xf4
+# CHECK: vst2.16	{d8, d9}, [r4]!         
+0x8d 0x88 0x04 0xf4
+# CHECK: vst2.32	{d8, d9}, [r4]!         
+0x06 0x88 0x04 0xf4
+# CHECK: vst2.8	{d8, d9}, [r4], r6      
+0x46 0x88 0x04 0xf4
+# CHECK: vst2.16	{d8, d9}, [r4], r6      
+0x86 0x88 0x04 0xf4
+# CHECK: vst2.32	{d8, d9}, [r4], r6      
+
+0x0d 0x89 0x04 0xf4
+# CHECK: vst2.8	{d8, d10}, [r4]!        
+0x4d 0x89 0x04 0xf4
+# CHECK: vst2.16	{d8, d10}, [r4]!        
+0x8d 0x89 0x04 0xf4
+# CHECK: vst2.32	{d8, d10}, [r4]!        
+0x06 0x89 0x04 0xf4
+# CHECK: vst2.8	{d8, d10}, [r4], r6     
+0x46 0x89 0x04 0xf4
+# CHECK: vst2.16	{d8, d10}, [r4], r6     
+0x86 0x89 0x04 0xf4
+# CHECK: vst2.32	{d8, d10}, [r4], r6     
+
+0x0d 0x84 0x04 0xf4
+# CHECK: vst3.8	{d8, d9, d10}, [r4]!    
+0x4d 0x84 0x04 0xf4
+# CHECK: vst3.16	{d8, d9, d10}, [r4]!    
+0x8d 0x84 0x04 0xf4
+# CHECK: vst3.32	{d8, d9, d10}, [r4]!    
+0x06 0x85 0x04 0xf4
+# CHECK: vst3.8	{d8, d10, d12}, [r4], r6 
+0x46 0x85 0x04 0xf4
+# CHECK: vst3.16	{d8, d10, d12}, [r4], r6 
+0x86 0x85 0x04 0xf4
+# CHECK: vst3.32	{d8, d10, d12}, [r4], r6 
+
+0x0d 0x80 0x04 0xf4
+# CHECK: vst4.8	{d8, d9, d10, d11}, [r4]! 
+0x4d 0x80 0x04 0xf4
+# CHECK: vst4.16	{d8, d9, d10, d11}, [r4]! 
+0x8d 0x80 0x04 0xf4
+# CHECK: vst4.32	{d8, d9, d10, d11}, [r4]! 
+0x06 0x81 0x04 0xf4
+# CHECK: vst4.8	{d8, d10, d12, d14}, [r4], r6 
+0x46 0x81 0x04 0xf4
+# CHECK: vst4.16	{d8, d10, d12, d14}, [r4], r6 
+0x86 0x81 0x04 0xf4
+# CHECK: vst4.32	{d8, d10, d12, d14}, [r4], r6 
+
+0x4f 0x8a 0x04 0xf4
+# CHECK: vst1.16	{d8, d9}, [r4]          
+0x8f 0x8a 0x04 0xf4
+# CHECK: vst1.32	{d8, d9}, [r4]          
+0xcf 0x8a 0x04 0xf4
+# CHECK: vst1.64	{d8, d9}, [r4]          
+0x0f 0x8a 0x04 0xf4
+# CHECK: vst1.8	{d8, d9}, [r4]          
+
+0x4f 0x88 0x04 0xf4
+# CHECK: vst2.16	{d8, d9}, [r4]          
+0x8f 0x88 0x04 0xf4
+# CHECK: vst2.32	{d8, d9}, [r4]          
+0x0f 0x88 0x04 0xf4
+# CHECK: vst2.8	{d8, d9}, [r4]          
+
+0x4d 0x88 0x04 0xf4
+# CHECK: vst2.16	{d8, d9}, [r4]!         
+0x46 0x88 0x04 0xf4
+# CHECK: vst2.16	{d8, d9}, [r4], r6      
+0x8d 0x88 0x04 0xf4
+# CHECK: vst2.32	{d8, d9}, [r4]!         
+0x86 0x88 0x04 0xf4
+# CHECK: vst2.32	{d8, d9}, [r4], r6      
+0x0d 0x88 0x04 0xf4
+# CHECK: vst2.8	{d8, d9}, [r4]!         
+0x06 0x88 0x04 0xf4
+# CHECK: vst2.8	{d8, d9}, [r4], r6      
+
+0x4f 0x89 0x04 0xf4
+# CHECK: vst2.16	{d8, d10}, [r4]         
+0x8f 0x89 0x04 0xf4
+# CHECK: vst2.32	{d8, d10}, [r4]         
+0x0f 0x89 0x04 0xf4
+# CHECK: vst2.8	{d8, d10}, [r4]         
+
+0x0f 0x84 0x04 0xf4
+# CHECK: vst3.8	{d8, d9, d10}, [r4]     
+0x4f 0x84 0x04 0xf4
+# CHECK: vst3.16	{d8, d9, d10}, [r4]     
+0x8f 0x84 0x04 0xf4
+# CHECK: vst3.32	{d8, d9, d10}, [r4]     
+
+0x0f 0x80 0x04 0xf4
+# CHECK: vst4.8	{d8, d9, d10, d11}, [r4] 
+0x4f 0x80 0x04 0xf4
+# CHECK: vst4.16	{d8, d9, d10, d11}, [r4] 
+0x8f 0x80 0x04 0xf4
+# CHECK: vst4.32	{d8, d9, d10, d11}, [r4] 
+
+0x0f 0x85 0x04 0xf4
+# CHECK: vst3.8	{d8, d10, d12}, [r4]    
+0x4f 0x85 0x04 0xf4
+# CHECK: vst3.16	{d8, d10, d12}, [r4]    
+0x8f 0x85 0x04 0xf4
+# CHECK: vst3.32	{d8, d10, d12}, [r4]    
+
+0x0f 0x81 0x04 0xf4
+# CHECK: vst4.8	{d8, d10, d12, d14}, [r4] 
+0x4f 0x81 0x04 0xf4
+# CHECK: vst4.16	{d8, d10, d12, d14}, [r4] 
+0x8f 0x81 0x04 0xf4
+# CHECK: vst4.32	{d8, d10, d12, d14}, [r4] 
+
+# rdar://11204059
+0x0d 0x87 0x24 0xf4
+# CHECK: vld1.8	{d8}, [r4]!             
+0x4d 0x87 0x24 0xf4
+# CHECK: vld1.16	{d8}, [r4]!             
+0x8d 0x87 0x24 0xf4
+# CHECK: vld1.32	{d8}, [r4]!             
+0xcd 0x87 0x24 0xf4
+# CHECK: vld1.64	{d8}, [r4]!             
+0x06 0x87 0x24 0xf4
+# CHECK: vld1.8	{d8}, [r4], r6          
+0x46 0x87 0x24 0xf4
+# CHECK: vld1.16	{d8}, [r4], r6          
+0x86 0x87 0x24 0xf4
+# CHECK: vld1.32	{d8}, [r4], r6          
+0xc6 0x87 0x24 0xf4
+# CHECK: vld1.64	{d8}, [r4], r6          
+0x0d 0x8a 0x24 0xf4
+# CHECK: vld1.8	{d8, d9}, [r4]!         
+0x4d 0x8a 0x24 0xf4
+# CHECK: vld1.16	{d8, d9}, [r4]!         
+0x8d 0x8a 0x24 0xf4
+# CHECK: vld1.32	{d8, d9}, [r4]!         
+0xcd 0x8a 0x24 0xf4
+# CHECK: vld1.64	{d8, d9}, [r4]!         
+0x06 0x8a 0x24 0xf4
+# CHECK: vld1.8	{d8, d9}, [r4], r6      
+0x46 0x8a 0x24 0xf4
+# CHECK: vld1.16	{d8, d9}, [r4], r6      
+0x86 0x8a 0x24 0xf4
+# CHECK: vld1.32	{d8, d9}, [r4], r6      
+0xc6 0x8a 0x24 0xf4
+# CHECK: vld1.64	{d8, d9}, [r4], r6      
+0x0d 0x86 0x24 0xf4
+# CHECK: vld1.8	{d8, d9, d10}, [r4]!    
+0x4d 0x86 0x24 0xf4
+# CHECK: vld1.16	{d8, d9, d10}, [r4]!    
+0x8d 0x86 0x24 0xf4
+# CHECK: vld1.32	{d8, d9, d10}, [r4]!    
+0xcd 0x86 0x24 0xf4
+# CHECK: vld1.64	{d8, d9, d10}, [r4]!    
+0x06 0x86 0x24 0xf4
+# CHECK: vld1.8	{d8, d9, d10}, [r4], r6 
+0x46 0x86 0x24 0xf4
+# CHECK: vld1.16	{d8, d9, d10}, [r4], r6 
+0x86 0x86 0x24 0xf4
+# CHECK: vld1.32	{d8, d9, d10}, [r4], r6 
+0xc6 0x86 0x24 0xf4
+# CHECK: vld1.64	{d8, d9, d10}, [r4], r6 
+0x0d 0x82 0x24 0xf4
+# CHECK: vld1.8	{d8, d9, d10, d11}, [r4]! 
+0x4d 0x82 0x24 0xf4
+# CHECK: vld1.16	{d8, d9, d10, d11}, [r4]! 
+0x8d 0x82 0x24 0xf4
+# CHECK: vld1.32	{d8, d9, d10, d11}, [r4]! 
+0xcd 0x82 0x24 0xf4
+# CHECK: vld1.64	{d8, d9, d10, d11}, [r4]! 
+0x06 0x82 0x24 0xf4
+# CHECK: vld1.8	{d8, d9, d10, d11}, [r4], r6 
+0x46 0x82 0x24 0xf4
+# CHECK: vld1.16	{d8, d9, d10, d11}, [r4], r6 
+0x86 0x82 0x24 0xf4
+# CHECK: vld1.32	{d8, d9, d10, d11}, [r4], r6 
+0xc6 0x82 0x24 0xf4
+# CHECK: vld1.64	{d8, d9, d10, d11}, [r4], r6 
+0x0d 0x88 0x24 0xf4
+# CHECK: vld2.8	{d8, d9}, [r4]!         
+0x4d 0x88 0x24 0xf4
+# CHECK: vld2.16	{d8, d9}, [r4]!         
+0x8d 0x88 0x24 0xf4
+# CHECK: vld2.32	{d8, d9}, [r4]!         
+0x06 0x88 0x24 0xf4
+# CHECK: vld2.8	{d8, d9}, [r4], r6      
+0x46 0x88 0x24 0xf4
+# CHECK: vld2.16	{d8, d9}, [r4], r6      
+0x86 0x88 0x24 0xf4
+# CHECK: vld2.32	{d8, d9}, [r4], r6      
+0x0d 0x89 0x24 0xf4
+# CHECK: vld2.8	{d8, d10}, [r4]!        
+0x4d 0x89 0x24 0xf4
+# CHECK: vld2.16	{d8, d10}, [r4]!        
+0x8d 0x89 0x24 0xf4
+# CHECK: vld2.32	{d8, d10}, [r4]!        
+0x06 0x89 0x24 0xf4
+# CHECK: vld2.8	{d8, d10}, [r4], r6     
+0x46 0x89 0x24 0xf4
+# CHECK: vld2.16	{d8, d10}, [r4], r6     
+0x86 0x89 0x24 0xf4
+# CHECK: vld2.32	{d8, d10}, [r4], r6     
+0x0d 0x84 0x24 0xf4
+# CHECK: vld3.8	{d8, d9, d10}, [r4]!    
+0x4d 0x84 0x24 0xf4
+# CHECK: vld3.16	{d8, d9, d10}, [r4]!    
+0x8d 0x84 0x24 0xf4
+# CHECK: vld3.32	{d8, d9, d10}, [r4]!    
+0x06 0x85 0x24 0xf4
+# CHECK: vld3.8	{d8, d10, d12}, [r4], r6 
+0x46 0x85 0x24 0xf4
+# CHECK: vld3.16	{d8, d10, d12}, [r4], r6 
+0x86 0x85 0x24 0xf4
+# CHECK: vld3.32	{d8, d10, d12}, [r4], r6 
+0x0d 0x80 0x24 0xf4
+# CHECK: vld4.8	{d8, d9, d10, d11}, [r4]! 
+0x4d 0x80 0x24 0xf4
+# CHECK: vld4.16	{d8, d9, d10, d11}, [r4]! 
+0x8d 0x80 0x24 0xf4
+# CHECK: vld4.32	{d8, d9, d10, d11}, [r4]! 
+0x06 0x81 0x24 0xf4
+# CHECK: vld4.8	{d8, d10, d12, d14}, [r4], r6 
+0x46 0x81 0x24 0xf4
+# CHECK: vld4.16	{d8, d10, d12, d14}, [r4], r6 
+0x86 0x81 0x24 0xf4
+# CHECK: vld4.32	{d8, d10, d12, d14}, [r4], r6 
+0x4f 0x8a 0x24 0xf4
+# CHECK: vld1.16	{d8, d9}, [r4]          
+0x8f 0x8a 0x24 0xf4
+# CHECK: vld1.32	{d8, d9}, [r4]          
+0xcf 0x8a 0x24 0xf4
+# CHECK: vld1.64	{d8, d9}, [r4]          
+0x0f 0x8a 0x24 0xf4
+# CHECK: vld1.8	{d8, d9}, [r4]          
+0x4f 0x88 0x24 0xf4
+# CHECK: vld2.16	{d8, d9}, [r4]          
+0x8f 0x88 0x24 0xf4
+# CHECK: vld2.32	{d8, d9}, [r4]          
+0x0f 0x88 0x24 0xf4
+# CHECK: vld2.8	{d8, d9}, [r4]          
+0x4d 0x88 0x24 0xf4
+# CHECK: vld2.16	{d8, d9}, [r4]!         
+0x46 0x88 0x24 0xf4
+# CHECK: vld2.16	{d8, d9}, [r4], r6      
+0x8d 0x88 0x24 0xf4
+# CHECK: vld2.32	{d8, d9}, [r4]!         
+0x86 0x88 0x24 0xf4
+# CHECK: vld2.32	{d8, d9}, [r4], r6      
+0x0d 0x88 0x24 0xf4
+# CHECK: vld2.8	{d8, d9}, [r4]!         
+0x06 0x88 0x24 0xf4
+# CHECK: vld2.8	{d8, d9}, [r4], r6      
+0x4f 0x89 0x24 0xf4
+# CHECK: vld2.16	{d8, d10}, [r4]         
+0x8f 0x89 0x24 0xf4
+# CHECK: vld2.32	{d8, d10}, [r4]         
+0x0f 0x89 0x24 0xf4
+# CHECK: vld2.8	{d8, d10}, [r4]         
+0x4d 0x83 0x24 0xf4
+# CHECK: vld2.16	{d8, d9, d10, d11}, [r4]! 
+0x46 0x83 0x24 0xf4
+# CHECK: vld2.16	{d8, d9, d10, d11}, [r4], r6 
+0x8d 0x83 0x24 0xf4
+# CHECK: vld2.32	{d8, d9, d10, d11}, [r4]! 
+0x86 0x83 0x24 0xf4
+# CHECK: vld2.32	{d8, d9, d10, d11}, [r4], r6 
+0x0d 0x83 0x24 0xf4
+# CHECK: vld2.8	{d8, d9, d10, d11}, [r4]! 
+0x06 0x83 0x24 0xf4
+# CHECK: vld2.8	{d8, d9, d10, d11}, [r4], r6 
+0x0f 0x84 0x24 0xf4
+# CHECK: vld3.8	{d8, d9, d10}, [r4]     
+0x4f 0x84 0x24 0xf4
+# CHECK: vld3.16	{d8, d9, d10}, [r4]     
+0x8f 0x84 0x24 0xf4
+# CHECK: vld3.32	{d8, d9, d10}, [r4]     
+0x0f 0x80 0x24 0xf4
+# CHECK: vld4.8	{d8, d9, d10, d11}, [r4] 
+0x4f 0x80 0x24 0xf4
+# CHECK: vld4.16	{d8, d9, d10, d11}, [r4] 
+0x8f 0x80 0x24 0xf4
+# CHECK: vld4.32	{d8, d9, d10, d11}, [r4] 
+0x0f 0x85 0x24 0xf4
+# CHECK: vld3.8	{d8, d10, d12}, [r4]    
+0x4f 0x85 0x24 0xf4
+# CHECK: vld3.16	{d8, d10, d12}, [r4]    
+0x8f 0x85 0x24 0xf4
+# CHECK: vld3.32	{d8, d10, d12}, [r4]    
+0x0f 0x81 0x24 0xf4
+# CHECK: vld4.8	{d8, d10, d12, d14}, [r4] 
+0x4f 0x81 0x24 0xf4
+# CHECK: vld4.16	{d8, d10, d12, d14}, [r4] 
+0x8f 0x81 0x24 0xf4
+# CHECK: vld4.32	{d8, d10, d12, d14}, [r4] 
diff --git a/test/MC/Disassembler/ARM/neont2.txt b/test/MC/Disassembler/ARM/neont2.txt
index 577703c..efe7e60 100644
--- a/test/MC/Disassembler/ARM/neont2.txt
+++ b/test/MC/Disassembler/ARM/neont2.txt
@@ -301,9 +301,9 @@
 0x50 0xef 0xf2 0x01
 # CHECK: vbic	q8, q8, q9
 0xc7 0xff 0x3f 0x07
-# CHECK: vbic.i32	d16, #0xFF000000
+# CHECK: vbic.i32	d16, #0xff000000
 0xc7 0xff 0x7f 0x07
-# CHECK: vbic.i32	q8, #0xFF000000
+# CHECK: vbic.i32	q8, #0xff000000
 
 0x71 0xef 0xb0 0x01
 # CHECK: vorn	d16, d17, d16
@@ -486,11 +486,11 @@
 0xc2 0xef 0x10 0x06
 # CHECK: vmov.i32	d16, #0x20000000
 0xc2 0xef 0x10 0x0c
-# CHECK: vmov.i32	d16, #0x20FF
+# CHECK: vmov.i32	d16, #0x20ff
 0xc2 0xef 0x10 0x0d
-# CHECK: vmov.i32	d16, #0x20FFFF
+# CHECK: vmov.i32	d16, #0x20ffff
 0xc1 0xff 0x33 0x0e
-# CHECK: vmov.i64	d16, #0xFF0000FF0000FFFF
+# CHECK: vmov.i64	d16, #0xff0000ff0000ffff
 0xc0 0xef 0x58 0x0e
 # CHECK: vmov.i8	q8, #0x8
 0xc1 0xef 0x50 0x08
@@ -506,11 +506,11 @@
 0xc2 0xef 0x50 0x06
 # CHECK: vmov.i32	q8, #0x20000000
 0xc2 0xef 0x50 0x0c
-# CHECK: vmov.i32	q8, #0x20FF
+# CHECK: vmov.i32	q8, #0x20ff
 0xc2 0xef 0x50 0x0d
-# CHECK: vmov.i32	q8, #0x20FFFF
+# CHECK: vmov.i32	q8, #0x20ffff
 0xc1 0xff 0x73 0x0e
-# CHECK: vmov.i64	q8, #0xFF0000FF0000FFFF
+# CHECK: vmov.i64	q8, #0xff0000ff0000ffff
 0xc1 0xef 0x30 0x08
 # CHECK: vmvn.i16	d16, #0x10
 0xc1 0xef 0x30 0x0a
@@ -524,9 +524,9 @@
 0xc2 0xef 0x30 0x06
 # CHECK: vmvn.i32	d16, #0x20000000
 0xc2 0xef 0x30 0x0c
-# CHECK: vmvn.i32	d16, #0x20FF
+# CHECK: vmvn.i32	d16, #0x20ff
 0xc2 0xef 0x30 0x0d
-# CHECK: vmvn.i32	d16, #0x20FFFF
+# CHECK: vmvn.i32	d16, #0x20ffff
 0xc8 0xef 0x30 0x0a
 # CHECK: vmovl.s8	q8, d16
 0xd0 0xef 0x30 0x0a
@@ -1584,3 +1584,379 @@
 # CHECK: vst4.16	{d17[3], d19[3], d21[3], d23[3]}, [r0, :64]
 0xc0 0xf9 0x4f 0x1b
 # CHECK: vst4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0]
+
+0x63 0xf9 0x37 0xc9
+# CHECK: vld2.8	{d28, d30}, [r3, :256], r7
+
+# rdar://10798451
+0xe7 0xf9 0x32 0x1d
+# CHECK vld2.8	{d17[], d19[]}, [r7, :16], r2
+0xe7 0xf9 0x3d 0x1d
+# CHECK vld2.8	{d17[], d19[]}, [r7, :16]!
+0xe7 0xf9 0x3f 0x1d
+# CHECK vld2.8	{d17[], d19[]}, [r7, :16]
+
+# rdar://11034702
+0x04 0xf9 0x0d 0x87
+# CHECK: vst1.8	{d8}, [r4]!             
+0x04 0xf9 0x4d 0x87
+# CHECK: vst1.16	{d8}, [r4]!             
+0x04 0xf9 0x8d 0x87
+# CHECK: vst1.32	{d8}, [r4]!             
+0x04 0xf9 0xcd 0x87
+# CHECK: vst1.64	{d8}, [r4]!             
+0x04 0xf9 0x06 0x87
+# CHECK: vst1.8	{d8}, [r4], r6          
+0x04 0xf9 0x46 0x87
+# CHECK: vst1.16	{d8}, [r4], r6          
+0x04 0xf9 0x86 0x87
+# CHECK: vst1.32	{d8}, [r4], r6          
+0x04 0xf9 0xc6 0x87
+# CHECK: vst1.64	{d8}, [r4], r6          
+
+0x04 0xf9 0x0d 0x8a
+# CHECK: vst1.8	{d8, d9}, [r4]!         
+0x04 0xf9 0x4d 0x8a
+# CHECK: vst1.16	{d8, d9}, [r4]!         
+0x04 0xf9 0x8d 0x8a
+# CHECK: vst1.32	{d8, d9}, [r4]!         
+0x04 0xf9 0xcd 0x8a
+# CHECK: vst1.64	{d8, d9}, [r4]!         
+0x04 0xf9 0x06 0x8a
+# CHECK: vst1.8	{d8, d9}, [r4], r6      
+0x04 0xf9 0x46 0x8a
+# CHECK: vst1.16	{d8, d9}, [r4], r6      
+0x04 0xf9 0x86 0x8a
+# CHECK: vst1.32	{d8, d9}, [r4], r6      
+0x04 0xf9 0xc6 0x8a
+# CHECK: vst1.64	{d8, d9}, [r4], r6      
+
+0x04 0xf9 0x0d 0x86
+# CHECK: vst1.8	{d8, d9, d10}, [r4]!    
+0x04 0xf9 0x4d 0x86
+# CHECK: vst1.16	{d8, d9, d10}, [r4]!    
+0x04 0xf9 0x8d 0x86
+# CHECK: vst1.32	{d8, d9, d10}, [r4]!    
+0x04 0xf9 0xcd 0x86
+# CHECK: vst1.64	{d8, d9, d10}, [r4]!    
+0x04 0xf9 0x06 0x86
+# CHECK: vst1.8	{d8, d9, d10}, [r4], r6 
+0x04 0xf9 0x46 0x86
+# CHECK: vst1.16	{d8, d9, d10}, [r4], r6 
+0x04 0xf9 0x86 0x86
+# CHECK: vst1.32	{d8, d9, d10}, [r4], r6 
+0x04 0xf9 0xc6 0x86
+# CHECK: vst1.64	{d8, d9, d10}, [r4], r6 
+
+0x04 0xf9 0x0d 0x82
+# CHECK: vst1.8	{d8, d9, d10, d11}, [r4]! 
+0x04 0xf9 0x4d 0x82
+# CHECK: vst1.16	{d8, d9, d10, d11}, [r4]! 
+0x04 0xf9 0x8d 0x82
+# CHECK: vst1.32	{d8, d9, d10, d11}, [r4]! 
+0x04 0xf9 0xcd 0x82
+# CHECK: vst1.64	{d8, d9, d10, d11}, [r4]! 
+0x04 0xf9 0x06 0x82
+# CHECK: vst1.8	{d8, d9, d10, d11}, [r4], r6 
+0x04 0xf9 0x46 0x82
+# CHECK: vst1.16	{d8, d9, d10, d11}, [r4], r6 
+0x04 0xf9 0x86 0x82
+# CHECK: vst1.32	{d8, d9, d10, d11}, [r4], r6 
+0x04 0xf9 0xc6 0x82
+# CHECK: vst1.64	{d8, d9, d10, d11}, [r4], r6 
+
+0x04 0xf9 0x0d 0x88
+# CHECK: vst2.8	{d8, d9}, [r4]!         
+0x04 0xf9 0x4d 0x88
+# CHECK: vst2.16	{d8, d9}, [r4]!         
+0x04 0xf9 0x8d 0x88
+# CHECK: vst2.32	{d8, d9}, [r4]!         
+0x04 0xf9 0x06 0x88
+# CHECK: vst2.8	{d8, d9}, [r4], r6      
+0x04 0xf9 0x46 0x88
+# CHECK: vst2.16	{d8, d9}, [r4], r6      
+0x04 0xf9 0x86 0x88
+# CHECK: vst2.32	{d8, d9}, [r4], r6      
+
+0x04 0xf9 0x0d 0x89
+# CHECK: vst2.8	{d8, d10}, [r4]!        
+0x04 0xf9 0x4d 0x89
+# CHECK: vst2.16	{d8, d10}, [r4]!        
+0x04 0xf9 0x8d 0x89
+# CHECK: vst2.32	{d8, d10}, [r4]!        
+0x04 0xf9 0x06 0x89
+# CHECK: vst2.8	{d8, d10}, [r4], r6     
+0x04 0xf9 0x46 0x89
+# CHECK: vst2.16	{d8, d10}, [r4], r6     
+0x04 0xf9 0x86 0x89
+# CHECK: vst2.32	{d8, d10}, [r4], r6     
+
+0x04 0xf9 0x0d 0x84
+# CHECK: vst3.8	{d8, d9, d10}, [r4]!    
+0x04 0xf9 0x4d 0x84
+# CHECK: vst3.16	{d8, d9, d10}, [r4]!    
+0x04 0xf9 0x8d 0x84
+# CHECK: vst3.32	{d8, d9, d10}, [r4]!    
+0x04 0xf9 0x06 0x85
+# CHECK: vst3.8	{d8, d10, d12}, [r4], r6 
+0x04 0xf9 0x46 0x85
+# CHECK: vst3.16	{d8, d10, d12}, [r4], r6 
+0x04 0xf9 0x86 0x85
+# CHECK: vst3.32	{d8, d10, d12}, [r4], r6 
+
+0x04 0xf9 0x0d 0x80
+# CHECK: vst4.8	{d8, d9, d10, d11}, [r4]!
+0x04 0xf9 0x4d 0x80
+# CHECK: vst4.16	{d8, d9, d10, d11}, [r4]!
+0x04 0xf9 0x8d 0x80
+# CHECK: vst4.32	{d8, d9, d10, d11}, [r4]!
+0x04 0xf9 0x06 0x81
+# CHECK: vst4.8	{d8, d10, d12, d14}, [r4], r6
+0x04 0xf9 0x46 0x81
+# CHECK: vst4.16	{d8, d10, d12, d14}, [r4], r6
+0x04 0xf9 0x86 0x81
+# CHECK: vst4.32	{d8, d10, d12, d14}, [r4], r6
+
+0x04 0xf9 0x4f 0x8a
+# CHECK: vst1.16	{d8, d9}, [r4]          
+0x04 0xf9 0x8f 0x8a
+# CHECK: vst1.32	{d8, d9}, [r4]          
+0x04 0xf9 0xcf 0x8a
+# CHECK: vst1.64	{d8, d9}, [r4]          
+0x04 0xf9 0x0f 0x8a
+# CHECK: vst1.8	{d8, d9}, [r4]          
+0x04 0xf9 0x4f 0x88
+# CHECK: vst2.16	{d8, d9}, [r4]          
+0x04 0xf9 0x8f 0x88
+# CHECK: vst2.32	{d8, d9}, [r4]          
+0x04 0xf9 0x0f 0x88
+# CHECK: vst2.8	{d8, d9}, [r4]          
+0x04 0xf9 0x4d 0x88
+# CHECK: vst2.16	{d8, d9}, [r4]!         
+0x04 0xf9 0x46 0x88
+# CHECK: vst2.16	{d8, d9}, [r4], r6      
+0x04 0xf9 0x8d 0x88
+# CHECK: vst2.32	{d8, d9}, [r4]!         
+0x04 0xf9 0x86 0x88
+# CHECK: vst2.32	{d8, d9}, [r4], r6      
+0x04 0xf9 0x0d 0x88
+# CHECK: vst2.8	{d8, d9}, [r4]!         
+0x04 0xf9 0x06 0x88
+# CHECK: vst2.8	{d8, d9}, [r4], r6      
+
+0x04 0xf9 0x4f 0x89
+# CHECK: vst2.16	{d8, d10}, [r4]        
+0x04 0xf9 0x8f 0x89
+# CHECK: vst2.32	{d8, d10}, [r4]        
+0x04 0xf9 0x0f 0x89
+# CHECK: vst2.8	{d8, d10}, [r4]        
+
+0x04 0xf9 0x0f 0x84
+# CHECK: vst3.8	{d8, d9, d10}, [r4]    
+0x04 0xf9 0x4f 0x84
+# CHECK: vst3.16	{d8, d9, d10}, [r4]    
+0x04 0xf9 0x8f 0x84
+# CHECK: vst3.32	{d8, d9, d10}, [r4]    
+
+0x04 0xf9 0x0f 0x80
+# CHECK: vst4.8	{d8, d9, d10, d11}, [r4]
+0x04 0xf9 0x4f 0x80
+# CHECK: vst4.16	{d8, d9, d10, d11}, [r4]
+0x04 0xf9 0x8f 0x80
+# CHECK: vst4.32	{d8, d9, d10, d11}, [r4]
+
+0x04 0xf9 0x0f 0x85
+# CHECK: vst3.8	{d8, d10, d12}, [r4]   
+0x04 0xf9 0x4f 0x85
+# CHECK: vst3.16	{d8, d10, d12}, [r4]   
+0x04 0xf9 0x8f 0x85
+# CHECK: vst3.32	{d8, d10, d12}, [r4]   
+
+0x04 0xf9 0x0f 0x81
+# CHECK: vst4.8	{d8, d10, d12, d14}, [r4]
+0x04 0xf9 0x4f 0x81
+# CHECK: vst4.16	{d8, d10, d12, d14}, [r4]
+0x04 0xf9 0x8f 0x81
+# CHECK: vst4.32	{d8, d10, d12, d14}, [r4]
+
+# rdar://11204059
+0x24 0xf9 0x0d 0x87
+# CHECK: vld1.8	{d8}, [r4]!             
+0x24 0xf9 0x4d 0x87
+# CHECK: vld1.16	{d8}, [r4]!             
+0x24 0xf9 0x8d 0x87
+# CHECK: vld1.32	{d8}, [r4]!             
+0x24 0xf9 0xcd 0x87
+# CHECK: vld1.64	{d8}, [r4]!             
+0x24 0xf9 0x06 0x87
+# CHECK: vld1.8	{d8}, [r4], r6          
+0x24 0xf9 0x46 0x87
+# CHECK: vld1.16	{d8}, [r4], r6          
+0x24 0xf9 0x86 0x87
+# CHECK: vld1.32	{d8}, [r4], r6          
+0x24 0xf9 0xc6 0x87
+# CHECK: vld1.64	{d8}, [r4], r6          
+0x24 0xf9 0x0d 0x8a
+# CHECK: vld1.8	{d8, d9}, [r4]!         
+0x24 0xf9 0x4d 0x8a
+# CHECK: vld1.16	{d8, d9}, [r4]!         
+0x24 0xf9 0x8d 0x8a
+# CHECK: vld1.32	{d8, d9}, [r4]!         
+0x24 0xf9 0xcd 0x8a
+# CHECK: vld1.64	{d8, d9}, [r4]!         
+0x24 0xf9 0x06 0x8a
+# CHECK: vld1.8	{d8, d9}, [r4], r6      
+0x24 0xf9 0x46 0x8a
+# CHECK: vld1.16	{d8, d9}, [r4], r6      
+0x24 0xf9 0x86 0x8a
+# CHECK: vld1.32	{d8, d9}, [r4], r6      
+0x24 0xf9 0xc6 0x8a
+# CHECK: vld1.64	{d8, d9}, [r4], r6      
+0x24 0xf9 0x0d 0x86
+# CHECK: vld1.8	{d8, d9, d10}, [r4]!    
+0x24 0xf9 0x4d 0x86
+# CHECK: vld1.16	{d8, d9, d10}, [r4]!    
+0x24 0xf9 0x8d 0x86
+# CHECK: vld1.32	{d8, d9, d10}, [r4]!    
+0x24 0xf9 0xcd 0x86
+# CHECK: vld1.64	{d8, d9, d10}, [r4]!    
+0x24 0xf9 0x06 0x86
+# CHECK: vld1.8	{d8, d9, d10}, [r4], r6 
+0x24 0xf9 0x46 0x86
+# CHECK: vld1.16	{d8, d9, d10}, [r4], r6 
+0x24 0xf9 0x86 0x86
+# CHECK: vld1.32	{d8, d9, d10}, [r4], r6 
+0x24 0xf9 0xc6 0x86
+# CHECK: vld1.64	{d8, d9, d10}, [r4], r6 
+0x24 0xf9 0x0d 0x82
+# CHECK: vld1.8	{d8, d9, d10, d11}, [r4]! 
+0x24 0xf9 0x4d 0x82
+# CHECK: vld1.16	{d8, d9, d10, d11}, [r4]! 
+0x24 0xf9 0x8d 0x82
+# CHECK: vld1.32	{d8, d9, d10, d11}, [r4]! 
+0x24 0xf9 0xcd 0x82
+# CHECK: vld1.64	{d8, d9, d10, d11}, [r4]! 
+0x24 0xf9 0x06 0x82
+# CHECK: vld1.8	{d8, d9, d10, d11}, [r4], r6 
+0x24 0xf9 0x46 0x82
+# CHECK: vld1.16	{d8, d9, d10, d11}, [r4], r6 
+0x24 0xf9 0x86 0x82
+# CHECK: vld1.32	{d8, d9, d10, d11}, [r4], r6 
+0x24 0xf9 0xc6 0x82
+# CHECK: vld1.64	{d8, d9, d10, d11}, [r4], r6 
+0x24 0xf9 0x0d 0x88
+# CHECK: vld2.8	{d8, d9}, [r4]!         
+0x24 0xf9 0x4d 0x88
+# CHECK: vld2.16	{d8, d9}, [r4]!         
+0x24 0xf9 0x8d 0x88
+# CHECK: vld2.32	{d8, d9}, [r4]!         
+0x24 0xf9 0x06 0x88
+# CHECK: vld2.8	{d8, d9}, [r4], r6      
+0x24 0xf9 0x46 0x88
+# CHECK: vld2.16	{d8, d9}, [r4], r6      
+0x24 0xf9 0x86 0x88
+# CHECK: vld2.32	{d8, d9}, [r4], r6      
+0x24 0xf9 0x0d 0x89
+# CHECK: vld2.8	{d8, d10}, [r4]!        
+0x24 0xf9 0x4d 0x89
+# CHECK: vld2.16	{d8, d10}, [r4]!        
+0x24 0xf9 0x8d 0x89
+# CHECK: vld2.32	{d8, d10}, [r4]!        
+0x24 0xf9 0x06 0x89
+# CHECK: vld2.8	{d8, d10}, [r4], r6     
+0x24 0xf9 0x46 0x89
+# CHECK: vld2.16	{d8, d10}, [r4], r6     
+0x24 0xf9 0x86 0x89
+# CHECK: vld2.32	{d8, d10}, [r4], r6     
+0x24 0xf9 0x0d 0x84
+# CHECK: vld3.8	{d8, d9, d10}, [r4]!    
+0x24 0xf9 0x4d 0x84
+# CHECK: vld3.16	{d8, d9, d10}, [r4]!    
+0x24 0xf9 0x8d 0x84
+# CHECK: vld3.32	{d8, d9, d10}, [r4]!    
+0x24 0xf9 0x06 0x85
+# CHECK: vld3.8	{d8, d10, d12}, [r4], r6 
+0x24 0xf9 0x46 0x85
+# CHECK: vld3.16	{d8, d10, d12}, [r4], r6 
+0x24 0xf9 0x86 0x85
+# CHECK: vld3.32	{d8, d10, d12}, [r4], r6 
+0x24 0xf9 0x0d 0x80
+# CHECK: vld4.8	{d8, d9, d10, d11}, [r4]! 
+0x24 0xf9 0x4d 0x80
+# CHECK: vld4.16	{d8, d9, d10, d11}, [r4]! 
+0x24 0xf9 0x8d 0x80
+# CHECK: vld4.32	{d8, d9, d10, d11}, [r4]! 
+0x24 0xf9 0x06 0x81
+# CHECK: vld4.8	{d8, d10, d12, d14}, [r4], r6 
+0x24 0xf9 0x46 0x81
+# CHECK: vld4.16	{d8, d10, d12, d14}, [r4], r6 
+0x24 0xf9 0x86 0x81
+# CHECK: vld4.32	{d8, d10, d12, d14}, [r4], r6 
+0x24 0xf9 0x4f 0x8a
+# CHECK: vld1.16	{d8, d9}, [r4]          
+0x24 0xf9 0x8f 0x8a
+# CHECK: vld1.32	{d8, d9}, [r4]          
+0x24 0xf9 0xcf 0x8a
+# CHECK: vld1.64	{d8, d9}, [r4]          
+0x24 0xf9 0x0f 0x8a
+# CHECK: vld1.8	{d8, d9}, [r4]          
+0x24 0xf9 0x4f 0x88
+# CHECK: vld2.16	{d8, d9}, [r4]          
+0x24 0xf9 0x8f 0x88
+# CHECK: vld2.32	{d8, d9}, [r4]          
+0x24 0xf9 0x0f 0x88
+# CHECK: vld2.8	{d8, d9}, [r4]          
+0x24 0xf9 0x4d 0x88
+# CHECK: vld2.16	{d8, d9}, [r4]!         
+0x24 0xf9 0x46 0x88
+# CHECK: vld2.16	{d8, d9}, [r4], r6      
+0x24 0xf9 0x8d 0x88
+# CHECK: vld2.32	{d8, d9}, [r4]!         
+0x24 0xf9 0x86 0x88
+# CHECK: vld2.32	{d8, d9}, [r4], r6      
+0x24 0xf9 0x0d 0x88
+# CHECK: vld2.8	{d8, d9}, [r4]!         
+0x24 0xf9 0x06 0x88
+# CHECK: vld2.8	{d8, d9}, [r4], r6      
+0x24 0xf9 0x4f 0x89
+# CHECK: vld2.16	{d8, d10}, [r4]         
+0x24 0xf9 0x8f 0x89
+# CHECK: vld2.32	{d8, d10}, [r4]         
+0x24 0xf9 0x0f 0x89
+# CHECK: vld2.8	{d8, d10}, [r4]         
+0x24 0xf9 0x4d 0x83
+# CHECK: vld2.16	{d8, d9, d10, d11}, [r4]! 
+0x24 0xf9 0x46 0x83
+# CHECK: vld2.16	{d8, d9, d10, d11}, [r4], r6 
+0x24 0xf9 0x8d 0x83
+# CHECK: vld2.32	{d8, d9, d10, d11}, [r4]! 
+0x24 0xf9 0x86 0x83
+# CHECK: vld2.32	{d8, d9, d10, d11}, [r4], r6 
+0x24 0xf9 0x0d 0x83
+# CHECK: vld2.8	{d8, d9, d10, d11}, [r4]! 
+0x24 0xf9 0x06 0x83
+# CHECK: vld2.8	{d8, d9, d10, d11}, [r4], r6 
+0x24 0xf9 0x0f 0x84
+# CHECK: vld3.8	{d8, d9, d10}, [r4]     
+0x24 0xf9 0x4f 0x84
+# CHECK: vld3.16	{d8, d9, d10}, [r4]     
+0x24 0xf9 0x8f 0x84
+# CHECK: vld3.32	{d8, d9, d10}, [r4]     
+0x24 0xf9 0x0f 0x80
+# CHECK: vld4.8	{d8, d9, d10, d11}, [r4] 
+0x24 0xf9 0x4f 0x80
+# CHECK: vld4.16	{d8, d9, d10, d11}, [r4] 
+0x24 0xf9 0x8f 0x80
+# CHECK: vld4.32	{d8, d9, d10, d11}, [r4] 
+0x24 0xf9 0x0f 0x85
+# CHECK: vld3.8	{d8, d10, d12}, [r4]    
+0x24 0xf9 0x4f 0x85
+# CHECK: vld3.16	{d8, d10, d12}, [r4]    
+0x24 0xf9 0x8f 0x85
+# CHECK: vld3.32	{d8, d10, d12}, [r4]    
+0x24 0xf9 0x0f 0x81
+# CHECK: vld4.8	{d8, d10, d12, d14}, [r4] 
+0x24 0xf9 0x4f 0x81
+# CHECK: vld4.16	{d8, d10, d12, d14}, [r4] 
+0x24 0xf9 0x8f 0x81
+# CHECK: vld4.32	{d8, d10, d12, d14}, [r4] 
diff --git a/test/MC/Disassembler/ARM/unpredictable-ADC-arm.txt b/test/MC/Disassembler/ARM/unpredictable-ADC-arm.txt
new file mode 100644
index 0000000..275bae2f
--- /dev/null
+++ b/test/MC/Disassembler/ARM/unpredictable-ADC-arm.txt
@@ -0,0 +1,17 @@
+# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi |& FileCheck %s
+
+# CHECK: potentially undefined
+# CHECK: 0x1f 0x12 0xb0 0x00
+0x1f 0x12 0xb0 0x00
+
+# CHECK: potentially undefined
+# CHECK: 0x13 0xf2 0xb0 0x00
+0x13 0xf2 0xb0 0x00
+
+# CHECK: potentially undefined
+# CHECK: 0x13 0x1f 0xb0 0x00
+0x13 0x1f 0xb0 0x00
+
+# CHECK: potentially undefined
+# CHECK: 0x13 0x12 0xbf 0x00
+0x13 0x12 0xbf 0x00
diff --git a/test/MC/Disassembler/ARM/unpredictable-ADDREXT3-arm.txt b/test/MC/Disassembler/ARM/unpredictable-ADDREXT3-arm.txt
new file mode 100644
index 0000000..635b66e
--- /dev/null
+++ b/test/MC/Disassembler/ARM/unpredictable-ADDREXT3-arm.txt
@@ -0,0 +1,16 @@
+# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi |& FileCheck %s
+
+# CHECK: potentially undefined
+# CHECK: 0xd1 0xf1 0x5f 0x01
+0xd1 0xf1 0x5f 0x01
+# CHECK: potentially undefined
+# CHECK: 0xf1 0xf1 0x5f 0x01
+0xf1 0xf1 0x5f 0x01
+# CHECK: potentially undefined
+# CHECK: 0xf1 0xf1 0x5f 0x01
+0xf1 0xf1 0x5f 0x01
+# CHECK: potentially undefined
+# CHECK: 0xd1 0xe1 0x4f 0x01
+0xd1 0xe1 0x4f 0x01
+
+
diff --git a/test/MC/Disassembler/ARM/unpredictable-LDR-arm.txt b/test/MC/Disassembler/ARM/unpredictable-LDR-arm.txt
new file mode 100644
index 0000000..ed5e350
--- /dev/null
+++ b/test/MC/Disassembler/ARM/unpredictable-LDR-arm.txt
@@ -0,0 +1,22 @@
+# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi |& FileCheck %s
+
+# CHECK: potentially undefined
+# CHECK: 0xff 0x00 0xb9 0x00
+0xff 0x00 0xb9 0x00
+
+# CHECK: potentially undefined
+# CHECK: 0xfb 0xf0 0xb9 0x00
+0xfb 0xf0 0xb9 0x00
+
+# CHECK: potentially undefined
+# CHECK: 0xfb 0x01 0xb9 0x00
+0xfb 0x01 0xb9 0x00
+
+# CHECK: potentially undefined
+# CHECK: 0xfb 0x00 0xbf 0x00
+0xfb 0x00 0xbf 0x00
+
+# CHECK: potentially undefined
+# CHECK: 0xfb 0x90 0xb9 0x00
+0xfb 0x90 0xb9 0x00
+
diff --git a/test/MC/Disassembler/ARM/unpredictable-LDRD-arm.txt b/test/MC/Disassembler/ARM/unpredictable-LDRD-arm.txt
new file mode 100644
index 0000000..a8f54f7
--- /dev/null
+++ b/test/MC/Disassembler/ARM/unpredictable-LDRD-arm.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s
+
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0| X: X: X: 1| X: X: X: X| 1: 1: X: 1| X: X: X: X|
+# -------------------------------------------------------------------------------------------------
+# 
+# A8.6.68 LDRD (register)
+# if Rt{0} = 1 then UNDEFINED;
+
+# CHECK: potentially undefined
+# CHECK: 0xd0 0x10 0x00 0x00
+0xd0 0x10 0x00 0x00
diff --git a/test/MC/Disassembler/ARM/unpredictable-LSL-regform.txt b/test/MC/Disassembler/ARM/unpredictable-LSL-regform.txt
new file mode 100644
index 0000000..f7d6bc6
--- /dev/null
+++ b/test/MC/Disassembler/ARM/unpredictable-LSL-regform.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s
+
+# Opcode=196 Name=MOVs Format=ARM_FORMAT_DPSOREGFRM(5)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 0: 0: 0: 1| 1: 0: 1: 0| 0: 0: 0: 0| 1: 1: 1: 1| 0: 0: 0: 1| 0: 0: 0: 1| 0: 0: 1: 0|
+# -------------------------------------------------------------------------------------------------
+#
+# A8.6.89 LSL (register)
+# if d == 15 || n == 15 || m == 15 then UNPREDICTABLE;
+
+# CHECK: warning: potentially undefined instruction encoding
+0x12 0xf1 0xa0 0xe1
diff --git a/test/MC/Disassembler/ARM/unpredictable-MUL-arm.txt b/test/MC/Disassembler/ARM/unpredictable-MUL-arm.txt
new file mode 100644
index 0000000..3db86cc
--- /dev/null
+++ b/test/MC/Disassembler/ARM/unpredictable-MUL-arm.txt
@@ -0,0 +1,17 @@
+# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi |& FileCheck %s
+
+# CHECK: potentially undefined
+# CHECK: 0x93 0x12 0x01 0x00 
+0x93 0x12 0x01 0x00
+
+# CHECK: potentially undefined
+# CHECK: 0x92 0x0f 0x01 0x00
+0x92 0x0f 0x01 0x00
+
+# CHECK: potentially undefined
+# CHECK: 0x9f 0x02 0x01 0x00
+0x9f 0x02 0x01 0x00
+
+# CHECK: potentially undefined
+# CHECK: 0x92 0x01 0x0f 0x00
+0x92 0x01 0x0f 0x00
diff --git a/test/MC/Disassembler/ARM/unpredictable-RSC-arm.txt b/test/MC/Disassembler/ARM/unpredictable-RSC-arm.txt
new file mode 100644
index 0000000..5b13610
--- /dev/null
+++ b/test/MC/Disassembler/ARM/unpredictable-RSC-arm.txt
@@ -0,0 +1,11 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s
+
+# Opcode=261 Name=RSCrs Format=ARM_FORMAT_DPSOREGFRM(5)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 0: 0: 1: 1| 0: 0: 0: 0| 1: 1: 1: 0| 0: 1: 0: 0| 1: 1: 1: 1| 1: 0: 0: 0| 0: 1: 0: 1| 1: 1: 1: 1|
+# -------------------------------------------------------------------------------------------------
+# if d == 15 || n == 15 || m == 15 || s == 15 then UNPREDICTABLE;
+
+# CHECK: warning: potentially undefined instruction encoding
+0x5f 0xf8 0xe4 0x30
diff --git a/test/MC/Disassembler/ARM/unpredictable-SHADD16-arm.txt b/test/MC/Disassembler/ARM/unpredictable-SHADD16-arm.txt
new file mode 100644
index 0000000..8ec49ca
--- /dev/null
+++ b/test/MC/Disassembler/ARM/unpredictable-SHADD16-arm.txt
@@ -0,0 +1,7 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s
+
+# CHECK: warning: potentially undefined
+# CHECK: shadd16	r5, r7, r0
+0x10 0x51 0x37 0xe6
+
+
diff --git a/test/MC/Disassembler/ARM/unpredictable-SSAT-arm.txt b/test/MC/Disassembler/ARM/unpredictable-SSAT-arm.txt
new file mode 100644
index 0000000..874378e
--- /dev/null
+++ b/test/MC/Disassembler/ARM/unpredictable-SSAT-arm.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s
+
+# Opcode=322 Name=SSAT Format=ARM_FORMAT_SATFRM(13)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 0: 1: 1: 0| 1: 0: 1: 0| 0: 0: 0: 0| 1: 1: 1: 1| 0: 1: 0: 0| 0: 0: 0: 1| 1: 0: 1: 0|
+# -------------------------------------------------------------------------------------------------
+#
+# A8.6.183 SSAT
+# if d == 15 || n == 15 then UNPREDICTABLE;
+
+# CHECK:warning: potentially undefined instruction encoding
+0x1a 0xf4 0xa0 0xe6
diff --git a/test/MC/Disassembler/ARM/unpredictable-STRBrs-arm.txt b/test/MC/Disassembler/ARM/unpredictable-STRBrs-arm.txt
new file mode 100644
index 0000000..fef6125
--- /dev/null
+++ b/test/MC/Disassembler/ARM/unpredictable-STRBrs-arm.txt
@@ -0,0 +1,12 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s
+
+# Opcode=355 Name=STRBrs Format=ARM_FORMAT_STFRM(7)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 0: 1: 1: 1| 1: 1: 0: 0| 1: 1: 1: 1| 1: 1: 1: 1| 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0|
+# -------------------------------------------------------------------------------------------------
+#
+# if t == 15 then UNPREDICTABLE
+
+# CHECK: warning: potentially undefined instruction encoding
+0x00 0xf0 0xcf 0xe7
diff --git a/test/MC/Disassembler/ARM/unpredictable-UQADD8-arm.txt b/test/MC/Disassembler/ARM/unpredictable-UQADD8-arm.txt
new file mode 100644
index 0000000..4c4c9ab
--- /dev/null
+++ b/test/MC/Disassembler/ARM/unpredictable-UQADD8-arm.txt
@@ -0,0 +1,16 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s
+
+# Opcode=426 Name=UQADD8 Format=ARM_FORMAT_DPFRM(4)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 0: 1: 1: 0| 0: 1: 1: 0| 0: 1: 1: 0| 0: 1: 0: 1| 1: 1: 1: 1| 1: 0: 0: 1| 1: 1: 1: 1|
+# -------------------------------------------------------------------------------------------------
+#
+# DPFrm with bad reg specifier(s)
+#
+# if d == 15 || n == 15 || m == 15 then UNPREDICTABLE;
+0x9f 0x5f 0x66 0xe6
+
+# CHECK: warning: potentially undefined
+# CHECK: uqadd8	r5, r6, pc
+ 
diff --git a/test/MC/Disassembler/ARM/unpredictables-thumb.txt b/test/MC/Disassembler/ARM/unpredictables-thumb.txt
new file mode 100644
index 0000000..e7645f0
--- /dev/null
+++ b/test/MC/Disassembler/ARM/unpredictables-thumb.txt
@@ -0,0 +1,5 @@
+# RUN: llvm-mc --disassemble %s -triple=thumbv7 |& FileCheck %s
+
+0x01 0x47
+# CHECK: 3:1: warning: potentially undefined
+# CHECK: bx r0
diff --git a/test/MC/Disassembler/ARM/vfp4.txt b/test/MC/Disassembler/ARM/vfp4.txt
new file mode 100644
index 0000000..4f2c732
--- /dev/null
+++ b/test/MC/Disassembler/ARM/vfp4.txt
@@ -0,0 +1,37 @@
+# RUN: llvm-mc < %s -triple thumbv7-unknown-unknown --disassemble -mattr=+neon,+vfp4 | FileCheck %s
+
+# CHECK: vfma.f64 d16, d18, d17
+0xe2 0xee 0xa1 0x0b
+
+# CHECK: vfma.f32 s2, s4, s0
+0xa2 0xee 0x00 0x1a
+
+# CHECK: vfma.f32 d16, d18, d17
+0x42 0xef 0xb1 0x0c
+
+# CHECK: vfma.f32 q2, q4, q0
+0x08 0xef 0x50 0x4c
+
+# CHECK: vfnms.f64 d16, d18, d17
+0xd2 0xee 0xa1 0x0b
+
+# CHECK: vfnms.f32 s2, s4, s0
+0x92 0xee 0x00 0x1a
+
+# CHECK: vfms.f64 d16, d18, d17
+0xe2 0xee 0xe1 0x0b
+
+# CHECK: vfms.f32 s2, s4, s0
+0xa2 0xee 0x40 0x1a
+
+# CHECK: vfms.f32 d16, d18, d17
+0x62 0xef 0xb1 0x0c
+
+# CHECK: vfms.f32 q2, q4, q0
+0x28 0xef 0x50 0x4c
+
+# CHECK: vfnma.f64 d16, d18, d17
+0xd2 0xee 0xe1 0x0b
+
+# CHECK: vfnma.f32 s2, s4, s0
+0x92 0xee 0x40 0x1a
diff --git a/test/MC/Disassembler/MBlaze/dg.exp b/test/MC/Disassembler/MBlaze/dg.exp
deleted file mode 100644
index 0be99a3..0000000
--- a/test/MC/Disassembler/MBlaze/dg.exp
+++ /dev/null
@@ -1,6 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target MBlaze] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{txt}]]
-}
-
diff --git a/test/MC/Disassembler/MBlaze/lit.local.cfg b/test/MC/Disassembler/MBlaze/lit.local.cfg
new file mode 100644
index 0000000..3955b4e
--- /dev/null
+++ b/test/MC/Disassembler/MBlaze/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.txt']
+
+targets = set(config.root.targets_to_build.split())
+if not 'MBlaze' in targets:
+    config.unsupported = True
+
diff --git a/test/MC/Disassembler/MBlaze/mblaze_mbar.txt b/test/MC/Disassembler/MBlaze/mblaze_mbar.txt
new file mode 100644
index 0000000..6beba86
--- /dev/null
+++ b/test/MC/Disassembler/MBlaze/mblaze_mbar.txt
@@ -0,0 +1,14 @@
+# RUN: llvm-mc --disassemble %s -triple=mblaze-unknown-unknown | FileCheck %s
+
+################################################################################
+# Memory Barrier instructions
+################################################################################
+
+# CHECK:    mbar        0
+0xB8 0x02 0x00 0x04
+
+# CHECK:    mbar        1
+0xB8 0x22 0x00 0x04
+
+# CHECK:    mbar        2
+0xB8 0x42 0x00 0x04
diff --git a/test/MC/Disassembler/MBlaze/mblaze_pattern.txt b/test/MC/Disassembler/MBlaze/mblaze_pattern.txt
index 1268378..cb19ee0 100644
--- a/test/MC/Disassembler/MBlaze/mblaze_pattern.txt
+++ b/test/MC/Disassembler/MBlaze/mblaze_pattern.txt
@@ -12,3 +12,6 @@
 
 # CHECK:    pcmpeq      r0, r1, r2
 0x88 0x01 0x14 0x00
+
+# CHECK:    clz         r0, r1
+0x90 0x01 0x00 0xE0
diff --git a/test/MC/Disassembler/X86/dg.exp b/test/MC/Disassembler/X86/dg.exp
deleted file mode 100644
index a4d0e7c..0000000
--- a/test/MC/Disassembler/X86/dg.exp
+++ /dev/null
@@ -1,6 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{txt}]]
-}
-
diff --git a/test/MC/Disassembler/X86/intel-syntax.txt b/test/MC/Disassembler/X86/intel-syntax.txt
index 54b242d..3391e45 100644
--- a/test/MC/Disassembler/X86/intel-syntax.txt
+++ b/test/MC/Disassembler/X86/intel-syntax.txt
@@ -77,3 +77,31 @@
 
 # CHECK: test RAX, 0
 0x48 0xa9 0x00 0x00 0x00 0x00
+
+# CHECK: sysret
+0x48 0x0f 0x07
+
+# CHECK: sysret
+0x0f 0x07
+
+# CHECK: sysexit
+0x48 0x0f 0x35
+
+# CHECK: sysexit
+0x0f 0x35
+
+# CHECK: iret
+0x66 0xcf
+
+# CHECK: iretd
+0xcf
+
+# CHECK: iretq
+0x48 0xcf
+
+# CHECK: ret
+0x66 0xc3
+
+# CHECK: retf
+0x66 0xcb
+
diff --git a/test/MC/Disassembler/X86/invalid-cmp-imm.txt b/test/MC/Disassembler/X86/invalid-cmp-imm.txt
new file mode 100644
index 0000000..bf8699b
--- /dev/null
+++ b/test/MC/Disassembler/X86/invalid-cmp-imm.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 |& grep {invalid instruction encoding}
+
+# This instruction would decode as cmpordps if the immediate byte was less than 8.
+0x0f 0xc2 0xc7 0x08
+# This instruction would decode as cmpordpd if the immediate byte was less than 8.
+0x66 0x0f 0xc2 0xc7 0x08
+# This instruction would decode as cmpordss if the immediate byte was less than 8.
+0xf3 0x0f 0xc2 0xc7 0x08
+# This instruction would decode as cmpordsd if the immediate byte was less than 8.
+0xf2 0x0f 0xc2 0xc7 0x08
diff --git a/test/MC/Disassembler/X86/lit.local.cfg b/test/MC/Disassembler/X86/lit.local.cfg
new file mode 100644
index 0000000..6211b3e
--- /dev/null
+++ b/test/MC/Disassembler/X86/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.txt']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/MC/Disassembler/X86/simple-tests.txt b/test/MC/Disassembler/X86/simple-tests.txt
index 2dc918c..c0e77d06 100644
--- a/test/MC/Disassembler/X86/simple-tests.txt
+++ b/test/MC/Disassembler/X86/simple-tests.txt
@@ -6,6 +6,11 @@
 # CHECK: int	$33
 0xCD 0x21
 
+# CHECK: jrcxz -127
+0xe3 0x81
+
+# CHECK: jecxz -127
+0x67 0xe3 0x81
 
 # CHECK: addb	%al, (%rax)
 0 0
@@ -28,6 +33,9 @@
 # CHECK: vmcall
 0x0f 0x01 0xc1
 
+# CHECK: vmfunc
+0x0f 0x01 0xd4
+
 # CHECK: vmlaunch
 0x0f 0x01 0xc2
 
@@ -52,6 +60,30 @@
 # CHECK: vmptrst
 0x0f 0xc7 0x38
 
+# CHECK: vmrun
+0x0f 0x01 0xd8
+
+# CHECK: vmmcall
+0x0f 0x01 0xd9
+
+# CHECK: vmload
+0x0f 0x01 0xda
+
+# CHECK: vmsave
+0x0f 0x01 0xdb
+
+# CHECK: stgi
+0x0f 0x01 0xdc
+
+# CHECK: clgi
+0x0f 0x01 0xdd
+
+# CHECK: skinit
+0x0f 0x01 0xde
+
+# CHECK: invlpga
+0x0f 0x01 0xdf
+
 # CHECK: movl $0, -4(%rbp)
 0xc7 0x45 0xfc 0x00 0x00 0x00 0x00
 
@@ -309,6 +341,9 @@
 # CHECK: invvpid (%rax), %rax
 0x66 0x0f 0x38 0x81 0x00
 
+# CHECK: invpcid (%rax), %rax
+0x66 0x0f 0x38 0x82 0x00
+
 # CHECK: nop
 0x90
 
@@ -518,3 +553,179 @@
 
 # CHECK: andnq (%rax), %r15, %rax
 0xc4 0xe2 0x80 0xf2 0x00
+
+# CHECK: blsrl (%rax), %r15d
+0xc4 0xe2 0x00 0xf3 0x08
+
+# CHECK: blsrq (%rax), %r15
+0xc4 0xe2 0x80 0xf3 0x08
+
+# CHECK: blsmskl (%rax), %r15d
+0xc4 0xe2 0x00 0xf3 0x10
+
+# CHECK: blsmskq (%rax), %r15
+0xc4 0xe2 0x80 0xf3 0x10
+
+# CHECK: blsil (%rax), %r15d
+0xc4 0xe2 0x00 0xf3 0x18
+
+# CHECK: blsiq (%rax), %r15
+0xc4 0xe2 0x80 0xf3 0x18
+
+# CHECK: bextrl %r12d, (%rax), %r10d
+0xc4 0x62 0x18 0xf7 0x10
+
+# CHECK: bextrl %r12d, %r11d, %r10d
+0xc4 0x42 0x18 0xf7 0xd3
+
+# CHECK: bextrq %r12, (%rax), %r10
+0xc4 0x62 0x98 0xf7 0x10
+
+# CHECK: bextrq %r12, %r11, %r10
+0xc4 0x42 0x98 0xf7 0xd3
+
+# CHECK: bzhil %r12d, (%rax), %r10d
+0xc4 0x62 0x18 0xf5 0x10
+
+# CHECK: bzhil %r12d, %r11d, %r10d
+0xc4 0x42 0x18 0xf5 0xd3
+
+# CHECK: bzhiq %r12, (%rax), %r10
+0xc4 0x62 0x98 0xf5 0x10
+
+# CHECK: bzhiq %r12, %r11, %r10
+0xc4 0x42 0x98 0xf5 0xd3
+
+# CHECK: pextl %r12d, %r11d, %r10d
+0xc4 0x42 0x22 0xf5 0xd4
+
+# CHECK: pextl (%rax), %r11d, %r10d
+0xc4 0x62 0x22 0xf5 0x10
+
+# CHECK: pextq %r12, %r11, %r10
+0xc4 0x42 0xa2 0xf5 0xd4
+
+# CHECK: pextq (%rax), %r11, %r10
+0xc4 0x62 0xa2 0xf5 0x10
+
+# CHECK: pdepl %r12d, %r11d, %r10d
+0xc4 0x42 0x23 0xf5 0xd4
+
+# CHECK: pdepl (%rax), %r11d, %r10d
+0xc4 0x62 0x23 0xf5 0x10
+
+# CHECK: pdepq %r12, %r11, %r10
+0xc4 0x42 0xa3 0xf5 0xd4
+
+# CHECK: pdepq (%rax), %r11, %r10
+0xc4 0x62 0xa3 0xf5 0x10
+
+# CHECK: mulxl %r12d, %r11d, %r10d
+0xc4 0x42 0x23 0xf6 0xd4
+
+# CHECK: mulxl (%rax), %r11d, %r10d
+0xc4 0x62 0x23 0xf6 0x10
+
+# CHECK: mulxq %r12, %r11, %r10
+0xc4 0x42 0xa3 0xf6 0xd4
+
+# CHECK: mulxq (%rax), %r11, %r10
+0xc4 0x62 0xa3 0xf6 0x10
+
+# CHECK: rorxl $1, %r12d, %r10d
+0xc4 0x43 0x7b 0xf0 0xd4 0x01
+
+# CHECK: rorxl $31, (%rax), %r10d
+0xc4 0x63 0x7b 0xf0 0x10 0x1f
+
+# CHECK: rorxq $1, %r12, %r10
+0xc4 0x43 0xfb 0xf0 0xd4 0x01
+
+# CHECK: rorxq $63, (%rax), %r10
+0xc4 0x63 0xfb 0xf0 0x10 0x3f
+
+# CHECK: shlxl %r12d, (%rax), %r10d
+0xc4 0x62 0x19 0xf7 0x10
+
+# CHECK: shlxl %r12d, %r11d, %r10d
+0xc4 0x42 0x19 0xf7 0xd3
+
+# CHECK: shlxq %r12, (%rax), %r10
+0xc4 0x62 0x99 0xf7 0x10
+
+# CHECK: shlxq %r12, %r11, %r10
+0xc4 0x42 0x99 0xf7 0xd3
+
+# CHECK: sarxl %r12d, (%rax), %r10d
+0xc4 0x62 0x1a 0xf7 0x10
+
+# CHECK: sarxl %r12d, %r11d, %r10d
+0xc4 0x42 0x1a 0xf7 0xd3
+
+# CHECK: sarxq %r12, (%rax), %r10
+0xc4 0x62 0x9a 0xf7 0x10
+
+# CHECK: sarxq %r12, %r11, %r10
+0xc4 0x42 0x9a 0xf7 0xd3
+
+# CHECK: shrxl %r12d, (%rax), %r10d
+0xc4 0x62 0x1b 0xf7 0x10
+
+# CHECK: shrxl %r12d, %r11d, %r10d
+0xc4 0x42 0x1b 0xf7 0xd3
+
+# CHECK: shrxq %r12, (%rax), %r10
+0xc4 0x62 0x9b 0xf7 0x10
+
+# CHECK: shrxq %r12, %r11, %r10
+0xc4 0x42 0x9b 0xf7 0xd3
+
+# CHECK: vfmadd132ps %xmm11, %xmm12, %xmm10
+0xc4 0x42 0x19 0x98 0xd3
+
+# CHECK: vfmadd132pd %xmm11, %xmm12, %xmm10
+0xc4 0x42 0x99 0x98 0xd3
+
+# CHECK: vfmadd132ps %ymm11, %ymm12, %ymm10
+0xc4 0x42 0x1d 0x98 0xd3
+
+# CHECK: vfmadd132pd %ymm11, %ymm12, %ymm10
+0xc4 0x42 0x9d 0x98 0xd3
+
+# CHECK: vfmadd132ps (%rax), %xmm12, %xmm10
+0xc4 0x62 0x19 0x98 0x10
+
+# CHECK: vfmadd132pd (%rax), %xmm12, %xmm10
+0xc4 0x62 0x99 0x98 0x10
+
+# CHECK: vfmadd132ps (%rax), %ymm12, %ymm10
+0xc4 0x62 0x1d 0x98 0x10
+
+# CHECK: vfmadd132pd (%rax), %ymm12, %ymm10
+0xc4 0x62 0x9d 0x98 0x10
+
+# CHECK: vfmadd132ss %xmm11, %xmm12, %xmm10
+0xc4 0x42 0x19 0x99 0xd3
+
+# CHECK: vfmadd132sd %xmm11, %xmm12, %xmm10
+0xc4 0x42 0x99 0x99 0xd3
+
+# CHECK: vfmadd132ss (%rax), %xmm12, %xmm10
+0xc4 0x62 0x19 0x99 0x10
+
+# CHECK: vfmadd132sd (%rax), %xmm12, %xmm10
+0xc4 0x62 0x99 0x99 0x10
+
+# CHECK: vfmaddss (%rcx), %xmm1, %xmm0, %xmm0
+0xc4 0xe3 0xf9 0x6a 0x01 0x10
+
+# CHECK: vfmaddss %xmm1, (%rcx), %xmm0, %xmm0
+0xc4 0xe3 0x79 0x6a 0x01 0x10
+
+# CHECK: vpermil2ps $1, 4(%rax), %xmm2, %xmm3, %xmm0
+0xc4 0xe3 0xe1 0x48 0x40 0x04 0x21
+
+# rdar://8812056 lldb doesn't print the x86 lock prefix when disassembling
+# CHECK: lock
+# CHECK-NEXT: xaddq	%rcx, %rbx
+0xf0 0x48 0x0f 0xc1 0xcb
diff --git a/test/MC/Disassembler/X86/x86-32.txt b/test/MC/Disassembler/X86/x86-32.txt
index c4437ba..739fa6a 100644
--- a/test/MC/Disassembler/X86/x86-32.txt
+++ b/test/MC/Disassembler/X86/x86-32.txt
@@ -11,6 +11,12 @@
 # CHECK: calll
 0xff 0xd0
 
+# CHECK: jecxz -127
+0xe3 0x81
+
+# CHECK: jcxz -127
+0x67 0xe3 0x81
+
 # CHECK: incl
 0x40
 
@@ -63,6 +69,9 @@
 # CHECK: vmcall
 0x0f 0x01 0xc1
 
+# CHECK: vmfunc
+0x0f 0x01 0xd4
+
 # CHECK: vmlaunch
 0x0f 0x01 0xc2
 
@@ -87,6 +96,30 @@
 # CHECK: vmptrst
 0x0f 0xc7 0x38
 
+# CHECK: vmrun
+0x0f 0x01 0xd8
+
+# CHECK: vmmcall
+0x0f 0x01 0xd9
+
+# CHECK: vmload
+0x0f 0x01 0xda
+
+# CHECK: vmsave
+0x0f 0x01 0xdb
+
+# CHECK: stgi
+0x0f 0x01 0xdc
+
+# CHECK: clgi
+0x0f 0x01 0xdd
+
+# CHECK: skinit
+0x0f 0x01 0xde
+
+# CHECK: invlpga
+0x0f 0x01 0xdf
+
 # CHECK: movl $0, -4(%ebp)
 0xc7 0x45 0xfc 0x00 0x00 0x00 0x00
 
@@ -328,6 +361,9 @@
 # CHECK: invvpid (%eax), %eax
 0x66 0x0f 0x38 0x81 0x00
 
+# CHECK: invpcid (%eax), %eax
+0x66 0x0f 0x38 0x82 0x00
+
 # CHECK: nop
 0x90
 
@@ -385,6 +421,18 @@
 # CHECK: movl %eax, 0
 0xa3 0x00 0x00 0x00 0x00
 
+# CHECK: cmpordpd %xmm7, %xmm0
+0x66 0x0f 0xc2 0xc7 0x07
+
+# CHECK: cmpordps %xmm7, %xmm0
+0x0f 0xc2 0xc7 0x07
+
+# CHECK: cmpordsd %xmm7, %xmm0
+0xf2 0x0f 0xc2 0xc7 0x07
+
+# CHECK: cmpordss %xmm7, %xmm0
+0xf3 0x0f 0xc2 0xc7 0x07
+
 # CHECK: vaddps	%xmm3, %xmm7, %xmm0
 0xc4 0xe1 0x00 0x58 0xc3
 
@@ -495,3 +543,72 @@
 
 # CHECK: andnl (%eax), %edi, %eax
 0xc4 0xe2 0x80 0xf2 0x00
+
+# CHECK: blsrl (%eax), %edi
+0xc4 0xe2 0x40 0xf3 0x08
+
+# CHECK: blsmskl (%eax), %edi
+0xc4 0xe2 0x40 0xf3 0x10
+
+# CHECK: blsil (%eax), %edi
+0xc4 0xe2 0x40 0xf3 0x18
+
+# CHECK: bextrl %esi, (%eax), %edx
+0xc4 0xe2 0x08 0xf7 0x10
+
+# CHECK: bextrl %esi, %ebx, %edx
+0xc4 0xe2 0x08 0xf7 0xd3
+
+# CHECK: bzhil %esi, (%eax), %edx
+0xc4 0xe2 0x08 0xf5 0x10
+
+# CHECK: bzhil %esi, %ebx, %edx
+0xc4 0xe2 0x08 0xf5 0xd3
+
+# CHECK: pextl %esp, %ecx, %edx
+0xc4 0xe2 0x72 0xf5 0xd4
+
+# CHECK: pextl (%eax), %ecx, %edx
+0xc4 0xe2 0x72 0xf5 0x10
+
+# CHECK: pdepl %esp, %ecx, %edx
+0xc4 0xe2 0x73 0xf5 0xd4
+
+# CHECK: pdepl (%eax), %ecx, %edx
+0xc4 0xe2 0x73 0xf5 0x10
+
+# CHECK: mulxl %esp, %ecx, %edx
+0xc4 0xe2 0x73 0xf6 0xd4
+
+# CHECK: mulxl (%eax), %ecx, %edx
+0xc4 0xe2 0x73 0xf6 0x10
+
+# CHECK: mulxl %esp, %ecx, %edx
+0xc4 0xe2 0xf3 0xf6 0xd4
+
+# CHECK: mulxl (%eax), %ecx, %edx
+0xc4 0xe2 0xf3 0xf6 0x10
+
+# CHECK: rorxl $1, %esp, %edx
+0xc4 0xe3 0x7b 0xf0 0xd4 0x01
+
+# CHECK: rorxl $31, (%eax), %edx
+0xc4 0xe3 0x7b 0xf0 0x10 0x1f
+
+# CHECK: shlxl %esi, (%eax), %edx
+0xc4 0xe2 0x09 0xf7 0x10
+
+# CHECK: shlxl %esi, %ebx, %edx
+0xc4 0xe2 0x09 0xf7 0xd3
+
+# CHECK: sarxl %esi, (%eax), %edx
+0xc4 0xe2 0x0a 0xf7 0x10
+
+# CHECK: sarxl %esi, %ebx, %edx
+0xc4 0xe2 0x0a 0xf7 0xd3
+
+# CHECK: shrxl %esi, (%eax), %edx
+0xc4 0xe2 0x0b 0xf7 0x10
+
+# CHECK: shrxl %esi, %ebx, %edx
+0xc4 0xe2 0x0b 0xf7 0xd3
diff --git a/test/MC/Disassembler/X86/x86-64.txt b/test/MC/Disassembler/X86/x86-64.txt
new file mode 100644
index 0000000..f4b8f46
--- /dev/null
+++ b/test/MC/Disassembler/X86/x86-64.txt
@@ -0,0 +1,63 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s
+
+# Coverage
+
+# CHECK: vcmptrue_usps 
+0xc5 0x04 0xc2 0xc7 0x1f
+
+# CHECK: vcmptrue_uspd 
+0xc5 0x05 0xc2 0xc7 0x1f
+
+# CHECK: vcmptrue_usss 
+0xc5 0x06 0xc2 0xc7 0x1f
+
+# CHECK: vcmptrue_ussd 
+0xc5 0x07 0xc2 0xc7 0x1f
+
+# CHECK: vcmpeq_uqps 
+0xc5 0x04 0xc2 0xc7 0x08
+
+# CHECK: vcmpeq_uqpd 
+0xc5 0x05 0xc2 0xc7 0x08
+
+# CHECK: vcmpeq_uqss 
+0xc5 0x06 0xc2 0xc7 0x08
+
+# CHECK: vcmpeq_uqsd 
+0xc5 0x07 0xc2 0xc7 0x08
+
+# CHECK: vcmpeqps 
+0xc5 0x04 0xc2 0xc7 0x00
+
+# CHECK: vcmpeqpd 
+0xc5 0x05 0xc2 0xc7 0x00
+
+# CHECK: vcmpeqss 
+0xc5 0x06 0xc2 0xc7 0x00
+
+# CHECK: vcmpeqsd 
+0xc5 0x07 0xc2 0xc7 0x00
+
+# CHECK: cmpeqps 
+0x0f 0xc2 0xc7 0x00
+
+# CHECK: cmpeqpd 
+0x66 0x0f 0xc2 0xc7 0x00
+
+# CHECK: cmpeqss 
+0xf3 0x0f 0xc2 0xc7 0x00
+
+# CHECK: cmpeqsd 
+0xf2 0x0f 0xc2 0xc7 0x00
+
+# CHECK: cmpordps 
+0x0f 0xc2 0xc7 0x07
+
+# CHECK: cmpordpd 
+0x66 0x0f 0xc2 0xc7 0x07
+
+# CHECK: cmpordss 
+0xf3 0x0f 0xc2 0xc7 0x07
+
+# CHECK: cmpordsd 
+0xf2 0x0f 0xc2 0xc7 0x07
diff --git a/test/MC/ELF/cfi-escape.s b/test/MC/ELF/cfi-escape.s
new file mode 100644
index 0000000..3a5af00
--- /dev/null
+++ b/test/MC/ELF/cfi-escape.s
@@ -0,0 +1,42 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+
+f:
+	.cfi_startproc
+        nop
+	.cfi_escape 0x15, 7, 0x7f # DW_CFA_val_offset_sf, %esp, 8/-8
+        nop
+	.cfi_endproc
+
+// CHECK:       # Section 4
+// CHECK-NEXT:  (('sh_name', 0x00000011) # '.eh_frame'
+// CHECK-NEXT:   ('sh_type', 0x00000001)
+// CHECK-NEXT:   ('sh_flags', 0x0000000000000002)
+// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
+// CHECK-NEXT:   ('sh_offset', 0x0000000000000048)
+// CHECK-NEXT:   ('sh_size', 0x0000000000000030)
+// CHECK-NEXT:   ('sh_link', 0x00000000)
+// CHECK-NEXT:   ('sh_info', 0x00000000)
+// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
+// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
+// CHECK-NEXT:   ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 00411507 7f000000')
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Section 5
+// CHECK-NEXT:  (('sh_name', 0x0000000c) # '.rela.eh_frame'
+// CHECK-NEXT:   ('sh_type', 0x00000004)
+// CHECK-NEXT:   ('sh_flags', 0x0000000000000000)
+// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
+// CHECK-NEXT:   ('sh_offset', 0x0000000000000390)
+// CHECK-NEXT:   ('sh_size', 0x0000000000000018)
+// CHECK-NEXT:   ('sh_link', 0x00000007)
+// CHECK-NEXT:   ('sh_info', 0x00000004)
+// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
+// CHECK-NEXT:   ('sh_entsize', 0x0000000000000018)
+// CHECK-NEXT:   ('_relocations', [
+// CHECK-NEXT:    # Relocation 0
+// CHECK-NEXT:    (('r_offset', 0x0000000000000020)
+// CHECK-NEXT:     ('r_sym', 0x00000002)
+// CHECK-NEXT:     ('r_type', 0x00000002)
+// CHECK-NEXT:     ('r_addend', 0x0000000000000000)
+// CHECK-NEXT:    ),
+// CHECK-NEXT:   ])
+// CHECK-NEXT:  ),
diff --git a/test/MC/ELF/cfi-restore.s b/test/MC/ELF/cfi-restore.s
new file mode 100644
index 0000000..0fc3129
--- /dev/null
+++ b/test/MC/ELF/cfi-restore.s
@@ -0,0 +1,42 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+
+f:
+	.cfi_startproc
+        nop
+	.cfi_restore %rbp
+        nop
+	.cfi_endproc
+
+// CHECK:       # Section 4
+// CHECK-NEXT:  (('sh_name', 0x00000011) # '.eh_frame'
+// CHECK-NEXT:   ('sh_type', 0x00000001)
+// CHECK-NEXT:   ('sh_flags', 0x0000000000000002)
+// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
+// CHECK-NEXT:   ('sh_offset', 0x0000000000000048)
+// CHECK-NEXT:   ('sh_size', 0x0000000000000030)
+// CHECK-NEXT:   ('sh_link', 0x00000000)
+// CHECK-NEXT:   ('sh_info', 0x00000000)
+// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
+// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
+// CHECK-NEXT:   ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 0041c600 00000000')
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Section 5
+// CHECK-NEXT:  (('sh_name', 0x0000000c) # '.rela.eh_frame'
+// CHECK-NEXT:   ('sh_type', 0x00000004)
+// CHECK-NEXT:   ('sh_flags', 0x0000000000000000)
+// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
+// CHECK-NEXT:   ('sh_offset', 0x0000000000000390)
+// CHECK-NEXT:   ('sh_size', 0x0000000000000018)
+// CHECK-NEXT:   ('sh_link', 0x00000007)
+// CHECK-NEXT:   ('sh_info', 0x00000004)
+// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
+// CHECK-NEXT:   ('sh_entsize', 0x0000000000000018)
+// CHECK-NEXT:   ('_relocations', [
+// CHECK-NEXT:    # Relocation 0
+// CHECK-NEXT:    (('r_offset', 0x0000000000000020)
+// CHECK-NEXT:     ('r_sym', 0x00000002)
+// CHECK-NEXT:     ('r_type', 0x00000002)
+// CHECK-NEXT:     ('r_addend', 0x0000000000000000)
+// CHECK-NEXT:    ),
+// CHECK-NEXT:   ])
+// CHECK-NEXT:  ),
diff --git a/test/MC/ELF/cfi-signal-frame.s b/test/MC/ELF/cfi-signal-frame.s
new file mode 100644
index 0000000..cf6d160
--- /dev/null
+++ b/test/MC/ELF/cfi-signal-frame.s
@@ -0,0 +1,23 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+
+f:
+        .cfi_startproc
+        .cfi_signal_frame
+        .cfi_endproc
+
+g:
+        .cfi_startproc
+        .cfi_endproc
+
+// CHECK:      (('sh_name', 0x00000011) # '.eh_frame'
+// CHECK-NEXT:  ('sh_type', 0x00000001)
+// CHECK-NEXT:  ('sh_flags', 0x0000000000000002)
+// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
+// CHECK-NEXT:  ('sh_offset', 0x0000000000000040)
+// CHECK-NEXT:  ('sh_size', 0x0000000000000058)
+// CHECK-NEXT:  ('sh_link', 0x00000000)
+// CHECK-NEXT:  ('sh_info', 0x00000000)
+// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
+// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
+// CHECK-NEXT:  ('_section_data', '14000000 00000000 017a5253 00017810 011b0c07 08900100 10000000 1c000000 00000000 00000000 00000000 14000000 00000000 017a5200 01781001 1b0c0708 90010000 10000000 1c000000 00000000 00000000 00000000')
+// CHECK-NEXT: ),
diff --git a/test/MC/ELF/dg.exp b/test/MC/ELF/dg.exp
deleted file mode 100644
index d46d700..0000000
--- a/test/MC/ELF/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,s}]]
-}
diff --git a/test/MC/ELF/gen-dwarf.s b/test/MC/ELF/gen-dwarf.s
new file mode 100644
index 0000000..b090e08
--- /dev/null
+++ b/test/MC/ELF/gen-dwarf.s
@@ -0,0 +1,70 @@
+// RUN: llvm-mc -g -triple  i686-pc-linux-gnu %s -filetype=obj -o - | elf-dump | FileCheck %s
+
+
+// Test that on ELF the debug info has a relocation to debug_abbrev and one to
+// to debug_line.
+
+
+    .text
+    .globl foo
+    .type foo, @function
+    .align 4
+foo:
+    ret
+    .size foo, .-foo
+
+// Section 4 is .debug_line
+// CHECK:       # Section 4
+// CHECK-NEXT:  # '.debug_line'
+
+
+
+// The two relocations, one to symbol 6 and one to 4
+// CHECK:         # '.rel.debug_info'
+// CHECK-NEXT:   ('sh_type',
+// CHECK-NEXT:   ('sh_flags'
+// CHECK-NEXT:   ('sh_addr',
+// CHECK-NEXT:   ('sh_offset',
+// CHECK-NEXT:   ('sh_size',
+// CHECK-NEXT:   ('sh_link',
+// CHECK-NEXT:   ('sh_info',
+// CHECK-NEXT:   ('sh_addralign',
+// CHECK-NEXT:   ('sh_entsize',
+// CHECK-NEXT:   ('_relocations', [
+// CHECK-NEXT:    # Relocation 0
+// CHECK-NEXT:    (('r_offset', 0x00000006)
+// CHECK-NEXT:     ('r_sym', 0x000006)
+// CHECK-NEXT:     ('r_type', 0x01)
+// CHECK-NEXT:    ),
+// CHECK-NEXT:    # Relocation 1
+// CHECK-NEXT:    (('r_offset', 0x0000000c)
+// CHECK-NEXT:     ('r_sym', 0x000004)
+// CHECK-NEXT:     ('r_type', 0x01)
+// CHECK-NEXT:    ),
+
+
+// Section 8 is .debug_abbrev
+// CHECK:       # Section 8
+// CHECK-NEXT:  (('sh_name', 0x00000001) # '.debug_abbrev'
+
+// Symbol 4 is section 4 (.debug_line)
+// CHECK:         # Symbol 4
+// CHECK-NEXT:    (('st_name', 0x00000000) # ''
+// CHECK-NEXT:     ('st_value', 0x00000000)
+// CHECK-NEXT:     ('st_size', 0x00000000)
+// CHECK-NEXT:     ('st_bind', 0x0)
+// CHECK-NEXT:     ('st_type', 0x3)
+// CHECK-NEXT:     ('st_other', 0x00)
+// CHECK-NEXT:     ('st_shndx', 0x0004)
+// CHECK-NEXT:    ),
+
+// Symbol 6 is section 8 (.debug_abbrev)
+// CHECK:         # Symbol 6
+// CHECK-NEXT:    (('st_name', 0x00000000) # ''
+// CHECK-NEXT:     ('st_value', 0x00000000)
+// CHECK-NEXT:     ('st_size', 0x00000000)
+// CHECK-NEXT:     ('st_bind', 0x0)
+// CHECK-NEXT:     ('st_type', 0x3)
+// CHECK-NEXT:     ('st_other', 0x00)
+// CHECK-NEXT:     ('st_shndx', 0x0008)
+// CHECK-NEXT:    ),
diff --git a/test/MC/ELF/global-offset.s b/test/MC/ELF/global-offset.s
index 8cc5dbb..81ae5d7 100644
--- a/test/MC/ELF/global-offset.s
+++ b/test/MC/ELF/global-offset.s
@@ -6,6 +6,10 @@
         addl    $_GLOBAL_OFFSET_TABLE_, %ebx
         leal    _GLOBAL_OFFSET_TABLE_(%ebx), %ebx
 
+// But not in this case
+foo:
+        addl    _GLOBAL_OFFSET_TABLE_-foo,%ebx
+
 // CHECK:      ('sh_name', 0x00000005) # '.text'
 // CHECK-NEXT: ('sh_type',
 // CHECK-NEXT: ('sh_flags',
@@ -16,4 +20,4 @@
 // CHECK-NEXT: ('sh_info',
 // CHECK-NEXT: ('sh_addralign',
 // CHECK-NEXT: ('sh_entsize',
-// CHECK-NEXT: ('_section_data', '81c30200 00008d9b 02000000')
+// CHECK-NEXT: ('_section_data', '81c30200 00008d9b 02000000 031d0200 0000')
diff --git a/test/MC/ELF/lit.local.cfg b/test/MC/ELF/lit.local.cfg
new file mode 100644
index 0000000..56bf008
--- /dev/null
+++ b/test/MC/ELF/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.s']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/MC/ELF/many-section.s b/test/MC/ELF/many-section.s
index e7e723a..b729e66 100644
--- a/test/MC/ELF/many-section.s
+++ b/test/MC/ELF/many-section.s
@@ -1,5 +1,5 @@
 // RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t
-// RUN: llvm-nm %t | FileCheck %s
+// RUN: llvm-nm -a %t | FileCheck %s
 
 // CHECK: s000a
 // CHECK-NOT: U
diff --git a/test/MC/ELF/relocation-386.s b/test/MC/ELF/relocation-386.s
index 4421763..85da2eb 100644
--- a/test/MC/ELF/relocation-386.s
+++ b/test/MC/ELF/relocation-386.s
@@ -160,6 +160,18 @@
 // CHECK-NEXT:  ('r_sym', 0x00000d)
 // CHECK-NEXT:  ('r_type', 0x21)
 // CHECK-NEXT: ),
+// Relocation 25 (_GLOBAL_OFFSET_TABLE_-bar2) is of type R_386_GOTPC.
+// CHECK-NEXT: Relocation 25
+// CHECK-NEXT: (('r_offset', 0x00000094)
+// CHECK-NEXT:  ('r_sym', 0x00000b)
+// CHECK-NEXT:  ('r_type', 0x0a)
+// CHECK-NEXT: ),
+// Relocation 26 (und_symbol-bar2) is of type R_386_PC32
+// CHECK-NEXT: Relocation 26
+// CHECK-NEXT: (('r_offset', 0x0000009a)
+// CHECK-NEXT:  ('r_sym', 0x00000e)
+// CHECK-NEXT:  ('r_type', 0x02)
+// CHECK-NEXT: ),
 
 // Section 4 is bss
 // CHECK:      # Section 4
@@ -225,6 +237,8 @@ bar2:
         movl zed@DTPOFF(%eax), %eax
         pushl $bar
         addl foo@GOTTPOFF(%edx), %eax
+        subl    _GLOBAL_OFFSET_TABLE_-bar2, %ebx
+        leal und_symbol-bar2(%edx),%ecx
 
         .section        zedsec,"awT",@progbits
 zed:
diff --git a/test/MC/ELF/tls-i386.s b/test/MC/ELF/tls-i386.s
index 197418d..922d4c6 100644
--- a/test/MC/ELF/tls-i386.s
+++ b/test/MC/ELF/tls-i386.s
@@ -9,6 +9,13 @@
         movl    foo5@TPOFF(%eax), %eax
         movl    foo6@DTPOFF(%eax), %eax
         movl    foo7@INDNTPOFF, %eax
+        .long   foo8@NTPOFF
+        .long   foo9@GOTNTPOFF
+        .long   fooA@TLSGD
+        .long   fooB@TLSLDM
+        .long   fooC@TPOFF
+        .long   fooD@DTPOFF
+        .long   fooE@INDNTPOFF
 
 // CHECK:       (('st_name', 0x00000001) # 'foo1'
 // CHECK-NEXT:   ('st_value', 0x00000000)
@@ -72,3 +79,67 @@
 // CHECK-NEXT:   ('st_other', 0x00)
 // CHECK-NEXT:   ('st_shndx', 0x0000)
 // CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 12
+// CHECK-NEXT:  (('st_name', 0x00000024) # 'foo8'
+// CHECK-NEXT:   ('st_value', 0x00000000)
+// CHECK-NEXT:   ('st_size', 0x00000000)
+// CHECK-NEXT:   ('st_bind', 0x1)
+// CHECK-NEXT:   ('st_type', 0x6)
+// CHECK-NEXT:   ('st_other', 0x00)
+// CHECK-NEXT:   ('st_shndx', 0x0000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 13
+// CHECK-NEXT:  (('st_name', 0x00000029) # 'foo9'
+// CHECK-NEXT:   ('st_value', 0x00000000)
+// CHECK-NEXT:   ('st_size', 0x00000000)
+// CHECK-NEXT:   ('st_bind', 0x1)
+// CHECK-NEXT:   ('st_type', 0x6)
+// CHECK-NEXT:   ('st_other', 0x00)
+// CHECK-NEXT:   ('st_shndx', 0x0000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 14
+// CHECK-NEXT:  (('st_name', 0x0000002e) # 'fooA'
+// CHECK-NEXT:   ('st_value', 0x00000000)
+// CHECK-NEXT:   ('st_size', 0x00000000)
+// CHECK-NEXT:   ('st_bind', 0x1)
+// CHECK-NEXT:   ('st_type', 0x6)
+// CHECK-NEXT:   ('st_other', 0x00)
+// CHECK-NEXT:   ('st_shndx', 0x0000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 15
+// CHECK-NEXT:  (('st_name', 0x00000033) # 'fooB'
+// CHECK-NEXT:   ('st_value', 0x00000000)
+// CHECK-NEXT:   ('st_size', 0x00000000)
+// CHECK-NEXT:   ('st_bind', 0x1)
+// CHECK-NEXT:   ('st_type', 0x6)
+// CHECK-NEXT:   ('st_other', 0x00)
+// CHECK-NEXT:   ('st_shndx', 0x0000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 16
+// CHECK-NEXT:  (('st_name', 0x00000038) # 'fooC'
+// CHECK-NEXT:   ('st_value', 0x00000000)
+// CHECK-NEXT:   ('st_size', 0x00000000)
+// CHECK-NEXT:   ('st_bind', 0x1)
+// CHECK-NEXT:   ('st_type', 0x6)
+// CHECK-NEXT:   ('st_other', 0x00)
+// CHECK-NEXT:   ('st_shndx', 0x0000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 17
+// CHECK-NEXT:  (('st_name', 0x0000003d) # 'fooD'
+// CHECK-NEXT:   ('st_value', 0x00000000)
+// CHECK-NEXT:   ('st_size', 0x00000000)
+// CHECK-NEXT:   ('st_bind', 0x1)
+// CHECK-NEXT:   ('st_type', 0x6)
+// CHECK-NEXT:   ('st_other', 0x00)
+// CHECK-NEXT:   ('st_shndx', 0x0000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 18
+// CHECK-NEXT:  (('st_name', 0x00000042) # 'fooE'
+// CHECK-NEXT:   ('st_value', 0x00000000)
+// CHECK-NEXT:   ('st_size', 0x00000000)
+// CHECK-NEXT:   ('st_bind', 0x1)
+// CHECK-NEXT:   ('st_type', 0x6)
+// CHECK-NEXT:   ('st_other', 0x00)
+// CHECK-NEXT:   ('st_shndx', 0x0000)
+// CHECK-NEXT:  ),
+
diff --git a/test/MC/ELF/tls.s b/test/MC/ELF/tls.s
index d6d7de6..fe2bb4e 100644
--- a/test/MC/ELF/tls.s
+++ b/test/MC/ELF/tls.s
@@ -5,12 +5,14 @@
 	leaq	foo1@TLSGD(%rip), %rdi
         leaq    foo2@GOTTPOFF(%rip), %rdi
         leaq    foo3@TLSLD(%rip), %rdi
-
+	.long foo4@GOTTPOFF
+	.long foo5@TLSLD
+	.long foo6@TLSGD
 	.section	.zed,"awT",@progbits
 foobar:
 	.long	43
 
-// CHECK:      (('st_name', 0x00000010) # 'foobar'
+// CHECK:      (('st_name', 0x0000001f) # 'foobar'
 // CHECK-NEXT:  ('st_bind', 0x0)
 // CHECK-NEXT:  ('st_type', 0x6)
 // CHECK-NEXT:  ('st_other', 0x00)
@@ -46,3 +48,30 @@ foobar:
 // CHECK-NEXT:   ('st_value', 0x0000000000000000)
 // CHECK-NEXT:   ('st_size', 0x0000000000000000)
 // CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 10
+// CHECK-NEXT:  (('st_name', 0x00000010) # 'foo4'
+// CHECK-NEXT:   ('st_bind', 0x1)
+// CHECK-NEXT:   ('st_type', 0x6)
+// CHECK-NEXT:   ('st_other', 0x00)
+// CHECK-NEXT:   ('st_shndx', 0x0000)
+// CHECK-NEXT:   ('st_value', 0x0000000000000000)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 11
+// CHECK-NEXT:  (('st_name', 0x00000015) # 'foo5'
+// CHECK-NEXT:   ('st_bind', 0x1)
+// CHECK-NEXT:   ('st_type', 0x6)
+// CHECK-NEXT:   ('st_other', 0x00)
+// CHECK-NEXT:   ('st_shndx', 0x0000)
+// CHECK-NEXT:   ('st_value', 0x0000000000000000)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 12
+// CHECK-NEXT:  (('st_name', 0x0000001a) # 'foo6'
+// CHECK-NEXT:   ('st_bind', 0x1)
+// CHECK-NEXT:   ('st_type', 0x6)
+// CHECK-NEXT:   ('st_other', 0x00)
+// CHECK-NEXT:   ('st_shndx', 0x0000)
+// CHECK-NEXT:   ('st_value', 0x0000000000000000)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:  ),
diff --git a/test/MC/ELF/type.s b/test/MC/ELF/type.s
index 2b25a6b..ec53e4f 100644
--- a/test/MC/ELF/type.s
+++ b/test/MC/ELF/type.s
@@ -12,6 +12,10 @@ bar:
 // Test that gnu_unique_object is accepted.
         .type zed,@gnu_unique_object
 
+ifunc:
+        .global ifunc
+        .type ifunc,@gnu_indirect_function
+
 // CHECK:      # Symbol 4
 // CHECK-NEXT: (('st_name', 0x00000005) # 'bar'
 // CHECK-NEXT:  ('st_bind', 0x1)
@@ -30,3 +34,13 @@ bar:
 // CHECK-NEXT:  ('st_value', 0x0000000000000000)
 // CHECK-NEXT:  ('st_size', 0x0000000000000000)
 // CHECK-NEXT: ),
+// CHECK-NEXT: # Symbol 6
+// CHECK-NEXT: (('st_name', 0x00000009) # 'ifunc'
+// CHECK-NEXT:  ('st_bind', 0x1)
+// CHECK-NEXT:  ('st_type', 0xa)
+// CHECK-NEXT:  ('st_other', 0x00)
+// CHECK-NEXT:  ('st_shndx', 0x0001)
+// CHECK-NEXT:  ('st_value', 0x0000000000000000)
+// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
+
diff --git a/test/MC/MBlaze/dg.exp b/test/MC/MBlaze/dg.exp
deleted file mode 100644
index 0c4e78e..0000000
--- a/test/MC/MBlaze/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target MBlaze] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,s}]]
-}
diff --git a/test/MC/MBlaze/lit.local.cfg b/test/MC/MBlaze/lit.local.cfg
new file mode 100644
index 0000000..b0e1d85
--- /dev/null
+++ b/test/MC/MBlaze/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp', '.s']
+
+targets = set(config.root.targets_to_build.split())
+if not 'MBlaze' in targets:
+    config.unsupported = True
+
diff --git a/test/MC/MachO/ARM/darwin-ARM-reloc.s b/test/MC/MachO/ARM/darwin-ARM-reloc.s
new file mode 100644
index 0000000..b98c80c
--- /dev/null
+++ b/test/MC/MachO/ARM/darwin-ARM-reloc.s
@@ -0,0 +1,173 @@
+@ RUN: llvm-mc -n -triple armv7-apple-darwin10 %s -filetype=obj -o %t.obj
+@ RUN: macho-dump --dump-section-data < %t.obj > %t.dump
+@ RUN: FileCheck < %t.dump %s
+
+	.syntax unified
+        .text
+_f0:
+        bl _printf
+
+_f1:
+        bl _f0
+
+        .data
+_d0:
+Ld0_0:
+        .long Lsc0_0 - Ld0_0
+
+	.section	__TEXT,__cstring,cstring_literals
+Lsc0_0:
+        .long 0
+
+        .subsections_via_symbols
+
+@ CHECK: ('cputype', 12)
+@ CHECK: ('cpusubtype', 9)
+@ CHECK: ('filetype', 1)
+@ CHECK: ('num_load_commands', 3)
+@ CHECK: ('load_commands_size', 364)
+@ CHECK: ('flag', 8192)
+@ CHECK: ('load_commands', [
+@ CHECK:   # Load Command 0
+@ CHECK:  (('command', 1)
+@ CHECK:   ('size', 260)
+@ CHECK:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+@ CHECK:   ('vm_addr', 0)
+@ CHECK:   ('vm_size', 16)
+@ CHECK:   ('file_offset', 392)
+@ CHECK:   ('file_size', 16)
+@ CHECK:   ('maxprot', 7)
+@ CHECK:   ('initprot', 7)
+@ CHECK:   ('num_sections', 3)
+@ CHECK:   ('flags', 0)
+@ CHECK:   ('sections', [
+@ CHECK:     # Section 0
+@ CHECK:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+@ CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+@ CHECK:     ('address', 0)
+@ CHECK:     ('size', 8)
+@ CHECK:     ('offset', 392)
+@ CHECK:     ('alignment', 0)
+@ CHECK:     ('reloc_offset', 408)
+@ CHECK:     ('num_reloc', 2)
+@ CHECK:     ('flags', 0x80000400)
+@ CHECK:     ('reserved1', 0)
+@ CHECK:     ('reserved2', 0)
+@ CHECK:    ),
+@ CHECK:   ('_relocations', [
+@ CHECK:     # Relocation 0
+@ CHECK:     (('word-0', 0x4),
+@ CHECK:      ('word-1', 0x55000001)),
+@ CHECK:     # Relocation 1
+@ CHECK:     (('word-0', 0x0),
+@ CHECK:      ('word-1', 0x5d000003)),
+@ CHECK:   ])
+@ CHECK:   ('_section_data', 'feffffeb fdffffeb')
+@ CHECK:     # Section 1
+@ CHECK:    (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+@ CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+@ CHECK:     ('address', 8)
+@ CHECK:     ('size', 4)
+@ CHECK:     ('offset', 400)
+@ CHECK:     ('alignment', 0)
+@ CHECK:     ('reloc_offset', 424)
+@ CHECK:     ('num_reloc', 2)
+@ CHECK:     ('flags', 0x0)
+@ CHECK:     ('reserved1', 0)
+@ CHECK:     ('reserved2', 0)
+@ CHECK:    ),
+@ CHECK:   ('_relocations', [
+@ CHECK:     # Relocation 0
+@ CHECK:     (('word-0', 0xa2000000),
+@ CHECK:      ('word-1', 0xc)),
+@ CHECK:     # Relocation 1
+@ CHECK:     (('word-0', 0xa1000000),
+@ CHECK:      ('word-1', 0x8)),
+@ CHECK:   ])
+@ CHECK:   ('_section_data', '04000000')
+@ CHECK:     # Section 2
+@ CHECK:    (('section_name', '__cstring\x00\x00\x00\x00\x00\x00\x00')
+@ CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+@ CHECK:     ('address', 12)
+@ CHECK:     ('size', 4)
+@ CHECK:     ('offset', 404)
+@ CHECK:     ('alignment', 0)
+@ CHECK:     ('reloc_offset', 0)
+@ CHECK:     ('num_reloc', 0)
+@ CHECK:     ('flags', 0x2)
+@ CHECK:     ('reserved1', 0)
+@ CHECK:     ('reserved2', 0)
+@ CHECK:    ),
+@ CHECK:   ('_relocations', [
+@ CHECK:   ])
+@ CHECK:   ('_section_data', '00000000')
+@ CHECK:   ])
+@ CHECK:  ),
+@ CHECK:   # Load Command 1
+@ CHECK:  (('command', 2)
+@ CHECK:   ('size', 24)
+@ CHECK:   ('symoff', 440)
+@ CHECK:   ('nsyms', 4)
+@ CHECK:   ('stroff', 488)
+@ CHECK:   ('strsize', 24)
+@ CHECK:   ('_string_data', '\x00_printf\x00_f0\x00_f1\x00_d0\x00\x00\x00\x00')
+@ CHECK:   ('_symbols', [
+@ CHECK:     # Symbol 0
+@ CHECK:    (('n_strx', 9)
+@ CHECK:     ('n_type', 0xe)
+@ CHECK:     ('n_sect', 1)
+@ CHECK:     ('n_desc', 0)
+@ CHECK:     ('n_value', 0)
+@ CHECK:     ('_string', '_f0')
+@ CHECK:    ),
+@ CHECK:     # Symbol 1
+@ CHECK:    (('n_strx', 13)
+@ CHECK:     ('n_type', 0xe)
+@ CHECK:     ('n_sect', 1)
+@ CHECK:     ('n_desc', 0)
+@ CHECK:     ('n_value', 4)
+@ CHECK:     ('_string', '_f1')
+@ CHECK:    ),
+@ CHECK:     # Symbol 2
+@ CHECK:    (('n_strx', 17)
+@ CHECK:     ('n_type', 0xe)
+@ CHECK:     ('n_sect', 2)
+@ CHECK:     ('n_desc', 0)
+@ CHECK:     ('n_value', 8)
+@ CHECK:     ('_string', '_d0')
+@ CHECK:    ),
+@ CHECK:     # Symbol 3
+@ CHECK:    (('n_strx', 1)
+@ CHECK:     ('n_type', 0x1)
+@ CHECK:     ('n_sect', 0)
+@ CHECK:     ('n_desc', 0)
+@ CHECK:     ('n_value', 0)
+@ CHECK:     ('_string', '_printf')
+@ CHECK:    ),
+@ CHECK:   ])
+@ CHECK:  ),
+@ CHECK:   # Load Command 2
+@ CHECK:  (('command', 11)
+@ CHECK:   ('size', 80)
+@ CHECK:   ('ilocalsym', 0)
+@ CHECK:   ('nlocalsym', 3)
+@ CHECK:   ('iextdefsym', 3)
+@ CHECK:   ('nextdefsym', 0)
+@ CHECK:   ('iundefsym', 3)
+@ CHECK:   ('nundefsym', 1)
+@ CHECK:   ('tocoff', 0)
+@ CHECK:   ('ntoc', 0)
+@ CHECK:   ('modtaboff', 0)
+@ CHECK:   ('nmodtab', 0)
+@ CHECK:   ('extrefsymoff', 0)
+@ CHECK:   ('nextrefsyms', 0)
+@ CHECK:   ('indirectsymoff', 0)
+@ CHECK:   ('nindirectsyms', 0)
+@ CHECK:   ('extreloff', 0)
+@ CHECK:   ('nextrel', 0)
+@ CHECK:   ('locreloff', 0)
+@ CHECK:   ('nlocrel', 0)
+@ CHECK:   ('_indirect_symbols', [
+@ CHECK:   ])
+@ CHECK:  ),
+@ CHECK: ])
diff --git a/test/MC/ARM/darwin-Thumb-reloc.s b/test/MC/MachO/ARM/darwin-Thumb-reloc.s
index 567573d..567573d 100644
--- a/test/MC/ARM/darwin-Thumb-reloc.s
+++ b/test/MC/MachO/ARM/darwin-Thumb-reloc.s
diff --git a/test/MC/MachO/ARM/empty-function-nop.ll b/test/MC/MachO/ARM/empty-function-nop.ll
new file mode 100644
index 0000000..ef86ebc
--- /dev/null
+++ b/test/MC/MachO/ARM/empty-function-nop.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -filetype=obj -mtriple=thumbv6-apple-darwin -o - | macho-dump --dump-section-data | FileCheck -check-prefix=CHECK-T1 %s
+; RUN: llc < %s -filetype=obj -mtriple=thumbv7-apple-darwin -o - | macho-dump --dump-section-data | FileCheck -check-prefix=CHECK-T2 %s
+; RUN: llc < %s -filetype=obj -mtriple=armv6-apple-darwin -o - | macho-dump --dump-section-data | FileCheck -check-prefix=CHECK-ARM %s
+; RUN: llc < %s -filetype=obj -mtriple=armv7-apple-darwin -o - | macho-dump --dump-section-data | FileCheck -check-prefix=CHECK-ARMV7 %s
+
+; Empty functions need a NOP in them for MachO to prevent DWARF FDEs from
+; getting all mucked up. See lib/CodeGen/AsmPrinter/AsmPrinter.cpp for
+; details.
+define internal fastcc void @empty_function() {
+  unreachable
+}
+; CHECK-T1:    ('_section_data', 'c046')
+; CHECK-T2:    ('_section_data', '00bf')
+; CHECK-ARM:   ('_section_data', '0000a0e1')
+; CHECK-ARMV7: ('_section_data', '00f020e3')
diff --git a/test/MC/MachO/ARM/lit.local.cfg b/test/MC/MachO/ARM/lit.local.cfg
new file mode 100644
index 0000000..8976463
--- /dev/null
+++ b/test/MC/MachO/ARM/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.s']
+
+targets = set(config.root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
+
diff --git a/test/MC/MachO/ARM/no-subsections-reloc.s b/test/MC/MachO/ARM/no-subsections-reloc.s
new file mode 100644
index 0000000..7701c59
--- /dev/null
+++ b/test/MC/MachO/ARM/no-subsections-reloc.s
@@ -0,0 +1,18 @@
+@ RUN: llvm-mc -n -triple thumbv7-apple-darwin10 %s -filetype=obj -o %t.obj
+@ RUN: macho-dump --dump-section-data < %t.obj > %t.dump
+@ RUN: FileCheck < %t.dump %s
+
+@ When not using subsections-via-symbols, references to non-local symbols
+@ in the same section can be resolved at assembly time w/o relocations.
+
+ .syntax unified
+ .text
+ .thumb
+ .thumb_func _foo
+_foo:
+    ldr r3, bar
+bar:
+    .long 0
+
+@ CHECK: 'num_reloc', 0
+@ CHECK: '_section_data', 'dff80030 00000000'
diff --git a/test/MC/ARM/nop-armv4-padding.s b/test/MC/MachO/ARM/nop-armv4-padding.s
index 8f646db..8f646db 100644
--- a/test/MC/ARM/nop-armv4-padding.s
+++ b/test/MC/MachO/ARM/nop-armv4-padding.s
diff --git a/test/MC/MachO/ARM/nop-armv6t2-padding.s b/test/MC/MachO/ARM/nop-armv6t2-padding.s
new file mode 100644
index 0000000..c38ad2d
--- /dev/null
+++ b/test/MC/MachO/ARM/nop-armv6t2-padding.s
@@ -0,0 +1,10 @@
+@ RUN: llvm-mc -triple armv6t2-apple-darwin %s -filetype=obj -o %t.obj
+@ RUN: macho-dump --dump-section-data < %t.obj > %t.dump
+@ RUN: FileCheck %s < %t.dump
+
+x:
+      add r0, r1, r2
+      .align 4
+      add r0, r1, r2
+
+@ CHECK: ('_section_data', '020081e0 00f020e3 00f020e3 00f020e3 020081e0')
diff --git a/test/MC/ARM/nop-thumb-padding.s b/test/MC/MachO/ARM/nop-thumb-padding.s
index 1e173f1..1e173f1 100644
--- a/test/MC/ARM/nop-thumb-padding.s
+++ b/test/MC/MachO/ARM/nop-thumb-padding.s
diff --git a/test/MC/ARM/nop-thumb2-padding.s b/test/MC/MachO/ARM/nop-thumb2-padding.s
index a8aa3a1..a8aa3a1 100644
--- a/test/MC/ARM/nop-thumb2-padding.s
+++ b/test/MC/MachO/ARM/nop-thumb2-padding.s
diff --git a/test/MC/MachO/ARM/relax-thumb-ldr-literal.s b/test/MC/MachO/ARM/relax-thumb-ldr-literal.s
new file mode 100644
index 0000000..8d26f6d
--- /dev/null
+++ b/test/MC/MachO/ARM/relax-thumb-ldr-literal.s
@@ -0,0 +1,13 @@
+@ RUN: llvm-mc -n -triple thumbv7-apple-darwin10 %s -filetype=obj -o %t.obj
+@ RUN: macho-dump --dump-section-data < %t.obj > %t.dump
+@ RUN: FileCheck < %t.dump %s
+
+	.syntax unified
+        .text
+	.thumb
+	.thumb_func _foo
+_foo:
+        ldr r2, (_foo - 4)
+
+@ CHECK: ('num_reloc', 0)
+@ CHECK: ('_section_data', '5ff80820')
diff --git a/test/MC/MachO/ARM/relax-thumb2-branches.s b/test/MC/MachO/ARM/relax-thumb2-branches.s
new file mode 100644
index 0000000..7916d42
--- /dev/null
+++ b/test/MC/MachO/ARM/relax-thumb2-branches.s
@@ -0,0 +1,14 @@
+@ RUN: llvm-mc -triple=thumbv7-apple-darwin -show-encoding %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+
+        ble Lfoo        @ wide encoding
+
+        .space 258
+Lfoo:
+        nop
+
+        ble Lbaz        @ narrow encoding
+        .space 256
+Lbaz:
+
+@ CHECK: '_section_data', '40f38180
+@ CHECK: 000000bf 7fdd
diff --git a/test/MC/MachO/ARM/static-movt-relocs.s b/test/MC/MachO/ARM/static-movt-relocs.s
new file mode 100644
index 0000000..dce5683
--- /dev/null
+++ b/test/MC/MachO/ARM/static-movt-relocs.s
@@ -0,0 +1,23 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumbv7-apple-darwin10 -filetype=obj -o - < %s | macho-dump | FileCheck %s
+        .thumb
+        .thumb_func foo
+foo:
+        movw r0, :lower16:(bar + 16)
+        movt r0, :upper16:(bar + 16)
+        bx r0
+
+
+@ CHECK:  ('_relocations', [
+@ CHECK:    # Relocation 0
+@ CHECK:    (('word-0', 0x4),
+@ CHECK:     ('word-1', 0x8e000001)),
+@ CHECK:    # Relocation 1
+@ CHECK:    (('word-0', 0x10),
+@ CHECK:     ('word-1', 0x16ffffff)),
+@ CHECK:    # Relocation 2
+@ CHECK:    (('word-0', 0x0),
+@ CHECK:     ('word-1', 0x8c000001)),
+@ CHECK:    # Relocation 3
+@ CHECK:    (('word-0', 0x0),
+@ CHECK:     ('word-1', 0x14ffffff)),
+@ CHECK:  ])
diff --git a/test/MC/MachO/ARM/thumb2-function-relative-load.s b/test/MC/MachO/ARM/thumb2-function-relative-load.s
new file mode 100644
index 0000000..622007d
--- /dev/null
+++ b/test/MC/MachO/ARM/thumb2-function-relative-load.s
@@ -0,0 +1,13 @@
+@ RUN: llvm-mc -n -triple thumbv7-apple-darwin10 %s -filetype=obj -o %t.obj
+@ RUN: macho-dump --dump-section-data < %t.obj > %t.dump
+@ RUN: FileCheck < %t.dump %s
+        .syntax unified
+        .text
+	.thumb
+        .thumb_func _foo
+_foo:
+	ldr lr, (_foo - 4)
+
+        .subsections_via_symbols
+
+@ CHECK: ('_section_data', '5ff808e0')
diff --git a/test/MC/ARM/thumb2-movt-fixup.s b/test/MC/MachO/ARM/thumb2-movt-fixup.s
index ddd95b5..ddd95b5 100644
--- a/test/MC/ARM/thumb2-movt-fixup.s
+++ b/test/MC/MachO/ARM/thumb2-movt-fixup.s
diff --git a/test/MC/MachO/darwin-x86_64-diff-reloc-assign.s b/test/MC/MachO/darwin-x86_64-diff-reloc-assign.s
new file mode 100644
index 0000000..49cfa41
--- /dev/null
+++ b/test/MC/MachO/darwin-x86_64-diff-reloc-assign.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple x86_64-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+
+// Test case for rdar://10743265
+
+// This tests that this expression does not cause a crash and produces two
+// relocation entries:
+// Relocation information (__TEXT,__text) 2 entries
+// address  pcrel length extern type    scattered symbolnum/value
+// 00000000 False long   True   SUB     False     _base
+// 00000000 False long   True   UNSIGND False     _start_ap_2
+
+_base = .
+
+.long (0x2000) + _start_ap_2 - _base 
+.word 0
+
+_start_ap_2:
+        cli
+
+// CHECK:   ('_relocations', [
+// CHECK:     # Relocation 0
+// CHECK:     (('word-0', 0x0),
+// CHECK:      ('word-1', 0x5c000000)),
+// CHECK:     # Relocation 1
+// CHECK:     (('word-0', 0x0),
+// CHECK:      ('word-1', 0xc000001)),
+// CHECK:   ])
diff --git a/test/MC/MachO/dg.exp b/test/MC/MachO/dg.exp
deleted file mode 100644
index ca6aefe..0000000
--- a/test/MC/MachO/dg.exp
+++ /dev/null
@@ -1,6 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{s}]]
-}
-
diff --git a/test/MC/MachO/file.s b/test/MC/MachO/file.s
new file mode 100644
index 0000000..0168747
--- /dev/null
+++ b/test/MC/MachO/file.s
@@ -0,0 +1,22 @@
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+
+        .file	1 "dir/foo"
+        nop
+
+// CHECK:         ('_section_data', '90')
+// CHECK-NEXT:      # Section 1
+// CHECK-NEXT:     (('section_name', '__debug_line\x00\x00\x00\x00')
+// CHECK-NEXT:      ('segment_name', '__DWARF\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-NEXT:      ('address', 1)
+// CHECK-NEXT:      ('size', 45)
+// CHECK-NEXT:      ('offset', 221)
+// CHECK-NEXT:      ('alignment', 0)
+// CHECK-NEXT:      ('reloc_offset', 0)
+// CHECK-NEXT:      ('num_reloc', 0)
+// CHECK-NEXT:      ('flags', 0x2000000)
+// CHECK-NEXT:      ('reserved1', 0)
+// CHECK-NEXT:      ('reserved2', 0)
+// CHECK-NEXT:     ),
+// CHECK-NEXT:    ('_relocations', [
+// CHECK-NEXT:    ])
+// CHECK-NEXT:    ('_section_data', '29000000 02001e00 00000101 fb0e0d00 01010101 00000001 00000164 69720000 666f6f00 01000000 02000001 01')
diff --git a/test/MC/MachO/gen-dwarf.s b/test/MC/MachO/gen-dwarf.s
new file mode 100644
index 0000000..4fbc32d
--- /dev/null
+++ b/test/MC/MachO/gen-dwarf.s
@@ -0,0 +1,122 @@
+// RUN: llvm-mc -g -triple i386-apple-darwin10 %s -filetype=obj -o %t
+// RUN: llvm-dwarfdump %t | FileCheck %s
+
+.globl _bar
+_bar:
+	movl	$0, %eax
+L1:	leave
+	ret
+_foo:
+_baz:
+	nop
+.data
+_x:	.long 1
+
+// CHECK: file format Mach-O 32-bit i386
+
+// CHECK: .debug_abbrev contents:
+// CHECK: Abbrev table for offset: 0x00000000
+// CHECK: [1] DW_TAG_compile_unit	DW_CHILDREN_yes
+// CHECK: 	DW_AT_stmt_list	DW_FORM_data4
+// CHECK: 	DW_AT_low_pc	DW_FORM_addr
+// CHECK: 	DW_AT_high_pc	DW_FORM_addr
+// CHECK: 	DW_AT_name	DW_FORM_string
+// CHECK: 	DW_AT_comp_dir	DW_FORM_string
+// CHECK: 	DW_AT_producer	DW_FORM_string
+// CHECK: 	DW_AT_language	DW_FORM_data2
+
+// CHECK: [2] DW_TAG_label	DW_CHILDREN_yes
+// CHECK: 	DW_AT_name	DW_FORM_string
+// CHECK: 	DW_AT_decl_file	DW_FORM_data4
+// CHECK: 	DW_AT_decl_line	DW_FORM_data4
+// CHECK: 	DW_AT_low_pc	DW_FORM_addr
+// CHECK: 	DW_AT_prototyped	DW_FORM_flag
+
+// CHECK: [3] DW_TAG_unspecified_parameters	DW_CHILDREN_no
+
+
+// CHECK: .debug_info contents:
+
+// We don't check the leading addresses these are at.
+// CHECK:  DW_TAG_compile_unit [1] *
+// CHECK:    DW_AT_stmt_list [DW_FORM_data4]	(0x00000000)
+// CHECK:    DW_AT_low_pc [DW_FORM_addr]	(0x0000000000000000)
+// CHECK:    DW_AT_high_pc [DW_FORM_addr]	(0x0000000000000008)
+// We don't check the file name as it is a temp directory
+// CHECK:    DW_AT_name [DW_FORM_string]
+// We don't check the DW_AT_comp_dir which is the current working directory
+// CHECK:    DW_AT_producer [DW_FORM_string]	("llvm-mc (based on {{.*}})")
+// CHECK:    DW_AT_language [DW_FORM_data2]	(0x8001)
+
+// CHECK:    DW_TAG_label [2] *
+// CHECK:      DW_AT_name [DW_FORM_string]	("bar")
+// CHECK:      DW_AT_decl_file [DW_FORM_data4]	(0x00000001)
+// CHECK:      DW_AT_decl_line [DW_FORM_data4]	(0x00000005)
+// CHECK:      DW_AT_low_pc [DW_FORM_addr]	(0x0000000000000000)
+// CHECK:      DW_AT_prototyped [DW_FORM_flag]	(0x00)
+
+// CHECK:      DW_TAG_unspecified_parameters [3]  
+
+// CHECK:      NULL
+
+// CHECK:    DW_TAG_label [2] *
+// CHECK:      DW_AT_name [DW_FORM_string]	("foo")
+// CHECK:      DW_AT_decl_file [DW_FORM_data4]	(0x00000001)
+// CHECK:      DW_AT_decl_line [DW_FORM_data4]	(0x00000009)
+// CHECK:      DW_AT_low_pc [DW_FORM_addr]	(0x0000000000000007)
+// CHECK:      DW_AT_prototyped [DW_FORM_flag]	(0x00)
+
+// CHECK:      DW_TAG_unspecified_parameters [3]  
+
+// CHECK:      NULL
+
+// CHECK:    DW_TAG_label [2] *
+// CHECK:      DW_AT_name [DW_FORM_string]	("baz")
+// CHECK:      DW_AT_decl_file [DW_FORM_data4]	(0x00000001)
+// CHECK:      DW_AT_decl_line [DW_FORM_data4]	(0x0000000a)
+// CHECK:      DW_AT_low_pc [DW_FORM_addr]	(0x0000000000000007)
+// CHECK:      DW_AT_prototyped [DW_FORM_flag]	(0x00)
+
+// CHECK:      DW_TAG_unspecified_parameters [3]  
+
+// CHECK:      NULL
+
+// CHECK:    NULL
+
+// CHECK: .debug_aranges contents:
+// CHECK: Address Range Header: length = 0x0000001c, version = 0x0002, cu_offset = 0x00000000, addr_size = 0x04, seg_size = 0x00
+
+// CHECK: .debug_lines contents:
+// CHECK: Line table prologue:
+// We don't check the total_length as it includes lengths of temp paths
+// CHECK:         version: 2
+// We don't check the prologue_length as it too includes lengths of temp paths
+// CHECK: min_inst_length: 1
+// CHECK: default_is_stmt: 1
+// CHECK:       line_base: -5
+// CHECK:      line_range: 14
+// CHECK:     opcode_base: 13
+// CHECK: standard_opcode_lengths[DW_LNS_copy] = 0
+// CHECK: standard_opcode_lengths[DW_LNS_advance_pc] = 1
+// CHECK: standard_opcode_lengths[DW_LNS_advance_line] = 1
+// CHECK: standard_opcode_lengths[DW_LNS_set_file] = 1
+// CHECK: standard_opcode_lengths[DW_LNS_set_column] = 1
+// CHECK: standard_opcode_lengths[DW_LNS_negate_stmt] = 0
+// CHECK: standard_opcode_lengths[DW_LNS_set_basic_block] = 0
+// CHECK: standard_opcode_lengths[DW_LNS_const_add_pc] = 0
+// CHECK: standard_opcode_lengths[DW_LNS_fixed_advance_pc] = 1
+// CHECK: standard_opcode_lengths[DW_LNS_set_prologue_end] = 0
+// CHECK: standard_opcode_lengths[DW_LNS_set_epilogue_begin] = 0
+// CHECK: standard_opcode_lengths[DW_LNS_set_isa] = 1
+// We don't check include_directories as it has a temp path
+// CHECK:                 Dir  Mod Time   File Len   File Name
+// CHECK:                 ---- ---------- ---------- ---------------------------
+// CHECK: file_names[  1]    1 0x00000000 0x00000000 gen-dwarf.s
+
+// CHECK: Address            Line   Column File   ISA Flags
+// CHECK: ------------------ ------ ------ ------ --- -------------
+// CHECK: 0x0000000000000000      6      0      1   0  is_stmt
+// CHECK: 0x0000000000000005      7      0      1   0  is_stmt
+// CHECK: 0x0000000000000006      8      0      1   0  is_stmt
+// CHECK: 0x0000000000000007     11      0      1   0  is_stmt
+// CHECK: 0x0000000000000008     11      0      1   0  is_stmt end_sequence
diff --git a/test/MC/MachO/lit.local.cfg b/test/MC/MachO/lit.local.cfg
new file mode 100644
index 0000000..6c49f08
--- /dev/null
+++ b/test/MC/MachO/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.s']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/MC/MachO/reloc-pcrel-offset.s b/test/MC/MachO/reloc-pcrel-offset.s
index e0f12bf..e113e96 100644
--- a/test/MC/MachO/reloc-pcrel-offset.s
+++ b/test/MC/MachO/reloc-pcrel-offset.s
@@ -11,4 +11,7 @@
 
         .text
 _a:
+_b:
         call _a
+
+        .subsections_via_symbols
diff --git a/test/MC/MachO/reloc-pcrel.s b/test/MC/MachO/reloc-pcrel.s
index fff7cc0..1133415 100644
--- a/test/MC/MachO/reloc-pcrel.s
+++ b/test/MC/MachO/reloc-pcrel.s
@@ -8,13 +8,13 @@
 // CHECK:  ('word-1', 0x6)),
 // CHECK: # Relocation 2
 // CHECK: (('word-0', 0x40),
-// CHECK:  ('word-1', 0xd000002)),
+// CHECK:  ('word-1', 0xd000003)),
 // CHECK: # Relocation 3
 // CHECK: (('word-0', 0x3b),
-// CHECK:  ('word-1', 0xd000002)),
+// CHECK:  ('word-1', 0xd000003)),
 // CHECK: # Relocation 4
 // CHECK: (('word-0', 0x36),
-// CHECK:  ('word-1', 0xd000002)),
+// CHECK:  ('word-1', 0xd000003)),
 // CHECK: # Relocation 5
 // CHECK: (('word-0', 0xe0000031),
 // CHECK:  ('word-1', 0x4)),
@@ -36,15 +36,16 @@
 // CHECK-NEXT: ])
 
         xorl %eax,%eax
-        
+
         .globl _a
 _a:
         xorl %eax,%eax
 _b:
+_d:
         xorl %eax,%eax
 L0:
         xorl %eax,%eax
-L1:     
+L1:
 
         call L0
         call L0 - 1
@@ -60,3 +61,5 @@ L1:
         call _c + 1
 //        call _a - L0
         call _b - L0
+
+        .subsections_via_symbols
diff --git a/test/MC/Mips/elf-bigendian.ll b/test/MC/Mips/elf-bigendian.ll
new file mode 100644
index 0000000..875ba3b
--- /dev/null
+++ b/test/MC/Mips/elf-bigendian.ll
@@ -0,0 +1,45 @@
+; RUN: llc -filetype=obj -mtriple mips-unknown-linux %s -o - | elf-dump --dump-section-data  | FileCheck %s
+
+; Check that this is big endian.
+; CHECK: ('e_indent[EI_DATA]', 0x02)
+
+; Make sure that a section table (text) entry is correct.
+; CHECK:   (('sh_name', 0x{{[0]*}}5) # '.text'
+; CHECKNEXT:   ('sh_type', 0x{{[0]*}}1)
+; CHECKNEXT:   ('sh_flags', 0x{{[0]*}}6)
+; CHECKNEXT:   ('sh_addr', 0x{{{[0-9,a-f]+}})
+; CHECKNEXT:   ('sh_offset', 0x{{{[0-9,a-f]+}})
+; CHECKNEXT:   ('sh_size', 0x{{{[0-9,a-f]+}})
+; CHECKNEXT:   ('sh_link', 0x{{[0]+}})
+; CHECKNEXT:   ('sh_info', 0x{{[0]+}})
+; CHECKNEXT:   ('sh_addralign', 0x{{[0]*}}4)
+; CHECKNEXT:   ('sh_entsize', 0x{{[0]+}})
+
+; See that at least first 3 instructions are correct: GP prologue
+; CHECKNEXT:   ('_section_data', '3c1c0000 279c0000 0399e021 {{[0-9,a-f]*}}')
+
+; ModuleID = '../br1.c'
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32"
+target triple = "mips-unknown-linux"
+
+@x = global i32 1, align 4
+@str = private unnamed_addr constant [4 x i8] c"goo\00"
+@str2 = private unnamed_addr constant [4 x i8] c"foo\00"
+
+define i32 @main() nounwind {
+entry:
+  %0 = load i32* @x, align 4
+  %tobool = icmp eq i32 %0, 0
+  br i1 %tobool, label %if.end, label %foo
+
+if.end:                                           ; preds = %entry
+  %puts = tail call i32 @puts(i8* getelementptr inbounds ([4 x i8]* @str, i32 0, i32 0))
+  br label %foo
+
+foo:                                              ; preds = %entry, %if.end
+  %puts2 = tail call i32 @puts(i8* getelementptr inbounds ([4 x i8]* @str2, i32 0, i32 0))
+  ret i32 0
+}
+
+declare i32 @puts(i8* nocapture) nounwind
+
diff --git a/test/MC/Mips/elf-relsym.ll b/test/MC/Mips/elf-relsym.ll
new file mode 100644
index 0000000..0f74437
--- /dev/null
+++ b/test/MC/Mips/elf-relsym.ll
@@ -0,0 +1,29 @@
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux %s -o - | elf-dump --dump-section-data  | FileCheck %s
+
+; Check that the appropriate symbols were created.
+
+; CHECK: (('st_name', 0x{{[0-9|a-f]+}}) # '$.str'
+; CHECK: (('st_name', 0x{{[0-9|a-f]+}}) # '$.str1'
+; CHECK: (('st_name', 0x{{[0-9|a-f]+}}) # '$CPI0_0'
+; CHECK: (('st_name', 0x{{[0-9|a-f]+}}) # '$CPI0_1'
+
+@.str = private unnamed_addr constant [6 x i8] c"abcde\00", align 1
+@gc1 = external global i8*
+@.str1 = private unnamed_addr constant [5 x i8] c"fghi\00", align 1
+@gc2 = external global i8*
+@gd1 = external global double
+@gd2 = external global double
+
+define void @foo1() nounwind {
+entry:
+  store i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), i8** @gc1, align 4
+  store i8* getelementptr inbounds ([5 x i8]* @.str1, i32 0, i32 0), i8** @gc2, align 4
+  %0 = load double* @gd1, align 8
+  %add = fadd double %0, 2.500000e+00
+  store double %add, double* @gd1, align 8
+  %1 = load double* @gd2, align 8
+  %add1 = fadd double %1, 4.500000e+00
+  store double %add1, double* @gd2, align 8
+  ret void
+}
+
diff --git a/test/MC/Mips/elf-tls.ll b/test/MC/Mips/elf-tls.ll
new file mode 100644
index 0000000..b4183b8
--- /dev/null
+++ b/test/MC/Mips/elf-tls.ll
@@ -0,0 +1,36 @@
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux %s -o - | elf-dump --dump-section-data  | FileCheck %s
+
+; Check that the appropriate relocations were created.
+
+; CHECK:     ('r_type', 0x2b)
+; CHECK:     ('r_type', 0x2c)
+; CHECK:     ('r_type', 0x2d)
+
+@t1 = thread_local global i32 0, align 4
+
+define i32 @f1() nounwind {
+entry:
+  %tmp = load i32* @t1, align 4
+  ret i32 %tmp
+
+}
+
+
+@t2 = external thread_local global i32
+
+define i32 @f2() nounwind {
+entry:
+  %tmp = load i32* @t2, align 4
+  ret i32 %tmp
+
+}
+
+@f3.i = internal thread_local unnamed_addr global i32 1, align 4
+
+define i32 @f3() nounwind {
+entry:
+  %0 = load i32* @f3.i, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @f3.i, align 4
+  ret i32 %inc
+}
diff --git a/test/MC/Mips/elf_basic.s b/test/MC/Mips/elf_basic.s
new file mode 100644
index 0000000..7a79fa0
--- /dev/null
+++ b/test/MC/Mips/elf_basic.s
@@ -0,0 +1,32 @@
+// 32 bit big endian
+// RUN: llvm-mc -filetype=obj -triple mips-unknown-linux %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE32 %s
+// 32 bit little endian
+// RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-LE32 %s
+// 64 bit big endian
+// RUN: llvm-mc -filetype=obj -arch=mips64 -triple mips64-unknown-linux %s -o - | elf-dump --dump-section-data | FileCheck -check-prefix=CHECK-BE64 %s
+// 64 bit little endian
+// RUN: llvm-mc -filetype=obj -arch=mips64el -triple mips64el-unknown-linux %s -o - | elf-dump --dump-section-data | FileCheck -check-prefix=CHECK-LE64 %s
+
+// Check that we produce 32 bit with each endian.
+
+// This is 32 bit.
+// CHECK-BE32: ('e_indent[EI_CLASS]', 0x01)
+// This is big endian.
+// CHECK-BE32: ('e_indent[EI_DATA]', 0x02)
+
+// This is 32 bit.
+// CHECK-LE32: ('e_indent[EI_CLASS]', 0x01)
+// This is little endian.
+// CHECK-LE32: ('e_indent[EI_DATA]', 0x01)
+
+// Check that we produce 64 bit with each endian.
+
+// This is 64 bit.
+// CHECK-BE64: ('e_indent[EI_CLASS]', 0x02)
+// This is big endian.
+// CHECK-BE64: ('e_indent[EI_DATA]', 0x02)
+
+// This is 64 bit.
+// CHECK-LE64: ('e_indent[EI_CLASS]', 0x02)
+// This is little endian.
+// CHECK-LE64: ('e_indent[EI_DATA]', 0x01)
diff --git a/test/MC/Mips/lit.local.cfg b/test/MC/Mips/lit.local.cfg
new file mode 100644
index 0000000..d2e3b28
--- /dev/null
+++ b/test/MC/Mips/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp', '.s']
+
+targets = set(config.root.targets_to_build.split())
+if not 'Mips' in targets:
+    config.unsupported = True
+
diff --git a/test/MC/Mips/pr11877.s b/test/MC/Mips/pr11877.s
new file mode 100644
index 0000000..d354ce4
--- /dev/null
+++ b/test/MC/Mips/pr11877.s
@@ -0,0 +1,6 @@
+// RUN: llvm-mc -triple mips-unknown-unknown %s
+
+i:
+        .long    g
+g = h
+h = i
diff --git a/test/MC/X86/2011-09-06-NoNewline.s b/test/MC/X86/2011-09-06-NoNewline.s
new file mode 100644
index 0000000..bc681a3
--- /dev/null
+++ b/test/MC/X86/2011-09-06-NoNewline.s
@@ -0,0 +1,3 @@
+// RUN: llvm-mc -triple i386-unknown-unknown %s
+// PR10869
+movl %gs:8, %eax
+\ No newline at end of file
diff --git a/test/MC/X86/address-size.s b/test/MC/X86/address-size.s
new file mode 100644
index 0000000..b105b40
--- /dev/null
+++ b/test/MC/X86/address-size.s
@@ -0,0 +1,13 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+	.code64
+	movb	$0x0, (%esi)
+// CHECK: encoding: [0x67,0xc6,0x06,0x00]
+	movb	$0x0, (%rsi)
+// CHECK: encoding: [0xc6,0x06,0x00]
+
+	.code32
+	movb	$0x0, (%si)
+// CHECK: encoding: [0x67,0xc6,0x06,0x00]
+	movb	$0x0, (%esi)
+// CHECK: encoding: [0xc6,0x06,0x00]
diff --git a/test/MC/X86/dg.exp b/test/MC/X86/dg.exp
deleted file mode 100644
index ec87b69..0000000
--- a/test/MC/X86/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,s}]]
-}
diff --git a/test/MC/X86/intel-syntax-2.s b/test/MC/X86/intel-syntax-2.s
new file mode 100644
index 0000000..ca4afc3
--- /dev/null
+++ b/test/MC/X86/intel-syntax-2.s
@@ -0,0 +1,7 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown  %s | FileCheck %s
+
+	.intel_syntax
+_test:
+// CHECK:	movl	$257, -4(%rsp)
+	mov	DWORD PTR [RSP - 4], 257
+
diff --git a/test/MC/X86/intel-syntax-encoding.s b/test/MC/X86/intel-syntax-encoding.s
new file mode 100644
index 0000000..03b0551
--- /dev/null
+++ b/test/MC/X86/intel-syntax-encoding.s
@@ -0,0 +1,57 @@
+// RUN: llvm-mc -x86-asm-syntax=intel -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: encoding: [0x66,0x83,0xf0,0x0c]
+	xor	ax, 12
+// CHECK: encoding: [0x83,0xf0,0x0c]
+	xor	eax, 12
+// CHECK: encoding: [0x48,0x83,0xf0,0x0c]
+	xor	rax, 12
+
+// CHECK: encoding: [0x66,0x83,0xc8,0x0c]
+	or	ax, 12
+// CHECK: encoding: [0x83,0xc8,0x0c]
+	or	eax, 12
+// CHECK: encoding: [0x48,0x83,0xc8,0x0c]
+	or	rax, 12
+
+// CHECK: encoding: [0x66,0x83,0xf8,0x0c]
+	cmp	ax, 12
+// CHECK: encoding: [0x83,0xf8,0x0c]
+	cmp	eax, 12
+// CHECK: encoding: [0x48,0x83,0xf8,0x0c]
+	cmp	rax, 12
+
+// CHECK: encoding: [0x48,0x89,0x44,0x24,0xf0]	
+	mov	QWORD PTR [RSP - 16], RAX
+
+// CHECK: encoding: [0x66,0x83,0xc0,0xf4]
+	add	ax, -12
+// CHECK: encoding: [0x83,0xc0,0xf4]
+	add	eax, -12
+// CHECK: encoding: [0x48,0x83,0xc0,0xf4]
+	add	rax, -12
+
+LBB0_3:
+// CHECK: encoding: [0xeb,A]
+	jmp	LBB0_3
+// CHECK: encoding: [0xf2,0x0f,0x10,0x2c,0x25,0xf8,0xff,0xff,0xff]
+        movsd   XMM5, QWORD PTR [-8]
+
+// CHECK: encoding: [0xd1,0xe7]
+	shl	EDI, 1
+
+// CHECK: encoding: [0x0f,0xc2,0xd1,0x01]
+	cmpltps XMM2, XMM1
+
+// CHECK: encoding: [0xc3]
+    ret
+
+// CHECK: encoding: [0xcb]
+    retf
+
+// CHECK: encoding: [0xc2,0x08,0x00]
+    ret 8
+
+// CHECK: encoding: [0xca,0x08,0x00]
+    retf 8
+
diff --git a/test/MC/X86/intel-syntax.s b/test/MC/X86/intel-syntax.s
new file mode 100644
index 0000000..7cd5677
--- /dev/null
+++ b/test/MC/X86/intel-syntax.s
@@ -0,0 +1,66 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel %s | FileCheck %s
+
+_test:
+	xor	EAX, EAX
+	ret
+
+_main:
+// CHECK:	movl	$257, -4(%rsp)
+	mov	DWORD PTR [RSP - 4], 257
+// CHECK:	movl	$258, 4(%rsp)
+	mov	DWORD PTR [RSP + 4], 258
+// CHECK:	movq	$123, -16(%rsp)
+	mov	QWORD PTR [RSP - 16], 123
+// CHECK:	movb	$97, -17(%rsp)
+	mov	BYTE PTR [RSP - 17], 97
+// CHECK:	movl	-4(%rsp), %eax
+	mov	EAX, DWORD PTR [RSP - 4]
+// CHECK:	movq    (%rsp), %rax
+	mov     RAX, QWORD PTR [RSP]
+// CHECK:	movl	$-4, -4(%rsp)
+	mov	DWORD PTR [RSP - 4], -4
+// CHECK:	movq	0, %rcx
+	mov	RCX, QWORD PTR [0]
+// CHECK:	movl	-24(%rsp,%rax,4), %eax	
+	mov	EAX, DWORD PTR [RSP + 4*RAX - 24]
+// CHECK:	movb	%dil, (%rdx,%rcx)
+	mov	BYTE PTR [RDX + RCX], DIL
+// CHECK:	movzwl	2(%rcx), %edi
+	movzx	EDI, WORD PTR [RCX + 2]
+// CHECK:	callq	_test
+	call	_test
+// CHECK:	andw	$12,	%ax
+	and	ax, 12
+// CHECK:	andw	$-12,	%ax
+	and	ax, -12
+// CHECK:	andw	$257,	%ax
+	and	ax, 257
+// CHECK:	andw	$-257,	%ax
+	and	ax, -257
+// CHECK:	andl	$12,	%eax
+	and	eax, 12
+// CHECK:	andl	$-12,	%eax
+	and	eax, -12
+// CHECK:	andl	$257,	%eax
+	and	eax, 257
+// CHECK:	andl	$-257,	%eax
+	and	eax, -257
+// CHECK:	andq	$12,	%rax
+	and	rax, 12
+// CHECK:	andq	$-12,	%rax
+	and	rax, -12
+// CHECK:	andq	$257,	%rax
+	and	rax, 257
+// CHECK:	andq	$-257,	%rax
+	and	rax, -257
+// CHECK:	fld	%st(0)
+	fld	ST(0)
+// CHECK:	movl	%fs:(%rdi), %eax
+        mov     EAX, DWORD PTR FS:[RDI]
+// CHECK:	leal	(,%rdi,4), %r8d
+        lea     R8D, DWORD PTR [4*RDI]
+// CHECK:        movl    _fnan(,%ecx,4), %ecx
+        mov     ECX, DWORD PTR [4*ECX + _fnan]
+// CHECK:       movq    %fs:320, %rax
+        mov     RAX, QWORD PTR FS:[320]
+	ret
diff --git a/test/MC/X86/lit.local.cfg b/test/MC/X86/lit.local.cfg
new file mode 100644
index 0000000..eee568e
--- /dev/null
+++ b/test/MC/X86/lit.local.cfg
@@ -0,0 +1,12 @@
+config.suffixes = ['.ll', '.c', '.cpp', '.s']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
diff --git a/test/MC/X86/x86-32-coverage.s b/test/MC/X86/x86-32-coverage.s
index 0954ce2..6c27b85 100644
--- a/test/MC/X86/x86-32-coverage.s
+++ b/test/MC/X86/x86-32-coverage.s
@@ -500,6 +500,9 @@
 // CHECK: 	sysexit
         	sysexit
 
+// CHECK: 	sysexitl
+        	sysexitl
+
 // CHECK: 	ud2
         	ud2
 
@@ -4417,6 +4420,10 @@
 // CHECK:  encoding: [0x0f,0x35]
         	sysexit
 
+// CHECK: sysexitl
+// CHECK:  encoding: [0x0f,0x35]
+        	sysexitl
+
 // CHECK: fxsave	3735928559(%ebx,%ecx,8)
 // CHECK:  encoding: [0x0f,0xae,0x84,0xcb,0xef,0xbe,0xad,0xde]
         	fxsave	0xdeadbeef(%ebx,%ecx,8)
@@ -18401,6 +18408,9 @@
 // CHECK: 	vmcall
         	vmcall
 
+// CHECK: 	vmfunc
+        	vmfunc
+
 // CHECK: 	vmclear	3735928559(%ebx,%ecx,8)
         	vmclear	0xdeadbeef(%ebx,%ecx,8)
 
@@ -18458,6 +18468,30 @@
 // CHECK: 	vmxon	305419896
         	vmxon	0x12345678
 
+// CHECK: 	vmrun %eax
+        	vmrun %eax
+
+// CHECK: 	vmmcall
+        	vmmcall
+
+// CHECK: 	vmload %eax
+        	vmload %eax
+
+// CHECK: 	vmsave %eax
+        	vmsave %eax
+
+// CHECK: 	stgi
+        	stgi
+
+// CHECK: 	clgi
+        	clgi
+
+// CHECK: 	skinit %eax
+        	skinit %eax
+
+// CHECK: 	invlpga %ecx, %eax
+        	invlpga %ecx, %eax
+
 // CHECK: 	phaddw	3735928559(%ebx,%ecx,8), %mm3
         	phaddw	0xdeadbeef(%ebx,%ecx,8),%mm3
 
diff --git a/test/MC/X86/x86-32.s b/test/MC/X86/x86-32.s
index 19f1445..57a0037 100644
--- a/test/MC/X86/x86-32.s
+++ b/test/MC/X86/x86-32.s
@@ -28,6 +28,9 @@
 	vmcall
 // CHECK: vmcall
 // CHECK: encoding: [0x0f,0x01,0xc1]
+	vmfunc
+// CHECK: vmfunc
+// CHECK: encoding: [0x0f,0x01,0xd4]
 	vmlaunch
 // CHECK: vmlaunch
 // CHECK: encoding: [0x0f,0x01,0xc2]
@@ -41,7 +44,32 @@
 // CHECK: swapgs
 // CHECK: encoding: [0x0f,0x01,0xf8]
 
-rdtscp
+	vmrun %eax
+// CHECK: vmrun %eax
+// CHECK: encoding: [0x0f,0x01,0xd8]
+	vmmcall
+// CHECK: vmmcall
+// CHECK: encoding: [0x0f,0x01,0xd9]
+	vmload %eax
+// CHECK: vmload %eax
+// CHECK: encoding: [0x0f,0x01,0xda]
+	vmsave %eax
+// CHECK: vmsave %eax
+// CHECK: encoding: [0x0f,0x01,0xdb]
+	stgi
+// CHECK: stgi
+// CHECK: encoding: [0x0f,0x01,0xdc]
+	clgi
+// CHECK: clgi
+// CHECK: encoding: [0x0f,0x01,0xdd]
+	skinit %eax
+// CHECK: skinit %eax
+// CHECK: encoding: [0x0f,0x01,0xde]
+	invlpga %ecx, %eax
+// CHECK: invlpga %ecx, %eax
+// CHECK: encoding: [0x0f,0x01,0xdf]
+
+	rdtscp
 // CHECK: rdtscp
 // CHECK:  encoding: [0x0f,0x01,0xf9]
 
@@ -69,9 +97,9 @@ rdtscp
         sal $1, %eax
 
 // moffset forms of moves, rdar://7947184
-movb	0, %al    // CHECK: movb 0, %al  # encoding: [0xa0,A,A,A,A]
-movw	0, %ax    // CHECK: movw 0, %ax  # encoding: [0x66,0xa1,A,A,A,A]
-movl	0, %eax   // CHECK: movl 0, %eax  # encoding: [0xa1,A,A,A,A]
+movb	0, %al    // CHECK: movb 0, %al  # encoding: [0xa0,0x00,0x00,0x00,0x00]
+movw	0, %ax    // CHECK: movw 0, %ax  # encoding: [0x66,0xa1,0x00,0x00,0x00,0x00]
+movl	0, %eax   // CHECK: movl 0, %eax  # encoding: [0xa1,0x00,0x00,0x00,0x00]
 
 // rdar://7973775
 into
@@ -962,3 +990,11 @@ xchgl %ecx, %eax
 // CHECK: xchgl %ecx, %eax
 // CHECK: encoding: [0x91]
 xchgl %eax, %ecx
+
+// CHECK: retw
+// CHECK: encoding: [0x66,0xc3]
+retw
+
+// CHECK: lretw
+// CHECK: encoding: [0x66,0xcb]
+lretw
diff --git a/test/MC/X86/x86-64.s b/test/MC/X86/x86-64.s
index a9cdaa4..6a2d5bb 100644
--- a/test/MC/X86/x86-64.s
+++ b/test/MC/X86/x86-64.s
@@ -50,6 +50,9 @@
 // CHECK: ret
         ret
         
+// CHECK: retw
+        retw
+        
 // FIXME: Check that this matches SUB32ri8
 // CHECK: subl $1, %eax
         subl $1, %eax
@@ -339,15 +342,28 @@ rclb	$1, %bl   // CHECK: rclb %bl     # encoding: [0xd0,0xd3]
 rclb	$2, %bl   // CHECK: rclb $2, %bl # encoding: [0xc0,0xd3,0x02]
 
 // rdar://8418316
-// CHECK: shldw	$1, %bx, %bx
-// CHECK: shldw	$1, %bx, %bx
-// CHECK: shrdw	$1, %bx, %bx
-// CHECK: shrdw	$1, %bx, %bx
-
-shld	%bx,%bx
-shld	$1, %bx,%bx
-shrd	%bx,%bx
-shrd	$1, %bx,%bx
+// PR12173
+// CHECK: shldw	%cl, %bx, %dx
+// CHECK: shldw	%cl, %bx, %dx
+// CHECK: shldw	$1, %bx, %dx
+// CHECK: shldw	%cl, %bx, (%rax)
+// CHECK: shldw	%cl, %bx, (%rax)
+// CHECK: shrdw	%cl, %bx, %dx
+// CHECK: shrdw	%cl, %bx, %dx
+// CHECK: shrdw	$1, %bx, %dx
+// CHECK: shrdw	%cl, %bx, (%rax)
+// CHECK: shrdw	%cl, %bx, (%rax)
+
+shld  %bx, %dx
+shld  %cl, %bx, %dx
+shld  $1, %bx, %dx
+shld  %bx, (%rax)
+shld  %cl, %bx, (%rax)
+shrd  %bx, %dx
+shrd  %cl, %bx, %dx
+shrd  $1, %bx, %dx
+shrd  %bx, (%rax)
+shrd  %cl, %bx, (%rax)
 
 // CHECK: sldtl	%ecx
 // CHECK: encoding: [0x0f,0x00,0xc1]
@@ -459,6 +475,7 @@ cwtl  // CHECK: cwtl
 cbw   // CHECK: cbtw
 cwd   // CHECK: cwtd
 cdq   // CHECK: cltd
+cqo   // CHECK: cqto
 
 // rdar://8456378 and PR7557 - fstsw
 fstsw %ax
@@ -827,6 +844,7 @@ iretq
 lretq  // CHECK: lretq # encoding: [0x48,0xcb]
 lretl  // CHECK: lretl # encoding: [0xcb]
 lret   // CHECK: lretl # encoding: [0xcb]
+lretw  // CHECK: lretw # encoding: [0x66,0xcb]
 
 // rdar://8403907
 sysret
@@ -1039,6 +1057,9 @@ xsetbv // CHECK: xsetbv # encoding: [0x0f,0x01,0xd1]
 	movsl
 	movsl	%ds:(%rsi), %es:(%rdi)
 	movsl	(%rsi), %es:(%rdi)
+// rdar://10883092
+// CHECK: movsd
+	movsl	(%rsi), (%rdi)
 
 // CHECK: movsq # encoding: [0x48,0xa5]
 // CHECK: movsq
@@ -1191,3 +1212,15 @@ xchgl %ecx, %eax
 // CHECK: xchgl %ecx, %eax
 // CHECK: encoding: [0x91]
 xchgl %eax, %ecx
+
+// CHECK: sysexit
+// CHECK: encoding: [0x0f,0x35]
+sysexit
+
+// CHECK: sysexitl
+// CHECK: encoding: [0x0f,0x35]
+sysexitl
+
+// CHECK: sysexitq
+// CHECK: encoding: [0x48,0x0f,0x35]
+sysexitq
diff --git a/test/MC/X86/x86_64-avx-encoding.s b/test/MC/X86/x86_64-avx-encoding.s
index d3b226f..bd5559a 100644
--- a/test/MC/X86/x86_64-avx-encoding.s
+++ b/test/MC/X86/x86_64-avx-encoding.s
@@ -600,6 +600,774 @@ vdivpd  -4(%rcx,%rbx,8), %xmm10, %xmm11
 // CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x03]
           vcmpunordsd   -4(%rbx,%rcx,8), %xmm12, %xmm13
 
+// CHECK: vcmpps  $8, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x08]
+          vcmpeq_uqps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $9, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x09]
+          vcmpngeps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $10, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0a]
+          vcmpngtps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $11, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0b]
+          vcmpfalseps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $12, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0c]
+          vcmpneq_oqps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $13, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0d]
+          vcmpgeps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $14, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0e]
+          vcmpgtps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $15, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0f]
+          vcmptrueps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $16, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x10]
+          vcmpeq_osps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $17, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x11]
+          vcmplt_oqps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $18, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x12]
+          vcmple_oqps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $19, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x13]
+          vcmpunord_sps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $20, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x14]
+          vcmpneq_usps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $21, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x15]
+          vcmpnlt_uqps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $22, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x16]
+          vcmpnle_uqps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $23, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x17]
+          vcmpord_sps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $24, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x18]
+          vcmpeq_usps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $25, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x19]
+          vcmpnge_uqps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $26, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1a]
+          vcmpngt_uqps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $27, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1b]
+          vcmpfalse_osps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $28, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1c]
+          vcmpneq_osps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $29, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1d]
+          vcmpge_oqps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $30, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1e]
+          vcmpgt_oqps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $31, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1f]
+          vcmptrue_usps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x08]
+          vcmpeq_uqps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $9, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x09]
+          vcmpngeps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $10, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x0a]
+          vcmpngtps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $11, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x0b]
+          vcmpfalseps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $12, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x0c]
+          vcmpneq_oqps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $13, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x0d]
+          vcmpgeps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $14, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc8,0xc2,0x54,0xcb,0xfc,0x0e]
+          vcmpgtps   -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmpps  $15, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x0f]
+          vcmptrueps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $16, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x10]
+          vcmpeq_osps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $17, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x11]
+          vcmplt_oqps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $18, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x12]
+          vcmple_oqps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $19, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x13]
+          vcmpunord_sps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $20, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x14]
+          vcmpneq_usps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $21, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x15]
+          vcmpnlt_uqps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $22, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc8,0xc2,0x54,0xcb,0xfc,0x16]
+          vcmpnle_uqps   -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmpps  $23, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x17]
+          vcmpord_sps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $24, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x18]
+          vcmpeq_usps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $25, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x19]
+          vcmpnge_uqps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $26, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x1a]
+          vcmpngt_uqps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $27, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x1b]
+          vcmpfalse_osps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $28, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x1c]
+          vcmpneq_osps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $29, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x1d]
+          vcmpge_oqps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $30, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc8,0xc2,0x54,0xcb,0xfc,0x1e]
+          vcmpgt_oqps   -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmpps  $31, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x1f]
+          vcmptrue_usps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $8, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x08]
+          vcmpeq_uqpd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $9, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x09]
+          vcmpngepd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $10, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x0a]
+          vcmpngtpd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $11, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x0b]
+          vcmpfalsepd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $12, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x0c]
+          vcmpneq_oqpd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $13, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x0d]
+          vcmpgepd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $14, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x0e]
+          vcmpgtpd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $15, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x0f]
+          vcmptruepd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $16, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x10]
+          vcmpeq_ospd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $17, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x11]
+          vcmplt_oqpd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $18, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x12]
+          vcmple_oqpd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $19, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x13]
+          vcmpunord_spd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $20, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x14]
+          vcmpneq_uspd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $21, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x15]
+          vcmpnlt_uqpd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $22, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x16]
+          vcmpnle_uqpd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $23, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x17]
+          vcmpord_spd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $24, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x18]
+          vcmpeq_uspd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $25, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x19]
+          vcmpnge_uqpd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $26, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x1a]
+          vcmpngt_uqpd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $27, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x1b]
+          vcmpfalse_ospd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $28, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x1c]
+          vcmpneq_ospd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $29, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x1d]
+          vcmpge_oqpd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $30, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x1e]
+          vcmpgt_oqpd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $31, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x1f]
+          vcmptrue_uspd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x08]
+          vcmpeq_uqpd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $9, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x09]
+          vcmpngepd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $10, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x0a]
+          vcmpngtpd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $11, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x0b]
+          vcmpfalsepd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $12, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x0c]
+          vcmpneq_oqpd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $13, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x0d]
+          vcmpgepd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $14, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc9,0xc2,0x54,0xcb,0xfc,0x0e]
+          vcmpgtpd   -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmppd  $15, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x0f]
+          vcmptruepd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $16, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x10]
+          vcmpeq_ospd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $17, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x11]
+          vcmplt_oqpd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $18, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x12]
+          vcmple_oqpd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $19, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x13]
+          vcmpunord_spd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $20, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x14]
+          vcmpneq_uspd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $21, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x15]
+          vcmpnlt_uqpd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $22, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc9,0xc2,0x54,0xcb,0xfc,0x16]
+          vcmpnle_uqpd   -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmppd  $23, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x17]
+          vcmpord_spd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $24, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x18]
+          vcmpeq_uspd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $25, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x19]
+          vcmpnge_uqpd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $26, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x1a]
+          vcmpngt_uqpd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $27, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x1b]
+          vcmpfalse_ospd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $28, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x1c]
+          vcmpneq_ospd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $29, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x1d]
+          vcmpge_oqpd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $30, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc9,0xc2,0x54,0xcb,0xfc,0x1e]
+          vcmpgt_oqpd   -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmppd  $31, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x1f]
+          vcmptrue_uspd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $8, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x08]
+          vcmpeq_uqss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $9, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x09]
+          vcmpngess   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $10, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x0a]
+          vcmpngtss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $11, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x0b]
+          vcmpfalsess   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $12, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x0c]
+          vcmpneq_oqss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $13, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x0d]
+          vcmpgess   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $14, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x0e]
+          vcmpgtss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $15, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x0f]
+          vcmptruess   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $16, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x10]
+          vcmpeq_osss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $17, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x11]
+          vcmplt_oqss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $18, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x12]
+          vcmple_oqss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $19, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x13]
+          vcmpunord_sss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $20, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x14]
+          vcmpneq_usss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $21, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x15]
+          vcmpnlt_uqss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $22, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x16]
+          vcmpnle_uqss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $23, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x17]
+          vcmpord_sss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $24, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x18]
+          vcmpeq_usss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $25, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x19]
+          vcmpnge_uqss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $26, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x1a]
+          vcmpngt_uqss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $27, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x1b]
+          vcmpfalse_osss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $28, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x1c]
+          vcmpneq_osss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $29, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x1d]
+          vcmpge_oqss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $30, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x1e]
+          vcmpgt_oqss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $31, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x1f]
+          vcmptrue_usss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x08]
+          vcmpeq_uqss   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $9, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x09]
+          vcmpngess   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $10, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x0a]
+          vcmpngtss   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $11, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x0b]
+          vcmpfalsess   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $12, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x0c]
+          vcmpneq_oqss   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $13, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x0d]
+          vcmpgess   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $14, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x0e]
+          vcmpgtss   -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmpss  $15, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x0f]
+          vcmptruess   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $16, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x10]
+          vcmpeq_osss   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $17, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x11]
+          vcmplt_oqss   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $18, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x12]
+          vcmple_oqss   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $19, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x13]
+          vcmpunord_sss   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $20, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x14]
+          vcmpneq_usss   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $21, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x15]
+          vcmpnlt_uqss   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $22, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x16]
+          vcmpnle_uqss   -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmpss  $23, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x17]
+          vcmpord_sss   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $24, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x18]
+          vcmpeq_usss   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $25, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x19]
+          vcmpnge_uqss   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $26, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x1a]
+          vcmpngt_uqss   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $27, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x1b]
+          vcmpfalse_osss   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $28, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x1c]
+          vcmpneq_osss   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $29, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x1d]
+          vcmpge_oqss   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $30, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x1e]
+          vcmpgt_oqss   -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmpss  $31, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x1f]
+          vcmptrue_usss   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $8, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x08]
+          vcmpeq_uqsd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $9, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x09]
+          vcmpngesd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $10, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x0a]
+          vcmpngtsd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $11, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x0b]
+          vcmpfalsesd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $12, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x0c]
+          vcmpneq_oqsd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $13, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x0d]
+          vcmpgesd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $14, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x0e]
+          vcmpgtsd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $15, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x0f]
+          vcmptruesd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $16, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x10]
+          vcmpeq_ossd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $17, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x11]
+          vcmplt_oqsd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $18, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x12]
+          vcmple_oqsd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $19, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x13]
+          vcmpunord_ssd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $20, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x14]
+          vcmpneq_ussd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $21, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x15]
+          vcmpnlt_uqsd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $22, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x16]
+          vcmpnle_uqsd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $23, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x17]
+          vcmpord_ssd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $24, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x18]
+          vcmpeq_ussd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $25, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x19]
+          vcmpnge_uqsd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $26, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x1a]
+          vcmpngt_uqsd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $27, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x1b]
+          vcmpfalse_ossd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $28, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x1c]
+          vcmpneq_ossd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $29, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x1d]
+          vcmpge_oqsd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $30, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x1e]
+          vcmpgt_oqsd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $31, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x1f]
+          vcmptrue_ussd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x08]
+          vcmpeq_uqsd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $9, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x09]
+          vcmpngesd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $10, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x0a]
+          vcmpngtsd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $11, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x0b]
+          vcmpfalsesd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $12, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x0c]
+          vcmpneq_oqsd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $13, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x0d]
+          vcmpgesd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $14, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x0e]
+          vcmpgtsd   -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmpsd  $15, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x0f]
+          vcmptruesd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $16, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x10]
+          vcmpeq_ossd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $17, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x11]
+          vcmplt_oqsd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $18, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x12]
+          vcmple_oqsd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $19, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x13]
+          vcmpunord_ssd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $20, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x14]
+          vcmpneq_ussd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $21, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x15]
+          vcmpnlt_uqsd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $22, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x16]
+          vcmpnle_uqsd   -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmpsd  $23, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x17]
+          vcmpord_ssd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $24, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x18]
+          vcmpeq_ussd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $25, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x19]
+          vcmpnge_uqsd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $26, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x1a]
+          vcmpngt_uqsd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $27, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x1b]
+          vcmpfalse_ossd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $28, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x1c]
+          vcmpneq_ossd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $29, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x1d]
+          vcmpge_oqsd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $30, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x1e]
+          vcmpgt_oqsd   -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmpsd  $31, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x1f]
+          vcmptrue_ussd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
 // CHECK: vucomiss  %xmm11, %xmm12
 // CHECK: encoding: [0xc4,0x41,0x78,0x2e,0xe3]
           vucomiss  %xmm11, %xmm12
@@ -3346,3 +4114,10 @@ vdivpd  -4(%rcx,%rbx,8), %xmm10, %xmm11
 _foo:
   nop
   vpshufb _foo(%rip), %xmm0, %xmm0
+
+// CHECK: vblendvps %ymm1, _foo2(%rip), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x4a,0x05,A,A,A,A,0x10]
+// CHECK: fixup A - offset: 5, value: _foo2-5
+_foo2:
+  nop
+  vblendvps %ymm1, _foo2(%rip), %ymm0, %ymm0
diff --git a/test/MC/X86/x86_64-bmi-encoding.s b/test/MC/X86/x86_64-bmi-encoding.s
new file mode 100644
index 0000000..3e69d4a
--- /dev/null
+++ b/test/MC/X86/x86_64-bmi-encoding.s
@@ -0,0 +1,202 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: blsmskl  %r11d, %r10d
+// CHECK: encoding: [0xc4,0xc2,0x28,0xf3,0xd3]
+          blsmskl  %r11d, %r10d
+
+// CHECK: blsmskq  %r11, %r10
+// CHECK: encoding: [0xc4,0xc2,0xa8,0xf3,0xd3]
+          blsmskq  %r11, %r10
+
+// CHECK: blsmskl  (%rax), %r10d
+// CHECK: encoding: [0xc4,0xe2,0x28,0xf3,0x10]
+          blsmskl  (%rax), %r10d
+
+// CHECK: blsmskq  (%rax), %r10
+// CHECK: encoding: [0xc4,0xe2,0xa8,0xf3,0x10]
+          blsmskq  (%rax), %r10
+
+// CHECK: blsil  %r11d, %r10d
+// CHECK: encoding: [0xc4,0xc2,0x28,0xf3,0xdb]
+          blsil  %r11d, %r10d
+
+// CHECK: blsiq  %r11, %r10
+// CHECK: encoding: [0xc4,0xc2,0xa8,0xf3,0xdb]
+          blsiq  %r11, %r10
+
+// CHECK: blsil  (%rax), %r10d
+// CHECK: encoding: [0xc4,0xe2,0x28,0xf3,0x18]
+          blsil  (%rax), %r10d
+
+// CHECK: blsiq  (%rax), %r10
+// CHECK: encoding: [0xc4,0xe2,0xa8,0xf3,0x18]
+          blsiq  (%rax), %r10
+
+// CHECK: blsrl  %r11d, %r10d
+// CHECK: encoding: [0xc4,0xc2,0x28,0xf3,0xcb]
+          blsrl  %r11d, %r10d
+
+// CHECK: blsrq  %r11, %r10
+// CHECK: encoding: [0xc4,0xc2,0xa8,0xf3,0xcb]
+          blsrq  %r11, %r10
+
+// CHECK: blsrl  (%rax), %r10d
+// CHECK: encoding: [0xc4,0xe2,0x28,0xf3,0x08]
+          blsrl  (%rax), %r10d
+
+// CHECK: blsrq  (%rax), %r10
+// CHECK: encoding: [0xc4,0xe2,0xa8,0xf3,0x08]
+          blsrq  (%rax), %r10
+
+// CHECK: andnl  (%rax), %r11d, %r10d
+// CHECK: encoding: [0xc4,0x62,0x20,0xf2,0x10]
+          andnl  (%rax), %r11d, %r10d
+
+// CHECK: andnq  (%rax), %r11, %r10
+// CHECK: encoding: [0xc4,0x62,0xa0,0xf2,0x10]
+          andnq  (%rax), %r11, %r10
+
+// CHECK: bextrl %r12d, (%rax), %r10d
+// CHECK: encoding: [0xc4,0x62,0x18,0xf7,0x10]
+          bextrl %r12d, (%rax), %r10d
+
+// CHECK: bextrl %r12d, %r11d, %r10d
+// CHECK: encoding: [0xc4,0x42,0x18,0xf7,0xd3]
+          bextrl %r12d, %r11d, %r10d
+
+// CHECK: bextrq %r12, (%rax), %r10
+// CHECK: encoding: [0xc4,0x62,0x98,0xf7,0x10]
+          bextrq %r12, (%rax), %r10
+
+// CHECK: bextrq %r12, %r11, %r10
+// CHECK: encoding: [0xc4,0x42,0x98,0xf7,0xd3]
+          bextrq %r12, %r11, %r10
+
+// CHECK: bzhil %r12d, (%rax), %r10d
+// CHECK: encoding: [0xc4,0x62,0x18,0xf5,0x10]
+          bzhil %r12d, (%rax), %r10d
+
+// CHECK: bzhil %r12d, %r11d, %r10d
+// CHECK: encoding: [0xc4,0x42,0x18,0xf5,0xd3]
+          bzhil %r12d, %r11d, %r10d
+
+// CHECK: bzhiq %r12, (%rax), %r10
+// CHECK: encoding: [0xc4,0x62,0x98,0xf5,0x10]
+          bzhiq %r12, (%rax), %r10
+
+// CHECK: bzhiq %r12, %r11, %r10
+// CHECK: encoding: [0xc4,0x42,0x98,0xf5,0xd3]
+          bzhiq %r12, %r11, %r10
+
+// CHECK: pextl %r12d, %r11d, %r10d
+// CHECK: encoding: [0xc4,0x42,0x22,0xf5,0xd4]
+          pextl %r12d, %r11d, %r10d
+
+// CHECK: pextl (%rax), %r11d, %r10d
+// CHECK: encoding: [0xc4,0x62,0x22,0xf5,0x10]
+          pextl (%rax), %r11d, %r10d
+
+// CHECK: pextq %r12, %r11, %r10
+// CHECK: encoding: [0xc4,0x42,0xa2,0xf5,0xd4]
+          pextq %r12, %r11, %r10
+
+// CHECK: pextq (%rax), %r11, %r10
+// CHECK: encoding: [0xc4,0x62,0xa2,0xf5,0x10]
+          pextq (%rax), %r11, %r10
+
+// CHECK: pdepl %r12d, %r11d, %r10d
+// CHECK: encoding: [0xc4,0x42,0x23,0xf5,0xd4]
+          pdepl %r12d, %r11d, %r10d
+
+// CHECK: pdepl (%rax), %r11d, %r10d
+// CHECK: encoding: [0xc4,0x62,0x23,0xf5,0x10]
+          pdepl (%rax), %r11d, %r10d
+
+// CHECK: pdepq %r12, %r11, %r10
+// CHECK: encoding: [0xc4,0x42,0xa3,0xf5,0xd4]
+          pdepq %r12, %r11, %r10
+
+// CHECK: pdepq (%rax), %r11, %r10
+// CHECK: encoding: [0xc4,0x62,0xa3,0xf5,0x10]
+          pdepq (%rax), %r11, %r10
+
+// CHECK: mulxl %r12d, %r11d, %r10d
+// CHECK: encoding: [0xc4,0x42,0x23,0xf6,0xd4]
+          mulxl %r12d, %r11d, %r10d
+
+// CHECK: mulxl (%rax), %r11d, %r10d
+// CHECK: encoding: [0xc4,0x62,0x23,0xf6,0x10]
+          mulxl (%rax), %r11d, %r10d
+
+// CHECK: mulxq %r12, %r11, %r10
+// CHECK: encoding: [0xc4,0x42,0xa3,0xf6,0xd4]
+          mulxq %r12, %r11, %r10
+
+// CHECK: mulxq (%rax), %r11, %r10
+// CHECK: encoding: [0xc4,0x62,0xa3,0xf6,0x10]
+          mulxq (%rax), %r11, %r10
+
+// CHECK: rorxl $10, %r12d, %r10d
+// CHECK: encoding: [0xc4,0x43,0x7b,0xf0,0xd4,0x0a]
+          rorxl $10, %r12d, %r10d
+
+// CHECK: rorxl $31, (%rax), %r10d
+// CHECK: encoding: [0xc4,0x63,0x7b,0xf0,0x10,0x1f]
+          rorxl $31, (%rax), %r10d
+
+// CHECK: rorxq $1, %r12, %r10
+// CHECK: encoding: [0xc4,0x43,0xfb,0xf0,0xd4,0x01]
+          rorxq $1, %r12, %r10
+
+// CHECK: rorxq $63, (%rax), %r10
+// CHECK: encoding: [0xc4,0x63,0xfb,0xf0,0x10,0x3f]
+          rorxq $63, (%rax), %r10
+
+// CHECK: shlxl %r12d, (%rax), %r10d
+// CHECK: encoding: [0xc4,0x62,0x19,0xf7,0x10]
+          shlxl %r12d, (%rax), %r10d
+
+// CHECK: shlxl %r12d, %r11d, %r10d
+// CHECK: encoding: [0xc4,0x42,0x19,0xf7,0xd3]
+          shlxl %r12d, %r11d, %r10d
+
+// CHECK: shlxq %r12, (%rax), %r10
+// CHECK: encoding: [0xc4,0x62,0x99,0xf7,0x10]
+          shlxq %r12, (%rax), %r10
+
+// CHECK: shlxq %r12, %r11, %r10
+// CHECK: encoding: [0xc4,0x42,0x99,0xf7,0xd3]
+          shlxq %r12, %r11, %r10
+
+// CHECK: sarxl %r12d, (%rax), %r10d
+// CHECK: encoding: [0xc4,0x62,0x1a,0xf7,0x10]
+          sarxl %r12d, (%rax), %r10d
+
+// CHECK: sarxl %r12d, %r11d, %r10d
+// CHECK: encoding: [0xc4,0x42,0x1a,0xf7,0xd3]
+          sarxl %r12d, %r11d, %r10d
+
+// CHECK: sarxq %r12, (%rax), %r10
+// CHECK: encoding: [0xc4,0x62,0x9a,0xf7,0x10]
+          sarxq %r12, (%rax), %r10
+
+// CHECK: sarxq %r12, %r11, %r10
+// CHECK: encoding: [0xc4,0x42,0x9a,0xf7,0xd3]
+          sarxq %r12, %r11, %r10
+
+// CHECK: shrxl %r12d, (%rax), %r10d
+// CHECK: encoding: [0xc4,0x62,0x1b,0xf7,0x10]
+          shrxl %r12d, (%rax), %r10d
+
+// CHECK: shrxl %r12d, %r11d, %r10d
+// CHECK: encoding: [0xc4,0x42,0x1b,0xf7,0xd3]
+          shrxl %r12d, %r11d, %r10d
+
+// CHECK: shrxq %r12, (%rax), %r10
+// CHECK: encoding: [0xc4,0x62,0x9b,0xf7,0x10]
+          shrxq %r12, (%rax), %r10
+
+// CHECK: shrxq %r12, %r11, %r10
+// CHECK: encoding: [0xc4,0x42,0x9b,0xf7,0xd3]
+          shrxq %r12, %r11, %r10
+
diff --git a/test/MC/X86/x86_64-fma4-encoding.s b/test/MC/X86/x86_64-fma4-encoding.s
new file mode 100644
index 0000000..805fc23
--- /dev/null
+++ b/test/MC/X86/x86_64-fma4-encoding.s
@@ -0,0 +1,391 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// vfmadd
+// CHECK: vfmaddss  (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6a,0x01,0x10]
+          vfmaddss  (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddss   %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x6a,0x01,0x10]
+          vfmaddss   %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmaddss   %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6a,0xc2,0x10]
+          vfmaddss   %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddsd  (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6b,0x01,0x10]
+          vfmaddsd  (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddsd   %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x6b,0x01,0x10]
+          vfmaddsd   %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmaddsd   %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6b,0xc2,0x10]
+          vfmaddsd   %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddps  (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x68,0x01,0x10]
+          vfmaddps  (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddps   %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x68,0x01,0x10]
+          vfmaddps   %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmaddps   %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x68,0xc2,0x10]
+          vfmaddps   %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddpd  (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x69,0x01,0x10]
+          vfmaddpd  (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddpd   %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x69,0x01,0x10]
+          vfmaddpd   %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmaddpd   %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x69,0xc2,0x10]
+          vfmaddpd   %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddps  (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x68,0x01,0x10]
+          vfmaddps  (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmaddps   %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x68,0x01,0x10]
+          vfmaddps   %ymm1, (%rcx),%ymm0, %ymm0
+
+// CHECK: vfmaddps   %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x68,0xc2,0x10]
+          vfmaddps   %ymm2, %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmaddpd  (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x69,0x01,0x10]
+          vfmaddpd  (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmaddpd   %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x69,0x01,0x10]
+          vfmaddpd   %ymm1, (%rcx),%ymm0, %ymm0
+
+// CHECK: vfmaddpd   %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x69,0xc2,0x10]
+          vfmaddpd   %ymm2, %ymm1, %ymm0, %ymm0
+
+// vfmsub
+// CHECK: vfmsubss  (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6e,0x01,0x10]
+          vfmsubss  (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubss   %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x6e,0x01,0x10]
+          vfmsubss   %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmsubss   %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6e,0xc2,0x10]
+          vfmsubss   %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubsd  (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6f,0x01,0x10]
+          vfmsubsd  (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubsd   %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x6f,0x01,0x10]
+          vfmsubsd   %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmsubsd   %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6f,0xc2,0x10]
+          vfmsubsd   %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubps  (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6c,0x01,0x10]
+          vfmsubps  (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubps   %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x6c,0x01,0x10]
+          vfmsubps   %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmsubps   %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6c,0xc2,0x10]
+          vfmsubps   %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubpd  (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6d,0x01,0x10]
+          vfmsubpd  (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubpd   %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x6d,0x01,0x10]
+          vfmsubpd   %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmsubpd   %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6d,0xc2,0x10]
+          vfmsubpd   %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubps  (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x6c,0x01,0x10]
+          vfmsubps  (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmsubps   %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x6c,0x01,0x10]
+          vfmsubps   %ymm1, (%rcx),%ymm0, %ymm0
+
+// CHECK: vfmsubps   %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x6c,0xc2,0x10]
+          vfmsubps   %ymm2, %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmsubpd  (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x6d,0x01,0x10]
+          vfmsubpd  (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmsubpd   %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x6d,0x01,0x10]
+          vfmsubpd   %ymm1, (%rcx),%ymm0, %ymm0
+
+// CHECK: vfmsubpd   %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x6d,0xc2,0x10]
+          vfmsubpd   %ymm2, %ymm1, %ymm0, %ymm0
+
+// vfnmadd
+// CHECK: vfnmaddss  (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7a,0x01,0x10]
+          vfnmaddss  (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmaddss   %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x7a,0x01,0x10]
+          vfnmaddss   %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfnmaddss   %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7a,0xc2,0x10]
+          vfnmaddss   %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmaddsd  (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7b,0x01,0x10]
+          vfnmaddsd  (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmaddsd   %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x7b,0x01,0x10]
+          vfnmaddsd   %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfnmaddsd   %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7b,0xc2,0x10]
+          vfnmaddsd   %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmaddps  (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x78,0x01,0x10]
+          vfnmaddps  (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmaddps   %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x78,0x01,0x10]
+          vfnmaddps   %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfnmaddps   %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x78,0xc2,0x10]
+          vfnmaddps   %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmaddpd  (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x79,0x01,0x10]
+          vfnmaddpd  (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmaddpd   %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x79,0x01,0x10]
+          vfnmaddpd   %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfnmaddpd   %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x79,0xc2,0x10]
+          vfnmaddpd   %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmaddps  (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x78,0x01,0x10]
+          vfnmaddps  (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfnmaddps   %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x78,0x01,0x10]
+          vfnmaddps   %ymm1, (%rcx),%ymm0, %ymm0
+
+// CHECK: vfnmaddps   %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x78,0xc2,0x10]
+          vfnmaddps   %ymm2, %ymm1, %ymm0, %ymm0
+
+// CHECK: vfnmaddpd  (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x79,0x01,0x10]
+          vfnmaddpd  (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfnmaddpd   %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x79,0x01,0x10]
+          vfnmaddpd   %ymm1, (%rcx),%ymm0, %ymm0
+
+// CHECK: vfnmaddpd   %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x79,0xc2,0x10]
+          vfnmaddpd   %ymm2, %ymm1, %ymm0, %ymm0
+
+// vfnmsub
+// CHECK: vfnmsubss  (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7e,0x01,0x10]
+          vfnmsubss  (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmsubss   %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x7e,0x01,0x10]
+          vfnmsubss   %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfnmsubss   %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7e,0xc2,0x10]
+          vfnmsubss   %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmsubsd  (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7f,0x01,0x10]
+          vfnmsubsd  (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmsubsd   %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x7f,0x01,0x10]
+          vfnmsubsd   %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfnmsubsd   %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7f,0xc2,0x10]
+          vfnmsubsd   %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmsubps  (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7c,0x01,0x10]
+          vfnmsubps  (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmsubps   %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x7c,0x01,0x10]
+          vfnmsubps   %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfnmsubps   %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7c,0xc2,0x10]
+          vfnmsubps   %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmsubpd  (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7d,0x01,0x10]
+          vfnmsubpd  (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmsubpd   %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x7d,0x01,0x10]
+          vfnmsubpd   %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfnmsubpd   %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7d,0xc2,0x10]
+          vfnmsubpd   %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmsubps  (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x7c,0x01,0x10]
+          vfnmsubps  (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfnmsubps   %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x7c,0x01,0x10]
+          vfnmsubps   %ymm1, (%rcx),%ymm0, %ymm0
+
+// CHECK: vfnmsubps   %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x7c,0xc2,0x10]
+          vfnmsubps   %ymm2, %ymm1, %ymm0, %ymm0
+
+// CHECK: vfnmsubpd  (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x7d,0x01,0x10]
+          vfnmsubpd  (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfnmsubpd   %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x7d,0x01,0x10]
+          vfnmsubpd   %ymm1, (%rcx),%ymm0, %ymm0
+
+// CHECK: vfnmsubpd   %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x7d,0xc2,0x10]
+          vfnmsubpd   %ymm2, %ymm1, %ymm0, %ymm0
+
+// vfmaddsub
+// CHECK: vfmaddsubps  (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x5c,0x01,0x10]
+          vfmaddsubps  (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddsubps   %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x5c,0x01,0x10]
+          vfmaddsubps   %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmaddsubps   %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x5c,0xc2,0x10]
+          vfmaddsubps   %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddsubpd  (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x5d,0x01,0x10]
+          vfmaddsubpd  (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddsubpd   %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x5d,0x01,0x10]
+          vfmaddsubpd   %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmaddsubpd   %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x5d,0xc2,0x10]
+          vfmaddsubpd   %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddsubps  (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x5c,0x01,0x10]
+          vfmaddsubps  (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmaddsubps   %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x5c,0x01,0x10]
+          vfmaddsubps   %ymm1, (%rcx),%ymm0, %ymm0
+
+// CHECK: vfmaddsubps   %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x5c,0xc2,0x10]
+          vfmaddsubps   %ymm2, %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmaddsubpd  (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x5d,0x01,0x10]
+          vfmaddsubpd  (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmaddsubpd   %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x5d,0x01,0x10]
+          vfmaddsubpd   %ymm1, (%rcx),%ymm0, %ymm0
+
+// CHECK: vfmaddsubpd   %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x5d,0xc2,0x10]
+          vfmaddsubpd   %ymm2, %ymm1, %ymm0, %ymm0
+
+// vfmsubadd
+// CHECK: vfmsubaddps  (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x5e,0x01,0x10]
+          vfmsubaddps  (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubaddps   %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x5e,0x01,0x10]
+          vfmsubaddps   %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmsubaddps   %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x5e,0xc2,0x10]
+          vfmsubaddps   %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubaddpd  (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x5f,0x01,0x10]
+          vfmsubaddpd  (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubaddpd   %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x5f,0x01,0x10]
+          vfmsubaddpd   %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmsubaddpd   %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x5f,0xc2,0x10]
+          vfmsubaddpd   %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubaddps  (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x5e,0x01,0x10]
+          vfmsubaddps  (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmsubaddps   %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x5e,0x01,0x10]
+          vfmsubaddps   %ymm1, (%rcx),%ymm0, %ymm0
+
+// CHECK: vfmsubaddps   %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x5e,0xc2,0x10]
+          vfmsubaddps   %ymm2, %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmsubaddpd  (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x5f,0x01,0x10]
+          vfmsubaddpd  (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmsubaddpd   %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x5f,0x01,0x10]
+          vfmsubaddpd   %ymm1, (%rcx),%ymm0, %ymm0
+
+// CHECK: vfmsubaddpd   %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x5f,0xc2,0x10]
+          vfmsubaddpd   %ymm2, %ymm1, %ymm0, %ymm0
diff --git a/test/MC/X86/x86_64-xop-encoding.s b/test/MC/X86/x86_64-xop-encoding.s
new file mode 100644
index 0000000..1137b71
--- /dev/null
+++ b/test/MC/X86/x86_64-xop-encoding.s
@@ -0,0 +1,584 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+//////////////////////////
+// 2 operand instructions
+/////////////////////////
+        
+// vphsubwd
+// CHECK: vphsubwd (%rcx,%rax), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0xe2,0x0c,0x01]
+          vphsubwd (%rcx,%rax), %xmm1
+// CHECK: vphsubwd %xmm0, %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0xe2,0xc8]
+          vphsubwd %xmm0, %xmm1
+
+// vphsubdq
+// CHECK: vphsubdq (%rcx,%rax), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0xe3,0x0c,0x01] 
+          vphsubdq (%rcx,%rax), %xmm1
+// CHECK: vphsubdq %xmm0, %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0xe3,0xc8]
+          vphsubdq %xmm0, %xmm1
+
+// vphsubbw
+// CHECK: vphsubbw (%rax), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0xe1,0x08]
+          vphsubbw (%rax), %xmm1
+// CHECK: vphsubbw %xmm2, %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0xe1,0xca]
+          vphsubbw %xmm2, %xmm1
+
+// vphaddwq
+// CHECK: vphaddwq (%rcx), %xmm4
+// CHECK: encoding: [0x8f,0xe9,0x78,0xc7,0x21]
+          vphaddwq (%rcx), %xmm4
+// CHECK: vphaddwq %xmm6, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0x78,0xc7,0xd6]
+          vphaddwq %xmm6, %xmm2
+
+// vphaddwd
+// CHECK: vphaddwd (%rdx,%rax), %xmm7
+// CHECK: encoding: [0x8f,0xe9,0x78,0xc6,0x3c,0x02]
+          vphaddwd (%rdx,%rax), %xmm7
+// CHECK: vphaddwd %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe9,0x78,0xc6,0xe3]
+          vphaddwd %xmm3, %xmm4
+
+// vphadduwq
+// CHECK: vphadduwq (%rcx,%rax), %xmm6
+// CHECK: encoding: [0x8f,0xe9,0x78,0xd7,0x34,0x01]
+          vphadduwq (%rcx,%rax), %xmm6
+// CHECK: vphadduwq %xmm7, %xmm0
+// CHECK: encoding: [0x8f,0xe9,0x78,0xd7,0xc7]
+          vphadduwq %xmm7, %xmm0
+
+// vphadduwd
+// CHECK: vphadduwd (%rax), %xmm5
+// CHECK: encoding: [0x8f,0xe9,0x78,0xd6,0x28]
+          vphadduwd (%rax), %xmm5
+// CHECK: vphadduwd %xmm2, %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0xd6,0xca]
+          vphadduwd %xmm2, %xmm1
+
+// vphaddudq
+// CHECK: vphaddudq 8(%rcx,%rax), %xmm4
+// CHECK: encoding: [0x8f,0xe9,0x78,0xdb,0x64,0x01,0x08]
+          vphaddudq 8(%rcx,%rax), %xmm4
+// CHECK: vphaddudq %xmm6, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0x78,0xdb,0xd6]
+          vphaddudq %xmm6, %xmm2
+
+// vphaddubw
+// CHECK: vphaddubw (%rcx), %xmm3
+// CHECK: encoding: [0x8f,0xe9,0x78,0xd1,0x19]
+          vphaddubw (%rcx), %xmm3
+// CHECK: vphaddubw %xmm5, %xmm0
+// CHECK: encoding: [0x8f,0xe9,0x78,0xd1,0xc5]
+          vphaddubw %xmm5, %xmm0
+
+// vphaddubq
+// CHECK: vphaddubq (%rcx), %xmm4
+// CHECK: encoding: [0x8f,0xe9,0x78,0xd3,0x21]
+          vphaddubq (%rcx), %xmm4
+// CHECK: vphaddubq %xmm2, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0x78,0xd3,0xd2]
+          vphaddubq %xmm2, %xmm2
+
+// vphaddubd
+// CHECK: vphaddubd (%rax), %xmm5
+// CHECK: encoding: [0x8f,0xe9,0x78,0xd2,0x28]
+          vphaddubd (%rax), %xmm5
+// CHECK: vphaddubd %xmm5, %xmm7
+// CHECK: encoding: [0x8f,0xe9,0x78,0xd2,0xfd]
+          vphaddubd %xmm5, %xmm7
+
+// vphadddq
+// CHECK: vphadddq (%rdx), %xmm4
+// CHECK: encoding: [0x8f,0xe9,0x78,0xcb,0x22]
+          vphadddq (%rdx), %xmm4
+// CHECK: vphadddq %xmm4, %xmm5
+// CHECK: encoding: [0x8f,0xe9,0x78,0xcb,0xec]
+          vphadddq %xmm4, %xmm5
+
+// vphaddbw
+// CHECK: vphaddbw (%rcx,%rax), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0xc1,0x0c,0x01]
+          vphaddbw (%rcx,%rax), %xmm1
+// CHECK: vphaddbw %xmm5, %xmm6
+// CHECK: encoding: [0x8f,0xe9,0x78,0xc1,0xf5]
+          vphaddbw %xmm5, %xmm6
+
+// vphaddbq
+// CHECK: vphaddbq (%rcx,%rax), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0xc3,0x0c,0x01]
+          vphaddbq (%rcx,%rax), %xmm1
+// CHECK: vphaddbq %xmm2, %xmm0
+// CHECK: encoding: [0x8f,0xe9,0x78,0xc3,0xc2]
+          vphaddbq %xmm2, %xmm0
+
+// vphaddbd
+// CHECK: vphaddbd (%rcx,%rax), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0xc2,0x0c,0x01]
+          vphaddbd (%rcx,%rax), %xmm1
+// CHECK: vphaddbd %xmm1, %xmm3
+// CHECK: encoding: [0x8f,0xe9,0x78,0xc2,0xd9]
+          vphaddbd %xmm1, %xmm3
+
+// vfrczss
+// CHECK: vfrczss (%rcx,%rax), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0x82,0x0c,0x01]
+          vfrczss (%rcx,%rax), %xmm1
+// CHECK: vfrczss %xmm5, %xmm7
+// CHECK: encoding: [0x8f,0xe9,0x78,0x82,0xfd]
+          vfrczss %xmm5, %xmm7
+
+// vfrczsd
+// CHECK: vfrczsd (%rcx,%rax), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0x83,0x0c,0x01]
+          vfrczsd (%rcx,%rax), %xmm1
+// CHECK: vfrczsd %xmm7, %xmm0
+// CHECK: encoding: [0x8f,0xe9,0x78,0x83,0xc7]
+          vfrczsd %xmm7, %xmm0
+
+// vfrczps
+// CHECK: vfrczps 4(%rax), %xmm3
+// CHECK: encoding: [0x8f,0xe9,0x78,0x80,0x58,0x04]
+          vfrczps 4(%rax), %xmm3
+// CHECK: vfrczps %xmm6, %xmm5
+// CHECK: encoding: [0x8f,0xe9,0x78,0x80,0xee]
+          vfrczps %xmm6, %xmm5
+// CHECK: vfrczps (%rcx), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0x80,0x09]
+          vfrczps (%rcx), %xmm1
+// CHECK: vfrczps %ymm2, %ymm4
+// CHECK: encoding: [0x8f,0xe9,0x7c,0x80,0xe2]
+          vfrczps %ymm2, %ymm4
+
+// vfrczpd
+// CHECK: vfrczpd (%rcx,%rax), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0x81,0x0c,0x01]
+          vfrczpd (%rcx,%rax), %xmm1
+// CHECK: vfrczpd %xmm7, %xmm0
+// CHECK: encoding: [0x8f,0xe9,0x78,0x81,0xc7]
+          vfrczpd %xmm7, %xmm0
+// CHECK: vfrczpd (%rcx,%rax), %ymm2
+// CHECK: encoding: [0x8f,0xe9,0x7c,0x81,0x14,0x01]
+          vfrczpd (%rcx,%rax), %ymm2
+// CHECK: vfrczpd %ymm5, %ymm3
+// CHECK: encoding: [0x8f,0xe9,0x7c,0x81,0xdd]
+          vfrczpd %ymm5, %ymm3
+
+
+        
+//////////////////////////
+// 3 operand instructions
+/////////////////////////
+        
+// vpshlw
+// CHECK: vpshlw %xmm0, %xmm1, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0x78,0x95,0xd1]
+          vpshlw %xmm0, %xmm1, %xmm2
+// CHECK: vpshlw (%rax), %xmm1, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0xf0,0x95,0x10]
+          vpshlw (%rax), %xmm1, %xmm2
+// CHECK: vpshlw %xmm0, (%rax,%rcx), %xmm2
+// CHECK: encoding: [0x8f,0xe9,0x78,0x95,0x14,0x08]
+          vpshlw %xmm0, (%rax,%rcx), %xmm2
+
+// vpshlq
+// CHECK: vpshlq %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0x8f,0xe9,0x68,0x97,0xf4]
+          vpshlq %xmm2, %xmm4, %xmm6
+// CHECK: vpshlq (%rcx), %xmm2, %xmm1
+// CHECK: encoding: [0x8f,0xe9,0xe8,0x97,0x09]
+          vpshlq (%rcx), %xmm2, %xmm1
+// CHECK: vpshlq %xmm5, (%rdx,%rcx), %xmm6
+// CHECK: encoding: [0x8f,0xe9,0x50,0x97,0x34,0x0a]
+          vpshlq %xmm5, (%rdx,%rcx), %xmm6
+
+// vpshld
+// CHECK: vpshld %xmm7, %xmm5, %xmm3
+// CHECK: encoding: [0x8f,0xe9,0x40,0x96,0xdd]
+          vpshld %xmm7, %xmm5, %xmm3
+// CHECK: vpshld 4(%rax), %xmm3, %xmm3
+// CHECK: encoding: [0x8f,0xe9,0xe0,0x96,0x58,0x04]
+          vpshld 4(%rax), %xmm3, %xmm3
+// CHECK: vpshld %xmm1, (%rax,%rcx), %xmm5
+// CHECK: encoding: [0x8f,0xe9,0x70,0x96,0x2c,0x08]
+          vpshld %xmm1, (%rax,%rcx), %xmm5
+
+// vpshlb
+// CHECK: vpshlb %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0x8f,0xe9,0x70,0x94,0xda]
+          vpshlb %xmm1, %xmm2, %xmm3
+// CHECK: vpshlb (%rcx), %xmm0, %xmm7
+// CHECK: encoding: [0x8f,0xe9,0xf8,0x94,0x39]
+          vpshlb (%rcx), %xmm0, %xmm7
+// CHECK: vpshlb %xmm2, (%rax,%rdx), %xmm3
+// CHECK: encoding: [0x8f,0xe9,0x68,0x94,0x1c,0x10]
+          vpshlb %xmm2, (%rax,%rdx), %xmm3
+
+// vpshaw
+// CHECK: vpshaw %xmm7, %xmm5, %xmm3
+// CHECK: encoding: [0x8f,0xe9,0x40,0x99,0xdd]
+          vpshaw %xmm7, %xmm5, %xmm3
+// CHECK: vpshaw (%rax), %xmm2, %xmm1
+// CHECK: encoding: [0x8f,0xe9,0xe8,0x99,0x08]
+          vpshaw (%rax), %xmm2, %xmm1
+// CHECK: vpshaw %xmm0, 8(%rax,%rcx), %xmm3
+// CHECK: encoding: [0x8f,0xe9,0x78,0x99,0x5c,0x08,0x08]
+          vpshaw %xmm0, 8(%rax,%rcx), %xmm3
+
+// vpshaq
+// CHECK: vpshaq %xmm4, %xmm4, %xmm4
+// CHECK: encoding: [0x8f,0xe9,0x58,0x9b,0xe4]
+          vpshaq %xmm4, %xmm4, %xmm4
+// CHECK: vpshaq (%rcx), %xmm2, %xmm0
+// CHECK: encoding: [0x8f,0xe9,0xe8,0x9b,0x01]
+          vpshaq (%rcx), %xmm2, %xmm0
+// CHECK: vpshaq %xmm6, (%rax,%rcx), %xmm5
+// CHECK: encoding: [0x8f,0xe9,0x48,0x9b,0x2c,0x08]
+          vpshaq %xmm6, (%rax,%rcx), %xmm5
+
+// vpshad
+// CHECK: vpshad %xmm5, %xmm4, %xmm0
+// CHECK: encoding: [0x8f,0xe9,0x50,0x9a,0xc4]
+          vpshad %xmm5, %xmm4, %xmm0
+// CHECK: vpshad (%rax), %xmm2, %xmm5
+// CHECK: encoding: [0x8f,0xe9,0xe8,0x9a,0x28]
+          vpshad (%rax), %xmm2, %xmm5
+// CHECK: vpshad %xmm2, (%rax), %xmm5
+// CHECK: encoding: [0x8f,0xe9,0x68,0x9a,0x28]
+          vpshad %xmm2, (%rax), %xmm5
+
+// vpshab
+// CHECK: vpshab %xmm1, %xmm1, %xmm0
+// CHECK: encoding: [0x8f,0xe9,0x70,0x98,0xc1]
+          vpshab %xmm1, %xmm1, %xmm0
+// CHECK: vpshab (%rcx), %xmm4, %xmm0
+// CHECK: encoding: [0x8f,0xe9,0xd8,0x98,0x01]
+          vpshab (%rcx), %xmm4, %xmm0
+// CHECK: vpshab %xmm5, (%rcx), %xmm3
+// CHECK: encoding: [0x8f,0xe9,0x50,0x98,0x19]
+          vpshab %xmm5, (%rcx), %xmm3
+
+// vprotw
+// CHECK: vprotw (%rax), %xmm3, %xmm6
+// CHECK: encoding: [0x8f,0xe9,0xe0,0x91,0x30]
+          vprotw (%rax), %xmm3, %xmm6
+// CHECK: vprotw %xmm5, (%rax,%rcx), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x50,0x91,0x0c,0x08]
+          vprotw %xmm5, (%rax,%rcx), %xmm1
+// CHECK: vprotw %xmm0, %xmm1, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0x78,0x91,0xd1]
+          vprotw %xmm0, %xmm1, %xmm2
+// CHECK: vprotw $42, (%rcx), %xmm1
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc1,0x09,0x2a]
+          vprotw $42, (%rcx), %xmm1
+// CHECK: vprotw $41, (%rax), %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc1,0x20,0x29]
+          vprotw $41, (%rax), %xmm4
+// CHECK: vprotw $40, %xmm1, %xmm3
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc1,0xd9,0x28]
+          vprotw $40, %xmm1, %xmm3
+
+// vprotq
+// CHECK: vprotq (%rax), %xmm1, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0xf0,0x93,0x10]
+          vprotq (%rax), %xmm1, %xmm2
+// CHECK: vprotq (%rax,%rcx), %xmm1, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0xf0,0x93,0x14,0x08]
+          vprotq (%rax,%rcx), %xmm1, %xmm2
+// CHECK: vprotq %xmm0, %xmm1, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0x78,0x93,0xd1]
+          vprotq %xmm0, %xmm1, %xmm2
+// CHECK: vprotq $42, (%rax), %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc3,0x10,0x2a]
+          vprotq $42, (%rax), %xmm2
+// CHECK: vprotq $42, (%rax,%rcx), %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc3,0x14,0x08,0x2a]
+          vprotq $42, (%rax,%rcx), %xmm2
+// CHECK: vprotq $42, %xmm1, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc3,0xd1,0x2a]
+          vprotq $42, %xmm1, %xmm2
+
+// vprotd
+// CHECK: vprotd (%rax), %xmm0, %xmm3
+// CHECK: encoding: [0x8f,0xe9,0xf8,0x92,0x18]
+          vprotd (%rax), %xmm0, %xmm3
+// CHECK: vprotd %xmm2, (%rax,%rcx), %xmm4
+// CHECK: encoding: [0x8f,0xe9,0x68,0x92,0x24,0x08]
+          vprotd %xmm2, (%rax,%rcx), %xmm4
+// CHECK: vprotd %xmm5, %xmm3, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0x50,0x92,0xd3]
+          vprotd %xmm5, %xmm3, %xmm2
+// CHECK: vprotd $43, (%rcx), %xmm6
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc2,0x31,0x2b]
+          vprotd $43, (%rcx), %xmm6
+// CHECK: vprotd $44, (%rax,%rcx), %xmm7
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc2,0x3c,0x08,0x2c]
+          vprotd $44, (%rax,%rcx), %xmm7
+// CHECK: vprotd $45, %xmm4, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc2,0xe4,0x2d]
+          vprotd $45, %xmm4, %xmm4
+
+// vprotb
+// CHECK: vprotb (%rcx), %xmm2, %xmm5
+// CHECK: encoding: [0x8f,0xe9,0xe8,0x90,0x29]
+          vprotb (%rcx), %xmm2, %xmm5
+// CHECK: vprotb %xmm5, (%rax,%rcx), %xmm4
+// CHECK: encoding: [0x8f,0xe9,0x50,0x90,0x24,0x08]
+          vprotb %xmm5, (%rax,%rcx), %xmm4
+// CHECK: vprotb %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0x58,0x90,0xd3]
+          vprotb %xmm4, %xmm3, %xmm2
+// CHECK: vprotb $46, (%rax), %xmm3
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc0,0x18,0x2e]
+          vprotb $46, (%rax), %xmm3
+// CHECK: vprotb $47, (%rax,%rcx), %xmm7
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc0,0x3c,0x08,0x2f]
+          vprotb $47, (%rax,%rcx), %xmm7
+// CHECK: vprotb $48, %xmm5, %xmm5
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc0,0xed,0x30]
+          vprotb $48, %xmm5, %xmm5
+
+//////////////////////////
+// 4 operand instructions
+/////////////////////////
+
+// vpmadcswd
+// CHECK: vpmadcswd %xmm1, %xmm2, %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x60,0xb6,0xe2,0x10]
+        vpmadcswd %xmm1, %xmm2, %xmm3, %xmm4
+// CHECK: vpmadcswd %xmm1, (%rax), %xmm3, %xmm4		
+// CHECK: encoding: [0x8f,0xe8,0x60,0xb6,0x20,0x10]
+        vpmadcswd %xmm1, (%rax), %xmm3, %xmm4		
+
+// vpmadcsswd
+// CHECK: vpmadcsswd %xmm1, %xmm4, %xmm6, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x48,0xa6,0xe4,0x10]
+          vpmadcsswd %xmm1, %xmm4, %xmm6, %xmm4
+// CHECK: vpmadcsswd %xmm1, (%rax,%rcx), %xmm3, %xmm4		
+// CHECK: encoding: [0x8f,0xe8,0x60,0xa6,0x24,0x08,0x10]
+          vpmadcsswd %xmm1, (%rax,%rcx), %xmm3, %xmm4		
+
+// vpmacsww
+// CHECK: vpmacsww %xmm0, %xmm2, %xmm5, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x50,0x95,0xe2,0x00]
+          vpmacsww %xmm0, %xmm2, %xmm5, %xmm4
+// CHECK: vpmacsww %xmm1, (%rax), %xmm6, %xmm4		
+// CHECK: encoding: [0x8f,0xe8,0x48,0x95,0x20,0x10]
+          vpmacsww %xmm1, (%rax), %xmm6, %xmm4		
+
+// vpmacswd
+// CHECK: vpmacswd %xmm4, %xmm5, %xmm6, %xmm7
+// CHECK: encoding: [0x8f,0xe8,0x48,0x96,0xfd,0x40]
+          vpmacswd %xmm4, %xmm5, %xmm6, %xmm7
+// CHECK: vpmacswd %xmm0, (%rax), %xmm1, %xmm2		
+// CHECK: encoding: [0x8f,0xe8,0x70,0x96,0x10,0x00]
+          vpmacswd %xmm0, (%rax), %xmm1, %xmm2		
+
+// vpmacssww
+// CHECK: vpmacssww %xmm4, %xmm3, %xmm2, %xmm1
+// CHECK: encoding: [0x8f,0xe8,0x68,0x85,0xcb,0x40]
+          vpmacssww %xmm4, %xmm3, %xmm2, %xmm1
+// CHECK: vpmacssww %xmm6, (%rcx), %xmm7, %xmm7		
+// CHECK: encoding: [0x8f,0xe8,0x40,0x85,0x39,0x60]
+          vpmacssww %xmm6, (%rcx), %xmm7, %xmm7		
+
+// vpmacsswd
+// CHECK: vpmacsswd %xmm4, %xmm2, %xmm4, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x58,0x86,0xd2,0x40]
+          vpmacsswd %xmm4, %xmm2, %xmm4, %xmm2
+// CHECK: vpmacsswd %xmm0, 8(%rax,%rcx), %xmm1, %xmm0		
+// CHECK: encoding: [0x8f,0xe8,0x70,0x86,0x44,0x08,0x08,0x00]
+          vpmacsswd %xmm0, 8(%rax,%rcx), %xmm1, %xmm0		
+
+// vpmacssdql
+// CHECK: vpmacssdql %xmm1, %xmm1, %xmm2, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x68,0x87,0xe1,0x10]
+          vpmacssdql %xmm1, %xmm1, %xmm2, %xmm4
+// CHECK: vpmacssdql %xmm7, (%rcx), %xmm6, %xmm5		
+// CHECK: encoding: [0x8f,0xe8,0x48,0x87,0x29,0x70]
+          vpmacssdql %xmm7, (%rcx), %xmm6, %xmm5		
+
+// vpmacssdqh
+// CHECK: vpmacssdqh %xmm3, %xmm2, %xmm0, %xmm1
+// CHECK: encoding: [0x8f,0xe8,0x78,0x8f,0xca,0x30]
+          vpmacssdqh %xmm3, %xmm2, %xmm0, %xmm1
+// CHECK: vpmacssdqh %xmm7, (%rax,%rcx), %xmm2, %xmm3		
+// CHECK: encoding: [0x8f,0xe8,0x68,0x8f,0x1c,0x08,0x70]
+          vpmacssdqh %xmm7, (%rax,%rcx), %xmm2, %xmm3		
+
+// vpmacssdd
+// CHECK: vpmacssdd %xmm2, %xmm2, %xmm3, %xmm5
+// CHECK: encoding: [0x8f,0xe8,0x60,0x8e,0xea,0x20]
+          vpmacssdd %xmm2, %xmm2, %xmm3, %xmm5
+// CHECK: vpmacssdd %xmm4, (%rax), %xmm1, %xmm2		
+// CHECK: encoding: [0x8f,0xe8,0x70,0x8e,0x10,0x40]
+          vpmacssdd %xmm4, (%rax), %xmm1, %xmm2		
+
+// vpmacsdql
+// CHECK: vpmacsdql %xmm3, %xmm0, %xmm6, %xmm7
+// CHECK: encoding: [0x8f,0xe8,0x48,0x97,0xf8,0x30]
+          vpmacsdql %xmm3, %xmm0, %xmm6, %xmm7
+// CHECK: vpmacsdql %xmm5, 8(%rcx), %xmm3, %xmm5		
+// CHECK: encoding: [0x8f,0xe8,0x60,0x97,0x69,0x08,0x50]
+          vpmacsdql %xmm5, 8(%rcx), %xmm3, %xmm5		
+
+// vpmacsdqh
+// CHECK: vpmacsdqh %xmm7, %xmm5, %xmm3, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x60,0x9f,0xd5,0x70]
+          vpmacsdqh %xmm7, %xmm5, %xmm3, %xmm2
+// CHECK: vpmacsdqh %xmm5, 4(%rax), %xmm2, %xmm0		
+// CHECK: encoding: [0x8f,0xe8,0x68,0x9f,0x40,0x04,0x50]
+          vpmacsdqh %xmm5, 4(%rax), %xmm2, %xmm0		
+
+// vpmacsdd
+// CHECK: vpmacsdd %xmm4, %xmm6, %xmm4, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x58,0x9e,0xd6,0x40]
+          vpmacsdd %xmm4, %xmm6, %xmm4, %xmm2
+// CHECK: vpmacsdd %xmm4, (%rax,%rcx), %xmm4, %xmm3		
+// CHECK: encoding: [0x8f,0xe8,0x58,0x9e,0x1c,0x08,0x40]
+          vpmacsdd %xmm4, (%rax,%rcx), %xmm4, %xmm3		
+
+// vpcomw
+// CHECK: vpcomw $42, %xmm2, %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x60,0xcd,0xe2,0x2a]
+          vpcomw $42, %xmm2, %xmm3, %xmm4
+// CHECK: vpcomw $42, (%rax), %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x60,0xcd,0x20,0x2a]
+          vpcomw $42, (%rax), %xmm3, %xmm4
+
+// vpcomuw
+// CHECK: vpcomuw $43, %xmm1, %xmm3, %xmm5
+// CHECK: encoding: [0x8f,0xe8,0x60,0xed,0xe9,0x2b]
+          vpcomuw $43, %xmm1, %xmm3, %xmm5
+// CHECK: vpcomuw $44, (%rax,%rcx), %xmm0, %xmm6
+// CHECK: encoding: [0x8f,0xe8,0x78,0xed,0x34,0x08,0x2c]
+          vpcomuw $44, (%rax,%rcx), %xmm0, %xmm6
+
+// vpcomuq
+// CHECK: vpcomuq $45, %xmm3, %xmm3, %xmm7
+// CHECK: encoding: [0x8f,0xe8,0x60,0xef,0xfb,0x2d]
+          vpcomuq $45, %xmm3, %xmm3, %xmm7
+// CHECK: vpcomuq $46, (%rax), %xmm3, %xmm1
+// CHECK: encoding: [0x8f,0xe8,0x60,0xef,0x08,0x2e]
+          vpcomuq $46, (%rax), %xmm3, %xmm1
+
+// vpcomud
+// CHECK: vpcomud $47, %xmm0, %xmm1, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x70,0xee,0xd0,0x2f]
+          vpcomud $47, %xmm0, %xmm1, %xmm2
+// CHECK: vpcomud $48, 4(%rax), %xmm6, %xmm3
+// CHECK: encoding: [0x8f,0xe8,0x48,0xee,0x58,0x04,0x30]
+          vpcomud $48, 4(%rax), %xmm6, %xmm3
+
+// vpcomub
+// CHECK: vpcomub $49, %xmm3, %xmm4, %xmm5
+// CHECK: encoding: [0x8f,0xe8,0x58,0xec,0xeb,0x31]
+          vpcomub $49, %xmm3, %xmm4, %xmm5
+// CHECK: vpcomub $50, (%rcx), %xmm6, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x48,0xec,0x11,0x32]
+          vpcomub $50, (%rcx), %xmm6, %xmm2
+
+// vpcomq
+// CHECK: vpcomq $51, %xmm3, %xmm0, %xmm5
+// CHECK: encoding: [0x8f,0xe8,0x78,0xcf,0xeb,0x33]
+          vpcomq $51, %xmm3, %xmm0, %xmm5
+// CHECK: vpcomq $52, (%rax), %xmm1, %xmm7
+// CHECK: encoding: [0x8f,0xe8,0x70,0xcf,0x38,0x34]
+          vpcomq $52, (%rax), %xmm1, %xmm7
+
+// vpcomd
+// CHECK: vpcomd $53, %xmm3, %xmm3, %xmm0
+// CHECK: encoding: [0x8f,0xe8,0x60,0xce,0xc3,0x35]
+          vpcomd $53, %xmm3, %xmm3, %xmm0
+// CHECK: vpcomd $54, (%rcx), %xmm2, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x68,0xce,0x11,0x36]
+          vpcomd $54, (%rcx), %xmm2, %xmm2
+
+// vpcomb
+// CHECK: vpcomb $55, %xmm6, %xmm4, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x58,0xcc,0xd6,0x37]
+          vpcomb $55, %xmm6, %xmm4, %xmm2
+// CHECK: vpcomb $56, 8(%rax), %xmm3, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x60,0xcc,0x50,0x08,0x38]
+          vpcomb $56, 8(%rax), %xmm3, %xmm2
+
+
+// vpperm
+// CHECK: vpperm %xmm1, %xmm2, %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x60,0xa3,0xe2,0x10]
+        vpperm %xmm1, %xmm2, %xmm3, %xmm4
+// CHECK: vpperm (%rax), %xmm2, %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0xe0,0xa3,0x20,0x20]
+        vpperm (%rax), %xmm2, %xmm3, %xmm4
+// CHECK: vpperm %xmm1, (%rax), %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x60,0xa3,0x20,0x10]
+        vpperm %xmm1, (%rax), %xmm3, %xmm4
+
+// vpcmov
+// CHECK: vpcmov %xmm1, %xmm2, %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x60,0xa2,0xe2,0x10]
+	vpcmov %xmm1, %xmm2, %xmm3, %xmm4
+// CHECK: vpcmov (%rax), %xmm2, %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0xe0,0xa2,0x20,0x20]
+	vpcmov (%rax), %xmm2, %xmm3, %xmm4
+// CHECK: vpcmov %xmm1, (%rax), %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x60,0xa2,0x20,0x10]
+	vpcmov %xmm1, (%rax), %xmm3, %xmm4
+// CHECK: vpcmov %ymm1, %ymm2, %ymm3, %ymm4
+// CHECK: encoding: [0x8f,0xe8,0x64,0xa2,0xe2,0x10]
+	vpcmov %ymm1, %ymm2, %ymm3, %ymm4
+// CHECK: vpcmov (%rax), %ymm2, %ymm3, %ymm4
+// CHECK: encoding: [0x8f,0xe8,0xe4,0xa2,0x20,0x20]
+	vpcmov (%rax), %ymm2, %ymm3, %ymm4
+// CHECK: vpcmov %ymm1, (%rax), %ymm3, %ymm4
+// CHECK: encoding: [0x8f,0xe8,0x64,0xa2,0x20,0x10]
+	vpcmov %ymm1, (%rax), %ymm3, %ymm4
+
+
+//////////////////////////
+// 5 operand instructions
+/////////////////////////
+// vpermil2pd
+// CHECK: vpermil2pd $1, %xmm5, %xmm2, %xmm1, %xmm7
+// CHECK: encoding: [0xc4,0xe3,0x71,0x49,0xfa,0x51]
+          vpermil2pd $1, %xmm5, %xmm2, %xmm1, %xmm7
+// CHECK: vpermil2pd $2, (%rax), %xmm3, %xmm3, %xmm4
+// CHECK: encoding: [0xc4,0xe3,0xe1,0x49,0x20,0x32]
+          vpermil2pd $2, (%rax), %xmm3, %xmm3, %xmm4
+// CHECK: vpermil2pd $3, 8(%rax), %ymm0, %ymm4, %ymm6
+// CHECK: encoding: [0xc4,0xe3,0xdd,0x49,0x70,0x08,0x03]
+          vpermil2pd $3, 8(%rax), %ymm0, %ymm4, %ymm6
+// CHECK: vpermil2pd $0, %xmm3, (%rax,%rcx), %xmm1, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x71,0x49,0x04,0x08,0x30]
+          vpermil2pd $0, %xmm3, (%rax,%rcx), %xmm1, %xmm0
+// CHECK: vpermil2pd $1, %ymm1, %ymm2, %ymm3, %ymm4
+// CHECK: encoding: [0xc4,0xe3,0x65,0x49,0xe2,0x11]
+          vpermil2pd $1, %ymm1, %ymm2, %ymm3, %ymm4
+// CHECK: vpermil2pd $2, %ymm1, (%rax), %ymm3, %ymm4
+// CHECK: encoding: [0xc4,0xe3,0x65,0x49,0x20,0x12]
+          vpermil2pd $2, %ymm1, (%rax), %ymm3, %ymm4
+
+// vpermil2ps
+// CHECK: vpermil2ps $0, %xmm4, %xmm3, %xmm2, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x69,0x48,0xcb,0x40]
+          vpermil2ps $0, %xmm4, %xmm3, %xmm2, %xmm1
+// CHECK: vpermil2ps $1, 4(%rax), %xmm2, %xmm3, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xe1,0x48,0x40,0x04,0x21]
+          vpermil2ps $1, 4(%rax), %xmm2, %xmm3, %xmm0
+// CHECK: vpermil2ps $2, (%rax), %ymm1, %ymm5, %ymm6
+// CHECK: encoding: [0xc4,0xe3,0xd5,0x48,0x30,0x12]
+          vpermil2ps $2, (%rax), %ymm1, %ymm5, %ymm6
+// CHECK: vpermil2ps $3, %xmm1, (%rax), %xmm3, %xmm4
+// CHECK: encoding: [0xc4,0xe3,0x61,0x48,0x20,0x13]
+          vpermil2ps $3, %xmm1, (%rax), %xmm3, %xmm4
+// CHECK: vpermil2ps $0, %ymm4, %ymm4, %ymm2, %ymm2
+// CHECK: encoding: [0xc4,0xe3,0x6d,0x48,0xd4,0x40]
+          vpermil2ps $0, %ymm4, %ymm4, %ymm2, %ymm2
+// CHECK: vpermil2pd $1, %ymm1, 4(%rax), %ymm1, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x75,0x49,0x40,0x04,0x11]
+          vpermil2pd $1, %ymm1, 4(%rax), %ymm1, %ymm0
+
diff --git a/test/MC/X86/x86_errors.s b/test/MC/X86/x86_errors.s
index e0a2c676..f161e06 100644
--- a/test/MC/X86/x86_errors.s
+++ b/test/MC/X86/x86_errors.s
@@ -15,3 +15,16 @@ addl $0, 0(%rax)
 
 # 8 "test.s"
  movi $8,%eax
+
+movl 0(%rax), 0(%edx)  // error: invalid operand for instruction
+
+// 32: error: instruction requires a CPU feature not currently enabled
+sysexitq
+
+// rdar://10710167
+// 64: error: expected scale expression
+lea (%rsp, %rbp, $4), %rax
+
+// rdar://10423777
+// 64: error: index register is 32-bit, but base register is 64-bit
+movq (%rsi,%ecx),%xmm0
diff --git a/test/Makefile b/test/Makefile
index c0bc36c..a4e53f8 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -59,7 +59,7 @@ extra-lit-site-cfgs::
 
 ifneq ($(strip $(filter check-local-all,$(MAKECMDGOALS))),)
 ifndef TESTSUITE
-ifeq ($(shell test -d $(PROJ_SRC_DIR)/../tools/clang && echo OK), OK)
+ifeq ($(shell test -f $(PROJ_OBJ_DIR)/../tools/clang/Makefile && echo OK), OK)
 LIT_ALL_TESTSUITES += $(PROJ_OBJ_DIR)/../tools/clang/test
 
 # Force creation of Clang's lit.site.cfg.
@@ -121,11 +121,6 @@ DSYMUTIL=dsymutil
 else
 DSYMUTIL=true
 endif
-ifdef TargetCommonOpts
-BUGPOINT_TOPTS="-gcc-tool-args $(TargetCommonOpts)"
-else
-BUGPOINT_TOPTS=""
-endif
 
 ifneq ($(OCAMLOPT),)
 CC_FOR_OCAMLOPT := $(shell $(OCAMLOPT) -config | grep native_c_compiler | sed -e 's/native_c_compiler: //')
@@ -140,30 +135,18 @@ site.exp: FORCE
 	@echo '# Do not edit!' >> site.tmp
 	@echo 'set target_triplet "$(TARGET_TRIPLE)"' >> site.tmp
 	@echo 'set TARGETS_TO_BUILD "$(TARGETS_TO_BUILD)"' >> site.tmp
-	@echo 'set llvmgcc_langs "$(LLVMGCC_LANGS)"' >> site.tmp
-	@echo 'set llvmtoolsdir "$(ToolDir)"' >>site.tmp
-	@echo 'set llvmlibsdir "$(LibDir)"' >>site.tmp
 	@echo 'set llvmshlibdir "$(SharedLibDir)"' >>site.tmp
 	@echo 'set llvm_bindings "$(BINDINGS_TO_BUILD)"' >> site.tmp
 	@echo 'set srcroot "$(LLVM_SRC_ROOT)"' >>site.tmp
 	@echo 'set objroot "$(LLVM_OBJ_ROOT)"' >>site.tmp
 	@echo 'set srcdir "$(LLVM_SRC_ROOT)/test"' >>site.tmp
 	@echo 'set objdir "$(LLVM_OBJ_ROOT)/test"' >>site.tmp
-	@echo 'set gccpath "$(CC)"' >>site.tmp
-	@echo 'set gxxpath "$(CXX)"' >>site.tmp
-	@echo 'set compile_c "' $(CC) $(CPP.Flags) $(TargetCommonOpts) $(CompileCommonOpts) -c '"' >>site.tmp
-	@echo 'set compile_cxx "' $(CXX) $(CPP.Flags) $(CXX.Flags) $(TargetCommonOpts) $(CompileCommonOpts) -c -x c++ '"' >> site.tmp
 	@echo 'set link "' $(CXX) $(CPP.Flags) $(CXX.Flags) $(TargetCommonOpts) $(CompileCommonOpts) $(LD.Flags) '"' >>site.tmp
-	@echo 'set llvmgcc "$(LLVMGCC) $(TargetCommonOpts) $(EXTRA_OPTIONS)"' >> site.tmp
-	@echo 'set llvmgxx "$(LLVMGCC) $(TargetCommonOpts) $(EXTRA_OPTIONS)"' >> site.tmp
-	@echo 'set bugpoint_topts $(BUGPOINT_TOPTS)' >> site.tmp
 	@echo 'set shlibext "$(SHLIBEXT)"' >> site.tmp
 	@echo 'set ocamlopt "$(OCAMLOPT) -cc \"$(CXX_FOR_OCAMLOPT)\" -I $(LibDir)/ocaml"' >> site.tmp
 	@echo 'set valgrind "$(VALGRIND)"' >> site.tmp
 	@echo 'set grep "$(GREP)"' >>site.tmp
 	@echo 'set gas "$(GAS)"' >>site.tmp
-	@echo 'set llvmdsymutil "$(DSYMUTIL)"' >>site.tmp
-	@echo 'set emitir "$(LLVMCC_EMITIR_FLAG)"' >>site.tmp
 	@echo '## All variables above are generated by configure. Do Not Edit ## ' >>site.tmp
 	@test ! -f site.exp || \
 	sed '1,/^## All variables above are.*##/ d' site.exp >> site.tmp
@@ -186,6 +169,8 @@ lit.site.cfg: site.exp
 	@$(ECHOPATH) s=@PYTHON_EXECUTABLE@=python=g >> lit.tmp
 	@$(ECHOPATH) s=@ENABLE_SHARED@=$(ENABLE_SHARED)=g >> lit.tmp
 	@$(ECHOPATH) s=@ENABLE_ASSERTIONS@=$(ENABLE_ASSERTIONS)=g >> lit.tmp
+	@$(ECHOPATH) s=@TARGETS_TO_BUILD@=$(TARGETS_TO_BUILD)=g >> lit.tmp
+	@$(ECHOPATH) s=@LLVM_BINDINGS@=$(BINDINGS_TO_BUILD)=g >> lit.tmp
 	@sed -f lit.tmp $(PROJ_SRC_DIR)/lit.site.cfg.in > $@
 	@-rm -f lit.tmp
 
diff --git a/test/Object/TestObjectFiles/archive-test.a-coff-i386 b/test/Object/Inputs/archive-test.a-coff-i386
index 846cd63..846cd63 100644
--- a/test/Object/TestObjectFiles/archive-test.a-coff-i386
+++ b/test/Object/Inputs/archive-test.a-coff-i386
diff --git a/test/Object/Inputs/elf-versioning-test.i386 b/test/Object/Inputs/elf-versioning-test.i386
new file mode 100755
index 0000000..c7c1eac
--- /dev/null
+++ b/test/Object/Inputs/elf-versioning-test.i386
diff --git a/test/Object/Inputs/elf-versioning-test.x86_64 b/test/Object/Inputs/elf-versioning-test.x86_64
new file mode 100755
index 0000000..cba79ba
--- /dev/null
+++ b/test/Object/Inputs/elf-versioning-test.x86_64
diff --git a/test/Object/Inputs/elfver.S b/test/Object/Inputs/elfver.S
new file mode 100644
index 0000000..ba63279
--- /dev/null
+++ b/test/Object/Inputs/elfver.S
@@ -0,0 +1,31 @@
+# Compile with:
+#   ARGS="-shared -nostdlib -Wl,--version-script=elfver.script"
+#   clang $ARGS -m32 elfver.S -lc -o elf-versioning-test.i386
+#   clang $ARGS -m64 elfver.S -lc -o elf-versioning-test.x86_64
+
+# Also, strip off non-dynamic symbols:
+#   strip elf-versioning-test.i386
+#   strip elf-versioning-test.x86_64
+
+#ifdef __i386__
+.symver _puts, puts@GLIBC_2.0
+#else
+.symver _puts, puts@GLIBC_2.2.5
+#endif
+call _puts@PLT
+
+.symver foo1, foo@VER1
+.globl foo1
+.type foo1, @function
+foo1:
+  ret
+
+.symver foo2, foo@@VER2
+.globl foo2
+.type foo2, @function
+foo2:
+  ret
+
+.globl unversioned_define
+.type unversioned_define, @function
+unversioned_define:
diff --git a/test/Object/Inputs/elfver.script b/test/Object/Inputs/elfver.script
new file mode 100644
index 0000000..1316fcb
--- /dev/null
+++ b/test/Object/Inputs/elfver.script
@@ -0,0 +1,10 @@
+VER1 {
+  global:
+    foo;
+};
+
+VER2 {
+  global:
+    foo;
+} VER1;
+
diff --git a/test/Object/Inputs/shared-object-test.elf-i386 b/test/Object/Inputs/shared-object-test.elf-i386
new file mode 100644
index 0000000..fb63915
--- /dev/null
+++ b/test/Object/Inputs/shared-object-test.elf-i386
diff --git a/test/Object/Inputs/shared-object-test.elf-x86-64 b/test/Object/Inputs/shared-object-test.elf-x86-64
new file mode 100644
index 0000000..92667f5
--- /dev/null
+++ b/test/Object/Inputs/shared-object-test.elf-x86-64
diff --git a/test/Object/Inputs/shared.ll b/test/Object/Inputs/shared.ll
new file mode 100644
index 0000000..1a62d56
--- /dev/null
+++ b/test/Object/Inputs/shared.ll
@@ -0,0 +1,33 @@
+; How to make the shared objects from this file:
+;
+; LDARGS="--unresolved-symbols=ignore-all -soname=libfoo.so --no-as-needed -lc -lm"
+;
+; X86-32 ELF:
+;   llc -mtriple=i386-linux-gnu shared.ll -filetype=obj -o tmp32.o -relocation-model=pic
+;   ld -melf_i386 -shared tmp32.o -o shared-object-test.elf-i386 $LDARGS
+;
+; X86-64 ELF:
+;   llc -mtriple=x86_64-linux-gnu shared.ll -filetype=obj -o tmp64.o -relocation-model=pic
+;   ld -melf_x86_64 -shared tmp64.o -o shared-object-test.elf-x86-64 $LDARGS
+
+@defined_sym = global i32 1, align 4
+
+@tls_sym = thread_local global i32 2, align 4
+
+@undef_sym = external global i32
+
+@undef_tls_sym = external thread_local global i32
+
+@common_sym = common global i32 0, align 4
+
+define i32 @global_func() nounwind uwtable {
+entry:
+  ret i32 0
+}
+
+declare i32 @undef_func(...)
+
+define internal i32 @local_func() nounwind uwtable {
+entry:
+  ret i32 0
+}
diff --git a/test/Object/TestObjectFiles/trivial-object-test.coff-i386 b/test/Object/Inputs/trivial-object-test.coff-i386
index 8cfd994..8cfd994 100644
--- a/test/Object/TestObjectFiles/trivial-object-test.coff-i386
+++ b/test/Object/Inputs/trivial-object-test.coff-i386
diff --git a/test/Object/TestObjectFiles/trivial-object-test.coff-x86-64 b/test/Object/Inputs/trivial-object-test.coff-x86-64
index 0775914..0775914 100644
--- a/test/Object/TestObjectFiles/trivial-object-test.coff-x86-64
+++ b/test/Object/Inputs/trivial-object-test.coff-x86-64
diff --git a/test/Object/TestObjectFiles/trivial-object-test.elf-i386 b/test/Object/Inputs/trivial-object-test.elf-i386
index 1a0ea40..1a0ea40 100644
--- a/test/Object/TestObjectFiles/trivial-object-test.elf-i386
+++ b/test/Object/Inputs/trivial-object-test.elf-i386
diff --git a/test/Object/TestObjectFiles/trivial-object-test.elf-x86-64 b/test/Object/Inputs/trivial-object-test.elf-x86-64
index 889f5d9..889f5d9 100644
--- a/test/Object/TestObjectFiles/trivial-object-test.elf-x86-64
+++ b/test/Object/Inputs/trivial-object-test.elf-x86-64
diff --git a/test/Object/TestObjectFiles/trivial-object-test.macho-i386 b/test/Object/Inputs/trivial-object-test.macho-i386
index 099bd1e..099bd1e 100644
--- a/test/Object/TestObjectFiles/trivial-object-test.macho-i386
+++ b/test/Object/Inputs/trivial-object-test.macho-i386
diff --git a/test/Object/TestObjectFiles/trivial-object-test.macho-x86-64 b/test/Object/Inputs/trivial-object-test.macho-x86-64
index 93eeb5d..93eeb5d 100644
--- a/test/Object/TestObjectFiles/trivial-object-test.macho-x86-64
+++ b/test/Object/Inputs/trivial-object-test.macho-x86-64
diff --git a/test/Object/Inputs/trivial.ll b/test/Object/Inputs/trivial.ll
new file mode 100644
index 0000000..25ece76
--- /dev/null
+++ b/test/Object/Inputs/trivial.ll
@@ -0,0 +1,12 @@
+@.str = private unnamed_addr constant [13 x i8] c"Hello World\0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+  %call = tail call i32 @puts(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0)) nounwind
+  tail call void bitcast (void (...)* @SomeOtherFunction to void ()*)() nounwind
+  ret i32 0
+}
+
+declare i32 @puts(i8* nocapture) nounwind
+
+declare void @SomeOtherFunction(...)
diff --git a/test/Object/TestObjectFiles/archive-test.a-bitcode b/test/Object/TestObjectFiles/archive-test.a-bitcode
deleted file mode 100644
index 3aeb34f..0000000
--- a/test/Object/TestObjectFiles/archive-test.a-bitcode
+++ /dev/null
diff --git a/test/Object/X86/lit.local.cfg b/test/Object/X86/lit.local.cfg
new file mode 100644
index 0000000..6a29e92
--- /dev/null
+++ b/test/Object/X86/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.test']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/Object/X86/objdump-disassembly-inline-relocations.test b/test/Object/X86/objdump-disassembly-inline-relocations.test
new file mode 100644
index 0000000..a5875f6a
--- /dev/null
+++ b/test/Object/X86/objdump-disassembly-inline-relocations.test
@@ -0,0 +1,32 @@
+RUN: llvm-objdump -d -r %p/../Inputs/trivial-object-test.coff-i386 \
+RUN:              | FileCheck %s -check-prefix COFF-i386
+RUN: llvm-objdump -d -r %p/../Inputs/trivial-object-test.coff-x86-64 \
+RUN:              | FileCheck %s -check-prefix COFF-x86-64
+
+COFF-i386: file format COFF-i386
+COFF-i386: Disassembly of section .text:
+COFF-i386:        0:       83 ec 0c                                        subl    $12, %esp
+COFF-i386:        3:       c7 44 24 08 00 00 00 00                         movl    $0, 8(%esp)
+COFF-i386:        b:       c7 04 24 00 00 00 00                            movl    $0, (%esp)
+COFF-i386:                              e: IMAGE_REL_I386_DIR32  L_.str
+COFF-i386:       12:       e8 00 00 00 00                                  calll   0
+COFF-i386:                             13: IMAGE_REL_I386_REL32  _puts
+COFF-i386:       17:       e8 00 00 00 00                                  calll   0
+COFF-i386:                             18: IMAGE_REL_I386_REL32  _SomeOtherFunction
+COFF-i386:       1c:       8b 44 24 08                                     movl    8(%esp), %eax
+COFF-i386:       20:       83 c4 0c                                        addl    $12, %esp
+COFF-i386:       23:       c3                                              ret
+
+COFF-x86-64: file format COFF-x86-64
+COFF-x86-64: Disassembly of section .text:
+COFF-x86-64:        0:       48 83 ec 28                                     subq    $40, %rsp
+COFF-x86-64:        4:       c7 44 24 24 00 00 00 00                         movl    $0, 36(%rsp)
+COFF-x86-64:        c:       48 8d 0d 00 00 00 00                            leaq    (%rip), %rcx
+COFF-x86-64:                               f: IMAGE_REL_AMD64_REL32 L.str
+COFF-x86-64:       13:       e8 00 00 00 00                                  callq   0
+COFF-x86-64:                              14: IMAGE_REL_AMD64_REL32 puts
+COFF-x86-64:       18:       e8 00 00 00 00                                  callq   0
+COFF-x86-64:                              19: IMAGE_REL_AMD64_REL32 SomeOtherFunction
+COFF-x86-64:       1d:       8b 44 24 24                                     movl    36(%rsp), %eax
+COFF-x86-64:       21:       48 83 c4 28                                     addq    $40, %rsp
+COFF-x86-64:       25:       c3                                              ret
diff --git a/test/Object/X86/objdump-trivial-object.test b/test/Object/X86/objdump-trivial-object.test
new file mode 100644
index 0000000..8f9ea97
--- /dev/null
+++ b/test/Object/X86/objdump-trivial-object.test
@@ -0,0 +1,54 @@
+RUN: llvm-objdump -d %p/../Inputs/trivial-object-test.coff-i386 \
+RUN:              | FileCheck %s -check-prefix COFF-i386
+RUN: llvm-objdump -d %p/../Inputs/trivial-object-test.coff-x86-64 \
+RUN:              | FileCheck %s -check-prefix COFF-x86-64
+RUN: llvm-objdump -d %p/../Inputs/trivial-object-test.elf-i386 \
+RUN:              | FileCheck %s -check-prefix ELF-i386
+RUN: llvm-objdump -d %p/../Inputs/trivial-object-test.elf-x86-64 \
+RUN:              | FileCheck %s -check-prefix ELF-x86-64
+
+COFF-i386: file format COFF-i386
+COFF-i386: Disassembly of section .text:
+COFF-i386:        0:       83 ec 0c                                        subl    $12, %esp
+COFF-i386:        3:       c7 44 24 08 00 00 00 00                         movl    $0, 8(%esp)
+COFF-i386:        b:       c7 04 24 00 00 00 00                            movl    $0, (%esp)
+COFF-i386:       12:       e8 00 00 00 00                                  calll   0
+COFF-i386:       17:       e8 00 00 00 00                                  calll   0
+COFF-i386:       1c:       8b 44 24 08                                     movl    8(%esp), %eax
+COFF-i386:       20:       83 c4 0c                                        addl    $12, %esp
+COFF-i386:       23:       c3                                              ret
+
+COFF-x86-64: file format COFF-x86-64
+COFF-x86-64: Disassembly of section .text:
+COFF-x86-64:        0:       48 83 ec 28                                     subq    $40, %rsp
+COFF-x86-64:        4:       c7 44 24 24 00 00 00 00                         movl    $0, 36(%rsp)
+COFF-x86-64:        c:       48 8d 0d 00 00 00 00                            leaq    (%rip), %rcx
+COFF-x86-64:       13:       e8 00 00 00 00                                  callq   0
+COFF-x86-64:       18:       e8 00 00 00 00                                  callq   0
+COFF-x86-64:       1d:       8b 44 24 24                                     movl    36(%rsp), %eax
+COFF-x86-64:       21:       48 83 c4 28                                     addq    $40, %rsp
+COFF-x86-64:       25:       c3                                              ret
+
+
+ELF-i386: file format ELF32-i386
+ELF-i386: Disassembly of section .text:
+ELF-i386:        0:       83 ec 0c                                        subl    $12, %esp
+ELF-i386:        3:       c7 44 24 08 00 00 00 00                         movl    $0, 8(%esp)
+ELF-i386:        b:       c7 04 24 00 00 00 00                            movl    $0, (%esp)
+ELF-i386:       12:       e8 fc ff ff ff                                  calll   -4
+ELF-i386:       17:       e8 fc ff ff ff                                  calll   -4
+ELF-i386:       1c:       8b 44 24 08                                     movl    8(%esp), %eax
+ELF-i386:       20:       83 c4 0c                                        addl    $12, %esp
+ELF-i386:       23:       c3                                              ret
+
+ELF-x86-64: file format ELF64-x86-64
+ELF-x86-64: Disassembly of section .text:
+ELF-x86-64:        0:       48 83 ec 08                                     subq    $8, %rsp
+ELF-x86-64:        4:       c7 44 24 04 00 00 00 00                         movl    $0, 4(%rsp)
+ELF-x86-64:        c:       bf 00 00 00 00                                  movl    $0, %edi
+ELF-x86-64:       11:       e8 00 00 00 00                                  callq   0
+ELF-x86-64:       16:       30 c0                                           xorb    %al, %al
+ELF-x86-64:       18:       e8 00 00 00 00                                  callq   0
+ELF-x86-64:       1d:       8b 44 24 04                                     movl    4(%rsp), %eax
+ELF-x86-64:       21:       48 83 c4 08                                     addq    $8, %rsp
+ELF-x86-64:       25:       c3                                              ret
diff --git a/test/Object/dg.exp b/test/Object/dg.exp
deleted file mode 100644
index be82c51..0000000
--- a/test/Object/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{test}]]
diff --git a/test/Object/lit.local.cfg b/test/Object/lit.local.cfg
new file mode 100644
index 0000000..df9b335
--- /dev/null
+++ b/test/Object/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.test']
diff --git a/test/Object/nm-archive.test b/test/Object/nm-archive.test
index da6144e..2d96b73 100644
--- a/test/Object/nm-archive.test
+++ b/test/Object/nm-archive.test
@@ -1,7 +1,8 @@
-RUN: llvm-nm %p/TestObjectFiles/archive-test.a-coff-i386 \
+RUN: llvm-nm %p/Inputs/archive-test.a-coff-i386 \
 RUN:         | FileCheck %s -check-prefix COFF
-RUN: llvm-nm %p/TestObjectFiles/archive-test.a-bitcode \
-RUN:         | FileCheck %s -check-prefix BITCODE
+RUN: llvm-as %p/Inputs/trivial.ll -o=%t1
+RUN: llvm-ar rcs %t2 %t1
+RUN: llvm-nm %t2 | FileCheck %s -check-prefix BITCODE
 
 
 COFF: trivial-object-test.coff-i386:
diff --git a/test/Object/nm-shared-object.test b/test/Object/nm-shared-object.test
new file mode 100644
index 0000000..b361df5
--- /dev/null
+++ b/test/Object/nm-shared-object.test
@@ -0,0 +1,15 @@
+RUN: llvm-nm -D %p/Inputs/shared-object-test.elf-i386 \
+RUN:         | FileCheck %s -check-prefix ELF
+RUN: llvm-nm -D %p/Inputs/shared-object-test.elf-x86-64 \
+RUN:         | FileCheck %s -check-prefix ELF
+
+; Note: tls_sym should be 'D' (not '?'), but TLS is not
+; yet recognized by ObjectFile.
+
+ELF: {{[0-9a-f]+}} A __bss_start
+ELF: {{[0-9a-f]+}} A _edata
+ELF: {{[0-9a-f]+}} A _end
+ELF: {{[0-9a-f]+}} B common_sym
+ELF: {{[0-9a-f]+}} D defined_sym
+ELF: {{[0-9a-f]+}} T global_func
+ELF:               ? tls_sym
diff --git a/test/Object/nm-trivial-object.test b/test/Object/nm-trivial-object.test
index 6de1780..e5635ab 100644
--- a/test/Object/nm-trivial-object.test
+++ b/test/Object/nm-trivial-object.test
@@ -1,11 +1,15 @@
-RUN: llvm-nm %p/TestObjectFiles/trivial-object-test.coff-i386 \
+RUN: llvm-nm %p/Inputs/trivial-object-test.coff-i386 \
 RUN:         | FileCheck %s -check-prefix COFF
-RUN: llvm-nm %p/TestObjectFiles/trivial-object-test.coff-x86-64 \
+RUN: llvm-nm %p/Inputs/trivial-object-test.coff-x86-64 \
 RUN:         | FileCheck %s -check-prefix COFF
-RUN: llvm-nm %p/TestObjectFiles/trivial-object-test.elf-i386 \
+RUN: llvm-nm %p/Inputs/trivial-object-test.elf-i386 \
 RUN:         | FileCheck %s -check-prefix ELF
-RUN: llvm-nm %p/TestObjectFiles/trivial-object-test.elf-x86-64 \
+RUN: llvm-nm %p/Inputs/trivial-object-test.elf-x86-64 \
 RUN:         | FileCheck %s -check-prefix ELF
+RUN: llvm-nm %p/Inputs/trivial-object-test.macho-i386 \
+RUN:         | FileCheck %s -check-prefix macho
+RUN: llvm-nm %p/Inputs/trivial-object-test.macho-x86-64 \
+RUN:         | FileCheck %s -check-prefix macho64
 
 COFF: 00000000 d .data
 COFF: 00000000 t .text
@@ -17,3 +21,13 @@ COFF:          U {{_?}}puts
 ELF:          U SomeOtherFunction
 ELF: 00000000 T main
 ELF:          U puts
+
+
+macho: 00000000 U _SomeOtherFunction
+macho: 00000000 s _main
+macho: 00000000 U _puts
+
+macho64: 00000028 s L_.str
+macho64: 00000000 u _SomeOtherFunction
+macho64: 00000000 s _main
+macho64: 00000000 u _puts
+\ No newline at end of file
diff --git a/test/Object/objdump-disassembly-inline-relocations.test b/test/Object/objdump-disassembly-inline-relocations.test
deleted file mode 100644
index 91f2e48..0000000
--- a/test/Object/objdump-disassembly-inline-relocations.test
+++ /dev/null
@@ -1,32 +0,0 @@
-RUN: llvm-objdump -d -r %p/TestObjectFiles/trivial-object-test.coff-i386 \
-RUN:              | FileCheck %s -check-prefix COFF-i386
-RUN: llvm-objdump -d -r %p/TestObjectFiles/trivial-object-test.coff-x86-64 \
-RUN:              | FileCheck %s -check-prefix COFF-x86-64
-
-COFF-i386: file format COFF-i386
-COFF-i386: Disassembly of section .text:
-COFF-i386:        0:       83 ec 0c                                        subl    $12, %esp
-COFF-i386:        3:       c7 44 24 08 00 00 00 00                         movl    $0, 8(%esp)
-COFF-i386:        b:       c7 04 24 00 00 00 00                            movl    $0, (%esp)
-COFF-i386:                              e: IMAGE_REL_I386_DIR32  L_.str
-COFF-i386:       12:       e8 00 00 00 00                                  calll   0
-COFF-i386:                             13: IMAGE_REL_I386_REL32  _puts
-COFF-i386:       17:       e8 00 00 00 00                                  calll   0
-COFF-i386:                             18: IMAGE_REL_I386_REL32  _SomeOtherFunction
-COFF-i386:       1c:       8b 44 24 08                                     movl    8(%esp), %eax
-COFF-i386:       20:       83 c4 0c                                        addl    $12, %esp
-COFF-i386:       23:       c3                                              ret
-
-COFF-x86-64: file format COFF-x86-64
-COFF-x86-64: Disassembly of section .text:
-COFF-x86-64:        0:       48 83 ec 28                                     subq    $40, %rsp
-COFF-x86-64:        4:       c7 44 24 24 00 00 00 00                         movl    $0, 36(%rsp)
-COFF-x86-64:        c:       48 8d 0d 00 00 00 00                            leaq    (%rip), %rcx
-COFF-x86-64:                               f: IMAGE_REL_AMD64_REL32 L.str
-COFF-x86-64:       13:       e8 00 00 00 00                                  callq   0
-COFF-x86-64:                              14: IMAGE_REL_AMD64_REL32 puts
-COFF-x86-64:       18:       e8 00 00 00 00                                  callq   0
-COFF-x86-64:                              19: IMAGE_REL_AMD64_REL32 SomeOtherFunction
-COFF-x86-64:       1d:       8b 44 24 24                                     movl    36(%rsp), %eax
-COFF-x86-64:       21:       48 83 c4 28                                     addq    $40, %rsp
-COFF-x86-64:       25:       c3                                              ret
diff --git a/test/Object/objdump-file-header.test b/test/Object/objdump-file-header.test
new file mode 100644
index 0000000..3fce3f4
--- /dev/null
+++ b/test/Object/objdump-file-header.test
@@ -0,0 +1,18 @@
+RUN: llvm-objdump -f %p/Inputs/trivial-object-test.coff-i386 \
+RUN:              | FileCheck %s -check-prefix COFF-i386
+RUN: llvm-objdump -f %p/Inputs/trivial-object-test.elf-i386 \
+RUN:              | FileCheck %s -check-prefix ELF-i386
+
+XFAIL: *
+
+COFF-i386: : file format
+COFF-i386: architecture: i386
+COFF-i386: HAS_RELOC
+COFF-i386: HAS_SYMS
+COFF-i386: start address 0x
+
+ELF-i386: : file format elf
+ELF-i386: architecture: i386
+ELF-i386: HAS_RELOC
+ELF-i386: HAS_SYMS
+ELF-i386: start address 0x
diff --git a/test/Object/objdump-relocations.test b/test/Object/objdump-relocations.test
index 2dcdb43..c4b564e 100644
--- a/test/Object/objdump-relocations.test
+++ b/test/Object/objdump-relocations.test
@@ -1,10 +1,10 @@
-RUN: llvm-objdump -r %p/TestObjectFiles/trivial-object-test.coff-i386 \
+RUN: llvm-objdump -r %p/Inputs/trivial-object-test.coff-i386 \
 RUN:              | FileCheck %s -check-prefix COFF-i386
-RUN: llvm-objdump -r %p/TestObjectFiles/trivial-object-test.coff-x86-64 \
+RUN: llvm-objdump -r %p/Inputs/trivial-object-test.coff-x86-64 \
 RUN:              | FileCheck %s -check-prefix COFF-x86-64
-RUN: llvm-objdump -r %p/TestObjectFiles/trivial-object-test.elf-i386 \
+RUN: llvm-objdump -r %p/Inputs/trivial-object-test.elf-i386 \
 RUN:              | FileCheck %s -check-prefix ELF-i386
-RUN: llvm-objdump -r %p/TestObjectFiles/trivial-object-test.elf-x86-64 \
+RUN: llvm-objdump -r %p/Inputs/trivial-object-test.elf-x86-64 \
 RUN:              | FileCheck %s -check-prefix ELF-x86-64
 
 COFF-i386: .text
diff --git a/test/Object/objdump-section-content.test b/test/Object/objdump-section-content.test
new file mode 100644
index 0000000..581e75e
--- /dev/null
+++ b/test/Object/objdump-section-content.test
@@ -0,0 +1,20 @@
+RUN: llvm-objdump -s %p/Inputs/trivial-object-test.coff-i386 \
+RUN:              | FileCheck %s -check-prefix COFF-i386
+RUN: llvm-objdump -s %p/Inputs/trivial-object-test.elf-i386 \
+RUN:              | FileCheck %s -check-prefix ELF-i386
+
+COFF-i386: trivial-object-test.coff-i386:     file format
+COFF-i386: Contents of section .text:
+COFF-i386:  0000 83ec0cc7 44240800 000000c7 04240000  ....D$.......$..
+COFF-i386:  0010 0000e800 000000e8 00000000 8b442408  .............D$.
+COFF-i386:  0020 83c40cc3                             ....
+COFF-i386: Contents of section .data:
+COFF-i386:  0000 48656c6c 6f20576f 726c6421 00        Hello World!.
+
+ELF-i386: trivial-object-test.elf-i386:     file format
+ELF-i386: Contents of section .text:
+ELF-i386:  0000 83ec0cc7 44240800 000000c7 04240000  ....D$.......$..
+ELF-i386:  0010 0000e8fc ffffffe8 fcffffff 8b442408  .............D$.
+ELF-i386:  0020 83c40cc3                             ....
+ELF-i386: Contents of section .rodata.str1.1:
+ELF-i386:  0024 48656c6c 6f20576f 726c6421 00        Hello World!.
diff --git a/test/Object/objdump-sectionheaders.test b/test/Object/objdump-sectionheaders.test
index 4515d00..a417d07 100644
--- a/test/Object/objdump-sectionheaders.test
+++ b/test/Object/objdump-sectionheaders.test
@@ -1,4 +1,4 @@
-; RUN: llvm-objdump -h %p/TestObjectFiles/trivial-object-test.elf-x86-64 \
+; RUN: llvm-objdump -h %p/Inputs/trivial-object-test.elf-x86-64 \
 ; RUN:              | FileCheck %s
 
 ; To verify this, use readelf -S, not objdump -h. Binutils objdump filters the
diff --git a/test/Object/objdump-symbol-table.test b/test/Object/objdump-symbol-table.test
new file mode 100644
index 0000000..8a0f440
--- /dev/null
+++ b/test/Object/objdump-symbol-table.test
@@ -0,0 +1,33 @@
+RUN: llvm-objdump -t %p/Inputs/trivial-object-test.coff-i386 \
+RUN:              | FileCheck %s -check-prefix COFF-i386
+RUN: llvm-objdump -t %p/Inputs/trivial-object-test.elf-i386 \
+RUN:              | FileCheck %s -check-prefix ELF-i386
+RUN: llvm-objdump -t %p/Inputs/trivial-object-test.macho-i386 \
+RUN:              | FileCheck %s -check-prefix macho-i386
+
+COFF-i386: trivial-object-test.coff-i386:     file format
+COFF-i386: SYMBOL TABLE:
+COFF-i386: [  0](sec  1)(fl 0x00)(ty   0)(scl   3) (nx 1) 0x00000000 .text
+COFF-i386: AUX scnlen 0x24 nreloc 3 nlnno 0 checksum 0x0 assoc 1 comdat 0
+COFF-i386: [  2](sec  2)(fl 0x00)(ty   0)(scl   3) (nx 1) 0x00000000 .data
+COFF-i386: AUX scnlen 0xd nreloc 0 nlnno 0 checksum 0x0 assoc 2 comdat 0
+COFF-i386: [  4](sec  1)(fl 0x00)(ty 200)(scl   2) (nx 0) 0x00000000 _main
+COFF-i386: [  5](sec  2)(fl 0x00)(ty   0)(scl   3) (nx 0) 0x00000000 L_.str
+COFF-i386: [  6](sec  0)(fl 0x00)(ty   0)(scl   2) (nx 0) 0x00000000 _puts
+COFF-i386: [  7](sec  0)(fl 0x00)(ty   0)(scl   2) (nx 0) 0x00000000 _SomeOtherFunction
+
+ELF-i386: trivial-object-test.elf-i386:     file format
+ELF-i386: SYMBOL TABLE:
+ELF-i386: 00000000 l    df *ABS*  00000000 trivial-object-test.s
+ELF-i386: 00000000 l    d  .text  00000000 .text
+ELF-i386: 00000024 l    d  .rodata.str1.1 00000000 .rodata.str1.1
+ELF-i386: 00000031 l    d  .note.GNU-stack        00000000 .note.GNU-stack
+ELF-i386: 00000000 g     F .text  00000024 main
+ELF-i386: 00000000         *UND*  00000000 SomeOtherFunction
+ELF-i386: 00000000         *UND*  00000000 puts
+
+macho-i386: trivial-object-test.macho-i386:        file format Mach-O 32-bit i386
+macho-i386: SYMBOL TABLE:
+macho-i386: 00000000 g     F __TEXT,__text  00000024 _main
+macho-i386: 00000000         *UND*  00000000 _SomeOtherFunction
+macho-i386: 00000000         *UND*  00000000 _puts
+\ No newline at end of file
diff --git a/test/Object/objdump-trivial-object.test b/test/Object/objdump-trivial-object.test
deleted file mode 100644
index c4855fd..0000000
--- a/test/Object/objdump-trivial-object.test
+++ /dev/null
@@ -1,54 +0,0 @@
-RUN: llvm-objdump -d %p/TestObjectFiles/trivial-object-test.coff-i386 \
-RUN:              | FileCheck %s -check-prefix COFF-i386
-RUN: llvm-objdump -d %p/TestObjectFiles/trivial-object-test.coff-x86-64 \
-RUN:              | FileCheck %s -check-prefix COFF-x86-64
-RUN: llvm-objdump -d %p/TestObjectFiles/trivial-object-test.elf-i386 \
-RUN:              | FileCheck %s -check-prefix ELF-i386
-RUN: llvm-objdump -d %p/TestObjectFiles/trivial-object-test.elf-x86-64 \
-RUN:              | FileCheck %s -check-prefix ELF-x86-64
-
-COFF-i386: file format COFF-i386
-COFF-i386: Disassembly of section .text:
-COFF-i386:        0:       83 ec 0c                                        subl    $12, %esp
-COFF-i386:        3:       c7 44 24 08 00 00 00 00                         movl    $0, 8(%esp)
-COFF-i386:        b:       c7 04 24 00 00 00 00                            movl    $0, (%esp)
-COFF-i386:       12:       e8 00 00 00 00                                  calll   0
-COFF-i386:       17:       e8 00 00 00 00                                  calll   0
-COFF-i386:       1c:       8b 44 24 08                                     movl    8(%esp), %eax
-COFF-i386:       20:       83 c4 0c                                        addl    $12, %esp
-COFF-i386:       23:       c3                                              ret
-
-COFF-x86-64: file format COFF-x86-64
-COFF-x86-64: Disassembly of section .text:
-COFF-x86-64:        0:       48 83 ec 28                                     subq    $40, %rsp
-COFF-x86-64:        4:       c7 44 24 24 00 00 00 00                         movl    $0, 36(%rsp)
-COFF-x86-64:        c:       48 8d 0d 00 00 00 00                            leaq    (%rip), %rcx
-COFF-x86-64:       13:       e8 00 00 00 00                                  callq   0
-COFF-x86-64:       18:       e8 00 00 00 00                                  callq   0
-COFF-x86-64:       1d:       8b 44 24 24                                     movl    36(%rsp), %eax
-COFF-x86-64:       21:       48 83 c4 28                                     addq    $40, %rsp
-COFF-x86-64:       25:       c3                                              ret
-
-
-ELF-i386: file format ELF32-i386
-ELF-i386: Disassembly of section .text:
-ELF-i386:        0:       83 ec 0c                                        subl    $12, %esp
-ELF-i386:        3:       c7 44 24 08 00 00 00 00                         movl    $0, 8(%esp)
-ELF-i386:        b:       c7 04 24 00 00 00 00                            movl    $0, (%esp)
-ELF-i386:       12:       e8 fc ff ff ff                                  calll   -4
-ELF-i386:       17:       e8 fc ff ff ff                                  calll   -4
-ELF-i386:       1c:       8b 44 24 08                                     movl    8(%esp), %eax
-ELF-i386:       20:       83 c4 0c                                        addl    $12, %esp
-ELF-i386:       23:       c3                                              ret
-
-ELF-x86-64: file format ELF64-x86-64
-ELF-x86-64: Disassembly of section .text:
-ELF-x86-64:        0:       48 83 ec 08                                     subq    $8, %rsp
-ELF-x86-64:        4:       c7 44 24 04 00 00 00 00                         movl    $0, 4(%rsp)
-ELF-x86-64:        c:       bf 00 00 00 00                                  movl    $0, %edi
-ELF-x86-64:       11:       e8 00 00 00 00                                  callq   0
-ELF-x86-64:       16:       30 c0                                           xorb    %al, %al
-ELF-x86-64:       18:       e8 00 00 00 00                                  callq   0
-ELF-x86-64:       1d:       8b 44 24 04                                     movl    4(%rsp), %eax
-ELF-x86-64:       21:       48 83 c4 08                                     addq    $8, %rsp
-ELF-x86-64:       25:       c3                                              ret
diff --git a/test/Object/readobj-elf-versioning.test b/test/Object/readobj-elf-versioning.test
new file mode 100644
index 0000000..0906f34
--- /dev/null
+++ b/test/Object/readobj-elf-versioning.test
@@ -0,0 +1,15 @@
+RUN: llvm-readobj %p/Inputs/elf-versioning-test.i386 \
+RUN:         | FileCheck %s -check-prefix ELF
+RUN: llvm-readobj %p/Inputs/elf-versioning-test.i386 \
+RUN:         | FileCheck %s -check-prefix ELF32
+RUN: llvm-readobj %p/Inputs/elf-versioning-test.x86_64 \
+RUN:         | FileCheck %s -check-prefix ELF
+RUN: llvm-readobj %p/Inputs/elf-versioning-test.x86_64 \
+RUN:         | FileCheck %s -check-prefix ELF64
+
+ELF: foo@@VER2          FUNC  {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global
+ELF: foo@VER1           FUNC  {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global
+ELF: unversioned_define FUNC  {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global
+
+ELF32: puts@GLIBC_2.0   FUNC {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} undef,global
+ELF64: puts@GLIBC_2.2.5 FUNC {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} undef,global
diff --git a/test/Object/readobj-shared-object.test b/test/Object/readobj-shared-object.test
new file mode 100644
index 0000000..3b5457c
--- /dev/null
+++ b/test/Object/readobj-shared-object.test
@@ -0,0 +1,59 @@
+RUN: llvm-readobj %p/Inputs/shared-object-test.elf-i386 \
+RUN:         | FileCheck %s -check-prefix ELF
+RUN: llvm-readobj %p/Inputs/shared-object-test.elf-i386 \
+RUN:         | FileCheck %s -check-prefix ELF32
+
+RUN: llvm-readobj %p/Inputs/shared-object-test.elf-x86-64 \
+RUN:         | FileCheck %s -check-prefix ELF
+RUN: llvm-readobj %p/Inputs/shared-object-test.elf-x86-64 \
+RUN:         | FileCheck %s -check-prefix ELF64
+
+ELF64:File Format : ELF64-x86-64
+ELF64:Arch        : x86_64
+ELF64:Address Size: 64 bits
+ELF64:Load Name   : libfoo.so
+
+ELF32:File Format : ELF32-i386
+ELF32:Arch        : i386
+ELF32:Address Size: 32 bits
+ELF32:Load Name   : libfoo.so
+
+ELF:Symbols:
+ELF:  .dynsym                DBG             {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  formatspecific
+ELF:  .dynstr                DBG             {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  formatspecific
+ELF:  .text                  DBG             {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  formatspecific
+ELF:  .eh_frame              DBG             {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  formatspecific
+ELF:  .tdata                 DBG             {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  formatspecific
+ELF:  .dynamic               DBG             {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  formatspecific
+ELF:  .got.plt               DBG             {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  formatspecific
+ELF:  .data                  DBG             {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  formatspecific
+ELF:  .bss                   DBG             {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  formatspecific
+ELF:  shared.ll              FILE            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  absolute,formatspecific
+ELF:  local_func             FUNC            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}
+ELF:  _GLOBAL_OFFSET_TABLE_  DATA            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  absolute
+ELF:  _DYNAMIC               DATA            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  absolute
+ELF:  common_sym             DATA            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global
+ELF:  tls_sym                DATA            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global,threadlocal
+ELF:  defined_sym            DATA            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global
+ELF:  __bss_start            ?               {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global,absolute
+ELF:  _end                   ?               {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global,absolute
+ELF:  global_func            FUNC            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global
+ELF:  _edata                 ?               {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global,absolute
+ELF:  Total: 21
+
+ELF:Dynamic Symbols:
+ELF:  common_sym             DATA            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global
+ELF:  tls_sym                DATA            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global,threadlocal
+ELF:  defined_sym            DATA            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global
+ELF:  __bss_start            ?               {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global,absolute
+ELF:  _end                   ?               {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global,absolute
+ELF:  global_func            FUNC            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global
+ELF:  _edata                 ?               {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global,absolute
+ELF:  Total: {{[0-9a-f]+}}
+
+ELF:Libraries needed:
+ELF:  libc.so.6
+ELF:  libm.so.6
+ELF:  Total: 2
+
+
diff --git a/test/Other/2009-03-31-CallGraph.ll b/test/Other/2009-03-31-CallGraph.ll
index d6653ec..864903c 100644
--- a/test/Other/2009-03-31-CallGraph.ll
+++ b/test/Other/2009-03-31-CallGraph.ll
@@ -15,6 +15,8 @@ ok2:
     unreachable
 
 lpad2:
+    %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+            cleanup
     unreachable
 }
 
@@ -29,3 +31,4 @@ declare void @f6() nounwind
 
 declare void @f8()
 
+declare i32 @__gxx_personality_v0(...)
diff --git a/test/Other/X86/dg.exp b/test/Other/X86/dg.exp
deleted file mode 100644
index 7b7bd4e..0000000
--- a/test/Other/X86/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
-}
diff --git a/test/Other/X86/lit.local.cfg b/test/Other/X86/lit.local.cfg
new file mode 100644
index 0000000..da2db5a
--- /dev/null
+++ b/test/Other/X86/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/Other/constant-fold-gep.ll b/test/Other/constant-fold-gep.ll
index e4521d5..d28c178 100644
--- a/test/Other/constant-fold-gep.ll
+++ b/test/Other/constant-fold-gep.ll
@@ -102,14 +102,17 @@
 @N = constant i64* getelementptr ({ i64, i64 }* null, i32 0, i32 1)
 @O = constant i64* getelementptr ([2 x i64]* null, i32 0, i32 1)
 
-; Fold GEP of a GEP. Theoretically some of these cases could be folded
-; without using targetdata, however that's not implemented yet.
+; Fold GEP of a GEP. Very simple cases are folded without targetdata.
 
+; PLAIN: @Y = global [3 x { i32, i32 }]* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 2)
 ; PLAIN: @Z = global i32* getelementptr inbounds (i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 0), i64 1)
+; OPT: @Y = global [3 x { i32, i32 }]* getelementptr ([3 x { i32, i32 }]* @ext, i64 2)
 ; OPT: @Z = global i32* getelementptr (i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 0), i64 1)
+; TO: @Y = global [3 x { i32, i32 }]* getelementptr ([3 x { i32, i32 }]* @ext, i64 2)
 ; TO: @Z = global i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 1)
 
 @ext = external global [3 x { i32, i32 }]
+@Y = global [3 x { i32, i32 }]* getelementptr inbounds ([3 x { i32, i32 }]* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 1), i64 1)
 @Z = global i32* getelementptr inbounds (i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 0), i64 1)
 
 ; Duplicate all of the above as function return values rather than
diff --git a/test/Other/dg.exp b/test/Other/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Other/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Other/lint.ll b/test/Other/lint.ll
index 4aa984e..ca2b1a3 100644
--- a/test/Other/lint.ll
+++ b/test/Other/lint.ll
@@ -151,7 +151,7 @@ entry:
 exit:
   %t3 = phi i32* [ %t4, %exit ]
   %t4 = bitcast i32* %t3 to i32*
-  %x = volatile load i32* %t3
+  %x = load volatile i32* %t3
   br label %exit
 }
 
diff --git a/test/Other/lit.local.cfg b/test/Other/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Other/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/TableGen/Dag.td b/test/TableGen/Dag.td
index 9ed2301..7ceb4e7 100644
--- a/test/TableGen/Dag.td
+++ b/test/TableGen/Dag.td
@@ -60,6 +60,7 @@ def VAL3 : bar<foo1, somedef1>;
 // CHECK-NEXT:  dag Dag1 = (somedef1 1);
 // CHECK-NEXT:  dag Dag2 = (somedef1 2);
 // CHECK-NEXT:  dag Dag3 = (somedef1 2);
+// CHECK-NEXT:  NAME = ?
 // CHECK-NEXT: }
 
 
@@ -68,4 +69,5 @@ def VAL4 : bar<foo2, somedef2>;
 // CHECK-NEXT:  dag Dag1 = (somedef1 1);
 // CHECK-NEXT:  dag Dag2 = (somedef2 2);
 // CHECK-NEXT:  dag Dag3 = (somedef2 2);
+// CHECK-NEXT:  NAME = ?
 // CHECK-NEXT: }
diff --git a/test/TableGen/ForeachList.td b/test/TableGen/ForeachList.td
new file mode 100644
index 0000000..99b7e14
--- /dev/null
+++ b/test/TableGen/ForeachList.td
@@ -0,0 +1,76 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+class Register<string name, int idx> {
+  string Name = name;
+  int Index = idx;
+}
+
+foreach i = [0, 1, 2, 3, 4, 5, 6, 7] in {
+  def R#i : Register<"R"#i, i>;
+  def F#i : Register<"F"#i, i>;
+}
+
+// CHECK: def F0
+// CHECK: string Name = "F0";
+// CHECK: int Index = 0;
+
+// CHECK: def F1
+// CHECK: string Name = "F1";
+// CHECK: int Index = 1;
+
+// CHECK: def F2
+// CHECK: string Name = "F2";
+// CHECK: int Index = 2;
+
+// CHECK: def F3
+// CHECK: string Name = "F3";
+// CHECK: int Index = 3;
+
+// CHECK: def F4
+// CHECK: string Name = "F4";
+// CHECK: int Index = 4;
+
+// CHECK: def F5
+// CHECK: string Name = "F5";
+// CHECK: int Index = 5;
+
+// CHECK: def F6
+// CHECK: string Name = "F6";
+// CHECK: int Index = 6;
+
+// CHECK: def F7
+// CHECK: string Name = "F7";
+// CHECK: int Index = 7;
+
+// CHECK: def R0
+// CHECK: string Name = "R0";
+// CHECK: int Index = 0;
+
+// CHECK: def R1
+// CHECK: string Name = "R1";
+// CHECK: int Index = 1;
+
+// CHECK: def R2
+// CHECK: string Name = "R2";
+// CHECK: int Index = 2;
+
+// CHECK: def R3
+// CHECK: string Name = "R3";
+// CHECK: int Index = 3;
+
+// CHECK: def R4
+// CHECK: string Name = "R4";
+// CHECK: int Index = 4;
+
+// CHECK: def R5
+// CHECK: string Name = "R5";
+// CHECK: int Index = 5;
+
+// CHECK: def R6
+// CHECK: string Name = "R6";
+// CHECK: int Index = 6;
+
+// CHECK: def R7
+// CHECK: string Name = "R7";
+// CHECK: int Index = 7;
diff --git a/test/TableGen/ForeachLoop.td b/test/TableGen/ForeachLoop.td
new file mode 100644
index 0000000..e2defe9
--- /dev/null
+++ b/test/TableGen/ForeachLoop.td
@@ -0,0 +1,43 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+class Register<string name, int idx> {
+  string Name = name;
+  int Index = idx;
+}
+
+foreach i = [0, 1, 2, 3, 4, 5, 6, 7] in
+  def R#i : Register<"R"#i, i>;
+
+
+// CHECK: def R0
+// CHECK: string Name = "R0";
+// CHECK: int Index = 0;
+
+// CHECK: def R1
+// CHECK: string Name = "R1";
+// CHECK: int Index = 1;
+
+// CHECK: def R2
+// CHECK: string Name = "R2";
+// CHECK: int Index = 2;
+
+// CHECK: def R3
+// CHECK: string Name = "R3";
+// CHECK: int Index = 3;
+
+// CHECK: def R4
+// CHECK: string Name = "R4";
+// CHECK: int Index = 4;
+
+// CHECK: def R5
+// CHECK: string Name = "R5";
+// CHECK: int Index = 5;
+
+// CHECK: def R6
+// CHECK: string Name = "R6";
+// CHECK: int Index = 6;
+
+// CHECK: def R7
+// CHECK: string Name = "R7";
+// CHECK: int Index = 7;
diff --git a/test/TableGen/NestedForeach.td b/test/TableGen/NestedForeach.td
new file mode 100644
index 0000000..e8c16f7
--- /dev/null
+++ b/test/TableGen/NestedForeach.td
@@ -0,0 +1,74 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+class Droid<string series, int release, string model, int patchlevel> {
+  string Series = series;
+  int Release = release;
+  string Model = model;
+  int Patchlevel = patchlevel;
+}
+
+foreach S = ["R", "C"] in {
+  foreach R = [2, 3, 4] in {
+    foreach M = ["D", "P", "Q"] in {
+      foreach P = [0, 2, 4] in {
+        def S#R#M#P : Droid<S, R, M, P>;
+      }
+    }
+  }
+}
+
+// CHECK: def C2D0
+// CHECK: def C2D2
+// CHECK: def C2D4
+// CHECK: def C2P0
+// CHECK: def C2P2
+// CHECK: def C2P4
+// CHECK: def C2Q0
+// CHECK: def C2Q2
+// CHECK: def C2Q4
+// CHECK: def C3D0
+// CHECK: def C3D2
+// CHECK: def C3D4
+// CHECK: def C3P0
+// CHECK: def C3P2
+// CHECK: def C3P4
+// CHECK: def C3Q0
+// CHECK: def C3Q2
+// CHECK: def C3Q4
+// CHECK: def C4D0
+// CHECK: def C4D2
+// CHECK: def C4D4
+// CHECK: def C4P0
+// CHECK: def C4P2
+// CHECK: def C4P4
+// CHECK: def C4Q0
+// CHECK: def C4Q2
+// CHECK: def C4Q4
+// CHECK: def R2D0
+// CHECK: def R2D2
+// CHECK: def R2D4
+// CHECK: def R2P0
+// CHECK: def R2P2
+// CHECK: def R2P4
+// CHECK: def R2Q0
+// CHECK: def R2Q2
+// CHECK: def R2Q4
+// CHECK: def R3D0
+// CHECK: def R3D2
+// CHECK: def R3D4
+// CHECK: def R3P0
+// CHECK: def R3P2
+// CHECK: def R3P4
+// CHECK: def R3Q0
+// CHECK: def R3Q2
+// CHECK: def R3Q4
+// CHECK: def R4D0
+// CHECK: def R4D2
+// CHECK: def R4D4
+// CHECK: def R4P0
+// CHECK: def R4P2
+// CHECK: def R4P4
+// CHECK: def R4Q0
+// CHECK: def R4Q2
+// CHECK: def R4Q4
diff --git a/test/TableGen/Paste.td b/test/TableGen/Paste.td
new file mode 100644
index 0000000..a7e2a5b
--- /dev/null
+++ b/test/TableGen/Paste.td
@@ -0,0 +1,36 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+class Instr<int i> {
+  int index = i;
+}
+
+multiclass Test {
+  def Vx#NAME#PS : Instr<0>;
+  def Vx#NAME#PD : Instr<1>;
+  def Vy#NAME#PS : Instr<2>;
+  def Vy#NAME#PD : Instr<3>;
+}
+
+defm ADD : Test;
+defm SUB : Test;
+
+// CHECK: VxADDPD
+// CHECK: index = 1;
+// CHECK: VxADDPS
+// CHECK: index = 0;
+
+// CHECK: VxSUBPD
+// CHECK: index = 1;
+// CHECK: VxSUBPS
+// CHECK: index = 0;
+
+// CHECK: VyADDPD
+// CHECK: index = 3;
+// CHECK: VyADDPS
+// CHECK: index = 2;
+
+// CHECK: VySUBPD
+// CHECK: index = 3;
+// CHECK: VySUBPS
+// CHECK: index = 2;
diff --git a/test/TableGen/SetTheory.td b/test/TableGen/SetTheory.td
index a4acea9..4d85aa3 100644
--- a/test/TableGen/SetTheory.td
+++ b/test/TableGen/SetTheory.td
@@ -165,3 +165,10 @@ def S9d : Set<(sequence "S%ua", 7, 9)>;
 // CHECK: S9b = [ e7 e6 e5 e4 e3 ]
 // CHECK: S9c = [ e0 ]
 // CHECK: S9d = [ a b c d e0 e3 e6 e9 e4 e5 e7 ]
+
+// The 'interleave' operator is almost the inverse of 'decimate'.
+def interleave;
+def T0a : Set<(interleave S9a, S9b)>;
+def T0b : Set<(interleave S8e, S8d)>;
+// CHECK: T0a = [ e3 e7 e4 e6 e5 ]
+// CHECK: T0b = [ e0 e1 e2 e3 e4 e5 e6 e7 e8 e9 ]
diff --git a/test/TableGen/SiblingForeach.td b/test/TableGen/SiblingForeach.td
new file mode 100644
index 0000000..a11f6f8
--- /dev/null
+++ b/test/TableGen/SiblingForeach.td
@@ -0,0 +1,277 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+class Set<int i = 0, int j = 0, int k = 0> {
+  int I = i;
+  int J = j;
+  int K = k;
+}
+
+foreach i = [1, 2, 3] in {
+  def I1_#i : Set<i>;
+  foreach j = [1, 2, 3] in {
+    def I1_#i#_J1_#j : Set<i, j>;
+  }
+  def I2_#i : Set<i>;
+  foreach j = [4, 5, 6] in {
+    foreach k = [1, 2, 3] in {
+      def I3_#i#_J2_#j#_K1_#k : Set<i, j, k>;
+    }
+    def I4_#i#_J3_#j : Set<i, j>;
+  }
+}
+
+// CHECK: def I1_1
+// CHECK: int I = 1;
+// CHECK: int J = 0;
+// CHECK: int K = 0;
+
+// CHECK: def I1_1_J1_1
+// CHECK: int I = 1;
+// CHECK: int J = 1;
+// CHECK: int K = 0;
+
+// CHECK: def I1_1_J1_2
+// CHECK: int I = 1;
+// CHECK: int J = 2;
+// CHECK: int K = 0;
+
+// CHECK: def I1_1_J1_3
+// CHECK: int I = 1;
+// CHECK: int J = 3;
+// CHECK: int K = 0;
+
+// CHECK: def I1_2
+// CHECK: int I = 2;
+// CHECK: int J = 0;
+// CHECK: int K = 0;
+
+// CHECK: def I1_2_J1_1
+// CHECK: int I = 2;
+// CHECK: int J = 1;
+// CHECK: int K = 0;
+
+// CHECK: def I1_2_J1_2
+// CHECK: int I = 2;
+// CHECK: int J = 2;
+// CHECK: int K = 0;
+
+// CHECK: def I1_2_J1_3
+// CHECK: int I = 2;
+// CHECK: int J = 3;
+// CHECK: int K = 0;
+
+// CHECK: def I1_3
+// CHECK: int I = 3;
+// CHECK: int J = 0;
+// CHECK: int K = 0;
+
+// CHECK: def I1_3_J1_1
+// CHECK: int I = 3;
+// CHECK: int J = 1;
+// CHECK: int K = 0;
+
+// CHECK: def I1_3_J1_2
+// CHECK: int I = 3;
+// CHECK: int J = 2;
+// CHECK: int K = 0;
+
+// CHECK: def I1_3_J1_3
+// CHECK: int I = 3;
+// CHECK: int J = 3;
+// CHECK: int K = 0;
+
+// CHECK: def I2_1
+// CHECK: int I = 1;
+// CHECK: int J = 0;
+// CHECK: int K = 0;
+
+// CHECK: def I2_2
+// CHECK: int I = 2;
+// CHECK: int J = 0;
+// CHECK: int K = 0;
+
+// CHECK: def I2_3
+// CHECK: int I = 3;
+// CHECK: int J = 0;
+// CHECK: int K = 0;
+
+// CHECK: def I3_1_J2_4_K1_1
+// CHECK: int I = 1;
+// CHECK: int J = 4;
+// CHECK: int K = 1;
+
+// CHECK: def I3_1_J2_4_K1_2
+// CHECK: int I = 1;
+// CHECK: int J = 4;
+// CHECK: int K = 2;
+
+// CHECK: def I3_1_J2_4_K1_3
+// CHECK: int I = 1;
+// CHECK: int J = 4;
+// CHECK: int K = 3;
+
+// CHECK: def I3_1_J2_5_K1_1
+// CHECK: int I = 1;
+// CHECK: int J = 5;
+// CHECK: int K = 1;
+
+// CHECK: def I3_1_J2_5_K1_2
+// CHECK: int I = 1;
+// CHECK: int J = 5;
+// CHECK: int K = 2;
+
+// CHECK: def I3_1_J2_5_K1_3
+// CHECK: int I = 1;
+// CHECK: int J = 5;
+// CHECK: int K = 3;
+
+// CHECK: def I3_1_J2_6_K1_1
+// CHECK: int I = 1;
+// CHECK: int J = 6;
+// CHECK: int K = 1;
+
+// CHECK: def I3_1_J2_6_K1_2
+// CHECK: int I = 1;
+// CHECK: int J = 6;
+// CHECK: int K = 2;
+
+// CHECK: def I3_1_J2_6_K1_3
+// CHECK: int I = 1;
+// CHECK: int J = 6;
+// CHECK: int K = 3;
+
+// CHECK: def I3_2_J2_4_K1_1
+// CHECK: int I = 2;
+// CHECK: int J = 4;
+// CHECK: int K = 1;
+
+// CHECK: def I3_2_J2_4_K1_2
+// CHECK: int I = 2;
+// CHECK: int J = 4;
+// CHECK: int K = 2;
+
+// CHECK: def I3_2_J2_4_K1_3
+// CHECK: int I = 2;
+// CHECK: int J = 4;
+// CHECK: int K = 3;
+
+// CHECK: def I3_2_J2_5_K1_1
+// CHECK: int I = 2;
+// CHECK: int J = 5;
+// CHECK: int K = 1;
+
+// CHECK: def I3_2_J2_5_K1_2
+// CHECK: int I = 2;
+// CHECK: int J = 5;
+// CHECK: int K = 2;
+
+// CHECK: def I3_2_J2_5_K1_3
+// CHECK: int I = 2;
+// CHECK: int J = 5;
+// CHECK: int K = 3;
+
+// CHECK: def I3_2_J2_6_K1_1
+// CHECK: int I = 2;
+// CHECK: int J = 6;
+// CHECK: int K = 1;
+
+// CHECK: def I3_2_J2_6_K1_2
+// CHECK: int I = 2;
+// CHECK: int J = 6;
+// CHECK: int K = 2;
+
+// CHECK: def I3_2_J2_6_K1_3
+// CHECK: int I = 2;
+// CHECK: int J = 6;
+// CHECK: int K = 3;
+
+// CHECK: def I3_3_J2_4_K1_1
+// CHECK: int I = 3;
+// CHECK: int J = 4;
+// CHECK: int K = 1;
+
+// CHECK: def I3_3_J2_4_K1_2
+// CHECK: int I = 3;
+// CHECK: int J = 4;
+// CHECK: int K = 2;
+
+// CHECK: def I3_3_J2_4_K1_3
+// CHECK: int I = 3;
+// CHECK: int J = 4;
+// CHECK: int K = 3;
+
+// CHECK: def I3_3_J2_5_K1_1
+// CHECK: int I = 3;
+// CHECK: int J = 5;
+// CHECK: int K = 1;
+
+// CHECK: def I3_3_J2_5_K1_2
+// CHECK: int I = 3;
+// CHECK: int J = 5;
+// CHECK: int K = 2;
+
+// CHECK: def I3_3_J2_5_K1_3
+// CHECK: int I = 3;
+// CHECK: int J = 5;
+// CHECK: int K = 3;
+
+// CHECK: def I3_3_J2_6_K1_1
+// CHECK: int I = 3;
+// CHECK: int J = 6;
+// CHECK: int K = 1;
+
+// CHECK: def I3_3_J2_6_K1_2
+// CHECK: int I = 3;
+// CHECK: int J = 6;
+// CHECK: int K = 2;
+
+// CHECK: def I3_3_J2_6_K1_3
+// CHECK: int I = 3;
+// CHECK: int J = 6;
+// CHECK: int K = 3;
+
+// CHECK: def I4_1_J3_4
+// CHECK: int I = 1;
+// CHECK: int J = 4;
+// CHECK: int K = 0;
+
+// CHECK: def I4_1_J3_5
+// CHECK: int I = 1;
+// CHECK: int J = 5;
+// CHECK: int K = 0;
+
+// CHECK: def I4_1_J3_6
+// CHECK: int I = 1;
+// CHECK: int J = 6;
+// CHECK: int K = 0;
+
+// CHECK: def I4_2_J3_4
+// CHECK: int I = 2;
+// CHECK: int J = 4;
+// CHECK: int K = 0;
+
+// CHECK: def I4_2_J3_5
+// CHECK: int I = 2;
+// CHECK: int J = 5;
+// CHECK: int K = 0;
+
+// CHECK: def I4_2_J3_6
+// CHECK: int I = 2;
+// CHECK: int J = 6;
+// CHECK: int K = 0;
+
+// CHECK: def I4_3_J3_4
+// CHECK: int I = 3;
+// CHECK: int J = 4;
+// CHECK: int K = 0;
+
+// CHECK: def I4_3_J3_5
+// CHECK: int I = 3;
+// CHECK: int J = 5;
+// CHECK: int K = 0;
+
+// CHECK: def I4_3_J3_6
+// CHECK: int I = 3;
+// CHECK: int J = 6;
+// CHECK: int K = 0;
diff --git a/test/TableGen/TwoLevelName.td b/test/TableGen/TwoLevelName.td
new file mode 100644
index 0000000..9c502f4
--- /dev/null
+++ b/test/TableGen/TwoLevelName.td
@@ -0,0 +1,46 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+class Type<string name, int length, int width> {
+  string Name = name;
+  int Length = length;
+  int Width = width;
+}
+
+multiclass OT1<string ss, int l, int w> {
+  def _#NAME# : Type<ss, l, w>;
+}
+multiclass OT2<string ss, int w> {
+  defm  v1#NAME# : OT1<!strconcat( "v1", ss),  1, w>;
+  defm  v2#NAME# : OT1<!strconcat( "v2", ss),  2, w>;
+  defm  v3#NAME# : OT1<!strconcat( "v3", ss),  3, w>;
+  defm  v4#NAME# : OT1<!strconcat( "v4", ss),  4, w>;
+  defm  v8#NAME# : OT1<!strconcat( "v8", ss),  8, w>;
+  defm v16#NAME# : OT1<!strconcat("v16", ss), 16, w>;
+}
+
+defm i8 : OT2<"i8", 8>;
+
+// CHECK: _v16i8
+// CHECK: Length = 16
+// CHECK: Width = 8
+
+// CHECK: _v1i8
+// CHECK: Length = 1
+// CHECK: Width = 8
+
+// CHECK: _v2i8
+// CHECK: Length = 2
+// CHECK: Width = 8
+
+// CHECK: _v3i8
+// CHECK: Length = 3
+// CHECK: Width = 8
+
+// CHECK: _v4i8
+// CHECK: Length = 4
+// CHECK: Width = 8
+
+// CHECK: _v8i8
+// CHECK: Length = 8
+// CHECK: Width = 8
diff --git a/test/TableGen/dg.exp b/test/TableGen/dg.exp
deleted file mode 100644
index f7d275a..0000000
--- a/test/TableGen/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{td}]]
diff --git a/test/TableGen/foreach.td b/test/TableGen/foreach.td
index cbcade9..814ae6e 100644
--- a/test/TableGen/foreach.td
+++ b/test/TableGen/foreach.td
@@ -1,6 +1,6 @@
 // RUN: llvm-tblgen %s | grep {Jr} | count 2
 // RUN: llvm-tblgen %s | grep {Sr} | count 2
-// RUN: llvm-tblgen %s | grep {NAME} | count 1
+// RUN: llvm-tblgen %s | grep {"NAME"} | count 1
 // XFAIL: vg_leak
 
 // Variables for foreach
diff --git a/test/TableGen/lit.local.cfg b/test/TableGen/lit.local.cfg
new file mode 100644
index 0000000..9a4a014
--- /dev/null
+++ b/test/TableGen/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.td']
diff --git a/test/Transforms/ADCE/dg.exp b/test/Transforms/ADCE/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/ADCE/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/ADCE/lit.local.cfg b/test/Transforms/ADCE/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/ADCE/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/ArgumentPromotion/dg.exp b/test/Transforms/ArgumentPromotion/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/ArgumentPromotion/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/ArgumentPromotion/lit.local.cfg b/test/Transforms/ArgumentPromotion/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/ArgumentPromotion/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/BBVectorize/cycle.ll b/test/Transforms/BBVectorize/cycle.ll
new file mode 100644
index 0000000..32a91ce
--- /dev/null
+++ b/test/Transforms/BBVectorize/cycle.ll
@@ -0,0 +1,112 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+
+; This test checks the non-trivial pairing-induced cycle avoidance. Without this cycle avoidance, the algorithm would otherwise
+; want to select the pairs:
+; %div77 = fdiv double %sub74, %mul76.v.r1 <->   %div125 = fdiv double %mul121, %mul76.v.r2 (div125 depends on mul117)
+; %add84 = fadd double %sub83, 2.000000e+00 <->   %add127 = fadd double %mul126, 1.000000e+00 (add127 depends on div77)
+; %mul95 = fmul double %sub45.v.r1, %sub36.v.r1 <->   %mul88 = fmul double %sub36.v.r1, %sub87 (mul88 depends on add84)
+; %mul117 = fmul double %sub39.v.r1, %sub116 <->   %mul97 = fmul double %mul96, %sub39.v.r1 (mul97 depends on mul95)
+; and so a dependency cycle would be created.
+
+declare double @fabs(double) nounwind readnone
+define void @test1(double %a, double %b, double %c, double %add80, double %mul1, double %mul2.v.r1, double %mul73, double %sub, double %sub65, double %F.0, i32 %n.0, double %Bnm3.0, double %Bnm2.0, double %Bnm1.0, double %Anm3.0, double %Anm2.0, double %Anm1.0) {
+entry:
+  br label %go
+go:
+  %conv = sitofp i32 %n.0 to double
+  %add35 = fadd double %conv, %a
+  %sub36 = fadd double %add35, -1.000000e+00
+  %add38 = fadd double %conv, %b
+  %sub39 = fadd double %add38, -1.000000e+00
+  %add41 = fadd double %conv, %c
+  %sub42 = fadd double %add41, -1.000000e+00
+  %sub45 = fadd double %add35, -2.000000e+00
+  %sub48 = fadd double %add38, -2.000000e+00
+  %sub51 = fadd double %add41, -2.000000e+00
+  %mul52 = shl nsw i32 %n.0, 1
+  %sub53 = add nsw i32 %mul52, -1
+  %conv54 = sitofp i32 %sub53 to double
+  %sub56 = add nsw i32 %mul52, -3
+  %conv57 = sitofp i32 %sub56 to double
+  %sub59 = add nsw i32 %mul52, -5
+  %conv60 = sitofp i32 %sub59 to double
+  %mul61 = mul nsw i32 %n.0, %n.0
+  %conv62 = sitofp i32 %mul61 to double
+  %mul63 = fmul double %conv62, 3.000000e+00
+  %mul67 = fmul double %sub65, %conv
+  %add68 = fadd double %mul63, %mul67
+  %add69 = fadd double %add68, 2.000000e+00
+  %sub71 = fsub double %add69, %mul2.v.r1
+  %sub74 = fsub double %sub71, %mul73
+  %mul75 = fmul double %conv57, 2.000000e+00
+  %mul76 = fmul double %mul75, %sub42
+  %div77 = fdiv double %sub74, %mul76
+  %mul82 = fmul double %add80, %conv
+  %sub83 = fsub double %mul63, %mul82
+  %add84 = fadd double %sub83, 2.000000e+00
+  %sub86 = fsub double %add84, %mul2.v.r1
+  %sub87 = fsub double -0.000000e+00, %sub86
+  %mul88 = fmul double %sub36, %sub87
+  %mul89 = fmul double %mul88, %sub39
+  %mul90 = fmul double %conv54, 4.000000e+00
+  %mul91 = fmul double %mul90, %conv57
+  %mul92 = fmul double %mul91, %sub51
+  %mul93 = fmul double %mul92, %sub42
+  %div94 = fdiv double %mul89, %mul93
+  %mul95 = fmul double %sub45, %sub36
+  %mul96 = fmul double %mul95, %sub48
+  %mul97 = fmul double %mul96, %sub39
+  %sub99 = fsub double %conv, %a
+  %sub100 = fadd double %sub99, -2.000000e+00
+  %mul101 = fmul double %mul97, %sub100
+  %sub103 = fsub double %conv, %b
+  %sub104 = fadd double %sub103, -2.000000e+00
+  %mul105 = fmul double %mul101, %sub104
+  %mul106 = fmul double %conv57, 8.000000e+00
+  %mul107 = fmul double %mul106, %conv57
+  %mul108 = fmul double %mul107, %conv60
+  %sub111 = fadd double %add41, -3.000000e+00
+  %mul112 = fmul double %mul108, %sub111
+  %mul113 = fmul double %mul112, %sub51
+  %mul114 = fmul double %mul113, %sub42
+  %div115 = fdiv double %mul105, %mul114
+  %sub116 = fsub double -0.000000e+00, %sub36
+  %mul117 = fmul double %sub39, %sub116
+  %sub119 = fsub double %conv, %c
+  %sub120 = fadd double %sub119, -1.000000e+00
+  %mul121 = fmul double %mul117, %sub120
+  %mul123 = fmul double %mul75, %sub51
+  %mul124 = fmul double %mul123, %sub42
+  %div125 = fdiv double %mul121, %mul124
+  %mul126 = fmul double %div77, %sub
+  %add127 = fadd double %mul126, 1.000000e+00
+  %mul128 = fmul double %add127, %Anm1.0
+  %mul129 = fmul double %div94, %sub
+  %add130 = fadd double %div125, %mul129
+  %mul131 = fmul double %add130, %sub
+  %mul132 = fmul double %mul131, %Anm2.0
+  %add133 = fadd double %mul128, %mul132
+  %mul134 = fmul double %div115, %mul1
+  %mul135 = fmul double %mul134, %Anm3.0
+  %add136 = fadd double %add133, %mul135
+  %mul139 = fmul double %add127, %Bnm1.0
+  %mul143 = fmul double %mul131, %Bnm2.0
+  %add144 = fadd double %mul139, %mul143
+  %mul146 = fmul double %mul134, %Bnm3.0
+  %add147 = fadd double %add144, %mul146
+  %div148 = fdiv double %add136, %add147
+  %sub149 = fsub double %F.0, %div148
+  %div150 = fdiv double %sub149, %F.0
+  %call = tail call double @fabs(double %div150) nounwind readnone
+  %cmp = fcmp olt double %call, 0x3CB0000000000000
+  %cmp152 = icmp sgt i32 %n.0, 20000
+  %or.cond = or i1 %cmp, %cmp152
+  br i1 %or.cond, label %done, label %go
+done:
+  ret void
+; CHECK: @test1
+; CHECK: go:
+; CHECK-NEXT: %conv.v.i0.1 = insertelement <2 x i32> undef, i32 %n.0, i32 0
+; FIXME: When tree pruning is deterministic, include the entire output.
+}
diff --git a/test/Transforms/BBVectorize/func-alias.ll b/test/Transforms/BBVectorize/func-alias.ll
new file mode 100644
index 0000000..9d0cc07
--- /dev/null
+++ b/test/Transforms/BBVectorize/func-alias.ll
@@ -0,0 +1,244 @@
+target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+; RUN: opt < %s -basicaa -bb-vectorize -bb-vectorize-req-chain-depth=2 -instcombine -gvn -S | FileCheck %s
+; The chain length is set to 2 so that this will do some vectorization; check that the order of the functions is unchanged.
+
+%struct.descriptor_dimension = type { i64, i64, i64 }
+%struct.__st_parameter_common = type { i32, i32, i8*, i32, i32, i8*, i32* }
+%struct.__st_parameter_dt = type { %struct.__st_parameter_common, i64, i64*, i64*, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, [256 x i8], i32*, i64, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, [4 x i8] }
+%"struct.array4_real(kind=4)" = type { i8*, i64, i64, [4 x %struct.descriptor_dimension] }
+%"struct.array4_integer(kind=4).73" = type { i8*, i64, i64, [4 x %struct.descriptor_dimension] }
+%struct.array4_unknown = type { i8*, i64, i64, [4 x %struct.descriptor_dimension] }
+
+@.cst4 = external unnamed_addr constant [11 x i8], align 8
+@.cst823 = external unnamed_addr constant [214 x i8], align 64
+@j.4580 = external global i32
+@j1.4581 = external global i32
+@nty1.4590 = external global [2 x i8]
+@nty2.4591 = external global [2 x i8]
+@xr1.4592 = external global float
+@xr2.4593 = external global float
+@yr1.4594 = external global float
+@yr2.4595 = external global float
+
+@__main1_MOD_iave = external unnamed_addr global i32
+@__main1_MOD_igrp = external global i32
+@__main1_MOD_iounit = external global i32
+@__main1_MOD_ityp = external global i32
+@__main1_MOD_mclmsg = external unnamed_addr global %struct.array4_unknown, align 32
+@__main1_MOD_mxdate = external unnamed_addr global %"struct.array4_integer(kind=4).73", align 32
+@__main1_MOD_rmxval = external unnamed_addr global %"struct.array4_real(kind=4)", align 32
+
+declare void @_gfortran_st_write(%struct.__st_parameter_dt*)
+declare void @_gfortran_st_write_done(%struct.__st_parameter_dt*)
+declare void @_gfortran_transfer_character_write(%struct.__st_parameter_dt*, i8*, i32)
+declare void @_gfortran_transfer_integer_write(%struct.__st_parameter_dt*, i8*, i32)
+declare void @_gfortran_transfer_real_write(%struct.__st_parameter_dt*, i8*, i32)
+
+define i1 @"prtmax__<bb 3>_<bb 34>"(%struct.__st_parameter_dt* %memtmp3, i32 %D.4627_188.reload) nounwind {
+; CHECK: prtmax__
+newFuncRoot:
+  br label %"<bb 34>"
+
+codeRepl80.exitStub:                              ; preds = %"<bb 34>"
+  ret i1 true
+
+"<bb 34>.<bb 25>_crit_edge.exitStub":             ; preds = %"<bb 34>"
+  ret i1 false
+
+"<bb 34>":                                        ; preds = %newFuncRoot
+  %tmp128 = getelementptr inbounds %struct.__st_parameter_dt* %memtmp3, i32 0, i32 0
+  %tmp129 = getelementptr inbounds %struct.__st_parameter_common* %tmp128, i32 0, i32 2
+  store i8* getelementptr inbounds ([11 x i8]* @.cst4, i64 0, i64 0), i8** %tmp129, align 8
+  %tmp130 = getelementptr inbounds %struct.__st_parameter_dt* %memtmp3, i32 0, i32 0
+  %tmp131 = getelementptr inbounds %struct.__st_parameter_common* %tmp130, i32 0, i32 3
+  store i32 31495, i32* %tmp131, align 4
+  %tmp132 = getelementptr inbounds %struct.__st_parameter_dt* %memtmp3, i32 0, i32 5
+  store i8* getelementptr inbounds ([214 x i8]* @.cst823, i64 0, i64 0), i8** %tmp132, align 8
+  %tmp133 = getelementptr inbounds %struct.__st_parameter_dt* %memtmp3, i32 0, i32 6
+  store i32 214, i32* %tmp133, align 4
+  %tmp134 = getelementptr inbounds %struct.__st_parameter_dt* %memtmp3, i32 0, i32 0
+  %tmp135 = getelementptr inbounds %struct.__st_parameter_common* %tmp134, i32 0, i32 0
+  store i32 4096, i32* %tmp135, align 4
+  %iounit.8748_288 = load i32* @__main1_MOD_iounit, align 4
+  %tmp136 = getelementptr inbounds %struct.__st_parameter_dt* %memtmp3, i32 0, i32 0
+  %tmp137 = getelementptr inbounds %struct.__st_parameter_common* %tmp136, i32 0, i32 1
+  store i32 %iounit.8748_288, i32* %tmp137, align 4
+  call void @_gfortran_st_write(%struct.__st_parameter_dt* %memtmp3) nounwind
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_integer_write to void (%struct.__st_parameter_dt*, i32*, i32)*)(%struct.__st_parameter_dt* %memtmp3, i32* @j.4580, i32 4) nounwind
+; CHECK: @_gfortran_transfer_integer_write
+  %D.75807_289 = load i8** getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 0), align 8
+  %j.8758_290 = load i32* @j.4580, align 4
+  %D.75760_291 = sext i32 %j.8758_290 to i64
+  %iave.8736_292 = load i32* @__main1_MOD_iave, align 4
+  %D.75620_293 = sext i32 %iave.8736_292 to i64
+  %D.75808_294 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 2, i32 0), align 8
+  %D.75809_295 = mul nsw i64 %D.75620_293, %D.75808_294
+  %igrp.8737_296 = load i32* @__main1_MOD_igrp, align 4
+  %D.75635_297 = sext i32 %igrp.8737_296 to i64
+  %D.75810_298 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 1, i32 0), align 8
+  %D.75811_299 = mul nsw i64 %D.75635_297, %D.75810_298
+  %D.75812_300 = add nsw i64 %D.75809_295, %D.75811_299
+  %D.75813_301 = add nsw i64 %D.75760_291, %D.75812_300
+  %ityp.8750_302 = load i32* @__main1_MOD_ityp, align 4
+  %D.75704_303 = sext i32 %ityp.8750_302 to i64
+  %D.75814_304 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 3, i32 0), align 8
+  %D.75815_305 = mul nsw i64 %D.75704_303, %D.75814_304
+  %D.75816_306 = add nsw i64 %D.75813_301, %D.75815_305
+  %D.75817_307 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 1), align 8
+  %D.75818_308 = add nsw i64 %D.75816_306, %D.75817_307
+  %tmp138 = bitcast i8* %D.75807_289 to [0 x float]*
+  %tmp139 = bitcast [0 x float]* %tmp138 to float*
+  %D.75819_309 = getelementptr inbounds float* %tmp139, i64 %D.75818_308
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* %D.75819_309, i32 4) nounwind
+; CHECK: @_gfortran_transfer_real_write
+  %D.75820_310 = load i8** getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 0), align 8
+  %j.8758_311 = load i32* @j.4580, align 4
+  %D.75760_312 = sext i32 %j.8758_311 to i64
+  %iave.8736_313 = load i32* @__main1_MOD_iave, align 4
+  %D.75620_314 = sext i32 %iave.8736_313 to i64
+  %D.75821_315 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 2, i32 0), align 8
+  %D.75822_316 = mul nsw i64 %D.75620_314, %D.75821_315
+  %igrp.8737_317 = load i32* @__main1_MOD_igrp, align 4
+  %D.75635_318 = sext i32 %igrp.8737_317 to i64
+  %D.75823_319 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 1, i32 0), align 8
+  %D.75824_320 = mul nsw i64 %D.75635_318, %D.75823_319
+  %D.75825_321 = add nsw i64 %D.75822_316, %D.75824_320
+  %D.75826_322 = add nsw i64 %D.75760_312, %D.75825_321
+  %ityp.8750_323 = load i32* @__main1_MOD_ityp, align 4
+  %D.75704_324 = sext i32 %ityp.8750_323 to i64
+  %D.75827_325 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 3, i32 0), align 8
+  %D.75828_326 = mul nsw i64 %D.75704_324, %D.75827_325
+  %D.75829_327 = add nsw i64 %D.75826_322, %D.75828_326
+  %D.75830_328 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 1), align 8
+  %D.75831_329 = add nsw i64 %D.75829_327, %D.75830_328
+  %tmp140 = bitcast i8* %D.75820_310 to [0 x [1 x i8]]*
+  %tmp141 = bitcast [0 x [1 x i8]]* %tmp140 to [1 x i8]*
+  %D.75832_330 = getelementptr inbounds [1 x i8]* %tmp141, i64 %D.75831_329
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_character_write to void (%struct.__st_parameter_dt*, [1 x i8]*, i32)*)(%struct.__st_parameter_dt* %memtmp3, [1 x i8]* %D.75832_330, i32 1) nounwind
+; CHECK: @_gfortran_transfer_character_write
+  %D.75833_331 = load i8** getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 0), align 8
+  %j.8758_332 = load i32* @j.4580, align 4
+  %D.75760_333 = sext i32 %j.8758_332 to i64
+  %iave.8736_334 = load i32* @__main1_MOD_iave, align 4
+  %D.75620_335 = sext i32 %iave.8736_334 to i64
+  %D.75834_336 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 2, i32 0), align 8
+  %D.75835_337 = mul nsw i64 %D.75620_335, %D.75834_336
+  %igrp.8737_338 = load i32* @__main1_MOD_igrp, align 4
+  %D.75635_339 = sext i32 %igrp.8737_338 to i64
+  %D.75836_340 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 1, i32 0), align 8
+  %D.75837_341 = mul nsw i64 %D.75635_339, %D.75836_340
+  %D.75838_342 = add nsw i64 %D.75835_337, %D.75837_341
+  %D.75839_343 = add nsw i64 %D.75760_333, %D.75838_342
+  %ityp.8750_344 = load i32* @__main1_MOD_ityp, align 4
+  %D.75704_345 = sext i32 %ityp.8750_344 to i64
+  %D.75840_346 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 3, i32 0), align 8
+  %D.75841_347 = mul nsw i64 %D.75704_345, %D.75840_346
+  %D.75842_348 = add nsw i64 %D.75839_343, %D.75841_347
+  %D.75843_349 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 1), align 8
+  %D.75844_350 = add nsw i64 %D.75842_348, %D.75843_349
+  %tmp142 = bitcast i8* %D.75833_331 to [0 x i32]*
+  %tmp143 = bitcast [0 x i32]* %tmp142 to i32*
+  %D.75845_351 = getelementptr inbounds i32* %tmp143, i64 %D.75844_350
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_integer_write to void (%struct.__st_parameter_dt*, i32*, i32)*)(%struct.__st_parameter_dt* %memtmp3, i32* %D.75845_351, i32 4) nounwind
+; CHECK: @_gfortran_transfer_integer_write
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* @xr1.4592, i32 4) nounwind
+; CHECK: @_gfortran_transfer_real_write
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* @yr1.4594, i32 4) nounwind
+; CHECK: @_gfortran_transfer_real_write
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_character_write to void (%struct.__st_parameter_dt*, [2 x i8]*, i32)*)(%struct.__st_parameter_dt* %memtmp3, [2 x i8]* @nty1.4590, i32 2) nounwind
+; CHECK: @_gfortran_transfer_character_write
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_integer_write to void (%struct.__st_parameter_dt*, i32*, i32)*)(%struct.__st_parameter_dt* %memtmp3, i32* @j1.4581, i32 4) nounwind
+; CHECK: @_gfortran_transfer_integer_write
+  %D.75807_352 = load i8** getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 0), align 8
+  %j1.8760_353 = load i32* @j1.4581, align 4
+  %D.75773_354 = sext i32 %j1.8760_353 to i64
+  %iave.8736_355 = load i32* @__main1_MOD_iave, align 4
+  %D.75620_356 = sext i32 %iave.8736_355 to i64
+  %D.75808_357 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 2, i32 0), align 8
+  %D.75809_358 = mul nsw i64 %D.75620_356, %D.75808_357
+  %igrp.8737_359 = load i32* @__main1_MOD_igrp, align 4
+  %D.75635_360 = sext i32 %igrp.8737_359 to i64
+  %D.75810_361 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 1, i32 0), align 8
+  %D.75811_362 = mul nsw i64 %D.75635_360, %D.75810_361
+  %D.75812_363 = add nsw i64 %D.75809_358, %D.75811_362
+  %D.75846_364 = add nsw i64 %D.75773_354, %D.75812_363
+  %ityp.8750_365 = load i32* @__main1_MOD_ityp, align 4
+  %D.75704_366 = sext i32 %ityp.8750_365 to i64
+  %D.75814_367 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 3, i32 0), align 8
+  %D.75815_368 = mul nsw i64 %D.75704_366, %D.75814_367
+  %D.75847_369 = add nsw i64 %D.75846_364, %D.75815_368
+  %D.75817_370 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 1), align 8
+  %D.75848_371 = add nsw i64 %D.75847_369, %D.75817_370
+  %tmp144 = bitcast i8* %D.75807_352 to [0 x float]*
+  %tmp145 = bitcast [0 x float]* %tmp144 to float*
+  %D.75849_372 = getelementptr inbounds float* %tmp145, i64 %D.75848_371
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* %D.75849_372, i32 4) nounwind
+; CHECK: @_gfortran_transfer_real_write
+  %D.75820_373 = load i8** getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 0), align 8
+  %j1.8760_374 = load i32* @j1.4581, align 4
+  %D.75773_375 = sext i32 %j1.8760_374 to i64
+  %iave.8736_376 = load i32* @__main1_MOD_iave, align 4
+  %D.75620_377 = sext i32 %iave.8736_376 to i64
+  %D.75821_378 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 2, i32 0), align 8
+  %D.75822_379 = mul nsw i64 %D.75620_377, %D.75821_378
+  %igrp.8737_380 = load i32* @__main1_MOD_igrp, align 4
+  %D.75635_381 = sext i32 %igrp.8737_380 to i64
+  %D.75823_382 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 1, i32 0), align 8
+  %D.75824_383 = mul nsw i64 %D.75635_381, %D.75823_382
+  %D.75825_384 = add nsw i64 %D.75822_379, %D.75824_383
+  %D.75850_385 = add nsw i64 %D.75773_375, %D.75825_384
+  %ityp.8750_386 = load i32* @__main1_MOD_ityp, align 4
+  %D.75704_387 = sext i32 %ityp.8750_386 to i64
+  %D.75827_388 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 3, i32 0), align 8
+  %D.75828_389 = mul nsw i64 %D.75704_387, %D.75827_388
+  %D.75851_390 = add nsw i64 %D.75850_385, %D.75828_389
+  %D.75830_391 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 1), align 8
+  %D.75852_392 = add nsw i64 %D.75851_390, %D.75830_391
+  %tmp146 = bitcast i8* %D.75820_373 to [0 x [1 x i8]]*
+  %tmp147 = bitcast [0 x [1 x i8]]* %tmp146 to [1 x i8]*
+  %D.75853_393 = getelementptr inbounds [1 x i8]* %tmp147, i64 %D.75852_392
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_character_write to void (%struct.__st_parameter_dt*, [1 x i8]*, i32)*)(%struct.__st_parameter_dt* %memtmp3, [1 x i8]* %D.75853_393, i32 1) nounwind
+; CHECK: @_gfortran_transfer_character_write
+  %D.75833_394 = load i8** getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 0), align 8
+  %j1.8760_395 = load i32* @j1.4581, align 4
+  %D.75773_396 = sext i32 %j1.8760_395 to i64
+  %iave.8736_397 = load i32* @__main1_MOD_iave, align 4
+  %D.75620_398 = sext i32 %iave.8736_397 to i64
+  %D.75834_399 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 2, i32 0), align 8
+  %D.75835_400 = mul nsw i64 %D.75620_398, %D.75834_399
+  %igrp.8737_401 = load i32* @__main1_MOD_igrp, align 4
+  %D.75635_402 = sext i32 %igrp.8737_401 to i64
+  %D.75836_403 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 1, i32 0), align 8
+  %D.75837_404 = mul nsw i64 %D.75635_402, %D.75836_403
+  %D.75838_405 = add nsw i64 %D.75835_400, %D.75837_404
+  %D.75854_406 = add nsw i64 %D.75773_396, %D.75838_405
+  %ityp.8750_407 = load i32* @__main1_MOD_ityp, align 4
+  %D.75704_408 = sext i32 %ityp.8750_407 to i64
+  %D.75840_409 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 3, i32 0), align 8
+  %D.75841_410 = mul nsw i64 %D.75704_408, %D.75840_409
+  %D.75855_411 = add nsw i64 %D.75854_406, %D.75841_410
+  %D.75843_412 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 1), align 8
+  %D.75856_413 = add nsw i64 %D.75855_411, %D.75843_412
+  %tmp148 = bitcast i8* %D.75833_394 to [0 x i32]*
+  %tmp149 = bitcast [0 x i32]* %tmp148 to i32*
+  %D.75857_414 = getelementptr inbounds i32* %tmp149, i64 %D.75856_413
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_integer_write to void (%struct.__st_parameter_dt*, i32*, i32)*)(%struct.__st_parameter_dt* %memtmp3, i32* %D.75857_414, i32 4) nounwind
+; CHECK: @_gfortran_transfer_integer_write
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* @xr2.4593, i32 4) nounwind
+; CHECK: @_gfortran_transfer_real_write
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* @yr2.4595, i32 4) nounwind
+; CHECK: @_gfortran_transfer_real_write
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_character_write to void (%struct.__st_parameter_dt*, [2 x i8]*, i32)*)(%struct.__st_parameter_dt* %memtmp3, [2 x i8]* @nty2.4591, i32 2) nounwind
+; CHECK: @_gfortran_transfer_character_write
+  call void @_gfortran_st_write_done(%struct.__st_parameter_dt* %memtmp3) nounwind
+; CHECK: @_gfortran_st_write_done
+  %j.8758_415 = load i32* @j.4580, align 4
+  %D.4634_416 = icmp eq i32 %j.8758_415, %D.4627_188.reload
+  %j.8758_417 = load i32* @j.4580, align 4
+  %j.8770_418 = add nsw i32 %j.8758_417, 1
+  store i32 %j.8770_418, i32* @j.4580, align 4
+  %tmp150 = icmp ne i1 %D.4634_416, false
+  br i1 %tmp150, label %codeRepl80.exitStub, label %"<bb 34>.<bb 25>_crit_edge.exitStub"
+}
+
diff --git a/test/Transforms/BBVectorize/ld1.ll b/test/Transforms/BBVectorize/ld1.ll
new file mode 100644
index 0000000..cea225d
--- /dev/null
+++ b/test/Transforms/BBVectorize/ld1.ll
@@ -0,0 +1,41 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+
+define double @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
+entry:
+  %i0 = load double* %a, align 8
+  %i1 = load double* %b, align 8
+  %mul = fmul double %i0, %i1
+  %i2 = load double* %c, align 8
+  %add = fadd double %mul, %i2
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  %arrayidx6 = getelementptr inbounds double* %c, i64 1
+  %i5 = load double* %arrayidx6, align 8
+  %add7 = fadd double %mul5, %i5
+  %mul9 = fmul double %add, %i1
+  %add11 = fadd double %mul9, %i2
+  %mul13 = fmul double %add7, %i4
+  %add15 = fadd double %mul13, %i5
+  %mul16 = fmul double %add11, %add15
+  ret double %mul16
+; CHECK: @test1
+; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
+; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
+; CHECK: %i2.v.i0 = bitcast double* %c to <2 x double>*
+; CHECK: %i0 = load <2 x double>* %i0.v.i0, align 8
+; CHECK: %i1 = load <2 x double>* %i1.v.i0, align 8
+; CHECK: %mul = fmul <2 x double> %i0, %i1
+; CHECK: %i2 = load <2 x double>* %i2.v.i0, align 8
+; CHECK: %add = fadd <2 x double> %mul, %i2
+; CHECK: %mul9 = fmul <2 x double> %add, %i1
+; CHECK: %add11 = fadd <2 x double> %mul9, %i2
+; CHECK: %add11.v.r1 = extractelement <2 x double> %add11, i32 0
+; CHECK: %add11.v.r2 = extractelement <2 x double> %add11, i32 1
+; CHECK: %mul16 = fmul double %add11.v.r1, %add11.v.r2
+; CHECK: ret double %mul16
+}
+
diff --git a/test/Transforms/BBVectorize/lit.local.cfg b/test/Transforms/BBVectorize/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/BBVectorize/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/BBVectorize/loop1.ll b/test/Transforms/BBVectorize/loop1.ll
new file mode 100644
index 0000000..bebc91a
--- /dev/null
+++ b/test/Transforms/BBVectorize/loop1.ll
@@ -0,0 +1,93 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -basicaa -loop-unroll -unroll-threshold=45 -unroll-allow-partial -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-UNRL
+; The second check covers the use of alias analysis (with loop unrolling).
+
+define void @test1(double* noalias %out, double* noalias %in1, double* noalias %in2) nounwind uwtable {
+entry:
+  br label %for.body
+; CHECK: @test1
+; CHECK-UNRL: @test1
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds double* %in1, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8
+  %arrayidx2 = getelementptr inbounds double* %in2, i64 %indvars.iv
+  %1 = load double* %arrayidx2, align 8
+  %mul = fmul double %0, %0
+  %mul3 = fmul double %0, %1
+  %add = fadd double %mul, %mul3
+  %add4 = fadd double %1, %1
+  %add5 = fadd double %add4, %0
+  %mul6 = fmul double %0, %add5
+  %add7 = fadd double %add, %mul6
+  %mul8 = fmul double %1, %1
+  %add9 = fadd double %0, %0
+  %add10 = fadd double %add9, %0
+  %mul11 = fmul double %mul8, %add10
+  %add12 = fadd double %add7, %mul11
+  %arrayidx14 = getelementptr inbounds double* %out, i64 %indvars.iv
+  store double %add12, double* %arrayidx14, align 8
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 10
+  br i1 %exitcond, label %for.end, label %for.body
+; CHECK: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+; CHECK: %arrayidx = getelementptr inbounds double* %in1, i64 %indvars.iv
+; CHECK: %0 = load double* %arrayidx, align 8
+; CHECK: %arrayidx2 = getelementptr inbounds double* %in2, i64 %indvars.iv
+; CHECK: %1 = load double* %arrayidx2, align 8
+; CHECK: %mul = fmul double %0, %0
+; CHECK: %mul3 = fmul double %0, %1
+; CHECK: %add = fadd double %mul, %mul3
+; CHECK: %add4.v.i1.1 = insertelement <2 x double> undef, double %1, i32 0
+; CHECK: %mul8 = fmul double %1, %1
+; CHECK: %add4.v.i1.2 = insertelement <2 x double> %add4.v.i1.1, double %0, i32 1
+; CHECK: %add4 = fadd <2 x double> %add4.v.i1.2, %add4.v.i1.2
+; CHECK: %add5.v.i1.1 = insertelement <2 x double> undef, double %0, i32 0
+; CHECK: %add5.v.i1.2 = insertelement <2 x double> %add5.v.i1.1, double %0, i32 1
+; CHECK: %add5 = fadd <2 x double> %add4, %add5.v.i1.2
+; CHECK: %mul6.v.i0.2 = insertelement <2 x double> %add5.v.i1.1, double %mul8, i32 1
+; CHECK: %mul6 = fmul <2 x double> %mul6.v.i0.2, %add5
+; CHECK: %mul6.v.r1 = extractelement <2 x double> %mul6, i32 0
+; CHECK: %mul6.v.r2 = extractelement <2 x double> %mul6, i32 1
+; CHECK: %add7 = fadd double %add, %mul6.v.r1
+; CHECK: %add12 = fadd double %add7, %mul6.v.r2
+; CHECK: %arrayidx14 = getelementptr inbounds double* %out, i64 %indvars.iv
+; CHECK: store double %add12, double* %arrayidx14, align 8
+; CHECK: %indvars.iv.next = add i64 %indvars.iv, 1
+; CHECK: %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+; CHECK: %exitcond = icmp eq i32 %lftr.wideiv, 10
+; CHECK: br i1 %exitcond, label %for.end, label %for.body
+; CHECK-UNRL: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next.1, %for.body ]
+; CHECK-UNRL: %arrayidx = getelementptr inbounds double* %in1, i64 %indvars.iv
+; CHECK-UNRL: %0 = bitcast double* %arrayidx to <2 x double>*
+; CHECK-UNRL: %arrayidx2 = getelementptr inbounds double* %in2, i64 %indvars.iv
+; CHECK-UNRL: %1 = bitcast double* %arrayidx2 to <2 x double>*
+; CHECK-UNRL: %arrayidx14 = getelementptr inbounds double* %out, i64 %indvars.iv
+; CHECK-UNRL: %2 = load <2 x double>* %0, align 8
+; CHECK-UNRL: %3 = load <2 x double>* %1, align 8
+; CHECK-UNRL: %mul = fmul <2 x double> %2, %2
+; CHECK-UNRL: %mul3 = fmul <2 x double> %2, %3
+; CHECK-UNRL: %add = fadd <2 x double> %mul, %mul3
+; CHECK-UNRL: %add4 = fadd <2 x double> %3, %3
+; CHECK-UNRL: %add5 = fadd <2 x double> %add4, %2
+; CHECK-UNRL: %mul6 = fmul <2 x double> %2, %add5
+; CHECK-UNRL: %add7 = fadd <2 x double> %add, %mul6
+; CHECK-UNRL: %mul8 = fmul <2 x double> %3, %3
+; CHECK-UNRL: %add9 = fadd <2 x double> %2, %2
+; CHECK-UNRL: %add10 = fadd <2 x double> %add9, %2
+; CHECK-UNRL: %mul11 = fmul <2 x double> %mul8, %add10
+; CHECK-UNRL: %add12 = fadd <2 x double> %add7, %mul11
+; CHECK-UNRL: %4 = bitcast double* %arrayidx14 to <2 x double>*
+; CHECK-UNRL: store <2 x double> %add12, <2 x double>* %4, align 8
+; CHECK-UNRL: %indvars.iv.next.1 = add i64 %indvars.iv, 2
+; CHECK-UNRL: %lftr.wideiv.1 = trunc i64 %indvars.iv.next.1 to i32
+; CHECK-UNRL: %exitcond.1 = icmp eq i32 %lftr.wideiv.1, 10
+; CHECK-UNRL: br i1 %exitcond.1, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
diff --git a/test/Transforms/BBVectorize/mem-op-depth.ll b/test/Transforms/BBVectorize/mem-op-depth.ll
new file mode 100644
index 0000000..84f16bd
--- /dev/null
+++ b/test/Transforms/BBVectorize/mem-op-depth.ll
@@ -0,0 +1,22 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=6 -instcombine -gvn -S | FileCheck %s
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x float] zeroinitializer, align 16
+
+define i32 @test1() nounwind {
+; CHECK: @test1
+  %V1 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 0), align 16
+  %V2 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 1), align 4
+  %V3= load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 2), align 8
+  %V4 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 3), align 4
+; CHECK:   %V1 = load <4 x float>* bitcast ([1024 x float]* @A to <4 x float>*), align 16
+  store float %V1, float* getelementptr inbounds ([1024 x float]* @B, i64 0, i64 0), align 16
+  store float %V2, float* getelementptr inbounds ([1024 x float]* @B, i64 0, i64 1), align 4
+  store float %V3, float* getelementptr inbounds ([1024 x float]* @B, i64 0, i64 2), align 8
+  store float %V4, float* getelementptr inbounds ([1024 x float]* @B, i64 0, i64 3), align 4
+; CHECK-NEXT: store <4 x float> %V1, <4 x float>* bitcast ([1024 x float]* @B to <4 x float>*), align 16
+  ret i32 0
+; CHECK-NEXT: ret i32 0
+}
diff --git a/test/Transforms/BBVectorize/req-depth.ll b/test/Transforms/BBVectorize/req-depth.ll
new file mode 100644
index 0000000..8c9cc3c
--- /dev/null
+++ b/test/Transforms/BBVectorize/req-depth.ll
@@ -0,0 +1,17 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth 3 -S | FileCheck %s -check-prefix=CHECK-RD3
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth 2 -S | FileCheck %s -check-prefix=CHECK-RD2
+
+define double @test1(double %A1, double %A2, double %B1, double %B2) {
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+	%Y1 = fmul double %X1, %A1
+	%Y2 = fmul double %X2, %A2
+	%R  = fmul double %Y1, %Y2
+	ret double %R
+; CHECK-RD3: @test1
+; CHECK-RD2: @test1
+; CHECK-RD3-NOT: <2 x double>
+; CHECK-RD2: <2 x double>
+}
+
diff --git a/test/Transforms/BBVectorize/search-limit.ll b/test/Transforms/BBVectorize/search-limit.ll
new file mode 100644
index 0000000..d9945b5
--- /dev/null
+++ b/test/Transforms/BBVectorize/search-limit.ll
@@ -0,0 +1,46 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-search-limit=4 -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-SL4
+
+define double @test1(double %A1, double %A2, double %B1, double %B2) {
+; CHECK: @test1
+; CHECK-SL4: @test1
+; CHECK-SL4-NOT: <2 x double>
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+	%Y1 = fmul double %X1, %A1
+	%Y2 = fmul double %X2, %A2
+; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
+	%Z1 = fadd double %Y1, %B1
+        ; Here we have a dependency chain: the short search limit will not
+        ; see past this chain and so will not see the second part of the
+        ; pair to vectorize.
+        %mul41 = fmul double %Z1, %Y2
+        %sub48 = fsub double %Z1, %mul41
+        %mul62 = fmul double %Z1, %sub48
+        %sub69 = fsub double %Z1, %mul62
+        %mul83 = fmul double %Z1, %sub69
+        %sub90 = fsub double %Z1, %mul83
+        %mul104 = fmul double %Z1, %sub90
+        %sub111 = fsub double %Z1, %mul104
+        %mul125 = fmul double %Z1, %sub111
+        %sub132 = fsub double %Z1, %mul125
+        %mul146 = fmul double %Z1, %sub132
+        %sub153 = fsub double %Z1, %mul146
+        ; end of chain.
+	%Z2 = fadd double %Y2, %B2
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+	%R1  = fdiv double %Z1, %Z2
+        %R   = fmul double %R1, %sub153
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R1 = fdiv double %Z1.v.r1, %Z1.v.r2
+	ret double %R
+; CHECK: ret double %R
+}
+
diff --git a/test/Transforms/BBVectorize/simple-int.ll b/test/Transforms/BBVectorize/simple-int.ll
new file mode 100644
index 0000000..6844977
--- /dev/null
+++ b/test/Transforms/BBVectorize/simple-int.ll
@@ -0,0 +1,103 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+
+declare double @llvm.fma.f64(double, double, double)
+declare double @llvm.cos.f64(double)
+declare double @llvm.powi.f64(double, i32)
+
+; Basic depth-3 chain with fma
+define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+	%Y1 = call double @llvm.fma.f64(double %X1, double %A1, double %C1)
+	%Y2 = call double @llvm.fma.f64(double %X2, double %A2, double %C2)
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+	%R  = fmul double %Z1, %Z2
+	ret double %R
+; CHECK: @test1
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+; CHECK: %Y1.v.i2.1 = insertelement <2 x double> undef, double %C1, i32 0
+; CHECK: %Y1.v.i2.2 = insertelement <2 x double> %Y1.v.i2.1, double %C2, i32 1
+; CHECK: %Y1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %X1, <2 x double> %X1.v.i0.2, <2 x double> %Y1.v.i2.2)
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with cos
+define double @test2(double %A1, double %A2, double %B1, double %B2) {
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+	%Y1 = call double @llvm.cos.f64(double %X1)
+	%Y2 = call double @llvm.cos.f64(double %X2)
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+	%R  = fmul double %Z1, %Z2
+	ret double %R
+; CHECK: @test2
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+; CHECK: %Y1 = call <2 x double> @llvm.cos.v2f64(<2 x double> %X1)
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with powi
+define double @test3(double %A1, double %A2, double %B1, double %B2, i32 %P) {
+
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+	%Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
+	%Y2 = call double @llvm.powi.f64(double %X2, i32 %P)
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+	%R  = fmul double %Z1, %Z2
+	ret double %R
+; CHECK: @test3
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+; CHECK: %Y1 = call <2 x double> @llvm.powi.v2f64(<2 x double> %X1, i32 %P)
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with powi (different powers: should not vectorize)
+define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) {
+
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+        %P2 = add i32 %P, 1
+	%Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
+	%Y2 = call double @llvm.powi.f64(double %X2, i32 %P2)
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+	%R  = fmul double %Z1, %Z2
+	ret double %R
+; CHECK: @test4
+; CHECK-NOT: <2 x double>
+; CHECK: ret double %R
+}
+
+; CHECK: declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) nounwind readonly
+; CHECK: declare <2 x double> @llvm.powi.v2f64(<2 x double>, i32) nounwind readonly
+
diff --git a/test/Transforms/BBVectorize/simple-ldstr.ll b/test/Transforms/BBVectorize/simple-ldstr.ll
new file mode 100644
index 0000000..a5397ee
--- /dev/null
+++ b/test/Transforms/BBVectorize/simple-ldstr.ll
@@ -0,0 +1,110 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-aligned-only -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-AO
+
+; Simple 3-pair chain with loads and stores
+define void @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
+entry:
+  %i0 = load double* %a, align 8
+  %i1 = load double* %b, align 8
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  store double %mul, double* %c, align 8
+  %arrayidx5 = getelementptr inbounds double* %c, i64 1
+  store double %mul5, double* %arrayidx5, align 8
+  ret void
+; CHECK: @test1
+; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
+; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
+; CHECK: %i0 = load <2 x double>* %i0.v.i0, align 8
+; CHECK: %i1 = load <2 x double>* %i1.v.i0, align 8
+; CHECK: %mul = fmul <2 x double> %i0, %i1
+; CHECK: %0 = bitcast double* %c to <2 x double>*
+; CHECK: store <2 x double> %mul, <2 x double>* %0, align 8
+; CHECK: ret void
+; CHECK-AO: @test1
+; CHECK-AO-NOT: <2 x double>
+}
+
+; Simple chain with extending loads and stores
+define void @test2(float* %a, float* %b, double* %c) nounwind uwtable readonly {
+entry:
+  %i0f = load float* %a, align 4
+  %i0 = fpext float %i0f to double
+  %i1f = load float* %b, align 4
+  %i1 = fpext float %i1f to double
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds float* %a, i64 1
+  %i3f = load float* %arrayidx3, align 4
+  %i3 = fpext float %i3f to double
+  %arrayidx4 = getelementptr inbounds float* %b, i64 1
+  %i4f = load float* %arrayidx4, align 4
+  %i4 = fpext float %i4f to double
+  %mul5 = fmul double %i3, %i4
+  store double %mul, double* %c, align 8
+  %arrayidx5 = getelementptr inbounds double* %c, i64 1
+  store double %mul5, double* %arrayidx5, align 8
+  ret void
+; CHECK: @test2
+; CHECK: %i0f.v.i0 = bitcast float* %a to <2 x float>*
+; CHECK: %i1f.v.i0 = bitcast float* %b to <2 x float>*
+; CHECK: %i0f = load <2 x float>* %i0f.v.i0, align 4
+; CHECK: %i0 = fpext <2 x float> %i0f to <2 x double>
+; CHECK: %i1f = load <2 x float>* %i1f.v.i0, align 4
+; CHECK: %i1 = fpext <2 x float> %i1f to <2 x double>
+; CHECK: %mul = fmul <2 x double> %i0, %i1
+; CHECK: %0 = bitcast double* %c to <2 x double>*
+; CHECK: store <2 x double> %mul, <2 x double>* %0, align 8
+; CHECK: ret void
+; CHECK-AO: @test2
+; CHECK-AO-NOT: <2 x double>
+}
+
+; Simple chain with loads and truncating stores
+define void @test3(double* %a, double* %b, float* %c) nounwind uwtable readonly {
+entry:
+  %i0 = load double* %a, align 8
+  %i1 = load double* %b, align 8
+  %mul = fmul double %i0, %i1
+  %mulf = fptrunc double %mul to float
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  %mul5f = fptrunc double %mul5 to float
+  store float %mulf, float* %c, align 8
+  %arrayidx5 = getelementptr inbounds float* %c, i64 1
+  store float %mul5f, float* %arrayidx5, align 4
+  ret void
+; CHECK: @test3
+; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
+; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
+; CHECK: %i0 = load <2 x double>* %i0.v.i0, align 8
+; CHECK: %i1 = load <2 x double>* %i1.v.i0, align 8
+; CHECK: %mul = fmul <2 x double> %i0, %i1
+; CHECK: %mulf = fptrunc <2 x double> %mul to <2 x float>
+; CHECK: %0 = bitcast float* %c to <2 x float>*
+; CHECK: store <2 x float> %mulf, <2 x float>* %0, align 8
+; CHECK: ret void
+; CHECK-AO: @test3
+; CHECK-AO: %i0 = load double* %a, align 8
+; CHECK-AO: %i1 = load double* %b, align 8
+; CHECK-AO: %mul.v.i1.1 = insertelement <2 x double> undef, double %i1, i32 0
+; CHECK-AO: %mul.v.i0.1 = insertelement <2 x double> undef, double %i0, i32 0
+; CHECK-AO: %arrayidx3 = getelementptr inbounds double* %a, i64 1
+; CHECK-AO: %i3 = load double* %arrayidx3, align 8
+; CHECK-AO: %arrayidx4 = getelementptr inbounds double* %b, i64 1
+; CHECK-AO: %i4 = load double* %arrayidx4, align 8
+; CHECK-AO: %mul.v.i1.2 = insertelement <2 x double> %mul.v.i1.1, double %i4, i32 1
+; CHECK-AO: %mul.v.i0.2 = insertelement <2 x double> %mul.v.i0.1, double %i3, i32 1
+; CHECK-AO: %mul = fmul <2 x double> %mul.v.i0.2, %mul.v.i1.2
+; CHECK-AO: %mulf = fptrunc <2 x double> %mul to <2 x float>
+; CHECK-AO: %0 = bitcast float* %c to <2 x float>*
+; CHECK-AO: store <2 x float> %mulf, <2 x float>* %0, align 8
+; CHECK-AO: ret void
+}
diff --git a/test/Transforms/BBVectorize/simple.ll b/test/Transforms/BBVectorize/simple.ll
new file mode 100644
index 0000000..904d766
--- /dev/null
+++ b/test/Transforms/BBVectorize/simple.ll
@@ -0,0 +1,152 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+
+; Basic depth-3 chain
+define double @test1(double %A1, double %A2, double %B1, double %B2) {
+; CHECK: @test1
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+	%Y1 = fmul double %X1, %A1
+	%Y2 = fmul double %X2, %A2
+; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+	%R  = fmul double %Z1, %Z2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+	ret double %R
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain (last pair permuted)
+define double @test2(double %A1, double %A2, double %B1, double %B2) {
+; CHECK: @test2
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+	%Y1 = fmul double %X1, %A1
+	%Y2 = fmul double %X2, %A2
+; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
+	%Z1 = fadd double %Y2, %B1
+	%Z2 = fadd double %Y1, %B2
+; CHECK: %Z1.v.i0 = shufflevector <2 x double> %Y1, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+; CHECK: %Z1 = fadd <2 x double> %Z1.v.i0, %X1.v.i1.2
+	%R  = fmul double %Z1, %Z2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+	ret double %R
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain (last pair first splat)
+define double @test3(double %A1, double %A2, double %B1, double %B2) {
+; CHECK: @test3
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+	%Y1 = fmul double %X1, %A1
+	%Y2 = fmul double %X2, %A2
+; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
+	%Z1 = fadd double %Y2, %B1
+	%Z2 = fadd double %Y2, %B2
+; CHECK: %Z1.v.i0 = shufflevector <2 x double> %Y1, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+; CHECK: %Z1 = fadd <2 x double> %Z1.v.i0, %X1.v.i1.2
+	%R  = fmul double %Z1, %Z2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+	ret double %R
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain (last pair second splat)
+define double @test4(double %A1, double %A2, double %B1, double %B2) {
+; CHECK: @test4
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+	%Y1 = fmul double %X1, %A1
+	%Y2 = fmul double %X2, %A2
+; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y1, %B2
+; CHECK: %Z1.v.i0 = shufflevector <2 x double> %Y1, <2 x double> undef, <2 x i32> zeroinitializer
+; CHECK: %Z1 = fadd <2 x double> %Z1.v.i0, %X1.v.i1.2
+	%R  = fmul double %Z1, %Z2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+	ret double %R
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain
+define <2 x float> @test5(<2 x float> %A1, <2 x float> %A2, <2 x float> %B1, <2 x float> %B2) {
+; CHECK: @test5
+; CHECK: %X1.v.i1 = shufflevector <2 x float> %B1, <2 x float> %B2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK: %X1.v.i0 = shufflevector <2 x float> %A1, <2 x float> %A2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+	%X1 = fsub <2 x float> %A1, %B1
+	%X2 = fsub <2 x float> %A2, %B2
+; CHECK: %X1 = fsub <4 x float> %X1.v.i0, %X1.v.i1
+	%Y1 = fmul <2 x float> %X1, %A1
+	%Y2 = fmul <2 x float> %X2, %A2
+; CHECK: %Y1 = fmul <4 x float> %X1, %X1.v.i0
+	%Z1 = fadd <2 x float> %Y1, %B1
+	%Z2 = fadd <2 x float> %Y2, %B2
+; CHECK: %Z1 = fadd <4 x float> %Y1, %X1.v.i1
+	%R  = fmul <2 x float> %Z1, %Z2
+; CHECK: %Z1.v.r1 = shufflevector <4 x float> %Z1, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+; CHECK: %Z1.v.r2 = shufflevector <4 x float> %Z1, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+; CHECK: %R = fmul <2 x float> %Z1.v.r1, %Z1.v.r2
+	ret <2 x float> %R
+; CHECK: ret <2 x float> %R
+}
+
+; Basic chain with shuffles
+define <8 x i8> @test6(<8 x i8> %A1, <8 x i8> %A2, <8 x i8> %B1, <8 x i8> %B2) {
+; CHECK: @test6
+; CHECK: %X1.v.i1 = shufflevector <8 x i8> %B1, <8 x i8> %B2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK: %X1.v.i0 = shufflevector <8 x i8> %A1, <8 x i8> %A2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+	%X1 = sub <8 x i8> %A1, %B1
+	%X2 = sub <8 x i8> %A2, %B2
+; CHECK: %X1 = sub <16 x i8> %X1.v.i0, %X1.v.i1
+	%Y1 = mul <8 x i8> %X1, %A1
+	%Y2 = mul <8 x i8> %X2, %A2
+; CHECK: %Y1 = mul <16 x i8> %X1, %X1.v.i0
+	%Z1 = add <8 x i8> %Y1, %B1
+	%Z2 = add <8 x i8> %Y2, %B2
+; CHECK: %Z1 = add <16 x i8> %Y1, %X1.v.i1
+        %Q1 = shufflevector <8 x i8> %Z1, <8 x i8> %Z2, <8 x i32> <i32 15, i32 8, i32 6, i32 1, i32 13, i32 10, i32 4, i32 3>
+        %Q2 = shufflevector <8 x i8> %Z2, <8 x i8> %Z2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 2, i32 4, i32 4, i32 1>
+; CHECK: %Z1.v.r2 = shufflevector <16 x i8> %Z1, <16 x i8> undef, <8 x i32> <i32 8, i32 undef, i32 10, i32 undef, i32 undef, i32 13, i32 undef, i32 15>
+; CHECK: %Q1.v.i1 = shufflevector <8 x i8> %Z1.v.r2, <8 x i8> undef, <16 x i32> <i32 0, i32 undef, i32 2, i32 undef, i32 undef, i32 5, i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK: %Q1 = shufflevector <16 x i8> %Z1, <16 x i8> %Q1.v.i1, <16 x i32> <i32 23, i32 16, i32 6, i32 1, i32 21, i32 18, i32 4, i32 3, i32 14, i32 15, i32 8, i32 9, i32 10, i32 12, i32 12, i32 9>
+	%R  = mul <8 x i8> %Q1, %Q2
+; CHECK: %Q1.v.r1 = shufflevector <16 x i8> %Q1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK: %Q1.v.r2 = shufflevector <16 x i8> %Q1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK: %R = mul <8 x i8> %Q1.v.r1, %Q1.v.r2
+	ret <8 x i8> %R
+; CHECK: ret <8 x i8> %R
+}
+
+
diff --git a/test/Transforms/BlockPlacement/dg.exp b/test/Transforms/BlockPlacement/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/BlockPlacement/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/BlockPlacement/lit.local.cfg b/test/Transforms/BlockPlacement/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/BlockPlacement/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/CodeExtractor/dg.exp b/test/Transforms/CodeExtractor/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/CodeExtractor/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/CodeExtractor/lit.local.cfg b/test/Transforms/CodeExtractor/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/CodeExtractor/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/CodeGenPrepare/dg.exp b/test/Transforms/CodeGenPrepare/dg.exp
deleted file mode 100644
index de42dad..0000000
--- a/test/Transforms/CodeGenPrepare/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.ll]]
diff --git a/test/Transforms/CodeGenPrepare/lit.local.cfg b/test/Transforms/CodeGenPrepare/lit.local.cfg
new file mode 100644
index 0000000..c6106e4
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Transforms/ConstProp/2007-11-23-cttz.ll b/test/Transforms/ConstProp/2007-11-23-cttz.ll
index 37cda30..a28c9b0 100644
--- a/test/Transforms/ConstProp/2007-11-23-cttz.ll
+++ b/test/Transforms/ConstProp/2007-11-23-cttz.ll
@@ -1,8 +1,8 @@
 ; RUN: opt < %s -constprop -S | grep {ret i13 13}
 ; PR1816
-declare i13 @llvm.cttz.i13(i13)
+declare i13 @llvm.cttz.i13(i13, i1)
 
 define i13 @test() {
-	%X = call i13 @llvm.cttz.i13(i13 0)
+	%X = call i13 @llvm.cttz.i13(i13 0, i1 true)
 	ret i13 %X
 }
diff --git a/test/Transforms/ConstProp/basictest.ll b/test/Transforms/ConstProp/basictest.ll
index d0d0a5b..09e6e7d 100644
--- a/test/Transforms/ConstProp/basictest.ll
+++ b/test/Transforms/ConstProp/basictest.ll
@@ -1,5 +1,8 @@
 ; RUN: opt < %s -constprop -die -S | FileCheck %s
 
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.2"
+
 ; This is a basic sanity check for constant propagation.  The add instruction
 ; should be eliminated.
 define i32 @test1(i1 %B) {
@@ -40,3 +43,11 @@ define i1 @TNAN() {
   %C = or i1 %A, %B
   ret i1 %C
 }
+
+define i128 @vector_to_int_cast() {
+  %A = bitcast <4 x i32> <i32 1073741824, i32 1073741824, i32 1073741824, i32 1073741824> to i128
+  ret i128 %A
+; CHECK: @vector_to_int_cast
+; CHECK: ret i128 85070591750041656499021422275829170176
+}
+  
+\ No newline at end of file
diff --git a/test/Transforms/ConstProp/bswap.ll b/test/Transforms/ConstProp/bswap.ll
index 9fce309..a68fdcd 100644
--- a/test/Transforms/ConstProp/bswap.ll
+++ b/test/Transforms/ConstProp/bswap.ll
@@ -1,6 +1,6 @@
 ; bswap should be constant folded when it is passed a constant argument
 
-; RUN: opt < %s -constprop -S | not grep call
+; RUN: opt < %s -constprop -S | FileCheck %s
 
 declare i16 @llvm.bswap.i16(i16)
 
@@ -8,18 +8,34 @@ declare i32 @llvm.bswap.i32(i32)
 
 declare i64 @llvm.bswap.i64(i64)
 
+declare i80 @llvm.bswap.i80(i80)
+
+; CHECK: define i16 @W
 define i16 @W() {
+        ; CHECK: ret i16 256
         %Z = call i16 @llvm.bswap.i16( i16 1 )          ; <i16> [#uses=1]
         ret i16 %Z
 }
 
+; CHECK: define i32 @X
 define i32 @X() {
+        ; CHECK: ret i32 16777216
         %Z = call i32 @llvm.bswap.i32( i32 1 )          ; <i32> [#uses=1]
         ret i32 %Z
 }
 
+; CHECK: define i64 @Y
 define i64 @Y() {
+        ; CHECK: ret i64 72057594037927936
         %Z = call i64 @llvm.bswap.i64( i64 1 )          ; <i64> [#uses=1]
         ret i64 %Z
 }
 
+; CHECK: define i80 @Z
+define i80 @Z() {
+        ; CHECK: ret i80 -450681596205739728166896
+        ;                0xA0908070605040302010
+        %Z = call i80 @llvm.bswap.i80( i80 76151636403560493650080 )
+        ;                                  0x102030405060708090A0
+        ret i80 %Z
+}
diff --git a/test/Transforms/ConstProp/calls.ll b/test/Transforms/ConstProp/calls.ll
index 3b6010a..7a405a5 100644
--- a/test/Transforms/ConstProp/calls.ll
+++ b/test/Transforms/ConstProp/calls.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -constprop -S | FileCheck %s
+; RUN: opt < %s -constprop -disable-simplify-libcalls -S | FileCheck %s --check-prefix=FNOBUILTIN
 
 declare double @cos(double)
 
@@ -59,3 +60,90 @@ declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
 declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
 declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
+
+define double @test_intrinsic_pow() nounwind uwtable ssp {
+entry:
+; CHECK: @test_intrinsic_pow
+; CHECK-NOT: call
+  %0 = call double @llvm.pow.f64(double 1.500000e+00, double 3.000000e+00)
+  ret double %0
+}
+declare double @llvm.pow.f64(double, double) nounwind readonly
+
+; Shouldn't fold because of -fno-builtin
+define double @sin_() nounwind uwtable ssp {
+; FNOBUILTIN: @sin_
+; FNOBUILTIN: %1 = call double @sin(double 3.000000e+00)
+  %1 = call double @sin(double 3.000000e+00)
+  ret double %1
+}
+
+; Shouldn't fold because of -fno-builtin
+define double @sqrt_() nounwind uwtable ssp {
+; FNOBUILTIN: @sqrt_
+; FNOBUILTIN: %1 = call double @sqrt(double 3.000000e+00)
+  %1 = call double @sqrt(double 3.000000e+00)
+  ret double %1
+}
+
+; Shouldn't fold because of -fno-builtin
+define float @sqrtf_() nounwind uwtable ssp {
+; FNOBUILTIN: @sqrtf_
+; FNOBUILTIN: %1 = call float @sqrtf(float 3.000000e+00)
+  %1 = call float @sqrtf(float 3.000000e+00)
+  ret float %1
+}
+declare float @sqrtf(float)
+
+; Shouldn't fold because of -fno-builtin
+define float @sinf_() nounwind uwtable ssp {
+; FNOBUILTIN: @sinf_
+; FNOBUILTIN: %1 = call float @sinf(float 3.000000e+00)
+  %1 = call float @sinf(float 3.000000e+00)
+  ret float %1
+}
+declare float @sinf(float)
+
+; Shouldn't fold because of -fno-builtin
+define double @tan_() nounwind uwtable ssp {
+; FNOBUILTIN: @tan_
+; FNOBUILTIN: %1 = call double @tan(double 3.000000e+00)
+  %1 = call double @tan(double 3.000000e+00)
+  ret double %1
+}
+
+; Shouldn't fold because of -fno-builtin
+define double @tanh_() nounwind uwtable ssp {
+; FNOBUILTIN: @tanh_
+; FNOBUILTIN: %1 = call double @tanh(double 3.000000e+00)
+  %1 = call double @tanh(double 3.000000e+00)
+  ret double %1
+}
+declare double @tanh(double)
+
+; Shouldn't fold because of -fno-builtin
+define double @pow_() nounwind uwtable ssp {
+; FNOBUILTIN: @pow_
+; FNOBUILTIN: %1 = call double @pow(double 3.000000e+00, double 3.000000e+00)
+  %1 = call double @pow(double 3.000000e+00, double 3.000000e+00)
+  ret double %1
+}
+declare double @pow(double, double)
+
+; Shouldn't fold because of -fno-builtin
+define double @fmod_() nounwind uwtable ssp {
+; FNOBUILTIN: @fmod_
+; FNOBUILTIN: %1 = call double @fmod(double 3.000000e+00, double 3.000000e+00)
+  %1 = call double @fmod(double 3.000000e+00, double 3.000000e+00)
+  ret double %1
+}
+declare double @fmod(double, double)
+
+; Shouldn't fold because of -fno-builtin
+define double @atan2_() nounwind uwtable ssp {
+; FNOBUILTIN: @atan2_
+; FNOBUILTIN: %1 = call double @atan2(double 3.000000e+00, double 3.000000e+00)
+  %1 = call double @atan2(double 3.000000e+00, double 3.000000e+00)
+  ret double %1
+}
+declare double @atan2(double, double)
diff --git a/test/Transforms/ConstProp/dg.exp b/test/Transforms/ConstProp/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/ConstProp/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/ConstProp/lit.local.cfg b/test/Transforms/ConstProp/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/ConstProp/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/ConstantMerge/dg.exp b/test/Transforms/ConstantMerge/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/ConstantMerge/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/ConstantMerge/linker-private.ll b/test/Transforms/ConstantMerge/linker-private.ll
new file mode 100644
index 0000000..eba7880
--- /dev/null
+++ b/test/Transforms/ConstantMerge/linker-private.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -constmerge -S | FileCheck %s
+; <rdar://problem/10564621>
+
+%0 = type opaque
+%struct.NSConstantString = type { i32*, i32, i8*, i32 }
+
+; CHECK: @.str3 = linker_private unnamed_addr constant [1 x i8] zeroinitializer, align 1
+
+@isLogVisible = global i8 0, align 1
+@__CFConstantStringClassReference = external global [0 x i32]
+@.str3 = linker_private unnamed_addr constant [1 x i8] zeroinitializer, align 1
+@_unnamed_cfstring_4 = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr inbounds ([1 x i8]* @.str3, i32 0, i32 0), i32 0 }, section "__DATA,__cfstring"
+@null.array = weak_odr constant [1 x i8] zeroinitializer, align 1
+
+define linkonce_odr void @bar() nounwind ssp align 2 {
+entry:
+  %stack = alloca i8*, align 4
+  %call = call %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* (i8*, i8*, %0*)*)(i8* null, i8* null, %0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_4 to %0*))
+  store i8* getelementptr inbounds ([1 x i8]* @null.array, i32 0, i32 0), i8** %stack, align 4
+  ret void
+}
+
+declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
diff --git a/test/Transforms/ConstantMerge/lit.local.cfg b/test/Transforms/ConstantMerge/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/ConstantMerge/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/CorrelatedValuePropagation/basic.ll b/test/Transforms/CorrelatedValuePropagation/basic.ll
index 270c048..475cd8d 100644
--- a/test/Transforms/CorrelatedValuePropagation/basic.ll
+++ b/test/Transforms/CorrelatedValuePropagation/basic.ll
@@ -79,4 +79,103 @@ Impossible:
 
 LessThanOrEqualToTwo:
   ret i32 0
-}
-\ No newline at end of file
+}
+
+define i32 @switch1(i32 %s) {
+; CHECK: @switch1
+entry:
+  %cmp = icmp slt i32 %s, 0
+  br i1 %cmp, label %negative, label %out
+
+negative:
+  switch i32 %s, label %out [
+; CHECK: switch i32 %s, label %out
+    i32 0, label %out
+; CHECK-NOT: i32 0
+    i32 1, label %out
+; CHECK-NOT: i32 1
+    i32 -1, label %next
+; CHECK: i32 -1, label %next
+    i32 -2, label %next
+; CHECK: i32 -2, label %next
+    i32 2, label %out
+; CHECK-NOT: i32 2
+    i32 3, label %out
+; CHECK-NOT: i32 3
+  ]
+
+out:
+  %p = phi i32 [ 1, %entry ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ]
+  ret i32 %p
+
+next:
+  %q = phi i32 [ 0, %negative ], [ 0, %negative ]
+  ret i32 %q
+}
+
+define i32 @switch2(i32 %s) {
+; CHECK: @switch2
+entry:
+  %cmp = icmp sgt i32 %s, 0
+  br i1 %cmp, label %positive, label %out
+
+positive:
+  switch i32 %s, label %out [
+    i32 0, label %out
+    i32 -1, label %next
+    i32 -2, label %next
+  ]
+; CHECK: br label %out
+
+out:
+  %p = phi i32 [ -1, %entry ], [ 1, %positive ], [ 1, %positive ]
+  ret i32 %p
+
+next:
+  %q = phi i32 [ 0, %positive ], [ 0, %positive ]
+  ret i32 %q
+}
+
+define i32 @switch3(i32 %s) {
+; CHECK: @switch3
+entry:
+  %cmp = icmp sgt i32 %s, 0
+  br i1 %cmp, label %positive, label %out
+
+positive:
+  switch i32 %s, label %out [
+    i32 -1, label %out
+    i32 -2, label %next
+    i32 -3, label %next
+  ]
+; CHECK: br label %out
+
+out:
+  %p = phi i32 [ -1, %entry ], [ 1, %positive ], [ 1, %positive ]
+  ret i32 %p
+
+next:
+  %q = phi i32 [ 0, %positive ], [ 0, %positive ]
+  ret i32 %q
+}
+
+define void @switch4(i32 %s) {
+; CHECK: @switch4
+entry:
+  %cmp = icmp eq i32 %s, 0
+  br i1 %cmp, label %zero, label %out
+
+zero:
+  switch i32 %s, label %out [
+    i32 0, label %next
+    i32 1, label %out
+    i32 -1, label %out
+  ]
+; CHECK: br label %next
+
+out:
+  ret void
+
+next:
+  ret void
+}
diff --git a/test/Transforms/CorrelatedValuePropagation/dg.exp b/test/Transforms/CorrelatedValuePropagation/dg.exp
deleted file mode 100644
index de42dad..0000000
--- a/test/Transforms/CorrelatedValuePropagation/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.ll]]
diff --git a/test/Transforms/CorrelatedValuePropagation/lit.local.cfg b/test/Transforms/CorrelatedValuePropagation/lit.local.cfg
new file mode 100644
index 0000000..c6106e4
--- /dev/null
+++ b/test/Transforms/CorrelatedValuePropagation/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Transforms/CorrelatedValuePropagation/range.ll b/test/Transforms/CorrelatedValuePropagation/range.ll
new file mode 100644
index 0000000..9b70ed2
--- /dev/null
+++ b/test/Transforms/CorrelatedValuePropagation/range.ll
@@ -0,0 +1,43 @@
+; RUN: opt -correlated-propagation -S < %s | FileCheck %s
+
+declare i32 @foo()
+
+define i32 @test1(i32 %a) nounwind {
+  %a.off = add i32 %a, -8
+  %cmp = icmp ult i32 %a.off, 8
+  br i1 %cmp, label %then, label %else
+
+then:
+  %dead = icmp eq i32 %a, 7
+  br i1 %dead, label %end, label %else
+
+else:
+  ret i32 1
+
+end:
+  ret i32 2
+
+; CHECK: @test1
+; CHECK: then:
+; CHECK-NEXT: br i1 false, label %end, label %else
+}
+
+define i32 @test2(i32 %a) nounwind {
+  %a.off = add i32 %a, -8
+  %cmp = icmp ult i32 %a.off, 8
+  br i1 %cmp, label %then, label %else
+
+then:
+  %dead = icmp ugt i32 %a, 15
+  br i1 %dead, label %end, label %else
+
+else:
+  ret i32 1
+
+end:
+  ret i32 2
+
+; CHECK: @test2
+; CHECK: then:
+; CHECK-NEXT: br i1 false, label %end, label %else
+}
diff --git a/test/Transforms/DeadArgElim/deadexternal.ll b/test/Transforms/DeadArgElim/deadexternal.ll
index b2d63ec..e3fe1bb 100644
--- a/test/Transforms/DeadArgElim/deadexternal.ll
+++ b/test/Transforms/DeadArgElim/deadexternal.ll
@@ -30,10 +30,10 @@ entry:
 define void @h() {
 entry:
   %i = alloca i32, align 4
-  volatile store i32 10, i32* %i, align 4
+  store volatile i32 10, i32* %i, align 4
 ; CHECK: %tmp = load volatile i32* %i, align 4
 ; CHECK-next: call void @f(i32 undef)
-  %tmp = volatile load i32* %i, align 4
+  %tmp = load volatile i32* %i, align 4
   call void @f(i32 %tmp)
   ret void
 }
diff --git a/test/Transforms/DeadArgElim/dg.exp b/test/Transforms/DeadArgElim/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/DeadArgElim/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/DeadArgElim/lit.local.cfg b/test/Transforms/DeadArgElim/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/DeadArgElim/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll b/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
new file mode 100644
index 0000000..ed53eb5
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
@@ -0,0 +1,95 @@
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+%struct.vec2 = type { <4 x i32>, <4 x i32> }
+%struct.vec2plusi = type { <4 x i32>, <4 x i32>, i32 }
+
+@glob1 = global %struct.vec2 zeroinitializer, align 16
+@glob2 = global %struct.vec2plusi zeroinitializer, align 16
+
+define void @write24to28(i32* nocapture %p) nounwind uwtable ssp {
+; CHECK: @write24to28
+entry:
+  %arrayidx0 = getelementptr inbounds i32* %p, i64 1
+  %p3 = bitcast i32* %arrayidx0 to i8*
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 24, i32 4, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
+  %arrayidx1 = getelementptr inbounds i32* %p, i64 7
+  store i32 1, i32* %arrayidx1, align 4
+  ret void
+}
+
+define void @write28to32(i32* nocapture %p) nounwind uwtable ssp {
+; CHECK: @write28to32
+entry:
+  %p3 = bitcast i32* %p to i8*
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 4, i1 false)
+  %arrayidx1 = getelementptr inbounds i32* %p, i64 7
+  store i32 1, i32* %arrayidx1, align 4
+  ret void
+}
+
+define void @dontwrite28to32memset(i32* nocapture %p) nounwind uwtable ssp {
+; CHECK: @dontwrite28to32memset
+entry:
+  %p3 = bitcast i32* %p to i8*
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 16, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 16, i1 false)
+  %arrayidx1 = getelementptr inbounds i32* %p, i64 7
+  store i32 1, i32* %arrayidx1, align 4
+  ret void
+}
+
+define void @write32to36(%struct.vec2plusi* nocapture %p) nounwind uwtable ssp {
+; CHECK: @write32to36
+entry:
+  %0 = bitcast %struct.vec2plusi* %p to i8*
+; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2plusi* @glob2 to i8*), i64 32, i32 16, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2plusi* @glob2 to i8*), i64 36, i32 16, i1 false)
+  %c = getelementptr inbounds %struct.vec2plusi* %p, i64 0, i32 2
+  store i32 1, i32* %c, align 4
+  ret void
+}
+
+define void @write16to32(%struct.vec2* nocapture %p) nounwind uwtable ssp {
+; CHECK: @write16to32
+entry:
+  %0 = bitcast %struct.vec2* %p to i8*
+; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 16, i32 16, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
+  %c = getelementptr inbounds %struct.vec2* %p, i64 0, i32 1
+  store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* %c, align 4
+  ret void
+}
+
+define void @dontwrite28to32memcpy(%struct.vec2* nocapture %p) nounwind uwtable ssp {
+; CHECK: @dontwrite28to32memcpy
+entry:
+  %0 = bitcast %struct.vec2* %p to i8*
+; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
+  %arrayidx1 = getelementptr inbounds %struct.vec2* %p, i64 0, i32 0, i64 7
+  store i32 1, i32* %arrayidx1, align 4
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+%struct.trapframe = type { i64, i64, i64 }
+
+; bugzilla 11455 - make sure negative GEP's don't break this optimisation
+; CHECK: @cpu_lwp_fork
+define void @cpu_lwp_fork(%struct.trapframe* %md_regs, i64 %pcb_rsp0) nounwind uwtable noinline ssp {
+entry:
+  %0 = inttoptr i64 %pcb_rsp0 to %struct.trapframe*
+  %add.ptr = getelementptr inbounds %struct.trapframe* %0, i64 -1
+  %1 = bitcast %struct.trapframe* %add.ptr to i8*
+  %2 = bitcast %struct.trapframe* %md_regs to i8*
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 24, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 24, i32 1, i1 false)
+  %tf_trapno = getelementptr inbounds %struct.trapframe* %0, i64 -1, i32 1
+  store i64 3, i64* %tf_trapno, align 8
+  ret void
+}
diff --git a/test/Transforms/DeadStoreElimination/dg.exp b/test/Transforms/DeadStoreElimination/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/DeadStoreElimination/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/DeadStoreElimination/dominate.ll b/test/Transforms/DeadStoreElimination/dominate.ll
new file mode 100644
index 0000000..284fea4
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/dominate.ll
@@ -0,0 +1,25 @@
+; RUN: opt  %s -dse -disable-output
+; test that we don't crash
+declare void @bar()
+
+define void @foo() {
+bb1:
+  %memtmp3.i = alloca [21 x i8], align 1
+  %0 = getelementptr inbounds [21 x i8]* %memtmp3.i, i64 0, i64 0
+  br label %bb3
+
+bb2:
+  call void @llvm.lifetime.end(i64 -1, i8* %0)
+  br label %bb3
+
+bb3:
+  call void @bar()
+  call void @llvm.lifetime.end(i64 -1, i8* %0)
+  br label %bb4
+
+bb4:
+  ret void
+
+}
+
+declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
diff --git a/test/Transforms/DeadStoreElimination/free.ll b/test/Transforms/DeadStoreElimination/free.ll
index aa3f0ab..a5fbdc7 100644
--- a/test/Transforms/DeadStoreElimination/free.ll
+++ b/test/Transforms/DeadStoreElimination/free.ll
@@ -2,6 +2,9 @@
 
 target datalayout = "e-p:64:64:64"
 
+declare void @free(i8* nocapture)
+declare noalias i8* @malloc(i64)
+
 ; CHECK: @test
 ; CHECK-NEXT: bitcast
 ; CHECK-NEXT: @free
@@ -26,10 +29,10 @@ define void @test2({i32, i32}* %P) {
 	ret void
 }
 
-; CHECK: @test4
+; CHECK: @test3
 ; CHECK-NOT: store
 ; CHECK: ret void
-define void @test4() {
+define void @test3() {
   %m = call i8* @malloc(i64 24)
   store i8 0, i8* %m
   %m1 = getelementptr i8* %m, i64 1
@@ -38,5 +41,30 @@ define void @test4() {
   ret void
 }
 
-declare void @free(i8*)
-declare i8* @malloc(i64)
+; PR11240
+; CHECK: @test4
+; CHECK-NOT: store
+; CHECK: ret void
+define void @test4(i1 %x) nounwind {
+entry:
+  %alloc1 = tail call noalias i8* @malloc(i64 4) nounwind
+  br i1 %x, label %skipinit1, label %init1
+
+init1:
+  store i8 1, i8* %alloc1
+  br label %skipinit1
+
+skipinit1:
+  tail call void @free(i8* %alloc1) nounwind
+  ret void
+}
+
+; CHECK: @test5
+define void @test5() {
+  br label %bb
+
+bb:
+  tail call void @free(i8* undef) nounwind
+  br label %bb
+}
+
diff --git a/test/Transforms/DeadStoreElimination/lit.local.cfg b/test/Transforms/DeadStoreElimination/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/DeadStoreElimination/pr11390.ll b/test/Transforms/DeadStoreElimination/pr11390.ll
new file mode 100644
index 0000000..2ce6eea
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/pr11390.ll
@@ -0,0 +1,38 @@
+; RUN: opt -basicaa -dse -S -o - %s | FileCheck %s
+; PR11390
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define fastcc void @cat_domain(i8* nocapture %name, i8* nocapture %domain, i8** 
+nocapture %s) nounwind uwtable {
+entry:
+  %call = tail call i64 @strlen(i8* %name) nounwind readonly
+  %call1 = tail call i64 @strlen(i8* %domain) nounwind readonly
+  %add = add i64 %call, 1
+  %add2 = add i64 %add, %call1
+  %add3 = add i64 %add2, 1
+  %call4 = tail call noalias i8* @malloc(i64 %add3) nounwind
+  store i8* %call4, i8** %s, align 8
+  %tobool = icmp eq i8* %call4, null
+  br i1 %tobool, label %return, label %if.end
+
+if.end:                                           ; preds = %entry
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %call4, i8* %name, i64 %call, i32 1, i1 false)
+  %arrayidx = getelementptr inbounds i8* %call4, i64 %call
+  store i8 46, i8* %arrayidx, align 1
+; CHECK: store i8 46
+  %add.ptr5 = getelementptr inbounds i8* %call4, i64 %add
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %add.ptr5, i8* %domain, i64 %call1, i32 1, i1 false)
+  %arrayidx8 = getelementptr inbounds i8* %call4, i64 %add2
+  store i8 0, i8* %arrayidx8, align 1
+  br label %return
+
+return:                                           ; preds = %if.end, %entry
+  ret void
+}
+
+declare i64 @strlen(i8* nocapture) nounwind readonly
+
+declare noalias i8* @malloc(i64) nounwind
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/Transforms/DeadStoreElimination/simple.ll b/test/Transforms/DeadStoreElimination/simple.ll
index ec2f157..81eb5a8 100644
--- a/test/Transforms/DeadStoreElimination/simple.ll
+++ b/test/Transforms/DeadStoreElimination/simple.ll
@@ -251,3 +251,10 @@ bb:
 ; CHECK: call void @test19f
 }
 
+define void @test20() {
+  %m = call i8* @malloc(i32 24)
+  store i8 0, i8* %m
+  ret void
+}
+; CHECK: @test20
+; CHECK-NEXT: ret void
diff --git a/test/Transforms/EarlyCSE/basic.ll b/test/Transforms/EarlyCSE/basic.ll
index 57b1697..32c302c 100644
--- a/test/Transforms/EarlyCSE/basic.ll
+++ b/test/Transforms/EarlyCSE/basic.ll
@@ -10,22 +10,22 @@ define void @test1(i8 %V, i32 *%P) {
   
   %C = zext i8 %V to i32
   %D = zext i8 %V to i32  ;; CSE
-  volatile store i32 %C, i32* %P
-  volatile store i32 %D, i32* %P
+  store volatile i32 %C, i32* %P
+  store volatile i32 %D, i32* %P
   ; CHECK-NEXT: %C = zext i8 %V to i32
   ; CHECK-NEXT: store volatile i32 %C
   ; CHECK-NEXT: store volatile i32 %C
   
   %E = add i32 %C, %C
   %F = add i32 %C, %C
-  volatile store i32 %E, i32* %P
-  volatile store i32 %F, i32* %P
+  store volatile i32 %E, i32* %P
+  store volatile i32 %F, i32* %P
   ; CHECK-NEXT: %E = add i32 %C, %C
   ; CHECK-NEXT: store volatile i32 %E
   ; CHECK-NEXT: store volatile i32 %E
 
   %G = add nuw i32 %C, %C         ;; not a CSE with E
-  volatile store i32 %G, i32* %P
+  store volatile i32 %G, i32* %P
   ; CHECK-NEXT: %G = add nuw i32 %C, %C
   ; CHECK-NEXT: store volatile i32 %G
   ret void
diff --git a/test/Transforms/EarlyCSE/dg.exp b/test/Transforms/EarlyCSE/dg.exp
deleted file mode 100644
index de42dad..0000000
--- a/test/Transforms/EarlyCSE/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.ll]]
diff --git a/test/Transforms/EarlyCSE/instsimplify-dom.ll b/test/Transforms/EarlyCSE/instsimplify-dom.ll
new file mode 100644
index 0000000..36dffec
--- /dev/null
+++ b/test/Transforms/EarlyCSE/instsimplify-dom.ll
@@ -0,0 +1,19 @@
+; RUN: opt -early-cse -S < %s | FileCheck %s
+; PR12231
+
+declare i32 @f()
+
+define i32 @fn() {
+entry:
+  br label %lbl_1215
+
+lbl_1215:
+  %ins34 = phi i32 [ %ins35, %xxx ], [ undef, %entry ]
+  ret i32 %ins34
+
+xxx:
+  %ins35 = call i32 @f()
+  br label %lbl_1215
+}
+
+; CHECK: define i32 @fn
diff --git a/test/Transforms/EarlyCSE/lit.local.cfg b/test/Transforms/EarlyCSE/lit.local.cfg
new file mode 100644
index 0000000..c6106e4
--- /dev/null
+++ b/test/Transforms/EarlyCSE/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Transforms/FunctionAttrs/2008-09-13-VolatileRead.ll b/test/Transforms/FunctionAttrs/2008-09-13-VolatileRead.ll
index 85df09e..b7e4d1f 100644
--- a/test/Transforms/FunctionAttrs/2008-09-13-VolatileRead.ll
+++ b/test/Transforms/FunctionAttrs/2008-09-13-VolatileRead.ll
@@ -4,6 +4,6 @@
 @g = global i32 0		; <i32*> [#uses=1]
 
 define i32 @f() {
-	%t = volatile load i32* @g		; <i32> [#uses=1]
+	%t = load volatile i32* @g		; <i32> [#uses=1]
 	ret i32 %t
 }
diff --git a/test/Transforms/FunctionAttrs/2008-12-31-NoCapture.ll b/test/Transforms/FunctionAttrs/2008-12-31-NoCapture.ll
deleted file mode 100644
index e2bab19..0000000
--- a/test/Transforms/FunctionAttrs/2008-12-31-NoCapture.ll
+++ /dev/null
@@ -1,105 +0,0 @@
-; RUN: opt < %s -functionattrs -S | not grep {nocapture *%%q}
-; RUN: opt < %s -functionattrs -S | grep {nocapture *%%p} | count 6
-@g = global i32* null		; <i32**> [#uses=1]
-
-define i32* @c1(i32* %q) {
-	ret i32* %q
-}
-
-define void @c2(i32* %q) {
-	store i32* %q, i32** @g
-	ret void
-}
-
-define void @c3(i32* %q) {
-	call void @c2(i32* %q)
-	ret void
-}
-
-define i1 @c4(i32* %q, i32 %bitno) {
-	%tmp = ptrtoint i32* %q to i32
-	%tmp2 = lshr i32 %tmp, %bitno
-	%bit = trunc i32 %tmp2 to i1
-	br i1 %bit, label %l1, label %l0
-l0:
-	ret i1 0 ; escaping value not caught by def-use chaining.
-l1:
-	ret i1 1 ; escaping value not caught by def-use chaining.
-}
-
-@lookup_table = global [2 x i1] [ i1 0, i1 1 ]
-
-define i1 @c5(i32* %q, i32 %bitno) {
-	%tmp = ptrtoint i32* %q to i32
-	%tmp2 = lshr i32 %tmp, %bitno
-	%bit = and i32 %tmp2, 1
-        ; subtle escape mechanism follows
-	%lookup = getelementptr [2 x i1]* @lookup_table, i32 0, i32 %bit
-	%val = load i1* %lookup
-	ret i1 %val
-}
-
-declare void @throw_if_bit_set(i8*, i8) readonly
-define i1 @c6(i8* %q, i8 %bit) {
-	invoke void @throw_if_bit_set(i8* %q, i8 %bit)
-		to label %ret0 unwind label %ret1
-ret0:
-	ret i1 0
-ret1:
-        %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
-                 cleanup
-	ret i1 1
-}
-
-declare i32 @__gxx_personality_v0(...)
-
-define i1* @lookup_bit(i32* %q, i32 %bitno) readnone nounwind {
-	%tmp = ptrtoint i32* %q to i32
-	%tmp2 = lshr i32 %tmp, %bitno
-	%bit = and i32 %tmp2, 1
-	%lookup = getelementptr [2 x i1]* @lookup_table, i32 0, i32 %bit
-	ret i1* %lookup
-}
-
-define i1 @c7(i32* %q, i32 %bitno) {
-	%ptr = call i1* @lookup_bit(i32* %q, i32 %bitno)
-	%val = load i1* %ptr
-	ret i1 %val
-}
-
-
-define i32 @nc1(i32* %q, i32* %p, i1 %b) {
-e:
-	br label %l
-l:
-	%x = phi i32* [ %p, %e ]
-	%y = phi i32* [ %q, %e ]
-	%tmp = bitcast i32* %x to i32*		; <i32*> [#uses=2]
-	%tmp2 = select i1 %b, i32* %tmp, i32* %y
-	%val = load i32* %tmp2		; <i32> [#uses=1]
-	store i32 0, i32* %tmp
-	store i32* %y, i32** @g
-	ret i32 %val
-}
-
-define void @nc2(i32* %p, i32* %q) {
-	%1 = call i32 @nc1(i32* %q, i32* %p, i1 0)		; <i32> [#uses=0]
-	ret void
-}
-
-define void @nc3(void ()* %p) {
-	call void %p()
-	ret void
-}
-
-declare void @external(i8*) readonly nounwind
-define void @nc4(i8* %p) {
-	call void @external(i8* %p)
-	ret void
-}
-
-define void @nc5(void (i8*)* %f, i8* %p) {
-	call void %f(i8* %p) readonly nounwind
-	call void %f(i8* nocapture %p)
-	ret void
-}
diff --git a/test/Transforms/FunctionAttrs/2010-10-30-volatile.ll b/test/Transforms/FunctionAttrs/2010-10-30-volatile.ll
index f21fabc..93991d2 100644
--- a/test/Transforms/FunctionAttrs/2010-10-30-volatile.ll
+++ b/test/Transforms/FunctionAttrs/2010-10-30-volatile.ll
@@ -5,6 +5,6 @@
 
 define void @foo() {
 ; CHECK: void @foo() {
-  %tmp = volatile load i32* @g
+  %tmp = load volatile i32* @g
   ret void
 }
diff --git a/test/Transforms/FunctionAttrs/dg.exp b/test/Transforms/FunctionAttrs/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/FunctionAttrs/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/FunctionAttrs/lit.local.cfg b/test/Transforms/FunctionAttrs/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/FunctionAttrs/nocapture.ll b/test/Transforms/FunctionAttrs/nocapture.ll
new file mode 100644
index 0000000..3027acd
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/nocapture.ll
@@ -0,0 +1,178 @@
+; RUN: opt < %s -functionattrs -S | FileCheck %s
+@g = global i32* null		; <i32**> [#uses=1]
+
+; CHECK: define i32* @c1(i32* %q)
+define i32* @c1(i32* %q) {
+	ret i32* %q
+}
+
+; CHECK: define void @c2(i32* %q)
+define void @c2(i32* %q) {
+	store i32* %q, i32** @g
+	ret void
+}
+
+; CHECK: define void @c3(i32* %q)
+define void @c3(i32* %q) {
+	call void @c2(i32* %q)
+	ret void
+}
+
+; CHECK: define i1 @c4(i32* %q, i32 %bitno)
+define i1 @c4(i32* %q, i32 %bitno) {
+	%tmp = ptrtoint i32* %q to i32
+	%tmp2 = lshr i32 %tmp, %bitno
+	%bit = trunc i32 %tmp2 to i1
+	br i1 %bit, label %l1, label %l0
+l0:
+	ret i1 0 ; escaping value not caught by def-use chaining.
+l1:
+	ret i1 1 ; escaping value not caught by def-use chaining.
+}
+
+@lookup_table = global [2 x i1] [ i1 0, i1 1 ]
+
+; CHECK: define i1 @c5(i32* %q, i32 %bitno)
+define i1 @c5(i32* %q, i32 %bitno) {
+	%tmp = ptrtoint i32* %q to i32
+	%tmp2 = lshr i32 %tmp, %bitno
+	%bit = and i32 %tmp2, 1
+        ; subtle escape mechanism follows
+	%lookup = getelementptr [2 x i1]* @lookup_table, i32 0, i32 %bit
+	%val = load i1* %lookup
+	ret i1 %val
+}
+
+declare void @throw_if_bit_set(i8*, i8) readonly
+
+; CHECK: define i1 @c6(i8* %q, i8 %bit)
+define i1 @c6(i8* %q, i8 %bit) {
+	invoke void @throw_if_bit_set(i8* %q, i8 %bit)
+		to label %ret0 unwind label %ret1
+ret0:
+	ret i1 0
+ret1:
+        %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+                 cleanup
+	ret i1 1
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+define i1* @lookup_bit(i32* %q, i32 %bitno) readnone nounwind {
+	%tmp = ptrtoint i32* %q to i32
+	%tmp2 = lshr i32 %tmp, %bitno
+	%bit = and i32 %tmp2, 1
+	%lookup = getelementptr [2 x i1]* @lookup_table, i32 0, i32 %bit
+	ret i1* %lookup
+}
+
+; CHECK: define i1 @c7(i32* %q, i32 %bitno)
+define i1 @c7(i32* %q, i32 %bitno) {
+	%ptr = call i1* @lookup_bit(i32* %q, i32 %bitno)
+	%val = load i1* %ptr
+	ret i1 %val
+}
+
+
+; CHECK: define i32 @nc1(i32* %q, i32* nocapture %p, i1 %b)
+define i32 @nc1(i32* %q, i32* %p, i1 %b) {
+e:
+	br label %l
+l:
+	%x = phi i32* [ %p, %e ]
+	%y = phi i32* [ %q, %e ]
+	%tmp = bitcast i32* %x to i32*		; <i32*> [#uses=2]
+	%tmp2 = select i1 %b, i32* %tmp, i32* %y
+	%val = load i32* %tmp2		; <i32> [#uses=1]
+	store i32 0, i32* %tmp
+	store i32* %y, i32** @g
+	ret i32 %val
+}
+
+; CHECK: define void @nc2(i32* nocapture %p, i32* %q)
+define void @nc2(i32* %p, i32* %q) {
+	%1 = call i32 @nc1(i32* %q, i32* %p, i1 0)		; <i32> [#uses=0]
+	ret void
+}
+
+; CHECK: define void @nc3(void ()* nocapture %p)
+define void @nc3(void ()* %p) {
+	call void %p()
+	ret void
+}
+
+declare void @external(i8*) readonly nounwind
+; CHECK: define void @nc4(i8* nocapture %p)
+define void @nc4(i8* %p) {
+	call void @external(i8* %p)
+	ret void
+}
+
+; CHECK: define void @nc5(void (i8*)* nocapture %f, i8* nocapture %p)
+define void @nc5(void (i8*)* %f, i8* %p) {
+	call void %f(i8* %p) readonly nounwind
+	call void %f(i8* nocapture %p)
+	ret void
+}
+
+; CHECK: define void @test1_1(i8* nocapture %x1_1, i8* %y1_1)
+define void @test1_1(i8* %x1_1, i8* %y1_1) {
+  call i8* @test1_2(i8* %x1_1, i8* %y1_1)
+  store i32* null, i32** @g
+  ret void
+}
+
+; CHECK: define i8* @test1_2(i8* nocapture %x1_2, i8* %y1_2)
+define i8* @test1_2(i8* %x1_2, i8* %y1_2) {
+  call void @test1_1(i8* %x1_2, i8* %y1_2)
+  store i32* null, i32** @g
+  ret i8* %y1_2
+}
+
+; CHECK: define void @test2(i8* nocapture %x2)
+define void @test2(i8* %x2) {
+  call void @test2(i8* %x2)
+  store i32* null, i32** @g
+  ret void
+}
+
+; CHECK: define void @test3(i8* nocapture %x3, i8* nocapture %y3, i8* nocapture %z3)
+define void @test3(i8* %x3, i8* %y3, i8* %z3) {
+  call void @test3(i8* %z3, i8* %y3, i8* %x3)
+  store i32* null, i32** @g
+  ret void
+}
+
+; CHECK: define void @test4_1(i8* %x4_1)
+define void @test4_1(i8* %x4_1) {
+  call i8* @test4_2(i8* %x4_1, i8* %x4_1, i8* %x4_1)
+  store i32* null, i32** @g
+  ret void
+}
+
+; CHECK: define i8* @test4_2(i8* nocapture %x4_2, i8* %y4_2, i8* nocapture %z4_2)
+define i8* @test4_2(i8* %x4_2, i8* %y4_2, i8* %z4_2) {
+  call void @test4_1(i8* null)
+  store i32* null, i32** @g
+  ret i8* %y4_2
+}
+
+declare i8* @test5_1(i8* %x5_1)
+
+; CHECK: define void @test5_2(i8* %x5_2)
+define void @test5_2(i8* %x5_2) {
+  call i8* @test5_1(i8* %x5_2)
+  store i32* null, i32** @g
+  ret void
+}
+
+declare void @test6_1(i8* %x6_1, i8* nocapture %y6_1, ...)
+
+; CHECK: define void @test6_2(i8* %x6_2, i8* nocapture %y6_2, i8* %z6_2)
+define void @test6_2(i8* %x6_2, i8* %y6_2, i8* %z6_2) {
+  call void (i8*, i8*, ...)* @test6_1(i8* %x6_2, i8* %y6_2, i8* %z6_2)
+  store i32* null, i32** @g
+  ret void
+}
+
diff --git a/test/Transforms/GVN/commute.ll b/test/Transforms/GVN/commute.ll
new file mode 100644
index 0000000..cf4fb7f
--- /dev/null
+++ b/test/Transforms/GVN/commute.ll
@@ -0,0 +1,23 @@
+; RUN: opt -gvn -S < %s | FileCheck %s
+
+declare void @use(i32, i32)
+
+define void @foo(i32 %x, i32 %y) {
+  ; CHECK: @foo
+  %add1 = add i32 %x, %y
+  %add2 = add i32 %y, %x
+  call void @use(i32 %add1, i32 %add2)
+  ; CHECK: @use(i32 %add1, i32 %add1)
+  ret void
+}
+
+declare void @vse(i1, i1)
+
+define void @bar(i32 %x, i32 %y) {
+  ; CHECK: @bar
+  %cmp1 = icmp ult i32 %x, %y
+  %cmp2 = icmp ugt i32 %y, %x
+  call void @vse(i1 %cmp1, i1 %cmp2)
+  ; CHECK: @vse(i1 %cmp1, i1 %cmp1)
+  ret void
+}
diff --git a/test/Transforms/GVN/condprop.ll b/test/Transforms/GVN/condprop.ll
index 0b31b01..9c28955 100644
--- a/test/Transforms/GVN/condprop.ll
+++ b/test/Transforms/GVN/condprop.ll
@@ -55,25 +55,6 @@ return:		; preds = %bb8
 }
 
 declare void @foo(i1)
-
-; CHECK: @test2
-define void @test2(i1 %x, i1 %y) {
-  %z = or i1 %x, %y
-  br i1 %z, label %true, label %false
-true:
-; CHECK: true:
-  %z2 = or i1 %x, %y
-  call void @foo(i1 %z2)
-; CHECK: call void @foo(i1 true)
-  br label %true
-false:
-; CHECK: false:
-  %z3 = or i1 %x, %y
-  call void @foo(i1 %z3)
-; CHECK: call void @foo(i1 false)
-  br label %false
-}
-
 declare void @bar(i32)
 
 ; CHECK: @test3
@@ -130,3 +111,141 @@ case3:
 ; CHECK: call void @bar(i32 %x)
   ret void
 }
+
+; CHECK: @test5
+define i1 @test5(i32 %x, i32 %y) {
+  %cmp = icmp eq i32 %x, %y
+  br i1 %cmp, label %same, label %different
+
+same:
+  %cmp2 = icmp ne i32 %x, %y
+; CHECK: ret i1 false
+  ret i1 %cmp2
+
+different:
+  %cmp3 = icmp eq i32 %x, %y
+; CHECK: ret i1 false
+  ret i1 %cmp3
+}
+
+; CHECK: @test6
+define i1 @test6(i32 %x, i32 %y) {
+  %cmp2 = icmp ne i32 %x, %y
+  %cmp = icmp eq i32 %x, %y
+  %cmp3 = icmp eq i32 %x, %y
+  br i1 %cmp, label %same, label %different
+
+same:
+; CHECK: ret i1 false
+  ret i1 %cmp2
+
+different:
+; CHECK: ret i1 false
+  ret i1 %cmp3
+}
+
+; CHECK: @test7
+define i1 @test7(i32 %x, i32 %y) {
+  %cmp = icmp sgt i32 %x, %y
+  br i1 %cmp, label %same, label %different
+
+same:
+  %cmp2 = icmp sle i32 %x, %y
+; CHECK: ret i1 false
+  ret i1 %cmp2
+
+different:
+  %cmp3 = icmp sgt i32 %x, %y
+; CHECK: ret i1 false
+  ret i1 %cmp3
+}
+
+; CHECK: @test8
+define i1 @test8(i32 %x, i32 %y) {
+  %cmp2 = icmp sle i32 %x, %y
+  %cmp = icmp sgt i32 %x, %y
+  %cmp3 = icmp sgt i32 %x, %y
+  br i1 %cmp, label %same, label %different
+
+same:
+; CHECK: ret i1 false
+  ret i1 %cmp2
+
+different:
+; CHECK: ret i1 false
+  ret i1 %cmp3
+}
+
+; PR1768
+; CHECK: @test9
+define i32 @test9(i32 %i, i32 %j) {
+  %cmp = icmp eq i32 %i, %j
+  br i1 %cmp, label %cond_true, label %ret
+
+cond_true:
+  %diff = sub i32 %i, %j
+  ret i32 %diff
+; CHECK: ret i32 0
+
+ret:
+  ret i32 5
+; CHECK: ret i32 5
+}
+
+; PR1768
+; CHECK: @test10
+define i32 @test10(i32 %j, i32 %i) {
+  %cmp = icmp eq i32 %i, %j
+  br i1 %cmp, label %cond_true, label %ret
+
+cond_true:
+  %diff = sub i32 %i, %j
+  ret i32 %diff
+; CHECK: ret i32 0
+
+ret:
+  ret i32 5
+; CHECK: ret i32 5
+}
+
+declare i32 @yogibar()
+
+; CHECK: @test11
+define i32 @test11(i32 %x) {
+  %v0 = call i32 @yogibar()
+  %v1 = call i32 @yogibar()
+  %cmp = icmp eq i32 %v0, %v1
+  br i1 %cmp, label %cond_true, label %next
+
+cond_true:
+  ret i32 %v1
+; CHECK: ret i32 %v0
+
+next:
+  %cmp2 = icmp eq i32 %x, %v0
+  br i1 %cmp2, label %cond_true2, label %next2
+
+cond_true2:
+  ret i32 %v0
+; CHECK: ret i32 %x
+
+next2:
+  ret i32 0
+}
+
+; CHECK: @test12
+define i32 @test12(i32 %x) {
+  %cmp = icmp eq i32 %x, 0
+  br i1 %cmp, label %cond_true, label %cond_false
+
+cond_true:
+  br label %ret
+
+cond_false:
+  br label %ret
+
+ret:
+  %res = phi i32 [ %x, %cond_true ], [ %x, %cond_false ]
+; CHECK: %res = phi i32 [ 0, %cond_true ], [ %x, %cond_false ]
+  ret i32 %res
+}
diff --git a/test/Transforms/GVN/dg.exp b/test/Transforms/GVN/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/GVN/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/GVN/lit.local.cfg b/test/Transforms/GVN/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/GVN/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/GVN/pre-compare.ll b/test/Transforms/GVN/pre-compare.ll
new file mode 100644
index 0000000..18d0c2e
--- /dev/null
+++ b/test/Transforms/GVN/pre-compare.ll
@@ -0,0 +1,68 @@
+; RUN: opt -gvn -S < %s | FileCheck %s
+
+; C source:
+;
+;   void f(int x) {
+;     if (x != 1)
+;       puts (x == 2 ? "a" : "b");
+;     for (;;) {
+;       puts("step 1");
+;       if (x == 2)
+;         continue;
+;       printf("step 2: %d\n", x);
+;     }
+;   }
+;
+; If we PRE %cmp3, CodeGenPrepare won't be able to sink the compare down to its
+; uses, and we are forced to keep both %x and %cmp3 in registers in the loop.
+;
+; It is just as cheap to recompute the icmp against %x as it is to compare a
+; GPR against 0. On x86-64, the br i1 %cmp3 becomes:
+;
+;   testb %r12b, %r12b
+;   jne	LBB0_3
+;
+; The sunk icmp is:
+;
+;   cmpl $2, %ebx
+;   je	LBB0_3
+;
+; This is just as good, and it doesn't require a separate register.
+;
+; CHECK-NOT: phi i1
+
+@.str = private unnamed_addr constant [2 x i8] c"a\00", align 1
+@.str1 = private unnamed_addr constant [2 x i8] c"b\00", align 1
+@.str2 = private unnamed_addr constant [7 x i8] c"step 1\00", align 1
+@.str3 = private unnamed_addr constant [12 x i8] c"step 2: %d\0A\00", align 1
+
+define void @f(i32 %x) noreturn nounwind uwtable ssp {
+entry:
+  %cmp = icmp eq i32 %x, 1
+  br i1 %cmp, label %for.cond.preheader, label %if.then
+
+if.then:                                          ; preds = %entry
+  %cmp1 = icmp eq i32 %x, 2
+  %cond = select i1 %cmp1, i8* getelementptr inbounds ([2 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8]* @.str1, i64 0, i64 0)
+  %call = tail call i32 @puts(i8* %cond) nounwind
+  br label %for.cond.preheader
+
+for.cond.preheader:                               ; preds = %entry, %if.then
+  %cmp3 = icmp eq i32 %x, 2
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond.backedge, %for.cond.preheader
+  %call2 = tail call i32 @puts(i8* getelementptr inbounds ([7 x i8]* @.str2, i64 0, i64 0)) nounwind
+  br i1 %cmp3, label %for.cond.backedge, label %if.end5
+
+if.end5:                                          ; preds = %for.cond
+  %call6 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str3, i64 0, i64 0), i32 %x) nounwind
+  br label %for.cond.backedge
+
+for.cond.backedge:                                ; preds = %if.end5, %for.cond
+  br label %for.cond
+}
+
+declare i32 @puts(i8* nocapture) nounwind
+
+declare i32 @printf(i8* nocapture, ...) nounwind
diff --git a/test/Transforms/GVN/rle.ll b/test/Transforms/GVN/rle.ll
index 2f0d2eb..9e08004 100644
--- a/test/Transforms/GVN/rle.ll
+++ b/test/Transforms/GVN/rle.ll
@@ -26,6 +26,15 @@ define i8 @crash0({i32, i32} %A, {i32, i32}* %P) {
   ret i8 %Y
 }
 
+;; No PR filed, crashed in CaptureTracker.
+declare void @helper()
+define void @crash1() {
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* undef, i64 undef, i32 1, i1 false) nounwind
+  %tmp = load i8* bitcast (void ()* @helper to i8*)
+  %x = icmp eq i8 %tmp, 15
+  ret void
+}
+
 
 ;;===----------------------------------------------------------------------===;;
 ;; Store -> Load  and  Load -> Load forwarding where src and dst are different
@@ -642,3 +651,28 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
 
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
+;;===----------------------------------------------------------------------===;;
+;; Load -> Store dependency which isn't interfered with by a call that happens
+;; before the pointer was captured.
+;;===----------------------------------------------------------------------===;;
+
+%class.X = type { [8 x i8] }
+
+@_ZTV1X = weak_odr constant [5 x i8*] zeroinitializer
+@_ZTV1Y = weak_odr constant [5 x i8*] zeroinitializer
+
+declare void @use()
+declare void @use3(i8***, i8**)
+
+; PR8908
+define void @test_escape1() nounwind {
+  %x = alloca i8**, align 8
+  store i8** getelementptr inbounds ([5 x i8*]* @_ZTV1X, i64 0, i64 2), i8*** %x, align 8
+  call void @use() nounwind
+  %DEAD = load i8*** %x, align 8
+  call void @use3(i8*** %x, i8** %DEAD) nounwind
+  ret void
+; CHECK: test_escape1
+; CHECK-NOT: DEAD
+; CHECK: ret
+}
diff --git a/test/Transforms/GlobalDCE/2009-09-03-MDNode.ll b/test/Transforms/GlobalDCE/2009-09-03-MDNode.ll
deleted file mode 100644
index 29864f8..0000000
--- a/test/Transforms/GlobalDCE/2009-09-03-MDNode.ll
+++ /dev/null
@@ -1,264 +0,0 @@
-; RUN: opt < %s -globaldce | llc -O0 -o /dev/null
-
-%struct..0__pthread_mutex_s = type { i32, i32, i32, i32, i32, i32, %struct.__pthread_list_t }
-%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>" = type { i32 }
-%struct.__pthread_list_t = type { %struct.__pthread_list_t*, %struct.__pthread_list_t* }
-%struct.pthread_attr_t = type { i64, [48 x i8] }
-%struct.pthread_mutex_t = type { %struct..0__pthread_mutex_s }
-
-@_ZL20__gthrw_pthread_oncePiPFvvE = alias weak i32 (i32*, void ()*)* @pthread_once ; <i32 (i32*, void ()*)*> [#uses=0]
-@_ZL27__gthrw_pthread_getspecificj = alias weak i8* (i32)* @pthread_getspecific ; <i8* (i32)*> [#uses=0]
-@_ZL27__gthrw_pthread_setspecificjPKv = alias weak i32 (i32, i8*)* @pthread_setspecific ; <i32 (i32, i8*)*> [#uses=0]
-@_ZL22__gthrw_pthread_createPmPK14pthread_attr_tPFPvS3_ES3_ = alias weak i32 (i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create ; <i32 (i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)*> [#uses=0]
-@_ZL22__gthrw_pthread_cancelm = alias weak i32 (i64)* @pthread_cancel ; <i32 (i64)*> [#uses=0]
-@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_lock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_trylock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_unlock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = alias weak i32 (%struct.pthread_mutex_t*, %"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)* @pthread_mutex_init ; <i32 (%struct.pthread_mutex_t*, %"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)*> [#uses=0]
-@_ZL26__gthrw_pthread_key_createPjPFvPvE = alias weak i32 (i32*, void (i8*)*)* @pthread_key_create ; <i32 (i32*, void (i8*)*)*> [#uses=0]
-@_ZL26__gthrw_pthread_key_deletej = alias weak i32 (i32)* @pthread_key_delete ; <i32 (i32)*> [#uses=0]
-@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = alias weak i32 (%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)* @pthread_mutexattr_init ; <i32 (%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)*> [#uses=0]
-@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = alias weak i32 (%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*, i32)* @pthread_mutexattr_settype ; <i32 (%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*, i32)*> [#uses=0]
-@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = alias weak i32 (%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)* @pthread_mutexattr_destroy ; <i32 (%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)*> [#uses=0]
-
-define weak void @_ZN9__gnu_cxx26__aux_require_boolean_exprIbEEvRKT_(i8* %__t) {
-entry:
-  tail call void @llvm.dbg.func.start(metadata !0)
-  tail call void @llvm.dbg.stoppoint(i32 240, i32 0, metadata !2)
-  tail call void @llvm.dbg.region.end(metadata !0)
-  ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_19_ConvertibleConceptIjjEEEEvv() {
-entry:
-  tail call void @llvm.dbg.func.start(metadata !8)
-  tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
-  tail call void @llvm.dbg.region.end(metadata !8)
-  ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPcEEEEvv() {
-entry:
-  tail call void @llvm.dbg.func.start(metadata !11)
-  tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
-  tail call void @llvm.dbg.region.end(metadata !11)
-  ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPKcEEEEvv() {
-entry:
-  tail call void @llvm.dbg.func.start(metadata !12)
-  tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
-  tail call void @llvm.dbg.region.end(metadata !12)
-  ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPwEEEEvv() {
-entry:
-  tail call void @llvm.dbg.func.start(metadata !13)
-  tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
-  tail call void @llvm.dbg.region.end(metadata !13)
-  ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPKwEEEEvv() {
-entry:
-  tail call void @llvm.dbg.func.start(metadata !14)
-  tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
-  tail call void @llvm.dbg.region.end(metadata !14)
-  ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIPwEEEEvv() {
-entry:
-  tail call void @llvm.dbg.func.start(metadata !15)
-  tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
-  tail call void @llvm.dbg.region.end(metadata !15)
-  ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIPcEEEEvv() {
-entry:
-  tail call void @llvm.dbg.func.start(metadata !16)
-  tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
-  tail call void @llvm.dbg.region.end(metadata !16)
-  ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIiEEEEvv() {
-entry:
-  tail call void @llvm.dbg.func.start(metadata !17)
-  tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
-  tail call void @llvm.dbg.region.end(metadata !17)
-  ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIlEEEEvv() {
-entry:
-  tail call void @llvm.dbg.func.start(metadata !18)
-  tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
-  tail call void @llvm.dbg.region.end(metadata !18)
-  ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIxEEEEvv() {
-entry:
-  tail call void @llvm.dbg.func.start(metadata !19)
-  tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
-  tail call void @llvm.dbg.region.end(metadata !19)
-  ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIjEEEEvv() {
-entry:
-  tail call void @llvm.dbg.func.start(metadata !20)
-  tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
-  tail call void @llvm.dbg.region.end(metadata !20)
-  ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_22_OutputIteratorConceptISt19ostreambuf_iteratorIcSt11char_traitsIcEEcEEEEvv() {
-entry:
-  tail call void @llvm.dbg.func.start(metadata !21)
-  tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
-  tail call void @llvm.dbg.region.end(metadata !21)
-  ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_22_OutputIteratorConceptISt19ostreambuf_iteratorIwSt11char_traitsIwEEwEEEEvv() {
-entry:
-  tail call void @llvm.dbg.func.start(metadata !22)
-  tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
-  tail call void @llvm.dbg.region.end(metadata !22)
-  ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPcEEEEvv() {
-entry:
-  tail call void @llvm.dbg.func.start(metadata !23)
-  tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
-  tail call void @llvm.dbg.region.end(metadata !23)
-  ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPKcEEEEvv() {
-entry:
-  tail call void @llvm.dbg.func.start(metadata !24)
-  tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
-  tail call void @llvm.dbg.region.end(metadata !24)
-  ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPKcSsEEEEEEvv() {
-entry:
-  tail call void @llvm.dbg.func.start(metadata !25)
-  tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
-  tail call void @llvm.dbg.region.end(metadata !25)
-  ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPcSsEEEEEEvv() {
-entry:
-  tail call void @llvm.dbg.func.start(metadata !26)
-  tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
-  tail call void @llvm.dbg.region.end(metadata !26)
-  ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPKwSbIwSt11char_traitsIwESaIwEEEEEEEEvv() {
-entry:
-  tail call void @llvm.dbg.func.start(metadata !27)
-  tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
-  tail call void @llvm.dbg.region.end(metadata !27)
-  ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPwSbIwSt11char_traitsIwESaIwEEEEEEEEvv() {
-entry:
-  tail call void @llvm.dbg.func.start(metadata !28)
-  tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
-  tail call void @llvm.dbg.region.end(metadata !28)
-  ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPwEEEEvv() {
-entry:
-  tail call void @llvm.dbg.func.start(metadata !29)
-  tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
-  tail call void @llvm.dbg.region.end(metadata !29)
-  ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPKwEEEEvv() {
-entry:
-  tail call void @llvm.dbg.func.start(metadata !30)
-  tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
-  tail call void @llvm.dbg.region.end(metadata !30)
-  ret void
-}
-
-declare void @llvm.dbg.func.start(metadata) nounwind readnone
-
-declare void @llvm.dbg.stoppoint(i32, i32, metadata) nounwind readnone
-
-declare void @llvm.dbg.region.end(metadata) nounwind readnone
-
-declare extern_weak i32 @pthread_once(i32*, void ()*)
-
-declare extern_weak i8* @pthread_getspecific(i32)
-
-declare extern_weak i32 @pthread_setspecific(i32, i8*)
-
-declare extern_weak i32 @pthread_create(i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)
-
-declare extern_weak i32 @pthread_cancel(i64)
-
-declare extern_weak i32 @pthread_mutex_lock(%struct.pthread_mutex_t*)
-
-declare extern_weak i32 @pthread_mutex_trylock(%struct.pthread_mutex_t*)
-
-declare extern_weak i32 @pthread_mutex_unlock(%struct.pthread_mutex_t*)
-
-declare extern_weak i32 @pthread_mutex_init(%struct.pthread_mutex_t*, %"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)
-
-declare extern_weak i32 @pthread_key_create(i32*, void (i8*)*)
-
-declare extern_weak i32 @pthread_key_delete(i32)
-
-declare extern_weak i32 @pthread_mutexattr_init(%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)
-
-declare extern_weak i32 @pthread_mutexattr_settype(%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*, i32)
-
-declare extern_weak i32 @pthread_mutexattr_destroy(%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)
-
-!0 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__aux_require_boolean_expr<bool>", metadata !"__aux_require_boolean_expr<bool>", metadata !"_ZN9__gnu_cxx26__aux_require_boolean_exprIbEEvRKT_", metadata !2, i32 239, metadata !3, i1 false, i1 true}
-!1 = metadata !{i32 458769, i32 0, i32 4, metadata !"concept-inst.cc", metadata !"/home/buildbot/buildslave/llvm-x86_64-linux-selfhost/llvm-gcc.obj/x86_64-unknown-linux-gnu/libstdc++-v3/src/../../../../llvm-gcc.src/libstdc++-v3/src", metadata !"4.2.1 (Based on Apple Inc. build 5649) (LLVM build)", i1 true, i1 true, metadata !"", i32 0}
-!2 = metadata !{i32 458769, i32 0, i32 4, metadata !"boost_concept_check.h", metadata !"/home/buildbot/buildslave/llvm-x86_64-linux-selfhost/llvm-gcc.obj/x86_64-unknown-linux-gnu/libstdc++-v3/include/bits", metadata !"4.2.1 (Based on Apple Inc. build 5649) (LLVM build)", i1 false, i1 true, metadata !"", i32 0}
-!3 = metadata !{i32 458773, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0}
-!4 = metadata !{null, metadata !5}
-!5 = metadata !{i32 458768, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !6}
-!6 = metadata !{i32 458790, metadata !1, metadata !"", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, metadata !7}
-!7 = metadata !{i32 458788, metadata !1, metadata !"bool", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2}
-!8 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_ConvertibleConcept<unsigned int, unsigned int> >", metadata !"__function_requires<__gnu_cxx::_ConvertibleConcept<unsigned int, unsigned int> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_19_ConvertibleConceptIjjEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!9 = metadata !{i32 458773, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0}
-!10 = metadata !{null}
-!11 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<char*> >", metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<char*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPcEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!12 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<const char*> >", metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<const char*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPKcEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!13 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<wchar_t*> >", metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<wchar_t*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPwEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!14 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<const wchar_t*> >", metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<const wchar_t*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPKwEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!15 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<wchar_t*> >", metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<wchar_t*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIPwEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!16 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<char*> >", metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<char*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIPcEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!17 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<int> >", metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<int> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIiEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!18 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<long int> >", metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<long int> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIlEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!19 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<long long int> >", metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<long long int> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIxEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!20 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<unsigned int> >", metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<unsigned int> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIjEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!21 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_OutputIteratorConcept<std::ostreambuf_iterator<char, std::char_traits<char> >, char> >", metadata !"__function_requires<__gnu_cxx::_OutputIteratorConcept<std::ostreambuf_iterator<char, std::char_traits<char> >, char> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_22_OutputIteratorConceptISt19ostreambuf_iteratorIcSt11char_traitsIcEEcEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!22 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_OutputIteratorConcept<std::ostreambuf_iterator<wchar_t, std::char_traits<wchar_t> >, wchar_t> >", metadata !"__function_requires<__gnu_cxx::_OutputIteratorConcept<std::ostreambuf_iterator<wchar_t, std::char_traits<wchar_t> >, wchar_t> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_22_OutputIteratorConceptISt19ostreambuf_iteratorIwSt11char_traitsIwEEwEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!23 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<char*> >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<char*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPcEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!24 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<const char*> >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<const char*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPKcEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!25 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<const char*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<const char*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPKcSsEEEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!26 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<char*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<char*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPcSsEEEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!27 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<const wchar_t*, std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > > > >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<const wchar_t*, std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > > > >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPKwSbIwSt11char_traitsIwESaIwEEEEEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!28 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<wchar_t*, std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > > > >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<wchar_t*, std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > > > >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPwSbIwSt11char_traitsIwESaIwEEEEEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!29 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<wchar_t*> >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<wchar_t*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPwEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!30 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<const wchar_t*> >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<const wchar_t*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPKwEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
diff --git a/test/Transforms/GlobalDCE/dg.exp b/test/Transforms/GlobalDCE/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/GlobalDCE/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/GlobalDCE/lit.local.cfg b/test/Transforms/GlobalDCE/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/GlobalDCE/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll b/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll
index a6803ab..588d5c9 100644
--- a/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll
+++ b/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll
@@ -3,7 +3,7 @@
 
 define double @foo() nounwind  {
 entry:
-	%tmp1 = volatile load double* @t0.1441, align 8		; <double> [#uses=2]
+	%tmp1 = load volatile double* @t0.1441, align 8		; <double> [#uses=2]
 	%tmp4 = fmul double %tmp1, %tmp1		; <double> [#uses=1]
 	ret double %tmp4
 }
diff --git a/test/Transforms/GlobalOpt/2008-02-16-NestAttr.ll b/test/Transforms/GlobalOpt/2008-02-16-NestAttr.ll
deleted file mode 100644
index 0e70c49..0000000
--- a/test/Transforms/GlobalOpt/2008-02-16-NestAttr.ll
+++ /dev/null
@@ -1,57 +0,0 @@
-; RUN: opt < %s -globalopt -S | grep { nest } | count 1
-	%struct.FRAME.nest = type { i32, i32 (i32)* }
-	%struct.__builtin_trampoline = type { [10 x i8] }
-@.str = internal constant [7 x i8] c"%d %d\0A\00"		; <[7 x i8]*> [#uses=1]
-
-define i32 @process(i32 (i32)* %func) nounwind  {
-entry:
-	%tmp2 = tail call i32 %func( i32 1 ) nounwind 		; <i32> [#uses=1]
-	ret i32 %tmp2
-}
-
-define internal fastcc i32 @g.1478(%struct.FRAME.nest* nest  %CHAIN.1, i32 %m) nounwind  {
-entry:
-	%tmp3 = getelementptr %struct.FRAME.nest* %CHAIN.1, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp4 = load i32* %tmp3, align 4		; <i32> [#uses=1]
-	%tmp7 = icmp eq i32 %tmp4, %m		; <i1> [#uses=1]
-	%tmp78 = zext i1 %tmp7 to i32		; <i32> [#uses=1]
-	ret i32 %tmp78
-}
-
-define internal i32 @f.1481(%struct.FRAME.nest* nest  %CHAIN.2, i32 %m) nounwind  {
-entry:
-	%tmp4 = tail call fastcc i32 @g.1478( %struct.FRAME.nest* nest  %CHAIN.2, i32 %m ) nounwind 		; <i32> [#uses=1]
-	%tmp6 = getelementptr %struct.FRAME.nest* %CHAIN.2, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp7 = load i32* %tmp6, align 4		; <i32> [#uses=1]
-	%tmp9 = icmp eq i32 %tmp4, %tmp7		; <i1> [#uses=1]
-	%tmp910 = zext i1 %tmp9 to i32		; <i32> [#uses=1]
-	ret i32 %tmp910
-}
-
-define i32 @nest(i32 %n) nounwind  {
-entry:
-	%TRAMP.316 = alloca [10 x i8]		; <[10 x i8]*> [#uses=1]
-	%FRAME.0 = alloca %struct.FRAME.nest		; <%struct.FRAME.nest*> [#uses=3]
-	%TRAMP.316.sub = getelementptr [10 x i8]* %TRAMP.316, i32 0, i32 0		; <i8*> [#uses=1]
-	%tmp3 = getelementptr %struct.FRAME.nest* %FRAME.0, i32 0, i32 0		; <i32*> [#uses=1]
-	store i32 %n, i32* %tmp3, align 8
-	%FRAME.06 = bitcast %struct.FRAME.nest* %FRAME.0 to i8*		; <i8*> [#uses=1]
-	%tramp = call i8* @llvm.init.trampoline( i8* %TRAMP.316.sub, i8* bitcast (i32 (%struct.FRAME.nest*, i32)* @f.1481 to i8*), i8* %FRAME.06 )		; <i8*> [#uses=1]
-	%tmp7 = getelementptr %struct.FRAME.nest* %FRAME.0, i32 0, i32 1		; <i32 (i32)**> [#uses=1]
-	%tmp89 = bitcast i8* %tramp to i32 (i32)*		; <i32 (i32)*> [#uses=2]
-	store i32 (i32)* %tmp89, i32 (i32)** %tmp7, align 4
-	%tmp13 = call i32 @process( i32 (i32)* %tmp89 ) nounwind 		; <i32> [#uses=1]
-	ret i32 %tmp13
-}
-
-declare i8* @llvm.init.trampoline(i8*, i8*, i8*) nounwind 
-
-define i32 @main() nounwind  {
-entry:
-	%tmp = tail call i32 @nest( i32 2 ) nounwind 		; <i32> [#uses=1]
-	%tmp1 = tail call i32 @nest( i32 1 ) nounwind 		; <i32> [#uses=1]
-	%tmp3 = tail call i32 (i8*, ...)* @printf( i8* noalias  getelementptr ([7 x i8]* @.str, i32 0, i32 0), i32 %tmp1, i32 %tmp ) nounwind 		; <i32> [#uses=0]
-	ret i32 undef
-}
-
-declare i32 @printf(i8*, ...) nounwind 
diff --git a/test/Transforms/GlobalOpt/atomic.ll b/test/Transforms/GlobalOpt/atomic.ll
new file mode 100644
index 0000000..4c3f439
--- /dev/null
+++ b/test/Transforms/GlobalOpt/atomic.ll
@@ -0,0 +1,10 @@
+; RUN: opt -globalopt < %s -S -o - | FileCheck %s
+
+@GV1 = internal global i64 1
+; CHECK: @GV1 = internal unnamed_addr constant i64 1
+
+define void @test1() {
+entry:
+  %0 = load atomic i8* bitcast (i64* @GV1 to i8*) acquire, align 8
+  ret void
+}
diff --git a/test/Transforms/GlobalOpt/constantfold-initializers.ll b/test/Transforms/GlobalOpt/constantfold-initializers.ll
index 834bd00..af8fa32 100644
--- a/test/Transforms/GlobalOpt/constantfold-initializers.ll
+++ b/test/Transforms/GlobalOpt/constantfold-initializers.ll
@@ -6,3 +6,46 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 ; CHECK: @A = global i1 false
 @A = global i1 icmp ne (i64 sub nsw (i64 ptrtoint (i8* getelementptr inbounds ([3 x i8]* @.str91250, i64 0, i64 1) to i64), i64 ptrtoint ([3 x i8]* @.str91250 to i64)), i64 1)
+
+; PR11352
+
+@xs = global [2 x i32] zeroinitializer, align 4
+; CHECK: @xs = global [2 x i32] [i32 1, i32 1]
+
+define internal void @test1() {
+entry:
+  store i32 1, i32* getelementptr inbounds ([2 x i32]* @xs, i64 0, i64 0)
+  %0 = load i32* getelementptr inbounds ([2 x i32]* @xs, i32 0, i64 0), align 4
+  store i32 %0, i32* getelementptr inbounds ([2 x i32]* @xs, i64 0, i64 1)
+  ret void
+}
+
+; PR12060
+
+%closure = type { i32 }
+
+@f = internal global %closure zeroinitializer, align 4
+@m = global i32 0, align 4
+; CHECK-NOT: @f
+; CHECK: @m = global i32 13
+
+define internal i32 @test2_helper(%closure* %this, i32 %b) {
+entry:
+  %0 = getelementptr inbounds %closure* %this, i32 0, i32 0
+  %1 = load i32* %0, align 4
+  %add = add nsw i32 %1, %b
+  ret i32 %add
+}
+
+define internal void @test2() {
+entry:
+  store i32 4, i32* getelementptr inbounds (%closure* @f, i32 0, i32 0)
+  %call = call i32 @test2_helper(%closure* @f, i32 9)
+  store i32 %call, i32* @m, align 4
+  ret void
+}
+
+@llvm.global_ctors = appending constant
+  [2 x { i32, void ()* }]
+  [{ i32, void ()* } { i32 65535, void ()* @test1 },
+   { i32, void ()* } { i32 65535, void ()* @test2 }]
diff --git a/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll b/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll
index 204f979..e3bc473 100644
--- a/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll
+++ b/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll
@@ -4,20 +4,31 @@ target triple = "x86_64-apple-darwin10.0.0"
 
 %0 = type { i32, void ()* }
 %struct.foo = type { i32* }
+%struct.bar = type { i128 }
 
 @G = global i32 0, align 4
 @H = global i32 0, align 4
 @X = global %struct.foo zeroinitializer, align 8
-@llvm.global_ctors = appending global [1 x %0] [%0 { i32 65535, void ()* @init }]
+@X2 = global %struct.bar zeroinitializer, align 8
+@llvm.global_ctors = appending global [2 x %0] [%0 { i32 65535, void ()* @init1 }, %0 { i32 65535, void ()* @init2 }]
 
 ; PR8710 - GlobalOpt shouldn't change the global's initializer to have this
 ; arbitrary constant expression, the code generator can't handle it.
-define internal void @init() {
+define internal void @init1() {
 entry:
   %tmp = getelementptr inbounds %struct.foo* @X, i32 0, i32 0
   store i32* inttoptr (i64 sdiv (i64 ptrtoint (i32* @G to i64), i64 ptrtoint (i32* @H to i64)) to i32*), i32** %tmp, align 8
   ret void
 }
-
-; CHECK: @init
+; CHECK: @init1
 ; CHECK: store i32*
+
+; PR11705 - ptrtoint isn't safe in general in global initializers.
+define internal void @init2() {
+entry:
+  %tmp = getelementptr inbounds %struct.bar* @X2, i32 0, i32 0
+  store i128 ptrtoint (i32* @G to i128), i128* %tmp, align 16
+  ret void
+}
+; CHECK: @init2
+; CHECK: store i128
diff --git a/test/Transforms/GlobalOpt/cxx-dtor.ll b/test/Transforms/GlobalOpt/cxx-dtor.ll
index 2263562..7c6ae78 100644
--- a/test/Transforms/GlobalOpt/cxx-dtor.ll
+++ b/test/Transforms/GlobalOpt/cxx-dtor.ll
@@ -2,6 +2,7 @@
 
 %0 = type { i32, void ()* }
 %struct.A = type { i8 }
+%struct.B = type { }
 
 @a = global %struct.A zeroinitializer, align 1
 @__dso_handle = external global i8*
@@ -15,13 +16,14 @@ define internal void @__cxx_global_var_init() nounwind section "__TEXT,__StaticI
 }
 
 define linkonce_odr void @_ZN1AD1Ev(%struct.A* %this) nounwind align 2 {
-  call void @_ZN1AD2Ev(%struct.A* %this)
+  %t = bitcast %struct.A* %this to %struct.B*
+  call void @_ZN1BD1Ev(%struct.B* %t)
   ret void
 }
 
 declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*)
 
-define linkonce_odr void @_ZN1AD2Ev(%struct.A* %this) nounwind align 2 {
+define linkonce_odr void @_ZN1BD1Ev(%struct.B* %this) nounwind align 2 {
   ret void
 }
 
diff --git a/test/Transforms/GlobalOpt/deadfunction.ll b/test/Transforms/GlobalOpt/deadfunction.ll
new file mode 100644
index 0000000..5e003c6
--- /dev/null
+++ b/test/Transforms/GlobalOpt/deadfunction.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -globalopt -S | FileCheck %s
+
+; CHECK-NOT: test
+
+declare void @aa()
+declare void @bb()
+
+; Test that we can erase a function which has a blockaddress referring to it
+@test.x = internal unnamed_addr constant [3 x i8*] [i8* blockaddress(@test, %a), i8* blockaddress(@test, %b), i8* blockaddress(@test, %c)], align 16
+define internal void @test(i32 %n) nounwind noinline {
+entry:
+  %idxprom = sext i32 %n to i64
+  %arrayidx = getelementptr inbounds [3 x i8*]* @test.x, i64 0, i64 %idxprom
+  %0 = load i8** %arrayidx, align 8
+  indirectbr i8* %0, [label %a, label %b, label %c]
+
+a:
+  tail call void @aa() nounwind
+  br label %b
+
+b:
+  tail call void @bb() nounwind
+  br label %c
+
+c:
+  ret void
+}
diff --git a/test/Transforms/GlobalOpt/dg.exp b/test/Transforms/GlobalOpt/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/GlobalOpt/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/GlobalOpt/invariant.ll b/test/Transforms/GlobalOpt/invariant.ll
new file mode 100644
index 0000000..6b99193
--- /dev/null
+++ b/test/Transforms/GlobalOpt/invariant.ll
@@ -0,0 +1,59 @@
+; RUN: opt -globalopt -S -o - < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare {}* @llvm.invariant.start(i64 %size, i8* nocapture %ptr)
+
+define void @test1(i8* %ptr) {
+  call {}* @llvm.invariant.start(i64 4, i8* %ptr)
+  ret void
+}
+
+@object1 = global i32 0
+; CHECK: @object1 = constant i32 -1
+define void @ctor1() {
+  store i32 -1, i32* @object1
+  %A = bitcast i32* @object1 to i8*
+  call void @test1(i8* %A)
+  ret void
+}
+
+
+@object2 = global i32 0
+; CHECK: @object2 = global i32 0
+define void @ctor2() {
+  store i32 -1, i32* @object2
+  %A = bitcast i32* @object2 to i8*
+  %B = call {}* @llvm.invariant.start(i64 4, i8* %A)
+  %C = bitcast {}* %B to i8*
+  ret void
+}
+
+
+@object3 = global i32 0
+; CHECK: @object3 = global i32 -1
+define void @ctor3() {
+  store i32 -1, i32* @object3
+  %A = bitcast i32* @object3 to i8*
+  call {}* @llvm.invariant.start(i64 3, i8* %A)
+  ret void
+}
+
+
+@object4 = global i32 0
+; CHECK: @object4 = global i32 -1
+define void @ctor4() {
+  store i32 -1, i32* @object4
+  %A = bitcast i32* @object4 to i8*
+  call {}* @llvm.invariant.start(i64 -1, i8* %A)
+  ret void
+}
+
+
+@llvm.global_ctors = appending constant
+  [4 x { i32, void ()* }]
+  [ { i32, void ()* } { i32 65535, void ()* @ctor1 },
+    { i32, void ()* } { i32 65535, void ()* @ctor2 },
+    { i32, void ()* } { i32 65535, void ()* @ctor3 },
+    { i32, void ()* } { i32 65535, void ()* @ctor4 } ]
diff --git a/test/Transforms/GlobalOpt/invoke.ll b/test/Transforms/GlobalOpt/invoke.ll
new file mode 100644
index 0000000..c1f499c
--- /dev/null
+++ b/test/Transforms/GlobalOpt/invoke.ll
@@ -0,0 +1,27 @@
+; RUN: opt -S -globalopt < %s | FileCheck %s
+; rdar://11022897
+
+; Globalopt should be able to evaluate an invoke.
+; CHECK: @tmp = global i32 1
+
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
+@tmp = global i32 0
+
+define i32 @one() {
+  ret i32 1
+}
+
+define void @_GLOBAL__I_a() {
+bb:
+  %tmp1 = invoke i32 @one()
+          to label %bb2 unwind label %bb4
+
+bb2:                                              ; preds = %bb
+  store i32 %tmp1, i32* @tmp
+  ret void
+
+bb4:                                              ; preds = %bb
+  %tmp5 = landingpad { i8*, i32 } personality i8* undef
+          filter [0 x i8*] zeroinitializer
+  unreachable
+}
diff --git a/test/Transforms/GlobalOpt/lit.local.cfg b/test/Transforms/GlobalOpt/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/GlobalOpt/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/GlobalOpt/zeroinitializer-gep-load.ll b/test/Transforms/GlobalOpt/zeroinitializer-gep-load.ll
new file mode 100644
index 0000000..d613601
--- /dev/null
+++ b/test/Transforms/GlobalOpt/zeroinitializer-gep-load.ll
@@ -0,0 +1,11 @@
+; RUN: opt < %s -S -globalopt | FileCheck %s
+
+@zero = internal global [10 x i32] zeroinitializer
+
+define i32 @test1(i64 %idx) nounwind {
+  %arrayidx = getelementptr inbounds [10 x i32]* @zero, i64 0, i64 %idx
+  %l = load i32* %arrayidx
+  ret i32 %l
+; CHECK: @test1
+; CHECK: ret i32 0
+}
diff --git a/test/Transforms/IPConstantProp/dangling-block-address.ll b/test/Transforms/IPConstantProp/dangling-block-address.ll
index 0489dfa..bb101333 100644
--- a/test/Transforms/IPConstantProp/dangling-block-address.ll
+++ b/test/Transforms/IPConstantProp/dangling-block-address.ll
@@ -12,7 +12,7 @@
 define void @foo(i32 %x) nounwind readnone {
 entry:
   %b = alloca i32, align 4                        ; <i32*> [#uses=1]
-  volatile store i32 -1, i32* %b
+  store volatile i32 -1, i32* %b
   ret void
 }
 
diff --git a/test/Transforms/IPConstantProp/dg.exp b/test/Transforms/IPConstantProp/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/IPConstantProp/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/IPConstantProp/lit.local.cfg b/test/Transforms/IPConstantProp/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/IPConstantProp/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll b/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
index 77354f7..af9f1b3 100644
--- a/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
+++ b/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -indvars -enable-iv-rewrite=false -S | FileCheck %s
+; RUN: opt < %s -indvars -S | FileCheck %s
 ; Test WidenIV::GetExtendedOperandRecurrence.
 ; add219 should be extended to i64 because it is nsw, even though its
 ; sext cannot be hoisted outside the loop.
@@ -19,7 +19,7 @@ for.body153:                                      ; preds = %for.body153, %for.b
 
 ; CHECK: add nsw i64 %indvars.iv, 1
 for.body170:                                      ; preds = %for.body170, %for.body153
-  %i2.19 = phi i32 [ %add249, %for.body170 ], [ undef, %for.body153 ]
+  %i2.19 = phi i32 [ %add249, %for.body170 ], [ 0, %for.body153 ]
   %add219 = add nsw i32 %i2.19, 1
   %idxprom220 = sext i32 %add219 to i64
   %add249 = add nsw i32 %i2.19, %shl132
diff --git a/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll b/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll
new file mode 100644
index 0000000..76c90e0
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll
@@ -0,0 +1,59 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+; rdar://10359193: assert "IndVar type must match IVInit type"
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-darwin"
+
+; CHECK: @test
+; CHECK: if.end.i126:
+; CHECK: %exitcond = icmp ne i8* %incdec.ptr.i, getelementptr (i8* null, i32 undef)
+define void @test() nounwind {
+entry:
+  br label %while.cond
+
+while.cond:
+  br i1 undef, label %while.end, label %while.body
+
+while.body:                                       ; preds = %while.cond
+  br i1 undef, label %if.then165, label %while.cond
+
+if.then165:                                       ; preds = %while.body
+  br i1 undef, label %while.cond, label %for.body.lr.ph.i81
+
+for.body.lr.ph.i81:                               ; preds = %if.then165
+  br label %for.body.i86
+
+for.body.i86:                                     ; preds = %for.end.i129, %for.body.lr.ph.i81
+  %cmp196.i = icmp ult i32 0, undef
+  br i1 %cmp196.i, label %for.body21.lr.ph.i, label %for.end.i129
+
+for.body21.lr.ph.i:                               ; preds = %for.body.i86
+  br label %for.body21.i
+
+for.body21.i:
+  %destYPixelPtr.010.i = phi i8* [ null, %for.body21.lr.ph.i ], [ %incdec.ptr.i, %if.end.i126 ]
+  %x.09.i = phi i32 [ 0, %for.body21.lr.ph.i ], [ %inc.i125, %if.end.i126 ]
+  br i1 undef, label %if.end.i126, label %if.else.i124
+
+if.else.i124:                                     ; preds = %for.body21.i
+  store i8 undef, i8* %destYPixelPtr.010.i, align 1
+  br label %if.end.i126
+
+if.end.i126:                                      ; preds = %if.else.i124, %for.body21.i
+  %incdec.ptr.i = getelementptr inbounds i8* %destYPixelPtr.010.i, i32 1
+  %inc.i125 = add i32 %x.09.i, 1
+  %cmp19.i = icmp ult i32 %inc.i125, undef
+  br i1 %cmp19.i, label %for.body21.i, label %for.end.i129
+
+for.end.i129:                                     ; preds = %if.end.i126, %for.body.i86
+  br i1 undef, label %for.body.i86, label %while.cond
+
+while.end:                                        ; preds = %while.cond
+  br label %bail
+
+bail:                                             ; preds = %while.end, %lor.lhs.false44, %lor.lhs.false41, %if.end29, %if.end
+  unreachable
+
+return:                                           ; preds = %lor.lhs.false20, %lor.lhs.false12, %lor.lhs.false, %entry
+  ret void
+}
diff --git a/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll b/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll
new file mode 100644
index 0000000..c0c508f
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll
@@ -0,0 +1,140 @@
+; RUN: opt < %s -indvars -S "-default-data-layout=e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" | FileCheck %s
+; RUN: opt < %s -indvars -S "-default-data-layout=e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" | FileCheck %s
+;
+; PR11279: Assertion !IVLimit->getType()->isPointerTy()
+;
+; Test LinearFunctionTestReplace of a pointer-type loop counter. Note
+; that BECount may or may not be a pointer type. A pointer type
+; BECount doesn't really make sense, but that's what falls out of
+; SCEV. Since it's an i8*, it has unit stride so we never adjust the
+; SCEV expression in a way that would convert it to an integer type.
+
+; CHECK: @testnullptrptr
+; CHECK: loop:
+; CHECK: icmp ne
+define i8 @testnullptrptr(i8* %buf, i8* %end) nounwind {
+  br label %loopguard
+
+loopguard:
+  %guard = icmp ult i8* null, %end
+  br i1 %guard, label %preheader, label %exit
+
+preheader:
+  br label %loop
+
+loop:
+  %p.01.us.us = phi i8* [ null, %preheader ], [ %gep, %loop ]
+  %s = phi i8 [0, %preheader], [%snext, %loop]
+  %gep = getelementptr inbounds i8* %p.01.us.us, i64 1
+  %snext = load i8* %gep
+  %cmp = icmp ult i8* %gep, %end
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i8 %snext
+}
+
+; CHECK: @testptrptr
+; CHECK: loop:
+; CHECK: icmp ne
+define i8 @testptrptr(i8* %buf, i8* %end) nounwind {
+  br label %loopguard
+
+loopguard:
+  %guard = icmp ult i8* %buf, %end
+  br i1 %guard, label %preheader, label %exit
+
+preheader:
+  br label %loop
+
+loop:
+  %p.01.us.us = phi i8* [ %buf, %preheader ], [ %gep, %loop ]
+  %s = phi i8 [0, %preheader], [%snext, %loop]
+  %gep = getelementptr inbounds i8* %p.01.us.us, i64 1
+  %snext = load i8* %gep
+  %cmp = icmp ult i8* %gep, %end
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i8 %snext
+}
+
+; CHECK: @testnullptrint
+; CHECK: loop:
+; CHECK: icmp ne
+define i8 @testnullptrint(i8* %buf, i8* %end) nounwind {
+  br label %loopguard
+
+loopguard:
+  %bi = ptrtoint i8* %buf to i32
+  %ei = ptrtoint i8* %end to i32
+  %cnt = sub i32 %ei, %bi
+  %guard = icmp ult i32 0, %cnt
+  br i1 %guard, label %preheader, label %exit
+
+preheader:
+  br label %loop
+
+loop:
+  %p.01.us.us = phi i8* [ null, %preheader ], [ %gep, %loop ]
+  %iv = phi i32 [ 0, %preheader ], [ %ivnext, %loop ]
+  %s = phi i8 [0, %preheader], [%snext, %loop]
+  %gep = getelementptr inbounds i8* %p.01.us.us, i64 1
+  %snext = load i8* %gep
+  %ivnext = add i32 %iv, 1
+  %cmp = icmp ult i32 %ivnext, %cnt
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i8 %snext
+}
+
+; CHECK: @testptrint
+; CHECK: loop:
+; CHECK: icmp ne
+define i8 @testptrint(i8* %buf, i8* %end) nounwind {
+  br label %loopguard
+
+loopguard:
+  %bi = ptrtoint i8* %buf to i32
+  %ei = ptrtoint i8* %end to i32
+  %cnt = sub i32 %ei, %bi
+  %guard = icmp ult i32 %bi, %cnt
+  br i1 %guard, label %preheader, label %exit
+
+preheader:
+  br label %loop
+
+loop:
+  %p.01.us.us = phi i8* [ %buf, %preheader ], [ %gep, %loop ]
+  %iv = phi i32 [ %bi, %preheader ], [ %ivnext, %loop ]
+  %s = phi i8 [0, %preheader], [%snext, %loop]
+  %gep = getelementptr inbounds i8* %p.01.us.us, i64 1
+  %snext = load i8* %gep
+  %ivnext = add i32 %iv, 1
+  %cmp = icmp ult i32 %ivnext, %cnt
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i8 %snext
+}
+
+; IV and BECount have two different pointer types here.
+define void @testnullptr([512 x i8]* %base) nounwind {
+entry:
+  %add.ptr1603 = getelementptr [512 x i8]* %base, i64 0, i64 512
+  br label %preheader
+
+preheader:
+  %cmp1604192 = icmp ult i8* undef, %add.ptr1603
+  br i1 %cmp1604192, label %for.body, label %for.end1609
+
+for.body:
+  %r.17193 = phi i8* [ %incdec.ptr1608, %for.body ], [ null, %preheader ]
+  %incdec.ptr1608 = getelementptr i8* %r.17193, i64 1
+  %cmp1604 = icmp ult i8* %incdec.ptr1608, %add.ptr1603
+  br i1 %cmp1604, label %for.body, label %for.end1609
+
+for.end1609:
+  unreachable
+}
diff --git a/test/Transforms/IndVarSimplify/2011-11-15-multiexit.ll b/test/Transforms/IndVarSimplify/2011-11-15-multiexit.ll
new file mode 100644
index 0000000..c74d04e
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/2011-11-15-multiexit.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+;
+; Prior to the fix for PR11375, indvars would replace %firstIV with a
+; loop-invariant gep computed in the preheader. This was incorrect
+; because it was based on the minimum "ExitNotTaken" count. If the
+; final loop test is skipped (odd number of elements) then the early
+; exit would be taken and the loop invariant value would be incorrect.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin"
+
+; CHECK: if.end:
+; CHECK: phi i32* [ %first.lcssa, %early.exit ]
+define i32 @test(i32* %first, i32* %last) uwtable ssp {
+entry:
+  br i1 undef, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  br i1 undef, label %if.end, label %do.body
+
+do.body:                                          ; preds = %if.else, %if.then
+  %firstIV = phi i32* [ %incdec.ptr2, %if.else ], [ %first, %if.then ]
+  %incdec.ptr1 = getelementptr inbounds i32* %firstIV, i64 1
+  %cmp1 = icmp eq i32* %incdec.ptr1, %last
+  br i1 %cmp1, label %early.exit, label %if.else
+
+if.else:                                        ; preds = %do.body
+  %incdec.ptr2 = getelementptr inbounds i32* %firstIV, i64 2
+  %cmp2 = icmp eq i32* %incdec.ptr2, %last
+  br i1 %cmp2, label %if.end, label %do.body
+
+early.exit:
+  %first.lcssa = phi i32* [ %firstIV, %do.body ]
+  br label %if.end
+
+if.end:
+  %tmp = phi i32* [ %first.lcssa, %early.exit ], [ %first, %if.then ], [ %first, %entry ], [ undef, %if.else ]
+  %val = load i32* %tmp
+  ret i32 %val
+}
diff --git a/test/Transforms/IndVarSimplify/2011-11-17-selfphi.ll b/test/Transforms/IndVarSimplify/2011-11-17-selfphi.ll
new file mode 100644
index 0000000..ccf2595
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/2011-11-17-selfphi.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+; PR11350: Check that SimplifyIndvar handles a cycle of useless self-phis.
+
+; CHECK: @test
+; CHECK-NOT: lcssa = phi
+define void @test() nounwind {
+entry:
+  br label %for.cond.preheader
+
+for.cond.preheader:                               ; preds = %entry
+  br label %for.cond.outer
+
+for.cond.outer:                                   ; preds = %for.cond.preheader, %for.end
+  %p_41.addr.0.ph = phi i32 [ %p_41.addr.1.lcssa, %for.end ], [ 1, %for.cond.preheader ]
+  br label %for.cond
+
+for.cond:
+  br i1 true, label %for.end, label %for.ph
+
+for.ph:                                   ; preds = %for.cond4.preheader
+  br label %for.end
+
+for.end:
+  %p_41.addr.1.lcssa = phi i32 [ undef, %for.ph ], [ %p_41.addr.0.ph, %for.cond ]
+  %p_68.lobit.i = lshr i32 %p_41.addr.1.lcssa, 31
+  %cmp7 = icmp eq i32 %p_41.addr.1.lcssa, 0
+  %conv8 = zext i1 %cmp7 to i32
+  br label %for.cond.outer
+}
diff --git a/test/Transforms/IndVarSimplify/ada-loops.ll b/test/Transforms/IndVarSimplify/ada-loops.ll
index 154de6f..c093298 100644
--- a/test/Transforms/IndVarSimplify/ada-loops.ll
+++ b/test/Transforms/IndVarSimplify/ada-loops.ll
@@ -1,5 +1,4 @@
 ; RUN: opt < %s -indvars -S | FileCheck %s
-; RUN: opt < %s -indvars -enable-iv-rewrite=false -S | FileCheck %s
 ;
 ; PR1301
 
diff --git a/test/Transforms/IndVarSimplify/addrec-gep.ll b/test/Transforms/IndVarSimplify/addrec-gep.ll
deleted file mode 100644
index b62d093..0000000
--- a/test/Transforms/IndVarSimplify/addrec-gep.ll
+++ /dev/null
@@ -1,78 +0,0 @@
-; RUN: opt < %s -indvars -S -enable-iv-rewrite | FileCheck %s
-; CHECK: getelementptr
-; CHECK: mul {{.*}}, 37
-; CHECK: add {{.*}}, 5203
-; CHECK-NOT: cast
-
-; This test tests several things. The load and store should use the
-; same address instead of having it computed twice, and SCEVExpander should
-; be able to reconstruct the full getelementptr, despite it having a few
-; obstacles set in its way.
-
-target datalayout = "e-p:64:64:64-n32:64"
-
-define void @foo(i64 %n, i64 %m, i64 %o, i64 %q, double* nocapture %p) nounwind {
-entry:
-	%tmp = icmp sgt i64 %n, 0		; <i1> [#uses=1]
-	br i1 %tmp, label %bb.nph3, label %return
-
-bb.nph:		; preds = %bb2.preheader
-	%tmp1 = mul i64 %tmp16, %i.02		; <i64> [#uses=1]
-	%tmp2 = mul i64 %tmp19, %i.02		; <i64> [#uses=1]
-	br label %bb1
-
-bb1:		; preds = %bb2, %bb.nph
-	%j.01 = phi i64 [ %tmp9, %bb2 ], [ 0, %bb.nph ]		; <i64> [#uses=3]
-	%tmp3 = add i64 %j.01, %tmp1		; <i64> [#uses=1]
-	%tmp4 = add i64 %j.01, %tmp2		; <i64> [#uses=1]
-        %z0 = add i64 %tmp3, 5203
-	%tmp5 = getelementptr double* %p, i64 %z0		; <double*> [#uses=1]
-	%tmp6 = load double* %tmp5, align 8		; <double> [#uses=1]
-	%tmp7 = fdiv double %tmp6, 2.100000e+00		; <double> [#uses=1]
-        %z1 = add i64 %tmp4, 5203
-	%tmp8 = getelementptr double* %p, i64 %z1		; <double*> [#uses=1]
-	store double %tmp7, double* %tmp8, align 8
-	%tmp9 = add i64 %j.01, 1		; <i64> [#uses=2]
-	br label %bb2
-
-bb2:		; preds = %bb1
-	%tmp10 = icmp slt i64 %tmp9, %m		; <i1> [#uses=1]
-	br i1 %tmp10, label %bb1, label %bb2.bb3_crit_edge
-
-bb2.bb3_crit_edge:		; preds = %bb2
-	br label %bb3
-
-bb3:		; preds = %bb2.preheader, %bb2.bb3_crit_edge
-	%tmp11 = add i64 %i.02, 1		; <i64> [#uses=2]
-	br label %bb4
-
-bb4:		; preds = %bb3
-	%tmp12 = icmp slt i64 %tmp11, %n		; <i1> [#uses=1]
-	br i1 %tmp12, label %bb2.preheader, label %bb4.return_crit_edge
-
-bb4.return_crit_edge:		; preds = %bb4
-	br label %bb4.return_crit_edge.split
-
-bb4.return_crit_edge.split:		; preds = %bb.nph3, %bb4.return_crit_edge
-	br label %return
-
-bb.nph3:		; preds = %entry
-	%tmp13 = icmp sgt i64 %m, 0		; <i1> [#uses=1]
-	%tmp14 = mul i64 %n, 37		; <i64> [#uses=1]
-	%tmp15 = mul i64 %tmp14, %o		; <i64> [#uses=1]
-	%tmp16 = mul i64 %tmp15, %q		; <i64> [#uses=1]
-	%tmp17 = mul i64 %n, 37		; <i64> [#uses=1]
-	%tmp18 = mul i64 %tmp17, %o		; <i64> [#uses=1]
-	%tmp19 = mul i64 %tmp18, %q		; <i64> [#uses=1]
-	br i1 %tmp13, label %bb.nph3.split, label %bb4.return_crit_edge.split
-
-bb.nph3.split:		; preds = %bb.nph3
-	br label %bb2.preheader
-
-bb2.preheader:		; preds = %bb.nph3.split, %bb4
-	%i.02 = phi i64 [ %tmp11, %bb4 ], [ 0, %bb.nph3.split ]		; <i64> [#uses=3]
-	br i1 true, label %bb.nph, label %bb3
-
-return:		; preds = %bb4.return_crit_edge.split, %entry
-	ret void
-}
diff --git a/test/Transforms/IndVarSimplify/avoid-i0.ll b/test/Transforms/IndVarSimplify/avoid-i0.ll
index 59661fa..22f2e4b 100644
--- a/test/Transforms/IndVarSimplify/avoid-i0.ll
+++ b/test/Transforms/IndVarSimplify/avoid-i0.ll
@@ -90,7 +90,7 @@ entry:
 	br label %bb4
 
 bb:		; preds = %bb4
-	%0 = volatile load i32* @x, align 4		; <i32> [#uses=1]
+	%0 = load volatile i32* @x, align 4		; <i32> [#uses=1]
 	store i32 %0, i32* %vol.0, align 4
 	store i32 0, i32* %l_52, align 4
 	br label %bb2
diff --git a/test/Transforms/IndVarSimplify/complex-scev.ll b/test/Transforms/IndVarSimplify/complex-scev.ll
deleted file mode 100644
index 395377e..0000000
--- a/test/Transforms/IndVarSimplify/complex-scev.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; The i induction variable looks like a wrap-around, but it really is just
-; a simple affine IV.  Make sure that indvars eliminates it.
-
-; RUN: opt < %s -indvars -S -enable-iv-rewrite | FileCheck %s
-; CHECK: phi
-; CHECK-NOT: phi
-
-define void @foo() {
-entry:
-        br label %bb6
-
-bb6:            ; preds = %cond_true, %entry
-        %j.0 = phi i32 [ 1, %entry ], [ %tmp5, %cond_true ]             ; <i32> [#uses=3]
-        %i.0 = phi i32 [ 0, %entry ], [ %j.0, %cond_true ]              ; <i32> [#uses=1]
-        %tmp7 = call i32 (...)* @foo2( )                ; <i32> [#uses=1]
-        %tmp = icmp ne i32 %tmp7, 0             ; <i1> [#uses=1]
-        br i1 %tmp, label %cond_true, label %return
-
-cond_true:              ; preds = %bb6
-        %tmp2 = call i32 (...)* @bar( i32 %i.0, i32 %j.0 )              ; <i32> [#uses=0]
-        %tmp5 = add i32 %j.0, 1         ; <i32> [#uses=1]
-        br label %bb6
-
-return:         ; preds = %bb6
-        ret void
-}
-
-declare i32 @bar(...)
-
-declare i32 @foo2(...)
-
diff --git a/test/Transforms/IndVarSimplify/dg.exp b/test/Transforms/IndVarSimplify/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/IndVarSimplify/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/IndVarSimplify/elim-extend.ll b/test/Transforms/IndVarSimplify/elim-extend.ll
index 43c162f..ad5679f 100644
--- a/test/Transforms/IndVarSimplify/elim-extend.ll
+++ b/test/Transforms/IndVarSimplify/elim-extend.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -indvars -enable-iv-rewrite=false -S | FileCheck %s
+; RUN: opt < %s -indvars -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 
diff --git a/test/Transforms/IndVarSimplify/gep-with-mul-base.ll b/test/Transforms/IndVarSimplify/gep-with-mul-base.ll
deleted file mode 100644
index 7e1e2a3..0000000
--- a/test/Transforms/IndVarSimplify/gep-with-mul-base.ll
+++ /dev/null
@@ -1,68 +0,0 @@
-; RUN: opt < %s -indvars -S -enable-iv-rewrite | FileCheck %s
-; CHECK: define void @foo
-; CHECK: mul
-; CHECK: mul
-; CHECK: mul
-; CHECK: add
-; CHECK: sub
-; CHECK: define void @bar
-; CHECK: mul
-; CHECK: mul
-; CHECK: mul
-; CHECK: add
-; CHECK: sub
-
-define void @foo(i64 %n, i64 %m, i64 %o, double* nocapture %p) nounwind {
-entry:
-	%tmp = icmp sgt i64 %n, 0		; <i1> [#uses=1]
-	br i1 %tmp, label %bb.nph, label %return
-
-bb.nph:		; preds = %entry
-	%tmp1 = mul i64 %n, 37		; <i64> [#uses=1]
-	%tmp2 = mul i64 %tmp1, %m		; <i64> [#uses=1]
-	%tmp3 = mul i64 %tmp2, %o		; <i64> [#uses=1]
-	br label %bb
-
-bb:		; preds = %bb, %bb.nph
-	%i.01 = phi i64 [ %tmp3, %bb.nph ], [ %tmp13, %bb ]		; <i64> [#uses=3]
-	%tmp9 = getelementptr double* %p, i64 %i.01		; <double*> [#uses=1]
-	%tmp10 = load double* %tmp9, align 8		; <double> [#uses=1]
-	%tmp11 = fdiv double %tmp10, 2.100000e+00		; <double> [#uses=1]
-	store double %tmp11, double* %tmp9, align 8
-	%tmp13 = add i64 %i.01, 1		; <i64> [#uses=2]
-	%tmp14 = icmp slt i64 %tmp13, %n		; <i1> [#uses=1]
-	br i1 %tmp14, label %bb, label %return.loopexit
-
-return.loopexit:		; preds = %bb
-	br label %return
-
-return:		; preds = %return.loopexit, %entry
-	ret void
-}
-define void @bar(i64 %n, i64 %m, i64 %o, i64 %q, double* nocapture %p) nounwind {
-entry:
-	%tmp = icmp sgt i64 %n, 0		; <i1> [#uses=1]
-	br i1 %tmp, label %bb.nph, label %return
-
-bb.nph:		; preds = %entry
-	%tmp1 = mul i64 %n, %q		; <i64> [#uses=1]
-	%tmp2 = mul i64 %tmp1, %m		; <i64> [#uses=1]
-	%tmp3 = mul i64 %tmp2, %o		; <i64> [#uses=1]
-	br label %bb
-
-bb:		; preds = %bb, %bb.nph
-	%i.01 = phi i64 [ %tmp3, %bb.nph ], [ %tmp13, %bb ]		; <i64> [#uses=3]
-	%tmp9 = getelementptr double* %p, i64 %i.01		; <double*> [#uses=1]
-	%tmp10 = load double* %tmp9, align 8		; <double> [#uses=1]
-	%tmp11 = fdiv double %tmp10, 2.100000e+00		; <double> [#uses=1]
-	store double %tmp11, double* %tmp9, align 8
-	%tmp13 = add i64 %i.01, 1		; <i64> [#uses=2]
-	%tmp14 = icmp slt i64 %tmp13, %n		; <i1> [#uses=1]
-	br i1 %tmp14, label %bb, label %return.loopexit
-
-return.loopexit:		; preds = %bb
-	br label %return
-
-return:		; preds = %return.loopexit, %entry
-	ret void
-}
diff --git a/test/Transforms/IndVarSimplify/iv-fold.ll b/test/Transforms/IndVarSimplify/iv-fold.ll
index 2e19118..e0b05cd 100644
--- a/test/Transforms/IndVarSimplify/iv-fold.ll
+++ b/test/Transforms/IndVarSimplify/iv-fold.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -indvars -enable-iv-rewrite=false -S | FileCheck %s
+; RUN: opt < %s -indvars -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n32:64"
 
diff --git a/test/Transforms/IndVarSimplify/iv-zext.ll b/test/Transforms/IndVarSimplify/iv-zext.ll
index 646e6c0..2e0f70c 100644
--- a/test/Transforms/IndVarSimplify/iv-zext.ll
+++ b/test/Transforms/IndVarSimplify/iv-zext.ll
@@ -1,5 +1,4 @@
 ; RUN: opt < %s -indvars -S | FileCheck %s
-; RUN: opt < %s -indvars -enable-iv-rewrite=false -S | FileCheck %s
 ; CHECK-NOT: and
 ; CHECK-NOT: zext
 
diff --git a/test/Transforms/IndVarSimplify/lftr-reuse.ll b/test/Transforms/IndVarSimplify/lftr-reuse.ll
index 490eee9..9abfe13 100644
--- a/test/Transforms/IndVarSimplify/lftr-reuse.ll
+++ b/test/Transforms/IndVarSimplify/lftr-reuse.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -indvars -enable-iv-rewrite=false -S | FileCheck %s
+; RUN: opt < %s -indvars -S | FileCheck %s
 ;
 ; Make sure that indvars can perform LFTR without a canonical IV.
 
diff --git a/test/Transforms/IndVarSimplify/lit.local.cfg b/test/Transforms/IndVarSimplify/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate10.ll b/test/Transforms/IndVarSimplify/loop_evaluate10.ll
index 269478a..c3619f6 100644
--- a/test/Transforms/IndVarSimplify/loop_evaluate10.ll
+++ b/test/Transforms/IndVarSimplify/loop_evaluate10.ll
@@ -1,8 +1,14 @@
 ; RUN: opt < %s -indvars -S \
 ; RUN:   | grep {%b.1 = phi i32 \\\[ 2, %bb \\\], \\\[ 1, %bb2 \\\]}
-
+;
 ; This loop has multiple exits, and the value of %b1 depends on which
 ; exit is taken. Indvars should correctly compute the exit values.
+;
+; XFAIL: *
+; Indvars does not currently replace loop invariant values unless all
+; loop exits have the same exit value. We could handle some cases,
+; such as this, by making getSCEVAtScope() sensitive to a particular
+; loop exit.  See PR11388.
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-pc-linux-gnu"
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate9.ll b/test/Transforms/IndVarSimplify/loop_evaluate9.ll
index 8184a73..9f3bcaf 100644
--- a/test/Transforms/IndVarSimplify/loop_evaluate9.ll
+++ b/test/Transforms/IndVarSimplify/loop_evaluate9.ll
@@ -2,8 +2,13 @@
 ; RUN: grep {\[%\]tmp7 = icmp eq i8 -28, -28} %t
 ; RUN: grep {\[%\]tmp8 = icmp eq i8 63, 63} %t
 ; PR4477
-
 ; Indvars should compute the exit values in loop.
+;
+; XFAIL: *
+; Indvars does not currently replace loop invariant values unless all
+; loop exits have the same exit value. We could handle some cases,
+; such as this, by making getSCEVAtScope() sensitive to a particular
+; loop exit.  See PR11388.
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i386-pc-linux-gnu"
diff --git a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
index 9c2abd0..bfdd000 100644
--- a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
+++ b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -indvars -enable-iv-rewrite=false -S | FileCheck %s
+; RUN: opt < %s -indvars -S | FileCheck %s
 ;
 ; Make sure that indvars isn't inserting canonical IVs.
 ; This is kinda hard to do until linear function test replacement is removed.
@@ -333,9 +333,9 @@ entry:
 
 ; CHECK: loop:
 ; CHECK: phi %structIF*
-; CHECK: phi i32*
-; CHECK: getelementptr inbounds
+; CHECK-NOT: phi
 ; CHECK: getelementptr inbounds
+; CHECK-NOT: getelementptr
 ; CHECK: exit:
 loop:
   %ptr.iv = phi %structIF* [ %ptr.inc, %latch ], [ %base, %entry ]
diff --git a/test/Transforms/IndVarSimplify/preserve-gep-loop-variant.ll b/test/Transforms/IndVarSimplify/preserve-gep-loop-variant.ll
deleted file mode 100644
index 251d34e..0000000
--- a/test/Transforms/IndVarSimplify/preserve-gep-loop-variant.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; RUN: opt < %s -indvars -S -enable-iv-rewrite | FileCheck %s
-; CHECK-NOT: {{inttoptr|ptrtoint}}
-; CHECK: scevgep
-; CHECK-NOT: {{inttoptr|ptrtoint}}
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n32:64"
-
-; Indvars shouldn't need inttoptr/ptrtoint to expand an address here.
-
-define void @foo(i8* %p) nounwind {
-entry:
-  br i1 true, label %bb.nph, label %for.end
-
-for.cond:
-  %phitmp = icmp slt i64 %inc, 20
-  br i1 %phitmp, label %for.body, label %for.cond.for.end_crit_edge
-
-for.cond.for.end_crit_edge:
-  br label %for.end
-
-bb.nph:
-  br label %for.body
-
-for.body:
-  %storemerge1 = phi i64 [ %inc, %for.cond ], [ 0, %bb.nph ]
-  %call = tail call i64 @bar() nounwind
-  %call2 = tail call i64 @car() nounwind
-  %conv = trunc i64 %call2 to i8
-  %conv3 = sext i8 %conv to i64
-  %add = add nsw i64 %call, %storemerge1
-  %add4 = add nsw i64 %add, %conv3
-  %arrayidx = getelementptr inbounds i8* %p, i64 %add4
-  store i8 0, i8* %arrayidx
-  %inc = add nsw i64 %storemerge1, 1
-  br label %for.cond
-
-for.end:
-  ret void
-}
-
-declare i64 @bar()
-
-declare i64 @car()
diff --git a/test/Transforms/IndVarSimplify/preserve-gep-nested.ll b/test/Transforms/IndVarSimplify/preserve-gep-nested.ll
deleted file mode 100644
index cdcaaa0..0000000
--- a/test/Transforms/IndVarSimplify/preserve-gep-nested.ll
+++ /dev/null
@@ -1,76 +0,0 @@
-; RUN: opt < %s -indvars -S -enable-iv-rewrite | FileCheck %s
-; No explicit integer multiplications!
-; No i8* arithmetic or pointer casting anywhere!
-; CHECK-NOT: = {{= mul|i8\*|bitcast|inttoptr|ptrtoint}}
-; Exactly one getelementptr for each load+store.
-; Each getelementptr using %struct.Q* %s as a base and not i8*.
-; CHECK: getelementptr %struct.Q* %s,
-; CHECK: getelementptr %struct.Q* %s,
-; CHECK: getelementptr %struct.Q* %s,
-; CHECK: getelementptr %struct.Q* %s,
-; CHECK: getelementptr %struct.Q* %s,
-; CHECK: getelementptr %struct.Q* %s,
-; CHECK-NOT: = {{= mul|i8\*|bitcast|inttoptr|ptrtoint}}
-
-; FIXME: This test should pass with or without TargetData. Until opt
-; supports running tests without targetdata, just hardware this in.
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n32:64"
-
-%struct.Q = type { [10 x %struct.N] }
-%struct.N = type { %struct.S }
-%struct.S = type { [100 x double], [100 x double] }
-
-define void @foo(%struct.Q* %s, i64 %n) nounwind {
-entry:
-  br label %bb1
-
-bb1:
-  %i = phi i64 [ 2, %entry ], [ %i.next, %bb ]
-  %j = phi i64 [ 0, %entry ], [ %j.next, %bb ]
-  %t5 = icmp slt i64 %i, %n
-  br i1 %t5, label %bb, label %return
-
-bb:
-  %t0 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 0, i32 0, i32 0, i64 %i
-  %t1 = load double* %t0, align 8
-  %t2 = fmul double %t1, 3.200000e+00
-  %t3 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 0, i32 0, i32 0, i64 %i
-  store double %t2, double* %t3, align 8
-
-  %s0 = getelementptr inbounds %struct.Q* %s, i64 13, i32 0, i64 7, i32 0, i32 1, i64 %i
-  %s1 = load double* %s0, align 8
-  %s2 = fmul double %s1, 3.200000e+00
-  %s3 = getelementptr inbounds %struct.Q* %s, i64 13, i32 0, i64 7, i32 0, i32 1, i64 %i
-  store double %s2, double* %s3, align 8
-
-  %u0 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 7, i32 0, i32 1, i64 %j
-  %u1 = load double* %u0, align 8
-  %u2 = fmul double %u1, 3.200000e+00
-  %u3 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 7, i32 0, i32 1, i64 %j
-  store double %u2, double* %u3, align 8
-
-  %v0 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 0, i32 0, i32 1, i64 %i
-  %v1 = load double* %v0, align 8
-  %v2 = fmul double %v1, 3.200000e+00
-  %v3 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 0, i32 0, i32 1, i64 %i
-  store double %v2, double* %v3, align 8
-
-  %w0 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 0, i32 0, i32 0, i64 %j
-  %w1 = load double* %w0, align 8
-  %w2 = fmul double %w1, 3.200000e+00
-  %w3 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 0, i32 0, i32 0, i64 %j
-  store double %w2, double* %w3, align 8
-
-  %x0 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 3, i32 0, i32 0, i64 %i
-  %x1 = load double* %x0, align 8
-  %x2 = fmul double %x1, 3.200000e+00
-  %x3 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 3, i32 0, i32 0, i64 %i
-  store double %x2, double* %x3, align 8
-
-  %i.next = add i64 %i, 1
-  %j.next = add i64 %j, 1
-  br label %bb1
-
-return:
-  ret void
-}
diff --git a/test/Transforms/IndVarSimplify/preserve-gep-remainder.ll b/test/Transforms/IndVarSimplify/preserve-gep-remainder.ll
deleted file mode 100644
index 2f3100f..0000000
--- a/test/Transforms/IndVarSimplify/preserve-gep-remainder.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: opt < %s -indvars -S -enable-iv-rewrite | FileCheck %s
-; CHECK: %p.2.ip.1 = getelementptr [3 x [3 x double]]* %p, i64 2, i64 %0, i64 1
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n32:64"
-
-; Indvars shouldn't expand this to
-;   %p.2.ip.1 = getelementptr [3 x [3 x double]]* %p, i64 0, i64 %tmp, i64 19
-; or something. That's valid, but more obscure.
-
-define void @foo([3 x [3 x double]]* noalias %p) nounwind {
-entry:
-  br label %loop
-
-loop:
-  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
-  %ip = add i64 %i, 1
-  %p.2.ip.1 = getelementptr [3 x [3 x double]]* %p, i64 2, i64 %ip, i64 1
-  volatile store double 0.0, double* %p.2.ip.1
-  %i.next = add i64 %i, 1
-  br label %loop
-}
diff --git a/test/Transforms/IndVarSimplify/preserve-gep.ll b/test/Transforms/IndVarSimplify/preserve-gep.ll
deleted file mode 100644
index fec8a28..0000000
--- a/test/Transforms/IndVarSimplify/preserve-gep.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; RUN: opt < %s -indvars -S -enable-iv-rewrite | FileCheck %s
-; CHECK-NOT: {{ptrtoint|inttoptr}}
-; CHECK: getelementptr
-; CHECK-NOT: {{ptrtoint|inttoptr|getelementptr}}
-
-; Indvars shouldn't leave getelementptrs expanded out as
-; inttoptr+ptrtoint in its output in common cases.
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n32:64"
-target triple = "x86_64-unknown-linux-gnu"
-	%struct.Foo = type { i32, i32, [10 x i32], i32 }
-
-define void @me(%struct.Foo* nocapture %Bar) nounwind {
-entry:
-	br i1 false, label %return, label %bb.nph
-
-bb.nph:		; preds = %entry
-	br label %bb
-
-bb:		; preds = %bb1, %bb.nph
-	%i.01 = phi i64 [ %4, %bb1 ], [ 0, %bb.nph ]		; <i64> [#uses=3]
-	%0 = getelementptr %struct.Foo* %Bar, i64 %i.01, i32 2, i64 3		; <i32*> [#uses=1]
-	%1 = load i32* %0, align 4		; <i32> [#uses=1]
-	%2 = mul i32 %1, 113		; <i32> [#uses=1]
-	%3 = getelementptr %struct.Foo* %Bar, i64 %i.01, i32 2, i64 3		; <i32*> [#uses=1]
-	store i32 %2, i32* %3, align 4
-	%4 = add i64 %i.01, 1		; <i64> [#uses=2]
-	br label %bb1
-
-bb1:		; preds = %bb
-	%phitmp = icmp sgt i64 %4, 19999		; <i1> [#uses=1]
-	br i1 %phitmp, label %bb1.return_crit_edge, label %bb
-
-bb1.return_crit_edge:		; preds = %bb1
-	br label %return
-
-return:		; preds = %bb1.return_crit_edge, %entry
-	ret void
-}
diff --git a/test/Transforms/IndVarSimplify/preserve-signed-wrap.ll b/test/Transforms/IndVarSimplify/preserve-signed-wrap.ll
index 22e2092..f619e8d 100644
--- a/test/Transforms/IndVarSimplify/preserve-signed-wrap.ll
+++ b/test/Transforms/IndVarSimplify/preserve-signed-wrap.ll
@@ -1,5 +1,4 @@
 ; RUN: opt < %s -indvars -S | FileCheck %s
-; RUN: opt < %s -indvars -enable-iv-rewrite=false -S | FileCheck %s
 
 ; Indvars should insert a 64-bit induction variable to eliminate the
 ; sext for the addressing, however it shouldn't eliminate the sext
diff --git a/test/Transforms/IndVarSimplify/sink-alloca.ll b/test/Transforms/IndVarSimplify/sink-alloca.ll
index 3a6c683..64207d8 100644
--- a/test/Transforms/IndVarSimplify/sink-alloca.ll
+++ b/test/Transforms/IndVarSimplify/sink-alloca.ll
@@ -1,15 +1,10 @@
 ; RUN: opt < %s -indvars -S | FileCheck %s
-; PR4775
-
-; Indvars shouldn't sink the alloca out of the entry block, even though
-; it's not used until after the loop.
-
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin10.0"
 
-@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 ()* @main to i8*)],
-section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
-
+; PR4775
+; Indvars shouldn't sink the alloca out of the entry block, even though
+; it's not used until after the loop.
 define i32 @main() nounwind {
 ; CHECK: entry:
 ; CHECK-NEXT: %result.i = alloca i32, align 4
@@ -23,9 +18,39 @@ while.cond:                                       ; preds = %while.cond, %entry
   br i1 %tobool, label %while.end, label %while.cond
 
 while.end:                                        ; preds = %while.cond
-  volatile store i32 0, i32* %result.i
-  %tmp.i = volatile load i32* %result.i           ; <i32> [#uses=0]
+  store volatile i32 0, i32* %result.i
+  %tmp.i = load volatile i32* %result.i           ; <i32> [#uses=0]
   ret i32 0
 }
-
 declare i32 @bar()
+
+; <rdar://problem/10352360>
+; Indvars shouldn't sink the first alloca between the stacksave and stackrestore
+; intrinsics.
+declare i8* @a(...)
+declare i8* @llvm.stacksave() nounwind
+declare void @llvm.stackrestore(i8*) nounwind
+define void @h(i64 %n) nounwind uwtable ssp {
+; CHECK: entry:
+; CHECK-NEXT: %vla = alloca i8*
+; CHECK-NEXT: %savedstack = call i8* @llvm.stacksave()
+entry:
+  %vla = alloca i8*, i64 %n, align 16
+  %savedstack = call i8* @llvm.stacksave() nounwind
+  %vla.i = alloca i8*, i64 %n, align 16
+  br label %for.body.i
+
+for.body.i:
+  %indvars.iv37.i = phi i64 [ %indvars.iv.next38.i, %for.body.i ], [ 0, %entry ]
+  %call.i = call i8* (...)* @a() nounwind
+  %arrayidx.i = getelementptr inbounds i8** %vla.i, i64 %indvars.iv37.i
+  store i8* %call.i, i8** %arrayidx.i, align 8
+  %indvars.iv.next38.i = add i64 %indvars.iv37.i, 1
+  %exitcond5 = icmp eq i64 %indvars.iv.next38.i, %n
+  br i1 %exitcond5, label %g.exit, label %for.body.i
+
+g.exit:
+  call void @llvm.stackrestore(i8* %savedstack) nounwind
+  %call1 = call i8* (...)* @a(i8** %vla) nounwind
+  ret void
+}
diff --git a/test/Transforms/IndVarSimplify/variable-stride-ivs-0.ll b/test/Transforms/IndVarSimplify/variable-stride-ivs-0.ll
index fc906cd..fb9ef22 100644
--- a/test/Transforms/IndVarSimplify/variable-stride-ivs-0.ll
+++ b/test/Transforms/IndVarSimplify/variable-stride-ivs-0.ll
@@ -1,5 +1,4 @@
 ; RUN: opt < %s -indvars -instcombine -S | FileCheck %s
-; RUN: opt < %s -indvars -enable-iv-rewrite=false -instcombine -S | FileCheck %s
 ;
 ; Test that -indvars can reduce variable stride IVs.  If it can reduce variable
 ; stride iv's, it will make %iv. and %m.0.0 isomorphic to each other without
diff --git a/test/Transforms/Inline/2007-06-06-NoInline.ll b/test/Transforms/Inline/2007-06-06-NoInline.ll
deleted file mode 100644
index d5a7953..0000000
--- a/test/Transforms/Inline/2007-06-06-NoInline.ll
+++ /dev/null
@@ -1,46 +0,0 @@
-; RUN: opt < %s -inline -S | grep "define internal i32 @bar"
-@llvm.noinline = appending global [1 x i8*] [ i8* bitcast (i32 (i32, i32)* @bar to i8*) ], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
-
-define internal i32 @bar(i32 %x, i32 %y) {
-entry:
-	%x_addr = alloca i32		; <i32*> [#uses=2]
-	%y_addr = alloca i32		; <i32*> [#uses=2]
-	%retval = alloca i32, align 4		; <i32*> [#uses=2]
-	%tmp = alloca i32, align 4		; <i32*> [#uses=2]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	store i32 %x, i32* %x_addr
-	store i32 %y, i32* %y_addr
-	%tmp1 = load i32* %x_addr		; <i32> [#uses=1]
-	%tmp2 = load i32* %y_addr		; <i32> [#uses=1]
-	%tmp3 = add i32 %tmp1, %tmp2		; <i32> [#uses=1]
-	store i32 %tmp3, i32* %tmp
-	%tmp4 = load i32* %tmp		; <i32> [#uses=1]
-	store i32 %tmp4, i32* %retval
-	br label %return
-
-return:		; preds = %entry
-	%retval5 = load i32* %retval		; <i32> [#uses=1]
-	ret i32 %retval5
-}
-
-define i32 @foo(i32 %a, i32 %b) {
-entry:
-	%a_addr = alloca i32		; <i32*> [#uses=2]
-	%b_addr = alloca i32		; <i32*> [#uses=2]
-	%retval = alloca i32, align 4		; <i32*> [#uses=2]
-	%tmp = alloca i32, align 4		; <i32*> [#uses=2]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	store i32 %a, i32* %a_addr
-	store i32 %b, i32* %b_addr
-	%tmp1 = load i32* %b_addr		; <i32> [#uses=1]
-	%tmp2 = load i32* %a_addr		; <i32> [#uses=1]
-	%tmp3 = call i32 @bar( i32 %tmp1, i32 %tmp2 )		; <i32> [#uses=1]
-	store i32 %tmp3, i32* %tmp
-	%tmp4 = load i32* %tmp		; <i32> [#uses=1]
-	store i32 %tmp4, i32* %retval
-	br label %return
-
-return:		; preds = %entry
-	%retval5 = load i32* %retval		; <i32> [#uses=1]
-	ret i32 %retval5
-}
diff --git a/test/Transforms/Inline/2008-09-02-AlwaysInline.ll b/test/Transforms/Inline/2008-09-02-AlwaysInline.ll
deleted file mode 100644
index 39095c4..0000000
--- a/test/Transforms/Inline/2008-09-02-AlwaysInline.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: opt < %s  -inline-threshold=0 -inline -S | not grep call 
-
-define i32 @fn2() alwaysinline {
-  ret i32 1
-}
-
-define i32 @fn3() {
-   %r = call i32 @fn2()
-   ret i32 %r
-}
diff --git a/test/Transforms/Inline/2008-10-30-AlwaysInline.ll b/test/Transforms/Inline/2008-10-30-AlwaysInline.ll
deleted file mode 100644
index 11e5012..0000000
--- a/test/Transforms/Inline/2008-10-30-AlwaysInline.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: opt < %s -always-inline -S | not grep call 
-
-; Ensure that threshold doesn't disrupt always inline.
-; RUN: opt < %s -inline-threshold=-2000000001 -always-inline -S | not grep call 
-
-
-define internal i32 @if0() alwaysinline {
-       ret i32 1 
-}
-
-define i32 @f0() {
-       %r = call i32 @if0()
-       ret i32 %r
-}
diff --git a/test/Transforms/Inline/2008-11-04-AlwaysInline.ll b/test/Transforms/Inline/2008-11-04-AlwaysInline.ll
deleted file mode 100644
index bc9787b..0000000
--- a/test/Transforms/Inline/2008-11-04-AlwaysInline.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: opt < %s -always-inline -S | grep {@foo}
-; Ensure that foo is not removed by always inliner
-; PR 2945
-
-define internal i32 @foo() nounwind {
-  ret i32 0
-}
diff --git a/test/Transforms/Inline/alloca-bonus.ll b/test/Transforms/Inline/alloca-bonus.ll
new file mode 100644
index 0000000..d04d54e
--- /dev/null
+++ b/test/Transforms/Inline/alloca-bonus.ll
@@ -0,0 +1,155 @@
+; RUN: opt -inline < %s -S -o - -inline-threshold=8 | FileCheck %s
+
+target datalayout = "p:32:32"
+
+declare void @llvm.lifetime.start(i64 %size, i8* nocapture %ptr)
+
+@glbl = external global i32
+
+define void @outer1() {
+; CHECK: @outer1
+; CHECK-NOT: call void @inner1
+  %ptr = alloca i32
+  call void @inner1(i32* %ptr)
+  ret void
+}
+
+define void @inner1(i32 *%ptr) {
+  %A = load i32* %ptr
+  store i32 0, i32* %ptr
+  %C = getelementptr inbounds i32* %ptr, i32 0
+  %D = getelementptr inbounds i32* %ptr, i32 1
+  %E = bitcast i32* %ptr to i8*
+  %F = select i1 false, i32* %ptr, i32* @glbl
+  call void @llvm.lifetime.start(i64 0, i8* %E)
+  ret void
+}
+
+define void @outer2() {
+; CHECK: @outer2
+; CHECK: call void @inner2
+  %ptr = alloca i32
+  call void @inner2(i32* %ptr)
+  ret void
+}
+
+; %D poisons this call, scalar-repl can't handle that instruction.
+define void @inner2(i32 *%ptr) {
+  %A = load i32* %ptr
+  store i32 0, i32* %ptr
+  %C = getelementptr inbounds i32* %ptr, i32 0
+  %D = getelementptr inbounds i32* %ptr, i32 %A
+  %E = bitcast i32* %ptr to i8*
+  %F = select i1 false, i32* %ptr, i32* @glbl
+  call void @llvm.lifetime.start(i64 0, i8* %E)
+  ret void
+}
+
+define void @outer3() {
+; CHECK: @outer3
+; CHECK-NOT: call void @inner3
+  %ptr = alloca i32
+  call void @inner3(i32* %ptr, i1 undef)
+  ret void
+}
+
+define void @inner3(i32 *%ptr, i1 %x) {
+  %A = icmp eq i32* %ptr, null
+  %B = and i1 %x, %A
+  br i1 %A, label %bb.true, label %bb.false
+bb.true:
+  ; This block musn't be counted in the inline cost.
+  %t1 = load i32* %ptr
+  %t2 = add i32 %t1, 1
+  %t3 = add i32 %t2, 1
+  %t4 = add i32 %t3, 1
+  %t5 = add i32 %t4, 1
+  %t6 = add i32 %t5, 1
+  %t7 = add i32 %t6, 1
+  %t8 = add i32 %t7, 1
+  %t9 = add i32 %t8, 1
+  %t10 = add i32 %t9, 1
+  %t11 = add i32 %t10, 1
+  %t12 = add i32 %t11, 1
+  %t13 = add i32 %t12, 1
+  %t14 = add i32 %t13, 1
+  %t15 = add i32 %t14, 1
+  %t16 = add i32 %t15, 1
+  %t17 = add i32 %t16, 1
+  %t18 = add i32 %t17, 1
+  %t19 = add i32 %t18, 1
+  %t20 = add i32 %t19, 1
+  ret void
+bb.false:
+  ret void
+}
+
+define void @outer4(i32 %A) {
+; CHECK: @outer4
+; CHECK-NOT: call void @inner4
+  %ptr = alloca i32
+  call void @inner4(i32* %ptr, i32 %A)
+  ret void
+}
+
+; %B poisons this call, scalar-repl can't handle that instruction. However, we
+; still want to detect that the icmp and branch *can* be handled.
+define void @inner4(i32 *%ptr, i32 %A) {
+  %B = getelementptr inbounds i32* %ptr, i32 %A
+  %C = icmp eq i32* %ptr, null
+  br i1 %C, label %bb.true, label %bb.false
+bb.true:
+  ; This block musn't be counted in the inline cost.
+  %t1 = load i32* %ptr
+  %t2 = add i32 %t1, 1
+  %t3 = add i32 %t2, 1
+  %t4 = add i32 %t3, 1
+  %t5 = add i32 %t4, 1
+  %t6 = add i32 %t5, 1
+  %t7 = add i32 %t6, 1
+  %t8 = add i32 %t7, 1
+  %t9 = add i32 %t8, 1
+  %t10 = add i32 %t9, 1
+  %t11 = add i32 %t10, 1
+  %t12 = add i32 %t11, 1
+  %t13 = add i32 %t12, 1
+  %t14 = add i32 %t13, 1
+  %t15 = add i32 %t14, 1
+  %t16 = add i32 %t15, 1
+  %t17 = add i32 %t16, 1
+  %t18 = add i32 %t17, 1
+  %t19 = add i32 %t18, 1
+  %t20 = add i32 %t19, 1
+  ret void
+bb.false:
+  ret void
+}
+
+define void @outer5() {
+; CHECK: @outer5
+; CHECK-NOT: call void @inner5
+  %ptr = alloca i32
+  call void @inner5(i1 false, i32* %ptr)
+  ret void
+}
+
+; %D poisons this call, scalar-repl can't handle that instruction. However, if
+; the flag is set appropriately, the poisoning instruction is inside of dead
+; code, and so shouldn't be counted.
+define void @inner5(i1 %flag, i32 *%ptr) {
+  %A = load i32* %ptr
+  store i32 0, i32* %ptr
+  %C = getelementptr inbounds i32* %ptr, i32 0
+  br i1 %flag, label %if.then, label %exit
+
+if.then:
+  %D = getelementptr inbounds i32* %ptr, i32 %A
+  %E = bitcast i32* %ptr to i8*
+  %F = select i1 false, i32* %ptr, i32* @glbl
+  call void @llvm.lifetime.start(i64 0, i8* %E)
+  ret void
+
+exit:
+  ret void
+}
+
diff --git a/test/Transforms/Inline/always-inline.ll b/test/Transforms/Inline/always-inline.ll
new file mode 100644
index 0000000..e0be41f
--- /dev/null
+++ b/test/Transforms/Inline/always-inline.ll
@@ -0,0 +1,125 @@
+; RUN: opt < %s -inline-threshold=0 -always-inline -S | FileCheck %s
+;
+; Ensure the threshold has no impact on these decisions.
+; RUN: opt < %s -inline-threshold=20000000 -always-inline -S | FileCheck %s
+; RUN: opt < %s -inline-threshold=-20000000 -always-inline -S | FileCheck %s
+
+define i32 @inner1() alwaysinline {
+  ret i32 1
+}
+define i32 @outer1() {
+; CHECK: @outer1
+; CHECK-NOT: call
+; CHECK: ret
+
+   %r = call i32 @inner1()
+   ret i32 %r
+}
+
+; The always inliner can't DCE internal functions. PR2945
+; CHECK: @pr2945
+define internal i32 @pr2945() nounwind {
+  ret i32 0
+}
+
+define internal void @inner2(i32 %N) alwaysinline {
+  %P = alloca i32, i32 %N
+  ret void
+}
+define void @outer2(i32 %N) {
+; The always inliner (unlike the normal one) should be willing to inline
+; a function with a dynamic alloca into one without a dynamic alloca.
+; rdar://6655932
+;
+; CHECK: @outer2
+; CHECK-NOT: call void @inner2
+; CHECK alloca i32, i32 %N
+; CHECK-NOT: call void @inner2
+; CHECK: ret void
+
+  call void @inner2( i32 %N )
+  ret void
+}
+
+declare i32 @a() returns_twice
+declare i32 @b() returns_twice
+
+define i32 @inner3() alwaysinline {
+entry:
+  %call = call i32 @a() returns_twice
+  %add = add nsw i32 1, %call
+  ret i32 %add
+}
+define i32 @outer3() {
+entry:
+; CHECK: @outer3
+; CHECK-NOT: call i32 @a
+; CHECK: ret
+
+  %call = call i32 @inner3()
+  %add = add nsw i32 1, %call
+  ret i32 %add
+}
+
+define i32 @inner4() alwaysinline returns_twice {
+entry:
+  %call = call i32 @b() returns_twice
+  %add = add nsw i32 1, %call
+  ret i32 %add
+}
+
+define i32 @outer4() {
+entry:
+; CHECK: @outer4
+; CHECK: call i32 @b()
+; CHECK: ret
+
+  %call = call i32 @inner4() returns_twice
+  %add = add nsw i32 1, %call
+  ret i32 %add
+}
+
+define i32 @inner5(i8* %addr) alwaysinline {
+entry:
+  indirectbr i8* %addr, [ label %one, label %two ]
+
+one:
+  ret i32 42
+
+two:
+  ret i32 44
+}
+define i32 @outer5(i32 %x) {
+; CHECK: @outer5
+; CHECK: call i32 @inner5
+; CHECK: ret
+
+  %cmp = icmp slt i32 %x, 42
+  %addr = select i1 %cmp, i8* blockaddress(@inner5, %one), i8* blockaddress(@inner5, %two)
+  %call = call i32 @inner5(i8* %addr)
+  ret i32 %call
+}
+
+define void @inner6(i32 %x) alwaysinline {
+entry:
+  %icmp = icmp slt i32 %x, 0
+  br i1 %icmp, label %return, label %bb
+
+bb:
+  %sub = sub nsw i32 %x, 1
+  call void @inner6(i32 %sub)
+  ret void
+
+return:
+  ret void
+}
+define void @outer6() {
+; CHECK: @outer6
+; CHECK: call void @inner6(i32 42)
+; CHECK: ret
+
+entry:
+  call void @inner6(i32 42)
+  ret void
+}
+
diff --git a/test/Transforms/Inline/always_inline_dyn_alloca.ll b/test/Transforms/Inline/always_inline_dyn_alloca.ll
deleted file mode 100644
index 25cfc49..0000000
--- a/test/Transforms/Inline/always_inline_dyn_alloca.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: opt < %s -inline -S | not grep callee
-; rdar://6655932
-
-; If callee is marked alwaysinline, inline it! Even if callee has dynamic
-; alloca and caller does not,
-
-define internal void @callee(i32 %N) alwaysinline {
-        %P = alloca i32, i32 %N
-        ret void
-}
-
-define void @foo(i32 %N) {
-        call void @callee( i32 %N )
-        ret void
-}
diff --git a/test/Transforms/Inline/blockaddress.ll b/test/Transforms/Inline/blockaddress.ll
new file mode 100644
index 0000000..4206312
--- /dev/null
+++ b/test/Transforms/Inline/blockaddress.ll
@@ -0,0 +1,27 @@
+; RUN: opt -inline -S < %s | FileCheck %s
+; PR10162
+
+; Make sure the blockaddress is mapped correctly when doit is inlined
+; CHECK: store i8* blockaddress(@f, %here.i), i8** @ptr1, align 8
+
+@i = global i32 1, align 4
+@ptr1 = common global i8* null, align 8
+
+define void @doit(i8** nocapture %pptr, i32 %cond) nounwind uwtable {
+entry:
+  %tobool = icmp eq i32 %cond, 0
+  br i1 %tobool, label %if.end, label %here
+
+here:
+  store i8* blockaddress(@doit, %here), i8** %pptr, align 8
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+define void @f(i32 %cond) nounwind uwtable {
+entry:
+  call void @doit(i8** @ptr1, i32 %cond)
+  ret void
+}
diff --git a/test/Transforms/Inline/dg.exp b/test/Transforms/Inline/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/Inline/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/Inline/dynamic_alloca_test.ll b/test/Transforms/Inline/dynamic_alloca_test.ll
index 0286535..15a5c66 100644
--- a/test/Transforms/Inline/dynamic_alloca_test.ll
+++ b/test/Transforms/Inline/dynamic_alloca_test.ll
@@ -3,33 +3,43 @@
 ; Functions with dynamic allocas can only be inlined into functions that
 ; already have dynamic allocas.
 
-; RUN: opt < %s -inline -S | \
-; RUN:   grep llvm.stacksave
-; RUN: opt < %s -inline -S | not grep callee
-
+; RUN: opt < %s -inline -S | FileCheck %s
+;
+; FIXME: This test is xfailed because the inline cost rewrite disabled *all*
+; inlining of functions which contain a dynamic alloca. It should be re-enabled
+; once that functionality is restored.
+; XFAIL: *
 
 declare void @ext(i32*)
 
 define internal void @callee(i32 %N) {
-        %P = alloca i32, i32 %N         ; <i32*> [#uses=1]
-        call void @ext( i32* %P )
-        ret void
+  %P = alloca i32, i32 %N
+  call void @ext(i32* %P)
+  ret void
 }
 
 define void @foo(i32 %N) {
-; <label>:0
-        %P = alloca i32, i32 %N         ; <i32*> [#uses=1]
-        call void @ext( i32* %P )
-        br label %Loop
-
-Loop:           ; preds = %Loop, %0
-        %count = phi i32 [ 0, %0 ], [ %next, %Loop ]            ; <i32> [#uses=2]
-        %next = add i32 %count, 1               ; <i32> [#uses=1]
-        call void @callee( i32 %N )
-        %cond = icmp eq i32 %count, 100000              ; <i1> [#uses=1]
-        br i1 %cond, label %out, label %Loop
-
-out:            ; preds = %Loop
-        ret void
+; CHECK: @foo
+; CHECK: alloca i32, i32 %{{.*}}
+; CHECK: call i8* @llvm.stacksave()
+; CHECK: alloca i32, i32 %{{.*}}
+; CHECK: call void @ext
+; CHECK: call void @llvm.stackrestore
+; CHECK: ret
+
+entry:
+  %P = alloca i32, i32 %N
+  call void @ext(i32* %P)
+  br label %loop
+
+loop:
+  %count = phi i32 [ 0, %entry ], [ %next, %loop ]
+  %next = add i32 %count, 1
+  call void @callee(i32 %N)
+  %cond = icmp eq i32 %count, 100000
+  br i1 %cond, label %out, label %loop
+
+out:
+  ret void
 }
 
diff --git a/test/Transforms/Inline/inline-invoke-tail.ll b/test/Transforms/Inline/inline-invoke-tail.ll
index 462c29a..1f34113 100644
--- a/test/Transforms/Inline/inline-invoke-tail.ll
+++ b/test/Transforms/Inline/inline-invoke-tail.ll
@@ -23,15 +23,11 @@ invcont:
 	ret i32 %retval
 
 lpad:
-	%eh_ptr = call i8* @llvm.eh.exception()
-	%eh_select = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* null)
+        %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+            catch i8* null
 	unreachable
 }
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
 declare i32 @__gxx_personality_v0(...)
 
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/Inline/inline_cleanup.ll b/test/Transforms/Inline/inline_cleanup.ll
index 4c64721..3898aa7 100644
--- a/test/Transforms/Inline/inline_cleanup.ll
+++ b/test/Transforms/Inline/inline_cleanup.ll
@@ -1,10 +1,8 @@
 ; Test that the inliner doesn't leave around dead allocas, and that it folds
 ; uncond branches away after it is done specializing.
 
-; RUN: opt < %s -inline -S | \
-; RUN:    not grep {alloca.*uses=0}
-; RUN: opt < %s -inline -S | \
-; RUN:    not grep {br label}
+; RUN: opt < %s -inline -S | FileCheck %s
+
 @A = weak global i32 0		; <i32*> [#uses=1]
 @B = weak global i32 0		; <i32*> [#uses=1]
 @C = weak global i32 0		; <i32*> [#uses=1]
@@ -54,6 +52,18 @@ UnifiedReturnBlock:		; preds = %cond_next13
 declare void @ext(i32*)
 
 define void @test() {
+; CHECK: @test
+; CHECK-NOT: ret
+;
+; FIXME: This should be a CHECK-NOT, but currently we have a bug that causes us
+; to not nuke unused allocas.
+; CHECK: alloca
+; CHECK-NOT: ret
+;
+; No branches should survive the inliner's cleanup.
+; CHECK-NOT: br
+; CHECK: ret void
+
 entry:
 	tail call fastcc void @foo( i32 1 )
 	tail call fastcc void @foo( i32 2 )
@@ -61,3 +71,143 @@ entry:
 	tail call fastcc void @foo( i32 8 )
 	ret void
 }
+
+declare void @f(i32 %x)
+
+define void @inner2(i32 %x, i32 %y, i32 %z, i1 %b) {
+entry:
+  %cmp1 = icmp ne i32 %x, 0
+  br i1 %cmp1, label %then1, label %end1
+
+then1:
+  call void @f(i32 %x)
+  br label %end1
+
+end1:
+  %x2 = and i32 %x, %z
+  %cmp2 = icmp sgt i32 %x2, 1
+  br i1 %cmp2, label %then2, label %end2
+
+then2:
+  call void @f(i32 %x2)
+  br label %end2
+
+end2:
+  %y2 = or i32 %y, %z
+  %cmp3 = icmp sgt i32 %y2, 0
+  br i1 %cmp3, label %then3, label %end3
+
+then3:
+  call void @f(i32 %y2)
+  br label %end3
+
+end3:
+  br i1 %b, label %end3.1, label %end3.2
+
+end3.1:
+  %x3.1 = or i32 %x, 10
+  br label %end3.3
+
+end3.2:
+  %x3.2 = or i32 %x, 10
+  br label %end3.3
+
+end3.3:
+  %x3.3 = phi i32 [ %x3.1, %end3.1 ], [ %x3.2, %end3.2 ]
+  %cmp4 = icmp slt i32 %x3.3, 1
+  br i1 %cmp4, label %then4, label %end4
+
+then4:
+  call void @f(i32 %x3.3)
+  br label %end4
+
+end4:
+  ret void
+}
+
+define void @outer2(i32 %z, i1 %b) {
+; Ensure that after inlining, none of the blocks with a call to @f actually
+; make it through inlining.
+; CHECK: define void @outer2
+; CHECK-NOT: call
+; CHECK: ret void
+
+entry:
+  call void @inner2(i32 0, i32 -1, i32 %z, i1 %b)
+  ret void
+}
+
+define void @PR12470_inner(i16 signext %p1) nounwind uwtable {
+entry:
+  br i1 undef, label %cond.true, label %cond.false
+
+cond.true:
+  br label %cond.end
+
+cond.false:
+  %conv = sext i16 %p1 to i32
+  br label %cond.end
+
+cond.end:
+  %cond = phi i32 [ undef, %cond.true ], [ 0, %cond.false ]
+  %tobool = icmp eq i32 %cond, 0
+  br i1 %tobool, label %if.end5, label %if.then
+
+if.then:
+  ret void
+
+if.end5:
+  ret void
+}
+
+define void @PR12470_outer() {
+; This previously crashed during inliner cleanup and folding inner return
+; instructions. Check that we don't crash and we produce a function with a single
+; return instruction due to merging the returns of the inlined function.
+; CHECK: define void @PR12470_outer
+; CHECK-NOT: call
+; CHECK: ret void
+; CHECK-NOT: ret void
+; CHECK: }
+
+entry:
+  call void @PR12470_inner(i16 signext 1)
+  ret void
+}
+
+define void @crasher_inner() nounwind uwtable {
+entry:
+  br i1 false, label %for.end28, label %for.body6
+
+for.body6:
+  br i1 undef, label %for.body6, label %for.cond12.for.inc26_crit_edge
+
+for.cond12.for.inc26_crit_edge:
+  br label %for.body6.1
+
+for.end28:
+  ret void
+
+for.body6.1:
+  br i1 undef, label %for.body6.1, label %for.cond12.for.inc26_crit_edge.1
+
+for.cond12.for.inc26_crit_edge.1:
+  br label %for.body6.2
+
+for.body6.2:
+  br i1 undef, label %for.body6.2, label %for.cond12.for.inc26_crit_edge.2
+
+for.cond12.for.inc26_crit_edge.2:
+  br label %for.end28
+}
+
+define void @crasher_outer() {
+; CHECK: @crasher_outer
+; CHECK-NOT: call
+; CHECK: ret void
+; CHECK-NOT: ret
+; CHECK: }
+entry:
+  tail call void @crasher_inner()
+  ret void
+}
diff --git a/test/Transforms/Inline/inline_constprop.ll b/test/Transforms/Inline/inline_constprop.ll
index 537c69b..dc35b60 100644
--- a/test/Transforms/Inline/inline_constprop.ll
+++ b/test/Transforms/Inline/inline_constprop.ll
@@ -1,14 +1,112 @@
-; RUN: opt < %s -inline -S | not grep callee
-; RUN: opt < %s -inline -S | not grep div
+; RUN: opt < %s -inline -inline-threshold=20 -S | FileCheck %s
 
+define internal i32 @callee1(i32 %A, i32 %B) {
+  %C = sdiv i32 %A, %B
+  ret i32 %C
+}
+
+define i32 @caller1() {
+; CHECK: define i32 @caller1
+; CHECK-NEXT: ret i32 3
+
+  %X = call i32 @callee1( i32 10, i32 3 )
+  ret i32 %X
+}
+
+define i32 @caller2() {
+; Check that we can constant-prop through instructions after inlining callee21
+; to get constants in the inlined callsite to callee22.
+; FIXME: Currently, the threshold is fixed at 20 because we don't perform
+; *recursive* cost analysis to realize that the nested call site will definitely
+; inline and be cheap. We should eventually do that and lower the threshold here
+; to 1.
+;
+; CHECK: @caller2
+; CHECK-NOT: call void @callee2
+; CHECK: ret
+
+  %x = call i32 @callee21(i32 42, i32 48)
+  ret i32 %x
+}
+
+define i32 @callee21(i32 %x, i32 %y) {
+  %sub = sub i32 %y, %x
+  %result = call i32 @callee22(i32 %sub)
+  ret i32 %result
+}
 
-define internal i32 @callee(i32 %A, i32 %B) {
-        %C = sdiv i32 %A, %B            ; <i32> [#uses=1]
-        ret i32 %C
+declare i8* @getptr()
+
+define i32 @callee22(i32 %x) {
+  %icmp = icmp ugt i32 %x, 42
+  br i1 %icmp, label %bb.true, label %bb.false
+bb.true:
+  ; This block musn't be counted in the inline cost.
+  %x1 = add i32 %x, 1
+  %x2 = add i32 %x1, 1
+  %x3 = add i32 %x2, 1
+  %x4 = add i32 %x3, 1
+  %x5 = add i32 %x4, 1
+  %x6 = add i32 %x5, 1
+  %x7 = add i32 %x6, 1
+  %x8 = add i32 %x7, 1
+
+  ret i32 %x8
+bb.false:
+  ret i32 %x
 }
 
-define i32 @test() {
-        %X = call i32 @callee( i32 10, i32 3 )          ; <i32> [#uses=1]
-        ret i32 %X
+define i32 @caller3() {
+; Check that even if the expensive path is hidden behind several basic blocks,
+; it doesn't count toward the inline cost when constant-prop proves those paths
+; dead.
+;
+; CHECK: @caller3
+; CHECK-NOT: call
+; CHECK: ret i32 6
+
+entry:
+  %x = call i32 @callee3(i32 42, i32 48)
+  ret i32 %x
 }
 
+define i32 @callee3(i32 %x, i32 %y) {
+  %sub = sub i32 %y, %x
+  %icmp = icmp ugt i32 %sub, 42
+  br i1 %icmp, label %bb.true, label %bb.false
+
+bb.true:
+  %icmp2 = icmp ult i32 %sub, 64
+  br i1 %icmp2, label %bb.true.true, label %bb.true.false
+
+bb.true.true:
+  ; This block musn't be counted in the inline cost.
+  %x1 = add i32 %x, 1
+  %x2 = add i32 %x1, 1
+  %x3 = add i32 %x2, 1
+  %x4 = add i32 %x3, 1
+  %x5 = add i32 %x4, 1
+  %x6 = add i32 %x5, 1
+  %x7 = add i32 %x6, 1
+  %x8 = add i32 %x7, 1
+  br label %bb.merge
+
+bb.true.false:
+  ; This block musn't be counted in the inline cost.
+  %y1 = add i32 %y, 1
+  %y2 = add i32 %y1, 1
+  %y3 = add i32 %y2, 1
+  %y4 = add i32 %y3, 1
+  %y5 = add i32 %y4, 1
+  %y6 = add i32 %y5, 1
+  %y7 = add i32 %y6, 1
+  %y8 = add i32 %y7, 1
+  br label %bb.merge
+
+bb.merge:
+  %result = phi i32 [ %x8, %bb.true.true ], [ %y8, %bb.true.false ]
+  ret i32 %result
+
+bb.false:
+  ret i32 %sub
+}
diff --git a/test/Transforms/Inline/inline_returns_twice.ll b/test/Transforms/Inline/inline_returns_twice.ll
new file mode 100644
index 0000000..ab2e954
--- /dev/null
+++ b/test/Transforms/Inline/inline_returns_twice.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -inline -S | FileCheck %s
+
+; Check that functions with "returns_twice" calls are only inlined,
+; if they are themselve marked as such.
+
+declare i32 @a() returns_twice
+declare i32 @b() returns_twice
+
+define i32 @f() {
+entry:
+  %call = call i32 @a() returns_twice
+  %add = add nsw i32 1, %call
+  ret i32 %add
+}
+
+define i32 @g() {
+entry:
+; CHECK: define i32 @g
+; CHECK: call i32 @f()
+; CHECK-NOT: call i32 @a()
+  %call = call i32 @f()
+  %add = add nsw i32 1, %call
+  ret i32 %add
+}
+
+define i32 @h() returns_twice {
+entry:
+  %call = call i32 @b() returns_twice
+  %add = add nsw i32 1, %call
+  ret i32 %add
+}
+
+define i32 @i() {
+entry:
+; CHECK: define i32 @i
+; CHECK: call i32 @b()
+; CHECK-NOT: call i32 @h()
+  %call = call i32 @h() returns_twice
+  %add = add nsw i32 1, %call
+  ret i32 %add
+}
diff --git a/test/Transforms/Inline/lit.local.cfg b/test/Transforms/Inline/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/Inline/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/Inline/noinline-recursive-fn.ll b/test/Transforms/Inline/noinline-recursive-fn.ll
index 1d5ebbb..6cde0e2 100644
--- a/test/Transforms/Inline/noinline-recursive-fn.ll
+++ b/test/Transforms/Inline/noinline-recursive-fn.ll
@@ -17,7 +17,7 @@ entry:
 bb:                                               ; preds = %entry
   %1 = sub nsw i32 %x, 1                          ; <i32> [#uses=1]
   call void @foo(i32 %1) nounwind ssp
-  volatile store i32 1, i32* @g, align 4
+  store volatile i32 1, i32* @g, align 4
   ret void
 
 return:                                           ; preds = %entry
@@ -42,7 +42,7 @@ entry:
   %0 = bitcast i8* %Bar to void (i32, i8*, i8*)*
   %1 = sub nsw i32 %x, 1
   call void %0(i32 %1, i8* %Foo, i8* %Bar) nounwind
-  volatile store i32 42, i32* @g, align 4
+  store volatile i32 42, i32* @g, align 4
   ret void
 }
 
@@ -54,7 +54,7 @@ entry:
 bb:                                               ; preds = %entry
   %1 = bitcast i8* %Foo to void (i32, i8*, i8*)*  ; <void (i32, i8*, i8*)*> [#uses=1]
   call void %1(i32 %x, i8* %Foo, i8* %Bar) nounwind
-  volatile store i32 13, i32* @g, align 4
+  store volatile i32 13, i32* @g, align 4
   ret void
 
 return:                                           ; preds = %entry
@@ -71,3 +71,40 @@ entry:
   call void @f2(i32 123, i8* bitcast (void (i32, i8*, i8*)* @f1 to i8*), i8* bitcast (void (i32, i8*, i8*)* @f2 to i8*)) nounwind ssp
   ret void
 }
+
+
+; Check that a recursive function, when called with a constant that makes the
+; recursive path dead code can actually be inlined.
+define i32 @fib(i32 %i) {
+entry:
+  %is.zero = icmp eq i32 %i, 0
+  br i1 %is.zero, label %zero.then, label %zero.else
+
+zero.then:
+  ret i32 0
+
+zero.else:
+  %is.one = icmp eq i32 %i, 1
+  br i1 %is.one, label %one.then, label %one.else
+
+one.then:
+  ret i32 1
+
+one.else:
+  %i1 = sub i32 %i, 1
+  %f1 = call i32 @fib(i32 %i1)
+  %i2 = sub i32 %i, 2
+  %f2 = call i32 @fib(i32 %i2)
+  %f = add i32 %f1, %f2
+  ret i32 %f
+}
+
+define i32 @fib_caller() {
+; CHECK: @fib_caller
+; CHECK-NOT: call
+; CHECK: ret
+  %f1 = call i32 @fib(i32 0)
+  %f2 = call i32 @fib(i32 1)
+  %result = add i32 %f1, %f2
+  ret i32 %result
+}
diff --git a/test/Transforms/Inline/ptr-diff.ll b/test/Transforms/Inline/ptr-diff.ll
new file mode 100644
index 0000000..60fc3e2
--- /dev/null
+++ b/test/Transforms/Inline/ptr-diff.ll
@@ -0,0 +1,58 @@
+; RUN: opt -inline < %s -S -o - -inline-threshold=10 | FileCheck %s
+
+target datalayout = "p:32:32"
+
+define i32 @outer1() {
+; CHECK: @outer1
+; CHECK-NOT: call
+; CHECK: ret i32
+
+  %ptr = alloca i32
+  %ptr1 = getelementptr inbounds i32* %ptr, i32 0
+  %ptr2 = getelementptr inbounds i32* %ptr, i32 42
+  %result = call i32 @inner1(i32* %ptr1, i32* %ptr2)
+  ret i32 %result
+}
+
+define i32 @inner1(i32* %begin, i32* %end) {
+  %begin.i = ptrtoint i32* %begin to i32
+  %end.i = ptrtoint i32* %end to i32
+  %distance = sub i32 %end.i, %begin.i
+  %icmp = icmp sle i32 %distance, 42
+  br i1 %icmp, label %then, label %else
+
+then:
+  ret i32 3
+
+else:
+  %t = load i32* %begin
+  ret i32 %t
+}
+
+define i32 @outer2(i32* %ptr) {
+; Test that an inbounds GEP disables this -- it isn't safe in general as
+; wrapping changes the behavior of lessthan and greaterthan comparisions.
+; CHECK: @outer2
+; CHECK: call i32 @inner2
+; CHECK: ret i32
+
+  %ptr1 = getelementptr i32* %ptr, i32 0
+  %ptr2 = getelementptr i32* %ptr, i32 42
+  %result = call i32 @inner2(i32* %ptr1, i32* %ptr2)
+  ret i32 %result
+}
+
+define i32 @inner2(i32* %begin, i32* %end) {
+  %begin.i = ptrtoint i32* %begin to i32
+  %end.i = ptrtoint i32* %end to i32
+  %distance = sub i32 %end.i, %begin.i
+  %icmp = icmp sle i32 %distance, 42
+  br i1 %icmp, label %then, label %else
+
+then:
+  ret i32 3
+
+else:
+  %t = load i32* %begin
+  ret i32 %t
+}
diff --git a/test/Transforms/InstCombine/2003-09-09-VolatileLoadElim.ll b/test/Transforms/InstCombine/2003-09-09-VolatileLoadElim.ll
index 3297919..7f73908 100644
--- a/test/Transforms/InstCombine/2003-09-09-VolatileLoadElim.ll
+++ b/test/Transforms/InstCombine/2003-09-09-VolatileLoadElim.ll
@@ -2,6 +2,6 @@
 
 define void @test(i32* %P) {
         ; Dead but not deletable!
-        %X = volatile load i32* %P              ; <i32> [#uses=0]
+        %X = load volatile i32* %P              ; <i32> [#uses=0]
         ret void
 }
diff --git a/test/Transforms/InstCombine/2007-09-11-Trampoline.ll b/test/Transforms/InstCombine/2007-09-11-Trampoline.ll
deleted file mode 100644
index 6190aa9..0000000
--- a/test/Transforms/InstCombine/2007-09-11-Trampoline.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: opt < %s -instcombine -S | grep {call i32 @f}
-
-	%struct.FRAME.nest = type { i32, i32 (i32)* }
-	%struct.__builtin_trampoline = type { [10 x i8] }
-
-declare i8* @llvm.init.trampoline(i8*, i8*, i8*)
-
-declare i32 @f(%struct.FRAME.nest* nest , i32 )
-
-define i32 @nest(i32 %n) {
-entry:
-	%FRAME.0 = alloca %struct.FRAME.nest, align 8		; <%struct.FRAME.nest*> [#uses=3]
-	%TRAMP.216 = alloca [10 x i8], align 16		; <[10 x i8]*> [#uses=1]
-	%TRAMP.216.sub = getelementptr [10 x i8]* %TRAMP.216, i32 0, i32 0		; <i8*> [#uses=1]
-	%tmp3 = getelementptr %struct.FRAME.nest* %FRAME.0, i32 0, i32 0		; <i32*> [#uses=1]
-	store i32 %n, i32* %tmp3, align 8
-	%FRAME.06 = bitcast %struct.FRAME.nest* %FRAME.0 to i8*		; <i8*> [#uses=1]
-	%tramp = call i8* @llvm.init.trampoline( i8* %TRAMP.216.sub, i8* bitcast (i32 (%struct.FRAME.nest* , i32)* @f to i8*), i8* %FRAME.06 )		; <i8*> [#uses=1]
-	%tmp7 = getelementptr %struct.FRAME.nest* %FRAME.0, i32 0, i32 1		; <i32 (i32)**> [#uses=1]
-	%tmp89 = bitcast i8* %tramp to i32 (i32)*		; <i32 (i32)*> [#uses=2]
-	store i32 (i32)* %tmp89, i32 (i32)** %tmp7, align 8
-	%tmp2.i = call i32 %tmp89( i32 1 )		; <i32> [#uses=1]
-	ret i32 %tmp2.i
-}
diff --git a/test/Transforms/InstCombine/2007-10-28-stacksave.ll b/test/Transforms/InstCombine/2007-10-28-stacksave.ll
index 76bceb6..4c5c367 100644
--- a/test/Transforms/InstCombine/2007-10-28-stacksave.ll
+++ b/test/Transforms/InstCombine/2007-10-28-stacksave.ll
@@ -26,7 +26,7 @@ lab:		; preds = %cleanup31, %entry
 	%tmp21 = getelementptr i32* %tmp1819, i32 0		; <i32*> [#uses=1]
 	store i32 1, i32* %tmp21, align 4
 	%tmp2223 = bitcast i32* %tmp1819 to i8*		; <i8*> [#uses=1]
-	volatile store i8* %tmp2223, i8** @p, align 4
+	store volatile i8* %tmp2223, i8** @p, align 4
 	%tmp25 = add i32 %n.0, 1		; <i32> [#uses=2]
 	%tmp27 = icmp sle i32 %tmp25, 999999		; <i1> [#uses=1]
 	%tmp2728 = zext i1 %tmp27 to i8		; <i8> [#uses=1]
diff --git a/test/Transforms/InstCombine/2008-01-14-DoubleNest.ll b/test/Transforms/InstCombine/2008-01-14-DoubleNest.ll
deleted file mode 100644
index 6401dfd..0000000
--- a/test/Transforms/InstCombine/2008-01-14-DoubleNest.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: opt < %s -instcombine -disable-output
-
-	%struct.FRAME.nest = type { i32, i32 (i32*)* }
-	%struct.__builtin_trampoline = type { [10 x i8] }
-
-declare i8* @llvm.init.trampoline(i8*, i8*, i8*) nounwind 
-
-declare i32 @f(%struct.FRAME.nest* nest , i32*)
-
-define i32 @nest(i32 %n) {
-entry:
-	%FRAME.0 = alloca %struct.FRAME.nest, align 8		; <%struct.FRAME.nest*> [#uses=3]
-	%TRAMP.216 = alloca [10 x i8], align 16		; <[10 x i8]*> [#uses=1]
-	%TRAMP.216.sub = getelementptr [10 x i8]* %TRAMP.216, i32 0, i32 0		; <i8*> [#uses=1]
-	%tmp3 = getelementptr %struct.FRAME.nest* %FRAME.0, i32 0, i32 0		; <i32*> [#uses=1]
-	store i32 %n, i32* %tmp3, align 8
-	%FRAME.06 = bitcast %struct.FRAME.nest* %FRAME.0 to i8*		; <i8*> [#uses=1]
-	%tramp = call i8* @llvm.init.trampoline( i8* %TRAMP.216.sub, i8* bitcast (i32 (%struct.FRAME.nest*, i32*)* @f to i8*), i8* %FRAME.06 )		; <i8*> [#uses=1]
-	%tmp7 = getelementptr %struct.FRAME.nest* %FRAME.0, i32 0, i32 1		; <i32 (i32*)**> [#uses=1]
-	%tmp89 = bitcast i8* %tramp to i32 (i32*)*		; <i32 (i32*)*> [#uses=2]
-	store i32 (i32*)* %tmp89, i32 (i32*)** %tmp7, align 8
-	%tmp2.i = call i32 %tmp89( i32* nest  null )		; <i32> [#uses=1]
-	ret i32 %tmp2.i
-}
diff --git a/test/Transforms/InstCombine/2008-04-28-VolatileStore.ll b/test/Transforms/InstCombine/2008-04-28-VolatileStore.ll
index 6847f5e..de08c32 100644
--- a/test/Transforms/InstCombine/2008-04-28-VolatileStore.ll
+++ b/test/Transforms/InstCombine/2008-04-28-VolatileStore.ll
@@ -2,7 +2,7 @@
 
 define void @test() {
 	%votf = alloca <4 x float>		; <<4 x float>*> [#uses=1]
-	volatile store <4 x float> zeroinitializer, <4 x float>* %votf, align 16
+	store volatile <4 x float> zeroinitializer, <4 x float>* %votf, align 16
 	ret void
 }
 
diff --git a/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll b/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll
index a24f307..1286e3d 100644
--- a/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll
+++ b/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll
@@ -6,17 +6,17 @@ target triple = "i386-apple-darwin8"
 define i32 @main() nounwind  {
 entry:
 	%tmp93 = icmp slt i32 0, 10		; <i1> [#uses=0]
-	%tmp34 = volatile load i32* @g_1, align 4		; <i32> [#uses=1]
+	%tmp34 = load volatile i32* @g_1, align 4		; <i32> [#uses=1]
 	br label %bb
 
 bb:		; preds = %bb, %entry
 	%b.0.reg2mem.0 = phi i32 [ 0, %entry ], [ %tmp6, %bb ]		; <i32> [#uses=1]
 	%tmp3.reg2mem.0 = phi i32 [ %tmp34, %entry ], [ %tmp3, %bb ]		; <i32> [#uses=1]
 	%tmp4 = add i32 %tmp3.reg2mem.0, 5		; <i32> [#uses=1]
-	volatile store i32 %tmp4, i32* @g_1, align 4
+	store volatile i32 %tmp4, i32* @g_1, align 4
 	%tmp6 = add i32 %b.0.reg2mem.0, 1		; <i32> [#uses=2]
 	%tmp9 = icmp slt i32 %tmp6, 10		; <i1> [#uses=1]
-	%tmp3 = volatile load i32* @g_1, align 4		; <i32> [#uses=1]
+	%tmp3 = load volatile i32* @g_1, align 4		; <i32> [#uses=1]
 	br i1 %tmp9, label %bb, label %bb11
 
 bb11:		; preds = %bb
diff --git a/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll b/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll
index 5fb11ff..ebbd3a7 100644
--- a/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll
+++ b/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll
@@ -7,11 +7,11 @@ target triple = "i386-apple-darwin8"
 define i32 @main(i32 %i) nounwind  {
 entry:
 	%tmp93 = icmp slt i32 %i, 10		; <i1> [#uses=0]
-	%tmp34 = volatile load i32* @g_1, align 4		; <i32> [#uses=1]
+	%tmp34 = load volatile i32* @g_1, align 4		; <i32> [#uses=1]
 	br i1 %tmp93, label %bb11, label %bb
 
 bb:		; preds = %bb, %entry
-	%tmp3 = volatile load i32* @g_1, align 4		; <i32> [#uses=1]
+	%tmp3 = load volatile i32* @g_1, align 4		; <i32> [#uses=1]
 	br label %bb11
 
 bb11:		; preds = %bb
diff --git a/test/Transforms/InstCombine/2008-06-24-StackRestore.ll b/test/Transforms/InstCombine/2008-06-24-StackRestore.ll
index 8307834..4f4709b 100644
--- a/test/Transforms/InstCombine/2008-06-24-StackRestore.ll
+++ b/test/Transforms/InstCombine/2008-06-24-StackRestore.ll
@@ -10,7 +10,7 @@ entry:
 	%tmp2752 = alloca i32		; <i32*> [#uses=2]
 	%tmpcast53 = bitcast i32* %tmp2752 to i8*		; <i8*> [#uses=1]
 	store i32 2, i32* %tmp2752, align 4
-	volatile store i8* %tmpcast53, i8** @p, align 4
+	store volatile i8* %tmpcast53, i8** @p, align 4
 	br label %bb44
 
 bb:		; preds = %bb44
@@ -29,7 +29,7 @@ bb44:		; preds = %bb44, %entry
 	store i32 1, i32* %tmp27, align 4
 	%tmp34 = getelementptr i32* %tmp27, i32 %tmp4		; <i32*> [#uses=1]
 	store i32 2, i32* %tmp34, align 4
-	volatile store i8* %tmpcast, i8** @p, align 4
+	store volatile i8* %tmpcast, i8** @p, align 4
 	%exitcond = icmp eq i32 %tmp3857, 999999		; <i1> [#uses=1]
 	br i1 %exitcond, label %bb, label %bb44
 }
diff --git a/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll b/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll
index 8104408..1ed5323 100644
--- a/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll
+++ b/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll
@@ -7,17 +7,17 @@ target triple = "i386-apple-darwin8"
 define i32 @main() nounwind  {
 entry:
 	%tmp93 = icmp slt i32 0, 10		; <i1> [#uses=0]
-	%tmp34 = volatile load i32* @g_1, align 4		; <i32> [#uses=1]
+	%tmp34 = load volatile i32* @g_1, align 4		; <i32> [#uses=1]
 	br label %bb
 
 bb:		; preds = %bb, %entry
 	%b.0.reg2mem.0 = phi i32 [ 0, %entry ], [ %tmp6, %bb ]		; <i32> [#uses=1]
 	%tmp3.reg2mem.0 = phi i32 [ %tmp3, %bb ], [ %tmp34, %entry ]
 	%tmp4 = add i32 %tmp3.reg2mem.0, 5		; <i32> [#uses=1]
-	volatile store i32 %tmp4, i32* @g_1, align 4
+	store volatile i32 %tmp4, i32* @g_1, align 4
 	%tmp6 = add i32 %b.0.reg2mem.0, 1		; <i32> [#uses=2]
 	%tmp9 = icmp slt i32 %tmp6, 10		; <i1> [#uses=1]
-	%tmp3 = volatile load i32* @g_1, align 4		; <i32> [#uses=1]
+	%tmp3 = load volatile i32* @g_1, align 4		; <i32> [#uses=1]
 	br i1 %tmp9, label %bb, label %bb11
 
 bb11:		; preds = %bb
diff --git a/test/Transforms/InstCombine/2012-01-11-OpaqueBitcastCrash.ll b/test/Transforms/InstCombine/2012-01-11-OpaqueBitcastCrash.ll
new file mode 100644
index 0000000..abab9dc
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-01-11-OpaqueBitcastCrash.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -instcombine -disable-output
+
+%opaque_struct = type opaque
+
+@G = external global [0 x %opaque_struct]
+
+declare void @foo(%opaque_struct*)
+
+define void @bar() {
+  call void @foo(%opaque_struct* bitcast ([0 x %opaque_struct]* @G to %opaque_struct*))
+  ret void
+}
diff --git a/test/Transforms/InstCombine/2012-02-13-FCmp.ll b/test/Transforms/InstCombine/2012-02-13-FCmp.ll
new file mode 100644
index 0000000..39b0594
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-02-13-FCmp.ll
@@ -0,0 +1,35 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+; Radar 10803727
+@.str = private unnamed_addr constant [35 x i8] c"\0Ain_range input (should be 0): %f\0A\00", align 1
+@.str1 = external hidden unnamed_addr constant [35 x i8], align 1
+
+declare i32 @printf(i8*, ...)
+define i64 @_Z8tempCastj(i32 %val) uwtable ssp {
+entry:
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([35 x i8]* @.str1, i64 0, i64 0), i32 %val)
+  %conv = uitofp i32 %val to double
+  %call.i = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([35 x i8]* @.str, i64 0, i64 0), double %conv)
+  %cmp.i = fcmp oge double %conv, -1.000000e+00
+  br i1 %cmp.i, label %land.rhs.i, label %if.end.critedge
+; CHECK:  br i1 true, label %land.rhs.i, label %if.end.critedge
+
+land.rhs.i:                                       ; preds = %entry
+  %cmp1.i = fcmp olt double %conv, 1.000000e+00
+  br i1 %cmp1.i, label %if.then, label %if.end
+
+if.then:                                          ; preds = %land.rhs.i
+  %add = fadd double %conv, 5.000000e-01
+  %conv3 = fptosi double %add to i64
+  br label %return
+
+if.end.critedge:                                  ; preds = %entry
+  br label %if.end
+
+if.end:                                           ; preds = %if.end.critedge, %land.rhs.i
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  %retval.0 = phi i64 [ %conv3, %if.then ], [ -1, %if.end ]
+  ret i64 %retval.0
+}
+
diff --git a/test/Transforms/InstCombine/2012-02-28-ICmp.ll b/test/Transforms/InstCombine/2012-02-28-ICmp.ll
new file mode 100644
index 0000000..82cf85f
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-02-28-ICmp.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; <rdar://problem/10803154>
+
+; There should be no transformation.
+; CHECK: %a = trunc i32 %x to i8
+; CHECK: %b = icmp ne i8 %a, 0
+; CHECK: %c = and i32 %x, 16711680
+; CHECK: %d = icmp ne i32 %c, 0
+; CHECK: %e = and i1 %b, %d
+; CHECK: ret i1 %e
+
+define i1 @f1(i32 %x) {
+  %a = trunc i32 %x to i8
+  %b = icmp ne i8 %a, 0
+  %c = and i32 %x, 16711680
+  %d = icmp ne i32 %c, 0
+  %e = and i1 %b, %d
+  ret i1 %e
+}
diff --git a/test/Transforms/InstCombine/2012-03-10-InstCombine.ll b/test/Transforms/InstCombine/2012-03-10-InstCombine.ll
new file mode 100644
index 0000000..58ccf12
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-03-10-InstCombine.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -S -instcombine | FileCheck %s
+
+; Derived from gcc.c-torture/execute/frame-address.c
+
+; CHECK:     @func
+; CHECK:     return:
+; CHECK-NOT: ret i32 0
+; CHECK:     ret i32 %retval
+
+define i32 @func(i8* %c, i8* %f) nounwind uwtable readnone noinline ssp {
+entry:
+  %d = alloca i8, align 1
+  store i8 0, i8* %d, align 1
+  %cmp = icmp ugt i8* %d, %c
+  br i1 %cmp, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  %cmp2 = icmp ule i8* %d, %f
+  %not.cmp1 = icmp uge i8* %c, %f
+  %.cmp2 = and i1 %cmp2, %not.cmp1
+  %land.ext = zext i1 %.cmp2 to i32
+  br label %return
+
+if.else:                                          ; preds = %entry
+  %cmp5 = icmp uge i8* %d, %f
+  %not.cmp3 = icmp ule i8* %c, %f
+  %.cmp5 = and i1 %cmp5, %not.cmp3
+  %land.ext7 = zext i1 %.cmp5 to i32
+  br label %return
+
+return:                                           ; preds = %if.else, %if.then
+  %retval.0 = phi i32 [ %land.ext, %if.then ], [ %land.ext7, %if.else ]
+  ret i32 %retval.0
+}
+
diff --git a/test/Transforms/InstCombine/2012-3-15-or-xor-constant.ll b/test/Transforms/InstCombine/2012-3-15-or-xor-constant.ll
new file mode 100644
index 0000000..c1602da
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-3-15-or-xor-constant.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; PR12234
+
+@g = extern_weak global i32
+define i32 @function(i32 %x) nounwind {
+entry:
+  %xor = xor i32 %x, 1
+  store volatile i32 %xor, i32* inttoptr (i64 1 to i32*), align 4
+  %or4 = or i32 or (i32 zext (i1 icmp eq (i32* @g, i32* null) to i32), i32 1), %xor
+  ret i32 %or4
+}
+; CHECK: define i32 @function
diff --git a/test/Transforms/InstCombine/LandingPadClauses.ll b/test/Transforms/InstCombine/LandingPadClauses.ll
index 055bdcc..de3b2d3 100644
--- a/test/Transforms/InstCombine/LandingPadClauses.ll
+++ b/test/Transforms/InstCombine/LandingPadClauses.ll
@@ -6,6 +6,7 @@
 
 declare i32 @generic_personality(i32, i64, i8*, i8*)
 declare i32 @__gxx_personality_v0(i32, i64, i8*, i8*)
+declare i32 @__objc_personality_v0(i32, i64, i8*, i8*)
 
 declare void @bar()
 
@@ -179,3 +180,54 @@ lpad.d:
 ; CHECK-NEXT: null
 ; CHECK-NEXT: unreachable
 }
+
+define void @foo_objc() {
+; CHECK: @foo_objc
+  invoke void @bar()
+    to label %cont.a unwind label %lpad.a
+cont.a:
+  invoke void @bar()
+    to label %cont.b unwind label %lpad.b
+cont.b:
+  invoke void @bar()
+    to label %cont.c unwind label %lpad.c
+cont.c:
+  invoke void @bar()
+    to label %cont.d unwind label %lpad.d
+cont.d:
+  ret void
+
+lpad.a:
+  %a = landingpad { i8*, i32 } personality i32 (i32, i64, i8*, i8*)* @__objc_personality_v0
+          catch i32* null
+          catch i32* @T1
+  unreachable
+; CHECK: %a = landingpad
+; CHECK-NEXT: null
+; CHECK-NEXT: unreachable
+
+lpad.b:
+  %b = landingpad { i8*, i32 } personality i32 (i32, i64, i8*, i8*)* @__objc_personality_v0
+          filter [1 x i32*] zeroinitializer
+  unreachable
+; CHECK: %b = landingpad
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: unreachable
+
+lpad.c:
+  %c = landingpad { i8*, i32 } personality i32 (i32, i64, i8*, i8*)* @__objc_personality_v0
+          filter [2 x i32*] [i32* @T1, i32* null]
+  unreachable
+; CHECK: %c = landingpad
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: unreachable
+
+lpad.d:
+  %d = landingpad { i8*, i32 } personality i32 (i32, i64, i8*, i8*)* @__objc_personality_v0
+          cleanup
+          catch i32* null
+  unreachable
+; CHECK: %d = landingpad
+; CHECK-NEXT: null
+; CHECK-NEXT: unreachable
+}
diff --git a/test/Transforms/InstCombine/align-external.ll b/test/Transforms/InstCombine/align-external.ll
index 6e8ad87..d4a5d42 100644
--- a/test/Transforms/InstCombine/align-external.ll
+++ b/test/Transforms/InstCombine/align-external.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-; Don't assume that external global variables have their preferred
-; alignment. They may only have the ABI minimum alignment.
+; Don't assume that external global variables or those with weak linkage have
+; their preferred alignment. They may only have the ABI minimum alignment.
 
 ; CHECK: %s = shl i64 %a, 3
 ; CHECK: %r = or i64 %s, ptrtoint (i32* @A to i64)
@@ -11,7 +11,7 @@
 target datalayout = "-i32:8:32"
 
 @A = external global i32
-@B = external global i32
+@B = weak_odr global i32 0
 
 define i64 @foo(i64 %a) {
   %t = ptrtoint i32* @A to i64
@@ -20,3 +20,10 @@ define i64 @foo(i64 %a) {
   %q = add i64 %r, 1
   ret i64 %q
 }
+
+define i32 @bar() {
+; CHECK: @bar
+  %r = load i32* @B, align 1
+; CHECK: align 1
+  ret i32 %r
+}
diff --git a/test/Transforms/InstCombine/alloca.ll b/test/Transforms/InstCombine/alloca.ll
index e4d1367..ef7185c 100644
--- a/test/Transforms/InstCombine/alloca.ll
+++ b/test/Transforms/InstCombine/alloca.ll
@@ -44,3 +44,47 @@ define i32* @test4(i32 %n) {
   %A = alloca i32, i32 %n
   ret i32* %A
 }
+
+; Allocas which are only used by GEPs, bitcasts, and stores (transitively)
+; should be deleted.
+define void @test5() {
+; CHECK: @test5
+; CHECK-NOT: alloca
+; CHECK-NOT: store
+; CHECK: ret
+
+entry:
+  %a = alloca { i32 }
+  %b = alloca i32*
+  %a.1 = getelementptr { i32 }* %a, i32 0, i32 0
+  store i32 123, i32* %a.1
+  store i32* %a.1, i32** %b
+  %b.1 = bitcast i32** %b to i32*
+  store i32 123, i32* %b.1
+  %a.2 = getelementptr { i32 }* %a, i32 0, i32 0
+  store atomic i32 2, i32* %a.2 unordered, align 4
+  %a.3 = getelementptr { i32 }* %a, i32 0, i32 0
+  store atomic i32 3, i32* %a.3 release, align 4
+  %a.4 = getelementptr { i32 }* %a, i32 0, i32 0
+  store atomic i32 4, i32* %a.4 seq_cst, align 4
+  ret void
+}
+
+declare void @f(i32* %p)
+
+; Check that we don't delete allocas in some erroneous cases.
+define void @test6() {
+; CHECK: @test6
+; CHECK-NOT: ret
+; CHECK: alloca
+; CHECK-NEXT: alloca
+; CHECK: ret
+
+entry:
+  %a = alloca { i32 }
+  %b = alloca i32
+  %a.1 = getelementptr { i32 }* %a, i32 0, i32 0
+  store volatile i32 123, i32* %a.1
+  tail call void @f(i32* %b)
+  ret void
+}
diff --git a/test/Transforms/InstCombine/apint-shl-trunc.ll b/test/Transforms/InstCombine/apint-shl-trunc.ll
index 8163e6d..f2dc7d5 100644
--- a/test/Transforms/InstCombine/apint-shl-trunc.ll
+++ b/test/Transforms/InstCombine/apint-shl-trunc.ll
@@ -1,13 +1,24 @@
-; RUN: opt < %s -instcombine -S | grep shl
-; END.
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
 define i1 @test0(i39 %X, i39 %A) {
+; CHECK: @test0
+; CHECK: %[[V1:.*]] = shl i39 1, %A
+; CHECK: %[[V2:.*]] = and i39 %[[V1]], %X
+; CHECK: %[[V3:.*]] = icmp ne i39 %[[V2]], 0
+; CHECK: ret i1 %[[V3]]
+
 	%B = lshr i39 %X, %A
 	%D = trunc i39 %B to i1
 	ret i1 %D
 }
 
 define i1 @test1(i799 %X, i799 %A) {
+; CHECK: @test1
+; CHECK: %[[V1:.*]] = shl i799 1, %A
+; CHECK: %[[V2:.*]] = and i799 %[[V1]], %X
+; CHECK: %[[V3:.*]] = icmp ne i799 %[[V2]], 0
+; CHECK: ret i1 %[[V3]]
+
 	%B = lshr i799 %X, %A
 	%D = trunc i799 %B to i1
 	ret i1 %D
diff --git a/test/Transforms/InstCombine/bitcount.ll b/test/Transforms/InstCombine/bitcount.ll
index f75ca2d..a6fd837 100644
--- a/test/Transforms/InstCombine/bitcount.ll
+++ b/test/Transforms/InstCombine/bitcount.ll
@@ -4,13 +4,13 @@
 ; RUN:   grep -v declare | not grep llvm.ct
 
 declare i31 @llvm.ctpop.i31(i31 %val) 
-declare i32 @llvm.cttz.i32(i32 %val) 
-declare i33 @llvm.ctlz.i33(i33 %val) 
+declare i32 @llvm.cttz.i32(i32 %val, i1) 
+declare i33 @llvm.ctlz.i33(i33 %val, i1) 
 
 define i32 @test(i32 %A) {
   %c1 = call i31 @llvm.ctpop.i31(i31 12415124)
-  %c2 = call i32 @llvm.cttz.i32(i32 87359874)
-  %c3 = call i33 @llvm.ctlz.i33(i33 87359874)
+  %c2 = call i32 @llvm.cttz.i32(i32 87359874, i1 true)
+  %c3 = call i33 @llvm.ctlz.i33(i33 87359874, i1 true)
   %t1 = zext i31 %c1 to i32
   %t3 = trunc i33 %c3 to i32
   %r1 = add i32 %t1, %c2
diff --git a/test/Transforms/InstCombine/constant-fold-gep.ll b/test/Transforms/InstCombine/constant-fold-gep.ll
index c679226..e5b16ea 100644
--- a/test/Transforms/InstCombine/constant-fold-gep.ll
+++ b/test/Transforms/InstCombine/constant-fold-gep.ll
@@ -9,7 +9,7 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 @Y = internal global [3 x %struct.X] zeroinitializer
 
 define void @frob() {
-; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 0), align 8
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 0), align 16
   store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 0), align 4
 ; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 1), align 4
   store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 1), align 4
@@ -33,7 +33,7 @@ define void @frob() {
   store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 10), align 4
 ; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 1, i32 1, i64 2), align 4
   store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 11), align 4
-; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 2, i32 0, i64 0), align 8
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 2, i32 0, i64 0), align 16
   store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 12), align 4
 ; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 2, i32 0, i64 1), align 4
   store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 13), align 4
@@ -47,7 +47,7 @@ define void @frob() {
   store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 17), align 8
 ; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 1, i64 0, i32 0, i64 0), align 8
   store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 18), align 8
-; CHECK: store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 2, i64 0, i32 0, i64 0), align 8
+; CHECK: store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 2, i64 0, i32 0, i64 0), align 16
   store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 36), align 8
 ; CHECK: store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 1, i64 0, i32 0, i64 1), align 8
   store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 19), align 8
diff --git a/test/Transforms/InstCombine/crash.ll b/test/Transforms/InstCombine/crash.ll
index 54a77aa..d5af532 100644
--- a/test/Transforms/InstCombine/crash.ll
+++ b/test/Transforms/InstCombine/crash.ll
@@ -165,20 +165,19 @@ entry:
   br i1 %tobool, label %cond.end, label %cond.false
 
 terminate.handler:                                ; preds = %ehcleanup
-  %exc = call i8* @llvm.eh.exception()            ; <i8*> [#uses=1]
-  %0 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exc, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 1) ; <i32> [#uses=0]
+  %exc = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+           cleanup
   call void @_ZSt9terminatev() noreturn nounwind
   unreachable
 
 ehcleanup:                                        ; preds = %cond.false
-  %exc1 = call i8* @llvm.eh.exception()           ; <i8*> [#uses=2]
-  %1 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exc1, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* null) ; <i32> [#uses=0]
+  %exc1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+           catch i8* null
   invoke void @_ZN6UStackD1Ev(%class.UStack* %breaks)
           to label %cont unwind label %terminate.handler
 
 cont:                                             ; preds = %ehcleanup
-  call void @_Unwind_Resume_or_Rethrow(i8* %exc1)
-  unreachable
+  resume { i8*, i32 } %exc1
 
 cond.false:                                       ; preds = %entry
   %tmp4 = getelementptr inbounds %class.RuleBasedBreakIterator* %this, i32 0, i32 0 ; <i64 ()**> [#uses=1]
@@ -199,10 +198,6 @@ declare void @_ZN6UStackD1Ev(%class.UStack*)
 
 declare i32 @__gxx_personality_v0(...)
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
 declare void @_ZSt9terminatev()
 
 declare void @_Unwind_Resume_or_Rethrow(i8*)
diff --git a/test/Transforms/InstCombine/dg.exp b/test/Transforms/InstCombine/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/InstCombine/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/InstCombine/extractvalue.ll b/test/Transforms/InstCombine/extractvalue.ll
index cf36b8f..5e4c677 100644
--- a/test/Transforms/InstCombine/extractvalue.ll
+++ b/test/Transforms/InstCombine/extractvalue.ll
@@ -88,7 +88,7 @@ define i32 @doubleextract2gep({i32, {i32, i32}}* %arg) {
 ; CHECK-NEXT: ret
 define i32 @nogep-multiuse({i32, i32}* %pair) {
         ; The load should be left unchanged since both parts are needed.
-        %L = volatile load {i32, i32}* %pair
+        %L = load volatile {i32, i32}* %pair
         %LHS = extractvalue {i32, i32} %L, 0
         %RHS = extractvalue {i32, i32} %L, 1
         %R = add i32 %LHS, %RHS
@@ -100,8 +100,8 @@ define i32 @nogep-multiuse({i32, i32}* %pair) {
 ; CHECK-NEXT: extractvalue
 ; CHECK-NEXT: ret
 define i32 @nogep-volatile({i32, i32}* %pair) {
-        ; The volatile load should be left unchanged.
-        %L = volatile load {i32, i32}* %pair
+        ; The load volatile should be left unchanged.
+        %L = load volatile {i32, i32}* %pair
         %E = extractvalue {i32, i32} %L, 1
         ret i32 %E
 }
diff --git a/test/Transforms/InstCombine/fold-sqrt-sqrtf.ll b/test/Transforms/InstCombine/fold-sqrt-sqrtf.ll
new file mode 100644
index 0000000..bd92b4a
--- /dev/null
+++ b/test/Transforms/InstCombine/fold-sqrt-sqrtf.ll
@@ -0,0 +1,17 @@
+; RUN: opt -instcombine -S -disable-simplify-libcalls < %s | FileCheck %s
+; rdar://10466410
+
+; Instcombine tries to fold (fptrunc (sqrt (fpext x))) -> (sqrtf x), but this
+; shouldn't fold when sqrtf isn't available.
+define float @foo(float %f) uwtable ssp {
+entry:
+; CHECK: %conv = fpext float %f to double
+; CHECK: %call = tail call double @sqrt(double %conv)
+; CHECK: %conv1 = fptrunc double %call to float
+  %conv = fpext float %f to double
+  %call = tail call double @sqrt(double %conv)
+  %conv1 = fptrunc double %call to float
+  ret float %conv1
+}
+
+declare double @sqrt(double)
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index 77ca62c..a9ae221 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -121,8 +121,8 @@ define i1 @test12(i1 %A) {
   %B = icmp ne i64 bitcast (<2 x i32> <i32 1, i32 -1> to i64), %S
   ret i1 %B
 ; CHECK: @test12
-; CHECK-NEXT: %B = select i1
-; CHECK-NEXT: ret i1 %B
+; CHECK-NEXT: = xor i1 %A, true
+; CHECK-NEXT: ret i1
 }
 
 ; PR6481
@@ -524,7 +524,7 @@ define i1 @test53(i32 %a, i32 %b) nounwind {
 
 ; CHECK: @test54
 ; CHECK-NEXT: %and = and i8 %a, -64
-; CHECK-NEXT icmp eq i8 %and, -128
+; CHECK-NEXT: icmp eq i8 %and, -128
 define i1 @test54(i8 %a) nounwind {
   %ext = zext i8 %a to i32
   %and = and i32 %ext, 192
@@ -559,3 +559,81 @@ define i1 @test57(i32 %a) {
   call void @foo(i32 %and)
   ret i1 %cmp
 }
+
+; rdar://problem/10482509
+; CHECK: @cmpabs1
+; CHECK-NEXT: icmp ne
+define zeroext i1 @cmpabs1(i64 %val) {
+  %sub = sub nsw i64 0, %val
+  %cmp = icmp slt i64 %val, 0
+  %sub.val = select i1 %cmp, i64 %sub, i64 %val
+  %tobool = icmp ne i64 %sub.val, 0
+  ret i1 %tobool
+}
+
+; CHECK: @cmpabs2
+; CHECK-NEXT: icmp ne
+define zeroext i1 @cmpabs2(i64 %val) {
+  %sub = sub nsw i64 0, %val
+  %cmp = icmp slt i64 %val, 0
+  %sub.val = select i1 %cmp, i64 %val, i64 %sub
+  %tobool = icmp ne i64 %sub.val, 0
+  ret i1 %tobool
+}
+
+; CHECK: @test58
+; CHECK-NEXT: call i32 @test58_d(i64 36029346783166592)
+define void @test58() nounwind {
+  %cast = bitcast <1 x i64> <i64 36029346783166592> to i64
+  %call = call i32 @test58_d( i64 %cast) nounwind
+  ret void
+}
+declare i32 @test58_d(i64)
+
+define i1 @test59(i8* %foo) {
+  %bit = bitcast i8* %foo to i32*
+  %gep1 = getelementptr inbounds i32* %bit, i64 2
+  %gep2 = getelementptr inbounds i8* %foo, i64 10
+  %cast1 = bitcast i32* %gep1 to i8*
+  %cmp = icmp ult i8* %cast1, %gep2
+  %use = ptrtoint i8* %cast1 to i64
+  %call = call i32 @test58_d(i64 %use) nounwind
+  ret i1 %cmp
+; CHECK: @test59
+; CHECK: ret i1 true
+}
+
+define i1 @test60(i8* %foo, i64 %i, i64 %j) {
+  %bit = bitcast i8* %foo to i32*
+  %gep1 = getelementptr inbounds i32* %bit, i64 %i
+  %gep2 = getelementptr inbounds i8* %foo, i64 %j
+  %cast1 = bitcast i32* %gep1 to i8*
+  %cmp = icmp ult i8* %cast1, %gep2
+  ret i1 %cmp
+; CHECK: @test60
+; CHECK-NEXT: %gep1.idx = shl nuw i64 %i, 2
+; CHECK-NEXT: icmp slt i64 %gep1.idx, %j
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test61(i8* %foo, i64 %i, i64 %j) {
+  %bit = bitcast i8* %foo to i32*
+  %gep1 = getelementptr i32* %bit, i64 %i
+  %gep2 = getelementptr  i8* %foo, i64 %j
+  %cast1 = bitcast i32* %gep1 to i8*
+  %cmp = icmp ult i8* %cast1, %gep2
+  ret i1 %cmp
+; Don't transform non-inbounds GEPs.
+; CHECK: @test61
+; CHECK: icmp ult i8* %cast1, %gep2
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test62(i8* %a) {
+  %arrayidx1 = getelementptr inbounds i8* %a, i64 1
+  %arrayidx2 = getelementptr inbounds i8* %a, i64 10
+  %cmp = icmp slt i8* %arrayidx1, %arrayidx2
+  ret i1 %cmp
+; CHECK: @test62
+; CHECK-NEXT: ret i1 true
+}
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index f033e51..382e6b3 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -5,10 +5,10 @@
 declare %overflow.result @llvm.uadd.with.overflow.i8(i8, i8)
 declare %overflow.result @llvm.umul.with.overflow.i8(i8, i8)
 declare double @llvm.powi.f64(double, i32) nounwind readonly
-declare i32 @llvm.cttz.i32(i32) nounwind readnone
-declare i32 @llvm.ctlz.i32(i32) nounwind readnone
+declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
 declare i32 @llvm.ctpop.i32(i32) nounwind readnone
-declare i8 @llvm.ctlz.i8(i8) nounwind readnone
+declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
 
 define i8 @uaddtest1(i8 %A, i8 %B) {
   %x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 %A, i8 %B)
@@ -142,13 +142,13 @@ define i32 @umultest4(i32 %n) nounwind {
 define void @powi(double %V, double *%P) {
 entry:
   %A = tail call double @llvm.powi.f64(double %V, i32 -1) nounwind
-  volatile store double %A, double* %P
+  store volatile double %A, double* %P
 
   %B = tail call double @llvm.powi.f64(double %V, i32 0) nounwind
-  volatile store double %B, double* %P
+  store volatile double %B, double* %P
 
   %C = tail call double @llvm.powi.f64(double %V, i32 1) nounwind
-  volatile store double %C, double* %P
+  store volatile double %C, double* %P
   ret void
 ; CHECK: @powi
 ; CHECK: %A = fdiv double 1.0{{.*}}, %V
@@ -161,7 +161,7 @@ define i32 @cttz(i32 %a) {
 entry:
   %or = or i32 %a, 8
   %and = and i32 %or, -8
-  %count = tail call i32 @llvm.cttz.i32(i32 %and) nounwind readnone
+  %count = tail call i32 @llvm.cttz.i32(i32 %and, i1 true) nounwind readnone
   ret i32 %count
 ; CHECK: @cttz
 ; CHECK-NEXT: entry:
@@ -172,7 +172,7 @@ define i8 @ctlz(i8 %a) {
 entry:
   %or = or i8 %a, 32
   %and = and i8 %or, 63
-  %count = tail call i8 @llvm.ctlz.i8(i8 %and) nounwind readnone
+  %count = tail call i8 @llvm.ctlz.i8(i8 %and, i1 true) nounwind readnone
   ret i8 %count
 ; CHECK: @ctlz
 ; CHECK-NEXT: entry:
@@ -181,15 +181,15 @@ entry:
 
 define void @cmp.simplify(i32 %a, i32 %b, i1* %c) {
 entry:
-  %lz = tail call i32 @llvm.ctlz.i32(i32 %a) nounwind readnone
+  %lz = tail call i32 @llvm.ctlz.i32(i32 %a, i1 false) nounwind readnone
   %lz.cmp = icmp eq i32 %lz, 32
-  volatile store i1 %lz.cmp, i1* %c
-  %tz = tail call i32 @llvm.cttz.i32(i32 %a) nounwind readnone
+  store volatile i1 %lz.cmp, i1* %c
+  %tz = tail call i32 @llvm.cttz.i32(i32 %a, i1 false) nounwind readnone
   %tz.cmp = icmp ne i32 %tz, 32
-  volatile store i1 %tz.cmp, i1* %c
+  store volatile i1 %tz.cmp, i1* %c
   %pop = tail call i32 @llvm.ctpop.i32(i32 %b) nounwind readnone
   %pop.cmp = icmp eq i32 %pop, 0
-  volatile store i1 %pop.cmp, i1* %c
+  store volatile i1 %pop.cmp, i1* %c
   ret void
 ; CHECK: @cmp.simplify
 ; CHECK-NEXT: entry:
@@ -201,16 +201,22 @@ entry:
 ; CHECK-NEXT: store volatile i1 %pop.cmp, i1* %c
 }
 
-
-define i32 @cttz_simplify1(i32 %x) nounwind readnone ssp {
-  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %x)    ; <i32> [#uses=1]
-  %shr3 = lshr i32 %tmp1, 5                       ; <i32> [#uses=1]
+define i32 @cttz_simplify1a(i32 %x) nounwind readnone ssp {
+  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+  %shr3 = lshr i32 %tmp1, 5
   ret i32 %shr3
-  
-; CHECK: @cttz_simplify1
+
+; CHECK: @cttz_simplify1a
 ; CHECK: icmp eq i32 %x, 0
-; CHECK-NEXT: zext i1 
+; CHECK-NEXT: zext i1
 ; CHECK-NEXT: ret i32
 }
 
+define i32 @cttz_simplify1b(i32 %x) nounwind readnone ssp {
+  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %shr3 = lshr i32 %tmp1, 5
+  ret i32 %shr3
 
+; CHECK: @cttz_simplify1b
+; CHECK-NEXT: ret i32 0
+}
diff --git a/test/Transforms/InstCombine/lit.local.cfg b/test/Transforms/InstCombine/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/InstCombine/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/InstCombine/mul.ll b/test/Transforms/InstCombine/mul.ll
index 53a5643..edb5305 100644
--- a/test/Transforms/InstCombine/mul.ll
+++ b/test/Transforms/InstCombine/mul.ll
@@ -1,116 +1,184 @@
 ; This test makes sure that mul instructions are properly eliminated.
-; RUN: opt < %s -instcombine -S | not grep mul
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
 define i32 @test1(i32 %A) {
+; CHECK: @test1
         %B = mul i32 %A, 1              ; <i32> [#uses=1]
         ret i32 %B
+; CHECK: ret i32 %A
 }
 
 define i32 @test2(i32 %A) {
+; CHECK: @test2
         ; Should convert to an add instruction
         %B = mul i32 %A, 2              ; <i32> [#uses=1]
         ret i32 %B
+; CHECK: shl i32 %A, 1
 }
 
 define i32 @test3(i32 %A) {
+; CHECK: @test3
         ; This should disappear entirely
         %B = mul i32 %A, 0              ; <i32> [#uses=1]
         ret i32 %B
+; CHECK: ret i32 0
 }
 
 define double @test4(double %A) {
+; CHECK: @test4
         ; This is safe for FP
         %B = fmul double 1.000000e+00, %A                ; <double> [#uses=1]
         ret double %B
+; CHECK: ret double %A
 }
 
 define i32 @test5(i32 %A) {
+; CHECK: @test5
         %B = mul i32 %A, 8              ; <i32> [#uses=1]
         ret i32 %B
+; CHECK: shl i32 %A, 3
 }
 
 define i8 @test6(i8 %A) {
+; CHECK: @test6
         %B = mul i8 %A, 8               ; <i8> [#uses=1]
         %C = mul i8 %B, 8               ; <i8> [#uses=1]
         ret i8 %C
+; CHECK: shl i8 %A, 6
 }
 
 define i32 @test7(i32 %i) {
+; CHECK: @test7
         %tmp = mul i32 %i, -1           ; <i32> [#uses=1]
         ret i32 %tmp
+; CHECK: sub i32 0, %i
 }
 
 define i64 @test8(i64 %i) {
-       ; tmp = sub 0, %i
+; CHECK: @test8
         %j = mul i64 %i, -1             ; <i64> [#uses=1]
         ret i64 %j
+; CHECK: sub i64 0, %i
 }
 
 define i32 @test9(i32 %i) {
-        ; %j = sub 0, %i
+; CHECK: @test9
         %j = mul i32 %i, -1             ; <i32> [#uses=1]
         ret i32 %j
+; CHECJ: sub i32 0, %i
 }
 
 define i32 @test10(i32 %a, i32 %b) {
+; CHECK: @test10
         %c = icmp slt i32 %a, 0         ; <i1> [#uses=1]
         %d = zext i1 %c to i32          ; <i32> [#uses=1]
        ; e = b & (a >> 31)
         %e = mul i32 %d, %b             ; <i32> [#uses=1]
         ret i32 %e
+; CHECK: [[TEST10:%.*]] = ashr i32 %a, 31
+; CHECK-NEXT: %e = and i32 [[TEST10]], %b
+; CHECK-NEXT: ret i32 %e
 }
 
 define i32 @test11(i32 %a, i32 %b) {
+; CHECK: @test11
         %c = icmp sle i32 %a, -1                ; <i1> [#uses=1]
         %d = zext i1 %c to i32          ; <i32> [#uses=1]
         ; e = b & (a >> 31)
         %e = mul i32 %d, %b             ; <i32> [#uses=1]
         ret i32 %e
+; CHECK: [[TEST11:%.*]] = ashr i32 %a, 31
+; CHECK-NEXT: %e = and i32 [[TEST11]], %b
+; CHECK-NEXT: ret i32 %e
 }
 
-define i32 @test12(i8 %a, i32 %b) {
-        %c = icmp ugt i8 %a, 127                ; <i1> [#uses=1]
+define i32 @test12(i32 %a, i32 %b) {
+; CHECK: @test12
+        %c = icmp ugt i32 %a, 2147483647                ; <i1> [#uses=1]
         %d = zext i1 %c to i32          ; <i32> [#uses=1]
-        ; e = b & (a >> 31)
         %e = mul i32 %d, %b             ; <i32> [#uses=1]
         ret i32 %e
+; CHECK: [[TEST12:%.*]] = ashr i32 %a, 31
+; CHECK-NEXT: %e = and i32 [[TEST12]], %b
+; CHECK-NEXT: ret i32 %e
+
 }
 
 ; PR2642
 define internal void @test13(<4 x float>*) {
+; CHECK: @test13
 	load <4 x float>* %0, align 1
 	fmul <4 x float> %2, < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >
 	store <4 x float> %3, <4 x float>* %0, align 1
 	ret void
+; CHECK-NEXT: ret void
 }
 
 define <16 x i8> @test14(<16 x i8> %a) {
+; CHECK: @test14
         %b = mul <16 x i8> %a, zeroinitializer
         ret <16 x i8> %b
+; CHECK-NEXT: ret <16 x i8> zeroinitializer
 }
 
 ; rdar://7293527
 define i32 @test15(i32 %A, i32 %B) {
+; CHECK: @test15
 entry:
   %shl = shl i32 1, %B
   %m = mul i32 %shl, %A
   ret i32 %m
+; CHECK: shl i32 %A, %B
 }
 
 ; X * Y (when Y is 0 or 1) --> x & (0-Y)
 define i32 @test16(i32 %b, i1 %c) {
+; CHECK: @test16
         %d = zext i1 %c to i32          ; <i32> [#uses=1]
         ; e = b & (a >> 31)
         %e = mul i32 %d, %b             ; <i32> [#uses=1]
         ret i32 %e
+; CHECK: [[TEST16:%.*]] = sext i1 %c to i32
+; CHECK-NEXT: %e = and i32 [[TEST16]], %b
+; CHECK-NEXT: ret i32 %e
 }
 
 ; X * Y (when Y is 0 or 1) --> x & (0-Y)
 define i32 @test17(i32 %a, i32 %b) {
+; CHECK: @test17
   %a.lobit = lshr i32 %a, 31
   %e = mul i32 %a.lobit, %b
   ret i32 %e
+; CHECK: [[TEST17:%.*]] = ashr i32 %a, 31
+; CHECK-NEXT: %e = and i32 [[TEST17]], %b
+; CHECK-NEXT: ret i32 %e
+}
+
+define i32 @test18(i32 %A, i32 %B) {
+; CHECK: @test18
+  %C = and i32 %A, 1
+  %D = and i32 %B, 1
+
+  %E = mul i32 %C, %D
+  %F = and i32 %E, 16
+  ret i32 %F
+; CHECK-NEXT: ret i32 0
 }
 
+declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32)
+declare void @use(i1)
 
+define i32 @test19(i32 %A, i32 %B) {
+; CHECK: @test19
+  %C = and i32 %A, 1
+  %D = and i32 %B, 1
 
+; It would be nice if we also started proving that this doesn't overflow.
+  %E = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %C, i32 %D)
+  %F = extractvalue {i32, i1} %E, 0
+  %G = extractvalue {i32, i1} %E, 1
+  call void @use(i1 %G)
+  %H = and i32 %F, 16
+  ret i32 %H
+; CHECK: ret i32 0
+}
diff --git a/test/Transforms/InstCombine/overflow.ll b/test/Transforms/InstCombine/overflow.ll
index 9123283..81ceef8 100644
--- a/test/Transforms/InstCombine/overflow.ll
+++ b/test/Transforms/InstCombine/overflow.ll
@@ -130,4 +130,26 @@ entry:
   ret i64 %Q
 }
 
+; CHECK: @test8
+; PR11438
+; This is @test1, but the operands are not sign-extended.  Make sure
+; we don't transform this case.
+define i32 @test8(i64 %a, i64 %b) nounwind ssp {
+entry:
+; CHECK-NOT: llvm.sadd
+; CHECK: add i64 %a, %b
+; CHECK-NOT: llvm.sadd
+; CHECK: ret
+  %add = add i64 %a, %b
+  %add.off = add i64 %add, 2147483648
+  %0 = icmp ugt i64 %add.off, 4294967295
+  br i1 %0, label %if.then, label %if.end
+
+if.then:
+  tail call void @throwAnExceptionOrWhatever() nounwind
+  br label %if.end
 
+if.end:
+  %conv9 = trunc i64 %add to i32
+  ret i32 %conv9
+}
diff --git a/test/Transforms/InstCombine/pr12251.ll b/test/Transforms/InstCombine/pr12251.ll
new file mode 100644
index 0000000..74a41eb
--- /dev/null
+++ b/test/Transforms/InstCombine/pr12251.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define zeroext i1 @_Z3fooPb(i8* nocapture %x) {
+entry:
+  %a = load i8* %x, align 1, !range !0
+  %b = and i8 %a, 1
+  %tobool = icmp ne i8 %b, 0
+  ret i1 %tobool
+}
+
+; CHECK: %a = load i8* %x, align 1, !range !0
+; CHECK-NEXT: %tobool = icmp ne i8 %a, 0
+; CHECK-NEXT: ret i1 %tobool
+
+!0 = metadata !{i8 0, i8 2}
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index 4661561..4baae26 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -809,3 +809,23 @@ define i32 @test61(i32* %ptr) {
 ; CHECK: @test61
 ; CHECK: ret i32 10
 }
+
+define i1 @test62(i1 %A, i1 %B) {
+        %not = xor i1 %A, true
+        %C = select i1 %A, i1 %not, i1 %B             
+        ret i1 %C
+; CHECK: @test62
+; CHECK: %not = xor i1 %A, true
+; CHECK: %C = and i1 %not, %B
+; CHECK: ret i1 %C
+}
+
+define i1 @test63(i1 %A, i1 %B) {
+        %not = xor i1 %A, true
+        %C = select i1 %A, i1 %B, i1 %not         
+        ret i1 %C
+; CHECK: @test63
+; CHECK: %not = xor i1 %A, true
+; CHECK: %C = or i1 %B, %not
+; CHECK: ret i1 %C
+}
diff --git a/test/Transforms/InstCombine/sext.ll b/test/Transforms/InstCombine/sext.ll
index f49a2ef..f198797 100644
--- a/test/Transforms/InstCombine/sext.ll
+++ b/test/Transforms/InstCombine/sext.ll
@@ -3,8 +3,8 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 
 declare i32 @llvm.ctpop.i32(i32)
-declare i32 @llvm.ctlz.i32(i32)
-declare i32 @llvm.cttz.i32(i32)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
 
 define i64 @test1(i32 %x) {
   %t = call i32 @llvm.ctpop.i32(i32 %x)
@@ -16,7 +16,7 @@ define i64 @test1(i32 %x) {
 }
 
 define i64 @test2(i32 %x) {
-  %t = call i32 @llvm.ctlz.i32(i32 %x)
+  %t = call i32 @llvm.ctlz.i32(i32 %x, i1 true)
   %s = sext i32 %t to i64
   ret i64 %s
 
@@ -25,7 +25,7 @@ define i64 @test2(i32 %x) {
 }
 
 define i64 @test3(i32 %x) {
-  %t = call i32 @llvm.cttz.i32(i32 %x)
+  %t = call i32 @llvm.cttz.i32(i32 %x, i1 true)
   %s = sext i32 %t to i64
   ret i64 %s
 
diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll
index 132d51a..52310e3 100644
--- a/test/Transforms/InstCombine/shift.ll
+++ b/test/Transforms/InstCombine/shift.ll
@@ -542,3 +542,75 @@ define i32 @test45(i32 %a) nounwind {
 ; CHECK-NEXT: %y = lshr i32 %a, 5
 ; CHECK-NEXT: ret i32 %y
 }
+
+define i32 @test46(i32 %a) {
+  %y = ashr exact i32 %a, 3
+  %z = shl i32 %y, 1
+  ret i32 %z
+; CHECK: @test46
+; CHECK-NEXT: %z = ashr exact i32 %a, 2
+; CHECK-NEXT: ret i32 %z
+}
+
+define i32 @test47(i32 %a) {
+  %y = lshr exact i32 %a, 3
+  %z = shl i32 %y, 1
+  ret i32 %z
+; CHECK: @test47
+; CHECK-NEXT: %z = lshr exact i32 %a, 2
+; CHECK-NEXT: ret i32 %z
+}
+
+define i32 @test48(i32 %x) {
+  %A = lshr exact i32 %x, 1
+  %B = shl i32 %A, 3
+  ret i32 %B
+; CHECK: @test48
+; CHECK-NEXT: %B = shl i32 %x, 2
+; CHECK-NEXT: ret i32 %B
+}
+
+define i32 @test49(i32 %x) {
+  %A = ashr exact i32 %x, 1
+  %B = shl i32 %A, 3
+  ret i32 %B
+; CHECK: @test49
+; CHECK-NEXT: %B = shl i32 %x, 2
+; CHECK-NEXT: ret i32 %B
+}
+
+define i32 @test50(i32 %x) {
+  %A = shl nsw i32 %x, 1
+  %B = ashr i32 %A, 3
+  ret i32 %B
+; CHECK: @test50
+; CHECK-NEXT: %B = ashr i32 %x, 2
+; CHECK-NEXT: ret i32 %B
+}
+
+define i32 @test51(i32 %x) {
+  %A = shl nuw i32 %x, 1
+  %B = lshr i32 %A, 3
+  ret i32 %B
+; CHECK: @test51
+; CHECK-NEXT: %B = lshr i32 %x, 2
+; CHECK-NEXT: ret i32 %B
+}
+
+define i32 @test52(i32 %x) {
+  %A = shl nsw i32 %x, 3
+  %B = ashr i32 %A, 1
+  ret i32 %B
+; CHECK: @test52
+; CHECK-NEXT: %B = shl nsw i32 %x, 2
+; CHECK-NEXT: ret i32 %B
+}
+
+define i32 @test53(i32 %x) {
+  %A = shl nuw i32 %x, 3
+  %B = lshr i32 %A, 1
+  ret i32 %B
+; CHECK: @test53
+; CHECK-NEXT: %B = shl nuw i32 %x, 2
+; CHECK-NEXT: ret i32 %B
+}
diff --git a/test/Transforms/InstCombine/sign-test-and-or.ll b/test/Transforms/InstCombine/sign-test-and-or.ll
index 47f5f30..a6066d8 100644
--- a/test/Transforms/InstCombine/sign-test-and-or.ll
+++ b/test/Transforms/InstCombine/sign-test-and-or.ll
@@ -77,3 +77,103 @@ if.then:
 if.end:
   ret void
 }
+
+define void @test5(i32 %a) nounwind {
+  %and = and i32 %a, 134217728
+  %1 = icmp eq i32 %and, 0
+  %2 = icmp sgt i32 %a, -1
+  %or.cond = and i1 %1, %2
+  br i1 %or.cond, label %if.then, label %if.end
+
+; CHECK: @test5
+; CHECK-NEXT: %1 = and i32 %a, -2013265920
+; CHECK-NEXT: %2 = icmp eq i32 %1, 0
+; CHECK-NEXT: br i1 %2, label %if.then, label %if.end
+
+if.then:
+  tail call void @foo() nounwind
+  ret void
+
+if.end:
+  ret void
+}
+
+define void @test6(i32 %a) nounwind {
+  %1 = icmp sgt i32 %a, -1
+  %and = and i32 %a, 134217728
+  %2 = icmp eq i32 %and, 0
+  %or.cond = and i1 %1, %2
+  br i1 %or.cond, label %if.then, label %if.end
+
+; CHECK: @test6
+; CHECK-NEXT: %1 = and i32 %a, -2013265920
+; CHECK-NEXT: %2 = icmp eq i32 %1, 0
+; CHECK-NEXT: br i1 %2, label %if.then, label %if.end
+
+if.then:
+  tail call void @foo() nounwind
+  ret void
+
+if.end:
+  ret void
+}
+
+define void @test7(i32 %a) nounwind {
+  %and = and i32 %a, 134217728
+  %1 = icmp ne i32 %and, 0
+  %2 = icmp slt i32 %a, 0
+  %or.cond = or i1 %1, %2
+  br i1 %or.cond, label %if.then, label %if.end
+
+; CHECK: @test7
+; CHECK-NEXT: %1 = and i32 %a, -2013265920
+; CHECK-NEXT: %2 = icmp eq i32 %1, 0
+; CHECK-NEXT: br i1 %2, label %if.end, label %if.the
+
+if.then:
+  tail call void @foo() nounwind
+  ret void
+
+if.end:
+  ret void
+}
+
+define void @test8(i32 %a) nounwind {
+  %1 = icmp slt i32 %a, 0
+  %and = and i32 %a, 134217728
+  %2 = icmp ne i32 %and, 0
+  %or.cond = or i1 %1, %2
+  br i1 %or.cond, label %if.then, label %if.end
+
+; CHECK: @test8
+; CHECK-NEXT: %1 = and i32 %a, -2013265920
+; CHECK-NEXT: %2 = icmp eq i32 %1, 0
+; CHECK-NEXT: br i1 %2, label %if.end, label %if.the
+
+if.then:
+  tail call void @foo() nounwind
+  ret void
+
+if.end:
+  ret void
+}
+
+define void @test9(i32 %a) nounwind {
+  %1 = and i32 %a, 1073741824
+  %2 = icmp ne i32 %1, 0
+  %3 = icmp sgt i32 %a, -1
+  %or.cond = and i1 %2, %3
+  br i1 %or.cond, label %if.then, label %if.end
+
+; CHECK: @test9
+; CHECK-NEXT: %1 = and i32 %a, -1073741824
+; CHECK-NEXT: %2 = icmp eq i32 %1, 1073741824
+; CHECK-NEXT: br i1 %2, label %if.then, label %if.end
+
+if.then:
+  tail call void @foo() nounwind
+  ret void
+
+if.end:
+  ret void
+}
diff --git a/test/Transforms/InstCombine/sub-xor.ll b/test/Transforms/InstCombine/sub-xor.ll
new file mode 100644
index 0000000..279e4ac
--- /dev/null
+++ b/test/Transforms/InstCombine/sub-xor.ll
@@ -0,0 +1,37 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+define i32 @test1(i32 %x) nounwind {
+  %and = and i32 %x, 31
+  %sub = sub i32 63, %and
+  ret i32 %sub
+
+; CHECK: @test1
+; CHECK-NEXT: and i32 %x, 31
+; CHECK-NEXT: xor i32 %and, 63
+; CHECK-NEXT: ret
+}
+
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
+
+define i32 @test2(i32 %x) nounwind {
+  %count = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) nounwind readnone
+  %sub = sub i32 31, %count
+  ret i32 %sub
+
+; CHECK: @test2
+; CHECK-NEXT: ctlz
+; CHECK-NEXT: xor i32 %count, 31
+; CHECK-NEXT: ret
+}
+
+define i32 @test3(i32 %x) nounwind {
+  %and = and i32 %x, 31
+  %sub = xor i32 31, %and
+  %add = add i32 %sub, 42
+  ret i32 %add
+
+; CHECK: @test3
+; CHECK-NEXT: and i32 %x, 31
+; CHECK-NEXT: sub i32 73, %and
+; CHECK-NEXT: ret
+}
diff --git a/test/Transforms/InstCombine/sub.ll b/test/Transforms/InstCombine/sub.ll
index 37de328..b71ec8c 100644
--- a/test/Transforms/InstCombine/sub.ll
+++ b/test/Transforms/InstCombine/sub.ll
@@ -301,3 +301,29 @@ define i32 @test28(i32 %x, i32 %y, i32 %z) {
 ; CHECK-NEXT: add i32
 ; CHECK-NEXT: ret i32
 }
+
+define i64 @test29(i8* %foo, i64 %i, i64 %j) {
+  %gep1 = getelementptr inbounds i8* %foo, i64 %i
+  %gep2 = getelementptr inbounds i8* %foo, i64 %j
+  %cast1 = ptrtoint i8* %gep1 to i64
+  %cast2 = ptrtoint i8* %gep2 to i64
+  %sub = sub i64 %cast1, %cast2
+  ret i64 %sub
+; CHECK: @test29
+; CHECK-NEXT: sub i64 %i, %j
+; CHECK-NEXT: ret i64
+}
+
+define i64 @test30(i8* %foo, i64 %i, i64 %j) {
+  %bit = bitcast i8* %foo to i32*
+  %gep1 = getelementptr inbounds i32* %bit, i64 %i
+  %gep2 = getelementptr inbounds i8* %foo, i64 %j
+  %cast1 = ptrtoint i32* %gep1 to i64
+  %cast2 = ptrtoint i8* %gep2 to i64
+  %sub = sub i64 %cast1, %cast2
+  ret i64 %sub
+; CHECK: @test30
+; CHECK-NEXT: %gep1.idx = shl nuw i64 %i, 2
+; CHECK-NEXT: sub i64 %gep1.idx, %j
+; CHECK-NEXT: ret i64
+}
diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll
index 896cb88..8f78c2e 100644
--- a/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/test/Transforms/InstCombine/vec_shuffle.ll
@@ -98,6 +98,17 @@ define <4 x i8> @test9a(<16 x i8> %tmp6) nounwind {
 	ret <4 x i8> %tmp9
 }
 
+; Test fold of two shuffles where the first shuffle vectors inputs are a
+; different length then the second.
+define <4 x i8> @test9b(<4 x i8> %tmp6, <4 x i8> %tmp7) nounwind {
+; CHECK: @test9
+; CHECK-NEXT: shufflevector
+; CHECK-NEXT: ret
+  %tmp1 = shufflevector <4 x i8> %tmp6, <4 x i8> %tmp7, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 4, i32 5, i32 2, i32 3>		; <<4 x i8>> [#uses=1]
+  %tmp9 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 4, i32 5>		; <<4 x i8>> [#uses=1]
+  ret <4 x i8> %tmp9
+}
+
 ; Redundant vector splats should be removed.  Radar 8597790.
 define <4 x i32> @test10(<4 x i32> %tmp5) nounwind {
 ; CHECK: @test10
@@ -107,3 +118,38 @@ define <4 x i32> @test10(<4 x i32> %tmp5) nounwind {
   %tmp7 = shufflevector <4 x i32> %tmp6, <4 x i32> undef, <4 x i32> zeroinitializer
   ret <4 x i32> %tmp7
 }
+
+; Test fold of two shuffles where the two shufflevector inputs's op1 are
+; the same
+define <8 x i8> @test11(<16 x i8> %tmp6) nounwind {
+; CHECK: @test11
+; CHECK-NEXT: shufflevector <16 x i8> %tmp6, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: ret
+  %tmp1 = shufflevector <16 x i8> %tmp6, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>		; <<4 x i8>> [#uses=1]
+  %tmp2 = shufflevector <16 x i8> %tmp6, <16 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>		; <<4 x i8>> [#uses=1]
+  %tmp3 = shufflevector <4 x i8> %tmp1, <4 x i8> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>		; <<8 x i8>> [#uses=1]
+  ret <8 x i8> %tmp3
+}
+
+; Test fold of two shuffles where the first shufflevector's inputs are
+; the same as the second
+define <8 x i8> @test12(<8 x i8> %tmp6, <8 x i8> %tmp2) nounwind {
+; CHECK: @test12
+; CHECK-NEXT: shufflevector <8 x i8> %tmp6, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 9, i32 8, i32 11, i32 12>
+; CHECK-NEXT: ret
+  %tmp1 = shufflevector <8 x i8> %tmp6, <8 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 undef, i32 7>	; <<8 x i8>> [#uses=1]
+  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 9, i32 8, i32 11, i32 12>		; <<8 x i8>> [#uses=1]
+  ret <8 x i8> %tmp3
+}
+
+; Test fold of two shuffles where the first shufflevector's inputs are
+; the same as the second
+define <8 x i8> @test12a(<8 x i8> %tmp6, <8 x i8> %tmp2) nounwind {
+; CHECK: @test12a
+; CHECK-NEXT: shufflevector <8 x i8> %tmp2, <8 x i8> %tmp6, <8 x i32> <i32 0, i32 3, i32 1, i32 4, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT: ret
+  %tmp1 = shufflevector <8 x i8> %tmp6, <8 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 undef, i32 7>	; <<8 x i8>> [#uses=1]
+  %tmp3 = shufflevector <8 x i8> %tmp2, <8 x i8> %tmp1, <8 x i32> <i32 0, i32 3, i32 1, i32 4, i32 8, i32 9, i32 10, i32 11>		; <<8 x i8>> [#uses=1]
+  ret <8 x i8> %tmp3
+}
+
diff --git a/test/Transforms/InstCombine/vector_gep1.ll b/test/Transforms/InstCombine/vector_gep1.ll
new file mode 100644
index 0000000..6523622
--- /dev/null
+++ b/test/Transforms/InstCombine/vector_gep1.ll
@@ -0,0 +1,37 @@
+; RUN: opt -instcombine %s -disable-output
+; RUN: opt -instsimplify %s -disable-output
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@G1 = global i8 zeroinitializer
+
+define <2 x i1> @test(<2 x i8*> %a, <2 x i8*> %b) {
+   %A = icmp eq <2 x i8*> %a, %b
+   ret <2 x i1> %A
+}
+
+define <2 x i1> @test2(<2 x i8*> %a) {
+  %A = inttoptr <2 x i32> <i32 1, i32 2> to <2 x i8*>
+  %B = icmp ult <2 x i8*> %A, zeroinitializer
+  ret <2 x i1> %B
+}
+
+define <2 x i1> @test3(<2 x i8*> %a) {
+  %g = getelementptr <2 x i8*> %a, <2 x i32> <i32 1, i32 0>
+  %B = icmp ult <2 x i8*> %g, zeroinitializer
+  ret <2 x i1> %B
+}
+
+define <1 x i1> @test4(<1 x i8*> %a) {
+  %g = getelementptr <1 x i8*> %a, <1 x i32> <i32 1>
+  %B = icmp ult <1 x i8*> %g, zeroinitializer
+  ret <1 x i1> %B
+}
+
+define <2 x i1> @test5(<2 x i8*> %a) {
+  %w = getelementptr <2 x i8*> %a, <2 x i32> zeroinitializer
+  %e = getelementptr <2 x i8*> %w, <2 x i32> <i32 5, i32 9>
+  %g = getelementptr <2 x i8*> %e, <2 x i32> <i32 1, i32 0>
+  %B = icmp ult <2 x i8*> %g, zeroinitializer
+  ret <2 x i1> %B
+}
diff --git a/test/Transforms/InstCombine/volatile_store.ll b/test/Transforms/InstCombine/volatile_store.ll
index 0518e5a..2256678 100644
--- a/test/Transforms/InstCombine/volatile_store.ll
+++ b/test/Transforms/InstCombine/volatile_store.ll
@@ -5,8 +5,8 @@
 
 define void @self_assign_1() {
 entry:
-	%tmp = volatile load i32* @x		; <i32> [#uses=1]
-	volatile store i32 %tmp, i32* @x
+	%tmp = load volatile i32* @x		; <i32> [#uses=1]
+	store volatile i32 %tmp, i32* @x
 	br label %return
 
 return:		; preds = %entry
diff --git a/test/Transforms/InstSimplify/2011-10-27-BinOpCrash.ll b/test/Transforms/InstSimplify/2011-10-27-BinOpCrash.ll
new file mode 100644
index 0000000..a10081a
--- /dev/null
+++ b/test/Transforms/InstSimplify/2011-10-27-BinOpCrash.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -instcombine
+
+@_ZN11xercesc_2_5L11gDigitCharsE = external constant [32 x i16], align 2
+@_ZN11xercesc_2_5L10gBaseCharsE = external constant [354 x i16], align 2
+@_ZN11xercesc_2_5L17gIdeographicCharsE = external constant [7 x i16], align 2
+@_ZN11xercesc_2_5L15gCombiningCharsE = external constant [163 x i16], align 2
+
+define i32 @_ZN11xercesc_2_515XMLRangeFactory11buildRangesEv(i32 %x) {
+  %a = add i32 %x, add (i32 add (i32 ashr (i32 add (i32 mul (i32 ptrtoint ([32 x i16]* @_ZN11xercesc_2_5L11gDigitCharsE to i32), i32 -1), i32 ptrtoint (i16* getelementptr inbounds ([32 x i16]* @_ZN11xercesc_2_5L11gDigitCharsE, i32 0, i32 30) to i32)), i32 1), i32 ashr (i32 add (i32 mul (i32 ptrtoint ([7 x i16]* @_ZN11xercesc_2_5L17gIdeographicCharsE to i32), i32 -1), i32 ptrtoint (i16* getelementptr inbounds ([7 x i16]* @_ZN11xercesc_2_5L17gIdeographicCharsE, i32 0, i32 4) to i32)), i32 1)), i32 8)
+  %b = add i32 %a, %x
+  ret i32 %b
+}
diff --git a/test/Transforms/InstSimplify/2011-11-23-MaskedBitsCrash.ll b/test/Transforms/InstSimplify/2011-11-23-MaskedBitsCrash.ll
new file mode 100644
index 0000000..6166536
--- /dev/null
+++ b/test/Transforms/InstSimplify/2011-11-23-MaskedBitsCrash.ll
@@ -0,0 +1,17 @@
+; RUN: opt < %s -instsimplify
+
+; The mul can be proved to always overflow (turning a negative value
+; into a positive one) and thus results in undefined behaviour.  At
+; the same time we were deducing from the nsw flag that that mul could
+; be assumed to have a negative value (since if not it has an undefined
+; value, which can be taken to be negative).  We were reporting the mul
+; as being both positive and negative, firing an assertion!
+define i1 @test1(i32 %a) {
+entry:
+  %0 = or i32 %a, 1
+  %1 = shl i32 %0, 31
+  %2 = mul nsw i32 %1, 4
+  %3 = and i32 %2, -4
+  %4 = icmp ne i32 %3, 0
+  ret i1 %4
+}
diff --git a/test/Transforms/InstSimplify/AndOrXor.ll b/test/Transforms/InstSimplify/AndOrXor.ll
new file mode 100644
index 0000000..33a4d6b
--- /dev/null
+++ b/test/Transforms/InstSimplify/AndOrXor.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+define i64 @pow2(i32 %x) {
+; CHECK: @pow2
+  %negx = sub i32 0, %x
+  %x2 = and i32 %x, %negx
+  %e = zext i32 %x2 to i64
+  %nege = sub i64 0, %e
+  %e2 = and i64 %e, %nege
+  ret i64 %e2
+; CHECK: ret i64 %e
+}
+
+define i64 @pow2b(i32 %x) {
+; CHECK: @pow2b
+  %sh = shl i32 2, %x
+  %e = zext i32 %sh to i64
+  %nege = sub i64 0, %e
+  %e2 = and i64 %e, %nege
+  ret i64 %e2
+; CHECK: ret i64 %e
+}
diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll
index 2cbd641..ced74bd 100644
--- a/test/Transforms/InstSimplify/compare.ll
+++ b/test/Transforms/InstSimplify/compare.ll
@@ -10,6 +10,161 @@ define i1 @ptrtoint() {
 ; CHECK: ret i1 false
 }
 
+define i1 @bitcast() {
+; CHECK: @bitcast
+  %a = alloca i32
+  %b = alloca i64
+  %x = bitcast i32* %a to i8*
+  %y = bitcast i64* %b to i8*
+  %cmp = icmp eq i8* %x, %y
+  ret i1 %cmp
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @gep() {
+; CHECK: @gep
+  %a = alloca [3 x i8], align 8
+  %x = getelementptr inbounds [3 x i8]* %a, i32 0, i32 0
+  %cmp = icmp eq i8* %x, null
+  ret i1 %cmp
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @gep2() {
+; CHECK: @gep2
+  %a = alloca [3 x i8], align 8
+  %x = getelementptr inbounds [3 x i8]* %a, i32 0, i32 0
+  %y = getelementptr inbounds [3 x i8]* %a, i32 0, i32 0
+  %cmp = icmp eq i8* %x, %y
+  ret i1 %cmp
+; CHECK-NEXT: ret i1 true
+}
+
+; PR11238
+%gept = type { i32, i32 }
+@gepy = global %gept zeroinitializer, align 8
+@gepz = extern_weak global %gept
+
+define i1 @gep3() {
+; CHECK: @gep3
+  %x = alloca %gept, align 8
+  %a = getelementptr %gept* %x, i64 0, i32 0
+  %b = getelementptr %gept* %x, i64 0, i32 1
+  %equal = icmp eq i32* %a, %b
+  ret i1 %equal
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @gep4() {
+; CHECK: @gep4
+  %x = alloca %gept, align 8
+  %a = getelementptr %gept* @gepy, i64 0, i32 0
+  %b = getelementptr %gept* @gepy, i64 0, i32 1
+  %equal = icmp eq i32* %a, %b
+  ret i1 %equal
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @gep5() {
+; CHECK: @gep5
+  %x = alloca %gept, align 8
+  %a = getelementptr inbounds %gept* %x, i64 0, i32 1
+  %b = getelementptr %gept* @gepy, i64 0, i32 0
+  %equal = icmp eq i32* %a, %b
+  ret i1 %equal
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @gep6(%gept* %x) {
+; Same as @gep3 but potentially null.
+; CHECK: @gep6
+  %a = getelementptr %gept* %x, i64 0, i32 0
+  %b = getelementptr %gept* %x, i64 0, i32 1
+  %equal = icmp eq i32* %a, %b
+  ret i1 %equal
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @gep7(%gept* %x) {
+; CHECK: @gep7
+  %a = getelementptr %gept* %x, i64 0, i32 0
+  %b = getelementptr %gept* @gepz, i64 0, i32 0
+  %equal = icmp eq i32* %a, %b
+  ret i1 %equal
+; CHECK: ret i1 %equal
+}
+
+define i1 @gep8(%gept* %x) {
+; CHECK: @gep8
+  %a = getelementptr %gept* %x, i32 1
+  %b = getelementptr %gept* %x, i32 -1
+  %equal = icmp ugt %gept* %a, %b
+  ret i1 %equal
+; CHECK: ret i1 %equal
+}
+
+define i1 @gep9(i8* %ptr) {
+; CHECK: @gep9
+; CHECK-NOT: ret
+; CHECK: ret i1 true
+
+entry:
+  %first1 = getelementptr inbounds i8* %ptr, i32 0
+  %first2 = getelementptr inbounds i8* %first1, i32 1
+  %first3 = getelementptr inbounds i8* %first2, i32 2
+  %first4 = getelementptr inbounds i8* %first3, i32 4
+  %last1 = getelementptr inbounds i8* %first2, i32 48
+  %last2 = getelementptr inbounds i8* %last1, i32 8
+  %last3 = getelementptr inbounds i8* %last2, i32 -4
+  %last4 = getelementptr inbounds i8* %last3, i32 -4
+  %first.int = ptrtoint i8* %first4 to i32
+  %last.int = ptrtoint i8* %last4 to i32
+  %cmp = icmp ne i32 %last.int, %first.int
+  ret i1 %cmp
+}
+
+define i1 @gep10(i8* %ptr) {
+; CHECK: @gep10
+; CHECK-NOT: ret
+; CHECK: ret i1 true
+
+entry:
+  %first1 = getelementptr inbounds i8* %ptr, i32 -2
+  %first2 = getelementptr inbounds i8* %first1, i32 44
+  %last1 = getelementptr inbounds i8* %ptr, i32 48
+  %last2 = getelementptr inbounds i8* %last1, i32 -6
+  %first.int = ptrtoint i8* %first2 to i32
+  %last.int = ptrtoint i8* %last2 to i32
+  %cmp = icmp eq i32 %last.int, %first.int
+  ret i1 %cmp
+}
+
+define i1 @gep11(i8* %ptr) {
+; CHECK: @gep11
+; CHECK-NOT: ret
+; CHECK: ret i1 true
+
+entry:
+  %first1 = getelementptr inbounds i8* %ptr, i32 -2
+  %last1 = getelementptr inbounds i8* %ptr, i32 48
+  %last2 = getelementptr inbounds i8* %last1, i32 -6
+  %cmp = icmp ult i8* %first1, %last2
+  ret i1 %cmp
+}
+
+define i1 @gep12(i8* %ptr) {
+; CHECK: @gep12
+; CHECK-NOT: ret
+; CHECK: ret i1 %cmp
+
+entry:
+  %first1 = getelementptr inbounds i8* %ptr, i32 -2
+  %last1 = getelementptr inbounds i8* %ptr, i32 48
+  %last2 = getelementptr inbounds i8* %last1, i32 -6
+  %cmp = icmp slt i8* %first1, %last2
+  ret i1 %cmp
+}
+
 define i1 @zext(i32 %x) {
 ; CHECK: @zext
   %e1 = zext i32 %x to i64
@@ -204,6 +359,24 @@ define i1 @select4(i1 %cond) {
 ; CHECK: ret i1 %cond
 }
 
+define i1 @select5(i32 %x) {
+; CHECK: @select5
+  %c = icmp eq i32 %x, 0
+  %s = select i1 %c, i32 1, i32 %x
+  %c2 = icmp eq i32 %s, 0
+  ret i1 %c2
+; CHECK: ret i1 false
+}
+
+define i1 @select6(i32 %x) {
+; CHECK: @select6
+  %c = icmp sgt i32 %x, 0
+  %s = select i1 %c, i32 %x, i32 4
+  %c2 = icmp eq i32 %s, 0
+  ret i1 %c2
+; CHECK: ret i1 %c2
+}
+
 define i1 @urem1(i32 %X, i32 %Y) {
 ; CHECK: @urem1
   %A = urem i32 %X, %Y
@@ -300,6 +473,40 @@ define i1 @udiv2(i32 %X, i32 %Y, i32 %Z) {
 ; CHECK: ret i1 true
 }
 
+define i1 @udiv3(i32 %X, i32 %Y) {
+; CHECK: @udiv3
+  %A = udiv i32 %X, %Y
+  %C = icmp ugt i32 %A, %X
+  ret i1 %C
+; CHECK: ret i1 false
+}
+
+define i1 @udiv4(i32 %X, i32 %Y) {
+; CHECK: @udiv4
+  %A = udiv i32 %X, %Y
+  %C = icmp ule i32 %A, %X
+  ret i1 %C
+; CHECK: ret i1 true
+}
+
+define i1 @udiv5(i32 %X) {
+; CHECK: @udiv5
+  %A = udiv i32 123, %X
+  %C = icmp ugt i32 %A, 124
+  ret i1 %C
+; CHECK: ret i1 false
+}
+
+; PR11340
+define i1 @udiv6(i32 %X) nounwind {
+; CHECK: @udiv6
+  %A = udiv i32 1, %X
+  %C = icmp eq i32 %A, 0
+  ret i1 %C
+; CHECK: ret i1 %C
+}
+
+
 define i1 @sdiv1(i32 %X) {
 ; CHECK: @sdiv1
   %A = sdiv i32 %X, 1000000
@@ -323,3 +530,71 @@ define i1 @and1(i32 %X) {
   ret i1 %B
 ; CHECK: ret i1 false
 }
+
+define i1 @mul1(i32 %X) {
+; CHECK: @mul1
+; Square of a non-zero number is non-zero if there is no overflow.
+  %Y = or i32 %X, 1
+  %M = mul nuw i32 %Y, %Y
+  %C = icmp eq i32 %M, 0
+  ret i1 %C
+; CHECK: ret i1 false
+}
+
+define i1 @mul2(i32 %X) {
+; CHECK: @mul2
+; Square of a non-zero number is positive if there is no signed overflow.
+  %Y = or i32 %X, 1
+  %M = mul nsw i32 %Y, %Y
+  %C = icmp sgt i32 %M, 0
+  ret i1 %C
+; CHECK: ret i1 true
+}
+
+define i1 @mul3(i32 %X, i32 %Y) {
+; CHECK: @mul3
+; Product of non-negative numbers is non-negative if there is no signed overflow.
+  %XX = mul nsw i32 %X, %X
+  %YY = mul nsw i32 %Y, %Y
+  %M = mul nsw i32 %XX, %YY
+  %C = icmp sge i32 %M, 0
+  ret i1 %C
+; CHECK: ret i1 true
+}
+
+define <2 x i1> @vectorselect1(<2 x i1> %cond) {
+; CHECK: @vectorselect1
+  %invert = xor <2 x i1> %cond, <i1 1, i1 1>
+  %s = select <2 x i1> %invert, <2 x i32> <i32 0, i32 0>, <2 x i32> <i32 1, i32 1>
+  %c = icmp ne <2 x i32> %s, <i32 0, i32 0>
+  ret <2 x i1> %c
+; CHECK: ret <2 x i1> %cond
+}
+
+; PR11948
+define <2 x i1> @vectorselectcrash(i32 %arg1) {
+  %tobool40 = icmp ne i32 %arg1, 0
+  %cond43 = select i1 %tobool40, <2 x i16> <i16 -5, i16 66>, <2 x i16> <i16 46, i16 1>
+  %cmp45 = icmp ugt <2 x i16> %cond43, <i16 73, i16 21>
+  ret <2 x i1> %cmp45
+}
+
+; PR12013
+define i1 @alloca_compare(i64 %idx) {
+  %sv = alloca { i32, i32, [124 x i32] }
+  %1 = getelementptr inbounds { i32, i32, [124 x i32] }* %sv, i32 0, i32 2, i64 %idx
+  %2 = icmp eq i32* %1, null
+  ret i1 %2
+  ; CHECK: alloca_compare
+  ; CHECK: ret i1 false
+}
+
+; PR12075
+define i1 @infinite_gep() {
+  ret i1 1
+
+unreachableblock:
+  %X = getelementptr i32 *%X, i32 1
+  %Y = icmp eq i32* %X, null
+  ret i1 %Y
+}
diff --git a/test/Transforms/InstSimplify/dg.exp b/test/Transforms/InstSimplify/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/InstSimplify/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/InstSimplify/lit.local.cfg b/test/Transforms/InstSimplify/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/InstSimplify/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/InstSimplify/phi.ll b/test/Transforms/InstSimplify/phi.ll
new file mode 100644
index 0000000..05cd40d
--- /dev/null
+++ b/test/Transforms/InstSimplify/phi.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+; PR12189
+define i1 @test1(i32 %x) {
+; CHECK: @test1
+  br i1 true, label %a, label %b
+
+a:
+  %aa = or i32 %x, 10
+  br label %c
+
+b:
+  %bb = or i32 %x, 10
+  br label %c
+
+c:
+  %cc = phi i32 [ %bb, %b ], [%aa, %a ]
+  %d = urem i32 %cc, 2
+  %e = icmp eq i32 %d, 0
+  ret i1 %e
+; CHECK: ret i1 %e
+}
diff --git a/test/Transforms/InstSimplify/ptr_diff.ll b/test/Transforms/InstSimplify/ptr_diff.ll
new file mode 100644
index 0000000..1eb1fd4
--- /dev/null
+++ b/test/Transforms/InstSimplify/ptr_diff.ll
@@ -0,0 +1,48 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i64 @ptrdiff1(i8* %ptr) {
+; CHECK: @ptrdiff1
+; CHECK-NEXT: ret i64 42
+
+  %first = getelementptr inbounds i8* %ptr, i32 0
+  %last = getelementptr inbounds i8* %ptr, i32 42
+  %first.int = ptrtoint i8* %first to i64
+  %last.int = ptrtoint i8* %last to i64
+  %diff = sub i64 %last.int, %first.int
+  ret i64 %diff
+}
+
+define i64 @ptrdiff2(i8* %ptr) {
+; CHECK: @ptrdiff2
+; CHECK-NEXT: ret i64 42
+
+  %first1 = getelementptr inbounds i8* %ptr, i32 0
+  %first2 = getelementptr inbounds i8* %first1, i32 1
+  %first3 = getelementptr inbounds i8* %first2, i32 2
+  %first4 = getelementptr inbounds i8* %first3, i32 4
+  %last1 = getelementptr inbounds i8* %first2, i32 48
+  %last2 = getelementptr inbounds i8* %last1, i32 8
+  %last3 = getelementptr inbounds i8* %last2, i32 -4
+  %last4 = getelementptr inbounds i8* %last3, i32 -4
+  %first.int = ptrtoint i8* %first4 to i64
+  %last.int = ptrtoint i8* %last4 to i64
+  %diff = sub i64 %last.int, %first.int
+  ret i64 %diff
+}
+
+define i64 @ptrdiff3(i8* %ptr) {
+; Don't bother with non-inbounds GEPs.
+; CHECK: @ptrdiff3
+; CHECK: getelementptr
+; CHECK: sub
+; CHECK: ret
+
+  %first = getelementptr i8* %ptr, i32 0
+  %last = getelementptr i8* %ptr, i32 42
+  %first.int = ptrtoint i8* %first to i64
+  %last.int = ptrtoint i8* %last to i64
+  %diff = sub i64 %last.int, %first.int
+  ret i64 %diff
+}
diff --git a/test/Transforms/InstSimplify/reassociate.ll b/test/Transforms/InstSimplify/reassociate.ll
index 3c8169e..e659e6f 100644
--- a/test/Transforms/InstSimplify/reassociate.ll
+++ b/test/Transforms/InstSimplify/reassociate.ll
@@ -184,3 +184,12 @@ define i32 @udiv5(i32 %x, i32 %y) {
 ; CHECK: ret i32 %x
 }
 
+define i16 @trunc1(i32 %x) {
+; CHECK: @trunc1
+  %y = add i32 %x, 1
+  %tx = trunc i32 %x to i16
+  %ty = trunc i32 %y to i16
+  %d = sub i16 %ty, %tx
+  ret i16 %d
+; CHECK: ret i16 1
+}
diff --git a/test/Transforms/InstSimplify/undef.ll b/test/Transforms/InstSimplify/undef.ll
index 8134cc8..23cd50f 100644
--- a/test/Transforms/InstSimplify/undef.ll
+++ b/test/Transforms/InstSimplify/undef.ll
@@ -84,6 +84,13 @@ define i64 @test11() {
   ret i64 %r
 }
 
+; @test11b
+; CHECK: ret i64 undef
+define i64 @test11b(i64 %a) {
+  %r = shl i64 %a, undef
+  ret i64 %r
+}
+
 ; @test12
 ; CHECK: ret i64 undef
 define i64 @test12() {
@@ -91,6 +98,13 @@ define i64 @test12() {
   ret i64 %r
 }
 
+; @test12b
+; CHECK: ret i64 undef
+define i64 @test12b(i64 %a) {
+  %r = ashr i64 %a, undef
+  ret i64 %r
+}
+
 ; @test13
 ; CHECK: ret i64 undef
 define i64 @test13() {
@@ -98,6 +112,13 @@ define i64 @test13() {
   ret i64 %r
 }
 
+; @test13b
+; CHECK: ret i64 undef
+define i64 @test13b(i64 %a) {
+  %r = lshr i64 %a, undef
+  ret i64 %r
+}
+
 ; @test14
 ; CHECK: ret i1 undef
 define i1 @test14() {
@@ -125,3 +146,10 @@ define i64 @test17(i64 %a) {
   %r = select i1 undef, i64 undef, i64 %a
   ret i64 %r
 }
+
+; @test18
+; CHECK: ret i64 undef
+define i64 @test18(i64 %a) {
+  %r = call i64 (i64)* undef(i64 %a)
+  ret i64 %r
+}
diff --git a/test/Transforms/InstSimplify/vector_gep.ll b/test/Transforms/InstSimplify/vector_gep.ll
new file mode 100644
index 0000000..f65260e
--- /dev/null
+++ b/test/Transforms/InstSimplify/vector_gep.ll
@@ -0,0 +1,8 @@
+;RUN: opt -instsimplify %s -disable-output
+declare void @helper(<2 x i8*>)
+define void @test(<2 x i8*> %a) {
+  %A = getelementptr <2 x i8*> %a, <2 x i32> <i32 0, i32 0>
+  call void @helper(<2 x i8*> %A)
+  ret void
+}
+
diff --git a/test/Transforms/Internalize/dg.exp b/test/Transforms/Internalize/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/Internalize/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/Internalize/lit.local.cfg b/test/Transforms/Internalize/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/Internalize/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/JumpThreading/2011-04-14-InfLoop.ll b/test/Transforms/JumpThreading/2011-04-14-InfLoop.ll
index 46aaa00..e80bae5 100644
--- a/test/Transforms/JumpThreading/2011-04-14-InfLoop.ll
+++ b/test/Transforms/JumpThreading/2011-04-14-InfLoop.ll
@@ -15,7 +15,7 @@ for.cond1177:
   br i1 %cmp1179, label %for.cond1177, label %land.rhs1320
 
 land.rhs1320:
-  %tmp1324 = volatile load i64* getelementptr inbounds (%0* @g_338, i64 0, i32 2), align 1, !tbaa !0
+  %tmp1324 = load volatile i64* getelementptr inbounds (%0* @g_338, i64 0, i32 2), align 1, !tbaa !0
   br label %if.end.i
 
 if.end.i:
diff --git a/test/Transforms/JumpThreading/crash.ll b/test/Transforms/JumpThreading/crash.ll
index 2115dd3..b9c0354 100644
--- a/test/Transforms/JumpThreading/crash.ll
+++ b/test/Transforms/JumpThreading/crash.ll
@@ -399,7 +399,7 @@ if.then237:
   br label %lbl_664
 
 lbl_596:                                          ; preds = %lbl_664, %for.end37
-  volatile store i64 undef, i64* undef, align 4
+  store volatile i64 undef, i64* undef, align 4
   br label %for.cond111
 
 for.cond111:                                      ; preds = %safe_sub_func_int64_t_s_s.exit, %lbl_596
diff --git a/test/Transforms/JumpThreading/dg.exp b/test/Transforms/JumpThreading/dg.exp
deleted file mode 100644
index de42dad..0000000
--- a/test/Transforms/JumpThreading/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.ll]]
diff --git a/test/Transforms/JumpThreading/lit.local.cfg b/test/Transforms/JumpThreading/lit.local.cfg
new file mode 100644
index 0000000..c6106e4
--- /dev/null
+++ b/test/Transforms/JumpThreading/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Transforms/JumpThreading/no-irreducible-loops.ll b/test/Transforms/JumpThreading/no-irreducible-loops.ll
index 7c7fe39..a4914f9 100644
--- a/test/Transforms/JumpThreading/no-irreducible-loops.ll
+++ b/test/Transforms/JumpThreading/no-irreducible-loops.ll
@@ -17,11 +17,11 @@ bb:		; preds = %bb4
 	br i1 %0, label %bb1, label %bb2
 
 bb1:		; preds = %bb
-	volatile store i32 1000, i32* @v1, align 4
+	store volatile i32 1000, i32* @v1, align 4
 	br label %bb3
 
 bb2:		; preds = %bb
-	volatile store i32 1001, i32* @v1, align 4
+	store volatile i32 1001, i32* @v1, align 4
 	br label %bb3
 
 bb3:		; preds = %bb2, %bb1
diff --git a/test/Transforms/JumpThreading/thread-loads.ll b/test/Transforms/JumpThreading/thread-loads.ll
index cce23ea..78d36e7 100644
--- a/test/Transforms/JumpThreading/thread-loads.ll
+++ b/test/Transforms/JumpThreading/thread-loads.ll
@@ -1,12 +1,12 @@
 ; RUN: opt < %s -jump-threading -S | FileCheck %s
-; rdar://6402033
 
-; Test that we can thread through the block with the partially redundant load (%2).
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin7"
 
-define i32 @foo(i32* %P) nounwind {
-; CHECK: foo
+; Test that we can thread through the block with the partially redundant load (%2).
+; rdar://6402033
+define i32 @test1(i32* %P) nounwind {
+; CHECK: @test1
 entry:
 	%0 = tail call i32 (...)* @f1() nounwind		; <i32> [#uses=1]
 	%1 = icmp eq i32 %0, 0		; <i1> [#uses=1]
@@ -39,3 +39,43 @@ bb3:		; preds = %bb1
 declare i32 @f1(...)
 
 declare i32 @f2(...)
+
+
+;; Check that we preserve TBAA information.
+; rdar://11039258
+
+define i32 @test2(i32* %P) nounwind {
+; CHECK: @test2
+entry:
+	%0 = tail call i32 (...)* @f1() nounwind		; <i32> [#uses=1]
+	%1 = icmp eq i32 %0, 0		; <i1> [#uses=1]
+	br i1 %1, label %bb1, label %bb
+
+bb:		; preds = %entry
+; CHECK: bb1.thread:
+; CHECK: store{{.*}}, !tbaa !0
+; CHECK: br label %bb3
+	store i32 42, i32* %P, align 4, !tbaa !0
+	br label %bb1
+
+bb1:		; preds = %entry, %bb
+	%res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
+	%2 = load i32* %P, align 4, !tbaa !0
+	%3 = icmp sgt i32 %2, 36
+	br i1 %3, label %bb3, label %bb2
+
+bb2:		; preds = %bb1
+	%4 = tail call i32 (...)* @f2() nounwind
+	ret i32 %res.0
+
+bb3:		; preds = %bb1
+; CHECK: bb3:
+; CHECK: %res.01 = phi i32 [ 1, %bb1.thread ], [ 0, %bb1 ]
+; CHECK: ret i32 %res.01
+	ret i32 %res.0
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+
diff --git a/test/Transforms/LCSSA/dg.exp b/test/Transforms/LCSSA/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LCSSA/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LCSSA/lit.local.cfg b/test/Transforms/LCSSA/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LCSSA/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LICM/2007-05-22-VolatileSink.ll b/test/Transforms/LICM/2007-05-22-VolatileSink.ll
index 17383c2..4df6ea7 100644
--- a/test/Transforms/LICM/2007-05-22-VolatileSink.ll
+++ b/test/Transforms/LICM/2007-05-22-VolatileSink.ll
@@ -10,7 +10,7 @@ entry:
 	br label %bb6
 
 bb:		; preds = %bb6
-	%tmp2 = volatile load i32* %DataIn		; <i32> [#uses=1]
+	%tmp2 = load volatile i32* %DataIn		; <i32> [#uses=1]
 	%tmp3 = getelementptr [64 x i32]* %buffer, i32 0, i32 %i.0		; <i32*> [#uses=1]
 	store i32 %tmp2, i32* %tmp3
 	%tmp5 = add i32 %i.0, 1		; <i32> [#uses=1]
@@ -28,7 +28,7 @@ bb12:		; preds = %bb22
 	%tmp16 = add i32 %tmp14, %i.1		; <i32> [#uses=1]
 	%tmp17 = getelementptr [64 x i32]* %buffer, i32 0, i32 %tmp16		; <i32*> [#uses=1]
 	%tmp18 = load i32* %tmp17		; <i32> [#uses=1]
-	volatile store i32 %tmp18, i32* %DataOut
+	store volatile i32 %tmp18, i32* %DataOut
 	%tmp21 = add i32 %j.1, 1		; <i32> [#uses=1]
 	br label %bb22
 
diff --git a/test/Transforms/LICM/2011-04-06-HoistMissedASTUpdate.ll b/test/Transforms/LICM/2011-04-06-HoistMissedASTUpdate.ll
index fd114f4..2bbc6ab 100644
--- a/test/Transforms/LICM/2011-04-06-HoistMissedASTUpdate.ll
+++ b/test/Transforms/LICM/2011-04-06-HoistMissedASTUpdate.ll
@@ -19,7 +19,7 @@ for.body4.lr.ph:
 
 for.body4:
   %l_612.11 = phi i32* [ undef, %for.body4.lr.ph ], [ %call19, %for.body4 ]
-  %tmp7 = volatile load i16* @g_39, align 2
+  %tmp7 = load volatile i16* @g_39, align 2
   %call = call i32** @func_108(i32*** undef)
   %call19 = call i32* @func_84(i32** %call)
   br i1 false, label %for.body4, label %for.cond.loopexit
diff --git a/test/Transforms/LICM/crash.ll b/test/Transforms/LICM/crash.ll
index ff7fa0b..de41d00 100644
--- a/test/Transforms/LICM/crash.ll
+++ b/test/Transforms/LICM/crash.ll
@@ -68,7 +68,7 @@ define void @test4() noreturn nounwind {
   br label %1
 
 ; <label>:1                                       ; preds = %1, %0
-  volatile store i32* @g_47, i32** undef, align 8
+  store volatile i32* @g_47, i32** undef, align 8
   store i32 undef, i32* @g_47, align 4
   br label %1
 }
diff --git a/test/Transforms/LICM/dg.exp b/test/Transforms/LICM/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LICM/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LICM/hoist-invariant-load.ll b/test/Transforms/LICM/hoist-invariant-load.ll
new file mode 100644
index 0000000..4e100d3
--- /dev/null
+++ b/test/Transforms/LICM/hoist-invariant-load.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -licm -stats -S |& grep "1 licm"
+
+@"\01L_OBJC_METH_VAR_NAME_" = internal global [4 x i8] c"foo\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@"\01L_OBJC_SELECTOR_REFERENCES_" = internal global i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_IMAGE_INFO" = internal constant [2 x i32] [i32 0, i32 16], section "__DATA, __objc_imageinfo, regular, no_dead_strip"
+@llvm.used = appending global [3 x i8*] [i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_" to i8*), i8* bitcast ([2 x i32]* @"\01L_OBJC_IMAGE_INFO" to i8*)], section "llvm.metadata"
+
+define void @test(i8* %x) uwtable ssp {
+entry:
+  %x.addr = alloca i8*, align 8
+  %i = alloca i32, align 4
+  store i8* %x, i8** %x.addr, align 8
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp ult i32 %0, 10000
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i8** %x.addr, align 8
+  %2 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", !invariant.load !0
+  %call = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %1, i8* %2)
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %3 = load i32* %i, align 4
+  %inc = add i32 %3, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
+
+!0 = metadata !{}
diff --git a/test/Transforms/LICM/lit.local.cfg b/test/Transforms/LICM/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LICM/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LICM/scalar_promote.ll b/test/Transforms/LICM/scalar_promote.ll
index 9aefc4f8..05a64d6 100644
--- a/test/Transforms/LICM/scalar_promote.ll
+++ b/test/Transforms/LICM/scalar_promote.ll
@@ -59,7 +59,7 @@ define void @test3(i32 %i) {
 	br label %Loop
 Loop:
         ; Should not promote this to a register
-	%x = volatile load i32* @X
+	%x = load volatile i32* @X
 	%x2 = add i32 %x, 1	
 	store i32 %x2, i32* @X
 	br i1 true, label %Out, label %Loop
@@ -133,7 +133,7 @@ Loop:		; preds = %Loop, %0
 	%x2 = add i32 %x, 1		; <i32> [#uses=1]
 	store i32 %x2, i32* @X
         
-        volatile store i32* @X, i32** %P2
+        store volatile i32* @X, i32** %P2
         
 	%Next = add i32 %j, 1		; <i32> [#uses=2]
 	%cond = icmp eq i32 %Next, 0		; <i1> [#uses=1]
diff --git a/test/Transforms/LICM/speculate.ll b/test/Transforms/LICM/speculate.ll
new file mode 100644
index 0000000..507b193
--- /dev/null
+++ b/test/Transforms/LICM/speculate.ll
@@ -0,0 +1,167 @@
+; RUN: opt -S -licm < %s | FileCheck %s
+
+; UDiv is safe to speculate if the denominator is known non-zero.
+
+; CHECK: @safe_udiv
+; CHECK:      %div = udiv i64 %x, %or
+; CHECK-NEXT: br label %for.body
+
+define void @safe_udiv(i64 %x, i64 %m, i64 %n, i32* %p, i64* %q) nounwind {
+entry:
+  %or = or i64 %m, 1
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.inc
+  %i.02 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %p, i64 %i.02
+  %0 = load i32* %arrayidx, align 4
+  %tobool = icmp eq i32 %0, 0
+  br i1 %tobool, label %for.inc, label %if.then
+
+if.then:                                          ; preds = %for.body
+  %div = udiv i64 %x, %or
+  %arrayidx1 = getelementptr inbounds i64* %q, i64 %i.02
+  store i64 %div, i64* %arrayidx1, align 8
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.then, %for.body
+  %inc = add i64 %i.02, 1
+  %cmp = icmp slt i64 %inc, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.inc, %entry
+  ret void
+}
+
+; UDiv is unsafe to speculate if the denominator is not known non-zero.
+
+; CHECK: @unsafe_udiv
+; CHECK-NOT:  udiv
+; CHECK: for.body:
+
+define void @unsafe_udiv(i64 %x, i64 %m, i64 %n, i32* %p, i64* %q) nounwind {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.inc
+  %i.02 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %p, i64 %i.02
+  %0 = load i32* %arrayidx, align 4
+  %tobool = icmp eq i32 %0, 0
+  br i1 %tobool, label %for.inc, label %if.then
+
+if.then:                                          ; preds = %for.body
+  %div = udiv i64 %x, %m
+  %arrayidx1 = getelementptr inbounds i64* %q, i64 %i.02
+  store i64 %div, i64* %arrayidx1, align 8
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.then, %for.body
+  %inc = add i64 %i.02, 1
+  %cmp = icmp slt i64 %inc, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.inc, %entry
+  ret void
+}
+
+; SDiv is safe to speculate if the denominator is known non-zero and
+; known to have at least one zero bit.
+
+; CHECK: @safe_sdiv
+; CHECK:      %div = sdiv i64 %x, %or
+; CHECK-NEXT: br label %for.body
+
+define void @safe_sdiv(i64 %x, i64 %m, i64 %n, i32* %p, i64* %q) nounwind {
+entry:
+  %and = and i64 %m, -3
+  %or = or i64 %and, 1
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.inc
+  %i.02 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %p, i64 %i.02
+  %0 = load i32* %arrayidx, align 4
+  %tobool = icmp eq i32 %0, 0
+  br i1 %tobool, label %for.inc, label %if.then
+
+if.then:                                          ; preds = %for.body
+  %div = sdiv i64 %x, %or
+  %arrayidx1 = getelementptr inbounds i64* %q, i64 %i.02
+  store i64 %div, i64* %arrayidx1, align 8
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.then, %for.body
+  %inc = add i64 %i.02, 1
+  %cmp = icmp slt i64 %inc, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.inc, %entry
+  ret void
+}
+
+; SDiv is unsafe to speculate if the denominator is not known non-zero.
+
+; CHECK: @unsafe_sdiv_a
+; CHECK-NOT:  sdiv
+; CHECK: for.body:
+
+define void @unsafe_sdiv_a(i64 %x, i64 %m, i64 %n, i32* %p, i64* %q) nounwind {
+entry:
+  %or = or i64 %m, 1
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.inc
+  %i.02 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %p, i64 %i.02
+  %0 = load i32* %arrayidx, align 4
+  %tobool = icmp eq i32 %0, 0
+  br i1 %tobool, label %for.inc, label %if.then
+
+if.then:                                          ; preds = %for.body
+  %div = sdiv i64 %x, %or
+  %arrayidx1 = getelementptr inbounds i64* %q, i64 %i.02
+  store i64 %div, i64* %arrayidx1, align 8
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.then, %for.body
+  %inc = add i64 %i.02, 1
+  %cmp = icmp slt i64 %inc, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.inc, %entry
+  ret void
+}
+
+; SDiv is unsafe to speculate if the denominator is not known to have a zero bit.
+
+; CHECK: @unsafe_sdiv_b
+; CHECK-NOT:  sdiv
+; CHECK: for.body:
+
+define void @unsafe_sdiv_b(i64 %x, i64 %m, i64 %n, i32* %p, i64* %q) nounwind {
+entry:
+  %and = and i64 %m, -3
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.inc
+  %i.02 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %p, i64 %i.02
+  %0 = load i32* %arrayidx, align 4
+  %tobool = icmp eq i32 %0, 0
+  br i1 %tobool, label %for.inc, label %if.then
+
+if.then:                                          ; preds = %for.body
+  %div = sdiv i64 %x, %and
+  %arrayidx1 = getelementptr inbounds i64* %q, i64 %i.02
+  store i64 %div, i64* %arrayidx1, align 8
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.then, %for.body
+  %inc = add i64 %i.02, 1
+  %cmp = icmp slt i64 %inc, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.inc, %entry
+  ret void
+}
diff --git a/test/Transforms/LoopDeletion/dg.exp b/test/Transforms/LoopDeletion/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LoopDeletion/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LoopDeletion/lit.local.cfg b/test/Transforms/LoopDeletion/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LoopDeletion/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopIdiom/dg.exp b/test/Transforms/LoopIdiom/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LoopIdiom/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LoopIdiom/lit.local.cfg b/test/Transforms/LoopIdiom/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LoopIdiom/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopRotate/alloca.ll b/test/Transforms/LoopRotate/alloca.ll
new file mode 100644
index 0000000..fd217ea
--- /dev/null
+++ b/test/Transforms/LoopRotate/alloca.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s -loop-rotate -S | FileCheck %s
+
+; Test alloca in -loop-rotate.
+
+; We expect a different value for %ptr each iteration (according to the
+; definition of alloca). I.e. each @use must be paired with an alloca.
+
+; CHECK: call void @use(i8* %
+; CHECK: %ptr = alloca i8
+
+@e = global i16 10
+
+declare void @use(i8*)
+
+define void @test() {
+entry:
+  %end = load i16* @e
+  br label %loop
+
+loop:
+  %n.phi = phi i16 [ %n, %loop.fin ], [ 0, %entry ]
+  %ptr = alloca i8
+  %cond = icmp eq i16 %n.phi, %end
+  br i1 %cond, label %exit, label %loop.fin
+
+loop.fin:
+  %n = add i16 %n.phi, 1
+  call void @use(i8* %ptr)
+  br label %loop
+
+exit:
+  ret void
+}
diff --git a/test/Transforms/LoopRotate/dbgvalue.ll b/test/Transforms/LoopRotate/dbgvalue.ll
index 9287178..b32ee82 100644
--- a/test/Transforms/LoopRotate/dbgvalue.ll
+++ b/test/Transforms/LoopRotate/dbgvalue.ll
@@ -1,11 +1,13 @@
 ; RUN: opt -S -loop-rotate  %s  | FileCheck %s
 
-; CHECK: entry
-; CHECK-NEXT: call void @llvm.dbg.value(metadata !{i32 %x}
-
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 define i32 @tak(i32 %x, i32 %y, i32 %z) nounwind ssp {
+; CHECK: define i32 @tak
+; CHECK: entry
+; CHECK-NEXT: call void @llvm.dbg.value(metadata !{i32 %x}
+
 entry:
   br label %tailrecurse
 
@@ -35,7 +37,45 @@ return:                                           ; preds = %if.end
   ret i32 %z.tr, !dbg !17
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+@channelColumns = external global i64
+@horzPlane = external global i8*, align 8
+
+define void @FindFreeHorzSeg(i64 %startCol, i64 %row, i64* %rowStart) {
+; Ensure that the loop increment basic block is rotated into the tail of the
+; body, even though it contains a debug intrinsic call.
+; CHECK: define void @FindFreeHorzSeg
+; CHECK: %dec = add
+; CHECK-NEXT: tail call void @llvm.dbg.value
+; CHECK-NEXT: br i1 %tobool, label %for.cond, label %for.end
+
+entry:
+  br label %for.cond
+
+for.cond:
+  %i.0 = phi i64 [ %startCol, %entry ], [ %dec, %for.inc ]
+  %cmp = icmp eq i64 %i.0, 0
+  br i1 %cmp, label %for.end, label %for.body
+
+for.body:
+  %0 = load i64* @channelColumns, align 8
+  %mul = mul i64 %0, %row
+  %add = add i64 %mul, %i.0
+  %1 = load i8** @horzPlane, align 8
+  %arrayidx = getelementptr inbounds i8* %1, i64 %add
+  %2 = load i8* %arrayidx, align 1
+  %tobool = icmp eq i8 %2, 0
+  br i1 %tobool, label %for.inc, label %for.end
+
+for.inc:
+  %dec = add i64 %i.0, -1
+  tail call void @llvm.dbg.value(metadata !{i64 %dec}, i64 0, metadata undef)
+  br label %for.cond
+
+for.end:
+  %add1 = add i64 %i.0, 1
+  store i64 %add1, i64* %rowStart, align 8
+  ret void
+}
 
 !llvm.dbg.sp = !{!0}
 
diff --git a/test/Transforms/LoopRotate/dg.exp b/test/Transforms/LoopRotate/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LoopRotate/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LoopRotate/lit.local.cfg b/test/Transforms/LoopRotate/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LoopRotate/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopRotate/simplifylatch.ll b/test/Transforms/LoopRotate/simplifylatch.ll
new file mode 100644
index 0000000..f422724
--- /dev/null
+++ b/test/Transforms/LoopRotate/simplifylatch.ll
@@ -0,0 +1,39 @@
+; RUN: opt -S < %s -loop-rotate -verify-dom-info -verify-loop-info | FileCheck %s
+; PR2624 unroll multiple exits
+
+@mode_table = global [4 x i32] zeroinitializer		; <[4 x i32]*> [#uses=1]
+
+; CHECK: @f
+; CHECK-NOT: bb4
+define i8 @f() {
+entry:
+	tail call i32 @fegetround( )		; <i32>:0 [#uses=1]
+	br label %bb
+
+bb:		; preds = %bb4, %entry
+	%mode.0 = phi i8 [ 0, %entry ], [ %indvar.next, %bb4 ]		; <i8> [#uses=4]
+	zext i8 %mode.0 to i32		; <i32>:1 [#uses=1]
+	getelementptr [4 x i32]* @mode_table, i32 0, i32 %1		; <i32*>:2 [#uses=1]
+	load i32* %2, align 4		; <i32>:3 [#uses=1]
+	icmp eq i32 %3, %0		; <i1>:4 [#uses=1]
+	br i1 %4, label %bb1, label %bb2
+
+bb1:		; preds = %bb
+	ret i8 %mode.0
+
+bb2:		; preds = %bb
+	icmp eq i8 %mode.0, 1		; <i1>:5 [#uses=1]
+	br i1 %5, label %bb5, label %bb4
+
+bb4:		; preds = %bb2
+	%indvar.next = add i8 %mode.0, 1		; <i8> [#uses=1]
+	br label %bb
+
+bb5:		; preds = %bb2
+	tail call void @raise_exception( ) noreturn
+	unreachable
+}
+
+declare i32 @fegetround()
+
+declare void @raise_exception() noreturn
diff --git a/test/Transforms/LoopSimplify/2011-12-14-LandingpadHeader.ll b/test/Transforms/LoopSimplify/2011-12-14-LandingpadHeader.ll
new file mode 100644
index 0000000..173a582
--- /dev/null
+++ b/test/Transforms/LoopSimplify/2011-12-14-LandingpadHeader.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -loop-simplify -S | FileCheck %s
+; PR11575
+
+@catchtypeinfo = external unnamed_addr constant { i8*, i8*, i8* }
+
+define void @main() uwtable ssp {
+entry:
+  invoke void @f1()
+          to label %try.cont19 unwind label %catch
+
+; CHECK: catch.preheader:
+; CHECK-NEXT: landingpad
+; CHECK: br label %catch
+
+; CHECK: catch.split-lp:
+; CHECK-NEXT: landingpad
+; CHECK: br label %catch
+
+catch:                                            ; preds = %if.else, %entry
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* bitcast ({ i8*, i8*, i8* }* @catchtypeinfo to i8*)
+  invoke void @f3()
+          to label %if.else unwind label %eh.resume
+
+if.else:                                          ; preds = %catch
+  invoke void @f2()
+          to label %try.cont19 unwind label %catch
+
+try.cont19:                                       ; preds = %if.else, %entry
+  ret void
+
+eh.resume:                                        ; preds = %catch
+  %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+          catch i8* bitcast ({ i8*, i8*, i8* }* @catchtypeinfo to i8*)
+  resume { i8*, i32 } undef
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @f1()
+
+declare void @f2()
+
+declare void @f3()
diff --git a/test/Transforms/LoopSimplify/2012-03-20-indirectbr.ll b/test/Transforms/LoopSimplify/2012-03-20-indirectbr.ll
new file mode 100644
index 0000000..9c805da
--- /dev/null
+++ b/test/Transforms/LoopSimplify/2012-03-20-indirectbr.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -loop-simplify -S | FileCheck %s
+
+; Make sure the preheader exists.
+; CHECK: sw.bb103:
+; CHECK: indirectbr {{.*}}label %while.cond112
+; CHECK: while.cond112:
+; But the tail is not split.
+; CHECK: for.body:
+; CHECK: indirectbr {{.*}}label %while.cond112
+define fastcc void @build_regex_nfa() nounwind uwtable ssp {
+entry:
+  indirectbr i8* blockaddress(@build_regex_nfa, %while.cond), [label %while.cond]
+
+while.cond:                                       ; preds = %if.then439, %entry
+  indirectbr i8* blockaddress(@build_regex_nfa, %sw.bb103), [label %do.body785, label %sw.bb103]
+
+sw.bb103:                                         ; preds = %while.body
+  indirectbr i8* blockaddress(@build_regex_nfa, %while.cond112), [label %while.cond112]
+
+while.cond112:                                    ; preds = %for.body, %for.cond.preheader, %sw.bb103
+  %pc.0 = phi i8 [ -1, %sw.bb103 ], [ 0, %for.body ], [ %pc.0, %for.cond.preheader ]
+  indirectbr i8* blockaddress(@build_regex_nfa, %Lsetdone), [label %sw.bb118, label %Lsetdone]
+
+sw.bb118:                                         ; preds = %while.cond112
+  indirectbr i8* blockaddress(@build_regex_nfa, %for.cond.preheader), [label %Lerror.loopexit, label %for.cond.preheader]
+
+for.cond.preheader:                               ; preds = %sw.bb118
+  indirectbr i8* blockaddress(@build_regex_nfa, %for.body), [label %while.cond112, label %for.body]
+
+for.body:                                         ; preds = %for.body, %for.cond.preheader
+  indirectbr i8* blockaddress(@build_regex_nfa, %for.body), [label %while.cond112, label %for.body]
+
+Lsetdone:                                         ; preds = %while.cond112
+  unreachable
+
+do.body785:                                       ; preds = %while.cond, %while.body
+  ret void
+
+Lerror.loopexit:                                  ; preds = %sw.bb118
+  unreachable
+}
diff --git a/test/Transforms/LoopSimplify/dg.exp b/test/Transforms/LoopSimplify/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LoopSimplify/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LoopSimplify/lit.local.cfg b/test/Transforms/LoopSimplify/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LoopSimplify/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopStrengthReduce/2008-08-13-CmpStride.ll b/test/Transforms/LoopStrengthReduce/2008-08-13-CmpStride.ll
index 90477d1..ce56bd3 100644
--- a/test/Transforms/LoopStrengthReduce/2008-08-13-CmpStride.ll
+++ b/test/Transforms/LoopStrengthReduce/2008-08-13-CmpStride.ll
@@ -10,7 +10,7 @@ entry:
 bb:		; preds = %bb, %entry
 	%l_2.0.reg2mem.0 = phi i16 [ 0, %entry ], [ %t1, %bb ]		; <i16> [#uses=2]
 	%t0 = shl i16 %l_2.0.reg2mem.0, 1		; <i16>:0 [#uses=1]
-	volatile store i16 %t0, i16* @g_3, align 2
+	store volatile i16 %t0, i16* @g_3, align 2
 	%t1 = add i16 %l_2.0.reg2mem.0, -3		; <i16>:1 [#uses=2]
 	%t2 = icmp slt i16 %t1, 1		; <i1>:2 [#uses=1]
 	br i1 %t2, label %bb, label %return
@@ -22,7 +22,7 @@ return:		; preds = %bb
 define i32 @main() nounwind {
 entry:
 	tail call void @func_1( ) nounwind
-	volatile load i16* @g_3, align 2		; <i16>:0 [#uses=1]
+	load volatile i16* @g_3, align 2		; <i16>:0 [#uses=1]
 	zext i16 %0 to i32		; <i32>:1 [#uses=1]
 	tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), i32 %1 ) nounwind		; <i32>:2 [#uses=0]
 	ret i32 0
diff --git a/test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll b/test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll
new file mode 100644
index 0000000..392a8bc
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll
@@ -0,0 +1,39 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+;
+; PR11571: handle a postinc user outside of for.body7 that requires
+; recursive expansion of a quadratic recurrence within for.body7. LSR
+; needs to forget that for.body7 is a postinc loop during expansion.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-unknown-freebsd10.0"
+
+@b = external global [121 x i32]
+
+; CHECK: @vb
+;   Outer recurrence:
+; CHECK: %lsr.iv1 = phi [121 x i32]*
+;   Inner recurrence:
+; CHECK: %lsr.iv = phi i32
+;   Outer step (relative to inner recurrence):
+; CHECK: %scevgep = getelementptr i1* %{{.*}}, i32 %lsr.iv
+;   Outer use:
+; CHECK: %lsr.iv3 = phi [121 x i32]* [ %lsr.iv1, %for.body43.preheader ]
+define void @vb() nounwind {
+for.cond.preheader:
+  br label %for.body7
+
+for.body7:
+  %indvars.iv77 = phi i32 [ %indvars.iv.next78, %for.body7 ], [ 1, %for.cond.preheader ]
+  %bf.072 = phi i32 [ %t1, %for.body7 ], [ 0, %for.cond.preheader ]
+  %t1 = add i32 %bf.072, %indvars.iv77
+  %indvars.iv.next78 = add i32 %indvars.iv77, 1
+  br i1 undef, label %for.body43, label %for.body7
+
+for.body43:
+  %bf.459 = phi i32 [ %inc44, %for.body43 ], [ %t1, %for.body7 ]
+  %inc44 = add nsw i32 %bf.459, 1
+  %arrayidx45 = getelementptr inbounds [121 x i32]* @b, i32 0, i32 %bf.459
+  %t2 = load i32* %arrayidx45, align 4
+  br label %for.body43
+}
+
diff --git a/test/Transforms/LoopStrengthReduce/2012-01-02-nopreheader.ll b/test/Transforms/LoopStrengthReduce/2012-01-02-nopreheader.ll
new file mode 100644
index 0000000..d7f5723
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2012-01-02-nopreheader.ll
@@ -0,0 +1,88 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+;
+; <rdar://10619599> "SelectionDAGBuilder shouldn't visit PHI nodes!" assert.
+; <rdar://10655343> SCEVExpander segfault on simple test case
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-f128:128:128-n8:16:32"
+target triple = "i386-apple-darwin"
+
+; LSR should convert the inner loop (bb7.us) IV (j.01.us) into float*.
+; This involves a nested AddRec, the outer AddRec's loop invariant components
+; cannot find a preheader, so they should be expanded in the loop header
+; (bb7.lr.ph.us) below the existing phi i.12.us.
+; Currently, LSR won't kick in on such loops.
+; CHECK: @nopreheader
+; CHECK: bb7.us:
+; CHECK-NOT: phi float*
+; CHECK: %j.01.us = phi i32
+; CHECK-NOT: phi float*
+define void @nopreheader(float* nocapture %a, i32 %n) nounwind {
+entry:
+  %0 = sdiv i32 %n, undef
+  indirectbr i8* undef, [label %bb10.preheader]
+
+bb10.preheader:                                   ; preds = %bb4
+  indirectbr i8* undef, [label %bb8.preheader.lr.ph, label %return]
+
+bb8.preheader.lr.ph:                              ; preds = %bb10.preheader
+  indirectbr i8* null, [label %bb7.lr.ph.us, label %bb9]
+
+bb7.lr.ph.us:                                     ; preds = %bb9.us, %bb8.preheader.lr.ph
+  %i.12.us = phi i32 [ %2, %bb9.us ], [ 0, %bb8.preheader.lr.ph ]
+  %tmp30 = mul i32 %0, %i.12.us
+  indirectbr i8* undef, [label %bb7.us]
+
+bb7.us:                                           ; preds = %bb7.lr.ph.us, %bb7.us
+  %j.01.us = phi i32 [ 0, %bb7.lr.ph.us ], [ %1, %bb7.us ]
+  %tmp31 = add i32 %tmp30, %j.01.us
+  %scevgep9 = getelementptr float* %a, i32 %tmp31
+  store float undef, float* %scevgep9, align 1
+  %1 = add nsw i32 %j.01.us, 1
+  indirectbr i8* undef, [label %bb9.us, label %bb7.us]
+
+bb9.us:                                           ; preds = %bb7.us
+  %2 = add nsw i32 %i.12.us, 1
+  indirectbr i8* undef, [label %bb7.lr.ph.us, label %return]
+
+bb9:                                              ; preds = %bb9, %bb8.preheader.lr.ph
+  indirectbr i8* undef, [label %bb9, label %return]
+
+return:                                           ; preds = %bb9, %bb9.us, %bb10.preheader
+  ret void
+}
+
+; In this case, SCEVExpander simply cannot materialize the AddRecExpr
+; that LSR picks. We must detect that %bb8.preheader does not have a
+; preheader and avoid performing LSR on %bb7.
+; CHECK: @nopreheader2
+; CHECK: bb7:
+; CHECK: %indvar = phi i32
+define fastcc void @nopreheader2([200 x i32]* nocapture %Array2) nounwind {
+entry:
+  indirectbr i8* undef, [label %bb]
+
+bb:                                               ; preds = %bb, %entry
+  indirectbr i8* undef, [label %bb3, label %bb]
+
+bb3:                                              ; preds = %bb3, %bb
+  indirectbr i8* undef, [label %bb8.preheader, label %bb3]
+
+bb8.preheader:                                    ; preds = %bb9, %bb3
+  %indvar5 = phi i32 [ %indvar.next6, %bb9 ], [ 0, %bb3 ]
+  %tmp26 = add i32 %indvar5, 13
+  indirectbr i8* null, [label %bb7]
+
+bb7:                                              ; preds = %bb8.preheader, %bb7
+  %indvar = phi i32 [ 0, %bb8.preheader ], [ %indvar.next, %bb7 ]
+  %scevgep = getelementptr [200 x i32]* %Array2, i32 %tmp26, i32 %indvar
+  store i32 undef, i32* %scevgep, align 4
+  %indvar.next = add i32 %indvar, 1
+  indirectbr i8* undef, [label %bb9, label %bb7]
+
+bb9:                                              ; preds = %bb7
+  %indvar.next6 = add i32 %indvar5, 1
+  indirectbr i8* undef, [label %return, label %bb8.preheader]
+
+return:                                           ; preds = %bb9
+  ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/2012-01-16-nopreheader.ll b/test/Transforms/LoopStrengthReduce/2012-01-16-nopreheader.ll
new file mode 100644
index 0000000..3036a7e
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2012-01-16-nopreheader.ll
@@ -0,0 +1,113 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+;
+; <rdar://10701050> "Cannot split an edge from an IndirectBrInst" assert.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; while.cond197 is a dominates the simplified loop while.cond238 but
+; has no with no preheader.
+;
+; CHECK: @nopreheader
+; CHECK: %while.cond238
+; CHECK: phi i64
+; CHECK-NOT: phi
+; CHECK: indirectbr
+define void @nopreheader(i8* %end) nounwind {
+entry:
+  br label %while.cond179
+
+while.cond179:                                    ; preds = %if.end434, %if.end369, %if.end277, %if.end165
+  %s.1 = phi i8* [ undef, %if.end434 ], [ %incdec.ptr356, %if.end348 ], [ undef, %entry ]
+  indirectbr i8* undef, [label %land.rhs184, label %while.end453]
+
+land.rhs184:                                      ; preds = %while.cond179
+  indirectbr i8* undef, [label %while.end453, label %while.cond197]
+
+while.cond197:                                    ; preds = %land.rhs202, %land.rhs184
+  %0 = phi i64 [ %indvar.next11, %land.rhs202 ], [ 0, %land.rhs184 ]
+  indirectbr i8* undef, [label %land.rhs202, label %while.end215]
+
+land.rhs202:                                      ; preds = %while.cond197
+  %indvar.next11 = add i64 %0, 1
+  indirectbr i8* undef, [label %while.end215, label %while.cond197]
+
+while.end215:                                     ; preds = %land.rhs202, %while.cond197
+  indirectbr i8* undef, [label %PREMATURE, label %if.end221]
+
+if.end221:                                        ; preds = %while.end215
+  indirectbr i8* undef, [label %while.cond238.preheader, label %lor.lhs.false227]
+
+lor.lhs.false227:                                 ; preds = %if.end221
+  indirectbr i8* undef, [label %while.cond238.preheader, label %if.else]
+
+while.cond238.preheader:                          ; preds = %lor.lhs.false227, %if.end221
+  %tmp16 = add i64 %0, 2
+  indirectbr i8* undef, [label %while.cond238]
+
+while.cond238:                                    ; preds = %land.rhs243, %while.cond238.preheader
+  %1 = phi i64 [ %indvar.next15, %land.rhs243 ], [ 0, %while.cond238.preheader ]
+  %tmp36 = add i64 %tmp16, %1
+  %s.3 = getelementptr i8* %s.1, i64 %tmp36
+  %cmp241 = icmp ult i8* %s.3, %end
+  indirectbr i8* undef, [label %land.rhs243, label %while.end256]
+
+land.rhs243:                                      ; preds = %while.cond238
+  %indvar.next15 = add i64 %1, 1
+  indirectbr i8* undef, [label %while.end256, label %while.cond238]
+
+while.end256:                                     ; preds = %land.rhs243, %while.cond238
+  indirectbr i8* undef, [label %PREMATURE]
+
+if.else:                                          ; preds = %lor.lhs.false227
+  indirectbr i8* undef, [label %if.then297, label %if.else386]
+
+if.then297:                                       ; preds = %if.else
+  indirectbr i8* undef, [label %PREMATURE, label %if.end307]
+
+if.end307:                                        ; preds = %if.then297
+  indirectbr i8* undef, [label %if.end314, label %FAIL]
+
+if.end314:                                        ; preds = %if.end307
+  indirectbr i8* undef, [label %if.end340]
+
+if.end340:                                        ; preds = %while.end334
+  indirectbr i8* undef, [label %PREMATURE, label %if.end348]
+
+if.end348:                                        ; preds = %if.end340
+  %incdec.ptr356 = getelementptr inbounds i8* undef, i64 2
+  indirectbr i8* undef, [label %while.cond179]
+
+if.else386:                                       ; preds = %if.else
+  indirectbr i8* undef, [label %while.end453, label %if.end434]
+
+if.end434:                                        ; preds = %if.then428, %if.end421
+  indirectbr i8* undef, [label %while.cond179]
+
+while.end453:                                     ; preds = %if.else386, %land.rhs184, %while.cond179
+  indirectbr i8* undef, [label %PREMATURE, label %if.end459]
+
+if.end459:                                        ; preds = %while.end453
+  indirectbr i8* undef, [label %if.then465, label %FAIL]
+
+if.then465:                                       ; preds = %if.end459
+  indirectbr i8* undef, [label %return, label %if.then479]
+
+if.then479:                                       ; preds = %if.then465
+  indirectbr i8* undef, [label %return]
+
+FAIL:                                             ; preds = %if.end459, %if.end307, %land.lhs.true142, %land.lhs.true131, %while.end
+  indirectbr i8* undef, [label %DECL_FAIL]
+
+PREMATURE:                                        ; preds = %while.end453, %while.end415, %if.end340, %while.end334, %if.then297, %while.end256, %while.end215
+  indirectbr i8* undef, [label %return, label %if.then495]
+
+if.then495:                                       ; preds = %PREMATURE
+  indirectbr i8* undef, [label %return]
+
+DECL_FAIL:                                        ; preds = %if.then488, %FAIL, %land.lhs.true99, %lor.lhs.false, %if.end83, %if.then39, %if.end
+  indirectbr i8* undef, [label %return]
+
+return:                                           ; preds = %if.then512, %if.end504, %DECL_FAIL, %if.then495, %PREMATURE, %if.then479, %if.then465, %if.then69, %if.end52, %if.end19, %if.then
+  ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/2012-03-15-nopreheader.ll b/test/Transforms/LoopStrengthReduce/2012-03-15-nopreheader.ll
new file mode 100644
index 0000000..0172492
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2012-03-15-nopreheader.ll
@@ -0,0 +1,155 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+;
+; <rdar://problem/11049788> Segmentation fault: 11 in LoopStrengthReduce
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; IVUsers should not consider tmp128 a valid user because it is not in a
+; simplified loop nest.
+; CHECK: @nopreheader
+; CHECK: for.cond:
+; CHECK: %tmp128 = add i64 %0, %indvar65
+define void @nopreheader(i8* %cmd) nounwind ssp {
+entry:
+  indirectbr i8* undef, [label %while.cond]
+
+while.cond:                                       ; preds = %while.body, %entry
+  %0 = phi i64 [ %indvar.next48, %while.body ], [ 0, %entry ]
+  indirectbr i8* undef, [label %while.end, label %while.body]
+
+while.body:                                       ; preds = %lor.rhs, %lor.lhs.false17, %lor.lhs.false11, %lor.lhs.false, %land.rhs
+  %indvar.next48 = add i64 %0, 1
+  indirectbr i8* undef, [label %while.cond]
+
+while.end:                                        ; preds = %lor.rhs, %while.cond
+  indirectbr i8* undef, [label %if.end152]
+
+if.end152:                                        ; preds = %lor.lhs.false144, %if.end110
+  indirectbr i8* undef, [label %lor.lhs.false184, label %for.cond]
+
+lor.lhs.false184:                                 ; preds = %lor.lhs.false177
+  indirectbr i8* undef, [label %return, label %for.cond]
+
+for.cond:                                         ; preds = %for.inc, %lor.lhs.false184, %if.end152
+  %indvar65 = phi i64 [ %indvar.next66, %for.inc ], [ 0, %lor.lhs.false184 ], [ 0, %if.end152 ]
+  %tmp128 = add i64 %0, %indvar65
+  %s.4 = getelementptr i8* %cmd, i64 %tmp128
+  %tmp195 = load i8* %s.4, align 1
+  indirectbr i8* undef, [label %return, label %land.rhs198]
+
+land.rhs198:                                      ; preds = %for.cond
+  indirectbr i8* undef, [label %return, label %for.inc]
+
+for.inc:                                          ; preds = %lor.rhs234, %land.lhs.true228, %land.lhs.true216, %land.lhs.true204
+  %indvar.next66 = add i64 %indvar65, 1
+  indirectbr i8* undef, [label %for.cond]
+
+return:                                           ; preds = %if.end677, %doshell, %if.then96
+  ret void
+}
+
+; Another case with a dominating loop that does not contain the IV
+; User. Just make sure it doesn't assert.
+define void @nopreheader2() nounwind ssp {
+entry:
+  indirectbr i8* undef, [label %while.cond, label %return]
+
+while.cond:                                       ; preds = %while.cond.backedge, %entry
+  indirectbr i8* undef, [label %while.cond.backedge, label %lor.rhs]
+
+lor.rhs:                                          ; preds = %while.cond
+  indirectbr i8* undef, [label %while.cond.backedge, label %while.end]
+
+while.cond.backedge:                              ; preds = %lor.rhs, %while.cond
+  indirectbr i8* undef, [label %while.cond]
+
+while.end:                                        ; preds = %lor.rhs
+  indirectbr i8* undef, [label %if.then18, label %return]
+
+if.then18:                                        ; preds = %while.end
+  indirectbr i8* undef, [label %if.end35, label %lor.lhs.false]
+
+lor.lhs.false:                                    ; preds = %if.then18
+  indirectbr i8* undef, [label %if.end35, label %return]
+
+if.end35:                                         ; preds = %lor.lhs.false, %if.then18
+  indirectbr i8* undef, [label %while.cond36]
+
+while.cond36:                                     ; preds = %while.body49, %if.end35
+  %0 = phi i64 [ %indvar.next13, %while.body49 ], [ 0, %if.end35 ]
+  indirectbr i8* undef, [label %while.body49, label %lor.rhs42]
+
+lor.rhs42:                                        ; preds = %while.cond36
+  indirectbr i8* undef, [label %while.body49, label %while.end52]
+
+while.body49:                                     ; preds = %lor.rhs42, %while.cond36
+  %indvar.next13 = add i64 %0, 1
+  indirectbr i8* undef, [label %while.cond36]
+
+while.end52:                                      ; preds = %lor.rhs42
+  indirectbr i8* undef, [label %land.lhs.true, label %return]
+
+land.lhs.true:                                    ; preds = %while.end52
+  indirectbr i8* undef, [label %while.cond66.preheader, label %return]
+
+while.cond66.preheader:                           ; preds = %land.lhs.true
+  indirectbr i8* undef, [label %while.cond66]
+
+while.cond66:                                     ; preds = %while.body77, %while.cond66.preheader
+  indirectbr i8* undef, [label %land.rhs, label %while.cond81.preheader]
+
+land.rhs:                                         ; preds = %while.cond66
+  indirectbr i8* undef, [label %while.body77, label %while.cond81.preheader]
+
+while.cond81.preheader:                           ; preds = %land.rhs, %while.cond66
+  %tmp45 = add i64 undef, %0
+  %tmp46 = add i64 %tmp45, undef
+  indirectbr i8* undef, [label %while.cond81]
+
+while.body77:                                     ; preds = %land.rhs
+  indirectbr i8* undef, [label %while.cond66]
+
+while.cond81:                                     ; preds = %while.body94, %while.cond81.preheader
+  %tmp25 = add i64 %tmp46, undef
+  indirectbr i8* undef, [label %while.body94, label %lor.rhs87]
+
+lor.rhs87:                                        ; preds = %while.cond81
+  indirectbr i8* undef, [label %while.body94, label %return]
+
+while.body94:                                     ; preds = %lor.rhs87, %while.cond81
+  indirectbr i8* undef, [label %while.cond81]
+
+return:                                           ; preds = %if.end216, %land.lhs.true183, %land.lhs.true, %while.end52, %lor.lhs.false, %while.end, %entry
+  ret void
+}
+
+; Test a phi operand IV User dominated by a no-preheader loop.
+define void @nopreheader3() nounwind uwtable ssp align 2 {
+entry:
+  indirectbr i8* blockaddress(@nopreheader3, %if.end10), [label %if.end22, label %if.end10]
+
+if.end10:                                         ; preds = %entry
+  indirectbr i8* blockaddress(@nopreheader3, %if.end6.i), [label %if.end22, label %if.end6.i]
+
+if.end6.i:                                        ; preds = %if.end10
+  indirectbr i8* blockaddress(@nopreheader3, %while.cond2.preheader.i.i), [label %if.then12, label %while.cond2.preheader.i.i]
+
+while.cond2.preheader.i.i:                        ; preds = %while.end.i18.i, %if.end6.i
+  indirectbr i8* blockaddress(@nopreheader3, %while.cond2.i.i), [label %while.cond2.i.i]
+
+while.cond2.i.i:                                  ; preds = %while.cond2.i.i, %while.cond2.preheader.i.i
+  %i1.1.i14.i = phi i32 [ %add.i15.i, %while.cond2.i.i ], [ undef, %while.cond2.preheader.i.i ]
+  %add.i15.i = add nsw i32 %i1.1.i14.i, undef
+  indirectbr i8* blockaddress(@nopreheader3, %while.end.i18.i), [label %while.cond2.i.i, label %while.end.i18.i]
+
+while.end.i18.i:                                  ; preds = %while.cond2.i.i
+  indirectbr i8* blockaddress(@nopreheader3, %while.cond2.preheader.i.i), [label %if.then12, label %while.cond2.preheader.i.i]
+
+if.then12:                                        ; preds = %while.end.i18.i, %if.end6.i
+  %i1.0.lcssa.i.i = phi i32 [ undef, %if.end6.i ], [ %i1.1.i14.i, %while.end.i18.i ]
+  indirectbr i8* blockaddress(@nopreheader3, %if.end22), [label %if.end22]
+
+if.end22:                                         ; preds = %if.then12, %if.end10, %entry
+  ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/2012-03-26-constexpr.ll b/test/Transforms/LoopStrengthReduce/2012-03-26-constexpr.ll
new file mode 100644
index 0000000..c9b11a9
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2012-03-26-constexpr.ll
@@ -0,0 +1,49 @@
+; RUN: opt < %s -loop-reduce -S
+; PR11950: isHighCostExpansion crashes on ConstExpr
+;
+; The crash happened during IVChain analysis (CollectChains). We don't
+; really care how LSR decides to transform this loop, so we don't
+; check it. As long as the analysis doesn't crash we're ok.
+target datalayout = "e-p:64:64:64-n32:64"
+
+%struct.this_structure_s.0.5 = type { [6144 x [8 x i32]], [6144 x [8 x i32]], [6147 x [4 x i32]], [8 x i32], [2 x i8*], [2 x i8*], [6144 x i8], [6144 x i32], [6144 x i32], [4 x [4 x i8]] }
+
+define internal fastcc void @someFunction(%struct.this_structure_s.0.5* nocapture %scratch, i32 %stage, i32 %cbSize) nounwind {
+entry:
+  %0 = getelementptr inbounds %struct.this_structure_s.0.5* %scratch, i32 0, i32 4, i32 %stage
+  %1 = load i8** %0, align 4
+  %2 = getelementptr inbounds %struct.this_structure_s.0.5* %scratch, i32 0, i32 5, i32 %stage
+  %3 = load i8** %2, align 4
+  %4 = getelementptr inbounds %struct.this_structure_s.0.5* %scratch, i32 0, i32 2, i32 0, i32 0
+  %tmp11 = shl i32 %stage, 1
+  %tmp1325 = or i32 %tmp11, 1
+  br label %__label_D_1608
+
+__label_D_1608:                                   ; preds = %__label_D_1608, %entry
+  %i.12 = phi i32 [ 0, %entry ], [ %10, %__label_D_1608 ]
+  %tmp = shl i32 %i.12, 2
+  %lvar_g.13 = getelementptr i32* %4, i32 %tmp
+  %tmp626 = or i32 %tmp, 1
+  %scevgep = getelementptr i32* %4, i32 %tmp626
+  %tmp727 = or i32 %tmp, 2
+  %scevgep8 = getelementptr i32* %4, i32 %tmp727
+  %tmp928 = or i32 %tmp, 3
+  %scevgep10 = getelementptr i32* %4, i32 %tmp928
+  %scevgep12 = getelementptr %struct.this_structure_s.0.5* %scratch, i32 0, i32 9, i32 %tmp11, i32 %i.12
+  %scevgep14 = getelementptr %struct.this_structure_s.0.5* %scratch, i32 0, i32 9, i32 %tmp1325, i32 %i.12
+  %5 = load i8* %scevgep12, align 1
+  %6 = sext i8 %5 to i32
+  %7 = load i8* %scevgep14, align 1
+  %8 = sext i8 %7 to i32
+  store i32 0, i32* %lvar_g.13, align 4
+  store i32 %8, i32* %scevgep, align 4
+  store i32 %6, i32* %scevgep8, align 4
+  %9 = add nsw i32 %8, %6
+  store i32 %9, i32* %scevgep10, align 4
+  %10 = add nsw i32 %i.12, 1
+  %exitcond = icmp eq i32 %10, 3
+  br i1 %exitcond, label %return, label %__label_D_1608
+
+return:                                           ; preds = %__label_D_1608
+  ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
new file mode 100644
index 0000000..9189d79
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
@@ -0,0 +1,292 @@
+; RUN: llc < %s -O3 -march=thumb -mcpu=cortex-a9 | FileCheck %s -check-prefix=A9
+
+; @simple is the most basic chain of address induction variables. Chaining
+; saves at least one register and avoids complex addressing and setup
+; code.
+;
+; A9: @simple
+; no expensive address computation in the preheader
+; A9: lsl
+; A9-NOT: lsl
+; A9: %loop
+; no complex address modes
+; A9-NOT: lsl
+define i32 @simple(i32* %a, i32* %b, i32 %x) nounwind {
+entry:
+  br label %loop
+loop:
+  %iv = phi i32* [ %a, %entry ], [ %iv4, %loop ]
+  %s = phi i32 [ 0, %entry ], [ %s4, %loop ]
+  %v = load i32* %iv
+  %iv1 = getelementptr inbounds i32* %iv, i32 %x
+  %v1 = load i32* %iv1
+  %iv2 = getelementptr inbounds i32* %iv1, i32 %x
+  %v2 = load i32* %iv2
+  %iv3 = getelementptr inbounds i32* %iv2, i32 %x
+  %v3 = load i32* %iv3
+  %s1 = add i32 %s, %v
+  %s2 = add i32 %s1, %v1
+  %s3 = add i32 %s2, %v2
+  %s4 = add i32 %s3, %v3
+  %iv4 = getelementptr inbounds i32* %iv3, i32 %x
+  %cmp = icmp eq i32* %iv4, %b
+  br i1 %cmp, label %exit, label %loop
+exit:
+  ret i32 %s4
+}
+
+; @user is not currently chained because the IV is live across memory ops.
+;
+; A9: @user
+; stride multiples computed in the preheader
+; A9: lsl
+; A9: lsl
+; A9: %loop
+; complex address modes
+; A9: lsl
+; A9: lsl
+define i32 @user(i32* %a, i32* %b, i32 %x) nounwind {
+entry:
+  br label %loop
+loop:
+  %iv = phi i32* [ %a, %entry ], [ %iv4, %loop ]
+  %s = phi i32 [ 0, %entry ], [ %s4, %loop ]
+  %v = load i32* %iv
+  %iv1 = getelementptr inbounds i32* %iv, i32 %x
+  %v1 = load i32* %iv1
+  %iv2 = getelementptr inbounds i32* %iv1, i32 %x
+  %v2 = load i32* %iv2
+  %iv3 = getelementptr inbounds i32* %iv2, i32 %x
+  %v3 = load i32* %iv3
+  %s1 = add i32 %s, %v
+  %s2 = add i32 %s1, %v1
+  %s3 = add i32 %s2, %v2
+  %s4 = add i32 %s3, %v3
+  %iv4 = getelementptr inbounds i32* %iv3, i32 %x
+  store i32 %s4, i32* %iv
+  %cmp = icmp eq i32* %iv4, %b
+  br i1 %cmp, label %exit, label %loop
+exit:
+  ret i32 %s4
+}
+
+; @extrastride is a slightly more interesting case of a single
+; complete chain with multiple strides. The test case IR is what LSR
+; used to do, and exactly what we don't want to do. LSR's new IV
+; chaining feature should now undo the damage.
+;
+; A9: extrastride:
+; no spills
+; A9-NOT: str
+; only one stride multiple in the preheader
+; A9: lsl
+; A9-NOT: {{str r|lsl}}
+; A9: %for.body{{$}}
+; no complex address modes or reloads
+; A9-NOT: {{ldr .*[sp]|lsl}}
+define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind {
+entry:
+  %cmp8 = icmp eq i32 %z, 0
+  br i1 %cmp8, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %entry
+  %add.ptr.sum = shl i32 %main_stride, 1 ; s*2
+  %add.ptr1.sum = add i32 %add.ptr.sum, %main_stride ; s*3
+  %add.ptr2.sum = add i32 %x, %main_stride ; s + x
+  %add.ptr4.sum = shl i32 %main_stride, 2 ; s*4
+  %add.ptr3.sum = add i32 %add.ptr2.sum, %add.ptr4.sum ; total IV stride = s*5+x
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %main.addr.011 = phi i8* [ %main, %for.body.lr.ph ], [ %add.ptr6, %for.body ]
+  %i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %res.addr.09 = phi i32* [ %res, %for.body.lr.ph ], [ %add.ptr7, %for.body ]
+  %0 = bitcast i8* %main.addr.011 to i32*
+  %1 = load i32* %0, align 4
+  %add.ptr = getelementptr inbounds i8* %main.addr.011, i32 %main_stride
+  %2 = bitcast i8* %add.ptr to i32*
+  %3 = load i32* %2, align 4
+  %add.ptr1 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr.sum
+  %4 = bitcast i8* %add.ptr1 to i32*
+  %5 = load i32* %4, align 4
+  %add.ptr2 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr1.sum
+  %6 = bitcast i8* %add.ptr2 to i32*
+  %7 = load i32* %6, align 4
+  %add.ptr3 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr4.sum
+  %8 = bitcast i8* %add.ptr3 to i32*
+  %9 = load i32* %8, align 4
+  %add = add i32 %3, %1
+  %add4 = add i32 %add, %5
+  %add5 = add i32 %add4, %7
+  %add6 = add i32 %add5, %9
+  store i32 %add6, i32* %res.addr.09, align 4
+  %add.ptr6 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr3.sum
+  %add.ptr7 = getelementptr inbounds i32* %res.addr.09, i32 %y
+  %inc = add i32 %i.010, 1
+  %cmp = icmp eq i32 %inc, %z
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; @foldedidx is an unrolled variant of this loop:
+;  for (unsigned long i = 0; i < len; i += s) {
+;    c[i] = a[i] + b[i];
+;  }
+; where 's' can be folded into the addressing mode.
+; Consequently, we should *not* form any chains.
+;
+; A9: foldedidx:
+; A9: ldrb.w {{r[0-9]|lr}}, [{{r[0-9]|lr}}, #3]
+define void @foldedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c) nounwind ssp {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.07 = phi i32 [ 0, %entry ], [ %inc.3, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %a, i32 %i.07
+  %0 = load i8* %arrayidx, align 1
+  %conv5 = zext i8 %0 to i32
+  %arrayidx1 = getelementptr inbounds i8* %b, i32 %i.07
+  %1 = load i8* %arrayidx1, align 1
+  %conv26 = zext i8 %1 to i32
+  %add = add nsw i32 %conv26, %conv5
+  %conv3 = trunc i32 %add to i8
+  %arrayidx4 = getelementptr inbounds i8* %c, i32 %i.07
+  store i8 %conv3, i8* %arrayidx4, align 1
+  %inc1 = or i32 %i.07, 1
+  %arrayidx.1 = getelementptr inbounds i8* %a, i32 %inc1
+  %2 = load i8* %arrayidx.1, align 1
+  %conv5.1 = zext i8 %2 to i32
+  %arrayidx1.1 = getelementptr inbounds i8* %b, i32 %inc1
+  %3 = load i8* %arrayidx1.1, align 1
+  %conv26.1 = zext i8 %3 to i32
+  %add.1 = add nsw i32 %conv26.1, %conv5.1
+  %conv3.1 = trunc i32 %add.1 to i8
+  %arrayidx4.1 = getelementptr inbounds i8* %c, i32 %inc1
+  store i8 %conv3.1, i8* %arrayidx4.1, align 1
+  %inc.12 = or i32 %i.07, 2
+  %arrayidx.2 = getelementptr inbounds i8* %a, i32 %inc.12
+  %4 = load i8* %arrayidx.2, align 1
+  %conv5.2 = zext i8 %4 to i32
+  %arrayidx1.2 = getelementptr inbounds i8* %b, i32 %inc.12
+  %5 = load i8* %arrayidx1.2, align 1
+  %conv26.2 = zext i8 %5 to i32
+  %add.2 = add nsw i32 %conv26.2, %conv5.2
+  %conv3.2 = trunc i32 %add.2 to i8
+  %arrayidx4.2 = getelementptr inbounds i8* %c, i32 %inc.12
+  store i8 %conv3.2, i8* %arrayidx4.2, align 1
+  %inc.23 = or i32 %i.07, 3
+  %arrayidx.3 = getelementptr inbounds i8* %a, i32 %inc.23
+  %6 = load i8* %arrayidx.3, align 1
+  %conv5.3 = zext i8 %6 to i32
+  %arrayidx1.3 = getelementptr inbounds i8* %b, i32 %inc.23
+  %7 = load i8* %arrayidx1.3, align 1
+  %conv26.3 = zext i8 %7 to i32
+  %add.3 = add nsw i32 %conv26.3, %conv5.3
+  %conv3.3 = trunc i32 %add.3 to i8
+  %arrayidx4.3 = getelementptr inbounds i8* %c, i32 %inc.23
+  store i8 %conv3.3, i8* %arrayidx4.3, align 1
+  %inc.3 = add nsw i32 %i.07, 4
+  %exitcond.3 = icmp eq i32 %inc.3, 400
+  br i1 %exitcond.3, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; @testNeon is an important example of the nead for ivchains.
+;
+; Currently we have three extra add.w's that keep the store address
+; live past the next increment because ISEL is unfortunately undoing
+; the store chain. ISEL also fails to convert the stores to
+; post-increment addressing. However, the loads should use
+; post-increment addressing, no add's or add.w's beyond the three
+; mentioned. Most importantly, there should be no spills or reloads!
+;
+; CHECK: testNeon:
+; CHECK: %.lr.ph
+; CHECK-NOT: lsl.w
+; CHECK-NOT: {{ldr|str|adds|add r}}
+; CHECK: add.w r
+; CHECK-NOT: {{ldr|str|adds|add r}}
+; CHECK: add.w r
+; CHECK-NOT: {{ldr|str|adds|add r}}
+; CHECK: add.w r
+; CHECK-NOT: {{ldr|str|adds|add r}}
+; CHECK-NOT: add.w r
+; CHECK: bne
+define hidden void @testNeon(i8* %ref_data, i32 %ref_stride, i32 %limit, <16 x i8>* nocapture %data) nounwind optsize {
+  %1 = icmp sgt i32 %limit, 0
+  br i1 %1, label %.lr.ph, label %45
+
+.lr.ph:                                           ; preds = %0
+  %2 = shl nsw i32 %ref_stride, 1
+  %3 = mul nsw i32 %ref_stride, 3
+  %4 = shl nsw i32 %ref_stride, 2
+  %5 = mul nsw i32 %ref_stride, 5
+  %6 = mul nsw i32 %ref_stride, 6
+  %7 = mul nsw i32 %ref_stride, 7
+  %8 = shl nsw i32 %ref_stride, 3
+  %9 = sub i32 0, %8
+  %10 = mul i32 %limit, -64
+  br label %11
+
+; <label>:11                                      ; preds = %11, %.lr.ph
+  %.05 = phi i8* [ %ref_data, %.lr.ph ], [ %42, %11 ]
+  %counter.04 = phi i32 [ 0, %.lr.ph ], [ %44, %11 ]
+  %result.03 = phi <16 x i8> [ zeroinitializer, %.lr.ph ], [ %41, %11 ]
+  %.012 = phi <16 x i8>* [ %data, %.lr.ph ], [ %43, %11 ]
+  %12 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %.05, i32 1) nounwind
+  %13 = getelementptr inbounds i8* %.05, i32 %ref_stride
+  %14 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %13, i32 1) nounwind
+  %15 = shufflevector <1 x i64> %12, <1 x i64> %14, <2 x i32> <i32 0, i32 1>
+  %16 = bitcast <2 x i64> %15 to <16 x i8>
+  %17 = getelementptr inbounds <16 x i8>* %.012, i32 1
+  store <16 x i8> %16, <16 x i8>* %.012, align 4
+  %18 = getelementptr inbounds i8* %.05, i32 %2
+  %19 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %18, i32 1) nounwind
+  %20 = getelementptr inbounds i8* %.05, i32 %3
+  %21 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %20, i32 1) nounwind
+  %22 = shufflevector <1 x i64> %19, <1 x i64> %21, <2 x i32> <i32 0, i32 1>
+  %23 = bitcast <2 x i64> %22 to <16 x i8>
+  %24 = getelementptr inbounds <16 x i8>* %.012, i32 2
+  store <16 x i8> %23, <16 x i8>* %17, align 4
+  %25 = getelementptr inbounds i8* %.05, i32 %4
+  %26 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %25, i32 1) nounwind
+  %27 = getelementptr inbounds i8* %.05, i32 %5
+  %28 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %27, i32 1) nounwind
+  %29 = shufflevector <1 x i64> %26, <1 x i64> %28, <2 x i32> <i32 0, i32 1>
+  %30 = bitcast <2 x i64> %29 to <16 x i8>
+  %31 = getelementptr inbounds <16 x i8>* %.012, i32 3
+  store <16 x i8> %30, <16 x i8>* %24, align 4
+  %32 = getelementptr inbounds i8* %.05, i32 %6
+  %33 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %32, i32 1) nounwind
+  %34 = getelementptr inbounds i8* %.05, i32 %7
+  %35 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %34, i32 1) nounwind
+  %36 = shufflevector <1 x i64> %33, <1 x i64> %35, <2 x i32> <i32 0, i32 1>
+  %37 = bitcast <2 x i64> %36 to <16 x i8>
+  store <16 x i8> %37, <16 x i8>* %31, align 4
+  %38 = add <16 x i8> %16, %23
+  %39 = add <16 x i8> %38, %30
+  %40 = add <16 x i8> %39, %37
+  %41 = add <16 x i8> %result.03, %40
+  %42 = getelementptr i8* %.05, i32 %9
+  %43 = getelementptr inbounds <16 x i8>* %.012, i32 -64
+  %44 = add nsw i32 %counter.04, 1
+  %exitcond = icmp eq i32 %44, %limit
+  br i1 %exitcond, label %._crit_edge, label %11
+
+._crit_edge:                                      ; preds = %11
+  %scevgep = getelementptr <16 x i8>* %data, i32 %10
+  br label %45
+
+; <label>:45                                      ; preds = %._crit_edge, %0
+  %result.0.lcssa = phi <16 x i8> [ %41, %._crit_edge ], [ zeroinitializer, %0 ]
+  %.01.lcssa = phi <16 x i8>* [ %scevgep, %._crit_edge ], [ %data, %0 ]
+  store <16 x i8> %result.0.lcssa, <16 x i8>* %.01.lcssa, align 4
+  ret void
+}
+
+declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32) nounwind readonly
diff --git a/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg b/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg
new file mode 100644
index 0000000..bac2ffa
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll']
+
+targets = set(config.root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll b/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll
new file mode 100644
index 0000000..cb23ad0
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s | FileCheck %s
+;
+; PR11431: handle a phi operand that is replaced by a postinc user.
+; LSR first expands %t3 to %t2 in %phi
+; LSR then expands %t2 in %phi into two decrements, one on each loop exit.
+
+target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare i1 @check() nounwind
+
+; Check that LSR did something close to the behavior at the time of the bug.
+; CHECK: @sqlite3DropTriggerPtr
+; CHECK: incq %rax
+; CHECK: jne
+; CHECK: decq %rax
+; CHECK: ret
+define i64 @sqlite3DropTriggerPtr() nounwind {
+bb:
+  %cmp = call zeroext i1 @check()
+  br label %bb1
+
+bb1:                                              ; preds = %bb4, %bb
+  %t0 = phi i64 [ 0, %bb ], [ %t3, %bb4 ]
+  %t2 = phi i64 [ 1, %bb ], [ %t5, %bb4 ]
+  %t3 = add nsw i64 %t0, 1
+  br i1 %cmp, label %bb4, label %bb8
+
+bb4:                                              ; preds = %bb1
+  %t5 = add nsw i64 %t2, 1
+  br i1 %cmp, label %bb1, label %bb8
+
+bb8:                                              ; preds = %bb8, %bb4
+  %phi = phi i64 [ %t3, %bb1 ], [ %t2, %bb4 ]
+  ret i64 %phi
+}
diff --git a/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll b/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll
new file mode 100644
index 0000000..5108650
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll
@@ -0,0 +1,96 @@
+; RUN: llc < %s | FileCheck %s
+;
+; Test LSR's ability to prune formulae that refer to nonexistant
+; AddRecs in other loops.
+;
+; Unable to reduce this case further because it requires LSR to exceed
+; ComplexityLimit.
+;
+; We really just want to ensure that LSR can process this loop without
+; finding an unsatisfactory solution and bailing out. I've added
+; dummyout, an obvious candidate for postinc replacement so we can
+; verify that LSR removes it.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin"
+
+; CHECK: @test
+; CHECK: # %for.body{{$}}
+; dummyiv copy should be removed
+; CHECK-NOT: movq
+; CHECK: # %for.cond19.preheader
+; dummycnt should be removed
+; CHECK-NOT: incq
+; CHECK: # %for.body23{{$}}
+define i64 @test(i64 %count, float* nocapture %srcrow, i32* nocapture %destrow) nounwind uwtable ssp {
+entry:
+  %cmp34 = icmp eq i64 %count, 0
+  br i1 %cmp34, label %for.end29, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %dummyiv = phi i64 [ %dummycnt, %for.body ], [ 0, %entry ]
+  %indvars.iv39 = phi i64 [ %indvars.iv.next40, %for.body ], [ 0, %entry ]
+  %dp.036 = phi i32* [ %add.ptr, %for.body ], [ %destrow, %entry ]
+  %p.035 = phi float* [ %incdec.ptr4, %for.body ], [ %srcrow, %entry ]
+  %incdec.ptr = getelementptr inbounds float* %p.035, i64 1
+  %0 = load float* %incdec.ptr, align 4
+  %incdec.ptr2 = getelementptr inbounds float* %p.035, i64 2
+  %1 = load float* %incdec.ptr2, align 4
+  %incdec.ptr3 = getelementptr inbounds float* %p.035, i64 3
+  %2 = load float* %incdec.ptr3, align 4
+  %incdec.ptr4 = getelementptr inbounds float* %p.035, i64 4
+  %3 = load float* %incdec.ptr4, align 4
+  %4 = load i32* %dp.036, align 4
+  %conv5 = fptoui float %0 to i32
+  %or = or i32 %4, %conv5
+  %arrayidx6 = getelementptr inbounds i32* %dp.036, i64 1
+  %5 = load i32* %arrayidx6, align 4
+  %conv7 = fptoui float %1 to i32
+  %or8 = or i32 %5, %conv7
+  %arrayidx9 = getelementptr inbounds i32* %dp.036, i64 2
+  %6 = load i32* %arrayidx9, align 4
+  %conv10 = fptoui float %2 to i32
+  %or11 = or i32 %6, %conv10
+  %arrayidx12 = getelementptr inbounds i32* %dp.036, i64 3
+  %7 = load i32* %arrayidx12, align 4
+  %conv13 = fptoui float %3 to i32
+  %or14 = or i32 %7, %conv13
+  store i32 %or, i32* %dp.036, align 4
+  store i32 %or8, i32* %arrayidx6, align 4
+  store i32 %or11, i32* %arrayidx9, align 4
+  store i32 %or14, i32* %arrayidx12, align 4
+  %add.ptr = getelementptr inbounds i32* %dp.036, i64 4
+  %indvars.iv.next40 = add i64 %indvars.iv39, 4
+  %dummycnt = add i64 %dummyiv, 1
+  %cmp = icmp ult i64 %indvars.iv.next40, %count
+  br i1 %cmp, label %for.body, label %for.cond19.preheader
+
+for.cond19.preheader:                             ; preds = %for.body
+  %dummyout = add i64 %dummyiv, 1
+  %rem = and i64 %count, 3
+  %cmp2130 = icmp eq i64 %rem, 0
+  br i1 %cmp2130, label %for.end29, label %for.body23.lr.ph
+
+for.body23.lr.ph:                                 ; preds = %for.cond19.preheader
+  %8 = and i64 %count, 3
+  br label %for.body23
+
+for.body23:                                       ; preds = %for.body23, %for.body23.lr.ph
+  %indvars.iv = phi i64 [ 0, %for.body23.lr.ph ], [ %indvars.iv.next, %for.body23 ]
+  %dp.132 = phi i32* [ %add.ptr, %for.body23.lr.ph ], [ %incdec.ptr28, %for.body23 ]
+  %p.131 = phi float* [ %incdec.ptr4, %for.body23.lr.ph ], [ %incdec.ptr24, %for.body23 ]
+  %incdec.ptr24 = getelementptr inbounds float* %p.131, i64 1
+  %9 = load float* %incdec.ptr24, align 4
+  %10 = load i32* %dp.132, align 4
+  %conv25 = fptoui float %9 to i32
+  %or26 = or i32 %10, %conv25
+  store i32 %or26, i32* %dp.132, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %incdec.ptr28 = getelementptr inbounds i32* %dp.132, i64 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %8
+  br i1 %exitcond, label %for.end29, label %for.body23
+
+for.end29:                                        ; preds = %entry, %for.body23, %for.cond19.preheader
+  %result = phi i64 [ 0, %entry ], [ %dummyout, %for.body23 ], [ %dummyout, %for.cond19.preheader ]
+  ret i64 %result
+}
diff --git a/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll b/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
new file mode 100644
index 0000000..2dcaab8
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
@@ -0,0 +1,92 @@
+; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 | FileCheck %s
+
+declare i1 @check() nounwind
+declare i1 @foo(i8*, i8*, i8*) nounwind
+
+; Check that redundant phi elimination ran
+; CHECK: @test
+; CHECK: %while.body.i
+; CHECK: movs
+; CHECK-NOT: movs
+; CHECK: %for.end.i
+define i32 @test(i8* %base) nounwind uwtable ssp {
+entry:
+  br label %while.body.lr.ph.i
+
+while.body.lr.ph.i:                               ; preds = %cond.true.i
+  br label %while.body.i
+
+while.body.i:                                     ; preds = %cond.true29.i, %while.body.lr.ph.i
+  %indvars.iv7.i = phi i64 [ 16, %while.body.lr.ph.i ], [ %indvars.iv.next8.i, %cond.true29.i ]
+  %i.05.i = phi i64 [ 0, %while.body.lr.ph.i ], [ %indvars.iv7.i, %cond.true29.i ]
+  %sext.i = shl i64 %i.05.i, 32
+  %idx.ext.i = ashr exact i64 %sext.i, 32
+  %add.ptr.sum.i = add i64 %idx.ext.i, 16
+  br label %for.body.i
+
+for.body.i:                                       ; preds = %for.body.i, %while.body.i
+  %indvars.iv.i = phi i64 [ 0, %while.body.i ], [ %indvars.iv.next.i, %for.body.i ]
+  %add.ptr.sum = add i64 %add.ptr.sum.i, %indvars.iv.i
+  %arrayidx22.i = getelementptr inbounds i8* %base, i64 %add.ptr.sum
+  %0 = load i8* %arrayidx22.i, align 1
+  %indvars.iv.next.i = add i64 %indvars.iv.i, 1
+  %cmp = call i1 @check() nounwind
+  br i1 %cmp, label %for.end.i, label %for.body.i
+
+for.end.i:                                        ; preds = %for.body.i
+  %add.ptr.i144 = getelementptr inbounds i8* %base, i64 %add.ptr.sum.i
+  %cmp2 = tail call i1 @foo(i8* %add.ptr.i144, i8* %add.ptr.i144, i8* undef) nounwind
+  br i1 %cmp2, label %cond.true29.i, label %cond.false35.i
+
+cond.true29.i:                                    ; preds = %for.end.i
+  %indvars.iv.next8.i = add i64 %indvars.iv7.i, 16
+  br i1 false, label %exit, label %while.body.i
+
+cond.false35.i:                                   ; preds = %for.end.i
+  unreachable
+
+exit:                                 ; preds = %cond.true29.i, %cond.true.i
+  ret i32 0
+}
+
+%struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771 = type { i32, i32, i32 }
+
+@tags = external global [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771], align 16
+
+; PR11782: SCEVExpander assert
+;
+; Test phi reuse after LSR that requires SCEVExpander to hoist an
+; interesting GEP.
+;
+; CHECK: @test2
+; CHECK: %entry
+; CHECK-NOT: mov
+; CHECK: jne
+define void @test2(i32 %n) nounwind uwtable {
+entry:
+  br i1 undef, label %while.end, label %for.cond468
+
+for.cond468:                                      ; preds = %if.then477, %entry
+  %indvars.iv1163 = phi i64 [ %indvars.iv.next1164, %if.then477 ], [ 1, %entry ]
+  %k.0.in = phi i32* [ %last, %if.then477 ], [ getelementptr inbounds ([5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771]* @tags, i64 0, i64 0, i32 2), %entry ]
+  %k.0 = load i32* %k.0.in, align 4
+  %0 = trunc i64 %indvars.iv1163 to i32
+  %cmp469 = icmp slt i32 %0, %n
+  br i1 %cmp469, label %for.body471, label %for.inc498
+
+for.body471:                                      ; preds = %for.cond468
+  %first = getelementptr inbounds [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771]* @tags, i64 0, i64 %indvars.iv1163, i32 1
+  %1 = load i32* %first, align 4
+  br i1 undef, label %if.then477, label %for.inc498
+
+if.then477:                                       ; preds = %for.body471
+  %last = getelementptr inbounds [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771]* @tags, i64 0, i64 %indvars.iv1163, i32 2
+  %indvars.iv.next1164 = add i64 %indvars.iv1163, 1
+  br label %for.cond468
+
+for.inc498:                                       ; preds = %for.inc498, %for.body471, %for.cond468
+  br label %for.inc498
+
+while.end:                                        ; preds = %entry
+  ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/X86/dg.exp b/test/Transforms/LoopStrengthReduce/X86/dg.exp
deleted file mode 100644
index 7b7bd4e..0000000
--- a/test/Transforms/LoopStrengthReduce/X86/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
-}
diff --git a/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll b/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
new file mode 100644
index 0000000..e42b67f
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
@@ -0,0 +1,300 @@
+; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -O3 -march=x86 -mcpu=core2 | FileCheck %s -check-prefix=X32
+
+; @simple is the most basic chain of address induction variables. Chaining
+; saves at least one register and avoids complex addressing and setup
+; code.
+;
+; X64: @simple
+; %x * 4
+; X64: shlq $2
+; no other address computation in the preheader
+; X64-NEXT: xorl
+; X64-NEXT: .align
+; X64: %loop
+; no complex address modes
+; X64-NOT: (%{{[^)]+}},%{{[^)]+}},
+;
+; X32: @simple
+; no expensive address computation in the preheader
+; X32-NOT: imul
+; X32: %loop
+; no complex address modes
+; X32-NOT: (%{{[^)]+}},%{{[^)]+}},
+define i32 @simple(i32* %a, i32* %b, i32 %x) nounwind {
+entry:
+  br label %loop
+loop:
+  %iv = phi i32* [ %a, %entry ], [ %iv4, %loop ]
+  %s = phi i32 [ 0, %entry ], [ %s4, %loop ]
+  %v = load i32* %iv
+  %iv1 = getelementptr inbounds i32* %iv, i32 %x
+  %v1 = load i32* %iv1
+  %iv2 = getelementptr inbounds i32* %iv1, i32 %x
+  %v2 = load i32* %iv2
+  %iv3 = getelementptr inbounds i32* %iv2, i32 %x
+  %v3 = load i32* %iv3
+  %s1 = add i32 %s, %v
+  %s2 = add i32 %s1, %v1
+  %s3 = add i32 %s2, %v2
+  %s4 = add i32 %s3, %v3
+  %iv4 = getelementptr inbounds i32* %iv3, i32 %x
+  %cmp = icmp eq i32* %iv4, %b
+  br i1 %cmp, label %exit, label %loop
+exit:
+  ret i32 %s4
+}
+
+; @user is not currently chained because the IV is live across memory ops.
+;
+; X64: @user
+; X64: shlq $4
+; X64: lea
+; X64: lea
+; X64: %loop
+; complex address modes
+; X64: (%{{[^)]+}},%{{[^)]+}},
+;
+; X32: @user
+; expensive address computation in the preheader
+; X32: imul
+; X32: %loop
+; complex address modes
+; X32: (%{{[^)]+}},%{{[^)]+}},
+define i32 @user(i32* %a, i32* %b, i32 %x) nounwind {
+entry:
+  br label %loop
+loop:
+  %iv = phi i32* [ %a, %entry ], [ %iv4, %loop ]
+  %s = phi i32 [ 0, %entry ], [ %s4, %loop ]
+  %v = load i32* %iv
+  %iv1 = getelementptr inbounds i32* %iv, i32 %x
+  %v1 = load i32* %iv1
+  %iv2 = getelementptr inbounds i32* %iv1, i32 %x
+  %v2 = load i32* %iv2
+  %iv3 = getelementptr inbounds i32* %iv2, i32 %x
+  %v3 = load i32* %iv3
+  %s1 = add i32 %s, %v
+  %s2 = add i32 %s1, %v1
+  %s3 = add i32 %s2, %v2
+  %s4 = add i32 %s3, %v3
+  %iv4 = getelementptr inbounds i32* %iv3, i32 %x
+  store i32 %s4, i32* %iv
+  %cmp = icmp eq i32* %iv4, %b
+  br i1 %cmp, label %exit, label %loop
+exit:
+  ret i32 %s4
+}
+
+; @extrastride is a slightly more interesting case of a single
+; complete chain with multiple strides. The test case IR is what LSR
+; used to do, and exactly what we don't want to do. LSR's new IV
+; chaining feature should now undo the damage.
+;
+; X64: extrastride:
+; We currently don't handle this on X64 because the sexts cause
+; strange increment expressions like this:
+; IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64)))
+;
+; X32: extrastride:
+; no spills in the preheader
+; X32-NOT: mov{{.*}}(%esp){{$}}
+; X32: %for.body{{$}}
+; no complex address modes
+; X32-NOT: (%{{[^)]+}},%{{[^)]+}},
+; no reloads
+; X32-NOT: (%esp)
+define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind {
+entry:
+  %cmp8 = icmp eq i32 %z, 0
+  br i1 %cmp8, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %entry
+  %add.ptr.sum = shl i32 %main_stride, 1 ; s*2
+  %add.ptr1.sum = add i32 %add.ptr.sum, %main_stride ; s*3
+  %add.ptr2.sum = add i32 %x, %main_stride ; s + x
+  %add.ptr4.sum = shl i32 %main_stride, 2 ; s*4
+  %add.ptr3.sum = add i32 %add.ptr2.sum, %add.ptr4.sum ; total IV stride = s*5+x
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %main.addr.011 = phi i8* [ %main, %for.body.lr.ph ], [ %add.ptr6, %for.body ]
+  %i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %res.addr.09 = phi i32* [ %res, %for.body.lr.ph ], [ %add.ptr7, %for.body ]
+  %0 = bitcast i8* %main.addr.011 to i32*
+  %1 = load i32* %0, align 4
+  %add.ptr = getelementptr inbounds i8* %main.addr.011, i32 %main_stride
+  %2 = bitcast i8* %add.ptr to i32*
+  %3 = load i32* %2, align 4
+  %add.ptr1 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr.sum
+  %4 = bitcast i8* %add.ptr1 to i32*
+  %5 = load i32* %4, align 4
+  %add.ptr2 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr1.sum
+  %6 = bitcast i8* %add.ptr2 to i32*
+  %7 = load i32* %6, align 4
+  %add.ptr3 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr4.sum
+  %8 = bitcast i8* %add.ptr3 to i32*
+  %9 = load i32* %8, align 4
+  %add = add i32 %3, %1
+  %add4 = add i32 %add, %5
+  %add5 = add i32 %add4, %7
+  %add6 = add i32 %add5, %9
+  store i32 %add6, i32* %res.addr.09, align 4
+  %add.ptr6 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr3.sum
+  %add.ptr7 = getelementptr inbounds i32* %res.addr.09, i32 %y
+  %inc = add i32 %i.010, 1
+  %cmp = icmp eq i32 %inc, %z
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; @foldedidx is an unrolled variant of this loop:
+;  for (unsigned long i = 0; i < len; i += s) {
+;    c[i] = a[i] + b[i];
+;  }
+; where 's' can be folded into the addressing mode.
+; Consequently, we should *not* form any chains.
+;
+; X64: foldedidx:
+; X64: movzbl -3(
+;
+; X32: foldedidx:
+; X32: movzbl -3(
+define void @foldedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c) nounwind ssp {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.07 = phi i32 [ 0, %entry ], [ %inc.3, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %a, i32 %i.07
+  %0 = load i8* %arrayidx, align 1
+  %conv5 = zext i8 %0 to i32
+  %arrayidx1 = getelementptr inbounds i8* %b, i32 %i.07
+  %1 = load i8* %arrayidx1, align 1
+  %conv26 = zext i8 %1 to i32
+  %add = add nsw i32 %conv26, %conv5
+  %conv3 = trunc i32 %add to i8
+  %arrayidx4 = getelementptr inbounds i8* %c, i32 %i.07
+  store i8 %conv3, i8* %arrayidx4, align 1
+  %inc1 = or i32 %i.07, 1
+  %arrayidx.1 = getelementptr inbounds i8* %a, i32 %inc1
+  %2 = load i8* %arrayidx.1, align 1
+  %conv5.1 = zext i8 %2 to i32
+  %arrayidx1.1 = getelementptr inbounds i8* %b, i32 %inc1
+  %3 = load i8* %arrayidx1.1, align 1
+  %conv26.1 = zext i8 %3 to i32
+  %add.1 = add nsw i32 %conv26.1, %conv5.1
+  %conv3.1 = trunc i32 %add.1 to i8
+  %arrayidx4.1 = getelementptr inbounds i8* %c, i32 %inc1
+  store i8 %conv3.1, i8* %arrayidx4.1, align 1
+  %inc.12 = or i32 %i.07, 2
+  %arrayidx.2 = getelementptr inbounds i8* %a, i32 %inc.12
+  %4 = load i8* %arrayidx.2, align 1
+  %conv5.2 = zext i8 %4 to i32
+  %arrayidx1.2 = getelementptr inbounds i8* %b, i32 %inc.12
+  %5 = load i8* %arrayidx1.2, align 1
+  %conv26.2 = zext i8 %5 to i32
+  %add.2 = add nsw i32 %conv26.2, %conv5.2
+  %conv3.2 = trunc i32 %add.2 to i8
+  %arrayidx4.2 = getelementptr inbounds i8* %c, i32 %inc.12
+  store i8 %conv3.2, i8* %arrayidx4.2, align 1
+  %inc.23 = or i32 %i.07, 3
+  %arrayidx.3 = getelementptr inbounds i8* %a, i32 %inc.23
+  %6 = load i8* %arrayidx.3, align 1
+  %conv5.3 = zext i8 %6 to i32
+  %arrayidx1.3 = getelementptr inbounds i8* %b, i32 %inc.23
+  %7 = load i8* %arrayidx1.3, align 1
+  %conv26.3 = zext i8 %7 to i32
+  %add.3 = add nsw i32 %conv26.3, %conv5.3
+  %conv3.3 = trunc i32 %add.3 to i8
+  %arrayidx4.3 = getelementptr inbounds i8* %c, i32 %inc.23
+  store i8 %conv3.3, i8* %arrayidx4.3, align 1
+  %inc.3 = add nsw i32 %i.07, 4
+  %exitcond.3 = icmp eq i32 %inc.3, 400
+  br i1 %exitcond.3, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; @multioper tests instructions with multiple IV user operands. We
+; should be able to chain them independent of each other.
+;
+; X64: @multioper
+; X64: %for.body
+; X64: movl %{{.*}},4)
+; X64-NEXT: leal 1(
+; X64-NEXT: movl %{{.*}},4)
+; X64-NEXT: leal 2(
+; X64-NEXT: movl %{{.*}},4)
+; X64-NEXT: leal 3(
+; X64-NEXT: movl %{{.*}},4)
+;
+; X32: @multioper
+; X32: %for.body
+; X32: movl %{{.*}},4)
+; X32-NEXT: leal 1(
+; X32-NEXT: movl %{{.*}},4)
+; X32-NEXT: leal 2(
+; X32-NEXT: movl %{{.*}},4)
+; X32-NEXT: leal 3(
+; X32-NEXT: movl %{{.*}},4)
+define void @multioper(i32* %a, i32 %n) nounwind {
+entry:
+  br label %for.body
+
+for.body:
+  %p = phi i32* [ %p.next, %for.body ], [ %a, %entry ]
+  %i = phi i32 [ %inc4, %for.body ], [ 0, %entry ]
+  store i32 %i, i32* %p, align 4
+  %inc1 = or i32 %i, 1
+  %add.ptr.i1 = getelementptr inbounds i32* %p, i32 1
+  store i32 %inc1, i32* %add.ptr.i1, align 4
+  %inc2 = add nsw i32 %i, 2
+  %add.ptr.i2 = getelementptr inbounds i32* %p, i32 2
+  store i32 %inc2, i32* %add.ptr.i2, align 4
+  %inc3 = add nsw i32 %i, 3
+  %add.ptr.i3 = getelementptr inbounds i32* %p, i32 3
+  store i32 %inc3, i32* %add.ptr.i3, align 4
+  %p.next = getelementptr inbounds i32* %p, i32 4
+  %inc4 = add nsw i32 %i, 4
+  %cmp = icmp slt i32 %inc4, %n
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  ret void
+}
+
+; @testCmpZero has a ICmpZero LSR use that should not be hidden from
+; LSR. Profitable chains should have more than one nonzero increment
+; anyway.
+;
+; X32: @testCmpZero
+; X32: %for.body82.us
+; X32: dec
+; X32: jne
+define void @testCmpZero(i8* %src, i8* %dst, i32 %srcidx, i32 %dstidx, i32 %len) nounwind ssp {
+entry:
+  %dest0 = getelementptr inbounds i8* %src, i32 %srcidx
+  %source0 = getelementptr inbounds i8* %dst, i32 %dstidx
+  %add.ptr79.us.sum = add i32 %srcidx, %len
+  %lftr.limit = getelementptr i8* %src, i32 %add.ptr79.us.sum
+  br label %for.body82.us
+
+for.body82.us:
+  %dest = phi i8* [ %dest0, %entry ], [ %incdec.ptr91.us, %for.body82.us ]
+  %source = phi i8* [ %source0, %entry ], [ %add.ptr83.us, %for.body82.us ]
+  %0 = bitcast i8* %source to i32*
+  %1 = load i32* %0, align 4
+  %trunc = trunc i32 %1 to i8
+  %add.ptr83.us = getelementptr inbounds i8* %source, i32 4
+  %incdec.ptr91.us = getelementptr inbounds i8* %dest, i32 1
+  store i8 %trunc, i8* %dest, align 1
+  %exitcond = icmp eq i8* %incdec.ptr91.us, %lftr.limit
+  br i1 %exitcond, label %return, label %for.body82.us
+
+return:
+  ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll b/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll
new file mode 100644
index 0000000..d8e0aa9
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll
@@ -0,0 +1,96 @@
+; REQUIRES: asserts
+; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 -stress-ivchain | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -O3 -march=x86 -mcpu=core2 -stress-ivchain | FileCheck %s -check-prefix=X32
+
+; @sharedidx is an unrolled variant of this loop:
+;  for (unsigned long i = 0; i < len; i += s) {
+;    c[i] = a[i] + b[i];
+;  }
+; where 's' cannot be folded into the addressing mode.
+;
+; This is not quite profitable to chain. But with -stress-ivchain, we
+; can form three address chains in place of the shared induction
+; variable.
+
+; X64: sharedidx:
+; X64: %for.body.preheader
+; X64-NOT: leal ({{.*}},4)
+; X64: %for.body.1
+
+; X32: sharedidx:
+; X32: %for.body.2
+; X32: add
+; X32: add
+; X32: add
+; X32: add
+; X32: add
+; X32: %for.body.3
+define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp {
+entry:
+  %cmp8 = icmp eq i32 %len, 0
+  br i1 %cmp8, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body.3
+  %i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i8* %a, i32 %i.09
+  %0 = load i8* %arrayidx, align 1
+  %conv6 = zext i8 %0 to i32
+  %arrayidx1 = getelementptr inbounds i8* %b, i32 %i.09
+  %1 = load i8* %arrayidx1, align 1
+  %conv27 = zext i8 %1 to i32
+  %add = add nsw i32 %conv27, %conv6
+  %conv3 = trunc i32 %add to i8
+  %arrayidx4 = getelementptr inbounds i8* %c, i32 %i.09
+  store i8 %conv3, i8* %arrayidx4, align 1
+  %add5 = add i32 %i.09, %s
+  %cmp = icmp ult i32 %add5, %len
+  br i1 %cmp, label %for.body.1, label %for.end
+
+for.end:                                          ; preds = %for.body, %for.body.1, %for.body.2, %for.body.3, %entry
+  ret void
+
+for.body.1:                                       ; preds = %for.body
+  %arrayidx.1 = getelementptr inbounds i8* %a, i32 %add5
+  %2 = load i8* %arrayidx.1, align 1
+  %conv6.1 = zext i8 %2 to i32
+  %arrayidx1.1 = getelementptr inbounds i8* %b, i32 %add5
+  %3 = load i8* %arrayidx1.1, align 1
+  %conv27.1 = zext i8 %3 to i32
+  %add.1 = add nsw i32 %conv27.1, %conv6.1
+  %conv3.1 = trunc i32 %add.1 to i8
+  %arrayidx4.1 = getelementptr inbounds i8* %c, i32 %add5
+  store i8 %conv3.1, i8* %arrayidx4.1, align 1
+  %add5.1 = add i32 %add5, %s
+  %cmp.1 = icmp ult i32 %add5.1, %len
+  br i1 %cmp.1, label %for.body.2, label %for.end
+
+for.body.2:                                       ; preds = %for.body.1
+  %arrayidx.2 = getelementptr inbounds i8* %a, i32 %add5.1
+  %4 = load i8* %arrayidx.2, align 1
+  %conv6.2 = zext i8 %4 to i32
+  %arrayidx1.2 = getelementptr inbounds i8* %b, i32 %add5.1
+  %5 = load i8* %arrayidx1.2, align 1
+  %conv27.2 = zext i8 %5 to i32
+  %add.2 = add nsw i32 %conv27.2, %conv6.2
+  %conv3.2 = trunc i32 %add.2 to i8
+  %arrayidx4.2 = getelementptr inbounds i8* %c, i32 %add5.1
+  store i8 %conv3.2, i8* %arrayidx4.2, align 1
+  %add5.2 = add i32 %add5.1, %s
+  %cmp.2 = icmp ult i32 %add5.2, %len
+  br i1 %cmp.2, label %for.body.3, label %for.end
+
+for.body.3:                                       ; preds = %for.body.2
+  %arrayidx.3 = getelementptr inbounds i8* %a, i32 %add5.2
+  %6 = load i8* %arrayidx.3, align 1
+  %conv6.3 = zext i8 %6 to i32
+  %arrayidx1.3 = getelementptr inbounds i8* %b, i32 %add5.2
+  %7 = load i8* %arrayidx1.3, align 1
+  %conv27.3 = zext i8 %7 to i32
+  %add.3 = add nsw i32 %conv27.3, %conv6.3
+  %conv3.3 = trunc i32 %add.3 to i8
+  %arrayidx4.3 = getelementptr inbounds i8* %c, i32 %add5.2
+  store i8 %conv3.3, i8* %arrayidx4.3, align 1
+  %add5.3 = add i32 %add5.2, %s
+  %cmp.3 = icmp ult i32 %add5.3, %len
+  br i1 %cmp.3, label %for.body, label %for.end
+}
diff --git a/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg b/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg
new file mode 100644
index 0000000..da2db5a
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/LoopStrengthReduce/addrec-gep.ll b/test/Transforms/LoopStrengthReduce/addrec-gep.ll
new file mode 100644
index 0000000..3e4e369
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/addrec-gep.ll
@@ -0,0 +1,82 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+; CHECK: bb1:
+; CHECK: load double* [[IV:%[^,]+]]
+; CHECK: store double {{.*}}, double* [[IV]]
+; CHECK: getelementptr double*
+; CHECK-NOT: cast
+; CHECK: br {{.*}} label %bb1
+
+; This test tests several things. The load and store should use the
+; same address instead of having it computed twice, and SCEVExpander should
+; be able to reconstruct the full getelementptr, despite it having a few
+; obstacles set in its way.
+; We only check that the inner loop (bb1-bb2) is "reduced" because LSR
+; currently only operates on inner loops.
+
+target datalayout = "e-p:64:64:64-n32:64"
+
+define void @foo(i64 %n, i64 %m, i64 %o, i64 %q, double* nocapture %p) nounwind {
+entry:
+	%tmp = icmp sgt i64 %n, 0		; <i1> [#uses=1]
+	br i1 %tmp, label %bb.nph3, label %return
+
+bb.nph:		; preds = %bb2.preheader
+	%tmp1 = mul i64 %tmp16, %i.02		; <i64> [#uses=1]
+	%tmp2 = mul i64 %tmp19, %i.02		; <i64> [#uses=1]
+	br label %bb1
+
+bb1:		; preds = %bb2, %bb.nph
+	%j.01 = phi i64 [ %tmp9, %bb2 ], [ 0, %bb.nph ]		; <i64> [#uses=3]
+	%tmp3 = add i64 %j.01, %tmp1		; <i64> [#uses=1]
+	%tmp4 = add i64 %j.01, %tmp2		; <i64> [#uses=1]
+        %z0 = add i64 %tmp3, 5203
+	%tmp5 = getelementptr double* %p, i64 %z0		; <double*> [#uses=1]
+	%tmp6 = load double* %tmp5, align 8		; <double> [#uses=1]
+	%tmp7 = fdiv double %tmp6, 2.100000e+00		; <double> [#uses=1]
+        %z1 = add i64 %tmp4, 5203
+	%tmp8 = getelementptr double* %p, i64 %z1		; <double*> [#uses=1]
+	store double %tmp7, double* %tmp8, align 8
+	%tmp9 = add i64 %j.01, 1		; <i64> [#uses=2]
+	br label %bb2
+
+bb2:		; preds = %bb1
+	%tmp10 = icmp slt i64 %tmp9, %m		; <i1> [#uses=1]
+	br i1 %tmp10, label %bb1, label %bb2.bb3_crit_edge
+
+bb2.bb3_crit_edge:		; preds = %bb2
+	br label %bb3
+
+bb3:		; preds = %bb2.preheader, %bb2.bb3_crit_edge
+	%tmp11 = add i64 %i.02, 1		; <i64> [#uses=2]
+	br label %bb4
+
+bb4:		; preds = %bb3
+	%tmp12 = icmp slt i64 %tmp11, %n		; <i1> [#uses=1]
+	br i1 %tmp12, label %bb2.preheader, label %bb4.return_crit_edge
+
+bb4.return_crit_edge:		; preds = %bb4
+	br label %bb4.return_crit_edge.split
+
+bb4.return_crit_edge.split:		; preds = %bb.nph3, %bb4.return_crit_edge
+	br label %return
+
+bb.nph3:		; preds = %entry
+	%tmp13 = icmp sgt i64 %m, 0		; <i1> [#uses=1]
+	%tmp14 = mul i64 %n, 37		; <i64> [#uses=1]
+	%tmp15 = mul i64 %tmp14, %o		; <i64> [#uses=1]
+	%tmp16 = mul i64 %tmp15, %q		; <i64> [#uses=1]
+	%tmp17 = mul i64 %n, 37		; <i64> [#uses=1]
+	%tmp18 = mul i64 %tmp17, %o		; <i64> [#uses=1]
+	%tmp19 = mul i64 %tmp18, %q		; <i64> [#uses=1]
+	br i1 %tmp13, label %bb.nph3.split, label %bb4.return_crit_edge.split
+
+bb.nph3.split:		; preds = %bb.nph3
+	br label %bb2.preheader
+
+bb2.preheader:		; preds = %bb.nph3.split, %bb4
+	%i.02 = phi i64 [ %tmp11, %bb4 ], [ 0, %bb.nph3.split ]		; <i64> [#uses=3]
+	br i1 true, label %bb.nph, label %bb3
+
+return:		; preds = %bb4.return_crit_edge.split, %entry
+	ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/dg.exp b/test/Transforms/LoopStrengthReduce/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LoopStrengthReduce/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LoopStrengthReduce/dominate-assert.ll b/test/Transforms/LoopStrengthReduce/dominate-assert.ll
new file mode 100644
index 0000000..b87bf62
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/dominate-assert.ll
@@ -0,0 +1,70 @@
+; RUN: opt -loop-reduce %s
+; we used to crash on this one
+
+declare i8* @_Znwm()
+declare i32 @__gxx_personality_v0(...)
+declare void @g()
+define void @f() {
+bb0:
+  br label %bb1
+bb1:
+  %v0 = phi i64 [ 0, %bb0 ], [ %v1, %bb1 ]
+  %v1 = add nsw i64 %v0, 1
+  br i1 undef, label %bb2, label %bb1
+bb2:
+  %v2 = icmp eq i64 %v0, 0
+  br i1 %v2, label %bb6, label %bb3
+bb3:
+  %v3 = invoke noalias i8* @_Znwm()
+          to label %bb5 unwind label %bb4
+bb4:
+  %v4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  br label %bb9
+bb5:
+  %v5 = bitcast i8* %v3 to i32**
+  %add.ptr.i = getelementptr inbounds i32** %v5, i64 %v0
+  br label %bb6
+bb6:
+  %v6 = phi i32** [ null, %bb2 ], [ %add.ptr.i, %bb5 ]
+  invoke void @g()
+          to label %bb7 unwind label %bb8
+bb7:
+  unreachable
+bb8:
+  %v7 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  br label %bb9
+bb9:
+  resume { i8*, i32 } zeroinitializer
+}
+
+
+define void @h() {
+bb1:
+  invoke void @g() optsize
+          to label %bb2 unwind label %bb5
+bb2:
+  %arrayctor.cur = phi i8* [ undef, %bb1 ], [ %arrayctor.next, %bb3 ]
+  invoke void @g() optsize
+          to label %bb3 unwind label %bb6
+bb3:
+  %arrayctor.next = getelementptr inbounds i8* %arrayctor.cur, i64 1
+  br label %bb2
+bb4:
+  ret void
+bb5:
+  %tmp = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  invoke void @g() optsize
+          to label %bb4 unwind label %bb7
+bb6:
+  %tmp1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  %arraydestroy.isempty = icmp eq i8* undef, %arrayctor.cur
+  ret void
+bb7:
+  %lpad.nonloopexit = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* null
+  ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll b/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll
index abbfda6..ad4959b 100644
--- a/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll
+++ b/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll
@@ -9,7 +9,7 @@ entry:
 	br label %no_exit
 no_exit:		; preds = %no_exit, %entry
 	%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %no_exit ]		; <i32> [#uses=1]
-	volatile store float 0.000000e+00, float* %D
+	store volatile float 0.000000e+00, float* %D
 	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=2]
 ; CHECK: icmp
 ; CHECK-NEXT: br i1
diff --git a/test/Transforms/LoopStrengthReduce/ivchain.ll b/test/Transforms/LoopStrengthReduce/ivchain.ll
new file mode 100644
index 0000000..ce7ad19
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/ivchain.ll
@@ -0,0 +1,43 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+;
+; PR11782: bad cast to AddRecExpr.
+; A sign extend feeds an IVUser and cannot be hoisted into the AddRec.
+; CollectIVChains should bailout on this case.
+
+%struct = type { i8*, i8*, i16, i64, i16, i16, i16, i64, i64, i16, i8*, i64, i64, i64 }
+
+; CHECK: @test
+; CHECK: for.body:
+; CHECK: lsr.iv = phi %struct
+; CHECK: br
+define i32 @test(i8* %h, i32 %more) nounwind uwtable {
+entry:
+  br i1 undef, label %land.end238, label %return
+
+land.end238:                                      ; preds = %if.end229
+  br label %for.body
+
+for.body:                                         ; preds = %sw.epilog, %land.end238
+  %fbh.0 = phi %struct* [ undef, %land.end238 ], [ %incdec.ptr, %sw.epilog ]
+  %column_n.0 = phi i16 [ 0, %land.end238 ], [ %inc601, %sw.epilog ]
+  %conv250 = sext i16 %column_n.0 to i32
+  %add257 = add nsw i32 %conv250, 1
+  %conv258 = trunc i32 %add257 to i16
+  %cmp263 = icmp ult i16 undef, 2
+  br label %if.end388
+
+if.end388:                                        ; preds = %if.then380, %if.else356
+  %ColLength = getelementptr inbounds %struct* %fbh.0, i64 0, i32 7
+  %call405 = call signext i16 @SQLColAttribute(i8* undef, i16 zeroext %conv258, i16 zeroext 1003, i8* null, i16 signext 0, i16* null, i64* %ColLength) nounwind
+  br label %sw.epilog
+
+sw.epilog:                                        ; preds = %sw.bb542, %sw.bb523, %if.end475
+  %inc601 = add i16 %column_n.0, 1
+  %incdec.ptr = getelementptr inbounds %struct* %fbh.0, i64 1
+  br label %for.body
+
+return:                                           ; preds = %entry
+  ret i32 1
+}
+
+declare signext i16 @SQLColAttribute(i8*, i16 zeroext, i16 zeroext, i8*, i16 signext, i16*, i64*)
diff --git a/test/Transforms/LoopStrengthReduce/lit.local.cfg b/test/Transforms/LoopStrengthReduce/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
index 2760915..96904c6 100644
--- a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
+++ b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
@@ -1,15 +1,15 @@
 ; RUN: opt -loop-reduce -S < %s | FileCheck %s
 ; PR9939
 
-; LSR should property handle the post-inc offset when folding the
+; LSR should properly handle the post-inc offset when folding the
 ; non-IV operand of an icmp into the IV.
 
-; CHECK:   %5 = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
-; CHECK:   %6 = lshr i64 %5, 1
-; CHECK:   %7 = mul i64 %6, 2
+; CHECK:   %4 = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+; CHECK:   %5 = lshr i64 %4, 1
+; CHECK:   %6 = mul i64 %5, 2
 ; CHECK:   br label %for.body
 ; CHECK: for.body:
-; CHECK:   %lsr.iv2 = phi i64 [ %lsr.iv.next, %for.body ], [ %7, %for.body.lr.ph ]
+; CHECK:   %lsr.iv2 = phi i64 [ %lsr.iv.next, %for.body ], [ %6, %for.body.lr.ph ]
 ; CHECK:   %lsr.iv.next = add i64 %lsr.iv2, -2
 ; CHECK:   %lsr.iv.next3 = inttoptr i64 %lsr.iv.next to i16*
 ; CHECK:   %cmp27 = icmp eq i16* %lsr.iv.next3, null
diff --git a/test/Transforms/LoopStrengthReduce/pr12018.ll b/test/Transforms/LoopStrengthReduce/pr12018.ll
new file mode 100644
index 0000000..ee7b1e8
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/pr12018.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -loop-reduce
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+
+%struct.nsTArray = type { i8 }
+%struct.nsTArrayHeader = type { i32 }
+
+define void @_Z6foobarR8nsTArray(%struct.nsTArray* %aValues, i32 %foo, %struct.nsTArrayHeader* %bar) nounwind {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %_ZN8nsTArray9ElementAtEi.exit, %entry
+  %i.06 = phi i32 [ %add, %_ZN8nsTArray9ElementAtEi.exit ], [ 0, %entry ]
+  %call.i = call %struct.nsTArrayHeader* @_ZN8nsTArray4Hdr2Ev() nounwind
+  %add.ptr.i = getelementptr inbounds %struct.nsTArrayHeader* %call.i, i32 1
+  %tmp = bitcast %struct.nsTArrayHeader* %add.ptr.i to %struct.nsTArray*
+  %arrayidx = getelementptr inbounds %struct.nsTArray* %tmp, i32 %i.06
+  %add = add nsw i32 %i.06, 1
+  call void @llvm.dbg.value(metadata !{%struct.nsTArray* %aValues}, i64 0, metadata !0) nounwind
+  br label %_ZN8nsTArray9ElementAtEi.exit
+
+_ZN8nsTArray9ElementAtEi.exit:                    ; preds = %for.body
+  %arrayidx.i = getelementptr inbounds %struct.nsTArray* %tmp, i32 %add
+  call void @_ZN11nsTArray15ComputeDistanceERKS_Rd(%struct.nsTArray* %arrayidx, %struct.nsTArray* %arrayidx.i) nounwind
+  %cmp = icmp slt i32 %add, %foo
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %_ZN8nsTArray9ElementAtEi.exit
+  ret void
+}
+
+declare void @_ZN11nsTArray15ComputeDistanceERKS_Rd(%struct.nsTArray*, %struct.nsTArray*)
+
+declare %struct.nsTArrayHeader* @_ZN8nsTArray4Hdr2Ev()
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!0 = metadata !{i32 786689}                       ; [ DW_TAG_arg_variable ]
diff --git a/test/Transforms/LoopStrengthReduce/pr12048.ll b/test/Transforms/LoopStrengthReduce/pr12048.ll
new file mode 100644
index 0000000..7e0f2ad
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/pr12048.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -loop-reduce
+
+define void @resolve_name() nounwind uwtable ssp {
+  br label %while.cond40.preheader
+while.cond132.while.cond.loopexit_crit_edge:
+  br label %while.cond40.preheader
+while.cond40.preheader:
+  br label %while.cond40
+while.cond40:
+  %indvars.iv194 = phi i8* [ null, %while.cond40.preheader ], [ %scevgep, %while.body51 ]
+  %tmp.1 = phi i8* [ undef, %while.cond40.preheader ], [ %incdec.ptr, %while.body51 ]
+  switch i8 undef, label %while.body51 [
+    i8 0, label %if.then59
+  ]
+while.body51:                                     ; preds = %land.end50
+  %incdec.ptr = getelementptr inbounds i8* %tmp.1, i64 1
+  %scevgep = getelementptr i8* %indvars.iv194, i64 1
+  br label %while.cond40
+if.then59:                                        ; preds = %while.end
+  br i1 undef, label %if.then64, label %if.end113
+if.then64:                                        ; preds = %if.then59
+  %incdec.ptr88.tmp.2 = select i1 undef, i8* undef, i8* undef
+  br label %if.end113
+if.end113:                                        ; preds = %if.then64, %if.then59
+  %tmp.4 = phi i8* [ %incdec.ptr88.tmp.2, %if.then64 ], [ undef, %if.then59 ]
+  %tmp.4195 = ptrtoint i8* %tmp.4 to i64
+  br  label %while.cond132.preheader
+while.cond132.preheader:                          ; preds = %if.end113
+  %cmp133173 = icmp eq i8* %tmp.1, %tmp.4
+  br i1 %cmp133173, label %while.cond40.preheader, label %while.body139.lr.ph
+while.body139.lr.ph:                              ; preds = %while.cond132.preheader
+  %scevgep198 = getelementptr i8* %indvars.iv194, i64 0
+  %scevgep198199 = ptrtoint i8* %scevgep198 to i64
+  br label %while.body139
+while.body139:                                    ; preds = %while.body139, %while.body139.lr.ph
+  %start_of_var.0177 = phi i8* [ %tmp.1, %while.body139.lr.ph ], [ null, %while.body139 ]
+  br i1 undef, label %while.cond132.while.cond.loopexit_crit_edge, label %while.body139
+}
diff --git a/test/Transforms/LoopStrengthReduce/pr3399.ll b/test/Transforms/LoopStrengthReduce/pr3399.ll
index b809007..26c5002 100644
--- a/test/Transforms/LoopStrengthReduce/pr3399.ll
+++ b/test/Transforms/LoopStrengthReduce/pr3399.ll
@@ -13,7 +13,7 @@ bb:		; preds = %bb5, %bb5.thread
 
 bb1:		; preds = %bb
 	%l_2.0.reg2mem.0 = sub i32 0, %indvar		; <i32> [#uses=1]
-	%0 = volatile load i32* @g_53, align 4		; <i32> [#uses=1]
+	%0 = load volatile i32* @g_53, align 4		; <i32> [#uses=1]
 	%1 = trunc i32 %l_2.0.reg2mem.0 to i16		; <i16> [#uses=1]
 	%2 = trunc i32 %0 to i16		; <i16> [#uses=1]
 	%3 = mul i16 %2, %1		; <i16> [#uses=1]
diff --git a/test/Transforms/LoopStrengthReduce/preserve-gep-loop-variant.ll b/test/Transforms/LoopStrengthReduce/preserve-gep-loop-variant.ll
new file mode 100644
index 0000000..f90d030
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/preserve-gep-loop-variant.ll
@@ -0,0 +1,42 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+; CHECK-NOT: {{inttoptr|ptrtoint}}
+; CHECK: scevgep
+; CHECK-NOT: {{inttoptr|ptrtoint}}
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n32:64"
+
+; Indvars shouldn't need inttoptr/ptrtoint to expand an address here.
+
+define void @foo(i8* %p) nounwind {
+entry:
+  br i1 true, label %bb.nph, label %for.end
+
+for.cond:
+  %phitmp = icmp slt i64 %inc, 20
+  br i1 %phitmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge:
+  br label %for.end
+
+bb.nph:
+  br label %for.body
+
+for.body:
+  %storemerge1 = phi i64 [ %inc, %for.cond ], [ 0, %bb.nph ]
+  %call = tail call i64 @bar() nounwind
+  %call2 = tail call i64 @car() nounwind
+  %conv = trunc i64 %call2 to i8
+  %conv3 = sext i8 %conv to i64
+  %add = add nsw i64 %call, %storemerge1
+  %add4 = add nsw i64 %add, %conv3
+  %arrayidx = getelementptr inbounds i8* %p, i64 %add4
+  store i8 0, i8* %arrayidx
+  %inc = add nsw i64 %storemerge1, 1
+  br label %for.cond
+
+for.end:
+  ret void
+}
+
+declare i64 @bar()
+
+declare i64 @car()
diff --git a/test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll b/test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll
index 59551d5..a43a4ff 100644
--- a/test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll
+++ b/test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S < %s -loop-unroll -unroll-count=4 -enable-iv-rewrite=false | FileCheck %s
+; RUN: opt -S < %s -loop-unroll -unroll-count=4 | FileCheck %s
 ;
 ; Test induction variable simplify after loop unrolling. It should
 ; expose nice opportunities for GVN.
diff --git a/test/Transforms/LoopUnroll/2012-04-09-unroll-indirectbr.ll b/test/Transforms/LoopUnroll/2012-04-09-unroll-indirectbr.ll
new file mode 100644
index 0000000..8946a23
--- /dev/null
+++ b/test/Transforms/LoopUnroll/2012-04-09-unroll-indirectbr.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -S -loop-unroll -simplifycfg | FileCheck %s
+; PR12513: Loop unrolling breaks with indirect branches.
+; If loop unrolling attempts to transform this loop, it replaces the
+; indirectbr successors. SimplifyCFG then considers them to be unreachable.
+declare void @subtract() nounwind uwtable
+
+; CHECK-NOT: unreachable
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind uwtable {
+entry:
+  %vals19 = alloca [5 x i32], align 16
+  %x20 = alloca i32, align 4
+  store i32 135, i32* %x20, align 4
+  br label %for.body
+
+for.body:                                         ; preds = ; %call2_termjoin, %call3_termjoin
+  %indvars.iv = phi i64 [ 0, %entry ], [ %joinphi15.in.in, %call2_termjoin ]
+  %a6 = call coldcc i8* @funca(i8* blockaddress(@main, %for.body_code), i8*
+blockaddress(@main, %for.body_codeprime)) nounwind
+  indirectbr i8* %a6, [label %for.body_code, label %for.body_codeprime]
+
+for.body_code:                                    ; preds = %for.body
+  call void @subtract()
+  br label %call2_termjoin
+
+call2_termjoin:                                   ; preds = %for.body_codeprime, %for.body_code
+  %joinphi15.in.in = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %joinphi15.in.in, 5
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %call2_termjoin
+  ret i32 0
+
+for.body_codeprime:                               ; preds = %for.body
+  call void @subtract_v2(i64 %indvars.iv)
+  br label %call2_termjoin
+}
+
+declare coldcc i8* @funca(i8*, i8*) readonly
+
+declare void @subtract_v2(i64) nounwind uwtable
diff --git a/test/Transforms/LoopUnroll/dg.exp b/test/Transforms/LoopUnroll/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LoopUnroll/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LoopUnroll/lit.local.cfg b/test/Transforms/LoopUnroll/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LoopUnroll/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopUnroll/partial-unroll-optsize.ll b/test/Transforms/LoopUnroll/partial-unroll-optsize.ll
new file mode 100644
index 0000000..3179d55e
--- /dev/null
+++ b/test/Transforms/LoopUnroll/partial-unroll-optsize.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -S -loop-unroll -unroll-allow-partial | FileCheck %s
+; Loop size = 3, when the function has the optsize attribute, the
+; OptSizeUnrollThreshold, i.e. 50, is used, hence the loop should be unrolled
+; by 16 times because 3 * 16 < 50.
+define void @unroll_opt_for_size() nounwind optsize {
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  %inc = add i32 %iv, 1
+  %exitcnd = icmp uge i32 %inc, 1024
+  br i1 %exitcnd, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; CHECK:      add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: icmp
diff --git a/test/Transforms/LoopUnroll/pr11361.ll b/test/Transforms/LoopUnroll/pr11361.ll
new file mode 100644
index 0000000..7ce7f5f
--- /dev/null
+++ b/test/Transforms/LoopUnroll/pr11361.ll
@@ -0,0 +1,42 @@
+; RUN: opt -loop-unroll -disable-output
+; PR11361
+
+; This tests for an iterator invalidation issue.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @func_1() nounwind uwtable {
+entry:
+  br label %for.cond8.preheader
+
+for.cond8.preheader:                              ; preds = %for.inc15, %entry
+  %l_1264.04 = phi i32 [ 0, %entry ], [ %add.i, %for.inc15 ]
+  %l_1330.0.03 = phi i80 [ undef, %entry ], [ %ins.lcssa, %for.inc15 ]
+  br label %for.body9
+
+for.body9:                                        ; preds = %for.body9, %for.cond8.preheader
+  %l_1330.0.12 = phi i80 [ %l_1330.0.03, %for.cond8.preheader ], [ %ins, %for.body9 ]
+  %storemerge1 = phi i32 [ 7, %for.cond8.preheader ], [ %sub, %for.body9 ]
+  %tmp = lshr i80 %l_1330.0.12, 8
+  %tmp1 = trunc i80 %tmp to i8
+  %inc12 = add i8 %tmp1, 1
+  %tmp2 = zext i8 %inc12 to i80
+  %tmp3 = shl nuw nsw i80 %tmp2, 8
+  %mask = and i80 %l_1330.0.12, -65281
+  %ins = or i80 %tmp3, %mask
+  %sub = add nsw i32 %storemerge1, -1
+  %tobool = icmp eq i32 %sub, 0
+  br i1 %tobool, label %for.inc15, label %for.body9
+
+for.inc15:                                        ; preds = %for.body9
+  %ins.lcssa = phi i80 [ %ins, %for.body9 ]
+  %sext = shl i32 %l_1264.04, 24
+  %conv.i = ashr exact i32 %sext, 24
+  %add.i = add nsw i32 %conv.i, 1
+  %cmp = icmp slt i32 %add.i, 3
+  br i1 %cmp, label %for.cond8.preheader, label %for.end16
+
+for.end16:                                        ; preds = %for.inc15
+  ret void
+}
diff --git a/test/Transforms/LoopUnroll/runtime-loop.ll b/test/Transforms/LoopUnroll/runtime-loop.ll
new file mode 100644
index 0000000..d8bbea9
--- /dev/null
+++ b/test/Transforms/LoopUnroll/runtime-loop.ll
@@ -0,0 +1,109 @@
+; RUN: opt < %s -S -loop-unroll -unroll-runtime=true | FileCheck %s
+
+; Tests for unrolling loops with run-time trip counts
+
+; CHECK: unr.cmp{{.*}}:
+; CHECK: for.body.unr{{.*}}:
+; CHECK: for.body:
+; CHECK: br i1 %exitcond.7, label %for.end.loopexit{{.*}}, label %for.body
+
+define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
+entry:
+  %cmp1 = icmp eq i32 %n, 0
+  br i1 %cmp1, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %sum.02
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %sum.0.lcssa
+}
+
+
+; Still try to completely unroll loops with compile-time trip counts
+; even if the -unroll-runtime is specified
+
+; CHECK: for.body:
+; CHECK-NOT: for.body.unr:
+
+define i32 @test1(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %sum.01 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %sum.01
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 5
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 %add
+}
+
+; This is test 2007-05-09-UnknownTripCount.ll which can be unrolled now
+; if the -unroll-runtime option is turned on
+
+; CHECK: bb72.2:
+
+define void @foo(i32 %trips) {
+entry:
+        br label %cond_true.outer
+
+cond_true.outer:
+        %indvar1.ph = phi i32 [ 0, %entry ], [ %indvar.next2, %bb72 ]
+        br label %bb72
+
+bb72:
+        %indvar.next2 = add i32 %indvar1.ph, 1
+        %exitcond3 = icmp eq i32 %indvar.next2, %trips
+        br i1 %exitcond3, label %cond_true138, label %cond_true.outer
+
+cond_true138:
+        ret void
+}
+
+
+; Test run-time unrolling for a loop that counts down by -2.
+
+; CHECK: for.body.unr:
+; CHECK: br i1 %cmp.7, label %for.cond.for.end_crit_edge{{.*}}, label %for.body
+
+define zeroext i16 @down(i16* nocapture %p, i32 %len) nounwind uwtable readonly {
+entry:
+  %cmp2 = icmp eq i32 %len, 0
+  br i1 %cmp2, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %p.addr.05 = phi i16* [ %incdec.ptr, %for.body ], [ %p, %entry ]
+  %len.addr.04 = phi i32 [ %sub, %for.body ], [ %len, %entry ]
+  %res.03 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %incdec.ptr = getelementptr inbounds i16* %p.addr.05, i64 1
+  %0 = load i16* %p.addr.05, align 2
+  %conv = zext i16 %0 to i32
+  %add = add i32 %conv, %res.03
+  %sub = add nsw i32 %len.addr.04, -2
+  %cmp = icmp eq i32 %sub, 0
+  br i1 %cmp, label %for.cond.for.end_crit_edge, label %for.body
+
+for.cond.for.end_crit_edge:                       ; preds = %for.body
+  %phitmp = trunc i32 %add to i16
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
+  %res.0.lcssa = phi i16 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+  ret i16 %res.0.lcssa
+}
diff --git a/test/Transforms/LoopUnroll/runtime-loop1.ll b/test/Transforms/LoopUnroll/runtime-loop1.ll
new file mode 100644
index 0000000..ad99b8c
--- /dev/null
+++ b/test/Transforms/LoopUnroll/runtime-loop1.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=4 | FileCheck %s
+
+; This tests that setting the unroll count works
+
+; CHECK: unr.cmp:
+; CHECK: for.body.unr:
+; CHECK: for.body:
+; CHECK: br i1 %exitcond.3, label %for.end.loopexit{{.*}}, label %for.body
+; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
+
+define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
+entry:
+  %cmp1 = icmp eq i32 %n, 0
+  br i1 %cmp1, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %sum.02
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %sum.0.lcssa
+}
diff --git a/test/Transforms/LoopUnroll/runtime-loop2.ll b/test/Transforms/LoopUnroll/runtime-loop2.ll
new file mode 100644
index 0000000..cbc7af5
--- /dev/null
+++ b/test/Transforms/LoopUnroll/runtime-loop2.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -S -loop-unroll -unroll-threshold=50 -unroll-runtime -unroll-count=8 | FileCheck %s
+
+; Choose a smaller, power-of-two, unroll count if the loop is too large.
+; This test makes sure we're not unrolling 'odd' counts
+
+; CHECK: unr.cmp:
+; CHECK: for.body.unr:
+; CHECK: for.body:
+; CHECK: br i1 %exitcond.3, label %for.end.loopexit{{.*}}, label %for.body
+; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
+
+define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
+entry:
+  %cmp1 = icmp eq i32 %n, 0
+  br i1 %cmp1, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %sum.02
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %sum.0.lcssa
+}
diff --git a/test/Transforms/LoopUnroll/runtime-loop3.ll b/test/Transforms/LoopUnroll/runtime-loop3.ll
new file mode 100644
index 0000000..55cf223
--- /dev/null
+++ b/test/Transforms/LoopUnroll/runtime-loop3.ll
@@ -0,0 +1,44 @@
+; RUN: opt < %s -disable-output -stats -loop-unroll -unroll-runtime -unroll-threshold=400 -info-output-file - | FileCheck %s --check-prefix=STATS
+
+; Test that nested loops can be unrolled.  We need to increase threshold to do it
+
+; STATS: 2 loop-unroll - Number of loops unrolled (completely or otherwise)
+
+define i32 @nested(i32* nocapture %a, i32 %n, i32 %m) nounwind uwtable readonly {
+entry:
+  %cmp11 = icmp sgt i32 %n, 0
+  br i1 %cmp11, label %for.cond1.preheader.lr.ph, label %for.end7
+
+for.cond1.preheader.lr.ph:                        ; preds = %entry
+  %cmp28 = icmp sgt i32 %m, 0
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.inc5, %for.cond1.preheader.lr.ph
+  %indvars.iv16 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next17, %for.inc5 ]
+  %sum.012 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %sum.1.lcssa, %for.inc5 ]
+  br i1 %cmp28, label %for.body3, label %for.inc5
+
+for.body3:                                        ; preds = %for.cond1.preheader, %for.body3
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 0, %for.cond1.preheader ]
+  %sum.19 = phi i32 [ %add4, %for.body3 ], [ %sum.012, %for.cond1.preheader ]
+  %0 = add nsw i64 %indvars.iv, %indvars.iv16
+  %arrayidx = getelementptr inbounds i32* %a, i64 %0
+  %1 = load i32* %arrayidx, align 4
+  %add4 = add nsw i32 %1, %sum.19
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %m
+  br i1 %exitcond, label %for.inc5, label %for.body3
+
+for.inc5:                                         ; preds = %for.body3, %for.cond1.preheader
+  %sum.1.lcssa = phi i32 [ %sum.012, %for.cond1.preheader ], [ %add4, %for.body3 ]
+  %indvars.iv.next17 = add i64 %indvars.iv16, 1
+  %lftr.wideiv18 = trunc i64 %indvars.iv.next17 to i32
+  %exitcond19 = icmp eq i32 %lftr.wideiv18, %n
+  br i1 %exitcond19, label %for.end7, label %for.cond1.preheader
+
+for.end7:                                         ; preds = %for.inc5, %entry
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %sum.1.lcssa, %for.inc5 ]
+  ret i32 %sum.0.lcssa
+}
+
diff --git a/test/Transforms/LoopUnroll/unloop.ll b/test/Transforms/LoopUnroll/unloop.ll
index 217c8ce..5a9cacd 100644
--- a/test/Transforms/LoopUnroll/unloop.ll
+++ b/test/Transforms/LoopUnroll/unloop.ll
@@ -427,3 +427,44 @@ if.end2413:                                       ; preds = %defchar
 return:                                           ; preds = %sw.bb304
   ret void
 }
+
+; PR11335: the most deeply nested block should be removed from the outer loop.
+; CHECK: @removeSubloopBlocks2
+; CHECK: for.cond3:
+; CHECK-NOT: br
+; CHECK: ret void
+define void @removeSubloopBlocks2() nounwind {
+entry:
+  %tobool.i = icmp ne i32 undef, 0
+  br label %lbl_616
+
+lbl_616.loopexit:                                 ; preds = %for.cond
+  br label %lbl_616
+
+lbl_616:                                          ; preds = %lbl_616.loopexit, %entry
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond3, %lbl_616
+  br i1 false, label %for.cond1.preheader, label %lbl_616.loopexit
+
+for.cond1.preheader:                              ; preds = %for.cond
+  br label %for.cond1
+
+for.cond1.loopexit:                               ; preds = %for.cond.i
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.cond1.loopexit, %for.cond1.preheader
+  br i1 false, label %for.body2, label %for.cond3
+
+for.body2:                                        ; preds = %for.cond1
+  br label %for.cond.i
+
+for.cond.i:                                       ; preds = %for.cond.i, %for.body2
+  br i1 %tobool.i, label %for.cond.i, label %for.cond1.loopexit
+
+for.cond3:                                        ; preds = %for.cond1
+  br i1 false, label %for.cond, label %if.end
+
+if.end:                                           ; preds = %for.cond3
+  ret void
+}
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll b/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
new file mode 100644
index 0000000..8389fe4
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
@@ -0,0 +1,91 @@
+; RUN: opt -loop-unswitch -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
+; RUN: opt -S -loop-unswitch -verify-loop-info -verify-dom-info %s | FileCheck %s
+
+; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted
+; STATS: 2 loop-unswitch - Number of switches unswitched
+
+; CHECK:      %1 = icmp eq i32 %c, 1
+; CHECK-NEXT: br i1 %1, label %.split.us, label %..split_crit_edge
+
+; CHECK:      ..split_crit_edge:                                ; preds = %0
+; CHECK-NEXT:   br label %.split
+
+; CHECK:      .split.us:                                        ; preds = %0
+; CHECK-NEXT:   br label %loop_begin.us
+
+; CHECK:      loop_begin.us:                                    ; preds = %loop_begin.backedge.us, %.split.us
+; CHECK-NEXT:   %var_val.us = load i32* %var
+; CHECK-NEXT:   switch i32 1, label %default.us-lcssa.us [
+; CHECK-NEXT:     i32 1, label %inc.us
+
+; CHECK:      inc.us:                                           ; preds = %loop_begin.us
+; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   br label %loop_begin.backedge.us
+
+; CHECK:      .split:                                           ; preds = %..split_crit_edge
+; CHECK-NEXT:   %2 = icmp eq i32 %c, 2
+; CHECK-NEXT:   br i1 %2, label %.split.split.us, label %.split..split.split_crit_edge
+
+; CHECK:      .split..split.split_crit_edge:                    ; preds = %.split
+; CHECK-NEXT:   br label %.split.split
+
+; CHECK:      .split.split.us:                                  ; preds = %.split
+; CHECK-NEXT:   br label %loop_begin.us1
+
+; CHECK:      loop_begin.us1:                                   ; preds = %loop_begin.backedge.us5, %.split.split.us
+; CHECK-NEXT:   %var_val.us2 = load i32* %var
+; CHECK-NEXT:   switch i32 2, label %default.us-lcssa.us-lcssa.us [
+; CHECK-NEXT:     i32 1, label %inc.us3
+; CHECK-NEXT:     i32 2, label %dec.us4
+; CHECK-NEXT:   ]
+
+; CHECK:      dec.us4:                                          ; preds = %loop_begin.us1
+; CHECK-NEXT:   call void @decf() noreturn nounwind
+; CHECK-NEXT:   br label %loop_begin.backedge.us5
+
+; CHECK:      .split.split:                                     ; preds = %.split..split.split_crit_edge
+; CHECK-NEXT:   br label %loop_begin
+
+; CHECK:      loop_begin:                                       ; preds = %loop_begin.backedge, %.split.split
+; CHECK-NEXT:   %var_val = load i32* %var
+; CHECK-NEXT:   switch i32 %c, label %default.us-lcssa.us-lcssa [
+; CHECK-NEXT:     i32 1, label %inc
+; CHECK-NEXT:     i32 2, label %dec
+; CHECK-NEXT:   ]
+
+; CHECK:      inc:                                              ; preds = %loop_begin
+; CHECK-NEXT:   br i1 true, label %us-unreachable.us-lcssa, label %inc.split
+
+; CHECK:      dec:                                              ; preds = %loop_begin
+; CHECK-NEXT:   br i1 true, label %us-unreachable6, label %dec.split
+
+define i32 @test(i32* %var) {
+  %mem = alloca i32
+  store i32 2, i32* %mem
+  %c = load i32* %mem
+
+  br label %loop_begin
+
+loop_begin:
+
+  %var_val = load i32* %var
+
+  switch i32 %c, label %default [
+      i32 1, label %inc
+      i32 2, label %dec
+  ]
+
+inc:
+  call void @incf() noreturn nounwind
+  br label %loop_begin
+dec:
+  call void @decf() noreturn nounwind
+  br label %loop_begin
+default:  
+  br label %loop_exit
+loop_exit:
+  ret i32 0
+}
+
+declare void @incf() noreturn
+declare void @decf() noreturn
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
new file mode 100644
index 0000000..05d98d5
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
@@ -0,0 +1,84 @@
+; RUN: opt -loop-unswitch -loop-unswitch-threshold 13 -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
+; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 13 -verify-loop-info -verify-dom-info %s | FileCheck %s
+
+; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted
+; STATS: 1 loop-unswitch - Number of switches unswitched
+
+; ModuleID = '../llvm/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll'
+
+; CHECK:        %1 = icmp eq i32 %c, 1
+; CHECK-NEXT:   br i1 %1, label %.split.us, label %..split_crit_edge
+
+; CHECK:      ..split_crit_edge:                                ; preds = %0
+; CHECK-NEXT:   br label %.split
+
+; CHECK:      .split.us:                                        ; preds = %0
+; CHECK-NEXT:   br label %loop_begin.us
+
+; CHECK:      loop_begin.us:                                    ; preds = %loop_begin.backedge.us, %.split.us
+; CHECK:        switch i32 1, label %second_switch.us [
+; CHECK-NEXT:     i32 1, label %inc.us
+
+; CHECK:      inc.us:                                           ; preds = %second_switch.us, %loop_begin.us
+; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   br label %loop_begin.backedge.us
+
+; CHECK:      second_switch.us:                                 ; preds = %loop_begin.us
+; CHECK-NEXT:   switch i32 %d, label %default.us [
+; CHECK-NEXT:     i32 1, label %inc.us
+; CHECK-NEXT:   ]
+
+; CHECK:      .split:                                           ; preds = %..split_crit_edge
+; CHECK-NEXT:   br label %loop_begin
+
+; CHECK:      loop_begin:                                       ; preds = %loop_begin.backedge, %.split
+; CHECK:        switch i32 %c, label %second_switch [
+; CHECK-NEXT:     i32 1, label %loop_begin.inc_crit_edge
+; CHECK-NEXT:   ]
+
+; CHECK:      loop_begin.inc_crit_edge:                         ; preds = %loop_begin
+; CHECK-NEXT:   br i1 true, label %us-unreachable, label %inc
+
+; CHECK:      second_switch:                                    ; preds = %loop_begin
+; CHECK-NEXT:   switch i32 %d, label %default [
+; CHECK-NEXT:     i32 1, label %inc
+; CHECK-NEXT:   ]
+
+; CHECK:      inc:                                              ; preds = %loop_begin.inc_crit_edge, %second_switch
+; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   br label %loop_begin.backedge
+
+define i32 @test(i32* %var) {
+  %mem = alloca i32
+  store i32 2, i32* %mem
+  %c = load i32* %mem
+  %d = load i32* %mem
+
+  br label %loop_begin
+
+loop_begin:
+
+  %var_val = load i32* %var
+
+  switch i32 %c, label %second_switch [
+      i32 1, label %inc
+  ]
+
+second_switch:
+  switch i32 %d, label %default [
+      i32 1, label %inc
+  ]
+
+inc:
+  call void @incf() noreturn nounwind
+  br label %loop_begin
+
+default:  
+  br label %loop_begin
+
+loop_exit:
+  ret i32 0
+}
+
+declare void @incf() noreturn
+declare void @decf() noreturn
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
new file mode 100644
index 0000000..1b186d6
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
@@ -0,0 +1,138 @@
+; RUN: opt -loop-unswitch -loop-unswitch-threshold 1000 -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
+; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 1000 -verify-loop-info -verify-dom-info %s | FileCheck %s
+
+; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted
+; STATS: 3 loop-unswitch - Number of switches unswitched
+
+; CHECK:        %1 = icmp eq i32 %c, 1
+; CHECK-NEXT:   br i1 %1, label %.split.us, label %..split_crit_edge
+
+; CHECK:      ..split_crit_edge:                                ; preds = %0
+; CHECK-NEXT:   br label %.split
+
+; CHECK:      .split.us:                                        ; preds = %0
+; CHECK-NEXT:   %2 = icmp eq i32 %d, 1
+; CHECK-NEXT:   br i1 %2, label %.split.us.split.us, label %.split.us..split.us.split_crit_edge
+
+; CHECK:      .split.us..split.us.split_crit_edge:              ; preds = %.split.us
+; CHECK-NEXT:   br label %.split.us.split
+
+; CHECK:      .split.us.split.us:                               ; preds = %.split.us
+; CHECK-NEXT:   br label %loop_begin.us.us
+
+; CHECK:      loop_begin.us.us:                                 ; preds = %loop_begin.backedge.us.us, %.split.us.split.us
+; CHECK-NEXT:   %var_val.us.us = load i32* %var
+; CHECK-NEXT:   switch i32 1, label %second_switch.us.us [
+; CHECK-NEXT:     i32 1, label %inc.us.us
+
+; CHECK:      inc.us.us:                                        ; preds = %second_switch.us.us, %loop_begin.us.us
+; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   br label %loop_begin.backedge.us.us
+
+; CHECK:      second_switch.us.us:                              ; preds = %loop_begin.us.us
+; CHECK-NEXT:   switch i32 1, label %default.us.us [
+; CHECK-NEXT:     i32 1, label %inc.us.us
+
+; CHECK:      .split.us.split:                                  ; preds = %.split.us..split.us.split_crit_edge
+; CHECK-NEXT:   br label %loop_begin.us
+
+; CHECK:      loop_begin.us:                                    ; preds = %loop_begin.backedge.us, %.split.us.split
+; CHECK-NEXT:   %var_val.us = load i32* %var
+; CHECK-NEXT:   switch i32 1, label %second_switch.us [
+; CHECK-NEXT:     i32 1, label %inc.us
+
+; CHECK:      inc.us:                                           ; preds = %second_switch.us.inc.us_crit_edge, %loop_begin.us
+; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   br label %loop_begin.backedge.us
+
+; CHECK:      second_switch.us:                                 ; preds = %loop_begin.us
+; CHECK-NEXT:   switch i32 %d, label %default.us [
+; CHECK-NEXT:     i32 1, label %second_switch.us.inc.us_crit_edge
+; CHECK-NEXT:   ]
+
+; CHECK:      second_switch.us.inc.us_crit_edge:                ; preds = %second_switch.us
+; CHECK-NEXT:   br i1 true, label %us-unreachable8, label %inc.us
+
+; CHECK:      .split:                                           ; preds = %..split_crit_edge
+; CHECK-NEXT:   %3 = icmp eq i32 %d, 1
+; CHECK-NEXT:   br i1 %3, label %.split.split.us, label %.split..split.split_crit_edge
+
+; CHECK:      .split..split.split_crit_edge:                    ; preds = %.split
+; CHECK-NEXT:   br label %.split.split
+
+; CHECK:      .split.split.us:                                  ; preds = %.split
+; CHECK-NEXT:   br label %loop_begin.us1
+
+; CHECK:      loop_begin.us1:                                   ; preds = %loop_begin.backedge.us6, %.split.split.us
+; CHECK-NEXT:   %var_val.us2 = load i32* %var
+; CHECK-NEXT:   switch i32 %c, label %second_switch.us4 [
+; CHECK-NEXT:     i32 1, label %loop_begin.inc_crit_edge.us
+; CHECK-NEXT:   ]
+
+; CHECK:      inc.us3:                                          ; preds = %loop_begin.inc_crit_edge.us, %second_switch.us4
+; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   br label %loop_begin.backedge.us6
+
+; CHECK:      second_switch.us4:                                ; preds = %loop_begin.us1
+; CHECK-NEXT:   switch i32 1, label %default.us5 [
+; CHECK-NEXT:     i32 1, label %inc.us3
+; CHECK-NEXT:   ]
+
+; CHECK:      loop_begin.inc_crit_edge.us:                      ; preds = %loop_begin.us1
+; CHECK-NEXT:   br i1 true, label %us-unreachable.us-lcssa.us, label %inc.us3
+
+; CHECK:      .split.split:                                     ; preds = %.split..split.split_crit_edge
+; CHECK-NEXT:   br label %loop_begin
+
+; CHECK:      loop_begin:                                       ; preds = %loop_begin.backedge, %.split.split
+; CHECK-NEXT:   %var_val = load i32* %var
+; CHECK-NEXT:   switch i32 %c, label %second_switch [
+; CHECK-NEXT:     i32 1, label %loop_begin.inc_crit_edge
+; CHECK-NEXT:   ]
+
+; CHECK:      loop_begin.inc_crit_edge:                         ; preds = %loop_begin
+; CHECK-NEXT:   br i1 true, label %us-unreachable.us-lcssa, label %inc
+
+; CHECK:      second_switch:                                    ; preds = %loop_begin
+; CHECK-NEXT:   switch i32 %d, label %default [
+; CHECK-NEXT:     i32 1, label %second_switch.inc_crit_edge
+; CHECK-NEXT:   ]
+
+; CHECK:      second_switch.inc_crit_edge:                      ; preds = %second_switch
+; CHECK-NEXT:   br i1 true, label %us-unreachable7, label %inc
+
+
+define i32 @test(i32* %var) {
+  %mem = alloca i32
+  store i32 2, i32* %mem
+  %c = load i32* %mem
+  %d = load i32* %mem
+
+  br label %loop_begin
+
+loop_begin:
+
+  %var_val = load i32* %var
+
+  switch i32 %c, label %second_switch [
+      i32 1, label %inc
+  ]
+
+second_switch:
+  switch i32 %d, label %default [
+      i32 1, label %inc
+  ]
+
+inc:
+  call void @incf() noreturn nounwind
+  br label %loop_begin
+
+default:  
+  br label %loop_begin
+
+loop_exit:
+  ret i32 0
+}
+
+declare void @incf() noreturn
+declare void @decf() noreturn
diff --git a/test/Transforms/LoopUnswitch/2012-04-02-IndirectBr.ll b/test/Transforms/LoopUnswitch/2012-04-02-IndirectBr.ll
new file mode 100644
index 0000000..c92f0a2
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/2012-04-02-IndirectBr.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -S -loop-unswitch -verify-loop-info -verify-dom-info | FileCheck %s
+; PR12343: -loop-unswitch crash on indirect branch
+
+; CHECK:       %0 = icmp eq i64 undef, 0
+; CHECK-NEXT:  br i1 %0, label %"5", label %"4"
+
+; CHECK:       "5":                                              ; preds = %entry
+; CHECK-NEXT:  br label %"16"
+
+; CHECK:       "16":                                             ; preds = %"22", %"5"
+; CHECK-NEXT:  indirectbr i8* undef, [label %"22", label %"33"]
+
+; CHECK:       "22":                                             ; preds = %"16"
+; CHECK-NEXT:  br i1 %0, label %"16", label %"26"
+
+; CHECK:       "26":                                             ; preds = %"22"
+; CHECK-NEXT:  unreachable
+
+define void @foo() {
+entry:
+  %0 = icmp eq i64 undef, 0
+  br i1 %0, label %"5", label %"4"
+
+"4":                                              ; preds = %entry
+  unreachable
+
+"5":                                              ; preds = %entry
+  br label %"16"
+
+"16":                                             ; preds = %"22", %"5"
+  indirectbr i8* undef, [label %"22", label %"33"]
+
+"22":                                             ; preds = %"16"
+  br i1 %0, label %"16", label %"26"
+
+"26":                                             ; preds = %"22"
+  unreachable
+
+"33":                                             ; preds = %"16"
+  unreachable
+}
diff --git a/test/Transforms/LoopUnswitch/dg.exp b/test/Transforms/LoopUnswitch/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LoopUnswitch/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LoopUnswitch/lit.local.cfg b/test/Transforms/LoopUnswitch/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LowerAtomic/dg.exp b/test/Transforms/LowerAtomic/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LowerAtomic/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LowerAtomic/lit.local.cfg b/test/Transforms/LowerAtomic/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LowerAtomic/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LowerExpectIntrinsic/dg.exp b/test/Transforms/LowerExpectIntrinsic/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LowerExpectIntrinsic/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LowerExpectIntrinsic/lit.local.cfg b/test/Transforms/LowerExpectIntrinsic/lit.local.cfg
new file mode 100644
index 0000000..c6106e4
--- /dev/null
+++ b/test/Transforms/LowerExpectIntrinsic/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Transforms/LowerInvoke/dg.exp b/test/Transforms/LowerInvoke/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LowerInvoke/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LowerInvoke/lit.local.cfg b/test/Transforms/LowerInvoke/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LowerInvoke/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LowerSwitch/dg.exp b/test/Transforms/LowerSwitch/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LowerSwitch/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LowerSwitch/lit.local.cfg b/test/Transforms/LowerSwitch/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LowerSwitch/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll b/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll
index 52a8375..ea0d515 100644
--- a/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll
+++ b/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -std-compile-opts -S | grep volatile | count 3
 ; PR1520
-; Don't promote volatile loads/stores. This is really needed to handle setjmp/lonjmp properly.
+; Don't promote load volatiles/stores. This is really needed to handle setjmp/lonjmp properly.
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i686-pc-linux-gnu"
@@ -14,7 +14,7 @@ entry:
 	%v = alloca i32, align 4		; <i32*> [#uses=3]
 	%tmp = alloca i32, align 4		; <i32*> [#uses=3]
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	volatile store i32 0, i32* %v, align 4
+	store volatile i32 0, i32* %v, align 4
 	%tmp1 = call i32 @_setjmp( %struct.__jmp_buf_tag* getelementptr ([1 x %struct.__jmp_buf_tag]* @j, i32 0, i32 0) )		; <i32> [#uses=1]
 	%tmp2 = icmp ne i32 %tmp1, 0		; <i1> [#uses=1]
 	%tmp23 = zext i1 %tmp2 to i8		; <i8> [#uses=1]
@@ -22,12 +22,12 @@ entry:
 	br i1 %toBool, label %bb, label %bb5
 
 bb:		; preds = %entry
-	%tmp4 = volatile load i32* %v, align 4		; <i32> [#uses=1]
+	%tmp4 = load volatile i32* %v, align 4		; <i32> [#uses=1]
 	store i32 %tmp4, i32* %tmp, align 4
 	br label %bb6
 
 bb5:		; preds = %entry
-	volatile store i32 1, i32* %v, align 4
+	store volatile i32 1, i32* %v, align 4
 	call void @g( )
 	store i32 0, i32* %tmp, align 4
 	br label %bb6
diff --git a/test/Transforms/Mem2Reg/dg.exp b/test/Transforms/Mem2Reg/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/Mem2Reg/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/Mem2Reg/lit.local.cfg b/test/Transforms/Mem2Reg/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/Mem2Reg/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/MemCpyOpt/dg.exp b/test/Transforms/MemCpyOpt/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/MemCpyOpt/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/MemCpyOpt/form-memset.ll b/test/Transforms/MemCpyOpt/form-memset.ll
index 1ac97e9..8832f89 100644
--- a/test/Transforms/MemCpyOpt/form-memset.ll
+++ b/test/Transforms/MemCpyOpt/form-memset.ll
@@ -57,8 +57,8 @@ entry:
 
 declare i32 @bar(...)
 
+%struct.MV = type { i16, i16 }
 
-	%struct.MV = type { i16, i16 }
 
 define void @test2() nounwind  {
 entry:
@@ -220,3 +220,31 @@ entry:
 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %2, i8 0, i64 24, i32 1, i1 false)
 }
 
+; More aggressive heuristic
+; rdar://9892684
+define void @test7(i32* nocapture %c) nounwind optsize {
+  store i32 -1, i32* %c, align 4
+  %1 = getelementptr inbounds i32* %c, i32 1
+  store i32 -1, i32* %1, align 4
+  %2 = getelementptr inbounds i32* %c, i32 2
+  store i32 -1, i32* %2, align 4
+  %3 = getelementptr inbounds i32* %c, i32 3
+  store i32 -1, i32* %3, align 4
+  %4 = getelementptr inbounds i32* %c, i32 4
+  store i32 -1, i32* %4, align 4
+; CHECK: @test7
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %5, i8 -1, i64 20, i32 4, i1 false)
+  ret void
+}
+
+%struct.test8 = type { [4 x i32] }
+
+define void @test8() {
+entry:
+  %memtmp = alloca %struct.test8, align 16
+  %0 = bitcast %struct.test8* %memtmp to <4 x i32>*
+  store <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32>* %0, align 16
+  ret void
+; CHECK: @test8
+; CHECK: store <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32>* %0, align 16
+}
diff --git a/test/Transforms/MemCpyOpt/lit.local.cfg b/test/Transforms/MemCpyOpt/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll
index 71d4d4e..63d0ebf 100644
--- a/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/test/Transforms/MemCpyOpt/memcpy.ll
@@ -59,7 +59,7 @@ define void @test3(%0* noalias sret %agg.result) nounwind  {
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %agg.result2, i8* %x.01, i32 32, i32 16, i1 false)
   ret void
 ; CHECK: @test3
-; CHECK-NEXT: %agg.result2 = bitcast 
+; CHECK-NEXT: %agg.result1 = bitcast 
 ; CHECK-NEXT: call void @llvm.memcpy
 ; CHECK-NEXT: ret void
 }
@@ -130,3 +130,21 @@ declare i32 @g(%struct.p* byval align 8)
 
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
+; PR11142 - When looking for a memcpy-memcpy dependency, don't get stuck on
+; instructions between the memcpy's that only affect the destination pointer.
+@test8.str = internal constant [7 x i8] c"ABCDEF\00"
+
+define void @test8() {
+; CHECK: test8
+; CHECK-NOT: memcpy
+  %A = tail call i8* @malloc(i32 10)
+  %B = getelementptr inbounds i8* %A, i64 2
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %B, i8* getelementptr inbounds ([7 x i8]* @test8.str, i64 0, i64 0), i32 7, i32 1, i1 false)
+  %C = tail call i8* @malloc(i32 10)
+  %D = getelementptr inbounds i8* %C, i64 2
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %D, i8* %B, i32 7, i32 1, i1 false)
+  ret void
+; CHECK: ret void
+}
+
+declare noalias i8* @malloc(i32)
diff --git a/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll b/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll
index 201903e..e3e52b4 100644
--- a/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll
+++ b/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll
@@ -75,10 +75,12 @@ bb2:                                              ; preds = %bb1, %invcont
   ret void
 
 lpad:                                             ; preds = %bb
-  %eh_ptr = call i8* @llvm.eh.exception()
-  store i8* %eh_ptr, i8** %eh_exception
+  %eh_ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+              cleanup
+  %exn = extractvalue { i8*, i32 } %eh_ptr, 0
+  store i8* %exn, i8** %eh_exception
   %eh_ptr4 = load i8** %eh_exception
-  %eh_select5 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr4, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 0)
+  %eh_select5 = extractvalue { i8*, i32 } %eh_ptr, 1
   store i32 %eh_select5, i32* %eh_selector
   %eh_select = load i32* %eh_selector
   store i32 %eh_select, i32* %save_filt.150, align 4
@@ -199,10 +201,12 @@ bb2:                                              ; preds = %bb1, %invcont
   ret void
 
 lpad:                                             ; preds = %bb
-  %eh_ptr = call i8* @llvm.eh.exception()
-  store i8* %eh_ptr, i8** %eh_exception
+  %eh_ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+              cleanup
+  %exn = extractvalue { i8*, i32 } %eh_ptr, 0
+  store i8* %exn, i8** %eh_exception
   %eh_ptr4 = load i8** %eh_exception
-  %eh_select5 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr4, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 0)
+  %eh_select5 = extractvalue { i8*, i32 } %eh_ptr, 1
   store i32 %eh_select5, i32* %eh_selector
   %eh_select = load i32* %eh_selector
   store i32 %eh_select, i32* %save_filt.148, align 4
@@ -220,10 +224,6 @@ lpad:                                             ; preds = %bb
   unreachable
 }
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
 declare i32 @__gxx_personality_v0(...)
 
 declare void @_Unwind_Resume_or_Rethrow()
diff --git a/test/Transforms/MergeFunc/dg.exp b/test/Transforms/MergeFunc/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/MergeFunc/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/MergeFunc/lit.local.cfg b/test/Transforms/MergeFunc/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/MergeFunc/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/ObjCARC/apelim.ll b/test/Transforms/ObjCARC/apelim.ll
new file mode 100644
index 0000000..8c7b5b1
--- /dev/null
+++ b/test/Transforms/ObjCARC/apelim.ll
@@ -0,0 +1,53 @@
+; RUN: opt -S -objc-arc-apelim < %s | FileCheck %s
+; rdar://10227311
+
+@llvm.global_ctors = appending global [2 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_x }, { i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_y }]
+
+@x = global i32 0
+
+declare i32 @bar() nounwind
+
+define i32 @foo() nounwind {
+entry:
+  ret i32 5
+}
+
+define internal void @__cxx_global_var_init() {
+entry:
+  %call = call i32 @foo()
+  store i32 %call, i32* @x, align 4
+  ret void
+}
+
+define internal void @__dxx_global_var_init() {
+entry:
+  %call = call i32 @bar()
+  store i32 %call, i32* @x, align 4
+  ret void
+}
+
+; CHECK: define internal void @_GLOBAL__I_x()
+; CHECK-NOT: @objc
+; CHECK: }
+define internal void @_GLOBAL__I_x() {
+entry:
+  %0 = call i8* @objc_autoreleasePoolPush() nounwind
+  call void @__cxx_global_var_init()
+  call void @objc_autoreleasePoolPop(i8* %0) nounwind
+  ret void
+}
+
+; CHECK: define internal void @_GLOBAL__I_y()
+; CHECK: %0 = call i8* @objc_autoreleasePoolPush() nounwind
+; CHECK: call void @objc_autoreleasePoolPop(i8* %0) nounwind
+; CHECK: }
+define internal void @_GLOBAL__I_y() {
+entry:
+  %0 = call i8* @objc_autoreleasePoolPush() nounwind
+  call void @__dxx_global_var_init()
+  call void @objc_autoreleasePoolPop(i8* %0) nounwind
+  ret void
+}
+
+declare i8* @objc_autoreleasePoolPush()
+declare void @objc_autoreleasePoolPop(i8*)
diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll
index 575cf42..ba2f778 100644
--- a/test/Transforms/ObjCARC/basic.ll
+++ b/test/Transforms/ObjCARC/basic.ll
@@ -3,10 +3,12 @@
 target datalayout = "e-p:64:64:64"
 
 declare i8* @objc_retain(i8*)
+declare i8* @objc_retainAutoreleasedReturnValue(i8*)
 declare void @objc_release(i8*)
 declare i8* @objc_autorelease(i8*)
+declare i8* @objc_autoreleaseReturnValue(i8*)
 declare void @objc_autoreleasePoolPop(i8*)
-declare void @objc_autoreleasePoolPush()
+declare i8* @objc_autoreleasePoolPush()
 declare i8* @objc_retainBlock(i8*)
 
 declare i8* @objc_retainedObject(i8*)
@@ -86,6 +88,37 @@ alt_return:
   ret void
 }
 
+; Don't do partial elimination into two different CFG diamonds.
+
+; CHECK: define void @test1b(
+; CHECK: entry:
+; CHECK:   tail call i8* @objc_retain(i8* %x) nounwind
+; CHECK-NOT: @objc_
+; CHECK: if.end5:
+; CHECK:   tail call void @objc_release(i8* %x) nounwind, !clang.imprecise_release !0
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test1b(i8* %x, i1 %p, i1 %q) {
+entry:
+  tail call i8* @objc_retain(i8* %x) nounwind
+  br i1 %p, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @callee()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  br i1 %q, label %if.then3, label %if.end5
+
+if.then3:                                         ; preds = %if.end
+  tail call void @use_pointer(i8* %x)
+  br label %if.end5
+
+if.end5:                                          ; preds = %if.then3, %if.end
+  tail call void @objc_release(i8* %x) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
 ; Like test0 but the pointer is passed to an intervening call,
 ; so the optimization is not safe.
 
@@ -136,7 +169,7 @@ entry:
 loop:
   %c = bitcast i32* %x to i8*
   call void @objc_release(i8* %c) nounwind
-  %j = volatile load i1* %q
+  %j = load volatile i1* %q
   br i1 %j, label %loop, label %return
 
 return:
@@ -159,7 +192,7 @@ entry:
 loop:
   %a = bitcast i32* %x to i8*
   %0 = call i8* @objc_retain(i8* %a) nounwind
-  %j = volatile load i1* %q
+  %j = load volatile i1* %q
   br i1 %j, label %loop, label %return
 
 return:
@@ -495,7 +528,7 @@ entry:
 define void @test13d(i8* %x, i64 %n) {
 entry:
   call i8* @objc_retain(i8* %x) nounwind
-  call void @objc_autoreleasePoolPush()
+  call i8* @objc_autoreleasePoolPush()
   call i8* @objc_retain(i8* %x) nounwind
   call void @use_pointer(i8* %x)
   call void @use_pointer(i8* %x)
@@ -755,7 +788,7 @@ C:
 @__block_holder_tmp_1 = external constant %block1
 define void @test23() {
 entry:
-  %0 = call i8* @objc_retainBlock(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind
+  %0 = call i8* @objc_retainBlock(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind, !clang.arc.copy_on_escape !0
   call void @bar(i32 ()* bitcast (%block1* @__block_holder_tmp_1 to i32 ()*))
   call void @bar(i32 ()* bitcast (%block1* @__block_holder_tmp_1 to i32 ()*))
   call void @objc_release(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind
@@ -770,13 +803,28 @@ entry:
 ; CHECK: }
 define void @test23b(i8* %p) {
 entry:
-  %0 = call i8* @objc_retainBlock(i8* %p) nounwind
+  %0 = call i8* @objc_retainBlock(i8* %p) nounwind, !clang.arc.copy_on_escape !0
   call void @callee()
   call void @use_pointer(i8* %p)
   call void @objc_release(i8* %p) nounwind
   ret void
 }
 
+; Don't optimize objc_retainBlock, because there's no copy_on_escape metadata.
+
+; CHECK: define void @test23c(
+; CHECK: @objc_retainBlock
+; CHECK: @objc_release
+; CHECK: }
+define void @test23c() {
+entry:
+  %0 = call i8* @objc_retainBlock(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind
+  call void @bar(i32 ()* bitcast (%block1* @__block_holder_tmp_1 to i32 ()*))
+  call void @bar(i32 ()* bitcast (%block1* @__block_holder_tmp_1 to i32 ()*))
+  call void @objc_release(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind
+  ret void
+}
+
 ; Any call can decrement a retain count.
 
 ; CHECK: define void @test24(
@@ -1354,7 +1402,7 @@ entry:
 ; CHECK-NEXT: call i8* @objc_autorelease(i8* %p)
 ; CHECK-NEXT: call void @use_pointer(i8* %p)
 ; CHECK-NEXT: call void @use_pointer(i8* %p)
-; CHECK-NEXT: call void @objc_autoreleasePoolPush()
+; CHECK-NEXT: call i8* @objc_autoreleasePoolPush()
 ; CHECK-NEXT: ret void
 ; CHECK-NEXT: }
 define void @test43b(i8* %p) {
@@ -1364,7 +1412,7 @@ entry:
   call i8* @objc_retain(i8* %p)
   call void @use_pointer(i8* %p)
   call void @use_pointer(i8* %p)
-  call void @objc_autoreleasePoolPush()
+  call i8* @objc_autoreleasePoolPush()
   call void @objc_release(i8* %p)
   ret void
 }
@@ -1497,9 +1545,11 @@ define void @test52(i8** %zz, i8** %pp) {
 
 ; Like test52, but the pointer has function type, so it's assumed to
 ; be not reference counted.
+; Oops. That's wrong. Clang sometimes uses function types gratuitously.
+; See rdar://10551239.
 
 ; CHECK: define void @test53(
-; CHECK-NOT: @objc_
+; CHECK: @objc_
 ; CHECK: }
 define void @test53(void ()** %zz, i8** %pp) {
   %p = load i8** %pp
@@ -1673,6 +1723,154 @@ define void @test61() {
   ret void
 }
 
+; Delete a retain matched by releases when one is inside the loop and the
+; other is outside the loop.
+
+; CHECK: define void @test62(
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test62(i8* %x, i1* %p) nounwind {
+entry:
+  br label %loop
+
+loop:
+  call i8* @objc_retain(i8* %x)
+  %q = load i1* %p
+  br i1 %q, label %loop.more, label %exit
+
+loop.more:
+  call void @objc_release(i8* %x)
+  br label %loop
+
+exit:
+  call void @objc_release(i8* %x)
+  ret void
+}
+
+; Like test62 but with no release in exit.
+; Don't delete anything!
+
+; CHECK: define void @test63(
+; CHECK: loop:
+; CHECK:   tail call i8* @objc_retain(i8* %x)
+; CHECK: loop.more:
+; CHECK:   call void @objc_release(i8* %x)
+; CHECK: }
+define void @test63(i8* %x, i1* %p) nounwind {
+entry:
+  br label %loop
+
+loop:
+  call i8* @objc_retain(i8* %x)
+  %q = load i1* %p
+  br i1 %q, label %loop.more, label %exit
+
+loop.more:
+  call void @objc_release(i8* %x)
+  br label %loop
+
+exit:
+  ret void
+}
+
+; Like test62 but with no release in loop.more.
+; Don't delete anything!
+
+; CHECK: define void @test64(
+; CHECK: loop:
+; CHECK:   tail call i8* @objc_retain(i8* %x)
+; CHECK: exit:
+; CHECK:   call void @objc_release(i8* %x)
+; CHECK: }
+define void @test64(i8* %x, i1* %p) nounwind {
+entry:
+  br label %loop
+
+loop:
+  call i8* @objc_retain(i8* %x)
+  %q = load i1* %p
+  br i1 %q, label %loop.more, label %exit
+
+loop.more:
+  br label %loop
+
+exit:
+  call void @objc_release(i8* %x)
+  ret void
+}
+
+; Move an autorelease past a phi with a null.
+
+; CHECK: define i8* @test65(
+; CHECK: if.then:
+; CHECK:   call i8* @objc_autorelease(
+; CHECK: return:
+; CHECK-NOT: @objc_autorelease
+; CHECK: }
+define i8* @test65(i1 %x) {
+entry:
+  br i1 %x, label %return, label %if.then
+
+if.then:                                          ; preds = %entry
+  %c = call i8* @returner()
+  %s = call i8* @objc_retainAutoreleasedReturnValue(i8* %c) nounwind
+  br label %return
+
+return:                                           ; preds = %if.then, %entry
+  %retval = phi i8* [ %s, %if.then ], [ null, %entry ]
+  %q = call i8* @objc_autorelease(i8* %retval) nounwind
+  ret i8* %retval
+}
+
+; Don't move an autorelease past an autorelease pool boundary.
+
+; CHECK: define i8* @test65b(
+; CHECK: if.then:
+; CHECK-NOT: @objc_autorelease
+; CHECK: return:
+; CHECK:   call i8* @objc_autorelease(
+; CHECK: }
+define i8* @test65b(i1 %x) {
+entry:
+  %t = call i8* @objc_autoreleasePoolPush()
+  br i1 %x, label %return, label %if.then
+
+if.then:                                          ; preds = %entry
+  %c = call i8* @returner()
+  %s = call i8* @objc_retainAutoreleasedReturnValue(i8* %c) nounwind
+  br label %return
+
+return:                                           ; preds = %if.then, %entry
+  %retval = phi i8* [ %s, %if.then ], [ null, %entry ]
+  call void @objc_autoreleasePoolPop(i8* %t)
+  %q = call i8* @objc_autorelease(i8* %retval) nounwind
+  ret i8* %retval
+}
+
+; Don't move an autoreleaseReuturnValue, which would break
+; the RV optimization.
+
+; CHECK: define i8* @test65c(
+; CHECK: if.then:
+; CHECK-NOT: @objc_autorelease
+; CHECK: return:
+; CHECK:   call i8* @objc_autoreleaseReturnValue(
+; CHECK: }
+define i8* @test65c(i1 %x) {
+entry:
+  br i1 %x, label %return, label %if.then
+
+if.then:                                          ; preds = %entry
+  %c = call i8* @returner()
+  %s = call i8* @objc_retainAutoreleasedReturnValue(i8* %c) nounwind
+  br label %return
+
+return:                                           ; preds = %if.then, %entry
+  %retval = phi i8* [ %s, %if.then ], [ null, %entry ]
+  %q = call i8* @objc_autoreleaseReturnValue(i8* %retval) nounwind
+  ret i8* %retval
+}
+
 declare void @bar(i32 ()*)
 
 ; A few real-world testcases.
diff --git a/test/Transforms/ObjCARC/contract-storestrong-ivar.ll b/test/Transforms/ObjCARC/contract-storestrong-ivar.ll
index 4ad78e7..4a9b314 100644
--- a/test/Transforms/ObjCARC/contract-storestrong-ivar.ll
+++ b/test/Transforms/ObjCARC/contract-storestrong-ivar.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -objc-arc-contract -S < %s | FileCheck %s
 
-; CHECK: call void @objc_storeStrong(i8**
+; CHECK: tail call void @objc_storeStrong(i8**
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin11.0.0"
diff --git a/test/Transforms/ObjCARC/contract-storestrong.ll b/test/Transforms/ObjCARC/contract-storestrong.ll
index 25c93f4..4ff0596 100644
--- a/test/Transforms/ObjCARC/contract-storestrong.ll
+++ b/test/Transforms/ObjCARC/contract-storestrong.ll
@@ -9,7 +9,7 @@ declare void @objc_release(i8*)
 
 ; CHECK: define void @test0(
 ; CHECK: entry:
-; CHECK-NEXT: call void @objc_storeStrong(i8** @x, i8* %p) nounwind
+; CHECK-NEXT: tail call void @objc_storeStrong(i8** @x, i8* %p) nounwind
 ; CHECK-NEXT: ret void
 define void @test0(i8* %p) {
 entry:
@@ -33,7 +33,7 @@ entry:
 define void @test1(i8* %p) {
 entry:
   %0 = tail call i8* @objc_retain(i8* %p) nounwind
-  %tmp = volatile load i8** @x, align 8
+  %tmp = load volatile i8** @x, align 8
   store i8* %0, i8** @x, align 8
   tail call void @objc_release(i8* %tmp) nounwind
   ret void
@@ -53,7 +53,7 @@ define void @test2(i8* %p) {
 entry:
   %0 = tail call i8* @objc_retain(i8* %p) nounwind
   %tmp = load i8** @x, align 8
-  volatile store i8* %0, i8** @x, align 8
+  store volatile i8* %0, i8** @x, align 8
   tail call void @objc_release(i8* %tmp) nounwind
   ret void
 }
diff --git a/test/Transforms/ObjCARC/contract.ll b/test/Transforms/ObjCARC/contract.ll
index 04ae3ca..c48f8a5 100644
--- a/test/Transforms/ObjCARC/contract.ll
+++ b/test/Transforms/ObjCARC/contract.ll
@@ -143,3 +143,21 @@ define i8* @test7(i8* %p) {
   %2 = tail call i8* @objc_autoreleaseReturnValue(i8* %p)
   ret i8* %p
 }
+
+; Do the return value substitution for PHI nodes too.
+
+; CHECK: define i8* @test8(
+; CHECK: %retval = phi i8* [ %p, %if.then ], [ null, %entry ]
+; CHECK: }
+define i8* @test8(i1 %x, i8* %c) {
+entry:
+  br i1 %x, label %return, label %if.then
+
+if.then:                                          ; preds = %entry
+  %p = call i8* @objc_retain(i8* %c) nounwind
+  br label %return
+
+return:                                           ; preds = %if.then, %entry
+  %retval = phi i8* [ %c, %if.then ], [ null, %entry ]
+  ret i8* %retval
+}
diff --git a/test/Transforms/ObjCARC/dg.exp b/test/Transforms/ObjCARC/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/ObjCARC/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/ObjCARC/invoke.ll b/test/Transforms/ObjCARC/invoke.ll
index cf971e4..76e82a5 100644
--- a/test/Transforms/ObjCARC/invoke.ll
+++ b/test/Transforms/ObjCARC/invoke.ll
@@ -2,9 +2,11 @@
 
 declare i8* @objc_retain(i8*)
 declare void @objc_release(i8*)
+declare i8* @objc_retainAutoreleasedReturnValue(i8*)
 declare i8* @objc_msgSend(i8*, i8*, ...)
 declare void @use_pointer(i8*)
 declare void @callee()
+declare i8* @returner()
 
 ; ARCOpt shouldn't try to move the releases to the block containing the invoke.
 
@@ -68,6 +70,149 @@ done:
   ret void
 }
 
+; The optimizer should ignore invoke unwind paths consistently.
+; PR12265
+
+; CHECK: define void @test2() {
+; CHECK: invoke.cont:
+; CHECK-NEXT: call i8* @objc_retain
+; CHEK-NOT: @objc
+; CHECK: finally.cont:
+; CHECK-NEXT: call void @objc_release
+; CHEK-NOT: @objc
+; CHECK: finally.rethrow:
+; CHEK-NOT: @objc
+; CHECK: }
+define void @test2() {
+entry:
+  %call = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* ()*)()
+          to label %invoke.cont unwind label %finally.rethrow, !clang.arc.no_objc_arc_exceptions !0
+
+invoke.cont:                                      ; preds = %entry
+  %tmp1 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
+  call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void ()*)(), !clang.arc.no_objc_arc_exceptions !0
+  invoke void @use_pointer(i8* %call)
+          to label %finally.cont unwind label %finally.rethrow, !clang.arc.no_objc_arc_exceptions !0
+
+finally.cont:                                     ; preds = %invoke.cont
+  tail call void @objc_release(i8* %call) nounwind, !clang.imprecise_release !0
+  ret void
+
+finally.rethrow:                                  ; preds = %invoke.cont, %entry
+  %tmp2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+          catch i8* null
+  unreachable
+}
+
+; Don't try to place code on invoke critical edges.
+
+; CHECK: define void @test3(
+; CHECK: if.end:
+; CHECK-NEXT: call void @objc_release(i8* %p) nounwind
+; CHECK-NEXT: ret void
+define void @test3(i8* %p, i1 %b) {
+entry:
+  %0 = call i8* @objc_retain(i8* %p)
+  call void @callee()
+  br i1 %b, label %if.else, label %if.then
+
+if.then:
+  invoke void @use_pointer(i8* %p)
+          to label %if.end unwind label %lpad, !clang.arc.no_objc_arc_exceptions !0
+
+if.else:
+  invoke void @use_pointer(i8* %p)
+          to label %if.end unwind label %lpad, !clang.arc.no_objc_arc_exceptions !0
+
+lpad:
+  %r = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+       cleanup
+  ret void
+
+if.end:
+  call void @objc_release(i8* %p)
+  ret void
+}
+
+; Like test3, but with ARC-relevant exception handling.
+
+; CHECK: define void @test4(
+; CHECK: lpad:
+; CHECK-NEXT: %r = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: call void @objc_release(i8* %p) nounwind
+; CHECK-NEXT: ret void
+; CHECK: if.end:
+; CHECK-NEXT: call void @objc_release(i8* %p) nounwind
+; CHECK-NEXT: ret void
+define void @test4(i8* %p, i1 %b) {
+entry:
+  %0 = call i8* @objc_retain(i8* %p)
+  call void @callee()
+  br i1 %b, label %if.else, label %if.then
+
+if.then:
+  invoke void @use_pointer(i8* %p)
+          to label %if.end unwind label %lpad
+
+if.else:
+  invoke void @use_pointer(i8* %p)
+          to label %if.end unwind label %lpad
+
+lpad:
+  %r = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+       cleanup
+  call void @objc_release(i8* %p)
+  ret void
+
+if.end:
+  call void @objc_release(i8* %p)
+  ret void
+}
+
+; Don't turn the retainAutoreleaseReturnValue into retain, because it's
+; for an invoke which we can assume codegen will put immediately prior.
+
+; CHECK: define void @test5(
+; CHECK: call i8* @objc_retainAutoreleasedReturnValue(i8* %z)
+; CHECK: }
+define void @test5() {
+entry:
+  %z = invoke i8* @returner()
+          to label %if.end unwind label %lpad, !clang.arc.no_objc_arc_exceptions !0
+
+lpad:
+  %r13 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+          cleanup
+  ret void
+
+if.end:
+  call i8* @objc_retainAutoreleasedReturnValue(i8* %z)
+  ret void
+}
+
+; Like test5, but there's intervening code.
+
+; CHECK: define void @test6(
+; CHECK: call i8* @objc_retain(i8* %z)
+; CHECK: }
+define void @test6() {
+entry:
+  %z = invoke i8* @returner()
+          to label %if.end unwind label %lpad, !clang.arc.no_objc_arc_exceptions !0
+
+lpad:
+  %r13 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+          cleanup
+  ret void
+
+if.end:
+  call void @callee()
+  call i8* @objc_retainAutoreleasedReturnValue(i8* %z)
+  ret void
+}
+
 declare i32 @__gxx_personality_v0(...)
+declare i32 @__objc_personality_v0(...)
 
 !0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/lit.local.cfg b/test/Transforms/ObjCARC/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/ObjCARC/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/ObjCARC/nested.ll b/test/Transforms/ObjCARC/nested.ll
index 9eada8a..a618a21 100644
--- a/test/Transforms/ObjCARC/nested.ll
+++ b/test/Transforms/ObjCARC/nested.ll
@@ -484,12 +484,14 @@ forcoll.empty:
   ret void
 }
 
-; Delete a nested retain+release pair.
+; TODO: Delete a nested retain+release pair.
+; The optimizer currently can't do this, because of a split loop backedge.
+; See test9b for the same testcase without a split backedge.
 
 ; CHECK: define void @test9(
 ; CHECK: call i8* @objc_retain
 ; CHECK: call i8* @objc_retain
-; CHECK-NOT: @objc_retain
+; CHECK: call i8* @objc_retain
 ; CHECK: }
 define void @test9() nounwind {
 entry:
@@ -551,13 +553,79 @@ forcoll.empty:
   ret void
 }
 
-; Delete a nested retain+release pair.
+; Like test9, but without a split backedge. This we can optimize.
 
-; CHECK: define void @test10(
+; CHECK: define void @test9b(
 ; CHECK: call i8* @objc_retain
 ; CHECK: call i8* @objc_retain
 ; CHECK-NOT: @objc_retain
 ; CHECK: }
+define void @test9b() nounwind {
+entry:
+  %state.ptr = alloca %struct.__objcFastEnumerationState, align 8
+  %items.ptr = alloca [16 x i8*], align 8
+  %call = call i8* @returner()
+  %0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
+  %call1 = call i8* @returner()
+  %1 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call1) nounwind
+  %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
+  %2 = call i8* @objc_retain(i8* %0) nounwind
+  %tmp3 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %call4 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp3, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
+  %iszero = icmp eq i64 %call4, 0
+  br i1 %iszero, label %forcoll.empty, label %forcoll.loopinit
+
+forcoll.loopinit:
+  %mutationsptr.ptr = getelementptr inbounds %struct.__objcFastEnumerationState* %state.ptr, i64 0, i32 2
+  %mutationsptr = load i64** %mutationsptr.ptr, align 8
+  %forcoll.initial-mutations = load i64* %mutationsptr, align 8
+  br label %forcoll.loopbody.outer
+
+forcoll.loopbody.outer:
+  %forcoll.count.ph = phi i64 [ %call4, %forcoll.loopinit ], [ %call7, %forcoll.refetch ]
+  %tmp9 = icmp ugt i64 %forcoll.count.ph, 1
+  %umax = select i1 %tmp9, i64 %forcoll.count.ph, i64 1
+  br label %forcoll.loopbody
+
+forcoll.loopbody:
+  %forcoll.index = phi i64 [ %phitmp, %forcoll.notmutated ], [ 0, %forcoll.loopbody.outer ]
+  %mutationsptr5 = load i64** %mutationsptr.ptr, align 8
+  %statemutations = load i64* %mutationsptr5, align 8
+  %3 = icmp eq i64 %statemutations, %forcoll.initial-mutations
+  br i1 %3, label %forcoll.notmutated, label %forcoll.mutated
+
+forcoll.mutated:
+  call void @objc_enumerationMutation(i8* %2)
+  br label %forcoll.notmutated
+
+forcoll.notmutated:
+  %phitmp = add i64 %forcoll.index, 1
+  %exitcond = icmp eq i64 %phitmp, %umax
+  br i1 %exitcond, label %forcoll.refetch, label %forcoll.loopbody
+
+forcoll.refetch:
+  %tmp6 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %call7 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp6, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
+  %4 = icmp eq i64 %call7, 0
+  br i1 %4, label %forcoll.empty, label %forcoll.loopbody.outer
+
+forcoll.empty:
+  call void @objc_release(i8* %2) nounwind
+  call void @objc_release(i8* %1) nounwind, !clang.imprecise_release !0
+  call void @objc_release(i8* %0) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+; TODO: Delete a nested retain+release pair.
+; The optimizer currently can't do this, because of a split loop backedge.
+; See test10b for the same testcase without a split backedge.
+
+; CHECK: define void @test10(
+; CHECK: call i8* @objc_retain
+; CHECK: call i8* @objc_retain
+; CHECK: call i8* @objc_retain
+; CHECK: }
 define void @test10() nounwind {
 entry:
   %state.ptr = alloca %struct.__objcFastEnumerationState, align 8
@@ -618,3 +686,68 @@ forcoll.empty:
   call void @objc_release(i8* %0) nounwind, !clang.imprecise_release !0
   ret void
 }
+
+; Like test10, but without a split backedge. This we can optimize.
+
+; CHECK: define void @test10b(
+; CHECK: call i8* @objc_retain
+; CHECK: call i8* @objc_retain
+; CHECK-NOT: @objc_retain
+; CHECK: }
+define void @test10b() nounwind {
+entry:
+  %state.ptr = alloca %struct.__objcFastEnumerationState, align 8
+  %items.ptr = alloca [16 x i8*], align 8
+  %call = call i8* @returner()
+  %0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
+  %call1 = call i8* @returner()
+  %1 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call1) nounwind
+  call void @callee()
+  %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
+  %2 = call i8* @objc_retain(i8* %0) nounwind
+  %tmp3 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %call4 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp3, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
+  %iszero = icmp eq i64 %call4, 0
+  br i1 %iszero, label %forcoll.empty, label %forcoll.loopinit
+
+forcoll.loopinit:
+  %mutationsptr.ptr = getelementptr inbounds %struct.__objcFastEnumerationState* %state.ptr, i64 0, i32 2
+  %mutationsptr = load i64** %mutationsptr.ptr, align 8
+  %forcoll.initial-mutations = load i64* %mutationsptr, align 8
+  br label %forcoll.loopbody.outer
+
+forcoll.loopbody.outer:
+  %forcoll.count.ph = phi i64 [ %call4, %forcoll.loopinit ], [ %call7, %forcoll.refetch ]
+  %tmp9 = icmp ugt i64 %forcoll.count.ph, 1
+  %umax = select i1 %tmp9, i64 %forcoll.count.ph, i64 1
+  br label %forcoll.loopbody
+
+forcoll.loopbody:
+  %forcoll.index = phi i64 [ %phitmp, %forcoll.notmutated ], [ 0, %forcoll.loopbody.outer ]
+  %mutationsptr5 = load i64** %mutationsptr.ptr, align 8
+  %statemutations = load i64* %mutationsptr5, align 8
+  %3 = icmp eq i64 %statemutations, %forcoll.initial-mutations
+  br i1 %3, label %forcoll.notmutated, label %forcoll.mutated
+
+forcoll.mutated:
+  call void @objc_enumerationMutation(i8* %2)
+  br label %forcoll.notmutated
+
+forcoll.notmutated:
+  %phitmp = add i64 %forcoll.index, 1
+  %exitcond = icmp eq i64 %phitmp, %umax
+  br i1 %exitcond, label %forcoll.refetch, label %forcoll.loopbody
+
+forcoll.refetch:
+  %tmp6 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %call7 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp6, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
+  %4 = icmp eq i64 %call7, 0
+  br i1 %4, label %forcoll.empty, label %forcoll.loopbody.outer
+
+forcoll.empty:
+  call void @objc_release(i8* %2) nounwind
+  call void @objc_release(i8* %1) nounwind, !clang.imprecise_release !0
+  call void @objc_release(i8* %0) nounwind, !clang.imprecise_release !0
+  ret void
+}
diff --git a/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll b/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll
new file mode 100644
index 0000000..9728f6e
--- /dev/null
+++ b/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll
@@ -0,0 +1,122 @@
+; RUN: opt -S -objc-arc < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+%struct.__block_byref_x = type { i8*, %struct.__block_byref_x*, i32, i32, i32 }
+%struct.__block_descriptor = type { i64, i64 }
+@_NSConcreteStackBlock = external global i8*
+@__block_descriptor_tmp = external hidden constant { i64, i64, i8*, i8*, i8*, i8* }
+
+; The optimizer should make use of the !clang.arc.no_objc_arc_exceptions
+; metadata and eliminate the retainBlock+release pair here.
+; rdar://10803830.
+
+; CHECK: define void @test0(
+; CHECK-NOT: @objc
+; CHECK: }
+define void @test0() {
+entry:
+  %x = alloca %struct.__block_byref_x, align 8
+  %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>, align 8
+  %byref.isa = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 0
+  store i8* null, i8** %byref.isa, align 8
+  %byref.forwarding = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 1
+  store %struct.__block_byref_x* %x, %struct.__block_byref_x** %byref.forwarding, align 8
+  %byref.flags = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 2
+  store i32 0, i32* %byref.flags, align 8
+  %byref.size = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 3
+  store i32 32, i32* %byref.size, align 4
+  %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 0
+  store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8
+  %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 1
+  store i32 1107296256, i32* %block.flags, align 8
+  %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 2
+  store i32 0, i32* %block.reserved, align 4
+  %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 3
+  store i8* bitcast (void (i8*)* @__foo_block_invoke_0 to i8*), i8** %block.invoke, align 8
+  %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 4
+  store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8*, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %block.descriptor, align 8
+  %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 5
+  %t1 = bitcast %struct.__block_byref_x* %x to i8*
+  store i8* %t1, i8** %block.captured, align 8
+  %t2 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block to i8*
+  %t3 = call i8* @objc_retainBlock(i8* %t2) nounwind, !clang.arc.copy_on_escape !4
+  %t4 = getelementptr inbounds i8* %t3, i64 16
+  %t5 = bitcast i8* %t4 to i8**
+  %t6 = load i8** %t5, align 8
+  %t7 = bitcast i8* %t6 to void (i8*)*
+  invoke void %t7(i8* %t3)
+          to label %invoke.cont unwind label %lpad, !clang.arc.no_objc_arc_exceptions !4
+
+invoke.cont:                                      ; preds = %entry
+  call void @objc_release(i8* %t3) nounwind, !clang.imprecise_release !4
+  call void @_Block_object_dispose(i8* %t1, i32 8)
+  ret void
+
+lpad:                                             ; preds = %entry
+  %t8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+          cleanup
+  call void @_Block_object_dispose(i8* %t1, i32 8)
+  resume { i8*, i32 } %t8
+}
+
+; There is no !clang.arc.no_objc_arc_exceptions
+; metadata here, so the optimizer shouldn't eliminate anything.
+
+; CHECK: define void @test0_no_metadata(
+; CHECK: call i8* @objc_retainBlock(
+; CHECK: invoke
+; CHECK: call void @objc_release(
+; CHECK: }
+define void @test0_no_metadata() {
+entry:
+  %x = alloca %struct.__block_byref_x, align 8
+  %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>, align 8
+  %byref.isa = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 0
+  store i8* null, i8** %byref.isa, align 8
+  %byref.forwarding = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 1
+  store %struct.__block_byref_x* %x, %struct.__block_byref_x** %byref.forwarding, align 8
+  %byref.flags = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 2
+  store i32 0, i32* %byref.flags, align 8
+  %byref.size = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 3
+  store i32 32, i32* %byref.size, align 4
+  %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 0
+  store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8
+  %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 1
+  store i32 1107296256, i32* %block.flags, align 8
+  %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 2
+  store i32 0, i32* %block.reserved, align 4
+  %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 3
+  store i8* bitcast (void (i8*)* @__foo_block_invoke_0 to i8*), i8** %block.invoke, align 8
+  %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 4
+  store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8*, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %block.descriptor, align 8
+  %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 5
+  %t1 = bitcast %struct.__block_byref_x* %x to i8*
+  store i8* %t1, i8** %block.captured, align 8
+  %t2 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block to i8*
+  %t3 = call i8* @objc_retainBlock(i8* %t2) nounwind, !clang.arc.copy_on_escape !4
+  %t4 = getelementptr inbounds i8* %t3, i64 16
+  %t5 = bitcast i8* %t4 to i8**
+  %t6 = load i8** %t5, align 8
+  %t7 = bitcast i8* %t6 to void (i8*)*
+  invoke void %t7(i8* %t3)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:                                      ; preds = %entry
+  call void @objc_release(i8* %t3) nounwind, !clang.imprecise_release !4
+  call void @_Block_object_dispose(i8* %t1, i32 8)
+  ret void
+
+lpad:                                             ; preds = %entry
+  %t8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+          cleanup
+  call void @_Block_object_dispose(i8* %t1, i32 8)
+  resume { i8*, i32 } %t8
+}
+
+declare i8* @objc_retainBlock(i8*)
+declare void @objc_release(i8*)
+declare void @_Block_object_dispose(i8*, i32)
+declare i32 @__objc_personality_v0(...)
+declare void @__foo_block_invoke_0(i8* nocapture) uwtable ssp
+
+!4 = metadata !{}
diff --git a/test/Transforms/ObjCARC/pointer-types.ll b/test/Transforms/ObjCARC/pointer-types.ll
new file mode 100644
index 0000000..6abc939
--- /dev/null
+++ b/test/Transforms/ObjCARC/pointer-types.ll
@@ -0,0 +1,31 @@
+; RUN: opt -objc-arc -S < %s | FileCheck %s
+
+; Don't hoist @objc_release past a use of its pointer, even
+; if the use has function type, because clang uses function types
+; in dubious ways.
+; rdar://10551239
+
+; CHECK: define void @test0(
+; CHECK: %otherBlock = phi void ()* [ %b1, %if.then ], [ null, %entry ]
+; CHECK-NEXT: call void @use_fptr(void ()* %otherBlock)
+; CHECK-NEXT: %tmp11 = bitcast void ()* %otherBlock to i8*
+; CHECK-NEXT: call void @objc_release(i8* %tmp11)
+
+define void @test0(i1 %tobool, void ()* %b1) {
+entry:
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  %otherBlock = phi void ()* [ %b1, %if.then ], [ null, %entry ]
+  call void @use_fptr(void ()* %otherBlock)
+  %tmp11 = bitcast void ()* %otherBlock to i8*
+  call void @objc_release(i8* %tmp11) nounwind
+  ret void
+}
+
+declare void @use_fptr(void ()*)
+declare void @objc_release(i8*)
+
diff --git a/test/Transforms/ObjCARC/pr12270.ll b/test/Transforms/ObjCARC/pr12270.ll
new file mode 100644
index 0000000..1faae5f
--- /dev/null
+++ b/test/Transforms/ObjCARC/pr12270.ll
@@ -0,0 +1,21 @@
+; RUN: opt -disable-output -objc-arc-contract %s
+; test that we don't crash on unreachable code
+%2 = type opaque
+
+define void @_i_Test__foo(%2 *%x) {
+entry:
+  unreachable
+
+return:                                           ; No predecessors!
+  %bar = bitcast %2* %x to i8*
+  %foo = call i8* @objc_autoreleaseReturnValue(i8* %bar) nounwind
+  call void @callee()
+  call void @use_pointer(i8* %foo)
+  call void @objc_release(i8* %foo) nounwind
+  ret void
+}
+
+declare i8* @objc_autoreleaseReturnValue(i8*)
+declare void @objc_release(i8*)
+declare void @callee()
+declare void @use_pointer(i8*)
diff --git a/test/Transforms/ObjCARC/retain-block-alloca.ll b/test/Transforms/ObjCARC/retain-block-alloca.ll
index 468da91..01f2087 100644
--- a/test/Transforms/ObjCARC/retain-block-alloca.ll
+++ b/test/Transforms/ObjCARC/retain-block-alloca.ll
@@ -1,11 +1,6 @@
 ; RUN: opt -S -objc-arc < %s | FileCheck %s
 ; rdar://10209613
 
-; CHECK: define void @test
-; CHECK: %3 = call i8* @objc_retainBlock(i8* %2) nounwind
-; CHECK: @objc_msgSend
-; CHECK-NEXT: @objc_release(i8* %3)
-
 %0 = type opaque
 %struct.__block_descriptor = type { i64, i64 }
 
@@ -13,6 +8,10 @@
 @__block_descriptor_tmp = external hidden constant { i64, i64, i8*, i8*, i8*, i8* }
 @"\01L_OBJC_SELECTOR_REFERENCES_" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
 
+; CHECK: define void @test(
+; CHECK: %3 = call i8* @objc_retainBlock(i8* %2) nounwind
+; CHECK: @objc_msgSend
+; CHECK-NEXT: @objc_release(i8* %3)
 define void @test(%0* %array) uwtable {
 entry:
   %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>, align 8
@@ -41,6 +40,43 @@ entry:
   ret void
 }
 
+; Same as test, but the objc_retainBlock has a clang.arc.copy_on_escape
+; tag so it's safe to delete.
+
+; CHECK: define void @test_with_COE(
+; CHECK-NOT: @objc_retainBlock
+; CHECK: @objc_msgSend
+; CHECK: @objc_release
+; CHECK-NOT: @objc_release
+; CHECK: }
+define void @test_with_COE(%0* %array) uwtable {
+entry:
+  %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>, align 8
+  %0 = bitcast %0* %array to i8*
+  %1 = tail call i8* @objc_retain(i8* %0) nounwind
+  %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 0
+  store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8
+  %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 1
+  store i32 1107296256, i32* %block.flags, align 8
+  %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 2
+  store i32 0, i32* %block.reserved, align 4
+  %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 3
+  store i8* bitcast (void (i8*)* @__test_block_invoke_0 to i8*), i8** %block.invoke, align 8
+  %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 4
+  store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8*, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %block.descriptor, align 8
+  %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 5
+  store %0* %array, %0** %block.captured, align 8
+  %2 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block to i8*
+  %3 = call i8* @objc_retainBlock(i8* %2) nounwind, !clang.arc.copy_on_escape !0
+  %tmp2 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*)*)(i8* %0, i8* %tmp2, i8* %3)
+  call void @objc_release(i8* %3) nounwind
+  %strongdestroy = load %0** %block.captured, align 8
+  %4 = bitcast %0* %strongdestroy to i8*
+  call void @objc_release(i8* %4) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
 declare i8* @objc_retain(i8*)
 
 declare void @__test_block_invoke_0(i8* nocapture) uwtable
diff --git a/test/Transforms/ObjCARC/retain-block-load.ll b/test/Transforms/ObjCARC/retain-block-load.ll
new file mode 100644
index 0000000..a5170e3
--- /dev/null
+++ b/test/Transforms/ObjCARC/retain-block-load.ll
@@ -0,0 +1,51 @@
+; RUN: opt -objc-arc -S < %s | FileCheck %s
+
+; rdar://10803830
+; The optimizer should be able to prove that the block does not
+; "escape", so the retainBlock+release pair can be eliminated.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+%struct.__block_descriptor = type { i64, i64 }
+
+@_NSConcreteStackBlock = external global i8*
+@__block_descriptor_tmp = external global { i64, i64, i8*, i8* }
+
+; CHECK: define void @test() {
+; CHECK-NOT: @objc
+; CHECK: declare i8* @objc_retainBlock(i8*)
+; CHECK: declare void @objc_release(i8*)
+
+define void @test() {
+entry:
+  %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>, align 8
+  %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 0
+  store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8
+  %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 1
+  store i32 1073741824, i32* %block.flags, align 8
+  %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 2
+  store i32 0, i32* %block.reserved, align 4
+  %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 3
+  store i8* bitcast (i32 (i8*)* @__test_block_invoke_0 to i8*), i8** %block.invoke, align 8
+  %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 4
+  store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %block.descriptor, align 8
+  %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 5
+  store i32 4, i32* %block.captured, align 8
+  %tmp = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block to i8*
+  %tmp1 = call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+  %tmp2 = getelementptr inbounds i8* %tmp1, i64 16
+  %tmp3 = bitcast i8* %tmp2 to i8**
+  %tmp4 = load i8** %tmp3, align 8
+  %tmp5 = bitcast i8* %tmp4 to i32 (i8*)*
+  %call = call i32 %tmp5(i8* %tmp1)
+  call void @objc_release(i8* %tmp1) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+declare i32 @__test_block_invoke_0(i8* nocapture %.block_descriptor) nounwind readonly
+
+declare i8* @objc_retainBlock(i8*)
+
+declare void @objc_release(i8*)
+
+!0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/retain-block.ll b/test/Transforms/ObjCARC/retain-block.ll
new file mode 100644
index 0000000..b3b62d3
--- /dev/null
+++ b/test/Transforms/ObjCARC/retain-block.ll
@@ -0,0 +1,138 @@
+; RUN: opt -objc-arc -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64"
+
+!0 = metadata !{}
+
+declare i8* @objc_retain(i8*)
+declare void @callee(i8)
+declare void @use_pointer(i8*)
+declare void @objc_release(i8*)
+declare i8* @objc_retainBlock(i8*)
+declare i8* @objc_autorelease(i8*)
+
+; Basic retainBlock+release elimination.
+
+; CHECK: define void @test0(i8* %tmp) {
+; CHECK-NOT: @objc
+; CHECK: }
+define void @test0(i8* %tmp) {
+entry:
+  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+  tail call void @use_pointer(i8* %tmp2)
+  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+; Same as test0, but there's no copy_on_escape metadata, so there's no
+; optimization possible.
+
+; CHECK: define void @test0_no_metadata(i8* %tmp) {
+; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind
+; CHECK: tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+; CHECK: }
+define void @test0_no_metadata(i8* %tmp) {
+entry:
+  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind
+  tail call void @use_pointer(i8* %tmp2)
+  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+; Same as test0, but the pointer escapes, so there's no
+; optimization possible.
+
+; CHECK: define void @test0_escape(i8* %tmp, i8** %z) {
+; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+; CHECK: tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+; CHECK: }
+define void @test0_escape(i8* %tmp, i8** %z) {
+entry:
+  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+  store i8* %tmp2, i8** %z
+  tail call void @use_pointer(i8* %tmp2)
+  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+; Same as test0_escape, but there's no intervening call.
+
+; CHECK: define void @test0_just_escape(i8* %tmp, i8** %z) {
+; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+; CHECK: tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+; CHECK: }
+define void @test0_just_escape(i8* %tmp, i8** %z) {
+entry:
+  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+  store i8* %tmp2, i8** %z
+  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+; Basic nested retainBlock+release elimination.
+
+; CHECK: define void @test1(i8* %tmp) {
+; CHECK-NOT: @objc
+; CHECK: tail call i8* @objc_retain(i8* %tmp) nounwind
+; CHECK-NOT: @objc
+; CHECK: tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+; CHECK-NOT: @objc
+; CHECK: }
+define void @test1(i8* %tmp) {
+entry:
+  %tmp1 = tail call i8* @objc_retain(i8* %tmp) nounwind
+  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+  tail call void @use_pointer(i8* %tmp2)
+  tail call void @use_pointer(i8* %tmp2)
+  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+  tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+; Same as test1, but there's no copy_on_escape metadata, so there's no
+; retainBlock+release optimization possible. But we can still eliminate
+; the outer retain+release.
+
+; CHECK: define void @test1_no_metadata(i8* %tmp) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: tail call i8* @objc_retainBlock(i8* %tmp) nounwind
+; CHECK-NEXT: @use_pointer(i8* %tmp2)
+; CHECK-NEXT: @use_pointer(i8* %tmp2)
+; CHECK-NEXT: tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+; CHECK-NOT: @objc
+; CHECK: }
+define void @test1_no_metadata(i8* %tmp) {
+entry:
+  %tmp1 = tail call i8* @objc_retain(i8* %tmp) nounwind
+  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind
+  tail call void @use_pointer(i8* %tmp2)
+  tail call void @use_pointer(i8* %tmp2)
+  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+  tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+; Same as test1, but the pointer escapes, so there's no
+; retainBlock+release optimization possible. But we can still eliminate
+; the outer retain+release
+
+; CHECK: define void @test1_escape(i8* %tmp, i8** %z) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+; CHECK-NEXT: store i8* %tmp2, i8** %z
+; CHECK-NEXT: @use_pointer(i8* %tmp2)
+; CHECK-NEXT: @use_pointer(i8* %tmp2)
+; CHECK-NEXT: tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+; CHECK-NOT: @objc
+; CHECK: }
+define void @test1_escape(i8* %tmp, i8** %z) {
+entry:
+  %tmp1 = tail call i8* @objc_retain(i8* %tmp) nounwind
+  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+  store i8* %tmp2, i8** %z
+  tail call void @use_pointer(i8* %tmp2)
+  tail call void @use_pointer(i8* %tmp2)
+  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+  tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+  ret void
+}
diff --git a/test/Transforms/ObjCARC/retain-not-declared.ll b/test/Transforms/ObjCARC/retain-not-declared.ll
index 41bde01..f876e51 100644
--- a/test/Transforms/ObjCARC/retain-not-declared.ll
+++ b/test/Transforms/ObjCARC/retain-not-declared.ll
@@ -30,7 +30,7 @@ entry:
 
 ; CHECK: @test1(
 ; CHECK: @objc_retain(
-; CHECK: @objc_retain(
+; CHECK: @objc_retainAutoreleasedReturnValue(
 ; CHECK: @objc_release(
 ; CHECK: @objc_release(
 ; CHECK: }
diff --git a/test/Transforms/PhaseOrdering/dg.exp b/test/Transforms/PhaseOrdering/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/PhaseOrdering/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/PhaseOrdering/lit.local.cfg b/test/Transforms/PhaseOrdering/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/PhaseOrdering/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/PruneEH/dg.exp b/test/Transforms/PruneEH/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/PruneEH/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/PruneEH/lit.local.cfg b/test/Transforms/PruneEH/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/PruneEH/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/Reassociate/dg.exp b/test/Transforms/Reassociate/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/Reassociate/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/Reassociate/lit.local.cfg b/test/Transforms/Reassociate/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/Reassociate/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/SCCP/2008-05-23-UndefCallFold.ll b/test/Transforms/SCCP/2008-05-23-UndefCallFold.ll
index cd6cf97..63f41db 100644
--- a/test/Transforms/SCCP/2008-05-23-UndefCallFold.ll
+++ b/test/Transforms/SCCP/2008-05-23-UndefCallFold.ll
@@ -6,9 +6,9 @@ target triple = "i686-pc-linux-gnu"
 
 define i32 @x(i32 %b) {
 entry:
- %val = call i32 @llvm.cttz.i32(i32 undef)
+ %val = call i32 @llvm.cttz.i32(i32 undef, i1 true)
  ret i32 %val
 }
 
-declare i32 @llvm.cttz.i32(i32)
+declare i32 @llvm.cttz.i32(i32, i1)
 
diff --git a/test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll b/test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll
index 7546bf5..f62ed70 100644
--- a/test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll
+++ b/test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll
@@ -21,10 +21,6 @@ define internal i32 @f() {
 
 declare i8* @__cxa_begin_catch(i8*) nounwind
 
-declare i8* @llvm.eh.exception() nounwind
-
-declare i32 @llvm.eh.selector.i32(i8*, i8*, ...) nounwind
-
 declare void @__cxa_end_catch()
 
 declare i32 @__gxx_personality_v0(...)
diff --git a/test/Transforms/SCCP/dg.exp b/test/Transforms/SCCP/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/SCCP/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/SCCP/lit.local.cfg b/test/Transforms/SCCP/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/SCCP/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/SCCP/phitest.ll b/test/Transforms/SCCP/phitest.ll
deleted file mode 100644
index 4c5c3dc..0000000
--- a/test/Transforms/SCCP/phitest.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: opt < %s -sccp -dce -simplifycfg -S | not grep br
-
-define i32 @test(i32 %param) {
-entry:
-	%tmp.1 = icmp ne i32 %param, 0		; <i1> [#uses=1]
-	br i1 %tmp.1, label %endif.0, label %else
-else:		; preds = %entry
-	br label %endif.0
-endif.0:		; preds = %else, %entry
-	%a.0 = phi i32 [ 2, %else ], [ 3, %entry ]		; <i32> [#uses=1]
-	%b.0 = phi i32 [ 3, %else ], [ 2, %entry ]		; <i32> [#uses=1]
-	%tmp.5 = add i32 %a.0, %b.0		; <i32> [#uses=1]
-	%tmp.7 = icmp ne i32 %tmp.5, 5		; <i1> [#uses=1]
-	br i1 %tmp.7, label %UnifiedReturnBlock, label %endif.1
-endif.1:		; preds = %endif.0
-	ret i32 0
-UnifiedReturnBlock:		; preds = %endif.0
-	ret i32 2
-}
-
diff --git a/test/Transforms/ScalarRepl/2009-03-05-Aggre2Scalar-dbg.ll b/test/Transforms/ScalarRepl/2009-03-05-Aggre2Scalar-dbg.ll
deleted file mode 100644
index d71bcb9..0000000
--- a/test/Transforms/ScalarRepl/2009-03-05-Aggre2Scalar-dbg.ll
+++ /dev/null
@@ -1,184 +0,0 @@
-; RUN: opt < %s -scalarrepl -disable-output -stats |& grep "Number of aggregates converted to scalar"
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin9.6"
-	%0 = type { }		; type %0
-	%1 = type { i8*, i32, i32, i16, i16, %2, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %2, %3*, i32, [3 x i8], [1 x i8], %2, i32, i64 }		; type %1
-	%2 = type { i8*, i32 }		; type %2
-	%3 = type opaque		; type %3
-	%4 = type { i32 }		; type %4
-	%llvm.dbg.anchor.type = type { i32, i32 }
-	%llvm.dbg.basictype.type = type { i32, %0*, i8*, %0*, i32, i64, i64, i64, i32, i32 }
-	%llvm.dbg.compile_unit.type = type { i32, %0*, i32, i8*, i8*, i8*, i1, i1, i8*, i32 }
-	%llvm.dbg.composite.type = type { i32, %0*, i8*, %0*, i32, i64, i64, i64, i32, %0*, %0*, i32 }
-	%llvm.dbg.derivedtype.type = type { i32, %0*, i8*, %0*, i32, i64, i64, i64, i32, %0* }
-	%llvm.dbg.subprogram.type = type { i32, %0*, %0*, i8*, i8*, i8*, %0*, i32, %0*, i1, i1 }
-	%llvm.dbg.subrange.type = type { i32, i64, i64 }
-	%llvm.dbg.variable.type = type { i32, %0*, i8*, %0*, i32, %0* }
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
-internal constant [8 x i8] c"PR491.c\00", section "llvm.metadata"		; <[8 x i8]*>:0 [#uses=1]
-internal constant [77 x i8] c"/Volumes/Nanpura/mainline/llvm/projects/llvm-test/SingleSource/Regression/C/\00", section "llvm.metadata"		; <[77 x i8]*>:1 [#uses=1]
-internal constant [55 x i8] c"4.2.1 (Based on Apple Inc. build 5641) (LLVM build 00)\00", section "llvm.metadata"		; <[55 x i8]*>:2 [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to %0*), i32 1, i8* getelementptr ([8 x i8]* @0, i32 0, i32 0), i8* getelementptr ([77 x i8]* @1, i32 0, i32 0), i8* getelementptr ([55 x i8]* @2, i32 0, i32 0), i1 true, i1 false, i8* null, i32 0 }, section "llvm.metadata"		; <%llvm.dbg.compile_unit.type*> [#uses=1]
-internal constant [4 x i8] c"int\00", section "llvm.metadata"		; <[4 x i8]*>:3 [#uses=1]
-@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 458788, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([4 x i8]* @3, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata"		; <%llvm.dbg.basictype.type*> [#uses=1]
-internal constant [5 x i8] c"char\00", section "llvm.metadata"		; <[5 x i8]*>:4 [#uses=1]
-@llvm.dbg.basictype5 = internal constant %llvm.dbg.basictype.type { i32 458788, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([5 x i8]* @4, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 8, i64 8, i64 0, i32 0, i32 6 }, section "llvm.metadata"		; <%llvm.dbg.basictype.type*> [#uses=1]
-@llvm.dbg.derivedtype = internal constant %llvm.dbg.derivedtype.type { i32 458790, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 8, i64 8, i64 0, i32 0, %0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype5 to %0*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.derivedtype6 = internal constant %llvm.dbg.derivedtype.type { i32 458767, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 32, i64 32, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype to %0*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
-internal constant [13 x i8] c"unsigned int\00", section "llvm.metadata"		; <[13 x i8]*>:5 [#uses=1]
-@llvm.dbg.basictype8 = internal constant %llvm.dbg.basictype.type { i32 458788, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([13 x i8]* @5, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 32, i64 32, i64 0, i32 0, i32 7 }, section "llvm.metadata"		; <%llvm.dbg.basictype.type*> [#uses=1]
-@llvm.dbg.array = internal constant [3 x %0*] [%0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to %0*), %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype6 to %0*), %0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype8 to %0*)], section "llvm.metadata"		; <[3 x %0*]*> [#uses=1]
-@llvm.dbg.composite = internal constant %llvm.dbg.composite.type { i32 458773, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 0, i64 0, i64 0, i32 0, %0* null, %0* bitcast ([3 x %0*]* @llvm.dbg.array to %0*), i32 0 }, section "llvm.metadata"		; <%llvm.dbg.composite.type*> [#uses=1]
-@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 46 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
-internal constant [12 x i8] c"assert_fail\00", section "llvm.metadata"		; <[12 x i8]*>:6 [#uses=1]
-@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 458798, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to %0*), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([12 x i8]* @6, i32 0, i32 0), i8* getelementptr ([12 x i8]* @6, i32 0, i32 0), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 4, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite to %0*), i1 true, i1 true }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=0]
-internal constant [2 x i8] c"l\00", section "llvm.metadata"		; <[2 x i8]*>:7 [#uses=1]
-@__stderrp = external global %1*		; <%1**> [#uses=4]
-internal constant [35 x i8] c"assertion failed in line %u: '%s'\0A\00", section "__TEXT,__cstring,cstring_literals"		; <[35 x i8]*>:8 [#uses=1]
-@llvm.dbg.array13 = internal constant [2 x %0*] [%0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to %0*), %0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to %0*)], section "llvm.metadata"		; <[2 x %0*]*> [#uses=1]
-@llvm.dbg.composite14 = internal constant %llvm.dbg.composite.type { i32 458773, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 0, i64 0, i64 0, i32 0, %0* null, %0* bitcast ([2 x %0*]* @llvm.dbg.array13 to %0*), i32 0 }, section "llvm.metadata"		; <%llvm.dbg.composite.type*> [#uses=1]
-internal constant [5 x i8] c"test\00", section "llvm.metadata"		; <[5 x i8]*>:9 [#uses=1]
-@llvm.dbg.subprogram16 = internal constant %llvm.dbg.subprogram.type { i32 458798, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to %0*), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([5 x i8]* @9, i32 0, i32 0), i8* getelementptr ([5 x i8]* @9, i32 0, i32 0), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 10, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite14 to %0*), i1 false, i1 true }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-internal constant [9 x i8] c"long int\00", section "llvm.metadata"		; <[9 x i8]*>:10 [#uses=1]
-@llvm.dbg.basictype21 = internal constant %llvm.dbg.basictype.type { i32 458788, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([9 x i8]* @10, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata"		; <%llvm.dbg.basictype.type*> [#uses=1]
-@llvm.dbg.derivedtype22 = internal constant %llvm.dbg.derivedtype.type { i32 458765, %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram16 to %0*), i8* getelementptr ([2 x i8]* @7, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 20, i64 32, i64 32, i64 0, i32 0, %0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype21 to %0*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.subrange = internal constant %llvm.dbg.subrange.type { i32 458785, i64 0, i64 3 }, section "llvm.metadata"		; <%llvm.dbg.subrange.type*> [#uses=1]
-@llvm.dbg.array23 = internal constant [1 x %0*] [%0* bitcast (%llvm.dbg.subrange.type* @llvm.dbg.subrange to %0*)], section "llvm.metadata"		; <[1 x %0*]*> [#uses=1]
-internal constant [14 x i8] c"unsigned char\00", section "llvm.metadata"		; <[14 x i8]*>:11 [#uses=1]
-@llvm.dbg.basictype25 = internal constant %llvm.dbg.basictype.type { i32 458788, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([14 x i8]* @11, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 8, i64 8, i64 0, i32 0, i32 8 }, section "llvm.metadata"		; <%llvm.dbg.basictype.type*> [#uses=1]
-@llvm.dbg.composite26 = internal constant %llvm.dbg.composite.type { i32 458753, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 32, i64 8, i64 0, i32 0, %0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype25 to %0*), %0* bitcast ([1 x %0*]* @llvm.dbg.array23 to %0*), i32 0 }, section "llvm.metadata"		; <%llvm.dbg.composite.type*> [#uses=1]
-internal constant [2 x i8] c"c\00", section "llvm.metadata"		; <[2 x i8]*>:12 [#uses=1]
-@llvm.dbg.derivedtype28 = internal constant %llvm.dbg.derivedtype.type { i32 458765, %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram16 to %0*), i8* getelementptr ([2 x i8]* @12, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 20, i64 32, i64 8, i64 0, i32 0, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite26 to %0*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.array29 = internal constant [2 x %0*] [%0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype22 to %0*), %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype28 to %0*)], section "llvm.metadata"		; <[2 x %0*]*> [#uses=1]
-@llvm.dbg.composite30 = internal constant %llvm.dbg.composite.type { i32 458775, %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram16 to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 20, i64 32, i64 32, i64 0, i32 0, %0* null, %0* bitcast ([2 x %0*]* @llvm.dbg.array29 to %0*), i32 0 }, section "llvm.metadata"		; <%llvm.dbg.composite.type*> [#uses=1]
-internal constant [2 x i8] c"u\00", section "llvm.metadata"		; <[2 x i8]*>:13 [#uses=1]
-@llvm.dbg.variable32 = internal constant %llvm.dbg.variable.type { i32 459008, %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram16 to %0*), i8* getelementptr ([2 x i8]* @13, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 20, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite30 to %0*) }, section "llvm.metadata"		; <%llvm.dbg.variable.type*> [#uses=1]
-internal constant [11 x i8] c"u.l == 128\00", section "__TEXT,__cstring,cstring_literals"		; <[11 x i8]*>:14 [#uses=1]
-internal constant [8 x i8] c"u.l < 0\00", section "__TEXT,__cstring,cstring_literals"		; <[8 x i8]*>:15 [#uses=1]
-@llvm.dbg.array35 = internal constant [1 x %0*] [%0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to %0*)], section "llvm.metadata"		; <[1 x %0*]*> [#uses=1]
-@llvm.dbg.composite36 = internal constant %llvm.dbg.composite.type { i32 458773, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 0, i64 0, i64 0, i32 0, %0* null, %0* bitcast ([1 x %0*]* @llvm.dbg.array35 to %0*), i32 0 }, section "llvm.metadata"		; <%llvm.dbg.composite.type*> [#uses=1]
-internal constant [5 x i8] c"main\00", section "llvm.metadata"		; <[5 x i8]*>:16 [#uses=1]
-@llvm.dbg.subprogram38 = internal constant %llvm.dbg.subprogram.type { i32 458798, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to %0*), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([5 x i8]* @16, i32 0, i32 0), i8* getelementptr ([5 x i8]* @16, i32 0, i32 0), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 28, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite36 to %0*), i1 false, i1 true }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-
-declare void @llvm.dbg.func.start(%0*) nounwind readnone
-
-declare void @llvm.dbg.declare(%0*, %0*) nounwind readnone
-
-declare void @llvm.dbg.stoppoint(i32, i32, %0*) nounwind readnone
-
-declare i32 @fprintf(%1* nocapture, i8* nocapture, ...) nounwind
-
-declare void @llvm.dbg.region.end(%0*) nounwind readnone
-
-define i32 @test(i32) nounwind {
-; <label>:1
-	%2 = alloca %4, align 8		; <%4*> [#uses=7]
-	call void @llvm.dbg.func.start(%0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram16 to %0*))
-	%3 = bitcast %4* %2 to %0*		; <%0*> [#uses=1]
-	call void @llvm.dbg.declare(%0* %3, %0* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable32 to %0*))
-	call void @llvm.dbg.stoppoint(i32 21, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
-	%4 = getelementptr %4* %2, i32 0, i32 0		; <i32*> [#uses=1]
-	store i32 0, i32* %4, align 8
-	%5 = bitcast %4* %2 to i8*		; <i8*> [#uses=1]
-	store i8 -128, i8* %5, align 8
-	call void @llvm.dbg.stoppoint(i32 22, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
-	%6 = getelementptr %4* %2, i32 0, i32 0		; <i32*> [#uses=1]
-	%7 = load i32* %6, align 8		; <i32> [#uses=1]
-	%8 = icmp eq i32 %7, 128		; <i1> [#uses=1]
-	br i1 %8, label %12, label %9
-
-; <label>:9		; preds = %1
-	call void @llvm.dbg.stoppoint(i32 5, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
-	%10 = load %1** @__stderrp, align 4		; <%1*> [#uses=1]
-	%11 = call i32 (%1*, i8*, ...)* @fprintf(%1* %10, i8* getelementptr ([35 x i8]* @8, i32 0, i32 0), i32 22, i8* getelementptr ([11 x i8]* @14, i32 0, i32 0)) nounwind		; <i32> [#uses=0]
-	call void @llvm.dbg.stoppoint(i32 6, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
-	br label %12
-
-; <label>:12		; preds = %9, %1
-	%.0 = phi i32 [ 0, %9 ], [ 1, %1 ]		; <i32> [#uses=1]
-	call void @llvm.dbg.stoppoint(i32 22, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
-	%13 = and i32 %.0, %0		; <i32> [#uses=1]
-	call void @llvm.dbg.stoppoint(i32 23, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
-	%14 = getelementptr %4* %2, i32 0, i32 0		; <i32*> [#uses=1]
-	store i32 0, i32* %14, align 8
-	%15 = bitcast %4* %2 to [4 x i8]*		; <[4 x i8]*> [#uses=1]
-	%16 = getelementptr [4 x i8]* %15, i32 0, i32 3		; <i8*> [#uses=1]
-	store i8 -128, i8* %16, align 1
-	call void @llvm.dbg.stoppoint(i32 24, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
-	%17 = getelementptr %4* %2, i32 0, i32 0		; <i32*> [#uses=1]
-	%18 = load i32* %17, align 8		; <i32> [#uses=1]
-	%19 = icmp slt i32 %18, 0		; <i1> [#uses=1]
-	br i1 %19, label %23, label %20
-
-; <label>:20		; preds = %12
-	call void @llvm.dbg.stoppoint(i32 5, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
-	%21 = load %1** @__stderrp, align 4		; <%1*> [#uses=1]
-	%22 = call i32 (%1*, i8*, ...)* @fprintf(%1* %21, i8* getelementptr ([35 x i8]* @8, i32 0, i32 0), i32 24, i8* getelementptr ([8 x i8]* @15, i32 0, i32 0)) nounwind		; <i32> [#uses=0]
-	call void @llvm.dbg.stoppoint(i32 6, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
-	br label %23
-
-; <label>:23		; preds = %20, %12
-	%.01 = phi i32 [ 0, %20 ], [ 1, %12 ]		; <i32> [#uses=1]
-	call void @llvm.dbg.stoppoint(i32 24, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
-	%24 = and i32 %.01, %13		; <i32> [#uses=1]
-	call void @llvm.dbg.stoppoint(i32 25, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
-	call void @llvm.dbg.region.end(%0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram16 to %0*))
-	ret i32 %24
-}
-
-define i32 @main() nounwind {
-; <label>:0
-	%1 = alloca %4, align 8		; <%4*> [#uses=7]
-	call void @llvm.dbg.func.start(%0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram38 to %0*))
-	call void @llvm.dbg.stoppoint(i32 29, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
-	%2 = bitcast %4* %1 to %0*		; <%0*> [#uses=1]
-	call void @llvm.dbg.declare(%0* %2, %0* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable32 to %0*)) nounwind
-	call void @llvm.dbg.stoppoint(i32 21, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
-	%3 = getelementptr %4* %1, i32 0, i32 0		; <i32*> [#uses=1]
-	store i32 0, i32* %3, align 8
-	%4 = bitcast %4* %1 to i8*		; <i8*> [#uses=1]
-	store i8 -128, i8* %4, align 8
-	call void @llvm.dbg.stoppoint(i32 22, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
-	%5 = getelementptr %4* %1, i32 0, i32 0		; <i32*> [#uses=1]
-	%6 = load i32* %5, align 8		; <i32> [#uses=1]
-	%7 = icmp eq i32 %6, 128		; <i1> [#uses=1]
-	br i1 %7, label %11, label %8
-
-; <label>:8		; preds = %0
-	call void @llvm.dbg.stoppoint(i32 5, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
-	%9 = load %1** @__stderrp, align 4		; <%1*> [#uses=1]
-	%10 = call i32 (%1*, i8*, ...)* @fprintf(%1* %9, i8* getelementptr ([35 x i8]* @8, i32 0, i32 0), i32 22, i8* getelementptr ([11 x i8]* @14, i32 0, i32 0)) nounwind		; <i32> [#uses=0]
-	call void @llvm.dbg.stoppoint(i32 6, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
-	br label %11
-
-; <label>:11		; preds = %8, %0
-	%.0.i = phi i32 [ 0, %8 ], [ 1, %0 ]		; <i32> [#uses=1]
-	call void @llvm.dbg.stoppoint(i32 23, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
-	%12 = getelementptr %4* %1, i32 0, i32 0		; <i32*> [#uses=1]
-	store i32 0, i32* %12, align 8
-	%13 = bitcast %4* %1 to [4 x i8]*		; <[4 x i8]*> [#uses=1]
-	%14 = getelementptr [4 x i8]* %13, i32 0, i32 3		; <i8*> [#uses=1]
-	store i8 -128, i8* %14, align 1
-	call void @llvm.dbg.stoppoint(i32 24, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
-	%15 = getelementptr %4* %1, i32 0, i32 0		; <i32*> [#uses=1]
-	%16 = load i32* %15, align 8		; <i32> [#uses=1]
-	%17 = icmp slt i32 %16, 0		; <i1> [#uses=1]
-	br i1 %17, label %test.exit, label %18
-
-; <label>:18		; preds = %11
-	call void @llvm.dbg.stoppoint(i32 5, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
-	%19 = load %1** @__stderrp, align 4		; <%1*> [#uses=1]
-	%20 = call i32 (%1*, i8*, ...)* @fprintf(%1* %19, i8* getelementptr ([35 x i8]* @8, i32 0, i32 0), i32 24, i8* getelementptr ([8 x i8]* @15, i32 0, i32 0)) nounwind		; <i32> [#uses=0]
-	call void @llvm.dbg.stoppoint(i32 6, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
-	br label %test.exit
-
-test.exit:		; preds = %18, %11
-	%.01.i = phi i32 [ 0, %18 ], [ 1, %11 ]		; <i32> [#uses=1]
-	call void @llvm.dbg.stoppoint(i32 24, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
-	%21 = and i32 %.01.i, %.0.i		; <i32> [#uses=1]
-	call void @llvm.dbg.stoppoint(i32 25, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
-	%tmp = xor i32 %21, 1		; <i32> [#uses=1]
-	call void @llvm.dbg.stoppoint(i32 29, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
-	call void @llvm.dbg.region.end(%0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram38 to %0*))
-	ret i32 %tmp
-}
diff --git a/test/Transforms/ScalarRepl/2011-10-22-VectorCrash.ll b/test/Transforms/ScalarRepl/2011-10-22-VectorCrash.ll
new file mode 100644
index 0000000..cd21ff5
--- /dev/null
+++ b/test/Transforms/ScalarRepl/2011-10-22-VectorCrash.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -S -scalarrepl | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios5.0.0"
+
+%union.anon = type { <4 x float> }
+
+; CHECK: @test
+; CHECK-NOT: alloca
+
+define void @test() nounwind {
+entry:
+  %u = alloca %union.anon, align 16
+  %u164 = bitcast %union.anon* %u to [4 x i32]*
+  %arrayidx165 = getelementptr inbounds [4 x i32]* %u164, i32 0, i32 0
+  store i32 undef, i32* %arrayidx165, align 4
+  %v186 = bitcast %union.anon* %u to <4 x float>*
+  store <4 x float> undef, <4 x float>* %v186, align 16
+  ret void
+}
diff --git a/test/Transforms/ScalarRepl/2011-11-11-EmptyStruct.ll b/test/Transforms/ScalarRepl/2011-11-11-EmptyStruct.ll
new file mode 100644
index 0000000..da707b7
--- /dev/null
+++ b/test/Transforms/ScalarRepl/2011-11-11-EmptyStruct.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -S -scalarrepl | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+%struct.S = type { [2 x %struct.anon], double }
+%struct.anon = type {}
+
+; CHECK: @test()
+; CHECK-NOT: alloca
+; CHECK: ret double 1.0
+
+define double @test() nounwind uwtable ssp {
+entry:
+  %retval = alloca %struct.S, align 8
+  %ret = alloca %struct.S, align 8
+  %b = getelementptr inbounds %struct.S* %ret, i32 0, i32 1
+  store double 1.000000e+00, double* %b, align 8
+  %0 = bitcast %struct.S* %retval to i8*
+  %1 = bitcast %struct.S* %ret to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 8, i32 8, i1 false)
+  %2 = bitcast %struct.S* %retval to double*
+  %3 = load double* %2, align 1
+  ret double %3
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/debuginfo.ll b/test/Transforms/ScalarRepl/debuginfo.ll
deleted file mode 100644
index ae2c6cc..0000000
--- a/test/Transforms/ScalarRepl/debuginfo.ll
+++ /dev/null
@@ -1,107 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | not grep alloca
-; RUN: opt < %s -scalarrepl-ssa -S | not grep alloca
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-	%llvm.dbg.anchor.type = type { i32, i32 }
-	%llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
-	%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
-	%llvm.dbg.composite.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, { }*, { }* }
-	%llvm.dbg.derivedtype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, { }* }
-	%llvm.dbg.subprogram.type = type { i32, { }*, { }*, i8*, i8*, i8*, { }*, i32, { }*, i1, i1 }
-	%llvm.dbg.variable.type = type { i32, { }*, i8*, { }*, i32, { }* }
-	%struct.Sphere = type { %struct.Vec }
-	%struct.Vec = type { i32, i32, i32 }
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
-@.str = internal constant [6 x i8] c"r.cpp\00", section "llvm.metadata"		; <[6 x i8]*> [#uses=1]
-@.str1 = internal constant [5 x i8] c"/tmp\00", section "llvm.metadata"		; <[5 x i8]*> [#uses=1]
-@.str2 = internal constant [55 x i8] c"4.2.1 (Based on Apple Inc. build 5636) (LLVM build 00)\00", section "llvm.metadata"		; <[55 x i8]*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 4, i8* getelementptr ([6 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([55 x i8]* @.str2, i32 0, i32 0), i1 true, i1 false, i8* null }, section "llvm.metadata"		; <%llvm.dbg.compile_unit.type*> [#uses=1]
-@.str3 = internal constant [4 x i8] c"Vec\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@.str4 = internal constant [4 x i8] c"int\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 458788, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str4, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata"		; <%llvm.dbg.basictype.type*> [#uses=1]
-@.str5 = internal constant [2 x i8] c"x\00", section "llvm.metadata"		; <[2 x i8]*> [#uses=1]
-@llvm.dbg.derivedtype = internal constant %llvm.dbg.derivedtype.type { i32 458765, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([2 x i8]* @.str5, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 4, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@.str6 = internal constant [2 x i8] c"y\00", section "llvm.metadata"		; <[2 x i8]*> [#uses=1]
-@llvm.dbg.derivedtype7 = internal constant %llvm.dbg.derivedtype.type { i32 458765, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([2 x i8]* @.str6, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 4, i64 32, i64 32, i64 32, i32 0, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@.str8 = internal constant [2 x i8] c"z\00", section "llvm.metadata"		; <[2 x i8]*> [#uses=1]
-@llvm.dbg.derivedtype9 = internal constant %llvm.dbg.derivedtype.type { i32 458765, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([2 x i8]* @.str8, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 4, i64 32, i64 32, i64 64, i32 0, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.derivedtype10 = internal constant %llvm.dbg.derivedtype.type { i32 458767, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite18 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.derivedtype11 = internal constant %llvm.dbg.derivedtype.type { i32 458790, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 96, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite18 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.derivedtype12 = internal constant %llvm.dbg.derivedtype.type { i32 458768, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype11 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.array = internal constant [3 x { }*] [ { }* null, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype10 to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype12 to { }*) ], section "llvm.metadata"		; <[3 x { }*]*> [#uses=1]
-@llvm.dbg.composite13 = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([3 x { }*]* @llvm.dbg.array to { }*) }, section "llvm.metadata"		; <%llvm.dbg.composite.type*> [#uses=1]
-@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 46 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
-@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 2, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite13 to { }*), i1 false, i1 false }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@llvm.dbg.array14 = internal constant [5 x { }*] [ { }* null, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype10 to { }*), { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) ], section "llvm.metadata"		; <[5 x { }*]*> [#uses=1]
-@llvm.dbg.composite15 = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([5 x { }*]* @llvm.dbg.array14 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.composite.type*> [#uses=1]
-@llvm.dbg.subprogram16 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 5, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite15 to { }*), i1 false, i1 false }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@llvm.dbg.array17 = internal constant [5 x { }*] [ { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype7 to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype9 to { }*), { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*), { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram16 to { }*) ], section "llvm.metadata"		; <[5 x { }*]*> [#uses=1]
-@llvm.dbg.composite18 = internal constant %llvm.dbg.composite.type { i32 458771, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 2, i64 96, i64 32, i64 0, i32 0, { }* null, { }* bitcast ([5 x { }*]* @llvm.dbg.array17 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.composite.type*> [#uses=1]
-@llvm.dbg.derivedtype19 = internal constant %llvm.dbg.derivedtype.type { i32 458767, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite18 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.array20 = internal constant [5 x { }*] [ { }* null, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype19 to { }*), { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) ], section "llvm.metadata"		; <[5 x { }*]*> [#uses=1]
-@llvm.dbg.composite = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([5 x { }*]* @llvm.dbg.array20 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.composite.type*> [#uses=1]
-@.str21 = internal constant [13 x i8] c"__comp_ctor \00", section "llvm.metadata"		; <[13 x i8]*> [#uses=1]
-@.str22 = internal constant [14 x i8] c"_ZN3VecC1Eiii\00", section "llvm.metadata"		; <[14 x i8]*> [#uses=1]
-@llvm.dbg.array32 = internal constant [3 x { }*] [ { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite18 to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype12 to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype12 to { }*) ], section "llvm.metadata"		; <[3 x { }*]*> [#uses=1]
-@llvm.dbg.composite33 = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([3 x { }*]* @llvm.dbg.array32 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.composite.type*> [#uses=1]
-@.str34 = internal constant [10 x i8] c"operator-\00", section "llvm.metadata"		; <[10 x i8]*> [#uses=1]
-@.str35 = internal constant [14 x i8] c"_ZmiRK3VecS1_\00", section "llvm.metadata"		; <[14 x i8]*> [#uses=1]
-@.str41 = internal constant [7 x i8] c"Sphere\00", section "llvm.metadata"		; <[7 x i8]*> [#uses=1]
-@.str43 = internal constant [7 x i8] c"center\00", section "llvm.metadata"		; <[7 x i8]*> [#uses=1]
-@llvm.dbg.derivedtype44 = internal constant %llvm.dbg.derivedtype.type { i32 458765, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([7 x i8]* @.str43, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 14, i64 96, i64 32, i64 0, i32 1, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite18 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.derivedtype45 = internal constant %llvm.dbg.derivedtype.type { i32 458767, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite52 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.array46 = internal constant [3 x { }*] [ { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype45 to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype12 to { }*) ], section "llvm.metadata"		; <[3 x { }*]*> [#uses=1]
-@llvm.dbg.composite47 = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([3 x { }*]* @llvm.dbg.array46 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.composite.type*> [#uses=1]
-@.str48 = internal constant [11 x i8] c"ray_sphere\00", section "llvm.metadata"		; <[11 x i8]*> [#uses=1]
-@.str49 = internal constant [30 x i8] c"_ZN6Sphere10ray_sphereERK3Vec\00", section "llvm.metadata"		; <[30 x i8]*> [#uses=1]
-@llvm.dbg.subprogram50 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([11 x i8]* @.str48, i32 0, i32 0), i8* getelementptr ([11 x i8]* @.str48, i32 0, i32 0), i8* getelementptr ([30 x i8]* @.str49, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 16, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite47 to { }*), i1 false, i1 false }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@llvm.dbg.array51 = internal constant [2 x { }*] [ { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype44 to { }*), { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram50 to { }*) ], section "llvm.metadata"		; <[2 x { }*]*> [#uses=1]
-@llvm.dbg.composite52 = internal constant %llvm.dbg.composite.type { i32 458771, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([7 x i8]* @.str41, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 12, i64 96, i64 32, i64 0, i32 0, { }* null, { }* bitcast ([2 x { }*]* @llvm.dbg.array51 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.composite.type*> [#uses=1]
-@llvm.dbg.derivedtype53 = internal constant %llvm.dbg.derivedtype.type { i32 458767, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite52 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.array54 = internal constant [3 x { }*] [ { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype53 to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype12 to { }*) ], section "llvm.metadata"		; <[3 x { }*]*> [#uses=1]
-@llvm.dbg.composite55 = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([3 x { }*]* @llvm.dbg.array54 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.composite.type*> [#uses=1]
-@llvm.dbg.subprogram56 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([11 x i8]* @.str48, i32 0, i32 0), i8* getelementptr ([11 x i8]* @.str48, i32 0, i32 0), i8* getelementptr ([30 x i8]* @.str49, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 16, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite55 to { }*), i1 false, i1 true }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@.str61 = internal constant [2 x i8] c"v\00", section "llvm.metadata"		; <[2 x i8]*> [#uses=1]
-@llvm.dbg.variable62 = internal constant %llvm.dbg.variable.type { i32 459008, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram56 to { }*), i8* getelementptr ([2 x i8]* @.str61, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 17, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite18 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.variable.type*> [#uses=1]
-
-declare void @llvm.dbg.func.start({ }*) nounwind
-
-declare void @llvm.dbg.declare({ }*, { }*) nounwind
-
-declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
-
-declare void @llvm.dbg.region.end({ }*) nounwind
-
-define i32 @_ZN6Sphere10ray_sphereERK3Vec(%struct.Sphere* %this, %struct.Vec* %Orig) nounwind {
-entry:
-	%v = alloca %struct.Vec, align 8		; <%struct.Vec*> [#uses=4]
-	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram56 to { }*))
-	%0 = bitcast %struct.Vec* %v to { }*		; <{ }*> [#uses=1]
-	call void @llvm.dbg.declare({ }* %0, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable62 to { }*))
-	%1 = getelementptr %struct.Sphere* %this, i32 0, i32 0, i32 2		; <i32*> [#uses=1]
-	%2 = load i32* %1, align 4		; <i32> [#uses=1]
-	%3 = getelementptr %struct.Vec* %Orig, i32 0, i32 2		; <i32*> [#uses=1]
-	%4 = load i32* %3, align 4		; <i32> [#uses=1]
-	%5 = sub i32 %2, %4		; <i32> [#uses=1]
-	%6 = getelementptr %struct.Sphere* %this, i32 0, i32 0, i32 1		; <i32*> [#uses=1]
-	%7 = load i32* %6, align 4		; <i32> [#uses=1]
-	%8 = getelementptr %struct.Vec* %Orig, i32 0, i32 1		; <i32*> [#uses=1]
-	%9 = load i32* %8, align 4		; <i32> [#uses=1]
-	%10 = sub i32 %7, %9		; <i32> [#uses=1]
-	%11 = getelementptr %struct.Sphere* %this, i32 0, i32 0, i32 0		; <i32*> [#uses=1]
-	%12 = load i32* %11, align 4		; <i32> [#uses=1]
-	%13 = getelementptr %struct.Vec* %Orig, i32 0, i32 0		; <i32*> [#uses=1]
-	%14 = load i32* %13, align 4		; <i32> [#uses=1]
-	%15 = sub i32 %12, %14		; <i32> [#uses=1]
-	%16 = getelementptr %struct.Vec* %v, i32 0, i32 0		; <i32*> [#uses=2]
-	store i32 %15, i32* %16, align 8
-	%17 = getelementptr %struct.Vec* %v, i32 0, i32 1		; <i32*> [#uses=1]
-	store i32 %10, i32* %17, align 4
-	%18 = getelementptr %struct.Vec* %v, i32 0, i32 2		; <i32*> [#uses=1]
-	store i32 %5, i32* %18, align 8
-	call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*)) nounwind
-	call void @llvm.dbg.stoppoint(i32 9, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*)) nounwind
-	%19 = load i32* %16, align 8		; <i32> [#uses=1]
-	call void @llvm.dbg.stoppoint(i32 18, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram56 to { }*))
-	ret i32 %19
-}
diff --git a/test/Transforms/ScalarRepl/dg.exp b/test/Transforms/ScalarRepl/dg.exp
deleted file mode 100644
index 39954d8..0000000
--- a/test/Transforms/ScalarRepl/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
diff --git a/test/Transforms/ScalarRepl/lit.local.cfg b/test/Transforms/ScalarRepl/lit.local.cfg
new file mode 100644
index 0000000..c6106e4
--- /dev/null
+++ b/test/Transforms/ScalarRepl/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Transforms/ScalarRepl/negative-memset.ll b/test/Transforms/ScalarRepl/negative-memset.ll
new file mode 100644
index 0000000..e52ab46
--- /dev/null
+++ b/test/Transforms/ScalarRepl/negative-memset.ll
@@ -0,0 +1,20 @@
+; PR12202
+; RUN: opt < %s -scalarrepl -S
+; Ensure that we do not hang or crash when feeding a negative value to memset
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S32"
+target triple = "i686-pc-win32"
+
+define i32 @test() nounwind {
+entry:
+  %retval = alloca i32, align 4
+  %buff = alloca [1 x i8], align 1
+  store i32 0, i32* %retval
+  %0 = bitcast [1 x i8]* %buff to i8*
+  call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 1, i32 1, i1 false)
+  %arraydecay = getelementptr inbounds [1 x i8]* %buff, i32 0, i32 0
+  call void @llvm.memset.p0i8.i32(i8* %arraydecay, i8 -1, i32 -8, i32 1, i1 false)	; Negative 8!
+  ret i32 0
+}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/phi-cycle.ll b/test/Transforms/ScalarRepl/phi-cycle.ll
new file mode 100644
index 0000000..cb5101c
--- /dev/null
+++ b/test/Transforms/ScalarRepl/phi-cycle.ll
@@ -0,0 +1,77 @@
+; RUN: opt -S -scalarrepl-ssa < %s | FileCheck %s
+; rdar://10589171
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.foo = type { i32, i32 }
+
+@.str = private unnamed_addr constant [6 x i8] c"x=%d\0A\00", align 1
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind uwtable {
+entry:
+  %f = alloca %struct.foo, align 4
+  %x.i = getelementptr inbounds %struct.foo* %f, i64 0, i32 0
+  store i32 1, i32* %x.i, align 4
+  %y.i = getelementptr inbounds %struct.foo* %f, i64 0, i32 1
+  br label %while.cond.i
+
+; CHECK: while.cond.i:
+; CHECK-NEXT: %tmp = phi i32 [ 1, %entry ], [ %tmp2, %while.cond.backedge.i ]
+; CHECK-NEXT: %pos.0.i = phi i32 [ 1, %entry ], [ %xtmp.i, %while.cond.backedge.i ]
+; CHECK-NEXT: %left.0.i = phi i32 [ 1, %entry ], [ %dec.i, %while.cond.backedge.i ]
+; CHECK-NOT: phi
+while.cond.i:                                     ; preds = %while.cond.backedge.i, %entry
+  %tmp = phi i32 [ 1, %entry ], [ %tmp2, %while.cond.backedge.i ]
+  %pos.0.i = phi i32 [ 1, %entry ], [ %xtmp.i, %while.cond.backedge.i ]
+  %left.0.i = phi i32 [ 1, %entry ], [ %dec.i, %while.cond.backedge.i ]
+  %cmp.i = icmp sgt i32 %left.0.i, 0
+  br i1 %cmp.i, label %while.body.i, label %while.cond.i.func.exit_crit_edge
+
+while.cond.i.func.exit_crit_edge:                 ; preds = %while.cond.i
+  br label %func.exit
+
+while.body.i:                                     ; preds = %while.cond.i
+  %dec.i = add nsw i32 %left.0.i, -1
+  switch i32 1, label %while.body.i.func.exit_crit_edge [
+    i32 0, label %while.cond.backedge.i
+    i32 1, label %sw.bb.i
+  ]
+
+while.body.i.func.exit_crit_edge:                 ; preds = %while.body.i
+  br label %func.exit
+
+sw.bb.i:                                          ; preds = %while.body.i
+  %cmp2.i = icmp eq i32 %tmp, 1
+  br i1 %cmp2.i, label %if.then.i, label %if.end.i
+
+if.then.i:                                        ; preds = %sw.bb.i
+  store i32 %pos.0.i, i32* %x.i, align 4
+  br label %if.end.i
+
+; CHECK: if.end.i:
+; CHECK-NEXT: %tmp1 = phi i32 [ %pos.0.i, %if.then.i ], [ %tmp, %sw.bb.i ]
+; CHECK-NOT: phi
+if.end.i:                                         ; preds = %if.then.i, %sw.bb.i
+  %tmp1 = phi i32 [ %pos.0.i, %if.then.i ], [ %tmp, %sw.bb.i ]
+  store i32 %tmp1, i32* %y.i, align 4
+  br label %while.cond.backedge.i
+
+; CHECK: while.cond.backedge.i:
+; CHECK-NEXT: %tmp2 = phi i32 [ %tmp1, %if.end.i ], [ %tmp, %while.body.i ]
+; CHECK-NOT: phi
+while.cond.backedge.i:                            ; preds = %if.end.i, %while.body.i
+  %tmp2 = phi i32 [ %tmp1, %if.end.i ], [ %tmp, %while.body.i ]
+  %xtmp.i = add i32 %pos.0.i, 1
+  br label %while.cond.i
+
+; CHECK: func.exit:
+; CHECK-NOT: load
+; CHECK: %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %tmp) nounwind
+func.exit:                                        ; preds = %while.body.i.func.exit_crit_edge, %while.cond.i.func.exit_crit_edge
+  %tmp3 = load i32* %x.i, align 4
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %tmp3) nounwind
+  ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
diff --git a/test/Transforms/ScalarRepl/volatile.ll b/test/Transforms/ScalarRepl/volatile.ll
index ab276b0..fadf1aa 100644
--- a/test/Transforms/ScalarRepl/volatile.ll
+++ b/test/Transforms/ScalarRepl/volatile.ll
@@ -4,9 +4,9 @@
 define i32 @voltest(i32 %T) {
 	%A = alloca {i32, i32}
 	%B = getelementptr {i32,i32}* %A, i32 0, i32 0
-	volatile store i32 %T, i32* %B
+	store volatile i32 %T, i32* %B
 
 	%C = getelementptr {i32,i32}* %A, i32 0, i32 1
-	%X = volatile load i32* %C
+	%X = load volatile i32* %C
 	ret i32 %X
 }
diff --git a/test/Transforms/SimplifyCFG/2009-01-19-UnconditionalTrappingConstantExpr.ll b/test/Transforms/SimplifyCFG/2009-01-19-UnconditionalTrappingConstantExpr.ll
index 568e61c..e2765e5 100644
--- a/test/Transforms/SimplifyCFG/2009-01-19-UnconditionalTrappingConstantExpr.ll
+++ b/test/Transforms/SimplifyCFG/2009-01-19-UnconditionalTrappingConstantExpr.ll
@@ -1,9 +1,14 @@
-; RUN: opt < %s -simplifycfg -S | grep {br i1 } | count 4
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
 ; PR3354
 ; Do not merge bb1 into the entry block, it might trap.
 
 @G = extern_weak global i32
 
+; CHECK: @test(
+; CHECK: br i1 %tmp25
+; CHECK: bb1:
+; CHECK: sdiv
+
 define i32 @test(i32 %tmp21, i32 %tmp24) {
 	%tmp25 = icmp sle i32 %tmp21, %tmp24		
 	br i1 %tmp25, label %bb2, label %bb1	
@@ -18,6 +23,11 @@ bb6:
 	ret i32 927
 }
 
+; CHECK: @test2(
+; CHECK: br i1 %tmp34
+; CHECK: bb5:
+; CHECK: sdiv
+
 define i32 @test2(i32 %tmp21, i32 %tmp24, i1 %tmp34) {
 	br i1 %tmp34, label %bb5, label %bb6
 
diff --git a/test/Transforms/SimplifyCFG/2010-10-24-OnlyUnwindInEntry.ll b/test/Transforms/SimplifyCFG/2010-10-24-OnlyUnwindInEntry.ll
deleted file mode 100644
index ebacf2f..0000000
--- a/test/Transforms/SimplifyCFG/2010-10-24-OnlyUnwindInEntry.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: opt %s -simplifycfg -disable-output
-; PR8445
-
-define void @test() {
-      unwind
-}
diff --git a/test/Transforms/SimplifyCFG/SpeculativeExec.ll b/test/Transforms/SimplifyCFG/SpeculativeExec.ll
index 5cfc77c..a61867f 100644
--- a/test/Transforms/SimplifyCFG/SpeculativeExec.ll
+++ b/test/Transforms/SimplifyCFG/SpeculativeExec.ll
@@ -1,7 +1,10 @@
-; RUN: opt < %s -simplifycfg  -S | grep select
-; RUN: opt < %s -simplifycfg  -S | grep br | count 2
+; RUN: opt < %s -simplifycfg -phi-node-folding-threshold=2 -S | FileCheck %s
 
-define i32 @t2(i32 %a, i32 %b, i32 %c) nounwind  {
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @test1(i32 %a, i32 %b, i32 %c) nounwind  {
+; CHECK: @test1
 entry:
         %tmp1 = icmp eq i32 %b, 0
         br i1 %tmp1, label %bb1, label %bb3
@@ -9,6 +12,11 @@ entry:
 bb1:            ; preds = %entry
 	%tmp2 = icmp sgt i32 %c, 1
 	br i1 %tmp2, label %bb2, label %bb3
+; CHECK: bb1:
+; CHECK-NEXT: icmp sgt i32 %c, 1
+; CHECK-NEXT: add i32 %a, 1
+; CHECK-NEXT: select i1 %tmp2, i32 %tmp3, i32 %a
+; CHECK-NEXT: br label %bb3
 
 bb2:		; preds = bb1
 	%tmp3 = add i32 %a, 1
@@ -19,3 +27,20 @@ bb3:		; preds = %bb2, %entry
         %tmp5 = sub i32 %tmp4, 1
 	ret i32 %tmp5
 }
+
+declare i8 @llvm.cttz.i8(i8, i1)
+
+define i8 @test2(i8 %a) {
+; CHECK: @test2
+  br i1 undef, label %bb_true, label %bb_false
+bb_true:
+  %b = tail call i8 @llvm.cttz.i8(i8 %a, i1 false)
+  br label %join
+bb_false:
+  br label %join
+join:
+  %c = phi i8 [%b, %bb_true], [%a, %bb_false]
+; CHECK: select
+  ret i8 %c
+}
+
diff --git a/test/Transforms/SimplifyCFG/branch-branch-dbginfo.ll b/test/Transforms/SimplifyCFG/branch-branch-dbginfo.ll
deleted file mode 100644
index 761f0d5..0000000
--- a/test/Transforms/SimplifyCFG/branch-branch-dbginfo.ll
+++ /dev/null
@@ -1,70 +0,0 @@
-; RUN: opt < %s -simplifycfg -S | grep {br i1} | count 1
-
-; ModuleID = '<stdin>'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i386-pc-linux-gnu"
-	%llvm.dbg.anchor.type = type { i32, i32 }
-	%llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
-	%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8* }
-	%llvm.dbg.derivedtype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, { }* }
-	%llvm.dbg.subprogram.type = type { i32, { }*, { }*, i8*, i8*, i8*, { }*, i32, { }*, i1, i1 }
-	%llvm.dbg.variable.type = type { i32, { }*, i8*, { }*, i32, { }* }
-@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 393262, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 4, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i1 false, i1 true }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 393216, i32 46 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 393233, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([7 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([52 x i8]* @.str2, i32 0, i32 0) }, section "llvm.metadata"		; <%llvm.dbg.compile_unit.type*> [#uses=1]
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 393216, i32 17 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
-@.str = internal constant [7 x i8] c"cond.c\00", section "llvm.metadata"		; <[7 x i8]*> [#uses=1]
-@.str1 = internal constant [5 x i8] c"/tmp\00", section "llvm.metadata"		; <[5 x i8]*> [#uses=1]
-@.str2 = internal constant [52 x i8] c"4.2.1 (Based on Apple Inc. build 5555) (LLVM build)\00", section "llvm.metadata"		; <[52 x i8]*> [#uses=1]
-@.str3 = internal constant [4 x i8] c"foo\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 393252, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str4, i32 0, i32 0), { }* null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata"		; <%llvm.dbg.basictype.type*> [#uses=1]
-@.str4 = internal constant [4 x i8] c"int\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@llvm.dbg.variable = internal constant %llvm.dbg.variable.type { i32 393473, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*), i8* getelementptr ([2 x i8]* @.str5, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 4, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) }, section "llvm.metadata"		; <%llvm.dbg.variable.type*> [#uses=0]
-@.str5 = internal constant [2 x i8] c"x\00", section "llvm.metadata"		; <[2 x i8]*> [#uses=1]
-@llvm.dbg.variable6 = internal constant %llvm.dbg.variable.type { i32 393473, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*), i8* getelementptr ([2 x i8]* @.str7, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 4, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype to { }*) }, section "llvm.metadata"		; <%llvm.dbg.variable.type*> [#uses=0]
-@.str7 = internal constant [2 x i8] c"y\00", section "llvm.metadata"		; <[2 x i8]*> [#uses=1]
-@llvm.dbg.derivedtype = internal constant %llvm.dbg.derivedtype.type { i32 393238, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([6 x i8]* @.str8, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 1, i64 0, i64 0, i64 0, i32 0, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype9 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@.str8 = internal constant [6 x i8] c"uint1\00", section "llvm.metadata"		; <[6 x i8]*> [#uses=1]
-@llvm.dbg.basictype9 = internal constant %llvm.dbg.basictype.type { i32 393252, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 7 }, section "llvm.metadata"		; <%llvm.dbg.basictype.type*> [#uses=1]
-
-define i32 @foo(i32 %x1, i1 zeroext %y2) nounwind {
-entry:
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
-	call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%tmp4 = icmp eq i32 %x1, 0		; <i1> [#uses=1]
-	br i1 %tmp4, label %bb, label %bb14
-
-bb:		; preds = %entry
-	call void @llvm.dbg.stoppoint(i32 6, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br i1 %y2, label %bb14, label %bb10
-
-bb7:		; preds = %bb
-	call void @llvm.dbg.stoppoint(i32 7, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%tmp9 = call i32 @g1(i32 %x1) nounwind		; <i32> [#uses=1]
-	ret i32 %tmp9
-
-bb10:		; preds = %bb
-	call void @llvm.dbg.stoppoint(i32 8, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%tmp12 = add i32 %x1, 1		; <i32> [#uses=1]
-	%tmp13 = call i32 @g2(i32 %tmp12) nounwind		; <i32> [#uses=1]
-	ret i32 %tmp13
-
-bb14:		; preds = %entry
-	call void @llvm.dbg.stoppoint(i32 10, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%tmp16 = call i32 @g1(i32 %x1) nounwind		; <i32> [#uses=1]
-	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
-	ret i32 %tmp16
-}
-
-declare void @llvm.dbg.func.start({ }*) nounwind
-
-declare void @llvm.dbg.declare({ }*, { }*) nounwind
-
-declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
-
-declare i32 @g1(i32)
-
-declare i32 @g2(i32)
-
-declare void @llvm.dbg.region.end({ }*) nounwind
diff --git a/test/Transforms/SimplifyCFG/branch-fold.ll b/test/Transforms/SimplifyCFG/branch-fold.ll
index 266609b5..2b29681 100644
--- a/test/Transforms/SimplifyCFG/branch-fold.ll
+++ b/test/Transforms/SimplifyCFG/branch-fold.ll
@@ -1,13 +1,19 @@
-; RUN: opt < %s -simplifycfg -S | grep {br i1} | count 1
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
 
 define void @test(i32* %P, i32* %Q, i1 %A, i1 %B) {
+; CHECK: test
+; CHECK: br i1
+; CHECK-NOT: br i1
+; CHECK: ret
+; CHECK: ret
+
+entry:
         br i1 %A, label %a, label %b
-a:              ; preds = %0
+a:
         br i1 %B, label %b, label %c
-b:              ; preds = %a, %0
+b:
         store i32 123, i32* %P
         ret void
-c:              ; preds = %a
+c:
         ret void
 }
-
diff --git a/test/Transforms/SimplifyCFG/branch_fold_dbg.ll b/test/Transforms/SimplifyCFG/branch_fold_dbg.ll
deleted file mode 100644
index 6a500de..0000000
--- a/test/Transforms/SimplifyCFG/branch_fold_dbg.ll
+++ /dev/null
@@ -1,122 +0,0 @@
-; RUN: opt < %s -simplifycfg -S | not grep br
-; END.
-
-        %llvm.dbg.anchor.type = type { i32, i32 }
-        %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
-
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"
-
-@.str = internal constant [4 x i8] c"a.c\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@.str1 = internal constant [6 x i8] c"/tmp/\00", section "llvm.metadata"	; <[6 x i8]*> [#uses=1]
-@.str2 = internal constant [55 x i8] c"4.2.1 (Based on Apple Inc. build 5636) (LLVM build 00)\00", section "llvm.metadata"		; <[55 x i8]*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([6 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([55 x i8]* @.str2, i32 0, i32 0), i1 true, i1 false, i8* null }, section "llvm.metadata"		; <%llvm.dbg.compile_unit.type*> [#uses=1]
-
-declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
-
-
-define void @main() {
-entry:
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%tmp.14.i19 = icmp eq i32 0, 2		; <i1> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br i1 %tmp.14.i19, label %endif.1.i20, label %read_min.exit
-endif.1.i20:		; preds = %entry
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%tmp.9.i.i = icmp eq i8* null, null		; <i1> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br i1 %tmp.9.i.i, label %then.i12.i, label %then.i.i
-then.i.i:		; preds = %endif.1.i20
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	ret void
-then.i12.i:		; preds = %endif.1.i20
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%tmp.9.i4.i = icmp eq i8* null, null		; <i1> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br i1 %tmp.9.i4.i, label %endif.2.i33, label %then.i5.i
-then.i5.i:		; preds = %then.i12.i
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	ret void
-endif.2.i33:		; preds = %then.i12.i
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br i1 false, label %loopexit.0.i40, label %no_exit.0.i35
-no_exit.0.i35:		; preds = %no_exit.0.i35, %endif.2.i33
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%tmp.130.i = icmp slt i32 0, 0		; <i1> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br i1 %tmp.130.i, label %loopexit.0.i40.loopexit, label %no_exit.0.i35
-loopexit.0.i40.loopexit:		; preds = %no_exit.0.i35
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %loopexit.0.i40
-loopexit.0.i40:		; preds = %loopexit.0.i40.loopexit, %endif.2.i33
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%tmp.341.i = icmp eq i32 0, 0		; <i1> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br i1 %tmp.341.i, label %loopentry.1.i, label %read_min.exit
-loopentry.1.i:		; preds = %loopexit.0.i40
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%tmp.347.i = icmp sgt i32 0, 0		; <i1> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br i1 %tmp.347.i, label %no_exit.1.i41, label %loopexit.2.i44
-no_exit.1.i41:		; preds = %endif.5.i, %loopentry.1.i
-	%indvar.i42 = phi i32 [ %indvar.next.i, %endif.5.i ], [ 0, %loopentry.1.i ]		; <i32> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%tmp.355.i = icmp eq i32 0, 3		; <i1> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br i1 %tmp.355.i, label %endif.5.i, label %read_min.exit
-endif.5.i:		; preds = %no_exit.1.i41
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%tmp.34773.i = icmp sgt i32 0, 0		; <i1> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%indvar.next.i = add i32 %indvar.i42, 1		; <i32> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br i1 %tmp.34773.i, label %no_exit.1.i41, label %loopexit.1.i.loopexit
-loopexit.1.i.loopexit:		; preds = %endif.5.i
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	ret void
-loopexit.2.i44:		; preds = %loopentry.1.i
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	ret void
-read_min.exit:		; preds = %no_exit.1.i41, %loopexit.0.i40, %entry
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%tmp.23 = icmp eq i32 0, 0		; <i1> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br i1 %tmp.23, label %endif.1, label %then.1
-then.1:		; preds = %read_min.exit
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br i1 false, label %endif.0.i, label %then.0.i
-then.0.i:		; preds = %then.1
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br i1 false, label %endif.1.i, label %then.1.i
-endif.0.i:		; preds = %then.1
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br i1 false, label %endif.1.i, label %then.1.i
-then.1.i:		; preds = %endif.0.i, %then.0.i
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br i1 false, label %getfree.exit, label %then.2.i
-endif.1.i:		; preds = %endif.0.i, %then.0.i
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br i1 false, label %getfree.exit, label %then.2.i
-then.2.i:		; preds = %endif.1.i, %then.1.i
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	ret void
-getfree.exit:		; preds = %endif.1.i, %then.1.i
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	ret void
-endif.1:		; preds = %read_min.exit
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%tmp.27.i = getelementptr i32* null, i32 0		; <i32*> [#uses=0]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br i1 false, label %loopexit.0.i15, label %no_exit.0.i14
-no_exit.0.i14:		; preds = %endif.1
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	ret void
-loopexit.0.i15:		; preds = %endif.1
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br i1 false, label %primal_start_artificial.exit, label %no_exit.1.i16
-no_exit.1.i16:		; preds = %no_exit.1.i16, %loopexit.0.i15
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br i1 false, label %primal_start_artificial.exit, label %no_exit.1.i16
-primal_start_artificial.exit:		; preds = %no_exit.1.i16, %loopexit.0.i15
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	ret void
-}
diff --git a/test/Transforms/SimplifyCFG/dg.exp b/test/Transforms/SimplifyCFG/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/SimplifyCFG/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/SimplifyCFG/hoist-common-code.dbg.ll b/test/Transforms/SimplifyCFG/hoist-common-code.dbg.ll
deleted file mode 100644
index 6fbbb1b..0000000
--- a/test/Transforms/SimplifyCFG/hoist-common-code.dbg.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; RUN: opt < %s -simplifycfg -S | not grep br
-
-
-        %llvm.dbg.anchor.type = type { i32, i32 }
-        %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
-
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"
-
-@.str = internal constant [4 x i8] c"a.c\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@.str1 = internal constant [6 x i8] c"/tmp/\00", section "llvm.metadata"	; <[6 x i8]*> [#uses=1]
-@.str2 = internal constant [55 x i8] c"4.2.1 (Based on Apple Inc. build 5636) (LLVM build 00)\00", section "llvm.metadata"		; <[55 x i8]*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([6 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([55 x i8]* @.str2, i32 0, i32 0), i1 true, i1 false, i8* null }, section "llvm.metadata"		; <%llvm.dbg.compile_unit.type*> [#uses=1]
-
-declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
-
-declare void @bar(i32)
-
-define void @test(i1 %P, i32* %Q) {
-        br i1 %P, label %T, label %F
-T:              ; preds = %0
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-        store i32 1, i32* %Q
-        %A = load i32* %Q               ; <i32> [#uses=1]
-        call void @bar( i32 %A )
-        ret void
-F:              ; preds = %0
-        store i32 1, i32* %Q
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-        %B = load i32* %Q               ; <i32> [#uses=1]
-        call void @bar( i32 %B )
-        ret void
-}
-
diff --git a/test/Transforms/SimplifyCFG/lit.local.cfg b/test/Transforms/SimplifyCFG/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/SimplifyCFG/multiple-phis.ll b/test/Transforms/SimplifyCFG/multiple-phis.ll
new file mode 100644
index 0000000..7845423
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/multiple-phis.ll
@@ -0,0 +1,39 @@
+; RUN: opt -simplifycfg -S < %s | FileCheck %s
+
+; It's not worthwhile to if-convert one of the phi nodes and leave
+; the other behind, because that still requires a branch. If
+; SimplifyCFG if-converts one of the phis, it should do both.
+
+; CHECK:      %div.high.addr.0 = select i1 %cmp1, i32 %div, i32 %high.addr.0
+; CHECK-NEXT: %low.0.add2 = select i1 %cmp1, i32 %low.0, i32 %add2
+; CHECK-NEXT: br label %while.cond
+
+define i32 @upper_bound(i32* %r, i32 %high, i32 %k) nounwind {
+entry:
+  br label %while.cond
+
+while.cond:                                       ; preds = %if.then, %if.else, %entry
+  %high.addr.0 = phi i32 [ %high, %entry ], [ %div, %if.then ], [ %high.addr.0, %if.else ]
+  %low.0 = phi i32 [ 0, %entry ], [ %low.0, %if.then ], [ %add2, %if.else ]
+  %cmp = icmp ult i32 %low.0, %high.addr.0
+  br i1 %cmp, label %while.body, label %while.end
+
+while.body:                                       ; preds = %while.cond
+  %add = add i32 %low.0, %high.addr.0
+  %div = udiv i32 %add, 2
+  %idxprom = zext i32 %div to i64
+  %arrayidx = getelementptr inbounds i32* %r, i64 %idxprom
+  %0 = load i32* %arrayidx
+  %cmp1 = icmp ult i32 %k, %0
+  br i1 %cmp1, label %if.then, label %if.else
+
+if.then:                                          ; preds = %while.body
+  br label %while.cond
+
+if.else:                                          ; preds = %while.body
+  %add2 = add i32 %div, 1
+  br label %while.cond
+
+while.end:                                        ; preds = %while.cond
+  ret i32 %low.0
+}
diff --git a/test/Transforms/SimplifyCFG/preserve-branchweights.ll b/test/Transforms/SimplifyCFG/preserve-branchweights.ll
new file mode 100644
index 0000000..c791785
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/preserve-branchweights.ll
@@ -0,0 +1,88 @@
+; RUN: opt -simplifycfg -S -o - < %s | FileCheck %s
+
+declare void @helper(i32)
+
+define void @test1(i1 %a, i1 %b) {
+; CHECK: @test1
+entry:
+  br i1 %a, label %Y, label %X, !prof !0
+; CHECK: br i1 %or.cond, label %Z, label %Y, !prof !0
+
+X:
+  %c = or i1 %b, false
+  br i1 %c, label %Z, label %Y, !prof !1
+
+Y:
+  call void @helper(i32 0)
+  ret void
+
+Z:
+  call void @helper(i32 1)
+  ret void
+}
+
+define void @test2(i1 %a, i1 %b) {
+; CHECK: @test2
+entry:
+  br i1 %a, label %X, label %Y, !prof !1
+; CHECK: br i1 %or.cond, label %Z, label %Y, !prof !1
+; CHECK-NOT: !prof
+
+X:
+  %c = or i1 %b, false
+  br i1 %c, label %Z, label %Y, !prof !2
+
+Y:
+  call void @helper(i32 0)
+  ret void
+
+Z:
+  call void @helper(i32 1)
+  ret void
+}
+
+define void @test3(i1 %a, i1 %b) {
+; CHECK: @test3
+; CHECK-NOT: !prof
+entry:
+  br i1 %a, label %X, label %Y, !prof !1
+
+X:
+  %c = or i1 %b, false
+  br i1 %c, label %Z, label %Y
+
+Y:
+  call void @helper(i32 0)
+  ret void
+
+Z:
+  call void @helper(i32 1)
+  ret void
+}
+
+define void @test4(i1 %a, i1 %b) {
+; CHECK: @test4
+; CHECK-NOT: !prof
+entry:
+  br i1 %a, label %X, label %Y
+
+X:
+  %c = or i1 %b, false
+  br i1 %c, label %Z, label %Y, !prof !1
+
+Y:
+  call void @helper(i32 0)
+  ret void
+
+Z:
+  call void @helper(i32 1)
+  ret void
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 3, i32 5}
+!1 = metadata !{metadata !"branch_weights", i32 1, i32 1}
+!2 = metadata !{metadata !"branch_weights", i32 1, i32 2}
+
+; CHECK: !0 = metadata !{metadata !"branch_weights", i32 5, i32 11}
+; CHECK: !1 = metadata !{metadata !"branch_weights", i32 1, i32 5}
+; CHECK-NOT: !2
diff --git a/test/Transforms/SimplifyCFG/select-gep.ll b/test/Transforms/SimplifyCFG/select-gep.ll
index 009f05e..7654d02 100644
--- a/test/Transforms/SimplifyCFG/select-gep.ll
+++ b/test/Transforms/SimplifyCFG/select-gep.ll
@@ -35,6 +35,6 @@ if.end:
   ret i8* %x.addr
 
 ; CHECK: @test2
-; CHECK: %x.addr = select i1 %cmp, i8* %incdec.ptr, i8* %y
-; CHECK: ret i8* %x.addr
+; CHECK: %incdec.ptr.y = select i1 %cmp, i8* %incdec.ptr, i8* %y
+; CHECK: ret i8* %incdec.ptr.y
 }
diff --git a/test/Transforms/SimplifyCFG/switch-masked-bits.ll b/test/Transforms/SimplifyCFG/switch-masked-bits.ll
index fc83ec2..3b0c48b 100644
--- a/test/Transforms/SimplifyCFG/switch-masked-bits.ll
+++ b/test/Transforms/SimplifyCFG/switch-masked-bits.ll
@@ -15,8 +15,8 @@ c:
   ret i32 5
 ; CHECK: @test1
 ; CHECK: %cond = icmp eq i32 %i, 24
-; CHECK: %merge = select i1 %cond, i32 5, i32 0
-; CHECK: ret i32 %merge
+; CHECK: %. = select i1 %cond, i32 5, i32 0
+; CHECK: ret i32 %.
 }
 
 
diff --git a/test/Transforms/SimplifyCFG/switch-on-const-select.ll b/test/Transforms/SimplifyCFG/switch-on-const-select.ll
index 5494a65..673a62b 100644
--- a/test/Transforms/SimplifyCFG/switch-on-const-select.ll
+++ b/test/Transforms/SimplifyCFG/switch-on-const-select.ll
@@ -115,7 +115,7 @@ entry:
 cont:
 ; CHECK: %lt = icmp slt i64 %x, %y
     %lt = icmp slt i64 %x, %y
-; CHECK-NEXT: br i1 %lt, label %a, label %r
+; CHECK-NEXT: select i1 %lt, i32 -1, i32 1
     %qux = select i1 %lt, i32 0, i32 2
     switch i32 %qux, label %bees [
         i32 0, label %a
diff --git a/test/Transforms/SimplifyCFG/switch_formation.dbg.ll b/test/Transforms/SimplifyCFG/switch_formation.dbg.ll
deleted file mode 100644
index 2723ec6..0000000
--- a/test/Transforms/SimplifyCFG/switch_formation.dbg.ll
+++ /dev/null
@@ -1,50 +0,0 @@
-; RUN: opt < %s -simplifycfg -S | FileCheck %s
-
-        %llvm.dbg.anchor.type = type { i32, i32 }
-        %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
-
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"
-
-@.str = internal constant [4 x i8] c"a.c\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@.str1 = internal constant [6 x i8] c"/tmp/\00", section "llvm.metadata"	; <[6 x i8]*> [#uses=1]
-@.str2 = internal constant [55 x i8] c"4.2.1 (Based on Apple Inc. build 5636) (LLVM build 00)\00", section "llvm.metadata"		; <[55 x i8]*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([6 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([55 x i8]* @.str2, i32 0, i32 0), i1 true, i1 false, i8* null }, section "llvm.metadata"		; <%llvm.dbg.compile_unit.type*> [#uses=1]
-
-declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
-
-define i1 @t({ i32, i32 }* %I) {
-; CHECK: @t
-; CHECK: %tmp.2.i.off = add i32 %tmp.2.i, -14
-; CHECK: %switch = icmp ult i32 %tmp.2.i.off, 6
-entry:
-        %tmp.1.i = getelementptr { i32, i32 }* %I, i64 0, i32 1         ; <i32*> [#uses=1]
-        %tmp.2.i = load i32* %tmp.1.i           ; <i32> [#uses=6]
-        %tmp.2 = icmp eq i32 %tmp.2.i, 14               ; <i1> [#uses=1]
-        br i1 %tmp.2, label %shortcirc_done.4, label %shortcirc_next.0
-shortcirc_next.0:               ; preds = %entry
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-        %tmp.6 = icmp eq i32 %tmp.2.i, 15               ; <i1> [#uses=1]
-        br i1 %tmp.6, label %shortcirc_done.4, label %shortcirc_next.1
-shortcirc_next.1:               ; preds = %shortcirc_next.0
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-        %tmp.11 = icmp eq i32 %tmp.2.i, 16              ; <i1> [#uses=1]
-        br i1 %tmp.11, label %shortcirc_done.4, label %shortcirc_next.2
-shortcirc_next.2:               ; preds = %shortcirc_next.1
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-        %tmp.16 = icmp eq i32 %tmp.2.i, 17              ; <i1> [#uses=1]
-        br i1 %tmp.16, label %shortcirc_done.4, label %shortcirc_next.3
-shortcirc_next.3:               ; preds = %shortcirc_next.2
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-        %tmp.21 = icmp eq i32 %tmp.2.i, 18              ; <i1> [#uses=1]
-        br i1 %tmp.21, label %shortcirc_done.4, label %shortcirc_next.4
-shortcirc_next.4:               ; preds = %shortcirc_next.3
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-        %tmp.26 = icmp eq i32 %tmp.2.i, 19              ; <i1> [#uses=1]
-        br label %UnifiedReturnBlock
-shortcirc_done.4:               ; preds = %shortcirc_next.3, %shortcirc_next.2, %shortcirc_next.1, %shortcirc_next.0, %entry
-        br label %UnifiedReturnBlock
-UnifiedReturnBlock:             ; preds = %shortcirc_done.4, %shortcirc_next.4
-        %UnifiedRetVal = phi i1 [ %tmp.26, %shortcirc_next.4 ], [ true, %shortcirc_done.4 ]             ; <i1> [#uses=1]
-        ret i1 %UnifiedRetVal
-}
-
diff --git a/test/Transforms/SimplifyCFG/switch_switch_fold_dbginfo.ll b/test/Transforms/SimplifyCFG/switch_switch_fold_dbginfo.ll
deleted file mode 100644
index 343e169..0000000
--- a/test/Transforms/SimplifyCFG/switch_switch_fold_dbginfo.ll
+++ /dev/null
@@ -1,116 +0,0 @@
-; RUN: opt < %s -simplifycfg -S | \
-; RUN:   not grep " switch"
-
-; ModuleID = '<stdin>'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i386-pc-linux-gnu"
-	%llvm.dbg.anchor.type = type { i32, i32 }
-	%llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
-	%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8*, i32 }
-	%llvm.dbg.composite.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, { }*, { }*, i32 }
-	%llvm.dbg.subprogram.type = type { i32, { }*, { }*, i8*, i8*, i8*, { }*, i32, { }*, i1, i1 }
-	%llvm.dbg.variable.type = type { i32, { }*, i8*, { }*, i32, { }* }
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
-@.str = internal constant [10 x i8] c"swithh2.c\00", section "llvm.metadata"		; <[10 x i8]*> [#uses=1]
-@.str1 = internal constant [38 x i8] c"/developer/home2/zsth/test/debug/tmp/\00", section "llvm.metadata"		; <[38 x i8]*> [#uses=1]
-@.str2 = internal constant [52 x i8] c"4.2.1 (Based on Apple Inc. build 5641) (LLVM build)\00", section "llvm.metadata"		; <[52 x i8]*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([10 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([38 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([52 x i8]* @.str2, i32 0, i32 0), i1 true, i1 false, i8* null, i32 -1 }, section "llvm.metadata"		; <%llvm.dbg.compile_unit.type*> [#uses=1]
-@.str3 = internal constant [4 x i8] c"int\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 458788, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata"		; <%llvm.dbg.basictype.type*> [#uses=1]
-@llvm.dbg.array = internal constant [2 x { }*] [{ }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*)], section "llvm.metadata"		; <[2 x { }*]*> [#uses=1]
-@llvm.dbg.composite = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([2 x { }*]* @llvm.dbg.array to { }*), i32 0 }, section "llvm.metadata"		; <%llvm.dbg.composite.type*> [#uses=1]
-@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 46 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
-@.str4 = internal constant [4 x i8] c"foo\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str4, i32 0, i32 0), i8* getelementptr ([4 x i8]* @.str4, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 1, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite to { }*), i1 false, i1 true }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@.str5 = internal constant [2 x i8] c"x\00", section "llvm.metadata"		; <[2 x i8]*> [#uses=1]
-@llvm.dbg.variable = internal constant %llvm.dbg.variable.type { i32 459009, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*), i8* getelementptr ([2 x i8]* @.str5, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 1, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) }, section "llvm.metadata"		; <%llvm.dbg.variable.type*> [#uses=0]
-
-define i32 @foo(i32 %x) nounwind {
-entry:
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
-	call void @llvm.dbg.stoppoint(i32 2, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	switch i32 %x, label %bb4 [
-		i32 1, label %bb
-		i32 2, label %bb1
-		i32 3, label %bb2
-		i32 4, label %bb3
-	]
-		; No predecessors!
-	call void @llvm.dbg.stoppoint(i32 2, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb
-
-bb:		; preds = %0, %entry
-	call void @llvm.dbg.stoppoint(i32 3, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb8
-		; No predecessors!
-	call void @llvm.dbg.stoppoint(i32 3, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb1
-
-bb1:		; preds = %1, %entry
-	call void @llvm.dbg.stoppoint(i32 4, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb8
-		; No predecessors!
-	call void @llvm.dbg.stoppoint(i32 4, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb2
-
-bb2:		; preds = %2, %entry
-	call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb8
-		; No predecessors!
-	call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb3
-
-bb3:		; preds = %3, %entry
-	call void @llvm.dbg.stoppoint(i32 6, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb8
-		; No predecessors!
-	call void @llvm.dbg.stoppoint(i32 6, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb4
-
-bb4:		; preds = %4, %entry
-	call void @llvm.dbg.stoppoint(i32 10, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	switch i32 %x, label %bb7 [
-		i32 5, label %bb5
-		i32 6, label %bb6
-	]
-		; No predecessors!
-	call void @llvm.dbg.stoppoint(i32 10, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb5
-
-bb5:		; preds = %5, %bb4
-	call void @llvm.dbg.stoppoint(i32 11, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb8
-		; No predecessors!
-	call void @llvm.dbg.stoppoint(i32 11, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb6
-
-bb6:		; preds = %6, %bb4
-	call void @llvm.dbg.stoppoint(i32 12, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb8
-		; No predecessors!
-	call void @llvm.dbg.stoppoint(i32 12, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb7
-
-bb7:		; preds = %7, %bb4
-	call void @llvm.dbg.stoppoint(i32 13, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb8
-
-bb8:		; preds = %bb7, %bb6, %bb5, %bb3, %bb2, %bb1, %bb
-	%.0 = phi i32 [ 4, %bb3 ], [ 3, %bb2 ], [ 2, %bb1 ], [ 1, %bb ], [ 6, %bb6 ], [ 5, %bb5 ], [ 0, %bb7 ]		; <i32> [#uses=1]
-	call void @llvm.dbg.stoppoint(i32 13, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %return
-
-return:		; preds = %bb8
-	call void @llvm.dbg.stoppoint(i32 13, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
-	ret i32 %.0
-}
-
-declare void @llvm.dbg.func.start({ }*) nounwind
-
-declare void @llvm.dbg.declare({ }*, { }*) nounwind
-
-declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
-
-declare void @llvm.dbg.region.end({ }*) nounwind
diff --git a/test/Transforms/SimplifyCFG/two-entry-phi-return.dbg.ll b/test/Transforms/SimplifyCFG/two-entry-phi-return.dbg.ll
deleted file mode 100644
index 01041eb..0000000
--- a/test/Transforms/SimplifyCFG/two-entry-phi-return.dbg.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: opt < %s -simplifycfg -S | not grep br
-
-        %llvm.dbg.anchor.type = type { i32, i32 }
-        %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
-
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"
-
-@.str = internal constant [4 x i8] c"a.c\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@.str1 = internal constant [6 x i8] c"/tmp/\00", section "llvm.metadata"	; <[6 x i8]*> [#uses=1]
-@.str2 = internal constant [55 x i8] c"4.2.1 (Based on Apple Inc. build 5636) (LLVM build 00)\00", section "llvm.metadata"		; <[55 x i8]*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([6 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([55 x i8]* @.str2, i32 0, i32 0), i1 true, i1 false, i8* null }, section "llvm.metadata"		; <%llvm.dbg.compile_unit.type*> [#uses=1]
-
-declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
-
-define i1 @qux(i8* %m, i8* %n, i8* %o, i8* %p) nounwind  {
-entry:
-        %tmp7 = icmp eq i8* %m, %n
-        br i1 %tmp7, label %bb, label %UnifiedReturnBlock
-
-bb:
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-        %tmp15 = icmp eq i8* %o, %p
-        br label %UnifiedReturnBlock
-
-UnifiedReturnBlock:
-        %result = phi i1 [ 0, %entry ], [ %tmp15, %bb ]
-        ret i1 %result
-}
diff --git a/test/Transforms/SimplifyCFG/unreachable-blocks.ll b/test/Transforms/SimplifyCFG/unreachable-blocks.ll
new file mode 100644
index 0000000..1df0eab
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/unreachable-blocks.ll
@@ -0,0 +1,28 @@
+; RUN: opt -simplifycfg < %s -disable-output
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-pc-linux-gnu"
+
+; PR11825
+define void @test1() {
+entry:
+  br label %return
+
+while_block:                                      ; preds = %and_if_cont2, %and_if_cont
+  %newlen = sub i32 %newlen, 1
+  %newptr = getelementptr i8* %newptr, i64 1
+  %test = icmp sgt i32 %newlen, 0
+  br i1 %test, label %and_if1, label %and_if_cont2
+
+and_if1:                                          ; preds = %while_block
+  %char = load i8* %newptr
+  %test2 = icmp ule i8 %char, 32
+  br label %and_if_cont2
+
+and_if_cont2:                                     ; preds = %and_if1, %while_block
+  %a18 = phi i1 [ %test, %while_block ], [ %test2, %and_if1 ]
+  br i1 %a18, label %while_block, label %return
+
+return:                                           ; preds = %and_if_cont2, %and_if_cont
+  ret void
+}
diff --git a/test/Transforms/SimplifyLibCalls/PR7357.ll b/test/Transforms/SimplifyLibCalls/PR7357.ll
index 6d5c1d5..3529a9c 100644
--- a/test/Transforms/SimplifyLibCalls/PR7357.ll
+++ b/test/Transforms/SimplifyLibCalls/PR7357.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -default-data-layout="e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32" -simplify-libcalls -S | FileCheck %s
+; RUN: opt < %s "-default-data-layout=e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32" -simplify-libcalls -S | FileCheck %s
 @.str1 = private constant [11 x i8] c"(){};[]&|:\00", align 4
 
 ; check that simplify libcalls will not replace a call with one calling
diff --git a/test/Transforms/SimplifyLibCalls/Printf.ll b/test/Transforms/SimplifyLibCalls/Printf.ll
index caea311..489c993 100644
--- a/test/Transforms/SimplifyLibCalls/Printf.ll
+++ b/test/Transforms/SimplifyLibCalls/Printf.ll
@@ -1,9 +1,10 @@
-; RUN: opt < %s -simplify-libcalls -S -o %t
-; RUN: FileCheck < %t %s
+; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
 
 @str = internal constant [13 x i8] c"hello world\0A\00"         ; <[13 x i8]*> [#uses=1]
 @str1 = internal constant [2 x i8] c"h\00"              ; <[2 x i8]*> [#uses=1]
 
+; CHECK: private unnamed_addr constant [12 x i8] c"hello world\00"
+
 declare i32 @printf(i8*, ...)
 
 ; CHECK: define void @f0
diff --git a/test/Transforms/SimplifyLibCalls/StrLen.ll b/test/Transforms/SimplifyLibCalls/StrLen.ll
index 45b349d..acd8aaf 100644
--- a/test/Transforms/SimplifyLibCalls/StrLen.ll
+++ b/test/Transforms/SimplifyLibCalls/StrLen.ll
@@ -6,6 +6,7 @@ target datalayout = "e-p:32:32"
 @hello = constant [6 x i8] c"hello\00"		; <[6 x i8]*> [#uses=3]
 @null = constant [1 x i8] zeroinitializer		; <[1 x i8]*> [#uses=3]
 @null_hello = constant [7 x i8] c"\00hello\00"		; <[7 x i8]*> [#uses=1]
+@nullstring = constant i8 0
 
 declare i32 @strlen(i8*)
 
@@ -54,3 +55,8 @@ define i1 @test7() {
 	%ne_null = icmp ne i32 %null_l, 0		; <i1> [#uses=1]
 	ret i1 %ne_null
 }
+
+define i32 @test8() {
+	%len = tail call i32 @strlen(i8* @nullstring) nounwind
+	ret i32 %len
+}
diff --git a/test/Transforms/SimplifyLibCalls/cos.ll b/test/Transforms/SimplifyLibCalls/cos.ll
new file mode 100644
index 0000000..6a8ce8c
--- /dev/null
+++ b/test/Transforms/SimplifyLibCalls/cos.ll
@@ -0,0 +1,14 @@
+; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define double @foo(double %d) nounwind readnone {
+; CHECK: @foo
+    %1 = fsub double -0.000000e+00, %d
+    %2 = call double @cos(double %1) nounwind readnone
+; CHECK: call double @cos(double %d)
+    ret double %2
+}
+
+declare double @cos(double) nounwind readnone
diff --git a/test/Transforms/SimplifyLibCalls/dg.exp b/test/Transforms/SimplifyLibCalls/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/SimplifyLibCalls/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/SimplifyLibCalls/fwrite.ll b/test/Transforms/SimplifyLibCalls/fwrite.ll
new file mode 100644
index 0000000..f0f3dca
--- /dev/null
+++ b/test/Transforms/SimplifyLibCalls/fwrite.ll
@@ -0,0 +1,13 @@
+; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
+
+%FILE = type { i32 }
+
+@.str = private unnamed_addr constant [1 x i8] zeroinitializer, align 1
+
+define i64 @foo(%FILE* %f) {
+; CHECK: %retval = call i64 @fwrite
+  %retval = call i64 @fwrite(i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0), i64 1, i64 1, %FILE* %f)
+  ret i64 %retval
+}
+
+declare i64 @fwrite(i8*, i64, i64, %FILE *)
diff --git a/test/Transforms/SimplifyLibCalls/lit.local.cfg b/test/Transforms/SimplifyLibCalls/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/SimplifyLibCalls/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/SimplifyLibCalls/osx-names.ll b/test/Transforms/SimplifyLibCalls/osx-names.ll
new file mode 100644
index 0000000..e321d1d
--- /dev/null
+++ b/test/Transforms/SimplifyLibCalls/osx-names.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
+; <rdar://problem/9815881>
+; On OSX x86-32, fwrite and fputs aren't called fwrite and fputs.
+; Make sure we use the correct names.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.7.2"
+
+%struct.__sFILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+%struct.__sbuf = type { i8*, i32 }
+%struct.__sFILEX = type opaque
+
+@.str = private unnamed_addr constant [13 x i8] c"Hello world\0A\00", align 1
+@.str2 = private unnamed_addr constant [3 x i8] c"%s\00", align 1
+
+define void @test1(%struct.__sFILE* %stream) nounwind {
+; CHECK: define void @test1
+; CHECK: call i32 @"fwrite$UNIX2003"
+  %call = tail call i32 (%struct.__sFILE*, i8*, ...)* @fprintf(%struct.__sFILE* %stream, i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0)) nounwind
+  ret void
+}
+
+define void @test2(%struct.__sFILE* %stream, i8* %str) nounwind ssp {
+; CHECK: define void @test2
+; CHECK: call i32 @"fputs$UNIX2003"
+  %call = tail call i32 (%struct.__sFILE*, i8*, ...)* @fprintf(%struct.__sFILE* %stream, i8* getelementptr inbounds ([3 x i8]* @.str2, i32 0, i32 0), i8* %str) nounwind
+  ret void
+}
+
+declare i32 @fprintf(%struct.__sFILE*, i8*, ...) nounwind
diff --git a/test/Transforms/Sink/basic.ll b/test/Transforms/Sink/basic.ll
index 2343372..4c531d8 100644
--- a/test/Transforms/Sink/basic.ll
+++ b/test/Transforms/Sink/basic.ll
@@ -21,7 +21,7 @@ false:
   ret i32 0
 }
 
-; But don't sink volatile loads...
+; But don't sink load volatiles...
 
 ;      CHECK: @foo2
 ;      CHECK: load volatile
diff --git a/test/Transforms/Sink/dg.exp b/test/Transforms/Sink/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/Sink/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/Sink/lit.local.cfg b/test/Transforms/Sink/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/Sink/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/StripSymbols/dg.exp b/test/Transforms/StripSymbols/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/StripSymbols/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/StripSymbols/lit.local.cfg b/test/Transforms/StripSymbols/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/StripSymbols/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/TailCallElim/dg.exp b/test/Transforms/TailCallElim/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/TailCallElim/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/TailCallElim/dont_reorder_load.ll b/test/Transforms/TailCallElim/dont_reorder_load.ll
index 899e115..a29b72e 100644
--- a/test/Transforms/TailCallElim/dont_reorder_load.ll
+++ b/test/Transforms/TailCallElim/dont_reorder_load.ll
@@ -46,7 +46,7 @@ else:		; preds = %entry
 }
 
 ; This load can't be safely moved above the call because that would change the
-; order in which the volatile loads are performed.
+; order in which the load volatiles are performed.
 define fastcc i32 @no_tailrecelim_3(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind {
 entry:
 	%tmp2 = icmp sge i32 %start_arg, %a_len_arg		; <i1> [#uses=1]
@@ -58,7 +58,7 @@ if:		; preds = %entry
 else:		; preds = %entry
 	%tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
 	%tmp8 = call fastcc i32 @no_tailrecelim_3(i32* %a_arg, i32 %a_len_arg, i32 %tmp7)		; <i32> [#uses=1]
-	%tmp9 = volatile load i32* %a_arg		; <i32> [#uses=1]
+	%tmp9 = load volatile i32* %a_arg		; <i32> [#uses=1]
 	%tmp10 = add i32 %tmp9, %tmp8		; <i32> [#uses=1]
 	ret i32 %tmp10
 }
diff --git a/test/Transforms/TailCallElim/lit.local.cfg b/test/Transforms/TailCallElim/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/TailCallElim/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/TailCallElim/setjmp.ll b/test/Transforms/TailCallElim/setjmp.ll
index 7ef9cb3..4ce6ac7 100644
--- a/test/Transforms/TailCallElim/setjmp.ll
+++ b/test/Transforms/TailCallElim/setjmp.ll
@@ -15,7 +15,7 @@ bb:
   ret void
 }
 
-declare i32 @setjmp(i32*)
+declare i32 @setjmp(i32*) returns_twice
 
 ; CHECK: foo2
 ; CHECK-NOT: tail call void @bar()
diff --git a/test/Transforms/TailDup/X86/dg.exp b/test/Transforms/TailDup/X86/dg.exp
deleted file mode 100644
index 7b7bd4e..0000000
--- a/test/Transforms/TailDup/X86/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
-}
diff --git a/test/Transforms/TailDup/X86/lit.local.cfg b/test/Transforms/TailDup/X86/lit.local.cfg
new file mode 100644
index 0000000..da2db5a
--- /dev/null
+++ b/test/Transforms/TailDup/X86/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/TailDup/dg.exp b/test/Transforms/TailDup/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/TailDup/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/TailDup/lit.local.cfg b/test/Transforms/TailDup/lit.local.cfg
new file mode 100644
index 0000000..18c604a
--- /dev/null
+++ b/test/Transforms/TailDup/lit.local.cfg
@@ -0,0 +1,5 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
diff --git a/test/Unit/lit.site.cfg.in b/test/Unit/lit.site.cfg.in
index 9643507..65e98d0 100644
--- a/test/Unit/lit.site.cfg.in
+++ b/test/Unit/lit.site.cfg.in
@@ -3,7 +3,6 @@
 config.llvm_src_root = "@LLVM_SOURCE_DIR@"
 config.llvm_obj_root = "@LLVM_BINARY_DIR@"
 config.llvm_tools_dir = "@LLVM_TOOLS_DIR@"
-config.llvmgcc_dir = "@LLVMGCCDIR@"
 config.llvm_build_mode = "@LLVM_BUILD_MODE@"
 config.enable_shared = @ENABLE_SHARED@
 config.shlibdir = "@SHLIBDIR@"
diff --git a/test/Verifier/cttz-undef-arg.ll b/test/Verifier/cttz-undef-arg.ll
new file mode 100644
index 0000000..48cd061
--- /dev/null
+++ b/test/Verifier/cttz-undef-arg.ll
@@ -0,0 +1,16 @@
+; RUN: not llvm-as < %s -o /dev/null |& FileCheck %s
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+
+define void @f(i32 %x, i1 %is_not_zero) {
+entry:
+; CHECK: is_zero_undef argument of bit counting intrinsics must be a constant int
+; CHECK-NEXT: @llvm.ctlz.i32
+  call i32 @llvm.ctlz.i32(i32 %x, i1 %is_not_zero)
+
+; CHECK: is_zero_undef argument of bit counting intrinsics must be a constant int
+; CHECK-NEXT: @llvm.cttz.i32
+  call i32 @llvm.cttz.i32(i32 %x, i1 %is_not_zero)
+  ret void
+}
diff --git a/test/Verifier/dg.exp b/test/Verifier/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Verifier/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Verifier/fpaccuracy.ll b/test/Verifier/fpaccuracy.ll
new file mode 100644
index 0000000..2fefde0
--- /dev/null
+++ b/test/Verifier/fpaccuracy.ll
@@ -0,0 +1,31 @@
+; RUN: not llvm-as < %s |& FileCheck %s
+
+define void @foo(i32 %i, float %f, <2 x float> %g) {
+  %s = add i32 %i, %i, !fpaccuracy !0
+; CHECK: fpaccuracy requires a floating point result!
+  %t = fadd float %f, %f, !fpaccuracy !1
+; CHECK: fpaccuracy takes one operand!
+  %u = fadd float %f, %f, !fpaccuracy !2
+; CHECK: fpaccuracy takes one operand!
+  %v = fadd float %f, %f, !fpaccuracy !3
+; CHECK: fpaccuracy ULPs not a floating point number!
+  %w = fadd float %f, %f, !fpaccuracy !0
+; Above line is correct.
+  %w2 = fadd <2 x float> %g, %g, !fpaccuracy !0
+; Above line is correct.
+  %x = fadd float %f, %f, !fpaccuracy !4
+; CHECK: fpaccuracy ULPs is negative!
+  %y = fadd float %f, %f, !fpaccuracy !5
+; CHECK: fpaccuracy ULPs is negative!
+  %z = fadd float %f, %f, !fpaccuracy !6
+; CHECK: fpaccuracy ULPs not a normal number!
+  ret void
+}
+
+!0 = metadata !{ float 1.0 }
+!1 = metadata !{ }
+!2 = metadata !{ float 1.0, float 1.0 }
+!3 = metadata !{ i32 1 }
+!4 = metadata !{ float -1.0 }
+!5 = metadata !{ float -0.0 }
+!6 = metadata !{ float 0x7FFFFFFF00000000 }
diff --git a/test/Verifier/lit.local.cfg b/test/Verifier/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Verifier/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Verifier/range-1.ll b/test/Verifier/range-1.ll
new file mode 100644
index 0000000..611933a
--- /dev/null
+++ b/test/Verifier/range-1.ll
@@ -0,0 +1,78 @@
+; RUN: not llvm-as < %s -o /dev/null |& FileCheck %s
+
+define void @f1(i8* %x) {
+entry:
+  store i8 0, i8* %x, align 1, !range !0
+  ret void
+}
+!0 = metadata !{i8 0, i8 1}
+; CHECK: Ranges are only for loads!
+; CHECK-NEXT: store i8 0, i8* %x, align 1, !range !0
+
+define i8 @f2(i8* %x) {
+entry:
+  %y = load i8* %x, align 1, !range !1
+  ret i8 %y
+}
+!1 = metadata !{}
+; CHECK: It should have at least one range!
+; CHECK-NEXT: metadata
+
+define i8 @f3(i8* %x) {
+entry:
+  %y = load i8* %x, align 1, !range !2
+  ret i8 %y
+}
+!2 = metadata !{i8 0}
+; CHECK: Unfinished range!
+
+define i8 @f4(i8* %x) {
+entry:
+  %y = load i8* %x, align 1, !range !3
+  ret i8 %y
+}
+!3 = metadata !{double 0.0, i8 0}
+; CHECK: The lower limit must be an integer!
+
+define i8 @f5(i8* %x) {
+entry:
+  %y = load i8* %x, align 1, !range !4
+  ret i8 %y
+}
+!4 = metadata !{i8 0, double 0.0}
+; CHECK: The upper limit must be an integer!
+
+define i8 @f6(i8* %x) {
+entry:
+  %y = load i8* %x, align 1, !range !5
+  ret i8 %y
+}
+!5 = metadata !{i32 0, i8 0}
+; CHECK: Range types must match load type!
+; CHECK:  %y = load
+
+define i8 @f7(i8* %x) {
+entry:
+  %y = load i8* %x, align 1, !range !6
+  ret i8 %y
+}
+!6 = metadata !{i8 0, i32 0}
+; CHECK: Range types must match load type!
+; CHECK:  %y = load
+
+define i8 @f8(i8* %x) {
+entry:
+  %y = load i8* %x, align 1, !range !7
+  ret i8 %y
+}
+!7 = metadata !{i32 0, i32 0}
+; CHECK: Range types must match load type!
+; CHECK:  %y = load
+
+define i8 @f9(i8* %x) {
+entry:
+  %y = load i8* %x, align 1, !range !8
+  ret i8 %y
+}
+!8 = metadata !{i8 0, i8 0}
+; CHECK: Range must not be empty!
diff --git a/test/Verifier/range-2.ll b/test/Verifier/range-2.ll
new file mode 100644
index 0000000..ef542c8
--- /dev/null
+++ b/test/Verifier/range-2.ll
@@ -0,0 +1,22 @@
+; RUN: llvm-as < %s -o /dev/null
+
+define i8 @f1(i8* %x) {
+entry:
+  %y = load i8* %x, align 1, !range !0
+  ret i8 %y
+}
+!0 = metadata !{i8 0, i8 1}
+
+define i8 @f2(i8* %x) {
+entry:
+  %y = load i8* %x, align 1, !range !1
+  ret i8 %y
+}
+!1 = metadata !{i8 255, i8 1}
+
+define i8 @f3(i8* %x) {
+entry:
+  %y = load i8* %x, align 1, !range !2
+  ret i8 %y
+}
+!2 = metadata !{i8 1, i8 3, i8 5, i8 42}
diff --git a/test/YAMLParser/LICENSE.txt b/test/YAMLParser/LICENSE.txt
new file mode 100644
index 0000000..050ced2
--- /dev/null
+++ b/test/YAMLParser/LICENSE.txt
@@ -0,0 +1,19 @@
+Copyright (c) 2006 Kirill Simonov
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/test/YAMLParser/bool.data b/test/YAMLParser/bool.data
new file mode 100644
index 0000000..e987a0e
--- /dev/null
+++ b/test/YAMLParser/bool.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+- yes
+- NO
+- True
+- on
diff --git a/test/YAMLParser/construct-bool.data b/test/YAMLParser/construct-bool.data
new file mode 100644
index 0000000..035ec0c
--- /dev/null
+++ b/test/YAMLParser/construct-bool.data
@@ -0,0 +1,11 @@
+# RUN: yaml-bench -canonical %s
+
+canonical: yes
+answer: NO
+logical: True
+option: on
+
+
+but:
+    y: is a string
+    n: is a string
diff --git a/test/YAMLParser/construct-custom.data b/test/YAMLParser/construct-custom.data
new file mode 100644
index 0000000..cac95e0
--- /dev/null
+++ b/test/YAMLParser/construct-custom.data
@@ -0,0 +1,28 @@
+# RUN: yaml-bench -canonical %s
+
+---
+- !tag1
+  x: 1
+- !tag1
+  x: 1
+  'y': 2
+  z: 3
+- !tag2
+  10
+- !tag2
+  =: 10
+  'y': 20
+  z: 30
+- !tag3
+  x: 1
+- !tag3
+  x: 1
+  'y': 2
+  z: 3
+- !tag3
+  =: 1
+  'y': 2
+  z: 3
+- !foo
+  my-parameter: foo
+  my-another-parameter: [1,2,3]
diff --git a/test/YAMLParser/construct-float.data b/test/YAMLParser/construct-float.data
new file mode 100644
index 0000000..07c51bd
--- /dev/null
+++ b/test/YAMLParser/construct-float.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+canonical: 6.8523015e+5
+exponential: 685.230_15e+03
+fixed: 685_230.15
+sexagesimal: 190:20:30.15
+negative infinity: -.inf
+not a number: .NaN
diff --git a/test/YAMLParser/construct-int.data b/test/YAMLParser/construct-int.data
new file mode 100644
index 0000000..b14c37f
--- /dev/null
+++ b/test/YAMLParser/construct-int.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+canonical: 685230
+decimal: +685_230
+octal: 02472256
+hexadecimal: 0x_0A_74_AE
+binary: 0b1010_0111_0100_1010_1110
+sexagesimal: 190:20:30
diff --git a/test/YAMLParser/construct-map.data b/test/YAMLParser/construct-map.data
new file mode 100644
index 0000000..1b68120
--- /dev/null
+++ b/test/YAMLParser/construct-map.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+# Unordered set of key: value pairs.
+Block style: !!map
+  Clark : Evans
+  Brian : Ingerson
+  Oren  : Ben-Kiki
+Flow style: !!map { Clark: Evans, Brian: Ingerson, Oren: Ben-Kiki }
diff --git a/test/YAMLParser/construct-merge.data b/test/YAMLParser/construct-merge.data
new file mode 100644
index 0000000..0ebc9f6
--- /dev/null
+++ b/test/YAMLParser/construct-merge.data
@@ -0,0 +1,29 @@
+# RUN: yaml-bench -canonical %s
+
+---
+- &CENTER { x: 1, 'y': 2 }
+- &LEFT { x: 0, 'y': 2 }
+- &BIG { r: 10 }
+- &SMALL { r: 1 }
+
+# All the following maps are equal:
+
+- # Explicit keys
+  x: 1
+  'y': 2
+  r: 10
+  label: center/big
+
+- # Merge one map
+  << : *CENTER
+  r: 10
+  label: center/big
+
+- # Merge multiple maps
+  << : [ *CENTER, *BIG ]
+  label: center/big
+
+- # Override
+  << : [ *BIG, *LEFT, *SMALL ]
+  x: 1
+  label: center/big
diff --git a/test/YAMLParser/construct-null.data b/test/YAMLParser/construct-null.data
new file mode 100644
index 0000000..51f8b61
--- /dev/null
+++ b/test/YAMLParser/construct-null.data
@@ -0,0 +1,20 @@
+# RUN: yaml-bench -canonical %s
+
+# A document may be null.
+---
+---
+# This mapping has four keys,
+# one has a value.
+empty:
+canonical: ~
+english: null
+~: null key
+---
+# This sequence has five
+# entries, two have values.
+sparse:
+  - ~
+  - 2nd entry
+  -
+  - 4th entry
+  - Null
diff --git a/test/YAMLParser/construct-omap.data b/test/YAMLParser/construct-omap.data
new file mode 100644
index 0000000..b96d679
--- /dev/null
+++ b/test/YAMLParser/construct-omap.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+# Explicitly typed ordered map (dictionary).
+Bestiary: !!omap
+  - aardvark: African pig-like ant eater. Ugly.
+  - anteater: South-American ant eater. Two species.
+  - anaconda: South-American constrictor snake. Scaly.
+  # Etc.
+# Flow style
+Numbers: !!omap [ one: 1, two: 2, three : 3 ]
diff --git a/test/YAMLParser/construct-pairs.data b/test/YAMLParser/construct-pairs.data
new file mode 100644
index 0000000..40f288d
--- /dev/null
+++ b/test/YAMLParser/construct-pairs.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+# Explicitly typed pairs.
+Block tasks: !!pairs
+  - meeting: with team.
+  - meeting: with boss.
+  - break: lunch.
+  - meeting: with client.
+Flow tasks: !!pairs [ meeting: with team, meeting: with boss ]
diff --git a/test/YAMLParser/construct-seq.data b/test/YAMLParser/construct-seq.data
new file mode 100644
index 0000000..f43fd39
--- /dev/null
+++ b/test/YAMLParser/construct-seq.data
@@ -0,0 +1,17 @@
+# RUN: yaml-bench -canonical %s
+
+# Ordered sequence of nodes
+Block style: !!seq
+- Mercury   # Rotates - no light/dark sides.
+- Venus     # Deadliest. Aptly named.
+- Earth     # Mostly dirt.
+- Mars      # Seems empty.
+- Jupiter   # The king.
+- Saturn    # Pretty.
+- Uranus    # Where the sun hardly shines.
+- Neptune   # Boring. No rings.
+- Pluto     # You call this a planet?
+Flow style: !!seq [ Mercury, Venus, Earth, Mars,      # Rocks
+                    Jupiter, Saturn, Uranus, Neptune, # Gas
+                    Pluto ]                           # Overrated
+
diff --git a/test/YAMLParser/construct-set.data b/test/YAMLParser/construct-set.data
new file mode 100644
index 0000000..3e9d095
--- /dev/null
+++ b/test/YAMLParser/construct-set.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+# Explicitly typed set.
+baseball players: !!set
+  ? Mark McGwire
+  ? Sammy Sosa
+  ? Ken Griffey
+# Flow style
+baseball teams: !!set { Boston Red Sox, Detroit Tigers, New York Yankees }
diff --git a/test/YAMLParser/construct-str-ascii.data b/test/YAMLParser/construct-str-ascii.data
new file mode 100644
index 0000000..24290ae
--- /dev/null
+++ b/test/YAMLParser/construct-str-ascii.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+--- !!str "ascii string"
diff --git a/test/YAMLParser/construct-str.data b/test/YAMLParser/construct-str.data
new file mode 100644
index 0000000..dc1ce82
--- /dev/null
+++ b/test/YAMLParser/construct-str.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+string: abcd
diff --git a/test/YAMLParser/construct-timestamp.data b/test/YAMLParser/construct-timestamp.data
new file mode 100644
index 0000000..f262c2d
--- /dev/null
+++ b/test/YAMLParser/construct-timestamp.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+canonical:        2001-12-15T02:59:43.1Z
+valid iso8601:    2001-12-14t21:59:43.10-05:00
+space separated:  2001-12-14 21:59:43.10 -5
+no time zone (Z): 2001-12-15 2:59:43.10
+date (00:00:00Z): 2002-12-14
diff --git a/test/YAMLParser/construct-value.data b/test/YAMLParser/construct-value.data
new file mode 100644
index 0000000..fe01a0d
--- /dev/null
+++ b/test/YAMLParser/construct-value.data
@@ -0,0 +1,12 @@
+# RUN: yaml-bench -canonical %s
+
+---     # Old schema
+link with:
+  - library1.dll
+  - library2.dll
+---     # New schema
+link with:
+  - = : library1.dll
+    version: 1.2
+  - = : library2.dll
+    version: 2.3
diff --git a/test/YAMLParser/duplicate-key.former-loader-error.data b/test/YAMLParser/duplicate-key.former-loader-error.data
new file mode 100644
index 0000000..9272103
--- /dev/null
+++ b/test/YAMLParser/duplicate-key.former-loader-error.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+---
+foo: bar
+foo: baz
diff --git a/test/YAMLParser/duplicate-mapping-key.former-loader-error.data b/test/YAMLParser/duplicate-mapping-key.former-loader-error.data
new file mode 100644
index 0000000..96d175d
--- /dev/null
+++ b/test/YAMLParser/duplicate-mapping-key.former-loader-error.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+---
+&anchor foo:
+    foo: bar
+    *anchor: duplicate key
+    baz: bat
+    *anchor: duplicate key
diff --git a/test/YAMLParser/duplicate-merge-key.former-loader-error.data b/test/YAMLParser/duplicate-merge-key.former-loader-error.data
new file mode 100644
index 0000000..6b12764
--- /dev/null
+++ b/test/YAMLParser/duplicate-merge-key.former-loader-error.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+---
+<<: {x: 1, y: 2}
+foo: bar
+<<: {z: 3, t: 4}
diff --git a/test/YAMLParser/duplicate-value-key.former-loader-error.data b/test/YAMLParser/duplicate-value-key.former-loader-error.data
new file mode 100644
index 0000000..dc20e0b
--- /dev/null
+++ b/test/YAMLParser/duplicate-value-key.former-loader-error.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+---
+=: 1
+foo: bar
+=: 2
diff --git a/test/YAMLParser/emit-block-scalar-in-simple-key-context-bug.data b/test/YAMLParser/emit-block-scalar-in-simple-key-context-bug.data
new file mode 100644
index 0000000..f5adedb
--- /dev/null
+++ b/test/YAMLParser/emit-block-scalar-in-simple-key-context-bug.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+? |-
+  foo
+: |-
+  bar
diff --git a/test/YAMLParser/empty-document-bug.data b/test/YAMLParser/empty-document-bug.data
new file mode 100644
index 0000000..fa131fe
--- /dev/null
+++ b/test/YAMLParser/empty-document-bug.data
@@ -0,0 +1,2 @@
+# RUN: yaml-bench -canonical %s
+
diff --git a/test/YAMLParser/float.data b/test/YAMLParser/float.data
new file mode 100644
index 0000000..c4de970
--- /dev/null
+++ b/test/YAMLParser/float.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+- 6.8523015e+5
+- 685.230_15e+03
+- 685_230.15
+- 190:20:30.15
+- -.inf
+- .NaN
diff --git a/test/YAMLParser/int.data b/test/YAMLParser/int.data
new file mode 100644
index 0000000..2651d09
--- /dev/null
+++ b/test/YAMLParser/int.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+- 685230
+- +685_230
+- 02472256
+- 0x_0A_74_AE
+- 0b1010_0111_0100_1010_1110
+- 190:20:30
diff --git a/test/YAMLParser/invalid-single-quote-bug.data b/test/YAMLParser/invalid-single-quote-bug.data
new file mode 100644
index 0000000..3722a00
--- /dev/null
+++ b/test/YAMLParser/invalid-single-quote-bug.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+- "foo 'bar'"
+- "foo\n'bar'"
diff --git a/test/YAMLParser/merge.data b/test/YAMLParser/merge.data
new file mode 100644
index 0000000..8631359
--- /dev/null
+++ b/test/YAMLParser/merge.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+- <<
diff --git a/test/YAMLParser/more-floats.data b/test/YAMLParser/more-floats.data
new file mode 100644
index 0000000..668b31c
--- /dev/null
+++ b/test/YAMLParser/more-floats.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+[0.0, +1.0, -1.0, +.inf, -.inf, .nan, .nan]
diff --git a/test/YAMLParser/negative-float-bug.data b/test/YAMLParser/negative-float-bug.data
new file mode 100644
index 0000000..0ba0ffe
--- /dev/null
+++ b/test/YAMLParser/negative-float-bug.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+-1.0
diff --git a/test/YAMLParser/null.data b/test/YAMLParser/null.data
new file mode 100644
index 0000000..a38d7fa
--- /dev/null
+++ b/test/YAMLParser/null.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+-
+- ~
+- null
diff --git a/test/YAMLParser/resolver.data b/test/YAMLParser/resolver.data
new file mode 100644
index 0000000..8cbba63
--- /dev/null
+++ b/test/YAMLParser/resolver.data
@@ -0,0 +1,32 @@
+# RUN: yaml-bench -canonical %s
+
+---
+"this scalar should be selected"
+---
+key11: !foo
+    key12:
+        is: [selected]
+    key22:
+        key13: [not, selected]
+        key23: [not, selected]
+    key32:
+        key31: [not, selected]
+        key32: [not, selected]
+        key33: {not: selected}
+key21: !bar
+    - not selected
+    - selected
+    - not selected
+key31: !baz
+    key12:
+        key13:
+            key14: {selected}
+        key23:
+            key14: [not, selected]
+        key33:
+            key14: {selected}
+            key24: {not: selected}
+    key22:
+        -   key14: {selected}
+            key24: {not: selected}
+        -   key14: {selected}
diff --git a/test/YAMLParser/run-parser-crash-bug.data b/test/YAMLParser/run-parser-crash-bug.data
new file mode 100644
index 0000000..3ec910c
--- /dev/null
+++ b/test/YAMLParser/run-parser-crash-bug.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+---
+- Harry Potter and the Prisoner of Azkaban
+- Harry Potter and the Goblet of Fire
+- Harry Potter and the Order of the Phoenix
+---
+- Memoirs Found in a Bathtub
+- Snow Crash
+- Ghost World
diff --git a/test/YAMLParser/scan-document-end-bug.data b/test/YAMLParser/scan-document-end-bug.data
new file mode 100644
index 0000000..7354caf
--- /dev/null
+++ b/test/YAMLParser/scan-document-end-bug.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+# Ticket #4
+---
+...
diff --git a/test/YAMLParser/scan-line-break-bug.data b/test/YAMLParser/scan-line-break-bug.data
new file mode 100644
index 0000000..792973d
--- /dev/null
+++ b/test/YAMLParser/scan-line-break-bug.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+foo:
+    bar
+    baz
diff --git a/test/YAMLParser/single-dot-is-not-float-bug.data b/test/YAMLParser/single-dot-is-not-float-bug.data
new file mode 100644
index 0000000..810a593
--- /dev/null
+++ b/test/YAMLParser/single-dot-is-not-float-bug.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+.
diff --git a/test/YAMLParser/sloppy-indentation.data b/test/YAMLParser/sloppy-indentation.data
new file mode 100644
index 0000000..2b2b62b
--- /dev/null
+++ b/test/YAMLParser/sloppy-indentation.data
@@ -0,0 +1,19 @@
+# RUN: yaml-bench -canonical %s
+
+---
+in the block context:
+    indentation should be kept: { 
+    but in the flow context: [
+it may be violated]
+}
+---
+the parser does not require scalars
+to be indented with at least one space
+...
+---
+"the parser does not require scalars
+to be indented with at least one space"
+---
+foo:
+    bar: 'quoted scalars
+may not adhere indentation'
diff --git a/test/YAMLParser/spec-02-01.data b/test/YAMLParser/spec-02-01.data
new file mode 100644
index 0000000..dd15b2b
--- /dev/null
+++ b/test/YAMLParser/spec-02-01.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+- Mark McGwire
+- Sammy Sosa
+- Ken Griffey
diff --git a/test/YAMLParser/spec-02-02.data b/test/YAMLParser/spec-02-02.data
new file mode 100644
index 0000000..a5695d5
--- /dev/null
+++ b/test/YAMLParser/spec-02-02.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+hr:  65    # Home runs
+avg: 0.278 # Batting average
+rbi: 147   # Runs Batted In
diff --git a/test/YAMLParser/spec-02-03.data b/test/YAMLParser/spec-02-03.data
new file mode 100644
index 0000000..81f8d99
--- /dev/null
+++ b/test/YAMLParser/spec-02-03.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+american:
+  - Boston Red Sox
+  - Detroit Tigers
+  - New York Yankees
+national:
+  - New York Mets
+  - Chicago Cubs
+  - Atlanta Braves
diff --git a/test/YAMLParser/spec-02-04.data b/test/YAMLParser/spec-02-04.data
new file mode 100644
index 0000000..44a218d
--- /dev/null
+++ b/test/YAMLParser/spec-02-04.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+-
+  name: Mark McGwire
+  hr:   65
+  avg:  0.278
+-
+  name: Sammy Sosa
+  hr:   63
+  avg:  0.288
diff --git a/test/YAMLParser/spec-02-05.data b/test/YAMLParser/spec-02-05.data
new file mode 100644
index 0000000..c9a4a75
--- /dev/null
+++ b/test/YAMLParser/spec-02-05.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+- [name        , hr, avg  ]
+- [Mark McGwire, 65, 0.278]
+- [Sammy Sosa  , 63, 0.288]
diff --git a/test/YAMLParser/spec-02-06.data b/test/YAMLParser/spec-02-06.data
new file mode 100644
index 0000000..85c1e2b
--- /dev/null
+++ b/test/YAMLParser/spec-02-06.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+Mark McGwire: {hr: 65, avg: 0.278}
+Sammy Sosa: {
+    hr: 63,
+    avg: 0.288
+  }
diff --git a/test/YAMLParser/spec-02-07.data b/test/YAMLParser/spec-02-07.data
new file mode 100644
index 0000000..c349662
--- /dev/null
+++ b/test/YAMLParser/spec-02-07.data
@@ -0,0 +1,12 @@
+# RUN: yaml-bench -canonical %s
+
+# Ranking of 1998 home runs
+---
+- Mark McGwire
+- Sammy Sosa
+- Ken Griffey
+
+# Team ranking
+---
+- Chicago Cubs
+- St Louis Cardinals
diff --git a/test/YAMLParser/spec-02-08.data b/test/YAMLParser/spec-02-08.data
new file mode 100644
index 0000000..9746a43
--- /dev/null
+++ b/test/YAMLParser/spec-02-08.data
@@ -0,0 +1,12 @@
+# RUN: yaml-bench -canonical %s
+
+---
+time: 20:03:20
+player: Sammy Sosa
+action: strike (miss)
+...
+---
+time: 20:03:47
+player: Sammy Sosa
+action: grand slam
+...
diff --git a/test/YAMLParser/spec-02-09.data b/test/YAMLParser/spec-02-09.data
new file mode 100644
index 0000000..6aef933
--- /dev/null
+++ b/test/YAMLParser/spec-02-09.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+---
+hr: # 1998 hr ranking
+  - Mark McGwire
+  - Sammy Sosa
+rbi:
+  # 1998 rbi ranking
+  - Sammy Sosa
+  - Ken Griffey
diff --git a/test/YAMLParser/spec-02-10.data b/test/YAMLParser/spec-02-10.data
new file mode 100644
index 0000000..0302fa7
--- /dev/null
+++ b/test/YAMLParser/spec-02-10.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+---
+hr:
+  - Mark McGwire
+  # Following node labeled SS
+  - &SS Sammy Sosa
+rbi:
+  - *SS # Subsequent occurrence
+  - Ken Griffey
diff --git a/test/YAMLParser/spec-02-11.data b/test/YAMLParser/spec-02-11.data
new file mode 100644
index 0000000..d8cf863
--- /dev/null
+++ b/test/YAMLParser/spec-02-11.data
@@ -0,0 +1,11 @@
+# RUN: yaml-bench -canonical %s
+
+? - Detroit Tigers
+  - Chicago cubs
+:
+  - 2001-07-23
+
+? [ New York Yankees,
+    Atlanta Braves ]
+: [ 2001-07-02, 2001-08-12,
+    2001-08-14 ]
diff --git a/test/YAMLParser/spec-02-12.data b/test/YAMLParser/spec-02-12.data
new file mode 100644
index 0000000..3b4d537
--- /dev/null
+++ b/test/YAMLParser/spec-02-12.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+---
+# products purchased
+- item    : Super Hoop
+  quantity: 1
+- item    : Basketball
+  quantity: 4
+- item    : Big Shoes
+  quantity: 1
diff --git a/test/YAMLParser/spec-02-13.data b/test/YAMLParser/spec-02-13.data
new file mode 100644
index 0000000..2bbccbf
--- /dev/null
+++ b/test/YAMLParser/spec-02-13.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+# ASCII Art
+--- |
+  \//||\/||
+  // ||  ||__
diff --git a/test/YAMLParser/spec-02-14.data b/test/YAMLParser/spec-02-14.data
new file mode 100644
index 0000000..5a18ea2
--- /dev/null
+++ b/test/YAMLParser/spec-02-14.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+---
+  Mark McGwire's
+  year was crippled
+  by a knee injury.
diff --git a/test/YAMLParser/spec-02-15.data b/test/YAMLParser/spec-02-15.data
new file mode 100644
index 0000000..2a7fbe9
--- /dev/null
+++ b/test/YAMLParser/spec-02-15.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+>
+ Sammy Sosa completed another
+ fine season with great stats.
+
+   63 Home Runs
+   0.288 Batting Average
+
+ What a year!
diff --git a/test/YAMLParser/spec-02-16.data b/test/YAMLParser/spec-02-16.data
new file mode 100644
index 0000000..3a5792c
--- /dev/null
+++ b/test/YAMLParser/spec-02-16.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+name: Mark McGwire
+accomplishment: >
+  Mark set a major league
+  home run record in 1998.
+stats: |
+  65 Home Runs
+  0.278 Batting Average
diff --git a/test/YAMLParser/spec-02-17.data b/test/YAMLParser/spec-02-17.data
new file mode 100644
index 0000000..2bcb60c
--- /dev/null
+++ b/test/YAMLParser/spec-02-17.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s
+
+unicode: "Sosa did fine.\u263A"
+control: "\b1998\t1999\t2000\n"
+hexesc:  "\x13\x10 is \r\n"
+
+single: '"Howdy!" he cried.'
+quoted: ' # not a ''comment''.'
+tie-fighter: '|\-*-/|'
+
+# CHECK: !!str "Sosa did fine.\u263A"
+# CHECK: !!str "\b1998\t1999\t2000\n"
+# CHECK: !!str "\x13\x10 is \r\n"
+# CHECK: !!str "\"Howdy!\" he cried."
+# CHECK: !!str " # not a 'comment'."
+# CHECK: !!str "|\\-*-/|"
diff --git a/test/YAMLParser/spec-02-18.data b/test/YAMLParser/spec-02-18.data
new file mode 100644
index 0000000..625a496
--- /dev/null
+++ b/test/YAMLParser/spec-02-18.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+plain:
+  This unquoted scalar
+  spans many lines.
+
+quoted: "So does this
+  quoted scalar.\n"
diff --git a/test/YAMLParser/spec-02-19.data b/test/YAMLParser/spec-02-19.data
new file mode 100644
index 0000000..cb9df6d
--- /dev/null
+++ b/test/YAMLParser/spec-02-19.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+canonical: 12345
+decimal: +12,345
+sexagesimal: 3:25:45
+octal: 014
+hexadecimal: 0xC
diff --git a/test/YAMLParser/spec-02-20.data b/test/YAMLParser/spec-02-20.data
new file mode 100644
index 0000000..ed14798
--- /dev/null
+++ b/test/YAMLParser/spec-02-20.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+canonical: 1.23015e+3
+exponential: 12.3015e+02
+sexagesimal: 20:30.15
+fixed: 1,230.15
+negative infinity: -.inf
+not a number: .NaN
diff --git a/test/YAMLParser/spec-02-21.data b/test/YAMLParser/spec-02-21.data
new file mode 100644
index 0000000..ea979db
--- /dev/null
+++ b/test/YAMLParser/spec-02-21.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+null: ~
+true: y
+false: n
+string: '12345'
diff --git a/test/YAMLParser/spec-02-22.data b/test/YAMLParser/spec-02-22.data
new file mode 100644
index 0000000..77724f7
--- /dev/null
+++ b/test/YAMLParser/spec-02-22.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+canonical: 2001-12-15T02:59:43.1Z
+iso8601: 2001-12-14t21:59:43.10-05:00
+spaced: 2001-12-14 21:59:43.10 -5
+date: 2002-12-14
diff --git a/test/YAMLParser/spec-02-23.data b/test/YAMLParser/spec-02-23.data
new file mode 100644
index 0000000..d08dfa7
--- /dev/null
+++ b/test/YAMLParser/spec-02-23.data
@@ -0,0 +1,15 @@
+# RUN: yaml-bench -canonical %s
+
+---
+not-date: !!str 2002-04-28
+
+picture: !!binary |
+ R0lGODlhDAAMAIQAAP//9/X
+ 17unp5WZmZgAAAOfn515eXv
+ Pz7Y6OjuDg4J+fn5OTk6enp
+ 56enmleECcgggoBADs=
+
+application specific tag: !something |
+ The semantics of the tag
+ above may be different for
+ different documents.
diff --git a/test/YAMLParser/spec-02-24.data b/test/YAMLParser/spec-02-24.data
new file mode 100644
index 0000000..01ca7f5
--- /dev/null
+++ b/test/YAMLParser/spec-02-24.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s
+
+%TAG ! tag:clarkevans.com,2002:
+--- !shape
+  # Use the ! handle for presenting
+  # tag:clarkevans.com,2002:circle
+- !circle
+  center: &ORIGIN {x: 73, y: 129}
+  radius: 7
+- !line
+  start: *ORIGIN
+  finish: { x: 89, y: 102 }
+- !label
+  start: *ORIGIN
+  color: 0xFFEEBB
+  text: Pretty vector drawing.
diff --git a/test/YAMLParser/spec-02-25.data b/test/YAMLParser/spec-02-25.data
new file mode 100644
index 0000000..fbadfda
--- /dev/null
+++ b/test/YAMLParser/spec-02-25.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+# sets are represented as a
+# mapping where each key is
+# associated with the empty string
+--- !!set
+? Mark McGwire
+? Sammy Sosa
+? Ken Griff
diff --git a/test/YAMLParser/spec-02-26.data b/test/YAMLParser/spec-02-26.data
new file mode 100644
index 0000000..257108e
--- /dev/null
+++ b/test/YAMLParser/spec-02-26.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+# ordered maps are represented as
+# a sequence of mappings, with
+# each mapping having one key
+--- !!omap
+- Mark McGwire: 65
+- Sammy Sosa: 63
+- Ken Griffy: 58
diff --git a/test/YAMLParser/spec-02-27.data b/test/YAMLParser/spec-02-27.data
new file mode 100644
index 0000000..a190ff1
--- /dev/null
+++ b/test/YAMLParser/spec-02-27.data
@@ -0,0 +1,31 @@
+# RUN: yaml-bench -canonical %s
+
+--- !<tag:clarkevans.com,2002:invoice>
+invoice: 34843
+date   : 2001-01-23
+bill-to: &id001
+    given  : Chris
+    family : Dumars
+    address:
+        lines: |
+            458 Walkman Dr.
+            Suite #292
+        city    : Royal Oak
+        state   : MI
+        postal  : 48046
+ship-to: *id001
+product:
+    - sku         : BL394D
+      quantity    : 4
+      description : Basketball
+      price       : 450.00
+    - sku         : BL4438H
+      quantity    : 1
+      description : Super Hoop
+      price       : 2392.00
+tax  : 251.42
+total: 4443.52
+comments:
+    Late afternoon is best.
+    Backup contact is Nancy
+    Billsmer @ 338-4338.
diff --git a/test/YAMLParser/spec-02-28.data b/test/YAMLParser/spec-02-28.data
new file mode 100644
index 0000000..695c27f
--- /dev/null
+++ b/test/YAMLParser/spec-02-28.data
@@ -0,0 +1,28 @@
+# RUN: yaml-bench -canonical %s
+
+---
+Time: 2001-11-23 15:01:42 -5
+User: ed
+Warning:
+  This is an error message
+  for the log file
+---
+Time: 2001-11-23 15:02:31 -5
+User: ed
+Warning:
+  A slightly different error
+  message.
+---
+Date: 2001-11-23 15:03:17 -5
+User: ed
+Fatal:
+  Unknown variable "bar"
+Stack:
+  - file: TopClass.py
+    line: 23
+    code: |
+      x = MoreObject("345\n")
+  - file: MoreClass.py
+    line: 58
+    code: |-
+      foo = bar
diff --git a/test/YAMLParser/spec-05-01-utf8.data b/test/YAMLParser/spec-05-01-utf8.data
new file mode 100644
index 0000000..349da06
--- /dev/null
+++ b/test/YAMLParser/spec-05-01-utf8.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+# Comment only.
diff --git a/test/YAMLParser/spec-05-02-utf8.data b/test/YAMLParser/spec-05-02-utf8.data
new file mode 100644
index 0000000..b306bdb
--- /dev/null
+++ b/test/YAMLParser/spec-05-02-utf8.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+
+# Invalid use of BOM
+# inside a
+# document.
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-05-03.data b/test/YAMLParser/spec-05-03.data
new file mode 100644
index 0000000..461e98d
--- /dev/null
+++ b/test/YAMLParser/spec-05-03.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+sequence:
+- one
+- two
+mapping:
+  ? sky
+  : blue
+  ? sea : green
diff --git a/test/YAMLParser/spec-05-04.data b/test/YAMLParser/spec-05-04.data
new file mode 100644
index 0000000..52850f4
--- /dev/null
+++ b/test/YAMLParser/spec-05-04.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+sequence: [ one, two, ]
+mapping: { sky: blue, sea: green }
diff --git a/test/YAMLParser/spec-05-05.data b/test/YAMLParser/spec-05-05.data
new file mode 100644
index 0000000..499ee8f
--- /dev/null
+++ b/test/YAMLParser/spec-05-05.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+# Comment only.
diff --git a/test/YAMLParser/spec-05-06.data b/test/YAMLParser/spec-05-06.data
new file mode 100644
index 0000000..729141a
--- /dev/null
+++ b/test/YAMLParser/spec-05-06.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+anchored: !local &anchor value
+alias: *anchor
diff --git a/test/YAMLParser/spec-05-07.data b/test/YAMLParser/spec-05-07.data
new file mode 100644
index 0000000..fc80a0d
--- /dev/null
+++ b/test/YAMLParser/spec-05-07.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+literal: |
+  text
+folded: >
+  text
diff --git a/test/YAMLParser/spec-05-08.data b/test/YAMLParser/spec-05-08.data
new file mode 100644
index 0000000..9f2b7ec
--- /dev/null
+++ b/test/YAMLParser/spec-05-08.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+single: 'text'
+double: "text"
diff --git a/test/YAMLParser/spec-05-09.data b/test/YAMLParser/spec-05-09.data
new file mode 100644
index 0000000..fc061fb
--- /dev/null
+++ b/test/YAMLParser/spec-05-09.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+%YAML 1.1
+--- text
diff --git a/test/YAMLParser/spec-05-10.data b/test/YAMLParser/spec-05-10.data
new file mode 100644
index 0000000..6788f0b
--- /dev/null
+++ b/test/YAMLParser/spec-05-10.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+
+commercial-at: @text
+grave-accent: `text
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-05-11.data b/test/YAMLParser/spec-05-11.data
new file mode 100644
index 0000000..7cba556
--- /dev/null
+++ b/test/YAMLParser/spec-05-11.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+|
+  Generic line break (no glyph)
+  Generic line break (glyphed)  Line separator   Paragraph separator 
diff --git a/test/YAMLParser/spec-05-12.data b/test/YAMLParser/spec-05-12.data
new file mode 100644
index 0000000..7dadff7
--- /dev/null
+++ b/test/YAMLParser/spec-05-12.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+#
+# We don't currently reject tabs as indentation.
+# XFAIL: *
+
+# Tabs do's and don'ts:
+# comment:
+quoted: "Quoted		"
+block: |
+  void main() {
+  	printf("Hello, world!\n");
+  }
+elsewhere:	# separation
+	indentation, in	plain scalar
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-05-13.data b/test/YAMLParser/spec-05-13.data
new file mode 100644
index 0000000..db62e86
--- /dev/null
+++ b/test/YAMLParser/spec-05-13.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+  "Text containing   
+  both space and	
+  	tab	characters"
diff --git a/test/YAMLParser/spec-05-14.data b/test/YAMLParser/spec-05-14.data
new file mode 100644
index 0000000..6545165
--- /dev/null
+++ b/test/YAMLParser/spec-05-14.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+"Fun with \\
+\" \a \b \e \f \
+\n \r \t \v \0 \
+\  \_ \N \L \P \
+\x41 \u0041 \U00000041"
+
+# CHECK: !!str "Fun with \\\n\" \a \b \e \f \n \r \t \v \0   \_ \N \L \P A A A"
diff --git a/test/YAMLParser/spec-05-15.data b/test/YAMLParser/spec-05-15.data
new file mode 100644
index 0000000..cd8421a
--- /dev/null
+++ b/test/YAMLParser/spec-05-15.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+
+Bad escapes:
+  "\c
+  \xq-"
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-06-01.data b/test/YAMLParser/spec-06-01.data
new file mode 100644
index 0000000..95b26bd
--- /dev/null
+++ b/test/YAMLParser/spec-06-01.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s
+
+  # Leading comment line spaces are
+   # neither content nor indentation.
+    
+Not indented:
+ By one space: |
+    By four
+      spaces
+ Flow style: [    # Leading spaces
+   By two,        # in flow style
+  Also by two,    # are neither
+# Tabs are not allowed:
+#  	Still by two   # content nor
+    Still by two   # content nor
+    ]             # indentation.
diff --git a/test/YAMLParser/spec-06-02.data b/test/YAMLParser/spec-06-02.data
new file mode 100644
index 0000000..40a15c9
--- /dev/null
+++ b/test/YAMLParser/spec-06-02.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+  # Comment
+   
+
diff --git a/test/YAMLParser/spec-06-03.data b/test/YAMLParser/spec-06-03.data
new file mode 100644
index 0000000..c1893ef
--- /dev/null
+++ b/test/YAMLParser/spec-06-03.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+key:    # Comment
+  value
diff --git a/test/YAMLParser/spec-06-04.data b/test/YAMLParser/spec-06-04.data
new file mode 100644
index 0000000..b61bcc6
--- /dev/null
+++ b/test/YAMLParser/spec-06-04.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+key:    # Comment
+        # lines
+  value
+
diff --git a/test/YAMLParser/spec-06-05.data b/test/YAMLParser/spec-06-05.data
new file mode 100644
index 0000000..4bcaa5a
--- /dev/null
+++ b/test/YAMLParser/spec-06-05.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+{ first: Sammy, last: Sosa }:
+# Statistics:
+  hr:  # Home runs
+    65
+  avg: # Average
+    0.278
diff --git a/test/YAMLParser/spec-06-06.data b/test/YAMLParser/spec-06-06.data
new file mode 100644
index 0000000..67e39dd
--- /dev/null
+++ b/test/YAMLParser/spec-06-06.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+plain: text
+  lines
+quoted: "text
+  	lines"
+block: |
+  text
+   	lines
diff --git a/test/YAMLParser/spec-06-07.data b/test/YAMLParser/spec-06-07.data
new file mode 100644
index 0000000..451bd34
--- /dev/null
+++ b/test/YAMLParser/spec-06-07.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+- foo
+ 
+  bar
+- |-
+  foo
+ 
+  bar
+  
diff --git a/test/YAMLParser/spec-06-08.data b/test/YAMLParser/spec-06-08.data
new file mode 100644
index 0000000..aa06f84
--- /dev/null
+++ b/test/YAMLParser/spec-06-08.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+>-
+  specific   trimmed     as  space
diff --git a/test/YAMLParser/spec-07-01.data b/test/YAMLParser/spec-07-01.data
new file mode 100644
index 0000000..21bc5e5
--- /dev/null
+++ b/test/YAMLParser/spec-07-01.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+%FOO  bar baz # Should be ignored
+               # with a warning.
+--- "foo"
diff --git a/test/YAMLParser/spec-07-02.data b/test/YAMLParser/spec-07-02.data
new file mode 100644
index 0000000..bf0e758
--- /dev/null
+++ b/test/YAMLParser/spec-07-02.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+%YAML 1.2 # Attempt parsing
+           # with a warning
+---
+"foo"
diff --git a/test/YAMLParser/spec-07-03.data b/test/YAMLParser/spec-07-03.data
new file mode 100644
index 0000000..7ca9483
--- /dev/null
+++ b/test/YAMLParser/spec-07-03.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+
+%YAML 1.1
+%YAML 1.1
+foo
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-07-04.data b/test/YAMLParser/spec-07-04.data
new file mode 100644
index 0000000..beba7d0
--- /dev/null
+++ b/test/YAMLParser/spec-07-04.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+%TAG !yaml! tag:yaml.org,2002:
+---
+!yaml!str "foo"
diff --git a/test/YAMLParser/spec-07-05.data b/test/YAMLParser/spec-07-05.data
new file mode 100644
index 0000000..279b54a
--- /dev/null
+++ b/test/YAMLParser/spec-07-05.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+#
+# We don't currently parse TAG directives.
+# XFAIL: *
+
+%TAG ! !foo
+%TAG ! !foo
+bar
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-07-06.data b/test/YAMLParser/spec-07-06.data
new file mode 100644
index 0000000..9f27f91
--- /dev/null
+++ b/test/YAMLParser/spec-07-06.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+%TAG !      !foo
+%TAG !yaml! tag:yaml.org,2002:
+---
+- !bar "baz"
+- !yaml!str "string"
diff --git a/test/YAMLParser/spec-07-07a.data b/test/YAMLParser/spec-07-07a.data
new file mode 100644
index 0000000..e51f8f7
--- /dev/null
+++ b/test/YAMLParser/spec-07-07a.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+# Private application:
+!foo "bar"
diff --git a/test/YAMLParser/spec-07-07b.data b/test/YAMLParser/spec-07-07b.data
new file mode 100644
index 0000000..003d575
--- /dev/null
+++ b/test/YAMLParser/spec-07-07b.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+# Migrated to global:
+%TAG ! tag:ben-kiki.org,2000:app/
+---
+!foo "bar"
diff --git a/test/YAMLParser/spec-07-08.data b/test/YAMLParser/spec-07-08.data
new file mode 100644
index 0000000..7197404
--- /dev/null
+++ b/test/YAMLParser/spec-07-08.data
@@ -0,0 +1,11 @@
+# RUN: yaml-bench -canonical %s
+
+# Explicitly specify default settings:
+%TAG !     !
+%TAG !!    tag:yaml.org,2002:
+# Named handles have no default:
+%TAG !o! tag:ben-kiki.org,2000:
+---
+- !foo "bar"
+- !!str "string"
+- !o!type "baz"
diff --git a/test/YAMLParser/spec-07-09.data b/test/YAMLParser/spec-07-09.data
new file mode 100644
index 0000000..1f98ba0
--- /dev/null
+++ b/test/YAMLParser/spec-07-09.data
@@ -0,0 +1,13 @@
+# RUN: yaml-bench -canonical %s
+
+---
+foo
+...
+# Repeated end marker.
+...
+---
+bar
+# No end marker.
+---
+baz
+...
diff --git a/test/YAMLParser/spec-07-10.data b/test/YAMLParser/spec-07-10.data
new file mode 100644
index 0000000..a176683
--- /dev/null
+++ b/test/YAMLParser/spec-07-10.data
@@ -0,0 +1,13 @@
+# RUN: yaml-bench -canonical %s
+
+"Root flow
+ scalar"
+--- !!str >
+ Root block
+ scalar
+---
+# Root collection:
+foo : bar
+... # Is optional.
+---
+# Explicit document may be empty.
diff --git a/test/YAMLParser/spec-07-11.data b/test/YAMLParser/spec-07-11.data
new file mode 100644
index 0000000..ce14b7e
--- /dev/null
+++ b/test/YAMLParser/spec-07-11.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+# A stream may contain
+# no documents.
diff --git a/test/YAMLParser/spec-07-12a.data b/test/YAMLParser/spec-07-12a.data
new file mode 100644
index 0000000..7327f81
--- /dev/null
+++ b/test/YAMLParser/spec-07-12a.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+# Implicit document. Root
+# collection (mapping) node.
+foo : bar
diff --git a/test/YAMLParser/spec-07-12b.data b/test/YAMLParser/spec-07-12b.data
new file mode 100644
index 0000000..d759abe
--- /dev/null
+++ b/test/YAMLParser/spec-07-12b.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+# Explicit document. Root
+# scalar (literal) node.
+--- |
+ Text content
diff --git a/test/YAMLParser/spec-07-13.data b/test/YAMLParser/spec-07-13.data
new file mode 100644
index 0000000..ab74df1
--- /dev/null
+++ b/test/YAMLParser/spec-07-13.data
@@ -0,0 +1,11 @@
+# RUN: yaml-bench -canonical %s
+
+! "First document"
+---
+!foo "No directives"
+%TAG ! !foo
+---
+!bar "With directives"
+%YAML 1.1
+---
+!baz "Reset settings"
diff --git a/test/YAMLParser/spec-08-01.data b/test/YAMLParser/spec-08-01.data
new file mode 100644
index 0000000..5abbfa8
--- /dev/null
+++ b/test/YAMLParser/spec-08-01.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+!!str &a1 "foo" : !!str bar
+&a2 baz : *a1
diff --git a/test/YAMLParser/spec-08-02.data b/test/YAMLParser/spec-08-02.data
new file mode 100644
index 0000000..8a75783
--- /dev/null
+++ b/test/YAMLParser/spec-08-02.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+First occurrence: &anchor Value
+Second occurrence: *anchor
diff --git a/test/YAMLParser/spec-08-03.data b/test/YAMLParser/spec-08-03.data
new file mode 100644
index 0000000..8c71530
--- /dev/null
+++ b/test/YAMLParser/spec-08-03.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+!<tag:yaml.org,2002:str> foo :
+  !<!bar> baz
diff --git a/test/YAMLParser/spec-08-04.data b/test/YAMLParser/spec-08-04.data
new file mode 100644
index 0000000..f13538b
--- /dev/null
+++ b/test/YAMLParser/spec-08-04.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+#
+# We don't currently look at the content of literal tags.
+# XFAIL: *
+
+- !<!> foo
+- !<$:?> bar
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-08-05.data b/test/YAMLParser/spec-08-05.data
new file mode 100644
index 0000000..0613446
--- /dev/null
+++ b/test/YAMLParser/spec-08-05.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+%TAG !o! tag:ben-kiki.org,2000:
+---
+- !local foo
+- !!str bar
+- !o!type baz
diff --git a/test/YAMLParser/spec-08-06.data b/test/YAMLParser/spec-08-06.data
new file mode 100644
index 0000000..a811bfd
--- /dev/null
+++ b/test/YAMLParser/spec-08-06.data
@@ -0,0 +1,12 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+#
+# We don't currently validate tags.
+# XFAIL: *
+
+%TAG !o! tag:ben-kiki.org,2000:
+---
+- !$a!b foo
+- !o! bar
+- !h!type baz
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-08-07.data b/test/YAMLParser/spec-08-07.data
new file mode 100644
index 0000000..fc3f2df
--- /dev/null
+++ b/test/YAMLParser/spec-08-07.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+# Assuming conventional resolution:
+- "12"
+- 12
+- ! 12
diff --git a/test/YAMLParser/spec-08-08.data b/test/YAMLParser/spec-08-08.data
new file mode 100644
index 0000000..460029f
--- /dev/null
+++ b/test/YAMLParser/spec-08-08.data
@@ -0,0 +1,15 @@
+# RUN: yaml-bench -canonical %s
+
+---
+foo:
+ "bar
+ baz"
+---
+"foo
+ bar"
+---
+foo
+ bar
+--- |
+ foo
+...
diff --git a/test/YAMLParser/spec-08-09.data b/test/YAMLParser/spec-08-09.data
new file mode 100644
index 0000000..1c82585
--- /dev/null
+++ b/test/YAMLParser/spec-08-09.data
@@ -0,0 +1,13 @@
+# RUN: yaml-bench -canonical %s
+
+---
+scalars:
+  plain: !!str some text
+  quoted:
+    single: 'some text'
+    double: "some text"
+collections:
+  sequence: !!seq [ !!str entry,
+    # Mapping entry:
+      key: value ]
+  mapping: { key: value }
diff --git a/test/YAMLParser/spec-08-10.data b/test/YAMLParser/spec-08-10.data
new file mode 100644
index 0000000..74054eb0
--- /dev/null
+++ b/test/YAMLParser/spec-08-10.data
@@ -0,0 +1,17 @@
+# RUN: yaml-bench -canonical %s
+
+block styles:
+  scalars:
+    literal: !!str |
+      #!/usr/bin/perl
+      print "Hello, world!\n";
+    folded: >
+      This sentence
+      is false.
+  collections: !!map
+    sequence: !!seq # Entry:
+      - entry # Plain
+      # Mapping entry:
+      - key: value
+    mapping: 
+      key: value
diff --git a/test/YAMLParser/spec-08-11.data b/test/YAMLParser/spec-08-11.data
new file mode 100644
index 0000000..8a75783
--- /dev/null
+++ b/test/YAMLParser/spec-08-11.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+First occurrence: &anchor Value
+Second occurrence: *anchor
diff --git a/test/YAMLParser/spec-08-12.data b/test/YAMLParser/spec-08-12.data
new file mode 100644
index 0000000..69e78b4
--- /dev/null
+++ b/test/YAMLParser/spec-08-12.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+[
+  Without properties,
+  &anchor "Anchored",
+  !!str 'Tagged',
+  *anchor, # Alias node
+  !!str ,  # Empty plain scalar
+  '',   # Empty plain scalar
+]
diff --git a/test/YAMLParser/spec-08-13.data b/test/YAMLParser/spec-08-13.data
new file mode 100644
index 0000000..931d56a
--- /dev/null
+++ b/test/YAMLParser/spec-08-13.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+{
+  ? foo :,
+  ? : bar,
+}
diff --git a/test/YAMLParser/spec-08-14.data b/test/YAMLParser/spec-08-14.data
new file mode 100644
index 0000000..61c4483
--- /dev/null
+++ b/test/YAMLParser/spec-08-14.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+- "flow in block"
+- >
+ Block scalar
+- !!map # Block collection
+  foo : bar
diff --git a/test/YAMLParser/spec-08-15.data b/test/YAMLParser/spec-08-15.data
new file mode 100644
index 0000000..f21e84a
--- /dev/null
+++ b/test/YAMLParser/spec-08-15.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+- # Empty plain scalar
+- ? foo
+  :
+  ?
+  : bar
diff --git a/test/YAMLParser/spec-09-01.data b/test/YAMLParser/spec-09-01.data
new file mode 100644
index 0000000..8999b49
--- /dev/null
+++ b/test/YAMLParser/spec-09-01.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+"simple key" : {
+  "also simple" : value,
+  ? "not a
+  simple key" : "any
+  value"
+}
diff --git a/test/YAMLParser/spec-09-02.data b/test/YAMLParser/spec-09-02.data
new file mode 100644
index 0000000..f690378
--- /dev/null
+++ b/test/YAMLParser/spec-09-02.data
@@ -0,0 +1,14 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+#
+# Indent trimming is not yet implemented.
+# XFAIL: *
+
+ "as space
+ trimmed
+
+ specific
+
+ escaped	\
+ none"
+
+# CHECK: !!str "as space trimmed\nspecific\nescaped\tnone"
diff --git a/test/YAMLParser/spec-09-03.data b/test/YAMLParser/spec-09-03.data
new file mode 100644
index 0000000..3fb0d8b
--- /dev/null
+++ b/test/YAMLParser/spec-09-03.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+- "
+  last"
+- " 	
+  last"
+- " 	first
+  last"
diff --git a/test/YAMLParser/spec-09-04.data b/test/YAMLParser/spec-09-04.data
new file mode 100644
index 0000000..4178ec6
--- /dev/null
+++ b/test/YAMLParser/spec-09-04.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+ "first
+ 	inner 1	
+ \ inner 2 \
+ last"
diff --git a/test/YAMLParser/spec-09-05.data b/test/YAMLParser/spec-09-05.data
new file mode 100644
index 0000000..e482d53
--- /dev/null
+++ b/test/YAMLParser/spec-09-05.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+- "first
+  	"
+- "first
+
+  	last"
+- "first
+ inner
+ \ 	last"
diff --git a/test/YAMLParser/spec-09-06.data b/test/YAMLParser/spec-09-06.data
new file mode 100644
index 0000000..edc0cbb
--- /dev/null
+++ b/test/YAMLParser/spec-09-06.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+ 'here''s to "quotes"'
diff --git a/test/YAMLParser/spec-09-07.data b/test/YAMLParser/spec-09-07.data
new file mode 100644
index 0000000..3c010ca
--- /dev/null
+++ b/test/YAMLParser/spec-09-07.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+'simple key' : {
+  'also simple' : value,
+  ? 'not a
+  simple key' : 'any
+  value'
+}
diff --git a/test/YAMLParser/spec-09-08.data b/test/YAMLParser/spec-09-08.data
new file mode 100644
index 0000000..d114e58
--- /dev/null
+++ b/test/YAMLParser/spec-09-08.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+ 'as space	 trimmed  specific  none'
diff --git a/test/YAMLParser/spec-09-09.data b/test/YAMLParser/spec-09-09.data
new file mode 100644
index 0000000..2fec1b5
--- /dev/null
+++ b/test/YAMLParser/spec-09-09.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+- '
+  last'
+- ' 	
+  last'
+- ' 	first
+  last'
diff --git a/test/YAMLParser/spec-09-10.data b/test/YAMLParser/spec-09-10.data
new file mode 100644
index 0000000..faabfb0
--- /dev/null
+++ b/test/YAMLParser/spec-09-10.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+ 'first
+ 	inner	
+ last'
diff --git a/test/YAMLParser/spec-09-11.data b/test/YAMLParser/spec-09-11.data
new file mode 100644
index 0000000..3f487ad
--- /dev/null
+++ b/test/YAMLParser/spec-09-11.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+- 'first
+  	'
+- 'first
+
+  	last'
diff --git a/test/YAMLParser/spec-09-12.data b/test/YAMLParser/spec-09-12.data
new file mode 100644
index 0000000..d992c58
--- /dev/null
+++ b/test/YAMLParser/spec-09-12.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+# Outside flow collection:
+- ::std::vector
+- Up, up, and away!
+- -123
+# Inside flow collection:
+- [ '::std::vector',
+  "Up, up, and away!",
+  -123 ]
diff --git a/test/YAMLParser/spec-09-13.data b/test/YAMLParser/spec-09-13.data
new file mode 100644
index 0000000..d48f2d2
--- /dev/null
+++ b/test/YAMLParser/spec-09-13.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+simple key : {
+  also simple : value,
+  ? not a
+  simple key : any
+  value
+}
diff --git a/test/YAMLParser/spec-09-14.data b/test/YAMLParser/spec-09-14.data
new file mode 100644
index 0000000..890f6bf
--- /dev/null
+++ b/test/YAMLParser/spec-09-14.data
@@ -0,0 +1,21 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+#
+# Not quite sure why this doesn't fail.
+# XFAIL: *
+
+---
+--- ||| : foo
+... >>>: bar
+---
+[
+---
+,
+... ,
+{
+--- :
+... # Nested
+}
+]
+...
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-09-15.data b/test/YAMLParser/spec-09-15.data
new file mode 100644
index 0000000..4111d1b
--- /dev/null
+++ b/test/YAMLParser/spec-09-15.data
@@ -0,0 +1,15 @@
+# RUN: yaml-bench -canonical %s
+
+---
+"---" : foo
+...: bar
+---
+[
+---,
+...,
+{
+? ---
+: ...
+}
+]
+...
diff --git a/test/YAMLParser/spec-09-16.data b/test/YAMLParser/spec-09-16.data
new file mode 100644
index 0000000..e595f47
--- /dev/null
+++ b/test/YAMLParser/spec-09-16.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+# Tabs are confusing:
+# as space/trimmed/specific/none
+ as space  trimmed  specific  none
diff --git a/test/YAMLParser/spec-09-17.data b/test/YAMLParser/spec-09-17.data
new file mode 100644
index 0000000..1bacf4d
--- /dev/null
+++ b/test/YAMLParser/spec-09-17.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+ first line 
+   
+  more line
diff --git a/test/YAMLParser/spec-09-18.data b/test/YAMLParser/spec-09-18.data
new file mode 100644
index 0000000..ac623f9
--- /dev/null
+++ b/test/YAMLParser/spec-09-18.data
@@ -0,0 +1,11 @@
+# RUN: yaml-bench -canonical %s
+
+- | # Just the style
+ literal
+- >1 # Indentation indicator
+  folded
+- |+ # Chomping indicator
+ keep
+
+- >-1 # Both indicators
+  strip
diff --git a/test/YAMLParser/spec-09-19.data b/test/YAMLParser/spec-09-19.data
new file mode 100644
index 0000000..52aa157
--- /dev/null
+++ b/test/YAMLParser/spec-09-19.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+- |
+ literal
+- >
+ folded
diff --git a/test/YAMLParser/spec-09-20.data b/test/YAMLParser/spec-09-20.data
new file mode 100644
index 0000000..86fc7ab
--- /dev/null
+++ b/test/YAMLParser/spec-09-20.data
@@ -0,0 +1,13 @@
+# RUN: yaml-bench -canonical %s
+
+- |
+ detected
+- >
+ 
+  
+  # detected
+- |1
+  explicit
+- >
+ 	
+ detected
diff --git a/test/YAMLParser/spec-09-21.data b/test/YAMLParser/spec-09-21.data
new file mode 100644
index 0000000..2bcc283
--- /dev/null
+++ b/test/YAMLParser/spec-09-21.data
@@ -0,0 +1,12 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+
+- |
+
+ text
+- >
+  text
+ text
+- |1
+ text
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-09-22.data b/test/YAMLParser/spec-09-22.data
new file mode 100644
index 0000000..b95faa5
--- /dev/null
+++ b/test/YAMLParser/spec-09-22.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+strip: |-
+  text clip: |
+  textkeep: |+
+  text 
diff --git a/test/YAMLParser/spec-09-23.data b/test/YAMLParser/spec-09-23.data
new file mode 100644
index 0000000..94f8398
--- /dev/null
+++ b/test/YAMLParser/spec-09-23.data
@@ -0,0 +1,13 @@
+# RUN: yaml-bench -canonical %s
+
+ # Strip
+  # Comments:
+strip: |-
+  # text     # Clip
+  # comments:
+clip: |
+  # text   # Keep
+  # comments:
+keep: |+
+  # text  # Trail
+  # comments.
diff --git a/test/YAMLParser/spec-09-24.data b/test/YAMLParser/spec-09-24.data
new file mode 100644
index 0000000..f08eae6
--- /dev/null
+++ b/test/YAMLParser/spec-09-24.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+strip: >-
+
+clip: >
+
+keep: |+
+
diff --git a/test/YAMLParser/spec-09-25.data b/test/YAMLParser/spec-09-25.data
new file mode 100644
index 0000000..b15edb5
--- /dev/null
+++ b/test/YAMLParser/spec-09-25.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+| # Simple block scalar
+ literal
+ 	text
diff --git a/test/YAMLParser/spec-09-26.data b/test/YAMLParser/spec-09-26.data
new file mode 100644
index 0000000..286740e
--- /dev/null
+++ b/test/YAMLParser/spec-09-26.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+|
+ 
+  
+  literal
+ 
+  text
+
+ # Comment
diff --git a/test/YAMLParser/spec-09-27.data b/test/YAMLParser/spec-09-27.data
new file mode 100644
index 0000000..286740e
--- /dev/null
+++ b/test/YAMLParser/spec-09-27.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+|
+ 
+  
+  literal
+ 
+  text
+
+ # Comment
diff --git a/test/YAMLParser/spec-09-28.data b/test/YAMLParser/spec-09-28.data
new file mode 100644
index 0000000..286740e
--- /dev/null
+++ b/test/YAMLParser/spec-09-28.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+|
+ 
+  
+  literal
+ 
+  text
+
+ # Comment
diff --git a/test/YAMLParser/spec-09-29.data b/test/YAMLParser/spec-09-29.data
new file mode 100644
index 0000000..e8906ff
--- /dev/null
+++ b/test/YAMLParser/spec-09-29.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+> # Simple folded scalar
+ folded
+ text
+ 	lines
diff --git a/test/YAMLParser/spec-09-30.data b/test/YAMLParser/spec-09-30.data
new file mode 100644
index 0000000..a2d8bf4
--- /dev/null
+++ b/test/YAMLParser/spec-09-30.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s
+
+>
+ folded
+ line
+
+ next
+ line
+
+   * bullet
+   * list
+
+ last
+ line
+
+# Comment
diff --git a/test/YAMLParser/spec-09-31.data b/test/YAMLParser/spec-09-31.data
new file mode 100644
index 0000000..a2d8bf4
--- /dev/null
+++ b/test/YAMLParser/spec-09-31.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s
+
+>
+ folded
+ line
+
+ next
+ line
+
+   * bullet
+   * list
+
+ last
+ line
+
+# Comment
diff --git a/test/YAMLParser/spec-09-32.data b/test/YAMLParser/spec-09-32.data
new file mode 100644
index 0000000..a2d8bf4
--- /dev/null
+++ b/test/YAMLParser/spec-09-32.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s
+
+>
+ folded
+ line
+
+ next
+ line
+
+   * bullet
+   * list
+
+ last
+ line
+
+# Comment
diff --git a/test/YAMLParser/spec-09-33.data b/test/YAMLParser/spec-09-33.data
new file mode 100644
index 0000000..a2d8bf4
--- /dev/null
+++ b/test/YAMLParser/spec-09-33.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s
+
+>
+ folded
+ line
+
+ next
+ line
+
+   * bullet
+   * list
+
+ last
+ line
+
+# Comment
diff --git a/test/YAMLParser/spec-10-01.data b/test/YAMLParser/spec-10-01.data
new file mode 100644
index 0000000..549a54d
--- /dev/null
+++ b/test/YAMLParser/spec-10-01.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+- [ inner, inner, ]
+- [inner,last]
diff --git a/test/YAMLParser/spec-10-02.data b/test/YAMLParser/spec-10-02.data
new file mode 100644
index 0000000..662427a
--- /dev/null
+++ b/test/YAMLParser/spec-10-02.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+[
+"double
+ quoted", 'single
+           quoted',
+plain
+ text, [ nested ],
+single: pair ,
+]
diff --git a/test/YAMLParser/spec-10-03.data b/test/YAMLParser/spec-10-03.data
new file mode 100644
index 0000000..43f300e
--- /dev/null
+++ b/test/YAMLParser/spec-10-03.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+block: # Block
+       # sequence
+- one
+- two : three
diff --git a/test/YAMLParser/spec-10-04.data b/test/YAMLParser/spec-10-04.data
new file mode 100644
index 0000000..733a570
--- /dev/null
+++ b/test/YAMLParser/spec-10-04.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+block:
+- one
+-
+ - two
diff --git a/test/YAMLParser/spec-10-05.data b/test/YAMLParser/spec-10-05.data
new file mode 100644
index 0000000..3848b2a
--- /dev/null
+++ b/test/YAMLParser/spec-10-05.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+- # Empty
+- |
+ block node
+- - one # in-line
+  - two # sequence
+- one: two # in-line
+           # mapping
diff --git a/test/YAMLParser/spec-10-06.data b/test/YAMLParser/spec-10-06.data
new file mode 100644
index 0000000..40efb2b
--- /dev/null
+++ b/test/YAMLParser/spec-10-06.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+- { inner : entry , also: inner , }
+- {inner: entry,last : entry}
diff --git a/test/YAMLParser/spec-10-07.data b/test/YAMLParser/spec-10-07.data
new file mode 100644
index 0000000..7aa350e
--- /dev/null
+++ b/test/YAMLParser/spec-10-07.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+{
+? : value, # Empty key
+? explicit
+ key: value,
+simple key : value,
+[ collection, simple, key ]: value
+}
diff --git a/test/YAMLParser/spec-10-08.data b/test/YAMLParser/spec-10-08.data
new file mode 100644
index 0000000..5b981e9
--- /dev/null
+++ b/test/YAMLParser/spec-10-08.data
@@ -0,0 +1,13 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+#
+# This fails because even without a key token, some contexts (in this case flow
+# maps) allow implicit null keys, which mix with this in weird ways.
+# XFAIL: *
+
+{
+multi-line
+ simple key : value,
+very long ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................(>1KB)................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... key: value
+}
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-10-09.data b/test/YAMLParser/spec-10-09.data
new file mode 100644
index 0000000..a6b1fd0
--- /dev/null
+++ b/test/YAMLParser/spec-10-09.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+{
+key : value,
+empty: # empty value↓
+}
diff --git a/test/YAMLParser/spec-10-10.data b/test/YAMLParser/spec-10-10.data
new file mode 100644
index 0000000..c97901d
--- /dev/null
+++ b/test/YAMLParser/spec-10-10.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+{
+? explicit key1 : explicit value,
+? explicit key2 : , # Explicit empty
+? explicit key3,     # Empty value
+simple key1 : explicit value,
+simple key2 : ,     # Explicit empty
+simple key3,         # Empty value
+}
diff --git a/test/YAMLParser/spec-10-11.data b/test/YAMLParser/spec-10-11.data
new file mode 100644
index 0000000..51bd06f0
--- /dev/null
+++ b/test/YAMLParser/spec-10-11.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+[
+? explicit key1 : explicit value,
+? explicit key2 : , # Explicit empty
+? explicit key3,     # Implicit empty
+simple key1 : explicit value,
+simple key2 : ,     # Explicit empty
+]
diff --git a/test/YAMLParser/spec-10-12.data b/test/YAMLParser/spec-10-12.data
new file mode 100644
index 0000000..65a90b3
--- /dev/null
+++ b/test/YAMLParser/spec-10-12.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+block: # Block
+    # mapping
+ key: value
diff --git a/test/YAMLParser/spec-10-13.data b/test/YAMLParser/spec-10-13.data
new file mode 100644
index 0000000..ccadeb1
--- /dev/null
+++ b/test/YAMLParser/spec-10-13.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+? explicit key # implicit value
+? |
+  block key
+: - one # explicit in-line
+  - two # block value
diff --git a/test/YAMLParser/spec-10-14.data b/test/YAMLParser/spec-10-14.data
new file mode 100644
index 0000000..866ec1f
--- /dev/null
+++ b/test/YAMLParser/spec-10-14.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+plain key: # empty value
+"quoted key":
+- one # explicit next-line
+- two # block value
diff --git a/test/YAMLParser/spec-10-15.data b/test/YAMLParser/spec-10-15.data
new file mode 100644
index 0000000..7d061bd
--- /dev/null
+++ b/test/YAMLParser/spec-10-15.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+- sun: yellow
+- ? earth: blue
+  : moon: white
diff --git a/test/YAMLParser/str.data b/test/YAMLParser/str.data
new file mode 100644
index 0000000..bf013b6
--- /dev/null
+++ b/test/YAMLParser/str.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+- abcd
diff --git a/test/YAMLParser/timestamp-bugs.data b/test/YAMLParser/timestamp-bugs.data
new file mode 100644
index 0000000..bf41a21
--- /dev/null
+++ b/test/YAMLParser/timestamp-bugs.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+- 2001-12-14 21:59:43.10 -5:30
+- 2001-12-14 21:59:43.10 +5:30
+- 2001-12-14 21:59:43.00101
+- 2001-12-14 21:59:43+1
+- 2001-12-14 21:59:43-1:30
+- 2005-07-08 17:35:04.517600
diff --git a/test/YAMLParser/timestamp.data b/test/YAMLParser/timestamp.data
new file mode 100644
index 0000000..7994545
--- /dev/null
+++ b/test/YAMLParser/timestamp.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+- 2001-12-15T02:59:43.1Z
+- 2001-12-14t21:59:43.10-05:00
+- 2001-12-14 21:59:43.10 -5
+- 2001-12-15 2:59:43.10
+- 2002-12-14
diff --git a/test/YAMLParser/utf8-implicit.data b/test/YAMLParser/utf8-implicit.data
new file mode 100644
index 0000000..ee2791f
--- /dev/null
+++ b/test/YAMLParser/utf8-implicit.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+--- implicit UTF-8
diff --git a/test/YAMLParser/utf8.data b/test/YAMLParser/utf8.data
new file mode 100644
index 0000000..3935e9d
--- /dev/null
+++ b/test/YAMLParser/utf8.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+--- UTF-8
diff --git a/test/YAMLParser/value.data b/test/YAMLParser/value.data
new file mode 100644
index 0000000..311ccd4
--- /dev/null
+++ b/test/YAMLParser/value.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+- =
diff --git a/test/YAMLParser/yaml.data b/test/YAMLParser/yaml.data
new file mode 100644
index 0000000..3ce5e4b
--- /dev/null
+++ b/test/YAMLParser/yaml.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+- !!yaml '!'
+- !!yaml '&'
+- !!yaml '*'
diff --git a/test/lib/llvm.exp b/test/lib/llvm.exp
deleted file mode 100644
index 89be85c..0000000
--- a/test/lib/llvm.exp
+++ /dev/null
@@ -1,285 +0,0 @@
-# This procedure executes one line of a test case's execution script.
-proc execOneLine { test PRS outcome lineno line } {
-  set status 0
-  set resultmsg ""
-  set retval [ catch { eval exec -keepnewline -- $line } errmsg ]
-  if { $retval != 0 } {
-    set code [lindex $::errorCode 0]
-    set lineno [expr $lineno + 1]
-    if { $PRS != ""} {
-      set PRS " for $PRS"
-    }
-    set errmsg " at line $lineno\nwhile running: $line\n$errmsg"
-    switch "$code" {
-      CHILDSTATUS {
-        set status [lindex $::errorCode 2]
-        if { $status != 0 } {
-          set resultmsg "$test$PRS\nFailed with exit($status)$errmsg"
-        }
-      }
-      CHILDKILLED {
-        set signal [lindex $::errorCode 2]
-        set resultmsg "$test$PRS\nFailed with signal($signal)$errmsg"
-      }
-      CHILDSUSP {
-        set signal [lindex $::errorCode 2]
-        set resultmsg "$test$PRS\nFailed with suspend($signal)$errmsg"
-      }
-      POSIX {
-        set posixNum [lindex $::errorCode 1]
-        set posixMsg [lindex $::errorCode 2]
-        set resultmsg "$test$PRS\nFailed with posix($posixNum,$posixMsg)$errmsg"
-      }
-      NONE {
-        # Any other error such as stderr output of a program, or syntax error in
-        # the RUN line.
-        set resultmsg "$test$PRS\nFailed with unknown error (or has stderr output)$errmsg"
-      }
-      default {
-        set resultmsg "$test$PRS\nFailed with unknown error$errmsg"
-      }
-    }
-  }
-  return $resultmsg
-}
-
-# This procedure performs variable substitutions on the RUN: lines of a test
-# cases.
-proc substitute { line test tmpFile } {
-  global srcroot objroot srcdir objdir subdir target_triplet
-  global llvmgcc llvmgxx emitir ocamlopt
-  global gccpath gxxpath compile_c compile_cxx link shlibext llvmlibsdir
-  global llvmdsymutil valgrind grep gas bugpoint_topts
-  set path [file join $srcdir $subdir]
-
-  # Substitute all Tcl variables.
-  set new_line [subst $line ]
-
-  #replace %% with _#MARKER#_ to make the replacement of %% more predictable
-  regsub -all {%%} $new_line {_#MARKER#_} new_line
-  #replace %llvmgcc_only with actual path to llvmgcc
-  regsub -all {%llvmgcc_only} $new_line "$llvmgcc" new_line
-  #replace %llvmgcc with actual path to llvmgcc
-  regsub -all {%llvmgcc} $new_line "$llvmgcc $emitir -w" new_line
-  #replace %llvmgxx with actual path to llvmg++
-  regsub -all {%llvmgxx} $new_line "$llvmgxx $emitir -w" new_line
-  #replace %compile_cxx with C++ compilation command
-  regsub -all {%compile_cxx} $new_line "$compile_cxx" new_line
-  #replace %compile_c with C compilation command
-  regsub -all {%compile_c} $new_line "$compile_c" new_line
-  #replace %link with C++ link command
-  regsub -all {%link} $new_line "$link" new_line
-  #replace %shlibext with shared library extension
-  regsub -all {%shlibext} $new_line "$shlibext" new_line
-  #replace %ocamlopt with ocaml compiler command
-  regsub -all {%ocamlopt} $new_line "$ocamlopt" new_line
-  #replace %llvmdsymutil with dsymutil command
-  regsub -all {%llvmdsymutil} $new_line "$llvmdsymutil" new_line
-  #replace %llvmlibsdir with configure library directory
-  regsub -all {%llvmlibsdir} $new_line "$llvmlibsdir" new_line
-  #replace %bugpoint_topts with actual bugpoint target options
-  regsub -all {%bugpoint_topts} $new_line "$bugpoint_topts" new_line
-  #replace %p with path to source,
-  regsub -all {%p} $new_line [file join $srcdir $subdir] new_line
-  #replace %s with filename
-  regsub -all {%s} $new_line $test new_line
-  #replace %t with temp filenames
-  regsub -all {%t} $new_line $tmpFile new_line
-  #replace %abs_tmp with absolute temp filenames
-  regsub -all {%abs_tmp} $new_line [file join [pwd] $tmpFile] new_line
-  #replace _#MARKER#_ with %
-  regsub -all {_#MARKER#_} $new_line % new_line
-
-  #replace grep with GNU grep
-  regsub -all { grep } $new_line " $grep " new_line
-  #replace as with GNU as
-  regsub -all {\| as } $new_line "| $gas " new_line
-
-  #valgind related stuff
-# regsub -all {bugpoint } $new_line "$valgrind bugpoint " new_line
-  regsub -all {llc } $new_line "$valgrind llc " new_line
-  regsub -all {lli } $new_line "$valgrind lli " new_line
-  regsub -all {llvm-ar } $new_line "$valgrind llvm-ar " new_line
-  regsub -all {llvm-as } $new_line "$valgrind llvm-as " new_line
-  regsub -all {llvm-bcanalyzer } $new_line "$valgrind llvm-bcanalyzer " new_line
-  regsub -all {llvm-dis } $new_line "$valgrind llvm-dis " new_line
-  regsub -all {llvm-extract } $new_line "$valgrind llvm-extract " new_line
-  regsub -all {llvm-ld } $new_line "$valgrind llvm-ld " new_line
-  regsub -all {llvm-link } $new_line "$valgrind llvm-link " new_line
-  regsub -all {llvm-nm } $new_line "$valgrind llvm-nm " new_line
-  regsub -all {llvm-prof } $new_line "$valgrind llvm-prof " new_line
-  regsub -all {llvm-ranlib } $new_line "$valgrind llvm-ranlib " new_line
-  regsub -all {([^a-zA-Z_-])opt } $new_line "\\1$valgrind opt " new_line
-  regsub -all {^opt } $new_line "$valgrind opt " new_line
-  regsub -all {llvm-tblgen } $new_line "$valgrind llvm-tblgen " new_line
-  regsub -all "not $valgrind " $new_line "$valgrind not " new_line
-
-  return $new_line
-}
-
-# This procedure runs the set of tests for the test_source_files array.
-proc RunLLVMTests { test_source_files } {
-  global srcroot objroot srcdir objdir subdir target_triplet
-  set timeout 60
-
-  set path [file join $objdir $subdir]
-
-  #Make Output Directory if it does not exist already
-  if { [file exists path] } {
-    cd $path
-  } else {
-    file mkdir $path
-    cd $path
-  }
-
-  file mkdir Output
-  cd Output
-
-  foreach test $test_source_files {
-    #Should figure out best way to set the timeout
-    #set timeout 40
-
-    set filename [file tail $test]
-    verbose "ABOUT TO RUN: $filename" 2
-    set outcome PASS
-    set tmpFile "$filename.tmp"
-
-    # Mark that it should not be XFAIL for this target.
-    set targetPASS 0
-
-    #set hasRunline bool to check if testcase has a runline
-    set numLines 0
-
-    # Open the test file and start reading lines
-    set testFileId [ open $test r]
-    set runline ""
-    set PRNUMS ""
-    foreach line [split [read $testFileId] \n] {
-
-      # if its the END. line then stop parsing (optimization for big files)
-      if {[regexp {END.[[:space:]]*$} $line match endofscript]} {
-        break
-
-      # if the line is continued, concatenate and continue the loop
-      } elseif {[regexp {RUN: *(.+)(\\)$} $line match oneline suffix]} {
-        set runline "$runline$oneline "
-
-      # if its a terminating RUN: line then do substitution on the whole line
-      # and then save the line.
-      } elseif {[regexp {RUN: *(.+)$} $line match oneline suffix]} {
-        set runline "$runline$oneline"
-        set runline [ substitute $runline $test $tmpFile ]
-        set lines($numLines) $runline
-        set numLines [expr $numLines + 1]
-        set runline ""
-
-      # if its an PR line, save the problem report number
-      } elseif {[regexp {PR([0-9]+)} $line match prnum]} {
-        if {$PRNUMS == ""} {
-          set PRNUMS "PR$prnum"
-        } else {
-          set PRNUMS "$PRNUMS,$prnum"
-        }
-      # if its an XFAIL line, see if we should be XFAILing or not.
-      } elseif {[regexp {XFAIL:[ *](.+)} $line match targets]} {
-        set targets
-
-        #split up target if more then 1 specified
-        foreach target [split $targets ,] {
-          if { $target == "*" } {
-              if {$targetPASS != 1} {
-                 set outcome XFAIL
-              }
-          } elseif { [regexp $target $target_triplet match] } {
-              if {$targetPASS != 1} {
-                 set outcome XFAIL
-              }
-          }
-        }
-      } elseif {[regexp {XTARGET:[ *](.+)} $line match targets]} {
-        set targets
-
-        #split up target if more then 1 specified
-        foreach target [split $targets ,] {
-          if { [regexp {\*} $target match] } {
-              set targetPASS 1
-              set outcome PASS
-          } elseif { [regexp $target $target_triplet match] } {
-              set targetPASS 1
-              set outcome PASS
-          }
-        }
-      }
-    }
-
-    # Done reading the script
-    close $testFileId
-
-
-    if { $numLines == 0 } {
-      fail "$test: \nDoes not have a RUN line\n"
-    } else {
-      set failed 0
-      for { set i 0 } { $i < $numLines } { set i [ expr $i + 1 ] } {
-        regsub ^.*RUN:(.*) $lines($i) \1 theLine
-        set resultmsg [execOneLine $test $PRNUMS $outcome $i $theLine ]
-        if { $resultmsg != "" } {
-          if { $outcome == "XFAIL" } {
-            xfail "$resultmsg"
-          } else {
-            fail "$resultmsg"
-          }
-          set failed 1
-          break
-        }
-      }
-      if { $failed } {
-        continue
-      } else {
-        if { $PRNUMS != "" } {
-          set PRNUMS " for $PRNUMS"
-        }
-        if { $outcome == "XFAIL" } {
-          xpass "$test$PRNUMS"
-        } else {
-          pass "$test$PRNUMS"
-        }
-      }
-    }
-  }
-}
-
-# This procedure provides an interface to check the TARGETS_TO_BUILD makefile
-# variable to see if a particular target has been configured to build. This
-# helps avoid running tests for targets that aren't available.
-proc llvm_supports_target { tgtName } {
-  global TARGETS_TO_BUILD
-  foreach target [split $TARGETS_TO_BUILD] {
-    if { [regexp $tgtName $target match] } {
-      return 1
-    }
-  }
-  return 0
-}
-
-proc llvm_supports_darwin_and_target { tgtName } {
-  global target_triplet
-  if { [ llvm_supports_target $tgtName ] } {
-    if { [regexp darwin $target_triplet match] } {
-      return 1
-    }
-  }
-  return 0
-}
-
-# This procedure provides an interface to check the BINDINGS_TO_BUILD makefile
-# variable to see if a particular binding has been configured to build.
-proc llvm_supports_binding { name } {
-  global llvm_bindings
-  foreach item [split $llvm_bindings] {
-    if { [regexp $name $item match] } {
-      return 1
-    }
-  }
-  return 0
-}
diff --git a/test/lib/llvm2cpp.exp b/test/lib/llvm2cpp.exp
deleted file mode 100644
index f453033..0000000
--- a/test/lib/llvm2cpp.exp
+++ /dev/null
@@ -1,100 +0,0 @@
-# This file defines a tcl proc to assist with testing the llvm2cpp. There are
-# no llvm2cpp specific test cases. Instead, it utilizes all the existing test
-# cases and makes sure llvm2cpp can run them. The basic idea is that we find
-# all the LLVM Assembly (*.ll) files, run llvm2cpp on them to generate a C++
-# program, compile those programs, run them and see if what they produce matches
-# the original input to llvm2cpp.
-
-proc llvm2cpp-test { files } {
-  global subdir llvmtoolsdir llvmlibsdir objdir srcdir objroot srcroot 
-  set timeout 30
-  set path [file join $objdir $subdir]
-  set llc [file join $llvmtoolsdir llc ]
-  set llvmas [file join $llvmtoolsdir llvm-as ]
-  set llvmdis [file join $llvmtoolsdir llvm-dis ]
-
-  #Make Output Directory if it does not exist already
-  if { [file exists path] } {
-      cd $path
-  } else {
-      file mkdir $path
-      cd $path
-  }
-  
-  file mkdir Output
-
-  foreach test $files {
-      
-    set filename [file tail $test]
-    set generated [file join Output $filename.cpp]
-    set executable [file join Output $filename.exe]
-    set output [file join Output $filename.gen]
-    set assembly [file join Output $filename.asm]
-    set testname [file rootname $filename]
-    set bytecode [file join Output $filename.bc]
-
-    # Note that the stderr for llvm-as, etc. must be redirected to /dev/null 
-    # because otherwise exec will see the msgs and return 1 even though they 
-    # are only warnings. If real errors are generated on stderr then llvm-as 
-    # will return a non-zero retval anyway so we're good.
-
-    # Scan the test file to see if there's an XFAIL file. If so, don't run it
-    set retval [ catch { 
-      exec -keepnewline grep XFAIL $test 2>/dev/null } msg ]
-    if { $retval == 0 } {
-      continue;
-    }
-
-    # Run llvm-as/llvm-dis
-    set pipeline llvm-as|llvm-dis
-    set retval [ catch { 
-      exec -keepnewline $llvmas < $test -o - | $llvmdis -o $assembly 2>/dev/null } msg ]
-
-    if { $retval != 0 } {
-      fail "$test: $pipeline returned $retval\n$msg"
-      continue 
-    }
-
-    # Build bytecode for llvm2cpp input
-    set retval [ catch { 
-      exec -keepnewline $llvmas < $assembly > $bytecode 2>/dev/null } msg ]
-
-    if { $retval != 0 } {
-      fail "$test: llvm-as returned $retval\n$msg"
-      continue 
-    }
-
-    set retval [ catch { 
-      exec -keepnewline $llc -march=cpp -o $generated < $bytecode 2>/dev/null } msg]
-
-    if { $retval != 0 } {
-      fail "$test: llvm2cpp returned $retval\n$msg"
-      continue
-    }
-
-    set retval [ catch { 
-      exec -keepnewline gcc -g -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -o $executable $generated -I$srcroot/include -I$objroot/include -L$llvmlibsdir -lLLVMCore -lLLVMSupport -lLLVMSystem -lstdc++ } msg ] 
-    if { $retval != 0 } {
-      fail "$test: gcc returned $retval\n$msg"
-      continue
-    }
-
-    set retval [ catch { exec -keepnewline $executable > $output } msg ]
-    if { $retval != 0 } {
-      set execname [file tail $executable]
-      fail "$test: $execname returned $retval:\n$msg"
-      continue
-    } 
-
-    set retval [ catch { 
-      exec -keepnewline diff $assembly $output } msg ]
-
-    if { $retval != 0 } {
-      fail "$test: diff returned $retval:\n$msg"
-      continue
-    }
-    pass "$test"
-  }
-}
-
-
diff --git a/test/lit.cfg b/test/lit.cfg
index 91abb63..c589359 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -42,18 +42,6 @@ if llvm_obj_root is not None:
 # Tweak the PATH to include the scripts dir, the tools dir, and the llvm-gcc bin
 # dir (if available).
 if llvm_obj_root is not None:
-    # Include llvm-gcc first, as the llvm-gcc binaryies will not appear
-    # neither in the tools nor in the scripts dir. However it might be
-    # possible, that some old llvm tools are in the llvm-gcc dir. Adding
-    # llvm-gcc dir first ensures, that those will always be overwritten
-    # by the new tools in llvm_tools_dir. So now outdated tools are used
-      # for testing
-    llvmgcc_dir = getattr(config, 'llvmgcc_dir', None)
-    if llvmgcc_dir:
-        path = os.path.pathsep.join((os.path.join(llvmgcc_dir, 'bin'),
-                                     config.environment['PATH']))
-        config.environment['PATH'] = path
-
     llvm_src_root = getattr(config, 'llvm_src_root', None)
     if not llvm_src_root:
         lit.fatal('No LLVM source root set!')
@@ -153,22 +141,32 @@ for line in open(os.path.join(config.llvm_obj_root, 'test', 'site.exp')):
     if m:
         site_exp[m.group(1)] = m.group(2)
 
+# Provide target_triple for use in XFAIL and XTARGET.
+config.target_triple = site_exp['target_triplet']
+
+# When running under valgrind, we mangle '-vg' or '-vg_leak' onto the end of the
+# triple so we can check it with XFAIL and XTARGET.
+config.target_triple += lit.valgrindTriple
+
+# Process jit implementation option
+jit_impl_cfg = lit.params.get('jit_impl', None)
+if jit_impl_cfg == 'mcjit':
+  # When running with mcjit, mangle -mcjit into target triple
+  # and add -use-mcjit flag to lli invocation
+  if 'i686' in config.target_triple:
+    config.target_triple += jit_impl_cfg + '-ia32'
+  elif 'x86_64' in config.target_triple:
+    config.target_triple += jit_impl_cfg + '-ia64'
+  else:
+    config.target_triple += jit_impl_cfg
+
+  config.substitutions.append( ('%lli', 'lli -use-mcjit') )
+else:
+  config.substitutions.append( ('%lli', 'lli') )
+
 # Add substitutions.
-config.substitutions.append(('%llvmgcc_only', site_exp['llvmgcc']))
-for sub in ['llvmgcc', 'llvmgxx', 'emitir', 'compile_cxx', 'compile_c',
-            'link', 'shlibext', 'ocamlopt', 'llvmdsymutil', 'llvmlibsdir',
-            'llvmshlibdir',
-            'bugpoint_topts']:
-    if sub in ('llvmgcc', 'llvmgxx'):
-        config.substitutions.append(('%' + sub,
-                                     site_exp[sub] + ' %emitir -w'))
-    # FIXME: This is a hack to avoid LLVMC tests failing due to a clang driver
-    #        warning when passing in "-fexceptions -fno-exceptions".
-    elif sub == 'compile_cxx':
-        config.substitutions.append(('%' + sub,
-                                  site_exp[sub].replace('-fno-exceptions', '')))
-    else:
-        config.substitutions.append(('%' + sub, site_exp[sub]))
+for sub in ['link', 'shlibext', 'ocamlopt', 'llvmshlibdir']:
+    config.substitutions.append(('%' + sub, site_exp[sub]))
 
 # For each occurrence of an llvm tool name as its own word, replace it
 # with the full path to the build directory holding that tool.  This
@@ -187,12 +185,14 @@ for pattern in [r"\bbugpoint\b(?!-)",   r"(?<!/|-)\bclang\b(?!-)",
                 r"\bllc\b",             r"\blli\b",
                 r"\bllvm-ar\b",         r"\bllvm-as\b",
                 r"\bllvm-bcanalyzer\b", r"\bllvm-config\b",
-                r"\bllvm-diff\b",       r"\bllvm-dis\b",
+                r"\bllvm-cov\b",        r"\bllvm-diff\b",
+                r"\bllvm-dis\b",        r"\bllvm-dwarfdump\b",
                 r"\bllvm-extract\b",    r"\bllvm-ld\b",
                 r"\bllvm-link\b",       r"\bllvm-mc\b",
-                r"\bllvm-nm\b",         r"\bllvm-prof\b",
-                r"\bllvm-ranlib\b",     r"\bllvm-shlib\b",
-                r"\bllvm-stub\b",       r"\bllvm2cpp\b",
+                r"\bllvm-nm\b",         r"\bllvm-objdump\b",
+                r"\bllvm-prof\b",       r"\bllvm-ranlib\b",
+                r"\bllvm-rtdyld\b",     r"\bllvm-shlib\b",
+                r"\bllvm-size\b",       r"\bllvm-stub\b",
                 # Don't match '-llvmc'.
                 r"(?<!-)\bllvmc\b",     r"\blto\b",
                                         # Don't match '.opt', '-opt',
@@ -218,78 +218,6 @@ for pattern in [r"\bbugpoint\b(?!-)",   r"(?<!/|-)\bclang\b(?!-)",
              break
     config.substitutions.append((pattern, substitution))
 
-excludes = []
-
-# Provide target_triple for use in XFAIL and XTARGET.
-config.target_triple = site_exp['target_triplet']
-
-# When running under valgrind, we mangle '-vg' or '-vg_leak' onto the end of the
-# triple so we can check it with XFAIL and XTARGET.
-config.target_triple += lit.valgrindTriple
-
-# Provide llvm_supports_target for use in local configs.
-targets = set(site_exp["TARGETS_TO_BUILD"].split())
-def llvm_supports_target(name):
-    return name in targets
-
-def llvm_supports_darwin_and_target(name):
-    return 'darwin' in config.target_triple and llvm_supports_target(name)
-
-bindings = set([s.strip() for s in site_exp['llvm_bindings'].split(',')])
-def llvm_supports_binding(name):
-    return name.strip() in bindings
-
-# Provide on_clone hook for reading 'dg.exp'.
-import os
-simpleLibData = re.compile(r"""load_lib llvm.exp
-
-RunLLVMTests \[lsort \[glob -nocomplain \$srcdir/\$subdir/\*\.(.*)\]\]""",
-                           re.MULTILINE)
-conditionalLibData = re.compile(r"""load_lib llvm.exp
-
-if.*\[ ?(llvm[^ ]*) ([^ ]*) ?\].*{
- *RunLLVMTests \[lsort \[glob -nocomplain \$srcdir/\$subdir/\*\.(.*)\]\]
-\}""", re.MULTILINE)
-def on_clone(parent, cfg, for_path):
-    def addSuffixes(match):
-        if match[0] == '{' and match[-1] == '}':
-            cfg.suffixes = ['.' + s for s in match[1:-1].split(',')]
-        else:
-            cfg.suffixes = ['.' + match]
-
-    libPath = os.path.join(os.path.dirname(for_path),
-                           'dg.exp')
-    if not os.path.exists(libPath):
-        cfg.unsupported = True
-        return
-
-    # Reset unsupported, in case we inherited it.
-    cfg.unsupported = False
-    lib = open(libPath).read().strip()
-
-    # Check for a simple library.
-    m = simpleLibData.match(lib)
-    if m:
-        addSuffixes(m.group(1))
-        return
-
-    # Check for a conditional test set.
-    m = conditionalLibData.match(lib)
-    if m:
-        funcname,arg,match = m.groups()
-        addSuffixes(match)
-
-        func = globals().get(funcname)
-        if not func:
-            lit.error('unsupported predicate %r' % funcname)
-        elif not func(arg):
-            cfg.unsupported = True
-        return
-    # Otherwise, give up.
-    lit.error('unable to understand %r:\n%s' % (libPath, lib))
-
-config.on_clone = on_clone
-
 ### Features
 
 # Shell execution
@@ -306,5 +234,10 @@ else:
 if loadable_module:
     config.available_features.add('loadable_module')
 
-if config.enable_assertions:
+# llc knows whether he is compiled with -DNDEBUG.
+import subprocess
+llc_cmd = subprocess.Popen([os.path.join(llvm_tools_dir, 'llc'), '-version'],
+                           stdout = subprocess.PIPE)
+if re.search(r'with assertions', llc_cmd.stdout.read()):
     config.available_features.add('asserts')
+llc_cmd.wait()
diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in
index fe152ef..8b81186 100644
--- a/test/lit.site.cfg.in
+++ b/test/lit.site.cfg.in
@@ -3,11 +3,12 @@
 config.llvm_src_root = "@LLVM_SOURCE_DIR@"
 config.llvm_obj_root = "@LLVM_BINARY_DIR@"
 config.llvm_tools_dir = "@LLVM_TOOLS_DIR@"
-config.llvmgcc_dir = "@LLVMGCCDIR@"
 config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@"
 config.python_executable = "@PYTHON_EXECUTABLE@"
 config.enable_shared = @ENABLE_SHARED@
 config.enable_assertions = @ENABLE_ASSERTIONS@
+config.targets_to_build = "@TARGETS_TO_BUILD@"
+config.llvm_bindings = "@LLVM_BINDINGS@"
 
 # Support substitution of the tools_dir with user parameters. This is
 # used when we can't determine the tool dir at configuration time.
diff --git a/test/site.exp.in b/test/site.exp.in
index 277d549..cfb2eac 100644
--- a/test/site.exp.in
+++ b/test/site.exp.in
@@ -2,27 +2,15 @@
 # Do not edit!
 set target_triplet "@TARGET_TRIPLE@"
 set TARGETS_TO_BUILD "@TARGETS_TO_BUILD@"
-set llvmgcc_langs "@LLVMGCC_LANGS@"
-set llvmtoolsdir "@LLVM_TOOLS_DIR@"
-set llvmlibsdir "@LLVM_LIBS_DIR@"
 set llvmshlibdir "@SHLIBDIR@"
 set llvm_bindings "@LLVM_BINDINGS@"
 set srcroot "@LLVM_SOURCE_DIR@"
 set objroot "@LLVM_BINARY_DIR@"
 set srcdir "@LLVM_SOURCE_DIR@"
 set objdir "@LLVM_BINARY_DIR@"
-set gccpath "@GCCPATH@"
-set gxxpath "@GXXPATH@"
-set compile_c "@TEST_COMPILE_C_CMD@"
-set compile_cxx "@TEST_COMPILE_CXX_CMD@"
 set link "@TEST_LINK_CMD@"
-set llvmgcc "@LLVMGCC@"
-set llvmgxx "@LLVMGXX@"
-set bugpoint_topts "@BUGPOINT_TOPTS@"
 set shlibext "@SHLIBEXT@"
 set ocamlopt "@OCAMLOPT@"
 set valgrind "@VALGRIND@"
 set grep "@GREP@"
 set gas "@AS@"
-set llvmdsymutil "@DSYMUTIL@"
-set emitir "@LLVMCC_EMITIR_FLAG@"