52 files changed, 1571 insertions, 47 deletions
diff --git a/test/Analysis/BasicAA/noalias-bugs.ll b/test/Analysis/BasicAA/noalias-bugs.ll
new file mode 100644
index 0000000..c02a302
--- /dev/null
+++ b/test/Analysis/BasicAA/noalias-bugs.ll
@@ -0,0 +1,33 @@
+; RUN: opt -S -basicaa -dse < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; We incorrectly returned noalias in the example below for "ptr.64" and
+; "either_ptr.64".
+; PR18460
+
+%nested = type { %nested.i64 }
+%nested.i64 = type { i64 }
+
+define i64 @testcase(%nested * noalias %p1, %nested * noalias %p2,
+                     i32 %a, i32 %b) {
+  %ptr = getelementptr inbounds %nested* %p1, i64 -1, i32 0
+  %ptr.64 = getelementptr inbounds %nested.i64* %ptr, i64 0, i32 0
+  %ptr2 = getelementptr inbounds %nested* %p2, i64 0, i32 0
+  %cmp = icmp ult i32 %a, %b
+  %either_ptr = select i1 %cmp, %nested.i64* %ptr2, %nested.i64* %ptr
+  %either_ptr.64 = getelementptr inbounds %nested.i64* %either_ptr, i64 0, i32 0
+
+; either_ptr.64 equals ptr.64 whenever the select picks %ptr, so the two can
+; alias (we used to return noalias); eliminating the first store is not valid.
+
+; CHECK: store i64 2
+; CHECK: load
+; CHECK: store i64 1
+
+  store i64 2, i64* %ptr.64, align 8
+  %r = load i64* %either_ptr.64, align 8
+  store i64 1, i64* %ptr.64, align 8
+  ret i64 %r
+}
diff --git a/test/Analysis/BasicAA/phi-aa.ll b/test/Analysis/BasicAA/phi-aa.ll
index 6aa26c1..74279e1 100644
--- a/test/Analysis/BasicAA/phi-aa.ll
+++ b/test/Analysis/BasicAA/phi-aa.ll
@@ -1,10 +1,14 @@
 ; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
 ; rdar://7282591
 
 @X = common global i32 0
 @Y = common global i32 0
 @Z = common global i32 0
 
+; CHECK-LABEL: foo
 ; CHECK:  NoAlias: i32* %P, i32* @Z
 
 define void @foo(i32 %cond) nounwind {
@@ -29,3 +33,46 @@ bb2:
 return:
   ret void
 }
+
+; Pointers can vary in between iterations of loops.
+; PR18068
+
+; CHECK-LABEL: pr18068
+; CHECK: MayAlias: i32* %0, i32* %arrayidx5
+
+define i32 @pr18068(i32* %jj7, i32* %j) {
+entry:
+  %oa5 = alloca [100 x i32], align 16 ; local array; both GEPs below index into it
+  br label %codeRepl
+
+codeRepl:
+  %0 = phi i32* [ %arrayidx13, %for.body ], [ %j, %entry ] ; %j on entry, else %arrayidx13 from the previous iteration
+  %targetBlock = call i1 @cond(i32* %jj7)
+  br i1 %targetBlock, label %for.body, label %bye
+
+for.body:
+  %1 = load i32* %jj7, align 4
+  %idxprom4 = zext i32 %1 to i64
+  %arrayidx5 = getelementptr inbounds [100 x i32]* %oa5, i64 0, i64 %idxprom4 ; &oa5[%1]
+  %2 = load i32* %arrayidx5, align 4
+  %sub6 = sub i32 %2, 6
+  store i32 %sub6, i32* %arrayidx5, align 4
+  ; %0 and %arrayidx5 can alias! It is not safe to DSE the above store.
+  %3 = load i32* %0, align 4
+  store i32 %3, i32* %arrayidx5, align 4
+  %sub11 = add i32 %1, -1
+  %idxprom12 = zext i32 %sub11 to i64
+  %arrayidx13 = getelementptr inbounds [100 x i32]* %oa5, i64 0, i64 %idxprom12 ; &oa5[%1 - 1], fed back into the phi %0
+  call void @inc(i32* %jj7) ; only declared in this file, so it may modify the value behind %jj7
+  br label %codeRepl
+
+bye:
+  %.reload = load i32* %jj7, align 4
+  ret i32 %.reload
+}
+
+declare i1 @cond(i32*)
+
+declare void @inc(i32*)
+
+
diff --git a/test/Analysis/ScalarEvolution/zext-signed-addrec.ll b/test/Analysis/ScalarEvolution/zext-signed-addrec.ll
new file mode 100644
index 0000000..27aed3b
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/zext-signed-addrec.ll
@@ -0,0 +1,81 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+; PR18000
+
+target datalayout = "e-i64:64-f80:128-s:64-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = global i32 0, align 4
+@b = common global i32 0, align 4
+@e = common global i8 0, align 1
+@d = common global i32 0, align 4
+@c = common global i32 0, align 4
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+
+; Function Attrs: nounwind optsize uwtable
+; CHECK-LABEL: foo
+define i32 @foo() {
+entry:
+  %.pr = load i32* @b, align 4
+  %cmp10 = icmp slt i32 %.pr, 1
+  br i1 %cmp10, label %for.cond1.preheader.lr.ph, label %entry.for.end9_crit_edge
+
+entry.for.end9_crit_edge:                         ; preds = %entry
+  %.pre = load i32* @c, align 4
+  br label %for.end9
+
+for.cond1.preheader.lr.ph:                        ; preds = %entry
+  %0 = load i32* @a, align 4
+  %tobool = icmp eq i32 %0, 0
+  br i1 %tobool, label %for.cond1.preheader.for.cond1.preheader.split_crit_edge, label %return.loopexit.split
+
+for.cond1.preheader.for.cond1.preheader.split_crit_edge: ; preds = %for.cond1.preheader.lr.ph, %for.inc8
+  %1 = phi i32 [ %inc, %for.inc8 ], [ %.pr, %for.cond1.preheader.lr.ph ] ; outer counter: value loaded from @b, then %inc
+  br label %if.end
+
+; CHECK-LABEL: if.end
+if.end:                                           ; preds = %if.end, %for.cond1.preheader.for.cond1.preheader.split_crit_edge
+
+; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %if.end ], [ 258, %for.cond1.preheader.for.cond1.preheader.split_crit_edge ]
+  %indvars.iv = phi i32 [ 1, %for.cond1.preheader.for.cond1.preheader.split_crit_edge ], [ %indvars.iv.next, %if.end ]
+
+  %2 = phi i8 [ 1, %for.cond1.preheader.for.cond1.preheader.split_crit_edge ], [ %dec, %if.end ] ; i8 down-counter, starts at 1
+  %conv7 = mul i32 %indvars.iv, 258 ; start 1*258 = 258, step -1*258 = -258: the values the expected %lsr.iv uses
+  %shl = and i32 %conv7, 510
+  store i32 %shl, i32* @c, align 4
+
+; CHECK: %lsr.iv.next = add i32 %lsr.iv, -258
+  %dec = add i8 %2, -1
+
+  %cmp2 = icmp sgt i8 %dec, -1 ; keep looping while the i8 counter is non-negative (signed compare)
+  %indvars.iv.next = add i32 %indvars.iv, -1
+  br i1 %cmp2, label %if.end, label %for.inc8
+
+for.inc8:                                         ; preds = %if.end
+  store i32 0, i32* @d, align 4
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, i32* @b, align 4
+  %cmp = icmp slt i32 %1, 0
+  br i1 %cmp, label %for.cond1.preheader.for.cond1.preheader.split_crit_edge, label %for.cond.for.end9_crit_edge
+
+for.cond.for.end9_crit_edge:                      ; preds = %for.inc8
+  store i8 %dec, i8* @e, align 1
+  br label %for.end9
+
+for.end9:                                         ; preds = %entry.for.end9_crit_edge, %for.cond.for.end9_crit_edge
+  %3 = phi i32 [ %.pre, %entry.for.end9_crit_edge ], [ %shl, %for.cond.for.end9_crit_edge ]
+  %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %3) #2
+  br label %return
+
+return.loopexit.split:                            ; preds = %for.cond1.preheader.lr.ph
+  store i8 1, i8* @e, align 1
+  store i32 0, i32* @d, align 4
+  br label %return
+
+return:                                           ; preds = %return.loopexit.split, %for.end9
+  %retval.0 = phi i32 [ 0, %for.end9 ], [ 1, %return.loopexit.split ] ; 0 via the printf path, 1 via the early exit
+  ret i32 %retval.0
+}
+
+; Function Attrs: nounwind optsize
+declare i32 @printf(i8* nocapture readonly, ...)
+
diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll
index de84ff4..5857faf 100644
--- a/test/CodeGen/AArch64/atomic-ops.ll
+++ b/test/CodeGen/AArch64/atomic-ops.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-REG %s
 
 @var8 = global i8 0
 @var16 = global i16 0
@@ -17,6 +18,8 @@ define i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-REG: add w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
+; CHECK-REG-NOT: stlxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -37,6 +40,8 @@ define i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-REG: add w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
+; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -57,6 +62,8 @@ define i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-REG: add w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
+; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -77,6 +84,8 @@ define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
   ; x0 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
 ; CHECK-NEXT: add [[NEW:x[0-9]+]], x[[OLD]], x0
+; CHECK-REG: add x[[NEW:[0-9]+]], x{{[0-9]+}}, x0
+; CHECK-REG-NOT: stxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -97,6 +106,8 @@ define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-REG: sub w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
+; CHECK-REG-NOT: stxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -117,6 +128,8 @@ define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-REG: sub w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
+; CHECK-REG-NOT: stlxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -137,6 +150,8 @@ define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-REG: sub w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
+; CHECK-REG-NOT: stxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -157,6 +172,8 @@ define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
   ; x0 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
 ; CHECK-NEXT: sub [[NEW:x[0-9]+]], x[[OLD]], x0
+; CHECK-REG: sub x[[NEW:[0-9]+]], x{{[0-9]+}}, x0
+; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -177,6 +194,8 @@ define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-REG: and w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
+; CHECK-REG-NOT: stlxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -197,6 +216,8 @@ define i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-REG: and w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
+; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -217,6 +238,8 @@ define i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-REG: and w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
+; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -237,6 +260,8 @@ define i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
   ; x0 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
 ; CHECK-NEXT: and [[NEW:x[0-9]+]], x[[OLD]], x0
+; CHECK-REG: and x[[NEW:[0-9]+]], x{{[0-9]+}}, x0
+; CHECK-REG-NOT: stxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -257,6 +282,8 @@ define i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-REG: orr w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
+; CHECK-REG-NOT: stlxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -277,6 +304,8 @@ define i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-REG: orr w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
+; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -297,6 +326,8 @@ define i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-REG: orr w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
+; CHECK-REG-NOT: stxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -317,6 +348,8 @@ define i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
   ; x0 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
 ; CHECK-NEXT: orr [[NEW:x[0-9]+]], x[[OLD]], x0
+; CHECK-REG: orr x[[NEW:[0-9]+]], x{{[0-9]+}}, x0
+; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -337,6 +370,8 @@ define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-REG: eor w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
+; CHECK-REG-NOT: stxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -357,6 +392,8 @@ define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-REG: eor w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
+; CHECK-REG-NOT: stlxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -377,6 +414,8 @@ define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-REG: eor w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
+; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -397,6 +436,8 @@ define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
   ; x0 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
 ; CHECK-NEXT: eor [[NEW:x[0-9]+]], x[[OLD]], x0
+; CHECK-REG: eor x[[NEW:[0-9]+]], x{{[0-9]+}}, x0
+; CHECK-REG-NOT: stxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -416,6 +457,7 @@ define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
 ; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
+; CHECK-REG-NOT: stxrb w0, w0, [x{{[0-9]+}}]
 ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -435,6 +477,7 @@ define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
 ; CHECK-NEXT: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
+; CHECK-REG-NOT: stlxrh w0, w0, [x{{[0-9]+}}]
 ; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -454,6 +497,7 @@ define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
 ; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
+; CHECK-REG-NOT: stlxr w0, w0, [x{{[0-9]+}}]
 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -473,6 +517,7 @@ define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
 ; CHECK-NEXT: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]]
   ; x0 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
+; CHECK-REG-NOT: stxr w0, x0, [x{{[0-9]+}}]
 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], x0, [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -495,6 +540,8 @@ define i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
   ;  function there.
 ; CHECK-NEXT: cmp w0, w[[OLD]], sxtb
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
+; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, gt
+; CHECK-REG-NOT: stxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -516,6 +563,8 @@ define i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
   ;  function there.
 ; CHECK-NEXT: cmp w0, w[[OLD]], sxth
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
+; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, gt
+; CHECK-REG-NOT: stlxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -537,6 +586,8 @@ define i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
   ;  function there.
 ; CHECK-NEXT: cmp w0, w[[OLD]]
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
+; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, gt
+; CHECK-REG-NOT: stxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -558,6 +609,8 @@ define i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
   ; function there.
 ; CHECK-NEXT: cmp x0, x[[OLD]]
 ; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, gt
+; CHECK-REG: csel x[[NEW:[0-9]+]], x{{[0-9]+}}, x0, gt
+; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -579,6 +632,8 @@ define i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
   ;  function there.
 ; CHECK-NEXT: cmp w0, w[[OLD]], sxtb
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt
+; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lt
+; CHECK-REG-NOT: stlxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -600,6 +655,8 @@ define i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
   ;  function there.
 ; CHECK-NEXT: cmp w0, w[[OLD]], sxth
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt
+; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lt
+; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -621,6 +678,8 @@ define i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
   ;  function there.
 ; CHECK-NEXT: cmp w0, w[[OLD]]
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt
+; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lt
+; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -642,6 +701,8 @@ define i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
   ; function there.
 ; CHECK-NEXT: cmp x0, x[[OLD]]
 ; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lt
+; CHECK-REG: csel x[[NEW:[0-9]+]], x{{[0-9]+}}, x0, lt
+; CHECK-REG-NOT: stxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -663,6 +724,8 @@ define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
   ;  function there.
 ; CHECK-NEXT: cmp w0, w[[OLD]], uxtb
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
+; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, hi
+; CHECK-REG-NOT: stxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -684,6 +747,8 @@ define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
   ;  function there.
 ; CHECK-NEXT: cmp w0, w[[OLD]], uxth
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
+; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, hi
+; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -705,6 +770,8 @@ define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
   ;  function there.
 ; CHECK-NEXT: cmp w0, w[[OLD]]
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
+; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, hi
+; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -726,6 +793,8 @@ define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind {
   ; function there.
 ; CHECK-NEXT: cmp x0, x[[OLD]]
 ; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, hi
+; CHECK-REG: csel x[[NEW:[0-9]+]], x{{[0-9]+}}, x0, hi
+; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -747,6 +816,8 @@ define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
   ;  function there.
 ; CHECK-NEXT: cmp w0, w[[OLD]], uxtb
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo
+; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lo
+; CHECK-REG-NOT: stlxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -768,6 +839,8 @@ define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
   ;  function there.
 ; CHECK-NEXT: cmp w0, w[[OLD]], uxth
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo
+; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lo
+; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -789,6 +862,8 @@ define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
   ;  function there.
 ; CHECK-NEXT: cmp w0, w[[OLD]]
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo
+; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lo
+; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -810,6 +885,8 @@ define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
   ; function there.
 ; CHECK-NEXT: cmp x0, x[[OLD]]
 ; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lo
+; CHECK-REG: csel x[[NEW:[0-9]+]], x{{[0-9]+}}, x0, lo
+; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -832,6 +909,7 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 ; CHECK-NEXT: cmp w[[OLD]], w0
 ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
   ; As above, w1 is a reasonable guess.
+; CHECK-REG-NOT: stxrb w1, w1, [x{{[0-9]+}}]
 ; CHECK: stxrb [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
 ; CHECK-NOT: dmb
@@ -854,6 +932,7 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 ; CHECK-NEXT: cmp w[[OLD]], w0
 ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
   ; As above, w1 is a reasonable guess.
+; CHECK-REG-NOT: stlxrh w1, w1, [x{{[0-9]+}}]
 ; CHECK: stlxrh [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
 ; CHECK-NOT: dmb
@@ -876,6 +955,7 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 ; CHECK-NEXT: cmp w[[OLD]], w0
 ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
   ; As above, w1 is a reasonable guess.
+; CHECK-REG-NOT: stlxr w1, w1, [x{{[0-9]+}}]
 ; CHECK: stlxr [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
 ; CHECK-NOT: dmb
@@ -898,6 +978,7 @@ define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
 ; CHECK-NEXT: cmp x[[OLD]], x0
 ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
   ; As above, w1 is a reasonable guess.
+; CHECK-REG-NOT: stxr w1, x1, [x{{[0-9]+}}]
 ; CHECK: stxr [[STATUS:w[0-9]+]], x1, [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
 ; CHECK-NOT: dmb
diff --git a/test/CodeGen/AArch64/init-array.ll b/test/CodeGen/AArch64/init-array.ll
index 3ff1c1a..076ae27 100644
--- a/test/CodeGen/AArch64/init-array.ll
+++ b/test/CodeGen/AArch64/init-array.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -use-init-array < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-none-eabi -verify-machineinstrs -use-init-array < %s | FileCheck %s
 
 define internal void @_GLOBAL__I_a() section ".text.startup" {
   ret void
diff --git a/test/CodeGen/AArch64/variadic.ll b/test/CodeGen/AArch64/variadic.ll
index f3d376b..4c219eb 100644
--- a/test/CodeGen/AArch64/variadic.ll
+++ b/test/CodeGen/AArch64/variadic.ll
@@ -179,24 +179,19 @@ define void @test_va_copy() {
 
 ; Check beginning and end again:
 
-; CHECK: ldr [[BLOCK:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var]
 ; CHECK: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
-; CHECK-NOFP: ldr [[BLOCK:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var]
-; CHECK-NOFP: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
-
-; CHECK: str [[BLOCK]], [{{x[0-9]+}}, #:lo12:second_list]
-
-; CHECK: ldr [[BLOCK:x[0-9]+]], [x[[SRC_LIST]], #24]
 ; CHECK: add x[[DEST_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:second_list
+; CHECK: ldr [[BLOCK1:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var]
+; CHECK: ldr [[BLOCK2:x[0-9]+]], [x[[SRC_LIST]], #24]
+; CHECK: str [[BLOCK1]], [{{x[0-9]+}}, #:lo12:second_list]
+; CHECK: str [[BLOCK2]], [x[[DEST_LIST]], #24]
 
-; CHECK: str [[BLOCK]], [x[[DEST_LIST]], #24]
-
-; CHECK-NOFP: str [[BLOCK]], [{{x[0-9]+}}, #:lo12:second_list]
-
-; CHECK-NOFP: ldr [[BLOCK:x[0-9]+]], [x[[SRC_LIST]], #24]
+; CHECK-NOFP: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
 ; CHECK-NOFP: add x[[DEST_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:second_list
-
-; CHECK-NOFP: str [[BLOCK]], [x[[DEST_LIST]], #24]
+; CHECK-NOFP: ldr [[BLOCK1:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var]
+; CHECK-NOFP: ldr [[BLOCK2:x[0-9]+]], [x[[SRC_LIST]], #24]
+; CHECK-NOFP: str [[BLOCK1]], [{{x[0-9]+}}, #:lo12:second_list]
+; CHECK-NOFP: str [[BLOCK2]], [x[[DEST_LIST]], #24]
 
   ret void
 ; CHECK: ret
diff --git a/test/CodeGen/ARM/a15-SD-dep.ll b/test/CodeGen/ARM/a15-SD-dep.ll
index 019ff61..5e5ca4b 100644
--- a/test/CodeGen/ARM/a15-SD-dep.ll
+++ b/test/CodeGen/ARM/a15-SD-dep.ll
@@ -56,3 +56,62 @@ define arm_aapcs_vfpcc <4 x float> @t5(<4 x float> %q, float %f) {
   %i2 = fadd <4 x float> %i1, %i1
   ret <4 x float> %i2
 }
+
+; Test that DPair can be successfully passed as QPR.
+; CHECK-ENABLED-LABEL: test_DPair1:
+; CHECK-DISABLED-LABEL: test_DPair1:
+define void @test_DPair1(i32 %vsout, i8* nocapture %out, float %x, float %y) {
+entry:
+  %0 = insertelement <4 x float> undef, float %x, i32 1 ; lane 1 = %x
+  %1 = insertelement <4 x float> %0, float %y, i32 0 ; lane 0 = %y
+  ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[0]
+  ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[1]
+  ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[0]
+  ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[1]
+  ; CHECK-DISABLED-NOT: vdup
+  switch i32 %vsout, label %sw.epilog [
+    i32 1, label %sw.bb
+    i32 0, label %sw.bb6
+  ]
+
+sw.bb:                                            ; preds = %entry
+  %2 = insertelement <4 x float> %1, float 0.000000e+00, i32 0 ; overwrite lane 0 with 0.0
+  br label %sw.bb6
+
+sw.bb6:                                           ; preds = %sw.bb, %entry
+  %sum.0 = phi <4 x float> [ %1, %entry ], [ %2, %sw.bb ] ; the vector merged across both switch paths
+  %3 = extractelement <4 x float> %sum.0, i32 0 ; only lane 0 is consumed
+  %conv = fptoui float %3 to i8
+  store i8 %conv, i8* %out, align 1
+  ret void
+
+sw.epilog:                                        ; preds = %entry
+  ret void
+}
+
+; CHECK-ENABLED-LABEL: test_DPair2:
+; CHECK-DISABLED-LABEL: test_DPair2:
+define void @test_DPair2(i32 %vsout, i8* nocapture %out, float %x) {
+entry:
+  %0 = insertelement <4 x float> undef, float %x, i32 0 ; lane 0 = %x, other lanes undef
+  ; CHECK-ENABLED: vdup.32 q{{[0-9]*}}, d{{[0-9]*}}[0]
+  ; CHECK-DISABLED-NOT: vdup
+  switch i32 %vsout, label %sw.epilog [
+    i32 1, label %sw.bb
+    i32 0, label %sw.bb1
+  ]
+
+sw.bb:                                            ; preds = %entry
+  %1 = insertelement <4 x float> %0, float 0.000000e+00, i32 0 ; overwrite lane 0 with 0.0
+  br label %sw.bb1
+
+sw.bb1:                                           ; preds = %entry, %sw.bb
+  %sum.0 = phi <4 x float> [ %0, %entry ], [ %1, %sw.bb ] ; the vector merged across both switch paths
+  %2 = extractelement <4 x float> %sum.0, i32 0 ; only lane 0 is consumed
+  %conv = fptoui float %2 to i8
+  store i8 %conv, i8* %out, align 1
+  br label %sw.epilog
+
+sw.epilog:                                        ; preds = %entry, %sw.bb1
+  ret void
+}
+\ No newline at end of file
diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll
index 400541f..d6eb4c2 100644
--- a/test/CodeGen/ARM/vld3.ll
+++ b/test/CodeGen/ARM/vld3.ll
@@ -83,6 +83,19 @@ define <1 x i64> @vld3i64(i64* %A) nounwind {
 	ret <1 x i64> %tmp4
 }
 
+define <1 x i64> @vld3i64_update(i64** %ptr, i64* %A) nounwind {
+;CHECK-LABEL: vld3i64_update:
+;CHECK: vld1.64	{d16, d17, d18}, [r1:64]!
+        %tmp0 = bitcast i64* %A to i8*
+        %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0, i32 16)
+        %tmp5 = getelementptr i64* %A, i32 3
+        store i64* %tmp5, i64** %ptr
+        %tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2
+        %tmp4 = add <1 x i64> %tmp2, %tmp3
+        ret <1 x i64> %tmp4
+}
+
 define <16 x i8> @vld3Qi8(i8* %A) nounwind {
 ;CHECK-LABEL: vld3Qi8:
 ;Check the alignment value.  Max for this instruction is 64 bits:
diff --git a/test/CodeGen/ARM/vld4.ll b/test/CodeGen/ARM/vld4.ll
index f7376b5..ff162bb 100644
--- a/test/CodeGen/ARM/vld4.ll
+++ b/test/CodeGen/ARM/vld4.ll
@@ -83,6 +83,19 @@ define <1 x i64> @vld4i64(i64* %A) nounwind {
 	ret <1 x i64> %tmp4
 }
 
+define <1 x i64> @vld4i64_update(i64** %ptr, i64* %A) nounwind {
+;CHECK-LABEL: vld4i64_update:
+;CHECK: vld1.64 {d16, d17, d18, d19}, [r1:256]!
+        %tmp0 = bitcast i64* %A to i8*
+        %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0, i32 64)
+        %tmp5 = getelementptr i64* %A, i32 4
+        store i64* %tmp5, i64** %ptr
+        %tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 2
+        %tmp4 = add <1 x i64> %tmp2, %tmp3
+        ret <1 x i64> %tmp4
+}
+
 define <16 x i8> @vld4Qi8(i8* %A) nounwind {
 ;CHECK-LABEL: vld4Qi8:
 ;Check the alignment value.  Max for this instruction is 256 bits:
diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll
index 91eb7fc..65625de 100644
--- a/test/CodeGen/ARM/vst3.ll
+++ b/test/CodeGen/ARM/vst3.ll
@@ -61,6 +61,18 @@ define void @vst3i64(i64* %A, <1 x i64>* %B) nounwind {
 	ret void
 }
 
+define void @vst3i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
+;CHECK-LABEL: vst3i64_update:
+;CHECK: vst1.64	{d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
+        %A = load i64** %ptr
+        %tmp0 = bitcast i64* %A to i8*
+        %tmp1 = load <1 x i64>* %B
+        call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
+        %tmp2 = getelementptr i64* %A, i32 3
+        store i64* %tmp2, i64** %ptr
+        ret void
+}
+
 define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vst3Qi8:
 ;Check the alignment value.  Max for this instruction is 64 bits:
diff --git a/test/CodeGen/ARM/vst4.ll b/test/CodeGen/ARM/vst4.ll
index ef5c83a..83a6c70 100644
--- a/test/CodeGen/ARM/vst4.ll
+++ b/test/CodeGen/ARM/vst4.ll
@@ -60,6 +60,18 @@ define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind {
 	ret void
 }
 
+define void @vst4i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
+;CHECK-LABEL: vst4i64_update:
+;CHECK: vst1.64	{d16, d17, d18, d19}, [r1]!
+        %A = load i64** %ptr
+        %tmp0 = bitcast i64* %A to i8*
+        %tmp1 = load <1 x i64>* %B
+        call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
+        %tmp2 = getelementptr i64* %A, i32 4
+        store i64* %tmp2, i64** %ptr
+        ret void
+}
+
 define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vst4Qi8:
 ;Check the alignment value.  Max for this instruction is 256 bits:
diff --git a/test/CodeGen/PowerPC/anon_aggr.ll b/test/CodeGen/PowerPC/anon_aggr.ll
index 1525e05..ce07d88 100644
--- a/test/CodeGen/PowerPC/anon_aggr.ll
+++ b/test/CodeGen/PowerPC/anon_aggr.ll
@@ -119,9 +119,9 @@ unequal:
 ; CHECK: ld 3, -[[OFFSET1]](1)
 
 ; DARWIN32: _func3:
-; DARWIN32: addi r[[REG1:[0-9]+]], r[[REGSP:[0-9]+]], 40
+; DARWIN32: addi r[[REG1:[0-9]+]], r[[REGSP:[0-9]+]], 36
 ; DARWIN32: addi r[[REG2:[0-9]+]], r[[REGSP]], 24
-; DARWIN32: lwz r[[REG3:[0-9]+]], 48(r[[REGSP]])
+; DARWIN32: lwz r[[REG3:[0-9]+]], 44(r[[REGSP]])
 ; DARWIN32: lwz r[[REG4:[0-9]+]], 32(r[[REGSP]])
 ; DARWIN32: cmplw cr{{[0-9]+}}, r[[REG4]], r[[REG3]]
 ; DARWIN32: stw r[[REG3]], -[[OFFSET1:[0-9]+]]
diff --git a/test/CodeGen/PowerPC/byval-agg-info.ll b/test/CodeGen/PowerPC/byval-agg-info.ll
new file mode 100644
index 0000000..89ad8e4
--- /dev/null
+++ b/test/CodeGen/PowerPC/byval-agg-info.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -print-after=prologepilog >%t 2>&1 && FileCheck <%t %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.anon = type { i32, i32 }
+
+declare void @foo(%struct.anon* %v)
+define void @test(i32 %a, i32 %b, %struct.anon* byval nocapture %v) {
+entry:
+  call void @foo(%struct.anon* %v)
+  ret void
+}
+
+; Make sure that the MMO on the store has no offset from the byval
+; variable itself (we used to have mem:ST8[%v+64]).
+; CHECK: STD %X5<kill>, 176, %X1; mem:ST8[%v](align=16)
+
diff --git a/test/CodeGen/PowerPC/cc.ll b/test/CodeGen/PowerPC/cc.ll
new file mode 100644
index 0000000..ab724f5
--- /dev/null
+++ b/test/CodeGen/PowerPC/cc.ll
@@ -0,0 +1,70 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i64 @test1(i64 %a, i64 %b) {
+entry:
+  %c = icmp eq i64 %a, %b
+  br label %foo
+
+foo:
+  call { i64, i64 } asm sideeffect "sc", "={r0},={r3},{r0},~{cr0},~{cr1},~{cr2},~{cr3},~{cr4},~{cr5},~{cr6},~{cr7}" (i64 %a)
+  br i1 %c, label %bar, label %end
+
+bar:
+  ret i64 %b
+
+end:
+  ret i64 %a
+
+; CHECK-LABEL: @test1
+; CHECK: mfcr [[REG1:[0-9]+]]
+; CHECK-DAG: cmpld
+; CHECK-DAG: mfocrf [[REG2:[0-9]+]],
+; CHECK-DAG: stw [[REG1]], 8(1)
+; CHECK-DAG: stw [[REG2]], -4(1)
+
+; CHECK: sc
+; CHECK: lwz [[REG3:[0-9]+]], -4(1)
+; CHECK: mtocrf 128, [[REG3]]
+
+; CHECK: lwz [[REG4:[0-9]+]], 8(1)
+; CHECK-DAG: mtocrf 32, [[REG4]]
+; CHECK-DAG: mtocrf 16, [[REG4]]
+; CHECK-DAG: mtocrf 8, [[REG4]]
+; CHECK: blr
+}
+
+define i64 @test2(i64 %a, i64 %b) {
+entry:
+  %c = icmp eq i64 %a, %b
+  br label %foo
+
+foo:
+  call { i64, i64 } asm sideeffect "sc", "={r0},={r3},{r0},~{cc}" (i64 %a)
+  br i1 %c, label %bar, label %end
+
+bar:
+  ret i64 %b
+
+end:
+  ret i64 %a
+
+; CHECK-LABEL: @test2
+; CHECK: mfcr [[REG1:[0-9]+]]
+; CHECK-DAG: cmpld
+; CHECK-DAG: mfocrf [[REG2:[0-9]+]],
+; CHECK-DAG: stw [[REG1]], 8(1)
+; CHECK-DAG: stw [[REG2]], -4(1)
+
+; CHECK: sc
+; CHECK: lwz [[REG3:[0-9]+]], -4(1)
+; CHECK: mtocrf 128, [[REG3]]
+
+; CHECK: lwz [[REG4:[0-9]+]], 8(1)
+; CHECK-DAG: mtocrf 32, [[REG4]]
+; CHECK-DAG: mtocrf 16, [[REG4]]
+; CHECK-DAG: mtocrf 8, [[REG4]]
+; CHECK: blr
+}
+
diff --git a/test/CodeGen/PowerPC/ctrloop-udivti3.ll b/test/CodeGen/PowerPC/ctrloop-udivti3.ll
new file mode 100644
index 0000000..d07a11f
--- /dev/null
+++ b/test/CodeGen/PowerPC/ctrloop-udivti3.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -march=ppc64 | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define hidden void @_mpd_shortdiv(i64 %n) #0 {
+entry:
+  br i1 undef, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.lr.ph
+  %i.018.in = phi i64 [ %n, %for.body.lr.ph ], [ %i.018, %for.body ]
+  %i.018 = add i64 %i.018.in, -1
+  %add.i = or i128 undef, undef
+  %div.i = udiv i128 %add.i, 0
+  %conv3.i11 = trunc i128 %div.i to i64
+  store i64 %conv3.i11, i64* undef, align 8
+  %cmp = icmp eq i64 %i.018, 0
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; CHECK-LABEL: @_mpd_shortdiv
+; CHECK-NOT: mtctr
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/fast-isel-conversion-p5.ll b/test/CodeGen/PowerPC/fast-isel-conversion-p5.ll
new file mode 100644
index 0000000..db0d8ed
--- /dev/null
+++ b/test/CodeGen/PowerPC/fast-isel-conversion-p5.ll
@@ -0,0 +1,153 @@
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr5 | FileCheck %s --check-prefix=ELF64
+
+; Test sitofp
+
+define void @sitofp_double_i32(i32 %a, double %b) nounwind ssp {
+entry:
+; ELF64: sitofp_double_i32
+  %b.addr = alloca double, align 8
+  %conv = sitofp i32 %a to double
+; ELF64: std {{[0-9]+}}, -[[OFFSET:[0-9]+]](1)
+; ELF64: lfd {{[0-9]+}}, -[[OFFSET]](1)
+; ELF64: fcfid
+  store double %conv, double* %b.addr, align 8
+  ret void
+}
+
+define void @sitofp_double_i64(i64 %a, double %b) nounwind ssp {
+entry:
+; ELF64: sitofp_double_i64
+  %b.addr = alloca double, align 8
+  %conv = sitofp i64 %a to double
+; ELF64: std {{[0-9]+}}, -[[OFFSET:[0-9]+]](1)
+; ELF64: lfd {{[0-9]+}}, -[[OFFSET]](1)
+; ELF64: fcfid
+  store double %conv, double* %b.addr, align 8
+  ret void
+}
+
+define void @sitofp_double_i16(i16 %a, double %b) nounwind ssp {
+entry:
+; ELF64: sitofp_double_i16
+  %b.addr = alloca double, align 8
+  %conv = sitofp i16 %a to double
+; ELF64: extsh
+; ELF64: std {{[0-9]+}}, -[[OFFSET:[0-9]+]](1)
+; ELF64: lfd {{[0-9]+}}, -[[OFFSET]](1)
+; ELF64: fcfid
+  store double %conv, double* %b.addr, align 8
+  ret void
+}
+
+define void @sitofp_double_i8(i8 %a, double %b) nounwind ssp {
+entry:
+; ELF64: sitofp_double_i8
+  %b.addr = alloca double, align 8
+  %conv = sitofp i8 %a to double
+; ELF64: extsb
+; ELF64: std {{[0-9]+}}, -[[OFFSET:[0-9]+]](1)
+; ELF64: lfd {{[0-9]+}}, -[[OFFSET]](1)
+; ELF64: fcfid
+  store double %conv, double* %b.addr, align 8
+  ret void
+}
+
+; Test fptosi
+
+define void @fptosi_float_i32(float %a) nounwind ssp {
+entry:
+; ELF64: fptosi_float_i32
+  %b.addr = alloca i32, align 4
+  %conv = fptosi float %a to i32
+; ELF64: fctiwz
+; ELF64: stfd
+; ELF64: lwa
+  store i32 %conv, i32* %b.addr, align 4
+  ret void
+}
+
+define void @fptosi_float_i64(float %a) nounwind ssp {
+entry:
+; ELF64: fptosi_float_i64
+  %b.addr = alloca i64, align 4
+  %conv = fptosi float %a to i64
+; ELF64: fctidz
+; ELF64: stfd
+; ELF64: ld
+  store i64 %conv, i64* %b.addr, align 4
+  ret void
+}
+
+define void @fptosi_double_i32(double %a) nounwind ssp {
+entry:
+; ELF64: fptosi_double_i32
+  %b.addr = alloca i32, align 8
+  %conv = fptosi double %a to i32
+; ELF64: fctiwz
+; ELF64: stfd
+; ELF64: lwa
+  store i32 %conv, i32* %b.addr, align 8
+  ret void
+}
+
+define void @fptosi_double_i64(double %a) nounwind ssp {
+entry:
+; ELF64: fptosi_double_i64
+  %b.addr = alloca i64, align 8
+  %conv = fptosi double %a to i64
+; ELF64: fctidz
+; ELF64: stfd
+; ELF64: ld
+  store i64 %conv, i64* %b.addr, align 8
+  ret void
+}
+
+; Test fptoui
+
+define void @fptoui_float_i32(float %a) nounwind ssp {
+entry:
+; ELF64: fptoui_float_i32
+  %b.addr = alloca i32, align 4
+  %conv = fptoui float %a to i32
+; ELF64: fctidz
+; ELF64: stfd
+; ELF64: lwz
+  store i32 %conv, i32* %b.addr, align 4
+  ret void
+}
+
+define void @fptoui_float_i64(float %a) nounwind ssp {
+entry:
+; ELF64: fptoui_float_i64
+  %b.addr = alloca i64, align 4
+  %conv = fptoui float %a to i64
+; ELF64: fctiduz
+; ELF64: stfd
+; ELF64: ld
+  store i64 %conv, i64* %b.addr, align 4
+  ret void
+}
+
+define void @fptoui_double_i32(double %a) nounwind ssp {
+entry:
+; ELF64: fptoui_double_i32
+  %b.addr = alloca i32, align 8
+  %conv = fptoui double %a to i32
+; ELF64: fctidz
+; ELF64: stfd
+; ELF64: lwz
+  store i32 %conv, i32* %b.addr, align 8
+  ret void
+}
+
+define void @fptoui_double_i64(double %a) nounwind ssp {
+entry:
+; ELF64: fptoui_double_i64
+  %b.addr = alloca i64, align 8
+  %conv = fptoui double %a to i64
+; ELF64: fctiduz
+; ELF64: stfd
+; ELF64: ld
+  store i64 %conv, i64* %b.addr, align 8
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/spill-nor0.ll b/test/CodeGen/PowerPC/spill-nor0.ll
new file mode 100644
index 0000000..65bdc09
--- /dev/null
+++ b/test/CodeGen/PowerPC/spill-nor0.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -O0 -mcpu=ppc64 | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @_ZN4llvm3sys17RunningOnValgrindEv() #0 {
+entry:
+  br i1 undef, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  ret void
+
+if.end:                                           ; preds = %entry
+  %0 = call i64 asm sideeffect "mr 3,$1\0A\09mr 4,$2\0A\09rotldi 0,0,3  ; rotldi 0,0,13\0A\09rotldi 0,0,61 ; rotldi 0,0,51\0A\09or 1,1,1\0A\09mr $0,3", "=b,b,b,~{cc},~{memory},~{r3},~{r4}"(i32 0, i64* undef) #0
+  unreachable
+
+; CHECK-LABEL: @_ZN4llvm3sys17RunningOnValgrindEv
+; CHECK: stw
+; CHECK: lwz
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/weak_def_can_be_hidden.ll b/test/CodeGen/PowerPC/weak_def_can_be_hidden.ll
new file mode 100644
index 0000000..130d8fa
--- /dev/null
+++ b/test/CodeGen/PowerPC/weak_def_can_be_hidden.ll
@@ -0,0 +1,38 @@
+; taken from X86 version of the same test
+; RUN: llc -mtriple=powerpc-apple-darwin10 -O0 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc-apple-darwin9 -O0 < %s | FileCheck --check-prefix=CHECK-D89 %s
+; RUN: llc -mtriple=powerpc-apple-darwin8 -O0 < %s | FileCheck --check-prefix=CHECK-D89 %s
+
+@v1 = linkonce_odr global i32 32
+; CHECK: .globl  _v1
+; CHECK: .weak_def_can_be_hidden _v1
+
+; CHECK-D89: .globl  _v1
+; CHECK-D89: .weak_definition _v1
+
+define i32 @f1() {
+  %x = load i32 * @v1
+  ret i32 %x
+}
+
+@v2 = linkonce_odr global i32 32
+; CHECK: .globl  _v2
+; CHECK: .weak_definition _v2
+
+; CHECK-D89: .globl  _v2
+; CHECK-D89: .weak_definition _v2
+
+@v3 = linkonce_odr unnamed_addr global i32 32
+; CHECK: .globl  _v3
+; CHECK: .weak_def_can_be_hidden _v3
+
+; CHECK-D89: .globl  _v3
+; CHECK-D89: .weak_definition _v3
+
+define i32* @f2() {
+  ret i32* @v2
+}
+
+define i32* @f3() {
+  ret i32* @v3
+}
diff --git a/test/CodeGen/R600/bfe_uint.ll b/test/CodeGen/R600/bfe_uint.ll
index 92570c3..fe466e6 100644
--- a/test/CodeGen/R600/bfe_uint.ll
+++ b/test/CodeGen/R600/bfe_uint.ll
@@ -1,5 +1,7 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
+; XFAIL: *
+
 ; CHECK: @bfe_def
 ; CHECK: BFE_UINT
 define void @bfe_def(i32 addrspace(1)* %out, i32 %x) {
diff --git a/test/CodeGen/R600/fabs.ll b/test/CodeGen/R600/fabs.ll
index a5f5df9..2cd3a4f 100644
--- a/test/CodeGen/R600/fabs.ll
+++ b/test/CodeGen/R600/fabs.ll
@@ -9,7 +9,7 @@
 ; R600-CHECK-NOT: AND
 ; R600-CHECK: |PV.{{[XYZW]}}|
 ; SI-CHECK-LABEL: @fabs_free
-; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
+; SI-CHECK: V_AND_B32
 
 define void @fabs_free(float addrspace(1)* %out, i32 %in) {
 entry:
@@ -23,8 +23,8 @@ entry:
 ; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
 ; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
 ; SI-CHECK-LABEL: @fabs_v2
-; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
-; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
+; SI-CHECK: V_AND_B32
+; SI-CHECK: V_AND_B32
 define void @fabs_v2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
 entry:
   %0 = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
@@ -38,10 +38,10 @@ entry:
 ; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
 ; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
 ; SI-CHECK-LABEL: @fabs_v4
-; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
-; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
-; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
-; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
+; SI-CHECK: V_AND_B32
+; SI-CHECK: V_AND_B32
+; SI-CHECK: V_AND_B32
+; SI-CHECK: V_AND_B32
 define void @fabs_v4(<4 x float> addrspace(1)* %out, <4 x float> %in) {
 entry:
   %0 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
diff --git a/test/CodeGen/R600/fneg-fabs.ll b/test/CodeGen/R600/fneg-fabs.ll
new file mode 100644
index 0000000..d95e131
--- /dev/null
+++ b/test/CodeGen/R600/fneg-fabs.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+
+; DAGCombiner will transform:
+; (fabs (f32 bitcast (i32 a))) => (f32 bitcast (and (i32 a), 0x7FFFFFFF))
+; unless isFabsFree returns true
+
+; R600-CHECK-LABEL: @fneg_fabs_free
+; R600-CHECK-NOT: AND
+; R600-CHECK: |PV.{{[XYZW]}}|
+; R600-CHECK: -PV
+; SI-CHECK-LABEL: @fneg_fabs_free
+; SI-CHECK: V_OR_B32
+
+define void @fneg_fabs_free(float addrspace(1)* %out, i32 %in) {
+entry:
+  %0 = bitcast i32 %in to float
+  %1 = call float @fabs(float %0)
+  %2 = fsub float -0.000000e+00, %1
+  store float %2, float addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @fneg_fabs_v2
+; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
+; R600-CHECK: -PV
+; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
+; R600-CHECK: -PV
+; SI-CHECK-LABEL: @fneg_fabs_v2
+; SI-CHECK: V_OR_B32
+; SI-CHECK: V_OR_B32
+define void @fneg_fabs_v2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
+entry:
+  %0 = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
+  %1 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %0
+  store <2 x float> %1, <2 x float> addrspace(1)* %out
+  ret void
+}
+
+; SI-CHECK-LABEL: @fneg_fabs_v4
+; SI-CHECK: V_OR_B32
+; SI-CHECK: V_OR_B32
+; SI-CHECK: V_OR_B32
+; SI-CHECK: V_OR_B32
+define void @fneg_fabs_v4(<4 x float> addrspace(1)* %out, <4 x float> %in) {
+entry:
+  %0 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
+  %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
+  store <4 x float> %1, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+declare float @fabs(float ) readnone
+declare <2 x float> @llvm.fabs.v2f32(<2 x float> ) readnone
+declare <4 x float> @llvm.fabs.v4f32(<4 x float> ) readnone
diff --git a/test/CodeGen/R600/fneg.ll b/test/CodeGen/R600/fneg.ll
index 9446aa8..f4e6be6 100644
--- a/test/CodeGen/R600/fneg.ll
+++ b/test/CodeGen/R600/fneg.ll
@@ -4,7 +4,7 @@
 ; R600-CHECK-LABEL: @fneg
 ; R600-CHECK: -PV
 ; SI-CHECK-LABEL: @fneg
-; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
+; SI-CHECK: V_XOR_B32
 define void @fneg(float addrspace(1)* %out, float %in) {
 entry:
   %0 = fsub float -0.000000e+00, %in
@@ -16,8 +16,8 @@ entry:
 ; R600-CHECK: -PV
 ; R600-CHECK: -PV
 ; SI-CHECK-LABEL: @fneg_v2
-; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
-; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
+; SI-CHECK: V_XOR_B32
+; SI-CHECK: V_XOR_B32
 define void @fneg_v2(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) {
 entry:
   %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %in
@@ -31,10 +31,10 @@ entry:
 ; R600-CHECK: -PV
 ; R600-CHECK: -PV
 ; SI-CHECK-LABEL: @fneg_v4
-; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
-; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
-; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
-; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
+; SI-CHECK: V_XOR_B32
+; SI-CHECK: V_XOR_B32
+; SI-CHECK: V_XOR_B32
+; SI-CHECK: V_XOR_B32
 define void @fneg_v4(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) {
 entry:
   %0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %in
diff --git a/test/CodeGen/R600/lds-oqap-crash.ll b/test/CodeGen/R600/lds-oqap-crash.ll
new file mode 100644
index 0000000..7959150
--- /dev/null
+++ b/test/CodeGen/R600/lds-oqap-crash.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood -verify-machineinstrs | FileCheck %s
+
+; The test is for a bug in R600EmitClauseMarkers.cpp where this pass
+; was searching for a use of the OQAP register in order to determine
+; if an LDS instruction could fit in the current clause, but never finding
+; one.  This created an infinite loop and hung the compiler.
+;
+; The LDS instruction should not have been defining OQAP in the first place,
+; because the LDS instructions are pseudo instructions and the OQAP
+; reads and writes are bundled together in the same instruction.
+
+; CHECK: @lds_crash
+define void @lds_crash(i32 addrspace(1)* %out, i32 addrspace(3)* %in, i32 %a, i32 %b, i32 %c) {
+entry:
+  %0 = load i32 addrspace(3)* %in
+  ; This block needs to be > 115 ISA instructions to hit the bug,
+  ; so we'll use udiv instructions.
+  %div0 = udiv i32 %0, %b
+  %div1 = udiv i32 %div0, %a
+  %div2 = udiv i32 %div1, 11
+  %div3 = udiv i32 %div2, %a
+  %div4 = udiv i32 %div3, %b
+  %div5 = udiv i32 %div4, %c
+  %div6 = udiv i32 %div5, %div0
+  %div7 = udiv i32 %div6, %div1
+  store i32 %div7, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.kill.ll b/test/CodeGen/R600/llvm.AMDGPU.kill.ll
new file mode 100644
index 0000000..bec5cdf
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.kill.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
+
+; SI-LABEL: @kill_gs
+; SI: V_CMPX_LE_F32
+
+define void @kill_gs() #0 {
+main_body:
+  %0 = icmp ule i32 0, 3
+  %1 = select i1 %0, float 1.000000e+00, float -1.000000e+00
+  call void @llvm.AMDGPU.kill(float %1)
+  ret void
+}
+
+declare void @llvm.AMDGPU.kill(float)
+
+attributes #0 = { "ShaderType"="2" }
+
+!0 = metadata !{metadata !"const", null, i32 1}
diff --git a/test/CodeGen/R600/llvm.SI.load.dword.ll b/test/CodeGen/R600/llvm.SI.load.dword.ll
new file mode 100644
index 0000000..a622775
--- /dev/null
+++ b/test/CodeGen/R600/llvm.SI.load.dword.ll
@@ -0,0 +1,40 @@
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+
+; Example of a simple geometry shader loading vertex attributes from the
+; ESGS ring buffer
+
+; CHECK-LABEL: @main
+; CHECK: BUFFER_LOAD_DWORD
+; CHECK: BUFFER_LOAD_DWORD
+; CHECK: BUFFER_LOAD_DWORD
+; CHECK: BUFFER_LOAD_DWORD
+
+define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [2 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, [17 x <16 x i8>] addrspace(2)* inreg, i32, i32, i32, i32) #0 {
+main_body:
+  %10 = getelementptr [2 x <16 x i8>] addrspace(2)* %3, i64 0, i32 1
+  %11 = load <16 x i8> addrspace(2)* %10, !tbaa !0
+  %12 = shl i32 %6, 2
+  %13 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0)
+  %14 = bitcast i32 %13 to float
+  %15 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 %12, i32 0, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0)
+  %16 = bitcast i32 %15 to float
+  %17 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 %12, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 0)
+  %18 = bitcast i32 %17 to float
+  %19 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8> %11, <2 x i32> <i32 0, i32 0>, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 0)
+  %20 = bitcast i32 %19 to float
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %14, float %16, float %18, float %20)
+  ret void
+}
+
+; Function Attrs: nounwind readonly
+declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+
+; Function Attrs: nounwind readonly
+declare i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32) #1
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="1" }
+attributes #1 = { nounwind readonly }
+
+!0 = metadata !{metadata !"const", null, i32 1}
diff --git a/test/CodeGen/R600/llvm.SI.sendmsg.ll b/test/CodeGen/R600/llvm.SI.sendmsg.ll
new file mode 100644
index 0000000..cfcc7c4
--- /dev/null
+++ b/test/CodeGen/R600/llvm.SI.sendmsg.ll
@@ -0,0 +1,21 @@
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+
+; CHECK-LABEL: @main
+; CHECK: S_SENDMSG 34
+; CHECK: S_SENDMSG 274
+; CHECK: S_SENDMSG 562
+; CHECK: S_SENDMSG 3
+
+define void @main() {
+main_body:
+  call void @llvm.SI.sendmsg(i32 34, i32 0);
+  call void @llvm.SI.sendmsg(i32 274, i32 0);
+  call void @llvm.SI.sendmsg(i32 562, i32 0);
+  call void @llvm.SI.sendmsg(i32 3, i32 0);
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.SI.sendmsg(i32, i32) #0
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll
index e4492d7..0153524 100644
--- a/test/CodeGen/R600/load.ll
+++ b/test/CodeGen/R600/load.ll
@@ -445,6 +445,7 @@ define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(
 ; R600-CHECK: LDS_UBYTE_READ_RET
 ; SI-CHECK-LABEL: @load_i8_local
 ; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
 ; SI-CHECK: DS_READ_U8
 define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
   %1 = load i8 addrspace(3)* %in
@@ -458,6 +459,7 @@ define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
 ; R600-CHECK: ASHR
 ; SI-CHECK-LABEL: @load_i8_sext_local
 ; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
 ; SI-CHECK: DS_READ_I8
 define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
 entry:
@@ -472,6 +474,7 @@ entry:
 ; R600-CHECK: LDS_UBYTE_READ_RET
 ; SI-CHECK-LABEL: @load_v2i8_local
 ; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
 ; SI-CHECK: DS_READ_U8
 ; SI-CHECK: DS_READ_U8
 define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
@@ -489,6 +492,7 @@ entry:
 ; R600-CHECK-DAG: ASHR
 ; SI-CHECK-LABEL: @load_v2i8_sext_local
 ; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
 ; SI-CHECK: DS_READ_I8
 ; SI-CHECK: DS_READ_I8
 define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
@@ -506,6 +510,7 @@ entry:
 ; R600-CHECK: LDS_UBYTE_READ_RET
 ; SI-CHECK-LABEL: @load_v4i8_local
 ; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
 ; SI-CHECK: DS_READ_U8
 ; SI-CHECK: DS_READ_U8
 ; SI-CHECK: DS_READ_U8
@@ -529,6 +534,7 @@ entry:
 ; R600-CHECK-DAG: ASHR
 ; SI-CHECK-LABEL: @load_v4i8_sext_local
 ; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
 ; SI-CHECK: DS_READ_I8
 ; SI-CHECK: DS_READ_I8
 ; SI-CHECK: DS_READ_I8
@@ -546,6 +552,7 @@ entry:
 ; R600-CHECK: LDS_USHORT_READ_RET
 ; SI-CHECK-LABEL: @load_i16_local
 ; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
 ; SI-CHECK: DS_READ_U16
 define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
 entry:
@@ -560,6 +567,7 @@ entry:
 ; R600-CHECK: ASHR
 ; SI-CHECK-LABEL: @load_i16_sext_local
 ; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
 ; SI-CHECK: DS_READ_I16
 define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
 entry:
@@ -574,6 +582,7 @@ entry:
 ; R600-CHECK: LDS_USHORT_READ_RET
 ; SI-CHECK-LABEL: @load_v2i16_local
 ; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
 ; SI-CHECK: DS_READ_U16
 ; SI-CHECK: DS_READ_U16
 define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
@@ -591,6 +600,7 @@ entry:
 ; R600-CHECK-DAG: ASHR
 ; SI-CHECK-LABEL: @load_v2i16_sext_local
 ; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
 ; SI-CHECK: DS_READ_I16
 ; SI-CHECK: DS_READ_I16
 define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
@@ -608,6 +618,7 @@ entry:
 ; R600-CHECK: LDS_USHORT_READ_RET
 ; SI-CHECK-LABEL: @load_v4i16_local
 ; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
 ; SI-CHECK: DS_READ_U16
 ; SI-CHECK: DS_READ_U16
 ; SI-CHECK: DS_READ_U16
@@ -631,6 +642,7 @@ entry:
 ; R600-CHECK-DAG: ASHR
 ; SI-CHECK-LABEL: @load_v4i16_sext_local
 ; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
 ; SI-CHECK: DS_READ_I16
 ; SI-CHECK: DS_READ_I16
 ; SI-CHECK: DS_READ_I16
@@ -643,11 +655,12 @@ entry:
   ret void
 }
 
-; load an i32 value from the glocal address space.
+; load an i32 value from the local address space.
 ; R600-CHECK-LABEL: @load_i32_local
 ; R600-CHECK: LDS_READ_RET
 ; SI-CHECK-LABEL: @load_i32_local
 ; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
 ; SI-CHECK: DS_READ_B32
 define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
 entry:
@@ -656,10 +669,11 @@ entry:
   ret void
 }
 
-; load a f32 value from the global address space.
+; load a f32 value from the local address space.
 ; R600-CHECK-LABEL: @load_f32_local
 ; R600-CHECK: LDS_READ_RET
 ; SI-CHECK-LABEL: @load_f32_local
+; SI-CHECK: S_MOV_B32 m0
 ; SI-CHECK: DS_READ_B32
 define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
 entry:
@@ -673,6 +687,7 @@ entry:
 ; R600-CHECK: LDS_READ_RET
 ; R600-CHECK: LDS_READ_RET
 ; SI-CHECK-LABEL: @load_v2f32_local
+; SI-CHECK: S_MOV_B32 m0
 ; SI-CHECK: DS_READ_B32
 ; SI-CHECK: DS_READ_B32
 define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
diff --git a/test/CodeGen/R600/trunc.ll b/test/CodeGen/R600/trunc.ll
index 0bd320a..6bbd7f7 100644
--- a/test/CodeGen/R600/trunc.ll
+++ b/test/CodeGen/R600/trunc.ll
@@ -28,3 +28,13 @@ define void @trunc_shl_i64(i32 addrspace(1)* %out, i64 %a) {
   store i32 %result, i32 addrspace(1)* %out, align 4
   ret void
 }
+
+; SI-LABEL: @trunc_i32_to_i1:
+; SI: V_AND_B32
+; SI: V_CMP_EQ_I32
+define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) {
+  %trunc = trunc i32 %a to i1
+  %result = select i1 %trunc, i32 1, i32 0
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/vtx-fetch-branch.ll b/test/CodeGen/R600/vtx-fetch-branch.ll
new file mode 100644
index 0000000..0fc99de
--- /dev/null
+++ b/test/CodeGen/R600/vtx-fetch-branch.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=r600 -mcpu=redwood %s -o - | FileCheck %s
+
+; This tests for a bug where vertex fetch clauses right before an ENDIF
+; instruction were being emitted after the ENDIF.  We were using ALU_POP_AFTER
+; for the ALU clause before the vertex fetch instead of emitting a POP instruction
+; after the fetch clause.
+
+
+; CHECK-LABEL: @test
+; CHECK-NOT: ALU_POP_AFTER
+; CHECK: TEX
+; CHECK-NEXT: POP
+define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond) {
+entry:
+  %0 = icmp eq i32 %cond, 0
+  br i1 %0, label %endif, label %if
+
+if:
+  %1 = load i32 addrspace(1)* %in
+  br label %endif
+
+endif:
+  %x = phi i32 [ %1, %if], [ 0, %entry]
+  store i32 %x, i32 addrspace(1)* %out
+  br label %done
+
+done:
+  ret void
+}
diff --git a/test/CodeGen/R600/zero_extend.ll b/test/CodeGen/R600/zero_extend.ll
index 481b3b3..a114bfc 100644
--- a/test/CodeGen/R600/zero_extend.ll
+++ b/test/CodeGen/R600/zero_extend.ll
@@ -16,3 +16,13 @@ entry:
   store i64 %2, i64 addrspace(1)* %out
   ret void
 }
+
+; SI-CHECK-LABEL: @testi1toi32
+; SI-CHECK: V_CNDMASK_B32
+define void @testi1toi32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+entry:
+  %0 = icmp eq i32 %a, %b
+  %1 = zext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
index 5f5d5cc..50c62df 100644
--- a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
+++ b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 > %t1
+; RUN: llc < %s -march=x86 -mcpu=core2 > %t1
 ; RUN: grep movzwl %t1 | count 2
 ; RUN: grep movzbl %t1 | count 1
 ; RUN: grep movd %t1 | count 4
diff --git a/test/CodeGen/X86/bswap-vector.ll b/test/CodeGen/X86/bswap-vector.ll
new file mode 100644
index 0000000..7a7a8a4
--- /dev/null
+++ b/test/CodeGen/X86/bswap-vector.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mcpu=core | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
+
+define <2 x i64> @foo(<2 x i64> %v) #0 {
+entry:
+  %r = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %v)
+  ret <2 x i64> %r
+}
+
+; CHECK-LABEL: @foo
+; CHECK: bswapq
+; CHECK: bswapq
+; CHECK: ret
+
+attributes #0 = { nounwind uwtable }
+
diff --git a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
index 7a1a9ae..494cb28 100644
--- a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
+++ b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=+avx,+fma4 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=corei7-avx -mattr=+fma4 | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=+avx,-fma | FileCheck %s
 
 ; VFMADD
diff --git a/test/CodeGen/X86/fp-fast.ll b/test/CodeGen/X86/fp-fast.ll
index 07baca8..7b08ad6 100644
--- a/test/CodeGen/X86/fp-fast.ll
+++ b/test/CodeGen/X86/fp-fast.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=x86-64 -mattr=+avx,-fma4 -mtriple=x86_64-apple-darwin -enable-unsafe-fp-math < %s | FileCheck %s
+; RUN: llc -march=x86-64 -mcpu=corei7-avx -enable-unsafe-fp-math < %s | FileCheck %s
 
 ; CHECK-LABEL: test1
 define float @test1(float %a) {
diff --git a/test/CodeGen/X86/inline-asm-modifier-q.ll b/test/CodeGen/X86/inline-asm-modifier-q.ll
new file mode 100644
index 0000000..d20f06d
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-modifier-q.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86  | FileCheck %s
+
+; If the target does not have 64-bit integer registers, emit 32-bit register
+; names.
+
+; CHECK: movq (%e{{[abcd]}}x, %ebx, 4)
+
+define void @q_modifier(i32* %p) {
+entry:
+  tail call void asm sideeffect "movq (${0:q}, %ebx, 4), %mm0", "r,~{dirflag},~{fpsr},~{flags}"(i32* %p)
+  ret void
+}
diff --git a/test/CodeGen/X86/isint.ll b/test/CodeGen/X86/isint.ll
index 4a98e63..38d05c6 100644
--- a/test/CodeGen/X86/isint.ll
+++ b/test/CodeGen/X86/isint.ll
@@ -1,6 +1,11 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+sse2 -mcpu=penryn | FileCheck %s
+; RUN: llc < %s -mtriple=i686-pc-unknown -mattr=+sse2 -mcpu=penryn | FileCheck %s
+
+; PR19059
+; RUN: llc < %s -mtriple=i686-pc-unknown -mattr=+sse2 -mcpu=penryn | FileCheck -check-prefix=CHECK32 %s
 
 define i32 @isint_return(double %d) nounwind {
+; CHECK-LABEL: isint_return:
 ; CHECK-NOT: xor
 ; CHECK: cvt
   %i = fptosi double %d to i32
@@ -8,6 +13,24 @@ define i32 @isint_return(double %d) nounwind {
   %e = sitofp i32 %i to double
 ; CHECK: cmpeqsd
   %c = fcmp oeq double %d, %e
+; CHECK32-NOT: movd {{.*}}, %r{{.*}}
+; CHECK32-NOT: andq
+; CHECK-NEXT: movd
+; CHECK-NEXT: andl
+  %z = zext i1 %c to i32
+  ret i32 %z
+}
+
+define i32 @isint_float_return(float %f) nounwind {
+; CHECK-LABEL: isint_float_return:
+; CHECK-NOT: xor
+; CHECK: cvt
+  %i = fptosi float %f to i32
+; CHECK-NEXT: cvt
+  %g = sitofp i32 %i to float
+; CHECK: cmpeqss
+  %c = fcmp oeq float %f, %g
+; CHECK-NOT: movd {{.*}}, %r{{.*}}
 ; CHECK-NEXT: movd
 ; CHECK-NEXT: andl
   %z = zext i1 %c to i32
@@ -17,6 +40,7 @@ define i32 @isint_return(double %d) nounwind {
 declare void @foo()
 
 define void @isint_branch(double %d) nounwind {
+; CHECK-LABEL: isint_branch:
 ; CHECK: cvt
   %i = fptosi double %d to i32
 ; CHECK-NEXT: cvt
diff --git a/test/CodeGen/X86/pr10420.ll b/test/CodeGen/X86/pr10420.ll
index 3993f24..6295189 100644
--- a/test/CodeGen/X86/pr10420.ll
+++ b/test/CodeGen/X86/pr10420.ll
@@ -1,4 +1,9 @@
-; RUN: llc < %s -mtriple=x86_64-apple-macosx -disable-cfi | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7 -disable-cfi | FileCheck --check-prefix=CHECK-64-D11 %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.6 -disable-cfi | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.5 -disable-cfi | FileCheck --check-prefix=CHECK-64-D89 %s
+; RUN: llc < %s -mtriple=i686-apple-macosx10.6 -disable-cfi | FileCheck --check-prefix=CHECK-I686-D10 %s
+; RUN: llc < %s -mtriple=i686-apple-macosx10.5 -disable-cfi | FileCheck --check-prefix=CHECK-I686-D89 %s
+; RUN: llc < %s -mtriple=i686-apple-macosx10.4 -disable-cfi | FileCheck --check-prefix=CHECK-I686-D89 %s
 
 define private void @foo() {
        ret void
@@ -19,3 +24,44 @@ define void @bar() {
 ; CHECK: Ltmp19:
 ; CHECK-NEXT: Ltmp20 = Ltmp2-Ltmp19                   ## FDE initial location
 ; CHECK-NEXT:         .quad   Ltmp20
+
+
+; CHECK-64-D11: Ltmp13:
+; CHECK-64-D11-NEXT: Ltmp14 = L_foo-Ltmp13                   ## FDE initial location
+; CHECK-64-D11-NEXT:         .quad   Ltmp14
+
+; CHECK-64-D11: Ltmp20:
+; CHECK-64-D11-NEXT: Ltmp21 = Ltmp2-Ltmp20                   ## FDE initial location
+; CHECK-64-D11-NEXT:         .quad   Ltmp21
+
+
+; CHECK-64-D89: Ltmp12:
+; CHECK-64-D89-NEXT: .quad	L_foo-Ltmp12                   ## FDE initial location
+; CHECK-64-D89-NEXT: Ltmp13 = (Ltmp0-L_foo)-0                   ## FDE address range
+; CHECK-64-D89-NEXT:         .quad   Ltmp13
+
+; CHECK-64-D89: Ltmp18:
+; CHECK-64-D89-NEXT: .quad	Ltmp2-Ltmp18                   ## FDE initial location
+; CHECK-64-D89-NEXT: Ltmp19 = (Ltmp4-Ltmp2)-0                   ## FDE address range
+; CHECK-64-D89-NEXT:         .quad   Ltmp19
+
+
+; CHECK-I686-D10: Ltmp12:
+; CHECK-I686-D10-NEXT: Ltmp13 = L_foo-Ltmp12                   ## FDE initial location
+; CHECK-I686-D10-NEXT:         .long   Ltmp13
+
+; CHECK-I686-D10: Ltmp19:
+; CHECK-I686-D10-NEXT: Ltmp20 = Ltmp2-Ltmp19                   ## FDE initial location
+; CHECK-I686-D10-NEXT:         .long   Ltmp20
+
+
+; CHECK-I686-D89: Ltmp12:
+; CHECK-I686-D89-NEXT: .long	L_foo-Ltmp12                   ## FDE initial location
+; CHECK-I686-D89-NEXT: Ltmp13 = (Ltmp0-L_foo)-0                   ## FDE address range
+; CHECK-I686-D89-NEXT:         .long   Ltmp13
+
+; CHECK-I686-D89: Ltmp18:
+; CHECK-I686-D89-NEXT: .long	Ltmp2-Ltmp18                   ## FDE initial location
+; CHECK-I686-D89-NEXT: Ltmp19 = (Ltmp4-Ltmp2)-0                   ## FDE address range
+; CHECK-I686-D89-NEXT:         .long   Ltmp19
+
diff --git a/test/CodeGen/X86/stores-merging.ll b/test/CodeGen/X86/stores-merging.ll
new file mode 100644
index 0000000..61dea08
--- /dev/null
+++ b/test/CodeGen/X86/stores-merging.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%structTy = type { i8, i32, i32 }
+
+@e = common global %structTy zeroinitializer, align 4
+
+; CHECK-LABEL: f
+define void @f() {
+entry:
+
+; CHECK:   movabsq	$528280977409, %rax
+; CHECK:   movq    %rax, e+4(%rip)
+; CHECK:   movl    $456, e+8(%rip)
+
+  store i32 1, i32* getelementptr inbounds (%structTy* @e, i64 0, i32 1), align 4
+  store i32 123, i32* getelementptr inbounds (%structTy* @e, i64 0, i32 2), align 4
+  store i32 456, i32* getelementptr inbounds (%structTy* @e, i64 0, i32 2), align 4
+  ret void
+}
+
diff --git a/test/CodeGen/X86/vaargs.ll b/test/CodeGen/X86/vaargs.ll
new file mode 100644
index 0000000..ddeb7a3
--- /dev/null
+++ b/test/CodeGen/X86/vaargs.ll
@@ -0,0 +1,67 @@
+; RUN: llc -mcpu=corei7-avx %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=NO-FLAGS
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+%struct.__va_list_tag = type { i32, i32, i8*, i8* }
+
+; Check that vastart gets the right thing.
+define i32 @sum(i32 %count, ...) nounwind optsize ssp uwtable {
+; CHECK:      testb   %al, %al
+; CHECK-NEXT: je
+; CHECK-NEXT: ## BB#{{[0-9]+}}:
+; CHECK-NEXT: vmovaps %xmm0, 48(%rsp)
+; CHECK-NEXT: vmovaps %xmm1, 64(%rsp)
+; CHECK-NEXT: vmovaps %xmm2, 80(%rsp)
+; CHECK-NEXT: vmovaps %xmm3, 96(%rsp)
+; CHECK-NEXT: vmovaps %xmm4, 112(%rsp)
+; CHECK-NEXT: vmovaps %xmm5, 128(%rsp)
+; CHECK-NEXT: vmovaps %xmm6, 144(%rsp)
+; CHECK-NEXT: vmovaps %xmm7, 160(%rsp)
+
+; Check that [EFLAGS] hasn't been pulled in.
+; NO-FLAGS-NOT: %flags
+
+  %ap = alloca [1 x %struct.__va_list_tag], align 16
+  %1 = bitcast [1 x %struct.__va_list_tag]* %ap to i8*
+  call void @llvm.va_start(i8* %1)
+  %2 = icmp sgt i32 %count, 0
+  br i1 %2, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0
+  %3 = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 0
+  %4 = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 2
+  %.pre = load i32* %3, align 16
+  br label %5
+
+; <label>:5                                       ; preds = %.lr.ph, %13
+  %6 = phi i32 [ %.pre, %.lr.ph ], [ %14, %13 ]
+  %.01 = phi i32 [ %count, %.lr.ph ], [ %15, %13 ]
+  %7 = icmp ult i32 %6, 41
+  br i1 %7, label %8, label %10
+
+; <label>:8                                       ; preds = %5
+  %9 = add i32 %6, 8
+  store i32 %9, i32* %3, align 16
+  br label %13
+
+; <label>:10                                      ; preds = %5
+  %11 = load i8** %4, align 8
+  %12 = getelementptr i8* %11, i64 8
+  store i8* %12, i8** %4, align 8
+  br label %13
+
+; <label>:13                                      ; preds = %10, %8
+  %14 = phi i32 [ %6, %10 ], [ %9, %8 ]
+  %15 = add nsw i32 %.01, 1
+  %16 = icmp sgt i32 %15, 0
+  br i1 %16, label %5, label %._crit_edge
+
+._crit_edge:                                      ; preds = %13, %0
+  %.0.lcssa = phi i32 [ %count, %0 ], [ %15, %13 ]
+  call void @llvm.va_end(i8* %1)
+  ret i32 %.0.lcssa
+}
+
+declare void @llvm.va_start(i8*) nounwind
+
+declare void @llvm.va_end(i8*) nounwind
diff --git a/test/CodeGen/X86/vastart-defs-eflags.ll b/test/CodeGen/X86/vastart-defs-eflags.ll
new file mode 100644
index 0000000..6017753
--- /dev/null
+++ b/test/CodeGen/X86/vastart-defs-eflags.ll
@@ -0,0 +1,23 @@
+; RUN: llc %s -o - | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+; Check that vastart handling doesn't get between testb and je for the branch.
+define i32 @check_flag(i32 %flags, ...) nounwind {
+entry:
+; CHECK: {{^}} testb $2, %bh
+; CHECK-NOT: test
+; CHECK: {{^}} je
+  %and = and i32 %flags, 512
+  %tobool = icmp eq i32 %and, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  %hasflag = phi i32 [ 1, %if.then ], [ 0, %entry ]
+  ret i32 %hasflag
+}
+
diff --git a/test/CodeGen/X86/vec_shift4.ll b/test/CodeGen/X86/vec_shift4.ll
index e2fe45c..b266a69 100644
--- a/test/CodeGen/X86/vec_shift4.ll
+++ b/test/CodeGen/X86/vec_shift4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse4.1 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=corei7 | FileCheck %s
 
 define <2 x i64> @shl1(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp {
 entry:
diff --git a/test/CodeGen/X86/vshift-4.ll b/test/CodeGen/X86/vshift-4.ll
index 4363cd9..a060cf8 100644
--- a/test/CodeGen/X86/vshift-4.ll
+++ b/test/CodeGen/X86/vshift-4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=core2 | FileCheck %s
 
 ; test vector shifts converted to proper SSE2 vector shifts when the shift
 ; amounts are the same when using a shuffle splat.
diff --git a/test/CodeGen/X86/weak_def_can_be_hidden.ll b/test/CodeGen/X86/weak_def_can_be_hidden.ll
index f78f357..22aa135 100644
--- a/test/CodeGen/X86/weak_def_can_be_hidden.ll
+++ b/test/CodeGen/X86/weak_def_can_be_hidden.ll
@@ -1,9 +1,16 @@
-; RUN: llc -mtriple=x86_64-apple-darwin  -O0 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin11 -O0 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin10 -O0 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin9 -O0 < %s | FileCheck --check-prefix=CHECK-D89 %s
+; RUN: llc -mtriple=i686-apple-darwin9 -O0 < %s | FileCheck --check-prefix=CHECK-D89 %s
+; RUN: llc -mtriple=i686-apple-darwin8 -O0 < %s | FileCheck --check-prefix=CHECK-D89 %s
 
 @v1 = linkonce_odr global i32 32
 ; CHECK: .globl  _v1
 ; CHECK: .weak_def_can_be_hidden _v1
 
+; CHECK-D89: .globl  _v1
+; CHECK-D89: .weak_definition _v1
+
 define i32 @f1() {
   %x = load i32 * @v1
   ret i32 %x
@@ -13,10 +20,16 @@ define i32 @f1() {
 ; CHECK: .globl  _v2
 ; CHECK: .weak_definition _v2
 
+; CHECK-D89: .globl  _v2
+; CHECK-D89: .weak_definition _v2
+
 @v3 = linkonce_odr unnamed_addr global i32 32
 ; CHECK: .globl  _v3
 ; CHECK: .weak_def_can_be_hidden _v3
 
+; CHECK-D89: .globl  _v3
+; CHECK-D89: .weak_definition _v3
+
 define i32* @f2() {
   ret i32* @v2
 }
diff --git a/test/MC/Disassembler/X86/x86-64.txt b/test/MC/Disassembler/X86/x86-64.txt
index 8c6bc0e..6f072df 100644
--- a/test/MC/Disassembler/X86/x86-64.txt
+++ b/test/MC/Disassembler/X86/x86-64.txt
@@ -241,3 +241,27 @@
 
 # CHECK: pextrw $3, %xmm3, (%rax)
 0x66 0x0f 0x3a 0x15 0x18 0x03
+
+# CHECK: $0, 305419896(,%r8)
+0x43 0x80 0x04 0x05 0x78 0x56 0x34 0x12 0x00
+
+# CHECK: $0, 305419896(%r13,%r8)
+0x43 0x80 0x84 0x05 0x78 0x56 0x34 0x12 0x00
+
+# CHECK: $0, 305419896(,%r8)
+0x42 0x80 0x04 0x05 0x78 0x56 0x34 0x12 0x00
+
+# CHECK: $0, 305419896(%rbp,%r8)
+0x42 0x80 0x84 0x05 0x78 0x56 0x34 0x12 0x00
+
+# CHECK: $0, 305419896(,%r12)
+0x42 0x80 0x04 0x25 0x78 0x56 0x34 0x12 0x00
+
+# CHECK: $0, 305419896(%rbp,%r12)
+0x42 0x80 0x84 0x25 0x78 0x56 0x34 0x12 0x00
+
+# CHECK: $0, 305419896
+0x80 0x04 0x25 0x78 0x56 0x34 0x12 0x00
+
+# CHECK: $0, 305419896(%rbp)
+0x80 0x84 0x25 0x78 0x56 0x34 0x12 0x00
diff --git a/test/MC/X86/intel-syntax.s b/test/MC/X86/intel-syntax.s
index 9677da7..50f29e0 100644
--- a/test/MC/X86/intel-syntax.s
+++ b/test/MC/X86/intel-syntax.s
@@ -584,3 +584,12 @@ fsub ST(1)
 fsubr ST(1)
 fdiv ST(1)
 fdivr ST(1)
+
+.bss
+.globl _g0
+.text
+
+// CHECK: movq _g0, %rbx
+// CHECK: movq _g0+8, %rcx
+mov rbx, qword ptr [_g0]
+mov rcx, qword ptr [_g0 + 8]
diff --git a/test/Transforms/InstCombine/vec_extract_var_elt.ll b/test/Transforms/InstCombine/vec_extract_var_elt.ll
index 3c98287..f6f9e01 100644
--- a/test/Transforms/InstCombine/vec_extract_var_elt.ll
+++ b/test/Transforms/InstCombine/vec_extract_var_elt.ll
@@ -16,3 +16,11 @@ define void @test (float %b, <8 x float> * %p)  {
   ret void    
 }
 
+; PR18600
+define i32 @test2(i32 %i) {
+  %e = extractelement <4 x i32> bitcast (<2 x i64> <i64 1, i64 2> to <4 x i32>), i32 %i
+  ret i32 %e
+
+; CHECK-LABEL: @test2
+; CHECK: extractelement
+}
diff --git a/test/Transforms/LoopReroll/basic.ll b/test/Transforms/LoopReroll/basic.ll
index 314a149..3bd6d7a 100644
--- a/test/Transforms/LoopReroll/basic.ll
+++ b/test/Transforms/LoopReroll/basic.ll
@@ -33,7 +33,7 @@ for.body:                                         ; preds = %for.body, %entry
 ; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %entry ]
 ; CHECK: %call = tail call i32 @foo(i32 %indvar) #1
 ; CHECK: %indvar.next = add i32 %indvar, 1
-; CHECK: %exitcond1 = icmp eq i32 %indvar.next, 498
+; CHECK: %exitcond1 = icmp eq i32 %indvar, 497
 ; CHECK: br i1 %exitcond1, label %for.end, label %for.body
 
 ; CHECK: ret
@@ -83,7 +83,7 @@ for.body:                                         ; preds = %entry, %for.body
 ; CHECK: %arrayidx = getelementptr inbounds i32* %x, i64 %indvar
 ; CHECK: store i32 %call, i32* %arrayidx, align 4
 ; CHECK: %indvar.next = add i64 %indvar, 1
-; CHECK: %exitcond = icmp eq i64 %indvar.next, 1500
+; CHECK: %exitcond = icmp eq i64 %indvar, 1499
 ; CHECK: br i1 %exitcond, label %for.end, label %for.body
 
 ; CHECK: ret
@@ -131,7 +131,7 @@ for.body:                                         ; preds = %for.body, %entry
 ; CHECK: %arrayidx = getelementptr inbounds i32* %x, i64 %indvars.iv
 ; CHECK: store i32 %call, i32* %arrayidx, align 4
 ; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; CHECK: %exitcond1 = icmp eq i64 %indvars.iv.next, 1500
+; CHECK: %exitcond1 = icmp eq i64 %indvars.iv, 1499
 ; CHECK: br i1 %exitcond1, label %for.end, label %for.body
 
 ; CHECK: ret
@@ -213,7 +213,7 @@ for.body:                                         ; preds = %entry, %for.body
 ; CHECK: %add = fadd float %1, %mul
 ; CHECK: store float %add, float* %arrayidx2, align 4
 ; CHECK: %indvar.next = add i64 %indvar, 1
-; CHECK: %exitcond = icmp eq i64 %indvar.next, 3200
+; CHECK: %exitcond = icmp eq i64 %indvar, 3199
 ; CHECK: br i1 %exitcond, label %for.end, label %for.body
 
 ; CHECK: ret
@@ -313,7 +313,7 @@ for.body:                                         ; preds = %entry, %for.body
 ; CHECK: %add = fadd float %2, %mul
 ; CHECK: store float %add, float* %arrayidx4, align 4
 ; CHECK: %indvar.next = add i64 %indvar, 1
-; CHECK: %exitcond = icmp eq i64 %indvar.next, 3200
+; CHECK: %exitcond = icmp eq i64 %indvar, 3199
 ; CHECK: br i1 %exitcond, label %for.end, label %for.body
 
 ; CHECK: ret
diff --git a/test/Transforms/LoopReroll/nonconst_lb.ll b/test/Transforms/LoopReroll/nonconst_lb.ll
new file mode 100644
index 0000000..a45469b
--- /dev/null
+++ b/test/Transforms/LoopReroll/nonconst_lb.ll
@@ -0,0 +1,152 @@
+; RUN: opt < %s -loop-reroll -S | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-none-linux"
+
+;void foo(int *A, int *B, int m, int n) {
+;  for (int i = m; i < n; i+=4) {
+;    A[i+0] = B[i+0] * 4;
+;    A[i+1] = B[i+1] * 4;
+;    A[i+2] = B[i+2] * 4;
+;    A[i+3] = B[i+3] * 4;
+;  }
+;}
+define void @foo(i32* nocapture %A, i32* nocapture readonly %B, i32 %m, i32 %n) {
+entry:
+  %cmp34 = icmp slt i32 %m, %n
+  br i1 %cmp34, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.035 = phi i32 [ %add18, %for.body ], [ %m, %entry ]
+  %arrayidx = getelementptr inbounds i32* %B, i32 %i.035
+  %0 = load i32* %arrayidx, align 4
+  %mul = shl nsw i32 %0, 2
+  %arrayidx2 = getelementptr inbounds i32* %A, i32 %i.035
+  store i32 %mul, i32* %arrayidx2, align 4
+  %add3 = add nsw i32 %i.035, 1
+  %arrayidx4 = getelementptr inbounds i32* %B, i32 %add3
+  %1 = load i32* %arrayidx4, align 4
+  %mul5 = shl nsw i32 %1, 2
+  %arrayidx7 = getelementptr inbounds i32* %A, i32 %add3
+  store i32 %mul5, i32* %arrayidx7, align 4
+  %add8 = add nsw i32 %i.035, 2
+  %arrayidx9 = getelementptr inbounds i32* %B, i32 %add8
+  %2 = load i32* %arrayidx9, align 4
+  %mul10 = shl nsw i32 %2, 2
+  %arrayidx12 = getelementptr inbounds i32* %A, i32 %add8
+  store i32 %mul10, i32* %arrayidx12, align 4
+  %add13 = add nsw i32 %i.035, 3
+  %arrayidx14 = getelementptr inbounds i32* %B, i32 %add13
+  %3 = load i32* %arrayidx14, align 4
+  %mul15 = shl nsw i32 %3, 2
+  %arrayidx17 = getelementptr inbounds i32* %A, i32 %add13
+  store i32 %mul15, i32* %arrayidx17, align 4
+  %add18 = add nsw i32 %i.035, 4
+  %cmp = icmp slt i32 %add18, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+; CHECK-LABEL: @foo
+; CHECK: for.body.preheader:                               ; preds = %entry
+; CHECK:   %0 = add i32 %n, -1
+; CHECK:   %1 = sub i32 %0, %m
+; CHECK:   %2 = lshr i32 %1, 2
+; CHECK:   %3 = mul i32 %2, 4
+; CHECK:   %4 = add i32 %m, %3
+; CHECK:   %5 = add i32 %4, 3
+; CHECK:   br label %for.body
+
+; CHECK: for.body:                                         ; preds = %for.body, %for.body.preheader
+; CHECK:   %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ]
+; CHECK:   %6 = add i32 %m, %indvar
+; CHECK:   %arrayidx = getelementptr inbounds i32* %B, i32 %6
+; CHECK:   %7 = load i32* %arrayidx, align 4
+; CHECK:   %mul = shl nsw i32 %7, 2
+; CHECK:   %arrayidx2 = getelementptr inbounds i32* %A, i32 %6
+; CHECK:   store i32 %mul, i32* %arrayidx2, align 4
+; CHECK:   %indvar.next = add i32 %indvar, 1
+; CHECK:   %exitcond = icmp eq i32 %6, %5
+; CHECK:   br i1 %exitcond, label %for.end, label %for.body
+
+;void daxpy_ur(int n,float da,float *dx,float *dy)
+;    {
+;    int m = n % 4;
+;    for (int i = m; i < n; i = i + 4)
+;        {
+;        dy[i]   = dy[i]   + da*dx[i];
+;        dy[i+1] = dy[i+1] + da*dx[i+1];
+;        dy[i+2] = dy[i+2] + da*dx[i+2];
+;        dy[i+3] = dy[i+3] + da*dx[i+3];
+;        }
+;    }
+define void @daxpy_ur(i32 %n, float %da, float* nocapture readonly %dx, float* nocapture %dy) {
+entry:
+  %rem = srem i32 %n, 4
+  %cmp55 = icmp slt i32 %rem, %n
+  br i1 %cmp55, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.056 = phi i32 [ %add27, %for.body ], [ %rem, %entry ]
+  %arrayidx = getelementptr inbounds float* %dy, i32 %i.056
+  %0 = load float* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds float* %dx, i32 %i.056
+  %1 = load float* %arrayidx1, align 4
+  %mul = fmul float %1, %da
+  %add = fadd float %0, %mul
+  store float %add, float* %arrayidx, align 4
+  %add3 = add nsw i32 %i.056, 1
+  %arrayidx4 = getelementptr inbounds float* %dy, i32 %add3
+  %2 = load float* %arrayidx4, align 4
+  %arrayidx6 = getelementptr inbounds float* %dx, i32 %add3
+  %3 = load float* %arrayidx6, align 4
+  %mul7 = fmul float %3, %da
+  %add8 = fadd float %2, %mul7
+  store float %add8, float* %arrayidx4, align 4
+  %add11 = add nsw i32 %i.056, 2
+  %arrayidx12 = getelementptr inbounds float* %dy, i32 %add11
+  %4 = load float* %arrayidx12, align 4
+  %arrayidx14 = getelementptr inbounds float* %dx, i32 %add11
+  %5 = load float* %arrayidx14, align 4
+  %mul15 = fmul float %5, %da
+  %add16 = fadd float %4, %mul15
+  store float %add16, float* %arrayidx12, align 4
+  %add19 = add nsw i32 %i.056, 3
+  %arrayidx20 = getelementptr inbounds float* %dy, i32 %add19
+  %6 = load float* %arrayidx20, align 4
+  %arrayidx22 = getelementptr inbounds float* %dx, i32 %add19
+  %7 = load float* %arrayidx22, align 4
+  %mul23 = fmul float %7, %da
+  %add24 = fadd float %6, %mul23
+  store float %add24, float* %arrayidx20, align 4
+  %add27 = add nsw i32 %i.056, 4
+  %cmp = icmp slt i32 %add27, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; CHECK-LABEL: @daxpy_ur
+; CHECK: for.body.preheader:
+; CHECK:   %0 = add i32 %n, -1
+; CHECK:   %1 = sub i32 %0, %rem
+; CHECK:   %2 = lshr i32 %1, 2
+; CHECK:   %3 = mul i32 %2, 4
+; CHECK:   %4 = add i32 %rem, %3
+; CHECK:   %5 = add i32 %4, 3
+; CHECK:   br label %for.body
+
+; CHECK: for.body:
+; CHECK:   %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ]
+; CHECK:   %6 = add i32 %rem, %indvar
+; CHECK:   %arrayidx = getelementptr inbounds float* %dy, i32 %6
+; CHECK:   %7 = load float* %arrayidx, align 4
+; CHECK:   %arrayidx1 = getelementptr inbounds float* %dx, i32 %6
+; CHECK:   %8 = load float* %arrayidx1, align 4
+; CHECK:   %mul = fmul float %8, %da
+; CHECK:   %add = fadd float %7, %mul
+; CHECK:   store float %add, float* %arrayidx, align 4
+; CHECK:   %indvar.next = add i32 %indvar, 1
+; CHECK:   %exitcond = icmp eq i32 %6, %5
+; CHECK:   br i1 %exitcond, label %for.end, label %for.body
diff --git a/test/Transforms/LoopReroll/reduction.ll b/test/Transforms/LoopReroll/reduction.ll
index aed7670..c9991c7 100644
--- a/test/Transforms/LoopReroll/reduction.ll
+++ b/test/Transforms/LoopReroll/reduction.ll
@@ -38,7 +38,7 @@ for.body:                                         ; preds = %entry, %for.body
 ; CHECK: %0 = load i32* %arrayidx, align 4
 ; CHECK: %add = add nsw i32 %0, %r.029
 ; CHECK: %indvar.next = add i64 %indvar, 1
-; CHECK: %exitcond = icmp eq i64 %indvar.next, 400
+; CHECK: %exitcond = icmp eq i64 %indvar, 399
 ; CHECK: br i1 %exitcond, label %for.end, label %for.body
 
 ; CHECK: ret
@@ -83,7 +83,7 @@ for.body:                                         ; preds = %entry, %for.body
 ; CHECK: %0 = load float* %arrayidx, align 4
 ; CHECK: %add = fadd float %0, %r.029
 ; CHECK: %indvar.next = add i64 %indvar, 1
-; CHECK: %exitcond = icmp eq i64 %indvar.next, 400
+; CHECK: %exitcond = icmp eq i64 %indvar, 399
 ; CHECK: br i1 %exitcond, label %for.end, label %for.body
 
 ; CHECK: ret
diff --git a/test/Transforms/LoopStrengthReduce/X86/pr17473.ll b/test/Transforms/LoopStrengthReduce/X86/pr17473.ll
new file mode 100644
index 0000000..4204abc
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/pr17473.ll
@@ -0,0 +1,67 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; LSR shouldn't normalize IV if it can't be denormalized to the original
+; expression.  In this testcase, the normalized expression was denormalized to
+; an expression different from the original, and we were losing sign extension.
+
+; CHECK:    [[TMP:%[a-z]+]] = trunc i32 {{.*}} to i8
+; CHECK:     {{%[a-z0-9]+}} = sext i8 [[TMP]] to i32
+
+@j = common global i32 0, align 4
+@c = common global i32 0, align 4
+@g = common global i32 0, align 4
+@h = common global i8 0, align 1
+@d = common global i32 0, align 4
+@i = common global i32 0, align 4
+@e = common global i32 0, align 4
+@.str = private unnamed_addr constant [4 x i8] c"%x\0A\00", align 1
+@a = common global i32 0, align 4
+@b = common global i16 0, align 2
+
+; Function Attrs: nounwind optsize ssp uwtable
+define i32 @main() #0 {
+entry:
+  store i8 0, i8* @h, align 1
+  %0 = load i32* @j, align 4
+  %tobool.i = icmp eq i32 %0, 0
+  %1 = load i32* @d, align 4
+  %cmp3 = icmp sgt i32 %1, -1
+  %.lobit = lshr i32 %1, 31
+  %.lobit.not = xor i32 %.lobit, 1
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %fn3.exit
+  %inc9 = phi i8 [ 0, %entry ], [ %inc, %fn3.exit ]
+  %conv = sext i8 %inc9 to i32
+  br i1 %tobool.i, label %fn3.exit, label %land.rhs.i
+
+land.rhs.i:                                       ; preds = %for.body
+  store i32 0, i32* @c, align 4
+  br label %fn3.exit
+
+fn3.exit:                                         ; preds = %for.body, %land.rhs.i
+  %inc = add i8 %inc9, 1
+  %cmp = icmp sgt i8 %inc, -1
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %fn3.exit
+  %.lobit.not. = select i1 %cmp3, i32 %.lobit.not, i32 0
+  store i32 %conv, i32* @g, align 4
+  store i32 %.lobit.not., i32* @i, align 4
+  store i8 %inc, i8* @h, align 1
+  %conv7 = sext i8 %inc to i32
+  %add = add nsw i32 %conv7, %conv
+  store i32 %add, i32* @e, align 4
+  %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %add) #2
+  ret i32 0
+}
+
+; Function Attrs: nounwind optsize
+declare i32 @printf(i8* nocapture readonly, ...) #1
+
+attributes #0 = { nounwind optsize ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind optsize }
diff --git a/test/Transforms/LoopStrengthReduce/pr18165.ll b/test/Transforms/LoopStrengthReduce/pr18165.ll
new file mode 100644
index 0000000..89adef7
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/pr18165.ll
@@ -0,0 +1,88 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; LSR shouldn't reuse IV if the resultant offset is not valid for the operand type.
+; CHECK-NOT: trunc i32 %.ph to i8
+
+%struct.anon = type { i32, i32, i32 }
+
+@c = global i32 1, align 4
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+@b = common global i32 0, align 4
+@a = common global %struct.anon zeroinitializer, align 4
+@e = common global %struct.anon zeroinitializer, align 4
+@d = common global i32 0, align 4
+@f = common global i32 0, align 4
+@g = common global i32 0, align 4
+@h = common global i32 0, align 4
+
+; Function Attrs: nounwind optsize ssp uwtable
+define i32 @main() #0 {
+entry:
+  %0 = load i32* getelementptr inbounds (%struct.anon* @a, i64 0, i32 0), align 4, !tbaa !1
+  %tobool7.i = icmp eq i32 %0, 0
+  %.promoted.i = load i32* getelementptr inbounds (%struct.anon* @a, i64 0, i32 2), align 4, !tbaa !6
+  %f.promoted.i = load i32* @f, align 4, !tbaa !7
+  br label %for.body6.i.outer
+
+for.body6.i.outer:                                ; preds = %entry, %lor.end.i
+  %.ph = phi i32 [ %add.i, %lor.end.i ], [ 0, %entry ]
+  %or1512.i.ph = phi i32 [ %or15.i, %lor.end.i ], [ %f.promoted.i, %entry ]
+  %or1410.i.ph = phi i32 [ %or14.i, %lor.end.i ], [ %.promoted.i, %entry ]
+  %p.addr.16.i.ph = phi i8 [ %inc10.i, %lor.end.i ], [ -128, %entry ]
+  br i1 %tobool7.i, label %if.end9.i, label %lbl.loopexit.i
+
+lbl.loopexit.i:                                   ; preds = %for.body6.i.outer, %lbl.loopexit.i
+  br label %lbl.loopexit.i
+
+if.end9.i:                                        ; preds = %for.body6.i.outer
+  %inc10.i = add i8 %p.addr.16.i.ph, 1
+  %tobool12.i = icmp eq i8 %p.addr.16.i.ph, 0
+  br i1 %tobool12.i, label %lor.rhs.i, label %lor.end.i
+
+lor.rhs.i:                                        ; preds = %if.end9.i
+  %1 = load i32* @b, align 4, !tbaa !7
+  %dec.i = add nsw i32 %1, -1
+  store i32 %dec.i, i32* @b, align 4, !tbaa !7
+  %tobool13.i = icmp ne i32 %1, 0
+  br label %lor.end.i
+
+lor.end.i:                                        ; preds = %lor.rhs.i, %if.end9.i
+  %2 = phi i1 [ true, %if.end9.i ], [ %tobool13.i, %lor.rhs.i ]
+  %lor.ext.i = zext i1 %2 to i32
+  %or14.i = or i32 %lor.ext.i, %or1410.i.ph
+  %or15.i = or i32 %or14.i, %or1512.i.ph
+  %add.i = add nsw i32 %.ph, 2
+  %cmp.i = icmp slt i32 %add.i, 21
+  br i1 %cmp.i, label %for.body6.i.outer, label %fn1.exit
+
+fn1.exit:                                         ; preds = %lor.end.i
+  store i32 0, i32* @g, align 4, !tbaa !7
+  store i32 %or14.i, i32* getelementptr inbounds (%struct.anon* @a, i64 0, i32 2), align 4, !tbaa !6
+  store i32 %or15.i, i32* @f, align 4, !tbaa !7
+  store i32 %add.i, i32* getelementptr inbounds (%struct.anon* @e, i64 0, i32 1), align 4, !tbaa !8
+  store i32 0, i32* @h, align 4, !tbaa !7
+  %3 = load i32* @b, align 4, !tbaa !7
+  %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %3) #2
+  ret i32 0
+}
+
+; Function Attrs: nounwind optsize
+declare i32 @printf(i8* nocapture readonly, ...) #1
+
+attributes #0 = { nounwind optsize ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind optsize }
+
+!llvm.ident = !{!0}
+
+!0 = metadata !{metadata !"clang version 3.5 "}
+!1 = metadata !{metadata !2, metadata !3, i64 0}
+!2 = metadata !{metadata !"", metadata !3, i64 0, metadata !3, i64 4, metadata !3, i64 8}
+!3 = metadata !{metadata !"int", metadata !4, i64 0}
+!4 = metadata !{metadata !"omnipotent char", metadata !5, i64 0}
+!5 = metadata !{metadata !"Simple C/C++ TBAA"}
+!6 = metadata !{metadata !2, metadata !3, i64 8}
+!7 = metadata !{metadata !3, metadata !3, i64 0}
+!8 = metadata !{metadata !2, metadata !3, i64 4}
diff --git a/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll b/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll
new file mode 100644
index 0000000..5fc5ed5
--- /dev/null
+++ b/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll
@@ -0,0 +1,42 @@
+; RUN: opt -indvars -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; We must not vectorize this loop. %add55 is not a reduction. Its value is used
+; multiple times.
+
+; PR18526
+
+; CHECK: multiple_use_of_value
+; CHECK-NOT: <2 x i32>
+
+define void @multiple_use_of_value() {
+entry:
+  %n = alloca i32, align 4
+  %k7 = alloca i32, align 4
+  %nf = alloca i32, align 4
+  %0 = load i32* %k7, align 4
+  %.neg1 = sub i32 0, %0
+  %n.promoted = load i32* %n, align 4
+  %nf.promoted = load i32* %nf, align 4
+  br label %for.body
+
+for.body:
+  %inc107 = phi i32 [ undef, %entry ], [ %inc10, %for.body ]
+  %inc6 = phi i32 [ %nf.promoted, %entry ], [ undef, %for.body ]
+  %add55 = phi i32 [ %n.promoted, %entry ], [ %add5, %for.body ]
+  %.neg2 = sub i32 0, %inc6
+  %add.neg = add i32 0, %add55
+  %add4.neg = add i32 %add.neg, %.neg1
+  %sub = add i32 %add4.neg, %.neg2
+  %add5 = add i32 %sub, %add55
+  %inc10 = add i32 %inc107, 1
+  %cmp = icmp ult i32 %inc10, 61
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  %add5.lcssa = phi i32 [ %add5, %for.body ]
+  store i32 %add5.lcssa, i32* %n, align 4
+  ret void
+}