7 files changed, 240 insertions, 164 deletions
diff --git a/test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll b/test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll
index d754987..00e43a7 100644
--- a/test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll
+++ b/test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -scalarrepl -S | grep {alloca %T}
+; RUN: opt < %s -scalarrepl -S | grep {alloca %%T}
 
 %T = type { [80 x i8], i32, i32 }
 declare i32 @.callback_1(i8*)
diff --git a/test/Transforms/ScalarRepl/2011-09-22-PHISpeculateInvoke.ll b/test/Transforms/ScalarRepl/2011-09-22-PHISpeculateInvoke.ll
new file mode 100644
index 0000000..f98f3e8
--- /dev/null
+++ b/test/Transforms/ScalarRepl/2011-09-22-PHISpeculateInvoke.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -scalarrepl -S | FileCheck %s
+; PR10987
+
+; Make sure scalarrepl doesn't move a load across an invoke which could
+; modify the loaded value.
+; (The PHI could theoretically be transformed by splitting the critical
+; edge, but scalarrepl doesn't modify the CFG, at least at the moment.)
+
+declare void @extern_fn(i32*)
+declare i32 @extern_fn2(i32)
+declare i32 @__gcc_personality_v0(i32, i64, i8*, i8*)
+
+define void @odd_fn(i1) noinline {                ; unnamed i1 argument is %0; the implicit entry block is %1
+  %retptr1 = alloca i32
+  %retptr2 = alloca i32
+  br i1 %0, label %then, label %else
+
+then:                                             ; preds = %1
+  invoke void @extern_fn(i32* %retptr1)           ; callee receives the address of %retptr1 and may write through it
+          to label %join unwind label %unwind
+
+else:                                             ; preds = %1
+  store i32 3, i32* %retptr2
+  br label %join
+
+join:                                             ; preds = %then, %else
+  %storemerge.in = phi i32* [ %retptr2, %else ], [ %retptr1, %then ]
+  %storemerge = load i32* %storemerge.in          ; load through the PHI; on the %then path it reads what the invoke may have stored
+  %x3 = call i32 @extern_fn2(i32 %storemerge)     ; keeps the loaded value live
+  ret void
+
+unwind:                                           ; preds = %then
+  %info = landingpad { i8*, i32 } personality i32 (i32, i64, i8*, i8*)* @__gcc_personality_v0
+          cleanup
+  call void @extern_fn(i32* null)
+  unreachable
+}
+
+; CHECK: define void @odd_fn
+; CHECK: %storemerge.in = phi i32* [ %retptr2, %else ], [ %retptr1, %then ]
diff --git a/test/Transforms/ScalarRepl/2011-10-11-VectorMemset.ll b/test/Transforms/ScalarRepl/2011-10-11-VectorMemset.ll
new file mode 100644
index 0000000..9e31231
--- /dev/null
+++ b/test/Transforms/ScalarRepl/2011-10-11-VectorMemset.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -S -scalarrepl | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin11.0.1"
+
+; CHECK: test
+; CHECK-NOT: alloca
+
+define void @test() nounwind {                    ; memset over an alloca that is an array of vectors
+entry:
+  %a156286 = alloca [4 x <4 x float>], align 16   ; 4 elements of 16 bytes each = 64 bytes
+  br i1 undef, label %cif_done, label %for_test158.preheader
+
+for_test158.preheader:                            ; preds = %entry
+  %a156286305 = bitcast [4 x <4 x float>]* %a156286 to i8*
+  call void @llvm.memset.p0i8.i64(i8* %a156286305, i8 -1, i64 64, i32 16, i1 false) ; fill value -1 (0xFF) over a length of 64 bytes, i.e. the whole alloca
+  unreachable
+
+cif_done:                                         ; preds = %entry
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/lifetime.ll b/test/Transforms/ScalarRepl/lifetime.ll
new file mode 100644
index 0000000..3f558a1
--- /dev/null
+++ b/test/Transforms/ScalarRepl/lifetime.ll
@@ -0,0 +1,139 @@
+; RUN: opt -scalarrepl -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @llvm.lifetime.start(i64, i8*)
+declare void @llvm.lifetime.end(i64, i8*)
+
+%t1 = type {i32, i32, i32}
+
+define void @test1() {                            ; lifetime marker with size -1 on field 0 of %t1; no load follows
+; CHECK: @test1
+  %A = alloca %t1
+  %A1 = getelementptr %t1* %A, i32 0, i32 0
+  %A2 = getelementptr %t1* %A, i32 0, i32 1       ; unused in this function
+  %A3 = getelementptr %t1* %A, i32 0, i32 2       ; unused in this function
+  %B = bitcast i32* %A1 to i8*                    ; i8* view of field 0, as required by the intrinsic signature
+  store i32 0, i32* %A1
+  call void @llvm.lifetime.start(i64 -1, i8* %B)  ; size operand is -1 (presumably "unknown size" per LangRef; confirm)
+  ret void                                        ; the directive below expects this to be the only instruction left
+; CHECK-NEXT: ret void
+}
+
+define void @test2() {                            ; like test1 but on field 1, and with a load after the marker
+; CHECK: @test2
+  %A = alloca %t1
+  %A1 = getelementptr %t1* %A, i32 0, i32 0       ; unused in this function
+  %A2 = getelementptr %t1* %A, i32 0, i32 1
+  %A3 = getelementptr %t1* %A, i32 0, i32 2       ; unused in this function
+  %B = bitcast i32* %A2 to i8*                    ; i8* view of field 1
+  store i32 0, i32* %A2
+  call void @llvm.lifetime.start(i64 -1, i8* %B)  ; size operand is -1 (presumably "unknown size" per LangRef; confirm)
+  %C = load i32* %A2                              ; load placed after the lifetime.start; result is unused
+  ret void                                        ; NOTE(review): test1/test3/test4 use the stricter next-line form of the directive below; confirm the looser form is intended
+; CHECK: ret void
+}
+
+define void @test3() {                            ; marker starts at field 1 and is larger than that field
+; CHECK: @test3
+  %A = alloca %t1
+  %A1 = getelementptr %t1* %A, i32 0, i32 0       ; unused in this function
+  %A2 = getelementptr %t1* %A, i32 0, i32 1
+  %A3 = getelementptr %t1* %A, i32 0, i32 2       ; unused in this function
+  %B = bitcast i32* %A2 to i8*                    ; i8* view of field 1
+  store i32 0, i32* %A2
+  call void @llvm.lifetime.start(i64 6, i8* %B)   ; 6 bytes from field 1: all of field 1 plus 2 bytes of field 2 (4-byte i32 per the datalayout above)
+  %C = load i32* %A2                              ; load placed after the lifetime.start; result is unused
+  ret void                                        ; the directive below expects this to be the only instruction left
+; CHECK-NEXT: ret void
+}
+
+define void @test4() {                            ; marker starts at field 1 and is smaller than that field
+; CHECK: @test4
+  %A = alloca %t1
+  %A1 = getelementptr %t1* %A, i32 0, i32 0       ; unused in this function
+  %A2 = getelementptr %t1* %A, i32 0, i32 1
+  %A3 = getelementptr %t1* %A, i32 0, i32 2       ; unused in this function
+  %B = bitcast i32* %A2 to i8*                    ; i8* view of field 1
+  store i32 0, i32* %A2
+  call void @llvm.lifetime.start(i64 1, i8* %B)   ; 1 byte only: the first byte of the 4-byte field 1
+  %C = load i32* %A2                              ; load placed after the lifetime.start; result is unused
+  ret void                                        ; the directive below expects this to be the only instruction left
+; CHECK-NEXT: ret void
+}
+
+%t2 = type {i32, [4 x i8], i32}
+
+define void @test5() {                            ; marker starts inside the [4 x i8] member and runs past its end
+; CHECK: @test5
+  %A = alloca %t2                                 ; %t2 = {i32, [4 x i8], i32}
+; CHECK: alloca{{.*}}i8
+; CHECK: alloca{{.*}}i8
+; CHECK: alloca{{.*}}i8
+
+  %A21 = getelementptr %t2* %A, i32 0, i32 1, i32 0
+  %A22 = getelementptr %t2* %A, i32 0, i32 1, i32 1
+  %A23 = getelementptr %t2* %A, i32 0, i32 1, i32 2
+  %A24 = getelementptr %t2* %A, i32 0, i32 1, i32 3
+; CHECK-NOT: store i8 1
+  store i8 1, i8* %A21                            ; array element 0, which lies before the marker's start address
+  store i8 2, i8* %A22
+  store i8 3, i8* %A23
+  store i8 4, i8* %A24
+
+  %A1 = getelementptr %t2* %A, i32 0, i32 0       ; unused in this function
+  %A2 = getelementptr %t2* %A, i32 0, i32 1, i32 1 ; same indices as %A22: array element 1
+  %A3 = getelementptr %t2* %A, i32 0, i32 2       ; unused in this function
+  store i8 0, i8* %A2
+  call void @llvm.lifetime.start(i64 5, i8* %A2)  ; 5 bytes from element 1: elements 1..3 plus 2 bytes of the trailing i32 (assumes byte offsets 5..9 under the datalayout above; confirm)
+; CHECK: llvm.lifetime{{.*}}i64 1
+; CHECK: llvm.lifetime{{.*}}i64 1
+; CHECK: llvm.lifetime{{.*}}i64 1
+  %C = load i8* %A2                               ; load placed after the lifetime.start; result is unused
+  ret void
+}
+
+%t3 = type {[4 x i16], [4 x i8]}
+
+define void @test6() {                            ; marker starts inside the i16 array and ends inside the i8 array
+; CHECK: @test6
+  %A = alloca %t3                                 ; %t3 = {[4 x i16], [4 x i8]}
+; CHECK: alloca i8
+; CHECK: alloca i8
+; CHECK: alloca i8
+
+  %A11 = getelementptr %t3* %A, i32 0, i32 0, i32 0
+  %A12 = getelementptr %t3* %A, i32 0, i32 0, i32 1
+  %A13 = getelementptr %t3* %A, i32 0, i32 0, i32 2
+  %A14 = getelementptr %t3* %A, i32 0, i32 0, i32 3
+  store i16 11, i16* %A11                         ; none of the four i16 stores may appear in the output (see directives below)
+  store i16 12, i16* %A12
+  store i16 13, i16* %A13
+  store i16 14, i16* %A14
+; CHECK-NOT: store i16 11
+; CHECK-NOT: store i16 12
+; CHECK-NOT: store i16 13
+; CHECK-NOT: store i16 14
+
+  %A21 = getelementptr %t3* %A, i32 0, i32 1, i32 0
+  %A22 = getelementptr %t3* %A, i32 0, i32 1, i32 1
+  %A23 = getelementptr %t3* %A, i32 0, i32 1, i32 2
+  %A24 = getelementptr %t3* %A, i32 0, i32 1, i32 3
+  store i8 21, i8* %A21                           ; the first three i8 stores are expected to remain in the output
+  store i8 22, i8* %A22
+  store i8 23, i8* %A23
+  store i8 24, i8* %A24                           ; the fourth i8 store is expected to be gone
+; CHECK: store i8 21
+; CHECK: store i8 22
+; CHECK: store i8 23
+; CHECK-NOT: store i8 24
+
+  %B = bitcast i16* %A13 to i8*                   ; i8* view of i16 element 2
+  call void @llvm.lifetime.start(i64 7, i8* %B)   ; 7 bytes from i16 element 2: i16 elements 2..3 plus i8 elements 0..2 (assumes the i8 array starts at byte 8; confirm)
+; CHECK: lifetime.start{{.*}}i64 1
+; CHECK: lifetime.start{{.*}}i64 1
+; CHECK: lifetime.start{{.*}}i64 1
+
+  ret void
+}
diff --git a/test/Transforms/ScalarRepl/vector_promote.ll b/test/Transforms/ScalarRepl/vector_promote.ll
index c51ef10..5c82ae4 100644
--- a/test/Transforms/ScalarRepl/vector_promote.ll
+++ b/test/Transforms/ScalarRepl/vector_promote.ll
@@ -86,7 +86,6 @@ define i32 @test5(float %X) {  ;; should turn into bitcast.
 ; CHECK-NEXT: ret i32
 }
 
-
 define i64 @test6(<2 x float> %X) {
 	%X_addr = alloca <2 x float>
         store <2 x float> %X, <2 x float>* %X_addr
@@ -98,168 +97,17 @@ define i64 @test6(<2 x float> %X) {
 ; CHECK: ret i64
 }
 
-define float @test7(<4 x float> %x) {
-	%a = alloca <4 x float>
-	store <4 x float> %x, <4 x float>* %a
-	%p = bitcast <4 x float>* %a to <2 x float>*
-	%b = load <2 x float>* %p
-	%q = getelementptr <4 x float>* %a, i32 0, i32 2
-	%c = load float* %q
-	ret float %c
-; CHECK: @test7
-; CHECK-NOT: alloca
-; CHECK: bitcast <4 x float> %x to <2 x double>
-; CHECK-NEXT: extractelement <2 x double>
-; CHECK-NEXT: bitcast double %tmp4 to <2 x float>
-; CHECK-NEXT: extractelement <4 x float>
-}
-
-define void @test8(<4 x float> %x, <2 x float> %y) {
-	%a = alloca <4 x float>
-	store <4 x float> %x, <4 x float>* %a
-	%p = bitcast <4 x float>* %a to <2 x float>*
-	store <2 x float> %y, <2 x float>* %p
-	ret void
-; CHECK: @test8
-; CHECK-NOT: alloca
-; CHECK: bitcast <4 x float> %x to <2 x double>
-; CHECK-NEXT: bitcast <2 x float> %y to double
-; CHECK-NEXT: insertelement <2 x double>
-; CHECK-NEXT: bitcast <2 x double> %tmp2 to <4 x float>
-}
+%struct.test7 = type { [6 x i32] }
 
-define i256 @test9(<4 x i256> %x) {
-	%a = alloca <4 x i256>
-	store <4 x i256> %x, <4 x i256>* %a
-	%p = bitcast <4 x i256>* %a to <2 x i256>*
-	%b = load <2 x i256>* %p
-	%q = getelementptr <4 x i256>* %a, i32 0, i32 2
-	%c = load i256* %q
-	ret i256 %c
-; CHECK: @test9
-; CHECK-NOT: alloca
-; CHECK: bitcast <4 x i256> %x to <2 x i512>
-; CHECK-NEXT: extractelement <2 x i512>
-; CHECK-NEXT: bitcast i512 %tmp4 to <2 x i256>
-; CHECK-NEXT: extractelement <4 x i256>
-}
-
-define void @test10(<4 x i256> %x, <2 x i256> %y) {
-	%a = alloca <4 x i256>
-	store <4 x i256> %x, <4 x i256>* %a
-	%p = bitcast <4 x i256>* %a to <2 x i256>*
-	store <2 x i256> %y, <2 x i256>* %p
-	ret void
-; CHECK: @test10
-; CHECK-NOT: alloca
-; CHECK: bitcast <4 x i256> %x to <2 x i512>
-; CHECK-NEXT: bitcast <2 x i256> %y to i512
-; CHECK-NEXT: insertelement <2 x i512>
-; CHECK-NEXT: bitcast <2 x i512> %tmp2 to <4 x i256>
-}
-
-%union.v = type { <2 x i64> }
-
-define void @test11(<2 x i64> %x) {
-  %a = alloca %union.v
-  %p = getelementptr inbounds %union.v* %a, i32 0, i32 0
-  store <2 x i64> %x, <2 x i64>* %p, align 16
-  %q = getelementptr inbounds %union.v* %a, i32 0, i32 0
-  %r = bitcast <2 x i64>* %q to <4 x float>*
-  %b = load <4 x float>* %r, align 16
-  ret void
-; CHECK: @test11
-; CHECK-NOT: alloca
-}
-
-define void @test12() {
-entry:
-  %a = alloca <64 x i8>, align 64
-  store <64 x i8> undef, <64 x i8>* %a, align 64
-  %p = bitcast <64 x i8>* %a to <16 x i8>*
-  %0 = load <16 x i8>* %p, align 64
-  store <16 x i8> undef, <16 x i8>* %p, align 64
-  %q = bitcast <16 x i8>* %p to <64 x i8>*
-  %1 = load <64 x i8>* %q, align 64
-  ret void
-; CHECK: @test12
-; CHECK-NOT: alloca
-; CHECK: extractelement <4 x i128>
-; CHECK: insertelement <4 x i128>
-}
-
-define float @test13(<4 x float> %x, <2 x i32> %y) {
-	%a = alloca <4 x float>
-	store <4 x float> %x, <4 x float>* %a
-	%p = bitcast <4 x float>* %a to <2 x float>*
-	%b = load <2 x float>* %p
-	%q = getelementptr <4 x float>* %a, i32 0, i32 2
-	%c = load float* %q
-	%r = bitcast <4 x float>* %a to <2 x i32>*
-	store <2 x i32> %y, <2 x i32>* %r
-	ret float %c
-; CHECK: @test13
-; CHECK-NOT: alloca
-; CHECK: bitcast <4 x float> %x to i128
-}
-
-define <3 x float> @test14(<3 x float> %x)  {
+define void @test7() {
 entry:
-  %x.addr = alloca <3 x float>, align 16
-  %r = alloca <3 x i32>, align 16
-  %extractVec = shufflevector <3 x float> %x, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
-  %storetmp = bitcast <3 x float>* %x.addr to <4 x float>*
-  store <4 x float> %extractVec, <4 x float>* %storetmp, align 16
-  %tmp = load <3 x float>* %x.addr, align 16
-  %cmp = fcmp une <3 x float> %tmp, zeroinitializer
-  %sext = sext <3 x i1> %cmp to <3 x i32>
-  %and = and <3 x i32> <i32 1065353216, i32 1065353216, i32 1065353216>, %sext
-  %extractVec1 = shufflevector <3 x i32> %and, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
-  %storetmp2 = bitcast <3 x i32>* %r to <4 x i32>*
-  store <4 x i32> %extractVec1, <4 x i32>* %storetmp2, align 16
-  %tmp3 = load <3 x i32>* %r, align 16
-  %0 = bitcast <3 x i32> %tmp3 to <3 x float>
-  %tmp4 = load <3 x float>* %x.addr, align 16
-  ret <3 x float> %tmp4
-; CHECK: @test14
-; CHECK-NOT: alloca
-; CHECK: shufflevector <4 x i32> %extractVec1, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
-}
-
-define void @test15(<3 x i64>* sret %agg.result, <3 x i64> %x, <3 x i64> %min) {
-entry:
-  %x.addr = alloca <3 x i64>, align 32
-  %min.addr = alloca <3 x i64>, align 32
-  %extractVec = shufflevector <3 x i64> %x, <3 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
-  %storetmp = bitcast <3 x i64>* %x.addr to <4 x i64>*
-  store <4 x i64> %extractVec, <4 x i64>* %storetmp, align 32
-  %extractVec1 = shufflevector <3 x i64> %min, <3 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
-  %storetmp2 = bitcast <3 x i64>* %min.addr to <4 x i64>*
-  store <4 x i64> %extractVec1, <4 x i64>* %storetmp2, align 32
-  %tmp = load <3 x i64>* %x.addr
-  %tmp5 = extractelement <3 x i64> %tmp, i32 0
-  %tmp11 = insertelement <3 x i64> %tmp, i64 %tmp5, i32 0
-  store <3 x i64> %tmp11, <3 x i64>* %x.addr
-  %tmp30 = load <3 x i64>* %x.addr, align 32
-  store <3 x i64> %tmp30, <3 x i64>* %agg.result
+  %memtmp = alloca %struct.test7, align 16
+  %0 = bitcast %struct.test7* %memtmp to <4 x i32>*
+  store <4 x i32> zeroinitializer, <4 x i32>* %0, align 16
+  %1 = getelementptr inbounds %struct.test7* %memtmp, i64 0, i32 0, i64 5
+  store i32 0, i32* %1, align 4
   ret void
-; CHECK: @test15
-; CHECK-NOT: alloca
-; CHECK: shufflevector <4 x i64> %tmpV2, <4 x i64> undef, <3 x i32> <i32 0, i32 1, i32 2>
-}
-
-define <4 x float> @test16(<4 x float> %x, i64 %y0, i64 %y1) {
-entry:
-  %tmp8 = bitcast <4 x float> undef to <2 x double>
-  %tmp9 = bitcast i64 %y0 to double
-  %tmp10 = insertelement <2 x double> %tmp8, double %tmp9, i32 0
-  %tmp11 = bitcast <2 x double> %tmp10 to <4 x float>
-  %tmp3 = bitcast <4 x float> %tmp11 to <2 x double>
-  %tmp4 = bitcast i64 %y1 to double
-  %tmp5 = insertelement <2 x double> %tmp3, double %tmp4, i32 1
-  %tmp6 = bitcast <2 x double> %tmp5 to <4 x float>
-	ret <4 x float> %tmp6
-; CHECK: @test16
+; CHECK: @test7
 ; CHECK-NOT: alloca
-; CHECK: bitcast <4 x float> %tmp11 to <2 x double>
+; CHECK: and i192
 }
diff --git a/test/Transforms/ScalarRepl/vectors-with-mismatched-elements.ll b/test/Transforms/ScalarRepl/vectors-with-mismatched-elements.ll
new file mode 100644
index 0000000..c3fbdf5
--- /dev/null
+++ b/test/Transforms/ScalarRepl/vectors-with-mismatched-elements.ll
@@ -0,0 +1,27 @@
+; RUN: opt -scalarrepl -S < %s | FileCheck %s
+; rdar://9786827
+
+; SROA should be able to handle the mixed types and eliminate the allocas here.
+
+; TODO: Currently it does this by falling back to integer "bags of bits".
+; With enough cleverness, it should be possible to convert between <3 x i32>
+; and <2 x i64> by using a combination of a bitcast and a shuffle.
+
+; CHECK: {
+; CHECK-NOT: alloca
+; CHECK: }
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin11.0.0"
+
+define <2 x i64> @foo() nounwind {                ; one alloca is accessed as both <3 x i32> and <2 x i64>
+entry:
+  %retval = alloca <3 x i32>, align 16
+  %z = alloca <4 x i32>, align 16
+  %tmp = load <4 x i32>* %z                       ; %z is loaded without any prior store in this function
+  %tmp1 = shufflevector <4 x i32> %tmp, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2> ; keep lanes 0..2 only
+  store <3 x i32> %tmp1, <3 x i32>* %retval
+  %0 = bitcast <3 x i32>* %retval to <2 x i64>*   ; reinterpret the <3 x i32> slot as <2 x i64>
+  %1 = load <2 x i64>* %0, align 1                ; reads 16 bytes through a pointer whose stored value was a 3 x i32 vector
+  ret <2 x i64> %1
+}
diff --git a/test/Transforms/ScalarRepl/volatile.ll b/test/Transforms/ScalarRepl/volatile.ll
index 3ff322e..ab276b0 100644
--- a/test/Transforms/ScalarRepl/volatile.ll
+++ b/test/Transforms/ScalarRepl/volatile.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -scalarrepl -S | grep {volatile load}
-; RUN: opt < %s -scalarrepl -S | grep {volatile store}
+; RUN: opt < %s -scalarrepl -S | grep {load volatile}
+; RUN: opt < %s -scalarrepl -S | grep {store volatile}
 
 define i32 @voltest(i32 %T) {
 	%A = alloca {i32, i32}