3 files changed, 179 insertions, 89 deletions
diff --git a/test/Transforms/PhaseOrdering/PR6627.ll b/test/Transforms/PhaseOrdering/PR6627.ll
new file mode 100644
index 0000000..ef9947f
--- /dev/null
+++ b/test/Transforms/PhaseOrdering/PR6627.ll
@@ -0,0 +1,93 @@
+; RUN: opt -O3 -S %s | FileCheck %s
+; XFAIL: *
+
+declare i32 @doo(...)
+
+; PR6627 - This whole nasty sequence should be flattened down to a single
+; 32-bit comparison.
+define void @test2(i8* %arrayidx) nounwind ssp {    ; calls @doo only when all four byte tests pass
+entry:
+  %xx = bitcast i8* %arrayidx to i32*
+  %x1 = load i32* %xx, align 4                      ; one 4-byte load from %arrayidx
+  %tmp = trunc i32 %x1 to i8                        ; keep only the low 8 bits of %x1
+  %conv = zext i8 %tmp to i32
+  %cmp = icmp eq i32 %conv, 127                     ; 127 = 0x7F
+  br i1 %cmp, label %land.lhs.true, label %if.end
+
+land.lhs.true:                                    ; preds = %entry
+  %arrayidx4 = getelementptr inbounds i8* %arrayidx, i64 1
+  %tmp5 = load i8* %arrayidx4, align 1              ; byte at offset 1
+  %conv6 = zext i8 %tmp5 to i32
+  %cmp7 = icmp eq i32 %conv6, 69                    ; 69 = 0x45 ('E')
+  br i1 %cmp7, label %land.lhs.true9, label %if.end
+
+land.lhs.true9:                                   ; preds = %land.lhs.true
+  %arrayidx12 = getelementptr inbounds i8* %arrayidx, i64 2
+  %tmp13 = load i8* %arrayidx12, align 1            ; byte at offset 2
+  %conv14 = zext i8 %tmp13 to i32
+  %cmp15 = icmp eq i32 %conv14, 76                  ; 76 = 0x4C ('L')
+  br i1 %cmp15, label %land.lhs.true17, label %if.end
+
+land.lhs.true17:                                  ; preds = %land.lhs.true9
+  %arrayidx20 = getelementptr inbounds i8* %arrayidx, i64 3
+  %tmp21 = load i8* %arrayidx20, align 1            ; byte at offset 3
+  %conv22 = zext i8 %tmp21 to i32
+  %cmp23 = icmp eq i32 %conv22, 70                  ; 70 = 0x46 ('F')
+  br i1 %cmp23, label %if.then, label %if.end
+
+if.then:                                          ; preds = %land.lhs.true17
+  %call25 = call i32 (...)* @doo()                  ; reached only if every compare above was true
+  br label %if.end
+
+if.end:
+  ret void                                          ; expected constant below: 1179403647 = 0x464C457F (0x7F in the low byte, then 0x45, 0x4C, 0x46)
+
+; CHECK: @test2
+; CHECK: %x1 = load i32* %xx, align 4
+; CHECK-NEXT: icmp eq i32 %x1, 1179403647
+; CHECK-NEXT: br i1 {{.*}}, label %if.then, label %if.end 
+}
+
+; PR6627 - This should all be flattened down to one compare.  This is the same
+; as test2, except that the initial load is done as an i8 instead of i32, thus
+; requiring widening.
+define void @test2a(i8* %arrayidx) nounwind ssp {   ; calls @doo only when all four byte tests pass
+entry:
+  %x1 = load i8* %arrayidx, align 4                 ; single-byte load at offset 0, marked align 4
+  %conv = zext i8 %x1 to i32
+  %cmp = icmp eq i32 %conv, 127                     ; 127 = 0x7F
+  br i1 %cmp, label %land.lhs.true, label %if.end
+
+land.lhs.true:                                    ; preds = %entry
+  %arrayidx4 = getelementptr inbounds i8* %arrayidx, i64 1
+  %tmp5 = load i8* %arrayidx4, align 1              ; byte at offset 1
+  %conv6 = zext i8 %tmp5 to i32
+  %cmp7 = icmp eq i32 %conv6, 69                    ; 69 = 0x45 ('E')
+  br i1 %cmp7, label %land.lhs.true9, label %if.end
+
+land.lhs.true9:                                   ; preds = %land.lhs.true
+  %arrayidx12 = getelementptr inbounds i8* %arrayidx, i64 2
+  %tmp13 = load i8* %arrayidx12, align 1            ; byte at offset 2
+  %conv14 = zext i8 %tmp13 to i32
+  %cmp15 = icmp eq i32 %conv14, 76                  ; 76 = 0x4C ('L')
+  br i1 %cmp15, label %land.lhs.true17, label %if.end
+
+land.lhs.true17:                                  ; preds = %land.lhs.true9
+  %arrayidx20 = getelementptr inbounds i8* %arrayidx, i64 3
+  %tmp21 = load i8* %arrayidx20, align 1            ; byte at offset 3
+  %conv22 = zext i8 %tmp21 to i32
+  %cmp23 = icmp eq i32 %conv22, 70                  ; 70 = 0x46 ('F')
+  br i1 %cmp23, label %if.then, label %if.end
+
+if.then:                                          ; preds = %land.lhs.true17
+  %call25 = call i32 (...)* @doo()                  ; reached only if every compare above was true
+  br label %if.end
+
+if.end:
+  ret void                                          ; expected output below loads an i32 as %x1 and compares it with 1179403647 = 0x464C457F
+
+; CHECK: @test2a
+; CHECK: %x1 = load i32* {{.*}}, align 4
+; CHECK-NEXT: icmp eq i32 %x1, 1179403647
+; CHECK-NEXT: br i1 {{.*}}, label %if.then, label %if.end 
+}
diff --git a/test/Transforms/PhaseOrdering/basic.ll b/test/Transforms/PhaseOrdering/basic.ll
index e5b2ba4..88ebca0 100644
--- a/test/Transforms/PhaseOrdering/basic.ll
+++ b/test/Transforms/PhaseOrdering/basic.ll
@@ -1,5 +1,4 @@
 ; RUN: opt -O3 -S %s | FileCheck %s
-; XFAIL: *
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-macosx10.6.7"
@@ -24,95 +23,29 @@ define void @test1() nounwind ssp {
 ; CHECK-NEXT: ret void
 }
 
-
-; PR6627 - This whole nasty sequence should be flattened down to a single
-; 32-bit comparison.
-define void @test2(i8* %arrayidx) nounwind ssp {
+; This function exposes a phase ordering problem when InstCombine is
+; turning %add into a bitmask, making it difficult to spot a 0 return value.
+;
+; It is also important that %add is expressed as a multiple of %div so scalar
+; evolution can recognize it.
+define i32 @test2(i32 %a, i32* %p) nounwind uwtable ssp {
 entry:
-  %xx = bitcast i8* %arrayidx to i32*
-  %x1 = load i32* %xx, align 4
-  %tmp = trunc i32 %x1 to i8
-  %conv = zext i8 %tmp to i32
-  %cmp = icmp eq i32 %conv, 127
-  br i1 %cmp, label %land.lhs.true, label %if.end
-
-land.lhs.true:                                    ; preds = %entry
-  %arrayidx4 = getelementptr inbounds i8* %arrayidx, i64 1
-  %tmp5 = load i8* %arrayidx4, align 1
-  %conv6 = zext i8 %tmp5 to i32
-  %cmp7 = icmp eq i32 %conv6, 69
-  br i1 %cmp7, label %land.lhs.true9, label %if.end
-
-land.lhs.true9:                                   ; preds = %land.lhs.true
-  %arrayidx12 = getelementptr inbounds i8* %arrayidx, i64 2
-  %tmp13 = load i8* %arrayidx12, align 1
-  %conv14 = zext i8 %tmp13 to i32
-  %cmp15 = icmp eq i32 %conv14, 76
-  br i1 %cmp15, label %land.lhs.true17, label %if.end
-
-land.lhs.true17:                                  ; preds = %land.lhs.true9
-  %arrayidx20 = getelementptr inbounds i8* %arrayidx, i64 3
-  %tmp21 = load i8* %arrayidx20, align 1
-  %conv22 = zext i8 %tmp21 to i32
-  %cmp23 = icmp eq i32 %conv22, 70
-  br i1 %cmp23, label %if.then, label %if.end
-
-if.then:                                          ; preds = %land.lhs.true17
-  %call25 = call i32 (...)* @doo()
-  br label %if.end
-
-if.end:
-  ret void
+  %div = udiv i32 %a, 4
+  %arrayidx = getelementptr inbounds i32* %p, i64 0
+  store i32 %div, i32* %arrayidx, align 4           ; p[0] = %div
+  %add = add i32 %div, %div                         ; 2 * %div, written as an add
+  %arrayidx1 = getelementptr inbounds i32* %p, i64 1
+  store i32 %add, i32* %arrayidx1, align 4          ; p[1] = %add
+  %arrayidx2 = getelementptr inbounds i32* %p, i64 1
+  %0 = load i32* %arrayidx2, align 4                ; reload p[1]
+  %arrayidx3 = getelementptr inbounds i32* %p, i64 0
+  %1 = load i32* %arrayidx3, align 4                ; reload p[0]
+  %mul = mul i32 2, %1                              ; 2 * p[0]
+  %sub = sub i32 %0, %mul                           ; p[1] - 2 * p[0]; the expected output returns constant 0
+  ret i32 %sub
 
 ; CHECK: @test2
-; CHECK: %x1 = load i32* %xx, align 4
-; CHECK-NEXT: icmp eq i32 %x1, 1179403647
-; CHECK-NEXT: br i1 {{.*}}, label %if.then, label %if.end 
+; CHECK: %div = lshr i32 %a, 2
+; CHECK: %add = shl nuw nsw i32 %div, 1
+; CHECK: ret i32 0
 }
-
-declare i32 @doo(...)
-
-; PR6627 - This should all be flattened down to one compare.  This is the same
-; as test2, except that the initial load is done as an i8 instead of i32, thus
-; requiring widening.
-define void @test2a(i8* %arrayidx) nounwind ssp {
-entry:
-  %x1 = load i8* %arrayidx, align 4
-  %conv = zext i8 %x1 to i32
-  %cmp = icmp eq i32 %conv, 127
-  br i1 %cmp, label %land.lhs.true, label %if.end
-
-land.lhs.true:                                    ; preds = %entry
-  %arrayidx4 = getelementptr inbounds i8* %arrayidx, i64 1
-  %tmp5 = load i8* %arrayidx4, align 1
-  %conv6 = zext i8 %tmp5 to i32
-  %cmp7 = icmp eq i32 %conv6, 69
-  br i1 %cmp7, label %land.lhs.true9, label %if.end
-
-land.lhs.true9:                                   ; preds = %land.lhs.true
-  %arrayidx12 = getelementptr inbounds i8* %arrayidx, i64 2
-  %tmp13 = load i8* %arrayidx12, align 1
-  %conv14 = zext i8 %tmp13 to i32
-  %cmp15 = icmp eq i32 %conv14, 76
-  br i1 %cmp15, label %land.lhs.true17, label %if.end
-
-land.lhs.true17:                                  ; preds = %land.lhs.true9
-  %arrayidx20 = getelementptr inbounds i8* %arrayidx, i64 3
-  %tmp21 = load i8* %arrayidx20, align 1
-  %conv22 = zext i8 %tmp21 to i32
-  %cmp23 = icmp eq i32 %conv22, 70
-  br i1 %cmp23, label %if.then, label %if.end
-
-if.then:                                          ; preds = %land.lhs.true17
-  %call25 = call i32 (...)* @doo()
-  br label %if.end
-
-if.end:
-  ret void
-
-; CHECK: @test2a
-; CHECK: %x1 = load i32* {{.*}}, align 4
-; CHECK-NEXT: icmp eq i32 %x1, 1179403647
-; CHECK-NEXT: br i1 {{.*}}, label %if.then, label %if.end 
-}
-
diff --git a/test/Transforms/PhaseOrdering/scev.ll b/test/Transforms/PhaseOrdering/scev.ll
new file mode 100644
index 0000000..c731280
--- /dev/null
+++ b/test/Transforms/PhaseOrdering/scev.ll
@@ -0,0 +1,64 @@
+; RUN: opt -O3 -S -analyze -scalar-evolution %s | FileCheck %s
+;
+; This file contains phase ordering tests for scalar evolution.
+; Test that the standard passes don't obfuscate the IR so scalar evolution can't
+; recognize expressions.
+
+; CHECK: test1
+; The loop body contains two increments by %div.
+; Make sure that 2*%div is recognizable, and not expressed as a bit mask of %d.
+; CHECK: -->  {%p,+,(2 * (%d /u 4) * sizeof(i32))}
+define void @test1(i64 %d, i32* %p) nounwind uwtable ssp {
+entry:
+  %div = udiv i64 %d, 4                             ; loop-invariant step, in i32 elements
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %p.addr.0 = phi i32* [ %p, %entry ], [ %add.ptr1, %for.inc ]   ; starts at %p, then takes %add.ptr1 from the previous iteration
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]               ; counter, starts at 0
+  %cmp = icmp ne i32 %i.0, 64                       ; leave the loop once %i.0 equals 64
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  store i32 0, i32* %p.addr.0, align 4
+  %add.ptr = getelementptr inbounds i32* %p.addr.0, i64 %div     ; first step of %div elements
+  store i32 1, i32* %add.ptr, align 4
+  %add.ptr1 = getelementptr inbounds i32* %add.ptr, i64 %div     ; second step of %div elements; feeds the %p.addr.0 phi
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+; CHECK: test1a
+; Same thing as test1, but it is even more tempting to fold 2 * (%d /u 2)
+; CHECK: -->  {%p,+,(2 * (%d /u 2) * sizeof(i32))}
+define void @test1a(i64 %d, i32* %p) nounwind uwtable ssp {
+entry:
+  %div = udiv i64 %d, 2                             ; loop-invariant step, in i32 elements (divisor is 2 here)
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %p.addr.0 = phi i32* [ %p, %entry ], [ %add.ptr1, %for.inc ]   ; starts at %p, then takes %add.ptr1 from the previous iteration
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]               ; counter, starts at 0
+  %cmp = icmp ne i32 %i.0, 64                       ; leave the loop once %i.0 equals 64
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  store i32 0, i32* %p.addr.0, align 4
+  %add.ptr = getelementptr inbounds i32* %p.addr.0, i64 %div     ; first step of %div elements
+  store i32 1, i32* %add.ptr, align 4
+  %add.ptr1 = getelementptr inbounds i32* %add.ptr, i64 %div     ; second step of %div elements; feeds the %p.addr.0 phi
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}