diff options
Diffstat (limited to 'test/Transforms/LoopVectorize')
29 files changed, 1525 insertions, 181 deletions
diff --git a/test/Transforms/LoopVectorize/12-12-11-if-conv.ll b/test/Transforms/LoopVectorize/12-12-11-if-conv.ll index 2dd7fe3..bab6300 100644 --- a/test/Transforms/LoopVectorize/12-12-11-if-conv.ll +++ b/test/Transforms/LoopVectorize/12-12-11-if-conv.ll @@ -15,7 +15,7 @@ entry: for.body: ; preds = %entry, %if.end %indvars.iv = phi i64 [ %indvars.iv.next, %if.end ], [ 0, %entry ] %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4, !tbaa !0 + %0 = load i32* %arrayidx, align 4 %tobool = icmp eq i32 %0, 0 br i1 %tobool, label %if.end, label %if.then @@ -29,7 +29,7 @@ if.then: ; preds = %for.body if.end: ; preds = %for.body, %if.then %z.0 = phi i32 [ %add1, %if.then ], [ 9, %for.body ] - store i32 %z.0, i32* %arrayidx, align 4, !tbaa !0 + store i32 %z.0, i32* %arrayidx, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %x @@ -38,7 +38,3 @@ if.end: ; preds = %for.body, %if.then for.end: ; preds = %if.end, %entry ret i32 undef } - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll b/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll index 405582c..ae9f998 100644 --- a/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll +++ b/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll @@ -24,7 +24,7 @@ entry: %3 = shl nsw i64 %indvars.iv, 2 %4 = getelementptr inbounds i8* %1, i64 %3 %5 = bitcast i8* %4 to float* - store float %value, float* %5, align 4, !tbaa !0 + store float %value, float* %5, align 4 %indvars.iv.next = add i64 %indvars.iv, %2 %6 = trunc i64 %indvars.iv.next to i32 %7 = icmp slt i32 %6, %_n @@ -43,7 +43,7 @@ entry: %0 = shl nsw i64 %indvars.iv, 2 %1 = getelementptr inbounds i8* bitcast (float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 16000) to 
i8*), i64 %0 %2 = bitcast i8* %1 to float* - store float -1.000000e+00, float* %2, align 4, !tbaa !0 + store float -1.000000e+00, float* %2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, 16000 @@ -52,6 +52,3 @@ entry: "5": ; preds = %"3" ret i32 0 } - -!0 = metadata !{metadata !"alias set 7: float", metadata !1} -!1 = metadata !{metadata !1} diff --git a/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll b/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll index 6c92440..f4c07b4 100644 --- a/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll +++ b/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll @@ -1,5 +1,7 @@ ; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -loop-vectorize -dce -instcombine -S < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + @B = common global [1024 x i32] zeroinitializer, align 16 @A = common global [1024 x i32] zeroinitializer, align 16 diff --git a/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll b/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll new file mode 100644 index 0000000..47a5e7a --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll @@ -0,0 +1,56 @@ +; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +;CHECK: @foo +;CHECK-NOT: <4 x i32> +;CHECK: ret void + +; Function Attrs: nounwind uwtable +define void @foo(i32* nocapture %a, i32* nocapture %b, i32 %k, i32 %m) #0 { +entry: + %cmp27 = icmp sgt i32 %m, 
0 + br i1 %cmp27, label %for.body3.lr.ph.us, label %for.end15 + +for.end.us: ; preds = %for.body3.us + %arrayidx9.us = getelementptr inbounds i32* %b, i64 %indvars.iv33 + %0 = load i32* %arrayidx9.us, align 4, !llvm.mem.parallel_loop_access !3 + %add10.us = add nsw i32 %0, 3 + store i32 %add10.us, i32* %arrayidx9.us, align 4, !llvm.mem.parallel_loop_access !3 + %indvars.iv.next34 = add i64 %indvars.iv33, 1 + %lftr.wideiv35 = trunc i64 %indvars.iv.next34 to i32 + %exitcond36 = icmp eq i32 %lftr.wideiv35, %m + br i1 %exitcond36, label %for.end15, label %for.body3.lr.ph.us, !llvm.loop.parallel !5 + +for.body3.us: ; preds = %for.body3.us, %for.body3.lr.ph.us + %indvars.iv29 = phi i64 [ 0, %for.body3.lr.ph.us ], [ %indvars.iv.next30, %for.body3.us ] + %1 = trunc i64 %indvars.iv29 to i32 + %add4.us = add i32 %add.us, %1 + %idxprom.us = sext i32 %add4.us to i64 + %arrayidx.us = getelementptr inbounds i32* %a, i64 %idxprom.us + %2 = load i32* %arrayidx.us, align 4, !llvm.mem.parallel_loop_access !3 + %add5.us = add nsw i32 %2, 1 + store i32 %add5.us, i32* %arrayidx7.us, align 4, !llvm.mem.parallel_loop_access !3 + %indvars.iv.next30 = add i64 %indvars.iv29, 1 + %lftr.wideiv31 = trunc i64 %indvars.iv.next30 to i32 + %exitcond32 = icmp eq i32 %lftr.wideiv31, %m + br i1 %exitcond32, label %for.end.us, label %for.body3.us, !llvm.loop.parallel !4 + +for.body3.lr.ph.us: ; preds = %for.end.us, %entry + %indvars.iv33 = phi i64 [ %indvars.iv.next34, %for.end.us ], [ 0, %entry ] + %3 = trunc i64 %indvars.iv33 to i32 + %add.us = add i32 %3, %k + %arrayidx7.us = getelementptr inbounds i32* %a, i64 %indvars.iv33 + br label %for.body3.us + +for.end15: ; preds = %for.end.us, %entry + ret void +} + +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!3 = metadata !{metadata !4, metadata !5} +!4 = 
metadata !{metadata !4} +!5 = metadata !{metadata !5} + diff --git a/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll b/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll index 186fba8..8716cff 100644 --- a/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll +++ b/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll @@ -11,9 +11,9 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %arrayidx = getelementptr inbounds float* %a, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %add = fadd float %0, 1.000000e+00 - store float %add, float* %arrayidx, align 4, !tbaa !0 + store float %add, float* %arrayidx, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, 8 @@ -22,7 +22,3 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret void } - -!0 = metadata !{metadata !"float", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll index 452d0df..f904a8e 100644 --- a/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll +++ b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll @@ -19,19 +19,19 @@ entry: for.body: ; preds = %for.body.for.body_crit_edge, %entry %indvars.iv.reload = load i64* %indvars.iv.reg2mem %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv.reload - %0 = load i32* %arrayidx, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3 + %0 = load i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3 %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv.reload - %1 = load i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3 + %1 = load i32* 
%arrayidx2, align 4, !llvm.mem.parallel_loop_access !3 %idxprom3 = sext i32 %1 to i64 %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3 - store i32 %0, i32* %arrayidx4, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3 + store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !3 %indvars.iv.next = add i64 %indvars.iv.reload, 1 ; A new store without the parallel metadata here: store i64 %indvars.iv.next, i64* %indvars.iv.next.reg2mem %indvars.iv.next.reload1 = load i64* %indvars.iv.next.reg2mem %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next.reload1 - %2 = load i32* %arrayidx6, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3 - store i32 %2, i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3 + %2 = load i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !3 + store i32 %2, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3 %indvars.iv.next.reload = load i64* %indvars.iv.next.reg2mem %lftr.wideiv = trunc i64 %indvars.iv.next.reload to i32 %exitcond = icmp eq i32 %lftr.wideiv, 512 @@ -46,7 +46,4 @@ for.end: ; preds = %for.body ret void } -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} !3 = metadata !{metadata !3} diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops.ll b/test/Transforms/LoopVectorize/X86/parallel-loops.ll index f648722..3f1a071 100644 --- a/test/Transforms/LoopVectorize/X86/parallel-loops.ll +++ b/test/Transforms/LoopVectorize/X86/parallel-loops.ll @@ -21,16 +21,16 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4, !tbaa !0 + %0 = load i32* %arrayidx, align 4 %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv - %1 = load i32* %arrayidx2, align 4, !tbaa !0 + %1 = load i32* %arrayidx2, align 
4 %idxprom3 = sext i32 %1 to i64 %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3 - store i32 %0, i32* %arrayidx4, align 4, !tbaa !0 + store i32 %0, i32* %arrayidx4, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next - %2 = load i32* %arrayidx6, align 4, !tbaa !0 - store i32 %2, i32* %arrayidx2, align 4, !tbaa !0 + %2 = load i32* %arrayidx6, align 4 + store i32 %2, i32* %arrayidx2, align 4 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, 512 br i1 %exitcond, label %for.end, label %for.body @@ -51,18 +51,18 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3 + %0 = load i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3 %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv - %1 = load i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3 + %1 = load i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3 %idxprom3 = sext i32 %1 to i64 %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3 ; This store might have originated from inlining a function with a parallel ; loop. Refers to a list with the "original loop reference" (!4) also included. 
- store i32 %0, i32* %arrayidx4, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !5 + store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !5 %indvars.iv.next = add i64 %indvars.iv, 1 %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next - %2 = load i32* %arrayidx6, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3 - store i32 %2, i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3 + %2 = load i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !3 + store i32 %2, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, 512 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop.parallel !3 @@ -84,18 +84,18 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6 + %0 = load i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !6 %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv - %1 = load i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6 + %1 = load i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !6 %idxprom3 = sext i32 %1 to i64 %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3 ; This refers to the loop marked with !7 which we are not in at the moment. ; It should prevent detecting as a parallel loop. 
- store i32 %0, i32* %arrayidx4, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !7 + store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !7 %indvars.iv.next = add i64 %indvars.iv, 1 %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next - %2 = load i32* %arrayidx6, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6 - store i32 %2, i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6 + %2 = load i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !6 + store i32 %2, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !6 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, 512 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop.parallel !6 @@ -104,9 +104,6 @@ for.end: ; preds = %for.body ret void } -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} !3 = metadata !{metadata !3} !4 = metadata !{metadata !4} !5 = metadata !{metadata !3, metadata !4} diff --git a/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll b/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll new file mode 100644 index 0000000..b66119f --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll @@ -0,0 +1,29 @@ +; RUN: opt -O3 -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 -S < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.7.0" + +@x = common global [1024 x x86_fp80] zeroinitializer, align 16 + +;CHECK: @example +;CHECK-NOT: bitcast x86_fp80* {{%[^ ]+}} to <{{[2-9][0-9]*}} x x86_fp80>* +;CHECK: store +;CHECK: ret void + +define void @example() nounwind ssp uwtable { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, 
%entry ], [ %indvars.iv.next, %for.body ] + %conv = sitofp i32 1 to x86_fp80 + %arrayidx = getelementptr inbounds [1024 x x86_fp80]* @x, i64 0, i64 %indvars.iv + store x86_fp80 %conv, x86_fp80* %arrayidx, align 16 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} diff --git a/test/Transforms/LoopVectorize/bsd_regex.ll b/test/Transforms/LoopVectorize/bsd_regex.ll new file mode 100644 index 0000000..a14b92d --- /dev/null +++ b/test/Transforms/LoopVectorize/bsd_regex.ll @@ -0,0 +1,38 @@ +; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-unroll=2 < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +;PR 15830. + +;CHECK: foo +; When scalarizing stores we need to preserve the original order. +; Make sure that we are extracting in the correct order (0101, and not 0011). 
+;CHECK: extractelement <2 x i64> {{.*}}, i32 0 +;CHECK: extractelement <2 x i64> {{.*}}, i32 1 +;CHECK: extractelement <2 x i64> {{.*}}, i32 0 +;CHECK: extractelement <2 x i64> {{.*}}, i32 1 +;CHECK: store +;CHECK: store +;CHECK: store +;CHECK: store +;CHECK: ret + +define i32 @foo(i32* nocapture %A) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %0 = shl nsw i64 %indvars.iv, 2 + %arrayidx = getelementptr inbounds i32* %A, i64 %0 + store i32 4, i32* %arrayidx, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 10000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 undef +} + + diff --git a/test/Transforms/LoopVectorize/bzip_reverse_loops.ll b/test/Transforms/LoopVectorize/bzip_reverse_loops.ll index 431e422..2648bbe 100644 --- a/test/Transforms/LoopVectorize/bzip_reverse_loops.ll +++ b/test/Transforms/LoopVectorize/bzip_reverse_loops.ll @@ -17,7 +17,7 @@ do.body: ; preds = %cond.end, %entry %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %cond.end ] %p.addr.0 = phi i16* [ %p, %entry ], [ %incdec.ptr, %cond.end ] %incdec.ptr = getelementptr inbounds i16* %p.addr.0, i64 -1 - %0 = load i16* %incdec.ptr, align 2, !tbaa !0 + %0 = load i16* %incdec.ptr, align 2 %conv = zext i16 %0 to i32 %cmp = icmp ult i32 %conv, %size br i1 %cmp, label %cond.end, label %cond.true @@ -29,7 +29,7 @@ cond.true: ; preds = %do.body cond.end: ; preds = %do.body, %cond.true %cond = phi i16 [ %phitmp, %cond.true ], [ 0, %do.body ] - store i16 %cond, i16* %incdec.ptr, align 2, !tbaa !0 + store i16 %cond, i16* %incdec.ptr, align 2 %dec = add i32 %n.addr.0, -1 %tobool = icmp eq i32 %dec, 0 br i1 %tobool, label %do.end, label %do.body @@ -52,11 +52,11 @@ do.body: ; preds = %do.body, %entry %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.body ] %p.0 = phi i32* [ %a, %entry ], [ %incdec.ptr, %do.body ] %incdec.ptr = 
getelementptr inbounds i32* %p.0, i64 -1 - %0 = load i32* %incdec.ptr, align 4, !tbaa !3 + %0 = load i32* %incdec.ptr, align 4 %cmp = icmp slt i32 %0, %wsize %sub = sub nsw i32 %0, %wsize %cond = select i1 %cmp, i32 0, i32 %sub - store i32 %cond, i32* %incdec.ptr, align 4, !tbaa !3 + store i32 %cond, i32* %incdec.ptr, align 4 %dec = add nsw i32 %n.addr.0, -1 %tobool = icmp eq i32 %dec, 0 br i1 %tobool, label %do.end, label %do.body @@ -64,8 +64,3 @@ do.body: ; preds = %do.body, %entry do.end: ; preds = %do.body ret void } - -!0 = metadata !{metadata !"short", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} -!3 = metadata !{metadata !"int", metadata !1} diff --git a/test/Transforms/LoopVectorize/calloc.ll b/test/Transforms/LoopVectorize/calloc.ll index 08c84ef..7e79916 100644 --- a/test/Transforms/LoopVectorize/calloc.ll +++ b/test/Transforms/LoopVectorize/calloc.ll @@ -23,7 +23,7 @@ for.body: ; preds = %for.body, %for.body %i.030 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] %shr = lshr i64 %i.030, 1 %arrayidx = getelementptr inbounds i8* %bytes, i64 %shr - %1 = load i8* %arrayidx, align 1, !tbaa !0 + %1 = load i8* %arrayidx, align 1 %conv = zext i8 %1 to i32 %and = shl i64 %i.030, 2 %neg = and i64 %and, 4 @@ -38,7 +38,7 @@ for.body: ; preds = %for.body, %for.body %add17 = add nsw i32 %cond, %shr11 %conv18 = trunc i32 %add17 to i8 %arrayidx19 = getelementptr inbounds i8* %call, i64 %i.030 - store i8 %conv18, i8* %arrayidx19, align 1, !tbaa !0 + store i8 %conv18, i8* %arrayidx19, align 1 %inc = add i64 %i.030, 1 %exitcond = icmp eq i64 %inc, %0 br i1 %exitcond, label %for.end, label %for.body @@ -48,6 +48,3 @@ for.end: ; preds = %for.body, %entry } declare noalias i8* @calloc(i64, i64) nounwind - -!0 = metadata !{metadata !"omnipotent char", metadata !1} -!1 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/Transforms/LoopVectorize/dbg.value.ll 
b/test/Transforms/LoopVectorize/dbg.value.ll index a2ea951..127d479 100644 --- a/test/Transforms/LoopVectorize/dbg.value.ll +++ b/test/Transforms/LoopVectorize/dbg.value.ll @@ -18,12 +18,12 @@ for.body: ;CHECK: load <4 x i32> %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %arrayidx = getelementptr inbounds [1024 x i32]* @B, i64 0, i64 %indvars.iv, !dbg !19 - %0 = load i32* %arrayidx, align 4, !dbg !19, !tbaa !21 + %0 = load i32* %arrayidx, align 4, !dbg !19 %arrayidx2 = getelementptr inbounds [1024 x i32]* @C, i64 0, i64 %indvars.iv, !dbg !19 - %1 = load i32* %arrayidx2, align 4, !dbg !19, !tbaa !21 + %1 = load i32* %arrayidx2, align 4, !dbg !19 %add = add nsw i32 %1, %0, !dbg !19 %arrayidx4 = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv, !dbg !19 - store i32 %add, i32* %arrayidx4, align 4, !dbg !19, !tbaa !21 + store i32 %add, i32* %arrayidx4, align 4, !dbg !19 %indvars.iv.next = add i64 %indvars.iv, 1, !dbg !18 tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !9), !dbg !18 %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !18 @@ -64,7 +64,4 @@ attributes #1 = { nounwind readnone } !18 = metadata !{i32 6, i32 0, metadata !10, null} !19 = metadata !{i32 7, i32 0, metadata !20, null} !20 = metadata !{i32 786443, metadata !10, i32 6, i32 0, metadata !4, i32 1} -!21 = metadata !{metadata !"int", metadata !22} -!22 = metadata !{metadata !"omnipotent char", metadata !23} -!23 = metadata !{metadata !"Simple C/C++ TBAA"} !24 = metadata !{i32 9, i32 0, metadata !3, null} diff --git a/test/Transforms/LoopVectorize/float-reduction.ll b/test/Transforms/LoopVectorize/float-reduction.ll index 565684c..54ca172 100644 --- a/test/Transforms/LoopVectorize/float-reduction.ll +++ b/test/Transforms/LoopVectorize/float-reduction.ll @@ -13,7 +13,7 @@ for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %sum.04 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ] 
%arrayidx = getelementptr inbounds float* %A, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %add = fadd fast float %sum.04, %0 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 @@ -23,7 +23,3 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret float %add } - -!0 = metadata !{metadata !"float", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/Transforms/LoopVectorize/i8-induction.ll b/test/Transforms/LoopVectorize/i8-induction.ll index 7759b70..2a0e826 100644 --- a/test/Transforms/LoopVectorize/i8-induction.ll +++ b/test/Transforms/LoopVectorize/i8-induction.ll @@ -8,8 +8,8 @@ target triple = "x86_64-apple-macosx10.8.0" define void @f() nounwind uwtable ssp { scalar.ph: - store i8 0, i8* inttoptr (i64 1 to i8*), align 1, !tbaa !0 - %0 = load i8* @a, align 1, !tbaa !0 + store i8 0, i8* inttoptr (i64 1 to i8*), align 1 + %0 = load i8* @a, align 1 br label %for.body for.body: @@ -26,10 +26,6 @@ for.body: br i1 %phitmp14, label %for.body, label %for.end for.end: ; preds = %for.body - store i8 %mul, i8* @b, align 1, !tbaa !0 + store i8 %mul, i8* @b, align 1 ret void } - -!0 = metadata !{metadata !"omnipotent char", metadata !1} -!1 = metadata !{metadata !"Simple C/C++ TBAA"} - diff --git a/test/Transforms/LoopVectorize/if-conversion-nest.ll b/test/Transforms/LoopVectorize/if-conversion-nest.ll new file mode 100644 index 0000000..f44862a --- /dev/null +++ b/test/Transforms/LoopVectorize/if-conversion-nest.ll @@ -0,0 +1,48 @@ +; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +;CHECK: @foo +;CHECK: icmp sgt +;CHECK: 
icmp sgt +;CHECK: icmp slt +;CHECK: select <4 x i1> +;CHECK: %[[P1:.*]] = select <4 x i1> +;CHECK: xor <4 x i1> +;CHECK: and <4 x i1> +;CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %[[P1]] +;CHECK: ret +define i32 @foo(i32* nocapture %A, i32* nocapture %B, i32 %n) { +entry: + %cmp26 = icmp sgt i32 %n, 0 + br i1 %cmp26, label %for.body, label %for.end + +for.body: + %indvars.iv = phi i64 [ %indvars.iv.next, %if.end14 ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32* %B, i64 %indvars.iv + %1 = load i32* %arrayidx2, align 4 + %cmp3 = icmp sgt i32 %0, %1 + br i1 %cmp3, label %if.then, label %if.end14 + +if.then: + %cmp6 = icmp sgt i32 %0, 19 + br i1 %cmp6, label %if.end14, label %if.else + +if.else: + %cmp10 = icmp slt i32 %1, 4 + %. = select i1 %cmp10, i32 4, i32 5 + br label %if.end14 + +if.end14: + %x.0 = phi i32 [ 9, %for.body ], [ 3, %if.then ], [ %., %if.else ] ; <------------- A PHI with 3 entries that we can still vectorize. 
+ store i32 %x.0, i32* %arrayidx, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 undef +} diff --git a/test/Transforms/LoopVectorize/intrinsic.ll b/test/Transforms/LoopVectorize/intrinsic.ll index e79d78d..defbb5b 100644 --- a/test/Transforms/LoopVectorize/intrinsic.ll +++ b/test/Transforms/LoopVectorize/intrinsic.ll @@ -14,10 +14,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %call = tail call float @llvm.sqrt.f32(float %0) nounwind readnone %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %call, float* %arrayidx2, align 4, !tbaa !0 + store float %call, float* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -40,10 +40,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %call = tail call double @llvm.sqrt.f64(double %0) nounwind readnone %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %call, double* %arrayidx2, align 8, !tbaa !3 + store double %call, double* %arrayidx2, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -66,10 +66,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - 
%0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %call = tail call float @llvm.sin.f32(float %0) nounwind readnone %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %call, float* %arrayidx2, align 4, !tbaa !0 + store float %call, float* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -92,10 +92,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %call = tail call double @llvm.sin.f64(double %0) nounwind readnone %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %call, double* %arrayidx2, align 8, !tbaa !3 + store double %call, double* %arrayidx2, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -118,10 +118,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %call = tail call float @llvm.cos.f32(float %0) nounwind readnone %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %call, float* %arrayidx2, align 4, !tbaa !0 + store float %call, float* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -144,10 +144,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, 
!tbaa !3 + %0 = load double* %arrayidx, align 8 %call = tail call double @llvm.cos.f64(double %0) nounwind readnone %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %call, double* %arrayidx2, align 8, !tbaa !3 + store double %call, double* %arrayidx2, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -170,10 +170,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %call = tail call float @llvm.exp.f32(float %0) nounwind readnone %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %call, float* %arrayidx2, align 4, !tbaa !0 + store float %call, float* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -196,10 +196,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %call = tail call double @llvm.exp.f64(double %0) nounwind readnone %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %call, double* %arrayidx2, align 8, !tbaa !3 + store double %call, double* %arrayidx2, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -222,10 +222,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* 
%arrayidx, align 4 %call = tail call float @llvm.exp2.f32(float %0) nounwind readnone %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %call, float* %arrayidx2, align 4, !tbaa !0 + store float %call, float* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -248,10 +248,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %call = tail call double @llvm.exp2.f64(double %0) nounwind readnone %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %call, double* %arrayidx2, align 8, !tbaa !3 + store double %call, double* %arrayidx2, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -274,10 +274,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %call = tail call float @llvm.log.f32(float %0) nounwind readnone %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %call, float* %arrayidx2, align 4, !tbaa !0 + store float %call, float* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -300,10 +300,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %call = tail 
call double @llvm.log.f64(double %0) nounwind readnone %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %call, double* %arrayidx2, align 8, !tbaa !3 + store double %call, double* %arrayidx2, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -326,10 +326,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %call = tail call float @llvm.log10.f32(float %0) nounwind readnone %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %call, float* %arrayidx2, align 4, !tbaa !0 + store float %call, float* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -352,10 +352,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %call = tail call double @llvm.log10.f64(double %0) nounwind readnone %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %call, double* %arrayidx2, align 8, !tbaa !3 + store double %call, double* %arrayidx2, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -378,10 +378,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %call = tail call float 
@llvm.log2.f32(float %0) nounwind readnone %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %call, float* %arrayidx2, align 4, !tbaa !0 + store float %call, float* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -404,10 +404,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %call = tail call double @llvm.log2.f64(double %0) nounwind readnone %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %call, double* %arrayidx2, align 8, !tbaa !3 + store double %call, double* %arrayidx2, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -430,10 +430,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %call = tail call float @llvm.fabs.f32(float %0) nounwind readnone %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %call, float* %arrayidx2, align 4, !tbaa !0 + store float %call, float* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -453,10 +453,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %call = tail call double @llvm.fabs(double %0) 
nounwind readnone %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %call, double* %arrayidx2, align 8, !tbaa !3 + store double %call, double* %arrayidx2, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -479,10 +479,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %call = tail call float @llvm.floor.f32(float %0) nounwind readnone %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %call, float* %arrayidx2, align 4, !tbaa !0 + store float %call, float* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -505,10 +505,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %call = tail call double @llvm.floor.f64(double %0) nounwind readnone %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %call, double* %arrayidx2, align 8, !tbaa !3 + store double %call, double* %arrayidx2, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -531,10 +531,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %call = tail call float @llvm.ceil.f32(float %0) nounwind readnone 
%arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %call, float* %arrayidx2, align 4, !tbaa !0 + store float %call, float* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -557,10 +557,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %call = tail call double @llvm.ceil.f64(double %0) nounwind readnone %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %call, double* %arrayidx2, align 8, !tbaa !3 + store double %call, double* %arrayidx2, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -583,10 +583,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %call = tail call float @llvm.trunc.f32(float %0) nounwind readnone %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %call, float* %arrayidx2, align 4, !tbaa !0 + store float %call, float* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -609,10 +609,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %call = tail call double @llvm.trunc.f64(double %0) nounwind readnone %arrayidx2 = getelementptr 
inbounds double* %x, i64 %indvars.iv - store double %call, double* %arrayidx2, align 8, !tbaa !3 + store double %call, double* %arrayidx2, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -635,10 +635,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %call = tail call float @llvm.rint.f32(float %0) nounwind readnone %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %call, float* %arrayidx2, align 4, !tbaa !0 + store float %call, float* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -661,10 +661,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %call = tail call double @llvm.rint.f64(double %0) nounwind readnone %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %call, double* %arrayidx2, align 8, !tbaa !3 + store double %call, double* %arrayidx2, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -687,10 +687,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %call = tail call float @llvm.nearbyint.f32(float %0) nounwind readnone %arrayidx2 = getelementptr inbounds float* %x, i64 
%indvars.iv - store float %call, float* %arrayidx2, align 4, !tbaa !0 + store float %call, float* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -713,10 +713,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %call = tail call double @llvm.nearbyint.f64(double %0) nounwind readnone %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %call, double* %arrayidx2, align 8, !tbaa !3 + store double %call, double* %arrayidx2, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -739,14 +739,14 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %arrayidx2 = getelementptr inbounds float* %w, i64 %indvars.iv - %1 = load float* %arrayidx2, align 4, !tbaa !0 + %1 = load float* %arrayidx2, align 4 %arrayidx4 = getelementptr inbounds float* %z, i64 %indvars.iv - %2 = load float* %arrayidx4, align 4, !tbaa !0 + %2 = load float* %arrayidx4, align 4 %3 = tail call float @llvm.fma.f32(float %0, float %2, float %1) %arrayidx6 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %3, float* %arrayidx6, align 4, !tbaa !0 + store float %3, float* %arrayidx6, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -769,14 +769,14 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] 
%arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %arrayidx2 = getelementptr inbounds double* %w, i64 %indvars.iv - %1 = load double* %arrayidx2, align 8, !tbaa !3 + %1 = load double* %arrayidx2, align 8 %arrayidx4 = getelementptr inbounds double* %z, i64 %indvars.iv - %2 = load double* %arrayidx4, align 8, !tbaa !3 + %2 = load double* %arrayidx4, align 8 %3 = tail call double @llvm.fma.f64(double %0, double %2, double %1) %arrayidx6 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %3, double* %arrayidx6, align 8, !tbaa !3 + store double %3, double* %arrayidx6, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -799,14 +799,14 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %arrayidx2 = getelementptr inbounds float* %w, i64 %indvars.iv - %1 = load float* %arrayidx2, align 4, !tbaa !0 + %1 = load float* %arrayidx2, align 4 %arrayidx4 = getelementptr inbounds float* %z, i64 %indvars.iv - %2 = load float* %arrayidx4, align 4, !tbaa !0 + %2 = load float* %arrayidx4, align 4 %3 = tail call float @llvm.fmuladd.f32(float %0, float %2, float %1) %arrayidx6 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %3, float* %arrayidx6, align 4, !tbaa !0 + store float %3, float* %arrayidx6, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -829,14 +829,14 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* 
%arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %arrayidx2 = getelementptr inbounds double* %w, i64 %indvars.iv - %1 = load double* %arrayidx2, align 8, !tbaa !3 + %1 = load double* %arrayidx2, align 8 %arrayidx4 = getelementptr inbounds double* %z, i64 %indvars.iv - %2 = load double* %arrayidx4, align 8, !tbaa !3 + %2 = load double* %arrayidx4, align 8 %3 = tail call double @llvm.fmuladd.f64(double %0, double %2, double %1) %arrayidx6 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %3, double* %arrayidx6, align 8, !tbaa !3 + store double %3, double* %arrayidx6, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -859,12 +859,12 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %arrayidx2 = getelementptr inbounds float* %z, i64 %indvars.iv - %1 = load float* %arrayidx2, align 4, !tbaa !0 + %1 = load float* %arrayidx2, align 4 %call = tail call float @llvm.pow.f32(float %0, float %1) nounwind readnone %arrayidx4 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %call, float* %arrayidx4, align 4, !tbaa !0 + store float %call, float* %arrayidx4, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -887,12 +887,12 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %arrayidx2 = getelementptr inbounds double* %z, i64 %indvars.iv - %1 = load double* %arrayidx2, align 8, !tbaa !3 + %1 = load double* %arrayidx2, align 
8 %call = tail call double @llvm.pow.f64(double %0, double %1) nounwind readnone %arrayidx4 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %call, double* %arrayidx4, align 8, !tbaa !3 + store double %call, double* %arrayidx4, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -927,9 +927,3 @@ for.end: ; preds = %for.body declare float @fabsf(float) nounwind readnone declare double @llvm.pow.f64(double, double) nounwind readnone - -!0 = metadata !{metadata !"float", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} -!3 = metadata !{metadata !"double", metadata !1} -!4 = metadata !{metadata !"int", metadata !1} diff --git a/test/Transforms/LoopVectorize/lcssa-crash.ll b/test/Transforms/LoopVectorize/lcssa-crash.ll index 06b3b08..de6be54 100644 --- a/test/Transforms/LoopVectorize/lcssa-crash.ll +++ b/test/Transforms/LoopVectorize/lcssa-crash.ll @@ -27,3 +27,14 @@ for.end.i.i.i: unreachable } +; PR16139 +define void @test2(i8* %x) { +entry: + indirectbr i8* %x, [ label %L0, label %L1 ] + +L0: + br label %L0 + +L1: + ret void +} diff --git a/test/Transforms/LoopVectorize/minmax_reduction.ll b/test/Transforms/LoopVectorize/minmax_reduction.ll new file mode 100644 index 0000000..502fd8b --- /dev/null +++ b/test/Transforms/LoopVectorize/minmax_reduction.ll @@ -0,0 +1,885 @@ +; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +@A = common global [1024 x i32] zeroinitializer, align 16 +@fA = common global [1024 x float] zeroinitializer, align 16 +@dA = common global [1024 x double] zeroinitializer, align 16 + +; Signed tests. + +; Turn this into a max reduction. 
Make sure we use a splat to initialize the +; vector for the reduction. +; CHECK: @max_red +; CHECK: %[[VAR:.*]] = insertelement <2 x i32> undef, i32 %max, i32 0 +; CHECK: {{.*}} = shufflevector <2 x i32> %[[VAR]], <2 x i32> undef, <2 x i32> zeroinitializer +; CHECK: icmp sgt <2 x i32> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: icmp sgt <2 x i32> +; CHECK: select <2 x i1> + +define i32 @max_red(i32 %max) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %cmp3 = icmp sgt i32 %0, %max.red.08 + %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 %max.red.0 +} + +; Turn this into a max reduction. The select has its inputs reversed therefore +; this is a max reduction. 
+; CHECK: @max_red_inverse_select
+; CHECK: icmp slt <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp sgt <2 x i32>
+; CHECK: select <2 x i1>
+
+; Signed max written as "red < x ? x : red". Here it is the compare whose
+; operands are swapped (with the opposite predicate); the select operands are
+; in (%0, %max.red.08) order. The patterns above expect the source predicate
+; (slt) on <2 x i32> before the middle.block label and an sgt compare after it
+; (presumably the final reduction across the vector lanes).
+define i32 @max_red_inverse_select(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  ; Running reduction value, seeded with the %max argument.
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  ; True when the running value is smaller than the loaded element.
+  %cmp3 = icmp slt i32 %max.red.08, %0
+  ; Keep the larger of the two.
+  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  ; The loop runs 1024 iterations (induction variable 0 .. 1023).
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; Turn this into a min reduction.
+; CHECK: @min_red
+; CHECK: icmp slt <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp slt <2 x i32>
+; CHECK: select <2 x i1>
+
+; Signed min written as "x < red ? x : red". Note that the reduction phi keeps
+; the name %max.red.* even though a minimum is computed.
+define i32 @min_red(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  ; True when the loaded element is smaller than the running value.
+  %cmp3 = icmp slt i32 %0, %max.red.08
+  ; Keep the smaller of the two.
+  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; Turn this into a min reduction. The compare has its operands swapped and its
+; predicate inverted (sgt %red, %x instead of slt %x, %red), so this is still a
+; min reduction.
+; CHECK: @min_red_inverse_select
+; CHECK: icmp sgt <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp slt <2 x i32>
+; CHECK: select <2 x i1>
+
+; Signed min written as "red > x ? x : red": the compare operands are swapped
+; (with the opposite predicate), the select operands are in (%0, %max.red.08)
+; order. The patterns above expect the source predicate (sgt) before the
+; middle.block label and an slt compare after it.
+define i32 @min_red_inverse_select(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  ; Running reduction value, seeded with the %max argument.
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  ; True when the running value is greater than the loaded element.
+  %cmp3 = icmp sgt i32 %max.red.08, %0
+  ; Keep the smaller of the two.
+  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  ; The loop runs 1024 iterations (induction variable 0 .. 1023).
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; Unsigned tests.
+
+; Turn this into a max reduction.
+; CHECK: @umax_red
+; CHECK: icmp ugt <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp ugt <2 x i32>
+; CHECK: select <2 x i1>
+
+; Unsigned max written as "x >u red ? x : red".
+define i32 @umax_red(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  ; Unsigned compare: true when the loaded element is above the running value.
+  %cmp3 = icmp ugt i32 %0, %max.red.08
+  ; Keep the larger of the two.
+  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; Turn this into a max reduction. The compare has its operands swapped and its
+; predicate inverted (ult %red, %x instead of ugt %x, %red), so this is still a
+; max reduction.
+; CHECK: @umax_red_inverse_select
+; CHECK: icmp ult <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp ugt <2 x i32>
+; CHECK: select <2 x i1>
+
+; Unsigned max written as "red <u x ? x : red": the compare operands are
+; swapped (with the opposite predicate), the select operands are in
+; (%0, %max.red.08) order. The patterns above expect the source predicate (ult)
+; before the middle.block label and a ugt compare after it.
+define i32 @umax_red_inverse_select(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  ; Running reduction value, seeded with the %max argument.
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  ; Unsigned compare: true when the running value is below the loaded element.
+  %cmp3 = icmp ult i32 %max.red.08, %0
+  ; Keep the larger of the two.
+  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  ; The loop runs 1024 iterations (induction variable 0 .. 1023).
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; Turn this into a min reduction.
+; CHECK: @umin_red
+; CHECK: icmp ult <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp ult <2 x i32>
+; CHECK: select <2 x i1>
+
+; Unsigned min written as "x <u red ? x : red".
+define i32 @umin_red(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  ; Unsigned compare: true when the loaded element is below the running value.
+  %cmp3 = icmp ult i32 %0, %max.red.08
+  ; Keep the smaller of the two.
+  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; Turn this into a min reduction. The compare has its operands swapped and its
+; predicate inverted (ugt %red, %x instead of ult %x, %red), so this is still a
+; min reduction.
+; CHECK: @umin_red_inverse_select
+; CHECK: icmp ugt <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp ult <2 x i32>
+; CHECK: select <2 x i1>
+
+; Unsigned min written as "red >u x ? x : red": the compare operands are
+; swapped (with the opposite predicate), the select operands are in
+; (%0, %max.red.08) order. The patterns above expect the source predicate (ugt)
+; before the middle.block label and a ult compare after it.
+define i32 @umin_red_inverse_select(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  ; Running reduction value, seeded with the %max argument.
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  ; Unsigned compare: true when the running value is above the loaded element.
+  %cmp3 = icmp ugt i32 %max.red.08, %0
+  ; Keep the smaller of the two.
+  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  ; The loop runs 1024 iterations (induction variable 0 .. 1023).
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; SGE -> SLT
+; Turn this into a min reduction (select inputs are reversed).
+; CHECK: @sge_min_red
+; CHECK: icmp sge <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp slt <2 x i32>
+; CHECK: select <2 x i1>
+
+; Signed min written as "x >= red ? red : x". Here the select operands really
+; are in (%max.red.08, %0) order, unlike the *_inverse_select tests.
+define i32 @sge_min_red(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  ; True when the loaded element is not smaller than the running value.
+  %cmp3 = icmp sge i32 %0, %max.red.08
+  ; In that case keep the running value, otherwise take the element: a min.
+  %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; SLE -> SGT
+; Turn this into a max reduction (select inputs are reversed).
+; CHECK: @sle_min_red
+; CHECK: icmp sle <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp sgt <2 x i32>
+; CHECK: select <2 x i1>
+
+; Signed max written as "x <= red ? red : x" (select operands in
+; (%max.red.08, %0) order).
+; NOTE(review): despite the "_min_" in its name, this function is checked as a
+; max reduction (sgt after the middle.block label) - the name looks like a
+; copy/paste leftover; confirm before renaming.
+define i32 @sle_min_red(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  ; Running reduction value, seeded with the %max argument.
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  ; True when the loaded element is not greater than the running value.
+  %cmp3 = icmp sle i32 %0, %max.red.08
+  ; In that case keep the running value, otherwise take the element: a max.
+  %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  ; The loop runs 1024 iterations (induction variable 0 .. 1023).
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; UGE -> ULT
+; Turn this into a min reduction (select inputs are reversed).
+; CHECK: @uge_min_red
+; CHECK: icmp uge <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp ult <2 x i32>
+; CHECK: select <2 x i1>
+
+; Unsigned min written as "x >=u red ? red : x" (select operands in
+; (%max.red.08, %0) order).
+define i32 @uge_min_red(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  ; Unsigned compare: true when the element is not below the running value.
+  %cmp3 = icmp uge i32 %0, %max.red.08
+  ; In that case keep the running value, otherwise take the element: a min.
+  %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; ULE -> UGT
+; Turn this into a max reduction (select inputs are reversed).
+; CHECK: @ule_min_red
+; CHECK: icmp ule <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp ugt <2 x i32>
+; CHECK: select <2 x i1>
+
+; Unsigned max written as "x <=u red ? red : x" (select operands in
+; (%max.red.08, %0) order).
+; NOTE(review): despite the "_min_" in its name, this function is checked as a
+; max reduction (ugt after the middle.block label); confirm before renaming.
+define i32 @ule_min_red(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  ; Running reduction value, seeded with the %max argument.
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp3 = icmp ule i32 %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  ; The loop runs 1024 iterations (induction variable 0 .. 1023).
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; No reduction.
+; The compare below does not involve the running value at all, so the
+; select is not a min/max of (%0, %max.red.08) and no <2 x i32> compare must be
+; produced. The negative pattern has to allow for the predicate: an icmp is
+; always printed as "icmp <pred> <type>", so the literal text "icmp <2 x i32>"
+; can never occur and would make the directive vacuous.
+; CHECK: @no_red_1
+; CHECK-NOT: icmp {{[a-z]+}} <2 x i32>
+define i32 @no_red_1(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  ; NOTE(review): a first index of 1 addresses past the single [1024 x i32]
+  ; object @A; presumably it is only meant to provide a second, distinct
+  ; load - confirm.
+  %arrayidx1 = getelementptr inbounds [1024 x i32]* @A, i64 1, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %1 = load i32* %arrayidx1, align 4
+  ; Compares the two loaded values, not the running value.
+  %cmp3 = icmp sgt i32 %0, %1
+  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; The compare involves the running value, but the select picks between the two
+; loaded values, so again this is not a min/max of (%0, %max.red.08). See the
+; note on the negative pattern in the previous test.
+; CHECK: @no_red_2
+; CHECK-NOT: icmp {{[a-z]+}} <2 x i32>
+define i32 @no_red_2(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %arrayidx1 = getelementptr inbounds [1024 x i32]* @A, i64 1, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %1 = load i32* %arrayidx1, align 4
+  %cmp3 = icmp sgt i32 %0, %max.red.08
+  ; The false operand is the second load, not the running value.
+  %max.red.0 = select i1 %cmp3, i32 %0, i32 %1
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; Float tests.
+
+; Maximum.
+
+; All float maximum tests below expect the source predicate on <2 x float>
+; before the middle.block label and an ordered "fcmp ogt" after it. They carry
+; attribute group #0, which is defined outside this excerpt (presumably the
+; no-nans-fp-math attribute mentioned below - confirm).
+
+; Turn this into a max reduction in the presence of a no-nans-fp-math attribute.
+; CHECK: @max_red_float
+; CHECK: fcmp ogt <2 x float>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: fcmp ogt <2 x float>
+; CHECK: select <2 x i1>
+
+; "x > red ? x : red" with an ordered compare.
+define float @max_red_float(float %max) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %cmp3 = fcmp ogt float %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret float %max.red.0
+}
+
+; CHECK: @max_red_float_ge
+; CHECK: fcmp oge <2 x float>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: fcmp ogt <2 x float>
+; CHECK: select <2 x i1>
+
+; "x >= red ? x : red" with an ordered compare.
+define float @max_red_float_ge(float %max) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %cmp3 = fcmp oge float %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret float %max.red.0
+}
+
+; CHECK: @inverted_max_red_float
+; CHECK: fcmp olt <2 x float>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: fcmp ogt <2 x float>
+; CHECK: select <2 x i1>
+
+; "x < red ? red : x": inverted predicate with the select operands swapped.
+define float @inverted_max_red_float(float %max) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %cmp3 = fcmp olt float %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret float %max.red.0
+}
+
+; CHECK: @inverted_max_red_float_le
+; CHECK: fcmp ole <2 x float>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: fcmp ogt <2 x float>
+; CHECK: select <2 x i1>
+
+; "x <= red ? red : x": inverted predicate with the select operands swapped.
+define float @inverted_max_red_float_le(float %max) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %cmp3 = fcmp ole float %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret float %max.red.0
+}
+
+; CHECK: @unordered_max_red_float
+; CHECK: fcmp ugt <2 x float>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: fcmp ogt <2 x float>
+; CHECK: select <2 x i1>
+
+; "x > red ? x : red" with an unordered compare (ugt).
+define float @unordered_max_red_float(float %max) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %cmp3 = fcmp ugt float %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret float %max.red.0
+}
+
+; CHECK: @unordered_max_red_float_ge
+; CHECK: fcmp uge <2 x float>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: fcmp ogt <2 x float>
+; CHECK: select <2 x i1>
+
+; "x >= red ? x : red" with an unordered compare (uge).
+define float @unordered_max_red_float_ge(float %max) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %cmp3 = fcmp uge float %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret float %max.red.0
+}
+
+; CHECK: @inverted_unordered_max_red_float
+; CHECK: fcmp ult <2 x float>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: fcmp ogt <2 x float>
+; CHECK: select <2 x i1>
+
+; "x < red ? red : x" with an unordered compare (ult) and swapped select
+; operands.
+define float @inverted_unordered_max_red_float(float %max) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %cmp3 = fcmp ult float %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret float %max.red.0
+}
+
+; CHECK: @inverted_unordered_max_red_float_le
+; CHECK: fcmp ule <2 x float>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: fcmp ogt <2 x float>
+; CHECK: select <2 x i1>
+
+; "x <= red ? red : x" with an unordered compare (ule) and swapped select
+; operands.
+define float @inverted_unordered_max_red_float_le(float %max) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %cmp3 = fcmp ule float %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret float %max.red.0
+}
+
+; Minimum.
+
+; Turn this into a min reduction in the presence of a no-nans-fp-math attribute.
+; CHECK: @min_red_float +; CHECK: fcmp olt <2 x float> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: fcmp olt <2 x float> +; CHECK: select <2 x i1> + +define float @min_red_float(float %min) #0 { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %cmp3 = fcmp olt float %0, %min.red.08 + %min.red.0 = select i1 %cmp3, float %0, float %min.red.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret float %min.red.0 +} + +; CHECK: @min_red_float_le +; CHECK: fcmp ole <2 x float> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: fcmp olt <2 x float> +; CHECK: select <2 x i1> + +define float @min_red_float_le(float %min) #0 { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %cmp3 = fcmp ole float %0, %min.red.08 + %min.red.0 = select i1 %cmp3, float %0, float %min.red.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret float %min.red.0 +} + +; CHECK: @inverted_min_red_float +; CHECK: fcmp ogt <2 x float> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: fcmp olt <2 x float> +; CHECK: select <2 x i1> + +define float @inverted_min_red_float(float %min) #0 { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, 
%for.body ] + %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %cmp3 = fcmp ogt float %0, %min.red.08 + %min.red.0 = select i1 %cmp3, float %min.red.08, float %0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret float %min.red.0 +} + +; CHECK: @inverted_min_red_float_ge +; CHECK: fcmp oge <2 x float> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: fcmp olt <2 x float> +; CHECK: select <2 x i1> + +define float @inverted_min_red_float_ge(float %min) #0 { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %cmp3 = fcmp oge float %0, %min.red.08 + %min.red.0 = select i1 %cmp3, float %min.red.08, float %0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret float %min.red.0 +} + +; CHECK: @unordered_min_red +; CHECK: fcmp ult <2 x float> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: fcmp olt <2 x float> +; CHECK: select <2 x i1> + +define float @unordered_min_red_float(float %min) #0 { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %cmp3 = fcmp ult float %0, %min.red.08 + %min.red.0 = select i1 %cmp3, float %0, float %min.red.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.end, label 
%for.body + +for.end: + ret float %min.red.0 +} + +; CHECK: @unordered_min_red_float_le +; CHECK: fcmp ule <2 x float> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: fcmp olt <2 x float> +; CHECK: select <2 x i1> + +define float @unordered_min_red_float_le(float %min) #0 { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %cmp3 = fcmp ule float %0, %min.red.08 + %min.red.0 = select i1 %cmp3, float %0, float %min.red.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret float %min.red.0 +} + +; CHECK: @inverted_unordered_min_red +; CHECK: fcmp ugt <2 x float> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: fcmp olt <2 x float> +; CHECK: select <2 x i1> + +define float @inverted_unordered_min_red_float(float %min) #0 { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %cmp3 = fcmp ugt float %0, %min.red.08 + %min.red.0 = select i1 %cmp3, float %min.red.08, float %0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret float %min.red.0 +} + +; CHECK: @inverted_unordered_min_red_float_ge +; CHECK: fcmp uge <2 x float> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: fcmp olt <2 x float> +; CHECK: select <2 x i1> + +define float @inverted_unordered_min_red_float_ge(float %min) #0 { +entry: + br label %for.body + +for.body: + 
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %cmp3 = fcmp uge float %0, %min.red.08 + %min.red.0 = select i1 %cmp3, float %min.red.08, float %0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret float %min.red.0 +} + +; Make sure we handle doubles, too. +; CHECK: @min_red_double +; CHECK: fcmp olt <2 x double> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: fcmp olt <2 x double> +; CHECK: select <2 x i1> + +define double @min_red_double(double %min) #0 { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %min.red.08 = phi double [ %min, %entry ], [ %min.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x double]* @dA, i64 0, i64 %indvars.iv + %0 = load double* %arrayidx, align 4 + %cmp3 = fcmp olt double %0, %min.red.08 + %min.red.0 = select i1 %cmp3, double %0, double %min.red.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret double %min.red.0 +} + + +; Don't turn this into a max reduction. 
The no-nans-fp-math attribute is missing +; CHECK: @max_red_float_nans +; CHECK-NOT: <2 x float> + +define float @max_red_float_nans(float %max) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %cmp3 = fcmp ogt float %0, %max.red.08 + %max.red.0 = select i1 %cmp3, float %0, float %max.red.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret float %max.red.0 +} + + +attributes #0 = { "no-nans-fp-math"="true" } diff --git a/test/Transforms/LoopVectorize/no_idiv_reduction.ll b/test/Transforms/LoopVectorize/no_idiv_reduction.ll new file mode 100644 index 0000000..cdfb3fd --- /dev/null +++ b/test/Transforms/LoopVectorize/no_idiv_reduction.ll @@ -0,0 +1,24 @@ +; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s +@a = common global [128 x i32] zeroinitializer, align 16 + +;; Must not vectorize division reduction. Division is lossy. 
+define i32 @g() { +entry: + br label %for.body + +for.body: + ; CHECK: @g + ; CHECK-NOT: sdiv <2 x i32> + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %r.05 = phi i32 [ 80, %entry ], [ %div, %for.body ] + %arrayidx = getelementptr inbounds [128 x i32]* @a, i64 0, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %div = sdiv i32 %r.05, %0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 %div +} diff --git a/test/Transforms/LoopVectorize/no_outside_user.ll b/test/Transforms/LoopVectorize/no_outside_user.ll new file mode 100644 index 0000000..6f0357c --- /dev/null +++ b/test/Transforms/LoopVectorize/no_outside_user.ll @@ -0,0 +1,41 @@ +; RUN: opt -S -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 < %s | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" + +@f = common global i32 0, align 4 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 +@c = common global i32 0, align 4 +@a = common global i32 0, align 4 +@b = common global i32 0, align 4 +@e = common global i32 0, align 4 + +; We used to vectorize this loop. But it has a value that is used outside of the loop +; and is not a recognized reduction variable "tmp17". 
+ +; CHECK-NOT: <2 x i32> + +define i32 @main() { +bb: + %b.promoted = load i32* @b, align 4 + br label %.lr.ph.i + +.lr.ph.i: + %tmp8 = phi i32 [ %tmp18, %bb16 ], [ %b.promoted, %bb ] + %tmp2 = icmp sgt i32 %tmp8, 10 + br i1 %tmp2, label %bb16, label %bb10 + +bb10: + br label %bb16 + +bb16: + %tmp17 = phi i32 [ 0, %bb10 ], [ 1, %.lr.ph.i ] + %tmp18 = add nsw i32 %tmp8, 1 + %tmp19 = icmp slt i32 %tmp18, 4 + br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit + +f1.exit.loopexit: + %.lcssa = phi i32 [ %tmp17, %bb16 ] + ret i32 %.lcssa +} + + diff --git a/test/Transforms/LoopVectorize/phi-hang.ll b/test/Transforms/LoopVectorize/phi-hang.ll index b80d459..bbce239 100644 --- a/test/Transforms/LoopVectorize/phi-hang.ll +++ b/test/Transforms/LoopVectorize/phi-hang.ll @@ -27,3 +27,21 @@ bb5: ; preds = %bb4, %bb1 bb11: ; preds = %bb5 ret void } + +; PR15748 +define void @test2() { +bb: + br label %bb1 + +bb1: ; preds = %bb1, %bb + %tmp = phi i32 [ 0, %bb ], [ %tmp5, %bb1 ] + %tmp2 = phi i32 [ 0, %bb ], [ 1, %bb1 ] + %tmp3 = phi i32 [ 0, %bb ], [ %tmp4, %bb1 ] + %tmp4 = or i32 %tmp2, %tmp3 + %tmp5 = add nsw i32 %tmp, 1 + %tmp6 = icmp eq i32 %tmp5, 0 + br i1 %tmp6, label %bb7, label %bb1 + +bb7: ; preds = %bb1 + ret void +} diff --git a/test/Transforms/LoopVectorize/reverse_induction.ll b/test/Transforms/LoopVectorize/reverse_induction.ll new file mode 100644 index 0000000..f43f02b --- /dev/null +++ b/test/Transforms/LoopVectorize/reverse_induction.ll @@ -0,0 +1,79 @@ +; RUN: opt < %s -loop-vectorize -force-vector-unroll=2 -force-vector-width=4 -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; Make sure consecutive vector generates correct negative indices. 
+; PR15882 + +; CHECK: reverse_induction_i64 +; CHECK: add <4 x i64> %[[SPLAT:.*]], <i64 0, i64 -1, i64 -2, i64 -3> +; CHECK: add <4 x i64> %[[SPLAT]], <i64 -4, i64 -5, i64 -6, i64 -7> + +define i32 @reverse_induction_i64(i64 %startval, i32 * %ptr) { +entry: + br label %for.body + +for.body: + %add.i7 = phi i64 [ %startval, %entry ], [ %add.i, %for.body ] + %i.06 = phi i32 [ 0, %entry ], [ %inc4, %for.body ] + %redux5 = phi i32 [ 0, %entry ], [ %inc.redux, %for.body ] + %add.i = add i64 %add.i7, -1 + %kind_.i = getelementptr inbounds i32* %ptr, i64 %add.i + %tmp.i1 = load i32* %kind_.i, align 4 + %inc.redux = add i32 %tmp.i1, %redux5 + %inc4 = add i32 %i.06, 1 + %exitcond = icmp ne i32 %inc4, 1024 + br i1 %exitcond, label %for.body, label %loopend + +loopend: + ret i32 %inc.redux +} + +; CHECK: reverse_induction_i128 +; CHECK: add <4 x i128> %[[SPLAT:.*]], <i128 0, i128 -1, i128 -2, i128 -3> +; CHECK: add <4 x i128> %[[SPLAT]], <i128 -4, i128 -5, i128 -6, i128 -7> +define i32 @reverse_induction_i128(i128 %startval, i32 * %ptr) { +entry: + br label %for.body + +for.body: + %add.i7 = phi i128 [ %startval, %entry ], [ %add.i, %for.body ] + %i.06 = phi i32 [ 0, %entry ], [ %inc4, %for.body ] + %redux5 = phi i32 [ 0, %entry ], [ %inc.redux, %for.body ] + %add.i = add i128 %add.i7, -1 + %kind_.i = getelementptr inbounds i32* %ptr, i128 %add.i + %tmp.i1 = load i32* %kind_.i, align 4 + %inc.redux = add i32 %tmp.i1, %redux5 + %inc4 = add i32 %i.06, 1 + %exitcond = icmp ne i32 %inc4, 1024 + br i1 %exitcond, label %for.body, label %loopend + +loopend: + ret i32 %inc.redux +} + +; CHECK: reverse_induction_i16 +; CHECK: add <4 x i16> %[[SPLAT:.*]], <i16 0, i16 -1, i16 -2, i16 -3> +; CHECK: add <4 x i16> %[[SPLAT]], <i16 -4, i16 -5, i16 -6, i16 -7> + +define i32 @reverse_induction_i16(i16 %startval, i32 * %ptr) { +entry: + br label %for.body + +for.body: + %add.i7 = phi i16 [ %startval, %entry ], [ %add.i, %for.body ] + %i.06 = phi i32 [ 0, %entry ], [ %inc4, %for.body ] + 
%redux5 = phi i32 [ 0, %entry ], [ %inc.redux, %for.body ] + %add.i = add i16 %add.i7, -1 + %kind_.i = getelementptr inbounds i32* %ptr, i16 %add.i + %tmp.i1 = load i32* %kind_.i, align 4 + %inc.redux = add i32 %tmp.i1, %redux5 + %inc4 = add i32 %i.06, 1 + %exitcond = icmp ne i32 %inc4, 1024 + br i1 %exitcond, label %for.body, label %loopend + +loopend: + ret i32 %inc.redux +} + + diff --git a/test/Transforms/LoopVectorize/runtime-check-readonly.ll b/test/Transforms/LoopVectorize/runtime-check-readonly.ll new file mode 100644 index 0000000..4145d13 --- /dev/null +++ b/test/Transforms/LoopVectorize/runtime-check-readonly.ll @@ -0,0 +1,36 @@ +; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +;CHECK: add_ints +;CHECK: br +;CHECK: getelementptr +;CHECK-NEXT: getelementptr +;CHECK-NEXT: icmp uge +;CHECK-NEXT: icmp uge +;CHECK-NEXT: icmp uge +;CHECK-NEXT: icmp uge +;CHECK-NEXT: and +;CHECK: ret +define void @add_ints(i32* nocapture %A, i32* nocapture %B, i32* nocapture %C) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32* %B, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32* %C, i64 %indvars.iv + %1 = load i32* %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + %arrayidx4 = getelementptr inbounds i32* %A, i64 %indvars.iv + store i32 %add, i32* %arrayidx4, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 200 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} diff --git a/test/Transforms/LoopVectorize/runtime-check.ll 
b/test/Transforms/LoopVectorize/runtime-check.ll index 86098a6..014c4fc 100644 --- a/test/Transforms/LoopVectorize/runtime-check.ll +++ b/test/Transforms/LoopVectorize/runtime-check.ll @@ -22,10 +22,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %mul = fmul float %0, 3.000000e+00 %arrayidx2 = getelementptr inbounds float* %a, i64 %indvars.iv - store float %mul, float* %arrayidx2, align 4, !tbaa !0 + store float %mul, float* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -34,7 +34,3 @@ for.body: ; preds = %entry, %for.body for.end: ; preds = %for.body, %entry ret i32 undef } - -!0 = metadata !{metadata !"float", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/Transforms/LoopVectorize/runtime-limit.ll b/test/Transforms/LoopVectorize/runtime-limit.ll new file mode 100644 index 0000000..d783974 --- /dev/null +++ b/test/Transforms/LoopVectorize/runtime-limit.ll @@ -0,0 +1,84 @@ +; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +; We are vectorizing with 6 runtime checks. 
+;CHECK: func1x6 +;CHECK: <4 x i32> +;CHECK: ret +define i32 @func1x6(i32* nocapture %out, i32* nocapture %A, i32* nocapture %B, i32* nocapture %C, i32* nocapture %D, i32* nocapture %E, i32* nocapture %F) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.016 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i32* %A, i64 %i.016 + %0 = load i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32* %B, i64 %i.016 + %1 = load i32* %arrayidx1, align 4 + %add = add nsw i32 %1, %0 + %arrayidx2 = getelementptr inbounds i32* %C, i64 %i.016 + %2 = load i32* %arrayidx2, align 4 + %add3 = add nsw i32 %add, %2 + %arrayidx4 = getelementptr inbounds i32* %E, i64 %i.016 + %3 = load i32* %arrayidx4, align 4 + %add5 = add nsw i32 %add3, %3 + %arrayidx6 = getelementptr inbounds i32* %F, i64 %i.016 + %4 = load i32* %arrayidx6, align 4 + %add7 = add nsw i32 %add5, %4 + %arrayidx8 = getelementptr inbounds i32* %out, i64 %i.016 + store i32 %add7, i32* %arrayidx8, align 4 + %inc = add i64 %i.016, 1 + %exitcond = icmp eq i64 %inc, 256 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret i32 undef +} + +; We are not vectorizing with 12 runtime checks. 
+;CHECK: func2x6 +;CHECK-NOT: <4 x i32> +;CHECK: ret +define i32 @func2x6(i32* nocapture %out, i32* nocapture %out2, i32* nocapture %A, i32* nocapture %B, i32* nocapture %C, i32* nocapture %D, i32* nocapture %E, i32* nocapture %F) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.037 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i32* %A, i64 %i.037 + %0 = load i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32* %B, i64 %i.037 + %1 = load i32* %arrayidx1, align 4 + %add = add nsw i32 %1, %0 + %arrayidx2 = getelementptr inbounds i32* %C, i64 %i.037 + %2 = load i32* %arrayidx2, align 4 + %add3 = add nsw i32 %add, %2 + %arrayidx4 = getelementptr inbounds i32* %E, i64 %i.037 + %3 = load i32* %arrayidx4, align 4 + %add5 = add nsw i32 %add3, %3 + %arrayidx6 = getelementptr inbounds i32* %F, i64 %i.037 + %4 = load i32* %arrayidx6, align 4 + %add7 = add nsw i32 %add5, %4 + %arrayidx8 = getelementptr inbounds i32* %out, i64 %i.037 + store i32 %add7, i32* %arrayidx8, align 4 + %5 = load i32* %arrayidx, align 4 + %6 = load i32* %arrayidx1, align 4 + %add11 = add nsw i32 %6, %5 + %7 = load i32* %arrayidx2, align 4 + %add13 = add nsw i32 %add11, %7 + %8 = load i32* %arrayidx4, align 4 + %add15 = add nsw i32 %add13, %8 + %9 = load i32* %arrayidx6, align 4 + %add17 = add nsw i32 %add15, %9 + %arrayidx18 = getelementptr inbounds i32* %out2, i64 %i.037 + store i32 %add17, i32* %arrayidx18, align 4 + %inc = add i64 %i.037, 1 + %exitcond = icmp eq i64 %inc, 256 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret i32 undef +} + diff --git a/test/Transforms/LoopVectorize/start-non-zero.ll b/test/Transforms/LoopVectorize/start-non-zero.ll index 998001c..e8a089a 100644 --- a/test/Transforms/LoopVectorize/start-non-zero.ll +++ b/test/Transforms/LoopVectorize/start-non-zero.ll @@ -18,9 +18,9 @@ for.body.lr.ph: ; preds = %entry for.body: ; preds = %for.body.lr.ph, %for.body 
%indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv - %1 = load i32* %arrayidx, align 4, !tbaa !0 + %1 = load i32* %arrayidx, align 4 %mul = mul nuw i32 %1, 333 - store i32 %mul, i32* %arrayidx, align 4, !tbaa !0 + store i32 %mul, i32* %arrayidx, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %2 = trunc i64 %indvars.iv.next to i32 %cmp = icmp slt i32 %2, %end @@ -29,7 +29,3 @@ for.body: ; preds = %for.body.lr.ph, %fo for.end: ; preds = %for.body, %entry ret i32 4 } - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/Transforms/LoopVectorize/struct_access.ll b/test/Transforms/LoopVectorize/struct_access.ll index de65d0d..573480d 100644 --- a/test/Transforms/LoopVectorize/struct_access.ll +++ b/test/Transforms/LoopVectorize/struct_access.ll @@ -33,7 +33,7 @@ for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %sum.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] %x = getelementptr inbounds %struct.coordinate* %A, i64 %indvars.iv, i32 0 - %0 = load i32* %x, align 4, !tbaa !0 + %0 = load i32* %x, align 4 %add = add nsw i32 %0, %sum.05 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 @@ -44,7 +44,3 @@ for.end: ; preds = %for.body, %entry %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] ret i32 %sum.0.lcssa } - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/Transforms/LoopVectorize/value-ptr-bug.ll b/test/Transforms/LoopVectorize/value-ptr-bug.ll new file mode 100644 index 0000000..f376656 --- /dev/null +++ b/test/Transforms/LoopVectorize/value-ptr-bug.ll @@ -0,0 +1,50 @@ +; RUN: opt -S -loop-vectorize -force-vector-width=4 
-force-vector-unroll=1 -dce -instcombine < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; PR16073 + +; Because we were caching value pointers across a function call that could RAUW +; we would generate an undefined value store below: +; SCEVExpander::expandCodeFor would change a value (the start value of an +; induction) that we cached in the induction variable list. + +; CHECK: test_vh +; CHECK-NOT: store <4 x i8> undef + +define void @test_vh(i32* %ptr265, i32* %ptr266, i32 %sub267) { +entry: + br label %loop + +loop: + %inc = phi i32 [ %sub267, %entry ], [ %add, %loop] + %ext.inc = sext i32 %inc to i64 + %add.ptr265 = getelementptr inbounds i32* %ptr265, i64 %ext.inc + %add.ptr266 = getelementptr inbounds i32* %ptr266, i64 %ext.inc + %add = add i32 %inc, 9 + %cmp = icmp slt i32 %add, 140 + br i1 %cmp, label %block1, label %loop + +block1: + %sub267.lcssa = phi i32 [ %add, %loop ] + %add.ptr266.lcssa = phi i32* [ %add.ptr266, %loop ] + %add.ptr265.lcssa = phi i32* [ %add.ptr265, %loop ] + %tmp29 = bitcast i32* %add.ptr265.lcssa to i8* + %tmp30 = bitcast i32* %add.ptr266.lcssa to i8* + br label %do.body272 + +do.body272: + %row_width.5 = phi i32 [ %sub267.lcssa, %block1 ], [ %dec, %do.body272 ] + %sp.4 = phi i8* [ %tmp30, %block1 ], [ %incdec.ptr273, %do.body272 ] + %dp.addr.4 = phi i8* [ %tmp29, %block1 ], [ %incdec.ptr274, %do.body272 ] + %incdec.ptr273 = getelementptr inbounds i8* %sp.4, i64 1 + %tmp31 = load i8* %sp.4, align 1 + %incdec.ptr274 = getelementptr inbounds i8* %dp.addr.4, i64 1 + store i8 %tmp31, i8* %dp.addr.4, align 1 + %dec = add i32 %row_width.5, -1 + %cmp276 = icmp eq i32 %dec, 0 + br i1 %cmp276, label %loop.exit, label %do.body272 + +loop.exit: + ret void +} diff --git a/test/Transforms/LoopVectorize/vectorize-once.ll b/test/Transforms/LoopVectorize/vectorize-once.ll index ac16948..f289ded 100644 
--- a/test/Transforms/LoopVectorize/vectorize-once.ll +++ b/test/Transforms/LoopVectorize/vectorize-once.ll @@ -29,7 +29,7 @@ entry: for.body.i: ; preds = %entry, %for.body.i %__init.addr.05.i = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ] %__first.addr.04.i = phi i32* [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ] - %0 = load i32* %__first.addr.04.i, align 4, !tbaa !0 + %0 = load i32* %__first.addr.04.i, align 4 %add.i = add nsw i32 %0, %__init.addr.05.i %incdec.ptr.i = getelementptr inbounds i32* %__first.addr.04.i, i64 1 %cmp.i = icmp eq i32* %incdec.ptr.i, %add.ptr @@ -55,7 +55,7 @@ entry: for.body.i: ; preds = %entry, %for.body.i %__init.addr.05.i = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ] %__first.addr.04.i = phi i32* [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ] - %0 = load i32* %__first.addr.04.i, align 4, !tbaa !0 + %0 = load i32* %__first.addr.04.i, align 4 %add.i = add nsw i32 %0, %__init.addr.05.i %incdec.ptr.i = getelementptr inbounds i32* %__first.addr.04.i, i64 1 %cmp.i = icmp eq i32* %incdec.ptr.i, %add.ptr @@ -68,8 +68,5 @@ _ZSt10accumulateIPiiET0_T_S2_S1_.exit: ; preds = %for.body.i, %entry attributes #0 = { nounwind readonly ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" } -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} !3 = metadata !{} |