diff options
Diffstat (limited to 'test/Transforms/LoopVectorize/X86')
6 files changed, 109 insertions, 32 deletions
diff --git a/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll b/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll index 6c92440..f4c07b4 100644 --- a/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll +++ b/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll @@ -1,5 +1,7 @@ ; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -loop-vectorize -dce -instcombine -S < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + @B = common global [1024 x i32] zeroinitializer, align 16 @A = common global [1024 x i32] zeroinitializer, align 16 diff --git a/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll b/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll new file mode 100644 index 0000000..47a5e7a --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll @@ -0,0 +1,56 @@ +; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +;CHECK: @foo +;CHECK-NOT: <4 x i32> +;CHECK: ret void + +; Function Attrs: nounwind uwtable +define void @foo(i32* nocapture %a, i32* nocapture %b, i32 %k, i32 %m) #0 { +entry: + %cmp27 = icmp sgt i32 %m, 0 + br i1 %cmp27, label %for.body3.lr.ph.us, label %for.end15 + +for.end.us: ; preds = %for.body3.us + %arrayidx9.us = getelementptr inbounds i32* %b, i64 %indvars.iv33 + %0 = load i32* %arrayidx9.us, align 4, !llvm.mem.parallel_loop_access !3 + %add10.us = add nsw i32 %0, 3 + store i32 %add10.us, i32* %arrayidx9.us, align 4, !llvm.mem.parallel_loop_access !3 + %indvars.iv.next34 = add i64 %indvars.iv33, 1 + %lftr.wideiv35 = trunc i64 %indvars.iv.next34 to i32 + %exitcond36 = icmp eq i32 %lftr.wideiv35, %m + br i1 %exitcond36, label %for.end15, label %for.body3.lr.ph.us, !llvm.loop.parallel !5 + +for.body3.us: ; preds = %for.body3.us, %for.body3.lr.ph.us + %indvars.iv29 = phi i64 [ 0, %for.body3.lr.ph.us ], [ %indvars.iv.next30, %for.body3.us ] + %1 = trunc i64 %indvars.iv29 to i32 + %add4.us = add i32 %add.us, %1 + %idxprom.us = sext i32 %add4.us to i64 + %arrayidx.us = getelementptr inbounds i32* %a, i64 %idxprom.us + %2 = load i32* %arrayidx.us, align 4, !llvm.mem.parallel_loop_access !3 + %add5.us = add nsw i32 %2, 1 + store i32 %add5.us, i32* %arrayidx7.us, align 4, !llvm.mem.parallel_loop_access !3 + %indvars.iv.next30 = add i64 %indvars.iv29, 1 + %lftr.wideiv31 = trunc i64 %indvars.iv.next30 to i32 + %exitcond32 = icmp eq i32 %lftr.wideiv31, %m + br i1 %exitcond32, label %for.end.us, label %for.body3.us, !llvm.loop.parallel !4 + +for.body3.lr.ph.us: ; preds = %for.end.us, %entry + %indvars.iv33 = phi i64 [ %indvars.iv.next34, %for.end.us ], [ 0, %entry ] + %3 = trunc i64 %indvars.iv33 to i32 + %add.us = add i32 %3, %k + %arrayidx7.us = getelementptr inbounds i32* %a, i64 %indvars.iv33 + br label %for.body3.us + +for.end15: ; preds = %for.end.us, %entry + ret void +} + +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!3 = metadata !{metadata !4, metadata !5} +!4 = metadata !{metadata !4} +!5 = metadata !{metadata !5} + diff --git a/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll b/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll index 186fba8..8716cff 100644 --- a/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll +++ b/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll @@ -11,9 +11,9 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %arrayidx = getelementptr inbounds float* %a, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %add = fadd float %0, 1.000000e+00 - store float %add, float* %arrayidx, align 4, !tbaa !0 + store float %add, float* %arrayidx, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, 8 @@ -22,7 +22,3 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret void } - -!0 = metadata !{metadata !"float", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll index 452d0df..f904a8e 100644 --- a/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll +++ b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll @@ -19,19 +19,19 @@ entry: for.body: ; preds = %for.body.for.body_crit_edge, %entry %indvars.iv.reload = load i64* %indvars.iv.reg2mem %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv.reload - %0 = load i32* %arrayidx, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3 + %0 = load i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3 %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv.reload - %1 = load i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3 + %1 = load i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3 %idxprom3 = sext i32 %1 to i64 %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3 - store i32 %0, i32* %arrayidx4, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3 + store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !3 %indvars.iv.next = add i64 %indvars.iv.reload, 1 ; A new store without the parallel metadata here: store i64 %indvars.iv.next, i64* %indvars.iv.next.reg2mem %indvars.iv.next.reload1 = load i64* %indvars.iv.next.reg2mem %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next.reload1 - %2 = load i32* %arrayidx6, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3 - store i32 %2, i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3 + %2 = load i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !3 + store i32 %2, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3 %indvars.iv.next.reload = load i64* %indvars.iv.next.reg2mem %lftr.wideiv = trunc i64 %indvars.iv.next.reload to i32 %exitcond = icmp eq i32 %lftr.wideiv, 512 @@ -46,7 +46,4 @@ for.end: ; preds = %for.body ret void } -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} !3 = metadata !{metadata !3} diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops.ll b/test/Transforms/LoopVectorize/X86/parallel-loops.ll index f648722..3f1a071 100644 --- a/test/Transforms/LoopVectorize/X86/parallel-loops.ll +++ b/test/Transforms/LoopVectorize/X86/parallel-loops.ll @@ -21,16 +21,16 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4, !tbaa !0 + %0 = load i32* %arrayidx, align 4 %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv - %1 = load i32* %arrayidx2, align 4, !tbaa !0 + %1 = load i32* %arrayidx2, align 4 %idxprom3 = sext i32 %1 to i64 %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3 - store i32 %0, i32* %arrayidx4, align 4, !tbaa !0 + store i32 %0, i32* %arrayidx4, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next - %2 = load i32* %arrayidx6, align 4, !tbaa !0 - store i32 %2, i32* %arrayidx2, align 4, !tbaa !0 + %2 = load i32* %arrayidx6, align 4 + store i32 %2, i32* %arrayidx2, align 4 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, 512 br i1 %exitcond, label %for.end, label %for.body @@ -51,18 +51,18 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3 + %0 = load i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3 %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv - %1 = load i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3 + %1 = load i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3 %idxprom3 = sext i32 %1 to i64 %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3 ; This store might have originated from inlining a function with a parallel ; loop. Refers to a list with the "original loop reference" (!4) also included. - store i32 %0, i32* %arrayidx4, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !5 + store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !5 %indvars.iv.next = add i64 %indvars.iv, 1 %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next - %2 = load i32* %arrayidx6, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3 - store i32 %2, i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3 + %2 = load i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !3 + store i32 %2, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, 512 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop.parallel !3 @@ -84,18 +84,18 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6 + %0 = load i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !6 %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv - %1 = load i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6 + %1 = load i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !6 %idxprom3 = sext i32 %1 to i64 %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3 ; This refers to the loop marked with !7 which we are not in at the moment. ; It should prevent detecting as a parallel loop. - store i32 %0, i32* %arrayidx4, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !7 + store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !7 %indvars.iv.next = add i64 %indvars.iv, 1 %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next - %2 = load i32* %arrayidx6, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6 - store i32 %2, i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6 + %2 = load i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !6 + store i32 %2, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !6 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, 512 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop.parallel !6 @@ -104,9 +104,6 @@ for.end: ; preds = %for.body ret void } -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} !3 = metadata !{metadata !3} !4 = metadata !{metadata !4} !5 = metadata !{metadata !3, metadata !4} diff --git a/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll b/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll new file mode 100644 index 0000000..b66119f --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll @@ -0,0 +1,29 @@ +; RUN: opt -O3 -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 -S < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.7.0" + +@x = common global [1024 x x86_fp80] zeroinitializer, align 16 + +;CHECK: @example +;CHECK-NOT: bitcast x86_fp80* {{%[^ ]+}} to <{{[2-9][0-9]*}} x x86_fp80>* +;CHECK: store +;CHECK: ret void + +define void @example() nounwind ssp uwtable { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %conv = sitofp i32 1 to x86_fp80 + %arrayidx = getelementptr inbounds [1024 x x86_fp80]* @x, i64 0, i64 %indvars.iv + store x86_fp80 %conv, x86_fp80* %arrayidx, align 16 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} |