diff options
Diffstat (limited to 'test/Transforms/LoopVectorize/X86')
-rw-r--r-- | test/Transforms/LoopVectorize/X86/avx1.ll | 49 | ||||
-rw-r--r-- | test/Transforms/LoopVectorize/X86/conversion-cost.ll | 48 | ||||
-rw-r--r-- | test/Transforms/LoopVectorize/X86/cost-model.ll | 38 | ||||
-rw-r--r-- | test/Transforms/LoopVectorize/X86/gcc-examples.ll | 62 | ||||
-rw-r--r-- | test/Transforms/LoopVectorize/X86/lit.local.cfg | 6 |
5 files changed, 203 insertions, 0 deletions
diff --git a/test/Transforms/LoopVectorize/X86/avx1.ll b/test/Transforms/LoopVectorize/X86/avx1.ll new file mode 100644 index 0000000..a2d176a --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/avx1.ll @@ -0,0 +1,49 @@ +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +;CHECK: @read_mod_write_single_ptr +;CHECK: load <8 x float> +;CHECK: ret i32 +define i32 @read_mod_write_single_ptr(float* nocapture %a, i32 %n) nounwind uwtable ssp { + %1 = icmp sgt i32 %n, 0 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] + %2 = getelementptr inbounds float* %a, i64 %indvars.iv + %3 = load float* %2, align 4 + %4 = fmul float %3, 3.000000e+00 + store float %4, float* %2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + ret i32 undef +} + + +;CHECK: @read_mod_i64 +;CHECK: load <8 x i64> +;CHECK: ret i32 +define i32 @read_mod_i64(i64* nocapture %a, i32 %n) nounwind uwtable ssp { + %1 = icmp sgt i32 %n, 0 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] + %2 = getelementptr inbounds i64* %a, i64 %indvars.iv + %3 = load i64* %2, align 4 + %4 = mul i64 %3, 3 + store i64 %4, i64* %2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + ret i32 undef +} diff --git a/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/test/Transforms/LoopVectorize/X86/conversion-cost.ll new file mode 100644 index 0000000..8f1bb54 --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/conversion-cost.ll @@ -0,0 +1,48 @@ +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +;CHECK: @conversion_cost1 +;CHECK: store <2 x i8> +;CHECK: ret +define i32 @conversion_cost1(i32 %n, i8* nocapture %A, float* nocapture %B) nounwind uwtable ssp { + %1 = icmp sgt i32 %n, 3 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 3, %0 ] + %2 = trunc i64 %indvars.iv to i8 + %3 = getelementptr inbounds i8* %A, i64 %indvars.iv + store i8 %2, i8* %3, align 1 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + ret i32 undef +} + +;CHECK: @conversion_cost2 +;CHECK: <2 x float> +;CHECK: ret +define i32 @conversion_cost2(i32 %n, i8* nocapture %A, float* nocapture %B) nounwind uwtable ssp { + %1 = icmp sgt i32 %n, 9 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 9, %0 ] + %2 = add nsw i64 %indvars.iv, 3 + %3 = trunc i64 %2 to i32 + %4 = sitofp i32 %3 to float + %5 = getelementptr inbounds float* %B, i64 %indvars.iv + store float %4, float* %5, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + ret i32 undef +} diff --git a/test/Transforms/LoopVectorize/X86/cost-model.ll b/test/Transforms/LoopVectorize/X86/cost-model.ll new file mode 100644 index 0000000..628f991 --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -0,0 +1,38 @@ +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +@c = common global [2048 x i32] zeroinitializer, align 16 +@b = common global [2048 x i32] zeroinitializer, align 16 +@d = common global [2048 x i32] zeroinitializer, align 16 +@a = common global [2048 x i32] zeroinitializer, align 16 + +;CHECK: cost_model_1 +;CHECK: <4 x i32> +;CHECK: ret void +define void @cost_model_1() nounwind uwtable noinline ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %0 = shl nsw i64 %indvars.iv, 1 + %arrayidx = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %0 + %1 = load i32* %arrayidx, align 8 + %idxprom1 = sext i32 %1 to i64 + %arrayidx2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %idxprom1 + %2 = load i32* %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds [2048 x i32]* @d, i64 0, i64 %indvars.iv + %3 = load i32* %arrayidx4, align 4 + %idxprom5 = sext i32 %3 to i64 + %arrayidx6 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %idxprom5 + store i32 %2, i32* %arrayidx6, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 256 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} diff --git a/test/Transforms/LoopVectorize/X86/gcc-examples.ll b/test/Transforms/LoopVectorize/X86/gcc-examples.ll new file mode 100644 index 0000000..574c529 --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/gcc-examples.ll @@ -0,0 +1,62 @@ +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -dce -instcombine -licm -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +@b = common global [2048 x i32] zeroinitializer, align 16 +@c = common global [2048 x i32] zeroinitializer, align 16 +@a = common global [2048 x i32] zeroinitializer, align 16 + +; Select VF = 8; +;CHECK: @example1 +;CHECK: load <8 x i32> +;CHECK: add nsw <8 x i32> +;CHECK: store <8 x i32> +;CHECK: ret void +define void @example1() nounwind uwtable ssp { + br label %1 + +; <label>:1 ; preds = %1, %0 + %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] + %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv + %3 = load i32* %2, align 4 + %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv + %5 = load i32* %4, align 4 + %6 = add nsw i32 %5, %3 + %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv + store i32 %6, i32* %7, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 256 + br i1 %exitcond, label %8, label %1 + +; <label>:8 ; preds = %1 + ret void +} + + +; Select VF=4 because sext <8 x i1> to <8 x i32> is expensive. +;CHECK: @example10b +;CHECK: load <4 x i16> +;CHECK: sext <4 x i16> +;CHECK: store <4 x i32> +;CHECK: ret void +define void @example10b(i16* noalias nocapture %sa, i16* noalias nocapture %sb, i16* noalias nocapture %sc, i32* noalias nocapture %ia, i32* noalias nocapture %ib, i32* noalias nocapture %ic) nounwind uwtable ssp { + br label %1 + +; <label>:1 ; preds = %1, %0 + %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] + %2 = getelementptr inbounds i16* %sb, i64 %indvars.iv + %3 = load i16* %2, align 2 + %4 = sext i16 %3 to i32 + %5 = getelementptr inbounds i32* %ia, i64 %indvars.iv + store i32 %4, i32* %5, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %6, label %1 + +; <label>:6 ; preds = %1 + ret void +} + diff --git a/test/Transforms/LoopVectorize/X86/lit.local.cfg b/test/Transforms/LoopVectorize/X86/lit.local.cfg new file mode 100644 index 0000000..a8ad0f1 --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/lit.local.cfg @@ -0,0 +1,6 @@ +config.suffixes = ['.ll', '.c', '.cpp'] + +targets = set(config.root.targets_to_build.split()) +if not 'X86' in targets: + config.unsupported = True + |