diff options
Diffstat (limited to 'test/CodeGen/PTX')
-rw-r--r-- | test/CodeGen/PTX/20110926-sitofp.ll | 24 | ||||
-rw-r--r-- | test/CodeGen/PTX/add.ll | 40 | ||||
-rw-r--r-- | test/CodeGen/PTX/aggregates.ll | 1 | ||||
-rw-r--r-- | test/CodeGen/PTX/bitwise.ll | 6 | ||||
-rw-r--r-- | test/CodeGen/PTX/bra.ll | 8 | ||||
-rw-r--r-- | test/CodeGen/PTX/cvt.ll | 186 | ||||
-rw-r--r-- | test/CodeGen/PTX/fdiv-sm10.ll | 8 | ||||
-rw-r--r-- | test/CodeGen/PTX/fdiv-sm13.ll | 8 | ||||
-rw-r--r-- | test/CodeGen/PTX/fneg.ll | 8 | ||||
-rw-r--r-- | test/CodeGen/PTX/intrinsic.ll | 134 | ||||
-rw-r--r-- | test/CodeGen/PTX/ld.ll | 255 | ||||
-rw-r--r-- | test/CodeGen/PTX/llvm-intrinsic.ll | 24 | ||||
-rw-r--r-- | test/CodeGen/PTX/mad.ll | 8 | ||||
-rw-r--r-- | test/CodeGen/PTX/mov.ll | 24 | ||||
-rw-r--r-- | test/CodeGen/PTX/mul.ll | 16 | ||||
-rw-r--r-- | test/CodeGen/PTX/parameter-order.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/PTX/selp.ll | 8 | ||||
-rw-r--r-- | test/CodeGen/PTX/setp.ll | 136 | ||||
-rw-r--r-- | test/CodeGen/PTX/shl.ll | 6 | ||||
-rw-r--r-- | test/CodeGen/PTX/shr.ll | 12 | ||||
-rw-r--r-- | test/CodeGen/PTX/simple-call.ll | 27 | ||||
-rw-r--r-- | test/CodeGen/PTX/st.ll | 235 | ||||
-rw-r--r-- | test/CodeGen/PTX/stack-object.ll | 19 | ||||
-rw-r--r-- | test/CodeGen/PTX/sub.ll | 40 |
24 files changed, 612 insertions, 625 deletions
diff --git a/test/CodeGen/PTX/20110926-sitofp.ll b/test/CodeGen/PTX/20110926-sitofp.ll new file mode 100644 index 0000000..38d35c5 --- /dev/null +++ b/test/CodeGen/PTX/20110926-sitofp.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -march=ptx32 | FileCheck %s + +@A = common global [1536 x [1536 x float]] zeroinitializer, align 4 +@B = common global [1536 x [1536 x float]] zeroinitializer, align 4 + +define internal ptx_device void @init_array(i32 %x, i32 %y) { + %arrayidx103 = getelementptr [1536 x [1536 x float]]* @A, i32 0, i32 %x, i32 %y + %arrayidx224 = getelementptr [1536 x [1536 x float]]* @B, i32 0, i32 %x, i32 %y + %mul5 = mul i32 %x, %y + %rem = srem i32 %mul5, 1024 + %add = add nsw i32 %rem, 1 +; CHECK: cvt.rn.f64.s32 %fd{{[0-9]+}}, %r{{[0-9]+}} + %conv = sitofp i32 %add to double + %div = fmul double %conv, 5.000000e-01 + %conv7 = fptrunc double %div to float + store float %conv7, float* %arrayidx103, align 4 + %rem14 = srem i32 %mul5, 1024 + %add15 = add nsw i32 %rem14, 1 + %conv16 = sitofp i32 %add15 to double + %div17 = fmul double %conv16, 5.000000e-01 + %conv18 = fptrunc double %div17 to float + store float %conv18, float* %arrayidx224, align 4 + ret void +} diff --git a/test/CodeGen/PTX/add.ll b/test/CodeGen/PTX/add.ll index 293aebe..8b10d11 100644 --- a/test/CodeGen/PTX/add.ll +++ b/test/CodeGen/PTX/add.ll @@ -1,71 +1,71 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device i16 @t1_u16(i16 %x, i16 %y) { -; CHECK: add.u16 rh{{[0-9]+}}, rh{{[0-9]+}}, rh{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: add.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}, %rh{{[0-9]+}}; +; CHECK: ret; %z = add i16 %x, %y ret i16 %z } define ptx_device i32 @t1_u32(i32 %x, i32 %y) { -; CHECK: add.u32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: add.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: ret; %z = add i32 %x, %y ret i32 %z } define ptx_device i64 @t1_u64(i64 %x, i64 %y) { -; CHECK: add.u64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: add.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}}; +; CHECK: ret; %z = add i64 %x, %y ret i64 %z } define ptx_device float @t1_f32(float %x, float %y) { -; CHECK: add.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}} -; CHECK-NEXT: ret; +; CHECK: add.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}} +; CHECK: ret; %z = fadd float %x, %y ret float %z } define ptx_device double @t1_f64(double %x, double %y) { -; CHECK: add.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}} -; CHECK-NEXT: ret; +; CHECK: add.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}} +; CHECK: ret; %z = fadd double %x, %y ret double %z } define ptx_device i16 @t2_u16(i16 %x) { -; CHECK: add.u16 rh{{[0-9]+}}, rh{{[0-9]+}}, 1; -; CHECK-NEXT: ret; +; CHECK: add.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}, 1; +; CHECK: ret; %z = add i16 %x, 1 ret i16 %z } define ptx_device i32 @t2_u32(i32 %x) { -; CHECK: add.u32 r{{[0-9]+}}, r{{[0-9]+}}, 1; -; CHECK-NEXT: ret; +; CHECK: add.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, 1; +; CHECK: ret; %z = add i32 %x, 1 ret i32 %z } define ptx_device i64 @t2_u64(i64 %x) { -; CHECK: add.u64 rd{{[0-9]+}}, rd{{[0-9]+}}, 1; -; CHECK-NEXT: ret; +; CHECK: add.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, 1; +; CHECK: ret; %z = add i64 %x, 1 ret i64 %z } define ptx_device float @t2_f32(float %x) { -; CHECK: add.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, 0F3F800000; -; CHECK-NEXT: ret; +; CHECK: add.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, 0D3FF0000000000000; +; CHECK: ret; %z = fadd float %x, 1.0 ret float %z } define ptx_device double @t2_f64(double %x) { -; CHECK: add.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, 0D3FF0000000000000; -; CHECK-NEXT: ret; +; CHECK: add.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, 0D3FF0000000000000; +; CHECK: ret; %z = fadd double %x, 1.0 ret double %z } diff --git a/test/CodeGen/PTX/aggregates.ll b/test/CodeGen/PTX/aggregates.ll index 23f28a7..3fc0c40 100644 --- a/test/CodeGen/PTX/aggregates.ll +++ b/test/CodeGen/PTX/aggregates.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s +; XFAIL: * %complex = type { float, float } diff --git a/test/CodeGen/PTX/bitwise.ll b/test/CodeGen/PTX/bitwise.ll index 3859280..1403a23 100644 --- a/test/CodeGen/PTX/bitwise.ll +++ b/test/CodeGen/PTX/bitwise.ll @@ -3,21 +3,21 @@ ; preds define ptx_device i32 @t1_and_preds(i1 %x, i1 %y) { -; CHECK: and.pred p{{[0-9]+}}, p{{[0-9]+}}, p{{[0-9]+}} +; CHECK: and.pred %p{{[0-9]+}}, %p{{[0-9]+}}, %p{{[0-9]+}} %c = and i1 %x, %y %d = zext i1 %c to i32 ret i32 %d } define ptx_device i32 @t1_or_preds(i1 %x, i1 %y) { -; CHECK: or.pred p{{[0-9]+}}, p{{[0-9]+}}, p{{[0-9]+}} +; CHECK: or.pred %p{{[0-9]+}}, %p{{[0-9]+}}, %p{{[0-9]+}} %a = or i1 %x, %y %b = zext i1 %a to i32 ret i32 %b } define ptx_device i32 @t1_xor_preds(i1 %x, i1 %y) { -; CHECK: xor.pred p{{[0-9]+}}, p{{[0-9]+}}, p{{[0-9]+}} +; CHECK: xor.pred %p{{[0-9]+}}, %p{{[0-9]+}}, %p{{[0-9]+}} %a = xor i1 %x, %y %b = zext i1 %a to i32 ret i32 %b diff --git a/test/CodeGen/PTX/bra.ll b/test/CodeGen/PTX/bra.ll index 7cc9444..464c29c 100644 --- a/test/CodeGen/PTX/bra.ll +++ b/test/CodeGen/PTX/bra.ll @@ -10,15 +10,15 @@ loop: define ptx_device i32 @test_bra_cond_direct(i32 %x, i32 %y) { entry: -; CHECK: setp.le.u32 p0, r[[R0:[0-9]+]], r[[R1:[0-9]+]] +; CHECK: setp.le.u32 %p0, %r[[R0:[0-9]+]], %r[[R1:[0-9]+]] %p = icmp ugt i32 %x, %y -; CHECK-NEXT: @p0 bra +; CHECK-NEXT: @%p0 bra ; CHECK-NOT: bra br i1 %p, label %clause.if, label %clause.else clause.if: -; CHECK: mov.u32 r{{[0-9]+}}, r[[R0]] +; CHECK: mov.u32 %ret{{[0-9]+}}, %r[[R0]] ret i32 %x clause.else: -; CHECK: mov.u32 r{{[0-9]+}}, r[[R1]] +; CHECK: mov.u32 %ret{{[0-9]+}}, %r[[R1]] ret i32 %y } diff --git a/test/CodeGen/PTX/cvt.ll b/test/CodeGen/PTX/cvt.ll index 853abaf..a643d25 100644 --- a/test/CodeGen/PTX/cvt.ll +++ b/test/CodeGen/PTX/cvt.ll @@ -1,13 +1,13 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s -; preds +; preds ; (note: we convert back to i32 to return) define ptx_device i32 @cvt_pred_i16(i16 %x, i1 %y) { -; CHECK: setp.gt.u16 p[[P0:[0-9]+]], rh{{[0-9]+}}, 0 -; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}}; -; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0:[0-9]+]]; -; CHECK-NEXT: ret; +; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rh{{[0-9]+}}, 0 +; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]]; +; CHECK: ret; %a = trunc i16 %x to i1 %b = and i1 %a, %y %c = zext i1 %b to i32 @@ -15,10 +15,10 @@ define ptx_device i32 @cvt_pred_i16(i16 %x, i1 %y) { } define ptx_device i32 @cvt_pred_i32(i32 %x, i1 %y) { -; CHECK: setp.gt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0 -; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}}; -; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0:[0-9]+]]; -; CHECK-NEXT: ret; +; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0 +; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]]; +; CHECK: ret; %a = trunc i32 %x to i1 %b = and i1 %a, %y %c = zext i1 %b to i32 @@ -26,10 +26,10 @@ define ptx_device i32 @cvt_pred_i32(i32 %x, i1 %y) { } define ptx_device i32 @cvt_pred_i64(i64 %x, i1 %y) { -; CHECK: setp.gt.u64 p[[P0:[0-9]+]], rd{{[0-9]+}}, 0 -; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}}; -; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0:[0-9]+]]; -; CHECK-NEXT: ret; +; CHECK: setp.gt.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, 0 +; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]]; +; CHECK: ret; %a = trunc i64 %x to i1 %b = and i1 %a, %y %c = zext i1 %b to i32 @@ -37,10 +37,10 @@ define ptx_device i32 @cvt_pred_i64(i64 %x, i1 %y) { } define ptx_device i32 @cvt_pred_f32(float %x, i1 %y) { -; CHECK: setp.gt.f32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0 -; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}}; -; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0:[0-9]+]]; -; CHECK-NEXT: ret; +; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0 +; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]]; +; CHECK: ret; %a = fptoui float %x to i1 %b = and i1 %a, %y %c = zext i1 %b to i32 @@ -48,10 +48,10 @@ define ptx_device i32 @cvt_pred_f32(float %x, i1 %y) { } define ptx_device i32 @cvt_pred_f64(double %x, i1 %y) { -; CHECK: setp.gt.f64 p[[P0:[0-9]+]], rd{{[0-9]+}}, 0 -; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}}; -; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0:[0-9]+]]; -; CHECK-NEXT: ret; +; CHECK: setp.gt.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, 0 +; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]]; +; CHECK: ret; %a = fptoui double %x to i1 %b = and i1 %a, %y %c = zext i1 %b to i32 @@ -61,36 +61,36 @@ define ptx_device i32 @cvt_pred_f64(double %x, i1 %y) { ; i16 define ptx_device i16 @cvt_i16_preds(i1 %x) { -; CHECK: selp.u16 rh{{[0-9]+}}, 1, 0, p{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: selp.u16 %ret{{[0-9]+}}, 1, 0, %p{{[0-9]+}}; +; CHECK: ret; %a = zext i1 %x to i16 ret i16 %a } define ptx_device i16 @cvt_i16_i32(i32 %x) { -; CHECK: cvt.u16.u32 rh{{[0-9]+}}, r{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: cvt.u16.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: ret; %a = trunc i32 %x to i16 ret i16 %a } define ptx_device i16 @cvt_i16_i64(i64 %x) { -; CHECK: cvt.u16.u64 rh{{[0-9]+}}, rd{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: cvt.u16.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}; +; CHECK: ret; %a = trunc i64 %x to i16 ret i16 %a } define ptx_device i16 @cvt_i16_f32(float %x) { -; CHECK: cvt.rzi.u16.f32 rh{{[0-9]+}}, r{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: cvt.rzi.u16.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}; +; CHECK: ret; %a = fptoui float %x to i16 ret i16 %a } define ptx_device i16 @cvt_i16_f64(double %x) { -; CHECK: cvt.rzi.u16.f64 rh{{[0-9]+}}, rd{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: cvt.rzi.u16.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}; +; CHECK: ret; %a = fptoui double %x to i16 ret i16 %a } @@ -98,36 +98,36 @@ define ptx_device i16 @cvt_i16_f64(double %x) { ; i32 define ptx_device i32 @cvt_i32_preds(i1 %x) { -; CHECK: selp.u32 r{{[0-9]+}}, 1, 0, p{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p{{[0-9]+}}; +; CHECK: ret; %a = zext i1 %x to i32 ret i32 %a } define ptx_device i32 @cvt_i32_i16(i16 %x) { -; CHECK: cvt.u32.u16 r{{[0-9]+}}, rh{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: cvt.u32.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}; +; CHECK: ret; %a = zext i16 %x to i32 ret i32 %a } define ptx_device i32 @cvt_i32_i64(i64 %x) { -; CHECK: cvt.u32.u64 r{{[0-9]+}}, rd{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: cvt.u32.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}; +; CHECK: ret; %a = trunc i64 %x to i32 ret i32 %a } define ptx_device i32 @cvt_i32_f32(float %x) { -; CHECK: cvt.rzi.u32.f32 r{{[0-9]+}}, r{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: cvt.rzi.u32.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}; +; CHECK: ret; %a = fptoui float %x to i32 ret i32 %a } define ptx_device i32 @cvt_i32_f64(double %x) { -; CHECK: cvt.rzi.u32.f64 r{{[0-9]+}}, rd{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: cvt.rzi.u32.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}; +; CHECK: ret; %a = fptoui double %x to i32 ret i32 %a } @@ -135,35 +135,35 @@ define ptx_device i32 @cvt_i32_f64(double %x) { ; i64 define ptx_device i64 @cvt_i64_preds(i1 %x) { -; CHECK: selp.u64 rd{{[0-9]+}}, 1, 0, p{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: selp.u64 %ret{{[0-9]+}}, 1, 0, %p{{[0-9]+}}; +; CHECK: ret; %a = zext i1 %x to i64 ret i64 %a } define ptx_device i64 @cvt_i64_i16(i16 %x) { -; CHECK: cvt.u64.u16 rd{{[0-9]+}}, rh{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: cvt.u64.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}; +; CHECK: ret; %a = zext i16 %x to i64 ret i64 %a } define ptx_device i64 @cvt_i64_i32(i32 %x) { -; CHECK: cvt.u64.u32 rd{{[0-9]+}}, r{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: cvt.u64.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: ret; %a = zext i32 %x to i64 ret i64 %a } define ptx_device i64 @cvt_i64_f32(float %x) { -; CHECK: cvt.rzi.u64.f32 rd{{[0-9]+}}, r{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: cvt.rzi.u64.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}; +; CHECK: ret; %a = fptoui float %x to i64 ret i64 %a } define ptx_device i64 @cvt_i64_f64(double %x) { -; CHECK: cvt.rzi.u64.f64 rd{{[0-9]+}}, rd{{[0-9]+}}; +; CHECK: cvt.rzi.u64.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}; ; CHECK: ret; %a = fptoui double %x to i64 ret i64 %a @@ -172,73 +172,119 @@ define ptx_device i64 @cvt_i64_f64(double %x) { ; f32 define ptx_device float @cvt_f32_preds(i1 %x) { -; CHECK: selp.f32 r{{[0-9]+}}, 0F3F800000, 0F00000000, p{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: mov.b32 %f0, 1065353216; +; CHECK: mov.b32 %f1, 0; +; CHECK: selp.f32 %ret{{[0-9]+}}, %f0, %f1, %p{{[0-9]+}}; +; CHECK: ret; %a = uitofp i1 %x to float ret float %a } define ptx_device float @cvt_f32_i16(i16 %x) { -; CHECK: cvt.rn.f32.u16 r{{[0-9]+}}, rh{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: cvt.rn.f32.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}; +; CHECK: ret; %a = uitofp i16 %x to float ret float %a } define ptx_device float @cvt_f32_i32(i32 %x) { -; CHECK: cvt.rn.f32.u32 r{{[0-9]+}}, r{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: cvt.rn.f32.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: ret; %a = uitofp i32 %x to float ret float %a } define ptx_device float @cvt_f32_i64(i64 %x) { -; CHECK: cvt.rn.f32.u64 r{{[0-9]+}}, rd{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: cvt.rn.f32.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}; +; CHECK: ret; %a = uitofp i64 %x to float ret float %a } define ptx_device float @cvt_f32_f64(double %x) { -; CHECK: cvt.rn.f32.f64 r{{[0-9]+}}, rd{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: cvt.rn.f32.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}; +; CHECK: ret; %a = fptrunc double %x to float ret float %a } +define ptx_device float @cvt_f32_s16(i16 %x) { +; CHECK: cvt.rn.f32.s16 %ret{{[0-9]+}}, %rh{{[0-9]+}} +; CHECK: ret + %a = sitofp i16 %x to float + ret float %a +} + +define ptx_device float @cvt_f32_s32(i32 %x) { +; CHECK: cvt.rn.f32.s32 %ret{{[0-9]+}}, %r{{[0-9]+}} +; CHECK: ret + %a = sitofp i32 %x to float + ret float %a +} + +define ptx_device float @cvt_f32_s64(i64 %x) { +; CHECK: cvt.rn.f32.s64 %ret{{[0-9]+}}, %rd{{[0-9]+}} +; CHECK: ret + %a = sitofp i64 %x to float + ret float %a +} + ; f64 define ptx_device double @cvt_f64_preds(i1 %x) { -; CHECK: selp.f64 rd{{[0-9]+}}, 0D3F80000000000000, 0D0000000000000000, p{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: mov.b64 %fd0, 4575657221408423936; +; CHECK: mov.b64 %fd1, 0; +; CHECK: selp.f64 %ret{{[0-9]+}}, %fd0, %fd1, %p{{[0-9]+}}; +; CHECK: ret; %a = uitofp i1 %x to double ret double %a } define ptx_device double @cvt_f64_i16(i16 %x) { -; CHECK: cvt.rn.f64.u16 rd{{[0-9]+}}, rh{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: cvt.rn.f64.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}; +; CHECK: ret; %a = uitofp i16 %x to double ret double %a } define ptx_device double @cvt_f64_i32(i32 %x) { -; CHECK: cvt.rn.f64.u32 rd{{[0-9]+}}, r{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: cvt.rn.f64.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: ret; %a = uitofp i32 %x to double ret double %a } define ptx_device double @cvt_f64_i64(i64 %x) { -; CHECK: cvt.rn.f64.u64 rd{{[0-9]+}}, rd{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: cvt.rn.f64.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}; +; CHECK: ret; %a = uitofp i64 %x to double ret double %a } define ptx_device double @cvt_f64_f32(float %x) { -; CHECK: cvt.f64.f32 rd{{[0-9]+}}, r{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: cvt.f64.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}; +; CHECK: ret; %a = fpext float %x to double ret double %a } + +define ptx_device double @cvt_f64_s16(i16 %x) { +; CHECK: cvt.rn.f64.s16 %ret{{[0-9]+}}, %rh{{[0-9]+}} +; CHECK: ret + %a = sitofp i16 %x to double + ret double %a +} + +define ptx_device double @cvt_f64_s32(i32 %x) { +; CHECK: cvt.rn.f64.s32 %ret{{[0-9]+}}, %r{{[0-9]+}} +; CHECK: ret + %a = sitofp i32 %x to double + ret double %a +} + +define ptx_device double @cvt_f64_s64(i64 %x) { +; CHECK: cvt.rn.f64.s64 %ret{{[0-9]+}}, %rd{{[0-9]+}} +; CHECK: ret + %a = sitofp i64 %x to double + ret double %a +} diff --git a/test/CodeGen/PTX/fdiv-sm10.ll b/test/CodeGen/PTX/fdiv-sm10.ll index 049d891..e1013be 100644 --- a/test/CodeGen/PTX/fdiv-sm10.ll +++ b/test/CodeGen/PTX/fdiv-sm10.ll @@ -1,15 +1,15 @@ ; RUN: llc < %s -march=ptx32 -mattr=+sm10 | FileCheck %s define ptx_device float @t1_f32(float %x, float %y) { -; CHECK: div.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: div.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}; +; CHECK: ret; %a = fdiv float %x, %y ret float %a } define ptx_device double @t1_f64(double %x, double %y) { -; CHECK: div.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: div.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}; +; CHECK: ret; %a = fdiv double %x, %y ret double %a } diff --git a/test/CodeGen/PTX/fdiv-sm13.ll b/test/CodeGen/PTX/fdiv-sm13.ll index 2d95339..1afa2eb 100644 --- a/test/CodeGen/PTX/fdiv-sm13.ll +++ b/test/CodeGen/PTX/fdiv-sm13.ll @@ -1,15 +1,15 @@ ; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s define ptx_device float @t1_f32(float %x, float %y) { -; CHECK: div.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: div.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}; +; CHECK: ret; %a = fdiv float %x, %y ret float %a } define ptx_device double @t1_f64(double %x, double %y) { -; CHECK: div.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: div.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}; +; CHECK: ret; %a = fdiv double %x, %y ret double %a } diff --git a/test/CodeGen/PTX/fneg.ll b/test/CodeGen/PTX/fneg.ll index 66ca74a..2b76e63 100644 --- a/test/CodeGen/PTX/fneg.ll +++ b/test/CodeGen/PTX/fneg.ll @@ -1,15 +1,15 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device float @t1_f32(float %x) { -; CHECK: neg.f32 r{{[0-9]+}}, r{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: neg.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}; +; CHECK: ret; %y = fsub float -0.000000e+00, %x ret float %y } define ptx_device double @t1_f64(double %x) { -; CHECK: neg.f64 rd{{[0-9]+}}, rd{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: neg.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}; +; CHECK: ret; %y = fsub double -0.000000e+00, %x ret double %y } diff --git a/test/CodeGen/PTX/intrinsic.ll b/test/CodeGen/PTX/intrinsic.ll index af987d6..9f37ead 100644 --- a/test/CodeGen/PTX/intrinsic.ll +++ b/test/CodeGen/PTX/intrinsic.ll @@ -1,239 +1,239 @@ ; RUN: llc < %s -march=ptx32 -mattr=+ptx20 | FileCheck %s define ptx_device i32 @test_tid_x() { -; CHECK: mov.u32 r0, %tid.x; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %tid.x; +; CHECK: ret; %x = call i32 @llvm.ptx.read.tid.x() ret i32 %x } define ptx_device i32 @test_tid_y() { -; CHECK: mov.u32 r0, %tid.y; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %tid.y; +; CHECK: ret; %x = call i32 @llvm.ptx.read.tid.y() ret i32 %x } define ptx_device i32 @test_tid_z() { -; CHECK: mov.u32 r0, %tid.z; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %tid.z; +; CHECK: ret; %x = call i32 @llvm.ptx.read.tid.z() ret i32 %x } define ptx_device i32 @test_tid_w() { -; CHECK: mov.u32 r0, %tid.w; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %tid.w; +; CHECK: ret; %x = call i32 @llvm.ptx.read.tid.w() ret i32 %x } define ptx_device i32 @test_ntid_x() { -; CHECK: mov.u32 r0, %ntid.x; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %ntid.x; +; CHECK: ret; %x = call i32 @llvm.ptx.read.ntid.x() ret i32 %x } define ptx_device i32 @test_ntid_y() { -; CHECK: mov.u32 r0, %ntid.y; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %ntid.y; +; CHECK: ret; %x = call i32 @llvm.ptx.read.ntid.y() ret i32 %x } define ptx_device i32 @test_ntid_z() { -; CHECK: mov.u32 r0, %ntid.z; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %ntid.z; +; CHECK: ret; %x = call i32 @llvm.ptx.read.ntid.z() ret i32 %x } define ptx_device i32 @test_ntid_w() { -; CHECK: mov.u32 r0, %ntid.w; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %ntid.w; +; CHECK: ret; %x = call i32 @llvm.ptx.read.ntid.w() ret i32 %x } define ptx_device i32 @test_laneid() { -; CHECK: mov.u32 r0, %laneid; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %laneid; +; CHECK: ret; %x = call i32 @llvm.ptx.read.laneid() ret i32 %x } define ptx_device i32 @test_warpid() { -; CHECK: mov.u32 r0, %warpid; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %warpid; +; CHECK: ret; %x = call i32 @llvm.ptx.read.warpid() ret i32 %x } define ptx_device i32 @test_nwarpid() { -; CHECK: mov.u32 r0, %nwarpid; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %nwarpid; +; CHECK: ret; %x = call i32 @llvm.ptx.read.nwarpid() ret i32 %x } define ptx_device i32 @test_ctaid_x() { -; CHECK: mov.u32 r0, %ctaid.x; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %ctaid.x; +; CHECK: ret; %x = call i32 @llvm.ptx.read.ctaid.x() ret i32 %x } define ptx_device i32 @test_ctaid_y() { -; CHECK: mov.u32 r0, %ctaid.y; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %ctaid.y; +; CHECK: ret; %x = call i32 @llvm.ptx.read.ctaid.y() ret i32 %x } define ptx_device i32 @test_ctaid_z() { -; CHECK: mov.u32 r0, %ctaid.z; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %ctaid.z; +; CHECK: ret; %x = call i32 @llvm.ptx.read.ctaid.z() ret i32 %x } define ptx_device i32 @test_ctaid_w() { -; CHECK: mov.u32 r0, %ctaid.w; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %ctaid.w; +; CHECK: ret; %x = call i32 @llvm.ptx.read.ctaid.w() ret i32 %x } define ptx_device i32 @test_nctaid_x() { -; CHECK: mov.u32 r0, %nctaid.x; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %nctaid.x; +; CHECK: ret; %x = call i32 @llvm.ptx.read.nctaid.x() ret i32 %x } define ptx_device i32 @test_nctaid_y() { -; CHECK: mov.u32 r0, %nctaid.y; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %nctaid.y; +; CHECK: ret; %x = call i32 @llvm.ptx.read.nctaid.y() ret i32 %x } define ptx_device i32 @test_nctaid_z() { -; CHECK: mov.u32 r0, %nctaid.z; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %nctaid.z; +; CHECK: ret; %x = call i32 @llvm.ptx.read.nctaid.z() ret i32 %x } define ptx_device i32 @test_nctaid_w() { -; CHECK: mov.u32 r0, %nctaid.w; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %nctaid.w; +; CHECK: ret; %x = call i32 @llvm.ptx.read.nctaid.w() ret i32 %x } define ptx_device i32 @test_smid() { -; CHECK: mov.u32 r0, %smid; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %smid; +; CHECK: ret; %x = call i32 @llvm.ptx.read.smid() ret i32 %x } define ptx_device i32 @test_nsmid() { -; CHECK: mov.u32 r0, %nsmid; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %nsmid; +; CHECK: ret; %x = call i32 @llvm.ptx.read.nsmid() ret i32 %x } define ptx_device i32 @test_gridid() { -; CHECK: mov.u32 r0, %gridid; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %gridid; +; CHECK: ret; %x = call i32 @llvm.ptx.read.gridid() ret i32 %x } define ptx_device i32 @test_lanemask_eq() { -; CHECK: mov.u32 r0, %lanemask_eq; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %lanemask_eq; +; CHECK: ret; %x = call i32 @llvm.ptx.read.lanemask.eq() ret i32 %x } define ptx_device i32 @test_lanemask_le() { -; CHECK: mov.u32 r0, %lanemask_le; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %lanemask_le; +; CHECK: ret; %x = call i32 @llvm.ptx.read.lanemask.le() ret i32 %x } define ptx_device i32 @test_lanemask_lt() { -; CHECK: mov.u32 r0, %lanemask_lt; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %lanemask_lt; +; CHECK: ret; %x = call i32 @llvm.ptx.read.lanemask.lt() ret i32 %x } define ptx_device i32 @test_lanemask_ge() { -; CHECK: mov.u32 r0, %lanemask_ge; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %lanemask_ge; +; CHECK: ret; %x = call i32 @llvm.ptx.read.lanemask.ge() ret i32 %x } define ptx_device i32 @test_lanemask_gt() { -; CHECK: mov.u32 r0, %lanemask_gt; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %lanemask_gt; +; CHECK: ret; %x = call i32 @llvm.ptx.read.lanemask.gt() ret i32 %x } define ptx_device i32 @test_clock() { -; CHECK: mov.u32 r0, %clock; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %clock; +; CHECK: ret; %x = call i32 @llvm.ptx.read.clock() ret i32 %x } define ptx_device i64 @test_clock64() { -; CHECK: mov.u64 rd0, %clock64; -; CHECK-NEXT: ret; +; CHECK: mov.u64 %ret0, %clock64; +; CHECK: ret; %x = call i64 @llvm.ptx.read.clock64() ret i64 %x } define ptx_device i32 @test_pm0() { -; CHECK: mov.u32 r0, %pm0; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %pm0; +; CHECK: ret; %x = call i32 @llvm.ptx.read.pm0() ret i32 %x } define ptx_device i32 @test_pm1() { -; CHECK: mov.u32 r0, %pm1; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %pm1; +; CHECK: ret; %x = call i32 @llvm.ptx.read.pm1() ret i32 %x } define ptx_device i32 @test_pm2() { -; CHECK: mov.u32 r0, %pm2; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %pm2; +; CHECK: ret; %x = call i32 @llvm.ptx.read.pm2() ret i32 %x } define ptx_device i32 @test_pm3() { -; CHECK: mov.u32 r0, %pm3; -; CHECK-NEXT: ret; +; CHECK: mov.u32 %ret0, %pm3; +; CHECK: ret; %x = call i32 @llvm.ptx.read.pm3() ret i32 %x } define ptx_device void @test_bar_sync() { ; CHECK: bar.sync 0 -; CHECK-NEXT: ret; +; CHECK: ret; call void @llvm.ptx.bar.sync(i32 0) ret void } diff --git a/test/CodeGen/PTX/ld.ll b/test/CodeGen/PTX/ld.ll index d184d12..81fd33a 100644 --- a/test/CodeGen/PTX/ld.ll +++ b/test/CodeGen/PTX/ld.ll @@ -6,9 +6,6 @@ ;CHECK: .extern .const .b8 array_constant_i16[20]; @array_constant_i16 = external addrspace(1) constant [10 x i16] -;CHECK: .extern .local .b8 array_local_i16[20]; -@array_local_i16 = external addrspace(2) global [10 x i16] - ;CHECK: .extern .shared .b8 array_shared_i16[20]; @array_shared_i16 = external addrspace(4) global [10 x i16] @@ -18,9 +15,6 @@ ;CHECK: .extern .const .b8 array_constant_i32[40]; @array_constant_i32 = external addrspace(1) constant [10 x i32] -;CHECK: .extern .local .b8 array_local_i32[40]; -@array_local_i32 = external addrspace(2) global [10 x i32] - ;CHECK: .extern .shared .b8 array_shared_i32[40]; @array_shared_i32 = external addrspace(4) global [10 x i32] @@ -30,9 +24,6 @@ ;CHECK: .extern .const .b8 array_constant_i64[80]; @array_constant_i64 = external addrspace(1) constant [10 x i64] -;CHECK: .extern .local .b8 array_local_i64[80]; -@array_local_i64 = external addrspace(2) global [10 x i64] - ;CHECK: .extern .shared .b8 array_shared_i64[80]; @array_shared_i64 = external addrspace(4) global [10 x i64] @@ -42,9 +33,6 @@ ;CHECK: .extern .const .b8 array_constant_float[40]; @array_constant_float = external addrspace(1) constant [10 x float] -;CHECK: .extern .local .b8 array_local_float[40]; -@array_local_float = external addrspace(2) global [10 x float] - ;CHECK: .extern .shared .b8 array_shared_float[40]; @array_shared_float = external addrspace(4) global [10 x float] @@ -54,57 +42,54 @@ ;CHECK: .extern .const .b8 array_constant_double[80]; @array_constant_double = external addrspace(1) constant [10 x double] -;CHECK: .extern .local .b8 array_local_double[80]; -@array_local_double = external addrspace(2) global [10 x double] - ;CHECK: .extern .shared .b8 array_shared_double[80]; @array_shared_double = external addrspace(4) global [10 x double] define ptx_device i16 @t1_u16(i16* %p) { entry: -;CHECK: ld.global.u16 rh{{[0-9]+}}, [r{{[0-9]+}}]; -;CHECK-NEXT: ret; +;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r{{[0-9]+}}]; +;CHECK: ret; %x = load i16* %p ret i16 %x } define ptx_device i32 @t1_u32(i32* %p) { entry: -;CHECK: ld.global.u32 r{{[0-9]+}}, [r{{[0-9]+}}]; -;CHECK-NEXT: ret; +;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r{{[0-9]+}}]; +;CHECK: ret; %x = load i32* %p ret i32 %x } define ptx_device i64 @t1_u64(i64* %p) { entry: -;CHECK: ld.global.u64 rd{{[0-9]+}}, [r{{[0-9]+}}]; -;CHECK-NEXT: ret; +;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r{{[0-9]+}}]; +;CHECK: ret; %x = load i64* %p ret i64 %x } define ptx_device float @t1_f32(float* %p) { entry: -;CHECK: ld.global.f32 r{{[0-9]+}}, [r{{[0-9]+}}]; -;CHECK-NEXT: ret; +;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r{{[0-9]+}}]; +;CHECK: ret; %x = load float* %p ret float %x } define ptx_device double @t1_f64(double* %p) { entry: -;CHECK: ld.global.f64 rd{{[0-9]+}}, [r{{[0-9]+}}]; -;CHECK-NEXT: ret; +;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r{{[0-9]+}}]; +;CHECK: ret; %x = load double* %p ret double %x } define ptx_device i16 @t2_u16(i16* %p) { entry: -;CHECK: ld.global.u16 rh{{[0-9]+}}, [r{{[0-9]+}}+2]; -;CHECK-NEXT: ret; +;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r{{[0-9]+}}+2]; +;CHECK: ret; %i = getelementptr i16* %p, i32 1 %x = load i16* %i ret i16 %x @@ -112,8 +97,8 @@ entry: define ptx_device i32 @t2_u32(i32* %p) { entry: -;CHECK: ld.global.u32 r{{[0-9]+}}, [r{{[0-9]+}}+4]; -;CHECK-NEXT: ret; +;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r{{[0-9]+}}+4]; +;CHECK: ret; %i = getelementptr i32* %p, i32 1 %x = load i32* %i ret i32 %x @@ -121,8 +106,8 @@ entry: define ptx_device i64 @t2_u64(i64* %p) { entry: -;CHECK: ld.global.u64 rd{{[0-9]+}}, [r{{[0-9]+}}+8]; -;CHECK-NEXT: ret; +;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r{{[0-9]+}}+8]; +;CHECK: ret; %i = getelementptr i64* %p, i32 1 %x = load i64* %i ret i64 %x @@ -130,8 +115,8 @@ entry: define ptx_device float @t2_f32(float* %p) { entry: -;CHECK: ld.global.f32 r{{[0-9]+}}, [r{{[0-9]+}}+4]; -;CHECK-NEXT: ret; +;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r{{[0-9]+}}+4]; +;CHECK: ret; %i = getelementptr float* %p, i32 1 %x = load float* %i ret float %x @@ -139,8 +124,8 @@ entry: define ptx_device double @t2_f64(double* %p) { entry: -;CHECK: ld.global.f64 rd{{[0-9]+}}, [r{{[0-9]+}}+8]; -;CHECK-NEXT: ret; +;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r{{[0-9]+}}+8]; +;CHECK: ret; %i = getelementptr double* %p, i32 1 %x = load double* %i ret double %x @@ -148,9 +133,9 @@ entry: define ptx_device i16 @t3_u16(i16* %p, i32 %q) { entry: -;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 1; -;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]]; -;CHECK-NEXT: ld.global.u16 rh{{[0-9]+}}, [r[[R0]]]; +;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 1; +;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]]; +;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r{{[0-9]+}}]; %i = getelementptr i16* %p, i32 %q %x = load i16* %i ret i16 %x @@ -158,9 +143,9 @@ entry: define ptx_device i32 @t3_u32(i32* %p, i32 %q) { entry: -;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 2; -;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]]; -;CHECK-NEXT: ld.global.u32 r{{[0-9]+}}, [r[[R0]]]; +;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2; +;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]]; +;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r{{[0-9]+}}]; %i = getelementptr i32* %p, i32 %q %x = load i32* %i ret i32 %x @@ -168,9 +153,9 @@ entry: define ptx_device i64 @t3_u64(i64* %p, i32 %q) { entry: -;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 3; -;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]]; -;CHECK-NEXT: ld.global.u64 rd{{[0-9]+}}, [r[[R0]]]; +;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3; +;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]]; +;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r{{[0-9]+}}]; %i = getelementptr i64* %p, i32 %q %x = load i64* %i ret i64 %x @@ -178,9 +163,9 @@ entry: define ptx_device float @t3_f32(float* %p, i32 %q) { entry: -;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 2; -;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]]; -;CHECK-NEXT: ld.global.f32 r{{[0-9]+}}, [r[[R0]]]; +;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2; +;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]]; +;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r{{[0-9]+}}]; %i = getelementptr float* %p, i32 %q %x = load float* %i ret float %x @@ -188,9 +173,9 @@ entry: define ptx_device double @t3_f64(double* %p, i32 %q) { entry: -;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 3; -;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]]; -;CHECK-NEXT: ld.global.f64 rd{{[0-9]+}}, [r[[R0]]]; +;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3; +;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]]; +;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r{{[0-9]+}}]; %i = getelementptr double* %p, i32 %q %x = load double* %i ret double %x @@ -198,9 +183,9 @@ entry: define ptx_device i16 @t4_global_u16() { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16; -;CHECK-NEXT: ld.global.u16 rh{{[0-9]+}}, [r[[R0]]]; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16; +;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r[[R0]]]; +;CHECK: ret; %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 0 %x = load i16* %i ret i16 %x @@ -208,9 +193,9 @@ entry: define ptx_device i32 @t4_global_u32() { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_i32; -;CHECK-NEXT: ld.global.u32 r{{[0-9]+}}, [r[[R0]]]; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32; +;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r[[R0]]]; +;CHECK: ret; %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0 %x = load i32* %i ret i32 %x @@ -218,9 +203,9 @@ entry: define ptx_device i64 @t4_global_u64() { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_i64; -;CHECK-NEXT: ld.global.u64 rd{{[0-9]+}}, [r[[R0]]]; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64; +;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r[[R0]]]; +;CHECK: ret; %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0 %x = load i64* %i ret i64 %x @@ -228,9 +213,9 @@ entry: define ptx_device float @t4_global_f32() { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_float; -;CHECK-NEXT: ld.global.f32 r{{[0-9]+}}, [r[[R0]]]; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_float; +;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r[[R0]]]; +;CHECK: ret; %i = getelementptr [10 x float]* @array_float, i32 0, i32 0 %x = load float* %i ret float %x @@ -238,9 +223,9 @@ entry: define ptx_device double @t4_global_f64() { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_double; -;CHECK-NEXT: ld.global.f64 rd{{[0-9]+}}, [r[[R0]]]; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_double; +;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r[[R0]]]; +;CHECK: ret; %i = getelementptr [10 x double]* @array_double, i32 0, i32 0 %x = load double* %i ret double %x @@ -248,9 +233,9 @@ entry: define ptx_device i16 @t4_const_u16() { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_i16; -;CHECK-NEXT: ld.const.u16 rh{{[0-9]+}}, [r[[R0]]]; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_i16; +;CHECK: ld.const.u16 %ret{{[0-9]+}}, [%r[[R0]]]; +;CHECK: ret; %i = getelementptr [10 x i16] addrspace(1)* @array_constant_i16, i32 0, i32 0 %x = load i16 addrspace(1)* %i ret i16 %x @@ -258,9 +243,9 @@ entry: define ptx_device i32 @t4_const_u32() { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_i32; -;CHECK-NEXT: ld.const.u32 r{{[0-9]+}}, [r[[R0]]]; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_i32; +;CHECK: ld.const.u32 %ret{{[0-9]+}}, [%r[[R0]]]; +;CHECK: ret; %i = getelementptr [10 x i32] addrspace(1)* @array_constant_i32, i32 0, i32 0 %x = load i32 addrspace(1)* %i ret i32 %x @@ -268,9 +253,9 @@ entry: define ptx_device i64 @t4_const_u64() { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_i64; -;CHECK-NEXT: ld.const.u64 rd{{[0-9]+}}, [r[[R0]]]; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_i64; +;CHECK: ld.const.u64 %ret{{[0-9]+}}, [%r[[R0]]]; +;CHECK: ret; %i = getelementptr [10 x i64] addrspace(1)* @array_constant_i64, i32 0, i32 0 %x = load i64 addrspace(1)* %i ret i64 %x @@ -278,9 +263,9 @@ entry: define ptx_device float @t4_const_f32() { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_float; -;CHECK-NEXT: ld.const.f32 r{{[0-9]+}}, [r[[R0]]]; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_float; +;CHECK: ld.const.f32 %ret{{[0-9]+}}, [%r[[R0]]]; +;CHECK: ret; %i = getelementptr [10 x float] addrspace(1)* @array_constant_float, i32 0, i32 0 %x = load float addrspace(1)* %i ret float %x @@ -288,69 +273,19 @@ entry: define ptx_device double @t4_const_f64() { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_double; -;CHECK-NEXT: ld.const.f64 rd{{[0-9]+}}, [r[[R0]]]; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_double; +;CHECK: ld.const.f64 %ret{{[0-9]+}}, [%r[[R0]]]; +;CHECK: ret; %i = getelementptr [10 x double] addrspace(1)* @array_constant_double, i32 0, i32 0 %x = load double addrspace(1)* %i ret double %x } -define ptx_device i16 @t4_local_u16() { -entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i16; -;CHECK-NEXT: ld.local.u16 rh{{[0-9]+}}, [r[[R0]]]; -;CHECK-NEXT: ret; - %i = getelementptr [10 x i16] addrspace(2)* @array_local_i16, i32 0, i32 0 - %x = load i16 addrspace(2)* %i - ret i16 %x -} - -define ptx_device i32 @t4_local_u32() { -entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i32; -;CHECK-NEXT: ld.local.u32 r{{[0-9]+}}, [r[[R0]]]; -;CHECK-NEXT: ret; - %i = getelementptr [10 x i32] addrspace(2)* @array_local_i32, i32 0, i32 0 - %x = load i32 addrspace(2)* %i - ret i32 %x -} - -define ptx_device i64 @t4_local_u64() { -entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i64; -;CHECK-NEXT: ld.local.u64 rd{{[0-9]+}}, [r[[R0]]]; -;CHECK-NEXT: ret; - %i = getelementptr [10 x i64] addrspace(2)* @array_local_i64, i32 0, i32 0 - %x = load i64 addrspace(2)* %i - ret i64 %x -} - -define ptx_device float @t4_local_f32() { -entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_float; -;CHECK-NEXT: ld.local.f32 r{{[0-9]+}}, [r[[R0]]]; -;CHECK-NEXT: ret; - %i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0 - %x = load float addrspace(2)* %i - ret float %x -} - -define ptx_device double @t4_local_f64() { -entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_double; -;CHECK-NEXT: ld.local.f64 rd{{[0-9]+}}, [r[[R0]]]; -;CHECK-NEXT: ret; - %i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0 - %x = load double addrspace(2)* %i - ret double %x -} - define ptx_device i16 @t4_shared_u16() { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i16; -;CHECK-NEXT: ld.shared.u16 rh{{[0-9]+}}, [r[[R0]]]; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i16; +;CHECK: ld.shared.u16 %ret{{[0-9]+}}, [%r[[R0]]]; +;CHECK: ret; %i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0 %x = load i16 addrspace(4)* %i ret i16 %x @@ -358,9 +293,9 @@ entry: define ptx_device i32 @t4_shared_u32() { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i32; -;CHECK-NEXT: ld.shared.u32 r{{[0-9]+}}, [r[[R0]]]; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i32; +;CHECK: ld.shared.u32 %ret{{[0-9]+}}, [%r[[R0]]]; +;CHECK: ret; %i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0 %x = load i32 addrspace(4)* %i ret i32 %x @@ -368,9 +303,9 @@ entry: define ptx_device i64 @t4_shared_u64() { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i64; -;CHECK-NEXT: ld.shared.u64 rd{{[0-9]+}}, [r[[R0]]]; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i64; +;CHECK: ld.shared.u64 %ret{{[0-9]+}}, [%r[[R0]]]; +;CHECK: ret; %i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0 %x = load i64 addrspace(4)* %i ret i64 %x @@ -378,9 +313,9 @@ entry: define ptx_device float @t4_shared_f32() { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_float; -;CHECK-NEXT: ld.shared.f32 r{{[0-9]+}}, [r[[R0]]]; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_float; +;CHECK: ld.shared.f32 %ret{{[0-9]+}}, [%r[[R0]]]; +;CHECK: ret; %i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0 %x = load float addrspace(4)* %i ret float %x @@ -388,9 +323,9 @@ entry: define ptx_device double @t4_shared_f64() { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_double; -;CHECK-NEXT: ld.shared.f64 rd{{[0-9]+}}, [r[[R0]]]; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_double; +;CHECK: ld.shared.f64 %ret{{[0-9]+}}, [%r[[R0]]]; +;CHECK: ret; %i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0 %x = load double addrspace(4)* %i ret double %x @@ -398,9 +333,9 @@ entry: define ptx_device i16 @t5_u16() { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16; -;CHECK-NEXT: ld.global.u16 rh{{[0-9]+}}, [r[[R0]]+2]; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16; +;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r[[R0]]+2]; +;CHECK: ret; %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1 %x = load i16* %i ret i16 %x @@ -408,9 +343,9 @@ entry: define ptx_device i32 @t5_u32() { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_i32; -;CHECK-NEXT: ld.global.u32 r{{[0-9]+}}, [r[[R0]]+4]; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32; +;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r[[R0]]+4]; +;CHECK: ret; %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1 %x = load i32* %i ret i32 %x @@ -418,9 +353,9 @@ entry: define ptx_device i64 @t5_u64() { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_i64; -;CHECK-NEXT: ld.global.u64 rd{{[0-9]+}}, [r[[R0]]+8]; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64; +;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r[[R0]]+8]; +;CHECK: ret; %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1 %x = load i64* %i ret i64 %x @@ -428,9 +363,9 @@ entry: define ptx_device float @t5_f32() { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_float; -;CHECK-NEXT: ld.global.f32 r{{[0-9]+}}, [r[[R0]]+4]; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_float; +;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r[[R0]]+4]; +;CHECK: ret; %i = getelementptr [10 x float]* @array_float, i32 0, i32 1 %x = load float* %i ret float %x @@ -438,9 +373,9 @@ entry: define ptx_device double @t5_f64() { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_double; -;CHECK-NEXT: ld.global.f64 rd{{[0-9]+}}, [r[[R0]]+8]; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_double; +;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r[[R0]]+8]; +;CHECK: ret; %i = getelementptr [10 x double]* @array_double, i32 0, i32 1 %x = load double* %i ret double %x diff --git a/test/CodeGen/PTX/llvm-intrinsic.ll b/test/CodeGen/PTX/llvm-intrinsic.ll index 4611c54..e73ad25 100644 --- a/test/CodeGen/PTX/llvm-intrinsic.ll +++ b/test/CodeGen/PTX/llvm-intrinsic.ll @@ -2,48 +2,48 @@ define ptx_device float @test_sqrt_f32(float %x) { entry: -; CHECK: sqrt.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: sqrt.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}; +; CHECK: ret; %y = call float @llvm.sqrt.f32(float %x) ret float %y } define ptx_device double @test_sqrt_f64(double %x) { entry: -; CHECK: sqrt.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: sqrt.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}; +; CHECK: ret; %y = call double @llvm.sqrt.f64(double %x) ret double %y } define ptx_device float @test_sin_f32(float %x) { entry: -; CHECK: sin.approx.f32 r{{[0-9]+}}, r{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: sin.approx.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}; +; CHECK: ret; %y = call float @llvm.sin.f32(float %x) ret float %y } define ptx_device double @test_sin_f64(double %x) { entry: -; CHECK: sin.approx.f64 rd{{[0-9]+}}, rd{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: sin.approx.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}; +; CHECK: ret; %y = call double @llvm.sin.f64(double %x) ret double %y } define ptx_device float @test_cos_f32(float %x) { entry: -; CHECK: cos.approx.f32 r{{[0-9]+}}, r{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: cos.approx.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}; +; CHECK: ret; %y = call float @llvm.cos.f32(float %x) ret float %y } define ptx_device double @test_cos_f64(double %x) { entry: -; CHECK: cos.approx.f64 rd{{[0-9]+}}, rd{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: cos.approx.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}; +; CHECK: ret; %y = call double @llvm.cos.f64(double %x) ret double %y } diff --git a/test/CodeGen/PTX/mad.ll b/test/CodeGen/PTX/mad.ll index 0e4d3f9..cc28e3f 100644 --- a/test/CodeGen/PTX/mad.ll +++ b/test/CodeGen/PTX/mad.ll @@ -1,16 +1,16 @@ ; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s define ptx_device float @t1_f32(float %x, float %y, float %z) { -; CHECK: mad.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: mad.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}; +; CHECK: ret; %a = fmul float %x, %y %b = fadd float %a, %z ret float %b } define ptx_device double @t1_f64(double %x, double %y, double %z) { -; CHECK: mad.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: mad.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}; +; CHECK: ret; %a = fmul double %x, %y %b = fadd double %a, %z ret double %b diff --git a/test/CodeGen/PTX/mov.ll b/test/CodeGen/PTX/mov.ll index cce6a5b..75555a7 100644 --- a/test/CodeGen/PTX/mov.ll +++ b/test/CodeGen/PTX/mov.ll @@ -1,62 +1,62 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device i16 @t1_u16() { -; CHECK: mov.u16 rh{{[0-9]+}}, 0; +; CHECK: mov.u16 %ret{{[0-9]+}}, 0; ; CHECK: ret; ret i16 0 } define ptx_device i32 @t1_u32() { -; CHECK: mov.u32 r{{[0-9]+}}, 0; +; CHECK: mov.u32 %ret{{[0-9]+}}, 0; ; CHECK: ret; ret i32 0 } define ptx_device i64 @t1_u64() { -; CHECK: mov.u64 rd{{[0-9]+}}, 0; +; CHECK: mov.u64 %ret{{[0-9]+}}, 0; ; CHECK: ret; ret i64 0 } define ptx_device float @t1_f32() { -; CHECK: mov.f32 r{{[0-9]+}}, 0F00000000; +; CHECK: mov.f32 %ret{{[0-9]+}}, 0D0000000000000000; ; CHECK: ret; ret float 0.0 } define ptx_device double @t1_f64() { -; CHECK: mov.f64 rd{{[0-9]+}}, 0D0000000000000000; +; CHECK: mov.f64 %ret{{[0-9]+}}, 0D0000000000000000; ; CHECK: ret; ret double 0.0 } define ptx_device i16 @t2_u16(i16 %x) { -; CHECK: mov.u16 rh{{[0-9]+}}, rh{{[0-9]+}}; +; CHECK: mov.b16 %ret{{[0-9]+}}, %param{{[0-9]+}}; ; CHECK: ret; ret i16 %x } define ptx_device i32 @t2_u32(i32 %x) { -; CHECK: mov.u32 r{{[0-9]+}}, r{{[0-9]+}}; +; CHECK: mov.b32 %ret{{[0-9]+}}, %param{{[0-9]+}}; ; CHECK: ret; ret i32 %x } define ptx_device i64 @t2_u64(i64 %x) { -; CHECK: mov.u64 rd{{[0-9]+}}, rd{{[0-9]+}}; +; CHECK: mov.b64 %ret{{[0-9]+}}, %param{{[0-9]+}}; ; CHECK: ret; ret i64 %x } define ptx_device float @t3_f32(float %x) { -; CHECK: mov.u32 r{{[0-9]+}}, r{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: mov.f32 %ret{{[0-9]+}}, %param{{[0-9]+}}; +; CHECK: ret; ret float %x } define ptx_device double @t3_f64(double %x) { -; CHECK: mov.u64 rd{{[0-9]+}}, rd{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: mov.f64 %ret{{[0-9]+}}, %param{{[0-9]+}}; +; CHECK: ret; ret double %x } diff --git a/test/CodeGen/PTX/mul.ll b/test/CodeGen/PTX/mul.ll index 491cc74..91949db 100644 --- a/test/CodeGen/PTX/mul.ll +++ b/test/CodeGen/PTX/mul.ll @@ -11,29 +11,29 @@ ;} define ptx_device float @t1_f32(float %x, float %y) { -; CHECK: mul.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}} -; CHECK-NEXT: ret; +; CHECK: mul.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}} +; CHECK: ret; %z = fmul float %x, %y ret float %z } define ptx_device double @t1_f64(double %x, double %y) { -; CHECK: mul.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}} -; CHECK-NEXT: ret; +; CHECK: mul.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}} +; CHECK: ret; %z = fmul double %x, %y ret double %z } define ptx_device float @t2_f32(float %x) { -; CHECK: mul.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, 0F40A00000; -; CHECK-NEXT: ret; +; CHECK: mul.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, 0D4014000000000000; +; CHECK: ret; %z = fmul float %x, 5.0 ret float %z } define ptx_device double @t2_f64(double %x) { -; CHECK: mul.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, 0D4014000000000000; -; CHECK-NEXT: ret; +; CHECK: mul.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, 0D4014000000000000; +; CHECK: ret; %z = fmul double %x, 5.0 ret double %z } diff --git a/test/CodeGen/PTX/parameter-order.ll b/test/CodeGen/PTX/parameter-order.ll index b16556e..09015da 100644 --- a/test/CodeGen/PTX/parameter-order.ll +++ b/test/CodeGen/PTX/parameter-order.ll @@ -1,8 +1,8 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s -; CHECK: .func (.reg .b32 r{{[0-9]+}}) test_parameter_order (.reg .b32 r{{[0-9]+}}, .reg .b32 r{{[0-9]+}}, .reg .b32 r{{[0-9]+}}, .reg .b32 r{{[0-9]+}}) +; CHECK: .func (.reg .b32 %ret{{[0-9]+}}) test_parameter_order (.reg .b32 %param{{[0-9]+}}, .reg .b32 %param{{[0-9]+}}, .reg .b32 %param{{[0-9]+}}, .reg .b32 %param{{[0-9]+}}) define ptx_device i32 @test_parameter_order(float %a, i32 %b, i32 %c, float %d) { -; CHECK: sub.u32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}} +; CHECK: sub.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}} %result = sub i32 %b, %c ret i32 %result } diff --git a/test/CodeGen/PTX/selp.ll b/test/CodeGen/PTX/selp.ll index e705fbe..aa7ce85 100644 --- a/test/CodeGen/PTX/selp.ll +++ b/test/CodeGen/PTX/selp.ll @@ -1,25 +1,25 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device i32 @test_selp_i32(i1 %x, i32 %y, i32 %z) { -; CHECK: selp.u32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, p{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %p{{[0-9]+}}; %a = select i1 %x, i32 %y, i32 %z ret i32 %a } define ptx_device i64 @test_selp_i64(i1 %x, i64 %y, i64 %z) { -; CHECK: selp.u64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}, p{{[0-9]+}}; +; CHECK: selp.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}}, %p{{[0-9]+}}; %a = select i1 %x, i64 %y, i64 %z ret i64 %a } define ptx_device float @test_selp_f32(i1 %x, float %y, float %z) { -; CHECK: selp.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, p{{[0-9]+}}; +; CHECK: selp.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %p{{[0-9]+}}; %a = select i1 %x, float %y, float %z ret float %a } define ptx_device double @test_selp_f64(i1 %x, double %y, double %z) { -; CHECK: selp.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}, p{{[0-9]+}}; +; CHECK: selp.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %p{{[0-9]+}}; %a = select i1 %x, double %y, double %z ret double %a } diff --git a/test/CodeGen/PTX/setp.ll b/test/CodeGen/PTX/setp.ll index e0044d6..646abab 100644 --- a/test/CodeGen/PTX/setp.ll +++ b/test/CodeGen/PTX/setp.ll @@ -1,190 +1,190 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device i32 @test_setp_eq_u32_rr(i32 %x, i32 %y) { -; CHECK: setp.eq.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; -; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; -; CHECK-NEXT: ret; +; CHECK: setp.eq.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; %p = icmp eq i32 %x, %y %z = zext i1 %p to i32 ret i32 %z } define ptx_device i32 @test_setp_ne_u32_rr(i32 %x, i32 %y) { -; CHECK: setp.ne.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; -; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; -; CHECK-NEXT: ret; +; CHECK: setp.ne.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; %p = icmp ne i32 %x, %y %z = zext i1 %p to i32 ret i32 %z } define ptx_device i32 @test_setp_lt_u32_rr(i32 %x, i32 %y) { -; CHECK: setp.lt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; -; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; -; CHECK-NEXT: ret; +; CHECK: setp.lt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; %p = icmp ult i32 %x, %y %z = zext i1 %p to i32 ret i32 %z } define ptx_device i32 @test_setp_le_u32_rr(i32 %x, i32 %y) { -; CHECK: setp.le.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; -; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; -; CHECK-NEXT: ret; +; CHECK: setp.le.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; %p = icmp ule i32 %x, %y %z = zext i1 %p to i32 ret i32 %z } define ptx_device i32 @test_setp_gt_u32_rr(i32 %x, i32 %y) { -; CHECK: setp.gt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; -; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; -; CHECK-NEXT: ret; +; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; %p = icmp ugt i32 %x, %y %z = zext i1 %p to i32 ret i32 %z } define ptx_device i32 @test_setp_ge_u32_rr(i32 %x, i32 %y) { -; CHECK: setp.ge.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; -; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; -; CHECK-NEXT: ret; +; CHECK: setp.ge.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; %p = icmp uge i32 %x, %y %z = zext i1 %p to i32 ret i32 %z } define ptx_device i32 @test_setp_lt_s32_rr(i32 %x, i32 %y) { -; CHECK: setp.lt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; -; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; -; CHECK-NEXT: ret; +; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; %p = icmp slt i32 %x, %y %z = zext i1 %p to i32 ret i32 %z } define ptx_device i32 @test_setp_le_s32_rr(i32 %x, i32 %y) { -; CHECK: setp.le.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; -; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; -; CHECK-NEXT: ret; +; CHECK: setp.le.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; %p = icmp sle i32 %x, %y %z = zext i1 %p to i32 ret i32 %z } define ptx_device i32 @test_setp_gt_s32_rr(i32 %x, i32 %y) { -; CHECK: setp.gt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; -; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; -; CHECK-NEXT: ret; +; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; %p = icmp sgt i32 %x, %y %z = zext i1 %p to i32 ret i32 %z } define ptx_device i32 @test_setp_ge_s32_rr(i32 %x, i32 %y) { -; CHECK: setp.ge.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; -; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; -; CHECK-NEXT: ret; +; CHECK: setp.ge.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; %p = icmp sge i32 %x, %y %z = zext i1 %p to i32 ret i32 %z } define ptx_device i32 @test_setp_eq_u32_ri(i32 %x) { -; CHECK: setp.eq.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 1; -; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; -; CHECK-NEXT: ret; +; CHECK: setp.eq.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; %p = icmp eq i32 %x, 1 %z = zext i1 %p to i32 ret i32 %z } define ptx_device i32 @test_setp_ne_u32_ri(i32 %x) { -; CHECK: setp.ne.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 1; -; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; -; CHECK-NEXT: ret; +; CHECK: setp.ne.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; %p = icmp ne i32 %x, 1 %z = zext i1 %p to i32 ret i32 %z } define ptx_device i32 @test_setp_lt_u32_ri(i32 %x) { -; CHECK: setp.eq.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0; -; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; -; CHECK-NEXT: ret; +; CHECK: setp.eq.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; %p = icmp ult i32 %x, 1 %z = zext i1 %p to i32 ret i32 %z } define ptx_device i32 @test_setp_le_u32_ri(i32 %x) { -; CHECK: setp.lt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 2; -; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; -; CHECK-NEXT: ret; +; CHECK: setp.lt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 2; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; %p = icmp ule i32 %x, 1 %z = zext i1 %p to i32 ret i32 %z } define ptx_device i32 @test_setp_gt_u32_ri(i32 %x) { -; CHECK: setp.gt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 1; -; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; -; CHECK-NEXT: ret; +; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; %p = icmp ugt i32 %x, 1 %z = zext i1 %p to i32 ret i32 %z } define ptx_device i32 @test_setp_ge_u32_ri(i32 %x) { -; CHECK: setp.ne.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0; -; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; -; CHECK-NEXT: ret; +; CHECK: setp.ne.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; %p = icmp uge i32 %x, 1 %z = zext i1 %p to i32 ret i32 %z } define ptx_device i32 @test_setp_lt_s32_ri(i32 %x) { -; CHECK: setp.lt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, 1; -; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; -; CHECK-NEXT: ret; +; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; %p = icmp slt i32 %x, 1 %z = zext i1 %p to i32 ret i32 %z } define ptx_device i32 @test_setp_le_s32_ri(i32 %x) { -; CHECK: setp.lt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, 2; -; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; -; CHECK-NEXT: ret; +; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 2; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; %p = icmp sle i32 %x, 1 %z = zext i1 %p to i32 ret i32 %z } define ptx_device i32 @test_setp_gt_s32_ri(i32 %x) { -; CHECK: setp.gt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, 1; -; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; -; CHECK-NEXT: ret; +; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; %p = icmp sgt i32 %x, 1 %z = zext i1 %p to i32 ret i32 %z } define ptx_device i32 @test_setp_ge_s32_ri(i32 %x) { -; CHECK: setp.gt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0; -; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; -; CHECK-NEXT: ret; +; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; %p = icmp sge i32 %x, 1 %z = zext i1 %p to i32 ret i32 %z } define ptx_device i32 @test_setp_4_op_format_1(i32 %x, i32 %y, i32 %u, i32 %v) { -; CHECK: setp.gt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; -; CHECK-NEXT: setp.eq.and.u32 p[[P0]], r{{[0-9]+}}, r{{[0-9]+}}, p[[P0]]; -; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; -; CHECK-NEXT: ret; +; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: setp.eq.and.u32 %p1, %r{{[0-9]+}}, %r{{[0-9]+}}, %p[[P0]]; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p1; +; CHECK: ret; %c = icmp eq i32 %x, %y %d = icmp ugt i32 %u, %v %e = and i1 %c, %d @@ -193,10 +193,10 @@ define ptx_device i32 @test_setp_4_op_format_1(i32 %x, i32 %y, i32 %u, i32 %v) { } define ptx_device i32 @test_setp_4_op_format_2(i32 %x, i32 %y, i32 %w) { -; CHECK: setp.gt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0; -; CHECK-NEXT: setp.eq.and.u32 p[[P0]], r{{[0-9]+}}, r{{[0-9]+}}, !p[[P0]]; -; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; -; CHECK-NEXT: ret; +; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0; +; CHECK: setp.eq.and.u32 %p1, %r{{[0-9]+}}, %r{{[0-9]+}}, !%p[[P0]]; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p1; +; CHECK: ret; %c = trunc i32 %w to i1 %d = icmp eq i32 %x, %y %e = xor i1 %c, 1 diff --git a/test/CodeGen/PTX/shl.ll b/test/CodeGen/PTX/shl.ll index b3818e1..d9fe2cd 100644 --- a/test/CodeGen/PTX/shl.ll +++ b/test/CodeGen/PTX/shl.ll @@ -1,21 +1,21 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device i32 @t1(i32 %x, i32 %y) { -; CHECK: shl.b32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}} +; CHECK: shl.b32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}} %z = shl i32 %x, %y ; CHECK: ret; ret i32 %z } define ptx_device i32 @t2(i32 %x) { -; CHECK: shl.b32 r{{[0-9]+}}, r{{[0-9]+}}, 3 +; CHECK: shl.b32 %ret{{[0-9]+}}, %r{{[0-9]+}}, 3 %z = shl i32 %x, 3 ; CHECK: ret; ret i32 %z } define ptx_device i32 @t3(i32 %x) { -; CHECK: shl.b32 r{{[0-9]+}}, 3, r{{[0-9]+}} +; CHECK: shl.b32 %ret{{[0-9]+}}, 3, %r{{[0-9]+}} %z = shl i32 3, %x ; CHECK: ret; ret i32 %z diff --git a/test/CodeGen/PTX/shr.ll b/test/CodeGen/PTX/shr.ll index cb57546..eb4666f 100644 --- a/test/CodeGen/PTX/shr.ll +++ b/test/CodeGen/PTX/shr.ll @@ -1,42 +1,42 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device i32 @t1(i32 %x, i32 %y) { -; CHECK: shr.u32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}} +; CHECK: shr.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}} %z = lshr i32 %x, %y ; CHECK: ret; ret i32 %z } define ptx_device i32 @t2(i32 %x) { -; CHECK: shr.u32 r{{[0-9]+}}, r{{[0-9]+}}, 3 +; CHECK: shr.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, 3 %z = lshr i32 %x, 3 ; CHECK: ret; ret i32 %z } define ptx_device i32 @t3(i32 %x) { -; CHECK: shr.u32 r{{[0-9]+}}, 3, r{{[0-9]+}} +; CHECK: shr.u32 %ret{{[0-9]+}}, 3, %r{{[0-9]+}} %z = lshr i32 3, %x ; CHECK: ret; ret i32 %z } define ptx_device i32 @t4(i32 %x, i32 %y) { -; CHECK: shr.s32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}} +; CHECK: shr.s32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}} %z = ashr i32 %x, %y ; CHECK: ret; ret i32 %z } define ptx_device i32 @t5(i32 %x) { -; CHECK: shr.s32 r{{[0-9]+}}, r{{[0-9]+}}, 3 +; CHECK: shr.s32 %ret{{[0-9]+}}, %r{{[0-9]+}}, 3 %z = ashr i32 %x, 3 ; CHECK: ret; ret i32 %z } define ptx_device i32 @t6(i32 %x) { -; CHECK: shr.s32 r{{[0-9]+}}, -3, r{{[0-9]+}} +; CHECK: shr.s32 %ret{{[0-9]+}}, -3, %r{{[0-9]+}} %z = ashr i32 -3, %x ; CHECK: ret; ret i32 %z diff --git a/test/CodeGen/PTX/simple-call.ll b/test/CodeGen/PTX/simple-call.ll new file mode 100644 index 0000000..77ea29e --- /dev/null +++ b/test/CodeGen/PTX/simple-call.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s + +define ptx_device void @test_add(float %x, float %y) { +; CHECK: ret; + %z = fadd float %x, %y + ret void +} + +define ptx_device float @test_call(float %x, float %y) { + %a = fadd float %x, %y +; CHECK: call.uni test_add, (__localparam_{{[0-9]+}}, __localparam_{{[0-9]+}}); + call void @test_add(float %a, float %y) + ret float %a +} + +define ptx_device float @test_compute(float %x, float %y) { +; CHECK: ret; + %z = fadd float %x, %y + ret float %z +} + +define ptx_device float @test_call_compute(float %x, float %y) { +; CHECK: call.uni (__localparam_{{[0-9]+}}), test_compute, (__localparam_{{[0-9]+}}, __localparam_{{[0-9]+}}) + %z = call float @test_compute(float %x, float %y) + ret float %z +} + diff --git a/test/CodeGen/PTX/st.ll b/test/CodeGen/PTX/st.ll index b08528e..63ef58c 100644 --- a/test/CodeGen/PTX/st.ll +++ b/test/CodeGen/PTX/st.ll @@ -6,9 +6,6 @@ ;CHECK: .extern .const .b8 array_constant_i16[20]; @array_constant_i16 = external addrspace(1) constant [10 x i16] -;CHECK: .extern .local .b8 array_local_i16[20]; -@array_local_i16 = external addrspace(2) global [10 x i16] - ;CHECK: .extern .shared .b8 array_shared_i16[20]; @array_shared_i16 = external addrspace(4) global [10 x i16] @@ -18,9 +15,6 @@ ;CHECK: .extern .const .b8 array_constant_i32[40]; @array_constant_i32 = external addrspace(1) constant [10 x i32] -;CHECK: .extern .local .b8 array_local_i32[40]; -@array_local_i32 = external addrspace(2) global [10 x i32] - ;CHECK: .extern .shared .b8 array_shared_i32[40]; @array_shared_i32 = external addrspace(4) global [10 x i32] @@ -30,9 +24,6 @@ ;CHECK: .extern .const .b8 array_constant_i64[80]; @array_constant_i64 = external addrspace(1) constant [10 x i64] -;CHECK: .extern .local .b8 array_local_i64[80]; -@array_local_i64 = external addrspace(2) global [10 x i64] - ;CHECK: .extern .shared .b8 array_shared_i64[80]; @array_shared_i64 = external addrspace(4) global [10 x i64] @@ -42,9 +33,6 @@ ;CHECK: .extern .const .b8 array_constant_float[40]; @array_constant_float = external addrspace(1) constant [10 x float] -;CHECK: .extern .local .b8 array_local_float[40]; -@array_local_float = external addrspace(2) global [10 x float] - ;CHECK: .extern .shared .b8 array_shared_float[40]; @array_shared_float = external addrspace(4) global [10 x float] @@ -54,57 +42,54 @@ ;CHECK: .extern .const .b8 array_constant_double[80]; @array_constant_double = external addrspace(1) constant [10 x double] -;CHECK: .extern .local .b8 array_local_double[80]; -@array_local_double = external addrspace(2) global [10 x double] - ;CHECK: .extern .shared .b8 array_shared_double[80]; @array_shared_double = external addrspace(4) global [10 x double] define ptx_device void @t1_u16(i16* %p, i16 %x) { entry: -;CHECK: st.global.u16 [r{{[0-9]+}}], rh{{[0-9]+}}; -;CHECK-NEXT: ret; +;CHECK: st.global.u16 [%r{{[0-9]+}}], %rh{{[0-9]+}}; +;CHECK: ret; store i16 %x, i16* %p ret void } define ptx_device void @t1_u32(i32* %p, i32 %x) { entry: -;CHECK: st.global.u32 [r{{[0-9]+}}], r{{[0-9]+}}; -;CHECK-NEXT: ret; +;CHECK: st.global.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}; +;CHECK: ret; store i32 %x, i32* %p ret void } define ptx_device void @t1_u64(i64* %p, i64 %x) { entry: -;CHECK: st.global.u64 [r{{[0-9]+}}], rd{{[0-9]+}}; -;CHECK-NEXT: ret; +;CHECK: st.global.u64 [%r{{[0-9]+}}], %rd{{[0-9]+}}; +;CHECK: ret; store i64 %x, i64* %p ret void } define ptx_device void @t1_f32(float* %p, float %x) { entry: -;CHECK: st.global.f32 [r{{[0-9]+}}], r{{[0-9]+}}; -;CHECK-NEXT: ret; +;CHECK: st.global.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}; +;CHECK: ret; store float %x, float* %p ret void } define ptx_device void @t1_f64(double* %p, double %x) { entry: -;CHECK: st.global.f64 [r{{[0-9]+}}], rd{{[0-9]+}}; -;CHECK-NEXT: ret; +;CHECK: st.global.f64 [%r{{[0-9]+}}], %fd{{[0-9]+}}; +;CHECK: ret; store double %x, double* %p ret void } define ptx_device void @t2_u16(i16* %p, i16 %x) { entry: -;CHECK: st.global.u16 [r{{[0-9]+}}+2], rh{{[0-9]+}}; -;CHECK-NEXT: ret; +;CHECK: st.global.u16 [%r{{[0-9]+}}+2], %rh{{[0-9]+}}; +;CHECK: ret; %i = getelementptr i16* %p, i32 1 store i16 %x, i16* %i ret void @@ -112,8 +97,8 @@ entry: define ptx_device void @t2_u32(i32* %p, i32 %x) { entry: -;CHECK: st.global.u32 [r{{[0-9]+}}+4], r{{[0-9]+}}; -;CHECK-NEXT: ret; +;CHECK: st.global.u32 [%r{{[0-9]+}}+4], %r{{[0-9]+}}; +;CHECK: ret; %i = getelementptr i32* %p, i32 1 store i32 %x, i32* %i ret void @@ -121,8 +106,8 @@ entry: define ptx_device void @t2_u64(i64* %p, i64 %x) { entry: -;CHECK: st.global.u64 [r{{[0-9]+}}+8], rd{{[0-9]+}}; -;CHECK-NEXT: ret; +;CHECK: st.global.u64 [%r{{[0-9]+}}+8], %rd{{[0-9]+}}; +;CHECK: ret; %i = getelementptr i64* %p, i32 1 store i64 %x, i64* %i ret void @@ -130,8 +115,8 @@ entry: define ptx_device void @t2_f32(float* %p, float %x) { entry: -;CHECK: st.global.f32 [r{{[0-9]+}}+4], r{{[0-9]+}}; -;CHECK-NEXT: ret; +;CHECK: st.global.f32 [%r{{[0-9]+}}+4], %f{{[0-9]+}}; +;CHECK: ret; %i = getelementptr float* %p, i32 1 store float %x, float* %i ret void @@ -139,8 +124,8 @@ entry: define ptx_device void @t2_f64(double* %p, double %x) { entry: -;CHECK: st.global.f64 [r{{[0-9]+}}+8], rd{{[0-9]+}}; -;CHECK-NEXT: ret; +;CHECK: st.global.f64 [%r{{[0-9]+}}+8], %fd{{[0-9]+}}; +;CHECK: ret; %i = getelementptr double* %p, i32 1 store double %x, double* %i ret void @@ -148,10 +133,10 @@ entry: define ptx_device void @t3_u16(i16* %p, i32 %q, i16 %x) { entry: -;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 1; -;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]]; -;CHECK-NEXT: st.global.u16 [r[[R0]]], rh{{[0-9]+}}; -;CHECK-NEXT: ret; +;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 1; +;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]]; +;CHECK: st.global.u16 [%r{{[0-9]+}}], %rh{{[0-9]+}}; +;CHECK: ret; %i = getelementptr i16* %p, i32 %q store i16 %x, i16* %i ret void @@ -159,10 +144,10 @@ entry: define ptx_device void @t3_u32(i32* %p, i32 %q, i32 %x) { entry: -;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 2; -;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]]; -;CHECK-NEXT: st.global.u32 [r[[R0]]], r{{[0-9]+}}; -;CHECK-NEXT: ret; +;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2; +;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]]; +;CHECK: st.global.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}; +;CHECK: ret; %i = getelementptr i32* %p, i32 %q store i32 %x, i32* %i ret void @@ -170,10 +155,10 @@ entry: define ptx_device void @t3_u64(i64* %p, i32 %q, i64 %x) { entry: -;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 3; -;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]]; -;CHECK-NEXT: st.global.u64 [r[[R0]]], rd{{[0-9]+}}; -;CHECK-NEXT: ret; +;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3; +;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]]; +;CHECK: st.global.u64 [%r{{[0-9]+}}], %rd{{[0-9]+}}; +;CHECK: ret; %i = getelementptr i64* %p, i32 %q store i64 %x, i64* %i ret void @@ -181,10 +166,10 @@ entry: define ptx_device void @t3_f32(float* %p, i32 %q, float %x) { entry: -;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 2; -;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]]; -;CHECK-NEXT: st.global.f32 [r[[R0]]], r{{[0-9]+}}; -;CHECK-NEXT: ret; +;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2; +;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]]; +;CHECK: st.global.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}; +;CHECK: ret; %i = getelementptr float* %p, i32 %q store float %x, float* %i ret void @@ -192,10 +177,10 @@ entry: define ptx_device void @t3_f64(double* %p, i32 %q, double %x) { entry: -;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 3; -;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]]; -;CHECK-NEXT: st.global.f64 [r[[R0]]], rd{{[0-9]+}}; -;CHECK-NEXT: ret; +;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3; +;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]]; +;CHECK: st.global.f64 [%r{{[0-9]+}}], %fd{{[0-9]+}}; +;CHECK: ret; %i = getelementptr double* %p, i32 %q store double %x, double* %i ret void @@ -203,9 +188,9 @@ entry: define ptx_device void @t4_global_u16(i16 %x) { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16; -;CHECK-NEXT: st.global.u16 [r[[R0]]], rh{{[0-9]+}}; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16; +;CHECK: st.global.u16 [%r[[R0]]], %rh{{[0-9]+}}; +;CHECK: ret; %i = getelementptr [10 x i16]* @array_i16, i16 0, i16 0 store i16 %x, i16* %i ret void @@ -213,9 +198,9 @@ entry: define ptx_device void @t4_global_u32(i32 %x) { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_i32; -;CHECK-NEXT: st.global.u32 [r[[R0]]], r{{[0-9]+}}; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32; +;CHECK: st.global.u32 [%r[[R0]]], %r{{[0-9]+}}; +;CHECK: ret; %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0 store i32 %x, i32* %i ret void @@ -223,9 +208,9 @@ entry: define ptx_device void @t4_global_u64(i64 %x) { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_i64; -;CHECK-NEXT: st.global.u64 [r[[R0]]], rd{{[0-9]+}}; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64; +;CHECK: st.global.u64 [%r[[R0]]], %rd{{[0-9]+}}; +;CHECK: ret; %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0 store i64 %x, i64* %i ret void @@ -233,9 +218,9 @@ entry: define ptx_device void @t4_global_f32(float %x) { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_float; -;CHECK-NEXT: st.global.f32 [r[[R0]]], r{{[0-9]+}}; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_float; +;CHECK: st.global.f32 [%r[[R0]]], %f{{[0-9]+}}; +;CHECK: ret; %i = getelementptr [10 x float]* @array_float, i32 0, i32 0 store float %x, float* %i ret void @@ -243,69 +228,19 @@ entry: define ptx_device void @t4_global_f64(double %x) { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_double; -;CHECK-NEXT: st.global.f64 [r[[R0]]], rd{{[0-9]+}}; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_double; +;CHECK: st.global.f64 [%r[[R0]]], %fd{{[0-9]+}}; +;CHECK: ret; %i = getelementptr [10 x double]* @array_double, i32 0, i32 0 store double %x, double* %i ret void } -define ptx_device void @t4_local_u16(i16 %x) { -entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i16; -;CHECK-NEXT: st.local.u16 [r[[R0]]], rh{{[0-9]+}}; -;CHECK-NEXT: ret; - %i = getelementptr [10 x i16] addrspace(2)* @array_local_i16, i32 0, i32 0 - store i16 %x, i16 addrspace(2)* %i - ret void -} - -define ptx_device void @t4_local_u32(i32 %x) { -entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i32; -;CHECK-NEXT: st.local.u32 [r[[R0]]], r{{[0-9]+}}; -;CHECK-NEXT: ret; - %i = getelementptr [10 x i32] addrspace(2)* @array_local_i32, i32 0, i32 0 - store i32 %x, i32 addrspace(2)* %i - ret void -} - -define ptx_device void @t4_local_u64(i64 %x) { -entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i64; -;CHECK-NEXT: st.local.u64 [r[[R0]]], rd{{[0-9]+}}; -;CHECK-NEXT: ret; - %i = getelementptr [10 x i64] addrspace(2)* @array_local_i64, i32 0, i32 0 - store i64 %x, i64 addrspace(2)* %i - ret void -} - -define ptx_device void @t4_local_f32(float %x) { -entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_float; -;CHECK-NEXT: st.local.f32 [r[[R0]]], r{{[0-9]+}}; -;CHECK-NEXT: ret; - %i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0 - store float %x, float addrspace(2)* %i - ret void -} - -define ptx_device void @t4_local_f64(double %x) { -entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_double; -;CHECK-NEXT: st.local.f64 [r[[R0]]], rd{{[0-9]+}}; -;CHECK-NEXT: ret; - %i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0 - store double %x, double addrspace(2)* %i - ret void -} - define ptx_device void @t4_shared_u16(i16 %x) { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i16; -;CHECK-NEXT: st.shared.u16 [r[[R0]]], rh{{[0-9]+}}; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i16; +;CHECK: st.shared.u16 [%r[[R0]]], %rh{{[0-9]+}}; +;CHECK: ret; %i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0 store i16 %x, i16 addrspace(4)* %i ret void @@ -313,9 +248,9 @@ entry: define ptx_device void @t4_shared_u32(i32 %x) { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i32; -;CHECK-NEXT: st.shared.u32 [r[[R0]]], r{{[0-9]+}}; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i32; +;CHECK: st.shared.u32 [%r[[R0]]], %r{{[0-9]+}}; +;CHECK: ret; %i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0 store i32 %x, i32 addrspace(4)* %i ret void @@ -323,9 +258,9 @@ entry: define ptx_device void @t4_shared_u64(i64 %x) { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i64; -;CHECK-NEXT: st.shared.u64 [r[[R0]]], rd{{[0-9]+}}; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i64; +;CHECK: st.shared.u64 [%r[[R0]]], %rd{{[0-9]+}}; +;CHECK: ret; %i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0 store i64 %x, i64 addrspace(4)* %i ret void @@ -333,9 +268,9 @@ entry: define ptx_device void @t4_shared_f32(float %x) { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_float; -;CHECK-NEXT: st.shared.f32 [r[[R0]]], r{{[0-9]+}}; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_float; +;CHECK: st.shared.f32 [%r[[R0]]], %f{{[0-9]+}}; +;CHECK: ret; %i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0 store float %x, float addrspace(4)* %i ret void @@ -343,9 +278,9 @@ entry: define ptx_device void @t4_shared_f64(double %x) { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_double; -;CHECK-NEXT: st.shared.f64 [r[[R0]]], rd{{[0-9]+}}; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_double; +;CHECK: st.shared.f64 [%r[[R0]]], %fd{{[0-9]+}}; +;CHECK: ret; %i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0 store double %x, double addrspace(4)* %i ret void @@ -353,9 +288,9 @@ entry: define ptx_device void @t5_u16(i16 %x) { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16; -;CHECK-NEXT: st.global.u16 [r[[R0]]+2], rh{{[0-9]+}}; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16; +;CHECK: st.global.u16 [%r[[R0]]+2], %rh{{[0-9]+}}; +;CHECK: ret; %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1 store i16 %x, i16* %i ret void @@ -363,9 +298,9 @@ entry: define ptx_device void @t5_u32(i32 %x) { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_i32; -;CHECK-NEXT: st.global.u32 [r[[R0]]+4], r{{[0-9]+}}; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32; +;CHECK: st.global.u32 [%r[[R0]]+4], %r{{[0-9]+}}; +;CHECK: ret; %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1 store i32 %x, i32* %i ret void @@ -373,9 +308,9 @@ entry: define ptx_device void @t5_u64(i64 %x) { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_i64; -;CHECK-NEXT: st.global.u64 [r[[R0]]+8], rd{{[0-9]+}}; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64; +;CHECK: st.global.u64 [%r[[R0]]+8], %rd{{[0-9]+}}; +;CHECK: ret; %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1 store i64 %x, i64* %i ret void @@ -383,9 +318,9 @@ entry: define ptx_device void @t5_f32(float %x) { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_float; -;CHECK-NEXT: st.global.f32 [r[[R0]]+4], r{{[0-9]+}}; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_float; +;CHECK: st.global.f32 [%r[[R0]]+4], %f{{[0-9]+}}; +;CHECK: ret; %i = getelementptr [10 x float]* @array_float, i32 0, i32 1 store float %x, float* %i ret void @@ -393,9 +328,9 @@ entry: define ptx_device void @t5_f64(double %x) { entry: -;CHECK: mov.u32 r[[R0:[0-9]+]], array_double; -;CHECK-NEXT: st.global.f64 [r[[R0]]+8], rd{{[0-9]+}}; -;CHECK-NEXT: ret; +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_double; +;CHECK: st.global.f64 [%r[[R0]]+8], %fd{{[0-9]+}}; +;CHECK: ret; %i = getelementptr [10 x double]* @array_double, i32 0, i32 1 store double %x, double* %i ret void diff --git a/test/CodeGen/PTX/stack-object.ll b/test/CodeGen/PTX/stack-object.ll new file mode 100644 index 0000000..65f8ee2 --- /dev/null +++ b/test/CodeGen/PTX/stack-object.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s + +define ptx_device float @stack1(float %a) { + ; CHECK: .local .align 4 .b8 __local0[4]; + %a.2 = alloca float, align 4 + ; CHECK: st.local.f32 [__local0], %f0 + store float %a, float* %a.2 + %a.3 = load float* %a.2 + ret float %a.3 +} + +define ptx_device float @stack1_align8(float %a) { + ; CHECK: .local .align 8 .b8 __local0[4]; + %a.2 = alloca float, align 8 + ; CHECK: st.local.f32 [__local0], %f0 + store float %a, float* %a.2 + %a.3 = load float* %a.2 + ret float %a.3 +} diff --git a/test/CodeGen/PTX/sub.ll b/test/CodeGen/PTX/sub.ll index acef396..7ac886a 100644 --- a/test/CodeGen/PTX/sub.ll +++ b/test/CodeGen/PTX/sub.ll @@ -1,71 +1,71 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device i16 @t1_u16(i16 %x, i16 %y) { -; CHECK: sub.u16 rh{{[0-9]+}}, rh{{[0-9]+}}, rh{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: sub.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}, %rh{{[0-9]+}}; +; CHECK: ret; %z = sub i16 %x, %y ret i16 %z } define ptx_device i32 @t1_u32(i32 %x, i32 %y) { -; CHECK: sub.u32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: sub.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: ret; %z = sub i32 %x, %y ret i32 %z } define ptx_device i64 @t1_u64(i64 %x, i64 %y) { -; CHECK: sub.u64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}; -; CHECK-NEXT: ret; +; CHECK: sub.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}}; +; CHECK: ret; %z = sub i64 %x, %y ret i64 %z } define ptx_device float @t1_f32(float %x, float %y) { -; CHECK: sub.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}} -; CHECK-NEXT: ret; +; CHECK: sub.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}} +; CHECK: ret; %z = fsub float %x, %y ret float %z } define ptx_device double @t1_f64(double %x, double %y) { -; CHECK: sub.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}} -; CHECK-NEXT: ret; +; CHECK: sub.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}} +; CHECK: ret; %z = fsub double %x, %y ret double %z } define ptx_device i16 @t2_u16(i16 %x) { -; CHECK: add.u16 rh{{[0-9]+}}, rh{{[0-9]+}}, -1; -; CHECK-NEXT: ret; +; CHECK: add.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}, -1; +; CHECK: ret; %z = sub i16 %x, 1 ret i16 %z } define ptx_device i32 @t2_u32(i32 %x) { -; CHECK: add.u32 r{{[0-9]+}}, r{{[0-9]+}}, -1; -; CHECK-NEXT: ret; +; CHECK: add.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, -1; +; CHECK: ret; %z = sub i32 %x, 1 ret i32 %z } define ptx_device i64 @t2_u64(i64 %x) { -; CHECK: add.u64 rd{{[0-9]+}}, rd{{[0-9]+}}, -1; -; CHECK-NEXT: ret; +; CHECK: add.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, -1; +; CHECK: ret; %z = sub i64 %x, 1 ret i64 %z } define ptx_device float @t2_f32(float %x) { -; CHECK: add.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, 0FBF800000; -; CHECK-NEXT: ret; +; CHECK: add.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, 0DBFF0000000000000; +; CHECK: ret; %z = fsub float %x, 1.0 ret float %z } define ptx_device double @t2_f64(double %x) { -; CHECK: add.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, 0DBFF0000000000000; -; CHECK-NEXT: ret; +; CHECK: add.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, 0DBFF0000000000000; +; CHECK: ret; %z = fsub double %x, 1.0 ret double %z } |