summaryrefslogtreecommitdiffstats
path: root/test/CodeGen/X86/avx2-intrinsics-x86.ll
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen/X86/avx2-intrinsics-x86.ll')
-rw-r--r--test/CodeGen/X86/avx2-intrinsics-x86.ll179
1 files changed, 179 insertions, 0 deletions
diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll
index 3f27a02..a6141b0 100644
--- a/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -976,3 +976,182 @@ define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
ret void
}
declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
+
+define <2 x double> @test_x86_avx2_gather_d_pd(<2 x double> %a0, i8* %a1,
+ <4 x i32> %idx, <2 x double> %mask) {
+ ; CHECK: vgatherdpd
+ %res = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0,
+ i8* %a1, <4 x i32> %idx, <2 x double> %mask, i8 2) ;
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*,
+ <4 x i32>, <2 x double>, i8) nounwind readonly
+
+define <4 x double> @test_x86_avx2_gather_d_pd_256(<4 x double> %a0, i8* %a1,
+ <4 x i32> %idx, <4 x double> %mask) {
+ ; CHECK: vgatherdpd
+ %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0,
+ i8* %a1, <4 x i32> %idx, <4 x double> %mask, i8 2) ;
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*,
+ <4 x i32>, <4 x double>, i8) nounwind readonly
+
+define <2 x double> @test_x86_avx2_gather_q_pd(<2 x double> %a0, i8* %a1,
+ <2 x i64> %idx, <2 x double> %mask) {
+ ; CHECK: vgatherqpd
+ %res = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0,
+ i8* %a1, <2 x i64> %idx, <2 x double> %mask, i8 2) ;
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, i8*,
+ <2 x i64>, <2 x double>, i8) nounwind readonly
+
+define <4 x double> @test_x86_avx2_gather_q_pd_256(<4 x double> %a0, i8* %a1,
+ <4 x i64> %idx, <4 x double> %mask) {
+ ; CHECK: vgatherqpd
+ %res = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0,
+ i8* %a1, <4 x i64> %idx, <4 x double> %mask, i8 2) ;
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*,
+ <4 x i64>, <4 x double>, i8) nounwind readonly
+
+define <4 x float> @test_x86_avx2_gather_d_ps(<4 x float> %a0, i8* %a1,
+ <4 x i32> %idx, <4 x float> %mask) {
+ ; CHECK: vgatherdps
+ %res = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0,
+ i8* %a1, <4 x i32> %idx, <4 x float> %mask, i8 2) ;
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*,
+ <4 x i32>, <4 x float>, i8) nounwind readonly
+
+define <8 x float> @test_x86_avx2_gather_d_ps_256(<8 x float> %a0, i8* %a1,
+ <8 x i32> %idx, <8 x float> %mask) {
+ ; CHECK: vgatherdps
+ %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0,
+ i8* %a1, <8 x i32> %idx, <8 x float> %mask, i8 2) ;
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*,
+ <8 x i32>, <8 x float>, i8) nounwind readonly
+
+define <4 x float> @test_x86_avx2_gather_q_ps(<4 x float> %a0, i8* %a1,
+ <2 x i64> %idx, <4 x float> %mask) {
+ ; CHECK: vgatherqps
+ %res = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0,
+ i8* %a1, <2 x i64> %idx, <4 x float> %mask, i8 2) ;
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*,
+ <2 x i64>, <4 x float>, i8) nounwind readonly
+
+define <4 x float> @test_x86_avx2_gather_q_ps_256(<4 x float> %a0, i8* %a1,
+ <4 x i64> %idx, <4 x float> %mask) {
+ ; CHECK: vgatherqps
+ %res = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0,
+ i8* %a1, <4 x i64> %idx, <4 x float> %mask, i8 2) ;
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*,
+ <4 x i64>, <4 x float>, i8) nounwind readonly
+
+define <2 x i64> @test_x86_avx2_gather_d_q(<2 x i64> %a0, i8* %a1,
+ <4 x i32> %idx, <2 x i64> %mask) {
+ ; CHECK: vpgatherdq
+ %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0,
+ i8* %a1, <4 x i32> %idx, <2 x i64> %mask, i8 2) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*,
+ <4 x i32>, <2 x i64>, i8) nounwind readonly
+
+define <4 x i64> @test_x86_avx2_gather_d_q_256(<4 x i64> %a0, i8* %a1,
+ <4 x i32> %idx, <4 x i64> %mask) {
+ ; CHECK: vpgatherdq
+ %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0,
+ i8* %a1, <4 x i32> %idx, <4 x i64> %mask, i8 2) ;
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*,
+ <4 x i32>, <4 x i64>, i8) nounwind readonly
+
+define <2 x i64> @test_x86_avx2_gather_q_q(<2 x i64> %a0, i8* %a1,
+ <2 x i64> %idx, <2 x i64> %mask) {
+ ; CHECK: vpgatherqq
+ %res = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0,
+ i8* %a1, <2 x i64> %idx, <2 x i64> %mask, i8 2) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*,
+ <2 x i64>, <2 x i64>, i8) nounwind readonly
+
+define <4 x i64> @test_x86_avx2_gather_q_q_256(<4 x i64> %a0, i8* %a1,
+ <4 x i64> %idx, <4 x i64> %mask) {
+ ; CHECK: vpgatherqq
+ %res = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0,
+ i8* %a1, <4 x i64> %idx, <4 x i64> %mask, i8 2) ;
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*,
+ <4 x i64>, <4 x i64>, i8) nounwind readonly
+
+define <4 x i32> @test_x86_avx2_gather_d_d(<4 x i32> %a0, i8* %a1,
+ <4 x i32> %idx, <4 x i32> %mask) {
+ ; CHECK: vpgatherdd
+ %res = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0,
+ i8* %a1, <4 x i32> %idx, <4 x i32> %mask, i8 2) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*,
+ <4 x i32>, <4 x i32>, i8) nounwind readonly
+
+define <8 x i32> @test_x86_avx2_gather_d_d_256(<8 x i32> %a0, i8* %a1,
+ <8 x i32> %idx, <8 x i32> %mask) {
+ ; CHECK: vpgatherdd
+ %res = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0,
+ i8* %a1, <8 x i32> %idx, <8 x i32> %mask, i8 2) ;
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*,
+ <8 x i32>, <8 x i32>, i8) nounwind readonly
+
+define <4 x i32> @test_x86_avx2_gather_q_d(<4 x i32> %a0, i8* %a1,
+ <2 x i64> %idx, <4 x i32> %mask) {
+ ; CHECK: vpgatherqd
+ %res = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0,
+ i8* %a1, <2 x i64> %idx, <4 x i32> %mask, i8 2) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*,
+ <2 x i64>, <4 x i32>, i8) nounwind readonly
+
+define <4 x i32> @test_x86_avx2_gather_q_d_256(<4 x i32> %a0, i8* %a1,
+ <4 x i64> %idx, <4 x i32> %mask) {
+ ; CHECK: vpgatherqd
+ %res = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0,
+ i8* %a1, <4 x i64> %idx, <4 x i32> %mask, i8 2) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*,
+ <4 x i64>, <4 x i32>, i8) nounwind readonly
+
+; PR13298
+define <8 x float> @test_gather_mask(<8 x float> %a0, float* %a,
+ <8 x i32> %idx, <8 x float> %mask,
+ float* nocapture %out) {
+; CHECK: test_gather_mask
+; CHECK: vmovdqa %ymm2, [[DEST:%.*]]
+; CHECK: vgatherdps [[DEST]]
+;; gather with mask
+ %a_i8 = bitcast float* %a to i8*
+ %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0,
+ i8* %a_i8, <8 x i32> %idx, <8 x float> %mask, i8 4) ;
+
+;; for debugging, we'll just dump out the mask
+ %out_ptr = bitcast float * %out to <8 x float> *
+ store <8 x float> %mask, <8 x float> * %out_ptr, align 4
+
+ ret <8 x float> %res
+}
OpenPOWER on IntegriCloud