summaryrefslogtreecommitdiffstats
path: root/test/CodeGen/X86/avx-shuffle.ll
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen/X86/avx-shuffle.ll')
-rw-r--r--test/CodeGen/X86/avx-shuffle.ll63
1 files changed, 57 insertions, 6 deletions
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index ec11654..73faa1f 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -6,7 +6,7 @@ define <4 x float> @test1(<4 x float> %a) nounwind {
ret <4 x float> %b
; CHECK: test1:
; CHECK: vshufps
-; CHECK: vpermilps
+; CHECK: vpshufd
}
; rdar://10538417
@@ -98,23 +98,23 @@ define i32 @test10(<4 x i32> %a) nounwind {
}
define <4 x float> @test11(<4 x float> %a) nounwind {
-; check: test11
-; check: vpermilps $27
+; CHECK: test11
+; CHECK: vpshufd $27
%tmp1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %tmp1
}
define <4 x float> @test12(<4 x float>* %a) nounwind {
; CHECK: test12
-; CHECK: vpermilps $27, (
+; CHECK: vpshufd
%tmp0 = load <4 x float>* %a
%tmp1 = shufflevector <4 x float> %tmp0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %tmp1
}
define <4 x i32> @test13(<4 x i32> %a) nounwind {
-; check: test13
-; check: vpshufd $27
+; CHECK: test13
+; CHECK: vpshufd $27
%tmp1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x i32> %tmp1
}
@@ -246,3 +246,54 @@ define <8 x float> @test19(<8 x float> %A, <8 x float>%B) nounwind {
ret <8 x float>%S
}
+; rdar://12684358
+; Make sure loads happen before stores.
+; CHECK: swap8doubles
+; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
+; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
+; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
+; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
+; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}}
+; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}}
+; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi)
+; CHECK: vextractf128
+; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi)
+; CHECK: vextractf128
+; CHECK: vmovaps %ymm{{[0-9]+}}, {{[0-9]*}}(%rsi)
+; CHECK: vmovaps %ymm{{[0-9]+}}, {{[0-9]*}}(%rsi)
+define void @swap8doubles(double* nocapture %A, double* nocapture %C) nounwind uwtable ssp {
+entry:
+ %add.ptr = getelementptr inbounds double* %A, i64 2
+ %v.i = bitcast double* %A to <2 x double>*
+ %0 = load <2 x double>* %v.i, align 1
+ %shuffle.i.i = shufflevector <2 x double> %0, <2 x double> <double 0.000000e+00, double undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+ %v1.i = bitcast double* %add.ptr to <2 x double>*
+ %1 = load <2 x double>* %v1.i, align 1
+ %2 = tail call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %shuffle.i.i, <2 x double> %1, i8 1) nounwind
+ %add.ptr1 = getelementptr inbounds double* %A, i64 6
+ %add.ptr2 = getelementptr inbounds double* %A, i64 4
+ %v.i27 = bitcast double* %add.ptr2 to <2 x double>*
+ %3 = load <2 x double>* %v.i27, align 1
+ %shuffle.i.i28 = shufflevector <2 x double> %3, <2 x double> <double 0.000000e+00, double undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+ %v1.i29 = bitcast double* %add.ptr1 to <2 x double>*
+ %4 = load <2 x double>* %v1.i29, align 1
+ %5 = tail call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %shuffle.i.i28, <2 x double> %4, i8 1) nounwind
+ %6 = bitcast double* %C to <4 x double>*
+ %7 = load <4 x double>* %6, align 32
+ %add.ptr5 = getelementptr inbounds double* %C, i64 4
+ %8 = bitcast double* %add.ptr5 to <4 x double>*
+ %9 = load <4 x double>* %8, align 32
+ %shuffle.i26 = shufflevector <4 x double> %7, <4 x double> undef, <2 x i32> <i32 0, i32 1>
+ %10 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %7, i8 1)
+ %shuffle.i = shufflevector <4 x double> %9, <4 x double> undef, <2 x i32> <i32 0, i32 1>
+ %11 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %9, i8 1)
+ store <2 x double> %shuffle.i26, <2 x double>* %v.i, align 16
+ store <2 x double> %10, <2 x double>* %v1.i, align 16
+ store <2 x double> %shuffle.i, <2 x double>* %v.i27, align 16
+ store <2 x double> %11, <2 x double>* %v1.i29, align 16
+ store <4 x double> %2, <4 x double>* %6, align 32
+ store <4 x double> %5, <4 x double>* %8, align 32
+ ret void
+}
+declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone
+declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone
OpenPOWER on IntegriCloud