summaryrefslogtreecommitdiffstats
path: root/lib/Target/X86/README-SSE.txt
diff options
context:
space:
mode:
authorrdivacky <rdivacky@FreeBSD.org>2010-02-16 09:30:23 +0000
committerrdivacky <rdivacky@FreeBSD.org>2010-02-16 09:30:23 +0000
commitf25ddd991a5601d0101602c4c263a58c7af4b8a2 (patch)
tree4cfca640904d1896e25032757a61f8959c066919 /lib/Target/X86/README-SSE.txt
parent3fd58f91dd318518f7daa4ba64c0aaf31799d89b (diff)
downloadFreeBSD-src-f25ddd991a5601d0101602c4c263a58c7af4b8a2.zip
FreeBSD-src-f25ddd991a5601d0101602c4c263a58c7af4b8a2.tar.gz
Update LLVM to r96341.
Diffstat (limited to 'lib/Target/X86/README-SSE.txt')
-rw-r--r--lib/Target/X86/README-SSE.txt53
1 files changed, 52 insertions, 1 deletions
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index 0f3e44b..19eb05e 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -376,7 +376,7 @@ ret
... saving two instructions.
The basic idea is that a reload from a spill slot, can, if only one 4-byte
-chunk is used, bring in 3 zeros the the one element instead of 4 elements.
+chunk is used, bring in 3 zeros the one element instead of 4 elements.
This can be used to simplify a variety of shuffle operations, where the
elements are fixed zeros.
@@ -936,3 +936,54 @@ Also, the 'ret's should be shared. This is PR6032.
//===---------------------------------------------------------------------===//
+These should compile into the same code (PR6214): Perhaps instcombine should
+canonicalize the former into the later?
+
+define float @foo(float %x) nounwind {
+ %t = bitcast float %x to i32
+ %s = and i32 %t, 2147483647
+ %d = bitcast i32 %s to float
+ ret float %d
+}
+
+declare float @fabsf(float %n)
+define float @bar(float %x) nounwind {
+ %d = call float @fabsf(float %x)
+ ret float %d
+}
+
+//===---------------------------------------------------------------------===//
+
+This IR (from PR6194):
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+%0 = type { double, double }
+%struct.float3 = type { float, float, float }
+
+define void @test(%0, %struct.float3* nocapture %res) nounwind noinline ssp {
+entry:
+ %tmp18 = extractvalue %0 %0, 0 ; <double> [#uses=1]
+ %tmp19 = bitcast double %tmp18 to i64 ; <i64> [#uses=1]
+ %tmp20 = zext i64 %tmp19 to i128 ; <i128> [#uses=1]
+ %tmp10 = lshr i128 %tmp20, 32 ; <i128> [#uses=1]
+ %tmp11 = trunc i128 %tmp10 to i32 ; <i32> [#uses=1]
+ %tmp12 = bitcast i32 %tmp11 to float ; <float> [#uses=1]
+ %tmp5 = getelementptr inbounds %struct.float3* %res, i64 0, i32 1 ; <float*> [#uses=1]
+ store float %tmp12, float* %tmp5
+ ret void
+}
+
+Compiles to:
+
+_test: ## @test
+ movd %xmm0, %rax
+ shrq $32, %rax
+ movl %eax, 4(%rdi)
+ ret
+
+This would be better kept in the SSE unit by treating XMM0 as a 4xfloat and
+doing a shuffle from v[1] to v[0] then a float store.
+
+//===---------------------------------------------------------------------===//
OpenPOWER on IntegriCloud