summaryrefslogtreecommitdiffstats
path: root/lib/Target/X86/README.txt
diff options
context:
space:
mode:
authordim <dim@FreeBSD.org>2011-02-26 22:03:50 +0000
committerdim <dim@FreeBSD.org>2011-02-26 22:03:50 +0000
commitc80ac9d286b8fcc6d1ee5d76048134cf80aa9edc (patch)
treeddf53b8bd9235bcb0b8aae16c5e22310dcdad665 /lib/Target/X86/README.txt
parentcbb70ce070d220642b038ea101d9c0f9fbf860d6 (diff)
downloadFreeBSD-src-c80ac9d286b8fcc6d1ee5d76048134cf80aa9edc.zip
FreeBSD-src-c80ac9d286b8fcc6d1ee5d76048134cf80aa9edc.tar.gz
Vendor import of llvm trunk r126547:
http://llvm.org/svn/llvm-project/llvm/trunk@126547
Diffstat (limited to 'lib/Target/X86/README.txt')
-rw-r--r--lib/Target/X86/README.txt82
1 files changed, 57 insertions, 25 deletions
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index c10e170..abd1515 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1879,39 +1879,71 @@ _add32carry:
//===---------------------------------------------------------------------===//
-This:
-char t(char c) {
- return c/3;
+The hot loop of 256.bzip2 contains code that looks a bit like this:
+
+int foo(char *P, char *Q, int x, int y) {
+ if (P[0] != Q[0])
+ return P[0] < Q[0];
+ if (P[1] != Q[1])
+ return P[1] < Q[1];
+ if (P[2] != Q[2])
+ return P[2] < Q[2];
+ return P[3] < Q[3];
}
-Compiles to: $clang t.c -S -o - -O3 -mkernel -fomit-frame-pointer
+In the real code, we get a lot more wrong than this. However, even in this
+code we generate:
-_t: ## @t
- movslq %edi, %rax
- imulq $-1431655765, %rax, %rcx ## imm = 0xFFFFFFFFAAAAAAAB
- shrq $32, %rcx
- addl %ecx, %eax
- movl %eax, %ecx
- shrl $31, %ecx
- shrl %eax
- addl %ecx, %eax
- movsbl %al, %eax
+_foo: ## @foo
+## BB#0: ## %entry
+ movb (%rsi), %al
+ movb (%rdi), %cl
+ cmpb %al, %cl
+ je LBB0_2
+LBB0_1: ## %if.then
+ cmpb %al, %cl
+ jmp LBB0_5
+LBB0_2: ## %if.end
+ movb 1(%rsi), %al
+ movb 1(%rdi), %cl
+ cmpb %al, %cl
+ jne LBB0_1
+## BB#3: ## %if.end38
+ movb 2(%rsi), %al
+ movb 2(%rdi), %cl
+ cmpb %al, %cl
+ jne LBB0_1
+## BB#4: ## %if.end60
+ movb 3(%rdi), %al
+ cmpb 3(%rsi), %al
+LBB0_5: ## %if.end60
+ setl %al
+ movzbl %al, %eax
ret
-GCC gets:
+Note that we generate jumps to LBB0_1 which does a redundant compare. The
+redundant compare also forces the register values to be live, which prevents
+folding one of the loads into the compare. In contrast, GCC 4.2 produces:
-_t:
- movl $86, %eax
- imulb %dil
- shrw $8, %ax
- sarb $7, %dil
- subb %dil, %al
- movsbl %al,%eax
+_foo:
+ movzbl (%rsi), %eax
+ cmpb %al, (%rdi)
+ jne L10
+L12:
+ movzbl 1(%rsi), %eax
+ cmpb %al, 1(%rdi)
+ jne L10
+ movzbl 2(%rsi), %eax
+ cmpb %al, 2(%rdi)
+ jne L10
+ movzbl 3(%rdi), %eax
+ cmpb 3(%rsi), %al
+L10:
+ setl %al
+ movzbl %al, %eax
ret
-which is nicer. This also happens for int, not just char.
+which is "perfect".
//===---------------------------------------------------------------------===//
-
-
OpenPOWER on IntegriCloud