summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: peter <peter@FreeBSD.org> 2003-05-10 00:49:56 +0000
committer: peter <peter@FreeBSD.org> 2003-05-10 00:49:56 +0000
commit: 63c2624c8113edd59bb45a1684a302060455df68 (patch)
tree: 4443470aafa10e7bc3e3d2f7095696148e68a496
parent: dba87522febe69543d7e0e418541ddfd1d34560c (diff)
download: FreeBSD-src-63c2624c8113edd59bb45a1684a302060455df68.zip
download: FreeBSD-src-63c2624c8113edd59bb45a1684a302060455df68.tar.gz
Finish translating i386/support.s into amd64 asm - replace bcopy etc with
asm versions. This yields about a 5% kernel compile time speedup.
-rw-r--r-- sys/amd64/amd64/machdep.c 61
-rw-r--r-- sys/amd64/amd64/support.S 142
2 files changed, 140 insertions, 63 deletions
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index dd9c8c3..8e0ccf8 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -1697,64 +1697,3 @@ outb(u_int port, u_char data)
}
#endif /* DDB */
-/* Old C bcopy (deleted by this commit): copies len bytes from src to dest one byte at a time. */
-void
-bcopy(const void *src, void *dest, size_t len)
-{
- const char *csrc;
- char *cdest;
- size_t i;
-
- csrc = (const char *)src;
- cdest = (char *)dest;
- if (src < dest) { /* source starts below destination: copy from the last byte down */
- for (i = len - 1; i != (size_t)-1; i--) /* ends when i wraps past 0; also skips the loop for len == 0 */
- cdest[i] = csrc[i];
- } else { /* otherwise copy from the first byte up */
- for (i = 0; i < len; i++)
- cdest[i] = csrc[i];
- }
-}
-/* Old C memcpy (deleted by this commit): calls bcopy(src, dest, len) and returns dest. */
-void *
-memcpy(void *dest, const void *src, size_t len)
-{
-
- bcopy(src, dest, len);
- return dest;
-}
-/* Old C bzero (deleted by this commit): stores zero into each of the len bytes starting at buf. */
-void
-bzero(void *buf, size_t len)
-{
- char *cbuf;
- size_t i;
-
- cbuf = (char *)buf;
- for (i = 0; i < len; i++)
- cbuf[i] = 0;
-}
-/* Old C pagezero (deleted by this commit): zeroes PAGE_SIZE bytes at buf by calling bzero. */
-void
-pagezero(void *buf)
-{
-
- bzero(buf, PAGE_SIZE);
-}
-/* Old C bcmp (deleted by this commit): returns 0 when the first len bytes of s1 and s2 match. */
-int
-bcmp(const void *s1, const void *s2, size_t len)
-{
- const char *cs1, *cs2;
- int diff;
- size_t i;
-
- cs1 = (const char *)s1;
- cs2 = (const char *)s2;
- for (i = 0; i < len; i++) {
- diff = cs2[i] - cs1[i]; /* s2 byte minus s1 byte at the same index */
- if (diff)
- return diff; /* first mismatch: return that difference */
- }
- return 0;
-}
diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S
index 06a3c4e..b9b18e8 100644
--- a/sys/amd64/amd64/support.S
+++ b/sys/amd64/amd64/support.S
@@ -34,15 +34,153 @@
*/
#include <machine/asmacros.h>
-#include <machine/cputypes.h>
#include <machine/pmap.h>
-#include <machine/specialreg.h>
#include "assym.s"
.text
+/*
+ * bcopy family
+ * void bzero(void *buf, size_t len)
+ */
+/* bzero: buf arrives in %rdi, len in %rsi; all 64 bits of len are used. */
+/* done */
+ENTRY(bzero)
+ movq %rsi,%rcx /* %rcx = len */
+ xorq %rax,%rax /* zero is the fill value for stosq/stosb */
+ shrq $3,%rcx /* number of whole 8-byte words */
+ cld /* store forwards */
+ rep
+ stosq
+ movq %rsi,%rcx
+ andq $7,%rcx /* leftover bytes (len mod 8) */
+ rep
+ stosb
+ ret
+
+/* Address: %rdi. Scans for non-zero quadwords; zeros are written only once one has been found. */
+ENTRY(pagezero)
+ movq $512, %rcx /* 512 quadwords = 4096 bytes to check */
+ cld /* scan and store forwards */
+
+ ALIGN_TEXT
+1:
+ xorq %rax, %rax /* zero: value compared by scasq and written by stosq */
+ repe
+ scasq /* advance while the quadword at %rdi equals zero */
+ jnz 2f /* stopped on a non-zero quadword */
+
+ ret /* every scanned quadword was already zero */
+
+ ALIGN_TEXT
+
+2:
+ incq %rcx /* re-count the quadword that failed the compare */
+ subq $8, %rdi /* and point %rdi back at it */
+
+ movq %rcx, %rdx /* %rdx = quadwords not yet known to be zero */
+ cmpq $16, %rcx
+
+ jge 3f /* 16 or more left: store all of them (%rcx == %rdx here, so %rdx ends at 0) */
+
+ movq %rdi, %r8
+ andq $0x7f, %r8 /* byte offset of %rdi within its 128-byte block */
+ shrq $3, %r8 /* ... converted to a quadword index */
+ movq $16, %rcx
+ subq %r8, %rcx /* %rcx = quadwords up to the end of that 128-byte block */
+
+3:
+ subq %rcx, %rdx /* %rdx = quadwords left after this store */
+ rep
+ stosq /* write %rcx zero quadwords */
+
+ movq %rdx, %rcx
+ testq %rdx, %rdx
+ jnz 1b /* anything left goes back to the scan loop */
+
+ ret
+
+/* bcmp: compare %rdx bytes at %rdi and %rsi; %rax = 0 if they all match, 1 otherwise. */
+ENTRY(bcmp)
+ xorq %rax,%rax /* result defaults to 0 (equal) */
+
+ movq %rdx,%rcx
+ shrq $3,%rcx /* whole quadwords; leaves ZF set when there are none */
+ cld /* compare forwards */
+ repe
+ cmpsq
+ jne 1f /* a quadword differed */
+
+ movq %rdx,%rcx
+ andq $7,%rcx /* trailing bytes; leaves ZF set when there are none */
+ repe
+ cmpsb
+ je 2f /* no difference found */
+1:
+ incq %rax /* report a mismatch as 1 */
+2:
+ ret
+
+/*
+ * bcopy(src, dst, cnt): copy cnt bytes, choosing the direction so overlapping regions copy correctly
+ * rdi, rsi, rdx
+ * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
+ */
+ENTRY(bcopy)
+ xchgq %rsi,%rdi /* movs reads from %rsi and writes to %rdi, so swap src/dst */
+ movq %rdx,%rcx
+
+ movq %rdi,%rax
+ subq %rsi,%rax /* %rax = dst - src */
+ cmpq %rcx,%rax /* overlapping && src < dst? */
+ jb 1f /* unsigned (dst - src) < cnt: must copy backwards */
+
+ shrq $3,%rcx /* copy by 64-bit words */
+ cld /* nope, copy forwards */
+ rep
+ movsq
+ movq %rdx,%rcx
+ andq $7,%rcx /* any bytes left? */
+ rep
+ movsb
+ ret
+
+ /* ALIGN_TEXT */
+1:
+ addq %rcx,%rdi /* copy backwards */
+ addq %rcx,%rsi
+ decq %rdi /* point both at the last byte of each region */
+ decq %rsi
+ andq $7,%rcx /* any fractional bytes? */
+ std /* movs now decrements the pointers */
+ rep
+ movsb
+ movq %rdx,%rcx /* copy remainder by 64-bit words */
+ shrq $3,%rcx
+ subq $7,%rsi /* step back so each pointer addresses the start of its last quadword */
+ subq $7,%rdi
+ rep
+ movsq
+ cld /* restore the forward direction before returning */
+ ret
+
+/*
+ * memcpy(dst, src, cnt)
+ * rdi, rsi, rdx
+ * Copies cnt bytes forwards and returns dst in %rax.
+ * Note: memcpy does not support overlapping copies
+ */
+ENTRY(memcpy)
+ movq %rdi,%rax /* return value is dst; capture it before movs advances %rdi */
+ movq %rdx,%rcx
+ shrq $3,%rcx /* copy by 64-bit words */
+ cld /* copy forwards */
+ rep
+ movsq
+ movq %rdx,%rcx
+ andq $7,%rcx /* any bytes left? */
+ rep
+ movsb
+ ret
+
/* fillw(pat, base, cnt) */
/* %rdi,%rsi, %rdx */
ENTRY(fillw)
OpenPOWER on IntegriCloud