summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: bde <bde@FreeBSD.org> 1996-09-20 16:52:09 +0000
committer: bde <bde@FreeBSD.org> 1996-09-20 16:52:09 +0000
commit: 4d197a331f4ccecd9f1e2bba6d6b74ab0d65528d (patch)
tree: f6b095c7a68d03e94aa65688a465a542356a3cdb
parent: 4f06fd88112fe09a6629d16e753784eaeaab05e7 (diff)
download: FreeBSD-src-4d197a331f4ccecd9f1e2bba6d6b74ab0d65528d.zip
          FreeBSD-src-4d197a331f4ccecd9f1e2bba6d6b74ab0d65528d.tar.gz
Changed an arg name in the pseudo-prototype for bzero() to match
the prototype.

Put the jump table for i486_bzero() in the data section. This speeds
up i486_bzero() a little on Pentiums without significantly affecting
its speed on 486's.

Don't waste time falling through 14 nop's to return from do1 in
i486_bzero().

Use fastmove() for counts >= 1024 (was > 1024). Cosmetic.

Fixed profiling of fastmove().

Restored meaningful labels from the pre-1.1 version in fastmove().
Local labels are evil.

Fixed (high resolution non-) profiling of __bb_init_func().
-rw-r--r-- sys/amd64/amd64/support.S | 51
-rw-r--r-- sys/amd64/amd64/support.s | 51
-rw-r--r-- sys/i386/i386/support.s | 51
3 files changed, 84 insertions, 69 deletions
diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S
index 0874d2b..e583aee 100644
--- a/sys/amd64/amd64/support.S
+++ b/sys/amd64/amd64/support.S
@@ -30,7 +30,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $Id: support.s,v 1.37 1996/06/13 07:17:20 asami Exp $
+ * $Id: support.s,v 1.38 1996/09/10 08:31:57 bde Exp $
*/
#include <sys/errno.h>
@@ -53,7 +53,7 @@ _bzero: .long _generic_bzero
/*
* bcopy family
- * void bzero(void *base, u_int cnt)
+ * void bzero(void *buf, u_int len)
*/
ENTRY(generic_bzero)
@@ -141,13 +141,14 @@ ENTRY(i486_bzero)
*
* XXX need a const section for non-text
*/
- SUPERALIGN_TEXT
+ .data
jtab:
.long do0
.long do1
.long do2
.long do3
+ .text
SUPERALIGN_TEXT
5:
jmp jtab(,%ecx,4)
@@ -166,6 +167,7 @@ do2:
SUPERALIGN_TEXT
do1:
movb %al,(%edx)
+ ret
SUPERALIGN_TEXT
do0:
@@ -294,6 +296,7 @@ bcopy:
subl %esi,%eax
cmpl %ecx,%eax /* overlapping? */
jb 1f
+
shrl $2,%ecx /* copy by 32-bit words */
cld /* nope, copy forwards */
rep
@@ -458,14 +461,16 @@ ENTRY(copyout) /* copyout(from_kernel, to_user, len) */
movl %ebx,%ecx
#if defined(I586_CPU) && defined(I586_FAST_BCOPY)
cmpl $1024,%ecx
- jbe slow_copyout
+ jb slow_copyout
#if defined(I386_CPU) || defined(I486_CPU) || defined(I686_CPU)
cmpl $CPUCLASS_586,_cpu_class
jne slow_copyout
#endif /* I386_CPU || I486_CPU || I686_CPU */
- call fastmove
+ pushl %ecx
+ call _fastmove
+ addl $4,%esp
jmp done_copyout
ALIGN_TEXT
@@ -520,14 +525,16 @@ ENTRY(copyin)
#if defined(I586_CPU) && defined(I586_FAST_BCOPY)
cmpl $1024,%ecx
- jbe slow_copyin
+ jb slow_copyin
#if defined(I386_CPU) || defined(I486_CPU) || defined(I686_CPU)
cmpl $CPUCLASS_586,_cpu_class
jne slow_copyin
#endif /* I386_CPU || I486_CPU || I686_CPU */
- call fastmove
+ pushl %ecx
+ call _fastmove
+ addl $4,%esp
jmp done_copyin
ALIGN_TEXT
@@ -567,19 +574,20 @@ copyin_fault:
/* fastmove(src, dst, len)
src in %esi
dst in %edi
- len in %ecx
+ len in %ecx XXX changed to on stack for profiling
uses %eax and %edx for tmp. storage
*/
- ALIGN_TEXT
-fastmove:
+/* XXX use ENTRY() to get profiling. fastmove() is actually a non-entry. */
+ENTRY(fastmove)
+ movl 4(%esp),%ecx
cmpl $63,%ecx
- jbe 8f
+ jbe fastmove_tail
testl $7,%esi /* check if src addr is multiple of 8 */
- jnz 8f
+ jnz fastmove_tail
testl $7,%edi /* check if dst addr is multiple of 8 */
- jnz 8f
+ jnz fastmove_tail
pushl %ebp
movl %esp,%ebp
@@ -652,7 +660,7 @@ fastmove:
popl %esi
5:
ALIGN_TEXT
-7:
+fastmove_loop:
fildq 0(%esi)
fildq 8(%esi)
fildq 16(%esi)
@@ -673,7 +681,7 @@ fastmove:
addl $64,%esi
addl $64,%edi
cmpl $63,%ecx
- ja 7b
+ ja fastmove_loop
popl %eax
addl %eax,%ecx
cmpl $64,%ecx
@@ -704,7 +712,7 @@ fastmove:
popl %ebp
ALIGN_TEXT
-8:
+fastmove_tail:
movb %cl,%al
shrl $2,%ecx /* copy longword-wise */
cld
@@ -1171,10 +1179,7 @@ ENTRY(longjmp)
* Here for doing BB-profiling (gcc -a).
* We rely on the "bbset" instead, but need a dummy function.
*/
- .text
- .align 2
-.globl ___bb_init_func
-___bb_init_func:
- movl 4(%esp),%eax
- movl $1,(%eax)
- ret
+NON_GPROF_ENTRY(__bb_init_func)
+ movl 4(%esp),%eax
+ movl $1,(%eax)
+ .byte 0xc3 /* avoid macro for `ret' */
diff --git a/sys/amd64/amd64/support.s b/sys/amd64/amd64/support.s
index 0874d2b..e583aee 100644
--- a/sys/amd64/amd64/support.s
+++ b/sys/amd64/amd64/support.s
@@ -30,7 +30,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $Id: support.s,v 1.37 1996/06/13 07:17:20 asami Exp $
+ * $Id: support.s,v 1.38 1996/09/10 08:31:57 bde Exp $
*/
#include <sys/errno.h>
@@ -53,7 +53,7 @@ _bzero: .long _generic_bzero
/*
* bcopy family
- * void bzero(void *base, u_int cnt)
+ * void bzero(void *buf, u_int len)
*/
ENTRY(generic_bzero)
@@ -141,13 +141,14 @@ ENTRY(i486_bzero)
*
* XXX need a const section for non-text
*/
- SUPERALIGN_TEXT
+ .data
jtab:
.long do0
.long do1
.long do2
.long do3
+ .text
SUPERALIGN_TEXT
5:
jmp jtab(,%ecx,4)
@@ -166,6 +167,7 @@ do2:
SUPERALIGN_TEXT
do1:
movb %al,(%edx)
+ ret
SUPERALIGN_TEXT
do0:
@@ -294,6 +296,7 @@ bcopy:
subl %esi,%eax
cmpl %ecx,%eax /* overlapping? */
jb 1f
+
shrl $2,%ecx /* copy by 32-bit words */
cld /* nope, copy forwards */
rep
@@ -458,14 +461,16 @@ ENTRY(copyout) /* copyout(from_kernel, to_user, len) */
movl %ebx,%ecx
#if defined(I586_CPU) && defined(I586_FAST_BCOPY)
cmpl $1024,%ecx
- jbe slow_copyout
+ jb slow_copyout
#if defined(I386_CPU) || defined(I486_CPU) || defined(I686_CPU)
cmpl $CPUCLASS_586,_cpu_class
jne slow_copyout
#endif /* I386_CPU || I486_CPU || I686_CPU */
- call fastmove
+ pushl %ecx
+ call _fastmove
+ addl $4,%esp
jmp done_copyout
ALIGN_TEXT
@@ -520,14 +525,16 @@ ENTRY(copyin)
#if defined(I586_CPU) && defined(I586_FAST_BCOPY)
cmpl $1024,%ecx
- jbe slow_copyin
+ jb slow_copyin
#if defined(I386_CPU) || defined(I486_CPU) || defined(I686_CPU)
cmpl $CPUCLASS_586,_cpu_class
jne slow_copyin
#endif /* I386_CPU || I486_CPU || I686_CPU */
- call fastmove
+ pushl %ecx
+ call _fastmove
+ addl $4,%esp
jmp done_copyin
ALIGN_TEXT
@@ -567,19 +574,20 @@ copyin_fault:
/* fastmove(src, dst, len)
src in %esi
dst in %edi
- len in %ecx
+ len in %ecx XXX changed to on stack for profiling
uses %eax and %edx for tmp. storage
*/
- ALIGN_TEXT
-fastmove:
+/* XXX use ENTRY() to get profiling. fastmove() is actually a non-entry. */
+ENTRY(fastmove)
+ movl 4(%esp),%ecx
cmpl $63,%ecx
- jbe 8f
+ jbe fastmove_tail
testl $7,%esi /* check if src addr is multiple of 8 */
- jnz 8f
+ jnz fastmove_tail
testl $7,%edi /* check if dst addr is multiple of 8 */
- jnz 8f
+ jnz fastmove_tail
pushl %ebp
movl %esp,%ebp
@@ -652,7 +660,7 @@ fastmove:
popl %esi
5:
ALIGN_TEXT
-7:
+fastmove_loop:
fildq 0(%esi)
fildq 8(%esi)
fildq 16(%esi)
@@ -673,7 +681,7 @@ fastmove:
addl $64,%esi
addl $64,%edi
cmpl $63,%ecx
- ja 7b
+ ja fastmove_loop
popl %eax
addl %eax,%ecx
cmpl $64,%ecx
@@ -704,7 +712,7 @@ fastmove:
popl %ebp
ALIGN_TEXT
-8:
+fastmove_tail:
movb %cl,%al
shrl $2,%ecx /* copy longword-wise */
cld
@@ -1171,10 +1179,7 @@ ENTRY(longjmp)
* Here for doing BB-profiling (gcc -a).
* We rely on the "bbset" instead, but need a dummy function.
*/
- .text
- .align 2
-.globl ___bb_init_func
-___bb_init_func:
- movl 4(%esp),%eax
- movl $1,(%eax)
- ret
+NON_GPROF_ENTRY(__bb_init_func)
+ movl 4(%esp),%eax
+ movl $1,(%eax)
+ .byte 0xc3 /* avoid macro for `ret' */
diff --git a/sys/i386/i386/support.s b/sys/i386/i386/support.s
index 0874d2b..e583aee 100644
--- a/sys/i386/i386/support.s
+++ b/sys/i386/i386/support.s
@@ -30,7 +30,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $Id: support.s,v 1.37 1996/06/13 07:17:20 asami Exp $
+ * $Id: support.s,v 1.38 1996/09/10 08:31:57 bde Exp $
*/
#include <sys/errno.h>
@@ -53,7 +53,7 @@ _bzero: .long _generic_bzero
/*
* bcopy family
- * void bzero(void *base, u_int cnt)
+ * void bzero(void *buf, u_int len)
*/
ENTRY(generic_bzero)
@@ -141,13 +141,14 @@ ENTRY(i486_bzero)
*
* XXX need a const section for non-text
*/
- SUPERALIGN_TEXT
+ .data
jtab:
.long do0
.long do1
.long do2
.long do3
+ .text
SUPERALIGN_TEXT
5:
jmp jtab(,%ecx,4)
@@ -166,6 +167,7 @@ do2:
SUPERALIGN_TEXT
do1:
movb %al,(%edx)
+ ret
SUPERALIGN_TEXT
do0:
@@ -294,6 +296,7 @@ bcopy:
subl %esi,%eax
cmpl %ecx,%eax /* overlapping? */
jb 1f
+
shrl $2,%ecx /* copy by 32-bit words */
cld /* nope, copy forwards */
rep
@@ -458,14 +461,16 @@ ENTRY(copyout) /* copyout(from_kernel, to_user, len) */
movl %ebx,%ecx
#if defined(I586_CPU) && defined(I586_FAST_BCOPY)
cmpl $1024,%ecx
- jbe slow_copyout
+ jb slow_copyout
#if defined(I386_CPU) || defined(I486_CPU) || defined(I686_CPU)
cmpl $CPUCLASS_586,_cpu_class
jne slow_copyout
#endif /* I386_CPU || I486_CPU || I686_CPU */
- call fastmove
+ pushl %ecx
+ call _fastmove
+ addl $4,%esp
jmp done_copyout
ALIGN_TEXT
@@ -520,14 +525,16 @@ ENTRY(copyin)
#if defined(I586_CPU) && defined(I586_FAST_BCOPY)
cmpl $1024,%ecx
- jbe slow_copyin
+ jb slow_copyin
#if defined(I386_CPU) || defined(I486_CPU) || defined(I686_CPU)
cmpl $CPUCLASS_586,_cpu_class
jne slow_copyin
#endif /* I386_CPU || I486_CPU || I686_CPU */
- call fastmove
+ pushl %ecx
+ call _fastmove
+ addl $4,%esp
jmp done_copyin
ALIGN_TEXT
@@ -567,19 +574,20 @@ copyin_fault:
/* fastmove(src, dst, len)
src in %esi
dst in %edi
- len in %ecx
+ len in %ecx XXX changed to on stack for profiling
uses %eax and %edx for tmp. storage
*/
- ALIGN_TEXT
-fastmove:
+/* XXX use ENTRY() to get profiling. fastmove() is actually a non-entry. */
+ENTRY(fastmove)
+ movl 4(%esp),%ecx
cmpl $63,%ecx
- jbe 8f
+ jbe fastmove_tail
testl $7,%esi /* check if src addr is multiple of 8 */
- jnz 8f
+ jnz fastmove_tail
testl $7,%edi /* check if dst addr is multiple of 8 */
- jnz 8f
+ jnz fastmove_tail
pushl %ebp
movl %esp,%ebp
@@ -652,7 +660,7 @@ fastmove:
popl %esi
5:
ALIGN_TEXT
-7:
+fastmove_loop:
fildq 0(%esi)
fildq 8(%esi)
fildq 16(%esi)
@@ -673,7 +681,7 @@ fastmove:
addl $64,%esi
addl $64,%edi
cmpl $63,%ecx
- ja 7b
+ ja fastmove_loop
popl %eax
addl %eax,%ecx
cmpl $64,%ecx
@@ -704,7 +712,7 @@ fastmove:
popl %ebp
ALIGN_TEXT
-8:
+fastmove_tail:
movb %cl,%al
shrl $2,%ecx /* copy longword-wise */
cld
@@ -1171,10 +1179,7 @@ ENTRY(longjmp)
* Here for doing BB-profiling (gcc -a).
* We rely on the "bbset" instead, but need a dummy function.
*/
- .text
- .align 2
-.globl ___bb_init_func
-___bb_init_func:
- movl 4(%esp),%eax
- movl $1,(%eax)
- ret
+NON_GPROF_ENTRY(__bb_init_func)
+ movl 4(%esp),%eax
+ movl $1,(%eax)
+ .byte 0xc3 /* avoid macro for `ret' */
OpenPOWER on IntegriCloud