diff options
author | bde <bde@FreeBSD.org> | 1996-09-20 16:52:09 +0000 |
---|---|---|
committer | bde <bde@FreeBSD.org> | 1996-09-20 16:52:09 +0000 |
commit | 4d197a331f4ccecd9f1e2bba6d6b74ab0d65528d (patch) | |
tree | f6b095c7a68d03e94aa65688a465a542356a3cdb | |
parent | 4f06fd88112fe09a6629d16e753784eaeaab05e7 (diff) | |
download | FreeBSD-src-4d197a331f4ccecd9f1e2bba6d6b74ab0d65528d.zip FreeBSD-src-4d197a331f4ccecd9f1e2bba6d6b74ab0d65528d.tar.gz |
Changed the arg names in the pseudo-prototype for bzero() (base -> buf,
cnt -> len) to match the prototype.
Put the jump table for i486_bzero() in the data section. This
speeds up i486_bzero() a little on Pentiums without significantly
affecting its speed on 486s.
Don't waste time falling through 14 nops to return from do1 in
i486_bzero(); do1 now has its own ret.
Use fastmove() for counts >= 1024 (was > 1024). Cosmetic.
Fixed profiling of fastmove().
Restored meaningful labels from the pre-1.1 version in fastmove().
Local labels are evil.
Fixed (high resolution non-) profiling of __bb_init_func().
-rw-r--r-- | sys/amd64/amd64/support.S | 51 | ||||
-rw-r--r-- | sys/amd64/amd64/support.s | 51 | ||||
-rw-r--r-- | sys/i386/i386/support.s | 51 |
3 files changed, 84 insertions, 69 deletions
diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S index 0874d2b..e583aee 100644 --- a/sys/amd64/amd64/support.S +++ b/sys/amd64/amd64/support.S @@ -30,7 +30,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: support.s,v 1.37 1996/06/13 07:17:20 asami Exp $ + * $Id: support.s,v 1.38 1996/09/10 08:31:57 bde Exp $ */ #include <sys/errno.h> @@ -53,7 +53,7 @@ _bzero: .long _generic_bzero /* * bcopy family - * void bzero(void *base, u_int cnt) + * void bzero(void *buf, u_int len) */ ENTRY(generic_bzero) @@ -141,13 +141,14 @@ ENTRY(i486_bzero) * * XXX need a const section for non-text */ - SUPERALIGN_TEXT + .data jtab: .long do0 .long do1 .long do2 .long do3 + .text SUPERALIGN_TEXT 5: jmp jtab(,%ecx,4) @@ -166,6 +167,7 @@ do2: SUPERALIGN_TEXT do1: movb %al,(%edx) + ret SUPERALIGN_TEXT do0: @@ -294,6 +296,7 @@ bcopy: subl %esi,%eax cmpl %ecx,%eax /* overlapping? */ jb 1f + shrl $2,%ecx /* copy by 32-bit words */ cld /* nope, copy forwards */ rep @@ -458,14 +461,16 @@ ENTRY(copyout) /* copyout(from_kernel, to_user, len) */ movl %ebx,%ecx #if defined(I586_CPU) && defined(I586_FAST_BCOPY) cmpl $1024,%ecx - jbe slow_copyout + jb slow_copyout #if defined(I386_CPU) || defined(I486_CPU) || defined(I686_CPU) cmpl $CPUCLASS_586,_cpu_class jne slow_copyout #endif /* I386_CPU || I486_CPU || I686_CPU */ - call fastmove + pushl %ecx + call _fastmove + addl $4,%esp jmp done_copyout ALIGN_TEXT @@ -520,14 +525,16 @@ ENTRY(copyin) #if defined(I586_CPU) && defined(I586_FAST_BCOPY) cmpl $1024,%ecx - jbe slow_copyin + jb slow_copyin #if defined(I386_CPU) || defined(I486_CPU) || defined(I686_CPU) cmpl $CPUCLASS_586,_cpu_class jne slow_copyin #endif /* I386_CPU || I486_CPU || I686_CPU */ - call fastmove + pushl %ecx + call _fastmove + addl $4,%esp jmp done_copyin ALIGN_TEXT @@ -567,19 +574,20 @@ copyin_fault: /* fastmove(src, dst, len) src in %esi dst in %edi - len in %ecx + len in %ecx XXX changed to on stack for profiling 
uses %eax and %edx for tmp. storage */ - ALIGN_TEXT -fastmove: +/* XXX use ENTRY() to get profiling. fastmove() is actually a non-entry. */ +ENTRY(fastmove) + movl 4(%esp),%ecx cmpl $63,%ecx - jbe 8f + jbe fastmove_tail testl $7,%esi /* check if src addr is multiple of 8 */ - jnz 8f + jnz fastmove_tail testl $7,%edi /* check if dst addr is multiple of 8 */ - jnz 8f + jnz fastmove_tail pushl %ebp movl %esp,%ebp @@ -652,7 +660,7 @@ fastmove: popl %esi 5: ALIGN_TEXT -7: +fastmove_loop: fildq 0(%esi) fildq 8(%esi) fildq 16(%esi) @@ -673,7 +681,7 @@ fastmove: addl $64,%esi addl $64,%edi cmpl $63,%ecx - ja 7b + ja fastmove_loop popl %eax addl %eax,%ecx cmpl $64,%ecx @@ -704,7 +712,7 @@ fastmove: popl %ebp ALIGN_TEXT -8: +fastmove_tail: movb %cl,%al shrl $2,%ecx /* copy longword-wise */ cld @@ -1171,10 +1179,7 @@ ENTRY(longjmp) * Here for doing BB-profiling (gcc -a). * We rely on the "bbset" instead, but need a dummy function. */ - .text - .align 2 -.globl ___bb_init_func -___bb_init_func: - movl 4(%esp),%eax - movl $1,(%eax) - ret +NON_GPROF_ENTRY(__bb_init_func) + movl 4(%esp),%eax + movl $1,(%eax) + .byte 0xc3 /* avoid macro for `ret' */ diff --git a/sys/amd64/amd64/support.s b/sys/amd64/amd64/support.s index 0874d2b..e583aee 100644 --- a/sys/amd64/amd64/support.s +++ b/sys/amd64/amd64/support.s @@ -30,7 +30,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $Id: support.s,v 1.37 1996/06/13 07:17:20 asami Exp $ + * $Id: support.s,v 1.38 1996/09/10 08:31:57 bde Exp $ */ #include <sys/errno.h> @@ -53,7 +53,7 @@ _bzero: .long _generic_bzero /* * bcopy family - * void bzero(void *base, u_int cnt) + * void bzero(void *buf, u_int len) */ ENTRY(generic_bzero) @@ -141,13 +141,14 @@ ENTRY(i486_bzero) * * XXX need a const section for non-text */ - SUPERALIGN_TEXT + .data jtab: .long do0 .long do1 .long do2 .long do3 + .text SUPERALIGN_TEXT 5: jmp jtab(,%ecx,4) @@ -166,6 +167,7 @@ do2: SUPERALIGN_TEXT do1: movb %al,(%edx) + ret SUPERALIGN_TEXT do0: @@ -294,6 +296,7 @@ bcopy: subl %esi,%eax cmpl %ecx,%eax /* overlapping? */ jb 1f + shrl $2,%ecx /* copy by 32-bit words */ cld /* nope, copy forwards */ rep @@ -458,14 +461,16 @@ ENTRY(copyout) /* copyout(from_kernel, to_user, len) */ movl %ebx,%ecx #if defined(I586_CPU) && defined(I586_FAST_BCOPY) cmpl $1024,%ecx - jbe slow_copyout + jb slow_copyout #if defined(I386_CPU) || defined(I486_CPU) || defined(I686_CPU) cmpl $CPUCLASS_586,_cpu_class jne slow_copyout #endif /* I386_CPU || I486_CPU || I686_CPU */ - call fastmove + pushl %ecx + call _fastmove + addl $4,%esp jmp done_copyout ALIGN_TEXT @@ -520,14 +525,16 @@ ENTRY(copyin) #if defined(I586_CPU) && defined(I586_FAST_BCOPY) cmpl $1024,%ecx - jbe slow_copyin + jb slow_copyin #if defined(I386_CPU) || defined(I486_CPU) || defined(I686_CPU) cmpl $CPUCLASS_586,_cpu_class jne slow_copyin #endif /* I386_CPU || I486_CPU || I686_CPU */ - call fastmove + pushl %ecx + call _fastmove + addl $4,%esp jmp done_copyin ALIGN_TEXT @@ -567,19 +574,20 @@ copyin_fault: /* fastmove(src, dst, len) src in %esi dst in %edi - len in %ecx + len in %ecx XXX changed to on stack for profiling uses %eax and %edx for tmp. storage */ - ALIGN_TEXT -fastmove: +/* XXX use ENTRY() to get profiling. fastmove() is actually a non-entry. 
*/ +ENTRY(fastmove) + movl 4(%esp),%ecx cmpl $63,%ecx - jbe 8f + jbe fastmove_tail testl $7,%esi /* check if src addr is multiple of 8 */ - jnz 8f + jnz fastmove_tail testl $7,%edi /* check if dst addr is multiple of 8 */ - jnz 8f + jnz fastmove_tail pushl %ebp movl %esp,%ebp @@ -652,7 +660,7 @@ fastmove: popl %esi 5: ALIGN_TEXT -7: +fastmove_loop: fildq 0(%esi) fildq 8(%esi) fildq 16(%esi) @@ -673,7 +681,7 @@ fastmove: addl $64,%esi addl $64,%edi cmpl $63,%ecx - ja 7b + ja fastmove_loop popl %eax addl %eax,%ecx cmpl $64,%ecx @@ -704,7 +712,7 @@ fastmove: popl %ebp ALIGN_TEXT -8: +fastmove_tail: movb %cl,%al shrl $2,%ecx /* copy longword-wise */ cld @@ -1171,10 +1179,7 @@ ENTRY(longjmp) * Here for doing BB-profiling (gcc -a). * We rely on the "bbset" instead, but need a dummy function. */ - .text - .align 2 -.globl ___bb_init_func -___bb_init_func: - movl 4(%esp),%eax - movl $1,(%eax) - ret +NON_GPROF_ENTRY(__bb_init_func) + movl 4(%esp),%eax + movl $1,(%eax) + .byte 0xc3 /* avoid macro for `ret' */ diff --git a/sys/i386/i386/support.s b/sys/i386/i386/support.s index 0874d2b..e583aee 100644 --- a/sys/i386/i386/support.s +++ b/sys/i386/i386/support.s @@ -30,7 +30,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: support.s,v 1.37 1996/06/13 07:17:20 asami Exp $ + * $Id: support.s,v 1.38 1996/09/10 08:31:57 bde Exp $ */ #include <sys/errno.h> @@ -53,7 +53,7 @@ _bzero: .long _generic_bzero /* * bcopy family - * void bzero(void *base, u_int cnt) + * void bzero(void *buf, u_int len) */ ENTRY(generic_bzero) @@ -141,13 +141,14 @@ ENTRY(i486_bzero) * * XXX need a const section for non-text */ - SUPERALIGN_TEXT + .data jtab: .long do0 .long do1 .long do2 .long do3 + .text SUPERALIGN_TEXT 5: jmp jtab(,%ecx,4) @@ -166,6 +167,7 @@ do2: SUPERALIGN_TEXT do1: movb %al,(%edx) + ret SUPERALIGN_TEXT do0: @@ -294,6 +296,7 @@ bcopy: subl %esi,%eax cmpl %ecx,%eax /* overlapping? 
*/ jb 1f + shrl $2,%ecx /* copy by 32-bit words */ cld /* nope, copy forwards */ rep @@ -458,14 +461,16 @@ ENTRY(copyout) /* copyout(from_kernel, to_user, len) */ movl %ebx,%ecx #if defined(I586_CPU) && defined(I586_FAST_BCOPY) cmpl $1024,%ecx - jbe slow_copyout + jb slow_copyout #if defined(I386_CPU) || defined(I486_CPU) || defined(I686_CPU) cmpl $CPUCLASS_586,_cpu_class jne slow_copyout #endif /* I386_CPU || I486_CPU || I686_CPU */ - call fastmove + pushl %ecx + call _fastmove + addl $4,%esp jmp done_copyout ALIGN_TEXT @@ -520,14 +525,16 @@ ENTRY(copyin) #if defined(I586_CPU) && defined(I586_FAST_BCOPY) cmpl $1024,%ecx - jbe slow_copyin + jb slow_copyin #if defined(I386_CPU) || defined(I486_CPU) || defined(I686_CPU) cmpl $CPUCLASS_586,_cpu_class jne slow_copyin #endif /* I386_CPU || I486_CPU || I686_CPU */ - call fastmove + pushl %ecx + call _fastmove + addl $4,%esp jmp done_copyin ALIGN_TEXT @@ -567,19 +574,20 @@ copyin_fault: /* fastmove(src, dst, len) src in %esi dst in %edi - len in %ecx + len in %ecx XXX changed to on stack for profiling uses %eax and %edx for tmp. storage */ - ALIGN_TEXT -fastmove: +/* XXX use ENTRY() to get profiling. fastmove() is actually a non-entry. */ +ENTRY(fastmove) + movl 4(%esp),%ecx cmpl $63,%ecx - jbe 8f + jbe fastmove_tail testl $7,%esi /* check if src addr is multiple of 8 */ - jnz 8f + jnz fastmove_tail testl $7,%edi /* check if dst addr is multiple of 8 */ - jnz 8f + jnz fastmove_tail pushl %ebp movl %esp,%ebp @@ -652,7 +660,7 @@ fastmove: popl %esi 5: ALIGN_TEXT -7: +fastmove_loop: fildq 0(%esi) fildq 8(%esi) fildq 16(%esi) @@ -673,7 +681,7 @@ fastmove: addl $64,%esi addl $64,%edi cmpl $63,%ecx - ja 7b + ja fastmove_loop popl %eax addl %eax,%ecx cmpl $64,%ecx @@ -704,7 +712,7 @@ fastmove: popl %ebp ALIGN_TEXT -8: +fastmove_tail: movb %cl,%al shrl $2,%ecx /* copy longword-wise */ cld @@ -1171,10 +1179,7 @@ ENTRY(longjmp) * Here for doing BB-profiling (gcc -a). * We rely on the "bbset" instead, but need a dummy function. 
*/ - .text - .align 2 -.globl ___bb_init_func -___bb_init_func: - movl 4(%esp),%eax - movl $1,(%eax) - ret +NON_GPROF_ENTRY(__bb_init_func) + movl 4(%esp),%eax + movl $1,(%eax) + .byte 0xc3 /* avoid macro for `ret' */ |