summary refs log tree commit diff stats
path: root/sys/amd64
diff options
context:
space:
mode:
authorasami <asami@FreeBSD.org>1996-06-13 07:17:21 +0000
committerasami <asami@FreeBSD.org>1996-06-13 07:17:21 +0000
commit0e9b8f90be7f12913c6de4393299556a4c0dce1d (patch)
tree02fbaca3c04f4dabde374d47c4725f986f4c16cd /sys/amd64
parent6514dd9d224b3df44e79cea4b23c52755e957450 (diff)
downloadFreeBSD-src-0e9b8f90be7f12913c6de4393299556a4c0dce1d.zip
FreeBSD-src-0e9b8f90be7f12913c6de4393299556a4c0dce1d.tar.gz
A fast memory copy for Pentiums using floating point registers.
It is called from copyin and copyout. The new routine is conditioned on I586_CPU and I586_FAST_BCOPY, so you need options "I586_FAST_BCOPY" (quotes essential) in your kernel config file. Also, if you have other kernel types configured in your kernel, an additional check to make sure it is running on a Pentium is inserted. (It is not clear why it doesn't help on P6s; it may be just that the Orion chipset doesn't prefetch as efficiently as Tritons and friends.) Bruce can now hack this away. :)
Diffstat (limited to 'sys/amd64')
-rw-r--r--  sys/amd64/amd64/support.S  192
-rw-r--r--  sys/amd64/amd64/support.s  192
-rw-r--r--  sys/amd64/amd64/trap.c  10
3 files changed, 391 insertions, 3 deletions
diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S
index 0d5bc5d..751e923 100644
--- a/sys/amd64/amd64/support.S
+++ b/sys/amd64/amd64/support.S
@@ -30,13 +30,14 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $Id: support.s,v 1.35 1996/05/03 21:01:00 phk Exp $
+ * $Id: support.s,v 1.36 1996/05/31 01:08:03 peter Exp $
*/
#include "assym.s" /* system definitions */
#include "errno.h" /* error return codes */
#include "machine/asmacros.h" /* miscellaneous asm macros */
#include "machine/cputypes.h" /* types of CPUs */
+#include "machine/specialreg.h"
#define KDSEL 0x10 /* kernel data selector */
#define IDXSHIFT 10
@@ -453,6 +454,21 @@ ENTRY(copyout) /* copyout(from_kernel, to_user, len) */
/* bcopy(%esi, %edi, %ebx) */
3:
movl %ebx,%ecx
+#if defined(I586_CPU) && defined(I586_FAST_BCOPY)
+ cmpl $1024,%ecx
+ jbe slow_copyout
+
+#if defined(I386_CPU) || defined(I486_CPU) || defined(I686_CPU)
+ cmpl $CPUCLASS_586,_cpu_class
+ jne slow_copyout
+#endif /* I386_CPU || I486_CPU || I686_CPU */
+
+ call fastmove
+ jmp done_copyout
+
+ ALIGN_TEXT
+slow_copyout:
+#endif /* I586_CPU && I586_FAST_BCOPY */
shrl $2,%ecx
cld
rep
@@ -500,6 +516,21 @@ ENTRY(copyin)
cmpl $VM_MAXUSER_ADDRESS,%edx
ja copyin_fault
+#if defined(I586_CPU) && defined(I586_FAST_BCOPY)
+ cmpl $1024,%ecx
+ jbe slow_copyin
+
+#if defined(I386_CPU) || defined(I486_CPU) || defined(I686_CPU)
+ cmpl $CPUCLASS_586,_cpu_class
+ jne slow_copyin
+#endif /* I386_CPU || I486_CPU || I686_CPU */
+
+ call fastmove
+ jmp done_copyin
+
+ ALIGN_TEXT
+slow_copyin:
+#endif /* I586_CPU && I586_FAST_BCOPY */
movb %cl,%al
shrl $2,%ecx /* copy longword-wise */
cld
@@ -510,6 +541,10 @@ ENTRY(copyin)
rep
movsb
+#if defined(I586_CPU) && defined(I586_FAST_BCOPY)
+ ALIGN_TEXT
+done_copyin:
+#endif /* I586_CPU && I586_FAST_BCOPY */
popl %edi
popl %esi
xorl %eax,%eax
@@ -526,6 +561,161 @@ copyin_fault:
movl $EFAULT,%eax
ret
+#if defined(I586_CPU) && defined(I586_FAST_BCOPY)
+/* fastmove(src, dst, len)
+ src in %esi
+ dst in %edi
+ len in %ecx
+ uses %eax and %edx for tmp. storage
+ */
+ ALIGN_TEXT
+fastmove:
+ cmpl $63,%ecx
+ jbe 8f
+
+ testl $7,%esi /* check if src addr is multiple of 8 */
+ jnz 8f
+
+ testl $7,%edi /* check if dst addr is multiple of 8 */
+ jnz 8f
+
+ pushl %ebp
+ movl %esp,%ebp
+ subl $PCB_SAVEFPU_SIZE,%esp
+
+/* if (npxproc != NULL) { */
+ cmpl $0,_npxproc
+ je 6f
+/* fnsave(&curpcb->pcb_savefpu); */
+ movl _curpcb,%eax
+ fnsave PCB_SAVEFPU(%eax)
+/* npxproc = NULL; */
+ movl $0,_npxproc
+/* } */
+6:
+/* now we own the FPU. */
+
+/*
+ * The process' FP state is saved in the pcb, but if we get
+ * switched, the cpu_switch() will store our FP state in the
+ * pcb. It should be possible to avoid all the copying for
+ * this, e.g., by setting a flag to tell cpu_switch() to
+ * save the state somewhere else.
+ */
+/* tmp = curpcb->pcb_savefpu; */
+ pushl %edi
+ pushl %esi
+ pushl %ecx
+ leal -PCB_SAVEFPU_SIZE(%ebp),%edi
+ movl _curpcb,%esi
+ addl $PCB_SAVEFPU,%esi
+ cld
+ movl $PCB_SAVEFPU_SIZE>>2,%ecx
+ rep
+ movsl
+ popl %ecx
+ popl %esi
+ popl %edi
+/* stop_emulating(); */
+ clts
+/* npxproc = curproc; */
+ movl _curproc,%eax
+ movl %eax,_npxproc
+4:
+ pushl %ecx
+ cmpl $1792,%ecx
+ jbe 2f
+ movl $1792,%ecx
+2:
+ subl %ecx,0(%esp)
+ cmpl $256,%ecx
+ jb 5f
+ pushl %esi
+ pushl %ecx
+ ALIGN_TEXT
+3:
+ movl 0(%esi),%eax
+ movl 32(%esi),%eax
+ movl 64(%esi),%eax
+ movl 96(%esi),%eax
+ movl 128(%esi),%eax
+ movl 160(%esi),%eax
+ movl 192(%esi),%eax
+ movl 224(%esi),%eax
+ addl $256,%esi
+ subl $256,%ecx
+ cmpl $256,%ecx
+ jae 3b
+ popl %ecx
+ popl %esi
+5:
+ ALIGN_TEXT
+7:
+ fildq 0(%esi)
+ fildq 8(%esi)
+ fildq 16(%esi)
+ fildq 24(%esi)
+ fildq 32(%esi)
+ fildq 40(%esi)
+ fildq 48(%esi)
+ fildq 56(%esi)
+ fistpq 56(%edi)
+ fistpq 48(%edi)
+ fistpq 40(%edi)
+ fistpq 32(%edi)
+ fistpq 24(%edi)
+ fistpq 16(%edi)
+ fistpq 8(%edi)
+ fistpq 0(%edi)
+ addl $-64,%ecx
+ addl $64,%esi
+ addl $64,%edi
+ cmpl $63,%ecx
+ ja 7b
+ popl %eax
+ addl %eax,%ecx
+ cmpl $64,%ecx
+ jae 4b
+
+/* curpcb->pcb_savefpu = tmp; */
+ pushl %edi
+ pushl %esi
+ pushl %ecx
+ movl _curpcb,%edi
+ addl $PCB_SAVEFPU,%edi
+ leal -PCB_SAVEFPU_SIZE(%ebp),%esi
+ cld
+ movl $PCB_SAVEFPU_SIZE>>2,%ecx
+ rep
+ movsl
+ popl %ecx
+ popl %esi
+ popl %edi
+
+/* start_emulating(); */
+ smsw %ax
+ orb $CR0_TS,%al
+ lmsw %ax
+/* npxproc = NULL; */
+ movl $0,_npxproc
+ movl %ebp,%esp
+ popl %ebp
+
+ ALIGN_TEXT
+8:
+ movb %cl,%al
+ shrl $2,%ecx /* copy longword-wise */
+ cld
+ rep
+ movsl
+ movb %al,%cl
+ andb $3,%cl /* copy remaining bytes */
+ rep
+ movsb
+
+ ret
+#endif /* I586_CPU && I586_FAST_BCOPY */
+
/*
* fu{byte,sword,word} : fetch a byte (sword, word) from user memory
*/
diff --git a/sys/amd64/amd64/support.s b/sys/amd64/amd64/support.s
index 0d5bc5d..751e923 100644
--- a/sys/amd64/amd64/support.s
+++ b/sys/amd64/amd64/support.s
@@ -30,13 +30,14 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $Id: support.s,v 1.35 1996/05/03 21:01:00 phk Exp $
+ * $Id: support.s,v 1.36 1996/05/31 01:08:03 peter Exp $
*/
#include "assym.s" /* system definitions */
#include "errno.h" /* error return codes */
#include "machine/asmacros.h" /* miscellaneous asm macros */
#include "machine/cputypes.h" /* types of CPUs */
+#include "machine/specialreg.h"
#define KDSEL 0x10 /* kernel data selector */
#define IDXSHIFT 10
@@ -453,6 +454,21 @@ ENTRY(copyout) /* copyout(from_kernel, to_user, len) */
/* bcopy(%esi, %edi, %ebx) */
3:
movl %ebx,%ecx
+#if defined(I586_CPU) && defined(I586_FAST_BCOPY)
+ cmpl $1024,%ecx
+ jbe slow_copyout
+
+#if defined(I386_CPU) || defined(I486_CPU) || defined(I686_CPU)
+ cmpl $CPUCLASS_586,_cpu_class
+ jne slow_copyout
+#endif /* I386_CPU || I486_CPU || I686_CPU */
+
+ call fastmove
+ jmp done_copyout
+
+ ALIGN_TEXT
+slow_copyout:
+#endif /* I586_CPU && I586_FAST_BCOPY */
shrl $2,%ecx
cld
rep
@@ -500,6 +516,21 @@ ENTRY(copyin)
cmpl $VM_MAXUSER_ADDRESS,%edx
ja copyin_fault
+#if defined(I586_CPU) && defined(I586_FAST_BCOPY)
+ cmpl $1024,%ecx
+ jbe slow_copyin
+
+#if defined(I386_CPU) || defined(I486_CPU) || defined(I686_CPU)
+ cmpl $CPUCLASS_586,_cpu_class
+ jne slow_copyin
+#endif /* I386_CPU || I486_CPU || I686_CPU */
+
+ call fastmove
+ jmp done_copyin
+
+ ALIGN_TEXT
+slow_copyin:
+#endif /* I586_CPU && I586_FAST_BCOPY */
movb %cl,%al
shrl $2,%ecx /* copy longword-wise */
cld
@@ -510,6 +541,10 @@ ENTRY(copyin)
rep
movsb
+#if defined(I586_CPU) && defined(I586_FAST_BCOPY)
+ ALIGN_TEXT
+done_copyin:
+#endif /* I586_CPU && I586_FAST_BCOPY */
popl %edi
popl %esi
xorl %eax,%eax
@@ -526,6 +561,161 @@ copyin_fault:
movl $EFAULT,%eax
ret
+#if defined(I586_CPU) && defined(I586_FAST_BCOPY)
+/* fastmove(src, dst, len)
+ src in %esi
+ dst in %edi
+ len in %ecx
+ uses %eax and %edx for tmp. storage
+ */
+ ALIGN_TEXT
+fastmove:
+ cmpl $63,%ecx
+ jbe 8f
+
+ testl $7,%esi /* check if src addr is multiple of 8 */
+ jnz 8f
+
+ testl $7,%edi /* check if dst addr is multiple of 8 */
+ jnz 8f
+
+ pushl %ebp
+ movl %esp,%ebp
+ subl $PCB_SAVEFPU_SIZE,%esp
+
+/* if (npxproc != NULL) { */
+ cmpl $0,_npxproc
+ je 6f
+/* fnsave(&curpcb->pcb_savefpu); */
+ movl _curpcb,%eax
+ fnsave PCB_SAVEFPU(%eax)
+/* npxproc = NULL; */
+ movl $0,_npxproc
+/* } */
+6:
+/* now we own the FPU. */
+
+/*
+ * The process' FP state is saved in the pcb, but if we get
+ * switched, the cpu_switch() will store our FP state in the
+ * pcb. It should be possible to avoid all the copying for
+ * this, e.g., by setting a flag to tell cpu_switch() to
+ * save the state somewhere else.
+ */
+/* tmp = curpcb->pcb_savefpu; */
+ pushl %edi
+ pushl %esi
+ pushl %ecx
+ leal -PCB_SAVEFPU_SIZE(%ebp),%edi
+ movl _curpcb,%esi
+ addl $PCB_SAVEFPU,%esi
+ cld
+ movl $PCB_SAVEFPU_SIZE>>2,%ecx
+ rep
+ movsl
+ popl %ecx
+ popl %esi
+ popl %edi
+/* stop_emulating(); */
+ clts
+/* npxproc = curproc; */
+ movl _curproc,%eax
+ movl %eax,_npxproc
+4:
+ pushl %ecx
+ cmpl $1792,%ecx
+ jbe 2f
+ movl $1792,%ecx
+2:
+ subl %ecx,0(%esp)
+ cmpl $256,%ecx
+ jb 5f
+ pushl %esi
+ pushl %ecx
+ ALIGN_TEXT
+3:
+ movl 0(%esi),%eax
+ movl 32(%esi),%eax
+ movl 64(%esi),%eax
+ movl 96(%esi),%eax
+ movl 128(%esi),%eax
+ movl 160(%esi),%eax
+ movl 192(%esi),%eax
+ movl 224(%esi),%eax
+ addl $256,%esi
+ subl $256,%ecx
+ cmpl $256,%ecx
+ jae 3b
+ popl %ecx
+ popl %esi
+5:
+ ALIGN_TEXT
+7:
+ fildq 0(%esi)
+ fildq 8(%esi)
+ fildq 16(%esi)
+ fildq 24(%esi)
+ fildq 32(%esi)
+ fildq 40(%esi)
+ fildq 48(%esi)
+ fildq 56(%esi)
+ fistpq 56(%edi)
+ fistpq 48(%edi)
+ fistpq 40(%edi)
+ fistpq 32(%edi)
+ fistpq 24(%edi)
+ fistpq 16(%edi)
+ fistpq 8(%edi)
+ fistpq 0(%edi)
+ addl $-64,%ecx
+ addl $64,%esi
+ addl $64,%edi
+ cmpl $63,%ecx
+ ja 7b
+ popl %eax
+ addl %eax,%ecx
+ cmpl $64,%ecx
+ jae 4b
+
+/* curpcb->pcb_savefpu = tmp; */
+ pushl %edi
+ pushl %esi
+ pushl %ecx
+ movl _curpcb,%edi
+ addl $PCB_SAVEFPU,%edi
+ leal -PCB_SAVEFPU_SIZE(%ebp),%esi
+ cld
+ movl $PCB_SAVEFPU_SIZE>>2,%ecx
+ rep
+ movsl
+ popl %ecx
+ popl %esi
+ popl %edi
+
+/* start_emulating(); */
+ smsw %ax
+ orb $CR0_TS,%al
+ lmsw %ax
+/* npxproc = NULL; */
+ movl $0,_npxproc
+ movl %ebp,%esp
+ popl %ebp
+
+ ALIGN_TEXT
+8:
+ movb %cl,%al
+ shrl $2,%ecx /* copy longword-wise */
+ cld
+ rep
+ movsl
+ movb %al,%cl
+ andb $3,%cl /* copy remaining bytes */
+ rep
+ movsb
+
+ ret
+#endif /* I586_CPU && I586_FAST_BCOPY */
+
/*
* fu{byte,sword,word} : fetch a byte (sword, word) from user memory
*/
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index 7cf89d1..224a6f4 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -35,7 +35,7 @@
* SUCH DAMAGE.
*
* from: @(#)trap.c 7.4 (Berkeley) 5/13/91
- * $Id: trap.c,v 1.76 1996/05/18 03:36:19 dyson Exp $
+ * $Id: trap.c,v 1.77 1996/06/12 05:02:54 gpalmer Exp $
*/
/*
@@ -319,6 +319,14 @@ trap(frame)
(void) trap_pfault(&frame, FALSE);
return;
+ case T_DNA:
+#if NNPX > 0
+ /* if a transparent fault (due to context switch "late") */
+ if (npxdna())
+ return;
+#endif /* NNPX > 0 */
+ break;
+
case T_PROTFLT: /* general protection fault */
case T_SEGNPFLT: /* segment not present fault */
/*
OpenPOWER on IntegriCloud