summaryrefslogtreecommitdiffstats
path: root/sys/amd64/isa
diff options
context:
space:
mode:
authorpeter <peter@FreeBSD.org>2001-07-12 06:32:51 +0000
committerpeter <peter@FreeBSD.org>2001-07-12 06:32:51 +0000
commite00129231d89ad2ab6ec8862931dd864ff079d0c (patch)
treeacb374c639798d8213f972fae7e8609dec013b5f /sys/amd64/isa
parentb5164c6585fabc3c848ff92ada3f6ffb64a9e8f2 (diff)
downloadFreeBSD-src-e00129231d89ad2ab6ec8862931dd864ff079d0c.zip
FreeBSD-src-e00129231d89ad2ab6ec8862931dd864ff079d0c.tar.gz
Activate SSE/SIMD. This is the extra context-switching support that
we are required to provide if we let user processes use the extra 128-bit registers, etc.

This is the base part of the diff I got from: http://www.issei.org/issei/FreeBSD/sse.html
I believe this is by: Mr. SUZUKI Issei <issei@issei.org>
SMP support apparently by: Takekazu KATO <kato@chino.it.okayama-u.ac.jp>
Test code by: NAKAMURA Kazushi <kaz@kobe1995.net>, see http://kobe1995.net/~kaz/FreeBSD/SSE.en.html

I have fixed a couple of style(9) deviations. I have some follow-up commits to fix a couple of non-style things.
Diffstat (limited to 'sys/amd64/isa')
-rw-r--r--sys/amd64/isa/npx.c93
1 files changed, 82 insertions, 11 deletions
diff --git a/sys/amd64/isa/npx.c b/sys/amd64/isa/npx.c
index b6c69a0..f6410e9 100644
--- a/sys/amd64/isa/npx.c
+++ b/sys/amd64/isa/npx.c
@@ -35,6 +35,7 @@
* $FreeBSD$
*/
+#include "opt_cpu.h"
#include "opt_debug_npx.h"
#include "opt_math_emulate.h"
@@ -99,6 +100,8 @@
#define fnstsw(addr) __asm __volatile("fnstsw %0" : "=m" (*(addr)))
#define fp_divide_by_0() __asm("fldz; fld1; fdiv %st,%st(1); fnop")
#define frstor(addr) __asm("frstor %0" : : "m" (*(addr)))
+#define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr)))
+#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr)))
#define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \
: : "n" (CR0_TS) : "ax")
#define stop_emulating() __asm("clts")
@@ -113,11 +116,41 @@ void fnstcw __P((caddr_t addr));
void fnstsw __P((caddr_t addr));
void fp_divide_by_0 __P((void));
void frstor __P((caddr_t addr));
+void fxsave __P((caddr_t addr));
+void fxrstor __P((caddr_t addr));
void start_emulating __P((void));
void stop_emulating __P((void));
#endif /* __GNUC__ */
+/*
+ * Accessors for the FPU state saved in a pcb (pcb_save).  The save area
+ * has two layouts, reached through the sv_87 and sv_xmm members.  When
+ * CPU_ENABLE_SSE is configured, the run-time flag cpu_fxsr selects the
+ * sv_xmm layout; otherwise only the sv_87 layout is used.
+ *
+ *	GET_FPU_CW(proc)	   saved control word of proc
+ *	GET_FPU_SW(proc)	   saved status word of proc
+ *	MASK_FPU_SW(proc, mask)	   saved status word &= mask (modifies it)
+ *	GET_FPU_EXSW_PTR(pcb)	   address of the pcb's sv_ex_sw field
+ *
+ * MASK_FPU_SW must store its result: it stands in for the statement
+ * "en_sw &= ~0x80bf" in npxtrap(), so a plain "&" would silently leave
+ * the saved status bits set.
+ */
+#ifdef CPU_ENABLE_SSE
+#define	GET_FPU_CW(proc) \
+	(cpu_fxsr ? \
+		(proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_cw : \
+		(proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_cw)
+#define	GET_FPU_SW(proc) \
+	(cpu_fxsr ? \
+		(proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_sw : \
+		(proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw)
+#define	MASK_FPU_SW(proc, mask) \
+	(cpu_fxsr ? \
+		((proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_sw &= (mask)) : \
+		((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw &= (mask)))
+#define	GET_FPU_EXSW_PTR(pcb) \
+	(cpu_fxsr ? \
+		&(pcb)->pcb_save.sv_xmm.sv_ex_sw : \
+		&(pcb)->pcb_save.sv_87.sv_ex_sw)
+#else /* CPU_ENABLE_SSE */
+#define	GET_FPU_CW(proc) \
+	((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_cw)
+#define	GET_FPU_SW(proc) \
+	((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw)
+#define	MASK_FPU_SW(proc, mask) \
+	((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw &= (mask))
+#define	GET_FPU_EXSW_PTR(pcb) \
+	(&(pcb)->pcb_save.sv_87.sv_ex_sw)
+#endif /* CPU_ENABLE_SSE */
+
typedef u_char bool_t;
static int npx_attach __P((device_t dev));
@@ -127,6 +160,8 @@ static void npx_intr __P((void *));
#endif
static int npx_probe __P((device_t dev));
static int npx_probe1 __P((device_t dev));
+static void fpusave __P((union savefpu *, u_char));
+static void fpurstor __P((union savefpu *, u_char));
#ifdef I586_CPU_XXX
static long timezero __P((const char *funcname,
void (*func)(void *buf, size_t len)));
@@ -529,7 +564,7 @@ void
npxinit(control)
u_short control;
{
- struct save87 dummy;
+ union savefpu dummy;
critical_t savecrit;
if (!npx_exists)
@@ -544,7 +579,7 @@ npxinit(control)
stop_emulating();
fldcw(&control);
if (PCPU_GET(curpcb) != NULL)
- fnsave(&PCPU_GET(curpcb)->pcb_savefpu);
+ fpusave(&PCPU_GET(curpcb)->pcb_save, curproc->p_oncpu);
start_emulating();
critical_exit(savecrit);
}
@@ -560,7 +595,7 @@ npxexit(p)
savecrit = critical_enter();
if (p == PCPU_GET(npxproc))
- npxsave(&PCPU_GET(curpcb)->pcb_savefpu);
+ npxsave(&PCPU_GET(curpcb)->pcb_save);
critical_exit(savecrit);
#ifdef NPX_DEBUG
if (npx_exists) {
@@ -773,6 +808,7 @@ npxtrap()
{
critical_t savecrit;
u_short control, status;
+ u_long *exstat;
if (!npx_exists) {
printf("npxtrap: npxproc = %p, curproc = %p, npx_exists = %d\n",
@@ -787,16 +823,17 @@ npxtrap()
* wherever they are.
*/
if (PCPU_GET(npxproc) != curproc) {
- control = curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_cw;
- status = curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_sw;
+ control = GET_FPU_CW(curproc);
+ status = GET_FPU_SW(curproc);
} else {
fnstcw(&control);
fnstsw(&status);
}
- curproc->p_addr->u_pcb.pcb_savefpu.sv_ex_sw = status;
+ exstat = GET_FPU_EXSW_PTR(&curproc->p_addr->u_pcb);
+ *exstat = status;
if (PCPU_GET(npxproc) != curproc)
- curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_sw &= ~0x80bf;
+ MASK_FPU_SW(curproc, ~0x80bf);
else
fnclex();
critical_exit(savecrit);
@@ -813,6 +850,7 @@ npxtrap()
int
npxdna()
{
+ u_long *exstat;
critical_t s;
if (!npx_exists)
@@ -828,7 +866,9 @@ npxdna()
* Record new context early in case frstor causes an IRQ13.
*/
PCPU_SET(npxproc, CURPROC);
- PCPU_GET(curpcb)->pcb_savefpu.sv_ex_sw = 0;
+
+ exstat = GET_FPU_EXSW_PTR(PCPU_GET(curpcb));
+ *exstat = 0;
/*
* The following frstor may cause an IRQ13 when the state being
* restored has a pending error. The error will appear to have been
@@ -841,7 +881,7 @@ npxdna()
* fnsave are broken, so our treatment breaks fnclex if it is the
* first FPU instruction after a context switch.
*/
- frstor(&PCPU_GET(curpcb)->pcb_savefpu);
+ fpurstor(&PCPU_GET(curpcb)->pcb_save, curproc->p_oncpu);
critical_exit(s);
return (1);
@@ -872,15 +912,46 @@ npxdna()
*/
void
npxsave(addr)
- struct save87 *addr;
+ union savefpu *addr;
{
stop_emulating();
- fnsave(addr);
+ fpusave(addr, curproc->p_oncpu);
+
start_emulating();
PCPU_SET(npxproc, NULL);
}
+/*
+ * Store the current FPU state into *addr.
+ *
+ * Without cpu_fxsr the state is written directly with fnsave.  With
+ * cpu_fxsr it is first written with fxsave into a static per-CPU slot
+ * (indexed by oncpu) and then copied out to *addr.
+ *
+ * NOTE(review): presumably the bounce slot exists because fxsave needs a
+ * 16-byte-aligned operand; nothing visible here guarantees the alignment
+ * of svxmm -- confirm against the definition of struct savexmm.
+ */
+static void
+fpusave(addr, oncpu)
+	union savefpu *addr;
+	u_char oncpu;
+{
+	static struct savexmm svxmm[MAXCPU];
+	struct savexmm *bounce;
+
+	if (cpu_fxsr) {
+		/* Save into this CPU's slot, then copy to the caller's area. */
+		bounce = &svxmm[oncpu];
+		fxsave(bounce);
+		bcopy(bounce, addr, sizeof(*bounce));
+		return;
+	}
+	fnsave(addr);
+}
+
+/*
+ * Load the FPU state from *addr.
+ *
+ * Without cpu_fxsr the state is loaded directly with frstor.  With
+ * cpu_fxsr it is first copied into a static per-CPU slot (indexed by
+ * oncpu) and then loaded from there with fxrstor.
+ *
+ * NOTE(review): presumably the bounce slot exists because fxrstor needs a
+ * 16-byte-aligned operand; nothing visible here guarantees the alignment
+ * of svxmm -- confirm against the definition of struct savexmm.
+ */
+static void
+fpurstor(addr, oncpu)
+	union savefpu *addr;
+	u_char oncpu;
+{
+	static struct savexmm svxmm[MAXCPU];
+	struct savexmm *bounce;
+
+	if (cpu_fxsr) {
+		/* Stage the caller's area in this CPU's slot, then restore. */
+		bounce = &svxmm[oncpu];
+		bcopy(addr, bounce, sizeof(*bounce));
+		fxrstor(bounce);
+		return;
+	}
+	frstor(addr);
+}
+
#ifdef I586_CPU_XXX
static long
timezero(funcname, func)
OpenPOWER on IntegriCloud