From e00129231d89ad2ab6ec8862931dd864ff079d0c Mon Sep 17 00:00:00 2001
From: peter
Date: Thu, 12 Jul 2001 06:32:51 +0000
Subject: Activate SSE/SIMD. This is the extra context switching support that
 we are required to do if we let user processes use the extra 128 bit
 registers etc.

This is the base part of the diff I got from:
http://www.issei.org/issei/FreeBSD/sse.html
I believe this is by: Mr. SUZUKI Issei
SMP support apparently by: Takekazu KATO
Test code by: NAKAMURA Kazushi , see
http://kobe1995.net/~kaz/FreeBSD/SSE.en.html

I have fixed a couple of style(9) deviations. I have some followup commits
to fix a couple of non-style things.
---
Review notes (not part of the commit message):
 - MASK_FPU_SW() now applies the mask with `&=`.  Written as a bare `&`
   expression, the statement `MASK_FPU_SW(curproc, ~0x80bf);` in npxtrap()
   computed a value and discarded it, whereas the line it replaces cleared
   the bits in the saved status word with `&=`.
 - The non-SSE GET_FPU_CW()/GET_FPU_SW() now parenthesize `proc`, matching
   the other accessor macros in the same block.
 - Both corrections edit added lines in place, so the hunk line counts and
   the diffstat below are unchanged; the blob id on the `index` line was
   generated before these corrections.

 sys/amd64/isa/npx.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 82 insertions(+), 11 deletions(-)

(limited to 'sys/amd64/isa')

diff --git a/sys/amd64/isa/npx.c b/sys/amd64/isa/npx.c
index b6c69a0..f6410e9 100644
--- a/sys/amd64/isa/npx.c
+++ b/sys/amd64/isa/npx.c
@@ -35,6 +35,7 @@
  * $FreeBSD$
  */
 
+#include "opt_cpu.h"
 #include "opt_debug_npx.h"
 #include "opt_math_emulate.h"
 
@@ -99,6 +100,8 @@
 #define	fnstsw(addr)		__asm __volatile("fnstsw %0" : "=m" (*(addr)))
 #define	fp_divide_by_0()	__asm("fldz; fld1; fdiv %st,%st(1); fnop")
 #define	frstor(addr)		__asm("frstor %0" : : "m" (*(addr)))
+#define	fxrstor(addr)		__asm("fxrstor %0" : : "m" (*(addr)))
+#define	fxsave(addr)		__asm __volatile("fxsave %0" : "=m" (*(addr)))
 #define	start_emulating()	__asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \
 				      : : "n" (CR0_TS) : "ax")
 #define	stop_emulating()	__asm("clts")
@@ -113,11 +116,41 @@ void	fnstcw		__P((caddr_t addr));
 void	fnstsw		__P((caddr_t addr));
 void	fp_divide_by_0	__P((void));
 void	frstor		__P((caddr_t addr));
+void	fxsave		__P((caddr_t addr));
+void	fxrstor		__P((caddr_t addr));
 void	start_emulating	__P((void));
 void	stop_emulating	__P((void));
 
 #endif	/* __GNUC__ */
 
+#ifdef CPU_ENABLE_SSE
+#define GET_FPU_CW(proc) \
+	(cpu_fxsr ? \
+		(proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_cw : \
+		(proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_cw)
+#define GET_FPU_SW(proc) \
+	(cpu_fxsr ? \
+		(proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_sw : \
+		(proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw)
+#define MASK_FPU_SW(proc, mask) \
+	(cpu_fxsr ? \
+		((proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_sw &= (mask)) : \
+		((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw &= (mask)))
+#define GET_FPU_EXSW_PTR(pcb) \
+	(cpu_fxsr ? \
+		&(pcb)->pcb_save.sv_xmm.sv_ex_sw : \
+		&(pcb)->pcb_save.sv_87.sv_ex_sw)
+#else /* CPU_ENABLE_SSE */
+#define GET_FPU_CW(proc) \
+	((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_cw)
+#define GET_FPU_SW(proc) \
+	((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw)
+#define MASK_FPU_SW(proc, mask) \
+	((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw &= (mask))
+#define GET_FPU_EXSW_PTR(pcb) \
+	(&(pcb)->pcb_save.sv_87.sv_ex_sw)
+#endif /* CPU_ENABLE_SSE */
+
 typedef u_char bool_t;
 
 static	int	npx_attach	__P((device_t dev));
@@ -127,6 +160,8 @@ static	void	npx_intr	__P((void *));
 #endif
 static	int	npx_probe	__P((device_t dev));
 static	int	npx_probe1	__P((device_t dev));
+static	void	fpusave		__P((union savefpu *, u_char));
+static	void	fpurstor	__P((union savefpu *, u_char));
 #ifdef I586_CPU_XXX
 static	long	timezero	__P((const char *funcname,
 				     void (*func)(void *buf, size_t len)));
@@ -529,7 +564,7 @@ void
 npxinit(control)
 	u_short control;
 {
-	struct save87 dummy;
+	union savefpu dummy;
 	critical_t savecrit;
 
 	if (!npx_exists)
@@ -544,7 +579,7 @@ npxinit(control)
 	stop_emulating();
 	fldcw(&control);
 	if (PCPU_GET(curpcb) != NULL)
-		fnsave(&PCPU_GET(curpcb)->pcb_savefpu);
+		fpusave(&PCPU_GET(curpcb)->pcb_save, curproc->p_oncpu);
 	start_emulating();
 	critical_exit(savecrit);
 }
@@ -560,7 +595,7 @@ npxexit(p)
 
 	savecrit = critical_enter();
 	if (p == PCPU_GET(npxproc))
-		npxsave(&PCPU_GET(curpcb)->pcb_savefpu);
+		npxsave(&PCPU_GET(curpcb)->pcb_save);
 	critical_exit(savecrit);
 #ifdef NPX_DEBUG
 	if (npx_exists) {
@@ -773,6 +808,7 @@ npxtrap()
 {
 	critical_t savecrit;
 	u_short control, status;
+	u_long *exstat;
 
 	if (!npx_exists) {
 		printf("npxtrap: npxproc = %p, curproc = %p, npx_exists = %d\n",
@@ -787,16 +823,17 @@ npxtrap()
 	 * wherever they are.
 	 */
 	if (PCPU_GET(npxproc) != curproc) {
-		control = curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_cw;
-		status = curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_sw;
+		control = GET_FPU_CW(curproc);
+		status = GET_FPU_SW(curproc);
 	} else {
 		fnstcw(&control);
 		fnstsw(&status);
 	}
 
-	curproc->p_addr->u_pcb.pcb_savefpu.sv_ex_sw = status;
+	exstat = GET_FPU_EXSW_PTR(&curproc->p_addr->u_pcb);
+	*exstat = status;
 	if (PCPU_GET(npxproc) != curproc)
-		curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_sw &= ~0x80bf;
+		MASK_FPU_SW(curproc, ~0x80bf);
 	else
 		fnclex();
 	critical_exit(savecrit);
@@ -813,6 +850,7 @@ npxtrap()
 int
 npxdna()
 {
+	u_long *exstat;
 	critical_t s;
 
 	if (!npx_exists)
@@ -828,7 +866,9 @@ npxdna()
 	 * Record new context early in case frstor causes an IRQ13.
 	 */
 	PCPU_SET(npxproc, CURPROC);
-	PCPU_GET(curpcb)->pcb_savefpu.sv_ex_sw = 0;
+
+	exstat = GET_FPU_EXSW_PTR(PCPU_GET(curpcb));
+	*exstat = 0;
 	/*
 	 * The following frstor may cause an IRQ13 when the state being
 	 * restored has a pending error.  The error will appear to have been
@@ -841,7 +881,7 @@ npxdna()
 	 * fnsave are broken, so our treatment breaks fnclex if it is the
 	 * first FPU instruction after a context switch.
 	 */
-	frstor(&PCPU_GET(curpcb)->pcb_savefpu);
+	fpurstor(&PCPU_GET(curpcb)->pcb_save, curproc->p_oncpu);
 	critical_exit(s);
 
 	return (1);
@@ -872,15 +912,46 @@ npxdna()
  */
 void
 npxsave(addr)
-	struct save87 *addr;
+	union savefpu *addr;
 {
 
 	stop_emulating();
-	fnsave(addr);
+	fpusave(addr, curproc->p_oncpu);
+
 	start_emulating();
 	PCPU_SET(npxproc, NULL);
 }
 
+static void
+fpusave(addr, oncpu)
+	union savefpu *addr;
+	u_char oncpu;
+{
+	static struct savexmm svxmm[MAXCPU];
+
+	if (!cpu_fxsr)
+		fnsave(addr);
+	else {
+		fxsave(&svxmm[oncpu]);
+		bcopy(&svxmm[oncpu], addr, sizeof(struct savexmm));
+	}
+}
+
+static void
+fpurstor(addr, oncpu)
+	union savefpu *addr;
+	u_char oncpu;
+{
+	static struct savexmm svxmm[MAXCPU];
+
+	if (!cpu_fxsr)
+		frstor(addr);
+	else {
+		bcopy(addr, &svxmm[oncpu], sizeof (struct savexmm));
+		fxrstor(&svxmm[oncpu]);
+	}
+}
+
 #ifdef I586_CPU_XXX
 static long
 timezero(funcname, func)
-- 
cgit v1.1