diff options
author | jhb <jhb@FreeBSD.org> | 2014-12-22 21:32:39 +0000 |
---|---|---|
committer | jhb <jhb@FreeBSD.org> | 2014-12-22 21:32:39 +0000 |
commit | 5ae50f92a8ff8229d56cc9462a1c3df5032d186b (patch) | |
tree | 3fa3dcfc9a65568b6d8ac45849bc58c6d2d9af7a /sys/i386/isa | |
parent | 2194e370615972dfe133b82534e6c48764ab9d27 (diff) | |
download | FreeBSD-src-5ae50f92a8ff8229d56cc9462a1c3df5032d186b.zip FreeBSD-src-5ae50f92a8ff8229d56cc9462a1c3df5032d186b.tar.gz |
MFC 273988,273989,273995,274057:
MFamd64: Add support for extended FPU states on i386. This includes
support for AVX on i386.
Diffstat (limited to 'sys/i386/isa')
-rw-r--r-- | sys/i386/isa/npx.c | 555 |
1 files changed, 399 insertions, 156 deletions
diff --git a/sys/i386/isa/npx.c b/sys/i386/isa/npx.c index dd8403a..0034f2d 100644 --- a/sys/i386/isa/npx.c +++ b/sys/i386/isa/npx.c @@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$"); #include <sys/syslog.h> #endif #include <sys/signalvar.h> +#include <vm/uma.h> #include <machine/asmacros.h> #include <machine/cputypes.h> @@ -99,7 +100,40 @@ __FBSDID("$FreeBSD$"); #ifdef CPU_ENABLE_SSE #define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr))) #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) +#define ldmxcsr(csr) __asm __volatile("ldmxcsr %0" : : "m" (csr)) #define stmxcsr(addr) __asm __volatile("stmxcsr %0" : : "m" (*(addr))) + +static __inline void +xrstor(char *addr, uint64_t mask) +{ + uint32_t low, hi; + + low = mask; + hi = mask >> 32; + __asm __volatile("xrstor %0" : : "m" (*addr), "a" (low), "d" (hi)); +} + +static __inline void +xsave(char *addr, uint64_t mask) +{ + uint32_t low, hi; + + low = mask; + hi = mask >> 32; + __asm __volatile("xsave %0" : "=m" (*addr) : "a" (low), "d" (hi) : + "memory"); +} + +static __inline void +xsaveopt(char *addr, uint64_t mask) +{ + uint32_t low, hi; + + low = mask; + hi = mask >> 32; + __asm __volatile("xsaveopt %0" : "=m" (*addr) : "a" (low), "d" (hi) : + "memory"); +} #endif #else /* !(__GNUCLIKE_ASM && !lint) */ @@ -114,7 +148,11 @@ void frstor(caddr_t addr); #ifdef CPU_ENABLE_SSE void fxsave(caddr_t addr); void fxrstor(caddr_t addr); +void ldmxcsr(u_int csr); void stmxcsr(u_int *csr); +void xrstor(char *addr, uint64_t mask); +void xsave(char *addr, uint64_t mask); +void xsaveopt(char *addr, uint64_t mask); #endif #endif /* __GNUCLIKE_ASM && !lint */ @@ -151,25 +189,46 @@ void stmxcsr(u_int *csr); (savefpu)->sv_87.sv_env.en_cw = (value) #endif /* CPU_ENABLE_SSE */ -typedef u_char bool_t; - #ifdef CPU_ENABLE_SSE +CTASSERT(sizeof(union savefpu) == 512); +CTASSERT(sizeof(struct xstate_hdr) == 64); +CTASSERT(sizeof(struct savefpu_ymm) == 832); + +/* + * This requirement is to make it easier for asm code to calculate + * offset of the fpu save area from the pcb address. FPU save area + * must be 64-byte aligned. + */ +CTASSERT(sizeof(struct pcb) % XSAVE_AREA_ALIGN == 0); + static void fpu_clean_state(void); #endif static void fpusave(union savefpu *); static void fpurstor(union savefpu *); -static int npx_attach(device_t dev); -static void npx_identify(driver_t *driver, device_t parent); -static int npx_probe(device_t dev); int hw_float; SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, &hw_float, 0, "Floating point instructions executed in hardware"); +#ifdef CPU_ENABLE_SSE +int use_xsave; +uint64_t xsave_mask; +#endif +static uma_zone_t fpu_save_area_zone; +static union savefpu *npx_initialstate; + +#ifdef CPU_ENABLE_SSE +struct xsave_area_elm_descr { + u_int offset; + u_int size; +} *xsave_area_desc; + +static int use_xsaveopt; +#endif + static volatile u_int npx_traps_while_probing; -static union savefpu npx_initialstate; alias_for_inthand_t probetrap; __asm(" \n\ @@ -184,32 +243,14 @@ __asm(" \n\ "); /* - * Identify routine. Create a connection point on our parent for probing. - */ -static void -npx_identify(driver, parent) - driver_t *driver; - device_t parent; -{ - device_t child; - - child = BUS_ADD_CHILD(parent, 0, "npx", 0); - if (child == NULL) - panic("npx_identify"); -} - -/* - * Probe routine. Set flags to tell npxattach() what to do. Set up an - * interrupt handler if npx needs to use interrupts. + * Determine if an FPU is present and how to use it. */ static int -npx_probe(device_t dev) +npx_probe(void) { struct gate_descriptor save_idt_npxtrap; u_short control, status; - device_set_desc(dev, "math processor"); - /* * Modern CPUs all have an FPU that uses the INT16 interface * and provide a simple way to verify that, so handle the @@ -217,8 +258,7 @@ npx_probe(device_t dev) */ if (cpu_feature & CPUID_FPU) { hw_float = 1; - device_quiet(dev); - return (0); + return (1); } save_idt_npxtrap = idt[IDT_MF]; @@ -272,7 +312,7 @@ npx_probe(device_t dev) * accelerator board. */ hw_float = 1; - return (0); + return (1); #endif npx_traps_while_probing = 0; fp_divide_by_0(); @@ -283,7 +323,7 @@ npx_probe(device_t dev) hw_float = 1; goto cleanup; } - device_printf(dev, + printf( "FPU does not use exception 16 for error reporting\n"); goto cleanup; } @@ -293,80 +333,219 @@ npx_probe(device_t dev) * Probe failed. Floating point simply won't work. * Notify user and disable FPU/MMX/SSE instruction execution. */ - device_printf(dev, "WARNING: no FPU!\n"); + printf("WARNING: no FPU!\n"); __asm __volatile("smsw %%ax; orb %0,%%al; lmsw %%ax" : : "n" (CR0_EM | CR0_MP) : "ax"); cleanup: idt[IDT_MF] = save_idt_npxtrap; - return (hw_float ? 0 : ENXIO); + return (hw_float); } +#ifdef CPU_ENABLE_SSE /* - * Attach routine - announce which it is, and wire into system + * Enable XSAVE if supported and allowed by user. + * Calculate the xsave_mask. */ -static int -npx_attach(device_t dev) +static void +npxinit_bsp1(void) { + u_int cp[4]; + uint64_t xsave_mask_user; - npxinit(); - critical_enter(); - stop_emulating(); - fpusave(&npx_initialstate); - start_emulating(); + if (cpu_fxsr && (cpu_feature2 & CPUID2_XSAVE) != 0) { + use_xsave = 1; + TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave); + } + if (!use_xsave) + return; + + cpuid_count(0xd, 0x0, cp); + xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; + if ((cp[0] & xsave_mask) != xsave_mask) + panic("CPU0 does not support X87 or SSE: %x", cp[0]); + xsave_mask = ((uint64_t)cp[3] << 32) | cp[0]; + xsave_mask_user = xsave_mask; + TUNABLE_QUAD_FETCH("hw.xsave_mask", &xsave_mask_user); + xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; + xsave_mask &= xsave_mask_user; + if ((xsave_mask & XFEATURE_AVX512) != XFEATURE_AVX512) + xsave_mask &= ~XFEATURE_AVX512; + if ((xsave_mask & XFEATURE_MPX) != XFEATURE_MPX) + xsave_mask &= ~XFEATURE_MPX; + + cpuid_count(0xd, 0x1, cp); + if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0) + use_xsaveopt = 1; +} +#endif +/* + + * Calculate the fpu save area size. + */ +static void +npxinit_bsp2(void) +{ #ifdef CPU_ENABLE_SSE - if (cpu_fxsr) { - if (npx_initialstate.sv_xmm.sv_env.en_mxcsr_mask) - cpu_mxcsr_mask = - npx_initialstate.sv_xmm.sv_env.en_mxcsr_mask; - else - cpu_mxcsr_mask = 0xFFBF; - bzero(npx_initialstate.sv_xmm.sv_fp, - sizeof(npx_initialstate.sv_xmm.sv_fp)); - bzero(npx_initialstate.sv_xmm.sv_xmm, - sizeof(npx_initialstate.sv_xmm.sv_xmm)); - /* XXX might need even more zeroing. */ + u_int cp[4]; + + if (use_xsave) { + cpuid_count(0xd, 0x0, cp); + cpu_max_ext_state_size = cp[1]; + + /* + * Reload the cpu_feature2, since we enabled OSXSAVE. + */ + do_cpuid(1, cp); + cpu_feature2 = cp[2]; } else #endif - bzero(npx_initialstate.sv_87.sv_ac, - sizeof(npx_initialstate.sv_87.sv_ac)); - critical_exit(); - - return (0); + cpu_max_ext_state_size = sizeof(union savefpu); } /* * Initialize floating point unit. */ void -npxinit(void) +npxinit(bool bsp) { static union savefpu dummy; register_t saveintr; +#ifdef CPU_ENABLE_SSE + u_int mxcsr; +#endif u_short control; - if (!hw_float) - return; + if (bsp) { + if (!npx_probe()) + return; +#ifdef CPU_ENABLE_SSE + npxinit_bsp1(); +#endif + } + +#ifdef CPU_ENABLE_SSE + if (use_xsave) { + load_cr4(rcr4() | CR4_XSAVE); + load_xcr(XCR0, xsave_mask); + } +#endif + + /* + * XCR0 shall be set up before CPU can report the save area size. + */ + if (bsp) + npxinit_bsp2(); + /* * fninit has the same h/w bugs as fnsave. Use the detoxified - * fnsave to throw away any junk in the fpu. npxsave() initializes - * the fpu and sets fpcurthread = NULL as important side effects. + * fnsave to throw away any junk in the fpu. fpusave() initializes + * the fpu. * * It is too early for critical_enter() to work on AP. */ saveintr = intr_disable(); - npxsave(&dummy); stop_emulating(); #ifdef CPU_ENABLE_SSE - /* XXX npxsave() doesn't actually initialize the fpu in the SSE case. */ if (cpu_fxsr) fninit(); + else #endif + fnsave(&dummy); control = __INITIAL_NPXCW__; fldcw(control); +#ifdef CPU_ENABLE_SSE + if (cpu_fxsr) { + mxcsr = __INITIAL_MXCSR__; + ldmxcsr(mxcsr); + } +#endif + start_emulating(); + intr_restore(saveintr); +} + +/* + * On the boot CPU we generate a clean state that is used to + * initialize the floating point unit when it is first used by a + * process. + */ +static void +npxinitstate(void *arg __unused) +{ + register_t saveintr; +#ifdef CPU_ENABLE_SSE + int cp[4], i, max_ext_n; +#endif + + if (!hw_float) + return; + + npx_initialstate = malloc(cpu_max_ext_state_size, M_DEVBUF, + M_WAITOK | M_ZERO); + saveintr = intr_disable(); + stop_emulating(); + + fpusave(npx_initialstate); +#ifdef CPU_ENABLE_SSE + if (cpu_fxsr) { + if (npx_initialstate->sv_xmm.sv_env.en_mxcsr_mask) + cpu_mxcsr_mask = + npx_initialstate->sv_xmm.sv_env.en_mxcsr_mask; + else + cpu_mxcsr_mask = 0xFFBF; + + /* + * The fninit instruction does not modify XMM + * registers. The fpusave call dumped the garbage + * contained in the registers after reset to the + * initial state saved. Clear XMM registers file + * image to make the startup program state and signal + * handler XMM register content predictable. + */ + bzero(npx_initialstate->sv_xmm.sv_fp, + sizeof(npx_initialstate->sv_xmm.sv_fp)); + bzero(npx_initialstate->sv_xmm.sv_xmm, + sizeof(npx_initialstate->sv_xmm.sv_xmm)); + } else +#endif + bzero(npx_initialstate->sv_87.sv_ac, + sizeof(npx_initialstate->sv_87.sv_ac)); + +#ifdef CPU_ENABLE_SSE + /* + * Create a table describing the layout of the CPU Extended + * Save Area. + */ + if (use_xsave) { + if (xsave_mask >> 32 != 0) + max_ext_n = fls(xsave_mask >> 32) + 32; + else + max_ext_n = fls(xsave_mask); + xsave_area_desc = malloc(max_ext_n * sizeof(struct + xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO); + /* x87 state */ + xsave_area_desc[0].offset = 0; + xsave_area_desc[0].size = 160; + /* XMM */ + xsave_area_desc[1].offset = 160; + xsave_area_desc[1].size = 288 - 160; + + for (i = 2; i < max_ext_n; i++) { + cpuid_count(0xd, i, cp); + xsave_area_desc[i].offset = cp[1]; + xsave_area_desc[i].size = cp[0]; + } + } +#endif + + fpu_save_area_zone = uma_zcreate("FPU_save_area", + cpu_max_ext_state_size, NULL, NULL, NULL, NULL, + XSAVE_AREA_ALIGN - 1, 0); + start_emulating(); intr_restore(saveintr); } +SYSINIT(npxinitstate, SI_SUB_DRIVERS, SI_ORDER_ANY, npxinitstate, NULL); /* * Free coprocessor (if we have it). @@ -377,8 +556,12 @@ npxexit(td) { critical_enter(); - if (curthread == PCPU_GET(fpcurthread)) - npxsave(curpcb->pcb_save); + if (curthread == PCPU_GET(fpcurthread)) { + stop_emulating(); + fpusave(curpcb->pcb_save); + start_emulating(); + PCPU_SET(fpcurthread, NULL); + } critical_exit(); #ifdef NPX_DEBUG if (hw_float) { @@ -683,7 +866,7 @@ npxdna(void) } stop_emulating(); /* - * Record new context early in case frstor causes an IRQ13. + * Record new context early in case frstor causes a trap. */ PCPU_SET(fpcurthread, curthread); @@ -697,28 +880,20 @@ npxdna(void) * This is the first time this thread has used the FPU or * the PCB doesn't contain a clean FPU state. Explicitly * load an initial state. + * + * We prefer to restore the state from the actual save + * area in PCB instead of directly loading from + * npx_initialstate, to ignite the XSAVEOPT + * tracking engine. */ - fpurstor(&npx_initialstate); + bcopy(npx_initialstate, curpcb->pcb_save, cpu_max_ext_state_size); + fpurstor(curpcb->pcb_save); if (curpcb->pcb_initial_npxcw != __INITIAL_NPXCW__) fldcw(curpcb->pcb_initial_npxcw); curpcb->pcb_flags |= PCB_NPXINITDONE; if (PCB_USER_FPU(curpcb)) curpcb->pcb_flags |= PCB_NPXUSERINITDONE; } else { - /* - * The following fpurstor() may cause an IRQ13 when the - * state being restored has a pending error. The error will - * appear to have been triggered by the current (npx) user - * instruction even when that instruction is a no-wait - * instruction that should not trigger an error (e.g., - * fnclex). On at least one 486 system all of the no-wait - * instructions are broken the same as frstor, so our - * treatment does not amplify the breakage. On at least - * one 386/Cyrix 387 system, fnclex works correctly while - * frstor and fnsave are broken, so our treatment breaks - * fnclex if it is the first FPU instruction after a context - * switch. - */ fpurstor(curpcb->pcb_save); } critical_exit(); @@ -727,27 +902,12 @@ npxdna(void) } /* - * Wrapper for fnsave instruction, partly to handle hardware bugs. When npx - * exceptions are reported via IRQ13, spurious IRQ13's may be triggered by - * no-wait npx instructions. See the Intel application note AP-578 for - * details. This doesn't cause any additional complications here. IRQ13's - * are inherently asynchronous unless the CPU is frozen to deliver them -- - * one that started in userland may be delivered many instructions later, - * after the process has entered the kernel. It may even be delivered after - * the fnsave here completes. A spurious IRQ13 for the fnsave is handled in - * the same way as a very-late-arriving non-spurious IRQ13 from user mode: - * it is normally ignored at first because we set fpcurthread to NULL; it is - * normally retriggered in npxdna() after return to user mode. + * Wrapper for fpusave() called from context switch routines. * * npxsave() must be called with interrupts disabled, so that it clears * fpcurthread atomically with saving the state. We require callers to do the * disabling, since most callers need to disable interrupts anyway to call * npxsave() atomically with checking fpcurthread. - * - * A previous version of npxsave() went to great lengths to excecute fnsave - * with interrupts enabled in case executing it froze the CPU. This case - * can't happen, at least for Intel CPU/NPX's. Spurious IRQ13's don't imply - * spurious freezes. */ void npxsave(addr) @@ -755,8 +915,12 @@ npxsave(addr) { stop_emulating(); - fpusave(addr); - +#ifdef CPU_ENABLE_SSE + if (use_xsaveopt) + xsaveopt((char *)addr, xsave_mask); + else +#endif + fpusave(addr); start_emulating(); PCPU_SET(fpcurthread, NULL); } @@ -773,11 +937,11 @@ npxsuspend(union savefpu *addr) if (!hw_float) return; if (PCPU_GET(fpcurthread) == NULL) { - *addr = npx_initialstate; + bcopy(npx_initialstate, addr, cpu_max_ext_state_size); return; } cr0 = rcr0(); - clts(); + stop_emulating(); fpusave(addr); load_cr0(cr0); } @@ -791,8 +955,7 @@ npxresume(union savefpu *addr) return; cr0 = rcr0(); - clts(); - npxinit(); + npxinit(false); stop_emulating(); fpurstor(addr); load_cr0(cr0); @@ -829,21 +992,27 @@ int npxgetregs(struct thread *td) { struct pcb *pcb; +#ifdef CPU_ENABLE_SSE + uint64_t *xstate_bv, bit; + char *sa; + int max_ext_n, i; +#endif + int owned; if (!hw_float) return (_MC_FPOWNED_NONE); pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) { - bcopy(&npx_initialstate, &pcb->pcb_user_save, - sizeof(npx_initialstate)); - SET_FPU_CW(&pcb->pcb_user_save, pcb->pcb_initial_npxcw); + bcopy(npx_initialstate, get_pcb_user_save_pcb(pcb), + cpu_max_ext_state_size); + SET_FPU_CW(get_pcb_user_save_pcb(pcb), pcb->pcb_initial_npxcw); npxuserinited(td); return (_MC_FPOWNED_PCB); } critical_enter(); if (td == PCPU_GET(fpcurthread)) { - fpusave(&pcb->pcb_user_save); + fpusave(get_pcb_user_save_pcb(pcb)); #ifdef CPU_ENABLE_SSE if (!cpu_fxsr) #endif @@ -853,12 +1022,36 @@ npxgetregs(struct thread *td) * starts with a clean state next time. */ npxdrop(); - critical_exit(); - return (_MC_FPOWNED_FPU); + owned = _MC_FPOWNED_FPU; } else { - critical_exit(); - return (_MC_FPOWNED_PCB); + owned = _MC_FPOWNED_PCB; + } + critical_exit(); +#ifdef CPU_ENABLE_SSE + if (use_xsave) { + /* + * Handle partially saved state. + */ + sa = (char *)get_pcb_user_save_pcb(pcb); + xstate_bv = (uint64_t *)(sa + sizeof(union savefpu) + + offsetof(struct xstate_hdr, xstate_bv)); + if (xsave_mask >> 32 != 0) + max_ext_n = fls(xsave_mask >> 32) + 32; + else + max_ext_n = fls(xsave_mask); + for (i = 0; i < max_ext_n; i++) { + bit = 1ULL << i; + if ((xsave_mask & bit) == 0 || (*xstate_bv & bit) != 0) + continue; + bcopy((char *)npx_initialstate + + xsave_area_desc[i].offset, + sa + xsave_area_desc[i].offset, + xsave_area_desc[i].size); + *xstate_bv |= bit; + } } +#endif + return (owned); } void @@ -872,34 +1065,85 @@ npxuserinited(struct thread *td) pcb->pcb_flags |= PCB_NPXUSERINITDONE; } +#ifdef CPU_ENABLE_SSE +int +npxsetxstate(struct thread *td, char *xfpustate, size_t xfpustate_size) +{ + struct xstate_hdr *hdr, *ehdr; + size_t len, max_len; + uint64_t bv; -void -npxsetregs(struct thread *td, union savefpu *addr) + /* XXXKIB should we clear all extended state in xstate_bv instead ? */ + if (xfpustate == NULL) + return (0); + if (!use_xsave) + return (EOPNOTSUPP); + + len = xfpustate_size; + if (len < sizeof(struct xstate_hdr)) + return (EINVAL); + max_len = cpu_max_ext_state_size - sizeof(union savefpu); + if (len > max_len) + return (EINVAL); + + ehdr = (struct xstate_hdr *)xfpustate; + bv = ehdr->xstate_bv; + + /* + * Avoid #gp. + */ + if (bv & ~xsave_mask) + return (EINVAL); + + hdr = (struct xstate_hdr *)(get_pcb_user_save_td(td) + 1); + + hdr->xstate_bv = bv; + bcopy(xfpustate + sizeof(struct xstate_hdr), + (char *)(hdr + 1), len - sizeof(struct xstate_hdr)); + + return (0); +} +#endif + +int +npxsetregs(struct thread *td, union savefpu *addr, char *xfpustate, + size_t xfpustate_size) { struct pcb *pcb; +#ifdef CPU_ENABLE_SSE + int error; +#endif if (!hw_float) - return; + return (ENXIO); pcb = td->td_pcb; critical_enter(); if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { #ifdef CPU_ENABLE_SSE + error = npxsetxstate(td, xfpustate, xfpustate_size); + if (error != 0) { + critical_exit(); + return (error); + } if (!cpu_fxsr) #endif fnclex(); /* As in npxdrop(). */ - if (((uintptr_t)addr & 0xf) != 0) { - bcopy(addr, &pcb->pcb_user_save, sizeof(*addr)); - fpurstor(&pcb->pcb_user_save); - } else - fpurstor(addr); + bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); + fpurstor(get_pcb_user_save_td(td)); critical_exit(); pcb->pcb_flags |= PCB_NPXUSERINITDONE | PCB_NPXINITDONE; } else { critical_exit(); - bcopy(addr, &pcb->pcb_user_save, sizeof(*addr)); +#ifdef CPU_ENABLE_SSE + error = npxsetxstate(td, xfpustate, xfpustate_size); + if (error != 0) + return (error); +#endif + bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); npxuserinited(td); } + return (0); } static void @@ -908,7 +1152,9 @@ fpusave(addr) { #ifdef CPU_ENABLE_SSE - if (cpu_fxsr) + if (use_xsave) + xsave((char *)addr, xsave_mask); + else if (cpu_fxsr) fxsave(addr); else #endif @@ -954,40 +1200,15 @@ fpurstor(addr) { #ifdef CPU_ENABLE_SSE - if (cpu_fxsr) + if (use_xsave) + xrstor((char *)addr, xsave_mask); + else if (cpu_fxsr) fxrstor(addr); else #endif frstor(addr); } -static device_method_t npx_methods[] = { - /* Device interface */ - DEVMETHOD(device_identify, npx_identify), - DEVMETHOD(device_probe, npx_probe), - DEVMETHOD(device_attach, npx_attach), - DEVMETHOD(device_detach, bus_generic_detach), - DEVMETHOD(device_shutdown, bus_generic_shutdown), - DEVMETHOD(device_suspend, bus_generic_suspend), - DEVMETHOD(device_resume, bus_generic_resume), - - { 0, 0 } -}; - -static driver_t npx_driver = { - "npx", - npx_methods, - 1, /* no softc */ -}; - -static devclass_t npx_devclass; - -/* - * We prefer to attach to the root nexus so that the usual case (exception 16) - * doesn't describe the processor as being `on isa'. - */ -DRIVER_MODULE(npx, nexus, npx_driver, npx_devclass, 0, 0); - #ifdef DEV_ISA /* * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI. @@ -1042,8 +1263,6 @@ DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0); static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx", "Kernel contexts for FPU state"); -#define XSAVE_AREA_ALIGN 64 - #define FPU_KERN_CTX_NPXINITDONE 0x01 #define FPU_KERN_CTX_DUMMY 0x02 @@ -1060,7 +1279,7 @@ fpu_kern_alloc_ctx(u_int flags) size_t sz; sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN + - sizeof(union savefpu); + cpu_max_ext_state_size; res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ? M_NOWAIT : M_WAITOK) | M_ZERO); return (res); @@ -1094,8 +1313,8 @@ fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) return (0); } pcb = td->td_pcb; - KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == &pcb->pcb_user_save, - ("mangled pcb_save")); + KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == + get_pcb_user_save_pcb(pcb), ("mangled pcb_save")); ctx->flags = 0; if ((pcb->pcb_flags & PCB_NPXINITDONE) != 0) ctx->flags |= FPU_KERN_CTX_NPXINITDONE; @@ -1120,7 +1339,7 @@ fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx) npxdrop(); critical_exit(); pcb->pcb_save = ctx->prev; - if (pcb->pcb_save == &pcb->pcb_user_save) { + if (pcb->pcb_save == get_pcb_user_save_pcb(pcb)) { if ((pcb->pcb_flags & PCB_NPXUSERINITDONE) != 0) pcb->pcb_flags |= PCB_NPXINITDONE; else @@ -1144,7 +1363,7 @@ fpu_kern_thread(u_int flags) pcb = curpcb; KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0, ("Only kthread may use fpu_kern_thread")); - KASSERT(curpcb->pcb_save == &curpcb->pcb_user_save, + KASSERT(curpcb->pcb_save == get_pcb_user_save_pcb(curpcb), ("mangled pcb_save")); KASSERT(PCB_USER_FPU(curpcb), ("recursive call")); @@ -1160,3 +1379,27 @@ is_fpu_kern_thread(u_int flags) return (0); return ((curpcb->pcb_flags & PCB_KERNNPX) != 0); } + +/* + * FPU save area alloc/free/init utility routines + */ +union savefpu * +fpu_save_area_alloc(void) +{ + + return (uma_zalloc(fpu_save_area_zone, 0)); +} + +void +fpu_save_area_free(union savefpu *fsa) +{ + + uma_zfree(fpu_save_area_zone, fsa); +} + +void +fpu_save_area_reset(union savefpu *fsa) +{ + + bcopy(npx_initialstate, fsa, cpu_max_ext_state_size); +} |