diff options
-rw-r--r-- | sys/amd64/amd64/fpu.c | 69 | ||||
-rw-r--r-- | sys/amd64/amd64/machdep.c | 10 | ||||
-rw-r--r-- | sys/amd64/ia32/ia32_reg.c | 14 | ||||
-rw-r--r-- | sys/amd64/ia32/ia32_signal.c | 7 | ||||
-rw-r--r-- | sys/amd64/include/fpu.h | 5 | ||||
-rw-r--r-- | sys/i386/i386/machdep.c | 99 | ||||
-rw-r--r-- | sys/i386/include/npx.h | 5 | ||||
-rw-r--r-- | sys/i386/isa/npx.c | 94 | ||||
-rw-r--r-- | sys/pc98/pc98/machdep.c | 103 |
9 files changed, 115 insertions, 291 deletions
diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c index 4b1583a..482b5da 100644 --- a/sys/amd64/amd64/fpu.c +++ b/sys/amd64/amd64/fpu.c @@ -426,9 +426,7 @@ fpudna(void) fxrstor(&fpu_initialstate); if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__) fldcw(pcb->pcb_initial_fpucw); - pcb->pcb_flags |= PCB_FPUINITDONE; - if (PCB_USER_FPU(pcb)) - pcb->pcb_flags |= PCB_USERFPUINITDONE; + fpuuserinited(curthread); } else fxrstor(pcb->pcb_save); critical_exit(); @@ -448,60 +446,50 @@ fpudrop() } /* - * Get the state of the FPU without dropping ownership (if possible). - * It returns the FPU ownership status. + * Get the user state of the FPU into pcb->pcb_user_save without + * dropping ownership (if possible). It returns the FPU ownership + * status. */ int -fpugetuserregs(struct thread *td, struct savefpu *addr) +fpugetregs(struct thread *td) { struct pcb *pcb; pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) { - bcopy(&fpu_initialstate, addr, sizeof(fpu_initialstate)); - addr->sv_env.en_cw = pcb->pcb_initial_fpucw; - return (_MC_FPOWNED_NONE); + bcopy(&fpu_initialstate, &pcb->pcb_user_save, + sizeof(fpu_initialstate)); + pcb->pcb_user_save.sv_env.en_cw = pcb->pcb_initial_fpucw; + fpuuserinited(td); + return (_MC_FPOWNED_PCB); } critical_enter(); if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { - fxsave(addr); + fxsave(&pcb->pcb_user_save); critical_exit(); return (_MC_FPOWNED_FPU); } else { critical_exit(); - bcopy(&pcb->pcb_user_save, addr, sizeof(*addr)); return (_MC_FPOWNED_PCB); } } -int -fpugetregs(struct thread *td, struct savefpu *addr) +void +fpuuserinited(struct thread *td) { struct pcb *pcb; pcb = td->td_pcb; - if ((pcb->pcb_flags & PCB_FPUINITDONE) == 0) { - bcopy(&fpu_initialstate, addr, sizeof(fpu_initialstate)); - addr->sv_env.en_cw = pcb->pcb_initial_fpucw; - return (_MC_FPOWNED_NONE); - } - critical_enter(); - if (td == PCPU_GET(fpcurthread)) { - fxsave(addr); - critical_exit(); - return (_MC_FPOWNED_FPU); - } else { - critical_exit(); - bcopy(pcb->pcb_save, addr, sizeof(*addr)); - return (_MC_FPOWNED_PCB); - } + if (PCB_USER_FPU(pcb)) + pcb->pcb_flags |= PCB_FPUINITDONE; + pcb->pcb_flags |= PCB_USERFPUINITDONE; } /* * Set the state of the FPU. */ void -fpusetuserregs(struct thread *td, struct savefpu *addr) +fpusetregs(struct thread *td, struct savefpu *addr) { struct pcb *pcb; @@ -514,29 +502,8 @@ fpusetuserregs(struct thread *td, struct savefpu *addr) } else { critical_exit(); bcopy(addr, &td->td_pcb->pcb_user_save, sizeof(*addr)); - if (PCB_USER_FPU(pcb)) - pcb->pcb_flags |= PCB_FPUINITDONE; - pcb->pcb_flags |= PCB_USERFPUINITDONE; - } -} - -void -fpusetregs(struct thread *td, struct savefpu *addr) -{ - struct pcb *pcb; - - pcb = td->td_pcb; - critical_enter(); - if (td == PCPU_GET(fpcurthread)) { - fxrstor(addr); - critical_exit(); - } else { - critical_exit(); - bcopy(addr, td->td_pcb->pcb_save, sizeof(*addr)); + fpuuserinited(td); } - if (PCB_USER_FPU(pcb)) - pcb->pcb_flags |= PCB_USERFPUINITDONE; - pcb->pcb_flags |= PCB_FPUINITDONE; } /* diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 738427f..194cf71 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -1975,6 +1975,8 @@ int fill_fpregs(struct thread *td, struct fpreg *fpregs) { + KASSERT(TD_IS_SUSPENDED(td), ("not suspended thread %p", td)); + fpugetregs(td); fill_fpregs_xmm(&td->td_pcb->pcb_user_save, fpregs); return (0); } @@ -1985,6 +1987,7 @@ set_fpregs(struct thread *td, struct fpreg *fpregs) { set_fpregs_xmm(fpregs, &td->td_pcb->pcb_user_save); + fpuuserinited(td); return (0); } @@ -2099,8 +2102,9 @@ static void get_fpcontext(struct thread *td, mcontext_t *mcp) { - mcp->mc_ownedfp = fpugetuserregs(td, - (struct savefpu *)&mcp->mc_fpstate); + mcp->mc_ownedfp = fpugetregs(td); + bcopy(&td->td_pcb->pcb_user_save, &mcp->mc_fpstate, + sizeof(mcp->mc_fpstate)); mcp->mc_fpformat = fpuformat(); } @@ -2120,7 +2124,7 @@ set_fpcontext(struct thread *td, const mcontext_t *mcp) mcp->mc_ownedfp == _MC_FPOWNED_PCB) { fpstate = (struct savefpu *)&mcp->mc_fpstate; fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask; - fpusetuserregs(td, fpstate); + fpusetregs(td, fpstate); } else return (EINVAL); return (0); diff --git a/sys/amd64/ia32/ia32_reg.c b/sys/amd64/ia32/ia32_reg.c index 30fcffb..da6eb0f 100644 --- a/sys/amd64/ia32/ia32_reg.c +++ b/sys/amd64/ia32/ia32_reg.c @@ -145,13 +145,18 @@ set_regs32(struct thread *td, struct reg32 *regs) int fill_fpregs32(struct thread *td, struct fpreg32 *regs) { - struct save87 *sv_87 = (struct save87 *)regs; - struct env87 *penv_87 = &sv_87->sv_env; - struct savefpu *sv_fpu = &td->td_pcb->pcb_user_save; - struct envxmm *penv_xmm = &sv_fpu->sv_env; + struct savefpu *sv_fpu; + struct save87 *sv_87; + struct env87 *penv_87; + struct envxmm *penv_xmm; int i; bzero(regs, sizeof(*regs)); + sv_87 = (struct save87 *)regs; + penv_87 = &sv_87->sv_env; + fpugetregs(td); + sv_fpu = &td->td_pcb->pcb_user_save; + penv_xmm = &sv_fpu->sv_env; /* FPU control/status */ penv_87->en_cw = penv_xmm->en_cw; @@ -200,6 +205,7 @@ set_fpregs32(struct thread *td, struct fpreg32 *regs) sv_fpu->sv_fp[i].fp_acc = sv_87->sv_ac[i]; for (i = 8; i < 16; ++i) bzero(&sv_fpu->sv_fp[i].fp_acc, sizeof(sv_fpu->sv_fp[i].fp_acc)); + fpuuserinited(td); return (0); } diff --git a/sys/amd64/ia32/ia32_signal.c b/sys/amd64/ia32/ia32_signal.c index d85a3bb..aefe9f0 100644 --- a/sys/amd64/ia32/ia32_signal.c +++ b/sys/amd64/ia32/ia32_signal.c @@ -99,8 +99,9 @@ ia32_get_fpcontext(struct thread *td, struct ia32_mcontext *mcp) * 64bit instruction and data pointers. Ignore the difference * for now, it should be irrelevant for most applications. */ - mcp->mc_ownedfp = fpugetuserregs(td, - (struct savefpu *)&mcp->mc_fpstate); + mcp->mc_ownedfp = fpugetregs(td); + bcopy(&td->td_pcb->pcb_user_save, &mcp->mc_fpstate, + sizeof(mcp->mc_fpstate)); mcp->mc_fpformat = fpuformat(); } @@ -117,7 +118,7 @@ ia32_set_fpcontext(struct thread *td, const struct ia32_mcontext *mcp) fpstate_drop(td); else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { - fpusetuserregs(td, (struct savefpu *)&mcp->mc_fpstate); + fpusetregs(td, (struct savefpu *)&mcp->mc_fpstate); } else return (EINVAL); return (0); diff --git a/sys/amd64/include/fpu.h b/sys/amd64/include/fpu.h index ca0ac8f..50b3819 100644 --- a/sys/amd64/include/fpu.h +++ b/sys/amd64/include/fpu.h @@ -112,12 +112,11 @@ void fpudna(void); void fpudrop(void); void fpuexit(struct thread *td); int fpuformat(void); -int fpugetregs(struct thread *td, struct savefpu *addr); -int fpugetuserregs(struct thread *td, struct savefpu *addr); +int fpugetregs(struct thread *td); void fpuinit(void); void fpusetregs(struct thread *td, struct savefpu *addr); -void fpusetuserregs(struct thread *td, struct savefpu *addr); int fputrap(void); +void fpuuserinited(struct thread *td); int fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags); int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx); diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index f89de32..935a8d20 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -633,13 +633,6 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ - - /* - * The get_fpcontext() call must be placed before assignments - * to mc_fsbase and mc_gsbase due to the alignment-override - * code in get_fpcontext() that possibly clobbers 12 bytes of - * mcontext after mc_fpstate. - */ get_fpcontext(td, &sf.sf_uc.uc_mcontext); fpstate_drop(td); /* @@ -3209,28 +3202,33 @@ set_fpregs_xmm(sv_87, sv_xmm) int fill_fpregs(struct thread *td, struct fpreg *fpregs) { + + KASSERT(TD_IS_SUSPENDED(td), ("not suspended thread %p", td)); + npxgetregs(td); #ifdef CPU_ENABLE_SSE - if (cpu_fxsr) { + if (cpu_fxsr) fill_fpregs_xmm(&td->td_pcb->pcb_user_save.sv_xmm, - (struct save87 *)fpregs); - return (0); - } + (struct save87 *)fpregs); + else #endif /* CPU_ENABLE_SSE */ - bcopy(&td->td_pcb->pcb_user_save.sv_87, fpregs, sizeof *fpregs); + bcopy(&td->td_pcb->pcb_user_save.sv_87, fpregs, + sizeof(*fpregs)); return (0); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { + #ifdef CPU_ENABLE_SSE - if (cpu_fxsr) { + if (cpu_fxsr) set_fpregs_xmm((struct save87 *)fpregs, &td->td_pcb->pcb_user_save.sv_xmm); - return (0); - } + else #endif /* CPU_ENABLE_SSE */ - bcopy(fpregs, &td->td_pcb->pcb_user_save.sv_87, sizeof *fpregs); + bcopy(fpregs, &td->td_pcb->pcb_user_save.sv_87, + sizeof(*fpregs)); + npxuserinited(td); return (0); } @@ -3272,13 +3270,6 @@ get_mcontext(struct thread *td, mcontext_t *mcp, int flags) mcp->mc_esp = tp->tf_esp; mcp->mc_ss = tp->tf_ss; mcp->mc_len = sizeof(*mcp); - - /* - * The get_fpcontext() call must be placed before assignments - * to mc_fsbase and mc_gsbase due to the alignment-override - * code in get_fpcontext() that possibly clobbers 12 bytes of - * mcontext after mc_fpstate. - */ get_fpcontext(td, mcp); sdp = &td->td_pcb->pcb_fsd; mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; @@ -3329,39 +3320,14 @@ set_mcontext(struct thread *td, const mcontext_t *mcp) static void get_fpcontext(struct thread *td, mcontext_t *mcp) { + #ifndef DEV_NPX mcp->mc_fpformat = _MC_FPFMT_NODEV; mcp->mc_ownedfp = _MC_FPOWNED_NONE; #else - union savefpu *addr; - - /* - * XXX mc_fpstate might be misaligned, since its declaration is not - * unportabilized using __attribute__((aligned(16))) like the - * declaration of struct savemm, and anyway, alignment doesn't work - * for auto variables since we don't use gcc's pessimal stack - * alignment. Work around this by abusing the spare fields after - * mcp->mc_fpstate. - * - * XXX unpessimize most cases by only aligning when fxsave might be - * called, although this requires knowing too much about - * npxgetuserregs()'s internals. - */ - addr = (union savefpu *)&mcp->mc_fpstate; - if (td == PCPU_GET(fpcurthread) && -#ifdef CPU_ENABLE_SSE - cpu_fxsr && -#endif - ((uintptr_t)(void *)addr & 0xF)) { - do - addr = (void *)((char *)addr + 4); - while ((uintptr_t)(void *)addr & 0xF); - } - mcp->mc_ownedfp = npxgetuserregs(td, addr); - if (addr != (union savefpu *)&mcp->mc_fpstate) { - bcopy(addr, &mcp->mc_fpstate, sizeof(mcp->mc_fpstate)); - bzero(&mcp->mc_spare2, sizeof(mcp->mc_spare2)); - } + mcp->mc_ownedfp = npxgetregs(td); + bcopy(&td->td_pcb->pcb_user_save, &mcp->mc_fpstate, + sizeof(mcp->mc_fpstate)); mcp->mc_fpformat = npxformat(); #endif } @@ -3369,7 +3335,6 @@ get_fpcontext(struct thread *td, mcontext_t *mcp) static int set_fpcontext(struct thread *td, const mcontext_t *mcp) { - union savefpu *addr; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); @@ -3381,30 +3346,14 @@ set_fpcontext(struct thread *td, const mcontext_t *mcp) fpstate_drop(td); else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { - /* XXX align as above. */ - addr = (union savefpu *)&mcp->mc_fpstate; - if (td == PCPU_GET(fpcurthread) && -#ifdef CPU_ENABLE_SSE - cpu_fxsr && -#endif - ((uintptr_t)(void *)addr & 0xF)) { - do - addr = (void *)((char *)addr + 4); - while ((uintptr_t)(void *)addr & 0xF); - bcopy(&mcp->mc_fpstate, addr, sizeof(mcp->mc_fpstate)); - } #ifdef DEV_NPX #ifdef CPU_ENABLE_SSE if (cpu_fxsr) - addr->sv_xmm.sv_env.en_mxcsr &= cpu_mxcsr_mask; + ((union savefpu *)&mcp->mc_fpstate)->sv_xmm.sv_env. + en_mxcsr &= cpu_mxcsr_mask; #endif - npxsetuserregs(td, addr); + npxsetregs(td, (union savefpu *)&mcp->mc_fpstate); #endif - /* - * Don't bother putting things back where they were in the - * misaligned case, since we know that the caller won't use - * them again. - */ } else return (EINVAL); return (0); @@ -3422,12 +3371,12 @@ fpstate_drop(struct thread *td) #endif /* * XXX force a full drop of the npx. The above only drops it if we - * owned it. npxgetuserregs() has the same bug in the !cpu_fxsr case. + * owned it. npxgetregs() has the same bug in the !cpu_fxsr case. * - * XXX I don't much like npxgetuserregs()'s semantics of doing a full + * XXX I don't much like npxgetregs()'s semantics of doing a full * drop. Dropping only to the pcb matches fnsave's behaviour. * We only need to drop to !PCB_INITDONE in sendsig(). But - * sendsig() is the only caller of npxgetuserregs()... perhaps we just + * sendsig() is the only caller of npxgetregs()... perhaps we just * have too many layers. */ curthread->td_pcb->pcb_flags &= ~(PCB_NPXINITDONE | diff --git a/sys/i386/include/npx.h b/sys/i386/include/npx.h index 71073a6..6b56bb4 100644 --- a/sys/i386/include/npx.h +++ b/sys/i386/include/npx.h @@ -151,13 +151,12 @@ int npxdna(void); void npxdrop(void); void npxexit(struct thread *td); int npxformat(void); -int npxgetregs(struct thread *td, union savefpu *addr); -int npxgetuserregs(struct thread *td, union savefpu *addr); +int npxgetregs(struct thread *td); void npxinit(void); void npxsave(union savefpu *addr); void npxsetregs(struct thread *td, union savefpu *addr); -void npxsetuserregs(struct thread *td, union savefpu *addr); int npxtrap(void); +void npxuserinited(struct thread *); int fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags); int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx); diff --git a/sys/i386/isa/npx.c b/sys/i386/isa/npx.c index 8d37562..9ec5d25 100644 --- a/sys/i386/isa/npx.c +++ b/sys/i386/isa/npx.c @@ -684,9 +684,7 @@ npxdna(void) fpurstor(&npx_initialstate); if (pcb->pcb_initial_npxcw != __INITIAL_NPXCW__) fldcw(pcb->pcb_initial_npxcw); - pcb->pcb_flags |= PCB_NPXINITDONE; - if (PCB_USER_FPU(pcb)) - pcb->pcb_flags |= PCB_NPXUSERINITDONE; + npxuserinited(curthread); } else { /* * The following fpurstor() may cause an IRQ13 when the @@ -767,11 +765,12 @@ npxdrop() } /* - * Get the state of the FPU without dropping ownership (if possible). - * It returns the FPU ownership status. + * Get the user state of the FPU into pcb->pcb_user_save without + * dropping ownership (if possible). It returns the FPU ownership + * status. */ int -npxgetregs(struct thread *td, union savefpu *addr) +npxgetregs(struct thread *td) { struct pcb *pcb; @@ -780,48 +779,15 @@ npxgetregs(struct thread *td, union savefpu *addr) pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) { - bcopy(&npx_initialstate, addr, sizeof(npx_initialstate)); - SET_FPU_CW(addr, pcb->pcb_initial_npxcw); - return (_MC_FPOWNED_NONE); - } - critical_enter(); - if (td == PCPU_GET(fpcurthread)) { - fpusave(addr); -#ifdef CPU_ENABLE_SSE - if (!cpu_fxsr) -#endif - /* - * fnsave initializes the FPU and destroys whatever - * context it contains. Make sure the FPU owner - * starts with a clean state next time. - */ - npxdrop(); - critical_exit(); - return (_MC_FPOWNED_FPU); - } else { - critical_exit(); - bcopy(pcb->pcb_save, addr, sizeof(*addr)); + bcopy(&npx_initialstate, &pcb->pcb_user_save, + sizeof(npx_initialstate)); + SET_FPU_CW(&pcb->pcb_user_save, pcb->pcb_initial_npxcw); + npxuserinited(td); return (_MC_FPOWNED_PCB); } -} - -int -npxgetuserregs(struct thread *td, union savefpu *addr) -{ - struct pcb *pcb; - - if (!hw_float) - return (_MC_FPOWNED_NONE); - - pcb = td->td_pcb; - if ((pcb->pcb_flags & PCB_NPXUSERINITDONE) == 0) { - bcopy(&npx_initialstate, addr, sizeof(npx_initialstate)); - SET_FPU_CW(addr, pcb->pcb_initial_npxcw); - return (_MC_FPOWNED_NONE); - } critical_enter(); - if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { - fpusave(addr); + if (td == PCPU_GET(fpcurthread)) { + fpusave(&pcb->pcb_user_save); #ifdef CPU_ENABLE_SSE if (!cpu_fxsr) #endif @@ -835,42 +801,24 @@ npxgetuserregs(struct thread *td, union savefpu *addr) return (_MC_FPOWNED_FPU); } else { critical_exit(); - bcopy(&pcb->pcb_user_save, addr, sizeof(*addr)); return (_MC_FPOWNED_PCB); } } -/* - * Set the state of the FPU. - */ void -npxsetregs(struct thread *td, union savefpu *addr) +npxuserinited(struct thread *td) { struct pcb *pcb; - if (!hw_float) - return; - pcb = td->td_pcb; - critical_enter(); - if (td == PCPU_GET(fpcurthread)) { -#ifdef CPU_ENABLE_SSE - if (!cpu_fxsr) -#endif - fnclex(); /* As in npxdrop(). */ - fpurstor(addr); - critical_exit(); - } else { - critical_exit(); - bcopy(addr, pcb->pcb_save, sizeof(*addr)); - } if (PCB_USER_FPU(pcb)) - pcb->pcb_flags |= PCB_NPXUSERINITDONE; - pcb->pcb_flags |= PCB_NPXINITDONE; + pcb->pcb_flags |= PCB_NPXINITDONE; + pcb->pcb_flags |= PCB_NPXUSERINITDONE; } + void -npxsetuserregs(struct thread *td, union savefpu *addr) +npxsetregs(struct thread *td, union savefpu *addr) { struct pcb *pcb; @@ -884,15 +832,17 @@ npxsetuserregs(struct thread *td, union savefpu *addr) if (!cpu_fxsr) #endif fnclex(); /* As in npxdrop(). */ - fpurstor(addr); + if (((uintptr_t)addr & 0xf) != 0) { + bcopy(addr, &pcb->pcb_user_save, sizeof(*addr)); + fpurstor(&pcb->pcb_user_save); + } else + fpurstor(addr); critical_exit(); pcb->pcb_flags |= PCB_NPXUSERINITDONE | PCB_NPXINITDONE; } else { critical_exit(); bcopy(addr, &pcb->pcb_user_save, sizeof(*addr)); - if (PCB_USER_FPU(pcb)) - pcb->pcb_flags |= PCB_NPXINITDONE; - pcb->pcb_flags |= PCB_NPXUSERINITDONE; + npxuserinited(td); } } diff --git a/sys/pc98/pc98/machdep.c b/sys/pc98/pc98/machdep.c index 942aa45..03933a6 100644 --- a/sys/pc98/pc98/machdep.c +++ b/sys/pc98/pc98/machdep.c @@ -568,13 +568,6 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ - - /* - * The get_fpcontext() call must be placed before assignments - * to mc_fsbase and mc_gsbase due to the alignment-override - * code in get_fpcontext() that possibly clobbers 12 bytes of - * mcontext after mc_fpstate. - */ get_fpcontext(td, &sf.sf_uc.uc_mcontext); fpstate_drop(td); /* @@ -2553,28 +2546,33 @@ set_fpregs_xmm(sv_87, sv_xmm) int fill_fpregs(struct thread *td, struct fpreg *fpregs) { + + KASSERT(TD_IS_SUSPENDED(td), ("not suspended thread %p", td)); + npxgetregs(td); #ifdef CPU_ENABLE_SSE - if (cpu_fxsr) { - fill_fpregs_xmm(&td->td_pcb->pcb_save->sv_xmm, - (struct save87 *)fpregs); - return (0); - } + if (cpu_fxsr) + fill_fpregs_xmm(&td->td_pcb->pcb_user_save.sv_xmm, + (struct save87 *)fpregs); + else #endif /* CPU_ENABLE_SSE */ - bcopy(&td->td_pcb->pcb_save->sv_87, fpregs, sizeof *fpregs); + bcopy(&td->td_pcb->pcb_user_save.sv_87, fpregs, + sizeof(*fpregs)); return (0); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { + #ifdef CPU_ENABLE_SSE - if (cpu_fxsr) { + if (cpu_fxsr) set_fpregs_xmm((struct save87 *)fpregs, - &td->td_pcb->pcb_save->sv_xmm); - return (0); - } + &td->td_pcb->pcb_user_save.sv_xmm); + else #endif /* CPU_ENABLE_SSE */ - bcopy(fpregs, &td->td_pcb->pcb_save->sv_87, sizeof *fpregs); + bcopy(fpregs, &td->td_pcb->pcb_user_save.sv_87, + sizeof(*fpregs)); + npxuserinited(td); return (0); } @@ -2616,13 +2614,6 @@ get_mcontext(struct thread *td, mcontext_t *mcp, int flags) mcp->mc_esp = tp->tf_esp; mcp->mc_ss = tp->tf_ss; mcp->mc_len = sizeof(*mcp); - - /* - * The get_fpcontext() call must be placed before assignments - * to mc_fsbase and mc_gsbase due to the alignment-override - * code in get_fpcontext() that possibly clobbers 12 bytes of - * mcontext after mc_fpstate. - */ get_fpcontext(td, mcp); sdp = &td->td_pcb->pcb_fsd; mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; @@ -2673,39 +2664,14 @@ set_mcontext(struct thread *td, const mcontext_t *mcp) static void get_fpcontext(struct thread *td, mcontext_t *mcp) { + #ifndef DEV_NPX mcp->mc_fpformat = _MC_FPFMT_NODEV; mcp->mc_ownedfp = _MC_FPOWNED_NONE; #else - union savefpu *addr; - - /* - * XXX mc_fpstate might be misaligned, since its declaration is not - * unportabilized using __attribute__((aligned(16))) like the - * declaration of struct savemm, and anyway, alignment doesn't work - * for auto variables since we don't use gcc's pessimal stack - * alignment. Work around this by abusing the spare fields after - * mcp->mc_fpstate. - * - * XXX unpessimize most cases by only aligning when fxsave might be - * called, although this requires knowing too much about - * npxgetuserregs()'s internals. - */ - addr = (union savefpu *)&mcp->mc_fpstate; - if (td == PCPU_GET(fpcurthread) && -#ifdef CPU_ENABLE_SSE - cpu_fxsr && -#endif - ((uintptr_t)(void *)addr & 0xF)) { - do - addr = (void *)((char *)addr + 4); - while ((uintptr_t)(void *)addr & 0xF); - } - mcp->mc_ownedfp = npxgetuserregs(td, addr); - if (addr != (union savefpu *)&mcp->mc_fpstate) { - bcopy(addr, &mcp->mc_fpstate, sizeof(mcp->mc_fpstate)); - bzero(&mcp->mc_spare2, sizeof(mcp->mc_spare2)); - } + mcp->mc_ownedfp = npxgetregs(td); + bcopy(&td->td_pcb->pcb_user_save, &mcp->mc_fpstate, + sizeof(mcp->mc_fpstate)); mcp->mc_fpformat = npxformat(); #endif } @@ -2713,7 +2679,6 @@ get_fpcontext(struct thread *td, mcontext_t *mcp) static int set_fpcontext(struct thread *td, const mcontext_t *mcp) { - union savefpu *addr; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); @@ -2725,30 +2690,14 @@ set_fpcontext(struct thread *td, const mcontext_t *mcp) fpstate_drop(td); else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { - /* XXX align as above. */ - addr = (union savefpu *)&mcp->mc_fpstate; - if (td == PCPU_GET(fpcurthread) && -#ifdef CPU_ENABLE_SSE - cpu_fxsr && -#endif - ((uintptr_t)(void *)addr & 0xF)) { - do - addr = (void *)((char *)addr + 4); - while ((uintptr_t)(void *)addr & 0xF); - bcopy(&mcp->mc_fpstate, addr, sizeof(mcp->mc_fpstate)); - } #ifdef DEV_NPX #ifdef CPU_ENABLE_SSE if (cpu_fxsr) - addr->sv_xmm.sv_env.en_mxcsr &= cpu_mxcsr_mask; + ((union savefpu *)&mcp->mc_fpstate)->sv_xmm.sv_env. + en_mxcsr &= cpu_mxcsr_mask; #endif - npxsetuserregs(td, addr); + npxsetregs(td, (union savefpu *)&mcp->mc_fpstate); #endif - /* - * Don't bother putting things back where they were in the - * misaligned case, since we know that the caller won't use - * them again. - */ } else return (EINVAL); return (0); @@ -2765,12 +2714,12 @@ fpstate_drop(struct thread *td) #endif /* * XXX force a full drop of the npx. The above only drops it if we - * owned it. npxusergetregs() has the same bug in the !cpu_fxsr case. + * owned it. npxgetregs() has the same bug in the !cpu_fxsr case. * - * XXX I don't much like npxgetuserregs()'s semantics of doing a full + * XXX I don't much like npxgetregs()'s semantics of doing a full * drop. Dropping only to the pcb matches fnsave's behaviour. * We only need to drop to !PCB_INITDONE in sendsig(). But - * sendsig() is the only caller of npxgetuserregs()... perhaps we just + * sendsig() is the only caller of npxgetregs()... perhaps we just * have too many layers. */ curthread->td_pcb->pcb_flags &= ~(PCB_NPXINITDONE | |