diff options
author | mini <mini@FreeBSD.org> | 2002-09-16 19:25:08 +0000 |
---|---|---|
committer | mini <mini@FreeBSD.org> | 2002-09-16 19:25:08 +0000 |
commit | d0ffcf396dc1f28a9d3e188b2dd0a72ad4838880 (patch) | |
tree | 1422f3e0eee520ddbe0818b7dcdb3a3c7f038166 /sys | |
parent | 94a57caeb217b8a70c593972f7402cca3de08a19 (diff) | |
download | FreeBSD-src-d0ffcf396dc1f28a9d3e188b2dd0a72ad4838880.zip FreeBSD-src-d0ffcf396dc1f28a9d3e188b2dd0a72ad4838880.tar.gz |
Add kernel support needed for the KSE-aware libpthread:
- Maintain FPU state across signals.
- Use ucontext_t's to store KSE thread state.
- Synthesize state for the UTS upon each upcall, rather than
saving and copying a trapframe.
- Save and restore FPU state properly in ucontext_t's.
Reviewed by: deischen, julian
Approved by: -arch
Diffstat (limited to 'sys')
-rw-r--r-- | sys/amd64/amd64/genassym.c | 1 | ||||
-rw-r--r-- | sys/amd64/amd64/machdep.c | 204 | ||||
-rw-r--r-- | sys/amd64/amd64/support.S | 8 | ||||
-rw-r--r-- | sys/amd64/amd64/support.s | 8 | ||||
-rw-r--r-- | sys/amd64/amd64/trap.c | 2 | ||||
-rw-r--r-- | sys/amd64/amd64/vm_machdep.c | 86 | ||||
-rw-r--r-- | sys/i386/i386/genassym.c | 1 | ||||
-rw-r--r-- | sys/i386/i386/machdep.c | 204 | ||||
-rw-r--r-- | sys/i386/i386/support.s | 8 | ||||
-rw-r--r-- | sys/i386/i386/trap.c | 2 | ||||
-rw-r--r-- | sys/i386/i386/vm_machdep.c | 86 |
11 files changed, 444 insertions, 166 deletions
diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c index 06c87f0..d4aa9e3 100644 --- a/sys/amd64/amd64/genassym.c +++ b/sys/amd64/amd64/genassym.c @@ -142,7 +142,6 @@ ASSYM(PCB_SPARE, offsetof(struct pcb, __pcb_spare)); ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags)); ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save)); ASSYM(PCB_SAVEFPU_SIZE, sizeof(union savefpu)); -ASSYM(PCB_SAVE87_SIZE, sizeof(struct save87)); ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); ASSYM(PCB_SIZE, sizeof(struct pcb)); diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 95edb4b..bf2104a 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -138,6 +138,9 @@ extern void initializecpu(void); #endif static void cpu_startup(void *); +static void fpstate_drop(struct thread *td); +static void get_fpcontext(struct thread *td, mcontext_t *mcp); +static int set_fpcontext(struct thread *td, const mcontext_t *mcp); #ifdef CPU_ENABLE_SSE static void set_fpregs_xmm(struct save87 *, struct savexmm *); static void fill_fpregs_xmm(struct savexmm *, struct save87 *); @@ -440,8 +443,10 @@ sendsig(catcher, sig, mask, code) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); - sf.sf_uc.uc_mcontext.mc_flags = __UC_MC_VALID; /* no FP regs */ bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); + sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ + get_fpcontext(td, &sf.sf_uc.uc_mcontext); + fpstate_drop(td); /* Allocate space for the signal handler context. 
*/ if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack && @@ -675,7 +680,7 @@ sigreturn(td, uap) struct proc *p = td->td_proc; struct trapframe *regs; const ucontext_t *ucp; - int cs, eflags, error; + int cs, eflags, error, ret; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) @@ -749,6 +754,9 @@ sigreturn(td, uap) return (EINVAL); } + ret = set_fpcontext(td, &ucp->uc_mcontext); + if (ret != 0) + return (ret); bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } @@ -909,10 +917,19 @@ exec_setregs(td, entry, stack, ps_strings) */ load_cr0(rcr0() | CR0_MP | CR0_TS); -#ifdef DEV_NPX /* Initialize the npx (if any) for the current process. */ - npxinit(__INITIAL_NPXCW__); -#endif + /* + * XXX the above load_cr0() also initializes it and is a layering + * violation if NPX is configured. It drops the npx partially + * and this would be fatal if we were interrupted now, and decided + * to force the state to the pcb, and checked the invariant + * (CR0_TS clear) if and only if PCPU_GET(fpcurthread) != NULL). + * ALL of this can happen except the check. The check used to + * happen and be fatal later when we didn't complete the drop + * before returning to user mode. This should be fixed properly + * soon. + */ + fpstate_drop(td); /* * XXX - Linux emulator @@ -2003,8 +2020,6 @@ fill_fpregs_xmm(sv_xmm, sv_87) /* FPU registers */ for (i = 0; i < 8; ++i) sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; - - sv_87->sv_ex_sw = sv_xmm->sv_ex_sw; } static void @@ -2029,8 +2044,6 @@ set_fpregs_xmm(sv_87, sv_xmm) /* FPU registers */ for (i = 0; i < 8; ++i) sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; - - sv_xmm->sv_ex_sw = sv_87->sv_ex_sw; } #endif /* CPU_ENABLE_SSE */ @@ -2062,6 +2075,179 @@ set_fpregs(struct thread *td, struct fpreg *fpregs) return (0); } +/* + * Get machine context. 
+ */ +void +get_mcontext(struct thread *td, mcontext_t *mcp) +{ + struct trapframe *tp; + + tp = td->td_frame; + + mcp->mc_onstack = sigonstack(tp->tf_esp); + mcp->mc_gs = td->td_pcb->pcb_gs; + mcp->mc_fs = tp->tf_fs; + mcp->mc_es = tp->tf_es; + mcp->mc_ds = tp->tf_ds; + mcp->mc_edi = tp->tf_edi; + mcp->mc_esi = tp->tf_esi; + mcp->mc_ebp = tp->tf_ebp; + mcp->mc_isp = tp->tf_isp; + mcp->mc_ebx = tp->tf_ebx; + mcp->mc_edx = tp->tf_edx; + mcp->mc_ecx = tp->tf_ecx; + mcp->mc_eax = tp->tf_eax; + mcp->mc_eip = tp->tf_eip; + mcp->mc_cs = tp->tf_cs; + mcp->mc_eflags = tp->tf_eflags; + mcp->mc_esp = tp->tf_esp; + mcp->mc_ss = tp->tf_ss; + mcp->mc_len = sizeof(*mcp); + get_fpcontext(td, mcp); +} + +/* + * Set machine context. + * + * However, we don't set any but the user modifyable flags, and + * we we won't touch the cs selector. + */ +int +set_mcontext(struct thread *td, const mcontext_t *mcp) +{ + struct trapframe *tp; + int ret; + int eflags; + + tp = td->td_frame; + if (mcp->mc_len != sizeof(*mcp)) + return (EINVAL); + eflags = (mcp->mc_eflags & PSL_USERCHANGE) | + (tp->tf_eflags & ~PSL_USERCHANGE); + if ((ret = set_fpcontext(td, mcp)) == 0) { + tp->tf_fs = mcp->mc_fs; + tp->tf_es = mcp->mc_es; + tp->tf_ds = mcp->mc_ds; + tp->tf_edi = mcp->mc_edi; + tp->tf_esi = mcp->mc_esi; + tp->tf_ebp = mcp->mc_ebp; + tp->tf_ebx = mcp->mc_ebx; + tp->tf_edx = mcp->mc_edx; + tp->tf_ecx = mcp->mc_ecx; + tp->tf_eax = mcp->mc_eax; + tp->tf_eip = mcp->mc_eip; + tp->tf_eflags = eflags; + tp->tf_esp = mcp->mc_esp; + tp->tf_ss = mcp->mc_ss; + td->td_pcb->pcb_gs = mcp->mc_gs; + ret = 0; + } + return (ret); +} + +static void +get_fpcontext(struct thread *td, mcontext_t *mcp) +{ +#ifndef DEV_NPX + mcp->mc_fpformat = _MC_FPFMT_NODEV; + mcp->mc_ownedfp = _MC_FPOWNED_NONE; +#else + union savefpu *addr; + + /* + * XXX mc_fpstate might be misaligned, since its declaration is not + * unportabilized using __attribute__((aligned(16))) like the + * declaration of struct savemm, and anyway, alignment 
doesn't work + * for auto variables since we don't use gcc's pessimal stack + * alignment. Work around this by abusing the spare fields after + * mcp->mc_fpstate. + * + * XXX unpessimize most cases by only aligning when fxsave might be + * called, although this requires knowing too much about + * npxgetregs()'s internals. + */ + addr = (union savefpu *)&mcp->mc_fpstate; + if (td == PCPU_GET(fpcurthread) && cpu_fxsr && + ((uintptr_t)(void *)addr & 0xF)) { + do + addr = (void *)((char *)addr + 4); + while ((uintptr_t)(void *)addr & 0xF); + } + mcp->mc_ownedfp = npxgetregs(td, addr); + if (addr != (union savefpu *)&mcp->mc_fpstate) { + bcopy(addr, &mcp->mc_fpstate, sizeof(mcp->mc_fpstate)); + bzero(&mcp->mc_spare2, sizeof(mcp->mc_spare2)); + } + mcp->mc_fpformat = npxformat(); +#endif +} + +static int +set_fpcontext(struct thread *td, const mcontext_t *mcp) +{ + union savefpu *addr; + + if (mcp->mc_fpformat == _MC_FPFMT_NODEV) + return (0); + else if (mcp->mc_fpformat != _MC_FPFMT_387 && + mcp->mc_fpformat != _MC_FPFMT_XMM) + return (EINVAL); + else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) + /* We don't care what state is left in the FPU or PCB. */ + fpstate_drop(td); + else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || + mcp->mc_ownedfp == _MC_FPOWNED_PCB) { + /* XXX align as above. */ + addr = (union savefpu *)&mcp->mc_fpstate; + if (td == PCPU_GET(fpcurthread) && cpu_fxsr && + ((uintptr_t)(void *)addr & 0xF)) { + do + addr = (void *)((char *)addr + 4); + while ((uintptr_t)(void *)addr & 0xF); + bcopy(&mcp->mc_fpstate, addr, sizeof(mcp->mc_fpstate)); + } +#ifdef DEV_NPX + /* + * XXX we violate the dubious requirement that npxsetregs() + * be called with interrupts disabled. + */ + npxsetregs(td, addr); +#endif + /* + * Don't bother putting things back where they were in the + * misaligned case, since we know that the caller won't use + * them again. 
+ */ + } else + return (EINVAL); + return (0); +} + +static void +fpstate_drop(struct thread *td) +{ + register_t s; + + s = intr_disable(); +#ifdef DEV_NPX + if (PCPU_GET(fpcurthread) == td) + npxdrop(); +#endif + /* + * XXX force a full drop of the npx. The above only drops it if we + * owned it. npxgetregs() has the same bug in the !cpu_fxsr case. + * + * XXX I don't much like npxgetregs()'s semantics of doing a full + * drop. Dropping only to the pcb matches fnsave's behaviour. + * We only need to drop to !PCB_INITDONE in sendsig(). But + * sendsig() is the only caller of npxgetregs()... perhaps we just + * have too many layers. + */ + curthread->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; + intr_restore(s); +} + int fill_dbregs(struct thread *td, struct dbreg *dbregs) { diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S index 23c611c..6e3286e 100644 --- a/sys/amd64/amd64/support.S +++ b/sys/amd64/amd64/support.S @@ -976,7 +976,7 @@ ENTRY(i586_copyin) ENTRY(fastmove) pushl %ebp movl %esp,%ebp - subl $PCB_SAVE87_SIZE+3*4,%esp + subl $PCB_SAVEFPU_SIZE+3*4,%esp movl 8(%ebp),%ecx cmpl $63,%ecx @@ -1018,7 +1018,7 @@ ENTRY(fastmove) movl PCPU(CURPCB),%esi addl $PCB_SAVEFPU,%esi cld - movl $PCB_SAVE87_SIZE>>2,%ecx + movl $PCB_SAVEFPU_SIZE>>2,%ecx rep movsl movl -12(%ebp),%ecx @@ -1102,7 +1102,7 @@ fastmove_loop: addl $PCB_SAVEFPU,%edi movl %esp,%esi cld - movl $PCB_SAVE87_SIZE>>2,%ecx + movl $PCB_SAVEFPU_SIZE>>2,%ecx rep movsl movl -12(%ebp),%ecx @@ -1147,7 +1147,7 @@ fastmove_fault: addl $PCB_SAVEFPU,%edi movl %esp,%esi cld - movl $PCB_SAVE87_SIZE>>2,%ecx + movl $PCB_SAVEFPU_SIZE>>2,%ecx rep movsl diff --git a/sys/amd64/amd64/support.s b/sys/amd64/amd64/support.s index 23c611c..6e3286e 100644 --- a/sys/amd64/amd64/support.s +++ b/sys/amd64/amd64/support.s @@ -976,7 +976,7 @@ ENTRY(i586_copyin) ENTRY(fastmove) pushl %ebp movl %esp,%ebp - subl $PCB_SAVE87_SIZE+3*4,%esp + subl $PCB_SAVEFPU_SIZE+3*4,%esp movl 8(%ebp),%ecx cmpl $63,%ecx @@ -1018,7 +1018,7 @@ 
ENTRY(fastmove) movl PCPU(CURPCB),%esi addl $PCB_SAVEFPU,%esi cld - movl $PCB_SAVE87_SIZE>>2,%ecx + movl $PCB_SAVEFPU_SIZE>>2,%ecx rep movsl movl -12(%ebp),%ecx @@ -1102,7 +1102,7 @@ fastmove_loop: addl $PCB_SAVEFPU,%edi movl %esp,%esi cld - movl $PCB_SAVE87_SIZE>>2,%ecx + movl $PCB_SAVEFPU_SIZE>>2,%ecx rep movsl movl -12(%ebp),%ecx @@ -1147,7 +1147,7 @@ fastmove_fault: addl $PCB_SAVEFPU,%edi movl %esp,%esi cld - movl $PCB_SAVE87_SIZE>>2,%ecx + movl $PCB_SAVEFPU_SIZE>>2,%ecx rep movsl diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index 102d54b..cdfc327 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -971,7 +971,7 @@ syscall(frame) * but for now do it every time. */ td->td_mailbox = (void *)fuword((caddr_t)td->td_kse->ke_mailbox - + offsetof(struct kse_mailbox, kmbx_current_thread)); + + offsetof(struct kse_mailbox, km_curthread)); if ((td->td_mailbox == NULL) || (td->td_mailbox == (void *)-1)) { td->td_mailbox = NULL; /* single thread it.. */ diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index 926ea9a..3e1329d 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -307,43 +307,10 @@ cpu_thread_setup(struct thread *td) td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb - 16) - 1; } -struct md_store { - struct pcb mds_pcb; - struct trapframe mds_frame; -}; - -void -cpu_save_upcall(struct thread *td, struct kse *newkse) -{ - struct trapframe *tf; - - newkse->ke_mdstorage = malloc(sizeof(struct md_store), M_TEMP, - M_WAITOK); - /* Note: use of M_WAITOK means it won't fail. */ - /* set up shortcuts in MI section */ - newkse->ke_pcb = - &(((struct md_store *)(newkse->ke_mdstorage))->mds_pcb); - newkse->ke_frame = - &(((struct md_store *)(newkse->ke_mdstorage))->mds_frame); - tf = newkse->ke_frame; - - /* Copy the upcall pcb. Kernel mode & fp regs are here. 
*/ - /* XXXKSE this may be un-needed */ - bcopy(td->td_pcb, newkse->ke_pcb, sizeof(struct pcb)); - - /* - * This initialises most of the user mode register values - * to good values. Eventually set them explicitly to know values - */ - bcopy(td->td_frame, newkse->ke_frame, sizeof(struct trapframe)); - tf->tf_edi = 0; - tf->tf_esi = 0; /* trampoline arg */ - tf->tf_ebp = 0; - tf->tf_esp = (int)newkse->ke_stackbase + newkse->ke_stacksize - 16; - tf->tf_ebx = 0; /* trampoline arg */ - tf->tf_eip = (int)newkse->ke_upcall; -} - +/* + * Initialize machine state (pcb and trap frame) for a new thread about to + * upcall. + */ void cpu_set_upcall(struct thread *td, void *pcb) { @@ -401,41 +368,28 @@ cpu_set_upcall(struct thread *td, void *pcb) pcb2->pcb_ext = NULL; } +/* + * Set the machine state for performing an upcall that had to + * wait until we selected a KSE to perform the upcall on. + */ void -cpu_set_args(struct thread *td, struct kse *ke) -{ - suword((void *)(ke->ke_frame->tf_esp + sizeof(void *)), - (int)ke->ke_mailbox); -} - -void -cpu_free_kse_mdstorage(struct kse *kse) +cpu_set_upcall_kse(struct thread *td, struct kse *ke) { - free(kse->ke_mdstorage, M_TEMP); - kse->ke_mdstorage = NULL; - kse->ke_pcb = NULL; - kse->ke_frame = NULL; -} + /* + * Set the trap frame to point at the beginning of the uts + * function. + */ + td->td_frame->tf_esp = + (int)ke->ke_stack.ss_sp + ke->ke_stack.ss_size - 16; + td->td_frame->tf_eip = (int)ke->ke_upcall; -int -cpu_export_context(struct thread *td) -{ - struct trapframe *frame; - struct thread_mailbox *tm; - struct trapframe *uframe; - int error; - - frame = td->td_frame; - tm = td->td_mailbox; - uframe = &tm->ctx.tfrm.tf_tf; - error = copyout(frame, uframe, sizeof(*frame)); /* - * "What about the fp regs?" I hear you ask.... XXXKSE - * Don't know where gs and "onstack" come from. - * May need to fiddle a few other values too. 
+ * Pass the address of the mailbox for this kse to the uts + * function as a parameter on the stack. */ - return (error); + suword((void *)(td->td_frame->tf_esp + sizeof(void *)), + (int)ke->ke_mailbox); } void diff --git a/sys/i386/i386/genassym.c b/sys/i386/i386/genassym.c index 06c87f0..d4aa9e3 100644 --- a/sys/i386/i386/genassym.c +++ b/sys/i386/i386/genassym.c @@ -142,7 +142,6 @@ ASSYM(PCB_SPARE, offsetof(struct pcb, __pcb_spare)); ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags)); ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save)); ASSYM(PCB_SAVEFPU_SIZE, sizeof(union savefpu)); -ASSYM(PCB_SAVE87_SIZE, sizeof(struct save87)); ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); ASSYM(PCB_SIZE, sizeof(struct pcb)); diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index 95edb4b..bf2104a 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -138,6 +138,9 @@ extern void initializecpu(void); #endif static void cpu_startup(void *); +static void fpstate_drop(struct thread *td); +static void get_fpcontext(struct thread *td, mcontext_t *mcp); +static int set_fpcontext(struct thread *td, const mcontext_t *mcp); #ifdef CPU_ENABLE_SSE static void set_fpregs_xmm(struct save87 *, struct savexmm *); static void fill_fpregs_xmm(struct savexmm *, struct save87 *); @@ -440,8 +443,10 @@ sendsig(catcher, sig, mask, code) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); - sf.sf_uc.uc_mcontext.mc_flags = __UC_MC_VALID; /* no FP regs */ bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); + sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ + get_fpcontext(td, &sf.sf_uc.uc_mcontext); + fpstate_drop(td); /* Allocate space for the signal handler context. 
*/ if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack && @@ -675,7 +680,7 @@ sigreturn(td, uap) struct proc *p = td->td_proc; struct trapframe *regs; const ucontext_t *ucp; - int cs, eflags, error; + int cs, eflags, error, ret; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) @@ -749,6 +754,9 @@ sigreturn(td, uap) return (EINVAL); } + ret = set_fpcontext(td, &ucp->uc_mcontext); + if (ret != 0) + return (ret); bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } @@ -909,10 +917,19 @@ exec_setregs(td, entry, stack, ps_strings) */ load_cr0(rcr0() | CR0_MP | CR0_TS); -#ifdef DEV_NPX /* Initialize the npx (if any) for the current process. */ - npxinit(__INITIAL_NPXCW__); -#endif + /* + * XXX the above load_cr0() also initializes it and is a layering + * violation if NPX is configured. It drops the npx partially + * and this would be fatal if we were interrupted now, and decided + * to force the state to the pcb, and checked the invariant + * (CR0_TS clear) if and only if PCPU_GET(fpcurthread) != NULL). + * ALL of this can happen except the check. The check used to + * happen and be fatal later when we didn't complete the drop + * before returning to user mode. This should be fixed properly + * soon. + */ + fpstate_drop(td); /* * XXX - Linux emulator @@ -2003,8 +2020,6 @@ fill_fpregs_xmm(sv_xmm, sv_87) /* FPU registers */ for (i = 0; i < 8; ++i) sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; - - sv_87->sv_ex_sw = sv_xmm->sv_ex_sw; } static void @@ -2029,8 +2044,6 @@ set_fpregs_xmm(sv_87, sv_xmm) /* FPU registers */ for (i = 0; i < 8; ++i) sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; - - sv_xmm->sv_ex_sw = sv_87->sv_ex_sw; } #endif /* CPU_ENABLE_SSE */ @@ -2062,6 +2075,179 @@ set_fpregs(struct thread *td, struct fpreg *fpregs) return (0); } +/* + * Get machine context. 
+ */ +void +get_mcontext(struct thread *td, mcontext_t *mcp) +{ + struct trapframe *tp; + + tp = td->td_frame; + + mcp->mc_onstack = sigonstack(tp->tf_esp); + mcp->mc_gs = td->td_pcb->pcb_gs; + mcp->mc_fs = tp->tf_fs; + mcp->mc_es = tp->tf_es; + mcp->mc_ds = tp->tf_ds; + mcp->mc_edi = tp->tf_edi; + mcp->mc_esi = tp->tf_esi; + mcp->mc_ebp = tp->tf_ebp; + mcp->mc_isp = tp->tf_isp; + mcp->mc_ebx = tp->tf_ebx; + mcp->mc_edx = tp->tf_edx; + mcp->mc_ecx = tp->tf_ecx; + mcp->mc_eax = tp->tf_eax; + mcp->mc_eip = tp->tf_eip; + mcp->mc_cs = tp->tf_cs; + mcp->mc_eflags = tp->tf_eflags; + mcp->mc_esp = tp->tf_esp; + mcp->mc_ss = tp->tf_ss; + mcp->mc_len = sizeof(*mcp); + get_fpcontext(td, mcp); +} + +/* + * Set machine context. + * + * However, we don't set any but the user modifyable flags, and + * we we won't touch the cs selector. + */ +int +set_mcontext(struct thread *td, const mcontext_t *mcp) +{ + struct trapframe *tp; + int ret; + int eflags; + + tp = td->td_frame; + if (mcp->mc_len != sizeof(*mcp)) + return (EINVAL); + eflags = (mcp->mc_eflags & PSL_USERCHANGE) | + (tp->tf_eflags & ~PSL_USERCHANGE); + if ((ret = set_fpcontext(td, mcp)) == 0) { + tp->tf_fs = mcp->mc_fs; + tp->tf_es = mcp->mc_es; + tp->tf_ds = mcp->mc_ds; + tp->tf_edi = mcp->mc_edi; + tp->tf_esi = mcp->mc_esi; + tp->tf_ebp = mcp->mc_ebp; + tp->tf_ebx = mcp->mc_ebx; + tp->tf_edx = mcp->mc_edx; + tp->tf_ecx = mcp->mc_ecx; + tp->tf_eax = mcp->mc_eax; + tp->tf_eip = mcp->mc_eip; + tp->tf_eflags = eflags; + tp->tf_esp = mcp->mc_esp; + tp->tf_ss = mcp->mc_ss; + td->td_pcb->pcb_gs = mcp->mc_gs; + ret = 0; + } + return (ret); +} + +static void +get_fpcontext(struct thread *td, mcontext_t *mcp) +{ +#ifndef DEV_NPX + mcp->mc_fpformat = _MC_FPFMT_NODEV; + mcp->mc_ownedfp = _MC_FPOWNED_NONE; +#else + union savefpu *addr; + + /* + * XXX mc_fpstate might be misaligned, since its declaration is not + * unportabilized using __attribute__((aligned(16))) like the + * declaration of struct savemm, and anyway, alignment 
doesn't work + * for auto variables since we don't use gcc's pessimal stack + * alignment. Work around this by abusing the spare fields after + * mcp->mc_fpstate. + * + * XXX unpessimize most cases by only aligning when fxsave might be + * called, although this requires knowing too much about + * npxgetregs()'s internals. + */ + addr = (union savefpu *)&mcp->mc_fpstate; + if (td == PCPU_GET(fpcurthread) && cpu_fxsr && + ((uintptr_t)(void *)addr & 0xF)) { + do + addr = (void *)((char *)addr + 4); + while ((uintptr_t)(void *)addr & 0xF); + } + mcp->mc_ownedfp = npxgetregs(td, addr); + if (addr != (union savefpu *)&mcp->mc_fpstate) { + bcopy(addr, &mcp->mc_fpstate, sizeof(mcp->mc_fpstate)); + bzero(&mcp->mc_spare2, sizeof(mcp->mc_spare2)); + } + mcp->mc_fpformat = npxformat(); +#endif +} + +static int +set_fpcontext(struct thread *td, const mcontext_t *mcp) +{ + union savefpu *addr; + + if (mcp->mc_fpformat == _MC_FPFMT_NODEV) + return (0); + else if (mcp->mc_fpformat != _MC_FPFMT_387 && + mcp->mc_fpformat != _MC_FPFMT_XMM) + return (EINVAL); + else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) + /* We don't care what state is left in the FPU or PCB. */ + fpstate_drop(td); + else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || + mcp->mc_ownedfp == _MC_FPOWNED_PCB) { + /* XXX align as above. */ + addr = (union savefpu *)&mcp->mc_fpstate; + if (td == PCPU_GET(fpcurthread) && cpu_fxsr && + ((uintptr_t)(void *)addr & 0xF)) { + do + addr = (void *)((char *)addr + 4); + while ((uintptr_t)(void *)addr & 0xF); + bcopy(&mcp->mc_fpstate, addr, sizeof(mcp->mc_fpstate)); + } +#ifdef DEV_NPX + /* + * XXX we violate the dubious requirement that npxsetregs() + * be called with interrupts disabled. + */ + npxsetregs(td, addr); +#endif + /* + * Don't bother putting things back where they were in the + * misaligned case, since we know that the caller won't use + * them again. 
+ */ + } else + return (EINVAL); + return (0); +} + +static void +fpstate_drop(struct thread *td) +{ + register_t s; + + s = intr_disable(); +#ifdef DEV_NPX + if (PCPU_GET(fpcurthread) == td) + npxdrop(); +#endif + /* + * XXX force a full drop of the npx. The above only drops it if we + * owned it. npxgetregs() has the same bug in the !cpu_fxsr case. + * + * XXX I don't much like npxgetregs()'s semantics of doing a full + * drop. Dropping only to the pcb matches fnsave's behaviour. + * We only need to drop to !PCB_INITDONE in sendsig(). But + * sendsig() is the only caller of npxgetregs()... perhaps we just + * have too many layers. + */ + curthread->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; + intr_restore(s); +} + int fill_dbregs(struct thread *td, struct dbreg *dbregs) { diff --git a/sys/i386/i386/support.s b/sys/i386/i386/support.s index 23c611c..6e3286e 100644 --- a/sys/i386/i386/support.s +++ b/sys/i386/i386/support.s @@ -976,7 +976,7 @@ ENTRY(i586_copyin) ENTRY(fastmove) pushl %ebp movl %esp,%ebp - subl $PCB_SAVE87_SIZE+3*4,%esp + subl $PCB_SAVEFPU_SIZE+3*4,%esp movl 8(%ebp),%ecx cmpl $63,%ecx @@ -1018,7 +1018,7 @@ ENTRY(fastmove) movl PCPU(CURPCB),%esi addl $PCB_SAVEFPU,%esi cld - movl $PCB_SAVE87_SIZE>>2,%ecx + movl $PCB_SAVEFPU_SIZE>>2,%ecx rep movsl movl -12(%ebp),%ecx @@ -1102,7 +1102,7 @@ fastmove_loop: addl $PCB_SAVEFPU,%edi movl %esp,%esi cld - movl $PCB_SAVE87_SIZE>>2,%ecx + movl $PCB_SAVEFPU_SIZE>>2,%ecx rep movsl movl -12(%ebp),%ecx @@ -1147,7 +1147,7 @@ fastmove_fault: addl $PCB_SAVEFPU,%edi movl %esp,%esi cld - movl $PCB_SAVE87_SIZE>>2,%ecx + movl $PCB_SAVEFPU_SIZE>>2,%ecx rep movsl diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c index 102d54b..cdfc327 100644 --- a/sys/i386/i386/trap.c +++ b/sys/i386/i386/trap.c @@ -971,7 +971,7 @@ syscall(frame) * but for now do it every time. 
*/ td->td_mailbox = (void *)fuword((caddr_t)td->td_kse->ke_mailbox - + offsetof(struct kse_mailbox, kmbx_current_thread)); + + offsetof(struct kse_mailbox, km_curthread)); if ((td->td_mailbox == NULL) || (td->td_mailbox == (void *)-1)) { td->td_mailbox = NULL; /* single thread it.. */ diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index 926ea9a..3e1329d 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -307,43 +307,10 @@ cpu_thread_setup(struct thread *td) td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb - 16) - 1; } -struct md_store { - struct pcb mds_pcb; - struct trapframe mds_frame; -}; - -void -cpu_save_upcall(struct thread *td, struct kse *newkse) -{ - struct trapframe *tf; - - newkse->ke_mdstorage = malloc(sizeof(struct md_store), M_TEMP, - M_WAITOK); - /* Note: use of M_WAITOK means it won't fail. */ - /* set up shortcuts in MI section */ - newkse->ke_pcb = - &(((struct md_store *)(newkse->ke_mdstorage))->mds_pcb); - newkse->ke_frame = - &(((struct md_store *)(newkse->ke_mdstorage))->mds_frame); - tf = newkse->ke_frame; - - /* Copy the upcall pcb. Kernel mode & fp regs are here. */ - /* XXXKSE this may be un-needed */ - bcopy(td->td_pcb, newkse->ke_pcb, sizeof(struct pcb)); - - /* - * This initialises most of the user mode register values - * to good values. Eventually set them explicitly to know values - */ - bcopy(td->td_frame, newkse->ke_frame, sizeof(struct trapframe)); - tf->tf_edi = 0; - tf->tf_esi = 0; /* trampoline arg */ - tf->tf_ebp = 0; - tf->tf_esp = (int)newkse->ke_stackbase + newkse->ke_stacksize - 16; - tf->tf_ebx = 0; /* trampoline arg */ - tf->tf_eip = (int)newkse->ke_upcall; -} - +/* + * Initialize machine state (pcb and trap frame) for a new thread about to + * upcall. 
+ */ void cpu_set_upcall(struct thread *td, void *pcb) { @@ -401,41 +368,28 @@ cpu_set_upcall(struct thread *td, void *pcb) pcb2->pcb_ext = NULL; } +/* + * Set the machine state for performing an upcall that had to + * wait until we selected a KSE to perform the upcall on. + */ void -cpu_set_args(struct thread *td, struct kse *ke) -{ - suword((void *)(ke->ke_frame->tf_esp + sizeof(void *)), - (int)ke->ke_mailbox); -} - -void -cpu_free_kse_mdstorage(struct kse *kse) +cpu_set_upcall_kse(struct thread *td, struct kse *ke) { - free(kse->ke_mdstorage, M_TEMP); - kse->ke_mdstorage = NULL; - kse->ke_pcb = NULL; - kse->ke_frame = NULL; -} + /* + * Set the trap frame to point at the beginning of the uts + * function. + */ + td->td_frame->tf_esp = + (int)ke->ke_stack.ss_sp + ke->ke_stack.ss_size - 16; + td->td_frame->tf_eip = (int)ke->ke_upcall; -int -cpu_export_context(struct thread *td) -{ - struct trapframe *frame; - struct thread_mailbox *tm; - struct trapframe *uframe; - int error; - - frame = td->td_frame; - tm = td->td_mailbox; - uframe = &tm->ctx.tfrm.tf_tf; - error = copyout(frame, uframe, sizeof(*frame)); /* - * "What about the fp regs?" I hear you ask.... XXXKSE - * Don't know where gs and "onstack" come from. - * May need to fiddle a few other values too. + * Pass the address of the mailbox for this kse to the uts + * function as a parameter on the stack. */ - return (error); + suword((void *)(td->td_frame->tf_esp + sizeof(void *)), + (int)ke->ke_mailbox); } void |