summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
author mini <mini@FreeBSD.org> 2002-09-16 19:25:08 +0000
committer mini <mini@FreeBSD.org> 2002-09-16 19:25:08 +0000
commit d0ffcf396dc1f28a9d3e188b2dd0a72ad4838880 (patch)
tree 1422f3e0eee520ddbe0818b7dcdb3a3c7f038166 /sys
parent 94a57caeb217b8a70c593972f7402cca3de08a19 (diff)
download FreeBSD-src-d0ffcf396dc1f28a9d3e188b2dd0a72ad4838880.zip
FreeBSD-src-d0ffcf396dc1f28a9d3e188b2dd0a72ad4838880.tar.gz
Add kernel support needed for the KSE-aware libpthread:
- Maintain FPU state across signals.
- Use ucontext_t's to store KSE thread state.
- Synthesize state for the UTS upon each upcall, rather than saving and
  copying a trapframe.
- Save and restore FPU state properly in ucontext_t's.

Reviewed by: deischen, julian
Approved by: -arch
Diffstat (limited to 'sys')
-rw-r--r-- sys/amd64/amd64/genassym.c 1
-rw-r--r-- sys/amd64/amd64/machdep.c 204
-rw-r--r-- sys/amd64/amd64/support.S 8
-rw-r--r-- sys/amd64/amd64/support.s 8
-rw-r--r-- sys/amd64/amd64/trap.c 2
-rw-r--r-- sys/amd64/amd64/vm_machdep.c 86
-rw-r--r-- sys/i386/i386/genassym.c 1
-rw-r--r-- sys/i386/i386/machdep.c 204
-rw-r--r-- sys/i386/i386/support.s 8
-rw-r--r-- sys/i386/i386/trap.c 2
-rw-r--r-- sys/i386/i386/vm_machdep.c 86
11 files changed, 444 insertions, 166 deletions
diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c
index 06c87f0..d4aa9e3 100644
--- a/sys/amd64/amd64/genassym.c
+++ b/sys/amd64/amd64/genassym.c
@@ -142,7 +142,6 @@ ASSYM(PCB_SPARE, offsetof(struct pcb, __pcb_spare));
ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save));
ASSYM(PCB_SAVEFPU_SIZE, sizeof(union savefpu));
-ASSYM(PCB_SAVE87_SIZE, sizeof(struct save87));
ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
ASSYM(PCB_SIZE, sizeof(struct pcb));
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index 95edb4b..bf2104a 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -138,6 +138,9 @@ extern void initializecpu(void);
#endif
static void cpu_startup(void *);
+static void fpstate_drop(struct thread *td);
+static void get_fpcontext(struct thread *td, mcontext_t *mcp);
+static int set_fpcontext(struct thread *td, const mcontext_t *mcp);
#ifdef CPU_ENABLE_SSE
static void set_fpregs_xmm(struct save87 *, struct savexmm *);
static void fill_fpregs_xmm(struct savexmm *, struct save87 *);
@@ -440,8 +443,10 @@ sendsig(catcher, sig, mask, code)
? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
sf.sf_uc.uc_mcontext.mc_gs = rgs();
- sf.sf_uc.uc_mcontext.mc_flags = __UC_MC_VALID; /* no FP regs */
bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
+ sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
+ get_fpcontext(td, &sf.sf_uc.uc_mcontext);
+ fpstate_drop(td);
/* Allocate space for the signal handler context. */
if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack &&
@@ -675,7 +680,7 @@ sigreturn(td, uap)
struct proc *p = td->td_proc;
struct trapframe *regs;
const ucontext_t *ucp;
- int cs, eflags, error;
+ int cs, eflags, error, ret;
error = copyin(uap->sigcntxp, &uc, sizeof(uc));
if (error != 0)
@@ -749,6 +754,9 @@ sigreturn(td, uap)
return (EINVAL);
}
+ ret = set_fpcontext(td, &ucp->uc_mcontext);
+ if (ret != 0)
+ return (ret);
bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
}
@@ -909,10 +917,19 @@ exec_setregs(td, entry, stack, ps_strings)
*/
load_cr0(rcr0() | CR0_MP | CR0_TS);
-#ifdef DEV_NPX
/* Initialize the npx (if any) for the current process. */
- npxinit(__INITIAL_NPXCW__);
-#endif
+ /*
+ * XXX the above load_cr0() also initializes it and is a layering
+ * violation if NPX is configured. It drops the npx partially
+ * and this would be fatal if we were interrupted now, and decided
+ * to force the state to the pcb, and checked the invariant
+ * (CR0_TS clear if and only if PCPU_GET(fpcurthread) != NULL).
+ * ALL of this can happen except the check. The check used to
+ * happen and be fatal later when we didn't complete the drop
+ * before returning to user mode. This should be fixed properly
+ * soon.
+ */
+ fpstate_drop(td);
/*
* XXX - Linux emulator
@@ -2003,8 +2020,6 @@ fill_fpregs_xmm(sv_xmm, sv_87)
/* FPU registers */
for (i = 0; i < 8; ++i)
sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc;
-
- sv_87->sv_ex_sw = sv_xmm->sv_ex_sw;
}
static void
@@ -2029,8 +2044,6 @@ set_fpregs_xmm(sv_87, sv_xmm)
/* FPU registers */
for (i = 0; i < 8; ++i)
sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i];
-
- sv_xmm->sv_ex_sw = sv_87->sv_ex_sw;
}
#endif /* CPU_ENABLE_SSE */
@@ -2062,6 +2075,179 @@ set_fpregs(struct thread *td, struct fpreg *fpregs)
return (0);
}
+/*
+ * Get machine context.
+ */
+void
+get_mcontext(struct thread *td, mcontext_t *mcp)
+{
+ struct trapframe *tp;
+
+ tp = td->td_frame;
+
+ mcp->mc_onstack = sigonstack(tp->tf_esp);
+ mcp->mc_gs = td->td_pcb->pcb_gs;
+ mcp->mc_fs = tp->tf_fs;
+ mcp->mc_es = tp->tf_es;
+ mcp->mc_ds = tp->tf_ds;
+ mcp->mc_edi = tp->tf_edi;
+ mcp->mc_esi = tp->tf_esi;
+ mcp->mc_ebp = tp->tf_ebp;
+ mcp->mc_isp = tp->tf_isp;
+ mcp->mc_ebx = tp->tf_ebx;
+ mcp->mc_edx = tp->tf_edx;
+ mcp->mc_ecx = tp->tf_ecx;
+ mcp->mc_eax = tp->tf_eax;
+ mcp->mc_eip = tp->tf_eip;
+ mcp->mc_cs = tp->tf_cs;
+ mcp->mc_eflags = tp->tf_eflags;
+ mcp->mc_esp = tp->tf_esp;
+ mcp->mc_ss = tp->tf_ss;
+ mcp->mc_len = sizeof(*mcp);
+ get_fpcontext(td, mcp);
+}
+
+/*
+ * Set machine context.
+ *
+ * However, we don't set any but the user modifiable flags, and
+ * we won't touch the cs selector.
+ */
+int
+set_mcontext(struct thread *td, const mcontext_t *mcp)
+{
+ struct trapframe *tp;
+ int ret;
+ int eflags;
+
+ tp = td->td_frame;
+ if (mcp->mc_len != sizeof(*mcp))
+ return (EINVAL);
+ eflags = (mcp->mc_eflags & PSL_USERCHANGE) |
+ (tp->tf_eflags & ~PSL_USERCHANGE);
+ if ((ret = set_fpcontext(td, mcp)) == 0) {
+ tp->tf_fs = mcp->mc_fs;
+ tp->tf_es = mcp->mc_es;
+ tp->tf_ds = mcp->mc_ds;
+ tp->tf_edi = mcp->mc_edi;
+ tp->tf_esi = mcp->mc_esi;
+ tp->tf_ebp = mcp->mc_ebp;
+ tp->tf_ebx = mcp->mc_ebx;
+ tp->tf_edx = mcp->mc_edx;
+ tp->tf_ecx = mcp->mc_ecx;
+ tp->tf_eax = mcp->mc_eax;
+ tp->tf_eip = mcp->mc_eip;
+ tp->tf_eflags = eflags;
+ tp->tf_esp = mcp->mc_esp;
+ tp->tf_ss = mcp->mc_ss;
+ td->td_pcb->pcb_gs = mcp->mc_gs;
+ ret = 0;
+ }
+ return (ret);
+}
+
+static void
+get_fpcontext(struct thread *td, mcontext_t *mcp)
+{
+#ifndef DEV_NPX
+ mcp->mc_fpformat = _MC_FPFMT_NODEV;
+ mcp->mc_ownedfp = _MC_FPOWNED_NONE;
+#else
+ union savefpu *addr;
+
+ /*
+ * XXX mc_fpstate might be misaligned, since its declaration is not
+ * unportabilized using __attribute__((aligned(16))) like the
+ * declaration of struct savexmm, and anyway, alignment doesn't work
+ * for auto variables since we don't use gcc's pessimal stack
+ * alignment. Work around this by abusing the spare fields after
+ * mcp->mc_fpstate.
+ *
+ * XXX unpessimize most cases by only aligning when fxsave might be
+ * called, although this requires knowing too much about
+ * npxgetregs()'s internals.
+ */
+ addr = (union savefpu *)&mcp->mc_fpstate;
+ if (td == PCPU_GET(fpcurthread) && cpu_fxsr &&
+ ((uintptr_t)(void *)addr & 0xF)) {
+ do
+ addr = (void *)((char *)addr + 4);
+ while ((uintptr_t)(void *)addr & 0xF);
+ }
+ mcp->mc_ownedfp = npxgetregs(td, addr);
+ if (addr != (union savefpu *)&mcp->mc_fpstate) {
+ bcopy(addr, &mcp->mc_fpstate, sizeof(mcp->mc_fpstate));
+ bzero(&mcp->mc_spare2, sizeof(mcp->mc_spare2));
+ }
+ mcp->mc_fpformat = npxformat();
+#endif
+}
+
+static int
+set_fpcontext(struct thread *td, const mcontext_t *mcp)
+{
+ union savefpu *addr;
+
+ if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
+ return (0);
+ else if (mcp->mc_fpformat != _MC_FPFMT_387 &&
+ mcp->mc_fpformat != _MC_FPFMT_XMM)
+ return (EINVAL);
+ else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE)
+ /* We don't care what state is left in the FPU or PCB. */
+ fpstate_drop(td);
+ else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
+ mcp->mc_ownedfp == _MC_FPOWNED_PCB) {
+ /* XXX align as above. */
+ addr = (union savefpu *)&mcp->mc_fpstate;
+ if (td == PCPU_GET(fpcurthread) && cpu_fxsr &&
+ ((uintptr_t)(void *)addr & 0xF)) {
+ do
+ addr = (void *)((char *)addr + 4);
+ while ((uintptr_t)(void *)addr & 0xF);
+ bcopy(&mcp->mc_fpstate, addr, sizeof(mcp->mc_fpstate));
+ }
+#ifdef DEV_NPX
+ /*
+ * XXX we violate the dubious requirement that npxsetregs()
+ * be called with interrupts disabled.
+ */
+ npxsetregs(td, addr);
+#endif
+ /*
+ * Don't bother putting things back where they were in the
+ * misaligned case, since we know that the caller won't use
+ * them again.
+ */
+ } else
+ return (EINVAL);
+ return (0);
+}
+
+static void
+fpstate_drop(struct thread *td)
+{
+ register_t s;
+
+ s = intr_disable();
+#ifdef DEV_NPX
+ if (PCPU_GET(fpcurthread) == td)
+ npxdrop();
+#endif
+ /*
+ * XXX force a full drop of the npx. The above only drops it if we
+ * owned it. npxgetregs() has the same bug in the !cpu_fxsr case.
+ *
+ * XXX I don't much like npxgetregs()'s semantics of doing a full
+ * drop. Dropping only to the pcb matches fnsave's behaviour.
+ * We only need to drop to !PCB_NPXINITDONE in sendsig(). But
+ * sendsig() is the only caller of npxgetregs()... perhaps we just
+ * have too many layers.
+ */
+ curthread->td_pcb->pcb_flags &= ~PCB_NPXINITDONE;
+ intr_restore(s);
+}
+
int
fill_dbregs(struct thread *td, struct dbreg *dbregs)
{
diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S
index 23c611c..6e3286e 100644
--- a/sys/amd64/amd64/support.S
+++ b/sys/amd64/amd64/support.S
@@ -976,7 +976,7 @@ ENTRY(i586_copyin)
ENTRY(fastmove)
pushl %ebp
movl %esp,%ebp
- subl $PCB_SAVE87_SIZE+3*4,%esp
+ subl $PCB_SAVEFPU_SIZE+3*4,%esp
movl 8(%ebp),%ecx
cmpl $63,%ecx
@@ -1018,7 +1018,7 @@ ENTRY(fastmove)
movl PCPU(CURPCB),%esi
addl $PCB_SAVEFPU,%esi
cld
- movl $PCB_SAVE87_SIZE>>2,%ecx
+ movl $PCB_SAVEFPU_SIZE>>2,%ecx
rep
movsl
movl -12(%ebp),%ecx
@@ -1102,7 +1102,7 @@ fastmove_loop:
addl $PCB_SAVEFPU,%edi
movl %esp,%esi
cld
- movl $PCB_SAVE87_SIZE>>2,%ecx
+ movl $PCB_SAVEFPU_SIZE>>2,%ecx
rep
movsl
movl -12(%ebp),%ecx
@@ -1147,7 +1147,7 @@ fastmove_fault:
addl $PCB_SAVEFPU,%edi
movl %esp,%esi
cld
- movl $PCB_SAVE87_SIZE>>2,%ecx
+ movl $PCB_SAVEFPU_SIZE>>2,%ecx
rep
movsl
diff --git a/sys/amd64/amd64/support.s b/sys/amd64/amd64/support.s
index 23c611c..6e3286e 100644
--- a/sys/amd64/amd64/support.s
+++ b/sys/amd64/amd64/support.s
@@ -976,7 +976,7 @@ ENTRY(i586_copyin)
ENTRY(fastmove)
pushl %ebp
movl %esp,%ebp
- subl $PCB_SAVE87_SIZE+3*4,%esp
+ subl $PCB_SAVEFPU_SIZE+3*4,%esp
movl 8(%ebp),%ecx
cmpl $63,%ecx
@@ -1018,7 +1018,7 @@ ENTRY(fastmove)
movl PCPU(CURPCB),%esi
addl $PCB_SAVEFPU,%esi
cld
- movl $PCB_SAVE87_SIZE>>2,%ecx
+ movl $PCB_SAVEFPU_SIZE>>2,%ecx
rep
movsl
movl -12(%ebp),%ecx
@@ -1102,7 +1102,7 @@ fastmove_loop:
addl $PCB_SAVEFPU,%edi
movl %esp,%esi
cld
- movl $PCB_SAVE87_SIZE>>2,%ecx
+ movl $PCB_SAVEFPU_SIZE>>2,%ecx
rep
movsl
movl -12(%ebp),%ecx
@@ -1147,7 +1147,7 @@ fastmove_fault:
addl $PCB_SAVEFPU,%edi
movl %esp,%esi
cld
- movl $PCB_SAVE87_SIZE>>2,%ecx
+ movl $PCB_SAVEFPU_SIZE>>2,%ecx
rep
movsl
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index 102d54b..cdfc327 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -971,7 +971,7 @@ syscall(frame)
* but for now do it every time.
*/
td->td_mailbox = (void *)fuword((caddr_t)td->td_kse->ke_mailbox
- + offsetof(struct kse_mailbox, kmbx_current_thread));
+ + offsetof(struct kse_mailbox, km_curthread));
if ((td->td_mailbox == NULL) ||
(td->td_mailbox == (void *)-1)) {
td->td_mailbox = NULL; /* single thread it.. */
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index 926ea9a..3e1329d 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -307,43 +307,10 @@ cpu_thread_setup(struct thread *td)
td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb - 16) - 1;
}
-struct md_store {
- struct pcb mds_pcb;
- struct trapframe mds_frame;
-};
-
-void
-cpu_save_upcall(struct thread *td, struct kse *newkse)
-{
- struct trapframe *tf;
-
- newkse->ke_mdstorage = malloc(sizeof(struct md_store), M_TEMP,
- M_WAITOK);
- /* Note: use of M_WAITOK means it won't fail. */
- /* set up shortcuts in MI section */
- newkse->ke_pcb =
- &(((struct md_store *)(newkse->ke_mdstorage))->mds_pcb);
- newkse->ke_frame =
- &(((struct md_store *)(newkse->ke_mdstorage))->mds_frame);
- tf = newkse->ke_frame;
-
- /* Copy the upcall pcb. Kernel mode & fp regs are here. */
- /* XXXKSE this may be un-needed */
- bcopy(td->td_pcb, newkse->ke_pcb, sizeof(struct pcb));
-
- /*
- * This initialises most of the user mode register values
- * to good values. Eventually set them explicitly to know values
- */
- bcopy(td->td_frame, newkse->ke_frame, sizeof(struct trapframe));
- tf->tf_edi = 0;
- tf->tf_esi = 0; /* trampoline arg */
- tf->tf_ebp = 0;
- tf->tf_esp = (int)newkse->ke_stackbase + newkse->ke_stacksize - 16;
- tf->tf_ebx = 0; /* trampoline arg */
- tf->tf_eip = (int)newkse->ke_upcall;
-}
-
+/*
+ * Initialize machine state (pcb and trap frame) for a new thread about to
+ * upcall.
+ */
void
cpu_set_upcall(struct thread *td, void *pcb)
{
@@ -401,41 +368,28 @@ cpu_set_upcall(struct thread *td, void *pcb)
pcb2->pcb_ext = NULL;
}
+/*
+ * Set the machine state for performing an upcall that had to
+ * wait until we selected a KSE to perform the upcall on.
+ */
void
-cpu_set_args(struct thread *td, struct kse *ke)
-{
- suword((void *)(ke->ke_frame->tf_esp + sizeof(void *)),
- (int)ke->ke_mailbox);
-}
-
-void
-cpu_free_kse_mdstorage(struct kse *kse)
+cpu_set_upcall_kse(struct thread *td, struct kse *ke)
{
- free(kse->ke_mdstorage, M_TEMP);
- kse->ke_mdstorage = NULL;
- kse->ke_pcb = NULL;
- kse->ke_frame = NULL;
-}
+ /*
+ * Set the trap frame to point at the beginning of the uts
+ * function.
+ */
+ td->td_frame->tf_esp =
+ (int)ke->ke_stack.ss_sp + ke->ke_stack.ss_size - 16;
+ td->td_frame->tf_eip = (int)ke->ke_upcall;
-int
-cpu_export_context(struct thread *td)
-{
- struct trapframe *frame;
- struct thread_mailbox *tm;
- struct trapframe *uframe;
- int error;
-
- frame = td->td_frame;
- tm = td->td_mailbox;
- uframe = &tm->ctx.tfrm.tf_tf;
- error = copyout(frame, uframe, sizeof(*frame));
/*
- * "What about the fp regs?" I hear you ask.... XXXKSE
- * Don't know where gs and "onstack" come from.
- * May need to fiddle a few other values too.
+ * Pass the address of the mailbox for this kse to the uts
+ * function as a parameter on the stack.
*/
- return (error);
+ suword((void *)(td->td_frame->tf_esp + sizeof(void *)),
+ (int)ke->ke_mailbox);
}
void
diff --git a/sys/i386/i386/genassym.c b/sys/i386/i386/genassym.c
index 06c87f0..d4aa9e3 100644
--- a/sys/i386/i386/genassym.c
+++ b/sys/i386/i386/genassym.c
@@ -142,7 +142,6 @@ ASSYM(PCB_SPARE, offsetof(struct pcb, __pcb_spare));
ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save));
ASSYM(PCB_SAVEFPU_SIZE, sizeof(union savefpu));
-ASSYM(PCB_SAVE87_SIZE, sizeof(struct save87));
ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
ASSYM(PCB_SIZE, sizeof(struct pcb));
diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c
index 95edb4b..bf2104a 100644
--- a/sys/i386/i386/machdep.c
+++ b/sys/i386/i386/machdep.c
@@ -138,6 +138,9 @@ extern void initializecpu(void);
#endif
static void cpu_startup(void *);
+static void fpstate_drop(struct thread *td);
+static void get_fpcontext(struct thread *td, mcontext_t *mcp);
+static int set_fpcontext(struct thread *td, const mcontext_t *mcp);
#ifdef CPU_ENABLE_SSE
static void set_fpregs_xmm(struct save87 *, struct savexmm *);
static void fill_fpregs_xmm(struct savexmm *, struct save87 *);
@@ -440,8 +443,10 @@ sendsig(catcher, sig, mask, code)
? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
sf.sf_uc.uc_mcontext.mc_gs = rgs();
- sf.sf_uc.uc_mcontext.mc_flags = __UC_MC_VALID; /* no FP regs */
bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
+ sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
+ get_fpcontext(td, &sf.sf_uc.uc_mcontext);
+ fpstate_drop(td);
/* Allocate space for the signal handler context. */
if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack &&
@@ -675,7 +680,7 @@ sigreturn(td, uap)
struct proc *p = td->td_proc;
struct trapframe *regs;
const ucontext_t *ucp;
- int cs, eflags, error;
+ int cs, eflags, error, ret;
error = copyin(uap->sigcntxp, &uc, sizeof(uc));
if (error != 0)
@@ -749,6 +754,9 @@ sigreturn(td, uap)
return (EINVAL);
}
+ ret = set_fpcontext(td, &ucp->uc_mcontext);
+ if (ret != 0)
+ return (ret);
bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
}
@@ -909,10 +917,19 @@ exec_setregs(td, entry, stack, ps_strings)
*/
load_cr0(rcr0() | CR0_MP | CR0_TS);
-#ifdef DEV_NPX
/* Initialize the npx (if any) for the current process. */
- npxinit(__INITIAL_NPXCW__);
-#endif
+ /*
+ * XXX the above load_cr0() also initializes it and is a layering
+ * violation if NPX is configured. It drops the npx partially
+ * and this would be fatal if we were interrupted now, and decided
+ * to force the state to the pcb, and checked the invariant
+ * (CR0_TS clear if and only if PCPU_GET(fpcurthread) != NULL).
+ * ALL of this can happen except the check. The check used to
+ * happen and be fatal later when we didn't complete the drop
+ * before returning to user mode. This should be fixed properly
+ * soon.
+ */
+ fpstate_drop(td);
/*
* XXX - Linux emulator
@@ -2003,8 +2020,6 @@ fill_fpregs_xmm(sv_xmm, sv_87)
/* FPU registers */
for (i = 0; i < 8; ++i)
sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc;
-
- sv_87->sv_ex_sw = sv_xmm->sv_ex_sw;
}
static void
@@ -2029,8 +2044,6 @@ set_fpregs_xmm(sv_87, sv_xmm)
/* FPU registers */
for (i = 0; i < 8; ++i)
sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i];
-
- sv_xmm->sv_ex_sw = sv_87->sv_ex_sw;
}
#endif /* CPU_ENABLE_SSE */
@@ -2062,6 +2075,179 @@ set_fpregs(struct thread *td, struct fpreg *fpregs)
return (0);
}
+/*
+ * Get machine context.
+ */
+void
+get_mcontext(struct thread *td, mcontext_t *mcp)
+{
+ struct trapframe *tp;
+
+ tp = td->td_frame;
+
+ mcp->mc_onstack = sigonstack(tp->tf_esp);
+ mcp->mc_gs = td->td_pcb->pcb_gs;
+ mcp->mc_fs = tp->tf_fs;
+ mcp->mc_es = tp->tf_es;
+ mcp->mc_ds = tp->tf_ds;
+ mcp->mc_edi = tp->tf_edi;
+ mcp->mc_esi = tp->tf_esi;
+ mcp->mc_ebp = tp->tf_ebp;
+ mcp->mc_isp = tp->tf_isp;
+ mcp->mc_ebx = tp->tf_ebx;
+ mcp->mc_edx = tp->tf_edx;
+ mcp->mc_ecx = tp->tf_ecx;
+ mcp->mc_eax = tp->tf_eax;
+ mcp->mc_eip = tp->tf_eip;
+ mcp->mc_cs = tp->tf_cs;
+ mcp->mc_eflags = tp->tf_eflags;
+ mcp->mc_esp = tp->tf_esp;
+ mcp->mc_ss = tp->tf_ss;
+ mcp->mc_len = sizeof(*mcp);
+ get_fpcontext(td, mcp);
+}
+
+/*
+ * Set machine context.
+ *
+ * However, we don't set any but the user modifiable flags, and
+ * we won't touch the cs selector.
+ */
+int
+set_mcontext(struct thread *td, const mcontext_t *mcp)
+{
+ struct trapframe *tp;
+ int ret;
+ int eflags;
+
+ tp = td->td_frame;
+ if (mcp->mc_len != sizeof(*mcp))
+ return (EINVAL);
+ eflags = (mcp->mc_eflags & PSL_USERCHANGE) |
+ (tp->tf_eflags & ~PSL_USERCHANGE);
+ if ((ret = set_fpcontext(td, mcp)) == 0) {
+ tp->tf_fs = mcp->mc_fs;
+ tp->tf_es = mcp->mc_es;
+ tp->tf_ds = mcp->mc_ds;
+ tp->tf_edi = mcp->mc_edi;
+ tp->tf_esi = mcp->mc_esi;
+ tp->tf_ebp = mcp->mc_ebp;
+ tp->tf_ebx = mcp->mc_ebx;
+ tp->tf_edx = mcp->mc_edx;
+ tp->tf_ecx = mcp->mc_ecx;
+ tp->tf_eax = mcp->mc_eax;
+ tp->tf_eip = mcp->mc_eip;
+ tp->tf_eflags = eflags;
+ tp->tf_esp = mcp->mc_esp;
+ tp->tf_ss = mcp->mc_ss;
+ td->td_pcb->pcb_gs = mcp->mc_gs;
+ ret = 0;
+ }
+ return (ret);
+}
+
+static void
+get_fpcontext(struct thread *td, mcontext_t *mcp)
+{
+#ifndef DEV_NPX
+ mcp->mc_fpformat = _MC_FPFMT_NODEV;
+ mcp->mc_ownedfp = _MC_FPOWNED_NONE;
+#else
+ union savefpu *addr;
+
+ /*
+ * XXX mc_fpstate might be misaligned, since its declaration is not
+ * unportabilized using __attribute__((aligned(16))) like the
+ * declaration of struct savexmm, and anyway, alignment doesn't work
+ * for auto variables since we don't use gcc's pessimal stack
+ * alignment. Work around this by abusing the spare fields after
+ * mcp->mc_fpstate.
+ *
+ * XXX unpessimize most cases by only aligning when fxsave might be
+ * called, although this requires knowing too much about
+ * npxgetregs()'s internals.
+ */
+ addr = (union savefpu *)&mcp->mc_fpstate;
+ if (td == PCPU_GET(fpcurthread) && cpu_fxsr &&
+ ((uintptr_t)(void *)addr & 0xF)) {
+ do
+ addr = (void *)((char *)addr + 4);
+ while ((uintptr_t)(void *)addr & 0xF);
+ }
+ mcp->mc_ownedfp = npxgetregs(td, addr);
+ if (addr != (union savefpu *)&mcp->mc_fpstate) {
+ bcopy(addr, &mcp->mc_fpstate, sizeof(mcp->mc_fpstate));
+ bzero(&mcp->mc_spare2, sizeof(mcp->mc_spare2));
+ }
+ mcp->mc_fpformat = npxformat();
+#endif
+}
+
+static int
+set_fpcontext(struct thread *td, const mcontext_t *mcp)
+{
+ union savefpu *addr;
+
+ if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
+ return (0);
+ else if (mcp->mc_fpformat != _MC_FPFMT_387 &&
+ mcp->mc_fpformat != _MC_FPFMT_XMM)
+ return (EINVAL);
+ else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE)
+ /* We don't care what state is left in the FPU or PCB. */
+ fpstate_drop(td);
+ else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
+ mcp->mc_ownedfp == _MC_FPOWNED_PCB) {
+ /* XXX align as above. */
+ addr = (union savefpu *)&mcp->mc_fpstate;
+ if (td == PCPU_GET(fpcurthread) && cpu_fxsr &&
+ ((uintptr_t)(void *)addr & 0xF)) {
+ do
+ addr = (void *)((char *)addr + 4);
+ while ((uintptr_t)(void *)addr & 0xF);
+ bcopy(&mcp->mc_fpstate, addr, sizeof(mcp->mc_fpstate));
+ }
+#ifdef DEV_NPX
+ /*
+ * XXX we violate the dubious requirement that npxsetregs()
+ * be called with interrupts disabled.
+ */
+ npxsetregs(td, addr);
+#endif
+ /*
+ * Don't bother putting things back where they were in the
+ * misaligned case, since we know that the caller won't use
+ * them again.
+ */
+ } else
+ return (EINVAL);
+ return (0);
+}
+
+static void
+fpstate_drop(struct thread *td)
+{
+ register_t s;
+
+ s = intr_disable();
+#ifdef DEV_NPX
+ if (PCPU_GET(fpcurthread) == td)
+ npxdrop();
+#endif
+ /*
+ * XXX force a full drop of the npx. The above only drops it if we
+ * owned it. npxgetregs() has the same bug in the !cpu_fxsr case.
+ *
+ * XXX I don't much like npxgetregs()'s semantics of doing a full
+ * drop. Dropping only to the pcb matches fnsave's behaviour.
+ * We only need to drop to !PCB_NPXINITDONE in sendsig(). But
+ * sendsig() is the only caller of npxgetregs()... perhaps we just
+ * have too many layers.
+ */
+ curthread->td_pcb->pcb_flags &= ~PCB_NPXINITDONE;
+ intr_restore(s);
+}
+
int
fill_dbregs(struct thread *td, struct dbreg *dbregs)
{
diff --git a/sys/i386/i386/support.s b/sys/i386/i386/support.s
index 23c611c..6e3286e 100644
--- a/sys/i386/i386/support.s
+++ b/sys/i386/i386/support.s
@@ -976,7 +976,7 @@ ENTRY(i586_copyin)
ENTRY(fastmove)
pushl %ebp
movl %esp,%ebp
- subl $PCB_SAVE87_SIZE+3*4,%esp
+ subl $PCB_SAVEFPU_SIZE+3*4,%esp
movl 8(%ebp),%ecx
cmpl $63,%ecx
@@ -1018,7 +1018,7 @@ ENTRY(fastmove)
movl PCPU(CURPCB),%esi
addl $PCB_SAVEFPU,%esi
cld
- movl $PCB_SAVE87_SIZE>>2,%ecx
+ movl $PCB_SAVEFPU_SIZE>>2,%ecx
rep
movsl
movl -12(%ebp),%ecx
@@ -1102,7 +1102,7 @@ fastmove_loop:
addl $PCB_SAVEFPU,%edi
movl %esp,%esi
cld
- movl $PCB_SAVE87_SIZE>>2,%ecx
+ movl $PCB_SAVEFPU_SIZE>>2,%ecx
rep
movsl
movl -12(%ebp),%ecx
@@ -1147,7 +1147,7 @@ fastmove_fault:
addl $PCB_SAVEFPU,%edi
movl %esp,%esi
cld
- movl $PCB_SAVE87_SIZE>>2,%ecx
+ movl $PCB_SAVEFPU_SIZE>>2,%ecx
rep
movsl
diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c
index 102d54b..cdfc327 100644
--- a/sys/i386/i386/trap.c
+++ b/sys/i386/i386/trap.c
@@ -971,7 +971,7 @@ syscall(frame)
* but for now do it every time.
*/
td->td_mailbox = (void *)fuword((caddr_t)td->td_kse->ke_mailbox
- + offsetof(struct kse_mailbox, kmbx_current_thread));
+ + offsetof(struct kse_mailbox, km_curthread));
if ((td->td_mailbox == NULL) ||
(td->td_mailbox == (void *)-1)) {
td->td_mailbox = NULL; /* single thread it.. */
diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c
index 926ea9a..3e1329d 100644
--- a/sys/i386/i386/vm_machdep.c
+++ b/sys/i386/i386/vm_machdep.c
@@ -307,43 +307,10 @@ cpu_thread_setup(struct thread *td)
td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb - 16) - 1;
}
-struct md_store {
- struct pcb mds_pcb;
- struct trapframe mds_frame;
-};
-
-void
-cpu_save_upcall(struct thread *td, struct kse *newkse)
-{
- struct trapframe *tf;
-
- newkse->ke_mdstorage = malloc(sizeof(struct md_store), M_TEMP,
- M_WAITOK);
- /* Note: use of M_WAITOK means it won't fail. */
- /* set up shortcuts in MI section */
- newkse->ke_pcb =
- &(((struct md_store *)(newkse->ke_mdstorage))->mds_pcb);
- newkse->ke_frame =
- &(((struct md_store *)(newkse->ke_mdstorage))->mds_frame);
- tf = newkse->ke_frame;
-
- /* Copy the upcall pcb. Kernel mode & fp regs are here. */
- /* XXXKSE this may be un-needed */
- bcopy(td->td_pcb, newkse->ke_pcb, sizeof(struct pcb));
-
- /*
- * This initialises most of the user mode register values
- * to good values. Eventually set them explicitly to know values
- */
- bcopy(td->td_frame, newkse->ke_frame, sizeof(struct trapframe));
- tf->tf_edi = 0;
- tf->tf_esi = 0; /* trampoline arg */
- tf->tf_ebp = 0;
- tf->tf_esp = (int)newkse->ke_stackbase + newkse->ke_stacksize - 16;
- tf->tf_ebx = 0; /* trampoline arg */
- tf->tf_eip = (int)newkse->ke_upcall;
-}
-
+/*
+ * Initialize machine state (pcb and trap frame) for a new thread about to
+ * upcall.
+ */
void
cpu_set_upcall(struct thread *td, void *pcb)
{
@@ -401,41 +368,28 @@ cpu_set_upcall(struct thread *td, void *pcb)
pcb2->pcb_ext = NULL;
}
+/*
+ * Set the machine state for performing an upcall that had to
+ * wait until we selected a KSE to perform the upcall on.
+ */
void
-cpu_set_args(struct thread *td, struct kse *ke)
-{
- suword((void *)(ke->ke_frame->tf_esp + sizeof(void *)),
- (int)ke->ke_mailbox);
-}
-
-void
-cpu_free_kse_mdstorage(struct kse *kse)
+cpu_set_upcall_kse(struct thread *td, struct kse *ke)
{
- free(kse->ke_mdstorage, M_TEMP);
- kse->ke_mdstorage = NULL;
- kse->ke_pcb = NULL;
- kse->ke_frame = NULL;
-}
+ /*
+ * Set the trap frame to point at the beginning of the uts
+ * function.
+ */
+ td->td_frame->tf_esp =
+ (int)ke->ke_stack.ss_sp + ke->ke_stack.ss_size - 16;
+ td->td_frame->tf_eip = (int)ke->ke_upcall;
-int
-cpu_export_context(struct thread *td)
-{
- struct trapframe *frame;
- struct thread_mailbox *tm;
- struct trapframe *uframe;
- int error;
-
- frame = td->td_frame;
- tm = td->td_mailbox;
- uframe = &tm->ctx.tfrm.tf_tf;
- error = copyout(frame, uframe, sizeof(*frame));
/*
- * "What about the fp regs?" I hear you ask.... XXXKSE
- * Don't know where gs and "onstack" come from.
- * May need to fiddle a few other values too.
+ * Pass the address of the mailbox for this kse to the uts
+ * function as a parameter on the stack.
*/
- return (error);
+ suword((void *)(td->td_frame->tf_esp + sizeof(void *)),
+ (int)ke->ke_mailbox);
}
void
OpenPOWER on IntegriCloud