author     julian <julian@FreeBSD.org>  2002-06-29 17:26:22 +0000
committer  julian <julian@FreeBSD.org>  2002-06-29 17:26:22 +0000
commit     aa2dc0a5d9e7a19420c153cd414fefa8498eab71 (patch)
tree       0a0483a267784fa8e2bf86857d8727edb5b122e9
parent     6dbff7f2c1f8150887038aed666e11675adf0b4e (diff)
Part 1 of KSE-III
The ability to schedule multiple threads per process (on one CPU) by making ALL system calls optionally asynchronous.
Still to come: ia64 and PowerPC patches, patches for gdb, and a test program (in tools).

Reviewed by: Almost everyone who counts (at various times: peter, jhb, matt, alfred, mini, bernd, and a cast of thousands)

NOTE: this is still beta code and contains lots of debugging stuff; expect slight instability in signals.
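For orientation, the heart of the "optionally asynchronous" syscall path is the per-syscall mailbox check that this patch adds to each architecture's syscall() handler. The sketch below is condensed from the i386/alpha/amd64 hunks in this diff (locking and error paths omitted); it is an illustration of those hunks, not a separate API:

	/* Condensed from the syscall() hunks below: for a KSE (P_KSES)
	 * process, look up the current thread's userland mailbox and
	 * decide whether this syscall runs bound or unbound (async). */
	if (p->p_flag & P_KSES) {
		td->td_mailbox = (void *)fuword((caddr_t)td->td_kse->ke_mailbox
		    + offsetof(struct kse_mailbox, kmbx_current_thread));
		if ((td->td_mailbox == NULL) ||
		    (td->td_mailbox == (void *)-1)) {
			td->td_mailbox = NULL;		/* single thread it.. */
			td->td_flags &= ~TDF_UNBOUND;	/* stay bound to this KSE */
		} else {
			td->td_flags |= TDF_UNBOUND;	/* eligible for upcall handoff */
		}
	}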
-rw-r--r--lib/libkvm/kvm_proc.c21
-rw-r--r--sys/alpha/alpha/genassym.c2
-rw-r--r--sys/alpha/alpha/pmap.c21
-rw-r--r--sys/alpha/alpha/swtch.s3
-rw-r--r--sys/alpha/alpha/trap.c28
-rw-r--r--sys/alpha/alpha/vm_machdep.c138
-rw-r--r--sys/alpha/linux/linux_machdep.c1
-rw-r--r--sys/amd64/amd64/cpu_switch.S13
-rw-r--r--sys/amd64/amd64/genassym.c9
-rw-r--r--sys/amd64/amd64/machdep.c2
-rw-r--r--sys/amd64/amd64/pmap.c22
-rw-r--r--sys/amd64/amd64/swtch.s13
-rw-r--r--sys/amd64/amd64/trap.c43
-rw-r--r--sys/amd64/amd64/vm_machdep.c160
-rw-r--r--sys/compat/linprocfs/linprocfs.c63
-rw-r--r--sys/compat/svr4/svr4_misc.c7
-rw-r--r--sys/conf/files1
-rw-r--r--sys/ddb/db_ps.c38
-rw-r--r--sys/fs/procfs/procfs_ctl.c15
-rw-r--r--sys/fs/procfs/procfs_dbregs.c2
-rw-r--r--sys/fs/procfs/procfs_fpregs.c2
-rw-r--r--sys/fs/procfs/procfs_ioctl.c6
-rw-r--r--sys/fs/procfs/procfs_regs.c2
-rw-r--r--sys/i386/i386/genassym.c9
-rw-r--r--sys/i386/i386/machdep.c2
-rw-r--r--sys/i386/i386/pmap.c22
-rw-r--r--sys/i386/i386/swtch.s13
-rw-r--r--sys/i386/i386/trap.c43
-rw-r--r--sys/i386/i386/vm_machdep.c160
-rw-r--r--sys/i386/linux/linux_machdep.c1
-rw-r--r--sys/i386/linux/linux_ptrace.c2
-rw-r--r--sys/ia64/ia64/trap.c14
-rw-r--r--sys/kern/init_main.c33
-rw-r--r--sys/kern/init_sysent.c2
-rw-r--r--sys/kern/kern_condvar.c89
-rw-r--r--sys/kern/kern_exec.c10
-rw-r--r--sys/kern/kern_exit.c97
-rw-r--r--sys/kern/kern_fork.c75
-rw-r--r--sys/kern/kern_idle.c19
-rw-r--r--sys/kern/kern_intr.c27
-rw-r--r--sys/kern/kern_kthread.c3
-rw-r--r--sys/kern/kern_mutex.c31
-rw-r--r--sys/kern/kern_poll.c1
-rw-r--r--sys/kern/kern_proc.c217
-rw-r--r--sys/kern/kern_shutdown.c1
-rw-r--r--sys/kern/kern_sig.c386
-rw-r--r--sys/kern/kern_subr.c1
-rw-r--r--sys/kern/kern_switch.c662
-rw-r--r--sys/kern/kern_synch.c275
-rw-r--r--sys/kern/ksched.c27
-rw-r--r--sys/kern/subr_smp.c4
-rw-r--r--sys/kern/subr_trap.c37
-rw-r--r--sys/kern/subr_turnstile.c31
-rw-r--r--sys/kern/subr_witness.c1
-rw-r--r--sys/kern/sys_generic.c2
-rw-r--r--sys/kern/sys_process.c6
-rw-r--r--sys/kern/syscalls.master2
-rw-r--r--sys/kern/tty.c53
-rw-r--r--sys/posix4/ksched.c27
-rw-r--r--sys/sparc64/sparc64/genassym.c2
-rw-r--r--sys/sparc64/sparc64/swtch.S3
-rw-r--r--sys/sparc64/sparc64/swtch.s3
-rw-r--r--sys/sparc64/sparc64/trap.c33
-rw-r--r--sys/sparc64/sparc64/vm_machdep.c36
-rw-r--r--sys/sys/condvar.h1
-rw-r--r--sys/sys/proc.h195
-rw-r--r--sys/sys/queue.h53
-rw-r--r--sys/sys/signalvar.h4
-rw-r--r--sys/sys/systm.h1
-rw-r--r--sys/sys/ucred.h16
-rw-r--r--sys/vm/uma_int.h2
-rw-r--r--sys/vm/vm_glue.c48
-rw-r--r--sys/vm/vm_meter.c71
-rw-r--r--sys/vm/vm_pageout.c30
-rw-r--r--sys/vm/vm_zeroidle.c1
75 files changed, 2765 insertions, 731 deletions
diff --git a/lib/libkvm/kvm_proc.c b/lib/libkvm/kvm_proc.c
index 865377c..547792e 100644
--- a/lib/libkvm/kvm_proc.c
+++ b/lib/libkvm/kvm_proc.c
@@ -325,11 +325,28 @@ nopgrp:
kp->ki_estcpu = proc.p_ksegrp.kg_estcpu; /* XXXKSE */
kp->ki_slptime = proc.p_kse.ke_slptime; /* XXXKSE */
kp->ki_swtime = proc.p_swtime;
- kp->ki_flag = proc.p_flag;
+ kp->ki_flag = proc.p_flag; /* WILDLY INNACURATE XXXKSE */
kp->ki_sflag = proc.p_sflag;
kp->ki_wchan = mainthread.td_wchan; /* XXXKSE */
kp->ki_traceflag = proc.p_traceflag;
- kp->ki_stat = proc.p_stat;
+ if (proc.p_state == PRS_NORMAL) { /* XXXKSE very aproximate */
+ if ((mainthread.td_state == TDS_RUNQ) ||
+ (mainthread.td_state == TDS_RUNNING)) {
+ kp->ki_stat = SRUN;
+ } else if (mainthread.td_state == TDS_SLP) {
+ kp->ki_stat = SSLEEP;
+ } else if (P_SHOULDSTOP(&proc)) {
+ kp->ki_stat = SSTOP;
+ } else if (mainthread.td_state == TDS_MTX) {
+ kp->ki_stat = SMTX;
+ } else {
+ kp->ki_stat = SWAIT;
+ }
+ } else if (proc.p_state == PRS_ZOMBIE) {
+ kp->ki_stat = SZOMB;
+ } else {
+ kp->ki_stat = SIDL;
+ }
kp->ki_pri.pri_class = proc.p_ksegrp.kg_pri_class; /* XXXKSE */
kp->ki_pri.pri_user = proc.p_ksegrp.kg_user_pri; /* XXXKSE */
kp->ki_pri.pri_level = mainthread.td_priority; /* XXXKSE */
diff --git a/sys/alpha/alpha/genassym.c b/sys/alpha/alpha/genassym.c
index 62ff3a4..96092da 100644
--- a/sys/alpha/alpha/genassym.c
+++ b/sys/alpha/alpha/genassym.c
@@ -80,6 +80,8 @@ ASSYM(MTX_UNOWNED, MTX_UNOWNED);
ASSYM(TD_PCB, offsetof(struct thread, td_pcb));
ASSYM(TD_KSE, offsetof(struct thread, td_kse));
ASSYM(TD_PROC, offsetof(struct thread, td_proc));
+ASSYM(TD_STATE, offsetof(struct thread, td_state));
+ASSYM(TDS_RUNNING, TDS_RUNNING);
ASSYM(KE_FLAGS, offsetof(struct kse, ke_flags));
diff --git a/sys/alpha/alpha/pmap.c b/sys/alpha/alpha/pmap.c
index c758edb..5137f79 100644
--- a/sys/alpha/alpha/pmap.c
+++ b/sys/alpha/alpha/pmap.c
@@ -1151,7 +1151,12 @@ pmap_dispose_thread(td)
ksobj = td->td_kstack_obj;
ks = td->td_kstack;
ptek = vtopte(ks);
+#ifdef KSTACK_GUARD
+ ks -= PAGE_SIZE;
+ for (i = 1; i < (KSTACK_PAGES + 1); i++) {
+#else
for (i = 0; i < KSTACK_PAGES; i++) {
+#endif
m = vm_page_lookup(ksobj, i);
if (m == NULL)
panic("pmap_dispose_thread: kstack already missing?");
@@ -1164,14 +1169,16 @@ pmap_dispose_thread(td)
}
/*
- * If the thread got swapped out some of its KSTACK might have gotten
- * swapped. Just get rid of the object to clean up the swap use
- * proactively. NOTE! might block waiting for paging I/O to complete.
+ * Free the space that this stack was mapped to in the kernel
+ * address map.
*/
- if (ksobj->type == OBJT_SWAP) {
- td->td_kstack_obj = NULL;
- vm_object_deallocate(ksobj);
- }
+#ifdef KSTACK_GUARD
+ kmem_free(kernel_map, ks, (KSTACK_PAGES + 1) * PAGE_SIZE);
+#else
+ kmem_free(kernel_map, ks, KSTACK_PAGES * PAGE_SIZE);
+#endif
+ td->td_kstack_obj = NULL;
+ vm_object_deallocate(ksobj);
}
/*
diff --git a/sys/alpha/alpha/swtch.s b/sys/alpha/alpha/swtch.s
index 34f3453..bae5227 100644
--- a/sys/alpha/alpha/swtch.s
+++ b/sys/alpha/alpha/swtch.s
@@ -127,6 +127,9 @@ Lcs1: LDGP(pv)
mov v0, s2 /* s2 = new thread */
ldq s3, TD_MD_PCBPADDR(s2) /* s3 = new pcbpaddr */
+ ldiq t0, TDS_RUNNING
+ stl t0, TD_STATE(s2)
+
/*
* Check to see if we're switching to ourself. If we are,
* don't bother loading the new context.
diff --git a/sys/alpha/alpha/trap.c b/sys/alpha/alpha/trap.c
index 6cdf9f4..17dcb14 100644
--- a/sys/alpha/alpha/trap.c
+++ b/sys/alpha/alpha/trap.c
@@ -39,6 +39,7 @@
#include <sys/sysproto.h>
#include <sys/kernel.h>
#include <sys/proc.h>
+#include <sys/kse.h>
#include <sys/exec.h>
#include <sys/lock.h>
#include <sys/mutex.h>
@@ -299,6 +300,12 @@ trap(a0, a1, a2, entry, framep)
td->td_frame = framep;
if (td->td_ucred != p->p_ucred)
cred_update_thread(td);
+ if ((p->p_flag & P_WEXIT) && (p->p_singlethread != td)) {
+ mtx_lock_spin(&sched_lock);
+ PROC_LOCK(p);
+ thread_exit();
+ /* NOTREACHED */
+ }
} else {
sticks = 0; /* XXX bogus -Wuninitialized warning */
KASSERT(cold || td->td_ucred != NULL,
@@ -659,6 +666,23 @@ syscall(code, framep)
sticks = td->td_kse->ke_sticks;
if (td->td_ucred != p->p_ucred)
cred_update_thread(td);
+ if (p->p_flag & P_KSES) {
+ /*
+ * If we are doing a syscall in a KSE environment,
+ * note where our mailbox is. There is always the
+ * possibility that we could do this lazily (in sleep()),
+ * but for now do it every time.
+ */
+ td->td_mailbox = (void *)fuword((caddr_t)td->td_kse->ke_mailbox
+ + offsetof(struct kse_mailbox, kmbx_current_thread));
+ if ((td->td_mailbox == NULL) ||
+ (td->td_mailbox == (void *)-1)) {
+ td->td_mailbox = NULL; /* single thread it.. */
+ td->td_flags &= ~TDF_UNBOUND;
+ } else {
+ td->td_flags |= TDF_UNBOUND;
+ }
+ }
#ifdef DIAGNOSTIC
alpha_fpstate_check(td);
@@ -756,14 +780,14 @@ syscall(code, framep)
break;
}
- userret(td, framep, sticks);
-
/*
* Release Giant if we had to get it.
*/
if ((callp->sy_narg & SYF_MPSAFE) == 0)
mtx_unlock(&Giant);
+ userret(td, framep, sticks);
+
#ifdef KTRACE
if (KTRPOINT(td, KTR_SYSRET))
ktrsysret(code, error, td->td_retval[0]);
diff --git a/sys/alpha/alpha/vm_machdep.c b/sys/alpha/alpha/vm_machdep.c
index e57593c..80f5f03 100644
--- a/sys/alpha/alpha/vm_machdep.c
+++ b/sys/alpha/alpha/vm_machdep.c
@@ -240,8 +240,7 @@ cpu_set_fork_handler(td, func, arg)
* from proc0.
*/
void
-cpu_exit(td)
- register struct thread *td;
+cpu_exit(struct thread *td)
{
alpha_fpstate_drop(td);
@@ -254,6 +253,141 @@ cpu_sched_exit(td)
}
void
+cpu_thread_exit(struct thread *td)
+{
+
+ return;
+}
+
+void
+cpu_thread_setup(struct thread *td)
+{
+
+ td->td_pcb =
+ (struct pcb *)(td->td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1;
+ td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb) - 1;
+}
+
+struct md_store {
+ struct pcb mds_pcb;
+ struct trapframe mds_frame;
+};
+
+void
+cpu_save_upcall(struct thread *td, struct kse *newkse)
+{
+
+ newkse->ke_mdstorage = malloc(sizeof(struct md_store), M_TEMP,
+ M_WAITOK);
+ /* Note: use of M_WAITOK means it won't fail. */
+ /* set up shortcuts in MI section */
+ newkse->ke_pcb =
+ &(((struct md_store *)(newkse->ke_mdstorage))->mds_pcb);
+ newkse->ke_frame =
+ &(((struct md_store *)(newkse->ke_mdstorage))->mds_frame);
+
+ /* Copy the upcall pcb. Kernel mode & fp regs are here. */
+ /* XXXKSE this may be un-needed */
+ bcopy(td->td_pcb, newkse->ke_pcb, sizeof(struct pcb));
+
+ /* This copies most of the user mode register values. */
+ bcopy(td->td_frame, newkse->ke_frame, sizeof(struct trapframe));
+}
+
+void
+cpu_set_upcall(struct thread *td, void *pcb)
+{
+ struct pcb *pcb2;
+
+ td->td_flags |= TDF_UPCALLING;
+
+ /* Point the pcb to the top of the stack. */
+ pcb2 = td->td_pcb;
+
+ /*
+ * Copy the upcall pcb. This loads kernel regs.
+ * Those not loaded individually below get their default
+ * values here.
+ *
+ * XXXKSE It might be a good idea to simply skip this as
+ * the values of the other registers may be unimportant.
+ * This would remove any requirement for knowing the KSE
+ * at this time (see the matching comment below for
+ * more analysis) (need a good safe default).
+ */
+ bcopy(pcb, pcb2, sizeof(*pcb2));
+
+ /*
+ * Create a new fresh stack for the new thread.
+ * Don't forget to set this stack value into whatever supplies
+ * the address for the fault handlers.
+ * The contexts are filled in at the time we actually DO the
+ * upcall as only then do we know which KSE we got.
+ */
+ td->td_frame = (struct trapframe *)((caddr_t)pcb2) - 1;
+
+ /*
+ * Arrange for continuation at fork_return(), which
+ * will return to exception_return(). Note that the child
+ * process doesn't stay in the kernel for long!
+ */
+ pcb2->pcb_hw.apcb_ksp = (u_int64_t)td->td_frame;
+ pcb2->pcb_context[0] = (u_int64_t)fork_return; /* s0: a0 */
+ pcb2->pcb_context[1] = (u_int64_t)exception_return; /* s1: ra */
+ pcb2->pcb_context[2] = (u_long)td; /* s2: a1 */
+ pcb2->pcb_context[7] = (u_int64_t)fork_trampoline; /* ra: magic*/
+#ifdef SMP
+ /*
+ * We start off at a nesting level of 1 within the kernel.
+ */
+ td->td_md.md_kernnest = 1;
+#endif
+}
+
+void
+cpu_set_args(struct thread *td, struct kse *ke)
+{
+/* XXX
+ suword((void *)(ke->ke_frame->tf_esp + sizeof(void *)),
+ (int)ke->ke_mailbox);
+*/
+}
+
+void
+cpu_free_kse_mdstorage(struct kse *kse)
+{
+
+ free(kse->ke_mdstorage, M_TEMP);
+ kse->ke_mdstorage = NULL;
+ kse->ke_pcb = NULL;
+ kse->ke_frame = NULL;
+}
+
+int
+cpu_export_context(struct thread *td)
+{
+ /* XXXKSE */
+#if 0
+ struct trapframe *frame;
+ struct thread_mailbox *tm;
+ struct trapframe *uframe;
+ int error;
+
+ frame = td->td_frame;
+ tm = td->td_mailbox;
+ uframe = &tm->ctx.tfrm.tf_tf;
+ error = copyout(frame, uframe, sizeof(*frame));
+ /*
+ * "What about the fp regs?" I hear you ask.... XXXKSE
+ * Don't know where gs and "onstack" come from.
+ * May need to fiddle a few other values too.
+ */
+ return (error);
+#endif
+ return (0);
+}
+
+void
cpu_wait(p)
struct proc *p;
{
diff --git a/sys/alpha/linux/linux_machdep.c b/sys/alpha/linux/linux_machdep.c
index 51d68f1..5f33c80 100644
--- a/sys/alpha/linux/linux_machdep.c
+++ b/sys/alpha/linux/linux_machdep.c
@@ -180,7 +180,6 @@ linux_clone(struct thread *td, struct linux_clone_args *args)
* Make this runnable after we are finished with it.
*/
mtx_lock_spin(&sched_lock);
- p2->p_stat = SRUN;
setrunqueue(FIRST_THREAD_IN_PROC(p2));
mtx_unlock_spin(&sched_lock);
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S
index e0f9bcd..80db485 100644
--- a/sys/amd64/amd64/cpu_switch.S
+++ b/sys/amd64/amd64/cpu_switch.S
@@ -65,12 +65,19 @@ tlb_flush_count: .long 0
/*
* cpu_throw()
+ *
+ * This is the second half of cpu_swtch(). It is used when the current
+ * thread is either a dummy or slated to die, and we no longer care
+ * about its state.
*/
ENTRY(cpu_throw)
jmp sw1
/*
* cpu_switch()
+ *
+ * Save the current thread state, then select the next thread to run
+ * and load its state.
*/
ENTRY(cpu_switch)
@@ -166,11 +173,11 @@ sw1b:
movl %eax,%ecx
#ifdef INVARIANTS
- movl TD_PROC(%ecx), %eax /* XXXKSE */
- cmpb $SRUN,P_STAT(%eax)
+ cmpb $TDS_RUNQ,TD_STATE(%ecx)
jne badsw2
#endif
+ movl $TDS_RUNNING,TD_STATE(%ecx)
movl TD_PCB(%ecx),%edx
#if defined(SWTCH_OPTIM_STATS)
@@ -310,12 +317,14 @@ cpu_switch_load_gs:
#ifdef INVARIANTS
badsw2:
+ pushal
pushl $sw0_2
call panic
sw0_2: .asciz "cpu_switch: not TDS_RUNQ"
badsw3:
+ pushal
pushl $sw0_3
call panic
diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c
index f3e9f04..dcc1880 100644
--- a/sys/amd64/amd64/genassym.c
+++ b/sys/amd64/amd64/genassym.c
@@ -79,10 +79,10 @@ ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace));
ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap));
ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active));
ASSYM(P_SFLAG, offsetof(struct proc, p_sflag));
-ASSYM(P_STAT, offsetof(struct proc, p_stat));
+ASSYM(P_STATE, offsetof(struct proc, p_state));
ASSYM(P_UAREA, offsetof(struct proc, p_uarea));
-/*ASSYM(TD_STAT, offsetof(struct thread, td__stat));*/
+ASSYM(TD_STATE, offsetof(struct thread, td_state));
ASSYM(TD_FLAGS, offsetof(struct thread, td_flags));
ASSYM(TD_WCHAN, offsetof(struct thread, td_wchan));
ASSYM(TD_PCB, offsetof(struct thread, td_pcb));
@@ -101,8 +101,9 @@ ASSYM(KE_FLAGS, offsetof(struct kse, ke_flags));
ASSYM(KEF_ASTPENDING, KEF_ASTPENDING);
ASSYM(KEF_NEEDRESCHED, KEF_NEEDRESCHED);
-ASSYM(SSLEEP, SSLEEP);
-ASSYM(SRUN, SRUN);
+ASSYM(TDS_SLP, TDS_SLP);
+ASSYM(TDS_RUNQ, TDS_RUNQ);
+ASSYM(TDS_RUNNING, TDS_RUNNING);
ASSYM(V_TRAP, offsetof(struct vmmeter, v_trap));
ASSYM(V_SYSCALL, offsetof(struct vmmeter, v_syscall));
ASSYM(V_INTR, offsetof(struct vmmeter, v_intr));
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index 2f11ee2..c73c5e1 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -799,7 +799,7 @@ cpu_idle(void)
{
if (cpu_idle_hlt) {
disable_intr();
- if (procrunnable()) {
+ if (kserunnable()) {
enable_intr();
} else {
/*
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index e2cebaf..9e35ad7 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -1100,7 +1100,12 @@ pmap_dispose_thread(td)
ksobj = td->td_kstack_obj;
ks = td->td_kstack;
ptek = vtopte(ks);
+#ifdef KSTACK_GUARD
+ ks -= PAGE_SIZE;
+ for (i = 1; i < (KSTACK_PAGES + 1); i++) {
+#else
for (i = 0; i < KSTACK_PAGES; i++) {
+#endif
m = vm_page_lookup(ksobj, i);
if (m == NULL)
panic("pmap_dispose_thread: kstack already missing?");
@@ -1116,16 +1121,17 @@ pmap_dispose_thread(td)
#ifdef I386_CPU
invltlb();
#endif
-
/*
- * If the thread got swapped out some of its KSTACK might have gotten
- * swapped. Just get rid of the object to clean up the swap use
- * proactively. NOTE! might block waiting for paging I/O to complete.
+ * Free the space that this stack was mapped to in the kernel
+ * address map.
*/
- if (ksobj->type == OBJT_SWAP) {
- td->td_kstack_obj = NULL;
- vm_object_deallocate(ksobj);
- }
+#ifdef KSTACK_GUARD
+ kmem_free(kernel_map, ks, (KSTACK_PAGES + 1) * PAGE_SIZE);
+#else
+ kmem_free(kernel_map, ks, KSTACK_PAGES * PAGE_SIZE);
+#endif
+ vm_object_deallocate(ksobj);
+ td->td_kstack_obj = NULL; /* play it safe */
}
/*
diff --git a/sys/amd64/amd64/swtch.s b/sys/amd64/amd64/swtch.s
index e0f9bcd..80db485 100644
--- a/sys/amd64/amd64/swtch.s
+++ b/sys/amd64/amd64/swtch.s
@@ -65,12 +65,19 @@ tlb_flush_count: .long 0
/*
* cpu_throw()
+ *
+ * This is the second half of cpu_swtch(). It is used when the current
+ * thread is either a dummy or slated to die, and we no longer care
+ * about its state.
*/
ENTRY(cpu_throw)
jmp sw1
/*
* cpu_switch()
+ *
+ * Save the current thread state, then select the next thread to run
+ * and load its state.
*/
ENTRY(cpu_switch)
@@ -166,11 +173,11 @@ sw1b:
movl %eax,%ecx
#ifdef INVARIANTS
- movl TD_PROC(%ecx), %eax /* XXXKSE */
- cmpb $SRUN,P_STAT(%eax)
+ cmpb $TDS_RUNQ,TD_STATE(%ecx)
jne badsw2
#endif
+ movl $TDS_RUNNING,TD_STATE(%ecx)
movl TD_PCB(%ecx),%edx
#if defined(SWTCH_OPTIM_STATS)
@@ -310,12 +317,14 @@ cpu_switch_load_gs:
#ifdef INVARIANTS
badsw2:
+ pushal
pushl $sw0_2
call panic
sw0_2: .asciz "cpu_switch: not TDS_RUNQ"
badsw3:
+ pushal
pushl $sw0_3
call panic
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index 08c75e4..8282416 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -54,6 +54,7 @@
#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/proc.h>
+#include <sys/kse.h>
#include <sys/pioctl.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
@@ -267,6 +268,17 @@ trap(frame)
if (td->td_ucred != p->p_ucred)
cred_update_thread(td);
+ /*
+ * First check that we shouldn't just abort.
+ * But check if we are the single thread first!
+ */
+ if ((p->p_flag & P_WEXIT) && (p->p_singlethread != td)) {
+ mtx_lock_spin(&sched_lock);
+ PROC_LOCK(p);
+ thread_exit();
+ /* NOTREACHED */
+ }
+
switch (type) {
case T_PRIVINFLT: /* privileged instruction fault */
ucode = type;
@@ -939,11 +951,30 @@ syscall(frame)
mtx_unlock(&Giant);
}
#endif
+ KASSERT((td->td_kse != NULL), ("syscall: kse/thread UNLINKED"));
+ KASSERT((td->td_kse->ke_thread == td), ("syscall:kse/thread mismatch"));
sticks = td->td_kse->ke_sticks;
td->td_frame = &frame;
if (td->td_ucred != p->p_ucred)
cred_update_thread(td);
+ if (p->p_flag & P_KSES) {
+ /*
+ * If we are doing a syscall in a KSE environment,
+ * note where our mailbox is. There is always the
+ * possibility that we could do this lazily (in sleep()),
+ * but for now do it every time.
+ */
+ td->td_mailbox = (void *)fuword((caddr_t)td->td_kse->ke_mailbox
+ + offsetof(struct kse_mailbox, kmbx_current_thread));
+ if ((td->td_mailbox == NULL) ||
+ (td->td_mailbox == (void *)-1)) {
+ td->td_mailbox = NULL; /* single thread it.. */
+ td->td_flags &= ~TDF_UNBOUND;
+ } else {
+ td->td_flags |= TDF_UNBOUND;
+ }
+ }
params = (caddr_t)frame.tf_esp + sizeof(int);
code = frame.tf_eax;
orig_tf_eflags = frame.tf_eflags;
@@ -1045,6 +1076,12 @@ syscall(frame)
}
/*
+ * Release Giant if we previously set it.
+ */
+ if ((callp->sy_narg & SYF_MPSAFE) == 0)
+ mtx_unlock(&Giant);
+
+ /*
* Traced syscall.
*/
if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) {
@@ -1057,12 +1094,6 @@ syscall(frame)
*/
userret(td, &frame, sticks);
- /*
- * Release Giant if we previously set it.
- */
- if ((callp->sy_narg & SYF_MPSAFE) == 0)
- mtx_unlock(&Giant);
-
#ifdef KTRACE
if (KTRPOINT(td, KTR_SYSRET))
ktrsysret(code, error, td->td_retval[0]);
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index 5dc2e14..04742c3 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -53,6 +53,7 @@
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/proc.h>
+#include <sys/kse.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/vnode.h>
@@ -254,15 +255,26 @@ cpu_set_fork_handler(td, func, arg)
}
void
-cpu_exit(td)
- register struct thread *td;
+cpu_exit(struct thread *td)
+{
+ struct mdproc *mdp;
+
+ mdp = &td->td_proc->p_md;
+ if (mdp->md_ldt)
+ user_ldt_free(td);
+ reset_dbregs();
+}
+
+void
+cpu_thread_exit(struct thread *td)
{
struct pcb *pcb = td->td_pcb;
- struct mdproc *mdp = &td->td_proc->p_md;
#ifdef DEV_NPX
npxexit(td);
#endif
if (pcb->pcb_ext != 0) {
+ /* XXXKSE XXXSMP not SMP SAFE.. what locks do we have? */
+ /* if (pcb->pcb_ext->ext_refcount-- == 1) ?? */
/*
* XXX do we need to move the TSS off the allocated pages
* before freeing them? (not done here)
@@ -271,8 +283,6 @@ cpu_exit(td)
ctob(IOPAGES + 1));
pcb->pcb_ext = 0;
}
- if (mdp->md_ldt)
- user_ldt_free(td);
if (pcb->pcb_flags & PCB_DBREGS) {
/*
* disable all hardware breakpoints
@@ -289,6 +299,146 @@ cpu_sched_exit(td)
}
void
+cpu_thread_setup(struct thread *td)
+{
+
+ td->td_pcb =
+ (struct pcb *)(td->td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1;
+ td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb - 16) - 1;
+}
+
+struct md_store {
+ struct pcb mds_pcb;
+ struct trapframe mds_frame;
+};
+
+void
+cpu_save_upcall(struct thread *td, struct kse *newkse)
+{
+ struct trapframe *tf;
+
+ newkse->ke_mdstorage = malloc(sizeof(struct md_store), M_TEMP,
+ M_WAITOK);
+ /* Note: use of M_WAITOK means it won't fail. */
+ /* set up shortcuts in MI section */
+ newkse->ke_pcb =
+ &(((struct md_store *)(newkse->ke_mdstorage))->mds_pcb);
+ newkse->ke_frame =
+ &(((struct md_store *)(newkse->ke_mdstorage))->mds_frame);
+ tf = newkse->ke_frame;
+
+ /* Copy the upcall pcb. Kernel mode & fp regs are here. */
+ /* XXXKSE this may be un-needed */
+ bcopy(td->td_pcb, newkse->ke_pcb, sizeof(struct pcb));
+
+ /*
+ * This initialises most of the user mode register values
+ * to good values. Eventually set them explicitly to know values
+ */
+ bcopy(td->td_frame, newkse->ke_frame, sizeof(struct trapframe));
+ tf->tf_edi = 0;
+ tf->tf_esi = 0; /* trampoline arg */
+ tf->tf_ebp = 0;
+ tf->tf_esp = (int)newkse->ke_stackbase + newkse->ke_stacksize - 16;
+ tf->tf_ebx = 0; /* trampoline arg */
+ tf->tf_eip = (int)newkse->ke_upcall;
+}
+
+void
+cpu_set_upcall(struct thread *td, void *pcb)
+{
+ struct pcb *pcb2;
+
+ td->td_flags |= TDF_UPCALLING;
+
+ /* Point the pcb to the top of the stack. */
+ pcb2 = td->td_pcb;
+
+ /*
+ * Copy the upcall pcb. This loads kernel regs.
+ * Those not loaded individually below get their default
+ * values here.
+ *
+ * XXXKSE It might be a good idea to simply skip this as
+ * the values of the other registers may be unimportant.
+ * This would remove any requirement for knowing the KSE
+ * at this time (see the matching comment below for
+ * more analysis) (need a good safe default).
+ */
+ bcopy(pcb, pcb2, sizeof(*pcb2));
+
+ /*
+ * Create a new fresh stack for the new thread.
+ * The -16 is so we can expand the trapframe if we go to vm86.
+ * Don't forget to set this stack value into whatever supplies
+ * the address for the fault handlers.
+ * The contexts are filled in at the time we actually DO the
+ * upcall as only then do we know which KSE we got.
+ */
+ td->td_frame = (struct trapframe *)((caddr_t)pcb2 - 16) - 1;
+
+ /*
+ * Set registers for trampoline to user mode. Leave space for the
+ * return address on stack. These are the kernel mode register values.
+ */
+ pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdir);
+ pcb2->pcb_edi = 0;
+ pcb2->pcb_esi = (int)fork_return; /* trampoline arg */
+ pcb2->pcb_ebp = 0;
+ pcb2->pcb_esp = (int)td->td_frame - sizeof(void *); /* trampoline arg */
+ pcb2->pcb_ebx = (int)td; /* trampoline arg */
+ pcb2->pcb_eip = (int)fork_trampoline;
+ pcb2->pcb_psl &= ~(PSL_I); /* interrupts must be disabled */
+ /*
+ * If we didn't copy the pcb, we'd need to do the following registers:
+ * pcb2->pcb_dr*: cloned above.
+ * pcb2->pcb_savefpu: cloned above.
+ * pcb2->pcb_flags: cloned above.
+ * pcb2->pcb_onfault: cloned above (always NULL here?).
+ * pcb2->pcb_gs: cloned above. XXXKSE ???
+ * pcb2->pcb_ext: cleared below.
+ */
+ pcb2->pcb_ext = NULL;
+}
+
+void
+cpu_set_args(struct thread *td, struct kse *ke)
+{
+ suword((void *)(ke->ke_frame->tf_esp + sizeof(void *)),
+ (int)ke->ke_mailbox);
+}
+
+void
+cpu_free_kse_mdstorage(struct kse *kse)
+{
+
+ free(kse->ke_mdstorage, M_TEMP);
+ kse->ke_mdstorage = NULL;
+ kse->ke_pcb = NULL;
+ kse->ke_frame = NULL;
+}
+
+int
+cpu_export_context(struct thread *td)
+{
+ struct trapframe *frame;
+ struct thread_mailbox *tm;
+ struct trapframe *uframe;
+ int error;
+
+ frame = td->td_frame;
+ tm = td->td_mailbox;
+ uframe = &tm->ctx.tfrm.tf_tf;
+ error = copyout(frame, uframe, sizeof(*frame));
+ /*
+ * "What about the fp regs?" I hear you ask.... XXXKSE
+ * Don't know where gs and "onstack" come from.
+ * May need to fiddle a few other values too.
+ */
+ return (error);
+}
+
+void
cpu_wait(p)
struct proc *p;
{
diff --git a/sys/compat/linprocfs/linprocfs.c b/sys/compat/linprocfs/linprocfs.c
index 02b858e..5129746 100644
--- a/sys/compat/linprocfs/linprocfs.c
+++ b/sys/compat/linprocfs/linprocfs.c
@@ -539,21 +539,6 @@ linprocfs_doprocstat(PFS_FILL_ARGS)
}
/*
- * Map process state to descriptive letter. Note that this does not
- * quite correspond to what Linux outputs, but it's close enough.
- */
-static char *state_str[] = {
- "? (unknown)",
- "I (idle)",
- "R (running)",
- "S (sleeping)",
- "T (stopped)",
- "Z (zombie)",
- "W (waiting)",
- "M (mutex)"
-};
-
-/*
* Filler function for proc/pid/status
*/
static int
@@ -562,13 +547,53 @@ linprocfs_doprocstatus(PFS_FILL_ARGS)
struct kinfo_proc kp;
char *state;
segsz_t lsize;
+ struct thread *td2;
int i;
mtx_lock_spin(&sched_lock);
- if (p->p_stat > sizeof state_str / sizeof *state_str)
- state = state_str[0];
- else
- state = state_str[(int)p->p_stat];
+ td2 = FIRST_THREAD_IN_PROC(p); /* XXXKSE pretend only one thread */
+
+ if (P_SHOULDSTOP(p)) {
+ state = "T (stopped)";
+ } else {
+ switch(p->p_state) {
+ case PRS_NEW:
+ state = "I (idle)";
+ break;
+ case PRS_NORMAL:
+ if (p->p_flag & P_WEXIT) {
+ state = "X (exiting)";
+ break;
+ }
+ switch(td2->td_state) {
+ case TDS_SLP:
+ case TDS_MTX:
+ state = "S (sleeping)";
+ break;
+ case TDS_RUNQ:
+ case TDS_RUNNING:
+ state = "R (running)";
+ break;
+ case TDS_NEW:
+ case TDS_UNQUEUED:
+ case TDS_IWAIT:
+ case TDS_SURPLUS:
+ default:
+ state = "? (unknown)";
+ break;
+ }
+ break;
+ case PRS_WAIT:
+ state = "W (waiting)";
+ break;
+ case PRS_ZOMBIE:
+ state = "Z (zombie)";
+ break;
+ default:
+ state = "? (unknown)";
+ break;
+ }
+ }
mtx_unlock_spin(&sched_lock);
PROC_LOCK(p);
diff --git a/sys/compat/svr4/svr4_misc.c b/sys/compat/svr4/svr4_misc.c
index 7ef01b9..f60d62c 100644
--- a/sys/compat/svr4/svr4_misc.c
+++ b/sys/compat/svr4/svr4_misc.c
@@ -1168,7 +1168,7 @@ svr4_setinfo(p, st, s)
if (p) {
i.si_pid = p->p_pid;
mtx_lock_spin(&sched_lock);
- if (p->p_stat == SZOMB) {
+ if (p->p_state == PRS_ZOMBIE) {
i.si_stime = p->p_ru->ru_stime.tv_sec;
i.si_utime = p->p_ru->ru_utime.tv_sec;
}
@@ -1256,7 +1256,7 @@ loop:
}
nfound++;
mtx_lock_spin(&sched_lock);
- if (q->p_stat == SZOMB &&
+ if ((q->p_state == PRS_ZOMBIE) &&
((SCARG(uap, options) & (SVR4_WEXITED|SVR4_WTRAPPED)))) {
mtx_unlock_spin(&sched_lock);
PROC_UNLOCK(q);
@@ -1372,7 +1372,8 @@ loop:
nprocs--;
return 0;
}
- if (q->p_stat == SSTOP && (q->p_flag & P_WAITED) == 0 &&
+ /* XXXKSE this needs clarification */
+ if (P_SHOULDSTOP(q) && ((q->p_flag & P_WAITED) == 0) &&
(q->p_flag & P_TRACED ||
(SCARG(uap, options) & (SVR4_WSTOPPED|SVR4_WCONTINUED)))) {
mtx_unlock_spin(&sched_lock);
diff --git a/sys/conf/files b/sys/conf/files
index 1cff41f..9994c11 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -870,6 +870,7 @@ kern/kern_synch.c standard
kern/kern_syscalls.c standard
kern/kern_sysctl.c standard
kern/kern_tc.c standard
+kern/kern_thread.c standard
kern/kern_time.c standard
kern/kern_timeout.c standard
kern/kern_uuid.c standard
diff --git a/sys/ddb/db_ps.c b/sys/ddb/db_ps.c
index 9468f63..996e4eb 100644
--- a/sys/ddb/db_ps.c
+++ b/sys/ddb/db_ps.c
@@ -52,6 +52,7 @@ db_ps(dummy1, dummy2, dummy3, dummy4)
int nl = 0;
volatile struct proc *p, *pp;
volatile struct thread *td;
+ char *state;
np = nprocs;
@@ -96,23 +97,44 @@ db_ps(dummy1, dummy2, dummy3, dummy4)
if (pp == NULL)
pp = p;
- db_printf("%5d %8p %8p %4d %5d %5d %07x %d",
+
+ switch(p->p_state) {
+ case PRS_NORMAL:
+ if (P_SHOULDSTOP(p))
+ state = "stopped";
+ else
+ state = "Normal";
+ break;
+ case PRS_NEW:
+ state = "New";
+ break;
+ case PRS_WAIT:
+ state = "Wait";
+ break;
+ case PRS_ZOMBIE:
+ state = "Zombie";
+ break;
+ default:
+ state = "Unknown";
+ break;
+ }
+ db_printf("%5d %8p %8p %4d %5d %5d %07x %s",
p->p_pid, (volatile void *)p, (void *)p->p_uarea,
p->p_ucred ? p->p_ucred->cr_ruid : 0, pp->p_pid,
- p->p_pgrp ? p->p_pgrp->pg_id : 0, p->p_flag, p->p_stat);
+ p->p_pgrp ? p->p_pgrp->pg_id : 0, p->p_flag, state);
if (p->p_flag & P_KSES) {
db_printf("(threaded) %s\n", p->p_comm);
FOREACH_THREAD_IN_PROC(p, td) {
db_printf( ". . . . . . . "
- ". . . . . . . . ");
+ ". thread %p . . . ", td);
if (td->td_wchan) {
- db_printf("%6s %8p", td->td_wmesg,
+ db_printf("SLP %6s %8p\n", td->td_wmesg,
(void *)td->td_wchan);
- } else if (p->p_stat == SMTX) {
- db_printf("%6s %8p", td->td_mtxname,
+ } else if (td->td_state == TDS_MTX) {
+ db_printf("MTX %6s %8p\n", td->td_mtxname,
(void *)td->td_blocked);
} else {
- db_printf("--not blocked--");
+ db_printf("--not blocked--\n");
}
}
} else {
@@ -120,7 +142,7 @@ db_ps(dummy1, dummy2, dummy3, dummy4)
if (td->td_wchan) {
db_printf(" %6s %8p", td->td_wmesg,
(void *)td->td_wchan);
- } else if (p->p_stat == SMTX) {
+ } else if (td->td_state == TDS_MTX) {
db_printf(" %6s %8p", td->td_mtxname,
(void *)td->td_blocked);
} else {
diff --git a/sys/fs/procfs/procfs_ctl.c b/sys/fs/procfs/procfs_ctl.c
index 0f35370..15ed718 100644
--- a/sys/fs/procfs/procfs_ctl.c
+++ b/sys/fs/procfs/procfs_ctl.c
@@ -62,7 +62,7 @@
* relative to process (curp)
*/
#define TRACE_WAIT_P(curp, p) \
- ((p)->p_stat == SSTOP && \
+ (P_SHOULDSTOP(p) && \
(p)->p_pptr == (curp) && \
((p)->p_flag & P_TRACED))
@@ -262,6 +262,7 @@ out:
*/
case PROCFS_CTL_RUN:
PROC_UNLOCK(p);
+ p->p_flag &= ~P_STOPPED_SGNL; /* this uses SIGSTOP */
break;
/*
@@ -272,27 +273,26 @@ out:
case PROCFS_CTL_WAIT:
if (p->p_flag & P_TRACED) {
while (error == 0 &&
- (p->p_stat != SSTOP) &&
+ (P_SHOULDSTOP(p)) &&
(p->p_flag & P_TRACED) &&
(p->p_pptr == td->td_proc))
error = msleep((caddr_t) p, &p->p_mtx,
PWAIT|PCATCH, "procfsx", 0);
if (error == 0 && !TRACE_WAIT_P(td->td_proc, p))
error = EBUSY;
- } else
- while (error == 0 && p->p_stat != SSTOP)
+ } else {
+ while (error == 0 && P_SHOULDSTOP(p))
error = msleep((caddr_t) p, &p->p_mtx,
PWAIT|PCATCH, "procfs", 0);
+ }
PROC_UNLOCK(p);
return (error);
-
default:
panic("procfs_control");
}
mtx_lock_spin(&sched_lock);
- if (p->p_stat == SSTOP)
- setrunnable(FIRST_THREAD_IN_PROC(p)); /* XXXKSE */
+ thread_unsuspend(p); /* If it can run, let it do so. */
mtx_unlock_spin(&sched_lock);
return (0);
}
@@ -349,6 +349,7 @@ procfs_doprocctl(PFS_FILL_ARGS)
#endif
mtx_lock_spin(&sched_lock);
/* XXXKSE: */
+ p->p_flag &= ~P_STOPPED_SGNL;
setrunnable(FIRST_THREAD_IN_PROC(p));
mtx_unlock_spin(&sched_lock);
} else
diff --git a/sys/fs/procfs/procfs_dbregs.c b/sys/fs/procfs/procfs_dbregs.c
index 361f34b..442521c 100644
--- a/sys/fs/procfs/procfs_dbregs.c
+++ b/sys/fs/procfs/procfs_dbregs.c
@@ -90,7 +90,7 @@ procfs_doprocdbregs(PFS_FILL_ARGS)
if (error == 0)
error = uiomove(kv, kl, uio);
if (error == 0 && uio->uio_rw == UIO_WRITE) {
- if (p->p_stat != SSTOP)
+ if (!P_SHOULDSTOP(p)) /* XXXKSE should be P_TRACED? */
error = EBUSY;
else
/* XXXKSE: */
diff --git a/sys/fs/procfs/procfs_fpregs.c b/sys/fs/procfs/procfs_fpregs.c
index afabb33..f1401f3 100644
--- a/sys/fs/procfs/procfs_fpregs.c
+++ b/sys/fs/procfs/procfs_fpregs.c
@@ -84,7 +84,7 @@ procfs_doprocfpregs(PFS_FILL_ARGS)
if (error == 0)
error = uiomove(kv, kl, uio);
if (error == 0 && uio->uio_rw == UIO_WRITE) {
- if (p->p_stat != SSTOP)
+ if (!P_SHOULDSTOP(p))
error = EBUSY;
else
/* XXXKSE: */
diff --git a/sys/fs/procfs/procfs_ioctl.c b/sys/fs/procfs/procfs_ioctl.c
index 09aef86..9d49be9 100644
--- a/sys/fs/procfs/procfs_ioctl.c
+++ b/sys/fs/procfs/procfs_ioctl.c
@@ -94,9 +94,11 @@ procfs_ioctl(PFS_IOCTL_ARGS)
#if 0
mtx_lock_spin(&sched_lock);
p->p_step = 0;
- if (p->p_stat == SSTOP) {
+ if (P_SHOULDSTOP(p)) {
p->p_xstat = sig;
- setrunnable(FIRST_THREAD_IN_PROC(p));
+ p->p_flag &= ~(P_STOPPED_TRACE|P_STOPPED_SGNL);
+ FOREACH_THREAD_IN_PROC(p, td)
+ setrunnable(td); /* XXX Totally bogus */
mtx_unlock_spin(&sched_lock);
} else {
mtx_unlock_spin(&sched_lock);
diff --git a/sys/fs/procfs/procfs_regs.c b/sys/fs/procfs/procfs_regs.c
index 5fcb450..6cefe7e 100644
--- a/sys/fs/procfs/procfs_regs.c
+++ b/sys/fs/procfs/procfs_regs.c
@@ -86,7 +86,7 @@ procfs_doprocregs(PFS_FILL_ARGS)
error = uiomove(kv, kl, uio);
PROC_LOCK(p);
if (error == 0 && uio->uio_rw == UIO_WRITE) {
- if (p->p_stat != SSTOP)
+ if (!P_SHOULDSTOP(p))
error = EBUSY;
else
/* XXXKSE: */
diff --git a/sys/i386/i386/genassym.c b/sys/i386/i386/genassym.c
index f3e9f04..dcc1880 100644
--- a/sys/i386/i386/genassym.c
+++ b/sys/i386/i386/genassym.c
@@ -79,10 +79,10 @@ ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace));
ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap));
ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active));
ASSYM(P_SFLAG, offsetof(struct proc, p_sflag));
-ASSYM(P_STAT, offsetof(struct proc, p_stat));
+ASSYM(P_STATE, offsetof(struct proc, p_state));
ASSYM(P_UAREA, offsetof(struct proc, p_uarea));
-/*ASSYM(TD_STAT, offsetof(struct thread, td__stat));*/
+ASSYM(TD_STATE, offsetof(struct thread, td_state));
ASSYM(TD_FLAGS, offsetof(struct thread, td_flags));
ASSYM(TD_WCHAN, offsetof(struct thread, td_wchan));
ASSYM(TD_PCB, offsetof(struct thread, td_pcb));
@@ -101,8 +101,9 @@ ASSYM(KE_FLAGS, offsetof(struct kse, ke_flags));
ASSYM(KEF_ASTPENDING, KEF_ASTPENDING);
ASSYM(KEF_NEEDRESCHED, KEF_NEEDRESCHED);
-ASSYM(SSLEEP, SSLEEP);
-ASSYM(SRUN, SRUN);
+ASSYM(TDS_SLP, TDS_SLP);
+ASSYM(TDS_RUNQ, TDS_RUNQ);
+ASSYM(TDS_RUNNING, TDS_RUNNING);
ASSYM(V_TRAP, offsetof(struct vmmeter, v_trap));
ASSYM(V_SYSCALL, offsetof(struct vmmeter, v_syscall));
ASSYM(V_INTR, offsetof(struct vmmeter, v_intr));
diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c
index 2f11ee2..c73c5e1 100644
--- a/sys/i386/i386/machdep.c
+++ b/sys/i386/i386/machdep.c
@@ -799,7 +799,7 @@ cpu_idle(void)
{
if (cpu_idle_hlt) {
disable_intr();
- if (procrunnable()) {
+ if (kserunnable()) {
enable_intr();
} else {
/*
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index e2cebaf..9e35ad7 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -1100,7 +1100,12 @@ pmap_dispose_thread(td)
ksobj = td->td_kstack_obj;
ks = td->td_kstack;
ptek = vtopte(ks);
+#ifdef KSTACK_GUARD
+ ks -= PAGE_SIZE;
+ for (i = 1; i < (KSTACK_PAGES + 1); i++) {
+#else
for (i = 0; i < KSTACK_PAGES; i++) {
+#endif
m = vm_page_lookup(ksobj, i);
if (m == NULL)
panic("pmap_dispose_thread: kstack already missing?");
@@ -1116,16 +1121,17 @@ pmap_dispose_thread(td)
#ifdef I386_CPU
invltlb();
#endif
-
/*
- * If the thread got swapped out some of its KSTACK might have gotten
- * swapped. Just get rid of the object to clean up the swap use
- * proactively. NOTE! might block waiting for paging I/O to complete.
+ * Free the space that this stack was mapped to in the kernel
+ * address map.
*/
- if (ksobj->type == OBJT_SWAP) {
- td->td_kstack_obj = NULL;
- vm_object_deallocate(ksobj);
- }
+#ifdef KSTACK_GUARD
+ kmem_free(kernel_map, ks, (KSTACK_PAGES + 1) * PAGE_SIZE);
+#else
+ kmem_free(kernel_map, ks, KSTACK_PAGES * PAGE_SIZE);
+#endif
+ vm_object_deallocate(ksobj);
+ td->td_kstack_obj = NULL; /* play it safe */
}
/*
diff --git a/sys/i386/i386/swtch.s b/sys/i386/i386/swtch.s
index e0f9bcd..80db485 100644
--- a/sys/i386/i386/swtch.s
+++ b/sys/i386/i386/swtch.s
@@ -65,12 +65,19 @@ tlb_flush_count: .long 0
/*
* cpu_throw()
+ *
+ * This is the second half of cpu_swtch(). It is used when the current
+ * thread is either a dummy or slated to die, and we no longer care
+ * about its state.
*/
ENTRY(cpu_throw)
jmp sw1
/*
* cpu_switch()
+ *
+ * Save the current thread state, then select the next thread to run
+ * and load its state.
*/
ENTRY(cpu_switch)
@@ -166,11 +173,11 @@ sw1b:
movl %eax,%ecx
#ifdef INVARIANTS
- movl TD_PROC(%ecx), %eax /* XXXKSE */
- cmpb $SRUN,P_STAT(%eax)
+ cmpb $TDS_RUNQ,TD_STATE(%ecx)
jne badsw2
#endif
+ movl $TDS_RUNNING,TD_STATE(%ecx)
movl TD_PCB(%ecx),%edx
#if defined(SWTCH_OPTIM_STATS)
@@ -310,12 +317,14 @@ cpu_switch_load_gs:
#ifdef INVARIANTS
badsw2:
+ pushal
pushl $sw0_2
call panic
sw0_2: .asciz "cpu_switch: not TDS_RUNQ"
badsw3:
+ pushal
pushl $sw0_3
call panic
diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c
index 08c75e4..8282416 100644
--- a/sys/i386/i386/trap.c
+++ b/sys/i386/i386/trap.c
@@ -54,6 +54,7 @@
#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/proc.h>
+#include <sys/kse.h>
#include <sys/pioctl.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
@@ -267,6 +268,17 @@ trap(frame)
if (td->td_ucred != p->p_ucred)
cred_update_thread(td);
+ /*
+ * First check that we shouldn't just abort.
+ * But check if we are the single thread first!
+ */
+ if ((p->p_flag & P_WEXIT) && (p->p_singlethread != td)) {
+ mtx_lock_spin(&sched_lock);
+ PROC_LOCK(p);
+ thread_exit();
+ /* NOTREACHED */
+ }
+
switch (type) {
case T_PRIVINFLT: /* privileged instruction fault */
ucode = type;
@@ -939,11 +951,30 @@ syscall(frame)
mtx_unlock(&Giant);
}
#endif
+ KASSERT((td->td_kse != NULL), ("syscall: kse/thread UNLINKED"));
+ KASSERT((td->td_kse->ke_thread == td), ("syscall:kse/thread mismatch"));
sticks = td->td_kse->ke_sticks;
td->td_frame = &frame;
if (td->td_ucred != p->p_ucred)
cred_update_thread(td);
+ if (p->p_flag & P_KSES) {
+ /*
+ * If we are doing a syscall in a KSE environment,
+ * note where our mailbox is. There is always the
+ * possibility that we could do this lazily (in sleep()),
+ * but for now do it every time.
+ */
+ td->td_mailbox = (void *)fuword((caddr_t)td->td_kse->ke_mailbox
+ + offsetof(struct kse_mailbox, kmbx_current_thread));
+ if ((td->td_mailbox == NULL) ||
+ (td->td_mailbox == (void *)-1)) {
+ td->td_mailbox = NULL; /* single thread it.. */
+ td->td_flags &= ~TDF_UNBOUND;
+ } else {
+ td->td_flags |= TDF_UNBOUND;
+ }
+ }
params = (caddr_t)frame.tf_esp + sizeof(int);
code = frame.tf_eax;
orig_tf_eflags = frame.tf_eflags;
@@ -1045,6 +1076,12 @@ syscall(frame)
}
/*
+ * Release Giant if we previously set it.
+ */
+ if ((callp->sy_narg & SYF_MPSAFE) == 0)
+ mtx_unlock(&Giant);
+
+ /*
* Traced syscall.
*/
if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) {
@@ -1057,12 +1094,6 @@ syscall(frame)
*/
userret(td, &frame, sticks);
- /*
- * Release Giant if we previously set it.
- */
- if ((callp->sy_narg & SYF_MPSAFE) == 0)
- mtx_unlock(&Giant);
-
#ifdef KTRACE
if (KTRPOINT(td, KTR_SYSRET))
ktrsysret(code, error, td->td_retval[0]);
diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c
index 5dc2e14..04742c3 100644
--- a/sys/i386/i386/vm_machdep.c
+++ b/sys/i386/i386/vm_machdep.c
@@ -53,6 +53,7 @@
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/proc.h>
+#include <sys/kse.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/vnode.h>
@@ -254,15 +255,26 @@ cpu_set_fork_handler(td, func, arg)
}
void
-cpu_exit(td)
- register struct thread *td;
+cpu_exit(struct thread *td)
+{
+ struct mdproc *mdp;
+
+ mdp = &td->td_proc->p_md;
+ if (mdp->md_ldt)
+ user_ldt_free(td);
+ reset_dbregs();
+}
+
+void
+cpu_thread_exit(struct thread *td)
{
struct pcb *pcb = td->td_pcb;
- struct mdproc *mdp = &td->td_proc->p_md;
#ifdef DEV_NPX
npxexit(td);
#endif
if (pcb->pcb_ext != 0) {
+ /* XXXKSE XXXSMP not SMP SAFE.. what locks do we have? */
+ /* if (pcb->pcb_ext->ext_refcount-- == 1) ?? */
/*
* XXX do we need to move the TSS off the allocated pages
* before freeing them? (not done here)
@@ -271,8 +283,6 @@ cpu_exit(td)
ctob(IOPAGES + 1));
pcb->pcb_ext = 0;
}
- if (mdp->md_ldt)
- user_ldt_free(td);
if (pcb->pcb_flags & PCB_DBREGS) {
/*
* disable all hardware breakpoints
@@ -289,6 +299,146 @@ cpu_sched_exit(td)
}
void
+cpu_thread_setup(struct thread *td)
+{
+
+ td->td_pcb =
+ (struct pcb *)(td->td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1;
+ td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb - 16) - 1;
+}
+
+struct md_store {
+ struct pcb mds_pcb;
+ struct trapframe mds_frame;
+};
+
+void
+cpu_save_upcall(struct thread *td, struct kse *newkse)
+{
+ struct trapframe *tf;
+
+ newkse->ke_mdstorage = malloc(sizeof(struct md_store), M_TEMP,
+ M_WAITOK);
+ /* Note: use of M_WAITOK means it won't fail. */
+ /* set up shortcuts in MI section */
+ newkse->ke_pcb =
+ &(((struct md_store *)(newkse->ke_mdstorage))->mds_pcb);
+ newkse->ke_frame =
+ &(((struct md_store *)(newkse->ke_mdstorage))->mds_frame);
+ tf = newkse->ke_frame;
+
+ /* Copy the upcall pcb. Kernel mode & fp regs are here. */
+ /* XXXKSE this may be un-needed */
+ bcopy(td->td_pcb, newkse->ke_pcb, sizeof(struct pcb));
+
+ /*
+ * This initialises most of the user mode register values
+ * to good values. Eventually set them explicitly to know values
+ */
+ bcopy(td->td_frame, newkse->ke_frame, sizeof(struct trapframe));
+ tf->tf_edi = 0;
+ tf->tf_esi = 0; /* trampoline arg */
+ tf->tf_ebp = 0;
+ tf->tf_esp = (int)newkse->ke_stackbase + newkse->ke_stacksize - 16;
+ tf->tf_ebx = 0; /* trampoline arg */
+ tf->tf_eip = (int)newkse->ke_upcall;
+}
+
+void
+cpu_set_upcall(struct thread *td, void *pcb)
+{
+ struct pcb *pcb2;
+
+ td->td_flags |= TDF_UPCALLING;
+
+ /* Point the pcb to the top of the stack. */
+ pcb2 = td->td_pcb;
+
+ /*
+ * Copy the upcall pcb. This loads kernel regs.
+ * Those not loaded individually below get their default
+ * values here.
+ *
+ * XXXKSE It might be a good idea to simply skip this as
+ * the values of the other registers may be unimportant.
+ * This would remove any requirement for knowing the KSE
+ * at this time (see the matching comment below for
+ * more analysis) (need a good safe default).
+ */
+ bcopy(pcb, pcb2, sizeof(*pcb2));
+
+ /*
+ * Create a new fresh stack for the new thread.
+ * The -16 is so we can expand the trapframe if we go to vm86.
+ * Don't forget to set this stack value into whatever supplies
+ * the address for the fault handlers.
+ * The contexts are filled in at the time we actually DO the
+ * upcall as only then do we know which KSE we got.
+ */
+ td->td_frame = (struct trapframe *)((caddr_t)pcb2 - 16) - 1;
+
+ /*
+ * Set registers for trampoline to user mode. Leave space for the
+ * return address on stack. These are the kernel mode register values.
+ */
+ pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdir);
+ pcb2->pcb_edi = 0;
+ pcb2->pcb_esi = (int)fork_return; /* trampoline arg */
+ pcb2->pcb_ebp = 0;
+ pcb2->pcb_esp = (int)td->td_frame - sizeof(void *); /* trampoline arg */
+ pcb2->pcb_ebx = (int)td; /* trampoline arg */
+ pcb2->pcb_eip = (int)fork_trampoline;
+ pcb2->pcb_psl &= ~(PSL_I); /* interrupts must be disabled */
+ /*
+ * If we didn't copy the pcb, we'd need to do the following registers:
+ * pcb2->pcb_dr*: cloned above.
+ * pcb2->pcb_savefpu: cloned above.
+ * pcb2->pcb_flags: cloned above.
+ * pcb2->pcb_onfault: cloned above (always NULL here?).
+ * pcb2->pcb_gs: cloned above. XXXKSE ???
+ * pcb2->pcb_ext: cleared below.
+ */
+ pcb2->pcb_ext = NULL;
+}
+
+void
+cpu_set_args(struct thread *td, struct kse *ke)
+{
+ suword((void *)(ke->ke_frame->tf_esp + sizeof(void *)),
+ (int)ke->ke_mailbox);
+}
+
+void
+cpu_free_kse_mdstorage(struct kse *kse)
+{
+
+ free(kse->ke_mdstorage, M_TEMP);
+ kse->ke_mdstorage = NULL;
+ kse->ke_pcb = NULL;
+ kse->ke_frame = NULL;
+}
+
+int
+cpu_export_context(struct thread *td)
+{
+ struct trapframe *frame;
+ struct thread_mailbox *tm;
+ struct trapframe *uframe;
+ int error;
+
+ frame = td->td_frame;
+ tm = td->td_mailbox;
+ uframe = &tm->ctx.tfrm.tf_tf;
+ error = copyout(frame, uframe, sizeof(*frame));
+ /*
+ * "What about the fp regs?" I hear you ask.... XXXKSE
+ * Don't know where gs and "onstack" come from.
+ * May need to fiddle a few other values too.
+ */
+ return (error);
+}
+
+void
cpu_wait(p)
struct proc *p;
{
diff --git a/sys/i386/linux/linux_machdep.c b/sys/i386/linux/linux_machdep.c
index 245c96a..0819b67 100644
--- a/sys/i386/linux/linux_machdep.c
+++ b/sys/i386/linux/linux_machdep.c
@@ -361,7 +361,6 @@ linux_clone(struct thread *td, struct linux_clone_args *args)
* Make this runnable after we are finished with it.
*/
mtx_lock_spin(&sched_lock);
- p2->p_stat = SRUN;
setrunqueue(FIRST_THREAD_IN_PROC(p2));
mtx_unlock_spin(&sched_lock);
PROC_UNLOCK(p2);
diff --git a/sys/i386/linux/linux_ptrace.c b/sys/i386/linux/linux_ptrace.c
index 536188b..a19dcc7 100644
--- a/sys/i386/linux/linux_ptrace.c
+++ b/sys/i386/linux/linux_ptrace.c
@@ -409,7 +409,7 @@ linux_ptrace(struct thread *td, struct linux_ptrace_args *uap)
}
/* not currently stopped */
- if (p->p_stat != SSTOP || (p->p_flag & P_WAITED) == 0) {
+ if ((p->p_flag & (P_TRACED|P_WAITED)) == 0) {
error = EBUSY;
goto fail;
}
diff --git a/sys/ia64/ia64/trap.c b/sys/ia64/ia64/trap.c
index e38945f..4ffdb15 100644
--- a/sys/ia64/ia64/trap.c
+++ b/sys/ia64/ia64/trap.c
@@ -872,14 +872,14 @@ syscall(int code, u_int64_t *args, struct trapframe *framep)
break;
}
- userret(td, framep, sticks);
-
/*
* Release Giant if we had to get it.
*/
if ((callp->sy_narg & SYF_MPSAFE) == 0)
mtx_unlock(&Giant);
+ userret(td, framep, sticks);
+
#ifdef KTRACE
if (KTRPOINT(td, KTR_SYSRET))
ktrsysret(code, error, td->td_retval[0]);
@@ -1043,16 +1043,16 @@ ia32_syscall(struct trapframe *framep)
}
/*
- * Handle reschedule and other end-of-syscall issues
- */
- userret(td, framep, sticks);
-
- /*
* Release Giant if we previously set it.
*/
if ((callp->sy_narg & SYF_MPSAFE) == 0)
mtx_unlock(&Giant);
+ /*
+ * Handle reschedule and other end-of-syscall issues
+ */
+ userret(td, framep, sticks);
+
#ifdef KTRACE
if (KTRPOINT(td, KTR_SYSRET))
ktrsysret(code, error, td->td_retval[0]);
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index d5c5656..06cc8d8 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -289,6 +289,7 @@ proc0_init(void *dummy __unused)
* Initialize thread, process and pgrp structures.
*/
procinit();
+ threadinit();
/*
* Initialize sleep queue hash table
@@ -322,19 +323,34 @@ proc0_init(void *dummy __unused)
p->p_sysent = &aout_sysvec;
#endif
+ /*
+ * proc_linkup was already done in init_i386() or alphainit() etc.
+ * because the earlier code needed to follow td->td_proc. Otherwise
+ * I would have done it here.. maybe this means this should be
+ * done earlier too.
+ */
ke = &proc0.p_kse; /* XXXKSE */
kg = &proc0.p_ksegrp; /* XXXKSE */
p->p_flag = P_SYSTEM;
p->p_sflag = PS_INMEM;
- p->p_stat = SRUN;
- p->p_ksegrp.kg_nice = NZERO;
- kg->kg_pri_class = PRI_TIMESHARE;
- kg->kg_user_pri = PUSER;
- td->td_priority = PVM;
- td->td_base_pri = PUSER;
-
+ p->p_state = PRS_NORMAL;
+ td->td_state = TDS_RUNNING;
+ kg->kg_nice = NZERO;
+ kg->kg_pri_class = PRI_TIMESHARE;
+ kg->kg_user_pri = PUSER;
+ td->td_priority = PVM;
+ td->td_base_pri = PUSER;
+ td->td_kse = ke; /* XXXKSE */
+ ke->ke_oncpu = 0;
+ ke->ke_state = KES_RUNNING;
+ ke->ke_thread = td;
+ /* proc_linkup puts it in the idle queue, that's not what we want. */
+ TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
+ kg->kg_idle_kses--;
p->p_peers = 0;
p->p_leader = p;
+KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!"));
+
bcopy("swapper", p->p_comm, sizeof ("swapper"));
@@ -662,8 +678,7 @@ kick_init(const void *udata __unused)
td = FIRST_THREAD_IN_PROC(initproc);
mtx_lock_spin(&sched_lock);
- initproc->p_stat = SRUN;
- setrunqueue(FIRST_THREAD_IN_PROC(initproc)); /* XXXKSE */
+ setrunqueue(td); /* XXXKSE */
mtx_unlock_spin(&sched_lock);
}
SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_FIRST, kick_init, NULL)
diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c
index 425e3b7..cf8ba80 100644
--- a/sys/kern/init_sysent.c
+++ b/sys/kern/init_sysent.c
@@ -405,7 +405,7 @@ struct sysent sysent[] = {
{ 0, (sy_call_t *)kse_wakeup }, /* 380 = kse_wakeup */
{ AS(kse_new_args), (sy_call_t *)kse_new }, /* 381 = kse_new */
{ AS(thread_wakeup_args), (sy_call_t *)thread_wakeup }, /* 382 = thread_wakeup */
- { 0, (sy_call_t *)kse_yield }, /* 383 = kse_yield */
+ { SYF_MPSAFE | 0, (sy_call_t *)kse_yield }, /* 383 = kse_yield */
{ 0, (sy_call_t *)nosys }, /* 384 = __mac_get_proc */
{ 0, (sy_call_t *)nosys }, /* 385 = __mac_set_proc */
{ 0, (sy_call_t *)nosys }, /* 386 = __mac_get_fd */
diff --git a/sys/kern/kern_condvar.c b/sys/kern/kern_condvar.c
index 9d30d25..78585b2 100644
--- a/sys/kern/kern_condvar.c
+++ b/sys/kern/kern_condvar.c
@@ -48,7 +48,7 @@
*/
#define CV_ASSERT(cvp, mp, td) do { \
KASSERT((td) != NULL, ("%s: curthread NULL", __func__)); \
- KASSERT((td)->td_proc->p_stat == SRUN, ("%s: not SRUN", __func__)); \
+ KASSERT((td)->td_state == TDS_RUNNING, ("%s: not TDS_RUNNING", __func__)); \
KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__)); \
KASSERT((mp) != NULL, ("%s: mp NULL", __func__)); \
mtx_assert((mp), MA_OWNED | MA_NOTRECURSED); \
@@ -80,6 +80,7 @@
#endif
static void cv_timedwait_end(void *arg);
+static void cv_check_upcall(struct thread *td);
/*
* Initialize a condition variable. Must be called before use.
@@ -109,14 +110,47 @@ cv_destroy(struct cv *cvp)
*/
/*
+ * Decide if we need to queue an upcall.
+ * This is copied from msleep(), perhaps this should be a common function.
+ */
+static void
+cv_check_upcall(struct thread *td)
+{
+
+ /*
+ * If we are capable of async syscalls and there isn't already
+ * another one ready to return, start a new thread
+ * and queue it as ready to run. Note that there is danger here
+ * because we need to make sure that we don't sleep allocating
+ * the thread (recursion here might be bad).
+ * Hence the TDF_INMSLEEP flag.
+ */
+ if ((td->td_proc->p_flag & P_KSES) && td->td_mailbox &&
+ (td->td_flags & TDF_INMSLEEP) == 0) {
+ /*
+ * If we have no queued work to do,
+ * upcall to the UTS to see if it has more work.
+ * We don't need to upcall now, just queue it.
+ */
+ if (TAILQ_FIRST(&td->td_ksegrp->kg_runq) == NULL) {
+ /* Don't recurse here! */
+ td->td_flags |= TDF_INMSLEEP;
+ thread_schedule_upcall(td, td->td_kse);
+ td->td_flags &= ~TDF_INMSLEEP;
+ }
+ }
+}
+
+/*
* Switch context.
*/
static __inline void
cv_switch(struct thread *td)
{
- td->td_proc->p_stat = SSLEEP;
+ td->td_state = TDS_SLP;
td->td_proc->p_stats->p_ru.ru_nvcsw++;
+ cv_check_upcall(td);
mi_switch();
CTR3(KTR_PROC, "cv_switch: resume thread %p (pid %d, %s)", td,
td->td_proc->p_pid, td->td_proc->p_comm);
@@ -135,7 +169,7 @@ cv_switch_catch(struct thread *td)
* We put ourselves on the sleep queue and start our timeout before
* calling cursig, as we could stop there, and a wakeup or a SIGCONT (or
* both) could occur while we were stopped. A SIGCONT would cause us to
- * be marked as SSLEEP without resuming us, thus we must be ready for
+ * be marked as TDS_SLP without resuming us, thus we must be ready for
* sleep when cursig is called. If the wakeup happens while we're
* stopped, td->td_wchan will be 0 upon return from cursig.
*/
@@ -143,13 +177,15 @@ cv_switch_catch(struct thread *td)
mtx_unlock_spin(&sched_lock);
p = td->td_proc;
PROC_LOCK(p);
- sig = cursig(p); /* XXXKSE */
+ sig = cursig(td); /* XXXKSE */
+ if (thread_suspend_check(1))
+ sig = SIGSTOP;
mtx_lock_spin(&sched_lock);
PROC_UNLOCK(p);
if (sig != 0) {
if (td->td_wchan != NULL)
cv_waitq_remove(td);
- td->td_proc->p_stat = SRUN;
+ td->td_state = TDS_RUNNING; /* XXXKSE */
} else if (td->td_wchan != NULL) {
cv_switch(td);
}
@@ -175,7 +211,6 @@ cv_waitq_add(struct cv *cvp, struct thread *td)
td->td_flags |= TDF_CVWAITQ;
td->td_wchan = cvp;
td->td_wmesg = cvp->cv_description;
- td->td_kse->ke_slptime = 0; /* XXXKSE */
td->td_ksegrp->kg_slptime = 0; /* XXXKSE */
td->td_base_pri = td->td_priority;
CTR3(KTR_PROC, "cv_waitq_add: thread %p (pid %d, %s)", td,
@@ -285,7 +320,7 @@ cv_wait_sig(struct cv *cvp, struct mtx *mp)
PROC_LOCK(p);
if (sig == 0)
- sig = cursig(p); /* XXXKSE */
+ sig = cursig(td); /* XXXKSE */
if (sig != 0) {
if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
rval = EINTR;
@@ -293,6 +328,8 @@ cv_wait_sig(struct cv *cvp, struct mtx *mp)
rval = ERESTART;
}
PROC_UNLOCK(p);
+ if (p->p_flag & P_WEXIT)
+ rval = EINTR;
#ifdef KTRACE
if (KTRPOINT(td, KTR_CSW))
@@ -363,6 +400,8 @@ cv_timedwait(struct cv *cvp, struct mtx *mp, int timo)
mi_switch();
}
+ if (td->td_proc->p_flag & P_WEXIT)
+ rval = EWOULDBLOCK;
mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
if (KTRPOINT(td, KTR_CSW))
@@ -436,12 +475,11 @@ cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo)
td->td_proc->p_stats->p_ru.ru_nivcsw++;
mi_switch();
}
-
mtx_unlock_spin(&sched_lock);
PROC_LOCK(p);
if (sig == 0)
- sig = cursig(p);
+ sig = cursig(td);
if (sig != 0) {
if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
rval = EINTR;
@@ -450,6 +488,9 @@ cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo)
}
PROC_UNLOCK(p);
+ if (p->p_flag & P_WEXIT)
+ rval = EINTR;
+
#ifdef KTRACE
if (KTRPOINT(td, KTR_CSW))
ktrcsw(0, 0);
@@ -477,15 +518,13 @@ cv_wakeup(struct cv *cvp)
TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq);
td->td_flags &= ~TDF_CVWAITQ;
td->td_wchan = 0;
- if (td->td_proc->p_stat == SSLEEP) {
+ if (td->td_state == TDS_SLP) {
/* OPTIMIZED EXPANSION OF setrunnable(td); */
CTR3(KTR_PROC, "cv_signal: thread %p (pid %d, %s)",
td, td->td_proc->p_pid, td->td_proc->p_comm);
if (td->td_ksegrp->kg_slptime > 1) /* XXXKSE */
updatepri(td);
- td->td_kse->ke_slptime = 0;
td->td_ksegrp->kg_slptime = 0;
- td->td_proc->p_stat = SRUN;
if (td->td_proc->p_sflag & PS_INMEM) {
setrunqueue(td);
maybe_resched(td);
@@ -568,7 +607,7 @@ cv_timedwait_end(void *arg)
td->td_flags &= ~TDF_TIMEOUT;
setrunqueue(td);
} else if (td->td_wchan != NULL) {
- if (td->td_proc->p_stat == SSLEEP) /* XXXKSE */
+ if (td->td_state == TDS_SLP) /* XXXKSE */
setrunnable(td);
else
cv_waitq_remove(td);
@@ -577,3 +616,27 @@ cv_timedwait_end(void *arg)
td->td_flags |= TDF_TIMOFAIL;
mtx_unlock_spin(&sched_lock);
}
+
+/*
+ * For now only abort interruptable waits.
+ * The others will have to either complete on their own or have a timeout.
+ */
+void
+cv_abort(struct thread *td)
+{
+
+ CTR3(KTR_PROC, "cv_abort: thread %p (pid %d, %s)", td,
+ td->td_proc->p_pid,
+ td->td_proc->p_comm);
+ mtx_lock_spin(&sched_lock);
+ if ((td->td_flags & (TDF_SINTR|TDF_TIMEOUT)) == TDF_SINTR) {
+ if (td->td_wchan != NULL) {
+ if (td->td_state == TDS_SLP)
+ setrunnable(td);
+ else
+ cv_waitq_remove(td);
+ }
+ }
+ mtx_unlock_spin(&sched_lock);
+}
+
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index feaa123..0cd7f27 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -154,12 +154,14 @@ execve(td, uap)
PROC_LOCK(p);
KASSERT((p->p_flag & P_INEXEC) == 0,
("%s(): process already has P_INEXEC flag", __func__));
+ if ((p->p_flag & P_KSES) && thread_single(SNGLE_EXIT)) {
+ PROC_UNLOCK(p);
+ mtx_unlock(&Giant);
+ return (ERESTART); /* Try again later. */
+ }
+ /* If we get here all other threads are dead. */
p->p_flag |= P_INEXEC;
PROC_UNLOCK(p);
-
-/* XXXKSE */
-/* !!!!!!!! we need abort all the other threads of this process before we */
-/* proceed beyond his point! */
/*
* Initialize part of the common data
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index 63a5135..fea5438 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -145,6 +145,67 @@ exit1(td, rv)
/*
* XXXXKSE: MUST abort all other threads before proceeding past here.
*/
+ PROC_LOCK(p);
+ if (p->p_flag & P_KSES) {
+ /*
+ * First check if some other thread got here before us..
+ * if so, act apropriatly, (exit or suspend);
+ */
+ thread_suspend_check(0);
+ /*
+ * Here is a trick..
+ * We need to free up our KSE to process other threads
+ * so that we can safely set the UNBOUND flag
+ * (whether or not we have a mailbox) as we are NEVER
+ * going to return to the user.
+ * The flag will not be set yet if we are exiting
+ * because of a signal, pagefault, or similar
+ * (or even an exit(2) from the UTS).
+ */
+ td->td_flags |= TDF_UNBOUND;
+
+ /*
+ * Kill off the other threads. This requires
+ * Some co-operation from other parts of the kernel
+ * so it may not be instant.
+ * With this state set:
+ * Any thread entering the kernel from userspace will
+ * thread_exit() in trap(). Any thread attempting to
+ * sleep will return immediatly
+ * with EINTR or EWOULDBLOCK, which will hopefully force them
+ * to back out to userland, freeing resources as they go, and
+ * anything attempting to return to userland will thread_exit()
+ * from userret(). thread_exit() will unsuspend us
+ * when the last other thread exits.
+ */
+ if (thread_single(SNGLE_EXIT)) {
+ panic ("Exit: Single threading fouled up");
+ }
+ /*
+ * All other activity in this process is now stopped.
+ * Remove excess KSEs and KSEGRPS. XXXKSE (when we have them)
+ * ...
+ * Turn off threading support.
+ */
+ p->p_flag &= ~P_KSES;
+ td->td_flags &= ~TDF_UNBOUND;
+ thread_single_end(); /* Don't need this any more. */
+ }
+ /*
+ * With this state set:
+ * Any thread entering the kernel from userspace will thread_exit()
+ * in trap(). Any thread attempting to sleep will return immediately
+ * with EINTR or EWOULDBLOCK, which will hopefully force them
+ * to back out to userland, freeing resources as they go, and
+ * anything attempting to return to userland will thread_exit()
+ * from userret(). thread_exit() will do a wakeup on p->p_numthreads
+ * if it transitions to 1.
+ */
+
+ p->p_flag |= P_WEXIT;
+ PROC_UNLOCK(p);
+ if (td->td_kse->ke_mdstorage)
+ cpu_free_kse_mdstorage(td->td_kse);
/* Are we a task leader? */
PROC_LOCK(p);
@@ -185,7 +246,6 @@ exit1(td, rv)
*/
PROC_LOCK(p);
p->p_flag &= ~(P_TRACED | P_PPWAIT);
- p->p_flag |= P_WEXIT;
SIGEMPTYSET(p->p_siglist);
PROC_UNLOCK(p);
if (timevalisset(&p->p_realtimer.it_value))
@@ -434,22 +494,24 @@ exit1(td, rv)
/*
* We have to wait until after releasing all locks before
- * changing p_stat. If we block on a mutex then we will be
+ * changing p_state. If we block on a mutex then we will be
* back at SRUN when we resume and our parent will never
* harvest us.
*/
- p->p_stat = SZOMB;
+ p->p_state = PRS_ZOMBIE;
wakeup(p->p_pptr);
PROC_UNLOCK(p->p_pptr);
- PROC_UNLOCK(p);
-
cnt.v_swtch++;
binuptime(PCPU_PTR(switchtime));
PCPU_SET(switchticks, ticks);
- cpu_sched_exit(td);
- cpu_throw();
+ cpu_sched_exit(td); /* XXXKSE check if this should be in thread_exit */
+ /*
+ * Make sure this thread is discarded from the zombie.
+ * This will also release this thread's reference to the ucred.
+ */
+ thread_exit();
panic("exit1");
}
@@ -504,6 +566,8 @@ wait1(td, uap, compat)
register int nfound;
register struct proc *p, *q, *t;
int status, error;
+ struct kse *ke;
+ struct ksegrp *kg;
q = td->td_proc;
if (uap->pid == 0) {
@@ -540,7 +604,7 @@ loop:
}
nfound++;
- if (p->p_stat == SZOMB) {
+ if (p->p_state == PRS_ZOMBIE) {
/*
* charge childs scheduling cpu usage to parent
* XXXKSE assume only one thread & kse & ksegrp
@@ -656,6 +720,21 @@ loop:
}
/*
+ * There should only be one KSE/KSEGRP but
+ * do it right anyhow.
+ */
+ FOREACH_KSEGRP_IN_PROC(p, kg) {
+ FOREACH_KSE_IN_GROUP(kg, ke) {
+ /* Free the KSE spare thread. */
+ if (ke->ke_tdspare != NULL) {
+ thread_free(ke->ke_tdspare);
+ ke->ke_tdspare = NULL;
+ }
+ }
+ }
+ thread_reap(); /* check for zombie threads */
+
+ /*
* Give vm and machine-dependent layer a chance
* to free anything that cpu_exit couldn't
* release while still running in process context.
@@ -669,7 +748,7 @@ loop:
mtx_unlock(&Giant);
return (0);
}
- if (p->p_stat == SSTOP && (p->p_flag & P_WAITED) == 0 &&
+ if (P_SHOULDSTOP(p) && ((p->p_flag & P_WAITED) == 0) &&
(p->p_flag & P_TRACED || uap->options & WUNTRACED)) {
p->p_flag |= P_WAITED;
sx_xunlock(&proctree_lock);
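Taken together, the kern_exit.c changes above make a threaded process collapse back to a single thread before the normal exit path runs. A condensed reading aid, not the committed code (error handling and the surrounding exit work are omitted):

	PROC_LOCK(p);
	if (p->p_flag & P_KSES) {
		thread_suspend_check(0);	/* another thread may have started exiting */
		td->td_flags |= TDF_UNBOUND;	/* we will never return to userland */
		if (thread_single(SNGLE_EXIT))	/* wait for every other thread to die */
			panic("Exit: Single threading fouled up");
		p->p_flag &= ~P_KSES;		/* back to a plain single-threaded process */
		td->td_flags &= ~TDF_UNBOUND;
		thread_single_end();
	}
	p->p_flag |= P_WEXIT;
	PROC_UNLOCK(p);
	/* ... the rest of exit1(), now ending in thread_exit() rather than cpu_throw() ... */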
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index 016653b..eac0267 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -212,23 +212,6 @@ sysctl_kern_randompid(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW,
0, 0, sysctl_kern_randompid, "I", "Random PID modulus");
-#if 0
-void
-kse_init(struct kse *kse1, struct kse *kse2)
-{
-}
-
-void
-thread_init(struct thread *thread1, struct thread *thread2)
-{
-}
-
-void
-ksegrp_init(struct ksegrp *ksegrp1, struct ksegrp *ksegrp2)
-{
-}
-#endif
-
int
fork1(td, flags, procp)
struct thread *td; /* parent proc */
@@ -296,6 +279,29 @@ fork1(td, flags, procp)
return (0);
}
+ if (p1->p_flag & P_KSES) {
+ /*
+ * Idle the other threads for a moment.
+ * Since the user space is copied, it must remain stable.
+ * In addition, all threads (from the user perspective)
+ * need to be either suspended or in the kernel,
+ * where they will try to restart in the parent and will
+ * be aborted in the child.
+ */
+ PROC_LOCK(p1);
+ if (thread_single(SNGLE_NO_EXIT)) {
+ /* Abort; someone else is single-threading before us. */
+ PROC_UNLOCK(p1);
+ return (ERESTART);
+ }
+ PROC_UNLOCK(p1);
+ /*
+ * All other activity in this process
+ * is now suspended at the user boundary,
+ * (or other safe places if we think of any).
+ */
+ }
+
/* Allocate new proc. */
newproc = uma_zalloc(proc_zone, M_WAITOK);
@@ -311,6 +317,11 @@ fork1(td, flags, procp)
if ((nprocs >= maxproc - 10 && uid != 0) || nprocs >= maxproc) {
sx_xunlock(&allproc_lock);
uma_zfree(proc_zone, newproc);
+ if (p1->p_flag & P_KSES) {
+ PROC_LOCK(p1);
+ thread_single_end();
+ PROC_UNLOCK(p1);
+ }
tsleep(&forksleep, PUSER, "fork", hz / 2);
return (EAGAIN);
}
@@ -325,6 +336,11 @@ fork1(td, flags, procp)
if (!ok) {
sx_xunlock(&allproc_lock);
uma_zfree(proc_zone, newproc);
+ if (p1->p_flag & P_KSES) {
+ PROC_LOCK(p1);
+ thread_single_end();
+ PROC_UNLOCK(p1);
+ }
tsleep(&forksleep, PUSER, "fork", hz / 2);
return (EAGAIN);
}
@@ -411,7 +427,7 @@ again:
lastpid = trypid;
p2 = newproc;
- p2->p_stat = SIDL; /* protect against others */
+ p2->p_state = PRS_NEW; /* protect against others */
p2->p_pid = trypid;
LIST_INSERT_HEAD(&allproc, p2, p_list);
LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
@@ -449,7 +465,7 @@ again:
* Start by zeroing the section of proc that is zero-initialized,
* then copy the section that is copied directly from the parent.
*/
- td2 = thread_get(p2);
+ td2 = thread_alloc();
ke2 = &p2->p_kse;
kg2 = &p2->p_ksegrp;
@@ -459,8 +475,10 @@ again:
(unsigned) RANGEOF(struct proc, p_startzero, p_endzero));
bzero(&ke2->ke_startzero,
(unsigned) RANGEOF(struct kse, ke_startzero, ke_endzero));
+#if 0 /* bzero'd by the thread allocator */
bzero(&td2->td_startzero,
(unsigned) RANGEOF(struct thread, td_startzero, td_endzero));
+#endif
bzero(&kg2->kg_startzero,
(unsigned) RANGEOF(struct ksegrp, kg_startzero, kg_endzero));
@@ -482,9 +500,22 @@ again:
* XXXKSE Theoretically only the running thread would get copied
* Others in the kernel would be 'aborted' in the child.
* i.e return E*something*
+ * On SMP we would have to stop them running on
+ * other CPUs! (set a flag in the proc that stops
+ * all returns to userland until completed)
+ * This is wrong but ok for 1:1.
*/
proc_linkup(p2, kg2, ke2, td2);
+ /* Set up the thread as an active thread (as if runnable). */
+ TAILQ_REMOVE(&kg2->kg_iq, ke2, ke_kgrlist);
+ kg2->kg_idle_kses--;
+ ke2->ke_state = KES_UNQUEUED;
+ ke2->ke_thread = td2;
+ td2->td_kse = ke2;
+ td2->td_flags &= ~TDF_UNBOUND; /* For the rest of this syscall. */
+KASSERT((ke2->ke_kgrlist.tqe_next != ke2), ("linked to self!"));
+
/* note.. XXXKSE no pcb or u-area yet */
/*
@@ -699,7 +730,6 @@ again:
p2->p_acflag = AFORK;
if ((flags & RFSTOPPED) == 0) {
mtx_lock_spin(&sched_lock);
- p2->p_stat = SRUN;
setrunqueue(td2);
mtx_unlock_spin(&sched_lock);
}
@@ -803,6 +833,9 @@ fork_exit(callout, arg, frame)
struct proc *p = td->td_proc;
td->td_kse->ke_oncpu = PCPU_GET(cpuid);
+ p->p_state = PRS_NORMAL;
+ td->td_state = TDS_RUNNING; /* Already done in switch() on 386. */
+ td->td_kse->ke_state = KES_RUNNING;
/*
* Finish setting up thread glue. We need to initialize
* the thread into a td_critnest=1 state. Some platforms
@@ -814,7 +847,7 @@ fork_exit(callout, arg, frame)
sched_lock.mtx_lock = (uintptr_t)td;
sched_lock.mtx_recurse = 0;
cpu_critical_fork_exit();
- CTR3(KTR_PROC, "fork_exit: new proc %p (pid %d, %s)", p, p->p_pid,
+ CTR3(KTR_PROC, "fork_exit: new thread %p (pid %d, %s)", td, p->p_pid,
p->p_comm);
if (PCPU_GET(switchtime.sec) == 0)
binuptime(PCPU_PTR(switchtime));
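fork1() uses the same single-threading machinery, but without killing anything: the parent is held quiescent while its address space is copied, and both error paths above drop that state again before the retry sleep. A minimal sketch of the pattern, assuming the success path (outside the hunks shown here) ends the single-threaded state the same way once the child is set up:

	if (p1->p_flag & P_KSES) {
		PROC_LOCK(p1);
		if (thread_single(SNGLE_NO_EXIT)) {
			PROC_UNLOCK(p1);
			return (ERESTART);	/* someone else is single-threading */
		}
		PROC_UNLOCK(p1);
	}
	/* ... allocate the new proc and copy from p1 while it is quiescent ... */
	if (p1->p_flag & P_KSES) {
		PROC_LOCK(p1);
		thread_single_end();	/* let the other threads run again */
		PROC_UNLOCK(p1);
	}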
diff --git a/sys/kern/kern_idle.c b/sys/kern/kern_idle.c
index 29194b7..306f2a5 100644
--- a/sys/kern/kern_idle.c
+++ b/sys/kern/kern_idle.c
@@ -40,6 +40,7 @@ idle_setup(void *dummy)
struct pcpu *pc;
#endif
struct proc *p;
+ struct thread *td;
int error;
#ifdef SMP
@@ -60,7 +61,10 @@ idle_setup(void *dummy)
panic("idle_setup: kthread_create error %d\n", error);
p->p_flag |= P_NOLOAD;
- p->p_stat = SRUN;
+ td = FIRST_THREAD_IN_PROC(p);
+ td->td_state = TDS_RUNQ;
+ td->td_kse->ke_state = KES_ONRUNQ;
+ td->td_kse->ke_flags |= KEF_IDLEKSE;
#ifdef SMP
}
#endif
@@ -75,16 +79,22 @@ idle_proc(void *dummy)
#ifdef DIAGNOSTIC
int count;
#endif
+ struct thread *td;
+ struct proc *p;
+ td = curthread;
+ p = td->td_proc;
+ td->td_state = TDS_RUNNING;
+ td->td_kse->ke_state = KES_RUNNING;
for (;;) {
mtx_assert(&Giant, MA_NOTOWNED);
#ifdef DIAGNOSTIC
count = 0;
- while (count >= 0 && procrunnable() == 0) {
+ while (count >= 0 && kserunnable() == 0) {
#else
- while (procrunnable() == 0) {
+ while (kserunnable() == 0) {
#endif
/*
* This is a good place to put things to be done in
@@ -103,8 +113,9 @@ idle_proc(void *dummy)
}
mtx_lock_spin(&sched_lock);
- curproc->p_stats->p_ru.ru_nvcsw++;
+ p->p_stats->p_ru.ru_nvcsw++;
mi_switch();
+ td->td_kse->ke_state = KES_RUNNING;
mtx_unlock_spin(&sched_lock);
}
}
diff --git a/sys/kern/kern_intr.c b/sys/kern/kern_intr.c
index d65dc82..fb9c092 100644
--- a/sys/kern/kern_intr.c
+++ b/sys/kern/kern_intr.c
@@ -201,7 +201,7 @@ ithread_create(struct ithd **ithread, int vector, int flags,
td = FIRST_THREAD_IN_PROC(p); /* XXXKSE */
td->td_ksegrp->kg_pri_class = PRI_ITHD;
td->td_priority = PRI_MAX_ITHD;
- p->p_stat = SWAIT;
+ td->td_state = TDS_IWAIT;
ithd->it_td = td;
td->td_ithd = ithd;
if (ithread != NULL)
@@ -229,8 +229,7 @@ ithread_destroy(struct ithd *ithread)
}
ithread->it_flags |= IT_DEAD;
mtx_lock_spin(&sched_lock);
- if (p->p_stat == SWAIT) {
- p->p_stat = SRUN; /* XXXKSE */
+ if (td->td_state == TDS_IWAIT) {
setrunqueue(td);
}
mtx_unlock_spin(&sched_lock);
@@ -327,7 +326,7 @@ ok:
* handler as being dead and let the ithread do the actual removal.
*/
mtx_lock_spin(&sched_lock);
- if (ithread->it_td->td_proc->p_stat != SWAIT) {
+ if (ithread->it_td->td_state != TDS_IWAIT) {
handler->ih_flags |= IH_DEAD;
/*
@@ -374,8 +373,8 @@ ithread_schedule(struct ithd *ithread, int do_switch)
td = ithread->it_td;
p = td->td_proc;
KASSERT(p != NULL, ("ithread %s has no process", ithread->it_name));
- CTR4(KTR_INTR, "%s: pid %d: (%s) need = %d", __func__, p->p_pid, p->p_comm,
- ithread->it_need);
+ CTR4(KTR_INTR, "%s: pid %d: (%s) need = %d",
+ __func__, p->p_pid, p->p_comm, ithread->it_need);
/*
* Set it_need to tell the thread to keep running if it is already
@@ -387,14 +386,16 @@ ithread_schedule(struct ithd *ithread, int do_switch)
*/
ithread->it_need = 1;
mtx_lock_spin(&sched_lock);
- if (p->p_stat == SWAIT) {
+ if (td->td_state == TDS_IWAIT) {
CTR2(KTR_INTR, "%s: setrunqueue %d", __func__, p->p_pid);
- p->p_stat = SRUN;
- setrunqueue(td); /* XXXKSE */
- if (do_switch && curthread->td_critnest == 1 &&
- curthread->td_proc->p_stat == SRUN) {
+ setrunqueue(td);
+ if (do_switch &&
+ (curthread->td_critnest == 1)/* &&
+ (curthread->td_state == TDS_RUNNING) XXXKSE*/) {
+#if 0 /* not needed in KSE */
if (curthread != PCPU_GET(idlethread))
setrunqueue(curthread);
+#endif
curthread->td_proc->p_stats->p_ru.ru_nivcsw++;
mi_switch();
} else {
@@ -402,7 +403,7 @@ ithread_schedule(struct ithd *ithread, int do_switch)
}
} else {
CTR4(KTR_INTR, "%s: pid %d: it_need %d, state %d",
- __func__, p->p_pid, ithread->it_need, p->p_stat);
+ __func__, p->p_pid, ithread->it_need, p->p_state);
}
mtx_unlock_spin(&sched_lock);
@@ -550,7 +551,7 @@ restart:
*/
if (ithd->it_enable != NULL)
ithd->it_enable(ithd->it_vector);
- p->p_stat = SWAIT; /* we're idle */
+ td->td_state = TDS_IWAIT; /* we're idle */
p->p_stats->p_ru.ru_nvcsw++;
CTR2(KTR_INTR, "%s: pid %d: done", __func__, p->p_pid);
mi_switch();
diff --git a/sys/kern/kern_kthread.c b/sys/kern/kern_kthread.c
index a456a86..e8e2fea 100644
--- a/sys/kern/kern_kthread.c
+++ b/sys/kern/kern_kthread.c
@@ -109,8 +109,7 @@ kthread_create(void (*func)(void *), void *arg,
mtx_lock_spin(&sched_lock);
p2->p_sflag |= PS_INMEM;
if (!(flags & RFSTOPPED)) {
- p2->p_stat = SRUN;
- setrunqueue(FIRST_THREAD_IN_PROC(p2)); /* XXXKSE */
+ setrunqueue(FIRST_THREAD_IN_PROC(p2));
}
mtx_unlock_spin(&sched_lock);
diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index 08bca8d..c2e79d0 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -119,23 +119,20 @@ propagate_priority(struct thread *td)
return;
}
+ KASSERT(td->td_state != TDS_SURPLUS, ("Mutex owner SURPLUS"));
+ MPASS(td->td_proc != NULL);
MPASS(td->td_proc->p_magic == P_MAGIC);
- KASSERT(td->td_proc->p_stat != SSLEEP, ("sleeping thread owns a mutex"));
+ KASSERT(td->td_state != TDS_SLP,
+ ("sleeping thread owns a mutex"));
if (td->td_priority <= pri) /* lower is higher priority */
return;
- /*
- * Bump this thread's priority.
- */
- td->td_priority = pri;
/*
* If lock holder is actually running, just bump priority.
*/
- if (thread_running(td)) {
- MPASS(td->td_proc->p_stat == SRUN
- || td->td_proc->p_stat == SZOMB
- || td->td_proc->p_stat == SSTOP);
+ if (td->td_state == TDS_RUNNING) {
+ td->td_priority = pri;
return;
}
@@ -151,20 +148,26 @@ propagate_priority(struct thread *td)
* If on run queue move to new run queue, and quit.
* XXXKSE this gets a lot more complicated under threads
* but try anyhow.
+ * We should have a special call to do this more efficiently.
*/
- if (td->td_proc->p_stat == SRUN) {
+ if (td->td_state == TDS_RUNQ) {
MPASS(td->td_blocked == NULL);
remrunqueue(td);
+ td->td_priority = pri;
setrunqueue(td);
return;
}
+ /*
+ * Adjust for any other cases.
+ */
+ td->td_priority = pri;
/*
* If we aren't blocked on a mutex, we should be.
*/
- KASSERT(td->td_proc->p_stat == SMTX, (
+ KASSERT(td->td_state == TDS_MTX, (
"process %d(%s):%d holds %s but isn't blocked on a mutex\n",
- td->td_proc->p_pid, td->td_proc->p_comm, td->td_proc->p_stat,
+ td->td_proc->p_pid, td->td_proc->p_comm, td->td_state,
m->mtx_object.lo_name));
/*
@@ -590,7 +593,7 @@ _mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
*/
td->td_blocked = m;
td->td_mtxname = m->mtx_object.lo_name;
- td->td_proc->p_stat = SMTX;
+ td->td_state = TDS_MTX;
propagate_priority(td);
if (LOCK_LOG_TEST(&m->mtx_object, opts))
@@ -727,7 +730,6 @@ _mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
m, td1);
td1->td_blocked = NULL;
- td1->td_proc->p_stat = SRUN;
setrunqueue(td1);
if (td->td_critnest == 1 && td1->td_priority < pri) {
@@ -744,7 +746,6 @@ _mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
}
}
#endif
- setrunqueue(td);
if (LOCK_LOG_TEST(&m->mtx_object, opts))
CTR2(KTR_LOCK,
"_mtx_unlock_sleep: %p switching out lock=%p", m,
diff --git a/sys/kern/kern_poll.c b/sys/kern/kern_poll.c
index a197bc0..9dd6924 100644
--- a/sys/kern/kern_poll.c
+++ b/sys/kern/kern_poll.c
@@ -503,7 +503,6 @@ poll_idle(void)
mtx_unlock(&Giant);
mtx_assert(&Giant, MA_NOTOWNED);
mtx_lock_spin(&sched_lock);
- setrunqueue(td);
td->td_proc->p_stats->p_ru.ru_nvcsw++;
mi_switch();
mtx_unlock_spin(&sched_lock);
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index a5378d9..8b15fc2 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -44,6 +44,7 @@
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysproto.h>
+#include <sys/kse.h>
#include <sys/sysctl.h>
#include <sys/filedesc.h>
#include <sys/tty.h>
@@ -111,44 +112,28 @@ procinit()
uihashinit();
}
-/*
- * Note that we do not link to the proc's ucred here
- * The thread is linked as if running but no KSE assigned
- */
-static void
-thread_link(struct thread *td, struct ksegrp *kg)
-{
- struct proc *p = kg->kg_proc;
-
- td->td_proc = p;
- td->td_ksegrp = kg;
- td->td_last_kse = &p->p_kse;
-
- TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
- TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist);
- td->td_critnest = 0;
- td->td_kse = NULL;
- cpu_thread_link(td);
-}
-
/*
* KSE is linked onto the idle queue.
*/
-static void
+void
kse_link(struct kse *ke, struct ksegrp *kg)
{
struct proc *p = kg->kg_proc;
+KASSERT((ke->ke_state != KES_ONRUNQ), ("linking suspect kse on run queue"));
TAILQ_INSERT_HEAD(&kg->kg_kseq, ke, ke_kglist);
kg->kg_kses++;
+KASSERT((ke->ke_state != KES_IDLE), ("already on idle queue"));
+ ke->ke_state = KES_IDLE;
TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
+ kg->kg_idle_kses++;
ke->ke_proc = p;
ke->ke_ksegrp = kg;
ke->ke_thread = NULL;
ke->ke_oncpu = NOCPU;
}
-static void
+void
ksegrp_link(struct ksegrp *kg, struct proc *p)
{
@@ -159,10 +144,13 @@ ksegrp_link(struct ksegrp *kg, struct proc *p)
TAILQ_INIT(&kg->kg_iq); /* all kses in ksegrp */
kg->kg_proc = p;
/* the following counters are in the -zero- section and may not need clearing */
+ kg->kg_numthreads = 0;
kg->kg_runnable = 0;
kg->kg_kses = 0;
+ kg->kg_idle_kses = 0;
kg->kg_runq_kses = 0; /* XXXKSE change name */
/* link it in now that it's consitant */
+ p->p_numksegrps++;
TAILQ_INSERT_HEAD(&p->p_ksegrps, kg, kg_ksegrp);
}
@@ -177,30 +165,13 @@ proc_linkup(struct proc *p, struct ksegrp *kg,
TAILQ_INIT(&p->p_ksegrps); /* all ksegrps in proc */
TAILQ_INIT(&p->p_threads); /* all threads in proc */
+ TAILQ_INIT(&p->p_suspended); /* Threads suspended */
ksegrp_link(kg, p);
kse_link(ke, kg);
thread_link(td, kg);
- /* link them together for 1:1 */
- td->td_kse = ke;
- ke->ke_thread = td;
}
-/* temporary version is ultra simple while we are in 1:1 mode */
-struct thread *
-thread_get(struct proc *p)
-{
- struct thread *td = &p->p_xxthread;
-
- return (td);
-}
-
-
-/*********************
-* STUB KSE syscalls
-*********************/
-
-/* struct thread_wakeup_args { struct thread_mailbox *tmbx; }; */
int
thread_wakeup(struct thread *td, struct thread_wakeup_args *uap)
{
@@ -219,7 +190,11 @@ int
kse_yield(struct thread *td, struct kse_yield_args *uap)
{
- return(ENOSYS);
+ PROC_LOCK(td->td_proc);
+ mtx_lock_spin(&sched_lock);
+ thread_exit();
+ /* NOTREACHED */
+ return(0);
}
int kse_wakeup(struct thread *td, struct kse_wakeup_args *uap)
@@ -228,16 +203,80 @@ int kse_wakeup(struct thread *td, struct kse_wakeup_args *uap)
return(ENOSYS);
}
-
-int
-kse_new(struct thread *td, struct kse_new_args *uap)
+/*
+ * If no new KSEGRP is requested and this is the first call, use the current
+ * KSE and don't schedule an upcall.
+ * In all other situations, allocate a new KSE and schedule an upcall on it.
+ */
/* struct kse_new_args {
struct kse_mailbox *mbx;
int new_grp_flag;
}; */
+int
+kse_new(struct thread *td, struct kse_new_args *uap)
{
+ struct kse *newkse;
+ struct proc *p;
+ struct kse_mailbox mbx;
+ int err;
- return (ENOSYS);
+ p = td->td_proc;
+ if ((err = copyin(uap->mbx, &mbx, sizeof(mbx))))
+ return (err);
+ PROC_LOCK(p);
+ /*
+ * If we have no KSE mode set, just set it, and skip KSE and KSEGRP
+ * creation. You cannot request a new group with the first one as
+ * you are effectively getting one. Instead, go directly to saving
+ * the upcall info.
+ */
+ if ((td->td_proc->p_flag & P_KSES) || (uap->new_grp_flag)) {
+
+ return (EINVAL); /* XXX */
+ /*
+ * If newgroup then create the new group.
+ * Check we have the resources for this.
+ */
+ /* Copy lots of fields from the current KSEGRP. */
+ /* Create the new KSE */
+ /* Copy lots of fields from the current KSE. */
+ } else {
+ /*
+ * We are switching to KSEs so just
+ * use the preallocated ones for this call.
+ * XXXKSE if we have to initialise any fields for KSE
+ * mode operation, do it here.
+ */
+ newkse = td->td_kse;
+ }
+ /*
+ * Fill out the KSE-mode specific fields of the new kse.
+ */
+ PROC_UNLOCK(p);
+ mtx_lock_spin(&sched_lock);
+ mi_switch(); /* Save current registers to PCB. */
+ mtx_unlock_spin(&sched_lock);
+ newkse->ke_upcall = mbx.kmbx_upcall;
+ newkse->ke_stackbase = mbx.kmbx_stackbase;
+ newkse->ke_stacksize = mbx.kmbx_stacksize;
+ newkse->ke_mailbox = uap->mbx;
+ cpu_save_upcall(td, newkse);
+ /* Note that we are the returning syscall */
+ td->td_retval[0] = 0;
+ td->td_retval[1] = 0;
+
+ if ((td->td_proc->p_flag & P_KSES) || (uap->new_grp_flag)) {
+ thread_schedule_upcall(td, newkse);
+ } else {
+ /*
+ * Don't set this until we are truly ready, because
+ * things will start acting differently. Return to the
+ * calling code for the first time. Assuming we set up
+ * the mailboxes right, all syscalls after this will be
+ * asynchronous.
+ */
+ td->td_proc->p_flag |= P_KSES;
+ }
+ return (0);
}
/*
@@ -554,7 +593,7 @@ fixjobc(p, pgrp, entering)
LIST_FOREACH(p, &p->p_children, p_sibling) {
if ((hispgrp = p->p_pgrp) != pgrp &&
hispgrp->pg_session == mysession &&
- p->p_stat != SZOMB) {
+ p->p_state != PRS_ZOMBIE) {
PGRP_LOCK(hispgrp);
if (entering)
hispgrp->pg_jobc++;
@@ -583,7 +622,7 @@ orphanpg(pg)
mtx_lock_spin(&sched_lock);
LIST_FOREACH(p, &pg->pg_members, p_pglist) {
- if (p->p_stat == SSTOP) {
+ if (P_SHOULDSTOP(p)) {
mtx_unlock_spin(&sched_lock);
LIST_FOREACH(p, &pg->pg_members, p_pglist) {
PROC_LOCK(p);
@@ -674,7 +713,9 @@ fill_kinfo_proc(p, kp)
kp->ki_sigcatch = p->p_procsig->ps_sigcatch;
}
mtx_lock_spin(&sched_lock);
- if (p->p_stat != SIDL && p->p_stat != SZOMB && p->p_vmspace != NULL) {
+ if (p->p_state != PRS_NEW &&
+ p->p_state != PRS_ZOMBIE &&
+ p->p_vmspace != NULL) {
struct vmspace *vm = p->p_vmspace;
kp->ki_size = vm->vm_map.size;
@@ -697,35 +738,65 @@ fill_kinfo_proc(p, kp)
p->p_stats->p_cru.ru_stime.tv_usec;
}
td = FIRST_THREAD_IN_PROC(p);
- if (td->td_wmesg != NULL)
- strncpy(kp->ki_wmesg, td->td_wmesg, sizeof(kp->ki_wmesg) - 1);
- if (p->p_stat == SMTX) {
- kp->ki_kiflag |= KI_MTXBLOCK;
- strncpy(kp->ki_mtxname, td->td_mtxname,
- sizeof(kp->ki_mtxname) - 1);
+ if (!(p->p_flag & P_KSES)) {
+ if (td->td_wmesg != NULL) {
+ strncpy(kp->ki_wmesg, td->td_wmesg,
+ sizeof(kp->ki_wmesg) - 1);
+ }
+ if (td->td_state == TDS_MTX) {
+ kp->ki_kiflag |= KI_MTXBLOCK;
+ strncpy(kp->ki_mtxname, td->td_mtxname,
+ sizeof(kp->ki_mtxname) - 1);
+ }
}
- kp->ki_stat = p->p_stat;
+
+ if (p->p_state == PRS_NORMAL) { /* XXXKSE very approximate */
+ if ((td->td_state == TDS_RUNQ) ||
+ (td->td_state == TDS_RUNNING)) {
+ kp->ki_stat = SRUN;
+ } else if (td->td_state == TDS_SLP) {
+ kp->ki_stat = SSLEEP;
+ } else if (P_SHOULDSTOP(p)) {
+ kp->ki_stat = SSTOP;
+ } else if (td->td_state == TDS_MTX) {
+ kp->ki_stat = SMTX;
+ } else {
+ kp->ki_stat = SWAIT;
+ }
+ } else if (p->p_state == PRS_ZOMBIE) {
+ kp->ki_stat = SZOMB;
+ } else {
+ kp->ki_stat = SIDL;
+ }
+
kp->ki_sflag = p->p_sflag;
kp->ki_swtime = p->p_swtime;
kp->ki_pid = p->p_pid;
/* vvv XXXKSE */
- bintime2timeval(&p->p_runtime, &tv);
- kp->ki_runtime = tv.tv_sec * (u_int64_t)1000000 + tv.tv_usec;
- kp->ki_pctcpu = p->p_kse.ke_pctcpu;
- kp->ki_estcpu = td->td_ksegrp->kg_estcpu;
- kp->ki_slptime = td->td_ksegrp->kg_slptime;
- kp->ki_wchan = td->td_wchan;
- kp->ki_pri.pri_level = td->td_priority;
- kp->ki_pri.pri_user = td->td_ksegrp->kg_user_pri;
- kp->ki_pri.pri_class = td->td_ksegrp->kg_pri_class;
- kp->ki_pri.pri_native = td->td_base_pri;
- kp->ki_nice = td->td_ksegrp->kg_nice;
- kp->ki_rqindex = p->p_kse.ke_rqindex;
- kp->ki_oncpu = p->p_kse.ke_oncpu;
- kp->ki_lastcpu = td->td_lastcpu;
- kp->ki_tdflags = td->td_flags;
- kp->ki_pcb = td->td_pcb;
- kp->ki_kstack = (void *)td->td_kstack;
+ if (!(p->p_flag & P_KSES)) {
+ bintime2timeval(&p->p_runtime, &tv);
+ kp->ki_runtime = tv.tv_sec * (u_int64_t)1000000 + tv.tv_usec;
+ kp->ki_pctcpu = p->p_kse.ke_pctcpu;
+ kp->ki_estcpu = p->p_ksegrp.kg_estcpu;
+ kp->ki_slptime = p->p_ksegrp.kg_slptime;
+ kp->ki_wchan = td->td_wchan;
+ kp->ki_pri.pri_level = td->td_priority;
+ kp->ki_pri.pri_user = p->p_ksegrp.kg_user_pri;
+ kp->ki_pri.pri_class = p->p_ksegrp.kg_pri_class;
+ kp->ki_pri.pri_native = td->td_base_pri;
+ kp->ki_nice = p->p_ksegrp.kg_nice;
+ kp->ki_rqindex = p->p_kse.ke_rqindex;
+ kp->ki_oncpu = p->p_kse.ke_oncpu;
+ kp->ki_lastcpu = td->td_lastcpu;
+ kp->ki_tdflags = td->td_flags;
+ kp->ki_pcb = td->td_pcb;
+ kp->ki_kstack = (void *)td->td_kstack;
+ } else {
+ kp->ki_oncpu = -1;
+ kp->ki_lastcpu = -1;
+ kp->ki_tdflags = -1;
+ /* All the rest are 0. */
+ }
/* ^^^ XXXKSE */
mtx_unlock_spin(&sched_lock);
sp = NULL;
@@ -878,7 +949,7 @@ sysctl_kern_proc(SYSCTL_HANDLER_ARGS)
/*
* Skip embryonic processes.
*/
- if (p->p_stat == SIDL) {
+ if (p->p_state == PRS_NEW) {
PROC_UNLOCK(p);
continue;
}
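Consumers of kinfo_proc (ps(1), libkvm) still expect the old single p_stat value, so fill_kinfo_proc() now derives one from the new process and thread states. The mapping added above can be read as the following helper; this is a sketch for illustration, not code from the commit, and it uses only the states named here:

	static int
	legacy_kinfo_stat(struct proc *p, struct thread *td)
	{
		if (p->p_state == PRS_ZOMBIE)
			return (SZOMB);
		if (p->p_state != PRS_NORMAL)	/* PRS_NEW, etc. */
			return (SIDL);
		if (td->td_state == TDS_RUNQ || td->td_state == TDS_RUNNING)
			return (SRUN);
		if (td->td_state == TDS_SLP)
			return (SSLEEP);
		if (P_SHOULDSTOP(p))
			return (SSTOP);
		if (td->td_state == TDS_MTX)
			return (SMTX);
		return (SWAIT);
	}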
diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c
index d2cb69d..0803cff 100644
--- a/sys/kern/kern_shutdown.c
+++ b/sys/kern/kern_shutdown.c
@@ -281,7 +281,6 @@ boot(int howto)
DROP_GIANT();
for (subiter = 0; subiter < 50 * iter; subiter++) {
mtx_lock_spin(&sched_lock);
- setrunqueue(curthread);
curthread->td_proc->p_stats->p_ru.ru_nvcsw++;
mi_switch(); /* Allow interrupt threads to run */
mtx_unlock_spin(&sched_lock);
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index a561a19..e8ded21 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -84,7 +84,7 @@ static int killpg1(struct thread *td, int sig, int pgid, int all);
static int sig_ffs(sigset_t *set);
static int sigprop(int sig);
static void stop(struct proc *);
-
+static void tdsignal(struct thread *td, int sig, sig_t action);
static int filt_sigattach(struct knote *kn);
static void filt_sigdetach(struct knote *kn);
static int filt_signal(struct knote *kn, long hint);
@@ -168,16 +168,18 @@ static int sigproptbl[NSIG] = {
* Determine signal that should be delivered to process p, the current
* process, 0 if none. If there is a pending stop signal with default
* action, the process stops in issignal().
+ * XXXKSE the check for a pending stop is not done under KSE
*
* MP SAFE.
*/
int
-cursig(struct proc *p)
+cursig(struct thread *td)
{
+ struct proc *p = td->td_proc;
PROC_LOCK_ASSERT(p, MA_OWNED);
mtx_assert(&sched_lock, MA_NOTOWNED);
- return (SIGPENDING(p) ? issignal(p) : 0);
+ return (SIGPENDING(p) ? issignal(td) : 0);
}
/*
@@ -1042,7 +1044,7 @@ killpg1(td, sig, pgid, all)
PROC_UNLOCK(p);
continue;
}
- if (p->p_stat == SZOMB) {
+ if (p->p_state == PRS_ZOMBIE) {
PROC_UNLOCK(p);
continue;
}
@@ -1243,12 +1245,10 @@ psignal(p, sig)
register struct proc *p;
register int sig;
{
- register int prop;
register sig_t action;
struct thread *td;
-#ifdef SMP
- struct ksegrp *kg;
-#endif
+ register int prop;
+
KASSERT(_SIG_VALID(sig),
("psignal(): invalid signal %d\n", sig));
@@ -1257,7 +1257,6 @@ psignal(p, sig)
KNOTE(&p->p_klist, NOTE_SIGNAL | sig);
prop = sigprop(sig);
-
/*
* If proc is traced, always give parent a chance;
* if signal event is tracked by procfs, give *that*
@@ -1283,29 +1282,6 @@ psignal(p, sig)
action = SIG_DFL;
}
- /*
- * bring the priority of a process up if we want it to get
- * killed in this lifetime.
- * XXXKSE think if a better way to do this.
- *
- * What we need to do is see if there is a thread that will
- * be able to accept the signal. e.g.
- * FOREACH_THREAD_IN_PROC() {
- * if runnable, we're done
- * else pick one at random.
- * }
- */
- /* XXXKSE
- * For now there is one thread per proc.
- * Effectively select one sucker thread..
- */
- td = FIRST_THREAD_IN_PROC(p);
- mtx_lock_spin(&sched_lock);
- if ((p->p_ksegrp.kg_nice > NZERO) && (action == SIG_DFL) &&
- (prop & SA_KILL) && ((p->p_flag & P_TRACED) == 0))
- p->p_ksegrp.kg_nice = NZERO; /* XXXKSE */
- mtx_unlock_spin(&sched_lock);
-
if (prop & SA_CONT)
SIG_STOPSIGMASK(p->p_siglist);
@@ -1316,48 +1292,125 @@ psignal(p, sig)
* is default; don't stop the process below if sleeping,
* and don't clear any pending SIGCONT.
*/
- if (prop & SA_TTYSTOP && p->p_pgrp->pg_jobc == 0 &&
- action == SIG_DFL)
+ if ((prop & SA_TTYSTOP) &&
+ (p->p_pgrp->pg_jobc == 0) &&
+ (action == SIG_DFL))
return;
SIG_CONTSIGMASK(p->p_siglist);
}
SIGADDSET(p->p_siglist, sig);
mtx_lock_spin(&sched_lock);
signotify(p);
+ mtx_unlock_spin(&sched_lock);
/*
- * Defer further processing for signals which are held,
- * except that stopped processes must be continued by SIGCONT.
+ * Some signals have a process-wide effect and a per-thread
+ * component. Most processing occurs when the process next
+ * tries to cross the user boundary, however there are some
+ * times when processing needs to be done immediately, such as
+ * waking up threads so that they can cross the user boundary.
+ * We try to do the per-process part here.
*/
- if (action == SIG_HOLD && (!(prop & SA_CONT) || p->p_stat != SSTOP)) {
- mtx_unlock_spin(&sched_lock);
- return;
- }
-
- switch (p->p_stat) {
-
- case SSLEEP:
+ if (P_SHOULDSTOP(p)) {
/*
- * If process is sleeping uninterruptibly
- * we can't interrupt the sleep... the signal will
- * be noticed when the process returns through
- * trap() or syscall().
+ * The process is in stopped mode. All the threads should be
+ * either winding down or already on the suspended queue.
*/
- if ((td->td_flags & TDF_SINTR) == 0)
+ if (p->p_flag & P_TRACED) {
+ /*
+ * The traced process is already stopped,
+ * so no further action is necessary.
+ * No signal can restart us.
+ */
goto out;
+ }
+
+ if (sig == SIGKILL) {
+ /*
+ * SIGKILL sets process running.
+ * It will die elsewhere.
+ * All threads must be restarted.
+ */
+ p->p_flag &= ~P_STOPPED;
+ goto runfast;
+ }
+
+ if (prop & SA_CONT) {
+ /*
+ * If SIGCONT is default (or ignored), we continue the
+ * process but don't leave the signal in p_siglist as
+ * it has no further action. If SIGCONT is held, we
+ * continue the process and leave the signal in
+ * p_siglist. If the process catches SIGCONT, let it
+ * handle the signal itself. If it isn't waiting on
+ * an event, it goes back to run state.
+ * Otherwise, process goes back to sleep state.
+ */
+ p->p_flag &= ~P_STOPPED_SGNL;
+ if (action == SIG_DFL) {
+ SIGDELSET(p->p_siglist, sig);
+ } else if (action == SIG_CATCH) {
+ /*
+ * The process wants to catch it so it needs
+ * to run at least one thread, but which one?
+ * It would seem that the answer would be to
+ * run an upcall in the next KSE to run, and
+ * deliver the signal that way. In a NON KSE
+ * process, we need to make sure that the
+ * single thread is runnable asap.
+ * XXXKSE for now however, make them all run.
+ */
+ goto runfast;
+ }
+ /*
+ * The signal is not ignored or caught.
+ */
+ mtx_lock_spin(&sched_lock);
+ thread_unsuspend(p); /* Checks if should do it. */
+ mtx_unlock_spin(&sched_lock);
+ goto out;
+ }
+
+ if (prop & SA_STOP) {
+ /*
+ * Already stopped, don't need to stop again
+ * (If we did the shell could get confused).
+ */
+ SIGDELSET(p->p_siglist, sig);
+ goto out;
+ }
+
/*
- * Process is sleeping and traced... make it runnable
- * so it can discover the signal in issignal() and stop
- * for the parent.
+ * All other kinds of signals:
+ * If a thread is sleeping interruptibly, simulate a
+ * wakeup so that when it is continued it will be made
+ * runnable and can look at the signal. However, don't make
+ * the process runnable, leave it stopped.
+ * It may run a bit until it hits a thread_suspend_check().
+ *
+ * XXXKSE I don't understand this at all.
*/
- if (p->p_flag & P_TRACED)
- goto run;
+ mtx_lock_spin(&sched_lock);
+ FOREACH_THREAD_IN_PROC(p, td) {
+ if (td->td_wchan && (td->td_flags & TDF_SINTR)) {
+ if (td->td_flags & TDF_CVWAITQ)
+ cv_waitq_remove(td);
+ else
+ unsleep(td);
+ setrunnable(td);
+ }
+ }
+ mtx_unlock_spin(&sched_lock);
+ goto out;
/*
- * If SIGCONT is default (or ignored) and process is
- * asleep, we are finished; the process should not
- * be awakened.
+ * XXXKSE What about threads that are waiting on mutexes?
+ * Shouldn't they abort too?
*/
- if ((prop & SA_CONT) && action == SIG_DFL) {
+ } else if (p->p_state == PRS_NORMAL) {
+ if (prop & SA_CONT) {
+ /*
+ * Already active, don't need to start again.
+ */
SIGDELSET(p->p_siglist, sig);
goto out;
}
@@ -1370,133 +1423,128 @@ psignal(p, sig)
if (prop & SA_STOP) {
if (action != SIG_DFL)
goto runfast;
+
/*
* If a child holding parent blocked,
* stopping could cause deadlock.
*/
if (p->p_flag & P_PPWAIT)
goto out;
- mtx_unlock_spin(&sched_lock);
SIGDELSET(p->p_siglist, sig);
p->p_xstat = sig;
PROC_LOCK(p->p_pptr);
- if ((p->p_pptr->p_procsig->ps_flag & PS_NOCLDSTOP) == 0)
+ if (!(p->p_pptr->p_procsig->ps_flag & PS_NOCLDSTOP))
psignal(p->p_pptr, SIGCHLD);
PROC_UNLOCK(p->p_pptr);
mtx_lock_spin(&sched_lock);
stop(p);
+ mtx_unlock_spin(&sched_lock);
goto out;
} else
goto runfast;
/* NOTREACHED */
+ } else {
+ /* Not in "NORMAL" state. discard the signal. */
+ SIGDELSET(p->p_siglist, sig);
+ goto out;
+ }
- case SSTOP:
- /*
- * If traced process is already stopped,
- * then no further action is necessary.
- */
- if (p->p_flag & P_TRACED)
- goto out;
+ /*
+ * The process is not stopped so we need to apply the signal to all the
+ * running threads.
+ */
- /*
- * Kill signal always sets processes running.
- */
- if (sig == SIGKILL)
- goto runfast;
+runfast:
+ FOREACH_THREAD_IN_PROC(p, td)
+ tdsignal(td, sig, action);
+ mtx_lock_spin(&sched_lock);
+ thread_unsuspend(p);
+ mtx_unlock_spin(&sched_lock);
+out:
+ /* If we jump here, sched_lock should not be owned. */
+ mtx_assert(&sched_lock, MA_NOTOWNED);
+}
- if (prop & SA_CONT) {
- /*
- * If SIGCONT is default (or ignored), we continue the
- * process but don't leave the signal in p_siglist, as
- * it has no further action. If SIGCONT is held, we
- * continue the process and leave the signal in
- * p_siglist. If the process catches SIGCONT, let it
- * handle the signal itself. If it isn't waiting on
- * an event, then it goes back to run state.
- * Otherwise, process goes back to sleep state.
- */
- if (action == SIG_DFL)
- SIGDELSET(p->p_siglist, sig);
- if (action == SIG_CATCH)
- goto runfast;
- /*
- * XXXKSE
- * do this for each thread.
- */
- if (p->p_flag & P_KSES) {
- mtx_assert(&sched_lock,
- MA_OWNED | MA_NOTRECURSED);
- FOREACH_THREAD_IN_PROC(p, td) {
- if (td->td_wchan == NULL) {
- setrunnable(td); /* XXXKSE */
- } else {
- /* mark it as sleeping */
- }
- }
- } else {
- p->p_flag |= P_CONTINUED;
- wakeup(p->p_pptr);
- if (td->td_wchan == NULL)
- goto run;
- p->p_stat = SSLEEP;
- }
- goto out;
+/*
+ * The force of a signal has been directed against a single
+ * thread. We need to see what we can do about knocking it
+ * out of any sleep it may be in etc.
+ */
+static void
+tdsignal(struct thread *td, int sig, sig_t action)
+{
+ struct proc *p = td->td_proc;
+ register int prop;
+
+ prop = sigprop(sig);
+
+ /*
+ * Bring the priority of a process up if we want it to get
+ * killed in this lifetime.
+ * XXXKSE we should shift the priority to the thread.
+ */
+ mtx_lock_spin(&sched_lock);
+ if ((action == SIG_DFL) && (prop & SA_KILL)) {
+ if (td->td_priority > PUSER) {
+ td->td_priority = PUSER;
}
+ }
+ mtx_unlock_spin(&sched_lock);
- if (prop & SA_STOP) {
- /*
- * Already stopped, don't need to stop again.
- * (If we did the shell could get confused.)
- */
- SIGDELSET(p->p_siglist, sig);
+ /*
+ * Defer further processing for signals which are held,
+ * except that stopped processes must be continued by SIGCONT.
+ */
+ if (action == SIG_HOLD) {
+ goto out;
+ }
+ mtx_lock_spin(&sched_lock);
+ if (td->td_state == TDS_SLP) {
+ /*
+ * If the thread is sleeping uninterruptibly
+ * we can't interrupt the sleep... the signal will
+ * be noticed when the process returns through
+ * trap() or syscall().
+ */
+ if ((td->td_flags & TDF_SINTR) == 0) {
+ mtx_unlock_spin(&sched_lock);
goto out;
}
-
/*
- * If process is sleeping interruptibly, then simulate a
- * wakeup so that when it is continued, it will be made
- * runnable and can look at the signal. But don't make
- * the process runnable, leave it stopped.
- * XXXKSE should we wake ALL blocked threads?
+ * Process is sleeping and traced. Make it runnable
+ * so it can discover the signal in issignal() and stop
+ * for its parent.
*/
- if (p->p_flag & P_KSES) {
- FOREACH_THREAD_IN_PROC(p, td) {
- if (td->td_wchan && (td->td_flags & TDF_SINTR)){
- if (td->td_flags & TDF_CVWAITQ)
- cv_waitq_remove(td);
- else
- unsleep(td); /* XXXKSE */
- }
- }
- } else {
- if (td->td_wchan && td->td_flags & TDF_SINTR) {
- if (td->td_flags & TDF_CVWAITQ)
- cv_waitq_remove(td);
- else
- unsleep(td); /* XXXKSE */
- }
+ if (p->p_flag & P_TRACED) {
+ p->p_flag &= ~P_STOPPED_TRACE;
+ goto run;
}
- goto out;
+ mtx_unlock_spin(&sched_lock);
+ /*
+ * If SIGCONT is default (or ignored) and process is
+ * asleep, we are finished; the process should not
+ * be awakened.
+ */
+ if ((prop & SA_CONT) && action == SIG_DFL) {
+ SIGDELSET(p->p_siglist, sig);
+ goto out;
+ }
+ goto runfast;
+ /* NOTREACHED */
- default:
+ } else {
/*
- * SRUN, SIDL, SZOMB do nothing with the signal,
+ * Other states do nothing with the signal immediately,
* other than kicking ourselves if we are running.
* It will either never be noticed, or noticed very soon.
*/
- if (p->p_stat == SRUN) {
+ mtx_unlock_spin(&sched_lock);
+ if (td->td_state == TDS_RUNQ ||
+ td->td_state == TDS_RUNNING) {
+ signotify(td->td_proc);
#ifdef SMP
- struct kse *ke;
- struct thread *td = curthread;
-/* we should only deliver to one thread.. but which one? */
- FOREACH_KSEGRP_IN_PROC(p, kg) {
- FOREACH_KSE_IN_GROUP(kg, ke) {
- if (ke->ke_thread == td) {
- continue;
- }
- forward_signal(ke->ke_thread);
- }
- }
+ if (td->td_state == TDS_RUNNING && td != curthread)
+ forward_signal(td);
#endif
}
goto out;
@@ -1506,21 +1554,17 @@ psignal(p, sig)
runfast:
/*
* Raise priority to at least PUSER.
- * XXXKSE Should we make them all run fast?
- * Maybe just one would be enough?
*/
-
- if (FIRST_THREAD_IN_PROC(p)->td_priority > PUSER) {
- FIRST_THREAD_IN_PROC(p)->td_priority = PUSER;
+ mtx_lock_spin(&sched_lock);
+ if (td->td_priority > PUSER) {
+ td->td_priority = PUSER;
}
run:
- /* If we jump here, sched_lock has to be owned. */
mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED);
- setrunnable(td); /* XXXKSE */
-out:
+ setrunnable(td);
mtx_unlock_spin(&sched_lock);
- /* Once we get here, sched_lock should not be owned. */
+out:
mtx_assert(&sched_lock, MA_NOTOWNED);
}
@@ -1533,16 +1577,18 @@ out:
* by checking the pending signal masks in cursig.) The normal call
* sequence is
*
- * while (sig = cursig(curproc))
+ * while (sig = cursig(curthread))
* postsig(sig);
*/
int
-issignal(p)
- register struct proc *p;
+issignal(td)
+ struct thread *td;
{
+ struct proc *p;
sigset_t mask;
register int sig, prop;
+ p = td->td_proc;
PROC_LOCK_ASSERT(p, MA_OWNED);
for (;;) {
int traced = (p->p_flag & P_TRACED) || (p->p_stops & S_SIG);
@@ -1576,6 +1622,7 @@ issignal(p)
PROC_UNLOCK(p->p_pptr);
mtx_lock_spin(&sched_lock);
stop(p);
+ td->td_state = TDS_UNQUEUED;
PROC_UNLOCK(p);
DROP_GIANT();
p->p_stats->p_ru.ru_nivcsw++;
@@ -1633,6 +1680,7 @@ issignal(p)
#endif
break; /* == ignore */
}
+#if 0
/*
* If there is a pending stop signal to process
* with default action, stop here,
@@ -1647,8 +1695,10 @@ issignal(p)
break; /* == ignore */
p->p_xstat = sig;
PROC_LOCK(p->p_pptr);
- if ((p->p_pptr->p_procsig->ps_flag & PS_NOCLDSTOP) == 0)
+ if ((p->p_pptr->p_procsig->ps_flag &
+ PS_NOCLDSTOP) == 0) {
psignal(p->p_pptr, SIGCHLD);
+ }
PROC_UNLOCK(p->p_pptr);
mtx_lock_spin(&sched_lock);
stop(p);
@@ -1660,7 +1710,9 @@ issignal(p)
PICKUP_GIANT();
PROC_LOCK(p);
break;
- } else if (prop & SA_IGNORE) {
+ } else
+#endif
+ if (prop & SA_IGNORE) {
/*
* Except for SIGCONT, shouldn't get here.
* Default action is to ignore; drop it.
@@ -1706,7 +1758,7 @@ stop(p)
PROC_LOCK_ASSERT(p, MA_OWNED);
mtx_assert(&sched_lock, MA_OWNED);
- p->p_stat = SSTOP;
+ p->p_flag |= P_STOPPED_SGNL;
p->p_flag &= ~P_WAITED;
wakeup(p->p_pptr);
}
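In short, psignal() now makes the process-wide decision and hands the per-thread work to the new tdsignal(). For an ordinary signal sent to a running, unstopped process the flow reduces to the sketch below; this is a reading aid only, and the stopped-process, SIGSTOP and SIGCONT branches above take different paths:

	SIGADDSET(p->p_siglist, sig);
	mtx_lock_spin(&sched_lock);
	signotify(p);
	mtx_unlock_spin(&sched_lock);
	if (p->p_state == PRS_NORMAL) {
		/* "runfast": let every thread react (wake it, raise its priority, ...) */
		FOREACH_THREAD_IN_PROC(p, td)
			tdsignal(td, sig, action);
		mtx_lock_spin(&sched_lock);
		thread_unsuspend(p);
		mtx_unlock_spin(&sched_lock);
	}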
diff --git a/sys/kern/kern_subr.c b/sys/kern/kern_subr.c
index 5e32eee..c63091c 100644
--- a/sys/kern/kern_subr.c
+++ b/sys/kern/kern_subr.c
@@ -538,7 +538,6 @@ uio_yield()
mtx_lock_spin(&sched_lock);
DROP_GIANT();
td->td_priority = td->td_ksegrp->kg_user_pri; /* XXXKSE */
- setrunqueue(td);
td->td_proc->p_stats->p_ru.ru_nivcsw++;
mi_switch();
mtx_unlock_spin(&sched_lock);
diff --git a/sys/kern/kern_switch.c b/sys/kern/kern_switch.c
index 2b531c0..40d3ef8 100644
--- a/sys/kern/kern_switch.c
+++ b/sys/kern/kern_switch.c
@@ -26,6 +26,69 @@
* $FreeBSD$
*/
+/***
+
+Here is the logic..
+
+If there are N processors, then there are at most N KSEs (kernel
+schedulable entities) working to process threads that belong to a
+KSEGROUP (kg). If there are X of these KSEs actually running at the
+moment in question, then there are at most M (= N - X) of these KSEs on
+the run queue, as running KSEs are not on the queue.
+
+Runnable threads are queued off the KSEGROUP in priority order.
+If there are M or more threads runnable, the top M threads
+(by priority) are 'preassigned' to the M KSEs not running. The KSEs take
+their priority from those threads and are put on the run queue.
+
+The last thread that had a priority high enough to have a KSE associated
+with it, AND IS ON THE RUN QUEUE is pointed to by
+kg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs
+assigned, either because all the available KSEs are actively running or
+because there are no threads queued, that pointer is NULL.
+
+When a KSE is removed from the run queue to become runnable, we know
+it was associated with the highest priority thread in the queue (at the head
+of the queue). If it is also the last assigned we know M was 1 and must
+now be 0. Since the thread is no longer queued that pointer must be
+removed from it. Since we know there were no more KSEs available,
+(M was 1 and is now 0) and since we are not FREEING our KSE
+but using it, we know there are STILL no more KSEs available, we can prove
+that the next thread in the ksegrp list will not have a KSE to assign to
+it, so we can show that the pointer must be made 'invalid' (NULL).
+
+The pointer exists so that when a new thread is made runnable, it can
+have its priority compared with the last assigned thread to see if
+it should 'steal' its KSE or not, i.e. whether it is earlier
+on the list than that thread or later. If it is earlier, the KSE is
+removed from the last assigned thread (which is now not assigned a KSE)
+and reassigned to the new thread, which is placed earlier in the list.
+The pointer is then backed up to the previous thread (which may or may not
+be the new thread).
+
+When a thread sleeps or is removed, the KSE becomes available and if there
+are queued threads that are not assigned KSEs, the highest priority one of
+them is assigned the KSE, which is then placed back on the run queue at
+the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
+to point to it.
+
+The following diagram shows 2 KSEs and 3 threads from a single process.
+
+ RUNQ: --->KSE---KSE--... (KSEs queued at priorities from threads)
+ \ \____
+ \ \
+ KSEGROUP---thread--thread--thread (queued in priority order)
+ \ /
+ \_______________/
+ (last_assigned)
+
+The result of this scheme is that the M available KSEs are always
+queued at the priorities they have inherited from the M highest priority
+threads for that KSEGROUP. If this situation changes, the KSEs are
+reassigned to keep this true.
+
+*/
+
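The invariant described in the comment above can be illustrated as a check over a single ksegrp: every thread on kg_runq up to and including kg_last_assigned owns a KSE, and every thread after it owns none. The function below is an illustration only, not part of this commit (thread_sanity_check() further down performs a similar check):

	static void
	kg_assert_assignment(struct ksegrp *kg)
	{
		struct thread *td2;
		int past_last;

		past_last = (kg->kg_last_assigned == NULL);
		TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
			if (past_last)
				KASSERT(td2->td_kse == NULL,
				    ("thread past last_assigned has a KSE"));
			else
				KASSERT(td2->td_kse != NULL,
				    ("assigned thread has no KSE"));
			if (td2 == kg->kg_last_assigned)
				past_last = 1;
		}
	}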
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
@@ -44,34 +107,442 @@ CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);
static struct runq runq;
SYSINIT(runq, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, runq_init, &runq)
+static void runq_readjust(struct runq *rq, struct kse *ke);
+/************************************************************************
+ * Functions that manipulate runnability from a thread perspective. *
+ ************************************************************************/
+
/*
- * Wrappers which implement old interface; act on global run queue.
+ * Select the KSE that will be run next. From that find the thread, and
+ * remove it from the KSEGRP's run queue. If there is thread clustering,
+ * this will be what does it.
*/
-
struct thread *
choosethread(void)
{
- return (runq_choose(&runq)->ke_thread);
+ struct kse *ke;
+ struct thread *td;
+ struct ksegrp *kg;
+
+ if ((ke = runq_choose(&runq))) {
+ td = ke->ke_thread;
+ KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
+ kg = ke->ke_ksegrp;
+ if (td->td_flags & TDF_UNBOUND) {
+ TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
+ if (kg->kg_last_assigned == td)
+ if (TAILQ_PREV(td, threadqueue, td_runq)
+ != NULL)
+ printf("Yo MAMA!\n");
+ kg->kg_last_assigned = TAILQ_PREV(td,
+ threadqueue, td_runq);
+ /*
+ * If we have started running an upcall,
+ * then TDF_UNBOUND was set because the thread was
+ * created without a KSE. Now that we have one,
+ * and it is our time to run, we make sure
+ * that BOUND semantics apply for the rest of
+ * the journey to userland, and into the UTS.
+ */
+#ifdef NOTYET
+ if (td->td_flags & TDF_UPCALLING)
+ td->td_flags &= ~TDF_UNBOUND;
+#endif
+ }
+ kg->kg_runnable--;
+ CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
+ td, td->td_priority);
+ } else {
+ /* Pretend the idle thread was on the run queue. */
+ td = PCPU_GET(idlethread);
+ /* Simulate that it was on the run queue */
+ td->td_state = TDS_RUNQ;
+ td->td_kse->ke_state = KES_UNQUEUED;
+ CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
+ }
+ thread_sanity_check(td);
+ return (td);
+}
+
+/*
+ * Given a KSE (now surplus), either assign a new runnable thread to it
+ * (and put it in the run queue) or put it in the ksegrp's idle KSE list.
+ * Assumes the kse is not linked to any threads any more. (has been cleaned).
+ */
+void
+kse_reassign(struct kse *ke)
+{
+ struct ksegrp *kg;
+ struct thread *td;
+
+ kg = ke->ke_ksegrp;
+
+KASSERT((ke->ke_state != KES_ONRUNQ), ("kse_reassigning non-free kse"));
+ /*
+ * Find the first unassigned thread:
+ * if there is a 'last assigned' then see what's next;
+ * otherwise look at what is first.
+ */
+ if ((td = kg->kg_last_assigned)) {
+ td = TAILQ_NEXT(td, td_runq);
+ } else {
+ td = TAILQ_FIRST(&kg->kg_runq);
+ }
+
+ /*
+ * If we found one, assign it the KSE; otherwise idle the KSE.
+ */
+ if (td) {
+ thread_sanity_check(td);
+ kg->kg_last_assigned = td;
+ td->td_kse = ke;
+ ke->ke_thread = td;
+ runq_add(&runq, ke);
+ CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p", ke, td);
+ } else {
+ KASSERT((ke->ke_state != KES_IDLE), ("kse already idle"));
+KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!"));
+ ke->ke_state = KES_IDLE;
+ ke->ke_thread = NULL;
+ TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
+ kg->kg_idle_kses++;
+ CTR1(KTR_RUNQ, "kse_reassign: ke%p idled", ke);
+KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self2!"));
+ }
}
int
-procrunnable(void)
+kserunnable(void)
{
return runq_check(&runq);
}
+/*
+ * Remove a thread from its KSEGRP's run queue.
+ * This in turn may remove it from a KSE if it was already assigned
+ * to one, possibly causing a new thread to be assigned to the KSE
+ * and the KSE getting a new priority (unless it's a BOUND thread/KSE pair).
+ */
void
remrunqueue(struct thread *td)
{
- runq_remove(&runq, td->td_kse);
+ struct thread *td2, *td3;
+ struct ksegrp *kg;
+ struct kse *ke;
+
+ mtx_assert(&sched_lock, MA_OWNED);
+ thread_sanity_check(td);
+ KASSERT ((td->td_state == TDS_RUNQ),
+ ("remrunqueue: Bad state on run queue"));
+ kg = td->td_ksegrp;
+ ke = td->td_kse;
+ /*
+ * If it's a bound thread/KSE pair, take the shortcut. All non-KSE
+ * threads are BOUND.
+ */
+ CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
+ td->td_state = TDS_UNQUEUED;
+ kg->kg_runnable--;
+ if ((td->td_flags & TDF_UNBOUND) == 0) {
+ /* Bring its kse with it, leave the thread attached */
+ runq_remove(&runq, ke);
+ ke->ke_state = KES_UNQUEUED;
+ return;
+ }
+ if (ke) {
+ /*
+ * This thread has been assigned to a KSE.
+ * We need to dissociate it and try assign the
+ * KSE to the next available thread. Then, we should
+ * see if we need to move the KSE in the run queues.
+ */
+ td2 = kg->kg_last_assigned;
+ KASSERT((td2 != NULL), ("last assigned has wrong value "));
+ td->td_kse = NULL;
+ if ((td3 = TAILQ_NEXT(td2, td_runq))) {
+ KASSERT(td3 != td, ("td3 somehow matched td"));
+ /*
+ * Give the next unassigned thread to the KSE
+ * so the number of runnable KSEs remains
+ * constant.
+ */
+ td3->td_kse = ke;
+ ke->ke_thread = td3;
+ kg->kg_last_assigned = td3;
+ runq_readjust(&runq, ke);
+ } else {
+ /*
+ * There is no unassigned thread.
+ * If we were the last assigned one,
+ * adjust the last assigned pointer back
+ * one, which may result in NULL.
+ */
+ if (td == td2) {
+ kg->kg_last_assigned =
+ TAILQ_PREV(td, threadqueue, td_runq);
+ }
+ runq_remove(&runq, ke);
+KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!"));
+ KASSERT((ke->ke_state != KES_IDLE),
+ ("kse already idle"));
+ ke->ke_state = KES_IDLE;
+ ke->ke_thread = NULL;
+KASSERT((TAILQ_FIRST(&kg->kg_iq) != ke), ("really bad screwup"));
+ TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
+ kg->kg_idle_kses++;
+KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self2!"));
+ }
+ }
+ TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
+ thread_sanity_check(td);
}
+#if 1 /* use the first version */
+
void
setrunqueue(struct thread *td)
{
- runq_add(&runq, td->td_kse);
+ struct kse *ke;
+ struct ksegrp *kg;
+ struct thread *td2;
+ struct thread *tda;
+
+ CTR1(KTR_RUNQ, "setrunqueue: td%p", td);
+ mtx_assert(&sched_lock, MA_OWNED);
+ thread_sanity_check(td);
+ KASSERT((td->td_state != TDS_RUNQ), ("setrunqueue: bad thread state"));
+ td->td_state = TDS_RUNQ;
+ kg = td->td_ksegrp;
+ kg->kg_runnable++;
+ if ((td->td_flags & TDF_UNBOUND) == 0) {
+ KASSERT((td->td_kse != NULL),
+ ("queueing BAD thread to run queue"));
+ /*
+ * Common path optimisation: Only one of everything
+ * and the KSE is always already attached.
+ * Totally ignore the ksegrp run queue.
+ */
+ runq_add(&runq, td->td_kse);
+ return;
+ }
+ /*
+ * Ok, so we are threading with this thread.
+ * We don't have a KSE, see if we can get one..
+ */
+ tda = kg->kg_last_assigned;
+ if ((ke = td->td_kse) == NULL) {
+ /*
+ * We will need a KSE, see if there is one..
+ * First look for a free one, before getting desperate.
+ * If we can't get one, our priority is not high enough..
+ * that's ok..
+ */
+ if (kg->kg_idle_kses) {
+ /*
+ * There is a free one so it's ours for the asking..
+ */
+ ke = TAILQ_FIRST(&kg->kg_iq);
+KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self3!"));
+ TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
+ ke->ke_state = KES_UNQUEUED;
+ kg->kg_idle_kses--;
+KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self4!"));
+ } else if (tda && (tda->td_priority > td->td_priority)) {
+ /*
+ * None free, but there is one we can commandeer.
+ */
+ ke = tda->td_kse;
+ tda->td_kse = NULL;
+ ke->ke_thread = NULL;
+ tda = kg->kg_last_assigned =
+ TAILQ_PREV(tda, threadqueue, td_runq);
+ runq_remove(&runq, ke);
+KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self5!"));
+ }
+ } else {
+ KASSERT(ke->ke_thread == td, ("KSE/thread mismatch"));
+ KASSERT(ke->ke_state != KES_IDLE, ("KSE unexpectedly idle"));
+ ke->ke_thread = NULL;
+ td->td_kse = NULL;
+ }
+
+ /*
+ * Add the thread to the ksegrp's run queue at
+ * the appropriate place.
+ */
+ TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
+ if (td2->td_priority > td->td_priority) {
+ TAILQ_INSERT_BEFORE(td2, td, td_runq);
+ break;
+ }
+ }
+ if (td2 == NULL) {
+ /* We ran off the end of the TAILQ or it was empty. */
+ TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
+ }
+
+ /*
+ * If we have a ke to use, then put it on the run queue and
+ * If needed, readjust the last_assigned pointer.
+ */
+ if (ke) {
+ if (tda == NULL) {
+ /*
+ * No pre-existing last assigned so whoever is first
+ * gets the KSE we brought in (may be us).
+ */
+ td2 = TAILQ_FIRST(&kg->kg_runq);
+ KASSERT((td2->td_kse == NULL),
+ ("unexpected ke present"));
+ td2->td_kse = ke;
+ ke->ke_thread = td2;
+ kg->kg_last_assigned = td2;
+ } else if (tda->td_priority > td->td_priority) {
+ /*
+ * It's ours, grab it, but last_assigned is past us
+ * so don't change it.
+ */
+ td->td_kse = ke;
+ ke->ke_thread = td;
+ } else {
+ /*
+ * We are past last_assigned, so
+ * put the new kse on whatever is next,
+ * which may or may not be us.
+ */
+ td2 = TAILQ_NEXT(tda, td_runq);
+ kg->kg_last_assigned = td2;
+ td2->td_kse = ke;
+ ke->ke_thread = td2;
+ }
+ runq_add(&runq, ke);
+ }
+ thread_sanity_check(td);
}
+#else
+
+void
+setrunqueue(struct thread *td)
+{
+ struct kse *ke;
+ struct ksegrp *kg;
+ struct thread *td2;
+
+ CTR1(KTR_RUNQ, "setrunqueue: td%p", td);
+ KASSERT((td->td_state != TDS_RUNQ), ("setrunqueue: bad thread state"));
+ td->td_state = TDS_RUNQ;
+ kg = td->td_ksegrp;
+ kg->kg_runnable++;
+ if ((td->td_flags & TDF_UNBOUND) == 0) {
+ /*
+ * Common path optimisation: Only one of everything
+ * and the KSE is always already attached.
+ * Totally ignore the ksegrp run queue.
+ */
+ runq_add(&runq, td->td_kse);
+ return;
+ }
+ /*
+ * First add the thread to the ksegrp's run queue at
+ * the appropriate place.
+ */
+ TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
+ if (td2->td_priority > td->td_priority) {
+ TAILQ_INSERT_BEFORE(td2, td, td_runq);
+ break;
+ }
+ }
+ if (td2 == NULL) {
+ /* We ran off the end of the TAILQ or it was empty. */
+ TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
+ }
+
+ /*
+ * The following could be achieved by simply doing:
+ * td->td_kse = NULL; kse_reassign(ke);
+ * but I felt that I'd try to do it inline here.
+ * All this work may not be worth it.
+ */
+ if ((ke = td->td_kse)) { /* XXXKSE */
+ /*
+ * We have a KSE already. See whether we can keep it
+ * or if we need to give it to someone else.
+ * Either way it will need to be inserted into
+ * the runq. kse_reassign() will do this as will runq_add().
+ */
+ if ((kg->kg_last_assigned) &&
+ (kg->kg_last_assigned->td_priority > td->td_priority)) {
+ /*
+ * We can definitely keep the KSE
+ * as the "last assigned thread" has
+ * lower priority than we do.
+ * The "last assigned" pointer stays the same.
+ */
+ runq_add(&runq, ke);
+ return;
+
+ }
+ /*
+ * Give it to the correct thread,
+ * which may be (often is) us, but may not be.
+ */
+ td->td_kse = NULL;
+ kse_reassign(ke);
+ return;
+ }
+ /*
+ * There are two cases where KSE adjustment is needed.
+ * Usurpation of an already assigned KSE, and assignment
+ * of a previously IDLE KSE.
+ */
+ if (kg->kg_idle_kses) {
+ /*
+ * If there are unassigned KSEs then we definitely
+ * will be assigned one from the idle KSE list.
+ * If we are the last, we should get the "last
+ * assigned" pointer set to us as well.
+ */
+ ke = TAILQ_FIRST(&kg->kg_iq);
+KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!"));
+ TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
+ ke->ke_state = KES_UNQUEUED;
+ kg->kg_idle_kses--;
+KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!"));
+ ke->ke_thread = td;
+ td->td_kse = ke;
+ runq_add(&runq, ke);
+KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!"));
+ if (TAILQ_NEXT(td, td_runq) == NULL) {
+ kg->kg_last_assigned = td;
+ }
+ } else if (kg->kg_last_assigned &&
+ (kg->kg_last_assigned->td_priority > td->td_priority)) {
+ /*
+ * If there were none last-assigned, all KSEs
+ * are actually out running as we speak.
+ * If there was a last assigned, but we didn't see it,
+ * we must be inserting before it, so take the KSE from
+ * the last assigned, and back it up one entry. Then,
+ * assign the KSE to the new thread and adjust its priority.
+ */
+ td2 = kg->kg_last_assigned;
+ ke = td2->td_kse;
+KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!"));
+ kg->kg_last_assigned =
+ TAILQ_PREV(td2, threadqueue, td_runq);
+ td2->td_kse = NULL;
+ td->td_kse = ke;
+ ke->ke_thread = td;
+ runq_readjust(&runq, ke);
+KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!"));
+ }
+}
+#endif
+
+/************************************************************************
+ * Critical section marker functions *
+ ************************************************************************/
/* Critical sections that prevent preemption. */
void
critical_enter(void)
@@ -98,6 +569,23 @@ critical_exit(void)
}
}
+
+/************************************************************************
+ * SYSTEM RUN QUEUE manipulations and tests *
+ ************************************************************************/
+/*
+ * Initialize a run structure.
+ */
+void
+runq_init(struct runq *rq)
+{
+ int i;
+
+ bzero(rq, sizeof *rq);
+ for (i = 0; i < RQ_NQS; i++)
+ TAILQ_INIT(&rq->rq_queues[i]);
+}
+
/*
* Clear the status bit of the queue corresponding to priority level pri,
* indicating that it is empty.
@@ -156,7 +644,7 @@ runq_setbit(struct runq *rq, int pri)
}
/*
- * Add the process to the queue specified by its priority, and set the
+ * Add the KSE to the queue specified by its priority, and set the
* corresponding status bit.
*/
void
@@ -165,14 +653,16 @@ runq_add(struct runq *rq, struct kse *ke)
struct rqhead *rqh;
int pri;
-#ifdef INVARIANTS
- struct proc *p = ke->ke_proc;
-#endif
- if (ke->ke_flags & KEF_ONRUNQ)
- return;
mtx_assert(&sched_lock, MA_OWNED);
- KASSERT(p->p_stat == SRUN, ("runq_add: proc %p (%s) not SRUN",
- p, p->p_comm));
+ KASSERT((ke->ke_thread != NULL), ("runq_add: No thread on KSE"));
+ KASSERT((ke->ke_thread->td_kse != NULL), ("runq_add: No KSE on thread"));
+ if (ke->ke_state == KES_ONRUNQ)
+ return;
+#if defined(INVARIANTS) && defined(DIAGNOSTIC)
+ KASSERT(ke->ke_state != KES_ONRUNQ,
+ ("runq_add: kse %p (%s) already in run queue", ke,
+ ke->ke_proc->p_comm));
+#endif
pri = ke->ke_thread->td_priority / RQ_PPQ;
ke->ke_rqindex = pri;
runq_setbit(rq, pri);
@@ -180,7 +670,8 @@ runq_add(struct runq *rq, struct kse *ke)
CTR4(KTR_RUNQ, "runq_add: p=%p pri=%d %d rqh=%p",
ke->ke_proc, ke->ke_thread->td_priority, pri, rqh);
TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
- ke->ke_flags |= KEF_ONRUNQ;
+ ke->ke_ksegrp->kg_runq_kses++;
+ ke->ke_state = KES_ONRUNQ;
}
/*
@@ -219,43 +710,38 @@ runq_choose(struct runq *rq)
int pri;
mtx_assert(&sched_lock, MA_OWNED);
- if ((pri = runq_findbit(rq)) != -1) {
+ while ((pri = runq_findbit(rq)) != -1) {
rqh = &rq->rq_queues[pri];
ke = TAILQ_FIRST(rqh);
KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
- KASSERT(ke->ke_proc->p_stat == SRUN,
- ("runq_choose: process %d(%s) in state %d", ke->ke_proc->p_pid,
- ke->ke_proc->p_comm, ke->ke_proc->p_stat));
- CTR3(KTR_RUNQ, "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
+ CTR3(KTR_RUNQ,
+ "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
+KASSERT(ke->ke_procq.tqe_prev != NULL, ("no prev"));
+if (ke->ke_procq.tqe_next)
+ KASSERT(ke->ke_procq.tqe_next->ke_procq.tqe_prev != NULL, ("no next"));
TAILQ_REMOVE(rqh, ke, ke_procq);
+ ke->ke_ksegrp->kg_runq_kses--;
if (TAILQ_EMPTY(rqh)) {
CTR0(KTR_RUNQ, "runq_choose: empty");
runq_clrbit(rq, pri);
}
- ke->ke_flags &= ~KEF_ONRUNQ;
+
+ ke->ke_state = KES_RUNNING;
+ KASSERT((ke->ke_thread != NULL),
+ ("runq_choose: No thread on KSE"));
+ KASSERT((ke->ke_thread->td_kse != NULL),
+ ("runq_choose: No KSE on thread"));
return (ke);
}
CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);
- return (PCPU_GET(idlethread)->td_kse);
+ return (NULL);
}
/*
- * Initialize a run structure.
- */
-void
-runq_init(struct runq *rq)
-{
- int i;
-
- bzero(rq, sizeof *rq);
- for (i = 0; i < RQ_NQS; i++)
- TAILQ_INIT(&rq->rq_queues[i]);
-}
-
-/*
- * Remove the process from the queue specified by its priority, and clear the
+ * Remove the KSE from the queue specified by its priority, and clear the
* corresponding status bit if the queue becomes empty.
+ * Caller must set ke->ke_state afterwards.
*/
void
runq_remove(struct runq *rq, struct kse *ke)
@@ -263,8 +749,7 @@ runq_remove(struct runq *rq, struct kse *ke)
struct rqhead *rqh;
int pri;
- if (!(ke->ke_flags & KEF_ONRUNQ))
- return;
+ KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue"));
mtx_assert(&sched_lock, MA_OWNED);
pri = ke->ke_rqindex;
rqh = &rq->rq_queues[pri];
@@ -276,5 +761,104 @@ runq_remove(struct runq *rq, struct kse *ke)
CTR0(KTR_RUNQ, "runq_remove: empty");
runq_clrbit(rq, pri);
}
- ke->ke_flags &= ~KEF_ONRUNQ;
+ ke->ke_state = KES_UNQUEUED;
+ ke->ke_ksegrp->kg_runq_kses--;
+}
+
+static void
+runq_readjust(struct runq *rq, struct kse *ke)
+{
+
+ if (ke->ke_rqindex != (ke->ke_thread->td_priority / RQ_PPQ)) {
+ runq_remove(rq, ke);
+ runq_add(rq, ke);
+ }
+}
+
+void
+thread_sanity_check(struct thread *td)
+{
+ struct proc *p;
+ struct ksegrp *kg;
+ struct kse *ke;
+ struct thread *td2;
+ unsigned int prevpri;
+ int saw_lastassigned;
+ int unassigned;
+ int assigned;
+
+ p = td->td_proc;
+ kg = td->td_ksegrp;
+ ke = td->td_kse;
+
+ if (kg != &p->p_ksegrp) {
+ panic ("wrong ksegrp");
+ }
+
+ if (ke) {
+ if (ke != &p->p_kse) {
+ panic("wrong kse");
+ }
+ if (ke->ke_thread != td) {
+ panic("wrong thread");
+ }
+ }
+
+ if ((p->p_flag & P_KSES) == 0) {
+ if (ke == NULL) {
+ panic("non KSE thread lost kse");
+ }
+ } else {
+ prevpri = 0;
+ saw_lastassigned = 0;
+ unassigned = 0;
+ assigned = 0;
+ TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
+ if (td2->td_priority < prevpri) {
+			panic("thread runqueue unsorted");
+ }
+ prevpri = td2->td_priority;
+ if (td2->td_kse) {
+ assigned++;
+ if (unassigned) {
+ panic("unassigned before assigned");
+ }
+ if (kg->kg_last_assigned == NULL) {
+ panic("lastassigned corrupt");
+ }
+ if (saw_lastassigned) {
+ panic("last assigned not last");
+ }
+ if (td2->td_kse->ke_thread != td2) {
+ panic("mismatched kse/thread");
+ }
+ } else {
+ unassigned++;
+ }
+ if (td2 == kg->kg_last_assigned) {
+ saw_lastassigned = 1;
+ if (td2->td_kse == NULL) {
+ panic("last assigned not assigned");
+ }
+ }
+ }
+ if (kg->kg_last_assigned && (saw_lastassigned == 0)) {
+ panic("where on earth does lastassigned point?");
+ }
+ FOREACH_THREAD_IN_GROUP(kg, td2) {
+ if (((td2->td_flags & TDF_UNBOUND) == 0) &&
+ (td2->td_state == TDS_RUNQ)) {
+ assigned++;
+ if (td2->td_kse == NULL) {
+ panic ("BOUND thread with no KSE");
+ }
+ }
+ }
+#if 0
+ if ((unassigned + assigned) != kg->kg_runnable) {
+ panic("wrong number in runnable");
+ }
+#endif
+ }
}
+
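
The comments and thread_sanity_check() above describe an invariant that is easy to lose in the diff: the ksegrp run queue stays sorted by priority, the first N threads (one per runnable KSE) each own a KSE, and kg_last_assigned points at the last of those. The standalone sketch below models that invariant in plain userland C; sk_thread, check_runq and the field names are invented stand-ins for the kernel structures, not code from this commit.

/*
 * Minimal userland model of the ksegrp run-queue invariant checked by
 * thread_sanity_check(): sorted by priority, assigned threads strictly
 * before unassigned ones, last_assigned pointing at the last assigned.
 */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

struct sk_thread {
	int	pri;			/* lower value = higher priority */
	int	has_kse;		/* stand-in for td_kse != NULL */
	struct sk_thread *next;		/* stand-in for the td_runq linkage */
};

static void
check_runq(struct sk_thread *head, struct sk_thread *last_assigned)
{
	struct sk_thread *td;
	int prevpri = -1, seen_unassigned = 0, seen_last = 0;

	for (td = head; td != NULL; td = td->next) {
		assert(td->pri >= prevpri);	/* queue stays sorted */
		prevpri = td->pri;
		if (td->has_kse)
			assert(!seen_unassigned); /* no unassigned gap */
		else
			seen_unassigned = 1;
		if (td == last_assigned) {
			assert(td->has_kse);
			seen_last = 1;
		}
	}
	if (last_assigned != NULL)
		assert(seen_last);
}

int
main(void)
{
	struct sk_thread c = { 30, 0, NULL };	/* waiting for a KSE */
	struct sk_thread b = { 20, 1, &c };	/* owns a KSE, last assigned */
	struct sk_thread a = { 10, 1, &b };	/* owns a KSE */

	check_runq(&a, &b);
	printf("run queue invariant holds\n");
	return (0);
}
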
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index bd1a625..a2a44ff 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -277,9 +277,13 @@ schedcpu(arg)
* with 16-bit int's (remember them?)
* overflow takes 45 days.
*/
- /* XXXKSE */
- /* if ((ke->ke_flags & KEF_ONRUNQ) == 0) */
- if (p->p_stat == SSLEEP || p->p_stat == SSTOP) {
+			/* XXXKSE **WRONG** */
+			/*
+			 * The KSE slptimes are not touched in wakeup()
+			 * because the thread may not HAVE a KSE.
+			 */
+ if (ke->ke_state == KES_ONRUNQ &&
+ ke->ke_state == KES_RUNNING) {
ke->ke_slptime++;
} else {
ke->ke_slptime = 0;
@@ -321,20 +325,31 @@ schedcpu(arg)
}
kg->kg_estcpu = decay_cpu(loadfac, kg->kg_estcpu);
resetpriority(kg);
- td = FIRST_THREAD_IN_PROC(p);
- if (td->td_priority >= PUSER &&
- (p->p_sflag & PS_INMEM)) {
- int changedqueue =
- ((td->td_priority / RQ_PPQ) !=
- (kg->kg_user_pri / RQ_PPQ));
-
- td->td_priority = kg->kg_user_pri;
- FOREACH_KSE_IN_GROUP(kg, ke) {
- if ((ke->ke_oncpu == NOCPU) &&
- (p->p_stat == SRUN) && /* XXXKSE */
- changedqueue) {
- remrunqueue(ke->ke_thread);
- setrunqueue(ke->ke_thread);
+ FOREACH_THREAD_IN_GROUP(kg, td) {
+ int changedqueue;
+ if (td->td_priority >= PUSER) {
+ /*
+ * Only change the priority
+ * of threads that are still at their
+ * user priority.
+ * XXXKSE This is problematic
+ * as we may need to re-order
+ * the threads on the KSEG list.
+ */
+ changedqueue =
+ ((td->td_priority / RQ_PPQ) !=
+ (kg->kg_user_pri / RQ_PPQ));
+
+ td->td_priority = kg->kg_user_pri;
+ if (changedqueue &&
+ td->td_state == TDS_RUNQ) {
+ /* this could be optimised */
+ remrunqueue(td);
+ td->td_priority =
+ kg->kg_user_pri;
+ setrunqueue(td);
+ } else {
+ td->td_priority = kg->kg_user_pri;
}
}
}
@@ -409,6 +424,7 @@ sleepinit(void)
* entered before msleep returns. If priority includes the PDROP
* flag the mutex is not entered before returning.
*/
+
int
msleep(ident, mtx, priority, wmesg, timo)
void *ident;
@@ -426,9 +442,48 @@ msleep(ident, mtx, priority, wmesg, timo)
if (KTRPOINT(td, KTR_CSW))
ktrcsw(1, 0);
#endif
+ KASSERT((td->td_kse != NULL), ("msleep: NULL KSE?"));
+ KASSERT((td->td_kse->ke_state == KES_RUNNING), ("msleep: kse state?"));
WITNESS_SLEEP(0, &mtx->mtx_object);
KASSERT(timo != 0 || mtx_owned(&Giant) || mtx != NULL,
("sleeping without a mutex"));
+ /*
+ * If we are capable of async syscalls and there isn't already
+ * another one ready to return, start a new thread
+ * and queue it as ready to run. Note that there is danger here
+ * because we need to make sure that we don't sleep allocating
+ * the thread (recursion here might be bad).
+ * Hence the TDF_INMSLEEP flag.
+ */
+ if (p->p_flag & P_KSES) {
+ /* Just don't bother if we are exiting
+ and not the exiting thread. */
+ if ((p->p_flag & P_WEXIT) && catch && p->p_singlethread != td)
+ return (EINTR);
+ if (td->td_mailbox && (!(td->td_flags & TDF_INMSLEEP))) {
+ /*
+ * If we have no queued work to do, then
+ * upcall to the UTS to see if it has more to do.
+ * We don't need to upcall now, just make it and
+ * queue it.
+ */
+ mtx_lock_spin(&sched_lock);
+ if (TAILQ_FIRST(&td->td_ksegrp->kg_runq) == NULL) {
+ /* Don't recurse here! */
+ KASSERT((td->td_kse->ke_state == KES_RUNNING), ("msleep: kse stateX?"));
+ td->td_flags |= TDF_INMSLEEP;
+ thread_schedule_upcall(td, td->td_kse);
+ td->td_flags &= ~TDF_INMSLEEP;
+ KASSERT((td->td_kse->ke_state == KES_RUNNING), ("msleep: kse stateY?"));
+ }
+ mtx_unlock_spin(&sched_lock);
+ }
+ KASSERT((td->td_kse != NULL), ("msleep: NULL KSE2?"));
+ KASSERT((td->td_kse->ke_state == KES_RUNNING),
+ ("msleep: kse state2?"));
+ KASSERT((td->td_kse->ke_thread == td),
+ ("msleep: kse/thread mismatch?"));
+ }
mtx_lock_spin(&sched_lock);
if (cold || panicstr) {
/*
@@ -454,7 +509,7 @@ msleep(ident, mtx, priority, wmesg, timo)
}
KASSERT(p != NULL, ("msleep1"));
- KASSERT(ident != NULL && td->td_proc->p_stat == SRUN, ("msleep"));
+ KASSERT(ident != NULL && td->td_state == TDS_RUNNING, ("msleep"));
td->td_wchan = ident;
td->td_wmesg = wmesg;
@@ -468,20 +523,23 @@ msleep(ident, mtx, priority, wmesg, timo)
callout_reset(&td->td_slpcallout, timo, endtsleep, td);
/*
* We put ourselves on the sleep queue and start our timeout
- * before calling cursig, as we could stop there, and a wakeup
- * or a SIGCONT (or both) could occur while we were stopped.
- * A SIGCONT would cause us to be marked as SSLEEP
+ * before calling thread_suspend_check, as we could stop there, and
+	 * a wakeup or a SIGCONT (or both) could occur while we were stopped
* without resuming us, thus we must be ready for sleep
* when cursig is called. If the wakeup happens while we're
* stopped, td->td_wchan will be 0 upon return from cursig.
*/
if (catch) {
- CTR3(KTR_PROC, "msleep caught: proc %p (pid %d, %s)", p,
+ CTR3(KTR_PROC, "msleep caught: thread %p (pid %d, %s)", td,
p->p_pid, p->p_comm);
td->td_flags |= TDF_SINTR;
mtx_unlock_spin(&sched_lock);
PROC_LOCK(p);
- sig = cursig(p);
+ sig = cursig(td);
+ if (thread_suspend_check(1)) {
+ sig = EINTR;
+ rval = EINTR;
+ }
mtx_lock_spin(&sched_lock);
PROC_UNLOCK(p);
if (sig != 0) {
@@ -492,13 +550,13 @@ msleep(ident, mtx, priority, wmesg, timo)
} else
sig = 0;
if (td->td_wchan != NULL) {
- td->td_proc->p_stat = SSLEEP;
p->p_stats->p_ru.ru_nvcsw++;
+ td->td_state = TDS_SLP;
mi_switch();
}
- CTR3(KTR_PROC, "msleep resume: proc %p (pid %d, %s)", td, p->p_pid,
+ CTR3(KTR_PROC, "msleep resume: thread %p (pid %d, %s)", td, p->p_pid,
p->p_comm);
- KASSERT(td->td_proc->p_stat == SRUN, ("running but not SRUN"));
+ KASSERT(td->td_state == TDS_RUNNING, ("running but not TDS_RUNNING"));
td->td_flags &= ~TDF_SINTR;
if (td->td_flags & TDF_TIMEOUT) {
td->td_flags &= ~TDF_TIMEOUT;
@@ -524,8 +582,8 @@ msleep(ident, mtx, priority, wmesg, timo)
if (rval == 0 && catch) {
PROC_LOCK(p);
- /* XXX: shouldn't we always be calling cursig() */
- if (sig != 0 || (sig = cursig(p))) {
+ /* XXX: shouldn't we always be calling cursig() */
+ if (sig != 0 || (sig = cursig(td))) {
if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
rval = EINTR;
else
@@ -571,7 +629,7 @@ endtsleep(arg)
td->td_flags &= ~TDF_TIMEOUT;
setrunqueue(td);
} else if (td->td_wchan != NULL) {
- if (td->td_proc->p_stat == SSLEEP) /* XXXKSE */
+ if (td->td_state == TDS_SLP) /* XXXKSE */
setrunnable(td);
else
unsleep(td);
@@ -583,6 +641,38 @@ endtsleep(arg)
}
/*
+ * Abort a thread, as if an interrupt had occurred. Only abort
+ * interruptible waits (unfortunately it isn't safe to abort others).
+ * This is almost identical to cv_abort().
+ * Think about merging them?
+ * Also, whatever the signal code does...
+ */
+void
+abortsleep(struct thread *td)
+{
+
+ mtx_lock_spin(&sched_lock);
+ /*
+ * If the TDF_TIMEOUT flag is set, just leave. A
+ * timeout is scheduled anyhow.
+ */
+ if ((td->td_flags & (TDF_TIMEOUT | TDF_SINTR)) == TDF_SINTR) {
+ if (td->td_wchan != NULL) {
+ if (td->td_state == TDS_SLP) { /* XXXKSE */
+ setrunnable(td);
+ } else {
+ /*
+ * Probably in a suspended state..
+ * um.. dunno XXXKSE
+ */
+ unsleep(td);
+ }
+ }
+ }
+ mtx_unlock_spin(&sched_lock);
+}
+
+/*
* Remove a process from its wait queue
*/
void
@@ -618,25 +708,24 @@ restart:
if (td->td_wchan == ident) {
TAILQ_REMOVE(qp, td, td_slpq);
td->td_wchan = NULL;
- if (td->td_proc->p_stat == SSLEEP) {
+ if (td->td_state == TDS_SLP) {
/* OPTIMIZED EXPANSION OF setrunnable(p); */
CTR3(KTR_PROC, "wakeup: thread %p (pid %d, %s)",
td, p->p_pid, p->p_comm);
if (td->td_ksegrp->kg_slptime > 1)
updatepri(td);
td->td_ksegrp->kg_slptime = 0;
- td->td_kse->ke_slptime = 0;
- td->td_proc->p_stat = SRUN;
if (p->p_sflag & PS_INMEM) {
setrunqueue(td);
maybe_resched(td);
} else {
+/* XXXKSE Wrong! */ td->td_state = TDS_RUNQ;
p->p_sflag |= PS_SWAPINREQ;
wakeup(&proc0);
}
/* END INLINE EXPANSION */
- goto restart;
}
+ goto restart;
}
}
mtx_unlock_spin(&sched_lock);
@@ -665,20 +754,19 @@ restart:
if (td->td_wchan == ident) {
TAILQ_REMOVE(qp, td, td_slpq);
td->td_wchan = NULL;
- if (td->td_proc->p_stat == SSLEEP) {
+ if (td->td_state == TDS_SLP) {
/* OPTIMIZED EXPANSION OF setrunnable(p); */
- CTR3(KTR_PROC, "wakeup1: proc %p (pid %d, %s)",
- p, p->p_pid, p->p_comm);
+ CTR3(KTR_PROC,"wakeup1: thread %p (pid %d, %s)",
+ td, p->p_pid, p->p_comm);
if (td->td_ksegrp->kg_slptime > 1)
updatepri(td);
td->td_ksegrp->kg_slptime = 0;
- td->td_kse->ke_slptime = 0;
- td->td_proc->p_stat = SRUN;
if (p->p_sflag & PS_INMEM) {
setrunqueue(td);
maybe_resched(td);
break;
} else {
+/* XXXKSE Wrong */ td->td_state = TDS_RUNQ;
p->p_sflag |= PS_SWAPINREQ;
wakeup(&proc0);
}
@@ -698,15 +786,19 @@ mi_switch()
{
struct bintime new_switchtime;
struct thread *td = curthread; /* XXX */
- register struct proc *p = td->td_proc; /* XXX */
+ struct proc *p = td->td_proc; /* XXX */
+ struct kse *ke = td->td_kse;
#if 0
register struct rlimit *rlim;
#endif
u_int sched_nest;
mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED);
+ KASSERT((ke->ke_state == KES_RUNNING), ("mi_switch: kse state?"));
#ifdef INVARIANTS
- if (p->p_stat != SMTX && p->p_stat != SRUN)
+ if (td->td_state != TDS_MTX &&
+ td->td_state != TDS_RUNQ &&
+ td->td_state != TDS_RUNNING)
mtx_assert(&Giant, MA_NOTOWNED);
#endif
@@ -735,7 +827,8 @@ mi_switch()
*
* XXX drop sched_lock, pickup Giant
*/
- if (p->p_stat != SZOMB && p->p_limit->p_cpulimit != RLIM_INFINITY &&
+ if (p->p_state != PRS_ZOMBIE &&
+ p->p_limit->p_cpulimit != RLIM_INFINITY &&
p->p_runtime > p->p_limit->p_cpulimit) {
rlim = &p->p_rlimit[RLIMIT_CPU];
if (p->p_runtime / (rlim_t)1000000 >= rlim->rlim_max) {
@@ -763,17 +856,35 @@ mi_switch()
*/
cnt.v_swtch++;
PCPU_SET(switchtime, new_switchtime);
- CTR3(KTR_PROC, "mi_switch: old proc %p (pid %d, %s)", p, p->p_pid,
+ CTR3(KTR_PROC, "mi_switch: old thread %p (pid %d, %s)", td, p->p_pid,
p->p_comm);
sched_nest = sched_lock.mtx_recurse;
- td->td_lastcpu = td->td_kse->ke_oncpu;
- td->td_kse->ke_oncpu = NOCPU;
- td->td_kse->ke_flags &= ~KEF_NEEDRESCHED;
+ td->td_lastcpu = ke->ke_oncpu;
+ ke->ke_oncpu = NOCPU;
+ ke->ke_flags &= ~KEF_NEEDRESCHED;
+ /*
+ * At the last moment: if this KSE is not on the run queue,
+ * it needs to be freed correctly and the thread treated accordingly.
+ */
+ if ((td->td_state == TDS_RUNNING) &&
+ ((ke->ke_flags & KEF_IDLEKSE) == 0)) {
+ /* Put us back on the run queue (kse and all). */
+ setrunqueue(td);
+ } else if ((td->td_flags & TDF_UNBOUND) &&
+ (td->td_state != TDS_RUNQ)) { /* in case of old code */
+ /*
+ * We will not be on the run queue.
+ * Someone else can use the KSE if they need it.
+ */
+ td->td_kse = NULL;
+ kse_reassign(ke);
+ }
cpu_switch();
td->td_kse->ke_oncpu = PCPU_GET(cpuid);
+ td->td_kse->ke_state = KES_RUNNING;
sched_lock.mtx_recurse = sched_nest;
sched_lock.mtx_lock = (uintptr_t)td;
- CTR3(KTR_PROC, "mi_switch: new proc %p (pid %d, %s)", p, p->p_pid,
+ CTR3(KTR_PROC, "mi_switch: new thread %p (pid %d, %s)", td, p->p_pid,
p->p_comm);
if (PCPU_GET(switchtime.sec) == 0)
binuptime(PCPU_PTR(switchtime));
@@ -791,37 +902,42 @@ setrunnable(struct thread *td)
struct proc *p = td->td_proc;
mtx_lock_spin(&sched_lock);
- switch (p->p_stat) {
- case SZOMB: /* not a thread flag XXXKSE */
+ switch (p->p_state) {
+ case PRS_ZOMBIE:
panic("setrunnable(1)");
+ default:
+ break;
}
- switch (td->td_proc->p_stat) {
+ switch (td->td_state) {
case 0:
- case SRUN:
- case SWAIT:
+ case TDS_RUNNING:
+ case TDS_IWAIT:
default:
+		printf("state is %d\n", td->td_state);
panic("setrunnable(2)");
- case SSTOP:
- case SSLEEP: /* e.g. when sending signals */
+ case TDS_SUSPENDED:
+ thread_unsuspend(p);
+ break;
+ case TDS_SLP: /* e.g. when sending signals */
if (td->td_flags & TDF_CVWAITQ)
cv_waitq_remove(td);
else
unsleep(td);
- break;
-
- case SIDL:
+ case TDS_UNQUEUED: /* being put back onto the queue */
+ case TDS_NEW: /* not yet had time to suspend */
+ case TDS_RUNQ: /* not yet had time to suspend */
break;
}
- td->td_proc->p_stat = SRUN;
if (td->td_ksegrp->kg_slptime > 1)
updatepri(td);
td->td_ksegrp->kg_slptime = 0;
- td->td_kse->ke_slptime = 0;
if ((p->p_sflag & PS_INMEM) == 0) {
+ td->td_state = TDS_RUNQ; /* XXXKSE not a good idea */
p->p_sflag |= PS_SWAPINREQ;
wakeup(&proc0);
} else {
- setrunqueue(td);
+ if (td->td_state != TDS_RUNQ)
+ setrunqueue(td); /* XXXKSE */
maybe_resched(td);
}
mtx_unlock_spin(&sched_lock);
@@ -848,7 +964,7 @@ resetpriority(kg)
kg->kg_user_pri = newpriority;
}
FOREACH_THREAD_IN_GROUP(kg, td) {
- maybe_resched(td);
+ maybe_resched(td); /* XXXKSE silly */
}
mtx_unlock_spin(&sched_lock);
}
@@ -865,20 +981,21 @@ loadav(void *arg)
int i, nrun;
struct loadavg *avg;
struct proc *p;
- struct ksegrp *kg;
+ struct thread *td;
avg = &averunnable;
sx_slock(&allproc_lock);
nrun = 0;
FOREACH_PROC_IN_SYSTEM(p) {
- FOREACH_KSEGRP_IN_PROC(p, kg) {
- switch (p->p_stat) {
- case SRUN:
+ FOREACH_THREAD_IN_PROC(p, td) {
+ switch (td->td_state) {
+ case TDS_RUNQ:
+ case TDS_RUNNING:
if ((p->p_flag & P_NOLOAD) != 0)
goto nextproc;
- /* FALLTHROUGH */
- case SIDL:
- nrun++;
+ nrun++; /* XXXKSE */
+ default:
+ break;
}
nextproc:
continue;
@@ -932,19 +1049,18 @@ void
schedclock(td)
struct thread *td;
{
- struct kse *ke = td->td_kse;
- struct ksegrp *kg = td->td_ksegrp;
+ struct kse *ke;
+ struct ksegrp *kg;
- if (td) {
- ke->ke_cpticks++;
- kg->kg_estcpu = ESTCPULIM(kg->kg_estcpu + 1);
- if ((kg->kg_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) {
- resetpriority(td->td_ksegrp);
- if (td->td_priority >= PUSER)
- td->td_priority = kg->kg_user_pri;
- }
- } else {
- panic("schedclock");
+ KASSERT((td != NULL), ("schedlock: null thread pointer"));
+ ke = td->td_kse;
+ kg = td->td_ksegrp;
+ ke->ke_cpticks++;
+ kg->kg_estcpu = ESTCPULIM(kg->kg_estcpu + 1);
+ if ((kg->kg_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) {
+ resetpriority(kg);
+ if (td->td_priority >= PUSER)
+ td->td_priority = kg->kg_user_pri;
}
}
@@ -959,7 +1075,6 @@ yield(struct thread *td, struct yield_args *uap)
mtx_assert(&Giant, MA_NOTOWNED);
mtx_lock_spin(&sched_lock);
td->td_priority = PRI_MAX_TIMESHARE;
- setrunqueue(td);
kg->kg_proc->p_stats->p_ru.ru_nvcsw++;
mi_switch();
mtx_unlock_spin(&sched_lock);
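
The msleep() comment above explains the TDF_INMSLEEP flag: a KSE-mode thread wants to queue an upcall before it blocks, but setting up that upcall could itself sleep, so a per-thread flag caps the recursion at one level. A minimal userland model of that guard follows; sk_thread, sk_msleep and schedule_upcall are invented names, and the real code obviously does much more, under sched_lock.

/*
 * Re-entrancy guard in the style of TDF_INMSLEEP: the upcall setup may
 * call back into the sleep path, but the second entry skips the setup.
 */
#include <stdio.h>

#define TDF_INMSLEEP	0x0400

struct sk_thread {
	int flags;
};

static void schedule_upcall(struct sk_thread *td);

static void
sk_msleep(struct sk_thread *td)
{
	if ((td->flags & TDF_INMSLEEP) == 0) {
		td->flags |= TDF_INMSLEEP;
		schedule_upcall(td);	/* may re-enter sk_msleep() */
		td->flags &= ~TDF_INMSLEEP;
	}
	/* ... the real code would now put the thread to sleep ... */
}

static void
schedule_upcall(struct sk_thread *td)
{
	printf("upcall scheduled\n");
	sk_msleep(td);		/* recursion stops: flag already set */
}

int
main(void)
{
	struct sk_thread td = { 0 };

	sk_msleep(&td);
	return (0);
}
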
diff --git a/sys/kern/ksched.c b/sys/kern/ksched.c
index c9081c3..bbe36be 100644
--- a/sys/kern/ksched.c
+++ b/sys/kern/ksched.c
@@ -181,7 +181,18 @@ int ksched_setscheduler(register_t *ret, struct ksched *ksched,
mtx_lock_spin(&sched_lock);
rtp_to_pri(&rtp, kg);
- td->td_last_kse->ke_flags |= KEF_NEEDRESCHED; /* XXXKSE */
+ FOREACH_THREAD_IN_GROUP(kg, td) { /* XXXKSE */
+ if (td->td_state == TDS_RUNNING) {
+ td->td_kse->ke_flags |= KEF_NEEDRESCHED;
+ } else if (td->td_state == TDS_RUNQ) {
+ if (td->td_priority > kg->kg_user_pri) {
+ remrunqueue(td);
+ td->td_priority =
+ kg->kg_user_pri;
+ setrunqueue(td);
+ }
+ }
+ }
mtx_unlock_spin(&sched_lock);
}
else
@@ -203,7 +214,19 @@ int ksched_setscheduler(register_t *ret, struct ksched *ksched,
* on the scheduling code: You must leave the
* scheduling info alone.
*/
- td->td_last_kse->ke_flags |= KEF_NEEDRESCHED; /* XXXKSE */
+ FOREACH_THREAD_IN_GROUP(kg, td) {
+ if (td->td_state == TDS_RUNNING) {
+ td->td_kse->ke_flags |= KEF_NEEDRESCHED;
+ } else if (td->td_state == TDS_RUNQ) {
+ if (td->td_priority > kg->kg_user_pri) {
+ remrunqueue(td);
+ td->td_priority =
+ kg->kg_user_pri;
+ setrunqueue(td);
+ }
+ }
+
+ }
mtx_unlock_spin(&sched_lock);
}
break;
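
Both ksched_setscheduler() above and schedcpu() earlier only call remrunqueue()/setrunqueue() when a priority change actually moves the thread to a different run queue. The sketch below shows the arithmetic behind that test; RQ_PPQ is 4 in the kernel headers of this era (256 priorities spread over 64 queues), but treat the exact constant as illustrative rather than definitive.

/* Requeue only when the priority change crosses a run-queue boundary. */
#include <stdio.h>

#define RQ_PPQ	4	/* priorities per run queue (illustrative value) */

static int
rq_index(int pri)
{
	return (pri / RQ_PPQ);
}

int
main(void)
{
	int oldpri = 130, newpri = 129;

	if (rq_index(oldpri) != rq_index(newpri))
		printf("crossed a queue boundary: requeue\n");
	else
		printf("same queue (%d), no requeue needed\n",
		    rq_index(oldpri));
	return (0);
}
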
diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c
index 9dad93b..afd4c5d 100644
--- a/sys/kern/subr_smp.c
+++ b/sys/kern/subr_smp.c
@@ -124,8 +124,8 @@ forward_signal(struct thread *td)
* executing so that it executes ast().
*/
mtx_assert(&sched_lock, MA_OWNED);
- KASSERT(td->td_proc->p_stat == SRUN,
- ("forward_signal: process is not SRUN"));
+ KASSERT(td->td_state == TDS_RUNNING,
+ ("forward_signal: thread is not TDS_RUNNING"));
CTR1(KTR_SMP, "forward_signal(%p)", td->td_proc);
diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
index 3b415de..027aa9c 100644
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@@ -48,6 +48,8 @@
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
+#include <sys/kse.h>
+#include <sys/ktr.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/systm.h>
@@ -71,13 +73,15 @@ userret(td, frame, oticks)
struct kse *ke = td->td_kse;
struct ksegrp *kg = td->td_ksegrp;
+ CTR3(KTR_SYSC, "userret: thread %p (pid %d, %s)", td, p->p_pid,
+ p->p_comm);
#ifdef INVARIANTS
/* Check that we called signotify() enough. */
mtx_lock(&Giant);
PROC_LOCK(p);
mtx_lock_spin(&sched_lock);
if (SIGPENDING(p) && ((p->p_sflag & PS_NEEDSIGCHK) == 0 ||
- (p->p_kse.ke_flags & KEF_ASTPENDING) == 0))
+ (ke->ke_flags & KEF_ASTPENDING) == 0))
			printf("failed to set signal flags properly for ast()\n");
mtx_unlock_spin(&sched_lock);
PROC_UNLOCK(p);
@@ -100,6 +104,22 @@ userret(td, frame, oticks)
}
/*
+ * We need to check to see if we have to exit or wait due to a
+ * single threading requirement or some other STOP condition.
+ */
+ PROC_LOCK(p);
+ thread_suspend_check(0); /* Can suspend or kill */
+ PROC_UNLOCK(p);
+
+ /*
+	 * Do special thread processing, e.g. upcall tweaking and such.
+ */
+ if (p->p_flag & P_KSES) {
+ thread_userret(p, kg, ke, td, frame);
+ /* printf("KSE thread returned"); */
+ }
+
+ /*
* Charge system time if profiling.
*
* XXX should move PS_PROFIL to a place that can obviously be
@@ -121,8 +141,7 @@ userret(td, frame, oticks)
* This function will return with preemption disabled.
*/
void
-ast(framep)
- struct trapframe *framep;
+ast(struct trapframe *framep)
{
struct thread *td = curthread;
struct proc *p = td->td_proc;
@@ -136,6 +155,8 @@ ast(framep)
int ucode;
#endif
+ CTR3(KTR_SYSC, "ast: thread %p (pid %d, %s)", td, p->p_pid,
+ p->p_comm);
KASSERT(TRAPF_USERMODE(framep), ("ast in kernel mode"));
#ifdef WITNESS
if (witness_list(td))
@@ -164,6 +185,13 @@ ast(framep)
p->p_stats->p_prof.pr_ticks = 0;
}
mtx_unlock_spin(&sched_lock);
+ /*
+ * XXXKSE While the fact that we owe a user profiling
+ * tick is stored per KSE in this code, the statistics
+ * themselves are still stored per process.
+ * This should probably change, by which I mean that
+ * possibly the location of both might change.
+ */
if (td->td_ucred != p->p_ucred)
cred_update_thread(td);
@@ -192,14 +220,13 @@ ast(framep)
if (flags & KEF_NEEDRESCHED) {
mtx_lock_spin(&sched_lock);
td->td_priority = kg->kg_user_pri;
- setrunqueue(td);
p->p_stats->p_ru.ru_nivcsw++;
mi_switch();
mtx_unlock_spin(&sched_lock);
}
if (sflag & PS_NEEDSIGCHK) {
PROC_LOCK(p);
- while ((sig = cursig(p)) != 0)
+ while ((sig = cursig(td)) != 0)
postsig(sig);
PROC_UNLOCK(p);
}
diff --git a/sys/kern/subr_turnstile.c b/sys/kern/subr_turnstile.c
index 08bca8d..c2e79d0 100644
--- a/sys/kern/subr_turnstile.c
+++ b/sys/kern/subr_turnstile.c
@@ -119,23 +119,20 @@ propagate_priority(struct thread *td)
return;
}
+ KASSERT(td->td_state != TDS_SURPLUS, ("Mutex owner SURPLUS"));
+ MPASS(td->td_proc != NULL);
MPASS(td->td_proc->p_magic == P_MAGIC);
- KASSERT(td->td_proc->p_stat != SSLEEP, ("sleeping thread owns a mutex"));
+ KASSERT(td->td_state != TDS_SLP,
+ ("sleeping thread owns a mutex"));
if (td->td_priority <= pri) /* lower is higher priority */
return;
- /*
- * Bump this thread's priority.
- */
- td->td_priority = pri;
/*
* If lock holder is actually running, just bump priority.
*/
- if (thread_running(td)) {
- MPASS(td->td_proc->p_stat == SRUN
- || td->td_proc->p_stat == SZOMB
- || td->td_proc->p_stat == SSTOP);
+ if (td->td_state == TDS_RUNNING) {
+ td->td_priority = pri;
return;
}
@@ -151,20 +148,26 @@ propagate_priority(struct thread *td)
* If on run queue move to new run queue, and quit.
* XXXKSE this gets a lot more complicated under threads
* but try anyhow.
+ * We should have a special call to do this more efficiently.
*/
- if (td->td_proc->p_stat == SRUN) {
+ if (td->td_state == TDS_RUNQ) {
MPASS(td->td_blocked == NULL);
remrunqueue(td);
+ td->td_priority = pri;
setrunqueue(td);
return;
}
+ /*
+ * Adjust for any other cases.
+ */
+ td->td_priority = pri;
/*
* If we aren't blocked on a mutex, we should be.
*/
- KASSERT(td->td_proc->p_stat == SMTX, (
+ KASSERT(td->td_state == TDS_MTX, (
"process %d(%s):%d holds %s but isn't blocked on a mutex\n",
- td->td_proc->p_pid, td->td_proc->p_comm, td->td_proc->p_stat,
+ td->td_proc->p_pid, td->td_proc->p_comm, td->td_state,
m->mtx_object.lo_name));
/*
@@ -590,7 +593,7 @@ _mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
*/
td->td_blocked = m;
td->td_mtxname = m->mtx_object.lo_name;
- td->td_proc->p_stat = SMTX;
+ td->td_state = TDS_MTX;
propagate_priority(td);
if (LOCK_LOG_TEST(&m->mtx_object, opts))
@@ -727,7 +730,6 @@ _mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
m, td1);
td1->td_blocked = NULL;
- td1->td_proc->p_stat = SRUN;
setrunqueue(td1);
if (td->td_critnest == 1 && td1->td_priority < pri) {
@@ -744,7 +746,6 @@ _mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
}
}
#endif
- setrunqueue(td);
if (LOCK_LOG_TEST(&m->mtx_object, opts))
CTR2(KTR_LOCK,
"_mtx_unlock_sleep: %p switching out lock=%p", m,
diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c
index 182221d..02b3a0d 100644
--- a/sys/kern/subr_witness.c
+++ b/sys/kern/subr_witness.c
@@ -225,6 +225,7 @@ static struct witness_order_list_entry order_lists[] = {
#endif
{ "clk", &lock_class_mtx_spin },
{ "mutex profiling lock", &lock_class_mtx_spin },
+ { "zombie_thread_lock", &lock_class_mtx_spin },
{ NULL, NULL },
{ NULL, NULL }
};
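
The reworked propagate_priority() in subr_turnstile.c above now branches on the new thread states instead of p_stat: a running lock holder just takes the boosted priority, a holder sitting on a run queue is dequeued and requeued, and a holder blocked on another mutex passes the boost further up the chain. Below is a compressed userland model of that walk; SK_RUNNING/SK_RUNQ/SK_MTX and sk_thread are invented stand-ins for TDS_* and struct thread.

/* Priority propagation along a chain of mutex owners, by thread state. */
#include <stdio.h>

enum sk_state { SK_RUNNING, SK_RUNQ, SK_MTX };

struct sk_thread {
	enum sk_state	 state;
	int		 pri;		/* lower = more important */
	struct sk_thread *blocked_on;	/* owner of the mutex we wait for */
};

static void
propagate(struct sk_thread *owner, int pri)
{
	while (owner != NULL && pri < owner->pri) {
		owner->pri = pri;
		switch (owner->state) {
		case SK_RUNNING:
			return;			/* will notice on its own */
		case SK_RUNQ:
			printf("requeue owner at pri %d\n", pri);
			return;
		case SK_MTX:
			owner = owner->blocked_on; /* keep walking the chain */
			break;
		}
	}
}

int
main(void)
{
	struct sk_thread c = { SK_RUNNING, 40, NULL };
	struct sk_thread b = { SK_MTX, 30, &c };

	propagate(&b, 10);	/* b and c both end up at priority 10 */
	printf("b=%d c=%d\n", b.pri, c.pri);
	return (0);
}
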
diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c
index 1bdd913..d8fba59 100644
--- a/sys/kern/sys_generic.c
+++ b/sys/kern/sys_generic.c
@@ -1187,7 +1187,7 @@ selwakeup(sip)
sip->si_thread = NULL;
mtx_lock_spin(&sched_lock);
if (td->td_wchan == (caddr_t)&selwait) {
- if (td->td_proc->p_stat == SSLEEP)
+ if (td->td_state == TDS_SLP)
setrunnable(td);
else
cv_waitq_remove(td);
diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c
index dacb9d9..ab6f1e8 100644
--- a/sys/kern/sys_process.c
+++ b/sys/kern/sys_process.c
@@ -467,7 +467,7 @@ ptrace(struct thread *td, struct ptrace_args *uap)
}
/* not currently stopped */
- if (p->p_stat != SSTOP || (p->p_flag & P_WAITED) == 0) {
+ if (!P_SHOULDSTOP(p) || (p->p_flag & P_WAITED) == 0) {
error = EBUSY;
goto fail;
}
@@ -566,10 +566,12 @@ ptrace(struct thread *td, struct ptrace_args *uap)
if (proctree_locked)
sx_xunlock(&proctree_lock);
/* deliver or queue signal */
- if (p->p_stat == SSTOP) {
+ if (P_SHOULDSTOP(p)) {
p->p_xstat = uap->data;
mtx_lock_spin(&sched_lock);
+ p->p_flag &= ~(P_STOPPED_TRACE|P_STOPPED_SGNL);
setrunnable(td2); /* XXXKSE */
+ /* Need foreach kse in proc, ... make_kse_queued(). */
mtx_unlock_spin(&sched_lock);
} else if (uap->data)
psignal(p, uap->data);
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
index d8115fb..15a5d7c 100644
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -552,7 +552,7 @@
381 STD BSD { int kse_new(struct kse_mailbox * mbx, \
int new_grp_flag); }
382 STD BSD { int thread_wakeup(struct thread_mailbox *tmbx); }
-383 STD BSD { int kse_yield(void); }
+383 MSTD BSD { int kse_yield(void); }
384 UNIMPL BSD __mac_get_proc
385 UNIMPL BSD __mac_set_proc
386 UNIMPL BSD __mac_get_fd
diff --git a/sys/kern/tty.c b/sys/kern/tty.c
index b9c5743..6c915e1 100644
--- a/sys/kern/tty.c
+++ b/sys/kern/tty.c
@@ -2392,17 +2392,35 @@ ttyinfo(struct tty *tp)
PGRP_UNLOCK(tp->t_pgrp);
td = FIRST_THREAD_IN_PROC(pick);
- stmp = pick->p_stat == SRUN ? "running" : /* XXXKSE */
- pick->p_stat == SMTX ? td->td_mtxname :
- td->td_wmesg ? td->td_wmesg : "iowait";
+ if (pick->p_flag & P_KSES) {
+ stmp = "KSE" ; /* XXXKSE */
+ } else {
+ if (td) {
+ if (td->td_state == TDS_RUNQ) {
+ stmp = "running";
+ } else if (td->td_state == TDS_MTX) {
+ stmp = td->td_mtxname;
+ } else if (td->td_wmesg) {
+ stmp = td->td_wmesg;
+ } else {
+ stmp = "iowait";
+ }
+ } else {
+ stmp = "threadless";
+ panic("ttyinfo: no thread!?");
+ }
+ }
calcru(pick, &utime, &stime, NULL);
- ltmp = pick->p_stat == SIDL || pick->p_stat == SWAIT ||
- pick->p_stat == SZOMB ? 0 :
- pgtok(vmspace_resident_count(pick->p_vmspace));
+ ltmp = ((pick->p_state == PRS_NEW)
+ || (td && (td->td_state == TDS_IWAIT))
+ || (pick->p_state == PRS_ZOMBIE ? 0 :
+ pgtok(vmspace_resident_count(pick->p_vmspace))));
mtx_unlock_spin(&sched_lock);
ttyprintf(tp, " cmd: %s %d [%s%s] ", pick->p_comm,
- pick->p_pid, pick->p_stat == SMTX ? "*" : "", stmp);
+ pick->p_pid,
+ td->td_state == TDS_MTX ? "*" : "",
+ stmp);
/* Print user time. */
ttyprintf(tp, "%ld.%02ldu ",
@@ -2433,7 +2451,19 @@ ttyinfo(struct tty *tp)
* we pick out just "short-term" sleepers (P_SINTR == 0).
* 4) Further ties are broken by picking the highest pid.
*/
-#define ISRUN(p) (((p)->p_stat == SRUN) || ((p)->p_stat == SIDL))
+#define ISRUN(p, val) \
+do { \
+ struct thread *td; \
+ val = 0; \
+ FOREACH_THREAD_IN_PROC(p, td) { \
+ if (td->td_state == TDS_RUNQ || \
+ td->td_state == TDS_RUNNING) { \
+ val = 1; \
+ break; \
+ } \
+ } \
+} while (0)
+
#define TESTAB(a, b) ((a)<<1 | (b))
#define ONLYA 2
#define ONLYB 1
@@ -2449,10 +2479,13 @@ proc_compare(struct proc *p1, struct proc *p2)
if (p1 == NULL)
return (1);
+ ISRUN(p1, esta);
+ ISRUN(p2, estb);
+
/*
* see if at least one of them is runnable
*/
- switch (TESTAB(ISRUN(p1), ISRUN(p2))) {
+ switch (TESTAB(esta, estb)) {
case ONLYA:
return (0);
case ONLYB:
@@ -2477,7 +2510,7 @@ proc_compare(struct proc *p1, struct proc *p2)
/*
* weed out zombies
*/
- switch (TESTAB(p1->p_stat == SZOMB, p2->p_stat == SZOMB)) {
+ switch (TESTAB(p1->p_state == PRS_ZOMBIE, p2->p_state == PRS_ZOMBIE)) {
case ONLYA:
return (1);
case ONLYB:
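
ISRUN() in tty.c changes from an expression over p_stat to a statement macro that walks every thread in the process, which is why proc_compare() now has to pass in a variable to receive the answer. A self-contained imitation of that pattern follows; SK_ISRUN and the sk_* types are invented for the sketch and are not the kernel macro itself.

/* Statement-style "is any thread runnable?" check, in the ISRUN() shape. */
#include <stdio.h>

struct sk_thread {
	int runnable;
	struct sk_thread *next;
};

struct sk_proc {
	struct sk_thread *threads;
};

#define SK_ISRUN(p, val) do {					\
	struct sk_thread *_td;					\
	(val) = 0;						\
	for (_td = (p)->threads; _td != NULL; _td = _td->next)	\
		if (_td->runnable) {				\
			(val) = 1;				\
			break;					\
		}						\
} while (0)

int
main(void)
{
	struct sk_thread t2 = { 1, NULL };
	struct sk_thread t1 = { 0, &t2 };
	struct sk_proc p = { &t1 };
	int runs;

	SK_ISRUN(&p, runs);
	printf("runnable: %d\n", runs);
	return (0);
}
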
diff --git a/sys/posix4/ksched.c b/sys/posix4/ksched.c
index c9081c3..bbe36be 100644
--- a/sys/posix4/ksched.c
+++ b/sys/posix4/ksched.c
@@ -181,7 +181,18 @@ int ksched_setscheduler(register_t *ret, struct ksched *ksched,
mtx_lock_spin(&sched_lock);
rtp_to_pri(&rtp, kg);
- td->td_last_kse->ke_flags |= KEF_NEEDRESCHED; /* XXXKSE */
+ FOREACH_THREAD_IN_GROUP(kg, td) { /* XXXKSE */
+ if (td->td_state == TDS_RUNNING) {
+ td->td_kse->ke_flags |= KEF_NEEDRESCHED;
+ } else if (td->td_state == TDS_RUNQ) {
+ if (td->td_priority > kg->kg_user_pri) {
+ remrunqueue(td);
+ td->td_priority =
+ kg->kg_user_pri;
+ setrunqueue(td);
+ }
+ }
+ }
mtx_unlock_spin(&sched_lock);
}
else
@@ -203,7 +214,19 @@ int ksched_setscheduler(register_t *ret, struct ksched *ksched,
* on the scheduling code: You must leave the
* scheduling info alone.
*/
- td->td_last_kse->ke_flags |= KEF_NEEDRESCHED; /* XXXKSE */
+ FOREACH_THREAD_IN_GROUP(kg, td) {
+ if (td->td_state == TDS_RUNNING) {
+ td->td_kse->ke_flags |= KEF_NEEDRESCHED;
+ } else if (td->td_state == TDS_RUNQ) {
+ if (td->td_priority > kg->kg_user_pri) {
+ remrunqueue(td);
+ td->td_priority =
+ kg->kg_user_pri;
+ setrunqueue(td);
+ }
+ }
+
+ }
mtx_unlock_spin(&sched_lock);
}
break;
diff --git a/sys/sparc64/sparc64/genassym.c b/sys/sparc64/sparc64/genassym.c
index 4f47a75..eee4abc 100644
--- a/sys/sparc64/sparc64/genassym.c
+++ b/sys/sparc64/sparc64/genassym.c
@@ -232,6 +232,8 @@ ASSYM(TD_KSE, offsetof(struct thread, td_kse));
ASSYM(TD_KSTACK, offsetof(struct thread, td_kstack));
ASSYM(TD_PCB, offsetof(struct thread, td_pcb));
ASSYM(TD_PROC, offsetof(struct thread, td_proc));
+ASSYM(TD_STATE, offsetof(struct thread, td_state));
+ASSYM(TDS_RUNNING, TDS_RUNNING);
ASSYM(PCB_SIZEOF, sizeof(struct pcb));
ASSYM(PCB_FPSTATE, offsetof(struct pcb, pcb_fpstate));
diff --git a/sys/sparc64/sparc64/swtch.S b/sys/sparc64/sparc64/swtch.S
index 429e961..a8a753a 100644
--- a/sys/sparc64/sparc64/swtch.S
+++ b/sys/sparc64/sparc64/swtch.S
@@ -109,6 +109,9 @@ ENTRY(cpu_switch)
stx %o0, [PCPU(CURTHREAD)]
stx %o1, [PCPU(CURPCB)]
+ mov TDS_RUNNING, %o2
+ stw %o2, [%o0 + TD_STATE]
+
SET(sched_lock, %o3, %o2)
stx %o0, [%o2 + MTX_LOCK]
diff --git a/sys/sparc64/sparc64/swtch.s b/sys/sparc64/sparc64/swtch.s
index 429e961..a8a753a 100644
--- a/sys/sparc64/sparc64/swtch.s
+++ b/sys/sparc64/sparc64/swtch.s
@@ -109,6 +109,9 @@ ENTRY(cpu_switch)
stx %o0, [PCPU(CURTHREAD)]
stx %o1, [PCPU(CURPCB)]
+ mov TDS_RUNNING, %o2
+ stw %o2, [%o0 + TD_STATE]
+
SET(sched_lock, %o3, %o2)
stx %o0, [%o2 + MTX_LOCK]
diff --git a/sys/sparc64/sparc64/trap.c b/sys/sparc64/sparc64/trap.c
index 61e3b44..f39d2f6 100644
--- a/sys/sparc64/sparc64/trap.c
+++ b/sys/sparc64/sparc64/trap.c
@@ -49,6 +49,7 @@
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/ktr.h>
+#include <sys/kse.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/systm.h>
@@ -190,6 +191,11 @@ trap(struct trapframe *tf)
td->td_frame = tf;
if (td->td_ucred != p->p_ucred)
cred_update_thread(td);
+ if ((p->p_flag & P_WEXIT) && (p->p_singlethread != td)) {
+ mtx_lock_spin(&sched_lock);
+ thread_exit();
+ /* NOTREACHED */
+ }
} else {
sticks = 0;
if ((type & ~T_KERNEL) != T_BREAKPOINT)
@@ -528,6 +534,23 @@ syscall(struct trapframe *tf)
td->td_frame = tf;
if (td->td_ucred != p->p_ucred)
cred_update_thread(td);
+ if (p->p_flag & P_KSES) {
+ /*
+ * If we are doing a syscall in a KSE environment,
+ * note where our mailbox is. There is always the
+ * possibility that we could do this lazily (in sleep()),
+ * but for now do it every time.
+ */
+ td->td_mailbox = (void *)fuword((caddr_t)td->td_kse->ke_mailbox
+ + offsetof(struct kse_mailbox, kmbx_current_thread));
+ if ((td->td_mailbox == NULL) ||
+ (td->td_mailbox == (void *)-1)) {
+ td->td_mailbox = NULL; /* single thread it.. */
+ td->td_flags &= ~TDF_UNBOUND;
+ } else {
+ td->td_flags |= TDF_UNBOUND;
+ }
+ }
code = tf->tf_global[1];
/*
@@ -634,17 +657,17 @@ syscall(struct trapframe *tf)
}
/*
- * Handle reschedule and other end-of-syscall issues
- */
- userret(td, tf, sticks);
-
- /*
* Release Giant if we had to get it. Don't use mtx_owned(),
* we want to catch broken syscalls.
*/
if ((callp->sy_narg & SYF_MPSAFE) == 0)
mtx_unlock(&Giant);
+ /*
+ * Handle reschedule and other end-of-syscall issues
+ */
+ userret(td, tf, sticks);
+
#ifdef KTRACE
if (KTRPOINT(td, KTR_SYSRET))
ktrsysret(code, error, td->td_retval[0]);
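
The syscall() hunk above decides at syscall entry whether a KSE thread runs bound or unbound: it reads the current-thread pointer out of the KSE's userland mailbox with fuword(), and a NULL or failed read means "single-thread it", otherwise the thread is marked TDF_UNBOUND. The userland sketch below models only that decision; fake_fuword() and the sk_* names are invented, and the real code also keeps the mailbox address for later context export.

/* Bound/unbound decision on syscall entry, modelled in userland. */
#include <stdio.h>

#define TDF_UNBOUND	0x0001

struct sk_thread {
	int	 flags;
	void	*mailbox;
};

/* Pretend copy-in of a user pointer; would be fuword() in the kernel. */
static void *
fake_fuword(void **uaddr)
{
	return (*uaddr);
}

static void
syscall_entry(struct sk_thread *td, void **user_current_thread)
{
	td->mailbox = fake_fuword(user_current_thread);
	if (td->mailbox == NULL || td->mailbox == (void *)-1) {
		td->mailbox = NULL;		/* single-thread it */
		td->flags &= ~TDF_UNBOUND;
	} else {
		td->flags |= TDF_UNBOUND;
	}
}

int
main(void)
{
	struct sk_thread td = { 0, NULL };
	int user_tmbx;			/* stands in for the user mailbox */
	void *cur = &user_tmbx;

	syscall_entry(&td, &cur);
	printf("unbound: %d\n", (td.flags & TDF_UNBOUND) != 0);
	return (0);
}
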
diff --git a/sys/sparc64/sparc64/vm_machdep.c b/sys/sparc64/sparc64/vm_machdep.c
index a896754..8282e93 100644
--- a/sys/sparc64/sparc64/vm_machdep.c
+++ b/sys/sparc64/sparc64/vm_machdep.c
@@ -108,6 +108,42 @@ cpu_sched_exit(struct thread *td)
}
}
+void
+cpu_thread_exit(struct thread *td)
+{
+}
+
+void
+cpu_thread_setup(struct thread *td)
+{
+}
+
+void
+cpu_save_upcall(struct thread *td, struct kse *newkse)
+{
+}
+
+void
+cpu_set_upcall(struct thread *td, void *pcb)
+{
+}
+
+void
+cpu_set_args(struct thread *td, struct kse *ke)
+{
+}
+
+void
+cpu_free_kse_mdstorage(struct kse *ke)
+{
+}
+
+int
+cpu_export_context(struct thread *td)
+{
+ return (0);
+}
+
/*
* Finish a fork operation, with process p2 nearly set up.
* Copy and update the pcb, set up the stack so that the child
diff --git a/sys/sys/condvar.h b/sys/sys/condvar.h
index 0050255..cf6a6c6 100644
--- a/sys/sys/condvar.h
+++ b/sys/sys/condvar.h
@@ -62,6 +62,7 @@ void cv_signal(struct cv *cvp);
void cv_broadcast(struct cv *cvp);
void cv_waitq_remove(struct thread *td);
+void cv_abort(struct thread *td);
#define cv_waitq_empty(cvp) (TAILQ_EMPTY(&(cvp)->cv_waitq))
#define cv_wmesg(cvp) ((cvp)->cv_description)
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index a4f29de..2c198c8 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -249,12 +249,13 @@ They would be given priorities calculated from the KSEG.
* This is what is put to sleep and reactivated.
* The first KSE available in the correct group will run this thread.
* If several are available, use the one on the same CPU as last time.
+ * When waiting to be run, threads are hung off the KSEGRP in priority order.
+ * With N runnable and queued KSEs in the KSEGRP, the first N threads
+ * are linked to them. Other threads are not yet assigned.
*/
struct thread {
struct proc *td_proc; /* Associated process. */
struct ksegrp *td_ksegrp; /* Associated KSEG. */
- struct kse *td_last_kse; /* Where it wants to be if possible. */
- struct kse *td_kse; /* Current KSE if running. */
TAILQ_ENTRY(thread) td_plist; /* All threads in this proc */
TAILQ_ENTRY(thread) td_kglist; /* All threads in this ksegrp */
@@ -267,6 +268,8 @@ struct thread {
#define td_startzero td_flags
int td_flags; /* (j) TDF_* flags. */
+ struct kse *td_last_kse; /* Where it wants to be if possible. */
+ struct kse *td_kse; /* Current KSE if running. */
int td_dupfd; /* (k) Ret value from fdopen. XXX */
void *td_wchan; /* (j) Sleep address. */
const char *td_wmesg; /* (j) Reason for sleep. */
@@ -280,6 +283,8 @@ struct thread {
LIST_HEAD(, mtx) td_contested; /* (j) Contested locks. */
struct lock_list_entry *td_sleeplocks; /* (k) Held sleep locks. */
int td_intr_nesting_level; /* (k) Interrupt recursion. */
+ void *td_mailbox; /* the userland mailbox address */
+ struct ucred *td_ucred; /* (k) Reference to credentials. */
#define td_endzero td_md
#define td_startcopy td_endzero
@@ -290,14 +295,44 @@ struct thread {
u_char td_priority; /* (j) Thread active priority. */
#define td_endcopy td_pcb
- struct ucred *td_ucred; /* (k) Reference to credentials. */
struct pcb *td_pcb; /* (k) Kernel VA of pcb and kstack. */
+ enum {
+ TDS_NEW = 0x20,
+ TDS_UNQUEUED,
+ TDS_SLP,
+ TDS_MTX,
+ TDS_RUNQ,
+ TDS_RUNNING,
+ TDS_SUSPENDED, /* would have liked to have run */
+ TDS_IWAIT,
+ TDS_SURPLUS
+ } td_state;
struct callout td_slpcallout; /* (h) Callout for sleep. */
struct trapframe *td_frame; /* (k) */
struct vm_object *td_kstack_obj;/* (a) Kstack object. */
vm_offset_t td_kstack; /* Kernel VA of kstack. */
u_int td_critnest; /* (k) Critical section nest level. */
};
+/* flags kept in td_flags */
+#define TDF_UNBOUND 0x000001 /* may give away the kse, uses the kg runq */
+#define TDF_SINTR 0x000008 /* Sleep is interruptible. */
+#define TDF_TIMEOUT 0x000010 /* Timing out during sleep. */
+#define TDF_SELECT 0x000040 /* Selecting; wakeup/waiting danger. */
+#define TDF_CVWAITQ 0x000080 /* Thread is on a cv_waitq (not slpq). */
+#define TDF_UPCALLING 0x000100 /* This thread is doing an upcall. */
+#define TDF_INMSLEEP 0x000400 /* Don't recurse in msleep() */
+#define TDF_TIMOFAIL 0x001000 /* Timeout from sleep after we were awake. */
+#define TDF_DEADLKTREAT 0x800000 /* Lock acquisition - deadlock treatment. */
+
+/*
+ * Traps for young players:
+ * The main thread flag that controls whether a thread acts as a threaded
+ * or unthreaded thread is the TDF_UNBOUND flag.
+ * UPCALLS run with the UNBOUND flag clear, after they are first scheduled,
+ * i.e. they bind themselves to whatever thread they are first scheduled with.
+ * You may see BOUND threads in KSE processes but you should never see
+ * UNBOUND threads in non-KSE processes.
+ */
/*
* The schedulable entity that can be given a context to run.
@@ -309,14 +344,14 @@ struct thread {
struct kse {
struct proc *ke_proc; /* Associated process. */
struct ksegrp *ke_ksegrp; /* Associated KSEG. */
- struct thread *ke_thread; /* Associated thread, if running. */
TAILQ_ENTRY(kse) ke_kglist; /* Queue of all KSEs in ke_ksegrp. */
TAILQ_ENTRY(kse) ke_kgrlist; /* Queue of all KSEs in this state. */
TAILQ_ENTRY(kse) ke_procq; /* (j) Run queue. */
- TAILQ_HEAD(, thread) ke_runq; /* (td_runq) RUNNABLE bound to KSE. */
#define ke_startzero ke_flags
int ke_flags; /* (j) KEF_* flags. */
+ struct thread *ke_thread; /* Active associated thread. */
+ struct thread *ke_bound; /* Thread bound to this KSE (*) */
/*u_int ke_estcpu; */ /* (j) Time averaged val of cpticks. */
int ke_cpticks; /* (j) Ticks of cpu time. */
fixpt_t ke_pctcpu; /* (j) %cpu during p_swtime. */
@@ -329,15 +364,45 @@ struct kse {
u_char ke_oncpu; /* (j) Which cpu we are on. */
u_int ke_slptime; /* (j) Time since last idle. */
char ke_rqindex; /* (j) Run queue index. */
-#define ke_endzero ke_priority
+ enum {
+ KES_IDLE = 0x10,
+ KES_ONRUNQ,
+ KES_UNQUEUED, /* in transit */
+ KES_RUNNING
+	} ke_state;	/* (j) KSE status. */
+ void *ke_mailbox; /* the userland mailbox address */
+ struct thread *ke_tdspare; /* spare thread for upcalls */
+#define ke_endzero ke_dummy
#define ke_startcopy ke_endzero
- u_char ke_priority; /* (j) Process priority. */
- u_char ke_usrpri; /* (j) User pri from cpu & nice. */
-#define ke_endcopy ke_end
-
- int ke_end; /* dummy entry */
+ u_char ke_dummy; /* */
+#define ke_endcopy ke_mdstorage
+
+ void *ke_upcall;
+ void *ke_stackbase;
+ u_long ke_stacksize;
+ void *ke_mdstorage; /* where we store the pcb and frame */
+ struct pcb *ke_pcb; /* the pcb saved for the upcalls */
+ struct trapframe *ke_frame; /* the upcall trapframe */
+	void	*mdkse;	/* eventually you load from this in the */
+			/* context switch, for our extension PCB (x86) */
};
+/* flags kept in ke_flags */
+#define KEF_OWEUPC 0x00002 /* Owe process an addupc() call at next ast. */
+#define KEF_IDLEKSE 0x00004 /* A 'Per CPU idle process'.. has one thread */
+#define KEF_LOANED 0x00004 /* On loan from the bound thread to another */
+#define KEF_ASTPENDING 0x00400 /* KSE has a pending ast. */
+#define KEF_NEEDRESCHED 0x00800 /* Process needs to yield. */
+
+/*
+ * (*) A bound KSE with a bound thread in a KSE process may be lent to
+ * other threads, as long as those threads do not leave the kernel.
+ * The other threads must be either exiting, or be unbound with a valid
+ * mailbox so that they can save their state there rather than going
+ * to user space. While this happens the real bound thread is still linked
+ * to the kse via the ke_bound field, and the KSE has its "KEF_LOANED"
+ * flag set.
+ */
/*
* Kernel-scheduled entity group (KSEG). The scheduler considers each KSEG to
@@ -348,27 +413,29 @@ struct ksegrp {
struct proc *kg_proc; /* Process that contains this KSEG. */
TAILQ_ENTRY(ksegrp) kg_ksegrp; /* Queue of KSEGs in kg_proc. */
TAILQ_HEAD(, kse) kg_kseq; /* (ke_kglist) All KSEs. */
- TAILQ_HEAD(, kse) kg_rq; /* (ke_kgrlist) Runnable KSEs. */
TAILQ_HEAD(, kse) kg_iq; /* (ke_kgrlist) Idle KSEs. */
TAILQ_HEAD(, thread) kg_threads;/* (td_kglist) All threads. */
- TAILQ_HEAD(, thread) kg_runq; /* (td_runq) Unbound RUNNABLE threads */
+ TAILQ_HEAD(, thread) kg_runq; /* (td_runq) waiting RUNNABLE threads */
TAILQ_HEAD(, thread) kg_slpq; /* (td_runq) NONRUNNABLE threads. */
#define kg_startzero kg_estcpu
u_int kg_estcpu; /* Sum of the same field in KSEs. */
u_int kg_slptime; /* (j) How long completely blocked. */
+ struct thread *kg_last_assigned; /* Last thread assigned to a KSE */
+ int kg_numthreads; /* Num threads in total */
+ int kg_runnable; /* Num runnable threads on queue. */
+ int kg_kses; /* Num KSEs in group. */
+ int kg_runq_kses; /* Num KSEs on runq. */
+ int kg_idle_kses; /* num KSEs idle */
#define kg_endzero kg_pri_class
#define kg_startcopy kg_endzero
u_char kg_pri_class; /* (j) Scheduling class. */
u_char kg_user_pri; /* (j) User pri from estcpu and nice. */
char kg_nice; /* (j?/k?) Process "nice" value. */
- struct rtprio kg_rtprio; /* (j) Realtime priority. */
-#define kg_endcopy kg_runnable
-
- int kg_runnable; /* Num runnable threads on queue. */
- int kg_runq_kses; /* Num KSEs on runq. */
- int kg_kses; /* Num KSEs in group. */
+/* struct rtprio kg_rtprio; */ /* (j) Realtime priority. */
+#define kg_endcopy kg_dummy
+ int kg_dummy;
};
/*
@@ -379,6 +446,7 @@ struct proc {
LIST_ENTRY(proc) p_list; /* (d) List of all processes. */
TAILQ_HEAD(, ksegrp) p_ksegrps; /* (kg_ksegrp) All KSEGs. */
TAILQ_HEAD(, thread) p_threads; /* (td_plist) Threads. (shortcut) */
+ TAILQ_HEAD(, thread) p_suspended; /* (td_runq) suspended threads */
struct ucred *p_ucred; /* (c) Process owner's identity. */
struct filedesc *p_fd; /* (b) Ptr to open files structure. */
/* Accumulated stats for all KSEs? */
@@ -389,7 +457,6 @@ struct proc {
struct ksegrp p_ksegrp;
struct kse p_kse;
- struct thread p_xxthread;
/*
* The following don't make too much sense..
@@ -397,8 +464,12 @@ struct proc {
*/
int p_flag; /* (c) P_* flags. */
int p_sflag; /* (j) PS_* flags. */
- int p_stat; /* (j) S* process status. */
-
+ enum {
+ PRS_NEW = 0, /* In creation */
+ PRS_NORMAL, /* KSEs can be run */
+ PRS_WAIT, /* Waiting on interrupt ? */
+ PRS_ZOMBIE
+	} p_state;	/* (j) Process status. */
pid_t p_pid; /* (b) Process identifier. */
LIST_ENTRY(proc) p_hash; /* (d) Hash chain. */
LIST_ENTRY(proc) p_pglist; /* (g + e) List of processes in pgrp. */
@@ -431,6 +502,10 @@ struct proc {
u_char p_pfsflags; /* (c) Procfs flags. */
struct nlminfo *p_nlminfo; /* (?) Only used by/for lockd. */
void *p_aioinfo; /* (c) ASYNC I/O info. */
+ int p_numthreads; /* (?) number of threads */
+ int p_numksegrps; /* (?) number of ksegrps */
+ struct thread *p_singlethread;/* If single threading this is it */
+ int p_suspcount; /* # waiting threads in suspended mode*/
/* End area that is zeroed on creation. */
#define p_startcopy p_sigmask
@@ -467,13 +542,6 @@ struct proc {
#define NOCPU 0xff /* For p_oncpu when we aren't on a CPU. */
/* Status values (p_stat). */
-#define SIDL 1 /* Process being created by fork. */
-#define SRUN 2 /* Currently runnable. */
-#define SSLEEP 3 /* Sleeping on an address. */
-#define SSTOP 4 /* Process debugging or suspension. */
-#define SZOMB 5 /* Awaiting collection by parent. */
-#define SWAIT 6 /* Waiting for interrupt. */
-#define SMTX 7 /* Blocked on a mutex. */
/* These flags are kept in p_flag. */
#define P_ADVLOCK 0x00001 /* Process may hold a POSIX advisory lock. */
@@ -483,13 +551,21 @@ struct proc {
#define P_PPWAIT 0x00010 /* Parent is waiting for child to exec/exit. */
#define P_SUGID 0x00100 /* Had set id privileges since last exec. */
#define P_SYSTEM 0x00200 /* System proc: no sigs, stats or swapping. */
-#define P_TRACED 0x00800 /* Debugged process being traced. */
-#define P_WAITED 0x01000 /* Debugging process has waited for child. */
+#define P_WAITED 0x01000 /* Someone is waiting for us */
#define P_WEXIT 0x02000 /* Working on exiting. */
#define P_EXEC 0x04000 /* Process called exec. */
#define P_KSES 0x08000 /* Process is using KSEs. */
#define P_CONTINUED 0x10000 /* Proc has continued from a stopped state. */
+/* flags that control how threads may be suspended for some reason */
+#define P_STOPPED_SGNL 0x10000 /* Stopped due to SIGSTOP/SIGTSTP */
+#define P_STOPPED_TRACE 0x20000 /* Stopped because of tracing */
+#define P_STOPPED_SNGL 0x40000 /* Only one thread can continue (not to user) */
+#define P_SINGLE_EXIT 0x00400 /* Threads suspending should exit, not wait */
+#define P_TRACED 0x00800 /* Debugged process being traced. */
+#define P_STOPPED (P_STOPPED_SGNL|P_STOPPED_SNGL|P_STOPPED_TRACE)
+#define P_SHOULDSTOP(p) ((p)->p_flag & P_STOPPED)
+
/* Should be moved to machine-dependent areas. */
#define P_UNUSED100000 0x100000
#define P_COWINPROGRESS 0x400000 /* Snapshot copy-on-write in progress. */
@@ -508,21 +584,14 @@ struct proc {
#define PS_SWAPPING 0x00200 /* Process is being swapped. */
#define PS_NEEDSIGCHK 0x02000 /* Process may need signal delivery. */
-/* flags kept in td_flags */
-#define TDF_ONRUNQ 0x00001 /* This KE is on a run queue */
-#define TDF_SINTR 0x00008 /* Sleep is interruptible. */
-#define TDF_TIMEOUT 0x00010 /* Timing out during sleep. */
-#define TDF_SELECT 0x00040 /* Selecting; wakeup/waiting danger. */
-#define TDF_CVWAITQ 0x00080 /* Thread is on a cv_waitq (not slpq). */
-#define TDF_TIMOFAIL 0x01000 /* Timeout from sleep after we were awake. */
-#define TDF_DEADLKTREAT 0x800000 /* Lock aquisition - deadlock treatment. */
-
-/* flags kept in ke_flags */
-#define KEF_ONRUNQ 0x00001 /* This KE is on a run queue */
-#define KEF_OWEUPC 0x00002 /* Owe process an addupc() call at next ast. */
-#define KEF_ASTPENDING 0x00400 /* KSE has a pending ast. */
-#define KEF_NEEDRESCHED 0x00800 /* Process needs to yield. */
-
+/* used only in legacy conversion code */
+#define SIDL 1 /* Process being created by fork. */
+#define SRUN 2 /* Currently runnable. */
+#define SSLEEP 3 /* Sleeping on an address. */
+#define SSTOP 4 /* Process debugging or suspension. */
+#define SZOMB 5 /* Awaiting collection by parent. */
+#define SWAIT 6 /* Waiting for interrupt. */
+#define SMTX 7 /* Blocked on a mutex. */
#define P_MAGIC 0xbeefface
@@ -728,6 +797,7 @@ void pargs_drop(struct pargs *pa);
void pargs_free(struct pargs *pa);
void pargs_hold(struct pargs *pa);
void procinit(void);
+void threadinit(void);
void proc_linkup(struct proc *p, struct ksegrp *kg,
struct kse *ke, struct thread *td);
void proc_reparent(struct proc *child, struct proc *newparent);
@@ -758,7 +828,38 @@ void cpu_fork(struct thread *, struct proc *, struct thread *, int);
void cpu_set_fork_handler(struct thread *, void (*)(void *), void *);
void cpu_wait(struct proc *);
int cpu_coredump(struct thread *, struct vnode *, struct ucred *);
-struct thread *thread_get(struct proc *);
+
+/* New in KSE. */
+struct thread *thread_alloc(void);
+void thread_free(struct thread *td);
+int cpu_export_context(struct thread *td);
+void cpu_free_kse_mdstorage(struct kse *kse);
+void cpu_save_upcall(struct thread *td, struct kse *newkse);
+void cpu_set_args(struct thread *, struct kse *);
+void cpu_set_upcall(struct thread *td, void *pcb);
+void cpu_thread_exit(struct thread *);
+void cpu_thread_setup(struct thread *td);
+void kse_reassign(struct kse *ke);
+void kse_link(struct kse *ke, struct ksegrp *kg);
+void ksegrp_link(struct ksegrp *kg, struct proc *p);
+int kserunnable(void);
+void make_kse_runnable(struct kse *ke);
+void thread_exit(void) __dead2;
+int thread_export_context(struct thread *td);
+void thread_link(struct thread *td, struct ksegrp *kg);
+void thread_reap(void);
+struct thread *thread_schedule_upcall(struct thread *td, struct kse *ke);
+int thread_single(int how);
+#define SNGLE_NO_EXIT 0 /* values for 'how' */
+#define SNGLE_EXIT 1
+void thread_single_end(void);
+void thread_stash(struct thread *td);
+int thread_suspend_check(int how);
+void thread_unsuspend(struct proc *p);
+int thread_userret(struct proc *p, struct ksegrp *kg, struct kse *ke,
+ struct thread *td, struct trapframe *frame);
+
+void thread_sanity_check(struct thread *td);
#endif /* _KERNEL */
#endif /* !_SYS_PROC_H_ */
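
With p_stat gone, "is this process stopped?" becomes a test on three new p_flag bits, wrapped in P_SHOULDSTOP(); that is what the ptrace() changes above now call. The sketch below uses the same flag values added in proc.h, but for brevity the macro here takes the raw flag word instead of a struct proc pointer, so it is an illustration rather than the kernel macro.

/* Composite stop test over the new P_STOPPED_* flag bits. */
#include <stdio.h>

#define P_STOPPED_SGNL	0x10000
#define P_STOPPED_TRACE	0x20000
#define P_STOPPED_SNGL	0x40000
#define P_STOPPED	(P_STOPPED_SGNL | P_STOPPED_SNGL | P_STOPPED_TRACE)
#define P_SHOULDSTOP(flag)	((flag) & P_STOPPED)

int
main(void)
{
	int p_flag = P_STOPPED_TRACE;

	printf("should stop: %s\n", P_SHOULDSTOP(p_flag) ? "yes" : "no");
	p_flag &= ~(P_STOPPED_TRACE | P_STOPPED_SGNL); /* as in ptrace() above */
	printf("should stop: %s\n", P_SHOULDSTOP(p_flag) ? "yes" : "no");
	return (0);
}
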
diff --git a/sys/sys/queue.h b/sys/sys/queue.h
index 5209f4e..ffddc86 100644
--- a/sys/sys/queue.h
+++ b/sys/sys/queue.h
@@ -102,6 +102,36 @@
* _REMOVE + + + +
*
*/
+#define QUEUE_MACRO_DEBUG 1
+#ifdef QUEUE_MACRO_DEBUG
+struct qm_trace {
+ char * lastfile;
+ int lastline;
+ char * prevfile;
+ int prevline;
+};
+
+#define TRACEBUF struct qm_trace trace;
+
+#define QMD_TRACE_HEAD(head) do { \
+ (head)->trace.prevline = (head)->trace.lastline; \
+ (head)->trace.prevfile = (head)->trace.lastfile; \
+ (head)->trace.lastline = __LINE__; \
+ (head)->trace.lastfile = __FILE__; \
+} while (0)
+
+#define QMD_TRACE_ELEM(elem) do { \
+ (elem)->trace.prevline = (elem)->trace.lastline; \
+ (elem)->trace.prevfile = (elem)->trace.lastfile; \
+ (elem)->trace.lastline = __LINE__; \
+ (elem)->trace.lastfile = __FILE__; \
+} while (0)
+
+#else
+#define QMD_TRACE_ELEM(elem)
+#define QMD_TRACE_HEAD(head)
+#define TRACEBUF
+#endif /* QUEUE_MACRO_DEBUG */
/*
* Singly-linked List declarations.
@@ -329,6 +359,7 @@ struct { \
struct name { \
struct type *tqh_first; /* first element */ \
struct type **tqh_last; /* addr of last next element */ \
+ TRACEBUF \
}
#define TAILQ_HEAD_INITIALIZER(head) \
@@ -338,6 +369,7 @@ struct name { \
struct { \
struct type *tqe_next; /* next element */ \
struct type **tqe_prev; /* address of previous next element */ \
+ TRACEBUF \
}
/*
@@ -349,6 +381,8 @@ struct { \
(head2)->tqh_first->field.tqe_prev = (head1)->tqh_last; \
(head1)->tqh_last = (head2)->tqh_last; \
TAILQ_INIT((head2)); \
+ QMD_TRACE_HEAD(head); \
+ QMD_TRACE_HEAD(head2); \
} \
} while (0)
@@ -369,16 +403,21 @@ struct { \
#define TAILQ_INIT(head) do { \
TAILQ_FIRST((head)) = NULL; \
(head)->tqh_last = &TAILQ_FIRST((head)); \
+ QMD_TRACE_HEAD(head); \
} while (0)
#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \
if ((TAILQ_NEXT((elm), field) = TAILQ_NEXT((listelm), field)) != NULL)\
TAILQ_NEXT((elm), field)->field.tqe_prev = \
&TAILQ_NEXT((elm), field); \
- else \
+ else { \
(head)->tqh_last = &TAILQ_NEXT((elm), field); \
+ QMD_TRACE_HEAD(head); \
+ } \
TAILQ_NEXT((listelm), field) = (elm); \
(elm)->field.tqe_prev = &TAILQ_NEXT((listelm), field); \
+ QMD_TRACE_ELEM(&(elm)->field); \
+ QMD_TRACE_ELEM(&listelm->field); \
} while (0)
#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \
@@ -386,6 +425,8 @@ struct { \
TAILQ_NEXT((elm), field) = (listelm); \
*(listelm)->field.tqe_prev = (elm); \
(listelm)->field.tqe_prev = &TAILQ_NEXT((elm), field); \
+ QMD_TRACE_ELEM(&(elm)->field); \
+ QMD_TRACE_ELEM(&listelm->field); \
} while (0)
#define TAILQ_INSERT_HEAD(head, elm, field) do { \
@@ -396,6 +437,8 @@ struct { \
(head)->tqh_last = &TAILQ_NEXT((elm), field); \
TAILQ_FIRST((head)) = (elm); \
(elm)->field.tqe_prev = &TAILQ_FIRST((head)); \
+ QMD_TRACE_HEAD(head); \
+ QMD_TRACE_ELEM(&(elm)->field); \
} while (0)
#define TAILQ_INSERT_TAIL(head, elm, field) do { \
@@ -403,6 +446,8 @@ struct { \
(elm)->field.tqe_prev = (head)->tqh_last; \
*(head)->tqh_last = (elm); \
(head)->tqh_last = &TAILQ_NEXT((elm), field); \
+ QMD_TRACE_HEAD(head); \
+ QMD_TRACE_ELEM(&(elm)->field); \
} while (0)
#define TAILQ_LAST(head, headname) \
@@ -417,9 +462,13 @@ struct { \
if ((TAILQ_NEXT((elm), field)) != NULL) \
TAILQ_NEXT((elm), field)->field.tqe_prev = \
(elm)->field.tqe_prev; \
- else \
+ else { \
(head)->tqh_last = (elm)->field.tqe_prev; \
+ QMD_TRACE_HEAD(head); \
+ } \
*(elm)->field.tqe_prev = TAILQ_NEXT((elm), field); \
+ (elm)->field.tqe_next = (void *)-1; \
+ QMD_TRACE_ELEM(&(elm)->field); \
} while (0)
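
QUEUE_MACRO_DEBUG makes every traced TAILQ head and element remember the file and line of its last two queue operations, which is what you would inspect in a crash dump when one of the new KASSERTs in kern_switch.c fires on a corrupt queue. The standalone program below copies struct qm_trace and one of the macros to show what gets recorded; sk_head and the example call sites are invented.

/* What a traced TAILQ head records under QUEUE_MACRO_DEBUG. */
#include <stdio.h>

struct qm_trace {
	char	*lastfile;
	int	 lastline;
	char	*prevfile;
	int	 prevline;
};

#define QMD_TRACE_HEAD(head) do {				\
	(head)->trace.prevline = (head)->trace.lastline;	\
	(head)->trace.prevfile = (head)->trace.lastfile;	\
	(head)->trace.lastline = __LINE__;			\
	(head)->trace.lastfile = __FILE__;			\
} while (0)

struct sk_head {
	struct qm_trace trace;
};

int
main(void)
{
	struct sk_head h = { { NULL, 0, NULL, 0 } };

	QMD_TRACE_HEAD(&h);	/* e.g. inside TAILQ_INIT() */
	QMD_TRACE_HEAD(&h);	/* e.g. inside TAILQ_INSERT_TAIL() */
	printf("last: %s:%d, prev: %s:%d\n",
	    h.trace.lastfile, h.trace.lastline,
	    h.trace.prevfile, h.trace.prevline);
	return (0);
}
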
diff --git a/sys/sys/signalvar.h b/sys/sys/signalvar.h
index 6302d03..a8a68fc 100644
--- a/sys/sys/signalvar.h
+++ b/sys/sys/signalvar.h
@@ -234,10 +234,10 @@ extern struct mtx sigio_lock;
/*
* Machine-independent functions:
*/
-int cursig(struct proc *p);
+int cursig(struct thread *td);
void execsigs(struct proc *p);
void gsignal(int pgid, int sig);
-int issignal(struct proc *p);
+int issignal(struct thread *p);
void killproc(struct proc *p, char *why);
void pgsigio(struct sigio **, int signum, int checkctty);
void pgsignal(struct pgrp *pgrp, int sig, int checkctty);
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
index ccba626..134700b 100644
--- a/sys/sys/systm.h
+++ b/sys/sys/systm.h
@@ -309,6 +309,7 @@ extern watchdog_tickle_fn wdog_tickler;
*/
int msleep(void *chan, struct mtx *mtx, int pri, const char *wmesg,
int timo);
+void abortsleep(struct thread *td);
#define tsleep(chan, pri, wmesg, timo) msleep(chan, NULL, pri, wmesg, timo)
void wakeup(void *chan);
void wakeup_one(void *chan);
diff --git a/sys/sys/ucred.h b/sys/sys/ucred.h
index 3025eb4..565bd41 100644
--- a/sys/sys/ucred.h
+++ b/sys/sys/ucred.h
@@ -44,15 +44,15 @@
* Only the suser() or suser_cred() function should be used for this.
*/
struct ucred {
- u_int cr_ref; /* reference count */
+ u_int cr_ref; /* reference count */
#define cr_startcopy cr_uid
- uid_t cr_uid; /* effective user id */
- uid_t cr_ruid; /* real user id */
- uid_t cr_svuid; /* saved user id */
- short cr_ngroups; /* number of groups */
- gid_t cr_groups[NGROUPS]; /* groups */
- gid_t cr_rgid; /* real group id */
- gid_t cr_svgid; /* saved user id */
+ uid_t cr_uid; /* effective user id */
+ uid_t cr_ruid; /* real user id */
+ uid_t cr_svuid; /* saved user id */
+ short cr_ngroups; /* number of groups */
+ gid_t cr_groups[NGROUPS]; /* groups */
+ gid_t cr_rgid; /* real group id */
+ gid_t cr_svgid; /* saved user id */
struct uidinfo *cr_uidinfo; /* per euid resource consumption */
struct uidinfo *cr_ruidinfo; /* per ruid resource consumption */
struct prison *cr_prison; /* jail(4) */
diff --git a/sys/vm/uma_int.h b/sys/vm/uma_int.h
index e09d549..cf6dc39 100644
--- a/sys/vm/uma_int.h
+++ b/sys/vm/uma_int.h
@@ -109,7 +109,7 @@
#define UMA_SLAB_MASK (PAGE_SIZE - 1) /* Mask to get back to the page */
#define UMA_SLAB_SHIFT PAGE_SHIFT /* Number of bits PAGE_MASK */
-#define UMA_BOOT_PAGES 15 /* Number of pages allocated for startup */
+#define UMA_BOOT_PAGES 30 /* Number of pages allocated for startup */
#define UMA_WORKING_TIME 20 /* Seconds worth of items to keep */
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
index 6c48cbc..25aa48e 100644
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -299,8 +299,11 @@ vm_waitproc(p)
GIANT_REQUIRED;
cpu_wait(p);
pmap_dispose_proc(p); /* drop per-process resources */
- FOREACH_THREAD_IN_PROC(p, td)
+/* XXXKSE by here there should not be any threads left! */
+ FOREACH_THREAD_IN_PROC(p, td) {
+ panic("vm_waitproc: Survivor thread!");
pmap_dispose_thread(td);
+ }
vmspace_exitfree(p); /* and clean-out the vmspace */
}
@@ -355,7 +358,7 @@ faultin(p)
PROC_LOCK(p);
mtx_lock_spin(&sched_lock);
FOREACH_THREAD_IN_PROC (p, td)
- if (td->td_proc->p_stat == SRUN) /* XXXKSE */
+ if (td->td_state == TDS_RUNQ) /* XXXKSE */
setrunqueue(td);
p->p_sflag |= PS_INMEM;
@@ -371,7 +374,7 @@ faultin(p)
* is enough space for them. Of course, if a process waits for a long
* time, it will be swapped in anyway.
*
- * XXXKSE - KSEGRP with highest priority counts..
+ * XXXKSE - process with the thread with highest priority counts..
*
* Giant is still held at this point, to be released in tsleep.
*/
@@ -381,6 +384,7 @@ scheduler(dummy)
void *dummy;
{
struct proc *p;
+ struct thread *td;
int pri;
struct proc *pp;
int ppri;
@@ -399,11 +403,14 @@ loop:
sx_slock(&allproc_lock);
FOREACH_PROC_IN_SYSTEM(p) {
struct ksegrp *kg;
+ if (p->p_sflag & (PS_INMEM | PS_SWAPPING)) {
+ continue;
+ }
mtx_lock_spin(&sched_lock);
- if (p->p_stat == SRUN
- && (p->p_sflag & (PS_INMEM | PS_SWAPPING)) == 0) {
- /* Find the minimum sleeptime for the process */
- FOREACH_KSEGRP_IN_PROC(p, kg) {
+ FOREACH_THREAD_IN_PROC(p, td) {
+ /* Only consider runnable threads */
+ if (td->td_state == TDS_RUNQ) {
+ kg = td->td_ksegrp;
pri = p->p_swtime + kg->kg_slptime;
if ((p->p_sflag & PS_SWAPINREQ) == 0) {
pri -= kg->kg_nice * 8;
@@ -438,6 +445,7 @@ loop:
/*
* We would like to bring someone in. (only if there is space).
+ * [What checks the space? ]
*/
PROC_LOCK(p);
faultin(p);
@@ -478,6 +486,7 @@ swapout_procs(action)
int action;
{
struct proc *p;
+ struct thread *td;
struct ksegrp *kg;
struct proc *outp, *outp2;
int outpri, outpri2;
@@ -489,13 +498,13 @@ int action;
outpri = outpri2 = INT_MIN;
retry:
sx_slock(&allproc_lock);
- LIST_FOREACH(p, &allproc, p_list) {
+ FOREACH_PROC_IN_SYSTEM(p) {
struct vmspace *vm;
int minslptime = 100000;
PROC_LOCK(p);
if (p->p_lock != 0 ||
- (p->p_flag & (P_TRACED|P_SYSTEM|P_WEXIT)) != 0) {
+ (p->p_flag & (P_STOPPED_SNGL|P_TRACED|P_SYSTEM|P_WEXIT)) != 0) {
PROC_UNLOCK(p);
continue;
}
@@ -512,14 +521,15 @@ retry:
continue;
}
- switch (p->p_stat) {
+ switch (p->p_state) {
default:
+ /* Don't swap out processes in any sort
+ * of 'special' state. */
mtx_unlock_spin(&sched_lock);
PROC_UNLOCK(p);
continue;
- case SSLEEP:
- case SSTOP:
+ case PRS_NORMAL:
/*
* do not swapout a realtime process
* Check all the thread groups..
@@ -537,13 +547,18 @@ retry:
* Also guarantee swap_idle_threshold1
* time in memory.
*/
- if (((FIRST_THREAD_IN_PROC(p)->td_priority) < PSOCK) ||
- (kg->kg_slptime < swap_idle_threshold1)) {
+ if (kg->kg_slptime < swap_idle_threshold1) {
mtx_unlock_spin(&sched_lock);
PROC_UNLOCK(p);
goto nextproc;
}
-
+ FOREACH_THREAD_IN_PROC(p, td) {
+ if ((td->td_priority) < PSOCK) {
+ mtx_unlock_spin(&sched_lock);
+ PROC_UNLOCK(p);
+ goto nextproc;
+ }
+ }
/*
* If the system is under memory stress,
* or if we are swapping
@@ -624,14 +639,13 @@ swapout(p)
p->p_sflag |= PS_SWAPPING;
PROC_UNLOCK(p);
FOREACH_THREAD_IN_PROC (p, td)
- if (td->td_proc->p_stat == SRUN) /* XXXKSE */
+ if (td->td_state == TDS_RUNQ) /* XXXKSE */
remrunqueue(td); /* XXXKSE */
mtx_unlock_spin(&sched_lock);
pmap_swapout_proc(p);
FOREACH_THREAD_IN_PROC(p, td)
pmap_swapout_thread(td);
-
mtx_lock_spin(&sched_lock);
p->p_sflag &= ~PS_SWAPPING;
p->p_swtime = 0;
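
The vm_glue.c changes above all follow the same pattern: anywhere the old code consulted the single p_stat, the new code walks the process's threads and reasons from per-thread td_state. A hedged helper showing the shape of that scan (the function itself is illustrative; the macros, fields, and lock follow the hunks above):

static int
proc_has_runnable_thread(struct proc *p)
{
	struct thread *td;
	int runnable = 0;

	mtx_lock_spin(&sched_lock);
	FOREACH_THREAD_IN_PROC(p, td) {
		if (td->td_state == TDS_RUNQ || td->td_state == TDS_RUNNING) {
			runnable = 1;
			break;
		}
	}
	mtx_unlock_spin(&sched_lock);
	return (runnable);
}
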
diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c
index 935979ae..a1b8adb 100644
--- a/sys/vm/vm_meter.c
+++ b/sys/vm/vm_meter.c
@@ -81,6 +81,7 @@ SYSCTL_STRUCT(_vm, VM_LOADAVG, loadavg, CTLFLAG_RD,
static int
vmtotal(SYSCTL_HANDLER_ARGS)
{
+/* XXXKSE almost completely broken */
struct proc *p;
struct vmtotal total, *totalp;
vm_map_entry_t entry;
@@ -88,6 +89,7 @@ vmtotal(SYSCTL_HANDLER_ARGS)
vm_map_t map;
int paging;
struct ksegrp *kg;
+ struct thread *td;
totalp = &total;
bzero(totalp, sizeof *totalp);
@@ -107,44 +109,49 @@ vmtotal(SYSCTL_HANDLER_ARGS)
if (p->p_flag & P_SYSTEM)
continue;
mtx_lock_spin(&sched_lock);
- switch (p->p_stat) {
- case 0:
+ switch (p->p_state) {
+ case PRS_NEW:
+ if (p->p_sflag & PS_INMEM)
+ totalp->t_rq++;
+ else
+ totalp->t_sw++;
mtx_unlock_spin(&sched_lock);
continue;
-
- case SMTX:
- case SSLEEP:
- case SSTOP:
- kg = &p->p_ksegrp; /* XXXKSE */
- if (p->p_sflag & PS_INMEM) {
- if (FIRST_THREAD_IN_PROC(p)->td_priority
- <= PZERO)
- totalp->t_dw++;
- else if (kg->kg_slptime < maxslp)
- totalp->t_sl++;
- } else if (kg->kg_slptime < maxslp)
- totalp->t_sw++;
- if (kg->kg_slptime >= maxslp) {
- mtx_unlock_spin(&sched_lock);
- continue;
- }
break;
+ default:
+ FOREACH_THREAD_IN_PROC(p, td) {
+ switch (td->td_state) {
+ case TDS_MTX:
+ case TDS_SLP:
+ kg = td->td_ksegrp; /* XXXKSE */
+ if (p->p_sflag & PS_INMEM) {
+ if (td->td_priority <= PZERO)
+ totalp->t_dw++;
+ else if (kg->kg_slptime
+ < maxslp)
+ totalp->t_sl++;
+ } else if (kg->kg_slptime < maxslp)
+ totalp->t_sw++;
+ if (kg->kg_slptime >= maxslp) {
+ continue;
+ }
+ break;
- case SWAIT:
- totalp->t_sl++;
- continue;
+ case TDS_RUNQ:
+ case TDS_RUNNING:
+ if (p->p_sflag & PS_INMEM)
+ totalp->t_rq++;
+ else
+ totalp->t_sw++;
+ continue;
- case SRUN:
- case SIDL:
- if (p->p_sflag & PS_INMEM)
- totalp->t_rq++;
- else
- totalp->t_sw++;
- if (p->p_stat == SIDL) {
- mtx_unlock_spin(&sched_lock);
- continue;
+ case TDS_IWAIT:
+ totalp->t_sl++;
+ continue;
+ default:
+ break;
+ }
}
- break;
}
mtx_unlock_spin(&sched_lock);
/*
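
vmtotal() now classifies every thread individually (and, per the XXXKSE note, is still rough: the continue statements inside the thread loop no longer skip the rest of the process the way the old per-process switch did). A simplified sketch of the per-thread tally, with the td_priority and kg_slptime refinements from the hunk omitted (illustrative helper, not from the commit):

static void
vmtotal_count_thread(struct vmtotal *totalp, struct proc *p, struct thread *td)
{
	switch (td->td_state) {
	case TDS_RUNQ:
	case TDS_RUNNING:
		if (p->p_sflag & PS_INMEM)
			totalp->t_rq++;		/* runnable and resident */
		else
			totalp->t_sw++;		/* runnable but swapped out */
		break;
	case TDS_MTX:
	case TDS_SLP:
		if (p->p_sflag & PS_INMEM)
			totalp->t_dw++;		/* waiting in core */
		else
			totalp->t_sw++;
		break;
	case TDS_IWAIT:
		totalp->t_sl++;			/* idle wait counts as sleeping */
		break;
	default:
		break;
	}
}
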
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 5708d8d..2e5bd07 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -642,6 +642,7 @@ vm_pageout_scan(int pass)
int vnodes_skipped = 0;
int maxlaunder;
int s;
+ struct thread *td;
GIANT_REQUIRED;
/*
@@ -1123,7 +1124,8 @@ rescan0:
bigproc = NULL;
bigsize = 0;
sx_slock(&allproc_lock);
- LIST_FOREACH(p, &allproc, p_list) {
+ FOREACH_PROC_IN_SYSTEM(p) {
+ int breakout;
/*
* If this process is already locked, skip it.
*/
@@ -1139,10 +1141,19 @@ rescan0:
}
/*
* if the process is in a non-running type state,
- * don't touch it.
+ * don't touch it. Check all the threads individually.
*/
mtx_lock_spin(&sched_lock);
- if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
+ breakout = 0;
+ FOREACH_THREAD_IN_PROC(p, td) {
+ if (td->td_state != TDS_RUNQ &&
+ td->td_state != TDS_RUNNING &&
+ td->td_state != TDS_SLP) {
+ breakout = 1;
+ break;
+ }
+ }
+ if (breakout) {
mtx_unlock_spin(&sched_lock);
PROC_UNLOCK(p);
continue;
@@ -1445,6 +1456,8 @@ static void
vm_daemon()
{
struct proc *p;
+ int breakout;
+ struct thread *td;
mtx_lock(&Giant);
while (TRUE) {
@@ -1473,7 +1486,16 @@ vm_daemon()
* don't touch it.
*/
mtx_lock_spin(&sched_lock);
- if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
+ breakout = 0;
+ FOREACH_THREAD_IN_PROC(p, td) {
+ if (td->td_state != TDS_RUNQ &&
+ td->td_state != TDS_RUNNING &&
+ td->td_state != TDS_SLP) {
+ breakout = 1;
+ break;
+ }
+ }
+ if (breakout) {
mtx_unlock_spin(&sched_lock);
continue;
}
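
Both vm_pageout_scan() and vm_daemon() now carry the same open-coded breakout loop: a process is only eligible if every one of its threads is in a running-type state. The shared check could be expressed as a helper along these lines (illustrative; the commit keeps the loop duplicated, and the caller is assumed to hold sched_lock as in the hunks):

static int
proc_all_threads_running_type(struct proc *p)
{
	struct thread *td;

	FOREACH_THREAD_IN_PROC(p, td) {
		if (td->td_state != TDS_RUNQ &&
		    td->td_state != TDS_RUNNING &&
		    td->td_state != TDS_SLP)
			return (0);	/* a thread is in some special state */
	}
	return (1);
}
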
diff --git a/sys/vm/vm_zeroidle.c b/sys/vm/vm_zeroidle.c
index 99ace6e..d7ab1ce 100644
--- a/sys/vm/vm_zeroidle.c
+++ b/sys/vm/vm_zeroidle.c
@@ -127,7 +127,6 @@ vm_pagezero(void)
pages += vm_page_zero_idle();
if (pages > idlezero_maxrun) {
mtx_lock_spin(&sched_lock);
- setrunqueue(td);
td->td_proc->p_stats->p_ru.ru_nvcsw++;
mi_switch();
mtx_unlock_spin(&sched_lock);
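
The dropped setrunqueue(td) suggests that under the new per-thread scheduler the switch path itself requeues a still-runnable thread, so the explicit enqueue would be redundant for a thread that is still running. The voluntary-yield idiom that remains looks like this (hedged sketch of the pattern, wrapped in a helper purely for illustration):

static void
kthread_yield_sketch(struct thread *td)
{
	mtx_lock_spin(&sched_lock);
	td->td_proc->p_stats->p_ru.ru_nvcsw++;	/* count a voluntary switch */
	mi_switch();				/* scheduler picks the next thread */
	mtx_unlock_spin(&sched_lock);
}
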