author | julian <julian@FreeBSD.org> | 2002-06-29 17:26:22 +0000
committer | julian <julian@FreeBSD.org> | 2002-06-29 17:26:22 +0000
commit | aa2dc0a5d9e7a19420c153cd414fefa8498eab71 (patch)
tree | 0a0483a267784fa8e2bf86857d8727edb5b122e9
parent | 6dbff7f2c1f8150887038aed666e11675adf0b4e (diff)
Part 1 of KSE-III
The ability to schedule multiple threads per process
(on one CPU) by making ALL system calls optionally asynchronous.
To come: ia64 and PowerPC patches, patches for gdb, and a test program (in tools).
Reviewed by: Almost everyone who counts
(at various times, peter, jhb, matt, alfred, mini, bernd,
and a cast of thousands)
NOTE: this is still beta code and contains lots of debugging stuff;
expect slight instability in signals.
75 files changed, 2765 insertions, 731 deletions
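Most of the churn below comes from splitting the old per-process `p_stat` into a per-process `p_state` (`PRS_*`) plus a per-thread `td_state` (`TDS_*`). The following user-space sketch models how the `kvm_proc.c` hunk at the top of the diff collapses the new pair back into a legacy single state for tools such as ps; the numeric enum values, the `legacy_state()` helper, and the `should_stop` argument (standing in for `P_SHOULDSTOP()`) are illustrative stand-ins, not the kernel's actual declarations.

```c
#include <stdio.h>

/* Stand-in models of the state enums introduced by this commit. */
enum prs_state { PRS_NEW, PRS_NORMAL, PRS_WAIT, PRS_ZOMBIE };
enum tds_state { TDS_NEW, TDS_UNQUEUED, TDS_IWAIT, TDS_SLP, TDS_MTX,
		 TDS_RUNQ, TDS_RUNNING, TDS_SURPLUS };

/* Legacy p_stat values that userland (e.g. libkvm consumers) still expect. */
enum legacy_stat { SIDL, SRUN, SSLEEP, SSTOP, SZOMB, SWAIT, SMTX };

/*
 * Collapse (process state, main-thread state, stop condition) back into
 * one legacy value, in the same priority order as the kvm_proc.c hunk:
 * runnable beats sleeping beats stopped beats mutex-blocked.
 */
static enum legacy_stat
legacy_state(enum prs_state ps, enum tds_state ts, int should_stop)
{
	if (ps == PRS_ZOMBIE)
		return (SZOMB);
	if (ps != PRS_NORMAL)
		return (SIDL);
	if (ts == TDS_RUNQ || ts == TDS_RUNNING)
		return (SRUN);
	if (ts == TDS_SLP)
		return (SSLEEP);
	if (should_stop)
		return (SSTOP);
	if (ts == TDS_MTX)
		return (SMTX);
	return (SWAIT);
}

int
main(void)
{
	printf("%d\n", legacy_state(PRS_NORMAL, TDS_RUNQ, 0));	/* SRUN */
	printf("%d\n", legacy_state(PRS_NORMAL, TDS_MTX, 0));	/* SMTX */
	printf("%d\n", legacy_state(PRS_ZOMBIE, TDS_SLP, 0));	/* SZOMB */
	return (0);
}
```

Note how lossy the mapping is (the commit itself flags it "XXXKSE very aproximate"): only the main thread's state is consulted, which is why later hunks such as the linprocfs and ddb ones grow their own per-thread reporting.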
diff --git a/lib/libkvm/kvm_proc.c b/lib/libkvm/kvm_proc.c index 865377c..547792e 100644 --- a/lib/libkvm/kvm_proc.c +++ b/lib/libkvm/kvm_proc.c @@ -325,11 +325,28 @@ nopgrp: kp->ki_estcpu = proc.p_ksegrp.kg_estcpu; /* XXXKSE */ kp->ki_slptime = proc.p_kse.ke_slptime; /* XXXKSE */ kp->ki_swtime = proc.p_swtime; - kp->ki_flag = proc.p_flag; + kp->ki_flag = proc.p_flag; /* WILDLY INNACURATE XXXKSE */ kp->ki_sflag = proc.p_sflag; kp->ki_wchan = mainthread.td_wchan; /* XXXKSE */ kp->ki_traceflag = proc.p_traceflag; - kp->ki_stat = proc.p_stat; + if (proc.p_state == PRS_NORMAL) { /* XXXKSE very aproximate */ + if ((mainthread.td_state == TDS_RUNQ) || + (mainthread.td_state == TDS_RUNNING)) { + kp->ki_stat = SRUN; + } else if (mainthread.td_state == TDS_SLP) { + kp->ki_stat = SSLEEP; + } else if (P_SHOULDSTOP(&proc)) { + kp->ki_stat = SSTOP; + } else if (mainthread.td_state == TDS_MTX) { + kp->ki_stat = SMTX; + } else { + kp->ki_stat = SWAIT; + } + } else if (proc.p_state == PRS_ZOMBIE) { + kp->ki_stat = SZOMB; + } else { + kp->ki_stat = SIDL; + } kp->ki_pri.pri_class = proc.p_ksegrp.kg_pri_class; /* XXXKSE */ kp->ki_pri.pri_user = proc.p_ksegrp.kg_user_pri; /* XXXKSE */ kp->ki_pri.pri_level = mainthread.td_priority; /* XXXKSE */ diff --git a/sys/alpha/alpha/genassym.c b/sys/alpha/alpha/genassym.c index 62ff3a4..96092da 100644 --- a/sys/alpha/alpha/genassym.c +++ b/sys/alpha/alpha/genassym.c @@ -80,6 +80,8 @@ ASSYM(MTX_UNOWNED, MTX_UNOWNED); ASSYM(TD_PCB, offsetof(struct thread, td_pcb)); ASSYM(TD_KSE, offsetof(struct thread, td_kse)); ASSYM(TD_PROC, offsetof(struct thread, td_proc)); +ASSYM(TD_STATE, offsetof(struct thread, td_state)); +ASSYM(TDS_RUNNING, TDS_RUNNING); ASSYM(KE_FLAGS, offsetof(struct kse, ke_flags)); diff --git a/sys/alpha/alpha/pmap.c b/sys/alpha/alpha/pmap.c index c758edb..5137f79 100644 --- a/sys/alpha/alpha/pmap.c +++ b/sys/alpha/alpha/pmap.c @@ -1151,7 +1151,12 @@ pmap_dispose_thread(td) ksobj = td->td_kstack_obj; ks = td->td_kstack; ptek = vtopte(ks); +#ifdef KSTACK_GUARD + ks -= PAGE_SIZE; + for (i = 1; i < (KSTACK_PAGES + 1); i++) { +#else for (i = 0; i < KSTACK_PAGES; i++) { +#endif m = vm_page_lookup(ksobj, i); if (m == NULL) panic("pmap_dispose_thread: kstack already missing?"); @@ -1164,14 +1169,16 @@ pmap_dispose_thread(td) } /* - * If the thread got swapped out some of its KSTACK might have gotten - * swapped. Just get rid of the object to clean up the swap use - * proactively. NOTE! might block waiting for paging I/O to complete. + * Free the space that this stack was mapped to in the kernel + * address map. */ - if (ksobj->type == OBJT_SWAP) { - td->td_kstack_obj = NULL; - vm_object_deallocate(ksobj); - } +#ifdef KSTACK_GUARD + kmem_free(kernel_map, ks, (KSTACK_PAGES + 1) * PAGE_SIZE); +#else + kmem_free(kernel_map, ks, KSTACK_PAGES * PAGE_SIZE); +#endif + td->td_kstack_obj = NULL; + vm_object_deallocate(ksobj); } /* diff --git a/sys/alpha/alpha/swtch.s b/sys/alpha/alpha/swtch.s index 34f3453..bae5227 100644 --- a/sys/alpha/alpha/swtch.s +++ b/sys/alpha/alpha/swtch.s @@ -127,6 +127,9 @@ Lcs1: LDGP(pv) mov v0, s2 /* s2 = new thread */ ldq s3, TD_MD_PCBPADDR(s2) /* s3 = new pcbpaddr */ + ldiq t0, TDS_RUNNING + stl t0, TD_STATE(s2) + /* * Check to see if we're switching to ourself. If we are, * don't bother loading the new context. 
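The trap.c changes (the alpha hunk just below, with matching i386/amd64 hunks further on) make each syscall entry of a KSE process fetch the current thread-mailbox pointer from userland with `fuword()` and use it to decide whether the thread runs bound or unbound. Here is a minimal user-space model of that check; `fake_fuword()` and `note_mailbox()` are hypothetical stand-ins for the real `fuword()` and the inline code in `syscall()`.

```c
#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Minimal stand-in for the kernel structure read by the syscall hunk. */
struct kse_mailbox {
	void	*kmbx_current_thread;	/* written by the userland UTS */
};

#define	TDF_UNBOUND	0x01

/*
 * Stand-in for fuword(): the real one copies a word from a user virtual
 * address and returns -1 on fault. This model just reads local memory
 * and can't fault, so the -1 path is exercised via the stored value.
 */
static long
fake_fuword(const void *addr)
{
	long v;

	memcpy(&v, addr, sizeof(v));
	return (v);
}

/*
 * Model of the per-syscall mailbox check: if the UTS has published a
 * thread mailbox, mark the thread unbound (its syscall may complete
 * asynchronously); otherwise fall back to bound, single-threaded mode.
 */
static void
note_mailbox(struct kse_mailbox *umbx, void **td_mailbox, int *td_flags)
{
	*td_mailbox = (void *)fake_fuword((char *)umbx +
	    offsetof(struct kse_mailbox, kmbx_current_thread));
	if (*td_mailbox == NULL || *td_mailbox == (void *)-1) {
		*td_mailbox = NULL;		/* single thread it.. */
		*td_flags &= ~TDF_UNBOUND;
	} else {
		*td_flags |= TDF_UNBOUND;
	}
}

int
main(void)
{
	struct kse_mailbox mbx = { .kmbx_current_thread = (void *)0x1000 };
	void *mailbox;
	int flags = 0;

	note_mailbox(&mbx, &mailbox, &flags);
	printf("unbound: %d\n", (flags & TDF_UNBOUND) != 0);	/* 1 */

	mbx.kmbx_current_thread = NULL;		/* UTS published nothing */
	note_mailbox(&mbx, &mailbox, &flags);
	printf("unbound: %d\n", (flags & TDF_UNBOUND) != 0);	/* 0 */
	return (0);
}
```

Both failure values matter: `-1` is the fault return of the real `fuword()`, and NULL means the UTS never published a mailbox, so either way the thread is treated as bound for the duration of the syscall.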
diff --git a/sys/alpha/alpha/trap.c b/sys/alpha/alpha/trap.c index 6cdf9f4..17dcb14 100644 --- a/sys/alpha/alpha/trap.c +++ b/sys/alpha/alpha/trap.c @@ -39,6 +39,7 @@ #include <sys/sysproto.h> #include <sys/kernel.h> #include <sys/proc.h> +#include <sys/kse.h> #include <sys/exec.h> #include <sys/lock.h> #include <sys/mutex.h> @@ -299,6 +300,12 @@ trap(a0, a1, a2, entry, framep) td->td_frame = framep; if (td->td_ucred != p->p_ucred) cred_update_thread(td); + if ((p->p_flag & P_WEXIT) && (p->p_singlethread != td)) { + mtx_lock_spin(&sched_lock); + PROC_LOCK(p); + thread_exit(); + /* NOTREACHED */ + } } else { sticks = 0; /* XXX bogus -Wuninitialized warning */ KASSERT(cold || td->td_ucred != NULL, @@ -659,6 +666,23 @@ syscall(code, framep) sticks = td->td_kse->ke_sticks; if (td->td_ucred != p->p_ucred) cred_update_thread(td); + if (p->p_flag & P_KSES) { + /* + * If we are doing a syscall in a KSE environment, + * note where our mailbox is. There is always the + * possibility that we could do this lazily (in sleep()), + * but for now do it every time. + */ + td->td_mailbox = (void *)fuword((caddr_t)td->td_kse->ke_mailbox + + offsetof(struct kse_mailbox, kmbx_current_thread)); + if ((td->td_mailbox == NULL) || + (td->td_mailbox == (void *)-1)) { + td->td_mailbox = NULL; /* single thread it.. */ + td->td_flags &= ~TDF_UNBOUND; + } else { + td->td_flags |= TDF_UNBOUND; + } + } #ifdef DIAGNOSTIC alpha_fpstate_check(td); @@ -756,14 +780,14 @@ syscall(code, framep) break; } - userret(td, framep, sticks); - /* * Release Giant if we had to get it. */ if ((callp->sy_narg & SYF_MPSAFE) == 0) mtx_unlock(&Giant); + userret(td, framep, sticks); + #ifdef KTRACE if (KTRPOINT(td, KTR_SYSRET)) ktrsysret(code, error, td->td_retval[0]); diff --git a/sys/alpha/alpha/vm_machdep.c b/sys/alpha/alpha/vm_machdep.c index e57593c..80f5f03 100644 --- a/sys/alpha/alpha/vm_machdep.c +++ b/sys/alpha/alpha/vm_machdep.c @@ -240,8 +240,7 @@ cpu_set_fork_handler(td, func, arg) * from proc0. */ void -cpu_exit(td) - register struct thread *td; +cpu_exit(struct thread *td) { alpha_fpstate_drop(td); @@ -254,6 +253,141 @@ cpu_sched_exit(td) } void +cpu_thread_exit(struct thread *td) +{ + + return; +} + +void +cpu_thread_setup(struct thread *td) +{ + + td->td_pcb = + (struct pcb *)(td->td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; + td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb) - 1; +} + +struct md_store { + struct pcb mds_pcb; + struct trapframe mds_frame; +}; + +void +cpu_save_upcall(struct thread *td, struct kse *newkse) +{ + + newkse->ke_mdstorage = malloc(sizeof(struct md_store), M_TEMP, + M_WAITOK); + /* Note: use of M_WAITOK means it won't fail. */ + /* set up shortcuts in MI section */ + newkse->ke_pcb = + &(((struct md_store *)(newkse->ke_mdstorage))->mds_pcb); + newkse->ke_frame = + &(((struct md_store *)(newkse->ke_mdstorage))->mds_frame); + + /* Copy the upcall pcb. Kernel mode & fp regs are here. */ + /* XXXKSE this may be un-needed */ + bcopy(td->td_pcb, newkse->ke_pcb, sizeof(struct pcb)); + + /* This copies most of the user mode register values. */ + bcopy(td->td_frame, newkse->ke_frame, sizeof(struct trapframe)); +} + +void +cpu_set_upcall(struct thread *td, void *pcb) +{ + struct pcb *pcb2; + + td->td_flags |= TDF_UPCALLING; + + /* Point the pcb to the top of the stack. */ + pcb2 = td->td_pcb; + + /* + * Copy the upcall pcb. This loads kernel regs. + * Those not loaded individually below get their default + * values here. 
+ * + * XXXKSE It might be a good idea to simply skip this as + * the values of the other registers may be unimportant. + * This would remove any requirement for knowing the KSE + * at this time (see the matching comment below for + * more analysis) (need a good safe default). + */ + bcopy(pcb, pcb2, sizeof(*pcb2)); + + /* + * Create a new fresh stack for the new thread. + * Don't forget to set this stack value into whatever supplies + * the address for the fault handlers. + * The contexts are filled in at the time we actually DO the + * upcall as only then do we know which KSE we got. + */ + td->td_frame = (struct trapframe *)((caddr_t)pcb2) - 1; + + /* + * Arrange for continuation at fork_return(), which + * will return to exception_return(). Note that the child + * process doesn't stay in the kernel for long! + */ + pcb2->pcb_hw.apcb_ksp = (u_int64_t)td->td_frame; + pcb2->pcb_context[0] = (u_int64_t)fork_return; /* s0: a0 */ + pcb2->pcb_context[1] = (u_int64_t)exception_return; /* s1: ra */ + pcb2->pcb_context[2] = (u_long)td; /* s2: a1 */ + pcb2->pcb_context[7] = (u_int64_t)fork_trampoline; /* ra: magic*/ +#ifdef SMP + /* + * We start off at a nesting level of 1 within the kernel. + */ + td->td_md.md_kernnest = 1; +#endif +} + +void +cpu_set_args(struct thread *td, struct kse *ke) +{ +/* XXX + suword((void *)(ke->ke_frame->tf_esp + sizeof(void *)), + (int)ke->ke_mailbox); +*/ +} + +void +cpu_free_kse_mdstorage(struct kse *kse) +{ + + free(kse->ke_mdstorage, M_TEMP); + kse->ke_mdstorage = NULL; + kse->ke_pcb = NULL; + kse->ke_frame = NULL; +} + +int +cpu_export_context(struct thread *td) +{ + /* XXXKSE */ +#if 0 + struct trapframe *frame; + struct thread_mailbox *tm; + struct trapframe *uframe; + int error; + + frame = td->td_frame; + tm = td->td_mailbox; + uframe = &tm->ctx.tfrm.tf_tf; + error = copyout(frame, uframe, sizeof(*frame)); + /* + * "What about the fp regs?" I hear you ask.... XXXKSE + * Don't know where gs and "onstack" come from. + * May need to fiddle a few other values too. + */ + return (error); +#endif + return (0); +} + +void cpu_wait(p) struct proc *p; { diff --git a/sys/alpha/linux/linux_machdep.c b/sys/alpha/linux/linux_machdep.c index 51d68f1..5f33c80 100644 --- a/sys/alpha/linux/linux_machdep.c +++ b/sys/alpha/linux/linux_machdep.c @@ -180,7 +180,6 @@ linux_clone(struct thread *td, struct linux_clone_args *args) * Make this runnable after we are finished with it. */ mtx_lock_spin(&sched_lock); - p2->p_stat = SRUN; setrunqueue(FIRST_THREAD_IN_PROC(p2)); mtx_unlock_spin(&sched_lock); diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S index e0f9bcd..80db485 100644 --- a/sys/amd64/amd64/cpu_switch.S +++ b/sys/amd64/amd64/cpu_switch.S @@ -65,12 +65,19 @@ tlb_flush_count: .long 0 /* * cpu_throw() + * + * This is the second half of cpu_swtch(). It is used when the current + * thread is either a dummy or slated to die, and we no longer care + * about its state. */ ENTRY(cpu_throw) jmp sw1 /* * cpu_switch() + * + * Save the current thread state, then select the next thread to run + * and load its state. 
*/ ENTRY(cpu_switch) @@ -166,11 +173,11 @@ sw1b: movl %eax,%ecx #ifdef INVARIANTS - movl TD_PROC(%ecx), %eax /* XXXKSE */ - cmpb $SRUN,P_STAT(%eax) + cmpb $TDS_RUNQ,TD_STATE(%ecx) jne badsw2 #endif + movl $TDS_RUNNING,TD_STATE(%ecx) movl TD_PCB(%ecx),%edx #if defined(SWTCH_OPTIM_STATS) @@ -310,12 +317,14 @@ cpu_switch_load_gs: #ifdef INVARIANTS badsw2: + pushal pushl $sw0_2 call panic sw0_2: .asciz "cpu_switch: not TDS_RUNQ" badsw3: + pushal pushl $sw0_3 call panic diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c index f3e9f04..dcc1880 100644 --- a/sys/amd64/amd64/genassym.c +++ b/sys/amd64/amd64/genassym.c @@ -79,10 +79,10 @@ ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace)); ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap)); ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active)); ASSYM(P_SFLAG, offsetof(struct proc, p_sflag)); -ASSYM(P_STAT, offsetof(struct proc, p_stat)); +ASSYM(P_STATE, offsetof(struct proc, p_state)); ASSYM(P_UAREA, offsetof(struct proc, p_uarea)); -/*ASSYM(TD_STAT, offsetof(struct thread, td__stat));*/ +ASSYM(TD_STATE, offsetof(struct thread, td_state)); ASSYM(TD_FLAGS, offsetof(struct thread, td_flags)); ASSYM(TD_WCHAN, offsetof(struct thread, td_wchan)); ASSYM(TD_PCB, offsetof(struct thread, td_pcb)); @@ -101,8 +101,9 @@ ASSYM(KE_FLAGS, offsetof(struct kse, ke_flags)); ASSYM(KEF_ASTPENDING, KEF_ASTPENDING); ASSYM(KEF_NEEDRESCHED, KEF_NEEDRESCHED); -ASSYM(SSLEEP, SSLEEP); -ASSYM(SRUN, SRUN); +ASSYM(TDS_SLP, TDS_SLP); +ASSYM(TDS_RUNQ, TDS_RUNQ); +ASSYM(TDS_RUNNING, TDS_RUNNING); ASSYM(V_TRAP, offsetof(struct vmmeter, v_trap)); ASSYM(V_SYSCALL, offsetof(struct vmmeter, v_syscall)); ASSYM(V_INTR, offsetof(struct vmmeter, v_intr)); diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 2f11ee2..c73c5e1 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -799,7 +799,7 @@ cpu_idle(void) { if (cpu_idle_hlt) { disable_intr(); - if (procrunnable()) { + if (kserunnable()) { enable_intr(); } else { /* diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index e2cebaf..9e35ad7 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -1100,7 +1100,12 @@ pmap_dispose_thread(td) ksobj = td->td_kstack_obj; ks = td->td_kstack; ptek = vtopte(ks); +#ifdef KSTACK_GUARD + ks -= PAGE_SIZE; + for (i = 1; i < (KSTACK_PAGES + 1); i++) { +#else for (i = 0; i < KSTACK_PAGES; i++) { +#endif m = vm_page_lookup(ksobj, i); if (m == NULL) panic("pmap_dispose_thread: kstack already missing?"); @@ -1116,16 +1121,17 @@ pmap_dispose_thread(td) #ifdef I386_CPU invltlb(); #endif - /* - * If the thread got swapped out some of its KSTACK might have gotten - * swapped. Just get rid of the object to clean up the swap use - * proactively. NOTE! might block waiting for paging I/O to complete. + * Free the space that this stack was mapped to in the kernel + * address map. */ - if (ksobj->type == OBJT_SWAP) { - td->td_kstack_obj = NULL; - vm_object_deallocate(ksobj); - } +#ifdef KSTACK_GUARD + kmem_free(kernel_map, ks, (KSTACK_PAGES + 1) * PAGE_SIZE); +#else + kmem_free(kernel_map, ks, KSTACK_PAGES * PAGE_SIZE); +#endif + vm_object_deallocate(ksobj); + td->td_kstack_obj = NULL; /* play it safe */ } /* diff --git a/sys/amd64/amd64/swtch.s b/sys/amd64/amd64/swtch.s index e0f9bcd..80db485 100644 --- a/sys/amd64/amd64/swtch.s +++ b/sys/amd64/amd64/swtch.s @@ -65,12 +65,19 @@ tlb_flush_count: .long 0 /* * cpu_throw() + * + * This is the second half of cpu_swtch(). 
It is used when the current + * thread is either a dummy or slated to die, and we no longer care + * about its state. */ ENTRY(cpu_throw) jmp sw1 /* * cpu_switch() + * + * Save the current thread state, then select the next thread to run + * and load its state. */ ENTRY(cpu_switch) @@ -166,11 +173,11 @@ sw1b: movl %eax,%ecx #ifdef INVARIANTS - movl TD_PROC(%ecx), %eax /* XXXKSE */ - cmpb $SRUN,P_STAT(%eax) + cmpb $TDS_RUNQ,TD_STATE(%ecx) jne badsw2 #endif + movl $TDS_RUNNING,TD_STATE(%ecx) movl TD_PCB(%ecx),%edx #if defined(SWTCH_OPTIM_STATS) @@ -310,12 +317,14 @@ cpu_switch_load_gs: #ifdef INVARIANTS badsw2: + pushal pushl $sw0_2 call panic sw0_2: .asciz "cpu_switch: not TDS_RUNQ" badsw3: + pushal pushl $sw0_3 call panic diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index 08c75e4..8282416 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -54,6 +54,7 @@ #include <sys/bus.h> #include <sys/systm.h> #include <sys/proc.h> +#include <sys/kse.h> #include <sys/pioctl.h> #include <sys/kernel.h> #include <sys/ktr.h> @@ -267,6 +268,17 @@ trap(frame) if (td->td_ucred != p->p_ucred) cred_update_thread(td); + /* + * First check that we shouldn't just abort. + * But check if we are the single thread first! + */ + if ((p->p_flag & P_WEXIT) && (p->p_singlethread != td)) { + mtx_lock_spin(&sched_lock); + PROC_LOCK(p); + thread_exit(); + /* NOTREACHED */ + } + switch (type) { case T_PRIVINFLT: /* privileged instruction fault */ ucode = type; @@ -939,11 +951,30 @@ syscall(frame) mtx_unlock(&Giant); } #endif + KASSERT((td->td_kse != NULL), ("syscall: kse/thread UNLINKED")); + KASSERT((td->td_kse->ke_thread == td), ("syscall:kse/thread mismatch")); sticks = td->td_kse->ke_sticks; td->td_frame = &frame; if (td->td_ucred != p->p_ucred) cred_update_thread(td); + if (p->p_flag & P_KSES) { + /* + * If we are doing a syscall in a KSE environment, + * note where our mailbox is. There is always the + * possibility that we could do this lazily (in sleep()), + * but for now do it every time. + */ + td->td_mailbox = (void *)fuword((caddr_t)td->td_kse->ke_mailbox + + offsetof(struct kse_mailbox, kmbx_current_thread)); + if ((td->td_mailbox == NULL) || + (td->td_mailbox == (void *)-1)) { + td->td_mailbox = NULL; /* single thread it.. */ + td->td_flags &= ~TDF_UNBOUND; + } else { + td->td_flags |= TDF_UNBOUND; + } + } params = (caddr_t)frame.tf_esp + sizeof(int); code = frame.tf_eax; orig_tf_eflags = frame.tf_eflags; @@ -1045,6 +1076,12 @@ syscall(frame) } /* + * Release Giant if we previously set it. + */ + if ((callp->sy_narg & SYF_MPSAFE) == 0) + mtx_unlock(&Giant); + + /* * Traced syscall. */ if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) { @@ -1057,12 +1094,6 @@ syscall(frame) */ userret(td, &frame, sticks); - /* - * Release Giant if we previously set it. 
- */ - if ((callp->sy_narg & SYF_MPSAFE) == 0) - mtx_unlock(&Giant); - #ifdef KTRACE if (KTRPOINT(td, KTR_SYSRET)) ktrsysret(code, error, td->td_retval[0]); diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index 5dc2e14..04742c3 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -53,6 +53,7 @@ #include <sys/systm.h> #include <sys/malloc.h> #include <sys/proc.h> +#include <sys/kse.h> #include <sys/bio.h> #include <sys/buf.h> #include <sys/vnode.h> @@ -254,15 +255,26 @@ cpu_set_fork_handler(td, func, arg) } void -cpu_exit(td) - register struct thread *td; +cpu_exit(struct thread *td) +{ + struct mdproc *mdp; + + mdp = &td->td_proc->p_md; + if (mdp->md_ldt) + user_ldt_free(td); + reset_dbregs(); +} + +void +cpu_thread_exit(struct thread *td) { struct pcb *pcb = td->td_pcb; - struct mdproc *mdp = &td->td_proc->p_md; #ifdef DEV_NPX npxexit(td); #endif if (pcb->pcb_ext != 0) { + /* XXXKSE XXXSMP not SMP SAFE.. what locks do we have? */ + /* if (pcb->pcb_ext->ext_refcount-- == 1) ?? */ /* * XXX do we need to move the TSS off the allocated pages * before freeing them? (not done here) @@ -271,8 +283,6 @@ cpu_exit(td) ctob(IOPAGES + 1)); pcb->pcb_ext = 0; } - if (mdp->md_ldt) - user_ldt_free(td); if (pcb->pcb_flags & PCB_DBREGS) { /* * disable all hardware breakpoints @@ -289,6 +299,146 @@ cpu_sched_exit(td) } void +cpu_thread_setup(struct thread *td) +{ + + td->td_pcb = + (struct pcb *)(td->td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; + td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb - 16) - 1; +} + +struct md_store { + struct pcb mds_pcb; + struct trapframe mds_frame; +}; + +void +cpu_save_upcall(struct thread *td, struct kse *newkse) +{ + struct trapframe *tf; + + newkse->ke_mdstorage = malloc(sizeof(struct md_store), M_TEMP, + M_WAITOK); + /* Note: use of M_WAITOK means it won't fail. */ + /* set up shortcuts in MI section */ + newkse->ke_pcb = + &(((struct md_store *)(newkse->ke_mdstorage))->mds_pcb); + newkse->ke_frame = + &(((struct md_store *)(newkse->ke_mdstorage))->mds_frame); + tf = newkse->ke_frame; + + /* Copy the upcall pcb. Kernel mode & fp regs are here. */ + /* XXXKSE this may be un-needed */ + bcopy(td->td_pcb, newkse->ke_pcb, sizeof(struct pcb)); + + /* + * This initialises most of the user mode register values + * to good values. Eventually set them explicitly to know values + */ + bcopy(td->td_frame, newkse->ke_frame, sizeof(struct trapframe)); + tf->tf_edi = 0; + tf->tf_esi = 0; /* trampoline arg */ + tf->tf_ebp = 0; + tf->tf_esp = (int)newkse->ke_stackbase + newkse->ke_stacksize - 16; + tf->tf_ebx = 0; /* trampoline arg */ + tf->tf_eip = (int)newkse->ke_upcall; +} + +void +cpu_set_upcall(struct thread *td, void *pcb) +{ + struct pcb *pcb2; + + td->td_flags |= TDF_UPCALLING; + + /* Point the pcb to the top of the stack. */ + pcb2 = td->td_pcb; + + /* + * Copy the upcall pcb. This loads kernel regs. + * Those not loaded individually below get their default + * values here. + * + * XXXKSE It might be a good idea to simply skip this as + * the values of the other registers may be unimportant. + * This would remove any requirement for knowing the KSE + * at this time (see the matching comment below for + * more analysis) (need a good safe default). + */ + bcopy(pcb, pcb2, sizeof(*pcb2)); + + /* + * Create a new fresh stack for the new thread. + * The -16 is so we can expand the trapframe if we go to vm86. + * Don't forget to set this stack value into whatever supplies + * the address for the fault handlers. 
+ * The contexts are filled in at the time we actually DO the + * upcall as only then do we know which KSE we got. + */ + td->td_frame = (struct trapframe *)((caddr_t)pcb2 - 16) - 1; + + /* + * Set registers for trampoline to user mode. Leave space for the + * return address on stack. These are the kernel mode register values. + */ + pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdir); + pcb2->pcb_edi = 0; + pcb2->pcb_esi = (int)fork_return; /* trampoline arg */ + pcb2->pcb_ebp = 0; + pcb2->pcb_esp = (int)td->td_frame - sizeof(void *); /* trampoline arg */ + pcb2->pcb_ebx = (int)td; /* trampoline arg */ + pcb2->pcb_eip = (int)fork_trampoline; + pcb2->pcb_psl &= ~(PSL_I); /* interrupts must be disabled */ + /* + * If we didn't copy the pcb, we'd need to do the following registers: + * pcb2->pcb_dr*: cloned above. + * pcb2->pcb_savefpu: cloned above. + * pcb2->pcb_flags: cloned above. + * pcb2->pcb_onfault: cloned above (always NULL here?). + * pcb2->pcb_gs: cloned above. XXXKSE ??? + * pcb2->pcb_ext: cleared below. + */ + pcb2->pcb_ext = NULL; +} + +void +cpu_set_args(struct thread *td, struct kse *ke) +{ + suword((void *)(ke->ke_frame->tf_esp + sizeof(void *)), + (int)ke->ke_mailbox); +} + +void +cpu_free_kse_mdstorage(struct kse *kse) +{ + + free(kse->ke_mdstorage, M_TEMP); + kse->ke_mdstorage = NULL; + kse->ke_pcb = NULL; + kse->ke_frame = NULL; +} + +int +cpu_export_context(struct thread *td) +{ + struct trapframe *frame; + struct thread_mailbox *tm; + struct trapframe *uframe; + int error; + + frame = td->td_frame; + tm = td->td_mailbox; + uframe = &tm->ctx.tfrm.tf_tf; + error = copyout(frame, uframe, sizeof(*frame)); + /* + * "What about the fp regs?" I hear you ask.... XXXKSE + * Don't know where gs and "onstack" come from. + * May need to fiddle a few other values too. + */ + return (error); +} + +void cpu_wait(p) struct proc *p; { diff --git a/sys/compat/linprocfs/linprocfs.c b/sys/compat/linprocfs/linprocfs.c index 02b858e..5129746 100644 --- a/sys/compat/linprocfs/linprocfs.c +++ b/sys/compat/linprocfs/linprocfs.c @@ -539,21 +539,6 @@ linprocfs_doprocstat(PFS_FILL_ARGS) } /* - * Map process state to descriptive letter. Note that this does not - * quite correspond to what Linux outputs, but it's close enough. - */ -static char *state_str[] = { - "? (unknown)", - "I (idle)", - "R (running)", - "S (sleeping)", - "T (stopped)", - "Z (zombie)", - "W (waiting)", - "M (mutex)" -}; - -/* * Filler function for proc/pid/status */ static int @@ -562,13 +547,53 @@ linprocfs_doprocstatus(PFS_FILL_ARGS) struct kinfo_proc kp; char *state; segsz_t lsize; + struct thread *td2; int i; mtx_lock_spin(&sched_lock); - if (p->p_stat > sizeof state_str / sizeof *state_str) - state = state_str[0]; - else - state = state_str[(int)p->p_stat]; + td2 = FIRST_THREAD_IN_PROC(p); /* XXXKSE pretend only one thread */ + + if (P_SHOULDSTOP(p)) { + state = "T (stopped)"; + } else { + switch(p->p_state) { + case PRS_NEW: + state = "I (idle)"; + break; + case PRS_NORMAL: + if (p->p_flag & P_WEXIT) { + state = "X (exiting)"; + break; + } + switch(td2->td_state) { + case TDS_SLP: + case TDS_MTX: + state = "S (sleeping)"; + break; + case TDS_RUNQ: + case TDS_RUNNING: + state = "R (running)"; + break; + case TDS_NEW: + case TDS_UNQUEUED: + case TDS_IWAIT: + case TDS_SURPLUS: + default: + state = "? (unknown)"; + break; + } + break; + case PRS_WAIT: + state = "W (waiting)"; + break; + case PRS_ZOMBIE: + state = "Z (zombie)"; + break; + default: + state = "? 
(unknown)"; + break; + } + } mtx_unlock_spin(&sched_lock); PROC_LOCK(p); diff --git a/sys/compat/svr4/svr4_misc.c b/sys/compat/svr4/svr4_misc.c index 7ef01b9..f60d62c 100644 --- a/sys/compat/svr4/svr4_misc.c +++ b/sys/compat/svr4/svr4_misc.c @@ -1168,7 +1168,7 @@ svr4_setinfo(p, st, s) if (p) { i.si_pid = p->p_pid; mtx_lock_spin(&sched_lock); - if (p->p_stat == SZOMB) { + if (p->p_state == PRS_ZOMBIE) { i.si_stime = p->p_ru->ru_stime.tv_sec; i.si_utime = p->p_ru->ru_utime.tv_sec; } @@ -1256,7 +1256,7 @@ loop: } nfound++; mtx_lock_spin(&sched_lock); - if (q->p_stat == SZOMB && + if ((q->p_state == PRS_ZOMBIE) && ((SCARG(uap, options) & (SVR4_WEXITED|SVR4_WTRAPPED)))) { mtx_unlock_spin(&sched_lock); PROC_UNLOCK(q); @@ -1372,7 +1372,8 @@ loop: nprocs--; return 0; } - if (q->p_stat == SSTOP && (q->p_flag & P_WAITED) == 0 && + /* XXXKSE this needs clarification */ + if (P_SHOULDSTOP(q) && ((q->p_flag & P_WAITED) == 0) && (q->p_flag & P_TRACED || (SCARG(uap, options) & (SVR4_WSTOPPED|SVR4_WCONTINUED)))) { mtx_unlock_spin(&sched_lock); diff --git a/sys/conf/files b/sys/conf/files index 1cff41f..9994c11 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -870,6 +870,7 @@ kern/kern_synch.c standard kern/kern_syscalls.c standard kern/kern_sysctl.c standard kern/kern_tc.c standard +kern/kern_thread.c standard kern/kern_time.c standard kern/kern_timeout.c standard kern/kern_uuid.c standard diff --git a/sys/ddb/db_ps.c b/sys/ddb/db_ps.c index 9468f63..996e4eb 100644 --- a/sys/ddb/db_ps.c +++ b/sys/ddb/db_ps.c @@ -52,6 +52,7 @@ db_ps(dummy1, dummy2, dummy3, dummy4) int nl = 0; volatile struct proc *p, *pp; volatile struct thread *td; + char *state; np = nprocs; @@ -96,23 +97,44 @@ db_ps(dummy1, dummy2, dummy3, dummy4) if (pp == NULL) pp = p; - db_printf("%5d %8p %8p %4d %5d %5d %07x %d", + + switch(p->p_state) { + case PRS_NORMAL: + if (P_SHOULDSTOP(p)) + state = "stopped"; + else + state = "Normal"; + break; + case PRS_NEW: + state = "New"; + break; + case PRS_WAIT: + state = "Wait"; + break; + case PRS_ZOMBIE: + state = "Zombie"; + break; + default: + state = "Unknown"; + break; + } + db_printf("%5d %8p %8p %4d %5d %5d %07x %s", p->p_pid, (volatile void *)p, (void *)p->p_uarea, p->p_ucred ? p->p_ucred->cr_ruid : 0, pp->p_pid, - p->p_pgrp ? p->p_pgrp->pg_id : 0, p->p_flag, p->p_stat); + p->p_pgrp ? p->p_pgrp->pg_id : 0, p->p_flag, state); if (p->p_flag & P_KSES) { db_printf("(threaded) %s\n", p->p_comm); FOREACH_THREAD_IN_PROC(p, td) { db_printf( ". . . . . . . " - ". . . . . . . . "); + ". thread %p . . . 
", td); if (td->td_wchan) { - db_printf("%6s %8p", td->td_wmesg, + db_printf("SLP %6s %8p\n", td->td_wmesg, (void *)td->td_wchan); - } else if (p->p_stat == SMTX) { - db_printf("%6s %8p", td->td_mtxname, + } else if (td->td_state == TDS_MTX) { + db_printf("MTX %6s %8p\n", td->td_mtxname, (void *)td->td_blocked); } else { - db_printf("--not blocked--"); + db_printf("--not blocked--\n"); } } } else { @@ -120,7 +142,7 @@ db_ps(dummy1, dummy2, dummy3, dummy4) if (td->td_wchan) { db_printf(" %6s %8p", td->td_wmesg, (void *)td->td_wchan); - } else if (p->p_stat == SMTX) { + } else if (td->td_state == TDS_MTX) { db_printf(" %6s %8p", td->td_mtxname, (void *)td->td_blocked); } else { diff --git a/sys/fs/procfs/procfs_ctl.c b/sys/fs/procfs/procfs_ctl.c index 0f35370..15ed718 100644 --- a/sys/fs/procfs/procfs_ctl.c +++ b/sys/fs/procfs/procfs_ctl.c @@ -62,7 +62,7 @@ * relative to process (curp) */ #define TRACE_WAIT_P(curp, p) \ - ((p)->p_stat == SSTOP && \ + (P_SHOULDSTOP(p) && \ (p)->p_pptr == (curp) && \ ((p)->p_flag & P_TRACED)) @@ -262,6 +262,7 @@ out: */ case PROCFS_CTL_RUN: PROC_UNLOCK(p); + p->p_flag &= ~P_STOPPED_SGNL; /* this uses SIGSTOP */ break; /* @@ -272,27 +273,26 @@ out: case PROCFS_CTL_WAIT: if (p->p_flag & P_TRACED) { while (error == 0 && - (p->p_stat != SSTOP) && + (P_SHOULDSTOP(p)) && (p->p_flag & P_TRACED) && (p->p_pptr == td->td_proc)) error = msleep((caddr_t) p, &p->p_mtx, PWAIT|PCATCH, "procfsx", 0); if (error == 0 && !TRACE_WAIT_P(td->td_proc, p)) error = EBUSY; - } else - while (error == 0 && p->p_stat != SSTOP) + } else { + while (error == 0 && P_SHOULDSTOP(p)) error = msleep((caddr_t) p, &p->p_mtx, PWAIT|PCATCH, "procfs", 0); + } PROC_UNLOCK(p); return (error); - default: panic("procfs_control"); } mtx_lock_spin(&sched_lock); - if (p->p_stat == SSTOP) - setrunnable(FIRST_THREAD_IN_PROC(p)); /* XXXKSE */ + thread_unsuspend(p); /* If it can run, let it do so. */ mtx_unlock_spin(&sched_lock); return (0); } @@ -349,6 +349,7 @@ procfs_doprocctl(PFS_FILL_ARGS) #endif mtx_lock_spin(&sched_lock); /* XXXKSE: */ + p->p_flag &= ~P_STOPPED_SGNL; setrunnable(FIRST_THREAD_IN_PROC(p)); mtx_unlock_spin(&sched_lock); } else diff --git a/sys/fs/procfs/procfs_dbregs.c b/sys/fs/procfs/procfs_dbregs.c index 361f34b..442521c 100644 --- a/sys/fs/procfs/procfs_dbregs.c +++ b/sys/fs/procfs/procfs_dbregs.c @@ -90,7 +90,7 @@ procfs_doprocdbregs(PFS_FILL_ARGS) if (error == 0) error = uiomove(kv, kl, uio); if (error == 0 && uio->uio_rw == UIO_WRITE) { - if (p->p_stat != SSTOP) + if (!P_SHOULDSTOP(p)) /* XXXKSE should be P_TRACED? 
*/ error = EBUSY; else /* XXXKSE: */ diff --git a/sys/fs/procfs/procfs_fpregs.c b/sys/fs/procfs/procfs_fpregs.c index afabb33..f1401f3 100644 --- a/sys/fs/procfs/procfs_fpregs.c +++ b/sys/fs/procfs/procfs_fpregs.c @@ -84,7 +84,7 @@ procfs_doprocfpregs(PFS_FILL_ARGS) if (error == 0) error = uiomove(kv, kl, uio); if (error == 0 && uio->uio_rw == UIO_WRITE) { - if (p->p_stat != SSTOP) + if (!P_SHOULDSTOP(p)) error = EBUSY; else /* XXXKSE: */ diff --git a/sys/fs/procfs/procfs_ioctl.c b/sys/fs/procfs/procfs_ioctl.c index 09aef86..9d49be9 100644 --- a/sys/fs/procfs/procfs_ioctl.c +++ b/sys/fs/procfs/procfs_ioctl.c @@ -94,9 +94,11 @@ procfs_ioctl(PFS_IOCTL_ARGS) #if 0 mtx_lock_spin(&sched_lock); p->p_step = 0; - if (p->p_stat == SSTOP) { + if (P_SHOULDSTOP(p)) { p->p_xstat = sig; - setrunnable(FIRST_THREAD_IN_PROC(p)); + p->p_flag &= ~(P_STOPPED_TRACE|P_STOPPED_SGNL); + FOREACH_THREAD_IN_PROC(p, td) + setrunnable(td); /* XXX Totally bogus */ mtx_unlock_spin(&sched_lock); } else { mtx_unlock_spin(&sched_lock); diff --git a/sys/fs/procfs/procfs_regs.c b/sys/fs/procfs/procfs_regs.c index 5fcb450..6cefe7e 100644 --- a/sys/fs/procfs/procfs_regs.c +++ b/sys/fs/procfs/procfs_regs.c @@ -86,7 +86,7 @@ procfs_doprocregs(PFS_FILL_ARGS) error = uiomove(kv, kl, uio); PROC_LOCK(p); if (error == 0 && uio->uio_rw == UIO_WRITE) { - if (p->p_stat != SSTOP) + if (!P_SHOULDSTOP(p)) error = EBUSY; else /* XXXKSE: */ diff --git a/sys/i386/i386/genassym.c b/sys/i386/i386/genassym.c index f3e9f04..dcc1880 100644 --- a/sys/i386/i386/genassym.c +++ b/sys/i386/i386/genassym.c @@ -79,10 +79,10 @@ ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace)); ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap)); ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active)); ASSYM(P_SFLAG, offsetof(struct proc, p_sflag)); -ASSYM(P_STAT, offsetof(struct proc, p_stat)); +ASSYM(P_STATE, offsetof(struct proc, p_state)); ASSYM(P_UAREA, offsetof(struct proc, p_uarea)); -/*ASSYM(TD_STAT, offsetof(struct thread, td__stat));*/ +ASSYM(TD_STATE, offsetof(struct thread, td_state)); ASSYM(TD_FLAGS, offsetof(struct thread, td_flags)); ASSYM(TD_WCHAN, offsetof(struct thread, td_wchan)); ASSYM(TD_PCB, offsetof(struct thread, td_pcb)); @@ -101,8 +101,9 @@ ASSYM(KE_FLAGS, offsetof(struct kse, ke_flags)); ASSYM(KEF_ASTPENDING, KEF_ASTPENDING); ASSYM(KEF_NEEDRESCHED, KEF_NEEDRESCHED); -ASSYM(SSLEEP, SSLEEP); -ASSYM(SRUN, SRUN); +ASSYM(TDS_SLP, TDS_SLP); +ASSYM(TDS_RUNQ, TDS_RUNQ); +ASSYM(TDS_RUNNING, TDS_RUNNING); ASSYM(V_TRAP, offsetof(struct vmmeter, v_trap)); ASSYM(V_SYSCALL, offsetof(struct vmmeter, v_syscall)); ASSYM(V_INTR, offsetof(struct vmmeter, v_intr)); diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index 2f11ee2..c73c5e1 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -799,7 +799,7 @@ cpu_idle(void) { if (cpu_idle_hlt) { disable_intr(); - if (procrunnable()) { + if (kserunnable()) { enable_intr(); } else { /* diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index e2cebaf..9e35ad7 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -1100,7 +1100,12 @@ pmap_dispose_thread(td) ksobj = td->td_kstack_obj; ks = td->td_kstack; ptek = vtopte(ks); +#ifdef KSTACK_GUARD + ks -= PAGE_SIZE; + for (i = 1; i < (KSTACK_PAGES + 1); i++) { +#else for (i = 0; i < KSTACK_PAGES; i++) { +#endif m = vm_page_lookup(ksobj, i); if (m == NULL) panic("pmap_dispose_thread: kstack already missing?"); @@ -1116,16 +1121,17 @@ pmap_dispose_thread(td) #ifdef I386_CPU invltlb(); #endif - /* - * If the thread got swapped out some of its 
KSTACK might have gotten - * swapped. Just get rid of the object to clean up the swap use - * proactively. NOTE! might block waiting for paging I/O to complete. + * Free the space that this stack was mapped to in the kernel + * address map. */ - if (ksobj->type == OBJT_SWAP) { - td->td_kstack_obj = NULL; - vm_object_deallocate(ksobj); - } +#ifdef KSTACK_GUARD + kmem_free(kernel_map, ks, (KSTACK_PAGES + 1) * PAGE_SIZE); +#else + kmem_free(kernel_map, ks, KSTACK_PAGES * PAGE_SIZE); +#endif + vm_object_deallocate(ksobj); + td->td_kstack_obj = NULL; /* play it safe */ } /* diff --git a/sys/i386/i386/swtch.s b/sys/i386/i386/swtch.s index e0f9bcd..80db485 100644 --- a/sys/i386/i386/swtch.s +++ b/sys/i386/i386/swtch.s @@ -65,12 +65,19 @@ tlb_flush_count: .long 0 /* * cpu_throw() + * + * This is the second half of cpu_swtch(). It is used when the current + * thread is either a dummy or slated to die, and we no longer care + * about its state. */ ENTRY(cpu_throw) jmp sw1 /* * cpu_switch() + * + * Save the current thread state, then select the next thread to run + * and load its state. */ ENTRY(cpu_switch) @@ -166,11 +173,11 @@ sw1b: movl %eax,%ecx #ifdef INVARIANTS - movl TD_PROC(%ecx), %eax /* XXXKSE */ - cmpb $SRUN,P_STAT(%eax) + cmpb $TDS_RUNQ,TD_STATE(%ecx) jne badsw2 #endif + movl $TDS_RUNNING,TD_STATE(%ecx) movl TD_PCB(%ecx),%edx #if defined(SWTCH_OPTIM_STATS) @@ -310,12 +317,14 @@ cpu_switch_load_gs: #ifdef INVARIANTS badsw2: + pushal pushl $sw0_2 call panic sw0_2: .asciz "cpu_switch: not TDS_RUNQ" badsw3: + pushal pushl $sw0_3 call panic diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c index 08c75e4..8282416 100644 --- a/sys/i386/i386/trap.c +++ b/sys/i386/i386/trap.c @@ -54,6 +54,7 @@ #include <sys/bus.h> #include <sys/systm.h> #include <sys/proc.h> +#include <sys/kse.h> #include <sys/pioctl.h> #include <sys/kernel.h> #include <sys/ktr.h> @@ -267,6 +268,17 @@ trap(frame) if (td->td_ucred != p->p_ucred) cred_update_thread(td); + /* + * First check that we shouldn't just abort. + * But check if we are the single thread first! + */ + if ((p->p_flag & P_WEXIT) && (p->p_singlethread != td)) { + mtx_lock_spin(&sched_lock); + PROC_LOCK(p); + thread_exit(); + /* NOTREACHED */ + } + switch (type) { case T_PRIVINFLT: /* privileged instruction fault */ ucode = type; @@ -939,11 +951,30 @@ syscall(frame) mtx_unlock(&Giant); } #endif + KASSERT((td->td_kse != NULL), ("syscall: kse/thread UNLINKED")); + KASSERT((td->td_kse->ke_thread == td), ("syscall:kse/thread mismatch")); sticks = td->td_kse->ke_sticks; td->td_frame = &frame; if (td->td_ucred != p->p_ucred) cred_update_thread(td); + if (p->p_flag & P_KSES) { + /* + * If we are doing a syscall in a KSE environment, + * note where our mailbox is. There is always the + * possibility that we could do this lazily (in sleep()), + * but for now do it every time. + */ + td->td_mailbox = (void *)fuword((caddr_t)td->td_kse->ke_mailbox + + offsetof(struct kse_mailbox, kmbx_current_thread)); + if ((td->td_mailbox == NULL) || + (td->td_mailbox == (void *)-1)) { + td->td_mailbox = NULL; /* single thread it.. */ + td->td_flags &= ~TDF_UNBOUND; + } else { + td->td_flags |= TDF_UNBOUND; + } + } params = (caddr_t)frame.tf_esp + sizeof(int); code = frame.tf_eax; orig_tf_eflags = frame.tf_eflags; @@ -1045,6 +1076,12 @@ syscall(frame) } /* + * Release Giant if we previously set it. + */ + if ((callp->sy_narg & SYF_MPSAFE) == 0) + mtx_unlock(&Giant); + + /* * Traced syscall. 
*/ if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) { @@ -1057,12 +1094,6 @@ syscall(frame) */ userret(td, &frame, sticks); - /* - * Release Giant if we previously set it. - */ - if ((callp->sy_narg & SYF_MPSAFE) == 0) - mtx_unlock(&Giant); - #ifdef KTRACE if (KTRPOINT(td, KTR_SYSRET)) ktrsysret(code, error, td->td_retval[0]); diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index 5dc2e14..04742c3 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -53,6 +53,7 @@ #include <sys/systm.h> #include <sys/malloc.h> #include <sys/proc.h> +#include <sys/kse.h> #include <sys/bio.h> #include <sys/buf.h> #include <sys/vnode.h> @@ -254,15 +255,26 @@ cpu_set_fork_handler(td, func, arg) } void -cpu_exit(td) - register struct thread *td; +cpu_exit(struct thread *td) +{ + struct mdproc *mdp; + + mdp = &td->td_proc->p_md; + if (mdp->md_ldt) + user_ldt_free(td); + reset_dbregs(); +} + +void +cpu_thread_exit(struct thread *td) { struct pcb *pcb = td->td_pcb; - struct mdproc *mdp = &td->td_proc->p_md; #ifdef DEV_NPX npxexit(td); #endif if (pcb->pcb_ext != 0) { + /* XXXKSE XXXSMP not SMP SAFE.. what locks do we have? */ + /* if (pcb->pcb_ext->ext_refcount-- == 1) ?? */ /* * XXX do we need to move the TSS off the allocated pages * before freeing them? (not done here) @@ -271,8 +283,6 @@ cpu_exit(td) ctob(IOPAGES + 1)); pcb->pcb_ext = 0; } - if (mdp->md_ldt) - user_ldt_free(td); if (pcb->pcb_flags & PCB_DBREGS) { /* * disable all hardware breakpoints @@ -289,6 +299,146 @@ cpu_sched_exit(td) } void +cpu_thread_setup(struct thread *td) +{ + + td->td_pcb = + (struct pcb *)(td->td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; + td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb - 16) - 1; +} + +struct md_store { + struct pcb mds_pcb; + struct trapframe mds_frame; +}; + +void +cpu_save_upcall(struct thread *td, struct kse *newkse) +{ + struct trapframe *tf; + + newkse->ke_mdstorage = malloc(sizeof(struct md_store), M_TEMP, + M_WAITOK); + /* Note: use of M_WAITOK means it won't fail. */ + /* set up shortcuts in MI section */ + newkse->ke_pcb = + &(((struct md_store *)(newkse->ke_mdstorage))->mds_pcb); + newkse->ke_frame = + &(((struct md_store *)(newkse->ke_mdstorage))->mds_frame); + tf = newkse->ke_frame; + + /* Copy the upcall pcb. Kernel mode & fp regs are here. */ + /* XXXKSE this may be un-needed */ + bcopy(td->td_pcb, newkse->ke_pcb, sizeof(struct pcb)); + + /* + * This initialises most of the user mode register values + * to good values. Eventually set them explicitly to know values + */ + bcopy(td->td_frame, newkse->ke_frame, sizeof(struct trapframe)); + tf->tf_edi = 0; + tf->tf_esi = 0; /* trampoline arg */ + tf->tf_ebp = 0; + tf->tf_esp = (int)newkse->ke_stackbase + newkse->ke_stacksize - 16; + tf->tf_ebx = 0; /* trampoline arg */ + tf->tf_eip = (int)newkse->ke_upcall; +} + +void +cpu_set_upcall(struct thread *td, void *pcb) +{ + struct pcb *pcb2; + + td->td_flags |= TDF_UPCALLING; + + /* Point the pcb to the top of the stack. */ + pcb2 = td->td_pcb; + + /* + * Copy the upcall pcb. This loads kernel regs. + * Those not loaded individually below get their default + * values here. + * + * XXXKSE It might be a good idea to simply skip this as + * the values of the other registers may be unimportant. + * This would remove any requirement for knowing the KSE + * at this time (see the matching comment below for + * more analysis) (need a good safe default). + */ + bcopy(pcb, pcb2, sizeof(*pcb2)); + + /* + * Create a new fresh stack for the new thread. 
+ * The -16 is so we can expand the trapframe if we go to vm86. + * Don't forget to set this stack value into whatever supplies + * the address for the fault handlers. + * The contexts are filled in at the time we actually DO the + * upcall as only then do we know which KSE we got. + */ + td->td_frame = (struct trapframe *)((caddr_t)pcb2 - 16) - 1; + + /* + * Set registers for trampoline to user mode. Leave space for the + * return address on stack. These are the kernel mode register values. + */ + pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdir); + pcb2->pcb_edi = 0; + pcb2->pcb_esi = (int)fork_return; /* trampoline arg */ + pcb2->pcb_ebp = 0; + pcb2->pcb_esp = (int)td->td_frame - sizeof(void *); /* trampoline arg */ + pcb2->pcb_ebx = (int)td; /* trampoline arg */ + pcb2->pcb_eip = (int)fork_trampoline; + pcb2->pcb_psl &= ~(PSL_I); /* interrupts must be disabled */ + /* + * If we didn't copy the pcb, we'd need to do the following registers: + * pcb2->pcb_dr*: cloned above. + * pcb2->pcb_savefpu: cloned above. + * pcb2->pcb_flags: cloned above. + * pcb2->pcb_onfault: cloned above (always NULL here?). + * pcb2->pcb_gs: cloned above. XXXKSE ??? + * pcb2->pcb_ext: cleared below. + */ + pcb2->pcb_ext = NULL; +} + +void +cpu_set_args(struct thread *td, struct kse *ke) +{ + suword((void *)(ke->ke_frame->tf_esp + sizeof(void *)), + (int)ke->ke_mailbox); +} + +void +cpu_free_kse_mdstorage(struct kse *kse) +{ + + free(kse->ke_mdstorage, M_TEMP); + kse->ke_mdstorage = NULL; + kse->ke_pcb = NULL; + kse->ke_frame = NULL; +} + +int +cpu_export_context(struct thread *td) +{ + struct trapframe *frame; + struct thread_mailbox *tm; + struct trapframe *uframe; + int error; + + frame = td->td_frame; + tm = td->td_mailbox; + uframe = &tm->ctx.tfrm.tf_tf; + error = copyout(frame, uframe, sizeof(*frame)); + /* + * "What about the fp regs?" I hear you ask.... XXXKSE + * Don't know where gs and "onstack" come from. + * May need to fiddle a few other values too. + */ + return (error); +} + +void cpu_wait(p) struct proc *p; { diff --git a/sys/i386/linux/linux_machdep.c b/sys/i386/linux/linux_machdep.c index 245c96a..0819b67 100644 --- a/sys/i386/linux/linux_machdep.c +++ b/sys/i386/linux/linux_machdep.c @@ -361,7 +361,6 @@ linux_clone(struct thread *td, struct linux_clone_args *args) * Make this runnable after we are finished with it. */ mtx_lock_spin(&sched_lock); - p2->p_stat = SRUN; setrunqueue(FIRST_THREAD_IN_PROC(p2)); mtx_unlock_spin(&sched_lock); PROC_UNLOCK(p2); diff --git a/sys/i386/linux/linux_ptrace.c b/sys/i386/linux/linux_ptrace.c index 536188b..a19dcc7 100644 --- a/sys/i386/linux/linux_ptrace.c +++ b/sys/i386/linux/linux_ptrace.c @@ -409,7 +409,7 @@ linux_ptrace(struct thread *td, struct linux_ptrace_args *uap) } /* not currently stopped */ - if (p->p_stat != SSTOP || (p->p_flag & P_WAITED) == 0) { + if ((p->p_flag & (P_TRACED|P_WAITED)) == 0) { error = EBUSY; goto fail; } diff --git a/sys/ia64/ia64/trap.c b/sys/ia64/ia64/trap.c index e38945f..4ffdb15 100644 --- a/sys/ia64/ia64/trap.c +++ b/sys/ia64/ia64/trap.c @@ -872,14 +872,14 @@ syscall(int code, u_int64_t *args, struct trapframe *framep) break; } - userret(td, framep, sticks); - /* * Release Giant if we had to get it. 
*/ if ((callp->sy_narg & SYF_MPSAFE) == 0) mtx_unlock(&Giant); + userret(td, framep, sticks); + #ifdef KTRACE if (KTRPOINT(td, KTR_SYSRET)) ktrsysret(code, error, td->td_retval[0]); @@ -1043,16 +1043,16 @@ ia32_syscall(struct trapframe *framep) } /* - * Handle reschedule and other end-of-syscall issues - */ - userret(td, framep, sticks); - - /* * Release Giant if we previously set it. */ if ((callp->sy_narg & SYF_MPSAFE) == 0) mtx_unlock(&Giant); + /* + * Handle reschedule and other end-of-syscall issues + */ + userret(td, framep, sticks); + #ifdef KTRACE if (KTRPOINT(td, KTR_SYSRET)) ktrsysret(code, error, td->td_retval[0]); diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index d5c5656..06cc8d8 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -289,6 +289,7 @@ proc0_init(void *dummy __unused) * Initialize thread, process and pgrp structures. */ procinit(); + threadinit(); /* * Initialize sleep queue hash table @@ -322,19 +323,34 @@ proc0_init(void *dummy __unused) p->p_sysent = &aout_sysvec; #endif + /* + * proc_linkup was already done in init_i386() or alphainit() etc. + * because the earlier code needed to follow td->td_proc. Otherwise + * I would have done it here.. maybe this means this should be + * done earlier too. + */ ke = &proc0.p_kse; /* XXXKSE */ kg = &proc0.p_ksegrp; /* XXXKSE */ p->p_flag = P_SYSTEM; p->p_sflag = PS_INMEM; - p->p_stat = SRUN; - p->p_ksegrp.kg_nice = NZERO; - kg->kg_pri_class = PRI_TIMESHARE; - kg->kg_user_pri = PUSER; - td->td_priority = PVM; - td->td_base_pri = PUSER; - + p->p_state = PRS_NORMAL; + td->td_state = TDS_RUNNING; + kg->kg_nice = NZERO; + kg->kg_pri_class = PRI_TIMESHARE; + kg->kg_user_pri = PUSER; + td->td_priority = PVM; + td->td_base_pri = PUSER; + td->td_kse = ke; /* XXXKSE */ + ke->ke_oncpu = 0; + ke->ke_state = KES_RUNNING; + ke->ke_thread = td; + /* proc_linkup puts it in the idle queue, that's not what we want. 
*/ + TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist); + kg->kg_idle_kses--; p->p_peers = 0; p->p_leader = p; +KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!")); + bcopy("swapper", p->p_comm, sizeof ("swapper")); @@ -662,8 +678,7 @@ kick_init(const void *udata __unused) td = FIRST_THREAD_IN_PROC(initproc); mtx_lock_spin(&sched_lock); - initproc->p_stat = SRUN; - setrunqueue(FIRST_THREAD_IN_PROC(initproc)); /* XXXKSE */ + setrunqueue(td); /* XXXKSE */ mtx_unlock_spin(&sched_lock); } SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_FIRST, kick_init, NULL) diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c index 425e3b7..cf8ba80 100644 --- a/sys/kern/init_sysent.c +++ b/sys/kern/init_sysent.c @@ -405,7 +405,7 @@ struct sysent sysent[] = { { 0, (sy_call_t *)kse_wakeup }, /* 380 = kse_wakeup */ { AS(kse_new_args), (sy_call_t *)kse_new }, /* 381 = kse_new */ { AS(thread_wakeup_args), (sy_call_t *)thread_wakeup }, /* 382 = thread_wakeup */ - { 0, (sy_call_t *)kse_yield }, /* 383 = kse_yield */ + { SYF_MPSAFE | 0, (sy_call_t *)kse_yield }, /* 383 = kse_yield */ { 0, (sy_call_t *)nosys }, /* 384 = __mac_get_proc */ { 0, (sy_call_t *)nosys }, /* 385 = __mac_set_proc */ { 0, (sy_call_t *)nosys }, /* 386 = __mac_get_fd */ diff --git a/sys/kern/kern_condvar.c b/sys/kern/kern_condvar.c index 9d30d25..78585b2 100644 --- a/sys/kern/kern_condvar.c +++ b/sys/kern/kern_condvar.c @@ -48,7 +48,7 @@ */ #define CV_ASSERT(cvp, mp, td) do { \ KASSERT((td) != NULL, ("%s: curthread NULL", __func__)); \ - KASSERT((td)->td_proc->p_stat == SRUN, ("%s: not SRUN", __func__)); \ + KASSERT((td)->td_state == TDS_RUNNING, ("%s: not TDS_RUNNING", __func__)); \ KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__)); \ KASSERT((mp) != NULL, ("%s: mp NULL", __func__)); \ mtx_assert((mp), MA_OWNED | MA_NOTRECURSED); \ @@ -80,6 +80,7 @@ #endif static void cv_timedwait_end(void *arg); +static void cv_check_upcall(struct thread *td); /* * Initialize a condition variable. Must be called before use. @@ -109,14 +110,47 @@ cv_destroy(struct cv *cvp) */ /* + * Decide if we need to queue an upcall. + * This is copied from msleep(), perhaps this should be a common function. + */ +static void +cv_check_upcall(struct thread *td) +{ + + /* + * If we are capable of async syscalls and there isn't already + * another one ready to return, start a new thread + * and queue it as ready to run. Note that there is danger here + * because we need to make sure that we don't sleep allocating + * the thread (recursion here might be bad). + * Hence the TDF_INMSLEEP flag. + */ + if ((td->td_proc->p_flag & P_KSES) && td->td_mailbox && + (td->td_flags & TDF_INMSLEEP) == 0) { + /* + * If we have no queued work to do, + * upcall to the UTS to see if it has more work. + * We don't need to upcall now, just queue it. + */ + if (TAILQ_FIRST(&td->td_ksegrp->kg_runq) == NULL) { + /* Don't recurse here! */ + td->td_flags |= TDF_INMSLEEP; + thread_schedule_upcall(td, td->td_kse); + td->td_flags &= ~TDF_INMSLEEP; + } + } +} + +/* * Switch context. 
*/ static __inline void cv_switch(struct thread *td) { - td->td_proc->p_stat = SSLEEP; + td->td_state = TDS_SLP; td->td_proc->p_stats->p_ru.ru_nvcsw++; + cv_check_upcall(td); mi_switch(); CTR3(KTR_PROC, "cv_switch: resume thread %p (pid %d, %s)", td, td->td_proc->p_pid, td->td_proc->p_comm); @@ -135,7 +169,7 @@ cv_switch_catch(struct thread *td) * We put ourselves on the sleep queue and start our timeout before * calling cursig, as we could stop there, and a wakeup or a SIGCONT (or * both) could occur while we were stopped. A SIGCONT would cause us to - * be marked as SSLEEP without resuming us, thus we must be ready for + * be marked as TDS_SLP without resuming us, thus we must be ready for * sleep when cursig is called. If the wakeup happens while we're * stopped, td->td_wchan will be 0 upon return from cursig. */ @@ -143,13 +177,15 @@ cv_switch_catch(struct thread *td) mtx_unlock_spin(&sched_lock); p = td->td_proc; PROC_LOCK(p); - sig = cursig(p); /* XXXKSE */ + sig = cursig(td); /* XXXKSE */ + if (thread_suspend_check(1)) + sig = SIGSTOP; mtx_lock_spin(&sched_lock); PROC_UNLOCK(p); if (sig != 0) { if (td->td_wchan != NULL) cv_waitq_remove(td); - td->td_proc->p_stat = SRUN; + td->td_state = TDS_RUNNING; /* XXXKSE */ } else if (td->td_wchan != NULL) { cv_switch(td); } @@ -175,7 +211,6 @@ cv_waitq_add(struct cv *cvp, struct thread *td) td->td_flags |= TDF_CVWAITQ; td->td_wchan = cvp; td->td_wmesg = cvp->cv_description; - td->td_kse->ke_slptime = 0; /* XXXKSE */ td->td_ksegrp->kg_slptime = 0; /* XXXKSE */ td->td_base_pri = td->td_priority; CTR3(KTR_PROC, "cv_waitq_add: thread %p (pid %d, %s)", td, @@ -285,7 +320,7 @@ cv_wait_sig(struct cv *cvp, struct mtx *mp) PROC_LOCK(p); if (sig == 0) - sig = cursig(p); /* XXXKSE */ + sig = cursig(td); /* XXXKSE */ if (sig != 0) { if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig)) rval = EINTR; @@ -293,6 +328,8 @@ cv_wait_sig(struct cv *cvp, struct mtx *mp) rval = ERESTART; } PROC_UNLOCK(p); + if (p->p_flag & P_WEXIT) + rval = EINTR; #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) @@ -363,6 +400,8 @@ cv_timedwait(struct cv *cvp, struct mtx *mp, int timo) mi_switch(); } + if (td->td_proc->p_flag & P_WEXIT) + rval = EWOULDBLOCK; mtx_unlock_spin(&sched_lock); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) @@ -436,12 +475,11 @@ cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo) td->td_proc->p_stats->p_ru.ru_nivcsw++; mi_switch(); } - mtx_unlock_spin(&sched_lock); PROC_LOCK(p); if (sig == 0) - sig = cursig(p); + sig = cursig(td); if (sig != 0) { if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig)) rval = EINTR; @@ -450,6 +488,9 @@ cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo) } PROC_UNLOCK(p); + if (p->p_flag & P_WEXIT) + rval = EINTR; + #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0); @@ -477,15 +518,13 @@ cv_wakeup(struct cv *cvp) TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq); td->td_flags &= ~TDF_CVWAITQ; td->td_wchan = 0; - if (td->td_proc->p_stat == SSLEEP) { + if (td->td_state == TDS_SLP) { /* OPTIMIZED EXPANSION OF setrunnable(td); */ CTR3(KTR_PROC, "cv_signal: thread %p (pid %d, %s)", td, td->td_proc->p_pid, td->td_proc->p_comm); if (td->td_ksegrp->kg_slptime > 1) /* XXXKSE */ updatepri(td); - td->td_kse->ke_slptime = 0; td->td_ksegrp->kg_slptime = 0; - td->td_proc->p_stat = SRUN; if (td->td_proc->p_sflag & PS_INMEM) { setrunqueue(td); maybe_resched(td); @@ -568,7 +607,7 @@ cv_timedwait_end(void *arg) td->td_flags &= ~TDF_TIMEOUT; setrunqueue(td); } else if (td->td_wchan != NULL) { - if (td->td_proc->p_stat == SSLEEP) /* XXXKSE */ + if 
(td->td_state == TDS_SLP) /* XXXKSE */ setrunnable(td); else cv_waitq_remove(td); @@ -577,3 +616,27 @@ cv_timedwait_end(void *arg) td->td_flags |= TDF_TIMOFAIL; mtx_unlock_spin(&sched_lock); } + +/* + * For now only abort interruptable waits. + * The others will have to either complete on their own or have a timeout. + */ +void +cv_abort(struct thread *td) +{ + + CTR3(KTR_PROC, "cv_abort: thread %p (pid %d, %s)", td, + td->td_proc->p_pid, + td->td_proc->p_comm); + mtx_lock_spin(&sched_lock); + if ((td->td_flags & (TDF_SINTR|TDF_TIMEOUT)) == TDF_SINTR) { + if (td->td_wchan != NULL) { + if (td->td_state == TDS_SLP) + setrunnable(td); + else + cv_waitq_remove(td); + } + } + mtx_unlock_spin(&sched_lock); +} + diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index feaa123..0cd7f27 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -154,12 +154,14 @@ execve(td, uap) PROC_LOCK(p); KASSERT((p->p_flag & P_INEXEC) == 0, ("%s(): process already has P_INEXEC flag", __func__)); + if ((p->p_flag & P_KSES) && thread_single(SNGLE_EXIT)) { + PROC_UNLOCK(p); + mtx_unlock(&Giant); + return (ERESTART); /* Try again later. */ + } + /* If we get here all other threads are dead. */ p->p_flag |= P_INEXEC; PROC_UNLOCK(p); - -/* XXXKSE */ -/* !!!!!!!! we need abort all the other threads of this process before we */ -/* proceed beyond his point! */ /* * Initialize part of the common data diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 63a5135..fea5438 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -145,6 +145,67 @@ exit1(td, rv) /* * XXXXKSE: MUST abort all other threads before proceeding past here. */ + PROC_LOCK(p); + if (p->p_flag & P_KSES) { + /* + * First check if some other thread got here before us.. + * if so, act apropriatly, (exit or suspend); + */ + thread_suspend_check(0); + /* + * Here is a trick.. + * We need to free up our KSE to process other threads + * so that we can safely set the UNBOUND flag + * (whether or not we have a mailbox) as we are NEVER + * going to return to the user. + * The flag will not be set yet if we are exiting + * because of a signal, pagefault, or similar + * (or even an exit(2) from the UTS). + */ + td->td_flags |= TDF_UNBOUND; + + /* + * Kill off the other threads. This requires + * Some co-operation from other parts of the kernel + * so it may not be instant. + * With this state set: + * Any thread entering the kernel from userspace will + * thread_exit() in trap(). Any thread attempting to + * sleep will return immediatly + * with EINTR or EWOULDBLOCK, which will hopefully force them + * to back out to userland, freeing resources as they go, and + * anything attempting to return to userland will thread_exit() + * from userret(). thread_exit() will unsuspend us + * when the last other thread exits. + */ + if (thread_single(SNGLE_EXIT)) { + panic ("Exit: Single threading fouled up"); + } + /* + * All other activity in this process is now stopped. + * Remove excess KSEs and KSEGRPS. XXXKSE (when we have them) + * ... + * Turn off threading support. + */ + p->p_flag &= ~P_KSES; + td->td_flags &= ~TDF_UNBOUND; + thread_single_end(); /* Don't need this any more. */ + } + /* + * With this state set: + * Any thread entering the kernel from userspace will thread_exit() + * in trap(). 
Any thread attempting to sleep will return immediatly + * with EINTR or EWOULDBLOCK, which will hopefully force them + * to back out to userland, freeing resources as they go, and + * anything attempting to return to userland will thread_exit() + * from userret(). thread_exit() will do a wakeup on p->p_numthreads + * if it transitions to 1. + */ + + p->p_flag |= P_WEXIT; + PROC_UNLOCK(p); + if (td->td_kse->ke_mdstorage) + cpu_free_kse_mdstorage(td->td_kse); /* Are we a task leader? */ PROC_LOCK(p); @@ -185,7 +246,6 @@ exit1(td, rv) */ PROC_LOCK(p); p->p_flag &= ~(P_TRACED | P_PPWAIT); - p->p_flag |= P_WEXIT; SIGEMPTYSET(p->p_siglist); PROC_UNLOCK(p); if (timevalisset(&p->p_realtimer.it_value)) @@ -434,22 +494,24 @@ exit1(td, rv) /* * We have to wait until after releasing all locks before - * changing p_stat. If we block on a mutex then we will be + * changing p_state. If we block on a mutex then we will be * back at SRUN when we resume and our parent will never * harvest us. */ - p->p_stat = SZOMB; + p->p_state = PRS_ZOMBIE; wakeup(p->p_pptr); PROC_UNLOCK(p->p_pptr); - PROC_UNLOCK(p); - cnt.v_swtch++; binuptime(PCPU_PTR(switchtime)); PCPU_SET(switchticks, ticks); - cpu_sched_exit(td); - cpu_throw(); + cpu_sched_exit(td); /* XXXKSE check if this should be in thread_exit */ + /* + * Make sure this thread is discarded from the zombie. + * This will also release this thread's reference to the ucred. + */ + thread_exit(); panic("exit1"); } @@ -504,6 +566,8 @@ wait1(td, uap, compat) register int nfound; register struct proc *p, *q, *t; int status, error; + struct kse *ke; + struct ksegrp *kg; q = td->td_proc; if (uap->pid == 0) { @@ -540,7 +604,7 @@ loop: } nfound++; - if (p->p_stat == SZOMB) { + if (p->p_state == PRS_ZOMBIE) { /* * charge childs scheduling cpu usage to parent * XXXKSE assume only one thread & kse & ksegrp @@ -656,6 +720,21 @@ loop: } /* + * There should only be one KSE/KSEGRP but + * do it right anyhow. + */ + FOREACH_KSEGRP_IN_PROC(p, kg) { + FOREACH_KSE_IN_GROUP(kg, ke) { + /* Free the KSE spare thread. */ + if (ke->ke_tdspare != NULL) { + thread_free(ke->ke_tdspare); + p->p_kse.ke_tdspare = NULL; + } + } + } + thread_reap(); /* check for zombie threads */ + + /* * Give vm and machine-dependent layer a chance * to free anything that cpu_exit couldn't * release while still running in process context. @@ -669,7 +748,7 @@ loop: mtx_unlock(&Giant); return (0); } - if (p->p_stat == SSTOP && (p->p_flag & P_WAITED) == 0 && + if (P_SHOULDSTOP(p) && ((p->p_flag & P_WAITED) == 0) && (p->p_flag & P_TRACED || uap->options & WUNTRACED)) { p->p_flag |= P_WAITED; sx_xunlock(&proctree_lock); diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index 016653b..eac0267 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -212,23 +212,6 @@ sysctl_kern_randompid(SYSCTL_HANDLER_ARGS) SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_kern_randompid, "I", "Random PID modulus"); -#if 0 -void -kse_init(struct kse *kse1, struct kse *kse2) -{ -} - -void -thread_init(struct thread *thread1, struct thread *thread2) -{ -} - -void -ksegrp_init(struct ksegrp *ksegrp1, struct ksegrp *ksegrp2) -{ -} -#endif - int fork1(td, flags, procp) struct thread *td; /* parent proc */ @@ -296,6 +279,29 @@ fork1(td, flags, procp) return (0); } + if (p1->p_flag & P_KSES) { + /* + * Idle the other threads for a second. + * Since the user space is copied, it must remain stable. 
+ * In addition, all threads (from the user perspective) + * need to either be suspended or in the kernel, + * where they will try restart in the parent and will + * be aborted in the child. + */ + PROC_LOCK(p1); + if (thread_single(SNGLE_NO_EXIT)) { + /* Abort.. someone else is single threading before us */ + PROC_UNLOCK(p1); + return (ERESTART); + } + PROC_UNLOCK(p1); + /* + * All other activity in this process + * is now suspended at the user boundary, + * (or other safe places if we think of any). + */ + } + /* Allocate new proc. */ newproc = uma_zalloc(proc_zone, M_WAITOK); @@ -311,6 +317,11 @@ fork1(td, flags, procp) if ((nprocs >= maxproc - 10 && uid != 0) || nprocs >= maxproc) { sx_xunlock(&allproc_lock); uma_zfree(proc_zone, newproc); + if (p1->p_flag & P_KSES) { + PROC_LOCK(p1); + thread_single_end(); + PROC_UNLOCK(p1); + } tsleep(&forksleep, PUSER, "fork", hz / 2); return (EAGAIN); } @@ -325,6 +336,11 @@ fork1(td, flags, procp) if (!ok) { sx_xunlock(&allproc_lock); uma_zfree(proc_zone, newproc); + if (p1->p_flag & P_KSES) { + PROC_LOCK(p1); + thread_single_end(); + PROC_UNLOCK(p1); + } tsleep(&forksleep, PUSER, "fork", hz / 2); return (EAGAIN); } @@ -411,7 +427,7 @@ again: lastpid = trypid; p2 = newproc; - p2->p_stat = SIDL; /* protect against others */ + p2->p_state = PRS_NEW; /* protect against others */ p2->p_pid = trypid; LIST_INSERT_HEAD(&allproc, p2, p_list); LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash); @@ -449,7 +465,7 @@ again: * Start by zeroing the section of proc that is zero-initialized, * then copy the section that is copied directly from the parent. */ - td2 = thread_get(p2); + td2 = thread_alloc(); ke2 = &p2->p_kse; kg2 = &p2->p_ksegrp; @@ -459,8 +475,10 @@ again: (unsigned) RANGEOF(struct proc, p_startzero, p_endzero)); bzero(&ke2->ke_startzero, (unsigned) RANGEOF(struct kse, ke_startzero, ke_endzero)); +#if 0 /* bzero'd by the thread allocator */ bzero(&td2->td_startzero, (unsigned) RANGEOF(struct thread, td_startzero, td_endzero)); +#endif bzero(&kg2->kg_startzero, (unsigned) RANGEOF(struct ksegrp, kg_startzero, kg_endzero)); @@ -482,9 +500,22 @@ again: * XXXKSE Theoretically only the running thread would get copied * Others in the kernel would be 'aborted' in the child. * i.e return E*something* + * On SMP we would have to stop them running on + * other CPUs! (set a flag in the proc that stops + * all returns to userland until completed) + * This is wrong but ok for 1:1. */ proc_linkup(p2, kg2, ke2, td2); + /* Set up the thread as an active thread (as if runnable). */ + TAILQ_REMOVE(&kg2->kg_iq, ke2, ke_kgrlist); + kg2->kg_idle_kses--; + ke2->ke_state = KES_UNQUEUED; + ke2->ke_thread = td2; + td2->td_kse = ke2; + td2->td_flags &= ~TDF_UNBOUND; /* For the rest of this syscall. */ +KASSERT((ke2->ke_kgrlist.tqe_next != ke2), ("linked to self!")); + /* note.. XXXKSE no pcb or u-area yet */ /* @@ -699,7 +730,6 @@ again: p2->p_acflag = AFORK; if ((flags & RFSTOPPED) == 0) { mtx_lock_spin(&sched_lock); - p2->p_stat = SRUN; setrunqueue(td2); mtx_unlock_spin(&sched_lock); } @@ -803,6 +833,9 @@ fork_exit(callout, arg, frame) struct proc *p = td->td_proc; td->td_kse->ke_oncpu = PCPU_GET(cpuid); + p->p_state = PRS_NORMAL; + td->td_state = TDS_RUNNING; /* Already done in switch() on 386. */ + td->td_kse->ke_state = KES_RUNNING; /* * Finish setting up thread glue. We need to initialize * the thread into a td_critnest=1 state. 
Some platforms @@ -814,7 +847,7 @@ fork_exit(callout, arg, frame) sched_lock.mtx_lock = (uintptr_t)td; sched_lock.mtx_recurse = 0; cpu_critical_fork_exit(); - CTR3(KTR_PROC, "fork_exit: new proc %p (pid %d, %s)", p, p->p_pid, + CTR3(KTR_PROC, "fork_exit: new thread %p (pid %d, %s)", td, p->p_pid, p->p_comm); if (PCPU_GET(switchtime.sec) == 0) binuptime(PCPU_PTR(switchtime)); diff --git a/sys/kern/kern_idle.c b/sys/kern/kern_idle.c index 29194b7..306f2a5 100644 --- a/sys/kern/kern_idle.c +++ b/sys/kern/kern_idle.c @@ -40,6 +40,7 @@ idle_setup(void *dummy) struct pcpu *pc; #endif struct proc *p; + struct thread *td; int error; #ifdef SMP @@ -60,7 +61,10 @@ idle_setup(void *dummy) panic("idle_setup: kthread_create error %d\n", error); p->p_flag |= P_NOLOAD; - p->p_stat = SRUN; + td = FIRST_THREAD_IN_PROC(p); + td->td_state = TDS_RUNQ; + td->td_kse->ke_state = KES_ONRUNQ; + td->td_kse->ke_flags |= KEF_IDLEKSE; #ifdef SMP } #endif @@ -75,16 +79,22 @@ idle_proc(void *dummy) #ifdef DIAGNOSTIC int count; #endif + struct thread *td; + struct proc *p; + td = curthread; + p = td->td_proc; + td->td_state = TDS_RUNNING; + td->td_kse->ke_state = KES_RUNNING; for (;;) { mtx_assert(&Giant, MA_NOTOWNED); #ifdef DIAGNOSTIC count = 0; - while (count >= 0 && procrunnable() == 0) { + while (count >= 0 && kserunnable() == 0) { #else - while (procrunnable() == 0) { + while (kserunnable() == 0) { #endif /* * This is a good place to put things to be done in @@ -103,8 +113,9 @@ idle_proc(void *dummy) } mtx_lock_spin(&sched_lock); - curproc->p_stats->p_ru.ru_nvcsw++; + p->p_stats->p_ru.ru_nvcsw++; mi_switch(); + td->td_kse->ke_state = KES_RUNNING; mtx_unlock_spin(&sched_lock); } } diff --git a/sys/kern/kern_intr.c b/sys/kern/kern_intr.c index d65dc82..fb9c092 100644 --- a/sys/kern/kern_intr.c +++ b/sys/kern/kern_intr.c @@ -201,7 +201,7 @@ ithread_create(struct ithd **ithread, int vector, int flags, td = FIRST_THREAD_IN_PROC(p); /* XXXKSE */ td->td_ksegrp->kg_pri_class = PRI_ITHD; td->td_priority = PRI_MAX_ITHD; - p->p_stat = SWAIT; + td->td_state = TDS_IWAIT; ithd->it_td = td; td->td_ithd = ithd; if (ithread != NULL) @@ -229,8 +229,7 @@ ithread_destroy(struct ithd *ithread) } ithread->it_flags |= IT_DEAD; mtx_lock_spin(&sched_lock); - if (p->p_stat == SWAIT) { - p->p_stat = SRUN; /* XXXKSE */ + if (td->td_state == TDS_IWAIT) { setrunqueue(td); } mtx_unlock_spin(&sched_lock); @@ -327,7 +326,7 @@ ok: * handler as being dead and let the ithread do the actual removal. 
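 * (The ithread may be running that very handler at this instant; * it is only safe to unlink a handler once the ithread has gone * back to TDS_IWAIT, so a busy ithread reaps dead handlers itself.)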
*/ mtx_lock_spin(&sched_lock); - if (ithread->it_td->td_proc->p_stat != SWAIT) { + if (ithread->it_td->td_state != TDS_IWAIT) { handler->ih_flags |= IH_DEAD; /* @@ -374,8 +373,8 @@ ithread_schedule(struct ithd *ithread, int do_switch) td = ithread->it_td; p = td->td_proc; KASSERT(p != NULL, ("ithread %s has no process", ithread->it_name)); - CTR4(KTR_INTR, "%s: pid %d: (%s) need = %d", __func__, p->p_pid, p->p_comm, - ithread->it_need); + CTR4(KTR_INTR, "%s: pid %d: (%s) need = %d", + __func__, p->p_pid, p->p_comm, ithread->it_need); /* * Set it_need to tell the thread to keep running if it is already @@ -387,14 +386,16 @@ ithread_schedule(struct ithd *ithread, int do_switch) */ ithread->it_need = 1; mtx_lock_spin(&sched_lock); - if (p->p_stat == SWAIT) { + if (td->td_state == TDS_IWAIT) { CTR2(KTR_INTR, "%s: setrunqueue %d", __func__, p->p_pid); - p->p_stat = SRUN; - setrunqueue(td); /* XXXKSE */ - if (do_switch && curthread->td_critnest == 1 && - curthread->td_proc->p_stat == SRUN) { + setrunqueue(td); + if (do_switch && + (curthread->td_critnest == 1)/* && + (curthread->td_state == TDS_RUNNING) XXXKSE*/) { +#if 0 /* not needed in KSE */ if (curthread != PCPU_GET(idlethread)) setrunqueue(curthread); +#endif curthread->td_proc->p_stats->p_ru.ru_nivcsw++; mi_switch(); } else { @@ -402,7 +403,7 @@ ithread_schedule(struct ithd *ithread, int do_switch) } } else { CTR4(KTR_INTR, "%s: pid %d: it_need %d, state %d", - __func__, p->p_pid, ithread->it_need, p->p_stat); + __func__, p->p_pid, ithread->it_need, p->p_state); } mtx_unlock_spin(&sched_lock); @@ -550,7 +551,7 @@ restart: */ if (ithd->it_enable != NULL) ithd->it_enable(ithd->it_vector); - p->p_stat = SWAIT; /* we're idle */ + td->td_state = TDS_IWAIT; /* we're idle */ p->p_stats->p_ru.ru_nvcsw++; CTR2(KTR_INTR, "%s: pid %d: done", __func__, p->p_pid); mi_switch(); diff --git a/sys/kern/kern_kthread.c b/sys/kern/kern_kthread.c index a456a86..e8e2fea 100644 --- a/sys/kern/kern_kthread.c +++ b/sys/kern/kern_kthread.c @@ -109,8 +109,7 @@ kthread_create(void (*func)(void *), void *arg, mtx_lock_spin(&sched_lock); p2->p_sflag |= PS_INMEM; if (!(flags & RFSTOPPED)) { - p2->p_stat = SRUN; - setrunqueue(FIRST_THREAD_IN_PROC(p2)); /* XXXKSE */ + setrunqueue(FIRST_THREAD_IN_PROC(p2)); } mtx_unlock_spin(&sched_lock); diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c index 08bca8d..c2e79d0 100644 --- a/sys/kern/kern_mutex.c +++ b/sys/kern/kern_mutex.c @@ -119,23 +119,20 @@ propagate_priority(struct thread *td) return; } + KASSERT(td->td_state != TDS_SURPLUS, ("Mutex owner SURPLUS")); + MPASS(td->td_proc != NULL); MPASS(td->td_proc->p_magic == P_MAGIC); - KASSERT(td->td_proc->p_stat != SSLEEP, ("sleeping thread owns a mutex")); + KASSERT(td->td_state != TDS_SLP, + ("sleeping thread owns a mutex")); if (td->td_priority <= pri) /* lower is higher priority */ return; - /* - * Bump this thread's priority. - */ - td->td_priority = pri; /* * If lock holder is actually running, just bump priority. */ - if (thread_running(td)) { - MPASS(td->td_proc->p_stat == SRUN - || td->td_proc->p_stat == SZOMB - || td->td_proc->p_stat == SSTOP); + if (td->td_state == TDS_RUNNING) { + td->td_priority = pri; return; } @@ -151,20 +148,26 @@ propagate_priority(struct thread *td) * If on run queue move to new run queue, and quit. * XXXKSE this gets a lot more complicated under threads * but try anyhow. + * We should have a special call to do this more efficiently. 
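+ * A minimal sketch of what such a call might look like, i.e. a
+ * helper that requeues a runnable thread after a priority change
+ * (purely illustrative; no such helper exists in this patch):
+ *
+ *	static void
+ *	sched_requeue_prio(struct thread *td, int pri)
+ *	{
+ *		mtx_assert(&sched_lock, MA_OWNED);
+ *		remrunqueue(td);
+ *		td->td_priority = pri;
+ *		setrunqueue(td);
+ *	}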
*/ - if (td->td_proc->p_stat == SRUN) { + if (td->td_state == TDS_RUNQ) { MPASS(td->td_blocked == NULL); remrunqueue(td); + td->td_priority = pri; setrunqueue(td); return; } + /* + * Adjust for any other cases. + */ + td->td_priority = pri; /* * If we aren't blocked on a mutex, we should be. */ - KASSERT(td->td_proc->p_stat == SMTX, ( + KASSERT(td->td_state == TDS_MTX, ( "process %d(%s):%d holds %s but isn't blocked on a mutex\n", - td->td_proc->p_pid, td->td_proc->p_comm, td->td_proc->p_stat, + td->td_proc->p_pid, td->td_proc->p_comm, td->td_state, m->mtx_object.lo_name)); /* @@ -590,7 +593,7 @@ _mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line) */ td->td_blocked = m; td->td_mtxname = m->mtx_object.lo_name; - td->td_proc->p_stat = SMTX; + td->td_state = TDS_MTX; propagate_priority(td); if (LOCK_LOG_TEST(&m->mtx_object, opts)) @@ -727,7 +730,6 @@ _mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line) m, td1); td1->td_blocked = NULL; - td1->td_proc->p_stat = SRUN; setrunqueue(td1); if (td->td_critnest == 1 && td1->td_priority < pri) { @@ -744,7 +746,6 @@ _mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line) } } #endif - setrunqueue(td); if (LOCK_LOG_TEST(&m->mtx_object, opts)) CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p switching out lock=%p", m, diff --git a/sys/kern/kern_poll.c b/sys/kern/kern_poll.c index a197bc0..9dd6924 100644 --- a/sys/kern/kern_poll.c +++ b/sys/kern/kern_poll.c @@ -503,7 +503,6 @@ poll_idle(void) mtx_unlock(&Giant); mtx_assert(&Giant, MA_NOTOWNED); mtx_lock_spin(&sched_lock); - setrunqueue(td); td->td_proc->p_stats->p_ru.ru_nvcsw++; mi_switch(); mtx_unlock_spin(&sched_lock); diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index a5378d9..8b15fc2 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -44,6 +44,7 @@ #include <sys/mutex.h> #include <sys/proc.h> #include <sys/sysproto.h> +#include <sys/kse.h> #include <sys/sysctl.h> #include <sys/filedesc.h> #include <sys/tty.h> @@ -111,44 +112,28 @@ procinit() uihashinit(); } -/* - * Note that we do not link to the proc's ucred here - * The thread is linked as if running but no KSE assigned - */ -static void -thread_link(struct thread *td, struct ksegrp *kg) -{ - struct proc *p = kg->kg_proc; - - td->td_proc = p; - td->td_ksegrp = kg; - td->td_last_kse = &p->p_kse; - - TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist); - TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist); - td->td_critnest = 0; - td->td_kse = NULL; - cpu_thread_link(td); -} - /* * KSE is linked onto the idle queue. 
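 * For illustration, proc_linkup() below wires up the embedded 1:1
 * objects in exactly this order:
 *
 *	ksegrp_link(kg, p);	-- group onto p_ksegrps, counters cleared
 *	kse_link(ke, kg);	-- KSE onto kg_kseq and the kg_iq idle queue
 *	thread_link(td, kg);	-- thread onto p_threads and kg_threads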
*/ -static void +void kse_link(struct kse *ke, struct ksegrp *kg) { struct proc *p = kg->kg_proc; +KASSERT((ke->ke_state != KES_ONRUNQ), ("linking suspect kse on run queue")); TAILQ_INSERT_HEAD(&kg->kg_kseq, ke, ke_kglist); kg->kg_kses++; +KASSERT((ke->ke_state != KES_IDLE), ("already on idle queue")); + ke->ke_state = KES_IDLE; TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist); + kg->kg_idle_kses++; ke->ke_proc = p; ke->ke_ksegrp = kg; ke->ke_thread = NULL; ke->ke_oncpu = NOCPU; } -static void +void ksegrp_link(struct ksegrp *kg, struct proc *p) { @@ -159,10 +144,13 @@ ksegrp_link(struct ksegrp *kg, struct proc *p) TAILQ_INIT(&kg->kg_iq); /* all kses in ksegrp */ kg->kg_proc = p; /* the following counters are in the -zero- section and may not need clearing */ + kg->kg_numthreads = 0; kg->kg_runnable = 0; kg->kg_kses = 0; + kg->kg_idle_kses = 0; kg->kg_runq_kses = 0; /* XXXKSE change name */ /* link it in now that it's consistent */ + p->p_numksegrps++; TAILQ_INSERT_HEAD(&p->p_ksegrps, kg, kg_ksegrp); } @@ -177,30 +165,13 @@ proc_linkup(struct proc *p, struct ksegrp *kg, TAILQ_INIT(&p->p_ksegrps); /* all ksegrps in proc */ TAILQ_INIT(&p->p_threads); /* all threads in proc */ + TAILQ_INIT(&p->p_suspended); /* Threads suspended */ ksegrp_link(kg, p); kse_link(ke, kg); thread_link(td, kg); - /* link them together for 1:1 */ - td->td_kse = ke; - ke->ke_thread = td; } -/* temporary version is ultra simple while we are in 1:1 mode */ -struct thread * -thread_get(struct proc *p) -{ - struct thread *td = &p->p_xxthread; - - return (td); } - - -/********************* -* STUB KSE syscalls -*********************/ - -/* struct thread_wakeup_args { struct thread_mailbox *tmbx; }; */ int thread_wakeup(struct thread *td, struct thread_wakeup_args *uap) { @@ -219,7 +190,11 @@ int kse_yield(struct thread *td, struct kse_yield_args *uap) { - return(ENOSYS); + PROC_LOCK(td->td_proc); + mtx_lock_spin(&sched_lock); + thread_exit(); + /* NOTREACHED */ + return(0); } int kse_wakeup(struct thread *td, struct kse_wakeup_args *uap) { @@ -228,16 +203,80 @@ int kse_wakeup(struct thread *td, struct kse_wakeup_args *uap) return(ENOSYS); } - -int -kse_new(struct thread *td, struct kse_new_args *uap) +/* + * No new KSEG: on the first call we just use the current KSE and + * don't schedule an upcall. In all other situations, allocate a new + * KSE and schedule an upcall on it. + */ /* struct kse_new_args { struct kse_mailbox *mbx; int new_grp_flag; }; */ +int +kse_new(struct thread *td, struct kse_new_args *uap) { + struct kse *newkse; + struct proc *p; + struct kse_mailbox mbx; + int err; - return (ENOSYS); + p = td->td_proc; + if ((err = copyin(uap->mbx, &mbx, sizeof(mbx)))) + return (err); + PROC_LOCK(p); + /* + * If we have no KSE mode set, just set it, and skip KSE and KSEGRP + * creation. You cannot request a new group with the first one as + * you are effectively getting one. Instead, go directly to saving + * the upcall info. + */ + if ((td->td_proc->p_flag & P_KSES) || (uap->new_grp_flag)) { + + return (EINVAL); /* XXX */ + /* + * If newgroup then create the new group. + * Check we have the resources for this. + */ + /* Copy lots of fields from the current KSEGRP. */ + /* Create the new KSE */ + /* Copy lots of fields from the current KSE. */ + } else { + /* + * We are switching to KSEs so just + * use the preallocated ones for this call. + * XXXKSE if we have to initialise any fields for KSE + * mode operation, do it here. + */ + newkse = td->td_kse; + } + /* + * Fill out the KSE-mode specific fields of the new kse. 
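+ * For illustration, the calling side would look roughly like this
+ * (hypothetical userland sketch; only the mailbox field names used
+ * in the copyin above are assumed):
+ *
+ *	struct kse_mailbox mbx;
+ *
+ *	mbx.kmbx_upcall = uts_entry;		-- UTS entry point
+ *	mbx.kmbx_stackbase = uts_stack;
+ *	mbx.kmbx_stacksize = sizeof(uts_stack);
+ *	kse_new(&mbx, 0);	-- first call: the current KSE is reused,
+ *				-- no upcall is scheduled, and P_KSES is
+ *				-- set on the way out.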
+ */ + PROC_UNLOCK(p); + mtx_lock_spin(&sched_lock); + mi_switch(); /* Save current registers to PCB. */ + mtx_unlock_spin(&sched_lock); + newkse->ke_upcall = mbx.kmbx_upcall; + newkse->ke_stackbase = mbx.kmbx_stackbase; + newkse->ke_stacksize = mbx.kmbx_stacksize; + newkse->ke_mailbox = uap->mbx; + cpu_save_upcall(td, newkse); + /* Note that we are the returning syscall */ + td->td_retval[0] = 0; + td->td_retval[1] = 0; + + if ((td->td_proc->p_flag & P_KSES) || (uap->new_grp_flag)) { + thread_schedule_upcall(td, newkse); + } else { + /* + * Don't set this until we are truely ready, because + * things will start acting differently. Return to the + * calling code for the first time. Assuming we set up + * the mailboxes right, all syscalls after this will be + * asynchronous. + */ + td->td_proc->p_flag |= P_KSES; + } + return (0); } /* @@ -554,7 +593,7 @@ fixjobc(p, pgrp, entering) LIST_FOREACH(p, &p->p_children, p_sibling) { if ((hispgrp = p->p_pgrp) != pgrp && hispgrp->pg_session == mysession && - p->p_stat != SZOMB) { + p->p_state != PRS_ZOMBIE) { PGRP_LOCK(hispgrp); if (entering) hispgrp->pg_jobc++; @@ -583,7 +622,7 @@ orphanpg(pg) mtx_lock_spin(&sched_lock); LIST_FOREACH(p, &pg->pg_members, p_pglist) { - if (p->p_stat == SSTOP) { + if (P_SHOULDSTOP(p)) { mtx_unlock_spin(&sched_lock); LIST_FOREACH(p, &pg->pg_members, p_pglist) { PROC_LOCK(p); @@ -674,7 +713,9 @@ fill_kinfo_proc(p, kp) kp->ki_sigcatch = p->p_procsig->ps_sigcatch; } mtx_lock_spin(&sched_lock); - if (p->p_stat != SIDL && p->p_stat != SZOMB && p->p_vmspace != NULL) { + if (p->p_state != PRS_NEW && + p->p_state != PRS_ZOMBIE && + p->p_vmspace != NULL) { struct vmspace *vm = p->p_vmspace; kp->ki_size = vm->vm_map.size; @@ -697,35 +738,65 @@ fill_kinfo_proc(p, kp) p->p_stats->p_cru.ru_stime.tv_usec; } td = FIRST_THREAD_IN_PROC(p); - if (td->td_wmesg != NULL) - strncpy(kp->ki_wmesg, td->td_wmesg, sizeof(kp->ki_wmesg) - 1); - if (p->p_stat == SMTX) { - kp->ki_kiflag |= KI_MTXBLOCK; - strncpy(kp->ki_mtxname, td->td_mtxname, - sizeof(kp->ki_mtxname) - 1); + if (!(p->p_flag & P_KSES)) { + if (td->td_wmesg != NULL) { + strncpy(kp->ki_wmesg, td->td_wmesg, + sizeof(kp->ki_wmesg) - 1); + } + if (td->td_state == TDS_MTX) { + kp->ki_kiflag |= KI_MTXBLOCK; + strncpy(kp->ki_mtxname, td->td_mtxname, + sizeof(kp->ki_mtxname) - 1); + } } - kp->ki_stat = p->p_stat; + + if (p->p_state == PRS_NORMAL) { /* XXXKSE very aproximate */ + if ((td->td_state == TDS_RUNQ) || + (td->td_state == TDS_RUNNING)) { + kp->ki_stat = SRUN; + } else if (td->td_state == TDS_SLP) { + kp->ki_stat = SSLEEP; + } else if (P_SHOULDSTOP(p)) { + kp->ki_stat = SSTOP; + } else if (td->td_state == TDS_MTX) { + kp->ki_stat = SMTX; + } else { + kp->ki_stat = SWAIT; + } + } else if (p->p_state == PRS_ZOMBIE) { + kp->ki_stat = SZOMB; + } else { + kp->ki_stat = SIDL; + } + kp->ki_sflag = p->p_sflag; kp->ki_swtime = p->p_swtime; kp->ki_pid = p->p_pid; /* vvv XXXKSE */ - bintime2timeval(&p->p_runtime, &tv); - kp->ki_runtime = tv.tv_sec * (u_int64_t)1000000 + tv.tv_usec; - kp->ki_pctcpu = p->p_kse.ke_pctcpu; - kp->ki_estcpu = td->td_ksegrp->kg_estcpu; - kp->ki_slptime = td->td_ksegrp->kg_slptime; - kp->ki_wchan = td->td_wchan; - kp->ki_pri.pri_level = td->td_priority; - kp->ki_pri.pri_user = td->td_ksegrp->kg_user_pri; - kp->ki_pri.pri_class = td->td_ksegrp->kg_pri_class; - kp->ki_pri.pri_native = td->td_base_pri; - kp->ki_nice = td->td_ksegrp->kg_nice; - kp->ki_rqindex = p->p_kse.ke_rqindex; - kp->ki_oncpu = p->p_kse.ke_oncpu; - kp->ki_lastcpu = td->td_lastcpu; - kp->ki_tdflags = 
td->td_flags; - kp->ki_pcb = td->td_pcb; - kp->ki_kstack = (void *)td->td_kstack; + if (!(p->p_flag & P_KSES)) { + bintime2timeval(&p->p_runtime, &tv); + kp->ki_runtime = tv.tv_sec * (u_int64_t)1000000 + tv.tv_usec; + kp->ki_pctcpu = p->p_kse.ke_pctcpu; + kp->ki_estcpu = p->p_ksegrp.kg_estcpu; + kp->ki_slptime = p->p_ksegrp.kg_slptime; + kp->ki_wchan = td->td_wchan; + kp->ki_pri.pri_level = td->td_priority; + kp->ki_pri.pri_user = p->p_ksegrp.kg_user_pri; + kp->ki_pri.pri_class = p->p_ksegrp.kg_pri_class; + kp->ki_pri.pri_native = td->td_base_pri; + kp->ki_nice = p->p_ksegrp.kg_nice; + kp->ki_rqindex = p->p_kse.ke_rqindex; + kp->ki_oncpu = p->p_kse.ke_oncpu; + kp->ki_lastcpu = td->td_lastcpu; + kp->ki_tdflags = td->td_flags; + kp->ki_pcb = td->td_pcb; + kp->ki_kstack = (void *)td->td_kstack; + } else { + kp->ki_oncpu = -1; + kp->ki_lastcpu = -1; + kp->ki_tdflags = -1; + /* All the reast are 0 */ + } /* ^^^ XXXKSE */ mtx_unlock_spin(&sched_lock); sp = NULL; @@ -878,7 +949,7 @@ sysctl_kern_proc(SYSCTL_HANDLER_ARGS) /* * Skip embryonic processes. */ - if (p->p_stat == SIDL) { + if (p->p_state == PRS_NEW) { PROC_UNLOCK(p); continue; } diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c index d2cb69d..0803cff 100644 --- a/sys/kern/kern_shutdown.c +++ b/sys/kern/kern_shutdown.c @@ -281,7 +281,6 @@ boot(int howto) DROP_GIANT(); for (subiter = 0; subiter < 50 * iter; subiter++) { mtx_lock_spin(&sched_lock); - setrunqueue(curthread); curthread->td_proc->p_stats->p_ru.ru_nvcsw++; mi_switch(); /* Allow interrupt threads to run */ mtx_unlock_spin(&sched_lock); diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index a561a19..e8ded21 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -84,7 +84,7 @@ static int killpg1(struct thread *td, int sig, int pgid, int all); static int sig_ffs(sigset_t *set); static int sigprop(int sig); static void stop(struct proc *); - +static void tdsignal(struct thread *td, int sig, sig_t action); static int filt_sigattach(struct knote *kn); static void filt_sigdetach(struct knote *kn); static int filt_signal(struct knote *kn, long hint); @@ -168,16 +168,18 @@ static int sigproptbl[NSIG] = { * Determine signal that should be delivered to process p, the current * process, 0 if none. If there is a pending stop signal with default * action, the process stops in issignal(). + * XXXKSE the check for a pending stop is not done under KSE * * MP SAFE. */ int -cursig(struct proc *p) +cursig(struct thread *td) { + struct proc *p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); mtx_assert(&sched_lock, MA_NOTOWNED); - return (SIGPENDING(p) ? issignal(p) : 0); + return (SIGPENDING(p) ? issignal(td) : 0); } /* @@ -1042,7 +1044,7 @@ killpg1(td, sig, pgid, all) PROC_UNLOCK(p); continue; } - if (p->p_stat == SZOMB) { + if (p->p_state == PRS_ZOMBIE) { PROC_UNLOCK(p); continue; } @@ -1243,12 +1245,10 @@ psignal(p, sig) register struct proc *p; register int sig; { - register int prop; register sig_t action; struct thread *td; -#ifdef SMP - struct ksegrp *kg; -#endif + register int prop; + KASSERT(_SIG_VALID(sig), ("psignal(): invalid signal %d\n", sig)); @@ -1257,7 +1257,6 @@ psignal(p, sig) KNOTE(&p->p_klist, NOTE_SIGNAL | sig); prop = sigprop(sig); - /* * If proc is traced, always give parent a chance; * if signal event is tracked by procfs, give *that* @@ -1283,29 +1282,6 @@ psignal(p, sig) action = SIG_DFL; } - /* - * bring the priority of a process up if we want it to get - * killed in this lifetime. - * XXXKSE think if a better way to do this. 
- * - * What we need to do is see if there is a thread that will - * be able to accept the signal. e.g. - * FOREACH_THREAD_IN_PROC() { - * if runnable, we're done - * else pick one at random. - * } - */ - /* XXXKSE - * For now there is one thread per proc. - * Effectively select one sucker thread.. - */ - td = FIRST_THREAD_IN_PROC(p); - mtx_lock_spin(&sched_lock); - if ((p->p_ksegrp.kg_nice > NZERO) && (action == SIG_DFL) && - (prop & SA_KILL) && ((p->p_flag & P_TRACED) == 0)) - p->p_ksegrp.kg_nice = NZERO; /* XXXKSE */ - mtx_unlock_spin(&sched_lock); - if (prop & SA_CONT) SIG_STOPSIGMASK(p->p_siglist); @@ -1316,48 +1292,125 @@ psignal(p, sig) * is default; don't stop the process below if sleeping, * and don't clear any pending SIGCONT. */ - if (prop & SA_TTYSTOP && p->p_pgrp->pg_jobc == 0 && - action == SIG_DFL) + if ((prop & SA_TTYSTOP) && + (p->p_pgrp->pg_jobc == 0) && + (action == SIG_DFL)) return; SIG_CONTSIGMASK(p->p_siglist); } SIGADDSET(p->p_siglist, sig); mtx_lock_spin(&sched_lock); signotify(p); + mtx_unlock_spin(&sched_lock); /* - * Defer further processing for signals which are held, - * except that stopped processes must be continued by SIGCONT. + * Some signals have a process-wide effect and a per-thread + * component. Most processing occurs when the process next + * tries to cross the user boundary, however there are some + * times when processing needs to be done immediatly, such as + * waking up threads so that they can cross the user boundary. + * We try do the per-process part here. */ - if (action == SIG_HOLD && (!(prop & SA_CONT) || p->p_stat != SSTOP)) { - mtx_unlock_spin(&sched_lock); - return; - } - - switch (p->p_stat) { - - case SSLEEP: + if (P_SHOULDSTOP(p)) { /* - * If process is sleeping uninterruptibly - * we can't interrupt the sleep... the signal will - * be noticed when the process returns through - * trap() or syscall(). + * The process is in stopped mode. All the threads should be + * either winding down or already on the suspended queue. */ - if ((td->td_flags & TDF_SINTR) == 0) + if (p->p_flag & P_TRACED) { + /* + * The traced process is already stopped, + * so no further action is necessary. + * No signal can restart us. + */ goto out; + } + + if (sig == SIGKILL) { + /* + * SIGKILL sets process running. + * It will die elsewhere. + * All threads must be restarted. + */ + p->p_flag &= ~P_STOPPED; + goto runfast; + } + + if (prop & SA_CONT) { + /* + * If SIGCONT is default (or ignored), we continue the + * process but don't leave the signal in p_siglist as + * it has no further action. If SIGCONT is held, we + * continue the process and leave the signal in + * p_siglist. If the process catches SIGCONT, let it + * handle the signal itself. If it isn't waiting on + * an event, it goes back to run state. + * Otherwise, process goes back to sleep state. + */ + p->p_flag &= ~P_STOPPED_SGNL; + if (action == SIG_DFL) { + SIGDELSET(p->p_siglist, sig); + } else if (action == SIG_CATCH) { + /* + * The process wants to catch it so it needs + * to run at least one thread, but which one? + * It would seem that the answer would be to + * run an upcall in the next KSE to run, and + * deliver the signal that way. In a NON KSE + * process, we need to make sure that the + * single thread is runnable asap. + * XXXKSE for now however, make them all run. + */ + goto runfast; + } + /* + * The signal is not ignored or caught. + */ + mtx_lock_spin(&sched_lock); + thread_unsuspend(p); /* Checks if should do it. 
*/ + mtx_unlock_spin(&sched_lock); + goto out; + } + + if (prop & SA_STOP) { + /* + * Already stopped, don't need to stop again + * (If we did the shell could get confused). + */ + SIGDELSET(p->p_siglist, sig); + goto out; + } + /* - * Process is sleeping and traced... make it runnable - * so it can discover the signal in issignal() and stop - * for the parent. + * All other kinds of signals: + * If a thread is sleeping interruptibly, simulate a + * wakeup so that when it is continued it will be made + * runnable and can look at the signal. However, don't make + * the process runnable, leave it stopped. + * It may run a bit until it hits a thread_suspend_check(). + * + * XXXKSE I don't understand this at all. */ - if (p->p_flag & P_TRACED) - goto run; + mtx_lock_spin(&sched_lock); + FOREACH_THREAD_IN_PROC(p, td) { + if (td->td_wchan && (td->td_flags & TDF_SINTR)) { + if (td->td_flags & TDF_CVWAITQ) + cv_waitq_remove(td); + else + unsleep(td); + setrunnable(td); + } + } + mtx_unlock_spin(&sched_lock); + goto out; /* - * If SIGCONT is default (or ignored) and process is - * asleep, we are finished; the process should not - * be awakened. + * XXXKSE What about threads that are waiting on mutexes? + * Shouldn't they abort too? */ - if ((prop & SA_CONT) && action == SIG_DFL) { + } else if (p->p_state == PRS_NORMAL) { + if (prop & SA_CONT) { + /* + * Already active, don't need to start again. + */ SIGDELSET(p->p_siglist, sig); goto out; } @@ -1370,133 +1423,128 @@ psignal(p, sig) if (prop & SA_STOP) { if (action != SIG_DFL) goto runfast; + /* * If a child holding parent blocked, * stopping could cause deadlock. */ if (p->p_flag & P_PPWAIT) goto out; - mtx_unlock_spin(&sched_lock); SIGDELSET(p->p_siglist, sig); p->p_xstat = sig; PROC_LOCK(p->p_pptr); - if ((p->p_pptr->p_procsig->ps_flag & PS_NOCLDSTOP) == 0) + if (!(p->p_pptr->p_procsig->ps_flag & PS_NOCLDSTOP)) psignal(p->p_pptr, SIGCHLD); PROC_UNLOCK(p->p_pptr); mtx_lock_spin(&sched_lock); stop(p); + mtx_unlock_spin(&sched_lock); goto out; } else goto runfast; /* NOTREACHED */ + } else { + /* Not in "NORMAL" state. discard the signal. */ + SIGDELSET(p->p_siglist, sig); + goto out; + } - case SSTOP: - /* - * If traced process is already stopped, - * then no further action is necessary. - */ - if (p->p_flag & P_TRACED) - goto out; + /* + * The process is not stopped so we need to apply the signal to all the + * running threads. + */ - /* - * Kill signal always sets processes running. - */ - if (sig == SIGKILL) - goto runfast; +runfast: + FOREACH_THREAD_IN_PROC(p, td) + tdsignal(td, sig, action); + mtx_lock_spin(&sched_lock); + thread_unsuspend(p); + mtx_unlock_spin(&sched_lock); +out: + /* If we jump here, sched_lock should not be owned. */ + mtx_assert(&sched_lock, MA_NOTOWNED); +} - if (prop & SA_CONT) { - /* - * If SIGCONT is default (or ignored), we continue the - * process but don't leave the signal in p_siglist, as - * it has no further action. If SIGCONT is held, we - * continue the process and leave the signal in - * p_siglist. If the process catches SIGCONT, let it - * handle the signal itself. If it isn't waiting on - * an event, then it goes back to run state. - * Otherwise, process goes back to sleep state. - */ - if (action == SIG_DFL) - SIGDELSET(p->p_siglist, sig); - if (action == SIG_CATCH) - goto runfast; - /* - * XXXKSE - * do this for each thread. 
- */ - if (p->p_flag & P_KSES) { - mtx_assert(&sched_lock, - MA_OWNED | MA_NOTRECURSED); - FOREACH_THREAD_IN_PROC(p, td) { - if (td->td_wchan == NULL) { - setrunnable(td); /* XXXKSE */ - } else { - /* mark it as sleeping */ - } - } - } else { - p->p_flag |= P_CONTINUED; - wakeup(p->p_pptr); - if (td->td_wchan == NULL) - goto run; - p->p_stat = SSLEEP; - } - goto out; +/* + * The force of a signal has been directed against a single + * thread. We need to see what we can do about knocking it + * out of any sleep it may be in etc. + */ +static void +tdsignal(struct thread *td, int sig, sig_t action) +{ + struct proc *p = td->td_proc; + register int prop; + + prop = sigprop(sig); + + /* + * Bring the priority of a process up if we want it to get + * killed in this lifetime. + * XXXKSE we should shift the priority to the thread. + */ + mtx_lock_spin(&sched_lock); + if ((action == SIG_DFL) && (prop & SA_KILL)) { + if (td->td_priority > PUSER) { + td->td_priority = PUSER; } + } + mtx_unlock_spin(&sched_lock); - if (prop & SA_STOP) { - /* - * Already stopped, don't need to stop again. - * (If we did the shell could get confused.) - */ - SIGDELSET(p->p_siglist, sig); + /* + * Defer further processing for signals which are held, + * except that stopped processes must be continued by SIGCONT. + */ + if (action == SIG_HOLD) { + goto out; + } + mtx_lock_spin(&sched_lock); + if (td->td_state == TDS_SLP) { + /* + * If thread is sleeping uninterruptibly + * we can't interrupt the sleep... the signal will + * be noticed when the process returns through + * trap() or syscall(). + */ + if ((td->td_flags & TDF_SINTR) == 0) { + mtx_unlock_spin(&sched_lock); goto out; } - /* - * If process is sleeping interruptibly, then simulate a - * wakeup so that when it is continued, it will be made - * runnable and can look at the signal. But don't make - * the process runnable, leave it stopped. - * XXXKSE should we wake ALL blocked threads? + * Process is sleeping and traced. Make it runnable + * so it can discover the signal in issignal() and stop + * for its parent. */ - if (p->p_flag & P_KSES) { - FOREACH_THREAD_IN_PROC(p, td) { - if (td->td_wchan && (td->td_flags & TDF_SINTR)){ - if (td->td_flags & TDF_CVWAITQ) - cv_waitq_remove(td); - else - unsleep(td); /* XXXKSE */ - } - } - } else { - if (td->td_wchan && td->td_flags & TDF_SINTR) { - if (td->td_flags & TDF_CVWAITQ) - cv_waitq_remove(td); - else - unsleep(td); /* XXXKSE */ - } + if (p->p_flag & P_TRACED) { + p->p_flag &= ~P_STOPPED_TRACE; + goto run; } - goto out; + mtx_unlock_spin(&sched_lock); + /* + * If SIGCONT is default (or ignored) and process is + * asleep, we are finished; the process should not + * be awakened. + */ + if ((prop & SA_CONT) && action == SIG_DFL) { + SIGDELSET(p->p_siglist, sig); + goto out; + } + goto runfast; + /* NOTREACHED */ - default: + } else { /* - * SRUN, SIDL, SZOMB do nothing with the signal, + * Other states do nothing with the signal immediatly, * other than kicking ourselves if we are running. * It will either never be noticed, or noticed very soon. */ - if (p->p_stat == SRUN) { + mtx_unlock_spin(&sched_lock); + if (td->td_state == TDS_RUNQ || + td->td_state == TDS_RUNNING) { + signotify(td->td_proc); #ifdef SMP - struct kse *ke; - struct thread *td = curthread; -/* we should only deliver to one thread.. but which one? 
*/ - FOREACH_KSEGRP_IN_PROC(p, kg) { - FOREACH_KSE_IN_GROUP(kg, ke) { - if (ke->ke_thread == td) { - continue; - } - forward_signal(ke->ke_thread); - } - } + if (td->td_state == TDS_RUNNING && td != curthread) + forward_signal(td); #endif } goto out; @@ -1506,21 +1554,17 @@ psignal(p, sig) runfast: /* * Raise priority to at least PUSER. - * XXXKSE Should we make them all run fast? - * Maybe just one would be enough? */ - - if (FIRST_THREAD_IN_PROC(p)->td_priority > PUSER) { - FIRST_THREAD_IN_PROC(p)->td_priority = PUSER; + mtx_lock_spin(&sched_lock); + if (td->td_priority > PUSER) { + td->td_priority = PUSER; } run: - /* If we jump here, sched_lock has to be owned. */ mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED); - setrunnable(td); /* XXXKSE */ -out: + setrunnable(td); mtx_unlock_spin(&sched_lock); - /* Once we get here, sched_lock should not be owned. */ +out: mtx_assert(&sched_lock, MA_NOTOWNED); } @@ -1533,16 +1577,18 @@ out: * by checking the pending signal masks in cursig.) The normal call * sequence is * - * while (sig = cursig(curproc)) + * while (sig = cursig(curthread)) * postsig(sig); */ int -issignal(p) - register struct proc *p; +issignal(td) + struct thread *td; { + struct proc *p; sigset_t mask; register int sig, prop; + p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); for (;;) { int traced = (p->p_flag & P_TRACED) || (p->p_stops & S_SIG); @@ -1576,6 +1622,7 @@ issignal(p) PROC_UNLOCK(p->p_pptr); mtx_lock_spin(&sched_lock); stop(p); + td->td_state = TDS_UNQUEUED; PROC_UNLOCK(p); DROP_GIANT(); p->p_stats->p_ru.ru_nivcsw++; @@ -1633,6 +1680,7 @@ issignal(p) #endif break; /* == ignore */ } +#if 0 /* * If there is a pending stop signal to process * with default action, stop here, @@ -1647,8 +1695,10 @@ issignal(p) break; /* == ignore */ p->p_xstat = sig; PROC_LOCK(p->p_pptr); - if ((p->p_pptr->p_procsig->ps_flag & PS_NOCLDSTOP) == 0) + if ((p->p_pptr->p_procsig->ps_flag & + PS_NOCLDSTOP) == 0) { psignal(p->p_pptr, SIGCHLD); + } PROC_UNLOCK(p->p_pptr); mtx_lock_spin(&sched_lock); stop(p); @@ -1660,7 +1710,9 @@ issignal(p) PICKUP_GIANT(); PROC_LOCK(p); break; - } else if (prop & SA_IGNORE) { + } else +#endif + if (prop & SA_IGNORE) { /* * Except for SIGCONT, shouldn't get here. * Default action is to ignore; drop it. @@ -1706,7 +1758,7 @@ stop(p) PROC_LOCK_ASSERT(p, MA_OWNED); mtx_assert(&sched_lock, MA_OWNED); - p->p_stat = SSTOP; + p->p_flag |= P_STOPPED_SGNL; p->p_flag &= ~P_WAITED; wakeup(p->p_pptr); } diff --git a/sys/kern/kern_subr.c b/sys/kern/kern_subr.c index 5e32eee..c63091c 100644 --- a/sys/kern/kern_subr.c +++ b/sys/kern/kern_subr.c @@ -538,7 +538,6 @@ uio_yield() mtx_lock_spin(&sched_lock); DROP_GIANT(); td->td_priority = td->td_ksegrp->kg_user_pri; /* XXXKSE */ - setrunqueue(td); td->td_proc->p_stats->p_ru.ru_nivcsw++; mi_switch(); mtx_unlock_spin(&sched_lock); diff --git a/sys/kern/kern_switch.c b/sys/kern/kern_switch.c index 2b531c0..40d3ef8 100644 --- a/sys/kern/kern_switch.c +++ b/sys/kern/kern_switch.c @@ -26,6 +26,69 @@ * $FreeBSD$ */ +/*** + +Here is the logic.. + +If there are N processors, then there are at most N KSEs (kernel +schedulable entities) working to process threads that belong to a +KSEGROUP (kg). If there are X of these KSEs actually running at the +moment in question, then there are at most M (= N - X) of these KSEs on +the run queue, as running KSEs are not on the queue. + +Runnable threads are queued off the KSEGROUP in priority order. 
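+For example, on a 2-CPU system (N = 2) with one KSE of the group +actually running (X = 1), at most M = 1 KSE from that KSEGROUP can be +waiting on the run queue at any instant.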
+If there are M or more threads runnable, the top M threads +(by priority) are 'preassigned' to the M KSEs not running. The KSEs take +their priority from those threads and are put on the run queue. + +The last thread that had a priority high enough to have a KSE associated +with it, AND IS ON THE RUN QUEUE, is pointed to by +kg->kg_last_assigned. If no thread queued off the KSEGROUP has a KSE +assigned, either because all the available KSEs are actively running or because there +are no threads queued, that pointer is NULL. + +When a KSE is removed from the run queue to be run, we know +it was associated with the highest priority thread in the queue (at the head +of the queue). If it is also the last assigned, we know M was 1 and must +now be 0. Since the thread is no longer queued, that pointer must be +removed from it. Since we know there were no more KSEs available, +(M was 1 and is now 0) and since we are not FREEING our KSE +but using it, we know there are STILL no more KSEs available, we can prove +that the next thread in the ksegrp list will not have a KSE to assign to +it, so we can show that the pointer must be made 'invalid' (NULL). + +The pointer exists so that when a new thread is made runnable, it can +have its priority compared with the last assigned thread to see if +it should 'steal' its KSE or not.. i.e. is it 'earlier' +on the list than that thread or later.. If it's earlier, then the KSE is +removed from the last assigned (which is now not assigned a KSE) +and reassigned to the new thread, which is placed earlier in the list. +The pointer is then backed up to the previous thread (which may or may not +be the new thread). + +When a thread sleeps or is removed, the KSE becomes available and if there +are queued threads that are not assigned KSEs, the highest priority one of +them is assigned the KSE, which is then placed back on the run queue at +the appropriate place, and the kg->kg_last_assigned pointer is adjusted down +to point to it. + +The following diagram shows 2 KSEs and 3 threads from a single process.
+
+ RUNQ: --->KSE---KSE--...    (KSEs queued at priorities from threads)
+              \    \____
+               \        \
+ KSEGROUP---thread--thread--thread    (queued in priority order)
+        \                 /
+         \_______________/
+          (last_assigned)
+
+The result of this scheme is that the M available KSEs are always +queued at the priorities they have inherited from the M highest priority +threads for that KSEGROUP. If this situation changes, the KSEs are +reassigned to keep this true. + +*/ + #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> @@ -44,34 +107,442 @@ CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS); static struct runq runq; SYSINIT(runq, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, runq_init, &runq) +static void runq_readjust(struct runq *rq, struct kse *ke); +/************************************************************************ + * Functions that manipulate runnability from a thread perspective. * + ************************************************************************/ + /* - * Wrappers which implement old interface; act on global run queue. + * Select the KSE that will be run next. From that find the thread, and + * remove it from the KSEGRP's run queue. If there is thread clustering, + * this will be what does it. 
*/ - struct thread * choosethread(void) { - return (runq_choose(&runq)->ke_thread); + struct kse *ke; + struct thread *td; + struct ksegrp *kg; + + if ((ke = runq_choose(&runq))) { + td = ke->ke_thread; + KASSERT((td->td_kse == ke), ("kse/thread mismatch")); + kg = ke->ke_ksegrp; + if (td->td_flags & TDF_UNBOUND) { + TAILQ_REMOVE(&kg->kg_runq, td, td_runq); + if (kg->kg_last_assigned == td) + if (TAILQ_PREV(td, threadqueue, td_runq) + != NULL) + printf("Yo MAMA!\n"); + kg->kg_last_assigned = TAILQ_PREV(td, + threadqueue, td_runq); + /* + * If we have started running an upcall, + * Then TDF_UNBOUND WAS set because the thread was + * created without a KSE. Now that we have one, + * and it is our time to run, we make sure + * that BOUND semantics apply for the rest of + * the journey to userland, and into the UTS. + */ +#ifdef NOTYET + if (td->td_flags & TDF_UPCALLING) + tdf->td_flags &= ~TDF_UNBOUND; +#endif + } + kg->kg_runnable--; + CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d", + td, td->td_priority); + } else { + /* Pretend the idle thread was on the run queue. */ + td = PCPU_GET(idlethread); + /* Simulate that it was on the run queue */ + td->td_state = TDS_RUNQ; + td->td_kse->ke_state = KES_UNQUEUED; + CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td); + } + thread_sanity_check(td); + return (td); +} + +/* + * Given a KSE (now surplus), either assign a new runable thread to it + * (and put it in the run queue) or put it in the ksegrp's idle KSE list. + * Assumes the kse is not linked to any threads any more. (has been cleaned). + */ +void +kse_reassign(struct kse *ke) +{ + struct ksegrp *kg; + struct thread *td; + + kg = ke->ke_ksegrp; + +KASSERT((ke->ke_state != KES_ONRUNQ), ("kse_reassigning non-free kse")); + /* + * Find the first unassigned thread + * If there is a 'last assigned' then see what's next. + * otherwise look at what is first. + */ + if ((td = kg->kg_last_assigned)) { + td = TAILQ_NEXT(td, td_runq); + } else { + td = TAILQ_FIRST(&kg->kg_runq); + } + + /* + * If we found one assign it the kse, otherwise idle the kse. + */ + if (td) { + thread_sanity_check(td); + kg->kg_last_assigned = td; + td->td_kse = ke; + ke->ke_thread = td; + runq_add(&runq, ke); + CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p", ke, td); + } else { + KASSERT((ke->ke_state != KES_IDLE), ("kse already idle")); +KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!")); + ke->ke_state = KES_IDLE; + ke->ke_thread = NULL; + TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist); + kg->kg_idle_kses++; + CTR1(KTR_RUNQ, "kse_reassign: ke%p idled", ke); +KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self2!")); + } } int -procrunnable(void) +kserunnable(void) { return runq_check(&runq); } +/* + * Remove a thread from its KSEGRP's run queue. + * This in turn may remove it from a KSE if it was already assigned + * to one, possibly causing a new thread to be assigned to the KSE + * and the KSE getting a new priority (unless it's a BOUND thread/KSE pair). + */ void remrunqueue(struct thread *td) { - runq_remove(&runq, td->td_kse); + struct thread *td2, *td3; + struct ksegrp *kg; + struct kse *ke; + + mtx_assert(&sched_lock, MA_OWNED); + thread_sanity_check(td); + KASSERT ((td->td_state == TDS_RUNQ), + ("remrunqueue: Bad state on run queue")); + kg = td->td_ksegrp; + ke = td->td_kse; + /* + * If it's a bound thread/KSE pair, take the shortcut. All non-KSE + * threads are BOUND. 
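+ * (Bound means td->td_kse is permanently attached, so the KSE simply + * leaves the system run queue together with its thread and none of + * the last_assigned bookkeeping below is needed.)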
+ */ + CTR1(KTR_RUNQ, "remrunqueue: td%p", td); + td->td_state = TDS_UNQUEUED; + kg->kg_runnable--; + if ((td->td_flags & TDF_UNBOUND) == 0) { + /* Bring its kse with it, leave the thread attached */ + runq_remove(&runq, ke); + ke->ke_state = KES_UNQUEUED; + return; + } + if (ke) { + /* + * This thread has been assigned to a KSE. + * We need to dissociate it and try assign the + * KSE to the next available thread. Then, we should + * see if we need to move the KSE in the run queues. + */ + td2 = kg->kg_last_assigned; + KASSERT((td2 != NULL), ("last assigned has wrong value ")); + td->td_kse = NULL; + if ((td3 = TAILQ_NEXT(td2, td_runq))) { + KASSERT(td3 != td, ("td3 somehow matched td")); + /* + * Give the next unassigned thread to the KSE + * so the number of runnable KSEs remains + * constant. + */ + td3->td_kse = ke; + ke->ke_thread = td3; + kg->kg_last_assigned = td3; + runq_readjust(&runq, ke); + } else { + /* + * There is no unassigned thread. + * If we were the last assigned one, + * adjust the last assigned pointer back + * one, which may result in NULL. + */ + if (td == td2) { + kg->kg_last_assigned = + TAILQ_PREV(td, threadqueue, td_runq); + } + runq_remove(&runq, ke); +KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!")); + KASSERT((ke->ke_state != KES_IDLE), + ("kse already idle")); + ke->ke_state = KES_IDLE; + ke->ke_thread = NULL; +KASSERT((TAILQ_FIRST(&kg->kg_iq) != ke), ("really bad screwup")); + TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist); + kg->kg_idle_kses++; +KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self2!")); + } + } + TAILQ_REMOVE(&kg->kg_runq, td, td_runq); + thread_sanity_check(td); } +#if 1 /* use the first version */ + void setrunqueue(struct thread *td) { - runq_add(&runq, td->td_kse); + struct kse *ke; + struct ksegrp *kg; + struct thread *td2; + struct thread *tda; + + CTR1(KTR_RUNQ, "setrunqueue: td%p", td); + mtx_assert(&sched_lock, MA_OWNED); + thread_sanity_check(td); + KASSERT((td->td_state != TDS_RUNQ), ("setrunqueue: bad thread state")); + td->td_state = TDS_RUNQ; + kg = td->td_ksegrp; + kg->kg_runnable++; + if ((td->td_flags & TDF_UNBOUND) == 0) { + KASSERT((td->td_kse != NULL), + ("queueing BAD thread to run queue")); + /* + * Common path optimisation: Only one of everything + * and the KSE is always already attached. + * Totally ignore the ksegrp run queue. + */ + runq_add(&runq, td->td_kse); + return; + } + /* + * Ok, so we are threading with this thread. + * We don't have a KSE, see if we can get one.. + */ + tda = kg->kg_last_assigned; + if ((ke = td->td_kse) == NULL) { + /* + * We will need a KSE, see if there is one.. + * First look for a free one, before getting desperate. + * If we can't get one, our priority is not high enough.. + * that's ok.. + */ + if (kg->kg_idle_kses) { + /* + * There is a free one so it's ours for the asking.. + */ + ke = TAILQ_FIRST(&kg->kg_iq); +KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self3!")); + TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist); + ke->ke_state = KES_UNQUEUED; + kg->kg_idle_kses--; +KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self4!")); + } else if (tda && (tda->td_priority > td->td_priority)) { + /* + * None free, but there is one we can commandeer. 
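+ * (For example: if the last assigned thread sits at priority 140 + * and we are at priority 120, we take its KSE, it drops back to + * waiting for one, and kg_last_assigned backs up one entry.)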
+ */ + ke = tda->td_kse; + tda->td_kse = NULL; + ke->ke_thread = NULL; + tda = kg->kg_last_assigned = + TAILQ_PREV(tda, threadqueue, td_runq); + runq_remove(&runq, ke); +KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self5!")); + } + } else { + KASSERT(ke->ke_thread == td, ("KSE/thread mismatch")); + KASSERT(ke->ke_state != KES_IDLE, ("KSE unexpectedly idle")); + ke->ke_thread = NULL; + td->td_kse = NULL; + } + + /* + * Add the thread to the ksegrp's run queue at + * the appropriate place. + */ + TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) { + if (td2->td_priority > td->td_priority) { + TAILQ_INSERT_BEFORE(td2, td, td_runq); + break; + } + } + if (td2 == NULL) { + /* We ran off the end of the TAILQ or it was empty. */ + TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq); + } + + /* + * If we have a ke to use, then put it on the run queue and + * If needed, readjust the last_assigned pointer. + */ + if (ke) { + if (tda == NULL) { + /* + * No pre-existing last assigned so whoever is first + * gets the KSE we borught in.. (may be us) + */ + td2 = TAILQ_FIRST(&kg->kg_runq); + KASSERT((td2->td_kse == NULL), + ("unexpected ke present")); + td2->td_kse = ke; + ke->ke_thread = td2; + kg->kg_last_assigned = td2; + } else if (tda->td_priority > td->td_priority) { + /* + * It's ours, grab it, but last_assigned is past us + * so don't change it. + */ + td->td_kse = ke; + ke->ke_thread = td; + } else { + /* + * We are past last_assigned, so + * put the new kse on whatever is next, + * which may or may not be us. + */ + td2 = TAILQ_NEXT(tda, td_runq); + kg->kg_last_assigned = td2; + td2->td_kse = ke; + ke->ke_thread = td2; + } + runq_add(&runq, ke); + } + thread_sanity_check(td); } +#else + +void +setrunqueue(struct thread *td) +{ + struct kse *ke; + struct ksegrp *kg; + struct thread *td2; + + CTR1(KTR_RUNQ, "setrunqueue: td%p", td); + KASSERT((td->td_state != TDS_RUNQ), ("setrunqueue: bad thread state")); + td->td_state = TDS_RUNQ; + kg = td->td_ksegrp; + kg->kg_runnable++; + if ((td->td_flags & TDF_UNBOUND) == 0) { + /* + * Common path optimisation: Only one of everything + * and the KSE is always already attached. + * Totally ignore the ksegrp run queue. + */ + runq_add(&runq, td->td_kse); + return; + } + /* + * First add the thread to the ksegrp's run queue at + * the appropriate place. + */ + TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) { + if (td2->td_priority > td->td_priority) { + TAILQ_INSERT_BEFORE(td2, td, td_runq); + break; + } + } + if (td2 == NULL) { + /* We ran off the end of the TAILQ or it was empty. */ + TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq); + } + + /* + * The following could be achieved by simply doing: + * td->td_kse = NULL; kse_reassign(ke); + * but I felt that I'd try do it inline here. + * All this work may not be worth it. + */ + if ((ke = td->td_kse)) { /* XXXKSE */ + /* + * We have a KSE already. See whether we can keep it + * or if we need to give it to someone else. + * Either way it will need to be inserted into + * the runq. kse_reassign() will do this as will runq_add(). + */ + if ((kg->kg_last_assigned) && + (kg->kg_last_assigned->td_priority > td->td_priority)) { + /* + * We can definitly keep the KSE + * as the "last assignead thread" has + * less priority than we do. + * The "last assigned" pointer stays the same. + */ + runq_add(&runq, ke); + return; + + } + /* + * Give it to the correct thread, + * which may be (often is) us, but may not be. + */ + td->td_kse = NULL; + kse_reassign(ke); + return; + } + /* + * There are two cases where KSE adjustment is needed. 
+ * Usurpation of an already assigned KSE, and assignment + * of a previously IDLE KSE. + */ + if (kg->kg_idle_kses) { + /* + * If there are unassigned KSEs then we definitly + * will be assigned one from the idle KSE list. + * If we are the last, we should get the "last + * assigned" pointer set to us as well. + */ + ke = TAILQ_FIRST(&kg->kg_iq); +KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!")); + TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist); + ke->ke_state = KES_UNQUEUED; + kg->kg_idle_kses--; +KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!")); + ke->ke_thread = td; + td->td_kse = ke; + runq_add(&runq, ke); +KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!")); + if (TAILQ_NEXT(td, td_runq) == NULL) { + kg->kg_last_assigned = td; + } + } else if (kg->kg_last_assigned && + (kg->kg_last_assigned->td_priority > td->td_priority)) { + /* + * If there were none last-assigned, all KSEs + * are actually out running as we speak. + * If there was a last assigned, but we didn't see it, + * we must be inserting before it, so take the KSE from + * the last assigned, and back it up one entry. Then, + * assign the KSE to the new thread and adjust its priority. + */ + td2 = kg->kg_last_assigned; + ke = td2->td_kse; +KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!")); + kg->kg_last_assigned = + TAILQ_PREV(td2, threadqueue, td_runq); + td2->td_kse = NULL; + td->td_kse = ke; + ke->ke_thread = td; + runq_readjust(&runq, ke); +KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!")); + } +} +#endif + +/************************************************************************ + * Critical section marker functions * + ************************************************************************/ /* Critical sections that prevent preemption. */ void critical_enter(void) @@ -98,6 +569,23 @@ critical_exit(void) } } + +/************************************************************************ + * SYSTEM RUN QUEUE manipulations and tests * + ************************************************************************/ +/* + * Initialize a run structure. + */ +void +runq_init(struct runq *rq) +{ + int i; + + bzero(rq, sizeof *rq); + for (i = 0; i < RQ_NQS; i++) + TAILQ_INIT(&rq->rq_queues[i]); +} + /* * Clear the status bit of the queue corresponding to priority level pri, * indicating that it is empty. @@ -156,7 +644,7 @@ runq_setbit(struct runq *rq, int pri) } /* - * Add the process to the queue specified by its priority, and set the + * Add the KSE to the queue specified by its priority, and set the * corresponding status bit. 
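 * (Roughly, with the constants from <sys/runq.h>, where RQ_PPQ
 * priority levels share each of the RQ_NQS queues:
 *
 *	pri = ke->ke_thread->td_priority / RQ_PPQ;
 *	rqb_bits[pri / RQB_BPW] |= (rqb_word_t)1 << (pri % RQB_BPW);
 *
 * the second line being roughly what runq_setbit() does.)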
*/ void @@ -165,14 +653,16 @@ runq_add(struct runq *rq, struct kse *ke) struct rqhead *rqh; int pri; -#ifdef INVARIANTS - struct proc *p = ke->ke_proc; -#endif - if (ke->ke_flags & KEF_ONRUNQ) - return; mtx_assert(&sched_lock, MA_OWNED); - KASSERT(p->p_stat == SRUN, ("runq_add: proc %p (%s) not SRUN", - p, p->p_comm)); + KASSERT((ke->ke_thread != NULL), ("runq_add: No thread on KSE")); + KASSERT((ke->ke_thread->td_kse != NULL), ("runq_add: No KSE on thread")); + if (ke->ke_state == KES_ONRUNQ) + return; +#if defined(INVARIANTS) && defined(DIAGNOSTIC) + KASSERT(ke->ke_state != KES_ONRUNQ, + ("runq_add: kse %p (%s) already in run queue", ke, + ke->ke_proc->p_comm)); +#endif pri = ke->ke_thread->td_priority / RQ_PPQ; ke->ke_rqindex = pri; runq_setbit(rq, pri); @@ -180,7 +670,8 @@ runq_add(struct runq *rq, struct kse *ke) CTR4(KTR_RUNQ, "runq_add: p=%p pri=%d %d rqh=%p", ke->ke_proc, ke->ke_thread->td_priority, pri, rqh); TAILQ_INSERT_TAIL(rqh, ke, ke_procq); - ke->ke_flags |= KEF_ONRUNQ; + ke->ke_ksegrp->kg_runq_kses++; + ke->ke_state = KES_ONRUNQ; } /* @@ -219,43 +710,38 @@ runq_choose(struct runq *rq) int pri; mtx_assert(&sched_lock, MA_OWNED); - if ((pri = runq_findbit(rq)) != -1) { + while ((pri = runq_findbit(rq)) != -1) { rqh = &rq->rq_queues[pri]; ke = TAILQ_FIRST(rqh); KASSERT(ke != NULL, ("runq_choose: no proc on busy queue")); - KASSERT(ke->ke_proc->p_stat == SRUN, - ("runq_choose: process %d(%s) in state %d", ke->ke_proc->p_pid, - ke->ke_proc->p_comm, ke->ke_proc->p_stat)); - CTR3(KTR_RUNQ, "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh); + CTR3(KTR_RUNQ, + "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh); +KASSERT(ke->ke_procq.tqe_prev != NULL, ("no prev")); +if (ke->ke_procq.tqe_next) + KASSERT(ke->ke_procq.tqe_next->ke_procq.tqe_prev != NULL, ("no next")); TAILQ_REMOVE(rqh, ke, ke_procq); + ke->ke_ksegrp->kg_runq_kses--; if (TAILQ_EMPTY(rqh)) { CTR0(KTR_RUNQ, "runq_choose: empty"); runq_clrbit(rq, pri); } - ke->ke_flags &= ~KEF_ONRUNQ; + + ke->ke_state = KES_RUNNING; + KASSERT((ke->ke_thread != NULL), + ("runq_choose: No thread on KSE")); + KASSERT((ke->ke_thread->td_kse != NULL), + ("runq_choose: No KSE on thread")); return (ke); } CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri); - return (PCPU_GET(idlethread)->td_kse); + return (NULL); } /* - * Initialize a run structure. - */ -void -runq_init(struct runq *rq) -{ - int i; - - bzero(rq, sizeof *rq); - for (i = 0; i < RQ_NQS; i++) - TAILQ_INIT(&rq->rq_queues[i]); -} - -/* - * Remove the process from the queue specified by its priority, and clear the + * Remove the KSE from the queue specified by its priority, and clear the * corresponding status bit if the queue becomes empty. + * Caller must set ke->ke_state afterwards. 
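+ * (runq_remove() itself leaves the KSE in KES_UNQUEUED; the caller + * then moves it on to KES_IDLE, KES_RUNNING or wherever it belongs.)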
*/ void runq_remove(struct runq *rq, struct kse *ke) @@ -263,8 +749,7 @@ runq_remove(struct runq *rq, struct kse *ke) struct rqhead *rqh; int pri; - if (!(ke->ke_flags & KEF_ONRUNQ)) - return; + KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue")); mtx_assert(&sched_lock, MA_OWNED); pri = ke->ke_rqindex; rqh = &rq->rq_queues[pri]; @@ -276,5 +761,104 @@ runq_remove(struct runq *rq, struct kse *ke) CTR0(KTR_RUNQ, "runq_remove: empty"); runq_clrbit(rq, pri); } - ke->ke_flags &= ~KEF_ONRUNQ; + ke->ke_state = KES_UNQUEUED; + ke->ke_ksegrp->kg_runq_kses--; +} + +static void +runq_readjust(struct runq *rq, struct kse *ke) +{ + + if (ke->ke_rqindex != (ke->ke_thread->td_priority / RQ_PPQ)) { + runq_remove(rq, ke); + runq_add(rq, ke); + } +} + +void +thread_sanity_check(struct thread *td) +{ + struct proc *p; + struct ksegrp *kg; + struct kse *ke; + struct thread *td2; + unsigned int prevpri; + int saw_lastassigned; + int unassigned; + int assigned; + + p = td->td_proc; + kg = td->td_ksegrp; + ke = td->td_kse; + + if (kg != &p->p_ksegrp) { + panic ("wrong ksegrp"); + } + + if (ke) { + if (ke != &p->p_kse) { + panic("wrong kse"); + } + if (ke->ke_thread != td) { + panic("wrong thread"); + } + } + + if ((p->p_flag & P_KSES) == 0) { + if (ke == NULL) { + panic("non KSE thread lost kse"); + } + } else { + prevpri = 0; + saw_lastassigned = 0; + unassigned = 0; + assigned = 0; + TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) { + if (td2->td_priority < prevpri) { + panic("thread runqueue unosorted"); + } + prevpri = td2->td_priority; + if (td2->td_kse) { + assigned++; + if (unassigned) { + panic("unassigned before assigned"); + } + if (kg->kg_last_assigned == NULL) { + panic("lastassigned corrupt"); + } + if (saw_lastassigned) { + panic("last assigned not last"); + } + if (td2->td_kse->ke_thread != td2) { + panic("mismatched kse/thread"); + } + } else { + unassigned++; + } + if (td2 == kg->kg_last_assigned) { + saw_lastassigned = 1; + if (td2->td_kse == NULL) { + panic("last assigned not assigned"); + } + } + } + if (kg->kg_last_assigned && (saw_lastassigned == 0)) { + panic("where on earth does lastassigned point?"); + } + FOREACH_THREAD_IN_GROUP(kg, td2) { + if (((td2->td_flags & TDF_UNBOUND) == 0) && + (td2->td_state == TDS_RUNQ)) { + assigned++; + if (td2->td_kse == NULL) { + panic ("BOUND thread with no KSE"); + } + } + } +#if 0 + if ((unassigned + assigned) != kg->kg_runnable) { + panic("wrong number in runnable"); + } +#endif + } } + diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index bd1a625..a2a44ff 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -277,9 +277,13 @@ schedcpu(arg) * with 16-bit int's (remember them?) * overflow takes 45 days. 
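 * (Concretely, the estimate is aged once a second as
 *
 *	kg_estcpu = (2*load / (2*load + 1)) * kg_estcpu
 *
 * which is what decay_cpu() below computes in fixed point, with
 * loadfac = loadfactor(averunnable.ldavg[0]); those macro names come
 * from the surrounding file and are not shown in this hunk.)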
*/ - /* XXXKSE */ - /* if ((ke->ke_flags & KEF_ONRUNQ) == 0) */ - if (p->p_stat == SSLEEP || p->p_stat == SSTOP) { + /* XXXKSE **WRONG***/ + /* + * the kse slptimes are not touched in wakeup + * because the thread may not HAVE a KSE + */ + if (ke->ke_state == KES_ONRUNQ && + ke->ke_state == KES_RUNNING) { ke->ke_slptime++; } else { ke->ke_slptime = 0; @@ -321,20 +325,31 @@ schedcpu(arg) } kg->kg_estcpu = decay_cpu(loadfac, kg->kg_estcpu); resetpriority(kg); - td = FIRST_THREAD_IN_PROC(p); - if (td->td_priority >= PUSER && - (p->p_sflag & PS_INMEM)) { - int changedqueue = - ((td->td_priority / RQ_PPQ) != - (kg->kg_user_pri / RQ_PPQ)); - - td->td_priority = kg->kg_user_pri; - FOREACH_KSE_IN_GROUP(kg, ke) { - if ((ke->ke_oncpu == NOCPU) && - (p->p_stat == SRUN) && /* XXXKSE */ - changedqueue) { - remrunqueue(ke->ke_thread); - setrunqueue(ke->ke_thread); + FOREACH_THREAD_IN_GROUP(kg, td) { + int changedqueue; + if (td->td_priority >= PUSER) { + /* + * Only change the priority + * of threads that are still at their + * user priority. + * XXXKSE This is problematic + * as we may need to re-order + * the threads on the KSEG list. + */ + changedqueue = + ((td->td_priority / RQ_PPQ) != + (kg->kg_user_pri / RQ_PPQ)); + + td->td_priority = kg->kg_user_pri; + if (changedqueue && + td->td_state == TDS_RUNQ) { + /* this could be optimised */ + remrunqueue(td); + td->td_priority = + kg->kg_user_pri; + setrunqueue(td); + } else { + td->td_priority = kg->kg_user_pri; } } } @@ -409,6 +424,7 @@ sleepinit(void) * entered before msleep returns. If priority includes the PDROP * flag the mutex is not entered before returning. */ + int msleep(ident, mtx, priority, wmesg, timo) void *ident; @@ -426,9 +442,48 @@ msleep(ident, mtx, priority, wmesg, timo) if (KTRPOINT(td, KTR_CSW)) ktrcsw(1, 0); #endif + KASSERT((td->td_kse != NULL), ("msleep: NULL KSE?")); + KASSERT((td->td_kse->ke_state == KES_RUNNING), ("msleep: kse state?")); WITNESS_SLEEP(0, &mtx->mtx_object); KASSERT(timo != 0 || mtx_owned(&Giant) || mtx != NULL, ("sleeping without a mutex")); + /* + * If we are capable of async syscalls and there isn't already + * another one ready to return, start a new thread + * and queue it as ready to run. Note that there is danger here + * because we need to make sure that we don't sleep allocating + * the thread (recursion here might be bad). + * Hence the TDF_INMSLEEP flag. + */ + if (p->p_flag & P_KSES) { + /* Just don't bother if we are exiting + and not the exiting thread. */ + if ((p->p_flag & P_WEXIT) && catch && p->p_singlethread != td) + return (EINTR); + if (td->td_mailbox && (!(td->td_flags & TDF_INMSLEEP))) { + /* + * If we have no queued work to do, then + * upcall to the UTS to see if it has more to do. + * We don't need to upcall now, just make it and + * queue it. + */ + mtx_lock_spin(&sched_lock); + if (TAILQ_FIRST(&td->td_ksegrp->kg_runq) == NULL) { + /* Don't recurse here! 
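The guard being set up in this block, reduced to a standalone sketch: thread_schedule_upcall() could itself wind up sleeping, so TDF_INMSLEEP makes the upcall branch non-reentrant. All sk_* names below are invented for the sketch.

	#define SK_INMSLEEP	0x1	/* TDF_INMSLEEP analogue */

	struct sk_thread {
		int	flags;
	};

	void
	sk_schedule_upcall(struct sk_thread *td)
	{
		(void)td;	/* stub; the real code may allocate and sleep */
	}

	void
	sk_maybe_upcall(struct sk_thread *td)
	{
		if ((td->flags & SK_INMSLEEP) == 0) {
			td->flags |= SK_INMSLEEP;	/* close the door... */
			sk_schedule_upcall(td);		/* ...so this cannot recurse */
			td->flags &= ~SK_INMSLEEP;
		}
	}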
*/ + KASSERT((td->td_kse->ke_state == KES_RUNNING), ("msleep: kse stateX?")); + td->td_flags |= TDF_INMSLEEP; + thread_schedule_upcall(td, td->td_kse); + td->td_flags &= ~TDF_INMSLEEP; + KASSERT((td->td_kse->ke_state == KES_RUNNING), ("msleep: kse stateY?")); + } + mtx_unlock_spin(&sched_lock); + } + KASSERT((td->td_kse != NULL), ("msleep: NULL KSE2?")); + KASSERT((td->td_kse->ke_state == KES_RUNNING), + ("msleep: kse state2?")); + KASSERT((td->td_kse->ke_thread == td), + ("msleep: kse/thread mismatch?")); + } mtx_lock_spin(&sched_lock); if (cold || panicstr) { /* @@ -454,7 +509,7 @@ msleep(ident, mtx, priority, wmesg, timo) } KASSERT(p != NULL, ("msleep1")); - KASSERT(ident != NULL && td->td_proc->p_stat == SRUN, ("msleep")); + KASSERT(ident != NULL && td->td_state == TDS_RUNNING, ("msleep")); td->td_wchan = ident; td->td_wmesg = wmesg; @@ -468,20 +523,23 @@ msleep(ident, mtx, priority, wmesg, timo) callout_reset(&td->td_slpcallout, timo, endtsleep, td); /* * We put ourselves on the sleep queue and start our timeout - * before calling cursig, as we could stop there, and a wakeup - * or a SIGCONT (or both) could occur while we were stopped. - * A SIGCONT would cause us to be marked as SSLEEP + * before calling thread_suspend_check, as we could stop there, and + * a wakeup or a SIGCONT (or both) could occur while we were stopped. * without resuming us, thus we must be ready for sleep * when cursig is called. If the wakeup happens while we're * stopped, td->td_wchan will be 0 upon return from cursig. */ if (catch) { - CTR3(KTR_PROC, "msleep caught: proc %p (pid %d, %s)", p, + CTR3(KTR_PROC, "msleep caught: thread %p (pid %d, %s)", td, p->p_pid, p->p_comm); td->td_flags |= TDF_SINTR; mtx_unlock_spin(&sched_lock); PROC_LOCK(p); - sig = cursig(p); + sig = cursig(td); + if (thread_suspend_check(1)) { + sig = EINTR; + rval = EINTR; + } mtx_lock_spin(&sched_lock); PROC_UNLOCK(p); if (sig != 0) { @@ -492,13 +550,13 @@ msleep(ident, mtx, priority, wmesg, timo) } else sig = 0; if (td->td_wchan != NULL) { - td->td_proc->p_stat = SSLEEP; p->p_stats->p_ru.ru_nvcsw++; + td->td_state = TDS_SLP; mi_switch(); } - CTR3(KTR_PROC, "msleep resume: proc %p (pid %d, %s)", td, p->p_pid, + CTR3(KTR_PROC, "msleep resume: thread %p (pid %d, %s)", td, p->p_pid, p->p_comm); - KASSERT(td->td_proc->p_stat == SRUN, ("running but not SRUN")); + KASSERT(td->td_state == TDS_RUNNING, ("running but not TDS_RUNNING")); td->td_flags &= ~TDF_SINTR; if (td->td_flags & TDF_TIMEOUT) { td->td_flags &= ~TDF_TIMEOUT; @@ -524,8 +582,8 @@ msleep(ident, mtx, priority, wmesg, timo) if (rval == 0 && catch) { PROC_LOCK(p); - /* XXX: shouldn't we always be calling cursig() */ - if (sig != 0 || (sig = cursig(p))) { + /* XXX: shouldn't we always be calling cursig() */ + if (sig != 0 || (sig = cursig(td))) { if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig)) rval = EINTR; else @@ -571,7 +629,7 @@ endtsleep(arg) td->td_flags &= ~TDF_TIMEOUT; setrunqueue(td); } else if (td->td_wchan != NULL) { - if (td->td_proc->p_stat == SSLEEP) /* XXXKSE */ + if (td->td_state == TDS_SLP) /* XXXKSE */ setrunnable(td); else unsleep(td); @@ -583,6 +641,38 @@ endtsleep(arg) } /* + * Abort a thread, as if an interrupt had occured. Only abort + * interruptable waits (unfortunatly it isn't only safe to abort others). + * This is about identical to cv_abort(). + * Think about merging them? + * Also, whatever the signal code does... + */ +void +abortsleep(struct thread *td) +{ + + mtx_lock_spin(&sched_lock); + /* + * If the TDF_TIMEOUT flag is set, just leave. 
A + * timeout is scheduled anyhow. + */ + if ((td->td_flags & (TDF_TIMEOUT | TDF_SINTR)) == TDF_SINTR) { + if (td->td_wchan != NULL) { + if (td->td_state == TDS_SLP) { /* XXXKSE */ + setrunnable(td); + } else { + /* + * Probably in a suspended state.. + * um.. dunno XXXKSE + */ + unsleep(td); + } + } + } + mtx_unlock_spin(&sched_lock); +} + +/* * Remove a process from its wait queue */ void @@ -618,25 +708,24 @@ restart: if (td->td_wchan == ident) { TAILQ_REMOVE(qp, td, td_slpq); td->td_wchan = NULL; - if (td->td_proc->p_stat == SSLEEP) { + if (td->td_state == TDS_SLP) { /* OPTIMIZED EXPANSION OF setrunnable(p); */ CTR3(KTR_PROC, "wakeup: thread %p (pid %d, %s)", td, p->p_pid, p->p_comm); if (td->td_ksegrp->kg_slptime > 1) updatepri(td); td->td_ksegrp->kg_slptime = 0; - td->td_kse->ke_slptime = 0; - td->td_proc->p_stat = SRUN; if (p->p_sflag & PS_INMEM) { setrunqueue(td); maybe_resched(td); } else { +/* XXXKSE Wrong! */ td->td_state = TDS_RUNQ; p->p_sflag |= PS_SWAPINREQ; wakeup(&proc0); } /* END INLINE EXPANSION */ - goto restart; } + goto restart; } } mtx_unlock_spin(&sched_lock); @@ -665,20 +754,19 @@ restart: if (td->td_wchan == ident) { TAILQ_REMOVE(qp, td, td_slpq); td->td_wchan = NULL; - if (td->td_proc->p_stat == SSLEEP) { + if (td->td_state == TDS_SLP) { /* OPTIMIZED EXPANSION OF setrunnable(p); */ - CTR3(KTR_PROC, "wakeup1: proc %p (pid %d, %s)", - p, p->p_pid, p->p_comm); + CTR3(KTR_PROC,"wakeup1: thread %p (pid %d, %s)", + td, p->p_pid, p->p_comm); if (td->td_ksegrp->kg_slptime > 1) updatepri(td); td->td_ksegrp->kg_slptime = 0; - td->td_kse->ke_slptime = 0; - td->td_proc->p_stat = SRUN; if (p->p_sflag & PS_INMEM) { setrunqueue(td); maybe_resched(td); break; } else { +/* XXXKSE Wrong */ td->td_state = TDS_RUNQ; p->p_sflag |= PS_SWAPINREQ; wakeup(&proc0); } @@ -698,15 +786,19 @@ mi_switch() { struct bintime new_switchtime; struct thread *td = curthread; /* XXX */ - register struct proc *p = td->td_proc; /* XXX */ + struct proc *p = td->td_proc; /* XXX */ + struct kse *ke = td->td_kse; #if 0 register struct rlimit *rlim; #endif u_int sched_nest; mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED); + KASSERT((ke->ke_state == KES_RUNNING), ("mi_switch: kse state?")); #ifdef INVARIANTS - if (p->p_stat != SMTX && p->p_stat != SRUN) + if (td->td_state != TDS_MTX && + td->td_state != TDS_RUNQ && + td->td_state != TDS_RUNNING) mtx_assert(&Giant, MA_NOTOWNED); #endif @@ -735,7 +827,8 @@ mi_switch() * * XXX drop sched_lock, pickup Giant */ - if (p->p_stat != SZOMB && p->p_limit->p_cpulimit != RLIM_INFINITY && + if (p->p_state != PRS_ZOMBIE && + p->p_limit->p_cpulimit != RLIM_INFINITY && p->p_runtime > p->p_limit->p_cpulimit) { rlim = &p->p_rlimit[RLIMIT_CPU]; if (p->p_runtime / (rlim_t)1000000 >= rlim->rlim_max) { @@ -763,17 +856,35 @@ mi_switch() */ cnt.v_swtch++; PCPU_SET(switchtime, new_switchtime); - CTR3(KTR_PROC, "mi_switch: old proc %p (pid %d, %s)", p, p->p_pid, + CTR3(KTR_PROC, "mi_switch: old thread %p (pid %d, %s)", td, p->p_pid, p->p_comm); sched_nest = sched_lock.mtx_recurse; - td->td_lastcpu = td->td_kse->ke_oncpu; - td->td_kse->ke_oncpu = NOCPU; - td->td_kse->ke_flags &= ~KEF_NEEDRESCHED; + td->td_lastcpu = ke->ke_oncpu; + ke->ke_oncpu = NOCPU; + ke->ke_flags &= ~KEF_NEEDRESCHED; + /* + * At the last moment: if this KSE is not on the run queue, + * it needs to be freed correctly and the thread treated accordingly. + */ + if ((td->td_state == TDS_RUNNING) && + ((ke->ke_flags & KEF_IDLEKSE) == 0)) { + /* Put us back on the run queue (kse and all). 
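The decision made in this block, as a self-contained sketch (invented names; the KEF_IDLEKSE special case is omitted): a thread that is still running goes back on the run queue together with its KSE, while a blocked unbound thread donates the KSE back to its group.

	#include <stddef.h>

	enum sk_tdstate { SK_RUNNING, SK_RUNQ, SK_SLP };

	struct sk_kse;
	struct sk_td {
		enum sk_tdstate	 state;
		int		 unbound;	/* TDF_UNBOUND analogue */
		struct sk_kse	*kse;
	};

	void	sk_setrunqueue(struct sk_td *td);	/* stand-ins for the */
	void	sk_kse_reassign(struct sk_kse *ke);	/* kernel primitives */

	void
	sk_prepare_switch(struct sk_td *td)
	{
		struct sk_kse *ke;

		if (td->state == SK_RUNNING) {
			/* Still runnable: requeue thread and KSE together. */
			sk_setrunqueue(td);
		} else if (td->unbound && td->state != SK_RUNQ) {
			/* Blocked and unbound: let another thread use the KSE. */
			ke = td->kse;
			td->kse = NULL;
			sk_kse_reassign(ke);
		}
	}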
*/ + setrunqueue(td); + } else if ((td->td_flags & TDF_UNBOUND) && + (td->td_state != TDS_RUNQ)) { /* in case of old code */ + /* + * We will not be on the run queue. + * Someone else can use the KSE if they need it. + */ + td->td_kse = NULL; + kse_reassign(ke); + } cpu_switch(); td->td_kse->ke_oncpu = PCPU_GET(cpuid); + td->td_kse->ke_state = KES_RUNNING; sched_lock.mtx_recurse = sched_nest; sched_lock.mtx_lock = (uintptr_t)td; - CTR3(KTR_PROC, "mi_switch: new proc %p (pid %d, %s)", p, p->p_pid, + CTR3(KTR_PROC, "mi_switch: new thread %p (pid %d, %s)", td, p->p_pid, p->p_comm); if (PCPU_GET(switchtime.sec) == 0) binuptime(PCPU_PTR(switchtime)); @@ -791,37 +902,42 @@ setrunnable(struct thread *td) struct proc *p = td->td_proc; mtx_lock_spin(&sched_lock); - switch (p->p_stat) { - case SZOMB: /* not a thread flag XXXKSE */ + switch (p->p_state) { + case PRS_ZOMBIE: panic("setrunnable(1)"); + default: + break; } - switch (td->td_proc->p_stat) { + switch (td->td_state) { case 0: - case SRUN: - case SWAIT: + case TDS_RUNNING: + case TDS_IWAIT: default: + printf("state is %d", td->td_state); panic("setrunnable(2)"); - case SSTOP: - case SSLEEP: /* e.g. when sending signals */ + case TDS_SUSPENDED: + thread_unsuspend(p); + break; + case TDS_SLP: /* e.g. when sending signals */ if (td->td_flags & TDF_CVWAITQ) cv_waitq_remove(td); else unsleep(td); - break; - - case SIDL: + case TDS_UNQUEUED: /* being put back onto the queue */ + case TDS_NEW: /* not yet had time to suspend */ + case TDS_RUNQ: /* not yet had time to suspend */ break; } - td->td_proc->p_stat = SRUN; if (td->td_ksegrp->kg_slptime > 1) updatepri(td); td->td_ksegrp->kg_slptime = 0; - td->td_kse->ke_slptime = 0; if ((p->p_sflag & PS_INMEM) == 0) { + td->td_state = TDS_RUNQ; /* XXXKSE not a good idea */ p->p_sflag |= PS_SWAPINREQ; wakeup(&proc0); } else { - setrunqueue(td); + if (td->td_state != TDS_RUNQ) + setrunqueue(td); /* XXXKSE */ maybe_resched(td); } mtx_unlock_spin(&sched_lock); @@ -848,7 +964,7 @@ resetpriority(kg) kg->kg_user_pri = newpriority; } FOREACH_THREAD_IN_GROUP(kg, td) { - maybe_resched(td); + maybe_resched(td); /* XXXKSE silly */ } mtx_unlock_spin(&sched_lock); } @@ -865,20 +981,21 @@ loadav(void *arg) int i, nrun; struct loadavg *avg; struct proc *p; - struct ksegrp *kg; + struct thread *td; avg = &averunnable; sx_slock(&allproc_lock); nrun = 0; FOREACH_PROC_IN_SYSTEM(p) { - FOREACH_KSEGRP_IN_PROC(p, kg) { - switch (p->p_stat) { - case SRUN: + FOREACH_THREAD_IN_PROC(p, td) { + switch (td->td_state) { + case TDS_RUNQ: + case TDS_RUNNING: if ((p->p_flag & P_NOLOAD) != 0) goto nextproc; - /* FALLTHROUGH */ - case SIDL: - nrun++; + nrun++; /* XXXKSE */ + default: + break; } nextproc: continue; @@ -932,19 +1049,18 @@ void schedclock(td) struct thread *td; { - struct kse *ke = td->td_kse; - struct ksegrp *kg = td->td_ksegrp; + struct kse *ke; + struct ksegrp *kg; - if (td) { - ke->ke_cpticks++; - kg->kg_estcpu = ESTCPULIM(kg->kg_estcpu + 1); - if ((kg->kg_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) { - resetpriority(td->td_ksegrp); - if (td->td_priority >= PUSER) - td->td_priority = kg->kg_user_pri; - } - } else { - panic("schedclock"); + KASSERT((td != NULL), ("schedlock: null thread pointer")); + ke = td->td_kse; + kg = td->td_ksegrp; + ke->ke_cpticks++; + kg->kg_estcpu = ESTCPULIM(kg->kg_estcpu + 1); + if ((kg->kg_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) { + resetpriority(kg); + if (td->td_priority >= PUSER) + td->td_priority = kg->kg_user_pri; } } @@ -959,7 +1075,6 @@ yield(struct thread *td, struct yield_args *uap) 
mtx_assert(&Giant, MA_NOTOWNED); mtx_lock_spin(&sched_lock); td->td_priority = PRI_MAX_TIMESHARE; - setrunqueue(td); kg->kg_proc->p_stats->p_ru.ru_nvcsw++; mi_switch(); mtx_unlock_spin(&sched_lock); diff --git a/sys/kern/ksched.c b/sys/kern/ksched.c index c9081c3..bbe36be 100644 --- a/sys/kern/ksched.c +++ b/sys/kern/ksched.c @@ -181,7 +181,18 @@ int ksched_setscheduler(register_t *ret, struct ksched *ksched, mtx_lock_spin(&sched_lock); rtp_to_pri(&rtp, kg); - td->td_last_kse->ke_flags |= KEF_NEEDRESCHED; /* XXXKSE */ + FOREACH_THREAD_IN_GROUP(kg, td) { /* XXXKSE */ + if (td->td_state == TDS_RUNNING) { + td->td_kse->ke_flags |= KEF_NEEDRESCHED; + } else if (td->td_state == TDS_RUNQ) { + if (td->td_priority > kg->kg_user_pri) { + remrunqueue(td); + td->td_priority = + kg->kg_user_pri; + setrunqueue(td); + } + } + } mtx_unlock_spin(&sched_lock); } else @@ -203,7 +214,19 @@ int ksched_setscheduler(register_t *ret, struct ksched *ksched, * on the scheduling code: You must leave the * scheduling info alone. */ - td->td_last_kse->ke_flags |= KEF_NEEDRESCHED; /* XXXKSE */ + FOREACH_THREAD_IN_GROUP(kg, td) { + if (td->td_state == TDS_RUNNING) { + td->td_kse->ke_flags |= KEF_NEEDRESCHED; + } else if (td->td_state == TDS_RUNQ) { + if (td->td_priority > kg->kg_user_pri) { + remrunqueue(td); + td->td_priority = + kg->kg_user_pri; + setrunqueue(td); + } + } + + } mtx_unlock_spin(&sched_lock); } break; diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c index 9dad93b..afd4c5d 100644 --- a/sys/kern/subr_smp.c +++ b/sys/kern/subr_smp.c @@ -124,8 +124,8 @@ forward_signal(struct thread *td) * executing so that it executes ast(). */ mtx_assert(&sched_lock, MA_OWNED); - KASSERT(td->td_proc->p_stat == SRUN, - ("forward_signal: process is not SRUN")); + KASSERT(td->td_state == TDS_RUNNING, + ("forward_signal: thread is not TDS_RUNNING")); CTR1(KTR_SMP, "forward_signal(%p)", td->td_proc); diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index 3b415de..027aa9c 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -48,6 +48,8 @@ #include <sys/lock.h> #include <sys/mutex.h> #include <sys/proc.h> +#include <sys/kse.h> +#include <sys/ktr.h> #include <sys/resourcevar.h> #include <sys/signalvar.h> #include <sys/systm.h> @@ -71,13 +73,15 @@ userret(td, frame, oticks) struct kse *ke = td->td_kse; struct ksegrp *kg = td->td_ksegrp; + CTR3(KTR_SYSC, "userret: thread %p (pid %d, %s)", td, p->p_pid, + p->p_comm); #ifdef INVARIANTS /* Check that we called signotify() enough. */ mtx_lock(&Giant); PROC_LOCK(p); mtx_lock_spin(&sched_lock); if (SIGPENDING(p) && ((p->p_sflag & PS_NEEDSIGCHK) == 0 || - (p->p_kse.ke_flags & KEF_ASTPENDING) == 0)) + (ke->ke_flags & KEF_ASTPENDING) == 0)) printf("failed to set signal flags proprly for ast()\n"); mtx_unlock_spin(&sched_lock); PROC_UNLOCK(p); @@ -100,6 +104,22 @@ userret(td, frame, oticks) } /* + * We need to check to see if we have to exit or wait due to a + * single threading requirement or some other STOP condition. + */ + PROC_LOCK(p); + thread_suspend_check(0); /* Can suspend or kill */ + PROC_UNLOCK(p); + + /* + * DO special thread processing, e.g. upcall tweaking and such + */ + if (p->p_flag & P_KSES) { + thread_userret(p, kg, ke, td, frame); + /* printf("KSE thread returned"); */ + } + + /* * Charge system time if profiling. * * XXX should move PS_PROFIL to a place that can obviously be @@ -121,8 +141,7 @@ userret(td, frame, oticks) * This function will return with preemption disabled. 
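Order of the new return-to-userland steps in userret() above, sketched with stub declarations standing in for thread_suspend_check() and thread_userret(); the flag value matches P_KSES as defined in this patch.

	struct ur_td {
		int	p_flag;		/* owning process's flags */
	};
	#define UR_P_KSES	0x08000	/* P_KSES analogue */

	void	ur_thread_suspend_check(struct ur_td *td);	/* may park us */
	void	ur_thread_userret(struct ur_td *td);		/* upcall fixup */

	void
	ur_userret(struct ur_td *td)
	{
		/* First honor stop/single-threading requests... */
		ur_thread_suspend_check(td);
		/* ...then do the upcall bookkeeping only KSE processes need. */
		if (td->p_flag & UR_P_KSES)
			ur_thread_userret(td);
	}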
*/ void -ast(framep) - struct trapframe *framep; +ast(struct trapframe *framep) { struct thread *td = curthread; struct proc *p = td->td_proc; @@ -136,6 +155,8 @@ ast(framep) int ucode; #endif + CTR3(KTR_SYSC, "ast: thread %p (pid %d, %s)", td, p->p_pid, + p->p_comm); KASSERT(TRAPF_USERMODE(framep), ("ast in kernel mode")); #ifdef WITNESS if (witness_list(td)) @@ -164,6 +185,13 @@ ast(framep) p->p_stats->p_prof.pr_ticks = 0; } mtx_unlock_spin(&sched_lock); + /* + * XXXKSE While the fact that we owe a user profiling + * tick is stored per KSE in this code, the statistics + * themselves are still stored per process. + * This should probably change, by which I mean that + * possibly the location of both might change. + */ if (td->td_ucred != p->p_ucred) cred_update_thread(td); @@ -192,14 +220,13 @@ ast(framep) if (flags & KEF_NEEDRESCHED) { mtx_lock_spin(&sched_lock); td->td_priority = kg->kg_user_pri; - setrunqueue(td); p->p_stats->p_ru.ru_nivcsw++; mi_switch(); mtx_unlock_spin(&sched_lock); } if (sflag & PS_NEEDSIGCHK) { PROC_LOCK(p); - while ((sig = cursig(p)) != 0) + while ((sig = cursig(td)) != 0) postsig(sig); PROC_UNLOCK(p); } diff --git a/sys/kern/subr_turnstile.c b/sys/kern/subr_turnstile.c index 08bca8d..c2e79d0 100644 --- a/sys/kern/subr_turnstile.c +++ b/sys/kern/subr_turnstile.c @@ -119,23 +119,20 @@ propagate_priority(struct thread *td) return; } + KASSERT(td->td_state != TDS_SURPLUS, ("Mutex owner SURPLUS")); + MPASS(td->td_proc != NULL); MPASS(td->td_proc->p_magic == P_MAGIC); - KASSERT(td->td_proc->p_stat != SSLEEP, ("sleeping thread owns a mutex")); + KASSERT(td->td_state != TDS_SLP, + ("sleeping thread owns a mutex")); if (td->td_priority <= pri) /* lower is higher priority */ return; - /* - * Bump this thread's priority. - */ - td->td_priority = pri; /* * If lock holder is actually running, just bump priority. */ - if (thread_running(td)) { - MPASS(td->td_proc->p_stat == SRUN - || td->td_proc->p_stat == SZOMB - || td->td_proc->p_stat == SSTOP); + if (td->td_state == TDS_RUNNING) { + td->td_priority = pri; return; } @@ -151,20 +148,26 @@ propagate_priority(struct thread *td) * If on run queue move to new run queue, and quit. * XXXKSE this gets a lot more complicated under threads * but try anyhow. + * We should have a special call to do this more efficiently. */ - if (td->td_proc->p_stat == SRUN) { + if (td->td_state == TDS_RUNQ) { MPASS(td->td_blocked == NULL); remrunqueue(td); + td->td_priority = pri; setrunqueue(td); return; } + /* + * Adjust for any other cases. + */ + td->td_priority = pri; /* * If we aren't blocked on a mutex, we should be. 
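The walk that propagate_priority() above performs one step of, flattened into a sketch: follow the chain of blocked lock owners, lending our priority to each, until an owner is running or already at least as urgent (lower value means more urgent). Run-queue moves and turnstile plumbing are elided; types are invented.

	#include <stddef.h>

	struct pp_td {
		int		 priority;		/* lower = more urgent */
		struct pp_td	*blocks_on_owner;	/* NULL when not blocked */
	};

	void
	pp_propagate(struct pp_td *owner, int pri)
	{
		while (owner != NULL && owner->priority > pri) {
			owner->priority = pri;		/* lend the priority */
			owner = owner->blocks_on_owner;	/* follow the chain */
		}
	}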
*/ - KASSERT(td->td_proc->p_stat == SMTX, ( + KASSERT(td->td_state == TDS_MTX, ( "process %d(%s):%d holds %s but isn't blocked on a mutex\n", - td->td_proc->p_pid, td->td_proc->p_comm, td->td_proc->p_stat, + td->td_proc->p_pid, td->td_proc->p_comm, td->td_state, m->mtx_object.lo_name)); /* @@ -590,7 +593,7 @@ _mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line) */ td->td_blocked = m; td->td_mtxname = m->mtx_object.lo_name; - td->td_proc->p_stat = SMTX; + td->td_state = TDS_MTX; propagate_priority(td); if (LOCK_LOG_TEST(&m->mtx_object, opts)) @@ -727,7 +730,6 @@ _mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line) m, td1); td1->td_blocked = NULL; - td1->td_proc->p_stat = SRUN; setrunqueue(td1); if (td->td_critnest == 1 && td1->td_priority < pri) { @@ -744,7 +746,6 @@ _mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line) } } #endif - setrunqueue(td); if (LOCK_LOG_TEST(&m->mtx_object, opts)) CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p switching out lock=%p", m, diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c index 182221d..02b3a0d 100644 --- a/sys/kern/subr_witness.c +++ b/sys/kern/subr_witness.c @@ -225,6 +225,7 @@ static struct witness_order_list_entry order_lists[] = { #endif { "clk", &lock_class_mtx_spin }, { "mutex profiling lock", &lock_class_mtx_spin }, + { "zombie_thread_lock", &lock_class_mtx_spin }, { NULL, NULL }, { NULL, NULL } }; diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index 1bdd913..d8fba59 100644 --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -1187,7 +1187,7 @@ selwakeup(sip) sip->si_thread = NULL; mtx_lock_spin(&sched_lock); if (td->td_wchan == (caddr_t)&selwait) { - if (td->td_proc->p_stat == SSLEEP) + if (td->td_state == TDS_SLP) setrunnable(td); else cv_waitq_remove(td); diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c index dacb9d9..ab6f1e8 100644 --- a/sys/kern/sys_process.c +++ b/sys/kern/sys_process.c @@ -467,7 +467,7 @@ ptrace(struct thread *td, struct ptrace_args *uap) } /* not currently stopped */ - if (p->p_stat != SSTOP || (p->p_flag & P_WAITED) == 0) { + if (!P_SHOULDSTOP(p) || (p->p_flag & P_WAITED) == 0) { error = EBUSY; goto fail; } @@ -566,10 +566,12 @@ ptrace(struct thread *td, struct ptrace_args *uap) if (proctree_locked) sx_xunlock(&proctree_lock); /* deliver or queue signal */ - if (p->p_stat == SSTOP) { + if (P_SHOULDSTOP(p)) { p->p_xstat = uap->data; mtx_lock_spin(&sched_lock); + p->p_flag &= ~(P_STOPPED_TRACE|P_STOPPED_SGNL); setrunnable(td2); /* XXXKSE */ + /* Need foreach kse in proc, ... make_kse_queued(). */ mtx_unlock_spin(&sched_lock); } else if (uap->data) psignal(p, uap->data); diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master index d8115fb..15a5d7c 100644 --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -552,7 +552,7 @@ 381 STD BSD { int kse_new(struct kse_mailbox * mbx, \ int new_grp_flag); } 382 STD BSD { int thread_wakeup(struct thread_mailbox *tmbx); } -383 STD BSD { int kse_yield(void); } +383 MSTD BSD { int kse_yield(void); } 384 UNIMPL BSD __mac_get_proc 385 UNIMPL BSD __mac_set_proc 386 UNIMPL BSD __mac_get_fd diff --git a/sys/kern/tty.c b/sys/kern/tty.c index b9c5743..6c915e1 100644 --- a/sys/kern/tty.c +++ b/sys/kern/tty.c @@ -2392,17 +2392,35 @@ ttyinfo(struct tty *tp) PGRP_UNLOCK(tp->t_pgrp); td = FIRST_THREAD_IN_PROC(pick); - stmp = pick->p_stat == SRUN ? "running" : /* XXXKSE */ - pick->p_stat == SMTX ? td->td_mtxname : - td->td_wmesg ? 
td->td_wmesg : "iowait"; + if (pick->p_flag & P_KSES) { + stmp = "KSE" ; /* XXXKSE */ + } else { + if (td) { + if (td->td_state == TDS_RUNQ) { + stmp = "running"; + } else if (td->td_state == TDS_MTX) { + stmp = td->td_mtxname; + } else if (td->td_wmesg) { + stmp = td->td_wmesg; + } else { + stmp = "iowait"; + } + } else { + stmp = "threadless"; + panic("ttyinfo: no thread!?"); + } + } calcru(pick, &utime, &stime, NULL); - ltmp = pick->p_stat == SIDL || pick->p_stat == SWAIT || - pick->p_stat == SZOMB ? 0 : - pgtok(vmspace_resident_count(pick->p_vmspace)); + ltmp = ((pick->p_state == PRS_NEW) + || (td && (td->td_state == TDS_IWAIT)) + || (pick->p_state == PRS_ZOMBIE ? 0 : + pgtok(vmspace_resident_count(pick->p_vmspace)))); mtx_unlock_spin(&sched_lock); ttyprintf(tp, " cmd: %s %d [%s%s] ", pick->p_comm, - pick->p_pid, pick->p_stat == SMTX ? "*" : "", stmp); + pick->p_pid, + td->td_state == TDS_MTX ? "*" : "", + stmp); /* Print user time. */ ttyprintf(tp, "%ld.%02ldu ", @@ -2433,7 +2451,19 @@ ttyinfo(struct tty *tp) * we pick out just "short-term" sleepers (P_SINTR == 0). * 4) Further ties are broken by picking the highest pid. */ -#define ISRUN(p) (((p)->p_stat == SRUN) || ((p)->p_stat == SIDL)) +#define ISRUN(p, val) \ +do { \ + struct thread *td; \ + val = 0; \ + FOREACH_THREAD_IN_PROC(p, td) { \ + if (td->td_state == TDS_RUNQ || \ + td->td_state == TDS_RUNNING) { \ + val = 1; \ + break; \ + } \ + } \ +} while (0) + #define TESTAB(a, b) ((a)<<1 | (b)) #define ONLYA 2 #define ONLYB 1 @@ -2449,10 +2479,13 @@ proc_compare(struct proc *p1, struct proc *p2) if (p1 == NULL) return (1); + ISRUN(p1, esta); + ISRUN(p2, estb); + /* * see if at least one of them is runnable */ - switch (TESTAB(ISRUN(p1), ISRUN(p2))) { + switch (TESTAB(esta, estb)) { case ONLYA: return (0); case ONLYB: @@ -2477,7 +2510,7 @@ proc_compare(struct proc *p1, struct proc *p2) /* * weed out zombies */ - switch (TESTAB(p1->p_stat == SZOMB, p2->p_stat == SZOMB)) { + switch (TESTAB(p1->p_state == PRS_ZOMBIE, p2->p_state == PRS_ZOMBIE)) { case ONLYA: return (1); case ONLYB: diff --git a/sys/posix4/ksched.c b/sys/posix4/ksched.c index c9081c3..bbe36be 100644 --- a/sys/posix4/ksched.c +++ b/sys/posix4/ksched.c @@ -181,7 +181,18 @@ int ksched_setscheduler(register_t *ret, struct ksched *ksched, mtx_lock_spin(&sched_lock); rtp_to_pri(&rtp, kg); - td->td_last_kse->ke_flags |= KEF_NEEDRESCHED; /* XXXKSE */ + FOREACH_THREAD_IN_GROUP(kg, td) { /* XXXKSE */ + if (td->td_state == TDS_RUNNING) { + td->td_kse->ke_flags |= KEF_NEEDRESCHED; + } else if (td->td_state == TDS_RUNQ) { + if (td->td_priority > kg->kg_user_pri) { + remrunqueue(td); + td->td_priority = + kg->kg_user_pri; + setrunqueue(td); + } + } + } mtx_unlock_spin(&sched_lock); } else @@ -203,7 +214,19 @@ int ksched_setscheduler(register_t *ret, struct ksched *ksched, * on the scheduling code: You must leave the * scheduling info alone. 
*/ - td->td_last_kse->ke_flags |= KEF_NEEDRESCHED; /* XXXKSE */ + FOREACH_THREAD_IN_GROUP(kg, td) { + if (td->td_state == TDS_RUNNING) { + td->td_kse->ke_flags |= KEF_NEEDRESCHED; + } else if (td->td_state == TDS_RUNQ) { + if (td->td_priority > kg->kg_user_pri) { + remrunqueue(td); + td->td_priority = + kg->kg_user_pri; + setrunqueue(td); + } + } + + } mtx_unlock_spin(&sched_lock); } break; diff --git a/sys/sparc64/sparc64/genassym.c b/sys/sparc64/sparc64/genassym.c index 4f47a75..eee4abc 100644 --- a/sys/sparc64/sparc64/genassym.c +++ b/sys/sparc64/sparc64/genassym.c @@ -232,6 +232,8 @@ ASSYM(TD_KSE, offsetof(struct thread, td_kse)); ASSYM(TD_KSTACK, offsetof(struct thread, td_kstack)); ASSYM(TD_PCB, offsetof(struct thread, td_pcb)); ASSYM(TD_PROC, offsetof(struct thread, td_proc)); +ASSYM(TD_STATE, offsetof(struct thread, td_state)); +ASSYM(TDS_RUNNING, TDS_RUNNING); ASSYM(PCB_SIZEOF, sizeof(struct pcb)); ASSYM(PCB_FPSTATE, offsetof(struct pcb, pcb_fpstate)); diff --git a/sys/sparc64/sparc64/swtch.S b/sys/sparc64/sparc64/swtch.S index 429e961..a8a753a 100644 --- a/sys/sparc64/sparc64/swtch.S +++ b/sys/sparc64/sparc64/swtch.S @@ -109,6 +109,9 @@ ENTRY(cpu_switch) stx %o0, [PCPU(CURTHREAD)] stx %o1, [PCPU(CURPCB)] + mov TDS_RUNNING, %o2 + stw %o2, [%o0 + TD_STATE] + SET(sched_lock, %o3, %o2) stx %o0, [%o2 + MTX_LOCK] diff --git a/sys/sparc64/sparc64/swtch.s b/sys/sparc64/sparc64/swtch.s index 429e961..a8a753a 100644 --- a/sys/sparc64/sparc64/swtch.s +++ b/sys/sparc64/sparc64/swtch.s @@ -109,6 +109,9 @@ ENTRY(cpu_switch) stx %o0, [PCPU(CURTHREAD)] stx %o1, [PCPU(CURPCB)] + mov TDS_RUNNING, %o2 + stw %o2, [%o0 + TD_STATE] + SET(sched_lock, %o3, %o2) stx %o0, [%o2 + MTX_LOCK] diff --git a/sys/sparc64/sparc64/trap.c b/sys/sparc64/sparc64/trap.c index 61e3b44..f39d2f6 100644 --- a/sys/sparc64/sparc64/trap.c +++ b/sys/sparc64/sparc64/trap.c @@ -49,6 +49,7 @@ #include <sys/bus.h> #include <sys/interrupt.h> #include <sys/ktr.h> +#include <sys/kse.h> #include <sys/lock.h> #include <sys/mutex.h> #include <sys/systm.h> @@ -190,6 +191,11 @@ trap(struct trapframe *tf) td->td_frame = tf; if (td->td_ucred != p->p_ucred) cred_update_thread(td); + if ((p->p_flag & P_WEXIT) && (p->p_singlethread != td)) { + mtx_lock_spin(&sched_lock); + thread_exit(); + /* NOTREACHED */ + } } else { sticks = 0; if ((type & ~T_KERNEL) != T_BREAKPOINT) @@ -528,6 +534,23 @@ syscall(struct trapframe *tf) td->td_frame = tf; if (td->td_ucred != p->p_ucred) cred_update_thread(td); + if (p->p_flag & P_KSES) { + /* + * If we are doing a syscall in a KSE environment, + * note where our mailbox is. There is always the + * possibility that we could do this lazily (in sleep()), + * but for now do it every time. + */ + td->td_mailbox = (void *)fuword((caddr_t)td->td_kse->ke_mailbox + + offsetof(struct kse_mailbox, kmbx_current_thread)); + if ((td->td_mailbox == NULL) || + (td->td_mailbox == (void *)-1)) { + td->td_mailbox = NULL; /* single thread it.. */ + td->td_flags &= ~TDF_UNBOUND; + } else { + td->td_flags |= TDF_UNBOUND; + } + } code = tf->tf_global[1]; /* @@ -634,17 +657,17 @@ syscall(struct trapframe *tf) } /* - * Handle reschedule and other end-of-syscall issues - */ - userret(td, tf, sticks); - - /* * Release Giant if we had to get it. Don't use mtx_owned(), * we want to catch broken syscalls. 
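The mailbox probe added to syscall() above, as a standalone sketch. mk_copyin_word() stands in for fuword(); a NULL or faulting read of the userland current-thread slot demotes the thread to bound (single-threaded) operation for this syscall. The flag value matches TDF_UNBOUND as defined in this patch.

	#include <stddef.h>

	#define MK_UNBOUND	0x000001	/* TDF_UNBOUND analogue */

	struct mk_td {
		void	*mailbox;
		int	 flags;
	};

	void	*mk_copyin_word(const void *uaddr);	/* (void *)-1 on fault */

	void
	mk_note_mailbox(struct mk_td *td, const void *cur_thread_slot)
	{
		td->mailbox = mk_copyin_word(cur_thread_slot);
		if (td->mailbox == NULL || td->mailbox == (void *)-1) {
			td->mailbox = NULL;	/* single-thread it */
			td->flags &= ~MK_UNBOUND;
		} else
			td->flags |= MK_UNBOUND;
	}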
*/ if ((callp->sy_narg & SYF_MPSAFE) == 0) mtx_unlock(&Giant); + /* + * Handle reschedule and other end-of-syscall issues + */ + userret(td, tf, sticks); + #ifdef KTRACE if (KTRPOINT(td, KTR_SYSRET)) ktrsysret(code, error, td->td_retval[0]); diff --git a/sys/sparc64/sparc64/vm_machdep.c b/sys/sparc64/sparc64/vm_machdep.c index a896754..8282e93 100644 --- a/sys/sparc64/sparc64/vm_machdep.c +++ b/sys/sparc64/sparc64/vm_machdep.c @@ -108,6 +108,42 @@ cpu_sched_exit(struct thread *td) } } +void +cpu_thread_exit(struct thread *td) +{ +} + +void +cpu_thread_setup(struct thread *td) +{ +} + +void +cpu_save_upcall(struct thread *td, struct kse *newkse) +{ +} + +void +cpu_set_upcall(struct thread *td, void *pcb) +{ +} + +void +cpu_set_args(struct thread *td, struct kse *ke) +{ +} + +void +cpu_free_kse_mdstorage(struct kse *ke) +{ +} + +int +cpu_export_context(struct thread *td) +{ + return (0); +} + /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child diff --git a/sys/sys/condvar.h b/sys/sys/condvar.h index 0050255..cf6a6c6 100644 --- a/sys/sys/condvar.h +++ b/sys/sys/condvar.h @@ -62,6 +62,7 @@ void cv_signal(struct cv *cvp); void cv_broadcast(struct cv *cvp); void cv_waitq_remove(struct thread *td); +void cv_abort(struct thread *td); #define cv_waitq_empty(cvp) (TAILQ_EMPTY(&(cvp)->cv_waitq)) #define cv_wmesg(cvp) ((cvp)->cv_description) diff --git a/sys/sys/proc.h b/sys/sys/proc.h index a4f29de..2c198c8 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -249,12 +249,13 @@ They would be given priorities calculated from the KSEG. * This is what is put to sleep and reactivated. * The first KSE available in the correct group will run this thread. * If several are available, use the one on the same CPU as last time. + * When waing to be run, threads are hung off the KSEGRP in priority order. + * with N runnable and queued KSEs in the KSEGRP, the first N threads + * are linked to them. Other threads are not yet assigned. */ struct thread { struct proc *td_proc; /* Associated process. */ struct ksegrp *td_ksegrp; /* Associated KSEG. */ - struct kse *td_last_kse; /* Where it wants to be if possible. */ - struct kse *td_kse; /* Current KSE if running. */ TAILQ_ENTRY(thread) td_plist; /* All threads in this proc */ TAILQ_ENTRY(thread) td_kglist; /* All threads in this ksegrp */ @@ -267,6 +268,8 @@ struct thread { #define td_startzero td_flags int td_flags; /* (j) TDF_* flags. */ + struct kse *td_last_kse; /* Where it wants to be if possible. */ + struct kse *td_kse; /* Current KSE if running. */ int td_dupfd; /* (k) Ret value from fdopen. XXX */ void *td_wchan; /* (j) Sleep address. */ const char *td_wmesg; /* (j) Reason for sleep. */ @@ -280,6 +283,8 @@ struct thread { LIST_HEAD(, mtx) td_contested; /* (j) Contested locks. */ struct lock_list_entry *td_sleeplocks; /* (k) Held sleep locks. */ int td_intr_nesting_level; /* (k) Interrupt recursion. */ + void *td_mailbox; /* the userland mailbox address */ + struct ucred *td_ucred; /* (k) Reference to credentials. */ #define td_endzero td_md #define td_startcopy td_endzero @@ -290,14 +295,44 @@ struct thread { u_char td_priority; /* (j) Thread active priority. */ #define td_endcopy td_pcb - struct ucred *td_ucred; /* (k) Reference to credentials. */ struct pcb *td_pcb; /* (k) Kernel VA of pcb and kstack. 
*/ + enum { + TDS_NEW = 0x20, + TDS_UNQUEUED, + TDS_SLP, + TDS_MTX, + TDS_RUNQ, + TDS_RUNNING, + TDS_SUSPENDED, /* would have liked to have run */ + TDS_IWAIT, + TDS_SURPLUS + } td_state; struct callout td_slpcallout; /* (h) Callout for sleep. */ struct trapframe *td_frame; /* (k) */ struct vm_object *td_kstack_obj;/* (a) Kstack object. */ vm_offset_t td_kstack; /* Kernel VA of kstack. */ u_int td_critnest; /* (k) Critical section nest level. */ }; +/* flags kept in td_flags */ +#define TDF_UNBOUND 0x000001 /* may give away the kse, uses the kg runq */ +#define TDF_SINTR 0x000008 /* Sleep is interruptible. */ +#define TDF_TIMEOUT 0x000010 /* Timing out during sleep. */ +#define TDF_SELECT 0x000040 /* Selecting; wakeup/waiting danger. */ +#define TDF_CVWAITQ 0x000080 /* Thread is on a cv_waitq (not slpq). */ +#define TDF_UPCALLING 0x000100 /* This thread is doing an upcall. */ +#define TDF_INMSLEEP 0x000400 /* Don't recurse in msleep() */ +#define TDF_TIMOFAIL 0x001000 /* Timeout from sleep after we were awake. */ +#define TDF_DEADLKTREAT 0x800000 /* Lock aquisition - deadlock treatment. */ + +/* + * Traps for young players: + * The main thread flag that controls whether a thread acts as a threaded + * or unthreaded thread is the TDF_UNBOUND flag. + * UPCALLS run with the UNBOUND flags clear, after they are first scheduled. + * i.e. they bind themselves to whatever thread thay are first scheduled with. + * You may see BOUND threads in KSE processes but you should never see + * UNBOUND threads in non KSE processes. + */ /* * The schedulable entity that can be given a context to run. @@ -309,14 +344,14 @@ struct thread { struct kse { struct proc *ke_proc; /* Associated process. */ struct ksegrp *ke_ksegrp; /* Associated KSEG. */ - struct thread *ke_thread; /* Associated thread, if running. */ TAILQ_ENTRY(kse) ke_kglist; /* Queue of all KSEs in ke_ksegrp. */ TAILQ_ENTRY(kse) ke_kgrlist; /* Queue of all KSEs in this state. */ TAILQ_ENTRY(kse) ke_procq; /* (j) Run queue. */ - TAILQ_HEAD(, thread) ke_runq; /* (td_runq) RUNNABLE bound to KSE. */ #define ke_startzero ke_flags int ke_flags; /* (j) KEF_* flags. */ + struct thread *ke_thread; /* Active associated thread. */ + struct thread *ke_bound; /* Thread bound to this KSE (*) */ /*u_int ke_estcpu; */ /* (j) Time averaged val of cpticks. */ int ke_cpticks; /* (j) Ticks of cpu time. */ fixpt_t ke_pctcpu; /* (j) %cpu during p_swtime. */ @@ -329,15 +364,45 @@ struct kse { u_char ke_oncpu; /* (j) Which cpu we are on. */ u_int ke_slptime; /* (j) Time since last idle. */ char ke_rqindex; /* (j) Run queue index. */ -#define ke_endzero ke_priority + enum { + KES_IDLE = 0x10, + KES_ONRUNQ, + KES_UNQUEUED, /* in transit */ + KES_RUNNING + } ke_state; /* (j) S* process status. */ + void *ke_mailbox; /* the userland mailbox address */ + struct thread *ke_tdspare; /* spare thread for upcalls */ +#define ke_endzero ke_dummy #define ke_startcopy ke_endzero - u_char ke_priority; /* (j) Process priority. */ - u_char ke_usrpri; /* (j) User pri from cpu & nice. 
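The bound/unbound rule from the note above, restated as two illustrative predicates; the flag values are the ones this header defines, the types are stand-ins.

	#define BP_TDF_UNBOUND	0x000001
	#define BP_P_KSES	0x08000

	struct bp_proc { int p_flag; };
	struct bp_td  { int td_flags; struct bp_proc *proc; };

	/* Unbound: may give its KSE away; queued on the KSEG run queue. */
	int
	bp_unbound(struct bp_td *td)
	{
		return ((td->td_flags & BP_TDF_UNBOUND) != 0);
	}

	/* Invariant from the note: unbound threads only in KSE processes. */
	int
	bp_invariant(struct bp_td *td)
	{
		return (!bp_unbound(td) || (td->proc->p_flag & BP_P_KSES) != 0);
	}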
*/ -#define ke_endcopy ke_end - - int ke_end; /* dummy entry */ + u_char ke_dummy; /* */ +#define ke_endcopy ke_mdstorage + + void *ke_upcall; + void *ke_stackbase; + u_long ke_stacksize; + void *ke_mdstorage; /* where we store the pcb and frame */ + struct pcb *ke_pcb; /* the pcb saved for the upcalls */ + struct trapframe *ke_frame; /* the upcall trapframe */ + void *mdkse; /* eventually you load from this in */ + /* switch for our extension PCB x86 */ }; +/* flags kept in ke_flags */ +#define KEF_OWEUPC 0x00002 /* Owe process an addupc() call at next ast. */ +#define KEF_IDLEKSE 0x00004 /* A 'Per CPU idle process'.. has one thread */ +#define KEF_LOANED 0x00004 /* On loan from the bound thread to another */ +#define KEF_ASTPENDING 0x00400 /* KSE has a pending ast. */ +#define KEF_NEEDRESCHED 0x00800 /* Process needs to yield. */ + +/* + * (*) A bound KSE with a bound thread in a KSE process may be lent to + * Other threads, as long as those threads do not leave the kernel. + * The other threads must be either exiting, or be unbound with a valid + * mailbox so that they can save their state there rather than going + * to user space. While this happens the real bound thread is still linked + * to the kse via the ke_bound field, and the KSE has its "KEF_LOANED + * flag set. + */ /* * Kernel-scheduled entity group (KSEG). The scheduler considers each KSEG to @@ -348,27 +413,29 @@ struct ksegrp { struct proc *kg_proc; /* Process that contains this KSEG. */ TAILQ_ENTRY(ksegrp) kg_ksegrp; /* Queue of KSEGs in kg_proc. */ TAILQ_HEAD(, kse) kg_kseq; /* (ke_kglist) All KSEs. */ - TAILQ_HEAD(, kse) kg_rq; /* (ke_kgrlist) Runnable KSEs. */ TAILQ_HEAD(, kse) kg_iq; /* (ke_kgrlist) Idle KSEs. */ TAILQ_HEAD(, thread) kg_threads;/* (td_kglist) All threads. */ - TAILQ_HEAD(, thread) kg_runq; /* (td_runq) Unbound RUNNABLE threads */ + TAILQ_HEAD(, thread) kg_runq; /* (td_runq) waiting RUNNABLE threads */ TAILQ_HEAD(, thread) kg_slpq; /* (td_runq) NONRUNNABLE threads. */ #define kg_startzero kg_estcpu u_int kg_estcpu; /* Sum of the same field in KSEs. */ u_int kg_slptime; /* (j) How long completely blocked. */ + struct thread *kg_last_assigned; /* Last thread assigned to a KSE */ + int kg_numthreads; /* Num threads in total */ + int kg_runnable; /* Num runnable threads on queue. */ + int kg_kses; /* Num KSEs in group. */ + int kg_runq_kses; /* Num KSEs on runq. */ + int kg_idle_kses; /* num KSEs idle */ #define kg_endzero kg_pri_class #define kg_startcopy kg_endzero u_char kg_pri_class; /* (j) Scheduling class. */ u_char kg_user_pri; /* (j) User pri from estcpu and nice. */ char kg_nice; /* (j?/k?) Process "nice" value. */ - struct rtprio kg_rtprio; /* (j) Realtime priority. */ -#define kg_endcopy kg_runnable - - int kg_runnable; /* Num runnable threads on queue. */ - int kg_runq_kses; /* Num KSEs on runq. */ - int kg_kses; /* Num KSEs in group. */ +/* struct rtprio kg_rtprio; */ /* (j) Realtime priority. */ +#define kg_endcopy kg_dummy + int kg_dummy; }; /* @@ -379,6 +446,7 @@ struct proc { LIST_ENTRY(proc) p_list; /* (d) List of all processes. */ TAILQ_HEAD(, ksegrp) p_ksegrps; /* (kg_ksegrp) All KSEGs. */ TAILQ_HEAD(, thread) p_threads; /* (td_plist) Threads. (shortcut) */ + TAILQ_HEAD(, thread) p_suspended; /* (td_runq) suspended threads */ struct ucred *p_ucred; /* (c) Process owner's identity. */ struct filedesc *p_fd; /* (b) Ptr to open files structure. */ /* Accumulated stats for all KSEs? 
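How kg_last_assigned is meant to work, per the struct thread comment earlier in this header, sketched as handing a freed KSE to the next waiting thread: the group run queue is priority ordered, the first N threads hold the N queued KSEs, and kg_last_assigned marks the end of that prefix. Names are invented, and the real kse_reassign() also touches the system run queue.

	#include <stddef.h>

	struct la_kse;
	struct la_td {
		struct la_td	*next;	/* priority-ordered group run queue */
		struct la_kse	*kse;	/* NULL while waiting unassigned */
	};
	struct la_ksegrp {
		struct la_td	*runq;
		struct la_td	*last_assigned;
	};

	void
	la_reassign(struct la_ksegrp *kg, struct la_kse *ke)
	{
		struct la_td *td;

		/* First unassigned thread sits just past the assigned prefix. */
		td = (kg->last_assigned != NULL) ?
		    kg->last_assigned->next : kg->runq;
		if (td != NULL) {
			td->kse = ke;		/* extend the prefix */
			kg->last_assigned = td;
		}
		/* else: no runnable thread wants it; the KSE would go idle. */
	}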
*/ @@ -389,7 +457,6 @@ struct proc { struct ksegrp p_ksegrp; struct kse p_kse; - struct thread p_xxthread; /* * The following don't make too much sense.. @@ -397,8 +464,12 @@ struct proc { */ int p_flag; /* (c) P_* flags. */ int p_sflag; /* (j) PS_* flags. */ - int p_stat; /* (j) S* process status. */ - + enum { + PRS_NEW = 0, /* In creation */ + PRS_NORMAL, /* KSEs can be run */ + PRS_WAIT, /* Waiting on interrupt ? */ + PRS_ZOMBIE + } p_state; /* (j) S* process status. */ pid_t p_pid; /* (b) Process identifier. */ LIST_ENTRY(proc) p_hash; /* (d) Hash chain. */ LIST_ENTRY(proc) p_pglist; /* (g + e) List of processes in pgrp. */ @@ -431,6 +502,10 @@ struct proc { u_char p_pfsflags; /* (c) Procfs flags. */ struct nlminfo *p_nlminfo; /* (?) Only used by/for lockd. */ void *p_aioinfo; /* (c) ASYNC I/O info. */ + int p_numthreads; /* (?) number of threads */ + int p_numksegrps; /* (?) number of ksegrps */ + struct thread *p_singlethread;/* If single threading this is it */ + int p_suspcount; /* # waiting threads in suspended mode*/ /* End area that is zeroed on creation. */ #define p_startcopy p_sigmask @@ -467,13 +542,6 @@ struct proc { #define NOCPU 0xff /* For p_oncpu when we aren't on a CPU. */ /* Status values (p_stat). */ -#define SIDL 1 /* Process being created by fork. */ -#define SRUN 2 /* Currently runnable. */ -#define SSLEEP 3 /* Sleeping on an address. */ -#define SSTOP 4 /* Process debugging or suspension. */ -#define SZOMB 5 /* Awaiting collection by parent. */ -#define SWAIT 6 /* Waiting for interrupt. */ -#define SMTX 7 /* Blocked on a mutex. */ /* These flags are kept in p_flag. */ #define P_ADVLOCK 0x00001 /* Process may hold a POSIX advisory lock. */ @@ -483,13 +551,21 @@ struct proc { #define P_PPWAIT 0x00010 /* Parent is waiting for child to exec/exit. */ #define P_SUGID 0x00100 /* Had set id privileges since last exec. */ #define P_SYSTEM 0x00200 /* System proc: no sigs, stats or swapping. */ -#define P_TRACED 0x00800 /* Debugged process being traced. */ -#define P_WAITED 0x01000 /* Debugging process has waited for child. */ +#define P_WAITED 0x01000 /* Someone is waiting for us */ #define P_WEXIT 0x02000 /* Working on exiting. */ #define P_EXEC 0x04000 /* Process called exec. */ #define P_KSES 0x08000 /* Process is using KSEs. */ #define P_CONTINUED 0x10000 /* Proc has continued from a stopped state. */ +/* flags that control how threads may be suspended for some reason */ +#define P_STOPPED_SGNL 0x10000 /* Stopped due to SIGSTOP/SIGTSTP */ +#define P_STOPPED_TRACE 0x20000 /* Stopped because of tracing */ +#define P_STOPPED_SNGL 0x40000 /* Only one thread can continue (not to user) */ +#define P_SINGLE_EXIT 0x00400 /* Threads suspending should exit, not wait */ +#define P_TRACED 0x00800 /* Debugged process being traced. */ +#define P_STOPPED (P_STOPPED_SGNL|P_STOPPED_SNGL|P_STOPPED_TRACE) +#define P_SHOULDSTOP(p) ((p)->p_flag & P_STOPPED) + /* Should be moved to machine-dependent areas. */ #define P_UNUSED100000 0x100000 #define P_COWINPROGRESS 0x400000 /* Snapshot copy-on-write in progress. */ @@ -508,21 +584,14 @@ struct proc { #define PS_SWAPPING 0x00200 /* Process is being swapped. */ #define PS_NEEDSIGCHK 0x02000 /* Process may need signal delivery. */ -/* flags kept in td_flags */ -#define TDF_ONRUNQ 0x00001 /* This KE is on a run queue */ -#define TDF_SINTR 0x00008 /* Sleep is interruptible. */ -#define TDF_TIMEOUT 0x00010 /* Timing out during sleep. */ -#define TDF_SELECT 0x00040 /* Selecting; wakeup/waiting danger. 
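The stop machinery above folds three distinct stop reasons into a single test; a compact restatement, with values copied from the defines above and names prefixed to mark them as illustrative:

	#define XP_STOPPED_SGNL		0x10000	/* SIGSTOP/SIGTSTP */
	#define XP_STOPPED_TRACE	0x20000	/* ptrace() */
	#define XP_STOPPED_SNGL		0x40000	/* single-threading */
	#define XP_STOPPED \
		(XP_STOPPED_SGNL | XP_STOPPED_SNGL | XP_STOPPED_TRACE)

	/* P_SHOULDSTOP(): any one reason is enough to hold the threads. */
	int
	xp_should_stop(int p_flag)
	{
		return ((p_flag & XP_STOPPED) != 0);
	}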
*/ -#define TDF_CVWAITQ 0x00080 /* Thread is on a cv_waitq (not slpq). */ -#define TDF_TIMOFAIL 0x01000 /* Timeout from sleep after we were awake. */ -#define TDF_DEADLKTREAT 0x800000 /* Lock aquisition - deadlock treatment. */ - -/* flags kept in ke_flags */ -#define KEF_ONRUNQ 0x00001 /* This KE is on a run queue */ -#define KEF_OWEUPC 0x00002 /* Owe process an addupc() call at next ast. */ -#define KEF_ASTPENDING 0x00400 /* KSE has a pending ast. */ -#define KEF_NEEDRESCHED 0x00800 /* Process needs to yield. */ - +/* used only in legacy conversion code */ +#define SIDL 1 /* Process being created by fork. */ +#define SRUN 2 /* Currently runnable. */ +#define SSLEEP 3 /* Sleeping on an address. */ +#define SSTOP 4 /* Process debugging or suspension. */ +#define SZOMB 5 /* Awaiting collection by parent. */ +#define SWAIT 6 /* Waiting for interrupt. */ +#define SMTX 7 /* Blocked on a mutex. */ #define P_MAGIC 0xbeefface @@ -728,6 +797,7 @@ void pargs_drop(struct pargs *pa); void pargs_free(struct pargs *pa); void pargs_hold(struct pargs *pa); void procinit(void); +void threadinit(void); void proc_linkup(struct proc *p, struct ksegrp *kg, struct kse *ke, struct thread *td); void proc_reparent(struct proc *child, struct proc *newparent); @@ -758,7 +828,38 @@ void cpu_fork(struct thread *, struct proc *, struct thread *, int); void cpu_set_fork_handler(struct thread *, void (*)(void *), void *); void cpu_wait(struct proc *); int cpu_coredump(struct thread *, struct vnode *, struct ucred *); -struct thread *thread_get(struct proc *); + +/* New in KSE. */ +struct thread *thread_alloc(void); +void thread_free(struct thread *td); +int cpu_export_context(struct thread *td); +void cpu_free_kse_mdstorage(struct kse *kse); +void cpu_save_upcall(struct thread *td, struct kse *newkse); +void cpu_set_args(struct thread *, struct kse *); +void cpu_set_upcall(struct thread *td, void *pcb); +void cpu_thread_exit(struct thread *); +void cpu_thread_setup(struct thread *td); +void kse_reassign(struct kse *ke); +void kse_link(struct kse *ke, struct ksegrp *kg); +void ksegrp_link(struct ksegrp *kg, struct proc *p); +int kserunnable(void); +void make_kse_runnable(struct kse *ke); +void thread_exit(void) __dead2; +int thread_export_context(struct thread *td); +void thread_link(struct thread *td, struct ksegrp *kg); +void thread_reap(void); +struct thread *thread_schedule_upcall(struct thread *td, struct kse *ke); +int thread_single(int how); +#define SNGLE_NO_EXIT 0 /* values for 'how' */ +#define SNGLE_EXIT 1 +void thread_single_end(void); +void thread_stash(struct thread *td); +int thread_suspend_check(int how); +void thread_unsuspend(struct proc *p); +int thread_userret(struct proc *p, struct ksegrp *kg, struct kse *ke, + struct thread *td, struct trapframe *frame); + +void thread_sanity_check(struct thread *td); #endif /* _KERNEL */ #endif /* !_SYS_PROC_H_ */ diff --git a/sys/sys/queue.h b/sys/sys/queue.h index 5209f4e..ffddc86 100644 --- a/sys/sys/queue.h +++ b/sys/sys/queue.h @@ -102,6 +102,36 @@ * _REMOVE + + + + * */ +#define QUEUE_MACRO_DEBUG 1 +#ifdef QUEUE_MACRO_DEBUG +struct qm_trace { + char * lastfile; + int lastline; + char * prevfile; + int prevline; +}; + +#define TRACEBUF struct qm_trace trace; + +#define QMD_TRACE_HEAD(head) do { \ + (head)->trace.prevline = (head)->trace.lastline; \ + (head)->trace.prevfile = (head)->trace.lastfile; \ + (head)->trace.lastline = __LINE__; \ + (head)->trace.lastfile = __FILE__; \ +} while (0) + +#define QMD_TRACE_ELEM(elem) do { \ + (elem)->trace.prevline = 
(elem)->trace.lastline; \ + (elem)->trace.prevfile = (elem)->trace.lastfile; \ + (elem)->trace.lastline = __LINE__; \ + (elem)->trace.lastfile = __FILE__; \ +} while (0) + +#else +#define QMD_TRACE_ELEM(elem) +#define QMD_TRACE_HEAD(head) +#define TRACEBUF +#endif /* QUEUE_MACRO_DEBUG */ /* * Singly-linked List declarations. @@ -329,6 +359,7 @@ struct { \ struct name { \ struct type *tqh_first; /* first element */ \ struct type **tqh_last; /* addr of last next element */ \ + TRACEBUF \ } #define TAILQ_HEAD_INITIALIZER(head) \ @@ -338,6 +369,7 @@ struct name { \ struct { \ struct type *tqe_next; /* next element */ \ struct type **tqe_prev; /* address of previous next element */ \ + TRACEBUF \ } /* @@ -349,6 +381,8 @@ struct { \ (head2)->tqh_first->field.tqe_prev = (head1)->tqh_last; \ (head1)->tqh_last = (head2)->tqh_last; \ TAILQ_INIT((head2)); \ + QMD_TRACE_HEAD(head); \ + QMD_TRACE_HEAD(head2); \ } \ } while (0) @@ -369,16 +403,21 @@ struct { \ #define TAILQ_INIT(head) do { \ TAILQ_FIRST((head)) = NULL; \ (head)->tqh_last = &TAILQ_FIRST((head)); \ + QMD_TRACE_HEAD(head); \ } while (0) #define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \ if ((TAILQ_NEXT((elm), field) = TAILQ_NEXT((listelm), field)) != NULL)\ TAILQ_NEXT((elm), field)->field.tqe_prev = \ &TAILQ_NEXT((elm), field); \ - else \ + else { \ (head)->tqh_last = &TAILQ_NEXT((elm), field); \ + QMD_TRACE_HEAD(head); \ + } \ TAILQ_NEXT((listelm), field) = (elm); \ (elm)->field.tqe_prev = &TAILQ_NEXT((listelm), field); \ + QMD_TRACE_ELEM(&(elm)->field); \ + QMD_TRACE_ELEM(&listelm->field); \ } while (0) #define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \ @@ -386,6 +425,8 @@ struct { \ TAILQ_NEXT((elm), field) = (listelm); \ *(listelm)->field.tqe_prev = (elm); \ (listelm)->field.tqe_prev = &TAILQ_NEXT((elm), field); \ + QMD_TRACE_ELEM(&(elm)->field); \ + QMD_TRACE_ELEM(&listelm->field); \ } while (0) #define TAILQ_INSERT_HEAD(head, elm, field) do { \ @@ -396,6 +437,8 @@ struct { \ (head)->tqh_last = &TAILQ_NEXT((elm), field); \ TAILQ_FIRST((head)) = (elm); \ (elm)->field.tqe_prev = &TAILQ_FIRST((head)); \ + QMD_TRACE_HEAD(head); \ + QMD_TRACE_ELEM(&(elm)->field); \ } while (0) #define TAILQ_INSERT_TAIL(head, elm, field) do { \ @@ -403,6 +446,8 @@ struct { \ (elm)->field.tqe_prev = (head)->tqh_last; \ *(head)->tqh_last = (elm); \ (head)->tqh_last = &TAILQ_NEXT((elm), field); \ + QMD_TRACE_HEAD(head); \ + QMD_TRACE_ELEM(&(elm)->field); \ } while (0) #define TAILQ_LAST(head, headname) \ @@ -417,9 +462,13 @@ struct { \ if ((TAILQ_NEXT((elm), field)) != NULL) \ TAILQ_NEXT((elm), field)->field.tqe_prev = \ (elm)->field.tqe_prev; \ - else \ + else { \ (head)->tqh_last = (elm)->field.tqe_prev; \ + QMD_TRACE_HEAD(head); \ + } \ *(elm)->field.tqe_prev = TAILQ_NEXT((elm), field); \ + (elm)->field.tqe_next = (void *)-1; \ + QMD_TRACE_ELEM(&(elm)->field); \ } while (0) diff --git a/sys/sys/signalvar.h b/sys/sys/signalvar.h index 6302d03..a8a68fc 100644 --- a/sys/sys/signalvar.h +++ b/sys/sys/signalvar.h @@ -234,10 +234,10 @@ extern struct mtx sigio_lock; /* * Machine-independent functions: */ -int cursig(struct proc *p); +int cursig(struct thread *td); void execsigs(struct proc *p); void gsignal(int pgid, int sig); -int issignal(struct proc *p); +int issignal(struct thread *p); void killproc(struct proc *p, char *why); void pgsigio(struct sigio **, int signum, int checkctty); void pgsignal(struct pgrp *pgrp, int sig, int checkctty); diff --git a/sys/sys/systm.h b/sys/sys/systm.h index ccba626..134700b 100644 --- a/sys/sys/systm.h +++ 
b/sys/sys/systm.h @@ -309,6 +309,7 @@ extern watchdog_tickle_fn wdog_tickler; */ int msleep(void *chan, struct mtx *mtx, int pri, const char *wmesg, int timo); +void abortsleep(struct thread *td); #define tsleep(chan, pri, wmesg, timo) msleep(chan, NULL, pri, wmesg, timo) void wakeup(void *chan); void wakeup_one(void *chan); diff --git a/sys/sys/ucred.h b/sys/sys/ucred.h index 3025eb4..565bd41 100644 --- a/sys/sys/ucred.h +++ b/sys/sys/ucred.h @@ -44,15 +44,15 @@ * Only the suser() or suser_cred() function should be used for this. */ struct ucred { - u_int cr_ref; /* reference count */ + u_int cr_ref; /* reference count */ #define cr_startcopy cr_uid - uid_t cr_uid; /* effective user id */ - uid_t cr_ruid; /* real user id */ - uid_t cr_svuid; /* saved user id */ - short cr_ngroups; /* number of groups */ - gid_t cr_groups[NGROUPS]; /* groups */ - gid_t cr_rgid; /* real group id */ - gid_t cr_svgid; /* saved user id */ + uid_t cr_uid; /* effective user id */ + uid_t cr_ruid; /* real user id */ + uid_t cr_svuid; /* saved user id */ + short cr_ngroups; /* number of groups */ + gid_t cr_groups[NGROUPS]; /* groups */ + gid_t cr_rgid; /* real group id */ + gid_t cr_svgid; /* saved user id */ struct uidinfo *cr_uidinfo; /* per euid resource consumption */ struct uidinfo *cr_ruidinfo; /* per ruid resource consumption */ struct prison *cr_prison; /* jail(4) */ diff --git a/sys/vm/uma_int.h b/sys/vm/uma_int.h index e09d549..cf6dc39 100644 --- a/sys/vm/uma_int.h +++ b/sys/vm/uma_int.h @@ -109,7 +109,7 @@ #define UMA_SLAB_MASK (PAGE_SIZE - 1) /* Mask to get back to the page */ #define UMA_SLAB_SHIFT PAGE_SHIFT /* Number of bits PAGE_MASK */ -#define UMA_BOOT_PAGES 15 /* Number of pages allocated for startup */ +#define UMA_BOOT_PAGES 30 /* Number of pages allocated for startup */ #define UMA_WORKING_TIME 20 /* Seconds worth of items to keep */ diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index 6c48cbc..25aa48e 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -299,8 +299,11 @@ vm_waitproc(p) GIANT_REQUIRED; cpu_wait(p); pmap_dispose_proc(p); /* drop per-process resources */ - FOREACH_THREAD_IN_PROC(p, td) +/* XXXKSE by here there should not be any threads left! */ + FOREACH_THREAD_IN_PROC(p, td) { + panic("vm_waitproc: Survivor thread!"); pmap_dispose_thread(td); + } vmspace_exitfree(p); /* and clean-out the vmspace */ } @@ -355,7 +358,7 @@ faultin(p) PROC_LOCK(p); mtx_lock_spin(&sched_lock); FOREACH_THREAD_IN_PROC (p, td) - if (td->td_proc->p_stat == SRUN) /* XXXKSE */ + if (td->td_state == TDS_RUNQ) /* XXXKSE */ setrunqueue(td); p->p_sflag |= PS_INMEM; @@ -371,7 +374,7 @@ faultin(p) * is enough space for them. Of course, if a process waits for a long * time, it will be swapped in anyway. * - * XXXKSE - KSEGRP with highest priority counts.. + * XXXKSE - process with the thread with highest priority counts.. * * Giant is still held at this point, to be released in tsleep. 
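The swap-in selection loop rewritten below scores candidates per runnable thread rather than per process. A reduced sketch of the scoring follows (swapped-out time plus the thread's group sleep time; the nice and slept-long adjustments of the real loop are elided, and all types are stand-ins):

	#include <stddef.h>

	struct si_td {
		int		 on_runq;
		int		 kg_slptime;	/* its ksegrp's sleep time */
		struct si_td	*next;
	};
	struct si_proc {
		int		 swtime;	/* time spent swapped out */
		struct si_td	*threads;
		struct si_proc	*next;
	};

	struct si_proc *
	si_pick(struct si_proc *allproc)
	{
		struct si_proc *p, *pick = NULL;
		struct si_td *td;
		int pri, best = -1;

		for (p = allproc; p != NULL; p = p->next)
			for (td = p->threads; td != NULL; td = td->next)
				if (td->on_runq &&
				    (pri = p->swtime + td->kg_slptime) > best) {
					best = pri;
					pick = p;
				}
		return (pick);	/* best-scoring process wins the swap-in */
	}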
*/ @@ -381,6 +384,7 @@ scheduler(dummy) void *dummy; { struct proc *p; + struct thread *td; int pri; struct proc *pp; int ppri; @@ -399,11 +403,14 @@ loop: sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { struct ksegrp *kg; + if (p->p_sflag & (PS_INMEM | PS_SWAPPING)) { + continue; + } mtx_lock_spin(&sched_lock); - if (p->p_stat == SRUN - && (p->p_sflag & (PS_INMEM | PS_SWAPPING)) == 0) { - /* Find the minimum sleeptime for the process */ - FOREACH_KSEGRP_IN_PROC(p, kg) { + FOREACH_THREAD_IN_PROC(p, td) { + /* Only consider runnable threads */ + if (td->td_state == TDS_RUNQ) { + kg = td->td_ksegrp; pri = p->p_swtime + kg->kg_slptime; if ((p->p_sflag & PS_SWAPINREQ) == 0) { pri -= kg->kg_nice * 8; @@ -438,6 +445,7 @@ loop: /* * We would like to bring someone in. (only if there is space). + * [What checks the space? ] */ PROC_LOCK(p); faultin(p); @@ -478,6 +486,7 @@ swapout_procs(action) int action; { struct proc *p; + struct thread *td; struct ksegrp *kg; struct proc *outp, *outp2; int outpri, outpri2; @@ -489,13 +498,13 @@ int action; outpri = outpri2 = INT_MIN; retry: sx_slock(&allproc_lock); - LIST_FOREACH(p, &allproc, p_list) { + FOREACH_PROC_IN_SYSTEM(p) { struct vmspace *vm; int minslptime = 100000; PROC_LOCK(p); if (p->p_lock != 0 || - (p->p_flag & (P_TRACED|P_SYSTEM|P_WEXIT)) != 0) { + (p->p_flag & (P_STOPPED_SNGL|P_TRACED|P_SYSTEM|P_WEXIT)) != 0) { PROC_UNLOCK(p); continue; } @@ -512,14 +521,15 @@ retry: continue; } - switch (p->p_stat) { + switch (p->p_state) { default: + /* Don't swap out processes in any sort + * of 'special' state. */ mtx_unlock_spin(&sched_lock); PROC_UNLOCK(p); continue; - case SSLEEP: - case SSTOP: + case PRS_NORMAL: /* * do not swapout a realtime process * Check all the thread groups.. @@ -537,13 +547,18 @@ retry: * Also guarantee swap_idle_threshold1 * time in memory. 
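The per-thread veto added just below: previously only the first thread's priority was consulted, now any thread with priority better than PSOCK keeps the whole process resident. As a predicate with stand-in types (psock is passed in rather than assumed):

	#include <stddef.h>

	struct so_td {
		int		 priority;	/* lower = more urgent */
		struct so_td	*next;
	};

	int
	so_may_swap_out(struct so_td *threads, int psock)
	{
		struct so_td *td;

		for (td = threads; td != NULL; td = td->next)
			if (td->priority < psock)
				return (0);	/* one urgent thread pins all */
		return (1);
	}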
*/ - if (((FIRST_THREAD_IN_PROC(p)->td_priority) < PSOCK) || - (kg->kg_slptime < swap_idle_threshold1)) { + if (kg->kg_slptime < swap_idle_threshold1) { mtx_unlock_spin(&sched_lock); PROC_UNLOCK(p); goto nextproc; } - + FOREACH_THREAD_IN_PROC(p, td) { + if ((td->td_priority) < PSOCK) { + mtx_unlock_spin(&sched_lock); + PROC_UNLOCK(p); + goto nextproc; + } + } /* * If the system is under memory stress, * or if we are swapping @@ -624,14 +639,13 @@ swapout(p) p->p_sflag |= PS_SWAPPING; PROC_UNLOCK(p); FOREACH_THREAD_IN_PROC (p, td) - if (td->td_proc->p_stat == SRUN) /* XXXKSE */ + if (td->td_state == TDS_RUNQ) /* XXXKSE */ remrunqueue(td); /* XXXKSE */ mtx_unlock_spin(&sched_lock); pmap_swapout_proc(p); FOREACH_THREAD_IN_PROC(p, td) pmap_swapout_thread(td); - mtx_lock_spin(&sched_lock); p->p_sflag &= ~PS_SWAPPING; p->p_swtime = 0; diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c index 935979ae..a1b8adb 100644 --- a/sys/vm/vm_meter.c +++ b/sys/vm/vm_meter.c @@ -81,6 +81,7 @@ SYSCTL_STRUCT(_vm, VM_LOADAVG, loadavg, CTLFLAG_RD, static int vmtotal(SYSCTL_HANDLER_ARGS) { +/* XXXKSE almost completely broken */ struct proc *p; struct vmtotal total, *totalp; vm_map_entry_t entry; @@ -88,6 +89,7 @@ vmtotal(SYSCTL_HANDLER_ARGS) vm_map_t map; int paging; struct ksegrp *kg; + struct thread *td; totalp = &total; bzero(totalp, sizeof *totalp); @@ -107,44 +109,49 @@ vmtotal(SYSCTL_HANDLER_ARGS) if (p->p_flag & P_SYSTEM) continue; mtx_lock_spin(&sched_lock); - switch (p->p_stat) { - case 0: + switch (p->p_state) { + case PRS_NEW: + if (p->p_sflag & PS_INMEM) + totalp->t_rq++; + else + totalp->t_sw++; mtx_unlock_spin(&sched_lock); continue; - - case SMTX: - case SSLEEP: - case SSTOP: - kg = &p->p_ksegrp; /* XXXKSE */ - if (p->p_sflag & PS_INMEM) { - if (FIRST_THREAD_IN_PROC(p)->td_priority - <= PZERO) - totalp->t_dw++; - else if (kg->kg_slptime < maxslp) - totalp->t_sl++; - } else if (kg->kg_slptime < maxslp) - totalp->t_sw++; - if (kg->kg_slptime >= maxslp) { - mtx_unlock_spin(&sched_lock); - continue; - } break; + default: + FOREACH_THREAD_IN_PROC(p, td) { + switch (td->td_state) { + case TDS_MTX: + case TDS_SLP: + kg = td->td_ksegrp; /* XXXKSE */ + if (p->p_sflag & PS_INMEM) { + if (td->td_priority <= PZERO) + totalp->t_dw++; + else if (kg->kg_slptime + < maxslp) + totalp->t_sl++; + } else if (kg->kg_slptime < maxslp) + totalp->t_sw++; + if (kg->kg_slptime >= maxslp) { + continue; + } + break; - case SWAIT: - totalp->t_sl++; - continue; + case TDS_RUNQ: + case TDS_RUNNING: + if (p->p_sflag & PS_INMEM) + totalp->t_rq++; + else + totalp->t_sw++; + continue; - case SRUN: - case SIDL: - if (p->p_sflag & PS_INMEM) - totalp->t_rq++; - else - totalp->t_sw++; - if (p->p_stat == SIDL) { - mtx_unlock_spin(&sched_lock); - continue; + case TDS_IWAIT: + totalp->t_sl++; + continue; + default: + break; + } } - break; } mtx_unlock_spin(&sched_lock); /* diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index 5708d8d..2e5bd07 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -642,6 +642,7 @@ vm_pageout_scan(int pass) int vnodes_skipped = 0; int maxlaunder; int s; + struct thread *td; GIANT_REQUIRED; /* @@ -1123,7 +1124,8 @@ rescan0: bigproc = NULL; bigsize = 0; sx_slock(&allproc_lock); - LIST_FOREACH(p, &allproc, p_list) { + FOREACH_PROC_IN_SYSTEM(p) { + int breakout; /* * If this process is already locked, skip it. */ @@ -1139,10 +1141,19 @@ rescan0: } /* * if the process is in a non-running type state, - * don't touch it. + * don't touch it. Check all the threads individually. 
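The "breakout" test used by both vm_pageout_scan() and vm_daemon() below, reduced to a predicate: a process is only eligible if every one of its threads is in an ordinary run or sleep state. Stand-in types again:

	#include <stddef.h>

	enum bo_state { BO_RUNQ, BO_RUNNING, BO_SLP, BO_OTHER };

	struct bo_td {
		enum bo_state	 state;
		struct bo_td	*next;
	};

	int
	bo_all_normal(struct bo_td *threads)
	{
		struct bo_td *td;

		for (td = threads; td != NULL; td = td->next)
			if (td->state != BO_RUNQ && td->state != BO_RUNNING &&
			    td->state != BO_SLP)
				return (0);	/* breakout = 1 in the diff */
		return (1);
	}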
 */
 	mtx_lock_spin(&sched_lock);
-	if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
+	breakout = 0;
+	FOREACH_THREAD_IN_PROC(p, td) {
+		if (td->td_state != TDS_RUNQ &&
+		    td->td_state != TDS_RUNNING &&
+		    td->td_state != TDS_SLP) {
+			breakout = 1;
+			break;
+		}
+	}
+	if (breakout) {
 		mtx_unlock_spin(&sched_lock);
 		PROC_UNLOCK(p);
 		continue;
@@ -1445,6 +1456,8 @@ static void
 vm_daemon()
 {
 	struct proc *p;
+	int breakout;
+	struct thread *td;
 
 	mtx_lock(&Giant);
 	while (TRUE) {
@@ -1473,7 +1486,16 @@ vm_daemon()
 		 * don't touch it.
 		 */
 		mtx_lock_spin(&sched_lock);
-		if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
+		breakout = 0;
+		FOREACH_THREAD_IN_PROC(p, td) {
+			if (td->td_state != TDS_RUNQ &&
+			    td->td_state != TDS_RUNNING &&
+			    td->td_state != TDS_SLP) {
+				breakout = 1;
+				break;
+			}
+		}
+		if (breakout) {
 			mtx_unlock_spin(&sched_lock);
 			continue;
 		}
diff --git a/sys/vm/vm_zeroidle.c b/sys/vm/vm_zeroidle.c
index 99ace6e..d7ab1ce 100644
--- a/sys/vm/vm_zeroidle.c
+++ b/sys/vm/vm_zeroidle.c
@@ -127,7 +127,6 @@ vm_pagezero(void)
 		pages += vm_page_zero_idle();
 		if (pages > idlezero_maxrun) {
 			mtx_lock_spin(&sched_lock);
-			setrunqueue(td);
 			td->td_proc->p_stats->p_ru.ru_nvcsw++;
 			mi_switch();
 			mtx_unlock_spin(&sched_lock);
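A pattern that recurs throughout this patch (yield(), ast(), _mtx_unlock_sleep(), vm_pagezero() above): callers no longer setrunqueue() themselves before mi_switch(); the switch path requeues a still-running thread or reassigns its KSE. Caller side, sketched with invented names:

	void	yk_mi_switch(void);	/* requeues/reassigns, as shown earlier */

	void
	yk_voluntary_switch(void)
	{
		/* Before: yk_setrunqueue(curthread); yk_mi_switch(); */
		yk_mi_switch();		/* after: the switch code does the rest */
	}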