-rw-r--r--  sys/amd64/amd64/trap.c        23
-rw-r--r--  sys/amd64/amd64/vm_machdep.c  10
-rw-r--r--  sys/ddb/db_ps.c               11
-rw-r--r--  sys/i386/i386/trap.c          23
-rw-r--r--  sys/i386/i386/vm_machdep.c    10
-rw-r--r--  sys/kern/init_main.c           4
-rw-r--r--  sys/kern/kern_condvar.c       14
-rw-r--r--  sys/kern/kern_exit.c          15
-rw-r--r--  sys/kern/kern_fork.c           2
-rw-r--r--  sys/kern/kern_kse.c          433
-rw-r--r--  sys/kern/kern_proc.c          33
-rw-r--r--  sys/kern/kern_switch.c       235
-rw-r--r--  sys/kern/kern_synch.c         32
-rw-r--r--  sys/kern/kern_thread.c       433
-rw-r--r--  sys/sys/proc.h                 4
15 files changed, 769 insertions, 513 deletions
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index 444bcf3..fdc0b49 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -274,7 +274,7 @@ trap(frame)
* XXX p_singlethread not locked, but should be safe.
*/
if ((p->p_flag & P_WEXIT) && (p->p_singlethread != td)) {
- PROC_LOCK(p); /* check if thisis really needed */
+ PROC_LOCK(p);
mtx_lock_spin(&sched_lock);
thread_exit();
/* NOTREACHED */
@@ -965,18 +965,39 @@ syscall(frame)
cred_update_thread(td);
if (p->p_flag & P_KSES) {
/*
+ * First check that we shouldn't just abort.
+ * But check if we are the single thread first!
+ * XXX p_singlethread not locked, but should be safe.
+ */
+ if ((p->p_flag & P_WEXIT) && (p->p_singlethread != td)) {
+ PROC_LOCK(p);
+ mtx_lock_spin(&sched_lock);
+ thread_exit();
+ /* NOTREACHED */
+ }
+
+ /*
* If we are doing a syscall in a KSE environment,
* note where our mailbox is. There is always the
* possibility that we could do this lazily (in sleep()),
* but for now do it every time.
*/
+#if 0
td->td_mailbox = (void *)fuword((caddr_t)td->td_kse->ke_mailbox
+ offsetof(struct kse_mailbox, km_curthread));
+#else /* if user pointer arithmetic is ok in the kernel */
+ td->td_mailbox =
+ (void *)fuword(
+ (void *)&td->td_kse->ke_mailbox->km_curthread);
+#endif
if ((td->td_mailbox == NULL) ||
(td->td_mailbox == (void *)-1)) {
td->td_mailbox = NULL; /* single thread it.. */
td->td_flags &= ~TDF_UNBOUND;
} else {
+ if (td->td_standin == NULL) {
+ td->td_standin = thread_alloc();
+ }
td->td_flags |= TDF_UNBOUND;
}
}
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index 3e1329d..ff991ef 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -309,7 +309,10 @@ cpu_thread_setup(struct thread *td)
/*
* Initialize machine state (pcb and trap frame) for a new thread about to
- * upcall.
+ * upcall. Put enough state in the new thread's PCB to get it to go back to
+ * userret(), where we can intercept it again to set the return (upcall)
+ * address and stack, along with those from upcalls that come from other
+ * sources such as those generated in thread_userret() itself.
*/
void
cpu_set_upcall(struct thread *td, void *pcb)
@@ -369,8 +372,9 @@ cpu_set_upcall(struct thread *td, void *pcb)
}
/*
- * Set the machine state for performing an upcall that had to
- * wait until we selected a KSE to perform the upcall on.
+ * Set the machine state for performing an upcall that has to
+ * be done in thread_userret() so that those upcalls generated
+ * in thread_userret() itself can be done as well.
*/
void
cpu_set_upcall_kse(struct thread *td, struct kse *ke)
diff --git a/sys/ddb/db_ps.c b/sys/ddb/db_ps.c
index 26dc00b..8ddaaaf 100644
--- a/sys/ddb/db_ps.c
+++ b/sys/ddb/db_ps.c
@@ -127,7 +127,7 @@ db_ps(dummy1, dummy2, dummy3, dummy4)
db_printf("(threaded) %s\n", p->p_comm);
FOREACH_THREAD_IN_PROC(p, td) {
if (p->p_flag & P_KSES)
- db_printf( " thread %p ", td);
+ db_printf( " thread %p ksegrp %p ", td, td->td_ksegrp);
if (TD_ON_SLEEPQ(td)) {
if (td->td_flags & TDF_CVWAITQ)
db_printf("[CVQ ");
@@ -155,6 +155,9 @@ db_ps(dummy1, dummy2, dummy3, dummy4)
if (TD_AWAITING_INTR(td)) {
db_printf("[IWAIT]");
}
+ if (TD_LENT(td)) {
+ db_printf("[LOAN]");
+ }
break;
case TDS_CAN_RUN:
db_printf("[Can run]");
@@ -168,9 +171,11 @@ db_ps(dummy1, dummy2, dummy3, dummy4)
default:
panic("unknown thread state");
}
- if (p->p_flag & P_KSES)
+ if (p->p_flag & P_KSES) {
+ if (td->td_kse)
+ db_printf("[kse %p]", td->td_kse);
db_printf("\n");
- else
+ } else
db_printf(" %s\n", p->p_comm);
}
diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c
index 444bcf3..fdc0b49 100644
--- a/sys/i386/i386/trap.c
+++ b/sys/i386/i386/trap.c
@@ -274,7 +274,7 @@ trap(frame)
* XXX p_singlethread not locked, but should be safe.
*/
if ((p->p_flag & P_WEXIT) && (p->p_singlethread != td)) {
- PROC_LOCK(p); /* check if thisis really needed */
+ PROC_LOCK(p);
mtx_lock_spin(&sched_lock);
thread_exit();
/* NOTREACHED */
@@ -965,18 +965,39 @@ syscall(frame)
cred_update_thread(td);
if (p->p_flag & P_KSES) {
/*
+ * First check that we shouldn't just abort.
+ * But check if we are the single thread first!
+ * XXX p_singlethread not locked, but should be safe.
+ */
+ if ((p->p_flag & P_WEXIT) && (p->p_singlethread != td)) {
+ PROC_LOCK(p);
+ mtx_lock_spin(&sched_lock);
+ thread_exit();
+ /* NOTREACHED */
+ }
+
+ /*
* If we are doing a syscall in a KSE environment,
* note where our mailbox is. There is always the
* possibility that we could do this lazily (in sleep()),
* but for now do it every time.
*/
+#if 0
td->td_mailbox = (void *)fuword((caddr_t)td->td_kse->ke_mailbox
+ offsetof(struct kse_mailbox, km_curthread));
+#else /* if user pointer arithmetic is ok in the kernel */
+ td->td_mailbox =
+ (void *)fuword(
+ (void *)&td->td_kse->ke_mailbox->km_curthread);
+#endif
if ((td->td_mailbox == NULL) ||
(td->td_mailbox == (void *)-1)) {
td->td_mailbox = NULL; /* single thread it.. */
td->td_flags &= ~TDF_UNBOUND;
} else {
+ if (td->td_standin == NULL) {
+ td->td_standin = thread_alloc();
+ }
td->td_flags |= TDF_UNBOUND;
}
}
diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c
index 3e1329d..ff991ef 100644
--- a/sys/i386/i386/vm_machdep.c
+++ b/sys/i386/i386/vm_machdep.c
@@ -309,7 +309,10 @@ cpu_thread_setup(struct thread *td)
/*
* Initialize machine state (pcb and trap frame) for a new thread about to
- * upcall.
+ * upcall. Put enough state in the new thread's PCB to get it to go back to
+ * userret(), where we can intercept it again to set the return (upcall)
+ * address and stack, along with those from upcalls that come from other
+ * sources such as those generated in thread_userret() itself.
*/
void
cpu_set_upcall(struct thread *td, void *pcb)
@@ -369,8 +372,9 @@ cpu_set_upcall(struct thread *td, void *pcb)
}
/*
- * Set the machine state for performing an upcall that had to
- * wait until we selected a KSE to perform the upcall on.
+ * Set the machine state for performing an upcall that has to
+ * be done in thread_userret() so that those upcalls generated
+ * in thread_userret() itself can be done as well.
*/
void
cpu_set_upcall_kse(struct thread *td, struct kse *ke)
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index 66b58d2..3b97e60 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -374,12 +374,8 @@ proc0_init(void *dummy __unused)
ke->ke_oncpu = 0;
ke->ke_state = KES_THREAD;
ke->ke_thread = td;
- /* proc_linkup puts it in the idle queue, that's not what we want. */
- TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
- kg->kg_idle_kses--;
p->p_peers = 0;
p->p_leader = p;
-KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!"));
bcopy("swapper", p->p_comm, sizeof ("swapper"));
diff --git a/sys/kern/kern_condvar.c b/sys/kern/kern_condvar.c
index 26f5376..3ce9aff 100644
--- a/sys/kern/kern_condvar.c
+++ b/sys/kern/kern_condvar.c
@@ -130,16 +130,14 @@ cv_check_upcall(struct thread *td)
if ((td->td_proc->p_flag & P_KSES) && td->td_mailbox &&
(td->td_flags & TDF_INMSLEEP) == 0) {
/*
- * If we have no queued work to do,
- * upcall to the UTS to see if it has more work.
* We don't need to upcall now, just queue it.
+ * The upcall will happen when other in-kernel work
+ * in this KSEGRP has completed.
+ * Don't recurse here!
*/
- if (TAILQ_FIRST(&td->td_ksegrp->kg_runq) == NULL) {
- /* Don't recurse here! */
- td->td_flags |= TDF_INMSLEEP;
- thread_schedule_upcall(td, td->td_kse);
- td->td_flags &= ~TDF_INMSLEEP;
- }
+ td->td_flags |= TDF_INMSLEEP;
+ thread_schedule_upcall(td, td->td_kse);
+ td->td_flags &= ~TDF_INMSLEEP;
}
}
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index 83d5149..a586bef 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -561,9 +561,10 @@ wait1(td, uap, compat)
int compat;
{
struct rusage ru;
- register int nfound;
- register struct proc *p, *q, *t;
+ int nfound;
+ struct proc *p, *q, *t;
int status, error;
+ struct thread *td2;
struct kse *ke;
struct ksegrp *kg;
@@ -718,8 +719,8 @@ loop:
}
/*
- * There should only be one KSE/KSEGRP but
- * do it right anyhow.
+ * There should only be one
+ * but do it right anyhow.
*/
FOREACH_KSEGRP_IN_PROC(p, kg) {
FOREACH_KSE_IN_GROUP(kg, ke) {
@@ -730,6 +731,12 @@ loop:
}
}
}
+ FOREACH_THREAD_IN_PROC(p, td2) {
+ if (td2->td_standin != NULL) {
+ thread_free(td2->td_standin);
+ td2->td_standin = NULL;
+ }
+ }
thread_reap(); /* check for zombie threads */
/*
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index 9fbf602..0af883b 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -500,8 +500,6 @@ again:
#undef RANGEOF
/* Set up the thread as an active thread (as if runnable). */
- TAILQ_REMOVE(&kg2->kg_iq, ke2, ke_kgrlist);
- kg2->kg_idle_kses--;
ke2->ke_state = KES_THREAD;
ke2->ke_thread = td2;
td2->td_kse = ke2;
diff --git a/sys/kern/kern_kse.c b/sys/kern/kern_kse.c
index 3326119..407b777 100644
--- a/sys/kern/kern_kse.c
+++ b/sys/kern/kern_kse.c
@@ -67,7 +67,7 @@ static int oiks_debug = 1; /* 0 disable, 1 printf, 2 enter debugger */
SYSCTL_INT(_kern_threads, OID_AUTO, oiks, CTLFLAG_RW,
&oiks_debug, 0, "OIKS thread debug");
-static int max_threads_per_proc = 6;
+static int max_threads_per_proc = 10;
SYSCTL_INT(_kern_threads, OID_AUTO, max_per_proc, CTLFLAG_RW,
&max_threads_per_proc, 0, "Limit on threads per proc");
@@ -470,6 +470,11 @@ thread_exit(void)
thread_stash(ke->ke_tdspare);
ke->ke_tdspare = NULL;
}
+ if (td->td_standin != NULL) {
+ thread_stash(td->td_standin);
+ td->td_standin = NULL;
+ }
+
cpu_thread_exit(td); /* XXXSMP */
/*
@@ -478,14 +483,6 @@ thread_exit(void)
* all this stuff.
*/
if (p->p_numthreads > 1) {
- /* Reassign this thread's KSE. */
- ke->ke_thread = NULL;
- td->td_kse = NULL;
- ke->ke_state = KES_UNQUEUED;
- if (ke->ke_bound == td)
- ke->ke_bound = NULL;
- kse_reassign(ke);
-
/* Unlink this thread from its proc. and the kseg */
TAILQ_REMOVE(&p->p_threads, td, td_plist);
p->p_numthreads--;
@@ -501,12 +498,41 @@ thread_exit(void)
thread_unsuspend_one(p->p_singlethread);
}
}
+
+ /* Reassign this thread's KSE. */
+ ke->ke_thread = NULL;
+ td->td_kse = NULL;
+ ke->ke_state = KES_UNQUEUED;
+ if (ke->ke_bound == td) {
+ printf("thread_exit: entered with ke_bound set\n");
+ ke->ke_bound = NULL; /* should never happen */
+ }
+
+ kse_reassign(ke);
PROC_UNLOCK(p);
td->td_state = TDS_INACTIVE;
td->td_proc = NULL;
td->td_ksegrp = NULL;
td->td_last_kse = NULL;
- ke->ke_tdspare = td;
+ /*
+ * For now stash this here, however
+ * it's not a permanent solution.
+ * When we want to make KSEs exit as well
+ * we'll have to face this one again.
+ * Where will we hide it then?
+ *
+ * In borrower threads, stash it in the lender
+ * Where it won't be needed until
+ * this thread is long gone.
+ */
+ if (ke->ke_bound) {
+ if (ke->ke_bound->td_standin) {
+ thread_stash(ke->ke_bound->td_standin);
+ }
+ ke->ke_bound->td_standin = td;
+ } else {
+ ke->ke_tdspare = td;
+ }
} else {
PROC_UNLOCK(p);
}
@@ -555,40 +581,85 @@ struct thread *
thread_schedule_upcall(struct thread *td, struct kse *ke)
{
struct thread *td2;
+ int newkse;
mtx_assert(&sched_lock, MA_OWNED);
- if (ke->ke_tdspare != NULL) {
- td2 = ke->ke_tdspare;
- ke->ke_tdspare = NULL;
+ newkse = (ke != td->td_kse);
+
+ /*
+ * If the kse is already owned by another thread then we can't
+ * schedule an upcall because the other thread must be BOUND
+ * which means it is not in a position to take an upcall.
+ * We must be borrowing the KSE to allow us to complete some in-kernel
+ * work. When we complete, the Bound thread will have teh chance to
+ * complete. This thread will sleep as planned. Hopefully there will
+ * eventually be un unbound thread that can be converted to an
+ * upcall to report the completion of this thread.
+ */
+ if (ke->ke_bound && ((ke->ke_bound->td_flags & TDF_UNBOUND) == 0)) {
+ return (NULL);
+ }
+ KASSERT((ke->ke_bound == NULL), ("kse already bound"));
+
+ if ((td2 = td->td_standin) != NULL) {
+ td->td_standin = NULL;
} else {
- mtx_unlock_spin(&sched_lock);
- td2 = thread_alloc();
- mtx_lock_spin(&sched_lock);
+ if (newkse)
+ panic("no reserve thread when called with a new kse");
+ /*
+ * If called from (e.g.) sleep and we do not have
+ * a reserve thread, then we've used it, so do not
+ * create an upcall.
+ */
+ return(NULL);
}
CTR3(KTR_PROC, "thread_schedule_upcall: thread %p (pid %d, %s)",
- td, td->td_proc->p_pid, td->td_proc->p_comm);
+ td2, td->td_proc->p_pid, td->td_proc->p_comm);
bzero(&td2->td_startzero,
(unsigned)RANGEOF(struct thread, td_startzero, td_endzero));
bcopy(&td->td_startcopy, &td2->td_startcopy,
(unsigned) RANGEOF(struct thread, td_startcopy, td_endcopy));
thread_link(td2, ke->ke_ksegrp);
cpu_set_upcall(td2, td->td_pcb);
+
+ /*
+ * XXXKSE do we really need this? (default values for the
+ * frame).
+ */
bcopy(td->td_frame, td2->td_frame, sizeof(struct trapframe));
+
/*
- * The user context for this thread is selected when we choose
- * a KSE and return to userland on it. All we need do here is
- * note that the thread exists in order to perform an upcall.
- *
- * Since selecting a KSE to perform the upcall involves locking
- * that KSE's context to our upcall, its best to wait until the
- * last possible moment before grabbing a KSE. We do this in
- * userret().
+ * Bind the new thread to the KSE,
+ * and if it's our KSE, lend it back to ourself
+ * so we can continue running.
*/
td2->td_ucred = crhold(td->td_ucred);
- td2->td_flags = TDF_UNBOUND|TDF_UPCALLING;
- TD_SET_CAN_RUN(td2);
- setrunqueue(td2);
- return (td2);
+ td2->td_flags = TDF_UPCALLING; /* note: BOUND */
+ td2->td_kse = ke;
+ td2->td_state = TDS_CAN_RUN;
+ td2->td_inhibitors = 0;
+ /*
+ * If called from msleep(), we are working on the current
+ * KSE so fake that we borrowed it. If called from
+ * kse_create(), don't, as we have a new kse too.
+ */
+ if (!newkse) {
+ /*
+ * This thread will be scheduled when the current thread
+ * blocks, exits or tries to enter userspace (whichever
+ * happens first). When that happens the KSE will "revert"
+ * to this thread in a BOUND manner. Since we are called
+ * from msleep() this is going to be "very soon" in nearly
+ * all cases.
+ */
+ ke->ke_bound = td2;
+ TD_SET_LOAN(td2);
+ } else {
+ ke->ke_bound = NULL;
+ ke->ke_thread = td2;
+ setrunqueue(td2);
+ }
+ return (td2); /* bogus.. should be a void function */
}
/*
@@ -605,6 +676,7 @@ signal_upcall(struct proc *p, int sig)
int error;
PROC_LOCK_ASSERT(p, MA_OWNED);
+return (NULL);
td = FIRST_THREAD_IN_PROC(p);
ke = td->td_kse;
@@ -619,94 +691,15 @@ signal_upcall(struct proc *p, int sig)
PROC_LOCK(p);
if (error)
return (NULL);
+ if (td->td_standin == NULL)
+ td->td_standin = thread_alloc();
mtx_lock_spin(&sched_lock);
- td2 = thread_schedule_upcall(td, ke);
+ td2 = thread_schedule_upcall(td, ke); /* Bogus JRE */
mtx_unlock_spin(&sched_lock);
return (td2);
}
/*
- * Consider whether or not an upcall should be made, and update the
- * TDF_UPCALLING flag appropriately.
- *
- * This function is called when the current thread had been bound to a user
- * thread that performed a syscall that blocked, and is now returning.
- * Got that? syscall -> msleep -> wakeup -> syscall_return -> us.
- *
- * This thread will be returned to the UTS in its mailbox as a completed
- * thread. We need to decide whether or not to perform an upcall now,
- * or simply queue the thread for later.
- *
- * XXXKSE Future enhancement: We could also return back to
- * the thread if we haven't had to do an upcall since then.
- * If the KSE's copy is == the thread's copy, and there are
- * no other completed threads.
- */
-static int
-thread_consider_upcalling(struct thread *td)
-{
- struct proc *p;
- struct ksegrp *kg;
- int error;
-
- /*
- * Save the thread's context, and link it
- * into the KSEGRP's list of completed threads.
- */
- error = thread_export_context(td);
- td->td_flags &= ~TDF_UNBOUND;
- td->td_mailbox = NULL;
- if (error)
- /*
- * Failing to do the KSE operation just defaults
- * back to synchonous operation, so just return from
- * the syscall.
- */
- return (error);
-
- /*
- * Decide whether to perform an upcall now.
- */
- /* Make sure there are no other threads waiting to run. */
- p = td->td_proc;
- kg = td->td_ksegrp;
- PROC_LOCK(p);
- mtx_lock_spin(&sched_lock);
- /* bogus test, ok for testing though */
- if (TAILQ_FIRST(&kg->kg_runq) &&
- (TAILQ_LAST(&kg->kg_runq, threadqueue)
- != kg->kg_last_assigned)) {
- /*
- * Another thread in this KSEG needs to run.
- * Switch to it instead of performing an upcall,
- * abondoning this thread. Perform the upcall
- * later; discard this thread for now.
- *
- * XXXKSE - As for the other threads to run;
- * we COULD rush through all the threads
- * in this KSEG at this priority, or we
- * could throw the ball back into the court
- * and just run the highest prio kse available.
- * What is OUR priority? The priority of the highest
- * sycall waiting to be returned?
- * For now, just let another KSE run (easiest).
- */
- thread_exit(); /* Abandon current thread. */
- /* NOTREACHED */
- }
- /*
- * Perform an upcall now.
- *
- * XXXKSE - Assumes we are going to userland, and not
- * nested in the kernel.
- */
- td->td_flags |= TDF_UPCALLING;
- mtx_unlock_spin(&sched_lock);
- PROC_UNLOCK(p);
- return (0);
-}
-
-/*
* The extra work we go through if we are a threaded process when we
* return to userland.
*
@@ -724,86 +717,188 @@ thread_userret(struct thread *td, struct trapframe *frame)
int error;
int unbound;
struct kse *ke;
+ struct ksegrp *kg;
+ struct thread *td2;
+ struct proc *p;
- if (td->td_kse->ke_bound) {
- thread_export_context(td);
- PROC_LOCK(td->td_proc);
- mtx_lock_spin(&sched_lock);
- thread_exit();
- }
+ error = 0;
- /* Make the thread bound from now on, but remember what it was. */
unbound = td->td_flags & TDF_UNBOUND;
- td->td_flags &= ~TDF_UNBOUND;
- /*
- * Ensure that we have a spare thread available.
- */
- ke = td->td_kse;
- if (ke->ke_tdspare == NULL) {
- mtx_lock(&Giant);
- ke->ke_tdspare = thread_alloc();
- mtx_unlock(&Giant);
- }
- /*
- * Originally bound threads need no additional work.
- */
- if (unbound == 0)
- return (0);
- error = 0;
+
+ kg = td->td_ksegrp;
+ p = td->td_proc;
+
/*
- * Decide whether or not we should perform an upcall now.
+ * Originally bound threads never upcall but they may
+ * loan out their KSE at this point.
+ * Upcalls imply bound. They also may want to do some philanthropy.
+ * Unbound threads on the other hand either yield to other work
+ * or transform into an upcall.
+ * (having saved their context to user space in both cases)
*/
- if (((td->td_flags & TDF_UPCALLING) == 0) && unbound) {
- /* if we have other threads to run we will not return */
- if ((error = thread_consider_upcalling(td)))
- return (error); /* coundn't go async , just go sync. */
- }
- if (td->td_flags & TDF_UPCALLING) {
+ if (unbound ) {
/*
- * There is no more work to do and we are going to ride
- * this thead/KSE up to userland as an upcall.
+ * We are an unbound thread, looking to return to
+ * user space.
+ * There are several possibilities:
+ * 1) we are using a borrowed KSE: save state and exit;
+ * kse_reassign() will recycle the kse as needed.
+ * 2) we are not: save state, and then convert ourselves
+ * to be an upcall, bound to the KSE.
+ * if there are others that need the kse,
+ * give them a chance by doing an mi_switch().
+ * Because we are bound, control will eventually return
+ * to us here.
+ * ***
+ * Save the thread's context, and link it
+ * into the KSEGRP's list of completed threads.
*/
- CTR3(KTR_PROC, "userret: upcall thread %p (pid %d, %s)",
- td, td->td_proc->p_pid, td->td_proc->p_comm);
+ error = thread_export_context(td);
+ td->td_mailbox = NULL;
+ if (error) {
+ /*
+ * If we are not running on a borrowed KSE, then
+ * failing to do the KSE operation just defaults
+ * back to synchronous operation, so just return from
+ * the syscall. If it IS borrowed, there is nothing
+ * we can do. We just lose that context. We
+ * probably should note this somewhere and send
+ * the process a signal.
+ */
+ PROC_LOCK(td->td_proc);
+ psignal(td->td_proc, SIGSEGV);
+ mtx_lock_spin(&sched_lock);
+ if (td->td_kse->ke_bound == NULL) {
+ td->td_flags &= ~TDF_UNBOUND;
+ PROC_UNLOCK(td->td_proc);
+ mtx_unlock_spin(&sched_lock);
+ return (error); /* go sync */
+ }
+ thread_exit();
+ }
/*
- * Set user context to the UTS.
+ * If the KSE is owned and we are borrowing it,
+ * don't make an upcall, just exit so that the owner
+ * can get its KSE if it wants it.
+ * Our context is already safely stored for later
+ * use by the UTS.
*/
- cpu_set_upcall_kse(td, ke);
-
+ PROC_LOCK(p);
+ mtx_lock_spin(&sched_lock);
+ if (td->td_kse->ke_bound) {
+ thread_exit();
+ }
+ PROC_UNLOCK(p);
+
/*
- * Put any completed mailboxes on this KSE's list.
+ * Turn ourself into a bound upcall.
+ * We will rely on kse_reassign()
+ * to make us run at a later time.
+ * We should look just like a scheduled upcall
+ * from msleep() or cv_wait().
*/
- error = thread_link_mboxes(td->td_ksegrp, ke);
- if (error)
- goto bad;
+ td->td_flags &= ~TDF_UNBOUND;
+ td->td_flags |= TDF_UPCALLING;
+ /* Only get here if we have become an upcall */
- /*
- * Set state and mailbox.
+ } else {
+ mtx_lock_spin(&sched_lock);
+ }
+ /*
+ * We ARE going back to userland with this KSE.
+ * Check for threads that need to borrow it.
+ * Optimisation: don't call mi_switch if no-one wants the KSE.
+ * Any other thread that comes ready after this missed the boat.
+ */
+ ke = td->td_kse;
+ if ((td2 = kg->kg_last_assigned))
+ td2 = TAILQ_NEXT(td2, td_runq);
+ else
+ td2 = TAILQ_FIRST(&kg->kg_runq);
+ if (td2) {
+ /*
+ * Force a switch to more urgent 'in kernel'
+ * work. Control will return to this thread
+ * when there is no more work to do.
+ * kse_reassign() will do that for us.
*/
- td->td_flags &= ~TDF_UPCALLING;
-#if 0
- error = suword((caddr_t)ke->ke_mailbox +
- offsetof(struct kse_mailbox, km_curthread),
- 0);
-#else /* if user pointer arithmetic is ok in the kernel */
- error = suword((caddr_t)&ke->ke_mailbox->km_curthread, 0);
-#endif
- if (error)
- goto bad;
+ TD_SET_LOAN(td);
+ ke->ke_bound = td;
+ ke->ke_thread = NULL;
+ mi_switch(); /* kse_reassign() will (re)find td2 */
+ }
+ mtx_unlock_spin(&sched_lock);
+
+ /*
+ * Optimisation:
+ * Ensure that we have a spare thread available,
+ * for when we re-enter the kernel.
+ */
+ if (td->td_standin == NULL) {
+ if (ke->ke_tdspare) {
+ td->td_standin = ke->ke_tdspare;
+ ke->ke_tdspare = NULL;
+ } else {
+ td->td_standin = thread_alloc();
+ }
}
+
+ /*
+ * To get here, we know there is no other need for our
+ * KSE so we can proceed. If not upcalling, go back to
+ * userspace. If we are, get the upcall set up.
+ */
+ if ((td->td_flags & TDF_UPCALLING) == 0)
+ return (0);
+
+ /*
+ * We must be an upcall to get this far.
+ * There is no more work to do and we are going to ride
+ * this thread/KSE up to userland as an upcall.
+ * Do the last parts of the setup needed for the upcall.
+ */
+ CTR3(KTR_PROC, "userret: upcall thread %p (pid %d, %s)",
+ td, td->td_proc->p_pid, td->td_proc->p_comm);
+
+ /*
+ * Set user context to the UTS.
+ */
+ cpu_set_upcall_kse(td, ke);
+
/*
- * Stop any chance that we may be separated from
- * the KSE we are currently on. This is "biting the bullet",
- * we are committing to go to user space as as this KSE here.
+ * Put any completed mailboxes on this KSE's list.
*/
- return (error);
+ error = thread_link_mboxes(kg, ke);
+ if (error)
+ goto bad;
+
+ /*
+ * Set state and mailbox.
+ * From now on we are just a bound outgoing process.
+ * **Problem** userret is often called several times.
+ * It would be nice if this all happened only on the first time
+ * through (the scan for extra work, etc.).
+ */
+ td->td_flags &= ~TDF_UPCALLING;
+#if 0
+ error = suword((caddr_t)ke->ke_mailbox +
+ offsetof(struct kse_mailbox, km_curthread), 0);
+#else /* if user pointer arithmetic is ok in the kernel */
+ error = suword((caddr_t)&ke->ke_mailbox->km_curthread, 0);
+#endif
+ if (!error)
+ return (0);
+
bad:
/*
* Things are going to be so screwed we should just kill the process.
* how do we do that?
*/
- panic ("thread_userret.. need to kill proc..... how?");
+ PROC_LOCK(td->td_proc);
+ psignal(td->td_proc, SIGSEGV);
+ PROC_UNLOCK(td->td_proc);
+ return (error); /* go sync */
}
/*
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index bd613c45..82138a5 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -241,9 +241,7 @@ kse_link(struct kse *ke, struct ksegrp *kg)
TAILQ_INSERT_HEAD(&kg->kg_kseq, ke, ke_kglist);
kg->kg_kses++;
- ke->ke_state = KES_IDLE;
- TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
- kg->kg_idle_kses++;
+ ke->ke_state = KES_UNQUEUED;
ke->ke_proc = p;
ke->ke_ksegrp = kg;
ke->ke_thread = NULL;
@@ -310,24 +308,17 @@ kse_exit(struct thread *td, struct kse_exit_args *uap)
int
kse_release(struct thread *td, struct kse_release_args *uap)
{
- struct thread *td2;
+ struct proc *p;
+ p = td->td_proc;
/* KSE-enabled processes only, please. */
- if ((td->td_proc->p_flag & P_KSES) == 0)
- return (EINVAL);
-
- /* Don't discard the last thread. */
- td2 = FIRST_THREAD_IN_PROC(td->td_proc);
- KASSERT(td2 != NULL, ("kse_release: no threads in our proc"));
- if (TAILQ_NEXT(td, td_plist) == NULL)
- return (EINVAL);
-
- /* Abandon thread. */
- PROC_LOCK(td->td_proc);
- mtx_lock_spin(&sched_lock);
- thread_exit();
- /* NOTREACHED */
- return (0);
+ if (p->p_flag & P_KSES) {
+ PROC_LOCK(p);
+ mtx_lock_spin(&sched_lock);
+ thread_exit();
+ /* NOTREACHED */
+ }
+ return (EINVAL);
}
/* struct kse_wakeup_args {
@@ -423,6 +414,10 @@ kse_create(struct thread *td, struct kse_create_args *uap)
if (SIGPENDING(p))
newke->ke_flags |= KEF_ASTPENDING;
PROC_UNLOCK(p);
+ /* For the first call this may not have been set */
+ if (td->td_standin == NULL) {
+ td->td_standin = thread_alloc();
+ }
mtx_lock_spin(&sched_lock);
if (newkg)
ksegrp_link(newkg, p);
diff --git a/sys/kern/kern_switch.c b/sys/kern/kern_switch.c
index b4b8879..37500a1 100644
--- a/sys/kern/kern_switch.c
+++ b/sys/kern/kern_switch.c
@@ -107,7 +107,11 @@ CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);
static struct runq runq;
SYSINIT(runq, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, runq_init, &runq)
+void panc(char *string1, char *string2);
+
+#if 0
static void runq_readjust(struct runq *rq, struct kse *ke);
+#endif
/************************************************************************
* Functions that manipulate runnability from a thread perspective. *
************************************************************************/
@@ -169,9 +173,10 @@ retry:
}
/*
- * Given a KSE (now surplus), either assign a new runable thread to it
+ * Given a KSE (now surplus or at least loanable), either assign a new
+ * runnable thread to it
* (and put it in the run queue) or put it in the ksegrp's idle KSE list.
- * Assumes the kse is not linked to any threads any more. (has been cleaned).
+ * Or maybe give it back to its owner if it's been loaned.
*/
void
kse_reassign(struct kse *ke)
@@ -179,23 +184,15 @@ kse_reassign(struct kse *ke)
struct ksegrp *kg;
struct thread *td;
struct thread *owner;
+ struct thread *original;
mtx_assert(&sched_lock, MA_OWNED);
kg = ke->ke_ksegrp;
owner = ke->ke_bound;
+ original = ke->ke_thread;
KASSERT(!(owner && ((owner->td_kse != ke) ||
(owner->td_flags & TDF_UNBOUND))),
("kse_reassign: bad thread bound state"));
- if (owner && (owner->td_inhibitors == TDI_LOAN)) {
- TD_CLR_LOAN(owner);
- ke->ke_bound = NULL;
- ke->ke_thread = owner;
- owner->td_kse = ke;
- setrunqueue(owner);
- CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p (give back)",
- ke, owner);
- return;
- }
/*
* Find the first unassigned thread
@@ -212,29 +209,77 @@ kse_reassign(struct kse *ke)
* If we found one assign it the kse, otherwise idle the kse.
*/
if (td) {
+ /*
+ * If the original is bound to us we can only be lent out so
+ * make a loan, otherwise we just drop the
+ * original thread.
+ */
+ if (original) {
+ if (((original->td_flags & TDF_UNBOUND) == 0)) {
+ /*
+ * Put the owner on the side
+ */
+ ke->ke_bound = original;
+ TD_SET_LOAN(original);
+ } else {
+ original->td_kse = NULL;
+ }
+ }
kg->kg_last_assigned = td;
td->td_kse = ke;
ke->ke_thread = td;
runq_add(&runq, ke);
- if (owner)
- TD_SET_LOAN(owner);
+ /*
+ * If we have already borrowed this,
+ * just pass it to the new thread,
+ * otherwise, enact the loan.
+ */
CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p", ke, td);
- } else if (!owner) {
- ke->ke_state = KES_IDLE;
- ke->ke_thread = NULL;
- TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
- kg->kg_idle_kses++;
- CTR1(KTR_RUNQ, "kse_reassign: ke%p idled", ke);
- } else {
+ return;
+ }
+ if (owner) { /* already loaned out */
+ /* effectively unloan it */
TD_CLR_LOAN(owner);
- ke->ke_state = KES_THREAD;
ke->ke_thread = owner;
- owner->td_kse = ke;
- ke->ke_flags |= KEF_ONLOANQ;
- TAILQ_INSERT_HEAD(&kg->kg_lq, ke, ke_kgrlist);
- kg->kg_loan_kses++;
- CTR1(KTR_RUNQ, "kse_reassign: ke%p is on loan queue", ke);
+ ke->ke_bound = NULL;
+ if (original)
+ original->td_kse = NULL;
+ original = owner;
+
+ if (TD_CAN_RUN(owner)) {
+ /*
+ * If the owner thread is now runnable, run it..
+ * Let it have its KSE back.
+ */
+ setrunqueue(owner);
+ CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p (give back)",
+ ke, owner);
+ return;
+ }
+ }
+ /*
+ * Presently NOT loaned out.
+ * If we are bound, we go on the loanable queue,
+ * otherwise onto the free queue.
+ */
+ if (original) {
+ if (((original->td_flags & TDF_UNBOUND) == 0)) {
+ ke->ke_state = KES_THREAD;
+ ke->ke_flags |= KEF_ONLOANQ;
+ ke->ke_bound = NULL;
+ TAILQ_INSERT_HEAD(&kg->kg_lq, ke, ke_kgrlist);
+ kg->kg_loan_kses++;
+ CTR1(KTR_RUNQ, "kse_reassign: ke%p on loan queue", ke);
+ return;
+ } else {
+ original->td_kse = NULL;
+ }
}
+ ke->ke_state = KES_IDLE;
+ ke->ke_thread = NULL;
+ TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
+ kg->kg_idle_kses++;
+ CTR1(KTR_RUNQ, "kse_reassign: ke%p idled", ke);
}
int
@@ -252,7 +297,7 @@ kserunnable(void)
void
remrunqueue(struct thread *td)
{
- struct thread *td2, *td3, *owner;
+ struct thread *td2, *td3;
struct ksegrp *kg;
struct kse *ke;
@@ -273,6 +318,8 @@ remrunqueue(struct thread *td)
ke->ke_state = KES_THREAD;
return;
}
+ td3 = TAILQ_PREV(td, threadqueue, td_runq);
+ TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
if (ke) {
/*
* This thread has been assigned to a KSE.
@@ -282,62 +329,12 @@ remrunqueue(struct thread *td)
*/
td2 = kg->kg_last_assigned;
KASSERT((td2 != NULL), ("last assigned has wrong value "));
- td->td_kse = NULL;
- if ((td3 = TAILQ_NEXT(td2, td_runq))) {
- KASSERT(td3 != td, ("td3 somehow matched td"));
- /*
- * Give the next unassigned thread to the KSE
- * so the number of runnable KSEs remains
- * constant.
- */
- td3->td_kse = ke;
- ke->ke_thread = td3;
+ if (td2 == td)
kg->kg_last_assigned = td3;
- runq_readjust(&runq, ke);
- } else {
- /*
- * There is no unassigned thread.
- * If we were the last assigned one,
- * adjust the last assigned pointer back
- * one, which may result in NULL.
- */
- if (td == td2) {
- kg->kg_last_assigned =
- TAILQ_PREV(td, threadqueue, td_runq);
- }
- runq_remove(&runq, ke);
- KASSERT((ke->ke_state != KES_IDLE),
- ("kse already idle"));
- if (ke->ke_bound) {
- owner = ke->ke_bound;
- if (owner->td_inhibitors == TDI_LOAN) {
- TD_CLR_LOAN(owner);
- ke->ke_bound = NULL;
- ke->ke_thread = owner;
- owner->td_kse = ke;
- setrunqueue(owner);
- CTR2(KTR_RUNQ,
- "remrunqueue: ke%p -> td%p (give back)",
- ke, owner);
- } else {
- TD_CLR_LOAN(owner);
- ke->ke_state = KES_THREAD;
- ke->ke_thread = owner;
- owner->td_kse = ke;
- ke->ke_flags |= KEF_ONLOANQ;
- TAILQ_INSERT_HEAD(&kg->kg_lq, ke,
- ke_kgrlist);
- kg->kg_loan_kses++;
- }
- } else {
- ke->ke_state = KES_IDLE;
- ke->ke_thread = NULL;
- TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
- kg->kg_idle_kses++;
- }
- }
+ td->td_kse = NULL;
+ ke->ke_thread = NULL;
+ kse_reassign(ke);
}
- TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
}
void
@@ -355,6 +352,15 @@ setrunqueue(struct thread *td)
TD_SET_RUNQ(td);
kg = td->td_ksegrp;
kg->kg_runnable++;
+ if ((td->td_proc->p_flag & P_KSES) == 0) {
+ /*
+ * Common path optimisation: Only one of everything
+ * and the KSE is always already attached.
+ * Totally ignore the ksegrp run queue.
+ */
+ runq_add(&runq, td->td_kse);
+ return;
+ }
if ((td->td_flags & TDF_UNBOUND) == 0) {
KASSERT((td->td_kse != NULL),
("queueing BAD thread to run queue"));
@@ -365,14 +371,10 @@ setrunqueue(struct thread *td)
TAILQ_REMOVE(&kg->kg_lq, ke, ke_kgrlist);
kg->kg_loan_kses--;
}
- /*
- * Common path optimisation: Only one of everything
- * and the KSE is always already attached.
- * Totally ignore the ksegrp run queue.
- */
runq_add(&runq, td->td_kse);
return;
}
+
/*
* Ok, so we are threading with this thread.
* We don't have a KSE, see if we can get one..
@@ -394,11 +396,16 @@ setrunqueue(struct thread *td)
ke->ke_state = KES_THREAD;
kg->kg_idle_kses--;
} else if (kg->kg_loan_kses) {
+ /*
+ * Failing that, see if we can borrow one.
+ */
ke = TAILQ_FIRST(&kg->kg_lq);
TAILQ_REMOVE(&kg->kg_lq, ke, ke_kgrlist);
ke->ke_flags &= ~KEF_ONLOANQ;
ke->ke_state = KES_THREAD;
- TD_SET_LOAN(ke->ke_bound);
+ TD_SET_LOAN(ke->ke_thread);
+ ke->ke_bound = ke->ke_thread;
+ ke->ke_thread = NULL;
kg->kg_loan_kses--;
} else if (tda && (tda->td_priority > td->td_priority)) {
/*
@@ -697,6 +704,7 @@ runq_remove(struct runq *rq, struct kse *ke)
ke->ke_ksegrp->kg_runq_kses--;
}
+#if 0
static void
runq_readjust(struct runq *rq, struct kse *ke)
{
@@ -706,19 +714,27 @@ runq_readjust(struct runq *rq, struct kse *ke)
runq_add(rq, ke);
}
}
+#endif
#if 0
void
-thread_sanity_check(struct thread *td)
+panc(char *string1, char *string2)
+{
+ printf("%s", string1);
+ Debugger(string2);
+}
+
+void
+thread_sanity_check(struct thread *td, char *string)
{
struct proc *p;
struct ksegrp *kg;
struct kse *ke;
- struct thread *td2;
+ struct thread *td2 = NULL;
unsigned int prevpri;
- int saw_lastassigned;
- int unassigned;
- int assigned;
+ int saw_lastassigned = 0;
+ int unassigned = 0;
+ int assigned = 0;
p = td->td_proc;
kg = td->td_ksegrp;
@@ -727,16 +743,16 @@ thread_sanity_check(struct thread *td)
if (ke) {
if (p != ke->ke_proc) {
- panic("wrong proc");
+ panc(string, "wrong proc");
}
if (ke->ke_thread != td) {
- panic("wrong thread");
+ panc(string, "wrong thread");
}
}
if ((p->p_flag & P_KSES) == 0) {
if (ke == NULL) {
- panic("non KSE thread lost kse");
+ panc(string, "non KSE thread lost kse");
}
} else {
prevpri = 0;
@@ -745,22 +761,27 @@ thread_sanity_check(struct thread *td)
assigned = 0;
TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
if (td2->td_priority < prevpri) {
- panic("thread runqueue unosorted");
+ panc(string, "thread runqueue unosorted");
+ }
+ if ((td2->td_state == TDS_RUNQ) &&
+ td2->td_kse &&
+ (td2->td_kse->ke_state != KES_ONRUNQ)) {
+ panc(string, "KSE wrong state");
}
prevpri = td2->td_priority;
if (td2->td_kse) {
assigned++;
if (unassigned) {
- panic("unassigned before assigned");
+ panc(string, "unassigned before assigned");
}
if (kg->kg_last_assigned == NULL) {
- panic("lastassigned corrupt");
+ panc(string, "lastassigned corrupt");
}
if (saw_lastassigned) {
- panic("last assigned not last");
+ panc(string, "last assigned not last");
}
if (td2->td_kse->ke_thread != td2) {
- panic("mismatched kse/thread");
+ panc(string, "mismatched kse/thread");
}
} else {
unassigned++;
@@ -768,28 +789,32 @@ thread_sanity_check(struct thread *td)
if (td2 == kg->kg_last_assigned) {
saw_lastassigned = 1;
if (td2->td_kse == NULL) {
- panic("last assigned not assigned");
+ panc(string, "last assigned not assigned");
}
}
}
if (kg->kg_last_assigned && (saw_lastassigned == 0)) {
- panic("where on earth does lastassigned point?");
+ panc(string, "where on earth does lastassigned point?");
}
FOREACH_THREAD_IN_GROUP(kg, td2) {
if (((td2->td_flags & TDF_UNBOUND) == 0) &&
(TD_ON_RUNQ(td2))) {
assigned++;
if (td2->td_kse == NULL) {
- panic ("BOUND thread with no KSE");
+ panc(string, "BOUND thread with no KSE");
}
}
}
#if 0
if ((unassigned + assigned) != kg->kg_runnable) {
- panic("wrong number in runnable");
+ panc(string, "wrong number in runnable");
}
#endif
}
+ if (assigned == 12345) {
+ printf("%p %p %p %p %p %d, %d",
+ td, td2, ke, kg, p, assigned, saw_lastassigned);
+ }
}
#endif
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index 42d23bd..b0f9d92 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -474,18 +474,16 @@ msleep(ident, mtx, priority, wmesg, timo)
return (EINTR);
if (td->td_mailbox && (!(td->td_flags & TDF_INMSLEEP))) {
/*
- * If we have no queued work to do, then
- * upcall to the UTS to see if it has more to do.
- * We don't need to upcall now, just make it and
- * queue it.
+ * Arrange for an upcall to be readied.
+ * It will not actually happen until all
+ * pending in-kernel work for this KSEGRP
+ * has been done.
*/
mtx_lock_spin(&sched_lock);
- if (TAILQ_FIRST(&td->td_ksegrp->kg_runq) == NULL) {
- /* Don't recurse here! */
- td->td_flags |= TDF_INMSLEEP;
- thread_schedule_upcall(td, td->td_kse);
- td->td_flags &= ~TDF_INMSLEEP;
- }
+ /* Don't recurse here! */
+ td->td_flags |= TDF_INMSLEEP;
+ thread_schedule_upcall(td, td->td_kse);
+ td->td_flags &= ~TDF_INMSLEEP;
mtx_unlock_spin(&sched_lock);
}
}
@@ -818,23 +816,15 @@ mi_switch(void)
* or stopped or any thing else similar.
*/
if (TD_IS_RUNNING(td)) {
- KASSERT(((ke->ke_flags & KEF_IDLEKSE) == 0),
- ("Idle thread in mi_switch with wrong state"));
/* Put us back on the run queue (kse and all). */
setrunqueue(td);
- } else if (td->td_flags & TDF_UNBOUND) {
+ } else if (p->p_flag & P_KSES) {
/*
* We will not be on the run queue. So we must be
- * sleeping or similar. If it's available,
+ * sleeping or similar. As it's available,
* someone else can use the KSE if they need it.
- * XXXKSE KSE loaning will change this.
+ * (If bound, LOANING can still occur.)
*/
- td->td_kse = NULL;
- kse_reassign(ke);
- } else if (p->p_flag & P_KSES) {
- KASSERT(((ke->ke_bound == NULL) || (ke->ke_bound == td)),
- ("mi_switch: bad bound state"));
- ke->ke_bound = td;
kse_reassign(ke);
}
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index 3326119..407b777 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -67,7 +67,7 @@ static int oiks_debug = 1; /* 0 disable, 1 printf, 2 enter debugger */
SYSCTL_INT(_kern_threads, OID_AUTO, oiks, CTLFLAG_RW,
&oiks_debug, 0, "OIKS thread debug");
-static int max_threads_per_proc = 6;
+static int max_threads_per_proc = 10;
SYSCTL_INT(_kern_threads, OID_AUTO, max_per_proc, CTLFLAG_RW,
&max_threads_per_proc, 0, "Limit on threads per proc");
@@ -470,6 +470,11 @@ thread_exit(void)
thread_stash(ke->ke_tdspare);
ke->ke_tdspare = NULL;
}
+ if (td->td_standin != NULL) {
+ thread_stash(td->td_standin);
+ td->td_standin = NULL;
+ }
+
cpu_thread_exit(td); /* XXXSMP */
/*
@@ -478,14 +483,6 @@ thread_exit(void)
* all this stuff.
*/
if (p->p_numthreads > 1) {
- /* Reassign this thread's KSE. */
- ke->ke_thread = NULL;
- td->td_kse = NULL;
- ke->ke_state = KES_UNQUEUED;
- if (ke->ke_bound == td)
- ke->ke_bound = NULL;
- kse_reassign(ke);
-
/* Unlink this thread from its proc. and the kseg */
TAILQ_REMOVE(&p->p_threads, td, td_plist);
p->p_numthreads--;
@@ -501,12 +498,41 @@ thread_exit(void)
thread_unsuspend_one(p->p_singlethread);
}
}
+
+ /* Reassign this thread's KSE. */
+ ke->ke_thread = NULL;
+ td->td_kse = NULL;
+ ke->ke_state = KES_UNQUEUED;
+ if (ke->ke_bound == td) {
+ printf("thread_exit: entered with ke_bound set\n");
+ ke->ke_bound = NULL; /* should never happen */
+ }
+
+ kse_reassign(ke);
PROC_UNLOCK(p);
td->td_state = TDS_INACTIVE;
td->td_proc = NULL;
td->td_ksegrp = NULL;
td->td_last_kse = NULL;
- ke->ke_tdspare = td;
+ /*
+ * For now stash this here, however
+ * it's not a permanent solution.
+ * When we want to make KSEs exit as well
+ * we'll have to face this one again.
+ * Where will we hide it then?
+ *
+ * In borrower threads, stash it in the lender
+ * Where it won't be needed until
+ * this thread is long gone.
+ */
+ if (ke->ke_bound) {
+ if (ke->ke_bound->td_standin) {
+ thread_stash(ke->ke_bound->td_standin);
+ }
+ ke->ke_bound->td_standin = td;
+ } else {
+ ke->ke_tdspare = td;
+ }
} else {
PROC_UNLOCK(p);
}
@@ -555,40 +581,85 @@ struct thread *
thread_schedule_upcall(struct thread *td, struct kse *ke)
{
struct thread *td2;
+ int newkse;
mtx_assert(&sched_lock, MA_OWNED);
- if (ke->ke_tdspare != NULL) {
- td2 = ke->ke_tdspare;
- ke->ke_tdspare = NULL;
+ newkse = (ke != td->td_kse);
+
+ /*
+ * If the kse is already owned by another thread then we can't
+ * schedule an upcall because the other thread must be BOUND
+ * which means it is not in a position to take an upcall.
+ * We must be borrowing the KSE to allow us to complete some in-kernel
+ * work. When we complete, the bound thread will have the chance to
+ * complete. This thread will sleep as planned. Hopefully there will
+ * eventually be an unbound thread that can be converted to an
+ * upcall to report the completion of this thread.
+ */
+ if (ke->ke_bound && ((ke->ke_bound->td_flags & TDF_UNBOUND) == 0)) {
+ return (NULL);
+ }
+ KASSERT((ke->ke_bound == NULL), ("kse already bound"));
+
+ if ((td2 = td->td_standin) != NULL) {
+ td->td_standin = NULL;
} else {
- mtx_unlock_spin(&sched_lock);
- td2 = thread_alloc();
- mtx_lock_spin(&sched_lock);
+ if (newkse)
+ panic("no reserve thread when called with a new kse");
+ /*
+ * If called from (e.g.) sleep and we do not have
+ * a reserve thread, then we've used it, so do not
+ * create an upcall.
+ */
+ return(NULL);
}
CTR3(KTR_PROC, "thread_schedule_upcall: thread %p (pid %d, %s)",
- td, td->td_proc->p_pid, td->td_proc->p_comm);
+ td2, td->td_proc->p_pid, td->td_proc->p_comm);
bzero(&td2->td_startzero,
(unsigned)RANGEOF(struct thread, td_startzero, td_endzero));
bcopy(&td->td_startcopy, &td2->td_startcopy,
(unsigned) RANGEOF(struct thread, td_startcopy, td_endcopy));
thread_link(td2, ke->ke_ksegrp);
cpu_set_upcall(td2, td->td_pcb);
+
+ /*
+ * XXXKSE do we really need this? (default values for the
+ * frame).
+ */
bcopy(td->td_frame, td2->td_frame, sizeof(struct trapframe));
+
/*
- * The user context for this thread is selected when we choose
- * a KSE and return to userland on it. All we need do here is
- * note that the thread exists in order to perform an upcall.
- *
- * Since selecting a KSE to perform the upcall involves locking
- * that KSE's context to our upcall, its best to wait until the
- * last possible moment before grabbing a KSE. We do this in
- * userret().
+ * Bind the new thread to the KSE,
+ * and if it's our KSE, lend it back to ourself
+ * so we can continue running.
*/
td2->td_ucred = crhold(td->td_ucred);
- td2->td_flags = TDF_UNBOUND|TDF_UPCALLING;
- TD_SET_CAN_RUN(td2);
- setrunqueue(td2);
- return (td2);
+ td2->td_flags = TDF_UPCALLING; /* note: BOUND */
+ td2->td_kse = ke;
+ td2->td_state = TDS_CAN_RUN;
+ td2->td_inhibitors = 0;
+ /*
+ * If called from msleep(), we are working on the current
+ * KSE so fake that we borrowed it. If called from
+ * kse_create(), don't, as we have a new kse too.
+ */
+ if (!newkse) {
+ /*
+ * This thread will be scheduled when the current thread
+ * blocks, exits or tries to enter userspace (whichever
+ * happens first). When that happens the KSE will "revert"
+ * to this thread in a BOUND manner. Since we are called
+ * from msleep() this is going to be "very soon" in nearly
+ * all cases.
+ */
+ ke->ke_bound = td2;
+ TD_SET_LOAN(td2);
+ } else {
+ ke->ke_bound = NULL;
+ ke->ke_thread = td2;
+ setrunqueue(td2);
+ }
+ return (td2); /* bogus.. should be a void function */
}
/*
@@ -605,6 +676,7 @@ signal_upcall(struct proc *p, int sig)
int error;
PROC_LOCK_ASSERT(p, MA_OWNED);
+return (NULL);
td = FIRST_THREAD_IN_PROC(p);
ke = td->td_kse;
@@ -619,94 +691,15 @@ signal_upcall(struct proc *p, int sig)
PROC_LOCK(p);
if (error)
return (NULL);
+ if (td->td_standin == NULL)
+ td->td_standin = thread_alloc();
mtx_lock_spin(&sched_lock);
- td2 = thread_schedule_upcall(td, ke);
+ td2 = thread_schedule_upcall(td, ke); /* Bogus JRE */
mtx_unlock_spin(&sched_lock);
return (td2);
}
/*
- * Consider whether or not an upcall should be made, and update the
- * TDF_UPCALLING flag appropriately.
- *
- * This function is called when the current thread had been bound to a user
- * thread that performed a syscall that blocked, and is now returning.
- * Got that? syscall -> msleep -> wakeup -> syscall_return -> us.
- *
- * This thread will be returned to the UTS in its mailbox as a completed
- * thread. We need to decide whether or not to perform an upcall now,
- * or simply queue the thread for later.
- *
- * XXXKSE Future enhancement: We could also return back to
- * the thread if we haven't had to do an upcall since then.
- * If the KSE's copy is == the thread's copy, and there are
- * no other completed threads.
- */
-static int
-thread_consider_upcalling(struct thread *td)
-{
- struct proc *p;
- struct ksegrp *kg;
- int error;
-
- /*
- * Save the thread's context, and link it
- * into the KSEGRP's list of completed threads.
- */
- error = thread_export_context(td);
- td->td_flags &= ~TDF_UNBOUND;
- td->td_mailbox = NULL;
- if (error)
- /*
- * Failing to do the KSE operation just defaults
- * back to synchonous operation, so just return from
- * the syscall.
- */
- return (error);
-
- /*
- * Decide whether to perform an upcall now.
- */
- /* Make sure there are no other threads waiting to run. */
- p = td->td_proc;
- kg = td->td_ksegrp;
- PROC_LOCK(p);
- mtx_lock_spin(&sched_lock);
- /* bogus test, ok for testing though */
- if (TAILQ_FIRST(&kg->kg_runq) &&
- (TAILQ_LAST(&kg->kg_runq, threadqueue)
- != kg->kg_last_assigned)) {
- /*
- * Another thread in this KSEG needs to run.
- * Switch to it instead of performing an upcall,
- * abondoning this thread. Perform the upcall
- * later; discard this thread for now.
- *
- * XXXKSE - As for the other threads to run;
- * we COULD rush through all the threads
- * in this KSEG at this priority, or we
- * could throw the ball back into the court
- * and just run the highest prio kse available.
- * What is OUR priority? The priority of the highest
- * sycall waiting to be returned?
- * For now, just let another KSE run (easiest).
- */
- thread_exit(); /* Abandon current thread. */
- /* NOTREACHED */
- }
- /*
- * Perform an upcall now.
- *
- * XXXKSE - Assumes we are going to userland, and not
- * nested in the kernel.
- */
- td->td_flags |= TDF_UPCALLING;
- mtx_unlock_spin(&sched_lock);
- PROC_UNLOCK(p);
- return (0);
-}
-
-/*
* The extra work we go through if we are a threaded process when we
* return to userland.
*
@@ -724,86 +717,188 @@ thread_userret(struct thread *td, struct trapframe *frame)
int error;
int unbound;
struct kse *ke;
+ struct ksegrp *kg;
+ struct thread *td2;
+ struct proc *p;
- if (td->td_kse->ke_bound) {
- thread_export_context(td);
- PROC_LOCK(td->td_proc);
- mtx_lock_spin(&sched_lock);
- thread_exit();
- }
+ error = 0;
- /* Make the thread bound from now on, but remember what it was. */
unbound = td->td_flags & TDF_UNBOUND;
- td->td_flags &= ~TDF_UNBOUND;
- /*
- * Ensure that we have a spare thread available.
- */
- ke = td->td_kse;
- if (ke->ke_tdspare == NULL) {
- mtx_lock(&Giant);
- ke->ke_tdspare = thread_alloc();
- mtx_unlock(&Giant);
- }
- /*
- * Originally bound threads need no additional work.
- */
- if (unbound == 0)
- return (0);
- error = 0;
+
+ kg = td->td_ksegrp;
+ p = td->td_proc;
+
/*
- * Decide whether or not we should perform an upcall now.
+ * Originally bound threads never upcall but they may
+ * loan out their KSE at this point.
+ * Upcalls imply bound. They also may want to do some philanthropy.
+ * Unbound threads on the other hand either yield to other work
+ * or transform into an upcall.
+ * (having saved their context to user space in both cases)
*/
- if (((td->td_flags & TDF_UPCALLING) == 0) && unbound) {
- /* if we have other threads to run we will not return */
- if ((error = thread_consider_upcalling(td)))
- return (error); /* coundn't go async , just go sync. */
- }
- if (td->td_flags & TDF_UPCALLING) {
+ if (unbound ) {
/*
- * There is no more work to do and we are going to ride
- * this thead/KSE up to userland as an upcall.
+ * We are an unbound thread, looking to return to
+ * user space.
+ * There are several possibilities:
+ * 1) we are using a borrowed KSE: save state and exit;
+ * kse_reassign() will recycle the kse as needed.
+ * 2) we are not: save state, and then convert ourselves
+ * to be an upcall, bound to the KSE.
+ * if there are others that need the kse,
+ * give them a chance by doing an mi_switch().
+ * Because we are bound, control will eventually return
+ * to us here.
+ * ***
+ * Save the thread's context, and link it
+ * into the KSEGRP's list of completed threads.
*/
- CTR3(KTR_PROC, "userret: upcall thread %p (pid %d, %s)",
- td, td->td_proc->p_pid, td->td_proc->p_comm);
+ error = thread_export_context(td);
+ td->td_mailbox = NULL;
+ if (error) {
+ /*
+ * If we are not running on a borrowed KSE, then
+ * failing to do the KSE operation just defaults
+ * back to synchronous operation, so just return from
+ * the syscall. If it IS borrowed, there is nothing
+ * we can do. We just lose that context. We
+ * probably should note this somewhere and send
+ * the process a signal.
+ */
+ PROC_LOCK(td->td_proc);
+ psignal(td->td_proc, SIGSEGV);
+ mtx_lock_spin(&sched_lock);
+ if (td->td_kse->ke_bound == NULL) {
+ td->td_flags &= ~TDF_UNBOUND;
+ PROC_UNLOCK(td->td_proc);
+ mtx_unlock_spin(&sched_lock);
+ return (error); /* go sync */
+ }
+ thread_exit();
+ }
/*
- * Set user context to the UTS.
+ * If the KSE is owned and we are borrowing it,
+ * don't make an upcall, just exit so that the owner
+ * can get its KSE if it wants it.
+ * Our context is already safely stored for later
+ * use by the UTS.
*/
- cpu_set_upcall_kse(td, ke);
-
+ PROC_LOCK(p);
+ mtx_lock_spin(&sched_lock);
+ if (td->td_kse->ke_bound) {
+ thread_exit();
+ }
+ PROC_UNLOCK(p);
+
/*
- * Put any completed mailboxes on this KSE's list.
+ * Turn ourself into a bound upcall.
+ * We will rely on kse_reassign()
+ * to make us run at a later time.
+ * We should look just like a scheduled upcall
+ * from msleep() or cv_wait().
*/
- error = thread_link_mboxes(td->td_ksegrp, ke);
- if (error)
- goto bad;
+ td->td_flags &= ~TDF_UNBOUND;
+ td->td_flags |= TDF_UPCALLING;
+ /* Only get here if we have become an upcall */
- /*
- * Set state and mailbox.
+ } else {
+ mtx_lock_spin(&sched_lock);
+ }
+ /*
+ * We ARE going back to userland with this KSE.
+ * Check for threads that need to borrow it.
+ * Optimisation: don't call mi_switch if no-one wants the KSE.
+ * Any other thread that comes ready after this missed the boat.
+ */
+ ke = td->td_kse;
+ if ((td2 = kg->kg_last_assigned))
+ td2 = TAILQ_NEXT(td2, td_runq);
+ else
+ td2 = TAILQ_FIRST(&kg->kg_runq);
+ if (td2) {
+ /*
+ * Force a switch to more urgent 'in kernel'
+ * work. Control will return to this thread
+ * when there is no more work to do.
+ * kse_reassign() will do that for us.
*/
- td->td_flags &= ~TDF_UPCALLING;
-#if 0
- error = suword((caddr_t)ke->ke_mailbox +
- offsetof(struct kse_mailbox, km_curthread),
- 0);
-#else /* if user pointer arithmetic is ok in the kernel */
- error = suword((caddr_t)&ke->ke_mailbox->km_curthread, 0);
-#endif
- if (error)
- goto bad;
+ TD_SET_LOAN(td);
+ ke->ke_bound = td;
+ ke->ke_thread = NULL;
+ mi_switch(); /* kse_reassign() will (re)find td2 */
+ }
+ mtx_unlock_spin(&sched_lock);
+
+ /*
+ * Optimisation:
+ * Ensure that we have a spare thread available,
+ * for when we re-enter the kernel.
+ */
+ if (td->td_standin == NULL) {
+ if (ke->ke_tdspare) {
+ td->td_standin = ke->ke_tdspare;
+ ke->ke_tdspare = NULL;
+ } else {
+ td->td_standin = thread_alloc();
+ }
}
+
+ /*
+ * To get here, we know there is no other need for our
+ * KSE so we can proceed. If not upcalling, go back to
+ * userspace. If we are, get the upcall set up.
+ */
+ if ((td->td_flags & TDF_UPCALLING) == 0)
+ return (0);
+
+ /*
+ * We must be an upcall to get this far.
+ * There is no more work to do and we are going to ride
+ * this thread/KSE up to userland as an upcall.
+ * Do the last parts of the setup needed for the upcall.
+ */
+ CTR3(KTR_PROC, "userret: upcall thread %p (pid %d, %s)",
+ td, td->td_proc->p_pid, td->td_proc->p_comm);
+
+ /*
+ * Set user context to the UTS.
+ */
+ cpu_set_upcall_kse(td, ke);
+
/*
- * Stop any chance that we may be separated from
- * the KSE we are currently on. This is "biting the bullet",
- * we are committing to go to user space as as this KSE here.
+ * Put any completed mailboxes on this KSE's list.
*/
- return (error);
+ error = thread_link_mboxes(kg, ke);
+ if (error)
+ goto bad;
+
+ /*
+ * Set state and mailbox.
+ * From now on we are just a bound outgoing process.
+ * **Problem** userret is often called several times.
+ * It would be nice if this all happened only on the first time
+ * through (the scan for extra work, etc.).
+ */
+ td->td_flags &= ~TDF_UPCALLING;
+#if 0
+ error = suword((caddr_t)ke->ke_mailbox +
+ offsetof(struct kse_mailbox, km_curthread), 0);
+#else /* if user pointer arithmetic is ok in the kernel */
+ error = suword((caddr_t)&ke->ke_mailbox->km_curthread, 0);
+#endif
+ if (!error)
+ return (0);
+
bad:
/*
* Things are going to be so screwed we should just kill the process.
* how do we do that?
*/
- panic ("thread_userret.. need to kill proc..... how?");
+ PROC_LOCK(td->td_proc);
+ psignal(td->td_proc, SIGSEGV);
+ PROC_UNLOCK(td->td_proc);
+ return (error); /* go sync */
}
/*
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index 41a775f..33279db 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -286,6 +286,7 @@ struct thread {
struct kse_thr_mailbox *td_mailbox; /* the userland mailbox address */
struct ucred *td_ucred; /* (k) Reference to credentials. */
void (*td_switchin)(void); /* (k) Switchin special func. */
+ struct thread *td_standin; /* (?) use this for an upcall */
u_int td_critnest; /* (k) Critical section nest level. */
#define td_endzero td_md
@@ -344,6 +345,7 @@ struct thread {
#define TD_IS_SUSPENDED(td) ((td)->td_inhibitors & TDI_SUSPENDED)
#define TD_IS_SWAPPED(td) ((td)->td_inhibitors & TDI_SWAPPED)
#define TD_ON_LOCK(td) ((td)->td_inhibitors & TDI_LOCK)
+#define TD_LENT(td) ((td)->td_inhibitors & TDI_LOAN)
#define TD_AWAITING_INTR(td) ((td)->td_inhibitors & TDI_IWAIT)
#define TD_IS_RUNNING(td) ((td)->td_state == TDS_RUNNING)
#define TD_ON_RUNQ(td) ((td)->td_state == TDS_RUNQ)
@@ -929,7 +931,7 @@ void thread_suspend_one(struct thread *td);
void thread_unsuspend_one(struct thread *td);
int thread_userret(struct thread *td, struct trapframe *frame);
-void thread_sanity_check(struct thread *td);
+void thread_sanity_check(struct thread *td, char *);
#endif /* _KERNEL */
#endif /* !_SYS_PROC_H_ */