Diffstat (limited to 'lib/libpthread/thread/thr_kern.c')
-rw-r--r-- | lib/libpthread/thread/thr_kern.c | 2007
1 file changed, 1602 insertions, 405 deletions
diff --git a/lib/libpthread/thread/thr_kern.c b/lib/libpthread/thread/thr_kern.c index 6e59a43..b87ae3d 100644 --- a/lib/libpthread/thread/thr_kern.c +++ b/lib/libpthread/thread/thr_kern.c @@ -1,4 +1,6 @@ /* + * Copyright (C) 2003 Daniel M. Eischen <deischen@freebsd.org> + * Copyright (C) 2002 Jonathon Mini <mini@freebsd.org> * Copyright (c) 1995-1998 John Birrell <jb@cimlogic.com.au> * All rights reserved. * @@ -32,474 +34,1452 @@ * $FreeBSD$ * */ -#include <errno.h> -#include <poll.h> +#include <sys/cdefs.h> +__FBSDID("$FreeBSD"); + +#include <sys/types.h> +#include <sys/kse.h> +#include <sys/signalvar.h> +#include <sys/queue.h> +#include <machine/atomic.h> + +#include <assert.h> +#include <signal.h> #include <stdlib.h> -#include <stdarg.h> #include <string.h> +#include <time.h> +#include <ucontext.h> #include <unistd.h> -#include <sys/param.h> -#include <sys/types.h> -#include <sys/signalvar.h> -#include <sys/stat.h> -#include <sys/time.h> -#include <sys/socket.h> -#include <sys/uio.h> -#include <sys/syscall.h> -#include <fcntl.h> -#include <pthread.h> + +#include "atomic_ops.h" #include "thr_private.h" +#include "pthread_md.h" +#include "libc_private.h" -/* #define DEBUG_THREAD_KERN */ +/*#define DEBUG_THREAD_KERN */ #ifdef DEBUG_THREAD_KERN #define DBG_MSG stdout_debug #else #define DBG_MSG(x...) #endif -static int _kern_idle_running = 0; -static struct timeval _kern_idle_timeout; +/* + * Define a high water mark for the maximum number of threads that + * will be cached. Once this level is reached, any extra threads + * will be free()'d. + * + * XXX - It doesn't make sense to worry about the maximum number of + * KSEs that we can cache because the system will limit us to + * something *much* less than the maximum number of threads + * that we can have. Disregarding KSEs in their own group, + * the maximum number of KSEs is the number of processors in + * the system. + */ +#define MAX_CACHED_THREADS 100 +#define KSE_STACKSIZE 16384 -/* Static function prototype definitions: */ -static void -thread_kern_idle(void); +#define KSE_SET_MBOX(kse, thrd) \ + (kse)->k_mbx.km_curthread = &(thrd)->tmbx -static void -dequeue_signals(void); +#define KSE_SET_EXITED(kse) (kse)->k_flags |= KF_EXITED -static inline void -thread_run_switch_hook(pthread_t thread_out, pthread_t thread_in); +/* + * Add/remove threads from a KSE's scheduling queue. + * For now the scheduling queue is hung off the KSEG. + */ +#define KSEG_THRQ_ADD(kseg, thr) \ + TAILQ_INSERT_TAIL(&(kseg)->kg_threadq, thr, kle) +#define KSEG_THRQ_REMOVE(kseg, thr) \ + TAILQ_REMOVE(&(kseg)->kg_threadq, thr, kle) -/* Static variables: */ -static int last_tick = 0; +/* + * Macros for manipulating the run queues. The priority queue + * routines use the thread's pqe link and also handle the setting + * and clearing of the thread's THR_FLAGS_IN_RUNQ flag. + */ +#define KSE_RUNQ_INSERT_HEAD(kse, thrd) \ + _pq_insert_head(&(kse)->k_schedq->sq_runq, thrd) +#define KSE_RUNQ_INSERT_TAIL(kse, thrd) \ + _pq_insert_tail(&(kse)->k_schedq->sq_runq, thrd) +#define KSE_RUNQ_REMOVE(kse, thrd) \ + _pq_remove(&(kse)->k_schedq->sq_runq, thrd) +#define KSE_RUNQ_FIRST(kse) _pq_first(&(kse)->k_schedq->sq_runq) + + +/* + * We've got to keep track of everything that is allocated, not only + * to have a speedy free list, but also so they can be deallocated + * after a fork(). 
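+ * (_kse_single_thread() below walks all of these lists and frees their entries.)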
+ */ +static TAILQ_HEAD(, kse) active_kseq; +static TAILQ_HEAD(, kse) free_kseq; +static TAILQ_HEAD(, kse_group) free_kse_groupq; +static TAILQ_HEAD(, kse_group) active_kse_groupq; +static struct lock kse_lock; /* also used for kseg queue */ +static int free_kse_count = 0; +static int free_kseg_count = 0; +static TAILQ_HEAD(, pthread) free_threadq; +static struct lock thread_lock; +static int free_thread_count = 0; +static int inited = 0; +static int active_kse_count = 0; +static int active_kseg_count = 0; + +static void kse_check_completed(struct kse *kse); +static void kse_check_waitq(struct kse *kse); +static void kse_check_signals(struct kse *kse); +static void kse_entry(struct kse_mailbox *mbx); +static void kse_fini(struct kse *curkse); +static void kse_sched_multi(struct kse *curkse); +static void kse_sched_single(struct kse *curkse); +static void kse_switchout_thread(struct kse *kse, struct pthread *thread); +static void kse_wait(struct kse *kse); +static void kseg_free(struct kse_group *kseg); +static void kseg_init(struct kse_group *kseg); +static void kse_waitq_insert(struct pthread *thread); +static void thr_cleanup(struct kse *kse, struct pthread *curthread); +static void thr_gc(struct kse *curkse); +static void thr_resume_wrapper(int unused_1, siginfo_t *unused_2, + ucontext_t *ucp); +static void thr_resume_check(struct pthread *curthread, ucontext_t *ucp, + struct pthread_sigframe *psf); +static int thr_timedout(struct pthread *thread, struct timespec *curtime); + +/* + * This is called after a fork(). + * No locks need to be taken here since we are guaranteed to be + * single threaded. + */ void -_thread_kern_sched(void) +_kse_single_thread(struct pthread *curthread) { - struct timespec ts; - struct timeval tv; - struct pthread *curthread = _get_curthread(); - unsigned int current_tick; - - /* Get the current time of day. */ - GET_CURRENT_TOD(tv); - TIMEVAL_TO_TIMESPEC(&tv, &ts); - current_tick = _sched_ticks; + struct kse *kse, *kse_next; + struct kse_group *kseg, *kseg_next; + struct pthread *thread, *thread_next; + kse_critical_t crit; + int i; /* - * Enter a critical section. + * Disable upcalls and clear the threaded flag. + * XXX - I don't think we need to disable upcalls after a fork(). + * but it doesn't hurt. */ - _thread_kern_kse_mailbox.km_curthread = NULL; + crit = _kse_critical_enter(); + __isthreaded = 0; /* - * If this thread is becoming inactive, make note of the - * time. + * Enter a loop to remove and free all threads other than + * the running thread from the active thread list: */ - if (curthread->state != PS_RUNNING) { + for (thread = TAILQ_FIRST(&_thread_list); thread != NULL; + thread = thread_next) { /* - * Save the current time as the time that the - * thread became inactive: + * Advance to the next thread before the destroying + * the current thread. + */ + thread_next = TAILQ_NEXT(thread, tle); + + /* + * Remove this thread from the list (the current + * thread will be removed but re-added by libpthread + * initialization. 
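+ * The re-add happens in _libpthread_init(), called at the end of this function.)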
*/ - curthread->last_inactive = (long)current_tick; - if (curthread->last_inactive < - curthread->last_active) { - /* Account for a rollover: */ - curthread->last_inactive =+ - UINT_MAX + 1; + TAILQ_REMOVE(&_thread_list, thread, tle); + /* Make sure this isn't the running thread: */ + if (thread != curthread) { + _thr_stack_free(&thread->attr); + if (thread->specific != NULL) + free(thread->specific); + for (i = 0; i < MAX_THR_LOCKLEVEL; i++) { + _lockuser_destroy(&thread->lockusers[i]); + } + _lock_destroy(&thread->lock); + free(thread); + } + } + + TAILQ_INIT(&curthread->mutexq); /* initialize mutex queue */ + curthread->joiner = NULL; /* no joining threads yet */ + sigemptyset(&curthread->sigpend); /* clear pending signals */ + if (curthread->specific != NULL) { + free(curthread->specific); + curthread->specific = NULL; + curthread->specific_data_count = 0; + } + + /* Free the free KSEs: */ + while ((kse = TAILQ_FIRST(&free_kseq)) != NULL) { + TAILQ_REMOVE(&free_kseq, kse, k_qe); + _ksd_destroy(&kse->k_ksd); + free(kse); + } + free_kse_count = 0; + + /* Free the active KSEs: */ + for (kse = TAILQ_FIRST(&active_kseq); kse != NULL; kse = kse_next) { + kse_next = TAILQ_NEXT(kse, k_qe); + TAILQ_REMOVE(&active_kseq, kse, k_qe); + for (i = 0; i < MAX_KSE_LOCKLEVEL; i++) { + _lockuser_destroy(&kse->k_lockusers[i]); } + _lock_destroy(&kse->k_lock); + free(kse); + } + active_kse_count = 0; + + /* Free the free KSEGs: */ + while ((kseg = TAILQ_FIRST(&free_kse_groupq)) != NULL) { + TAILQ_REMOVE(&free_kse_groupq, kseg, kg_qe); + _lock_destroy(&kseg->kg_lock); + free(kseg); + } + free_kseg_count = 0; + + /* Free the active KSEGs: */ + for (kseg = TAILQ_FIRST(&active_kse_groupq); + kseg != NULL; kseg = kseg_next) { + kseg_next = TAILQ_NEXT(kseg, kg_qe); + TAILQ_REMOVE(&active_kse_groupq, kseg, kg_qe); + _lock_destroy(&kseg->kg_lock); + free(kseg); + } + active_kseg_count = 0; + + /* Free the free threads. */ + while ((thread = TAILQ_FIRST(&free_threadq)) != NULL) { + TAILQ_REMOVE(&free_threadq, thread, tle); + if (thread->specific != NULL) + free(thread->specific); + for (i = 0; i < MAX_THR_LOCKLEVEL; i++) { + _lockuser_destroy(&thread->lockusers[i]); + } + _lock_destroy(&thread->lock); + free(thread); + } + free_thread_count = 0; + + /* Free the to-be-gc'd threads. */ + while ((thread = TAILQ_FIRST(&_thread_gc_list)) != NULL) { + TAILQ_REMOVE(&_thread_gc_list, thread, tle); + free(thread); + } + + if (inited != 0) { + /* + * Destroy these locks; they'll be recreated to assure they + * are in the unlocked state. + */ + _lock_destroy(&kse_lock); + _lock_destroy(&thread_lock); + _lock_destroy(&_thread_list_lock); + inited = 0; + } + + /* + * After a fork(), the leftover thread goes back to being + * scope process. + */ + curthread->attr.flags &= ~PTHREAD_SCOPE_SYSTEM; + curthread->attr.flags |= PTHREAD_SCOPE_PROCESS; + + /* + * After a fork, we are still operating on the thread's original + * stack. Don't clear the THR_FLAGS_USER from the thread's + * attribute flags. + */ + + /* Initialize the threads library. */ + curthread->kse = NULL; + curthread->kseg = NULL; + _kse_initial = NULL; + _libpthread_init(curthread); +} + +/* + * This is used to initialize housekeeping and to initialize the + * KSD for the KSE. 
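+ * Calling it more than once is harmless; the work is skipped once `inited' is set.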
+ */ +void +_kse_init(void) +{ + if (inited == 0) { + TAILQ_INIT(&active_kseq); + TAILQ_INIT(&active_kse_groupq); + TAILQ_INIT(&free_kseq); + TAILQ_INIT(&free_kse_groupq); + TAILQ_INIT(&free_threadq); + if (_lock_init(&kse_lock, LCK_ADAPTIVE, + _kse_lock_wait, _kse_lock_wakeup) != 0) + PANIC("Unable to initialize free KSE queue lock"); + if (_lock_init(&thread_lock, LCK_ADAPTIVE, + _kse_lock_wait, _kse_lock_wakeup) != 0) + PANIC("Unable to initialize free thread queue lock"); + if (_lock_init(&_thread_list_lock, LCK_ADAPTIVE, + _kse_lock_wait, _kse_lock_wakeup) != 0) + PANIC("Unable to initialize thread list lock"); + active_kse_count = 0; + active_kseg_count = 0; + inited = 1; } +} + +int +_kse_isthreaded(void) +{ + return (__isthreaded != 0); +} + +/* + * This is called when the first thread (other than the initial + * thread) is created. + */ +void +_kse_setthreaded(int threaded) +{ + if ((threaded != 0) && (__isthreaded == 0)) { + /* + * Locking functions in libc are required when there are + * threads other than the initial thread. + */ + __isthreaded = 1; + + /* + * Tell the kernel to create a KSE for the initial thread + * and enable upcalls in it. + */ + kse_create(&_kse_initial->k_mbx, 0); + KSE_SET_MBOX(_kse_initial, _thr_initial); + } +} + +/* + * Lock wait and wakeup handlers for KSE locks. These are only used by + * KSEs, and should never be used by threads. KSE locks include the + * KSE group lock (used for locking the scheduling queue) and the + * kse_lock defined above. + * + * When a KSE lock attempt blocks, the entire KSE blocks allowing another + * KSE to run. For the most part, it doesn't make much sense to try and + * schedule another thread because you need to lock the scheduling queue + * in order to do that. And since the KSE lock is used to lock the scheduling + * queue, you would just end up blocking again. + */ +void +_kse_lock_wait(struct lock *lock, struct lockuser *lu) +{ + struct kse *curkse = (struct kse *)_LCK_GET_PRIVATE(lu); + struct timespec ts; + kse_critical_t crit; /* - * Place this thread into the appropriate queue(s). + * Enter a loop to wait until we get the lock. */ - switch (curthread->state) { - case PS_DEAD: - case PS_STATE_MAX: /* XXX: silences -Wall */ - case PS_SUSPENDED: - /* Dead or suspended threads are not placed in any queue. */ - break; - case PS_RUNNING: + ts.tv_sec = 0; + ts.tv_nsec = 1000000; /* 1 sec */ + KSE_SET_WAIT(curkse); + while (_LCK_BUSY(lu)) { + /* + * Yield the kse and wait to be notified when the lock + * is granted. + */ + crit = _kse_critical_enter(); + __sys_nanosleep(&ts, NULL); + _kse_critical_leave(crit); + + /* + * Make sure that the wait flag is set again in case + * we wokeup without the lock being granted. + */ + KSE_SET_WAIT(curkse); + } + KSE_CLEAR_WAIT(curkse); +} + +void +_kse_lock_wakeup(struct lock *lock, struct lockuser *lu) +{ + struct kse *curkse; + struct kse *kse; + + curkse = _get_curkse(); + kse = (struct kse *)_LCK_GET_PRIVATE(lu); + + if (kse == curkse) + PANIC("KSE trying to wake itself up in lock"); + else if (KSE_WAITING(kse)) { /* - * Save the current time as the time that the - * thread became inactive: + * Notify the owning kse that it has the lock. */ - current_tick = _sched_ticks; - curthread->last_inactive = (long)current_tick; - if (curthread->last_inactive < - curthread->last_active) { - /* Account for a rollover: */ - curthread->last_inactive =+ UINT_MAX + 1; + KSE_WAKEUP(kse); + } +} + +/* + * Thread wait and wakeup handlers for thread locks. 
These are only used + * by threads, never by KSEs. Thread locks include the per-thread lock + * (defined in its structure), and condition variable and mutex locks. + */ +void +_thr_lock_wait(struct lock *lock, struct lockuser *lu) +{ + struct pthread *curthread = (struct pthread *)lu->lu_private; + int count; + + /* + * Spin for a bit. + * + * XXX - We probably want to make this a bit smarter. It + * doesn't make sense to spin unless there is more + * than 1 CPU. A thread that is holding one of these + * locks is prevented from being swapped out for another + * thread within the same scheduling entity. + */ + count = 0; + while (_LCK_BUSY(lu) && count < 300) + count++; + while (_LCK_BUSY(lu)) { + THR_SCHED_LOCK(curthread, curthread); + if (_LCK_BUSY(lu)) { + /* Wait for the lock: */ + atomic_store_rel_int(&curthread->need_wakeup, 1); + THR_SET_STATE(curthread, PS_LOCKWAIT); + THR_SCHED_UNLOCK(curthread, curthread); + _thr_sched_switch(curthread); } + else + THR_SCHED_UNLOCK(curthread, curthread); + } +} - if ((curthread->slice_usec != -1) && - (curthread->attr.sched_policy != SCHED_FIFO)) { - /* - * Accumulate the number of microseconds for - * which the current thread has run: - */ - curthread->slice_usec += - (curthread->last_inactive - - curthread->last_active) * - (long)_clock_res_usec; - /* Check for time quantum exceeded: */ - if (curthread->slice_usec > TIMESLICE_USEC) - curthread->slice_usec = -1; +void +_thr_lock_wakeup(struct lock *lock, struct lockuser *lu) +{ + struct pthread *thread; + struct pthread *curthread; + + curthread = _get_curthread(); + thread = (struct pthread *)_LCK_GET_PRIVATE(lu); + + THR_SCHED_LOCK(curthread, thread); + _thr_setrunnable_unlocked(thread); + atomic_store_rel_int(&thread->need_wakeup, 0); + THR_SCHED_UNLOCK(curthread, thread); +} + +kse_critical_t +_kse_critical_enter(void) +{ + kse_critical_t crit; + + crit = _ksd_readandclear_tmbx; + return (crit); +} + +void +_kse_critical_leave(kse_critical_t crit) +{ + struct pthread *curthread; + + _ksd_set_tmbx(crit); + if ((crit != NULL) && ((curthread = _get_curthread()) != NULL)) + THR_YIELD_CHECK(curthread); +} + +void +_thr_critical_enter(struct pthread *thread) +{ + thread->critical_count++; +} + +void +_thr_critical_leave(struct pthread *thread) +{ + thread->critical_count--; + THR_YIELD_CHECK(thread); +} + +/* + * XXX - We may need to take the scheduling lock before calling + * this, or perhaps take the lock within here before + * doing anything else. + */ +void +_thr_sched_switch(struct pthread *curthread) +{ + struct pthread_sigframe psf; + kse_critical_t crit; + struct kse *curkse; + volatile int once = 0; + + /* We're in the scheduler, 5 by 5: */ + crit = _kse_critical_enter(); + curkse = _get_curkse(); + + curthread->need_switchout = 1; /* The thread yielded on its own. */ + curthread->critical_yield = 0; /* No need to yield anymore. */ + curthread->slice_usec = -1; /* Restart the time slice. */ + + /* + * The signal frame is allocated off the stack because + * a thread can be interrupted by other signals while + * it is running down pending signals. + */ + sigemptyset(&psf.psf_sigset); + curthread->curframe = &psf; + + _thread_enter_uts(&curthread->tmbx, &curkse->k_mbx); + + /* + * This thread is being resumed; check for cancellations. + */ + if ((once == 0) && (!THR_IN_CRITICAL(curthread))) { + once = 1; + thr_resume_check(curthread, &curthread->tmbx.tm_context, &psf); + } +} + +/* + * This is the entry point of the KSE upcall. 
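+ * (Newly allocated KSEs point their mailbox's km_func here; see _kse_alloc().)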
+ */ +static void +kse_entry(struct kse_mailbox *mbx) +{ + struct kse *curkse; + + /* The kernel should always clear this before making the upcall. */ + assert(mbx->km_curthread == NULL); + curkse = (struct kse *)mbx->km_udata; + + /* Check for first time initialization: */ + if ((curkse->k_flags & KF_INITIALIZED) == 0) { + /* Setup this KSEs specific data. */ + _ksd_setprivate(&curkse->k_ksd); + _set_curkse(curkse); + + /* Set this before grabbing the context. */ + curkse->k_flags |= KF_INITIALIZED; + } + + /* Avoid checking the type of KSE more than once. */ + if ((curkse->k_kseg->kg_flags & KGF_SINGLE_THREAD) != 0) { + curkse->k_mbx.km_func = (void *)kse_sched_single; + kse_sched_single(curkse); + } else { + curkse->k_mbx.km_func = (void *)kse_sched_multi; + kse_sched_multi(curkse); + } +} + +/* + * This is the scheduler for a KSE which runs a scope system thread. + * The multi-thread KSE scheduler should also work for a single threaded + * KSE, but we use a separate scheduler so that it can be fine-tuned + * to be more efficient (and perhaps not need a separate stack for + * the KSE, allowing it to use the thread's stack). + * + * XXX - This probably needs some work. + */ +static void +kse_sched_single(struct kse *curkse) +{ + struct pthread *curthread; + struct timespec ts; + int level; + + /* This may have returned from a kse_release(). */ + if (KSE_WAITING(curkse)) + KSE_CLEAR_WAIT(curkse); + + curthread = curkse->k_curthread; + + if (curthread->active == 0) { + if (curthread->state != PS_RUNNING) { + /* Check to see if the thread has timed out. */ + KSE_GET_TOD(curkse, &ts); + if (thr_timedout(curthread, &ts) != 0) { + curthread->timeout = 1; + curthread->state = PS_RUNNING; + } } + } else if (curthread->need_switchout != 0) { + /* + * This has to do the job of kse_switchout_thread(), only + * for a single threaded KSE/KSEG. + */ - if (curthread->slice_usec == -1) { - /* - * The thread exceeded its time - * quantum or it yielded the CPU; - * place it at the tail of the - * queue for its priority. - */ - PTHREAD_PRIOQ_INSERT_TAIL(curthread); - } else { + /* This thread no longer needs to yield the CPU: */ + curthread->critical_yield = 0; + curthread->need_switchout = 0; + + /* + * Lock the scheduling queue. + * + * There is no scheduling queue for single threaded KSEs, + * but we need a lock for protection regardless. + */ + KSE_SCHED_LOCK(curkse, curkse->k_kseg); + + switch (curthread->state) { + case PS_DEAD: + /* Unlock the scheduling queue and exit the KSE. */ + KSE_SCHED_UNLOCK(curkse, curkse->k_kseg); + kse_fini(curkse); /* does not return */ + break; + + case PS_COND_WAIT: + case PS_SLEEP_WAIT: + /* Only insert threads that can timeout: */ + if (curthread->wakeup_time.tv_sec != -1) { + /* Insert into the waiting queue: */ + KSE_WAITQ_INSERT(curkse, curthread); + } + break; + + case PS_LOCKWAIT: + level = curthread->locklevel - 1; + if (_LCK_BUSY(&curthread->lockusers[level])) + KSE_WAITQ_INSERT(curkse, curthread); + else + THR_SET_STATE(curthread, PS_RUNNING); + break; + + case PS_JOIN: + case PS_MUTEX_WAIT: + case PS_RUNNING: + case PS_SIGSUSPEND: + case PS_SIGWAIT: + case PS_SUSPENDED: + case PS_DEADLOCK: + default: /* - * The thread hasn't exceeded its - * interval. Place it at the head - * of the queue for its priority. + * These states don't timeout and don't need + * to be in the waiting queue. */ - PTHREAD_PRIOQ_INSERT_HEAD(curthread); + break; } - break; - case PS_SPINBLOCK: - /* Increment spinblock count. 
*/ - _spinblock_count++; + if (curthread->state != PS_RUNNING) + curthread->active = 0; + } - /* No timeouts for these states. */ - curthread->wakeup_time.tv_sec = -1; - curthread->wakeup_time.tv_nsec = -1; + while (curthread->state != PS_RUNNING) { + kse_wait(curkse); + } - /* Restart the time slice. */ - curthread->slice_usec = -1; + /* Remove the frame reference. */ + curthread->curframe = NULL; - /* Insert into the work queue. */ - PTHREAD_WORKQ_INSERT(curthread); - break; + /* Unlock the scheduling queue. */ + KSE_SCHED_UNLOCK(curkse, curkse->k_kseg); - case PS_DEADLOCK: - case PS_JOIN: - case PS_MUTEX_WAIT: - case PS_WAIT_WAIT: - /* No timeouts for these states. */ - curthread->wakeup_time.tv_sec = -1; - curthread->wakeup_time.tv_nsec = -1; + /* + * Continue the thread at its current frame: + */ + _thread_switch(&curthread->tmbx, &curkse->k_mbx.km_curthread); +} + +void +dump_queues(struct kse *curkse) +{ + struct pthread *thread; + + DBG_MSG("Threads in waiting queue:\n"); + TAILQ_FOREACH(thread, &curkse->k_kseg->kg_schedq.sq_waitq, pqe) { + DBG_MSG(" thread %p, state %d, blocked %d\n", + thread, thread->state, thread->blocked); + } +} + + +/* + * This is the scheduler for a KSE which runs multiple threads. + */ +static void +kse_sched_multi(struct kse *curkse) +{ + struct pthread *curthread; + struct pthread_sigframe *curframe; + int ret; + + /* This may have returned from a kse_release(). */ + if (KSE_WAITING(curkse)) + KSE_CLEAR_WAIT(curkse); - /* Restart the time slice. */ - curthread->slice_usec = -1; + /* Lock the scheduling lock. */ + KSE_SCHED_LOCK(curkse, curkse->k_kseg); - /* Insert into the waiting queue. */ - PTHREAD_WAITQ_INSERT(curthread); - break; + /* + * If the current thread was completed in another KSE, then + * it will be in the run queue. Don't mark it as being blocked. + */ + if (((curthread = curkse->k_curthread) != NULL) && + ((curthread->flags & THR_FLAGS_IN_RUNQ) == 0) && + (curthread->need_switchout == 0)) { + /* + * Assume the current thread is blocked; when the + * completed threads are checked and if the current + * thread is among the completed, the blocked flag + * will be cleared. + */ + curthread->blocked = 1; + } + + /* Check for any unblocked threads in the kernel. */ + kse_check_completed(curkse); - case PS_COND_WAIT: - case PS_SLEEP_WAIT: - /* These states can timeout. */ - /* Restart the time slice. */ - curthread->slice_usec = -1; + /* + * Check for threads that have timed-out. + */ + kse_check_waitq(curkse); - /* Insert into the waiting queue. */ - PTHREAD_WAITQ_INSERT(curthread); - break; + /* + * Switchout the current thread, if necessary, as the last step + * so that it is inserted into the run queue (if it's runnable) + * _after_ any other threads that were added to it above. + */ + if (curthread == NULL) + ; /* Nothing to do here. */ + else if ((curthread->need_switchout == 0) && + (curthread->blocked == 0) && (THR_IN_CRITICAL(curthread))) { + /* + * Resume the thread and tell it to yield when + * it leaves the critical region. 
+ */ + curthread->critical_yield = 0; + curthread->active = 1; + if ((curthread->flags & THR_FLAGS_IN_RUNQ) != 0) + KSE_RUNQ_REMOVE(curkse, curthread); + curkse->k_curthread = curthread; + curthread->kse = curkse; + KSE_SCHED_UNLOCK(curkse, curkse->k_kseg); + DBG_MSG("Continuing thread %p in critical region\n", + curthread); + ret = _thread_switch(&curthread->tmbx, + &curkse->k_mbx.km_curthread); + if (ret != 0) + PANIC("Can't resume thread in critical region\n"); } + else if ((curthread->flags & THR_FLAGS_IN_RUNQ) == 0) + kse_switchout_thread(curkse, curthread); + curkse->k_curthread = NULL; - /* Switch into the scheduler's context. */ - DBG_MSG("Calling _thread_enter_uts()\n"); - _thread_enter_uts(&curthread->mailbox, &_thread_kern_kse_mailbox); - DBG_MSG("Returned from _thread_enter_uts, thread %p\n", curthread); + /* This has to be done without the scheduling lock held. */ + KSE_SCHED_UNLOCK(curkse, curkse->k_kseg); + kse_check_signals(curkse); + + /* Check for GC: */ + if (_gc_check != 0) + thr_gc(curkse); + KSE_SCHED_LOCK(curkse, curkse->k_kseg); + + dump_queues(curkse); + + /* Check if there are no threads ready to run: */ + while (((curthread = KSE_RUNQ_FIRST(curkse)) == NULL) && + (curkse->k_kseg->kg_threadcount != 0)) { + /* + * Wait for a thread to become active or until there are + * no more threads. + */ + kse_wait(curkse); + kse_check_waitq(curkse); + KSE_SCHED_UNLOCK(curkse, curkse->k_kseg); + kse_check_signals(curkse); + if (_gc_check != 0) + thr_gc(curkse); + KSE_SCHED_LOCK(curkse, curkse->k_kseg); + } + + /* Check for no more threads: */ + if (curkse->k_kseg->kg_threadcount == 0) { + /* + * Normally this shouldn't return, but it will if there + * are other KSEs running that create new threads that + * are assigned to this KSE[G]. For instance, if a scope + * system thread were to create a scope process thread + * and this kse[g] is the initial kse[g], then that newly + * created thread would be assigned to us (the initial + * kse[g]). + */ + KSE_SCHED_UNLOCK(curkse, curkse->k_kseg); + kse_fini(curkse); + KSE_SCHED_LOCK(curkse, curkse->k_kseg); + curthread = KSE_RUNQ_FIRST(curkse); + } + + THR_ASSERT(curthread != NULL, + "Return from kse_wait/fini without thread."); + THR_ASSERT(curthread->state != PS_DEAD, + "Trying to resume dead thread!"); + KSE_RUNQ_REMOVE(curkse, curthread); /* - * This point is reached when _thread_switch() is called - * to restore the state of a thread. - * - * This is the normal way out of the scheduler (for synchronous - * switches). + * Make the selected thread the current thread. */ + curkse->k_curthread = curthread; - /* XXXKSE: Do this inside _thread_kern_scheduler() */ - if (curthread->sig_defer_count == 0) { - if (((curthread->cancelflags & - PTHREAD_AT_CANCEL_POINT) == 0) && - ((curthread->cancelflags & - PTHREAD_CANCEL_ASYNCHRONOUS) != 0)) - /* - * Stick a cancellation point at the - * start of each async-cancellable - * thread's resumption. - * - * We allow threads woken at cancel - * points to do their own checks. - */ - pthread_testcancel(); + /* + * Make sure the current thread's kse points to this kse. + */ + curthread->kse = curkse; + + /* + * Reset accounting. + */ + curthread->tmbx.tm_uticks = 0; + curthread->tmbx.tm_sticks = 0; + + /* + * Reset the time slice if this thread is running for the first + * time or running again after using its full time slice allocation. + */ + if (curthread->slice_usec == -1) + curthread->slice_usec = 0; + + /* Mark the thread active. */ + curthread->active = 1; + + /* Remove the frame reference. 
*/ + curframe = curthread->curframe; + curthread->curframe = NULL; + + /* Unlock the scheduling queue: */ + KSE_SCHED_UNLOCK(curkse, curkse->k_kseg); + + /* + * The thread's current signal frame will only be NULL if it + * is being resumed after being blocked in the kernel. In + * this case, and if the thread needs to run down pending + * signals or needs a cancellation check, we need to add a + * signal frame to the thread's context. + */ +#if 0 + if ((curframe == NULL) && ((curthread->check_pending != 0) || + (((curthread->cancelflags & THR_AT_CANCEL_POINT) == 0) && + ((curthread->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0)))) { + signalcontext(&curthread->tmbx.tm_context, 0, + (__sighandler_t *)thr_resume_wrapper); } +#endif + /* + * Continue the thread at its current frame: + */ + DBG_MSG("Continuing thread %p\n", curthread); + ret = _thread_switch(&curthread->tmbx, &curkse->k_mbx.km_curthread); + if (ret != 0) + PANIC("Thread has returned from _thread_switch"); - if (_sched_switch_hook != NULL) { - /* Run the installed switch hook: */ - thread_run_switch_hook(_last_user_thread, curthread); + /* This point should not be reached. */ + PANIC("Thread has returned from _thread_switch"); +} + +static void +kse_check_signals(struct kse *curkse) +{ + sigset_t sigset; + int i; + + /* Deliver posted signals. */ + for (i = 0; i < _SIG_WORDS; i++) { + atomic_swap_int(&curkse->k_mbx.km_sigscaught.__bits[i], + 0, &sigset.__bits[i]); + } + if (SIGNOTEMPTY(sigset)) { + /* + * Dispatch each signal. + * + * XXX - There is no siginfo for any of these. + * I think there should be, especially for + * signals from other processes (si_pid, si_uid). + */ + for (i = 1; i < NSIG; i++) { + if (sigismember(&sigset, i) != 0) { + DBG_MSG("Dispatching signal %d\n", i); + _thr_sig_dispatch(curkse, i, + NULL /* no siginfo */); + } + } + sigemptyset(&sigset); + __sys_sigprocmask(SIG_SETMASK, &sigset, NULL); } } -void -_thread_kern_scheduler(struct kse_mailbox *km) +static void +thr_resume_wrapper(int unused_1, siginfo_t *unused_2, ucontext_t *ucp) { - struct timespec ts; - struct timeval tv; - pthread_t td, pthread, pthread_h; - unsigned int current_tick; - struct kse_thr_mailbox *tm, *p; - sigset_t sigset; - int i; + struct pthread *curthread = _get_curthread(); - DBG_MSG("entering\n"); - while (!TAILQ_EMPTY(&_thread_list)) { + thr_resume_check(curthread, ucp, NULL); +} - /* Get the current time of day. */ - ts = km->km_timeofday; - TIMESPEC_TO_TIMEVAL(&_sched_tod, &ts); - current_tick = _sched_ticks; +static void +thr_resume_check(struct pthread *curthread, ucontext_t *ucp, + struct pthread_sigframe *psf) +{ + /* Check signals before cancellations. */ + while (curthread->check_pending != 0) { + /* Clear the pending flag. */ + curthread->check_pending = 0; /* - * Pick up threads that had blocked in the kernel and - * have now completed their trap (syscall, vm fault, etc). - * These threads were PS_RUNNING (and still are), but they - * need to be added to the run queue so that they can be - * scheduled again. + * It's perfectly valid, though not portable, for + * signal handlers to munge their interrupted context + * and expect to return to it. Ensure we use the + * correct context when running down signals. */ - DBG_MSG("Picking up km_completed\n"); - p = km->km_completed; - km->km_completed = NULL; /* XXX: Atomic xchg here. 
*/ - while ((tm = p) != NULL) { - p = tm->tm_next; - tm->tm_next = NULL; - if (tm->tm_udata == NULL) { - DBG_MSG("\tidle context\n"); - _kern_idle_running = 0; - continue; + _thr_sig_rundown(curthread, ucp, psf); + } + + if (((curthread->cancelflags & THR_AT_CANCEL_POINT) == 0) && + ((curthread->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0)) + pthread_testcancel(); +} + +/* + * Clean up a thread. This must be called with the thread's KSE + * scheduling lock held. The thread must be a thread from the + * KSE's group. + */ +static void +thr_cleanup(struct kse *curkse, struct pthread *thread) +{ + struct pthread *joiner; + int free_thread = 0; + + if ((joiner = thread->joiner) != NULL) { + thread->joiner = NULL; + if ((joiner->state == PS_JOIN) && + (joiner->join_status.thread == thread)) { + joiner->join_status.thread = NULL; + + /* Set the return status for the joining thread: */ + joiner->join_status.ret = thread->ret; + + /* Make the thread runnable. */ + if (joiner->kseg == curkse->k_kseg) + _thr_setrunnable_unlocked(joiner); + else { + KSE_SCHED_UNLOCK(curkse, curkse->k_kseg); + KSE_SCHED_LOCK(curkse, joiner->kseg); + _thr_setrunnable_unlocked(joiner); + KSE_SCHED_UNLOCK(curkse, joiner->kseg); + KSE_SCHED_LOCK(curkse, curkse->k_kseg); } - DBG_MSG("\tmailbox=%p pthread=%p\n", tm, tm->tm_udata); - PTHREAD_PRIOQ_INSERT_TAIL((pthread_t)tm->tm_udata); } + thread->attr.flags |= PTHREAD_DETACHED; + } - /* Deliver posted signals. */ - DBG_MSG("Picking up signals\n"); - bcopy(&km->km_sigscaught, &sigset, sizeof(sigset_t)); - sigemptyset(&km->km_sigscaught); /* XXX */ - if (SIGNOTEMPTY(sigset)) - for (i = 1; i < NSIG; i++) - if (sigismember(&sigset, i) != 0) - _thread_sig_dispatch(i); + thread->flags |= THR_FLAGS_GC_SAFE; + thread->kseg->kg_threadcount--; + KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock); + _thr_stack_free(&thread->attr); + if ((thread->attr.flags & PTHREAD_DETACHED) != 0) { + /* Remove this thread from the list of all threads: */ + THR_LIST_REMOVE(thread); + if (thread->refcount == 0) { + THR_GCLIST_REMOVE(thread); + TAILQ_REMOVE(&thread->kseg->kg_threadq, thread, kle); + free_thread = 1; + } + } + KSE_LOCK_RELEASE(curkse, &_thread_list_lock); + if (free_thread != 0) + _thr_free(curkse, thread); +} - if (_spinblock_count != 0) { - /* - * Enter a loop to look for threads waiting on - * a spinlock that is now available. - */ - PTHREAD_WAITQ_SETACTIVE(); - TAILQ_FOREACH(pthread, &_workq, qe) { - if (pthread->state == PS_SPINBLOCK) { - /* - * If the lock is available, let the - * thread run. 
- */ - if (pthread->data.spinlock-> - access_lock == 0) { - PTHREAD_WAITQ_CLEARACTIVE(); - PTHREAD_WORKQ_REMOVE(pthread); - PTHREAD_PRIOQ_INSERT_TAIL( - pthread); - PTHREAD_SET_STATE(pthread, - PS_RUNNING); - PTHREAD_WAITQ_SETACTIVE(); - - /* - * One less thread in a - * spinblock state: - */ - _spinblock_count--; - } - } +void +thr_gc(struct pthread *curthread) +{ + struct pthread *td, *joiner; + struct kse_group *free_kseg; + + _gc_check = 0; + KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock); + while ((td = TAILQ_FIRST(&_thread_gc_list)) != NULL) { + THR_GCLIST_REMOVE(td); + clean = (td->attr.flags & PTHREAD_DETACHED) != 0; + KSE_LOCK_RELEASE(curkse, &_thread_list_lock); + + KSE_SCHED_LOCK(curkse, td->kseg); + TAILQ_REMOVE(&td->kseg->kg_threadq, td, kle); + if (TAILQ_EMPTY(&td->kseg->kg_threadq)) + free_kseg = td->kseg; + else + free_kseg = NULL; + joiner = NULL; + if ((td->joiner != NULL) && (td->joiner->state == PS_JOIN) && + (td->joiner->join_status.thread == td)) { + joiner = td->joiner; + joiner->join_status.thread = NULL; + + /* Set the return status for the joining thread: */ + joiner->join_status.ret = td->ret; + + /* Make the thread runnable. */ + if (td->kseg == joiner->kseg) { + _thr_setrunnable_unlocked(joiner); + joiner = NULL; } - PTHREAD_WAITQ_CLEARACTIVE(); } + td->joiner = NULL; + KSE_SCHED_UNLOCK(curkse, td->kseg); + if (free_kseg != NULL) + kseg_free(free_kseg); + if (joiner != NULL) { + KSE_SCHED_LOCK(curkse, joiner->kseg); + _thr_setrunnable_unlocked(joiner); + KSE_SCHED_LOCK(curkse, joiner->kseg); + } + _thr_free(curkse, td); + KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock); + } + KSE_LOCK_RELEASE(curkse, &_thread_list_lock); +} + + +/* + * Only new threads that are running or suspended may be scheduled. + */ +void +_thr_schedule_add(struct pthread *curthread, struct pthread *newthread) +{ + struct kse *curkse; + kse_critical_t crit; + int need_start; - /* Wake up threads that have timed out. */ - DBG_MSG("setactive\n"); - PTHREAD_WAITQ_SETACTIVE(); - DBG_MSG("Picking up timeouts (%x)\n", TAILQ_FIRST(&_waitingq)); - while (((pthread = TAILQ_FIRST(&_waitingq)) != NULL) && - (pthread->wakeup_time.tv_sec != -1) && - (((pthread->wakeup_time.tv_sec == 0) && - (pthread->wakeup_time.tv_nsec == 0)) || - (pthread->wakeup_time.tv_sec < ts.tv_sec) || - ((pthread->wakeup_time.tv_sec == ts.tv_sec) && - (pthread->wakeup_time.tv_nsec <= ts.tv_nsec)))) { - DBG_MSG("\t...\n"); + /* + * If this is the first time creating a thread, make sure + * the mailbox is set for the current thread. + */ + if ((newthread->attr.flags & PTHREAD_SCOPE_SYSTEM) != 0) { + /* + * No need to lock the scheduling queue since the + * KSE/KSEG pair have not yet been started. + */ + KSEG_THRQ_ADD(newthread->kseg, newthread); + if (newthread->state == PS_RUNNING) + THR_RUNQ_INSERT_TAIL(newthread); + newthread->kseg->kg_threadcount++; + /* + * This thread needs a new KSE and KSEG. + */ + crit = _kse_critical_enter(); + curkse = _get_curkse(); + _ksd_setprivate(&newthread->kse->k_ksd); + kse_create(&newthread->kse->k_mbx, 1); + _ksd_setprivate(&curkse->k_ksd); + _kse_critical_leave(crit); + } + else { + /* + * Lock the KSE and add the new thread to its list of + * assigned threads. If the new thread is runnable, also + * add it to the KSE's run queue. 
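+ * (A not-yet-started KSE is flagged under the lock and kse_create()'d after the lock is dropped.)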
+ */ + need_start = 0; + KSE_SCHED_LOCK(curthread->kse, newthread->kseg); + KSEG_THRQ_ADD(newthread->kseg, newthread); + if (newthread->state == PS_RUNNING) + THR_RUNQ_INSERT_TAIL(newthread); + newthread->kseg->kg_threadcount++; + if ((newthread->kse->k_flags & KF_STARTED) == 0) { /* - * Remove this thread from the waiting queue - * (and work queue if necessary) and place it - * in the ready queue. + * This KSE hasn't been started yet. Start it + * outside of holding the lock. */ - PTHREAD_WAITQ_CLEARACTIVE(); - if (pthread->flags & PTHREAD_FLAGS_IN_WORKQ) - PTHREAD_WORKQ_REMOVE(pthread); - DBG_MSG("\twaking thread\n"); - PTHREAD_NEW_STATE(pthread, PS_RUNNING); - PTHREAD_WAITQ_SETACTIVE(); + newthread->kse->k_flags |= KF_STARTED; + need_start = 1; + } + KSE_SCHED_UNLOCK(curthread->kse, newthread->kseg); + + if (need_start != 0) + kse_create(&newthread->kse->k_mbx, 0); + else if ((newthread->state == PS_RUNNING) && + KSE_WAITING(newthread->kse)) { /* - * Flag the timeout in the thread structure: + * The thread is being scheduled on another KSEG. */ - pthread->timeout = 1; + KSE_WAKEUP(newthread->kse); } - DBG_MSG("clearactive\n"); - PTHREAD_WAITQ_CLEARACTIVE(); + } +} - /* - * Get the highest priority thread in the ready queue. - */ - DBG_MSG("Selecting thread\n"); - pthread_h = PTHREAD_PRIOQ_FIRST(); +void +kse_waitq_insert(struct pthread *thread) +{ + struct pthread *td; + + if (thread->wakeup_time.tv_sec == -1) + TAILQ_INSERT_TAIL(&thread->kse->k_schedq->sq_waitq, thread, + pqe); + else { + td = TAILQ_FIRST(&thread->kse->k_schedq->sq_waitq); + while ((td != NULL) && (td->wakeup_time.tv_sec != -1) && + ((td->wakeup_time.tv_sec < thread->wakeup_time.tv_sec) || + ((td->wakeup_time.tv_sec == thread->wakeup_time.tv_sec) && + (td->wakeup_time.tv_nsec <= thread->wakeup_time.tv_nsec)))) + td = TAILQ_NEXT(td, pqe); + if (td == NULL) + TAILQ_INSERT_TAIL(&thread->kse->k_schedq->sq_waitq, + thread, pqe); + else + TAILQ_INSERT_BEFORE(td, thread, pqe); + } + thread->flags |= THR_FLAGS_IN_WAITQ; +} - /* Check if there are no threads ready to run: */ - if (pthread_h) { - DBG_MSG("Scheduling thread\n"); - /* Remove the thread from the ready queue: */ - PTHREAD_PRIOQ_REMOVE(pthread_h); +/* + * This must be called with the scheduling lock held. + */ +static void +kse_check_completed(struct kse *kse) +{ + struct pthread *thread; + struct kse_thr_mailbox *completed; + + if ((completed = kse->k_mbx.km_completed) != NULL) { + kse->k_mbx.km_completed = NULL; + while (completed != NULL) { + thread = completed->tm_udata; + DBG_MSG("Found completed thread %p, name %s\n", + thread, + (thread->name == NULL) ? "none" : thread->name); + thread->blocked = 0; + if (thread != kse->k_curthread) + KSE_RUNQ_INSERT_TAIL(kse, thread); + completed = completed->tm_next; + } + } +} - /* Make the selected thread the current thread: */ - _set_curthread(pthread_h); +/* + * This must be called with the scheduling lock held. + */ +static void +kse_check_waitq(struct kse *kse) +{ + struct pthread *pthread; + struct timespec ts; - /* - * Save the current time as the time that the thread - * became active: - */ - current_tick = _sched_ticks; - pthread_h->last_active = (long) current_tick; + KSE_GET_TOD(kse, &ts); + /* + * Wake up threads that have timedout. This has to be + * done before adding the current thread to the run queue + * so that a CPU intensive thread doesn't get preference + * over waiting threads. 
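+ * (thr_timedout() decides whether a thread's wakeup time has passed.)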
+ */ + while (((pthread = KSE_WAITQ_FIRST(kse)) != NULL) && + thr_timedout(pthread, &ts)) { + /* Remove the thread from the wait queue: */ + KSE_WAITQ_REMOVE(kse, pthread); + DBG_MSG("Found timedout thread %p in waitq\n", pthread); + + /* Indicate the thread timedout: */ + pthread->timeout = 1; + + /* Add the thread to the priority queue: */ + THR_SET_STATE(pthread, PS_RUNNING); + KSE_RUNQ_INSERT_TAIL(kse, pthread); + } +} + +static int +thr_timedout(struct pthread *thread, struct timespec *curtime) +{ + if (thread->wakeup_time.tv_sec < 0) + return (0); + else if (thread->wakeup_time.tv_sec > curtime->tv_sec) + return (0); + else if ((thread->wakeup_time.tv_sec == curtime->tv_sec) && + (thread->wakeup_time.tv_nsec > curtime->tv_nsec)) + return (0); + else + return (1); +} + +/* + * This must be called with the scheduling lock held. + * + * Each thread has a time slice, a wakeup time (used when it wants + * to wait for a specified amount of time), a run state, and an + * active flag. + * + * When a thread gets run by the scheduler, the active flag is + * set to non-zero (1). When a thread performs an explicit yield + * or schedules a state change, it enters the scheduler and the + * active flag is cleared. When the active flag is still seen + * set in the scheduler, that means that the thread is blocked in + * the kernel (because it is cleared before entering the scheduler + * in all other instances). + * + * The wakeup time is only set for those states that can timeout. + * It is set to (-1, -1) for all other instances. + * + * The thread's run state, aside from being useful when debugging, + * is used to place the thread in an appropriate queue. There + * are 2 basic queues: + * + * o run queue - queue ordered by priority for all threads + * that are runnable + * o waiting queue - queue sorted by wakeup time for all threads + * that are not otherwise runnable (not blocked + * in kernel, not waiting for locks) + * + * The thread's time slice is used for round-robin scheduling + * (the default scheduling policy). While a SCHED_RR thread + * is runnable it's time slice accumulates. When it reaches + * the time slice interval, it gets reset and added to the end + * of the queue of threads at its priority. When a thread no + * longer becomes runnable (blocks in kernel, waits, etc), its + * time slice is reset. + * + * The job of kse_switchout_thread() is to handle all of the above. + */ +static void +kse_switchout_thread(struct kse *kse, struct pthread *thread) +{ + int level; + + /* + * Place the currently running thread into the + * appropriate queue(s). + */ + DBG_MSG("Switching out thread %p, state %d\n", thread, thread->state); + if (thread->blocked != 0) { + /* This thread must have blocked in the kernel. */ + /* thread->slice_usec = -1;*/ /* restart timeslice */ + /* + * XXX - Check for pending signals for this thread to + * see if we need to interrupt it in the kernel. + */ + /* if (thread->check_pending != 0) */ + if ((thread->slice_usec != -1) && + (thread->attr.sched_policy != SCHED_FIFO)) + thread->slice_usec += (thread->tmbx.tm_uticks + + thread->tmbx.tm_sticks) * _clock_res_usec; + } + else { + switch (thread->state) { + case PS_DEAD: /* - * Check if this thread is running for the first time - * or running again after using its full time slice - * allocation: + * The scheduler is operating on a different + * stack. It is safe to do garbage collecting + * here. 
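+ * (thr_cleanup() wakes any joiner and marks the thread GC-safe.)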
*/ - if (pthread_h->slice_usec == -1) { - /* Reset the accumulated time slice period: */ - pthread_h->slice_usec = 0; - } + thr_cleanup(kse, thread); + return; + break; + case PS_RUNNING: + /* Nothing to do here. */ + break; + + case PS_COND_WAIT: + case PS_SLEEP_WAIT: + /* Insert into the waiting queue: */ + KSE_WAITQ_INSERT(kse, thread); + break; + + case PS_LOCKWAIT: /* - * If we had a context switch, run any - * installed switch hooks. + * This state doesn't timeout. */ - if ((_sched_switch_hook != NULL) && - (_last_user_thread != pthread_h)) { - thread_run_switch_hook(_last_user_thread, - pthread_h); - } + thread->wakeup_time.tv_sec = -1; + thread->wakeup_time.tv_nsec = -1; + level = thread->locklevel - 1; + if (_LCK_BUSY(&thread->lockusers[level])) + KSE_WAITQ_INSERT(kse, thread); + else + THR_SET_STATE(thread, PS_RUNNING); + break; + + case PS_JOIN: + case PS_MUTEX_WAIT: + case PS_SIGSUSPEND: + case PS_SIGWAIT: + case PS_SUSPENDED: + case PS_DEADLOCK: + default: /* - * Continue the thread at its current frame: + * These states don't timeout. */ - _last_user_thread = td; - DBG_MSG("switch in\n"); - _thread_switch(&pthread_h->mailbox, - &_thread_kern_kse_mailbox.km_curthread); - DBG_MSG("switch out\n"); + thread->wakeup_time.tv_sec = -1; + thread->wakeup_time.tv_nsec = -1; + + /* Insert into the waiting queue: */ + KSE_WAITQ_INSERT(kse, thread); + break; + } + if (thread->state != PS_RUNNING) { + /* Restart the time slice: */ + thread->slice_usec = -1; } else { - /* - * There is nothing for us to do. Either - * yield, or idle until something wakes up. - */ - DBG_MSG("No runnable threads, idling.\n"); - if (_kern_idle_running) { - DBG_MSG("kse_release"); - kse_release(NULL); + if (thread->need_switchout != 0) + /* + * The thread yielded on its own; + * restart the timeslice. + */ + thread->slice_usec = -1; + else if ((thread->slice_usec != -1) && + (thread->attr.sched_policy != SCHED_FIFO)) { + thread->slice_usec += (thread->tmbx.tm_uticks + + thread->tmbx.tm_sticks) * _clock_res_usec; + /* Check for time quantum exceeded: */ + if (thread->slice_usec > TIMESLICE_USEC) + thread->slice_usec = -1; } - _kern_idle_running = 1; - if ((pthread == NULL) || - (pthread->wakeup_time.tv_sec == -1)) + if (thread->slice_usec == -1) { + /* + * The thread exceeded its time quantum or + * it yielded the CPU; place it at the tail + * of the queue for its priority. + */ + KSE_RUNQ_INSERT_TAIL(kse, thread); + } else { /* - * Nothing is waiting on a timeout, so - * idling gains us nothing; spin. + * The thread hasn't exceeded its interval + * Place it at the head of the queue for its + * priority. */ - continue; - TIMESPEC_TO_TIMEVAL(&_kern_idle_timeout, - &pthread->wakeup_time); - _thread_switch(&_idle_thr_mailbox, - &_thread_kern_kse_mailbox.km_curthread); + KSE_RUNQ_INSERT_HEAD(kse, thread); + } } - DBG_MSG("looping\n"); } - /* There are no threads; exit. */ - DBG_MSG("No threads, exiting.\n"); - exit(0); + thread->active = 0; + thread->need_switchout = 0; } -void -_thread_kern_sched_state(enum pthread_state state, char *fname, int lineno) +/* + * This function waits for the smallest timeout value of any waiting + * thread, or until it receives a message from another KSE. + * + * This must be called with the scheduling lock held. 
+ */ +static void +kse_wait(struct kse *kse) { - struct pthread *curthread = _get_curthread(); + struct timespec *ts, ts_sleep; + struct pthread *td_wait, *td_run; - /* - * Flag the pthread kernel as executing scheduler code - * to avoid an upcall from interrupting this execution - * and calling the scheduler again. - */ - _thread_kern_kse_mailbox.km_curthread = NULL; + ts = &kse->k_mbx.km_timeofday; + KSE_SET_WAIT(kse); - /* Change the state of the current thread: */ - curthread->state = state; - curthread->fname = fname; - curthread->lineno = lineno; + td_wait = KSE_WAITQ_FIRST(kse); + td_run = KSE_RUNQ_FIRST(kse); + KSE_SCHED_UNLOCK(kse, kse->k_kseg); - /* Schedule the next thread that is ready: */ - _thread_kern_sched(); + if (td_run == NULL) { + if ((td_wait == NULL) || (td_wait->wakeup_time.tv_sec < 0)) { + /* Limit sleep to no more than 2 minutes. */ + ts_sleep.tv_sec = 120; + ts_sleep.tv_nsec = 0; + } else { + TIMESPEC_SUB(&ts_sleep, &td_wait->wakeup_time, ts); + if (ts_sleep.tv_sec > 120) { + ts_sleep.tv_sec = 120; + ts_sleep.tv_nsec = 0; + } + } + if ((ts_sleep.tv_sec >= 0) && (ts_sleep.tv_nsec >= 0)) { + /* Don't sleep for negative times. */ + kse_release(&ts_sleep); + /* + * The above never returns. + * XXX - Actually, it would be nice if it did + * for KSE's with only one thread. + */ + } + } + KSE_CLEAR_WAIT(kse); } -void -_thread_kern_sched_state_unlock(enum pthread_state state, - spinlock_t *lock, char *fname, int lineno) +/* + * Avoid calling this kse_exit() so as not to confuse it with the + * system call of the same name. + */ +static void +kse_fini(struct kse *kse) { - struct pthread *curthread = _get_curthread(); + struct timespec ts; /* - * Flag the pthread kernel as executing scheduler code - * to avoid an upcall from interrupting this execution - * and calling the scheduler again. + * Check to see if this is the main kse. */ - _thread_kern_kse_mailbox.km_curthread = NULL; - - /* Change the state of the current thread: */ - curthread->state = state; - curthread->fname = fname; - curthread->lineno = lineno; - - _SPINUNLOCK(lock); + if (kse == _kse_initial) { + /* + * Wait for the last KSE/thread to exit, or for more + * threads to be created (it is possible for additional + * scope process threads to be created after the main + * thread exits). + */ + ts.tv_sec = 120; + ts.tv_nsec = 0; + KSE_SET_WAIT(kse); + KSE_SCHED_LOCK(kse, kse->k_kseg); + if ((active_kse_count > 1) && + (kse->k_kseg->kg_threadcount == 0)) { + KSE_SCHED_UNLOCK(kse, kse->k_kseg); + /* + * XXX - We need a way for the KSE to do a timed + * wait. + */ + kse_release(&ts); + /* The above never returns. */ + } + KSE_SCHED_UNLOCK(kse, kse->k_kseg); - /* Schedule the next thread that is ready: */ - _thread_kern_sched(); + /* There are no more threads; exit this process: */ + if (kse->k_kseg->kg_threadcount == 0) { + /* kse_exit(); */ + __isthreaded = 0; + exit(0); + } + } else { + /* Mark this KSE for GC: */ + KSE_LOCK_ACQUIRE(kse, &_thread_list_lock); + TAILQ_INSERT_TAIL(&free_kseq, kse, k_qe); + KSE_LOCK_RELEASE(kse, &_thread_list_lock); + kse_exit(); + } } -/* - * Block until the next timeout. 
- */ void -_thread_kern_idle(void) +_thr_sig_add(struct pthread *thread, int sig, siginfo_t *info, ucontext_t *ucp) { - struct timespec ts; - struct timeval timeout; + struct kse *curkse; - for (;;) { - timersub(&_kern_idle_timeout, &_sched_tod, &timeout); - TIMEVAL_TO_TIMESPEC(&timeout, &ts); - __sys_nanosleep(&ts, NULL); + curkse = _get_curkse(); + + KSE_SCHED_LOCK(curkse, thread->kseg); + /* + * A threads assigned KSE can't change out from under us + * when we hold the scheduler lock. + */ + if (THR_IS_ACTIVE(thread)) { + /* Thread is active. Can't install the signal for it. */ + /* Make a note in the thread that it has a signal. */ + sigaddset(&thread->sigpend, sig); + thread->check_pending = 1; } + else { + /* Make a note in the thread that it has a signal. */ + sigaddset(&thread->sigpend, sig); + thread->check_pending = 1; + + if (thread->blocked != 0) { + /* Tell the kernel to interrupt the thread. */ + kse_thr_interrupt(&thread->tmbx); + } + } + KSE_SCHED_UNLOCK(curkse, thread->kseg); } void -_thread_kern_set_timeout(const struct timespec * timeout) +_thr_set_timeout(const struct timespec *timeout) { struct pthread *curthread = _get_curthread(); - struct timespec current_time; - struct timeval tv; + struct timespec ts; /* Reset the timeout flag for the running thread: */ curthread->timeout = 0; @@ -514,94 +1494,311 @@ _thread_kern_set_timeout(const struct timespec * timeout) curthread->wakeup_time.tv_nsec = -1; } /* Check if no waiting is required: */ - else if (timeout->tv_sec == 0 && timeout->tv_nsec == 0) { + else if ((timeout->tv_sec == 0) && (timeout->tv_nsec == 0)) { /* Set the wake up time to 'immediately': */ curthread->wakeup_time.tv_sec = 0; curthread->wakeup_time.tv_nsec = 0; } else { - /* Get the current time: */ - GET_CURRENT_TOD(tv); - TIMEVAL_TO_TIMESPEC(&tv, ¤t_time); - - /* Calculate the time for the current thread to wake up: */ - curthread->wakeup_time.tv_sec = current_time.tv_sec + timeout->tv_sec; - curthread->wakeup_time.tv_nsec = current_time.tv_nsec + timeout->tv_nsec; - - /* Check if the nanosecond field needs to wrap: */ - if (curthread->wakeup_time.tv_nsec >= 1000000000) { - /* Wrap the nanosecond field: */ - curthread->wakeup_time.tv_sec += 1; - curthread->wakeup_time.tv_nsec -= 1000000000; - } + /* Calculate the time for the current thread to wakeup: */ + KSE_GET_TOD(curthread->kse, &ts); + TIMESPEC_ADD(&curthread->wakeup_time, &ts, timeout); } } void -_thread_kern_sig_defer(void) +_thr_panic_exit(char *file, int line, char *msg) { - struct pthread *curthread = _get_curthread(); + char buf[256]; - /* Allow signal deferral to be recursive. */ - curthread->sig_defer_count++; + snprintf(buf, sizeof(buf), "(%s:%d) %s\n", file, line, msg); + __sys_write(2, buf, strlen(buf)); + abort(); } void -_thread_kern_sig_undefer(void) +_thr_setrunnable(struct pthread *curthread, struct pthread *thread) { - struct pthread *curthread = _get_curthread(); + kse_critical_t crit; + + crit = _kse_critical_enter(); + KSE_SCHED_LOCK(curthread->kse, thread->kseg); + _thr_setrunnable_unlocked(thread); + KSE_SCHED_UNLOCK(curthread->kse, thread->kseg); + _kse_critical_leave(crit); +} + +void +_thr_setrunnable_unlocked(struct pthread *thread) +{ + if ((thread->kseg->kg_flags & KGF_SINGLE_THREAD) != 0) + /* No silly queues for these threads. 
*/ + THR_SET_STATE(thread, PS_RUNNING); + else { + if ((thread->flags & THR_FLAGS_IN_WAITQ) != 0) + KSE_WAITQ_REMOVE(thread->kse, thread); + THR_SET_STATE(thread, PS_RUNNING); + if ((thread->blocked == 0) && + (thread->flags & THR_FLAGS_IN_RUNQ) == 0) + THR_RUNQ_INSERT_TAIL(thread); + } + /* + * XXX - Threads are not yet assigned to specific KSEs; they are + * assigned to the KSEG. So the fact that a thread's KSE is + * waiting doesn't necessarily mean that it will be the KSE + * that runs the thread after the lock is granted. But we + * don't know if the other KSEs within the same KSEG are + * also in a waiting state or not so we err on the side of + * caution and wakeup the thread's last known KSE. We + * ensure that the threads KSE doesn't change while it's + * scheduling lock is held so it is safe to reference it + * (the KSE). If the KSE wakes up and doesn't find any more + * work it will again go back to waiting so no harm is done. + */ + if (KSE_WAITING(thread->kse)) + KSE_WAKEUP(thread->kse); +} + +struct pthread * +_get_curthread(void) +{ + return (_ksd_curthread); +} + +/* This assumes the caller has disabled upcalls. */ +struct kse * +_get_curkse(void) +{ + return (_ksd_curkse); +} + +void +_set_curkse(struct kse *kse) +{ + _ksd_setprivate(&kse->k_ksd); +} + +/* + * Allocate a new KSEG. + * + * We allow the current KSE (curkse) to be NULL in the case that this + * is the first time a KSEG is being created (library initialization). + * In this case, we don't need to (and can't) take any locks. + */ +struct kse_group * +_kseg_alloc(struct kse *curkse) +{ + struct kse_group *kseg = NULL; + + if ((curkse != NULL) && (free_kseg_count > 0)) { + /* Use the kse lock for the kseg queue. */ + KSE_LOCK_ACQUIRE(curkse, &kse_lock); + if ((kseg = TAILQ_FIRST(&free_kse_groupq)) != NULL) { + TAILQ_REMOVE(&free_kse_groupq, kseg, kg_qe); + free_kseg_count--; + active_kseg_count++; + TAILQ_INSERT_TAIL(&active_kse_groupq, kseg, kg_qe); + } + KSE_LOCK_RELEASE(curkse, &kse_lock); + } /* - * Perform checks to yield only if we are about to undefer - * signals. + * If requested, attempt to allocate a new KSE group only if the + * KSE allocation was successful and a KSE group wasn't found in + * the free list. */ - if (curthread->sig_defer_count > 1) { - /* Decrement the signal deferral count. */ - curthread->sig_defer_count--; + if ((kseg == NULL) && + ((kseg = (struct kse_group *)malloc(sizeof(*kseg))) != NULL)) { + THR_ASSERT(_pq_alloc(&kseg->kg_schedq.sq_runq, + THR_MIN_PRIORITY, THR_LAST_PRIORITY) == 0, + "Unable to allocate priority queue."); + kseg_init(kseg); + if (curkse != NULL) + KSE_LOCK_ACQUIRE(curkse, &kse_lock); + kseg_free(kseg); + if (curkse != NULL) + KSE_LOCK_RELEASE(curkse, &kse_lock); + } + return (kseg); +} + +/* + * This must be called with the kse lock held and when there are + * no more threads that reference it. + */ +static void +kseg_free(struct kse_group *kseg) +{ + TAILQ_INSERT_HEAD(&free_kse_groupq, kseg, kg_qe); + kseg_init(kseg); + free_kseg_count++; + active_kseg_count--; +} + +/* + * Allocate a new KSE. + * + * We allow the current KSE (curkse) to be NULL in the case that this + * is the first time a KSE is being created (library initialization). + * In this case, we don't need to (and can't) take any locks. + */ +struct kse * +_kse_alloc(struct kse *curkse) +{ + struct kse *kse = NULL; + int need_ksd = 0; + int i; + + if ((curkse != NULL) && (free_kse_count > 0)) { + KSE_LOCK_ACQUIRE(curkse, &kse_lock); + /* Search for a finished KSE. 
*/ + kse = TAILQ_FIRST(&free_kseq); +#define KEMBX_DONE 0x01 + while ((kse != NULL) && + ((kse->k_mbx.km_flags & KEMBX_DONE) == 0)) { + kse = TAILQ_NEXT(kse, k_qe); + } +#undef KEMBX_DONE + if (kse != NULL) { + TAILQ_REMOVE(&free_kseq, kse, k_qe); + free_kse_count--; + active_kse_count++; + TAILQ_INSERT_TAIL(&active_kseq, kse, k_qe); + } + KSE_LOCK_RELEASE(curkse, &kse_lock); } - else if (curthread->sig_defer_count == 1) { - /* Reenable signals: */ - curthread->sig_defer_count = 0; + if ((kse == NULL) && + ((kse = (struct kse *)malloc(sizeof(*kse))) != NULL)) { + bzero(kse, sizeof(*kse)); + + /* Initialize the lockusers. */ + for (i = 0; i < MAX_KSE_LOCKLEVEL; i++) { + _lockuser_init(&kse->k_lockusers[i], (void *)kse); + _LCK_SET_PRIVATE2(&kse->k_lockusers[i], NULL); + } + + /* We had to malloc a kse; mark it as needing a new ID.*/ + need_ksd = 1; /* - * Check for asynchronous cancellation before delivering any - * pending signals: + * Create the KSE context. + * + * XXX - For now this is done here in the allocation. + * In the future, we may want to have it done + * outside the allocation so that scope system + * threads (one thread per KSE) are not required + * to have a stack for an unneeded kse upcall. */ - if (((curthread->cancelflags & PTHREAD_AT_CANCEL_POINT) == 0) && - ((curthread->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0)) - pthread_testcancel(); + kse->k_mbx.km_func = kse_entry; + kse->k_mbx.km_stack.ss_sp = (char *)malloc(KSE_STACKSIZE); + kse->k_mbx.km_stack.ss_size = KSE_STACKSIZE; + kse->k_mbx.km_udata = (void *)kse; + kse->k_mbx.km_quantum = 20000; + if (kse->k_mbx.km_stack.ss_size == NULL) { + free(kse); + kse = NULL; + } } + if ((kse != NULL) && (need_ksd != 0)) { + /* This KSE needs initialization. */ + if (curkse != NULL) + KSE_LOCK_ACQUIRE(curkse, &kse_lock); + /* Initialize KSD inside of the lock. */ + if (_ksd_create(&kse->k_ksd, (void *)kse, sizeof(*kse)) != 0) { + if (curkse != NULL) + KSE_LOCK_RELEASE(curkse, &kse_lock); + free(kse->k_mbx.km_stack.ss_sp); + for (i = 0; i < MAX_KSE_LOCKLEVEL; i++) { + _lockuser_destroy(&kse->k_lockusers[i]); + } + free(kse); + return (NULL); + } + kse->k_flags = 0; + active_kse_count++; + TAILQ_INSERT_TAIL(&active_kseq, kse, k_qe); + if (curkse != NULL) + KSE_LOCK_RELEASE(curkse, &kse_lock); + + } + return (kse); } -static inline void -thread_run_switch_hook(pthread_t thread_out, pthread_t thread_in) +void +_kse_free(struct kse *curkse, struct kse *kse) { - pthread_t tid_out = thread_out; - pthread_t tid_in = thread_in; - - if ((tid_out != NULL) && - (tid_out->flags & PTHREAD_FLAGS_PRIVATE) != 0) - tid_out = NULL; - if ((tid_in != NULL) && - (tid_in->flags & PTHREAD_FLAGS_PRIVATE) != 0) - tid_in = NULL; - - if ((_sched_switch_hook != NULL) && (tid_out != tid_in)) { - /* Run the scheduler switch hook: */ - _sched_switch_hook(tid_out, tid_in); + struct kse_group *kseg = NULL; + + if (curkse == kse) + PANIC("KSE trying to free itself"); + KSE_LOCK_ACQUIRE(curkse, &kse_lock); + active_kse_count--; + if ((kseg = kse->k_kseg) != NULL) { + TAILQ_REMOVE(&kseg->kg_kseq, kse, k_qe); + /* + * Free the KSEG if there are no more threads associated + * with it. 
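+ * (kseg_free() reinitializes the group and returns it to the free list.)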
+ */ + if (TAILQ_EMPTY(&kseg->kg_threadq)) + kseg_free(kseg); } + kse->k_kseg = NULL; + kse->k_flags &= ~KF_INITIALIZED; + TAILQ_INSERT_HEAD(&free_kseq, kse, k_qe); + free_kse_count++; + KSE_LOCK_RELEASE(curkse, &kse_lock); } -struct pthread * -_get_curthread(void) +static void +kseg_init(struct kse_group *kseg) { - if (_thread_initial == NULL) - _thread_init(); + TAILQ_INIT(&kseg->kg_kseq); + TAILQ_INIT(&kseg->kg_threadq); + TAILQ_INIT(&kseg->kg_schedq.sq_waitq); + TAILQ_INIT(&kseg->kg_schedq.sq_blockedq); + _lock_init(&kseg->kg_lock, LCK_ADAPTIVE, _kse_lock_wait, + _kse_lock_wakeup); + kseg->kg_threadcount = 0; + kseg->kg_idle_kses = 0; + kseg->kg_flags = 0; +} - return (_thread_run); +struct pthread * +_thr_alloc(struct pthread *curthread) +{ + kse_critical_t crit; + struct pthread *thread = NULL; + + if (curthread != NULL) { + if (_gc_check != 0) + thread_gc(curthread); + if (free_thread_count > 0) { + crit = _kse_critical_enter(); + KSE_LOCK_ACQUIRE(curkse, &thread_lock); + if ((thread = TAILQ_FIRST(&free_threadq)) != NULL) { + TAILQ_REMOVE(&free_threadq, thread, tle); + free_thread_count--; + } + KSE_LOCK_RELEASE(curkse, &thread_lock); + } + } + if (thread == NULL) + thread = (struct pthread *)malloc(sizeof(struct pthread)); + return (thread); } void -_set_curthread(struct pthread *newthread) +_thr_free(struct pthread *curthread, struct pthread *thread) { - _thread_run = newthread; + kse_critical_t crit; + + if ((curthread == NULL) || (free_thread_count >= MAX_CACHED_THREADS)) + free(thread); + else { + crit = _kse_critical_enter(); + KSE_LOCK_ACQUIRE(curkse, &thread_lock); + TAILQ_INSERT_HEAD(&free_threadq, thread, tle); + free_thread_count++; + KSE_LOCK_RELEASE(curkse, &thread_lock); + _kse_critical_leave(crit); + } } |
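
For reference, the waiting-queue discipline introduced above (kse_waitq_insert() keeps each KSE's wait queue sorted by wakeup time, and kse_check_waitq() sweeps expired entries with thr_timedout() before a thread is chosen to run) reduces to a short amount of list code. The following is a minimal, self-contained sketch of just that ordering and timeout test. It mirrors the patch's conventions (tv_sec == -1 means "no timeout"; a thread has timed out when its wakeup time is at or before the current time), but struct waiter, waitq_insert(), and timedout() are illustrative stand-ins, not libpthread code, which uses TAILQ macros on struct pthread.

#include <stdio.h>
#include <time.h>

/* Illustrative stand-in for a waiting thread; not a libpthread type. */
struct waiter {
	struct timespec	 wakeup_time;	/* tv_sec == -1 means "no timeout" */
	struct waiter	*next;
};

/* Same test as thr_timedout(): a non-negative wakeup time at or before now. */
static int
timedout(const struct waiter *w, const struct timespec *now)
{
	if (w->wakeup_time.tv_sec < 0)
		return (0);		/* never times out */
	if (w->wakeup_time.tv_sec != now->tv_sec)
		return (w->wakeup_time.tv_sec < now->tv_sec);
	return (w->wakeup_time.tv_nsec <= now->tv_nsec);
}

/*
 * Keep the queue ordered by wakeup time, as kse_waitq_insert() does:
 * timed waiters in ascending order, untimed waiters at the tail.
 */
static void
waitq_insert(struct waiter **head, struct waiter *w)
{
	if (w->wakeup_time.tv_sec == -1) {
		/* No timeout: append at the tail. */
		while (*head != NULL)
			head = &(*head)->next;
	} else {
		while (*head != NULL && (*head)->wakeup_time.tv_sec != -1 &&
		    ((*head)->wakeup_time.tv_sec < w->wakeup_time.tv_sec ||
		    ((*head)->wakeup_time.tv_sec == w->wakeup_time.tv_sec &&
		    (*head)->wakeup_time.tv_nsec <= w->wakeup_time.tv_nsec)))
			head = &(*head)->next;
	}
	w->next = *head;
	*head = w;
}

int
main(void)
{
	struct waiter a = { { 5, 0 }, NULL };	/* wakes at t = 5s */
	struct waiter b = { { 2, 0 }, NULL };	/* wakes at t = 2s */
	struct waiter c = { { -1, -1 }, NULL };	/* no timeout */
	struct waiter *q = NULL;
	struct timespec now = { 3, 0 };

	waitq_insert(&q, &a);
	waitq_insert(&q, &c);
	waitq_insert(&q, &b);	/* queue is now b (2s), a (5s), c (never) */

	/* Sweep expired waiters from the head, like kse_check_waitq(). */
	while (q != NULL && timedout(q, &now)) {
		printf("waiter at %lds timed out\n",
		    (long)q->wakeup_time.tv_sec);
		q = q->next;
	}
	return (0);
}

Run at now = 3s, the sweep wakes only the 2-second waiter and stops at the 5-second one, which is why kse_check_waitq() can stop at the first unexpired entry: the sort order guarantees everything behind it is still waiting.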