From f066519e91e2290cb79ef12fe7c958ee462cda6c Mon Sep 17 00:00:00 2001
From: davidxu
Date: Sat, 2 Apr 2005 01:20:00 +0000
Subject: Import my recent 1:1 threading work.

Improved features include:

1. Fast simple-type mutexes.
2. __thread TLS works.
3. Asynchronous cancellation works (using a signal).
4. Thread synchronization is fully based on umtx; in particular, condition
   variables and the other synchronization objects were rewritten to use
   umtx directly.  These objects can be shared between processes via shared
   memory, but that requires an ABI change which has not happened yet.
5. The default stack size is increased to 1M on 32-bit platforms and 2M on
   64-bit platforms.

As a result, some mysql super-smack benchmarks show massively improved
performance.

Okayed by: jeff, mtm, rwatson, scottl
---
 lib/libthr/thread/thr_stack.c | 230 +++++++++++++++++++++++-------------------
 1 file changed, 125 insertions(+), 105 deletions(-)

(limited to 'lib/libthr/thread/thr_stack.c')

diff --git a/lib/libthr/thread/thr_stack.c b/lib/libthr/thread/thr_stack.c
index bd82157..3c5503f 100644
--- a/lib/libthr/thread/thr_stack.c
+++ b/lib/libthr/thread/thr_stack.c
@@ -26,12 +26,13 @@
  *
  * $FreeBSD$
  */
+
 #include <sys/types.h>
 #include <sys/mman.h>
-#include <sys/param.h>
 #include <sys/queue.h>
 #include <stdlib.h>
 #include <pthread.h>
+
 #include "thr_private.h"
 
 /* Spare thread stack. */
@@ -43,31 +44,32 @@ struct stack {
 };
 
 /*
- * Default sized (stack and guard) spare stack queue.  Stacks are cached to
- * avoid additional complexity managing mmap()ed stack regions.  Spare stacks
- * are used in LIFO order to increase cache locality.
+ * Default sized (stack and guard) spare stack queue.  Stacks are cached
+ * to avoid additional complexity managing mmap()ed stack regions.  Spare
+ * stacks are used in LIFO order to increase cache locality.
  */
-static LIST_HEAD(, stack) _dstackq = LIST_HEAD_INITIALIZER(_dstackq);
+static LIST_HEAD(, stack) dstackq = LIST_HEAD_INITIALIZER(dstackq);
 
 /*
  * Miscellaneous sized (non-default stack and/or guard) spare stack queue.
- * Stacks are cached to avoid additional complexity managing mmap()ed stack
- * regions.  This list is unordered, since ordering on both stack size and guard
- * size would be more trouble than it's worth.  Stacks are allocated from this
- * cache on a first size match basis.
+ * Stacks are cached to avoid additional complexity managing mmap()ed
+ * stack regions.  This list is unordered, since ordering on both stack
+ * size and guard size would be more trouble than it's worth.  Stacks are
+ * allocated from this cache on a first size match basis.
  */
-static LIST_HEAD(, stack) _mstackq = LIST_HEAD_INITIALIZER(_mstackq);
 
 /**
- * Base address of the last stack allocated (including its red zone, if there is
- * one).  Stacks are allocated contiguously, starting beyond the top of the main
- * stack.  When a new stack is created, a red zone is typically created
- * (actually, the red zone is simply left unmapped) above the top of the stack,
- * such that the stack will not be able to grow all the way to the bottom of the
- * next stack.  This isn't fool-proof.  It is possible for a stack to grow by a
- * large amount, such that it grows into the next stack, and as long as the
- * memory within the red zone is never accessed, nothing will prevent one thread
- * stack from trouncing all over the next.
+static LIST_HEAD(, stack) mstackq = LIST_HEAD_INITIALIZER(mstackq);
+ * Base address of the last stack allocated (including its red zone, if
+ * there is one).  Stacks are allocated contiguously, starting beyond the
+ * top of the main stack.  When a new stack is created, a red zone is
+ * typically created (actually, the red zone is mapped with PROT_NONE) above
+ * the top of the stack, such that the stack will not be able to grow all
+ * the way to the bottom of the next stack.  This isn't fool-proof.  It is
+ * possible for a stack to grow by a large amount, such that it grows into
+ * the next stack, and as long as the memory within the red zone is never
+ * accessed, nothing will prevent one thread stack from trouncing all over
+ * the next.
  *
  *                              low memory
  *     . . . . . . . . . . . . . . . . . .
@@ -78,7 +80,7 @@ static LIST_HEAD(, stack) _mstackq = LIST_HEAD_INITIALIZER(_mstackq);
  * |  Red Zone (guard page)            | red zone for 2nd thread
  * |                                   |
  * +-----------------------------------+
- * |  stack 2 - _pthread_stack_default | top of 2nd thread stack
+ * |  stack 2 - _thr_stack_default     | top of 2nd thread stack
  * |                                   |
  * |                                   |
  * |                                   |
@@ -89,7 +91,7 @@ static LIST_HEAD(, stack) _mstackq = LIST_HEAD_INITIALIZER(_mstackq);
  * |  Red Zone                         | red zone for 1st thread
  * |                                   |
  * +-----------------------------------+
- * |  stack 1 - _pthread_stack_default | top of 1st thread stack
+ * |  stack 1 - _thr_stack_default     | top of 1st thread stack
  * |                                   |
  * |                                   |
  * |                                   |
@@ -100,7 +102,7 @@ static LIST_HEAD(, stack) _mstackq = LIST_HEAD_INITIALIZER(_mstackq);
  * |  Red Zone                         |
  * |                                   | red zone for main thread
  * +-----------------------------------+
- * | USRSTACK - _pthread_stack_initial | top of main thread stack
+ * | USRSTACK - _thr_stack_initial     | top of main thread stack
  * |                                   | ^
  * |                                   | |
  * |                                   | |
@@ -111,48 +113,59 @@ static LIST_HEAD(, stack) _mstackq = LIST_HEAD_INITIALIZER(_mstackq);
  *                              high memory
  *
  */
-static void * last_stack;
+static void *last_stack = NULL;
+
+/*
+ * Round size up to the nearest multiple of
+ * _thr_page_size.
+ */
+static inline size_t
+round_up(size_t size)
+{
+        if (size % _thr_page_size != 0)
+                size = ((size / _thr_page_size) + 1) *
+                    _thr_page_size;
+        return size;
+}
 
-void *
-_thread_stack_alloc(size_t stacksize, size_t guardsize)
+int
+_thr_stack_alloc(struct pthread_attr *attr)
 {
-        void *stack = NULL;
-        struct stack *spare_stack;
-        size_t stack_size;
+        struct pthread *curthread = _get_curthread();
+        struct stack *spare_stack;
+        size_t stacksize;
+        size_t guardsize;
+        char *stackaddr;
 
         /*
-         * Round up stack size to nearest multiple of _pthread_page_size,
-         * so that mmap() * will work.  If the stack size is not an even
-         * multiple, we end up initializing things such that there is unused
-         * space above the beginning of the stack, so the stack sits snugly
-         * against its guard.
+         * Round up stack size to nearest multiple of _thr_page_size so
+         * that mmap() will work.  If the stack size is not an even
+         * multiple, we end up initializing things such that there is
+         * unused space above the beginning of the stack, so the stack
+         * sits snugly against its guard.
          */
-        if (stacksize % _pthread_page_size != 0)
-                stack_size = ((stacksize / _pthread_page_size) + 1) *
-                    _pthread_page_size;
-        else
-                stack_size = stacksize;
+        stacksize = round_up(attr->stacksize_attr);
+        guardsize = round_up(attr->guardsize_attr);
+
+        attr->stackaddr_attr = NULL;
+        attr->flags &= ~THR_STACK_USER;
 
         /*
+         * Use the garbage collector lock for synchronization of the
+         * spare stack lists and allocations from usrstack.
+         */
+        THREAD_LIST_LOCK(curthread);
+        /*
          * If the stack and guard sizes are default, try to allocate a stack
          * from the default-size stack cache:
          */
-        if (stack_size == _pthread_stack_default &&
-            guardsize == _pthread_guard_default) {
-                /*
-                 * Use the garbage collector mutex for synchronization of the
-                 * spare stack list.
-                 */
-                STACK_LOCK;
-
-                if ((spare_stack = LIST_FIRST(&_dstackq)) != NULL) {
-                        /* Use the spare stack. */
+        if ((stacksize == THR_STACK_DEFAULT) &&
+            (guardsize == _thr_guard_default)) {
+                if ((spare_stack = LIST_FIRST(&dstackq)) != NULL) {
+                        /* Use the spare stack. */
                         LIST_REMOVE(spare_stack, qe);
-                        stack = spare_stack->stackaddr;
+                        attr->stackaddr_attr = spare_stack->stackaddr;
                 }
-
-                /* Unlock the garbage collector mutex. */
-                STACK_UNLOCK;
         }
         /*
          * The user specified a non-default stack and/or guard size, so try to
@@ -160,76 +173,83 @@ _thread_stack_alloc(size_t stacksize, size_t guardsize)
          * rounded up stack size (stack_size) in the search:
          */
         else {
-                /*
-                 * Use the garbage collector mutex for synchronization of the
-                 * spare stack list.
-                 */
-                STACK_LOCK;
-
-                LIST_FOREACH(spare_stack, &_mstackq, qe) {
-                        if (spare_stack->stacksize == stack_size &&
+                LIST_FOREACH(spare_stack, &mstackq, qe) {
+                        if (spare_stack->stacksize == stacksize &&
                             spare_stack->guardsize == guardsize) {
                                 LIST_REMOVE(spare_stack, qe);
-                                stack = spare_stack->stackaddr;
+                                attr->stackaddr_attr = spare_stack->stackaddr;
                                 break;
                         }
                 }
-
-                /* Unlock the garbage collector mutex. */
-                STACK_UNLOCK;
         }
-
-        /* Check if a stack was not allocated from a stack cache: */
-        if (stack == NULL) {
-
+        if (attr->stackaddr_attr != NULL) {
+                /* A cached stack was found.  Release the lock. */
+                THREAD_LIST_UNLOCK(curthread);
+        }
+        else {
+                /* Allocate a stack from usrstack. */
                 if (last_stack == NULL)
-                        last_stack = _usrstack - _pthread_stack_initial -
-                            _pthread_guard_default;
+                        last_stack = _usrstack - _thr_stack_initial -
+                            _thr_guard_default;
 
                 /* Allocate a new stack. */
-                stack = last_stack - stack_size;
+                stackaddr = last_stack - stacksize - guardsize;
 
                 /*
-                 * Even if stack allocation fails, we don't want to try to use
-                 * this location again, so unconditionally decrement
+                 * Even if stack allocation fails, we don't want to try to
+                 * use this location again, so unconditionally decrement
                  * last_stack.  Under normal operating conditions, the most
-                 * likely reason for an mmap() error is a stack overflow of the
-                 * adjacent thread stack.
+                 * likely reason for an mmap() error is a stack overflow of
+                 * the adjacent thread stack.
                  */
-                last_stack -= (stack_size + guardsize);
-
-                /* Stack: */
-                if (mmap(stack, stack_size, PROT_READ | PROT_WRITE, MAP_STACK,
-                    -1, 0) == MAP_FAILED)
-                        stack = NULL;
+                last_stack -= (stacksize + guardsize);
+
+                /* Release the lock before mmap'ing it. */
+                THREAD_LIST_UNLOCK(curthread);
+
+                /* Map the stack and guard page together, and split guard
+                   page from allocated space: */
+                if ((stackaddr = mmap(stackaddr, stacksize+guardsize,
+                     PROT_READ | PROT_WRITE, MAP_STACK,
+                     -1, 0)) != MAP_FAILED &&
+                    (guardsize == 0 ||
+                     mprotect(stackaddr, guardsize, PROT_NONE) == 0)) {
+                        stackaddr += guardsize;
+                } else {
+                        if (stackaddr != MAP_FAILED)
+                                munmap(stackaddr, stacksize + guardsize);
+                        stackaddr = NULL;
+                }
+                attr->stackaddr_attr = stackaddr;
         }
-
-        return (stack);
+        if (attr->stackaddr_attr != NULL)
+                return (0);
+        else
+                return (-1);
 }
 
-/* This function must be called with the 'dead thread list' lock held. */
+/* This function must be called with _thread_list_lock held. */
 void
-_thread_stack_free(void *stack, size_t stacksize, size_t guardsize)
+_thr_stack_free(struct pthread_attr *attr)
 {
-        struct stack *spare_stack;
-
-        spare_stack = (stack + stacksize - sizeof(struct stack));
-        /* Round stacksize up to nearest multiple of _pthread_page_size. */
-        if (stacksize % _pthread_page_size != 0) {
-                spare_stack->stacksize =
-                    ((stacksize / _pthread_page_size) + 1) *
-                    _pthread_page_size;
-        } else
-                spare_stack->stacksize = stacksize;
-        spare_stack->guardsize = guardsize;
-        spare_stack->stackaddr = stack;
-
-        if (spare_stack->stacksize == _pthread_stack_default &&
-            spare_stack->guardsize == _pthread_guard_default) {
-                /* Default stack/guard size. */
-                LIST_INSERT_HEAD(&_dstackq, spare_stack, qe);
-        } else {
-                /* Non-default stack/guard size. */
-                LIST_INSERT_HEAD(&_mstackq, spare_stack, qe);
+        struct stack *spare_stack;
+
+        if ((attr != NULL) && ((attr->flags & THR_STACK_USER) == 0)
+            && (attr->stackaddr_attr != NULL)) {
+                spare_stack = (attr->stackaddr_attr + attr->stacksize_attr
+                    - sizeof(struct stack));
+                spare_stack->stacksize = round_up(attr->stacksize_attr);
+                spare_stack->guardsize = round_up(attr->guardsize_attr);
+                spare_stack->stackaddr = attr->stackaddr_attr;
+
+                if (spare_stack->stacksize == THR_STACK_DEFAULT &&
+                    spare_stack->guardsize == _thr_guard_default) {
+                        /* Default stack/guard size. */
+                        LIST_INSERT_HEAD(&dstackq, spare_stack, qe);
+                } else {
+                        /* Non-default stack/guard size. */
+                        LIST_INSERT_HEAD(&mstackq, spare_stack, qe);
                 }
+                attr->stackaddr_attr = NULL;
         }
 }
--
cgit v1.1
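
A note on the mapping scheme for readers following along: the sketch below is
a standalone illustration, not code from this commit, of the guard arrangement
_thr_stack_alloc() now sets up: map the stack and guard as one region, then
turn the low pages into an inaccessible red zone with mprotect(PROT_NONE).  It
uses portable MAP_ANON | MAP_PRIVATE where libthr itself passes FreeBSD's
MAP_STACK, and the sizes are arbitrary examples rather than libthr's defaults.

/*
 * Illustrative only: reserve a stack-plus-guard region, then revoke access
 * to the guard so a runaway stack faults instead of overrunning its
 * neighbour.  Assumed values (64 pages of stack, 1 guard page) are examples.
 */
#include <sys/mman.h>

#include <stdio.h>
#include <unistd.h>

int
main(void)
{
        size_t pagesize = (size_t)sysconf(_SC_PAGESIZE);
        size_t stacksize = 64 * pagesize;       /* example size only */
        size_t guardsize = pagesize;            /* one guard page */
        char *base;

        /* Map stack and guard together, as _thr_stack_alloc() does. */
        base = mmap(NULL, stacksize + guardsize, PROT_READ | PROT_WRITE,
            MAP_ANON | MAP_PRIVATE, -1, 0);
        if (base == MAP_FAILED) {
                perror("mmap");
                return (1);
        }

        /* Split off the guard: the lowest pages become inaccessible. */
        if (mprotect(base, guardsize, PROT_NONE) != 0) {
                perror("mprotect");
                munmap(base, stacksize + guardsize);
                return (1);
        }

        /* The usable stack begins just above the guard, as in the patch. */
        printf("guard at %p, stack at %p\n", (void *)base,
            (void *)(base + guardsize));

        munmap(base, stacksize + guardsize);
        return (0);
}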
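
The split between the two spare-stack caches is driven entirely by the sizes
recorded in the thread attributes, so it can be exercised from ordinary
application code with standard pthread calls.  A small usage sketch follows;
the 256K/64K figures are arbitrary examples, not values taken from this
commit.

/*
 * Request a non-default stack and guard size.  When such a thread's stack
 * is later freed, it should land on the miscellaneous cache (mstackq)
 * rather than the default-size cache (dstackq).
 */
#include <pthread.h>
#include <stdio.h>

static void *
worker(void *arg)
{
        (void)arg;
        return (NULL);
}

int
main(void)
{
        pthread_attr_t attr;
        pthread_t td;

        pthread_attr_init(&attr);
        pthread_attr_setstacksize(&attr, 256 * 1024);   /* non-default stack */
        pthread_attr_setguardsize(&attr, 64 * 1024);    /* non-default guard */

        if (pthread_create(&td, &attr, worker, NULL) != 0) {
                fprintf(stderr, "pthread_create failed\n");
                return (1);
        }
        pthread_join(td, NULL);
        pthread_attr_destroy(&attr);
        return (0);
}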