diff options
Diffstat (limited to 'meta/recipes-devtools/gcc/gcc-4.5.1/fedora/gcc43-libgomp-speedup.patch')
-rw-r--r-- | meta/recipes-devtools/gcc/gcc-4.5.1/fedora/gcc43-libgomp-speedup.patch | 2798 |
1 files changed, 0 insertions, 2798 deletions
diff --git a/meta/recipes-devtools/gcc/gcc-4.5.1/fedora/gcc43-libgomp-speedup.patch b/meta/recipes-devtools/gcc/gcc-4.5.1/fedora/gcc43-libgomp-speedup.patch deleted file mode 100644 index 54c855f..0000000 --- a/meta/recipes-devtools/gcc/gcc-4.5.1/fedora/gcc43-libgomp-speedup.patch +++ /dev/null @@ -1,2798 +0,0 @@ -Upstream-Status: Inappropriate [distribution: fedora] -2008-03-28 Jakub Jelinek <jakub@redhat.com> - - * config/linux/sparc/futex.h (atomic_write_barrier): Fix membar - argument. - -2008-03-27 Jakub Jelinek <jakub@redhat.com> - - * libgomp.h (struct gomp_team_state): Remove single_count field - ifndef HAVE_SYNC_BUILTINS. - (struct gomp_team): Likewise. Add work_share_list_free_lock - ifndef HAVE_SYNC_BUILTINS. - * team.c (gomp_new_team): If HAVE_SYNC_BUILTINS is not defined, - don't initialize single_count, but instead initialize - work_share_list_free_lock. - (free_team): Destroy work_share_list_free_lock ifndef - HAVE_SYNC_BUILTINS. - (gomp_team_start): Don't initialize ts.single_count ifndef - HAVE_SYNC_BUILTINS. - * work.c (alloc_work_share, free_work_share): Use - work_share_list_free_lock instead of atomic chaining ifndef - HAVE_SYNC_BUILTINS. - -2008-03-26 Jakub Jelinek <jakub@redhat.com> - - * loop.c (gomp_loop_init): Fix GFS_DYNAMIC ws->mode setting. - * testsuite/libgomp.c/loop-4.c: New test. - - * libgomp.h (struct gomp_team_state): Add single_count field. - (struct gomp_team): Likewise. - * team.c (gomp_new_team): Clear single_count. - (gomp_team_start): Likewise. - * single.c (GOMP_single_start): Rewritten if HAVE_SYNC_BUILTINS. - -2008-03-25 Jakub Jelinek <jakub@redhat.com> - - * team.c (gomp_thread_start): Don't clear ts.static_trip here. - * loop.c (gomp_loop_static_start, gomp_loop_dynamic_start): Clear - ts.static_trip here. - * work.c (gomp_work_share_start): Don't clear ts.static_trip here. - -2008-03-21 Jakub Jelinek <jakub@redhat.com> - - * libgomp.h: Include ptrlock.h. - (struct gomp_work_share): Reshuffle fields. Add next_alloc, - next_ws, next_free and inline_ordered_team_ids fields, change - ordered_team_ids into pointer from flexible array member. - (struct gomp_team_state): Add last_work_share field, remove - work_share_generation. - (struct gomp_team): Remove work_share_lock, generation_mask, - oldest_live_gen, num_live_gen and init_work_shares fields, add - work work_share_list_alloc, work_share_list_free and work_share_chunk - fields. Change work_shares from pointer to pointers into an array. - (gomp_new_team): New prototype. - (gomp_team_start): Change type of last argument. - (gomp_new_work_share): Removed. - (gomp_init_work_share, gomp_fini_work_share): New prototypes. - (gomp_work_share_init_done): New static inline. - * team.c (gomp_thread_start): Clear ts.last_work_share, don't clear - ts.work_share_generation. - (new_team): Removed. - (gomp_new_team): New function. - (free_team): Free gomp_work_share blocks chained through next_alloc, - instead of freeing work_shares and destroying work_share_lock. - (gomp_team_start): Change last argument from ws to team, don't create - new team, set ts.work_share to &team->work_shares[0] and clear - ts.last_work_share. Don't clear ts.work_share_generation. - (gomp_team_end): Call gomp_fini_work_share. - * work.c (gomp_new_work_share): Removed. - (alloc_work_share, gomp_init_work_share, gomp_fini_work_share): New - functions. - (free_work_share): Add team argument. Call gomp_fini_work_share - and then either free ws if orphaned, or put it into - work_share_list_free list of the current team. - (gomp_work_share_start, gomp_work_share_end, - gomp_work_share_end_nowait): Rewritten. - * sections.c (GOMP_sections_start): Call gomp_work_share_init_done - after gomp_sections_init. If HAVE_SYNC_BUILTINS, call - gomp_iter_dynamic_next instead of the _locked variant and don't take - lock around it, otherwise acquire it before calling - gomp_iter_dynamic_next_locked. - (GOMP_sections_next): If HAVE_SYNC_BUILTINS, call - gomp_iter_dynamic_next instead of the _locked variant and don't take - lock around it. - (GOMP_parallel_sections_start): Call gomp_new_team instead of - gomp_new_work_share. Call gomp_sections_init on &team->work_shares[0]. - Adjust gomp_team_start caller. - * loop.c (gomp_loop_static_start, gomp_loop_ordered_static_start): Call - gomp_work_share_init_done after gomp_loop_init. Don't unlock ws->lock. - (gomp_loop_dynamic_start, gomp_loop_guided_start): Call - gomp_work_share_init_done after gomp_loop_init. If HAVE_SYNC_BUILTINS, - don't unlock ws->lock, otherwise lock it. - (gomp_loop_ordered_dynamic_start, gomp_loop_ordered_guided_start): Call - gomp_work_share_init_done after gomp_loop_init. Lock ws->lock. - (gomp_parallel_loop_start): Call gomp_new_team instead of - gomp_new_work_share. Call gomp_loop_init on &team->work_shares[0]. - Adjust gomp_team_start caller. - * single.c (GOMP_single_start, GOMP_single_copy_start): Call - gomp_work_share_init_done if gomp_work_share_start returned true. - Don't unlock ws->lock. - * parallel.c (GOMP_parallel_start): Call gomp_new_team and pass that - as last argument to gomp_team_start. - * config/linux/ptrlock.c: New file. - * config/linux/ptrlock.h: New file. - * config/posix/ptrlock.c: New file. - * config/posix/ptrlock.h: New file. - * Makefile.am (libgomp_la_SOURCES): Add ptrlock.c. - * Makefile.in: Regenerated. - * testsuite/Makefile.in: Regenerated. - -2008-03-19 Jakub Jelinek <jakub@redhat.com> - - * libgomp.h (gomp_active_wait_policy): Remove decl. - (gomp_throttled_spin_count_var, gomp_available_cpus, - gomp_managed_threads): New extern decls. - * team.c (gomp_team_start, gomp_team_end): If number of threads - changed, adjust atomically gomp_managed_threads. - * env.c (gomp_active_wait_policy, gomp_block_time_var): Remove. - (gomp_throttled_spin_count_var, gomp_available_cpus, - gomp_managed_threads): New variables. - (parse_millis): Removed. - (parse_spincount): New function. - (parse_wait_policy): Return -1/0/1 instead of setting - gomp_active_wait_policy. - (initialize_env): Call gomp_init_num_threads unconditionally. - Initialize gomp_available_cpus. Call parse_spincount instead - of parse_millis, initialize gomp_{,throttled_}spin_count_var - depending on presence and value of OMP_WAIT_POLICY and - GOMP_SPINCOUNT env vars. - * config/linux/wait.h (do_wait): Use gomp_throttled_spin_count_var - instead of gomp_spin_count_var if gomp_managed_threads > - gomp_available_cpus. - - * config/linux/wait.h: Include errno.h. - (FUTEX_WAIT, FUTEX_WAKE, FUTEX_PRIVATE_FLAG): Define. - (gomp_futex_wake, gomp_futex_wait): New extern decls. - * config/linux/mutex.c (gomp_futex_wake, gomp_futex_wait): New - variables. - * config/linux/powerpc/futex.h (FUTEX_WAIT, FUTEX_WAKE): Remove. - (sys_futex0): Return error code. - (futex_wake, futex_wait): If ENOSYS was returned, clear - FUTEX_PRIVATE_FLAG in gomp_futex_wa{ke,it} and retry. - * config/linux/alpha/futex.h (FUTEX_WAIT, FUTEX_WAKE): Remove. - (futex_wake, futex_wait): If ENOSYS was returned, clear - FUTEX_PRIVATE_FLAG in gomp_futex_wa{ke,it} and retry. - * config/linux/x86/futex.h (FUTEX_WAIT, FUTEX_WAKE): Remove. - (sys_futex0): Return error code. - (futex_wake, futex_wait): If ENOSYS was returned, clear - FUTEX_PRIVATE_FLAG in gomp_futex_wa{ke,it} and retry. - * config/linux/s390/futex.h (FUTEX_WAIT, FUTEX_WAKE): Remove. - (sys_futex0): Return error code. - (futex_wake, futex_wait): If ENOSYS was returned, clear - FUTEX_PRIVATE_FLAG in gomp_futex_wa{ke,it} and retry. - * config/linux/ia64/futex.h (FUTEX_WAIT, FUTEX_WAKE): Remove. - (sys_futex0): Return error code. - (futex_wake, futex_wait): If ENOSYS was returned, clear - FUTEX_PRIVATE_FLAG in gomp_futex_wa{ke,it} and retry. - * config/linux/sparc/futex.h (FUTEX_WAIT, FUTEX_WAKE): Remove. - (sys_futex0): Return error code. - (futex_wake, futex_wait): If ENOSYS was returned, clear - FUTEX_PRIVATE_FLAG in gomp_futex_wa{ke,it} and retry. - -2008-03-18 Jakub Jelinek <jakub@redhat.com> - - * libgomp.h (struct gomp_work_share): Add mode field. Put lock and - next into a different cache line from most of the write-once fields. - * loop.c: Include limits.h. - (gomp_loop_init): For GFS_DYNAMIC, multiply ws->chunk_size by incr. - If adding ws->chunk_size nthreads + 1 times after end won't - overflow, set ws->mode to 1. - * iter.c (gomp_iter_dynamic_next_locked): Don't multiply - ws->chunk_size by incr. - (gomp_iter_dynamic_next): Likewise. If ws->mode, use more efficient - code. - * work.c: Include stddef.h. - (gomp_new_work_share): Use offsetof rather than sizeof. - -2008-03-17 Jakub Jelinek <jakub@redhat.com> - - * libgomp.h (struct gomp_team): Change ordered_release field - into gomp_sem_t ** from flexible array member. Add implicit_task - and initial_work_shares fields. - (gomp_new_task): Removed. - (gomp_init_task): New prototype. - * team.c (new_team): Allocate implicit_task for each thread - and initial work_shares together with gomp_team allocation. - (free_team): Only free work_shares if it is not init_work_shares. - (gomp_team_start): Use gomp_init_task instead of gomp_new_task, - set thr->task to the corresponding implicit_task array entry. - * task.c (gomp_new_task): Removed. - (gomp_init_task): New function. - (gomp_end_task): Don't free the task. - (GOMP_task): Allocate struct gomp_task on the stack, call - gomp_init_task rather than gomp_new_task. - * work.c (gomp_work_share_start): If work_shares == - init_work_shares, gomp_malloc + memcpy rather than gomp_realloc. - -2008-03-15 Jakub Jelinek <jakub@redhat.com> - Ulrich Drepper <drepper@redhat.com> - - * config/linux/bar.h (gomp_barrier_state_t): Rewritten. - (gomp_barrier_state_t): Change to unsigned int. - (gomp_barrier_init, gomp_barrier_reinit, gomp_barrier_destroy, - gomp_barrier_wait_start, gomp_barrier_last_thread): Rewritten. - (gomp_barrier_wait_last): Prototype rather than inline. - * config/linux/bar.c (gomp_barrier_wait_end): Rewritten. - (gomp_barrier_wait_last): New function. - -2008-03-15 Jakub Jelinek <jakub@redhat.com> - - * team.c (gomp_thread_start): Use gomp_barrier_wait_last instead - of gomp_barrier_wait. - * env.c (gomp_block_time_var, gomp_spin_count_var): New variables. - (parse_millis): New function. - (initialize_env): Handle GOMP_BLOCKTIME env var. - * libgomp.h (struct gomp_team): Move close to the end of the struct. - (gomp_spin_count_var): New extern var decl. - * work.c (gomp_work_share_end): Use gomp_barrier_state_t bstate - var instead of bool last, call gomp_barrier_last_thread to check - for last thread, pass bstate to gomp_barrier_wait_end. - * config/linux/wait.h: New file. - * config/linux/mutex.c: Include wait.h instead of libgomp.h and - futex.h. - (gomp_mutex_lock_slow): Call do_wait instead of futex_wait. - * config/linux/bar.c: Include wait.h instead of libgomp.h and - futex.h. - (gomp_barrier_wait_end): Change second argument to - gomp_barrier_state_t. Call do_wait instead of futex_wait. - * config/linux/sem.c: Include wait.h instead of libgomp.h and - futex.h. - (gomp_sem_wait_slow): Call do_wait instead of futex_wait. - * config/linux/lock.c: Include wait.h instead of libgomp.h and - futex.h. - (gomp_set_nest_lock_25): Call do_wait instead of futex_wait. - * config/linux/affinity.c: Assume HAVE_SYNC_BUILTINS. - * config/linux/bar.h (gomp_barrier_state_t): New typedef. - (gomp_barrier_wait_end): Change second argument to - gomp_barrier_state_t. - (gomp_barrier_wait_start): Return gomp_barrier_state_t. - (gomp_barrier_last_thread, gomp_barrier_wait_last): New static - inlines. - * config/linux/powerpc/futex.h (cpu_relax, atomic_write_barrier): New - static inlines. - * config/linux/alpha/futex.h (cpu_relax, atomic_write_barrier): - Likewise. - * config/linux/x86/futex.h (cpu_relax, atomic_write_barrier): - Likewise. - * config/linux/s390/futex.h (cpu_relax, atomic_write_barrier): - Likewise. - * config/linux/ia64/futex.h (cpu_relax, atomic_write_barrier): - Likewise. - * config/linux/sparc/futex.h (cpu_relax, atomic_write_barrier): - Likewise. - * config/posix/bar.c (gomp_barrier_wait_end): Change second argument - to gomp_barrier_state_t. - * config/posix/bar.h (gomp_barrier_state_t): New typedef. - (gomp_barrier_wait_end): Change second argument to - gomp_barrier_state_t. - (gomp_barrier_wait_start): Return gomp_barrier_state_t. - (gomp_barrier_last_thread, gomp_barrier_wait_last): New static - inlines. - ---- libgomp/parallel.c.jj 2007-12-07 14:41:01.000000000 +0100 -+++ libgomp/parallel.c 2008-03-26 15:32:06.000000000 +0100 -@@ -68,7 +68,7 @@ void - GOMP_parallel_start (void (*fn) (void *), void *data, unsigned num_threads) - { - num_threads = gomp_resolve_num_threads (num_threads); -- gomp_team_start (fn, data, num_threads, NULL); -+ gomp_team_start (fn, data, num_threads, gomp_new_team (num_threads)); - } - - void ---- libgomp/sections.c.jj 2007-12-07 14:41:01.000000000 +0100 -+++ libgomp/sections.c 2008-03-26 15:33:06.000000000 +0100 -@@ -59,14 +59,24 @@ GOMP_sections_start (unsigned count) - long s, e, ret; - - if (gomp_work_share_start (false)) -- gomp_sections_init (thr->ts.work_share, count); -+ { -+ gomp_sections_init (thr->ts.work_share, count); -+ gomp_work_share_init_done (); -+ } - -+#ifdef HAVE_SYNC_BUILTINS -+ if (gomp_iter_dynamic_next (&s, &e)) -+ ret = s; -+ else -+ ret = 0; -+#else -+ gomp_mutex_lock (&thr->ts.work_share->lock); - if (gomp_iter_dynamic_next_locked (&s, &e)) - ret = s; - else - ret = 0; -- - gomp_mutex_unlock (&thr->ts.work_share->lock); -+#endif - - return ret; - } -@@ -83,15 +93,23 @@ GOMP_sections_start (unsigned count) - unsigned - GOMP_sections_next (void) - { -- struct gomp_thread *thr = gomp_thread (); - long s, e, ret; - -+#ifdef HAVE_SYNC_BUILTINS -+ if (gomp_iter_dynamic_next (&s, &e)) -+ ret = s; -+ else -+ ret = 0; -+#else -+ struct gomp_thread *thr = gomp_thread (); -+ - gomp_mutex_lock (&thr->ts.work_share->lock); - if (gomp_iter_dynamic_next_locked (&s, &e)) - ret = s; - else - ret = 0; - gomp_mutex_unlock (&thr->ts.work_share->lock); -+#endif - - return ret; - } -@@ -103,15 +121,15 @@ void - GOMP_parallel_sections_start (void (*fn) (void *), void *data, - unsigned num_threads, unsigned count) - { -- struct gomp_work_share *ws; -+ struct gomp_team *team; - - num_threads = gomp_resolve_num_threads (num_threads); - if (gomp_dyn_var && num_threads > count) - num_threads = count; - -- ws = gomp_new_work_share (false, num_threads); -- gomp_sections_init (ws, count); -- gomp_team_start (fn, data, num_threads, ws); -+ team = gomp_new_team (num_threads); -+ gomp_sections_init (&team->work_shares[0], count); -+ gomp_team_start (fn, data, num_threads, team); - } - - /* The GOMP_section_end* routines are called after the thread is told ---- libgomp/env.c.jj 2007-12-07 14:41:01.000000000 +0100 -+++ libgomp/env.c 2008-03-26 16:40:26.000000000 +0100 -@@ -44,6 +44,11 @@ enum gomp_schedule_type gomp_run_sched_v - unsigned long gomp_run_sched_chunk = 1; - unsigned short *gomp_cpu_affinity; - size_t gomp_cpu_affinity_len; -+#ifndef HAVE_SYNC_BUILTINS -+gomp_mutex_t gomp_remaining_threads_lock; -+#endif -+unsigned long gomp_available_cpus = 1, gomp_managed_threads = 1; -+unsigned long long gomp_spin_count_var, gomp_throttled_spin_count_var; - - /* Parse the OMP_SCHEDULE environment variable. */ - -@@ -147,6 +152,79 @@ parse_unsigned_long (const char *name, u - return false; - } - -+/* Parse the GOMP_SPINCOUNT environment varible. Return true if one was -+ present and it was successfully parsed. */ -+ -+static bool -+parse_spincount (const char *name, unsigned long long *pvalue) -+{ -+ char *env, *end; -+ unsigned long long value, mult = 1; -+ -+ env = getenv (name); -+ if (env == NULL) -+ return false; -+ -+ while (isspace ((unsigned char) *env)) -+ ++env; -+ if (*env == '\0') -+ goto invalid; -+ -+ if (strncasecmp (env, "infinite", 8) == 0 -+ || strncasecmp (env, "infinity", 8) == 0) -+ { -+ value = ~0ULL; -+ end = env + 8; -+ goto check_tail; -+ } -+ -+ errno = 0; -+ value = strtoull (env, &end, 10); -+ if (errno) -+ goto invalid; -+ -+ while (isspace ((unsigned char) *end)) -+ ++end; -+ if (*end != '\0') -+ { -+ switch (tolower (*end)) -+ { -+ case 'k': -+ mult = 1000LL; -+ break; -+ case 'm': -+ mult = 1000LL * 1000LL; -+ break; -+ case 'g': -+ mult = 1000LL * 1000LL * 1000LL; -+ break; -+ case 't': -+ mult = 1000LL * 1000LL * 1000LL * 1000LL; -+ break; -+ default: -+ goto invalid; -+ } -+ ++end; -+ check_tail: -+ while (isspace ((unsigned char) *end)) -+ ++end; -+ if (*end != '\0') -+ goto invalid; -+ } -+ -+ if (value > ~0ULL / mult) -+ value = ~0ULL; -+ else -+ value *= mult; -+ -+ *pvalue = value; -+ return true; -+ -+ invalid: -+ gomp_error ("Invalid value for environment variable %s", name); -+ return false; -+} -+ - /* Parse a boolean value for environment variable NAME and store the - result in VALUE. */ - -@@ -281,10 +359,25 @@ initialize_env (void) - parse_schedule (); - parse_boolean ("OMP_DYNAMIC", &gomp_dyn_var); - parse_boolean ("OMP_NESTED", &gomp_nest_var); -+ gomp_init_num_threads (); -+ gomp_available_cpus = gomp_nthreads_var; - if (!parse_unsigned_long ("OMP_NUM_THREADS", &gomp_nthreads_var)) -- gomp_init_num_threads (); -+ gomp_nthreads_var = gomp_available_cpus; - if (parse_affinity ()) - gomp_init_affinity (); -+ if (!parse_spincount ("GOMP_SPINCOUNT", &gomp_spin_count_var)) -+ { -+ /* Using a rough estimation of 100000 spins per msec, -+ use 200 msec blocking. -+ Depending on the CPU speed, this can be e.g. 5 times longer -+ or 5 times shorter. */ -+ gomp_spin_count_var = 20000000LL; -+ } -+ /* gomp_throttled_spin_count_var is used when there are more libgomp -+ managed threads than available CPUs. Use very short spinning. */ -+ gomp_throttled_spin_count_var = 100LL; -+ if (gomp_throttled_spin_count_var > gomp_spin_count_var) -+ gomp_throttled_spin_count_var = gomp_spin_count_var; - - /* Not strictly environment related, but ordering constructors is tricky. */ - pthread_attr_init (&gomp_thread_attr); ---- libgomp/libgomp.h.jj 2007-12-07 14:41:01.000000000 +0100 -+++ libgomp/libgomp.h 2008-03-27 12:21:51.000000000 +0100 -@@ -50,6 +50,7 @@ - #include "sem.h" - #include "mutex.h" - #include "bar.h" -+#include "ptrlock.h" - - - /* This structure contains the data to control one work-sharing construct, -@@ -70,6 +71,8 @@ struct gomp_work_share - If this is a SECTIONS construct, this value will always be DYNAMIC. */ - enum gomp_schedule_type sched; - -+ int mode; -+ - /* This is the chunk_size argument to the SCHEDULE clause. */ - long chunk_size; - -@@ -81,17 +84,38 @@ struct gomp_work_share - is always 1. */ - long incr; - -- /* This lock protects the update of the following members. */ -- gomp_mutex_t lock; -+ /* This is a circular queue that details which threads will be allowed -+ into the ordered region and in which order. When a thread allocates -+ iterations on which it is going to work, it also registers itself at -+ the end of the array. When a thread reaches the ordered region, it -+ checks to see if it is the one at the head of the queue. If not, it -+ blocks on its RELEASE semaphore. */ -+ unsigned *ordered_team_ids; - -- union { -- /* This is the next iteration value to be allocated. In the case of -- GFS_STATIC loops, this the iteration start point and never changes. */ -- long next; -+ /* This is the number of threads that have registered themselves in -+ the circular queue ordered_team_ids. */ -+ unsigned ordered_num_used; - -- /* This is the returned data structure for SINGLE COPYPRIVATE. */ -- void *copyprivate; -- }; -+ /* This is the team_id of the currently acknowledged owner of the ordered -+ section, or -1u if the ordered section has not been acknowledged by -+ any thread. This is distinguished from the thread that is *allowed* -+ to take the section next. */ -+ unsigned ordered_owner; -+ -+ /* This is the index into the circular queue ordered_team_ids of the -+ current thread that's allowed into the ordered reason. */ -+ unsigned ordered_cur; -+ -+ /* This is a chain of allocated gomp_work_share blocks, valid only -+ in the first gomp_work_share struct in the block. */ -+ struct gomp_work_share *next_alloc; -+ -+ /* The above fields are written once during workshare initialization, -+ or related to ordered worksharing. Make sure the following fields -+ are in a different cache line. */ -+ -+ /* This lock protects the update of the following members. */ -+ gomp_mutex_t lock __attribute__((aligned (64))); - - /* This is the count of the number of threads that have exited the work - share construct. If the construct was marked nowait, they have moved on -@@ -99,27 +123,28 @@ struct gomp_work_share - of the team to exit the work share construct must deallocate it. */ - unsigned threads_completed; - -- /* This is the index into the circular queue ordered_team_ids of the -- current thread that's allowed into the ordered reason. */ -- unsigned ordered_cur; -+ union { -+ /* This is the next iteration value to be allocated. In the case of -+ GFS_STATIC loops, this the iteration start point and never changes. */ -+ long next; - -- /* This is the number of threads that have registered themselves in -- the circular queue ordered_team_ids. */ -- unsigned ordered_num_used; -+ /* This is the returned data structure for SINGLE COPYPRIVATE. */ -+ void *copyprivate; -+ }; - -- /* This is the team_id of the currently acknoledged owner of the ordered -- section, or -1u if the ordered section has not been acknowledged by -- any thread. This is distinguished from the thread that is *allowed* -- to take the section next. */ -- unsigned ordered_owner; -+ union { -+ /* Link to gomp_work_share struct for next work sharing construct -+ encountered after this one. */ -+ gomp_ptrlock_t next_ws; -+ -+ /* gomp_work_share structs are chained in the free work share cache -+ through this. */ -+ struct gomp_work_share *next_free; -+ }; - -- /* This is a circular queue that details which threads will be allowed -- into the ordered region and in which order. When a thread allocates -- iterations on which it is going to work, it also registers itself at -- the end of the array. When a thread reaches the ordered region, it -- checks to see if it is the one at the head of the queue. If not, it -- blocks on its RELEASE semaphore. */ -- unsigned ordered_team_ids[]; -+ /* If only few threads are in the team, ordered_team_ids can point -+ to this array which fills the padding at the end of this struct. */ -+ unsigned inline_ordered_team_ids[0]; - }; - - /* This structure contains all of the thread-local data associated with -@@ -133,21 +158,24 @@ struct gomp_team_state - - /* This is the work share construct which this thread is currently - processing. Recall that with NOWAIT, not all threads may be -- processing the same construct. This value is NULL when there -- is no construct being processed. */ -+ processing the same construct. */ - struct gomp_work_share *work_share; - -+ /* This is the previous work share construct or NULL if there wasn't any. -+ When all threads are done with the current work sharing construct, -+ the previous one can be freed. The current one can't, as its -+ next_ws field is used. */ -+ struct gomp_work_share *last_work_share; -+ - /* This is the ID of this thread within the team. This value is - guaranteed to be between 0 and N-1, where N is the number of - threads in the team. */ - unsigned team_id; - -- /* The work share "generation" is a number that increases by one for -- each work share construct encountered in the dynamic flow of the -- program. It is used to find the control data for the work share -- when encountering it for the first time. This particular number -- reflects the generation of the work_share member of this struct. */ -- unsigned work_share_generation; -+#ifdef HAVE_SYNC_BUILTINS -+ /* Number of single stmts encountered. */ -+ unsigned long single_count; -+#endif - - /* For GFS_RUNTIME loops that resolved to GFS_STATIC, this is the - trip number through the loop. So first time a particular loop -@@ -163,41 +191,53 @@ struct gomp_team_state - - struct gomp_team - { -- /* This lock protects access to the following work shares data structures. */ -- gomp_mutex_t work_share_lock; -- -- /* This is a dynamically sized array containing pointers to the control -- structs for all "live" work share constructs. Here "live" means that -- the construct has been encountered by at least one thread, and not -- completed by all threads. */ -- struct gomp_work_share **work_shares; -- -- /* The work_shares array is indexed by "generation & generation_mask". -- The mask will be 2**N - 1, where 2**N is the size of the array. */ -- unsigned generation_mask; -- -- /* These two values define the bounds of the elements of the work_shares -- array that are currently in use. */ -- unsigned oldest_live_gen; -- unsigned num_live_gen; -- - /* This is the number of threads in the current team. */ - unsigned nthreads; - -+ /* This is number of gomp_work_share structs that have been allocated -+ as a block last time. */ -+ unsigned work_share_chunk; -+ - /* This is the saved team state that applied to a master thread before - the current thread was created. */ - struct gomp_team_state prev_ts; - -- /* This barrier is used for most synchronization of the team. */ -- gomp_barrier_t barrier; -- - /* This semaphore should be used by the master thread instead of its - "native" semaphore in the thread structure. Required for nested - parallels, as the master is a member of two teams. */ - gomp_sem_t master_release; - -- /* This array contains pointers to the release semaphore of the threads -- in the team. */ -+ /* List of gomp_work_share structs chained through next_free fields. -+ This is populated and taken off only by the first thread in the -+ team encountering a new work sharing construct, in a critical -+ section. */ -+ struct gomp_work_share *work_share_list_alloc; -+ -+ /* List of gomp_work_share structs freed by free_work_share. New -+ entries are atomically added to the start of the list, and -+ alloc_work_share can safely only move all but the first entry -+ to work_share_list alloc, as free_work_share can happen concurrently -+ with alloc_work_share. */ -+ struct gomp_work_share *work_share_list_free; -+ -+#ifdef HAVE_SYNC_BUILTINS -+ /* Number of simple single regions encountered by threads in this -+ team. */ -+ unsigned long single_count; -+#else -+ /* Mutex protecting addition of workshares to work_share_list_free. */ -+ gomp_mutex_t work_share_list_free_lock; -+#endif -+ -+ /* This barrier is used for most synchronization of the team. */ -+ gomp_barrier_t barrier; -+ -+ /* Initial work shares, to avoid allocating any gomp_work_share -+ structs in the common case. */ -+ struct gomp_work_share work_shares[8]; -+ -+ /* This is an array with pointers to the release semaphore -+ of the threads in the team. */ - gomp_sem_t *ordered_release[]; - }; - -@@ -242,6 +282,11 @@ extern bool gomp_dyn_var; - extern bool gomp_nest_var; - extern enum gomp_schedule_type gomp_run_sched_var; - extern unsigned long gomp_run_sched_chunk; -+#ifndef HAVE_SYNC_BUILTINS -+extern gomp_mutex_t gomp_remaining_threads_lock; -+#endif -+extern unsigned long long gomp_spin_count_var, gomp_throttled_spin_count_var; -+extern unsigned long gomp_available_cpus, gomp_managed_threads; - - /* The attributes to be used during thread creation. */ - extern pthread_attr_t gomp_thread_attr; -@@ -306,17 +351,27 @@ extern unsigned gomp_dynamic_max_threads - - /* team.c */ - -+extern struct gomp_team *gomp_new_team (unsigned); - extern void gomp_team_start (void (*) (void *), void *, unsigned, -- struct gomp_work_share *); -+ struct gomp_team *); - extern void gomp_team_end (void); - - /* work.c */ - --extern struct gomp_work_share * gomp_new_work_share (bool, unsigned); -+extern void gomp_init_work_share (struct gomp_work_share *, bool, unsigned); -+extern void gomp_fini_work_share (struct gomp_work_share *); - extern bool gomp_work_share_start (bool); - extern void gomp_work_share_end (void); - extern void gomp_work_share_end_nowait (void); - -+static inline void -+gomp_work_share_init_done (void) -+{ -+ struct gomp_thread *thr = gomp_thread (); -+ if (__builtin_expect (thr->ts.last_work_share != NULL, 1)) -+ gomp_ptrlock_set (&thr->ts.last_work_share->next_ws, thr->ts.work_share); -+} -+ - #ifdef HAVE_ATTRIBUTE_VISIBILITY - # pragma GCC visibility pop - #endif ---- libgomp/iter.c.jj 2008-03-26 14:48:34.000000000 +0100 -+++ libgomp/iter.c 2008-03-26 15:11:23.000000000 +0100 -@@ -1,4 +1,4 @@ --/* Copyright (C) 2005 Free Software Foundation, Inc. -+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc. - Contributed by Richard Henderson <rth@redhat.com>. - - This file is part of the GNU OpenMP Library (libgomp). -@@ -154,7 +154,7 @@ gomp_iter_dynamic_next_locked (long *pst - if (start == ws->end) - return false; - -- chunk = ws->chunk_size * ws->incr; -+ chunk = ws->chunk_size; - left = ws->end - start; - if (ws->incr < 0) - { -@@ -186,11 +186,38 @@ gomp_iter_dynamic_next (long *pstart, lo - struct gomp_work_share *ws = thr->ts.work_share; - long start, end, nend, chunk, incr; - -- start = ws->next; - end = ws->end; - incr = ws->incr; -- chunk = ws->chunk_size * incr; -+ chunk = ws->chunk_size; -+ -+ if (__builtin_expect (ws->mode, 1)) -+ { -+ long tmp = __sync_fetch_and_add (&ws->next, chunk); -+ if (incr > 0) -+ { -+ if (tmp >= end) -+ return false; -+ nend = tmp + chunk; -+ if (nend > end) -+ nend = end; -+ *pstart = tmp; -+ *pend = nend; -+ return true; -+ } -+ else -+ { -+ if (tmp <= end) -+ return false; -+ nend = tmp + chunk; -+ if (nend < end) -+ nend = end; -+ *pstart = tmp; -+ *pend = nend; -+ return true; -+ } -+ } - -+ start = ws->next; - while (1) - { - long left = end - start; ---- libgomp/work.c.jj 2007-12-07 14:41:01.000000000 +0100 -+++ libgomp/work.c 2008-03-27 12:21:51.000000000 +0100 -@@ -1,4 +1,4 @@ --/* Copyright (C) 2005 Free Software Foundation, Inc. -+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc. - Contributed by Richard Henderson <rth@redhat.com>. - - This file is part of the GNU OpenMP Library (libgomp). -@@ -29,39 +29,138 @@ - of threads. */ - - #include "libgomp.h" -+#include <stddef.h> - #include <stdlib.h> - #include <string.h> - - --/* Create a new work share structure. */ -+/* Allocate a new work share structure, preferably from current team's -+ free gomp_work_share cache. */ - --struct gomp_work_share * --gomp_new_work_share (bool ordered, unsigned nthreads) -+static struct gomp_work_share * -+alloc_work_share (struct gomp_team *team) - { - struct gomp_work_share *ws; -- size_t size; -+ unsigned int i; - -- size = sizeof (*ws); -- if (ordered) -- size += nthreads * sizeof (ws->ordered_team_ids[0]); -+ /* This is called in a critical section. */ -+ if (team->work_share_list_alloc != NULL) -+ { -+ ws = team->work_share_list_alloc; -+ team->work_share_list_alloc = ws->next_free; -+ return ws; -+ } - -- ws = gomp_malloc_cleared (size); -- gomp_mutex_init (&ws->lock); -- ws->ordered_owner = -1; -+#ifdef HAVE_SYNC_BUILTINS -+ ws = team->work_share_list_free; -+ /* We need atomic read from work_share_list_free, -+ as free_work_share can be called concurrently. */ -+ __asm ("" : "+r" (ws)); -+ -+ if (ws && ws->next_free) -+ { -+ struct gomp_work_share *next = ws->next_free; -+ ws->next_free = NULL; -+ team->work_share_list_alloc = next->next_free; -+ return next; -+ } -+#else -+ gomp_mutex_lock (&team->work_share_list_free_lock); -+ ws = team->work_share_list_free; -+ if (ws) -+ { -+ team->work_share_list_alloc = ws->next_free; -+ team->work_share_list_free = NULL; -+ gomp_mutex_unlock (&team->work_share_list_free_lock); -+ return ws; -+ } -+ gomp_mutex_unlock (&team->work_share_list_free_lock); -+#endif - -+ team->work_share_chunk *= 2; -+ ws = gomp_malloc (team->work_share_chunk * sizeof (struct gomp_work_share)); -+ ws->next_alloc = team->work_shares[0].next_alloc; -+ team->work_shares[0].next_alloc = ws; -+ team->work_share_list_alloc = &ws[1]; -+ for (i = 1; i < team->work_share_chunk - 1; i++) -+ ws[i].next_free = &ws[i + 1]; -+ ws[i].next_free = NULL; - return ws; - } - -+/* Initialize an already allocated struct gomp_work_share. -+ This shouldn't touch the next_alloc field. */ -+ -+void -+gomp_init_work_share (struct gomp_work_share *ws, bool ordered, -+ unsigned nthreads) -+{ -+ gomp_mutex_init (&ws->lock); -+ if (__builtin_expect (ordered, 0)) -+ { -+#define INLINE_ORDERED_TEAM_IDS_CNT \ -+ ((sizeof (struct gomp_work_share) \ -+ - offsetof (struct gomp_work_share, inline_ordered_team_ids)) \ -+ / sizeof (((struct gomp_work_share *) 0)->inline_ordered_team_ids[0])) -+ -+ if (nthreads > INLINE_ORDERED_TEAM_IDS_CNT) -+ ws->ordered_team_ids -+ = gomp_malloc (nthreads * sizeof (*ws->ordered_team_ids)); -+ else -+ ws->ordered_team_ids = ws->inline_ordered_team_ids; -+ memset (ws->ordered_team_ids, '\0', -+ nthreads * sizeof (*ws->ordered_team_ids)); -+ ws->ordered_num_used = 0; -+ ws->ordered_owner = -1; -+ ws->ordered_cur = 0; -+ } -+ else -+ ws->ordered_team_ids = NULL; -+ gomp_ptrlock_init (&ws->next_ws, NULL); -+ ws->threads_completed = 0; -+} - --/* Free a work share structure. */ -+/* Do any needed destruction of gomp_work_share fields before it -+ is put back into free gomp_work_share cache or freed. */ - --static void --free_work_share (struct gomp_work_share *ws) -+void -+gomp_fini_work_share (struct gomp_work_share *ws) - { - gomp_mutex_destroy (&ws->lock); -- free (ws); -+ if (ws->ordered_team_ids != ws->inline_ordered_team_ids) -+ free (ws->ordered_team_ids); -+ gomp_ptrlock_destroy (&ws->next_ws); - } - -+/* Free a work share struct, if not orphaned, put it into current -+ team's free gomp_work_share cache. */ -+ -+static inline void -+free_work_share (struct gomp_team *team, struct gomp_work_share *ws) -+{ -+ gomp_fini_work_share (ws); -+ if (__builtin_expect (team == NULL, 0)) -+ free (ws); -+ else -+ { -+ struct gomp_work_share *next_ws; -+#ifdef HAVE_SYNC_BUILTINS -+ do -+ { -+ next_ws = team->work_share_list_free; -+ ws->next_free = next_ws; -+ } -+ while (!__sync_bool_compare_and_swap (&team->work_share_list_free, -+ next_ws, ws)); -+#else -+ gomp_mutex_lock (&team->work_share_list_free_lock); -+ next_ws = team->work_share_list_free; -+ ws->next_free = next_ws; -+ team->work_share_list_free = ws; -+ gomp_mutex_unlock (&team->work_share_list_free_lock); -+#endif -+ } -+} - - /* The current thread is ready to begin the next work sharing construct. - In all cases, thr->ts.work_share is updated to point to the new -@@ -74,71 +173,34 @@ gomp_work_share_start (bool ordered) - struct gomp_thread *thr = gomp_thread (); - struct gomp_team *team = thr->ts.team; - struct gomp_work_share *ws; -- unsigned ws_index, ws_gen; - - /* Work sharing constructs can be orphaned. */ - if (team == NULL) - { -- ws = gomp_new_work_share (ordered, 1); -+ ws = gomp_malloc (sizeof (*ws)); -+ gomp_init_work_share (ws, ordered, 1); - thr->ts.work_share = ws; -- thr->ts.static_trip = 0; -- gomp_mutex_lock (&ws->lock); -- return true; -+ return ws; - } - -- gomp_mutex_lock (&team->work_share_lock); -- -- /* This thread is beginning its next generation. */ -- ws_gen = ++thr->ts.work_share_generation; -- -- /* If this next generation is not newer than any other generation in -- the team, then simply reference the existing construct. */ -- if (ws_gen - team->oldest_live_gen < team->num_live_gen) -+ ws = thr->ts.work_share; -+ thr->ts.last_work_share = ws; -+ ws = gomp_ptrlock_get (&ws->next_ws); -+ if (ws == NULL) - { -- ws_index = ws_gen & team->generation_mask; -- ws = team->work_shares[ws_index]; -+ /* This thread encountered a new ws first. */ -+ struct gomp_work_share *ws = alloc_work_share (team); -+ gomp_init_work_share (ws, ordered, team->nthreads); - thr->ts.work_share = ws; -- thr->ts.static_trip = 0; -- -- gomp_mutex_lock (&ws->lock); -- gomp_mutex_unlock (&team->work_share_lock); -- -- return false; -+ return true; - } -- -- /* Resize the work shares queue if we've run out of space. */ -- if (team->num_live_gen++ == team->generation_mask) -+ else - { -- team->work_shares = gomp_realloc (team->work_shares, -- 2 * team->num_live_gen -- * sizeof (*team->work_shares)); -- -- /* Unless oldest_live_gen is zero, the sequence of live elements -- wraps around the end of the array. If we do nothing, we break -- lookup of the existing elements. Fix that by unwrapping the -- data from the front to the end. */ -- if (team->oldest_live_gen > 0) -- memcpy (team->work_shares + team->num_live_gen, -- team->work_shares, -- (team->oldest_live_gen & team->generation_mask) -- * sizeof (*team->work_shares)); -- -- team->generation_mask = team->generation_mask * 2 + 1; -- } -- -- ws_index = ws_gen & team->generation_mask; -- ws = gomp_new_work_share (ordered, team->nthreads); -- thr->ts.work_share = ws; -- thr->ts.static_trip = 0; -- team->work_shares[ws_index] = ws; -- -- gomp_mutex_lock (&ws->lock); -- gomp_mutex_unlock (&team->work_share_lock); -- -- return true; -+ thr->ts.work_share = ws; -+ return false; -+ } - } - -- - /* The current thread is done with its current work sharing construct. - This version does imply a barrier at the end of the work-share. */ - -@@ -147,36 +209,28 @@ gomp_work_share_end (void) - { - struct gomp_thread *thr = gomp_thread (); - struct gomp_team *team = thr->ts.team; -- struct gomp_work_share *ws = thr->ts.work_share; -- bool last; -- -- thr->ts.work_share = NULL; -+ gomp_barrier_state_t bstate; - - /* Work sharing constructs can be orphaned. */ - if (team == NULL) - { -- free_work_share (ws); -+ free_work_share (NULL, thr->ts.work_share); -+ thr->ts.work_share = NULL; - return; - } - -- last = gomp_barrier_wait_start (&team->barrier); -+ bstate = gomp_barrier_wait_start (&team->barrier); - -- if (last) -+ if (gomp_barrier_last_thread (bstate)) - { -- unsigned ws_index; -- -- ws_index = thr->ts.work_share_generation & team->generation_mask; -- team->work_shares[ws_index] = NULL; -- team->oldest_live_gen++; -- team->num_live_gen = 0; -- -- free_work_share (ws); -+ if (__builtin_expect (thr->ts.last_work_share != NULL, 1)) -+ free_work_share (team, thr->ts.last_work_share); - } - -- gomp_barrier_wait_end (&team->barrier, last); -+ gomp_barrier_wait_end (&team->barrier, bstate); -+ thr->ts.last_work_share = NULL; - } - -- - /* The current thread is done with its current work sharing construct. - This version does NOT imply a barrier at the end of the work-share. */ - -@@ -188,15 +242,17 @@ gomp_work_share_end_nowait (void) - struct gomp_work_share *ws = thr->ts.work_share; - unsigned completed; - -- thr->ts.work_share = NULL; -- - /* Work sharing constructs can be orphaned. */ - if (team == NULL) - { -- free_work_share (ws); -+ free_work_share (NULL, ws); -+ thr->ts.work_share = NULL; - return; - } - -+ if (__builtin_expect (thr->ts.last_work_share == NULL, 0)) -+ return; -+ - #ifdef HAVE_SYNC_BUILTINS - completed = __sync_add_and_fetch (&ws->threads_completed, 1); - #else -@@ -206,18 +262,6 @@ gomp_work_share_end_nowait (void) - #endif - - if (completed == team->nthreads) -- { -- unsigned ws_index; -- -- gomp_mutex_lock (&team->work_share_lock); -- -- ws_index = thr->ts.work_share_generation & team->generation_mask; -- team->work_shares[ws_index] = NULL; -- team->oldest_live_gen++; -- team->num_live_gen--; -- -- gomp_mutex_unlock (&team->work_share_lock); -- -- free_work_share (ws); -- } -+ free_work_share (team, thr->ts.last_work_share); -+ thr->ts.last_work_share = NULL; - } ---- libgomp/single.c.jj 2007-12-07 14:41:01.000000000 +0100 -+++ libgomp/single.c 2008-03-26 15:11:32.000000000 +0100 -@@ -1,4 +1,4 @@ --/* Copyright (C) 2005 Free Software Foundation, Inc. -+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc. - Contributed by Richard Henderson <rth@redhat.com>. - - This file is part of the GNU OpenMP Library (libgomp). -@@ -37,10 +37,24 @@ - bool - GOMP_single_start (void) - { -+#ifdef HAVE_SYNC_BUILTINS -+ struct gomp_thread *thr = gomp_thread (); -+ struct gomp_team *team = thr->ts.team; -+ unsigned long single_count; -+ -+ if (__builtin_expect (team == NULL, 0)) -+ return true; -+ -+ single_count = thr->ts.single_count++; -+ return __sync_bool_compare_and_swap (&team->single_count, single_count, -+ single_count + 1L); -+#else - bool ret = gomp_work_share_start (false); -- gomp_mutex_unlock (&gomp_thread ()->ts.work_share->lock); -+ if (ret) -+ gomp_work_share_init_done (); - gomp_work_share_end_nowait (); - return ret; -+#endif - } - - /* This routine is called when first encountering a SINGLE construct that -@@ -57,10 +71,12 @@ GOMP_single_copy_start (void) - void *ret; - - first = gomp_work_share_start (false); -- gomp_mutex_unlock (&thr->ts.work_share->lock); - - if (first) -- ret = NULL; -+ { -+ gomp_work_share_init_done (); -+ ret = NULL; -+ } - else - { - gomp_barrier_wait (&thr->ts.team->barrier); ---- libgomp/loop.c.jj 2007-12-07 14:41:01.000000000 +0100 -+++ libgomp/loop.c 2008-03-26 18:47:04.000000000 +0100 -@@ -27,8 +27,9 @@ - - /* This file handles the LOOP (FOR/DO) construct. */ - --#include "libgomp.h" -+#include <limits.h> - #include <stdlib.h> -+#include "libgomp.h" - - - /* Initialize the given work share construct from the given arguments. */ -@@ -44,6 +45,39 @@ gomp_loop_init (struct gomp_work_share * - ? start : end; - ws->incr = incr; - ws->next = start; -+ if (sched == GFS_DYNAMIC) -+ { -+ ws->chunk_size *= incr; -+ -+#ifdef HAVE_SYNC_BUILTINS -+ { -+ /* For dynamic scheduling prepare things to make each iteration -+ faster. */ -+ struct gomp_thread *thr = gomp_thread (); -+ struct gomp_team *team = thr->ts.team; -+ long nthreads = team ? team->nthreads : 1; -+ -+ if (__builtin_expect (incr > 0, 1)) -+ { -+ /* Cheap overflow protection. */ -+ if (__builtin_expect ((nthreads | ws->chunk_size) -+ >= 1UL << (sizeof (long) -+ * __CHAR_BIT__ / 2 - 1), 0)) -+ ws->mode = 0; -+ else -+ ws->mode = ws->end < (LONG_MAX -+ - (nthreads + 1) * ws->chunk_size); -+ } -+ /* Cheap overflow protection. */ -+ else if (__builtin_expect ((nthreads | -ws->chunk_size) -+ >= 1UL << (sizeof (long) -+ * __CHAR_BIT__ / 2 - 1), 0)) -+ ws->mode = 0; -+ else -+ ws->mode = ws->end > (nthreads + 1) * -ws->chunk_size - LONG_MAX; -+ } -+#endif -+ } - } - - /* The *_start routines are called when first encountering a loop construct -@@ -68,10 +102,13 @@ gomp_loop_static_start (long start, long - { - struct gomp_thread *thr = gomp_thread (); - -+ thr->ts.static_trip = 0; - if (gomp_work_share_start (false)) -- gomp_loop_init (thr->ts.work_share, start, end, incr, -- GFS_STATIC, chunk_size); -- gomp_mutex_unlock (&thr->ts.work_share->lock); -+ { -+ gomp_loop_init (thr->ts.work_share, start, end, incr, -+ GFS_STATIC, chunk_size); -+ gomp_work_share_init_done (); -+ } - - return !gomp_iter_static_next (istart, iend); - } -@@ -84,13 +121,16 @@ gomp_loop_dynamic_start (long start, lon - bool ret; - - if (gomp_work_share_start (false)) -- gomp_loop_init (thr->ts.work_share, start, end, incr, -- GFS_DYNAMIC, chunk_size); -+ { -+ gomp_loop_init (thr->ts.work_share, start, end, incr, -+ GFS_DYNAMIC, chunk_size); -+ gomp_work_share_init_done (); -+ } - - #ifdef HAVE_SYNC_BUILTINS -- gomp_mutex_unlock (&thr->ts.work_share->lock); - ret = gomp_iter_dynamic_next (istart, iend); - #else -+ gomp_mutex_lock (&thr->ts.work_share->lock); - ret = gomp_iter_dynamic_next_locked (istart, iend); - gomp_mutex_unlock (&thr->ts.work_share->lock); - #endif -@@ -106,13 +146,16 @@ gomp_loop_guided_start (long start, long - bool ret; - - if (gomp_work_share_start (false)) -- gomp_loop_init (thr->ts.work_share, start, end, incr, -- GFS_GUIDED, chunk_size); -+ { -+ gomp_loop_init (thr->ts.work_share, start, end, incr, -+ GFS_GUIDED, chunk_size); -+ gomp_work_share_init_done (); -+ } - - #ifdef HAVE_SYNC_BUILTINS -- gomp_mutex_unlock (&thr->ts.work_share->lock); - ret = gomp_iter_guided_next (istart, iend); - #else -+ gomp_mutex_lock (&thr->ts.work_share->lock); - ret = gomp_iter_guided_next_locked (istart, iend); - gomp_mutex_unlock (&thr->ts.work_share->lock); - #endif -@@ -149,13 +192,14 @@ gomp_loop_ordered_static_start (long sta - { - struct gomp_thread *thr = gomp_thread (); - -+ thr->ts.static_trip = 0; - if (gomp_work_share_start (true)) - { - gomp_loop_init (thr->ts.work_share, start, end, incr, - GFS_STATIC, chunk_size); - gomp_ordered_static_init (); -+ gomp_work_share_init_done (); - } -- gomp_mutex_unlock (&thr->ts.work_share->lock); - - return !gomp_iter_static_next (istart, iend); - } -@@ -168,8 +212,14 @@ gomp_loop_ordered_dynamic_start (long st - bool ret; - - if (gomp_work_share_start (true)) -- gomp_loop_init (thr->ts.work_share, start, end, incr, -- GFS_DYNAMIC, chunk_size); -+ { -+ gomp_loop_init (thr->ts.work_share, start, end, incr, -+ GFS_DYNAMIC, chunk_size); -+ gomp_mutex_lock (&thr->ts.work_share->lock); -+ gomp_work_share_init_done (); -+ } -+ else -+ gomp_mutex_lock (&thr->ts.work_share->lock); - - ret = gomp_iter_dynamic_next_locked (istart, iend); - if (ret) -@@ -187,8 +237,14 @@ gomp_loop_ordered_guided_start (long sta - bool ret; - - if (gomp_work_share_start (true)) -- gomp_loop_init (thr->ts.work_share, start, end, incr, -- GFS_GUIDED, chunk_size); -+ { -+ gomp_loop_init (thr->ts.work_share, start, end, incr, -+ GFS_GUIDED, chunk_size); -+ gomp_mutex_lock (&thr->ts.work_share->lock); -+ gomp_work_share_init_done (); -+ } -+ else -+ gomp_mutex_lock (&thr->ts.work_share->lock); - - ret = gomp_iter_guided_next_locked (istart, iend); - if (ret) -@@ -375,12 +431,12 @@ gomp_parallel_loop_start (void (*fn) (vo - long incr, enum gomp_schedule_type sched, - long chunk_size) - { -- struct gomp_work_share *ws; -+ struct gomp_team *team; - - num_threads = gomp_resolve_num_threads (num_threads); -- ws = gomp_new_work_share (false, num_threads); -- gomp_loop_init (ws, start, end, incr, sched, chunk_size); -- gomp_team_start (fn, data, num_threads, ws); -+ team = gomp_new_team (num_threads); -+ gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size); -+ gomp_team_start (fn, data, num_threads, team); - } - - void ---- libgomp/Makefile.in.jj 2008-01-10 20:53:47.000000000 +0100 -+++ libgomp/Makefile.in 2008-03-26 18:51:01.000000000 +0100 -@@ -83,7 +83,7 @@ libgomp_la_LIBADD = - am_libgomp_la_OBJECTS = alloc.lo barrier.lo critical.lo env.lo \ - error.lo iter.lo loop.lo ordered.lo parallel.lo sections.lo \ - single.lo team.lo work.lo lock.lo mutex.lo proc.lo sem.lo \ -- bar.lo time.lo fortran.lo affinity.lo -+ bar.lo ptrlock.lo time.lo fortran.lo affinity.lo - libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS) - DEFAULT_INCLUDES = -I. -I$(srcdir) -I. - depcomp = $(SHELL) $(top_srcdir)/../depcomp -@@ -292,7 +292,7 @@ libgomp_version_info = -version-info $(l - libgomp_la_LDFLAGS = $(libgomp_version_info) $(libgomp_version_script) - libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \ - loop.c ordered.c parallel.c sections.c single.c team.c work.c \ -- lock.c mutex.c proc.c sem.c bar.c time.c fortran.c affinity.c -+ lock.c mutex.c proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c - - nodist_noinst_HEADERS = libgomp_f.h - nodist_libsubinclude_HEADERS = omp.h -@@ -434,6 +434,7 @@ distclean-compile: - @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@ - @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@ - @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proc.Plo@am__quote@ -+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ptrlock.Plo@am__quote@ - @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sections.Plo@am__quote@ - @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@ - @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@ ---- libgomp/testsuite/libgomp.c/loop-4.c.jj 2008-03-26 18:47:04.000000000 +0100 -+++ libgomp/testsuite/libgomp.c/loop-4.c 2008-03-26 18:47:04.000000000 +0100 -@@ -0,0 +1,28 @@ -+/* { dg-do run } */ -+ -+extern void abort (void); -+ -+int -+main (void) -+{ -+ int e = 0; -+#pragma omp parallel num_threads (4) reduction(+:e) -+ { -+ long i; -+ #pragma omp for schedule(dynamic,1) -+ for (i = __LONG_MAX__ - 30001; i <= __LONG_MAX__ - 10001; i += 10000) -+ if (i != __LONG_MAX__ - 30001 -+ && i != __LONG_MAX__ - 20001 -+ && i != __LONG_MAX__ - 10001) -+ e = 1; -+ #pragma omp for schedule(dynamic,1) -+ for (i = -__LONG_MAX__ + 30000; i >= -__LONG_MAX__ + 10000; i -= 10000) -+ if (i != -__LONG_MAX__ + 30000 -+ && i != -__LONG_MAX__ + 20000 -+ && i != -__LONG_MAX__ + 10000) -+ e = 1; -+ } -+ if (e) -+ abort (); -+ return 0; -+} ---- libgomp/Makefile.am.jj 2007-12-07 14:41:01.000000000 +0100 -+++ libgomp/Makefile.am 2008-03-26 15:15:19.000000000 +0100 -@@ -31,7 +31,7 @@ libgomp_la_LDFLAGS = $(libgomp_version_i - - libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \ - loop.c ordered.c parallel.c sections.c single.c team.c work.c \ -- lock.c mutex.c proc.c sem.c bar.c time.c fortran.c affinity.c -+ lock.c mutex.c proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c - - nodist_noinst_HEADERS = libgomp_f.h - nodist_libsubinclude_HEADERS = omp.h ---- libgomp/team.c.jj 2007-12-07 14:41:01.000000000 +0100 -+++ libgomp/team.c 2008-03-27 12:22:26.000000000 +0100 -@@ -94,7 +94,7 @@ gomp_thread_start (void *xdata) - { - gomp_barrier_wait (&thr->ts.team->barrier); - local_fn (local_data); -- gomp_barrier_wait (&thr->ts.team->barrier); -+ gomp_barrier_wait_last (&thr->ts.team->barrier); - } - else - { -@@ -114,11 +114,10 @@ gomp_thread_start (void *xdata) - thr->data = NULL; - thr->ts.team = NULL; - thr->ts.work_share = NULL; -+ thr->ts.last_work_share = NULL; - thr->ts.team_id = 0; -- thr->ts.work_share_generation = 0; -- thr->ts.static_trip = 0; - -- gomp_barrier_wait (&team->barrier); -+ gomp_barrier_wait_last (&team->barrier); - gomp_barrier_wait (&gomp_threads_dock); - - local_fn = thr->fn; -@@ -133,21 +132,29 @@ gomp_thread_start (void *xdata) - - /* Create a new team data structure. */ - --static struct gomp_team * --new_team (unsigned nthreads, struct gomp_work_share *work_share) -+struct gomp_team * -+gomp_new_team (unsigned nthreads) - { - struct gomp_team *team; - size_t size; -+ int i; - - size = sizeof (*team) + nthreads * sizeof (team->ordered_release[0]); - team = gomp_malloc (size); -- gomp_mutex_init (&team->work_share_lock); - -- team->work_shares = gomp_malloc (4 * sizeof (struct gomp_work_share *)); -- team->generation_mask = 3; -- team->oldest_live_gen = work_share == NULL; -- team->num_live_gen = work_share != NULL; -- team->work_shares[0] = work_share; -+ team->work_share_chunk = 8; -+#ifdef HAVE_SYNC_BUILTINS -+ team->single_count = 0; -+#else -+ gomp_mutex_init (&team->work_share_list_free_lock); -+#endif -+ gomp_init_work_share (&team->work_shares[0], false, nthreads); -+ team->work_shares[0].next_alloc = NULL; -+ team->work_share_list_free = NULL; -+ team->work_share_list_alloc = &team->work_shares[1]; -+ for (i = 1; i < 7; i++) -+ team->work_shares[i].next_free = &team->work_shares[i + 1]; -+ team->work_shares[i].next_free = NULL; - - team->nthreads = nthreads; - gomp_barrier_init (&team->barrier, nthreads); -@@ -164,10 +171,22 @@ new_team (unsigned nthreads, struct gomp - static void - free_team (struct gomp_team *team) - { -- free (team->work_shares); -- gomp_mutex_destroy (&team->work_share_lock); -+ if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0)) -+ { -+ struct gomp_work_share *ws = team->work_shares[0].next_alloc; -+ do -+ { -+ struct gomp_work_share *next_ws = ws->next_alloc; -+ free (ws); -+ ws = next_ws; -+ } -+ while (ws != NULL); -+ } - gomp_barrier_destroy (&team->barrier); - gomp_sem_destroy (&team->master_release); -+#ifndef HAVE_SYNC_BUILTINS -+ gomp_mutex_destroy (&team->work_share_list_free_lock); -+#endif - free (team); - } - -@@ -176,11 +195,10 @@ free_team (struct gomp_team *team) - - void - gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, -- struct gomp_work_share *work_share) -+ struct gomp_team *team) - { - struct gomp_thread_start_data *start_data; - struct gomp_thread *thr, *nthr; -- struct gomp_team *team; - bool nested; - unsigned i, n, old_threads_used = 0; - pthread_attr_t thread_attr, *attr; -@@ -188,17 +206,18 @@ gomp_team_start (void (*fn) (void *), vo - thr = gomp_thread (); - nested = thr->ts.team != NULL; - -- team = new_team (nthreads, work_share); -- - /* Always save the previous state, even if this isn't a nested team. - In particular, we should save any work share state from an outer - orphaned work share construct. */ - team->prev_ts = thr->ts; - - thr->ts.team = team; -- thr->ts.work_share = work_share; - thr->ts.team_id = 0; -- thr->ts.work_share_generation = 0; -+ thr->ts.work_share = &team->work_shares[0]; -+ thr->ts.last_work_share = NULL; -+#ifdef HAVE_SYNC_BUILTINS -+ thr->ts.single_count = 0; -+#endif - thr->ts.static_trip = 0; - - if (nthreads == 1) -@@ -241,9 +260,12 @@ gomp_team_start (void (*fn) (void *), vo - { - nthr = gomp_threads[i]; - nthr->ts.team = team; -- nthr->ts.work_share = work_share; -+ nthr->ts.work_share = &team->work_shares[0]; -+ nthr->ts.last_work_share = NULL; - nthr->ts.team_id = i; -- nthr->ts.work_share_generation = 0; -+#ifdef HAVE_SYNC_BUILTINS -+ nthr->ts.single_count = 0; -+#endif - nthr->ts.static_trip = 0; - nthr->fn = fn; - nthr->data = data; -@@ -266,8 +288,24 @@ gomp_team_start (void (*fn) (void *), vo - } - } - -+ if (__builtin_expect (nthreads > old_threads_used, 0)) -+ { -+ long diff = (long) nthreads - (long) old_threads_used; -+ -+ if (old_threads_used == 0) -+ --diff; -+ -+#ifdef HAVE_SYNC_BUILTINS -+ __sync_fetch_and_add (&gomp_managed_threads, diff); -+#else -+ gomp_mutex_lock (&gomp_remaining_threads_lock); -+ gomp_managed_threads += diff; -+ gomp_mutex_unlock (&gomp_remaining_threads_lock); -+#endif -+ } -+ - attr = &gomp_thread_attr; -- if (gomp_cpu_affinity != NULL) -+ if (__builtin_expect (gomp_cpu_affinity != NULL, 0)) - { - size_t stacksize; - pthread_attr_init (&thread_attr); -@@ -287,9 +325,12 @@ gomp_team_start (void (*fn) (void *), vo - int err; - - start_data->ts.team = team; -- start_data->ts.work_share = work_share; -+ start_data->ts.work_share = &team->work_shares[0]; -+ start_data->ts.last_work_share = NULL; - start_data->ts.team_id = i; -- start_data->ts.work_share_generation = 0; -+#ifdef HAVE_SYNC_BUILTINS -+ start_data->ts.single_count = 0; -+#endif - start_data->ts.static_trip = 0; - start_data->fn = fn; - start_data->fn_data = data; -@@ -303,7 +344,7 @@ gomp_team_start (void (*fn) (void *), vo - gomp_fatal ("Thread creation failed: %s", strerror (err)); - } - -- if (gomp_cpu_affinity != NULL) -+ if (__builtin_expect (gomp_cpu_affinity != NULL, 0)) - pthread_attr_destroy (&thread_attr); - - do_release: -@@ -313,8 +354,20 @@ gomp_team_start (void (*fn) (void *), vo - that should arrive back at the end of this team. The extra - threads should be exiting. Note that we arrange for this test - to never be true for nested teams. */ -- if (nthreads < old_threads_used) -- gomp_barrier_reinit (&gomp_threads_dock, nthreads); -+ if (__builtin_expect (nthreads < old_threads_used, 0)) -+ { -+ long diff = (long) nthreads - (long) old_threads_used; -+ -+ gomp_barrier_reinit (&gomp_threads_dock, nthreads); -+ -+#ifdef HAVE_SYNC_BUILTINS -+ __sync_fetch_and_add (&gomp_managed_threads, diff); -+#else -+ gomp_mutex_lock (&gomp_remaining_threads_lock); -+ gomp_managed_threads += diff; -+ gomp_mutex_unlock (&gomp_remaining_threads_lock); -+#endif -+ } - } - - -@@ -329,8 +382,21 @@ gomp_team_end (void) - - gomp_barrier_wait (&team->barrier); - -+ gomp_fini_work_share (thr->ts.work_share); -+ - thr->ts = team->prev_ts; - -+ if (__builtin_expect (thr->ts.team != NULL, 0)) -+ { -+#ifdef HAVE_SYNC_BUILTINS -+ __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads); -+#else -+ gomp_mutex_lock (&gomp_remaining_threads_lock); -+ gomp_managed_threads -= team->nthreads - 1L; -+ gomp_mutex_unlock (&gomp_remaining_threads_lock); -+#endif -+ } -+ - free_team (team); - } - ---- libgomp/config/posix/bar.h.jj 2007-12-07 14:41:01.000000000 +0100 -+++ libgomp/config/posix/bar.h 2008-03-26 15:11:32.000000000 +0100 -@@ -1,4 +1,4 @@ --/* Copyright (C) 2005 Free Software Foundation, Inc. -+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc. - Contributed by Richard Henderson <rth@redhat.com>. - - This file is part of the GNU OpenMP Library (libgomp). -@@ -46,18 +46,32 @@ typedef struct - unsigned total; - unsigned arrived; - } gomp_barrier_t; -+typedef bool gomp_barrier_state_t; - - extern void gomp_barrier_init (gomp_barrier_t *, unsigned); - extern void gomp_barrier_reinit (gomp_barrier_t *, unsigned); - extern void gomp_barrier_destroy (gomp_barrier_t *); - - extern void gomp_barrier_wait (gomp_barrier_t *); --extern void gomp_barrier_wait_end (gomp_barrier_t *, bool); -+extern void gomp_barrier_wait_end (gomp_barrier_t *, gomp_barrier_state_t); - --static inline bool gomp_barrier_wait_start (gomp_barrier_t *bar) -+static inline gomp_barrier_state_t -+gomp_barrier_wait_start (gomp_barrier_t *bar) - { - gomp_mutex_lock (&bar->mutex1); - return ++bar->arrived == bar->total; - } - -+static inline bool -+gomp_barrier_last_thread (gomp_barrier_state_t state) -+{ -+ return state; -+} -+ -+static inline void -+gomp_barrier_wait_last (gomp_barrier_t *bar) -+{ -+ gomp_barrier_wait (bar); -+} -+ - #endif /* GOMP_BARRIER_H */ ---- libgomp/config/posix/ptrlock.h.jj 2008-03-26 15:11:32.000000000 +0100 -+++ libgomp/config/posix/ptrlock.h 2008-03-26 15:11:32.000000000 +0100 -@@ -0,0 +1,69 @@ -+/* Copyright (C) 2008 Free Software Foundation, Inc. -+ Contributed by Jakub Jelinek <jakub@redhat.com>. -+ -+ This file is part of the GNU OpenMP Library (libgomp). -+ -+ Libgomp is free software; you can redistribute it and/or modify it -+ under the terms of the GNU Lesser General Public License as published by -+ the Free Software Foundation; either version 2.1 of the License, or -+ (at your option) any later version. -+ -+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY -+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -+ FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for -+ more details. -+ -+ You should have received a copy of the GNU Lesser General Public License -+ along with libgomp; see the file COPYING.LIB. If not, write to the -+ Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, -+ MA 02110-1301, USA. */ -+ -+/* As a special exception, if you link this library with other files, some -+ of which are compiled with GCC, to produce an executable, this library -+ does not by itself cause the resulting executable to be covered by the -+ GNU General Public License. This exception does not however invalidate -+ any other reasons why the executable file might be covered by the GNU -+ General Public License. */ -+ -+/* This is a Linux specific implementation of a mutex synchronization -+ mechanism for libgomp. This type is private to the library. This -+ implementation uses atomic instructions and the futex syscall. */ -+ -+#ifndef GOMP_PTRLOCK_H -+#define GOMP_PTRLOCK_H 1 -+ -+typedef struct { void *ptr; gomp_mutex_t lock; } gomp_ptrlock_t; -+ -+static inline void gomp_ptrlock_init (gomp_ptrlock_t *ptrlock, void *ptr) -+{ -+ ptrlock->ptr = ptr; -+ gomp_mutex_init (&ptrlock->lock); -+} -+ -+static inline void *gomp_ptrlock_get (gomp_ptrlock_t *ptrlock) -+{ -+ if (ptrlock->ptr != NULL) -+ return ptrlock->ptr; -+ -+ gomp_mutex_lock (&ptrlock->lock); -+ if (ptrlock->ptr != NULL) -+ { -+ gomp_mutex_unlock (&ptrlock->lock); -+ return ptrlock->ptr; -+ } -+ -+ return NULL; -+} -+ -+static inline void gomp_ptrlock_set (gomp_ptrlock_t *ptrlock, void *ptr) -+{ -+ ptrlock->ptr = ptr; -+ gomp_mutex_unlock (&ptrlock->lock); -+} -+ -+static inline void gomp_ptrlock_destroy (gomp_ptrlock_t *ptrlock) -+{ -+ gomp_mutex_destroy (&ptrlock->lock); -+} -+ -+#endif /* GOMP_PTRLOCK_H */ ---- libgomp/config/posix/ptrlock.c.jj 2008-03-26 15:11:32.000000000 +0100 -+++ libgomp/config/posix/ptrlock.c 2008-03-26 15:11:32.000000000 +0100 -@@ -0,0 +1 @@ -+/* Everything is in the header. */ ---- libgomp/config/posix/bar.c.jj 2007-12-07 14:41:01.000000000 +0100 -+++ libgomp/config/posix/bar.c 2008-03-26 15:11:32.000000000 +0100 -@@ -1,4 +1,4 @@ --/* Copyright (C) 2005 Free Software Foundation, Inc. -+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc. - Contributed by Richard Henderson <rth@redhat.com>. - - This file is part of the GNU OpenMP Library (libgomp). -@@ -70,7 +70,7 @@ gomp_barrier_reinit (gomp_barrier_t *bar - } - - void --gomp_barrier_wait_end (gomp_barrier_t *bar, bool last) -+gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t last) - { - unsigned int n; - ---- libgomp/config/linux/alpha/futex.h.jj 2007-12-07 14:41:00.000000000 +0100 -+++ libgomp/config/linux/alpha/futex.h 2008-03-26 15:11:32.000000000 +0100 -@@ -1,4 +1,4 @@ --/* Copyright (C) 2005 Free Software Foundation, Inc. -+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc. - Contributed by Richard Henderson <rth@redhat.com>. - - This file is part of the GNU OpenMP Library (libgomp). -@@ -30,8 +30,6 @@ - #ifndef SYS_futex - #define SYS_futex 394 - #endif --#define FUTEX_WAIT 0 --#define FUTEX_WAKE 1 - - - static inline void -@@ -45,7 +43,7 @@ futex_wait (int *addr, int val) - - sc_0 = SYS_futex; - sc_16 = (long) addr; -- sc_17 = FUTEX_WAIT; -+ sc_17 = gomp_futex_wait; - sc_18 = val; - sc_19 = 0; - __asm volatile ("callsys" -@@ -53,6 +51,20 @@ futex_wait (int *addr, int val) - : "0"(sc_0), "r" (sc_16), "r"(sc_17), "r"(sc_18), "1"(sc_19) - : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", - "$22", "$23", "$24", "$25", "$27", "$28", "memory"); -+ if (__builtin_expect (sc_19, 0) && sc_0 == ENOSYS) -+ { -+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; -+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; -+ sc_0 = SYS_futex; -+ sc_17 &= ~FUTEX_PRIVATE_FLAG; -+ sc_19 = 0; -+ __asm volatile ("callsys" -+ : "=r" (sc_0), "=r"(sc_19) -+ : "0"(sc_0), "r" (sc_16), "r"(sc_17), "r"(sc_18), -+ "1"(sc_19) -+ : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", -+ "$22", "$23", "$24", "$25", "$27", "$28", "memory"); -+ } - } - - static inline void -@@ -66,11 +78,35 @@ futex_wake (int *addr, int count) - - sc_0 = SYS_futex; - sc_16 = (long) addr; -- sc_17 = FUTEX_WAKE; -+ sc_17 = gomp_futex_wake; - sc_18 = count; - __asm volatile ("callsys" - : "=r" (sc_0), "=r"(sc_19) - : "0"(sc_0), "r" (sc_16), "r"(sc_17), "r"(sc_18) - : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", - "$22", "$23", "$24", "$25", "$27", "$28", "memory"); -+ if (__builtin_expect (sc_19, 0) && sc_0 == ENOSYS) -+ { -+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; -+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; -+ sc_0 = SYS_futex; -+ sc_17 &= ~FUTEX_PRIVATE_FLAG; -+ __asm volatile ("callsys" -+ : "=r" (sc_0), "=r"(sc_19) -+ : "0"(sc_0), "r" (sc_16), "r"(sc_17), "r"(sc_18) -+ : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", -+ "$22", "$23", "$24", "$25", "$27", "$28", "memory"); -+ } -+} -+ -+static inline void -+cpu_relax (void) -+{ -+ __asm volatile ("" : : : "memory"); -+} -+ -+static inline void -+atomic_write_barrier (void) -+{ -+ __asm volatile ("wmb" : : : "memory"); - } ---- libgomp/config/linux/affinity.c.jj 2007-12-07 14:41:00.000000000 +0100 -+++ libgomp/config/linux/affinity.c 2008-03-26 15:11:32.000000000 +0100 -@@ -1,4 +1,4 @@ --/* Copyright (C) 2006, 2007 Free Software Foundation, Inc. -+/* Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc. - Contributed by Jakub Jelinek <jakub@redhat.com>. - - This file is part of the GNU OpenMP Library (libgomp). -@@ -38,9 +38,6 @@ - #ifdef HAVE_PTHREAD_AFFINITY_NP - - static unsigned int affinity_counter; --#ifndef HAVE_SYNC_BUILTINS --static gomp_mutex_t affinity_lock; --#endif - - void - gomp_init_affinity (void) -@@ -76,9 +73,6 @@ gomp_init_affinity (void) - CPU_SET (gomp_cpu_affinity[0], &cpuset); - pthread_setaffinity_np (pthread_self (), sizeof (cpuset), &cpuset); - affinity_counter = 1; --#ifndef HAVE_SYNC_BUILTINS -- gomp_mutex_init (&affinity_lock); --#endif - } - - void -@@ -87,13 +81,7 @@ gomp_init_thread_affinity (pthread_attr_ - unsigned int cpu; - cpu_set_t cpuset; - --#ifdef HAVE_SYNC_BUILTINS - cpu = __sync_fetch_and_add (&affinity_counter, 1); --#else -- gomp_mutex_lock (&affinity_lock); -- cpu = affinity_counter++; -- gomp_mutex_unlock (&affinity_lock); --#endif - cpu %= gomp_cpu_affinity_len; - CPU_ZERO (&cpuset); - CPU_SET (gomp_cpu_affinity[cpu], &cpuset); ---- libgomp/config/linux/bar.h.jj 2007-12-07 14:41:00.000000000 +0100 -+++ libgomp/config/linux/bar.h 2008-03-26 15:11:32.000000000 +0100 -@@ -1,4 +1,4 @@ --/* Copyright (C) 2005 Free Software Foundation, Inc. -+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc. - Contributed by Richard Henderson <rth@redhat.com>. - - This file is part of the GNU OpenMP Library (libgomp). -@@ -36,40 +36,49 @@ - - typedef struct - { -- gomp_mutex_t mutex; -- unsigned total; -- unsigned arrived; -- int generation; -+ /* Make sure total/generation is in a mostly read cacheline, while -+ awaited in a separate cacheline. */ -+ unsigned total __attribute__((aligned (64))); -+ unsigned generation; -+ unsigned awaited __attribute__((aligned (64))); - } gomp_barrier_t; -+typedef unsigned int gomp_barrier_state_t; - - static inline void gomp_barrier_init (gomp_barrier_t *bar, unsigned count) - { -- gomp_mutex_init (&bar->mutex); - bar->total = count; -- bar->arrived = 0; -+ bar->awaited = count; - bar->generation = 0; - } - - static inline void gomp_barrier_reinit (gomp_barrier_t *bar, unsigned count) - { -- gomp_mutex_lock (&bar->mutex); -+ __sync_fetch_and_add (&bar->awaited, count - bar->total); - bar->total = count; -- gomp_mutex_unlock (&bar->mutex); - } - - static inline void gomp_barrier_destroy (gomp_barrier_t *bar) - { -- /* Before destroying, make sure all threads have left the barrier. */ -- gomp_mutex_lock (&bar->mutex); - } - - extern void gomp_barrier_wait (gomp_barrier_t *); --extern void gomp_barrier_wait_end (gomp_barrier_t *, bool); -+extern void gomp_barrier_wait_last (gomp_barrier_t *); -+extern void gomp_barrier_wait_end (gomp_barrier_t *, gomp_barrier_state_t); - --static inline bool gomp_barrier_wait_start (gomp_barrier_t *bar) -+static inline gomp_barrier_state_t -+gomp_barrier_wait_start (gomp_barrier_t *bar) - { -- gomp_mutex_lock (&bar->mutex); -- return ++bar->arrived == bar->total; -+ unsigned int ret = bar->generation; -+ /* Do we need any barrier here or is __sync_add_and_fetch acting -+ as the needed LoadLoad barrier already? */ -+ ret += __sync_add_and_fetch (&bar->awaited, -1) == 0; -+ return ret; -+} -+ -+static inline bool -+gomp_barrier_last_thread (gomp_barrier_state_t state) -+{ -+ return state & 1; - } - - #endif /* GOMP_BARRIER_H */ ---- libgomp/config/linux/ptrlock.h.jj 2008-03-26 15:11:32.000000000 +0100 -+++ libgomp/config/linux/ptrlock.h 2008-03-26 15:11:32.000000000 +0100 -@@ -0,0 +1,65 @@ -+/* Copyright (C) 2008 Free Software Foundation, Inc. -+ Contributed by Jakub Jelinek <jakub@redhat.com>. -+ -+ This file is part of the GNU OpenMP Library (libgomp). -+ -+ Libgomp is free software; you can redistribute it and/or modify it -+ under the terms of the GNU Lesser General Public License as published by -+ the Free Software Foundation; either version 2.1 of the License, or -+ (at your option) any later version. -+ -+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY -+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -+ FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for -+ more details. -+ -+ You should have received a copy of the GNU Lesser General Public License -+ along with libgomp; see the file COPYING.LIB. If not, write to the -+ Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, -+ MA 02110-1301, USA. */ -+ -+/* As a special exception, if you link this library with other files, some -+ of which are compiled with GCC, to produce an executable, this library -+ does not by itself cause the resulting executable to be covered by the -+ GNU General Public License. This exception does not however invalidate -+ any other reasons why the executable file might be covered by the GNU -+ General Public License. */ -+ -+/* This is a Linux specific implementation of a mutex synchronization -+ mechanism for libgomp. This type is private to the library. This -+ implementation uses atomic instructions and the futex syscall. */ -+ -+#ifndef GOMP_PTRLOCK_H -+#define GOMP_PTRLOCK_H 1 -+ -+typedef void *gomp_ptrlock_t; -+ -+static inline void gomp_ptrlock_init (gomp_ptrlock_t *ptrlock, void *ptr) -+{ -+ *ptrlock = ptr; -+} -+ -+extern void *gomp_ptrlock_get_slow (gomp_ptrlock_t *ptrlock); -+static inline void *gomp_ptrlock_get (gomp_ptrlock_t *ptrlock) -+{ -+ if ((uintptr_t) *ptrlock > 2) -+ return *ptrlock; -+ -+ if (__sync_bool_compare_and_swap (ptrlock, NULL, (uintptr_t) 1)) -+ return NULL; -+ -+ return gomp_ptrlock_get_slow (ptrlock); -+} -+ -+extern void gomp_ptrlock_set_slow (gomp_ptrlock_t *ptrlock, void *ptr); -+static inline void gomp_ptrlock_set (gomp_ptrlock_t *ptrlock, void *ptr) -+{ -+ if (!__sync_bool_compare_and_swap (ptrlock, (uintptr_t) 1, ptr)) -+ gomp_ptrlock_set_slow (ptrlock, ptr); -+} -+ -+static inline void gomp_ptrlock_destroy (gomp_ptrlock_t *ptrlock) -+{ -+} -+ -+#endif /* GOMP_PTRLOCK_H */ ---- libgomp/config/linux/lock.c.jj 2007-12-07 14:41:00.000000000 +0100 -+++ libgomp/config/linux/lock.c 2008-03-26 15:11:32.000000000 +0100 -@@ -29,11 +29,10 @@ - primitives. This implementation uses atomic instructions and the futex - syscall. */ - --#include "libgomp.h" - #include <string.h> - #include <unistd.h> - #include <sys/syscall.h> --#include "futex.h" -+#include "wait.h" - - - /* The internal gomp_mutex_t and the external non-recursive omp_lock_t -@@ -137,7 +136,7 @@ omp_set_nest_lock (omp_nest_lock_t *lock - return; - } - -- futex_wait (&lock->owner, otid); -+ do_wait (&lock->owner, otid); - } - } - ---- libgomp/config/linux/ptrlock.c.jj 2008-03-26 15:11:32.000000000 +0100 -+++ libgomp/config/linux/ptrlock.c 2008-03-26 15:11:32.000000000 +0100 -@@ -0,0 +1,70 @@ -+/* Copyright (C) 2008 Free Software Foundation, Inc. -+ Contributed by Jakub Jelinek <jakub@redhat.com>. -+ -+ This file is part of the GNU OpenMP Library (libgomp). -+ -+ Libgomp is free software; you can redistribute it and/or modify it -+ under the terms of the GNU Lesser General Public License as published by -+ the Free Software Foundation; either version 2.1 of the License, or -+ (at your option) any later version. -+ -+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY -+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -+ FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for -+ more details. -+ -+ You should have received a copy of the GNU Lesser General Public License -+ along with libgomp; see the file COPYING.LIB. If not, write to the -+ Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, -+ MA 02110-1301, USA. */ -+ -+/* As a special exception, if you link this library with other files, some -+ of which are compiled with GCC, to produce an executable, this library -+ does not by itself cause the resulting executable to be covered by the -+ GNU General Public License. This exception does not however invalidate -+ any other reasons why the executable file might be covered by the GNU -+ General Public License. */ -+ -+/* This is a Linux specific implementation of a mutex synchronization -+ mechanism for libgomp. This type is private to the library. This -+ implementation uses atomic instructions and the futex syscall. */ -+ -+#include <endian.h> -+#include <limits.h> -+#include "wait.h" -+ -+void * -+gomp_ptrlock_get_slow (gomp_ptrlock_t *ptrlock) -+{ -+ int *intptr; -+ __sync_bool_compare_and_swap (ptrlock, 1, 2); -+ -+ /* futex works on ints, not pointers. -+ But a valid work share pointer will be at least -+ 8 byte aligned, so it is safe to assume the low -+ 32-bits of the pointer won't contain values 1 or 2. */ -+ __asm volatile ("" : "=r" (intptr) : "0" (ptrlock)); -+#if __BYTE_ORDER == __BIG_ENDIAN -+ if (sizeof (*ptrlock) > sizeof (int)) -+ intptr += (sizeof (*ptrlock) / sizeof (int)) - 1; -+#endif -+ do -+ do_wait (intptr, 2); -+ while (*intptr == 2); -+ __asm volatile ("" : : : "memory"); -+ return *ptrlock; -+} -+ -+void -+gomp_ptrlock_set_slow (gomp_ptrlock_t *ptrlock, void *ptr) -+{ -+ int *intptr; -+ -+ *ptrlock = ptr; -+ __asm volatile ("" : "=r" (intptr) : "0" (ptrlock)); -+#if __BYTE_ORDER == __BIG_ENDIAN -+ if (sizeof (*ptrlock) > sizeof (int)) -+ intptr += (sizeof (*ptrlock) / sizeof (int)) - 1; -+#endif -+ futex_wake (intptr, INT_MAX); -+} ---- libgomp/config/linux/x86/futex.h.jj 2007-12-07 14:41:00.000000000 +0100 -+++ libgomp/config/linux/x86/futex.h 2008-03-26 15:11:32.000000000 +0100 -@@ -1,4 +1,4 @@ --/* Copyright (C) 2005 Free Software Foundation, Inc. -+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc. - Contributed by Richard Henderson <rth@redhat.com>. - - This file is part of the GNU OpenMP Library (libgomp). -@@ -27,9 +27,6 @@ - - /* Provide target-specific access to the futex system call. */ - --#define FUTEX_WAIT 0 --#define FUTEX_WAKE 1 -- - #ifdef __LP64__ - # ifndef SYS_futex - # define SYS_futex 202 -@@ -38,14 +35,26 @@ - static inline void - futex_wait (int *addr, int val) - { -- register long r10 __asm__("%r10") = 0; -+ register long r10 __asm__("%r10"); - long res; - -+ r10 = 0; - __asm volatile ("syscall" - : "=a" (res) -- : "0"(SYS_futex), "D" (addr), "S"(FUTEX_WAIT), -- "d"(val), "r"(r10) -+ : "0" (SYS_futex), "D" (addr), "S" (gomp_futex_wait), -+ "d" (val), "r" (r10) - : "r11", "rcx", "memory"); -+ if (__builtin_expect (res == -ENOSYS, 0)) -+ { -+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; -+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; -+ r10 = 0; -+ __asm volatile ("syscall" -+ : "=a" (res) -+ : "0" (SYS_futex), "D" (addr), "S" (gomp_futex_wait), -+ "d" (val), "r" (r10) -+ : "r11", "rcx", "memory"); -+ } - } - - static inline void -@@ -55,8 +64,19 @@ futex_wake (int *addr, int count) - - __asm volatile ("syscall" - : "=a" (res) -- : "0"(SYS_futex), "D" (addr), "S"(FUTEX_WAKE), "d"(count) -+ : "0" (SYS_futex), "D" (addr), "S" (gomp_futex_wake), -+ "d" (count) - : "r11", "rcx", "memory"); -+ if (__builtin_expect (res == -ENOSYS, 0)) -+ { -+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; -+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; -+ __asm volatile ("syscall" -+ : "=a" (res) -+ : "0" (SYS_futex), "D" (addr), "S" (gomp_futex_wake), -+ "d" (count) -+ : "r11", "rcx", "memory"); -+ } - } - #else - # ifndef SYS_futex -@@ -65,7 +85,7 @@ futex_wake (int *addr, int count) - - # ifdef __PIC__ - --static inline void -+static inline long - sys_futex0 (int *addr, int op, int val) - { - long res; -@@ -77,11 +97,12 @@ sys_futex0 (int *addr, int op, int val) - : "0"(SYS_futex), "r" (addr), "c"(op), - "d"(val), "S"(0) - : "memory"); -+ return res; - } - - # else - --static inline void -+static inline long - sys_futex0 (int *addr, int op, int val) - { - long res; -@@ -91,6 +112,7 @@ sys_futex0 (int *addr, int op, int val) - : "0"(SYS_futex), "b" (addr), "c"(op), - "d"(val), "S"(0) - : "memory"); -+ return res; - } - - # endif /* __PIC__ */ -@@ -98,13 +120,37 @@ sys_futex0 (int *addr, int op, int val) - static inline void - futex_wait (int *addr, int val) - { -- sys_futex0 (addr, FUTEX_WAIT, val); -+ long res = sys_futex0 (addr, gomp_futex_wait, val); -+ if (__builtin_expect (res == -ENOSYS, 0)) -+ { -+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; -+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; -+ sys_futex0 (addr, gomp_futex_wait, val); -+ } - } - - static inline void - futex_wake (int *addr, int count) - { -- sys_futex0 (addr, FUTEX_WAKE, count); -+ long res = sys_futex0 (addr, gomp_futex_wake, count); -+ if (__builtin_expect (res == -ENOSYS, 0)) -+ { -+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; -+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; -+ sys_futex0 (addr, gomp_futex_wake, count); -+ } - } - - #endif /* __LP64__ */ -+ -+static inline void -+cpu_relax (void) -+{ -+ __asm volatile ("rep; nop" : : : "memory"); -+} -+ -+static inline void -+atomic_write_barrier (void) -+{ -+ __sync_synchronize (); -+} ---- libgomp/config/linux/wait.h.jj 2008-03-26 15:11:32.000000000 +0100 -+++ libgomp/config/linux/wait.h 2008-03-26 15:11:32.000000000 +0100 -@@ -0,0 +1,68 @@ -+/* Copyright (C) 2008 Free Software Foundation, Inc. -+ Contributed by Jakub Jelinek <jakub@redhat.com>. -+ -+ This file is part of the GNU OpenMP Library (libgomp). -+ -+ Libgomp is free software; you can redistribute it and/or modify it -+ under the terms of the GNU Lesser General Public License as published by -+ the Free Software Foundation; either version 2.1 of the License, or -+ (at your option) any later version. -+ -+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY -+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -+ FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for -+ more details. -+ -+ You should have received a copy of the GNU Lesser General Public License -+ along with libgomp; see the file COPYING.LIB. If not, write to the -+ Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, -+ MA 02110-1301, USA. */ -+ -+/* As a special exception, if you link this library with other files, some -+ of which are compiled with GCC, to produce an executable, this library -+ does not by itself cause the resulting executable to be covered by the -+ GNU General Public License. This exception does not however invalidate -+ any other reasons why the executable file might be covered by the GNU -+ General Public License. */ -+ -+/* This is a Linux specific implementation of a mutex synchronization -+ mechanism for libgomp. This type is private to the library. This -+ implementation uses atomic instructions and the futex syscall. */ -+ -+#ifndef GOMP_WAIT_H -+#define GOMP_WAIT_H 1 -+ -+#include "libgomp.h" -+#include <errno.h> -+ -+#define FUTEX_WAIT 0 -+#define FUTEX_WAKE 1 -+#define FUTEX_PRIVATE_FLAG 128L -+ -+#ifdef HAVE_ATTRIBUTE_VISIBILITY -+# pragma GCC visibility push(hidden) -+#endif -+ -+extern long int gomp_futex_wait, gomp_futex_wake; -+ -+#include "futex.h" -+ -+static inline void do_wait (int *addr, int val) -+{ -+ unsigned long long i, count = gomp_spin_count_var; -+ -+ if (__builtin_expect (gomp_managed_threads > gomp_available_cpus, 0)) -+ count = gomp_throttled_spin_count_var; -+ for (i = 0; i < count; i++) -+ if (__builtin_expect (*addr != val, 0)) -+ return; -+ else -+ cpu_relax (); -+ futex_wait (addr, val); -+} -+ -+#ifdef HAVE_ATTRIBUTE_VISIBILITY -+# pragma GCC visibility pop -+#endif -+ -+#endif /* GOMP_WAIT_H */ ---- libgomp/config/linux/sparc/futex.h.jj 2007-12-07 14:41:00.000000000 +0100 -+++ libgomp/config/linux/sparc/futex.h 2008-03-26 15:11:32.000000000 +0100 -@@ -1,4 +1,4 @@ --/* Copyright (C) 2005 Free Software Foundation, Inc. -+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc. - Contributed by Jakub Jelinek <jakub@redhat.com>. - - This file is part of the GNU OpenMP Library (libgomp). -@@ -28,10 +28,8 @@ - /* Provide target-specific access to the futex system call. */ - - #include <sys/syscall.h> --#define FUTEX_WAIT 0 --#define FUTEX_WAKE 1 - --static inline void -+static inline long - sys_futex0 (int *addr, int op, int val) - { - register long int g1 __asm__ ("g1"); -@@ -47,9 +45,9 @@ sys_futex0 (int *addr, int op, int val) - o3 = 0; - - #ifdef __arch64__ --# define SYSCALL_STRING "ta\t0x6d" -+# define SYSCALL_STRING "ta\t0x6d; bcs,a,pt %%xcc, 1f; sub %%g0, %%o0, %%o0; 1:" - #else --# define SYSCALL_STRING "ta\t0x10" -+# define SYSCALL_STRING "ta\t0x10; bcs,a 1f; sub %%g0, %%o0, %%o0; 1:" - #endif - - __asm volatile (SYSCALL_STRING -@@ -65,16 +63,49 @@ sys_futex0 (int *addr, int op, int val) - "f48", "f50", "f52", "f54", "f56", "f58", "f60", "f62", - #endif - "cc", "memory"); -+ return o0; - } - - static inline void - futex_wait (int *addr, int val) - { -- sys_futex0 (addr, FUTEX_WAIT, val); -+ long err = sys_futex0 (addr, gomp_futex_wait, val); -+ if (__builtin_expect (err == ENOSYS, 0)) -+ { -+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; -+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; -+ sys_futex0 (addr, gomp_futex_wait, val); -+ } - } - - static inline void - futex_wake (int *addr, int count) - { -- sys_futex0 (addr, FUTEX_WAKE, count); -+ long err = sys_futex0 (addr, gomp_futex_wake, count); -+ if (__builtin_expect (err == ENOSYS, 0)) -+ { -+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; -+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; -+ sys_futex0 (addr, gomp_futex_wake, count); -+ } -+} -+ -+static inline void -+cpu_relax (void) -+{ -+#if defined __arch64__ || defined __sparc_v9__ -+ __asm volatile ("membar #LoadLoad" : : : "memory"); -+#else -+ __asm volatile ("" : : : "memory"); -+#endif -+} -+ -+static inline void -+atomic_write_barrier (void) -+{ -+#if defined __arch64__ || defined __sparc_v9__ -+ __asm volatile ("membar #StoreStore" : : : "memory"); -+#else -+ __sync_synchronize (); -+#endif - } ---- libgomp/config/linux/ia64/futex.h.jj 2007-12-07 14:41:00.000000000 +0100 -+++ libgomp/config/linux/ia64/futex.h 2008-03-26 15:11:32.000000000 +0100 -@@ -1,4 +1,4 @@ --/* Copyright (C) 2005 Free Software Foundation, Inc. -+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc. - Contributed by Richard Henderson <rth@redhat.com>. - - This file is part of the GNU OpenMP Library (libgomp). -@@ -29,23 +29,24 @@ - - #include <sys/syscall.h> - --#define FUTEX_WAIT 0 --#define FUTEX_WAKE 1 - - --static inline void --sys_futex0(int *addr, int op, int val) -+static inline long -+sys_futex0(int *addr, long op, int val) - { - register long out0 asm ("out0") = (long) addr; - register long out1 asm ("out1") = op; - register long out2 asm ("out2") = val; - register long out3 asm ("out3") = 0; -+ register long r8 asm ("r8"); -+ register long r10 asm ("r10"); - register long r15 asm ("r15") = SYS_futex; - - __asm __volatile ("break 0x100000" -- : "=r"(r15), "=r"(out0), "=r"(out1), "=r"(out2), "=r"(out3) -+ : "=r"(r15), "=r"(out0), "=r"(out1), "=r"(out2), "=r"(out3), -+ "=r"(r8), "=r"(r10) - : "r"(r15), "r"(out0), "r"(out1), "r"(out2), "r"(out3) -- : "memory", "r8", "r10", "out4", "out5", "out6", "out7", -+ : "memory", "out4", "out5", "out6", "out7", - /* Non-stacked integer registers, minus r8, r10, r15. */ - "r2", "r3", "r9", "r11", "r12", "r13", "r14", "r16", "r17", "r18", - "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r26", "r27", -@@ -56,16 +57,41 @@ sys_futex0(int *addr, int op, int val) - "f6", "f7", "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", - /* Branch registers. */ - "b6"); -+ return r8 & r10; - } - - static inline void - futex_wait (int *addr, int val) - { -- sys_futex0 (addr, FUTEX_WAIT, val); -+ long err = sys_futex0 (addr, gomp_futex_wait, val); -+ if (__builtin_expect (err == ENOSYS, 0)) -+ { -+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; -+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; -+ sys_futex0 (addr, gomp_futex_wait, val); -+ } - } - - static inline void - futex_wake (int *addr, int count) - { -- sys_futex0 (addr, FUTEX_WAKE, count); -+ long err = sys_futex0 (addr, gomp_futex_wake, count); -+ if (__builtin_expect (err == ENOSYS, 0)) -+ { -+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; -+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; -+ sys_futex0 (addr, gomp_futex_wake, count); -+ } -+} -+ -+static inline void -+cpu_relax (void) -+{ -+ __asm volatile ("hint @pause" : : : "memory"); -+} -+ -+static inline void -+atomic_write_barrier (void) -+{ -+ __sync_synchronize (); - } ---- libgomp/config/linux/s390/futex.h.jj 2007-12-07 14:41:00.000000000 +0100 -+++ libgomp/config/linux/s390/futex.h 2008-03-26 15:11:32.000000000 +0100 -@@ -1,4 +1,4 @@ --/* Copyright (C) 2005 Free Software Foundation, Inc. -+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc. - Contributed by Jakub Jelinek <jakub@redhat.com>. - - This file is part of the GNU OpenMP Library (libgomp). -@@ -28,10 +28,8 @@ - /* Provide target-specific access to the futex system call. */ - - #include <sys/syscall.h> --#define FUTEX_WAIT 0 --#define FUTEX_WAKE 1 - --static inline void -+static inline long - sys_futex0 (int *addr, int op, int val) - { - register long int gpr2 __asm__ ("2"); -@@ -49,16 +47,41 @@ sys_futex0 (int *addr, int op, int val) - : "i" (SYS_futex), - "0" (gpr2), "d" (gpr3), "d" (gpr4), "d" (gpr5) - : "memory"); -+ return gpr2; - } - - static inline void - futex_wait (int *addr, int val) - { -- sys_futex0 (addr, FUTEX_WAIT, val); -+ long err = sys_futex0 (addr, gomp_futex_wait, val); -+ if (__builtin_expect (err == -ENOSYS, 0)) -+ { -+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; -+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; -+ sys_futex0 (addr, gomp_futex_wait, val); -+ } - } - - static inline void - futex_wake (int *addr, int count) - { -- sys_futex0 (addr, FUTEX_WAKE, count); -+ long err = sys_futex0 (addr, gomp_futex_wake, count); -+ if (__builtin_expect (err == -ENOSYS, 0)) -+ { -+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; -+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; -+ sys_futex0 (addr, gomp_futex_wake, count); -+ } -+} -+ -+static inline void -+cpu_relax (void) -+{ -+ __asm volatile ("" : : : "memory"); -+} -+ -+static inline void -+atomic_write_barrier (void) -+{ -+ __sync_synchronize (); - } ---- libgomp/config/linux/mutex.c.jj 2007-12-07 14:41:00.000000000 +0100 -+++ libgomp/config/linux/mutex.c 2008-03-26 15:11:32.000000000 +0100 -@@ -1,4 +1,4 @@ --/* Copyright (C) 2005 Free Software Foundation, Inc. -+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc. - Contributed by Richard Henderson <rth@redhat.com>. - - This file is part of the GNU OpenMP Library (libgomp). -@@ -29,9 +29,10 @@ - mechanism for libgomp. This type is private to the library. This - implementation uses atomic instructions and the futex syscall. */ - --#include "libgomp.h" --#include "futex.h" -+#include "wait.h" - -+long int gomp_futex_wake = FUTEX_WAKE | FUTEX_PRIVATE_FLAG; -+long int gomp_futex_wait = FUTEX_WAIT | FUTEX_PRIVATE_FLAG; - - void - gomp_mutex_lock_slow (gomp_mutex_t *mutex) -@@ -40,7 +41,7 @@ gomp_mutex_lock_slow (gomp_mutex_t *mute - { - int oldval = __sync_val_compare_and_swap (mutex, 1, 2); - if (oldval != 0) -- futex_wait (mutex, 2); -+ do_wait (mutex, 2); - } - while (!__sync_bool_compare_and_swap (mutex, 0, 2)); - } ---- libgomp/config/linux/sem.c.jj 2007-12-07 14:41:00.000000000 +0100 -+++ libgomp/config/linux/sem.c 2008-03-26 15:11:32.000000000 +0100 -@@ -1,4 +1,4 @@ --/* Copyright (C) 2005 Free Software Foundation, Inc. -+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc. - Contributed by Richard Henderson <rth@redhat.com>. - - This file is part of the GNU OpenMP Library (libgomp). -@@ -29,8 +29,7 @@ - mechanism for libgomp. This type is private to the library. This - implementation uses atomic instructions and the futex syscall. */ - --#include "libgomp.h" --#include "futex.h" -+#include "wait.h" - - - void -@@ -44,7 +43,7 @@ gomp_sem_wait_slow (gomp_sem_t *sem) - if (__sync_bool_compare_and_swap (sem, val, val - 1)) - return; - } -- futex_wait (sem, -1); -+ do_wait (sem, -1); - } - } - ---- libgomp/config/linux/powerpc/futex.h.jj 2007-12-07 14:41:00.000000000 +0100 -+++ libgomp/config/linux/powerpc/futex.h 2008-03-26 15:11:32.000000000 +0100 -@@ -1,4 +1,4 @@ --/* Copyright (C) 2005 Free Software Foundation, Inc. -+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc. - Contributed by Richard Henderson <rth@redhat.com>. - - This file is part of the GNU OpenMP Library (libgomp). -@@ -28,10 +28,8 @@ - /* Provide target-specific access to the futex system call. */ - - #include <sys/syscall.h> --#define FUTEX_WAIT 0 --#define FUTEX_WAKE 1 - --static inline void -+static inline long - sys_futex0 (int *addr, int op, int val) - { - register long int r0 __asm__ ("r0"); -@@ -50,21 +48,48 @@ sys_futex0 (int *addr, int op, int val) - doesn't. It doesn't much matter for us. In the interest of unity, - go ahead and clobber it always. */ - -- __asm volatile ("sc" -+ __asm volatile ("sc; mfcr %0" - : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6) - : "r"(r0), "r"(r3), "r"(r4), "r"(r5), "r"(r6) - : "r7", "r8", "r9", "r10", "r11", "r12", - "cr0", "ctr", "memory"); -+ if (__builtin_expect (r0 & (1 << 28), 0)) -+ return r3; -+ return 0; - } - - static inline void - futex_wait (int *addr, int val) - { -- sys_futex0 (addr, FUTEX_WAIT, val); -+ long err = sys_futex0 (addr, gomp_futex_wait, val); -+ if (__builtin_expect (err == ENOSYS, 0)) -+ { -+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; -+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; -+ sys_futex0 (addr, gomp_futex_wait, val); -+ } - } - - static inline void - futex_wake (int *addr, int count) - { -- sys_futex0 (addr, FUTEX_WAKE, count); -+ long err = sys_futex0 (addr, gomp_futex_wake, count); -+ if (__builtin_expect (err == ENOSYS, 0)) -+ { -+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; -+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; -+ sys_futex0 (addr, gomp_futex_wake, count); -+ } -+} -+ -+static inline void -+cpu_relax (void) -+{ -+ __asm volatile ("" : : : "memory"); -+} -+ -+static inline void -+atomic_write_barrier (void) -+{ -+ __asm volatile ("eieio" : : : "memory"); - } ---- libgomp/config/linux/bar.c.jj 2007-12-07 14:41:00.000000000 +0100 -+++ libgomp/config/linux/bar.c 2008-03-26 15:11:32.000000000 +0100 -@@ -1,4 +1,4 @@ --/* Copyright (C) 2005 Free Software Foundation, Inc. -+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc. - Contributed by Richard Henderson <rth@redhat.com>. - - This file is part of the GNU OpenMP Library (libgomp). -@@ -29,32 +29,29 @@ - mechanism for libgomp. This type is private to the library. This - implementation uses atomic instructions and the futex syscall. */ - --#include "libgomp.h" --#include "futex.h" - #include <limits.h> -+#include "wait.h" - - - void --gomp_barrier_wait_end (gomp_barrier_t *bar, bool last) -+gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state) - { -- if (last) -+ if (__builtin_expect ((state & 1) != 0, 0)) - { -- bar->generation++; -- futex_wake (&bar->generation, INT_MAX); -+ /* Next time we'll be awaiting TOTAL threads again. */ -+ bar->awaited = bar->total; -+ atomic_write_barrier (); -+ bar->generation += 2; -+ futex_wake ((int *) &bar->generation, INT_MAX); - } - else - { -- unsigned int generation = bar->generation; -- -- gomp_mutex_unlock (&bar->mutex); -+ unsigned int generation = state; - - do -- futex_wait (&bar->generation, generation); -+ do_wait ((int *) &bar->generation, generation); - while (bar->generation == generation); - } -- -- if (__sync_add_and_fetch (&bar->arrived, -1) == 0) -- gomp_mutex_unlock (&bar->mutex); - } - - void -@@ -62,3 +59,18 @@ gomp_barrier_wait (gomp_barrier_t *barri - { - gomp_barrier_wait_end (barrier, gomp_barrier_wait_start (barrier)); - } -+ -+/* Like gomp_barrier_wait, except that if the encountering thread -+ is not the last one to hit the barrier, it returns immediately. -+ The intended usage is that a thread which intends to gomp_barrier_destroy -+ this barrier calls gomp_barrier_wait, while all other threads -+ call gomp_barrier_wait_last. When gomp_barrier_wait returns, -+ the barrier can be safely destroyed. */ -+ -+void -+gomp_barrier_wait_last (gomp_barrier_t *barrier) -+{ -+ gomp_barrier_state_t state = gomp_barrier_wait_start (barrier); -+ if (state & 1) -+ gomp_barrier_wait_end (barrier, state); -+} |