diff options
-rw-r--r-- | share/man/man9/locking.9 | 31 | ||||
-rw-r--r-- | share/man/man9/rmlock.9 | 185 | ||||
-rw-r--r-- | sys/conf/files | 1 | ||||
-rw-r--r-- | sys/kern/kern_rmlock.c | 500 | ||||
-rw-r--r-- | sys/kern/subr_lock.c | 1 | ||||
-rw-r--r-- | sys/kern/subr_pcpu.c | 5 | ||||
-rw-r--r-- | sys/kern/subr_smp.c | 53 | ||||
-rw-r--r-- | sys/sys/_rmlock.h | 62 | ||||
-rw-r--r-- | sys/sys/lock.h | 1 | ||||
-rw-r--r-- | sys/sys/pcpu.h | 22 | ||||
-rw-r--r-- | sys/sys/rmlock.h | 109 | ||||
-rw-r--r-- | sys/sys/smp.h | 1 |
12 files changed, 946 insertions, 25 deletions
diff --git a/share/man/man9/locking.9 b/share/man/man9/locking.9 index 2328b1b..825eb7f 100644 --- a/share/man/man9/locking.9 +++ b/share/man/man9/locking.9 @@ -53,6 +53,8 @@ Shared-Exclusive locks .It Reader-Writer locks .It +Read-Mostly locks +.It Turnstiles .It Semaphores @@ -163,6 +165,17 @@ This ability should not be used lightly and .Em may go away. Users of recursion in any locks should be prepared to defend their decision against vigorous criticism. +.Ss Rm_locks +Mostly reader locks are similar to +.Em Reader/write +locks but optimized for very infrequent +.Em writer +locking. +.Em rm_lock +locks implement full priority propagation by tracking shared owners +using a lock user supplied +.Em tracker +data structure. .Ss Sx_locks Shared/exclusive locks are used to protect data that are read far more often than they are written. @@ -298,12 +311,13 @@ one of the synchronization primitives discussed here: (someone who knows what they are talking about should write this table) .Bl -column ".Ic xxxxxxxxxxxxxxxxxxxx" ".Xr XXXXXXXXX" ".Xr XXXXXXX" ".Xr XXXXXXX" ".Xr XXXXXXX" ".Xr XXXXX" -offset indent .It Xo -.Em "You have: You want:" Ta Spin_mtx Ta Slp_mtx Ta sx_lock Ta rw_lock Ta sleep +.Em "You have: You want:" Ta Spin_mtx Ta Slp_mtx Ta sx_lock Ta rw_lock Ta rm_lock Ta sleep .Xc -.It Ic SPIN mutex Ta \&ok Ta \&no Ta \&no Ta \&no Ta \&no-3 -.It Ic Sleep mutex Ta \&ok Ta \&ok-1 Ta \&no Ta \&ok Ta \&no-3 -.It Ic sx_lock Ta \&ok Ta \&no Ta \&ok-2 Ta \&no Ta \&ok-4 -.It Ic rw_lock Ta \&ok Ta \&ok Ta \&no Ta \&ok-2 Ta \&no-3 +.It Ic SPIN mutex Ta \&ok-1 Ta \&no Ta \&no Ta \&no Ta \&no Ta \&no-3 +.It Ic Sleep mutex Ta \&ok Ta \&ok-1 Ta \&no Ta \&ok Ta \&ok Ta \&no-3 +.It Ic sx_lock Ta \&ok Ta \&ok Ta \&ok-2 Ta \&ok Ta \&ok Ta \&ok-4 +.It Ic rw_lock Ta \&ok Ta \&ok Ta \&no Ta \&ok-2 Ta \&ok Ta \&no-3 +.It Ic rm_lock Ta \&ok Ta \&ok Ta \&no Ta \&ok Ta \&ok-2 Ta \&no .El .Pp .Em *1 @@ -333,16 +347,17 @@ The next table shows what can be used in different contexts. 
At this time this is a rather easy to remember table. .Bl -column ".Ic Xxxxxxxxxxxxxxxxxxxx" ".Xr XXXXXXXXX" ".Xr XXXXXXX" ".Xr XXXXXXX" ".Xr XXXXXXX" ".Xr XXXXX" -offset indent .It Xo -.Em "Context:" Ta Spin_mtx Ta Slp_mtx Ta sx_lock Ta rw_lock Ta sleep +.Em "Context:" Ta Spin_mtx Ta Slp_mtx Ta sx_lock Ta rw_lock Ta rm_lock Ta sleep .Xc -.It interrupt: Ta \&ok Ta \&no Ta \&no Ta \&no Ta \&no -.It idle: Ta \&ok Ta \&no Ta \&no Ta \&no Ta \&no +.It interrupt: Ta \&ok Ta \&no Ta \&no Ta \&no Ta \&no Ta \&no +.It idle: Ta \&ok Ta \&no Ta \&no Ta \&no Ta \&no Ta \&no .El .Sh SEE ALSO .Xr condvar 9 , .Xr lock 9 , .Xr mtx_pool 9 , .Xr mutex 9 , +.Xr rmlock 9 , .Xr rwlock 9 , .Xr sema 9 , .Xr sleep 9 , diff --git a/share/man/man9/rmlock.9 b/share/man/man9/rmlock.9 new file mode 100644 index 0000000..c088f64 --- /dev/null +++ b/share/man/man9/rmlock.9 @@ -0,0 +1,185 @@ +.\" Copyright (c) 2007 Stephan Uphoff <ups@FreeBSD.org> +.\" Copyright (c) 2006 Gleb Smirnoff <glebius@FreeBSD.org> +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.\" Based on rwlock.9 man page + +.Dd August 22, 2007 +.Dt RMLOCK 9 +.Os +.Sh NAME +.Nm rmlock , +.Nm rm_init , +.Nm rm_destroy , +.Nm rm_rlock , +.Nm rm_wlock , +.Nm rm_runlock , +.Nm rm_wunlock , +.Nm RM_SYSINIT +.Nd mostly read lock - a kernel reader/writer lock optimized for mostly read access patterns +.Sh SYNOPSIS +.In sys/param.h +.In sys/lock.h +.In sys/rmlock.h +.Ft void +.Fn rm_init "struct rmlock *rm" "const char *name" "int opts" +.Ft void +.Fn rm_destroy "struct rmlock *rm" +.Ft void +.Fn rm_rlock "struct rmlock *rm" "struct rm_priotracker* tracker" +.Ft void +.Fn rm_wlock "struct rmlock *rm" +.Ft void +.Fn rm_runlock "struct rmlock *rm" "struct rm_priotracker* tracker" +.Ft void +.Fn rm_wunlock "struct rmlock *rm" +.Ft int + +.In sys/kernel.h +.Fn RM_SYSINIT "name" "struct rmlock *rm" "const char *desc" "int opts" +.Sh DESCRIPTION +Mostly reader locks allow shared access to protected data by multiple threads, +or exclusive access by a single thread. +The threads with shared access are known as +.Em readers +since they only read the protected data. +A thread with exclusive access is known as a +.Em writer +since it can modify protected data. +.Pp +Read mostly locks are designed to be efficient for locks almost exclusively used as reader +locks and as such should be used for protecting data that rarely changes.
+Acquiring an exclusive lock after the lock had been locked for shared access is an expensive operation. +.Pp +Although reader/writer locks look very similar to +.Xr sx 9 +locks, their usage pattern is different. +Reader/writer locks can be treated as mutexes (see +.Xr mutex 9 ) +with shared/exclusive semantics. +Unlike +.Xr sx 9 , +an +.Nm +can be locked while holding a non-spin mutex, and an +.Nm +cannot be held while sleeping. +The +.Nm +locks have full priority propagation like mutexes. The rm_priotracker structure argument supplied +in rm_rlock and rm_runlock is used to keep track of the read owner(s). +Another important property is that shared holders of +.Nm +can recurse if the lock has been initialized with the LO_RECURSABLE option, +however exclusive locks are not allowed to recurse. +.Ss Macros and Functions +.Bl -tag -width indent +.It Fn rm_init "struct rmlock *rm" "const char *name" "int opts" +Initialize structure located at +.Fa rm +as mostly reader lock, described by name +.Fa name . +Optionally allowing readers to recurse by setting LO_RECURSABLE in +.Fa opts . +The name description is used solely for debugging purposes. +This function must be called before any other operations +on the lock. +.It Fn rm_rlock "struct rmlock *rm" "struct rm_priotracker* tracker" +Lock +.Fa rm +as a reader. Using +.Fa tracker +to track read owners of a lock for priority propagation. +This data structure is only used internally by rmlock and must persist +until rm_runlock has been called. This data structure can be allocated on the stack since +rmlocks cannot be held while sleeping. +If any thread holds this lock exclusively, the current thread blocks, +and its priority is propagated to the exclusive holder. +If the lock was initialized with the LO_RECURSABLE option the +.Fn rm_rlock +function can be called when the thread has already acquired reader +access on +.Fa rm . +This is called +.Dq "recursing on a lock" .
+.It Fn rm_wlock "struct rmlock *rm" +Lock +.Fa rm +as a writer. +If there are any shared owners of the lock, the current thread blocks. +The +.Fn rm_wlock +function cannot be called recursively. +.It Fn rm_runlock "struct rmlock *rm" "struct rm_priotracker* tracker" +This function releases a shared lock previously acquired by +.Fn rm_rlock . +The +.Fa tracker +argument must match the +.Fa tracker +argument used for acquiring the shared lock. +.It Fn rm_wunlock "struct rmlock *rm" +This function releases an exclusive lock previously acquired by +.Fn rm_wlock . +.It Fn rm_destroy "struct rmlock *rm" +This function destroys a lock previously initialized with +.Fn rm_init . +The +.Fa rm +lock must be unlocked. +.El +.El +.Sh SEE ALSO +.Xr locking 9 , +.Xr mutex 9 , +.Xr panic 9 , +.Xr rwlock 9 , +.Xr sema 9 , +.Xr sx 9 +.Sh HISTORY +These +functions appeared in +.Fx 7.0 . +.Sh AUTHORS +.An -nosplit +The +.Nm +facility was written by +.An "Stephan Uphoff" . +This manual page was written by +.An "Gleb Smirnoff" +for rwlock and modified to reflect rmlock by +.An "Stephan Uphoff" . +.Sh BUGS +.Dv Uniprocessor Systems Optimization: +rmlock is not currently optimized for single processor systems. + +.Dv Number of rmlocks in the system: +The rmlock implementation uses a single per cpu list shared by all rmlocks in the system. +If rmlocks become popular, hashing to multiple per cpu queues may be needed to speed up +the writer lock process. + +.Dv condvar: The rm lock can currently not be used as a lock argument for condition variable wait functions.
diff --git a/sys/conf/files b/sys/conf/files index 6d26a88..f2a5b88 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1445,6 +1445,7 @@ kern/kern_priv.c standard kern/kern_proc.c standard kern/kern_prot.c standard kern/kern_resource.c standard +kern/kern_rmlock.c standard kern/kern_rwlock.c standard kern/kern_sema.c standard kern/kern_shutdown.c standard diff --git a/sys/kern/kern_rmlock.c b/sys/kern/kern_rmlock.c new file mode 100644 index 0000000..5a91811 --- /dev/null +++ b/sys/kern/kern_rmlock.c @@ -0,0 +1,500 @@ +/*- + * Copyright (c) 2007 Stephan Uphoff <ups@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the author nor the names of any co-contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Machine independent bits of reader/writer lock implementation. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include "opt_ddb.h" + +#include <sys/param.h> +#include <sys/systm.h> + +#include <sys/kernel.h> +#include <sys/ktr.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/rmlock.h> +#include <sys/sched.h> +#include <sys/smp.h> +#include <sys/systm.h> +#include <sys/turnstile.h> +#include <sys/lock_profile.h> +#include <machine/cpu.h> + +#ifdef DDB +#include <ddb/ddb.h> +#endif + +#define RMPF_ONQUEUE 1 +#define RMPF_SIGNAL 2 + +/* + * To support usage of rmlock in CVs and msleep + * yet another list for the priority tracker + * would be needed. 
+ * Using this lock for cv and msleep also does + * not seem very useful + */ + +static __inline void compiler_memory_barrier(void) { + __asm __volatile("":::"memory"); +} + +static void lock_rm(struct lock_object *lock, int how); +static int unlock_rm(struct lock_object *lock); + +struct lock_class lock_class_rm = { + .lc_name = "rm", + .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE, +#if 0 +#ifdef DDB + .lc_ddb_show = db_show_rwlock, +#endif +#endif + .lc_lock = lock_rm, + .lc_unlock = unlock_rm, +}; + +static void +lock_rm(struct lock_object *lock, int how) { + panic("lock_rm called"); +} + +static int +unlock_rm(struct lock_object *lock) { + panic("unlock_rm called"); +} + +static struct mtx rm_spinlock; + +MTX_SYSINIT(rm_spinlock, &rm_spinlock, "rm_spinlock", MTX_SPIN); + +/* + * Add or remove tracker from per cpu list. + * The per cpu list can be traversed at any time in forward + * direction from an interrupt on the *local* cpu. + */ + +static void inline +rm_tracker_add(struct pcpu *pc, struct rm_priotracker* tracker) { + struct rm_queue* next; + /* Initialize all tracker pointers */ + tracker->rmp_cpuQueue.rmq_prev = &pc->pc_rm_queue; + next = pc->pc_rm_queue.rmq_next; + tracker->rmp_cpuQueue.rmq_next = next; + /* rmq_prev is not used during froward traversal */ + next->rmq_prev = &tracker->rmp_cpuQueue; + /* Update pointer to first element */ + pc->pc_rm_queue.rmq_next = &tracker->rmp_cpuQueue; +} + + +static void inline +rm_tracker_remove(struct pcpu *pc, struct rm_priotracker* tracker) { + struct rm_queue *next, *prev; + next = tracker->rmp_cpuQueue.rmq_next; + prev = tracker->rmp_cpuQueue.rmq_prev; + /* Not used during forward traversal */ + next->rmq_prev = prev; + /* Remove from list */ + prev->rmq_next = next; +} + + + + +static void rm_cleanIPI(void *arg) { + struct pcpu *pc; + struct rmlock* rm = arg; + struct rm_priotracker *tracker; + struct rm_queue* queue; + pc = pcpu_find(curcpu); + + for(queue = pc->pc_rm_queue.rmq_next; + queue != 
&pc->pc_rm_queue; + queue = queue->rmq_next) { + tracker = (struct rm_priotracker *) queue; + if(tracker->rmp_rmlock == rm && tracker->rmp_flags == 0 ) { + tracker->rmp_flags = RMPF_ONQUEUE; + mtx_lock_spin(&rm_spinlock); + LIST_INSERT_HEAD(&rm->rm_activeReaders,tracker, + rmp_qentry); + mtx_unlock_spin(&rm_spinlock); + } + } + return; +} + + + +void +rm_init(struct rmlock *rm, const char *name, int opts) +{ + rm->rm_noreadtoken = 1; + LIST_INIT(&rm->rm_activeReaders); + mtx_init(&rm->rm_lock, name, "RM_MTX",MTX_NOWITNESS); + lock_init(&rm->lock_object, &lock_class_rm, name, NULL, (opts & LO_RECURSABLE)| LO_WITNESS); + +} + +void +rm_destroy(struct rmlock *rm) +{ + mtx_destroy(&rm->rm_lock); + lock_destroy(&rm->lock_object); +} + +void +rm_sysinit(void *arg) +{ + struct rm_args *args = arg; + rm_init(args->ra_rm, args->ra_desc, args->ra_opts); +} + + +static void +_rm_rlock_hard(struct rmlock *rm, struct rm_priotracker* tracker) +{ + struct pcpu *pc; + struct rm_queue *queue; + struct rm_priotracker* atracker; + + critical_enter(); + pc = pcpu_find(curcpu); + + /* Check if we just need to do a proper critical_exit */ + if (0 == rm->rm_noreadtoken) { + critical_exit(); + return; + } + + /* Remove our tracker from the per cpu list */ + rm_tracker_remove(pc,tracker); + + /* Check to see if the IPI granted us the lock after all */ + if(tracker->rmp_flags) { + /* Just add back tracker - we hold the lock */ + rm_tracker_add(pc,tracker); + critical_exit(); + return; + } + + + + /* + * We allow readers to aquire a lock even if a writer + * is blocked if the lock is recursive and the reader + * already holds the lock + */ + + if ((rm->lock_object.lo_flags & LO_RECURSABLE) != 0) { + /* + * Just grand the lock if this thread already have a tracker + * for this lock on the per cpu queue + */ + + for(queue = pc->pc_rm_queue.rmq_next; + queue != &pc->pc_rm_queue; + queue = queue->rmq_next) { + atracker = (struct rm_priotracker *) queue; + if (( atracker->rmp_rmlock == rm) && + ( 
atracker->rmp_thread == tracker->rmp_thread )) { + mtx_lock_spin(&rm_spinlock); + LIST_INSERT_HEAD(&rm->rm_activeReaders,tracker, + rmp_qentry); + tracker->rmp_flags = RMPF_ONQUEUE; + mtx_unlock_spin(&rm_spinlock); + rm_tracker_add(pc,tracker); + critical_exit(); + return; + } + } + } + + + sched_unpin(); + critical_exit(); + + mtx_lock(&rm->rm_lock); + rm->rm_noreadtoken = 0; + critical_enter(); + + pc = pcpu_find(curcpu); + rm_tracker_add(pc,tracker); + sched_pin(); + critical_exit(); + + mtx_unlock(&rm->rm_lock); + return; +} + +void +_rm_rlock(struct rmlock *rm, struct rm_priotracker* tracker) +{ + struct thread *td = curthread; + struct pcpu *pc; + + tracker->rmp_flags = 0; + tracker->rmp_thread = td; + tracker->rmp_rmlock = rm; + + td->td_critnest++; /* critical_enter(); */ + + compiler_memory_barrier(); + + pc = cpuid_to_pcpu[td->td_oncpu]; /* pcpu_find(td->td_oncpu); */ + + rm_tracker_add(pc,tracker); + + td->td_pinned++; /* sched_pin(); */ + + compiler_memory_barrier(); + + td->td_critnest--; + + /* + * Fast path to combine two common conditions + * into a single conditional jump + */ + + if (0 == (td->td_owepreempt | rm->rm_noreadtoken)) { + return; + } + + /* We do not have a read token and need to acquire one */ + _rm_rlock_hard(rm,tracker); +} + + +static void +_rm_unlock_hard(struct thread *td,struct rm_priotracker* tracker) +{ + + if (td->td_owepreempt) { + td->td_critnest++; + critical_exit(); + } + + if (!tracker->rmp_flags) { + return; + } + + + mtx_lock_spin(&rm_spinlock); + LIST_REMOVE(tracker,rmp_qentry); + + if (tracker->rmp_flags & RMPF_SIGNAL) { + struct rmlock *rm; + struct turnstile* ts; + + rm = tracker->rmp_rmlock; + + turnstile_chain_lock(&rm->lock_object); + mtx_unlock_spin(&rm_spinlock); + + ts = turnstile_lookup(&rm->lock_object); + + turnstile_signal(ts, TS_EXCLUSIVE_QUEUE); + turnstile_unpend(ts, TS_EXCLUSIVE_LOCK); + turnstile_chain_unlock(&rm->lock_object); + + + } else + mtx_unlock_spin(&rm_spinlock); + +} + +void 
+_rm_runlock(struct rmlock *rm, struct rm_priotracker* tracker) +{ + struct pcpu *pc; + struct thread *td = tracker->rmp_thread; + + td->td_critnest++; /* critical_enter(); */ + pc = cpuid_to_pcpu[td->td_oncpu]; /* pcpu_find(td->td_oncpu); */ + rm_tracker_remove(pc,tracker); + td->td_critnest--; + td->td_pinned--; /* sched_unpin(); */ + + if (0 == (td->td_owepreempt | tracker->rmp_flags)) + return; + + + _rm_unlock_hard(td,tracker); + + +} + + +void +_rm_wlock(struct rmlock *rm) +{ + struct rm_priotracker *prio; + struct turnstile *ts; + + mtx_lock(&rm->rm_lock); + + if (rm->rm_noreadtoken == 0) { + /* Get all read tokens back */ + + rm->rm_noreadtoken = 1; + + /* + * Assumes rm->rm_noreadtoken update is visible + * on other CPUs before rm_cleanIPI is called + */ +#ifdef SMP + smp_rendezvous(smp_no_rendevous_barrier, + rm_cleanIPI, + smp_no_rendevous_barrier + ,rm); + +#else + rm_cleanIPI(rm); +#endif + + + mtx_lock_spin(&rm_spinlock); + + while((prio = LIST_FIRST(&rm->rm_activeReaders)) != NULL) { + ts = turnstile_trywait(&rm->lock_object); + prio->rmp_flags = RMPF_ONQUEUE | RMPF_SIGNAL; + mtx_unlock_spin(&rm_spinlock); + turnstile_wait(ts,prio->rmp_thread, + TS_EXCLUSIVE_QUEUE); + mtx_lock_spin(&rm_spinlock); + + } + + mtx_unlock_spin(&rm_spinlock); + } + +} + + +void +_rm_wunlock(struct rmlock *rm) +{ + mtx_unlock(&rm->rm_lock); +} + + +#ifdef LOCK_DEBUG + +void _rm_wlock_debug(struct rmlock *rm, const char *file, int line) +{ + + + WITNESS_CHECKORDER(&rm->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, + file, line); + + _rm_wlock(rm); + + LOCK_LOG_LOCK("RMWLOCK", &rm->lock_object, 0, 0, file, line); + + WITNESS_LOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line); + + curthread->td_locks++; + +} + +void _rm_wunlock_debug(struct rmlock *rm, const char *file, int line) +{ + curthread->td_locks--; + WITNESS_UNLOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line); + LOCK_LOG_LOCK("RMWUNLOCK", &rm->lock_object, 0, 0, file, line); + _rm_wunlock(rm); +} + + +void 
+_rm_rlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, + const char *file, int line) +{ + + + WITNESS_CHECKORDER(&rm->lock_object, LOP_NEWORDER , file, line); + + _rm_rlock(rm, tracker); + + LOCK_LOG_LOCK("RMRLOCK", &rm->lock_object, 0, 0, file, line); + + WITNESS_LOCK(&rm->lock_object, 0 , file, line); + + curthread->td_locks++; +} + +void +_rm_runlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, + const char *file, int line) { + curthread->td_locks--; + WITNESS_UNLOCK(&rm->lock_object, 0 , file, line); + LOCK_LOG_LOCK("RMRUNLOCK", &rm->lock_object, 0, 0, file, line); + _rm_runlock(rm, tracker); +} + + + + +#else +/* + * Just strip out file and line arguments if no lock debugging is enabled + * in the kernel - we are called from a kernel module. +*/ + + +void _rm_wlock_debug(struct rmlock *rm, const char *file, int line) +{ + _rm_wlock(rm); +} + +void _rm_wunlock_debug(struct rmlock *rm, const char *file, int line) +{ + _rm_wunlock(rm); +} + +void +_rm_rlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, + const char *file, int line) +{ + _rm_rlock(rm, tracker); +} + +void +_rm_runlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, + const char *file, int line) { + _rm_runlock(rm, tracker); +} + +#endif diff --git a/sys/kern/subr_lock.c b/sys/kern/subr_lock.c index 33e2bdd..ebb3c35 100644 --- a/sys/kern/subr_lock.c +++ b/sys/kern/subr_lock.c @@ -57,6 +57,7 @@ struct lock_class *lock_classes[LOCK_CLASS_MAX + 1] = { &lock_class_mtx_spin, &lock_class_mtx_sleep, &lock_class_sx, + &lock_class_rm, &lock_class_rw, &lock_class_lockmgr, }; diff --git a/sys/kern/subr_pcpu.c b/sys/kern/subr_pcpu.c index bf9b0c7..1109b31 100644 --- a/sys/kern/subr_pcpu.c +++ b/sys/kern/subr_pcpu.c @@ -56,7 +56,7 @@ __FBSDID("$FreeBSD$"); #include <sys/smp.h> #include <ddb/ddb.h> -static struct pcpu *cpuid_to_pcpu[MAXCPU]; +struct pcpu *cpuid_to_pcpu[MAXCPU]; struct cpuhead cpuhead = SLIST_HEAD_INITIALIZER(cpuhead); /* @@ -74,6 +74,9 @@ 
pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) cpuid_to_pcpu[cpuid] = pcpu; SLIST_INSERT_HEAD(&cpuhead, pcpu, pc_allcpu); cpu_pcpu_init(pcpu, cpuid, size); + pcpu->pc_rm_queue.rmq_next = &pcpu->pc_rm_queue; + pcpu->pc_rm_queue.rmq_prev = &pcpu->pc_rm_queue; + } /* diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c index a884288..bc30b2a 100644 --- a/sys/kern/subr_smp.c +++ b/sys/kern/subr_smp.c @@ -104,10 +104,10 @@ SYSCTL_INT(_kern_smp, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW, "Forwarding of roundrobin to all other CPUs"); /* Variables needed for SMP rendezvous. */ -static void (*smp_rv_setup_func)(void *arg); -static void (*smp_rv_action_func)(void *arg); -static void (*smp_rv_teardown_func)(void *arg); -static void *smp_rv_func_arg; +static void (*volatile smp_rv_setup_func)(void *arg); +static void (*volatile smp_rv_action_func)(void *arg); +static void (* volatile smp_rv_teardown_func)(void *arg); +static void * volatile smp_rv_func_arg; static volatile int smp_rv_waiters[3]; /* @@ -286,6 +286,13 @@ restart_cpus(cpumask_t map) return 1; } +void smp_no_rendevous_barrier(void *dummy) +{ +#ifdef SMP + KASSERT((!smp_started),("smp_no_rendevous called and smp is started")); +#endif +} + /* * All-CPU rendezvous. CPUs are signalled, all execute the setup function * (if specified), rendezvous, execute the action function (if specified), @@ -298,33 +305,41 @@ restart_cpus(cpumask_t map) void smp_rendezvous_action(void) { - + void* local_func_arg = smp_rv_func_arg; + void (*local_setup_func)(void*) = smp_rv_setup_func; + void (*local_action_func)(void*) = smp_rv_action_func; + void (*local_teardown_func)(void*) = smp_rv_teardown_func; + /* Ensure we have up-to-date values. 
*/ atomic_add_acq_int(&smp_rv_waiters[0], 1); while (smp_rv_waiters[0] < mp_ncpus) cpu_spinwait(); /* setup function */ - if (smp_rv_setup_func != NULL) - smp_rv_setup_func(smp_rv_func_arg); + if (local_setup_func != smp_no_rendevous_barrier) { + if (smp_rv_setup_func != NULL) + smp_rv_setup_func(smp_rv_func_arg); + /* spin on entry rendezvous */ + atomic_add_int(&smp_rv_waiters[1], 1); + while (smp_rv_waiters[1] < mp_ncpus) + cpu_spinwait(); + } - /* spin on entry rendezvous */ - atomic_add_int(&smp_rv_waiters[1], 1); - while (smp_rv_waiters[1] < mp_ncpus) - cpu_spinwait(); /* action function */ - if (smp_rv_action_func != NULL) - smp_rv_action_func(smp_rv_func_arg); + if (local_action_func != NULL) + local_action_func(local_func_arg); + /* spin on exit rendezvous */ atomic_add_int(&smp_rv_waiters[2], 1); + if (local_teardown_func == smp_no_rendevous_barrier) + return; while (smp_rv_waiters[2] < mp_ncpus) cpu_spinwait(); - /* teardown function */ - if (smp_rv_teardown_func != NULL) - smp_rv_teardown_func(smp_rv_func_arg); + if (local_teardown_func != NULL) + local_teardown_func(local_func_arg); } void @@ -356,12 +371,18 @@ smp_rendezvous(void (* setup_func)(void *), smp_rv_waiters[2] = 0; atomic_store_rel_int(&smp_rv_waiters[0], 0); + + /* signal other processors, which will enter the IPI with interrupts off */ ipi_all_but_self(IPI_RENDEZVOUS); /* call executor function */ smp_rendezvous_action(); + if (teardown_func == smp_no_rendevous_barrier) { + while (atomic_load_acq_int(&smp_rv_waiters[2]) < mp_ncpus) + cpu_spinwait(); + } /* release lock */ mtx_unlock_spin(&smp_ipi_mtx); } diff --git a/sys/sys/_rmlock.h b/sys/sys/_rmlock.h new file mode 100644 index 0000000..e5c68d5 --- /dev/null +++ b/sys/sys/_rmlock.h @@ -0,0 +1,62 @@ +/*- + * Copyright (c) 2007 Stephan Uphoff <ups@FreeBSD.org> + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the author nor the names of any co-contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _SYS__RMLOCK_H_ +#define _SYS__RMLOCK_H_ + +/* + * XXXUPS remove as soon as we have per cpu variable + * linker sets and can define rm_queue in _rm_lock.h +*/ +#include <sys/pcpu.h> +/* + * Mostly reader/occasional writer lock. 
+ */ + +LIST_HEAD(rmpriolist,rm_priotracker); + +struct rmlock { + struct lock_object lock_object; + volatile int rm_noreadtoken; + LIST_HEAD(,rm_priotracker) rm_activeReaders; + struct mtx rm_lock; + +}; + +struct rm_priotracker { + struct rm_queue rmp_cpuQueue; /* Must be first */ + struct rmlock *rmp_rmlock; + struct thread *rmp_thread; + int rmp_flags; + LIST_ENTRY(rm_priotracker) rmp_qentry; +}; + +#endif /* !_SYS__RMLOCK_H_ */ diff --git a/sys/sys/lock.h b/sys/sys/lock.h index 6d35ce5..2a9c4aa 100644 --- a/sys/sys/lock.h +++ b/sys/sys/lock.h @@ -222,6 +222,7 @@ extern struct lock_class lock_class_mtx_sleep; extern struct lock_class lock_class_mtx_spin; extern struct lock_class lock_class_sx; extern struct lock_class lock_class_rw; +extern struct lock_class lock_class_rm; extern struct lock_class lock_class_lockmgr; extern struct lock_class *lock_classes[]; diff --git a/sys/sys/pcpu.h b/sys/sys/pcpu.h index b2fd628..8757fe6 100644 --- a/sys/sys/pcpu.h +++ b/sys/sys/pcpu.h @@ -48,6 +48,15 @@ struct pcb; struct thread; +/* + * XXXUPS remove as soon as we have per cpu variable + * linker sets and can define rm_queue in _rm_lock.h +*/ +struct rm_queue { + struct rm_queue* volatile rmq_next; + struct rm_queue* volatile rmq_prev; +}; + /* * This structure maps out the global data that needs to be kept on a * per-cpu basis. The members are accessed via the PCPU_GET/SET/PTR @@ -74,6 +83,15 @@ struct pcpu { PCPU_MD_FIELDS; struct vmmeter pc_cnt; /* VM stats counters */ struct device *pc_device; + + /* + * Stuff for read mostly lock + * + * XXXUPS remove as soon as we have per cpu variable + * linker sets. + */ + struct rm_queue pc_rm_queue; + }; SLIST_HEAD(cpuhead, pcpu); @@ -92,6 +110,10 @@ extern struct cpuhead cpuhead; * db_show_mdpcpu() is responsible for handling machine dependent * fields for the DDB 'show pcpu' command. 
*/ + +extern struct pcpu *cpuid_to_pcpu[MAXCPU]; + + void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size); void db_show_mdpcpu(struct pcpu *pcpu); diff --git a/sys/sys/rmlock.h b/sys/sys/rmlock.h new file mode 100644 index 0000000..04ccb38 --- /dev/null +++ b/sys/sys/rmlock.h @@ -0,0 +1,109 @@ +/*- + * Copyright (c) 2007 Stephan Uphoff <ups@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the author nor the names of any co-contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _SYS_RMLOCK_H_ +#define _SYS_RMLOCK_H_ + +#include <sys/mutex.h> + +#include <sys/_lock.h> +#include <sys/_rmlock.h> + +#ifdef _KERNEL + + +void rm_init(struct rmlock *rm, const char *name, int opts); +void rm_destroy(struct rmlock *rm); +void rm_sysinit(void *arg); + + +void _rm_wlock_debug(struct rmlock *rm, const char *file, int line); +void _rm_wunlock_debug(struct rmlock *rm, const char *file, int line); +void _rm_rlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, + const char *file, int line); +void _rm_runlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, + const char *file, int line); + + +void _rm_wlock(struct rmlock *rm); +void _rm_wunlock(struct rmlock *rm); +void _rm_rlock(struct rmlock *rm, struct rm_priotracker *tracker); +void _rm_runlock(struct rmlock *rm, struct rm_priotracker *tracker); + +/* + * Public interface for lock operations. + * + */ + +#ifndef LOCK_DEBUG +#error LOCK_DEBUG not defined, include <sys/lock.h> before <sys/rmlock.h> +#endif + +#if LOCK_DEBUG > 0 + +#define rm_wlock(rm) _rm_wlock_debug((rm), LOCK_FILE, LOCK_LINE) +#define rm_wunlock(rm) _rm_wunlock_debug((rm), LOCK_FILE, LOCK_LINE) +#define rm_rlock(rm,tracker) \ + _rm_rlock_debug((rm),(tracker), LOCK_FILE, LOCK_LINE ) +#define rm_runlock(rm,tracker) \ + _rm_runlock_debug((rm), (tracker), LOCK_FILE, LOCK_LINE ) + +#else + +#define rm_wlock(rm) _rm_wlock((rm)) +#define rm_wunlock(rm) _rm_wunlock((rm)) +#define rm_rlock(rm,tracker) _rm_rlock((rm),(tracker)) +#define rm_runlock(rm,tracker) _rm_runlock((rm), (tracker)) + +#endif + +#define rm_initialized(rm) lock_initalized(&(rm)->lock_object) + +struct rm_args { + struct rmlock *ra_rm; + const char *ra_desc; + int ra_opts; +}; + +#define RM_SYSINIT(name, rm, desc, opts) \ + static struct rm_args name##_args = { \ + (rm), \ + (desc), \ + (opts), \ + }; \ + SYSINIT(name##_rm_sysinit, SI_SUB_LOCK, SI_ORDER_MIDDLE, \ + rm_sysinit, &name##_args); \ + 
SYSUNINIT(name##_rm_sysuninit, SI_SUB_LOCK, SI_ORDER_MIDDLE, \ + rm_destroy, (rm)) + + +#endif /* _KERNEL */ +#endif /* !_SYS_RMLOCK_H_ */ diff --git a/sys/sys/smp.h b/sys/sys/smp.h index f2b2f52..7c79bb9 100644 --- a/sys/sys/smp.h +++ b/sys/sys/smp.h @@ -100,6 +100,7 @@ void forward_roundrobin(void); int restart_cpus(cpumask_t); int stop_cpus(cpumask_t); void smp_rendezvous_action(void); +void smp_no_rendevous_barrier(void *); extern struct mtx smp_ipi_mtx; #endif /* SMP */ |