diff options
Diffstat (limited to 'share')
-rw-r--r-- | share/examples/Makefile | 1 | ||||
-rw-r--r-- | share/examples/etc/README.examples | 1 | ||||
-rw-r--r-- | share/examples/etc/make.conf | 4 | ||||
-rw-r--r-- | share/man/man4/Makefile | 1 | ||||
-rw-r--r-- | share/man/man4/cc_cdg.4 | 155 | ||||
-rw-r--r-- | share/man/man4/oce.4 | 2 | ||||
-rw-r--r-- | share/man/man4/virtio.4 | 6 | ||||
-rw-r--r-- | share/man/man4/virtio_balloon.4 | 2 | ||||
-rw-r--r-- | share/man/man4/virtio_blk.4 | 32 | ||||
-rw-r--r-- | share/man/man4/virtio_scsi.4 | 2 | ||||
-rw-r--r-- | share/man/man4/vtnet.4 | 2 | ||||
-rw-r--r-- | share/man/man5/fstab.5 | 2 | ||||
-rw-r--r-- | share/man/man5/src.conf.5 | 13 | ||||
-rw-r--r-- | share/man/man9/locking.9 | 376 | ||||
-rw-r--r-- | share/misc/committers-src.dot | 2 | ||||
-rw-r--r-- | share/mk/bsd.libnames.mk | 1 |
16 files changed, 418 insertions, 184 deletions
diff --git a/share/examples/Makefile b/share/examples/Makefile index 90bef45..04f53d6 100644 --- a/share/examples/Makefile +++ b/share/examples/Makefile @@ -240,6 +240,7 @@ etc-examples: .endif .if ${SHARED} != "symlinks" +SUBDIR= smbfs .if ${MK_ATF} != "no" SUBDIR+=atf .endif diff --git a/share/examples/etc/README.examples b/share/examples/etc/README.examples index d703edb..f7bf4ce 100644 --- a/share/examples/etc/README.examples +++ b/share/examples/etc/README.examples @@ -40,6 +40,7 @@ netstart - network startup script run from /etc/rc network.subr - routines for network configuration scripts networks - see networks(5) newsyslog.conf - configuration for system log file rotator newsyslog(8) +nsmb.conf - smbfs lookups configuration file opieaccess - OPIE database of trusted networks pf.conf - pf(4) example configuration file pf.os - SYN fingerprint database diff --git a/share/examples/etc/make.conf b/share/examples/etc/make.conf index 1a1c43c..19be713 100644 --- a/share/examples/etc/make.conf +++ b/share/examples/etc/make.conf @@ -38,8 +38,8 @@ # pentium3m, pentium3, pentium-m, pentium2, # pentiumpro, pentium-mmx, pentium, i486 # (VIA CPUs) c7, c3-2, c3 -# AMD64 architecture: opteron-sse3, athlon64-sse3, k8-sse3, opteron, -# athlon64, k8, core2, nocona +# AMD64 architecture: amdfam10, opteron-sse3, athlon64-sse3, k8-sse3, +# opteron, athlon64, k8, core2, nocona # Intel ia64 architecture: itanium2, itanium # SPARC-V9 architecture: v9 (generic 64-bit V9), ultrasparc (default # if omitted), ultrasparc3 diff --git a/share/man/man4/Makefile b/share/man/man4/Makefile index 523d4a9..e7f5f4c 100644 --- a/share/man/man4/Makefile +++ b/share/man/man4/Makefile @@ -81,6 +81,7 @@ MAN= aac.4 \ cardbus.4 \ carp.4 \ cas.4 \ + cc_cdg.4 \ cc_chd.4 \ cc_cubic.4 \ cc_hd.4 \ diff --git a/share/man/man4/cc_cdg.4 b/share/man/man4/cc_cdg.4 new file mode 100644 index 0000000..c111bd4 --- /dev/null +++ b/share/man/man4/cc_cdg.4 @@ -0,0 +1,155 @@ +.\" +.\" Copyright (c) 2013 Swinburne 
University of Technology, Melbourne, Australia +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR +.\" ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd July 2, 2013 +.Dt CC_CDG 4 +.Os +.Sh NAME +.Nm cc_cdg +.Nd CDG Congestion Control Algorithm +.Sh DESCRIPTION +CAIA-Delay Gradient (CDG) is a hybrid congestion control algorithm which reacts +to both packet loss and inferred queuing delay. +It attempts to operate as a delay-based algorithm where possible, but utilises +heuristics to detect loss-based TCP cross traffic and will compete effectively +as required. +CDG is therefore incrementally deployable and suitable for use on shared +networks. 
+.Pp +During delay-based operation, CDG uses a delay-gradient based probabilistic +backoff mechanism, and will also try to infer non congestion related +packet losses and avoid backing off when they occur. +During loss-based operation, CDG essentially reverts to +.Xr cc_newreno 4 Ns - Ns like +behaviour. +.Pp +CDG switches to loss-based operation when it detects that a configurable number +of consecutive delay-based backoffs have had no measurable effect. +It periodically attempts to return to delay-based operation, but will keep +switching back to loss-based operation as required. +.Sh MIB Variables +The algorithm exposes the following variables in the +.Va net.inet.tcp.cc.cdg +branch of the +.Xr sysctl 3 +MIB: +.Bl -tag -width ".Va exp_backoff_scale" +.It Va version +Current algorithm/implementation version number. +.It Va beta_delay +Delay-based window decrease factor as a percentage (on delay-based backoff, w = +w * beta_delay / 100). +Default is 70. +.It Va beta_loss +Loss-based window decrease factor as a percentage (on loss-based backoff, w = +w * beta_loss / 100). +Default is 50. +.It Va exp_backoff_scale +Scaling parameter for the probabilistic exponential backoff. +Default is 2. +.It Va smoothing_factor +Number of samples used for moving average smoothing (0 means no smoothing). +Default is 8. +.It Va loss_compete_consec_cong +Number of consecutive delay-gradient based congestion episodes which will +trigger loss-based CC compatibility. +Default is 5. +.It Va loss_compete_hold_backoff +Number of consecutive delay-gradient based congestion episodes to hold the +window backoff for loss-based CC compatibility. +Default is 5. +.It Va alpha_inc +If non-zero, this enables an experimental mode where CDG's window increase +factor (alpha) is increased by 1 MSS every +.Va alpha_inc +RTTs during congestion avoidance mode. +(Setting +.Va alpha_inc +to 1 results in the most aggressive growth of the window increase factor over +time. 
+Use higher +.Va alpha_inc +values for slower growth.) +Default is 0. +.El +.Sh SEE ALSO +.Xr cc_chd 4 , +.Xr cc_cubic 4 , +.Xr cc_hd 4 , +.Xr cc_htcp 4 , +.Xr cc_newreno 4 , +.Xr cc_vegas 4 , +.Xr h_ertt 4 , +.Xr mod_cc 4 , +.Xr tcp 4 , +.Xr khelp 9 , +.Xr mod_cc 9 +.Rs +.%A "D. A. Hayes" +.%A "G. Armitage" +.%T "Revisiting TCP Congestion Control using Delay Gradients" +.%J "Networking 2011 Proceedings, Part II" +.%D "May 2011" +.%P "328-341" +.Re +.Rs +.%A "N. Khademi" +.%A "G. Armitage" +.%T "Minimising RTT across homogeneous 802.11 WLANs with CAIA Delay-Gradient TCP (v0.1)" +.%R "CAIA Technical Report 121113A" +.%D "November 2012" +.%U "http://caia.swin.edu.au/reports/121113A/CAIA-TR-121113A.pdf" +.Re +.Sh ACKNOWLEDGEMENTS +Development and testing of this software were made possible in part by grants +from the FreeBSD Foundation and The Cisco University Research Program Fund, a +corporate advised fund of Silicon Valley Community Foundation. +.Sh HISTORY +The +.Nm +congestion control module first appeared in +.Fx 9.2 . +.Pp +The module was first released in 2011 by David Hayes whilst working on the +NewTCP research project at Swinburne University of Technology's Centre for +Advanced Internet Architectures, Melbourne, Australia. +More details are available at: +.Pp +http://caia.swin.edu.au/urp/newtcp/ +.Sh AUTHORS +.An -nosplit +The +.Nm +congestion control module was written by +.An David Hayes Aq david.hayes@ieee.org . +This manual page was written by +.An Lawrence Stewart Aq lstewart@FreeBSD.org +and +.An Grenville Armitage Aq garmitage@swin.edu.au . +.Sh BUGS +The underlying algorithm and parameter values are still a work in progress and +may not be optimal for some network scenarios. 
diff --git a/share/man/man4/oce.4 b/share/man/man4/oce.4 index 78043ff..ee3b33b 100644 --- a/share/man/man4/oce.4 +++ b/share/man/man4/oce.4 @@ -91,7 +91,7 @@ Firmware can be updated by following the steps below: .It Copy the below code to a Makefile: .Bd -literal -offset indent -\&.KMOD=elxflash +KMOD=elxflash FIRMWS=imagename.ufi:elxflash \&.include <bsd.kmod.mk> .Ed diff --git a/share/man/man4/virtio.4 b/share/man/man4/virtio.4 index 6856f69..10ebf20 100644 --- a/share/man/man4/virtio.4 +++ b/share/man/man4/virtio.4 @@ -53,10 +53,10 @@ This emulation is often inefficient. .Pp VirtIO defines an interface for efficient I/O between the hypervisor and VM. The -.Xr virtio 4 +.Nm module provides a shared memory transport called a virtqueue. The -.Xr virtio_pci 4 +.Xr virtio_pci device driver represents an emulated PCI device that the hypervisor makes available to the VM. This device provides the probing, configuration, and @@ -94,4 +94,4 @@ Support for VirtIO first appeared in .An -nosplit .Fx support for VirtIO was first added by -.An Bryan Venteicher Aq bryanv@daemoninthecloset.org . +.An Bryan Venteicher Aq bryanv@FreeBSD.org . diff --git a/share/man/man4/virtio_balloon.4 b/share/man/man4/virtio_balloon.4 index 96e8c73..450b5cd 100644 --- a/share/man/man4/virtio_balloon.4 +++ b/share/man/man4/virtio_balloon.4 @@ -59,6 +59,6 @@ The hypervisor can later signal the balloon to return the memory. The .Nm driver was written by -.An Bryan Venteicher Aq bryanv@daemoninthecloset.org . +.An Bryan Venteicher Aq bryanv@FreeBSD.org . It first appeared in .Fx 9.0 . diff --git a/share/man/man4/virtio_blk.4 b/share/man/man4/virtio_blk.4 index 7be4425..eb5fded 100644 --- a/share/man/man4/virtio_blk.4 +++ b/share/man/man4/virtio_blk.4 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd January 22, 2012 +.Dd July 2, 2013 .Dt VIRTIO_BLK 4 .Os .Sh NAME @@ -53,11 +53,33 @@ Tunables can be set at the .Xr loader 8 prompt before booting the kernel or stored in .Xr loader.conf 5 . 
-.Bl -tag -width "xxxxxx" +.Bl -tag -width indent .It Va hw.vtblk.no_ident -This tunable disables retrieving the device identification string -from the hypervisor. +.It Va hw.vtblk. Ns Ar X Ns Va .no_ident +.Pp +These tunables disable retrieving the device identification string +from the hypervisor either globally or per-device. The default value is 0. +.It Va hw.vtblk.writecache_mode +.It Va hw.vtblk. Ns Ar X Ns Va .writecache_mode +.Pp +These tunables determine the write cache mode globally or per-device. +The mode can be changed only if the ConfigWCE feature is negotiated. +Set to 0 for writethrough mode, 1 for writeback mode, and -1 to leave +it as-is. +The default value is to leave as-is. +.El +.Sh SYSCTL VARIABLES +The following variables are available as +.Xr sysctl 8 +variables. +.Bl -tag -width indent +.It Va dev.vtblk. Ns Ar X Ns Va .writecache_mode +.Pp +The write cache mode of the device can be either writethrough (0) or +writeback (1). +If the ConfigWCE feature is negotiated, the write cache mode can +be toggled between writethrough and writeback. .El .Sh SEE ALSO .Xr virtio 4 @@ -65,6 +87,6 @@ The default value is 0. The .Nm driver was written by -.An Bryan Venteicher Aq bryanv@daemoninthecloset.org . +.An Bryan Venteicher Aq bryanv@FreeBSD.org . It first appeared in .Fx 9.0 . diff --git a/share/man/man4/virtio_scsi.4 b/share/man/man4/virtio_scsi.4 index a2d30d4..b136de6 100644 --- a/share/man/man4/virtio_scsi.4 +++ b/share/man/man4/virtio_scsi.4 @@ -87,6 +87,6 @@ Enable tracing prints. The .Nm driver was written by -.An Bryan Venteicher Aq bryanv@daemoninthecloset.org . +.An Bryan Venteicher Aq bryanv@FreeBSD.org . It first appeared in .Fx 10.0 . diff --git a/share/man/man4/vtnet.4 b/share/man/man4/vtnet.4 index febb0ac..8d4d202 100644 --- a/share/man/man4/vtnet.4 +++ b/share/man/man4/vtnet.4 @@ -89,7 +89,7 @@ The default value is 0. The .Nm driver was written by -.An Bryan Venteicher Aq bryanv@daemoninthecloset.org . 
+.An Bryan Venteicher Aq bryanv@FreeBSD.org . It first appeared in .Fx 9.0 . .Sh CAVEATS diff --git a/share/man/man5/fstab.5 b/share/man/man5/fstab.5 index 7daacf2..27b2ec7 100644 --- a/share/man/man5/fstab.5 +++ b/share/man/man5/fstab.5 @@ -231,7 +231,7 @@ is an .Xr md 4 device file .Pq Do md Dc or Do md[0-9]* Dc -and +and .Dq file is specified in .Fa fs_mntopts , diff --git a/share/man/man5/src.conf.5 b/share/man/man5/src.conf.5 index a7759c3f..3547748 100644 --- a/share/man/man5/src.conf.5 +++ b/share/man/man5/src.conf.5 @@ -1,7 +1,7 @@ .\" DO NOT EDIT-- this file is automatically generated. .\" from FreeBSD: head/tools/build/options/makeman 251685 2013-06-13 13:05:08Z emaste .\" $FreeBSD$ -.Dd June 16, 2013 +.Dd July 3, 2013 .Dt SRC.CONF 5 .Os .Sh NAME @@ -1055,6 +1055,17 @@ Set to not build kernel modules that include sourceless microcode. .It Va WITHOUT_SSP .\" from FreeBSD: head/tools/build/options/WITHOUT_SSP 180012 2008-06-25 21:33:28Z ru Set to not build world with propolice stack smashing protection. +.It Va WITH_SVN +.\" from FreeBSD: head/tools/build/options/WITH_SVN 252561 2013-07-03 12:36:47Z zeising +Set to install +.Xr svnlite 1 +as +.Xr svn 1 . +.It Va WITHOUT_SVNLITE +.\" from FreeBSD: head/tools/build/options/WITHOUT_SVNLITE 252561 2013-07-03 12:36:47Z zeising +Set to not build +.Xr svnlite 1 +and related programs. .It Va WITHOUT_SYMVER .\" from FreeBSD: head/tools/build/options/WITHOUT_SYMVER 169649 2007-05-17 05:03:24Z deischen Set to disable symbol versioning when building shared libraries. 
diff --git a/share/man/man9/locking.9 b/share/man/man9/locking.9 index 046ce10..f8ac5f2 100644 --- a/share/man/man9/locking.9 +++ b/share/man/man9/locking.9 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd May 22, 2013 +.Dd June 30, 2013 .Dt LOCKING 9 .Os .Sh NAME @@ -33,53 +33,51 @@ .Sh DESCRIPTION The .Em FreeBSD -kernel is written to run across multiple CPUs and as such requires -several different synchronization primitives to allow the developers -to safely access and manipulate the many data types required. +kernel is written to run across multiple CPUs and as such provides +several different synchronization primitives to allow developers +to safely access and manipulate many data types. .Ss Mutexes -Mutexes (also erroneously called "sleep mutexes") are the most commonly used +Mutexes (also called "blocking mutexes") are the most commonly used synchronization primitive in the kernel. A thread acquires (locks) a mutex before accessing data shared with other threads (including interrupt threads), and releases (unlocks) it afterwards. If the mutex cannot be acquired, the thread requesting it will wait. -Mutexes are by default adaptive, meaning that +Mutexes are adaptive by default, meaning that if the owner of a contended mutex is currently running on another CPU, -then a thread attempting to acquire the mutex will briefly spin -in the hope that the owner is only briefly holding it, -and might release it shortly. -If the owner does not do so, the waiting thread proceeds to yield the processor, -allowing other threads to run. -If the owner is not currently actually running then the spin step is skipped. +then a thread attempting to acquire the mutex will spin rather than yielding +the processor. Mutexes fully support priority propagation. .Pp See .Xr mutex 9 for details. 
-.Ss Spin mutexes -Spin mutexes are variation of basic mutexes; the main difference between -the two is that spin mutexes never yield the processor - instead, they spin, -waiting for the thread holding the lock, -(which must be running on another CPU), to release it. -Spin mutexes disable interrupts while the held so as to not get pre-empted. -Since disabling interrupts is expensive, they are also generally slower. -Spin mutexes should be used only when necessary, e.g. to protect data shared +.Ss Spin Mutexes +Spin mutexes are a variation of basic mutexes; the main difference between +the two is that spin mutexes never block. +Instead, they spin while waiting for the lock to be released. +To avoid deadlock, a thread that holds a spin mutex must never yield its CPU. +Unlike ordinary mutexes, spin mutexes disable interrupts when acquired. +Since disabling interrupts can be expensive, they are generally slower to +acquire and release. +Spin mutexes should be used only when absolutely necessary, +e.g. to protect data shared with interrupt filter code (see .Xr bus_setup_intr 9 -for details). -.Ss Pool mutexes -With most synchronization primitives, such as mutexes, programmer must -provide a piece of allocated memory to hold the primitive. +for details), +or for scheduler internals. +.Ss Mutex Pools +With most synchronization primitives, such as mutexes, the programmer must +provide memory to hold the primitive. For example, a mutex may be embedded inside the structure it protects. -Pool mutex is a variant of mutex without this requirement - to lock or unlock -a pool mutex, one uses address of the structure being protected with it, -not the mutex itself. -Pool mutexes are seldom used. +Mutex pools provide a preallocated set of mutexes to avoid this +requirement. +Note that mutexes from a pool may only be used as leaf locks. .Pp See .Xr mtx_pool 9 for details. 
-.Ss Reader/writer locks -Reader/writer locks allow shared access to protected data by multiple threads, +.Ss Reader/Writer Locks +Reader/writer locks allow shared access to protected data by multiple threads or exclusive access by a single thread. The threads with shared access are known as .Em readers @@ -91,26 +89,16 @@ since it may modify protected data. Reader/writer locks can be treated as mutexes (see above and .Xr mutex 9 ) with shared/exclusive semantics. -More specifically, regular mutexes can be -considered to be equivalent to a write-lock on an -.Em rw_lock. -The -.Em rw_lock -locks have priority propagation like mutexes, but priority -can be propagated only to an exclusive holder. +Reader/writer locks support priority propagation like mutexes, +but priority is propagated only to an exclusive holder. This limitation comes from the fact that shared owners are anonymous. -Another important property is that shared holders of -.Em rw_lock -can recurse, but exclusive locks are not allowed to recurse. -This ability should not be used lightly and -.Em may go away. .Pp See .Xr rwlock 9 for details. -.Ss Read-mostly locks -Mostly reader locks are similar to +.Ss Read-Mostly Locks +Read-mostly locks are similar to .Em reader/writer locks but optimized for very infrequent write locking. .Em Read-mostly @@ -122,21 +110,41 @@ data structure. See .Xr rmlock 9 for details. +.Ss Sleepable Read-Mostly Locks +Sleepable read-mostly locks are a variation on read-mostly locks. +Threads holding an exclusive lock may sleep, +but threads holding a shared lock may not. +Priority is propagated to shared owners but not to exclusive owners. .Ss Shared/exclusive locks Shared/exclusive locks are similar to reader/writer locks; the main difference -between them is that shared/exclusive locks may be held during unbounded sleep -(and may thus perform an unbounded sleep). -They are inherently less efficient than mutexes, reader/writer locks -and read-mostly locks. 
-They do not support priority propagation. -They should be considered to be closely related to -.Xr sleep 9 . -They could in some cases be -considered a conditional sleep. +between them is that shared/exclusive locks may be held during unbounded sleep. +Acquiring a contested shared/exclusive lock can perform an unbounded sleep. +These locks do not support priority propagation. .Pp See .Xr sx 9 for details. +.Ss Lockmanager locks +Lockmanager locks are sleepable shared/exclusive locks used mostly in +.Xr VFS 9 +.Po +as a +.Xr vnode 9 +lock +.Pc +and in the buffer cache +.Po +.Xr BUF_LOCK 9 +.Pc . +They have features other lock types do not have such as sleep +timeouts, blocking upgrades, +writer starvation avoidance, draining, and an interlock mutex, +but this makes them complicated both to use and to implement; +for this reason, they should be avoided. +.Pp +See +.Xr lock 9 +for details. .Ss Counting semaphores Counting semaphores provide a mechanism for synchronizing access to a pool of resources. @@ -149,43 +157,21 @@ See .Xr sema 9 for details. .Ss Condition variables -Condition variables are used in conjunction with mutexes to wait for -conditions to occur. -A thread must hold the mutex before calling the -.Fn cv_wait* , +Condition variables are used in conjunction with locks to wait for +a condition to become true. +A thread must hold the associated lock before calling one of the +.Fn cv_wait functions. -When a thread waits on a condition, the mutex -is atomically released before the thread yields the processor, -then reacquired before the function call returns. +When a thread waits on a condition, the lock +is atomically released before the thread yields the processor +and reacquired before the function call returns. +Condition variables may be used with blocking mutexes, +reader/writer locks, read-mostly locks, and shared/exclusive locks. .Pp See .Xr condvar 9 for details. 
-.Ss Giant -Giant is an instance of a mutex, with some special characteristics: -.Bl -enum -.It -It is recursive. -.It -Drivers can request that Giant be locked around them -by not marking themselves MPSAFE. -Note that infrastructure to do this is slowly going away as non-MPSAFE -drivers either became properly locked or disappear. -.It -Giant must be locked first before other locks. -.It -It is OK to hold Giant while performing unbounded sleep; in such case, -Giant will be dropped before sleeping and picked up after wakeup. -.It -There are places in the kernel that drop Giant and pick it back up -again. -Sleep locks will do this before sleeping. -Parts of the network or VM code may do this as well, depending on the -setting of a sysctl. -This means that you cannot count on Giant keeping other code from -running if your code sleeps, even if you want it to. -.El -.Ss Sleep/wakeup +.Ss Sleep/Wakeup The functions .Fn tsleep , .Fn msleep , @@ -194,7 +180,12 @@ The functions .Fn wakeup , and .Fn wakeup_one -handle event-based thread blocking. +also handle event-based thread blocking. +Unlike condition variables, +arbitrary addresses may be used as wait channels and a dedicated +structure does not need to be allocated. +However, care must be taken to ensure that wait channel addresses are +unique to an event. If a thread must wait for an external event, it is put to sleep by .Fn tsleep , .Fn msleep , @@ -214,9 +205,10 @@ the thread is being put to sleep. All threads sleeping on a single .Fa chan are woken up later by -.Fn wakeup , -often called from inside an interrupt routine, to indicate that the -resource the thread was blocking on is available now. +.Fn wakeup +.Pq often called from inside an interrupt routine +to indicate that the +event the thread was blocking on has occurred. .Pp Several of the sleep functions including .Fn msleep , @@ -232,122 +224,170 @@ includes the flag, then the lock will not be reacquired before returning. 
The lock is used to ensure that a condition can be checked atomically, and that the current thread can be suspended without missing a -change to the condition, or an associated wakeup. +change to the condition or an associated wakeup. In addition, all of the sleep routines will fully drop the .Va Giant mutex -(even if recursed) +.Pq even if recursed while the thread is suspended and will reacquire the .Va Giant -mutex before the function returns. +mutex +.Pq restoring any recursion +before the function returns. .Pp -See -.Xr sleep 9 -for details. -.Ss Lockmanager locks -Shared/exclusive locks, used mostly in -.Xr VFS 9 , -in particular as a -.Xr vnode 9 -lock. -They have features other lock types do not have, such as sleep timeout, -writer starvation avoidance, draining, and interlock mutex, but this makes them -complicated to implement; for this reason, they are deprecated. +The +.Fn pause +function is a special sleep function that waits for a specified +amount of time to pass before the thread resumes execution. +This sleep cannot be terminated early by either an explicit +.Fn wakeup +or a signal. .Pp See -.Xr lock 9 +.Xr sleep 9 for details. +.Ss Giant +Giant is a special mutex used to protect data structures that do not +yet have their own locks. +Since it provides semantics akin to the old +.Xr spl 9 +interface, +Giant has special characteristics: +.Bl -enum +.It +It is recursive. +.It +Drivers can request that Giant be locked around them +by not marking themselves MPSAFE. +Note that infrastructure to do this is slowly going away as non-MPSAFE +drivers either become properly locked or disappear. +.It +Giant must be locked before other non-sleepable locks. +.It +Giant is dropped during unbounded sleeps and reacquired after wakeup. +.It +There are places in the kernel that drop Giant and pick it back up +again. +Sleep locks will do this before sleeping. +Parts of the network or VM code may do this as well. 
+This means that you cannot count on Giant keeping other code from +running if your code sleeps, even if you want it to. +.El .Sh INTERACTIONS -The primitives interact and have a number of rules regarding how +The primitives can interact and have a number of rules regarding how they can and can not be combined. -Many of these rules are checked using the -.Xr witness 4 -code. -.Ss Bounded vs. unbounded sleep -The following primitives perform bounded sleep: - mutexes, pool mutexes, reader/writer locks and read-mostly locks. +Many of these rules are checked by +.Xr witness 4 . +.Ss Bounded vs. Unbounded Sleep +In a bounded sleep +.Po also referred to as +.Dq blocking +.Pc +the only resource needed to resume execution of a thread +is CPU time for the owner of a lock that the thread is waiting to acquire. +In an unbounded sleep +.Po +often referred to as simply +.Dq sleeping +.Pc +a thread waits for an external event or for a condition +to become true. +In particular, +a dependency chain of threads in bounded sleeps should always make forward +progress, +since there is always CPU time available. +This requires that no thread in a bounded sleep is waiting for a lock held +by a thread in an unbounded sleep. +To avoid priority inversions, +a thread in a bounded sleep lends its priority to the owner of the lock +that it is waiting for. .Pp -The following primitives may perform an unbounded sleep: -shared/exclusive locks, counting semaphores, condition variables, sleep/wakeup and lockmanager locks. +The following primitives perform bounded sleeps: +mutexes, reader/writer locks and read-mostly locks. .Pp +The following primitives perform unbounded sleeps: +sleepable read-mostly locks, shared/exclusive locks, lockmanager locks, +counting semaphores, condition variables, and sleep/wakeup. +.Ss General Principles +.Bl -bullet +.It It is an error to do any operation that could result in yielding the processor while holding a spin mutex. 
+.It +It is an error to do any operation that could result in unbounded sleep +while holding any primitive from the 'bounded sleep' group. +For example, it is an error to try to acquire a shared/exclusive lock while +holding a mutex, or to try to allocate memory with M_WAITOK while holding a +reader/writer lock. .Pp -As a general rule, it is an error to do any operation that could result -in unbounded sleep while holding any primitive from the 'bounded sleep' group. -For example, it is an error to try to acquire shared/exclusive lock while -holding mutex, or to try to allocate memory with M_WAITOK while holding -read-write lock. -.Pp -As a special case, it is possible to call +Note that the lock passed to one of the .Fn sleep or -.Fn mtx_sleep -while holding a single mutex. -It will atomically drop that mutex and reacquire it as part of waking up. -This is often a bad idea because it generally relies on the programmer having -good knowledge of all of the call graph above the place where -.Fn mtx_sleep -is being called and assumptions the calling code has made. -Because the lock gets dropped during sleep, one must re-test all -the assumptions that were made before, all the way up the call graph to the -place where the lock was acquired. -.Pp +.Fn cv_wait +functions is dropped before the thread enters the unbounded sleep and does +not violate this rule. +.It It is an error to do any operation that could result in yielding of the processor when running inside an interrupt filter. -.Pp +.It It is an error to do any operation that could result in unbounded sleep when running inside an interrupt thread. 
+.El .Ss Interaction table The following table shows what you can and can not do while holding -one of the synchronization primitives discussed: -.Bl -column ".Ic xxxxxxxxxxxxxxxx" ".Xr XXXXXXXXX" ".Xr XXXXXXX" ".Xr XXXXXXX" ".Xr XXXXXXX" ".Xr XXXXXX" -offset indent -.It Em " You want:" Ta spin-mtx Ta mutex Ta rwlock Ta rmlock Ta sx Ta sleep -.It Em "You have: " Ta ------ Ta ------ Ta ------ Ta ------ Ta ------ Ta ------ -.It spin mtx Ta \&ok-1 Ta \&no Ta \&no Ta \&no Ta \&no Ta \&no-3 -.It mutex Ta \&ok Ta \&ok-1 Ta \&ok Ta \&ok Ta \&no Ta \&no-3 -.It rwlock Ta \&ok Ta \&ok Ta \&ok-2 Ta \&ok Ta \&no Ta \&no-3 -.It rmlock Ta \&ok Ta \&ok Ta \&ok Ta \&ok-2 Ta \&no-5 Ta \&no-5 -.It sx Ta \&ok Ta \&ok Ta \&ok Ta \&ok Ta \&no-2 Ta \&ok-4 +one of the locking primitives discussed. Note that +.Dq sleep +includes +.Fn sema_wait , +.Fn sema_timedwait , +any of the +.Fn cv_wait +functions, +and any of the +.Fn sleep +functions. +.Bl -column ".Ic xxxxxxxxxxxxxxxx" ".Xr XXXXXXXXX" ".Xr XXXXXXXXX" ".Xr XXXXXXX" ".Xr XXXXXXXXX" ".Xr XXXXXX" -offset 3n +.It Em " You want:" Ta spin mtx Ta mutex/rw Ta rmlock Ta sleep rm Ta sx/lk Ta sleep +.It Em "You have: " Ta -------- Ta -------- Ta ------ Ta -------- Ta ------ Ta ------ +.It spin mtx Ta \&ok Ta \&no Ta \&no Ta \&no Ta \&no Ta \&no-1 +.It mutex/rw Ta \&ok Ta \&ok Ta \&ok Ta \&no Ta \&no Ta \&no-1 +.It rmlock Ta \&ok Ta \&ok Ta \&ok Ta \&no Ta \&no Ta \&no-1 +.It sleep rm Ta \&ok Ta \&ok Ta \&ok Ta \&ok-2 Ta \&ok-2 Ta \&ok-2/3 +.It sx Ta \&ok Ta \&ok Ta \&ok Ta \&ok Ta \&ok Ta \&ok-3 +.It lockmgr Ta \&ok Ta \&ok Ta \&ok Ta \&ok Ta \&ok Ta \&ok .El .Pp .Em *1 -Recursion is defined per lock. -Lock order is important. +There are calls that atomically release this primitive when going to sleep +and reacquire it on wakeup +.Po +.Fn mtx_sleep , +.Fn rw_sleep , +.Fn msleep_spin , +etc. +.Pc . .Pp .Em *2 -Readers can recurse though writers can not. -Lock order is important. 
+These cases are only allowed while holding a write lock on a sleepable +read-mostly lock. .Pp .Em *3 -There are calls that atomically release this primitive when going to sleep -and reacquire it on wakeup (e.g. -.Fn mtx_sleep , -.Fn rw_sleep -and -.Fn msleep_spin ) . -.Pp -.Em *4 -Though one can sleep holding an sx lock, one can also use -.Fn sx_sleep -which will atomically release this primitive when going to sleep and +Though one can sleep while holding this lock, +one can also use a +.Fn sleep +function to atomically release this primitive when going to sleep and reacquire it on wakeup. .Pp -.Em *5 -.Em Read-mostly -locks can be initialized to support sleeping while holding a write lock. -See -.Xr rmlock 9 -for details. +Note that non-blocking try operations on locks are always permitted. .Ss Context mode table The next table shows what can be used in different contexts. At this time this is a rather easy to remember table. -.Bl -column ".Ic Xxxxxxxxxxxxxxxxxxx" ".Xr XXXXXXXXX" ".Xr XXXXXXX" ".Xr XXXXXXX" ".Xr XXXXXXX" ".Xr XXXXXX" -offset indent -.It Em "Context:" Ta spin mtx Ta mutex Ta sx Ta rwlock Ta rmlock Ta sleep +.Bl -column ".Ic Xxxxxxxxxxxxxxxxxxx" ".Xr XXXXXXXXX" ".Xr XXXXXXXXX" ".Xr XXXXXXX" ".Xr XXXXXXXXX" ".Xr XXXXXX" -offset 3n +.It Em "Context:" Ta spin mtx Ta mutex/rw Ta rmlock Ta sleep rm Ta sx/lk Ta sleep .It interrupt filter: Ta \&ok Ta \&no Ta \&no Ta \&no Ta \&no Ta \&no -.It interrupt thread: Ta \&ok Ta \&ok Ta \&no Ta \&ok Ta \&ok Ta \&no -.It callout: Ta \&ok Ta \&ok Ta \&no Ta \&ok Ta \&no Ta \&no -.It syscall: Ta \&ok Ta \&ok Ta \&ok Ta \&ok Ta \&ok Ta \&ok +.It interrupt thread: Ta \&ok Ta \&ok Ta \&ok Ta \&no Ta \&no Ta \&no +.It callout: Ta \&ok Ta \&ok Ta \&ok Ta \&no Ta \&no Ta \&no +.It system call: Ta \&ok Ta \&ok Ta \&ok Ta \&ok Ta \&ok Ta \&ok .El .Sh SEE ALSO .Xr witness 4 , diff --git a/share/misc/committers-src.dot b/share/misc/committers-src.dot index d321381..6691efb 100644 --- a/share/misc/committers-src.dot +++ 
b/share/misc/committers-src.dot @@ -202,6 +202,7 @@ kevlo [label="Kevin Lo\nkevlo@FreeBSD.org\n2006/07/23"] kib [label="Konstantin Belousov\nkib@FreeBSD.org\n2006/06/03"] kmacy [label="Kip Macy\nkmacy@FreeBSD.org\n2005/06/01"] le [label="Lukas Ertl\nle@FreeBSD.org\n2004/02/02"] +loos [label="Luiz Otavio O Souza\nloos@FreeBSD.org\n2013/07/03"] lstewart [label="Lawrence Stewart\nlstewart@FreeBSD.org\n2008/10/06"] marcel [label="Marcel Moolenaar\nmarcel@FreeBSD.org\n1999/07/03"] marius [label="Marius Strobl\nmarius@FreeBSD.org\n2004/04/17"] @@ -305,6 +306,7 @@ day1 -> rgrimes day1 -> alm day1 -> dg +adrian -> loos adrian -> monthadar adrian -> ray adrian -> rmh diff --git a/share/mk/bsd.libnames.mk b/share/mk/bsd.libnames.mk index e11ac81..f78a656 100644 --- a/share/mk/bsd.libnames.mk +++ b/share/mk/bsd.libnames.mk @@ -149,6 +149,7 @@ LIBRT?= ${DESTDIR}${LIBDIR}/librt.a LIBRTLD_DB?= ${DESTDIR}${LIBDIR}/librtld_db.a LIBSBUF?= ${DESTDIR}${LIBDIR}/libsbuf.a LIBSDP?= ${DESTDIR}${LIBDIR}/libsdp.a +LIBSMB?= ${DESTDIR}${LIBDIR}/libsmb.a LIBSSH?= ${DESTDIR}${LIBDIR}/libssh.a LIBSSL?= ${DESTDIR}${LIBDIR}/libssl.a LIBSTAND?= ${DESTDIR}${LIBDIR}/libstand.a |