diff options
Diffstat (limited to 'share/man/man9/socket.9')
-rw-r--r-- | share/man/man9/socket.9 | 636 |
1 files changed, 636 insertions, 0 deletions
diff --git a/share/man/man9/socket.9 b/share/man/man9/socket.9 new file mode 100644 index 0000000..8590a3d --- /dev/null +++ b/share/man/man9/socket.9 @@ -0,0 +1,636 @@ +.\"- +.\" Copyright (c) 2006 Robert N. M. Watson +.\" Copyright (c) 2014 Benjamin J. Kaduk +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd May 26, 2014 +.Dt SOCKET 9 +.Os +.Sh NAME +.Nm socket +.Nd "kernel socket interface" +.Sh SYNOPSIS +.In sys/socket.h +.In sys/socketvar.h +.Ft void +.Fn soabort "struct socket *so" +.Ft int +.Fn soaccept "struct socket *so" "struct sockaddr **nam" +.Ft int +.Fn socheckuid "struct socket *so" "uid_t uid" +.Ft int +.Fn sobind "struct socket *so" "struct sockaddr *nam" "struct thread *td" +.Ft void +.Fn soclose "struct socket *so" +.Ft int +.Fn soconnect "struct socket *so" "struct sockaddr *nam" "struct thread *td" +.Ft int +.Fo socreate +.Fa "int dom" "struct socket **aso" "int type" "int proto" +.Fa "struct ucred *cred" "struct thread *td" +.Fc +.Ft int +.Fn sodisconnect "struct socket *so" +.Ft struct sockaddr * +.Fn sodupsockaddr "const struct sockaddr *sa" "int mflags" +.Ft void +.Fn sofree "struct socket *so" +.Ft void +.Fn sohasoutofband "struct socket *so" +.Ft int +.Fn solisten "struct socket *so" "int backlog" "struct thread *td" +.Ft void +.Fn solisten_proto "struct socket *so" "int backlog" +.Ft int +.Fn solisten_proto_check "struct socket *so" +.Ft struct socket * +.Fn sonewconn "struct socket *head" "int connstatus" +.Ft int +.Fo sopoll +.Fa "struct socket *so" "int events" "struct ucred *active_cred" +.Fa "struct thread *td" +.Fc +.Ft int +.Fo sopoll_generic +.Fa "struct socket *so" "int events" "struct ucred *active_cred" +.Fa "struct thread *td" +.Fc +.Ft int +.Fo soreceive +.Fa "struct socket *so" "struct sockaddr **psa" "struct uio *uio" +.Fa "struct mbuf **mp0" "struct mbuf **controlp" "int *flagsp" +.Fc +.Ft int +.Fo soreceive_stream +.Fa "struct socket *so" "struct sockaddr **paddr" +.Fa "struct uio *uio" "struct mbuf **mp0" "struct mbuf **controlp" +.Fa "int *flagsp" +.Fc +.Ft int +.Fo soreceive_dgram +.Fa "struct socket *so" "struct sockaddr **paddr" +.Fa "struct uio *uio" "struct mbuf **mp0" "struct mbuf **controlp" +.Fa "int *flagsp" +.Fc +.Ft int +.Fo soreceive_generic +.Fa "struct socket *so" "struct sockaddr **paddr" +.Fa "struct uio *uio" "struct mbuf **mp0" "struct mbuf **controlp" +.Fa "int *flagsp" +.Fc +.Ft int +.Fn soreserve "struct socket *so" "u_long sndcc" "u_long rcvcc" +.Ft void +.Fn sorflush "struct socket *so" +.Ft int +.Fo sosend +.Fa "struct socket *so" "struct sockaddr *addr" "struct uio *uio" +.Fa "struct mbuf *top" "struct mbuf *control" "int flags" "struct thread *td" +.Fc +.Ft int +.Fo sosend_dgram +.Fa "struct socket *so" "struct sockaddr *addr" +.Fa "struct uio *uio" "struct mbuf *top" "struct mbuf *control" +.Fa "int flags" "struct thread *td" +.Fc +.Ft int +.Fo sosend_generic +.Fa "struct socket *so" "struct sockaddr *addr" +.Fa "struct uio *uio" "struct mbuf *top" "struct mbuf *control" +.Fa "int flags" "struct thread *td" +.Fc +.Ft int +.Fn soshutdown "struct socket *so" "int how" +.Ft void +.Fn sotoxsocket "struct socket *so" "struct xsocket *xso" +.Ft void +.Fn soupcall_clear "struct socket *so" "int which" +.Ft void +.Fo soupcall_set +.Fa "struct socket *so" "int which" +.Fa "int (*func)(struct socket *, void *, int)" "void *arg" +.Fc +.Ft void +.Fn sowakeup "struct socket *so" "struct sockbuf *sb" +.In sys/sockopt.h +.Ft int +.Fn sosetopt "struct socket *so" "struct sockopt *sopt" +.Ft int +.Fn sogetopt "struct socket *so" "struct sockopt *sopt" +.Ft int +.Fn sooptcopyin "struct sockopt *sopt" "void *buf" "size_t len" "size_t minlen" +.Ft int +.Fn sooptcopyout "struct sockopt *sopt" "const void *buf" "size_t len" +.Sh DESCRIPTION +The kernel +.Nm +programming interface permits in-kernel consumers to interact with +local and network socket objects in a manner similar to that permitted using +the +.Xr socket 2 +user API. +These interfaces are appropriate for use by distributed file systems and +other network-aware kernel services. +While the user API operates on file descriptors, the kernel interfaces +operate directly on +.Vt "struct socket" +pointers. +Some portions of the kernel API exist only to implement the user API, +and are not expected to be used by kernel code. +The portions of the socket API used by socket consumers and +implementations of network protocols will differ; some routines +are only useful for protocol implementors. +.Pp +Except where otherwise indicated, +.Nm +functions may sleep, and are not appropriate for use in an +.Xr ithread 9 +context or while holding non-sleepable kernel locks. +.Ss Creating and Destroying Sockets +A new socket may be created using +.Fn socreate . +As with +.Xr socket 2 , +arguments specify the requested domain, type, and protocol via +.Fa dom , type , +and +.Fa proto . +The socket is returned via +.Fa aso +on success. +In addition, the credential used to authorize operations associated with the +socket will be passed via +.Fa cred +(and will be cached for the lifetime of the socket), and the thread +performing the operation via +.Fa td . +.Em Warning : +authorization of the socket creation operation will be performed +using the thread credential for some protocols (such as raw sockets). +.Pp +Sockets may be closed and freed using +.Fn soclose , +which has similar semantics to +.Xr close 2 . +.Pp +In certain circumstances, it is appropriate to destroy a socket without +waiting for it to disconnect, for which +.Fn soabort +is used. +This is only appropriate for incoming connections which are in a +partially connected state. +It must be called on an unreferenced socket, by the thread which +removed the socket from its listen queue, to prevent races. +It will call into protocol code, so no socket locks may be held +over the call. +The caller of +.Fn soabort +is responsible for setting the VNET context. +The normal path to freeing a socket is +.Fn sofree , +which handles reference counting on the socket. +It should be called whenever a reference is released, and also whenever +reference flags are cleared in socket or protocol code. +Calls to +.Fn sofree +should not be made from outside the socket layer; outside callers +should use +.Fn soclose +instead. +.Ss Connections and Addresses +The +.Fn sobind +function is equivalent to the +.Xr bind 2 +system call, and binds the socket +.Fa so +to the address +.Fa nam . +The operation would be authorized using the credential on thread +.Fa td . +.Pp +The +.Fn soconnect +function is equivalent to the +.Xr connect 2 +system call, and initiates a connection on the socket +.Fa so +to the address +.Fa nam . +The operation will be authorized using the credential on thread +.Fa td . +Unlike the user system call, +.Fn soconnect +returns immediately; the caller may +.Xr msleep 9 +on +.Fa so->so_timeo +while holding the socket mutex and waiting for the +.Dv SS_ISCONNECTING +flag to clear or +.Fa so->so_error +to become non-zero. +If +.Fn soconnect +fails, the caller must manually clear the +.Dv SS_ISCONNECTING +flag. +.Pp +A call to +.Fn sodisconnect +disconnects the socket without closing it. +.Pp +The +.Fn soshutdown +function is equivalent to the +.Xr shutdown 2 +system call, and causes part or all of a connection on a socket to be closed +down. +.Pp +Sockets are transitioned from non-listening status to listening with +.Fn solisten . +.Ss Socket Options +The +.Fn sogetopt +function is equivalent to the +.Xr getsockopt 2 +system call, and retrieves a socket option on socket +.Fa so . +The +.Fn sosetopt +function is equivalent to the +.Xr setsockopt 2 +system call, and sets a socket option on socket +.Fa so . +.Pp +The second argument in both +.Fn sogetopt +and +.Fn sosetopt +is the +.Fa sopt +pointer to a +.Vt "struct sopt" +describing the socket option operation. +The caller-allocated structure must be zeroed, and then have its fields +initialized to specify socket option operation arguments: +.Bl -tag -width ".Va sopt_valsize" +.It Va sopt_dir +Set to +.Dv SOPT_SET +or +.Dv SOPT_GET +depending on whether this is a get or set operation. +.It Va sopt_level +Specify the level in the network stack the operation is targeted at; for +example, +.Dv SOL_SOCKET . +.It Va sopt_name +Specify the name of the socket option to set. +.It Va sopt_val +Kernel space pointer to the argument value for the socket option. +.It Va sopt_valsize +Size of the argument value in bytes. +.El +.Ss Socket Upcalls +In order for the owner of a socket to be notified when the socket +is ready to send or receive data, an upcall may be registered on +the socket. +The upcall is a function that will be called by the socket framework +when a socket buffer associated with the given socket is ready for +reading or writing. +.Fn soupcall_set +is used to register a socket upcall. +The function +.Va func +is registered, and the pointer +.Va arg +will be passed as its second argument when it is called by the framework. +The possible values for +.Va which +are +.Dv SO_RCV +and +.Dv SO_SND , +which register upcalls for receive and send events, respectively. +The upcall function +.Fn func +must return either +.Dv SU_OK +or +.Dv SU_ISCONNECTED , +depending on whether or not a call to +.Xr soisconnected +should be made by the socket framework after the upcall returns. +The upcall +.Va func +cannot call +.Xr soisconnected +itself due to lock ordering with the socket buffer lock. +Only +.Dv SO_RCV +upcalls should return +.Dv SU_ISCONNECTED . +When a +.Dv SO_RCV +upcall returns +.Dv SU_ISCONNECTED , +the upcall will be removed from the socket. +.Pp +Upcalls are removed from their socket by +.Fn soupcall_clear . +The +.Va which +argument again specifies whether the sending or receiving upcall is to +be cleared, with +.Dv SO_RCV +or +.Dv SO_SND . +.Ss Socket I/O +The +.Fn soreceive +function is equivalent to the +.Xr recvmsg 2 +system call, and attempts to receive bytes of data from the socket +.Fa so , +optionally blocking awaiting for data if none is ready to read. +Data may be retrieved directly to kernel or user memory via the +.Fa uio +argument, or as an mbuf chain returned to the caller via +.Fa mp0 , +avoiding a data copy. +The +.Fa uio +must always be +.Pf non- Dv NULL . +If +.Fa mp0 +is +.Pf non- Dv NULL , +only the +.Pf uio_resid +of +.Fa uio +is used. +The caller may optionally retrieve a socket address on a protocol with the +.Dv PR_ADDR +capability by providing storage via +.Pf non- Dv NULL +.Fa psa +argument. +The caller may optionally retrieve control data mbufs via a +.Pf non- Dv NULL +.Fa controlp +argument. +Optional flags may be passed to +.Fn soreceive +via a +.Pf non- Dv NULL +.Fa flagsp +argument, and use the same flag name space as the +.Xr recvmsg 2 +system call. +.Pp +The +.Fn sosend +function is equivalent to the +.Xr sendmsg 2 +system call, and attempts to send bytes of data via the socket +.Fa so , +optionally blocking if data cannot be immediately sent. +Data may be sent directly from kernel or user memory via the +.Fa uio +argument, or as an mbuf chain via +.Fa top , +avoiding a data copy. +Only one of the +.Fa uio +or +.Fa top +pointers may be +.Pf non- Dv NULL . +An optional destination address may be specified via a +.Pf non- Dv NULL +.Fa addr +argument, which may result in an implicit connect if supported by the +protocol. +The caller may optionally send control data mbufs via a +.Pf non- Dv NULL +.Fa control +argument. +Flags may be passed to +.Fn sosend +using the +.Fa flags +argument, and use the same flag name space as the +.Xr sendmsg 2 +system call. +.Pp +Kernel callers running in +.Xr ithread 9 +context, or with a mutex held, will wish to use non-blocking sockets and pass +the +.Dv MSG_DONTWAIT +flag in order to prevent these functions from sleeping. +.Pp +A socket can be queried for readability, writability, out-of-band data, +or end-of-file using +.Fn sopoll . +The possible values for +.Va events +are as for +.Xr poll 2 , +with symbolic values +.Dv POLLIN , +.Dv POLLPRI , +.Dv POLLOUT , +.Dv POLLRDNORM , +.Dv POLLWRNORM , +.Dv POLLRDBAND , +and +.Dv POLLINGEOF +taken from +.In sys/poll.h . +.Pp +Calls to +.Fn soaccept +pass through to the protocol's accept routine to accept an incoming connection. +.Ss Socket Utility Functions +The uid of a socket's credential may be compared against a +.Va uid +with +.Fn socheckuid . +.Pp +A copy of an existing +.Vt struct sockaddr +may be made using +.Fn sodupsockaddr . +.Pp +Protocol implementations notify the socket layer of the arrival of +out-of-band data using +.Fn sohasoutofband , +so that the socket layer can notify socket consumers of the available data. +.Pp +An +.Dq external-format +version of a +.Vt struct socket +can be created using +.Fn sotoxsocket , +suitable for isolating user code from changes in the kernel structure. +.Ss Protocol Implementations +Protocols must supply an implementation for +.Fn solisten ; +such protocol implementations can call back into the socket layer using +.Fn solisten_proto_check +and +.Fn solisten_proto +to check and set the socket-layer listen state. +These callbacks are provided so that the protocol implementation +can order the socket layer and protocol locks as necessary. +Protocols must supply an implementation of +.Fn soreceive ; +the functions +.Fn soreceive_stream , +.Fn soreceive_dgram , +and +.Fn soreceive_generic +are supplied for use by such implementations. +.Pp +Protocol implementations can use +.Fn sonewconn +to create a socket and attach protocol state to that socket. +This can be used to create new sockets available for +.Fn soaccept +on a listen socket. +The returned socket has a reference count of zero. +.Pp +Protocols must supply an implementation for +.Fn sopoll ; +.Fn sopoll_generic +is provided for the use by protocol implementations. +.Pp +The functions +.Fn sosend_dgram +and +.Fn sosend_generic +are supplied to assist in protocol implementations of +.Fn sosend . +.Pp +When a protocol creates a new socket structure, it is necessary to +reserve socket buffer space for that socket, by calling +.Fn soreserve . +The rough inverse of this reservation is performed by +.Fn sorflush , +which is called automatically by the socket framework. +.Pp +When a protocol needs to wake up threads waiting for the socket to +become ready to read or write, variants of +.Fn sowakeup +are used. +The +.Fn sowakeup +function should not be called directly by protocol code, instead use the +wrappers +.Fn sorwakeup , +.Fn sorwakeup_locked , +.Fn sowwakeup , +and +.Fn sowwakeup_locked +for readers and writers, with the corresponding socket buffer lock +not already locked, or already held, respectively. +.Pp +The functions +.Fn sooptcopyin +and +.Fn sooptcopyout +are useful for transferring +.Vt struct sockopt +data between user and kernel code. +.Sh SEE ALSO +.Xr bind 2 , +.Xr close 2 , +.Xr connect 2 , +.Xr getsockopt 2 , +.Xr recv 2 , +.Xr send 2 , +.Xr setsockopt 2 , +.Xr shutdown 2 , +.Xr socket 2 , +.Xr ng_ksocket 4 , +.Xr ithread 9 , +.Xr msleep 9 , +.Xr ucred 9 +.Sh HISTORY +The +.Xr socket 2 +system call appeared in +.Bx 4.2 . +This manual page was introduced in +.Fx 7.0 . +.Sh AUTHORS +This manual page was written by +.An Robert Watson +and +.An Benjamin Kaduk . +.Sh BUGS +The use of explicitly passed credentials, credentials hung from explicitly +passed threads, the credential on +.Dv curthread , +and the cached credential from +socket creation time is inconsistent, and may lead to unexpected behaviour. +It is possible that several of the +.Fa td +arguments should be +.Fa cred +arguments, or simply not be present at all. +.Pp +The caller may need to manually clear +.Dv SS_ISCONNECTING +if +.Fn soconnect +returns an error. +.Pp +The +.Dv MSG_DONTWAIT +flag is not implemented for +.Fn sosend , +and may not always work with +.Fn soreceive +when zero copy sockets are enabled. +.Pp +This manual page does not describe how to register socket upcalls or monitor +a socket for readability/writability without using blocking I/O. +.Pp +The +.Fn soref +and +.Fn sorele +functions are not described, and in most cases should not be used, due to +confusing and potentially incorrect interactions when +.Fn sorele +is last called after +.Fn soclose . |