summaryrefslogtreecommitdiffstats
path: root/sys/netinet/udp_usrreq.c
diff options
context:
space:
mode:
authorrwatson <rwatson@FreeBSD.org>2008-07-15 15:38:47 +0000
committerrwatson <rwatson@FreeBSD.org>2008-07-15 15:38:47 +0000
commit6d9661b22428319f8d87375a3d6f69afda13469a (patch)
tree9e83efd11a5d307fd93a89b13bf38f242e705cf1 /sys/netinet/udp_usrreq.c
parent2d49f66781d711152d390cc98a26f9b781a5255a (diff)
downloadFreeBSD-src-6d9661b22428319f8d87375a3d6f69afda13469a.zip
FreeBSD-src-6d9661b22428319f8d87375a3d6f69afda13469a.tar.gz
Merge last of a series of rwlock conversion changes to UDP, which
completes the move to a fully parallel UDP transmit path by using global read, rather than write, locking of inpcbinfo in further semi-connected cases: - Add macros to allow try-locking of inpcb and inpcbinfo. - Always acquire an incpcb read lock in udp_output(), which stablizes the local inpcb address and port bindings in order to determine what further locking is required: - If the inpcb is currently not bound (at all) and are implicitly connecting, we require inpcbinfo and inpcb write locks, so drop the read lock and re-acquire. - If the inpcb is bound for at least one of the port or address, but an explicit source or destination is requested, trylock the inpcbinfo lock, and if that fails, drop the inpcb lock, lock the global lock, and relock the inpcb lock. - Otherwise, no further locking is required (common case). - Update comments. In practice, this means that the vast majority of consumers of UDP sockets will not acquire any exclusive locks at the socket or UDP levels of the network stack. This leads to a marked performance improvement in several important workloads, including BIND, nsd, and memcached over UDP, as well as significant improvements in pps microbenchmarks. The plan is to MFC all of the rwlock changes to RELENG_7 once they have settled for a weeks in the tree. Tested by: ps, kris (older revision), bde MFC after: 3 weeks
Diffstat (limited to 'sys/netinet/udp_usrreq.c')
-rw-r--r--sys/netinet/udp_usrreq.c119
1 files changed, 85 insertions, 34 deletions
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
index 1cce109..5c3698f 100644
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -846,14 +846,42 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
return (error);
}
- if (src.sin_family == AF_INET || addr != NULL) {
+ /*
+ * Depending on whether or not the application has bound or connected
+ * the application, we may have to do varying levels of work. The
+ * optimal case is for a connected UDP socket, as a global lock isn't
+ * required.
+ *
+ * In order to decide which we need, we require stability of the
+ * inpcb binding, which we ensure by acquiring a read lock on the
+ * inpcb. This doesn't strictly follow the lock order, so we play
+ * the trylock and retry game; note that we may end up with more
+ * conservative locks than required the second time around, so later
+ * assertions have to accept that. Further analysis of the number of
+ * misses under contention is required.
+ */
+ sin = (struct sockaddr_in *)addr;
+ INP_RLOCK(inp);
+ if (sin != NULL &&
+ (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) {
+ INP_RUNLOCK(inp);
INP_INFO_WLOCK(&udbinfo);
INP_WLOCK(inp);
+ unlock_udbinfo = 2;
+ } else if ((sin != NULL && (
+ (sin->sin_addr.s_addr == INADDR_ANY) ||
+ (sin->sin_addr.s_addr == INADDR_BROADCAST) ||
+ (inp->inp_laddr.s_addr == INADDR_ANY) ||
+ (inp->inp_lport == 0))) ||
+ (src.sin_family == AF_INET)) {
+ if (!INP_INFO_TRY_RLOCK(&udbinfo)) {
+ INP_RUNLOCK(inp);
+ INP_INFO_RLOCK(&udbinfo);
+ INP_RLOCK(inp);
+ }
unlock_udbinfo = 1;
- } else {
+ } else
unlock_udbinfo = 0;
- INP_RLOCK(inp);
- }
/*
* If the IP_SENDSRCADDR control message was specified, override the
@@ -880,12 +908,11 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
* If a UDP socket has been connected, then a local address/port will
* have been selected and bound.
*
- * If a UDP socket has not been connect to, then an explicit
+ * If a UDP socket has not been connected to, then an explicit
* destination address must be used, in which case a local
* address/port may not have been selected and bound.
*/
- if (addr) {
- INP_INFO_LOCK_ASSERT(&udbinfo);
+ if (sin != NULL) {
INP_LOCK_ASSERT(inp);
if (inp->inp_faddr.s_addr != INADDR_ANY) {
error = EISCONN;
@@ -896,39 +923,58 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
* Jail may rewrite the destination address, so let it do
* that before we use it.
*/
- sin = (struct sockaddr_in *)addr;
if (jailed(td->td_ucred))
prison_remote_ip(td->td_ucred, 0,
&sin->sin_addr.s_addr);
/*
- * If a local address or port hasn't yet been selected, do
- * that now. Once a port is selected, we commit the binding
- * back to the socket; we also commit the binding of the
- * address if in jail.
+ * If a local address or port hasn't yet been selected, or if
+ * the destination address needs to be rewritten due to using
+ * a special INADDR_ constant, invoke in_pcbconnect_setup()
+ * to do the heavy lifting. Once a port is selected, we
+ * commit the binding back to the socket; we also commit the
+ * binding of the address if in jail.
+ *
+ * If we already have a valid binding and we're not
+ * requesting a destination address rewrite, use a fast path.
*/
- error = in_pcbconnect_setup(inp, addr, &laddr.s_addr, &lport,
- &faddr.s_addr, &fport, NULL, td->td_ucred);
- if (error)
- goto release;
+ if (inp->inp_laddr.s_addr == INADDR_ANY ||
+ inp->inp_lport == 0 ||
+ sin->sin_addr.s_addr == INADDR_ANY ||
+ sin->sin_addr.s_addr == INADDR_BROADCAST) {
+ INP_INFO_LOCK_ASSERT(&udbinfo);
+ error = in_pcbconnect_setup(inp, addr, &laddr.s_addr,
+ &lport, &faddr.s_addr, &fport, NULL,
+ td->td_ucred);
+ if (error)
+ goto release;
- /* Commit the local port if newly assigned. */
- if (inp->inp_laddr.s_addr == INADDR_ANY &&
- inp->inp_lport == 0) {
- INP_INFO_WLOCK_ASSERT(&udbinfo);
- INP_WLOCK_ASSERT(inp);
/*
- * Remember addr if jailed, to prevent rebinding.
+ * XXXRW: Why not commit the port if the address is
+ * !INADDR_ANY?
*/
- if (jailed(td->td_ucred))
- inp->inp_laddr = laddr;
- inp->inp_lport = lport;
- if (in_pcbinshash(inp) != 0) {
- inp->inp_lport = 0;
- error = EAGAIN;
- goto release;
+ /* Commit the local port if newly assigned. */
+ if (inp->inp_laddr.s_addr == INADDR_ANY &&
+ inp->inp_lport == 0) {
+ INP_INFO_WLOCK_ASSERT(&udbinfo);
+ INP_WLOCK_ASSERT(inp);
+ /*
+ * Remember addr if jailed, to prevent
+ * rebinding.
+ */
+ if (jailed(td->td_ucred))
+ inp->inp_laddr = laddr;
+ inp->inp_lport = lport;
+ if (in_pcbinshash(inp) != 0) {
+ inp->inp_lport = 0;
+ error = EAGAIN;
+ goto release;
+ }
+ inp->inp_flags |= INP_ANONPORT;
}
- inp->inp_flags |= INP_ANONPORT;
+ } else {
+ faddr = sin->sin_addr;
+ fport = sin->sin_port;
}
} else {
INP_LOCK_ASSERT(inp);
@@ -1006,20 +1052,25 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */
udpstat.udps_opackets++;
- if (unlock_udbinfo)
+ if (unlock_udbinfo == 2)
INP_INFO_WUNLOCK(&udbinfo);
+ else if (unlock_udbinfo == 1)
+ INP_INFO_RUNLOCK(&udbinfo);
error = ip_output(m, inp->inp_options, NULL, ipflags,
inp->inp_moptions, inp);
- if (unlock_udbinfo)
+ if (unlock_udbinfo == 2)
INP_WUNLOCK(inp);
else
INP_RUNLOCK(inp);
return (error);
release:
- if (unlock_udbinfo) {
- INP_INFO_WUNLOCK(&udbinfo);
+ if (unlock_udbinfo == 2) {
INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&udbinfo);
+ } else if (unlock_udbinfo == 1) {
+ INP_RUNLOCK(inp);
+ INP_INFO_RUNLOCK(&udbinfo);
} else
INP_RUNLOCK(inp);
m_freem(m);
OpenPOWER on IntegriCloud