diff options
author | jch <jch@FreeBSD.org> | 2015-08-03 12:13:54 +0000 |
---|---|---|
committer | jch <jch@FreeBSD.org> | 2015-08-03 12:13:54 +0000 |
commit | 67927a7a7c96545feb52784dea33376dcf127e76 (patch) | |
tree | b0a37aa79ef7873f11fc9e5037d69c1487c31c9c /sys/netinet/tcp_subr.c | |
parent | 9a5c85d2e41259da9bc8187da7291244d1d16738 (diff) | |
download | FreeBSD-src-67927a7a7c96545feb52784dea33376dcf127e76.zip FreeBSD-src-67927a7a7c96545feb52784dea33376dcf127e76.tar.gz |
Decompose the TCP INP_INFO lock to increase the scalability of short-lived TCP connections:
- The existing TCP INP_INFO lock continues to protect the stability of the
global inpcb list during full list traversal (e.g. tcp_pcblist()).
- A new INP_LIST lock protects actual modifications of the inpcb list (inp
allocation and free) and the global inpcb counters.
This allows the TCP INP_INFO_RLOCK lock to be used in critical paths (e.g.
tcp_input()), with INP_INFO_WLOCK needed only in occasional operations that
walk all connections.
PR: 183659
Differential Revision: https://reviews.freebsd.org/D2599
Reviewed by: jhb, adrian
Tested by: adrian, nitroboost-gmail.com
Sponsored by: Verisign, Inc.
Diffstat (limited to 'sys/netinet/tcp_subr.c')
-rw-r--r-- | sys/netinet/tcp_subr.c | 50 |
1 files changed, 25 insertions, 25 deletions
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 7fe0eba..808eb97 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -858,7 +858,7 @@ tcp_ccalgounload(struct cc_algo *unload_algo) VNET_LIST_RLOCK(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_WLOCK(&V_tcbinfo); /* * New connections already part way through being initialised * with the CC algo we're removing will not race with this code @@ -888,7 +888,7 @@ tcp_ccalgounload(struct cc_algo *unload_algo) } INP_WUNLOCK(inp); } - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK(); @@ -906,7 +906,7 @@ tcp_drop(struct tcpcb *tp, int errno) { struct socket *so = tp->t_inpcb->inp_socket; - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); if (TCPS_HAVERCVDSYN(tp->t_state)) { @@ -1073,7 +1073,7 @@ tcp_timer_discard(struct tcpcb *tp, uint32_t timer_type) struct inpcb *inp; CURVNET_SET(tp->t_vnet); - INP_INFO_WLOCK(&V_tcbinfo); + INP_INFO_RLOCK(&V_tcbinfo); inp = tp->t_inpcb; KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); @@ -1088,13 +1088,13 @@ tcp_timer_discard(struct tcpcb *tp, uint32_t timer_type) tp->t_inpcb = NULL; uma_zfree(V_tcpcb_zone, tp); if (in_pcbrele_wlocked(inp)) { - INP_INFO_WUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } } INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } @@ -1108,7 +1108,7 @@ tcp_close(struct tcpcb *tp) struct inpcb *inp = tp->t_inpcb; struct socket *so; - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); #ifdef TCP_OFFLOAD @@ -1156,7 +1156,7 @@ tcp_drain(void) * where we're really low on mbufs, this is potentially * useful. 
*/ - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_WLOCK(&V_tcbinfo); LIST_FOREACH(inpb, V_tcbinfo.ipi_listhead, inp_list) { if (inpb->inp_flags & INP_TIMEWAIT) continue; @@ -1167,7 +1167,7 @@ tcp_drain(void) } INP_WUNLOCK(inpb); } - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); @@ -1186,7 +1186,7 @@ tcp_notify(struct inpcb *inp, int error) { struct tcpcb *tp; - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); if ((inp->inp_flags & INP_TIMEWAIT) || @@ -1250,10 +1250,10 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS) /* * OK, now we're committed to doing something. */ - INP_INFO_RLOCK(&V_tcbinfo); + INP_LIST_RLOCK(&V_tcbinfo); gencnt = V_tcbinfo.ipi_gencnt; n = V_tcbinfo.ipi_count; - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_LIST_RUNLOCK(&V_tcbinfo); m = syncache_pcbcount(); @@ -1278,7 +1278,7 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS) if (inp_list == NULL) return (ENOMEM); - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_WLOCK(&V_tcbinfo); for (inp = LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0; inp != NULL && i < n; inp = LIST_NEXT(inp, inp_list)) { INP_WLOCK(inp); @@ -1303,7 +1303,7 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS) } INP_WUNLOCK(inp); } - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_WUNLOCK(&V_tcbinfo); n = i; error = 0; @@ -1341,14 +1341,14 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS) } else INP_RUNLOCK(inp); } - INP_INFO_WLOCK(&V_tcbinfo); + INP_INFO_RLOCK(&V_tcbinfo); for (i = 0; i < n; i++) { inp = inp_list[i]; INP_RLOCK(inp); if (!in_pcbrele_rlocked(inp)) INP_RUNLOCK(inp); } - INP_INFO_WUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK(&V_tcbinfo); if (!error) { /* @@ -1358,11 +1358,11 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS) * while we were processing this request, and it * might be necessary to retry. 
*/ - INP_INFO_RLOCK(&V_tcbinfo); + INP_LIST_RLOCK(&V_tcbinfo); xig.xig_gen = V_tcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = V_tcbinfo.ipi_count + pcb_count; - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_LIST_RUNLOCK(&V_tcbinfo); error = SYSCTL_OUT(req, &xig, sizeof xig); } free(inp_list, M_TEMP); @@ -1518,7 +1518,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip) - offsetof(struct icmp, icmp_ip)); th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); - INP_INFO_WLOCK(&V_tcbinfo); + INP_INFO_RLOCK(&V_tcbinfo); inp = in_pcblookup(&V_tcbinfo, faddr, th->th_dport, ip->ip_src, th->th_sport, INPLOOKUP_WLOCKPCB, NULL); if (inp != NULL) { @@ -1578,7 +1578,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip) inc.inc_laddr = ip->ip_src; syncache_unreach(&inc, th); } - INP_INFO_WUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK(&V_tcbinfo); } else in_pcbnotifyall(&V_tcbinfo, faddr, inetctlerrmap[cmd], notify); } @@ -1648,9 +1648,9 @@ tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d) inc.inc6_faddr = ((struct sockaddr_in6 *)sa)->sin6_addr; inc.inc6_laddr = ip6cp->ip6c_src->sin6_addr; inc.inc_flags |= INC_ISIPV6; - INP_INFO_WLOCK(&V_tcbinfo); + INP_INFO_RLOCK(&V_tcbinfo); syncache_unreach(&inc, &th); - INP_INFO_WUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK(&V_tcbinfo); } else in6_pcbnotify(&V_tcbinfo, sa, 0, (const struct sockaddr *)sa6_src, 0, cmd, NULL, notify); @@ -1783,7 +1783,7 @@ tcp_drop_syn_sent(struct inpcb *inp, int errno) { struct tcpcb *tp; - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); if ((inp->inp_flags & INP_TIMEWAIT) || @@ -2341,7 +2341,7 @@ sysctl_drop(SYSCTL_HANDLER_ARGS) default: return (EINVAL); } - INP_INFO_WLOCK(&V_tcbinfo); + INP_INFO_RLOCK(&V_tcbinfo); switch (addrs[0].ss_family) { #ifdef INET6 case AF_INET6: @@ -2380,7 +2380,7 @@ sysctl_drop(SYSCTL_HANDLER_ARGS) INP_WUNLOCK(inp); } else error = ESRCH; - INP_INFO_WUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK(&V_tcbinfo); return (error); } |