diff options
author | jch <jch@FreeBSD.org> | 2016-11-24 14:48:46 +0000 |
---|---|---|
committer | jch <jch@FreeBSD.org> | 2016-11-24 14:48:46 +0000 |
commit | 7f3fe3a98ec981ac0d1481bb5e4cb31ef9ac8a0a (patch) | |
tree | 3cfb7702bb0aeaf23d9b6562d3f81192fd2bc65b /sys/netinet/tcp_subr.c | |
parent | 16165b4c35ba2f47480383759cd08703a8515756 (diff) | |
download | FreeBSD-src-7f3fe3a98ec981ac0d1481bb5e4cb31ef9ac8a0a.zip FreeBSD-src-7f3fe3a98ec981ac0d1481bb5e4cb31ef9ac8a0a.tar.gz |
MFC r286227, r286443:
r286227:
Decompose TCP INP_INFO lock to increase short-lived TCP connections scalability:
- The existing TCP INP_INFO lock continues to protect the global inpcb list
stability during full list traversal (e.g. tcp_pcblist()).
- A new INP_LIST lock protects inpcb list actual modifications (inp allocation
and free) and inpcb global counters.
It allows to use TCP INP_INFO_RLOCK lock in critical paths (e.g. tcp_input())
and INP_INFO_WLOCK only in occasional operations that walk all connections.
PR: 183659
Differential Revision: https://reviews.freebsd.org/D2599
Reviewed by: jhb, adrian
Tested by: adrian, nitroboost-gmail.com
Sponsored by: Verisign, Inc.
r286443:
Fix a kernel assertion issue introduced with r286227:
Avoid too strict INP_INFO_RLOCK_ASSERT checks due to
tcp_notify() being called from in6_pcbnotify().
Reported by: Larry Rosenman <ler@lerctr.org>
Submitted by: markj, jch
Diffstat (limited to 'sys/netinet/tcp_subr.c')
-rw-r--r-- | sys/netinet/tcp_subr.c | 50 |
1 files changed, 25 insertions, 25 deletions
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index b3c7309..28af834 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -956,7 +956,7 @@ tcp_ccalgounload(struct cc_algo *unload_algo) VNET_LIST_RLOCK(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_WLOCK(&V_tcbinfo); /* * New connections already part way through being initialised * with the CC algo we're removing will not race with this code @@ -986,7 +986,7 @@ tcp_ccalgounload(struct cc_algo *unload_algo) } INP_WUNLOCK(inp); } - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK(); @@ -1004,7 +1004,7 @@ tcp_drop(struct tcpcb *tp, int errno) { struct socket *so = tp->t_inpcb->inp_socket; - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + INP_INFO_LOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); if (TCPS_HAVERCVDSYN(tp->t_state)) { @@ -1171,7 +1171,7 @@ tcp_timer_discard(struct tcpcb *tp, uint32_t timer_type) struct inpcb *inp; CURVNET_SET(tp->t_vnet); - INP_INFO_WLOCK(&V_tcbinfo); + INP_INFO_RLOCK(&V_tcbinfo); inp = tp->t_inpcb; KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); @@ -1186,13 +1186,13 @@ tcp_timer_discard(struct tcpcb *tp, uint32_t timer_type) tp->t_inpcb = NULL; uma_zfree(V_tcpcb_zone, tp); if (in_pcbrele_wlocked(inp)) { - INP_INFO_WUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } } INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } @@ -1206,7 +1206,7 @@ tcp_close(struct tcpcb *tp) struct inpcb *inp = tp->t_inpcb; struct socket *so; - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + INP_INFO_LOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); #ifdef TCP_OFFLOAD @@ -1265,7 +1265,7 @@ tcp_drain(void) * where we're really low on mbufs, this is potentially * useful. */ - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_WLOCK(&V_tcbinfo); LIST_FOREACH(inpb, V_tcbinfo.ipi_listhead, inp_list) { if (inpb->inp_flags & INP_TIMEWAIT) continue; @@ -1276,7 +1276,7 @@ tcp_drain(void) } INP_WUNLOCK(inpb); } - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); @@ -1295,7 +1295,7 @@ tcp_notify(struct inpcb *inp, int error) { struct tcpcb *tp; - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + INP_INFO_LOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); if ((inp->inp_flags & INP_TIMEWAIT) || @@ -1359,10 +1359,10 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS) /* * OK, now we're committed to doing something. */ - INP_INFO_RLOCK(&V_tcbinfo); + INP_LIST_RLOCK(&V_tcbinfo); gencnt = V_tcbinfo.ipi_gencnt; n = V_tcbinfo.ipi_count; - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_LIST_RUNLOCK(&V_tcbinfo); m = syncache_pcbcount(); @@ -1385,7 +1385,7 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS) inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); - INP_INFO_RLOCK(&V_tcbinfo); + INP_INFO_WLOCK(&V_tcbinfo); for (inp = LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0; inp != NULL && i < n; inp = LIST_NEXT(inp, inp_list)) { INP_WLOCK(inp); @@ -1410,7 +1410,7 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS) } INP_WUNLOCK(inp); } - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_INFO_WUNLOCK(&V_tcbinfo); n = i; error = 0; @@ -1448,14 +1448,14 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS) } else INP_RUNLOCK(inp); } - INP_INFO_WLOCK(&V_tcbinfo); + INP_INFO_RLOCK(&V_tcbinfo); for (i = 0; i < n; i++) { inp = inp_list[i]; INP_RLOCK(inp); if (!in_pcbrele_rlocked(inp)) INP_RUNLOCK(inp); } - INP_INFO_WUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK(&V_tcbinfo); if (!error) { /* @@ -1465,11 +1465,11 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS) * while we were processing this request, and it * might be necessary to retry. */ - INP_INFO_RLOCK(&V_tcbinfo); + INP_LIST_RLOCK(&V_tcbinfo); xig.xig_gen = V_tcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = V_tcbinfo.ipi_count + pcb_count; - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_LIST_RUNLOCK(&V_tcbinfo); error = SYSCTL_OUT(req, &xig, sizeof xig); } free(inp_list, M_TEMP); @@ -1630,7 +1630,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip) - offsetof(struct icmp, icmp_ip)); th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); - INP_INFO_WLOCK(&V_tcbinfo); + INP_INFO_RLOCK(&V_tcbinfo); inp = in_pcblookup(&V_tcbinfo, faddr, th->th_dport, ip->ip_src, th->th_sport, INPLOOKUP_WLOCKPCB, NULL); if (inp != NULL) { @@ -1690,7 +1690,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip) inc.inc_laddr = ip->ip_src; syncache_unreach(&inc, th); } - INP_INFO_WUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK(&V_tcbinfo); } else in_pcbnotifyall(&V_tcbinfo, faddr, inetctlerrmap[cmd], notify); } @@ -1763,9 +1763,9 @@ tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d) inc.inc6_faddr = ((struct sockaddr_in6 *)sa)->sin6_addr; inc.inc6_laddr = ip6cp->ip6c_src->sin6_addr; inc.inc_flags |= INC_ISIPV6; - INP_INFO_WLOCK(&V_tcbinfo); + INP_INFO_RLOCK(&V_tcbinfo); syncache_unreach(&inc, &th); - INP_INFO_WUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK(&V_tcbinfo); } else in6_pcbnotify(&V_tcbinfo, sa, 0, (const struct sockaddr *)sa6_src, 0, cmd, NULL, notify); @@ -1898,7 +1898,7 @@ tcp_drop_syn_sent(struct inpcb *inp, int errno) { struct tcpcb *tp; - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); if ((inp->inp_flags & INP_TIMEWAIT) || @@ -2422,7 +2422,7 @@ sysctl_drop(SYSCTL_HANDLER_ARGS) default: return (EINVAL); } - INP_INFO_WLOCK(&V_tcbinfo); + INP_INFO_RLOCK(&V_tcbinfo); switch (addrs[0].ss_family) { #ifdef INET6 case AF_INET6: @@ -2461,7 +2461,7 @@ sysctl_drop(SYSCTL_HANDLER_ARGS) INP_WUNLOCK(inp); } else error = ESRCH; - INP_INFO_WUNLOCK(&V_tcbinfo); + INP_INFO_RUNLOCK(&V_tcbinfo); return (error); } |