summaryrefslogtreecommitdiffstats
path: root/sys/netinet/tcp_subr.c
diff options
context:
space:
mode:
authorjch <jch@FreeBSD.org>2015-08-03 12:13:54 +0000
committerjch <jch@FreeBSD.org>2015-08-03 12:13:54 +0000
commit67927a7a7c96545feb52784dea33376dcf127e76 (patch)
treeb0a37aa79ef7873f11fc9e5037d69c1487c31c9c /sys/netinet/tcp_subr.c
parent9a5c85d2e41259da9bc8187da7291244d1d16738 (diff)
downloadFreeBSD-src-67927a7a7c96545feb52784dea33376dcf127e76.zip
FreeBSD-src-67927a7a7c96545feb52784dea33376dcf127e76.tar.gz
Decompose TCP INP_INFO lock to increase short-lived TCP connections scalability:
- The existing TCP INP_INFO lock continues to protect the global inpcb list stability during full list traversal (e.g. tcp_pcblist()). - A new INP_LIST lock protects inpcb list actual modifications (inp allocation and free) and inpcb global counters. It allows to use TCP INP_INFO_RLOCK lock in critical paths (e.g. tcp_input()) and INP_INFO_WLOCK only in occasional operations that walk all connections. PR: 183659 Differential Revision: https://reviews.freebsd.org/D2599 Reviewed by: jhb, adrian Tested by: adrian, nitroboost-gmail.com Sponsored by: Verisign, Inc.
Diffstat (limited to 'sys/netinet/tcp_subr.c')
-rw-r--r--sys/netinet/tcp_subr.c50
1 files changed, 25 insertions, 25 deletions
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 7fe0eba..808eb97 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -858,7 +858,7 @@ tcp_ccalgounload(struct cc_algo *unload_algo)
VNET_LIST_RLOCK();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_WLOCK(&V_tcbinfo);
/*
* New connections already part way through being initialised
* with the CC algo we're removing will not race with this code
@@ -888,7 +888,7 @@ tcp_ccalgounload(struct cc_algo *unload_algo)
}
INP_WUNLOCK(inp);
}
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK();
@@ -906,7 +906,7 @@ tcp_drop(struct tcpcb *tp, int errno)
{
struct socket *so = tp->t_inpcb->inp_socket;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb);
if (TCPS_HAVERCVDSYN(tp->t_state)) {
@@ -1073,7 +1073,7 @@ tcp_timer_discard(struct tcpcb *tp, uint32_t timer_type)
struct inpcb *inp;
CURVNET_SET(tp->t_vnet);
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
inp = tp->t_inpcb;
KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL",
__func__, tp));
@@ -1088,13 +1088,13 @@ tcp_timer_discard(struct tcpcb *tp, uint32_t timer_type)
tp->t_inpcb = NULL;
uma_zfree(V_tcpcb_zone, tp);
if (in_pcbrele_wlocked(inp)) {
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
}
}
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
}
@@ -1108,7 +1108,7 @@ tcp_close(struct tcpcb *tp)
struct inpcb *inp = tp->t_inpcb;
struct socket *so;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
#ifdef TCP_OFFLOAD
@@ -1156,7 +1156,7 @@ tcp_drain(void)
* where we're really low on mbufs, this is potentially
* useful.
*/
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_WLOCK(&V_tcbinfo);
LIST_FOREACH(inpb, V_tcbinfo.ipi_listhead, inp_list) {
if (inpb->inp_flags & INP_TIMEWAIT)
continue;
@@ -1167,7 +1167,7 @@ tcp_drain(void)
}
INP_WUNLOCK(inpb);
}
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK_NOSLEEP();
@@ -1186,7 +1186,7 @@ tcp_notify(struct inpcb *inp, int error)
{
struct tcpcb *tp;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
if ((inp->inp_flags & INP_TIMEWAIT) ||
@@ -1250,10 +1250,10 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
/*
* OK, now we're committed to doing something.
*/
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_LIST_RLOCK(&V_tcbinfo);
gencnt = V_tcbinfo.ipi_gencnt;
n = V_tcbinfo.ipi_count;
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_LIST_RUNLOCK(&V_tcbinfo);
m = syncache_pcbcount();
@@ -1278,7 +1278,7 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
if (inp_list == NULL)
return (ENOMEM);
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_WLOCK(&V_tcbinfo);
for (inp = LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0;
inp != NULL && i < n; inp = LIST_NEXT(inp, inp_list)) {
INP_WLOCK(inp);
@@ -1303,7 +1303,7 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
}
INP_WUNLOCK(inp);
}
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
n = i;
error = 0;
@@ -1341,14 +1341,14 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
} else
INP_RUNLOCK(inp);
}
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
for (i = 0; i < n; i++) {
inp = inp_list[i];
INP_RLOCK(inp);
if (!in_pcbrele_rlocked(inp))
INP_RUNLOCK(inp);
}
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
if (!error) {
/*
@@ -1358,11 +1358,11 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
* while we were processing this request, and it
* might be necessary to retry.
*/
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_LIST_RLOCK(&V_tcbinfo);
xig.xig_gen = V_tcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
xig.xig_count = V_tcbinfo.ipi_count + pcb_count;
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_LIST_RUNLOCK(&V_tcbinfo);
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
free(inp_list, M_TEMP);
@@ -1518,7 +1518,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
- offsetof(struct icmp, icmp_ip));
th = (struct tcphdr *)((caddr_t)ip
+ (ip->ip_hl << 2));
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
inp = in_pcblookup(&V_tcbinfo, faddr, th->th_dport,
ip->ip_src, th->th_sport, INPLOOKUP_WLOCKPCB, NULL);
if (inp != NULL) {
@@ -1578,7 +1578,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
inc.inc_laddr = ip->ip_src;
syncache_unreach(&inc, th);
}
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
} else
in_pcbnotifyall(&V_tcbinfo, faddr, inetctlerrmap[cmd], notify);
}
@@ -1648,9 +1648,9 @@ tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
inc.inc6_faddr = ((struct sockaddr_in6 *)sa)->sin6_addr;
inc.inc6_laddr = ip6cp->ip6c_src->sin6_addr;
inc.inc_flags |= INC_ISIPV6;
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
syncache_unreach(&inc, &th);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
} else
in6_pcbnotify(&V_tcbinfo, sa, 0, (const struct sockaddr *)sa6_src,
0, cmd, NULL, notify);
@@ -1783,7 +1783,7 @@ tcp_drop_syn_sent(struct inpcb *inp, int errno)
{
struct tcpcb *tp;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
if ((inp->inp_flags & INP_TIMEWAIT) ||
@@ -2341,7 +2341,7 @@ sysctl_drop(SYSCTL_HANDLER_ARGS)
default:
return (EINVAL);
}
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
switch (addrs[0].ss_family) {
#ifdef INET6
case AF_INET6:
@@ -2380,7 +2380,7 @@ sysctl_drop(SYSCTL_HANDLER_ARGS)
INP_WUNLOCK(inp);
} else
error = ESRCH;
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return (error);
}
OpenPOWER on IntegriCloud