summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjch <jch@FreeBSD.org>2016-07-27 07:51:58 +0000
committerjch <jch@FreeBSD.org>2016-07-27 07:51:58 +0000
commit9872e9abc558c1a29893f08c0609df4bc9117ea7 (patch)
treebb9a89d3a6a36d0fbe8fb26e008bf81e581db707
parent541060fe206ad7ae572e2e335831cbd0193f3c96 (diff)
downloadFreeBSD-src-9872e9abc558c1a29893f08c0609df4bc9117ea7.zip
FreeBSD-src-9872e9abc558c1a29893f08c0609df4bc9117ea7.tar.gz
MFC r271119, r272081:
r271119: In tcp_input(), don't acquire the pcbinfo global write lock for SYN packets targeting a listening socket. Permit to reduce TCP input processing starvation in context of high SYN load (e.g. short-lived TCP connections or SYN flood). Submitted by: Julien Charbon <jcharbon@verisign.com> Reviewed by: adrian, hiren, jhb, Mike Bentkofsky r272081: Catch up with r271119.
-rw-r--r--sys/netinet/tcp_input.c28
-rw-r--r--sys/netinet/tcp_syncache.c19
-rw-r--r--sys/netinet/toecore.c1
3 files changed, 21 insertions, 27 deletions
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 25762cb..131ce48 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -751,12 +751,12 @@ tcp_input(struct mbuf *m, int off0)
/*
* Locate pcb for segment; if we're likely to add or remove a
- * connection then first acquire pcbinfo lock. There are two cases
+ * connection then first acquire pcbinfo lock. There are three cases
* where we might discover later we need a write lock despite the
- * flags: ACKs moving a connection out of the syncache, and ACKs for
- * a connection in TIMEWAIT.
+ * flags: ACKs moving a connection out of the syncache, ACKs for a
+ * connection in TIMEWAIT and SYNs not targeting a listening socket.
*/
- if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0) {
+ if ((thflags & (TH_FIN | TH_RST)) != 0) {
INP_INFO_WLOCK(&V_tcbinfo);
ti_locked = TI_WLOCKED;
} else
@@ -983,10 +983,12 @@ relocked:
* now be in TIMEWAIT.
*/
#ifdef INVARIANTS
- if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0)
+ if ((thflags & (TH_FIN | TH_RST)) != 0)
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
#endif
- if (tp->t_state != TCPS_ESTABLISHED) {
+ if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) ||
+ (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN) &&
+ !(tp->t_flags & TF_FASTOPEN)))) {
if (ti_locked == TI_UNLOCKED) {
if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) {
in_pcbref(inp);
@@ -1027,17 +1029,13 @@ relocked:
/*
* When the socket is accepting connections (the INPCB is in LISTEN
* state) we look into the SYN cache if this is a new connection
- * attempt or the completion of a previous one. Because listen
- * sockets are never in TCPS_ESTABLISHED, the V_tcbinfo lock will be
- * held in this case.
+ * attempt or the completion of a previous one.
*/
if (so->so_options & SO_ACCEPTCONN) {
struct in_conninfo inc;
KASSERT(tp->t_state == TCPS_LISTEN, ("%s: so accepting but "
"tp not listening", __func__));
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
-
bzero(&inc, sizeof(inc));
#ifdef INET6
if (isipv6) {
@@ -1060,6 +1058,8 @@ relocked:
* socket appended to the listen queue in SYN_RECEIVED state.
*/
if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) {
+
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
/*
* Parse the TCP options here because
* syncookies need access to the reflected
@@ -1348,8 +1348,12 @@ new_tfo_socket:
#endif
/*
* Entry added to syncache and mbuf consumed.
- * Everything already unlocked by syncache_add().
+ * Only the listen socket is unlocked by syncache_add().
*/
+ if (ti_locked == TI_WLOCKED) {
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ ti_locked = TI_UNLOCKED;
+ }
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
return;
} else if (tp->t_state == TCPS_LISTEN) {
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index 03e8c72..f71d028 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -1107,7 +1107,7 @@ syncache_tfo_expand(struct syncache *sc, struct socket **lsop, struct mbuf *m,
* Global TCP locks are held because we manipulate the PCB lists
* and create a new socket.
*/
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
pending_counter = intotcpcb(sotoinpcb(*lsop))->t_tfo_pending;
*lsop = syncache_socket(sc, *lsop, m);
@@ -1175,7 +1175,6 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
int tfo_response_cookie_valid = 0;
#endif
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp); /* listen socket */
KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN,
("%s: unexpected tcp flags", __func__));
@@ -1229,21 +1228,15 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
#ifdef MAC
if (mac_syncache_init(&maclabel) != 0) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
goto done;
} else
mac_syncache_create(maclabel, inp);
#endif
#ifdef TCP_RFC7413
- if (!tfo_cookie_valid) {
- INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
- }
-#else
- INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (!tfo_cookie_valid)
#endif
-
+ INP_WUNLOCK(inp);
+
/*
* Remember the IP options, if any.
*/
@@ -1272,10 +1265,8 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
SCH_LOCK_ASSERT(sch);
if (sc != NULL) {
#ifdef TCP_RFC7413
- if (tfo_cookie_valid) {
+ if (tfo_cookie_valid)
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
- }
#endif
TCPSTAT_INC(tcps_sc_dupsyn);
if (ipopts) {
diff --git a/sys/netinet/toecore.c b/sys/netinet/toecore.c
index 53d4778..1adaf4d 100644
--- a/sys/netinet/toecore.c
+++ b/sys/netinet/toecore.c
@@ -328,7 +328,6 @@ toe_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
{
struct socket *lso = inp->inp_socket;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
syncache_add(inc, to, th, inp, &lso, NULL, tod, todctx);
OpenPOWER on IntegriCloud