summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
Diffstat (limited to 'sys')
-rw-r--r--sys/conf/files1
-rw-r--r--sys/conf/options2
-rw-r--r--sys/netinet/tcp.h5
-rw-r--r--sys/netinet/tcp_fastopen.c442
-rw-r--r--sys/netinet/tcp_fastopen.h47
-rw-r--r--sys/netinet/tcp_input.c92
-rw-r--r--sys/netinet/tcp_output.c71
-rw-r--r--sys/netinet/tcp_subr.c21
-rw-r--r--sys/netinet/tcp_syncache.c137
-rw-r--r--sys/netinet/tcp_syncache.h6
-rw-r--r--sys/netinet/tcp_timer.c3
-rw-r--r--sys/netinet/tcp_usrreq.c57
-rw-r--r--sys/netinet/tcp_var.h16
13 files changed, 881 insertions, 19 deletions
diff --git a/sys/conf/files b/sys/conf/files
index 73582b8..1823040 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3503,6 +3503,7 @@ netinet/sctp_usrreq.c optional inet sctp | inet6 sctp
netinet/sctputil.c optional inet sctp | inet6 sctp
netinet/siftr.c optional inet siftr alq | inet6 siftr alq
netinet/tcp_debug.c optional tcpdebug
+netinet/tcp_fastopen.c optional inet tcp_rfc7413 | inet6 tcp_rfc7413
netinet/tcp_hostcache.c optional inet | inet6
netinet/tcp_input.c optional inet | inet6
netinet/tcp_lro.c optional inet | inet6
diff --git a/sys/conf/options b/sys/conf/options
index 413b44e..0699c2c 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -442,6 +442,8 @@ SLIP_IFF_OPTS opt_slip.h
TCPDEBUG
SIFTR
TCP_OFFLOAD opt_inet.h # Enable code to dispatch TCP offloading
+TCP_RFC7413 opt_inet.h
+TCP_RFC7413_MAX_KEYS opt_inet.h
TCP_SIGNATURE opt_inet.h
VLAN_ARRAY opt_vlan.h
XBONEHACK
diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h
index fb2f810..7e3a1a3 100644
--- a/sys/netinet/tcp.h
+++ b/sys/netinet/tcp.h
@@ -97,6 +97,10 @@ struct tcphdr {
#define TCPOLEN_TSTAMP_APPA (TCPOLEN_TIMESTAMP+2) /* appendix A */
#define TCPOPT_SIGNATURE 19 /* Keyed MD5: RFC 2385 */
#define TCPOLEN_SIGNATURE 18
+#define TCPOPT_FAST_OPEN 34
+#define TCPOLEN_FAST_OPEN_EMPTY 2
+#define TCPOLEN_FAST_OPEN_MIN 6
+#define TCPOLEN_FAST_OPEN_MAX 18
/* Miscellaneous constants */
#define MAX_SACK_BLKS 6 /* Max # SACK blocks stored at receiver side */
@@ -165,6 +169,7 @@ struct tcphdr {
#define TCP_KEEPIDLE 256 /* L,N,X start keeplives after this period */
#define TCP_KEEPINTVL 512 /* L,N interval between keepalives */
#define TCP_KEEPCNT 1024 /* L,N number of keepalives before close */
+#define TCP_FASTOPEN 1025 /* enable TFO / was created via TFO */
/* Start of reserved space for third-party user-settable options. */
#define TCP_VENDOR SO_VENDOR
diff --git a/sys/netinet/tcp_fastopen.c b/sys/netinet/tcp_fastopen.c
new file mode 100644
index 0000000..482320e
--- /dev/null
+++ b/sys/netinet/tcp_fastopen.c
@@ -0,0 +1,442 @@
+/*-
+ * Copyright (c) 2015 Patrick Kelsey
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * This is a server-side implementation of TCP Fast Open (TFO) [RFC7413].
+ *
+ * This implementation is currently considered to be experimental and is not
+ * included in kernel builds by default. To include this code, add the
+ * following line to your kernel config:
+ *
+ * options TCP_RFC7413
+ *
+ * The generated TFO cookies are the 64-bit output of
+ * SipHash24(<16-byte-key><client-ip>). Multiple concurrent valid keys are
+ * supported so that time-based rolling cookie invalidation policies can be
+ * implemented in the system. The default number of concurrent keys is 2.
+ * This can be adjusted in the kernel config as follows:
+ *
+ * options TCP_RFC7413_MAX_KEYS=<num-keys>
+ *
+ *
+ * The following TFO-specific sysctls are defined:
+ *
+ * net.inet.tcp.fastopen.acceptany (RW, default 0)
+ * When non-zero, all client-supplied TFO cookies will be considered to
+ * be valid.
+ *
+ * net.inet.tcp.fastopen.autokey (RW, default 120)
+ * When this and net.inet.tcp.fastopen.enabled are non-zero, a new key
+ * will be automatically generated after this many seconds.
+ *
+ * net.inet.tcp.fastopen.enabled (RW, default 0)
+ * When zero, no new TFO connections can be created. On the transition
+ * from enabled to disabled, all installed keys are removed. On the
+ * transition from disabled to enabled, if net.inet.tcp.fastopen.autokey
+ * is non-zero and there are no keys installed, a new key will be
+ * generated immediately. The transition from enabled to disabled does
+ * not affect any TFO connections in progress; it only prevents new ones
+ * from being made.
+ *
+ * net.inet.tcp.fastopen.keylen (RO)
+ * The key length in bytes.
+ *
+ * net.inet.tcp.fastopen.maxkeys (RO)
+ * The maximum number of keys supported.
+ *
+ * net.inet.tcp.fastopen.numkeys (RO)
+ * The current number of keys installed.
+ *
+ * net.inet.tcp.fastopen.setkey (WO)
+ * Install a new key by writing net.inet.tcp.fastopen.keylen bytes to this
+ * sysctl.
+ *
+ *
+ * In order for TFO connections to be created via a listen socket, that
+ * socket must have the TCP_FASTOPEN socket option set on it. This option
+ * can be set on the socket either before or after the listen() is invoked.
+ * Clearing this option on a listen socket after it has been set has no
+ * effect on existing TFO connections or TFO connections in progress; it
+ * only prevents new TFO connections from being made.
+ *
+ * For passively-created sockets, the TCP_FASTOPEN socket option can be
+ * queried to determine whether the connection was established using TFO.
+ * Note that connections that are established via a TFO SYN, but that fall
+ * back to using a non-TFO SYN|ACK will have the TCP_FASTOPEN socket option
+ * set.
+ *
+ * Per the RFC, this implementation limits the number of TFO connections
+ * that can be in the SYN_RECEIVED state on a per listen-socket basis.
+ * Whenever this limit is exceeded, requests for new TFO connections are
+ * serviced as non-TFO requests. Without such a limit, given a valid TFO
+ * cookie, an attacker could keep the listen queue in an overflow condition
+ * using a TFO SYN flood. This implementation sets the limit at half the
+ * configured listen backlog.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+
+#include <crypto/siphash/siphash.h>
+
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/tcp_fastopen.h>
+#include <netinet/tcp_var.h>
+
+
+/* TFO keys are SipHash keys, so the key length is the SipHash key length. */
+#define TCP_FASTOPEN_KEY_LEN SIPHASH_KEY_LENGTH
+
+/*
+ * Number of key slots.  Taken from the TCP_RFC7413_MAX_KEYS kernel option
+ * when it is defined and at least 1; otherwise defaults to 2.
+ */
+#if !defined(TCP_RFC7413_MAX_KEYS) || (TCP_RFC7413_MAX_KEYS < 1)
+#define TCP_FASTOPEN_MAX_KEYS 2
+#else
+#define TCP_FASTOPEN_MAX_KEYS TCP_RFC7413_MAX_KEYS
+#endif
+
+/*
+ * Fixed-size ring of installed keys.  'newest' is the index of the most
+ * recently installed key; tcp_fastopen_addkey_locked() advances it (wrapping
+ * to 0) before storing each new key, so the oldest slot is overwritten once
+ * all TCP_FASTOPEN_MAX_KEYS slots are in use.
+ */
+struct tcp_fastopen_keylist {
+ unsigned int newest;
+ uint8_t key[TCP_FASTOPEN_MAX_KEYS][TCP_FASTOPEN_KEY_LEN];
+};
+
+/*
+ * Context handed to the autokey callout handler: the callout itself, so the
+ * handler can re-arm it, and the vnet the handler passes to CURVNET_SET()
+ * before touching the V_ state.
+ */
+struct tcp_fastopen_callout {
+ struct callout c;
+ struct vnet *v;
+};
+
+/* Parent node for all net.inet.tcp.fastopen.* knobs. */
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, fastopen, CTLFLAG_RW, 0, "TCP Fast Open");
+
+/*
+ * acceptany: when non-zero, tcp_fastopen_check_cookie() reports every
+ * cookie as valid without consulting the installed keys.
+ */
+static VNET_DEFINE(int, tcp_fastopen_acceptany) = 0;
+#define V_tcp_fastopen_acceptany VNET(tcp_fastopen_acceptany)
+SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, acceptany,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_fastopen_acceptany), 0,
+ "Accept any non-empty cookie");
+
+/*
+ * autokey: interval, in seconds, between automatic key generations
+ * (default 120).  Writes go through a handler so the callout can be
+ * re-armed or stopped to match the new value.
+ */
+static VNET_DEFINE(unsigned int, tcp_fastopen_autokey) = 120;
+#define V_tcp_fastopen_autokey VNET(tcp_fastopen_autokey)
+static int sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, autokey,
+ CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0,
+ &sysctl_net_inet_tcp_fastopen_autokey, "IU",
+ "Number of seconds between auto-generation of a new key; zero disables");
+
+/*
+ * enabled: master switch (default 0).  Not static because it is declared in
+ * tcp_fastopen.h for use by the rest of the TCP stack.  Writes go through a
+ * handler that installs or discards keys on state transitions.
+ */
+VNET_DEFINE(unsigned int, tcp_fastopen_enabled) = 0;
+static int sysctl_net_inet_tcp_fastopen_enabled(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, enabled,
+ CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0,
+ &sysctl_net_inet_tcp_fastopen_enabled, "IU",
+ "Enable/disable TCP Fast Open processing");
+
+/* keylen: read-only compile-time constant, TCP_FASTOPEN_KEY_LEN. */
+SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, keylen,
+ CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_KEY_LEN,
+ "Key length in bytes");
+
+/* maxkeys: read-only compile-time constant, TCP_FASTOPEN_MAX_KEYS. */
+SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, maxkeys,
+ CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_MAX_KEYS,
+ "Maximum number of keys supported");
+
+/*
+ * numkeys: read-only count of installed keys.  Incremented (up to
+ * TCP_FASTOPEN_MAX_KEYS) by tcp_fastopen_addkey_locked() and reset to zero
+ * by the 'enabled' handler on the enabled -> disabled transition.
+ */
+static VNET_DEFINE(unsigned int, tcp_fastopen_numkeys) = 0;
+#define V_tcp_fastopen_numkeys VNET(tcp_fastopen_numkeys)
+SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, numkeys,
+ CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcp_fastopen_numkeys), 0,
+ "Number of keys installed");
+
+/*
+ * setkey: write-only opaque knob; the handler installs the written
+ * TCP_FASTOPEN_KEY_LEN bytes as the newest key.
+ */
+static int sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, setkey,
+ CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_WR, NULL, 0,
+ &sysctl_net_inet_tcp_fastopen_setkey, "",
+ "Install a new key");
+
+/*
+ * Read-mostly lock covering the key ring and key count.  Cookie validation
+ * takes it as a reader; key installation, the sysctl handlers and the
+ * autokey callout (which is initialized against this lock) are the writers.
+ */
+static VNET_DEFINE(struct rmlock, tcp_fastopen_keylock);
+#define V_tcp_fastopen_keylock VNET(tcp_fastopen_keylock)
+
+/* Readers must supply a struct rm_priotracker (t) for the duration. */
+#define TCP_FASTOPEN_KEYS_RLOCK(t) rm_rlock(&V_tcp_fastopen_keylock, (t))
+#define TCP_FASTOPEN_KEYS_RUNLOCK(t) rm_runlock(&V_tcp_fastopen_keylock, (t))
+#define TCP_FASTOPEN_KEYS_WLOCK() rm_wlock(&V_tcp_fastopen_keylock)
+#define TCP_FASTOPEN_KEYS_WUNLOCK() rm_wunlock(&V_tcp_fastopen_keylock)
+
+/* The installed keys (see struct tcp_fastopen_keylist). */
+static VNET_DEFINE(struct tcp_fastopen_keylist, tcp_fastopen_keys);
+#define V_tcp_fastopen_keys VNET(tcp_fastopen_keys)
+
+/* Callout and context used for periodic automatic key generation. */
+static VNET_DEFINE(struct tcp_fastopen_callout, tcp_fastopen_autokey_ctx);
+#define V_tcp_fastopen_autokey_ctx VNET(tcp_fastopen_autokey_ctx)
+
+/* UMA zone from which the pending-connection counters are allocated. */
+static VNET_DEFINE(uma_zone_t, counter_zone);
+#define V_counter_zone VNET(counter_zone)
+
+/*
+ * Initialize the TFO state of the current vnet: the pending-counter zone,
+ * the key lock, the autokey callout (bound to the key lock) and the key
+ * ring.
+ */
+void
+tcp_fastopen_init(void)
+{
+	V_counter_zone = uma_zcreate("tfo", sizeof(unsigned int),
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+	rm_init(&V_tcp_fastopen_keylock, "tfo_keylock");
+	callout_init_rm(&V_tcp_fastopen_autokey_ctx.c,
+	    &V_tcp_fastopen_keylock, 0);
+	/*
+	 * The callout handler runs outside of any vnet context and relies on
+	 * this pointer for its CURVNET_SET(); without it the handler would
+	 * switch to a NULL vnet.
+	 */
+	V_tcp_fastopen_autokey_ctx.v = curvnet;
+	/*
+	 * Start one slot before index 0 so that the first installed key,
+	 * which advances 'newest' before storing, lands in slot 0.
+	 */
+	V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1;
+}
+
+/*
+ * Tear down the TFO state created by tcp_fastopen_init().  The callout is
+ * drained first because it is bound to the key lock, which is destroyed
+ * next.
+ *
+ * NOTE(review): the counter zone is created with UMA_ZONE_NOFREE in
+ * tcp_fastopen_init(); confirm that destroying such a zone here is intended.
+ */
+void
+tcp_fastopen_destroy(void)
+{
+ callout_drain(&V_tcp_fastopen_autokey_ctx.c);
+ rm_destroy(&V_tcp_fastopen_keylock);
+ uma_zdestroy(V_counter_zone);
+}
+
+/*
+ * Allocate a pending-connection counter without sleeping.  A new counter
+ * starts at 1, representing the reference held by its owner.  Returns NULL
+ * if the allocation fails.
+ */
+unsigned int *
+tcp_fastopen_alloc_counter(void)
+{
+	unsigned int *pending;
+
+	pending = uma_zalloc(V_counter_zone, M_NOWAIT);
+	if (pending == NULL)
+		return (NULL);
+	*pending = 1;
+	return (pending);
+}
+
+/*
+ * Drop one reference on a pending-connection counter and free it when the
+ * last reference goes away.
+ *
+ * The decrement and the "was this the last reference" test are performed as
+ * a single atomic operation.  Testing *counter with a plain read and then
+ * subtracting separately lets two concurrent releasers both observe a value
+ * greater than 1, both subtract, and leave the counter at zero without
+ * anyone freeing it.
+ */
+void
+tcp_fastopen_decrement_counter(unsigned int *counter)
+{
+	/* atomic_fetchadd_int() returns the value prior to the addition. */
+	if (atomic_fetchadd_int(counter, -1) == 1)
+		uma_zfree(V_counter_zone, counter);
+}
+
+/*
+ * Install 'key' (TCP_FASTOPEN_KEY_LEN bytes) as the newest key.  The ring
+ * index is advanced with wrap-around, so once every slot is occupied the
+ * oldest key is overwritten.  The caller must hold the key write lock.
+ */
+static void
+tcp_fastopen_addkey_locked(uint8_t *key)
+{
+	unsigned int slot;
+
+	slot = V_tcp_fastopen_keys.newest + 1;
+	if (slot == TCP_FASTOPEN_MAX_KEYS)
+		slot = 0;
+	V_tcp_fastopen_keys.newest = slot;
+	memcpy(V_tcp_fastopen_keys.key[slot], key, TCP_FASTOPEN_KEY_LEN);
+
+	/* The key count saturates at the number of slots. */
+	if (V_tcp_fastopen_numkeys < TCP_FASTOPEN_MAX_KEYS)
+		V_tcp_fastopen_numkeys++;
+}
+
+/*
+ * Generate a random key and install it as the newest key.  The caller must
+ * hold the key write lock.
+ */
+static void
+tcp_fastopen_autokey_locked(void)
+{
+	uint8_t fresh[TCP_FASTOPEN_KEY_LEN];
+
+	arc4rand(fresh, sizeof(fresh), 0);
+	tcp_fastopen_addkey_locked(fresh);
+}
+
+/*
+ * Autokey callout handler: install a freshly generated key and re-arm the
+ * callout for another V_tcp_fastopen_autokey seconds.  'arg' is the
+ * struct tcp_fastopen_callout that owns the callout.
+ *
+ * The callout is initialized with callout_init_rm() against the key lock in
+ * tcp_fastopen_init(), so this handler is expected to run with that lock
+ * held, which is what tcp_fastopen_autokey_locked() requires.
+ */
+static void
+tcp_fastopen_autokey_callout(void *arg)
+{
+ struct tcp_fastopen_callout *ctx = arg;
+
+ /* Enter the owning vnet so the V_ accessors below resolve correctly. */
+ CURVNET_SET(ctx->v);
+ tcp_fastopen_autokey_locked();
+ callout_reset(&ctx->c, V_tcp_fastopen_autokey * hz,
+ tcp_fastopen_autokey_callout, ctx);
+ CURVNET_RESTORE();
+}
+
+
+/*
+ * Compute the TFO cookie for the peer described by 'inc' under 'key': the
+ * 64-bit SipHash-2-4 of the foreign (client) address, IPv4 or IPv6 as
+ * selected by INC_ISIPV6 in inc->inc_flags.
+ */
+static uint64_t
+tcp_fastopen_make_cookie(uint8_t key[SIPHASH_KEY_LENGTH], struct in_conninfo *inc)
+{
+	SIPHASH_CTX sip;
+	uint64_t cookie;
+
+	SipHash24_Init(&sip);
+	SipHash_SetKey(&sip, key);
+#ifdef INET
+	if ((inc->inc_flags & INC_ISIPV6) == 0)
+		SipHash_Update(&sip, &inc->inc_faddr, sizeof(inc->inc_faddr));
+#endif
+#ifdef INET6
+	if ((inc->inc_flags & INC_ISIPV6) == INC_ISIPV6)
+		SipHash_Update(&sip, &inc->inc6_faddr,
+		    sizeof(inc->inc6_faddr));
+#endif
+	SipHash_Final((u_int8_t *)&cookie, &sip);
+
+	return (cookie);
+}
+
+
+/*
+ * Validate a client-supplied TFO cookie against the installed keys.
+ *
+ * inc            connection info; the foreign address is what gets hashed
+ * cookie         cookie bytes from the TFO option (only read when len is
+ *                TCP_FASTOPEN_COOKIE_LEN)
+ * len            length of the supplied cookie in bytes
+ * latest_cookie  out: cookie derived from the newest key; only written
+ *                when the return value is 0 or 1
+ *
+ * Return values:
+ *	-1	the cookie is invalid and no valid cookie is available
+ *	 0	the cookie is invalid and the latest cookie has been returned
+ *	 1	the cookie is valid and the latest cookie has been returned
+ *
+ * When net.inet.tcp.fastopen.acceptany is set, every cookie is reported
+ * valid and the returned latest cookie is zero.
+ */
+int
+tcp_fastopen_check_cookie(struct in_conninfo *inc, uint8_t *cookie,
+    unsigned int len, uint64_t *latest_cookie)
+{
+	struct rm_priotracker tracker;
+	unsigned int i, key_index;
+	uint64_t cur_cookie;
+	int rv;
+
+	if (V_tcp_fastopen_acceptany) {
+		*latest_cookie = 0;
+		return (1);
+	}
+
+	/*
+	 * Every access to the key ring and key count below happens under the
+	 * read lock, including the wrong-length path, so a concurrent key
+	 * installation cannot be observed half-written.
+	 */
+	TCP_FASTOPEN_KEYS_RLOCK(&tracker);
+
+	/*
+	 * With no keys installed there is no cookie to hand back.  Report
+	 * that explicitly rather than returning 0 with *latest_cookie left
+	 * unwritten, which the caller would treat as a cookie to send.
+	 */
+	if (V_tcp_fastopen_numkeys == 0) {
+		rv = -1;
+		goto out;
+	}
+
+	/*
+	 * A cookie of the wrong length (including an empty cookie request)
+	 * can never match; just supply the current cookie.
+	 */
+	if (len != TCP_FASTOPEN_COOKIE_LEN) {
+		*latest_cookie = tcp_fastopen_make_cookie(
+		    V_tcp_fastopen_keys.key[V_tcp_fastopen_keys.newest], inc);
+		rv = 0;
+		goto out;
+	}
+
+	/*
+	 * Check against each available key, from newest to oldest.
+	 */
+	rv = 0;
+	key_index = V_tcp_fastopen_keys.newest;
+	for (i = 0; i < V_tcp_fastopen_numkeys; i++) {
+		cur_cookie = tcp_fastopen_make_cookie(
+		    V_tcp_fastopen_keys.key[key_index], inc);
+		if (i == 0)
+			*latest_cookie = cur_cookie;
+		if (memcmp(cookie, &cur_cookie, TCP_FASTOPEN_COOKIE_LEN) == 0) {
+			rv = 1;
+			break;
+		}
+		/* Step backwards through the ring, wrapping at slot 0. */
+		if (key_index == 0)
+			key_index = TCP_FASTOPEN_MAX_KEYS - 1;
+		else
+			key_index--;
+	}
+
+out:
+	TCP_FASTOPEN_KEYS_RUNLOCK(&tracker);
+	return (rv);
+}
+
+/*
+ * Handler for net.inet.tcp.fastopen.autokey.  Reads report the current
+ * interval.  Writes reject values whose tick count (seconds * hz) would
+ * exceed INT_MAX and otherwise, while TFO is enabled, re-arm the autokey
+ * callout with the new interval or stop it when the interval becomes zero.
+ */
+static int
+sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS)
+{
+	unsigned int interval;
+	int error;
+
+	interval = V_tcp_fastopen_autokey;
+	error = sysctl_handle_int(oidp, &interval, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+	if (interval > (INT_MAX / hz))
+		return (EINVAL);
+
+	TCP_FASTOPEN_KEYS_WLOCK();
+	if (V_tcp_fastopen_enabled) {
+		if (interval != 0)
+			callout_reset(&V_tcp_fastopen_autokey_ctx.c,
+			    interval * hz, tcp_fastopen_autokey_callout,
+			    &V_tcp_fastopen_autokey_ctx);
+		else if (V_tcp_fastopen_autokey != 0)
+			callout_stop(&V_tcp_fastopen_autokey_ctx.c);
+	}
+	V_tcp_fastopen_autokey = interval;
+	TCP_FASTOPEN_KEYS_WUNLOCK();
+
+	return (0);
+}
+
+/*
+ * Handler for net.inet.tcp.fastopen.enabled.  Only actual state changes do
+ * any work:
+ *   enabled -> disabled: discard all keys and stop the autokey callout.
+ *   disabled -> enabled: if autokey is on and no key is installed, generate
+ *   one immediately and start the autokey callout.
+ */
+static int
+sysctl_net_inet_tcp_fastopen_enabled(SYSCTL_HANDLER_ARGS)
+{
+	unsigned int requested;
+	int error;
+
+	requested = V_tcp_fastopen_enabled;
+	error = sysctl_handle_int(oidp, &requested, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+
+	if (V_tcp_fastopen_enabled && requested == 0) {
+		/* enabled -> disabled */
+		TCP_FASTOPEN_KEYS_WLOCK();
+		V_tcp_fastopen_numkeys = 0;
+		V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1;
+		if (V_tcp_fastopen_autokey)
+			callout_stop(&V_tcp_fastopen_autokey_ctx.c);
+		V_tcp_fastopen_enabled = 0;
+		TCP_FASTOPEN_KEYS_WUNLOCK();
+	} else if (!V_tcp_fastopen_enabled && requested != 0) {
+		/* disabled -> enabled */
+		TCP_FASTOPEN_KEYS_WLOCK();
+		if (V_tcp_fastopen_autokey && V_tcp_fastopen_numkeys == 0) {
+			tcp_fastopen_autokey_locked();
+			callout_reset(&V_tcp_fastopen_autokey_ctx.c,
+			    V_tcp_fastopen_autokey * hz,
+			    tcp_fastopen_autokey_callout,
+			    &V_tcp_fastopen_autokey_ctx);
+		}
+		V_tcp_fastopen_enabled = 1;
+		TCP_FASTOPEN_KEYS_WUNLOCK();
+	}
+
+	return (0);
+}
+
+/*
+ * Handler for net.inet.tcp.fastopen.setkey (write-only).  Accepts exactly
+ * TCP_FASTOPEN_KEY_LEN bytes and installs them as the newest key.
+ *
+ * Returns EINVAL for a read attempt or a wrong-sized write, EPERM when no
+ * new value is supplied, or the error from copying the key in.
+ */
+static int
+sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS)
+{
+	uint8_t key[TCP_FASTOPEN_KEY_LEN];
+	int rc;
+
+	/* This knob cannot be read. */
+	if (req->oldptr != NULL || req->oldlen != 0)
+		return (EINVAL);
+	if (req->newptr == NULL)
+		return (EPERM);
+	if (req->newlen != sizeof(key))
+		return (EINVAL);
+
+	rc = SYSCTL_IN(req, key, sizeof(key));
+	if (rc != 0)
+		return (rc);
+
+	TCP_FASTOPEN_KEYS_WLOCK();
+	tcp_fastopen_addkey_locked(key);
+	TCP_FASTOPEN_KEYS_WUNLOCK();
+
+	return (0);
+}
diff --git a/sys/netinet/tcp_fastopen.h b/sys/netinet/tcp_fastopen.h
new file mode 100644
index 0000000..c64ba2c
--- /dev/null
+++ b/sys/netinet/tcp_fastopen.h
@@ -0,0 +1,47 @@
+/*-
+ * Copyright (c) 2015 Patrick Kelsey
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _TCP_FASTOPEN_H_
+#define _TCP_FASTOPEN_H_
+
+#ifdef _KERNEL
+
+#define TCP_FASTOPEN_COOKIE_LEN 8 /* tied to SipHash24 64-bit output */
+
+/* Master TFO switch, backed by the net.inet.tcp.fastopen.enabled sysctl. */
+VNET_DECLARE(unsigned int, tcp_fastopen_enabled);
+#define V_tcp_fastopen_enabled VNET(tcp_fastopen_enabled)
+
+/* Set up / tear down the TFO key lock, autokey callout and counter zone. */
+void tcp_fastopen_init(void);
+void tcp_fastopen_destroy(void);
+/*
+ * Pending-connection counters.  alloc returns a counter initialized to 1, or
+ * NULL if the non-sleeping allocation fails; decrement drops one reference
+ * and frees the counter when the last one is released.
+ */
+unsigned int *tcp_fastopen_alloc_counter(void);
+void tcp_fastopen_decrement_counter(unsigned int *counter);
+/*
+ * Validate a client cookie of 'len' bytes for the peer in 'inc'.  Returns 1
+ * if valid, 0 if invalid with the current cookie stored in *latest_cookie,
+ * or -1 if invalid and no cookie is available.
+ */
+int tcp_fastopen_check_cookie(struct in_conninfo *inc, uint8_t *cookie,
+ unsigned int len, uint64_t *latest_cookie);
+#endif /* _KERNEL */
+
+#endif /* _TCP_FASTOPEN_H_ */
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 0500f25..b824ba2 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -98,6 +98,9 @@ __FBSDID("$FreeBSD$");
#include <netinet6/in6_pcb.h>
#include <netinet6/ip6_var.h>
#include <netinet6/nd6.h>
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
@@ -1072,6 +1075,9 @@ relocked:
rstreason = BANDLIM_RST_OPENPORT;
goto dropwithreset;
}
+#ifdef TCP_RFC7413
+new_tfo_socket:
+#endif
if (so == NULL) {
/*
* We completed the 3-way handshake
@@ -1329,7 +1335,12 @@ relocked:
(void *)tcp_saveipgen, &tcp_savetcp, 0);
#endif
tcp_dooptions(&to, optp, optlen, TO_SYN);
+#ifdef TCP_RFC7413
+ if (syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL))
+ goto new_tfo_socket;
+#else
syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL);
+#endif
/*
* Entry added to syncache and mbuf consumed.
* Everything already unlocked by syncache_add().
@@ -1439,6 +1450,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
struct in_conninfo *inc;
struct mbuf *mfree;
struct tcpopt to;
+ int tfo_syn;
#ifdef TCPDEBUG
/*
@@ -1878,6 +1890,28 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
rstreason = BANDLIM_RST_OPENPORT;
goto dropwithreset;
}
+#ifdef TCP_RFC7413
+ if (tp->t_flags & TF_FASTOPEN) {
+ /*
+ * When a TFO connection is in SYN_RECEIVED, the
+ * only valid packets are the initial SYN, a
+ * retransmit/copy of the initial SYN (possibly with
+ * a subset of the original data), a valid ACK, a
+ * FIN, or a RST.
+ */
+ if ((thflags & (TH_SYN|TH_ACK)) == (TH_SYN|TH_ACK)) {
+ rstreason = BANDLIM_RST_OPENPORT;
+ goto dropwithreset;
+ } else if (thflags & TH_SYN) {
+ /* non-initial SYN is ignored */
+ if ((tcp_timer_active(tp, TT_DELACK) ||
+ tcp_timer_active(tp, TT_REXMT)))
+ goto drop;
+ } else if (!(thflags & (TH_ACK|TH_FIN|TH_RST))) {
+ goto drop;
+ }
+ }
+#endif
break;
/*
@@ -2318,9 +2352,16 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/
if ((thflags & TH_ACK) == 0) {
if (tp->t_state == TCPS_SYN_RECEIVED ||
- (tp->t_flags & TF_NEEDSYN))
+ (tp->t_flags & TF_NEEDSYN)) {
+#ifdef TCP_RFC7413
+ if (tp->t_state == TCPS_SYN_RECEIVED &&
+ tp->t_flags & TF_FASTOPEN) {
+ tp->snd_wnd = tiwin;
+ cc_conn_init(tp);
+ }
+#endif
goto step6;
- else if (tp->t_flags & TF_ACKNOW)
+ } else if (tp->t_flags & TF_ACKNOW)
goto dropafterack;
else
goto drop;
@@ -2359,7 +2400,27 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
tcp_state_change(tp, TCPS_ESTABLISHED);
TCP_PROBE5(accept__established, NULL, tp,
mtod(m, const char *), tp, th);
- cc_conn_init(tp);
+#ifdef TCP_RFC7413
+ if (tp->t_tfo_pending) {
+ tcp_fastopen_decrement_counter(tp->t_tfo_pending);
+ tp->t_tfo_pending = NULL;
+
+ /*
+ * Account for the ACK of our SYN prior to
+ * regular ACK processing below.
+ */
+ tp->snd_una++;
+ }
+ /*
+ * TFO connections call cc_conn_init() during SYN
+ * processing. Calling it again here for such
+ * connections is not harmless as it would undo the
+ * snd_cwnd reduction that occurs when a TFO SYN|ACK
+ * is retransmitted.
+ */
+ if (!(tp->t_flags & TF_FASTOPEN))
+#endif
+ cc_conn_init(tp);
tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
}
/*
@@ -2890,9 +2951,12 @@ dodata: /* XXX */
* case PRU_RCVD). If a FIN has already been received on this
* connection then we just ignore the text.
*/
- if ((tlen || (thflags & TH_FIN)) &&
+ tfo_syn = ((tp->t_state == TCPS_SYN_RECEIVED) &&
+ (tp->t_flags & TF_FASTOPEN));
+ if ((tlen || (thflags & TH_FIN) || tfo_syn) &&
TCPS_HAVERCVDFIN(tp->t_state) == 0) {
tcp_seq save_start = th->th_seq;
+
m_adj(m, drop_hdrlen); /* delayed header drop */
/*
* Insert segment which includes th into TCP reassembly queue
@@ -2908,8 +2972,9 @@ dodata: /* XXX */
*/
if (th->th_seq == tp->rcv_nxt &&
LIST_EMPTY(&tp->t_segq) &&
- TCPS_HAVEESTABLISHED(tp->t_state)) {
- if (DELAY_ACK(tp, tlen))
+ (TCPS_HAVEESTABLISHED(tp->t_state) ||
+ tfo_syn)) {
+ if (DELAY_ACK(tp, tlen) || tfo_syn)
tp->t_flags |= TF_DELACK;
else
tp->t_flags |= TF_ACKNOW;
@@ -3262,6 +3327,21 @@ tcp_dooptions(struct tcpopt *to, u_char *cp, int cnt, int flags)
to->to_sacks = cp + 2;
TCPSTAT_INC(tcps_sack_rcv_blocks);
break;
+#ifdef TCP_RFC7413
+		case TCPOPT_FAST_OPEN:
+			/*
+			 * A TFO option is either empty (a cookie request) or
+			 * carries a cookie whose total option length lies in
+			 * [TCPOLEN_FAST_OPEN_MIN, TCPOLEN_FAST_OPEN_MAX].
+			 * Ignore anything else.  The two range tests must be
+			 * OR-ed: no length is both below the minimum and
+			 * above the maximum, so AND-ing them rejects nothing.
+			 */
+			if ((optlen != TCPOLEN_FAST_OPEN_EMPTY) &&
+			    ((optlen < TCPOLEN_FAST_OPEN_MIN) ||
+			    (optlen > TCPOLEN_FAST_OPEN_MAX)))
+				continue;
+			/* The option is only meaningful on a SYN. */
+			if (!(flags & TO_SYN))
+				continue;
+			if (!V_tcp_fastopen_enabled)
+				continue;
+			to->to_flags |= TOF_FASTOPEN;
+			/* Cookie length excludes the kind and length bytes. */
+			to->to_tfo_len = optlen - 2;
+			to->to_tfo_cookie = to->to_tfo_len ? cp + 2 : NULL;
+			break;
+#endif
default:
continue;
}
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index f1196e1..c7dc547 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -69,6 +69,9 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#endif
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
#define TCPOUTFLAGS
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
@@ -202,6 +205,17 @@ tcp_output(struct tcpcb *tp)
return (tcp_offload_output(tp));
#endif
+#ifdef TCP_RFC7413
+ /*
+ * For TFO connections in SYN_RECEIVED, only allow the initial
+ * SYN|ACK and those sent by the retransmit timer.
+ */
+ if ((tp->t_flags & TF_FASTOPEN) &&
+ (tp->t_state == TCPS_SYN_RECEIVED) &&
+ SEQ_GT(tp->snd_max, tp->snd_una) && /* initial SYN|ACK sent */
+ (tp->snd_nxt != tp->snd_una)) /* not a retransmit */
+ return (0);
+#endif
/*
* Determine length of data that should be transmitted,
* and flags that will be used.
@@ -387,6 +401,15 @@ after_sack_rexmit:
if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) {
if (tp->t_state != TCPS_SYN_RECEIVED)
flags &= ~TH_SYN;
+#ifdef TCP_RFC7413
+ /*
+ * When sending additional segments following a TFO SYN|ACK,
+ * do not include the SYN bit.
+ */
+ if ((tp->t_flags & TF_FASTOPEN) &&
+ (tp->t_state == TCPS_SYN_RECEIVED))
+ flags &= ~TH_SYN;
+#endif
off--, len++;
}
@@ -400,6 +423,17 @@ after_sack_rexmit:
flags &= ~TH_FIN;
}
+#ifdef TCP_RFC7413
+ /*
+ * When retransmitting SYN|ACK on a passively-created TFO socket,
+ * don't include data, as the presence of data may have caused the
+ * original SYN|ACK to have been dropped by a middlebox.
+ */
+ if ((tp->t_flags & TF_FASTOPEN) &&
+ (((tp->t_state == TCPS_SYN_RECEIVED) && (tp->t_rxtshift > 0)) ||
+ (flags & TH_RST)))
+ len = 0;
+#endif
if (len <= 0) {
/*
* If FIN has been sent but not acked,
@@ -718,6 +752,22 @@ send:
tp->snd_nxt = tp->iss;
to.to_mss = tcp_mssopt(&tp->t_inpcb->inp_inc);
to.to_flags |= TOF_MSS;
+#ifdef TCP_RFC7413
+ /*
+ * Only include the TFO option on the first
+ * transmission of the SYN|ACK on a
+ * passively-created TFO socket, as the presence of
+ * the TFO option may have caused the original
+ * SYN|ACK to have been dropped by a middlebox.
+ */
+ if ((tp->t_flags & TF_FASTOPEN) &&
+ (tp->t_state == TCPS_SYN_RECEIVED) &&
+ (tp->t_rxtshift == 0)) {
+ to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN;
+ to.to_tfo_cookie = (u_char *)&tp->t_tfo_cookie;
+ to.to_flags |= TOF_FASTOPEN;
+ }
+#endif
}
/* Window scaling. */
if ((flags & TH_SYN) && (tp->t_flags & TF_REQ_SCALE)) {
@@ -997,7 +1047,7 @@ send:
* give data to the user when a buffer fills or
* a PUSH comes in.)
*/
- if (off + len == so->so_snd.sb_cc)
+ if ((off + len == so->so_snd.sb_cc) && !(flags & TH_SYN))
flags |= TH_PUSH;
SOCKBUF_UNLOCK(&so->so_snd);
} else {
@@ -1694,6 +1744,25 @@ tcp_addoptions(struct tcpopt *to, u_char *optp)
TCPSTAT_INC(tcps_sack_send_blocks);
break;
}
+#ifdef TCP_RFC7413
+ case TOF_FASTOPEN:
+ {
+ int total_len;
+
+ /* XXX is there any point to aligning this option? */
+ total_len = TCPOLEN_FAST_OPEN_EMPTY + to->to_tfo_len;
+ if (TCP_MAXOLEN - optlen < total_len)
+ continue;
+ *optp++ = TCPOPT_FAST_OPEN;
+ *optp++ = total_len;
+ if (to->to_tfo_len > 0) {
+ bcopy(to->to_tfo_cookie, optp, to->to_tfo_len);
+ optp += to->to_tfo_len;
+ }
+ optlen += total_len;
+ break;
+ }
+#endif
default:
panic("%s: unknown TCP option type", __func__);
break;
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 7bdd573..3ce21a9 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -83,6 +83,9 @@ __FBSDID("$FreeBSD$");
#include <netinet6/nd6.h>
#endif
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
@@ -427,6 +430,10 @@ tcp_init(void)
SHUTDOWN_PRI_DEFAULT);
EVENTHANDLER_REGISTER(maxsockets_change, tcp_zone_change, NULL,
EVENTHANDLER_PRI_ANY);
+
+#ifdef TCP_RFC7413
+ tcp_fastopen_init();
+#endif
}
#ifdef VIMAGE
@@ -434,6 +441,9 @@ void
tcp_destroy(void)
{
+#ifdef TCP_RFC7413
+ tcp_fastopen_destroy();
+#endif
tcp_hc_destroy();
syncache_destroy();
tcp_tw_destroy();
@@ -1102,6 +1112,17 @@ tcp_close(struct tcpcb *tp)
if (tp->t_state == TCPS_LISTEN)
tcp_offload_listen_stop(tp);
#endif
+#ifdef TCP_RFC7413
+ /*
+ * This releases the TFO pending counter resource for TFO listen
+ * sockets as well as passively-created TFO sockets that transition
+ * from SYN_RECEIVED to CLOSED.
+ */
+ if (tp->t_tfo_pending) {
+ tcp_fastopen_decrement_counter(tp->t_tfo_pending);
+ tp->t_tfo_pending = NULL;
+ }
+#endif
in_pcbdrop(inp);
TCPSTAT_INC(tcps_closed);
KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL"));
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index 9896788..8d2cc98 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -78,6 +78,9 @@ __FBSDID("$FreeBSD$");
#include <netinet6/in6_pcb.h>
#endif
#include <netinet/tcp.h>
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
@@ -1089,6 +1092,39 @@ failed:
return (0);
}
+#ifdef TCP_RFC7413
+/*
+ * Create the connection socket for a SYN that carried a valid TFO cookie,
+ * directly from the syncache entry 'sc' instead of waiting for the final
+ * ACK of the handshake.
+ *
+ * On entry *lsop is the listen socket.  On return *lsop is the new socket,
+ * or NULL if syncache_socket() failed.  'response_cookie' is stored in the
+ * new tcpcb as t_tfo_cookie.
+ */
+static void
+syncache_tfo_expand(struct syncache *sc, struct socket **lsop, struct mbuf *m,
+ uint64_t response_cookie)
+{
+ struct inpcb *inp;
+ struct tcpcb *tp;
+ unsigned int *pending_counter;
+
+ /*
+ * Global TCP locks are held because we manipulate the PCB lists
+ * and create a new socket.
+ */
+ /*
+ * NOTE(review): syncache_add(), the caller, asserts the write lock on
+ * V_tcbinfo while this asserts the read lock - confirm which is intended.
+ */
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+
+ /* Fetch the listener's counter before *lsop is overwritten below. */
+ pending_counter = intotcpcb(sotoinpcb(*lsop))->t_tfo_pending;
+ *lsop = syncache_socket(sc, *lsop, m);
+ if (*lsop == NULL) {
+ TCPSTAT_INC(tcps_sc_aborted);
+ /* Undo the pending-count increment made in syncache_add(). */
+ atomic_subtract_int(pending_counter, 1);
+ } else {
+ inp = sotoinpcb(*lsop);
+ tp = intotcpcb(inp);
+ tp->t_flags |= TF_FASTOPEN;
+ tp->t_tfo_cookie = response_cookie;
+ /* Rewind the send sequence state to the initial sequence number. */
+ tp->snd_max = tp->iss;
+ tp->snd_nxt = tp->iss;
+ /*
+ * The new connection now holds the pending-count reference; it is
+ * released when t_tfo_pending is passed to
+ * tcp_fastopen_decrement_counter().
+ */
+ tp->t_tfo_pending = pending_counter;
+ TCPSTAT_INC(tcps_sc_completed);
+ }
+}
+#endif /* TCP_RFC7413 */
+
/*
* Given a LISTEN socket and an inbound SYN request, add
* this to the syn cache, and send back a segment:
@@ -1101,8 +1137,15 @@ failed:
* DoS attack, an attacker could send data which would eventually
* consume all available buffer space if it were ACKed. By not ACKing
* the data, we avoid this DoS scenario.
+ *
+ * The exception to the above is when a SYN with a valid TCP Fast Open (TFO)
+ * cookie is processed, V_tcp_fastopen_enabled set to true, and the
+ * TCP_FASTOPEN socket option is set. In this case, a new socket is created
+ * and returned via lsop, the mbuf is not freed so that tcp_input() can
+ * queue its data to the socket, and 1 is returned to indicate the
+ * TFO-socket-creation path was taken.
*/
-void
+int
syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
struct inpcb *inp, struct socket **lsop, struct mbuf *m, void *tod,
void *todctx)
@@ -1115,6 +1158,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
u_int ltflags;
int win, sb_hiwat, ip_ttl, ip_tos;
char *s;
+ int rv = 0;
#ifdef INET6
int autoflowlabel = 0;
#endif
@@ -1123,6 +1167,11 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
#endif
struct syncache scs;
struct ucred *cred;
+#ifdef TCP_RFC7413
+ uint64_t tfo_response_cookie;
+ int tfo_cookie_valid = 0;
+ int tfo_response_cookie_valid = 0;
+#endif
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp); /* listen socket */
@@ -1148,6 +1197,29 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
sb_hiwat = so->so_rcv.sb_hiwat;
ltflags = (tp->t_flags & (TF_NOOPT | TF_SIGNATURE));
+#ifdef TCP_RFC7413
+ if (V_tcp_fastopen_enabled && (tp->t_flags & TF_FASTOPEN) &&
+ (tp->t_tfo_pending != NULL) && (to->to_flags & TOF_FASTOPEN)) {
+ /*
+ * Limit the number of pending TFO connections to
+ * approximately half of the queue limit. This prevents TFO
+ * SYN floods from starving the service by filling the
+ * listen queue with bogus TFO connections.
+ */
+ if (atomic_fetchadd_int(tp->t_tfo_pending, 1) <=
+ (so->so_qlimit / 2)) {
+ int result;
+
+ result = tcp_fastopen_check_cookie(inc,
+ to->to_tfo_cookie, to->to_tfo_len,
+ &tfo_response_cookie);
+ tfo_cookie_valid = (result > 0);
+ tfo_response_cookie_valid = (result >= 0);
+ } else
+ atomic_subtract_int(tp->t_tfo_pending, 1);
+ }
+#endif
+
/* By the time we drop the lock these should no longer be used. */
so = NULL;
tp = NULL;
@@ -1160,9 +1232,16 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
} else
mac_syncache_create(maclabel, inp);
#endif
+#ifdef TCP_RFC7413
+ if (!tfo_cookie_valid) {
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ }
+#else
INP_WUNLOCK(inp);
INP_INFO_WUNLOCK(&V_tcbinfo);
-
+#endif
+
/*
* Remember the IP options, if any.
*/
@@ -1190,6 +1269,12 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
sc = syncache_lookup(inc, &sch); /* returns locked entry */
SCH_LOCK_ASSERT(sch);
if (sc != NULL) {
+#ifdef TCP_RFC7413
+ if (tfo_cookie_valid) {
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ }
+#endif
TCPSTAT_INC(tcps_sc_dupsyn);
if (ipopts) {
/*
@@ -1232,6 +1317,14 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
goto done;
}
+#ifdef TCP_RFC7413
+ if (tfo_cookie_valid) {
+ bzero(&scs, sizeof(scs));
+ sc = &scs;
+ goto skip_alloc;
+ }
+#endif
+
sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO);
if (sc == NULL) {
/*
@@ -1255,7 +1348,13 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
}
}
}
-
+
+#ifdef TCP_RFC7413
+skip_alloc:
+ if (!tfo_cookie_valid && tfo_response_cookie_valid)
+ sc->sc_tfo_cookie = &tfo_response_cookie;
+#endif
+
/*
* Fill in the syncache values.
*/
@@ -1365,6 +1464,15 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
#endif
SCH_UNLOCK(sch);
+#ifdef TCP_RFC7413
+ if (tfo_cookie_valid) {
+ syncache_tfo_expand(sc, lsop, m, tfo_response_cookie);
+ /* INP_WUNLOCK(inp) will be performed by the caller */
+ rv = 1;
+ goto tfo_done;
+ }
+#endif
+
/*
* Do a standard 3-way handshake.
*/
@@ -1382,17 +1490,20 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
}
done:
+ if (m) {
+ *lsop = NULL;
+ m_freem(m);
+ }
+#ifdef TCP_RFC7413
+tfo_done:
+#endif
if (cred != NULL)
crfree(cred);
#ifdef MAC
if (sc == &scs)
mac_syncache_destroy(&maclabel);
#endif
- if (m) {
-
- *lsop = NULL;
- m_freem(m);
- }
+ return (rv);
}
static int
@@ -1520,6 +1631,16 @@ syncache_respond(struct syncache *sc)
if (sc->sc_flags & SCF_SIGNATURE)
to.to_flags |= TOF_SIGNATURE;
#endif
+
+#ifdef TCP_RFC7413
+ if (sc->sc_tfo_cookie) {
+ to.to_flags |= TOF_FASTOPEN;
+ to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN;
+ to.to_tfo_cookie = sc->sc_tfo_cookie;
+ /* don't send cookie again when retransmitting response */
+ sc->sc_tfo_cookie = NULL;
+ }
+#endif
optlen = tcp_addoptions(&to, (u_char *)(th + 1));
/* Adjust headers by option size. */
diff --git a/sys/netinet/tcp_syncache.h b/sys/netinet/tcp_syncache.h
index fb9a6c6..d56afb6 100644
--- a/sys/netinet/tcp_syncache.h
+++ b/sys/netinet/tcp_syncache.h
@@ -41,7 +41,7 @@ void syncache_destroy(void);
void syncache_unreach(struct in_conninfo *, struct tcphdr *);
int syncache_expand(struct in_conninfo *, struct tcpopt *,
struct tcphdr *, struct socket **, struct mbuf *);
-void syncache_add(struct in_conninfo *, struct tcpopt *,
+int syncache_add(struct in_conninfo *, struct tcpopt *,
struct tcphdr *, struct inpcb *, struct socket **, struct mbuf *,
void *, void *);
void syncache_chkrst(struct in_conninfo *, struct tcphdr *);
@@ -74,7 +74,9 @@ struct syncache {
#endif
struct label *sc_label; /* MAC label reference */
struct ucred *sc_cred; /* cred cache for jail checks */
-
+#ifdef TCP_RFC7413
+ void *sc_tfo_cookie; /* for TCP Fast Open response */
+#endif
void *sc_pspare; /* TCP_SIGNATURE */
u_int32_t sc_spare[2]; /* UTO */
};
diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c
index 3dc3a81..c66cb76 100644
--- a/sys/netinet/tcp_timer.c
+++ b/sys/netinet/tcp_timer.c
@@ -596,7 +596,8 @@ tcp_timer_rexmt(void * xtp)
} else
tp->t_flags &= ~TF_PREVVALID;
TCPSTAT_INC(tcps_rexmttimeo);
- if (tp->t_state == TCPS_SYN_SENT)
+ if ((tp->t_state == TCPS_SYN_SENT) ||
+ (tp->t_state == TCPS_SYN_RECEIVED))
rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift];
else
rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index c848306..1ccbf9a 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -79,6 +79,9 @@ __FBSDID("$FreeBSD$");
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#endif
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
@@ -391,6 +394,10 @@ tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
}
SOCK_UNLOCK(so);
+#ifdef TCP_RFC7413
+ if (tp->t_flags & TF_FASTOPEN)
+ tp->t_tfo_pending = tcp_fastopen_alloc_counter();
+#endif
out:
TCPDEBUG2(PRU_LISTEN);
INP_WUNLOCK(inp);
@@ -436,6 +443,10 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
}
SOCK_UNLOCK(so);
+#ifdef TCP_RFC7413
+ if (tp->t_flags & TF_FASTOPEN)
+ tp->t_tfo_pending = tcp_fastopen_alloc_counter();
+#endif
out:
TCPDEBUG2(PRU_LISTEN);
INP_WUNLOCK(inp);
@@ -791,6 +802,18 @@ tcp_usr_rcvd(struct socket *so, int flags)
}
tp = intotcpcb(inp);
TCPDEBUG1();
+#ifdef TCP_RFC7413
+ /*
+ * For passively-created TFO connections, don't attempt a window
+ * update while still in SYN_RECEIVED as this may trigger an early
+ * SYN|ACK. It is preferable to have the SYN|ACK be sent along with
+ * application response data, or failing that, when the DELACK timer
+ * expires.
+ */
+ if ((tp->t_flags & TF_FASTOPEN) &&
+ (tp->t_state == TCPS_SYN_RECEIVED))
+ goto out;
+#endif
#ifdef TCP_OFFLOAD
if (tp->t_flags & TF_TOE)
tcp_offload_rcvd(tp);
@@ -1558,6 +1581,29 @@ unlock_and_done:
TP_MAXIDLE(tp));
goto unlock_and_done;
+#ifdef TCP_RFC7413
+ case TCP_FASTOPEN:
+ INP_WUNLOCK(inp);
+ if (!V_tcp_fastopen_enabled)
+ return (EPERM);
+
+ error = sooptcopyin(sopt, &optval, sizeof optval,
+ sizeof optval);
+ if (error)
+ return (error);
+
+ INP_WLOCK_RECHECK(inp);
+ if (optval) {
+ tp->t_flags |= TF_FASTOPEN;
+ if ((tp->t_state == TCPS_LISTEN) &&
+ (tp->t_tfo_pending == NULL))
+ tp->t_tfo_pending =
+ tcp_fastopen_alloc_counter();
+ } else
+ tp->t_flags &= ~TF_FASTOPEN;
+ goto unlock_and_done;
+#endif
+
default:
INP_WUNLOCK(inp);
error = ENOPROTOOPT;
@@ -1628,6 +1674,13 @@ unlock_and_done:
INP_WUNLOCK(inp);
error = sooptcopyout(sopt, &ui, sizeof(ui));
break;
+#ifdef TCP_RFC7413
+ case TCP_FASTOPEN:
+ optval = tp->t_flags & TF_FASTOPEN;
+ INP_WUNLOCK(inp);
+ error = sooptcopyout(sopt, &optval, sizeof optval);
+ break;
+#endif
default:
INP_WUNLOCK(inp);
error = ENOPROTOOPT;
@@ -1951,6 +2004,10 @@ db_print_tflags(u_int t_flags)
db_printf("%sTF_ECN_PERMIT", comma ? ", " : "");
comma = 1;
}
+ if (t_flags & TF_FASTOPEN) {
+ db_printf("%sTF_FASTOPEN", comma ? ", " : "");
+ comma = 1;
+ }
}
static void
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index 758f2c5..cff81c5 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -213,8 +213,18 @@ struct tcpcb {
u_int t_flags2; /* More tcpcb flags storage */
uint32_t t_ispare[6]; /* 5 UTO, 1 TBD */
+#if defined(_KERNEL) && defined(TCP_RFC7413)
+ void *t_pspare2[3]; /* 1 TCP_SIGNATURE, 2 TBD */
+ unsigned int *t_tfo_pending; /* TCP Fast Open pending counter */
+#else
void *t_pspare2[4]; /* 1 TCP_SIGNATURE, 3 TBD */
+#endif
+#if defined(_KERNEL) && defined(TCP_RFC7413)
+ uint64_t _pad[4]; /* 4 TBD (1-2 CC/RTT?) */
+ uint64_t t_tfo_cookie; /* TCP Fast Open cookie */
+#else
uint64_t _pad[5]; /* 5 TBD (1-2 CC/RTT?) */
+#endif
uint32_t t_tsomaxsegcount; /* TSO maximum segment count */
uint32_t t_tsomaxsegsize; /* TSO maximum segment size in bytes */
};
@@ -251,6 +261,7 @@ struct tcpcb {
#define TF_ECN_SND_ECE 0x10000000 /* ECN ECE in queue */
#define TF_CONGRECOVERY 0x20000000 /* congestion recovery mode */
#define TF_WASCRECOVERY 0x40000000 /* was in congestion recovery */
+#define TF_FASTOPEN 0x80000000 /* TCP Fast Open indication */
#define IN_FASTRECOVERY(t_flags) (t_flags & TF_FASTRECOVERY)
#define ENTER_FASTRECOVERY(t_flags) t_flags |= TF_FASTRECOVERY
@@ -310,14 +321,17 @@ struct tcpopt {
#define TOF_TS 0x0010 /* timestamp */
#define TOF_SIGNATURE 0x0040 /* TCP-MD5 signature option (RFC2385) */
#define TOF_SACK 0x0080 /* Peer sent SACK option */
-#define TOF_MAXOPT 0x0100
+#define TOF_FASTOPEN 0x0100 /* TCP Fast Open (TFO) cookie */
+#define TOF_MAXOPT 0x0200
u_int32_t to_tsval; /* new timestamp */
u_int32_t to_tsecr; /* reflected timestamp */
u_char *to_sacks; /* pointer to the first SACK blocks */
u_char *to_signature; /* pointer to the TCP-MD5 signature */
+ u_char *to_tfo_cookie; /* pointer to the TFO cookie */
u_int16_t to_mss; /* maximum segment size */
u_int8_t to_wscale; /* window scaling */
u_int8_t to_nsacks; /* number of SACK blocks */
+ u_int8_t to_tfo_len; /* TFO cookie length */
u_int32_t to_spare; /* UTO */
};
OpenPOWER on IntegriCloud