summaryrefslogtreecommitdiffstats
path: root/sys/netinet
diff options
context:
space:
mode:
authorsbruno <sbruno@FreeBSD.org>2014-10-29 22:17:45 +0000
committersbruno <sbruno@FreeBSD.org>2014-10-29 22:17:45 +0000
commit47c62aa0b0b32c345bff946b9d20c22176e209a6 (patch)
tree9dfed8b1a9ebe5ab0fd30460aa7b2f24a5340d19 /sys/netinet
parent97d69448b874f5c698f9ee0ceddc195e496c8838 (diff)
downloadFreeBSD-src-47c62aa0b0b32c345bff946b9d20c22176e209a6.zip
FreeBSD-src-47c62aa0b0b32c345bff946b9d20c22176e209a6.tar.gz
MFC r272720, 273061, 273062, 273063, 273064
Implement PLPMTUD blackhole detection (RFC 4821), inspired by code from xnu sources. If we encounter a network where ICMP is blocked the Needs Frag indicator may not propagate back to us. Attempt to downshift the mss once to a preconfigured value. Note, this is turned off by default.
Diffstat (limited to 'sys/netinet')
-rw-r--r--sys/netinet/tcp_output.c17
-rw-r--r--sys/netinet/tcp_timer.c156
-rw-r--r--sys/netinet/tcp_var.h11
3 files changed, 182 insertions, 2 deletions
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index 8c97c12..8abb16d 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -676,6 +676,12 @@ just_return:
send:
SOCKBUF_LOCK_ASSERT(&so->so_snd);
+ if (len > 0) {
+ if (len >= tp->t_maxseg)
+ tp->t_flags2 |= TF2_PLPMTU_MAXSEGSNT;
+ else
+ tp->t_flags2 &= ~TF2_PLPMTU_MAXSEGSNT;
+ }
/*
* Before ESTABLISHED, force sending of initial options
* unless TCP set not to do any options.
@@ -1184,6 +1190,11 @@ send:
*/
ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6));
+ if (V_path_mtu_discovery && tp->t_maxopd > V_tcp_minmss)
+ tp->t_flags2 |= TF2_PLPMTU_PMTUD;
+ else
+ tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
+
if (tp->t_state == TCPS_SYN_SENT)
TCP_PROBE5(connect__request, NULL, tp, ip6, tp, th);
@@ -1220,8 +1231,12 @@ send:
*
* NB: Don't set DF on small MTU/MSS to have a safe fallback.
*/
- if (V_path_mtu_discovery && tp->t_maxopd > V_tcp_minmss)
+ if (V_path_mtu_discovery && tp->t_maxopd > V_tcp_minmss) {
ip->ip_off |= htons(IP_DF);
+ tp->t_flags2 |= TF2_PLPMTU_PMTUD;
+ } else {
+ tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
+ }
if (tp->t_state == TCPS_SYN_SENT)
TCP_PROBE5(connect__request, NULL, tp, ip, tp, th);
diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c
index bde7503..00fcb22 100644
--- a/sys/netinet/tcp_timer.c
+++ b/sys/netinet/tcp_timer.c
@@ -63,6 +63,9 @@ __FBSDID("$FreeBSD$");
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
+#ifdef INET6
+#include <netinet6/tcp6_var.h>
+#endif
#include <netinet/tcpip.h>
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
@@ -124,6 +127,54 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW,
&tcp_rexmit_drop_options, 0,
"Drop TCP options from 3rd and later retransmitted SYN");
+static VNET_DEFINE(int, tcp_pmtud_blackhole_detect);
+#define V_tcp_pmtud_blackhole_detect VNET(tcp_pmtud_blackhole_detect)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection,
+ CTLFLAG_RW,
+ &VNET_NAME(tcp_pmtud_blackhole_detect), 0,
+ "Path MTU Discovery Black Hole Detection Enabled");
+
+static VNET_DEFINE(int, tcp_pmtud_blackhole_activated);
+#define V_tcp_pmtud_blackhole_activated \
+ VNET(tcp_pmtud_blackhole_activated)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated,
+ CTLFLAG_RD,
+ &VNET_NAME(tcp_pmtud_blackhole_activated), 0,
+ "Path MTU Discovery Black Hole Detection, Activation Count");
+
+static VNET_DEFINE(int, tcp_pmtud_blackhole_activated_min_mss);
+#define V_tcp_pmtud_blackhole_activated_min_mss \
+ VNET(tcp_pmtud_blackhole_activated_min_mss)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated_min_mss,
+ CTLFLAG_RD,
+ &VNET_NAME(tcp_pmtud_blackhole_activated_min_mss), 0,
+ "Path MTU Discovery Black Hole Detection, Activation Count at min MSS");
+
+static VNET_DEFINE(int, tcp_pmtud_blackhole_failed);
+#define V_tcp_pmtud_blackhole_failed VNET(tcp_pmtud_blackhole_failed)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_failed,
+ CTLFLAG_RD,
+ &VNET_NAME(tcp_pmtud_blackhole_failed), 0,
+ "Path MTU Discovery Black Hole Detection, Failure Count");
+
+#ifdef INET
+static VNET_DEFINE(int, tcp_pmtud_blackhole_mss) = 1200;
+#define V_tcp_pmtud_blackhole_mss VNET(tcp_pmtud_blackhole_mss)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss,
+ CTLFLAG_RW,
+ &VNET_NAME(tcp_pmtud_blackhole_mss), 0,
+ "Path MTU Discovery Black Hole Detection lowered MSS");
+#endif
+
+#ifdef INET6
+static VNET_DEFINE(int, tcp_v6pmtud_blackhole_mss) = 1220;
+#define V_tcp_v6pmtud_blackhole_mss VNET(tcp_v6pmtud_blackhole_mss)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, v6pmtud_blackhole_mss,
+ CTLFLAG_RW,
+ &VNET_NAME(tcp_v6pmtud_blackhole_mss), 0,
+ "Path MTU Discovery IPv6 Black Hole Detection lowered MSS");
+#endif
+
static int per_cpu_timers = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
&per_cpu_timers , 0, "run tcp timers on all cpus");
@@ -492,6 +543,7 @@ tcp_timer_rexmt(void * xtp)
ostate = tp->t_state;
#endif
+
INP_INFO_RLOCK(&V_tcbinfo);
inp = tp->t_inpcb;
/*
@@ -593,6 +645,110 @@ tcp_timer_rexmt(void * xtp)
rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
TCPT_RANGESET(tp->t_rxtcur, rexmt,
tp->t_rttmin, TCPTV_REXMTMAX);
+
+ /*
+ * We enter the path for PLMTUD if connection is established or, if
+ * connection is FIN_WAIT_1 status, reason for the last is that if
+ * amount of data we send is very small, we could send it in couple of
+ * packets and process straight to FIN. In that case we won't catch
+ * ESTABLISHED state.
+ */
+ if (V_tcp_pmtud_blackhole_detect && (((tp->t_state == TCPS_ESTABLISHED))
+ || (tp->t_state == TCPS_FIN_WAIT_1))) {
+ int optlen;
+#ifdef INET6
+ int isipv6;
+#endif
+
+ if (((tp->t_flags2 & (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) ==
+ (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) &&
+ (tp->t_rxtshift <= 2)) {
+ /*
+ * Enter Path MTU Black-hole Detection mechanism:
+ * - Disable Path MTU Discovery (IP "DF" bit).
+ * - Reduce MTU to lower value than what we
+ * negotiated with peer.
+ */
+ /* Record that we may have found a black hole. */
+ tp->t_flags2 |= TF2_PLPMTU_BLACKHOLE;
+
+ /* Keep track of previous MSS. */
+ optlen = tp->t_maxopd - tp->t_maxseg;
+ tp->t_pmtud_saved_maxopd = tp->t_maxopd;
+
+ /*
+ * Reduce the MSS to blackhole value or to the default
+ * in an attempt to retransmit.
+ */
+#ifdef INET6
+ isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) ? 1 : 0;
+ if (isipv6 &&
+ tp->t_maxopd > V_tcp_v6pmtud_blackhole_mss) {
+ /* Use the sysctl tuneable blackhole MSS. */
+ tp->t_maxopd = V_tcp_v6pmtud_blackhole_mss;
+ V_tcp_pmtud_blackhole_activated++;
+ } else if (isipv6) {
+ /* Use the default MSS. */
+ tp->t_maxopd = V_tcp_v6mssdflt;
+ /*
+ * Disable Path MTU Discovery when we switch to
+ * minmss.
+ */
+ tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
+ V_tcp_pmtud_blackhole_activated_min_mss++;
+ }
+#endif
+#if defined(INET6) && defined(INET)
+ else
+#endif
+#ifdef INET
+ if (tp->t_maxopd > V_tcp_pmtud_blackhole_mss) {
+ /* Use the sysctl tuneable blackhole MSS. */
+ tp->t_maxopd = V_tcp_pmtud_blackhole_mss;
+ V_tcp_pmtud_blackhole_activated++;
+ } else {
+ /* Use the default MSS. */
+ tp->t_maxopd = V_tcp_mssdflt;
+ /*
+ * Disable Path MTU Discovery when we switch to
+ * minmss.
+ */
+ tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
+ V_tcp_pmtud_blackhole_activated_min_mss++;
+ }
+#endif
+ tp->t_maxseg = tp->t_maxopd - optlen;
+ /*
+ * Reset the slow-start flight size
+ * as it may depend on the new MSS.
+ */
+ if (CC_ALGO(tp)->conn_init != NULL)
+ CC_ALGO(tp)->conn_init(tp->ccv);
+ } else {
+ /*
+ * If further retransmissions are still unsuccessful
+ * with a lowered MTU, maybe this isn't a blackhole and
+ * we restore the previous MSS and blackhole detection
+ * flags.
+ */
+ if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) &&
+ (tp->t_rxtshift > 4)) {
+ tp->t_flags2 |= TF2_PLPMTU_PMTUD;
+ tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE;
+ optlen = tp->t_maxopd - tp->t_maxseg;
+ tp->t_maxopd = tp->t_pmtud_saved_maxopd;
+ tp->t_maxseg = tp->t_maxopd - optlen;
+ V_tcp_pmtud_blackhole_failed++;
+ /*
+ * Reset the slow-start flight size as it
+ * may depend on the new MSS.
+ */
+ if (CC_ALGO(tp)->conn_init != NULL)
+ CC_ALGO(tp)->conn_init(tp->ccv);
+ }
+ }
+ }
+
/*
* Disable RFC1323 and SACK if we haven't got any response to
* our third SYN to work-around some broken terminal servers
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index 398df8b..10b1096 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -209,8 +209,10 @@ struct tcpcb {
u_int t_keepcnt; /* number of keepalives before close */
u_int t_tsomax; /* tso burst length limit */
+ u_int t_pmtud_saved_maxopd; /* pre-blackhole MSS */
+ u_int t_flags2; /* More tcpcb flags storage */
- uint32_t t_ispare[8]; /* 5 UTO, 3 TBD */
+ uint32_t t_ispare[6]; /* 5 UTO, 1 TBD */
void *t_pspare2[4]; /* 1 TCP_SIGNATURE, 3 TBD */
uint64_t _pad[6]; /* 6 TBD (1-2 CC/RTT?) */
};
@@ -284,6 +286,13 @@ struct tcpcb {
#endif /* TCP_SIGNATURE */
/*
+ * Flags for PLPMTU handling, t_flags2
+ */
+#define TF2_PLPMTU_BLACKHOLE 0x00000001 /* Possible PLPMTUD Black Hole. */
+#define TF2_PLPMTU_PMTUD 0x00000002 /* Allowed to attempt PLPMTUD. */
+#define TF2_PLPMTU_MAXSEGSNT 0x00000004 /* Last seg sent was full seg. */
+
+/*
* Structure to hold TCP options that are only used during segment
* processing (in tcp_input), but not held in the tcpcb.
* It's basically used to reduce the number of parameters
OpenPOWER on IntegriCloud