summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: hiren <hiren@FreeBSD.org> 2016-08-29 18:00:14 +0000
committer: hiren <hiren@FreeBSD.org> 2016-08-29 18:00:14 +0000
commit: 516deda2dc34807f6f662fa3a8fff85b15340179 (patch)
tree: a145ed5375149e3512f958d89a0a5a008171475e
parent: 12acfe4fb5d9ffb6301b44a3ab04f7dd14add422 (diff)
download: FreeBSD-src-516deda2dc34807f6f662fa3a8fff85b15340179.zip
download: FreeBSD-src-516deda2dc34807f6f662fa3a8fff85b15340179.tar.gz
MFC r303626 (by gallatin)
Rework IPv6 TCP path MTU discovery to match IPv4.

No objection: gallatin
-rw-r--r-- sys/netinet/tcp_subr.c 147
-rw-r--r-- sys/netinet6/icmp6.c 4
-rw-r--r-- sys/netinet6/ip6_output.c 21
3 files changed, 125 insertions, 47 deletions
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 505fc30..a9cd819 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -78,6 +78,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip_icmp.h>
#include <netinet/ip_var.h>
#ifdef INET6
+#include <netinet/icmp6.h>
#include <netinet/ip6.h>
#include <netinet6/in6_fib.h>
#include <netinet6/in6_pcb.h>
@@ -2040,72 +2041,146 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
void
tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
{
- struct tcphdr th;
+ struct in6_addr *dst;
+ struct tcphdr *th;
struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify;
struct ip6_hdr *ip6;
struct mbuf *m;
+ struct inpcb *inp;
+ struct tcpcb *tp;
+ struct icmp6_hdr *icmp6;
struct ip6ctlparam *ip6cp = NULL;
const struct sockaddr_in6 *sa6_src = NULL;
- int off;
- struct tcp_portonly {
- u_int16_t th_sport;
- u_int16_t th_dport;
- } *thp;
+ struct in_conninfo inc;
+ tcp_seq icmp_tcp_seq;
+ unsigned int mtu;
+ unsigned int off;
+
if (sa->sa_family != AF_INET6 ||
sa->sa_len != sizeof(struct sockaddr_in6))
return;
- if (cmd == PRC_MSGSIZE)
- notify = tcp_mtudisc_notify;
- else if (!PRC_IS_REDIRECT(cmd) &&
- ((unsigned)cmd >= PRC_NCMDS || inet6ctlerrmap[cmd] == 0))
- return;
-
/* if the parameter is from icmp6, decode it. */
if (d != NULL) {
ip6cp = (struct ip6ctlparam *)d;
+ icmp6 = ip6cp->ip6c_icmp6;
m = ip6cp->ip6c_m;
ip6 = ip6cp->ip6c_ip6;
off = ip6cp->ip6c_off;
sa6_src = ip6cp->ip6c_src;
+ dst = ip6cp->ip6c_finaldst;
} else {
m = NULL;
ip6 = NULL;
off = 0; /* fool gcc */
sa6_src = &sa6_any;
+ dst = NULL;
}
- if (ip6 != NULL) {
- struct in_conninfo inc;
- /*
- * XXX: We assume that when IPV6 is non NULL,
- * M and OFF are valid.
- */
+ if (cmd == PRC_MSGSIZE)
+ notify = tcp_mtudisc_notify;
+ else if (V_icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB ||
+ cmd == PRC_UNREACH_PORT || cmd == PRC_TIMXCEED_INTRANS) &&
+ ip6 != NULL)
+ notify = tcp_drop_syn_sent;
- /* check if we can safely examine src and dst ports */
- if (m->m_pkthdr.len < off + sizeof(*thp))
- return;
+ /*
+ * Hostdead is ugly because it goes linearly through all PCBs.
+ * XXX: We never get this from ICMP, otherwise it makes an
+ * excellent DoS attack on machines with many connections.
+ */
+ else if (cmd == PRC_HOSTDEAD)
+ ip6 = NULL;
+ else if ((unsigned)cmd >= PRC_NCMDS || inet6ctlerrmap[cmd] == 0)
+ return;
- bzero(&th, sizeof(th));
- m_copydata(m, off, sizeof(*thp), (caddr_t)&th);
+ if (ip6 == NULL) {
+ in6_pcbnotify(&V_tcbinfo, sa, 0,
+ (const struct sockaddr *)sa6_src,
+ 0, cmd, NULL, notify);
+ return;
+ }
- in6_pcbnotify(&V_tcbinfo, sa, th.th_dport,
- (struct sockaddr *)ip6cp->ip6c_src,
- th.th_sport, cmd, NULL, notify);
+ /* Check if we can safely get the ports from the tcp hdr */
+ if (m == NULL ||
+ (m->m_pkthdr.len <
+ (int32_t) (off + offsetof(struct tcphdr, th_seq)))) {
+ return;
+ }
+ th = (struct tcphdr *) mtodo(ip6cp->ip6c_m, ip6cp->ip6c_off);
+ INP_INFO_RLOCK(&V_tcbinfo);
+ inp = in6_pcblookup(&V_tcbinfo, &ip6->ip6_dst, th->th_dport,
+ &ip6->ip6_src, th->th_sport, INPLOOKUP_WLOCKPCB, NULL);
+ if (inp != NULL && PRC_IS_REDIRECT(cmd)) {
+ /* signal EHOSTDOWN, as it flushes the cached route */
+ inp = (*notify)(inp, EHOSTDOWN);
+ if (inp != NULL)
+ INP_WUNLOCK(inp);
+ } else if (inp != NULL) {
+ if (!(inp->inp_flags & INP_TIMEWAIT) &&
+ !(inp->inp_flags & INP_DROPPED) &&
+ !(inp->inp_socket == NULL)) {
+ icmp_tcp_seq = ntohl(th->th_seq);
+ tp = intotcpcb(inp);
+ if (SEQ_GEQ(icmp_tcp_seq, tp->snd_una) &&
+ SEQ_LT(icmp_tcp_seq, tp->snd_max)) {
+ if (cmd == PRC_MSGSIZE) {
+ /*
+ * MTU discovery:
+ * If we got a needfrag set the MTU
+ * in the route to the suggested new
+ * value (if given) and then notify.
+ */
+ mtu = ntohl(icmp6->icmp6_mtu);
+ /*
+ * If no alternative MTU was
+ * proposed, or the proposed
+ * MTU was too small, set to
+ * the min.
+ */
+ if (mtu < IPV6_MMTU)
+ mtu = IPV6_MMTU - 8;
+
+
+ bzero(&inc, sizeof(inc));
+ inc.inc_fibnum = M_GETFIB(m);
+ inc.inc_flags |= INC_ISIPV6;
+ inc.inc6_faddr = *dst;
+ if (in6_setscope(&inc.inc6_faddr,
+ m->m_pkthdr.rcvif, NULL))
+ goto unlock_inp;
+
+ /*
+ * Only process the offered MTU if it
+ * is smaller than the current one.
+ */
+ if (mtu < tp->t_maxseg +
+ (sizeof (*th) + sizeof (*ip6))) {
+ tcp_hc_updatemtu(&inc, mtu);
+ tcp_mtudisc(inp, mtu);
+ ICMP6STAT_INC(icp6s_pmtuchg);
+ }
+ } else
+ inp = (*notify)(inp,
+ inet6ctlerrmap[cmd]);
+ }
+ }
+unlock_inp:
+ if (inp != NULL)
+ INP_WUNLOCK(inp);
+ } else {
bzero(&inc, sizeof(inc));
- inc.inc_fport = th.th_dport;
- inc.inc_lport = th.th_sport;
- inc.inc6_faddr = ((struct sockaddr_in6 *)sa)->sin6_addr;
- inc.inc6_laddr = ip6cp->ip6c_src->sin6_addr;
+ inc.inc_fibnum = M_GETFIB(m);
inc.inc_flags |= INC_ISIPV6;
- INP_INFO_RLOCK(&V_tcbinfo);
- syncache_unreach(&inc, &th);
- INP_INFO_RUNLOCK(&V_tcbinfo);
- } else
- in6_pcbnotify(&V_tcbinfo, sa, 0, (const struct sockaddr *)sa6_src,
- 0, cmd, NULL, notify);
+ inc.inc_fport = th->th_dport;
+ inc.inc_lport = th->th_sport;
+ inc.inc6_faddr = *dst;
+ inc.inc6_laddr = ip6->ip6_src;
+ syncache_unreach(&inc, th);
+ }
+ INP_INFO_RUNLOCK(&V_tcbinfo);
}
#endif /* INET6 */
diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c
index 69bb60f..6b6c92e 100644
--- a/sys/netinet6/icmp6.c
+++ b/sys/netinet6/icmp6.c
@@ -485,15 +485,13 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
icmp6_ifstat_inc(ifp, ifs6_in_dstunreach);
switch (code) {
case ICMP6_DST_UNREACH_NOROUTE:
+ case ICMP6_DST_UNREACH_ADDR: /* PRC_HOSTDEAD is a DOS */
code = PRC_UNREACH_NET;
break;
case ICMP6_DST_UNREACH_ADMIN:
icmp6_ifstat_inc(ifp, ifs6_in_adminprohib);
code = PRC_UNREACH_PROTOCOL; /* is this a good code? */
break;
- case ICMP6_DST_UNREACH_ADDR:
- code = PRC_HOSTDEAD;
- break;
case ICMP6_DST_UNREACH_BEYONDSCOPE:
/* I mean "source address was incorrect." */
code = PRC_PARAMPROB;
diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c
index dc1eeb4..f31679e 100644
--- a/sys/netinet6/ip6_output.c
+++ b/sys/netinet6/ip6_output.c
@@ -150,9 +150,10 @@ static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
static int ip6_getpmtu(struct route_in6 *, int,
- struct ifnet *, const struct in6_addr *, u_long *, int *, u_int);
+ struct ifnet *, const struct in6_addr *, u_long *, int *, u_int,
+ u_int);
static int ip6_calcmtu(struct ifnet *, const struct in6_addr *, u_long,
- u_long *, int *);
+ u_long *, int *, u_int);
static int ip6_getpmtu_ctl(u_int, const struct in6_addr *, u_long *);
static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
@@ -718,7 +719,7 @@ again:
/* Determine path MTU. */
if ((error = ip6_getpmtu(ro_pmtu, ro != ro_pmtu, ifp, &ip6->ip6_dst,
- &mtu, &alwaysfrag, fibnum)) != 0)
+ &mtu, &alwaysfrag, fibnum, *nexthdrp)) != 0)
goto bad;
/*
@@ -1251,7 +1252,7 @@ ip6_getpmtu_ctl(u_int fibnum, const struct in6_addr *dst, u_long *mtup)
ifp = nh6.nh_ifp;
mtu = nh6.nh_mtu;
- error = ip6_calcmtu(ifp, dst, mtu, mtup, NULL);
+ error = ip6_calcmtu(ifp, dst, mtu, mtup, NULL, 0);
fib6_free_nh_ext(fibnum, &nh6);
return (error);
@@ -1270,7 +1271,7 @@ ip6_getpmtu_ctl(u_int fibnum, const struct in6_addr *dst, u_long *mtup)
static int
ip6_getpmtu(struct route_in6 *ro_pmtu, int do_lookup,
struct ifnet *ifp, const struct in6_addr *dst, u_long *mtup,
- int *alwaysfragp, u_int fibnum)
+ int *alwaysfragp, u_int fibnum, u_int proto)
{
struct nhop6_basic nh6;
struct in6_addr kdst;
@@ -1308,7 +1309,7 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, int do_lookup,
if (ro_pmtu->ro_rt)
mtu = ro_pmtu->ro_rt->rt_mtu;
- return (ip6_calcmtu(ifp, dst, mtu, mtup, alwaysfragp));
+ return (ip6_calcmtu(ifp, dst, mtu, mtup, alwaysfragp, proto));
}
/*
@@ -1320,7 +1321,7 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, int do_lookup,
*/
static int
ip6_calcmtu(struct ifnet *ifp, const struct in6_addr *dst, u_long rt_mtu,
- u_long *mtup, int *alwaysfragp)
+ u_long *mtup, int *alwaysfragp, u_int proto)
{
u_long mtu = 0;
int alwaysfrag = 0;
@@ -1335,7 +1336,11 @@ ip6_calcmtu(struct ifnet *ifp, const struct in6_addr *dst, u_long rt_mtu,
inc.inc6_faddr = *dst;
ifmtu = IN6_LINKMTU(ifp);
- mtu = tcp_hc_getmtu(&inc);
+
+ /* TCP is known to react to pmtu changes so skip hc */
+ if (proto != IPPROTO_TCP)
+ mtu = tcp_hc_getmtu(&inc);
+
if (mtu)
mtu = min(mtu, rt_mtu);
else
OpenPOWER on IntegriCloud