diff options
author | andre <andre@FreeBSD.org> | 2007-05-28 11:03:53 +0000 |
---|---|---|
committer | andre <andre@FreeBSD.org> | 2007-05-28 11:03:53 +0000 |
commit | b4b7eeb094a1d9171e9bc849d768dd1b422dc540 (patch) | |
tree | 8e95731f13820079df6dc3b4239da443ec06557b /sys | |
parent | 7d4dfa79424729c190b3b181ff89fc4c50530f22 (diff) | |
download | FreeBSD-src-b4b7eeb094a1d9171e9bc849d768dd1b422dc540.zip FreeBSD-src-b4b7eeb094a1d9171e9bc849d768dd1b422dc540.tar.gz |
Refactor and rewrite in parts the SYN handling code on listen sockets
in tcp_input():
o tighten the checks on allowed TCP flags to be RFC793 and
tcp-secure conform
o log check failures to syslog at LOG_DEBUG level
o rearrange the code flow to be easier to follow
o add KASSERTs to validate assumptions of the code flow
Add sysctl net.inet.tcp.syncache.rst_on_sock_fail defaulting to enable
that controls the behavior on socket creation failure for a otherwise
successful 3-way handshake. The socket creation can fail due to global
memory shortage, listen queue limits and file descriptor limits. The
sysctl allows to chose between two options to deal with this. One is
to send a reset to the other endpoint to notify it about the failure
(default). The other one is to ignore and treat the failure as a
transient error and have the other endpoint retransmit for another try.
Reviewed by: rwatson (in general)
Diffstat (limited to 'sys')
-rw-r--r-- | sys/netinet/tcp_input.c | 163 | ||||
-rw-r--r-- | sys/netinet/tcp_syncache.c | 4 | ||||
-rw-r--r-- | sys/netinet/tcp_var.h | 3 |
3 files changed, 125 insertions, 45 deletions
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 02a9c22..5a221e3 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -241,9 +241,11 @@ tcp_input(struct mbuf *m, int off0) struct ip6_hdr *ip6 = NULL; int isipv6; #else + const void *ip6 = NULL; const int isipv6 = 0; #endif struct tcpopt to; /* options in this segment */ + char *s = NULL; /* address and port logging */ #ifdef TCPDEBUG /* @@ -376,16 +378,6 @@ tcp_input(struct mbuf *m, int off0) thflags = th->th_flags; /* - * If the drop_synfin option is enabled, drop all packets with - * both the SYN and FIN bits set. This prevents e.g. nmap from - * identifying the TCP/IP stack. - * - * This is a violation of the TCP specification. - */ - if (drop_synfin && (thflags & (TH_SYN|TH_FIN)) == (TH_SYN|TH_FIN)) - goto drop; - - /* * Convert TCP protocol specific fields to host format. */ th->th_seq = ntohl(th->th_seq); @@ -480,17 +472,10 @@ findpcb: */ if ((tcp_log_in_vain == 1 && (thflags & TH_SYN)) || tcp_log_in_vain == 2) { - char *s; -#ifdef INET6 - s = tcp_log_addrs(NULL, th, (void *)ip, (void *)ip6); -#else - s = tcp_log_addrs(NULL, th, (void *)ip, NULL); -#endif /* INET6 */ - if (s != NULL) { + if ((s = tcp_log_addrs(NULL, th, (void *)ip, + (void *)ip6))) log(LOG_INFO, "%s; %s: Connection attempt " "to closed port\n", s, __func__); - free(s, M_TCPLOG); - } } /* * When blackholing do not respond with a RST but @@ -537,12 +522,10 @@ findpcb: * segment and send an appropriate response. */ tp = intotcpcb(inp); - if (tp == NULL) { + if (tp == NULL || tp->t_state == TCPS_CLOSED) { rstreason = BANDLIM_RST_CLOSEDPORT; goto dropwithreset; } - if (tp->t_state == TCPS_CLOSED) - goto dropunlock; /* XXX: dropwithreset??? */ #ifdef MAC INP_LOCK_ASSERT(inp); @@ -600,8 +583,7 @@ findpcb: * contains a RST, check the sequence number to see if it * is a valid reset segment. */ - if ((thflags & (TH_RST|TH_ACK|TH_SYN)) != TH_SYN) { - if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) { + if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) { /* * Parse the TCP options here because * syncookies need access to the reflected @@ -626,10 +608,21 @@ findpcb: * but could not allocate a socket * either due to memory shortage, * listen queue length limits or - * global socket limits. + * global socket limits. Send RST + * or wait and have the remote end + * retransmit the ACK for another + * try. */ - rstreason = BANDLIM_UNLIMITED; - goto dropwithreset; + if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Listen socket: " + "Socket allocation failure, %s\n", + s, __func__, (tcp_sc_rst_sock_fail ? + "sending RST" : "try again")); + if (tcp_sc_rst_sock_fail) { + rstreason = BANDLIM_UNLIMITED; + goto dropwithreset; + } else + goto dropunlock; } /* * Socket is created in state SYN_RECEIVED. @@ -648,23 +641,80 @@ findpcb: tlen); INP_INFO_UNLOCK_ASSERT(&tcbinfo); return; - } - if (thflags & TH_RST) { - syncache_chkrst(&inc, th); - goto dropunlock; - } - if (thflags & TH_ACK) { - syncache_badack(&inc); - tcpstat.tcps_badsyn++; - rstreason = BANDLIM_RST_OPENPORT; - goto dropwithreset; - } + } + /* + * Our (SYN|ACK) response was rejected. + * Check with syncache and remove entry to prevent + * retransmits. + */ + if ((thflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) { + if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Listen socket: " + "SYN|ACK was rejected\n", s, __func__); + syncache_chkrst(&inc, th); goto dropunlock; } - + /* + * Spurious RST. Ignore. + */ + if (thflags & TH_RST) { + if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Listen socket: " + "Spurious RST\n", s, __func__); + goto dropunlock; + } + /* + * We can't do anything without SYN. + */ + if ((thflags & TH_SYN) == 0) { + if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Listen socket: " + "SYN missing\n", s, __func__); + tcpstat.tcps_badsyn++; + goto dropunlock; + } + /* + * (SYN|ACK) is bogus on a listen socket. + */ + if (thflags & TH_ACK) { + if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Listen socket: " + "SYN|ACK bogus\n", s, __func__); + syncache_badack(&inc); /* XXX: Not needed! */ + tcpstat.tcps_badsyn++; + rstreason = BANDLIM_RST_OPENPORT; + goto dropwithreset; + } + /* + * If the drop_synfin option is enabled, drop all + * segments with both the SYN and FIN bits set. + * This prevents e.g. nmap from identifying the + * TCP/IP stack. + * XXX: Poor reasoning. nmap has other methods + * and is constantly refining its stack detection + * strategies. + * + * This is a violation of the TCP specification + * and was used by RFC1644. + */ + if ((thflags & TH_FIN) && drop_synfin) { + if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Listen socket: " + "SYN|FIN ignored\n", s, __func__); + tcpstat.tcps_badsyn++; + goto dropunlock; + } /* * Segment's flags are (SYN) or (SYN|FIN). + * + * TH_PUSH, TH_URG, TH_ECE, TH_CWR are ignored + * as they do not affect the state of the TCP FSM. + * The data pointed to by TH_URG and th_urp is ignored. */ + KASSERT((thflags & (TH_RST|TH_ACK)) == 0, + ("%s: Listen socket: TH_RST or TH_ACK set", __func__)); + KASSERT(thflags & (TH_SYN), + ("%s: Listen socket: TH_SYN not set", __func__)); #ifdef INET6 /* * If deprecated address is forbidden, @@ -701,6 +751,10 @@ findpcb: if ((ia6 = ip6_getdstifaddr(m)) && (ia6->ia6_flags & IN6_IFF_DEPRECATED)) { + if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Listen socket: " + "Deprecated IPv6 address\n", + s, __func__); rstreason = BANDLIM_RST_OPENPORT; goto dropwithreset; } @@ -723,21 +777,39 @@ findpcb: if (isipv6) { #ifdef INET6 if (th->th_dport == th->th_sport && - IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6->ip6_src)) + IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6->ip6_src)) { + if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Listen socket: " + "Connection to self\n", s, __func__); goto dropunlock; + } if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || - IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) + IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) { + if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Listen socket: " + "Connection to multicast address\n", + s, __func__); goto dropunlock; + } #endif } else { if (th->th_dport == th->th_sport && - ip->ip_dst.s_addr == ip->ip_src.s_addr) + ip->ip_dst.s_addr == ip->ip_src.s_addr) { + if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Listen socket: " + "Connection to self\n", s, __func__); goto dropunlock; + } if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || IN_MULTICAST(ntohl(ip->ip_src.s_addr)) || ip->ip_src.s_addr == htonl(INADDR_BROADCAST) || - in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) + in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) { + if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Listen socket: " + "Connection to multicast address\n", + s, __func__); goto dropunlock; + } } /* * SYN appears to be valid. Create compressed TCP state @@ -752,8 +824,9 @@ findpcb: syncache_add(&inc, &to, th, inp, &so, m); /* * Entry added to syncache and mbuf consumed. - * Everything unlocked already by syncache_add(). + * Everything already unlocked by syncache_add(). */ + INP_INFO_UNLOCK_ASSERT(&tcbinfo); return; } @@ -777,6 +850,8 @@ dropunlock: INP_INFO_WUNLOCK(&tcbinfo); drop: INP_INFO_UNLOCK_ASSERT(&tcbinfo); + if (s != NULL) + free(s, M_TCPLOG); if (m != NULL) m_freem(m); return; diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 4143700..3c70528 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -213,6 +213,10 @@ SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, hashsize, CTLFLAG_RDTUN, SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, rexmtlimit, CTLFLAG_RW, &tcp_syncache.rexmt_limit, 0, "Limit on SYN/ACK retransmissions"); +int tcp_sc_rst_sock_fail = 1; +SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, rst_on_sock_fail, CTLFLAG_RW, + &tcp_sc_rst_sock_fail, 0, "Send reset on socket allocation failure"); + static MALLOC_DEFINE(M_SYNCACHE, "syncache", "TCP syncache"); #define SYNCACHE_HASH(inc, mask) \ diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 5b5eff0..bcbc2fc 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -502,7 +502,8 @@ extern int path_mtu_discovery; extern int ss_fltsz; extern int ss_fltsz_local; -extern int tcp_do_sack; /* SACK enabled/disabled */ +extern int tcp_do_sack; /* SACK enabled/disabled */ +extern int tcp_sc_rst_sock_fail; /* RST on sock alloc failure */ int tcp_addoptions(struct tcpopt *, u_char *); struct tcpcb * |