summaryrefslogtreecommitdiffstats
path: root/sys/netinet/tcp_timewait.c
diff options
context:
space:
mode:
authorrwatson <rwatson@FreeBSD.org>2006-04-01 16:36:36 +0000
committerrwatson <rwatson@FreeBSD.org>2006-04-01 16:36:36 +0000
commit5078a28ae8712cfa4dfde732bd6ce4b0c4844bb0 (patch)
treefbe5163bbedb12aefb046285a48eec04db08aa3d /sys/netinet/tcp_timewait.c
parent7f0301fd6c390eefc92e5a70b2a44bc8285ccb62 (diff)
downloadFreeBSD-src-5078a28ae8712cfa4dfde732bd6ce4b0c4844bb0.zip
FreeBSD-src-5078a28ae8712cfa4dfde732bd6ce4b0c4844bb0.tar.gz
Update TCP for infrastructural changes to the socket/pcb refcount model,
pru_abort(), pru_detach(), and in_pcbdetach(): - Universally support and enforce the invariant that so_pcb is never NULL, converting dozens of unnecessary NULL checks into assertions, and eliminating dozens of unnecessary error handling cases in protocol code. - In some cases, eliminate unnecessary pcbinfo locking, as it is no longer required to ensure so_pcb != NULL. For example, the receive code no longer requires the pcbinfo lock, and the send code only requires it if building a new connection on an otherwise unconnected socket triggered via sendto() with an address. This should significnatly reduce tcbinfo lock contention in the receive and send cases. - In order to support the invariant that so_pcb != NULL, it is now necessary for the TCP code to not discard the tcpcb any time a connection is dropped, but instead leave the tcpcb until the socket is shutdown. This case is handled by setting INP_DROPPED, to substitute for using a NULL so_pcb to indicate that the connection has been dropped. This requires the inpcb lock, but not the pcbinfo lock. - Unlike all other protocols in the tree, TCP may need to retain access to the socket after the file descriptor has been closed. Set SS_PROTOREF in tcp_detach() in order to prevent the socket from being freed, and add a flag, INP_SOCKREF, so that the TCP code knows whether or not it needs to free the socket when the connection finally does close. The typical case where this occurs is if close() is called on a TCP socket before all sent data in the send socket buffer has been transmitted or acknowledged. If INP_SOCKREF is found when the connection is dropped, we release the inpcb, tcpcb, and socket instead of flagging INP_DROPPED. - Abort and detach protocol switch methods no longer return failures, nor attempt to free sockets, as the socket layer does this. - Annotate the existence of a long-standing race in the TCP timer code, in which timers are stopped but not drained when the socket is freed, as waiting for drain may lead to deadlocks, or have to occur in a context where waiting is not permitted. This race has been handled by testing to see if the tcpcb pointer in the inpcb is NULL (and vice versa), which is not normally permitted, but may be true of a inpcb and tcpcb have been freed. Add a counter to test how often this race has actually occurred, and a large comment for each instance where we compare potentially freed memory with NULL. This will have to be fixed in the near future, but requires is to further address how to handle the timer shutdown shutdown issue. - Several TCP calls no longer potentially free the passed inpcb/tcpcb, so no longer need to return a pointer to indicate whether the argument passed in is still valid. - Un-macroize debugging and locking setup for various protocol switch methods for TCP, as it lead to more obscurity, and as locking becomes more customized to the methods, offers less benefit. - Assert copyright on tcp_usrreq.c due to significant modifications that have been made as part of this work. These changes significantly modify the memory management and connection logic of our TCP implementation, and are (as such) High Risk Changes, and likely to contain serious bugs. Please report problems to the current@ mailing list ASAP, ideally with simple test cases, and optionally, packet traces. MFC after: 3 months
Diffstat (limited to 'sys/netinet/tcp_timewait.c')
-rw-r--r--sys/netinet/tcp_timewait.c156
1 files changed, 123 insertions, 33 deletions
diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c
index e61d541..e3d5f15 100644
--- a/sys/netinet/tcp_timewait.c
+++ b/sys/netinet/tcp_timewait.c
@@ -213,7 +213,6 @@ SYSCTL_INT(_net_inet_tcp_inflight, OID_AUTO, stab, CTLFLAG_RW,
uma_zone_t sack_hole_zone;
static struct inpcb *tcp_notify(struct inpcb *, int);
-static void tcp_discardcb(struct tcpcb *);
static void tcp_isn_tick(void *);
/*
@@ -665,7 +664,7 @@ tcp_drop(tp, errno)
return (tcp_close(tp));
}
-static void
+void
tcp_discardcb(tp)
struct tcpcb *tp;
{
@@ -676,6 +675,12 @@ tcp_discardcb(tp)
int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
#endif /* INET6 */
+ /*
+ * XXXRW: This is all very well and good, but actually, we might be
+ * discarding the tcpcb after the socket is gone, so we can't do
+ * this:
+ KASSERT(so != NULL, ("tcp_discardcb: so == NULL"));
+ */
INP_LOCK_ASSERT(inp);
/*
@@ -755,7 +760,12 @@ tcp_discardcb(tp)
inp->inp_ppcb = NULL;
tp->t_inpcb = NULL;
uma_zfree(tcpcb_zone, tp);
- soisdisconnected(so);
+
+ /*
+ * XXXRW: This seems a bit unclean.
+ */
+ if (so != NULL)
+ soisdisconnected(so);
}
/*
@@ -769,22 +779,40 @@ tcp_close(tp)
struct tcpcb *tp;
{
struct inpcb *inp = tp->t_inpcb;
-#ifdef INET6
- struct socket *so = inp->inp_socket;
-#endif
+ struct socket *so;
INP_INFO_WLOCK_ASSERT(&tcbinfo);
INP_LOCK_ASSERT(inp);
- tcp_discardcb(tp);
+ inp->inp_vflag |= INP_DROPPED;
+
+ tcpstat.tcps_closed++;
+ KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL"));
+ so = inp->inp_socket;
+ soisdisconnected(so);
+ if (inp->inp_vflag & INP_SOCKREF) {
+ KASSERT(so->so_state & SS_PROTOREF,
+ ("tcp_close: !SS_PROTOREF"));
+ inp->inp_vflag &= ~INP_SOCKREF;
+ tcp_discardcb(tp);
#ifdef INET6
- if (INP_CHECK_SOCKAF(so, AF_INET6))
- in6_pcbdetach(inp);
- else
+ if (inp->inp_vflag & INP_IPV6PROTO) {
+ in6_pcbdetach(inp);
+ in6_pcbfree(inp);
+ } else {
#endif
- in_pcbdetach(inp);
- tcpstat.tcps_closed++;
- return (NULL);
+ in_pcbdetach(inp);
+ in_pcbfree(inp);
+#ifdef INET6
+ }
+#endif
+ ACCEPT_LOCK();
+ SOCK_LOCK(so);
+ so->so_state &= ~SS_PROTOREF;
+ sofree(so);
+ return (NULL);
+ }
+ return (tp);
}
void
@@ -857,8 +885,11 @@ tcp_notify(inp, error)
return (inp);
} else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 &&
tp->t_softerror) {
- tcp_drop(tp, error);
- return (struct inpcb *)0;
+ tp = tcp_drop(tp, error);
+ if (tp != NULL)
+ return (inp);
+ else
+ return (NULL);
} else {
tp->t_softerror = error;
return (inp);
@@ -1433,8 +1464,11 @@ tcp_drop_syn_sent(inp, errno)
INP_LOCK_ASSERT(inp);
if (tp != NULL && tp->t_state == TCPS_SYN_SENT) {
- tcp_drop(tp, errno);
- return (NULL);
+ tp = tcp_drop(tp, errno);
+ if (tp != NULL)
+ return (inp);
+ else
+ return (NULL);
}
return (inp);
}
@@ -1670,7 +1704,9 @@ tcp_twstart(tp)
if (tw == NULL) {
tw = tcp_timer_2msl_tw(1);
if (tw == NULL) {
- tcp_close(tp);
+ tp = tcp_close(tp);
+ if (tp != NULL)
+ INP_UNLOCK(tp->t_inpcb);
return;
}
}
@@ -1705,21 +1741,45 @@ tcp_twstart(tp)
*/
tw_time = 2 * tcp_msl;
acknow = tp->t_flags & TF_ACKNOW;
+
+ /*
+ * First, discard tcpcb state, which includes stopping its timers and
+ * freeing it. tcp_discardcb() used to also release the inpcb, but
+ * that work is now done in the caller.
+ */
tcp_discardcb(tp);
so = inp->inp_socket;
- ACCEPT_LOCK();
SOCK_LOCK(so);
- so->so_pcb = NULL;
tw->tw_cred = crhold(so->so_cred);
tw->tw_so_options = so->so_options;
- sotryfree(so);
- inp->inp_socket = NULL;
+ SOCK_UNLOCK(so);
if (acknow)
tcp_twrespond(tw, TH_ACK);
inp->inp_ppcb = (caddr_t)tw;
inp->inp_vflag |= INP_TIMEWAIT;
tcp_timer_2msl_reset(tw, tw_time);
- INP_UNLOCK(inp);
+
+ /*
+ * If the inpcb owns the sole reference to the socket, then we can
+ * detach and free the socket as it is not needed in time wait.
+ */
+ if (inp->inp_vflag & INP_SOCKREF) {
+ KASSERT(so->so_state & SS_PROTOREF,
+ ("tcp_twstart: !SS_PROTOREF"));
+ inp->inp_vflag &= ~INP_SOCKREF;
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6PROTO)
+ in6_pcbdetach(inp);
+ else
+#endif
+ in_pcbdetach(inp);
+ INP_UNLOCK(inp);
+ ACCEPT_LOCK();
+ SOCK_LOCK(so);
+ so->so_state &= ~SS_PROTOREF;
+ sofree(so);
+ } else
+ INP_UNLOCK(inp);
}
/*
@@ -1756,31 +1816,62 @@ tcp_twrecycleable(struct tcptw *tw)
return (0);
}
-struct tcptw *
+void
tcp_twclose(struct tcptw *tw, int reuse)
{
+ struct socket *so;
struct inpcb *inp;
+ /*
+ * At this point, we should have an inpcb<->twtcp pair, with no
+ * associated socket. Validate that this is the case.
+ *
+ * XXXRW: This comment stale -- could still have socket ...?
+ */
inp = tw->tw_inpcb;
+ KASSERT((inp->inp_vflag & INP_TIMEWAIT), ("tcp_twclose: !timewait"));
+ KASSERT(inp->inp_ppcb == (void *)tw, ("tcp_twclose: inp_ppcb != tw"));
INP_INFO_WLOCK_ASSERT(&tcbinfo); /* tcp_timer_2msl_stop(). */
INP_LOCK_ASSERT(inp);
tw->tw_inpcb = NULL;
tcp_timer_2msl_stop(tw);
inp->inp_ppcb = NULL;
+ inp->inp_vflag |= INP_DROPPED;
+
+ so = inp->inp_socket;
+ if (so != NULL && inp->inp_vflag & INP_SOCKREF) {
+ KASSERT(so->so_state & SS_PROTOREF,
+ ("tcp_twclose: !SS_PROTOREF"));
+ inp->inp_vflag &= ~INP_SOCKREF;
#ifdef INET6
- if (inp->inp_vflag & INP_IPV6PROTO)
- in6_pcbdetach(inp);
- else
+ if (inp->inp_vflag & INP_IPV6PROTO) {
+ in6_pcbdetach(inp);
+ in6_pcbfree(inp);
+ } else {
+ in_pcbdetach(inp);
+ in_pcbfree(inp);
+ }
+#endif
+ ACCEPT_LOCK();
+ SOCK_LOCK(so);
+ so->so_state &= ~SS_PROTOREF;
+ sofree(so);
+ } else if (so == NULL) {
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6PROTO)
+ in6_pcbfree(inp);
+ else
#endif
- in_pcbdetach(inp);
+ in_pcbfree(inp);
+ } else
+ printf("tcp_twclose: so != NULL but !INP_SOCKREF");
tcpstat.tcps_closed++;
crfree(tw->tw_cred);
tw->tw_cred = NULL;
if (reuse)
- return (tw);
+ return;
uma_zfree(tcptw_zone, tw);
- return (NULL);
}
int
@@ -2233,11 +2324,10 @@ sysctl_drop(SYSCTL_HANDLER_ARGS)
INP_LOCK(inp);
if ((tw = intotw(inp)) &&
(inp->inp_vflag & INP_TIMEWAIT) != 0) {
- (void) tcp_twclose(tw, 0);
+ tcp_twclose(tw, 0);
} else if ((tp = intotcpcb(inp)) &&
((inp->inp_socket->so_options & SO_ACCEPTCONN) == 0)) {
- tp = tcp_drop(tp, ECONNABORTED);
- if (tp != NULL)
+ if (tcp_drop(tp, ECONNABORTED) != NULL)
INP_UNLOCK(inp);
} else
INP_UNLOCK(inp);
OpenPOWER on IntegriCloud