summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorandre <andre@FreeBSD.org>2012-10-28 19:16:22 +0000
committerandre <andre@FreeBSD.org>2012-10-28 19:16:22 +0000
commitb21f6ebbaa35e6f822a064af33d150bafa261dd0 (patch)
treef7240c7cce5e774e1124c0643860b338546c6b3e
parentee161fee4db27d88de337761219260d04fb38c42 (diff)
downloadFreeBSD-src-b21f6ebbaa35e6f822a064af33d150bafa261dd0.zip
FreeBSD-src-b21f6ebbaa35e6f822a064af33d150bafa261dd0.tar.gz
Simplify and enhance the window change/update acceptance logic,
especially in the presence of bi-directional data transfers. snd_wl1 tracks the right edge, including data in the reassembly queue, of valid incoming data. This makes it like rcv_nxt plus reassembly. It never goes backwards, to prevent older, possibly reordered segments from updating the window. snd_wl2 tracks the left edge of sent data. This makes it a duplicate of snd_una. However, joining them right now is difficult due to separate update dependencies in different places in the code flow. snd_wnd tracks the send window currently advertised by the peer. In tcp_output() the effective window is calculated by subtracting the already in-flight data, snd_nxt less snd_una, from it. ACKs become the main clock of window updates and will always update the window when the left edge of what we sent is advanced. The ACK clock is the primary signaling mechanism in ongoing data transfers. This works reliably even in the presence of reordering, reassembly and retransmitted segments. The ACK clock is most important because it determines how much data we are allowed to inject into the network. Zero window updates, which get us out of persist mode, are crucial. Here a segment that neither moves ACK nor SEQ but enlarges WND is accepted. When the ACK clock is not active (that is, we're not, or no longer, sending any data) any segment that moves the extended right SEQ edge, including out-of-order segments, updates the window. This gives us updates especially during ping-pong transfers where the peer isn't done consuming the already acknowledged data from the receive buffer while responding with data. The SSH protocol is a prime candidate to benefit from the improved bi-directional window update logic as it has its own windowing mechanism on top of TCP and is frequently sending back protocol ACKs. Tcpdump provided by: darrenr Tested by: darrenr MFC after: 2 weeks
-rw-r--r--sys/netinet/tcp_input.c63
1 files changed, 47 insertions, 16 deletions
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 22caaf6..0e3308b 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -1714,7 +1714,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
* Pull snd_wl1 up to prevent seq wrap relative to
* th_seq.
*/
- tp->snd_wl1 = th->th_seq;
+ tp->snd_wl1 = th->th_seq + tlen;
/*
* Pull rcv_up up to prevent seq wrap relative to
* rcv_nxt.
@@ -2327,7 +2327,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
if (tlen == 0 && (thflags & TH_FIN) == 0)
(void) tcp_reass(tp, (struct tcphdr *)0, 0,
(struct mbuf *)0);
- tp->snd_wl1 = th->th_seq - 1;
/* FALLTHROUGH */
/*
@@ -2638,12 +2637,10 @@ process_ACK:
SOCKBUF_LOCK(&so->so_snd);
if (acked > so->so_snd.sb_cc) {
- tp->snd_wnd -= so->so_snd.sb_cc;
sbdrop_locked(&so->so_snd, (int)so->so_snd.sb_cc);
ourfinisacked = 1;
} else {
sbdrop_locked(&so->so_snd, acked);
- tp->snd_wnd -= acked;
ourfinisacked = 0;
}
/* NB: sowwakeup_locked() does an implicit unlock. */
@@ -2733,24 +2730,56 @@ step6:
INP_WLOCK_ASSERT(tp->t_inpcb);
/*
- * Update window information.
- * Don't look at window if no ACK: TAC's send garbage on first SYN.
+ * Window update acceptance logic. We have to be careful not
+ * to accept window updates from old segments in the presence
+ * of reordering or duplication.
+ *
+ * A window update is valid when:
+ * - the segment ACK's new data.
+ * - the segment carries new data and its ACK is current.
+ * - the segment matches the current SEQ and ACK but increases
+ * the window. This is the escape from persist mode, if there
+ * is data to be sent.
+ *
+ * XXXAO: The presence of new SACK information would allow us to
+ * accept window updates during retransmits. We don't have an
+ * easy way to test for that at the moment.
+ *
+ * NB: The other side isn't allowed to shrink the window when
+ * not sending or acking new data. This behavior is strongly
+ * discouraged by RFC793, section 3.7, page 42 anyways.
+ *
+ * XXXAO: tiwin >= minmss to avoid jitter?
*/
- if ((thflags & TH_ACK) &&
- (SEQ_LT(tp->snd_wl1, th->th_seq) ||
- (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) ||
- (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) {
- /* keep track of pure window updates */
- if (tlen == 0 &&
- tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd)
+ if ((thflags & TH_ACK) && tiwin != tp->snd_wnd &&
+ (SEQ_GT(th->th_ack, tp->snd_wl2) ||
+ (th->th_ack == tp->snd_wl2 &&
+ (SEQ_GT(th->th_seq + tlen, tp->snd_wl1) ||
+ (th->th_seq == tp->snd_wl1 && tlen == 0 && tiwin > tp->snd_wnd))))) {
+#if 0
+ char *s;
+ if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: window update %lu -> %lu\n",
+ s, __func__, tp->snd_wnd, tiwin);
+ free(s, M_TCPLOG);
+ }
+#endif
+ /* Keep track of pure window updates. */
+ if (th->th_seq == tp->snd_wl1 && tlen == 0 &&
+ tiwin > tp->snd_wnd)
TCPSTAT_INC(tcps_rcvwinupd);
+ /*
+ * When the new window is larger, nudge output
+ * as we may be able to send more data.
+ */
+ if (tiwin > tp->snd_wnd)
+ needoutput = 1;
tp->snd_wnd = tiwin;
- tp->snd_wl1 = th->th_seq;
- tp->snd_wl2 = th->th_ack;
if (tp->snd_wnd > tp->max_sndwnd)
tp->max_sndwnd = tp->snd_wnd;
- needoutput = 1;
}
+ if (SEQ_GT(th->th_ack, tp->snd_wl2))
+ tp->snd_wl2 = th->th_ack;
/*
* Process segments with URG.
@@ -2870,6 +2899,8 @@ dodata: /* XXX */
thflags = tcp_reass(tp, th, &tlen, m);
tp->t_flags |= TF_ACKNOW;
}
+ if (SEQ_GT(th->th_seq, tp->snd_wl1))
+ tp->snd_wl1 = th->th_seq + tlen;
if (tlen > 0 && (tp->t_flags & TF_SACK_PERMIT))
tcp_update_sack_list(tp, save_start, save_start + tlen);
#if 0
OpenPOWER on IntegriCloud