summaryrefslogtreecommitdiffstats
path: root/sys/netinet
diff options
context:
space:
mode:
author dillon <dillon@FreeBSD.org> 2001-12-02 08:49:29 +0000
committer dillon <dillon@FreeBSD.org> 2001-12-02 08:49:29 +0000
commit f97547e246f3c4f9f272892fa757d7df8dc90b89 (patch)
tree 1e2d69883173d0e4c094c85ef435f3c25dc41de5 /sys/netinet
parent 0b6f0e4522eeceef1d5e5dc2e920666192f7ec2a (diff)
downloadFreeBSD-src-f97547e246f3c4f9f272892fa757d7df8dc90b89.zip
FreeBSD-src-f97547e246f3c4f9f272892fa757d7df8dc90b89.tar.gz
Fix a bug with transmitter restart after receiving a 0 window. The
receiver was not sending an immediate ack with delayed acks turned on when the input buffer was drained, preventing the transmitter from restarting immediately. Propagate the TCP_NODELAY option to accept()ed sockets. (Helps tbench and is a good idea anyway). Some cleanup. Identify additional issues in comments. MFC after: 1 day
Diffstat (limited to 'sys/netinet')
-rw-r--r--sys/netinet/tcp_input.c9
-rw-r--r--sys/netinet/tcp_output.c54
-rw-r--r--sys/netinet/tcp_reass.c9
-rw-r--r--sys/netinet/tcp_syncache.c2
-rw-r--r--sys/netinet/tcp_var.h1
5 files changed, 56 insertions, 19 deletions
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 6740ebe..d8ece0b 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -152,10 +152,15 @@ do { \
#endif
/*
- * Indicate whether this ack should be delayed.
+ * Indicate whether this ack should be delayed. We can delay the ack if
+ * - delayed acks are enabled and
+ * - there is no delayed ack timer in progress and
+ * - our last ack wasn't a 0-sized window. We never want to delay
+ * the ack that opens up a 0-sized window.
*/
#define DELAY_ACK(tp) \
- (tcp_delack_enabled && !callout_pending(tp->tt_delack))
+ (tcp_delack_enabled && !callout_pending(tp->tt_delack) && \
+ (tp->t_flags & TF_RXWIN0SENT) == 0)
static int
tcp_reass(tp, th, tlenp, m)
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index 92072d6..fa884ab 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -118,7 +118,9 @@ tcp_output(tp)
u_char opt[TCP_MAXOLEN];
unsigned ipoptlen, optlen, hdrlen;
int idle, sendalot;
+#if 0
int maxburst = TCP_MAXBURST;
+#endif
struct rmxp_tao *taop;
struct rmxp_tao tao_noncached;
#ifdef INET6
@@ -277,28 +279,38 @@ again:
win = sbspace(&so->so_rcv);
/*
- * Sender silly window avoidance. If connection is idle
- * and can send all data, a maximum segment,
- * at least a maximum default-size segment do it,
- * or are forced, do it; otherwise don't bother.
- * If peer's buffer is tiny, then send
- * when window is at least half open.
- * If retransmitting (possibly after persist timer forced us
- * to send into a small window), then must resend.
+ * Sender silly window avoidance. We transmit under the following
+ * conditions when len is non-zero:
+ *
+ * - We have a full segment
+ * - This is the last buffer in a write()/send() and we are
+ * either idle or running NODELAY
+ * - we've timed out (e.g. persist timer)
+ * - we have more than 1/2 the maximum send window's worth of
+ * data (receiver may have limited the window size)
+ * - we need to retransmit
*/
if (len) {
if (len == tp->t_maxseg)
goto send;
- if (!(tp->t_flags & TF_MORETOCOME) &&
- (idle || tp->t_flags & TF_NODELAY) &&
- (tp->t_flags & TF_NOPUSH) == 0 &&
- len + off >= so->so_snd.sb_cc)
+ /*
+ * NOTE! on localhost connections an 'ack' from the remote
+ * end may occur synchronously with the output and cause
+ * us to flush a buffer queued with moretocome. XXX
+ *
+ * note: the len + off check is almost certainly unnecessary.
+ */
+ if (!(tp->t_flags & TF_MORETOCOME) && /* normal case */
+ (idle || (tp->t_flags & TF_NODELAY)) &&
+ len + off >= so->so_snd.sb_cc &&
+ (tp->t_flags & TF_NOPUSH) == 0) {
goto send;
- if (tp->t_force)
+ }
+ if (tp->t_force) /* typ. timeout case */
goto send;
if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0)
goto send;
- if (SEQ_LT(tp->snd_nxt, tp->snd_max))
+ if (SEQ_LT(tp->snd_nxt, tp->snd_max)) /* retransmit case */
goto send;
}
@@ -697,6 +709,20 @@ send:
if (win > (long)TCP_MAXWIN << tp->rcv_scale)
win = (long)TCP_MAXWIN << tp->rcv_scale;
th->th_win = htons((u_short) (win>>tp->rcv_scale));
+
+
+ /*
+ * Adjust the RXWIN0SENT flag - indicate that we have advertised
+ * a 0 window. This may cause the remote transmitter to stall. This
+ * flag tells soreceive() to disable delayed acknowledgements when
+ * draining the buffer. This can occur if the receiver is attempting
+ * to read more data than can be buffered prior to transmitting on
+ * the connection.
+ */
+ if (win == 0)
+ tp->t_flags |= TF_RXWIN0SENT;
+ else
+ tp->t_flags &= ~TF_RXWIN0SENT;
if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
th->th_urp = htons((u_short)(tp->snd_up - tp->snd_nxt));
th->th_flags |= TH_URG;
diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c
index 6740ebe..d8ece0b 100644
--- a/sys/netinet/tcp_reass.c
+++ b/sys/netinet/tcp_reass.c
@@ -152,10 +152,15 @@ do { \
#endif
/*
- * Indicate whether this ack should be delayed.
+ * Indicate whether this ack should be delayed. We can delay the ack if
+ * - delayed acks are enabled and
+ * - there is no delayed ack timer in progress and
+ * - our last ack wasn't a 0-sized window. We never want to delay
+ * the ack that opens up a 0-sized window.
*/
#define DELAY_ACK(tp) \
- (tcp_delack_enabled && !callout_pending(tp->tt_delack))
+ (tcp_delack_enabled && !callout_pending(tp->tt_delack) && \
+ (tp->t_flags & TF_RXWIN0SENT) == 0)
static int
tcp_reass(tp, th, tlenp, m)
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index 3fbb0b8..e1dd788 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -657,7 +657,7 @@ syncache_socket(sc, lso)
tp->rcv_wnd = sc->sc_wnd;
tp->rcv_adv += tp->rcv_wnd;
- tp->t_flags = sc->sc_tp->t_flags & TF_NOPUSH;
+ tp->t_flags = sc->sc_tp->t_flags & (TF_NOPUSH|TF_NODELAY);
if (sc->sc_flags & SCF_NOOPT)
tp->t_flags |= TF_NOOPT;
if (sc->sc_flags & SCF_WINSCALE) {
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index 0f71278..f4a5b75 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -101,6 +101,7 @@ struct tcpcb {
#define TF_MORETOCOME 0x10000 /* More data to be appended to sock */
#define TF_LQ_OVERFLOW 0x20000 /* listen queue overflow */
#define TF_LASTIDLE 0x40000 /* connection was previously idle */
+#define TF_RXWIN0SENT 0x80000 /* sent a receiver win 0 in response */
int t_force; /* 1 if forcing out a byte */
tcp_seq snd_una; /* send unacknowledged */
OpenPOWER on IntegriCloud