diff options
-rw-r--r-- | sys/netinet/tcp_input.c | 9 | ||||
-rw-r--r-- | sys/netinet/tcp_output.c | 54 | ||||
-rw-r--r-- | sys/netinet/tcp_reass.c | 9 | ||||
-rw-r--r-- | sys/netinet/tcp_syncache.c | 2 | ||||
-rw-r--r-- | sys/netinet/tcp_var.h | 1 |
5 files changed, 56 insertions, 19 deletions
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 6740ebe..d8ece0b 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -152,10 +152,15 @@ do { \ #endif /* - * Indicate whether this ack should be delayed. + * Indicate whether this ack should be delayed. We can delay the ack if + * - delayed acks are enabled and + * - there is no delayed ack timer in progress and + * - our last ack wasn't a 0-sized window. We never want to delay + * the ack that opens up a 0-sized window. */ #define DELAY_ACK(tp) \ - (tcp_delack_enabled && !callout_pending(tp->tt_delack)) + (tcp_delack_enabled && !callout_pending(tp->tt_delack) && \ + (tp->t_flags & TF_RXWIN0SENT) == 0) static int tcp_reass(tp, th, tlenp, m) diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index 92072d6..fa884ab 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -118,7 +118,9 @@ tcp_output(tp) u_char opt[TCP_MAXOLEN]; unsigned ipoptlen, optlen, hdrlen; int idle, sendalot; +#if 0 int maxburst = TCP_MAXBURST; +#endif struct rmxp_tao *taop; struct rmxp_tao tao_noncached; #ifdef INET6 @@ -277,28 +279,38 @@ again: win = sbspace(&so->so_rcv); /* - * Sender silly window avoidance. If connection is idle - * and can send all data, a maximum segment, - * at least a maximum default-size segment do it, - * or are forced, do it; otherwise don't bother. - * If peer's buffer is tiny, then send - * when window is at least half open. - * If retransmitting (possibly after persist timer forced us - * to send into a small window), then must resend. + * Sender silly window avoidance. We transmit under the following + * conditions when len is non-zero: + * + * - We have a full segment + * - This is the last buffer in a write()/send() and we are + * either idle or running NODELAY + * - we've timed out (e.g. 
persist timer) + * - we have more than 1/2 the maximum send window's worth of + * data (receiver may have limited the window size) + * - we need to retransmit */ if (len) { if (len == tp->t_maxseg) goto send; - if (!(tp->t_flags & TF_MORETOCOME) && - (idle || tp->t_flags & TF_NODELAY) && - (tp->t_flags & TF_NOPUSH) == 0 && - len + off >= so->so_snd.sb_cc) + /* + * NOTE! on localhost connections an 'ack' from the remote + * end may occur synchronously with the output and cause + * us to flush a buffer queued with moretocome. XXX + * + * note: the len + off check is almost certainly unnecessary. + */ + if (!(tp->t_flags & TF_MORETOCOME) && /* normal case */ + (idle || (tp->t_flags & TF_NODELAY)) && + len + off >= so->so_snd.sb_cc && + (tp->t_flags & TF_NOPUSH) == 0) { goto send; - if (tp->t_force) + } + if (tp->t_force) /* typ. timeout case */ goto send; if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0) goto send; - if (SEQ_LT(tp->snd_nxt, tp->snd_max)) + if (SEQ_LT(tp->snd_nxt, tp->snd_max)) /* retransmit case */ goto send; } @@ -697,6 +709,20 @@ send: if (win > (long)TCP_MAXWIN << tp->rcv_scale) win = (long)TCP_MAXWIN << tp->rcv_scale; th->th_win = htons((u_short) (win>>tp->rcv_scale)); + + + /* + * Adjust the RXWIN0SENT flag - indicate that we have advertised + * a 0 window. This may cause the remote transmitter to stall. This + * flag tells soreceive() to disable delayed acknowledgements when + * draining the buffer. This can occur if the receiver is attempting + * to read more data than can be buffered prior to transmitting on + * the connection. 
+ */ + if (win == 0) + tp->t_flags |= TF_RXWIN0SENT; + else + tp->t_flags &= ~TF_RXWIN0SENT; if (SEQ_GT(tp->snd_up, tp->snd_nxt)) { th->th_urp = htons((u_short)(tp->snd_up - tp->snd_nxt)); th->th_flags |= TH_URG; diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c index 6740ebe..d8ece0b 100644 --- a/sys/netinet/tcp_reass.c +++ b/sys/netinet/tcp_reass.c @@ -152,10 +152,15 @@ do { \ #endif /* - * Indicate whether this ack should be delayed. + * Indicate whether this ack should be delayed. We can delay the ack if + * - delayed acks are enabled and + * - there is no delayed ack timer in progress and + * - our last ack wasn't a 0-sized window. We never want to delay + * the ack that opens up a 0-sized window. */ #define DELAY_ACK(tp) \ - (tcp_delack_enabled && !callout_pending(tp->tt_delack)) + (tcp_delack_enabled && !callout_pending(tp->tt_delack) && \ + (tp->t_flags & TF_RXWIN0SENT) == 0) static int tcp_reass(tp, th, tlenp, m) diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 3fbb0b8..e1dd788 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -657,7 +657,7 @@ syncache_socket(sc, lso) tp->rcv_wnd = sc->sc_wnd; tp->rcv_adv += tp->rcv_wnd; - tp->t_flags = sc->sc_tp->t_flags & TF_NOPUSH; + tp->t_flags = sc->sc_tp->t_flags & (TF_NOPUSH|TF_NODELAY); if (sc->sc_flags & SCF_NOOPT) tp->t_flags |= TF_NOOPT; if (sc->sc_flags & SCF_WINSCALE) { diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 0f71278..f4a5b75 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -101,6 +101,7 @@ struct tcpcb { #define TF_MORETOCOME 0x10000 /* More data to be appended to sock */ #define TF_LQ_OVERFLOW 0x20000 /* listen queue overflow */ #define TF_LASTIDLE 0x40000 /* connection was previously idle */ +#define TF_RXWIN0SENT 0x80000 /* sent a receiver win 0 in response */ int t_force; /* 1 if forcing out a byte */ tcp_seq snd_una; /* send unacknowledged */ |