diff options
author | ps <ps@FreeBSD.org> | 2004-10-05 18:36:24 +0000 |
---|---|---|
committer | ps <ps@FreeBSD.org> | 2004-10-05 18:36:24 +0000 |
commit | c8e4aa1cd58a20597b74285a3cc35ea24ba5c47c (patch) | |
tree | bcb9cd585361024879ec2c577e898534dec0d79d | |
parent | 9536269a6d161e34a72dee84f6df6b4183f51be8 (diff) | |
download | FreeBSD-src-c8e4aa1cd58a20597b74285a3cc35ea24ba5c47c.zip FreeBSD-src-c8e4aa1cd58a20597b74285a3cc35ea24ba5c47c.tar.gz |
- Estimate the amount of data in flight during SACK recovery and use
that estimate to limit the packets injected while in recovery (for
both retransmissions and new data).
- Cleanups to the sack codepaths in tcp_output.c and tcp_sack.c.
- Add a new sysctl (net.inet.tcp.sack.initburst) that controls the
number of SACK retransmissions sent when SACK recovery is initiated.
Submitted by: Mohan Srinivasan <mohans@yahoo-inc.com>
-rw-r--r-- | sys/netinet/tcp_input.c | 12 | ||||
-rw-r--r-- | sys/netinet/tcp_output.c | 66 | ||||
-rw-r--r-- | sys/netinet/tcp_reass.c | 12 | ||||
-rw-r--r-- | sys/netinet/tcp_sack.c | 37 | ||||
-rw-r--r-- | sys/netinet/tcp_subr.c | 5 | ||||
-rw-r--r-- | sys/netinet/tcp_timewait.c | 5 | ||||
-rw-r--r-- | sys/netinet/tcp_var.h | 5 |
7 files changed, 83 insertions, 59 deletions
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 3b70e99..b96b959 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -155,6 +155,12 @@ SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD, &tcp_reass_overflows, 0, "Global number of TCP Segment Reassembly Queue Overflows"); +static int tcp_sack_recovery_initburst = 3; +SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, + initburst, CTLFLAG_RW, + &tcp_sack_recovery_initburst, 0, + "Initial Number of Rexmits when sack recovery is set up"); + struct inpcbhead tcb; #define tcb6 tcb /* for KAME src sync over BSD*'s */ struct inpcbinfo tcbinfo; @@ -1980,9 +1986,9 @@ trimthenstep6: tp->t_rtttime = 0; if (tp->sack_enable) { tcpstat.tcps_sack_recovery_episode++; - tp->snd_cwnd = - tp->t_maxseg * - tp->t_dupacks; + tp->sack_newdata = tp->snd_nxt; + tp->snd_cwnd = + tp->t_maxseg * tcp_sack_recovery_initburst; (void) tcp_output(tp); tp->snd_cwnd += tp->snd_ssthresh; diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index cd09097..18da2cd 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -124,6 +124,7 @@ tcp_output(struct tcpcb *tp) unsigned ipoptlen, optlen, hdrlen; int idle, sendalot; int i, sack_rxmit; + int sack_bytes_rxmt; struct sackhole *p; #if 0 int maxburst = TCP_MAXBURST; @@ -198,12 +199,16 @@ again: * Still in sack recovery , reset rxmit flag to zero. 
*/ sack_rxmit = 0; + sack_bytes_rxmt = 0; len = 0; p = NULL; if (tp->sack_enable && IN_FASTRECOVERY(tp) && - (p = tcp_sack_output(tp))) { - KASSERT(tp->snd_cwnd >= 0, - ("%s: CWIN is negative : %ld", __func__, tp->snd_cwnd)); + (p = tcp_sack_output(tp, &sack_bytes_rxmt))) { + long cwin; + + cwin = min(tp->snd_wnd, tp->snd_cwnd) - sack_bytes_rxmt; + if (cwin < 0) + cwin = 0; /* Do not retransmit SACK segments beyond snd_recover */ if (SEQ_GT(p->end, tp->snd_recover)) { /* @@ -222,10 +227,10 @@ again: goto after_sack_rexmit; } else /* Can rexmit part of the current hole */ - len = ((long)ulmin(tp->snd_cwnd, - tp->snd_recover - p->rxmit)); + len = ((long)ulmin(cwin, + tp->snd_recover - p->rxmit)); } else - len = ((long)ulmin(tp->snd_cwnd, p->end - p->rxmit)); + len = ((long)ulmin(cwin, p->end - p->rxmit)); sack_rxmit = 1; sendalot = 1; off = p->rxmit - tp->snd_una; @@ -295,8 +300,25 @@ after_sack_rexmit: * If sack_rxmit is true we are retransmitting from the scoreboard * in which case len is already set. */ - if (!sack_rxmit) - len = ((long)ulmin(so->so_snd.sb_cc, sendwin) - off); + if (sack_rxmit == 0) { + if (sack_bytes_rxmt == 0) + len = ((long)ulmin(so->so_snd.sb_cc, sendwin) - off); + else { + long cwin; + + /* + * We are inside of a SACK recovery episode and are + * sending new data, having retransmitted all the + * data possible in the scoreboard. + */ + len = so->so_snd.sb_cc - off; + cwin = sendwin - (tp->snd_nxt - tp->sack_newdata) - + sack_bytes_rxmt; + if (cwin < 0) + cwin = 0; + len = lmin(len, cwin); + } + } /* * Lop off SYN bit if it has already been sent. However, if this @@ -850,12 +872,13 @@ send: * case, since we know we aren't doing a retransmission. * (retransmit and persist are mutually exclusive...) 
*/ - if (len || (flags & (TH_SYN|TH_FIN)) - || callout_active(tp->tt_persist)) - th->th_seq = htonl(tp->snd_nxt); - else - th->th_seq = htonl(tp->snd_max); - if (sack_rxmit) { + if (sack_rxmit == 0) { + if (len || (flags & (TH_SYN|TH_FIN)) + || callout_active(tp->tt_persist)) + th->th_seq = htonl(tp->snd_nxt); + else + th->th_seq = htonl(tp->snd_max); + } else { th->th_seq = htonl(p->rxmit); p->rxmit += len; } @@ -956,7 +979,7 @@ send: tp->t_flags |= TF_SENTFIN; } } - if (tp->sack_enable && sack_rxmit) + if (sack_rxmit) goto timer; tp->snd_nxt += len; if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { @@ -981,18 +1004,9 @@ send: * of retransmit time. */ timer: - if (tp->sack_enable && sack_rxmit && - !callout_active(tp->tt_rexmt) && - tp->snd_nxt != tp->snd_max) { - callout_reset(tp->tt_rexmt, tp->t_rxtcur, - tcp_timer_rexmt, tp); - if (callout_active(tp->tt_persist)) { - callout_stop(tp->tt_persist); - tp->t_rxtshift = 0; - } - } if (!callout_active(tp->tt_rexmt) && - tp->snd_nxt != tp->snd_una) { + ((sack_rxmit && tp->snd_nxt != tp->snd_max) || + (tp->snd_nxt != tp->snd_una))) { if (callout_active(tp->tt_persist)) { callout_stop(tp->tt_persist); tp->t_rxtshift = 0; diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c index 3b70e99..b96b959 100644 --- a/sys/netinet/tcp_reass.c +++ b/sys/netinet/tcp_reass.c @@ -155,6 +155,12 @@ SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD, &tcp_reass_overflows, 0, "Global number of TCP Segment Reassembly Queue Overflows"); +static int tcp_sack_recovery_initburst = 3; +SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, + initburst, CTLFLAG_RW, + &tcp_sack_recovery_initburst, 0, + "Initial Number of Rexmits when sack recovery is set up"); + struct inpcbhead tcb; #define tcb6 tcb /* for KAME src sync over BSD*'s */ struct inpcbinfo tcbinfo; @@ -1980,9 +1986,9 @@ trimthenstep6: tp->t_rtttime = 0; if (tp->sack_enable) { tcpstat.tcps_sack_recovery_episode++; - tp->snd_cwnd = - tp->t_maxseg * - tp->t_dupacks; + tp->sack_newdata = 
tp->snd_nxt; + tp->snd_cwnd = + tp->t_maxseg * tcp_sack_recovery_initburst; (void) tcp_output(tp); tp->snd_cwnd += tp->snd_ssthresh; diff --git a/sys/netinet/tcp_sack.c b/sys/netinet/tcp_sack.c index 1cf44f2..e30e9d6 100644 --- a/sys/netinet/tcp_sack.c +++ b/sys/netinet/tcp_sack.c @@ -164,6 +164,11 @@ struct tcphdr tcp_savetcp; extern struct uma_zone *sack_hole_zone; +SYSCTL_NODE(_net_inet_tcp, OID_AUTO, sack, CTLFLAG_RW, 0, "TCP SACK"); +int tcp_do_sack = 1; +SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, enable, CTLFLAG_RW, + &tcp_do_sack, 0, "Enable/Disable TCP SACK support"); + /* * This function is called upon receipt of new valid data (while not in header * prediction mode), and it updates the ordered list of sacks. @@ -486,18 +491,19 @@ tcp_sack_partialack(tp, th) { INP_LOCK_ASSERT(tp->t_inpcb); u_long ocwnd = tp->snd_cwnd; + int sack_bytes_rexmt = 0; callout_stop(tp->tt_rexmt); tp->t_rtttime = 0; /* - * Set snd_cwnd to one segment beyond acknowledged offset - * (tp->snd_una has not yet been updated when this function is called.) - */ - /* - * Should really be - * min(tp->snd_cwnd, tp->t_maxseg + (th->th_ack - tp->snd_una)) + * Set cwnd so we can send one more segment (either rexmit based on + * scoreboard or new segment). Set cwnd to the amount of data + * rexmitted from scoreboard plus the amount of new data transmitted + * in this sack recovery episode plus one segment. */ - tp->snd_cwnd = tp->t_maxseg + (th->th_ack - tp->snd_una); + (void)tcp_sack_output(tp, &sack_bytes_rexmt); + tp->snd_cwnd = sack_bytes_rexmt + (tp->snd_nxt - tp->sack_newdata) + + tp->t_maxseg; tp->t_flags |= TF_ACKNOW; (void) tcp_output(tp); tp->snd_cwnd = ocwnd; @@ -529,29 +535,29 @@ tcp_print_holes(struct tcpcb *tp) * NULL otherwise. 
*/ struct sackhole * -tcp_sack_output(struct tcpcb *tp) +tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt) { - struct sackhole *p; + struct sackhole *p = NULL; INP_LOCK_ASSERT(tp->t_inpcb); if (!tp->sack_enable) return (NULL); - p = tp->snd_holes; - while (p) { + *sack_bytes_rexmt = 0; + for (p = tp->snd_holes; p ; p = p->next) { if (SEQ_LT(p->rxmit, p->end)) { if (SEQ_LT(p->rxmit, tp->snd_una)) {/* old SACK hole */ - p = p->next; continue; } #ifdef TCP_SACK_DEBUG if (p) tcp_print_holes(tp); #endif - return (p); + *sack_bytes_rexmt += (p->rxmit - p->start); + break; } - p = p->next; + *sack_bytes_rexmt += (p->rxmit - p->start); } - return (NULL); + return (p); } /* @@ -588,4 +594,3 @@ tcp_sack_adjust(struct tcpcb *tp) tp->snd_nxt = tp->rcv_lastsack; return; } - diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index b0f4e5c..7619916 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -206,11 +206,6 @@ static int tcp_inflight_stab = 20; SYSCTL_INT(_net_inet_tcp_inflight, OID_AUTO, stab, CTLFLAG_RW, &tcp_inflight_stab, 0, "Inflight Algorithm Stabilization 20 = 2 packets"); -SYSCTL_NODE(_net_inet_tcp, OID_AUTO, sack, CTLFLAG_RW, 0, "TCP SACK"); -int tcp_do_sack = 1; -SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, enable, CTLFLAG_RW, - &tcp_do_sack, 0, "Enable/Disable TCP SACK support"); - uma_zone_t sack_hole_zone; static struct inpcb *tcp_notify(struct inpcb *, int); diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c index b0f4e5c..7619916 100644 --- a/sys/netinet/tcp_timewait.c +++ b/sys/netinet/tcp_timewait.c @@ -206,11 +206,6 @@ static int tcp_inflight_stab = 20; SYSCTL_INT(_net_inet_tcp_inflight, OID_AUTO, stab, CTLFLAG_RW, &tcp_inflight_stab, 0, "Inflight Algorithm Stabilization 20 = 2 packets"); -SYSCTL_NODE(_net_inet_tcp, OID_AUTO, sack, CTLFLAG_RW, 0, "TCP SACK"); -int tcp_do_sack = 1; -SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, enable, CTLFLAG_RW, - &tcp_do_sack, 0, "Enable/Disable TCP SACK support"); - uma_zone_t 
sack_hole_zone; static struct inpcb *tcp_notify(struct inpcb *, int); diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 4ba7a91..a58821b 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -200,6 +200,8 @@ struct tcpcb { tcp_seq rcv_lastsack; /* last seq number(+1) sack'd by rcv'r*/ int rcv_numsacks; /* # distinct sack blks present */ struct sackblk sackblks[MAX_SACK_BLKS]; /* seq nos. of sack blocks */ + tcp_seq sack_newdata; /* New data xmitted in this recovery + episode starts at this seq number */ }; #define IN_FASTRECOVERY(tp) (tp->t_flags & TF_FASTRECOVERY) @@ -523,6 +525,7 @@ struct xtcpcb { #ifdef _KERNEL #ifdef SYSCTL_DECL SYSCTL_DECL(_net_inet_tcp); +SYSCTL_DECL(_net_inet_tcp_sack); #endif extern struct inpcbhead tcb; /* head of queue of active tcpcb's */ @@ -617,7 +620,7 @@ void tcp_update_sack_list(struct tcpcb *tp); void tcp_del_sackholes(struct tcpcb *, struct tcphdr *); void tcp_clean_sackreport(struct tcpcb *tp); void tcp_sack_adjust(struct tcpcb *tp); -struct sackhole *tcp_sack_output(struct tcpcb *tp); +struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt); void tcp_sack_partialack(struct tcpcb *, struct tcphdr *); void tcp_free_sackholes(struct tcpcb *tp); int tcp_newreno(struct tcpcb *, struct tcphdr *); |