summaryrefslogtreecommitdiffstats
path: root/sys/netinet
diff options
context:
space:
mode:
author ps <ps@FreeBSD.org> 2004-10-05 18:36:24 +0000
committer ps <ps@FreeBSD.org> 2004-10-05 18:36:24 +0000
commit c8e4aa1cd58a20597b74285a3cc35ea24ba5c47c (patch)
tree bcb9cd585361024879ec2c577e898534dec0d79d /sys/netinet
parent 9536269a6d161e34a72dee84f6df6b4183f51be8 (diff)
download FreeBSD-src-c8e4aa1cd58a20597b74285a3cc35ea24ba5c47c.zip
FreeBSD-src-c8e4aa1cd58a20597b74285a3cc35ea24ba5c47c.tar.gz
- Estimate the amount of data in flight in sack recovery and use it
  to control the packets injected while in sack recovery (for both
  retransmissions and new data).
- Cleanups to the sack codepaths in tcp_output.c and tcp_sack.c.
- Add a new sysctl (net.inet.tcp.sack.initburst) that controls the
  number of sack retransmissions done upon initiation of sack recovery.

Submitted by: Mohan Srinivasan <mohans@yahoo-inc.com>
Diffstat (limited to 'sys/netinet')
-rw-r--r-- sys/netinet/tcp_input.c 12
-rw-r--r-- sys/netinet/tcp_output.c 66
-rw-r--r-- sys/netinet/tcp_reass.c 12
-rw-r--r-- sys/netinet/tcp_sack.c 37
-rw-r--r-- sys/netinet/tcp_subr.c 5
-rw-r--r-- sys/netinet/tcp_timewait.c 5
-rw-r--r-- sys/netinet/tcp_var.h 5
7 files changed, 83 insertions, 59 deletions
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 3b70e99..b96b959 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -155,6 +155,12 @@ SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD,
&tcp_reass_overflows, 0,
"Global number of TCP Segment Reassembly Queue Overflows");
+static int tcp_sack_recovery_initburst = 3;
+SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO,
+ initburst, CTLFLAG_RW,
+ &tcp_sack_recovery_initburst, 0,
+ "Initial Number of Rexmits when sack recovery is set up");
+
struct inpcbhead tcb;
#define tcb6 tcb /* for KAME src sync over BSD*'s */
struct inpcbinfo tcbinfo;
@@ -1980,9 +1986,9 @@ trimthenstep6:
tp->t_rtttime = 0;
if (tp->sack_enable) {
tcpstat.tcps_sack_recovery_episode++;
- tp->snd_cwnd =
- tp->t_maxseg *
- tp->t_dupacks;
+ tp->sack_newdata = tp->snd_nxt;
+ tp->snd_cwnd =
+ tp->t_maxseg * tcp_sack_recovery_initburst;
(void) tcp_output(tp);
tp->snd_cwnd +=
tp->snd_ssthresh;
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index cd09097..18da2cd 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -124,6 +124,7 @@ tcp_output(struct tcpcb *tp)
unsigned ipoptlen, optlen, hdrlen;
int idle, sendalot;
int i, sack_rxmit;
+ int sack_bytes_rxmt;
struct sackhole *p;
#if 0
int maxburst = TCP_MAXBURST;
@@ -198,12 +199,16 @@ again:
* Still in sack recovery , reset rxmit flag to zero.
*/
sack_rxmit = 0;
+ sack_bytes_rxmt = 0;
len = 0;
p = NULL;
if (tp->sack_enable && IN_FASTRECOVERY(tp) &&
- (p = tcp_sack_output(tp))) {
- KASSERT(tp->snd_cwnd >= 0,
- ("%s: CWIN is negative : %ld", __func__, tp->snd_cwnd));
+ (p = tcp_sack_output(tp, &sack_bytes_rxmt))) {
+ long cwin;
+
+ cwin = min(tp->snd_wnd, tp->snd_cwnd) - sack_bytes_rxmt;
+ if (cwin < 0)
+ cwin = 0;
/* Do not retransmit SACK segments beyond snd_recover */
if (SEQ_GT(p->end, tp->snd_recover)) {
/*
@@ -222,10 +227,10 @@ again:
goto after_sack_rexmit;
} else
/* Can rexmit part of the current hole */
- len = ((long)ulmin(tp->snd_cwnd,
- tp->snd_recover - p->rxmit));
+ len = ((long)ulmin(cwin,
+ tp->snd_recover - p->rxmit));
} else
- len = ((long)ulmin(tp->snd_cwnd, p->end - p->rxmit));
+ len = ((long)ulmin(cwin, p->end - p->rxmit));
sack_rxmit = 1;
sendalot = 1;
off = p->rxmit - tp->snd_una;
@@ -295,8 +300,25 @@ after_sack_rexmit:
* If sack_rxmit is true we are retransmitting from the scoreboard
* in which case len is already set.
*/
- if (!sack_rxmit)
- len = ((long)ulmin(so->so_snd.sb_cc, sendwin) - off);
+ if (sack_rxmit == 0) {
+ if (sack_bytes_rxmt == 0)
+ len = ((long)ulmin(so->so_snd.sb_cc, sendwin) - off);
+ else {
+ long cwin;
+
+ /*
+ * We are inside of a SACK recovery episode and are
+ * sending new data, having retransmitted all the
+ * data possible in the scoreboard.
+ */
+ len = so->so_snd.sb_cc - off;
+ cwin = sendwin - (tp->snd_nxt - tp->sack_newdata) -
+ sack_bytes_rxmt;
+ if (cwin < 0)
+ cwin = 0;
+ len = lmin(len, cwin);
+ }
+ }
/*
* Lop off SYN bit if it has already been sent. However, if this
@@ -850,12 +872,13 @@ send:
* case, since we know we aren't doing a retransmission.
* (retransmit and persist are mutually exclusive...)
*/
- if (len || (flags & (TH_SYN|TH_FIN))
- || callout_active(tp->tt_persist))
- th->th_seq = htonl(tp->snd_nxt);
- else
- th->th_seq = htonl(tp->snd_max);
- if (sack_rxmit) {
+ if (sack_rxmit == 0) {
+ if (len || (flags & (TH_SYN|TH_FIN))
+ || callout_active(tp->tt_persist))
+ th->th_seq = htonl(tp->snd_nxt);
+ else
+ th->th_seq = htonl(tp->snd_max);
+ } else {
th->th_seq = htonl(p->rxmit);
p->rxmit += len;
}
@@ -956,7 +979,7 @@ send:
tp->t_flags |= TF_SENTFIN;
}
}
- if (tp->sack_enable && sack_rxmit)
+ if (sack_rxmit)
goto timer;
tp->snd_nxt += len;
if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
@@ -981,18 +1004,9 @@ send:
* of retransmit time.
*/
timer:
- if (tp->sack_enable && sack_rxmit &&
- !callout_active(tp->tt_rexmt) &&
- tp->snd_nxt != tp->snd_max) {
- callout_reset(tp->tt_rexmt, tp->t_rxtcur,
- tcp_timer_rexmt, tp);
- if (callout_active(tp->tt_persist)) {
- callout_stop(tp->tt_persist);
- tp->t_rxtshift = 0;
- }
- }
if (!callout_active(tp->tt_rexmt) &&
- tp->snd_nxt != tp->snd_una) {
+ ((sack_rxmit && tp->snd_nxt != tp->snd_max) ||
+ (tp->snd_nxt != tp->snd_una))) {
if (callout_active(tp->tt_persist)) {
callout_stop(tp->tt_persist);
tp->t_rxtshift = 0;
diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c
index 3b70e99..b96b959 100644
--- a/sys/netinet/tcp_reass.c
+++ b/sys/netinet/tcp_reass.c
@@ -155,6 +155,12 @@ SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD,
&tcp_reass_overflows, 0,
"Global number of TCP Segment Reassembly Queue Overflows");
+static int tcp_sack_recovery_initburst = 3;
+SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO,
+ initburst, CTLFLAG_RW,
+ &tcp_sack_recovery_initburst, 0,
+ "Initial Number of Rexmits when sack recovery is set up");
+
struct inpcbhead tcb;
#define tcb6 tcb /* for KAME src sync over BSD*'s */
struct inpcbinfo tcbinfo;
@@ -1980,9 +1986,9 @@ trimthenstep6:
tp->t_rtttime = 0;
if (tp->sack_enable) {
tcpstat.tcps_sack_recovery_episode++;
- tp->snd_cwnd =
- tp->t_maxseg *
- tp->t_dupacks;
+ tp->sack_newdata = tp->snd_nxt;
+ tp->snd_cwnd =
+ tp->t_maxseg * tcp_sack_recovery_initburst;
(void) tcp_output(tp);
tp->snd_cwnd +=
tp->snd_ssthresh;
diff --git a/sys/netinet/tcp_sack.c b/sys/netinet/tcp_sack.c
index 1cf44f2..e30e9d6 100644
--- a/sys/netinet/tcp_sack.c
+++ b/sys/netinet/tcp_sack.c
@@ -164,6 +164,11 @@ struct tcphdr tcp_savetcp;
extern struct uma_zone *sack_hole_zone;
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, sack, CTLFLAG_RW, 0, "TCP SACK");
+int tcp_do_sack = 1;
+SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, enable, CTLFLAG_RW,
+ &tcp_do_sack, 0, "Enable/Disable TCP SACK support");
+
/*
* This function is called upon receipt of new valid data (while not in header
* prediction mode), and it updates the ordered list of sacks.
@@ -486,18 +491,19 @@ tcp_sack_partialack(tp, th)
{
INP_LOCK_ASSERT(tp->t_inpcb);
u_long ocwnd = tp->snd_cwnd;
+ int sack_bytes_rexmt = 0;
callout_stop(tp->tt_rexmt);
tp->t_rtttime = 0;
/*
- * Set snd_cwnd to one segment beyond acknowledged offset
- * (tp->snd_una has not yet been updated when this function is called.)
- */
- /*
- * Should really be
- * min(tp->snd_cwnd, tp->t_maxseg + (th->th_ack - tp->snd_una))
+ * Set cwnd so we can send one more segment (either rexmit based on
+ * scoreboard or new segment). Set cwnd to the amount of data
+ * rexmitted from scoreboard plus the amount of new data transmitted
+ * in this sack recovery episode plus one segment.
*/
- tp->snd_cwnd = tp->t_maxseg + (th->th_ack - tp->snd_una);
+ (void)tcp_sack_output(tp, &sack_bytes_rexmt);
+ tp->snd_cwnd = sack_bytes_rexmt + (tp->snd_nxt - tp->sack_newdata) +
+ tp->t_maxseg;
tp->t_flags |= TF_ACKNOW;
(void) tcp_output(tp);
tp->snd_cwnd = ocwnd;
@@ -529,29 +535,29 @@ tcp_print_holes(struct tcpcb *tp)
* NULL otherwise.
*/
struct sackhole *
-tcp_sack_output(struct tcpcb *tp)
+tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt)
{
- struct sackhole *p;
+ struct sackhole *p = NULL;
INP_LOCK_ASSERT(tp->t_inpcb);
if (!tp->sack_enable)
return (NULL);
- p = tp->snd_holes;
- while (p) {
+ *sack_bytes_rexmt = 0;
+ for (p = tp->snd_holes; p ; p = p->next) {
if (SEQ_LT(p->rxmit, p->end)) {
if (SEQ_LT(p->rxmit, tp->snd_una)) {/* old SACK hole */
- p = p->next;
continue;
}
#ifdef TCP_SACK_DEBUG
if (p)
tcp_print_holes(tp);
#endif
- return (p);
+ *sack_bytes_rexmt += (p->rxmit - p->start);
+ break;
}
- p = p->next;
+ *sack_bytes_rexmt += (p->rxmit - p->start);
}
- return (NULL);
+ return (p);
}
/*
@@ -588,4 +594,3 @@ tcp_sack_adjust(struct tcpcb *tp)
tp->snd_nxt = tp->rcv_lastsack;
return;
}
-
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index b0f4e5c..7619916 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -206,11 +206,6 @@ static int tcp_inflight_stab = 20;
SYSCTL_INT(_net_inet_tcp_inflight, OID_AUTO, stab, CTLFLAG_RW,
&tcp_inflight_stab, 0, "Inflight Algorithm Stabilization 20 = 2 packets");
-SYSCTL_NODE(_net_inet_tcp, OID_AUTO, sack, CTLFLAG_RW, 0, "TCP SACK");
-int tcp_do_sack = 1;
-SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, enable, CTLFLAG_RW,
- &tcp_do_sack, 0, "Enable/Disable TCP SACK support");
-
uma_zone_t sack_hole_zone;
static struct inpcb *tcp_notify(struct inpcb *, int);
diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c
index b0f4e5c..7619916 100644
--- a/sys/netinet/tcp_timewait.c
+++ b/sys/netinet/tcp_timewait.c
@@ -206,11 +206,6 @@ static int tcp_inflight_stab = 20;
SYSCTL_INT(_net_inet_tcp_inflight, OID_AUTO, stab, CTLFLAG_RW,
&tcp_inflight_stab, 0, "Inflight Algorithm Stabilization 20 = 2 packets");
-SYSCTL_NODE(_net_inet_tcp, OID_AUTO, sack, CTLFLAG_RW, 0, "TCP SACK");
-int tcp_do_sack = 1;
-SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, enable, CTLFLAG_RW,
- &tcp_do_sack, 0, "Enable/Disable TCP SACK support");
-
uma_zone_t sack_hole_zone;
static struct inpcb *tcp_notify(struct inpcb *, int);
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index 4ba7a91..a58821b 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -200,6 +200,8 @@ struct tcpcb {
tcp_seq rcv_lastsack; /* last seq number(+1) sack'd by rcv'r*/
int rcv_numsacks; /* # distinct sack blks present */
struct sackblk sackblks[MAX_SACK_BLKS]; /* seq nos. of sack blocks */
+ tcp_seq sack_newdata; /* New data xmitted in this recovery
+ episode starts at this seq number */
};
#define IN_FASTRECOVERY(tp) (tp->t_flags & TF_FASTRECOVERY)
@@ -523,6 +525,7 @@ struct xtcpcb {
#ifdef _KERNEL
#ifdef SYSCTL_DECL
SYSCTL_DECL(_net_inet_tcp);
+SYSCTL_DECL(_net_inet_tcp_sack);
#endif
extern struct inpcbhead tcb; /* head of queue of active tcpcb's */
@@ -617,7 +620,7 @@ void tcp_update_sack_list(struct tcpcb *tp);
void tcp_del_sackholes(struct tcpcb *, struct tcphdr *);
void tcp_clean_sackreport(struct tcpcb *tp);
void tcp_sack_adjust(struct tcpcb *tp);
-struct sackhole *tcp_sack_output(struct tcpcb *tp);
+struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt);
void tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
void tcp_free_sackholes(struct tcpcb *tp);
int tcp_newreno(struct tcpcb *, struct tcphdr *);
OpenPOWER on IntegriCloud