summaryrefslogtreecommitdiffstats
path: root/sys/netinet/tcp_sack.c
diff options
context:
space:
mode:
authorps <ps@FreeBSD.org>2005-05-11 21:37:42 +0000
committerps <ps@FreeBSD.org>2005-05-11 21:37:42 +0000
commit0ee231720168f91505019116179aa59a3f939618 (patch)
tree136eaa5e8df38eb44a216776b146e7f2be9d17d5 /sys/netinet/tcp_sack.c
parentb2be2c6b08ec2d5c2b4b24ddd94f0f026c865ff9 (diff)
downloadFreeBSD-src-0ee231720168f91505019116179aa59a3f939618.zip
FreeBSD-src-0ee231720168f91505019116179aa59a3f939618.tar.gz
When looking for the next hole to retransmit from the scoreboard,
or to compute the total retransmitted bytes in this sack recovery episode, the scoreboard is traversed. While in sack recovery, this traversal occurs on every call to tcp_output(), every dupack and every partial ack. The scoreboard could potentially get quite large, making this traversal expensive. This change optimizes this by storing hints (for the next hole to retransmit and the total retransmitted bytes in this sack recovery episode) reducing the complexity to find these values from O(n) to constant time. The debug code that sanity checks the hints against the computed value will be removed eventually. Submitted by: Mohan Srinivasan, Noritoshi Demizu, Raja Mukerji.
Diffstat (limited to 'sys/netinet/tcp_sack.c')
-rw-r--r--sys/netinet/tcp_sack.c182
1 files changed, 111 insertions, 71 deletions
diff --git a/sys/netinet/tcp_sack.c b/sys/netinet/tcp_sack.c
index 4eda008..924228f 100644
--- a/sys/netinet/tcp_sack.c
+++ b/sys/netinet/tcp_sack.c
@@ -352,6 +352,8 @@ tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen)
cur->rxmit = cur->start;
tp->snd_numholes = 1;
tcp_sack_globalholes++;
+ /* Update the sack scoreboard "cache" */
+ tp->sackhint.nexthole = cur;
tp->rcv_lastsack = sack.end;
TAILQ_INSERT_HEAD(&tp->snd_holes, cur, scblink);
continue; /* with next sack block */
@@ -367,10 +369,17 @@ tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen)
cur = TAILQ_NEXT(cur, scblink);
continue;
}
+ tp->sackhint.sack_bytes_rexmit -=
+ (cur->rxmit - cur->start);
+ KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
+ ("sackhint bytes rtx >= 0"));
if (SEQ_LEQ(sack.start, cur->start)) {
/* Data acks at least the beginning of hole */
if (SEQ_GEQ(sack.end, cur->end)) {
/* Acks entire hole, so delete hole */
+ if (tp->sackhint.nexthole == cur)
+ tp->sackhint.nexthole =
+ TAILQ_NEXT(cur, scblink);
temp = cur;
cur = TAILQ_NEXT(cur, scblink);
TAILQ_REMOVE(&tp->snd_holes,
@@ -379,22 +388,16 @@ tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen)
tp->snd_numholes--;
tcp_sack_globalholes--;
continue;
+ } else {
+ /* Move start of hole forward */
+ cur->start = sack.end;
+ cur->rxmit = SEQ_MAX(cur->rxmit, cur->start);
}
- /* otherwise, move start of hole forward */
- cur->start = sack.end;
- cur->rxmit = SEQ_MAX(cur->rxmit, cur->start);
- cur = TAILQ_NEXT(cur, scblink);
- continue;
- }
- /* move end of hole backward */
- if (SEQ_GEQ(sack.end, cur->end)) {
+ } else if (SEQ_GEQ(sack.end, cur->end)) {
+ /* Move end of hole backward */
cur->end = sack.start;
cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
- cur = TAILQ_NEXT(cur, scblink);
- continue;
- }
- if (SEQ_LT(cur->start, sack.start) &&
- SEQ_GT(cur->end, sack.end)) {
+ } else {
/*
* ACKs some data in middle of a hole; need to
* split current hole
@@ -403,25 +406,34 @@ tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen)
tcp_sack_globalholes >=
tcp_sack_globalmaxholes) {
tcpstat.tcps_sack_sboverflow++;
- continue;
+ temp = NULL;
+ } else {
+ temp = (struct sackhole *)
+ uma_zalloc(sack_hole_zone,
+ M_NOWAIT);
+ }
+ if (temp != NULL) {
+ temp->start = sack.end;
+ temp->end = cur->end;
+ temp->rxmit = SEQ_MAX(cur->rxmit,
+ temp->start);
+ cur->end = sack.start;
+ cur->rxmit = SEQ_MIN(cur->rxmit,
+ cur->end);
+ tp->sackhint.sack_bytes_rexmit +=
+ (cur->rxmit - cur->start);
+ TAILQ_INSERT_AFTER(&tp->snd_holes,
+ cur, temp, scblink);
+ cur = temp;
+ tp->snd_numholes++;
+ tcp_sack_globalholes++;
}
- temp = (struct sackhole *)
- uma_zalloc(sack_hole_zone,M_NOWAIT);
- if (temp == NULL)
- continue; /* ENOBUFS */
- temp->start = sack.end;
- temp->end = cur->end;
- temp->rxmit = SEQ_MAX(cur->rxmit, temp->start);
- cur->end = sack.start;
- cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
- TAILQ_INSERT_AFTER(&tp->snd_holes,
- cur, temp, scblink);
- cur = TAILQ_NEXT(temp, scblink);
- tp->snd_numholes++;
- tcp_sack_globalholes++;
}
+ tp->sackhint.sack_bytes_rexmit +=
+ (cur->rxmit - cur->start);
+ cur = TAILQ_NEXT(cur, scblink);
}
- /* At this point, we are at the tail of the scoreboard. */
+ /* At this point, we have iterated the whole scoreboard. */
if (SEQ_LT(tp->rcv_lastsack, sack.start)) {
/* Need to append new hole at end. */
if (tp->snd_numholes >= tcp_sack_maxholes ||
@@ -467,15 +479,25 @@ tcp_del_sackholes(tp, th)
while (cur)
if (SEQ_LEQ(cur->end, lastack)) {
prev = cur;
+ tp->sackhint.sack_bytes_rexmit -=
+ (cur->rxmit - cur->start);
+ if (tp->sackhint.nexthole == cur)
+ tp->sackhint.nexthole =
+ TAILQ_NEXT(cur, scblink);
cur = TAILQ_NEXT(cur, scblink);
TAILQ_REMOVE(&tp->snd_holes, prev, scblink);
uma_zfree(sack_hole_zone, prev);
tp->snd_numholes--;
tcp_sack_globalholes--;
} else if (SEQ_LT(cur->start, lastack)) {
+ if (SEQ_LT(cur->rxmit, lastack)) {
+ tp->sackhint.sack_bytes_rexmit -=
+ (cur->rxmit - cur->start);
+ cur->rxmit = lastack;
+ } else
+ tp->sackhint.sack_bytes_rexmit -=
+ (lastack - cur->start);
cur->start = lastack;
- if (SEQ_LT(cur->rxmit, cur->start))
- cur->rxmit = cur->start;
break;
} else
break;
@@ -494,6 +516,8 @@ tcp_free_sackholes(struct tcpcb *tp)
tcp_sack_globalholes--;
}
tp->snd_numholes = 0;
+ tp->sackhint.nexthole = NULL;
+ tp->sackhint.sack_bytes_rexmit = 0;
}
/*
@@ -512,7 +536,6 @@ tcp_sack_partialack(tp, th)
struct tcphdr *th;
{
int num_segs = 1;
- int sack_bytes_rxmt = 0;
INP_LOCK_ASSERT(tp->t_inpcb);
callout_stop(tp->tt_rexmt);
@@ -520,9 +543,9 @@ tcp_sack_partialack(tp, th)
/* send one or 2 segments based on how much new data was acked */
if (((th->th_ack - tp->snd_una) / tp->t_maxseg) > 2)
num_segs = 2;
- (void)tcp_sack_output(tp, &sack_bytes_rxmt);
- tp->snd_cwnd = sack_bytes_rxmt + (tp->snd_nxt - tp->sack_newdata) +
- num_segs * tp->t_maxseg;
+ tp->snd_cwnd = (tp->sackhint.sack_bytes_rexmit +
+ (tp->snd_nxt - tp->sack_newdata) +
+ num_segs * tp->t_maxseg);
if (tp->snd_cwnd > tp->snd_ssthresh)
tp->snd_cwnd = tp->snd_ssthresh;
tp->t_flags |= TF_ACKNOW;
@@ -530,17 +553,15 @@ tcp_sack_partialack(tp, th)
}
/*
- * Returns pointer to a sackhole if there are any pending retransmissions;
- * NULL otherwise.
+ * Debug version of tcp_sack_output() that walks the scoreboard. Used for
+ * now to sanity check the hint.
*/
-struct sackhole *
-tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt)
+static struct sackhole *
+tcp_sack_output_debug(struct tcpcb *tp, int *sack_bytes_rexmt)
{
struct sackhole *p;
INP_LOCK_ASSERT(tp->t_inpcb);
- if (!tp->sack_enable)
- return (NULL);
*sack_bytes_rexmt = 0;
TAILQ_FOREACH(p, &tp->snd_holes, scblink) {
if (SEQ_LT(p->rxmit, p->end)) {
@@ -556,6 +577,55 @@ tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt)
}
/*
+ * Returns the next hole to retransmit and the number of retransmitted bytes
+ * from the scoreboard. We store both the next hole and the number of
+ * retransmitted bytes as hints (and recompute these on the fly upon SACK/ACK
+ * reception). This avoids scoreboard traversals completely.
+ *
+ * The loop here will traverse *at most* one link. Here's the argument.
+ * For the loop to traverse more than 1 link before finding the next hole to
+ * retransmit, we would need to have at least 1 node following the current hint
+ * with (rxmit == end). But, for all holes following the current hint,
+ * (start == rxmit), since we have not yet retransmitted from them. Therefore,
+ * in order to traverse more 1 link in the loop below, we need to have at least
+ * one node following the current hint with (start == rxmit == end).
+ * But that can't happen, (start == end) means that all the data in that hole
+ * has been sacked, in which case, the hole would have been removed from the
+ * scoreboard.
+ */
+struct sackhole *
+tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt)
+{
+ struct sackhole *hole = NULL, *dbg_hole = NULL;
+ int dbg_bytes_rexmt;
+
+ INP_LOCK_ASSERT(tp->t_inpcb);
+ dbg_hole = tcp_sack_output_debug(tp, &dbg_bytes_rexmt);
+ *sack_bytes_rexmt = tp->sackhint.sack_bytes_rexmit;
+ hole = tp->sackhint.nexthole;
+ if (hole == NULL || SEQ_LT(hole->rxmit, hole->end))
+ goto out;
+ while ((hole = TAILQ_NEXT(hole, scblink)) != NULL) {
+ if (SEQ_LT(hole->rxmit, hole->end)) {
+ tp->sackhint.nexthole = hole;
+ break;
+ }
+ }
+out:
+ if (dbg_hole != hole) {
+ printf("%s: Computed sack hole not the same as cached value\n", __func__);
+ hole = dbg_hole;
+ }
+ if (*sack_bytes_rexmt != dbg_bytes_rexmt) {
+ printf("%s: Computed sack_bytes_retransmitted (%d) not"
+ "the same as cached value (%d)\n",
+ __func__, dbg_bytes_rexmt, *sack_bytes_rexmt);
+ *sack_bytes_rexmt = dbg_bytes_rexmt;
+ }
+ return (hole);
+}
+
+/*
* After a timeout, the SACK list may be rebuilt. This SACK information
* should be used to avoid retransmitting SACKed data. This function
* traverses the SACK list to see if snd_nxt should be moved forward.
@@ -590,33 +660,3 @@ tcp_sack_adjust(struct tcpcb *tp)
tp->snd_nxt = tp->rcv_lastsack;
return;
}
-
-/*
- * Calculate the number of SACKed bytes in the scoreboard by
- * subtracting the amount of data accounted for in sackholes
- * from the total span of the scoreboard. Also returns the
- * amount of data that is "lost" and has not yet been retransmitted.
- */
-int
-tcp_sacked_bytes(struct tcpcb *tp, int *lost_not_rexmitted)
-{
- INP_LOCK_ASSERT(tp->t_inpcb);
- struct sackhole *cur = TAILQ_FIRST(&tp->snd_holes);
- int sacked = 0;
- int lost = 0;
-
- if (cur == NULL) /* Scoreboard empty. */
- goto out;
- if (SEQ_GEQ(tp->snd_una, tp->rcv_lastsack)) /* Scoreboard is stale. */
- goto out;
- sacked = tp->rcv_lastsack - cur->start;
- while (cur) {
- lost += (cur->end - cur->rxmit);
- sacked -= (cur->end - cur->start);
- cur = TAILQ_NEXT(cur, scblink);
- }
-out:
- if (lost_not_rexmitted)
- *lost_not_rexmitted = lost;
- return (sacked);
-}
OpenPOWER on IntegriCloud