summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- sys/netinet/tcp_input.c 36
-rw-r--r-- sys/netinet/tcp_reass.c 36
-rw-r--r-- sys/netinet/tcp_sack.c 107
-rw-r--r-- sys/netinet/tcp_var.h 6
4 files changed, 70 insertions, 115 deletions
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index eda9eb4..6762232 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -164,8 +164,7 @@ struct inpcbhead tcb;
struct inpcbinfo tcbinfo;
struct mtx *tcbinfo_mtx;
-static void tcp_dooptions(struct tcpcb *, struct tcpopt *, u_char *,
- int, int, struct tcphdr *);
+static void tcp_dooptions(struct tcpopt *, u_char *, int, int);
static void tcp_pulloutofband(struct socket *,
struct tcphdr *, struct mbuf *, int);
@@ -747,7 +746,7 @@ findpcb:
* present in a SYN segment. See tcp_timewait().
*/
if (thflags & TH_SYN)
- tcp_dooptions((struct tcpcb *)NULL, &to, optp, optlen, 1, th);
+ tcp_dooptions(&to, optp, optlen, 1);
if (tcp_timewait((struct tcptw *)inp->inp_ppcb,
&to, th, m, tlen))
goto findpcb;
@@ -961,7 +960,7 @@ findpcb:
tcp_trace(TA_INPUT, ostate, tp,
(void *)tcp_saveipgen, &tcp_savetcp, 0);
#endif
- tcp_dooptions(tp, &to, optp, optlen, 1, th);
+ tcp_dooptions(&to, optp, optlen, 1);
if (!syncache_add(&inc, &to, th, &so, m))
goto drop;
if (so == NULL) {
@@ -1082,7 +1081,7 @@ after_listen:
* for incoming connections is handled in tcp_syncache.
* XXX this is traditional behavior, may need to be cleaned up.
*/
- tcp_dooptions(tp, &to, optp, optlen, thflags & TH_SYN, th);
+ tcp_dooptions(&to, optp, optlen, thflags & TH_SYN);
if (thflags & TH_SYN) {
if (to.to_flags & TOF_SCALE) {
tp->t_flags |= TF_RCVD_SCALE;
@@ -1104,11 +1103,6 @@ after_listen:
}
- if (tp->sack_enable) {
- /* Delete stale (cumulatively acked) SACK holes */
- tcp_del_sackholes(tp, th);
- }
-
/*
* Header prediction: check for the two common cases
* of a uni-directional data xfer. If the packet has
@@ -1153,7 +1147,7 @@ after_listen:
((!tcp_do_newreno && !tp->sack_enable &&
tp->t_dupacks < tcprexmtthresh) ||
((tcp_do_newreno || tp->sack_enable) &&
- !IN_FASTRECOVERY(tp)))) {
+ !IN_FASTRECOVERY(tp) && to.to_nsacks == 0))) {
KASSERT(headlocked, ("headlocked"));
INP_INFO_WUNLOCK(&tcbinfo);
headlocked = 0;
@@ -1824,6 +1818,12 @@ trimthenstep6:
case TCPS_LAST_ACK:
case TCPS_TIME_WAIT:
KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
+ if (SEQ_GT(th->th_ack, tp->snd_max)) {
+ tcpstat.tcps_rcvacktoomuch++;
+ goto dropafterack;
+ }
+ if (tp->sack_enable)
+ tcp_sack_doack(tp, &to, th->th_ack);
if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
if (tlen == 0 && tiwin == tp->snd_wnd) {
tcpstat.tcps_rcvdupack++;
@@ -2002,10 +2002,6 @@ trimthenstep6:
tp->snd_cwnd = tp->snd_ssthresh;
}
tp->t_dupacks = 0;
- if (SEQ_GT(th->th_ack, tp->snd_max)) {
- tcpstat.tcps_rcvacktoomuch++;
- goto dropafterack;
- }
/*
* If we reach this point, ACK is not a duplicate,
* i.e., it ACKs something we sent.
@@ -2560,13 +2556,11 @@ drop:
* Parse TCP options and place in tcpopt.
*/
static void
-tcp_dooptions(tp, to, cp, cnt, is_syn, th)
- struct tcpcb *tp;
+tcp_dooptions(to, cp, cnt, is_syn)
struct tcpopt *to;
u_char *cp;
int cnt;
int is_syn;
- struct tcphdr *th;
{
int opt, optlen;
@@ -2642,10 +2636,12 @@ tcp_dooptions(tp, to, cp, cnt, is_syn, th)
to->to_flags |= TOF_SACK;
}
break;
-
case TCPOPT_SACK:
- if (!tp || tcp_sack_option(tp, th, cp, optlen))
+ if (optlen <= 2 || (optlen - 2) % TCPOLEN_SACK != 0)
continue;
+ to->to_nsacks = (optlen - 2) / TCPOLEN_SACK;
+ to->to_sacks = cp + 2;
+ tcpstat.tcps_sack_rcv_blocks++;
break;
default:
continue;
diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c
index eda9eb4..6762232 100644
--- a/sys/netinet/tcp_reass.c
+++ b/sys/netinet/tcp_reass.c
@@ -164,8 +164,7 @@ struct inpcbhead tcb;
struct inpcbinfo tcbinfo;
struct mtx *tcbinfo_mtx;
-static void tcp_dooptions(struct tcpcb *, struct tcpopt *, u_char *,
- int, int, struct tcphdr *);
+static void tcp_dooptions(struct tcpopt *, u_char *, int, int);
static void tcp_pulloutofband(struct socket *,
struct tcphdr *, struct mbuf *, int);
@@ -747,7 +746,7 @@ findpcb:
* present in a SYN segment. See tcp_timewait().
*/
if (thflags & TH_SYN)
- tcp_dooptions((struct tcpcb *)NULL, &to, optp, optlen, 1, th);
+ tcp_dooptions(&to, optp, optlen, 1);
if (tcp_timewait((struct tcptw *)inp->inp_ppcb,
&to, th, m, tlen))
goto findpcb;
@@ -961,7 +960,7 @@ findpcb:
tcp_trace(TA_INPUT, ostate, tp,
(void *)tcp_saveipgen, &tcp_savetcp, 0);
#endif
- tcp_dooptions(tp, &to, optp, optlen, 1, th);
+ tcp_dooptions(&to, optp, optlen, 1);
if (!syncache_add(&inc, &to, th, &so, m))
goto drop;
if (so == NULL) {
@@ -1082,7 +1081,7 @@ after_listen:
* for incoming connections is handled in tcp_syncache.
* XXX this is traditional behavior, may need to be cleaned up.
*/
- tcp_dooptions(tp, &to, optp, optlen, thflags & TH_SYN, th);
+ tcp_dooptions(&to, optp, optlen, thflags & TH_SYN);
if (thflags & TH_SYN) {
if (to.to_flags & TOF_SCALE) {
tp->t_flags |= TF_RCVD_SCALE;
@@ -1104,11 +1103,6 @@ after_listen:
}
- if (tp->sack_enable) {
- /* Delete stale (cumulatively acked) SACK holes */
- tcp_del_sackholes(tp, th);
- }
-
/*
* Header prediction: check for the two common cases
* of a uni-directional data xfer. If the packet has
@@ -1153,7 +1147,7 @@ after_listen:
((!tcp_do_newreno && !tp->sack_enable &&
tp->t_dupacks < tcprexmtthresh) ||
((tcp_do_newreno || tp->sack_enable) &&
- !IN_FASTRECOVERY(tp)))) {
+ !IN_FASTRECOVERY(tp) && to.to_nsacks == 0))) {
KASSERT(headlocked, ("headlocked"));
INP_INFO_WUNLOCK(&tcbinfo);
headlocked = 0;
@@ -1824,6 +1818,12 @@ trimthenstep6:
case TCPS_LAST_ACK:
case TCPS_TIME_WAIT:
KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
+ if (SEQ_GT(th->th_ack, tp->snd_max)) {
+ tcpstat.tcps_rcvacktoomuch++;
+ goto dropafterack;
+ }
+ if (tp->sack_enable)
+ tcp_sack_doack(tp, &to, th->th_ack);
if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
if (tlen == 0 && tiwin == tp->snd_wnd) {
tcpstat.tcps_rcvdupack++;
@@ -2002,10 +2002,6 @@ trimthenstep6:
tp->snd_cwnd = tp->snd_ssthresh;
}
tp->t_dupacks = 0;
- if (SEQ_GT(th->th_ack, tp->snd_max)) {
- tcpstat.tcps_rcvacktoomuch++;
- goto dropafterack;
- }
/*
* If we reach this point, ACK is not a duplicate,
* i.e., it ACKs something we sent.
@@ -2560,13 +2556,11 @@ drop:
* Parse TCP options and place in tcpopt.
*/
static void
-tcp_dooptions(tp, to, cp, cnt, is_syn, th)
- struct tcpcb *tp;
+tcp_dooptions(to, cp, cnt, is_syn)
struct tcpopt *to;
u_char *cp;
int cnt;
int is_syn;
- struct tcphdr *th;
{
int opt, optlen;
@@ -2642,10 +2636,12 @@ tcp_dooptions(tp, to, cp, cnt, is_syn, th)
to->to_flags |= TOF_SACK;
}
break;
-
case TCPOPT_SACK:
- if (!tp || tcp_sack_option(tp, th, cp, optlen))
+ if (optlen <= 2 || (optlen - 2) % TCPOLEN_SACK != 0)
continue;
+ to->to_nsacks = (optlen - 2) / TCPOLEN_SACK;
+ to->to_sacks = cp + 2;
+ tcpstat.tcps_sack_rcv_blocks++;
break;
default:
continue;
diff --git a/sys/netinet/tcp_sack.c b/sys/netinet/tcp_sack.c
index 945ab9e..c15f827 100644
--- a/sys/netinet/tcp_sack.c
+++ b/sys/netinet/tcp_sack.c
@@ -373,54 +373,54 @@ tcp_sackhole_remove(struct tcpcb *tp, struct sackhole *hole)
}
/*
- * Process the TCP SACK option. Returns 1 if tcp_dooptions() should continue,
- * and 0 otherwise, if the option was fine. tp->snd_holes is an ordered list
- * of holes (oldest to newest, in terms of the sequence space).
+ * Process cumulative ACK and the TCP SACK option to update the scoreboard.
+ * tp->snd_holes is an ordered list of holes (oldest to newest, in terms of
+ * the sequence space).
*/
-int
-tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen)
+void
+tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
{
- int tmp_olen;
- u_char *tmp_cp;
struct sackhole *cur, *temp;
- struct sackblk sack, sack_blocks[TCP_MAX_SACK], *sblkp;
+ struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp;
int i, j, num_sack_blks;
INP_LOCK_ASSERT(tp->t_inpcb);
- if (!tp->sack_enable)
- return (1);
- if ((th->th_flags & TH_ACK) == 0)
- return (1);
- /* Note: TCPOLEN_SACK must be 2*sizeof(tcp_seq) */
- if (optlen <= 2 || (optlen - 2) % TCPOLEN_SACK != 0)
- return (1);
- /* If ack is outside [snd_una, snd_max], ignore the SACK options */
- if (SEQ_LT(th->th_ack, tp->snd_una) || SEQ_GT(th->th_ack, tp->snd_max))
- return (1);
- tmp_cp = cp + 2;
- tmp_olen = optlen - 2;
- tcpstat.tcps_sack_rcv_blocks++;
+
+ num_sack_blks = 0;
/*
- * Sort the SACK blocks so we can update the scoreboard
- * with just one pass. The overhead of sorting upto 4 elements
- * is less than making upto 4 passes over the scoreboard.
+ * If SND.UNA will be advanced by SEG.ACK, and if SACK holes exist,
+ * treat [SND.UNA, SEG.ACK) as if it is a SACK block.
*/
- num_sack_blks = 0;
- while (tmp_olen > 0) {
- bcopy(tmp_cp, &sack, sizeof(sack));
+ if (SEQ_LT(tp->snd_una, th_ack) && !TAILQ_EMPTY(&tp->snd_holes)) {
+ sack_blocks[num_sack_blks].start = tp->snd_una;
+ sack_blocks[num_sack_blks++].end = th_ack;
+ }
+ /*
+ * Append received valid SACK blocks to sack_blocks[].
+ */
+ for (i = 0; i < to->to_nsacks; i++) {
+ bcopy((to->to_sacks + i * TCPOLEN_SACK), &sack, sizeof(sack));
sack.start = ntohl(sack.start);
sack.end = ntohl(sack.end);
if (SEQ_GT(sack.end, sack.start) &&
SEQ_GT(sack.start, tp->snd_una) &&
- SEQ_GT(sack.start, th->th_ack) &&
+ SEQ_GT(sack.start, th_ack) &&
SEQ_LEQ(sack.end, tp->snd_max))
sack_blocks[num_sack_blks++] = sack;
- tmp_olen -= TCPOLEN_SACK;
- tmp_cp += TCPOLEN_SACK;
}
+
+ /*
+ * Return if SND.UNA is not advanced and no valid SACK block
+ * is received.
+ */
if (num_sack_blks == 0)
- return 0;
- /* Bubble sort */
+ return;
+
+ /*
+ * Sort the SACK blocks so we can update the scoreboard
+ * with just one pass. The overhead of sorting up to 4+1 elements
+ * is less than making up to 4+1 passes over the scoreboard.
+ */
for (i = 0; i < num_sack_blks; i++) {
for (j = i + 1; j < num_sack_blks; j++) {
if (SEQ_GT(sack_blocks[i].end, sack_blocks[j].end)) {
@@ -437,7 +437,7 @@ tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen)
* (from the sack blocks received) are created later below (in
* the logic that adds holes to the tail of the scoreboard).
*/
- tp->snd_fack = tp->snd_una;
+ tp->snd_fack = SEQ_MAX(tp->snd_una, th_ack);
/*
* In the while-loop below, incoming SACK blocks (sack_blocks[])
* and SACK holes (snd_holes) are traversed from their tails with
@@ -460,7 +460,7 @@ tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen)
*/
temp = tcp_sackhole_insert(tp, tp->snd_fack,sblkp->start,NULL);
if (temp == NULL)
- return 0;
+ return;
tp->snd_fack = sblkp->end;
/* Go to the previous sack block. */
sblkp--;
@@ -548,49 +548,12 @@ tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen)
else
sblkp--;
}
- return (0);
}
/*
- * Delete stale (i.e, cumulatively ack'd) holes. Hole is deleted only if
- * it is completely acked; otherwise, tcp_sack_option(), called from
- * tcp_dooptions(), will fix up the hole.
+ * Free all SACK holes to clear the scoreboard.
*/
void
-tcp_del_sackholes(tp, th)
- struct tcpcb *tp;
- struct tcphdr *th;
-{
- INP_LOCK_ASSERT(tp->t_inpcb);
- if (tp->sack_enable && tp->t_state != TCPS_LISTEN) {
- /* max because this could be an older ack just arrived */
- tcp_seq lastack = SEQ_GT(th->th_ack, tp->snd_una) ?
- th->th_ack : tp->snd_una;
- struct sackhole *cur = TAILQ_FIRST(&tp->snd_holes);
- struct sackhole *prev;
- while (cur)
- if (SEQ_LEQ(cur->end, lastack)) {
- prev = cur;
- cur = TAILQ_NEXT(cur, scblink);
- tp->sackhint.sack_bytes_rexmit -=
- (prev->rxmit - prev->start);
- tcp_sackhole_remove(tp, prev);
- } else if (SEQ_LT(cur->start, lastack)) {
- if (SEQ_LT(cur->rxmit, lastack)) {
- tp->sackhint.sack_bytes_rexmit -=
- (cur->rxmit - cur->start);
- cur->rxmit = lastack;
- } else
- tp->sackhint.sack_bytes_rexmit -=
- (lastack - cur->start);
- cur->start = lastack;
- break;
- } else
- break;
- }
-}
-
-void
tcp_free_sackholes(struct tcpcb *tp)
{
struct sackhole *q;
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index d2f34ea..b1515dd 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -238,7 +238,8 @@ struct tcpopt {
u_int32_t to_tsecr;
u_int16_t to_mss;
u_int8_t to_requested_s_scale;
- u_int8_t to_pad;
+ u_int8_t to_nsacks; /* number of SACK blocks */
+ u_char *to_sacks; /* pointer to the first SACK block */
};
#ifdef _NETINET_IN_PCB_H_
@@ -578,9 +579,8 @@ extern u_long tcp_sendspace;
extern u_long tcp_recvspace;
tcp_seq tcp_new_isn(struct tcpcb *);
-int tcp_sack_option(struct tcpcb *,struct tcphdr *,u_char *,int);
+void tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend);
-void tcp_del_sackholes(struct tcpcb *, struct tcphdr *);
void tcp_clean_sackreport(struct tcpcb *tp);
void tcp_sack_adjust(struct tcpcb *tp);
struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt);
OpenPOWER on IntegriCloud