summaryrefslogtreecommitdiffstats
path: root/sys/netinet
diff options
context:
space:
mode:
authorandre <andre@FreeBSD.org>2007-03-15 15:59:28 +0000
committerandre <andre@FreeBSD.org>2007-03-15 15:59:28 +0000
commitda1f4962209103a9c4b8de8081614312a2c05864 (patch)
tree958c5cc9f9700b70c7ad5f201a98774762724e25 /sys/netinet
parente384ffc370ce3185a1575f148f487dbaad04de2c (diff)
downloadFreeBSD-src-da1f4962209103a9c4b8de8081614312a2c05864.zip
FreeBSD-src-da1f4962209103a9c4b8de8081614312a2c05864.tar.gz
Consolidate insertion of TCP options into a segment from within tcp_output()
and syncache_respond() into its own generic function tcp_addoptions().

tcp_addoptions() is alignment agnostic and does optimal packing in all cases.

In struct tcpopt rename to_requested_s_scale to just to_wscale.

Add a comment with quote from RFC1323: "The Window field in a SYN (i.e., a <SYN> or <SYN,ACK>) segment itself is never scaled."

Reviewed by: silby, mohans, julian
Sponsored by: TCP/IP Optimization Fundraise 2005
Diffstat (limited to 'sys/netinet')
-rw-r--r--sys/netinet/tcp.h7
-rw-r--r--sys/netinet/tcp_input.c4
-rw-r--r--sys/netinet/tcp_output.c343
-rw-r--r--sys/netinet/tcp_reass.c4
-rw-r--r--sys/netinet/tcp_syncache.c118
-rw-r--r--sys/netinet/tcp_var.h22
6 files changed, 264 insertions, 234 deletions
diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h
index 04c8663..3f744fb 100644
--- a/sys/netinet/tcp.h
+++ b/sys/netinet/tcp.h
@@ -76,14 +76,17 @@ struct tcphdr {
};
#define TCPOPT_EOL 0
+#define TCPOLEN_EOL 1
#define TCPOPT_NOP 1
+#define TCPOLEN_NOP 1
#define TCPOPT_MAXSEG 2
#define TCPOLEN_MAXSEG 4
#define TCPOPT_WINDOW 3
#define TCPOLEN_WINDOW 3
-#define TCPOPT_SACK_PERMITTED 4 /* Experimental */
+#define TCPOPT_SACK_PERMITTED 4
#define TCPOLEN_SACK_PERMITTED 2
-#define TCPOPT_SACK 5 /* Experimental */
+#define TCPOPT_SACK 5
+#define TCPOLEN_SACKHDR 2
#define TCPOLEN_SACK 8 /* 2*sizeof(tcp_seq) */
#define TCPOPT_TIMESTAMP 8
#define TCPOLEN_TIMESTAMP 10
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 2d88d47..eb51349 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -1146,7 +1146,7 @@ after_listen:
if ((to.to_flags & TOF_SCALE) &&
(tp->t_flags & TF_REQ_SCALE)) {
tp->t_flags |= TF_RCVD_SCALE;
- tp->snd_scale = to.to_requested_s_scale;
+ tp->snd_scale = to.to_wscale;
tp->snd_wnd = th->th_win << tp->snd_scale;
tiwin = tp->snd_wnd;
}
@@ -2745,7 +2745,7 @@ tcp_dooptions(to, cp, cnt, flags)
if (!(flags & TO_SYN))
continue;
to->to_flags |= TOF_SCALE;
- to->to_requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT);
+ to->to_wscale = min(cp[2], TCP_MAX_WINSHIFT);
break;
case TCPOPT_TIMESTAMP:
if (optlen != TCPOLEN_TIMESTAMP)
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index 586a5bd..09f7db3 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -142,10 +142,10 @@ tcp_output(struct tcpcb *tp)
u_char opt[TCP_MAXOLEN];
unsigned ipoptlen, optlen, hdrlen;
int idle, sendalot;
- int i, sack_rxmit;
- int sack_bytes_rxmt;
+ int sack_rxmit, sack_bytes_rxmt;
struct sackhole *p;
int tso = 0;
+ struct tcpopt to;
#if 0
int maxburst = TCP_MAXBURST;
#endif
@@ -626,157 +626,67 @@ send:
else
#endif
hdrlen = sizeof (struct tcpiphdr);
- if (flags & TH_SYN) {
- tp->snd_nxt = tp->iss;
- if ((tp->t_flags & TF_NOOPT) == 0) {
- u_short mss;
-
- opt[0] = TCPOPT_MAXSEG;
- opt[1] = TCPOLEN_MAXSEG;
- mss = htons((u_short) tcp_mssopt(&tp->t_inpcb->inp_inc));
- (void)memcpy(opt + 2, &mss, sizeof(mss));
- optlen = TCPOLEN_MAXSEG;
-
- if ((tp->t_flags & TF_REQ_SCALE) &&
- ((flags & TH_ACK) == 0 ||
- (tp->t_flags & TF_RCVD_SCALE))) {
- *((u_int32_t *)(opt + optlen)) = htonl(
- TCPOPT_NOP << 24 |
- TCPOPT_WINDOW << 16 |
- TCPOLEN_WINDOW << 8 |
- tp->request_r_scale);
- optlen += 4;
- }
- }
- }
/*
- * Send a timestamp and echo-reply if this is a SYN and our side
- * wants to use timestamps (TF_REQ_TSTMP is set) or both our side
- * and our peer have sent timestamps in our SYN's.
+ * Compute options for segment.
+ * We only have to care about SYN and established connection
+ * segments. Options for SYN-ACK segments are handled in TCP
+ * syncache.
*/
- if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
- (flags & TH_RST) == 0 &&
- ((flags & TH_ACK) == 0 ||
- (tp->t_flags & TF_RCVD_TSTMP))) {
- u_int32_t *lp = (u_int32_t *)(opt + optlen);
-
- /* Form timestamp option as shown in appendix A of RFC 1323. */
- *lp++ = htonl(TCPOPT_TSTAMP_HDR);
- *lp++ = htonl(ticks + tp->ts_offset);
- *lp = htonl(tp->ts_recent);
- optlen += TCPOLEN_TSTAMP_APPA;
- }
-
- /* Set receive buffer autosizing timestamp. */
- if (tp->rfbuf_ts == 0 && (so->so_rcv.sb_flags & SB_AUTOSIZE))
- tp->rfbuf_ts = ticks;
-
-#ifdef TCP_SIGNATURE
-#ifdef INET6
- if (!isipv6)
-#endif
- if (tp->t_flags & TF_SIGNATURE) {
- int i;
- u_char *bp;
-
- /* Initialize TCP-MD5 option (RFC2385) */
- bp = (u_char *)opt + optlen;
- *bp++ = TCPOPT_SIGNATURE;
- *bp++ = TCPOLEN_SIGNATURE;
- sigoff = optlen + 2;
- for (i = 0; i < TCP_SIGLEN; i++)
- *bp++ = 0;
- optlen += TCPOLEN_SIGNATURE;
- }
-#endif /* TCP_SIGNATURE */
-
- if (tp->sack_enable && ((tp->t_flags & TF_NOOPT) == 0)) {
- /*
- * Tack on the SACK permitted option *last*.
- * And do padding of options after tacking this on.
- * This is because of MSS, TS, WinScale and Signatures are
- * all present, we have just 2 bytes left for the SACK
- * permitted option, which is just enough.
- */
- /*
- * If this is the first SYN of connection (not a SYN
- * ACK), include SACK permitted option. If this is a
- * SYN ACK, include SACK permitted option if peer has
- * already done so. This is only for active connect,
- * since the syncache takes care of the passive connect.
- */
- if ((flags & TH_SYN) &&
- (!(flags & TH_ACK) || (tp->t_flags & TF_SACK_PERMIT))) {
- u_char *bp;
- bp = (u_char *)opt + optlen;
-
- *bp++ = TCPOPT_SACK_PERMITTED;
- *bp++ = TCPOLEN_SACK_PERMITTED;
- optlen += TCPOLEN_SACK_PERMITTED;
+ if ((tp->t_flags & TF_NOOPT) == 0) {
+ to.to_flags = 0;
+ /* Maximum segment size. */
+ if (flags & TH_SYN) {
+ tp->snd_nxt = tp->iss;
+ to.to_mss = tcp_mssopt(&tp->t_inpcb->inp_inc);
+ to.to_flags |= TOF_MSS;
}
-
- /*
- * Send SACKs if necessary. This should be the last
- * option processed. Only as many SACKs are sent as
- * are permitted by the maximum options size.
- *
- * In general, SACK blocks consume 8*n+2 bytes.
- * So a full size SACK blocks option is 34 bytes
- * (to generate 4 SACK blocks). At a minimum,
- * we need 10 bytes (to generate 1 SACK block).
- * If TCP Timestamps (12 bytes) and TCP Signatures
- * (18 bytes) are both present, we'll just have
- * 10 bytes for SACK options 40 - (12 + 18).
- */
- if (TCPS_HAVEESTABLISHED(tp->t_state) &&
- (tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks > 0 &&
- MAX_TCPOPTLEN - optlen - 2 >= TCPOLEN_SACK) {
- int nsack, sackoptlen, padlen;
- u_char *bp = (u_char *)opt + optlen;
- u_int32_t *lp;
-
- nsack = (MAX_TCPOPTLEN - optlen - 2) / TCPOLEN_SACK;
- nsack = min(nsack, tp->rcv_numsacks);
- sackoptlen = (2 + nsack * TCPOLEN_SACK);
-
- /*
- * First we need to pad options so that the
- * SACK blocks can start at a 4-byte boundary
- * (sack option and length are at a 2 byte offset).
- */
- padlen = (MAX_TCPOPTLEN - optlen - sackoptlen) % 4;
- optlen += padlen;
- while (padlen-- > 0)
- *bp++ = TCPOPT_NOP;
-
- tcpstat.tcps_sack_send_blocks++;
- *bp++ = TCPOPT_SACK;
- *bp++ = sackoptlen;
- lp = (u_int32_t *)bp;
- for (i = 0; i < nsack; i++) {
- struct sackblk sack = tp->sackblks[i];
- *lp++ = htonl(sack.start);
- *lp++ = htonl(sack.end);
+ /* Window scaling. */
+ if ((flags & TH_SYN) && (tp->t_flags & TF_REQ_SCALE)) {
+ to.to_wscale = tp->request_r_scale;
+ to.to_flags |= TOF_SCALE;
+ }
+ /* Timestamps. */
+ if ((tp->t_flags & TF_RCVD_TSTMP) ||
+ ((flags & TH_SYN) && (tp->t_flags & TF_REQ_TSTMP))) {
+ to.to_tsval = ticks + tp->ts_offset;
+ to.to_tsecr = tp->ts_recent;
+ to.to_flags |= TOF_TS;
+ /* Set receive buffer autosizing timestamp. */
+ if (tp->rfbuf_ts == 0 &&
+ (so->so_rcv.sb_flags & SB_AUTOSIZE))
+ tp->rfbuf_ts = ticks;
+ }
+ /* Selective ACK's. */
+ if (tp->sack_enable) {
+ if (flags & TH_SYN)
+ to.to_flags |= TOF_SACKPERM;
+ else if (TCPS_HAVEESTABLISHED(tp->t_state) &&
+ (tp->t_flags & TF_SACK_PERMIT) &&
+ tp->rcv_numsacks > 0) {
+ to.to_flags |= TOF_SACK;
+ to.to_nsacks = tp->rcv_numsacks;
+ to.to_sacks = (u_char *)tp->sackblks;
}
- optlen += sackoptlen;
}
- }
+#ifdef TCP_SIGNATURE
+ /* TCP-MD5 (RFC2385). */
+#ifdef INET6
+ if (!isipv6 && (tp->t_flags & TF_SIGNATURE))
+#else
+ if (tp->t_flags & TF_SIGNATURE)
+#endif /* INET6 */
+ to.to_flags |= TOF_SIGNATURE;
+#endif /* TCP_SIGNATURE */
- /* Pad TCP options to a 4 byte boundary */
- if (optlen < MAX_TCPOPTLEN && (optlen % sizeof(u_int32_t))) {
- int pad = sizeof(u_int32_t) - (optlen % sizeof(u_int32_t));
- u_char *bp = (u_char *)opt + optlen;
+ /* Processing the options. */
+ hdrlen += optlen = tcp_addoptions(&to, (u_char *)&opt);
- optlen += pad;
- while (pad) {
- *bp++ = TCPOPT_EOL;
- pad--;
- }
+#ifdef TCP_SIGNATURE
+ sigoff = to.to_signature - (u_char *)&to;
+#endif /* TCP_SIGNATURE */
}
- hdrlen += optlen;
-
#ifdef INET6
if (isipv6)
ipoptlen = ip6_optlen(tp->t_inpcb);
@@ -876,11 +786,11 @@ send:
m->m_data += max_linkhdr;
m->m_len = hdrlen;
if (len <= MHLEN - hdrlen - max_linkhdr) {
- m_copydata(so->so_snd.sb_mb, off, (int) len,
+ m_copydata(so->so_snd.sb_mb, off, (int)len,
mtod(m, caddr_t) + hdrlen);
m->m_len += len;
} else {
- m->m_next = m_copy(so->so_snd.sb_mb, off, (int) len);
+ m->m_next = m_copy(so->so_snd.sb_mb, off, (int)len);
if (m->m_next == 0) {
SOCKBUF_UNLOCK(&so->so_snd);
(void) m_free(m);
@@ -983,6 +893,9 @@ send:
/*
* Calculate receive window. Don't shrink window,
* but avoid silly window syndrome.
+ *
+ * XXX: RFC1323: The Window field in a SYN (i.e., a <SYN> or
+ * <SYN,ACK>) segment itself is never scaled.
*/
if (recwin < (long)(so->so_rcv.sb_hiwat / 4) &&
recwin < (long)tp->t_maxseg)
@@ -1320,3 +1233,143 @@ tcp_setpersist(tp)
if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
tp->t_rxtshift++;
}
+
+/*
+ * Insert TCP options according to the supplied parameters to the place
+ * optp in a consistent way.  Can handle unaligned destinations.
+ *
+ * Parameters:
+ *   to    Options to emit, selected by the TOF_* bits in to_flags.  The
+ *         structure is modified: to_mss, to_tsval and to_tsecr are
+ *         converted to network byte order in place, and to_signature is
+ *         pointed at the zero-filled signature bytes inside the output.
+ *         If the signature option does not fit, TOF_SIGNATURE is cleared
+ *         and to_signature is set to NULL.
+ *   optp  Destination buffer; at most MAX_TCPOPTLEN bytes are written.
+ *
+ * Returns the number of option bytes written, padded to a multiple of 4.
+ *
+ * Options are emitted in ascending TOF_* bit order.  That order is
+ * crucial for optimal packing and alignment in the scarce option space.
+ * An option that would not fit, including its alignment NOPs, is skipped
+ * as a whole so that no bytes are spent on orphaned padding and nothing
+ * is ever written past MAX_TCPOPTLEN.
+ *
+ * The resulting layout for a SYN/SYN-ACK segment carrying everything is:
+ *   MSS (4) + NOP (1) + Window scale (3) + SACK permitted (2) +
+ *   Timestamp (10) + NOP (2) + Signature (18) = 40 bytes out of a
+ *   maximum of 40.
+ *
+ * The SACK options should be last.  SACK blocks consume 8*n+2 bytes.
+ * So a full size SACK blocks option is 34 bytes (with 4 SACK blocks).
+ * At minimum we need 10 bytes (to generate 1 SACK block).  With the
+ * alignment used here TCP Timestamps take 12 bytes and TCP Signatures
+ * 20 bytes; if both are present only 8 bytes remain, of which 2 would
+ * go to alignment, so no SACK block fits and the option is left out.
+ */
+int
+tcp_addoptions(struct tcpopt *to, u_char *optp)
+{
+	u_int mask, pad, optlen = 0;
+
+	for (mask = 1; mask < TOF_MAXOPT; mask <<= 1) {
+		if ((to->to_flags & mask) != mask)
+			continue;
+		switch (to->to_flags & mask) {
+		case TOF_MSS:
+			/* Start the option on a 4 byte boundary. */
+			pad = (4 - optlen % 4) % 4;
+			if (optlen + pad + TCPOLEN_MAXSEG > MAX_TCPOPTLEN)
+				continue;
+			for (; pad > 0; pad--) {
+				optlen += TCPOLEN_NOP;
+				*optp++ = TCPOPT_NOP;
+			}
+			optlen += TCPOLEN_MAXSEG;
+			*optp++ = TCPOPT_MAXSEG;
+			*optp++ = TCPOLEN_MAXSEG;
+			to->to_mss = htons(to->to_mss);
+			bcopy((u_char *)&to->to_mss, optp, sizeof(to->to_mss));
+			optp += sizeof(to->to_mss);
+			break;
+		case TOF_SCALE:
+			/*
+			 * Start the 3 byte option on an odd offset so that
+			 * it ends on an even one; this always costs one NOP
+			 * when it is the first option.
+			 */
+			pad = (optlen % 2) ? 0 : 1;
+			if (optlen + pad + TCPOLEN_WINDOW > MAX_TCPOPTLEN)
+				continue;
+			for (; pad > 0; pad--) {
+				optlen += TCPOLEN_NOP;
+				*optp++ = TCPOPT_NOP;
+			}
+			optlen += TCPOLEN_WINDOW;
+			*optp++ = TCPOPT_WINDOW;
+			*optp++ = TCPOLEN_WINDOW;
+			*optp++ = to->to_wscale;
+			break;
+		case TOF_SACKPERM:
+			/* Start the option on an even offset. */
+			pad = optlen % 2;
+			if (optlen + pad + TCPOLEN_SACK_PERMITTED >
+			    MAX_TCPOPTLEN)
+				continue;
+			for (; pad > 0; pad--) {
+				optlen += TCPOLEN_NOP;
+				*optp++ = TCPOPT_NOP;
+			}
+			optlen += TCPOLEN_SACK_PERMITTED;
+			*optp++ = TCPOPT_SACK_PERMITTED;
+			*optp++ = TCPOLEN_SACK_PERMITTED;
+			break;
+		case TOF_TS:
+			/*
+			 * Start at offset 2 (mod 4) so that the two 32 bit
+			 * values following the 2 byte option header land on
+			 * 4 byte offsets.
+			 */
+			pad = (6 - optlen % 4) % 4;
+			if (optlen + pad + TCPOLEN_TIMESTAMP > MAX_TCPOPTLEN)
+				continue;
+			for (; pad > 0; pad--) {
+				optlen += TCPOLEN_NOP;
+				*optp++ = TCPOPT_NOP;
+			}
+			optlen += TCPOLEN_TIMESTAMP;
+			*optp++ = TCPOPT_TIMESTAMP;
+			*optp++ = TCPOLEN_TIMESTAMP;
+			to->to_tsval = htonl(to->to_tsval);
+			to->to_tsecr = htonl(to->to_tsecr);
+			bcopy((u_char *)&to->to_tsval, optp, sizeof(to->to_tsval));
+			optp += sizeof(to->to_tsval);
+			bcopy((u_char *)&to->to_tsecr, optp, sizeof(to->to_tsecr));
+			optp += sizeof(to->to_tsecr);
+			break;
+		case TOF_SIGNATURE:
+			{
+			int siglen = TCPOLEN_SIGNATURE - 2;
+
+			/* Same alignment as the timestamp option. */
+			pad = (6 - optlen % 4) % 4;
+			if (optlen + pad + TCPOLEN_SIGNATURE > MAX_TCPOPTLEN) {
+				/*
+				 * Tell the caller that no signature space
+				 * was reserved instead of leaving
+				 * to_signature uninitialized.
+				 */
+				to->to_flags &= ~TOF_SIGNATURE;
+				to->to_signature = NULL;
+				continue;
+			}
+			for (; pad > 0; pad--) {
+				optlen += TCPOLEN_NOP;
+				*optp++ = TCPOPT_NOP;
+			}
+			optlen += TCPOLEN_SIGNATURE;
+			*optp++ = TCPOPT_SIGNATURE;
+			*optp++ = TCPOLEN_SIGNATURE;
+			/* The caller fills in the digest later. */
+			to->to_signature = optp;
+			while (siglen--)
+				*optp++ = 0;
+			break;
+			}
+		case TOF_SACK:
+			{
+			int sackblks = 0;
+			struct sackblk *sack = (struct sackblk *)to->to_sacks;
+			tcp_seq sack_seq;
+
+			/*
+			 * Start at offset 2 (mod 4) so the SACK edges are
+			 * on 4 byte offsets; require room for the option
+			 * header and at least one block.
+			 */
+			pad = (6 - optlen % 4) % 4;
+			if (optlen + pad + TCPOLEN_SACKHDR + TCPOLEN_SACK >
+			    MAX_TCPOPTLEN)
+				continue;
+			for (; pad > 0; pad--) {
+				optlen += TCPOLEN_NOP;
+				*optp++ = TCPOPT_NOP;
+			}
+			optlen += TCPOLEN_SACKHDR;
+			*optp++ = TCPOPT_SACK;
+			/* Send as many blocks as the remaining space holds. */
+			sackblks = min(to->to_nsacks,
+			    (MAX_TCPOPTLEN - optlen) / TCPOLEN_SACK);
+			*optp++ = TCPOLEN_SACKHDR + sackblks * TCPOLEN_SACK;
+			while (sackblks--) {
+				sack_seq = htonl(sack->start);
+				bcopy((u_char *)&sack_seq, optp, sizeof(sack_seq));
+				optp += sizeof(sack_seq);
+				sack_seq = htonl(sack->end);
+				bcopy((u_char *)&sack_seq, optp, sizeof(sack_seq));
+				optp += sizeof(sack_seq);
+				optlen += TCPOLEN_SACK;
+				sack++;
+			}
+			tcpstat.tcps_sack_send_blocks++;
+			break;
+			}
+		default:
+			panic("%s: unknown TCP option type", __func__);
+			break;
+		}
+	}
+
+	/*
+	 * Terminate and pad TCP options to a 4 byte boundary.
+	 * RFC 793 requires the header content beyond the End-of-Option
+	 * option to be zero padding; TCPOPT_EOL is 0, so it serves both
+	 * as the terminator and as the padding.  optlen never exceeds
+	 * MAX_TCPOPTLEN here, so rounding up stays within the limit as
+	 * long as that limit is itself a multiple of 4.
+	 */
+	while (optlen % 4) {
+		optlen += TCPOLEN_EOL;
+		*optp++ = TCPOPT_EOL;
+	}
+
+	KASSERT(optlen <= MAX_TCPOPTLEN, ("%s: TCP options too long", __func__));
+	return (optlen);
+}
diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c
index 2d88d47..eb51349 100644
--- a/sys/netinet/tcp_reass.c
+++ b/sys/netinet/tcp_reass.c
@@ -1146,7 +1146,7 @@ after_listen:
if ((to.to_flags & TOF_SCALE) &&
(tp->t_flags & TF_REQ_SCALE)) {
tp->t_flags |= TF_RCVD_SCALE;
- tp->snd_scale = to.to_requested_s_scale;
+ tp->snd_scale = to.to_wscale;
tp->snd_wnd = th->th_win << tp->snd_scale;
tiwin = tp->snd_wnd;
}
@@ -2745,7 +2745,7 @@ tcp_dooptions(to, cp, cnt, flags)
if (!(flags & TO_SYN))
continue;
to->to_flags |= TOF_SCALE;
- to->to_requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT);
+ to->to_wscale = min(cp[2], TCP_MAX_WINSHIFT);
break;
case TCPOPT_TIMESTAMP:
if (optlen != TCPOLEN_TIMESTAMP)
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index 84d3c10..931383c 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -1020,12 +1020,15 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
* with auto sizing. This allows us to scale the
* receive buffer over a wide range while not losing
* any efficiency or fine granularity.
+ *
+ * RFC1323: The Window field in a SYN (i.e., a <SYN>
+ * or <SYN,ACK>) segment itself is never scaled.
*/
while (wscale < TCP_MAX_WINSHIFT &&
(0x1 << wscale) < tcp_minmss)
wscale++;
sc->sc_requested_r_scale = wscale;
- sc->sc_requested_s_scale = to->to_requested_s_scale;
+ sc->sc_requested_s_scale = to->to_wscale;
sc->sc_flags |= SCF_WINSCALE;
}
}
@@ -1097,8 +1100,8 @@ syncache_respond(struct syncache *sc, struct mbuf *m)
struct ip *ip = NULL;
struct tcphdr *th;
int optlen, error;
- u_int16_t tlen, hlen, mssopt;
- u_int8_t *optp;
+ u_int16_t hlen, tlen, mssopt;
+ struct tcpopt to;
#ifdef INET6
struct ip6_hdr *ip6 = NULL;
#endif
@@ -1108,33 +1111,16 @@ syncache_respond(struct syncache *sc, struct mbuf *m)
(sc->sc_inc.inc_isipv6) ? sizeof(struct ip6_hdr) :
#endif
sizeof(struct ip);
+ tlen = hlen + sizeof(struct tcphdr);
/* Determine MSS we advertize to other end of connection. */
mssopt = tcp_mssopt(&sc->sc_inc);
if (sc->sc_peer_mss)
mssopt = max( min(sc->sc_peer_mss, mssopt), tcp_minmss);
- /* Compute the size of the TCP options. */
- if (sc->sc_flags & SCF_NOOPT) {
- optlen = 0;
- } else {
- optlen = TCPOLEN_MAXSEG +
- ((sc->sc_flags & SCF_WINSCALE) ? 4 : 0) +
- ((sc->sc_flags & SCF_TIMESTAMP) ? TCPOLEN_TSTAMP_APPA : 0);
-#ifdef TCP_SIGNATURE
- if (sc->sc_flags & SCF_SIGNATURE)
- optlen += TCPOLEN_SIGNATURE;
-#endif
- if (sc->sc_flags & SCF_SACK)
- optlen += TCPOLEN_SACK_PERMITTED;
- optlen = roundup2(optlen, 4);
- }
- tlen = hlen + sizeof(struct tcphdr) + optlen;
-
- /*
- * XXX: Assume that the entire packet will fit in a header mbuf.
- */
- KASSERT(max_linkhdr + tlen <= MHLEN, ("syncache: mbuf too small"));
+ /* XXX: Assume that the entire packet will fit in a header mbuf. */
+ KASSERT(max_linkhdr + tlen + MAX_TCPOPTLEN <= MHLEN,
+ ("syncache: mbuf too small"));
/* Create the IP+TCP header from scratch. */
if (m)
@@ -1197,70 +1183,52 @@ syncache_respond(struct syncache *sc, struct mbuf *m)
th->th_seq = htonl(sc->sc_iss);
th->th_ack = htonl(sc->sc_irs + 1);
- th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
+ th->th_off = sizeof(struct tcphdr) >> 2;
th->th_x2 = 0;
th->th_flags = TH_SYN|TH_ACK;
th->th_win = htons(sc->sc_wnd);
th->th_urp = 0;
/* Tack on the TCP options. */
- if (optlen != 0) {
- optp = (u_int8_t *)(th + 1);
- *optp++ = TCPOPT_MAXSEG;
- *optp++ = TCPOLEN_MAXSEG;
- *optp++ = (mssopt >> 8) & 0xff;
- *optp++ = mssopt & 0xff;
+ if ((sc->sc_flags & SCF_NOOPT) == 0) {
+ to.to_flags = 0;
+ to.to_mss = mssopt;
+ to.to_flags = TOF_MSS;
if (sc->sc_flags & SCF_WINSCALE) {
- *((u_int32_t *)optp) = htonl(TCPOPT_NOP << 24 |
- TCPOPT_WINDOW << 16 | TCPOLEN_WINDOW << 8 |
- sc->sc_requested_r_scale);
- optp += 4;
+ to.to_wscale = sc->sc_requested_r_scale;
+ to.to_flags |= TOF_SCALE;
}
-
if (sc->sc_flags & SCF_TIMESTAMP) {
- u_int32_t *lp = (u_int32_t *)(optp);
-
- /* Form timestamp option per appendix A of RFC 1323. */
- *lp++ = htonl(TCPOPT_TSTAMP_HDR);
- if (sc->sc_ts)
- *lp++ = htonl(sc->sc_ts);
- else
- *lp++ = htonl(ticks);
- *lp = htonl(sc->sc_tsreflect);
- optp += TCPOLEN_TSTAMP_APPA;
+ /* Virgin timestamp or TCP cookie enhanced one. */
+ to.to_tsval = sc->sc_ts ? sc->sc_ts : ticks;
+ to.to_tsecr = sc->sc_tsreflect;
+ to.to_flags |= TOF_TS;
}
-
+ if (sc->sc_flags & SCF_SACK)
+ to.to_flags |= TOF_SACKPERM;
#ifdef TCP_SIGNATURE
- /*
- * Handle TCP-MD5 passive opener response.
- */
- if (sc->sc_flags & SCF_SIGNATURE) {
- u_int8_t *bp = optp;
- int i;
-
- *bp++ = TCPOPT_SIGNATURE;
- *bp++ = TCPOLEN_SIGNATURE;
- for (i = 0; i < TCP_SIGLEN; i++)
- *bp++ = 0;
- tcp_signature_compute(m, sizeof(struct ip), 0, optlen,
- optp + 2, IPSEC_DIR_OUTBOUND);
- optp += TCPOLEN_SIGNATURE;
- }
-#endif /* TCP_SIGNATURE */
+ if (sc->sc_flags & SCF_SIGNATURE)
+ to.to_flags |= TOF_SIGNATURE;
+#endif
+ optlen = tcp_addoptions(&to, (u_char *)(th + 1));
- if (sc->sc_flags & SCF_SACK) {
- *optp++ = TCPOPT_SACK_PERMITTED;
- *optp++ = TCPOLEN_SACK_PERMITTED;
- }
+#ifdef TCP_SIGNATURE
+ tcp_signature_compute(m, sizeof(struct ip), 0, optlen,
+ to.to_signature, IPSEC_DIR_OUTBOUND);
+#endif
- {
- /* Pad TCP options to a 4 byte boundary */
- int padlen = optlen - (optp - (u_int8_t *)(th + 1));
- while (padlen-- > 0)
- *optp++ = TCPOPT_EOL;
- }
- }
+ /* Adjust headers by option size. */
+ th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
+ m->m_len += optlen;
+ m->m_pkthdr.len += optlen;
+#ifdef INET6
+ if (sc->sc_inc.inc_isipv6)
+ ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) + optlen);
+#endif
+ ip->ip_len += optlen;
+ } else
+ optlen = 0;
#ifdef INET6
if (sc->sc_inc.inc_isipv6) {
@@ -1272,7 +1240,7 @@ syncache_respond(struct syncache *sc, struct mbuf *m)
#endif
{
th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
- htons(tlen - hlen + IPPROTO_TCP));
+ htons(tlen + optlen - hlen + IPPROTO_TCP));
m->m_pkthdr.csum_flags = CSUM_TCP;
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, NULL);
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index 46d396d..6dc20d4 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -229,22 +229,27 @@ struct tcpcb {
* Structure to hold TCP options that are only used during segment
* processing (in tcp_input), but not held in the tcpcb.
* It's basically used to reduce the number of parameters
- * to tcp_dooptions.
+ * to tcp_dooptions and tcp_addoptions.
+ * The binary order of the to_flags is relevant for packing of the
+ * options in tcp_addoptions.
*/
struct tcpopt {
u_long to_flags; /* which options are present */
-#define TOF_TS 0x0001 /* timestamp */
-#define TOF_MSS 0x0010 /* maximum segment size */
-#define TOF_SCALE 0x0020 /* window scaling */
+#define TOF_MSS 0x0001 /* maximum segment size */
+#define TOF_SCALE 0x0002 /* window scaling */
+#define TOF_SACKPERM 0x0004 /* SACK permitted */
+#define TOF_TS 0x0010 /* timestamp */
#define TOF_SIGNATURE 0x0040 /* signature option present */
#define TOF_SIGLEN 0x0080 /* signature length valid (RFC2385) */
#define TOF_SACK 0x0100 /* Peer sent SACK option */
- u_int32_t to_tsval;
- u_int32_t to_tsecr;
- u_int16_t to_mss;
- u_int8_t to_requested_s_scale;
+#define TOF_MAXOPT 0x0200
+ u_int32_t to_tsval; /* our new timestamp */
+ u_int32_t to_tsecr; /* reflected timestamp */
+ u_int16_t to_mss; /* maximum segment size */
+ u_int8_t to_wscale; /* window scaling */
u_int8_t to_nsacks; /* number of SACK blocks */
u_char *to_sacks; /* pointer to the first SACK blocks */
+ u_char *to_signature; /* pointer to the MD5 signature */
};
/*
@@ -497,6 +502,7 @@ extern int ss_fltsz_local;
extern int tcp_do_sack; /* SACK enabled/disabled */
+int tcp_addoptions(struct tcpopt *, u_char *);
struct tcpcb *
tcp_close(struct tcpcb *);
void tcp_discardcb(struct tcpcb *);
OpenPOWER on IntegriCloud