diff options
author | wollman <wollman@FreeBSD.org> | 1995-02-09 23:13:27 +0000 |
---|---|---|
committer | wollman <wollman@FreeBSD.org> | 1995-02-09 23:13:27 +0000 |
commit | 72af2aa44a98381d496043f67a333df345e987a9 (patch) | |
tree | 6b8f4500f8844dcde0c5fcc625cf24bb530f5240 | |
parent | 93f26e434abdf6c41008b87288c038fa1ada912f (diff) | |
download | FreeBSD-src-72af2aa44a98381d496043f67a333df345e987a9.zip FreeBSD-src-72af2aa44a98381d496043f67a333df345e987a9.tar.gz |
Merge Transaction TCP, courtesy of Andras Olah <olah@cs.utwente.nl> and
Bob Braden <braden@isi.edu>.
NB: This has not had David's TCP ACK hack re-integrated. It is not clear
what the correct solution to this problem is, if any. If a better solution
doesn't pop up in response to this message, I'll put David's code back in
(or he's welcome to do so himself).
-rw-r--r-- | sys/netinet/in_proto.c | 7 |
-rw-r--r-- | sys/netinet/tcp_debug.c | 13 |
-rw-r--r-- | sys/netinet/tcp_input.c | 782 |
-rw-r--r-- | sys/netinet/tcp_output.c | 160 |
-rw-r--r-- | sys/netinet/tcp_reass.c | 782 |
-rw-r--r-- | sys/netinet/tcp_subr.c | 89 |
-rw-r--r-- | sys/netinet/tcp_timer.c | 11 |
-rw-r--r-- | sys/netinet/tcp_timewait.c | 89 |
-rw-r--r-- | sys/netinet/tcp_usrreq.c | 201 |
-rw-r--r-- | sys/netinet/tcp_var.h | 6 |
10 files changed, 1897 insertions, 243 deletions
diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c index 9455875..e14e9d8 100644 --- a/sys/netinet/in_proto.c +++ b/sys/netinet/in_proto.c @@ -97,15 +97,12 @@ struct protosw inetsw[] = { #ifdef TTCP { SOCK_STREAM, &inetdomain, IPPROTO_TCP, PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD, - tcp_input, 0, tcp_ctlinput, tcp_ctloutput, - tcp_usrreq, - tcp_init, tcp_fasttimo, tcp_slowtimo, tcp_drain, tcp_sysctl #else { SOCK_STREAM, &inetdomain, IPPROTO_TCP, PR_CONNREQUIRED|PR_WANTRCVD, +#endif tcp_input, 0, tcp_ctlinput, tcp_ctloutput, tcp_usrreq, - tcp_init, tcp_fasttimo, tcp_slowtimo, tcp_drain, -#endif + tcp_init, tcp_fasttimo, tcp_slowtimo, tcp_drain, tcp_sysctl }, { SOCK_RAW, &inetdomain, IPPROTO_RAW, PR_ATOMIC|PR_ADDR, rip_input, rip_output, 0, rip_ctloutput, diff --git a/sys/netinet/tcp_debug.c b/sys/netinet/tcp_debug.c index c759626..f2c2e1d 100644 --- a/sys/netinet/tcp_debug.c +++ b/sys/netinet/tcp_debug.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)tcp_debug.c 8.1 (Berkeley) 6/10/93 - * $Id$ + * $Id: tcp_debug.c,v 1.2 1994/08/02 07:48:54 davidg Exp $ */ #ifdef TCPDEBUG @@ -102,7 +102,7 @@ tcp_trace(act, ostate, tp, ti, req) if (tcpconsdebug == 0) return; if (tp) - printf("%x %s:", tp, tcpstates[ostate]); + printf("%p %s:", tp, tcpstates[ostate]); else printf("???????? "); printf("%s ", tanames[act]); @@ -130,11 +130,14 @@ tcp_trace(act, ostate, tp, ti, req) printf("@%x, urp=%x", ack, ti->ti_urp); flags = ti->ti_flags; if (flags) { -#ifndef lint char *cp = "<"; -#define pf(f) { if (ti->ti_flags&TH_/**/f) { printf("%s%s", cp, "f"); cp = ","; } } +#define pf(f) { \ + if (ti->ti_flags & TH_##f) { \ + printf("%s%s", cp, #f); \ + cp = ","; \ + } \ +} pf(SYN); pf(ACK); pf(FIN); pf(RST); pf(PUSH); pf(URG); -#endif printf(">"); } break; diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 961e02a..6c48ee4 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * @(#)tcp_input.c 8.5 (Berkeley) 4/10/94 - * $Id: tcp_input.c,v 1.9 1994/10/02 17:48:43 phk Exp $ + * $Id: tcp_input.c,v 1.10 1994/10/13 18:36:32 wollman Exp $ */ #ifndef TUBA_INCLUDE @@ -67,12 +67,6 @@ int tcprexmtthresh = 3; struct inpcb *tcp_last_inpcb = &tcb; #endif /* TUBA_INCLUDE */ -#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * PR_SLOWHZ) - -/* for modulo comparisons of timestamps */ -#define TSTMP_LT(a,b) ((int)((a)-(b)) < 0) -#define TSTMP_GEQ(a,b) ((int)((a)-(b)) >= 0) - /* * Insert segment ti into reassembly queue of tcp with @@ -142,7 +136,17 @@ tcp_reass(tp, ti, m) tcpstat.tcps_rcvduppack++; tcpstat.tcps_rcvdupbyte += ti->ti_len; m_freem(m); +#ifdef TTCP + /* + * Try to present any queued data + * at the left window edge to the user. + * This is needed after the 3-WHS + * completes. + */ + goto present; /* ??? */ +#else return (0); +#endif } m_adj(m, i); ti->ti_len -= i; @@ -184,7 +188,7 @@ present: * Present data to user, advancing rcv_nxt through * completed sequence space. */ - if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) + if (!TCPS_HAVEESTABLISHED(tp->t_state)) return (0); ti = tp->seg_next; if (ti == (struct tcpiphdr *)tp || ti->ti_seq != tp->rcv_nxt) @@ -227,12 +231,23 @@ tcp_input(m, iphlen) struct in_addr laddr; int dropsocket = 0; int iss = 0; +#ifdef TTCP + u_long tiwin; + struct tcpopt to; /* options in this segment */ + struct rmxp_tao *taop; /* pointer to our TAO cache entry */ + struct rmxp_tao tao_noncached; /* in case there's no cached entry */ +#else u_long tiwin, ts_val, ts_ecr; int ts_present = 0; +#endif #ifdef TCPDEBUG short ostate = 0; #endif +#ifdef TTCP + bzero((char *)&to, sizeof(to)); +#endif + tcpstat.tcps_rcvtotal++; /* * Get IP and TCP header together in first mbuf. 
@@ -298,9 +313,15 @@ tcp_input(m, iphlen) optp[TCPOLEN_TSTAMP_APPA] == TCPOPT_EOL)) && *(u_long *)optp == htonl(TCPOPT_TSTAMP_HDR) && (ti->ti_flags & TH_SYN) == 0) { +#ifdef TTCP + to.to_flag |= TOF_TS; + to.to_tsval = ntohl(*(u_long *)(optp + 4)); + to.to_tsecr = ntohl(*(u_long *)(optp + 8)); +#else ts_present = 1; ts_val = ntohl(*(u_long *)(optp + 4)); ts_ecr = ntohl(*(u_long *)(optp + 8)); +#endif optp = NULL; /* we've parsed the options */ } } @@ -359,6 +380,9 @@ findpcb: } #endif if (so->so_options & SO_ACCEPTCONN) { +#ifdef TTCP + register struct tcpcb *tp0 = tp; +#endif so = sonewconn(so, 0); if (so == 0) goto drop; @@ -382,9 +406,11 @@ findpcb: #endif tp = intotcpcb(inp); tp->t_state = TCPS_LISTEN; +#ifdef TTCP + tp->t_flags |= tp0->t_flags & (TF_NOPUSH|TF_NOOPT); +#endif - /* Compute proper scaling value from buffer space - */ + /* Compute proper scaling value from buffer space */ while (tp->request_r_scale < TCP_MAX_WINSHIFT && TCP_MAXWIN << tp->request_r_scale < so->so_rcv.sb_hiwat) tp->request_r_scale++; @@ -404,7 +430,11 @@ findpcb: */ if (optp && tp->t_state != TCPS_LISTEN) tcp_dooptions(tp, optp, optlen, ti, +#ifdef TTCP + &to); +#else &ts_present, &ts_val, &ts_ecr); +#endif /* * Header prediction: check for the two common cases @@ -419,22 +449,48 @@ findpcb: * receiver side. If we're getting packets in-order * (the reassembly queue is empty), add the data to * the socket buffer and note that we need a delayed ack. +#ifdef TTCP + * Make sure that the hidden state-flags are also off. + * Since we check for TCPS_ESTABLISHED above, it can only + * be TH_NEEDSYN. 
+#endif */ if (tp->t_state == TCPS_ESTABLISHED && (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK && +#ifdef TTCP + ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) && + ((to.to_flag & TOF_TS) == 0 || + TSTMP_GEQ(to.to_tsval, tp->ts_recent)) && + /* + * Using the CC option is compulsory if once started: + * the segment is OK if no T/TCP was negotiated or + * if the segment has a CC option equal to CCrecv + */ + ((tp->t_flags & (TF_REQ_CC|TF_RCVD_CC)) != (TF_REQ_CC|TF_RCVD_CC) || + (to.to_flag & TOF_CC) != 0 && to.to_cc == tp->cc_recv) && +#else (!ts_present || TSTMP_GEQ(ts_val, tp->ts_recent)) && +#endif ti->ti_seq == tp->rcv_nxt && tiwin && tiwin == tp->snd_wnd && tp->snd_nxt == tp->snd_max) { /* * If last ACK falls within this segment's sequence numbers, - * record the timestamp. + * record the timestamp. + * NOTE that the test is modified according to the latest + * proposal of the tcplw@cray.com list (Braden 1993/04/26). */ - if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) && - SEQ_LT(tp->last_ack_sent, ti->ti_seq + ti->ti_len)) { +#ifdef TTCP + if ((to.to_flag & TOF_TS) != 0 && + SEQ_LEQ(ti->ti_seq, tp->last_ack_sent)) { + tp->ts_recent_age = tcp_now; + tp->ts_recent = to.to_tsval; +#else + if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent)) { tp->ts_recent_age = tcp_now; tp->ts_recent = ts_val; +#endif } if (ti->ti_len == 0) { @@ -445,8 +501,14 @@ findpcb: * this is a pure ack for outstanding data. 
*/ ++tcpstat.tcps_predack; +#ifdef TTCP + if ((to.to_flag & TOF_TS) != 0) + tcp_xmit_timer(tp, + tcp_now - to.to_tsecr + 1); +#else if (ts_present) tcp_xmit_timer(tp, tcp_now-ts_ecr+1); +#endif else if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) tcp_xmit_timer(tp, tp->t_rtt); @@ -497,17 +559,7 @@ findpcb: m->m_len -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); sbappend(&so->so_rcv, m); sorwakeup(so); - /* - * If this is a small packet, then ACK now - with Nagel - * congestion avoidance sender won't send more until - * he gets an ACK. - */ - if ((unsigned)ti->ti_len < tp->t_maxseg) { - tp->t_flags |= TF_ACKNOW; - tcp_output(tp); - } else { - tp->t_flags |= TF_DELACK; - } + tp->t_flags |= TF_DELACK; return; } } @@ -590,9 +642,19 @@ findpcb: dropsocket = 0; /* socket is already gone */ goto drop; } +#ifdef TTCP + if ((taop = tcp_gettaocache(inp)) == NULL) { + taop = &tao_noncached; + bzero(taop, sizeof(*taop)); + } +#endif /* TTCP */ if (optp) tcp_dooptions(tp, optp, optlen, ti, +#ifdef TTCP + &to); +#else &ts_present, &ts_val, &ts_ecr); +#endif if (iss) tp->iss = iss; else @@ -601,6 +663,66 @@ findpcb: tp->irs = ti->ti_seq; tcp_sendseqinit(tp); tcp_rcvseqinit(tp); +#ifdef TTCP + /* + * Initialization of the tcpcb for transaction; + * set SND.WND = SEG.WND, + * initialize CCsend and CCrecv. + */ + tp->snd_wnd = tiwin; /* initial send-window */ + tp->cc_send = CC_INC(tcp_ccgen); + tp->cc_recv = to.to_cc; + /* + * Perform TAO test on incoming CC (SEG.CC) option, if any. + * - compare SEG.CC against cached CC from the same host, + * if any. + * - if SEG.CC > chached value, SYN must be new and is accepted + * immediately: save new CC in the cache, mark the socket + * connected, enter ESTABLISHED state, turn on flag to + * send a SYN in the next segment. + * A virtual advertised window is set in rcv_adv to + * initialize SWS prevention. Then enter normal segment + * processing: drop SYN, process data and FIN. + * - otherwise do a normal 3-way handshake. 
+ */ + if ((to.to_flag & TOF_CC) != 0) { + if (taop->tao_cc != 0 && CC_GT(to.to_cc, taop->tao_cc)) { + taop->tao_cc = to.to_cc; + tp->t_state = TCPS_ESTABLISHED; + + /* + * If there is a FIN, or if there is data and the + * connection is local, then delay SYN,ACK(SYN) in + * the hope of piggy-backing it on a response + * segment. Otherwise must send ACK now in case + * the other side is slow starting. + */ + if ((tiflags & TH_FIN) || (ti->ti_len != 0 && + in_localaddr(inp->inp_faddr))) + tp->t_flags |= (TF_DELACK | TF_NEEDSYN); + else + tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN); + tp->rcv_adv += tp->rcv_wnd; + tcpstat.tcps_connects++; + soisconnected(so); + tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; + dropsocket = 0; /* committed to socket */ + tcpstat.tcps_accepts++; + goto trimthenstep6; + } + /* else do standard 3-way handshake */ + } else { + /* + * No CC option, but maybe CC.NEW: + * invalidate cached value. + */ + taop->tao_cc = 0; + } + /* + * TAO test failed or there was no CC option, + * do a standard 3-way handshake. + */ +#endif /* TTCP */ tp->t_flags |= TF_ACKNOW; tp->t_state = TCPS_SYN_RECEIVED; tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; @@ -622,10 +744,34 @@ findpcb: * continue processing rest of data/controls, beginning with URG */ case TCPS_SYN_SENT: +#ifdef TTCP + if ((taop = tcp_gettaocache(inp)) == NULL) { + taop = &tao_noncached; + bzero(taop, sizeof(*taop)); + } + + if ((tiflags & TH_ACK) && + (SEQ_LEQ(ti->ti_ack, tp->iss) || + SEQ_GT(ti->ti_ack, tp->snd_max))) { + /* + * If we have a cached CCsent for the remote host, + * hence we haven't just crashed and restarted, + * do not send a RST. This may be a retransmission + * from the other side after our earlier ACK was lost. + * Our new SYN, when it arrives, will serve as the + * needed ACK. 
+ */ + if (taop->tao_ccsent != 0) + goto drop; + else + goto dropwithreset; + } +#else if ((tiflags & TH_ACK) && (SEQ_LEQ(ti->ti_ack, tp->iss) || SEQ_GT(ti->ti_ack, tp->snd_max))) goto dropwithreset; +#endif if (tiflags & TH_RST) { if (tiflags & TH_ACK) tp = tcp_drop(tp, ECONNREFUSED); @@ -633,25 +779,77 @@ findpcb: } if ((tiflags & TH_SYN) == 0) goto drop; +#ifdef TTCP + tp->snd_wnd = ti->ti_win; /* initial send window */ + tp->cc_recv = to.to_cc; /* foreign CC */ +#else if (tiflags & TH_ACK) { tp->snd_una = ti->ti_ack; if (SEQ_LT(tp->snd_nxt, tp->snd_una)) tp->snd_nxt = tp->snd_una; } tp->t_timer[TCPT_REXMT] = 0; +#endif + tp->irs = ti->ti_seq; tcp_rcvseqinit(tp); +#ifndef TTCP tp->t_flags |= TF_ACKNOW; if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) { +#else + if (tiflags & TH_ACK && SEQ_GT(ti->ti_ack, tp->iss)) { +#endif tcpstat.tcps_connects++; soisconnected(so); - tp->t_state = TCPS_ESTABLISHED; /* Do window scaling on this connection? */ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == (TF_RCVD_SCALE|TF_REQ_SCALE)) { tp->snd_scale = tp->requested_s_scale; tp->rcv_scale = tp->request_r_scale; } +#ifdef TTCP + /* + * Our SYN was acked. If segment contains CC.ECHO + * option, check it to make sure this segment really + * matches our SYN. If not, just drop it as old + * duplicate, but send an RST if we're still playing + * by the old rules. + */ + if ((to.to_flag & TOF_CCECHO) && + tp->cc_send != to.to_ccecho) { + if (taop->tao_ccsent != 0) + goto drop; + else + goto dropwithreset; + } + /* Segment is acceptable, update cache if undefined. */ + if (taop->tao_ccsent == 0) + taop->tao_ccsent = to.to_ccecho; + + tp->rcv_adv += tp->rcv_wnd; + tp->snd_una++; /* SYN is acked */ + /* + * If there's data, delay ACK; if there's also a FIN + * ACKNOW will be turned on later. + */ + if (ti->ti_len != 0) + tp->t_flags |= TF_DELACK; + else + tp->t_flags |= TF_ACKNOW; + /* + * Received <SYN,ACK> in SYN_SENT[*] state. 
+ * Transitions: + * SYN_SENT --> ESTABLISHED + * SYN_SENT* --> FIN_WAIT_1 + */ + if (tp->t_flags & TF_NEEDFIN) { + tp->t_state = TCPS_FIN_WAIT_1; + tp->t_flags &= ~TF_NEEDFIN; + tiflags &= ~TH_SYN; + } else + tp->t_state = TCPS_ESTABLISHED; +#else + tp->t_state = TCPS_ESTABLISHED; (void) tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); /* @@ -660,8 +858,48 @@ findpcb: */ if (tp->t_rtt) tcp_xmit_timer(tp, tp->t_rtt); +#endif + +#ifdef TTCP + } else { + /* + * Received initial SYN in SYN-SENT[*] state => simul- + * taneous open. If segment contains CC option and there is + * a cached CC, apply TAO test; if it succeeds, connection is + * half-synchronized. Otherwise, do 3-way handshake: + * SYN-SENT -> SYN-RECEIVED + * SYN-SENT* -> SYN-RECEIVED* + * If there was no CC option, clear cached CC value. + */ + tp->t_flags |= TF_ACKNOW; + tp->t_timer[TCPT_REXMT] = 0; + if (to.to_flag & TOF_CC) { + if (taop->tao_cc != 0 && + CC_GT(to.to_cc, taop->tao_cc)) { + /* + * update cache and make transition: + * SYN-SENT -> ESTABLISHED* + * SYN-SENT* -> FIN-WAIT-1* + */ + taop->tao_cc = to.to_cc; + if (tp->t_flags & TF_NEEDFIN) { + tp->t_state = TCPS_FIN_WAIT_1; + tp->t_flags &= ~TF_NEEDFIN; + } else + tp->t_state = TCPS_ESTABLISHED; + tp->t_flags |= TF_NEEDSYN; + } else + tp->t_state = TCPS_SYN_RECEIVED; + } else { + /* CC.NEW or no option => invalidate cache */ + taop->tao_cc = 0; + tp->t_state = TCPS_SYN_RECEIVED; + } + } +#else } else tp->t_state = TCPS_SYN_RECEIVED; +#endif trimthenstep6: /* @@ -680,21 +918,72 @@ trimthenstep6: } tp->snd_wl1 = ti->ti_seq - 1; tp->rcv_up = ti->ti_seq; +#ifdef TTCP + /* + * Client side of transaction: already sent SYN and data. + * If the remote host used T/TCP to validate the SYN, + * our data will be ACK'd; if so, enter normal data segment + * processing in the middle of step 5, ack processing. + * Otherwise, goto step 6. 
+ */ + if (tiflags & TH_ACK) + goto process_ACK; +#endif goto step6; +#ifdef TTCP + /* + * If the state is LAST_ACK or CLOSING or TIME_WAIT: + * if segment contains a SYN and CC [not CC.NEW] option: + * if state == TIME_WAIT and connection duration > MSL, + * drop packet and send RST; + * + * if SEG.CC > CCrecv then is new SYN, and can implicitly + * ack the FIN (and data) in retransmission queue. + * Complete close and delete TCPCB. Then reprocess + * segment, hoping to find new TCPCB in LISTEN state; + * + * else must be old SYN; drop it. + * else do normal processing. + */ + case TCPS_LAST_ACK: + case TCPS_CLOSING: + case TCPS_TIME_WAIT: + if ((tiflags & TH_SYN) && + (to.to_flag & TOF_CC) && tp->cc_recv != 0) { + if (tp->t_state == TCPS_TIME_WAIT && + tp->t_duration > TCPTV_MSL) + goto dropwithreset; + if (CC_GT(to.to_cc, tp->cc_recv)) { + tp = tcp_close(tp); + goto findpcb; + } + else + goto drop; + } + break; /* continue normal processing */ +#endif } /* * States other than LISTEN or SYN_SENT. * First check timestamp, if present. +#ifdef TTCP + * Then check the connection count, if present. +#endif * Then check that at least some bytes of segment are within * receive window. If segment begins before rcv_nxt, * drop leading data (and SYN); if nothing left, just ack. - * + * * RFC 1323 PAWS: If we have a timestamp reply on this segment * and it's less than ts_recent, drop it. */ +#ifdef TTCP + if ((to.to_flag & TOF_TS) != 0 && (tiflags & TH_RST) == 0 && + tp->ts_recent && TSTMP_LT(to.to_tsval, tp->ts_recent)) { +#else if (ts_present && (tiflags & TH_RST) == 0 && tp->ts_recent && TSTMP_LT(ts_val, tp->ts_recent)) { +#endif /* Check to see if ts_recent is over 24 days old. */ if ((int)(tcp_now - tp->ts_recent_age) > TCP_PAWS_IDLE) { @@ -718,6 +1007,19 @@ trimthenstep6: } } +#ifdef TTCP + /* + * T/TCP mechanism + * If T/TCP was negotiated and the segment doesn't have CC, + * or if it's CC is wrong then drop the segment. 
+ * RST segments do not have to comply with this. + */ + if ((tp->t_flags & (TF_REQ_CC|TF_RCVD_CC)) == (TF_REQ_CC|TF_RCVD_CC) && + ((to.to_flag & TOF_CC) == 0 || tp->cc_recv != to.to_cc) && + (tiflags & TH_RST) == 0) + goto dropafterack; +#endif + todrop = tp->rcv_nxt - ti->ti_seq; if (todrop > 0) { if (tiflags & TH_SYN) { @@ -829,12 +1131,19 @@ trimthenstep6: /* * If last ACK falls within this segment's sequence numbers, * record its timestamp. + * NOTE that the test is modified according to the latest + * proposal of the tcplw@cray.com list (Braden 1993/04/26). */ - if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) && - SEQ_LT(tp->last_ack_sent, ti->ti_seq + ti->ti_len + - ((tiflags & (TH_SYN|TH_FIN)) != 0))) { +#ifdef TTCP + if ((to.to_flag & TOF_TS) != 0 && + SEQ_LEQ(ti->ti_seq, tp->last_ack_sent)) { + tp->ts_recent_age = tcp_now; + tp->ts_recent = to.to_tsval; +#else + if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent)) { tp->ts_recent_age = tcp_now; tp->ts_recent = ts_val; +#endif } /* @@ -880,11 +1189,26 @@ trimthenstep6: goto dropwithreset; } +#ifdef TTCP + /* + * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN + * flag is on (half-synchronized state), then queue data for + * later processing; else drop segment and return. + */ + if ((tiflags & TH_ACK) == 0) { + if (tp->t_state == TCPS_SYN_RECEIVED || + (tp->t_flags & TF_NEEDSYN)) + goto step6; + else + goto drop; + } +#else /* * If the ACK bit is off we drop the segment and return. */ if ((tiflags & TH_ACK) == 0) goto drop; +#endif /* * Ack processing. @@ -900,16 +1224,46 @@ trimthenstep6: if (SEQ_GT(tp->snd_una, ti->ti_ack) || SEQ_GT(ti->ti_ack, tp->snd_max)) goto dropwithreset; + tcpstat.tcps_connects++; soisconnected(so); - tp->t_state = TCPS_ESTABLISHED; /* Do window scaling? 
*/ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == (TF_RCVD_SCALE|TF_REQ_SCALE)) { tp->snd_scale = tp->requested_s_scale; tp->rcv_scale = tp->request_r_scale; } +#ifdef TTCP + /* + * Upon successful completion of 3-way handshake, + * update cache.CC if it was undefined, pass any queued + * data to the user, and advance state appropriately. + */ + if ((taop = tcp_gettaocache(inp)) != NULL && + taop->tao_cc == 0) + taop->tao_cc = tp->cc_recv; + + /* + * Make transitions: + * SYN-RECEIVED -> ESTABLISHED + * SYN-RECEIVED* -> FIN-WAIT-1 + */ + if (tp->t_flags & TF_NEEDFIN) { + tp->t_state = TCPS_FIN_WAIT_1; + tp->t_flags &= ~TF_NEEDFIN; + } else + tp->t_state = TCPS_ESTABLISHED; + /* + * If segment contains data or ACK, will call tcp_reass() + * later; if not, do so now to pass queued data to user. + */ + if (ti->ti_len == 0 && (tiflags & TH_FIN) == 0) + (void) tcp_reass(tp, (struct tcpiphdr *)0, + (struct mbuf *)0); +#else /* TTCP */ + tp->t_state = TCPS_ESTABLISHED; (void) tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); +#endif /* TTCP */ tp->snd_wl1 = ti->ti_seq - 1; /* fall into ... */ @@ -999,6 +1353,24 @@ trimthenstep6: tcpstat.tcps_rcvacktoomuch++; goto dropafterack; } +#ifdef TTCP + /* + * If we reach this point, ACK is not a duplicate, + * i.e., it ACKs something we sent. + */ + if (tp->t_flags & TF_NEEDSYN) { + /* + * T/TCP: Connection was half-synchronized, and our + * SYN has been ACK'd (so connection is now fully + * synchronized). Go to non-starred state and + * increment snd_una for ACK of SYN. + */ + tp->t_flags &= ~TF_NEEDSYN; + tp->snd_una++; + } + +process_ACK: +#endif acked = ti->ti_ack - tp->snd_una; tcpstat.tcps_rcvackpack++; tcpstat.tcps_rcvackbyte += acked; @@ -1012,8 +1384,13 @@ trimthenstep6: * timer backoff (cf., Phil Karn's retransmit alg.). * Recompute the initial retransmit timer. 
*/ +#ifdef TTCP + if (to.to_flag & TOF_TS) + tcp_xmit_timer(tp, tcp_now - to.to_tsecr + 1); +#else if (ts_present) tcp_xmit_timer(tp, tcp_now-ts_ecr+1); +#endif else if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) tcp_xmit_timer(tp,tp->t_rtt); @@ -1028,6 +1405,16 @@ trimthenstep6: needoutput = 1; } else if (tp->t_timer[TCPT_PERSIST] == 0) tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; + +#ifdef TTCP + /* + * If no data (only SYN) was ACK'd, + * skip rest of ACK processing. + */ + if (acked == 0) + goto step6; +#endif + /* * When new data is acked, open the congestion window. * If the window gives us less than ssthresh packets @@ -1092,7 +1479,15 @@ trimthenstep6: if (ourfinisacked) { tp->t_state = TCPS_TIME_WAIT; tcp_canceltimers(tp); - tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; +#ifdef TTCP + /* Shorten TIME_WAIT [RFC-1644, p.28] */ + if (tp->cc_recv != 0 && + tp->t_duration < TCPTV_MSL) + tp->t_timer[TCPT_2MSL] = + tp->t_rxtcur * TCPTV_TWTRUNC; + else +#endif + tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; soisdisconnected(so); } break; @@ -1232,7 +1627,19 @@ dodata: /* XXX */ if (tiflags & TH_FIN) { if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { socantrcvmore(so); - tp->t_flags |= TF_ACKNOW; +#ifdef TTCP + /* + * If connection is half-synchronized + * (ie SEND_SYN flag on) then delay ACK, + * so it may be piggybacked when SYN is sent. + * Otherwise, since we received a FIN then no + * more input can be expected, send ACK now. + */ + if (tp->t_flags & TF_NEEDSYN) + tp->t_flags |= TF_DELACK; + else +#endif /* TTCP */ + tp->t_flags |= TF_ACKNOW; tp->rcv_nxt++; } switch (tp->t_state) { @@ -1262,7 +1669,18 @@ dodata: /* XXX */ case TCPS_FIN_WAIT_2: tp->t_state = TCPS_TIME_WAIT; tcp_canceltimers(tp); - tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; +#ifdef TTCP + /* Shorten TIME_WAIT [RFC-1644, p.28] */ + if (tp->cc_recv != 0 && + tp->t_duration < TCPTV_MSL) { + tp->t_timer[TCPT_2MSL] = + tp->t_rxtcur * TCPTV_TWTRUNC; + /* For transaction client, force ACK now. 
*/ + tp->t_flags |= TF_ACKNOW; + } + else +#endif + tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; soisdisconnected(so); break; @@ -1280,14 +1698,6 @@ dodata: /* XXX */ #endif /* - * If this is a small packet, then ACK now - with Nagel - * congestion avoidance sender won't send more until - * he gets an ACK. - */ - if (ti->ti_len && ((unsigned)ti->ti_len < tp->t_maxseg)) - tp->t_flags |= TF_ACKNOW; - - /* * Return any desired output. */ if (needoutput || (tp->t_flags & TF_ACKNOW)) @@ -1301,6 +1711,12 @@ dropafterack: */ if (tiflags & TH_RST) goto drop; +#ifdef TTCP +#ifdef TCPDEBUG + if (so->so_options & SO_DEBUG) + tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0); +#endif +#endif m_freem(m); tp->t_flags |= TF_ACKNOW; (void) tcp_output(tp); @@ -1315,6 +1731,12 @@ dropwithreset: if ((tiflags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST) || IN_MULTICAST(ntohl(ti->ti_dst.s_addr))) goto drop; +#ifdef TTCP +#ifdef TCPDEBUG + if (tp == 0 || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) + tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0); +#endif +#endif if (tiflags & TH_ACK) tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST); else { @@ -1333,9 +1755,14 @@ drop: * Drop space held by incoming segment and return. 
*/ #ifdef TCPDEBUG +#ifdef TTCP + if (tp == 0 || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) + tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0); +#else if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0); #endif +#endif m_freem(m); /* destroy temporarily created socket */ if (dropsocket) @@ -1345,15 +1772,23 @@ drop: } void +#ifdef TTCP +tcp_dooptions(tp, cp, cnt, ti, to) +#else tcp_dooptions(tp, cp, cnt, ti, ts_present, ts_val, ts_ecr) +#endif struct tcpcb *tp; u_char *cp; int cnt; struct tcpiphdr *ti; +#ifdef TTCP + struct tcpopt *to; +#else int *ts_present; u_long *ts_val, *ts_ecr; +#endif { - u_short mss; + u_short mss = 0; int opt, optlen; for (; cnt > 0; cnt -= optlen, cp += optlen) { @@ -1379,7 +1814,6 @@ tcp_dooptions(tp, cp, cnt, ti, ts_present, ts_val, ts_ecr) continue; bcopy((char *) cp + 2, (char *) &mss, sizeof(mss)); NTOHS(mss); - (void) tcp_mss(tp, mss); /* sets t_maxseg */ break; case TCPOPT_WINDOW: @@ -1394,11 +1828,21 @@ tcp_dooptions(tp, cp, cnt, ti, ts_present, ts_val, ts_ecr) case TCPOPT_TIMESTAMP: if (optlen != TCPOLEN_TIMESTAMP) continue; +#ifdef TTCP + to->to_flag |= TOF_TS; + bcopy((char *)cp + 2, + (char *)&to->to_tsval, sizeof(to->to_tsval)); + NTOHL(to->to_tsval); + bcopy((char *)cp + 6, + (char *)&to->to_tsecr, sizeof(to->to_tsecr)); + NTOHL(to->to_tsecr); +#else *ts_present = 1; bcopy((char *)cp + 2, (char *) ts_val, sizeof(*ts_val)); NTOHL(*ts_val); bcopy((char *)cp + 6, (char *) ts_ecr, sizeof(*ts_ecr)); NTOHL(*ts_ecr); +#endif /* * A timestamp received in a SYN makes @@ -1406,12 +1850,59 @@ tcp_dooptions(tp, cp, cnt, ti, ts_present, ts_val, ts_ecr) */ if (ti->ti_flags & TH_SYN) { tp->t_flags |= TF_RCVD_TSTMP; +#ifdef TTCP + tp->ts_recent = to->to_tsval; +#else tp->ts_recent = *ts_val; +#endif tp->ts_recent_age = tcp_now; } break; +#ifdef TTCP + case TCPOPT_CC: + if (optlen != TCPOLEN_CC) + continue; + to->to_flag |= TCPOPT_CC; + bcopy((char *)cp + 2, + (char *)&to->to_cc, 
sizeof(to->to_cc)); + NTOHL(to->to_cc); + /* + * A CC or CC.new option received in a SYN makes + * it ok to send CC in subsequent segments. + */ + if (ti->ti_flags & TH_SYN) + tp->t_flags |= TF_RCVD_CC; + break; + case TCPOPT_CCNEW: + if (optlen != TCPOLEN_CC) + continue; + if (!(ti->ti_flags & TH_SYN)) + continue; + to->to_flag |= TOF_CCNEW; + bcopy((char *)cp + 2, + (char *)&to->to_cc, sizeof(to->to_cc)); + NTOHL(to->to_cc); + /* + * A CC or CC.new option received in a SYN makes + * it ok to send CC in subsequent segments. + */ + tp->t_flags |= TF_RCVD_CC; + break; + case TCPOPT_CCECHO: + if (optlen != TCPOLEN_CC) + continue; + if (!(ti->ti_flags & TH_SYN)) + continue; + to->to_flag |= TOF_CCECHO; + bcopy((char *)cp + 2, + (char *)&to->to_ccecho, sizeof(to->to_ccecho)); + NTOHL(to->to_ccecho); + break; +#endif /* TTCP*/ } } + if (ti->ti_flags & TH_SYN) + tcp_mss(tp, mss); /* sets t_maxseg */ } /* @@ -1535,39 +2026,76 @@ tcp_xmit_timer(tp, rtt) * window to be a single segment if the destination isn't local. * While looking at the routing entry, we also initialize other path-dependent * parameters from pre-set or cached values in the routing entry. + * + * Also take into account the space needed for options that we + * send regularly. Make maxseg shorter by that amount to assure + * that we can send maxseg amount of data even when the options + * are present. Store the upper limit of the length of options plus + * data in maxopd. + * + * NOTE that this routine is only called when we process an incoming + * segment, for outgoing segments only tcp_mssopt is called. + * +#ifdef TTCP + * In case of T/TCP, we call this routine during implicit connection + * setup as well (offer = -1), to initialize maxseg from the cached + * MSS of our peer. 
+#endif */ -int +void tcp_mss(tp, offer) - register struct tcpcb *tp; - u_int offer; + struct tcpcb *tp; + int offer; { - struct route *ro; register struct rtentry *rt; struct ifnet *ifp; register int rtt, mss; u_long bufsize; struct inpcb *inp; struct socket *so; +#ifdef TTCP + struct rmxp_tao *taop; + int origoffer = offer; + extern int tcp_do_rfc1644; +#endif extern int tcp_mssdflt; + extern int tcp_do_rfc1323; inp = tp->t_inpcb; - ro = &inp->inp_route; - - if ((rt = ro->ro_rt) == (struct rtentry *)0) { - /* No route yet, so try to acquire one */ - if (inp->inp_faddr.s_addr != INADDR_ANY) { - ro->ro_dst.sa_family = AF_INET; - ro->ro_dst.sa_len = sizeof(ro->ro_dst); - ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = - inp->inp_faddr; - rtalloc(ro); - } - if ((rt = ro->ro_rt) == (struct rtentry *)0) - return (tcp_mssdflt); + if ((rt = tcp_rtlookup(inp)) == NULL) { + tp->t_maxopd = tp->t_maxseg = tcp_mssdflt; + return; } ifp = rt->rt_ifp; so = inp->inp_socket; +#ifdef TTCP + taop = rmx_taop(rt->rt_rmx); + /* + * Offer == -1 means that we didn't receive SYN yet, + * use cached value in that case; + */ + if (offer == -1) + offer = taop->tao_mssopt; +#endif /* TTCP */ + /* + * Offer == 0 means that there was no MSS on the SYN segment, + * in this case we use tcp_mssdflt. + */ + if (offer == 0) + offer = tcp_mssdflt; + else + /* + * Sanity check: make sure that maxopd will be large + * enough to allow some data on segments even is the + * all the option space is used (40bytes). Otherwise + * funny things may happen in tcp_output. + */ + offer = max(offer, 64); +#ifdef TTCP + taop->tao_mssopt = offer; +#endif /* TTCP */ + #ifdef RTV_MTU /* if route characteristics exist ... 
*/ /* * While we're here, check if there's an initial rtt @@ -1576,7 +2104,7 @@ tcp_mss(tp, offer) */ if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) { /* - * XXX the lock bit for MTU indicates that the value + * XXX the lock bit for RTT indicates that the value * is also a minimum value; this is subject to time. */ if (rt->rt_rmx.rmx_locks & RTV_RTT) @@ -1602,6 +2130,44 @@ tcp_mss(tp, offer) #endif /* RTV_MTU */ { mss = ifp->if_mtu - sizeof(struct tcpiphdr); + if (!in_localaddr(inp->inp_faddr)) + mss = min(mss, tcp_mssdflt); + } + mss = min(mss, offer); + /* + * maxopd stores the maximum length of data AND options + * in a segment; maxseg is the amount of data in a normal + * segment. We need to store this value (maxopd) apart + * from maxseg, because now every segment carries options + * and thus we normally have somewhat less data in segments. + */ + tp->t_maxopd = mss; + +#ifdef TTCP + /* + * In case of T/TCP, origoffer==-1 indicates, that no segments + * were received yet. In this case we just guess, otherwise + * we do the same as before T/TCP. + */ + if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP && + (origoffer == -1 || + (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)) + mss -= TCPOLEN_TSTAMP_APPA; + if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC && + (origoffer == -1 || + (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC)) + mss -= TCPOLEN_CC_APPA; +#else /* TTCP */ + /* + * Adjust mss to leave space for the usual options. We're + * called from the end of tcp_dooptions so we can use the + * REQ/RCVD flags to see if options will be used. 
+ */ + if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) == + (TF_REQ_TSTMP|TF_RCVD_TSTMP)) + mss -= TCPOLEN_TSTAMP_APPA; +#endif /* TTCP */ + #if (MCLBYTES & (MCLBYTES - 1)) == 0 if (mss > MCLBYTES) mss &= ~(MCLBYTES-1); @@ -1609,53 +2175,43 @@ tcp_mss(tp, offer) if (mss > MCLBYTES) mss = mss / MCLBYTES * MCLBYTES; #endif - if (!in_localaddr(inp->inp_faddr)) - mss = min(mss, tcp_mssdflt); - } /* - * The current mss, t_maxseg, is initialized to the default value. - * If we compute a smaller value, reduce the current mss. - * If we compute a larger value, return it for use in sending - * a max seg size option, but don't store it for use - * unless we received an offer at least that large from peer. - * However, do not accept offers under 32 bytes. + * If there's a pipesize, change the socket buffer + * to that size. Make the socket buffers an integral + * number of mss units; if the mss is larger than + * the socket buffer, decrease the mss. */ - if (offer) - mss = min(mss, offer); - mss = max(mss, 32); /* sanity */ - if (mss < tp->t_maxseg || offer != 0) { - /* - * If there's a pipesize, change the socket buffer - * to that size. Make the socket buffers an integral - * number of mss units; if the mss is larger than - * the socket buffer, decrease the mss. 
- */ #ifdef RTV_SPIPE - if ((bufsize = rt->rt_rmx.rmx_sendpipe) == 0) + if ((bufsize = rt->rt_rmx.rmx_sendpipe) == 0) #endif - bufsize = so->so_snd.sb_hiwat; - if (bufsize < mss) - mss = bufsize; - else { - bufsize = roundup(bufsize, mss); - if (bufsize > sb_max) - bufsize = sb_max; - (void)sbreserve(&so->so_snd, bufsize); - } - tp->t_maxseg = mss; + bufsize = so->so_snd.sb_hiwat; + if (bufsize < mss) + mss = bufsize; + else { + bufsize = roundup(bufsize, mss); + if (bufsize > sb_max) + bufsize = sb_max; + (void)sbreserve(&so->so_snd, bufsize); + } + tp->t_maxseg = mss; #ifdef RTV_RPIPE - if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0) + if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0) #endif - bufsize = so->so_rcv.sb_hiwat; - if (bufsize > mss) { - bufsize = roundup(bufsize, mss); - if (bufsize > sb_max) - bufsize = sb_max; - (void)sbreserve(&so->so_rcv, bufsize); - } + bufsize = so->so_rcv.sb_hiwat; + if (bufsize > mss) { + bufsize = roundup(bufsize, mss); + if (bufsize > sb_max) + bufsize = sb_max; + (void)sbreserve(&so->so_rcv, bufsize); } - tp->snd_cwnd = mss; +#ifdef TTCP + /* + * Don't force slow-start on local network. + */ + if (!in_localaddr(inp->inp_faddr)) +#endif /* TTCP */ + tp->snd_cwnd = mss; #ifdef RTV_SSTHRESH if (rt->rt_rmx.rmx_ssthresh) { @@ -1667,7 +2223,29 @@ tcp_mss(tp, offer) */ tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh); } -#endif /* RTV_MTU */ - return (mss); +#endif +} + +/* + * Determine the MSS option to send on an outgoing SYN. 
+ */ +int +tcp_mssopt(tp) + struct tcpcb *tp; +{ + struct rtentry *rt; + extern int tcp_mssdflt; + + rt = tcp_rtlookup(tp->t_inpcb); + if (rt == NULL) + return tcp_mssdflt; + + /* + * if there's an mtu associated with the route, use it + */ + if (rt->rt_rmx.rmx_mtu) + return rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr); + + return rt->rt_ifp->if_mtu - sizeof(struct tcpiphdr); } #endif /* TUBA_INCLUDE */ diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index 161baed..3af7caa 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)tcp_output.c 8.3 (Berkeley) 12/30/93 - * $Id: tcp_output.c,v 1.5 1995/01/24 08:03:22 davidg Exp $ + * $Id: tcp_output.c,v 1.6 1995/01/26 03:56:20 davidg Exp $ */ #include <sys/param.h> @@ -66,8 +66,6 @@ extern struct mbuf *m_copypack(); #endif -#define MAX_TCPOPTLEN 32 /* max # bytes that go in options */ - /* * Tcp output routine: figure out what should be sent and send it. */ @@ -80,9 +78,13 @@ tcp_output(tp) int off, flags, error; register struct mbuf *m; register struct tcpiphdr *ti; - u_char opt[MAX_TCPOPTLEN]; + u_char opt[TCP_MAXOLEN]; unsigned optlen, hdrlen; int idle, sendalot; +#ifdef TTCP + struct rmxp_tao *taop; + struct rmxp_tao tao_noncached; +#endif /* * Determine length of data that should be transmitted, @@ -104,6 +106,17 @@ again: win = min(tp->snd_wnd, tp->snd_cwnd); flags = tcp_outflags[tp->t_state]; +#ifdef TTCP + /* + * Get standard flags, and add SYN or FIN if requested by 'hidden' + * state flags. + */ + if (tp->t_flags & TF_NEEDFIN) + flags |= TH_FIN; + if (tp->t_flags & TF_NEEDSYN) + flags |= TH_SYN; +#endif /* TTCP */ + /* * If in persist timeout with window of 0, send 1 byte. 
* Otherwise, if window is small but nonzero @@ -139,6 +152,26 @@ again: len = min(so->so_snd.sb_cc, win) - off; +#ifdef TTCP + if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) { + taop = &tao_noncached; + bzero(taop, sizeof(*taop)); + } + + /* + * Lop off SYN bit if it has already been sent. However, if this + * is SYN-SENT state and if segment contains data and if we don't + * know that foreign host supports TAO, suppress sending segment. + */ + if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) { + flags &= ~TH_SYN; + off--, len++; + if (len > 0 && tp->t_state == TCPS_SYN_SENT && + taop->tao_ccsent == 0) + return 0; + } +#endif /* TTCP */ + if (len < 0) { /* * If FIN has been sent but not acked, @@ -179,11 +212,18 @@ again: if (len == tp->t_maxseg) goto send; if ((idle || tp->t_flags & TF_NODELAY) && +#ifdef TTCP + (tp->t_flags & TF_NOPUSH) == 0 && +#endif len + off >= so->so_snd.sb_cc) goto send; if (tp->t_force) goto send; +#ifdef TTCP + if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0) +#else if (len >= tp->max_sndwnd / 2) +#endif goto send; if (SEQ_LT(tp->snd_nxt, tp->snd_max)) goto send; @@ -216,7 +256,12 @@ again: */ if (tp->t_flags & TF_ACKNOW) goto send; +#ifdef TTCP + if ((flags & TH_RST) || + ((flags & TH_SYN) && (tp->t_flags & TF_NEEDSYN) == 0)) +#else if (flags & (TH_SYN|TH_RST)) +#endif goto send; if (SEQ_GT(tp->snd_up, tp->snd_una)) goto send; @@ -279,10 +324,10 @@ send: u_short mss; opt[0] = TCPOPT_MAXSEG; - opt[1] = 4; - mss = htons((u_short) tcp_mss(tp, 0)); + opt[1] = TCPOLEN_MAXSEG; + mss = htons((u_short) tcp_mssopt(tp)); bcopy((caddr_t)&mss, (caddr_t)(opt + 2), sizeof(mss)); - optlen = 4; + optlen = TCPOLEN_MAXSEG; if ((tp->t_flags & TF_REQ_SCALE) && ((flags & TH_ACK) == 0 || @@ -303,8 +348,12 @@ send: * and our peer have sent timestamps in our SYN's. 
*/ if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP && - (flags & TH_RST) == 0 && + (flags & TH_RST) == 0 && +#ifdef TTCP + ((flags & TH_ACK) == 0 || +#else ((flags & (TH_SYN|TH_ACK)) == TH_SYN || +#endif (tp->t_flags & TF_RCVD_TSTMP))) { u_long *lp = (u_long *)(opt + optlen); @@ -315,27 +364,108 @@ send: optlen += TCPOLEN_TSTAMP_APPA; } +#ifdef TTCP + /* + * Send `CC-family' options if our side wants to use them (TF_REQ_CC), + * options are allowed (!TF_NOOPT) and it's not a RST. + */ + if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC && + (flags & TH_RST) == 0) { + switch (flags & (TH_SYN|TH_ACK)) { + /* + * This is a normal ACK, send CC if we received CC before + * from our peer. + */ + case TH_ACK: + if (!(tp->t_flags & TF_RCVD_CC)) + break; + /*FALLTHROUGH*/ + + /* + * We can only get here in T/TCP's SYN_SENT* state, when + * we're a sending a non-SYN segment without waiting for + * the ACK of our SYN. A check above assures that we only + * do this if our peer understands T/TCP. + */ + case 0: + opt[optlen++] = TCPOPT_NOP; + opt[optlen++] = TCPOPT_NOP; + opt[optlen++] = TCPOPT_CC; + opt[optlen++] = TCPOLEN_CC; + *(u_int32_t *)&opt[optlen] = htonl(tp->cc_send); + + optlen += 4; + break; + + /* + * This is our initial SYN, check whether we have to use + * CC or CC.new. + */ + case TH_SYN: + opt[optlen++] = TCPOPT_NOP; + opt[optlen++] = TCPOPT_NOP; + + if (taop->tao_ccsent != 0 && + CC_GEQ(tp->cc_send, taop->tao_ccsent)) { + opt[optlen++] = TCPOPT_CC; + taop->tao_ccsent = tp->cc_send; + } else { + opt[optlen++] = TCPOPT_CCNEW; + taop->tao_ccsent = 0; + } + opt[optlen++] = TCPOLEN_CC; + *(u_int32_t *)&opt[optlen] = htonl(tp->cc_send); + optlen += 4; + break; + + /* + * This is a SYN,ACK; send CC and CC.echo if we received + * CC from our peer. 
+ */ + case (TH_SYN|TH_ACK): + if (tp->t_flags & TF_RCVD_CC) { + opt[optlen++] = TCPOPT_NOP; + opt[optlen++] = TCPOPT_NOP; + opt[optlen++] = TCPOPT_CC; + opt[optlen++] = TCPOLEN_CC; + *(u_int32_t *)&opt[optlen] = + htonl(tp->cc_send); + optlen += 4; + opt[optlen++] = TCPOPT_NOP; + opt[optlen++] = TCPOPT_NOP; + opt[optlen++] = TCPOPT_CCECHO; + opt[optlen++] = TCPOLEN_CC; + *(u_int32_t *)&opt[optlen] = + htonl(tp->cc_recv); + optlen += 4; + } + break; + } + } +#endif /* TTCP */ + hdrlen += optlen; /* * Adjust data length if insertion of options will - * bump the packet length beyond the t_maxseg length. + * bump the packet length beyond the t_maxopd length. + * Clear the FIN bit because we cut off the tail of + * the segment. */ - if (len > tp->t_maxseg - optlen) { + if (len + optlen > tp->t_maxopd) { /* * If there is still more to send, don't close the connection. */ flags &= ~TH_FIN; - len = tp->t_maxseg - optlen; + len = tp->t_maxopd - optlen; sendalot = 1; - } - + } -#ifdef DIAGNOSTIC +/*#ifdef DIAGNOSTIC*/ if (max_linkhdr + hdrlen > MHLEN) panic("tcphdr too big"); -#endif +/*#endif*/ /* * Grab a header mbuf, attaching a copy of data to diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c index 961e02a..6c48ee4 100644 --- a/sys/netinet/tcp_reass.c +++ b/sys/netinet/tcp_reass.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * @(#)tcp_input.c 8.5 (Berkeley) 4/10/94 - * $Id: tcp_input.c,v 1.9 1994/10/02 17:48:43 phk Exp $ + * $Id: tcp_input.c,v 1.10 1994/10/13 18:36:32 wollman Exp $ */ #ifndef TUBA_INCLUDE @@ -67,12 +67,6 @@ int tcprexmtthresh = 3; struct inpcb *tcp_last_inpcb = &tcb; #endif /* TUBA_INCLUDE */ -#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * PR_SLOWHZ) - -/* for modulo comparisons of timestamps */ -#define TSTMP_LT(a,b) ((int)((a)-(b)) < 0) -#define TSTMP_GEQ(a,b) ((int)((a)-(b)) >= 0) - /* * Insert segment ti into reassembly queue of tcp with @@ -142,7 +136,17 @@ tcp_reass(tp, ti, m) tcpstat.tcps_rcvduppack++; tcpstat.tcps_rcvdupbyte += ti->ti_len; m_freem(m); +#ifdef TTCP + /* + * Try to present any queued data + * at the left window edge to the user. + * This is needed after the 3-WHS + * completes. + */ + goto present; /* ??? */ +#else return (0); +#endif } m_adj(m, i); ti->ti_len -= i; @@ -184,7 +188,7 @@ present: * Present data to user, advancing rcv_nxt through * completed sequence space. */ - if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) + if (!TCPS_HAVEESTABLISHED(tp->t_state)) return (0); ti = tp->seg_next; if (ti == (struct tcpiphdr *)tp || ti->ti_seq != tp->rcv_nxt) @@ -227,12 +231,23 @@ tcp_input(m, iphlen) struct in_addr laddr; int dropsocket = 0; int iss = 0; +#ifdef TTCP + u_long tiwin; + struct tcpopt to; /* options in this segment */ + struct rmxp_tao *taop; /* pointer to our TAO cache entry */ + struct rmxp_tao tao_noncached; /* in case there's no cached entry */ +#else u_long tiwin, ts_val, ts_ecr; int ts_present = 0; +#endif #ifdef TCPDEBUG short ostate = 0; #endif +#ifdef TTCP + bzero((char *)&to, sizeof(to)); +#endif + tcpstat.tcps_rcvtotal++; /* * Get IP and TCP header together in first mbuf. 
@@ -298,9 +313,15 @@ tcp_input(m, iphlen) optp[TCPOLEN_TSTAMP_APPA] == TCPOPT_EOL)) && *(u_long *)optp == htonl(TCPOPT_TSTAMP_HDR) && (ti->ti_flags & TH_SYN) == 0) { +#ifdef TTCP + to.to_flag |= TOF_TS; + to.to_tsval = ntohl(*(u_long *)(optp + 4)); + to.to_tsecr = ntohl(*(u_long *)(optp + 8)); +#else ts_present = 1; ts_val = ntohl(*(u_long *)(optp + 4)); ts_ecr = ntohl(*(u_long *)(optp + 8)); +#endif optp = NULL; /* we've parsed the options */ } } @@ -359,6 +380,9 @@ findpcb: } #endif if (so->so_options & SO_ACCEPTCONN) { +#ifdef TTCP + register struct tcpcb *tp0 = tp; +#endif so = sonewconn(so, 0); if (so == 0) goto drop; @@ -382,9 +406,11 @@ findpcb: #endif tp = intotcpcb(inp); tp->t_state = TCPS_LISTEN; +#ifdef TTCP + tp->t_flags |= tp0->t_flags & (TF_NOPUSH|TF_NOOPT); +#endif - /* Compute proper scaling value from buffer space - */ + /* Compute proper scaling value from buffer space */ while (tp->request_r_scale < TCP_MAX_WINSHIFT && TCP_MAXWIN << tp->request_r_scale < so->so_rcv.sb_hiwat) tp->request_r_scale++; @@ -404,7 +430,11 @@ findpcb: */ if (optp && tp->t_state != TCPS_LISTEN) tcp_dooptions(tp, optp, optlen, ti, +#ifdef TTCP + &to); +#else &ts_present, &ts_val, &ts_ecr); +#endif /* * Header prediction: check for the two common cases @@ -419,22 +449,48 @@ findpcb: * receiver side. If we're getting packets in-order * (the reassembly queue is empty), add the data to * the socket buffer and note that we need a delayed ack. +#ifdef TTCP + * Make sure that the hidden state-flags are also off. + * Since we check for TCPS_ESTABLISHED above, it can only + * be TH_NEEDSYN. 
+#endif */ if (tp->t_state == TCPS_ESTABLISHED && (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK && +#ifdef TTCP + ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) && + ((to.to_flag & TOF_TS) == 0 || + TSTMP_GEQ(to.to_tsval, tp->ts_recent)) && + /* + * Using the CC option is compulsory if once started: + * the segment is OK if no T/TCP was negotiated or + * if the segment has a CC option equal to CCrecv + */ + ((tp->t_flags & (TF_REQ_CC|TF_RCVD_CC)) != (TF_REQ_CC|TF_RCVD_CC) || + (to.to_flag & TOF_CC) != 0 && to.to_cc == tp->cc_recv) && +#else (!ts_present || TSTMP_GEQ(ts_val, tp->ts_recent)) && +#endif ti->ti_seq == tp->rcv_nxt && tiwin && tiwin == tp->snd_wnd && tp->snd_nxt == tp->snd_max) { /* * If last ACK falls within this segment's sequence numbers, - * record the timestamp. + * record the timestamp. + * NOTE that the test is modified according to the latest + * proposal of the tcplw@cray.com list (Braden 1993/04/26). */ - if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) && - SEQ_LT(tp->last_ack_sent, ti->ti_seq + ti->ti_len)) { +#ifdef TTCP + if ((to.to_flag & TOF_TS) != 0 && + SEQ_LEQ(ti->ti_seq, tp->last_ack_sent)) { + tp->ts_recent_age = tcp_now; + tp->ts_recent = to.to_tsval; +#else + if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent)) { tp->ts_recent_age = tcp_now; tp->ts_recent = ts_val; +#endif } if (ti->ti_len == 0) { @@ -445,8 +501,14 @@ findpcb: * this is a pure ack for outstanding data. 
*/ ++tcpstat.tcps_predack; +#ifdef TTCP + if ((to.to_flag & TOF_TS) != 0) + tcp_xmit_timer(tp, + tcp_now - to.to_tsecr + 1); +#else if (ts_present) tcp_xmit_timer(tp, tcp_now-ts_ecr+1); +#endif else if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) tcp_xmit_timer(tp, tp->t_rtt); @@ -497,17 +559,7 @@ findpcb: m->m_len -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); sbappend(&so->so_rcv, m); sorwakeup(so); - /* - * If this is a small packet, then ACK now - with Nagel - * congestion avoidance sender won't send more until - * he gets an ACK. - */ - if ((unsigned)ti->ti_len < tp->t_maxseg) { - tp->t_flags |= TF_ACKNOW; - tcp_output(tp); - } else { - tp->t_flags |= TF_DELACK; - } + tp->t_flags |= TF_DELACK; return; } } @@ -590,9 +642,19 @@ findpcb: dropsocket = 0; /* socket is already gone */ goto drop; } +#ifdef TTCP + if ((taop = tcp_gettaocache(inp)) == NULL) { + taop = &tao_noncached; + bzero(taop, sizeof(*taop)); + } +#endif /* TTCP */ if (optp) tcp_dooptions(tp, optp, optlen, ti, +#ifdef TTCP + &to); +#else &ts_present, &ts_val, &ts_ecr); +#endif if (iss) tp->iss = iss; else @@ -601,6 +663,66 @@ findpcb: tp->irs = ti->ti_seq; tcp_sendseqinit(tp); tcp_rcvseqinit(tp); +#ifdef TTCP + /* + * Initialization of the tcpcb for transaction; + * set SND.WND = SEG.WND, + * initialize CCsend and CCrecv. + */ + tp->snd_wnd = tiwin; /* initial send-window */ + tp->cc_send = CC_INC(tcp_ccgen); + tp->cc_recv = to.to_cc; + /* + * Perform TAO test on incoming CC (SEG.CC) option, if any. + * - compare SEG.CC against cached CC from the same host, + * if any. + * - if SEG.CC > chached value, SYN must be new and is accepted + * immediately: save new CC in the cache, mark the socket + * connected, enter ESTABLISHED state, turn on flag to + * send a SYN in the next segment. + * A virtual advertised window is set in rcv_adv to + * initialize SWS prevention. Then enter normal segment + * processing: drop SYN, process data and FIN. + * - otherwise do a normal 3-way handshake. 
+ */ + if ((to.to_flag & TOF_CC) != 0) { + if (taop->tao_cc != 0 && CC_GT(to.to_cc, taop->tao_cc)) { + taop->tao_cc = to.to_cc; + tp->t_state = TCPS_ESTABLISHED; + + /* + * If there is a FIN, or if there is data and the + * connection is local, then delay SYN,ACK(SYN) in + * the hope of piggy-backing it on a response + * segment. Otherwise must send ACK now in case + * the other side is slow starting. + */ + if ((tiflags & TH_FIN) || (ti->ti_len != 0 && + in_localaddr(inp->inp_faddr))) + tp->t_flags |= (TF_DELACK | TF_NEEDSYN); + else + tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN); + tp->rcv_adv += tp->rcv_wnd; + tcpstat.tcps_connects++; + soisconnected(so); + tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; + dropsocket = 0; /* committed to socket */ + tcpstat.tcps_accepts++; + goto trimthenstep6; + } + /* else do standard 3-way handshake */ + } else { + /* + * No CC option, but maybe CC.NEW: + * invalidate cached value. + */ + taop->tao_cc = 0; + } + /* + * TAO test failed or there was no CC option, + * do a standard 3-way handshake. + */ +#endif /* TTCP */ tp->t_flags |= TF_ACKNOW; tp->t_state = TCPS_SYN_RECEIVED; tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; @@ -622,10 +744,34 @@ findpcb: * continue processing rest of data/controls, beginning with URG */ case TCPS_SYN_SENT: +#ifdef TTCP + if ((taop = tcp_gettaocache(inp)) == NULL) { + taop = &tao_noncached; + bzero(taop, sizeof(*taop)); + } + + if ((tiflags & TH_ACK) && + (SEQ_LEQ(ti->ti_ack, tp->iss) || + SEQ_GT(ti->ti_ack, tp->snd_max))) { + /* + * If we have a cached CCsent for the remote host, + * hence we haven't just crashed and restarted, + * do not send a RST. This may be a retransmission + * from the other side after our earlier ACK was lost. + * Our new SYN, when it arrives, will serve as the + * needed ACK. 
+ */ + if (taop->tao_ccsent != 0) + goto drop; + else + goto dropwithreset; + } +#else if ((tiflags & TH_ACK) && (SEQ_LEQ(ti->ti_ack, tp->iss) || SEQ_GT(ti->ti_ack, tp->snd_max))) goto dropwithreset; +#endif if (tiflags & TH_RST) { if (tiflags & TH_ACK) tp = tcp_drop(tp, ECONNREFUSED); @@ -633,25 +779,77 @@ findpcb: } if ((tiflags & TH_SYN) == 0) goto drop; +#ifdef TTCP + tp->snd_wnd = ti->ti_win; /* initial send window */ + tp->cc_recv = to.to_cc; /* foreign CC */ +#else if (tiflags & TH_ACK) { tp->snd_una = ti->ti_ack; if (SEQ_LT(tp->snd_nxt, tp->snd_una)) tp->snd_nxt = tp->snd_una; } tp->t_timer[TCPT_REXMT] = 0; +#endif + tp->irs = ti->ti_seq; tcp_rcvseqinit(tp); +#ifndef TTCP tp->t_flags |= TF_ACKNOW; if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) { +#else + if (tiflags & TH_ACK && SEQ_GT(ti->ti_ack, tp->iss)) { +#endif tcpstat.tcps_connects++; soisconnected(so); - tp->t_state = TCPS_ESTABLISHED; /* Do window scaling on this connection? */ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == (TF_RCVD_SCALE|TF_REQ_SCALE)) { tp->snd_scale = tp->requested_s_scale; tp->rcv_scale = tp->request_r_scale; } +#ifdef TTCP + /* + * Our SYN was acked. If segment contains CC.ECHO + * option, check it to make sure this segment really + * matches our SYN. If not, just drop it as old + * duplicate, but send an RST if we're still playing + * by the old rules. + */ + if ((to.to_flag & TOF_CCECHO) && + tp->cc_send != to.to_ccecho) { + if (taop->tao_ccsent != 0) + goto drop; + else + goto dropwithreset; + } + /* Segment is acceptable, update cache if undefined. */ + if (taop->tao_ccsent == 0) + taop->tao_ccsent = to.to_ccecho; + + tp->rcv_adv += tp->rcv_wnd; + tp->snd_una++; /* SYN is acked */ + /* + * If there's data, delay ACK; if there's also a FIN + * ACKNOW will be turned on later. + */ + if (ti->ti_len != 0) + tp->t_flags |= TF_DELACK; + else + tp->t_flags |= TF_ACKNOW; + /* + * Received <SYN,ACK> in SYN_SENT[*] state. 
+ * Transitions: + * SYN_SENT --> ESTABLISHED + * SYN_SENT* --> FIN_WAIT_1 + */ + if (tp->t_flags & TF_NEEDFIN) { + tp->t_state = TCPS_FIN_WAIT_1; + tp->t_flags &= ~TF_NEEDFIN; + tiflags &= ~TH_SYN; + } else + tp->t_state = TCPS_ESTABLISHED; +#else + tp->t_state = TCPS_ESTABLISHED; (void) tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); /* @@ -660,8 +858,48 @@ findpcb: */ if (tp->t_rtt) tcp_xmit_timer(tp, tp->t_rtt); +#endif + +#ifdef TTCP + } else { + /* + * Received initial SYN in SYN-SENT[*] state => simul- + * taneous open. If segment contains CC option and there is + * a cached CC, apply TAO test; if it succeeds, connection is + * half-synchronized. Otherwise, do 3-way handshake: + * SYN-SENT -> SYN-RECEIVED + * SYN-SENT* -> SYN-RECEIVED* + * If there was no CC option, clear cached CC value. + */ + tp->t_flags |= TF_ACKNOW; + tp->t_timer[TCPT_REXMT] = 0; + if (to.to_flag & TOF_CC) { + if (taop->tao_cc != 0 && + CC_GT(to.to_cc, taop->tao_cc)) { + /* + * update cache and make transition: + * SYN-SENT -> ESTABLISHED* + * SYN-SENT* -> FIN-WAIT-1* + */ + taop->tao_cc = to.to_cc; + if (tp->t_flags & TF_NEEDFIN) { + tp->t_state = TCPS_FIN_WAIT_1; + tp->t_flags &= ~TF_NEEDFIN; + } else + tp->t_state = TCPS_ESTABLISHED; + tp->t_flags |= TF_NEEDSYN; + } else + tp->t_state = TCPS_SYN_RECEIVED; + } else { + /* CC.NEW or no option => invalidate cache */ + taop->tao_cc = 0; + tp->t_state = TCPS_SYN_RECEIVED; + } + } +#else } else tp->t_state = TCPS_SYN_RECEIVED; +#endif trimthenstep6: /* @@ -680,21 +918,72 @@ trimthenstep6: } tp->snd_wl1 = ti->ti_seq - 1; tp->rcv_up = ti->ti_seq; +#ifdef TTCP + /* + * Client side of transaction: already sent SYN and data. + * If the remote host used T/TCP to validate the SYN, + * our data will be ACK'd; if so, enter normal data segment + * processing in the middle of step 5, ack processing. + * Otherwise, goto step 6. 
+ */ + if (tiflags & TH_ACK) + goto process_ACK; +#endif goto step6; +#ifdef TTCP + /* + * If the state is LAST_ACK or CLOSING or TIME_WAIT: + * if segment contains a SYN and CC [not CC.NEW] option: + * if state == TIME_WAIT and connection duration > MSL, + * drop packet and send RST; + * + * if SEG.CC > CCrecv then is new SYN, and can implicitly + * ack the FIN (and data) in retransmission queue. + * Complete close and delete TCPCB. Then reprocess + * segment, hoping to find new TCPCB in LISTEN state; + * + * else must be old SYN; drop it. + * else do normal processing. + */ + case TCPS_LAST_ACK: + case TCPS_CLOSING: + case TCPS_TIME_WAIT: + if ((tiflags & TH_SYN) && + (to.to_flag & TOF_CC) && tp->cc_recv != 0) { + if (tp->t_state == TCPS_TIME_WAIT && + tp->t_duration > TCPTV_MSL) + goto dropwithreset; + if (CC_GT(to.to_cc, tp->cc_recv)) { + tp = tcp_close(tp); + goto findpcb; + } + else + goto drop; + } + break; /* continue normal processing */ +#endif } /* * States other than LISTEN or SYN_SENT. * First check timestamp, if present. +#ifdef TTCP + * Then check the connection count, if present. +#endif * Then check that at least some bytes of segment are within * receive window. If segment begins before rcv_nxt, * drop leading data (and SYN); if nothing left, just ack. - * + * * RFC 1323 PAWS: If we have a timestamp reply on this segment * and it's less than ts_recent, drop it. */ +#ifdef TTCP + if ((to.to_flag & TOF_TS) != 0 && (tiflags & TH_RST) == 0 && + tp->ts_recent && TSTMP_LT(to.to_tsval, tp->ts_recent)) { +#else if (ts_present && (tiflags & TH_RST) == 0 && tp->ts_recent && TSTMP_LT(ts_val, tp->ts_recent)) { +#endif /* Check to see if ts_recent is over 24 days old. */ if ((int)(tcp_now - tp->ts_recent_age) > TCP_PAWS_IDLE) { @@ -718,6 +1007,19 @@ trimthenstep6: } } +#ifdef TTCP + /* + * T/TCP mechanism + * If T/TCP was negotiated and the segment doesn't have CC, + * or if it's CC is wrong then drop the segment. 
+ * RST segments do not have to comply with this. + */ + if ((tp->t_flags & (TF_REQ_CC|TF_RCVD_CC)) == (TF_REQ_CC|TF_RCVD_CC) && + ((to.to_flag & TOF_CC) == 0 || tp->cc_recv != to.to_cc) && + (tiflags & TH_RST) == 0) + goto dropafterack; +#endif + todrop = tp->rcv_nxt - ti->ti_seq; if (todrop > 0) { if (tiflags & TH_SYN) { @@ -829,12 +1131,19 @@ trimthenstep6: /* * If last ACK falls within this segment's sequence numbers, * record its timestamp. + * NOTE that the test is modified according to the latest + * proposal of the tcplw@cray.com list (Braden 1993/04/26). */ - if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) && - SEQ_LT(tp->last_ack_sent, ti->ti_seq + ti->ti_len + - ((tiflags & (TH_SYN|TH_FIN)) != 0))) { +#ifdef TTCP + if ((to.to_flag & TOF_TS) != 0 && + SEQ_LEQ(ti->ti_seq, tp->last_ack_sent)) { + tp->ts_recent_age = tcp_now; + tp->ts_recent = to.to_tsval; +#else + if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent)) { tp->ts_recent_age = tcp_now; tp->ts_recent = ts_val; +#endif } /* @@ -880,11 +1189,26 @@ trimthenstep6: goto dropwithreset; } +#ifdef TTCP + /* + * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN + * flag is on (half-synchronized state), then queue data for + * later processing; else drop segment and return. + */ + if ((tiflags & TH_ACK) == 0) { + if (tp->t_state == TCPS_SYN_RECEIVED || + (tp->t_flags & TF_NEEDSYN)) + goto step6; + else + goto drop; + } +#else /* * If the ACK bit is off we drop the segment and return. */ if ((tiflags & TH_ACK) == 0) goto drop; +#endif /* * Ack processing. @@ -900,16 +1224,46 @@ trimthenstep6: if (SEQ_GT(tp->snd_una, ti->ti_ack) || SEQ_GT(ti->ti_ack, tp->snd_max)) goto dropwithreset; + tcpstat.tcps_connects++; soisconnected(so); - tp->t_state = TCPS_ESTABLISHED; /* Do window scaling? 
*/ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == (TF_RCVD_SCALE|TF_REQ_SCALE)) { tp->snd_scale = tp->requested_s_scale; tp->rcv_scale = tp->request_r_scale; } +#ifdef TTCP + /* + * Upon successful completion of 3-way handshake, + * update cache.CC if it was undefined, pass any queued + * data to the user, and advance state appropriately. + */ + if ((taop = tcp_gettaocache(inp)) != NULL && + taop->tao_cc == 0) + taop->tao_cc = tp->cc_recv; + + /* + * Make transitions: + * SYN-RECEIVED -> ESTABLISHED + * SYN-RECEIVED* -> FIN-WAIT-1 + */ + if (tp->t_flags & TF_NEEDFIN) { + tp->t_state = TCPS_FIN_WAIT_1; + tp->t_flags &= ~TF_NEEDFIN; + } else + tp->t_state = TCPS_ESTABLISHED; + /* + * If segment contains data or ACK, will call tcp_reass() + * later; if not, do so now to pass queued data to user. + */ + if (ti->ti_len == 0 && (tiflags & TH_FIN) == 0) + (void) tcp_reass(tp, (struct tcpiphdr *)0, + (struct mbuf *)0); +#else /* TTCP */ + tp->t_state = TCPS_ESTABLISHED; (void) tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); +#endif /* TTCP */ tp->snd_wl1 = ti->ti_seq - 1; /* fall into ... */ @@ -999,6 +1353,24 @@ trimthenstep6: tcpstat.tcps_rcvacktoomuch++; goto dropafterack; } +#ifdef TTCP + /* + * If we reach this point, ACK is not a duplicate, + * i.e., it ACKs something we sent. + */ + if (tp->t_flags & TF_NEEDSYN) { + /* + * T/TCP: Connection was half-synchronized, and our + * SYN has been ACK'd (so connection is now fully + * synchronized). Go to non-starred state and + * increment snd_una for ACK of SYN. + */ + tp->t_flags &= ~TF_NEEDSYN; + tp->snd_una++; + } + +process_ACK: +#endif acked = ti->ti_ack - tp->snd_una; tcpstat.tcps_rcvackpack++; tcpstat.tcps_rcvackbyte += acked; @@ -1012,8 +1384,13 @@ trimthenstep6: * timer backoff (cf., Phil Karn's retransmit alg.). * Recompute the initial retransmit timer. 
*/ +#ifdef TTCP + if (to.to_flag & TOF_TS) + tcp_xmit_timer(tp, tcp_now - to.to_tsecr + 1); +#else if (ts_present) tcp_xmit_timer(tp, tcp_now-ts_ecr+1); +#endif else if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) tcp_xmit_timer(tp,tp->t_rtt); @@ -1028,6 +1405,16 @@ trimthenstep6: needoutput = 1; } else if (tp->t_timer[TCPT_PERSIST] == 0) tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; + +#ifdef TTCP + /* + * If no data (only SYN) was ACK'd, + * skip rest of ACK processing. + */ + if (acked == 0) + goto step6; +#endif + /* * When new data is acked, open the congestion window. * If the window gives us less than ssthresh packets @@ -1092,7 +1479,15 @@ trimthenstep6: if (ourfinisacked) { tp->t_state = TCPS_TIME_WAIT; tcp_canceltimers(tp); - tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; +#ifdef TTCP + /* Shorten TIME_WAIT [RFC-1644, p.28] */ + if (tp->cc_recv != 0 && + tp->t_duration < TCPTV_MSL) + tp->t_timer[TCPT_2MSL] = + tp->t_rxtcur * TCPTV_TWTRUNC; + else +#endif + tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; soisdisconnected(so); } break; @@ -1232,7 +1627,19 @@ dodata: /* XXX */ if (tiflags & TH_FIN) { if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { socantrcvmore(so); - tp->t_flags |= TF_ACKNOW; +#ifdef TTCP + /* + * If connection is half-synchronized + * (ie SEND_SYN flag on) then delay ACK, + * so it may be piggybacked when SYN is sent. + * Otherwise, since we received a FIN then no + * more input can be expected, send ACK now. + */ + if (tp->t_flags & TF_NEEDSYN) + tp->t_flags |= TF_DELACK; + else +#endif /* TTCP */ + tp->t_flags |= TF_ACKNOW; tp->rcv_nxt++; } switch (tp->t_state) { @@ -1262,7 +1669,18 @@ dodata: /* XXX */ case TCPS_FIN_WAIT_2: tp->t_state = TCPS_TIME_WAIT; tcp_canceltimers(tp); - tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; +#ifdef TTCP + /* Shorten TIME_WAIT [RFC-1644, p.28] */ + if (tp->cc_recv != 0 && + tp->t_duration < TCPTV_MSL) { + tp->t_timer[TCPT_2MSL] = + tp->t_rxtcur * TCPTV_TWTRUNC; + /* For transaction client, force ACK now. 
*/ + tp->t_flags |= TF_ACKNOW; + } + else +#endif + tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; soisdisconnected(so); break; @@ -1280,14 +1698,6 @@ dodata: /* XXX */ #endif /* - * If this is a small packet, then ACK now - with Nagel - * congestion avoidance sender won't send more until - * he gets an ACK. - */ - if (ti->ti_len && ((unsigned)ti->ti_len < tp->t_maxseg)) - tp->t_flags |= TF_ACKNOW; - - /* * Return any desired output. */ if (needoutput || (tp->t_flags & TF_ACKNOW)) @@ -1301,6 +1711,12 @@ dropafterack: */ if (tiflags & TH_RST) goto drop; +#ifdef TTCP +#ifdef TCPDEBUG + if (so->so_options & SO_DEBUG) + tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0); +#endif +#endif m_freem(m); tp->t_flags |= TF_ACKNOW; (void) tcp_output(tp); @@ -1315,6 +1731,12 @@ dropwithreset: if ((tiflags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST) || IN_MULTICAST(ntohl(ti->ti_dst.s_addr))) goto drop; +#ifdef TTCP +#ifdef TCPDEBUG + if (tp == 0 || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) + tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0); +#endif +#endif if (tiflags & TH_ACK) tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST); else { @@ -1333,9 +1755,14 @@ drop: * Drop space held by incoming segment and return. 
*/ #ifdef TCPDEBUG +#ifdef TTCP + if (tp == 0 || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) + tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0); +#else if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0); #endif +#endif m_freem(m); /* destroy temporarily created socket */ if (dropsocket) @@ -1345,15 +1772,23 @@ drop: } void +#ifdef TTCP +tcp_dooptions(tp, cp, cnt, ti, to) +#else tcp_dooptions(tp, cp, cnt, ti, ts_present, ts_val, ts_ecr) +#endif struct tcpcb *tp; u_char *cp; int cnt; struct tcpiphdr *ti; +#ifdef TTCP + struct tcpopt *to; +#else int *ts_present; u_long *ts_val, *ts_ecr; +#endif { - u_short mss; + u_short mss = 0; int opt, optlen; for (; cnt > 0; cnt -= optlen, cp += optlen) { @@ -1379,7 +1814,6 @@ tcp_dooptions(tp, cp, cnt, ti, ts_present, ts_val, ts_ecr) continue; bcopy((char *) cp + 2, (char *) &mss, sizeof(mss)); NTOHS(mss); - (void) tcp_mss(tp, mss); /* sets t_maxseg */ break; case TCPOPT_WINDOW: @@ -1394,11 +1828,21 @@ tcp_dooptions(tp, cp, cnt, ti, ts_present, ts_val, ts_ecr) case TCPOPT_TIMESTAMP: if (optlen != TCPOLEN_TIMESTAMP) continue; +#ifdef TTCP + to->to_flag |= TOF_TS; + bcopy((char *)cp + 2, + (char *)&to->to_tsval, sizeof(to->to_tsval)); + NTOHL(to->to_tsval); + bcopy((char *)cp + 6, + (char *)&to->to_tsecr, sizeof(to->to_tsecr)); + NTOHL(to->to_tsecr); +#else *ts_present = 1; bcopy((char *)cp + 2, (char *) ts_val, sizeof(*ts_val)); NTOHL(*ts_val); bcopy((char *)cp + 6, (char *) ts_ecr, sizeof(*ts_ecr)); NTOHL(*ts_ecr); +#endif /* * A timestamp received in a SYN makes @@ -1406,12 +1850,59 @@ tcp_dooptions(tp, cp, cnt, ti, ts_present, ts_val, ts_ecr) */ if (ti->ti_flags & TH_SYN) { tp->t_flags |= TF_RCVD_TSTMP; +#ifdef TTCP + tp->ts_recent = to->to_tsval; +#else tp->ts_recent = *ts_val; +#endif tp->ts_recent_age = tcp_now; } break; +#ifdef TTCP + case TCPOPT_CC: + if (optlen != TCPOLEN_CC) + continue; + to->to_flag |= TCPOPT_CC; + bcopy((char *)cp + 2, + (char *)&to->to_cc, 
sizeof(to->to_cc)); + NTOHL(to->to_cc); + /* + * A CC or CC.new option received in a SYN makes + * it ok to send CC in subsequent segments. + */ + if (ti->ti_flags & TH_SYN) + tp->t_flags |= TF_RCVD_CC; + break; + case TCPOPT_CCNEW: + if (optlen != TCPOLEN_CC) + continue; + if (!(ti->ti_flags & TH_SYN)) + continue; + to->to_flag |= TOF_CCNEW; + bcopy((char *)cp + 2, + (char *)&to->to_cc, sizeof(to->to_cc)); + NTOHL(to->to_cc); + /* + * A CC or CC.new option received in a SYN makes + * it ok to send CC in subsequent segments. + */ + tp->t_flags |= TF_RCVD_CC; + break; + case TCPOPT_CCECHO: + if (optlen != TCPOLEN_CC) + continue; + if (!(ti->ti_flags & TH_SYN)) + continue; + to->to_flag |= TOF_CCECHO; + bcopy((char *)cp + 2, + (char *)&to->to_ccecho, sizeof(to->to_ccecho)); + NTOHL(to->to_ccecho); + break; +#endif /* TTCP*/ } } + if (ti->ti_flags & TH_SYN) + tcp_mss(tp, mss); /* sets t_maxseg */ } /* @@ -1535,39 +2026,76 @@ tcp_xmit_timer(tp, rtt) * window to be a single segment if the destination isn't local. * While looking at the routing entry, we also initialize other path-dependent * parameters from pre-set or cached values in the routing entry. + * + * Also take into account the space needed for options that we + * send regularly. Make maxseg shorter by that amount to assure + * that we can send maxseg amount of data even when the options + * are present. Store the upper limit of the length of options plus + * data in maxopd. + * + * NOTE that this routine is only called when we process an incoming + * segment, for outgoing segments only tcp_mssopt is called. + * +#ifdef TTCP + * In case of T/TCP, we call this routine during implicit connection + * setup as well (offer = -1), to initialize maxseg from the cached + * MSS of our peer. 
+#endif */ -int +void tcp_mss(tp, offer) - register struct tcpcb *tp; - u_int offer; + struct tcpcb *tp; + int offer; { - struct route *ro; register struct rtentry *rt; struct ifnet *ifp; register int rtt, mss; u_long bufsize; struct inpcb *inp; struct socket *so; +#ifdef TTCP + struct rmxp_tao *taop; + int origoffer = offer; + extern int tcp_do_rfc1644; +#endif extern int tcp_mssdflt; + extern int tcp_do_rfc1323; inp = tp->t_inpcb; - ro = &inp->inp_route; - - if ((rt = ro->ro_rt) == (struct rtentry *)0) { - /* No route yet, so try to acquire one */ - if (inp->inp_faddr.s_addr != INADDR_ANY) { - ro->ro_dst.sa_family = AF_INET; - ro->ro_dst.sa_len = sizeof(ro->ro_dst); - ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = - inp->inp_faddr; - rtalloc(ro); - } - if ((rt = ro->ro_rt) == (struct rtentry *)0) - return (tcp_mssdflt); + if ((rt = tcp_rtlookup(inp)) == NULL) { + tp->t_maxopd = tp->t_maxseg = tcp_mssdflt; + return; } ifp = rt->rt_ifp; so = inp->inp_socket; +#ifdef TTCP + taop = rmx_taop(rt->rt_rmx); + /* + * Offer == -1 means that we didn't receive SYN yet, + * use cached value in that case; + */ + if (offer == -1) + offer = taop->tao_mssopt; +#endif /* TTCP */ + /* + * Offer == 0 means that there was no MSS on the SYN segment, + * in this case we use tcp_mssdflt. + */ + if (offer == 0) + offer = tcp_mssdflt; + else + /* + * Sanity check: make sure that maxopd will be large + * enough to allow some data on segments even if + * all the option space is used (40 bytes). Otherwise + * funny things may happen in tcp_output. + */ + offer = max(offer, 64); +#ifdef TTCP + taop->tao_mssopt = offer; +#endif /* TTCP */ + #ifdef RTV_MTU /* if route characteristics exist ...
*/ /* * While we're here, check if there's an initial rtt @@ -1576,7 +2104,7 @@ tcp_mss(tp, offer) */ if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) { /* - * XXX the lock bit for MTU indicates that the value + * XXX the lock bit for RTT indicates that the value * is also a minimum value; this is subject to time. */ if (rt->rt_rmx.rmx_locks & RTV_RTT) @@ -1602,6 +2130,44 @@ tcp_mss(tp, offer) #endif /* RTV_MTU */ { mss = ifp->if_mtu - sizeof(struct tcpiphdr); + if (!in_localaddr(inp->inp_faddr)) + mss = min(mss, tcp_mssdflt); + } + mss = min(mss, offer); + /* + * maxopd stores the maximum length of data AND options + * in a segment; maxseg is the amount of data in a normal + * segment. We need to store this value (maxopd) apart + * from maxseg, because now every segment carries options + * and thus we normally have somewhat less data in segments. + */ + tp->t_maxopd = mss; + +#ifdef TTCP + /* + * In case of T/TCP, origoffer==-1 indicates, that no segments + * were received yet. In this case we just guess, otherwise + * we do the same as before T/TCP. + */ + if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP && + (origoffer == -1 || + (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)) + mss -= TCPOLEN_TSTAMP_APPA; + if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC && + (origoffer == -1 || + (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC)) + mss -= TCPOLEN_CC_APPA; +#else /* TTCP */ + /* + * Adjust mss to leave space for the usual options. We're + * called from the end of tcp_dooptions so we can use the + * REQ/RCVD flags to see if options will be used. 
+ */ + if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) == + (TF_REQ_TSTMP|TF_RCVD_TSTMP)) + mss -= TCPOLEN_TSTAMP_APPA; +#endif /* TTCP */ + #if (MCLBYTES & (MCLBYTES - 1)) == 0 if (mss > MCLBYTES) mss &= ~(MCLBYTES-1); @@ -1609,53 +2175,43 @@ tcp_mss(tp, offer) if (mss > MCLBYTES) mss = mss / MCLBYTES * MCLBYTES; #endif - if (!in_localaddr(inp->inp_faddr)) - mss = min(mss, tcp_mssdflt); - } /* - * The current mss, t_maxseg, is initialized to the default value. - * If we compute a smaller value, reduce the current mss. - * If we compute a larger value, return it for use in sending - * a max seg size option, but don't store it for use - * unless we received an offer at least that large from peer. - * However, do not accept offers under 32 bytes. + * If there's a pipesize, change the socket buffer + * to that size. Make the socket buffers an integral + * number of mss units; if the mss is larger than + * the socket buffer, decrease the mss. */ - if (offer) - mss = min(mss, offer); - mss = max(mss, 32); /* sanity */ - if (mss < tp->t_maxseg || offer != 0) { - /* - * If there's a pipesize, change the socket buffer - * to that size. Make the socket buffers an integral - * number of mss units; if the mss is larger than - * the socket buffer, decrease the mss. 
- */ #ifdef RTV_SPIPE - if ((bufsize = rt->rt_rmx.rmx_sendpipe) == 0) + if ((bufsize = rt->rt_rmx.rmx_sendpipe) == 0) #endif - bufsize = so->so_snd.sb_hiwat; - if (bufsize < mss) - mss = bufsize; - else { - bufsize = roundup(bufsize, mss); - if (bufsize > sb_max) - bufsize = sb_max; - (void)sbreserve(&so->so_snd, bufsize); - } - tp->t_maxseg = mss; + bufsize = so->so_snd.sb_hiwat; + if (bufsize < mss) + mss = bufsize; + else { + bufsize = roundup(bufsize, mss); + if (bufsize > sb_max) + bufsize = sb_max; + (void)sbreserve(&so->so_snd, bufsize); + } + tp->t_maxseg = mss; #ifdef RTV_RPIPE - if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0) + if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0) #endif - bufsize = so->so_rcv.sb_hiwat; - if (bufsize > mss) { - bufsize = roundup(bufsize, mss); - if (bufsize > sb_max) - bufsize = sb_max; - (void)sbreserve(&so->so_rcv, bufsize); - } + bufsize = so->so_rcv.sb_hiwat; + if (bufsize > mss) { + bufsize = roundup(bufsize, mss); + if (bufsize > sb_max) + bufsize = sb_max; + (void)sbreserve(&so->so_rcv, bufsize); } - tp->snd_cwnd = mss; +#ifdef TTCP + /* + * Don't force slow-start on local network. + */ + if (!in_localaddr(inp->inp_faddr)) +#endif /* TTCP */ + tp->snd_cwnd = mss; #ifdef RTV_SSTHRESH if (rt->rt_rmx.rmx_ssthresh) { @@ -1667,7 +2223,29 @@ tcp_mss(tp, offer) */ tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh); } -#endif /* RTV_MTU */ - return (mss); +#endif +} + +/* + * Determine the MSS option to send on an outgoing SYN. 
+ */ +int +tcp_mssopt(tp) + struct tcpcb *tp; +{ + struct rtentry *rt; + extern int tcp_mssdflt; + + rt = tcp_rtlookup(tp->t_inpcb); + if (rt == NULL) + return tcp_mssdflt; + + /* + * if there's an mtu associated with the route, use it + */ + if (rt->rt_rmx.rmx_mtu) + return rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr); + + return rt->rt_ifp->if_mtu - sizeof(struct tcpiphdr); } #endif /* TUBA_INCLUDE */ diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 10e83ab..d05052f 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)tcp_subr.c 8.1 (Berkeley) 6/10/93 - * $Id: tcp_subr.c,v 1.4 1994/10/02 17:48:44 phk Exp $ + * $Id: tcp_subr.c,v 1.5 1994/10/08 22:39:58 phk Exp $ */ #include <sys/param.h> @@ -54,16 +54,24 @@ #include <netinet/ip_var.h> #include <netinet/ip_icmp.h> #include <netinet/tcp.h> +#define TCPOUTFLAGS #include <netinet/tcp_fsm.h> #include <netinet/tcp_seq.h> #include <netinet/tcp_timer.h> #include <netinet/tcp_var.h> #include <netinet/tcpip.h> +#ifdef TCPDEBUG +#include <netinet/tcp_debug.h> +#endif /* patchable/settable parameters for tcp */ int tcp_mssdflt = TCP_MSS; int tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ; int tcp_do_rfc1323 = 1; +#ifdef TTCP +int tcp_do_rfc1644 = 1; +static void tcp_cleartaocache(void); +#endif extern struct inpcb *tcp_last_inpcb; @@ -75,6 +83,10 @@ tcp_init() { tcp_iss = 1; /* wrong */ +#ifdef TTCP + tcp_ccgen = 1; + tcp_cleartaocache(); +#endif tcb.inp_next = tcb.inp_prev = &tcb; if (max_protohdr < sizeof(struct tcpiphdr)) max_protohdr = sizeof(struct tcpiphdr); @@ -196,6 +208,10 @@ tcp_respond(tp, ti, m, ack, seq, flags) ti->ti_sum = in_cksum(m, tlen); ((struct ip *)ti)->ip_len = tlen; ((struct ip *)ti)->ip_ttl = ip_defttl; +#ifdef TCPDEBUG + if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) + tcp_trace(TA_OUTPUT, 0, tp, ti, 0); +#endif (void) ip_output(m, NULL, ro, 0, NULL); } @@ -215,9 +231,14 @@ tcp_newtcpcb(inp) return ((struct tcpcb 
*)0); bzero((char *) tp, sizeof(struct tcpcb)); tp->seg_next = tp->seg_prev = (struct tcpiphdr *)tp; - tp->t_maxseg = tcp_mssdflt; + tp->t_maxseg = tp->t_maxopd = tcp_mssdflt; - tp->t_flags = tcp_do_rfc1323 ? (TF_REQ_SCALE|TF_REQ_TSTMP) : 0; + if (tcp_do_rfc1323) + tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP); +#ifdef TTCP + if (tcp_do_rfc1644) + tp->t_flags |= TF_REQ_CC; +#endif tp->t_inpcb = inp; /* * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no @@ -444,3 +465,65 @@ tcp_quench(inp, errno) if (tp) tp->snd_cwnd = tp->t_maxseg; } + +/* + * Look-up the routing entry to the peer of this inpcb. If no route + * is found and one cannot be allocated, then return NULL. This routine + * is called by TCP routines that access the rmx structure and by tcp_mss + * to get the interface MTU. + */ +struct rtentry * +tcp_rtlookup(inp) + struct inpcb *inp; +{ + struct route *ro; + struct rtentry *rt; + + ro = &inp->inp_route; + rt = ro->ro_rt; + if (rt == NULL || !(rt->rt_flags & RTF_UP)) { + /* No route yet, so try to acquire one */ + if (inp->inp_faddr.s_addr != INADDR_ANY) { + ro->ro_dst.sa_family = AF_INET; + ro->ro_dst.sa_len = sizeof(ro->ro_dst); + ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = + inp->inp_faddr; + rtalloc(ro); + rt = ro->ro_rt; + } + } + return rt; +} + +#ifdef TTCP +/* + * Return a pointer to the cached information about the remote host. + * The cached information is stored in the protocol specific part of + * the route metrics. + */ +struct rmxp_tao * +tcp_gettaocache(inp) + struct inpcb *inp; +{ + struct rtentry *rt = tcp_rtlookup(inp); + + /* Make sure this is a host route and is up. */ + if (rt == NULL || + (rt->rt_flags & (RTF_UP|RTF_HOST)) != (RTF_UP|RTF_HOST)) + return NULL; + + return rmx_taop(rt->rt_rmx); +} + +/* + * Clear all the TAO cache entries, called from tcp_init.
+ * + * XXX + * This routine is just an empty one, because we assume that the + * routing tables are initialized at the same time as TCP, so there is + * nothing in the cache left over. + */ +static void +tcp_cleartaocache(void) +{ } +#endif /* TTCP */ diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c index 3c7fb1f..72448f1 100644 --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)tcp_timer.c 8.1 (Berkeley) 6/10/93 - * $Id$ + * $Id: tcp_timer.c,v 1.2 1994/08/02 07:49:10 davidg Exp $ */ #ifndef TUBA_INCLUDE @@ -123,6 +123,9 @@ tcp_slowtimo() } } tp->t_idle++; +#ifdef TTCP + tp->t_duration++; +#endif if (tp->t_rtt) tp->t_rtt++; tpgone: @@ -212,6 +215,12 @@ tcp_timers(tp, timer) tp->t_srtt = 0; } tp->snd_nxt = tp->snd_una; +#ifdef TTCP + /* + * Force a segment to be sent. + */ + tp->t_flags |= TF_ACKNOW; +#endif /* * If timing a segment in this window, stop the timer. */ diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c index 10e83ab..d05052f 100644 --- a/sys/netinet/tcp_timewait.c +++ b/sys/netinet/tcp_timewait.c @@ -31,7 +31,7 @@ * SUCH DAMAGE.
* * @(#)tcp_subr.c 8.1 (Berkeley) 6/10/93 - * $Id: tcp_subr.c,v 1.4 1994/10/02 17:48:44 phk Exp $ + * $Id: tcp_subr.c,v 1.5 1994/10/08 22:39:58 phk Exp $ */ #include <sys/param.h> @@ -54,16 +54,24 @@ #include <netinet/ip_var.h> #include <netinet/ip_icmp.h> #include <netinet/tcp.h> +#define TCPOUTFLAGS #include <netinet/tcp_fsm.h> #include <netinet/tcp_seq.h> #include <netinet/tcp_timer.h> #include <netinet/tcp_var.h> #include <netinet/tcpip.h> +#ifdef TCPDEBUG +#include <netinet/tcp_debug.h> +#endif /* patchable/settable parameters for tcp */ int tcp_mssdflt = TCP_MSS; int tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ; int tcp_do_rfc1323 = 1; +#ifdef TTCP +int tcp_do_rfc1644 = 1; +static void tcp_cleartaocache(void); +#endif extern struct inpcb *tcp_last_inpcb; @@ -75,6 +83,10 @@ tcp_init() { tcp_iss = 1; /* wrong */ +#ifdef TTCP + tcp_ccgen = 1; + tcp_cleartaocache(); +#endif tcb.inp_next = tcb.inp_prev = &tcb; if (max_protohdr < sizeof(struct tcpiphdr)) max_protohdr = sizeof(struct tcpiphdr); @@ -196,6 +208,10 @@ tcp_respond(tp, ti, m, ack, seq, flags) ti->ti_sum = in_cksum(m, tlen); ((struct ip *)ti)->ip_len = tlen; ((struct ip *)ti)->ip_ttl = ip_defttl; +#ifdef TCPDEBUG + if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) + tcp_trace(TA_OUTPUT, 0, tp, ti, 0); +#endif (void) ip_output(m, NULL, ro, 0, NULL); } @@ -215,9 +231,14 @@ tcp_newtcpcb(inp) return ((struct tcpcb *)0); bzero((char *) tp, sizeof(struct tcpcb)); tp->seg_next = tp->seg_prev = (struct tcpiphdr *)tp; - tp->t_maxseg = tcp_mssdflt; + tp->t_maxseg = tp->t_maxopd = tcp_mssdflt; - tp->t_flags = tcp_do_rfc1323 ? 
(TF_REQ_SCALE|TF_REQ_TSTMP) : 0; + if (tcp_do_rfc1323) + tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP); +#ifdef TTCP + if (tcp_do_rfc1644) + tp->t_flags |= TF_REQ_CC; +#endif tp->t_inpcb = inp; /* * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no @@ -444,3 +465,65 @@ tcp_quench(inp, errno) if (tp) tp->snd_cwnd = tp->t_maxseg; } + +/* + * Look-up the routing entry to the peer of this inpcb. If no route + * is found and one cannot be allocated, then return NULL. This routine + * is called by TCP routines that access the rmx structure and by tcp_mss + * to get the interface MTU. + */ +struct rtentry * +tcp_rtlookup(inp) + struct inpcb *inp; +{ + struct route *ro; + struct rtentry *rt; + + ro = &inp->inp_route; + rt = ro->ro_rt; + if (rt == NULL || !(rt->rt_flags & RTF_UP)) { + /* No route yet, so try to acquire one */ + if (inp->inp_faddr.s_addr != INADDR_ANY) { + ro->ro_dst.sa_family = AF_INET; + ro->ro_dst.sa_len = sizeof(ro->ro_dst); + ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = + inp->inp_faddr; + rtalloc(ro); + rt = ro->ro_rt; + } + } + return rt; +} + +#ifdef TTCP +/* + * Return a pointer to the cached information about the remote host. + * The cached information is stored in the protocol specific part of + * the route metrics. + */ +struct rmxp_tao * +tcp_gettaocache(inp) + struct inpcb *inp; +{ + struct rtentry *rt = tcp_rtlookup(inp); + + /* Make sure this is a host route and is up. */ + if (rt == NULL || + (rt->rt_flags & (RTF_UP|RTF_HOST)) != (RTF_UP|RTF_HOST)) + return NULL; + + return rmx_taop(rt->rt_rmx); +} + +/* + * Clear all the TAO cache entries, called from tcp_init. + * + * XXX + * This routine is just an empty one, because we assume that the + * routing tables are initialized at the same time as TCP, so there is + * nothing in the cache left over.
+ */ +static void +tcp_cleartaocache(void) +{ } +#endif /* TTCP */ diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index 083362f..5149895 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94 - * $Id: tcp_usrreq.c,v 1.5 1994/09/15 10:36:56 davidg Exp $ + * $Id: tcp_usrreq.c,v 1.6 1994/12/15 20:39:34 wollman Exp $ */ #include <sys/param.h> @@ -84,7 +84,9 @@ tcp_usrreq(so, req, m, nam, control) struct sockaddr_in *sinp; int s; int error = 0; +#ifdef TCPDEBUG int ostate; +#endif if (req == PRU_CONTROL) return (in_control(so, (int)m, (caddr_t)nam, @@ -113,9 +115,14 @@ tcp_usrreq(so, req, m, nam, control) #ifdef KPROF tcp_acounts[tp->t_state][req]++; #endif +#ifdef TCPDEBUG ostate = tp->t_state; } else ostate = 0; +#else /* TCPDEBUG */ + } +#endif /* TCPDEBUG */ + switch (req) { /* @@ -196,6 +203,10 @@ tcp_usrreq(so, req, m, nam, control) break; } +#ifdef TTCP + if ((error = tcp_connect(tp, nam)) != 0) + break; +#else /* TTCP */ if (inp->inp_lport == 0) { error = in_pcbbind(inp, (struct mbuf *)0); if (error) @@ -220,6 +231,7 @@ tcp_usrreq(so, req, m, nam, control) tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2; tcp_sendseqinit(tp); +#endif /* TTCP */ error = tcp_output(tp); break; @@ -275,9 +287,37 @@ tcp_usrreq(so, req, m, nam, control) * Do a send by putting data in output queue and updating urgent * marker if URG set. Possibly send more data. */ +#ifdef TTCP + case PRU_SEND_EOF: +#endif case PRU_SEND: sbappend(&so->so_snd, m); - error = tcp_output(tp); +#ifdef TTCP + if (nam && tp->t_state < TCPS_SYN_SENT) { + /* + * Do implied connect if not yet connected, + * initialize window to default value, and + * initialize maxseg/maxopd using peer's cached + * MSS. 
+ */ + error = tcp_connect(tp, nam); + if (error) + break; + tp->snd_wnd = TTCP_CLIENT_SND_WND; + tcp_mss(tp, -1); + } + + if (req == PRU_SEND_EOF) { + /* + * Close the send side of the connection after + * the data is sent. + */ + socantsendmore(so); + tp = tcp_usrclosed(tp); + } + if (tp != NULL) +#endif TTCP + error = tcp_output(tp); break; /* @@ -345,7 +385,9 @@ tcp_usrreq(so, req, m, nam, control) */ case PRU_SLOWTIMO: tp = tcp_timers(tp, (int)nam); +#ifdef TCPDEBUG req |= (int)nam << 8; /* for debug's sake */ +#endif break; default: @@ -359,6 +401,83 @@ tcp_usrreq(so, req, m, nam, control) return (error); } +#ifdef TTCP +/* + * Common subroutine to open a TCP connection to remote host specified + * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local + * port number if needed. Call in_pcbladdr to do the routing and to choose + * a local host address (interface). If there is an existing incarnation + * of the same connection in TIME-WAIT state and if the remote host was + * sending CC options and if the connection duration was < MSL, then + * truncate the previous TIME-WAIT state and proceed. + * Initialize connection parameters and enter SYN-SENT state. + */ +int +tcp_connect(tp, nam) + register struct tcpcb *tp; + struct mbuf *nam; +{ + struct inpcb *inp = tp->t_inpcb, *oinp; + struct socket *so = inp->inp_socket; + struct tcpcb *otp; + struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *); + struct sockaddr_in *ifaddr; + int error; + + if (inp->inp_lport == 0) { + error = in_pcbbind(inp, NULL); + if (error) + return error; + } + + /* + * Cannot simply call in_pcbconnect, because there might be an + * earlier incarnation of this same connection still in + * TIME_WAIT state, creating an ADDRINUSE error. + */ + error = in_pcbladdr(inp, nam, &ifaddr); + oinp = in_pcblookup(inp->inp_head, + sin->sin_addr, sin->sin_port, + inp->inp_laddr.s_addr != INADDR_ANY ? 
inp->inp_laddr + : ifaddr->sin_addr, + inp->inp_lport, 0); + if (oinp) { + if (oinp != inp && (otp = intotcpcb(oinp)) != NULL && + otp->t_state == TCPS_TIME_WAIT && + otp->t_duration < TCPTV_MSL && + (otp->t_flags & TF_RCVD_CC)) + otp = tcp_close(otp); + else + return EADDRINUSE; + } + if (inp->inp_laddr.s_addr == INADDR_ANY) + inp->inp_laddr = ifaddr->sin_addr; + inp->inp_faddr = sin->sin_addr; + inp->inp_fport = sin->sin_port; + + tp->t_template = tcp_template(tp); + if (tp->t_template == 0) { + in_pcbdisconnect(inp); + return ENOBUFS; + } + + /* Compute window scaling to request. */ + while (tp->request_r_scale < TCP_MAX_WINSHIFT && + (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) + tp->request_r_scale++; + + soisconnecting(so); + tcpstat.tcps_connattempt++; + tp->t_state = TCPS_SYN_SENT; + tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; + tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2; + tcp_sendseqinit(tp); + tp->cc_send = CC_INC(tcp_ccgen); + + return 0; +} +#endif /* TTCP */ + int tcp_ctloutput(op, so, level, optname, mp) int op; @@ -409,6 +528,26 @@ tcp_ctloutput(op, so, level, optname, mp) error = EINVAL; break; +#ifdef TTCP + case TCP_NOOPT: + if (m == NULL || m->m_len < sizeof (int)) + error = EINVAL; + else if (*mtod(m, int *)) + tp->t_flags |= TF_NOOPT; + else + tp->t_flags &= ~TF_NOOPT; + break; + + case TCP_NOPUSH: + if (m == NULL || m->m_len < sizeof (int)) + error = EINVAL; + else if (*mtod(m, int *)) + tp->t_flags |= TF_NOPUSH; + else + tp->t_flags &= ~TF_NOPUSH; + break; +#endif /* TTCP */ + default: error = ENOPROTOOPT; break; @@ -428,6 +567,14 @@ tcp_ctloutput(op, so, level, optname, mp) case TCP_MAXSEG: *mtod(m, int *) = tp->t_maxseg; break; +#ifdef TTCP + case TCP_NOOPT: + *mtod(m, int *) = tp->t_flags & TF_NOOPT; + break; + case TCP_NOPUSH: + *mtod(m, int *) = tp->t_flags & TF_NOPUSH; + break; +#endif /* TTCP */ default: error = ENOPROTOOPT; break; @@ -533,12 +680,22 @@ tcp_usrclosed(tp) case TCPS_CLOSED: case TCPS_LISTEN: +#ifndef TTCP 
case TCPS_SYN_SENT: +#endif tp->t_state = TCPS_CLOSED; tp = tcp_close(tp); break; +#ifdef TTCP + case TCPS_SYN_SENT: + case TCPS_SYN_RECEIVED: + tp->t_flags |= TF_NEEDFIN; + break; + +#else case TCPS_SYN_RECEIVED: +#endif case TCPS_ESTABLISHED: tp->t_state = TCPS_FIN_WAIT_1; break; @@ -551,3 +708,43 @@ tcp_usrclosed(tp) soisdisconnected(tp->t_inpcb->inp_socket); return (tp); } + +/* + * Sysctl for tcp variables. + */ +int +tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen) + int *name; + u_int namelen; + void *oldp; + size_t *oldlenp; + void *newp; + size_t newlen; +{ + extern int tcp_do_rfc1323; +#ifdef TTCP + extern int tcp_do_rfc1644; +#endif + extern int tcp_mssdflt; + + /* All sysctl names at this level are terminal. */ + if (namelen != 1) + return (ENOTDIR); + + switch (name[0]) { + case TCPCTL_DO_RFC1323: + return (sysctl_int(oldp, oldlenp, newp, newlen, + &tcp_do_rfc1323)); +#ifdef TTCP + case TCPCTL_DO_RFC1644: + return (sysctl_int(oldp, oldlenp, newp, newlen, + &tcp_do_rfc1644)); +#endif + case TCPCTL_MSSDFLT: + return (sysctl_int(oldp, oldlenp, newp, newlen, + &tcp_mssdflt)); + default: + return (ENOPROTOOPT); + } + /* NOTREACHED */ +} diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index f34109d..41c9a3b 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -179,7 +179,7 @@ struct rmxp_tao { #define TAOF_UNDEF 0 /* we don't know yet */ #endif /* notyet */ }; -#define rmx_taop(r) ((struct rmxp_tao *)&(r).rmx_pspec) +#define rmx_taop(r) ((struct rmxp_tao *)(r).rmx_filler) #endif /* TTCP */ #define intotcpcb(ip) ((struct tcpcb *)(ip)->inp_ppcb) @@ -333,11 +333,7 @@ struct rmxp_tao * #endif void tcp_init __P((void)); void tcp_input __P((struct mbuf *, int)); -#ifdef TTCP void tcp_mss __P((struct tcpcb *, int)); -#else -int tcp_mss __P((struct tcpcb *, u_int)); -#endif int tcp_mssopt __P((struct tcpcb *)); struct tcpcb * tcp_newtcpcb __P((struct inpcb *)); |