diff options
Diffstat (limited to 'sys')
-rw-r--r-- | sys/netinet/in_pcb.c | 29 | ||||
-rw-r--r-- | sys/netinet/in_pcb.h | 2 | ||||
-rw-r--r-- | sys/netinet/in_proto.c | 1 | ||||
-rw-r--r-- | sys/netinet/ip_output.c | 4 | ||||
-rw-r--r-- | sys/netinet/tcp_input.c | 216 | ||||
-rw-r--r-- | sys/netinet/tcp_reass.c | 216 | ||||
-rw-r--r-- | sys/netinet/tcp_subr.c | 316 | ||||
-rw-r--r-- | sys/netinet/tcp_timer.c | 27 | ||||
-rw-r--r-- | sys/netinet/tcp_timer.h | 1 | ||||
-rw-r--r-- | sys/netinet/tcp_timewait.c | 316 | ||||
-rw-r--r-- | sys/netinet/tcp_var.h | 18 | ||||
-rw-r--r-- | sys/netinet6/in6_pcb.c | 8 | ||||
-rw-r--r-- | sys/netinet6/ip6_output.c | 4 | ||||
-rw-r--r-- | sys/netinet6/ipsec.c | 125 | ||||
-rw-r--r-- | sys/netinet6/ipsec.h | 4 | ||||
-rw-r--r-- | sys/netinet6/ipsec6.h | 5 | ||||
-rw-r--r-- | sys/netinet6/raw_ip6.c | 4 | ||||
-rw-r--r-- | sys/netinet6/udp6_usrreq.c | 6 |
18 files changed, 1067 insertions, 235 deletions
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index 7f83917..46f824d 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -62,6 +62,7 @@ #include <netinet/in_pcb.h> #include <netinet/in_var.h> #include <netinet/ip_var.h> +#include <netinet/tcp_var.h> #ifdef INET6 #include <netinet/ip6.h> #include <netinet6/ip6_var.h> @@ -169,8 +170,11 @@ in_pcballoc(so, pcbinfo, td) } #endif /*IPSEC*/ #if defined(INET6) - if (INP_SOCKAF(so) == AF_INET6 && ip6_v6only) - inp->inp_flags |= IN6P_IPV6_V6ONLY; + if (INP_SOCKAF(so) == AF_INET6) { + inp->inp_vflag |= INP_IPV6PROTO; + if (ip6_v6only) + inp->inp_flags |= IN6P_IPV6_V6ONLY; + } #endif LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list); pcbinfo->ipi_count++; @@ -294,6 +298,17 @@ in_pcbbind_setup(inp, nam, laddrp, lportp, td) t = in_pcblookup_local(inp->inp_pcbinfo, sin->sin_addr, lport, prison ? 0 : INPLOOKUP_WILDCARD); + /* + * XXX + * This entire block sorely needs a rewrite. + */ + if (t && (t->inp_vflag & INP_TIMEWAIT)) { + if ((ntohl(sin->sin_addr.s_addr) != INADDR_ANY || + ntohl(t->inp_laddr.s_addr) != INADDR_ANY || + (intotw(t)->tw_so_options & SO_REUSEPORT) == 0) && + (so->so_cred->cr_uid != intotw(t)->tw_cred->cr_uid)) + return (EADDRINUSE); + } else if (t && (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || ntohl(t->inp_laddr.s_addr) != INADDR_ANY || @@ -317,6 +332,10 @@ in_pcbbind_setup(inp, nam, laddrp, lportp, td) return (EADDRNOTAVAIL); t = in_pcblookup_local(pcbinfo, sin->sin_addr, lport, prison ? 0 : wild); + if (t && (t->inp_vflag & INP_TIMEWAIT)) { + if ((reuseport & intotw(t)->tw_so_options) == 0) + return (EADDRINUSE); + } else if (t && (reuseport & t->inp_socket->so_options) == 0) { #if defined(INET6) @@ -640,8 +659,10 @@ in_pcbdetach(inp) #endif /*IPSEC*/ inp->inp_gencnt = ++ipi->ipi_gencnt; in_pcbremlists(inp); - so->so_pcb = 0; - sotryfree(so); + if (so) { + so->so_pcb = 0; + sotryfree(so); + } if (inp->inp_options) (void)m_free(inp->inp_options); if (inp->inp_route.ro_rt) diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index 018d7b4..efb7862 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -140,6 +140,8 @@ struct inpcb { u_char inp_vflag; /* IP version flag (v4/v6) */ #define INP_IPV4 0x1 #define INP_IPV6 0x2 +#define INP_IPV6PROTO 0x4 /* opened under IPv6 protocol */ +#define INP_TIMEWAIT 0x8 /* .. probably doesn't go here */ u_char inp_ip_ttl; /* time to live proto */ u_char inp_ip_p; /* protocol proto */ diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c index 863b048..a2a528f 100644 --- a/sys/netinet/in_proto.c +++ b/sys/netinet/in_proto.c @@ -40,6 +40,7 @@ #include "opt_inet6.h" #include <sys/param.h> +#include <sys/systm.h> #include <sys/kernel.h> #include <sys/socket.h> #include <sys/domain.h> diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 64e58e2..c6cefc4 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -470,10 +470,10 @@ ip_output(m0, opt, ro, flags, imo, inp) sendit: #ifdef IPSEC /* get SP for this packet */ - if (so == NULL) + if (inp == NULL) sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error); else - sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error); + sp = ipsec4_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error); if (sp == NULL) { ipsecstat.out_inval++; diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index ad20203..52dba70 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -137,6 +137,8 @@ static int tcp_reass(struct tcpcb *, struct tcphdr *, int *, struct mbuf *); static void tcp_xmit_timer(struct tcpcb *, int); static void tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *); +static int tcp_timewait(struct tcptw *, struct tcpopt *, + struct tcphdr *, struct mbuf *, int); /* Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. */ #ifdef INET6 @@ -636,6 +638,22 @@ findpcb: goto dropwithreset; } INP_LOCK(inp); + if (inp->inp_vflag & INP_TIMEWAIT) { + /* + * The only option of relevance is TOF_CC, and only if + * present in a SYN segment. See tcp_timewait(). + */ + if (thflags & TH_SYN) + tcp_dooptions(&to, optp, optlen, 1); + if (tcp_timewait((struct tcptw *)inp->inp_ppcb, + &to, th, m, tlen)) + goto findpcb; + /* + * tcp_timewait unlocks inp. + */ + INP_INFO_WUNLOCK(&tcbinfo); + return; + } tp = intotcpcb(inp); if (tp == 0) { INP_UNLOCK(inp); @@ -1319,6 +1337,7 @@ trimthenstep6: case TCPS_LAST_ACK: case TCPS_CLOSING: case TCPS_TIME_WAIT: + KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait")); if ((thflags & TH_SYN) && (to.to_flags & TOF_CC) && tp->cc_recv != 0) { if (tp->t_state == TCPS_TIME_WAIT && @@ -1418,6 +1437,8 @@ trimthenstep6: break; case TCPS_TIME_WAIT: + KASSERT(tp->t_state != TCPS_TIME_WAIT, + ("timewait")); break; } } @@ -1550,6 +1571,7 @@ trimthenstep6: * and start over if the sequence numbers * are above the previous ones. */ + KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait")); if (thflags & TH_SYN && tp->t_state == TCPS_TIME_WAIT && SEQ_GT(th->th_seq, tp->rcv_nxt)) { @@ -1678,7 +1700,7 @@ trimthenstep6: case TCPS_CLOSING: case TCPS_LAST_ACK: case TCPS_TIME_WAIT: - + KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait")); if (SEQ_LEQ(th->th_ack, tp->snd_una)) { if (tlen == 0 && tiwin == tp->snd_wnd) { tcpstat.tcps_rcvdupack++; @@ -1921,6 +1943,10 @@ process_ACK: * specification, but if we don't get a FIN * we'll hang forever. */ + /* XXXjl + * we should release the tp also, and use a + * compressed state. + */ if (so->so_state & SS_CANTRCVMORE) { soisdisconnected(so); callout_reset(tp->tt_2msl, tcp_maxidle, @@ -1938,19 +1964,11 @@ process_ACK: */ case TCPS_CLOSING: if (ourfinisacked) { - tp->t_state = TCPS_TIME_WAIT; - tcp_canceltimers(tp); - /* Shorten TIME_WAIT [RFC-1644, p.28] */ - if (tp->cc_recv != 0 && - (ticks - tp->t_starttime) < tcp_msl) - callout_reset(tp->tt_2msl, - tp->t_rxtcur * - TCPTV_TWTRUNC, - tcp_timer_2msl, tp); - else - callout_reset(tp->tt_2msl, 2 * tcp_msl, - tcp_timer_2msl, tp); - soisdisconnected(so); + KASSERT(headlocked, ("headlocked")); + INP_INFO_WUNLOCK(&tcbinfo); + m_freem(m); + tcp_twstart(tp); + return; } break; @@ -1973,6 +1991,7 @@ process_ACK: * it and restart the finack timer. */ case TCPS_TIME_WAIT: + KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait")); callout_reset(tp->tt_2msl, 2 * tcp_msl, tcp_timer_2msl, tp); goto dropafterack; @@ -2166,27 +2185,15 @@ dodata: /* XXX */ * standard timers. */ case TCPS_FIN_WAIT_2: - tp->t_state = TCPS_TIME_WAIT; - tcp_canceltimers(tp); - /* Shorten TIME_WAIT [RFC-1644, p.28] */ - if (tp->cc_recv != 0 && - (ticks - tp->t_starttime) < tcp_msl) { - callout_reset(tp->tt_2msl, - tp->t_rxtcur * TCPTV_TWTRUNC, - tcp_timer_2msl, tp); - /* For transaction client, force ACK now. */ - tp->t_flags |= TF_ACKNOW; - } - else - callout_reset(tp->tt_2msl, 2 * tcp_msl, - tcp_timer_2msl, tp); - soisdisconnected(so); - break; + KASSERT(headlocked == 0, ("headlocked")); + tcp_twstart(tp); + return; /* * In TIME_WAIT state restart the 2 MSL time_wait timer. */ case TCPS_TIME_WAIT: + KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait")); callout_reset(tp->tt_2msl, 2 * tcp_msl, tcp_timer_2msl, tp); break; @@ -2802,3 +2809,152 @@ tcp_newreno_partial_ack(tp, th) */ tp->snd_cwnd -= (th->th_ack - tp->snd_una - tp->t_maxseg); } + +/* + * Returns 1 if the TIME_WAIT state was killed and we should start over, + * looking for a pcb in the listen state. Returns 0 otherwise. + */ +static int +tcp_timewait(tw, to, th, m, tlen) + struct tcptw *tw; + struct tcpopt *to; + struct tcphdr *th; + struct mbuf *m; + int tlen; +{ + int thflags; + tcp_seq seq; +#ifdef INET6 + int isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0; +#else + const int isipv6 = 0; +#endif + + thflags = th->th_flags; + + /* + * NOTE: for FIN_WAIT_2 (to be added later), + * must validate sequence number before accepting RST + */ + + /* + * If the segment contains RST: + * Drop the segment - see Stevens, vol. 2, p. 964 and + * RFC 1337. + */ + if (thflags & TH_RST) + goto drop; + + /* + * If segment contains a SYN and CC [not CC.NEW] option: + * if connection duration > MSL, drop packet and send RST; + * + * if SEG.CC > CCrecv then is new SYN. + * Complete close and delete TCPCB. Then reprocess + * segment, hoping to find new TCPCB in LISTEN state; + * + * else must be old SYN; drop it. + * else do normal processing. + */ + if ((thflags & TH_SYN) && (to->to_flags & TOF_CC) && tw->cc_recv != 0) { + if ((ticks - tw->t_starttime) > tcp_msl) + goto reset; + if (CC_GT(to->to_cc, tw->cc_recv)) { + tcp_twclose(tw); + return (1); + } + goto drop; + } + +#if 0 +/* PAWS not needed at the moment */ + /* + * RFC 1323 PAWS: If we have a timestamp reply on this segment + * and it's less than ts_recent, drop it. + */ + if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent && + TSTMP_LT(to.to_tsval, tp->ts_recent)) { + if ((thflags & TH_ACK) == 0) + goto drop; + goto ack; + } + /* + * ts_recent is never updated because we never accept new segments. + */ +#endif + + /* + * If a new connection request is received + * while in TIME_WAIT, drop the old connection + * and start over if the sequence numbers + * are above the previous ones. + */ + if ((thflags & TH_SYN) && SEQ_GT(th->th_seq, tw->rcv_nxt)) { + tcp_twclose(tw); + return (1); + } + + /* + * Drop the the segment if it does not contain an ACK. + */ + if ((thflags & TH_ACK) == 0) + goto drop; + + /* + * Reset the 2MSL timer if this is a duplicate FIN. + */ + if (thflags & TH_FIN) { + seq = th->th_seq + tlen + (thflags & TH_SYN ? 1 : 0); + if (seq + 1 == tw->rcv_nxt) + callout_reset(tw->tt_2msl, + 2 * tcp_msl, tcp_timer_2msl, tw); + } + + /* + * Acknowlege the segment, then drop it. + */ + tcp_twrespond(tw, TH_ACK); + goto drop; + +reset: + /* + * Generate a RST, dropping incoming segment. + * Make ACK acceptable to originator of segment. + * Don't bother to respond if destination was broadcast/multicast. + */ + if (m->m_flags & (M_BCAST|M_MCAST)) + goto drop; + if (isipv6) { + struct ip6_hdr *ip6; + + /* IPv6 anycast check is done at tcp6_input() */ + ip6 = mtod(m, struct ip6_hdr *); + if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || + IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) + goto drop; + } else { + struct ip *ip; + + ip = mtod(m, struct ip *); + if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || + IN_MULTICAST(ntohl(ip->ip_src.s_addr)) || + ip->ip_src.s_addr == htonl(INADDR_BROADCAST) || + in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) + goto drop; + } + if (thflags & TH_ACK) { + tcp_respond(NULL, + mtod(m, void *), th, m, 0, th->th_ack, TH_RST); + } else { + seq = th->th_seq + (thflags & TH_SYN ? 1 : 0); + tcp_respond(NULL, + mtod(m, void *), th, m, seq, 0, TH_RST|TH_ACK); + } + INP_UNLOCK(tw->tw_inpcb); + return (0); + +drop: + INP_UNLOCK(tw->tw_inpcb); + m_freem(m); + return (0); +} diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c index ad20203..52dba70 100644 --- a/sys/netinet/tcp_reass.c +++ b/sys/netinet/tcp_reass.c @@ -137,6 +137,8 @@ static int tcp_reass(struct tcpcb *, struct tcphdr *, int *, struct mbuf *); static void tcp_xmit_timer(struct tcpcb *, int); static void tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *); +static int tcp_timewait(struct tcptw *, struct tcpopt *, + struct tcphdr *, struct mbuf *, int); /* Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. */ #ifdef INET6 @@ -636,6 +638,22 @@ findpcb: goto dropwithreset; } INP_LOCK(inp); + if (inp->inp_vflag & INP_TIMEWAIT) { + /* + * The only option of relevance is TOF_CC, and only if + * present in a SYN segment. See tcp_timewait(). + */ + if (thflags & TH_SYN) + tcp_dooptions(&to, optp, optlen, 1); + if (tcp_timewait((struct tcptw *)inp->inp_ppcb, + &to, th, m, tlen)) + goto findpcb; + /* + * tcp_timewait unlocks inp. + */ + INP_INFO_WUNLOCK(&tcbinfo); + return; + } tp = intotcpcb(inp); if (tp == 0) { INP_UNLOCK(inp); @@ -1319,6 +1337,7 @@ trimthenstep6: case TCPS_LAST_ACK: case TCPS_CLOSING: case TCPS_TIME_WAIT: + KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait")); if ((thflags & TH_SYN) && (to.to_flags & TOF_CC) && tp->cc_recv != 0) { if (tp->t_state == TCPS_TIME_WAIT && @@ -1418,6 +1437,8 @@ trimthenstep6: break; case TCPS_TIME_WAIT: + KASSERT(tp->t_state != TCPS_TIME_WAIT, + ("timewait")); break; } } @@ -1550,6 +1571,7 @@ trimthenstep6: * and start over if the sequence numbers * are above the previous ones. */ + KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait")); if (thflags & TH_SYN && tp->t_state == TCPS_TIME_WAIT && SEQ_GT(th->th_seq, tp->rcv_nxt)) { @@ -1678,7 +1700,7 @@ trimthenstep6: case TCPS_CLOSING: case TCPS_LAST_ACK: case TCPS_TIME_WAIT: - + KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait")); if (SEQ_LEQ(th->th_ack, tp->snd_una)) { if (tlen == 0 && tiwin == tp->snd_wnd) { tcpstat.tcps_rcvdupack++; @@ -1921,6 +1943,10 @@ process_ACK: * specification, but if we don't get a FIN * we'll hang forever. */ + /* XXXjl + * we should release the tp also, and use a + * compressed state. + */ if (so->so_state & SS_CANTRCVMORE) { soisdisconnected(so); callout_reset(tp->tt_2msl, tcp_maxidle, @@ -1938,19 +1964,11 @@ process_ACK: */ case TCPS_CLOSING: if (ourfinisacked) { - tp->t_state = TCPS_TIME_WAIT; - tcp_canceltimers(tp); - /* Shorten TIME_WAIT [RFC-1644, p.28] */ - if (tp->cc_recv != 0 && - (ticks - tp->t_starttime) < tcp_msl) - callout_reset(tp->tt_2msl, - tp->t_rxtcur * - TCPTV_TWTRUNC, - tcp_timer_2msl, tp); - else - callout_reset(tp->tt_2msl, 2 * tcp_msl, - tcp_timer_2msl, tp); - soisdisconnected(so); + KASSERT(headlocked, ("headlocked")); + INP_INFO_WUNLOCK(&tcbinfo); + m_freem(m); + tcp_twstart(tp); + return; } break; @@ -1973,6 +1991,7 @@ process_ACK: * it and restart the finack timer. */ case TCPS_TIME_WAIT: + KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait")); callout_reset(tp->tt_2msl, 2 * tcp_msl, tcp_timer_2msl, tp); goto dropafterack; @@ -2166,27 +2185,15 @@ dodata: /* XXX */ * standard timers. */ case TCPS_FIN_WAIT_2: - tp->t_state = TCPS_TIME_WAIT; - tcp_canceltimers(tp); - /* Shorten TIME_WAIT [RFC-1644, p.28] */ - if (tp->cc_recv != 0 && - (ticks - tp->t_starttime) < tcp_msl) { - callout_reset(tp->tt_2msl, - tp->t_rxtcur * TCPTV_TWTRUNC, - tcp_timer_2msl, tp); - /* For transaction client, force ACK now. */ - tp->t_flags |= TF_ACKNOW; - } - else - callout_reset(tp->tt_2msl, 2 * tcp_msl, - tcp_timer_2msl, tp); - soisdisconnected(so); - break; + KASSERT(headlocked == 0, ("headlocked")); + tcp_twstart(tp); + return; /* * In TIME_WAIT state restart the 2 MSL time_wait timer. */ case TCPS_TIME_WAIT: + KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait")); callout_reset(tp->tt_2msl, 2 * tcp_msl, tcp_timer_2msl, tp); break; @@ -2802,3 +2809,152 @@ tcp_newreno_partial_ack(tp, th) */ tp->snd_cwnd -= (th->th_ack - tp->snd_una - tp->t_maxseg); } + +/* + * Returns 1 if the TIME_WAIT state was killed and we should start over, + * looking for a pcb in the listen state. Returns 0 otherwise. + */ +static int +tcp_timewait(tw, to, th, m, tlen) + struct tcptw *tw; + struct tcpopt *to; + struct tcphdr *th; + struct mbuf *m; + int tlen; +{ + int thflags; + tcp_seq seq; +#ifdef INET6 + int isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0; +#else + const int isipv6 = 0; +#endif + + thflags = th->th_flags; + + /* + * NOTE: for FIN_WAIT_2 (to be added later), + * must validate sequence number before accepting RST + */ + + /* + * If the segment contains RST: + * Drop the segment - see Stevens, vol. 2, p. 964 and + * RFC 1337. + */ + if (thflags & TH_RST) + goto drop; + + /* + * If segment contains a SYN and CC [not CC.NEW] option: + * if connection duration > MSL, drop packet and send RST; + * + * if SEG.CC > CCrecv then is new SYN. + * Complete close and delete TCPCB. Then reprocess + * segment, hoping to find new TCPCB in LISTEN state; + * + * else must be old SYN; drop it. + * else do normal processing. + */ + if ((thflags & TH_SYN) && (to->to_flags & TOF_CC) && tw->cc_recv != 0) { + if ((ticks - tw->t_starttime) > tcp_msl) + goto reset; + if (CC_GT(to->to_cc, tw->cc_recv)) { + tcp_twclose(tw); + return (1); + } + goto drop; + } + +#if 0 +/* PAWS not needed at the moment */ + /* + * RFC 1323 PAWS: If we have a timestamp reply on this segment + * and it's less than ts_recent, drop it. + */ + if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent && + TSTMP_LT(to.to_tsval, tp->ts_recent)) { + if ((thflags & TH_ACK) == 0) + goto drop; + goto ack; + } + /* + * ts_recent is never updated because we never accept new segments. + */ +#endif + + /* + * If a new connection request is received + * while in TIME_WAIT, drop the old connection + * and start over if the sequence numbers + * are above the previous ones. + */ + if ((thflags & TH_SYN) && SEQ_GT(th->th_seq, tw->rcv_nxt)) { + tcp_twclose(tw); + return (1); + } + + /* + * Drop the the segment if it does not contain an ACK. + */ + if ((thflags & TH_ACK) == 0) + goto drop; + + /* + * Reset the 2MSL timer if this is a duplicate FIN. + */ + if (thflags & TH_FIN) { + seq = th->th_seq + tlen + (thflags & TH_SYN ? 1 : 0); + if (seq + 1 == tw->rcv_nxt) + callout_reset(tw->tt_2msl, + 2 * tcp_msl, tcp_timer_2msl, tw); + } + + /* + * Acknowlege the segment, then drop it. + */ + tcp_twrespond(tw, TH_ACK); + goto drop; + +reset: + /* + * Generate a RST, dropping incoming segment. + * Make ACK acceptable to originator of segment. + * Don't bother to respond if destination was broadcast/multicast. + */ + if (m->m_flags & (M_BCAST|M_MCAST)) + goto drop; + if (isipv6) { + struct ip6_hdr *ip6; + + /* IPv6 anycast check is done at tcp6_input() */ + ip6 = mtod(m, struct ip6_hdr *); + if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || + IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) + goto drop; + } else { + struct ip *ip; + + ip = mtod(m, struct ip *); + if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || + IN_MULTICAST(ntohl(ip->ip_src.s_addr)) || + ip->ip_src.s_addr == htonl(INADDR_BROADCAST) || + in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) + goto drop; + } + if (thflags & TH_ACK) { + tcp_respond(NULL, + mtod(m, void *), th, m, 0, th->th_ack, TH_RST); + } else { + seq = th->th_seq + (thflags & TH_SYN ? 1 : 0); + tcp_respond(NULL, + mtod(m, void *), th, m, seq, 0, TH_RST|TH_ACK); + } + INP_UNLOCK(tw->tw_inpcb); + return (0); + +drop: + INP_UNLOCK(tw->tw_inpcb); + m_freem(m); + return (0); +} diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 3c43acb..dc7730f 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -179,6 +179,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, inflight_stab, CTLFLAG_RW, static void tcp_cleartaocache(void); static struct inpcb *tcp_notify(struct inpcb *, int); +static void tcp_discardcb(struct tcpcb *); /* * Target size of TCP PCB hash tables. Must be a power of two. @@ -191,26 +192,23 @@ static struct inpcb *tcp_notify(struct inpcb *, int); #endif /* - * This is the actual shape of what we allocate using the zone - * allocator. Doing it this way allows us to protect both structures - * using the same generation count, and also eliminates the overhead - * of allocating tcpcbs separately. By hiding the structure here, - * we avoid changing most of the rest of the code (although it needs - * to be changed, eventually, for greater efficiency). + * XXX + * Callouts should be moved into struct tcp directly. They are currently + * separate becuase the tcpcb structure is exported to userland for sysctl + * parsing purposes, which do not know about callouts. */ -#define ALIGNMENT 32 -#define ALIGNM1 (ALIGNMENT - 1) -struct inp_tp { - union { - struct inpcb inp; - char align[(sizeof(struct inpcb) + ALIGNM1) & ~ALIGNM1]; - } inp_tp_u; +struct tcpcb_mem { struct tcpcb tcb; - struct callout inp_tp_rexmt, inp_tp_persist, inp_tp_keep, inp_tp_2msl; - struct callout inp_tp_delack; + struct callout tcpcb_mem_rexmt, tcpcb_mem_persist, tcpcb_mem_keep; + struct callout tcpcb_mem_2msl, tcpcb_mem_delack; +}; +struct tcptw_mem { + struct tcptw tw; + struct callout tcptw_mem_2msl; }; -#undef ALIGNMENT -#undef ALIGNM1 + +static uma_zone_t tcpcb_zone; +static uma_zone_t tcptw_zone; /* * Tcp initialization @@ -244,7 +242,7 @@ tcp_init() tcbinfo.hashbase = hashinit(hashsize, M_PCB, &tcbinfo.hashmask); tcbinfo.porthashbase = hashinit(hashsize, M_PCB, &tcbinfo.porthashmask); - tcbinfo.ipi_zone = uma_zcreate("tcpcb", sizeof(struct inp_tp), + tcbinfo.ipi_zone = uma_zcreate("inpcb", sizeof(struct inpcb), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); uma_zone_set_max(tcbinfo.ipi_zone, maxsockets); #ifdef INET6 @@ -257,6 +255,15 @@ tcp_init() if (max_linkhdr + TCP_MINPROTOHDR > MHLEN) panic("tcp_init"); #undef TCP_MINPROTOHDR + /* + * These have to be type stable for the benefit of the timers. + */ + tcpcb_zone = uma_zcreate("tcpcb", sizeof(struct tcpcb_mem), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); + uma_zone_set_max(tcpcb_zone, maxsockets); + tcptw_zone = uma_zcreate("tcptw", sizeof(struct tcptw_mem), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); + uma_zone_set_max(tcptw_zone, maxsockets); syncache_init(); } @@ -552,16 +559,17 @@ struct tcpcb * tcp_newtcpcb(inp) struct inpcb *inp; { - struct inp_tp *it; - register struct tcpcb *tp; + struct tcpcb_mem *tm; + struct tcpcb *tp; #ifdef INET6 int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; #endif /* INET6 */ - it = (struct inp_tp *)inp; - tp = &it->tcb; - bzero((char *) tp, sizeof(struct tcpcb)); - LIST_INIT(&tp->t_segq); + tm = uma_zalloc(tcpcb_zone, M_NOWAIT | M_ZERO); + if (tm == NULL) + return (NULL); + tp = &tm->tcb; + /* LIST_INIT(&tp->t_segq); */ /* XXX covered by M_ZERO */ tp->t_maxseg = tp->t_maxopd = #ifdef INET6 isipv6 ? tcp_v6mssdflt : @@ -569,11 +577,11 @@ tcp_newtcpcb(inp) tcp_mssdflt; /* Set up our timeouts. */ - callout_init(tp->tt_rexmt = &it->inp_tp_rexmt, 0); - callout_init(tp->tt_persist = &it->inp_tp_persist, 0); - callout_init(tp->tt_keep = &it->inp_tp_keep, 0); - callout_init(tp->tt_2msl = &it->inp_tp_2msl, 0); - callout_init(tp->tt_delack = &it->inp_tp_delack, 0); + callout_init(tp->tt_rexmt = &tm->tcpcb_mem_rexmt, 0); + callout_init(tp->tt_persist = &tm->tcpcb_mem_persist, 0); + callout_init(tp->tt_keep = &tm->tcpcb_mem_keep, 0); + callout_init(tp->tt_2msl = &tm->tcpcb_mem_2msl, 0); + callout_init(tp->tt_delack = &tm->tcpcb_mem_delack, 0); if (tcp_do_rfc1323) tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP); @@ -628,23 +636,17 @@ tcp_drop(tp, errno) return (tcp_close(tp)); } -/* - * Close a TCP control block: - * discard all space held by the tcp - * discard internet protocol block - * wake up any sleepers - */ -struct tcpcb * -tcp_close(tp) - register struct tcpcb *tp; +static void +tcp_discardcb(tp) + struct tcpcb *tp; { - register struct tseg_qent *q; + struct tseg_qent *q; struct inpcb *inp = tp->t_inpcb; struct socket *so = inp->inp_socket; #ifdef INET6 int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; #endif /* INET6 */ - register struct rtentry *rt; + struct rtentry *rt; int dosavessthresh; /* @@ -762,20 +764,37 @@ tcp_close(tp) } no_valid_rt: /* free the reassembly queue, if any */ - while((q = LIST_FIRST(&tp->t_segq)) != NULL) { + while ((q = LIST_FIRST(&tp->t_segq)) != NULL) { LIST_REMOVE(q, tqe_q); m_freem(q->tqe_m); FREE(q, M_TSEGQ); } inp->inp_ppcb = NULL; tp->t_inpcb = NULL; + uma_zfree(tcpcb_zone, tp); soisdisconnected(so); +} + +/* + * Close a TCP control block: + * discard all space held by the tcp + * discard internet protocol block + * wake up any sleepers + */ +struct tcpcb * +tcp_close(tp) + struct tcpcb *tp; +{ + struct inpcb *inp = tp->t_inpcb; + struct socket *so = inp->inp_socket; + + tcp_discardcb(tp); #ifdef INET6 if (INP_CHECK_SOCKAF(so, AF_INET6)) in6_pcbdetach(inp); else -#endif /* INET6 */ - in_pcbdetach(inp); +#endif + in_pcbdetach(inp); tcpstat.tcps_closed++; return ((struct tcpcb *)0); } @@ -799,6 +818,8 @@ tcp_drain() */ INP_INFO_RLOCK(&tcbinfo); LIST_FOREACH(inpb, tcbinfo.listhead, inp_list) { + if (inpb->inp_vflag & INP_TIMEWAIT) + continue; INP_LOCK(inpb); if ((tcpb = intotcpcb(inpb))) { while ((te = LIST_FIRST(&tcpb->t_segq)) @@ -908,7 +929,9 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS) inp = LIST_NEXT(inp, inp_list)) { INP_LOCK(inp); if (inp->inp_gencnt <= gencnt && - cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0) + (((inp->inp_vflag & INP_TIMEWAIT) && + cr_cansee(req->td->td_ucred, intotw(inp)->tw_cred) == 0) || + cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0)) inp_list[i++] = inp; INP_UNLOCK(inp); } @@ -926,12 +949,19 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS) /* XXX should avoid extra copy */ bcopy(inp, &xt.xt_inp, sizeof *inp); inp_ppcb = inp->inp_ppcb; - if (inp_ppcb != NULL) - bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp); - else + if (inp_ppcb == NULL) + bzero((char *) &xt.xt_tp, sizeof xt.xt_tp); + else if (inp->inp_vflag & INP_TIMEWAIT) { bzero((char *) &xt.xt_tp, sizeof xt.xt_tp); + xt.xt_tp.t_state = TCPS_TIME_WAIT; + } else + bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp); if (inp->inp_socket) sotoxsocket(inp->inp_socket, &xt.xt_socket); + else { + bzero(&xt.xt_socket, sizeof xt.xt_socket); + xt.xt_socket.xso_protocol = IPPROTO_TCP; + } xt.xt_inp.inp_gencnt = inp->inp_gencnt; error = SYSCTL_OUT(req, &xt, sizeof xt); } @@ -1487,7 +1517,7 @@ ipsec_hdrsiz_tcp(tp) struct ip *ip; #ifdef INET6 struct ip6_hdr *ip6; -#endif /* INET6 */ +#endif struct tcphdr *th; if ((tp == NULL) || ((inp = tp->t_inpcb) == NULL)) @@ -1559,6 +1589,196 @@ tcp_cleartaocache() } /* + * Move a TCP connection into TIME_WAIT state. + * tcbinfo is unlocked. + * inp is locked, and is unlocked before returning. + */ +void +tcp_twstart(tp) + struct tcpcb *tp; +{ + struct tcptw_mem *tm; + struct tcptw *tw; + struct inpcb *inp; + int tw_time, acknow; + struct socket *so; + + tm = uma_zalloc(tcptw_zone, M_NOWAIT); + if (tm == NULL) + /* EEEK! -- preserve old structure or just kill everything? */ + /* must obtain tcbinfo lock in order to drop the structure. */ + panic("uma_zalloc(tcptw)"); + tw = &tm->tw; + inp = tp->t_inpcb; + tw->tw_inpcb = inp; + + /* + * Recover last window size sent. + */ + tw->last_win = (tp->rcv_adv - tp->rcv_nxt) >> tp->rcv_scale; + + /* + * Set t_recent if timestamps are used on the connection. + */ + if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) == + (TF_REQ_TSTMP|TF_RCVD_TSTMP)) + tw->t_recent = tp->ts_recent; + else + tw->t_recent = 0; + + tw->snd_nxt = tp->snd_nxt; + tw->rcv_nxt = tp->rcv_nxt; + tw->cc_recv = tp->cc_recv; + tw->cc_send = tp->cc_send; + tw->t_starttime = tp->t_starttime; + callout_init(tw->tt_2msl = &tm->tcptw_mem_2msl, 0); + +/* XXX + * If this code will + * be used for fin-wait-2 state also, then we may need + * a ts_recent from the last segment. + */ + /* Shorten TIME_WAIT [RFC-1644, p.28] */ + if (tp->cc_recv != 0 && (ticks - tp->t_starttime) < tcp_msl) { + tw_time = tp->t_rxtcur * TCPTV_TWTRUNC; + /* For T/TCP client, force ACK now. */ + acknow = 1; + } else { + tw_time = 2 * tcp_msl; + acknow = tp->t_flags & TF_ACKNOW; + } + tcp_discardcb(tp); + so = inp->inp_socket; + so->so_pcb = NULL; + tw->tw_cred = crhold(so->so_cred); + tw->tw_so_options = so->so_options; + sotryfree(so); + inp->inp_socket = NULL; + inp->inp_ppcb = (caddr_t)tw; + inp->inp_vflag |= INP_TIMEWAIT; + callout_reset(tw->tt_2msl, tw_time, tcp_timer_2msl_tw, tw); + if (acknow) + tcp_twrespond(tw, TH_ACK); + INP_UNLOCK(inp); +} + +void +tcp_twclose(tw) + struct tcptw *tw; +{ + struct inpcb *inp; + + inp = tw->tw_inpcb; + tw->tw_inpcb = NULL; + callout_stop(tw->tt_2msl); + inp->inp_ppcb = NULL; + uma_zfree(tcptw_zone, tw); +#ifdef INET6 + if (inp->inp_vflag & INP_IPV6PROTO) + in6_pcbdetach(inp); + else +#endif + in_pcbdetach(inp); + tcpstat.tcps_closed++; +} + +int +tcp_twrespond(struct tcptw *tw, int flags) +{ + struct inpcb *inp = tw->tw_inpcb; + struct tcphdr *th; + struct mbuf *m; + struct ip *ip = NULL; + u_int8_t *optp; + u_int hdrlen, optlen; + int error; +#ifdef INET6 + struct ip6_hdr *ip6 = NULL; + int isipv6 = inp->inp_inc.inc_isipv6; +#else + const int isipv6 = 0; +#endif + + m = m_gethdr(M_NOWAIT, MT_HEADER); + if (m == NULL) + return (ENOBUFS); + m->m_data += max_linkhdr; + + if (isipv6) { + hdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); + ip6 = mtod(m, struct ip6_hdr *); + th = (struct tcphdr *)(ip6 + 1); + tcpip_fillheaders(inp, ip6, th); + } else { + hdrlen = sizeof(struct tcpiphdr); + ip = mtod(m, struct ip *); + th = (struct tcphdr *)(ip + 1); + tcpip_fillheaders(inp, ip, th); + } + optp = (u_int8_t *)(th + 1); + + /* + * Send a timestamp and echo-reply if both our side and our peer + * have sent timestamps in our SYN's and this is not a RST. + */ + if (tw->t_recent && flags == TH_ACK) { + u_int32_t *lp = (u_int32_t *)optp; + + /* Form timestamp option as shown in appendix A of RFC 1323. */ + *lp++ = htonl(TCPOPT_TSTAMP_HDR); + *lp++ = htonl(ticks); + *lp = htonl(tw->t_recent); + optp += TCPOLEN_TSTAMP_APPA; + } + + /* + * Send `CC-family' options if needed, and it's not a RST. + */ + if (tw->cc_recv != 0 && flags == TH_ACK) { + u_int32_t *lp = (u_int32_t *)optp; + + *lp++ = htonl(TCPOPT_CC_HDR(TCPOPT_CC)); + *lp = htonl(tw->cc_send); + optp += TCPOLEN_CC_APPA; + } + optlen = optp - (u_int8_t *)(th + 1); + + m->m_len = hdrlen + optlen; + m->m_pkthdr.len = m->m_len; + + KASSERT(max_linkhdr + m->m_len <= MHLEN, ("tcptw: mbuf too small")); + + th->th_seq = htonl(tw->snd_nxt); + th->th_ack = htonl(tw->rcv_nxt); + th->th_off = (sizeof(struct tcphdr) + optlen) >> 2; + th->th_flags = flags; + th->th_win = htons(tw->last_win); + + if (isipv6) { + th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr), + sizeof(struct tcphdr) + optlen); + ip6->ip6_hlim = in6_selecthlim(inp, inp->in6p_route.ro_rt ? + inp->in6p_route.ro_rt->rt_ifp : NULL); + error = ip6_output(m, inp->in6p_outputopts, &inp->in6p_route, + (tw->tw_so_options & SO_DONTROUTE), NULL, NULL, inp); + } else { + th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP)); + m->m_pkthdr.csum_flags = CSUM_TCP; + m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); + ip->ip_len = m->m_pkthdr.len; + error = ip_output(m, inp->inp_options, &inp->inp_route, + (tw->tw_so_options & SO_DONTROUTE), NULL, inp); + } + if (flags & TH_ACK) + tcpstat.tcps_sndacks++; + else + tcpstat.tcps_sndctrl++; + tcpstat.tcps_sndtotal++; + return (error); +} + +/* * TCP BANDWIDTH DELAY PRODUCT WINDOW LIMITING * * This code attempts to calculate the bandwidth-delay product as a diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c index 8a68579..16955d6 100644 --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -252,6 +252,33 @@ tcp_timer_2msl(xtp) } void +tcp_timer_2msl_tw(xtw) + void *xtw; +{ + struct tcptw *tw = xtw; + int s; + + s = splnet(); + INP_INFO_WLOCK(&tcbinfo); + if (tw->tw_inpcb == NULL) { + INP_INFO_WUNLOCK(&tcbinfo); + splx(s); + return; + } + INP_LOCK(tw->tw_inpcb); + if (callout_pending(tw->tt_2msl) || !callout_active(tw->tt_2msl)) { + INP_UNLOCK(tw->tw_inpcb); + INP_INFO_WUNLOCK(&tcbinfo); + splx(s); + return; + } + callout_deactivate(tw->tt_2msl); + tcp_twclose(tw); + INP_INFO_WUNLOCK(&tcbinfo); + splx(s); +} + +void tcp_timer_keep(xtp) void *xtp; { diff --git a/sys/netinet/tcp_timer.h b/sys/netinet/tcp_timer.h index cb835fb..40080b7 100644 --- a/sys/netinet/tcp_timer.h +++ b/sys/netinet/tcp_timer.h @@ -152,6 +152,7 @@ extern int tcp_ttl; /* time to live for TCP segs */ extern int tcp_backoff[]; void tcp_timer_2msl(void *xtp); +void tcp_timer_2msl_tw(void *xtw); /* XXX temporary */ void tcp_timer_keep(void *xtp); void tcp_timer_persist(void *xtp); void tcp_timer_rexmt(void *xtp); diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c index 3c43acb..dc7730f 100644 --- a/sys/netinet/tcp_timewait.c +++ b/sys/netinet/tcp_timewait.c @@ -179,6 +179,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, inflight_stab, CTLFLAG_RW, static void tcp_cleartaocache(void); static struct inpcb *tcp_notify(struct inpcb *, int); +static void tcp_discardcb(struct tcpcb *); /* * Target size of TCP PCB hash tables. Must be a power of two. @@ -191,26 +192,23 @@ static struct inpcb *tcp_notify(struct inpcb *, int); #endif /* - * This is the actual shape of what we allocate using the zone - * allocator. Doing it this way allows us to protect both structures - * using the same generation count, and also eliminates the overhead - * of allocating tcpcbs separately. By hiding the structure here, - * we avoid changing most of the rest of the code (although it needs - * to be changed, eventually, for greater efficiency). + * XXX + * Callouts should be moved into struct tcp directly. They are currently + * separate becuase the tcpcb structure is exported to userland for sysctl + * parsing purposes, which do not know about callouts. */ -#define ALIGNMENT 32 -#define ALIGNM1 (ALIGNMENT - 1) -struct inp_tp { - union { - struct inpcb inp; - char align[(sizeof(struct inpcb) + ALIGNM1) & ~ALIGNM1]; - } inp_tp_u; +struct tcpcb_mem { struct tcpcb tcb; - struct callout inp_tp_rexmt, inp_tp_persist, inp_tp_keep, inp_tp_2msl; - struct callout inp_tp_delack; + struct callout tcpcb_mem_rexmt, tcpcb_mem_persist, tcpcb_mem_keep; + struct callout tcpcb_mem_2msl, tcpcb_mem_delack; +}; +struct tcptw_mem { + struct tcptw tw; + struct callout tcptw_mem_2msl; }; -#undef ALIGNMENT -#undef ALIGNM1 + +static uma_zone_t tcpcb_zone; +static uma_zone_t tcptw_zone; /* * Tcp initialization @@ -244,7 +242,7 @@ tcp_init() tcbinfo.hashbase = hashinit(hashsize, M_PCB, &tcbinfo.hashmask); tcbinfo.porthashbase = hashinit(hashsize, M_PCB, &tcbinfo.porthashmask); - tcbinfo.ipi_zone = uma_zcreate("tcpcb", sizeof(struct inp_tp), + tcbinfo.ipi_zone = uma_zcreate("inpcb", sizeof(struct inpcb), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); uma_zone_set_max(tcbinfo.ipi_zone, maxsockets); #ifdef INET6 @@ -257,6 +255,15 @@ tcp_init() if (max_linkhdr + TCP_MINPROTOHDR > MHLEN) panic("tcp_init"); #undef TCP_MINPROTOHDR + /* + * These have to be type stable for the benefit of the timers. + */ + tcpcb_zone = uma_zcreate("tcpcb", sizeof(struct tcpcb_mem), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); + uma_zone_set_max(tcpcb_zone, maxsockets); + tcptw_zone = uma_zcreate("tcptw", sizeof(struct tcptw_mem), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); + uma_zone_set_max(tcptw_zone, maxsockets); syncache_init(); } @@ -552,16 +559,17 @@ struct tcpcb * tcp_newtcpcb(inp) struct inpcb *inp; { - struct inp_tp *it; - register struct tcpcb *tp; + struct tcpcb_mem *tm; + struct tcpcb *tp; #ifdef INET6 int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; #endif /* INET6 */ - it = (struct inp_tp *)inp; - tp = &it->tcb; - bzero((char *) tp, sizeof(struct tcpcb)); - LIST_INIT(&tp->t_segq); + tm = uma_zalloc(tcpcb_zone, M_NOWAIT | M_ZERO); + if (tm == NULL) + return (NULL); + tp = &tm->tcb; + /* LIST_INIT(&tp->t_segq); */ /* XXX covered by M_ZERO */ tp->t_maxseg = tp->t_maxopd = #ifdef INET6 isipv6 ? tcp_v6mssdflt : @@ -569,11 +577,11 @@ tcp_newtcpcb(inp) tcp_mssdflt; /* Set up our timeouts. */ - callout_init(tp->tt_rexmt = &it->inp_tp_rexmt, 0); - callout_init(tp->tt_persist = &it->inp_tp_persist, 0); - callout_init(tp->tt_keep = &it->inp_tp_keep, 0); - callout_init(tp->tt_2msl = &it->inp_tp_2msl, 0); - callout_init(tp->tt_delack = &it->inp_tp_delack, 0); + callout_init(tp->tt_rexmt = &tm->tcpcb_mem_rexmt, 0); + callout_init(tp->tt_persist = &tm->tcpcb_mem_persist, 0); + callout_init(tp->tt_keep = &tm->tcpcb_mem_keep, 0); + callout_init(tp->tt_2msl = &tm->tcpcb_mem_2msl, 0); + callout_init(tp->tt_delack = &tm->tcpcb_mem_delack, 0); if (tcp_do_rfc1323) tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP); @@ -628,23 +636,17 @@ tcp_drop(tp, errno) return (tcp_close(tp)); } -/* - * Close a TCP control block: - * discard all space held by the tcp - * discard internet protocol block - * wake up any sleepers - */ -struct tcpcb * -tcp_close(tp) - register struct tcpcb *tp; +static void +tcp_discardcb(tp) + struct tcpcb *tp; { - register struct tseg_qent *q; + struct tseg_qent *q; struct inpcb *inp = tp->t_inpcb; struct socket *so = inp->inp_socket; #ifdef INET6 int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; #endif /* INET6 */ - register struct rtentry *rt; + struct rtentry *rt; int dosavessthresh; /* @@ -762,20 +764,37 @@ tcp_close(tp) } no_valid_rt: /* free the reassembly queue, if any */ - while((q = LIST_FIRST(&tp->t_segq)) != NULL) { + while ((q = LIST_FIRST(&tp->t_segq)) != NULL) { LIST_REMOVE(q, tqe_q); m_freem(q->tqe_m); FREE(q, M_TSEGQ); } inp->inp_ppcb = NULL; tp->t_inpcb = NULL; + uma_zfree(tcpcb_zone, tp); soisdisconnected(so); +} + +/* + * Close a TCP control block: + * discard all space held by the tcp + * discard internet protocol block + * wake up any sleepers + */ +struct tcpcb * +tcp_close(tp) + struct tcpcb *tp; +{ + struct inpcb *inp = tp->t_inpcb; + struct socket *so = inp->inp_socket; + + tcp_discardcb(tp); #ifdef INET6 if (INP_CHECK_SOCKAF(so, AF_INET6)) in6_pcbdetach(inp); else -#endif /* INET6 */ - in_pcbdetach(inp); +#endif + in_pcbdetach(inp); tcpstat.tcps_closed++; return ((struct tcpcb *)0); } @@ -799,6 +818,8 @@ tcp_drain() */ INP_INFO_RLOCK(&tcbinfo); LIST_FOREACH(inpb, tcbinfo.listhead, inp_list) { + if (inpb->inp_vflag & INP_TIMEWAIT) + continue; INP_LOCK(inpb); if ((tcpb = intotcpcb(inpb))) { while ((te = LIST_FIRST(&tcpb->t_segq)) @@ -908,7 +929,9 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS) inp = LIST_NEXT(inp, inp_list)) { INP_LOCK(inp); if (inp->inp_gencnt <= gencnt && - cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0) + (((inp->inp_vflag & INP_TIMEWAIT) && + cr_cansee(req->td->td_ucred, intotw(inp)->tw_cred) == 0) || + cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0)) inp_list[i++] = inp; INP_UNLOCK(inp); } @@ -926,12 +949,19 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS) /* XXX should avoid extra copy */ bcopy(inp, &xt.xt_inp, sizeof *inp); inp_ppcb = inp->inp_ppcb; - if (inp_ppcb != NULL) - bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp); - else + if (inp_ppcb == NULL) + bzero((char *) &xt.xt_tp, sizeof xt.xt_tp); + else if (inp->inp_vflag & INP_TIMEWAIT) { bzero((char *) &xt.xt_tp, sizeof xt.xt_tp); + xt.xt_tp.t_state = TCPS_TIME_WAIT; + } else + bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp); if (inp->inp_socket) sotoxsocket(inp->inp_socket, &xt.xt_socket); + else { + bzero(&xt.xt_socket, sizeof xt.xt_socket); + xt.xt_socket.xso_protocol = IPPROTO_TCP; + } xt.xt_inp.inp_gencnt = inp->inp_gencnt; error = SYSCTL_OUT(req, &xt, sizeof xt); } @@ -1487,7 +1517,7 @@ ipsec_hdrsiz_tcp(tp) struct ip *ip; #ifdef INET6 struct ip6_hdr *ip6; -#endif /* INET6 */ +#endif struct tcphdr *th; if ((tp == NULL) || ((inp = tp->t_inpcb) == NULL)) @@ -1559,6 +1589,196 @@ tcp_cleartaocache() } /* + * Move a TCP connection into TIME_WAIT state. + * tcbinfo is unlocked. + * inp is locked, and is unlocked before returning. + */ +void +tcp_twstart(tp) + struct tcpcb *tp; +{ + struct tcptw_mem *tm; + struct tcptw *tw; + struct inpcb *inp; + int tw_time, acknow; + struct socket *so; + + tm = uma_zalloc(tcptw_zone, M_NOWAIT); + if (tm == NULL) + /* EEEK! -- preserve old structure or just kill everything? */ + /* must obtain tcbinfo lock in order to drop the structure. */ + panic("uma_zalloc(tcptw)"); + tw = &tm->tw; + inp = tp->t_inpcb; + tw->tw_inpcb = inp; + + /* + * Recover last window size sent. + */ + tw->last_win = (tp->rcv_adv - tp->rcv_nxt) >> tp->rcv_scale; + + /* + * Set t_recent if timestamps are used on the connection. + */ + if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) == + (TF_REQ_TSTMP|TF_RCVD_TSTMP)) + tw->t_recent = tp->ts_recent; + else + tw->t_recent = 0; + + tw->snd_nxt = tp->snd_nxt; + tw->rcv_nxt = tp->rcv_nxt; + tw->cc_recv = tp->cc_recv; + tw->cc_send = tp->cc_send; + tw->t_starttime = tp->t_starttime; + callout_init(tw->tt_2msl = &tm->tcptw_mem_2msl, 0); + +/* XXX + * If this code will + * be used for fin-wait-2 state also, then we may need + * a ts_recent from the last segment. + */ + /* Shorten TIME_WAIT [RFC-1644, p.28] */ + if (tp->cc_recv != 0 && (ticks - tp->t_starttime) < tcp_msl) { + tw_time = tp->t_rxtcur * TCPTV_TWTRUNC; + /* For T/TCP client, force ACK now. */ + acknow = 1; + } else { + tw_time = 2 * tcp_msl; + acknow = tp->t_flags & TF_ACKNOW; + } + tcp_discardcb(tp); + so = inp->inp_socket; + so->so_pcb = NULL; + tw->tw_cred = crhold(so->so_cred); + tw->tw_so_options = so->so_options; + sotryfree(so); + inp->inp_socket = NULL; + inp->inp_ppcb = (caddr_t)tw; + inp->inp_vflag |= INP_TIMEWAIT; + callout_reset(tw->tt_2msl, tw_time, tcp_timer_2msl_tw, tw); + if (acknow) + tcp_twrespond(tw, TH_ACK); + INP_UNLOCK(inp); +} + +void +tcp_twclose(tw) + struct tcptw *tw; +{ + struct inpcb *inp; + + inp = tw->tw_inpcb; + tw->tw_inpcb = NULL; + callout_stop(tw->tt_2msl); + inp->inp_ppcb = NULL; + uma_zfree(tcptw_zone, tw); +#ifdef INET6 + if (inp->inp_vflag & INP_IPV6PROTO) + in6_pcbdetach(inp); + else +#endif + in_pcbdetach(inp); + tcpstat.tcps_closed++; +} + +int +tcp_twrespond(struct tcptw *tw, int flags) +{ + struct inpcb *inp = tw->tw_inpcb; + struct tcphdr *th; + struct mbuf *m; + struct ip *ip = NULL; + u_int8_t *optp; + u_int hdrlen, optlen; + int error; +#ifdef INET6 + struct ip6_hdr *ip6 = NULL; + int isipv6 = inp->inp_inc.inc_isipv6; +#else + const int isipv6 = 0; +#endif + + m = m_gethdr(M_NOWAIT, MT_HEADER); + if (m == NULL) + return (ENOBUFS); + m->m_data += max_linkhdr; + + if (isipv6) { + hdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); + ip6 = mtod(m, struct ip6_hdr *); + th = (struct tcphdr *)(ip6 + 1); + tcpip_fillheaders(inp, ip6, th); + } else { + hdrlen = sizeof(struct tcpiphdr); + ip = mtod(m, struct ip *); + th = (struct tcphdr *)(ip + 1); + tcpip_fillheaders(inp, ip, th); + } + optp = (u_int8_t *)(th + 1); + + /* + * Send a timestamp and echo-reply if both our side and our peer + * have sent timestamps in our SYN's and this is not a RST. + */ + if (tw->t_recent && flags == TH_ACK) { + u_int32_t *lp = (u_int32_t *)optp; + + /* Form timestamp option as shown in appendix A of RFC 1323. */ + *lp++ = htonl(TCPOPT_TSTAMP_HDR); + *lp++ = htonl(ticks); + *lp = htonl(tw->t_recent); + optp += TCPOLEN_TSTAMP_APPA; + } + + /* + * Send `CC-family' options if needed, and it's not a RST. + */ + if (tw->cc_recv != 0 && flags == TH_ACK) { + u_int32_t *lp = (u_int32_t *)optp; + + *lp++ = htonl(TCPOPT_CC_HDR(TCPOPT_CC)); + *lp = htonl(tw->cc_send); + optp += TCPOLEN_CC_APPA; + } + optlen = optp - (u_int8_t *)(th + 1); + + m->m_len = hdrlen + optlen; + m->m_pkthdr.len = m->m_len; + + KASSERT(max_linkhdr + m->m_len <= MHLEN, ("tcptw: mbuf too small")); + + th->th_seq = htonl(tw->snd_nxt); + th->th_ack = htonl(tw->rcv_nxt); + th->th_off = (sizeof(struct tcphdr) + optlen) >> 2; + th->th_flags = flags; + th->th_win = htons(tw->last_win); + + if (isipv6) { + th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr), + sizeof(struct tcphdr) + optlen); + ip6->ip6_hlim = in6_selecthlim(inp, inp->in6p_route.ro_rt ? + inp->in6p_route.ro_rt->rt_ifp : NULL); + error = ip6_output(m, inp->in6p_outputopts, &inp->in6p_route, + (tw->tw_so_options & SO_DONTROUTE), NULL, NULL, inp); + } else { + th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP)); + m->m_pkthdr.csum_flags = CSUM_TCP; + m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); + ip->ip_len = m->m_pkthdr.len; + error = ip_output(m, inp->inp_options, &inp->inp_route, + (tw->tw_so_options & SO_DONTROUTE), NULL, inp); + } + if (flags & TH_ACK) + tcpstat.tcps_sndacks++; + else + tcpstat.tcps_sndctrl++; + tcpstat.tcps_sndtotal++; + return (error); +} + +/* * TCP BANDWIDTH DELAY PRODUCT WINDOW LIMITING * * This code attempts to calculate the bandwidth-delay product as a diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 137b042..e4a07f8 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -235,6 +235,20 @@ struct syncache_head { TAILQ_HEAD(, syncache) sch_bucket; u_int sch_length; }; + +struct tcptw { + struct inpcb *tw_inpcb; /* XXX back pointer to internet pcb */ + tcp_seq snd_nxt; + tcp_seq rcv_nxt; + tcp_cc cc_recv; + tcp_cc cc_send; + u_short last_win; /* cached window value */ + u_short tw_so_options; /* copy of so_options */ + struct ucred *tw_cred; /* user credentials */ + u_long t_recent; + u_long t_starttime; + struct callout *tt_2msl; /* 2*msl TIME_WAIT timer */ +}; /* * The TAO cache entry which is stored in the protocol family specific @@ -254,6 +268,7 @@ struct rmxp_tao { #define rmx_taop(r) ((struct rmxp_tao *)(r).rmx_filler) #define intotcpcb(ip) ((struct tcpcb *)(ip)->inp_ppcb) +#define intotw(ip) ((struct tcptw *)(ip)->inp_ppcb) #define sototcpcb(so) (intotcpcb(sotoinpcb(so))) /* @@ -448,6 +463,8 @@ extern int ss_fltsz_local; void tcp_canceltimers(struct tcpcb *); struct tcpcb * tcp_close(struct tcpcb *); +void tcp_twstart(struct tcpcb *); +void tcp_twclose(struct tcptw *); void tcp_ctlinput(int, struct sockaddr *, void *); int tcp_ctloutput(struct socket *, struct sockopt *); struct tcpcb * @@ -471,6 +488,7 @@ struct inpcb * tcp_quench(struct inpcb *, int); void tcp_respond(struct tcpcb *, void *, struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int); +int tcp_twrespond(struct tcptw *, int); struct rtentry * tcp_rtlookup(struct in_conninfo *); void tcp_setpersist(struct tcpcb *); diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c index 963cd9a..b328d32 100644 --- a/sys/netinet6/in6_pcb.c +++ b/sys/netinet6/in6_pcb.c @@ -614,9 +614,10 @@ in6_pcbdetach(inp) #endif /* IPSEC */ inp->inp_gencnt = ++ipi->ipi_gencnt; in_pcbremlists(inp); - sotoinpcb(so) = 0; - sotryfree(so); - + if (so) { + so->so_pcb = NULL; + sotryfree(so); + } if (inp->in6p_options) m_freem(inp->in6p_options); ip6_freepcbopts(inp->in6p_outputopts); @@ -627,7 +628,6 @@ in6_pcbdetach(inp) if (inp->inp_options) (void)m_free(inp->inp_options); ip_freemoptions(inp->inp_moptions); - inp->inp_vflag = 0; INP_LOCK_DESTROY(inp); uma_zfree(ipi->ipi_zone, inp); diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index 64b11aa..2b22088 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -218,10 +218,10 @@ ip6_output(m0, opt, ro, flags, im6o, ifpp, inp) #ifdef IPSEC /* get a security policy for this packet */ - if (so == NULL) + if (inp == NULL) sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error); else - sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error); + sp = ipsec6_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error); if (sp == NULL) { ipsec6stat.out_inval++; diff --git a/sys/netinet6/ipsec.c b/sys/netinet6/ipsec.c index f898c0a..500f80b 100644 --- a/sys/netinet6/ipsec.c +++ b/sys/netinet6/ipsec.c @@ -235,10 +235,10 @@ static int ipsec6_encapsulate __P((struct mbuf *, struct secasvar *)); * NOTE: IPv6 mapped adddress concern is implemented here. */ struct secpolicy * -ipsec4_getpolicybysock(m, dir, so, error) +ipsec4_getpolicybypcb(m, dir, inp, error) struct mbuf *m; u_int dir; - struct socket *so; + struct inpcb *inp; int *error; { struct inpcbpolicy *pcbsp = NULL; @@ -246,35 +246,19 @@ ipsec4_getpolicybysock(m, dir, so, error) struct secpolicy *kernsp = NULL; /* policy on kernel */ /* sanity check */ - if (m == NULL || so == NULL || error == NULL) + if (m == NULL || inp == NULL || error == NULL) panic("ipsec4_getpolicybysock: NULL pointer was passed.\n"); - switch (so->so_proto->pr_domain->dom_family) { - case AF_INET: - /* set spidx in pcb */ - *error = ipsec4_setspidx_inpcb(m, sotoinpcb(so)); - break; + /* set spidx in pcb */ #ifdef INET6 - case AF_INET6: - /* set spidx in pcb */ - *error = ipsec6_setspidx_in6pcb(m, sotoin6pcb(so)); - break; + if (inp->inp_vflag & INP_IPV6PROTO) + *error = ipsec6_setspidx_in6pcb(m, inp); + else #endif - default: - panic("ipsec4_getpolicybysock: unsupported address family\n"); - } + *error = ipsec4_setspidx_inpcb(m, inp); if (*error) return NULL; - switch (so->so_proto->pr_domain->dom_family) { - case AF_INET: - pcbsp = sotoinpcb(so)->inp_sp; - break; -#ifdef INET6 - case AF_INET6: - pcbsp = sotoin6pcb(so)->in6p_sp; - break; -#endif - } + pcbsp = inp->inp_sp; /* sanity check */ if (pcbsp == NULL) @@ -390,6 +374,19 @@ ipsec4_getpolicybysock(m, dir, so, error) /* NOTREACHED */ } +struct secpolicy * +ipsec4_getpolicybysock(m, dir, so, error) + struct mbuf *m; + u_int dir; + struct socket *so; + int *error; +{ + + if (so == NULL) + panic("ipsec4_getpolicybysock: NULL pointer was passed.\n"); + return (ipsec4_getpolicybypcb(m, dir, sotoinpcb(so), error)); +} + /* * For FORWADING packet or OUTBOUND without a socket. Searching SPD for packet, * and return a pointer to SP. @@ -462,10 +459,10 @@ ipsec4_getpolicybyaddr(m, dir, flag, error) * others: a pointer to SP */ struct secpolicy * -ipsec6_getpolicybysock(m, dir, so, error) +ipsec6_getpolicybypcb(m, dir, inp, error) struct mbuf *m; u_int dir; - struct socket *so; + struct inpcb *inp; int *error; { struct inpcbpolicy *pcbsp = NULL; @@ -473,18 +470,17 @@ ipsec6_getpolicybysock(m, dir, so, error) struct secpolicy *kernsp = NULL; /* policy on kernel */ /* sanity check */ - if (m == NULL || so == NULL || error == NULL) + if (m == NULL || inp == NULL || error == NULL) panic("ipsec6_getpolicybysock: NULL pointer was passed.\n"); #ifdef DIAGNOSTIC - if (so->so_proto->pr_domain->dom_family != AF_INET6) + if ((inp->inp_vflag & INP_IPV6PROTO) == 0) panic("ipsec6_getpolicybysock: socket domain != inet6\n"); #endif /* set spidx in pcb */ - ipsec6_setspidx_in6pcb(m, sotoin6pcb(so)); - - pcbsp = sotoin6pcb(so)->in6p_sp; + ipsec6_setspidx_in6pcb(m, inp); + pcbsp = inp->in6p_sp; /* sanity check */ if (pcbsp == NULL) @@ -601,6 +597,19 @@ ipsec6_getpolicybysock(m, dir, so, error) /* NOTREACHED */ } +struct secpolicy * +ipsec6_getpolicybysock(m, dir, so, error) + struct mbuf *m; + u_int dir; + struct socket *so; + int *error; +{ + + if (so == NULL) + panic("ipsec6_getpolicybysock: NULL pointer was passed.\n"); + return (ipsec6_getpolicybypcb(m, dir, sotoin6pcb(so), error)); +} + /* * For FORWADING packet or OUTBOUND without a socket. Searching SPD for packet, * and return a pointer to SP. @@ -1690,9 +1699,9 @@ ipsec_in_reject(sp, m) * and {ah,esp}4_input for tunnel mode */ int -ipsec4_in_reject_so(m, so) +ipsec4_in_reject(m, inp) struct mbuf *m; - struct socket *so; + struct inpcb *inp; { struct secpolicy *sp = NULL; int error; @@ -1706,10 +1715,10 @@ ipsec4_in_reject_so(m, so) * When we are called from ip_forward(), we call * ipsec4_getpolicybyaddr() with IP_FORWARDING flag. */ - if (so == NULL) + if (inp == NULL) sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_INBOUND, IP_FORWARDING, &error); else - sp = ipsec4_getpolicybysock(m, IPSEC_DIR_INBOUND, so, &error); + sp = ipsec4_getpolicybypcb(m, IPSEC_DIR_INBOUND, inp, &error); if (sp == NULL) return 0; /* XXX should be panic ? @@ -1724,18 +1733,16 @@ ipsec4_in_reject_so(m, so) } int -ipsec4_in_reject(m, inp) +ipsec4_in_reject_so(m, so) struct mbuf *m; - struct inpcb *inp; + struct socket *so; { - if (inp == NULL) - return ipsec4_in_reject_so(m, NULL); - if (inp->inp_socket) - return ipsec4_in_reject_so(m, inp->inp_socket); - else - panic("ipsec4_in_reject: invalid inpcb/socket"); + if (so == NULL) + return ipsec4_in_reject(m, NULL); + return ipsec4_in_reject(m, sotoinpcb(so)); } + #ifdef INET6 /* * Check AH/ESP integrity. @@ -1743,9 +1750,9 @@ ipsec4_in_reject(m, inp) * and {ah,esp}6_input for tunnel mode */ int -ipsec6_in_reject_so(m, so) +ipsec6_in_reject(m, in6p) struct mbuf *m; - struct socket *so; + struct in6pcb *in6p; { struct secpolicy *sp = NULL; int error; @@ -1759,33 +1766,30 @@ ipsec6_in_reject_so(m, so) * When we are called from ip_forward(), we call * ipsec6_getpolicybyaddr() with IP_FORWARDING flag. */ - if (so == NULL) + if (in6p == NULL) sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_INBOUND, IP_FORWARDING, &error); else - sp = ipsec6_getpolicybysock(m, IPSEC_DIR_INBOUND, so, &error); + sp = ipsec6_getpolicybypcb(m, IPSEC_DIR_INBOUND, in6p, &error); if (sp == NULL) return 0; /* XXX should be panic ? */ result = ipsec_in_reject(sp, m); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP ipsec6_in_reject_so call free SP:%p\n", sp)); + printf("DP ipsec6_in_reject call free SP:%p\n", sp)); key_freesp(sp); return result; } int -ipsec6_in_reject(m, in6p) +ipsec6_in_reject_so(m, so) struct mbuf *m; - struct in6pcb *in6p; + struct socket *so; { - if (in6p == NULL) - return ipsec6_in_reject_so(m, NULL); - if (in6p->in6p_socket) - return ipsec6_in_reject_so(m, in6p->in6p_socket); - else - panic("ipsec6_in_reject: invalid in6p/socket"); + if (so == NULL) + return ipsec6_in_reject(m, NULL); + return ipsec6_in_reject(m, sotoin6pcb(so)); } #endif @@ -1889,7 +1893,7 @@ ipsec4_hdrsiz(m, dir, inp) if (inp == NULL) sp = ipsec4_getpolicybyaddr(m, dir, IP_FORWARDING, &error); else - sp = ipsec4_getpolicybysock(m, dir, inp->inp_socket, &error); + sp = ipsec4_getpolicybypcb(m, dir, inp, &error); if (sp == NULL) return 0; /* XXX should be panic ? */ @@ -1921,15 +1925,18 @@ ipsec6_hdrsiz(m, dir, in6p) /* sanity check */ if (m == NULL) return 0; /* XXX shoud be panic ? */ +#if 0 + /* this is possible in TIME_WAIT state */ if (in6p != NULL && in6p->in6p_socket == NULL) panic("ipsec6_hdrsize: why is socket NULL but there is PCB."); +#endif /* get SP for this packet */ /* XXX Is it right to call with IP_FORWARDING. */ if (in6p == NULL) sp = ipsec6_getpolicybyaddr(m, dir, IP_FORWARDING, &error); else - sp = ipsec6_getpolicybysock(m, dir, in6p->in6p_socket, &error); + sp = ipsec6_getpolicybypcb(m, dir, in6p, &error); if (sp == NULL) return 0; diff --git a/sys/netinet6/ipsec.h b/sys/netinet6/ipsec.h index 76790b8..40f5f56 100644 --- a/sys/netinet6/ipsec.h +++ b/sys/netinet6/ipsec.h @@ -297,12 +297,14 @@ extern int ip4_esp_randpad; #define ipseclog(x) do { if (ipsec_debug) log x; } while (0) +struct inpcb; +extern struct secpolicy *ipsec4_getpolicybypcb + __P((struct mbuf *, u_int, struct inpcb *, int *)); extern struct secpolicy *ipsec4_getpolicybysock __P((struct mbuf *, u_int, struct socket *, int *)); extern struct secpolicy *ipsec4_getpolicybyaddr __P((struct mbuf *, u_int, int, int *)); -struct inpcb; extern int ipsec_init_policy __P((struct socket *so, struct inpcbpolicy **)); extern int ipsec_copy_policy __P((struct inpcbpolicy *, struct inpcbpolicy *)); diff --git a/sys/netinet6/ipsec6.h b/sys/netinet6/ipsec6.h index e9b8a2c..1811088 100644 --- a/sys/netinet6/ipsec6.h +++ b/sys/netinet6/ipsec6.h @@ -50,13 +50,14 @@ extern int ip6_ah_net_deflev; extern int ip6_ipsec_ecn; extern int ip6_esp_randpad; +struct inpcb; +extern struct secpolicy *ipsec6_getpolicybypcb + __P((struct mbuf *, u_int, struct inpcb *, int *)); extern struct secpolicy *ipsec6_getpolicybysock __P((struct mbuf *, u_int, struct socket *, int *)); extern struct secpolicy *ipsec6_getpolicybyaddr __P((struct mbuf *, u_int, int, int *)); -struct inpcb; - extern int ipsec6_in_reject_so __P((struct mbuf *, struct socket *)); extern int ipsec6_delete_pcbpolicy __P((struct inpcb *)); extern int ipsec6_set_policy __P((struct inpcb *inp, int optname, diff --git a/sys/netinet6/raw_ip6.c b/sys/netinet6/raw_ip6.c index 540533b..728da31 100644 --- a/sys/netinet6/raw_ip6.c +++ b/sys/netinet6/raw_ip6.c @@ -180,7 +180,7 @@ rip6_input(mp, offp, proto) /* * Check AH/ESP integrity. */ - if (n && ipsec6_in_reject_so(n, last->inp_socket)) { + if (n && ipsec6_in_reject(n, last)) { m_freem(n); ipsec6stat.in_polvio++; /* do not inject data into pcb */ @@ -219,7 +219,7 @@ rip6_input(mp, offp, proto) /* * Check AH/ESP integrity. */ - if (last && ipsec6_in_reject_so(m, last->inp_socket)) { + if (last && ipsec6_in_reject(m, last)) { m_freem(m); ipsec6stat.in_polvio++; ip6stat.ip6s_delivered--; diff --git a/sys/netinet6/udp6_usrreq.c b/sys/netinet6/udp6_usrreq.c index b5a4033..fd642cb 100644 --- a/sys/netinet6/udp6_usrreq.c +++ b/sys/netinet6/udp6_usrreq.c @@ -261,7 +261,7 @@ udp6_input(mp, offp, proto) /* * Check AH/ESP integrity. */ - if (ipsec6_in_reject_so(m, last->inp_socket)) + if (ipsec6_in_reject(m, last)) ipsec6stat.in_polvio++; /* do not inject data into pcb */ else @@ -328,7 +328,7 @@ udp6_input(mp, offp, proto) /* * Check AH/ESP integrity. */ - if (ipsec6_in_reject_so(m, last->inp_socket)) { + if (ipsec6_in_reject(m, last)) { ipsec6stat.in_polvio++; goto bad; } @@ -384,7 +384,7 @@ udp6_input(mp, offp, proto) /* * Check AH/ESP integrity. */ - if (ipsec6_in_reject_so(m, in6p->in6p_socket)) { + if (ipsec6_in_reject(m, in6p)) { ipsec6stat.in_polvio++; goto bad; } |