summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--sys/netinet/in_pcb.c29
-rw-r--r--sys/netinet/in_pcb.h2
-rw-r--r--sys/netinet/in_proto.c1
-rw-r--r--sys/netinet/ip_output.c4
-rw-r--r--sys/netinet/tcp_input.c216
-rw-r--r--sys/netinet/tcp_reass.c216
-rw-r--r--sys/netinet/tcp_subr.c316
-rw-r--r--sys/netinet/tcp_timer.c27
-rw-r--r--sys/netinet/tcp_timer.h1
-rw-r--r--sys/netinet/tcp_timewait.c316
-rw-r--r--sys/netinet/tcp_var.h18
-rw-r--r--sys/netinet6/in6_pcb.c8
-rw-r--r--sys/netinet6/ip6_output.c4
-rw-r--r--sys/netinet6/ipsec.c125
-rw-r--r--sys/netinet6/ipsec.h4
-rw-r--r--sys/netinet6/ipsec6.h5
-rw-r--r--sys/netinet6/raw_ip6.c4
-rw-r--r--sys/netinet6/udp6_usrreq.c6
18 files changed, 1067 insertions, 235 deletions
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
index 7f83917..46f824d 100644
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -62,6 +62,7 @@
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
+#include <netinet/tcp_var.h>
#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
@@ -169,8 +170,11 @@ in_pcballoc(so, pcbinfo, td)
}
#endif /*IPSEC*/
#if defined(INET6)
- if (INP_SOCKAF(so) == AF_INET6 && ip6_v6only)
- inp->inp_flags |= IN6P_IPV6_V6ONLY;
+ if (INP_SOCKAF(so) == AF_INET6) {
+ inp->inp_vflag |= INP_IPV6PROTO;
+ if (ip6_v6only)
+ inp->inp_flags |= IN6P_IPV6_V6ONLY;
+ }
#endif
LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list);
pcbinfo->ipi_count++;
@@ -294,6 +298,17 @@ in_pcbbind_setup(inp, nam, laddrp, lportp, td)
t = in_pcblookup_local(inp->inp_pcbinfo,
sin->sin_addr, lport,
prison ? 0 : INPLOOKUP_WILDCARD);
+ /*
+ * XXX
+ * This entire block sorely needs a rewrite.
+ */
+ if (t && (t->inp_vflag & INP_TIMEWAIT)) {
+ if ((ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
+ ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
+ (intotw(t)->tw_so_options & SO_REUSEPORT) == 0) &&
+ (so->so_cred->cr_uid != intotw(t)->tw_cred->cr_uid))
+ return (EADDRINUSE);
+ } else
if (t &&
(ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
@@ -317,6 +332,10 @@ in_pcbbind_setup(inp, nam, laddrp, lportp, td)
return (EADDRNOTAVAIL);
t = in_pcblookup_local(pcbinfo, sin->sin_addr,
lport, prison ? 0 : wild);
+ if (t && (t->inp_vflag & INP_TIMEWAIT)) {
+ if ((reuseport & intotw(t)->tw_so_options) == 0)
+ return (EADDRINUSE);
+ } else
if (t &&
(reuseport & t->inp_socket->so_options) == 0) {
#if defined(INET6)
@@ -640,8 +659,10 @@ in_pcbdetach(inp)
#endif /*IPSEC*/
inp->inp_gencnt = ++ipi->ipi_gencnt;
in_pcbremlists(inp);
- so->so_pcb = 0;
- sotryfree(so);
+ if (so) {
+ so->so_pcb = 0;
+ sotryfree(so);
+ }
if (inp->inp_options)
(void)m_free(inp->inp_options);
if (inp->inp_route.ro_rt)
diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
index 018d7b4..efb7862 100644
--- a/sys/netinet/in_pcb.h
+++ b/sys/netinet/in_pcb.h
@@ -140,6 +140,8 @@ struct inpcb {
u_char inp_vflag; /* IP version flag (v4/v6) */
#define INP_IPV4 0x1
#define INP_IPV6 0x2
+#define INP_IPV6PROTO 0x4 /* opened under IPv6 protocol */
+#define INP_TIMEWAIT 0x8 /* .. probably doesn't go here */
u_char inp_ip_ttl; /* time to live proto */
u_char inp_ip_p; /* protocol proto */
diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c
index 863b048..a2a528f 100644
--- a/sys/netinet/in_proto.c
+++ b/sys/netinet/in_proto.c
@@ -40,6 +40,7 @@
#include "opt_inet6.h"
#include <sys/param.h>
+#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/domain.h>
diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c
index 64e58e2..c6cefc4 100644
--- a/sys/netinet/ip_output.c
+++ b/sys/netinet/ip_output.c
@@ -470,10 +470,10 @@ ip_output(m0, opt, ro, flags, imo, inp)
sendit:
#ifdef IPSEC
/* get SP for this packet */
- if (so == NULL)
+ if (inp == NULL)
sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
else
- sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
+ sp = ipsec4_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error);
if (sp == NULL) {
ipsecstat.out_inval++;
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index ad20203..52dba70 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -137,6 +137,8 @@ static int tcp_reass(struct tcpcb *, struct tcphdr *, int *,
struct mbuf *);
static void tcp_xmit_timer(struct tcpcb *, int);
static void tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
+static int tcp_timewait(struct tcptw *, struct tcpopt *,
+ struct tcphdr *, struct mbuf *, int);
/* Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. */
#ifdef INET6
@@ -636,6 +638,22 @@ findpcb:
goto dropwithreset;
}
INP_LOCK(inp);
+ if (inp->inp_vflag & INP_TIMEWAIT) {
+ /*
+ * The only option of relevance is TOF_CC, and only if
+ * present in a SYN segment. See tcp_timewait().
+ */
+ if (thflags & TH_SYN)
+ tcp_dooptions(&to, optp, optlen, 1);
+ if (tcp_timewait((struct tcptw *)inp->inp_ppcb,
+ &to, th, m, tlen))
+ goto findpcb;
+ /*
+ * tcp_timewait unlocks inp.
+ */
+ INP_INFO_WUNLOCK(&tcbinfo);
+ return;
+ }
tp = intotcpcb(inp);
if (tp == 0) {
INP_UNLOCK(inp);
@@ -1319,6 +1337,7 @@ trimthenstep6:
case TCPS_LAST_ACK:
case TCPS_CLOSING:
case TCPS_TIME_WAIT:
+ KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
if ((thflags & TH_SYN) &&
(to.to_flags & TOF_CC) && tp->cc_recv != 0) {
if (tp->t_state == TCPS_TIME_WAIT &&
@@ -1418,6 +1437,8 @@ trimthenstep6:
break;
case TCPS_TIME_WAIT:
+ KASSERT(tp->t_state != TCPS_TIME_WAIT,
+ ("timewait"));
break;
}
}
@@ -1550,6 +1571,7 @@ trimthenstep6:
* and start over if the sequence numbers
* are above the previous ones.
*/
+ KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
if (thflags & TH_SYN &&
tp->t_state == TCPS_TIME_WAIT &&
SEQ_GT(th->th_seq, tp->rcv_nxt)) {
@@ -1678,7 +1700,7 @@ trimthenstep6:
case TCPS_CLOSING:
case TCPS_LAST_ACK:
case TCPS_TIME_WAIT:
-
+ KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
if (tlen == 0 && tiwin == tp->snd_wnd) {
tcpstat.tcps_rcvdupack++;
@@ -1921,6 +1943,10 @@ process_ACK:
* specification, but if we don't get a FIN
* we'll hang forever.
*/
+ /* XXXjl
+ * we should release the tp also, and use a
+ * compressed state.
+ */
if (so->so_state & SS_CANTRCVMORE) {
soisdisconnected(so);
callout_reset(tp->tt_2msl, tcp_maxidle,
@@ -1938,19 +1964,11 @@ process_ACK:
*/
case TCPS_CLOSING:
if (ourfinisacked) {
- tp->t_state = TCPS_TIME_WAIT;
- tcp_canceltimers(tp);
- /* Shorten TIME_WAIT [RFC-1644, p.28] */
- if (tp->cc_recv != 0 &&
- (ticks - tp->t_starttime) < tcp_msl)
- callout_reset(tp->tt_2msl,
- tp->t_rxtcur *
- TCPTV_TWTRUNC,
- tcp_timer_2msl, tp);
- else
- callout_reset(tp->tt_2msl, 2 * tcp_msl,
- tcp_timer_2msl, tp);
- soisdisconnected(so);
+ KASSERT(headlocked, ("headlocked"));
+ INP_INFO_WUNLOCK(&tcbinfo);
+ m_freem(m);
+ tcp_twstart(tp);
+ return;
}
break;
@@ -1973,6 +1991,7 @@ process_ACK:
* it and restart the finack timer.
*/
case TCPS_TIME_WAIT:
+ KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
callout_reset(tp->tt_2msl, 2 * tcp_msl,
tcp_timer_2msl, tp);
goto dropafterack;
@@ -2166,27 +2185,15 @@ dodata: /* XXX */
* standard timers.
*/
case TCPS_FIN_WAIT_2:
- tp->t_state = TCPS_TIME_WAIT;
- tcp_canceltimers(tp);
- /* Shorten TIME_WAIT [RFC-1644, p.28] */
- if (tp->cc_recv != 0 &&
- (ticks - tp->t_starttime) < tcp_msl) {
- callout_reset(tp->tt_2msl,
- tp->t_rxtcur * TCPTV_TWTRUNC,
- tcp_timer_2msl, tp);
- /* For transaction client, force ACK now. */
- tp->t_flags |= TF_ACKNOW;
- }
- else
- callout_reset(tp->tt_2msl, 2 * tcp_msl,
- tcp_timer_2msl, tp);
- soisdisconnected(so);
- break;
+ KASSERT(headlocked == 0, ("headlocked"));
+ tcp_twstart(tp);
+ return;
/*
* In TIME_WAIT state restart the 2 MSL time_wait timer.
*/
case TCPS_TIME_WAIT:
+ KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
callout_reset(tp->tt_2msl, 2 * tcp_msl,
tcp_timer_2msl, tp);
break;
@@ -2802,3 +2809,152 @@ tcp_newreno_partial_ack(tp, th)
*/
tp->snd_cwnd -= (th->th_ack - tp->snd_una - tp->t_maxseg);
}
+
+/*
+ * Process a segment that arrived for a connection held in compressed
+ * TIME_WAIT state (a struct tcptw instead of a full tcpcb).
+ *
+ * tw: the TIME_WAIT state; its inpcb (tw->tw_inpcb) is locked on entry.
+ * to: parsed TCP options; only TOF_CC / to_cc are consulted here.
+ * th: TCP header of the incoming segment.
+ * m: mbuf holding the segment, starting at the IP/IPv6 header.
+ * tlen: segment length used when locating the sequence number of a FIN.
+ *
+ * Returns 1 if the TIME_WAIT state was killed and we should start over,
+ * looking for a pcb in the listen state. Returns 0 otherwise.
+ *
+ * When 0 is returned the inpcb has been unlocked and the mbuf has either
+ * been freed or passed to tcp_respond(). When 1 is returned tcp_twclose()
+ * has been called on tw and the mbuf is left for the caller to reprocess.
+ */
+static int
+tcp_timewait(tw, to, th, m, tlen)
+ struct tcptw *tw;
+ struct tcpopt *to;
+ struct tcphdr *th;
+ struct mbuf *m;
+ int tlen;
+{
+ int thflags;
+ tcp_seq seq;
+#ifdef INET6
+ int isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0;
+#else
+ const int isipv6 = 0;
+#endif
+
+ thflags = th->th_flags;
+
+ /*
+ * NOTE: for FIN_WAIT_2 (to be added later),
+ * must validate sequence number before accepting RST
+ */
+
+ /*
+ * If the segment contains RST:
+ * Drop the segment - see Stevens, vol. 2, p. 964 and
+ * RFC 1337.
+ */
+ if (thflags & TH_RST)
+ goto drop;
+
+ /*
+ * If segment contains a SYN and CC [not CC.NEW] option:
+ * if connection duration > MSL, drop packet and send RST;
+ *
+ * if SEG.CC > CCrecv then is new SYN.
+ * Complete close and delete TCPCB. Then reprocess
+ * segment, hoping to find new TCPCB in LISTEN state;
+ *
+ * else must be old SYN; drop it.
+ * else do normal processing.
+ */
+ if ((thflags & TH_SYN) && (to->to_flags & TOF_CC) && tw->cc_recv != 0) {
+ if ((ticks - tw->t_starttime) > tcp_msl)
+ goto reset;
+ if (CC_GT(to->to_cc, tw->cc_recv)) {
+ tcp_twclose(tw);
+ return (1);
+ }
+ goto drop;
+ }
+
+#if 0
+/* PAWS not needed at the moment */
+ /*
+ * RFC 1323 PAWS: If we have a timestamp reply on this segment
+ * and it's less than ts_recent, drop it.
+ */
+ if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent &&
+ TSTMP_LT(to.to_tsval, tp->ts_recent)) {
+ if ((thflags & TH_ACK) == 0)
+ goto drop;
+ goto ack;
+ }
+ /*
+ * ts_recent is never updated because we never accept new segments.
+ */
+#endif
+
+ /*
+ * If a new connection request is received
+ * while in TIME_WAIT, drop the old connection
+ * and start over if the sequence numbers
+ * are above the previous ones.
+ */
+ if ((thflags & TH_SYN) && SEQ_GT(th->th_seq, tw->rcv_nxt)) {
+ tcp_twclose(tw);
+ return (1);
+ }
+
+ /*
+ * Drop the segment if it does not contain an ACK.
+ */
+ if ((thflags & TH_ACK) == 0)
+ goto drop;
+
+ /*
+ * Reset the 2MSL timer if this is a duplicate FIN.
+ * seq is the sequence number of the FIN itself; it is treated as a
+ * duplicate when it sits immediately below tw->rcv_nxt.
+ */
+ if (thflags & TH_FIN) {
+ seq = th->th_seq + tlen + (thflags & TH_SYN ? 1 : 0);
+ if (seq + 1 == tw->rcv_nxt)
+ callout_reset(tw->tt_2msl,
+ 2 * tcp_msl, tcp_timer_2msl, tw);
+ }
+
+ /*
+ * Acknowledge the segment, then drop it.
+ */
+ tcp_twrespond(tw, TH_ACK);
+ goto drop;
+
+reset:
+ /*
+ * Generate a RST, dropping incoming segment.
+ * Make ACK acceptable to originator of segment.
+ * Don't bother to respond if destination was broadcast/multicast.
+ */
+ if (m->m_flags & (M_BCAST|M_MCAST))
+ goto drop;
+ if (isipv6) {
+ struct ip6_hdr *ip6;
+
+ /* IPv6 anycast check is done at tcp6_input() */
+ ip6 = mtod(m, struct ip6_hdr *);
+ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
+ IN6_IS_ADDR_MULTICAST(&ip6->ip6_src))
+ goto drop;
+ } else {
+ struct ip *ip;
+
+ ip = mtod(m, struct ip *);
+ if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
+ IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
+ ip->ip_src.s_addr == htonl(INADDR_BROADCAST) ||
+ in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif))
+ goto drop;
+ }
+ /*
+ * The incoming mbuf is passed to tcp_respond() here, so this path
+ * does not call m_freem() on it.
+ */
+ if (thflags & TH_ACK) {
+ tcp_respond(NULL,
+ mtod(m, void *), th, m, 0, th->th_ack, TH_RST);
+ } else {
+ seq = th->th_seq + (thflags & TH_SYN ? 1 : 0);
+ tcp_respond(NULL,
+ mtod(m, void *), th, m, seq, 0, TH_RST|TH_ACK);
+ }
+ INP_UNLOCK(tw->tw_inpcb);
+ return (0);
+
+drop:
+ /* Common exit: release the inpcb lock and discard the segment. */
+ INP_UNLOCK(tw->tw_inpcb);
+ m_freem(m);
+ return (0);
+}
diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c
index ad20203..52dba70 100644
--- a/sys/netinet/tcp_reass.c
+++ b/sys/netinet/tcp_reass.c
@@ -137,6 +137,8 @@ static int tcp_reass(struct tcpcb *, struct tcphdr *, int *,
struct mbuf *);
static void tcp_xmit_timer(struct tcpcb *, int);
static void tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
+static int tcp_timewait(struct tcptw *, struct tcpopt *,
+ struct tcphdr *, struct mbuf *, int);
/* Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. */
#ifdef INET6
@@ -636,6 +638,22 @@ findpcb:
goto dropwithreset;
}
INP_LOCK(inp);
+ if (inp->inp_vflag & INP_TIMEWAIT) {
+ /*
+ * The only option of relevance is TOF_CC, and only if
+ * present in a SYN segment. See tcp_timewait().
+ */
+ if (thflags & TH_SYN)
+ tcp_dooptions(&to, optp, optlen, 1);
+ if (tcp_timewait((struct tcptw *)inp->inp_ppcb,
+ &to, th, m, tlen))
+ goto findpcb;
+ /*
+ * tcp_timewait unlocks inp.
+ */
+ INP_INFO_WUNLOCK(&tcbinfo);
+ return;
+ }
tp = intotcpcb(inp);
if (tp == 0) {
INP_UNLOCK(inp);
@@ -1319,6 +1337,7 @@ trimthenstep6:
case TCPS_LAST_ACK:
case TCPS_CLOSING:
case TCPS_TIME_WAIT:
+ KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
if ((thflags & TH_SYN) &&
(to.to_flags & TOF_CC) && tp->cc_recv != 0) {
if (tp->t_state == TCPS_TIME_WAIT &&
@@ -1418,6 +1437,8 @@ trimthenstep6:
break;
case TCPS_TIME_WAIT:
+ KASSERT(tp->t_state != TCPS_TIME_WAIT,
+ ("timewait"));
break;
}
}
@@ -1550,6 +1571,7 @@ trimthenstep6:
* and start over if the sequence numbers
* are above the previous ones.
*/
+ KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
if (thflags & TH_SYN &&
tp->t_state == TCPS_TIME_WAIT &&
SEQ_GT(th->th_seq, tp->rcv_nxt)) {
@@ -1678,7 +1700,7 @@ trimthenstep6:
case TCPS_CLOSING:
case TCPS_LAST_ACK:
case TCPS_TIME_WAIT:
-
+ KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
if (tlen == 0 && tiwin == tp->snd_wnd) {
tcpstat.tcps_rcvdupack++;
@@ -1921,6 +1943,10 @@ process_ACK:
* specification, but if we don't get a FIN
* we'll hang forever.
*/
+ /* XXXjl
+ * we should release the tp also, and use a
+ * compressed state.
+ */
if (so->so_state & SS_CANTRCVMORE) {
soisdisconnected(so);
callout_reset(tp->tt_2msl, tcp_maxidle,
@@ -1938,19 +1964,11 @@ process_ACK:
*/
case TCPS_CLOSING:
if (ourfinisacked) {
- tp->t_state = TCPS_TIME_WAIT;
- tcp_canceltimers(tp);
- /* Shorten TIME_WAIT [RFC-1644, p.28] */
- if (tp->cc_recv != 0 &&
- (ticks - tp->t_starttime) < tcp_msl)
- callout_reset(tp->tt_2msl,
- tp->t_rxtcur *
- TCPTV_TWTRUNC,
- tcp_timer_2msl, tp);
- else
- callout_reset(tp->tt_2msl, 2 * tcp_msl,
- tcp_timer_2msl, tp);
- soisdisconnected(so);
+ KASSERT(headlocked, ("headlocked"));
+ INP_INFO_WUNLOCK(&tcbinfo);
+ m_freem(m);
+ tcp_twstart(tp);
+ return;
}
break;
@@ -1973,6 +1991,7 @@ process_ACK:
* it and restart the finack timer.
*/
case TCPS_TIME_WAIT:
+ KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
callout_reset(tp->tt_2msl, 2 * tcp_msl,
tcp_timer_2msl, tp);
goto dropafterack;
@@ -2166,27 +2185,15 @@ dodata: /* XXX */
* standard timers.
*/
case TCPS_FIN_WAIT_2:
- tp->t_state = TCPS_TIME_WAIT;
- tcp_canceltimers(tp);
- /* Shorten TIME_WAIT [RFC-1644, p.28] */
- if (tp->cc_recv != 0 &&
- (ticks - tp->t_starttime) < tcp_msl) {
- callout_reset(tp->tt_2msl,
- tp->t_rxtcur * TCPTV_TWTRUNC,
- tcp_timer_2msl, tp);
- /* For transaction client, force ACK now. */
- tp->t_flags |= TF_ACKNOW;
- }
- else
- callout_reset(tp->tt_2msl, 2 * tcp_msl,
- tcp_timer_2msl, tp);
- soisdisconnected(so);
- break;
+ KASSERT(headlocked == 0, ("headlocked"));
+ tcp_twstart(tp);
+ return;
/*
* In TIME_WAIT state restart the 2 MSL time_wait timer.
*/
case TCPS_TIME_WAIT:
+ KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
callout_reset(tp->tt_2msl, 2 * tcp_msl,
tcp_timer_2msl, tp);
break;
@@ -2802,3 +2809,152 @@ tcp_newreno_partial_ack(tp, th)
*/
tp->snd_cwnd -= (th->th_ack - tp->snd_una - tp->t_maxseg);
}
+
+/*
+ * Process a segment that arrived for a connection held in compressed
+ * TIME_WAIT state (a struct tcptw instead of a full tcpcb).
+ *
+ * tw: the TIME_WAIT state; its inpcb (tw->tw_inpcb) is locked on entry.
+ * to: parsed TCP options; only TOF_CC / to_cc are consulted here.
+ * th: TCP header of the incoming segment.
+ * m: mbuf holding the segment, starting at the IP/IPv6 header.
+ * tlen: segment length used when locating the sequence number of a FIN.
+ *
+ * Returns 1 if the TIME_WAIT state was killed and we should start over,
+ * looking for a pcb in the listen state. Returns 0 otherwise.
+ *
+ * When 0 is returned the inpcb has been unlocked and the mbuf has either
+ * been freed or passed to tcp_respond(). When 1 is returned tcp_twclose()
+ * has been called on tw and the mbuf is left for the caller to reprocess.
+ */
+static int
+tcp_timewait(tw, to, th, m, tlen)
+ struct tcptw *tw;
+ struct tcpopt *to;
+ struct tcphdr *th;
+ struct mbuf *m;
+ int tlen;
+{
+ int thflags;
+ tcp_seq seq;
+#ifdef INET6
+ int isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0;
+#else
+ const int isipv6 = 0;
+#endif
+
+ thflags = th->th_flags;
+
+ /*
+ * NOTE: for FIN_WAIT_2 (to be added later),
+ * must validate sequence number before accepting RST
+ */
+
+ /*
+ * If the segment contains RST:
+ * Drop the segment - see Stevens, vol. 2, p. 964 and
+ * RFC 1337.
+ */
+ if (thflags & TH_RST)
+ goto drop;
+
+ /*
+ * If segment contains a SYN and CC [not CC.NEW] option:
+ * if connection duration > MSL, drop packet and send RST;
+ *
+ * if SEG.CC > CCrecv then is new SYN.
+ * Complete close and delete TCPCB. Then reprocess
+ * segment, hoping to find new TCPCB in LISTEN state;
+ *
+ * else must be old SYN; drop it.
+ * else do normal processing.
+ */
+ if ((thflags & TH_SYN) && (to->to_flags & TOF_CC) && tw->cc_recv != 0) {
+ if ((ticks - tw->t_starttime) > tcp_msl)
+ goto reset;
+ if (CC_GT(to->to_cc, tw->cc_recv)) {
+ tcp_twclose(tw);
+ return (1);
+ }
+ goto drop;
+ }
+
+#if 0
+/* PAWS not needed at the moment */
+ /*
+ * RFC 1323 PAWS: If we have a timestamp reply on this segment
+ * and it's less than ts_recent, drop it.
+ */
+ if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent &&
+ TSTMP_LT(to.to_tsval, tp->ts_recent)) {
+ if ((thflags & TH_ACK) == 0)
+ goto drop;
+ goto ack;
+ }
+ /*
+ * ts_recent is never updated because we never accept new segments.
+ */
+#endif
+
+ /*
+ * If a new connection request is received
+ * while in TIME_WAIT, drop the old connection
+ * and start over if the sequence numbers
+ * are above the previous ones.
+ */
+ if ((thflags & TH_SYN) && SEQ_GT(th->th_seq, tw->rcv_nxt)) {
+ tcp_twclose(tw);
+ return (1);
+ }
+
+ /*
+ * Drop the segment if it does not contain an ACK.
+ */
+ if ((thflags & TH_ACK) == 0)
+ goto drop;
+
+ /*
+ * Reset the 2MSL timer if this is a duplicate FIN.
+ * seq is the sequence number of the FIN itself; it is treated as a
+ * duplicate when it sits immediately below tw->rcv_nxt.
+ */
+ if (thflags & TH_FIN) {
+ seq = th->th_seq + tlen + (thflags & TH_SYN ? 1 : 0);
+ if (seq + 1 == tw->rcv_nxt)
+ callout_reset(tw->tt_2msl,
+ 2 * tcp_msl, tcp_timer_2msl, tw);
+ }
+
+ /*
+ * Acknowledge the segment, then drop it.
+ */
+ tcp_twrespond(tw, TH_ACK);
+ goto drop;
+
+reset:
+ /*
+ * Generate a RST, dropping incoming segment.
+ * Make ACK acceptable to originator of segment.
+ * Don't bother to respond if destination was broadcast/multicast.
+ */
+ if (m->m_flags & (M_BCAST|M_MCAST))
+ goto drop;
+ if (isipv6) {
+ struct ip6_hdr *ip6;
+
+ /* IPv6 anycast check is done at tcp6_input() */
+ ip6 = mtod(m, struct ip6_hdr *);
+ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
+ IN6_IS_ADDR_MULTICAST(&ip6->ip6_src))
+ goto drop;
+ } else {
+ struct ip *ip;
+
+ ip = mtod(m, struct ip *);
+ if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
+ IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
+ ip->ip_src.s_addr == htonl(INADDR_BROADCAST) ||
+ in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif))
+ goto drop;
+ }
+ /*
+ * The incoming mbuf is passed to tcp_respond() here, so this path
+ * does not call m_freem() on it.
+ */
+ if (thflags & TH_ACK) {
+ tcp_respond(NULL,
+ mtod(m, void *), th, m, 0, th->th_ack, TH_RST);
+ } else {
+ seq = th->th_seq + (thflags & TH_SYN ? 1 : 0);
+ tcp_respond(NULL,
+ mtod(m, void *), th, m, seq, 0, TH_RST|TH_ACK);
+ }
+ INP_UNLOCK(tw->tw_inpcb);
+ return (0);
+
+drop:
+ /* Common exit: release the inpcb lock and discard the segment. */
+ INP_UNLOCK(tw->tw_inpcb);
+ m_freem(m);
+ return (0);
+}
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 3c43acb..dc7730f 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -179,6 +179,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, inflight_stab, CTLFLAG_RW,
static void tcp_cleartaocache(void);
static struct inpcb *tcp_notify(struct inpcb *, int);
+static void tcp_discardcb(struct tcpcb *);
/*
* Target size of TCP PCB hash tables. Must be a power of two.
@@ -191,26 +192,23 @@ static struct inpcb *tcp_notify(struct inpcb *, int);
#endif
/*
- * This is the actual shape of what we allocate using the zone
- * allocator. Doing it this way allows us to protect both structures
- * using the same generation count, and also eliminates the overhead
- * of allocating tcpcbs separately. By hiding the structure here,
- * we avoid changing most of the rest of the code (although it needs
- * to be changed, eventually, for greater efficiency).
+ * XXX
+ * Callouts should be moved into struct tcp directly. They are currently
+ * separate because the tcpcb structure is exported to userland for sysctl
+ * parsing purposes, which do not know about callouts.
*/
-#define ALIGNMENT 32
-#define ALIGNM1 (ALIGNMENT - 1)
-struct inp_tp {
- union {
- struct inpcb inp;
- char align[(sizeof(struct inpcb) + ALIGNM1) & ~ALIGNM1];
- } inp_tp_u;
+struct tcpcb_mem {
struct tcpcb tcb;
- struct callout inp_tp_rexmt, inp_tp_persist, inp_tp_keep, inp_tp_2msl;
- struct callout inp_tp_delack;
+ struct callout tcpcb_mem_rexmt, tcpcb_mem_persist, tcpcb_mem_keep;
+ struct callout tcpcb_mem_2msl, tcpcb_mem_delack;
+};
+struct tcptw_mem {
+ struct tcptw tw;
+ struct callout tcptw_mem_2msl;
};
-#undef ALIGNMENT
-#undef ALIGNM1
+
+static uma_zone_t tcpcb_zone;
+static uma_zone_t tcptw_zone;
/*
* Tcp initialization
@@ -244,7 +242,7 @@ tcp_init()
tcbinfo.hashbase = hashinit(hashsize, M_PCB, &tcbinfo.hashmask);
tcbinfo.porthashbase = hashinit(hashsize, M_PCB,
&tcbinfo.porthashmask);
- tcbinfo.ipi_zone = uma_zcreate("tcpcb", sizeof(struct inp_tp),
+ tcbinfo.ipi_zone = uma_zcreate("inpcb", sizeof(struct inpcb),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
uma_zone_set_max(tcbinfo.ipi_zone, maxsockets);
#ifdef INET6
@@ -257,6 +255,15 @@ tcp_init()
if (max_linkhdr + TCP_MINPROTOHDR > MHLEN)
panic("tcp_init");
#undef TCP_MINPROTOHDR
+ /*
+ * These have to be type stable for the benefit of the timers.
+ */
+ tcpcb_zone = uma_zcreate("tcpcb", sizeof(struct tcpcb_mem),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ uma_zone_set_max(tcpcb_zone, maxsockets);
+ tcptw_zone = uma_zcreate("tcptw", sizeof(struct tcptw_mem),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ uma_zone_set_max(tcptw_zone, maxsockets);
syncache_init();
}
@@ -552,16 +559,17 @@ struct tcpcb *
tcp_newtcpcb(inp)
struct inpcb *inp;
{
- struct inp_tp *it;
- register struct tcpcb *tp;
+ struct tcpcb_mem *tm;
+ struct tcpcb *tp;
#ifdef INET6
int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
#endif /* INET6 */
- it = (struct inp_tp *)inp;
- tp = &it->tcb;
- bzero((char *) tp, sizeof(struct tcpcb));
- LIST_INIT(&tp->t_segq);
+ tm = uma_zalloc(tcpcb_zone, M_NOWAIT | M_ZERO);
+ if (tm == NULL)
+ return (NULL);
+ tp = &tm->tcb;
+ /* LIST_INIT(&tp->t_segq); */ /* XXX covered by M_ZERO */
tp->t_maxseg = tp->t_maxopd =
#ifdef INET6
isipv6 ? tcp_v6mssdflt :
@@ -569,11 +577,11 @@ tcp_newtcpcb(inp)
tcp_mssdflt;
/* Set up our timeouts. */
- callout_init(tp->tt_rexmt = &it->inp_tp_rexmt, 0);
- callout_init(tp->tt_persist = &it->inp_tp_persist, 0);
- callout_init(tp->tt_keep = &it->inp_tp_keep, 0);
- callout_init(tp->tt_2msl = &it->inp_tp_2msl, 0);
- callout_init(tp->tt_delack = &it->inp_tp_delack, 0);
+ callout_init(tp->tt_rexmt = &tm->tcpcb_mem_rexmt, 0);
+ callout_init(tp->tt_persist = &tm->tcpcb_mem_persist, 0);
+ callout_init(tp->tt_keep = &tm->tcpcb_mem_keep, 0);
+ callout_init(tp->tt_2msl = &tm->tcpcb_mem_2msl, 0);
+ callout_init(tp->tt_delack = &tm->tcpcb_mem_delack, 0);
if (tcp_do_rfc1323)
tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
@@ -628,23 +636,17 @@ tcp_drop(tp, errno)
return (tcp_close(tp));
}
-/*
- * Close a TCP control block:
- * discard all space held by the tcp
- * discard internet protocol block
- * wake up any sleepers
- */
-struct tcpcb *
-tcp_close(tp)
- register struct tcpcb *tp;
+static void
+tcp_discardcb(tp)
+ struct tcpcb *tp;
{
- register struct tseg_qent *q;
+ struct tseg_qent *q;
struct inpcb *inp = tp->t_inpcb;
struct socket *so = inp->inp_socket;
#ifdef INET6
int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
#endif /* INET6 */
- register struct rtentry *rt;
+ struct rtentry *rt;
int dosavessthresh;
/*
@@ -762,20 +764,37 @@ tcp_close(tp)
}
no_valid_rt:
/* free the reassembly queue, if any */
- while((q = LIST_FIRST(&tp->t_segq)) != NULL) {
+ while ((q = LIST_FIRST(&tp->t_segq)) != NULL) {
LIST_REMOVE(q, tqe_q);
m_freem(q->tqe_m);
FREE(q, M_TSEGQ);
}
inp->inp_ppcb = NULL;
tp->t_inpcb = NULL;
+ uma_zfree(tcpcb_zone, tp);
soisdisconnected(so);
+}
+
+/*
+ * Close a TCP control block:
+ * discard all space held by the tcp
+ * discard internet protocol block
+ * wake up any sleepers
+ */
+struct tcpcb *
+tcp_close(tp)
+ struct tcpcb *tp;
+{
+ struct inpcb *inp = tp->t_inpcb;
+ struct socket *so = inp->inp_socket;
+
+ tcp_discardcb(tp);
#ifdef INET6
if (INP_CHECK_SOCKAF(so, AF_INET6))
in6_pcbdetach(inp);
else
-#endif /* INET6 */
- in_pcbdetach(inp);
+#endif
+ in_pcbdetach(inp);
tcpstat.tcps_closed++;
return ((struct tcpcb *)0);
}
@@ -799,6 +818,8 @@ tcp_drain()
*/
INP_INFO_RLOCK(&tcbinfo);
LIST_FOREACH(inpb, tcbinfo.listhead, inp_list) {
+ if (inpb->inp_vflag & INP_TIMEWAIT)
+ continue;
INP_LOCK(inpb);
if ((tcpb = intotcpcb(inpb))) {
while ((te = LIST_FIRST(&tcpb->t_segq))
@@ -908,7 +929,9 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
inp = LIST_NEXT(inp, inp_list)) {
INP_LOCK(inp);
if (inp->inp_gencnt <= gencnt &&
- cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0)
+ (((inp->inp_vflag & INP_TIMEWAIT) &&
+ cr_cansee(req->td->td_ucred, intotw(inp)->tw_cred) == 0) ||
+ cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0))
inp_list[i++] = inp;
INP_UNLOCK(inp);
}
@@ -926,12 +949,19 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
/* XXX should avoid extra copy */
bcopy(inp, &xt.xt_inp, sizeof *inp);
inp_ppcb = inp->inp_ppcb;
- if (inp_ppcb != NULL)
- bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp);
- else
+ if (inp_ppcb == NULL)
+ bzero((char *) &xt.xt_tp, sizeof xt.xt_tp);
+ else if (inp->inp_vflag & INP_TIMEWAIT) {
bzero((char *) &xt.xt_tp, sizeof xt.xt_tp);
+ xt.xt_tp.t_state = TCPS_TIME_WAIT;
+ } else
+ bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp);
if (inp->inp_socket)
sotoxsocket(inp->inp_socket, &xt.xt_socket);
+ else {
+ bzero(&xt.xt_socket, sizeof xt.xt_socket);
+ xt.xt_socket.xso_protocol = IPPROTO_TCP;
+ }
xt.xt_inp.inp_gencnt = inp->inp_gencnt;
error = SYSCTL_OUT(req, &xt, sizeof xt);
}
@@ -1487,7 +1517,7 @@ ipsec_hdrsiz_tcp(tp)
struct ip *ip;
#ifdef INET6
struct ip6_hdr *ip6;
-#endif /* INET6 */
+#endif
struct tcphdr *th;
if ((tp == NULL) || ((inp = tp->t_inpcb) == NULL))
@@ -1559,6 +1589,196 @@ tcp_cleartaocache()
}
/*
+ * Move a TCP connection into TIME_WAIT state.
+ * tcbinfo is unlocked.
+ * inp is locked, and is unlocked before returning.
+ *
+ * The full tcpcb is replaced by a small struct tcptw that keeps only the
+ * fields copied below; the tcpcb is discarded and the socket is released,
+ * leaving the inpcb flagged INP_TIMEWAIT with inp_ppcb pointing at the
+ * tcptw and inp_socket set to NULL.
+ *
+ * tp: the connection to move; it must not be used after this call.
+ */
+void
+tcp_twstart(tp)
+ struct tcpcb *tp;
+{
+ struct tcptw_mem *tm;
+ struct tcptw *tw;
+ struct inpcb *inp;
+ int tw_time, acknow;
+ struct socket *so;
+
+ /* M_NOWAIT without M_ZERO: every tcptw field used is assigned below. */
+ tm = uma_zalloc(tcptw_zone, M_NOWAIT);
+ if (tm == NULL)
+ /* EEEK! -- preserve old structure or just kill everything? */
+ /* must obtain tcbinfo lock in order to drop the structure. */
+ panic("uma_zalloc(tcptw)");
+ tw = &tm->tw;
+ inp = tp->t_inpcb;
+ tw->tw_inpcb = inp;
+
+ /*
+ * Recover last window size sent.
+ */
+ tw->last_win = (tp->rcv_adv - tp->rcv_nxt) >> tp->rcv_scale;
+
+ /*
+ * Set t_recent if timestamps are used on the connection.
+ */
+ if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) ==
+ (TF_REQ_TSTMP|TF_RCVD_TSTMP))
+ tw->t_recent = tp->ts_recent;
+ else
+ tw->t_recent = 0;
+
+ tw->snd_nxt = tp->snd_nxt;
+ tw->rcv_nxt = tp->rcv_nxt;
+ tw->cc_recv = tp->cc_recv;
+ tw->cc_send = tp->cc_send;
+ tw->t_starttime = tp->t_starttime;
+ /* The callout storage lives next to the tcptw in struct tcptw_mem. */
+ callout_init(tw->tt_2msl = &tm->tcptw_mem_2msl, 0);
+
+/* XXX
+ * If this code will
+ * be used for fin-wait-2 state also, then we may need
+ * a ts_recent from the last segment.
+ */
+ /* Shorten TIME_WAIT [RFC-1644, p.28] */
+ if (tp->cc_recv != 0 && (ticks - tp->t_starttime) < tcp_msl) {
+ tw_time = tp->t_rxtcur * TCPTV_TWTRUNC;
+ /* For T/TCP client, force ACK now. */
+ acknow = 1;
+ } else {
+ tw_time = 2 * tcp_msl;
+ acknow = tp->t_flags & TF_ACKNOW;
+ }
+ /*
+ * Everything needed from tp has been read above; tp is handed to
+ * tcp_discardcb() and is not referenced again.
+ */
+ tcp_discardcb(tp);
+ /*
+ * Detach the socket from the pcb. A reference to its credential and
+ * a copy of its options are kept in the tcptw, since inp_socket is
+ * NULL from here on.
+ */
+ so = inp->inp_socket;
+ so->so_pcb = NULL;
+ tw->tw_cred = crhold(so->so_cred);
+ tw->tw_so_options = so->so_options;
+ sotryfree(so);
+ inp->inp_socket = NULL;
+ /* Hang the tcptw off the inpcb and mark the inpcb as TIME_WAIT. */
+ inp->inp_ppcb = (caddr_t)tw;
+ inp->inp_vflag |= INP_TIMEWAIT;
+ callout_reset(tw->tt_2msl, tw_time, tcp_timer_2msl_tw, tw);
+ if (acknow)
+ tcp_twrespond(tw, TH_ACK);
+ INP_UNLOCK(inp);
+}
+
+/*
+ * Tear down a compressed TIME_WAIT state.
+ *
+ * Stops the 2MSL callout, unhooks the tcptw from its inpcb, drops the
+ * credential reference held in tw->tw_cred, returns the tcptw to its
+ * zone and finally detaches the inpcb via in6_pcbdetach() or
+ * in_pcbdetach().
+ *
+ * tw: the TIME_WAIT state to destroy; it must not be used afterwards.
+ */
+void
+tcp_twclose(tw)
+	struct tcptw *tw;
+{
+	struct inpcb *inp;
+
+	inp = tw->tw_inpcb;
+	/* A NULL tw_inpcb is what tcp_timer_2msl_tw() checks for. */
+	tw->tw_inpcb = NULL;
+	callout_stop(tw->tt_2msl);
+	inp->inp_ppcb = NULL;
+	/*
+	 * tcp_twstart() took a reference on the socket's credential with
+	 * crhold(); release it here, before the tcptw goes back to the
+	 * zone, so the ucred is not leaked.
+	 */
+	crfree(tw->tw_cred);
+	tw->tw_cred = NULL;
+	uma_zfree(tcptw_zone, tw);
+#ifdef INET6
+	if (inp->inp_vflag & INP_IPV6PROTO)
+		in6_pcbdetach(inp);
+	else
+#endif
+		in_pcbdetach(inp);
+	tcpstat.tcps_closed++;
+}
+
+/*
+ * Send a header-only TCP segment on behalf of a connection in compressed
+ * TIME_WAIT state, using only what the tcptw and its inpcb preserve.
+ *
+ * tw:    TIME_WAIT state; supplies the inpcb, sequence numbers, last
+ *        advertised window, saved timestamp/CC values and socket options.
+ * flags: TCP header flags for the segment.  The timestamp and CC options
+ *        are only appended when flags is exactly TH_ACK.
+ *
+ * Returns ENOBUFS if no mbuf could be allocated, otherwise the value
+ * returned by ip6_output() or ip_output().
+ *
+ * All IPv6 handling is compiled only under INET6: the ip6 local exists
+ * only in that configuration, so the branches that use it must be
+ * guarded the same way for kernels built without INET6 to compile.
+ */
+int
+tcp_twrespond(struct tcptw *tw, int flags)
+{
+	struct inpcb *inp = tw->tw_inpcb;
+	struct tcphdr *th;
+	struct mbuf *m;
+	struct ip *ip = NULL;
+	u_int8_t *optp;
+	u_int hdrlen, optlen;
+	int error;
+#ifdef INET6
+	struct ip6_hdr *ip6 = NULL;
+	int isipv6 = inp->inp_inc.inc_isipv6;
+#endif
+
+	m = m_gethdr(M_NOWAIT, MT_HEADER);
+	if (m == NULL)
+		return (ENOBUFS);
+	/* Leave room in front of the packet for a link-level header. */
+	m->m_data += max_linkhdr;
+
+#ifdef INET6
+	if (isipv6) {
+		hdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
+		ip6 = mtod(m, struct ip6_hdr *);
+		th = (struct tcphdr *)(ip6 + 1);
+		tcpip_fillheaders(inp, ip6, th);
+	} else
+#endif
+	{
+		hdrlen = sizeof(struct tcpiphdr);
+		ip = mtod(m, struct ip *);
+		th = (struct tcphdr *)(ip + 1);
+		tcpip_fillheaders(inp, ip, th);
+	}
+	/* Options, if any, are written directly after the TCP header. */
+	optp = (u_int8_t *)(th + 1);
+
+	/*
+	 * Send a timestamp and echo-reply if both our side and our peer
+	 * have sent timestamps in our SYN's and this is not a RST.
+	 */
+	if (tw->t_recent && flags == TH_ACK) {
+		u_int32_t *lp = (u_int32_t *)optp;
+
+		/* Form timestamp option as shown in appendix A of RFC 1323. */
+		*lp++ = htonl(TCPOPT_TSTAMP_HDR);
+		*lp++ = htonl(ticks);
+		*lp   = htonl(tw->t_recent);
+		optp += TCPOLEN_TSTAMP_APPA;
+	}
+
+	/*
+	 * Send `CC-family' options if needed, and it's not a RST.
+	 */
+	if (tw->cc_recv != 0 && flags == TH_ACK) {
+		u_int32_t *lp = (u_int32_t *)optp;
+
+		*lp++ = htonl(TCPOPT_CC_HDR(TCPOPT_CC));
+		*lp   = htonl(tw->cc_send);
+		optp += TCPOLEN_CC_APPA;
+	}
+	optlen = optp - (u_int8_t *)(th + 1);
+
+	m->m_len = hdrlen + optlen;
+	m->m_pkthdr.len = m->m_len;
+
+	/* Headers plus options must fit in the single header mbuf. */
+	KASSERT(max_linkhdr + m->m_len <= MHLEN, ("tcptw: mbuf too small"));
+
+	th->th_seq = htonl(tw->snd_nxt);
+	th->th_ack = htonl(tw->rcv_nxt);
+	th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
+	th->th_flags = flags;
+	th->th_win = htons(tw->last_win);
+
+#ifdef INET6
+	if (isipv6) {
+		th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr),
+		    sizeof(struct tcphdr) + optlen);
+		ip6->ip6_hlim = in6_selecthlim(inp, inp->in6p_route.ro_rt ?
+		    inp->in6p_route.ro_rt->rt_ifp : NULL);
+		error = ip6_output(m, inp->in6p_outputopts, &inp->in6p_route,
+		    (tw->tw_so_options & SO_DONTROUTE), NULL, NULL, inp);
+	} else
+#endif
+	{
+		/*
+		 * Store only the pseudo-header sum and request TCP checksum
+		 * completion through the mbuf's csum_flags/csum_data.
+		 */
+		th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+		    htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP));
+		m->m_pkthdr.csum_flags = CSUM_TCP;
+		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
+		ip->ip_len = m->m_pkthdr.len;
+		error = ip_output(m, inp->inp_options, &inp->inp_route,
+		    (tw->tw_so_options & SO_DONTROUTE), NULL, inp);
+	}
+	if (flags & TH_ACK)
+		tcpstat.tcps_sndacks++;
+	else
+		tcpstat.tcps_sndctrl++;
+	tcpstat.tcps_sndtotal++;
+	return (error);
+}
+
+/*
* TCP BANDWIDTH DELAY PRODUCT WINDOW LIMITING
*
* This code attempts to calculate the bandwidth-delay product as a
diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c
index 8a68579..16955d6 100644
--- a/sys/netinet/tcp_timer.c
+++ b/sys/netinet/tcp_timer.c
@@ -252,6 +252,33 @@ tcp_timer_2msl(xtp)
}
+/*
+ * Callout handler for the 2MSL timer of a compressed TIME_WAIT entry.
+ *
+ * Runs at splnet with the tcbinfo write lock held.  Nothing is done if
+ * the tcptw is no longer attached to an inpcb, or if the callout has
+ * been rescheduled (pending) or stopped (no longer active) since it
+ * fired.  Otherwise the callout is deactivated and the TIME_WAIT state
+ * is destroyed with tcp_twclose(), which passes the inpcb on to
+ * in6_pcbdetach()/in_pcbdetach(); the inpcb is therefore only unlocked
+ * here on the path where the entry is left alone.
+ *
+ * xtw: the struct tcptw the callout was armed with.
+ */
void
+tcp_timer_2msl_tw(xtw)
+	void *xtw;
+{
+	struct tcptw *twp;
+	int ipl;
+
+	twp = xtw;
+	ipl = splnet();
+	INP_INFO_WLOCK(&tcbinfo);
+	if (twp->tw_inpcb != NULL) {
+		INP_LOCK(twp->tw_inpcb);
+		if (!callout_pending(twp->tt_2msl) &&
+		    callout_active(twp->tt_2msl)) {
+			/* This firing is still current: expire the entry. */
+			callout_deactivate(twp->tt_2msl);
+			tcp_twclose(twp);
+		} else {
+			/* Timer was re-armed or cancelled; leave it be. */
+			INP_UNLOCK(twp->tw_inpcb);
+		}
+	}
+	INP_INFO_WUNLOCK(&tcbinfo);
+	splx(ipl);
+}
+
+void
tcp_timer_keep(xtp)
void *xtp;
{
diff --git a/sys/netinet/tcp_timer.h b/sys/netinet/tcp_timer.h
index cb835fb..40080b7 100644
--- a/sys/netinet/tcp_timer.h
+++ b/sys/netinet/tcp_timer.h
@@ -152,6 +152,7 @@ extern int tcp_ttl; /* time to live for TCP segs */
extern int tcp_backoff[];
void tcp_timer_2msl(void *xtp);
+void tcp_timer_2msl_tw(void *xtw); /* XXX temporary */
void tcp_timer_keep(void *xtp);
void tcp_timer_persist(void *xtp);
void tcp_timer_rexmt(void *xtp);
diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c
index 3c43acb..dc7730f 100644
--- a/sys/netinet/tcp_timewait.c
+++ b/sys/netinet/tcp_timewait.c
@@ -179,6 +179,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, inflight_stab, CTLFLAG_RW,
static void tcp_cleartaocache(void);
static struct inpcb *tcp_notify(struct inpcb *, int);
+static void tcp_discardcb(struct tcpcb *);
/*
* Target size of TCP PCB hash tables. Must be a power of two.
@@ -191,26 +192,23 @@ static struct inpcb *tcp_notify(struct inpcb *, int);
#endif
/*
- * This is the actual shape of what we allocate using the zone
- * allocator. Doing it this way allows us to protect both structures
- * using the same generation count, and also eliminates the overhead
- * of allocating tcpcbs separately. By hiding the structure here,
- * we avoid changing most of the rest of the code (although it needs
- * to be changed, eventually, for greater efficiency).
+ * XXX
+ * Callouts should be moved into struct tcpcb directly. They are currently
+ * separate because the tcpcb structure is exported to userland for sysctl
+ * parsing purposes, and userland does not know about callouts.
*/
-#define ALIGNMENT 32
-#define ALIGNM1 (ALIGNMENT - 1)
-struct inp_tp {
- union {
- struct inpcb inp;
- char align[(sizeof(struct inpcb) + ALIGNM1) & ~ALIGNM1];
- } inp_tp_u;
+struct tcpcb_mem {
struct tcpcb tcb;
- struct callout inp_tp_rexmt, inp_tp_persist, inp_tp_keep, inp_tp_2msl;
- struct callout inp_tp_delack;
+ struct callout tcpcb_mem_rexmt, tcpcb_mem_persist, tcpcb_mem_keep;
+ struct callout tcpcb_mem_2msl, tcpcb_mem_delack;
+};
+struct tcptw_mem {
+ struct tcptw tw;
+ struct callout tcptw_mem_2msl;
};
-#undef ALIGNMENT
-#undef ALIGNM1
+
+static uma_zone_t tcpcb_zone;
+static uma_zone_t tcptw_zone;
/*
* Tcp initialization
@@ -244,7 +242,7 @@ tcp_init()
tcbinfo.hashbase = hashinit(hashsize, M_PCB, &tcbinfo.hashmask);
tcbinfo.porthashbase = hashinit(hashsize, M_PCB,
&tcbinfo.porthashmask);
- tcbinfo.ipi_zone = uma_zcreate("tcpcb", sizeof(struct inp_tp),
+ tcbinfo.ipi_zone = uma_zcreate("inpcb", sizeof(struct inpcb),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
uma_zone_set_max(tcbinfo.ipi_zone, maxsockets);
#ifdef INET6
@@ -257,6 +255,15 @@ tcp_init()
if (max_linkhdr + TCP_MINPROTOHDR > MHLEN)
panic("tcp_init");
#undef TCP_MINPROTOHDR
+ /*
+ * These have to be type stable for the benefit of the timers.
+ */
+ tcpcb_zone = uma_zcreate("tcpcb", sizeof(struct tcpcb_mem),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ uma_zone_set_max(tcpcb_zone, maxsockets);
+ tcptw_zone = uma_zcreate("tcptw", sizeof(struct tcptw_mem),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ uma_zone_set_max(tcptw_zone, maxsockets);
syncache_init();
}
@@ -552,16 +559,17 @@ struct tcpcb *
tcp_newtcpcb(inp)
struct inpcb *inp;
{
- struct inp_tp *it;
- register struct tcpcb *tp;
+ struct tcpcb_mem *tm;
+ struct tcpcb *tp;
#ifdef INET6
int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
#endif /* INET6 */
- it = (struct inp_tp *)inp;
- tp = &it->tcb;
- bzero((char *) tp, sizeof(struct tcpcb));
- LIST_INIT(&tp->t_segq);
+ tm = uma_zalloc(tcpcb_zone, M_NOWAIT | M_ZERO);
+ if (tm == NULL)
+ return (NULL);
+ tp = &tm->tcb;
+ /* LIST_INIT(&tp->t_segq); */ /* XXX covered by M_ZERO */
tp->t_maxseg = tp->t_maxopd =
#ifdef INET6
isipv6 ? tcp_v6mssdflt :
@@ -569,11 +577,11 @@ tcp_newtcpcb(inp)
tcp_mssdflt;
/* Set up our timeouts. */
- callout_init(tp->tt_rexmt = &it->inp_tp_rexmt, 0);
- callout_init(tp->tt_persist = &it->inp_tp_persist, 0);
- callout_init(tp->tt_keep = &it->inp_tp_keep, 0);
- callout_init(tp->tt_2msl = &it->inp_tp_2msl, 0);
- callout_init(tp->tt_delack = &it->inp_tp_delack, 0);
+ callout_init(tp->tt_rexmt = &tm->tcpcb_mem_rexmt, 0);
+ callout_init(tp->tt_persist = &tm->tcpcb_mem_persist, 0);
+ callout_init(tp->tt_keep = &tm->tcpcb_mem_keep, 0);
+ callout_init(tp->tt_2msl = &tm->tcpcb_mem_2msl, 0);
+ callout_init(tp->tt_delack = &tm->tcpcb_mem_delack, 0);
if (tcp_do_rfc1323)
tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
@@ -628,23 +636,17 @@ tcp_drop(tp, errno)
return (tcp_close(tp));
}
-/*
- * Close a TCP control block:
- * discard all space held by the tcp
- * discard internet protocol block
- * wake up any sleepers
- */
-struct tcpcb *
-tcp_close(tp)
- register struct tcpcb *tp;
+static void
+tcp_discardcb(tp)
+ struct tcpcb *tp;
{
- register struct tseg_qent *q;
+ struct tseg_qent *q;
struct inpcb *inp = tp->t_inpcb;
struct socket *so = inp->inp_socket;
#ifdef INET6
int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
#endif /* INET6 */
- register struct rtentry *rt;
+ struct rtentry *rt;
int dosavessthresh;
/*
@@ -762,20 +764,37 @@ tcp_close(tp)
}
no_valid_rt:
/* free the reassembly queue, if any */
- while((q = LIST_FIRST(&tp->t_segq)) != NULL) {
+ while ((q = LIST_FIRST(&tp->t_segq)) != NULL) {
LIST_REMOVE(q, tqe_q);
m_freem(q->tqe_m);
FREE(q, M_TSEGQ);
}
inp->inp_ppcb = NULL;
tp->t_inpcb = NULL;
+ uma_zfree(tcpcb_zone, tp);
soisdisconnected(so);
+}
+
+/*
+ * Close a TCP control block:
+ * discard all space held by the tcp
+ * discard internet protocol block
+ * wake up any sleepers
+ */
+struct tcpcb *
+tcp_close(tp)
+ struct tcpcb *tp;
+{
+ struct inpcb *inp = tp->t_inpcb;
+ struct socket *so = inp->inp_socket;
+
+ tcp_discardcb(tp);
#ifdef INET6
if (INP_CHECK_SOCKAF(so, AF_INET6))
in6_pcbdetach(inp);
else
-#endif /* INET6 */
- in_pcbdetach(inp);
+#endif
+ in_pcbdetach(inp);
tcpstat.tcps_closed++;
return ((struct tcpcb *)0);
}
@@ -799,6 +818,8 @@ tcp_drain()
*/
INP_INFO_RLOCK(&tcbinfo);
LIST_FOREACH(inpb, tcbinfo.listhead, inp_list) {
+ if (inpb->inp_vflag & INP_TIMEWAIT)
+ continue;
INP_LOCK(inpb);
if ((tcpb = intotcpcb(inpb))) {
while ((te = LIST_FIRST(&tcpb->t_segq))
@@ -908,7 +929,9 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
inp = LIST_NEXT(inp, inp_list)) {
INP_LOCK(inp);
if (inp->inp_gencnt <= gencnt &&
- cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0)
+ (((inp->inp_vflag & INP_TIMEWAIT) &&
+ cr_cansee(req->td->td_ucred, intotw(inp)->tw_cred) == 0) ||
+ cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0))
inp_list[i++] = inp;
INP_UNLOCK(inp);
}
@@ -926,12 +949,19 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
/* XXX should avoid extra copy */
bcopy(inp, &xt.xt_inp, sizeof *inp);
inp_ppcb = inp->inp_ppcb;
- if (inp_ppcb != NULL)
- bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp);
- else
+ if (inp_ppcb == NULL)
+ bzero((char *) &xt.xt_tp, sizeof xt.xt_tp);
+ else if (inp->inp_vflag & INP_TIMEWAIT) {
bzero((char *) &xt.xt_tp, sizeof xt.xt_tp);
+ xt.xt_tp.t_state = TCPS_TIME_WAIT;
+ } else
+ bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp);
if (inp->inp_socket)
sotoxsocket(inp->inp_socket, &xt.xt_socket);
+ else {
+ bzero(&xt.xt_socket, sizeof xt.xt_socket);
+ xt.xt_socket.xso_protocol = IPPROTO_TCP;
+ }
xt.xt_inp.inp_gencnt = inp->inp_gencnt;
error = SYSCTL_OUT(req, &xt, sizeof xt);
}
@@ -1487,7 +1517,7 @@ ipsec_hdrsiz_tcp(tp)
struct ip *ip;
#ifdef INET6
struct ip6_hdr *ip6;
-#endif /* INET6 */
+#endif
struct tcphdr *th;
if ((tp == NULL) || ((inp = tp->t_inpcb) == NULL))
@@ -1559,6 +1589,196 @@ tcp_cleartaocache()
}
/*
+ * Move a TCP connection into TIME_WAIT state.
+ * tcbinfo is unlocked.
+ * inp is locked, and is unlocked before returning.
+ *
+ * The state still needed in TIME_WAIT is copied from the tcpcb into a
+ * newly allocated struct tcptw, the tcpcb is discarded and the socket is
+ * detached from the inpcb.  Afterwards the inpcb points at the tcptw and
+ * carries the INP_TIMEWAIT flag.  Panics if the tcptw cannot be allocated.
+ */
+void
+tcp_twstart(tp)
+ struct tcpcb *tp;
+{
+ struct tcptw_mem *tm;
+ struct tcptw *tw;
+ struct inpcb *inp;
+ int tw_time, acknow;
+ struct socket *so;
+
+ /* M_NOWAIT without M_ZERO: every tcptw field is assigned below. */
+ tm = uma_zalloc(tcptw_zone, M_NOWAIT);
+ if (tm == NULL)
+ /* EEEK! -- preserve old structure or just kill everything? */
+ /* must obtain tcbinfo lock in order to drop the structure. */
+ panic("uma_zalloc(tcptw)");
+ tw = &tm->tw;
+ inp = tp->t_inpcb;
+ tw->tw_inpcb = inp;
+
+ /*
+ * Recover last window size sent.
+ */
+ tw->last_win = (tp->rcv_adv - tp->rcv_nxt) >> tp->rcv_scale;
+
+ /*
+ * Set t_recent if timestamps are used on the connection.
+ */
+ if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) ==
+ (TF_REQ_TSTMP|TF_RCVD_TSTMP))
+ tw->t_recent = tp->ts_recent;
+ else
+ tw->t_recent = 0;
+
+ tw->snd_nxt = tp->snd_nxt;
+ tw->rcv_nxt = tp->rcv_nxt;
+ tw->cc_recv = tp->cc_recv;
+ tw->cc_send = tp->cc_send;
+ tw->t_starttime = tp->t_starttime;
+ /* The callout storage lives next to the tcptw inside tcptw_mem. */
+ callout_init(tw->tt_2msl = &tm->tcptw_mem_2msl, 0);
+
+/* XXX
+ * If this code will
+ * be used for fin-wait-2 state also, then we may need
+ * a ts_recent from the last segment.
+ */
+ /* Shorten TIME_WAIT [RFC-1644, p.28] */
+ if (tp->cc_recv != 0 && (ticks - tp->t_starttime) < tcp_msl) {
+ tw_time = tp->t_rxtcur * TCPTV_TWTRUNC;
+ /* For T/TCP client, force ACK now. */
+ acknow = 1;
+ } else {
+ tw_time = 2 * tcp_msl;
+ acknow = tp->t_flags & TF_ACKNOW;
+ }
+ /*
+ * Everything needed from tp has been copied above; tcp_discardcb()
+ * releases the tcpcb, so tp must not be used past this point.
+ */
+ tcp_discardcb(tp);
+ so = inp->inp_socket;
+ so->so_pcb = NULL;
+ /*
+ * Keep a reference to the socket's credential and a copy of its
+ * options before the socket is released.
+ */
+ tw->tw_cred = crhold(so->so_cred);
+ tw->tw_so_options = so->so_options;
+ sotryfree(so);
+ inp->inp_socket = NULL;
+ /* From here on the inpcb's protocol data is the tcptw. */
+ inp->inp_ppcb = (caddr_t)tw;
+ inp->inp_vflag |= INP_TIMEWAIT;
+ /* Arm the TIME_WAIT timer; tcp_timer_2msl_tw() closes the entry. */
+ callout_reset(tw->tt_2msl, tw_time, tcp_timer_2msl_tw, tw);
+ if (acknow)
+ tcp_twrespond(tw, TH_ACK);
+ INP_UNLOCK(inp);
+}
+
+/*
+ * Tear down a connection held in TIME_WAIT state: stop its 2*MSL timer,
+ * release the tcptw and detach the inpcb it was attached to.
+ *
+ * tw: the TIME_WAIT state to destroy; tw->tw_inpcb must still be set.
+ *
+ * The timer handler calls this with tcbinfo write-locked and the inpcb
+ * locked.
+ */
+void
+tcp_twclose(tw)
+	struct tcptw *tw;
+{
+	struct inpcb *inp;
+
+	inp = tw->tw_inpcb;
+	/* A NULL back pointer tells a late timer that the entry is gone. */
+	tw->tw_inpcb = NULL;
+	callout_stop(tw->tt_2msl);
+	inp->inp_ppcb = NULL;
+	/*
+	 * Drop the credential reference taken with crhold() in
+	 * tcp_twstart(); without this every TIME_WAIT entry leaks one.
+	 */
+	crfree(tw->tw_cred);
+	tw->tw_cred = NULL;
+	uma_zfree(tcptw_zone, tw);
+#ifdef INET6
+	if (inp->inp_vflag & INP_IPV6PROTO)
+		in6_pcbdetach(inp);
+	else
+#endif
+		in_pcbdetach(inp);
+	tcpstat.tcps_closed++;
+}
+
+/*
+ * Build and transmit a TCP segment without payload on behalf of a
+ * connection in TIME_WAIT state, using the values cached in the tcptw.
+ *
+ * tw:    TIME_WAIT state; tw->tw_inpcb supplies the header template,
+ *        route and IP options.
+ * flags: TCP header flags for the segment.  The timestamp and CC options
+ *        are only attached when flags is exactly TH_ACK.
+ *
+ * Returns ENOBUFS if no mbuf header is available, otherwise the result of
+ * ip_output() or ip6_output().
+ */
+int
+tcp_twrespond(struct tcptw *tw, int flags)
+{
+	struct inpcb *inp = tw->tw_inpcb;
+	struct tcphdr *th;
+	struct mbuf *m;
+	struct ip *ip = NULL;
+	u_int8_t *optp;
+	u_int hdrlen, optlen;
+	int error;
+#ifdef INET6
+	struct ip6_hdr *ip6 = NULL;
+	int isipv6 = inp->inp_inc.inc_isipv6;
+#endif
+
+	m = m_gethdr(M_NOWAIT, MT_HEADER);
+	if (m == NULL)
+		return (ENOBUFS);
+	/* Leave room in front of the headers for the link-level header. */
+	m->m_data += max_linkhdr;
+
+	/*
+	 * The IPv6 branches are compiled only with INET6, so that a kernel
+	 * built without it never references ip6 or the IPv6 routines.
+	 */
+#ifdef INET6
+	if (isipv6) {
+		hdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
+		ip6 = mtod(m, struct ip6_hdr *);
+		th = (struct tcphdr *)(ip6 + 1);
+		tcpip_fillheaders(inp, ip6, th);
+	} else
+#endif
+	{
+		hdrlen = sizeof(struct tcpiphdr);
+		ip = mtod(m, struct ip *);
+		th = (struct tcphdr *)(ip + 1);
+		tcpip_fillheaders(inp, ip, th);
+	}
+	optp = (u_int8_t *)(th + 1);
+
+	/*
+	 * Send a timestamp and echo-reply if both our side and our peer
+	 * have sent timestamps in our SYN's and this is not a RST.
+	 */
+	if (tw->t_recent && flags == TH_ACK) {
+		u_int32_t *lp = (u_int32_t *)optp;
+
+		/* Form timestamp option as shown in appendix A of RFC 1323. */
+		*lp++ = htonl(TCPOPT_TSTAMP_HDR);
+		*lp++ = htonl(ticks);
+		*lp   = htonl(tw->t_recent);
+		optp += TCPOLEN_TSTAMP_APPA;
+	}
+
+	/*
+	 * Send `CC-family' options if needed, and it's not a RST.
+	 */
+	if (tw->cc_recv != 0 && flags == TH_ACK) {
+		u_int32_t *lp = (u_int32_t *)optp;
+
+		*lp++ = htonl(TCPOPT_CC_HDR(TCPOPT_CC));
+		*lp   = htonl(tw->cc_send);
+		optp += TCPOLEN_CC_APPA;
+	}
+	optlen = optp - (u_int8_t *)(th + 1);
+
+	m->m_len = hdrlen + optlen;
+	m->m_pkthdr.len = m->m_len;
+
+	KASSERT(max_linkhdr + m->m_len <= MHLEN, ("tcptw: mbuf too small"));
+
+	th->th_seq = htonl(tw->snd_nxt);
+	th->th_ack = htonl(tw->rcv_nxt);
+	th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
+	th->th_flags = flags;
+	th->th_win = htons(tw->last_win);
+
+#ifdef INET6
+	if (isipv6) {
+		th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr),
+		    sizeof(struct tcphdr) + optlen);
+		ip6->ip6_hlim = in6_selecthlim(inp, inp->in6p_route.ro_rt ?
+		    inp->in6p_route.ro_rt->rt_ifp : NULL);
+		error = ip6_output(m, inp->in6p_outputopts, &inp->in6p_route,
+		    (tw->tw_so_options & SO_DONTROUTE), NULL, NULL, inp);
+	} else
+#endif
+	{
+		/*
+		 * Only the pseudo-header sum is stored here; CSUM_TCP and
+		 * csum_data (offset of th_sum) are set for the rest.
+		 */
+		th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+		    htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP));
+		m->m_pkthdr.csum_flags = CSUM_TCP;
+		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
+		ip->ip_len = m->m_pkthdr.len;
+		error = ip_output(m, inp->inp_options, &inp->inp_route,
+		    (tw->tw_so_options & SO_DONTROUTE), NULL, inp);
+	}
+	/* The counters are bumped whether or not the output succeeded. */
+	if (flags & TH_ACK)
+		tcpstat.tcps_sndacks++;
+	else
+		tcpstat.tcps_sndctrl++;
+	tcpstat.tcps_sndtotal++;
+	return (error);
+}
+
+/*
* TCP BANDWIDTH DELAY PRODUCT WINDOW LIMITING
*
* This code attempts to calculate the bandwidth-delay product as a
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index 137b042..e4a07f8 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -235,6 +235,20 @@ struct syncache_head {
TAILQ_HEAD(, syncache) sch_bucket;
u_int sch_length;
};
+
+/*
+ * Reduced per-connection state kept while a connection sits in TIME_WAIT,
+ * after its tcpcb has been discarded.  Filled in by tcp_twstart() and
+ * reached from the inpcb through inp_ppcb (intotw()).
+ */
+struct tcptw {
+ struct inpcb *tw_inpcb; /* XXX back pointer to internet pcb */
+ tcp_seq snd_nxt; /* copied from the tcpcb; th_seq of replies */
+ tcp_seq rcv_nxt; /* copied from the tcpcb; th_ack of replies */
+ tcp_cc cc_recv; /* copied from the tcpcb; non-zero enables CC option */
+ tcp_cc cc_send; /* copied from the tcpcb; value sent in CC option */
+ u_short last_win; /* cached window value */
+ u_short tw_so_options; /* copy of so_options */
+ struct ucred *tw_cred; /* user credentials */
+ u_long t_recent; /* ts_recent of the tcpcb, 0 if timestamps unused */
+ u_long t_starttime; /* copied from the tcpcb */
+ struct callout *tt_2msl; /* 2*msl TIME_WAIT timer */
+};
/*
* The TAO cache entry which is stored in the protocol family specific
@@ -254,6 +268,7 @@ struct rmxp_tao {
#define rmx_taop(r) ((struct rmxp_tao *)(r).rmx_filler)
#define intotcpcb(ip) ((struct tcpcb *)(ip)->inp_ppcb)
+#define intotw(ip) ((struct tcptw *)(ip)->inp_ppcb)
#define sototcpcb(so) (intotcpcb(sotoinpcb(so)))
/*
@@ -448,6 +463,8 @@ extern int ss_fltsz_local;
void tcp_canceltimers(struct tcpcb *);
struct tcpcb *
tcp_close(struct tcpcb *);
+void tcp_twstart(struct tcpcb *);
+void tcp_twclose(struct tcptw *);
void tcp_ctlinput(int, struct sockaddr *, void *);
int tcp_ctloutput(struct socket *, struct sockopt *);
struct tcpcb *
@@ -471,6 +488,7 @@ struct inpcb *
tcp_quench(struct inpcb *, int);
void tcp_respond(struct tcpcb *, void *,
struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int);
+int tcp_twrespond(struct tcptw *, int);
struct rtentry *
tcp_rtlookup(struct in_conninfo *);
void tcp_setpersist(struct tcpcb *);
diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c
index 963cd9a..b328d32 100644
--- a/sys/netinet6/in6_pcb.c
+++ b/sys/netinet6/in6_pcb.c
@@ -614,9 +614,10 @@ in6_pcbdetach(inp)
#endif /* IPSEC */
inp->inp_gencnt = ++ipi->ipi_gencnt;
in_pcbremlists(inp);
- sotoinpcb(so) = 0;
- sotryfree(so);
-
+ if (so) {
+ so->so_pcb = NULL;
+ sotryfree(so);
+ }
if (inp->in6p_options)
m_freem(inp->in6p_options);
ip6_freepcbopts(inp->in6p_outputopts);
@@ -627,7 +628,6 @@ in6_pcbdetach(inp)
if (inp->inp_options)
(void)m_free(inp->inp_options);
ip_freemoptions(inp->inp_moptions);
-
inp->inp_vflag = 0;
INP_LOCK_DESTROY(inp);
uma_zfree(ipi->ipi_zone, inp);
diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c
index 64b11aa..2b22088 100644
--- a/sys/netinet6/ip6_output.c
+++ b/sys/netinet6/ip6_output.c
@@ -218,10 +218,10 @@ ip6_output(m0, opt, ro, flags, im6o, ifpp, inp)
#ifdef IPSEC
/* get a security policy for this packet */
- if (so == NULL)
+ if (inp == NULL)
sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
else
- sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
+ sp = ipsec6_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error);
if (sp == NULL) {
ipsec6stat.out_inval++;
diff --git a/sys/netinet6/ipsec.c b/sys/netinet6/ipsec.c
index f898c0a..500f80b 100644
--- a/sys/netinet6/ipsec.c
+++ b/sys/netinet6/ipsec.c
@@ -235,10 +235,10 @@ static int ipsec6_encapsulate __P((struct mbuf *, struct secasvar *));
* NOTE: IPv6 mapped adddress concern is implemented here.
*/
struct secpolicy *
-ipsec4_getpolicybysock(m, dir, so, error)
+ipsec4_getpolicybypcb(m, dir, inp, error)
struct mbuf *m;
u_int dir;
- struct socket *so;
+ struct inpcb *inp;
int *error;
{
struct inpcbpolicy *pcbsp = NULL;
@@ -246,35 +246,19 @@ ipsec4_getpolicybysock(m, dir, so, error)
struct secpolicy *kernsp = NULL; /* policy on kernel */
/* sanity check */
- if (m == NULL || so == NULL || error == NULL)
+ if (m == NULL || inp == NULL || error == NULL)
panic("ipsec4_getpolicybysock: NULL pointer was passed.\n");
- switch (so->so_proto->pr_domain->dom_family) {
- case AF_INET:
- /* set spidx in pcb */
- *error = ipsec4_setspidx_inpcb(m, sotoinpcb(so));
- break;
+ /* set spidx in pcb */
#ifdef INET6
- case AF_INET6:
- /* set spidx in pcb */
- *error = ipsec6_setspidx_in6pcb(m, sotoin6pcb(so));
- break;
+ if (inp->inp_vflag & INP_IPV6PROTO)
+ *error = ipsec6_setspidx_in6pcb(m, inp);
+ else
#endif
- default:
- panic("ipsec4_getpolicybysock: unsupported address family\n");
- }
+ *error = ipsec4_setspidx_inpcb(m, inp);
if (*error)
return NULL;
- switch (so->so_proto->pr_domain->dom_family) {
- case AF_INET:
- pcbsp = sotoinpcb(so)->inp_sp;
- break;
-#ifdef INET6
- case AF_INET6:
- pcbsp = sotoin6pcb(so)->in6p_sp;
- break;
-#endif
- }
+ pcbsp = inp->inp_sp;
/* sanity check */
if (pcbsp == NULL)
@@ -390,6 +374,19 @@ ipsec4_getpolicybysock(m, dir, so, error)
/* NOTREACHED */
}
+/*
+ * Socket-based front end to ipsec4_getpolicybypcb(): the lookup is done
+ * on the inpcb attached to the socket.  Panics if so is NULL; the other
+ * arguments are passed through unchanged.
+ */
+struct secpolicy *
+ipsec4_getpolicybysock(m, dir, so, error)
+ struct mbuf *m;
+ u_int dir;
+ struct socket *so;
+ int *error;
+{
+
+ if (so == NULL)
+ panic("ipsec4_getpolicybysock: NULL pointer was passed.\n");
+ return (ipsec4_getpolicybypcb(m, dir, sotoinpcb(so), error));
+}
+
/*
* For FORWADING packet or OUTBOUND without a socket. Searching SPD for packet,
* and return a pointer to SP.
@@ -462,10 +459,10 @@ ipsec4_getpolicybyaddr(m, dir, flag, error)
* others: a pointer to SP
*/
struct secpolicy *
-ipsec6_getpolicybysock(m, dir, so, error)
+ipsec6_getpolicybypcb(m, dir, inp, error)
struct mbuf *m;
u_int dir;
- struct socket *so;
+ struct inpcb *inp;
int *error;
{
struct inpcbpolicy *pcbsp = NULL;
@@ -473,18 +470,17 @@ ipsec6_getpolicybysock(m, dir, so, error)
struct secpolicy *kernsp = NULL; /* policy on kernel */
/* sanity check */
- if (m == NULL || so == NULL || error == NULL)
+ if (m == NULL || inp == NULL || error == NULL)
panic("ipsec6_getpolicybysock: NULL pointer was passed.\n");
#ifdef DIAGNOSTIC
- if (so->so_proto->pr_domain->dom_family != AF_INET6)
+ if ((inp->inp_vflag & INP_IPV6PROTO) == 0)
panic("ipsec6_getpolicybysock: socket domain != inet6\n");
#endif
/* set spidx in pcb */
- ipsec6_setspidx_in6pcb(m, sotoin6pcb(so));
-
- pcbsp = sotoin6pcb(so)->in6p_sp;
+ ipsec6_setspidx_in6pcb(m, inp);
+ pcbsp = inp->in6p_sp;
/* sanity check */
if (pcbsp == NULL)
@@ -601,6 +597,19 @@ ipsec6_getpolicybysock(m, dir, so, error)
/* NOTREACHED */
}
+/*
+ * Socket-based front end to ipsec6_getpolicybypcb(): the lookup is done
+ * on the in6pcb attached to the socket.  Panics if so is NULL; the other
+ * arguments are passed through unchanged.
+ */
+struct secpolicy *
+ipsec6_getpolicybysock(m, dir, so, error)
+ struct mbuf *m;
+ u_int dir;
+ struct socket *so;
+ int *error;
+{
+
+ if (so == NULL)
+ panic("ipsec6_getpolicybysock: NULL pointer was passed.\n");
+ return (ipsec6_getpolicybypcb(m, dir, sotoin6pcb(so), error));
+}
+
/*
* For FORWADING packet or OUTBOUND without a socket. Searching SPD for packet,
* and return a pointer to SP.
@@ -1690,9 +1699,9 @@ ipsec_in_reject(sp, m)
* and {ah,esp}4_input for tunnel mode
*/
int
-ipsec4_in_reject_so(m, so)
+ipsec4_in_reject(m, inp)
struct mbuf *m;
- struct socket *so;
+ struct inpcb *inp;
{
struct secpolicy *sp = NULL;
int error;
@@ -1706,10 +1715,10 @@ ipsec4_in_reject_so(m, so)
* When we are called from ip_forward(), we call
* ipsec4_getpolicybyaddr() with IP_FORWARDING flag.
*/
- if (so == NULL)
+ if (inp == NULL)
sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_INBOUND, IP_FORWARDING, &error);
else
- sp = ipsec4_getpolicybysock(m, IPSEC_DIR_INBOUND, so, &error);
+ sp = ipsec4_getpolicybypcb(m, IPSEC_DIR_INBOUND, inp, &error);
if (sp == NULL)
return 0; /* XXX should be panic ?
@@ -1724,18 +1733,16 @@ ipsec4_in_reject_so(m, so)
}
int
-ipsec4_in_reject(m, inp)
+ipsec4_in_reject_so(m, so)
struct mbuf *m;
- struct inpcb *inp;
+ struct socket *so;
{
- if (inp == NULL)
- return ipsec4_in_reject_so(m, NULL);
- if (inp->inp_socket)
- return ipsec4_in_reject_so(m, inp->inp_socket);
- else
- panic("ipsec4_in_reject: invalid inpcb/socket");
+ if (so == NULL)
+ return ipsec4_in_reject(m, NULL);
+ return ipsec4_in_reject(m, sotoinpcb(so));
}
+
#ifdef INET6
/*
* Check AH/ESP integrity.
@@ -1743,9 +1750,9 @@ ipsec4_in_reject(m, inp)
* and {ah,esp}6_input for tunnel mode
*/
int
-ipsec6_in_reject_so(m, so)
+ipsec6_in_reject(m, in6p)
struct mbuf *m;
- struct socket *so;
+ struct in6pcb *in6p;
{
struct secpolicy *sp = NULL;
int error;
@@ -1759,33 +1766,30 @@ ipsec6_in_reject_so(m, so)
* When we are called from ip_forward(), we call
* ipsec6_getpolicybyaddr() with IP_FORWARDING flag.
*/
- if (so == NULL)
+ if (in6p == NULL)
sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_INBOUND, IP_FORWARDING, &error);
else
- sp = ipsec6_getpolicybysock(m, IPSEC_DIR_INBOUND, so, &error);
+ sp = ipsec6_getpolicybypcb(m, IPSEC_DIR_INBOUND, in6p, &error);
if (sp == NULL)
return 0; /* XXX should be panic ? */
result = ipsec_in_reject(sp, m);
KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
- printf("DP ipsec6_in_reject_so call free SP:%p\n", sp));
+ printf("DP ipsec6_in_reject call free SP:%p\n", sp));
key_freesp(sp);
return result;
}
int
-ipsec6_in_reject(m, in6p)
+ipsec6_in_reject_so(m, so)
struct mbuf *m;
- struct in6pcb *in6p;
+ struct socket *so;
{
- if (in6p == NULL)
- return ipsec6_in_reject_so(m, NULL);
- if (in6p->in6p_socket)
- return ipsec6_in_reject_so(m, in6p->in6p_socket);
- else
- panic("ipsec6_in_reject: invalid in6p/socket");
+ if (so == NULL)
+ return ipsec6_in_reject(m, NULL);
+ return ipsec6_in_reject(m, sotoin6pcb(so));
}
#endif
@@ -1889,7 +1893,7 @@ ipsec4_hdrsiz(m, dir, inp)
if (inp == NULL)
sp = ipsec4_getpolicybyaddr(m, dir, IP_FORWARDING, &error);
else
- sp = ipsec4_getpolicybysock(m, dir, inp->inp_socket, &error);
+ sp = ipsec4_getpolicybypcb(m, dir, inp, &error);
if (sp == NULL)
return 0; /* XXX should be panic ? */
@@ -1921,15 +1925,18 @@ ipsec6_hdrsiz(m, dir, in6p)
/* sanity check */
if (m == NULL)
return 0; /* XXX shoud be panic ? */
+#if 0
+ /* this is possible in TIME_WAIT state */
if (in6p != NULL && in6p->in6p_socket == NULL)
panic("ipsec6_hdrsize: why is socket NULL but there is PCB.");
+#endif
/* get SP for this packet */
/* XXX Is it right to call with IP_FORWARDING. */
if (in6p == NULL)
sp = ipsec6_getpolicybyaddr(m, dir, IP_FORWARDING, &error);
else
- sp = ipsec6_getpolicybysock(m, dir, in6p->in6p_socket, &error);
+ sp = ipsec6_getpolicybypcb(m, dir, in6p, &error);
if (sp == NULL)
return 0;
diff --git a/sys/netinet6/ipsec.h b/sys/netinet6/ipsec.h
index 76790b8..40f5f56 100644
--- a/sys/netinet6/ipsec.h
+++ b/sys/netinet6/ipsec.h
@@ -297,12 +297,14 @@ extern int ip4_esp_randpad;
#define ipseclog(x) do { if (ipsec_debug) log x; } while (0)
+struct inpcb;
+extern struct secpolicy *ipsec4_getpolicybypcb
+ __P((struct mbuf *, u_int, struct inpcb *, int *));
extern struct secpolicy *ipsec4_getpolicybysock
__P((struct mbuf *, u_int, struct socket *, int *));
extern struct secpolicy *ipsec4_getpolicybyaddr
__P((struct mbuf *, u_int, int, int *));
-struct inpcb;
extern int ipsec_init_policy __P((struct socket *so, struct inpcbpolicy **));
extern int ipsec_copy_policy
__P((struct inpcbpolicy *, struct inpcbpolicy *));
diff --git a/sys/netinet6/ipsec6.h b/sys/netinet6/ipsec6.h
index e9b8a2c..1811088 100644
--- a/sys/netinet6/ipsec6.h
+++ b/sys/netinet6/ipsec6.h
@@ -50,13 +50,14 @@ extern int ip6_ah_net_deflev;
extern int ip6_ipsec_ecn;
extern int ip6_esp_randpad;
+struct inpcb;
+extern struct secpolicy *ipsec6_getpolicybypcb
+ __P((struct mbuf *, u_int, struct inpcb *, int *));
extern struct secpolicy *ipsec6_getpolicybysock
__P((struct mbuf *, u_int, struct socket *, int *));
extern struct secpolicy *ipsec6_getpolicybyaddr
__P((struct mbuf *, u_int, int, int *));
-struct inpcb;
-
extern int ipsec6_in_reject_so __P((struct mbuf *, struct socket *));
extern int ipsec6_delete_pcbpolicy __P((struct inpcb *));
extern int ipsec6_set_policy __P((struct inpcb *inp, int optname,
diff --git a/sys/netinet6/raw_ip6.c b/sys/netinet6/raw_ip6.c
index 540533b..728da31 100644
--- a/sys/netinet6/raw_ip6.c
+++ b/sys/netinet6/raw_ip6.c
@@ -180,7 +180,7 @@ rip6_input(mp, offp, proto)
/*
* Check AH/ESP integrity.
*/
- if (n && ipsec6_in_reject_so(n, last->inp_socket)) {
+ if (n && ipsec6_in_reject(n, last)) {
m_freem(n);
ipsec6stat.in_polvio++;
/* do not inject data into pcb */
@@ -219,7 +219,7 @@ rip6_input(mp, offp, proto)
/*
* Check AH/ESP integrity.
*/
- if (last && ipsec6_in_reject_so(m, last->inp_socket)) {
+ if (last && ipsec6_in_reject(m, last)) {
m_freem(m);
ipsec6stat.in_polvio++;
ip6stat.ip6s_delivered--;
diff --git a/sys/netinet6/udp6_usrreq.c b/sys/netinet6/udp6_usrreq.c
index b5a4033..fd642cb 100644
--- a/sys/netinet6/udp6_usrreq.c
+++ b/sys/netinet6/udp6_usrreq.c
@@ -261,7 +261,7 @@ udp6_input(mp, offp, proto)
/*
* Check AH/ESP integrity.
*/
- if (ipsec6_in_reject_so(m, last->inp_socket))
+ if (ipsec6_in_reject(m, last))
ipsec6stat.in_polvio++;
/* do not inject data into pcb */
else
@@ -328,7 +328,7 @@ udp6_input(mp, offp, proto)
/*
* Check AH/ESP integrity.
*/
- if (ipsec6_in_reject_so(m, last->inp_socket)) {
+ if (ipsec6_in_reject(m, last)) {
ipsec6stat.in_polvio++;
goto bad;
}
@@ -384,7 +384,7 @@ udp6_input(mp, offp, proto)
/*
* Check AH/ESP integrity.
*/
- if (ipsec6_in_reject_so(m, in6p->in6p_socket)) {
+ if (ipsec6_in_reject(m, in6p)) {
ipsec6stat.in_polvio++;
goto bad;
}
OpenPOWER on IntegriCloud