path: root/sys/netinet
author     grehan <grehan@FreeBSD.org>  2011-06-28 06:26:03 +0000
committer  grehan <grehan@FreeBSD.org>  2011-06-28 06:26:03 +0000
commit     2c6741be0f59191f2283eb268e4f7690399d578a (patch)
tree       b139c8c6dcca4fa284815daade405b75886ee360 /sys/netinet
parent     3c35264f695e0a1f8a04dbcca1c93bb5159b2274 (diff)
parent     19ae02bba572390c7299166228d31e54003e094a (diff)
download   FreeBSD-src-2c6741be0f59191f2283eb268e4f7690399d578a.zip
           FreeBSD-src-2c6741be0f59191f2283eb268e4f7690399d578a.tar.gz
IFC @ r222830
Diffstat (limited to 'sys/netinet')
-rw-r--r--  sys/netinet/icmp6.h | 21
-rw-r--r--  sys/netinet/if_ether.c | 2
-rw-r--r--  sys/netinet/in.c | 25
-rw-r--r--  sys/netinet/in_pcb.c | 377
-rw-r--r--  sys/netinet/in_pcb.h | 137
-rw-r--r--  sys/netinet/in_pcbgroup.c | 457
-rw-r--r--  sys/netinet/in_proto.c | 2
-rw-r--r--  sys/netinet/ip_divert.c | 11
-rw-r--r--  sys/netinet/ip_input.c | 2
-rw-r--r--  sys/netinet/ip_ipsec.c | 2
-rw-r--r--  sys/netinet/ipfw/ip_dummynet.c | 10
-rw-r--r--  sys/netinet/ipfw/ip_fw2.c | 151
-rw-r--r--  sys/netinet/ipfw/ip_fw_dynamic.c | 3
-rw-r--r--  sys/netinet/ipfw/ip_fw_nat.c | 67
-rw-r--r--  sys/netinet/ipfw/ip_fw_sockopt.c | 10
-rw-r--r--  sys/netinet/libalias/alias.h | 24
-rw-r--r--  sys/netinet/libalias/alias_sctp.h | 8
-rw-r--r--  sys/netinet/raw_ip.c | 32
-rw-r--r--  sys/netinet/sctp.h | 14
-rw-r--r--  sys/netinet/sctp_auth.c | 2
-rw-r--r--  sys/netinet/sctp_indata.c | 155
-rw-r--r--  sys/netinet/sctp_indata.h | 5
-rw-r--r--  sys/netinet/sctp_output.c | 457
-rw-r--r--  sys/netinet/sctp_pcb.c | 10
-rw-r--r--  sys/netinet/sctp_structs.h | 3
-rw-r--r--  sys/netinet/sctp_uio.h | 136
-rw-r--r--  sys/netinet/sctp_usrreq.c | 1563
-rw-r--r--  sys/netinet/sctp_var.h | 24
-rw-r--r--  sys/netinet/sctputil.c | 93
-rw-r--r--  sys/netinet/sctputil.h | 14
-rw-r--r--  sys/netinet/siftr.c | 22
-rw-r--r--  sys/netinet/tcp_input.c | 240
-rw-r--r--  sys/netinet/tcp_output.c | 11
-rw-r--r--  sys/netinet/tcp_subr.c | 53
-rw-r--r--  sys/netinet/tcp_syncache.c | 22
-rw-r--r--  sys/netinet/tcp_timer.c | 2
-rw-r--r--  sys/netinet/tcp_usrreq.c | 98
-rw-r--r--  sys/netinet/udp_usrreq.c | 139
38 files changed, 3195 insertions, 1209 deletions
diff --git a/sys/netinet/icmp6.h b/sys/netinet/icmp6.h
index 5faae7c..c9da86a 100644
--- a/sys/netinet/icmp6.h
+++ b/sys/netinet/icmp6.h
@@ -297,8 +297,9 @@ struct nd_opt_hdr { /* Neighbor discovery option header */
#define ND_OPT_PREFIX_INFORMATION 3
#define ND_OPT_REDIRECTED_HEADER 4
#define ND_OPT_MTU 5
-
-#define ND_OPT_ROUTE_INFO 200 /* draft-ietf-ipngwg-router-preference, not officially assigned yet */
+#define ND_OPT_ROUTE_INFO 24 /* RFC 4191 */
+#define ND_OPT_RDNSS 25 /* RFC 6106 */
+#define ND_OPT_DNSSL 31 /* RFC 6106 */
struct nd_opt_prefix_info { /* prefix information */
u_int8_t nd_opt_pi_type;
@@ -338,6 +339,22 @@ struct nd_opt_route_info { /* route info */
/* prefix follows */
} __packed;
+struct nd_opt_rdnss { /* RDNSS option (RFC 6106) */
+ u_int8_t nd_opt_rdnss_type;
+ u_int8_t nd_opt_rdnss_len;
+ u_int16_t nd_opt_rdnss_reserved;
+ u_int32_t nd_opt_rdnss_lifetime;
+ /* followed by list of recursive DNS servers */
+} __packed;
+
+struct nd_opt_dnssl { /* DNSSL option (RFC 6106) */
+ u_int8_t nd_opt_dnssl_type;
+ u_int8_t nd_opt_dnssl_len;
+ u_int16_t nd_opt_dnssl_reserved;
+ u_int32_t nd_opt_dnssl_lifetime;
+ /* followed by list of DNS search domains */
+} __packed;
+
/*
* icmp6 namelookup
*/
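The two structures added above are the fixed 8-byte headers of the RFC 6106 RDNSS and DNSSL Router Advertisement options; the option length field counts units of 8 octets, so a well-formed RDNSS option has an odd length of at least 3 and carries (len - 1) / 2 IPv6 addresses after the header. A minimal consumer of the new definitions might walk an option as sketched below (illustrative only, not part of this commit):

#include <netinet/in.h>
#include <netinet/icmp6.h>

/*
 * Return the number of DNS server addresses carried by an RDNSS option and
 * point *list at the first address, or -1 if the option is malformed.
 */
static int
rdnss_servers(const struct nd_opt_rdnss *rd, const struct in6_addr **list)
{

	if (rd->nd_opt_rdnss_type != ND_OPT_RDNSS ||
	    rd->nd_opt_rdnss_len < 3 || (rd->nd_opt_rdnss_len & 1) == 0)
		return (-1);
	*list = (const struct in6_addr *)(rd + 1);
	return ((rd->nd_opt_rdnss_len - 1) / 2);
}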
diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c
index 3afdc7d..6a66c05 100644
--- a/sys/netinet/if_ether.c
+++ b/sys/netinet/if_ether.c
@@ -759,7 +759,7 @@ match:
}
} else
LLE_WUNLOCK(la);
- } /* end of FIB loop */
+ }
reply:
if (op != ARPOP_REQUEST)
goto drop;
diff --git a/sys/netinet/in.c b/sys/netinet/in.c
index 684d808..7ae8477 100644
--- a/sys/netinet/in.c
+++ b/sys/netinet/in.c
@@ -548,7 +548,7 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
* is the same as before, then the call is
* un-necessarily executed here.
*/
- in_ifscrub(ifp, ia, 0);
+ in_ifscrub(ifp, ia, LLE_STATIC);
ia->ia_sockmask = ifra->ifra_mask;
ia->ia_sockmask.sin_family = AF_INET;
ia->ia_subnetmask =
@@ -557,7 +557,7 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
}
if ((ifp->if_flags & IFF_POINTOPOINT) &&
(ifra->ifra_dstaddr.sin_family == AF_INET)) {
- in_ifscrub(ifp, ia, 0);
+ in_ifscrub(ifp, ia, LLE_STATIC);
ia->ia_dstaddr = ifra->ifra_dstaddr;
maskIsNew = 1; /* We lie; but the effect's the same */
}
@@ -1179,14 +1179,20 @@ in_scrubprefix(struct in_ifaddr *target, u_int flags)
&& (ia->ia_ifp->if_type != IFT_CARP)) {
ifa_ref(&ia->ia_ifa);
IN_IFADDR_RUNLOCK();
- rtinit(&(target->ia_ifa), (int)RTM_DELETE,
+ error = rtinit(&(target->ia_ifa), (int)RTM_DELETE,
rtinitflags(target));
- target->ia_flags &= ~IFA_ROUTE;
-
+ if (error == 0)
+ target->ia_flags &= ~IFA_ROUTE;
+ else
+ log(LOG_INFO, "in_scrubprefix: err=%d, old prefix delete failed\n",
+ error);
error = rtinit(&ia->ia_ifa, (int)RTM_ADD,
rtinitflags(ia) | RTF_UP);
if (error == 0)
ia->ia_flags |= IFA_ROUTE;
+ else
+ log(LOG_INFO, "in_scrubprefix: err=%d, new prefix add failed\n",
+ error);
ifa_free(&ia->ia_ifa);
return (error);
}
@@ -1210,9 +1216,12 @@ in_scrubprefix(struct in_ifaddr *target, u_int flags)
/*
* As no-one seem to have this prefix, we can remove the route.
*/
- rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target));
- target->ia_flags &= ~IFA_ROUTE;
- return (0);
+ error = rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target));
+ if (error == 0)
+ target->ia_flags &= ~IFA_ROUTE;
+ else
+ log(LOG_INFO, "in_scrubprefix: err=%d, prefix delete failed\n", error);
+ return (error);
}
#undef rtinitflags
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
index 85e31dc..4eb309a 100644
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
#include "opt_ipsec.h"
#include "opt_inet.h"
#include "opt_inet6.h"
+#include "opt_pcbgroup.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -128,8 +129,12 @@ static VNET_DEFINE(int, ipport_tcplastcount);
#define V_ipport_tcplastcount VNET(ipport_tcplastcount)
static void in_pcbremlists(struct inpcb *inp);
-
#ifdef INET
+static struct inpcb *in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo,
+ struct in_addr faddr, u_int fport_arg,
+ struct in_addr laddr, u_int lport_arg,
+ int lookupflags, struct ifnet *ifp);
+
#define RANGECHK(var, min, max) \
if ((var) < (min)) { (var) = (min); } \
else if ((var) > (max)) { (var) = (max); }
@@ -208,19 +213,24 @@ void
in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name,
struct inpcbhead *listhead, int hash_nelements, int porthash_nelements,
char *inpcbzone_name, uma_init inpcbzone_init, uma_fini inpcbzone_fini,
- uint32_t inpcbzone_flags)
+ uint32_t inpcbzone_flags, u_int hashfields)
{
INP_INFO_LOCK_INIT(pcbinfo, name);
+ INP_HASH_LOCK_INIT(pcbinfo, "pcbinfohash"); /* XXXRW: argument? */
#ifdef VIMAGE
pcbinfo->ipi_vnet = curvnet;
#endif
pcbinfo->ipi_listhead = listhead;
LIST_INIT(pcbinfo->ipi_listhead);
+ pcbinfo->ipi_count = 0;
pcbinfo->ipi_hashbase = hashinit(hash_nelements, M_PCB,
&pcbinfo->ipi_hashmask);
pcbinfo->ipi_porthashbase = hashinit(porthash_nelements, M_PCB,
&pcbinfo->ipi_porthashmask);
+#ifdef PCBGROUP
+ in_pcbgroup_init(pcbinfo, hashfields, hash_nelements);
+#endif
pcbinfo->ipi_zone = uma_zcreate(inpcbzone_name, sizeof(struct inpcb),
NULL, NULL, inpcbzone_init, inpcbzone_fini, UMA_ALIGN_PTR,
inpcbzone_flags);
@@ -234,10 +244,17 @@ void
in_pcbinfo_destroy(struct inpcbinfo *pcbinfo)
{
+ KASSERT(pcbinfo->ipi_count == 0,
+ ("%s: ipi_count = %u", __func__, pcbinfo->ipi_count));
+
hashdestroy(pcbinfo->ipi_hashbase, M_PCB, pcbinfo->ipi_hashmask);
hashdestroy(pcbinfo->ipi_porthashbase, M_PCB,
pcbinfo->ipi_porthashmask);
+#ifdef PCBGROUP
+ in_pcbgroup_destroy(pcbinfo);
+#endif
uma_zdestroy(pcbinfo->ipi_zone);
+ INP_HASH_LOCK_DESTROY(pcbinfo);
INP_INFO_LOCK_DESTROY(pcbinfo);
}
@@ -309,8 +326,8 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
{
int anonport, error;
- INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
return (EINVAL);
@@ -351,8 +368,8 @@ in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp,
* Because no actual state changes occur here, a global write lock on
* the pcbinfo isn't required.
*/
- INP_INFO_LOCK_ASSERT(pcbinfo);
INP_LOCK_ASSERT(inp);
+ INP_HASH_LOCK_ASSERT(pcbinfo);
if (inp->inp_flags & INP_HIGHPORT) {
first = V_ipport_hifirstauto; /* sysctl */
@@ -473,11 +490,10 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
int error;
/*
- * Because no actual state changes occur here, a global write lock on
- * the pcbinfo isn't required.
+ * No state changes, so read locks are sufficient here.
*/
- INP_INFO_LOCK_ASSERT(pcbinfo);
INP_LOCK_ASSERT(inp);
+ INP_HASH_LOCK_ASSERT(pcbinfo);
if (TAILQ_EMPTY(&V_in_ifaddrhead)) /* XXX broken! */
return (EADDRNOTAVAIL);
@@ -612,14 +628,15 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
* then pick one.
*/
int
-in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
+in_pcbconnect_mbuf(struct inpcb *inp, struct sockaddr *nam,
+ struct ucred *cred, struct mbuf *m)
{
u_short lport, fport;
in_addr_t laddr, faddr;
int anonport, error;
- INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
lport = inp->inp_lport;
laddr = inp->inp_laddr.s_addr;
@@ -645,13 +662,20 @@ in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
inp->inp_laddr.s_addr = laddr;
inp->inp_faddr.s_addr = faddr;
inp->inp_fport = fport;
- in_pcbrehash(inp);
+ in_pcbrehash_mbuf(inp, m);
if (anonport)
inp->inp_flags |= INP_ANONPORT;
return (0);
}
+int
+in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
+{
+
+ return (in_pcbconnect_mbuf(inp, nam, cred, NULL));
+}
+
/*
* Do proper source address selection on an unbound socket in case
* of connect. Take jails into account as well.
@@ -907,8 +931,8 @@ in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam,
* Because a global state change doesn't actually occur here, a read
* lock is sufficient.
*/
- INP_INFO_LOCK_ASSERT(inp->inp_pcbinfo);
INP_LOCK_ASSERT(inp);
+ INP_HASH_LOCK_ASSERT(inp->inp_pcbinfo);
if (oinpp != NULL)
*oinpp = NULL;
@@ -983,8 +1007,8 @@ in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam,
if (error)
return (error);
}
- oinp = in_pcblookup_hash(inp->inp_pcbinfo, faddr, fport, laddr, lport,
- 0, NULL);
+ oinp = in_pcblookup_hash_locked(inp->inp_pcbinfo, faddr, fport,
+ laddr, lport, 0, NULL);
if (oinp != NULL) {
if (oinpp != NULL)
*oinpp = oinp;
@@ -1007,8 +1031,8 @@ void
in_pcbdisconnect(struct inpcb *inp)
{
- INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
inp->inp_faddr.s_addr = INADDR_ANY;
inp->inp_fport = 0;
@@ -1036,7 +1060,8 @@ in_pcbdetach(struct inpcb *inp)
* in_pcbref() bumps the reference count on an inpcb in order to maintain
* stability of an inpcb pointer despite the inpcb lock being released. This
* is used in TCP when the inpcbinfo lock needs to be acquired or upgraded,
- * but where the inpcb lock is already held.
+ * but where the inpcb lock may already be held, or when acquiring a reference
+ * via a pcbgroup.
*
* in_pcbref() should be used only to provide brief memory stability, and
* must always be followed by a call to INP_WLOCK() and in_pcbrele() to
@@ -1187,20 +1212,28 @@ void
in_pcbdrop(struct inpcb *inp)
{
- INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
INP_WLOCK_ASSERT(inp);
+ /*
+ * XXXRW: Possibly we should protect the setting of INP_DROPPED with
+ * the hash lock...?
+ */
inp->inp_flags |= INP_DROPPED;
if (inp->inp_flags & INP_INHASHLIST) {
struct inpcbport *phd = inp->inp_phd;
+ INP_HASH_WLOCK(inp->inp_pcbinfo);
LIST_REMOVE(inp, inp_hash);
LIST_REMOVE(inp, inp_portlist);
if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
LIST_REMOVE(phd, phd_hash);
free(phd, M_PCB);
}
+ INP_HASH_WUNLOCK(inp->inp_pcbinfo);
inp->inp_flags &= ~INP_INHASHLIST;
+#ifdef PCBGROUP
+ in_pcbgroup_remove(inp);
+#endif
}
}
@@ -1328,7 +1361,8 @@ in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
}
/*
- * Lookup a PCB based on the local address and port.
+ * Lookup a PCB based on the local address and port. Caller must hold the
+ * hash lock. No inpcb locks or references are acquired.
*/
#define INP_LOOKUP_MAPPED_PCB_COST 3
struct inpcb *
@@ -1346,7 +1380,7 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
("%s: invalid lookup flags %d", __func__, lookupflags));
- INP_INFO_LOCK_ASSERT(pcbinfo);
+ INP_HASH_LOCK_ASSERT(pcbinfo);
if ((lookupflags & INPLOOKUP_WILDCARD) == 0) {
struct inpcbhead *head;
@@ -1449,11 +1483,155 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
}
#undef INP_LOOKUP_MAPPED_PCB_COST
+#ifdef PCBGROUP
/*
- * Lookup PCB in hash list.
+ * Lookup PCB in hash list, using pcbgroup tables.
*/
-struct inpcb *
-in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
+static struct inpcb *
+in_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
+ struct in_addr faddr, u_int fport_arg, struct in_addr laddr,
+ u_int lport_arg, int lookupflags, struct ifnet *ifp)
+{
+ struct inpcbhead *head;
+ struct inpcb *inp, *tmpinp;
+ u_short fport = fport_arg, lport = lport_arg;
+
+ /*
+ * First look for an exact match.
+ */
+ tmpinp = NULL;
+ INP_GROUP_LOCK(pcbgroup);
+ head = &pcbgroup->ipg_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
+ pcbgroup->ipg_hashmask)];
+ LIST_FOREACH(inp, head, inp_pcbgrouphash) {
+#ifdef INET6
+ /* XXX inp locking */
+ if ((inp->inp_vflag & INP_IPV4) == 0)
+ continue;
+#endif
+ if (inp->inp_faddr.s_addr == faddr.s_addr &&
+ inp->inp_laddr.s_addr == laddr.s_addr &&
+ inp->inp_fport == fport &&
+ inp->inp_lport == lport) {
+ /*
+ * XXX We should be able to directly return
+ * the inp here, without any checks.
+ * Well unless both bound with SO_REUSEPORT?
+ */
+ if (prison_flag(inp->inp_cred, PR_IP4))
+ goto found;
+ if (tmpinp == NULL)
+ tmpinp = inp;
+ }
+ }
+ if (tmpinp != NULL) {
+ inp = tmpinp;
+ goto found;
+ }
+
+ /*
+ * Then look for a wildcard match, if requested.
+ */
+ if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
+ struct inpcb *local_wild = NULL, *local_exact = NULL;
+#ifdef INET6
+ struct inpcb *local_wild_mapped = NULL;
+#endif
+ struct inpcb *jail_wild = NULL;
+ struct inpcbhead *head;
+ int injail;
+
+ /*
+ * Order of socket selection - we always prefer jails.
+ * 1. jailed, non-wild.
+ * 2. jailed, wild.
+ * 3. non-jailed, non-wild.
+ * 4. non-jailed, wild.
+ */
+ head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, lport,
+ 0, pcbinfo->ipi_wildmask)];
+ LIST_FOREACH(inp, head, inp_pcbgroup_wild) {
+#ifdef INET6
+ /* XXX inp locking */
+ if ((inp->inp_vflag & INP_IPV4) == 0)
+ continue;
+#endif
+ if (inp->inp_faddr.s_addr != INADDR_ANY ||
+ inp->inp_lport != lport)
+ continue;
+
+ /* XXX inp locking */
+ if (ifp && ifp->if_type == IFT_FAITH &&
+ (inp->inp_flags & INP_FAITH) == 0)
+ continue;
+
+ injail = prison_flag(inp->inp_cred, PR_IP4);
+ if (injail) {
+ if (prison_check_ip4(inp->inp_cred,
+ &laddr) != 0)
+ continue;
+ } else {
+ if (local_exact != NULL)
+ continue;
+ }
+
+ if (inp->inp_laddr.s_addr == laddr.s_addr) {
+ if (injail)
+ goto found;
+ else
+ local_exact = inp;
+ } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
+#ifdef INET6
+ /* XXX inp locking, NULL check */
+ if (inp->inp_vflag & INP_IPV6PROTO)
+ local_wild_mapped = inp;
+ else
+#endif /* INET6 */
+ if (injail)
+ jail_wild = inp;
+ else
+ local_wild = inp;
+ }
+ } /* LIST_FOREACH */
+ inp = jail_wild;
+ if (inp == NULL)
+ inp = local_exact;
+ if (inp == NULL)
+ inp = local_wild;
+#ifdef INET6
+ if (inp == NULL)
+ inp = local_wild_mapped;
+#endif /* defined(INET6) */
+ if (inp != NULL)
+ goto found;
+ } /* if (lookupflags & INPLOOKUP_WILDCARD) */
+ INP_GROUP_UNLOCK(pcbgroup);
+ return (NULL);
+
+found:
+ in_pcbref(inp);
+ INP_GROUP_UNLOCK(pcbgroup);
+ if (lookupflags & INPLOOKUP_WLOCKPCB) {
+ INP_WLOCK(inp);
+ if (in_pcbrele_wlocked(inp))
+ return (NULL);
+ } else if (lookupflags & INPLOOKUP_RLOCKPCB) {
+ INP_RLOCK(inp);
+ if (in_pcbrele_rlocked(inp))
+ return (NULL);
+ } else
+ panic("%s: locking bug", __func__);
+ return (inp);
+}
+#endif /* PCBGROUP */
+
+/*
+ * Lookup PCB in hash list, using pcbinfo tables. This variation assumes
+ * that the caller has locked the hash list, and will not perform any further
+ * locking or reference operations on either the hash list or the connection.
+ */
+static struct inpcb *
+in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr,
u_int fport_arg, struct in_addr laddr, u_int lport_arg, int lookupflags,
struct ifnet *ifp)
{
@@ -1464,7 +1642,7 @@ in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
("%s: invalid lookup flags %d", __func__, lookupflags));
- INP_INFO_LOCK_ASSERT(pcbinfo);
+ INP_HASH_LOCK_ASSERT(pcbinfo);
/*
* First look for an exact match.
@@ -1574,13 +1752,108 @@ in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
return (NULL);
}
+
+/*
+ * Lookup PCB in hash list, using pcbinfo tables. This variation locks the
+ * hash list lock, and will return the inpcb locked (i.e., requires
+ * INPLOOKUP_LOCKPCB).
+ */
+static struct inpcb *
+in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
+ u_int fport, struct in_addr laddr, u_int lport, int lookupflags,
+ struct ifnet *ifp)
+{
+ struct inpcb *inp;
+
+ INP_HASH_RLOCK(pcbinfo);
+ inp = in_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport,
+ (lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp);
+ if (inp != NULL) {
+ in_pcbref(inp);
+ INP_HASH_RUNLOCK(pcbinfo);
+ if (lookupflags & INPLOOKUP_WLOCKPCB) {
+ INP_WLOCK(inp);
+ if (in_pcbrele_wlocked(inp))
+ return (NULL);
+ } else if (lookupflags & INPLOOKUP_RLOCKPCB) {
+ INP_RLOCK(inp);
+ if (in_pcbrele_rlocked(inp))
+ return (NULL);
+ } else
+ panic("%s: locking bug", __func__);
+ } else
+ INP_HASH_RUNLOCK(pcbinfo);
+ return (inp);
+}
+
+/*
+ * Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf
+ * from which a pre-calculated hash value may be extracted.
+ *
+ * Possibly more of this logic should be in in_pcbgroup.c.
+ */
+struct inpcb *
+in_pcblookup(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport,
+ struct in_addr laddr, u_int lport, int lookupflags, struct ifnet *ifp)
+{
+#if defined(PCBGROUP)
+ struct inpcbgroup *pcbgroup;
+#endif
+
+ KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
+ ("%s: invalid lookup flags %d", __func__, lookupflags));
+ KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
+ ("%s: LOCKPCB not set", __func__));
+
+#if defined(PCBGROUP)
+ if (in_pcbgroup_enabled(pcbinfo)) {
+ pcbgroup = in_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
+ fport);
+ return (in_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
+ laddr, lport, lookupflags, ifp));
+ }
+#endif
+ return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
+ lookupflags, ifp));
+}
+
+struct inpcb *
+in_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in_addr faddr,
+ u_int fport, struct in_addr laddr, u_int lport, int lookupflags,
+ struct ifnet *ifp, struct mbuf *m)
+{
+#ifdef PCBGROUP
+ struct inpcbgroup *pcbgroup;
+#endif
+
+ KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
+ ("%s: invalid lookup flags %d", __func__, lookupflags));
+ KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
+ ("%s: LOCKPCB not set", __func__));
+
+#ifdef PCBGROUP
+ if (in_pcbgroup_enabled(pcbinfo)) {
+ pcbgroup = in_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
+ m->m_pkthdr.flowid);
+ if (pcbgroup != NULL)
+ return (in_pcblookup_group(pcbinfo, pcbgroup, faddr,
+ fport, laddr, lport, lookupflags, ifp));
+ pcbgroup = in_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
+ fport);
+ return (in_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
+ laddr, lport, lookupflags, ifp));
+ }
+#endif
+ return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
+ lookupflags, ifp));
+}
#endif /* INET */
/*
* Insert PCB onto various hash lists.
*/
-int
-in_pcbinshash(struct inpcb *inp)
+static int
+in_pcbinshash_internal(struct inpcb *inp, int do_pcbgroup_update)
{
struct inpcbhead *pcbhash;
struct inpcbporthead *pcbporthash;
@@ -1588,8 +1861,9 @@ in_pcbinshash(struct inpcb *inp)
struct inpcbport *phd;
u_int32_t hashkey_faddr;
- INP_INFO_WLOCK_ASSERT(pcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK_ASSERT(pcbinfo);
+
KASSERT((inp->inp_flags & INP_INHASHLIST) == 0,
("in_pcbinshash: INP_INHASHLIST"));
@@ -1629,24 +1903,54 @@ in_pcbinshash(struct inpcb *inp)
LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
inp->inp_flags |= INP_INHASHLIST;
+#ifdef PCBGROUP
+ if (do_pcbgroup_update)
+ in_pcbgroup_update(inp);
+#endif
return (0);
}
/*
+ * For now, there are two public interfaces to insert an inpcb into the hash
+ * lists -- one that does update pcbgroups, and one that doesn't. The latter
+ * is used only in the TCP syncache, where in_pcbinshash is called before the
+ * full 4-tuple is set for the inpcb, and we don't want to install in the
+ * pcbgroup until later.
+ *
+ * XXXRW: This seems like a misfeature. in_pcbinshash should always update
+ * connection groups, and partially initialised inpcbs should not be exposed
+ * to either reservation hash tables or pcbgroups.
+ */
+int
+in_pcbinshash(struct inpcb *inp)
+{
+
+ return (in_pcbinshash_internal(inp, 1));
+}
+
+int
+in_pcbinshash_nopcbgroup(struct inpcb *inp)
+{
+
+ return (in_pcbinshash_internal(inp, 0));
+}
+
+/*
* Move PCB to the proper hash bucket when { faddr, fport } have been
* changed. NOTE: This does not handle the case of the lport changing (the
* hashed port list would have to be updated as well), so the lport must
* not change after in_pcbinshash() has been called.
*/
void
-in_pcbrehash(struct inpcb *inp)
+in_pcbrehash_mbuf(struct inpcb *inp, struct mbuf *m)
{
struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
struct inpcbhead *head;
u_int32_t hashkey_faddr;
- INP_INFO_WLOCK_ASSERT(pcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK_ASSERT(pcbinfo);
+
KASSERT(inp->inp_flags & INP_INHASHLIST,
("in_pcbrehash: !INP_INHASHLIST"));
@@ -1662,6 +1966,20 @@ in_pcbrehash(struct inpcb *inp)
LIST_REMOVE(inp, inp_hash);
LIST_INSERT_HEAD(head, inp, inp_hash);
+
+#ifdef PCBGROUP
+ if (m != NULL)
+ in_pcbgroup_update_mbuf(inp, m);
+ else
+ in_pcbgroup_update(inp);
+#endif
+}
+
+void
+in_pcbrehash(struct inpcb *inp)
+{
+
+ in_pcbrehash_mbuf(inp, NULL);
}
/*
@@ -1679,16 +1997,21 @@ in_pcbremlists(struct inpcb *inp)
if (inp->inp_flags & INP_INHASHLIST) {
struct inpcbport *phd = inp->inp_phd;
+ INP_HASH_WLOCK(pcbinfo);
LIST_REMOVE(inp, inp_hash);
LIST_REMOVE(inp, inp_portlist);
if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
LIST_REMOVE(phd, phd_hash);
free(phd, M_PCB);
}
+ INP_HASH_WUNLOCK(pcbinfo);
inp->inp_flags &= ~INP_INHASHLIST;
}
LIST_REMOVE(inp, inp_list);
pcbinfo->ipi_count--;
+#ifdef PCBGROUP
+ in_pcbgroup_remove(inp);
+#endif
}
/*
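The heart of the in_pcb.c change is the lookup API: callers no longer hold the global pcbinfo lock around in_pcblookup_hash(), but instead call in_pcblookup() or in_pcblookup_mbuf() with INPLOOKUP_RLOCKPCB or INPLOOKUP_WLOCKPCB and receive an inpcb that is already referenced and locked, the hash or pcbgroup lock having been dropped internally. A hypothetical caller would look like the sketch below (illustrative only, not from this commit; assumes the usual netinet includes):

static void
example_input(struct inpcbinfo *pcbinfo, struct mbuf *m, struct ifnet *ifp,
    struct in_addr faddr, u_int fport, struct in_addr laddr, u_int lport)
{
	struct inpcb *inp;

	/* The lookup returns the inpcb referenced and read-locked. */
	inp = in_pcblookup_mbuf(pcbinfo, faddr, fport, laddr, lport,
	    INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, ifp, m);
	if (inp == NULL) {
		m_freem(m);
		return;
	}
	/* ... deliver m to inp->inp_socket ... */
	INP_RUNLOCK(inp);
}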
diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
index 14d4ea2..dfef963 100644
--- a/sys/netinet/in_pcb.h
+++ b/sys/netinet/in_pcb.h
@@ -44,6 +44,7 @@
#include <sys/_rwlock.h>
#ifdef _KERNEL
+#include <sys/lock.h>
#include <sys/rwlock.h>
#include <net/vnet.h>
#include <vm/uma.h>
@@ -141,6 +142,7 @@ struct icmp6_filter;
*
* Key:
* (c) - Constant after initialization
+ * (g) - Protected by the pcbgroup lock
* (i) - Protected by the inpcb lock
* (p) - Protected by the pcbinfo lock for the inpcb
* (s) - Protected by another subsystem's locks
@@ -160,9 +162,12 @@ struct icmp6_filter;
*/
struct inpcb {
LIST_ENTRY(inpcb) inp_hash; /* (i/p) hash list */
+ LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */
LIST_ENTRY(inpcb) inp_list; /* (i/p) list for all PCBs for proto */
void *inp_ppcb; /* (i) pointer to per-protocol pcb */
struct inpcbinfo *inp_pcbinfo; /* (c) PCB list info */
+ struct inpcbgroup *inp_pcbgroup; /* (g/i) PCB group list */
+ LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/p) group wildcard entry */
struct socket *inp_socket; /* (i) back pointer to socket */
struct ucred *inp_cred; /* (c) cache of socket cred */
u_int32_t inp_flow; /* (i) IPv6 flow information */
@@ -268,22 +273,23 @@ struct inpcbport {
* Global data structure for each high-level protocol (UDP, TCP, ...) in both
* IPv4 and IPv6. Holds inpcb lists and information for managing them.
*
- * Each pcbinfo is protected by ipi_lock, covering mutable global fields (such
- * as the global pcb list) and hashed lookup tables. The lock order is:
+ * Each pcbinfo is protected by two locks: ipi_lock and ipi_hash_lock,
+ * the former covering mutable global fields (such as the global pcb list),
+ * and the latter covering the hashed lookup tables. The lock order is:
*
- * ipi_lock (before) inpcb locks
+ * ipi_lock (before) inpcb locks (before) {ipi_hash_lock, pcbgroup locks}
*
* Locking key:
*
* (c) Constant or nearly constant after initialisation
* (g) Locked by ipi_lock
- * (h) Read using either ipi_lock or inpcb lock; write requires both.
+ * (h) Read using either ipi_hash_lock or inpcb lock; write requires both
+ * (p) Protected by one or more pcbgroup locks
* (x) Synchronisation properties poorly defined
*/
struct inpcbinfo {
/*
- * Global lock protecting global inpcb list, inpcb count, hash tables,
- * etc.
+ * Global lock protecting global inpcb list, inpcb count, etc.
*/
struct rwlock ipi_lock;
@@ -312,17 +318,39 @@ struct inpcbinfo {
struct uma_zone *ipi_zone; /* (c) */
/*
+ * Connection groups associated with this protocol. These fields are
+ * constant, but pcbgroup structures themselves are protected by
+ * per-pcbgroup locks.
+ */
+ struct inpcbgroup *ipi_pcbgroups; /* (c) */
+ u_int ipi_npcbgroups; /* (c) */
+ u_int ipi_hashfields; /* (c) */
+
+ /*
+ * Global lock protecting non-pcbgroup hash lookup tables.
+ */
+ struct rwlock ipi_hash_lock;
+
+ /*
* Global hash of inpcbs, hashed by local and foreign addresses and
* port numbers.
*/
- struct inpcbhead *ipi_hashbase; /* (g) */
- u_long ipi_hashmask; /* (g) */
+ struct inpcbhead *ipi_hashbase; /* (h) */
+ u_long ipi_hashmask; /* (h) */
/*
* Global hash of inpcbs, hashed by only local port number.
*/
- struct inpcbporthead *ipi_porthashbase; /* (g) */
- u_long ipi_porthashmask; /* (g) */
+ struct inpcbporthead *ipi_porthashbase; /* (h) */
+ u_long ipi_porthashmask; /* (h) */
+
+ /*
+ * List of wildcard inpcbs for use with pcbgroups. In the past, was
+ * per-pcbgroup but is now global. All pcbgroup locks must be held
+ * to modify the list, so any is sufficient to read it.
+ */
+ struct inpcbhead *ipi_wildbase; /* (p) */
+ u_long ipi_wildmask; /* (p) */
/*
* Pointer to network stack instance
@@ -335,6 +363,31 @@ struct inpcbinfo {
void *ipi_pspare[2];
};
+/*
+ * Connection groups hold sets of connections that have similar CPU/thread
+ * affinity. Each connection belongs to exactly one connection group.
+ */
+struct inpcbgroup {
+ /*
+ * Per-connection group hash of inpcbs, hashed by local and foreign
+ * addresses and port numbers.
+ */
+ struct inpcbhead *ipg_hashbase; /* (c) */
+ u_long ipg_hashmask; /* (c) */
+
+ /*
+ * Notional affinity of this pcbgroup.
+ */
+ u_int ipg_cpu; /* (p) */
+
+ /*
+ * Per-connection group lock, not to be confused with ipi_lock.
+ * Protects the hash table hung off the group, but also the global
+ * wildcard list in inpcbinfo.
+ */
+ struct mtx ipg_lock;
+} __aligned(CACHE_LINE_SIZE);
+
#define INP_LOCK_INIT(inp, d, t) \
rw_init_flags(&(inp)->inp_lock, (t), RW_RECURSE | RW_DUPOK)
#define INP_LOCK_DESTROY(inp) rw_destroy(&(inp)->inp_lock)
@@ -406,6 +459,26 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
#define INP_INFO_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_WLOCKED)
#define INP_INFO_UNLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_UNLOCKED)
+#define INP_HASH_LOCK_INIT(ipi, d) \
+ rw_init_flags(&(ipi)->ipi_hash_lock, (d), 0)
+#define INP_HASH_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_hash_lock)
+#define INP_HASH_RLOCK(ipi) rw_rlock(&(ipi)->ipi_hash_lock)
+#define INP_HASH_WLOCK(ipi) rw_wlock(&(ipi)->ipi_hash_lock)
+#define INP_HASH_RUNLOCK(ipi) rw_runlock(&(ipi)->ipi_hash_lock)
+#define INP_HASH_WUNLOCK(ipi) rw_wunlock(&(ipi)->ipi_hash_lock)
+#define INP_HASH_LOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_hash_lock, \
+ RA_LOCKED)
+#define INP_HASH_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_hash_lock, \
+ RA_WLOCKED)
+
+#define INP_GROUP_LOCK_INIT(ipg, d) mtx_init(&(ipg)->ipg_lock, (d), NULL, \
+ MTX_DEF | MTX_DUPOK)
+#define INP_GROUP_LOCK_DESTROY(ipg) mtx_destroy(&(ipg)->ipg_lock)
+
+#define INP_GROUP_LOCK(ipg) mtx_lock(&(ipg)->ipg_lock)
+#define INP_GROUP_LOCK_ASSERT(ipg) mtx_assert(&(ipg)->ipg_lock, MA_OWNED)
+#define INP_GROUP_UNLOCK(ipg) mtx_unlock(&(ipg)->ipg_lock)
+
#define INP_PCBHASH(faddr, lport, fport, mask) \
(((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask))
#define INP_PCBPORTHASH(lport, mask) \
@@ -465,8 +538,18 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
*/
#define INP_LLE_VALID 0x00000001 /* cached lle is valid */
#define INP_RT_VALID 0x00000002 /* cached rtentry is valid */
+#define INP_PCBGROUPWILD 0x00000004 /* in pcbgroup wildcard list */
+
+/*
+ * Flags passed to in_pcblookup*() functions.
+ */
+#define INPLOOKUP_WILDCARD 0x00000001 /* Allow wildcard sockets. */
+#define INPLOOKUP_RLOCKPCB 0x00000002 /* Return inpcb read-locked. */
+#define INPLOOKUP_WLOCKPCB 0x00000004 /* Return inpcb write-locked. */
+
+#define INPLOOKUP_MASK (INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB | \
+ INPLOOKUP_WLOCKPCB)
-#define INPLOOKUP_WILDCARD 1
#define sotoinpcb(so) ((struct inpcb *)(so)->so_pcb)
#define sotoin6pcb(so) sotoinpcb(so) /* for KAME src sync over BSD*'s */
@@ -474,6 +557,13 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
#define INP_CHECK_SOCKAF(so, af) (INP_SOCKAF(so) == af)
+/*
+ * Constants for pcbinfo.ipi_hashfields.
+ */
+#define IPI_HASHFIELDS_NONE 0
+#define IPI_HASHFIELDS_2TUPLE 1
+#define IPI_HASHFIELDS_4TUPLE 2
+
#ifdef _KERNEL
VNET_DECLARE(int, ipport_reservedhigh);
VNET_DECLARE(int, ipport_reservedlow);
@@ -505,7 +595,21 @@ VNET_DECLARE(int, ipport_tcpallocs);
void in_pcbinfo_destroy(struct inpcbinfo *);
void in_pcbinfo_init(struct inpcbinfo *, const char *, struct inpcbhead *,
- int, int, char *, uma_init, uma_fini, uint32_t);
+ int, int, char *, uma_init, uma_fini, uint32_t, u_int);
+
+struct inpcbgroup *
+ in_pcbgroup_byhash(struct inpcbinfo *, u_int, uint32_t);
+struct inpcbgroup *
+ in_pcbgroup_byinpcb(struct inpcb *);
+struct inpcbgroup *
+ in_pcbgroup_bytuple(struct inpcbinfo *, struct in_addr, u_short,
+ struct in_addr, u_short);
+void in_pcbgroup_destroy(struct inpcbinfo *);
+int in_pcbgroup_enabled(struct inpcbinfo *);
+void in_pcbgroup_init(struct inpcbinfo *, u_int, int);
+void in_pcbgroup_remove(struct inpcb *);
+void in_pcbgroup_update(struct inpcb *);
+void in_pcbgroup_update_mbuf(struct inpcb *, struct mbuf *);
void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *);
int in_pcballoc(struct socket *, struct inpcbinfo *);
@@ -515,6 +619,8 @@ int in_pcb_lport(struct inpcb *, struct in_addr *, u_short *,
int in_pcbbind_setup(struct inpcb *, struct sockaddr *, in_addr_t *,
u_short *, struct ucred *);
int in_pcbconnect(struct inpcb *, struct sockaddr *, struct ucred *);
+int in_pcbconnect_mbuf(struct inpcb *, struct sockaddr *, struct ucred *,
+ struct mbuf *);
int in_pcbconnect_setup(struct inpcb *, struct sockaddr *, in_addr_t *,
u_short *, in_addr_t *, u_short *, struct inpcb **,
struct ucred *);
@@ -523,16 +629,21 @@ void in_pcbdisconnect(struct inpcb *);
void in_pcbdrop(struct inpcb *);
void in_pcbfree(struct inpcb *);
int in_pcbinshash(struct inpcb *);
+int in_pcbinshash_nopcbgroup(struct inpcb *);
struct inpcb *
in_pcblookup_local(struct inpcbinfo *,
struct in_addr, u_short, int, struct ucred *);
struct inpcb *
- in_pcblookup_hash(struct inpcbinfo *, struct in_addr, u_int,
+ in_pcblookup(struct inpcbinfo *, struct in_addr, u_int,
struct in_addr, u_int, int, struct ifnet *);
+struct inpcb *
+ in_pcblookup_mbuf(struct inpcbinfo *, struct in_addr, u_int,
+ struct in_addr, u_int, int, struct ifnet *, struct mbuf *);
void in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr,
int, struct inpcb *(*)(struct inpcb *, int));
void in_pcbref(struct inpcb *);
void in_pcbrehash(struct inpcb *);
+void in_pcbrehash_mbuf(struct inpcb *, struct mbuf *);
int in_pcbrele(struct inpcb *);
int in_pcbrele_rlocked(struct inpcb *);
int in_pcbrele_wlocked(struct inpcb *);
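Note that in_pcbinfo_init() now takes a trailing hashfields argument, built from the IPI_HASHFIELDS_* constants above, which selects how (or whether) a protocol's connections are placed into pcbgroups. A hypothetical protocol would initialise and tear down its pcbinfo as in the sketch below (names are illustrative, not from the tree):

static struct inpcbinfo foo_pcbinfo;
static struct inpcbhead foo_pcblist;

static void
foo_init(void)
{

	in_pcbinfo_init(&foo_pcbinfo, "foo", &foo_pcblist,
	    512, 512,			/* hash and porthash sizes */
	    "foo_inpcb", NULL, NULL,	/* UMA zone name, init, fini */
	    0,				/* UMA zone flags */
	    IPI_HASHFIELDS_2TUPLE);	/* place pcbgroups by 2-tuple hash */
}

static void
foo_destroy(void)
{

	in_pcbinfo_destroy(&foo_pcbinfo);
}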
diff --git a/sys/netinet/in_pcbgroup.c b/sys/netinet/in_pcbgroup.c
new file mode 100644
index 0000000..c9f5c70
--- /dev/null
+++ b/sys/netinet/in_pcbgroup.c
@@ -0,0 +1,457 @@
+/*-
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * This software was developed by Robert N. M. Watson under contract
+ * to Juniper Networks, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/mutex.h>
+#include <sys/smp.h>
+#include <sys/socketvar.h>
+
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#ifdef INET6
+#include <netinet6/in6_pcb.h>
+#endif /* INET6 */
+
+/*
+ * pcbgroups, or "connection groups" are based on Willman, Rixner, and Cox's
+ * 2006 USENIX paper, "An Evaluation of Network Stack Parallelization
+ * Strategies in Modern Operating Systems". This implementation differs
+ * significantly from that described in the paper, in that it attempts to
+ * introduce not just notions of affinity for connections and distribute work
+ * so as to reduce lock contention, but also align those notions with
+ * hardware work distribution strategies such as RSS. In this construction,
+ * connection groups supplement, rather than replace, existing reservation
+ * tables for protocol 4-tuples, offering CPU-affine lookup tables with
+ * minimal cache line migration and lock contention during steady state
+ * operation.
+ *
+ * Internet protocols, such as UDP and TCP, register to use connection groups
+ * by providing an ipi_hashfields value other than IPI_HASHFIELDS_NONE; this
+ * indicates to the connection group code whether a 2-tuple or 4-tuple is
+ * used as an argument to hashes that assign a connection to a particular
+ * group. This must be aligned with any hardware offloaded distribution
+ * model, such as RSS or similar approaches taken in embedded network boards.
+ * Wildcard sockets require special handling, as in Willman 2006, and are
+ * shared between connection groups -- while being protected by group-local
+ * locks. This means that connection establishment and teardown can be
+ * significantly more expensive than without connection groups, but that
+ * steady-state processing can be significantly faster.
+ *
+ * Most of the implementation of connection groups is in this file; however,
+ * connection group lookup is implemented in in_pcb.c alongside reservation
+ * table lookups -- see in_pcblookup_group().
+ *
+ * TODO:
+ *
+ * Implement dynamic rebalancing of buckets with connection groups; when
+ * load is unevenly distributed, search for more optimal balancing on
+ * demand. This might require scaling up the number of connection groups
+ * by <<1.
+ *
+ * Provide an IP 2-tuple or 4-tuple netisr m2cpu handler based on connection
+ * groups for ip_input and ip6_input, allowing non-offloaded work
+ * distribution.
+ *
+ * Expose effective CPU affinity of connections to userspace using socket
+ * options.
+ *
+ * Investigate per-connection affinity overrides based on socket options; an
+ * option could be set, certainly resulting in work being distributed
+ * differently in software, and possibly propagated to supporting hardware
+ * with TCAMs or hardware hash tables. This might require connections to
+ * exist in more than one connection group at a time.
+ *
+ * Hook netisr thread reconfiguration events, and propagate those to RSS so
+ * that rebalancing can occur when the thread pool grows or shrinks.
+ *
+ * Expose per-pcbgroup statistics to userspace monitoring tools such as
+ * netstat, in order to allow better debugging and profiling.
+ */
+
+void
+in_pcbgroup_init(struct inpcbinfo *pcbinfo, u_int hashfields,
+ int hash_nelements)
+{
+ struct inpcbgroup *pcbgroup;
+ u_int numpcbgroups, pgn;
+
+ /*
+ * Only enable connection groups for a protocol if it has been
+ * specifically requested.
+ */
+ if (hashfields == IPI_HASHFIELDS_NONE)
+ return;
+
+ /*
+ * Connection groups are about multi-processor load distribution,
+ * lock contention, and connection CPU affinity. As such, no point
+ * in turning them on for a uniprocessor machine, it only wastes
+ * memory.
+ */
+ if (mp_ncpus == 1)
+ return;
+
+ /*
+ * Use one group per CPU for now. If we decide to do dynamic
+ * rebalancing a la RSS, we'll need to shift left by at least 1.
+ */
+ numpcbgroups = mp_ncpus;
+
+ pcbinfo->ipi_hashfields = hashfields;
+ pcbinfo->ipi_pcbgroups = malloc(numpcbgroups *
+ sizeof(*pcbinfo->ipi_pcbgroups), M_PCB, M_WAITOK | M_ZERO);
+ pcbinfo->ipi_npcbgroups = numpcbgroups;
+ pcbinfo->ipi_wildbase = hashinit(hash_nelements, M_PCB,
+ &pcbinfo->ipi_wildmask);
+ for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) {
+ pcbgroup = &pcbinfo->ipi_pcbgroups[pgn];
+ pcbgroup->ipg_hashbase = hashinit(hash_nelements, M_PCB,
+ &pcbgroup->ipg_hashmask);
+ INP_GROUP_LOCK_INIT(pcbgroup, "pcbgroup");
+
+ /*
+ * Initialise notional affinity of the pcbgroup -- for RSS,
+ * we want the same notion of affinity as NICs to be used.
+ * Just round robin for the time being.
+ */
+ pcbgroup->ipg_cpu = (pgn % mp_ncpus);
+ }
+}
+
+void
+in_pcbgroup_destroy(struct inpcbinfo *pcbinfo)
+{
+ struct inpcbgroup *pcbgroup;
+ u_int pgn;
+
+ if (pcbinfo->ipi_npcbgroups == 0)
+ return;
+
+ for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) {
+ pcbgroup = &pcbinfo->ipi_pcbgroups[pgn];
+ KASSERT(LIST_EMPTY(pcbinfo->ipi_listhead),
+ ("in_pcbinfo_destroy: listhead not empty"));
+ INP_GROUP_LOCK_DESTROY(pcbgroup);
+ hashdestroy(pcbgroup->ipg_hashbase, M_PCB,
+ pcbgroup->ipg_hashmask);
+ }
+ hashdestroy(pcbinfo->ipi_wildbase, M_PCB, pcbinfo->ipi_wildmask);
+ free(pcbinfo->ipi_pcbgroups, M_PCB);
+ pcbinfo->ipi_pcbgroups = NULL;
+ pcbinfo->ipi_npcbgroups = 0;
+ pcbinfo->ipi_hashfields = 0;
+}
+
+/*
+ * Given a hash of whatever the covered tuple might be, return a pcbgroup
+ * index.
+ */
+static __inline u_int
+in_pcbgroup_getbucket(struct inpcbinfo *pcbinfo, uint32_t hash)
+{
+
+ return (hash % pcbinfo->ipi_npcbgroups);
+}
+
+/*
+ * Map a (hashtype, hash) tuple into a connection group, or NULL if the hash
+ * information is insufficient to identify the pcbgroup.
+ */
+struct inpcbgroup *
+in_pcbgroup_byhash(struct inpcbinfo *pcbinfo, u_int hashtype, uint32_t hash)
+{
+
+ return (NULL);
+}
+
+static struct inpcbgroup *
+in_pcbgroup_bymbuf(struct inpcbinfo *pcbinfo, struct mbuf *m)
+{
+
+ return (in_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
+ m->m_pkthdr.flowid));
+}
+
+struct inpcbgroup *
+in_pcbgroup_bytuple(struct inpcbinfo *pcbinfo, struct in_addr laddr,
+ u_short lport, struct in_addr faddr, u_short fport)
+{
+ uint32_t hash;
+
+ switch (pcbinfo->ipi_hashfields) {
+ case IPI_HASHFIELDS_4TUPLE:
+ hash = faddr.s_addr ^ fport;
+ break;
+
+ case IPI_HASHFIELDS_2TUPLE:
+ hash = faddr.s_addr ^ laddr.s_addr;
+ break;
+
+ default:
+ hash = 0;
+ }
+ return (&pcbinfo->ipi_pcbgroups[in_pcbgroup_getbucket(pcbinfo,
+ hash)]);
+}
+
+struct inpcbgroup *
+in_pcbgroup_byinpcb(struct inpcb *inp)
+{
+
+ return (in_pcbgroup_bytuple(inp->inp_pcbinfo, inp->inp_laddr,
+ inp->inp_lport, inp->inp_faddr, inp->inp_fport));
+}
+
+static void
+in_pcbwild_add(struct inpcb *inp)
+{
+ struct inpcbinfo *pcbinfo;
+ struct inpcbhead *head;
+ u_int pgn;
+
+ INP_WLOCK_ASSERT(inp);
+ KASSERT(!(inp->inp_flags2 & INP_PCBGROUPWILD),
+ ("%s: is wild",__func__));
+
+ pcbinfo = inp->inp_pcbinfo;
+ for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
+ INP_GROUP_LOCK(&pcbinfo->ipi_pcbgroups[pgn]);
+ head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, inp->inp_lport,
+ 0, pcbinfo->ipi_wildmask)];
+ LIST_INSERT_HEAD(head, inp, inp_pcbgroup_wild);
+ inp->inp_flags2 |= INP_PCBGROUPWILD;
+ for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
+ INP_GROUP_UNLOCK(&pcbinfo->ipi_pcbgroups[pgn]);
+}
+
+static void
+in_pcbwild_remove(struct inpcb *inp)
+{
+ struct inpcbinfo *pcbinfo;
+ u_int pgn;
+
+ INP_WLOCK_ASSERT(inp);
+ KASSERT((inp->inp_flags2 & INP_PCBGROUPWILD),
+ ("%s: not wild", __func__));
+
+ pcbinfo = inp->inp_pcbinfo;
+ for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
+ INP_GROUP_LOCK(&pcbinfo->ipi_pcbgroups[pgn]);
+ LIST_REMOVE(inp, inp_pcbgroup_wild);
+ for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
+ INP_GROUP_UNLOCK(&pcbinfo->ipi_pcbgroups[pgn]);
+ inp->inp_flags2 &= ~INP_PCBGROUPWILD;
+}
+
+static __inline int
+in_pcbwild_needed(struct inpcb *inp)
+{
+
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6)
+ return (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr));
+ else
+#endif
+ return (inp->inp_faddr.s_addr == htonl(INADDR_ANY));
+}
+
+static void
+in_pcbwild_update_internal(struct inpcb *inp)
+{
+ int wildcard_needed;
+
+ wildcard_needed = in_pcbwild_needed(inp);
+ if (wildcard_needed && !(inp->inp_flags2 & INP_PCBGROUPWILD))
+ in_pcbwild_add(inp);
+ else if (!wildcard_needed && (inp->inp_flags2 & INP_PCBGROUPWILD))
+ in_pcbwild_remove(inp);
+}
+
+/*
+ * Update the pcbgroup of an inpcb, which might include removing an old
+ * pcbgroup reference and/or adding a new one. Wildcard processing is not
+ * performed here, although ideally we'll never install a pcbgroup for a
+ * wildcard inpcb (asserted below).
+ */
+static void
+in_pcbgroup_update_internal(struct inpcbinfo *pcbinfo,
+ struct inpcbgroup *newpcbgroup, struct inpcb *inp)
+{
+ struct inpcbgroup *oldpcbgroup;
+ struct inpcbhead *pcbhash;
+ uint32_t hashkey_faddr;
+
+ INP_WLOCK_ASSERT(inp);
+
+ oldpcbgroup = inp->inp_pcbgroup;
+ if (oldpcbgroup != NULL && oldpcbgroup != newpcbgroup) {
+ INP_GROUP_LOCK(oldpcbgroup);
+ LIST_REMOVE(inp, inp_pcbgrouphash);
+ inp->inp_pcbgroup = NULL;
+ INP_GROUP_UNLOCK(oldpcbgroup);
+ }
+ if (newpcbgroup != NULL && oldpcbgroup != newpcbgroup) {
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6)
+ hashkey_faddr = inp->in6p_faddr.s6_addr32[3]; /* XXX */
+ else
+#endif
+ hashkey_faddr = inp->inp_faddr.s_addr;
+ INP_GROUP_LOCK(newpcbgroup);
+ pcbhash = &newpcbgroup->ipg_hashbase[
+ INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport,
+ newpcbgroup->ipg_hashmask)];
+ LIST_INSERT_HEAD(pcbhash, inp, inp_pcbgrouphash);
+ inp->inp_pcbgroup = newpcbgroup;
+ INP_GROUP_UNLOCK(newpcbgroup);
+ }
+
+ KASSERT(!(newpcbgroup != NULL && in_pcbwild_needed(inp)),
+ ("%s: pcbgroup and wildcard!", __func__));
+}
+
+/*
+ * Two update paths: one in which the 4-tuple on an inpcb has been updated
+ * and therefore connection groups may need to change (or a wildcard entry
+ * may need to be installed), and another in which the 4-tuple has been
+ * set as a result of a packet received, in which case we may be able to use
+ * the hash on the mbuf to avoid doing a software hash calculation for RSS.
+ *
+ * In each case: first, let the wildcard code have a go at placing it as a
+ * wildcard socket. If it was a wildcard, or if the connection has been
+ * dropped, then no pcbgroup is required (so potentially clear it);
+ * otherwise, calculate and update the pcbgroup for the inpcb.
+ */
+void
+in_pcbgroup_update(struct inpcb *inp)
+{
+ struct inpcbinfo *pcbinfo;
+ struct inpcbgroup *newpcbgroup;
+
+ INP_WLOCK_ASSERT(inp);
+
+ pcbinfo = inp->inp_pcbinfo;
+ if (!in_pcbgroup_enabled(pcbinfo))
+ return;
+
+ in_pcbwild_update_internal(inp);
+ if (!(inp->inp_flags2 & INP_PCBGROUPWILD) &&
+ !(inp->inp_flags & INP_DROPPED)) {
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6)
+ newpcbgroup = in6_pcbgroup_byinpcb(inp);
+ else
+#endif
+ newpcbgroup = in_pcbgroup_byinpcb(inp);
+ } else
+ newpcbgroup = NULL;
+ in_pcbgroup_update_internal(pcbinfo, newpcbgroup, inp);
+}
+
+void
+in_pcbgroup_update_mbuf(struct inpcb *inp, struct mbuf *m)
+{
+ struct inpcbinfo *pcbinfo;
+ struct inpcbgroup *newpcbgroup;
+
+ INP_WLOCK_ASSERT(inp);
+
+ pcbinfo = inp->inp_pcbinfo;
+ if (!in_pcbgroup_enabled(pcbinfo))
+ return;
+
+ /*
+ * Possibly should assert !INP_PCBGROUPWILD rather than testing for
+ * it; presumably this function should never be called for anything
+ * other than non-wildcard socket?
+ */
+ in_pcbwild_update_internal(inp);
+ if (!(inp->inp_flags2 & INP_PCBGROUPWILD) &&
+ !(inp->inp_flags & INP_DROPPED)) {
+ newpcbgroup = in_pcbgroup_bymbuf(pcbinfo, m);
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6) {
+ if (newpcbgroup == NULL)
+ newpcbgroup = in6_pcbgroup_byinpcb(inp);
+ } else {
+#endif
+ if (newpcbgroup == NULL)
+ newpcbgroup = in_pcbgroup_byinpcb(inp);
+#ifdef INET6
+ }
+#endif
+ } else
+ newpcbgroup = NULL;
+ in_pcbgroup_update_internal(pcbinfo, newpcbgroup, inp);
+}
+
+/*
+ * Remove pcbgroup entry and optional pcbgroup wildcard entry for this inpcb.
+ */
+void
+in_pcbgroup_remove(struct inpcb *inp)
+{
+ struct inpcbgroup *pcbgroup;
+
+ INP_WLOCK_ASSERT(inp);
+
+ if (!in_pcbgroup_enabled(inp->inp_pcbinfo))
+ return;
+
+ if (inp->inp_flags2 & INP_PCBGROUPWILD)
+ in_pcbwild_remove(inp);
+
+ pcbgroup = inp->inp_pcbgroup;
+ if (pcbgroup != NULL) {
+ INP_GROUP_LOCK(pcbgroup);
+ LIST_REMOVE(inp, inp_pcbgrouphash);
+ inp->inp_pcbgroup = NULL;
+ INP_GROUP_UNLOCK(pcbgroup);
+ }
+}
+
+/*
+ * Query whether or not it is appropriate to use pcbgroups to look up inpcbs
+ * for a protocol.
+ */
+int
+in_pcbgroup_enabled(struct inpcbinfo *pcbinfo)
+{
+
+ return (pcbinfo->ipi_npcbgroups > 0);
+}
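For a protocol registered with IPI_HASHFIELDS_4TUPLE, group selection above reduces to hash = faddr ^ fport followed by hash % ipi_npcbgroups, with one pcbgroup per CPU, so every packet of a given connection lands in the same per-CPU group. The arithmetic can be sanity-checked with a trivial userland program (illustrative only; the address, port, and CPU count are made up, and the kernel operates on network-byte-order values):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t faddr = 0xc0a80102;	/* 192.168.1.2 */
	uint16_t fport = 80;
	uint32_t ncpus = 4;		/* one pcbgroup per CPU */
	uint32_t hash = faddr ^ fport;	/* IPI_HASHFIELDS_4TUPLE rule */

	printf("pcbgroup bucket = %u\n", (unsigned int)(hash % ncpus));
	return (0);
}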
diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c
index 2827c22..d2a772f 100644
--- a/sys/netinet/in_proto.c
+++ b/sys/netinet/in_proto.c
@@ -106,6 +106,8 @@ static struct pr_usrreqs nousrreqs;
#include <net/if_pfsync.h>
#endif
+FEATURE(inet, "Internet Protocol version 4");
+
extern struct domain inetdomain;
/* Spacer for loadable protocols. */
diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c
index de88556..527ce56 100644
--- a/sys/netinet/ip_divert.c
+++ b/sys/netinet/ip_divert.c
@@ -153,7 +153,8 @@ div_init(void)
* place for hashbase == NULL.
*/
in_pcbinfo_init(&V_divcbinfo, "div", &V_divcb, 1, 1, "divcb",
- div_inpcb_init, div_inpcb_fini, UMA_ZONE_NOFREE);
+ div_inpcb_init, div_inpcb_fini, UMA_ZONE_NOFREE,
+ IPI_HASHFIELDS_NONE);
}
static void
@@ -530,7 +531,9 @@ div_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
((struct sockaddr_in *)nam)->sin_addr.s_addr = INADDR_ANY;
INP_INFO_WLOCK(&V_divcbinfo);
INP_WLOCK(inp);
+ INP_HASH_WLOCK(&V_divcbinfo);
error = in_pcbbind(inp, nam, td->td_ucred);
+ INP_HASH_WUNLOCK(&V_divcbinfo);
INP_WUNLOCK(inp);
INP_INFO_WUNLOCK(&V_divcbinfo);
return error;
@@ -659,9 +662,9 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
INP_INFO_WLOCK(&V_divcbinfo);
for (i = 0; i < n; i++) {
inp = inp_list[i];
- INP_WLOCK(inp);
- if (!in_pcbrele(inp))
- INP_WUNLOCK(inp);
+ INP_RLOCK(inp);
+ if (!in_pcbrele_rlocked(inp))
+ INP_RUNLOCK(inp);
}
INP_INFO_WUNLOCK(&V_divcbinfo);
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
index ac1c723..67fcb74 100644
--- a/sys/netinet/ip_input.c
+++ b/sys/netinet/ip_input.c
@@ -488,7 +488,7 @@ tooshort:
}
#ifdef IPSEC
/*
- * Bypass packet filtering for packets from a tunnel (gif).
+ * Bypass packet filtering for packets previously handled by IPsec.
*/
if (ip_ipsec_filtertunnel(m))
goto passin;
diff --git a/sys/netinet/ip_ipsec.c b/sys/netinet/ip_ipsec.c
index 50a6ce4..a3c87f5 100644
--- a/sys/netinet/ip_ipsec.c
+++ b/sys/netinet/ip_ipsec.c
@@ -95,7 +95,7 @@ ip_ipsec_filtertunnel(struct mbuf *m)
#if defined(IPSEC)
/*
- * Bypass packet filtering for packets from a tunnel.
+ * Bypass packet filtering for packets previously handled by IPsec.
*/
if (!V_ip4_ipsec_filtertunnel &&
m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL)
diff --git a/sys/netinet/ipfw/ip_dummynet.c b/sys/netinet/ipfw/ip_dummynet.c
index ba6e892..e23ba3a 100644
--- a/sys/netinet/ipfw/ip_dummynet.c
+++ b/sys/netinet/ipfw/ip_dummynet.c
@@ -1045,7 +1045,7 @@ config_red(struct dn_fsk *fs)
fs->w_q = fs->fs.w_q;
fs->max_p = fs->fs.max_p;
- D("called");
+ ND("called");
/* Doing stuff that was in userland */
i = fs->sched->link.bandwidth;
s = (i <= 0) ? 0 :
@@ -1109,7 +1109,7 @@ config_red(struct dn_fsk *fs)
if (dn_cfg.red_max_pkt_size < 1)
dn_cfg.red_max_pkt_size = 1500;
fs->max_pkt_size = dn_cfg.red_max_pkt_size;
- D("exit");
+ ND("exit");
return 0;
}
@@ -2176,7 +2176,7 @@ ip_dn_destroy(int last)
DN_BH_WLOCK();
if (last) {
- printf("%s removing last instance\n", __FUNCTION__);
+ ND("removing last instance\n");
ip_dn_ctl_ptr = NULL;
ip_dn_io_ptr = NULL;
}
@@ -2256,13 +2256,13 @@ unload_dn_sched(struct dn_alg *s)
struct dn_alg *tmp, *r;
int err = EINVAL;
- D("called for %s", s->name);
+ ND("called for %s", s->name);
DN_BH_WLOCK();
SLIST_FOREACH_SAFE(r, &dn_cfg.schedlist, next, tmp) {
if (strcmp(s->name, r->name) != 0)
continue;
- D("ref_count = %d", r->ref_count);
+ ND("ref_count = %d", r->ref_count);
err = (r->ref_count != 0) ? EBUSY : 0;
if (err == 0)
SLIST_REMOVE(&dn_cfg.schedlist, r, dn_alg, next);
diff --git a/sys/netinet/ipfw/ip_fw2.c b/sys/netinet/ipfw/ip_fw2.c
index 9a75cf5..9e5c737 100644
--- a/sys/netinet/ipfw/ip_fw2.c
+++ b/sys/netinet/ipfw/ip_fw2.c
@@ -84,6 +84,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#ifdef INET6
+#include <netinet6/in6_pcb.h>
#include <netinet6/scope6_var.h>
#include <netinet6/ip6_var.h>
#endif
@@ -646,21 +647,27 @@ send_reject(struct ip_fw_args *args, int code, int iplen, struct ip *ip)
* we tried and failed, or any other value if successful.
*/
static int
-check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif,
- struct in_addr dst_ip, u_int16_t dst_port, struct in_addr src_ip,
- u_int16_t src_port, int *ugid_lookupp,
- struct ucred **uc, struct inpcb *inp)
+check_uidgid(ipfw_insn_u32 *insn, struct ip_fw_args *args, int *ugid_lookupp,
+ struct ucred **uc)
{
#ifndef __FreeBSD__
+ /* XXX */
return cred_check(insn, proto, oif,
dst_ip, dst_port, src_ip, src_port,
(struct bsd_ucred *)uc, ugid_lookupp, ((struct mbuf *)inp)->m_skb);
#else /* FreeBSD */
+ struct in_addr src_ip, dst_ip;
struct inpcbinfo *pi;
- int wildcard;
- struct inpcb *pcb;
+ struct ipfw_flow_id *id;
+ struct inpcb *pcb, *inp;
+ struct ifnet *oif;
+ int lookupflags;
int match;
+ id = &args->f_id;
+ inp = args->inp;
+ oif = args->oif;
+
/*
* Check to see if the UDP or TCP stack supplied us with
* the PCB. If so, rather then holding a lock and looking
@@ -681,31 +688,53 @@ check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif,
*/
if (*ugid_lookupp == -1)
return (0);
- if (proto == IPPROTO_TCP) {
- wildcard = 0;
+ if (id->proto == IPPROTO_TCP) {
+ lookupflags = 0;
pi = &V_tcbinfo;
- } else if (proto == IPPROTO_UDP) {
- wildcard = INPLOOKUP_WILDCARD;
+ } else if (id->proto == IPPROTO_UDP) {
+ lookupflags = INPLOOKUP_WILDCARD;
pi = &V_udbinfo;
} else
return 0;
+ lookupflags |= INPLOOKUP_RLOCKPCB;
match = 0;
if (*ugid_lookupp == 0) {
- INP_INFO_RLOCK(pi);
- pcb = (oif) ?
- in_pcblookup_hash(pi,
- dst_ip, htons(dst_port),
- src_ip, htons(src_port),
- wildcard, oif) :
- in_pcblookup_hash(pi,
- src_ip, htons(src_port),
- dst_ip, htons(dst_port),
- wildcard, NULL);
+ if (id->addr_type == 6) {
+#ifdef INET6
+ if (oif == NULL)
+ pcb = in6_pcblookup_mbuf(pi,
+ &id->src_ip6, htons(id->src_port),
+ &id->dst_ip6, htons(id->dst_port),
+ lookupflags, oif, args->m);
+ else
+ pcb = in6_pcblookup_mbuf(pi,
+ &id->dst_ip6, htons(id->dst_port),
+ &id->src_ip6, htons(id->src_port),
+ lookupflags, oif, args->m);
+#else
+ *ugid_lookupp = -1;
+ return (0);
+#endif
+ } else {
+ src_ip.s_addr = htonl(id->src_ip);
+ dst_ip.s_addr = htonl(id->dst_ip);
+ if (oif == NULL)
+ pcb = in_pcblookup_mbuf(pi,
+ src_ip, htons(id->src_port),
+ dst_ip, htons(id->dst_port),
+ lookupflags, oif, args->m);
+ else
+ pcb = in_pcblookup_mbuf(pi,
+ dst_ip, htons(id->dst_port),
+ src_ip, htons(id->src_port),
+ lookupflags, oif, args->m);
+ }
if (pcb != NULL) {
+ INP_RLOCK_ASSERT(pcb);
*uc = crhold(pcb->inp_cred);
*ugid_lookupp = 1;
+ INP_RUNLOCK(pcb);
}
- INP_INFO_RUNLOCK(pi);
if (*ugid_lookupp == 0) {
/*
* We tried and failed, set the variable to -1
@@ -714,14 +743,14 @@ check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif,
*ugid_lookupp = -1;
return (0);
}
- }
+ }
if (insn->o.opcode == O_UID)
match = ((*uc)->cr_uid == (uid_t)insn->d[0]);
else if (insn->o.opcode == O_GID)
match = groupmember((gid_t)insn->d[0], *uc);
else if (insn->o.opcode == O_JAIL)
match = ((*uc)->cr_prison->pr_id == (int)insn->d[0]);
- return match;
+ return (match);
#endif /* __FreeBSD__ */
}
@@ -1259,22 +1288,17 @@ do { \
* as this ensures that we have a
* packet with the ports info.
*/
- if (offset!=0)
- break;
- if (is_ipv6) /* XXX to be fixed later */
+ if (offset != 0)
break;
if (proto == IPPROTO_TCP ||
proto == IPPROTO_UDP)
match = check_uidgid(
(ipfw_insn_u32 *)cmd,
- proto, oif,
- dst_ip, dst_port,
- src_ip, src_port, &ucred_lookup,
+ args, &ucred_lookup,
#ifdef __FreeBSD__
- &ucred_cache, args->inp);
+ &ucred_cache);
#else
- (void *)&ucred_cache,
- (struct inpcb *)args->m);
+ (void *)&ucred_cache);
#endif
break;
@@ -1389,18 +1413,15 @@ do { \
else if (v == 4 || v == 5) {
check_uidgid(
(ipfw_insn_u32 *)cmd,
- proto, oif,
- dst_ip, dst_port,
- src_ip, src_port, &ucred_lookup,
+ args, &ucred_lookup,
#ifdef __FreeBSD__
- &ucred_cache, args->inp);
+ &ucred_cache);
if (v == 4 /* O_UID */)
key = ucred_cache->cr_uid;
else if (v == 5 /* O_JAIL */)
key = ucred_cache->cr_prison->pr_id;
#else /* !__FreeBSD__ */
- (void *)&ucred_cache,
- (struct inpcb *)args->m);
+ (void *)&ucred_cache);
if (v ==4 /* O_UID */)
key = ucred_cache.uid;
else if (v == 5 /* O_JAIL */)
@@ -1827,21 +1848,32 @@ do { \
else
break;
+ /*
+ * XXXRW: so_user_cookie should almost
+ * certainly be inp_user_cookie?
+ */
+
/* For incomming packet, lookup up the
inpcb using the src/dest ip/port tuple */
if (inp == NULL) {
- INP_INFO_RLOCK(pi);
- inp = in_pcblookup_hash(pi,
+ inp = in_pcblookup(pi,
src_ip, htons(src_port),
dst_ip, htons(dst_port),
- 0, NULL);
- INP_INFO_RUNLOCK(pi);
- }
-
- if (inp && inp->inp_socket) {
- tablearg = inp->inp_socket->so_user_cookie;
- if (tablearg)
- match = 1;
+ INPLOOKUP_RLOCKPCB, NULL);
+ if (inp != NULL) {
+ tablearg =
+ inp->inp_socket->so_user_cookie;
+ if (tablearg)
+ match = 1;
+ INP_RUNLOCK(inp);
+ }
+ } else {
+ if (inp->inp_socket) {
+ tablearg =
+ inp->inp_socket->so_user_cookie;
+ if (tablearg)
+ match = 1;
+ }
}
break;
}
@@ -2106,7 +2138,8 @@ do { \
case O_FORWARD_IP:
if (args->eh) /* not valid on layer2 pkts */
break;
- if (!q || dyn_dir == MATCH_FORWARD) {
+ if (q == NULL || q->rule != f ||
+ dyn_dir == MATCH_FORWARD) {
struct sockaddr_in *sa;
sa = &(((ipfw_insn_sa *)cmd)->sa);
if (sa->sin_addr.s_addr == INADDR_ANY) {
@@ -2137,14 +2170,21 @@ do { \
done = 1; /* exit outer loop */
break;
- case O_SETFIB:
+ case O_SETFIB: {
+ uint32_t fib;
+
f->pcnt++; /* update stats */
f->bcnt += pktlen;
f->timestamp = time_uptime;
- M_SETFIB(m, cmd->arg1);
- args->f_id.fib = cmd->arg1;
+ fib = (cmd->arg1 == IP_FW_TABLEARG) ? tablearg:
+ cmd->arg1;
+ if (fib >= rt_numfibs)
+ fib = 0;
+ M_SETFIB(m, fib);
+ args->f_id.fib = fib;
l = 0; /* exit inner loop */
break;
+ }
case O_NAT:
if (!IPFW_NAT_LOADED) {
@@ -2154,6 +2194,13 @@ do { \
int nat_id;
set_match(args, f_pos, chain);
+ /* Check if this is 'global' nat rule */
+ if (cmd->arg1 == 0) {
+ retval = ipfw_nat_ptr(args, NULL, m);
+ l = 0;
+ done = 1;
+ break;
+ }
t = ((ipfw_insn_nat *)cmd)->nat;
if (t == NULL) {
nat_id = (cmd->arg1 == IP_FW_TABLEARG) ?
diff --git a/sys/netinet/ipfw/ip_fw_dynamic.c b/sys/netinet/ipfw/ip_fw_dynamic.c
index 7f0feb4..0bc4cc1 100644
--- a/sys/netinet/ipfw/ip_fw_dynamic.c
+++ b/sys/netinet/ipfw/ip_fw_dynamic.c
@@ -753,11 +753,12 @@ ipfw_install_state(struct ip_fw *rule, ipfw_insn_limit *cmd,
q = lookup_dyn_rule_locked(&args->f_id, NULL, NULL);
if (q != NULL) { /* should never occur */
+ DEB(
if (last_log != time_uptime) {
last_log = time_uptime;
printf("ipfw: %s: entry already present, done\n",
__func__);
- }
+ })
IPFW_DYN_UNLOCK();
return (0);
}
diff --git a/sys/netinet/ipfw/ip_fw_nat.c b/sys/netinet/ipfw/ip_fw_nat.c
index f8c3e63..1679a97 100644
--- a/sys/netinet/ipfw/ip_fw_nat.c
+++ b/sys/netinet/ipfw/ip_fw_nat.c
@@ -207,7 +207,8 @@ ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m)
struct mbuf *mcl;
struct ip *ip;
/* XXX - libalias duct tape */
- int ldt, retval;
+ int ldt, retval, found;
+ struct ip_fw_chain *chain;
char *c;
ldt = 0;
@@ -256,23 +257,65 @@ ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m)
ldt = 1;
c = mtod(mcl, char *);
- if (args->oif == NULL)
- retval = LibAliasIn(t->lib, c,
- mcl->m_len + M_TRAILINGSPACE(mcl));
- else
- retval = LibAliasOut(t->lib, c,
- mcl->m_len + M_TRAILINGSPACE(mcl));
- if (retval == PKT_ALIAS_RESPOND) {
- m->m_flags |= M_SKIP_FIREWALL;
- retval = PKT_ALIAS_OK;
+
+ /* Check if this is 'global' instance */
+ if (t == NULL) {
+ if (args->oif == NULL) {
+ /* Wrong direction, skip processing */
+ args->m = mcl;
+ return (IP_FW_NAT);
+ }
+
+ found = 0;
+ chain = &V_layer3_chain;
+ IPFW_RLOCK(chain);
+ /* Check every nat entry... */
+ LIST_FOREACH(t, &chain->nat, _next) {
+ if ((t->mode & PKT_ALIAS_SKIP_GLOBAL) != 0)
+ continue;
+ retval = LibAliasOutTry(t->lib, c,
+ mcl->m_len + M_TRAILINGSPACE(mcl), 0);
+ if (retval == PKT_ALIAS_OK) {
+ /* Nat instance recognises state */
+ found = 1;
+ break;
+ }
+ }
+ IPFW_RUNLOCK(chain);
+ if (found != 1) {
+ /* No instance found, return ignore */
+ args->m = mcl;
+ return (IP_FW_NAT);
+ }
+ } else {
+ if (args->oif == NULL)
+ retval = LibAliasIn(t->lib, c,
+ mcl->m_len + M_TRAILINGSPACE(mcl));
+ else
+ retval = LibAliasOut(t->lib, c,
+ mcl->m_len + M_TRAILINGSPACE(mcl));
}
- if (retval != PKT_ALIAS_OK &&
- retval != PKT_ALIAS_FOUND_HEADER_FRAGMENT) {
+
+ /*
+ * We drop the packet when:
+ * 1. libalias returns PKT_ALIAS_ERROR;
+ * 2. for incoming packets:
+ * a) the fragment is unresolved;
+ * b) libalias returns PKT_ALIAS_IGNORED and the
+ * PKT_ALIAS_DENY_INCOMING flag is set.
+ */
+ if (retval == PKT_ALIAS_ERROR ||
+ (args->oif == NULL && (retval == PKT_ALIAS_UNRESOLVED_FRAGMENT ||
+ (retval == PKT_ALIAS_IGNORED &&
+ (t->mode & PKT_ALIAS_DENY_INCOMING) != 0)))) {
/* XXX - should i add some logging? */
m_free(mcl);
args->m = NULL;
return (IP_FW_DENY);
}
+
+ if (retval == PKT_ALIAS_RESPOND)
+ m->m_flags |= M_SKIP_FIREWALL;
mcl->m_pkthdr.len = mcl->m_len = ntohs(ip->ip_len);
/*
diff --git a/sys/netinet/ipfw/ip_fw_sockopt.c b/sys/netinet/ipfw/ip_fw_sockopt.c
index 0c903ee..2347456 100644
--- a/sys/netinet/ipfw/ip_fw_sockopt.c
+++ b/sys/netinet/ipfw/ip_fw_sockopt.c
@@ -349,12 +349,13 @@ del_entry(struct ip_fw_chain *chain, uint32_t arg)
}
if (n == 0) {
- /* A flush request (arg == 0) on empty ruleset
- * returns with no error. On the contrary,
+ /* A flush request (arg == 0 or cmd == 1) on empty
+ * ruleset returns with no error. On the contrary,
* if there is no match on a specific request,
* we return EINVAL.
*/
- error = (arg == 0) ? 0 : EINVAL;
+ if (arg != 0 && cmd != 1)
+ error = EINVAL;
break;
}
@@ -606,7 +607,8 @@ check_ipfw_struct(struct ip_fw *rule, int size)
case O_SETFIB:
if (cmdlen != F_INSN_SIZE(ipfw_insn))
goto bad_size;
- if (cmd->arg1 >= rt_numfibs) {
+ if ((cmd->arg1 != IP_FW_TABLEARG) &&
+ (cmd->arg1 >= rt_numfibs)) {
printf("ipfw: invalid fib number %d\n",
cmd->arg1);
return EINVAL;
diff --git a/sys/netinet/libalias/alias.h b/sys/netinet/libalias/alias.h
index 2aed829..b12b353 100644
--- a/sys/netinet/libalias/alias.h
+++ b/sys/netinet/libalias/alias.h
@@ -197,6 +197,18 @@ struct mbuf *m_megapullup(struct mbuf *, int);
*/
#define PKT_ALIAS_RESET_ON_ADDR_CHANGE 0x20
+/*
+ * If PKT_ALIAS_PROXY_ONLY is set, then NAT will be disabled and only
+ * transparent proxying is performed.
+ */
+#define PKT_ALIAS_PROXY_ONLY 0x40
+
+/*
+ * If PKT_ALIAS_REVERSE is set, the actions of PacketAliasIn() and
+ * PacketAliasOut() are reversed.
+ */
+#define PKT_ALIAS_REVERSE 0x80
+
#ifndef NO_FW_PUNCH
/*
* If PKT_ALIAS_PUNCH_FW is set, active FTP and IRC DCC connections will
@@ -209,16 +221,10 @@ struct mbuf *m_megapullup(struct mbuf *, int);
#endif
/*
- * If PKT_ALIAS_PROXY_ONLY is set, then NAT will be disabled and only
- * transparent proxying is performed.
- */
-#define PKT_ALIAS_PROXY_ONLY 0x40
-
-/*
- * If PKT_ALIAS_REVERSE is set, the actions of PacketAliasIn() and
- * PacketAliasOut() are reversed.
+ * If PKT_ALIAS_SKIP_GLOBAL is set, nat instance is not checked for matching
+ * states in 'ipfw nat global' rule.
*/
-#define PKT_ALIAS_REVERSE 0x80
+#define PKT_ALIAS_SKIP_GLOBAL 0x200
/* Function return codes. */
#define PKT_ALIAS_ERROR -1
diff --git a/sys/netinet/libalias/alias_sctp.h b/sys/netinet/libalias/alias_sctp.h
index 80ed965..99d54ce 100644
--- a/sys/netinet/libalias/alias_sctp.h
+++ b/sys/netinet/libalias/alias_sctp.h
@@ -135,13 +135,13 @@ struct sctp_nat_assoc {
struct in_addr a_addr; /**< alias ip address */
int state; /**< current state of NAT association */
int TableRegister; /**< stores which look up tables association is registered in */
- int exp; /**< timer expiration in seconds from uptime */
+ int exp; /**< timer expiration in seconds from uptime */
int exp_loc; /**< current location in timer_Q */
int num_Gaddr; /**< number of global IP addresses in the list */
LIST_HEAD(sctpGlobalAddresshead,sctp_GlobalAddress) Gaddr; /**< List of global addresses */
- LIST_ENTRY (sctp_nat_assoc) list_L; /**< Linked list of pointers for Local table*/
- LIST_ENTRY (sctp_nat_assoc) list_G; /**< Linked list of pointers for Global table */
- LIST_ENTRY (sctp_nat_assoc) timer_Q; /**< Linked list of pointers for timer Q */
+ LIST_ENTRY (sctp_nat_assoc) list_L; /**< Linked list of pointers for Local table*/
+ LIST_ENTRY (sctp_nat_assoc) list_G; /**< Linked list of pointers for Global table */
+ LIST_ENTRY (sctp_nat_assoc) timer_Q; /**< Linked list of pointers for timer Q */
//Using libalias locking
};
diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
index be099a8..e754b88 100644
--- a/sys/netinet/raw_ip.c
+++ b/sys/netinet/raw_ip.c
@@ -205,7 +205,8 @@ rip_init(void)
{
in_pcbinfo_init(&V_ripcbinfo, "rip", &V_ripcb, INP_PCBHASH_RAW_SIZE,
- 1, "ripcb", rip_inpcb_init, NULL, UMA_ZONE_NOFREE);
+ 1, "ripcb", rip_inpcb_init, NULL, UMA_ZONE_NOFREE,
+ IPI_HASHFIELDS_NONE);
EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change, NULL,
EVENTHANDLER_PRI_ANY);
}
@@ -226,7 +227,7 @@ rip_append(struct inpcb *last, struct ip *ip, struct mbuf *n,
{
int policyfail = 0;
- INP_RLOCK_ASSERT(last);
+ INP_LOCK_ASSERT(last);
#ifdef IPSEC
/* check AH/ESP integrity. */
@@ -834,16 +835,19 @@ rip_detach(struct socket *so)
static void
rip_dodisconnect(struct socket *so, struct inpcb *inp)
{
+ struct inpcbinfo *pcbinfo;
- INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
- INP_WLOCK_ASSERT(inp);
-
+ pcbinfo = inp->inp_pcbinfo;
+ INP_INFO_WLOCK(pcbinfo);
+ INP_WLOCK(inp);
rip_delhash(inp);
inp->inp_faddr.s_addr = INADDR_ANY;
rip_inshash(inp);
SOCK_LOCK(so);
so->so_state &= ~SS_ISCONNECTED;
SOCK_UNLOCK(so);
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(pcbinfo);
}
static void
@@ -854,11 +858,7 @@ rip_abort(struct socket *so)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("rip_abort: inp == NULL"));
- INP_INFO_WLOCK(&V_ripcbinfo);
- INP_WLOCK(inp);
rip_dodisconnect(so, inp);
- INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_ripcbinfo);
}
static void
@@ -869,11 +869,7 @@ rip_close(struct socket *so)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("rip_close: inp == NULL"));
- INP_INFO_WLOCK(&V_ripcbinfo);
- INP_WLOCK(inp);
rip_dodisconnect(so, inp);
- INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_ripcbinfo);
}
static int
@@ -887,11 +883,7 @@ rip_disconnect(struct socket *so)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("rip_disconnect: inp == NULL"));
- INP_INFO_WLOCK(&V_ripcbinfo);
- INP_WLOCK(inp);
rip_dodisconnect(so, inp);
- INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_ripcbinfo);
return (0);
}
@@ -1077,9 +1069,9 @@ rip_pcblist(SYSCTL_HANDLER_ARGS)
INP_INFO_WLOCK(&V_ripcbinfo);
for (i = 0; i < n; i++) {
inp = inp_list[i];
- INP_WLOCK(inp);
- if (!in_pcbrele(inp))
- INP_WUNLOCK(inp);
+ INP_RLOCK(inp);
+ if (!in_pcbrele_rlocked(inp))
+ INP_RUNLOCK(inp);
}
INP_INFO_WUNLOCK(&V_ripcbinfo);
diff --git a/sys/netinet/sctp.h b/sys/netinet/sctp.h
index fa29a75..3c8cf36 100644
--- a/sys/netinet/sctp.h
+++ b/sys/netinet/sctp.h
@@ -91,7 +91,7 @@ struct sctp_paramhdr {
#define SCTP_PEER_ADDR_PARAMS 0x0000000a
#define SCTP_DEFAULT_SEND_PARAM 0x0000000b
/* ancillary data/notification interest options */
-#define SCTP_EVENTS 0x0000000c
+#define SCTP_EVENTS 0x0000000c /* deprecated */
/* Without this applied we will give V4 and V6 addresses on a V6 socket */
#define SCTP_I_WANT_MAPPED_V4_ADDR 0x0000000d
#define SCTP_MAXSEG 0x0000000e
@@ -114,6 +114,11 @@ struct sctp_paramhdr {
#define SCTP_EXPLICIT_EOR 0x0000001b
#define SCTP_REUSE_PORT 0x0000001c /* rw */
#define SCTP_AUTH_DEACTIVATE_KEY 0x0000001d
+#define SCTP_EVENT 0x0000001e
+#define SCTP_RECVRCVINFO 0x0000001f
+#define SCTP_RECVNXTINFO 0x00000020
+#define SCTP_DEFAULT_SNDINFO 0x00000021
+#define SCTP_DEFAULT_PRINFO 0x00000022
/*
* read-only options
@@ -490,7 +495,7 @@ struct sctp_error_unrecognized_chunk {
/*
* PCB Features (in sctp_features bitmask)
*/
-#define SCTP_PCB_FLAGS_EXT_RCVINFO 0x00000002
+#define SCTP_PCB_FLAGS_EXT_RCVINFO 0x00000002 /* deprecated */
#define SCTP_PCB_FLAGS_DONOT_HEARTBEAT 0x00000004
#define SCTP_PCB_FLAGS_FRAG_INTERLEAVE 0x00000008
#define SCTP_PCB_FLAGS_INTERLEAVE_STRMS 0x00000010
@@ -500,7 +505,7 @@ struct sctp_error_unrecognized_chunk {
/* socket options */
#define SCTP_PCB_FLAGS_NODELAY 0x00000100
#define SCTP_PCB_FLAGS_AUTOCLOSE 0x00000200
-#define SCTP_PCB_FLAGS_RECVDATAIOEVNT 0x00000400
+#define SCTP_PCB_FLAGS_RECVDATAIOEVNT 0x00000400 /* deprecated */
#define SCTP_PCB_FLAGS_RECVASSOCEVNT 0x00000800
#define SCTP_PCB_FLAGS_RECVPADDREVNT 0x00001000
#define SCTP_PCB_FLAGS_RECVPEERERR 0x00002000
@@ -516,6 +521,9 @@ struct sctp_error_unrecognized_chunk {
#define SCTP_PCB_FLAGS_MULTIPLE_ASCONFS 0x01000000
#define SCTP_PCB_FLAGS_PORTREUSE 0x02000000
#define SCTP_PCB_FLAGS_DRYEVNT 0x04000000
+#define SCTP_PCB_FLAGS_RECVRCVINFO 0x08000000
+#define SCTP_PCB_FLAGS_RECVNXTINFO 0x10000000
+
/*-
* mobility_features parameters (by micchie).Note
* these features are applied against the
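+
The socket options added above (SCTP_EVENT, SCTP_RECVRCVINFO, SCTP_RECVNXTINFO, ...) move away from the monolithic, now-deprecated SCTP_EVENTS/sctp_event_subscribe interface towards per-event control and per-message receive information. A minimal sketch of how an application might use them, assuming an SCTP socket fd and the struct sctp_event and SCTP_FUTURE_ASSOC definitions added to sctp_uio.h later in this change:

#include <sys/types.h>
#include <sys/socket.h>
#include <string.h>
#include <netinet/in.h>
#include <netinet/sctp.h>
#include <netinet/sctp_uio.h>

/* Enable per-message SCTP_RCVINFO and one specific notification. */
int
enable_rcvinfo_and_assoc_change(int fd)
{
	struct sctp_event ev;
	int on = 1;

	/* Attach SCTP_RCVINFO ancillary data to every received message. */
	if (setsockopt(fd, IPPROTO_SCTP, SCTP_RECVRCVINFO, &on,
	    (socklen_t)sizeof(on)) < 0)
		return (-1);

	/* Subscribe to SCTP_ASSOC_CHANGE through the new SCTP_EVENT option. */
	memset(&ev, 0, sizeof(ev));
	ev.se_assoc_id = SCTP_FUTURE_ASSOC;
	ev.se_type = SCTP_ASSOC_CHANGE;
	ev.se_on = 1;
	return (setsockopt(fd, IPPROTO_SCTP, SCTP_EVENT, &ev,
	    (socklen_t)sizeof(ev)));
}
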
diff --git a/sys/netinet/sctp_auth.c b/sys/netinet/sctp_auth.c
index 91e3f78..b68c840 100644
--- a/sys/netinet/sctp_auth.c
+++ b/sys/netinet/sctp_auth.c
@@ -1866,7 +1866,7 @@ sctp_notify_authentication(struct sctp_tcb *stcb, uint32_t indication,
/* If the socket is gone we are out of here */
return;
}
- if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_AUTHEVNT))
+ if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_AUTHEVNT))
/* event not enabled */
return;
diff --git a/sys/netinet/sctp_indata.c b/sys/netinet/sctp_indata.c
index 9734aea..e142a3e 100644
--- a/sys/netinet/sctp_indata.c
+++ b/sys/netinet/sctp_indata.c
@@ -201,95 +201,114 @@ failed_build:
struct mbuf *
-sctp_build_ctl_nchunk(struct sctp_inpcb *inp,
- struct sctp_sndrcvinfo *sinfo)
+sctp_build_ctl_nchunk(struct sctp_inpcb *inp, struct sctp_sndrcvinfo *sinfo)
{
+ struct sctp_extrcvinfo *seinfo;
struct sctp_sndrcvinfo *outinfo;
+ struct sctp_rcvinfo *rcvinfo;
+ struct sctp_nxtinfo *nxtinfo;
struct cmsghdr *cmh;
struct mbuf *ret;
int len;
- int use_extended = 0;
+ int use_extended;
+ int provide_nxt;
- if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT)) {
- /* user does not want the sndrcv ctl */
+ if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT) &&
+ sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVRCVINFO) &&
+ sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVNXTINFO)) {
+ /* user does not want any ancillary data */
return (NULL);
}
- if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO)) {
- use_extended = 1;
- len = CMSG_LEN(sizeof(struct sctp_extrcvinfo));
+ len = 0;
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVRCVINFO)) {
+ len += CMSG_SPACE(sizeof(struct sctp_rcvinfo));
+ }
+ seinfo = (struct sctp_extrcvinfo *)sinfo;
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVNXTINFO) &&
+ (seinfo->sreinfo_next_flags & SCTP_NEXT_MSG_AVAIL)) {
+ provide_nxt = 1;
+ len += CMSG_SPACE(sizeof(struct sctp_rcvinfo));
} else {
- len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
+ provide_nxt = 0;
+ }
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT)) {
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO)) {
+ use_extended = 1;
+ len += CMSG_SPACE(sizeof(struct sctp_extrcvinfo));
+ } else {
+ use_extended = 0;
+ len += CMSG_SPACE(sizeof(struct sctp_sndrcvinfo));
+ }
+ } else {
+ use_extended = 0;
}
-
- ret = sctp_get_mbuf_for_msg(len,
- 0, M_DONTWAIT, 1, MT_DATA);
-
+ ret = sctp_get_mbuf_for_msg(len, 0, M_DONTWAIT, 1, MT_DATA);
if (ret == NULL) {
/* No space */
return (ret);
}
- /* We need a CMSG header followed by the struct */
+ SCTP_BUF_LEN(ret) = 0;
+
+ /* We need a CMSG header followed by the struct */
cmh = mtod(ret, struct cmsghdr *);
- outinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmh);
- cmh->cmsg_level = IPPROTO_SCTP;
- if (use_extended) {
- cmh->cmsg_type = SCTP_EXTRCV;
- cmh->cmsg_len = len;
- memcpy(outinfo, sinfo, len);
- } else {
- cmh->cmsg_type = SCTP_SNDRCV;
- cmh->cmsg_len = len;
- *outinfo = *sinfo;
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVRCVINFO)) {
+ cmh->cmsg_level = IPPROTO_SCTP;
+ cmh->cmsg_len = CMSG_LEN(sizeof(struct sctp_rcvinfo));
+ cmh->cmsg_type = SCTP_RCVINFO;
+ rcvinfo = (struct sctp_rcvinfo *)CMSG_DATA(cmh);
+ rcvinfo->rcv_sid = sinfo->sinfo_stream;
+ rcvinfo->rcv_ssn = sinfo->sinfo_ssn;
+ rcvinfo->rcv_flags = sinfo->sinfo_flags;
+ rcvinfo->rcv_ppid = sinfo->sinfo_ppid;
+ rcvinfo->rcv_tsn = sinfo->sinfo_tsn;
+ rcvinfo->rcv_cumtsn = sinfo->sinfo_cumtsn;
+ rcvinfo->rcv_context = sinfo->sinfo_context;
+ rcvinfo->rcv_assoc_id = sinfo->sinfo_assoc_id;
+ cmh = (struct cmsghdr *)((caddr_t)cmh + CMSG_SPACE(sizeof(struct sctp_rcvinfo)));
+ SCTP_BUF_LEN(ret) += CMSG_SPACE(sizeof(struct sctp_rcvinfo));
+ }
+ if (provide_nxt) {
+ cmh->cmsg_level = IPPROTO_SCTP;
+ cmh->cmsg_len = CMSG_LEN(sizeof(struct sctp_nxtinfo));
+ cmh->cmsg_type = SCTP_NXTINFO;
+ nxtinfo = (struct sctp_nxtinfo *)CMSG_DATA(cmh);
+ nxtinfo->nxt_sid = seinfo->sreinfo_next_stream;
+ nxtinfo->nxt_flags = 0;
+ if (seinfo->sreinfo_next_flags & SCTP_NEXT_MSG_IS_UNORDERED) {
+ nxtinfo->nxt_flags |= SCTP_UNORDERED;
+ }
+ if (seinfo->sreinfo_next_flags & SCTP_NEXT_MSG_IS_NOTIFICATION) {
+ nxtinfo->nxt_flags |= SCTP_NOTIFICATION;
+ }
+ if (seinfo->sreinfo_next_flags & SCTP_NEXT_MSG_ISCOMPLETE) {
+ nxtinfo->nxt_flags |= SCTP_COMPLETE;
+ }
+ nxtinfo->nxt_ppid = seinfo->sreinfo_next_ppid;
+ nxtinfo->nxt_length = seinfo->sreinfo_next_length;
+ nxtinfo->nxt_assoc_id = seinfo->sreinfo_next_aid;
+ cmh = (struct cmsghdr *)((caddr_t)cmh + CMSG_SPACE(sizeof(struct sctp_nxtinfo)));
+ SCTP_BUF_LEN(ret) += CMSG_SPACE(sizeof(struct sctp_nxtinfo));
+ }
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT)) {
+ cmh->cmsg_level = IPPROTO_SCTP;
+ outinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmh);
+ if (use_extended) {
+ cmh->cmsg_len = CMSG_LEN(sizeof(struct sctp_extrcvinfo));
+ cmh->cmsg_type = SCTP_EXTRCV;
+ memcpy(outinfo, sinfo, sizeof(struct sctp_extrcvinfo));
+ SCTP_BUF_LEN(ret) += CMSG_SPACE(sizeof(struct sctp_extrcvinfo));
+ } else {
+ cmh->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
+ cmh->cmsg_type = SCTP_SNDRCV;
+ *outinfo = *sinfo;
+ SCTP_BUF_LEN(ret) += CMSG_SPACE(sizeof(struct sctp_sndrcvinfo));
+ }
}
- SCTP_BUF_LEN(ret) = cmh->cmsg_len;
return (ret);
}
-char *
-sctp_build_ctl_cchunk(struct sctp_inpcb *inp,
- int *control_len,
- struct sctp_sndrcvinfo *sinfo)
-{
- struct sctp_sndrcvinfo *outinfo;
- struct cmsghdr *cmh;
- char *buf;
- int len;
- int use_extended = 0;
-
- if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT)) {
- /* user does not want the sndrcv ctl */
- return (NULL);
- }
- if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO)) {
- use_extended = 1;
- len = CMSG_LEN(sizeof(struct sctp_extrcvinfo));
- } else {
- len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
- }
- SCTP_MALLOC(buf, char *, len, SCTP_M_CMSG);
- if (buf == NULL) {
- /* No space */
- return (buf);
- }
- /* We need a CMSG header followed by the struct */
- cmh = (struct cmsghdr *)buf;
- outinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmh);
- cmh->cmsg_level = IPPROTO_SCTP;
- if (use_extended) {
- cmh->cmsg_type = SCTP_EXTRCV;
- cmh->cmsg_len = len;
- memcpy(outinfo, sinfo, len);
- } else {
- cmh->cmsg_type = SCTP_SNDRCV;
- cmh->cmsg_len = len;
- *outinfo = *sinfo;
- }
- *control_len = len;
- return (buf);
-}
-
static void
sctp_mark_non_revokable(struct sctp_association *asoc, uint32_t tsn)
{
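+
With the change above, sctp_build_ctl_nchunk() emits SCTP_RCVINFO (and, when a following message is already queued, SCTP_NXTINFO) control messages instead of only the legacy SCTP_SNDRCV block. A minimal receive-side sketch of walking that ancillary data, assuming SCTP_RECVRCVINFO has been enabled on fd; the helper name and buffer sizing are illustrative:

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <string.h>
#include <netinet/in.h>
#include <netinet/sctp.h>
#include <netinet/sctp_uio.h>

/* Receive one message and, if present, copy out its SCTP_RCVINFO. */
ssize_t
recv_with_rcvinfo(int fd, void *buf, size_t len, struct sctp_rcvinfo *rinfo,
    int *have_rinfo)
{
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	char cbuf[CMSG_SPACE(sizeof(struct sctp_rcvinfo)) +
	    CMSG_SPACE(sizeof(struct sctp_nxtinfo))];
	struct msghdr msg;
	struct cmsghdr *cmsg;
	ssize_t n;

	memset(&msg, 0, sizeof(msg));
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cbuf;
	msg.msg_controllen = sizeof(cbuf);

	*have_rinfo = 0;
	if ((n = recvmsg(fd, &msg, 0)) < 0)
		return (n);
	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
	    cmsg = CMSG_NXTHDR(&msg, cmsg)) {
		if (cmsg->cmsg_level == IPPROTO_SCTP &&
		    cmsg->cmsg_type == SCTP_RCVINFO) {
			memcpy(rinfo, CMSG_DATA(cmsg), sizeof(*rinfo));
			*have_rinfo = 1;
		}
	}
	return (n);
}
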
diff --git a/sys/netinet/sctp_indata.h b/sys/netinet/sctp_indata.h
index 34090df..1dbd364 100644
--- a/sys/netinet/sctp_indata.h
+++ b/sys/netinet/sctp_indata.h
@@ -83,11 +83,6 @@ struct mbuf *
sctp_build_ctl_nchunk(struct sctp_inpcb *inp,
struct sctp_sndrcvinfo *sinfo);
-char *
-sctp_build_ctl_cchunk(struct sctp_inpcb *inp,
- int *control_len,
- struct sctp_sndrcvinfo *sinfo);
-
void sctp_set_rwnd(struct sctp_tcb *, struct sctp_association *);
uint32_t
diff --git a/sys/netinet/sctp_output.c b/sys/netinet/sctp_output.c
index a7d22bd..043b3b2 100644
--- a/sys/netinet/sctp_output.c
+++ b/sys/netinet/sctp_output.c
@@ -3355,54 +3355,338 @@ sctp_source_address_selection(struct sctp_inpcb *inp,
}
static int
-sctp_find_cmsg(int c_type, void *data, struct mbuf *control, int cpsize)
+sctp_find_cmsg(int c_type, void *data, struct mbuf *control, size_t cpsize)
{
struct cmsghdr cmh;
- int tlen, at;
+ int tlen, at, found;
+ struct sctp_sndinfo sndinfo;
+ struct sctp_prinfo prinfo;
+ struct sctp_authinfo authinfo;
tlen = SCTP_BUF_LEN(control);
at = 0;
+ found = 0;
/*
* Independent of how many mbufs, find the c_type inside the control
* structure and copy out the data.
*/
while (at < tlen) {
if ((tlen - at) < (int)CMSG_ALIGN(sizeof(cmh))) {
- /* not enough room for one more we are done. */
- return (0);
+ /* There is not enough room for one more. */
+ return (found);
}
m_copydata(control, at, sizeof(cmh), (caddr_t)&cmh);
+ if (cmh.cmsg_len < CMSG_ALIGN(sizeof(struct cmsghdr))) {
+ /* We don't have a complete CMSG header. */

+ return (found);
+ }
if (((int)cmh.cmsg_len + at) > tlen) {
- /*
- * this is real messed up since there is not enough
- * data here to cover the cmsg header. We are done.
- */
- return (0);
+ /* We don't have the complete CMSG. */
+ return (found);
}
if ((cmh.cmsg_level == IPPROTO_SCTP) &&
- (c_type == cmh.cmsg_type)) {
- /* found the one we want, copy it out */
- at += CMSG_ALIGN(sizeof(struct cmsghdr));
- if ((int)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < cpsize) {
- /*
- * space of cmsg_len after header not big
- * enough
- */
- return (0);
+ ((c_type == cmh.cmsg_type) ||
+ ((c_type == SCTP_SNDRCV) &&
+ ((cmh.cmsg_type == SCTP_SNDINFO) ||
+ (cmh.cmsg_type == SCTP_PRINFO) ||
+ (cmh.cmsg_type == SCTP_AUTHINFO))))) {
+ if (c_type == cmh.cmsg_type) {
+ if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < cpsize) {
+ return (found);
+ }
+ /* It is exactly what we want. Copy it out. */
+ m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), cpsize, (caddr_t)data);
+ return (1);
+ } else {
+ struct sctp_sndrcvinfo *sndrcvinfo;
+
+ sndrcvinfo = (struct sctp_sndrcvinfo *)data;
+ if (found == 0) {
+ if (cpsize < sizeof(struct sctp_sndrcvinfo)) {
+ return (found);
+ }
+ memset(sndrcvinfo, 0, sizeof(struct sctp_sndrcvinfo));
+ }
+ switch (cmh.cmsg_type) {
+ case SCTP_SNDINFO:
+ if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct sctp_sndinfo)) {
+ return (found);
+ }
+ m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct sctp_sndinfo), (caddr_t)&sndinfo);
+ sndrcvinfo->sinfo_stream = sndinfo.snd_sid;
+ sndrcvinfo->sinfo_flags = sndinfo.snd_flags;
+ sndrcvinfo->sinfo_ppid = sndinfo.snd_ppid;
+ sndrcvinfo->sinfo_context = sndinfo.snd_context;
+ sndrcvinfo->sinfo_assoc_id = sndinfo.snd_assoc_id;
+ break;
+ case SCTP_PRINFO:
+ if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct sctp_prinfo)) {
+ return (found);
+ }
+ m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct sctp_prinfo), (caddr_t)&prinfo);
+ sndrcvinfo->sinfo_timetolive = prinfo.pr_value;
+ sndrcvinfo->sinfo_flags |= prinfo.pr_policy;
+ break;
+ case SCTP_AUTHINFO:
+ if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct sctp_authinfo)) {
+ return (found);
+ }
+ m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct sctp_authinfo), (caddr_t)&authinfo);
+ sndrcvinfo->sinfo_keynumber_valid = 1;
+ sndrcvinfo->sinfo_keynumber = authinfo.auth_keyid;
+ break;
+ default:
+ return (found);
+ }
+ found = 1;
}
- m_copydata(control, at, cpsize, data);
+ }
+ at += CMSG_ALIGN(cmh.cmsg_len);
+ }
+ return (found);
+}
+
+static int
+sctp_process_cmsgs_for_init(struct sctp_tcb *stcb, struct mbuf *control, int *error)
+{
+ struct cmsghdr cmh;
+ int tlen, at;
+ struct sctp_initmsg initmsg;
+
+#ifdef INET
+ struct sockaddr_in sin;
+
+#endif
+#ifdef INET6
+ struct sockaddr_in6 sin6;
+
+#endif
+
+ tlen = SCTP_BUF_LEN(control);
+ at = 0;
+ while (at < tlen) {
+ if ((tlen - at) < (int)CMSG_ALIGN(sizeof(cmh))) {
+ /* There is not enough room for one more. */
+ *error = EINVAL;
return (1);
- } else {
- at += CMSG_ALIGN(cmh.cmsg_len);
- if (cmh.cmsg_len == 0) {
+ }
+ m_copydata(control, at, sizeof(cmh), (caddr_t)&cmh);
+ if (cmh.cmsg_len < CMSG_ALIGN(sizeof(struct cmsghdr))) {
+ /* We don't have a complete CMSG header. */
+ *error = EINVAL;
+ return (1);
+ }
+ if (((int)cmh.cmsg_len + at) > tlen) {
+ /* We don't have the complete CMSG. */
+ *error = EINVAL;
+ return (1);
+ }
+ if (cmh.cmsg_level == IPPROTO_SCTP) {
+ switch (cmh.cmsg_type) {
+ case SCTP_INIT:
+ if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct sctp_initmsg)) {
+ *error = EINVAL;
+ return (1);
+ }
+ m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct sctp_initmsg), (caddr_t)&initmsg);
+ if (initmsg.sinit_max_attempts)
+ stcb->asoc.max_init_times = initmsg.sinit_max_attempts;
+ if (initmsg.sinit_num_ostreams)
+ stcb->asoc.pre_open_streams = initmsg.sinit_num_ostreams;
+ if (initmsg.sinit_max_instreams)
+ stcb->asoc.max_inbound_streams = initmsg.sinit_max_instreams;
+ if (initmsg.sinit_max_init_timeo)
+ stcb->asoc.initial_init_rto_max = initmsg.sinit_max_init_timeo;
+ if (stcb->asoc.streamoutcnt < stcb->asoc.pre_open_streams) {
+ struct sctp_stream_out *tmp_str;
+ unsigned int i;
+
+ /* Default is NOT correct */
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Ok, default:%d pre_open:%d\n",
+ stcb->asoc.streamoutcnt, stcb->asoc.pre_open_streams);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_MALLOC(tmp_str,
+ struct sctp_stream_out *,
+ (stcb->asoc.pre_open_streams * sizeof(struct sctp_stream_out)),
+ SCTP_M_STRMO);
+ SCTP_TCB_LOCK(stcb);
+ if (tmp_str != NULL) {
+ SCTP_FREE(stcb->asoc.strmout, SCTP_M_STRMO);
+ stcb->asoc.strmout = tmp_str;
+ stcb->asoc.strm_realoutsize = stcb->asoc.streamoutcnt = stcb->asoc.pre_open_streams;
+ } else {
+ stcb->asoc.pre_open_streams = stcb->asoc.streamoutcnt;
+ }
+ for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+ stcb->asoc.strmout[i].next_sequence_sent = 0;
+ TAILQ_INIT(&stcb->asoc.strmout[i].outqueue);
+ stcb->asoc.strmout[i].stream_no = i;
+ stcb->asoc.strmout[i].last_msg_incomplete = 0;
+ stcb->asoc.ss_functions.sctp_ss_init_stream(&stcb->asoc.strmout[i], NULL);
+ }
+ }
+ break;
+#ifdef INET
+ case SCTP_DSTADDRV4:
+ if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct in_addr)) {
+ *error = EINVAL;
+ return (1);
+ }
+ memset(&sin, 0, sizeof(struct sockaddr_in));
+ sin.sin_family = AF_INET;
+ sin.sin_len = sizeof(struct sockaddr_in);
+ sin.sin_port = stcb->rport;
+ m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct in_addr), (caddr_t)&sin.sin_addr);
+ if ((sin.sin_addr.s_addr == INADDR_ANY) ||
+ (sin.sin_addr.s_addr == INADDR_BROADCAST) ||
+ IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
+ *error = EINVAL;
+ return (-1);
+ }
+ if (sctp_add_remote_addr(stcb, (struct sockaddr *)&sin, SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) {
+ *error = ENOBUFS;
+ return (1);
+ }
+ break;
+#endif
+#ifdef INET6
+ case SCTP_DSTADDRV6:
+ if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct in6_addr)) {
+ *error = EINVAL;
+ return (1);
+ }
+ memset(&sin6, 0, sizeof(struct sockaddr_in6));
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_len = sizeof(struct sockaddr_in6);
+ sin6.sin6_port = stcb->rport;
+ m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct in6_addr), (caddr_t)&sin6.sin6_addr);
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6.sin6_addr) ||
+ IN6_IS_ADDR_MULTICAST(&sin6.sin6_addr)) {
+ *error = EINVAL;
+ return (-1);
+ }
+#ifdef INET
+ if (IN6_IS_ADDR_V4MAPPED(&sin6.sin6_addr)) {
+ in6_sin6_2_sin(&sin, &sin6);
+ if ((sin.sin_addr.s_addr == INADDR_ANY) ||
+ (sin.sin_addr.s_addr == INADDR_BROADCAST) ||
+ IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
+ *error = EINVAL;
+ return (-1);
+ }
+ if (sctp_add_remote_addr(stcb, (struct sockaddr *)&sin, SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) {
+ *error = ENOBUFS;
+ return (1);
+ }
+ } else
+#endif
+ if (sctp_add_remote_addr(stcb, (struct sockaddr *)&sin6, SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) {
+ *error = ENOBUFS;
+ return (1);
+ }
+ break;
+#endif
+ default:
break;
}
}
+ at += CMSG_ALIGN(cmh.cmsg_len);
}
- /* not found */
return (0);
}
+static struct sctp_tcb *
+sctp_findassociation_cmsgs(struct sctp_inpcb **inp_p,
+ in_port_t port,
+ struct mbuf *control,
+ struct sctp_nets **net_p,
+ int *error)
+{
+ struct cmsghdr cmh;
+ int tlen, at;
+ struct sctp_tcb *stcb;
+ struct sockaddr *addr;
+
+#ifdef INET
+ struct sockaddr_in sin;
+
+#endif
+#ifdef INET6
+ struct sockaddr_in6 sin6;
+
+#endif
+
+ tlen = SCTP_BUF_LEN(control);
+ at = 0;
+ while (at < tlen) {
+ if ((tlen - at) < (int)CMSG_ALIGN(sizeof(cmh))) {
+ /* There is not enough room for one more. */
+ *error = EINVAL;
+ return (NULL);
+ }
+ m_copydata(control, at, sizeof(cmh), (caddr_t)&cmh);
+ if (cmh.cmsg_len < CMSG_ALIGN(sizeof(struct cmsghdr))) {
+ /* We don't have a complete CMSG header. */
+ *error = EINVAL;
+ return (NULL);
+ }
+ if (((int)cmh.cmsg_len + at) > tlen) {
+ /* We don't have the complete CMSG. */
+ *error = EINVAL;
+ return (NULL);
+ }
+ if (cmh.cmsg_level == IPPROTO_SCTP) {
+ switch (cmh.cmsg_type) {
+#ifdef INET
+ case SCTP_DSTADDRV4:
+ if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct in_addr)) {
+ *error = EINVAL;
+ return (NULL);
+ }
+ memset(&sin, 0, sizeof(struct sockaddr_in));
+ sin.sin_family = AF_INET;
+ sin.sin_len = sizeof(struct sockaddr_in);
+ sin.sin_port = port;
+ m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct in_addr), (caddr_t)&sin.sin_addr);
+ addr = (struct sockaddr *)&sin;
+ break;
+#endif
+#ifdef INET6
+ case SCTP_DSTADDRV6:
+ if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct in6_addr)) {
+ *error = EINVAL;
+ return (NULL);
+ }
+ memset(&sin6, 0, sizeof(struct sockaddr_in6));
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_len = sizeof(struct sockaddr_in6);
+ sin6.sin6_port = port;
+ m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct in6_addr), (caddr_t)&sin6.sin6_addr);
+#ifdef INET
+ if (IN6_IS_ADDR_V4MAPPED(&sin6.sin6_addr)) {
+ in6_sin6_2_sin(&sin, &sin6);
+ addr = (struct sockaddr *)&sin;
+ } else
+#endif
+ addr = (struct sockaddr *)&sin6;
+ break;
+#endif
+ default:
+ addr = NULL;
+ break;
+ }
+ if (addr) {
+ stcb = sctp_findassociation_ep_addr(inp_p, addr, net_p, NULL, NULL);
+ if (stcb != NULL) {
+ return (stcb);
+ }
+ }
+ }
+ at += CMSG_ALIGN(cmh.cmsg_len);
+ }
+ return (NULL);
+}
+
static struct mbuf *
sctp_add_cookie(struct sctp_inpcb *inp, struct mbuf *init, int init_offset,
struct mbuf *initack, int initack_offset, struct sctp_state_cookie *stc_in, uint8_t ** signature)
@@ -5989,19 +6273,26 @@ sctp_msg_append(struct sctp_tcb *stcb,
sp->some_taken = 0;
sp->data = m;
sp->tail_mbuf = NULL;
- sp->length = 0;
- at = m;
sctp_set_prsctp_policy(sp);
/*
* We could in theory (for sendall) sifa the length in, but we would
* still have to hunt through the chain since we need to setup the
* tail_mbuf
*/
- while (at) {
+ sp->length = 0;
+ for (at = m; at; at = SCTP_BUF_NEXT(at)) {
if (SCTP_BUF_NEXT(at) == NULL)
sp->tail_mbuf = at;
sp->length += SCTP_BUF_LEN(at);
- at = SCTP_BUF_NEXT(at);
+ }
+ if (srcv->sinfo_keynumber_valid) {
+ sp->auth_keyid = srcv->sinfo_keynumber;
+ } else {
+ sp->auth_keyid = stcb->asoc.authinfo.active_keyid;
+ }
+ if (sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.peer_auth_chunks)) {
+ sctp_auth_key_acquire(stcb, sp->auth_keyid);
+ sp->holds_key_ref = 1;
}
SCTP_TCB_SEND_LOCK(stcb);
sctp_snd_sb_alloc(stcb, sp->length);
@@ -6478,7 +6769,9 @@ sctp_sendall(struct sctp_inpcb *inp, struct uio *uio, struct mbuf *m,
memset(ca, 0, sizeof(struct sctp_copy_all));
ca->inp = inp;
- memcpy(&ca->sndrcv, srcv, sizeof(struct sctp_nonpad_sndrcvinfo));
+ if (srcv) {
+ memcpy(&ca->sndrcv, srcv, sizeof(struct sctp_nonpad_sndrcvinfo));
+ }
/*
* take off the sendall flag, it would be bad if we failed to do
* this :-0
@@ -12229,9 +12522,13 @@ sctp_copy_it_in(struct sctp_tcb *stcb,
*error = 0;
goto skip_copy;
}
- sp->auth_keyid = stcb->asoc.authinfo.active_keyid;
+ if (srcv->sinfo_keynumber_valid) {
+ sp->auth_keyid = srcv->sinfo_keynumber;
+ } else {
+ sp->auth_keyid = stcb->asoc.authinfo.active_keyid;
+ }
if (sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.peer_auth_chunks)) {
- sctp_auth_key_acquire(stcb, stcb->asoc.authinfo.active_keyid);
+ sctp_auth_key_acquire(stcb, sp->auth_keyid);
sp->holds_key_ref = 1;
}
*error = sctp_copy_one(sp, uio, resv_in_first);
@@ -12263,8 +12560,8 @@ sctp_sosend(struct socket *so,
struct thread *p
)
{
- int error, use_rcvinfo = 0;
- struct sctp_sndrcvinfo srcv;
+ int error, use_sndinfo = 0;
+ struct sctp_sndrcvinfo sndrcvninfo;
struct sockaddr *addr_to_use;
#if defined(INET) && defined(INET6)
@@ -12274,10 +12571,10 @@ sctp_sosend(struct socket *so,
if (control) {
/* process cmsg snd/rcv info (maybe a assoc-id) */
- if (sctp_find_cmsg(SCTP_SNDRCV, (void *)&srcv, control,
- sizeof(srcv))) {
+ if (sctp_find_cmsg(SCTP_SNDRCV, (void *)&sndrcvninfo, control,
+ sizeof(sndrcvninfo))) {
/* got one */
- use_rcvinfo = 1;
+ use_sndinfo = 1;
}
}
addr_to_use = addr;
@@ -12295,7 +12592,7 @@ sctp_sosend(struct socket *so,
error = sctp_lower_sosend(so, addr_to_use, uio, top,
control,
flags,
- use_rcvinfo ? &srcv : NULL
+ use_sndinfo ? &sndrcvninfo : NULL
,p
);
return (error);
@@ -12500,6 +12797,9 @@ sctp_lower_sosend(struct socket *so,
SCTP_INP_WUNLOCK(inp);
/* With the lock applied look again */
stcb = sctp_findassociation_ep_addr(&t_inp, addr, &net, NULL, NULL);
+ if ((stcb == NULL) && (control != NULL) && (port > 0)) {
+ stcb = sctp_findassociation_cmsgs(&t_inp, port, control, &net, &error);
+ }
if (stcb == NULL) {
SCTP_INP_WLOCK(inp);
SCTP_INP_DECR_REF(inp);
@@ -12507,6 +12807,9 @@ sctp_lower_sosend(struct socket *so,
} else {
hold_tcblock = 1;
}
+ if (error) {
+ goto out_unlocked;
+ }
if (t_inp != inp) {
SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOTCONN);
error = ENOTCONN;
@@ -12555,6 +12858,7 @@ sctp_lower_sosend(struct socket *so,
/* Error is setup for us in the call */
goto out_unlocked;
}
+ hold_tcblock = 1;
if (create_lock_applied) {
SCTP_ASOC_CREATE_UNLOCK(inp);
create_lock_applied = 0;
@@ -12574,84 +12878,13 @@ sctp_lower_sosend(struct socket *so,
sctp_initialize_auth_params(inp, stcb);
if (control) {
- /*
- * see if a init structure exists in cmsg
- * headers
- */
- struct sctp_initmsg initm;
- int i;
-
- if (sctp_find_cmsg(SCTP_INIT, (void *)&initm, control,
- sizeof(initm))) {
- /*
- * we have an INIT override of the
- * default
- */
- if (initm.sinit_max_attempts)
- asoc->max_init_times = initm.sinit_max_attempts;
- if (initm.sinit_num_ostreams)
- asoc->pre_open_streams = initm.sinit_num_ostreams;
- if (initm.sinit_max_instreams)
- asoc->max_inbound_streams = initm.sinit_max_instreams;
- if (initm.sinit_max_init_timeo)
- asoc->initial_init_rto_max = initm.sinit_max_init_timeo;
- if (asoc->streamoutcnt < asoc->pre_open_streams) {
- struct sctp_stream_out *tmp_str;
- int had_lock = 0;
-
- /* Default is NOT correct */
- SCTPDBG(SCTP_DEBUG_OUTPUT1, "Ok, defout:%d pre_open:%d\n",
- asoc->streamoutcnt, asoc->pre_open_streams);
- /*
- * What happens if this
- * fails? we panic ...
- */
-
- if (hold_tcblock) {
- had_lock = 1;
- SCTP_TCB_UNLOCK(stcb);
- }
- SCTP_MALLOC(tmp_str,
- struct sctp_stream_out *,
- (asoc->pre_open_streams *
- sizeof(struct sctp_stream_out)),
- SCTP_M_STRMO);
- if (had_lock) {
- SCTP_TCB_LOCK(stcb);
- }
- if (tmp_str != NULL) {
- SCTP_FREE(asoc->strmout, SCTP_M_STRMO);
- asoc->strmout = tmp_str;
- asoc->strm_realoutsize = asoc->streamoutcnt = asoc->pre_open_streams;
- } else {
- asoc->pre_open_streams = asoc->streamoutcnt;
- }
- for (i = 0; i < asoc->streamoutcnt; i++) {
- /*-
- * inbound side must be set
- * to 0xffff, also NOTE when
- * we get the INIT-ACK back
- * (for INIT sender) we MUST
- * reduce the count
- * (streamoutcnt) but first
- * check if we sent to any
- * of the upper streams that
- * were dropped (if some
- * were). Those that were
- * dropped must be notified
- * to the upper layer as
- * failed to send.
- */
- asoc->strmout[i].next_sequence_sent = 0x0;
- TAILQ_INIT(&asoc->strmout[i].outqueue);
- asoc->strmout[i].stream_no = i;
- asoc->strmout[i].last_msg_incomplete = 0;
- asoc->ss_functions.sctp_ss_init_stream(&asoc->strmout[i], NULL);
- }
- }
+ if (sctp_process_cmsgs_for_init(stcb, control, &error)) {
+ sctp_free_assoc(inp, stcb, SCTP_PCBFREE_FORCE, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_7);
+ hold_tcblock = 0;
+ stcb = NULL;
+ goto out_unlocked;
}
}
- hold_tcblock = 1;
/* out with the INIT */
queue_only_for_init = 1;
/*-
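+
On the send side, sctp_find_cmsg() above now also accepts SCTP_SNDINFO, SCTP_PRINFO and SCTP_AUTHINFO control messages, while sctp_process_cmsgs_for_init() and sctp_findassociation_cmsgs() consume SCTP_INIT and SCTP_DSTADDRV4/SCTP_DSTADDRV6. A minimal send-side sketch of the SCTP_SNDINFO layout the kernel expects, assuming a one-to-many SCTP socket fd and a destination address prepared by the caller; the helper name is illustrative:

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <string.h>
#include <netinet/in.h>
#include <netinet/sctp.h>
#include <netinet/sctp_uio.h>

/* Send one message carrying an SCTP_SNDINFO cmsg (stream id and ppid). */
ssize_t
send_on_stream(int fd, struct sockaddr *to, socklen_t tolen, void *data,
    size_t len, uint16_t sid, uint32_t ppid)
{
	struct iovec iov = { .iov_base = data, .iov_len = len };
	char cbuf[CMSG_SPACE(sizeof(struct sctp_sndinfo))];
	struct msghdr msg;
	struct cmsghdr *cmsg;
	struct sctp_sndinfo sndinfo;

	memset(&msg, 0, sizeof(msg));
	memset(cbuf, 0, sizeof(cbuf));
	msg.msg_name = to;
	msg.msg_namelen = tolen;
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cbuf;
	msg.msg_controllen = sizeof(cbuf);

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = IPPROTO_SCTP;
	cmsg->cmsg_type = SCTP_SNDINFO;
	cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndinfo));

	memset(&sndinfo, 0, sizeof(sndinfo));
	sndinfo.snd_sid = sid;
	sndinfo.snd_ppid = ppid;	/* ppid is opaque to the stack */
	sndinfo.snd_assoc_id = SCTP_FUTURE_ASSOC;
	memcpy(CMSG_DATA(cmsg), &sndinfo, sizeof(sndinfo));

	return (sendmsg(fd, &msg, 0));
}
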
diff --git a/sys/netinet/sctp_pcb.c b/sys/netinet/sctp_pcb.c
index e53e28a..8dc01cd 100644
--- a/sys/netinet/sctp_pcb.c
+++ b/sys/netinet/sctp_pcb.c
@@ -4196,11 +4196,11 @@ try_again:
return (0);
}
/*
- * We don't allow assoc id to be 0, this is needed otherwise if the
- * id were to wrap we would have issues with some socket options.
+ * We don't allow assoc id to be one of SCTP_FUTURE_ASSOC,
+ * SCTP_CURRENT_ASSOC and SCTP_ALL_ASSOC.
*/
- if (inp->sctp_associd_counter == 0) {
- inp->sctp_associd_counter++;
+ if (inp->sctp_associd_counter <= SCTP_ALL_ASSOC) {
+ inp->sctp_associd_counter = SCTP_ALL_ASSOC + 1;
}
id = inp->sctp_associd_counter;
inp->sctp_associd_counter++;
@@ -4793,7 +4793,7 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre
/* Held for PD-API clear that. */
sq->pdapi_aborted = 1;
sq->held_length = 0;
- if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PDAPIEVNT) && (so != NULL)) {
+ if (sctp_stcb_is_feature_on(inp, stcb, SCTP_PCB_FLAGS_PDAPIEVNT) && (so != NULL)) {
/*
* Need to add a PD-API
* aborted indication.
diff --git a/sys/netinet/sctp_structs.h b/sys/netinet/sctp_structs.h
index 250b312..0f9bcaf 100644
--- a/sys/netinet/sctp_structs.h
+++ b/sys/netinet/sctp_structs.h
@@ -647,6 +647,8 @@ struct sctp_nonpad_sndrcvinfo {
uint32_t sinfo_tsn;
uint32_t sinfo_cumtsn;
sctp_assoc_t sinfo_assoc_id;
+ uint16_t sinfo_keynumber;
+ uint16_t sinfo_keynumber_valid;
};
/*
@@ -1201,6 +1203,7 @@ struct sctp_association {
/* JRS 5/21/07 - CMT PF variable */
uint8_t sctp_cmt_pf;
uint8_t use_precise_time;
+ uint32_t sctp_features;
/*
* The mapping array is used to track out of order sequences above
* last_acked_seq. 0 indicates packet missing 1 indicates packet
diff --git a/sys/netinet/sctp_uio.h b/sys/netinet/sctp_uio.h
index 56aef9d..a798682 100644
--- a/sys/netinet/sctp_uio.h
+++ b/sys/netinet/sctp_uio.h
@@ -47,6 +47,16 @@ __FBSDID("$FreeBSD$");
typedef uint32_t sctp_assoc_t;
+#define SCTP_FUTURE_ASSOC 0
+#define SCTP_CURRENT_ASSOC 1
+#define SCTP_ALL_ASSOC 2
+
+struct sctp_event {
+ sctp_assoc_t se_assoc_id;
+ uint16_t se_type;
+ uint8_t se_on;
+};
+
/* Compatibility to previous define's */
#define sctp_stream_reset_events sctp_stream_reset_event
@@ -69,6 +79,14 @@ struct sctp_event_subscribe {
#define SCTP_INIT 0x0001
#define SCTP_SNDRCV 0x0002
#define SCTP_EXTRCV 0x0003
+#define SCTP_SNDINFO 0x0004
+#define SCTP_RCVINFO 0x0005
+#define SCTP_NXTINFO 0x0006
+#define SCTP_PRINFO 0x0007
+#define SCTP_AUTHINFO 0x0008
+#define SCTP_DSTADDRV4 0x0009
+#define SCTP_DSTADDRV6 0x000a
+
/*
* ancillary data structures
*/
@@ -93,8 +111,8 @@ struct sctp_initmsg {
*/
-#define SCTP_ALIGN_RESV_PAD 96
-#define SCTP_ALIGN_RESV_PAD_SHORT 80
+#define SCTP_ALIGN_RESV_PAD 92
+#define SCTP_ALIGN_RESV_PAD_SHORT 76
struct sctp_sndrcvinfo {
uint16_t sinfo_stream;
@@ -106,6 +124,8 @@ struct sctp_sndrcvinfo {
uint32_t sinfo_tsn;
uint32_t sinfo_cumtsn;
sctp_assoc_t sinfo_assoc_id;
+ uint16_t sinfo_keynumber;
+ uint16_t sinfo_keynumber_valid;
uint8_t __reserve_pad[SCTP_ALIGN_RESV_PAD];
};
@@ -113,7 +133,6 @@ struct sctp_extrcvinfo {
uint16_t sinfo_stream;
uint16_t sinfo_ssn;
uint16_t sinfo_flags;
- uint16_t sinfo_pr_policy;
uint32_t sinfo_ppid;
uint32_t sinfo_context;
uint32_t sinfo_timetolive;
@@ -125,15 +144,86 @@ struct sctp_extrcvinfo {
uint32_t sreinfo_next_aid;
uint32_t sreinfo_next_length;
uint32_t sreinfo_next_ppid;
+ uint16_t sinfo_keynumber;
+ uint16_t sinfo_keynumber_valid;
uint8_t __reserve_pad[SCTP_ALIGN_RESV_PAD_SHORT];
};
+struct sctp_sndinfo {
+ uint16_t snd_sid;
+ uint16_t snd_flags;
+ uint32_t snd_ppid;
+ uint32_t snd_context;
+ sctp_assoc_t snd_assoc_id;
+};
+
+struct sctp_prinfo {
+ uint16_t pr_policy;
+ uint32_t pr_value;
+};
+
+struct sctp_default_prinfo {
+ uint16_t pr_policy;
+ uint32_t pr_value;
+ sctp_assoc_t pr_assoc_id;
+};
+
+struct sctp_authinfo {
+ uint16_t auth_keyid;
+};
+
+struct sctp_rcvinfo {
+ uint16_t rcv_sid;
+ uint16_t rcv_ssn;
+ uint16_t rcv_flags;
+ uint32_t rcv_ppid;
+ uint32_t rcv_tsn;
+ uint32_t rcv_cumtsn;
+ uint32_t rcv_context;
+ sctp_assoc_t rcv_assoc_id;
+};
+
+struct sctp_nxtinfo {
+ uint16_t nxt_sid;
+ uint16_t nxt_flags;
+ uint32_t nxt_ppid;
+ uint32_t nxt_length;
+ sctp_assoc_t nxt_assoc_id;
+};
+
#define SCTP_NO_NEXT_MSG 0x0000
#define SCTP_NEXT_MSG_AVAIL 0x0001
#define SCTP_NEXT_MSG_ISCOMPLETE 0x0002
#define SCTP_NEXT_MSG_IS_UNORDERED 0x0004
#define SCTP_NEXT_MSG_IS_NOTIFICATION 0x0008
+struct sctp_recvv_rn {
+ struct sctp_rcvinfo recvv_rcvinfo;
+ struct sctp_nxtinfo recvv_nxtinfo;
+};
+
+#define SCTP_RECVV_NOINFO 0
+#define SCTP_RECVV_RCVINFO 1
+#define SCTP_RECVV_NXTINFO 2
+#define SCTP_RECVV_RN 3
+
+#define SCTP_SENDV_NOINFO 0
+#define SCTP_SENDV_SNDINFO 1
+#define SCTP_SENDV_PRINFO 2
+#define SCTP_SENDV_AUTHINFO 3
+#define SCTP_SENDV_SPA 4
+
+struct sctp_sendv_spa {
+ uint32_t sendv_flags;
+ struct sctp_sndinfo sendv_sndinfo;
+ struct sctp_prinfo sendv_prinfo;
+ struct sctp_authinfo sendv_authinfo;
+};
+
+#define SCTP_SEND_SNDINFO_VALID 0x00000001
+#define SCTP_SEND_PRINFO_VALID 0x00000002
+#define SCTP_SEND_AUTHINFO_VALID 0x00000004
+
struct sctp_snd_all_completes {
uint16_t sall_stream;
uint16_t sall_flags;
@@ -144,6 +234,8 @@ struct sctp_snd_all_completes {
};
/* Flags that go into the sinfo->sinfo_flags field */
+#define SCTP_NOTIFICATION 0x0010 /* next message is a notification */
+#define SCTP_COMPLETE 0x0020 /* next message is complete */
#define SCTP_EOF 0x0100 /* Start shutdown procedures */
#define SCTP_ABORT 0x0200 /* Send an ABORT to peer */
#define SCTP_UNORDERED 0x0400 /* Message is un-ordered */
@@ -152,7 +244,7 @@ struct sctp_snd_all_completes {
#define SCTP_EOR 0x2000 /* end of message signal */
#define SCTP_SACK_IMMEDIATELY 0x4000 /* Set I-Bit */
-#define INVALID_SINFO_FLAG(x) (((x) & 0xffffff00 \
+#define INVALID_SINFO_FLAG(x) (((x) & 0xfffffff0 \
& ~(SCTP_EOF | SCTP_ABORT | SCTP_UNORDERED |\
SCTP_ADDR_OVER | SCTP_SENDALL | SCTP_EOR |\
SCTP_SACK_IMMEDIATELY)) != 0)
@@ -163,7 +255,7 @@ struct sctp_snd_all_completes {
#define SCTP_PR_SCTP_BUF 0x0002/* Buffer based PR-SCTP */
#define SCTP_PR_SCTP_RTX 0x0003/* Number of retransmissions based PR-SCTP */
-#define PR_SCTP_POLICY(x) ((x) & 0xff)
+#define PR_SCTP_POLICY(x) ((x) & 0x0f)
#define PR_SCTP_ENABLED(x) (PR_SCTP_POLICY(x) != 0)
#define PR_SCTP_TTL_ENABLED(x) (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_TTL)
#define PR_SCTP_BUF_ENABLED(x) (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_BUF)
@@ -1132,26 +1224,34 @@ int sctp_getladdrs __P((int, sctp_assoc_t, struct sockaddr **));
void sctp_freeladdrs __P((struct sockaddr *));
int sctp_opt_info __P((int, sctp_assoc_t, int, void *, socklen_t *));
+/* deprecated */
ssize_t sctp_sendmsg
-__P((int, const void *, size_t,
- const struct sockaddr *,
+__P((int, const void *, size_t, const struct sockaddr *,
socklen_t, uint32_t, uint32_t, uint16_t, uint32_t, uint32_t));
- ssize_t sctp_send __P((int sd, const void *msg, size_t len,
- const struct sctp_sndrcvinfo *sinfo, int flags));
+/* deprecated */
+ ssize_t sctp_send __P((int, const void *, size_t,
+ const struct sctp_sndrcvinfo *, int));
+
+/* deprecated */
+ ssize_t sctp_sendx __P((int, const void *, size_t, struct sockaddr *,
+ int, struct sctp_sndrcvinfo *, int));
+
+/* deprecated */
+ ssize_t sctp_sendmsgx __P((int sd, const void *, size_t, struct sockaddr *,
+ int, uint32_t, uint32_t, uint16_t, uint32_t, uint32_t));
- ssize_t sctp_sendx __P((int sd, const void *msg, size_t len,
- struct sockaddr *addrs, int addrcnt,
- struct sctp_sndrcvinfo *sinfo, int flags));
+ sctp_assoc_t sctp_getassocid __P((int, struct sockaddr *));
- ssize_t sctp_sendmsgx __P((int sd, const void *, size_t,
- struct sockaddr *, int,
- uint32_t, uint32_t, uint16_t, uint32_t, uint32_t));
+/* deprecated */
+ ssize_t sctp_recvmsg __P((int, void *, size_t, struct sockaddr *, socklen_t *,
+ struct sctp_sndrcvinfo *, int *));
- sctp_assoc_t sctp_getassocid __P((int sd, struct sockaddr *sa));
+ ssize_t sctp_sendv __P((int, const struct iovec *, int, struct sockaddr *,
+ int, void *, socklen_t, unsigned int, int));
- ssize_t sctp_recvmsg __P((int, void *, size_t, struct sockaddr *,
- socklen_t *, struct sctp_sndrcvinfo *, int *));
+ ssize_t sctp_recvv __P((int, const struct iovec *, int, struct sockaddr *,
+ socklen_t *, void *, socklen_t *, unsigned int *, int *));
__END_DECLS
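+
The declarations above add the RFC 6458 style sctp_sendv()/sctp_recvv() entry points alongside the now-deprecated sctp_sendmsg()/sctp_send()/sctp_recvmsg() family. A minimal sketch of sctp_sendv() with a struct sctp_sndinfo, assuming the matching libc wrappers are available; the helper is illustrative and error handling is left to the caller:

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <string.h>
#include <netinet/in.h>
#include <netinet/sctp.h>
#include <netinet/sctp_uio.h>

/* Send a buffer on a given stream via the new sctp_sendv() interface. */
ssize_t
sendv_on_stream(int fd, struct sockaddr *to, void *data, size_t len,
    uint16_t sid)
{
	struct iovec iov = { .iov_base = data, .iov_len = len };
	struct sctp_sndinfo sndinfo;

	memset(&sndinfo, 0, sizeof(sndinfo));
	sndinfo.snd_sid = sid;
	sndinfo.snd_assoc_id = SCTP_FUTURE_ASSOC;

	/* addrcnt is 0 when no address is supplied (connected socket). */
	return (sctp_sendv(fd, &iov, 1, to, (to != NULL) ? 1 : 0, &sndinfo,
	    (socklen_t)sizeof(sndinfo), SCTP_SENDV_SNDINFO, 0));
}
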
diff --git a/sys/netinet/sctp_usrreq.c b/sys/netinet/sctp_usrreq.c
index b3eb805..4c1d726 100644
--- a/sys/netinet/sctp_usrreq.c
+++ b/sys/netinet/sctp_usrreq.c
@@ -713,7 +713,7 @@ sctp_sendm(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
control = NULL;
}
error = EDESTADDRREQ;
- return EDESTADDRREQ;
+ return (error);
}
#endif /* INET6 */
connected_type:
@@ -1448,7 +1448,6 @@ sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval,
struct sctp_tcb *stcb = NULL;
struct sockaddr *sa;
int num_v6 = 0, num_v4 = 0, *totaddrp, totaddr;
- int added = 0;
uint32_t vrf_id;
int bad_addresses = 0;
sctp_assoc_t *a_id;
@@ -1560,7 +1559,7 @@ sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval,
}
error = 0;
- added = sctp_connectx_helper_add(stcb, sa, (totaddr - 1), &error);
+ sctp_connectx_helper_add(stcb, sa, (totaddr - 1), &error);
/* Fill in the return id */
if (error) {
(void)sctp_free_assoc(inp, stcb, SCTP_PCBFREE_FORCE, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_6);
@@ -1603,7 +1602,7 @@ out_now:
SCTP_TCB_LOCK(stcb); \
} \
SCTP_INP_RUNLOCK(inp); \
- } else if (assoc_id != 0) { \
+ } else if (assoc_id > SCTP_ALL_ASSOC) { \
stcb = sctp_findassociation_ep_asocid(inp, assoc_id, 1); \
if (stcb == NULL) { \
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT); \
@@ -1691,10 +1690,6 @@ sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize,
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);
error = ENOPROTOOPT;
} /* end switch (sopt->sopt_name) */
- if (optname != SCTP_AUTOCLOSE) {
- /* make it an "on/off" value */
- val = (val != 0);
- }
if (*optsize < sizeof(val)) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
@@ -1734,8 +1729,8 @@ flags_out:
SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
*value = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE);
*optsize = sizeof(uint32_t);
+ break;
}
- break;
case SCTP_PARTIAL_DELIVERY_POINT:
{
uint32_t *value;
@@ -1743,8 +1738,8 @@ flags_out:
SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
*value = inp->partial_delivery_point;
*optsize = sizeof(uint32_t);
+ break;
}
- break;
case SCTP_FRAGMENT_INTERLEAVE:
{
uint32_t *value;
@@ -1760,8 +1755,8 @@ flags_out:
*value = SCTP_FRAG_LEVEL_0;
}
*optsize = sizeof(uint32_t);
+ break;
}
- break;
case SCTP_CMT_ON_OFF:
{
struct sctp_assoc_value *av;
@@ -1772,14 +1767,20 @@ flags_out:
av->assoc_value = stcb->asoc.sctp_cmt_on_off;
SCTP_TCB_UNLOCK(stcb);
} else {
- SCTP_INP_RLOCK(inp);
- av->assoc_value = inp->sctp_cmt_on_off;
- SCTP_INP_RUNLOCK(inp);
+ if (av->assoc_id == SCTP_FUTURE_ASSOC) {
+ SCTP_INP_RLOCK(inp);
+ av->assoc_value = inp->sctp_cmt_on_off;
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
}
- *optsize = sizeof(*av);
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_assoc_value);
+ }
+ break;
}
- break;
- /* JRS - Get socket option for pluggable congestion control */
case SCTP_PLUGGABLE_CC:
{
struct sctp_assoc_value *av;
@@ -1790,11 +1791,20 @@ flags_out:
av->assoc_value = stcb->asoc.congestion_control_module;
SCTP_TCB_UNLOCK(stcb);
} else {
- av->assoc_value = inp->sctp_ep.sctp_default_cc_module;
+ if (av->assoc_id == SCTP_FUTURE_ASSOC) {
+ SCTP_INP_RLOCK(inp);
+ av->assoc_value = inp->sctp_ep.sctp_default_cc_module;
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
}
- *optsize = sizeof(*av);
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_assoc_value);
+ }
+ break;
}
- break;
case SCTP_CC_OPTION:
{
struct sctp_cc_option *cc_opt;
@@ -1807,15 +1817,13 @@ flags_out:
if (stcb->asoc.cc_functions.sctp_cwnd_socket_option == NULL) {
error = ENOTSUP;
} else {
- error = (*stcb->asoc.cc_functions.sctp_cwnd_socket_option) (stcb, 0,
- cc_opt);
- *optsize = sizeof(*cc_opt);
+ error = (*stcb->asoc.cc_functions.sctp_cwnd_socket_option) (stcb, 0, cc_opt);
+ *optsize = sizeof(struct sctp_cc_option);
}
SCTP_TCB_UNLOCK(stcb);
}
+ break;
}
- break;
- /* RS - Get socket option for pluggable stream scheduling */
case SCTP_PLUGGABLE_SS:
{
struct sctp_assoc_value *av;
@@ -1826,11 +1834,20 @@ flags_out:
av->assoc_value = stcb->asoc.stream_scheduling_module;
SCTP_TCB_UNLOCK(stcb);
} else {
- av->assoc_value = inp->sctp_ep.sctp_default_ss_module;
+ if (av->assoc_id == SCTP_FUTURE_ASSOC) {
+ SCTP_INP_RLOCK(inp);
+ av->assoc_value = inp->sctp_ep.sctp_default_ss_module;
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_assoc_value);
}
- *optsize = sizeof(*av);
+ break;
}
- break;
case SCTP_SS_VALUE:
{
struct sctp_stream_value *av;
@@ -1843,7 +1860,7 @@ flags_out:
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
} else {
- *optsize = sizeof(*av);
+ *optsize = sizeof(struct sctp_stream_value);
}
SCTP_TCB_UNLOCK(stcb);
} else {
@@ -1854,8 +1871,8 @@ flags_out:
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
}
+ break;
}
- break;
case SCTP_GET_ADDR_LEN:
{
struct sctp_assoc_value *av;
@@ -1876,10 +1893,11 @@ flags_out:
#endif
if (error) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ } else {
+ *optsize = sizeof(struct sctp_assoc_value);
}
- *optsize = sizeof(*av);
+ break;
}
- break;
case SCTP_GET_ASSOC_NUMBER:
{
uint32_t *value, cnt;
@@ -1893,9 +1911,8 @@ flags_out:
SCTP_INP_RUNLOCK(inp);
*value = cnt;
*optsize = sizeof(uint32_t);
+ break;
}
- break;
-
case SCTP_GET_ASSOC_ID_LIST:
{
struct sctp_assoc_ids *ids;
@@ -1915,10 +1932,12 @@ flags_out:
}
}
SCTP_INP_RUNLOCK(inp);
- ids->gaids_number_of_ids = at;
- *optsize = ((at * sizeof(sctp_assoc_t)) + sizeof(uint32_t));
+ if (error == 0) {
+ ids->gaids_number_of_ids = at;
+ *optsize = ((at * sizeof(sctp_assoc_t)) + sizeof(uint32_t));
+ }
+ break;
}
- break;
case SCTP_CONTEXT:
{
struct sctp_assoc_value *av;
@@ -1930,19 +1949,27 @@ flags_out:
av->assoc_value = stcb->asoc.context;
SCTP_TCB_UNLOCK(stcb);
} else {
- SCTP_INP_RLOCK(inp);
- av->assoc_value = inp->sctp_context;
- SCTP_INP_RUNLOCK(inp);
+ if (av->assoc_id == SCTP_FUTURE_ASSOC) {
+ SCTP_INP_RLOCK(inp);
+ av->assoc_value = inp->sctp_context;
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
}
- *optsize = sizeof(*av);
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_assoc_value);
+ }
+ break;
}
- break;
case SCTP_VRF_ID:
{
uint32_t *default_vrfid;
SCTP_CHECK_AND_CAST(default_vrfid, optval, uint32_t, *optsize);
*default_vrfid = inp->def_vrf_id;
+ *optsize = sizeof(uint32_t);
break;
}
case SCTP_GET_ASOC_VRF:
@@ -1954,9 +1981,10 @@ flags_out:
if (stcb == NULL) {
error = EINVAL;
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
- break;
+ } else {
+ id->assoc_value = stcb->asoc.vrf_id;
+ *optsize = sizeof(struct sctp_assoc_value);
}
- id->assoc_value = stcb->asoc.vrf_id;
break;
}
case SCTP_GET_VRF_IDS:
@@ -1976,13 +2004,13 @@ flags_out:
gnv->gn_peers_tag = stcb->asoc.peer_vtag;
gnv->gn_local_tag = stcb->asoc.my_vtag;
SCTP_TCB_UNLOCK(stcb);
+ *optsize = sizeof(struct sctp_get_nonce_values);
} else {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN);
error = ENOTCONN;
}
- *optsize = sizeof(*gnv);
+ break;
}
- break;
case SCTP_DELAYED_SACK:
{
struct sctp_sack_info *sack;
@@ -1994,15 +2022,21 @@ flags_out:
sack->sack_freq = stcb->asoc.sack_freq;
SCTP_TCB_UNLOCK(stcb);
} else {
- SCTP_INP_RLOCK(inp);
- sack->sack_delay = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV]);
- sack->sack_freq = inp->sctp_ep.sctp_sack_freq;
- SCTP_INP_RUNLOCK(inp);
+ if (sack->sack_assoc_id == SCTP_FUTURE_ASSOC) {
+ SCTP_INP_RLOCK(inp);
+ sack->sack_delay = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV]);
+ sack->sack_freq = inp->sctp_ep.sctp_sack_freq;
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
}
- *optsize = sizeof(*sack);
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_sack_info);
+ }
+ break;
}
- break;
-
case SCTP_GET_SNDBUF_USE:
{
struct sctp_sockstat *ss;
@@ -2015,13 +2049,13 @@ flags_out:
ss->ss_total_recv_buf = (stcb->asoc.size_on_reasm_queue +
stcb->asoc.size_on_all_streams);
SCTP_TCB_UNLOCK(stcb);
+ *optsize = sizeof(struct sctp_sockstat);
} else {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN);
error = ENOTCONN;
}
- *optsize = sizeof(struct sctp_sockstat);
+ break;
}
- break;
case SCTP_MAX_BURST:
{
struct sctp_assoc_value *av;
@@ -2033,14 +2067,20 @@ flags_out:
av->assoc_value = stcb->asoc.max_burst;
SCTP_TCB_UNLOCK(stcb);
} else {
- SCTP_INP_RLOCK(inp);
- av->assoc_value = inp->sctp_ep.max_burst;
- SCTP_INP_RUNLOCK(inp);
+ if (av->assoc_id == SCTP_FUTURE_ASSOC) {
+ SCTP_INP_RLOCK(inp);
+ av->assoc_value = inp->sctp_ep.max_burst;
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
}
- *optsize = sizeof(struct sctp_assoc_value);
-
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_assoc_value);
+ }
+ break;
}
- break;
case SCTP_MAXSEG:
{
struct sctp_assoc_value *av;
@@ -2053,21 +2093,28 @@ flags_out:
av->assoc_value = sctp_get_frag_point(stcb, &stcb->asoc);
SCTP_TCB_UNLOCK(stcb);
} else {
- SCTP_INP_RLOCK(inp);
- if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
- ovh = SCTP_MED_OVERHEAD;
+ if (av->assoc_id == SCTP_FUTURE_ASSOC) {
+ SCTP_INP_RLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ovh = SCTP_MED_OVERHEAD;
+ } else {
+ ovh = SCTP_MED_V4_OVERHEAD;
+ }
+ if (inp->sctp_frag_point >= SCTP_DEFAULT_MAXSEGMENT)
+ av->assoc_value = 0;
+ else
+ av->assoc_value = inp->sctp_frag_point - ovh;
+ SCTP_INP_RUNLOCK(inp);
} else {
- ovh = SCTP_MED_V4_OVERHEAD;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
}
- if (inp->sctp_frag_point >= SCTP_DEFAULT_MAXSEGMENT)
- av->assoc_value = 0;
- else
- av->assoc_value = inp->sctp_frag_point - ovh;
- SCTP_INP_RUNLOCK(inp);
}
- *optsize = sizeof(struct sctp_assoc_value);
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_assoc_value);
+ }
+ break;
}
- break;
case SCTP_GET_STAT_LOG:
error = sctp_fill_stat_log(optval, optsize);
break;
@@ -2076,7 +2123,7 @@ flags_out:
struct sctp_event_subscribe *events;
SCTP_CHECK_AND_CAST(events, optval, struct sctp_event_subscribe, *optsize);
- memset(events, 0, sizeof(*events));
+ memset(events, 0, sizeof(struct sctp_event_subscribe));
SCTP_INP_RLOCK(inp);
if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT))
events->sctp_data_io_event = 1;
@@ -2112,9 +2159,8 @@ flags_out:
events->sctp_stream_reset_event = 1;
SCTP_INP_RUNLOCK(inp);
*optsize = sizeof(struct sctp_event_subscribe);
+ break;
}
- break;
-
case SCTP_ADAPTATION_LAYER:
{
uint32_t *value;
@@ -2125,8 +2171,8 @@ flags_out:
*value = inp->sctp_ep.adaptation_layer_indicator;
SCTP_INP_RUNLOCK(inp);
*optsize = sizeof(uint32_t);
+ break;
}
- break;
case SCTP_SET_INITIAL_DBG_SEQ:
{
uint32_t *value;
@@ -2136,8 +2182,8 @@ flags_out:
*value = inp->sctp_ep.initial_sequence_debug;
SCTP_INP_RUNLOCK(inp);
*optsize = sizeof(uint32_t);
+ break;
}
- break;
case SCTP_GET_LOCAL_ADDR_SIZE:
{
uint32_t *value;
@@ -2147,8 +2193,8 @@ flags_out:
*value = sctp_count_max_addresses(inp);
SCTP_INP_RUNLOCK(inp);
*optsize = sizeof(uint32_t);
+ break;
}
- break;
case SCTP_GET_REMOTE_ADDR_SIZE:
{
uint32_t *value;
@@ -2184,13 +2230,13 @@ flags_out:
}
SCTP_TCB_UNLOCK(stcb);
*value = (uint32_t) size;
+ *optsize = sizeof(uint32_t);
} else {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN);
error = ENOTCONN;
}
- *optsize = sizeof(uint32_t);
+ break;
}
- break;
case SCTP_GET_PEER_ADDRESSES:
/*
* Get the address information, an array is passed in to
@@ -2260,8 +2306,8 @@ flags_out:
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
error = ENOENT;
}
+ break;
}
- break;
case SCTP_GET_LOCAL_ADDRESSES:
{
size_t limit, actual;
@@ -2278,8 +2324,8 @@ flags_out:
SCTP_TCB_UNLOCK(stcb);
}
*optsize = sizeof(struct sockaddr_storage) + actual;
+ break;
}
- break;
case SCTP_PEER_ADDR_PARAMS:
{
struct sctp_paddrparams *paddrp;
@@ -2416,38 +2462,45 @@ flags_out:
paddrp->spp_assoc_id = sctp_get_associd(stcb);
SCTP_TCB_UNLOCK(stcb);
} else {
- /* Use endpoint defaults */
- SCTP_INP_RLOCK(inp);
- paddrp->spp_pathmaxrxt = inp->sctp_ep.def_net_failure;
- paddrp->spp_hbinterval = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT]);
- paddrp->spp_assoc_id = (sctp_assoc_t) 0;
- /* get inp's default */
+ if (paddrp->spp_assoc_id == SCTP_FUTURE_ASSOC) {
+ /* Use endpoint defaults */
+ SCTP_INP_RLOCK(inp);
+ paddrp->spp_pathmaxrxt = inp->sctp_ep.def_net_failure;
+ paddrp->spp_hbinterval = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT]);
+ paddrp->spp_assoc_id = SCTP_FUTURE_ASSOC;
+ /* get inp's default */
#ifdef INET
- paddrp->spp_ipv4_tos = inp->ip_inp.inp.inp_ip_tos;
- paddrp->spp_flags |= SPP_IPV4_TOS;
+ paddrp->spp_ipv4_tos = inp->ip_inp.inp.inp_ip_tos;
+ paddrp->spp_flags |= SPP_IPV4_TOS;
#endif
#ifdef INET6
- if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
- paddrp->spp_ipv6_flowlabel = ((struct in6pcb *)inp)->in6p_flowinfo;
- paddrp->spp_flags |= SPP_IPV6_FLOWLABEL;
- }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ paddrp->spp_ipv6_flowlabel = ((struct in6pcb *)inp)->in6p_flowinfo;
+ paddrp->spp_flags |= SPP_IPV6_FLOWLABEL;
+ }
#endif
- /* can't return this */
- paddrp->spp_pathmtu = 0;
+ /* can't return this */
+ paddrp->spp_pathmtu = 0;
- /* default behavior, no stcb */
- paddrp->spp_flags = SPP_PMTUD_ENABLE;
+ /* default behavior, no stcb */
+ paddrp->spp_flags = SPP_PMTUD_ENABLE;
- if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT)) {
- paddrp->spp_flags |= SPP_HB_ENABLE;
+ if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT)) {
+ paddrp->spp_flags |= SPP_HB_ENABLE;
+ } else {
+ paddrp->spp_flags |= SPP_HB_DISABLE;
+ }
+ SCTP_INP_RUNLOCK(inp);
} else {
- paddrp->spp_flags |= SPP_HB_DISABLE;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
}
- SCTP_INP_RUNLOCK(inp);
}
- *optsize = sizeof(struct sctp_paddrparams);
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_paddrparams);
+ }
+ break;
}
- break;
case SCTP_GET_PEER_ADDR_INFO:
{
struct sctp_paddrinfo *paddri;
@@ -2491,6 +2544,7 @@ flags_out:
paddri->spinfo_assoc_id = sctp_get_associd(stcb);
paddri->spinfo_mtu = net->mtu;
SCTP_TCB_UNLOCK(stcb);
+ *optsize = sizeof(struct sctp_paddrinfo);
} else {
if (stcb) {
SCTP_TCB_UNLOCK(stcb);
@@ -2498,9 +2552,8 @@ flags_out:
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
error = ENOENT;
}
- *optsize = sizeof(struct sctp_paddrinfo);
+ break;
}
- break;
case SCTP_PCB_STATUS:
{
struct sctp_pcbinfo *spcb;
@@ -2508,9 +2561,8 @@ flags_out:
SCTP_CHECK_AND_CAST(spcb, optval, struct sctp_pcbinfo, *optsize);
sctp_fill_pcbinfo(spcb);
*optsize = sizeof(struct sctp_pcbinfo);
+ break;
}
- break;
-
case SCTP_STATUS:
{
struct sctp_nets *net;
@@ -2520,7 +2572,7 @@ flags_out:
SCTP_FIND_STCB(inp, stcb, sstat->sstat_assoc_id);
if (stcb == NULL) {
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
break;
}
@@ -2569,9 +2621,9 @@ flags_out:
sstat->sstat_primary.spinfo_mtu = net->mtu;
sstat->sstat_primary.spinfo_assoc_id = sctp_get_associd(stcb);
SCTP_TCB_UNLOCK(stcb);
- *optsize = sizeof(*sstat);
+ *optsize = sizeof(struct sctp_status);
+ break;
}
- break;
case SCTP_RTOINFO:
{
struct sctp_rtoinfo *srto;
@@ -2585,15 +2637,22 @@ flags_out:
srto->srto_min = stcb->asoc.minrto;
SCTP_TCB_UNLOCK(stcb);
} else {
- SCTP_INP_RLOCK(inp);
- srto->srto_initial = inp->sctp_ep.initial_rto;
- srto->srto_max = inp->sctp_ep.sctp_maxrto;
- srto->srto_min = inp->sctp_ep.sctp_minrto;
- SCTP_INP_RUNLOCK(inp);
+ if (srto->srto_assoc_id == SCTP_FUTURE_ASSOC) {
+ SCTP_INP_RLOCK(inp);
+ srto->srto_initial = inp->sctp_ep.initial_rto;
+ srto->srto_max = inp->sctp_ep.sctp_maxrto;
+ srto->srto_min = inp->sctp_ep.sctp_minrto;
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_rtoinfo);
}
- *optsize = sizeof(*srto);
+ break;
}
- break;
case SCTP_TIMEOUTS:
{
struct sctp_timeouts *stimo;
@@ -2610,23 +2669,21 @@ flags_out:
stimo->stimo_cookie = stcb->asoc.timocookie;
stimo->stimo_shutdownack = stcb->asoc.timoshutdownack;
SCTP_TCB_UNLOCK(stcb);
+ *optsize = sizeof(struct sctp_timeouts);
} else {
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
}
- *optsize = sizeof(*stimo);
+ break;
}
- break;
case SCTP_ASSOCINFO:
{
struct sctp_assocparams *sasoc;
- uint32_t oldval;
SCTP_CHECK_AND_CAST(sasoc, optval, struct sctp_assocparams, *optsize);
SCTP_FIND_STCB(inp, stcb, sasoc->sasoc_assoc_id);
if (stcb) {
- oldval = sasoc->sasoc_cookie_life;
sasoc->sasoc_cookie_life = TICKS_TO_MSEC(stcb->asoc.cookie_life);
sasoc->sasoc_asocmaxrxt = stcb->asoc.max_send_times;
sasoc->sasoc_number_peer_destinations = stcb->asoc.numnets;
@@ -2634,17 +2691,24 @@ flags_out:
sasoc->sasoc_local_rwnd = stcb->asoc.my_rwnd;
SCTP_TCB_UNLOCK(stcb);
} else {
- SCTP_INP_RLOCK(inp);
- sasoc->sasoc_cookie_life = TICKS_TO_MSEC(inp->sctp_ep.def_cookie_life);
- sasoc->sasoc_asocmaxrxt = inp->sctp_ep.max_send_times;
- sasoc->sasoc_number_peer_destinations = 0;
- sasoc->sasoc_peer_rwnd = 0;
- sasoc->sasoc_local_rwnd = sbspace(&inp->sctp_socket->so_rcv);
- SCTP_INP_RUNLOCK(inp);
+ if (sasoc->sasoc_assoc_id == SCTP_FUTURE_ASSOC) {
+ SCTP_INP_RLOCK(inp);
+ sasoc->sasoc_cookie_life = TICKS_TO_MSEC(inp->sctp_ep.def_cookie_life);
+ sasoc->sasoc_asocmaxrxt = inp->sctp_ep.max_send_times;
+ sasoc->sasoc_number_peer_destinations = 0;
+ sasoc->sasoc_peer_rwnd = 0;
+ sasoc->sasoc_local_rwnd = sbspace(&inp->sctp_socket->so_rcv);
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_assocparams);
}
- *optsize = sizeof(*sasoc);
+ break;
}
- break;
case SCTP_DEFAULT_SEND_PARAM:
{
struct sctp_sndrcvinfo *s_info;
@@ -2656,13 +2720,20 @@ flags_out:
memcpy(s_info, &stcb->asoc.def_send, sizeof(stcb->asoc.def_send));
SCTP_TCB_UNLOCK(stcb);
} else {
- SCTP_INP_RLOCK(inp);
- memcpy(s_info, &inp->def_send, sizeof(inp->def_send));
- SCTP_INP_RUNLOCK(inp);
+ if (s_info->sinfo_assoc_id == SCTP_FUTURE_ASSOC) {
+ SCTP_INP_RLOCK(inp);
+ memcpy(s_info, &inp->def_send, sizeof(inp->def_send));
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
}
- *optsize = sizeof(*s_info);
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_sndrcvinfo);
+ }
+ break;
}
- break;
case SCTP_INITMSG:
{
struct sctp_initmsg *sinit;
@@ -2674,9 +2745,9 @@ flags_out:
sinit->sinit_max_attempts = inp->sctp_ep.max_init_times;
sinit->sinit_max_init_timeo = inp->sctp_ep.initial_init_rto_max;
SCTP_INP_RUNLOCK(inp);
- *optsize = sizeof(*sinit);
+ *optsize = sizeof(struct sctp_initmsg);
+ break;
}
- break;
case SCTP_PRIMARY_ADDR:
/* we allow a "get" operation on this */
{
@@ -2697,14 +2768,13 @@ flags_out:
&stcb->asoc.primary_destination->ro._l_addr,
len);
SCTP_TCB_UNLOCK(stcb);
+ *optsize = sizeof(struct sctp_setprim);
} else {
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
}
- *optsize = sizeof(*ssp);
+ break;
}
- break;
-
case SCTP_HMAC_IDENT:
{
struct sctp_hmacalgo *shmac;
@@ -2726,7 +2796,7 @@ flags_out:
size = sizeof(*shmac) + (hmaclist->num_algo *
sizeof(shmac->shmac_idents[0]));
if ((size_t)(*optsize) < size) {
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
SCTP_INP_RUNLOCK(inp);
break;
@@ -2752,12 +2822,19 @@ flags_out:
scact->scact_keynumber = stcb->asoc.authinfo.active_keyid;
SCTP_TCB_UNLOCK(stcb);
} else {
- /* get the endpoint active key */
- SCTP_INP_RLOCK(inp);
- scact->scact_keynumber = inp->sctp_ep.default_keyid;
- SCTP_INP_RUNLOCK(inp);
+ if (scact->scact_assoc_id == SCTP_FUTURE_ASSOC) {
+ /* get the endpoint active key */
+ SCTP_INP_RLOCK(inp);
+ scact->scact_keynumber = inp->sctp_ep.default_keyid;
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_authkeyid);
}
- *optsize = sizeof(*scact);
break;
}
case SCTP_LOCAL_AUTH_CHUNKS:
@@ -2780,24 +2857,30 @@ flags_out:
} else {
/* copy in the chunks */
(void)sctp_serialize_auth_chunks(chklist, sac->gauth_chunks);
+ *optsize = sizeof(struct sctp_authchunks) + size;
}
SCTP_TCB_UNLOCK(stcb);
} else {
- /* get off the endpoint */
- SCTP_INP_RLOCK(inp);
- chklist = inp->sctp_ep.local_auth_chunks;
- /* is there enough space? */
- size = sctp_auth_get_chklist_size(chklist);
- if (*optsize < (sizeof(struct sctp_authchunks) + size)) {
- error = EINVAL;
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ if (sac->gauth_assoc_id == SCTP_FUTURE_ASSOC) {
+ /* get off the endpoint */
+ SCTP_INP_RLOCK(inp);
+ chklist = inp->sctp_ep.local_auth_chunks;
+ /* is there enough space? */
+ size = sctp_auth_get_chklist_size(chklist);
+ if (*optsize < (sizeof(struct sctp_authchunks) + size)) {
+ error = EINVAL;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ } else {
+ /* copy in the chunks */
+ (void)sctp_serialize_auth_chunks(chklist, sac->gauth_chunks);
+ *optsize = sizeof(struct sctp_authchunks) + size;
+ }
+ SCTP_INP_RUNLOCK(inp);
} else {
- /* copy in the chunks */
- (void)sctp_serialize_auth_chunks(chklist, sac->gauth_chunks);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
}
- SCTP_INP_RUNLOCK(inp);
}
- *optsize = sizeof(struct sctp_authchunks) + size;
break;
}
case SCTP_PEER_AUTH_CHUNKS:
@@ -2820,23 +2903,191 @@ flags_out:
} else {
/* copy in the chunks */
(void)sctp_serialize_auth_chunks(chklist, sac->gauth_chunks);
+ *optsize = sizeof(struct sctp_authchunks) + size;
}
SCTP_TCB_UNLOCK(stcb);
} else {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
error = ENOENT;
}
- *optsize = sizeof(struct sctp_authchunks) + size;
break;
}
+ case SCTP_EVENT:
+ {
+ struct sctp_event *event;
+ uint32_t event_type;
+
+ SCTP_CHECK_AND_CAST(event, optval, struct sctp_event, *optsize);
+ SCTP_FIND_STCB(inp, stcb, event->se_assoc_id);
+
+ switch (event->se_type) {
+ case SCTP_ASSOC_CHANGE:
+ event_type = SCTP_PCB_FLAGS_RECVASSOCEVNT;
+ break;
+ case SCTP_PEER_ADDR_CHANGE:
+ event_type = SCTP_PCB_FLAGS_RECVPADDREVNT;
+ break;
+ case SCTP_REMOTE_ERROR:
+ event_type = SCTP_PCB_FLAGS_RECVPEERERR;
+ break;
+ case SCTP_SEND_FAILED:
+ event_type = SCTP_PCB_FLAGS_RECVSENDFAILEVNT;
+ break;
+ case SCTP_SHUTDOWN_EVENT:
+ event_type = SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT;
+ break;
+ case SCTP_ADAPTATION_INDICATION:
+ event_type = SCTP_PCB_FLAGS_ADAPTATIONEVNT;
+ break;
+ case SCTP_PARTIAL_DELIVERY_EVENT:
+ event_type = SCTP_PCB_FLAGS_PDAPIEVNT;
+ break;
+ case SCTP_AUTHENTICATION_EVENT:
+ event_type = SCTP_PCB_FLAGS_AUTHEVNT;
+ break;
+ case SCTP_STREAM_RESET_EVENT:
+ event_type = SCTP_PCB_FLAGS_STREAM_RESETEVNT;
+ break;
+ case SCTP_SENDER_DRY_EVENT:
+ event_type = SCTP_PCB_FLAGS_DRYEVNT;
+ break;
+ case SCTP_NOTIFICATIONS_STOPPED_EVENT:
+ event_type = 0;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTSUP);
+ error = ENOTSUP;
+ break;
+ default:
+ event_type = 0;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ if (event_type > 0) {
+ if (stcb) {
+ event->se_on = sctp_stcb_is_feature_on(inp, stcb, event_type);
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if (event->se_assoc_id == SCTP_FUTURE_ASSOC) {
+ SCTP_INP_RLOCK(inp);
+ event->se_on = sctp_is_feature_on(inp, event_type);
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ }
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_event);
+ }
+ break;
+ }
+ case SCTP_RECVRCVINFO:
+ {
+ int onoff;
+
+ if (*optsize < sizeof(int)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ } else {
+ SCTP_INP_RLOCK(inp);
+ onoff = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVRCVINFO);
+ SCTP_INP_RUNLOCK(inp);
+ }
+ if (error == 0) {
+ /* return the option value */
+ *(int *)optval = onoff;
+ *optsize = sizeof(int);
+ }
+ break;
+ }
+ case SCTP_RECVNXTINFO:
+ {
+ int onoff;
+
+ if (*optsize < sizeof(int)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ } else {
+ SCTP_INP_RLOCK(inp);
+ onoff = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVNXTINFO);
+ SCTP_INP_RUNLOCK(inp);
+ }
+ if (error == 0) {
+ /* return the option value */
+ *(int *)optval = onoff;
+ *optsize = sizeof(int);
+ }
+ break;
+ }
+ case SCTP_DEFAULT_SNDINFO:
+ {
+ struct sctp_sndinfo *info;
+
+ SCTP_CHECK_AND_CAST(info, optval, struct sctp_sndinfo, *optsize);
+ SCTP_FIND_STCB(inp, stcb, info->snd_assoc_id);
+
+ if (stcb) {
+ info->snd_sid = stcb->asoc.def_send.sinfo_stream;
+ info->snd_flags = stcb->asoc.def_send.sinfo_flags;
+ info->snd_flags &= 0xfff0;
+ info->snd_ppid = stcb->asoc.def_send.sinfo_ppid;
+ info->snd_context = stcb->asoc.def_send.sinfo_context;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if (info->snd_assoc_id == SCTP_FUTURE_ASSOC) {
+ SCTP_INP_RLOCK(inp);
+ info->snd_sid = inp->def_send.sinfo_stream;
+ info->snd_flags = inp->def_send.sinfo_flags;
+ info->snd_flags &= 0xfff0;
+ info->snd_ppid = inp->def_send.sinfo_ppid;
+ info->snd_context = inp->def_send.sinfo_context;
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_sndinfo);
+ }
+ break;
+ }
+ case SCTP_DEFAULT_PRINFO:
+ {
+ struct sctp_default_prinfo *info;
+ SCTP_CHECK_AND_CAST(info, optval, struct sctp_default_prinfo, *optsize);
+ SCTP_FIND_STCB(inp, stcb, info->pr_assoc_id);
+ if (stcb) {
+ info->pr_policy = PR_SCTP_POLICY(stcb->asoc.def_send.sinfo_flags);
+ info->pr_value = stcb->asoc.def_send.sinfo_timetolive;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if (info->pr_assoc_id == SCTP_FUTURE_ASSOC) {
+ SCTP_INP_RLOCK(inp);
+ info->pr_policy = PR_SCTP_POLICY(inp->def_send.sinfo_flags);
+ info->pr_value = inp->def_send.sinfo_timetolive;
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_default_prinfo);
+ }
+ break;
+ }
default:
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);
error = ENOPROTOOPT;
- *optsize = 0;
break;
} /* end switch (sopt->sopt_name) */
+ if (error) {
+ *optsize = 0;
+ }
return (error);
}
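
For context, here is a minimal userland sketch of two of the new getsockopt() paths added above (SCTP_RECVRCVINFO and SCTP_DEFAULT_PRINFO). It is illustrative only and not part of this commit; the headers, socket setup and error handling are assumptions, while the option names and field names follow the structures used in the handlers above.

/*
 * Illustrative only: query the new read-only options from userland.
 * Assumes "fd" is an already created SCTP socket.
 */
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/sctp.h>
#include <stdio.h>

static void
query_new_options(int fd)
{
	int onoff;
	socklen_t len = sizeof(onoff);
	struct sctp_default_prinfo pri;
	socklen_t prilen = sizeof(pri);

	/* SCTP_RECVRCVINFO: is struct sctp_rcvinfo delivery enabled? */
	if (getsockopt(fd, IPPROTO_SCTP, SCTP_RECVRCVINFO, &onoff, &len) == 0)
		printf("SCTP_RECVRCVINFO is %s\n", onoff ? "on" : "off");

	/* SCTP_DEFAULT_PRINFO: endpoint default PR-SCTP policy and value. */
	pri.pr_assoc_id = SCTP_FUTURE_ASSOC;
	if (getsockopt(fd, IPPROTO_SCTP, SCTP_DEFAULT_PRINFO, &pri, &prilen) == 0)
		printf("default policy %u, value %u\n", pri.pr_policy, pri.pr_value);
}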
@@ -2949,8 +3200,8 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
sctp_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE);
else
sctp_feature_off(inp, SCTP_PCB_FLAGS_PORTREUSE);
+ break;
}
- break;
case SCTP_PARTIAL_DELIVERY_POINT:
{
uint32_t *value;
@@ -2962,8 +3213,8 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
break;
}
inp->partial_delivery_point = *value;
+ break;
}
- break;
case SCTP_FRAGMENT_INTERLEAVE:
/* not yet until we re-write sctp_recvmsg() */
{
@@ -2984,83 +3235,95 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
}
+ break;
}
- break;
case SCTP_CMT_ON_OFF:
if (SCTP_BASE_SYSCTL(sctp_cmt_on_off)) {
struct sctp_assoc_value *av;
SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ if (av->assoc_value > SCTP_CMT_MAX) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
SCTP_FIND_STCB(inp, stcb, av->assoc_id);
if (stcb) {
- if (av->assoc_value > SCTP_CMT_MAX) {
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
- error = EINVAL;
- } else {
- stcb->asoc.sctp_cmt_on_off = av->assoc_value;
- }
+ stcb->asoc.sctp_cmt_on_off = av->assoc_value;
SCTP_TCB_UNLOCK(stcb);
} else {
- if (av->assoc_value > SCTP_CMT_MAX) {
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
- error = EINVAL;
- } else {
+ if ((av->assoc_id == SCTP_FUTURE_ASSOC) ||
+ (av->assoc_id == SCTP_ALL_ASSOC)) {
SCTP_INP_WLOCK(inp);
inp->sctp_cmt_on_off = av->assoc_value;
SCTP_INP_WUNLOCK(inp);
}
+ if ((av->assoc_id == SCTP_CURRENT_ASSOC) ||
+ (av->assoc_id == SCTP_ALL_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ SCTP_TCB_LOCK(stcb);
+ stcb->asoc.sctp_cmt_on_off = av->assoc_value;
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ }
}
} else {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);
error = ENOPROTOOPT;
}
break;
- /* JRS - Set socket option for pluggable congestion control */
case SCTP_PLUGGABLE_CC:
{
struct sctp_assoc_value *av;
struct sctp_nets *net;
SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ if ((av->assoc_value != SCTP_CC_RFC2581) &&
+ (av->assoc_value != SCTP_CC_HSTCP) &&
+ (av->assoc_value != SCTP_CC_HTCP) &&
+ (av->assoc_value != SCTP_CC_RTCC)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
SCTP_FIND_STCB(inp, stcb, av->assoc_id);
if (stcb) {
- switch (av->assoc_value) {
- case SCTP_CC_RFC2581:
- case SCTP_CC_HSTCP:
- case SCTP_CC_HTCP:
- case SCTP_CC_RTCC:
- stcb->asoc.cc_functions = sctp_cc_functions[av->assoc_value];
- stcb->asoc.congestion_control_module = av->assoc_value;
- if (stcb->asoc.cc_functions.sctp_set_initial_cc_param != NULL) {
- TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
- stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb, net);
- }
+ stcb->asoc.cc_functions = sctp_cc_functions[av->assoc_value];
+ stcb->asoc.congestion_control_module = av->assoc_value;
+ if (stcb->asoc.cc_functions.sctp_set_initial_cc_param != NULL) {
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb, net);
}
- break;
- default:
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
- error = EINVAL;
- break;
}
SCTP_TCB_UNLOCK(stcb);
} else {
- switch (av->assoc_value) {
- case SCTP_CC_RFC2581:
- case SCTP_CC_HSTCP:
- case SCTP_CC_HTCP:
- case SCTP_CC_RTCC:
+ if ((av->assoc_id == SCTP_FUTURE_ASSOC) ||
+ (av->assoc_id == SCTP_ALL_ASSOC)) {
SCTP_INP_WLOCK(inp);
inp->sctp_ep.sctp_default_cc_module = av->assoc_value;
SCTP_INP_WUNLOCK(inp);
- break;
- default:
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
- error = EINVAL;
- break;
+ }
+ if ((av->assoc_id == SCTP_CURRENT_ASSOC) ||
+ (av->assoc_id == SCTP_ALL_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ SCTP_TCB_LOCK(stcb);
+ stcb->asoc.cc_functions = sctp_cc_functions[av->assoc_value];
+ stcb->asoc.congestion_control_module = av->assoc_value;
+ if (stcb->asoc.cc_functions.sctp_set_initial_cc_param != NULL) {
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb, net);
+ }
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ SCTP_INP_RUNLOCK(inp);
}
}
+ break;
}
- break;
case SCTP_CC_OPTION:
{
struct sctp_cc_option *cc_opt;
@@ -3068,7 +3331,19 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
SCTP_CHECK_AND_CAST(cc_opt, optval, struct sctp_cc_option, optsize);
SCTP_FIND_STCB(inp, stcb, cc_opt->aid_value.assoc_id);
if (stcb == NULL) {
- error = EINVAL;
+ if (cc_opt->aid_value.assoc_id == SCTP_CURRENT_ASSOC) {
+ SCTP_INP_RLOCK(inp);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ SCTP_TCB_LOCK(stcb);
+ if (stcb->asoc.cc_functions.sctp_cwnd_socket_option) {
+ (*stcb->asoc.cc_functions.sctp_cwnd_socket_option) (stcb, 1, cc_opt);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ error = EINVAL;
+ }
} else {
if (stcb->asoc.cc_functions.sctp_cwnd_socket_option == NULL) {
error = ENOTSUP;
@@ -3078,54 +3353,54 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
}
SCTP_TCB_UNLOCK(stcb);
}
+ break;
}
- break;
- /* RS - Set socket option for pluggable stream scheduling */
case SCTP_PLUGGABLE_SS:
{
struct sctp_assoc_value *av;
SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ if ((av->assoc_value != SCTP_SS_DEFAULT) &&
+ (av->assoc_value != SCTP_SS_ROUND_ROBIN) &&
+ (av->assoc_value != SCTP_SS_ROUND_ROBIN_PACKET) &&
+ (av->assoc_value != SCTP_SS_PRIORITY) &&
+ (av->assoc_value != SCTP_SS_FAIR_BANDWITH) &&
+ (av->assoc_value != SCTP_SS_FIRST_COME)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
SCTP_FIND_STCB(inp, stcb, av->assoc_id);
if (stcb) {
- switch (av->assoc_value) {
- case SCTP_SS_DEFAULT:
- case SCTP_SS_ROUND_ROBIN:
- case SCTP_SS_ROUND_ROBIN_PACKET:
- case SCTP_SS_PRIORITY:
- case SCTP_SS_FAIR_BANDWITH:
- case SCTP_SS_FIRST_COME:
- stcb->asoc.ss_functions.sctp_ss_clear(stcb, &stcb->asoc, 1, 1);
- stcb->asoc.ss_functions = sctp_ss_functions[av->assoc_value];
- stcb->asoc.stream_scheduling_module = av->assoc_value;
- stcb->asoc.ss_functions.sctp_ss_init(stcb, &stcb->asoc, 1);
- break;
- default:
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
- error = EINVAL;
- break;
- }
+ stcb->asoc.ss_functions.sctp_ss_clear(stcb, &stcb->asoc, 1, 1);
+ stcb->asoc.ss_functions = sctp_ss_functions[av->assoc_value];
+ stcb->asoc.stream_scheduling_module = av->assoc_value;
+ stcb->asoc.ss_functions.sctp_ss_init(stcb, &stcb->asoc, 1);
SCTP_TCB_UNLOCK(stcb);
} else {
- switch (av->assoc_value) {
- case SCTP_SS_DEFAULT:
- case SCTP_SS_ROUND_ROBIN:
- case SCTP_SS_ROUND_ROBIN_PACKET:
- case SCTP_SS_PRIORITY:
- case SCTP_SS_FAIR_BANDWITH:
- case SCTP_SS_FIRST_COME:
+ if ((av->assoc_id == SCTP_FUTURE_ASSOC) ||
+ (av->assoc_id == SCTP_ALL_ASSOC)) {
SCTP_INP_WLOCK(inp);
inp->sctp_ep.sctp_default_ss_module = av->assoc_value;
SCTP_INP_WUNLOCK(inp);
- break;
- default:
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
- error = EINVAL;
- break;
+ }
+ if ((av->assoc_id == SCTP_CURRENT_ASSOC) ||
+ (av->assoc_id == SCTP_ALL_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ SCTP_TCB_LOCK(stcb);
+ stcb->asoc.ss_functions.sctp_ss_clear(stcb, &stcb->asoc, 1, 1);
+ stcb->asoc.ss_functions = sctp_ss_functions[av->assoc_value];
+ stcb->asoc.stream_scheduling_module = av->assoc_value;
+ stcb->asoc.ss_functions.sctp_ss_init(stcb, &stcb->asoc, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ SCTP_INP_RUNLOCK(inp);
}
}
+ break;
}
- break;
case SCTP_SS_VALUE:
{
struct sctp_stream_value *av;
@@ -3140,15 +3415,29 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
}
SCTP_TCB_UNLOCK(stcb);
} else {
- /*
- * Can't set stream value without
- * association
- */
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
- error = EINVAL;
+ if (av->assoc_id == SCTP_CURRENT_ASSOC) {
+ SCTP_INP_RLOCK(inp);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ SCTP_TCB_LOCK(stcb);
+ stcb->asoc.ss_functions.sctp_ss_set_value(stcb,
+ &stcb->asoc,
+ &stcb->asoc.strmout[av->stream_id],
+ av->stream_value);
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ SCTP_INP_RUNLOCK(inp);
+
+ } else {
+ /*
+ * Can't set stream value without
+ * association
+ */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
}
+ break;
}
- break;
case SCTP_CLR_STAT_LOG:
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
error = EOPNOTSUPP;
@@ -3164,12 +3453,25 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
stcb->asoc.context = av->assoc_value;
SCTP_TCB_UNLOCK(stcb);
} else {
- SCTP_INP_WLOCK(inp);
- inp->sctp_context = av->assoc_value;
- SCTP_INP_WUNLOCK(inp);
+ if ((av->assoc_id == SCTP_FUTURE_ASSOC) ||
+ (av->assoc_id == SCTP_ALL_ASSOC)) {
+ SCTP_INP_WLOCK(inp);
+ inp->sctp_context = av->assoc_value;
+ SCTP_INP_WUNLOCK(inp);
+ }
+ if ((av->assoc_id == SCTP_CURRENT_ASSOC) ||
+ (av->assoc_id == SCTP_ALL_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ SCTP_TCB_LOCK(stcb);
+ stcb->asoc.context = av->assoc_value;
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ }
}
+ break;
}
- break;
case SCTP_VRF_ID:
{
uint32_t *default_vrfid;
@@ -3204,12 +3506,12 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
if (sack->sack_delay) {
if (sack->sack_delay > SCTP_MAX_SACK_DELAY)
sack->sack_delay = SCTP_MAX_SACK_DELAY;
+ if (MSEC_TO_TICKS(sack->sack_delay) < 1) {
+ sack->sack_delay = TICKS_TO_MSEC(1);
+ }
}
if (stcb) {
if (sack->sack_delay) {
- if (MSEC_TO_TICKS(sack->sack_delay) < 1) {
- sack->sack_delay = TICKS_TO_MSEC(1);
- }
stcb->asoc.delayed_ack = sack->sack_delay;
}
if (sack->sack_freq) {
@@ -3217,17 +3519,32 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
}
SCTP_TCB_UNLOCK(stcb);
} else {
- SCTP_INP_WLOCK(inp);
- if (sack->sack_delay) {
- if (MSEC_TO_TICKS(sack->sack_delay) < 1) {
- sack->sack_delay = TICKS_TO_MSEC(1);
+ if ((sack->sack_assoc_id == SCTP_FUTURE_ASSOC) ||
+ (sack->sack_assoc_id == SCTP_ALL_ASSOC)) {
+ SCTP_INP_WLOCK(inp);
+ if (sack->sack_delay) {
+ inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV] = MSEC_TO_TICKS(sack->sack_delay);
+ }
+ if (sack->sack_freq) {
+ inp->sctp_ep.sctp_sack_freq = sack->sack_freq;
}
- inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV] = MSEC_TO_TICKS(sack->sack_delay);
+ SCTP_INP_WUNLOCK(inp);
}
- if (sack->sack_freq) {
- inp->sctp_ep.sctp_sack_freq = sack->sack_freq;
+ if ((sack->sack_assoc_id == SCTP_CURRENT_ASSOC) ||
+ (sack->sack_assoc_id == SCTP_ALL_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ SCTP_TCB_LOCK(stcb);
+ if (sack->sack_delay) {
+ stcb->asoc.delayed_ack = sack->sack_delay;
+ }
+ if (sack->sack_freq) {
+ stcb->asoc.sack_freq = sack->sack_freq;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ SCTP_INP_RUNLOCK(inp);
}
- SCTP_INP_WUNLOCK(inp);
}
break;
}
@@ -3255,10 +3572,9 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
SCTP_CHECK_AND_CAST(sca, optval, struct sctp_authkey, optsize);
SCTP_FIND_STCB(inp, stcb, sca->sca_assoc_id);
- size = optsize - sizeof(*sca);
+ size = optsize - sizeof(struct sctp_authkey);
if (stcb) {
- /* set it on the assoc */
shared_keys = &stcb->asoc.shared_keys;
/* clear the cached keys for this key id */
sctp_clear_cachedkeys(stcb, sca->sca_keynumber);
@@ -3288,39 +3604,76 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
error = sctp_insert_sharedkey(shared_keys, shared_key);
SCTP_TCB_UNLOCK(stcb);
} else {
- /* set it on the endpoint */
- SCTP_INP_WLOCK(inp);
- shared_keys = &inp->sctp_ep.shared_keys;
- /*
- * clear the cached keys on all assocs for
- * this key id
- */
- sctp_clear_cachedkeys_ep(inp, sca->sca_keynumber);
- /*
- * create the new shared key and
- * insert/replace it
- */
- if (size > 0) {
- key = sctp_set_key(sca->sca_key, (uint32_t) size);
- if (key == NULL) {
+ if ((sca->sca_assoc_id == SCTP_FUTURE_ASSOC) ||
+ (sca->sca_assoc_id == SCTP_ALL_ASSOC)) {
+ SCTP_INP_WLOCK(inp);
+ shared_keys = &inp->sctp_ep.shared_keys;
+ /*
+ * clear the cached keys on all
+ * assocs for this key id
+ */
+ sctp_clear_cachedkeys_ep(inp, sca->sca_keynumber);
+ /*
+ * create the new shared key and
+ * insert/replace it
+ */
+ if (size > 0) {
+ key = sctp_set_key(sca->sca_key, (uint32_t) size);
+ if (key == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
+ error = ENOMEM;
+ SCTP_INP_WUNLOCK(inp);
+ break;
+ }
+ }
+ shared_key = sctp_alloc_sharedkey();
+ if (shared_key == NULL) {
+ sctp_free_key(key);
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
error = ENOMEM;
SCTP_INP_WUNLOCK(inp);
break;
}
- }
- shared_key = sctp_alloc_sharedkey();
- if (shared_key == NULL) {
- sctp_free_key(key);
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
- error = ENOMEM;
+ shared_key->key = key;
+ shared_key->keyid = sca->sca_keynumber;
+ error = sctp_insert_sharedkey(shared_keys, shared_key);
SCTP_INP_WUNLOCK(inp);
- break;
}
- shared_key->key = key;
- shared_key->keyid = sca->sca_keynumber;
- error = sctp_insert_sharedkey(shared_keys, shared_key);
- SCTP_INP_WUNLOCK(inp);
+ if ((sca->sca_assoc_id == SCTP_CURRENT_ASSOC) ||
+ (sca->sca_assoc_id == SCTP_ALL_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ SCTP_TCB_LOCK(stcb);
+ shared_keys = &stcb->asoc.shared_keys;
+ /*
+ * clear the cached keys for
+ * this key id
+ */
+ sctp_clear_cachedkeys(stcb, sca->sca_keynumber);
+ /*
+ * create the new shared key
+ * and insert/replace it
+ */
+ if (size > 0) {
+ key = sctp_set_key(sca->sca_key, (uint32_t) size);
+ if (key == NULL) {
+ SCTP_TCB_UNLOCK(stcb);
+ continue;
+ }
+ }
+ shared_key = sctp_alloc_sharedkey();
+ if (shared_key == NULL) {
+ sctp_free_key(key);
+ SCTP_TCB_UNLOCK(stcb);
+ continue;
+ }
+ shared_key->key = key;
+ shared_key->keyid = sca->sca_keynumber;
+ error = sctp_insert_sharedkey(shared_keys, shared_key);
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ }
}
break;
}
@@ -3330,7 +3683,6 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
sctp_hmaclist_t *hmaclist;
uint16_t hmacid;
uint32_t i;
-
size_t found;
SCTP_CHECK_AND_CAST(shmac, optval, struct sctp_hmacalgo, optsize);
@@ -3381,8 +3733,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
{
struct sctp_authkeyid *scact;
- SCTP_CHECK_AND_CAST(scact, optval, struct sctp_authkeyid,
- optsize);
+ SCTP_CHECK_AND_CAST(scact, optval, struct sctp_authkeyid, optsize);
SCTP_FIND_STCB(inp, stcb, scact->scact_assoc_id);
/* set the active key on the right place */
@@ -3397,16 +3748,25 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
}
SCTP_TCB_UNLOCK(stcb);
} else {
- /* set the active key on the endpoint */
- SCTP_INP_WLOCK(inp);
- if (sctp_auth_setactivekey_ep(inp,
- scact->scact_keynumber)) {
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL,
- SCTP_FROM_SCTP_USRREQ,
- EINVAL);
- error = EINVAL;
+ if ((scact->scact_assoc_id == SCTP_FUTURE_ASSOC) ||
+ (scact->scact_assoc_id == SCTP_ALL_ASSOC)) {
+ SCTP_INP_WLOCK(inp);
+ if (sctp_auth_setactivekey_ep(inp, scact->scact_keynumber)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ if ((scact->scact_assoc_id == SCTP_CURRENT_ASSOC) ||
+ (scact->scact_assoc_id == SCTP_ALL_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ SCTP_TCB_LOCK(stcb);
+ sctp_auth_setactivekey(stcb, scact->scact_keynumber);
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ SCTP_INP_RUNLOCK(inp);
}
- SCTP_INP_WUNLOCK(inp);
}
break;
}
@@ -3414,30 +3774,36 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
{
struct sctp_authkeyid *scdel;
- SCTP_CHECK_AND_CAST(scdel, optval, struct sctp_authkeyid,
- optsize);
+ SCTP_CHECK_AND_CAST(scdel, optval, struct sctp_authkeyid, optsize);
SCTP_FIND_STCB(inp, stcb, scdel->scact_assoc_id);
/* delete the key from the right place */
if (stcb) {
- if (sctp_delete_sharedkey(stcb,
- scdel->scact_keynumber)) {
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL,
- SCTP_FROM_SCTP_USRREQ,
- EINVAL);
+ if (sctp_delete_sharedkey(stcb, scdel->scact_keynumber)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
}
SCTP_TCB_UNLOCK(stcb);
} else {
- SCTP_INP_WLOCK(inp);
- if (sctp_delete_sharedkey_ep(inp,
- scdel->scact_keynumber)) {
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL,
- SCTP_FROM_SCTP_USRREQ,
- EINVAL);
- error = EINVAL;
+ if ((scdel->scact_assoc_id == SCTP_FUTURE_ASSOC) ||
+ (scdel->scact_assoc_id == SCTP_ALL_ASSOC)) {
+ SCTP_INP_WLOCK(inp);
+ if (sctp_delete_sharedkey_ep(inp, scdel->scact_keynumber)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ if ((scdel->scact_assoc_id == SCTP_CURRENT_ASSOC) ||
+ (scdel->scact_assoc_id == SCTP_ALL_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ SCTP_TCB_LOCK(stcb);
+ sctp_delete_sharedkey(stcb, scdel->scact_keynumber);
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ SCTP_INP_RUNLOCK(inp);
}
- SCTP_INP_WUNLOCK(inp);
}
break;
}
@@ -3445,30 +3811,36 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
{
struct sctp_authkeyid *keyid;
- SCTP_CHECK_AND_CAST(keyid, optval, struct sctp_authkeyid,
- optsize);
+ SCTP_CHECK_AND_CAST(keyid, optval, struct sctp_authkeyid, optsize);
SCTP_FIND_STCB(inp, stcb, keyid->scact_assoc_id);
/* deactivate the key from the right place */
if (stcb) {
- if (sctp_deact_sharedkey(stcb,
- keyid->scact_keynumber)) {
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL,
- SCTP_FROM_SCTP_USRREQ,
- EINVAL);
+ if (sctp_deact_sharedkey(stcb, keyid->scact_keynumber)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
}
SCTP_TCB_UNLOCK(stcb);
} else {
- SCTP_INP_WLOCK(inp);
- if (sctp_deact_sharedkey_ep(inp,
- keyid->scact_keynumber)) {
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL,
- SCTP_FROM_SCTP_USRREQ,
- EINVAL);
- error = EINVAL;
+ if ((keyid->scact_assoc_id == SCTP_FUTURE_ASSOC) ||
+ (keyid->scact_assoc_id == SCTP_ALL_ASSOC)) {
+ SCTP_INP_WLOCK(inp);
+ if (sctp_deact_sharedkey_ep(inp, keyid->scact_keynumber)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ if ((keyid->scact_assoc_id == SCTP_CURRENT_ASSOC) ||
+ (keyid->scact_assoc_id == SCTP_ALL_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ SCTP_TCB_LOCK(stcb);
+ sctp_deact_sharedkey(stcb, keyid->scact_keynumber);
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ SCTP_INP_RUNLOCK(inp);
}
- SCTP_INP_WUNLOCK(inp);
}
break;
}
@@ -3632,9 +4004,8 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_REQ, SCTP_SO_LOCKED);
SCTP_TCB_UNLOCK(stcb);
+ break;
}
- break;
-
case SCTP_CONNECT_X:
if (optsize < (sizeof(int) + sizeof(struct sockaddr_in))) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
@@ -3643,7 +4014,6 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
}
error = sctp_do_connect_x(so, inp, optval, optsize, p, 0);
break;
-
case SCTP_CONNECT_X_DELAYED:
if (optsize < (sizeof(int) + sizeof(struct sockaddr_in))) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
@@ -3652,7 +4022,6 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
}
error = sctp_do_connect_x(so, inp, optval, optsize, p, 1);
break;
-
case SCTP_CONNECT_X_COMPLETE:
{
struct sockaddr *sa;
@@ -3706,8 +4075,8 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
error = EALREADY;
}
SCTP_TCB_UNLOCK(stcb);
+ break;
}
- break;
case SCTP_MAX_BURST:
{
struct sctp_assoc_value *av;
@@ -3719,12 +4088,25 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
stcb->asoc.max_burst = av->assoc_value;
SCTP_TCB_UNLOCK(stcb);
} else {
- SCTP_INP_WLOCK(inp);
- inp->sctp_ep.max_burst = av->assoc_value;
- SCTP_INP_WUNLOCK(inp);
+ if ((av->assoc_id == SCTP_FUTURE_ASSOC) ||
+ (av->assoc_id == SCTP_ALL_ASSOC)) {
+ SCTP_INP_WLOCK(inp);
+ inp->sctp_ep.max_burst = av->assoc_value;
+ SCTP_INP_WUNLOCK(inp);
+ }
+ if ((av->assoc_id == SCTP_CURRENT_ASSOC) ||
+ (av->assoc_id == SCTP_ALL_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ SCTP_TCB_LOCK(stcb);
+ stcb->asoc.max_burst = av->assoc_value;
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ }
}
+ break;
}
- break;
case SCTP_MAXSEG:
{
struct sctp_assoc_value *av;
@@ -3746,20 +4128,25 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
}
SCTP_TCB_UNLOCK(stcb);
} else {
- SCTP_INP_WLOCK(inp);
- /*
- * FIXME MT: I think this is not in tune
- * with the API ID
- */
- if (av->assoc_value) {
- inp->sctp_frag_point = (av->assoc_value + ovh);
+ if (av->assoc_id == SCTP_FUTURE_ASSOC) {
+ SCTP_INP_WLOCK(inp);
+ /*
+ * FIXME MT: I think this is not in
+ * tune with the API ID
+ */
+ if (av->assoc_value) {
+ inp->sctp_frag_point = (av->assoc_value + ovh);
+ } else {
+ inp->sctp_frag_point = SCTP_DEFAULT_MAXSEGMENT;
+ }
+ SCTP_INP_WUNLOCK(inp);
} else {
- inp->sctp_frag_point = SCTP_DEFAULT_MAXSEGMENT;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
}
- SCTP_INP_WUNLOCK(inp);
}
+ break;
}
- break;
case SCTP_EVENTS:
{
struct sctp_event_subscribe *events;
@@ -3823,22 +4210,6 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
if (events->sctp_sender_dry_event) {
sctp_feature_on(inp, SCTP_PCB_FLAGS_DRYEVNT);
- if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
- (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
- stcb = LIST_FIRST(&inp->sctp_asoc_list);
- if (stcb) {
- SCTP_TCB_LOCK(stcb);
- }
- if (stcb &&
- TAILQ_EMPTY(&stcb->asoc.send_queue) &&
- TAILQ_EMPTY(&stcb->asoc.sent_queue) &&
- (stcb->asoc.stream_queue_cnt == 0)) {
- sctp_ulp_notify(SCTP_NOTIFY_SENDER_DRY, stcb, 0, NULL, SCTP_SO_LOCKED);
- }
- if (stcb) {
- SCTP_TCB_UNLOCK(stcb);
- }
- }
} else {
sctp_feature_off(inp, SCTP_PCB_FLAGS_DRYEVNT);
}
@@ -3849,9 +4220,84 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
sctp_feature_off(inp, SCTP_PCB_FLAGS_STREAM_RESETEVNT);
}
SCTP_INP_WUNLOCK(inp);
- }
- break;
+ SCTP_INP_RLOCK(inp);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ SCTP_TCB_LOCK(stcb);
+ if (events->sctp_association_event) {
+ sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_RECVASSOCEVNT);
+ } else {
+ sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_RECVASSOCEVNT);
+ }
+ if (events->sctp_address_event) {
+ sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_RECVPADDREVNT);
+ } else {
+ sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_RECVPADDREVNT);
+ }
+ if (events->sctp_send_failure_event) {
+ sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_RECVSENDFAILEVNT);
+ } else {
+ sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_RECVSENDFAILEVNT);
+ }
+ if (events->sctp_peer_error_event) {
+ sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_RECVPEERERR);
+ } else {
+ sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_RECVPEERERR);
+ }
+ if (events->sctp_shutdown_event) {
+ sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT);
+ } else {
+ sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT);
+ }
+ if (events->sctp_partial_delivery_event) {
+ sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_PDAPIEVNT);
+ } else {
+ sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_PDAPIEVNT);
+ }
+ if (events->sctp_adaptation_layer_event) {
+ sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_ADAPTATIONEVNT);
+ } else {
+ sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_ADAPTATIONEVNT);
+ }
+ if (events->sctp_authentication_event) {
+ sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_AUTHEVNT);
+ } else {
+ sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_AUTHEVNT);
+ }
+ if (events->sctp_sender_dry_event) {
+ sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_DRYEVNT);
+ } else {
+ sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_DRYEVNT);
+ }
+ if (events->sctp_stream_reset_event) {
+ sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_STREAM_RESETEVNT);
+ } else {
+ sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_STREAM_RESETEVNT);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ /*
+ * Send up the sender dry event only for 1-to-1
+ * style sockets.
+ */
+ if (events->sctp_sender_dry_event) {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ if (stcb) {
+ SCTP_TCB_LOCK(stcb);
+ if (TAILQ_EMPTY(&stcb->asoc.send_queue) &&
+ TAILQ_EMPTY(&stcb->asoc.sent_queue) &&
+ (stcb->asoc.stream_queue_cnt == 0)) {
+ sctp_ulp_notify(SCTP_NOTIFY_SENDER_DRY, stcb, 0, NULL, SCTP_SO_LOCKED);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ }
+ }
+ SCTP_INP_RUNLOCK(inp);
+ break;
+ }
case SCTP_ADAPTATION_LAYER:
{
struct sctp_setadaptation *adap_bits;
@@ -3860,8 +4306,8 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
SCTP_INP_WLOCK(inp);
inp->sctp_ep.adaptation_layer_indicator = adap_bits->ssb_adaptation_ind;
SCTP_INP_WUNLOCK(inp);
+ break;
}
- break;
#ifdef SCTP_DEBUG
case SCTP_SET_INITIAL_DBG_SEQ:
{
@@ -3871,8 +4317,8 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
SCTP_INP_WLOCK(inp);
inp->sctp_ep.initial_sequence_debug = *vvv;
SCTP_INP_WUNLOCK(inp);
+ break;
}
- break;
#endif
case SCTP_DEFAULT_SEND_PARAM:
{
@@ -3882,7 +4328,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
SCTP_FIND_STCB(inp, stcb, s_info->sinfo_assoc_id);
if (stcb) {
- if (s_info->sinfo_stream <= stcb->asoc.streamoutcnt) {
+ if (s_info->sinfo_stream < stcb->asoc.streamoutcnt) {
memcpy(&stcb->asoc.def_send, s_info, min(optsize, sizeof(stcb->asoc.def_send)));
} else {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
@@ -3890,12 +4336,27 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
}
SCTP_TCB_UNLOCK(stcb);
} else {
- SCTP_INP_WLOCK(inp);
- memcpy(&inp->def_send, s_info, min(optsize, sizeof(inp->def_send)));
- SCTP_INP_WUNLOCK(inp);
+ if ((s_info->sinfo_assoc_id == SCTP_FUTURE_ASSOC) ||
+ (s_info->sinfo_assoc_id == SCTP_ALL_ASSOC)) {
+ SCTP_INP_WLOCK(inp);
+ memcpy(&inp->def_send, s_info, min(optsize, sizeof(inp->def_send)));
+ SCTP_INP_WUNLOCK(inp);
+ }
+ if ((s_info->sinfo_assoc_id == SCTP_CURRENT_ASSOC) ||
+ (s_info->sinfo_assoc_id == SCTP_ALL_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ SCTP_TCB_LOCK(stcb);
+ if (s_info->sinfo_stream < stcb->asoc.streamoutcnt) {
+ memcpy(&stcb->asoc.def_send, s_info, min(optsize, sizeof(stcb->asoc.def_send)));
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ }
}
+ break;
}
- break;
case SCTP_PEER_ADDR_PARAMS:
/* Applies to the specific association */
{
@@ -4116,31 +4577,37 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
SCTP_TCB_UNLOCK(stcb);
} else {
/************************NO TCB, SET TO default stuff ******************/
- SCTP_INP_WLOCK(inp);
- /*
- * For the TOS/FLOWLABEL stuff you set it
- * with the options on the socket
- */
- if (paddrp->spp_pathmaxrxt) {
- inp->sctp_ep.def_net_failure = paddrp->spp_pathmaxrxt;
- }
- if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO)
- inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = 0;
- else if (paddrp->spp_hbinterval) {
- if (paddrp->spp_hbinterval > SCTP_MAX_HB_INTERVAL)
- paddrp->spp_hbinterval = SCTP_MAX_HB_INTERVAL;
- inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = MSEC_TO_TICKS(paddrp->spp_hbinterval);
- }
- if (paddrp->spp_flags & SPP_HB_ENABLE) {
- sctp_feature_off(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT);
+ if (paddrp->spp_assoc_id == SCTP_FUTURE_ASSOC) {
+ SCTP_INP_WLOCK(inp);
+ /*
+ * For the TOS/FLOWLABEL stuff you
+ * set it with the options on the
+ * socket
+ */
+ if (paddrp->spp_pathmaxrxt) {
+ inp->sctp_ep.def_net_failure = paddrp->spp_pathmaxrxt;
+ }
+ if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO)
+ inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = 0;
+ else if (paddrp->spp_hbinterval) {
+ if (paddrp->spp_hbinterval > SCTP_MAX_HB_INTERVAL)
+ paddrp->spp_hbinterval = SCTP_MAX_HB_INTERVAL;
+ inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = MSEC_TO_TICKS(paddrp->spp_hbinterval);
+ }
+ if (paddrp->spp_flags & SPP_HB_ENABLE) {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT);
- } else if (paddrp->spp_flags & SPP_HB_DISABLE) {
- sctp_feature_on(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT);
+ } else if (paddrp->spp_flags & SPP_HB_DISABLE) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
}
- SCTP_INP_WUNLOCK(inp);
}
+ break;
}
- break;
case SCTP_RTOINFO:
{
struct sctp_rtoinfo *srto;
@@ -4172,31 +4639,36 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
}
SCTP_TCB_UNLOCK(stcb);
} else {
- SCTP_INP_WLOCK(inp);
- if (srto->srto_initial)
- new_init = srto->srto_initial;
- else
- new_init = inp->sctp_ep.initial_rto;
- if (srto->srto_max)
- new_max = srto->srto_max;
- else
- new_max = inp->sctp_ep.sctp_maxrto;
- if (srto->srto_min)
- new_min = srto->srto_min;
- else
- new_min = inp->sctp_ep.sctp_minrto;
- if ((new_min <= new_init) && (new_init <= new_max)) {
- inp->sctp_ep.initial_rto = new_init;
- inp->sctp_ep.sctp_maxrto = new_max;
- inp->sctp_ep.sctp_minrto = new_min;
+ if (srto->srto_assoc_id == SCTP_FUTURE_ASSOC) {
+ SCTP_INP_WLOCK(inp);
+ if (srto->srto_initial)
+ new_init = srto->srto_initial;
+ else
+ new_init = inp->sctp_ep.initial_rto;
+ if (srto->srto_max)
+ new_max = srto->srto_max;
+ else
+ new_max = inp->sctp_ep.sctp_maxrto;
+ if (srto->srto_min)
+ new_min = srto->srto_min;
+ else
+ new_min = inp->sctp_ep.sctp_minrto;
+ if ((new_min <= new_init) && (new_init <= new_max)) {
+ inp->sctp_ep.initial_rto = new_init;
+ inp->sctp_ep.sctp_maxrto = new_max;
+ inp->sctp_ep.sctp_minrto = new_min;
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ SCTP_INP_WUNLOCK(inp);
} else {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
}
- SCTP_INP_WUNLOCK(inp);
}
+ break;
}
- break;
case SCTP_ASSOCINFO:
{
struct sctp_assocparams *sasoc;
@@ -4214,27 +4686,26 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
if (stcb) {
if (sasoc->sasoc_asocmaxrxt)
stcb->asoc.max_send_times = sasoc->sasoc_asocmaxrxt;
- sasoc->sasoc_number_peer_destinations = stcb->asoc.numnets;
- sasoc->sasoc_peer_rwnd = 0;
- sasoc->sasoc_local_rwnd = 0;
if (sasoc->sasoc_cookie_life) {
stcb->asoc.cookie_life = MSEC_TO_TICKS(sasoc->sasoc_cookie_life);
}
SCTP_TCB_UNLOCK(stcb);
} else {
- SCTP_INP_WLOCK(inp);
- if (sasoc->sasoc_asocmaxrxt)
- inp->sctp_ep.max_send_times = sasoc->sasoc_asocmaxrxt;
- sasoc->sasoc_number_peer_destinations = 0;
- sasoc->sasoc_peer_rwnd = 0;
- sasoc->sasoc_local_rwnd = 0;
- if (sasoc->sasoc_cookie_life) {
- inp->sctp_ep.def_cookie_life = MSEC_TO_TICKS(sasoc->sasoc_cookie_life);
+ if (sasoc->sasoc_assoc_id == SCTP_FUTURE_ASSOC) {
+ SCTP_INP_WLOCK(inp);
+ if (sasoc->sasoc_asocmaxrxt)
+ inp->sctp_ep.max_send_times = sasoc->sasoc_asocmaxrxt;
+ if (sasoc->sasoc_cookie_life) {
+ inp->sctp_ep.def_cookie_life = MSEC_TO_TICKS(sasoc->sasoc_cookie_life);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
}
- SCTP_INP_WUNLOCK(inp);
}
+ break;
}
- break;
case SCTP_INITMSG:
{
struct sctp_initmsg *sinit;
@@ -4253,12 +4724,12 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
if (sinit->sinit_max_init_timeo)
inp->sctp_ep.initial_init_rto_max = sinit->sinit_max_init_timeo;
SCTP_INP_WUNLOCK(inp);
+ break;
}
- break;
case SCTP_PRIMARY_ADDR:
{
struct sctp_setprim *spa;
- struct sctp_nets *net, *lnet;
+ struct sctp_nets *net;
SCTP_CHECK_AND_CAST(spa, optval, struct sctp_setprim, optsize);
SCTP_FIND_STCB(inp, stcb, spa->ssp_assoc_id);
@@ -4287,7 +4758,6 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
if ((net != stcb->asoc.primary_destination) &&
(!(net->dest_state & SCTP_ADDR_UNCONFIRMED))) {
/* Ok we need to set it */
- lnet = stcb->asoc.primary_destination;
if (sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net) == 0) {
if (net->dest_state & SCTP_ADDR_SWITCH_PRIMARY) {
net->dest_state |= SCTP_ADDR_DOUBLE_SWITCH;
@@ -4302,8 +4772,8 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
if (stcb) {
SCTP_TCB_UNLOCK(stcb);
}
+ break;
}
- break;
case SCTP_SET_DYNAMIC_PRIMARY:
{
union sctp_sockstore *ss;
@@ -4316,8 +4786,8 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
SCTP_CHECK_AND_CAST(ss, optval, union sctp_sockstore, optsize);
/* SUPER USER CHECK? */
error = sctp_dynamic_set_primary(&ss->sa, vrf_id);
+ break;
}
- break;
case SCTP_SET_PEER_PRIMARY_ADDR:
{
struct sctp_setpeerprim *sspp;
@@ -4370,9 +4840,8 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
}
-
+ break;
}
- break;
case SCTP_BINDX_ADD_ADDR:
{
struct sctp_getaddresses *addrs;
@@ -4418,8 +4887,8 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
sctp_bindx_add_address(so, inp, addrs->addr,
addrs->sget_assoc_id, vrf_id,
&error, p);
+ break;
}
- break;
case SCTP_BINDX_REM_ADDR:
{
struct sctp_getaddresses *addrs;
@@ -4465,8 +4934,232 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
sctp_bindx_delete_address(so, inp, addrs->addr,
addrs->sget_assoc_id, vrf_id,
&error);
+ break;
+ }
+ case SCTP_EVENT:
+ {
+ struct sctp_event *event;
+ uint32_t event_type;
+
+ SCTP_CHECK_AND_CAST(event, optval, struct sctp_event, optsize);
+ SCTP_FIND_STCB(inp, stcb, event->se_assoc_id);
+ switch (event->se_type) {
+ case SCTP_ASSOC_CHANGE:
+ event_type = SCTP_PCB_FLAGS_RECVASSOCEVNT;
+ break;
+ case SCTP_PEER_ADDR_CHANGE:
+ event_type = SCTP_PCB_FLAGS_RECVPADDREVNT;
+ break;
+ case SCTP_REMOTE_ERROR:
+ event_type = SCTP_PCB_FLAGS_RECVPEERERR;
+ break;
+ case SCTP_SEND_FAILED:
+ event_type = SCTP_PCB_FLAGS_RECVSENDFAILEVNT;
+ break;
+ case SCTP_SHUTDOWN_EVENT:
+ event_type = SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT;
+ break;
+ case SCTP_ADAPTATION_INDICATION:
+ event_type = SCTP_PCB_FLAGS_ADAPTATIONEVNT;
+ break;
+ case SCTP_PARTIAL_DELIVERY_EVENT:
+ event_type = SCTP_PCB_FLAGS_PDAPIEVNT;
+ break;
+ case SCTP_AUTHENTICATION_EVENT:
+ event_type = SCTP_PCB_FLAGS_AUTHEVNT;
+ break;
+ case SCTP_STREAM_RESET_EVENT:
+ event_type = SCTP_PCB_FLAGS_STREAM_RESETEVNT;
+ break;
+ case SCTP_SENDER_DRY_EVENT:
+ event_type = SCTP_PCB_FLAGS_DRYEVNT;
+ break;
+ case SCTP_NOTIFICATIONS_STOPPED_EVENT:
+ event_type = 0;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTSUP);
+ error = ENOTSUP;
+ break;
+ default:
+ event_type = 0;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ if (event_type > 0) {
+ if (stcb) {
+ if (event->se_on) {
+ sctp_stcb_feature_on(inp, stcb, event_type);
+ if (event_type == SCTP_PCB_FLAGS_DRYEVNT) {
+ if (TAILQ_EMPTY(&stcb->asoc.send_queue) &&
+ TAILQ_EMPTY(&stcb->asoc.sent_queue) &&
+ (stcb->asoc.stream_queue_cnt == 0)) {
+ sctp_ulp_notify(SCTP_NOTIFY_SENDER_DRY, stcb, 0, NULL, SCTP_SO_LOCKED);
+ }
+ }
+ } else {
+ sctp_stcb_feature_off(inp, stcb, event_type);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ /*
+ * We don't want to send up a storm
+ * of events, so return an error for
+ * sender dry events
+ */
+ if ((event_type == SCTP_PCB_FLAGS_DRYEVNT) &&
+ ((event->se_assoc_id == SCTP_ALL_ASSOC) ||
+ (event->se_assoc_id == SCTP_CURRENT_ASSOC))) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTSUP);
+ error = ENOTSUP;
+ break;
+ }
+ if ((event->se_assoc_id == SCTP_FUTURE_ASSOC) ||
+ (event->se_assoc_id == SCTP_ALL_ASSOC)) {
+ SCTP_INP_WLOCK(inp);
+ if (event->se_on) {
+ sctp_feature_on(inp, event_type);
+ } else {
+ sctp_feature_off(inp, event_type);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ if ((event->se_assoc_id == SCTP_CURRENT_ASSOC) ||
+ (event->se_assoc_id == SCTP_ALL_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ SCTP_TCB_LOCK(stcb);
+ if (event->se_on) {
+ sctp_stcb_feature_on(inp, stcb, event_type);
+ } else {
+ sctp_stcb_feature_off(inp, stcb, event_type);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ }
+ }
+ }
+ break;
+ }
+ case SCTP_RECVRCVINFO:
+ {
+ int *onoff;
+
+ SCTP_CHECK_AND_CAST(onoff, optval, int, optsize);
+ SCTP_INP_WLOCK(inp);
+ if (*onoff != 0) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVRCVINFO);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVRCVINFO);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ break;
+ }
+ case SCTP_RECVNXTINFO:
+ {
+ int *onoff;
+
+ SCTP_CHECK_AND_CAST(onoff, optval, int, optsize);
+ SCTP_INP_WLOCK(inp);
+ if (*onoff != 0) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVNXTINFO);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVNXTINFO);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ break;
+ }
+ case SCTP_DEFAULT_SNDINFO:
+ {
+ struct sctp_sndinfo *info;
+ uint16_t policy;
+
+ SCTP_CHECK_AND_CAST(info, optval, struct sctp_sndinfo, optsize);
+ SCTP_FIND_STCB(inp, stcb, info->snd_assoc_id);
+
+ if (stcb) {
+ if (info->snd_sid < stcb->asoc.streamoutcnt) {
+ stcb->asoc.def_send.sinfo_stream = info->snd_sid;
+ policy = PR_SCTP_POLICY(stcb->asoc.def_send.sinfo_flags);
+ stcb->asoc.def_send.sinfo_flags = info->snd_flags;
+ stcb->asoc.def_send.sinfo_flags |= policy;
+ stcb->asoc.def_send.sinfo_ppid = info->snd_ppid;
+ stcb->asoc.def_send.sinfo_context = info->snd_context;
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((info->snd_assoc_id == SCTP_FUTURE_ASSOC) ||
+ (info->snd_assoc_id == SCTP_ALL_ASSOC)) {
+ SCTP_INP_WLOCK(inp);
+ inp->def_send.sinfo_stream = info->snd_sid;
+ policy = PR_SCTP_POLICY(inp->def_send.sinfo_flags);
+ inp->def_send.sinfo_flags = info->snd_flags;
+ inp->def_send.sinfo_flags |= policy;
+ inp->def_send.sinfo_ppid = info->snd_ppid;
+ inp->def_send.sinfo_context = info->snd_context;
+ SCTP_INP_WUNLOCK(inp);
+ }
+ if ((info->snd_assoc_id == SCTP_CURRENT_ASSOC) ||
+ (info->snd_assoc_id == SCTP_ALL_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ SCTP_TCB_LOCK(stcb);
+ if (info->snd_sid < stcb->asoc.streamoutcnt) {
+ stcb->asoc.def_send.sinfo_stream = info->snd_sid;
+ policy = PR_SCTP_POLICY(stcb->asoc.def_send.sinfo_flags);
+ stcb->asoc.def_send.sinfo_flags = info->snd_flags;
+ stcb->asoc.def_send.sinfo_flags |= policy;
+ stcb->asoc.def_send.sinfo_ppid = info->snd_ppid;
+ stcb->asoc.def_send.sinfo_context = info->snd_context;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ }
+ }
+ break;
+ }
+ case SCTP_DEFAULT_PRINFO:
+ {
+ struct sctp_default_prinfo *info;
+
+ SCTP_CHECK_AND_CAST(info, optval, struct sctp_default_prinfo, optsize);
+ SCTP_FIND_STCB(inp, stcb, info->pr_assoc_id);
+
+ if (PR_SCTP_INVALID_POLICY(info->pr_policy)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ if (stcb) {
+ stcb->asoc.def_send.sinfo_flags &= 0xfff0;
+ stcb->asoc.def_send.sinfo_flags |= info->pr_policy;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((info->pr_assoc_id == SCTP_FUTURE_ASSOC) ||
+ (info->pr_assoc_id == SCTP_ALL_ASSOC)) {
+ SCTP_INP_WLOCK(inp);
+ inp->def_send.sinfo_flags &= 0xfff0;
+ inp->def_send.sinfo_flags |= info->pr_policy;
+ SCTP_INP_WUNLOCK(inp);
+ }
+ if ((info->pr_assoc_id == SCTP_CURRENT_ASSOC) ||
+ (info->pr_assoc_id == SCTP_ALL_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ SCTP_TCB_LOCK(stcb);
+ stcb->asoc.def_send.sinfo_flags &= 0xfff0;
+ stcb->asoc.def_send.sinfo_flags |= info->pr_policy;
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ }
+ }
+ break;
}
- break;
default:
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);
error = ENOPROTOOPT;
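
For reference, a hedged userland sketch of the new SCTP_EVENT set path and of the assoc-id wildcards that the rewritten handlers above now honor: SCTP_FUTURE_ASSOC changes the endpoint default, SCTP_CURRENT_ASSOC walks the existing associations, and SCTP_ALL_ASSOC does both. Not part of this commit; the socket setup is assumed, while the structure fields mirror those read by the kernel code above.

/*
 * Illustrative only: enable association-change notifications for all
 * future associations on an SCTP socket via the new SCTP_EVENT option.
 */
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/sctp.h>
#include <string.h>

static int
enable_assoc_change(int fd)
{
	struct sctp_event ev;

	memset(&ev, 0, sizeof(ev));
	ev.se_assoc_id = SCTP_FUTURE_ASSOC;	/* endpoint default only */
	ev.se_type = SCTP_ASSOC_CHANGE;
	ev.se_on = 1;
	return (setsockopt(fd, IPPROTO_SCTP, SCTP_EVENT, &ev, sizeof(ev)));
}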
diff --git a/sys/netinet/sctp_var.h b/sys/netinet/sctp_var.h
index 1e17900..e48dfe4 100644
--- a/sys/netinet/sctp_var.h
+++ b/sys/netinet/sctp_var.h
@@ -50,6 +50,30 @@ extern struct pr_usrreqs sctp_usrreqs;
#define sctp_is_feature_on(inp, feature) ((inp->sctp_features & feature) == feature)
#define sctp_is_feature_off(inp, feature) ((inp->sctp_features & feature) == 0)
+#define sctp_stcb_feature_on(inp, stcb, feature) {\
+ if (stcb) { \
+ stcb->asoc.sctp_features |= feature; \
+ } else { \
+ inp->sctp_features |= feature; \
+ } \
+}
+#define sctp_stcb_feature_off(inp, stcb, feature) {\
+ if (stcb) { \
+ stcb->asoc.sctp_features &= ~feature; \
+ } else { \
+ inp->sctp_features &= ~feature; \
+ } \
+}
+#define sctp_stcb_is_feature_on(inp, stcb, feature) \
+ (((stcb != NULL) && \
+ ((stcb->asoc.sctp_features & feature) == feature)) || \
+ ((stcb == NULL) && \
+ ((inp->sctp_features & feature) == feature)))
+#define sctp_stcb_is_feature_off(inp, stcb, feature) \
+ (((stcb != NULL) && \
+ ((stcb->asoc.sctp_features & feature) == 0)) || \
+ ((stcb == NULL) && \
+ ((inp->sctp_features & feature) == 0)))
/* managing mobility_feature in inpcb (by micchie) */
#define sctp_mobility_feature_on(inp, feature) (inp->sctp_mobility_features |= feature)
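
The macros above make feature checks association-aware with an endpoint fallback; together with the sctp_features copy added to sctp_init_asoc() in sctputil.c below, a new association inherits the endpoint settings and can then diverge via SCTP_EVENT. A rough standalone expansion of the check, for illustration only (it needs the usual sctp_pcb.h declarations and is not part of the change):

/* Sketch of what sctp_stcb_is_feature_on(inp, stcb, f) evaluates to. */
static int
feature_is_on(struct sctp_inpcb *inp, struct sctp_tcb *stcb, uint32_t f)
{
	if (stcb != NULL)
		return ((stcb->asoc.sctp_features & f) == f);
	else
		return ((inp->sctp_features & f) == f);
}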
diff --git a/sys/netinet/sctputil.c b/sys/netinet/sctputil.c
index 39df039..9a8bd2e 100644
--- a/sys/netinet/sctputil.c
+++ b/sys/netinet/sctputil.c
@@ -923,6 +923,7 @@ sctp_init_asoc(struct sctp_inpcb *m, struct sctp_tcb *stcb,
asoc->sctp_nr_sack_on_off = (uint8_t) SCTP_BASE_SYSCTL(sctp_nr_sack_on_off);
asoc->sctp_cmt_pf = (uint8_t) SCTP_BASE_SYSCTL(sctp_cmt_pf);
asoc->sctp_frag_point = m->sctp_frag_point;
+ asoc->sctp_features = m->sctp_features;
#ifdef INET
asoc->default_tos = m->ip_inp.inp.inp_ip_tos;
#else
@@ -2760,7 +2761,7 @@ sctp_notify_assoc_change(uint32_t event, struct sctp_tcb *stcb,
}
#endif
}
- if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_RECVASSOCEVNT)) {
+ if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVASSOCEVNT)) {
/* event not enabled */
return;
}
@@ -2831,7 +2832,7 @@ sctp_notify_peer_addr_change(struct sctp_tcb *stcb, uint32_t state,
struct sctp_paddr_change *spc;
struct sctp_queued_to_read *control;
- if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_RECVPADDREVNT)) {
+ if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVPADDREVNT)) {
/* event not enabled */
return;
}
@@ -2914,7 +2915,7 @@ sctp_notify_send_failed(struct sctp_tcb *stcb, uint32_t error,
struct sctp_queued_to_read *control;
int length;
- if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_RECVSENDFAILEVNT)) {
+ if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVSENDFAILEVNT)) {
/* event not enabled */
return;
}
@@ -2997,7 +2998,7 @@ sctp_notify_send_failed2(struct sctp_tcb *stcb, uint32_t error,
struct sctp_queued_to_read *control;
int length;
- if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_RECVSENDFAILEVNT)) {
+ if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVSENDFAILEVNT)) {
/* event not enabled */
return;
}
@@ -3067,7 +3068,7 @@ sctp_notify_adaptation_layer(struct sctp_tcb *stcb,
struct sctp_adaptation_event *sai;
struct sctp_queued_to_read *control;
- if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_ADAPTATIONEVNT)) {
+ if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_ADAPTATIONEVNT)) {
/* event not enabled */
return;
}
@@ -3118,7 +3119,7 @@ sctp_notify_partial_delivery_indication(struct sctp_tcb *stcb, uint32_t error,
struct sctp_queued_to_read *control;
struct sockbuf *sb;
- if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_PDAPIEVNT)) {
+ if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_PDAPIEVNT)) {
/* event not enabled */
return;
}
@@ -3231,7 +3232,7 @@ sctp_notify_shutdown_event(struct sctp_tcb *stcb)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
}
- if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT)) {
+ if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT)) {
/* event not enabled */
return;
}
@@ -3278,7 +3279,7 @@ sctp_notify_sender_dry_event(struct sctp_tcb *stcb,
struct sctp_sender_dry_event *event;
struct sctp_queued_to_read *control;
- if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_DRYEVNT)) {
+ if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_DRYEVNT)) {
/* event not enabled */
return;
}
@@ -5490,7 +5491,8 @@ found_one:
if ((sinfo) && filling_sinfo) {
memcpy(sinfo, control, sizeof(struct sctp_nonpad_sndrcvinfo));
nxt = TAILQ_NEXT(control, next);
- if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO)) {
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO) ||
+ sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVNXTINFO)) {
struct sctp_extrcvinfo *s_extra;
s_extra = (struct sctp_extrcvinfo *)sinfo;
@@ -5997,7 +5999,8 @@ out:
if (((out_flags & MSG_EOR) == 0) &&
((in_flags & MSG_PEEK) == 0) &&
(sinfo) &&
- (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO))) {
+ (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO) ||
+ sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVNXTINFO))) {
struct sctp_extrcvinfo *s_extra;
s_extra = (struct sctp_extrcvinfo *)sinfo;
@@ -6147,8 +6150,9 @@ sctp_soreceive(struct socket *so,
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
return (EINVAL);
}
- if ((sctp_is_feature_off(inp,
- SCTP_PCB_FLAGS_RECVDATAIOEVNT)) ||
+ if ((sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT) &&
+ sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVRCVINFO) &&
+ sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVNXTINFO)) ||
(controlp == NULL)) {
/* user does not want the sndrcv ctl */
filling_sinfo = 0;
@@ -6184,71 +6188,6 @@ sctp_soreceive(struct socket *so,
}
-int
-sctp_l_soreceive(struct socket *so,
- struct sockaddr **name,
- struct uio *uio,
- char **controlp,
- int *controllen,
- int *flag)
-{
- int error, fromlen;
- uint8_t sockbuf[256];
- struct sockaddr *from;
- struct sctp_extrcvinfo sinfo;
- int filling_sinfo = 1;
- struct sctp_inpcb *inp;
-
- inp = (struct sctp_inpcb *)so->so_pcb;
- /* pickup the assoc we are reading from */
- if (inp == NULL) {
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
- return (EINVAL);
- }
- if ((sctp_is_feature_off(inp,
- SCTP_PCB_FLAGS_RECVDATAIOEVNT)) ||
- (controlp == NULL)) {
- /* user does not want the sndrcv ctl */
- filling_sinfo = 0;
- }
- if (name) {
- from = (struct sockaddr *)sockbuf;
- fromlen = sizeof(sockbuf);
- from->sa_len = 0;
- } else {
- from = NULL;
- fromlen = 0;
- }
-
- error = sctp_sorecvmsg(so, uio,
- (struct mbuf **)NULL,
- from, fromlen, flag,
- (struct sctp_sndrcvinfo *)&sinfo,
- filling_sinfo);
- if ((controlp) && (filling_sinfo)) {
- /*
- * copy back the sinfo in a CMSG format note that the caller
- * has reponsibility for freeing the memory.
- */
- if (filling_sinfo)
- *controlp = sctp_build_ctl_cchunk(inp,
- controllen,
- (struct sctp_sndrcvinfo *)&sinfo);
- }
- if (name) {
- /* copy back the address info */
- if (from && from->sa_len) {
- *name = sodupsockaddr(from, M_WAIT);
- } else {
- *name = NULL;
- }
- }
- return (error);
-}
-
-
-
-
diff --git a/sys/netinet/sctputil.h b/sys/netinet/sctputil.h
index 69983e0..460adc7 100644
--- a/sys/netinet/sctputil.h
+++ b/sys/netinet/sctputil.h
@@ -328,20 +328,6 @@ sctp_soreceive(struct socket *so, struct sockaddr **psa,
struct mbuf **controlp,
int *flagsp);
-
-/* For those not passing mbufs, this does the
- * translations for you. Caller owns memory
- * of size controllen returned in controlp.
- */
-int
-sctp_l_soreceive(struct socket *so,
- struct sockaddr **name,
- struct uio *uio,
- char **controlp,
- int *controllen,
- int *flag);
-
-
void
sctp_misc_ints(uint8_t from, uint32_t a, uint32_t b, uint32_t c, uint32_t d);
diff --git a/sys/netinet/siftr.c b/sys/netinet/siftr.c
index 9d11262..6145a54 100644
--- a/sys/netinet/siftr.c
+++ b/sys/netinet/siftr.c
@@ -696,17 +696,16 @@ siftr_findinpcb(int ipver, struct ip *ip, struct mbuf *m, uint16_t sport,
/* We need the tcbinfo lock. */
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
- INP_INFO_RLOCK(&V_tcbinfo);
if (dir == PFIL_IN)
inp = (ipver == INP_IPV4 ?
- in_pcblookup_hash(&V_tcbinfo, ip->ip_src, sport, ip->ip_dst,
- dport, 0, m->m_pkthdr.rcvif)
+ in_pcblookup(&V_tcbinfo, ip->ip_src, sport, ip->ip_dst,
+ dport, INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif)
:
#ifdef SIFTR_IPV6
- in6_pcblookup_hash(&V_tcbinfo,
+ in6_pcblookup(&V_tcbinfo,
&((struct ip6_hdr *)ip)->ip6_src, sport,
- &((struct ip6_hdr *)ip)->ip6_dst, dport, 0,
+ &((struct ip6_hdr *)ip)->ip6_dst, dport, INPLOOKUP_RLOCKPCB,
m->m_pkthdr.rcvif)
#else
NULL
@@ -715,13 +714,13 @@ siftr_findinpcb(int ipver, struct ip *ip, struct mbuf *m, uint16_t sport,
else
inp = (ipver == INP_IPV4 ?
- in_pcblookup_hash(&V_tcbinfo, ip->ip_dst, dport, ip->ip_src,
- sport, 0, m->m_pkthdr.rcvif)
+ in_pcblookup(&V_tcbinfo, ip->ip_dst, dport, ip->ip_src,
+ sport, INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif)
:
#ifdef SIFTR_IPV6
- in6_pcblookup_hash(&V_tcbinfo,
+ in6_pcblookup(&V_tcbinfo,
&((struct ip6_hdr *)ip)->ip6_dst, dport,
- &((struct ip6_hdr *)ip)->ip6_src, sport, 0,
+ &((struct ip6_hdr *)ip)->ip6_src, sport, INPLOOKUP_RLOCKPCB,
m->m_pkthdr.rcvif)
#else
NULL
@@ -734,12 +733,7 @@ siftr_findinpcb(int ipver, struct ip *ip, struct mbuf *m, uint16_t sport,
ss->nskip_in_inpcb++;
else
ss->nskip_out_inpcb++;
- } else {
- /* Acquire the inpcb lock. */
- INP_UNLOCK_ASSERT(inp);
- INP_RLOCK(inp);
}
- INP_INFO_RUNLOCK(&V_tcbinfo);
return (inp);
}
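
The siftr change illustrates the new lookup contract used throughout this import: in_pcblookup() and in6_pcblookup() take an INPLOOKUP_RLOCKPCB (or INPLOOKUP_WLOCKPCB) flag and return the inpcb already locked, so callers no longer bracket the lookup with the global INP_INFO lock or lock the pcb themselves. A hedged sketch of the resulting caller pattern for an inbound IPv4 segment, assuming kernel context with ip, sport, dport, and m already parsed as in siftr_findinpcb():

	struct inpcb *inp;

	/*
	 * Post-r222830 idiom: no INP_INFO_RLOCK() around the call; the
	 * returned inpcb is already read-locked because
	 * INPLOOKUP_RLOCKPCB was requested.
	 */
	inp = in_pcblookup(&V_tcbinfo, ip->ip_src, sport,
	    ip->ip_dst, dport, INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif);
	if (inp != NULL) {
		INP_RLOCK_ASSERT(inp);
		/* ... inspect the connection state ... */
		INP_RUNLOCK(inp);
	}
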
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 1a94d0a..e3e9aa6 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -5,6 +5,7 @@
* Swinburne University of Technology, Melbourne, Australia.
* Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
* Copyright (c) 2010 The FreeBSD Foundation
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
* All rights reserved.
*
* Portions of this software were developed at the Centre for Advanced Internet
@@ -16,6 +17,9 @@
* Internet Architectures, Swinburne University of Technology, Melbourne,
* Australia by David Hayes under sponsorship from the FreeBSD Foundation.
*
+ * Portions of this software were developed by Robert N. M. Watson under
+ * contract to Juniper Networks, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -197,10 +201,6 @@ SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_RW,
&VNET_NAME(tcp_autorcvbuf_max), 0,
"Max size of automatic receive buffer");
-int tcp_read_locking = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, read_locking, CTLFLAG_RW,
- &tcp_read_locking, 0, "Enable read locking strategy");
-
VNET_DEFINE(struct inpcbhead, tcb);
#define tcb6 tcb /* for KAME src sync over BSD*'s */
VNET_DEFINE(struct inpcbinfo, tcbinfo);
@@ -591,8 +591,7 @@ tcp_input(struct mbuf *m, int off0)
char *s = NULL; /* address and port logging */
int ti_locked;
#define TI_UNLOCKED 1
-#define TI_RLOCKED 2
-#define TI_WLOCKED 3
+#define TI_WLOCKED 2
#ifdef TCPDEBUG
/*
@@ -756,30 +755,25 @@ tcp_input(struct mbuf *m, int off0)
drop_hdrlen = off0 + off;
/*
- * Locate pcb for segment, which requires a lock on tcbinfo.
- * Optimisticaly acquire a global read lock rather than a write lock
- * unless header flags necessarily imply a state change. There are
- * two cases where we might discover later we need a write lock
- * despite the flags: ACKs moving a connection out of the syncache,
- * and ACKs for a connection in TIMEWAIT.
+ * Locate pcb for segment; if we're likely to add or remove a
+ * connection then first acquire pcbinfo lock. There are two cases
+ * where we might discover later we need a write lock despite the
+ * flags: ACKs moving a connection out of the syncache, and ACKs for
+ * a connection in TIMEWAIT.
*/
- if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
- tcp_read_locking == 0) {
+ if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0) {
INP_INFO_WLOCK(&V_tcbinfo);
ti_locked = TI_WLOCKED;
- } else {
- INP_INFO_RLOCK(&V_tcbinfo);
- ti_locked = TI_RLOCKED;
- }
+ } else
+ ti_locked = TI_UNLOCKED;
findpcb:
#ifdef INVARIANTS
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
- else if (ti_locked == TI_WLOCKED)
+ if (ti_locked == TI_WLOCKED) {
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
- else
- panic("%s: findpcb ti_locked %d\n", __func__, ti_locked);
+ } else {
+ INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ }
#endif
#ifdef INET
@@ -797,20 +791,20 @@ findpcb:
* Transparently forwarded. Pretend to be the destination.
* already got one like this?
*/
- inp = in_pcblookup_hash(&V_tcbinfo,
- ip->ip_src, th->th_sport,
- ip->ip_dst, th->th_dport,
- 0, m->m_pkthdr.rcvif);
+ inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src, th->th_sport,
+ ip->ip_dst, th->th_dport, INPLOOKUP_WLOCKPCB,
+ m->m_pkthdr.rcvif, m);
if (!inp) {
- /* It's new. Try to find the ambushing socket. */
- inp = in_pcblookup_hash(&V_tcbinfo,
- ip->ip_src, th->th_sport,
- next_hop->sin_addr,
- next_hop->sin_port ?
- ntohs(next_hop->sin_port) :
- th->th_dport,
- INPLOOKUP_WILDCARD,
- m->m_pkthdr.rcvif);
+ /*
+ * It's new. Try to find the ambushing socket.
+ * Because we've rewritten the destination address,
+ * any hardware-generated hash is ignored.
+ */
+ inp = in_pcblookup(&V_tcbinfo, ip->ip_src,
+ th->th_sport, next_hop->sin_addr,
+ next_hop->sin_port ? ntohs(next_hop->sin_port) :
+ th->th_dport, INPLOOKUP_WILDCARD |
+ INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif);
}
/* Remove the tag from the packet. We don't need it anymore. */
m_tag_delete(m, fwd_tag);
@@ -820,21 +814,19 @@ findpcb:
{
#ifdef INET6
if (isipv6)
- inp = in6_pcblookup_hash(&V_tcbinfo,
- &ip6->ip6_src, th->th_sport,
- &ip6->ip6_dst, th->th_dport,
- INPLOOKUP_WILDCARD,
- m->m_pkthdr.rcvif);
+ inp = in6_pcblookup_mbuf(&V_tcbinfo, &ip6->ip6_src,
+ th->th_sport, &ip6->ip6_dst, th->th_dport,
+ INPLOOKUP_WILDCARD | INPLOOKUP_WLOCKPCB,
+ m->m_pkthdr.rcvif, m);
#endif
#if defined(INET) && defined(INET6)
else
#endif
#ifdef INET
- inp = in_pcblookup_hash(&V_tcbinfo,
- ip->ip_src, th->th_sport,
- ip->ip_dst, th->th_dport,
- INPLOOKUP_WILDCARD,
- m->m_pkthdr.rcvif);
+ inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src,
+ th->th_sport, ip->ip_dst, th->th_dport,
+ INPLOOKUP_WILDCARD | INPLOOKUP_WLOCKPCB,
+ m->m_pkthdr.rcvif, m);
#endif
}
@@ -865,7 +857,7 @@ findpcb:
rstreason = BANDLIM_RST_CLOSEDPORT;
goto dropwithreset;
}
- INP_WLOCK(inp);
+ INP_WLOCK_ASSERT(inp);
if (!(inp->inp_flags & INP_HW_FLOWID)
&& (m->m_flags & M_FLOWID)
&& ((inp->inp_socket == NULL)
@@ -906,28 +898,26 @@ findpcb:
* legitimate new connection attempt the old INPCB gets removed and
* we can try again to find a listening socket.
*
- * At this point, due to earlier optimism, we may hold a read lock on
- * the inpcbinfo, rather than a write lock. If so, we need to
- * upgrade, or if that fails, acquire a reference on the inpcb, drop
- * all locks, acquire a global write lock, and then re-acquire the
- * inpcb lock. We may at that point discover that another thread has
- * tried to free the inpcb, in which case we need to loop back and
- * try to find a new inpcb to deliver to.
+ * At this point, due to earlier optimism, we may hold only an inpcb
+ * lock, and not the inpcbinfo write lock. If so, we need to try to
+ * acquire it, or if that fails, acquire a reference on the inpcb,
+ * drop all locks, acquire a global write lock, and then re-acquire
+ * the inpcb lock. We may at that point discover that another thread
+ * has tried to free the inpcb, in which case we need to loop back
+ * and try to find a new inpcb to deliver to.
+ *
+ * XXXRW: It may be time to rethink timewait locking.
*/
relocked:
if (inp->inp_flags & INP_TIMEWAIT) {
- KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED,
- ("%s: INP_TIMEWAIT ti_locked %d", __func__, ti_locked));
-
- if (ti_locked == TI_RLOCKED) {
- if (INP_INFO_TRY_UPGRADE(&V_tcbinfo) == 0) {
+ if (ti_locked == TI_UNLOCKED) {
+ if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) {
in_pcbref(inp);
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
INP_INFO_WLOCK(&V_tcbinfo);
ti_locked = TI_WLOCKED;
INP_WLOCK(inp);
- if (in_pcbrele(inp)) {
+ if (in_pcbrele_wlocked(inp)) {
inp = NULL;
goto findpcb;
}
@@ -975,26 +965,24 @@ relocked:
/*
* We've identified a valid inpcb, but it could be that we need an
- * inpcbinfo write lock and have only a read lock. In this case,
- * attempt to upgrade/relock using the same strategy as the TIMEWAIT
- * case above. If we relock, we have to jump back to 'relocked' as
- * the connection might now be in TIMEWAIT.
+ * inpcbinfo write lock but don't hold it. In this case, attempt to
+ * acquire using the same strategy as the TIMEWAIT case above. If we
+ * relock, we have to jump back to 'relocked' as the connection might
+ * now be in TIMEWAIT.
*/
- if (tp->t_state != TCPS_ESTABLISHED ||
- (thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
- tcp_read_locking == 0) {
- KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED,
- ("%s: upgrade check ti_locked %d", __func__, ti_locked));
-
- if (ti_locked == TI_RLOCKED) {
- if (INP_INFO_TRY_UPGRADE(&V_tcbinfo) == 0) {
+#ifdef INVARIANTS
+ if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0)
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+#endif
+ if (tp->t_state != TCPS_ESTABLISHED) {
+ if (ti_locked == TI_UNLOCKED) {
+ if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) {
in_pcbref(inp);
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
INP_INFO_WLOCK(&V_tcbinfo);
ti_locked = TI_WLOCKED;
INP_WLOCK(inp);
- if (in_pcbrele(inp)) {
+ if (in_pcbrele_wlocked(inp)) {
inp = NULL;
goto findpcb;
}
@@ -1027,13 +1015,16 @@ relocked:
/*
* When the socket is accepting connections (the INPCB is in LISTEN
* state) we look into the SYN cache if this is a new connection
- * attempt or the completion of a previous one.
+ * attempt or the completion of a previous one. Because listen
+ * sockets are never in TCPS_ESTABLISHED, the V_tcbinfo lock will be
+ * held in this case.
*/
if (so->so_options & SO_ACCEPTCONN) {
struct in_conninfo inc;
KASSERT(tp->t_state == TCPS_LISTEN, ("%s: so accepting but "
"tp not listening", __func__));
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
bzero(&inc, sizeof(inc));
#ifdef INET6
@@ -1371,13 +1362,17 @@ relocked:
return;
dropwithreset:
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- else if (ti_locked == TI_WLOCKED)
+ if (ti_locked == TI_WLOCKED) {
INP_INFO_WUNLOCK(&V_tcbinfo);
- else
- panic("%s: dropwithreset ti_locked %d", __func__, ti_locked);
- ti_locked = TI_UNLOCKED;
+ ti_locked = TI_UNLOCKED;
+ }
+#ifdef INVARIANTS
+ else {
+ KASSERT(ti_locked == TI_UNLOCKED, ("%s: dropwithreset "
+ "ti_locked: %d", __func__, ti_locked));
+ INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ }
+#endif
if (inp != NULL) {
tcp_dropwithreset(m, th, tp, tlen, rstreason);
@@ -1388,13 +1383,17 @@ dropwithreset:
goto drop;
dropunlock:
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- else if (ti_locked == TI_WLOCKED)
+ if (ti_locked == TI_WLOCKED) {
INP_INFO_WUNLOCK(&V_tcbinfo);
- else
- panic("%s: dropunlock ti_locked %d", __func__, ti_locked);
- ti_locked = TI_UNLOCKED;
+ ti_locked = TI_UNLOCKED;
+ }
+#ifdef INVARIANTS
+ else {
+ KASSERT(ti_locked == TI_UNLOCKED, ("%s: dropunlock "
+ "ti_locked: %d", __func__, ti_locked));
+ INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ }
+#endif
if (inp != NULL)
INP_WUNLOCK(inp);
@@ -1449,13 +1448,13 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
} else {
#ifdef INVARIANTS
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
- else if (ti_locked == TI_WLOCKED)
+ if (ti_locked == TI_WLOCKED)
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
- else
- panic("%s: ti_locked %d for EST", __func__,
- ti_locked);
+ else {
+ KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST "
+ "ti_locked: %d", __func__, ti_locked));
+ INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ }
#endif
}
INP_WLOCK_ASSERT(tp->t_inpcb);
@@ -1601,13 +1600,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
/*
* This is a pure ack for outstanding data.
*/
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- else if (ti_locked == TI_WLOCKED)
+ if (ti_locked == TI_WLOCKED)
INP_INFO_WUNLOCK(&V_tcbinfo);
- else
- panic("%s: ti_locked %d on pure ACK",
- __func__, ti_locked);
ti_locked = TI_UNLOCKED;
TCPSTAT_INC(tcps_predack);
@@ -1708,13 +1702,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
* nothing on the reassembly queue and we have enough
* buffer space to take it.
*/
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- else if (ti_locked == TI_WLOCKED)
+ if (ti_locked == TI_WLOCKED)
INP_INFO_WUNLOCK(&V_tcbinfo);
- else
- panic("%s: ti_locked %d on pure data "
- "segment", __func__, ti_locked);
ti_locked = TI_UNLOCKED;
/* Clean receiver SACK report if present */
@@ -2550,9 +2539,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
}
process_ACK:
- INP_INFO_LOCK_ASSERT(&V_tcbinfo);
- KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED,
- ("tcp_input: process_ACK ti_locked %d", ti_locked));
INP_WLOCK_ASSERT(tp->t_inpcb);
acked = BYTES_THIS_ACK(tp, th);
@@ -2716,9 +2702,6 @@ process_ACK:
}
step6:
- INP_INFO_LOCK_ASSERT(&V_tcbinfo);
- KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED,
- ("tcp_do_segment: step6 ti_locked %d", ti_locked));
INP_WLOCK_ASSERT(tp->t_inpcb);
/*
@@ -2804,9 +2787,6 @@ step6:
tp->rcv_up = tp->rcv_nxt;
}
dodata: /* XXX */
- INP_INFO_LOCK_ASSERT(&V_tcbinfo);
- KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED,
- ("tcp_do_segment: dodata ti_locked %d", ti_locked));
INP_WLOCK_ASSERT(tp->t_inpcb);
/*
@@ -2938,13 +2918,8 @@ dodata: /* XXX */
return;
}
}
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- else if (ti_locked == TI_WLOCKED)
+ if (ti_locked == TI_WLOCKED)
INP_INFO_WUNLOCK(&V_tcbinfo);
- else
- panic("%s: dodata epilogue ti_locked %d", __func__,
- ti_locked);
ti_locked = TI_UNLOCKED;
#ifdef TCPDEBUG
@@ -2973,9 +2948,6 @@ check_delack:
return;
dropafterack:
- KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED,
- ("tcp_do_segment: dropafterack ti_locked %d", ti_locked));
-
/*
* Generate an ACK dropping incoming segment if it occupies
* sequence space, where the ACK reflects our state.
@@ -3002,13 +2974,8 @@ dropafterack:
tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
&tcp_savetcp, 0);
#endif
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- else if (ti_locked == TI_WLOCKED)
+ if (ti_locked == TI_WLOCKED)
INP_INFO_WUNLOCK(&V_tcbinfo);
- else
- panic("%s: dropafterack epilogue ti_locked %d", __func__,
- ti_locked);
ti_locked = TI_UNLOCKED;
tp->t_flags |= TF_ACKNOW;
@@ -3018,12 +2985,8 @@ dropafterack:
return;
dropwithreset:
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- else if (ti_locked == TI_WLOCKED)
+ if (ti_locked == TI_WLOCKED)
INP_INFO_WUNLOCK(&V_tcbinfo);
- else
- panic("%s: dropwithreset ti_locked %d", __func__, ti_locked);
ti_locked = TI_UNLOCKED;
if (tp != NULL) {
@@ -3034,15 +2997,14 @@ dropwithreset:
return;
drop:
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- else if (ti_locked == TI_WLOCKED)
+ if (ti_locked == TI_WLOCKED) {
INP_INFO_WUNLOCK(&V_tcbinfo);
+ ti_locked = TI_UNLOCKED;
+ }
#ifdef INVARIANTS
else
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
#endif
- ti_locked = TI_UNLOCKED;
/*
* Drop space held by incoming segment and return.
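
With the tcp_read_locking sysctl removed, tcp_input() now processes most established-connection segments with no tcbinfo lock at all and escalates only when the segment may add or remove a connection. The TIMEWAIT and non-ESTABLISHED branches above share one escalation pattern; a condensed, hedged sketch of it, using the same names as the diff (the success branch setting ti_locked after a winning try-lock is inferred from context):

	/*
	 * Try for the global write lock without dropping the inpcb lock;
	 * if that fails, pin the inpcb with a reference, drop its lock to
	 * avoid a lock-order reversal, take the global lock, relock the
	 * inpcb, and re-validate that it was not freed meanwhile.
	 */
	if (ti_locked == TI_UNLOCKED) {
		if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) {
			in_pcbref(inp);
			INP_WUNLOCK(inp);
			INP_INFO_WLOCK(&V_tcbinfo);
			ti_locked = TI_WLOCKED;
			INP_WLOCK(inp);
			if (in_pcbrele_wlocked(inp)) {
				/* inpcb went away; look up a fresh one. */
				inp = NULL;
				goto findpcb;
			}
		} else
			ti_locked = TI_WLOCKED;
	}
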
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index 4b5fa10..4542ac5 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -1102,8 +1102,15 @@ send:
m->m_pkthdr.tso_segsz = tp->t_maxopd - optlen;
}
+#ifdef IPSEC
+ KASSERT(len + hdrlen + ipoptlen - ipsec_optlen == m_length(m, NULL),
+ ("%s: mbuf chain shorter than expected: %ld + %u + %u - %u != %u",
+ __func__, len, hdrlen, ipoptlen, ipsec_optlen, m_length(m, NULL)));
+#else
KASSERT(len + hdrlen + ipoptlen == m_length(m, NULL),
- ("%s: mbuf chain shorter than expected", __func__));
+ ("%s: mbuf chain shorter than expected: %ld + %u + %u != %u",
+ __func__, len, hdrlen, ipoptlen, m_length(m, NULL)));
+#endif
/*
* In transmit state, time the transmission and arrange for
@@ -1331,7 +1338,7 @@ out:
* then remember the size of the advertised window.
* Any pending ACK has now been sent.
*/
- if (recwin > 0 && SEQ_GT(tp->rcv_nxt + recwin, tp->rcv_adv))
+ if (recwin >= 0 && SEQ_GT(tp->rcv_nxt + recwin, tp->rcv_adv))
tp->rcv_adv = tp->rcv_nxt + recwin;
tp->last_ack_sent = tp->rcv_nxt;
tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 2c013be..6ed58911 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -300,7 +300,8 @@ tcp_init(void)
hashsize = 512; /* safe default */
}
in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, hashsize, hashsize,
- "tcp_inpcb", tcp_inpcb_init, NULL, UMA_ZONE_NOFREE);
+ "tcp_inpcb", tcp_inpcb_init, NULL, UMA_ZONE_NOFREE,
+ IPI_HASHFIELDS_4TUPLE);
/*
* These have to be type stable for the benefit of the timers.
@@ -1184,9 +1185,9 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
INP_INFO_WLOCK(&V_tcbinfo);
for (i = 0; i < n; i++) {
inp = inp_list[i];
- INP_WLOCK(inp);
- if (!in_pcbrele(inp))
- INP_WUNLOCK(inp);
+ INP_RLOCK(inp);
+ if (!in_pcbrele_rlocked(inp))
+ INP_RUNLOCK(inp);
}
INP_INFO_WUNLOCK(&V_tcbinfo);
@@ -1228,12 +1229,9 @@ tcp_getcred(SYSCTL_HANDLER_ARGS)
error = SYSCTL_IN(req, addrs, sizeof(addrs));
if (error)
return (error);
- INP_INFO_RLOCK(&V_tcbinfo);
- inp = in_pcblookup_hash(&V_tcbinfo, addrs[1].sin_addr,
- addrs[1].sin_port, addrs[0].sin_addr, addrs[0].sin_port, 0, NULL);
+ inp = in_pcblookup(&V_tcbinfo, addrs[1].sin_addr, addrs[1].sin_port,
+ addrs[0].sin_addr, addrs[0].sin_port, INPLOOKUP_RLOCKPCB, NULL);
if (inp != NULL) {
- INP_RLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
if (inp->inp_socket == NULL)
error = ENOENT;
if (error == 0)
@@ -1241,10 +1239,8 @@ tcp_getcred(SYSCTL_HANDLER_ARGS)
if (error == 0)
cru2x(inp->inp_cred, &xuc);
INP_RUNLOCK(inp);
- } else {
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ } else
error = ENOENT;
- }
if (error == 0)
error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
return (error);
@@ -1286,23 +1282,20 @@ tcp6_getcred(SYSCTL_HANDLER_ARGS)
return (EINVAL);
}
- INP_INFO_RLOCK(&V_tcbinfo);
#ifdef INET
if (mapped == 1)
- inp = in_pcblookup_hash(&V_tcbinfo,
+ inp = in_pcblookup(&V_tcbinfo,
*(struct in_addr *)&addrs[1].sin6_addr.s6_addr[12],
addrs[1].sin6_port,
*(struct in_addr *)&addrs[0].sin6_addr.s6_addr[12],
- addrs[0].sin6_port,
- 0, NULL);
+ addrs[0].sin6_port, INPLOOKUP_RLOCKPCB, NULL);
else
#endif
- inp = in6_pcblookup_hash(&V_tcbinfo,
+ inp = in6_pcblookup(&V_tcbinfo,
&addrs[1].sin6_addr, addrs[1].sin6_port,
- &addrs[0].sin6_addr, addrs[0].sin6_port, 0, NULL);
+ &addrs[0].sin6_addr, addrs[0].sin6_port,
+ INPLOOKUP_RLOCKPCB, NULL);
if (inp != NULL) {
- INP_RLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
if (inp->inp_socket == NULL)
error = ENOENT;
if (error == 0)
@@ -1310,10 +1303,8 @@ tcp6_getcred(SYSCTL_HANDLER_ARGS)
if (error == 0)
cru2x(inp->inp_cred, &xuc);
INP_RUNLOCK(inp);
- } else {
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ } else
error = ENOENT;
- }
if (error == 0)
error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
return (error);
@@ -1374,10 +1365,9 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
th = (struct tcphdr *)((caddr_t)ip
+ (ip->ip_hl << 2));
INP_INFO_WLOCK(&V_tcbinfo);
- inp = in_pcblookup_hash(&V_tcbinfo, faddr, th->th_dport,
- ip->ip_src, th->th_sport, 0, NULL);
+ inp = in_pcblookup(&V_tcbinfo, faddr, th->th_dport,
+ ip->ip_src, th->th_sport, INPLOOKUP_WLOCKPCB, NULL);
if (inp != NULL) {
- INP_WLOCK(inp);
if (!(inp->inp_flags & INP_TIMEWAIT) &&
!(inp->inp_flags & INP_DROPPED) &&
!(inp->inp_socket == NULL)) {
@@ -2154,20 +2144,19 @@ sysctl_drop(SYSCTL_HANDLER_ARGS)
switch (addrs[0].ss_family) {
#ifdef INET6
case AF_INET6:
- inp = in6_pcblookup_hash(&V_tcbinfo, &fin6->sin6_addr,
- fin6->sin6_port, &lin6->sin6_addr, lin6->sin6_port, 0,
- NULL);
+ inp = in6_pcblookup(&V_tcbinfo, &fin6->sin6_addr,
+ fin6->sin6_port, &lin6->sin6_addr, lin6->sin6_port,
+ INPLOOKUP_WLOCKPCB, NULL);
break;
#endif
#ifdef INET
case AF_INET:
- inp = in_pcblookup_hash(&V_tcbinfo, fin->sin_addr,
- fin->sin_port, lin->sin_addr, lin->sin_port, 0, NULL);
+ inp = in_pcblookup(&V_tcbinfo, fin->sin_addr, fin->sin_port,
+ lin->sin_addr, lin->sin_port, INPLOOKUP_WLOCKPCB, NULL);
break;
#endif
}
if (inp != NULL) {
- INP_WLOCK(inp);
if (inp->inp_flags & INP_TIMEWAIT) {
/*
* XXXRW: There currently exists a state where an
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index 8262f43..66e4732 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
+#include "opt_pcbgroup.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -661,6 +662,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
inp = sotoinpcb(so);
inp->inp_inc.inc_fibnum = so->so_fibnum;
INP_WLOCK(inp);
+ INP_HASH_WLOCK(&V_tcbinfo);
/* Insert new socket into PCB hash list. */
inp->inp_inc.inc_flags = sc->sc_inc.inc_flags;
@@ -675,8 +677,14 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
#ifdef INET6
}
#endif
+
+ /*
+ * Install in the reservation hash table for now, but don't yet
+ * install a connection group since the full 4-tuple isn't yet
+ * configured.
+ */
inp->inp_lport = sc->sc_inc.inc_lport;
- if ((error = in_pcbinshash(inp)) != 0) {
+ if ((error = in_pcbinshash_nopcbgroup(inp)) != 0) {
/*
* Undo the assignments above if we failed to
* put the PCB on the hash lists.
@@ -694,6 +702,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
s, __func__, error);
free(s, M_TCPLOG);
}
+ INP_HASH_WUNLOCK(&V_tcbinfo);
goto abort;
}
#ifdef IPSEC
@@ -728,8 +737,8 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
laddr6 = inp->in6p_laddr;
if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
inp->in6p_laddr = sc->sc_inc.inc6_laddr;
- if ((error = in6_pcbconnect(inp, (struct sockaddr *)&sin6,
- thread0.td_ucred)) != 0) {
+ if ((error = in6_pcbconnect_mbuf(inp, (struct sockaddr *)&sin6,
+ thread0.td_ucred, m)) != 0) {
inp->in6p_laddr = laddr6;
if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
log(LOG_DEBUG, "%s; %s: in6_pcbconnect failed "
@@ -737,6 +746,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
s, __func__, error);
free(s, M_TCPLOG);
}
+ INP_HASH_WUNLOCK(&V_tcbinfo);
goto abort;
}
/* Override flowlabel from in6_pcbconnect. */
@@ -767,8 +777,8 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
laddr = inp->inp_laddr;
if (inp->inp_laddr.s_addr == INADDR_ANY)
inp->inp_laddr = sc->sc_inc.inc_laddr;
- if ((error = in_pcbconnect(inp, (struct sockaddr *)&sin,
- thread0.td_ucred)) != 0) {
+ if ((error = in_pcbconnect_mbuf(inp, (struct sockaddr *)&sin,
+ thread0.td_ucred, m)) != 0) {
inp->inp_laddr = laddr;
if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
log(LOG_DEBUG, "%s; %s: in_pcbconnect failed "
@@ -776,10 +786,12 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
s, __func__, error);
free(s, M_TCPLOG);
}
+ INP_HASH_WUNLOCK(&V_tcbinfo);
goto abort;
}
}
#endif /* INET */
+ INP_HASH_WUNLOCK(&V_tcbinfo);
tp = intotcpcb(inp);
tp->t_state = TCPS_SYN_RECEIVED;
tp->iss = sc->sc_iss;
diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c
index 5c2c5c2..73984c7 100644
--- a/sys/netinet/tcp_timer.c
+++ b/sys/netinet/tcp_timer.c
@@ -490,7 +490,7 @@ tcp_timer_rexmt(void * xtp)
INP_WUNLOCK(inp);
INP_INFO_WLOCK(&V_tcbinfo);
INP_WLOCK(inp);
- if (in_pcbrele(inp)) {
+ if (in_pcbrele_wlocked(inp)) {
INP_INFO_WUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index 318fe27..96cb1e4 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -2,8 +2,12 @@
* Copyright (c) 1982, 1986, 1988, 1993
* The Regents of the University of California.
* Copyright (c) 2006-2007 Robert N. M. Watson
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
* All rights reserved.
*
+ * Portions of this software were developed by Robert N. M. Watson under
+ * contract to Juniper Networks, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -251,7 +255,6 @@ tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
return (EAFNOSUPPORT);
TCPDEBUG0;
- INP_INFO_WLOCK(&V_tcbinfo);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL"));
INP_WLOCK(inp);
@@ -261,11 +264,12 @@ tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
}
tp = intotcpcb(inp);
TCPDEBUG1();
+ INP_HASH_WLOCK(&V_tcbinfo);
error = in_pcbbind(inp, nam, td->td_ucred);
+ INP_HASH_WUNLOCK(&V_tcbinfo);
out:
TCPDEBUG2(PRU_BIND);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
return (error);
}
@@ -292,7 +296,6 @@ tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
return (EAFNOSUPPORT);
TCPDEBUG0;
- INP_INFO_WLOCK(&V_tcbinfo);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL"));
INP_WLOCK(inp);
@@ -302,6 +305,7 @@ tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
}
tp = intotcpcb(inp);
TCPDEBUG1();
+ INP_HASH_WLOCK(&V_tcbinfo);
inp->inp_vflag &= ~INP_IPV4;
inp->inp_vflag |= INP_IPV6;
#ifdef INET
@@ -316,15 +320,16 @@ tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
inp->inp_vflag &= ~INP_IPV6;
error = in_pcbbind(inp, (struct sockaddr *)&sin,
td->td_ucred);
+ INP_HASH_WUNLOCK(&V_tcbinfo);
goto out;
}
}
#endif
error = in6_pcbbind(inp, nam, td->td_ucred);
+ INP_HASH_WUNLOCK(&V_tcbinfo);
out:
TCPDEBUG2(PRU_BIND);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
return (error);
}
#endif /* INET6 */
@@ -341,7 +346,6 @@ tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
struct tcpcb *tp = NULL;
TCPDEBUG0;
- INP_INFO_WLOCK(&V_tcbinfo);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL"));
INP_WLOCK(inp);
@@ -353,8 +357,10 @@ tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
TCPDEBUG1();
SOCK_LOCK(so);
error = solisten_proto_check(so);
+ INP_HASH_WLOCK(&V_tcbinfo);
if (error == 0 && inp->inp_lport == 0)
error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
+ INP_HASH_WUNLOCK(&V_tcbinfo);
if (error == 0) {
tp->t_state = TCPS_LISTEN;
solisten_proto(so, backlog);
@@ -365,7 +371,6 @@ tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
out:
TCPDEBUG2(PRU_LISTEN);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
return (error);
}
#endif /* INET */
@@ -379,7 +384,6 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
struct tcpcb *tp = NULL;
TCPDEBUG0;
- INP_INFO_WLOCK(&V_tcbinfo);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL"));
INP_WLOCK(inp);
@@ -391,12 +395,14 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
TCPDEBUG1();
SOCK_LOCK(so);
error = solisten_proto_check(so);
+ INP_HASH_WLOCK(&V_tcbinfo);
if (error == 0 && inp->inp_lport == 0) {
inp->inp_vflag &= ~INP_IPV4;
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
inp->inp_vflag |= INP_IPV4;
error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
}
+ INP_HASH_WUNLOCK(&V_tcbinfo);
if (error == 0) {
tp->t_state = TCPS_LISTEN;
solisten_proto(so, backlog);
@@ -406,7 +412,6 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
out:
TCPDEBUG2(PRU_LISTEN);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
return (error);
}
#endif /* INET6 */
@@ -440,7 +445,6 @@ tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
return (error);
TCPDEBUG0;
- INP_INFO_WLOCK(&V_tcbinfo);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL"));
INP_WLOCK(inp);
@@ -456,7 +460,6 @@ tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
out:
TCPDEBUG2(PRU_CONNECT);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
return (error);
}
#endif /* INET */
@@ -482,7 +485,6 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
&& IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
return (EAFNOSUPPORT);
- INP_INFO_WLOCK(&V_tcbinfo);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL"));
INP_WLOCK(inp);
@@ -493,6 +495,11 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
tp = intotcpcb(inp);
TCPDEBUG1();
#ifdef INET
+ /*
+ * XXXRW: Some confusion: V4/V6 flags relate to binding, and
+ * therefore probably require the hash lock, which isn't held here.
+ * Is this a significant problem?
+ */
if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
struct sockaddr_in sin;
@@ -525,7 +532,6 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
out:
TCPDEBUG2(PRU_CONNECT);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
return (error);
}
#endif /* INET6 */
@@ -639,6 +645,7 @@ tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL"));
+ INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
error = ECONNABORTED;
@@ -664,6 +671,7 @@ tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
out:
TCPDEBUG2(PRU_ACCEPT);
INP_WUNLOCK(inp);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
if (error == 0) {
if (v4)
*nam = in6_v4mapsin6_sockaddr(port, &addr);
@@ -750,25 +758,17 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
int error = 0;
struct inpcb *inp;
struct tcpcb *tp = NULL;
- int headlocked = 0;
#ifdef INET6
int isipv6;
#endif
TCPDEBUG0;
/*
- * We require the pcbinfo lock in two cases:
- *
- * (1) An implied connect is taking place, which can result in
- * binding IPs and ports and hence modification of the pcb hash
- * chains.
- *
- * (2) PRUS_EOF is set, resulting in explicit close on the send.
+ * We require the pcbinfo lock if we will close the socket as part of
+ * this call.
*/
- if ((nam != NULL) || (flags & PRUS_EOF)) {
+ if (flags & PRUS_EOF)
INP_INFO_WLOCK(&V_tcbinfo);
- headlocked = 1;
- }
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
INP_WLOCK(inp);
@@ -805,7 +805,6 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
* initialize maxseg/maxopd using peer's cached
* MSS.
*/
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
#ifdef INET6
if (isipv6)
error = tcp6_connect(tp, nam, td);
@@ -830,10 +829,6 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
socantsendmore(so);
tcp_usrclosed(tp);
}
- if (headlocked) {
- INP_INFO_WUNLOCK(&V_tcbinfo);
- headlocked = 0;
- }
if (!(inp->inp_flags & INP_DROPPED)) {
if (flags & PRUS_MORETOCOME)
tp->t_flags |= TF_MORETOCOME;
@@ -869,7 +864,6 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
* initialize maxseg/maxopd using peer's cached
* MSS.
*/
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
#ifdef INET6
if (isipv6)
error = tcp6_connect(tp, nam, td);
@@ -884,11 +878,6 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
goto out;
tp->snd_wnd = TTCP_CLIENT_SND_WND;
tcp_mss(tp, -1);
- INP_INFO_WUNLOCK(&V_tcbinfo);
- headlocked = 0;
- } else if (nam) {
- INP_INFO_WUNLOCK(&V_tcbinfo);
- headlocked = 0;
}
tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
tp->t_flags |= TF_FORCEDATA;
@@ -899,7 +888,7 @@ out:
TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB :
((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
INP_WUNLOCK(inp);
- if (headlocked)
+ if (flags & PRUS_EOF)
INP_INFO_WUNLOCK(&V_tcbinfo);
return (error);
}
@@ -1087,13 +1076,13 @@ tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
u_short lport;
int error;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK(&V_tcbinfo);
if (inp->inp_lport == 0) {
error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
if (error)
- return error;
+ goto out;
}
/*
@@ -1106,11 +1095,14 @@ tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport,
&inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td->td_ucred);
if (error && oinp == NULL)
- return error;
- if (oinp)
- return EADDRINUSE;
+ goto out;
+ if (oinp) {
+ error = EADDRINUSE;
+ goto out;
+ }
inp->inp_laddr = laddr;
in_pcbrehash(inp);
+ INP_HASH_WUNLOCK(&V_tcbinfo);
/*
* Compute window scaling to request:
@@ -1129,6 +1121,10 @@ tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
tcp_sendseqinit(tp);
return 0;
+
+out:
+ INP_HASH_WUNLOCK(&V_tcbinfo);
+ return (error);
}
#endif /* INET */
@@ -1142,13 +1138,13 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
struct in6_addr addr6;
int error;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK(&V_tcbinfo);
if (inp->inp_lport == 0) {
error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
if (error)
- return error;
+ goto out;
}
/*
@@ -1156,18 +1152,23 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
* earlier incarnation of this same connection still in
* TIME_WAIT state, creating an ADDRINUSE error.
* in6_pcbladdr() also handles scope zone IDs.
+ *
+ * XXXRW: We wouldn't need to expose in6_pcblookup_hash_locked()
+ * outside of in6_pcb.c if there were an in6_pcbconnect_setup().
*/
error = in6_pcbladdr(inp, nam, &addr6);
if (error)
- return error;
- oinp = in6_pcblookup_hash(inp->inp_pcbinfo,
+ goto out;
+ oinp = in6_pcblookup_hash_locked(inp->inp_pcbinfo,
&sin6->sin6_addr, sin6->sin6_port,
IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
? &addr6
: &inp->in6p_laddr,
inp->inp_lport, 0, NULL);
- if (oinp)
- return EADDRINUSE;
+ if (oinp) {
+ error = EADDRINUSE;
+ goto out;
+ }
if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
inp->in6p_laddr = addr6;
inp->in6p_faddr = sin6->sin6_addr;
@@ -1178,6 +1179,7 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
inp->inp_flow |=
(htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
in_pcbrehash(inp);
+ INP_HASH_WUNLOCK(&V_tcbinfo);
/* Compute window scaling to request. */
while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
@@ -1192,6 +1194,10 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
tcp_sendseqinit(tp);
return 0;
+
+out:
+ INP_HASH_WUNLOCK(&V_tcbinfo);
+ return error;
}
#endif /* INET6 */
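
A recurring change in tcp_usrreq.c above is that the bind, listen, and connect paths no longer take the global INP_INFO_WLOCK; instead they hold the pcbinfo hash lock only around the operations that actually touch the port/address hash chains (in_pcbbind(), in_pcbconnect_setup(), in_pcbrehash()). A hedged sketch of the new shape of such a path, modelled on tcp_usr_bind() but abbreviated:

	/*
	 * The inpcb lock protects the connection itself; the pcbinfo
	 * hash lock is held only across the hash-chain update.
	 */
	INP_WLOCK(inp);
	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
		error = EINVAL;
		goto out;
	}
	INP_HASH_WLOCK(&V_tcbinfo);
	error = in_pcbbind(inp, nam, td->td_ucred);
	INP_HASH_WUNLOCK(&V_tcbinfo);
out:
	INP_WUNLOCK(inp);

tcp_connect() and tcp6_connect() follow the same scheme internally, which is why their callers in tcp_usr_connect()/tcp_usr_send() can drop the INP_INFO_WLOCK they used to take on the caller's behalf.
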
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
index c3503e6..28eb8fd 100644
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -2,8 +2,12 @@
* Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
* The Regents of the University of California.
* Copyright (c) 2008 Robert N. M. Watson
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
* All rights reserved.
*
+ * Portions of this software were developed by Robert N. M. Watson under
+ * contract to Juniper Networks, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -182,7 +186,8 @@ udp_init(void)
{
in_pcbinfo_init(&V_udbinfo, "udp", &V_udb, UDBHASHSIZE, UDBHASHSIZE,
- "udp_inpcb", udp_inpcb_init, NULL, UMA_ZONE_NOFREE);
+ "udp_inpcb", udp_inpcb_init, NULL, UMA_ZONE_NOFREE,
+ IPI_HASHFIELDS_2TUPLE);
V_udpcb_zone = uma_zcreate("udpcb", sizeof(struct udpcb),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
uma_zone_set_max(V_udpcb_zone, maxsockets);
@@ -253,7 +258,7 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
#endif
struct udpcb *up;
- INP_RLOCK_ASSERT(inp);
+ INP_LOCK_ASSERT(inp);
/*
* Engage the tunneling protocol.
@@ -458,12 +463,12 @@ udp_input(struct mbuf *m, int off)
}
#endif
- INP_INFO_RLOCK(&V_udbinfo);
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
in_broadcast(ip->ip_dst, ifp)) {
struct inpcb *last;
struct ip_moptions *imo;
+ INP_INFO_RLOCK(&V_udbinfo);
last = NULL;
LIST_FOREACH(inp, &V_udb, inp_list) {
if (inp->inp_lport != uh->uh_dport)
@@ -485,6 +490,13 @@ udp_input(struct mbuf *m, int off)
INP_RLOCK(inp);
/*
+ * XXXRW: Because we weren't holding either the inpcb
+ * or the hash lock when we checked for a match
+ * before, we should probably recheck now that the
+ * inpcb lock is held.
+ */
+
+ /*
* Handle socket delivery policy for any-source
* and source-specific multicast. [RFC3678]
*/
@@ -542,7 +554,10 @@ udp_input(struct mbuf *m, int off)
* or multicast datgram.)
*/
UDPSTAT_INC(udps_noportbcast);
- goto badheadlocked;
+ if (inp)
+ INP_RUNLOCK(inp);
+ INP_INFO_RUNLOCK(&V_udbinfo);
+ goto badunlocked;
}
udp_append(last, ip, m, iphlen, &udp_in);
INP_RUNLOCK(last);
@@ -553,8 +568,9 @@ udp_input(struct mbuf *m, int off)
/*
* Locate pcb for datagram.
*/
- inp = in_pcblookup_hash(&V_udbinfo, ip->ip_src, uh->uh_sport,
- ip->ip_dst, uh->uh_dport, 1, ifp);
+ inp = in_pcblookup_mbuf(&V_udbinfo, ip->ip_src, uh->uh_sport,
+ ip->ip_dst, uh->uh_dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB,
+ ifp, m);
if (inp == NULL) {
if (udp_log_in_vain) {
char buf[4*sizeof "123"];
@@ -568,36 +584,31 @@ udp_input(struct mbuf *m, int off)
UDPSTAT_INC(udps_noport);
if (m->m_flags & (M_BCAST | M_MCAST)) {
UDPSTAT_INC(udps_noportbcast);
- goto badheadlocked;
+ goto badunlocked;
}
if (V_udp_blackhole)
- goto badheadlocked;
+ goto badunlocked;
if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0)
- goto badheadlocked;
+ goto badunlocked;
*ip = save_ip;
ip->ip_len += iphlen;
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
- INP_INFO_RUNLOCK(&V_udbinfo);
return;
}
/*
* Check the minimum TTL for socket.
*/
- INP_RLOCK(inp);
- INP_INFO_RUNLOCK(&V_udbinfo);
+ INP_RLOCK_ASSERT(inp);
if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl) {
INP_RUNLOCK(inp);
- goto badunlocked;
+ m_freem(m);
+ return;
}
udp_append(inp, ip, m, iphlen, &udp_in);
INP_RUNLOCK(inp);
return;
-badheadlocked:
- if (inp)
- INP_RUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_udbinfo);
badunlocked:
m_freem(m);
}
@@ -656,17 +667,15 @@ udp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
return;
if (ip != NULL) {
uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
- INP_INFO_RLOCK(&V_udbinfo);
- inp = in_pcblookup_hash(&V_udbinfo, faddr, uh->uh_dport,
- ip->ip_src, uh->uh_sport, 0, NULL);
+ inp = in_pcblookup(&V_udbinfo, faddr, uh->uh_dport,
+ ip->ip_src, uh->uh_sport, INPLOOKUP_RLOCKPCB, NULL);
if (inp != NULL) {
- INP_RLOCK(inp);
+ INP_RLOCK_ASSERT(inp);
if (inp->inp_socket != NULL) {
udp_notify(inp, inetctlerrmap[cmd]);
}
INP_RUNLOCK(inp);
}
- INP_INFO_RUNLOCK(&V_udbinfo);
} else
in_pcbnotifyall(&V_udbinfo, faddr, inetctlerrmap[cmd],
udp_notify);
@@ -756,9 +765,9 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
INP_INFO_WLOCK(&V_udbinfo);
for (i = 0; i < n; i++) {
inp = inp_list[i];
- INP_WLOCK(inp);
- if (!in_pcbrele(inp))
- INP_WUNLOCK(inp);
+ INP_RLOCK(inp);
+ if (!in_pcbrele_rlocked(inp))
+ INP_RUNLOCK(inp);
}
INP_INFO_WUNLOCK(&V_udbinfo);
@@ -799,12 +808,11 @@ udp_getcred(SYSCTL_HANDLER_ARGS)
error = SYSCTL_IN(req, addrs, sizeof(addrs));
if (error)
return (error);
- INP_INFO_RLOCK(&V_udbinfo);
- inp = in_pcblookup_hash(&V_udbinfo, addrs[1].sin_addr, addrs[1].sin_port,
- addrs[0].sin_addr, addrs[0].sin_port, 1, NULL);
+ inp = in_pcblookup(&V_udbinfo, addrs[1].sin_addr, addrs[1].sin_port,
+ addrs[0].sin_addr, addrs[0].sin_port,
+ INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL);
if (inp != NULL) {
- INP_RLOCK(inp);
- INP_INFO_RUNLOCK(&V_udbinfo);
+ INP_RLOCK_ASSERT(inp);
if (inp->inp_socket == NULL)
error = ENOENT;
if (error == 0)
@@ -812,10 +820,8 @@ udp_getcred(SYSCTL_HANDLER_ARGS)
if (error == 0)
cru2x(inp->inp_cred, &xuc);
INP_RUNLOCK(inp);
- } else {
- INP_INFO_RUNLOCK(&V_udbinfo);
+ } else
error = ENOENT;
- }
if (error == 0)
error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
return (error);
@@ -924,6 +930,9 @@ udp_ctloutput(struct socket *so, struct sockopt *sopt)
}
#ifdef INET
+#define UH_WLOCKED 2
+#define UH_RLOCKED 1
+#define UH_UNLOCKED 0
static int
udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
struct mbuf *control, struct thread *td)
@@ -1016,29 +1025,27 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
* conservative locks than required the second time around, so later
* assertions have to accept that. Further analysis of the number of
* misses under contention is required.
+ *
+ * XXXRW: Check that hash locking update here is correct.
*/
sin = (struct sockaddr_in *)addr;
INP_RLOCK(inp);
if (sin != NULL &&
(inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) {
INP_RUNLOCK(inp);
- INP_INFO_WLOCK(&V_udbinfo);
INP_WLOCK(inp);
- unlock_udbinfo = 2;
+ INP_HASH_WLOCK(&V_udbinfo);
+ unlock_udbinfo = UH_WLOCKED;
} else if ((sin != NULL && (
(sin->sin_addr.s_addr == INADDR_ANY) ||
(sin->sin_addr.s_addr == INADDR_BROADCAST) ||
(inp->inp_laddr.s_addr == INADDR_ANY) ||
(inp->inp_lport == 0))) ||
(src.sin_family == AF_INET)) {
- if (!INP_INFO_TRY_RLOCK(&V_udbinfo)) {
- INP_RUNLOCK(inp);
- INP_INFO_RLOCK(&V_udbinfo);
- INP_RLOCK(inp);
- }
- unlock_udbinfo = 1;
+ INP_HASH_RLOCK(&V_udbinfo);
+ unlock_udbinfo = UH_RLOCKED;
} else
- unlock_udbinfo = 0;
+ unlock_udbinfo = UH_UNLOCKED;
/*
* If the IP_SENDSRCADDR control message was specified, override the
@@ -1048,7 +1055,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
laddr = inp->inp_laddr;
lport = inp->inp_lport;
if (src.sin_family == AF_INET) {
- INP_INFO_LOCK_ASSERT(&V_udbinfo);
+ INP_HASH_LOCK_ASSERT(&V_udbinfo);
if ((lport == 0) ||
(laddr.s_addr == INADDR_ANY &&
src.sin_addr.s_addr == INADDR_ANY)) {
@@ -1099,7 +1106,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
inp->inp_lport == 0 ||
sin->sin_addr.s_addr == INADDR_ANY ||
sin->sin_addr.s_addr == INADDR_BROADCAST) {
- INP_INFO_LOCK_ASSERT(&V_udbinfo);
+ INP_HASH_LOCK_ASSERT(&V_udbinfo);
error = in_pcbconnect_setup(inp, addr, &laddr.s_addr,
&lport, &faddr.s_addr, &fport, NULL,
td->td_ucred);
@@ -1113,8 +1120,8 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
/* Commit the local port if newly assigned. */
if (inp->inp_laddr.s_addr == INADDR_ANY &&
inp->inp_lport == 0) {
- INP_INFO_WLOCK_ASSERT(&V_udbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK_ASSERT(&V_udbinfo);
/*
* Remember addr if jailed, to prevent
* rebinding.
@@ -1209,25 +1216,25 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */
UDPSTAT_INC(udps_opackets);
- if (unlock_udbinfo == 2)
- INP_INFO_WUNLOCK(&V_udbinfo);
- else if (unlock_udbinfo == 1)
- INP_INFO_RUNLOCK(&V_udbinfo);
+ if (unlock_udbinfo == UH_WLOCKED)
+ INP_HASH_WUNLOCK(&V_udbinfo);
+ else if (unlock_udbinfo == UH_RLOCKED)
+ INP_HASH_RUNLOCK(&V_udbinfo);
error = ip_output(m, inp->inp_options, NULL, ipflags,
inp->inp_moptions, inp);
- if (unlock_udbinfo == 2)
+ if (unlock_udbinfo == UH_WLOCKED)
INP_WUNLOCK(inp);
else
INP_RUNLOCK(inp);
return (error);
release:
- if (unlock_udbinfo == 2) {
+ if (unlock_udbinfo == UH_WLOCKED) {
+ INP_HASH_WUNLOCK(&V_udbinfo);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
- } else if (unlock_udbinfo == 1) {
+ } else if (unlock_udbinfo == UH_RLOCKED) {
+ INP_HASH_RUNLOCK(&V_udbinfo);
INP_RUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_udbinfo);
} else
INP_RUNLOCK(inp);
m_freem(m);
@@ -1376,15 +1383,15 @@ udp_abort(struct socket *so)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp_abort: inp == NULL"));
- INP_INFO_WLOCK(&V_udbinfo);
INP_WLOCK(inp);
if (inp->inp_faddr.s_addr != INADDR_ANY) {
+ INP_HASH_WLOCK(&V_udbinfo);
in_pcbdisconnect(inp);
inp->inp_laddr.s_addr = INADDR_ANY;
+ INP_HASH_WUNLOCK(&V_udbinfo);
soisdisconnected(so);
}
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
}
static int
@@ -1453,11 +1460,11 @@ udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp_bind: inp == NULL"));
- INP_INFO_WLOCK(&V_udbinfo);
INP_WLOCK(inp);
+ INP_HASH_WLOCK(&V_udbinfo);
error = in_pcbbind(inp, nam, td->td_ucred);
+ INP_HASH_WUNLOCK(&V_udbinfo);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
return (error);
}
@@ -1468,15 +1475,15 @@ udp_close(struct socket *so)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp_close: inp == NULL"));
- INP_INFO_WLOCK(&V_udbinfo);
INP_WLOCK(inp);
if (inp->inp_faddr.s_addr != INADDR_ANY) {
+ INP_HASH_WLOCK(&V_udbinfo);
in_pcbdisconnect(inp);
inp->inp_laddr.s_addr = INADDR_ANY;
+ INP_HASH_WUNLOCK(&V_udbinfo);
soisdisconnected(so);
}
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
}
static int
@@ -1488,25 +1495,23 @@ udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp_connect: inp == NULL"));
- INP_INFO_WLOCK(&V_udbinfo);
INP_WLOCK(inp);
if (inp->inp_faddr.s_addr != INADDR_ANY) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
return (EISCONN);
}
sin = (struct sockaddr_in *)nam;
error = prison_remote_ip4(td->td_ucred, &sin->sin_addr);
if (error != 0) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
return (error);
}
+ INP_HASH_WLOCK(&V_udbinfo);
error = in_pcbconnect(inp, nam, td->td_ucred);
+ INP_HASH_WUNLOCK(&V_udbinfo);
if (error == 0)
soisconnected(so);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
return (error);
}
@@ -1538,21 +1543,19 @@ udp_disconnect(struct socket *so)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp_disconnect: inp == NULL"));
- INP_INFO_WLOCK(&V_udbinfo);
INP_WLOCK(inp);
if (inp->inp_faddr.s_addr == INADDR_ANY) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
return (ENOTCONN);
}
-
+ INP_HASH_WLOCK(&V_udbinfo);
in_pcbdisconnect(inp);
inp->inp_laddr.s_addr = INADDR_ANY;
+ INP_HASH_WUNLOCK(&V_udbinfo);
SOCK_LOCK(so);
so->so_state &= ~SS_ISCONNECTED; /* XXX */
SOCK_UNLOCK(so);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
return (0);
}
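
udp_output() keeps its three-way locking strategy but now expresses it against the pcbinfo hash lock rather than the global info lock: UH_WLOCKED when a local port or address may be assigned, UH_RLOCKED when the bound 4-tuple merely has to be read consistently, UH_UNLOCKED for the connected-socket fast path. A hedged condensation of the two ends of that state machine follows; needs_consistent_binding is a hypothetical placeholder for the longer address/port tests in the real code, and the real acquire path first takes a read lock and upgrades rather than choosing up front:

	/* Acquire: pick the weakest lock that covers this send. */
	if (sin != NULL && inp->inp_laddr.s_addr == INADDR_ANY &&
	    inp->inp_lport == 0) {
		INP_WLOCK(inp);
		INP_HASH_WLOCK(&V_udbinfo);	/* may bind a port */
		unlock_udbinfo = UH_WLOCKED;
	} else if (needs_consistent_binding) {
		INP_RLOCK(inp);
		INP_HASH_RLOCK(&V_udbinfo);	/* read the 4-tuple only */
		unlock_udbinfo = UH_RLOCKED;
	} else {
		INP_RLOCK(inp);
		unlock_udbinfo = UH_UNLOCKED;	/* connected fast path */
	}

	/* Release: drop the hash lock before ip_output(), the inp lock after. */
	if (unlock_udbinfo == UH_WLOCKED)
		INP_HASH_WUNLOCK(&V_udbinfo);
	else if (unlock_udbinfo == UH_RLOCKED)
		INP_HASH_RUNLOCK(&V_udbinfo);
	error = ip_output(m, inp->inp_options, NULL, ipflags,
	    inp->inp_moptions, inp);
	if (unlock_udbinfo == UH_WLOCKED)
		INP_WUNLOCK(inp);
	else
		INP_RUNLOCK(inp);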