summaryrefslogtreecommitdiffstats
path: root/sys/netinet/in_mcast.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/netinet/in_mcast.c')
-rw-r--r--sys/netinet/in_mcast.c2413
1 files changed, 1730 insertions, 683 deletions
diff --git a/sys/netinet/in_mcast.c b/sys/netinet/in_mcast.c
index 7d9aecb..4ffabbd 100644
--- a/sys/netinet/in_mcast.c
+++ b/sys/netinet/in_mcast.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2007 Bruce M. Simpson.
+ * Copyright (c) 2007-2009 Bruce Simpson.
* Copyright (c) 2005 Robert N. M. Watson.
* All rights reserved.
*
@@ -30,10 +30,6 @@
/*
* IPv4 multicast socket, group, and socket option processing module.
- * Until further notice, this file requires INET to compile.
- * TODO: Make this infrastructure independent of address family.
- * TODO: Teach netinet6 to use this code.
- * TODO: Hook up SSM logic to IGMPv3/MLDv2.
*/
#include <sys/cdefs.h>
@@ -49,8 +45,11 @@ __FBSDID("$FreeBSD$");
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
+#include <sys/protosw.h>
#include <sys/sysctl.h>
#include <sys/vimage.h>
+#include <sys/ktr.h>
+#include <sys/tree.h>
#include <net/if.h>
#include <net/if_dl.h>
@@ -65,69 +64,164 @@ __FBSDID("$FreeBSD$");
#include <netinet/igmp_var.h>
#include <netinet/vinet.h>
+#ifndef KTR_IGMPV3
+#define KTR_IGMPV3 KTR_SUBSYS
+#endif
+
#ifndef __SOCKUNION_DECLARED
union sockunion {
struct sockaddr_storage ss;
struct sockaddr sa;
struct sockaddr_dl sdl;
struct sockaddr_in sin;
-#ifdef INET6
- struct sockaddr_in6 sin6;
-#endif
};
typedef union sockunion sockunion_t;
#define __SOCKUNION_DECLARED
#endif /* __SOCKUNION_DECLARED */
+static MALLOC_DEFINE(M_INMFILTER, "in_mfilter",
+ "IPv4 multicast PCB-layer source filter");
static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group");
static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options");
-static MALLOC_DEFINE(M_IPMSOURCE, "in_msource", "IPv4 multicast source filter");
+static MALLOC_DEFINE(M_IPMSOURCE, "ip_msource",
+ "IPv4 multicast IGMP-layer source filter");
-/*
- * The IPv4 multicast list (in_multihead and associated structures) are
- * protected by the global in_multi_mtx. See in_var.h for more details. For
- * now, in_multi_mtx is marked as recursible due to IGMP's calling back into
- * ip_output() to send IGMP packets while holding the lock; this probably is
- * not quite desirable.
- */
#ifdef VIMAGE_GLOBALS
-struct in_multihead in_multihead; /* XXX BSS initialization */
+struct in_multihead in_multihead; /* XXX now unused; retain for ABI */
#endif
+
+/*
+ * Locking:
+ * - Lock order is: Giant, INP_WLOCK, IN_MULTI_LOCK, IGMP_LOCK, IF_ADDR_LOCK.
+ * - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however
+ * it can be taken by code in net/if.c also.
+ * - ip_moptions and in_mfilter are covered by the INP_WLOCK.
+ *
+ * struct in_multi is covered by IN_MULTI_LOCK. There isn't strictly
+ * any need for in_multi itself to be virtualized -- it is bound to an ifp
+ * anyway no matter what happens.
+ */
struct mtx in_multi_mtx;
-MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF | MTX_RECURSE);
+MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF);
/*
* Functions with non-static linkage defined in this file should be
* declared in in_var.h:
- * imo_match_group()
- * imo_match_source()
+ * imo_multi_filter()
* in_addmulti()
* in_delmulti()
- * in_delmulti_locked()
+ * in_joingroup()
+ * in_joingroup_locked()
+ * in_leavegroup()
+ * in_leavegroup_locked()
* and ip_var.h:
* inp_freemoptions()
* inp_getmoptions()
* inp_setmoptions()
+ *
+ * XXX: Both carp and pf need to use the legacy (*,G) KPIs in_addmulti()
+ * and in_delmulti().
*/
+static void imf_commit(struct in_mfilter *);
+static int imf_get_source(struct in_mfilter *imf,
+ const struct sockaddr_in *psin,
+ struct in_msource **);
+static struct in_msource *
+ imf_graft(struct in_mfilter *, const uint8_t,
+ const struct sockaddr_in *);
+static void imf_leave(struct in_mfilter *);
+static int imf_prune(struct in_mfilter *, const struct sockaddr_in *);
+static void imf_purge(struct in_mfilter *);
+static void imf_rollback(struct in_mfilter *);
+static void imf_reap(struct in_mfilter *);
static int imo_grow(struct ip_moptions *);
-static int imo_join_source(struct ip_moptions *, size_t, sockunion_t *);
-static int imo_leave_source(struct ip_moptions *, size_t, sockunion_t *);
-static int inp_change_source_filter(struct inpcb *, struct sockopt *);
+static size_t imo_match_group(const struct ip_moptions *,
+ const struct ifnet *, const struct sockaddr *);
+static struct in_msource *
+ imo_match_source(const struct ip_moptions *, const size_t,
+ const struct sockaddr *);
+static void ims_merge(struct ip_msource *ims,
+ const struct in_msource *lims, const int rollback);
+static int in_getmulti(struct ifnet *, const struct in_addr *,
+ struct in_multi **);
+static int inm_get_source(struct in_multi *inm, const in_addr_t haddr,
+ const int noalloc, struct ip_msource **pims);
+static int inm_is_ifp_detached(const struct in_multi *);
+static int inm_merge(struct in_multi *, /*const*/ struct in_mfilter *);
+static void inm_purge(struct in_multi *);
+static void inm_reap(struct in_multi *);
static struct ip_moptions *
inp_findmoptions(struct inpcb *);
static int inp_get_source_filters(struct inpcb *, struct sockopt *);
static int inp_join_group(struct inpcb *, struct sockopt *);
static int inp_leave_group(struct inpcb *, struct sockopt *);
+static struct ifnet *
+ inp_lookup_mcast_ifp(const struct inpcb *,
+ const struct sockaddr_in *, const struct in_addr);
+static int inp_block_unblock_source(struct inpcb *, struct sockopt *);
static int inp_set_multicast_if(struct inpcb *, struct sockopt *);
static int inp_set_source_filters(struct inpcb *, struct sockopt *);
+static int sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS);
SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW, 0, "IPv4 multicast");
+static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER;
+SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc,
+ CTLFLAG_RW | CTLFLAG_TUN, &in_mcast_maxgrpsrc, 0,
+ "Max source filters per group");
+TUNABLE_ULONG("net.inet.ip.mcast.maxgrpsrc", &in_mcast_maxgrpsrc);
+
+static u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER;
+SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc,
+ CTLFLAG_RW | CTLFLAG_TUN, &in_mcast_maxsocksrc, 0,
+ "Max source filters per socket");
+TUNABLE_ULONG("net.inet.ip.mcast.maxsocksrc", &in_mcast_maxsocksrc);
+
int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP;
SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RW | CTLFLAG_TUN,
&in_mcast_loop, 0, "Loopback multicast datagrams by default");
TUNABLE_INT("net.inet.ip.mcast.loop", &in_mcast_loop);
+SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters,
+ CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters,
+ "Per-interface stack-wide source filters");
+
+/*
+ * Inline function which wraps assertions for a valid ifp.
+ * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp
+ * is detached.
+ */
+static int __inline
+inm_is_ifp_detached(const struct in_multi *inm)
+{
+ struct ifnet *ifp;
+
+ KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
+ ifp = inm->inm_ifma->ifma_ifp;
+ if (ifp != NULL) {
+ /*
+ * Sanity check that netinet's notion of ifp is the
+ * same as net's.
+ */
+ KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
+ }
+
+ return (ifp == NULL);
+}
+
+/*
+ * Initialize an in_mfilter structure to a known state at t0, t1
+ * with an empty source filter list.
+ */
+static __inline void
+imf_init(struct in_mfilter *imf, const int st0, const int st1)
+{
+ memset(imf, 0, sizeof(struct in_mfilter));
+ RB_INIT(&imf->imf_sources);
+ imf->imf_st[0] = st0;
+ imf->imf_st[1] = st1;
+}
+
/*
* Resize the ip_moptions vector to the next power-of-two minus 1.
* May be called with locks held; do not sleep.
@@ -154,13 +248,12 @@ imo_grow(struct ip_moptions *imo)
nmships = (struct in_multi **)realloc(omships,
sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT);
nmfilters = (struct in_mfilter *)realloc(omfilters,
- sizeof(struct in_mfilter) * newmax, M_IPMSOURCE, M_NOWAIT);
+ sizeof(struct in_mfilter) * newmax, M_INMFILTER, M_NOWAIT);
if (nmships != NULL && nmfilters != NULL) {
/* Initialize newly allocated source filter heads. */
for (idx = oldmax; idx < newmax; idx++) {
- nmfilters[idx].imf_fmode = MCAST_EXCLUDE;
- nmfilters[idx].imf_nsources = 0;
- TAILQ_INIT(&nmfilters[idx].imf_sources);
+ imf_init(&nmfilters[idx], MCAST_UNDEFINED,
+ MCAST_EXCLUDE);
}
imo->imo_max_memberships = newmax;
imo->imo_membership = nmships;
@@ -172,7 +265,7 @@ imo_grow(struct ip_moptions *imo)
if (nmships != NULL)
free(nmships, M_IPMOPTS);
if (nmfilters != NULL)
- free(nmfilters, M_IPMSOURCE);
+ free(nmfilters, M_INMFILTER);
return (ETOOMANYREFS);
}
@@ -180,80 +273,20 @@ imo_grow(struct ip_moptions *imo)
}
/*
- * Add a source to a multicast filter list.
- * Assumes the associated inpcb is locked.
- */
-static int
-imo_join_source(struct ip_moptions *imo, size_t gidx, sockunion_t *src)
-{
- struct in_msource *ims, *nims;
- struct in_mfilter *imf;
-
- KASSERT(src->ss.ss_family == AF_INET, ("%s: !AF_INET", __func__));
- KASSERT(imo->imo_mfilters != NULL,
- ("%s: imo_mfilters vector not allocated", __func__));
-
- imf = &imo->imo_mfilters[gidx];
- if (imf->imf_nsources == IP_MAX_SOURCE_FILTER)
- return (ENOBUFS);
-
- ims = imo_match_source(imo, gidx, &src->sa);
- if (ims != NULL)
- return (EADDRNOTAVAIL);
-
- /* Do not sleep with inp lock held. */
- nims = malloc(sizeof(struct in_msource),
- M_IPMSOURCE, M_NOWAIT | M_ZERO);
- if (nims == NULL)
- return (ENOBUFS);
-
- nims->ims_addr = src->ss;
- TAILQ_INSERT_TAIL(&imf->imf_sources, nims, ims_next);
- imf->imf_nsources++;
-
- return (0);
-}
-
-static int
-imo_leave_source(struct ip_moptions *imo, size_t gidx, sockunion_t *src)
-{
- struct in_msource *ims;
- struct in_mfilter *imf;
-
- KASSERT(src->ss.ss_family == AF_INET, ("%s: !AF_INET", __func__));
- KASSERT(imo->imo_mfilters != NULL,
- ("%s: imo_mfilters vector not allocated", __func__));
-
- imf = &imo->imo_mfilters[gidx];
- if (imf->imf_nsources == IP_MAX_SOURCE_FILTER)
- return (ENOBUFS);
-
- ims = imo_match_source(imo, gidx, &src->sa);
- if (ims == NULL)
- return (EADDRNOTAVAIL);
-
- TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
- free(ims, M_IPMSOURCE);
- imf->imf_nsources--;
-
- return (0);
-}
-
-/*
* Find an IPv4 multicast group entry for this ip_moptions instance
* which matches the specified group, and optionally an interface.
* Return its index into the array, or -1 if not found.
*/
-size_t
-imo_match_group(struct ip_moptions *imo, struct ifnet *ifp,
- struct sockaddr *group)
+static size_t
+imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp,
+ const struct sockaddr *group)
{
- sockunion_t *gsa;
+ const struct sockaddr_in *gsin;
struct in_multi **pinm;
int idx;
int nmships;
- gsa = (sockunion_t *)group;
+ gsin = (const struct sockaddr_in *)group;
/* The imo_membership array may be lazy allocated. */
if (imo->imo_membership == NULL || imo->imo_num_memberships == 0)
@@ -264,14 +297,8 @@ imo_match_group(struct ip_moptions *imo, struct ifnet *ifp,
for (idx = 0; idx < nmships; idx++, pinm++) {
if (*pinm == NULL)
continue;
-#if 0
- printf("%s: trying ifp = %p, inaddr = %s ", __func__,
- ifp, inet_ntoa(gsa->sin.sin_addr));
- printf("against %p, %s\n",
- (*pinm)->inm_ifp, inet_ntoa((*pinm)->inm_addr));
-#endif
if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) &&
- (*pinm)->inm_addr.s_addr == gsa->sin.sin_addr.s_addr) {
+ in_hosteq((*pinm)->inm_addr, gsin->sin_addr)) {
break;
}
}
@@ -282,14 +309,20 @@ imo_match_group(struct ip_moptions *imo, struct ifnet *ifp,
}
/*
- * Find a multicast source entry for this imo which matches
+ * Find an IPv4 multicast source entry for this imo which matches
* the given group index for this socket, and source address.
+ *
+ * NOTE: This does not check if the entry is in-mode, merely if
+ * it exists, which may not be the desired behaviour.
*/
-struct in_msource *
-imo_match_source(struct ip_moptions *imo, size_t gidx, struct sockaddr *src)
+static struct in_msource *
+imo_match_source(const struct ip_moptions *imo, const size_t gidx,
+ const struct sockaddr *src)
{
+ struct ip_msource find;
struct in_mfilter *imf;
- struct in_msource *ims, *pims;
+ struct ip_msource *ims;
+ const sockunion_t *psa;
KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__));
KASSERT(gidx != -1 && gidx < imo->imo_num_memberships,
@@ -298,41 +331,82 @@ imo_match_source(struct ip_moptions *imo, size_t gidx, struct sockaddr *src)
/* The imo_mfilters array may be lazy allocated. */
if (imo->imo_mfilters == NULL)
return (NULL);
-
- pims = NULL;
imf = &imo->imo_mfilters[gidx];
- TAILQ_FOREACH(ims, &imf->imf_sources, ims_next) {
- /*
- * Perform bitwise comparison of two IPv4 addresses.
- * TODO: Do the same for IPv6.
- * Do not use sa_equal() for this as it is not aware of
- * deeper structure in sockaddr_in or sockaddr_in6.
- */
- if (((struct sockaddr_in *)&ims->ims_addr)->sin_addr.s_addr ==
- ((struct sockaddr_in *)src)->sin_addr.s_addr) {
- pims = ims;
- break;
- }
- }
- return (pims);
+ /* Source trees are keyed in host byte order. */
+ psa = (const sockunion_t *)src;
+ find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr);
+ ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
+
+ return ((struct in_msource *)ims);
}
/*
- * Join an IPv4 multicast group.
+ * Perform filtering for multicast datagrams on a socket by group and source.
+ *
+ * Returns 0 if a datagram should be allowed through, or various error codes
+ * if the socket was not a member of the group, or the source was muted, etc.
*/
-struct in_multi *
-in_addmulti(struct in_addr *ap, struct ifnet *ifp)
+int
+imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp,
+ const struct sockaddr *group, const struct sockaddr *src)
{
- INIT_VNET_INET(ifp->if_vnet);
- struct in_multi *inm;
+ size_t gidx;
+ struct in_msource *ims;
+ int mode;
- inm = NULL;
+ KASSERT(ifp != NULL, ("%s: null ifp", __func__));
- IFF_LOCKGIANT(ifp);
- IN_MULTI_LOCK();
+ gidx = imo_match_group(imo, ifp, group);
+ if (gidx == -1)
+ return (MCAST_NOTGMEMBER);
+
+ /*
+ * Check if the source was included in an (S,G) join.
+ * Allow reception on exclusive memberships by default,
+ * reject reception on inclusive memberships by default.
+ * Exclude source only if an in-mode exclude filter exists.
+ * Include source only if an in-mode include filter exists.
+ * NOTE: We are comparing group state here at IGMP t1 (now)
+ * with socket-layer t0 (since last downcall).
+ */
+ mode = imo->imo_mfilters[gidx].imf_st[1];
+ ims = imo_match_source(imo, gidx, src);
+
+ if ((ims == NULL && mode == MCAST_INCLUDE) ||
+ (ims != NULL && ims->imsl_st[0] != mode))
+ return (MCAST_NOTSMEMBER);
+
+ return (MCAST_PASS);
+}
+
+/*
+ * Find and return a reference to an in_multi record for (ifp, group),
+ * and bump its reference count.
+ * If one does not exist, try to allocate it, and update link-layer multicast
+ * filters on ifp to listen for group.
+ * Assumes the IN_MULTI lock is held across the call.
+ * Return 0 if successful, otherwise return an appropriate error code.
+ */
+static int
+in_getmulti(struct ifnet *ifp, const struct in_addr *group,
+ struct in_multi **pinm)
+{
+ INIT_VNET_INET(ifp->if_vnet);
+ struct sockaddr_in gsin;
+ struct ifmultiaddr *ifma;
+ struct in_ifinfo *ii;
+ struct in_multi *inm;
+ int error;
+
+#if defined(INVARIANTS) && defined(IFF_ASSERTGIANT)
+ IFF_ASSERTGIANT(ifp);
+#endif
+ IN_MULTI_LOCK_ASSERT();
- IN_LOOKUP_MULTI(*ap, ifp, inm);
+ ii = (struct in_ifinfo *)ifp->if_afdata[AF_INET];
+
+ inm = inm_lookup(ifp, *group);
if (inm != NULL) {
/*
* If we already joined this group, just bump the
@@ -341,141 +415,900 @@ in_addmulti(struct in_addr *ap, struct ifnet *ifp)
KASSERT(inm->inm_refcount >= 1,
("%s: bad refcount %d", __func__, inm->inm_refcount));
++inm->inm_refcount;
- } else do {
- sockunion_t gsa;
- struct ifmultiaddr *ifma;
- struct in_multi *ninm;
- int error;
+ *pinm = inm;
+ return (0);
+ }
- memset(&gsa, 0, sizeof(gsa));
- gsa.sin.sin_family = AF_INET;
- gsa.sin.sin_len = sizeof(struct sockaddr_in);
- gsa.sin.sin_addr = *ap;
+ memset(&gsin, 0, sizeof(gsin));
+ gsin.sin_family = AF_INET;
+ gsin.sin_len = sizeof(struct sockaddr_in);
+ gsin.sin_addr = *group;
- /*
- * Check if a link-layer group is already associated
- * with this network-layer group on the given ifnet.
- * If so, bump the refcount on the existing network-layer
- * group association and return it.
- */
- error = if_addmulti(ifp, &gsa.sa, &ifma);
- if (error)
- break;
- if (ifma->ifma_protospec != NULL) {
- inm = (struct in_multi *)ifma->ifma_protospec;
+ /*
+ * Check if a link-layer group is already associated
+ * with this network-layer group on the given ifnet.
+ */
+ error = if_addmulti(ifp, (struct sockaddr *)&gsin, &ifma);
+ if (error != 0)
+ return (error);
+
+ /*
+ * If something other than netinet is occupying the link-layer
+ * group, print a meaningful error message and back out of
+ * the allocation.
+ * Otherwise, bump the refcount on the existing network-layer
+ * group association and return it.
+ */
+ if (ifma->ifma_protospec != NULL) {
+ inm = (struct in_multi *)ifma->ifma_protospec;
#ifdef INVARIANTS
- if (inm->inm_ifma != ifma || inm->inm_ifp != ifp ||
- inm->inm_addr.s_addr != ap->s_addr)
- panic("%s: ifma is inconsistent", __func__);
+ KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr",
+ __func__));
+ KASSERT(ifma->ifma_addr->sa_family == AF_INET,
+ ("%s: ifma not AF_INET", __func__));
+ KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__));
+ if (inm->inm_ifma != ifma || inm->inm_ifp != ifp ||
+ !in_hosteq(inm->inm_addr, *group))
+ panic("%s: ifma %p is inconsistent with %p (%s)",
+ __func__, ifma, inm, inet_ntoa(*group));
#endif
- ++inm->inm_refcount;
- break;
+ ++inm->inm_refcount;
+ *pinm = inm;
+ return (0);
+ }
+
+ /*
+ * A new in_multi record is needed; allocate and initialize it.
+ * We DO NOT perform an IGMP join as the in_ layer may need to
+ * push an initial source list down to IGMP to support SSM.
+ *
+ * The initial source filter state is INCLUDE, {} as per the RFC.
+ */
+ inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO);
+ if (inm == NULL) {
+ if_delmulti_ifma(ifma);
+ return (ENOMEM);
+ }
+ inm->inm_addr = *group;
+ inm->inm_ifp = ifp;
+ inm->inm_igi = ii->ii_igmp;
+ inm->inm_ifma = ifma;
+ inm->inm_refcount = 1;
+ inm->inm_state = IGMP_NOT_MEMBER;
+
+ /*
+ * Pending state-changes per group are subject to a bounds check.
+ */
+ IFQ_SET_MAXLEN(&inm->inm_scq, IGMP_MAX_STATE_CHANGES);
+
+ inm->inm_st[0].iss_fmode = MCAST_UNDEFINED;
+ inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
+ RB_INIT(&inm->inm_srcs);
+
+ ifma->ifma_protospec = inm;
+
+ *pinm = inm;
+
+ return (0);
+}
+
+/*
+ * Drop a reference to an in_multi record.
+ *
+ * If the refcount drops to 0, free the in_multi record and
+ * delete the underlying link-layer membership.
+ */
+void
+inm_release_locked(struct in_multi *inm)
+{
+ struct ifmultiaddr *ifma;
+
+#if defined(INVARIANTS) && defined(IFF_ASSERTGIANT)
+ if (!inm_is_ifp_detached(inm))
+ IFF_ASSERTGIANT(ifp);
+#endif
+
+ IN_MULTI_LOCK_ASSERT();
+
+ CTR2(KTR_IGMPV3, "%s: refcount is %d", __func__, inm->inm_refcount);
+
+ if (--inm->inm_refcount > 0) {
+ CTR2(KTR_IGMPV3, "%s: refcount is now %d", __func__,
+ inm->inm_refcount);
+ return;
+ }
+
+ CTR2(KTR_IGMPV3, "%s: freeing inm %p", __func__, inm);
+
+ ifma = inm->inm_ifma;
+
+ CTR2(KTR_IGMPV3, "%s: purging ifma %p", __func__, ifma);
+ KASSERT(ifma->ifma_protospec == inm,
+ ("%s: ifma_protospec != inm", __func__));
+ ifma->ifma_protospec = NULL;
+
+ inm_purge(inm);
+
+ free(inm, M_IPMADDR);
+
+ if_delmulti_ifma(ifma);
+}
+
+/*
+ * Clear recorded source entries for a group.
+ * Used by the IGMP code. Caller must hold the IN_MULTI lock.
+ * FIXME: Should reap.
+ */
+void
+inm_clear_recorded(struct in_multi *inm)
+{
+ struct ip_msource *ims;
+
+ IN_MULTI_LOCK_ASSERT();
+
+ RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
+ if (ims->ims_stp) {
+ ims->ims_stp = 0;
+ --inm->inm_st[1].iss_rec;
}
+ }
+ KASSERT(inm->inm_st[1].iss_rec == 0,
+ ("%s: iss_rec %d not 0", __func__, inm->inm_st[1].iss_rec));
+}
- /*
- * A new membership is needed; construct it and
- * perform the IGMP join.
- */
- ninm = malloc(sizeof(*ninm), M_IPMADDR, M_NOWAIT | M_ZERO);
- if (ninm == NULL) {
- if_delmulti_ifma(ifma);
+/*
+ * Record a source as pending for a Source-Group IGMPv3 query.
+ * This lives here as it modifies the shared tree.
+ *
+ * inm is the group descriptor.
+ * naddr is the address of the source to record in network-byte order.
+ *
+ * If the net.inet.igmp.sgalloc sysctl is non-zero, we will
+ * lazy-allocate a source node in response to an SG query.
+ * Otherwise, no allocation is performed. This saves some memory
+ * with the trade-off that the source will not be reported to the
+ * router if joined in the window between the query response and
+ * the group actually being joined on the local host.
+ *
+ * VIMAGE: XXX: Currently the igmp_sgalloc feature has been removed.
+ * This turns off the allocation of a recorded source entry if
+ * the group has not been joined.
+ *
+ * Return 0 if the source didn't exist or was already marked as recorded.
+ * Return 1 if the source was marked as recorded by this function.
+ * Return <0 if any error occured (negated errno code).
+ */
+int
+inm_record_source(struct in_multi *inm, const in_addr_t naddr)
+{
+ struct ip_msource find;
+ struct ip_msource *ims, *nims;
+
+ IN_MULTI_LOCK_ASSERT();
+
+ find.ims_haddr = ntohl(naddr);
+ ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
+ if (ims && ims->ims_stp)
+ return (0);
+ if (ims == NULL) {
+ if (inm->inm_nsrc == in_mcast_maxgrpsrc)
+ return (-ENOSPC);
+ nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE,
+ M_NOWAIT | M_ZERO);
+ if (nims == NULL)
+ return (-ENOMEM);
+ nims->ims_haddr = find.ims_haddr;
+ RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
+ ++inm->inm_nsrc;
+ ims = nims;
+ }
+
+ /*
+ * Mark the source as recorded and update the recorded
+ * source count.
+ */
+ ++ims->ims_stp;
+ ++inm->inm_st[1].iss_rec;
+
+ return (1);
+}
+
+/*
+ * Return a pointer to an in_msource owned by an in_mfilter,
+ * given its source address.
+ * Lazy-allocate if needed. If this is a new entry its filter state is
+ * undefined at t0.
+ *
+ * imf is the filter set being modified.
+ * haddr is the source address in *host* byte-order.
+ *
+ * SMPng: May be called with locks held; malloc must not block.
+ */
+static int
+imf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin,
+ struct in_msource **plims)
+{
+ struct ip_msource find;
+ struct ip_msource *ims, *nims;
+ struct in_msource *lims;
+ int error;
+
+ error = 0;
+ ims = NULL;
+ lims = NULL;
+
+ /* key is host byte order */
+ find.ims_haddr = ntohl(psin->sin_addr.s_addr);
+ ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
+ lims = (struct in_msource *)ims;
+ if (lims == NULL) {
+ if (imf->imf_nsrc == in_mcast_maxsocksrc)
+ return (ENOSPC);
+ nims = malloc(sizeof(struct in_msource), M_INMFILTER,
+ M_NOWAIT | M_ZERO);
+ if (nims == NULL)
+ return (ENOMEM);
+ lims = (struct in_msource *)nims;
+ lims->ims_haddr = find.ims_haddr;
+ lims->imsl_st[0] = MCAST_UNDEFINED;
+ RB_INSERT(ip_msource_tree, &imf->imf_sources, nims);
+ ++imf->imf_nsrc;
+ }
+
+ *plims = lims;
+
+ return (error);
+}
+
+/*
+ * Graft a source entry into an existing socket-layer filter set,
+ * maintaining any required invariants and checking allocations.
+ *
+ * The source is marked as being in the new filter mode at t1.
+ *
+ * Return the pointer to the new node, otherwise return NULL.
+ */
+static struct in_msource *
+imf_graft(struct in_mfilter *imf, const uint8_t st1,
+ const struct sockaddr_in *psin)
+{
+ struct ip_msource *nims;
+ struct in_msource *lims;
+
+ nims = malloc(sizeof(struct in_msource), M_INMFILTER,
+ M_NOWAIT | M_ZERO);
+ if (nims == NULL)
+ return (NULL);
+ lims = (struct in_msource *)nims;
+ lims->ims_haddr = ntohl(psin->sin_addr.s_addr);
+ lims->imsl_st[0] = MCAST_UNDEFINED;
+ lims->imsl_st[1] = st1;
+ RB_INSERT(ip_msource_tree, &imf->imf_sources, nims);
+ ++imf->imf_nsrc;
+
+ return (lims);
+}
+
+/*
+ * Prune a source entry from an existing socket-layer filter set,
+ * maintaining any required invariants and checking allocations.
+ *
+ * The source is marked as being left at t1, it is not freed.
+ *
+ * Return 0 if no error occurred, otherwise return an errno value.
+ */
+static int
+imf_prune(struct in_mfilter *imf, const struct sockaddr_in *psin)
+{
+ struct ip_msource find;
+ struct ip_msource *ims;
+ struct in_msource *lims;
+
+ /* key is host byte order */
+ find.ims_haddr = ntohl(psin->sin_addr.s_addr);
+ ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
+ if (ims == NULL)
+ return (ENOENT);
+ lims = (struct in_msource *)ims;
+ lims->imsl_st[1] = MCAST_UNDEFINED;
+ return (0);
+}
+
+/*
+ * Revert socket-layer filter set deltas at t1 to t0 state.
+ */
+static void
+imf_rollback(struct in_mfilter *imf)
+{
+ struct ip_msource *ims, *tims;
+ struct in_msource *lims;
+
+ RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
+ lims = (struct in_msource *)ims;
+ if (lims->imsl_st[0] == lims->imsl_st[1]) {
+ /* no change at t1 */
+ continue;
+ } else if (lims->imsl_st[0] != MCAST_UNDEFINED) {
+ /* revert change to existing source at t1 */
+ lims->imsl_st[1] = lims->imsl_st[0];
+ } else {
+ /* revert source added t1 */
+ CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
+ RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
+ free(ims, M_INMFILTER);
+ imf->imf_nsrc--;
+ }
+ }
+ imf->imf_st[1] = imf->imf_st[0];
+}
+
+/*
+ * Mark socket-layer filter set as INCLUDE {} at t1.
+ */
+static void
+imf_leave(struct in_mfilter *imf)
+{
+ struct ip_msource *ims;
+ struct in_msource *lims;
+
+ RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
+ lims = (struct in_msource *)ims;
+ lims->imsl_st[1] = MCAST_UNDEFINED;
+ }
+ imf->imf_st[1] = MCAST_INCLUDE;
+}
+
+/*
+ * Mark socket-layer filter set deltas as committed.
+ */
+static void
+imf_commit(struct in_mfilter *imf)
+{
+ struct ip_msource *ims;
+ struct in_msource *lims;
+
+ RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
+ lims = (struct in_msource *)ims;
+ lims->imsl_st[0] = lims->imsl_st[1];
+ }
+ imf->imf_st[0] = imf->imf_st[1];
+}
+
+/*
+ * Reap unreferenced sources from socket-layer filter set.
+ */
+static void
+imf_reap(struct in_mfilter *imf)
+{
+ struct ip_msource *ims, *tims;
+ struct in_msource *lims;
+
+ RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
+ lims = (struct in_msource *)ims;
+ if ((lims->imsl_st[0] == MCAST_UNDEFINED) &&
+ (lims->imsl_st[1] == MCAST_UNDEFINED)) {
+ CTR2(KTR_IGMPV3, "%s: free lims %p", __func__, ims);
+ RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
+ free(ims, M_INMFILTER);
+ imf->imf_nsrc--;
+ }
+ }
+}
+
+/*
+ * Purge socket-layer filter set.
+ */
+static void
+imf_purge(struct in_mfilter *imf)
+{
+ struct ip_msource *ims, *tims;
+
+ RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
+ CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
+ RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
+ free(ims, M_INMFILTER);
+ imf->imf_nsrc--;
+ }
+ imf->imf_st[0] = imf->imf_st[1] = MCAST_UNDEFINED;
+ KASSERT(RB_EMPTY(&imf->imf_sources),
+ ("%s: imf_sources not empty", __func__));
+}
+
+/*
+ * Look up a source filter entry for a multicast group.
+ *
+ * inm is the group descriptor to work with.
+ * haddr is the host-byte-order IPv4 address to look up.
+ * noalloc may be non-zero to suppress allocation of sources.
+ * *pims will be set to the address of the retrieved or allocated source.
+ *
+ * SMPng: NOTE: may be called with locks held.
+ * Return 0 if successful, otherwise return a non-zero error code.
+ */
+static int
+inm_get_source(struct in_multi *inm, const in_addr_t haddr,
+ const int noalloc, struct ip_msource **pims)
+{
+ struct ip_msource find;
+ struct ip_msource *ims, *nims;
+#ifdef KTR
+ struct in_addr ia;
+#endif
+
+ find.ims_haddr = haddr;
+ ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
+ if (ims == NULL && !noalloc) {
+ if (inm->inm_nsrc == in_mcast_maxgrpsrc)
+ return (ENOSPC);
+ nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE,
+ M_NOWAIT | M_ZERO);
+ if (nims == NULL)
+ return (ENOMEM);
+ nims->ims_haddr = haddr;
+ RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
+ ++inm->inm_nsrc;
+ ims = nims;
+#ifdef KTR
+ ia.s_addr = htonl(haddr);
+ CTR3(KTR_IGMPV3, "%s: allocated %s as %p", __func__,
+ inet_ntoa(ia), ims);
+#endif
+ }
+
+ *pims = ims;
+ return (0);
+}
+
+/*
+ * Merge socket-layer source into IGMP-layer source.
+ * If rollback is non-zero, perform the inverse of the merge.
+ */
+static void
+ims_merge(struct ip_msource *ims, const struct in_msource *lims,
+ const int rollback)
+{
+ int n = rollback ? -1 : 1;
+#ifdef KTR
+ struct in_addr ia;
+
+ ia.s_addr = htonl(ims->ims_haddr);
+#endif
+
+ if (lims->imsl_st[0] == MCAST_EXCLUDE) {
+ CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on %s",
+ __func__, n, inet_ntoa(ia));
+ ims->ims_st[1].ex -= n;
+ } else if (lims->imsl_st[0] == MCAST_INCLUDE) {
+ CTR3(KTR_IGMPV3, "%s: t1 in -= %d on %s",
+ __func__, n, inet_ntoa(ia));
+ ims->ims_st[1].in -= n;
+ }
+
+ if (lims->imsl_st[1] == MCAST_EXCLUDE) {
+ CTR3(KTR_IGMPV3, "%s: t1 ex += %d on %s",
+ __func__, n, inet_ntoa(ia));
+ ims->ims_st[1].ex += n;
+ } else if (lims->imsl_st[1] == MCAST_INCLUDE) {
+ CTR3(KTR_IGMPV3, "%s: t1 in += %d on %s",
+ __func__, n, inet_ntoa(ia));
+ ims->ims_st[1].in += n;
+ }
+}
+
+/*
+ * Atomically update the global in_multi state, when a membership's
+ * filter list is being updated in any way.
+ *
+ * imf is the per-inpcb-membership group filter pointer.
+ * A fake imf may be passed for in-kernel consumers.
+ *
+ * XXX This is a candidate for a set-symmetric-difference style loop
+ * which would eliminate the repeated lookup from root of ims nodes,
+ * as they share the same key space.
+ *
+ * If any error occurred this function will back out of refcounts
+ * and return a non-zero value.
+ */
+static int
+inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
+{
+ struct ip_msource *ims, *nims;
+ struct in_msource *lims;
+ int schanged, error;
+ int nsrc0, nsrc1;
+
+ schanged = 0;
+ error = 0;
+ nsrc1 = nsrc0 = 0;
+
+ /*
+ * Update the source filters first, as this may fail.
+ * Maintain count of in-mode filters at t0, t1. These are
+ * used to work out if we transition into ASM mode or not.
+ * Maintain a count of source filters whose state was
+ * actually modified by this operation.
+ */
+ RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
+ lims = (struct in_msource *)ims;
+ if (lims->imsl_st[0] == imf->imf_st[0]) nsrc0++;
+ if (lims->imsl_st[1] == imf->imf_st[1]) nsrc1++;
+ if (lims->imsl_st[0] == lims->imsl_st[1]) continue;
+ error = inm_get_source(inm, lims->ims_haddr, 0, &nims);
+ ++schanged;
+ if (error)
break;
+ ims_merge(nims, lims, 0);
+ }
+ if (error) {
+ struct ip_msource *bims;
+
+ RB_FOREACH_REVERSE_FROM(ims, ip_msource_tree, nims) {
+ lims = (struct in_msource *)ims;
+ if (lims->imsl_st[0] == lims->imsl_st[1])
+ continue;
+ (void)inm_get_source(inm, lims->ims_haddr, 1, &bims);
+ if (bims == NULL)
+ continue;
+ ims_merge(bims, lims, 1);
+ }
+ goto out_reap;
+ }
+
+ CTR3(KTR_IGMPV3, "%s: imf filters in-mode: %d at t0, %d at t1",
+ __func__, nsrc0, nsrc1);
+
+ /* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */
+ if (imf->imf_st[0] == imf->imf_st[1] &&
+ imf->imf_st[1] == MCAST_INCLUDE) {
+ if (nsrc1 == 0) {
+ CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
+ --inm->inm_st[1].iss_in;
}
- ninm->inm_addr = *ap;
- ninm->inm_ifp = ifp;
- ninm->inm_ifma = ifma;
- ninm->inm_refcount = 1;
- ifma->ifma_protospec = ninm;
- LIST_INSERT_HEAD(&V_in_multihead, ninm, inm_link);
+ }
+
+ /* Handle filter mode transition on socket. */
+ if (imf->imf_st[0] != imf->imf_st[1]) {
+ CTR3(KTR_IGMPV3, "%s: imf transition %d to %d",
+ __func__, imf->imf_st[0], imf->imf_st[1]);
+
+ if (imf->imf_st[0] == MCAST_EXCLUDE) {
+ CTR1(KTR_IGMPV3, "%s: --ex on inm at t1", __func__);
+ --inm->inm_st[1].iss_ex;
+ } else if (imf->imf_st[0] == MCAST_INCLUDE) {
+ CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
+ --inm->inm_st[1].iss_in;
+ }
+
+ if (imf->imf_st[1] == MCAST_EXCLUDE) {
+ CTR1(KTR_IGMPV3, "%s: ex++ on inm at t1", __func__);
+ inm->inm_st[1].iss_ex++;
+ } else if (imf->imf_st[1] == MCAST_INCLUDE && nsrc1 > 0) {
+ CTR1(KTR_IGMPV3, "%s: in++ on inm at t1", __func__);
+ inm->inm_st[1].iss_in++;
+ }
+ }
+
+ /*
+ * Track inm filter state in terms of listener counts.
+ * If there are any exclusive listeners, stack-wide
+ * membership is exclusive.
+ * Otherwise, if only inclusive listeners, stack-wide is inclusive.
+ * If no listeners remain, state is undefined at t1,
+ * and the IGMP lifecycle for this group should finish.
+ */
+ if (inm->inm_st[1].iss_ex > 0) {
+ CTR1(KTR_IGMPV3, "%s: transition to EX", __func__);
+ inm->inm_st[1].iss_fmode = MCAST_EXCLUDE;
+ } else if (inm->inm_st[1].iss_in > 0) {
+ CTR1(KTR_IGMPV3, "%s: transition to IN", __func__);
+ inm->inm_st[1].iss_fmode = MCAST_INCLUDE;
+ } else {
+ CTR1(KTR_IGMPV3, "%s: transition to UNDEF", __func__);
+ inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
+ }
+
+ /* Decrement ASM listener count on transition out of ASM mode. */
+ if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) {
+ if ((imf->imf_st[1] != MCAST_EXCLUDE) ||
+ (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0))
+ CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__);
+ --inm->inm_st[1].iss_asm;
+ }
+
+ /* Increment ASM listener count on transition to ASM mode. */
+ if (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 == 0) {
+ CTR1(KTR_IGMPV3, "%s: asm++ on inm at t1", __func__);
+ inm->inm_st[1].iss_asm++;
+ }
+
+ CTR3(KTR_IGMPV3, "%s: merged imf %p to inm %p", __func__, imf, inm);
+ inm_print(inm);
+
+out_reap:
+ if (schanged > 0) {
+ CTR1(KTR_IGMPV3, "%s: sources changed; reaping", __func__);
+ inm_reap(inm);
+ }
+ return (error);
+}
+
+/*
+ * Mark an in_multi's filter set deltas as committed.
+ * Called by IGMP after a state change has been enqueued.
+ */
+void
+inm_commit(struct in_multi *inm)
+{
+ struct ip_msource *ims;
- igmp_joingroup(ninm);
+ CTR2(KTR_IGMPV3, "%s: commit inm %p", __func__, inm);
+ CTR1(KTR_IGMPV3, "%s: pre commit:", __func__);
+ inm_print(inm);
- inm = ninm;
- } while (0);
+ RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
+ ims->ims_st[0] = ims->ims_st[1];
+ }
+ inm->inm_st[0] = inm->inm_st[1];
+}
+/*
+ * Reap unreferenced nodes from an in_multi's filter set.
+ */
+static void
+inm_reap(struct in_multi *inm)
+{
+ struct ip_msource *ims, *tims;
+
+ RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
+ if (ims->ims_st[0].ex > 0 || ims->ims_st[0].in > 0 ||
+ ims->ims_st[1].ex > 0 || ims->ims_st[1].in > 0 ||
+ ims->ims_stp != 0)
+ continue;
+ CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
+ RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
+ free(ims, M_IPMSOURCE);
+ inm->inm_nsrc--;
+ }
+}
+
+/*
+ * Purge all source nodes from an in_multi's filter set.
+ */
+static void
+inm_purge(struct in_multi *inm)
+{
+ struct ip_msource *ims, *tims;
+
+ RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
+ CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
+ RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
+ free(ims, M_IPMSOURCE);
+ inm->inm_nsrc--;
+ }
+}
+
+/*
+ * Join a multicast group; unlocked entry point.
+ *
+ * SMPng: XXX: in_joingroup() is called from in_control() when Giant
+ * is not held. Fortunately, ifp is unlikely to have been detached
+ * at this point, so we assume it's OK to recurse.
+ */
+int
+in_joingroup(struct ifnet *ifp, const struct in_addr *gina,
+ /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
+{
+ int error;
+
+ IFF_LOCKGIANT(ifp);
+ IN_MULTI_LOCK();
+ error = in_joingroup_locked(ifp, gina, imf, pinm);
IN_MULTI_UNLOCK();
IFF_UNLOCKGIANT(ifp);
- return (inm);
+ return (error);
}
/*
- * Leave an IPv4 multicast group.
- * It is OK to call this routine if the underlying ifnet went away.
+ * Join a multicast group; real entry point.
*
- * XXX: To deal with the ifp going away, we cheat; the link-layer code in net
- * will set ifma_ifp to NULL when the associated ifnet instance is detached
- * from the system.
+ * Only preserves atomicity at inm level.
+ * NOTE: imf argument cannot be const due to sys/tree.h limitations.
*
- * The only reason we need to violate layers and check ifma_ifp here at all
- * is because certain hardware drivers still require Giant to be held,
- * and it must always be taken before other locks.
+ * If the IGMP downcall fails, the group is not joined, and an error
+ * code is returned.
*/
-void
-in_delmulti(struct in_multi *inm)
+int
+in_joingroup_locked(struct ifnet *ifp, const struct in_addr *gina,
+ /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
{
- struct ifnet *ifp;
+ struct in_mfilter timf;
+ struct in_multi *inm;
+ int error;
- KASSERT(inm != NULL, ("%s: inm is NULL", __func__));
- KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
- ifp = inm->inm_ifma->ifma_ifp;
+ IN_MULTI_LOCK_ASSERT();
- if (ifp != NULL) {
- /*
- * Sanity check that netinet's notion of ifp is the
- * same as net's.
- */
- KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
- IFF_LOCKGIANT(ifp);
+ CTR4(KTR_IGMPV3, "%s: join %s on %p(%s))", __func__,
+ inet_ntoa(*gina), ifp, ifp->if_xname);
+
+ error = 0;
+ inm = NULL;
+
+ /*
+ * If no imf was specified (i.e. kernel consumer),
+ * fake one up and assume it is an ASM join.
+ */
+ if (imf == NULL) {
+ imf_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE);
+ imf = &timf;
+ }
+
+ error = in_getmulti(ifp, gina, &inm);
+ if (error) {
+ CTR1(KTR_IGMPV3, "%s: in_getmulti() failure", __func__);
+ return (error);
+ }
+
+ CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
+ error = inm_merge(inm, imf);
+ if (error) {
+ CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
+ goto out_inm_release;
+ }
+
+ CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
+ error = igmp_change_state(inm);
+ if (error) {
+ CTR1(KTR_IGMPV3, "%s: failed to update source", __func__);
+ goto out_inm_release;
+ }
+
+out_inm_release:
+ if (error) {
+ CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);
+ inm_release_locked(inm);
+ } else {
+ *pinm = inm;
}
+ return (error);
+}
+
+/*
+ * Leave a multicast group; unlocked entry point.
+ */
+int
+in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
+{
+ struct ifnet *ifp;
+ int detached, error;
+
+ detached = inm_is_ifp_detached(inm);
+ ifp = inm->inm_ifp;
+ if (!detached)
+ IFF_LOCKGIANT(ifp);
+
IN_MULTI_LOCK();
- in_delmulti_locked(inm);
+ error = in_leavegroup_locked(inm, imf);
IN_MULTI_UNLOCK();
- if (ifp != NULL)
+ if (!detached)
IFF_UNLOCKGIANT(ifp);
+
+ return (error);
}
/*
- * Delete a multicast address record, with locks held.
+ * Leave a multicast group; real entry point.
+ * All source filters will be expunged.
+ *
+ * Only preserves atomicity at inm level.
*
- * It is OK to call this routine if the ifp went away.
- * Assumes that caller holds the IN_MULTI lock, and that
- * Giant was taken before other locks if required by the hardware.
+ * Holding the write lock for the INP which contains imf
+ * is highly advisable. We can't assert for it as imf does not
+ * contain a back-pointer to the owning inp.
+ *
+ * Note: This is not the same as inm_release(*) as this function also
+ * makes a state change downcall into IGMP.
*/
-void
-in_delmulti_locked(struct in_multi *inm)
+int
+in_leavegroup_locked(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
{
- struct ifmultiaddr *ifma;
-
- IN_MULTI_LOCK_ASSERT();
- KASSERT(inm->inm_refcount >= 1, ("%s: freeing freed inm", __func__));
+ struct in_mfilter timf;
+ int error;
- if (--inm->inm_refcount == 0) {
- igmp_leavegroup(inm);
+ error = 0;
- ifma = inm->inm_ifma;
-#ifdef DIAGNOSTIC
- if (bootverbose)
- printf("%s: purging ifma %p\n", __func__, ifma);
+#if defined(INVARIANTS) && defined(IFF_ASSERTGIANT)
+ if (!inm_is_ifp_detached(inm))
+ IFF_ASSERTGIANT(inm->inm_ifp);
#endif
- KASSERT(ifma->ifma_protospec == inm,
- ("%s: ifma_protospec != inm", __func__));
- ifma->ifma_protospec = NULL;
- LIST_REMOVE(inm, inm_link);
- free(inm, M_IPMADDR);
+ IN_MULTI_LOCK_ASSERT();
- if_delmulti_ifma(ifma);
+ CTR5(KTR_IGMPV3, "%s: leave inm %p, %s/%s, imf %p", __func__,
+ inm, inet_ntoa(inm->inm_addr),
+ (inm_is_ifp_detached(inm) ? "null" : inm->inm_ifp->if_xname),
+ imf);
+
+ /*
+ * If no imf was specified (i.e. kernel consumer),
+	 * fake one up and assume it is an ASM leave.
+ */
+ if (imf == NULL) {
+ imf_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED);
+ imf = &timf;
}
+
+ /*
+ * Begin state merge transaction at IGMP layer.
+ *
+ * As this particular invocation should not cause any memory
+ * to be allocated, and there is no opportunity to roll back
+ * the transaction, it MUST NOT fail.
+ */
+ CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
+ error = inm_merge(inm, imf);
+ KASSERT(error == 0, ("%s: failed to merge inm state", __func__));
+
+ CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
+ error = igmp_change_state(inm);
+ if (error)
+ CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
+
+ CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);
+ inm_release_locked(inm);
+
+ return (error);
+}
+
+/*#ifndef BURN_BRIDGES*/
+/*
+ * Join an IPv4 multicast group in (*,G) exclusive mode.
+ * The group must be a 224.0.0.0/24 link-scope group.
+ * This KPI is for legacy kernel consumers only.
+ */
+struct in_multi *
+in_addmulti(struct in_addr *ap, struct ifnet *ifp)
+{
+ struct in_multi *pinm;
+ int error;
+
+ KASSERT(IN_LOCAL_GROUP(ntohl(ap->s_addr)),
+ ("%s: %s not in 224.0.0.0/24", __func__, inet_ntoa(*ap)));
+
+ error = in_joingroup(ifp, ap, NULL, &pinm);
+ if (error != 0)
+ pinm = NULL;
+
+ return (pinm);
}
/*
- * Block or unblock an ASM/SSM multicast source on an inpcb.
+ * Leave an IPv4 multicast group, assumed to be in exclusive (*,G) mode.
+ * This KPI is for legacy kernel consumers only.
+ */
+void
+in_delmulti(struct in_multi *inm)
+{
+
+ (void)in_leavegroup(inm, NULL);
+}
+/*#endif*/
+
+/*
+ * Block or unblock an ASM multicast source on an inpcb.
+ * This implements the delta-based API described in RFC 3678.
+ *
+ * The delta-based API applies only to exclusive-mode memberships.
+ * An IGMP downcall will be performed.
+ *
+ * SMPng: NOTE: Must take Giant as a join may create a new ifma.
+ *
+ * Return 0 if successful, otherwise return an appropriate error code.
*/
static int
-inp_change_source_filter(struct inpcb *inp, struct sockopt *sopt)
+inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
{
INIT_VNET_NET(curvnet);
INIT_VNET_INET(curvnet);
@@ -485,13 +1318,14 @@ inp_change_source_filter(struct inpcb *inp, struct sockopt *sopt)
struct in_mfilter *imf;
struct ip_moptions *imo;
struct in_msource *ims;
+ struct in_multi *inm;
size_t idx;
- int error;
- int block;
+ uint16_t fmode;
+ int error, doblock;
ifp = NULL;
error = 0;
- block = 0;
+ doblock = 0;
memset(&gsr, 0, sizeof(struct group_source_req));
gsa = (sockunion_t *)&gsr.gsr_group;
@@ -516,18 +1350,14 @@ inp_change_source_filter(struct inpcb *inp, struct sockopt *sopt)
ssa->sin.sin_len = sizeof(struct sockaddr_in);
ssa->sin.sin_addr = mreqs.imr_sourceaddr;
- if (mreqs.imr_interface.s_addr != INADDR_ANY)
+ if (!in_nullhost(mreqs.imr_interface))
INADDR_TO_IFP(mreqs.imr_interface, ifp);
if (sopt->sopt_name == IP_BLOCK_SOURCE)
- block = 1;
+ doblock = 1;
-#ifdef DIAGNOSTIC
- if (bootverbose) {
- printf("%s: imr_interface = %s, ifp = %p\n",
- __func__, inet_ntoa(mreqs.imr_interface), ifp);
- }
-#endif
+ CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p",
+ __func__, inet_ntoa(mreqs.imr_interface), ifp);
break;
}
@@ -553,24 +1383,21 @@ inp_change_source_filter(struct inpcb *inp, struct sockopt *sopt)
ifp = ifnet_byindex(gsr.gsr_interface);
if (sopt->sopt_name == MCAST_BLOCK_SOURCE)
- block = 1;
+ doblock = 1;
break;
default:
-#ifdef DIAGNOSTIC
- if (bootverbose) {
- printf("%s: unknown sopt_name %d\n", __func__,
- sopt->sopt_name);
- }
-#endif
+ CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
+ __func__, sopt->sopt_name);
return (EOPNOTSUPP);
break;
}
- /* XXX INET6 */
if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
return (EINVAL);
+ IFF_LOCKGIANT(ifp);
+
/*
* Check if we are actually a member of this group.
*/
@@ -578,103 +1405,97 @@ inp_change_source_filter(struct inpcb *inp, struct sockopt *sopt)
idx = imo_match_group(imo, ifp, &gsa->sa);
if (idx == -1 || imo->imo_mfilters == NULL) {
error = EADDRNOTAVAIL;
- goto out_locked;
+ goto out_inp_locked;
}
KASSERT(imo->imo_mfilters != NULL,
("%s: imo_mfilters not allocated", __func__));
imf = &imo->imo_mfilters[idx];
+ inm = imo->imo_membership[idx];
/*
- * SSM multicast truth table for block/unblock operations.
- *
- * Operation Filter Mode Entry exists? Action
- *
- * block exclude no add source to filter
- * unblock include no add source to filter
- * block include no EINVAL
- * unblock exclude no EINVAL
- * block exclude yes EADDRNOTAVAIL
- * unblock include yes EADDRNOTAVAIL
- * block include yes remove source from filter
- * unblock exclude yes remove source from filter
- *
- * FreeBSD does not explicitly distinguish between ASM and SSM
- * mode sockets; all sockets are assumed to have a filter list.
+	 * Attempting to use the delta-based API on a
+	 * non-exclusive-mode membership is an error.
*/
-#ifdef DIAGNOSTIC
- if (bootverbose) {
- printf("%s: imf_fmode is %s\n", __func__,
- imf->imf_fmode == MCAST_INCLUDE ? "include" : "exclude");
+ fmode = imf->imf_st[0];
+ if (fmode != MCAST_EXCLUDE) {
+ error = EINVAL;
+ goto out_inp_locked;
}
-#endif
+
+ /*
+ * Deal with error cases up-front:
+ * Asked to block, but already blocked; or
+ * Asked to unblock, but nothing to unblock.
+ * If adding a new block entry, allocate it.
+ */
ims = imo_match_source(imo, idx, &ssa->sa);
- if (ims == NULL) {
- if ((block == 1 && imf->imf_fmode == MCAST_EXCLUDE) ||
- (block == 0 && imf->imf_fmode == MCAST_INCLUDE)) {
-#ifdef DIAGNOSTIC
- if (bootverbose) {
- printf("%s: adding %s to filter list\n",
- __func__, inet_ntoa(ssa->sin.sin_addr));
- }
-#endif
- error = imo_join_source(imo, idx, ssa);
- }
- if ((block == 1 && imf->imf_fmode == MCAST_INCLUDE) ||
- (block == 0 && imf->imf_fmode == MCAST_EXCLUDE)) {
- /*
- * If the socket is in inclusive mode:
- * the source is already blocked as it has no entry.
- * If the socket is in exclusive mode:
- * the source is already unblocked as it has no entry.
- */
-#ifdef DIAGNOSTIC
- if (bootverbose) {
- printf("%s: ims %p; %s already [un]blocked\n",
- __func__, ims,
- inet_ntoa(ssa->sin.sin_addr));
- }
-#endif
- error = EINVAL;
- }
+ if ((ims != NULL && doblock) || (ims == NULL && !doblock)) {
+ CTR3(KTR_IGMPV3, "%s: source %s %spresent", __func__,
+ inet_ntoa(ssa->sin.sin_addr), doblock ? "" : "not ");
+ error = EADDRNOTAVAIL;
+ goto out_inp_locked;
+ }
+
+ INP_WLOCK_ASSERT(inp);
+
+ /*
+ * Begin state merge transaction at socket layer.
+ */
+ if (doblock) {
+ CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block");
+ ims = imf_graft(imf, fmode, &ssa->sin);
+ if (ims == NULL)
+ error = ENOMEM;
} else {
- if ((block == 1 && imf->imf_fmode == MCAST_EXCLUDE) ||
- (block == 0 && imf->imf_fmode == MCAST_INCLUDE)) {
- /*
- * If the socket is in exclusive mode:
- * the source is already blocked as it has an entry.
- * If the socket is in inclusive mode:
- * the source is already unblocked as it has an entry.
- */
-#ifdef DIAGNOSTIC
- if (bootverbose) {
- printf("%s: ims %p; %s already [un]blocked\n",
- __func__, ims,
- inet_ntoa(ssa->sin.sin_addr));
- }
-#endif
- error = EADDRNOTAVAIL;
- }
- if ((block == 1 && imf->imf_fmode == MCAST_INCLUDE) ||
- (block == 0 && imf->imf_fmode == MCAST_EXCLUDE)) {
-#ifdef DIAGNOSTIC
- if (bootverbose) {
- printf("%s: removing %s from filter list\n",
- __func__, inet_ntoa(ssa->sin.sin_addr));
- }
-#endif
- error = imo_leave_source(imo, idx, ssa);
- }
+ CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow");
+ error = imf_prune(imf, &ssa->sin);
+ }
+
+ if (error) {
+ CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__);
+ goto out_imf_rollback;
+ }
+
+ /*
+ * Begin state merge transaction at IGMP layer.
+ */
+ IN_MULTI_LOCK();
+
+ CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
+ error = inm_merge(inm, imf);
+ if (error) {
+ CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
+ goto out_imf_rollback;
}
-out_locked:
+ CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
+ error = igmp_change_state(inm);
+ if (error)
+ CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
+
+ IN_MULTI_UNLOCK();
+
+out_imf_rollback:
+ if (error)
+ imf_rollback(imf);
+ else
+ imf_commit(imf);
+
+ imf_reap(imf);
+
+out_inp_locked:
INP_WUNLOCK(inp);
+ IFF_UNLOCKGIANT(ifp);
return (error);
}
/*
* Given an inpcb, return its multicast options structure pointer. Accepts
* an unlocked inpcb pointer, but will return it locked. May sleep.
+ *
+ * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held.
+ * SMPng: NOTE: Returns with the INP write lock held.
*/
static struct ip_moptions *
inp_findmoptions(struct inpcb *inp)
@@ -690,13 +1511,11 @@ inp_findmoptions(struct inpcb *inp)
INP_WUNLOCK(inp);
- imo = (struct ip_moptions *)malloc(sizeof(*imo), M_IPMOPTS,
- M_WAITOK);
- immp = (struct in_multi **)malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS,
- M_IPMOPTS, M_WAITOK | M_ZERO);
- imfp = (struct in_mfilter *)malloc(
- sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS,
- M_IPMSOURCE, M_WAITOK);
+ imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK);
+ immp = malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS, M_IPMOPTS,
+ M_WAITOK | M_ZERO);
+ imfp = malloc(sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS,
+ M_INMFILTER, M_WAITOK);
imo->imo_multicast_ifp = NULL;
imo->imo_multicast_addr.s_addr = INADDR_ANY;
@@ -708,16 +1527,13 @@ inp_findmoptions(struct inpcb *inp)
imo->imo_membership = immp;
/* Initialize per-group source filters. */
- for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++) {
- imfp[idx].imf_fmode = MCAST_EXCLUDE;
- imfp[idx].imf_nsources = 0;
- TAILQ_INIT(&imfp[idx].imf_sources);
- }
+ for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++)
+ imf_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE);
imo->imo_mfilters = imfp;
INP_WLOCK(inp);
if (inp->inp_moptions != NULL) {
- free(imfp, M_IPMSOURCE);
+ free(imfp, M_INMFILTER);
free(immp, M_IPMOPTS);
free(imo, M_IPMOPTS);
return (inp->inp_moptions);
@@ -728,35 +1544,29 @@ inp_findmoptions(struct inpcb *inp)
/*
* Discard the IP multicast options (and source filters).
+ *
+ * SMPng: NOTE: assumes INP write lock is held.
*/
void
inp_freemoptions(struct ip_moptions *imo)
{
struct in_mfilter *imf;
- struct in_msource *ims, *tims;
size_t idx, nmships;
KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
nmships = imo->imo_num_memberships;
for (idx = 0; idx < nmships; ++idx) {
- in_delmulti(imo->imo_membership[idx]);
-
- if (imo->imo_mfilters != NULL) {
- imf = &imo->imo_mfilters[idx];
- TAILQ_FOREACH_SAFE(ims, &imf->imf_sources,
- ims_next, tims) {
- TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
- free(ims, M_IPMSOURCE);
- imf->imf_nsources--;
- }
- KASSERT(imf->imf_nsources == 0,
- ("%s: did not free all imf_nsources", __func__));
- }
+ imf = imo->imo_mfilters ? &imo->imo_mfilters[idx] : NULL;
+ if (imf)
+ imf_leave(imf);
+ (void)in_leavegroup(imo->imo_membership[idx], imf);
+ if (imf)
+ imf_purge(imf);
}
- if (imo->imo_mfilters != NULL)
- free(imo->imo_mfilters, M_IPMSOURCE);
+ if (imo->imo_mfilters)
+ free(imo->imo_mfilters, M_INMFILTER);
free(imo->imo_membership, M_IPMOPTS);
free(imo, M_IPMOPTS);
}
@@ -774,11 +1584,13 @@ inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
struct ifnet *ifp;
struct ip_moptions *imo;
struct in_mfilter *imf;
- struct in_msource *ims;
+ struct ip_msource *ims;
+ struct in_msource *lims;
+ struct sockaddr_in *psin;
struct sockaddr_storage *ptss;
struct sockaddr_storage *tss;
int error;
- size_t idx;
+ size_t idx, nsrcs, ncsrcs;
INP_WLOCK_ASSERT(inp);
@@ -810,36 +1622,52 @@ inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
INP_WUNLOCK(inp);
return (EADDRNOTAVAIL);
}
-
imf = &imo->imo_mfilters[idx];
- msfr.msfr_fmode = imf->imf_fmode;
- msfr.msfr_nsrcs = imf->imf_nsources;
+
+ /*
+ * Ignore memberships which are in limbo.
+ */
+ if (imf->imf_st[1] == MCAST_UNDEFINED) {
+ INP_WUNLOCK(inp);
+ return (EAGAIN);
+ }
+ msfr.msfr_fmode = imf->imf_st[1];
/*
* If the user specified a buffer, copy out the source filter
* entries to userland gracefully.
- * msfr.msfr_nsrcs is always set to the total number of filter
- * entries which the kernel currently has for this group.
+ * We only copy out the number of entries which userland
+ * has asked for, but we always tell userland how big the
+ * buffer really needs to be.
*/
tss = NULL;
if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
- /*
- * Make a copy of the source vector so that we do not
- * thrash the inpcb lock whilst copying it out.
- * We only copy out the number of entries which userland
- * has asked for, but we always tell userland how big the
- * buffer really needs to be.
- */
tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
- M_TEMP, M_NOWAIT);
+ M_TEMP, M_NOWAIT | M_ZERO);
if (tss == NULL) {
- error = ENOBUFS;
- } else {
- ptss = tss;
- TAILQ_FOREACH(ims, &imf->imf_sources, ims_next) {
- memcpy(ptss++, &ims->ims_addr,
- sizeof(struct sockaddr_storage));
- }
+ INP_WUNLOCK(inp);
+ return (ENOBUFS);
+ }
+ }
+
+ /*
+ * Count number of sources in-mode at t0.
+ * If buffer space exists and remains, copy out source entries.
+ */
+ nsrcs = msfr.msfr_nsrcs;
+ ncsrcs = 0;
+ ptss = tss;
+ RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
+ lims = (struct in_msource *)ims;
+ if (lims->imsl_st[0] == MCAST_UNDEFINED ||
+ lims->imsl_st[0] != imf->imf_st[0])
+ continue;
+ ++ncsrcs;
+ if (tss != NULL && nsrcs-- > 0) {
+ psin = (struct sockaddr_in *)ptss++;
+ psin->sin_family = AF_INET;
+ psin->sin_len = sizeof(struct sockaddr_in);
+ psin->sin_addr.s_addr = htonl(lims->ims_haddr);
}
}
@@ -849,11 +1677,11 @@ inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
error = copyout(tss, msfr.msfr_srcs,
sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
free(tss, M_TEMP);
+ if (error)
+ return (error);
}
- if (error)
- return (error);
-
+ msfr.msfr_nsrcs = ncsrcs;
error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq));
return (error);
@@ -901,7 +1729,7 @@ inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
memset(&mreqn, 0, sizeof(struct ip_mreqn));
if (imo != NULL) {
ifp = imo->imo_multicast_ifp;
- if (imo->imo_multicast_addr.s_addr != INADDR_ANY) {
+ if (!in_nullhost(imo->imo_multicast_addr)) {
mreqn.imr_address = imo->imo_multicast_addr;
} else if (ifp != NULL) {
mreqn.imr_ifindex = ifp->if_index;
@@ -967,6 +1795,73 @@ inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
}
/*
+ * Look up the ifnet to use for a multicast group membership,
+ * given the IPv4 address of an interface, and the IPv4 group address.
+ *
+ * This routine exists to support legacy multicast applications
+ * which do not understand that multicast memberships are scoped to
+ * specific physical links in the networking stack, or which need
+ * to join link-scope groups before IPv4 addresses are configured.
+ *
+ * If inp is non-NULL, use this socket's current FIB number for any
+ * required FIB lookup.
+ * If ina is INADDR_ANY, look up the group address in the unicast FIB,
+ * and use its ifp; usually, this points to the default next-hop.
+ *
+ * If the FIB lookup fails, attempt to use the first non-loopback
+ * interface with multicast capability in the system as a
+ * last resort. The legacy IPv4 ASM API requires that we do
+ * this in order to allow groups to be joined when the routing
+ * table has not yet been populated during boot.
+ *
+ * Returns NULL if no ifp could be found.
+ *
+ * SMPng: TODO: Acquire the appropriate locks for INADDR_TO_IFP.
+ * FUTURE: Implement IPv4 source-address selection.
+ */
+static struct ifnet *
+inp_lookup_mcast_ifp(const struct inpcb *inp,
+ const struct sockaddr_in *gsin, const struct in_addr ina)
+{
+ struct ifnet *ifp;
+
+ KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__));
+ KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)),
+ ("%s: not multicast", __func__));
+
+ ifp = NULL;
+ if (!in_nullhost(ina)) {
+ INADDR_TO_IFP(ina, ifp);
+ } else {
+ struct route ro;
+
+ ro.ro_rt = NULL;
+ memcpy(&ro.ro_dst, gsin, sizeof(struct sockaddr_in));
+ in_rtalloc_ign(&ro, 0, inp ? inp->inp_inc.inc_fibnum : 0);
+ if (ro.ro_rt != NULL) {
+ ifp = ro.ro_rt->rt_ifp;
+ KASSERT(ifp != NULL, ("%s: null ifp", __func__));
+ RTFREE(ro.ro_rt);
+ } else {
+ struct in_ifaddr *ia;
+ struct ifnet *mifp;
+
+ mifp = NULL;
+ TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
+ mifp = ia->ia_ifp;
+ if (!(mifp->if_flags & IFF_LOOPBACK) &&
+ (mifp->if_flags & IFF_MULTICAST)) {
+ ifp = mifp;
+ break;
+ }
+ }
+ }
+ }
+
+ return (ifp);
+}
+
+/*
* Join an IPv4 multicast group, possibly with a source.
*/
static int
@@ -980,11 +1875,14 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)
struct in_mfilter *imf;
struct ip_moptions *imo;
struct in_multi *inm;
+ struct in_msource *lims;
size_t idx;
- int error;
+ int error, is_new;
ifp = NULL;
+ imf = NULL;
error = 0;
+ is_new = 0;
memset(&gsr, 0, sizeof(struct group_source_req));
gsa = (sockunion_t *)&gsr.gsr_group;
@@ -1025,52 +1923,10 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)
ssa->sin.sin_addr = mreqs.imr_sourceaddr;
}
- /*
- * Obtain ifp. If no interface address was provided,
- * use the interface of the route in the unicast FIB for
- * the given multicast destination; usually, this is the
- * default route.
- * If this lookup fails, attempt to use the first non-loopback
- * interface with multicast capability in the system as a
- * last resort. The legacy IPv4 ASM API requires that we do
- * this in order to allow groups to be joined when the routing
- * table has not yet been populated during boot.
- * If all of these conditions fail, return EADDRNOTAVAIL, and
- * reject the IPv4 multicast join.
- */
- if (mreqs.imr_interface.s_addr != INADDR_ANY) {
- INADDR_TO_IFP(mreqs.imr_interface, ifp);
- } else {
- struct route ro;
-
- ro.ro_rt = NULL;
- *(struct sockaddr_in *)&ro.ro_dst = gsa->sin;
- in_rtalloc_ign(&ro, 0,
- inp->inp_inc.inc_fibnum);
- if (ro.ro_rt != NULL) {
- ifp = ro.ro_rt->rt_ifp;
- KASSERT(ifp != NULL, ("%s: null ifp",
- __func__));
- RTFREE(ro.ro_rt);
- } else {
- struct in_ifaddr *ia;
- struct ifnet *mfp = NULL;
- TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
- mfp = ia->ia_ifp;
- if (!(mfp->if_flags & IFF_LOOPBACK) &&
- (mfp->if_flags & IFF_MULTICAST)) {
- ifp = mfp;
- break;
- }
- }
- }
- }
-#ifdef DIAGNOSTIC
- if (bootverbose) {
- printf("%s: imr_interface = %s, ifp = %p\n",
- __func__, inet_ntoa(mreqs.imr_interface), ifp);
- }
-#endif
+ ifp = inp_lookup_mcast_ifp(inp, &gsa->sin,
+ mreqs.imr_interface);
+ CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p",
+ __func__, inet_ntoa(mreqs.imr_interface), ifp);
break;
}
@@ -1095,7 +1951,6 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)
/*
* Overwrite the port field if present, as the sockaddr
* being copied in may be matched with a binary comparison.
- * XXX INET6
*/
gsa->sin.sin_port = 0;
if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
@@ -1105,22 +1960,14 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)
ssa->sin.sin_port = 0;
}
- /*
- * Obtain the ifp.
- */
if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
return (EADDRNOTAVAIL);
ifp = ifnet_byindex(gsr.gsr_interface);
-
break;
default:
-#ifdef DIAGNOSTIC
- if (bootverbose) {
- printf("%s: unknown sopt_name %d\n", __func__,
- sopt->sopt_name);
- }
-#endif
+ CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
+ __func__, sopt->sopt_name);
return (EOPNOTSUPP);
break;
}
@@ -1131,96 +1978,131 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)
if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
return (EADDRNOTAVAIL);
+ IFF_LOCKGIANT(ifp);
+
/*
- * Check if we already hold membership of this group for this inpcb.
- * If so, we do not need to perform the initial join.
+ * MCAST_JOIN_SOURCE on an exclusive membership is an error.
+ * On an existing inclusive membership, it just adds the
+ * source to the filter list.
*/
imo = inp_findmoptions(inp);
idx = imo_match_group(imo, ifp, &gsa->sa);
- if (idx != -1) {
- if (ssa->ss.ss_family != AF_UNSPEC) {
- /*
- * Attempting to join an ASM group (when already
- * an ASM or SSM member) is an error.
- */
+ if (idx == -1) {
+ is_new = 1;
+ } else {
+ inm = imo->imo_membership[idx];
+ imf = &imo->imo_mfilters[idx];
+ if (ssa->ss.ss_family != AF_UNSPEC &&
+ imf->imf_st[1] != MCAST_INCLUDE) {
+ error = EINVAL;
+ goto out_inp_locked;
+ }
+ lims = imo_match_source(imo, idx, &ssa->sa);
+ if (lims != NULL) {
error = EADDRNOTAVAIL;
- } else {
- imf = &imo->imo_mfilters[idx];
- if (imf->imf_nsources == 0) {
- /*
- * Attempting to join an SSM group (when
- * already an ASM member) is an error.
- */
- error = EINVAL;
- } else {
- /*
- * Attempting to join an SSM group (when
- * already an SSM member) means "add this
- * source to the inclusive filter list".
- */
- error = imo_join_source(imo, idx, ssa);
- }
+ goto out_inp_locked;
}
- goto out_locked;
}
/*
- * Call imo_grow() to reallocate the membership and source filter
- * vectors if they are full. If the size would exceed the hard limit,
- * then we know we've really run out of entries. We keep the INP
- * lock held to avoid introducing a race condition.
+ * Begin state merge transaction at socket layer.
*/
- if (imo->imo_num_memberships == imo->imo_max_memberships) {
- error = imo_grow(imo);
- if (error)
- goto out_locked;
+ INP_WLOCK_ASSERT(inp);
+
+ if (is_new) {
+ if (imo->imo_num_memberships == imo->imo_max_memberships) {
+ error = imo_grow(imo);
+ if (error)
+ goto out_inp_locked;
+ }
+ /*
+ * Allocate the new slot upfront so we can deal with
+		 * grafting the new source filter in the same code path
+		 * as for join-source on an existing membership.
+ */
+ idx = imo->imo_num_memberships;
+ imo->imo_membership[idx] = NULL;
+ imo->imo_num_memberships++;
+ KASSERT(imo->imo_mfilters != NULL,
+ ("%s: imf_mfilters vector was not allocated", __func__));
+ imf = &imo->imo_mfilters[idx];
+ KASSERT(RB_EMPTY(&imf->imf_sources),
+ ("%s: imf_sources not empty", __func__));
}
/*
- * So far, so good: perform the layer 3 join, layer 2 join,
- * and make an IGMP announcement if needed.
+ * Graft new source into filter list for this inpcb's
+ * membership of the group. The in_multi may not have
+ * been allocated yet if this is a new membership.
*/
- inm = in_addmulti(&gsa->sin.sin_addr, ifp);
- if (inm == NULL) {
- error = ENOBUFS;
- goto out_locked;
+ if (ssa->ss.ss_family != AF_UNSPEC) {
+ /* Membership starts in IN mode */
+ if (is_new) {
+ CTR1(KTR_IGMPV3, "%s: new join w/source", __func__);
+ imf_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE);
+ } else {
+ CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow");
+ }
+ lims = imf_graft(imf, MCAST_INCLUDE, &ssa->sin);
+ if (lims == NULL) {
+ CTR1(KTR_IGMPV3, "%s: merge imf state failed",
+ __func__);
+ error = ENOMEM;
+ goto out_imo_free;
+ }
}
- idx = imo->imo_num_memberships;
- imo->imo_membership[idx] = inm;
- imo->imo_num_memberships++;
-
- KASSERT(imo->imo_mfilters != NULL,
- ("%s: imf_mfilters vector was not allocated", __func__));
- imf = &imo->imo_mfilters[idx];
- KASSERT(TAILQ_EMPTY(&imf->imf_sources),
- ("%s: imf_sources not empty", __func__));
/*
- * If this is a new SSM group join (i.e. a source was specified
- * with this group), add this source to the filter list.
+ * Begin state merge transaction at IGMP layer.
*/
- if (ssa->ss.ss_family != AF_UNSPEC) {
- /*
- * An initial SSM join implies that this socket's membership
- * of the multicast group is now in inclusive mode.
- */
- imf->imf_fmode = MCAST_INCLUDE;
+ IN_MULTI_LOCK();
- error = imo_join_source(imo, idx, ssa);
+ if (is_new) {
+ error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf,
+ &inm);
+ if (error)
+ goto out_imo_free;
+ imo->imo_membership[idx] = inm;
+ } else {
+ CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
+ error = inm_merge(inm, imf);
if (error) {
- /*
- * Drop inp lock before calling in_delmulti(),
- * to prevent a lock order reversal.
- */
- --imo->imo_num_memberships;
- INP_WUNLOCK(inp);
- in_delmulti(inm);
- return (error);
+ CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
+ __func__);
+ goto out_imf_rollback;
+ }
+ CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
+ error = igmp_change_state(inm);
+ if (error) {
+ CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
+ __func__);
+ goto out_imf_rollback;
}
}
-out_locked:
+ IN_MULTI_UNLOCK();
+
+out_imf_rollback:
+ INP_WLOCK_ASSERT(inp);
+ if (error) {
+ imf_rollback(imf);
+ if (is_new)
+ imf_purge(imf);
+ else
+ imf_reap(imf);
+ } else {
+ imf_commit(imf);
+ }
+
+out_imo_free:
+ if (error && is_new) {
+ imo->imo_membership[idx] = NULL;
+ --imo->imo_num_memberships;
+ }
+
+out_inp_locked:
INP_WUNLOCK(inp);
+ IFF_UNLOCKGIANT(ifp);
return (error);
}
@@ -1238,13 +2120,14 @@ inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
struct ifnet *ifp;
struct in_mfilter *imf;
struct ip_moptions *imo;
- struct in_msource *ims, *tims;
+ struct in_msource *ims;
struct in_multi *inm;
size_t idx;
- int error;
+ int error, is_final;
ifp = NULL;
error = 0;
+ is_final = 1;
memset(&gsr, 0, sizeof(struct group_source_req));
gsa = (sockunion_t *)&gsr.gsr_group;
@@ -1284,15 +2167,12 @@ inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
ssa->sin.sin_addr = mreqs.imr_sourceaddr;
}
- if (gsa->sin.sin_addr.s_addr != INADDR_ANY)
+ if (!in_nullhost(gsa->sin.sin_addr))
INADDR_TO_IFP(mreqs.imr_interface, ifp);
-#ifdef DIAGNOSTIC
- if (bootverbose) {
- printf("%s: imr_interface = %s, ifp = %p\n",
- __func__, inet_ntoa(mreqs.imr_interface), ifp);
- }
-#endif
+ CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p",
+ __func__, inet_ntoa(mreqs.imr_interface), ifp);
+
break;
case MCAST_LEAVE_GROUP:
@@ -1326,12 +2206,8 @@ inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
break;
default:
-#ifdef DIAGNOSTIC
- if (bootverbose) {
- printf("%s: unknown sopt_name %d\n", __func__,
- sopt->sopt_name);
- }
-#endif
+ CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
+ __func__, sopt->sopt_name);
return (EOPNOTSUPP);
break;
}
@@ -1339,6 +2215,9 @@ inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
return (EINVAL);
+ if (ifp)
+ IFF_LOCKGIANT(ifp);
+
/*
* Find the membership in the membership array.
*/
@@ -1346,66 +2225,95 @@ inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
idx = imo_match_group(imo, ifp, &gsa->sa);
if (idx == -1) {
error = EADDRNOTAVAIL;
- goto out_locked;
+ goto out_inp_locked;
}
+ inm = imo->imo_membership[idx];
imf = &imo->imo_mfilters[idx];
+ if (ssa->ss.ss_family != AF_UNSPEC)
+ is_final = 0;
+
+ /*
+ * Begin state merge transaction at socket layer.
+ */
+ INP_WLOCK_ASSERT(inp);
+
/*
* If we were instructed only to leave a given source, do so.
+ * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships.
*/
- if (ssa->ss.ss_family != AF_UNSPEC) {
- if (imf->imf_nsources == 0 ||
- imf->imf_fmode == MCAST_EXCLUDE) {
- /*
- * Attempting to SSM leave an ASM group
- * is an error; should use *_BLOCK_SOURCE instead.
- * Attempting to SSM leave a source in a group when
- * the socket is in 'exclude mode' is also an error.
- */
- error = EINVAL;
- } else {
- error = imo_leave_source(imo, idx, ssa);
+ if (is_final) {
+ imf_leave(imf);
+ } else {
+ if (imf->imf_st[0] == MCAST_EXCLUDE) {
+ error = EADDRNOTAVAIL;
+ goto out_inp_locked;
+ }
+ ims = imo_match_source(imo, idx, &ssa->sa);
+ if (ims == NULL) {
+ CTR3(KTR_IGMPV3, "%s: source %s %spresent", __func__,
+ inet_ntoa(ssa->sin.sin_addr), "not ");
+ error = EADDRNOTAVAIL;
+ goto out_inp_locked;
+ }
+ CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block");
+ error = imf_prune(imf, &ssa->sin);
+ if (error) {
+ CTR1(KTR_IGMPV3, "%s: merge imf state failed",
+ __func__);
+ goto out_inp_locked;
}
- /*
- * If an error occurred, or this source is not the last
- * source in the group, do not leave the whole group.
- */
- if (error || imf->imf_nsources > 0)
- goto out_locked;
}
/*
- * Give up the multicast address record to which the membership points.
+ * Begin state merge transaction at IGMP layer.
*/
- inm = imo->imo_membership[idx];
- in_delmulti(inm);
+ IN_MULTI_LOCK();
- /*
- * Free any source filters for this group if they exist.
- * Revert inpcb to the default MCAST_EXCLUDE state.
- */
- if (imo->imo_mfilters != NULL) {
- TAILQ_FOREACH_SAFE(ims, &imf->imf_sources, ims_next, tims) {
- TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
- free(ims, M_IPMSOURCE);
- imf->imf_nsources--;
+ if (is_final) {
+ /*
+ * Give up the multicast address record to which
+ * the membership points.
+ */
+ (void)in_leavegroup_locked(inm, imf);
+ } else {
+ CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
+ error = inm_merge(inm, imf);
+ if (error) {
+ CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
+ __func__);
+ goto out_imf_rollback;
+ }
+
+ CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
+ error = igmp_change_state(inm);
+ if (error) {
+ CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
+ __func__);
}
- KASSERT(imf->imf_nsources == 0,
- ("%s: imf_nsources not 0", __func__));
- KASSERT(TAILQ_EMPTY(&imf->imf_sources),
- ("%s: imf_sources not empty", __func__));
- imf->imf_fmode = MCAST_EXCLUDE;
}
- /*
- * Remove the gap in the membership array.
- */
- for (++idx; idx < imo->imo_num_memberships; ++idx)
- imo->imo_membership[idx-1] = imo->imo_membership[idx];
- imo->imo_num_memberships--;
+ IN_MULTI_UNLOCK();
+
+out_imf_rollback:
+ if (error)
+ imf_rollback(imf);
+ else
+ imf_commit(imf);
+
+ imf_reap(imf);
-out_locked:
+ if (is_final) {
+ /* Remove the gap in the membership array. */
+ for (++idx; idx < imo->imo_num_memberships; ++idx)
+ imo->imo_membership[idx-1] = imo->imo_membership[idx];
+ imo->imo_num_memberships--;
+ }
+
+out_inp_locked:
INP_WUNLOCK(inp);
+ if (ifp)
+ IFF_UNLOCKGIANT(ifp);
return (error);
}
@@ -1456,19 +2364,15 @@ inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
sizeof(struct in_addr));
if (error)
return (error);
- if (addr.s_addr == INADDR_ANY) {
+ if (in_nullhost(addr)) {
ifp = NULL;
} else {
INADDR_TO_IFP(addr, ifp);
if (ifp == NULL)
return (EADDRNOTAVAIL);
}
-#ifdef DIAGNOSTIC
- if (bootverbose) {
- printf("%s: ifp = %p, addr = %s\n",
- __func__, ifp, inet_ntoa(addr)); /* XXX INET6 */
- }
-#endif
+ CTR3(KTR_IGMPV3, "%s: ifp = %p, addr = %s", __func__, ifp,
+ inet_ntoa(addr));
}
/* Reject interfaces which do not support multicast. */
@@ -1485,6 +2389,8 @@ inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
/*
* Atomically set source filters on a socket for an IPv4 multicast group.
+ *
+ * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held.
*/
static int
inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
@@ -1495,7 +2401,7 @@ inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
struct ifnet *ifp;
struct in_mfilter *imf;
struct ip_moptions *imo;
- struct in_msource *ims, *tims;
+ struct in_multi *inm;
size_t idx;
int error;
@@ -1504,7 +2410,7 @@ inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
if (error)
return (error);
- if (msfr.msfr_nsrcs > IP_MAX_SOURCE_FILTER ||
+ if (msfr.msfr_nsrcs > in_mcast_maxsocksrc ||
(msfr.msfr_fmode != MCAST_EXCLUDE &&
msfr.msfr_fmode != MCAST_INCLUDE))
return (EINVAL);
@@ -1526,62 +2432,44 @@ inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
if (ifp == NULL)
return (EADDRNOTAVAIL);
+ IFF_LOCKGIANT(ifp);
+
/*
- * Take the INP lock.
+ * Take the INP write lock.
* Check if this socket is a member of this group.
*/
imo = inp_findmoptions(inp);
idx = imo_match_group(imo, ifp, &gsa->sa);
if (idx == -1 || imo->imo_mfilters == NULL) {
error = EADDRNOTAVAIL;
- goto out_locked;
+ goto out_inp_locked;
}
+ inm = imo->imo_membership[idx];
imf = &imo->imo_mfilters[idx];
-#ifdef DIAGNOSTIC
- if (bootverbose)
- printf("%s: clearing source list\n", __func__);
-#endif
-
/*
- * Remove any existing source filters.
+ * Begin state merge transaction at socket layer.
*/
- TAILQ_FOREACH_SAFE(ims, &imf->imf_sources, ims_next, tims) {
- TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
- free(ims, M_IPMSOURCE);
- imf->imf_nsources--;
- }
- KASSERT(imf->imf_nsources == 0,
- ("%s: source list not cleared", __func__));
+ INP_WLOCK_ASSERT(inp);
+
+ imf->imf_st[1] = msfr.msfr_fmode;
/*
* Apply any new source filters, if present.
+ * Make a copy of the user-space source vector so
+ * that we may copy them with a single copyin. This
+ * allows us to deal with page faults up-front.
*/
if (msfr.msfr_nsrcs > 0) {
- struct in_msource **pnims;
- struct in_msource *nims;
- struct sockaddr_storage *kss;
- struct sockaddr_storage *pkss;
- sockunion_t *psu;
- int i, j;
+ struct in_msource *lims;
+ struct sockaddr_in *psin;
+ struct sockaddr_storage *kss, *pkss;
+ int i;
- /*
- * Drop the inp lock so we may sleep if we need to
- * in order to satisfy a malloc request.
- * We will re-take it before changing socket state.
- */
INP_WUNLOCK(inp);
-#ifdef DIAGNOSTIC
- if (bootverbose) {
- printf("%s: loading %lu source list entries\n",
- __func__, (unsigned long)msfr.msfr_nsrcs);
- }
-#endif
- /*
- * Make a copy of the user-space source vector so
- * that we may copy them with a single copyin. This
- * allows us to deal with page faults up-front.
- */
+
+ CTR2(KTR_IGMPV3, "%s: loading %lu source list entries",
+ __func__, (unsigned long)msfr.msfr_nsrcs);
kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
M_TEMP, M_WAITOK);
error = copyin(msfr.msfr_srcs, kss,
@@ -1591,103 +2479,79 @@ inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
return (error);
}
- /*
- * Perform argument checking on every sockaddr_storage
- * structure in the vector provided to us. Overwrite
- * fields which should not apply to source entries.
- * TODO: Check for duplicate sources on this pass.
- */
- psu = (sockunion_t *)kss;
- for (i = 0; i < msfr.msfr_nsrcs; i++, psu++) {
- switch (psu->ss.ss_family) {
- case AF_INET:
- if (psu->sin.sin_len !=
- sizeof(struct sockaddr_in)) {
- error = EINVAL;
- } else {
- psu->sin.sin_port = 0;
- }
- break;
-#ifdef notyet
- case AF_INET6;
- if (psu->sin6.sin6_len !=
- sizeof(struct sockaddr_in6)) {
- error = EINVAL;
- } else {
- psu->sin6.sin6_port = 0;
- psu->sin6.sin6_flowinfo = 0;
- }
- break;
-#endif
- default:
- error = EAFNOSUPPORT;
- break;
- }
- if (error)
- break;
- }
- if (error) {
- free(kss, M_TEMP);
- return (error);
- }
+ INP_WLOCK(inp);
/*
- * Allocate a block to track all the in_msource
- * entries we are about to allocate, in case we
- * abruptly need to free them.
+ * Mark all source filters as UNDEFINED at t1.
+ * Restore new group filter mode, as imf_leave()
+ * will set it to INCLUDE.
*/
- pnims = malloc(sizeof(struct in_msource *) * msfr.msfr_nsrcs,
- M_TEMP, M_WAITOK | M_ZERO);
+ imf_leave(imf);
+ imf->imf_st[1] = msfr.msfr_fmode;
/*
- * Allocate up to nsrcs individual chunks.
- * If we encounter an error, backtrack out of
- * all allocations cleanly; updates must be atomic.
+ * Update socket layer filters at t1, lazy-allocating
+ * new entries. This saves a bunch of memory at the
+ * cost of one RB_FIND() per source entry; duplicate
+ * entries in the msfr_nsrcs vector are ignored.
+ * If we encounter an error, rollback transaction.
+ *
+ * XXX This too could be replaced with a set-symmetric
+ * difference like loop to avoid walking from root
+ * every time, as the key space is common.
*/
- pkss = kss;
- nims = NULL;
- for (i = 0; i < msfr.msfr_nsrcs; i++, pkss++) {
- nims = malloc(sizeof(struct in_msource) *
- msfr.msfr_nsrcs, M_IPMSOURCE, M_WAITOK | M_ZERO);
- pnims[i] = nims;
- }
- if (i < msfr.msfr_nsrcs) {
- for (j = 0; j < i; j++) {
- if (pnims[j] != NULL)
- free(pnims[j], M_IPMSOURCE);
+ for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) {
+ psin = (struct sockaddr_in *)pkss;
+ if (psin->sin_family != AF_INET) {
+ error = EAFNOSUPPORT;
+ break;
}
- free(pnims, M_TEMP);
- free(kss, M_TEMP);
- return (ENOBUFS);
- }
-
- INP_UNLOCK_ASSERT(inp);
-
- /*
- * Finally, apply the filters to the socket.
- * Re-take the inp lock; we are changing socket state.
- */
- pkss = kss;
- INP_WLOCK(inp);
- for (i = 0; i < msfr.msfr_nsrcs; i++, pkss++) {
- memcpy(&(pnims[i]->ims_addr), pkss,
- sizeof(struct sockaddr_storage));
- TAILQ_INSERT_TAIL(&imf->imf_sources, pnims[i],
- ims_next);
- imf->imf_nsources++;
+ if (psin->sin_len != sizeof(struct sockaddr_in)) {
+ error = EINVAL;
+ break;
+ }
+ error = imf_get_source(imf, psin, &lims);
+ if (error)
+ break;
+ lims->imsl_st[1] = imf->imf_st[1];
}
- free(pnims, M_TEMP);
free(kss, M_TEMP);
}
+ if (error)
+ goto out_imf_rollback;
+
+ INP_WLOCK_ASSERT(inp);
+ IN_MULTI_LOCK();
+
/*
- * Update the filter mode on the socket before releasing the inpcb.
+ * Begin state merge transaction at IGMP layer.
*/
- INP_WLOCK_ASSERT(inp);
- imf->imf_fmode = msfr.msfr_fmode;
+ CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
+ error = inm_merge(inm, imf);
+ if (error) {
+ CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
+ goto out_imf_rollback;
+ }
+
+ CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
+ error = igmp_change_state(inm);
+ if (error)
+ CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
+
+ IN_MULTI_UNLOCK();
+
+out_imf_rollback:
+ if (error)
+ imf_rollback(imf);
+ else
+ imf_commit(imf);
-out_locked:
+ imf_reap(imf);
+
+out_inp_locked:
INP_WUNLOCK(inp);
+ IFF_UNLOCKGIANT(ifp);
return (error);
}
@@ -1699,6 +2563,10 @@ out_locked:
* it is not possible to merge the duplicate code, because the idempotence
* of the IPv4 multicast part of the BSD Sockets API must be preserved;
* the effects of these options must be treated as separate and distinct.
+ *
+ * SMPng: XXX: Unlocked read of inp_socket believed OK.
+ * FUTURE: The IP_MULTICAST_VIF option may be eliminated if MROUTING
+ * is refactored to no longer use vifs.
*/
int
inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
@@ -1711,11 +2579,10 @@ inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
/*
* If socket is neither of type SOCK_RAW or SOCK_DGRAM,
* or is a divert socket, reject it.
- * XXX Unlocked read of inp_socket believed OK.
*/
if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
(inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
- inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
+ inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
return (EOPNOTSUPP);
switch (sopt->sopt_name) {
@@ -1826,7 +2693,7 @@ inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
case IP_UNBLOCK_SOURCE:
case MCAST_BLOCK_SOURCE:
case MCAST_UNBLOCK_SOURCE:
- error = inp_change_source_filter(inp, sopt);
+ error = inp_block_unblock_source(inp, sopt);
break;
case IP_MSFILTER:
@@ -1842,3 +2709,183 @@ inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
return (error);
}
+
+/*
+ * Expose IGMP's multicast filter mode and source list(s) to userland,
+ * keyed by (ifindex, group).
+ * The filter mode is written out as a uint32_t, followed by
+ * 0..n of struct in_addr.
+ * For use by ifmcstat(8).
+ * SMPng: NOTE: unlocked read of ifindex space.
+ */
+static int
+sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS)
+{
+ INIT_VNET_NET(curvnet);
+ struct in_addr src, group;
+ struct ifnet *ifp;
+ struct ifmultiaddr *ifma;
+ struct in_multi *inm;
+ struct ip_msource *ims;
+ int *name;
+ int retval;
+ u_int namelen;
+ uint32_t fmode, ifindex;
+
+ name = (int *)arg1;
+ namelen = arg2;
+
+ if (req->newptr != NULL)
+ return (EPERM);
+
+ if (namelen != 2)
+ return (EINVAL);
+
+ ifindex = name[0];
+ if (ifindex <= 0 || ifindex > V_if_index) {
+ CTR2(KTR_IGMPV3, "%s: ifindex %u out of range",
+ __func__, ifindex);
+ return (ENOENT);
+ }
+
+ group.s_addr = name[1];
+ if (!IN_MULTICAST(ntohl(group.s_addr))) {
+ CTR2(KTR_IGMPV3, "%s: group %s is not multicast",
+ __func__, inet_ntoa(group));
+ return (EINVAL);
+ }
+
+ ifp = ifnet_byindex(ifindex);
+ if (ifp == NULL) {
+ CTR2(KTR_IGMPV3, "%s: no ifp for ifindex %u",
+ __func__, ifindex);
+ return (ENOENT);
+ }
+
+ retval = sysctl_wire_old_buffer(req,
+ sizeof(uint32_t) + (in_mcast_maxgrpsrc * sizeof(struct in_addr)));
+ if (retval)
+ return (retval);
+
+ IN_MULTI_LOCK();
+
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+ if (ifma->ifma_addr->sa_family != AF_INET ||
+ ifma->ifma_protospec == NULL)
+ continue;
+ inm = (struct in_multi *)ifma->ifma_protospec;
+ if (!in_hosteq(inm->inm_addr, group))
+ continue;
+ fmode = inm->inm_st[1].iss_fmode;
+ retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t));
+ if (retval != 0)
+ break;
+ RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
+#ifdef KTR
+ struct in_addr ina;
+ ina.s_addr = htonl(ims->ims_haddr);
+ CTR2(KTR_IGMPV3, "%s: visit node %s", __func__,
+ inet_ntoa(ina));
+#endif
+ /*
+ * Only copy-out sources which are in-mode.
+ */
+ if (fmode != ims_get_mode(inm, ims, 1)) {
+ CTR1(KTR_IGMPV3, "%s: skip non-in-mode",
+ __func__);
+ continue;
+ }
+ src.s_addr = htonl(ims->ims_haddr);
+ retval = SYSCTL_OUT(req, &src, sizeof(struct in_addr));
+ if (retval != 0)
+ break;
+ }
+ }
+ IF_ADDR_UNLOCK(ifp);
+
+ IN_MULTI_UNLOCK();
+
+ return (retval);
+}
+
+#ifdef KTR
+
+static const char *inm_modestrs[] = { "un", "in", "ex" };
+
+static const char *
+inm_mode_str(const int mode)
+{
+
+ if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE)
+ return (inm_modestrs[mode]);
+ return ("??");
+}
+
+static const char *inm_statestrs[] = {
+ "not-member",
+ "silent",
+ "idle",
+ "lazy",
+ "sleeping",
+ "awakening",
+ "query-pending",
+ "sg-query-pending",
+ "leaving"
+};
+
+static const char *
+inm_state_str(const int state)
+{
+
+ if (state >= IGMP_NOT_MEMBER && state <= IGMP_LEAVING_MEMBER)
+ return (inm_statestrs[state]);
+ return ("??");
+}
+
+/*
+ * Dump an in_multi structure to the console.
+ */
+void
+inm_print(const struct in_multi *inm)
+{
+ int t;
+
+ printf("%s: --- begin inm %p ---\n", __func__, inm);
+ printf("addr %s ifp %p(%s) ifma %p\n",
+ inet_ntoa(inm->inm_addr),
+ inm->inm_ifp,
+ inm->inm_ifp->if_xname,
+ inm->inm_ifma);
+ printf("timer %u state %s refcount %u scq.len %u\n",
+ inm->inm_timer,
+ inm_state_str(inm->inm_state),
+ inm->inm_refcount,
+ inm->inm_scq.ifq_len);
+ printf("igi %p nsrc %lu sctimer %u scrv %u\n",
+ inm->inm_igi,
+ inm->inm_nsrc,
+ inm->inm_sctimer,
+ inm->inm_scrv);
+ for (t = 0; t < 2; t++) {
+ printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t,
+ inm_mode_str(inm->inm_st[t].iss_fmode),
+ inm->inm_st[t].iss_asm,
+ inm->inm_st[t].iss_ex,
+ inm->inm_st[t].iss_in,
+ inm->inm_st[t].iss_rec);
+ }
+ printf("%s: --- end inm %p ---\n", __func__, inm);
+}
+
+#else /* !KTR */
+
+void
+inm_print(const struct in_multi *inm)
+{
+
+}
+
+#endif /* KTR */
+
+RB_GENERATE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp);
OpenPOWER on IntegriCloud