summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- UPDATING 7
-rw-r--r-- sys/modules/ip6_mroute_mod/Makefile 19
-rw-r--r-- sys/modules/ip_mroute_mod/Makefile 9
-rw-r--r-- sys/netinet/ip_mroute.c 1023
-rw-r--r-- sys/netinet/ip_mroute.h 24
-rw-r--r-- sys/netinet6/ip6_mroute.c 396
-rw-r--r-- sys/netinet6/ip6_mroute.h 2
-rw-r--r-- usr.bin/netstat/main.c 9
-rw-r--r-- usr.bin/netstat/mroute.c 306
-rw-r--r-- usr.bin/netstat/netstat.h 2
10 files changed, 905 insertions, 892 deletions
diff --git a/UPDATING b/UPDATING
index 6b490fc..3864a0d 100644
--- a/UPDATING
+++ b/UPDATING
@@ -22,6 +22,13 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 8.x IS SLOW:
to maximize performance. (To disable malloc debugging, run
ln -s aj /etc/malloc.conf.)
+20090319:
+ The multicast forwarding code has been cleaned up. netstat(1)
+ only relies on KVM now for printing bandwidth upcall meters.
+ The IPv4 and IPv6 modules are split into ip_mroute_mod and
+ ip6_mroute_mod respectively. The config(5) options for statically
+ compiling this code remain the same, i.e. 'options MROUTING'.
+
20090315:
Support for the IFF_NEEDSGIANT network interface flag has been
removed, which means that non-MPSAFE network device drivers are no
diff --git a/sys/modules/ip6_mroute_mod/Makefile b/sys/modules/ip6_mroute_mod/Makefile
new file mode 100644
index 0000000..f361c9d
--- /dev/null
+++ b/sys/modules/ip6_mroute_mod/Makefile
@@ -0,0 +1,19 @@
+# $FreeBSD$
+
+.include <bsd.own.mk>
+
+.PATH: ${.CURDIR}/../../netinet6
+
+KMOD= ip6_mroute
+
+SRCS= ip6_mroute.c
+SRCS+= opt_inet6.h opt_mac.h opt_mrouting.h
+
+.if !defined(KERNBUILDDIR)
+opt_inet6.h:
+ echo "#define INET6 1" > ${.TARGET}
+opt_mrouting.h:
+ echo "#define MROUTING 1" > ${.TARGET}
+.endif
+
+.include <bsd.kmod.mk>
diff --git a/sys/modules/ip_mroute_mod/Makefile b/sys/modules/ip_mroute_mod/Makefile
index 3ca0a3dc..141eceb 100644
--- a/sys/modules/ip_mroute_mod/Makefile
+++ b/sys/modules/ip_mroute_mod/Makefile
@@ -8,21 +8,12 @@ KMOD= ip_mroute
SRCS= ip_mroute.c
SRCS+= opt_inet.h opt_mac.h opt_mrouting.h opt_route.h
-SRCS+= opt_inet6.h
-
-.if ${MK_INET6_SUPPORT} != "no"
-SRCS+= ip6_mroute.c
-.endif
.if !defined(KERNBUILDDIR)
opt_inet.h:
echo "#define INET 1" > ${.TARGET}
opt_mrouting.h:
echo "#define MROUTING 1" > ${.TARGET}
-.if ${MK_INET6_SUPPORT} != "no"
-opt_inet6.h:
- echo "#define INET6 1" > ${.TARGET}
-.endif
.endif
.include <bsd.kmod.mk>
diff --git a/sys/netinet/ip_mroute.c b/sys/netinet/ip_mroute.c
index 36a5efd..d4b0828 100644
--- a/sys/netinet/ip_mroute.c
+++ b/sys/netinet/ip_mroute.c
@@ -53,11 +53,24 @@
* bandwidth metering and signaling
*/
+/*
+ * TODO: Prefix functions with ipmf_.
+ * TODO: Maintain a refcount on if_allmulti() in ifnet or in the protocol
+ * domain attachment (if_afdata) so we can track consumers of that service.
+ * TODO: Deprecate routing socket path for SIOCGETSGCNT and SIOCGETVIFCNT,
+ * move it to socket options.
+ * TODO: Rototill log_debug to use KTR.
+ * TODO: Clean up LSRR removal further.
+ * TODO: Push RSVP stubs into raw_ip.c.
+ * TODO: Use bitstring.h for vif set.
+ * TODO: Fix mrt6_ioctl dangling ref when dynamically loaded.
+ * TODO: Sync ip6_mroute.c with this file.
+ */
+
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
-#include "opt_inet6.h"
#include "opt_mac.h"
#include "opt_mrouting.h"
@@ -81,9 +94,11 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/vimage.h>
+
#include <net/if.h>
#include <net/netisr.h>
#include <net/route.h>
+
#include <netinet/in.h>
#include <netinet/igmp.h>
#include <netinet/in_systm.h>
@@ -98,96 +113,84 @@ __FBSDID("$FreeBSD$");
#include <netinet/udp.h>
#include <netinet/vinet.h>
-#ifdef INET6
-#include <netinet/ip6.h>
-#include <netinet6/in6_var.h>
-#include <netinet6/ip6_mroute.h>
-#include <netinet6/ip6_var.h>
-#endif
#include <machine/in_cksum.h>
#include <security/mac/mac_framework.h>
-/*
- * Control debugging code for rsvp and multicast routing code.
- * Can only set them with the debugger.
- */
-static u_int rsvpdebug; /* non-zero enables debugging */
-
-static u_int mrtdebug; /* any set of the flags below */
#define DEBUG_MFC 0x02
#define DEBUG_FORWARD 0x04
#define DEBUG_EXPIRE 0x08
#define DEBUG_XMIT 0x10
#define DEBUG_PIM 0x20
+static u_int mrtdebug;
+SYSCTL_INT(_debug, OID_AUTO, mrtdebug, CTLFLAG_RW, &mrtdebug, 0,
+ "Enable/disable IPv4 multicast forwarding debugging flags");
#define VIFI_INVALID ((vifi_t) -1)
+#define M_HASCL(m) ((m)->m_flags & M_EXT)
-#define M_HASCL(m) ((m)->m_flags & M_EXT)
-
-static MALLOC_DEFINE(M_MRTABLE, "mroutetbl", "multicast routing tables");
+static MALLOC_DEFINE(M_MRTABLE, "mroutetbl", "multicast forwarding cache");
/*
* Locking. We use two locks: one for the virtual interface table and
* one for the forwarding table. These locks may be nested in which case
* the VIF lock must always be taken first. Note that each lock is used
* to cover not only the specific data structure but also related data
- * structures. It may be better to add more fine-grained locking later;
- * it's not clear how performance-critical this code is.
- *
- * XXX: This module could particularly benefit from being cleaned
- * up to use the <sys/queue.h> macros.
- *
+ * structures.
*/
-static struct mrtstat mrtstat;
-SYSCTL_STRUCT(_net_inet_ip, OID_AUTO, mrtstat, CTLFLAG_RW,
- &mrtstat, mrtstat,
- "Multicast Routing Statistics (struct mrtstat, netinet/ip_mroute.h)");
-
-static struct mfc *mfctable[MFCTBLSIZ];
-SYSCTL_OPAQUE(_net_inet_ip, OID_AUTO, mfctable, CTLFLAG_RD,
- &mfctable, sizeof(mfctable), "S,*mfc[MFCTBLSIZ]",
- "Multicast Forwarding Table (struct *mfc[MFCTBLSIZ], netinet/ip_mroute.h)");
-
static struct mtx mrouter_mtx;
#define MROUTER_LOCK() mtx_lock(&mrouter_mtx)
#define MROUTER_UNLOCK() mtx_unlock(&mrouter_mtx)
#define MROUTER_LOCK_ASSERT() mtx_assert(&mrouter_mtx, MA_OWNED)
-#define MROUTER_LOCK_INIT() \
+#define MROUTER_LOCK_INIT() \
mtx_init(&mrouter_mtx, "IPv4 multicast forwarding", NULL, MTX_DEF)
#define MROUTER_LOCK_DESTROY() mtx_destroy(&mrouter_mtx)
+static struct mrtstat mrtstat;
+SYSCTL_STRUCT(_net_inet_ip, OID_AUTO, mrtstat, CTLFLAG_RW,
+ &mrtstat, mrtstat,
+ "IPv4 Multicast Forwarding Statistics (struct mrtstat, "
+ "netinet/ip_mroute.h)");
+
+static u_long mfchash;
+#define MFCHASH(a, g) \
+ ((((a).s_addr >> 20) ^ ((a).s_addr >> 10) ^ (a).s_addr ^ \
+ ((g).s_addr >> 20) ^ ((g).s_addr >> 10) ^ (g).s_addr) & mfchash)
+#define MFCHASHSIZE 256
+
+static u_char *nexpire; /* 0..mfchashsize-1 */
+static u_long mfchashsize; /* Hash size */
+LIST_HEAD(mfchashhdr, mfc) *mfchashtbl;
+
static struct mtx mfc_mtx;
-#define MFC_LOCK() mtx_lock(&mfc_mtx)
-#define MFC_UNLOCK() mtx_unlock(&mfc_mtx)
+#define MFC_LOCK() mtx_lock(&mfc_mtx)
+#define MFC_UNLOCK() mtx_unlock(&mfc_mtx)
#define MFC_LOCK_ASSERT() mtx_assert(&mfc_mtx, MA_OWNED)
-#define MFC_LOCK_INIT() mtx_init(&mfc_mtx, "mroute mfc table", NULL, MTX_DEF)
+#define MFC_LOCK_INIT() \
+ mtx_init(&mfc_mtx, "IPv4 multicast forwarding cache", NULL, MTX_DEF)
#define MFC_LOCK_DESTROY() mtx_destroy(&mfc_mtx)
+static vifi_t numvifs;
static struct vif viftable[MAXVIFS];
SYSCTL_OPAQUE(_net_inet_ip, OID_AUTO, viftable, CTLFLAG_RD,
&viftable, sizeof(viftable), "S,vif[MAXVIFS]",
- "Multicast Virtual Interfaces (struct vif[MAXVIFS], netinet/ip_mroute.h)");
+ "IPv4 Multicast Interfaces (struct vif[MAXVIFS], netinet/ip_mroute.h)");
static struct mtx vif_mtx;
-#define VIF_LOCK() mtx_lock(&vif_mtx)
-#define VIF_UNLOCK() mtx_unlock(&vif_mtx)
+#define VIF_LOCK() mtx_lock(&vif_mtx)
+#define VIF_UNLOCK() mtx_unlock(&vif_mtx)
#define VIF_LOCK_ASSERT() mtx_assert(&vif_mtx, MA_OWNED)
-#define VIF_LOCK_INIT() mtx_init(&vif_mtx, "mroute vif table", NULL, MTX_DEF)
+#define VIF_LOCK_INIT() \
+ mtx_init(&vif_mtx, "IPv4 multicast interfaces", NULL, MTX_DEF)
#define VIF_LOCK_DESTROY() mtx_destroy(&vif_mtx)
-static u_char nexpire[MFCTBLSIZ];
-
static eventhandler_tag if_detach_event_tag = NULL;
static struct callout expire_upcalls_ch;
-
#define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */
#define UPCALL_EXPIRE 6 /* number of timeouts */
-#define ENCAP_TTL 64
-
/*
* Bandwidth meter variables and constants
*/
@@ -223,7 +226,7 @@ SYSCTL_ULONG(_net_inet_pim, OID_AUTO, squelch_wholepkt, CTLFLAG_RW,
"Disable IGMP_WHOLEPKT notifications if rendezvous point is unspecified");
extern struct domain inetdomain;
-struct protosw in_pim_protosw = {
+static const struct protosw in_pim_protosw = {
.pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_PIM,
@@ -235,18 +238,6 @@ struct protosw in_pim_protosw = {
};
static const struct encaptab *pim_encap_cookie;
-#ifdef INET6
-/* ip6_mroute.c glue */
-extern struct in6_protosw in6_pim_protosw;
-static const struct encaptab *pim6_encap_cookie;
-
-extern int X_ip6_mrouter_set(struct socket *, struct sockopt *);
-extern int X_ip6_mrouter_get(struct socket *, struct sockopt *);
-extern int X_ip6_mrouter_done(void);
-extern int X_ip6_mforward(struct ip6_hdr *, struct ifnet *, struct mbuf *);
-extern int X_mrt6_ioctl(int, caddr_t);
-#endif
-
static int pim_encapcheck(const struct mbuf *, int, int, void *);
/*
@@ -264,6 +255,7 @@ struct pim_encap_pimhdr {
struct pim pim;
uint32_t flags;
};
+#define PIM_ENCAP_TTL 64
static struct ip pim_encap_iphdr = {
#if BYTE_ORDER == LITTLE_ENDIAN
@@ -277,7 +269,7 @@ static struct ip pim_encap_iphdr = {
sizeof(struct ip), /* total length */
0, /* id */
0, /* frag offset */
- ENCAP_TTL,
+ PIM_ENCAP_TTL,
IPPROTO_PIM,
0, /* checksum */
};
@@ -297,129 +289,97 @@ static vifi_t reg_vif_num = VIFI_INVALID;
/*
* Private variables.
*/
-static vifi_t numvifs;
-static u_long X_ip_mcast_src(int vifi);
-static int X_ip_mforward(struct ip *ip, struct ifnet *ifp,
- struct mbuf *m, struct ip_moptions *imo);
+static u_long X_ip_mcast_src(int);
+static int X_ip_mforward(struct ip *, struct ifnet *, struct mbuf *,
+ struct ip_moptions *);
static int X_ip_mrouter_done(void);
-static int X_ip_mrouter_get(struct socket *so, struct sockopt *m);
-static int X_ip_mrouter_set(struct socket *so, struct sockopt *m);
-static int X_legal_vif_num(int vif);
-static int X_mrt_ioctl(int cmd, caddr_t data, int fibnum);
-
-static int get_sg_cnt(struct sioc_sg_req *);
-static int get_vif_cnt(struct sioc_vif_req *);
-static void if_detached_event(void *arg __unused, struct ifnet *);
-static int ip_mrouter_init(struct socket *, int);
-static int add_vif(struct vifctl *);
-static int del_vif_locked(vifi_t);
-static int del_vif(vifi_t);
-static int add_mfc(struct mfcctl2 *);
-static int del_mfc(struct mfcctl2 *);
-static int set_api_config(uint32_t *); /* chose API capabilities */
-static int socket_send(struct socket *, struct mbuf *, struct sockaddr_in *);
-static int set_assert(int);
-static void expire_upcalls(void *);
-static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, vifi_t);
-static void phyint_send(struct ip *, struct vif *, struct mbuf *);
-static void send_packet(struct vif *, struct mbuf *);
-
-/*
- * Bandwidth monitoring
- */
-static void free_bw_list(struct bw_meter *list);
-static int add_bw_upcall(struct bw_upcall *);
-static int del_bw_upcall(struct bw_upcall *);
-static void bw_meter_receive_packet(struct bw_meter *x, int plen,
- struct timeval *nowp);
-static void bw_meter_prepare_upcall(struct bw_meter *x, struct timeval *nowp);
-static void bw_upcalls_send(void);
-static void schedule_bw_meter(struct bw_meter *x, struct timeval *nowp);
-static void unschedule_bw_meter(struct bw_meter *x);
-static void bw_meter_process(void);
-static void expire_bw_upcalls_send(void *);
-static void expire_bw_meter_process(void *);
-
-static int pim_register_send(struct ip *, struct vif *,
- struct mbuf *, struct mfc *);
-static int pim_register_send_rp(struct ip *, struct vif *,
- struct mbuf *, struct mfc *);
-static int pim_register_send_upcall(struct ip *, struct vif *,
- struct mbuf *, struct mfc *);
-static struct mbuf *pim_register_prepare(struct ip *, struct mbuf *);
-
-/*
- * whether or not special PIM assert processing is enabled.
- */
-static int pim_assert;
-/*
- * Rate limit for assert notification messages, in usec
- */
-#define ASSERT_MSG_TIME 3000000
+static int X_ip_mrouter_get(struct socket *, struct sockopt *);
+static int X_ip_mrouter_set(struct socket *, struct sockopt *);
+static int X_legal_vif_num(int);
+static int X_mrt_ioctl(int, caddr_t, int);
+
+static int add_bw_upcall(struct bw_upcall *);
+static int add_mfc(struct mfcctl2 *);
+static int add_vif(struct vifctl *);
+static void bw_meter_prepare_upcall(struct bw_meter *, struct timeval *);
+static void bw_meter_process(void);
+static void bw_meter_receive_packet(struct bw_meter *, int,
+ struct timeval *);
+static void bw_upcalls_send(void);
+static int del_bw_upcall(struct bw_upcall *);
+static int del_mfc(struct mfcctl2 *);
+static int del_vif(vifi_t);
+static int del_vif_locked(vifi_t);
+static void expire_bw_meter_process(void *);
+static void expire_bw_upcalls_send(void *);
+static void expire_mfc(struct mfc *);
+static void expire_upcalls(void *);
+static void free_bw_list(struct bw_meter *);
+static int get_sg_cnt(struct sioc_sg_req *);
+static int get_vif_cnt(struct sioc_vif_req *);
+static void if_detached_event(void *, struct ifnet *);
+static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, vifi_t);
+static int ip_mrouter_init(struct socket *, int);
+static __inline struct mfc *
+ mfc_find(struct in_addr *, struct in_addr *);
+static void phyint_send(struct ip *, struct vif *, struct mbuf *);
+static struct mbuf *
+ pim_register_prepare(struct ip *, struct mbuf *);
+static int pim_register_send(struct ip *, struct vif *,
+ struct mbuf *, struct mfc *);
+static int pim_register_send_rp(struct ip *, struct vif *,
+ struct mbuf *, struct mfc *);
+static int pim_register_send_upcall(struct ip *, struct vif *,
+ struct mbuf *, struct mfc *);
+static void schedule_bw_meter(struct bw_meter *, struct timeval *);
+static void send_packet(struct vif *, struct mbuf *);
+static int set_api_config(uint32_t *);
+static int set_assert(int);
+static int socket_send(struct socket *, struct mbuf *,
+ struct sockaddr_in *);
+static void unschedule_bw_meter(struct bw_meter *);
/*
- * Kernel multicast routing API capabilities and setup.
+ * Kernel multicast forwarding API capabilities and setup.
* If more API capabilities are added to the kernel, they should be
* recorded in `mrt_api_support'.
*/
+#define MRT_API_VERSION 0x0305
+
+static const int mrt_api_version = MRT_API_VERSION;
static const uint32_t mrt_api_support = (MRT_MFC_FLAGS_DISABLE_WRONGVIF |
MRT_MFC_FLAGS_BORDER_VIF |
MRT_MFC_RP |
MRT_MFC_BW_UPCALL);
static uint32_t mrt_api_config = 0;
-/*
- * Hash function for a source, group entry
- */
-#define MFCHASH(a, g) MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \
- ((g) >> 20) ^ ((g) >> 10) ^ (g))
+static int pim_assert_enabled;
+static struct timeval pim_assert_interval = { 3, 0 }; /* Rate limit */
/*
- * Find a route for a given origin IP address and Multicast group address
- * Statistics are updated by the caller if needed
- * (mrtstat.mrts_mfc_lookups and mrtstat.mrts_mfc_misses)
+ * Find a route for a given origin IP address and multicast group address.
+ * Statistics must be updated by the caller.
*/
-static struct mfc *
-mfc_find(in_addr_t o, in_addr_t g)
+static __inline struct mfc *
+mfc_find(struct in_addr *o, struct in_addr *g)
{
- struct mfc *rt;
+ struct mfc *rt;
- MFC_LOCK_ASSERT();
+ MFC_LOCK_ASSERT();
- for (rt = mfctable[MFCHASH(o,g)]; rt; rt = rt->mfc_next)
- if ((rt->mfc_origin.s_addr == o) &&
- (rt->mfc_mcastgrp.s_addr == g) && (rt->mfc_stall == NULL))
- break;
- return rt;
-}
+ LIST_FOREACH(rt, &mfchashtbl[MFCHASH(*o, *g)], mfc_hash) {
+ if (in_hosteq(rt->mfc_origin, *o) &&
+ in_hosteq(rt->mfc_mcastgrp, *g) &&
+ TAILQ_EMPTY(&rt->mfc_stall))
+ break;
+ }
-/*
- * Macros to compute elapsed time efficiently
- * Borrowed from Van Jacobson's scheduling code
- */
-#define TV_DELTA(a, b, delta) { \
- int xxs; \
- delta = (a).tv_usec - (b).tv_usec; \
- if ((xxs = (a).tv_sec - (b).tv_sec)) { \
- switch (xxs) { \
- case 2: \
- delta += 1000000; \
- /* FALLTHROUGH */ \
- case 1: \
- delta += 1000000; \
- break; \
- default: \
- delta += (1000000 * xxs); \
- } \
- } \
+ return (rt);
}
-#define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \
- (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec)
-
/*
- * Handle MRT setsockopt commands to modify the multicast routing tables.
+ * Handle MRT setsockopt commands to modify the multicast forwarding tables.
*/
static int
X_ip_mrouter_set(struct socket *so, struct sockopt *sopt)
@@ -526,15 +486,15 @@ static int
X_ip_mrouter_get(struct socket *so, struct sockopt *sopt)
{
int error;
- static int version = 0x0305; /* !!! why is this here? XXX */
switch (sopt->sopt_name) {
case MRT_VERSION:
- error = sooptcopyout(sopt, &version, sizeof version);
+ error = sooptcopyout(sopt, &mrt_api_version, sizeof mrt_api_version);
break;
case MRT_ASSERT:
- error = sooptcopyout(sopt, &pim_assert, sizeof pim_assert);
+ error = sooptcopyout(sopt, &pim_assert_enabled,
+ sizeof pim_assert_enabled);
break;
case MRT_API_SUPPORT:
@@ -556,7 +516,7 @@ X_ip_mrouter_get(struct socket *so, struct sockopt *sopt)
* Handle ioctl commands to obtain information from the cache
*/
static int
-X_mrt_ioctl(int cmd, caddr_t data, int fibnum)
+X_mrt_ioctl(int cmd, caddr_t data, int fibnum __unused)
{
int error = 0;
@@ -593,7 +553,7 @@ get_sg_cnt(struct sioc_sg_req *req)
struct mfc *rt;
MFC_LOCK();
- rt = mfc_find(req->src.s_addr, req->grp.s_addr);
+ rt = mfc_find(&req->src, &req->grp);
if (rt == NULL) {
MFC_UNLOCK();
req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;
@@ -632,10 +592,8 @@ get_vif_cnt(struct sioc_vif_req *req)
static void
ip_mrouter_reset(void)
{
- bzero((caddr_t)mfctable, sizeof(mfctable));
- bzero((caddr_t)nexpire, sizeof(nexpire));
- pim_assert = 0;
+ pim_assert_enabled = 0;
mrt_api_config = 0;
callout_init(&expire_upcalls_ch, CALLOUT_MPSAFE);
@@ -652,55 +610,40 @@ if_detached_event(void *arg __unused, struct ifnet *ifp)
INIT_VNET_INET(curvnet);
vifi_t vifi;
int i;
- struct mfc *mfc;
- struct mfc *nmfc;
- struct mfc **ppmfc; /* Pointer to previous node's next-pointer */
- struct rtdetq *pq;
- struct rtdetq *npq;
MROUTER_LOCK();
+
if (V_ip_mrouter == NULL) {
MROUTER_UNLOCK();
+ return;
}
+ VIF_LOCK();
+ MFC_LOCK();
+
/*
* Tear down multicast forwarder state associated with this ifnet.
* 1. Walk the vif list, matching vifs against this ifnet.
* 2. Walk the multicast forwarding cache (mfc) looking for
* inner matches with this vif's index.
- * 3. Free any pending mbufs for this mfc.
- * 4. Free the associated mfc entry and state associated with this vif.
- * Be very careful about unlinking from a singly-linked list whose
- * "head node" is a pointer in a simple array.
- * 5. Free vif state. This should disable ALLMULTI on the interface.
+ * 3. Expire any matching multicast forwarding cache entries.
+ * 4. Free vif state. This should disable ALLMULTI on the interface.
*/
- VIF_LOCK();
- MFC_LOCK();
for (vifi = 0; vifi < numvifs; vifi++) {
if (viftable[vifi].v_ifp != ifp)
continue;
- for (i = 0; i < MFCTBLSIZ; i++) {
- ppmfc = &mfctable[i];
- for (mfc = mfctable[i]; mfc != NULL; ) {
- nmfc = mfc->mfc_next;
- if (mfc->mfc_parent == vifi) {
- for (pq = mfc->mfc_stall; pq != NULL; ) {
- npq = pq->next;
- m_freem(pq->m);
- free(pq, M_MRTABLE);
- pq = npq;
- }
- free_bw_list(mfc->mfc_bw_meter);
- free(mfc, M_MRTABLE);
- *ppmfc = nmfc;
- } else {
- ppmfc = &mfc->mfc_next;
+ for (i = 0; i < mfchashsize; i++) {
+ struct mfc *rt, *nrt;
+ for (rt = LIST_FIRST(&mfchashtbl[i]); rt; rt = nrt) {
+ nrt = LIST_NEXT(rt, mfc_hash);
+ if (rt->mfc_parent == vifi) {
+ expire_mfc(rt);
+ }
}
- mfc = nmfc;
- }
}
del_vif_locked(vifi);
}
+
MFC_UNLOCK();
VIF_UNLOCK();
@@ -708,7 +651,7 @@ if_detached_event(void *arg __unused, struct ifnet *ifp)
}
/*
- * Enable multicast routing
+ * Enable multicast forwarding.
*/
static int
ip_mrouter_init(struct socket *so, int version)
@@ -739,6 +682,8 @@ ip_mrouter_init(struct socket *so, int version)
return (ENOMEM);
}
+ mfchashtbl = hashinit_flags(mfchashsize, M_MRTABLE, &mfchash, HASH_NOWAIT);
+
callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls, NULL);
callout_reset(&bw_upcalls_ch, BW_UPCALLS_PERIOD,
@@ -756,7 +701,7 @@ ip_mrouter_init(struct socket *so, int version)
}
/*
- * Disable multicast routing
+ * Disable multicast forwarding.
*/
static int
X_ip_mrouter_done(void)
@@ -766,8 +711,6 @@ X_ip_mrouter_done(void)
int i;
struct ifnet *ifp;
struct ifreq ifr;
- struct mfc *rt;
- struct rtdetq *rte;
MROUTER_LOCK();
@@ -783,12 +726,13 @@ X_ip_mrouter_done(void)
mrt_api_config = 0;
VIF_LOCK();
+
/*
* For each phyint in use, disable promiscuous reception of all IP
* multicasts.
*/
for (vifi = 0; vifi < numvifs; vifi++) {
- if (viftable[vifi].v_lcl_addr.s_addr != 0 &&
+ if (!in_nullhost(viftable[vifi].v_lcl_addr) &&
!(viftable[vifi].v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) {
struct sockaddr_in *so = (struct sockaddr_in *)&(ifr.ifr_addr);
@@ -801,38 +745,37 @@ X_ip_mrouter_done(void)
}
bzero((caddr_t)viftable, sizeof(viftable));
numvifs = 0;
- pim_assert = 0;
+ pim_assert_enabled = 0;
+
VIF_UNLOCK();
+
EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag);
- /*
- * Free all multicast forwarding cache entries.
- */
callout_stop(&expire_upcalls_ch);
callout_stop(&bw_upcalls_ch);
callout_stop(&bw_meter_ch);
MFC_LOCK();
- for (i = 0; i < MFCTBLSIZ; i++) {
- for (rt = mfctable[i]; rt != NULL; ) {
- struct mfc *nr = rt->mfc_next;
-
- for (rte = rt->mfc_stall; rte != NULL; ) {
- struct rtdetq *n = rte->next;
- m_freem(rte->m);
- free(rte, M_MRTABLE);
- rte = n;
- }
- free_bw_list(rt->mfc_bw_meter);
- free(rt, M_MRTABLE);
- rt = nr;
+ /*
+ * Free all multicast forwarding cache entries.
+ * Do not use hashdestroy(), as we must perform other cleanup.
+ */
+ for (i = 0; i < mfchashsize; i++) {
+ struct mfc *rt, *nrt;
+ for (rt = LIST_FIRST(&mfchashtbl[i]); rt; rt = nrt) {
+ nrt = LIST_NEXT(rt, mfc_hash);
+ expire_mfc(rt);
}
}
- bzero((caddr_t)mfctable, sizeof(mfctable));
- bzero((caddr_t)nexpire, sizeof(nexpire));
+ free(mfchashtbl, M_MRTABLE);
+ mfchashtbl = NULL;
+
+ bzero(nexpire, sizeof(nexpire[0]) * mfchashsize);
+
bw_upcalls_n = 0;
bzero(bw_meter_timers, sizeof(bw_meter_timers));
+
MFC_UNLOCK();
reg_vif_num = VIFI_INVALID;
@@ -854,7 +797,7 @@ set_assert(int i)
if ((i != 1) && (i != 0))
return EINVAL;
- pim_assert = i;
+ pim_assert_enabled = i;
return 0;
}
@@ -878,17 +821,24 @@ set_api_config(uint32_t *apival)
*apival = 0;
return EPERM;
}
- if (pim_assert) {
+ if (pim_assert_enabled) {
*apival = 0;
return EPERM;
}
- for (i = 0; i < MFCTBLSIZ; i++) {
- if (mfctable[i] != NULL) {
+
+ MFC_LOCK();
+
+ for (i = 0; i < mfchashsize; i++) {
+ if (LIST_FIRST(&mfchashtbl[i]) != NULL) {
+ /* Do not return with the MFC lock held. */
+ MFC_UNLOCK();
*apival = 0;
return EPERM;
}
}
+ MFC_UNLOCK();
+
mrt_api_config = *apival & mrt_api_support;
*apival = mrt_api_config;
@@ -918,11 +866,11 @@ add_vif(struct vifctl *vifcp)
VIF_UNLOCK();
return EINVAL;
}
- if (vifp->v_lcl_addr.s_addr != INADDR_ANY) {
+ if (!in_nullhost(vifp->v_lcl_addr)) {
VIF_UNLOCK();
return EADDRINUSE;
}
- if (vifcp->vifc_lcl_addr.s_addr == INADDR_ANY) {
+ if (in_nullhost(vifcp->vifc_lcl_addr)) {
VIF_UNLOCK();
return EADDRNOTAVAIL;
}
@@ -978,8 +926,6 @@ add_vif(struct vifctl *vifcp)
vifp->v_lcl_addr = vifcp->vifc_lcl_addr;
vifp->v_rmt_addr = vifcp->vifc_rmt_addr;
vifp->v_ifp = ifp;
- vifp->v_rsvp_on = 0;
- vifp->v_rsvpd = NULL;
/* initialize per vif pkt counters */
vifp->v_pkt_in = 0;
vifp->v_pkt_out = 0;
@@ -988,7 +934,8 @@ add_vif(struct vifctl *vifcp)
bzero(&vifp->v_route, sizeof(vifp->v_route));
/* Adjust numvifs up if the vifi is higher than numvifs */
- if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1;
+ if (numvifs <= vifcp->vifc_vifi)
+ numvifs = vifcp->vifc_vifi + 1;
VIF_UNLOCK();
@@ -1017,7 +964,7 @@ del_vif_locked(vifi_t vifi)
return EINVAL;
}
vifp = &viftable[vifi];
- if (vifp->v_lcl_addr.s_addr == INADDR_ANY) {
+ if (in_nullhost(vifp->v_lcl_addr)) {
return EADDRNOTAVAIL;
}
@@ -1034,7 +981,7 @@ del_vif_locked(vifi_t vifi)
/* Adjust numvifs down */
for (vifi = numvifs; vifi > 0; vifi--)
- if (viftable[vifi-1].v_lcl_addr.s_addr != INADDR_ANY)
+ if (!in_nullhost(viftable[vifi-1].v_lcl_addr))
break;
numvifs = vifi;
@@ -1089,9 +1036,25 @@ init_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp)
rt->mfc_pkt_cnt = 0;
rt->mfc_byte_cnt = 0;
rt->mfc_wrong_if = 0;
- rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0;
+ timevalclear(&rt->mfc_last_assert);
}
+static void
+expire_mfc(struct mfc *rt)
+{
+ struct rtdetq *rte, *nrte;
+
+ free_bw_list(rt->mfc_bw_meter);
+
+ TAILQ_FOREACH_SAFE(rte, &rt->mfc_stall, rte_link, nrte) {
+ m_freem(rte->m);
+ TAILQ_REMOVE(&rt->mfc_stall, rte, rte_link);
+ free(rte, M_MRTABLE);
+ }
+
+ LIST_REMOVE(rt, mfc_hash);
+ free(rt, M_MRTABLE);
+}
/*
* Add an mfc entry
@@ -1100,14 +1063,14 @@ static int
add_mfc(struct mfcctl2 *mfccp)
{
struct mfc *rt;
- u_long hash;
- struct rtdetq *rte;
+ struct rtdetq *rte, *nrte;
+ u_long hash = 0;
u_short nstl;
VIF_LOCK();
MFC_LOCK();
- rt = mfc_find(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
+ rt = mfc_find(&mfccp->mfcc_origin, &mfccp->mfcc_mcastgrp);
/* If an entry already exists, just update the fields */
if (rt) {
@@ -1120,47 +1083,48 @@ add_mfc(struct mfcctl2 *mfccp)
update_mfc_params(rt, mfccp);
MFC_UNLOCK();
VIF_UNLOCK();
- return 0;
+ return (0);
}
/*
* Find the entry for which the upcall was made and update
*/
- hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
- for (rt = mfctable[hash], nstl = 0; rt; rt = rt->mfc_next) {
-
- if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
- (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) &&
- (rt->mfc_stall != NULL)) {
-
- if (nstl++)
- log(LOG_ERR, "add_mfc %s o %lx g %lx p %x dbx %p\n",
- "multiple kernel entries",
- (u_long)ntohl(mfccp->mfcc_origin.s_addr),
- (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
- mfccp->mfcc_parent, (void *)rt->mfc_stall);
-
- if (mrtdebug & DEBUG_MFC)
- log(LOG_DEBUG,"add_mfc o %lx g %lx p %x dbg %p\n",
- (u_long)ntohl(mfccp->mfcc_origin.s_addr),
- (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
- mfccp->mfcc_parent, (void *)rt->mfc_stall);
-
- init_mfc_params(rt, mfccp);
-
- rt->mfc_expire = 0; /* Don't clean this guy up */
- nexpire[hash]--;
+ nstl = 0;
+ hash = MFCHASH(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp);
+ LIST_FOREACH(rt, &mfchashtbl[hash], mfc_hash) {
+ if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) &&
+ in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp) &&
+ !TAILQ_EMPTY(&rt->mfc_stall)) {
+ if (nstl++) {
+ log(LOG_ERR, "add_mfc %s o %lx g %lx p %x dbx %p\n",
+ "multiple kernel entries",
+ (u_long)ntohl(mfccp->mfcc_origin.s_addr),
+ (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
+ mfccp->mfcc_parent,
+ (void *)TAILQ_FIRST(&rt->mfc_stall));
+ }
- /* free packets Qed at the end of this entry */
- for (rte = rt->mfc_stall; rte != NULL; ) {
- struct rtdetq *n = rte->next;
+ if (mrtdebug & DEBUG_MFC) {
+ log(LOG_DEBUG,"add_mfc o %lx g %lx p %x dbg %p\n",
+ (u_long)ntohl(mfccp->mfcc_origin.s_addr),
+ (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
+ mfccp->mfcc_parent,
+ (void *)TAILQ_FIRST(&rt->mfc_stall));
+ }
- ip_mdq(rte->m, rte->ifp, rt, -1);
- m_freem(rte->m);
- free(rte, M_MRTABLE);
- rte = n;
- }
- rt->mfc_stall = NULL;
+ init_mfc_params(rt, mfccp);
+ rt->mfc_expire = 0; /* Don't clean this guy up */
+ nexpire[hash]--;
+
+ /* Free queued packets, but attempt to forward them first. */
+ TAILQ_FOREACH_SAFE(rte, &rt->mfc_stall, rte_link, nrte) {
+ if (rte->ifp != NULL)
+ ip_mdq(rte->m, rte->ifp, rt, -1);
+ m_freem(rte->m);
+ TAILQ_REMOVE(&rt->mfc_stall, rte, rte_link);
+ rt->mfc_nstall--;
+ free(rte, M_MRTABLE);
+ }
}
}
@@ -1168,43 +1132,50 @@ add_mfc(struct mfcctl2 *mfccp)
* It is possible that an entry is being inserted without an upcall
*/
if (nstl == 0) {
+ /*
+ * No mfc; make a new one
+ */
if (mrtdebug & DEBUG_MFC)
log(LOG_DEBUG,"add_mfc no upcall h %lu o %lx g %lx p %x\n",
hash, (u_long)ntohl(mfccp->mfcc_origin.s_addr),
(u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
mfccp->mfcc_parent);
- for (rt = mfctable[hash]; rt != NULL; rt = rt->mfc_next) {
- if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
- (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) {
- init_mfc_params(rt, mfccp);
- if (rt->mfc_expire)
- nexpire[hash]--;
- rt->mfc_expire = 0;
- break; /* XXX */
- }
+ LIST_FOREACH(rt, &mfchashtbl[hash], mfc_hash) {
+ if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) &&
+ in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp)) {
+ init_mfc_params(rt, mfccp);
+ if (rt->mfc_expire)
+ nexpire[hash]--;
+ rt->mfc_expire = 0;
+ break; /* XXX */
+ }
}
+
if (rt == NULL) { /* no upcall, so make a new entry */
rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
if (rt == NULL) {
MFC_UNLOCK();
VIF_UNLOCK();
- return ENOBUFS;
+ return (ENOBUFS);
}
init_mfc_params(rt, mfccp);
- rt->mfc_expire = 0;
- rt->mfc_stall = NULL;
+ TAILQ_INIT(&rt->mfc_stall);
+ rt->mfc_nstall = 0;
+ rt->mfc_expire = 0;
rt->mfc_bw_meter = NULL;
+
/* insert new entry at head of hash chain */
- rt->mfc_next = mfctable[hash];
- mfctable[hash] = rt;
+ LIST_INSERT_HEAD(&mfchashtbl[hash], rt, mfc_hash);
}
}
+
MFC_UNLOCK();
VIF_UNLOCK();
- return 0;
+
+ return (0);
}
/*
@@ -1216,49 +1187,40 @@ del_mfc(struct mfcctl2 *mfccp)
struct in_addr origin;
struct in_addr mcastgrp;
struct mfc *rt;
- struct mfc **nptr;
- u_long hash;
- struct bw_meter *list;
origin = mfccp->mfcc_origin;
mcastgrp = mfccp->mfcc_mcastgrp;
- if (mrtdebug & DEBUG_MFC)
+ if (mrtdebug & DEBUG_MFC) {
log(LOG_DEBUG,"del_mfc orig %lx mcastgrp %lx\n",
- (u_long)ntohl(origin.s_addr), (u_long)ntohl(mcastgrp.s_addr));
+ (u_long)ntohl(origin.s_addr),
+ (u_long)ntohl(mcastgrp.s_addr));
+ }
MFC_LOCK();
- hash = MFCHASH(origin.s_addr, mcastgrp.s_addr);
- for (nptr = &mfctable[hash]; (rt = *nptr) != NULL; nptr = &rt->mfc_next)
- if (origin.s_addr == rt->mfc_origin.s_addr &&
- mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr &&
- rt->mfc_stall == NULL)
- break;
+ rt = mfc_find(&origin, &mcastgrp);
if (rt == NULL) {
MFC_UNLOCK();
return EADDRNOTAVAIL;
}
- *nptr = rt->mfc_next;
-
/*
* free the bw_meter entries
*/
- list = rt->mfc_bw_meter;
+ free_bw_list(rt->mfc_bw_meter);
rt->mfc_bw_meter = NULL;
+ LIST_REMOVE(rt, mfc_hash);
free(rt, M_MRTABLE);
- free_bw_list(list);
-
MFC_UNLOCK();
- return 0;
+ return (0);
}
/*
- * Send a message to the routing daemon on the multicast routing socket
+ * Send a message to the routing daemon on the multicast routing socket.
*/
static int
socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src)
@@ -1329,26 +1291,11 @@ X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m,
if (imo && ((vifi = imo->imo_multicast_vif) < numvifs)) {
if (ip->ip_ttl < MAXTTL)
ip->ip_ttl++; /* compensate for -1 in *_send routines */
- if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
- struct vif *vifp = viftable + vifi;
-
- printf("Sending IPPROTO_RSVP from %lx to %lx on vif %d (%s%s)\n",
- (long)ntohl(ip->ip_src.s_addr), (long)ntohl(ip->ip_dst.s_addr),
- vifi,
- (vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "",
- vifp->v_ifp->if_xname);
- }
error = ip_mdq(m, ifp, NULL, vifi);
MFC_UNLOCK();
VIF_UNLOCK();
return error;
}
- if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
- printf("Warning: IPPROTO_RSVP from %lx to %lx without vif option\n",
- (long)ntohl(ip->ip_src.s_addr), (long)ntohl(ip->ip_dst.s_addr));
- if (!imo)
- printf("In fact, no options were specified at all\n");
- }
/*
* Don't forward a packet with time-to-live of zero or one,
@@ -1364,7 +1311,7 @@ X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m,
* Determine forwarding vifs from the forwarding cache table
*/
++mrtstat.mrts_mfc_lookups;
- rt = mfc_find(ip->ip_src.s_addr, ip->ip_dst.s_addr);
+ rt = mfc_find(&ip->ip_src, &ip->ip_dst);
/* Entry exists, so forward if necessary */
if (rt != NULL) {
@@ -1396,12 +1343,14 @@ X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m,
* just going to fail anyway. Make sure to pullup the header so
* that other people can't step on it.
*/
- rte = (struct rtdetq *)malloc((sizeof *rte), M_MRTABLE, M_NOWAIT);
+ rte = (struct rtdetq *)malloc((sizeof *rte), M_MRTABLE,
+ M_NOWAIT|M_ZERO);
if (rte == NULL) {
MFC_UNLOCK();
VIF_UNLOCK();
return ENOBUFS;
}
+
mb0 = m_copypacket(m, M_DONTWAIT);
if (mb0 && (M_HASCL(mb0) || mb0->m_len < hlen))
mb0 = m_pullup(mb0, hlen);
@@ -1413,12 +1362,12 @@ X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m,
}
/* is there an upcall waiting for this flow ? */
- hash = MFCHASH(ip->ip_src.s_addr, ip->ip_dst.s_addr);
- for (rt = mfctable[hash]; rt; rt = rt->mfc_next) {
- if ((ip->ip_src.s_addr == rt->mfc_origin.s_addr) &&
- (ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) &&
- (rt->mfc_stall != NULL))
- break;
+ hash = MFCHASH(ip->ip_src, ip->ip_dst);
+ LIST_FOREACH(rt, &mfchashtbl[hash], mfc_hash) {
+ if (in_hosteq(ip->ip_src, rt->mfc_origin) &&
+ in_hosteq(ip->ip_dst, rt->mfc_mcastgrp) &&
+ !TAILQ_EMPTY(&rt->mfc_stall))
+ break;
}
if (rt == NULL) {
@@ -1431,7 +1380,8 @@ X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m,
* Locate the vifi for the incoming interface for this packet.
* If none found, drop packet.
*/
- for (vifi=0; vifi < numvifs && viftable[vifi].v_ifp != ifp; vifi++)
+ for (vifi = 0; vifi < numvifs &&
+ viftable[vifi].v_ifp != ifp; vifi++)
;
if (vifi >= numvifs) /* vif not found, drop packet */
goto non_fatal;
@@ -1482,45 +1432,32 @@ fail:
}
rt->mfc_parent = -1;
- rt->mfc_rp.s_addr = INADDR_ANY; /* clear the RP address */
-
+ /* clear the RP address */
+ rt->mfc_rp.s_addr = INADDR_ANY;
rt->mfc_bw_meter = NULL;
/* link into table */
- rt->mfc_next = mfctable[hash];
- mfctable[hash] = rt;
- rt->mfc_stall = rte;
+ LIST_INSERT_HEAD(&mfchashtbl[hash], rt, mfc_hash);
+ TAILQ_INSERT_HEAD(&rt->mfc_stall, rte, rte_link);
+ rt->mfc_nstall++;
} else {
- /* determine if q has overflowed */
- int npkts = 0;
- struct rtdetq **p;
-
- /*
- * XXX ouch! we need to append to the list, but we
- * only have a pointer to the front, so we have to
- * scan the entire list every time.
- */
- for (p = &rt->mfc_stall; *p != NULL; p = &(*p)->next)
- npkts++;
-
- if (npkts > MAX_UPQ) {
+ /* determine if queue has overflowed */
+ if (rt->mfc_nstall > MAX_UPQ) {
mrtstat.mrts_upq_ovflw++;
non_fatal:
free(rte, M_MRTABLE);
m_freem(mb0);
MFC_UNLOCK();
VIF_UNLOCK();
- return 0;
+ return (0);
}
-
- /* Add this entry to the end of the queue */
- *p = rte;
+ TAILQ_INSERT_TAIL(&rt->mfc_stall, rte, rte_link);
+ rt->mfc_nstall++;
}
rte->m = mb0;
rte->ifp = ifp;
- rte->next = NULL;
MFC_UNLOCK();
VIF_UNLOCK();
@@ -1535,58 +1472,46 @@ non_fatal:
static void
expire_upcalls(void *unused)
{
- struct rtdetq *rte;
- struct mfc *mfc, **nptr;
int i;
MFC_LOCK();
- for (i = 0; i < MFCTBLSIZ; i++) {
+
+ for (i = 0; i < mfchashsize; i++) {
+ struct mfc *rt, *nrt;
+
if (nexpire[i] == 0)
continue;
- nptr = &mfctable[i];
- for (mfc = *nptr; mfc != NULL; mfc = *nptr) {
- /*
- * Skip real cache entries
- * Make sure it wasn't marked to not expire (shouldn't happen)
- * If it expires now
- */
- if (mfc->mfc_stall != NULL && mfc->mfc_expire != 0 &&
- --mfc->mfc_expire == 0) {
- if (mrtdebug & DEBUG_EXPIRE)
- log(LOG_DEBUG, "expire_upcalls: expiring (%lx %lx)\n",
- (u_long)ntohl(mfc->mfc_origin.s_addr),
- (u_long)ntohl(mfc->mfc_mcastgrp.s_addr));
- /*
- * drop all the packets
- * free the mbuf with the pkt, if, timing info
- */
- for (rte = mfc->mfc_stall; rte; ) {
- struct rtdetq *n = rte->next;
- m_freem(rte->m);
- free(rte, M_MRTABLE);
- rte = n;
- }
- ++mrtstat.mrts_cache_cleanups;
- nexpire[i]--;
+ for (rt = LIST_FIRST(&mfchashtbl[i]); rt; rt = nrt) {
+ nrt = LIST_NEXT(rt, mfc_hash);
+
+ if (TAILQ_EMPTY(&rt->mfc_stall))
+ continue;
+
+ if (rt->mfc_expire == 0 || --rt->mfc_expire > 0)
+ continue;
/*
* free the bw_meter entries
*/
- while (mfc->mfc_bw_meter != NULL) {
- struct bw_meter *x = mfc->mfc_bw_meter;
+ while (rt->mfc_bw_meter != NULL) {
+ struct bw_meter *x = rt->mfc_bw_meter;
- mfc->mfc_bw_meter = x->bm_mfc_next;
+ rt->mfc_bw_meter = x->bm_mfc_next;
free(x, M_BWMETER);
}
- *nptr = mfc->mfc_next;
- free(mfc, M_MRTABLE);
- } else {
- nptr = &mfc->mfc_next;
+ ++mrtstat.mrts_cache_cleanups;
+ if (mrtdebug & DEBUG_EXPIRE) {
+ log(LOG_DEBUG, "expire_upcalls: expiring (%lx %lx)\n",
+ (u_long)ntohl(rt->mfc_origin.s_addr),
+ (u_long)ntohl(rt->mfc_mcastgrp.s_addr));
+ }
+
+ expire_mfc(rt);
}
- }
}
+
MFC_UNLOCK();
callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls, NULL);
@@ -1637,9 +1562,7 @@ ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif)
* can complete the SPT switch, regardless of the type
* of the iif (broadcast media, GRE tunnel, etc).
*/
- if (pim_assert && (vifi < numvifs) && viftable[vifi].v_ifp) {
- struct timeval now;
- u_long delta;
+ if (pim_assert_enabled && (vifi < numvifs) && viftable[vifi].v_ifp) {
if (ifp == &multicast_register_if)
pimstat.pims_rcv_registers_wrongiif++;
@@ -1653,11 +1576,7 @@ ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif)
if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_DISABLE_WRONGVIF)
return 0; /* WRONGVIF disabled: ignore the packet */
- GET_TIME(now);
-
- TV_DELTA(now, rt->mfc_last_assert, delta);
-
- if (delta > ASSERT_MSG_TIME) {
+ if (ratecheck(&rt->mfc_last_assert, &pim_assert_interval)) {
struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET };
struct igmpmsg *im;
int hlen = ip->ip_hl << 2;
@@ -1668,8 +1587,6 @@ ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif)
if (mm == NULL)
return ENOBUFS;
- rt->mfc_last_assert = now;
-
im = mtod(mm, struct igmpmsg *);
im->im_msgtype = IGMPMSG_WRONGVIF;
im->im_mbz = 0;
@@ -1689,8 +1606,9 @@ ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif)
return 0;
}
+
/* If I sourced this packet, it counts as output, else it was input. */
- if (ip->ip_src.s_addr == viftable[vifi].v_lcl_addr.s_addr) {
+ if (in_hosteq(ip->ip_src, viftable[vifi].v_lcl_addr)) {
viftable[vifi].v_pkt_out++;
viftable[vifi].v_bytes_out += plen;
} else {
@@ -1723,7 +1641,7 @@ ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif)
struct bw_meter *x;
struct timeval now;
- GET_TIME(now);
+ microtime(&now);
MFC_LOCK_ASSERT();
for (x = rt->mfc_bw_meter; x != NULL; x = x->bm_mfc_next)
bw_meter_receive_packet(x, plen, &now);
@@ -1733,13 +1651,23 @@ ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif)
}
/*
- * check if a vif number is legal/ok. This is used by ip_output.
+ * Check if a vif number is legal/ok. This is used by in_mcast.c.
+ * Returns 1 when 0 <= vif < numvifs and 0 otherwise; numvifs is
+ * compared while the VIF lock is held.
*/
static int
X_legal_vif_num(int vif)
{
- /* XXX unlocked, matter? */
- return (vif >= 0 && vif < numvifs);
+ int ret;
+
+ ret = 0;
+ if (vif < 0) /* negative indices are rejected without taking the lock */
+ return (ret);
+
+ VIF_LOCK();
+ if (vif < numvifs)
+ ret = 1;
+ VIF_UNLOCK();
+
+ return (ret);
}
/*
@@ -1748,11 +1676,18 @@ X_legal_vif_num(int vif)
static u_long
X_ip_mcast_src(int vifi) /* local address of vif 'vifi', or INADDR_ANY if out of range */
{
- /* XXX unlocked, matter? */
- if (vifi >= 0 && vifi < numvifs)
- return viftable[vifi].v_lcl_addr.s_addr;
- else
- return INADDR_ANY;
+ in_addr_t addr;
+
+ addr = INADDR_ANY; /* value returned for an invalid vifi */
+ if (vifi < 0)
+ return (addr);
+
+ VIF_LOCK(); /* numvifs and viftable[] are read under the VIF lock */
+ if (vifi < numvifs)
+ addr = viftable[vifi].v_lcl_addr.s_addr;
+ VIF_UNLOCK();
+
+ return (addr);
}
static void
@@ -1807,167 +1742,29 @@ send_packet(struct vif *vifp, struct mbuf *m)
}
}
+/*
+ * Stubs for old RSVP socket shim implementation.
+ */
+
static int
-X_ip_rsvp_vif(struct socket *so, struct sockopt *sopt)
+X_ip_rsvp_vif(struct socket *so __unused, struct sockopt *sopt __unused)
{
- INIT_VNET_INET(curvnet);
- int error, vifi;
- if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
- return EOPNOTSUPP;
-
- error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi);
- if (error)
- return error;
-
- VIF_LOCK();
-
- if (vifi < 0 || vifi >= numvifs) { /* Error if vif is invalid */
- VIF_UNLOCK();
- return EADDRNOTAVAIL;
- }
-
- if (sopt->sopt_name == IP_RSVP_VIF_ON) {
- /* Check if socket is available. */
- if (viftable[vifi].v_rsvpd != NULL) {
- VIF_UNLOCK();
- return EADDRINUSE;
- }
-
- viftable[vifi].v_rsvpd = so;
- /* This may seem silly, but we need to be sure we don't over-increment
- * the RSVP counter, in case something slips up.
- */
- if (!viftable[vifi].v_rsvp_on) {
- viftable[vifi].v_rsvp_on = 1;
- V_rsvp_on++;
- }
- } else { /* must be VIF_OFF */
- /*
- * XXX as an additional consistency check, one could make sure
- * that viftable[vifi].v_rsvpd == so, otherwise passing so as
- * first parameter is pretty useless.
- */
- viftable[vifi].v_rsvpd = NULL;
- /*
- * This may seem silly, but we need to be sure we don't over-decrement
- * the RSVP counter, in case something slips up.
- */
- if (viftable[vifi].v_rsvp_on) {
- viftable[vifi].v_rsvp_on = 0;
- V_rsvp_on--;
- }
- }
- VIF_UNLOCK();
- return 0;
+ return (EOPNOTSUPP); /* stub: every request is refused; both arguments are ignored */
}
static void
-X_ip_rsvp_force_done(struct socket *so)
+X_ip_rsvp_force_done(struct socket *so __unused)
{
- INIT_VNET_INET(curvnet);
- int vifi;
- /* Don't bother if it is not the right type of socket. */
- if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
- return;
-
- VIF_LOCK();
-
- /* The socket may be attached to more than one vif...this
- * is perfectly legal.
- */
- for (vifi = 0; vifi < numvifs; vifi++) {
- if (viftable[vifi].v_rsvpd == so) {
- viftable[vifi].v_rsvpd = NULL;
- /* This may seem silly, but we need to be sure we don't
- * over-decrement the RSVP counter, in case something slips up.
- */
- if (viftable[vifi].v_rsvp_on) {
- viftable[vifi].v_rsvp_on = 0;
- V_rsvp_on--;
- }
- }
- }
-
- VIF_UNLOCK();
} /* stub: the replacement body is empty and 'so' is never touched */
static void
-X_rsvp_input(struct mbuf *m, int off)
+X_rsvp_input(struct mbuf *m, int off __unused)
{
- INIT_VNET_INET(curvnet);
- int vifi;
- struct ip *ip = mtod(m, struct ip *);
- struct sockaddr_in rsvp_src = { sizeof rsvp_src, AF_INET };
- struct ifnet *ifp;
-
- if (rsvpdebug)
- printf("rsvp_input: rsvp_on %d\n", V_rsvp_on);
-
- /* Can still get packets with rsvp_on = 0 if there is a local member
- * of the group to which the RSVP packet is addressed. But in this
- * case we want to throw the packet away.
- */
- if (!V_rsvp_on) {
- m_freem(m);
- return;
- }
-
- if (rsvpdebug)
- printf("rsvp_input: check vifs\n");
-
-#ifdef DIAGNOSTIC
- M_ASSERTPKTHDR(m);
-#endif
-
- ifp = m->m_pkthdr.rcvif;
-
- VIF_LOCK();
- /* Find which vif the packet arrived on. */
- for (vifi = 0; vifi < numvifs; vifi++)
- if (viftable[vifi].v_ifp == ifp)
- break;
- if (vifi == numvifs || viftable[vifi].v_rsvpd == NULL) {
- /*
- * Drop the lock here to avoid holding it across rip_input.
- * This could make rsvpdebug printfs wrong. If you care,
- * record the state of stuff before dropping the lock.
- */
- VIF_UNLOCK();
- /*
- * If the old-style non-vif-associated socket is set,
- * then use it. Otherwise, drop packet since there
- * is no specific socket for this vif.
- */
- if (V_ip_rsvpd != NULL) {
- if (rsvpdebug)
- printf("rsvp_input: Sending packet up old-style socket\n");
- rip_input(m, off); /* xxx */
- } else {
- if (rsvpdebug && vifi == numvifs)
- printf("rsvp_input: Can't find vif for packet.\n");
- else if (rsvpdebug && viftable[vifi].v_rsvpd == NULL)
- printf("rsvp_input: No socket defined for vif %d\n",vifi);
- m_freem(m);
- }
- return;
- }
- rsvp_src.sin_addr = ip->ip_src;
-
- if (rsvpdebug && m)
- printf("rsvp_input: m->m_len = %d, sbspace() = %ld\n",
- m->m_len,sbspace(&(viftable[vifi].v_rsvpd->so_rcv)));
-
- if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0) {
- if (rsvpdebug)
- printf("rsvp_input: Failed to append to socket\n");
- } else {
- if (rsvpdebug)
- printf("rsvp_input: send packet up\n");
- }
- VIF_UNLOCK();
+ /*
+ * Stub: this routine no longer delivers RSVP packets anywhere, so it
+ * must dispose of every mbuf it is handed. The removed implementation
+ * freed or passed on 'm' on each path; freeing only when V_rsvp_on was
+ * zero left the mbuf unreferenced (leaked) whenever RSVP was enabled.
+ * NOTE(review): assumes the caller does not touch 'm' afterwards, as
+ * was already required by the removed code -- confirm.
+ */
+ m_freem(m);
}
/*
@@ -2033,7 +1830,7 @@ add_bw_upcall(struct bw_upcall *req)
* Find if we have already same bw_meter entry
*/
MFC_LOCK();
- mfc = mfc_find(req->bu_src.s_addr, req->bu_dst.s_addr);
+ mfc = mfc_find(&req->bu_src, &req->bu_dst);
if (mfc == NULL) {
MFC_UNLOCK();
return EADDRNOTAVAIL;
@@ -2058,7 +1855,7 @@ add_bw_upcall(struct bw_upcall *req)
/* Set the new bw_meter entry */
x->bm_threshold.b_time = req->bu_threshold.b_time;
- GET_TIME(now);
+ microtime(&now);
x->bm_start_time = now;
x->bm_threshold.b_packets = req->bu_threshold.b_packets;
x->bm_threshold.b_bytes = req->bu_threshold.b_bytes;
@@ -2103,8 +1900,9 @@ del_bw_upcall(struct bw_upcall *req)
return EOPNOTSUPP;
MFC_LOCK();
+
/* Find the corresponding MFC entry */
- mfc = mfc_find(req->bu_src.s_addr, req->bu_dst.s_addr);
+ mfc = mfc_find(&req->bu_src, &req->bu_dst);
if (mfc == NULL) {
MFC_UNLOCK();
return EADDRNOTAVAIL;
@@ -2446,7 +2244,7 @@ bw_meter_process()
int i;
struct timeval now, process_endtime;
- GET_TIME(now);
+ microtime(&now);
if (last_tv_sec == now.tv_sec)
return; /* nothing to do */
@@ -2572,7 +2370,7 @@ pim_register_send(struct ip *ip, struct vif *vifp, struct mbuf *m,
* rendezvous point was unspecified, and we were told not to.
*/
if (pim_squelch_wholepkt != 0 && (mrt_api_config & MRT_MFC_RP) &&
- (rt->mfc_rp.s_addr == INADDR_ANY))
+ in_nullhost(rt->mfc_rp))
return 0;
mb_copy = pim_register_prepare(ip, m);
@@ -2589,8 +2387,7 @@ pim_register_send(struct ip *ip, struct vif *vifp, struct mbuf *m,
mm = m_pullup(mm, sizeof(struct ip));
if (mm != NULL) {
ip = mtod(mm, struct ip *);
- if ((mrt_api_config & MRT_MFC_RP) &&
- (rt->mfc_rp.s_addr != INADDR_ANY)) {
+ if ((mrt_api_config & MRT_MFC_RP) && !in_nullhost(rt->mfc_rp)) {
pim_register_send_rp(ip, vifp, mm, rt);
} else {
pim_register_send_upcall(ip, vifp, mm, rt);
@@ -2723,7 +2520,7 @@ pim_register_send_rp(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy,
VIF_LOCK_ASSERT();
- if ((vifi >= numvifs) || (viftable[vifi].v_lcl_addr.s_addr == 0)) {
+ if ((vifi >= numvifs) || in_nullhost(viftable[vifi].v_lcl_addr)) {
m_freem(mb_copy);
return EADDRNOTAVAIL; /* The iif vif is invalid */
}
@@ -2779,7 +2576,7 @@ pim_register_send_rp(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy,
}
/*
- * pim_encapcheck() is called by the encap[46]_input() path at runtime to
+ * pim_encapcheck() is called by the encap4_input() path at runtime to
* determine if a packet is for PIM; allowing PIM to be dynamically loaded
* into the kernel.
*/
@@ -3037,10 +2834,37 @@ pim_input_to_daemon:
return;
}
-/*
- * XXX: This is common code for dealing with initialization for both
- * the IPv4 and IPv6 multicast forwarding paths. It could do with cleanup.
- */
+/*
+ * Read-only sysctl handler that copies out every struct mfc currently
+ * linked into the forwarding cache hash table, one SYSCTL_OUT() per
+ * entry. Returns EPERM on a write attempt, the first error reported by
+ * sysctl_wire_old_buffer() or SYSCTL_OUT(), and 0 otherwise (including
+ * when no table is allocated).
+ */
+static int
+sysctl_mfctable(SYSCTL_HANDLER_ARGS)
+{
+ struct mfc *rt;
+ int error, i;
+
+ if (req->newptr)
+ return (EPERM);
+ /*
+ * Kept ahead of MFC_LOCK(), as in the original ordering (presumably
+ * because wiring the user buffer may sleep -- confirm).
+ */
+ error = sysctl_wire_old_buffer(req, 0);
+ if (error)
+ return (error);
+
+ MFC_LOCK();
+ /*
+ * Test the table pointer with the MFC lock held instead of before
+ * taking it, so the test and the walk below see the same table.
+ * NOTE(review): assumes mfchashtbl is only replaced or freed under
+ * the MFC lock -- confirm against ip_mrouter_init/done.
+ */
+ if (mfchashtbl == NULL)
+ goto out_locked; /* error is 0 here: nothing to export */
+ for (i = 0; i < mfchashsize; i++) {
+ LIST_FOREACH(rt, &mfchashtbl[i], mfc_hash) {
+ error = SYSCTL_OUT(req, rt, sizeof(struct mfc));
+ if (error)
+ goto out_locked;
+ }
+ }
+out_locked:
+ MFC_UNLOCK();
+ return (error);
+}
+
+SYSCTL_NODE(_net_inet_ip, OID_AUTO, mfctable, CTLFLAG_RD, sysctl_mfctable,
+ "IPv4 Multicast Forwarding Table (struct *mfc[mfchashsize], "
+ "netinet/ip_mroute.h)");
+
static int
ip_mroute_modevent(module_t mod, int type, void *unused)
{
@@ -3051,9 +2875,20 @@ ip_mroute_modevent(module_t mod, int type, void *unused)
MROUTER_LOCK_INIT();
MFC_LOCK_INIT();
VIF_LOCK_INIT();
- ip_mrouter_reset();
+
+ mfchashsize = MFCHASHSIZE;
+ if (TUNABLE_ULONG_FETCH("net.inet.ip.mfchashsize", &mfchashsize) &&
+ !powerof2(mfchashsize)) {
+ printf("WARNING: %s not a power of 2; using default\n",
+ "net.inet.ip.mfchashsize");
+ mfchashsize = MFCHASHSIZE;
+ }
+ MALLOC(nexpire, u_char *, mfchashsize, M_MRTABLE, M_WAITOK|M_ZERO);
+
+ pim_squelch_wholepkt = 0;
TUNABLE_ULONG_FETCH("net.inet.pim.squelch_wholepkt",
&pim_squelch_wholepkt);
+ ip_mrouter_reset();
pim_encap_cookie = encap_attach_func(AF_INET, IPPROTO_PIM,
pim_encapcheck, &in_pim_protosw, NULL);
@@ -3065,36 +2900,12 @@ ip_mroute_modevent(module_t mod, int type, void *unused)
return (EINVAL);
}
-#ifdef INET6
- pim6_encap_cookie = encap_attach_func(AF_INET6, IPPROTO_PIM,
- pim_encapcheck, (struct protosw *)&in6_pim_protosw, NULL);
- if (pim6_encap_cookie == NULL) {
- printf("ip_mroute: unable to attach pim6 encap\n");
- if (pim_encap_cookie) {
- encap_detach(pim_encap_cookie);
- pim_encap_cookie = NULL;
- }
- VIF_LOCK_DESTROY();
- MFC_LOCK_DESTROY();
- MROUTER_LOCK_DESTROY();
- return (EINVAL);
- }
-#endif
-
ip_mcast_src = X_ip_mcast_src;
ip_mforward = X_ip_mforward;
ip_mrouter_done = X_ip_mrouter_done;
ip_mrouter_get = X_ip_mrouter_get;
ip_mrouter_set = X_ip_mrouter_set;
-#ifdef INET6
- ip6_mforward = X_ip6_mforward;
- ip6_mrouter_done = X_ip6_mrouter_done;
- ip6_mrouter_get = X_ip6_mrouter_get;
- ip6_mrouter_set = X_ip6_mrouter_set;
- mrt6_ioctl = X_mrt6_ioctl;
-#endif
-
ip_rsvp_force_done = X_ip_rsvp_force_done;
ip_rsvp_vif = X_ip_rsvp_vif;
@@ -3112,31 +2923,18 @@ ip_mroute_modevent(module_t mod, int type, void *unused)
* just loaded and then unloaded w/o starting up a user
* process we still need to cleanup.
*/
- if (V_ip_mrouter
-#ifdef INET6
- || ip6_mrouter
-#endif
- )
- return EINVAL;
-
-#ifdef INET6
- if (pim6_encap_cookie) {
- encap_detach(pim6_encap_cookie);
- pim6_encap_cookie = NULL;
- }
- X_ip6_mrouter_done();
- ip6_mforward = NULL;
- ip6_mrouter_done = NULL;
- ip6_mrouter_get = NULL;
- ip6_mrouter_set = NULL;
- mrt6_ioctl = NULL;
-#endif
+ if (V_ip_mrouter != NULL)
+ return (EINVAL);
if (pim_encap_cookie) {
encap_detach(pim_encap_cookie);
pim_encap_cookie = NULL;
}
X_ip_mrouter_done();
+
+ FREE(nexpire, M_MRTABLE);
+ nexpire = NULL;
+
ip_mcast_src = NULL;
ip_mforward = NULL;
ip_mrouter_done = NULL;
@@ -3166,4 +2964,5 @@ static moduledata_t ip_mroutemod = {
ip_mroute_modevent,
0
};
+
DECLARE_MODULE(ip_mroute, ip_mroutemod, SI_SUB_PSEUDO, SI_ORDER_ANY);
diff --git a/sys/netinet/ip_mroute.h b/sys/netinet/ip_mroute.h
index 4043e44..a167965 100644
--- a/sys/netinet/ip_mroute.h
+++ b/sys/netinet/ip_mroute.h
@@ -70,9 +70,6 @@
#define MRT_ADD_BW_UPCALL 111 /* create bandwidth monitor */
#define MRT_DEL_BW_UPCALL 112 /* delete bandwidth monitor */
-
-#define GET_TIME(t) microtime(&t)
-
/*
* Types and macros for handling bitmaps with one bit per virtual interface.
*/
@@ -253,8 +250,6 @@ struct sioc_vif_req {
struct vif {
u_char v_flags; /* VIFF_ flags defined above */
u_char v_threshold; /* min ttl required to forward on vif*/
- u_int v_rate_limit; /* ignored; kept for compatibility */
- struct tbf *v_tbf; /* ignored; kept for compatibility */
struct in_addr v_lcl_addr; /* local interface address */
struct in_addr v_rmt_addr; /* remote address (tunnels only) */
struct ifnet *v_ifp; /* pointer to interface */
@@ -263,16 +258,13 @@ struct vif {
u_long v_bytes_in; /* # bytes in on interface */
u_long v_bytes_out; /* # bytes out on interface */
struct route v_route; /* cached route */
- u_int v_rsvp_on; /* RSVP listening on this vif */
- struct socket *v_rsvpd; /* RSVP daemon socket */
};
/*
* The kernel's multicast forwarding cache entry structure
- * (A field for the type of service (mfc_tos) is to be added
- * at a future point)
*/
struct mfc {
+ LIST_ENTRY(mfc) mfc_hash;
struct in_addr mfc_origin; /* IP origin of mcasts */
struct in_addr mfc_mcastgrp; /* multicast group associated*/
vifi_t mfc_parent; /* incoming vif */
@@ -282,11 +274,11 @@ struct mfc {
u_long mfc_wrong_if; /* wrong if for src-grp */
int mfc_expire; /* time to clean entry up */
struct timeval mfc_last_assert; /* last time I sent an assert*/
- struct rtdetq *mfc_stall; /* q of packets awaiting mfc */
- struct mfc *mfc_next; /* next mfc entry */
uint8_t mfc_flags[MAXVIFS]; /* the MRT_MFC_FLAGS_* flags */
struct in_addr mfc_rp; /* the RP address */
struct bw_meter *mfc_bw_meter; /* list of bandwidth meters */
+ u_long mfc_nstall; /* # of packets awaiting mfc */
+ TAILQ_HEAD(, rtdetq) mfc_stall; /* q of packets awaiting mfc */
};
/*
@@ -311,19 +303,11 @@ struct igmpmsg {
* Argument structure used for pkt info. while upcall is made
*/
struct rtdetq {
+ TAILQ_ENTRY(rtdetq) rte_link; /* linkage on an mfc's mfc_stall queue */
struct mbuf *m; /* A copy of the packet */
struct ifnet *ifp; /* Interface pkt came in on */
vifi_t xmt_vif; /* Saved copy of imo_multicast_vif */
- struct rtdetq *next; /* Next in list of packets */
};
-
-#define MFCTBLSIZ 256
-#if (MFCTBLSIZ & (MFCTBLSIZ - 1)) == 0 /* from sys:route.h */
-#define MFCHASHMOD(h) ((h) & (MFCTBLSIZ - 1))
-#else
-#define MFCHASHMOD(h) ((h) % MFCTBLSIZ)
-#endif
-
#define MAX_UPQ 4 /* max. no of pkts in upcall Q */
/*
diff --git a/sys/netinet6/ip6_mroute.c b/sys/netinet6/ip6_mroute.c
index 18e7aca..29201d6 100644
--- a/sys/netinet6/ip6_mroute.c
+++ b/sys/netinet6/ip6_mroute.c
@@ -92,6 +92,7 @@ __FBSDID("$FreeBSD$");
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
+#include <sys/module.h>
#include <sys/protosw.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
@@ -114,6 +115,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/in_var.h>
#include <netinet/icmp6.h>
#include <netinet/vinet.h>
+#include <netinet/ip_encap.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
@@ -130,20 +132,18 @@ static MALLOC_DEFINE(M_MRTABLE6, "mf6c", "multicast forwarding cache entry");
/* XXX: this is a very common idiom; move to <sys/mbuf.h> ? */
#define M_HASCL(m) ((m)->m_flags & M_EXT)
-static int ip6_mdq(struct mbuf *, struct ifnet *, struct mf6c *);
-static void phyint_send(struct ip6_hdr *, struct mif6 *, struct mbuf *);
-
-static void pim6_init(void);
-static int set_pim6(int *);
-static int socket_send __P((struct socket *, struct mbuf *,
- struct sockaddr_in6 *));
-static int register_send __P((struct ip6_hdr *, struct mif6 *,
- struct mbuf *));
+static int ip6_mdq(struct mbuf *, struct ifnet *, struct mf6c *);
+static void phyint_send(struct ip6_hdr *, struct mif6 *, struct mbuf *);
+static void pim6_init(void);
+static int register_send(struct ip6_hdr *, struct mif6 *, struct mbuf *);
+static int set_pim6(int *);
+static int socket_send(struct socket *, struct mbuf *,
+ struct sockaddr_in6 *);
extern struct domain inet6domain;
-/* XXX: referenced from ip_mroute.c for dynamically loading this code. */
-struct ip6protosw in6_pim_protosw = {
+static const struct encaptab *pim6_encap_cookie;
+static const struct ip6protosw in6_pim_protosw = {
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_PIM,
@@ -154,6 +154,7 @@ struct ip6protosw in6_pim_protosw = {
.pr_init = pim6_init,
.pr_usrreqs = &rip6_usrreqs
};
+static int pim6_encapcheck(const struct mbuf *, int, int, void *);
#ifdef VIMAGE_GLOBALS
static int ip6_mrouter_ver;
@@ -171,18 +172,48 @@ SYSCTL_STRUCT(_net_inet6_ip6, OID_AUTO, mrt6stat, CTLFLAG_RW,
#define NO_RTE_FOUND 0x1
#define RTE_FOUND 0x2
+static struct mtx mrouter6_mtx; /* held by ip6_mrouter_init() and X_ip6_mrouter_done() around ip6_mrouter changes */
+#define MROUTER6_LOCK() mtx_lock(&mrouter6_mtx)
+#define MROUTER6_UNLOCK() mtx_unlock(&mrouter6_mtx)
+#define MROUTER6_LOCK_ASSERT() do { \
+ mtx_assert(&mrouter6_mtx, MA_OWNED); \
+ NET_ASSERT_GIANT(); \
+} while (0)
+#define MROUTER6_LOCK_INIT() \
+ mtx_init(&mrouter6_mtx, "IPv6 multicast forwarding", NULL, MTX_DEF)
+#define MROUTER6_LOCK_DESTROY() mtx_destroy(&mrouter6_mtx)
+
static struct mf6c *mf6ctable[MF6CTBLSIZ];
SYSCTL_OPAQUE(_net_inet6_ip6, OID_AUTO, mf6ctable, CTLFLAG_RD,
&mf6ctable, sizeof(mf6ctable), "S,*mf6ctable[MF6CTBLSIZ]",
- "Multicast Forwarding Table (struct *mf6ctable[MF6CTBLSIZ], "
+ "IPv6 Multicast Forwarding Table (struct *mf6ctable[MF6CTBLSIZ], "
"netinet6/ip6_mroute.h)");
+static struct mtx mfc6_mtx; /* taken around mf6ctable[] lookups and updates */
+#define MFC6_LOCK() mtx_lock(&mfc6_mtx)
+#define MFC6_UNLOCK() mtx_unlock(&mfc6_mtx)
+#define MFC6_LOCK_ASSERT() do { \
+ mtx_assert(&mfc6_mtx, MA_OWNED); \
+ NET_ASSERT_GIANT(); \
+} while (0)
+#define MFC6_LOCK_INIT() \
+ mtx_init(&mfc6_mtx, "IPv6 multicast forwarding cache", NULL, MTX_DEF)
+#define MFC6_LOCK_DESTROY() mtx_destroy(&mfc6_mtx)
+
static u_char n6expire[MF6CTBLSIZ];
static struct mif6 mif6table[MAXMIFS];
SYSCTL_OPAQUE(_net_inet6_ip6, OID_AUTO, mif6table, CTLFLAG_RD,
- &mif6table, sizeof(mif6table), "S,vif[MAXMIFS]",
- "Multicast Interfaces (struct mif[MAXMIFS], netinet6/ip6_mroute.h)");
+ &mif6table, sizeof(mif6table), "S,mif6[MAXMIFS]",
+ "IPv6 Multicast Interfaces (struct mif6[MAXMIFS], netinet6/ip6_mroute.h)");
+
+static struct mtx mif6_mtx; /* taken around mif6table[] and nummifs accesses */
+#define MIF6_LOCK() mtx_lock(&mif6_mtx)
+#define MIF6_UNLOCK() mtx_unlock(&mif6_mtx)
+#define MIF6_LOCK_ASSERT() mtx_assert(&mif6_mtx, MA_OWNED)
+#define MIF6_LOCK_INIT() \
+ mtx_init(&mif6_mtx, "IPv6 multicast interfaces", NULL, MTX_DEF)
+#define MIF6_LOCK_DESTROY() mtx_destroy(&mif6_mtx)
#ifdef MRT6DEBUG
#ifdef VIMAGE_GLOBALS
@@ -200,11 +231,9 @@ static void expire_upcalls(void *);
#define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */
#define UPCALL_EXPIRE 6 /* number of timeouts */
-#ifdef INET
-#ifdef MROUTING
-extern struct socket *ip_mrouter;
-#endif
-#endif
+/*
+ * XXX TODO: maintain a count of if_allmulti() calls in struct ifnet.
+ */
/*
* 'Interfaces' associated with decapsulator (so we can tell
@@ -298,21 +327,22 @@ static u_long upcall_data[UPCALL_MAX + 1];
static void collate();
#endif /* UPCALL_TIMING */
-static int get_sg_cnt(struct sioc_sg_req6 *);
-static int get_mif6_cnt(struct sioc_mif_req6 *);
static int ip6_mrouter_init(struct socket *, int, int);
-static int add_m6if(struct mif6ctl *);
-static int del_m6if(mifi_t *);
static int add_m6fc(struct mf6cctl *);
+static int add_m6if(struct mif6ctl *);
static int del_m6fc(struct mf6cctl *);
+static int del_m6if(mifi_t *);
+static int del_m6if_locked(mifi_t *);
+static int get_mif6_cnt(struct sioc_mif_req6 *);
+static int get_sg_cnt(struct sioc_sg_req6 *);
static struct callout expire_upcalls_ch;
-int X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m);
+int X_ip6_mforward(struct ip6_hdr *, struct ifnet *, struct mbuf *);
int X_ip6_mrouter_done(void);
-int X_ip6_mrouter_set(struct socket *so, struct sockopt *sopt);
-int X_ip6_mrouter_get(struct socket *so, struct sockopt *sopt);
-int X_mrt6_ioctl(int cmd, caddr_t data);
+int X_ip6_mrouter_set(struct socket *, struct sockopt *);
+int X_ip6_mrouter_get(struct socket *, struct sockopt *);
+int X_mrt6_ioctl(int, caddr_t);
static void
pim6_init(void)
@@ -419,14 +449,24 @@ X_ip6_mrouter_get(struct socket *so, struct sockopt *sopt)
int
X_mrt6_ioctl(int cmd, caddr_t data) /* dispatch the two IPv6 multicast counter ioctls */
{
+ int ret;
+
+ ret = EINVAL; /* result for any cmd not handled below */
+
switch (cmd) {
case SIOCGETSGCNT_IN6:
- return (get_sg_cnt((struct sioc_sg_req6 *)data));
+ ret = get_sg_cnt((struct sioc_sg_req6 *)data);
+ break;
+
case SIOCGETMIFCNT_IN6:
- return (get_mif6_cnt((struct sioc_mif_req6 *)data));
+ ret = get_mif6_cnt((struct sioc_mif_req6 *)data);
+ break;
+
default:
- return (EINVAL);
+ break;
}
+
+ return (ret);
}
/*
@@ -436,22 +476,24 @@ static int
get_sg_cnt(struct sioc_sg_req6 *req)
{
struct mf6c *rt;
- int s;
+ int ret;
+
+ ret = 0;
+
+ MFC6_LOCK(); /* the lookup and the counter reads happen under the MFC6 lock */
- s = splnet();
MF6CFIND(req->src.sin6_addr, req->grp.sin6_addr, rt);
- splx(s);
- if (rt != NULL) {
+ if (rt == NULL) {
+ ret = ESRCH; /* no cache entry for this (source, group) pair */
+ } else {
req->pktcnt = rt->mf6c_pkt_cnt;
req->bytecnt = rt->mf6c_byte_cnt;
req->wrong_if = rt->mf6c_wrong_if;
- } else
- return (ESRCH);
-#if 0
- req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;
-#endif
+ }
- return (0);
+ MFC6_UNLOCK();
+
+ return (ret);
}
/*
@@ -460,17 +502,26 @@ get_sg_cnt(struct sioc_sg_req6 *req)
static int
get_mif6_cnt(struct sioc_mif_req6 *req) /* copy the per-mif packet/byte counters into *req */
{
- mifi_t mifi = req->mifi;
+ mifi_t mifi;
+ int ret;
- if (mifi >= nummifs)
- return (EINVAL);
+ ret = 0;
+ mifi = req->mifi;
- req->icount = mif6table[mifi].m6_pkt_in;
- req->ocount = mif6table[mifi].m6_pkt_out;
- req->ibytes = mif6table[mifi].m6_bytes_in;
- req->obytes = mif6table[mifi].m6_bytes_out;
+ MIF6_LOCK(); /* nummifs and mif6table[] are read under the MIF6 lock */
- return (0);
+ if (mifi >= nummifs) {
+ ret = EINVAL; /* index beyond the mifs currently in use */
+ } else {
+ req->icount = mif6table[mifi].m6_pkt_in;
+ req->ocount = mif6table[mifi].m6_pkt_out;
+ req->ibytes = mif6table[mifi].m6_bytes_in;
+ req->obytes = mif6table[mifi].m6_bytes_out;
+ }
+
+ MIF6_UNLOCK();
+
+ return (ret);
}
static int
@@ -507,8 +558,12 @@ ip6_mrouter_init(struct socket *so, int v, int cmd)
if (v != 1)
return (ENOPROTOOPT);
- if (ip6_mrouter != NULL)
+ MROUTER6_LOCK();
+
+ if (ip6_mrouter != NULL) {
+ MROUTER6_UNLOCK();
return (EADDRINUSE);
+ }
ip6_mrouter = so;
V_ip6_mrouter_ver = cmd;
@@ -522,6 +577,8 @@ ip6_mrouter_init(struct socket *so, int v, int cmd)
callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT,
expire_upcalls, NULL);
+ MROUTER6_UNLOCK();
+
#ifdef MRT6DEBUG
if (V_mrt6debug)
log(LOG_DEBUG, "ip6_mrouter_init\n");
@@ -531,7 +588,7 @@ ip6_mrouter_init(struct socket *so, int v, int cmd)
}
/*
- * Disable multicast routing
+ * Disable IPv6 multicast forwarding.
*/
int
X_ip6_mrouter_done(void)
@@ -541,31 +598,22 @@ X_ip6_mrouter_done(void)
int i;
struct mf6c *rt;
struct rtdetq *rte;
- int s;
- s = splnet();
+ MROUTER6_LOCK();
+
+ if (ip6_mrouter == NULL) {
+ MROUTER6_UNLOCK();
+ return (EINVAL);
+ }
/*
* For each phyint in use, disable promiscuous reception of all IPv6
* multicasts.
*/
-#ifdef INET
-#ifdef MROUTING
- /*
- * If there is still IPv4 multicast routing daemon,
- * we remain interfaces to receive all muliticasted packets.
- * XXX: there may be an interface in which the IPv4 multicast
- * daemon is not interested...
- */
- if (!V_ip_mrouter)
-#endif
-#endif
- {
- for (mifi = 0; mifi < nummifs; mifi++) {
- if (mif6table[mifi].m6_ifp &&
- !(mif6table[mifi].m6_flags & MIFF_REGISTER)) {
- if_allmulti(mif6table[mifi].m6_ifp, 0);
- }
+ for (mifi = 0; mifi < nummifs; mifi++) {
+ if (mif6table[mifi].m6_ifp &&
+ !(mif6table[mifi].m6_flags & MIFF_REGISTER)) {
+ if_allmulti(mif6table[mifi].m6_ifp, 0);
}
}
bzero((caddr_t)mif6table, sizeof(mif6table));
@@ -578,6 +626,7 @@ X_ip6_mrouter_done(void)
/*
* Free all multicast forwarding cache entries.
*/
+ MFC6_LOCK();
for (i = 0; i < MF6CTBLSIZ; i++) {
rt = mf6ctable[i];
while (rt) {
@@ -595,8 +644,8 @@ X_ip6_mrouter_done(void)
free(frt, M_MRTABLE6);
}
}
-
bzero((caddr_t)mf6ctable, sizeof(mf6ctable));
+ MFC6_UNLOCK();
/*
* Reset register interface
@@ -611,7 +660,7 @@ X_ip6_mrouter_done(void)
ip6_mrouter = NULL;
V_ip6_mrouter_ver = 0;
- splx(s);
+ MROUTER6_UNLOCK();
#ifdef MRT6DEBUG
if (V_mrt6debug)
@@ -632,15 +681,24 @@ add_m6if(struct mif6ctl *mifcp)
INIT_VNET_NET(curvnet);
struct mif6 *mifp;
struct ifnet *ifp;
- int error, s;
+ int error;
- if (mifcp->mif6c_mifi >= MAXMIFS)
+ MIF6_LOCK();
+
+ if (mifcp->mif6c_mifi >= MAXMIFS) {
+ MIF6_UNLOCK();
return (EINVAL);
+ }
mifp = mif6table + mifcp->mif6c_mifi;
- if (mifp->m6_ifp)
+ if (mifp->m6_ifp != NULL) {
+ MIF6_UNLOCK();
return (EADDRINUSE); /* XXX: is it appropriate? */
- if (mifcp->mif6c_pifi == 0 || mifcp->mif6c_pifi > V_if_index)
+ }
+ if (mifcp->mif6c_pifi == 0 || mifcp->mif6c_pifi > V_if_index) {
+ MIF6_UNLOCK();
return (ENXIO);
+ }
+
ifp = ifnet_byindex(mifcp->mif6c_pifi);
if (mifcp->mif6c_flags & MIFF_REGISTER) {
@@ -661,21 +719,20 @@ add_m6if(struct mif6ctl *mifcp)
} else {
ifp = multicast_register_if6;
}
-
- } /* if REGISTER */
- else {
+ } else {
/* Make sure the interface supports multicast */
- if ((ifp->if_flags & IFF_MULTICAST) == 0)
+ if ((ifp->if_flags & IFF_MULTICAST) == 0) {
+ MIF6_UNLOCK();
return (EOPNOTSUPP);
+ }
- s = splnet();
error = if_allmulti(ifp, 1);
- splx(s);
- if (error)
+ if (error) {
+ MIF6_UNLOCK();
return (error);
+ }
}
- s = splnet();
mifp->m6_flags = mifcp->mif6c_flags;
mifp->m6_ifp = ifp;
@@ -684,12 +741,14 @@ add_m6if(struct mif6ctl *mifcp)
mifp->m6_pkt_out = 0;
mifp->m6_bytes_in = 0;
mifp->m6_bytes_out = 0;
- splx(s);
+ bzero(&mifp->m6_route, sizeof(mifp->m6_route));
/* Adjust nummifs up if the mifi is higher than nummifs */
if (nummifs <= mifcp->mif6c_mifi)
nummifs = mifcp->mif6c_mifi + 1;
+ MIF6_UNLOCK();
+
#ifdef MRT6DEBUG
if (V_mrt6debug)
log(LOG_DEBUG,
@@ -705,27 +764,22 @@ add_m6if(struct mif6ctl *mifcp)
* Delete a mif from the mif table
*/
static int
-del_m6if(mifi_t *mifip)
+del_m6if_locked(mifi_t *mifip)
{
struct mif6 *mifp = mif6table + *mifip;
mifi_t mifi;
struct ifnet *ifp;
- int s;
+
+ MIF6_LOCK_ASSERT();
if (*mifip >= nummifs)
return (EINVAL);
if (mifp->m6_ifp == NULL)
return (EINVAL);
- s = splnet();
-
if (!(mifp->m6_flags & MIFF_REGISTER)) {
- /*
- * XXX: what if there is yet IPv4 multicast daemon
- * using the interface?
- */
+ /* XXX: TODO: Maintain an ALLMULTI refcount in struct ifnet. */
ifp = mifp->m6_ifp;
-
if_allmulti(ifp, 0);
} else {
if (reg_mif_num != (mifi_t)-1 &&
@@ -745,8 +799,6 @@ del_m6if(mifi_t *mifip)
break;
nummifs = mifi;
- splx(s);
-
#ifdef MRT6DEBUG
if (V_mrt6debug)
log(LOG_DEBUG, "del_m6if %d, nummifs %d\n", *mifip, nummifs);
@@ -755,6 +807,18 @@ del_m6if(mifi_t *mifip)
return (0);
}
+/*
+ * Locking wrapper around del_m6if_locked(): takes the MIF6 lock,
+ * performs the deletion, releases the lock and hands back the result.
+ */
+static int
+del_m6if(mifi_t *mifip)
+{
+	int error;
+
+	MIF6_LOCK();
+	error = del_m6if_locked(mifip);
+	MIF6_UNLOCK();
+	return (error);
+}
+
/*
* Add an mfc entry
*/
@@ -765,9 +829,10 @@ add_m6fc(struct mf6cctl *mfccp)
u_long hash;
struct rtdetq *rte;
u_short nstl;
- int s;
char ip6bufo[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN];
+ MFC6_LOCK();
+
MF6CFIND(mfccp->mf6cc_origin.sin6_addr,
mfccp->mf6cc_mcastgrp.sin6_addr, rt);
@@ -783,17 +848,16 @@ add_m6fc(struct mf6cctl *mfccp)
}
#endif
- s = splnet();
rt->mf6c_parent = mfccp->mf6cc_parent;
rt->mf6c_ifset = mfccp->mf6cc_ifset;
- splx(s);
+
+ MFC6_UNLOCK();
return (0);
}
/*
* Find the entry for which the upcall was made and update
*/
- s = splnet();
hash = MF6CHASH(mfccp->mf6cc_origin.sin6_addr,
mfccp->mf6cc_mcastgrp.sin6_addr);
for (rt = mf6ctable[hash], nstl = 0; rt; rt = rt->mf6c_next) {
@@ -891,7 +955,7 @@ add_m6fc(struct mf6cctl *mfccp)
rt = (struct mf6c *)malloc(sizeof(*rt), M_MRTABLE6,
M_NOWAIT);
if (rt == NULL) {
- splx(s);
+ MFC6_UNLOCK();
return (ENOBUFS);
}
@@ -912,7 +976,8 @@ add_m6fc(struct mf6cctl *mfccp)
mf6ctable[hash] = rt;
}
}
- splx(s);
+
+ MFC6_UNLOCK();
return (0);
}
@@ -953,7 +1018,6 @@ del_m6fc(struct mf6cctl *mfccp)
struct mf6c *rt;
struct mf6c **nptr;
u_long hash;
- int s;
origin = mfccp->mf6cc_origin;
mcastgrp = mfccp->mf6cc_mcastgrp;
@@ -968,7 +1032,7 @@ del_m6fc(struct mf6cctl *mfccp)
}
#endif
- s = splnet();
+ MFC6_LOCK();
nptr = &mf6ctable[hash];
while ((rt = *nptr) != NULL) {
@@ -982,14 +1046,14 @@ del_m6fc(struct mf6cctl *mfccp)
nptr = &rt->mf6c_next;
}
if (rt == NULL) {
- splx(s);
+ MFC6_UNLOCK();
return (EADDRNOTAVAIL);
}
*nptr = rt->mf6c_next;
free(rt, M_MRTABLE6);
- splx(s);
+ MFC6_UNLOCK();
return (0);
}
@@ -1035,7 +1099,6 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m)
struct mf6c *rt;
struct mif6 *mifp;
struct mbuf *mm;
- int s;
mifi_t mifi;
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
@@ -1078,15 +1141,16 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m)
return (0);
}
+ MFC6_LOCK();
+
/*
* Determine forwarding mifs from the forwarding cache table
*/
- s = splnet();
MF6CFIND(ip6->ip6_src, ip6->ip6_dst, rt);
/* Entry exists, so forward if necessary */
if (rt) {
- splx(s);
+ MFC6_UNLOCK();
return (ip6_mdq(m, ifp, rt));
} else {
/*
@@ -1120,7 +1184,7 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m)
rte = (struct rtdetq *)malloc(sizeof(*rte), M_MRTABLE6,
M_NOWAIT);
if (rte == NULL) {
- splx(s);
+ MFC6_UNLOCK();
return (ENOBUFS);
}
mb0 = m_copy(m, 0, M_COPYALL);
@@ -1133,7 +1197,7 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m)
mb0 = m_pullup(mb0, sizeof(struct ip6_hdr));
if (mb0 == NULL) {
free(rte, M_MRTABLE6);
- splx(s);
+ MFC6_UNLOCK();
return (ENOBUFS);
}
@@ -1160,7 +1224,7 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m)
if (rt == NULL) {
free(rte, M_MRTABLE6);
m_freem(mb0);
- splx(s);
+ MFC6_UNLOCK();
return (ENOBUFS);
}
/*
@@ -1173,7 +1237,7 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m)
free(rte, M_MRTABLE6);
m_freem(mb0);
free(rt, M_MRTABLE6);
- splx(s);
+ MFC6_UNLOCK();
return (ENOBUFS);
}
@@ -1203,7 +1267,7 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m)
free(rte, M_MRTABLE6);
m_freem(mb0);
free(rt, M_MRTABLE6);
- splx(s);
+ MFC6_UNLOCK();
return (EINVAL);
}
@@ -1236,7 +1300,7 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m)
free(rte, M_MRTABLE6);
m_freem(mb0);
free(rt, M_MRTABLE6);
- splx(s);
+ MFC6_UNLOCK();
return (ENOBUFS);
}
@@ -1269,7 +1333,7 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m)
mrt6stat.mrt6s_upq_ovflw++;
free(rte, M_MRTABLE6);
m_freem(mb0);
- splx(s);
+ MFC6_UNLOCK();
return (0);
}
@@ -1284,7 +1348,7 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m)
rte->t = tp;
#endif /* UPCALL_TIMING */
- splx(s);
+ MFC6_UNLOCK();
return (0);
}
@@ -1300,9 +1364,8 @@ expire_upcalls(void *unused)
struct rtdetq *rte;
struct mf6c *mfc, **nptr;
int i;
- int s;
- s = splnet();
+ MFC6_LOCK();
for (i = 0; i < MF6CTBLSIZ; i++) {
if (n6expire[i] == 0)
continue;
@@ -1346,7 +1409,7 @@ expire_upcalls(void *unused)
}
}
}
- splx(s);
+ MFC6_UNLOCK();
callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT,
expire_upcalls, NULL);
}
@@ -1540,8 +1603,6 @@ phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m)
struct mbuf *mb_copy;
struct ifnet *ifp = mifp->m6_ifp;
int error = 0;
- int s = splnet(); /* needs to protect static "ro" below. */
- static struct route_in6 ro;
struct in6_multi *in6m;
struct sockaddr_in6 *dst6;
u_long linkmtu;
@@ -1556,7 +1617,6 @@ phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m)
(M_HASCL(mb_copy) || mb_copy->m_len < sizeof(struct ip6_hdr)))
mb_copy = m_pullup(mb_copy, sizeof(struct ip6_hdr));
if (mb_copy == NULL) {
- splx(s);
return;
}
/* set MCAST flag to the outgoing packet */
@@ -1576,7 +1636,7 @@ phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m)
/* XXX: ip6_output will override ip6->ip6_hlim */
im6o.im6o_multicast_hlim = ip6->ip6_hlim;
im6o.im6o_multicast_loop = 1;
- error = ip6_output(mb_copy, NULL, &ro,
+ error = ip6_output(mb_copy, NULL, &mifp->m6_route,
IPV6_FORWARDING, &im6o, NULL, NULL);
#ifdef MRT6DEBUG
@@ -1584,7 +1644,6 @@ phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m)
log(LOG_DEBUG, "phyint_send on mif %d err %d\n",
mifp - mif6table, error);
#endif
- splx(s);
return;
}
@@ -1592,13 +1651,13 @@ phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m)
* If we belong to the destination multicast group
* on the outgoing interface, loop back a copy.
*/
- dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
+ dst6 = &mifp->m6_route.ro_dst;
IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
if (in6m != NULL) {
dst6->sin6_len = sizeof(struct sockaddr_in6);
dst6->sin6_family = AF_INET6;
dst6->sin6_addr = ip6->ip6_dst;
- ip6_mloopback(ifp, m, (struct sockaddr_in6 *)&ro.ro_dst);
+ ip6_mloopback(ifp, m, &mifp->m6_route.ro_dst);
}
/*
* Put the packet into the sending queue of the outgoing interface
@@ -1614,7 +1673,7 @@ phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m)
* we need no ND for a multicast forwarded packet...right?
*/
error = (*ifp->if_output)(ifp, mb_copy,
- (struct sockaddr *)&ro.ro_dst, NULL);
+ (struct sockaddr *)&mifp->m6_route.ro_dst, NULL);
#ifdef MRT6DEBUG
if (V_mrt6debug & DEBUG_XMIT)
log(LOG_DEBUG, "phyint_send on mif %d err %d\n",
@@ -1645,8 +1704,6 @@ phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m)
m_freem(mb_copy); /* simply discard the packet */
}
}
-
- splx(s);
}
static int
@@ -1715,6 +1772,24 @@ register_send(struct ip6_hdr *ip6, struct mif6 *mif, struct mbuf *m)
}
/*
+ * pim6_encapcheck() is called by the encap6_input() path at runtime to
+ * determine if a packet is for PIM; allowing PIM to be dynamically loaded
+ * into the kernel.
+ */
+static int
+pim6_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
+{
+
+#ifdef DIAGNOSTIC
+	KASSERT(proto == IPPROTO_PIM, ("not for IPPROTO_PIM"));
+#endif
+	/*
+	 * A non-zero return (64) claims the datagram for PIM; zero rejects
+	 * anything that is not IPPROTO_PIM.  m, off and arg are not examined.
+	 */
+	return (proto == IPPROTO_PIM ? 64 : 0);
+}
+
+/*
* PIM sparse mode hook
* Receives the pim control messages, and passes them up to the listening
* socket, using rip6_input.
@@ -1951,3 +2026,66 @@ pim6_input(struct mbuf **mp, int *offp, int proto)
rip6_input(&m, offp, proto);
return (IPPROTO_DONE);
}
+
+/*
+ * Module event handler for ip6_mroute.
+ *
+ * MOD_LOAD:   initialise the MROUTER6/MFC6/MIF6 locks, attach the PIM
+ *             encapsulation check and install the X_* entry points into
+ *             the ip6_* / mrt6_ioctl hooks.  If the encap attach fails the
+ *             locks are destroyed again and EINVAL is returned.
+ * MOD_UNLOAD: refused with EINVAL while ip6_mrouter is non-NULL; otherwise
+ *             detach the encap handler, run X_ip6_mrouter_done(), clear
+ *             the hooks and destroy the locks.
+ * Any other event yields EOPNOTSUPP.
+ */
+static int
+ip6_mroute_modevent(module_t mod, int type, void *unused)
+{
+
+	switch (type) {
+	case MOD_LOAD:
+		MROUTER6_LOCK_INIT();
+		MFC6_LOCK_INIT();
+		MIF6_LOCK_INIT();
+
+		pim6_encap_cookie = encap_attach_func(AF_INET6, IPPROTO_PIM,
+		    pim6_encapcheck,
+		    (const struct protosw *)&in6_pim_protosw, NULL);
+		if (pim6_encap_cookie == NULL) {
+			printf("ip6_mroute: unable to attach pim6 encap\n");
+			/* Undo the lock setup in reverse order. */
+			MIF6_LOCK_DESTROY();
+			MFC6_LOCK_DESTROY();
+			MROUTER6_LOCK_DESTROY();
+			return (EINVAL);
+		}
+
+		/* Publish the module's entry points through the hooks. */
+		ip6_mforward = X_ip6_mforward;
+		ip6_mrouter_done = X_ip6_mrouter_done;
+		ip6_mrouter_get = X_ip6_mrouter_get;
+		ip6_mrouter_set = X_ip6_mrouter_set;
+		mrt6_ioctl = X_mrt6_ioctl;
+		return (0);
+
+	case MOD_UNLOAD:
+		/* Do not unload while ip6_mrouter is still set. */
+		if (ip6_mrouter != NULL)
+			return (EINVAL);
+
+		if (pim6_encap_cookie != NULL) {
+			encap_detach(pim6_encap_cookie);
+			pim6_encap_cookie = NULL;
+		}
+		X_ip6_mrouter_done();
+
+		/* Withdraw the entry points before tearing down the locks. */
+		ip6_mforward = NULL;
+		ip6_mrouter_done = NULL;
+		ip6_mrouter_get = NULL;
+		ip6_mrouter_set = NULL;
+		mrt6_ioctl = NULL;
+
+		MIF6_LOCK_DESTROY();
+		MFC6_LOCK_DESTROY();
+		MROUTER6_LOCK_DESTROY();
+		return (0);
+
+	default:
+		return (EOPNOTSUPP);
+	}
+}
+
+static moduledata_t ip6_mroutemod = {
+ "ip6_mroute", /* presumably the module name -- verify field order against moduledata_t */
+ ip6_mroute_modevent, /* event handler defined above (MOD_LOAD / MOD_UNLOAD) */
+ 0 /* third member left zero; presumably unused private data -- TODO confirm */
+};
+
+DECLARE_MODULE(ip6_mroute, ip6_mroutemod, SI_SUB_PSEUDO, SI_ORDER_ANY);
diff --git a/sys/netinet6/ip6_mroute.h b/sys/netinet6/ip6_mroute.h
index 0e35e62..a0a5e4a 100644
--- a/sys/netinet6/ip6_mroute.h
+++ b/sys/netinet6/ip6_mroute.h
@@ -212,7 +212,7 @@ struct mif6 {
u_quad_t m6_pkt_out; /* # pkts out on interface */
u_quad_t m6_bytes_in; /* # bytes in on interface */
u_quad_t m6_bytes_out; /* # bytes out on interface */
- struct route_in6 m6_route;/* cached route if this is a tunnel */
+ struct route_in6 m6_route; /* cached route */
#ifdef notyet
u_int m6_rsvp_on; /* RSVP listening on this vif */
struct socket *m6_rsvpd; /* RSVP daemon socket */
diff --git a/usr.bin/netstat/main.c b/usr.bin/netstat/main.c
index 6eb313a..8b25ff5 100644
--- a/usr.bin/netstat/main.c
+++ b/usr.bin/netstat/main.c
@@ -82,8 +82,8 @@ static struct nlist nl[] = {
{ .n_name = "_rt_tables"},
#define N_MRTSTAT 3
{ .n_name = "_mrtstat" },
-#define N_MFCTABLE 4
- { .n_name = "_mfctable" },
+#define N_MFCHASHTBL 4
+ { .n_name = "_mfchashtbl" },
#define N_VIFTABLE 5
{ .n_name = "_viftable" },
#define N_IPX 6
@@ -182,6 +182,8 @@ static struct nlist nl[] = {
{ .n_name = "_rip6stat" },
#define N_SCTPSTAT 53
{ .n_name = "_sctpstat" },
+#define N_MFCTABLESIZE 54
+ { .n_name = "_mfctablesize" },
{ .n_name = NULL },
};
@@ -550,7 +552,8 @@ main(int argc, char *argv[])
#endif
} else {
if (af == AF_INET || af == AF_UNSPEC)
- mroutepr(nl[N_MFCTABLE].n_value,
+ mroutepr(nl[N_MFCHASHTBL].n_value,
+ nl[N_MFCTABLESIZE].n_value,
nl[N_VIFTABLE].n_value);
#ifdef INET6
if (af == AF_INET6 || af == AF_UNSPEC)
diff --git a/usr.bin/netstat/mroute.c b/usr.bin/netstat/mroute.c
index dbce318..8009c30 100644
--- a/usr.bin/netstat/mroute.c
+++ b/usr.bin/netstat/mroute.c
@@ -67,124 +67,9 @@ __FBSDID("$FreeBSD$");
#include <stdlib.h>
#include "netstat.h"
-static void print_bw_meter(struct bw_meter *bw_meter, int *banner_printed);
-void
-mroutepr(u_long mfcaddr, u_long vifaddr)
-{
- struct mfc *mfctable[MFCTBLSIZ];
- struct vif viftable[MAXVIFS];
- struct mfc mfc, *m;
- struct vif *v;
- vifi_t vifi;
- int i;
- int banner_printed;
- int saved_numeric_addr;
- vifi_t maxvif = 0;
- size_t len;
-
- len = sizeof(mfctable);
- if (live) {
- if (sysctlbyname("net.inet.ip.mfctable", mfctable, &len, NULL,
- 0) < 0) {
- warn("sysctl: net.inet.ip.mfctable");
- return;
- }
- } else
- kread(mfcaddr, (char *)mfctable, sizeof(mfctable));
-
- len = sizeof(viftable);
- if (live) {
- if (sysctlbyname("net.inet.ip.viftable", viftable, &len, NULL,
- 0) < 0) {
- warn("sysctl: net.inet.ip.viftable");
- return;
- }
- } else
- kread(vifaddr, (char *)viftable, sizeof(viftable));
-
- saved_numeric_addr = numeric_addr;
- numeric_addr = 1;
-
- banner_printed = 0;
- for (vifi = 0, v = viftable; vifi < MAXVIFS; ++vifi, ++v) {
- if (v->v_lcl_addr.s_addr == 0)
- continue;
-
- maxvif = vifi;
- if (!banner_printed) {
- printf("\nIPv4 Virtual Interface Table\n"
- " Vif Thresh Rate Local-Address "
- "Remote-Address Pkts-In Pkts-Out\n");
- banner_printed = 1;
- }
-
- printf(" %2u %6u %4d %-15.15s",
- /* opposite math of add_vif() */
- vifi, v->v_threshold, v->v_rate_limit * 1000 / 1024,
- routename(v->v_lcl_addr.s_addr));
- printf(" %-15.15s", (v->v_flags & VIFF_TUNNEL) ?
- routename(v->v_rmt_addr.s_addr) : "");
-
- printf(" %9lu %9lu\n", v->v_pkt_in, v->v_pkt_out);
- }
- if (!banner_printed)
- printf("\nIPv4 Virtual Interface Table is empty\n");
-
- banner_printed = 0;
- for (i = 0; i < MFCTBLSIZ; ++i) {
- m = mfctable[i];
- while(m) {
- /* XXX KVM */
- kread((u_long)m, (char *)&mfc, sizeof mfc);
-
- if (!banner_printed) {
- printf("\nIPv4 Multicast Forwarding Table\n"
- " Origin Group "
- " Packets In-Vif Out-Vifs:Ttls\n");
- banner_printed = 1;
- }
-
- printf(" %-15.15s", routename(mfc.mfc_origin.s_addr));
- printf(" %-15.15s", routename(mfc.mfc_mcastgrp.s_addr));
- printf(" %9lu", mfc.mfc_pkt_cnt);
- printf(" %3d ", mfc.mfc_parent);
- for (vifi = 0; vifi <= maxvif; vifi++) {
- if (mfc.mfc_ttls[vifi] > 0)
- printf(" %u:%u", vifi,
- mfc.mfc_ttls[vifi]);
- }
- printf("\n");
-
- /* Print the bw meter information */
- {
- struct bw_meter bw_meter, *bwm;
- int banner_printed2 = 0;
-
- bwm = mfc.mfc_bw_meter;
- while (bwm) {
- /* XXX KVM */
- kread((u_long)bwm, (char *)&bw_meter,
- sizeof bw_meter);
- print_bw_meter(&bw_meter,
- &banner_printed2);
- bwm = bw_meter.bm_mfc_next;
- }
-#if 0 /* Don't ever print it? */
- if (! banner_printed2)
- printf("\n No Bandwidth Meters\n");
-#endif
- }
-
- m = mfc.mfc_next;
- }
- }
- if (!banner_printed)
- printf("\nIPv4 Multicast Forwarding Table is empty\n");
-
- printf("\n");
- numeric_addr = saved_numeric_addr;
-}
+static void print_bw_meter(struct bw_meter *, int *);
+static void print_mfc(struct mfc *, int, int *);
static void
print_bw_meter(struct bw_meter *bw_meter, int *banner_printed)
@@ -262,6 +147,193 @@ print_bw_meter(struct bw_meter *bw_meter, int *banner_printed)
printf("\n");
}
+/*
+ * Print one IPv4 multicast forwarding cache entry, followed by any
+ * bandwidth meters chained off it.
+ *
+ * m              - entry to print (already copied into our address space)
+ * maxvif         - highest vif index whose TTL column is examined
+ * banner_printed - in/out flag; the table header is emitted once, the
+ *                  first time this is found to be zero
+ */
+static void
+print_mfc(struct mfc *m, int maxvif, int *banner_printed)
+{
+	struct bw_meter meter_copy, *kmeter;
+	int meter_banner_done = 0;
+	vifi_t vifi;
+
+	if (*banner_printed == 0) {
+		printf("\nIPv4 Multicast Forwarding Table\n"
+		    " Origin Group "
+		    " Packets In-Vif Out-Vifs:Ttls\n");
+		*banner_printed = 1;
+	}
+
+	printf(" %-15.15s", routename(m->mfc_origin.s_addr));
+	printf(" %-15.15s", routename(m->mfc_mcastgrp.s_addr));
+	printf(" %9lu", m->mfc_pkt_cnt);
+	printf(" %3d ", m->mfc_parent);
+	for (vifi = 0; vifi <= maxvif; vifi++) {
+		if (m->mfc_ttls[vifi] > 0)
+			printf(" %u:%u", vifi, m->mfc_ttls[vifi]);
+	}
+	printf("\n");
+
+	/*
+	 * XXX The bandwidth meters are not retrievable via sysctl yet, so
+	 * each one is fetched with kread(); the walk stops at the first
+	 * read that fails.
+	 */
+	for (kmeter = m->mfc_bw_meter; kmeter != NULL;
+	    kmeter = meter_copy.bm_mfc_next) {
+		if (kread((u_long)kmeter, (char *)&meter_copy,
+		    sizeof(meter_copy)) != 0)
+			break;
+		print_bw_meter(&meter_copy, &meter_banner_done);
+	}
+}
+
+/*
+ * Print the IPv4 virtual interface table and multicast forwarding table.
+ *
+ * pmfchashtbl   - kernel address of the MFC hash table symbol (KVM mode)
+ * pmfctablesize - kernel address of the MFC hash table size (KVM mode)
+ * pviftbl       - kernel address of the vif table (KVM mode)
+ *
+ * When `live' is set both tables are fetched via sysctl and the addresses
+ * are unused.  Addresses are always printed numerically; the global
+ * numeric_addr is restored on every exit path.
+ */
+void
+mroutepr(u_long pmfchashtbl, u_long pmfctablesize, u_long pviftbl)
+{
+	struct vif viftable[MAXVIFS];
+	struct vif *v;
+	struct mfc *m;
+	int banner_printed;
+	int saved_numeric_addr;
+	size_t len;
+	vifi_t vifi, maxvif;
+
+	saved_numeric_addr = numeric_addr;
+	numeric_addr = 1;
+
+	/*
+	 * TODO:
+	 * The VIF table will move to hanging off the struct if_info for
+	 * each IPv4 configured interface. Currently it is statically
+	 * allocated, and retrieved either using KVM or an opaque SYSCTL.
+	 *
+	 * This can't happen until the API documented in multicast(4)
+	 * is itself refactored. The historical reason why VIFs use
+	 * a separate ifindex space is entirely due to the legacy
+	 * capability of the MROUTING code to create IPIP tunnels on
+	 * the fly to support DVMRP. When gif(4) became available, this
+	 * functionality was deprecated, as PIM does not use it.
+	 */
+	maxvif = 0;
+
+	len = sizeof(viftable);
+	if (live) {
+		if (sysctlbyname("net.inet.ip.viftable", viftable, &len, NULL,
+		    0) < 0) {
+			warn("sysctl: net.inet.ip.viftable");
+			goto out;
+		}
+	} else if (kread(pviftbl, (char *)viftable, sizeof(viftable)) != 0) {
+		/* Do not print an uninitialised table. */
+		warnx("kread: viftable");
+		goto out;
+	}
+
+	banner_printed = 0;
+	for (vifi = 0, v = viftable; vifi < MAXVIFS; ++vifi, ++v) {
+		if (v->v_lcl_addr.s_addr == 0)
+			continue;
+
+		maxvif = vifi;
+		if (!banner_printed) {
+			printf("\nIPv4 Virtual Interface Table\n"
+			    " Vif Thresh Local-Address "
+			    "Remote-Address Pkts-In Pkts-Out\n");
+			banner_printed = 1;
+		}
+
+		printf(" %2u %6u %-15.15s",
+		    vifi, v->v_threshold,
+		    routename(v->v_lcl_addr.s_addr));
+		printf(" %-15.15s", (v->v_flags & VIFF_TUNNEL) ?
+		    routename(v->v_rmt_addr.s_addr) : "");
+
+		printf(" %9lu %9lu\n", v->v_pkt_in, v->v_pkt_out);
+	}
+	if (!banner_printed)
+		printf("\nIPv4 Virtual Interface Table is empty\n");
+
+	banner_printed = 0;
+
+	/*
+	 * TODO:
+	 * The MFC table will move into the AF_INET radix trie in future.
+	 * In 8.x, it becomes a dynamically allocated structure referenced
+	 * by a hashed LIST, allowing more than 256 entries w/o kernel tuning.
+	 *
+	 * If retrieved via opaque SYSCTL, the kernel will coalesce it into
+	 * a static table for us.
+	 * If retrieved via KVM, the hash list pointers must be followed.
+	 */
+	if (live) {
+		struct mfc *mfctable;
+
+		/* First call only probes for the required buffer size. */
+		len = 0;
+		if (sysctlbyname("net.inet.ip.mfctable", NULL, &len, NULL,
+		    0) < 0) {
+			warn("sysctl: net.inet.ip.mfctable");
+			goto out;
+		}
+
+		mfctable = malloc(len);
+		if (mfctable == NULL) {
+			warnx("malloc %lu bytes", (u_long)len);
+			goto out;
+		}
+		if (sysctlbyname("net.inet.ip.mfctable", mfctable, &len, NULL,
+		    0) < 0) {
+			free(mfctable);
+			warn("sysctl: net.inet.ip.mfctable");
+			goto out;
+		}
+
+		m = mfctable;
+		while (len >= sizeof(*m)) {
+			print_mfc(m++, maxvif, &banner_printed);
+			len -= sizeof(*m);
+		}
+		if (len != 0)
+			warnx("print_mfc: %lu trailing bytes", (u_long)len);
+
+		free(mfctable);
+	} else {
+		LIST_HEAD(, mfc) *mfchashtbl;
+		u_long i, kmfchashtbl, mfctablesize;
+		struct mfc mfc;
+
+		if (kread(pmfctablesize, (char *)&mfctablesize,
+		    sizeof(u_long)) != 0) {
+			warn("kread: mfctablesize");
+			goto out;
+		}
+
+		/*
+		 * NOTE(review): this assumes the kernel symbol is a pointer
+		 * to the dynamically allocated hash table (as the comment
+		 * above describes), so the pointer value is read first and
+		 * the table is then read from where it points -- confirm
+		 * against the declaration in ip_mroute.c.  It also assumes
+		 * a kernel pointer fits in a u_long.
+		 */
+		if (kread(pmfchashtbl, (char *)&kmfchashtbl,
+		    sizeof(kmfchashtbl)) != 0) {
+			warnx("kread: mfchashtbl");
+			goto out;
+		}
+
+		len = sizeof(*mfchashtbl) * mfctablesize;
+		mfchashtbl = malloc(len);
+		if (mfchashtbl == NULL) {
+			warnx("malloc %lu bytes", (u_long)len);
+			goto out;
+		}
+		/* Read into the buffer, not over the pointer variable. */
+		if (kread(kmfchashtbl, (char *)mfchashtbl, len) != 0) {
+			warnx("kread: mfchashtbl");
+			free(mfchashtbl);
+			goto out;
+		}
+
+		for (i = 0; i < mfctablesize; i++) {
+			/*
+			 * m is a kernel address and must never be
+			 * dereferenced here: copy each entry into `mfc'
+			 * and take the next link from that local copy.
+			 */
+			for (m = LIST_FIRST(&mfchashtbl[i]); m != NULL;
+			    m = LIST_NEXT(&mfc, mfc_hash)) {
+				if (kread((u_long)m, (char *)&mfc,
+				    sizeof(mfc)) != 0)
+					break;
+				print_mfc(&mfc, maxvif, &banner_printed);
+			}
+		}
+
+		free(mfchashtbl);
+	}
+
+	if (!banner_printed)
+		printf("\nIPv4 Multicast Forwarding Table is empty\n");
+
+	printf("\n");
+out:
+	/* Restore the caller's address-formatting preference on all paths. */
+	numeric_addr = saved_numeric_addr;
+}
+
void
mrt_stats(u_long mstaddr)
{
diff --git a/usr.bin/netstat/netstat.h b/usr.bin/netstat/netstat.h
index da76ff1..483bd6c 100644
--- a/usr.bin/netstat/netstat.h
+++ b/usr.bin/netstat/netstat.h
@@ -160,6 +160,6 @@ void tp_protopr(u_long, const char *, int, int);
void tp_inproto(u_long);
void tp_stats(caddr_t, caddr_t);
-void mroutepr(u_long, u_long);
+void mroutepr(u_long, u_long, u_long);
void mrt_stats(u_long);
void bpf_stats(char *);
OpenPOWER on IntegriCloud