Kernel side of 3.5 multicast routing code, based on work by Bill Fenner

and other work done here. The LKM support is probably broken, but it still compiles and will be fixed later.
author: wollman <wollman@FreeBSD.org> 1995-06-13 17:51:16 +0000
committer: wollman <wollman@FreeBSD.org> 1995-06-13 17:51:16 +0000
commit: 20ad4f8359820cf12331c0335034438fc23ad604 (patch)
tree: b5de17b0bb0d03b7ed728b104d3d0f4a9752fd4a /sys/netinet/ip_mroute.c
parent: c0dfcf234735c518d2c756c46fddf87d2332c838 (diff)
download: FreeBSD-src-20ad4f8359820cf12331c0335034438fc23ad604.zip
FreeBSD-src-20ad4f8359820cf12331c0335034438fc23ad604.tar.gz
1 files changed, 995 insertions, 651 deletions
diff --git a/sys/netinet/ip_mroute.c b/sys/netinet/ip_mroute.c
index 7226fd4..2abb1e1 100644
--- a/sys/netinet/ip_mroute.c
+++ b/sys/netinet/ip_mroute.c
@@ -6,8 +6,9 @@
  * Modified by Mark J. Steiglitz, Stanford, May, 1991
  * Modified by Van Jacobson, LBL, January 1993
  * Modified by Ajit Thyagarajan, PARC, August 1993
+ * Modified by Bill Fenner, PARC, April 1995
  *
- * MROUTING 1.8
+ * MROUTING Revision: 3.5
  */
 
 
@@ -19,6 +20,7 @@
 #include <sys/protosw.h>
 #include <sys/errno.h>
 #include <sys/time.h>
+#include <sys/kernel.h>
 #include <sys/ioctl.h>
 #include <sys/syslog.h>
 #include <sys/queue.h>
@@ -33,6 +35,7 @@
 #include <netinet/igmp.h>
 #include <netinet/igmp_var.h>
 #include <netinet/ip_mroute.h>
+#include <netinet/udp.h>
 
 #ifndef NTOHL
 #if BYTE_ORDER != BIG_ENDIAN
@@ -48,18 +51,20 @@
 #endif
 #endif
 
+extern int rsvp_on;
+
 #ifndef MROUTING
 /*
  * Dummy routines and globals used when multicast routing is not compiled in.
  */
 
-u_int		ip_mrtproto = 0;
 struct socket  *ip_mrouter  = NULL;
+u_int		ip_mrtproto = 0;
 struct mrtstat	mrtstat;
-
+u_int		rsvpdebug = 0;
 
 int
-_ip_mrouter_cmd(cmd, so, m)
+_ip_mrouter_set(cmd, so, m)
 	int cmd;
 	struct socket *so;
 	struct mbuf *m;
@@ -67,7 +72,19 @@ _ip_mrouter_cmd(cmd, so, m)
 	return(EOPNOTSUPP);
 }
 
-int (*ip_mrouter_cmd)(int, struct socket *, struct mbuf *) = _ip_mrouter_cmd;
+int (*ip_mrouter_set)(int, struct socket *, struct mbuf *) = _ip_mrouter_set;
+
+
+int
+_ip_mrouter_get(cmd, so, m)	/* getsockopt-side stub for kernels built without MROUTING */
+	int cmd;		/* ignored */
+	struct socket *so;	/* ignored */
+	struct mbuf **m;	/* ignored; *m is not touched */
+{
+	return(EOPNOTSUPP);	/* unconditionally refuse the request */
+}
+
+int (*ip_mrouter_get)(int, struct socket *, struct mbuf **) = _ip_mrouter_get;	/* hook preset to the stub */
 
 int
 _ip_mrouter_done()
@@ -98,14 +115,72 @@ _mrt_ioctl(int req, caddr_t data, struct proc *p)
 
 int (*mrt_ioctl)(int, caddr_t, struct proc *) = _mrt_ioctl;
 
-void multiencap_decap(struct mbuf *m) { /* XXX must fixup manually */
+void
+rsvp_input(m, iphlen)		/* XXX must fixup manually */
+	struct mbuf *m;		/* incoming packet; freed here or handed to rip_input() */
+	int iphlen;		/* not referenced by this version */
+{
+    /* Can still get packets with rsvp_on = 0 if there is a local member
+     * of the group to which the RSVP packet is addressed.  But in this
+     * case we want to throw the packet away.
+     */
+    if (!rsvp_on) {
+	m_freem(m);
+	return;
+    }
+ 
+    if (ip_rsvpd != NULL) {	/* NOTE(review): presumably the RSVP daemon's socket -- confirm */
+	if (rsvpdebug)
+	    printf("rsvp_input: Sending packet up old-style socket\n");
+	rip_input(m);		/* pass it up through rip_input() */
+	return;
+    }
+    /* Drop the packet */
+    m_freem(m);
+}
+
+void ipip_input(struct mbuf *m) { /* XXX must fixup manually */
 	rip_input(m);
 }
 
 int (*legal_vif_num)(int) = 0;
 
+/*
+ * This should never be called, since IP_MULTICAST_VIF should fail, but
+ * just in case it does get called, the code a little lower in ip_output
+ * will assign the packet a local address.
+ */
+u_long
+_ip_mcast_src(int vifi) { return INADDR_ANY; }	/* stub: ignores vifi, always INADDR_ANY */
+u_long (*ip_mcast_src)(int) = _ip_mcast_src;	/* hook preset to the stub */
+
+int
+ip_rsvp_vif_init(so, m)		/* stub used when MROUTING is not compiled in */
+    struct socket *so;		/* ignored */
+    struct mbuf *m;		/* ignored */
+{
+    return(EINVAL);		/* always rejected */
+}
+
+int
+ip_rsvp_vif_done(so, m)		/* stub used when MROUTING is not compiled in */
+    struct socket *so;		/* ignored */
+    struct mbuf *m;		/* ignored */
+{
+    return(EINVAL);		/* always rejected */
+}
+
+void
+ip_rsvp_force_done(so)		/* stub used when MROUTING is not compiled in */
+    struct socket *so;		/* ignored */
+{
+    return;			/* nothing to tear down in this configuration */
+}
+
 #else /* MROUTING */
 
+#define M_HASCL(m)	((m)->m_flags & M_EXT)
+
 #define INSIZ		sizeof(struct in_addr)
 #define	same(a1, a2) \
 	(bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0)
@@ -130,16 +205,23 @@ extern int ip_mrtproto;
 #define RTE_FOUND	0x2
 
 struct mbuf    *mfctable[MFCTBLSIZ];
+u_char		nexpire[MFCTBLSIZ];
 struct vif	viftable[MAXVIFS];
 u_int		mrtdebug = 0;	  /* debug level 	*/
+#define		DEBUG_MFC	0x02
+#define		DEBUG_FORWARD	0x04
+#define		DEBUG_EXPIRE	0x08
+#define		DEBUG_XMIT	0x10
 u_int       	tbfdebug = 0;     /* tbf debug level 	*/
+u_int		rsvpdebug = 0;	  /* rsvp debug level   */
 
-u_long timeout_val = 0;			/* count of outstanding upcalls */
+#define		EXPIRE_TIMEOUT	(hz / 4)	/* 4x / second		*/
+#define		UPCALL_EXPIRE	6		/* number of timeouts	*/
 
 /*
  * Define the token bucket filter structures
- * tbftable -> each vif has one of these for storing info
- * qtable   -> each interface has an associated queue of pkts
+ * tbftable -> each vif has one of these for storing info 
+ * qtable   -> each interface has an associated queue of pkts 
  */
 
 struct tbf tbftable[MAXVIFS];
@@ -156,7 +238,7 @@ struct pkt_queue qtable[MAXVIFS][MAXQSIZE];
 struct ifnet multicast_decap_if[MAXVIFS];
 
 #define ENCAP_TTL 64
-#define ENCAP_PROTO 4
+#define ENCAP_PROTO IPPROTO_IPIP	/* 4 */
 
 /* prototype IP hdr for encapsulated packets */
 struct ip multicast_encap_iphdr = {
@@ -169,7 +251,7 @@ struct ip multicast_encap_iphdr = {
 	sizeof(struct ip),		/* total length */
 	0,				/* id */
 	0,				/* frag offset */
-	ENCAP_TTL, ENCAP_PROTO,
+	ENCAP_TTL, ENCAP_PROTO,	
 	0,				/* checksum */
 };
 
@@ -178,115 +260,82 @@ struct ip multicast_encap_iphdr = {
  */
 static vifi_t	   numvifs = 0;
 static void (*encap_oldrawip)() = 0;
+static int have_encap_tunnel = 0;
 
 /*
- * one-back cache used by multiencap_decap to locate a tunnel's vif
+ * one-back cache used by ipip_input to locate a tunnel's vif
  * given a datagram's src ip address.
  */
 static u_long last_encap_src;
 static struct vif *last_encap_vif;
 
-static u_long nethash_fc(u_long, u_long);
-static struct mfc *mfcfind(u_long, u_long);
-int get_sg_cnt(struct sioc_sg_req *);
-int get_vif_cnt(struct sioc_vif_req *);
-int get_vifs(caddr_t);
+static int get_sg_cnt(struct sioc_sg_req *);
+static int get_vif_cnt(struct sioc_vif_req *);
+int ip_mrouter_init(struct socket *, struct mbuf *);
 static int add_vif(struct vifctl *);
 static int del_vif(vifi_t *);
 static int add_mfc(struct mfcctl *);
-static int del_mfc(struct delmfcctl *);
-static void cleanup_cache(void *);
-static int ip_mdq(struct mbuf *, struct ifnet *, u_long, struct mfc *,
-		  struct ip_moptions *);
+static int del_mfc(struct mfcctl *);
+static int get_version(struct mbuf *);
+static int get_assert(struct mbuf *);
+static int set_assert(int *);
+static void expire_upcalls(void *);
+static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *,
+		  vifi_t);
 static void phyint_send(struct ip *, struct vif *, struct mbuf *);
-static void srcrt_send(struct ip *, struct vif *, struct mbuf *);
 static void encap_send(struct ip *, struct vif *, struct mbuf *);
-void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long,
+static void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long,
 		 struct ip_moptions *);
-void tbf_queue(struct vif *, struct mbuf *, struct ip *, struct ip_moptions *);
-void tbf_process_q(struct vif *);
-void tbf_dequeue(struct vif *, int);
-void tbf_reprocess_q(void *);
-int tbf_dq_sel(struct vif *, struct ip *);
-void tbf_send_packet(struct vif *, struct mbuf *, struct ip_moptions *);
-void tbf_update_tokens(struct vif *);
+static void tbf_queue(struct vif *, struct mbuf *, struct ip *, struct ip_moptions *);
+static void tbf_process_q(struct vif *);
+static void tbf_dequeue(struct vif *, int);
+static void tbf_reprocess_q(void *);
+static int tbf_dq_sel(struct vif *, struct ip *);
+static void tbf_send_packet(struct vif *, struct mbuf *, struct ip_moptions *);
+static void tbf_update_tokens(struct vif *);
 static int priority(struct vif *, struct ip *);
-static int ip_mrouter_init(struct socket *);
-void multiencap_decap(struct mbuf *m);
+void multiencap_decap(struct mbuf *);
 
 /*
- * A simple hash function: returns MFCHASHMOD of the low-order octet of
- * the argument's network or subnet number and the multicast group assoc.
+ * whether or not special PIM assert processing is enabled.
  */
-static u_long
-nethash_fc(m,n)
-    register u_long m;
-    register u_long n;
-{
-    struct in_addr in1;
-    struct in_addr in2;
-
-    in1.s_addr = m;
-    m = in_netof(in1);
-    while ((m & 0xff) == 0) m >>= 8;
-
-    in2.s_addr = n;
-    n = in_netof(in2);
-    while ((n & 0xff) == 0) n >>= 8;
-
-    return (MFCHASHMOD(m) ^ MFCHASHMOD(n));
-}
+static int pim_assert;
+/*
+ * Rate limit for assert notification messages, in usec
+ */
+#define ASSERT_MSG_TIME		3000000
 
 /*
- * this is a direct-mapped cache used to speed the mapping from a
- * datagram source address to the associated multicast route.  Note
- * that unlike mrttable, the hash is on IP address, not IP net number.
+ * Hash function for a source, group entry
  */
-#define MFCHASHSIZ 1024
-#define MFCHASH(a, g) ((((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \
-			((g) >> 20) ^ ((g) >> 10) ^ (g)) & (MFCHASHSIZ-1))
-struct mfc *mfchash[MFCHASHSIZ];
+#define MFCHASH(a, g) MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \
+			((g) >> 20) ^ ((g) >> 10) ^ (g))
 
 /*
  * Find a route for a given origin IP address and Multicast group address
  * Type of service parameter to be added in the future!!!
  */
+
 #define MFCFIND(o, g, rt) { \
-	register u_int _mrhasho = o; \
-	register u_int _mrhashg = g; \
-	_mrhasho = MFCHASH(_mrhasho, _mrhashg); \
+	register struct mbuf *_mb_rt = mfctable[MFCHASH(o,g)]; \
+	register struct mfc *_rt = NULL; \
+	(rt) = NULL; /* result stays NULL unless a match is found */ \
 	++mrtstat.mrts_mfc_lookups; \
-	rt = mfchash[_mrhasho]; \
-	if ((rt == NULL) || \
-	    ((o & rt->mfc_originmask.s_addr) != rt->mfc_origin.s_addr) || \
-	     (g != rt->mfc_mcastgrp.s_addr)) \
-	     if ((rt = mfcfind(o, g)) != NULL) \
-		mfchash[_mrhasho] = rt; \
+	while (_mb_rt) { /* walk the hash chain for (o, g) */ \
+		_rt = mtod(_mb_rt, struct mfc *); \
+		if ((_rt->mfc_origin.s_addr == (o)) && \
+		    (_rt->mfc_mcastgrp.s_addr == (g)) && \
+		    (_mb_rt->m_act == NULL)) { /* skip entries with packets still queued */ \
+			(rt) = _rt; \
+			break; \
+		} \
+		_mb_rt = _mb_rt->m_next; \
+	} \
+	if ((rt) == NULL) { \
+		++mrtstat.mrts_mfc_misses; \
+	} \
 }
 
-/*
- * Find route by examining hash table entries
- */
-static struct mfc *
-mfcfind(origin, mcastgrp)
-    u_long origin;
-    u_long mcastgrp;
-{
-    register struct mbuf *mb_rt;
-    register struct mfc *rt;
-    register u_long hash;
-
-    hash = nethash_fc(origin, mcastgrp);
-    for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) {
-	rt = mtod(mb_rt, struct mfc *);
-	if (((origin & rt->mfc_originmask.s_addr) == rt->mfc_origin.s_addr) &&
-	    (mcastgrp == rt->mfc_mcastgrp.s_addr) &&
-	    (mb_rt->m_act == NULL))
-	    return (rt);
-    }
-    mrtstat.mrts_mfc_misses++;
-    return NULL;
-}
 
 /*
  * Macros to compute elapsed time efficiently
@@ -313,30 +362,63 @@ mfcfind(origin, mcastgrp)
 #define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \
 	      (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec)
 
+#ifdef UPCALL_TIMING
+u_long upcall_data[51];
+static void collate(struct timeval *);
+#endif /* UPCALL_TIMING */
+
+
 /*
- * Handle DVMRP setsockopt commands to modify the multicast routing tables.
+ * Handle MRT setsockopt commands to modify the multicast routing tables.
  */
 int
-X_ip_mrouter_cmd(cmd, so, m)
+X_ip_mrouter_set(cmd, so, m)	/* dispatch one MRT_* setsockopt command */
     int cmd;
     struct socket *so;
     struct mbuf *m;
 {
-   if (cmd != DVMRP_INIT && so != ip_mrouter) return EACCES;
+   if (cmd != MRT_INIT && so != ip_mrouter) return EACCES;	/* only MRT_INIT may come from a socket other than ip_mrouter */
 
     switch (cmd) {
-	case DVMRP_INIT:     return ip_mrouter_init(so);
-	case DVMRP_DONE:     return ip_mrouter_done();
-	case DVMRP_ADD_VIF:  return add_vif (mtod(m, struct vifctl *));
-	case DVMRP_DEL_VIF:  return del_vif (mtod(m, vifi_t *));
-	case DVMRP_ADD_MFC:  return add_mfc (mtod(m, struct mfcctl *));
-	case DVMRP_DEL_MFC:  return del_mfc (mtod(m, struct delmfcctl *));
+	case MRT_INIT:     return ip_mrouter_init(so, m);	/* m is validated inside */
+	case MRT_DONE:     return ip_mrouter_done();
+	case MRT_ADD_VIF:  return add_vif (mtod(m, struct vifctl *));
+	case MRT_DEL_VIF:  return del_vif (mtod(m, vifi_t *));
+	case MRT_ADD_MFC:  return add_mfc (mtod(m, struct mfcctl *));
+	case MRT_DEL_MFC:  return del_mfc (mtod(m, struct mfcctl *));
+	case MRT_ASSERT:   return set_assert(mtod(m, int *));	/* NOTE(review): m is used without a NULL/length check for every command but MRT_INIT */
 	default:             return EOPNOTSUPP;
     }
 }
 
 #ifndef MROUTE_LKM
-int (*ip_mrouter_cmd)(int, struct socket *, struct mbuf *) = X_ip_mrouter_cmd;
+int (*ip_mrouter_set)(int, struct socket *, struct mbuf *) = X_ip_mrouter_set;
+#endif
+
+/*
+ * Handle MRT getsockopt commands
+ */
+int
+X_ip_mrouter_get(cmd, so, m)
+    int cmd;			/* MRT_VERSION or MRT_ASSERT; anything else is EOPNOTSUPP */
+    struct socket *so;		/* must be the socket recorded in ip_mrouter */
+    struct mbuf **m;		/* out: reply mbuf allocated below */
+{
+    struct mbuf *mb;
+
+    if (so != ip_mrouter) return EACCES;	/* checked before anything is allocated */
+
+    *m = mb = m_get(M_WAIT, MT_SOOPTS);
+  
+    switch (cmd) {
+	case MRT_VERSION:   return get_version(mb);
+	case MRT_ASSERT:    return get_assert(mb);
+	default:            return EOPNOTSUPP;	/* NOTE(review): *m stays allocated; confirm the caller frees it */
+    }
+}
+
+#ifndef MROUTE_LKM
+int (*ip_mrouter_get)(int, struct socket *, struct mbuf **) = X_ip_mrouter_get;
 #endif
 
 /*
@@ -350,18 +432,15 @@ X_mrt_ioctl(cmd, data)
     int error = 0;
 
     switch (cmd) {
-      case (SIOCGETVIFINF):		/* Read Virtual Interface (m/cast) */
-	  return (get_vifs(data));
-	  break;
-      case (SIOCGETVIFCNT):
-	  return (get_vif_cnt((struct sioc_vif_req *)data));
-	  break;
-      case (SIOCGETSGCNT):
-	  return (get_sg_cnt((struct sioc_sg_req *)data));
-	  break;
+	case (SIOCGETVIFCNT):
+	    return (get_vif_cnt((struct sioc_vif_req *)data));
+	    break;
+	case (SIOCGETSGCNT):
+	    return (get_sg_cnt((struct sioc_sg_req *)data));
+	    break;
 	default:
-	  return (EINVAL);
-	  break;
+	    return (EINVAL);
+	    break;
     }
     return error;
 }
@@ -371,9 +450,9 @@ int (*mrt_ioctl)(int, caddr_t, struct proc *) = X_mrt_ioctl;
 #endif
 
 /*
- * returns the packet count for the source group provided
+ * returns the packet, byte, rpf-failure count for the source group provided
  */
-int
+static int
 get_sg_cnt(req)
     register struct sioc_sg_req *req;
 {
@@ -383,83 +462,73 @@ get_sg_cnt(req)
     s = splnet();
     MFCFIND(req->src.s_addr, req->grp.s_addr, rt);
     splx(s);
-    if (rt != NULL)
-	req->count = rt->mfc_pkt_cnt;
-    else
-	req->count = 0xffffffff;
+    if (rt != NULL) {
+	req->pktcnt = rt->mfc_pkt_cnt;
+	req->bytecnt = rt->mfc_byte_cnt;
+	req->wrong_if = rt->mfc_wrong_if;
+    } else
+	req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;
 
     return 0;
 }
 
 /*
- * returns the input and output packet counts on the interface provided
+ * returns the input and output packet and byte counts on the vif provided
  */
-int
+static int
 get_vif_cnt(req)
     register struct sioc_vif_req *req;
 {
     register vifi_t vifi = req->vifi;
 
+    if (vifi >= numvifs) return EINVAL;	/* index must be below the current vif count */
+
     req->icount = viftable[vifi].v_pkt_in;
     req->ocount = viftable[vifi].v_pkt_out;
+    req->ibytes = viftable[vifi].v_bytes_in;	/* byte counters alongside the packet counters */
+    req->obytes = viftable[vifi].v_bytes_out;
 
     return 0;
 }
 
-int
-get_vifs(data)
-    char *data;
-{
-    struct vif_conf *vifc = (struct vif_conf *)data;
-    struct vif_req *vifrp, vifr;
-    int space, error=0;
-
-    vifi_t vifi;
-    int s;
-
-    space = vifc->vifc_len;
-    vifrp  = vifc->vifc_req;
-
-    s = splnet();
-    vifc->vifc_num=numvifs;
-
-    for (vifi = 0; vifi <  numvifs; vifi++, vifrp++) {
-	if (viftable[vifi].v_lcl_addr.s_addr != 0) {
-	    vifr.v_flags=viftable[vifi].v_flags;
-	    vifr.v_threshold=viftable[vifi].v_threshold;
-	    vifr.v_lcl_addr=viftable[vifi].v_lcl_addr;
-	    vifr.v_rmt_addr=viftable[vifi].v_rmt_addr;
-	    strncpy(vifr.v_if_name,viftable[vifi].v_ifp->if_name,IFNAMSIZ);
-	    if ((space -= sizeof(vifr)) < 0) {
-		splx(s);
-		return(ENOSPC);
-	    }
-	    error = copyout((caddr_t)&vifr,(caddr_t)vifrp,(u_int)(sizeof vifr));
-	    if (error) {
-		splx(s);
-		return(error);
-	    }
-	}
-    }
-    splx(s);
-    return 0;
-}
 /*
  * Enable multicast routing
  */
-static int
-ip_mrouter_init(so)
+int
+ip_mrouter_init(so, m)		/* MRT_INIT: register so as the multicast routing socket */
 	struct socket *so;
+	struct mbuf *m;		/* option data: a single int that must equal 1 */
 {
+    int *v;
+
+    /* Returns 0, or EOPNOTSUPP / ENOPROTOOPT / EADDRINUSE on rejection. */
+    if (mrtdebug)
+	log(LOG_DEBUG,"ip_mrouter_init: so_type = %d, pr_protocol = %d",
+		so->so_type, so->so_proto->pr_protocol);
+
     if (so->so_type != SOCK_RAW ||
 	so->so_proto->pr_protocol != IPPROTO_IGMP) return EOPNOTSUPP;
 
+    if (!m || (m->m_len != sizeof(int)))	/* payload is read as an int, so compare against sizeof(int) */
+	return ENOPROTOOPT;
+
+    v = mtod(m, int *);
+    if (*v != 1)
+	return ENOPROTOOPT;
+
     if (ip_mrouter != NULL) return EADDRINUSE;
 
     ip_mrouter = so;
 
+    bzero((caddr_t)mfctable, sizeof(mfctable));	/* start with an empty forwarding cache */
+    bzero((caddr_t)nexpire, sizeof(nexpire));
+
+    pim_assert = 0;
+
+    timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT);	/* first expiry pass after EXPIRE_TIMEOUT (hz/4) ticks */
+
     if (mrtdebug)
-	log(LOG_DEBUG, "ip_mrouter_init\n");
+	log(LOG_DEBUG, "ip_mrouter_init");
 
     return 0;
 }
@@ -475,6 +544,7 @@ X_ip_mrouter_done()
     struct ifnet *ifp;
     struct ifreq ifr;
     struct mbuf *mb_rt;
+    struct mfc *rt;
     struct mbuf *m;
     struct rtdetq *rte;
     int s;
@@ -499,52 +569,44 @@ X_ip_mrouter_done()
     bzero((caddr_t)tbftable, sizeof(tbftable));
     bzero((caddr_t)viftable, sizeof(viftable));
     numvifs = 0;
+    pim_assert = 0;
+
+    untimeout(expire_upcalls, (caddr_t)NULL);
 
     /*
-     * Check if any outstanding timeouts remain
+     * Free all multicast forwarding cache entries.
      */
-    if (timeout_val != 0)
-	for (i = 0; i < MFCTBLSIZ; i++) {
-	    mb_rt = mfctable[i];
-	    while (mb_rt) {
-		if ( mb_rt->m_act != NULL) {
-		    untimeout(cleanup_cache, (caddr_t)mb_rt);
-		    while (mb_rt->m_act) {
-		        m = mb_rt->m_act;
-			mb_rt->m_act = m->m_act;
-			rte = mtod(m, struct rtdetq *);
-			m_freem(rte->m);
-			m_free(m);
-		    }
-		    timeout_val--;
+    for (i = 0; i < MFCTBLSIZ; i++) {
+	mb_rt = mfctable[i];
+	while (mb_rt) {
+	    if (mb_rt->m_act != NULL) {
+		while (mb_rt->m_act) {
+		    m = mb_rt->m_act;
+		    mb_rt->m_act = m->m_act;
+		    rte = mtod(m, struct rtdetq *);
+		    m_freem(rte->m);
+		    m_free(m);
 		}
-	    mb_rt = mb_rt->m_next;
 	    }
-	    if (timeout_val == 0)
-		break;
+	    mb_rt = m_free(mb_rt);
 	}
-
-    /*
-     * Free all multicast forwarding cache entries.
-     */
-    for (i = 0; i < MFCTBLSIZ; i++)
-	m_freem(mfctable[i]);
+    }
 
     bzero((caddr_t)mfctable, sizeof(mfctable));
-    bzero((caddr_t)mfchash, sizeof(mfchash));
 
     /*
      * Reset de-encapsulation cache
      */
     last_encap_src = NULL;
     last_encap_vif = NULL;
-
+    have_encap_tunnel = 0;
+ 
     ip_mrouter = NULL;
 
     splx(s);
 
     if (mrtdebug)
-	log(LOG_DEBUG, "ip_mrouter_done\n");
+	log(LOG_DEBUG, "ip_mrouter_done");
 
     return 0;
 }
@@ -553,6 +615,51 @@ X_ip_mrouter_done()
 int (*ip_mrouter_done)(void) = X_ip_mrouter_done;
 #endif
 
+static int
+get_version(mb)		/* MRT_VERSION: write the version word into the reply mbuf */
+    struct mbuf *mb;		/* reply mbuf; data area receives one int */
+{
+    int *v;
+
+    v = mtod(mb, int *);
+
+    *v = 0x0305;	/* XXX !!!! hard-coded; presumably 3.5 as in "MROUTING Revision: 3.5" */
+    mb->m_len = sizeof(int);	/* the reply is exactly one int long */
+
+    return 0;
+}
+
+/*
+ * Set PIM assert processing global
+ */
+static int
+set_assert(i)		/* returns 0, or EINVAL if *i is neither 0 nor 1 */
+    int *i;		/* requested value for pim_assert */
+{
+    if ((*i != 1) && (*i != 0))
+	return EINVAL;	/* pim_assert is left unchanged */
+
+    pim_assert = *i;
+
+    return 0;
+}
+
+/*
+ * Get PIM assert processing global
+ */
+static int
+get_assert(m)		/* MRT_ASSERT (get): copy pim_assert into the reply mbuf */
+    struct mbuf *m;	/* reply mbuf; its data area receives one int */
+{
+    int *i;
+
+    i = mtod(m, int *);
+    *i = pim_assert;
+    m->m_len = sizeof(int);	/* record the reply length, as get_version() does */
+
+    return 0;
+}
+
 /*
  * Add a vif to the vif table
  */
@@ -579,20 +686,28 @@ add_vif(vifcp)
 
     if (vifcp->vifc_flags & VIFF_TUNNEL) {
 	if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) {
-          if (encap_oldrawip == 0) {
-              extern struct protosw inetsw[];
-              register u_char pr = ip_protox[ENCAP_PROTO];
-
-              encap_oldrawip = inetsw[pr].pr_input;
-              inetsw[pr].pr_input = multiencap_decap;
-		for (s = 0; s < MAXVIFS; ++s) {
-		    multicast_decap_if[s].if_name = "mdecap";
-		    multicast_decap_if[s].if_unit = s;
+		/*
+		 * An encapsulating tunnel is wanted.  Tell ipip_input() to
+		 * start paying attention to encapsulated packets.
+		 */
+		if (have_encap_tunnel == 0) {
+			have_encap_tunnel = 1;
+			for (s = 0; s < MAXVIFS; ++s) {
+				multicast_decap_if[s].if_name = "mdecap";
+				multicast_decap_if[s].if_unit = s;
+			}
 		}
-	    }
-	    ifp = &multicast_decap_if[vifcp->vifc_vifi];
+		/*
+		 * Set interface to fake encapsulator interface
+		 */
+		ifp = &multicast_decap_if[vifcp->vifc_vifi];
+		/*
+		 * Prepare cached route entry
+		 */
+		bzero(&vifp->v_route, sizeof(vifp->v_route));
 	} else {
-	    ifp = 0;
+	    log(LOG_ERR, "Source routed tunnels not supported.");
+	    return EOPNOTSUPP;
 	}
     } else {
 	/* Make sure the interface supports multicast */
@@ -622,22 +737,26 @@ add_vif(vifcp)
     vifp->v_rmt_addr  = vifcp->vifc_rmt_addr;
     vifp->v_ifp       = ifp;
     vifp->v_rate_limit= vifcp->vifc_rate_limit;
+    vifp->v_rsvp_on   = 0;
+    vifp->v_rsvpd     = NULL;
     /* initialize per vif pkt counters */
     vifp->v_pkt_in    = 0;
     vifp->v_pkt_out   = 0;
+    vifp->v_bytes_in  = 0;
+    vifp->v_bytes_out = 0;
     splx(s);
 
     /* Adjust numvifs up if the vifi is higher than numvifs */
     if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1;
 
     if (mrtdebug)
-	log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d\n",
-	    vifcp->vifc_vifi,
+	log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d",
+	    vifcp->vifc_vifi, 
 	    ntohl(vifcp->vifc_lcl_addr.s_addr),
 	    (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
 	    ntohl(vifcp->vifc_rmt_addr.s_addr),
 	    vifcp->vifc_threshold,
-	    vifcp->vifc_rate_limit);
+	    vifcp->vifc_rate_limit);    
 
     return 0;
 }
@@ -685,7 +804,7 @@ del_vif(vifip)
     splx(s);
 
     if (mrtdebug)
-      log(LOG_DEBUG, "del_vif %d, numvifs %d\n", *vifip, numvifs);
+      log(LOG_DEBUG, "del_vif %d, numvifs %d", *vifip, numvifs);
 
     return 0;
 }
@@ -698,9 +817,7 @@ add_mfc(mfccp)
     struct mfcctl *mfccp;
 {
     struct mfc *rt;
-    struct mfc *rt1 = 0;
     register struct mbuf *mb_rt;
-    struct mbuf *prev_mb_rt;
     u_long hash;
     struct mbuf *mb_ntry;
     struct rtdetq *rte;
@@ -708,81 +825,77 @@ add_mfc(mfccp)
     int s;
     int i;
 
-    rt = mfcfind(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
+    MFCFIND(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr, rt);
 
     /* If an entry already exists, just update the fields */
     if (rt) {
-	if (mrtdebug)
-	    log(LOG_DEBUG,"add_mfc update o %x g %x m %x p %x\n",
+	if (mrtdebug & DEBUG_MFC)
+	    log(LOG_DEBUG,"add_mfc update o %x g %x p %x",
 		ntohl(mfccp->mfcc_origin.s_addr),
 		ntohl(mfccp->mfcc_mcastgrp.s_addr),
-		ntohl(mfccp->mfcc_originmask.s_addr),
 		mfccp->mfcc_parent);
 
 	s = splnet();
 	rt->mfc_parent = mfccp->mfcc_parent;
 	for (i = 0; i < numvifs; i++)
-	    VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]);
+	    rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
 	splx(s);
 	return 0;
     }
 
-    /*
+    /* 
      * Find the entry for which the upcall was made and update
      */
     s = splnet();
-    hash = nethash_fc(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
-    for (prev_mb_rt = mb_rt = mfctable[hash], nstl = 0;
-	 mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) {
+    hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
+    for (mb_rt = mfctable[hash], nstl = 0; mb_rt; mb_rt = mb_rt->m_next) {
 
 	rt = mtod(mb_rt, struct mfc *);
-	if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr)
-	     == mfccp->mfcc_origin.s_addr) &&
+	if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
 	    (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) &&
 	    (mb_rt->m_act != NULL)) {
+  
+	    if (nstl++)
+		log(LOG_ERR, "add_mfc %s o %x g %x p %x dbx %x",
+		    "multiple kernel entries",
+		    ntohl(mfccp->mfcc_origin.s_addr),
+		    ntohl(mfccp->mfcc_mcastgrp.s_addr),
+		    mfccp->mfcc_parent, mb_rt->m_act);
+
+	    if (mrtdebug & DEBUG_MFC)
+		log(LOG_DEBUG,"add_mfc o %x g %x p %x dbg %x",
+		    ntohl(mfccp->mfcc_origin.s_addr),
+		    ntohl(mfccp->mfcc_mcastgrp.s_addr),
+		    mfccp->mfcc_parent, mb_rt->m_act);
 
-	    if (!nstl++) {
-		if (mrtdebug)
-		    log(LOG_DEBUG,"add_mfc o %x g %x m %x p %x dbg %x\n",
-			ntohl(mfccp->mfcc_origin.s_addr),
-			ntohl(mfccp->mfcc_mcastgrp.s_addr),
-			ntohl(mfccp->mfcc_originmask.s_addr),
-			mfccp->mfcc_parent, mb_rt->m_act);
-
-		rt->mfc_origin     = mfccp->mfcc_origin;
-		rt->mfc_originmask = mfccp->mfcc_originmask;
-		rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
-		rt->mfc_parent     = mfccp->mfcc_parent;
-		for (i = 0; i < numvifs; i++)
-		    VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]);
-		/* initialize pkt counters per src-grp */
-		rt->mfc_pkt_cnt    = 0;
-		rt1 = rt;
-	    }
+	    rt->mfc_origin     = mfccp->mfcc_origin;
+	    rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
+	    rt->mfc_parent     = mfccp->mfcc_parent;
+	    for (i = 0; i < numvifs; i++)
+		rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
+	    /* initialize pkt counters per src-grp */
+	    rt->mfc_pkt_cnt    = 0;
+	    rt->mfc_byte_cnt   = 0;
+	    rt->mfc_wrong_if   = 0;
+	    rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0;
 
-	    /* prevent cleanup of cache entry */
-	    untimeout(cleanup_cache, (caddr_t)mb_rt);
-	    timeout_val--;
+	    rt->mfc_expire = 0;	/* Don't clean this guy up */
+	    nexpire[hash]--;
 
 	    /* free packets Qed at the end of this entry */
 	    while (mb_rt->m_act) {
 		mb_ntry = mb_rt->m_act;
 		rte = mtod(mb_ntry, struct rtdetq *);
-		ip_mdq(rte->m, rte->ifp, rte->tunnel_src,
-		       rt1, rte->imo);
+/* #ifdef RSVP_ISI */
+		ip_mdq(rte->m, rte->ifp, rt, -1);
+/* #endif */
 		mb_rt->m_act = mb_ntry->m_act;
 		m_freem(rte->m);
+#ifdef UPCALL_TIMING
+		collate(&(rte->t));
+#endif /* UPCALL_TIMING */
 		m_free(mb_ntry);
 	    }
-
-	    /*
-	     * If more than one entry was created for a single upcall
-	     * delete that entry
-	     */
-	    if (nstl > 1) {
-		MFREE(mb_rt, prev_mb_rt->m_next);
-		mb_rt = prev_mb_rt;
-	    }
 	}
     }
 
@@ -790,29 +903,31 @@ add_mfc(mfccp)
      * It is possible that an entry is being inserted without an upcall
      */
     if (nstl == 0) {
-	if (mrtdebug)
-	    log(LOG_DEBUG,"add_mfc no upcall h %d o %x g %x m %x p %x\n",
+	if (mrtdebug & DEBUG_MFC)
+	    log(LOG_DEBUG,"add_mfc no upcall h %d o %x g %x p %x",
 		hash, ntohl(mfccp->mfcc_origin.s_addr),
 		ntohl(mfccp->mfcc_mcastgrp.s_addr),
-		ntohl(mfccp->mfcc_originmask.s_addr),
 		mfccp->mfcc_parent);
-
-	for (prev_mb_rt = mb_rt = mfctable[hash];
-	     mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) {
-
+	
+	for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) {
+	    
 	    rt = mtod(mb_rt, struct mfc *);
-	    if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr)
-		 == mfccp->mfcc_origin.s_addr) &&
+	    if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
 		(rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) {
 
 		rt->mfc_origin     = mfccp->mfcc_origin;
-		rt->mfc_originmask = mfccp->mfcc_originmask;
 		rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
 		rt->mfc_parent     = mfccp->mfcc_parent;
 		for (i = 0; i < numvifs; i++)
-		    VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]);
+		    rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
 		/* initialize pkt counters per src-grp */
 		rt->mfc_pkt_cnt    = 0;
+		rt->mfc_byte_cnt   = 0;
+		rt->mfc_wrong_if   = 0;
+		rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0;
+		if (rt->mfc_expire)
+		    nexpire[hash]--;
+		rt->mfc_expire	   = 0;
 	    }
 	}
 	if (mb_rt == NULL) {
@@ -822,19 +937,22 @@ add_mfc(mfccp)
 		splx(s);
 		return ENOBUFS;
 	    }
-
+	    
 	    rt = mtod(mb_rt, struct mfc *);
-
+	    
 	    /* insert new entry at head of hash chain */
 	    rt->mfc_origin     = mfccp->mfcc_origin;
-	    rt->mfc_originmask = mfccp->mfcc_originmask;
 	    rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
 	    rt->mfc_parent     = mfccp->mfcc_parent;
 	    for (i = 0; i < numvifs; i++)
-		VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]);
+		    rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
 	    /* initialize pkt counters per src-grp */
 	    rt->mfc_pkt_cnt    = 0;
-
+	    rt->mfc_byte_cnt   = 0;
+	    rt->mfc_wrong_if   = 0;
+	    rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0;
+	    rt->mfc_expire     = 0;
+	    
 	    /* link into table */
 	    mb_rt->m_next  = mfctable[hash];
 	    mfctable[hash] = mb_rt;
@@ -845,56 +963,73 @@ add_mfc(mfccp)
     return 0;
 }
 
+#ifdef UPCALL_TIMING
+/*
+ * collect delay statistics on the upcalls 
+ */
+static void collate(t)
+register struct timeval *t;	/* timestamp to measure from (add_mfc passes &rte->t) */
+{
+    register u_long d;		/* histogram bucket index */
+    register struct timeval tp;	/* current time, filled in by GET_TIME */
+    register u_long delta;	/* elapsed time between *t and tp */
+    
+    GET_TIME(tp);
+    
+    if (TV_LT(*t, tp))		/* only count samples whose timestamp is in the past */
+    {
+	TV_DELTA(tp, *t, delta);
+	
+	d = delta >> 10;	/* NOTE(review): if delta is in usec this is ~1 ms per bucket -- confirm */
+	if (d > 50)
+	    d = 50;		/* clamp into the last of upcall_data's 51 slots */
+	
+	++upcall_data[d];
+    }
+}
+#endif /* UPCALL_TIMING */
+
 /*
  * Delete an mfc entry
  */
 static int
 del_mfc(mfccp)
-    struct delmfcctl *mfccp;
+    struct mfcctl *mfccp;
 {
     struct in_addr 	origin;
     struct in_addr 	mcastgrp;
     struct mfc 		*rt;
     struct mbuf 	*mb_rt;
-    struct mbuf 	*prev_mb_rt;
+    struct mbuf 	**nptr;
     u_long 		hash;
-    struct mfc 		**cmfc;
-    struct mfc 		**cmfcend;
-    int s;
+    int s, i;
 
     origin = mfccp->mfcc_origin;
     mcastgrp = mfccp->mfcc_mcastgrp;
-    hash = nethash_fc(origin.s_addr, mcastgrp.s_addr);
+    hash = MFCHASH(origin.s_addr, mcastgrp.s_addr);
 
-    if (mrtdebug)
-	log(LOG_DEBUG,"del_mfc orig %x mcastgrp %x\n",
+    if (mrtdebug & DEBUG_MFC)
+	log(LOG_DEBUG,"del_mfc orig %x mcastgrp %x",
 	    ntohl(origin.s_addr), ntohl(mcastgrp.s_addr));
 
-    for (prev_mb_rt = mb_rt = mfctable[hash]
-	 ; mb_rt
-	 ; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) {
+    s = splnet();
+
+    nptr = &mfctable[hash];
+    while ((mb_rt = *nptr) != NULL) {
         rt = mtod(mb_rt, struct mfc *);
 	if (origin.s_addr == rt->mfc_origin.s_addr &&
 	    mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr &&
 	    mb_rt->m_act == NULL)
 	    break;
+
+	nptr = &mb_rt->m_next;
     }
     if (mb_rt == NULL) {
-	return ESRCH;
+	splx(s);
+	return EADDRNOTAVAIL;
     }
 
-    s = splnet();
-
-    cmfc = mfchash;
-    cmfcend = cmfc + MFCHASHSIZ;
-    for ( ; cmfc < cmfcend; ++cmfc)
-	if (*cmfc == rt)
-	    *cmfc = 0;
-
-    if (prev_mb_rt != mb_rt) {	/* if moved past head of list */
-	MFREE(mb_rt, prev_mb_rt->m_next);
-    } else			/* delete head of list, it is in the table */
-        mfctable[hash] = m_free(mb_rt);
+    MFREE(mb_rt, *nptr);
 
     splx(s);
 
@@ -902,13 +1037,34 @@ del_mfc(mfccp)
 }
 
 /*
+ * Send a message to mrouted on the multicast routing socket
+ */
+static int
+socket_send(s, mm, src)	/* returns 0 if queued, -1 if mm was dropped */
+	struct socket *s;	/* destination socket; may be NULL */
+	struct mbuf *mm;	/* message; queued on s or freed here */
+	struct sockaddr_in *src;	/* address passed to sbappendaddr() */
+{
+	if (s) {
+		if (sbappendaddr(&s->so_rcv,
+				 (struct sockaddr *)src,
+				 mm, (struct mbuf *)0) != 0) {
+			sorwakeup(s);	/* message was appended to the receive buffer */
+			return 0;
+		}
+	}
+	m_freem(mm);	/* no socket, or the append failed */
+	return -1;
+}
+
+/*
  * IP multicast forwarding function. This function assumes that the packet
  * pointed to by "ip" has arrived on (or is about to be sent to) the interface
  * pointed to by "ifp", and the packet is to be relayed to other networks
  * that have members of the packet's destination IP multicast group.
  *
- * The packet is returned unscathed to the caller, unless it is tunneled
- * or erroneous, in which case a non-zero return value tells the caller to
+ * The packet is returned unscathed to the caller, unless it is
+ * erroneous, in which case a non-zero return value tells the caller to
  * discard it.
  */
 
@@ -922,76 +1078,53 @@ X_ip_mforward(ip, ifp, m, imo)
     struct mbuf *m;
     struct ip_moptions *imo;
 {
-    register struct mfc *rt;
+    register struct mfc *rt = 0; /* XXX uninit warning */
     register u_char *ipoptions;
-    u_long tunnel_src;
     static struct sockproto	k_igmpproto 	= { AF_INET, IPPROTO_IGMP };
     static struct sockaddr_in 	k_igmpsrc	= { sizeof k_igmpsrc, AF_INET };
-    static struct sockaddr_in 	k_igmpdst 	= { sizeof k_igmpdst, AF_INET };
+    static int srctun = 0;
     register struct mbuf *mm;
-    register struct ip *k_data;
     int s;
+    vifi_t vifi;
+    struct vif *vifp;
 
-    if (mrtdebug > 1)
-      log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %x (%s%d)\n",
-          ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp,
-          ifp->if_name, ifp->if_unit);
+    if (mrtdebug & DEBUG_FORWARD)
+	log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %x",
+	    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp);
 
     if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 ||
 	(ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) {
 	/*
-	 * Packet arrived via a physical interface.
+	 * Packet arrived via a physical interface or
+	 * an encapsulated tunnel.
 	 */
-	tunnel_src = 0;
     } else {
 	/*
 	 * Packet arrived through a source-route tunnel.
-	 *
-	 * A source-route tunneled packet has a single NOP option and a
-	 * two-element
-	 * loose-source-and-record-route (LSRR) option immediately following
-	 * the fixed-size part of the IP header.  At this point in processing,
-	 * the IP header should contain the following IP addresses:
-	 *
-	 *	original source          - in the source address field
-	 *	destination group        - in the destination address field
-	 *	remote tunnel end-point  - in the first  element of LSRR
-	 *	one of this host's addrs - in the second element of LSRR
-	 *
-	 * NOTE: RFC-1075 would have the original source and remote tunnel
-	 *	 end-point addresses swapped.  However, that could cause
-	 *	 delivery of ICMP error messages to innocent applications
-	 *	 on intermediate routing hosts!  Therefore, we hereby
-	 *	 change the spec.
-	 */
-
-	/*
-	 * Verify that the tunnel options are well-formed.
+	 * Source-route tunnels are no longer supported.
 	 */
-	if (ipoptions[0] != IPOPT_NOP ||
-	    ipoptions[2] != 11 ||	/* LSRR option length   */
-	    ipoptions[3] != 12 ||	/* LSRR address pointer */
-	    (tunnel_src = *(u_long *)(&ipoptions[4])) == 0) {
-	    mrtstat.mrts_bad_tunnel++;
-	    if (mrtdebug)
-		log(LOG_DEBUG,
-		    "ip_mforward: bad tunnel from %u (%x %x %x %x %x %x)\n",
-		    ntohl(ip->ip_src.s_addr),
-		    ipoptions[0], ipoptions[1], ipoptions[2], ipoptions[3],
-		    *(u_long *)(&ipoptions[4]), *(u_long *)(&ipoptions[8]));
-	    return 1;
-	}
+	if ((srctun++ % 1000) == 0)
+	    log(LOG_ERR, "ip_mforward: received source-routed packet from %x",
+		ntohl(ip->ip_src.s_addr));
 
-	/*
-	 * Delete the tunnel options from the packet.
-	 */
-	ovbcopy((caddr_t)(ipoptions + TUNNEL_LEN), (caddr_t)ipoptions,
-		(unsigned)(m->m_len - (IP_HDR_LEN + TUNNEL_LEN)));
-	m->m_len   -= TUNNEL_LEN;
-	ip->ip_len -= TUNNEL_LEN;
-	ip->ip_hl  -= TUNNEL_LEN >> 2;
+	return 1;
+    }
 
-	ifp = 0;
+    if ((imo) && ((vifi = imo->imo_multicast_vif) < numvifs)) {
+	if (ip->ip_ttl < 255)
+		ip->ip_ttl++;	/* compensate for -1 in *_send routines */
+	if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
+	    vifp = viftable + vifi;
+	    printf("Sending IPPROTO_RSVP from %x to %x on vif %d (%s%s%d)\n",
+		ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), vifi,
+		(vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "",
+		vifp->v_ifp->if_name, vifp->v_ifp->if_unit);
+	}
+	return (ip_mdq(m, ifp, rt, vifi));
+    }
+    if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
+	printf("Warning: IPPROTO_RSVP from %x to %x without vif option\n",
+	    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr));
     }
 
     /*
@@ -1000,7 +1133,7 @@ X_ip_mforward(ip, ifp, m, imo)
      */
     if (ip->ip_ttl <= 1 ||
 	ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP)
-	return (int)tunnel_src;
+	return 0;
 
     /*
      * Determine forwarding vifs from the forwarding cache table
@@ -1011,10 +1144,8 @@ X_ip_mforward(ip, ifp, m, imo)
     /* Entry exists, so forward if necessary */
     if (rt != NULL) {
 	splx(s);
-	return (ip_mdq(m, ifp, tunnel_src, rt, imo));
-    }
-
-    else {
+	return (ip_mdq(m, ifp, rt, -1));
+    } else {
 	/*
 	 * If we don't have a route for packet's origin,
 	 * Make a copy of the packet &
@@ -1027,28 +1158,88 @@ X_ip_mforward(ip, ifp, m, imo)
 	register struct rtdetq *rte;
 	register struct mbuf *rte_m;
 	register u_long hash;
+	register int npkts;
+#ifdef UPCALL_TIMING
+	struct timeval tp;
+
+	GET_TIME(tp);
+#endif
 
 	mrtstat.mrts_no_route++;
-	if (mrtdebug)
-	    log(LOG_DEBUG, "ip_mforward: no rte s %x g %x\n",
+	if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC))
+	    log(LOG_DEBUG, "ip_mforward: no rte s %x g %x",
 		ntohl(ip->ip_src.s_addr),
 		ntohl(ip->ip_dst.s_addr));
 
+	/*
+	 * Allocate mbufs early so that we don't do extra work if we are
+	 * just going to fail anyway.
+	 */
+	MGET(mb_ntry, M_DONTWAIT, MT_DATA);
+	if (mb_ntry == NULL) {
+	    splx(s);
+	    return ENOBUFS;
+	}
+	mb0 = m_copy(m, 0, M_COPYALL);
+	if (mb0 == NULL) {
+	    m_free(mb_ntry);
+	    splx(s);
+	    return ENOBUFS;
+	}
+
 	/* is there an upcall waiting for this packet? */
-	hash = nethash_fc(ip->ip_src.s_addr, ip->ip_dst.s_addr);
+	hash = MFCHASH(ip->ip_src.s_addr, ip->ip_dst.s_addr);
 	for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) {
 	    rt = mtod(mb_rt, struct mfc *);
-	    if (((ip->ip_src.s_addr & rt->mfc_originmask.s_addr) ==
-		 rt->mfc_origin.s_addr) &&
+	    if ((ip->ip_src.s_addr == rt->mfc_origin.s_addr) &&
 		(ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) &&
 		(mb_rt->m_act != NULL))
 		break;
 	}
 
 	if (mb_rt == NULL) {
+	    int hlen = ip->ip_hl << 2;
+	    int i;
+	    struct igmpmsg *im;
+
 	    /* no upcall, so make a new entry */
 	    MGET(mb_rt, M_DONTWAIT, MT_MRTABLE);
 	    if (mb_rt == NULL) {
+		m_free(mb_ntry);
+		m_free(mb0);
+		splx(s);
+		return ENOBUFS;
+	    }
+	    /* Make a copy of the header to send to the user level process */
+	    mm = m_copy(m, 0, hlen);
+	    if (mm && (M_HASCL(mm) || mm->m_len < hlen))
+		mm = m_pullup(mm, hlen);
+	    if (mm == NULL) {
+		m_free(mb_ntry);
+		m_free(mb0);
+		m_free(mb_rt);
+		splx(s);
+		return ENOBUFS;
+	    }
+
+	    /* 
+	     * Send message to routing daemon to install 
+	     * a route into the kernel table
+	     */
+	    k_igmpsrc.sin_addr = ip->ip_src;
+	    
+	    im = mtod(mm, struct igmpmsg *);
+	    im->im_msgtype	= IGMPMSG_NOCACHE;
+	    im->im_mbz		= 0;
+
+	    mrtstat.mrts_upcalls++;
+
+	    if (socket_send(ip_mrouter, mm, &k_igmpsrc) < 0) {
+		log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full");
+		++mrtstat.mrts_upq_sockfull;
+		m_free(mb_ntry);
+		m_free(mb0);
+		m_free(mb_rt);
 		splx(s);
 		return ENOBUFS;
 	    }
@@ -1057,80 +1248,49 @@ X_ip_mforward(ip, ifp, m, imo)
 
 	    /* insert new entry at head of hash chain */
 	    rt->mfc_origin.s_addr     = ip->ip_src.s_addr;
-	    rt->mfc_originmask.s_addr = (u_long)0xffffffff;
 	    rt->mfc_mcastgrp.s_addr   = ip->ip_dst.s_addr;
+	    rt->mfc_expire	      = UPCALL_EXPIRE;
+	    nexpire[hash]++;
+	    for (i = 0; i < numvifs; i++)
+		rt->mfc_ttls[i] = 0;
+	    rt->mfc_parent = -1;
 
 	    /* link into table */
-	    hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr);
 	    mb_rt->m_next  = mfctable[hash];
 	    mfctable[hash] = mb_rt;
 	    mb_rt->m_act = NULL;
 
-	}
-
-	/* determine if q has overflowed */
-	for (rte_m = mb_rt, hash = 0; rte_m->m_act; rte_m = rte_m->m_act)
-	    hash++;
-
-	if (hash > MAX_UPQ) {
-	    mrtstat.mrts_upq_ovflw++;
-	    splx(s);
-	    return 0;
-	}
+	    rte_m = mb_rt;
+	} else {
+	    /* determine if q has overflowed */
+	    for (rte_m = mb_rt, npkts = 0; rte_m->m_act; rte_m = rte_m->m_act)
+		npkts++;
 
-	/* add this packet and timing, ifp info to m_act */
-	MGET(mb_ntry, M_DONTWAIT, MT_DATA);
-	if (mb_ntry == NULL) {
-	    splx(s);
-	    return ENOBUFS;
+	    if (npkts > MAX_UPQ) {
+		mrtstat.mrts_upq_ovflw++;
+		m_free(mb_ntry);
+		m_free(mb0);
+		splx(s);
+		return 0;
+	    }
 	}
 
 	mb_ntry->m_act = NULL;
 	rte = mtod(mb_ntry, struct rtdetq *);
 
-	mb0 = m_copy(m, 0, M_COPYALL);
-	if (mb0 == NULL) {
-	    splx(s);
-	    return ENOBUFS;
-	}
-
 	rte->m 			= mb0;
 	rte->ifp 		= ifp;
-	rte->tunnel_src 	= tunnel_src;
-	rte->imo		= imo;
+#ifdef UPCALL_TIMING
+	rte->t			= tp;
+#endif
 
-	rte_m->m_act = mb_ntry;
+	/* Add this entry to the end of the queue */
+	rte_m->m_act		= mb_ntry;
 
 	splx(s);
 
-	if (hash == 0) {
-	    /*
-	     * Send message to routing daemon to install
-	     * a route into the kernel table
-	     */
-	    k_igmpsrc.sin_addr = ip->ip_src;
-	    k_igmpdst.sin_addr = ip->ip_dst;
-
-	    mm = m_copy(m, 0, M_COPYALL);
-	    if (mm == NULL) {
-		splx(s);
-		return ENOBUFS;
-	    }
-
-	    k_data = mtod(mm, struct ip *);
-	    k_data->ip_p = 0;
-
-	    mrtstat.mrts_upcalls++;
-
-          rip_ip_input(mm, ip_mrouter, (struct sockaddr *)&k_igmpsrc);
-
-	    /* set timer to cleanup entry if upcall is lost */
-	    timeout(cleanup_cache, (caddr_t)mb_rt, 100);
-	    timeout_val++;
-	}
-
 	return 0;
-    }
+    }		
 }
 
 #ifndef MROUTE_LKM
@@ -1142,99 +1302,160 @@ int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
  * Clean up the cache entry if upcall is not serviced
  */
 static void
-cleanup_cache(xmb_rt)
-	void *xmb_rt;
+expire_upcalls(void *unused)
 {
-    struct mbuf *mb_rt = xmb_rt;
-    struct mfc *rt;
-    u_long hash;
-    struct mbuf *prev_m0;
-    struct mbuf *m0;
-    struct mbuf *m;
+    struct mbuf *mb_rt, *m, **nptr;
     struct rtdetq *rte;
+    struct mfc *mfc;
+    int i;
     int s;
 
-    rt = mtod(mb_rt, struct mfc *);
-    hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr);
-
-    if (mrtdebug)
-	log(LOG_DEBUG, "ip_mforward: cleanup ipm %d h %d s %x g %x\n",
-	    ip_mrouter, hash, ntohl(rt->mfc_origin.s_addr),
-	    ntohl(rt->mfc_mcastgrp.s_addr));
-
-    mrtstat.mrts_cache_cleanups++;
-
-    /*
-     * determine entry to be cleaned up in cache table
-     */
     s = splnet();
-    for (prev_m0 = m0 = mfctable[hash]; m0; prev_m0 = m0, m0 = m0->m_next)
-	if (m0 == mb_rt)
-	    break;
+    for (i = 0; i < MFCTBLSIZ; i++) {
+	if (nexpire[i] == 0)
+	    continue;
+	nptr = &mfctable[i];
+	for (mb_rt = *nptr; mb_rt != NULL; mb_rt = *nptr) {
+	    mfc = mtod(mb_rt, struct mfc *);
 
-    /*
-     * drop all the packets
-     * free the mbuf with the pkt, if, timing info
-     */
-    while (mb_rt->m_act) {
-	m = mb_rt->m_act;
-	mb_rt->m_act = m->m_act;
+	    /*
+	     * Only entries with queued packets (m_act != NULL) are pending
+	     * upcalls; skip the rest.  An mfc_expire of 0 is never counted
+	     * down.  Otherwise tick the counter and expire when it hits 0.
+	     */
+	    if (mb_rt->m_act != NULL &&
+	        mfc->mfc_expire != 0 &&
+		--mfc->mfc_expire == 0) {
+		if (mrtdebug & DEBUG_EXPIRE)
+		    log(LOG_DEBUG, "expire_upcalls: expiring (%x %x)",
+			ntohl(mfc->mfc_origin.s_addr),
+			ntohl(mfc->mfc_mcastgrp.s_addr));
+		/*
+		 * Drop every queued packet: free the saved packet chain
+		 * (rte->m) and the mbuf that holds its rtdetq record.
+		 */
+		while (mb_rt->m_act) {
+		    m = mb_rt->m_act;
+		    mb_rt->m_act = m->m_act;
+	     
+		    rte = mtod(m, struct rtdetq *);
+		    m_freem(rte->m);
+		    m_free(m);
+		}
+		++mrtstat.mrts_cache_cleanups;
+		nexpire[i]--;
 
-	rte = mtod(m, struct rtdetq *);
-	m_freem(rte->m);
-	m_free(m);
+		MFREE(mb_rt, *nptr);
+	    } else {
+		nptr = &mb_rt->m_next;
+	    }
+	}
     }
-
-    /*
-     * Delete the entry from the cache
-     */
-    if (prev_m0 != m0) {	/* if moved past head of list */
-	MFREE(m0, prev_m0->m_next);
-    } else			/* delete head of list, it is in the table */
-	mfctable[hash] = m_free(m0);
-
-    timeout_val--;
     splx(s);
+    timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT);
 }
 
 /*
  * Packet forwarding routine once entry in the cache is made
  */
 static int
-ip_mdq(m, ifp, tunnel_src, rt, imo)
+ip_mdq(m, ifp, rt, xmt_vif)
     register struct mbuf *m;
     register struct ifnet *ifp;
-    register u_long tunnel_src;
     register struct mfc *rt;
-    register struct ip_moptions *imo;
+    register vifi_t xmt_vif;
 {
     register struct ip  *ip = mtod(m, struct ip *);
     register vifi_t vifi;
     register struct vif *vifp;
+    register int plen = ntohs(ip->ip_len);
+
+/*
+ * Macro to send packet on vif.  Since RSVP packets don't get counted on
+ * input, they shouldn't get counted on output, so statistics keeping is
+ * separate (done by the callers of MC_SEND, not inside the macro).
+ */
+#define MC_SEND(ip,vifp,m) {                             \
+                if ((vifp)->v_flags & VIFF_TUNNEL)  	 \
+                    encap_send((ip), (vifp), (m));       \
+                else                                     \
+                    phyint_send((ip), (vifp), (m));      \
+}
+
+    /*
+     * If xmt_vif is not -1, send on only the requested vif; rt is not
+     * consulted on this path.
+     * (since vifi_t is u_short, -1 becomes MAXUSHORT, which is > numvifs.)
+     */
+    if (xmt_vif < numvifs) {
+	MC_SEND(ip, viftable + xmt_vif, m);
+	return 1;
+    }
 
     /*
      * Don't forward if it didn't arrive from the parent vif for its origin.
-     * Notes: v_ifp is zero for src route tunnels, multicast_decap_if
-     * for encapsulated tunnels and a real ifnet for non-tunnels so
-     * the first part of the if catches wrong physical interface or
-     * tunnel type; v_rmt_addr is zero for non-tunneled packets so
-     * the 2nd part catches both packets that arrive via a tunnel
-     * that shouldn't and packets that arrive via the wrong tunnel.
      */
     vifi = rt->mfc_parent;
-    if (viftable[vifi].v_ifp != ifp ||
-	(ifp == 0 && viftable[vifi].v_rmt_addr.s_addr != tunnel_src)) {
+    if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) {
 	/* came in the wrong interface */
-	if (mrtdebug)
-	    log(LOG_DEBUG, "wrong if: ifp %x vifi %d\n",
-		ifp, vifi);
+	if (mrtdebug & DEBUG_FORWARD)
+	    log(LOG_DEBUG, "wrong if: ifp %x vifi %d vififp %x", ifp, vifi,
+		(vifi < numvifs) ? viftable[vifi].v_ifp : (struct ifnet *)0);
 	++mrtstat.mrts_wrong_if;
-	return (int)tunnel_src;
+	++rt->mfc_wrong_if;
+	/*
+	 * If we are doing PIM assert processing, the parent vif is valid and
+	 * we forward on it, and the packet came in on a broadcast medium
+	 * (not a tunnel), notify the routing daemon (rate-limited below).
+	 */
+	if (pim_assert && (vifi < numvifs) && rt->mfc_ttls[vifi] &&
+		(ifp->if_flags & IFF_BROADCAST) &&
+		!(viftable[vifi].v_flags & VIFF_TUNNEL)) {
+	    static struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET };
+	    struct mbuf *mm;
+	    struct igmpmsg *im;
+	    int hlen = ip->ip_hl << 2;
+	    struct timeval now;
+	    register u_long delta;
+
+	    GET_TIME(now);
+
+	    TV_DELTA(rt->mfc_last_assert, now, delta);
+
+	    if (delta > ASSERT_MSG_TIME) {
+		mm = m_copy(m, 0, hlen);
+		if (mm && (M_HASCL(mm) || mm->m_len < hlen))
+		    mm = m_pullup(mm, hlen);
+		if (mm == NULL) {
+		    return ENOBUFS;
+		}
+
+		rt->mfc_last_assert = now;
+
+		im = mtod(mm, struct igmpmsg *);
+		im->im_msgtype	= IGMPMSG_WRONGVIF;
+		im->im_mbz		= 0;
+		im->im_vif		= vifi;
+
+		k_igmpsrc.sin_addr = im->im_src;
+
+		/* Send the header copy (mm), not the caller's packet m. */
+		socket_send(ip_mrouter, mm, &k_igmpsrc);
+	    }
+	}
+	return 0;
     }
 
-    /* increment the interface and s-g counters */
-    viftable[vifi].v_pkt_in++;
+    /* If I sourced this packet, it counts as output, else it was input. */
+    if (ip->ip_src.s_addr == viftable[vifi].v_lcl_addr.s_addr) {
+	viftable[vifi].v_pkt_out++;
+	viftable[vifi].v_bytes_out += plen;
+    } else {
+	viftable[vifi].v_pkt_in++;
+	viftable[vifi].v_bytes_in += plen;
+    }
     rt->mfc_pkt_cnt++;
+    rt->mfc_byte_cnt += plen;
 
     /*
      * For each vif, decide if a copy of the packet should be forwarded.
@@ -1242,40 +1463,26 @@ ip_mdq(m, ifp, tunnel_src, rt, imo)
      *		- the ttl exceeds the vif's threshold
      *		- there are group members downstream on interface
      */
-#define MC_SEND(ip,vifp,m) {                             \
-		(vifp)->v_pkt_out++;                     \
-                if ((vifp)->v_flags & VIFF_SRCRT)        \
-                    srcrt_send((ip), (vifp), (m));       \
-                else if ((vifp)->v_flags & VIFF_TUNNEL)  \
-                    encap_send((ip), (vifp), (m));       \
-                else                                     \
-                    phyint_send((ip), (vifp), (m));      \
-                }
-
-/* If no options or the imo_multicast_vif option is 0, don't do this part
- */
-    if ((imo != NULL) &&
-       (( vifi = imo->imo_multicast_vif - 1) < numvifs) /*&& (vifi>=0)*/)
-    {
-        MC_SEND(ip,viftable+vifi,m);
-        return (1);        /* make sure we are done: No more physical sends */
-    }
-
     for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++)
 	if ((rt->mfc_ttls[vifi] > 0) &&
-	    (ip->ip_ttl > rt->mfc_ttls[vifi]))
+	    (ip->ip_ttl > rt->mfc_ttls[vifi])) {
+	    vifp->v_pkt_out++;
+	    vifp->v_bytes_out += plen;
 	    MC_SEND(ip, vifp, m);
+	}
 
     return 0;
 }
 
-/* check if a vif number is legal/ok. This is used by ip_output, to export
- * numvifs there,
+/*
+ * check if a vif number is legal/ok. This is used by ip_output, to export
+ * numvifs there, 
  */
 int
 X_legal_vif_num(vif)
     int vif;
-{   if (vif>=0 && vif<=numvifs)
+{
+    if (vif >= 0 && vif < numvifs)
        return(1);
     else
        return(0);
@@ -1285,6 +1492,23 @@ X_legal_vif_num(vif)
 int (*legal_vif_num)(int) = X_legal_vif_num;
 #endif
 
+/*
+ * Look up the local address (v_lcl_addr) of vif number vifi.
+ * A vif number outside [0, numvifs) yields INADDR_ANY.
+ */
+u_long
+X_ip_mcast_src(vifi)
+    int vifi;
+{
+    if (vifi < 0 || vifi >= numvifs)
+	return INADDR_ANY;
+    return viftable[vifi].v_lcl_addr.s_addr;
+}
+
+#ifndef MROUTE_LKM
+u_long (*ip_mcast_src)(int) = X_ip_mcast_src;
+#endif
+
 static void
 phyint_send(ip, vifp, m)
     struct ip *ip;
@@ -1292,19 +1516,19 @@ phyint_send(ip, vifp, m)
     struct mbuf *m;
 {
     register struct mbuf *mb_copy;
-    int hlen = ip->ip_hl << 2;
+    register int hlen = ip->ip_hl << 2;
     register struct ip_moptions *imo;
 
-    if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL)
-	return;
-
     /*
-     * Make sure the header isn't in an cluster, because the sharing
-     * in clusters defeats the whole purpose of making the copy above.
+     * Make a new reference to the packet; make sure that
+     * the IP header is actually copied, not just referenced,
+     * so that ip_output() only scribbles on the copy.
      */
-    mb_copy = m_pullup(mb_copy, hlen);
+    mb_copy = m_copy(m, 0, M_COPYALL);
+    if (mb_copy && (M_HASCL(mb_copy) || mb_copy->m_len < hlen))
+	mb_copy = m_pullup(mb_copy, hlen);
     if (mb_copy == NULL)
-	    return;
+	return;
 
     MALLOC(imo, struct ip_moptions *, sizeof *imo, M_IPMOPTS, M_NOWAIT);
     if (imo == NULL) {
@@ -1315,6 +1539,7 @@ phyint_send(ip, vifp, m)
     imo->imo_multicast_ifp  = vifp->v_ifp;
     imo->imo_multicast_ttl  = ip->ip_ttl - 1;
     imo->imo_multicast_loop = 1;
+    imo->imo_multicast_vif  = -1;
 
     if (vifp->v_rate_limit <= 0)
 	tbf_send_packet(vifp, mb_copy, imo);
@@ -1324,81 +1549,6 @@ phyint_send(ip, vifp, m)
 }
 
 static void
-srcrt_send(ip, vifp, m)
-    struct ip *ip;
-    struct vif *vifp;
-    struct mbuf *m;
-{
-    struct mbuf *mb_copy, *mb_opts;
-    int hlen = ip->ip_hl << 2;
-    register struct ip *ip_copy;
-    u_char *cp;
-
-    /*
-     * Make sure that adding the tunnel options won't exceed the
-     * maximum allowed number of option bytes.
-     */
-    if (ip->ip_hl > (60 - TUNNEL_LEN) >> 2) {
-	mrtstat.mrts_cant_tunnel++;
-	if (mrtdebug)
-	    log(LOG_DEBUG, "srcrt_send: no room for tunnel options, from %u\n",
-		ntohl(ip->ip_src.s_addr));
-	return;
-    }
-
-    if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL)
-	return;
-
-    MGETHDR(mb_opts, M_DONTWAIT, MT_HEADER);
-    if (mb_opts == NULL) {
-	m_freem(mb_copy);
-	return;
-    }
-    /*
-     * 'Delete' the base ip header from the mb_copy chain
-     */
-    mb_copy->m_len -= hlen;
-    mb_copy->m_data += hlen;
-    /*
-     * Make mb_opts be the new head of the packet chain.
-     * Any options of the packet were left in the old packet chain head
-     */
-    mb_opts->m_next = mb_copy;
-    mb_opts->m_len = hlen + TUNNEL_LEN;
-    mb_opts->m_data += MSIZE - mb_opts->m_len;
-    mb_opts->m_pkthdr.len = mb_copy->m_pkthdr.len + TUNNEL_LEN;
-    /*
-     * Copy the base ip header from the mb_copy chain to the new head mbuf
-     */
-    ip_copy = mtod(mb_opts, struct ip *);
-    bcopy((caddr_t)ip_copy, mtod(mb_opts, caddr_t), hlen);
-    ip_copy->ip_ttl--;
-    ip_copy->ip_dst = vifp->v_rmt_addr;	  /* remote tunnel end-point */
-    /*
-     * Adjust the ip header length to account for the tunnel options.
-     */
-    ip_copy->ip_hl  += TUNNEL_LEN >> 2;
-    ip_copy->ip_len += TUNNEL_LEN;
-    /*
-     * Add the NOP and LSRR after the base ip header
-     */
-    cp = mtod(mb_opts, u_char *) + IP_HDR_LEN;
-    *cp++ = IPOPT_NOP;
-    *cp++ = IPOPT_LSRR;
-    *cp++ = 11; /* LSRR option length */
-    *cp++ = 8;  /* LSSR pointer to second element */
-    *(u_long*)cp = vifp->v_lcl_addr.s_addr;	/* local tunnel end-point */
-    cp += 4;
-    *(u_long*)cp = ip->ip_dst.s_addr;		/* destination group */
-
-    if (vifp->v_rate_limit <= 0)
-	tbf_send_packet(vifp, mb_opts, 0);
-    else
-	tbf_control(vifp, mb_opts,
-		    mtod(mb_opts, struct ip *), ip_copy->ip_len, 0);
-}
-
-static void
 encap_send(ip, vifp, m)
     register struct ip *ip;
     register struct vif *vifp;
@@ -1471,11 +1621,12 @@ encap_send(ip, vifp, m)
  */
 void
 #ifdef MROUTE_LKM
-X_multiencap_decap(m)
+X_ipip_input(m)
 #else
-multiencap_decap(m)
+ipip_input(m, iphlen)
 #endif
-    register struct mbuf *m;
+	register struct mbuf *m;
+	int iphlen;
 {
     struct ifnet *ifp = m->m_pkthdr.rcvif;
     register struct ip *ip = mtod(m, struct ip *);
@@ -1484,9 +1635,9 @@ multiencap_decap(m)
     register struct ifqueue *ifq;
     register struct vif *vifp;
 
-    if (ip->ip_p != ENCAP_PROTO) {
-    	rip_input(m);
-	return;
+    if (!have_encap_tunnel) {
+	    rip_input(m);
+	    return;
     }
     /*
      * dump the packet if it's not to a multicast destination or if
@@ -1502,7 +1653,7 @@ multiencap_decap(m)
     }
     if (ip->ip_src.s_addr != last_encap_src) {
 	register struct vif *vife;
-
+	
 	vifp = viftable;
 	vife = vifp + numvifs;
 	last_encap_src = ip->ip_src.s_addr;
@@ -1520,7 +1671,7 @@ multiencap_decap(m)
 	mrtstat.mrts_cant_tunnel++; /*XXX*/
 	m_freem(m);
 	if (mrtdebug)
-          log(LOG_DEBUG, "ip_mforward: no tunnel with %x\n",
+          log(LOG_DEBUG, "ip_mforward: no tunnel with %x",
 		ntohl(ip->ip_src.s_addr));
 	return;
     }
@@ -1554,7 +1705,7 @@ multiencap_decap(m)
 /*
  * Token bucket filter module
  */
-void
+static void
 tbf_control(vifp, m, ip, p_len, imo)
 	register struct vif *vifp;
 	register struct mbuf *m;
@@ -1564,7 +1715,7 @@ tbf_control(vifp, m, ip, p_len, imo)
 {
     tbf_update_tokens(vifp);
 
-    /* if there are enough tokens,
+    /* if there are enough tokens, 
      * and the queue is empty,
      * send this packet out
      */
@@ -1601,11 +1752,11 @@ tbf_control(vifp, m, ip, p_len, imo)
     return;
 }
 
-/*
+/* 
  * adds a packet to the queue at the interface
  */
-void
-tbf_queue(vifp, m, ip, imo)
+static void
+tbf_queue(vifp, m, ip, imo) 
 	register struct vif *vifp;
 	register struct mbuf *m;
 	register struct ip *ip;
@@ -1627,10 +1778,10 @@ tbf_queue(vifp, m, ip, imo)
 }
 
 
-/*
+/* 
  * processes the queue at the interface
  */
-void
+static void
 tbf_process_q(vifp)
     register struct vif *vifp;
 {
@@ -1643,7 +1794,7 @@ tbf_process_q(vifp)
      */
     while (vifp->v_tbf->q_len > 0) {
 	/* locate the first packet */
-	pkt_1.pkt_len = ((qtable[index][0]).pkt_len);
+	pkt_1.pkt_len = (qtable[index][0]).pkt_len;
 	pkt_1.pkt_m   = (qtable[index][0]).pkt_m;
 	pkt_1.pkt_ip   = (qtable[index][0]).pkt_ip;
 	pkt_1.pkt_imo = (qtable[index][0]).pkt_imo;
@@ -1665,11 +1816,11 @@ tbf_process_q(vifp)
     splx(s);
 }
 
-/*
+/* 
  * removes the jth packet from the queue at the interface
  */
-void
-tbf_dequeue(vifp,j)
+static void
+tbf_dequeue(vifp,j) 
     register struct vif *vifp;
     register int j;
 {
@@ -1681,7 +1832,7 @@ tbf_dequeue(vifp,j)
 	qtable[index][i-1].pkt_len = qtable[index][i].pkt_len;
 	qtable[index][i-1].pkt_ip = qtable[index][i].pkt_ip;
 	qtable[index][i-1].pkt_imo = qtable[index][i].pkt_imo;
-    }
+    }		
     qtable[index][i-1].pkt_m = NULL;
     qtable[index][i-1].pkt_len = NULL;
     qtable[index][i-1].pkt_ip = NULL;
@@ -1690,15 +1841,15 @@ tbf_dequeue(vifp,j)
     vifp->v_tbf->q_len--;
 
     if (tbfdebug > 1)
-	log(LOG_DEBUG, "tbf_dequeue: vif# %d qlen %d\n",vifp-viftable, i-1);
+	log(LOG_DEBUG, "tbf_dequeue: vif# %d qlen %d",vifp-viftable, i-1);
 }
 
-void
+static void
 tbf_reprocess_q(xvifp)
 	void *xvifp;
 {
     register struct vif *vifp = xvifp;
-    if (ip_mrouter == NULL)
+    if (ip_mrouter == NULL) 
 	return;
 
     tbf_update_tokens(vifp);
@@ -1713,7 +1864,7 @@ tbf_reprocess_q(xvifp)
  * based on the precedence value and the priority obtained through
  * a lookup table - not yet implemented accurately!
  */
-int
+static int
 tbf_dq_sel(vifp, ip)
     register struct vif *vifp;
     register struct ip *ip;
@@ -1737,7 +1888,7 @@ tbf_dq_sel(vifp, ip)
     return(0);
 }
 
-void
+static void
 tbf_send_packet(vifp, m, imo)
     register struct vif *vifp;
     register struct mbuf *m;
@@ -1746,13 +1897,7 @@ tbf_send_packet(vifp, m, imo)
     int error;
     int s = splnet();
 
-    /* if source route tunnels */
-    if (vifp->v_flags & VIFF_SRCRT) {
-	error = ip_output(m, (struct mbuf *)0, (struct route *)0,
-			  IP_FORWARDING, imo);
-	if (mrtdebug > 1)
-	    log(LOG_DEBUG, "srcrt_send on vif %d err %d\n", vifp-viftable, error);
-    } else if (vifp->v_flags & VIFF_TUNNEL) {
+    if (vifp->v_flags & VIFF_TUNNEL) {
 	/* If tunnel options */
 	ip_output(m, (struct mbuf *)0, (struct route *)0,
 		  IP_FORWARDING, imo);
@@ -1762,8 +1907,8 @@ tbf_send_packet(vifp, m, imo)
 			  IP_FORWARDING, imo);
 	FREE(imo, M_IPMOPTS);
 
-	if (mrtdebug > 1)
-	    log(LOG_DEBUG, "phyint_send on vif %d err %d\n", vifp-viftable, error);
+	if (mrtdebug & DEBUG_XMIT)
+	    log(LOG_DEBUG, "phyint_send on vif %d err %d", vifp-viftable, error);
     }
     splx(s);
 }
@@ -1772,7 +1917,7 @@ tbf_send_packet(vifp, m, imo)
  * the elapsed time (between the last time and time now)
  * in milliseconds & update the no. of tokens in the bucket
  */
-void
+static void
 tbf_update_tokens(vifp)
     register struct vif *vifp;
 {
@@ -1800,54 +1945,255 @@ priority(vifp, ip)
     register struct vif *vifp;
     register struct ip *ip;
 {
-    register u_long graddr;
     register int prio;
 
-    /* temporary hack; will add general packet classifier some day */
-
-    prio = 50;  /* default priority */
+    /* temporary hack; may add general packet classifier some day */
 
-    /* check for source route options and add option length to get dst */
-    if (vifp->v_flags & VIFF_SRCRT)
-	graddr = ntohl((ip+8)->ip_dst.s_addr);
-    else
-	graddr = ntohl(ip->ip_dst.s_addr);
-
-    switch (graddr & 0xf) {
-	case 0x0: break;
-	case 0x1: if (graddr == 0xe0020001) prio = 65; /* MBone Audio */
-		  break;
-	case 0x2: break;
-	case 0x3: break;
-	case 0x4: break;
-	case 0x5: break;
-	case 0x6: break;
-	case 0x7: break;
-	case 0x8: break;
-	case 0x9: break;
-	case 0xa: if (graddr == 0xe000010a) prio = 85; /* IETF Low Audio 1 */
-		  break;
-	case 0xb: if (graddr == 0xe000010b) prio = 75; /* IETF Audio 1 */
-		  break;
-	case 0xc: if (graddr == 0xe000010c) prio = 60; /* IETF Video 1 */
-		  break;
-	case 0xd: if (graddr == 0xe000010d) prio = 80; /* IETF Low Audio 2 */
-		  break;
-	case 0xe: if (graddr == 0xe000010e) prio = 70; /* IETF Audio 2 */
-		  break;
-	case 0xf: if (graddr == 0xe000010f) prio = 55; /* IETF Video 2 */
-		  break;
+    /*
+     * The UDP port space is divided up into four priority ranges:
+     * [0, 16384)     : unclassified - lowest priority
+     * [16384, 32768) : audio - highest priority
+     * [32768, 49152) : whiteboard - medium priority
+     * [49152, 65536) : video - low priority
+     */
+    if (ip->ip_p == IPPROTO_UDP) {
+	struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2));
+	switch (ntohs(udp->uh_dport) & 0xc000) {
+	    case 0x4000:
+		prio = 70;
+		break;
+	    case 0x8000:
+		prio = 60;
+		break;
+	    case 0xc000:
+		prio = 55;
+		break;
+	    default:
+		prio = 50;
+		break;
+	}
+	if (tbfdebug > 1)
+		log(LOG_DEBUG, "port %x prio%d", ntohs(udp->uh_dport), prio);
+    } else {
+	    prio = 50;
     }
-
-    if (tbfdebug > 1) log(LOG_DEBUG, "graddr%x prio%d\n", graddr, prio);
-
     return prio;
 }
 
 /*
- * End of token bucket filter modifications
+ * End of token bucket filter modifications 
  */
 
+int
+ip_rsvp_vif_init(so, m)
+    struct socket *so;
+    struct mbuf *m;
+{
+    register int spl;
+    int vifi;
+    int error = 0;
+
+    if (rsvpdebug)
+	printf("ip_rsvp_vif_init: so_type = %d, pr_protocol = %d\n",
+	       so->so_type, so->so_proto->pr_protocol);
+
+    /* Only a raw RSVP socket may be attached to a vif. */
+    if (so->so_type != SOCK_RAW)
+	return EOPNOTSUPP;
+    if (so->so_proto->pr_protocol != IPPROTO_RSVP)
+	return EOPNOTSUPP;
+
+    /* The argument mbuf must hold exactly one int: the vif number. */
+    if (m == NULL)
+	return EINVAL;
+    if (m->m_len != sizeof(int))
+	return EINVAL;
+    vifi = *(mtod(m, int *));
+
+    if (rsvpdebug)
+	printf("ip_rsvp_vif_init: vif = %d rsvp_on = %d\n",vifi,rsvp_on);
+
+    spl = splnet();
+    if (!legal_vif_num(vifi)) {
+	/* No such vif. */
+	error = EADDRNOTAVAIL;
+    } else if (viftable[vifi].v_rsvpd != NULL) {
+	/* A socket is already attached to this vif. */
+	error = EADDRINUSE;
+    } else {
+	viftable[vifi].v_rsvpd = so;
+	/*
+	 * Count the vif in rsvp_on only on an off -> on transition, so
+	 * the global counter cannot be over-incremented.
+	 */
+	if (!viftable[vifi].v_rsvp_on) {
+	    viftable[vifi].v_rsvp_on = 1;
+	    rsvp_on++;
+	}
+    }
+    splx(spl);
+    return error;
+}
+
+int
+ip_rsvp_vif_done(so, m)
+    struct socket *so;
+    struct mbuf *m;
+{
+    register int spl;
+    int vifi;
+
+    if (rsvpdebug)
+	printf("ip_rsvp_vif_done: so_type = %d, pr_protocol = %d\n",
+	       so->so_type, so->so_proto->pr_protocol);
+
+    /* Requests from anything but a raw RSVP socket are refused. */
+    if (so->so_type != SOCK_RAW)
+	return EOPNOTSUPP;
+    if (so->so_proto->pr_protocol != IPPROTO_RSVP)
+	return EOPNOTSUPP;
+
+    /* The argument mbuf must hold exactly one int: the vif number. */
+    if (m == NULL)
+	return EINVAL;
+    if (m->m_len != sizeof(int))
+	return EINVAL;
+    vifi = *(mtod(m, int *));
+    spl = splnet();
+    if (!legal_vif_num(vifi)) {
+	splx(spl);
+	return EADDRNOTAVAIL;
+    }
+    if (rsvpdebug)
+	printf("ip_rsvp_vif_done: v_rsvpd = %x so = %x\n",
+	       viftable[vifi].v_rsvpd, so);
+
+    /*
+     * Clear the vif's socket.  rsvp_on is decremented only on an
+     * on -> off transition, so it cannot be over-decremented.
+     */
+    viftable[vifi].v_rsvpd = NULL;
+    if (viftable[vifi].v_rsvp_on) {
+	viftable[vifi].v_rsvp_on = 0;
+	rsvp_on--;
+    }
+    splx(spl);
+    return 0;
+}
+
+void
+ip_rsvp_force_done(so)
+    struct socket *so;
+{
+    register struct vif *vifp;
+    register int spl;
+
+    /* Nothing to undo unless this is a raw RSVP socket. */
+    if (so->so_type != SOCK_RAW)
+	return;
+    if (so->so_proto->pr_protocol != IPPROTO_RSVP)
+	return;
+
+    spl = splnet();
+
+    /*
+     * One socket may be attached to several vifs, so scan the whole
+     * table rather than stopping at the first match.
+     */
+    for (vifp = viftable; vifp < viftable + numvifs; vifp++) {
+	if (vifp->v_rsvpd != so)
+	    continue;
+	vifp->v_rsvpd = NULL;
+	/* Only an on -> off transition may decrement rsvp_on. */
+	if (vifp->v_rsvp_on) {
+	    vifp->v_rsvp_on = 0;
+	    rsvp_on--;
+	}
+    }
+
+    splx(spl);
+}
+
+/*
+ * Deliver a received RSVP packet to the RSVP socket attached to the vif
+ * whose interface is ifp.  m is never handed back to the caller: every
+ * path frees it or passes it on to rip_input() or socket_send().
+ */
+void
+rsvp_input(m, ifp)
+    struct mbuf *m;
+    struct ifnet *ifp;
+{
+    int vifi;
+    register struct ip *ip = mtod(m, struct ip *);
+    /* Length first, then family, as for k_igmpsrc in X_ip_mforward(). */
+    static struct sockaddr_in rsvp_src = { sizeof rsvp_src, AF_INET };
+    register int s;
+
+    if (rsvpdebug)
+	printf("rsvp_input: rsvp_on %d\n",rsvp_on);
+
+    /* Can still get packets with rsvp_on = 0 if there is a local member
+     * of the group to which the RSVP packet is addressed.  But in this
+     * case we want to throw the packet away. */
+    if (!rsvp_on) {
+	m_freem(m);
+	return;
+    }
+
+    /* If the old-style non-vif-associated socket is set, then use
+     * it and ignore the new ones. */
+    if (ip_rsvpd != NULL) {
+	if (rsvpdebug)
+	    printf("rsvp_input: Sending packet up old-style socket\n");
+	rip_input(m);
+	return;
+    }
+
+    s = splnet();
+    if (rsvpdebug)
+	printf("rsvp_input: check vifs\n");
+
+    /* Find which vif the packet arrived on. */
+    for (vifi = 0; vifi < numvifs; vifi++) {
+	if (viftable[vifi].v_ifp == ifp)
+	    break;
+    }
+    if (vifi == numvifs) {
+	/* Can't find vif packet arrived on. Drop packet. */
+	if (rsvpdebug)
+	    printf("rsvp_input: Can't find vif for packet...dropping it.\n");
+	m_freem(m);
+	splx(s);
+	return;
+    }
+
+    if (rsvpdebug)
+	printf("rsvp_input: check socket\n");
+    if (viftable[vifi].v_rsvpd == NULL) {
+	/* No socket is attached to this vif, so drop the packet. */
+	if (rsvpdebug)
+	    printf("rsvp_input: No socket defined for vif %d\n",vifi);
+	m_freem(m);
+	splx(s);
+	return;
+    }
+    rsvp_src.sin_addr = ip->ip_src;
+    if (rsvpdebug && m)
+	printf("rsvp_input: m->m_len = %d, sbspace() = %d\n",
+	       m->m_len,sbspace(&(viftable[vifi].v_rsvpd->so_rcv)));
+
+    /* Braces bind the else to the socket_send() test, not to rsvpdebug. */
+    if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0) {
+	if (rsvpdebug)
+	    printf("rsvp_input: Failed to append to socket\n");
+    } else {
+	if (rsvpdebug)
+	    printf("rsvp_input: send packet up\n");
+    }
+    splx(s);
+}
+
 #ifdef MROUTE_LKM
 #include <sys/conf.h>
 #include <sys/exec.h>
@@ -1884,7 +2230,7 @@ ip_mroute_mod_handle(struct lkm_table *lkmtp, int cmd)
 		old_mrt_ioctl = mrt_ioctl;
 		mrt_ioctl = X_mrt_ioctl;
               old_proto4_input = inetsw[ip_protox[ENCAP_PROTO]].pr_input;
-              inetsw[ip_protox[ENCAP_PROTO]].pr_input = X_multiencap_decap;
+              inetsw[ip_protox[ENCAP_PROTO]].pr_input = X_ipip_input;
 		old_legal_vif_num = legal_vif_num;
 		legal_vif_num = X_legal_vif_num;
 		ip_mrtproto = IGMP_DVMRP;
@@ -1921,5 +2267,3 @@ ip_mroute_mod(struct lkm_table *lkmtp, int cmd, int ver) {
 
 #endif /* MROUTE_LKM */
 #endif /* MROUTING */
-
-
author	wollman <wollman@FreeBSD.org>	1995-06-13 17:51:16 +0000
committer	wollman <wollman@FreeBSD.org>	1995-06-13 17:51:16 +0000
commit	20ad4f8359820cf12331c0335034438fc23ad604 (patch)
tree	b5de17b0bb0d03b7ed728b104d3d0f4a9752fd4a /sys/netinet/ip_mroute.c
parent	c0dfcf234735c518d2c756c46fddf87d2332c838 (diff)
download	FreeBSD-src-20ad4f8359820cf12331c0335034438fc23ad604.zip FreeBSD-src-20ad4f8359820cf12331c0335034438fc23ad604.tar.gz