diff options
-rw-r--r-- | sys/netinet/igmp.c | 193 | ||||
-rw-r--r-- | sys/netinet/igmp.h | 57 | ||||
-rw-r--r-- | sys/netinet/igmp_var.h | 22 | ||||
-rw-r--r-- | sys/netinet/in.h | 8 | ||||
-rw-r--r-- | sys/netinet/in_proto.c | 9 | ||||
-rw-r--r-- | sys/netinet/ip_input.c | 34 | ||||
-rw-r--r-- | sys/netinet/ip_mroute.c | 1646 | ||||
-rw-r--r-- | sys/netinet/ip_mroute.h | 135 | ||||
-rw-r--r-- | sys/netinet/ip_output.c | 25 | ||||
-rw-r--r-- | sys/netinet/ip_var.h | 7 | ||||
-rw-r--r-- | sys/netinet/raw_ip.c | 51 |
11 files changed, 1276 insertions, 911 deletions
diff --git a/sys/netinet/igmp.c b/sys/netinet/igmp.c index dac0add..cd0a35e 100644 --- a/sys/netinet/igmp.c +++ b/sys/netinet/igmp.c @@ -43,8 +43,9 @@ * * Written by Steve Deering, Stanford, May 1988. * Modified by Rosen Sharma, Stanford, Aug 1994. + * Modified by Bill Fenner, Xerox PARC, Feb 1995. * - * MULTICAST 1.4 + * MULTICAST Revision: 3.3.1.2 */ #include <sys/param.h> @@ -69,9 +70,11 @@ struct igmpstat igmpstat; -static int igmp_timers_are_running = 0; +static int igmp_timers_are_running; static u_long igmp_all_hosts_group; -static struct router_info *Head = 0; +static u_long igmp_local_group; +static u_long igmp_local_group_mask; +static struct router_info *Head; static void igmp_sendpkt(struct in_multi *, int); static void igmp_sendleave(struct in_multi *); @@ -83,6 +86,11 @@ igmp_init() * To avoid byte-swapping the same value over and over again. */ igmp_all_hosts_group = htonl(INADDR_ALLHOSTS_GROUP); + igmp_local_group = htonl(0xe0000000); /* 224.0.0.0 */ + igmp_local_group_mask = htonl(0xffffff00); /* ........^ */ + + igmp_timers_are_running = 0; + Head = (struct router_info *) 0; } @@ -96,13 +104,12 @@ fill_rti(inm) printf("[igmp.c, _fill_rti] --> entering \n"); #endif while (rti) { - if (rti->ifp == inm->inm_ifp){ /* ? is it ok to compare */ - /* pointers */ + if (rti->ifp == inm->inm_ifp) { inm->inm_rti = rti; #ifdef IGMP_DEBUG printf("[igmp.c, _fill_rti] --> found old entry \n"); #endif - if (rti->type == IGMP_OLD_ROUTER) + if (rti->type == IGMP_OLD_ROUTER) return IGMP_HOST_MEMBERSHIP_REPORT; else return IGMP_HOST_NEW_MEMBERSHIP_REPORT; @@ -114,7 +121,7 @@ fill_rti(inm) rti->type = IGMP_NEW_ROUTER; rti->time = IGMP_AGE_THRESHOLD; rti->next = Head; - Head = rti; + Head = rti; inm->inm_rti = rti; #ifdef IGMP_DEBUG printf("[igmp.c, _fill_rti] --> created new entry \n"); @@ -132,7 +139,7 @@ find_rti(ifp) printf("[igmp.c, _find_rti] --> entering \n"); #endif while (rti) { - if (rti->ifp == ifp){ /* ? is it ok to compare pointers */ + if (rti->ifp == ifp) { #ifdef IGMP_DEBUG printf("[igmp.c, _find_rti] --> found old entry \n"); #endif @@ -166,7 +173,7 @@ igmp_input(m, iphlen) register struct in_ifaddr *ia; struct in_multistep step; struct router_info *rti; - + int timer; /** timer value in the igmp query header **/ ++igmpstat.igps_rcv_total; @@ -232,15 +239,16 @@ igmp_input(m, iphlen) /* * Start the timers in all of our membership records for * the interface on which the query arrived, except those - * that are already running and those that belong to the - * "all-hosts" group. + * that are already running and those that belong to a + * "local" group (224.0.0.X). */ IN_FIRST_MULTI(step, inm); while (inm != NULL) { - if (inm->inm_ifp == ifp + if (inm->inm_ifp == ifp && inm->inm_timer == 0 - && inm->inm_addr.s_addr - != igmp_all_hosts_group) { + && ((inm->inm_addr.s_addr + & igmp_local_group_mask) + != igmp_local_group)) { inm->inm_state = IGMP_DELAYING_MEMBER; inm->inm_timer = IGMP_RANDOM_DELAY( @@ -254,44 +262,40 @@ igmp_input(m, iphlen) /* ** New Router */ - - if (ip->ip_dst.s_addr != igmp_all_hosts_group) { - if (!(m->m_flags & M_MCAST)) { - ++igmpstat.igps_rcv_badqueries; - m_freem(m); - return; - } + + if (!(m->m_flags & M_MCAST)) { + ++igmpstat.igps_rcv_badqueries; + m_freem(m); + return; } - if (ip->ip_dst.s_addr == igmp_all_hosts_group) { - - /* - * - Start the timers in all of our membership records - * for the interface on which the query arrived - * excl. those that belong to the "all-hosts" group. - * - For timers already running check if they need to - * be reset. - * - Use the igmp->igmp_code filed as the maximum - * delay possible - */ - IN_FIRST_MULTI(step, inm); - while (inm != NULL){ - switch(inm->inm_state){ + + /* + * - Start the timers in all of our membership records + * that the query applies to for the interface on + * which the query arrived excl. those that belong + * to a "local" group (224.0.0.X) + * - For timers already running check if they need to + * be reset. + * - Use the igmp->igmp_code field as the maximum + * delay possible + */ + IN_FIRST_MULTI(step, inm); + while (inm != NULL) { + if (inm->inm_ifp == ifp && + (inm->inm_addr.s_addr & igmp_local_group_mask) != + igmp_local_group && + (ip->ip_dst.s_addr == igmp_all_hosts_group || + ip->ip_dst.s_addr == inm->inm_addr.s_addr)) { + switch(inm->inm_state) { case IGMP_IDLE_MEMBER: case IGMP_LAZY_MEMBER: case IGMP_AWAKENING_MEMBER: - if (inm->inm_ifp == ifp && - inm->inm_addr.s_addr != - igmp_all_hosts_group) { inm->inm_timer = IGMP_RANDOM_DELAY(timer); igmp_timers_are_running = 1; inm->inm_state = IGMP_DELAYING_MEMBER; - } - break; + break; case IGMP_DELAYING_MEMBER: - if (inm->inm_ifp == ifp && - (inm->inm_timer > timer) && - inm->inm_addr.s_addr != - igmp_all_hosts_group) { + if (inm->inm_timer > timer) { inm->inm_timer = IGMP_RANDOM_DELAY(timer); igmp_timers_are_running = 1; inm->inm_state = IGMP_DELAYING_MEMBER; @@ -301,48 +305,17 @@ igmp_input(m, iphlen) inm->inm_state = IGMP_AWAKENING_MEMBER; break; } - IN_NEXT_MULTI(step, inm); - } - } else { - /* - ** group specific query - */ - - IN_FIRST_MULTI(step, inm); - while (inm != NULL) { - if (inm->inm_addr.s_addr == ip->ip_dst.s_addr) { - switch(inm->inm_state ){ - case IGMP_IDLE_MEMBER: - case IGMP_LAZY_MEMBER: - case IGMP_AWAKENING_MEMBER: - inm->inm_state = IGMP_DELAYING_MEMBER; - if (inm->inm_ifp == ifp ) { - inm->inm_timer = IGMP_RANDOM_DELAY(timer); - igmp_timers_are_running = 1; - inm->inm_state = IGMP_DELAYING_MEMBER; - } - break; - case IGMP_DELAYING_MEMBER: - inm->inm_state = IGMP_DELAYING_MEMBER; - if (inm->inm_ifp == ifp && - (inm->inm_timer > timer) ) { - inm->inm_timer = IGMP_RANDOM_DELAY(timer); - igmp_timers_are_running = 1; - inm->inm_state = IGMP_DELAYING_MEMBER; - } - break; - case IGMP_SLEEPING_MEMBER: - inm->inm_state = IGMP_AWAKENING_MEMBER; - break; - } } IN_NEXT_MULTI(step, inm); } - } } + break; case IGMP_HOST_MEMBERSHIP_REPORT: + /* + * an old report + */ ++igmpstat.igps_rcv_reports; if (ifp->if_flags & IFF_LOOPBACK) @@ -374,15 +347,11 @@ igmp_input(m, iphlen) * our timer for that group. */ IN_LOOKUP_MULTI(igmp->igmp_group, ifp, inm); - if (inm != NULL) { - inm->inm_timer = 0; - ++igmpstat.igps_rcv_ourreports; - } if (inm != NULL) { inm->inm_timer = 0; ++igmpstat.igps_rcv_ourreports; - + switch(inm->inm_state){ case IGMP_IDLE_MEMBER: case IGMP_LAZY_MEMBER: @@ -391,32 +360,43 @@ igmp_input(m, iphlen) inm->inm_state = IGMP_SLEEPING_MEMBER; break; case IGMP_DELAYING_MEMBER: - /** check this out - this was if (oldrouter) **/ if (inm->inm_rti->type == IGMP_OLD_ROUTER) inm->inm_state = IGMP_LAZY_MEMBER; - else inm->inm_state = IGMP_SLEEPING_MEMBER; + else + inm->inm_state = IGMP_SLEEPING_MEMBER; break; } } - + break; case IGMP_HOST_NEW_MEMBERSHIP_REPORT: /* - * an new report + * a new report */ - ++igmpstat.igps_rcv_reports; + /* + * We can get confused and think there's someone + * else out there if we are a multicast router. + * For fast leave to work, we have to know that + * we are the only member. + */ + IFP_TO_IA(ifp, ia); + if (ia && ip->ip_src.s_addr == IA_SIN(ia)->sin_addr.s_addr) + break; + + ++igmpstat.igps_rcv_reports; + if (ifp->if_flags & IFF_LOOPBACK) break; - + if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) || igmp->igmp_group.s_addr != ip->ip_dst.s_addr) { ++igmpstat.igps_rcv_badreports; m_freem(m); return; } - + /* * KLUDGE: if the IP source address of the report has an * unspecified (i.e., zero) subnet number, as is allowed for @@ -427,10 +407,12 @@ igmp_input(m, iphlen) * determine the arrival interface of an incoming packet. */ if ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) == 0) { +/* #ifndef MROUTING XXX - I don't think the ifdef is necessary */ IFP_TO_IA(ifp, ia); +/* #endif */ if (ia) ip->ip_src.s_addr = htonl(ia->ia_subnet); } - + /* * If we belong to the group being reported, stop * our timer for that group. @@ -439,7 +421,7 @@ igmp_input(m, iphlen) if (inm != NULL) { inm->inm_timer = 0; ++igmpstat.igps_rcv_ourreports; - + switch(inm->inm_state){ case IGMP_DELAYING_MEMBER: case IGMP_IDLE_MEMBER: @@ -466,12 +448,12 @@ void igmp_joingroup(inm) struct in_multi *inm; { - register int s = splnet(); + int s = splnet(); inm->inm_state = IGMP_IDLE_MEMBER; - if (inm->inm_addr.s_addr == igmp_all_hosts_group || - inm->inm_ifp->if_flags & IFF_LOOPBACK) + if ((inm->inm_addr.s_addr & igmp_local_group_mask) == igmp_local_group + || inm->inm_ifp->if_flags & IFF_LOOPBACK) inm->inm_timer = 0; else { igmp_sendpkt(inm,fill_rti(inm)); @@ -487,14 +469,12 @@ void igmp_leavegroup(inm) struct in_multi *inm; { - /* - * No action required on leaving a group. - */ - switch(inm->inm_state){ + switch(inm->inm_state) { case IGMP_DELAYING_MEMBER: case IGMP_IDLE_MEMBER: - if (!(inm->inm_addr.s_addr == igmp_all_hosts_group || - inm->inm_ifp->if_flags & IFF_LOOPBACK)) + if (((inm->inm_addr.s_addr & igmp_local_group_mask) + != igmp_local_group) + && !(inm->inm_ifp->if_flags & IFF_LOOPBACK)) if (inm->inm_rti->type != IGMP_OLD_ROUTER) igmp_sendleave(inm); break; @@ -509,13 +489,14 @@ void igmp_fasttimo() { register struct in_multi *inm; - register int s; struct in_multistep step; + int s; /* * Quick check to see if any work needs to be done, in order * to minimize the overhead of fasttimo processing. */ + if (!igmp_timers_are_running) return; @@ -558,7 +539,7 @@ igmp_slowtimo() } rti = rti->next; } -#ifdef IGMP_DEBUG +#ifdef IGMP_DEBUG printf("[igmp.c,_slowtimo] -- > exiting \n"); #endif splx(s); @@ -608,6 +589,7 @@ igmp_sendpkt(inm, type) imo->imo_multicast_ifp = inm->inm_ifp; imo->imo_multicast_ttl = 1; + imo->imo_multicast_vif = -1; /* * Request loopback of the report if we are acting as a multicast * router, so that the process-level routing demon can hear it. @@ -618,7 +600,6 @@ igmp_sendpkt(inm, type) FREE(imo, M_IPMOPTS); ++igmpstat.igps_snd_reports; - } static void @@ -638,7 +619,7 @@ igmp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, switch(name[0]) { case IGMPCTL_STATS: - return sysctl_rdstruct(oldp, oldlenp, newp, &igmpstat, + return sysctl_rdstruct(oldp, oldlenp, newp, &igmpstat, sizeof igmpstat); default: return ENOPROTOOPT; diff --git a/sys/netinet/igmp.h b/sys/netinet/igmp.h index 4011b4c..a7038f3 100644 --- a/sys/netinet/igmp.h +++ b/sys/netinet/igmp.h @@ -35,7 +35,7 @@ * SUCH DAMAGE. * * @(#)igmp.h 8.1 (Berkeley) 6/10/93 - * $Id: igmp.h,v 1.4 1994/09/06 22:42:17 wollman Exp $ + * $Id: igmp.h,v 1.4 1994/09/06 22:42:17 wollman Exp $ */ #ifndef _NETINET_IGMP_H_ @@ -46,7 +46,7 @@ * * Written by Steve Deering, Stanford, May 1988. * - * MULTICAST 1.2 + * MULTICAST Revision: 3.3.1.2 */ /* @@ -54,40 +54,53 @@ */ struct igmp { u_char igmp_type; /* version & type of IGMP message */ - u_char igmp_code; /* unused, should be zero */ + u_char igmp_code; /* subtype for routing msgs */ u_short igmp_cksum; /* IP-style checksum */ struct in_addr igmp_group; /* group address being reported */ }; /* (zero for queries) */ #define IGMP_MINLEN 8 -#define IGMP_HOST_MEMBERSHIP_QUERY 0x11 /* message types, incl. version */ -#define IGMP_HOST_MEMBERSHIP_REPORT 0x12 -#define IGMP_DVMRP 0x13 /* for experimental multicast */ - /* routing protocol */ -#define IGMP_HOST_NEW_MEMBERSHIP_REPORT 0x16 -#define IGMP_HOST_LEAVE_MESSAGE 0x17 -#define IGMP_MTRACE 0x1f /* mcast traceroute messages */ -#define IGMP_MTRACE_RESP 0x1e /* traceroute resp. (to sender) */ +/* + * Message types, including version number. + */ +#define IGMP_HOST_MEMBERSHIP_QUERY 0x11 /* Host membership query */ +#define IGMP_HOST_MEMBERSHIP_REPORT 0x12 /* Old membership report */ +#define IGMP_DVMRP 0x13 /* DVMRP routing message */ +#define IGMP_PIM 0x14 /* PIM routing message */ + +#define IGMP_HOST_NEW_MEMBERSHIP_REPORT 0x16 /* New membership report */ + +#define IGMP_HOST_LEAVE_MESSAGE 0x17 /* Leave-group message */ + +#define IGMP_MTRACE_RESP 0x1e /* traceroute resp. (to sender) */ +#define IGMP_MTRACE 0x1f /* mcast traceroute messages */ + +#define IGMP_MAX_HOST_REPORT_DELAY 10 /* max delay for response to */ + /* query (in seconds) */ -#define IGMP_MAX_HOST_REPORT_DELAY 10 /* max delay for response to */ #define IGMP_TIMER_SCALE 10 /* denotes that the igmp->timer filed */ - /*specifies time in 10th os seconds */ + /*specifies time in tenths of seconds */ +/* + * States for the IGMPv2 state table + */ #define IGMP_DELAYING_MEMBER 1 #define IGMP_IDLE_MEMBER 2 -#define IGMP_LAZY_MEMBER 3 -#define IGMP_SLEEPING_MEMBER 4 -#define IGMP_AWAKENING_MEMBER 5 - +#define IGMP_LAZY_MEMBER 3 +#define IGMP_SLEEPING_MEMBER 4 +#define IGMP_AWAKENING_MEMBER 5 +/* + * We must remember whether the querier is an old or a new router. + */ #define IGMP_OLD_ROUTER 0 #define IGMP_NEW_ROUTER 1 -#define IGMP_AGE_THRESHOLD 540 +/* + * Revert to new router if we haven't heard from an old router in + * this amount of time. + */ +#define IGMP_AGE_THRESHOLD 540 -#ifdef IGMP_STATES -static char *tostate[]={"","DELAYING_MEMBER","IDLE","LAZY","SLEEPING", - "AWAKENING" }; -#endif /* IGMP_STATES */ #endif /* _NETINET_IGMP_H_ */ diff --git a/sys/netinet/igmp_var.h b/sys/netinet/igmp_var.h index 0de226c..1c013c7 100644 --- a/sys/netinet/igmp_var.h +++ b/sys/netinet/igmp_var.h @@ -35,7 +35,7 @@ * SUCH DAMAGE. * * @(#)igmp_var.h 8.1 (Berkeley) 7/19/93 - * $Id: igmp_var.h,v 1.4 1994/09/06 22:42:17 wollman Exp $ + * $Id: igmp_var.h,v 1.5 1995/02/16 00:27:42 wollman Exp $ */ #ifndef _NETINET_IGMP_VAR_H_ @@ -47,19 +47,19 @@ * * Written by Steve Deering, Stanford, May 1988. * - * MULTICAST 1.1 + * MULTICAST Revisiob: 3.3.1.1 */ struct igmpstat { - u_long igps_rcv_total; /* total IGMP messages received */ - u_long igps_rcv_tooshort; /* received with too few bytes */ - u_long igps_rcv_badsum; /* received with bad checksum */ - u_long igps_rcv_queries; /* received membership queries */ - u_long igps_rcv_badqueries; /* received invalid queries */ - u_long igps_rcv_reports; /* received membership reports */ - u_long igps_rcv_badreports; /* received invalid reports */ - u_long igps_rcv_ourreports; /* received reports for our groups */ - u_long igps_snd_reports; /* sent membership reports */ + u_int igps_rcv_total; /* total IGMP messages received */ + u_int igps_rcv_tooshort; /* received with too few bytes */ + u_int igps_rcv_badsum; /* received with bad checksum */ + u_int igps_rcv_queries; /* received membership queries */ + u_int igps_rcv_badqueries; /* received invalid queries */ + u_int igps_rcv_reports; /* received membership reports */ + u_int igps_rcv_badreports; /* received invalid reports */ + u_int igps_rcv_ourreports; /* received reports for our groups */ + u_int igps_snd_reports; /* sent membership reports */ }; #ifdef KERNEL diff --git a/sys/netinet/in.h b/sys/netinet/in.h index b9f8b1d..80c5b2b 100644 --- a/sys/netinet/in.h +++ b/sys/netinet/in.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)in.h 8.3 (Berkeley) 1/3/94 - * $Id: in.h,v 1.7 1995/02/14 23:04:50 wollman Exp $ + * $Id: in.h,v 1.9 1995/03/16 18:22:26 wollman Exp $ */ #ifndef _NETINET_IN_H_ @@ -49,6 +49,7 @@ #define IPPROTO_ICMP 1 /* control message protocol */ #define IPPROTO_IGMP 2 /* group mgmt protocol */ #define IPPROTO_GGP 3 /* gateway^2 (deprecated) */ +#define IPPROTO_IPIP 4 /* IP encapsulation in IP */ #define IPPROTO_TCP 6 /* tcp */ #define IPPROTO_EGP 8 /* exterior gateway protocol */ #define IPPROTO_PUP 12 /* pup */ @@ -166,14 +167,15 @@ struct ip_opts { #define IP_MULTICAST_VIF 14 /* set/get IP mcast virt. iface */ #define IP_RSVP_ON 15 /* enable RSVP in kernel */ #define IP_RSVP_OFF 16 /* disable RSVP in kernel */ - +#define IP_RSVP_VIF_ON 17 /* set RSVP per-vif socket */ +#define IP_RSVP_VIF_OFF 18 /* unset RSVP per-vif socket */ /* * Defaults and limits for options */ #define IP_DEFAULT_MULTICAST_TTL 1 /* normally limit m'casts to 1 hop */ #define IP_DEFAULT_MULTICAST_LOOP 1 /* normally hear sends if a member */ -#define IP_MAX_MEMBERSHIPS 20 /* per socket; must fit in one mbuf */ +#define IP_MAX_MEMBERSHIPS 20 /* per socket */ /* * Argument structure for IP_ADD_MEMBERSHIP and IP_DROP_MEMBERSHIP. diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c index 1a5f6d9..d1d8677 100644 --- a/sys/netinet/in_proto.c +++ b/sys/netinet/in_proto.c @@ -81,7 +81,8 @@ int tp_init(), tp_slowtimo(), tp_drain(), tp_usrreq(); void eoninput(), eonctlinput(), eonprotoinit(); #endif /* EON */ -void multiencap_decap(struct mbuf *); +void rsvp_input(struct mbuf *, int); +void ipip_input(struct mbuf *, int); extern struct domain inetdomain; @@ -118,12 +119,12 @@ struct protosw inetsw[] = { igmp_init, igmp_fasttimo, igmp_slowtimo, 0, igmp_sysctl }, { SOCK_RAW, &inetdomain, IPPROTO_RSVP, PR_ATOMIC|PR_ADDR, - rip_input, rip_output, 0, rip_ctloutput, + rsvp_input, rip_output, 0, rip_ctloutput, rip_usrreq, 0, 0, 0, 0, }, { SOCK_RAW, &inetdomain, IPPROTO_ENCAP, PR_ATOMIC|PR_ADDR, - multiencap_decap, rip_output, 0, rip_ctloutput, + ipip_input, rip_output, 0, rip_ctloutput, rip_usrreq, 0, 0, 0, 0, }, @@ -160,7 +161,7 @@ struct protosw inetsw[] = { extern int in_inithead(void **, int); struct domain inetdomain = - { AF_INET, "internet", 0, 0, 0, + { AF_INET, "internet", 0, 0, 0, inetsw, &inetsw[sizeof(inetsw)/sizeof(inetsw[0])], 0, in_inithead, 32, sizeof(struct sockaddr_in) }; diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index 22a4f45..a463193 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -65,6 +65,8 @@ #include <netinet/ip_fw.h> #include <sys/socketvar.h> +int rsvp_on = 0; +int ip_rsvp_on; struct socket *ip_rsvpd; #ifndef IPFORWARDING @@ -255,12 +257,12 @@ next: goto next; /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no - * matter if it is destined to another node, or whether it is + * matter if it is destined to another node, or whether it is * a multicast one, RSVP wants it! and prevents it from being forwarded * anywhere else. Also checks if the rsvp daemon is running before * grabbing the packet. */ - if (ip_rsvpd != NULL && ip->ip_p==IPPROTO_RSVP) + if (rsvp_on && ip->ip_p==IPPROTO_RSVP) goto ours; /* @@ -357,9 +359,9 @@ ours: /* * If packet came to us we count it... - * This way we count all incoming packets which has + * This way we count all incoming packets which has * not been forwarded... - * Do not convert ip_len to host byte order when + * Do not convert ip_len to host byte order when * counting,ppl already made it for us before.. */ if (ip_acct_cnt_ptr!=NULL) @@ -747,7 +749,7 @@ ip_dooptions(m) char buf[4*sizeof "123"]; strcpy(buf, inet_ntoa(ip->ip_dst)); - log(LOG_WARNING, + log(LOG_WARNING, "attempted source route from %s to %s\n", inet_ntoa(ip->ip_src), buf); type = ICMP_UNREACH; @@ -1207,17 +1209,17 @@ ip_sysctl(name, namelen, oldp, oldlenp, newp, newlen) case IPCTL_DEFTTL: return (sysctl_int(oldp, oldlenp, newp, newlen, &ip_defttl)); case IPCTL_SOURCEROUTE: - return (sysctl_int(oldp, oldlenp, newp, newlen, + return (sysctl_int(oldp, oldlenp, newp, newlen, &ip_dosourceroute)); #ifdef notyet case IPCTL_DEFMTU: return (sysctl_int(oldp, oldlenp, newp, newlen, &ip_mtu)); #endif case IPCTL_RTEXPIRE: - return (sysctl_int(oldp, oldlenp, newp, newlen, + return (sysctl_int(oldp, oldlenp, newp, newlen, &rtq_reallyold)); case IPCTL_RTMINEXPIRE: - return (sysctl_int(oldp, oldlenp, newp, newlen, + return (sysctl_int(oldp, oldlenp, newp, newlen, &rtq_minreallyold)); case IPCTL_RTMAXCACHE: return (sysctl_int(oldp, oldlenp, newp, newlen, @@ -1239,6 +1241,14 @@ ip_rsvp_init(struct socket *so) return EADDRINUSE; ip_rsvpd = so; + /* + * This may seem silly, but we need to be sure we don't over-increment + * the RSVP counter, in case something slips up. + */ + if (!ip_rsvp_on) { + ip_rsvp_on = 1; + rsvp_on++; + } return 0; } @@ -1247,5 +1257,13 @@ int ip_rsvp_done(void) { ip_rsvpd = NULL; + /* + * This may seem silly, but we need to be sure we don't over-decrement + * the RSVP counter, in case something slips up. + */ + if (ip_rsvp_on) { + ip_rsvp_on = 0; + rsvp_on--; + } return 0; } diff --git a/sys/netinet/ip_mroute.c b/sys/netinet/ip_mroute.c index 7226fd4..2abb1e1 100644 --- a/sys/netinet/ip_mroute.c +++ b/sys/netinet/ip_mroute.c @@ -6,8 +6,9 @@ * Modified by Mark J. Steiglitz, Stanford, May, 1991 * Modified by Van Jacobson, LBL, January 1993 * Modified by Ajit Thyagarajan, PARC, August 1993 + * Modified by Bill Fenner, PARC, April 1995 * - * MROUTING 1.8 + * MROUTING Revision: 3.5 */ @@ -19,6 +20,7 @@ #include <sys/protosw.h> #include <sys/errno.h> #include <sys/time.h> +#include <sys/kernel.h> #include <sys/ioctl.h> #include <sys/syslog.h> #include <sys/queue.h> @@ -33,6 +35,7 @@ #include <netinet/igmp.h> #include <netinet/igmp_var.h> #include <netinet/ip_mroute.h> +#include <netinet/udp.h> #ifndef NTOHL #if BYTE_ORDER != BIG_ENDIAN @@ -48,18 +51,20 @@ #endif #endif +extern int rsvp_on; + #ifndef MROUTING /* * Dummy routines and globals used when multicast routing is not compiled in. */ -u_int ip_mrtproto = 0; struct socket *ip_mrouter = NULL; +u_int ip_mrtproto = 0; struct mrtstat mrtstat; - +u_int rsvpdebug = 0; int -_ip_mrouter_cmd(cmd, so, m) +_ip_mrouter_set(cmd, so, m) int cmd; struct socket *so; struct mbuf *m; @@ -67,7 +72,19 @@ _ip_mrouter_cmd(cmd, so, m) return(EOPNOTSUPP); } -int (*ip_mrouter_cmd)(int, struct socket *, struct mbuf *) = _ip_mrouter_cmd; +int (*ip_mrouter_set)(int, struct socket *, struct mbuf *) = _ip_mrouter_set; + + +int +_ip_mrouter_get(cmd, so, m) + int cmd; + struct socket *so; + struct mbuf **m; +{ + return(EOPNOTSUPP); +} + +int (*ip_mrouter_get)(int, struct socket *, struct mbuf **) = _ip_mrouter_get; int _ip_mrouter_done() @@ -98,14 +115,72 @@ _mrt_ioctl(int req, caddr_t data, struct proc *p) int (*mrt_ioctl)(int, caddr_t, struct proc *) = _mrt_ioctl; -void multiencap_decap(struct mbuf *m) { /* XXX must fixup manually */ +void +rsvp_input(m, iphlen) /* XXX must fixup manually */ + struct mbuf *m; + int iphlen; +{ + /* Can still get packets with rsvp_on = 0 if there is a local member + * of the group to which the RSVP packet is addressed. But in this + * case we want to throw the packet away. + */ + if (!rsvp_on) { + m_freem(m); + return; + } + + if (ip_rsvpd != NULL) { + if (rsvpdebug) + printf("rsvp_input: Sending packet up old-style socket\n"); + rip_input(m); + return; + } + /* Drop the packet */ + m_freem(m); +} + +void ipip_input(struct mbuf *m) { /* XXX must fixup manually */ rip_input(m); } int (*legal_vif_num)(int) = 0; +/* + * This should never be called, since IP_MULTICAST_VIF should fail, but + * just in case it does get called, the code a little lower in ip_output + * will assign the packet a local address. + */ +u_long +_ip_mcast_src(int vifi) { return INADDR_ANY; } +u_long (*ip_mcast_src)(int) = _ip_mcast_src; + +int +ip_rsvp_vif_init(so, m) + struct socket *so; + struct mbuf *m; +{ + return(EINVAL); +} + +int +ip_rsvp_vif_done(so, m) + struct socket *so; + struct mbuf *m; +{ + return(EINVAL); +} + +void +ip_rsvp_force_done(so) + struct socket *so; +{ + return; +} + #else /* MROUTING */ +#define M_HASCL(m) ((m)->m_flags & M_EXT) + #define INSIZ sizeof(struct in_addr) #define same(a1, a2) \ (bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0) @@ -130,16 +205,23 @@ extern int ip_mrtproto; #define RTE_FOUND 0x2 struct mbuf *mfctable[MFCTBLSIZ]; +u_char nexpire[MFCTBLSIZ]; struct vif viftable[MAXVIFS]; u_int mrtdebug = 0; /* debug level */ +#define DEBUG_MFC 0x02 +#define DEBUG_FORWARD 0x04 +#define DEBUG_EXPIRE 0x08 +#define DEBUG_XMIT 0x10 u_int tbfdebug = 0; /* tbf debug level */ +u_int rsvpdebug = 0; /* rsvp debug level */ -u_long timeout_val = 0; /* count of outstanding upcalls */ +#define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */ +#define UPCALL_EXPIRE 6 /* number of timeouts */ /* * Define the token bucket filter structures - * tbftable -> each vif has one of these for storing info - * qtable -> each interface has an associated queue of pkts + * tbftable -> each vif has one of these for storing info + * qtable -> each interface has an associated queue of pkts */ struct tbf tbftable[MAXVIFS]; @@ -156,7 +238,7 @@ struct pkt_queue qtable[MAXVIFS][MAXQSIZE]; struct ifnet multicast_decap_if[MAXVIFS]; #define ENCAP_TTL 64 -#define ENCAP_PROTO 4 +#define ENCAP_PROTO IPPROTO_IPIP /* 4 */ /* prototype IP hdr for encapsulated packets */ struct ip multicast_encap_iphdr = { @@ -169,7 +251,7 @@ struct ip multicast_encap_iphdr = { sizeof(struct ip), /* total length */ 0, /* id */ 0, /* frag offset */ - ENCAP_TTL, ENCAP_PROTO, + ENCAP_TTL, ENCAP_PROTO, 0, /* checksum */ }; @@ -178,115 +260,82 @@ struct ip multicast_encap_iphdr = { */ static vifi_t numvifs = 0; static void (*encap_oldrawip)() = 0; +static int have_encap_tunnel = 0; /* - * one-back cache used by multiencap_decap to locate a tunnel's vif + * one-back cache used by ipip_input to locate a tunnel's vif * given a datagram's src ip address. */ static u_long last_encap_src; static struct vif *last_encap_vif; -static u_long nethash_fc(u_long, u_long); -static struct mfc *mfcfind(u_long, u_long); -int get_sg_cnt(struct sioc_sg_req *); -int get_vif_cnt(struct sioc_vif_req *); -int get_vifs(caddr_t); +static int get_sg_cnt(struct sioc_sg_req *); +static int get_vif_cnt(struct sioc_vif_req *); +int ip_mrouter_init(struct socket *, struct mbuf *); static int add_vif(struct vifctl *); static int del_vif(vifi_t *); static int add_mfc(struct mfcctl *); -static int del_mfc(struct delmfcctl *); -static void cleanup_cache(void *); -static int ip_mdq(struct mbuf *, struct ifnet *, u_long, struct mfc *, - struct ip_moptions *); +static int del_mfc(struct mfcctl *); +static int get_version(struct mbuf *); +static int get_assert(struct mbuf *); +static int set_assert(int *); +static void expire_upcalls(void *); +static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, + vifi_t); static void phyint_send(struct ip *, struct vif *, struct mbuf *); -static void srcrt_send(struct ip *, struct vif *, struct mbuf *); static void encap_send(struct ip *, struct vif *, struct mbuf *); -void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long, +static void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long, struct ip_moptions *); -void tbf_queue(struct vif *, struct mbuf *, struct ip *, struct ip_moptions *); -void tbf_process_q(struct vif *); -void tbf_dequeue(struct vif *, int); -void tbf_reprocess_q(void *); -int tbf_dq_sel(struct vif *, struct ip *); -void tbf_send_packet(struct vif *, struct mbuf *, struct ip_moptions *); -void tbf_update_tokens(struct vif *); +static void tbf_queue(struct vif *, struct mbuf *, struct ip *, struct ip_moptions *); +static void tbf_process_q(struct vif *); +static void tbf_dequeue(struct vif *, int); +static void tbf_reprocess_q(void *); +static int tbf_dq_sel(struct vif *, struct ip *); +static void tbf_send_packet(struct vif *, struct mbuf *, struct ip_moptions *); +static void tbf_update_tokens(struct vif *); static int priority(struct vif *, struct ip *); -static int ip_mrouter_init(struct socket *); -void multiencap_decap(struct mbuf *m); +void multiencap_decap(struct mbuf *); /* - * A simple hash function: returns MFCHASHMOD of the low-order octet of - * the argument's network or subnet number and the multicast group assoc. + * whether or not special PIM assert processing is enabled. */ -static u_long -nethash_fc(m,n) - register u_long m; - register u_long n; -{ - struct in_addr in1; - struct in_addr in2; - - in1.s_addr = m; - m = in_netof(in1); - while ((m & 0xff) == 0) m >>= 8; - - in2.s_addr = n; - n = in_netof(in2); - while ((n & 0xff) == 0) n >>= 8; - - return (MFCHASHMOD(m) ^ MFCHASHMOD(n)); -} +static int pim_assert; +/* + * Rate limit for assert notification messages, in usec + */ +#define ASSERT_MSG_TIME 3000000 /* - * this is a direct-mapped cache used to speed the mapping from a - * datagram source address to the associated multicast route. Note - * that unlike mrttable, the hash is on IP address, not IP net number. + * Hash function for a source, group entry */ -#define MFCHASHSIZ 1024 -#define MFCHASH(a, g) ((((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \ - ((g) >> 20) ^ ((g) >> 10) ^ (g)) & (MFCHASHSIZ-1)) -struct mfc *mfchash[MFCHASHSIZ]; +#define MFCHASH(a, g) MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \ + ((g) >> 20) ^ ((g) >> 10) ^ (g)) /* * Find a route for a given origin IP address and Multicast group address * Type of service parameter to be added in the future!!! */ + #define MFCFIND(o, g, rt) { \ - register u_int _mrhasho = o; \ - register u_int _mrhashg = g; \ - _mrhasho = MFCHASH(_mrhasho, _mrhashg); \ + register struct mbuf *_mb_rt = mfctable[MFCHASH(o,g)]; \ + register struct mfc *_rt = NULL; \ + rt = NULL; \ ++mrtstat.mrts_mfc_lookups; \ - rt = mfchash[_mrhasho]; \ - if ((rt == NULL) || \ - ((o & rt->mfc_originmask.s_addr) != rt->mfc_origin.s_addr) || \ - (g != rt->mfc_mcastgrp.s_addr)) \ - if ((rt = mfcfind(o, g)) != NULL) \ - mfchash[_mrhasho] = rt; \ + while (_mb_rt) { \ + _rt = mtod(_mb_rt, struct mfc *); \ + if ((_rt->mfc_origin.s_addr == o) && \ + (_rt->mfc_mcastgrp.s_addr == g) && \ + (_mb_rt->m_act == NULL)) { \ + rt = _rt; \ + break; \ + } \ + _mb_rt = _mb_rt->m_next; \ + } \ + if (rt == NULL) { \ + ++mrtstat.mrts_mfc_misses; \ + } \ } -/* - * Find route by examining hash table entries - */ -static struct mfc * -mfcfind(origin, mcastgrp) - u_long origin; - u_long mcastgrp; -{ - register struct mbuf *mb_rt; - register struct mfc *rt; - register u_long hash; - - hash = nethash_fc(origin, mcastgrp); - for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) { - rt = mtod(mb_rt, struct mfc *); - if (((origin & rt->mfc_originmask.s_addr) == rt->mfc_origin.s_addr) && - (mcastgrp == rt->mfc_mcastgrp.s_addr) && - (mb_rt->m_act == NULL)) - return (rt); - } - mrtstat.mrts_mfc_misses++; - return NULL; -} /* * Macros to compute elapsed time efficiently @@ -313,30 +362,63 @@ mfcfind(origin, mcastgrp) #define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \ (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec) +#ifdef UPCALL_TIMING +u_long upcall_data[51]; +static void collate(struct timeval *); +#endif /* UPCALL_TIMING */ + + /* - * Handle DVMRP setsockopt commands to modify the multicast routing tables. + * Handle MRT setsockopt commands to modify the multicast routing tables. */ int -X_ip_mrouter_cmd(cmd, so, m) +X_ip_mrouter_set(cmd, so, m) int cmd; struct socket *so; struct mbuf *m; { - if (cmd != DVMRP_INIT && so != ip_mrouter) return EACCES; + if (cmd != MRT_INIT && so != ip_mrouter) return EACCES; switch (cmd) { - case DVMRP_INIT: return ip_mrouter_init(so); - case DVMRP_DONE: return ip_mrouter_done(); - case DVMRP_ADD_VIF: return add_vif (mtod(m, struct vifctl *)); - case DVMRP_DEL_VIF: return del_vif (mtod(m, vifi_t *)); - case DVMRP_ADD_MFC: return add_mfc (mtod(m, struct mfcctl *)); - case DVMRP_DEL_MFC: return del_mfc (mtod(m, struct delmfcctl *)); + case MRT_INIT: return ip_mrouter_init(so, m); + case MRT_DONE: return ip_mrouter_done(); + case MRT_ADD_VIF: return add_vif (mtod(m, struct vifctl *)); + case MRT_DEL_VIF: return del_vif (mtod(m, vifi_t *)); + case MRT_ADD_MFC: return add_mfc (mtod(m, struct mfcctl *)); + case MRT_DEL_MFC: return del_mfc (mtod(m, struct mfcctl *)); + case MRT_ASSERT: return set_assert(mtod(m, int *)); default: return EOPNOTSUPP; } } #ifndef MROUTE_LKM -int (*ip_mrouter_cmd)(int, struct socket *, struct mbuf *) = X_ip_mrouter_cmd; +int (*ip_mrouter_set)(int, struct socket *, struct mbuf *) = X_ip_mrouter_set; +#endif + +/* + * Handle MRT getsockopt commands + */ +int +X_ip_mrouter_get(cmd, so, m) + int cmd; + struct socket *so; + struct mbuf **m; +{ + struct mbuf *mb; + + if (so != ip_mrouter) return EACCES; + + *m = mb = m_get(M_WAIT, MT_SOOPTS); + + switch (cmd) { + case MRT_VERSION: return get_version(mb); + case MRT_ASSERT: return get_assert(mb); + default: return EOPNOTSUPP; + } +} + +#ifndef MROUTE_LKM +int (*ip_mrouter_get)(int, struct socket *, struct mbuf **) = X_ip_mrouter_get; #endif /* @@ -350,18 +432,15 @@ X_mrt_ioctl(cmd, data) int error = 0; switch (cmd) { - case (SIOCGETVIFINF): /* Read Virtual Interface (m/cast) */ - return (get_vifs(data)); - break; - case (SIOCGETVIFCNT): - return (get_vif_cnt((struct sioc_vif_req *)data)); - break; - case (SIOCGETSGCNT): - return (get_sg_cnt((struct sioc_sg_req *)data)); - break; + case (SIOCGETVIFCNT): + return (get_vif_cnt((struct sioc_vif_req *)data)); + break; + case (SIOCGETSGCNT): + return (get_sg_cnt((struct sioc_sg_req *)data)); + break; default: - return (EINVAL); - break; + return (EINVAL); + break; } return error; } @@ -371,9 +450,9 @@ int (*mrt_ioctl)(int, caddr_t, struct proc *) = X_mrt_ioctl; #endif /* - * returns the packet count for the source group provided + * returns the packet, byte, rpf-failure count for the source group provided */ -int +static int get_sg_cnt(req) register struct sioc_sg_req *req; { @@ -383,83 +462,73 @@ get_sg_cnt(req) s = splnet(); MFCFIND(req->src.s_addr, req->grp.s_addr, rt); splx(s); - if (rt != NULL) - req->count = rt->mfc_pkt_cnt; - else - req->count = 0xffffffff; + if (rt != NULL) { + req->pktcnt = rt->mfc_pkt_cnt; + req->bytecnt = rt->mfc_byte_cnt; + req->wrong_if = rt->mfc_wrong_if; + } else + req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff; return 0; } /* - * returns the input and output packet counts on the interface provided + * returns the input and output packet and byte counts on the vif provided */ -int +static int get_vif_cnt(req) register struct sioc_vif_req *req; { register vifi_t vifi = req->vifi; + if (vifi >= numvifs) return EINVAL; + req->icount = viftable[vifi].v_pkt_in; req->ocount = viftable[vifi].v_pkt_out; + req->ibytes = viftable[vifi].v_bytes_in; + req->obytes = viftable[vifi].v_bytes_out; return 0; } -int -get_vifs(data) - char *data; -{ - struct vif_conf *vifc = (struct vif_conf *)data; - struct vif_req *vifrp, vifr; - int space, error=0; - - vifi_t vifi; - int s; - - space = vifc->vifc_len; - vifrp = vifc->vifc_req; - - s = splnet(); - vifc->vifc_num=numvifs; - - for (vifi = 0; vifi < numvifs; vifi++, vifrp++) { - if (viftable[vifi].v_lcl_addr.s_addr != 0) { - vifr.v_flags=viftable[vifi].v_flags; - vifr.v_threshold=viftable[vifi].v_threshold; - vifr.v_lcl_addr=viftable[vifi].v_lcl_addr; - vifr.v_rmt_addr=viftable[vifi].v_rmt_addr; - strncpy(vifr.v_if_name,viftable[vifi].v_ifp->if_name,IFNAMSIZ); - if ((space -= sizeof(vifr)) < 0) { - splx(s); - return(ENOSPC); - } - error = copyout((caddr_t)&vifr,(caddr_t)vifrp,(u_int)(sizeof vifr)); - if (error) { - splx(s); - return(error); - } - } - } - splx(s); - return 0; -} /* * Enable multicast routing */ -static int -ip_mrouter_init(so) +int +ip_mrouter_init(so, m) struct socket *so; + struct mbuf *m; { + int *v; + int i; + + if (mrtdebug) + log(LOG_DEBUG,"ip_mrouter_init: so_type = %d, pr_protocol = %d", + so->so_type, so->so_proto->pr_protocol); + if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_IGMP) return EOPNOTSUPP; + if (!m || (m->m_len != sizeof(int *))) + return ENOPROTOOPT; + + v = mtod(m, int *); + if (*v != 1) + return ENOPROTOOPT; + if (ip_mrouter != NULL) return EADDRINUSE; ip_mrouter = so; + bzero((caddr_t)mfctable, sizeof(mfctable)); + bzero((caddr_t)nexpire, sizeof(nexpire)); + + pim_assert = 0; + + timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT); + if (mrtdebug) - log(LOG_DEBUG, "ip_mrouter_init\n"); + log(LOG_DEBUG, "ip_mrouter_init"); return 0; } @@ -475,6 +544,7 @@ X_ip_mrouter_done() struct ifnet *ifp; struct ifreq ifr; struct mbuf *mb_rt; + struct mfc *rt; struct mbuf *m; struct rtdetq *rte; int s; @@ -499,52 +569,44 @@ X_ip_mrouter_done() bzero((caddr_t)tbftable, sizeof(tbftable)); bzero((caddr_t)viftable, sizeof(viftable)); numvifs = 0; + pim_assert = 0; + + untimeout(expire_upcalls, (caddr_t)NULL); /* - * Check if any outstanding timeouts remain + * Free all multicast forwarding cache entries. */ - if (timeout_val != 0) - for (i = 0; i < MFCTBLSIZ; i++) { - mb_rt = mfctable[i]; - while (mb_rt) { - if ( mb_rt->m_act != NULL) { - untimeout(cleanup_cache, (caddr_t)mb_rt); - while (mb_rt->m_act) { - m = mb_rt->m_act; - mb_rt->m_act = m->m_act; - rte = mtod(m, struct rtdetq *); - m_freem(rte->m); - m_free(m); - } - timeout_val--; + for (i = 0; i < MFCTBLSIZ; i++) { + mb_rt = mfctable[i]; + while (mb_rt) { + if (mb_rt->m_act != NULL) { + while (mb_rt->m_act) { + m = mb_rt->m_act; + mb_rt->m_act = m->m_act; + rte = mtod(m, struct rtdetq *); + m_freem(rte->m); + m_free(m); } - mb_rt = mb_rt->m_next; } - if (timeout_val == 0) - break; + mb_rt = m_free(mb_rt); } - - /* - * Free all multicast forwarding cache entries. - */ - for (i = 0; i < MFCTBLSIZ; i++) - m_freem(mfctable[i]); + } bzero((caddr_t)mfctable, sizeof(mfctable)); - bzero((caddr_t)mfchash, sizeof(mfchash)); /* * Reset de-encapsulation cache */ last_encap_src = NULL; last_encap_vif = NULL; - + have_encap_tunnel = 0; + ip_mrouter = NULL; splx(s); if (mrtdebug) - log(LOG_DEBUG, "ip_mrouter_done\n"); + log(LOG_DEBUG, "ip_mrouter_done"); return 0; } @@ -553,6 +615,51 @@ X_ip_mrouter_done() int (*ip_mrouter_done)(void) = X_ip_mrouter_done; #endif +static int +get_version(mb) + struct mbuf *mb; +{ + int *v; + + v = mtod(mb, int *); + + *v = 0x0305; /* XXX !!!! */ + mb->m_len = sizeof(int); + + return 0; +} + +/* + * Set PIM assert processing global + */ +static int +set_assert(i) + int *i; +{ + if ((*i != 1) && (*i != 0)) + return EINVAL; + + pim_assert = *i; + + return 0; +} + +/* + * Get PIM assert processing global + */ +static int +get_assert(m) + struct mbuf *m; +{ + int *i; + + i = mtod(m, int *); + + *i = pim_assert; + + return 0; +} + /* * Add a vif to the vif table */ @@ -579,20 +686,28 @@ add_vif(vifcp) if (vifcp->vifc_flags & VIFF_TUNNEL) { if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) { - if (encap_oldrawip == 0) { - extern struct protosw inetsw[]; - register u_char pr = ip_protox[ENCAP_PROTO]; - - encap_oldrawip = inetsw[pr].pr_input; - inetsw[pr].pr_input = multiencap_decap; - for (s = 0; s < MAXVIFS; ++s) { - multicast_decap_if[s].if_name = "mdecap"; - multicast_decap_if[s].if_unit = s; + /* + * An encapsulating tunnel is wanted. Tell ipip_input() to + * start paying attention to encapsulated packets. + */ + if (have_encap_tunnel == 0) { + have_encap_tunnel = 1; + for (s = 0; s < MAXVIFS; ++s) { + multicast_decap_if[s].if_name = "mdecap"; + multicast_decap_if[s].if_unit = s; + } } - } - ifp = &multicast_decap_if[vifcp->vifc_vifi]; + /* + * Set interface to fake encapsulator interface + */ + ifp = &multicast_decap_if[vifcp->vifc_vifi]; + /* + * Prepare cached route entry + */ + bzero(&vifp->v_route, sizeof(vifp->v_route)); } else { - ifp = 0; + log(LOG_ERR, "Source routed tunnels not supported."); + return EOPNOTSUPP; } } else { /* Make sure the interface supports multicast */ @@ -622,22 +737,26 @@ add_vif(vifcp) vifp->v_rmt_addr = vifcp->vifc_rmt_addr; vifp->v_ifp = ifp; vifp->v_rate_limit= vifcp->vifc_rate_limit; + vifp->v_rsvp_on = 0; + vifp->v_rsvpd = NULL; /* initialize per vif pkt counters */ vifp->v_pkt_in = 0; vifp->v_pkt_out = 0; + vifp->v_bytes_in = 0; + vifp->v_bytes_out = 0; splx(s); /* Adjust numvifs up if the vifi is higher than numvifs */ if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1; if (mrtdebug) - log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d\n", - vifcp->vifc_vifi, + log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d", + vifcp->vifc_vifi, ntohl(vifcp->vifc_lcl_addr.s_addr), (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask", ntohl(vifcp->vifc_rmt_addr.s_addr), vifcp->vifc_threshold, - vifcp->vifc_rate_limit); + vifcp->vifc_rate_limit); return 0; } @@ -685,7 +804,7 @@ del_vif(vifip) splx(s); if (mrtdebug) - log(LOG_DEBUG, "del_vif %d, numvifs %d\n", *vifip, numvifs); + log(LOG_DEBUG, "del_vif %d, numvifs %d", *vifip, numvifs); return 0; } @@ -698,9 +817,7 @@ add_mfc(mfccp) struct mfcctl *mfccp; { struct mfc *rt; - struct mfc *rt1 = 0; register struct mbuf *mb_rt; - struct mbuf *prev_mb_rt; u_long hash; struct mbuf *mb_ntry; struct rtdetq *rte; @@ -708,81 +825,77 @@ add_mfc(mfccp) int s; int i; - rt = mfcfind(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); + MFCFIND(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr, rt); /* If an entry already exists, just update the fields */ if (rt) { - if (mrtdebug) - log(LOG_DEBUG,"add_mfc update o %x g %x m %x p %x\n", + if (mrtdebug & DEBUG_MFC) + log(LOG_DEBUG,"add_mfc update o %x g %x p %x", ntohl(mfccp->mfcc_origin.s_addr), ntohl(mfccp->mfcc_mcastgrp.s_addr), - ntohl(mfccp->mfcc_originmask.s_addr), mfccp->mfcc_parent); s = splnet(); rt->mfc_parent = mfccp->mfcc_parent; for (i = 0; i < numvifs; i++) - VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); + rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; splx(s); return 0; } - /* + /* * Find the entry for which the upcall was made and update */ s = splnet(); - hash = nethash_fc(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); - for (prev_mb_rt = mb_rt = mfctable[hash], nstl = 0; - mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) { + hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); + for (mb_rt = mfctable[hash], nstl = 0; mb_rt; mb_rt = mb_rt->m_next) { rt = mtod(mb_rt, struct mfc *); - if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr) - == mfccp->mfcc_origin.s_addr) && + if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) && (mb_rt->m_act != NULL)) { + + if (nstl++) + log(LOG_ERR, "add_mfc %s o %x g %x p %x dbx %x", + "multiple kernel entries", + ntohl(mfccp->mfcc_origin.s_addr), + ntohl(mfccp->mfcc_mcastgrp.s_addr), + mfccp->mfcc_parent, mb_rt->m_act); + + if (mrtdebug & DEBUG_MFC) + log(LOG_DEBUG,"add_mfc o %x g %x p %x dbg %x", + ntohl(mfccp->mfcc_origin.s_addr), + ntohl(mfccp->mfcc_mcastgrp.s_addr), + mfccp->mfcc_parent, mb_rt->m_act); - if (!nstl++) { - if (mrtdebug) - log(LOG_DEBUG,"add_mfc o %x g %x m %x p %x dbg %x\n", - ntohl(mfccp->mfcc_origin.s_addr), - ntohl(mfccp->mfcc_mcastgrp.s_addr), - ntohl(mfccp->mfcc_originmask.s_addr), - mfccp->mfcc_parent, mb_rt->m_act); - - rt->mfc_origin = mfccp->mfcc_origin; - rt->mfc_originmask = mfccp->mfcc_originmask; - rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; - rt->mfc_parent = mfccp->mfcc_parent; - for (i = 0; i < numvifs; i++) - VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); - /* initialize pkt counters per src-grp */ - rt->mfc_pkt_cnt = 0; - rt1 = rt; - } + rt->mfc_origin = mfccp->mfcc_origin; + rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; + rt->mfc_parent = mfccp->mfcc_parent; + for (i = 0; i < numvifs; i++) + rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; + /* initialize pkt counters per src-grp */ + rt->mfc_pkt_cnt = 0; + rt->mfc_byte_cnt = 0; + rt->mfc_wrong_if = 0; + rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0; - /* prevent cleanup of cache entry */ - untimeout(cleanup_cache, (caddr_t)mb_rt); - timeout_val--; + rt->mfc_expire = 0; /* Don't clean this guy up */ + nexpire[hash]--; /* free packets Qed at the end of this entry */ while (mb_rt->m_act) { mb_ntry = mb_rt->m_act; rte = mtod(mb_ntry, struct rtdetq *); - ip_mdq(rte->m, rte->ifp, rte->tunnel_src, - rt1, rte->imo); +/* #ifdef RSVP_ISI */ + ip_mdq(rte->m, rte->ifp, rt, -1); +/* #endif */ mb_rt->m_act = mb_ntry->m_act; m_freem(rte->m); +#ifdef UPCALL_TIMING + collate(&(rte->t)); +#endif /* UPCALL_TIMING */ m_free(mb_ntry); } - - /* - * If more than one entry was created for a single upcall - * delete that entry - */ - if (nstl > 1) { - MFREE(mb_rt, prev_mb_rt->m_next); - mb_rt = prev_mb_rt; - } } } @@ -790,29 +903,31 @@ add_mfc(mfccp) * It is possible that an entry is being inserted without an upcall */ if (nstl == 0) { - if (mrtdebug) - log(LOG_DEBUG,"add_mfc no upcall h %d o %x g %x m %x p %x\n", + if (mrtdebug & DEBUG_MFC) + log(LOG_DEBUG,"add_mfc no upcall h %d o %x g %x p %x", hash, ntohl(mfccp->mfcc_origin.s_addr), ntohl(mfccp->mfcc_mcastgrp.s_addr), - ntohl(mfccp->mfcc_originmask.s_addr), mfccp->mfcc_parent); - - for (prev_mb_rt = mb_rt = mfctable[hash]; - mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) { - + + for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) { + rt = mtod(mb_rt, struct mfc *); - if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr) - == mfccp->mfcc_origin.s_addr) && + if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) { rt->mfc_origin = mfccp->mfcc_origin; - rt->mfc_originmask = mfccp->mfcc_originmask; rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; rt->mfc_parent = mfccp->mfcc_parent; for (i = 0; i < numvifs; i++) - VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); + rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; /* initialize pkt counters per src-grp */ rt->mfc_pkt_cnt = 0; + rt->mfc_byte_cnt = 0; + rt->mfc_wrong_if = 0; + rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0; + if (rt->mfc_expire) + nexpire[hash]--; + rt->mfc_expire = 0; } } if (mb_rt == NULL) { @@ -822,19 +937,22 @@ add_mfc(mfccp) splx(s); return ENOBUFS; } - + rt = mtod(mb_rt, struct mfc *); - + /* insert new entry at head of hash chain */ rt->mfc_origin = mfccp->mfcc_origin; - rt->mfc_originmask = mfccp->mfcc_originmask; rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; rt->mfc_parent = mfccp->mfcc_parent; for (i = 0; i < numvifs; i++) - VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); + rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; /* initialize pkt counters per src-grp */ rt->mfc_pkt_cnt = 0; - + rt->mfc_byte_cnt = 0; + rt->mfc_wrong_if = 0; + rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0; + rt->mfc_expire = 0; + /* link into table */ mb_rt->m_next = mfctable[hash]; mfctable[hash] = mb_rt; @@ -845,56 +963,73 @@ add_mfc(mfccp) return 0; } +#ifdef UPCALL_TIMING +/* + * collect delay statistics on the upcalls + */ +static void collate(t) +register struct timeval *t; +{ + register u_long d; + register struct timeval tp; + register u_long delta; + + GET_TIME(tp); + + if (TV_LT(*t, tp)) + { + TV_DELTA(tp, *t, delta); + + d = delta >> 10; + if (d > 50) + d = 50; + + ++upcall_data[d]; + } +} +#endif /* UPCALL_TIMING */ + /* * Delete an mfc entry */ static int del_mfc(mfccp) - struct delmfcctl *mfccp; + struct mfcctl *mfccp; { struct in_addr origin; struct in_addr mcastgrp; struct mfc *rt; struct mbuf *mb_rt; - struct mbuf *prev_mb_rt; + struct mbuf **nptr; u_long hash; - struct mfc **cmfc; - struct mfc **cmfcend; - int s; + int s, i; origin = mfccp->mfcc_origin; mcastgrp = mfccp->mfcc_mcastgrp; - hash = nethash_fc(origin.s_addr, mcastgrp.s_addr); + hash = MFCHASH(origin.s_addr, mcastgrp.s_addr); - if (mrtdebug) - log(LOG_DEBUG,"del_mfc orig %x mcastgrp %x\n", + if (mrtdebug & DEBUG_MFC) + log(LOG_DEBUG,"del_mfc orig %x mcastgrp %x", ntohl(origin.s_addr), ntohl(mcastgrp.s_addr)); - for (prev_mb_rt = mb_rt = mfctable[hash] - ; mb_rt - ; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) { + s = splnet(); + + nptr = &mfctable[hash]; + while ((mb_rt = *nptr) != NULL) { rt = mtod(mb_rt, struct mfc *); if (origin.s_addr == rt->mfc_origin.s_addr && mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr && mb_rt->m_act == NULL) break; + + nptr = &mb_rt->m_next; } if (mb_rt == NULL) { - return ESRCH; + splx(s); + return EADDRNOTAVAIL; } - s = splnet(); - - cmfc = mfchash; - cmfcend = cmfc + MFCHASHSIZ; - for ( ; cmfc < cmfcend; ++cmfc) - if (*cmfc == rt) - *cmfc = 0; - - if (prev_mb_rt != mb_rt) { /* if moved past head of list */ - MFREE(mb_rt, prev_mb_rt->m_next); - } else /* delete head of list, it is in the table */ - mfctable[hash] = m_free(mb_rt); + MFREE(mb_rt, *nptr); splx(s); @@ -902,13 +1037,34 @@ del_mfc(mfccp) } /* + * Send a message to mrouted on the multicast routing socket + */ +static int +socket_send(s, mm, src) + struct socket *s; + struct mbuf *mm; + struct sockaddr_in *src; +{ + if (s) { + if (sbappendaddr(&s->so_rcv, + (struct sockaddr *)src, + mm, (struct mbuf *)0) != 0) { + sorwakeup(s); + return 0; + } + } + m_freem(mm); + return -1; +} + +/* * IP multicast forwarding function. This function assumes that the packet * pointed to by "ip" has arrived on (or is about to be sent to) the interface * pointed to by "ifp", and the packet is to be relayed to other networks * that have members of the packet's destination IP multicast group. * - * The packet is returned unscathed to the caller, unless it is tunneled - * or erroneous, in which case a non-zero return value tells the caller to + * The packet is returned unscathed to the caller, unless it is + * erroneous, in which case a non-zero return value tells the caller to * discard it. */ @@ -922,76 +1078,53 @@ X_ip_mforward(ip, ifp, m, imo) struct mbuf *m; struct ip_moptions *imo; { - register struct mfc *rt; + register struct mfc *rt = 0; /* XXX uninit warning */ register u_char *ipoptions; - u_long tunnel_src; static struct sockproto k_igmpproto = { AF_INET, IPPROTO_IGMP }; static struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; - static struct sockaddr_in k_igmpdst = { sizeof k_igmpdst, AF_INET }; + static int srctun = 0; register struct mbuf *mm; - register struct ip *k_data; int s; + vifi_t vifi; + struct vif *vifp; - if (mrtdebug > 1) - log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %x (%s%d)\n", - ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp, - ifp->if_name, ifp->if_unit); + if (mrtdebug & DEBUG_FORWARD) + log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %x", + ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp); if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 || (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) { /* - * Packet arrived via a physical interface. + * Packet arrived via a physical interface or + * an encapsulated tunnel. */ - tunnel_src = 0; } else { /* * Packet arrived through a source-route tunnel. - * - * A source-route tunneled packet has a single NOP option and a - * two-element - * loose-source-and-record-route (LSRR) option immediately following - * the fixed-size part of the IP header. At this point in processing, - * the IP header should contain the following IP addresses: - * - * original source - in the source address field - * destination group - in the destination address field - * remote tunnel end-point - in the first element of LSRR - * one of this host's addrs - in the second element of LSRR - * - * NOTE: RFC-1075 would have the original source and remote tunnel - * end-point addresses swapped. However, that could cause - * delivery of ICMP error messages to innocent applications - * on intermediate routing hosts! Therefore, we hereby - * change the spec. - */ - - /* - * Verify that the tunnel options are well-formed. + * Source-route tunnels are no longer supported. */ - if (ipoptions[0] != IPOPT_NOP || - ipoptions[2] != 11 || /* LSRR option length */ - ipoptions[3] != 12 || /* LSRR address pointer */ - (tunnel_src = *(u_long *)(&ipoptions[4])) == 0) { - mrtstat.mrts_bad_tunnel++; - if (mrtdebug) - log(LOG_DEBUG, - "ip_mforward: bad tunnel from %u (%x %x %x %x %x %x)\n", - ntohl(ip->ip_src.s_addr), - ipoptions[0], ipoptions[1], ipoptions[2], ipoptions[3], - *(u_long *)(&ipoptions[4]), *(u_long *)(&ipoptions[8])); - return 1; - } + if ((srctun++ % 1000) == 0) + log(LOG_ERR, "ip_mforward: received source-routed packet from %x", + ntohl(ip->ip_src.s_addr)); - /* - * Delete the tunnel options from the packet. - */ - ovbcopy((caddr_t)(ipoptions + TUNNEL_LEN), (caddr_t)ipoptions, - (unsigned)(m->m_len - (IP_HDR_LEN + TUNNEL_LEN))); - m->m_len -= TUNNEL_LEN; - ip->ip_len -= TUNNEL_LEN; - ip->ip_hl -= TUNNEL_LEN >> 2; + return 1; + } - ifp = 0; + if ((imo) && ((vifi = imo->imo_multicast_vif) < numvifs)) { + if (ip->ip_ttl < 255) + ip->ip_ttl++; /* compensate for -1 in *_send routines */ + if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { + vifp = viftable + vifi; + printf("Sending IPPROTO_RSVP from %x to %x on vif %d (%s%s%d)\n", + ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), vifi, + (vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "", + vifp->v_ifp->if_name, vifp->v_ifp->if_unit); + } + return (ip_mdq(m, ifp, rt, vifi)); + } + if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { + printf("Warning: IPPROTO_RSVP from %x to %x without vif option\n", + ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr)); } /* @@ -1000,7 +1133,7 @@ X_ip_mforward(ip, ifp, m, imo) */ if (ip->ip_ttl <= 1 || ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP) - return (int)tunnel_src; + return 0; /* * Determine forwarding vifs from the forwarding cache table @@ -1011,10 +1144,8 @@ X_ip_mforward(ip, ifp, m, imo) /* Entry exists, so forward if necessary */ if (rt != NULL) { splx(s); - return (ip_mdq(m, ifp, tunnel_src, rt, imo)); - } - - else { + return (ip_mdq(m, ifp, rt, -1)); + } else { /* * If we don't have a route for packet's origin, * Make a copy of the packet & @@ -1027,28 +1158,88 @@ X_ip_mforward(ip, ifp, m, imo) register struct rtdetq *rte; register struct mbuf *rte_m; register u_long hash; + register int npkts; +#ifdef UPCALL_TIMING + struct timeval tp; + + GET_TIME(tp); +#endif mrtstat.mrts_no_route++; - if (mrtdebug) - log(LOG_DEBUG, "ip_mforward: no rte s %x g %x\n", + if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC)) + log(LOG_DEBUG, "ip_mforward: no rte s %x g %x", ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr)); + /* + * Allocate mbufs early so that we don't do extra work if we are + * just going to fail anyway. + */ + MGET(mb_ntry, M_DONTWAIT, MT_DATA); + if (mb_ntry == NULL) { + splx(s); + return ENOBUFS; + } + mb0 = m_copy(m, 0, M_COPYALL); + if (mb0 == NULL) { + m_free(mb_ntry); + splx(s); + return ENOBUFS; + } + /* is there an upcall waiting for this packet? */ - hash = nethash_fc(ip->ip_src.s_addr, ip->ip_dst.s_addr); + hash = MFCHASH(ip->ip_src.s_addr, ip->ip_dst.s_addr); for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) { rt = mtod(mb_rt, struct mfc *); - if (((ip->ip_src.s_addr & rt->mfc_originmask.s_addr) == - rt->mfc_origin.s_addr) && + if ((ip->ip_src.s_addr == rt->mfc_origin.s_addr) && (ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) && (mb_rt->m_act != NULL)) break; } if (mb_rt == NULL) { + int hlen = ip->ip_hl << 2; + int i; + struct igmpmsg *im; + /* no upcall, so make a new entry */ MGET(mb_rt, M_DONTWAIT, MT_MRTABLE); if (mb_rt == NULL) { + m_free(mb_ntry); + m_free(mb0); + splx(s); + return ENOBUFS; + } + /* Make a copy of the header to send to the user level process */ + mm = m_copy(m, 0, hlen); + if (mm && (M_HASCL(mm) || mm->m_len < hlen)) + mm = m_pullup(mm, hlen); + if (mm == NULL) { + m_free(mb_ntry); + m_free(mb0); + m_free(mb_rt); + splx(s); + return ENOBUFS; + } + + /* + * Send message to routing daemon to install + * a route into the kernel table + */ + k_igmpsrc.sin_addr = ip->ip_src; + + im = mtod(mm, struct igmpmsg *); + im->im_msgtype = IGMPMSG_NOCACHE; + im->im_mbz = 0; + + mrtstat.mrts_upcalls++; + + if (socket_send(ip_mrouter, mm, &k_igmpsrc) < 0) { + log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full"); + ++mrtstat.mrts_upq_sockfull; + m_free(mb_ntry); + m_free(mb0); + m_free(mb_rt); splx(s); return ENOBUFS; } @@ -1057,80 +1248,49 @@ X_ip_mforward(ip, ifp, m, imo) /* insert new entry at head of hash chain */ rt->mfc_origin.s_addr = ip->ip_src.s_addr; - rt->mfc_originmask.s_addr = (u_long)0xffffffff; rt->mfc_mcastgrp.s_addr = ip->ip_dst.s_addr; + rt->mfc_expire = UPCALL_EXPIRE; + nexpire[hash]++; + for (i = 0; i < numvifs; i++) + rt->mfc_ttls[i] = 0; + rt->mfc_parent = -1; /* link into table */ - hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr); mb_rt->m_next = mfctable[hash]; mfctable[hash] = mb_rt; mb_rt->m_act = NULL; - } - - /* determine if q has overflowed */ - for (rte_m = mb_rt, hash = 0; rte_m->m_act; rte_m = rte_m->m_act) - hash++; - - if (hash > MAX_UPQ) { - mrtstat.mrts_upq_ovflw++; - splx(s); - return 0; - } + rte_m = mb_rt; + } else { + /* determine if q has overflowed */ + for (rte_m = mb_rt, npkts = 0; rte_m->m_act; rte_m = rte_m->m_act) + npkts++; - /* add this packet and timing, ifp info to m_act */ - MGET(mb_ntry, M_DONTWAIT, MT_DATA); - if (mb_ntry == NULL) { - splx(s); - return ENOBUFS; + if (npkts > MAX_UPQ) { + mrtstat.mrts_upq_ovflw++; + m_free(mb_ntry); + m_free(mb0); + splx(s); + return 0; + } } mb_ntry->m_act = NULL; rte = mtod(mb_ntry, struct rtdetq *); - mb0 = m_copy(m, 0, M_COPYALL); - if (mb0 == NULL) { - splx(s); - return ENOBUFS; - } - rte->m = mb0; rte->ifp = ifp; - rte->tunnel_src = tunnel_src; - rte->imo = imo; +#ifdef UPCALL_TIMING + rte->t = tp; +#endif - rte_m->m_act = mb_ntry; + /* Add this entry to the end of the queue */ + rte_m->m_act = mb_ntry; splx(s); - if (hash == 0) { - /* - * Send message to routing daemon to install - * a route into the kernel table - */ - k_igmpsrc.sin_addr = ip->ip_src; - k_igmpdst.sin_addr = ip->ip_dst; - - mm = m_copy(m, 0, M_COPYALL); - if (mm == NULL) { - splx(s); - return ENOBUFS; - } - - k_data = mtod(mm, struct ip *); - k_data->ip_p = 0; - - mrtstat.mrts_upcalls++; - - rip_ip_input(mm, ip_mrouter, (struct sockaddr *)&k_igmpsrc); - - /* set timer to cleanup entry if upcall is lost */ - timeout(cleanup_cache, (caddr_t)mb_rt, 100); - timeout_val++; - } - return 0; - } + } } #ifndef MROUTE_LKM @@ -1142,99 +1302,160 @@ int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, * Clean up the cache entry if upcall is not serviced */ static void -cleanup_cache(xmb_rt) - void *xmb_rt; +expire_upcalls(void *unused) { - struct mbuf *mb_rt = xmb_rt; - struct mfc *rt; - u_long hash; - struct mbuf *prev_m0; - struct mbuf *m0; - struct mbuf *m; + struct mbuf *mb_rt, *m, **nptr; struct rtdetq *rte; + struct mfc *mfc; + int i; int s; - rt = mtod(mb_rt, struct mfc *); - hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr); - - if (mrtdebug) - log(LOG_DEBUG, "ip_mforward: cleanup ipm %d h %d s %x g %x\n", - ip_mrouter, hash, ntohl(rt->mfc_origin.s_addr), - ntohl(rt->mfc_mcastgrp.s_addr)); - - mrtstat.mrts_cache_cleanups++; - - /* - * determine entry to be cleaned up in cache table - */ s = splnet(); - for (prev_m0 = m0 = mfctable[hash]; m0; prev_m0 = m0, m0 = m0->m_next) - if (m0 == mb_rt) - break; + for (i = 0; i < MFCTBLSIZ; i++) { + if (nexpire[i] == 0) + continue; + nptr = &mfctable[i]; + for (mb_rt = *nptr; mb_rt != NULL; mb_rt = *nptr) { + mfc = mtod(mb_rt, struct mfc *); - /* - * drop all the packets - * free the mbuf with the pkt, if, timing info - */ - while (mb_rt->m_act) { - m = mb_rt->m_act; - mb_rt->m_act = m->m_act; + /* + * Skip real cache entries + * Make sure it wasn't marked to not expire (shouldn't happen) + * If it expires now + */ + if (mb_rt->m_act != NULL && + mfc->mfc_expire != 0 && + --mfc->mfc_expire == 0) { + if (mrtdebug & DEBUG_EXPIRE) + log(LOG_DEBUG, "expire_upcalls: expiring (%x %x)", + ntohl(mfc->mfc_origin.s_addr), + ntohl(mfc->mfc_mcastgrp.s_addr)); + /* + * drop all the packets + * free the mbuf with the pkt, if, timing info + */ + while (mb_rt->m_act) { + m = mb_rt->m_act; + mb_rt->m_act = m->m_act; + + rte = mtod(m, struct rtdetq *); + m_freem(rte->m); + m_free(m); + } + ++mrtstat.mrts_cache_cleanups; + nexpire[i]--; - rte = mtod(m, struct rtdetq *); - m_freem(rte->m); - m_free(m); + MFREE(mb_rt, *nptr); + } else { + nptr = &mb_rt->m_next; + } + } } - - /* - * Delete the entry from the cache - */ - if (prev_m0 != m0) { /* if moved past head of list */ - MFREE(m0, prev_m0->m_next); - } else /* delete head of list, it is in the table */ - mfctable[hash] = m_free(m0); - - timeout_val--; splx(s); + timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT); } /* * Packet forwarding routine once entry in the cache is made */ static int -ip_mdq(m, ifp, tunnel_src, rt, imo) +ip_mdq(m, ifp, rt, xmt_vif) register struct mbuf *m; register struct ifnet *ifp; - register u_long tunnel_src; register struct mfc *rt; - register struct ip_moptions *imo; + register vifi_t xmt_vif; { register struct ip *ip = mtod(m, struct ip *); register vifi_t vifi; register struct vif *vifp; + register struct mbuf *tmp; + register int plen = ntohs(ip->ip_len); + +/* + * Macro to send packet on vif. Since RSVP packets don't get counted on + * input, they shouldn't get counted on output, so statistics keeping is + * seperate. + */ +#define MC_SEND(ip,vifp,m) { \ + if ((vifp)->v_flags & VIFF_TUNNEL) \ + encap_send((ip), (vifp), (m)); \ + else \ + phyint_send((ip), (vifp), (m)); \ +} + + /* + * If xmt_vif is not -1, send on only the requested vif. + * + * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.) + */ + if (xmt_vif < numvifs) { + MC_SEND(ip, viftable + xmt_vif, m); + return 1; + } /* * Don't forward if it didn't arrive from the parent vif for its origin. - * Notes: v_ifp is zero for src route tunnels, multicast_decap_if - * for encapsulated tunnels and a real ifnet for non-tunnels so - * the first part of the if catches wrong physical interface or - * tunnel type; v_rmt_addr is zero for non-tunneled packets so - * the 2nd part catches both packets that arrive via a tunnel - * that shouldn't and packets that arrive via the wrong tunnel. */ vifi = rt->mfc_parent; - if (viftable[vifi].v_ifp != ifp || - (ifp == 0 && viftable[vifi].v_rmt_addr.s_addr != tunnel_src)) { + if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) { /* came in the wrong interface */ - if (mrtdebug) - log(LOG_DEBUG, "wrong if: ifp %x vifi %d\n", - ifp, vifi); + if (mrtdebug & DEBUG_FORWARD) + log(LOG_DEBUG, "wrong if: ifp %x vifi %d vififp %x", + ifp, vifi, viftable[vifi].v_ifp); ++mrtstat.mrts_wrong_if; - return (int)tunnel_src; + ++rt->mfc_wrong_if; + /* + * If we are doing PIM assert processing, and we are forwarding + * packets on this interface, and it is a broadcast medium + * interface (and not a tunnel), send a message to the routing daemon. + */ + if (pim_assert && rt->mfc_ttls[vifi] && + (ifp->if_flags & IFF_BROADCAST) && + !(viftable[vifi].v_flags & VIFF_TUNNEL)) { + struct sockaddr_in k_igmpsrc; + struct mbuf *mm; + struct igmpmsg *im; + int hlen = ip->ip_hl << 2; + struct timeval now; + register u_long delta; + + GET_TIME(now); + + TV_DELTA(rt->mfc_last_assert, now, delta); + + if (delta > ASSERT_MSG_TIME) { + mm = m_copy(m, 0, hlen); + if (mm && (M_HASCL(mm) || mm->m_len < hlen)) + mm = m_pullup(mm, hlen); + if (mm == NULL) { + return ENOBUFS; + } + + rt->mfc_last_assert = now; + + im = mtod(mm, struct igmpmsg *); + im->im_msgtype = IGMPMSG_WRONGVIF; + im->im_mbz = 0; + im->im_vif = vifi; + + k_igmpsrc.sin_addr = im->im_src; + + socket_send(ip_mrouter, m, &k_igmpsrc); + } + } + return 0; } - /* increment the interface and s-g counters */ - viftable[vifi].v_pkt_in++; + /* If I sourced this packet, it counts as output, else it was input. */ + if (ip->ip_src.s_addr == viftable[vifi].v_lcl_addr.s_addr) { + viftable[vifi].v_pkt_out++; + viftable[vifi].v_bytes_out += plen; + } else { + viftable[vifi].v_pkt_in++; + viftable[vifi].v_bytes_in += plen; + } rt->mfc_pkt_cnt++; + rt->mfc_byte_cnt += plen; /* * For each vif, decide if a copy of the packet should be forwarded. @@ -1242,40 +1463,26 @@ ip_mdq(m, ifp, tunnel_src, rt, imo) * - the ttl exceeds the vif's threshold * - there are group members downstream on interface */ -#define MC_SEND(ip,vifp,m) { \ - (vifp)->v_pkt_out++; \ - if ((vifp)->v_flags & VIFF_SRCRT) \ - srcrt_send((ip), (vifp), (m)); \ - else if ((vifp)->v_flags & VIFF_TUNNEL) \ - encap_send((ip), (vifp), (m)); \ - else \ - phyint_send((ip), (vifp), (m)); \ - } - -/* If no options or the imo_multicast_vif option is 0, don't do this part - */ - if ((imo != NULL) && - (( vifi = imo->imo_multicast_vif - 1) < numvifs) /*&& (vifi>=0)*/) - { - MC_SEND(ip,viftable+vifi,m); - return (1); /* make sure we are done: No more physical sends */ - } - for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++) if ((rt->mfc_ttls[vifi] > 0) && - (ip->ip_ttl > rt->mfc_ttls[vifi])) + (ip->ip_ttl > rt->mfc_ttls[vifi])) { + vifp->v_pkt_out++; + vifp->v_bytes_out += plen; MC_SEND(ip, vifp, m); + } return 0; } -/* check if a vif number is legal/ok. This is used by ip_output, to export - * numvifs there, +/* + * check if a vif number is legal/ok. This is used by ip_output, to export + * numvifs there, */ int X_legal_vif_num(vif) int vif; -{ if (vif>=0 && vif<=numvifs) +{ + if (vif >= 0 && vif < numvifs) return(1); else return(0); @@ -1285,6 +1492,23 @@ X_legal_vif_num(vif) int (*legal_vif_num)(int) = X_legal_vif_num; #endif +/* + * Return the local address used by this vif + */ +u_long +X_ip_mcast_src(vifi) + int vifi; +{ + if (vifi >= 0 && vifi < numvifs) + return viftable[vifi].v_lcl_addr.s_addr; + else + return INADDR_ANY; +} + +#ifndef MROUTE_LKM +u_long (*ip_mcast_src)(int) = X_ip_mcast_src; +#endif + static void phyint_send(ip, vifp, m) struct ip *ip; @@ -1292,19 +1516,19 @@ phyint_send(ip, vifp, m) struct mbuf *m; { register struct mbuf *mb_copy; - int hlen = ip->ip_hl << 2; + register int hlen = ip->ip_hl << 2; register struct ip_moptions *imo; - if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL) - return; - /* - * Make sure the header isn't in an cluster, because the sharing - * in clusters defeats the whole purpose of making the copy above. + * Make a new reference to the packet; make sure that + * the IP header is actually copied, not just referenced, + * so that ip_output() only scribbles on the copy. */ - mb_copy = m_pullup(mb_copy, hlen); + mb_copy = m_copy(m, 0, M_COPYALL); + if (mb_copy && (M_HASCL(mb_copy) || mb_copy->m_len < hlen)) + mb_copy = m_pullup(mb_copy, hlen); if (mb_copy == NULL) - return; + return; MALLOC(imo, struct ip_moptions *, sizeof *imo, M_IPMOPTS, M_NOWAIT); if (imo == NULL) { @@ -1315,6 +1539,7 @@ phyint_send(ip, vifp, m) imo->imo_multicast_ifp = vifp->v_ifp; imo->imo_multicast_ttl = ip->ip_ttl - 1; imo->imo_multicast_loop = 1; + imo->imo_multicast_vif = -1; if (vifp->v_rate_limit <= 0) tbf_send_packet(vifp, mb_copy, imo); @@ -1324,81 +1549,6 @@ phyint_send(ip, vifp, m) } static void -srcrt_send(ip, vifp, m) - struct ip *ip; - struct vif *vifp; - struct mbuf *m; -{ - struct mbuf *mb_copy, *mb_opts; - int hlen = ip->ip_hl << 2; - register struct ip *ip_copy; - u_char *cp; - - /* - * Make sure that adding the tunnel options won't exceed the - * maximum allowed number of option bytes. - */ - if (ip->ip_hl > (60 - TUNNEL_LEN) >> 2) { - mrtstat.mrts_cant_tunnel++; - if (mrtdebug) - log(LOG_DEBUG, "srcrt_send: no room for tunnel options, from %u\n", - ntohl(ip->ip_src.s_addr)); - return; - } - - if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL) - return; - - MGETHDR(mb_opts, M_DONTWAIT, MT_HEADER); - if (mb_opts == NULL) { - m_freem(mb_copy); - return; - } - /* - * 'Delete' the base ip header from the mb_copy chain - */ - mb_copy->m_len -= hlen; - mb_copy->m_data += hlen; - /* - * Make mb_opts be the new head of the packet chain. - * Any options of the packet were left in the old packet chain head - */ - mb_opts->m_next = mb_copy; - mb_opts->m_len = hlen + TUNNEL_LEN; - mb_opts->m_data += MSIZE - mb_opts->m_len; - mb_opts->m_pkthdr.len = mb_copy->m_pkthdr.len + TUNNEL_LEN; - /* - * Copy the base ip header from the mb_copy chain to the new head mbuf - */ - ip_copy = mtod(mb_opts, struct ip *); - bcopy((caddr_t)ip_copy, mtod(mb_opts, caddr_t), hlen); - ip_copy->ip_ttl--; - ip_copy->ip_dst = vifp->v_rmt_addr; /* remote tunnel end-point */ - /* - * Adjust the ip header length to account for the tunnel options. - */ - ip_copy->ip_hl += TUNNEL_LEN >> 2; - ip_copy->ip_len += TUNNEL_LEN; - /* - * Add the NOP and LSRR after the base ip header - */ - cp = mtod(mb_opts, u_char *) + IP_HDR_LEN; - *cp++ = IPOPT_NOP; - *cp++ = IPOPT_LSRR; - *cp++ = 11; /* LSRR option length */ - *cp++ = 8; /* LSSR pointer to second element */ - *(u_long*)cp = vifp->v_lcl_addr.s_addr; /* local tunnel end-point */ - cp += 4; - *(u_long*)cp = ip->ip_dst.s_addr; /* destination group */ - - if (vifp->v_rate_limit <= 0) - tbf_send_packet(vifp, mb_opts, 0); - else - tbf_control(vifp, mb_opts, - mtod(mb_opts, struct ip *), ip_copy->ip_len, 0); -} - -static void encap_send(ip, vifp, m) register struct ip *ip; register struct vif *vifp; @@ -1471,11 +1621,12 @@ encap_send(ip, vifp, m) */ void #ifdef MROUTE_LKM -X_multiencap_decap(m) +X_ipip_input(m) #else -multiencap_decap(m) +ipip_input(m, iphlen) #endif - register struct mbuf *m; + register struct mbuf *m; + int iphlen; { struct ifnet *ifp = m->m_pkthdr.rcvif; register struct ip *ip = mtod(m, struct ip *); @@ -1484,9 +1635,9 @@ multiencap_decap(m) register struct ifqueue *ifq; register struct vif *vifp; - if (ip->ip_p != ENCAP_PROTO) { - rip_input(m); - return; + if (!have_encap_tunnel) { + rip_input(m); + return; } /* * dump the packet if it's not to a multicast destination or if @@ -1502,7 +1653,7 @@ multiencap_decap(m) } if (ip->ip_src.s_addr != last_encap_src) { register struct vif *vife; - + vifp = viftable; vife = vifp + numvifs; last_encap_src = ip->ip_src.s_addr; @@ -1520,7 +1671,7 @@ multiencap_decap(m) mrtstat.mrts_cant_tunnel++; /*XXX*/ m_freem(m); if (mrtdebug) - log(LOG_DEBUG, "ip_mforward: no tunnel with %x\n", + log(LOG_DEBUG, "ip_mforward: no tunnel with %x", ntohl(ip->ip_src.s_addr)); return; } @@ -1554,7 +1705,7 @@ multiencap_decap(m) /* * Token bucket filter module */ -void +static void tbf_control(vifp, m, ip, p_len, imo) register struct vif *vifp; register struct mbuf *m; @@ -1564,7 +1715,7 @@ tbf_control(vifp, m, ip, p_len, imo) { tbf_update_tokens(vifp); - /* if there are enough tokens, + /* if there are enough tokens, * and the queue is empty, * send this packet out */ @@ -1601,11 +1752,11 @@ tbf_control(vifp, m, ip, p_len, imo) return; } -/* +/* * adds a packet to the queue at the interface */ -void -tbf_queue(vifp, m, ip, imo) +static void +tbf_queue(vifp, m, ip, imo) register struct vif *vifp; register struct mbuf *m; register struct ip *ip; @@ -1627,10 +1778,10 @@ tbf_queue(vifp, m, ip, imo) } -/* +/* * processes the queue at the interface */ -void +static void tbf_process_q(vifp) register struct vif *vifp; { @@ -1643,7 +1794,7 @@ tbf_process_q(vifp) */ while (vifp->v_tbf->q_len > 0) { /* locate the first packet */ - pkt_1.pkt_len = ((qtable[index][0]).pkt_len); + pkt_1.pkt_len = (qtable[index][0]).pkt_len; pkt_1.pkt_m = (qtable[index][0]).pkt_m; pkt_1.pkt_ip = (qtable[index][0]).pkt_ip; pkt_1.pkt_imo = (qtable[index][0]).pkt_imo; @@ -1665,11 +1816,11 @@ tbf_process_q(vifp) splx(s); } -/* +/* * removes the jth packet from the queue at the interface */ -void -tbf_dequeue(vifp,j) +static void +tbf_dequeue(vifp,j) register struct vif *vifp; register int j; { @@ -1681,7 +1832,7 @@ tbf_dequeue(vifp,j) qtable[index][i-1].pkt_len = qtable[index][i].pkt_len; qtable[index][i-1].pkt_ip = qtable[index][i].pkt_ip; qtable[index][i-1].pkt_imo = qtable[index][i].pkt_imo; - } + } qtable[index][i-1].pkt_m = NULL; qtable[index][i-1].pkt_len = NULL; qtable[index][i-1].pkt_ip = NULL; @@ -1690,15 +1841,15 @@ tbf_dequeue(vifp,j) vifp->v_tbf->q_len--; if (tbfdebug > 1) - log(LOG_DEBUG, "tbf_dequeue: vif# %d qlen %d\n",vifp-viftable, i-1); + log(LOG_DEBUG, "tbf_dequeue: vif# %d qlen %d",vifp-viftable, i-1); } -void +static void tbf_reprocess_q(xvifp) void *xvifp; { register struct vif *vifp = xvifp; - if (ip_mrouter == NULL) + if (ip_mrouter == NULL) return; tbf_update_tokens(vifp); @@ -1713,7 +1864,7 @@ tbf_reprocess_q(xvifp) * based on the precedence value and the priority obtained through * a lookup table - not yet implemented accurately! */ -int +static int tbf_dq_sel(vifp, ip) register struct vif *vifp; register struct ip *ip; @@ -1737,7 +1888,7 @@ tbf_dq_sel(vifp, ip) return(0); } -void +static void tbf_send_packet(vifp, m, imo) register struct vif *vifp; register struct mbuf *m; @@ -1746,13 +1897,7 @@ tbf_send_packet(vifp, m, imo) int error; int s = splnet(); - /* if source route tunnels */ - if (vifp->v_flags & VIFF_SRCRT) { - error = ip_output(m, (struct mbuf *)0, (struct route *)0, - IP_FORWARDING, imo); - if (mrtdebug > 1) - log(LOG_DEBUG, "srcrt_send on vif %d err %d\n", vifp-viftable, error); - } else if (vifp->v_flags & VIFF_TUNNEL) { + if (vifp->v_flags & VIFF_TUNNEL) { /* If tunnel options */ ip_output(m, (struct mbuf *)0, (struct route *)0, IP_FORWARDING, imo); @@ -1762,8 +1907,8 @@ tbf_send_packet(vifp, m, imo) IP_FORWARDING, imo); FREE(imo, M_IPMOPTS); - if (mrtdebug > 1) - log(LOG_DEBUG, "phyint_send on vif %d err %d\n", vifp-viftable, error); + if (mrtdebug & DEBUG_XMIT) + log(LOG_DEBUG, "phyint_send on vif %d err %d", vifp-viftable, error); } splx(s); } @@ -1772,7 +1917,7 @@ tbf_send_packet(vifp, m, imo) * the elapsed time (between the last time and time now) * in milliseconds & update the no. of tokens in the bucket */ -void +static void tbf_update_tokens(vifp) register struct vif *vifp; { @@ -1800,54 +1945,255 @@ priority(vifp, ip) register struct vif *vifp; register struct ip *ip; { - register u_long graddr; register int prio; - /* temporary hack; will add general packet classifier some day */ - - prio = 50; /* default priority */ + /* temporary hack; may add general packet classifier some day */ - /* check for source route options and add option length to get dst */ - if (vifp->v_flags & VIFF_SRCRT) - graddr = ntohl((ip+8)->ip_dst.s_addr); - else - graddr = ntohl(ip->ip_dst.s_addr); - - switch (graddr & 0xf) { - case 0x0: break; - case 0x1: if (graddr == 0xe0020001) prio = 65; /* MBone Audio */ - break; - case 0x2: break; - case 0x3: break; - case 0x4: break; - case 0x5: break; - case 0x6: break; - case 0x7: break; - case 0x8: break; - case 0x9: break; - case 0xa: if (graddr == 0xe000010a) prio = 85; /* IETF Low Audio 1 */ - break; - case 0xb: if (graddr == 0xe000010b) prio = 75; /* IETF Audio 1 */ - break; - case 0xc: if (graddr == 0xe000010c) prio = 60; /* IETF Video 1 */ - break; - case 0xd: if (graddr == 0xe000010d) prio = 80; /* IETF Low Audio 2 */ - break; - case 0xe: if (graddr == 0xe000010e) prio = 70; /* IETF Audio 2 */ - break; - case 0xf: if (graddr == 0xe000010f) prio = 55; /* IETF Video 2 */ - break; + /* + * The UDP port space is divided up into four priority ranges: + * [0, 16384) : unclassified - lowest priority + * [16384, 32768) : audio - highest priority + * [32768, 49152) : whiteboard - medium priority + * [49152, 65536) : video - low priority + */ + if (ip->ip_p == IPPROTO_UDP) { + struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2)); + switch (ntohs(udp->uh_dport) & 0xc000) { + case 0x4000: + prio = 70; + break; + case 0x8000: + prio = 60; + break; + case 0xc000: + prio = 55; + break; + default: + prio = 50; + break; + } + if (tbfdebug > 1) + log(LOG_DEBUG, "port %x prio%d", ntohs(udp->uh_dport), prio); + } else { + prio = 50; } - - if (tbfdebug > 1) log(LOG_DEBUG, "graddr%x prio%d\n", graddr, prio); - return prio; } /* - * End of token bucket filter modifications + * End of token bucket filter modifications */ +int +ip_rsvp_vif_init(so, m) + struct socket *so; + struct mbuf *m; +{ + int i; + register int s; + + if (rsvpdebug) + printf("ip_rsvp_vif_init: so_type = %d, pr_protocol = %d\n", + so->so_type, so->so_proto->pr_protocol); + + if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) + return EOPNOTSUPP; + + /* Check mbuf. */ + if (m == NULL || m->m_len != sizeof(int)) { + return EINVAL; + } + i = *(mtod(m, int *)); + + if (rsvpdebug) + printf("ip_rsvp_vif_init: vif = %d rsvp_on = %d\n",i,rsvp_on); + + s = splnet(); + + /* Check vif. */ + if (!legal_vif_num(i)) { + splx(s); + return EADDRNOTAVAIL; + } + + /* Check if socket is available. */ + if (viftable[i].v_rsvpd != NULL) { + splx(s); + return EADDRINUSE; + } + + viftable[i].v_rsvpd = so; + /* This may seem silly, but we need to be sure we don't over-increment + * the RSVP counter, in case something slips up. + */ + if (!viftable[i].v_rsvp_on) { + viftable[i].v_rsvp_on = 1; + rsvp_on++; + } + + splx(s); + return 0; +} + +int +ip_rsvp_vif_done(so, m) + struct socket *so; + struct mbuf *m; +{ + int i; + register int s; + + if (rsvpdebug) + printf("ip_rsvp_vif_done: so_type = %d, pr_protocol = %d\n", + so->so_type, so->so_proto->pr_protocol); + + if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) + return EOPNOTSUPP; + + /* Check mbuf. */ + if (m == NULL || m->m_len != sizeof(int)) { + return EINVAL; + } + i = *(mtod(m, int *)); + + s = splnet(); + + /* Check vif. */ + if (!legal_vif_num(i)) { + splx(s); + return EADDRNOTAVAIL; + } + + if (rsvpdebug) + printf("ip_rsvp_vif_done: v_rsvpd = %x so = %x\n", + viftable[i].v_rsvpd, so); + + viftable[i].v_rsvpd = NULL; + /* This may seem silly, but we need to be sure we don't over-decrement + * the RSVP counter, in case something slips up. + */ + if (viftable[i].v_rsvp_on) { + viftable[i].v_rsvp_on = 0; + rsvp_on--; + } + + splx(s); + return 0; +} + +void +ip_rsvp_force_done(so) + struct socket *so; +{ + int vifi; + register int s; + + /* Don't bother if it is not the right type of socket. */ + if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) + return; + + s = splnet(); + + /* The socket may be attached to more than one vif...this + * is perfectly legal. + */ + for (vifi = 0; vifi < numvifs; vifi++) { + if (viftable[vifi].v_rsvpd == so) { + viftable[vifi].v_rsvpd = NULL; + /* This may seem silly, but we need to be sure we don't + * over-decrement the RSVP counter, in case something slips up. + */ + if (viftable[vifi].v_rsvp_on) { + viftable[vifi].v_rsvp_on = 0; + rsvp_on--; + } + } + } + + splx(s); + return; +} + +void +rsvp_input(m, ifp) + struct mbuf *m; + struct ifnet *ifp; +{ + int vifi; + register struct ip *ip = mtod(m, struct ip *); + static struct sockaddr_in rsvp_src = { AF_INET }; + register int s; + + if (rsvpdebug) + printf("rsvp_input: rsvp_on %d\n",rsvp_on); + + /* Can still get packets with rsvp_on = 0 if there is a local member + * of the group to which the RSVP packet is addressed. But in this + * case we want to throw the packet away. + */ + if (!rsvp_on) { + m_freem(m); + return; + } + + /* If the old-style non-vif-associated socket is set, then use + * it and ignore the new ones. + */ + if (ip_rsvpd != NULL) { + if (rsvpdebug) + printf("rsvp_input: Sending packet up old-style socket\n"); + rip_input(m); + return; + } + + s = splnet(); + + if (rsvpdebug) + printf("rsvp_input: check vifs\n"); + + /* Find which vif the packet arrived on. */ + for (vifi = 0; vifi < numvifs; vifi++) { + if (viftable[vifi].v_ifp == ifp) + break; + } + + if (vifi == numvifs) { + /* Can't find vif packet arrived on. Drop packet. */ + if (rsvpdebug) + printf("rsvp_input: Can't find vif for packet...dropping it.\n"); + m_freem(m); + splx(s); + return; + } + + if (rsvpdebug) + printf("rsvp_input: check socket\n"); + + if (viftable[vifi].v_rsvpd == NULL) { + /* drop packet, since there is no specific socket for this + * interface */ + if (rsvpdebug) + printf("rsvp_input: No socket defined for vif %d\n",vifi); + m_freem(m); + splx(s); + return; + } + rsvp_src.sin_addr = ip->ip_src; + + if (rsvpdebug && m) + printf("rsvp_input: m->m_len = %d, sbspace() = %d\n", + m->m_len,sbspace(&(viftable[vifi].v_rsvpd->so_rcv))); + + if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0) + if (rsvpdebug) + printf("rsvp_input: Failed to append to socket\n"); + else + if (rsvpdebug) + printf("rsvp_input: send packet up\n"); + + splx(s); +} + #ifdef MROUTE_LKM #include <sys/conf.h> #include <sys/exec.h> @@ -1884,7 +2230,7 @@ ip_mroute_mod_handle(struct lkm_table *lkmtp, int cmd) old_mrt_ioctl = mrt_ioctl; mrt_ioctl = X_mrt_ioctl; old_proto4_input = inetsw[ip_protox[ENCAP_PROTO]].pr_input; - inetsw[ip_protox[ENCAP_PROTO]].pr_input = X_multiencap_decap; + inetsw[ip_protox[ENCAP_PROTO]].pr_input = X_ipip_input; old_legal_vif_num = legal_vif_num; legal_vif_num = X_legal_vif_num; ip_mrtproto = IGMP_DVMRP; @@ -1921,5 +2267,3 @@ ip_mroute_mod(struct lkm_table *lkmtp, int cmd, int ver) { #endif /* MROUTE_LKM */ #endif /* MROUTING */ - - diff --git a/sys/netinet/ip_mroute.h b/sys/netinet/ip_mroute.h index 4c815e8..fd5ef37 100644 --- a/sys/netinet/ip_mroute.h +++ b/sys/netinet/ip_mroute.h @@ -42,28 +42,29 @@ #define _NETINET_IP_MROUTE_H_ /* - * Definitions for the kernel part of DVMRP, - * a Distance-Vector Multicast Routing Protocol. - * (See RFC-1075.) + * Definitions for IP multicast forwarding. * * Written by David Waitzman, BBN Labs, August 1988. * Modified by Steve Deering, Stanford, February 1989. * Modified by Ajit Thyagarajan, PARC, August 1993. * Modified by Ajit Thyagarajan, PARC, August 1994. * - * MROUTING 1.5 + * MROUTING Revision: 3.3.1.3 */ /* - * DVMRP-specific setsockopt commands. + * Multicast Routing set/getsockopt commands. */ -#define DVMRP_INIT 100 /* initialize forwarder */ -#define DVMRP_DONE 101 /* shut down forwarder */ -#define DVMRP_ADD_VIF 102 /* create virtual interface */ -#define DVMRP_DEL_VIF 103 /* delete virtual interface */ -#define DVMRP_ADD_MFC 104 /* insert forwarding cache entry */ -#define DVMRP_DEL_MFC 105 /* delete forwarding cache entry */ +#define MRT_INIT 100 /* initialize forwarder */ +#define MRT_DONE 101 /* shut down forwarder */ +#define MRT_ADD_VIF 102 /* create virtual interface */ +#define MRT_DEL_VIF 103 /* delete virtual interface */ +#define MRT_ADD_MFC 104 /* insert forwarding cache entry */ +#define MRT_DEL_MFC 105 /* delete forwarding cache entry */ +#define MRT_VERSION 106 /* get kernel version number */ +#define MRT_ASSERT 107 /* enable PIM assert processing */ + #define GET_TIME(t) microtime(&t) @@ -73,6 +74,7 @@ #define MAXVIFS 32 typedef u_long vifbitmap_t; typedef u_short vifi_t; /* type of a vif index */ +#define ALL_VIFS (vifi_t)-1 #define VIFM_SET(n, m) ((m) |= (1 << (n))) #define VIFM_CLR(n, m) ((m) &= ~(1 << (n))) @@ -83,59 +85,33 @@ typedef u_short vifi_t; /* type of a vif index */ /* - * Argument structure for DVMRP_ADD_VIF. - * (DVMRP_DEL_VIF takes a single vifi_t argument.) + * Argument structure for MRT_ADD_VIF. + * (MRT_DEL_VIF takes a single vifi_t argument.) */ struct vifctl { - vifi_t vifc_vifi; /* the index of the vif to be added */ - u_char vifc_flags; /* VIFF_ flags defined below */ - u_char vifc_threshold; /* min ttl required to forward on vif */ - u_int vifc_rate_limit; /* max tate */ + vifi_t vifc_vifi; /* the index of the vif to be added */ + u_char vifc_flags; /* VIFF_ flags defined below */ + u_char vifc_threshold; /* min ttl required to forward on vif */ + u_int vifc_rate_limit; /* max rate */ struct in_addr vifc_lcl_addr; /* local interface address */ struct in_addr vifc_rmt_addr; /* remote address (tunnels only) */ }; #define VIFF_TUNNEL 0x1 /* vif represents a tunnel end-point */ -#define VIFF_SRCRT 0x2 /* tunnel uses IP source routing */ +#define VIFF_SRCRT 0x2 /* tunnel uses IP source routing */ /* - * Argument structure for DVMRP_ADD_MFC + * Argument structure for MRT_ADD_MFC and MRT_DEL_MFC * (mfcc_tos to be added at a future point) */ struct mfcctl { - struct in_addr mfcc_origin; /* subnet origin of mcasts */ + struct in_addr mfcc_origin; /* ip origin of mcasts */ struct in_addr mfcc_mcastgrp; /* multicast group associated*/ - struct in_addr mfcc_originmask; /* subnet mask for origin */ vifi_t mfcc_parent; /* incoming vif */ u_char mfcc_ttls[MAXVIFS]; /* forwarding ttls on vifs */ }; /* - * Argument structure for DVMRP_DEL_MFC - */ -struct delmfcctl { - struct in_addr mfcc_origin; /* subnet origin of multicasts */ - struct in_addr mfcc_mcastgrp; /* multicast group assoc. w/ origin */ -}; - -/* - * Argument structure used by RSVP daemon to get vif information - */ -struct vif_req { - u_char v_flags; /* VIFF_ flags defined above */ - u_char v_threshold; /* min ttl required to forward on vif */ - struct in_addr v_lcl_addr; /* local interface address */ - struct in_addr v_rmt_addr; - char v_if_name[IFNAMSIZ]; /* if name */ -}; - -struct vif_conf { - u_int vifc_len; - u_int vifc_num; - struct vif_req *vifc_req; -}; - -/* * The kernel's multicast routing statistics. */ struct mrtstat { @@ -151,6 +127,7 @@ struct mrtstat { u_long mrts_drop_sel; /* pkts dropped selectively */ u_long mrts_q_overflow; /* pkts dropped - Q overflow */ u_long mrts_pkt2large; /* pkts dropped - size > BKT SIZE */ + u_long mrts_upq_sockfull; /* upcalls dropped - socket full */ }; /* @@ -159,21 +136,28 @@ struct mrtstat { struct sioc_sg_req { struct in_addr src; struct in_addr grp; - u_long count; + u_long pktcnt; + u_long bytecnt; + u_long wrong_if; }; /* * Argument structure used by mrouted to get vif pkt counts */ struct sioc_vif_req { - vifi_t vifi; - u_long icount; - u_long ocount; + vifi_t vifi; /* vif number */ + u_long icount; /* Input packet count on vif */ + u_long ocount; /* Output packet count on vif */ + u_long ibytes; /* Input byte count on vif */ + u_long obytes; /* Output byte count on vif */ }; - + #ifdef KERNEL +/* + * The kernel's virtual-interface structure. + */ struct vif { u_char v_flags; /* VIFF_ flags defined above */ u_char v_threshold; /* min ttl required to forward on vif*/ @@ -184,30 +168,56 @@ struct vif { struct ifnet *v_ifp; /* pointer to interface */ u_long v_pkt_in; /* # pkts in on interface */ u_long v_pkt_out; /* # pkts out on interface */ + u_long v_bytes_in; /* # bytes in on interface */ + u_long v_bytes_out; /* # bytes out on interface */ + struct route v_route; /* cached route if this is a tunnel */ + u_int v_rsvp_on; /* RSVP listening on this vif */ + struct socket *v_rsvpd; /* RSVP daemon socket */ }; /* - * The kernel's multicast forwarding cache entry structure - * (A field for the type of service (mfc_tos) is to be added + * The kernel's multicast forwarding cache entry structure + * (A field for the type of service (mfc_tos) is to be added * at a future point) */ struct mfc { - struct in_addr mfc_origin; /* subnet origin of mcasts */ + struct in_addr mfc_origin; /* IP origin of mcasts */ struct in_addr mfc_mcastgrp; /* multicast group associated*/ - struct in_addr mfc_originmask; /* subnet mask for origin */ vifi_t mfc_parent; /* incoming vif */ u_char mfc_ttls[MAXVIFS]; /* forwarding ttls on vifs */ u_long mfc_pkt_cnt; /* pkt count for src-grp */ + u_long mfc_byte_cnt; /* byte count for src-grp */ + u_long mfc_wrong_if; /* wrong if for src-grp */ + int mfc_expire; /* time to clean entry up */ + struct timeval mfc_last_assert; /* last time I sent an assert*/ +}; + +/* + * Struct used to communicate from kernel to multicast router + * note the convenient similarity to an IP packet + */ +struct igmpmsg { + u_long unused1; + u_long unused2; + u_char im_msgtype; /* what type of message */ +#define IGMPMSG_NOCACHE 1 +#define IGMPMSG_WRONGVIF 2 + u_char im_mbz; /* must be zero */ + u_char im_vif; /* vif rec'd on */ + u_char unused3; + struct in_addr im_src, im_dst; }; /* * Argument structure used for pkt info. while upcall is made */ struct rtdetq { - struct mbuf *m; - struct ifnet *ifp; - u_long tunnel_src; - struct ip_moptions *imo; + struct mbuf *m; /* A copy of the packet */ + struct ifnet *ifp; /* Interface pkt came in on */ + vifi_t xmt_vif; /* Saved copy of imo_multicast_vif */ +#ifdef UPCALL_TIMING + struct timeval t; /* Timestamp */ +#endif /* UPCALL_TIMING */ }; #define MFCTBLSIZ 256 @@ -220,7 +230,7 @@ struct rtdetq { #define MAX_UPQ 4 /* max. no of pkts in upcall Q */ /* - * Token Bucket filter code + * Token Bucket filter code */ #define MAX_BKT_SIZE 10000 /* 10K bytes size */ #define MAXQSIZE 10 /* max # of pkts in queue */ @@ -228,7 +238,7 @@ struct rtdetq { /* * queue structure at each vif */ -struct pkt_queue +struct pkt_queue { u_long pkt_len; /* length of packet in queue */ struct mbuf *pkt_m; /* pointer to packet mbuf */ @@ -246,7 +256,8 @@ struct tbf u_long q_len; /* length of queue at this vif */ }; -extern int (*ip_mrouter_cmd) __P((int, struct socket *, struct mbuf *)); +extern int (*ip_mrouter_set) __P((int, struct socket *, struct mbuf *)); +extern int (*ip_mrouter_get) __P((int, struct socket *, struct mbuf **)); extern int (*ip_mrouter_done) __P((void)); extern int (*mrt_ioctl) __P((int, caddr_t, struct proc *)); diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 8e6fb56..45c21cc 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -188,15 +188,20 @@ ip_output(m0, opt, ro, flags, imo) ip->ip_ttl = imo->imo_multicast_ttl; if (imo->imo_multicast_ifp != NULL) ifp = imo->imo_multicast_ifp; + if (imo->imo_multicast_vif != -1) + ip->ip_src.s_addr = + ip_mcast_src(imo->imo_multicast_vif); } else ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; /* * Confirm that the outgoing interface supports multicast. */ - if ((ifp->if_flags & IFF_MULTICAST) == 0) { - ipstat.ips_noroute++; - error = ENETUNREACH; - goto bad; + if ((imo == NULL) || (imo->imo_multicast_vif == -1)) { + if ((ifp->if_flags & IFF_MULTICAST) == 0) { + ipstat.ips_noroute++; + error = ENETUNREACH; + goto bad; + } } /* * If source address not specified yet, use address @@ -805,7 +810,7 @@ ip_setmoptions(optname, imop, m) return (ENOBUFS); *imop = imo; imo->imo_multicast_ifp = NULL; - imo->imo_multicast_vif = 0; + imo->imo_multicast_vif = -1; imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; imo->imo_num_memberships = 0; @@ -823,7 +828,7 @@ ip_setmoptions(optname, imop, m) break; } i = *(mtod(m, int *)); - if (!legal_vif_num(i)) { + if (!legal_vif_num(i) && (i != -1)) { error = EINVAL; break; } @@ -907,7 +912,7 @@ ip_setmoptions(optname, imop, m) * the route to the given multicast address. */ if (mreq->imr_interface.s_addr == INADDR_ANY) { - ro.ro_rt = NULL; + bzero((caddr_t)&ro, sizeof(ro)); dst = (struct sockaddr_in *)&ro.ro_dst; dst->sin_len = sizeof(*dst); dst->sin_family = AF_INET; @@ -1036,7 +1041,7 @@ ip_setmoptions(optname, imop, m) * If all options have default values, no need to keep the mbuf. */ if (imo->imo_multicast_ifp == NULL && - imo->imo_multicast_vif == 0 && + imo->imo_multicast_vif == -1 && imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL && imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP && imo->imo_num_memberships == 0) { @@ -1065,11 +1070,11 @@ ip_getmoptions(optname, imo, mp) switch (optname) { - case IP_MULTICAST_VIF: + case IP_MULTICAST_VIF: if (imo != NULL) *(mtod(*mp, int *)) = imo->imo_multicast_vif; else - *(mtod(*mp, int *)) = 7890; + *(mtod(*mp, int *)) = -1; (*mp)->m_len = sizeof(int); return(0); diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h index 73bd43d..3bfd8ff 100644 --- a/sys/netinet/ip_var.h +++ b/sys/netinet/ip_var.h @@ -71,11 +71,11 @@ struct ipq { * Note: ipf_next must be at same offset as ipq_next above */ struct ipasfrag { -#if BYTE_ORDER == LITTLE_ENDIAN +#if BYTE_ORDER == LITTLE_ENDIAN u_char ip_hl:4, ip_v:4; #endif -#if BYTE_ORDER == BIG_ENDIAN +#if BYTE_ORDER == BIG_ENDIAN u_char ip_v:4, ip_hl:4; #endif @@ -111,11 +111,11 @@ struct ipoption { */ struct ip_moptions { struct ifnet *imo_multicast_ifp; /* ifp for outgoing multicasts */ - u_long imo_multicast_vif; /* vif num outgoing multicasts */ u_char imo_multicast_ttl; /* TTL for outgoing multicasts */ u_char imo_multicast_loop; /* 1 => hear sends if a member */ u_short imo_num_memberships; /* no. memberships this socket */ struct in_multi *imo_membership[IP_MAX_MEMBERSHIPS]; + u_long imo_multicast_vif; /* vif num outgoing multicasts */ }; struct ipstat { @@ -160,6 +160,7 @@ extern u_char ip_protox[]; extern struct socket *ip_rsvpd; /* reservation protocol daemon */ extern struct socket *ip_mrouter; /* multicast routing daemon */ extern int (*legal_vif_num) __P((int)); +extern u_long (*ip_mcast_src) __P((int)); int ip_ctloutput __P((int, struct socket *, int, int, struct mbuf **)); void ip_deq __P((struct ipasfrag *)); diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index 89697a0..133b6ac 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -136,27 +136,6 @@ rip_input(m) } } -void rip_ip_input(mm, ip_mrouter, src) - struct mbuf *mm; - register struct socket *ip_mrouter; - struct sockaddr *src; -{ - if (ip_mrouter) - { - if (sbappendaddr(&ip_mrouter->so_rcv, src, - mm, (struct mbuf *) 0) == 0) - m_freem(mm); - else - sorwakeup(ip_mrouter); - } - else - { - m_freem(mm); - ipstat.ips_noproto++; - ipstat.ips_delivered--; - } -} - /* * Generate IP header and pass packet to ip_output. * Tack on options user may have setup with control call. @@ -249,7 +228,7 @@ rip_ctloutput(op, so, level, optname, m) } if (op == PRCO_SETOPT) { - error=(*ip_fw_ctl_ptr)(optname, *m); + error=(*ip_fw_ctl_ptr)(optname, *m); if (*m) (void)m_free(*m); } @@ -261,13 +240,13 @@ rip_ctloutput(op, so, level, optname, m) case IP_ACCT_ADD: case IP_ACCT_CLR: case IP_ACCT_FLUSH: - case IP_ACCT_ZERO: + case IP_ACCT_ZERO: if (ip_acct_ctl_ptr==NULL) { if (*m) (void)m_free(*m); return(EINVAL); } - + if (op == PRCO_SETOPT) { error=(*ip_acct_ctl_ptr)(optname, *m); if (*m) @@ -285,16 +264,26 @@ rip_ctloutput(op, so, level, optname, m) return ip_rsvp_done(); break; - case DVMRP_INIT: - case DVMRP_DONE: - case DVMRP_ADD_VIF: - case DVMRP_DEL_VIF: - case DVMRP_ADD_MFC: - case DVMRP_DEL_MFC: + case IP_RSVP_VIF_ON: + return ip_rsvp_vif_init(so, *m); + + case IP_RSVP_VIF_OFF: + return ip_rsvp_vif_done(so, *m); + + case MRT_INIT: + case MRT_DONE: + case MRT_ADD_VIF: + case MRT_DEL_VIF: + case MRT_ADD_MFC: + case MRT_DEL_MFC: + case MRT_VERSION: + case MRT_ASSERT: if (op == PRCO_SETOPT) { - error = ip_mrouter_cmd(optname, so, *m); + error = ip_mrouter_set(optname, so, *m); if (*m) (void)m_free(*m); + } else if (op == PRCO_GETOPT) { + error = ip_mrouter_get(optname, so, m); } else error = EINVAL; return (error); |