diff options
74 files changed, 1305 insertions, 364 deletions
diff --git a/sbin/ipfw/ipfw.8 b/sbin/ipfw/ipfw.8 index 7796005..b32de33 100644 --- a/sbin/ipfw/ipfw.8 +++ b/sbin/ipfw/ipfw.8 @@ -479,6 +479,9 @@ When the packet can be associated with a local socket. .It Divert status Whether a packet came from a divert socket (e.g., .Xr natd 8 ) . +.It Fib annotation state +Whether a packet has been tagged for using a specific FIB (routing table) +in future forwarding decisions. .El .Pp Note that some of the above information, e.g.\& source MAC or IP addresses and @@ -842,6 +845,12 @@ for more information on and .Cm ngtee actions. +.It Cm setfib Ar fibnum +The packet is tagged so as to use the FIB (routing table) +.Ar fibnum +in any subsequent forwarding decisions. Initially this is +limited to the values 0 through 15. See +.Xr setfib 8 .El .Ss RULE BODY The body of a rule contains zero or more patterns (such as @@ -1144,6 +1153,9 @@ IPSec authentication headers .Pq Cm ah , and IPSec encapsulated security payload headers .Pq Cm esp . +.It Cm fib Ar fibnum +Matches a packet that has been tagged to use +the given FIB (routing table) number. .It Cm flow-id Ar labels Matches IPv6 packets containing any of the flow labels given in .Ar labels . 
diff --git a/sbin/ipfw/ipfw2.c b/sbin/ipfw/ipfw2.c index d937599..1a99cb1 100644 --- a/sbin/ipfw/ipfw2.c +++ b/sbin/ipfw/ipfw2.c @@ -341,6 +341,9 @@ enum tokens { TOK_IPV4, TOK_UNREACH6, TOK_RESET6, + + TOK_FIB, + TOK_SETFIB, }; struct _s_x dummynet_params[] = { @@ -413,6 +416,7 @@ struct _s_x rule_actions[] = { { "check-state", TOK_CHECKSTATE }, { "//", TOK_COMMENT }, { "nat", TOK_NAT }, + { "setfib", TOK_SETFIB }, { NULL, 0 } /* terminator */ }; @@ -443,6 +447,7 @@ struct _s_x rule_options[] = { { "via", TOK_VIA }, { "fragment", TOK_FRAG }, { "frag", TOK_FRAG }, + { "fib", TOK_FIB }, { "ipoptions", TOK_IPOPTS }, { "ipopts", TOK_IPOPTS }, { "iplen", TOK_IPLEN }, @@ -1615,6 +1620,10 @@ show_ipfw(struct ip_fw *rule, int pcwidth, int bcwidth) PRINT_UINT_ARG("nat ", cmd->arg1); break; + case O_SETFIB: + PRINT_UINT_ARG("setfib ", cmd->arg1); + break; + default: printf("** unrecognized action %d len %d ", cmd->opcode, cmd->len); @@ -1817,6 +1826,10 @@ show_ipfw(struct ip_fw *rule, int pcwidth, int bcwidth) printf(" frag"); break; + case O_FIB: + printf(" fib %u", cmd->arg1 ); + break; + case O_IN: printf(cmd->len & F_NOT ? " out" : " in"); break; @@ -2712,7 +2725,7 @@ help(void) "RULE-BODY: check-state [PARAMS] | ACTION [PARAMS] ADDR [OPTION_LIST]\n" "ACTION: check-state | allow | count | deny | unreach{,6} CODE |\n" " skipto N | {divert|tee} PORT | forward ADDR |\n" -" pipe N | queue N | nat N\n" +" pipe N | queue N | nat N | setfib FIB\n" "PARAMS: [log [logamount LOGLIMIT]] [altq QUEUE_NAME]\n" "ADDR: [ MAC dst src ether_type ] \n" " [ ip from IPADDR [ PORT ] to IPADDR [ PORTLIST ] ]\n" @@ -2728,7 +2741,7 @@ help(void) " estab | frag | {gid|uid} N | icmptypes LIST | in | out | ipid LIST |\n" " iplen LIST | ipoptions SPEC | ipprecedence | ipsec | iptos SPEC |\n" " ipttl LIST | ipversion VER | keep-state | layer2 | limit ... |\n" -" icmp6types LIST | ext6hdr LIST | flow-id N[,N] |\n" +" icmp6types LIST | ext6hdr LIST | flow-id N[,N] | fib FIB |\n" " mac ... 
| mac-type LIST | proto LIST | {recv|xmit|via} {IF|IPADDR} |\n" " setup | {tcpack|tcpseq|tcpwin} NN | tcpflags SPEC | tcpoptions SPEC |\n" " tcpdatalen LIST | verrevpath | versrcreach | antispoof\n" @@ -4865,6 +4878,7 @@ add(int ac, char *av[]) action->opcode = O_NAT; action->len = F_INSN_SIZE(ipfw_insn_nat); goto chkarg; + case TOK_QUEUE: action->opcode = O_QUEUE; goto chkarg; @@ -4946,6 +4960,21 @@ chkarg: action->opcode = O_COUNT; ac++; av--; /* go back... */ break; + + case TOK_SETFIB: + { + int numfibs; + size_t intsize = sizeof(numfibs); /* in: buffer size, out: bytes returned */ + action->opcode = O_SETFIB; + NEED1("missing fib number"); + action->arg1 = strtoul(*av, NULL, 10); + if (sysctlbyname("net.fibs", &numfibs, &intsize, NULL, 0) == -1) + errx(EX_DATAERR, "fibs not suported.\n"); + if (action->arg1 >= numfibs) /* Temporary */ + errx(EX_DATAERR, "fib too large.\n"); + ac--; av++; + break; + } default: errx(EX_DATAERR, "invalid action %s\n", av[-1]); @@ -5626,6 +5655,12 @@ read_options: ac--; av++; break; + case TOK_FIB: + NEED1("fib requires fib number"); + fill_cmd(cmd, O_FIB, 0, strtoul(*av, NULL, 0)); + ac--; av++; + break; + default: errx(EX_USAGE, "unrecognised option [%d] %s\n", i, s); } diff --git a/sys/conf/NOTES b/sys/conf/NOTES index 0e290ab..8c90cc9 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -509,6 +509,8 @@ options HWPMC_HOOKS # Other necessary kernel hooks options INET #Internet communications protocols options INET6 #IPv6 communications protocols +options ROUTETABLES=2 # max 16. 1 is back compatible.
+ # In order to enable IPSEC you MUST also add device crypto to # your kernel configuration options IPSEC #IP security (requires device crypto) diff --git a/sys/conf/options b/sys/conf/options index 07a8f45..6ebfa92 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -364,6 +364,7 @@ ETHER_II opt_ef.h ETHER_8023 opt_ef.h ETHER_8022 opt_ef.h ETHER_SNAP opt_ef.h +ROUTETABLES opt_route.h MROUTING opt_mrouting.h INET opt_inet.h INET6 opt_inet6.h diff --git a/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c b/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c index 4a279fa..0eb2632 100644 --- a/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c +++ b/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c @@ -970,7 +970,7 @@ frdest_t *fdp; dst->sin_addr = fdp->fd_ip; dst->sin_len = sizeof(*dst); - rtalloc(ro); + in_rtalloc(ro, 0); if ((ifp == NULL) && (ro->ro_rt != NULL)) ifp = ro->ro_rt->rt_ifp; @@ -1158,7 +1158,7 @@ fr_info_t *fin; dst->sin_len = sizeof(*dst); dst->sin_family = AF_INET; dst->sin_addr = fin->fin_src; - rtalloc(&iproute); + in_rtalloc(&iproute, 0); if (iproute.ro_rt == NULL) return 0; return (fin->fin_ifp == iproute.ro_rt->rt_ifp); diff --git a/sys/contrib/pf/net/pf.c b/sys/contrib/pf/net/pf.c index 96bf2de..fd8c395 100644 --- a/sys/contrib/pf/net/pf.c +++ b/sys/contrib/pf/net/pf.c @@ -1839,7 +1839,14 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, pf_mtag->tag = rtag; if (r != NULL && r->rtableid >= 0) +#ifdef __FreeBSD__ + { + M_SETFIB(m, r->rtableid); +#endif pf_mtag->rtableid = r->rtableid; +#ifdef __FreeBSD__ + } +#endif #ifdef ALTQ if (r != NULL && r->qid) { pf_mtag->qid = r->qid; @@ -2004,7 +2011,14 @@ pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, #endif if (r->rtableid >= 0) +#ifdef __FreeBSD__ + { + M_SETFIB(m0, r->rtableid); +#endif pf_mtag->rtableid = r->rtableid; +#ifdef __FreeBSD__ + } +#endif #ifdef ALTQ if (r->qid) { @@ -2195,7 +2209,14 @@ pf_tag_packet(struct mbuf *m, struct pf_mtag *pf_mtag, int tag, int rtableid) if 
(tag > 0) pf_mtag->tag = tag; if (rtableid >= 0) +#ifdef __FreeBSD__ + { + M_SETFIB(m, rtableid); +#endif pf_mtag->rtableid = rtableid; +#ifdef __FreeBSD__ + } +#endif return (0); } @@ -3141,7 +3162,7 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer) #ifdef RTF_PRCLONING rtalloc_ign(&ro, (RTF_CLONING | RTF_PRCLONING)); #else /* !RTF_PRCLONING */ - rtalloc_ign(&ro, RTF_CLONING); + in_rtalloc_ign(&ro, RTF_CLONING, 0); #endif #else /* ! __FreeBSD__ */ rtalloc_noclone(&ro, NO_CLONING); @@ -5946,7 +5967,11 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif) goto out; #ifdef __FreeBSD__ - rtalloc_ign((struct route *)&ro, RTF_CLONING); +/* XXX MRT not always INET */ /* stick with table 0 though */ + if (af == AF_INET) + in_rtalloc_ign((struct route *)&ro, RTF_CLONING, 0); + else + rtalloc_ign((struct route *)&ro, RTF_CLONING); #else /* ! __FreeBSD__ */ rtalloc_noclone((struct route *)&ro, NO_CLONING); #endif @@ -6025,7 +6050,10 @@ pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw) # ifdef RTF_PRCLONING rtalloc_ign((struct route *)&ro, (RTF_CLONING|RTF_PRCLONING)); # else /* !RTF_PRCLONING */ - rtalloc_ign((struct route *)&ro, RTF_CLONING); + if (af == AF_INET) + in_rtalloc_ign((struct route *)&ro, RTF_CLONING, 0); + else + rtalloc_ign((struct route *)&ro, RTF_CLONING); # endif #else /* ! 
__FreeBSD__ */ rtalloc_noclone((struct route *)&ro, NO_CLONING); @@ -6105,7 +6133,7 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, dst->sin_addr = ip->ip_dst; if (r->rt == PF_FASTROUTE) { - rtalloc(ro); + in_rtalloc(ro, 0); if (ro->ro_rt == 0) { ipstat.ips_noroute++; goto bad; diff --git a/sys/contrib/pf/net/pf_ioctl.c b/sys/contrib/pf/net/pf_ioctl.c index f9110cb..f765029 100644 --- a/sys/contrib/pf/net/pf_ioctl.c +++ b/sys/contrib/pf/net/pf_ioctl.c @@ -1532,7 +1532,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } #ifdef __FreeBSD__ /* ROUTEING */ - if (rule->rtableid > 0) + if (rule->rtableid > 0 && rule->rtableid >= rt_numfibs) /* no such FIB */ #else if (rule->rtableid > 0 && !rtable_exists(rule->rtableid)) #endif @@ -1795,7 +1795,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (newrule->rtableid > 0 && #ifdef __FreeBSD__ /* ROUTING */ - 1) + newrule->rtableid >= rt_numfibs) /* no such FIB */ #else !rtable_exists(newrule->rtableid)) #endif diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c index 829a31f..22db8a8 100644 --- a/sys/kern/init_sysent.c +++ b/sys/kern/init_sysent.c @@ -203,7 +203,7 @@ struct sysent sysent[] = { { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 }, /* 172 = nosys */ { AS(freebsd6_pread_args), (sy_call_t *)freebsd6_pread, AUE_PREAD, NULL, 0, 0 }, /* 173 = freebsd6_pread */ { AS(freebsd6_pwrite_args), (sy_call_t *)freebsd6_pwrite, AUE_PWRITE, NULL, 0, 0 }, /* 174 = freebsd6_pwrite */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 }, /* 175 = nosys */ + { AS(setfib_args), (sy_call_t *)setfib, AUE_NULL, NULL, 0, 0 }, /* 175 = setfib */ { AS(ntp_adjtime_args), (sy_call_t *)ntp_adjtime, AUE_NTP_ADJTIME, NULL, 0, 0 }, /* 176 = ntp_adjtime */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 }, /* 177 = sfork */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 }, /* 178 = getdescriptor */ diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c index 8e6b5f2..aeeaf33 100644 ---
a/sys/kern/sys_socket.c +++ b/sys/kern/sys_socket.c @@ -199,7 +199,7 @@ soo_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred, if (IOCGROUP(cmd) == 'i') error = ifioctl(so, cmd, data, td); else if (IOCGROUP(cmd) == 'r') - error = rtioctl(cmd, data); + error = rtioctl_fib(cmd, data, so->so_fibnum); else error = ((*so->so_proto->pr_usrreqs->pru_control) (so, cmd, data, 0, td)); diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c index b3a5ff9..8fb0127 100644 --- a/sys/kern/syscalls.c +++ b/sys/kern/syscalls.c @@ -182,7 +182,7 @@ const char *syscallnames[] = { "#172", /* 172 = nosys */ "freebsd6_pread", /* 173 = freebsd6_pread */ "freebsd6_pwrite", /* 174 = freebsd6_pwrite */ - "#175", /* 175 = nosys */ + "setfib", /* 175 = setfib */ "ntp_adjtime", /* 176 = ntp_adjtime */ "#177", /* 177 = sfork */ "#178", /* 178 = getdescriptor */ diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master index 1e98317..4cb55fa 100644 --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -340,7 +340,7 @@ 174 AUE_PWRITE STD { ssize_t freebsd6_pwrite(int fd, \ const void *buf, \ size_t nbyte, int pad, off_t offset); } -175 AUE_NULL UNIMPL nosys +175 AUE_NULL STD { int setfib(int fibnum); } 176 AUE_NTP_ADJTIME STD { int ntp_adjtime(struct timex *tp); } 177 AUE_NULL UNIMPL sfork (BSD/OS 2.x) 178 AUE_NULL UNIMPL getdescriptor (BSD/OS 2.x) diff --git a/sys/kern/systrace_args.c b/sys/kern/systrace_args.c index b20f1ed..98558cc 100644 --- a/sys/kern/systrace_args.c +++ b/sys/kern/systrace_args.c @@ -959,6 +959,13 @@ systrace_args(int sysnum, void *params, u_int64_t *uarg, int *n_args) *n_args = 5; break; } + /* setfib */ + case 175: { + struct setfib_args *p = params; + iarg[0] = p->fibnum; /* int */ + *n_args = 1; + break; + } /* ntp_adjtime */ case 176: { struct ntp_adjtime_args *p = params; diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index b93cb2f..c9b6076 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -122,6 
+122,7 @@ __FBSDID("$FreeBSD$"); #include <sys/socket.h> #include <sys/socketvar.h> #include <sys/resourcevar.h> +#include <net/route.h> #include <sys/signalvar.h> #include <sys/stat.h> #include <sys/sx.h> @@ -360,6 +361,11 @@ socreate(int dom, struct socket **aso, int type, int proto, TAILQ_INIT(&so->so_comp); so->so_type = type; so->so_cred = crhold(cred); + if ((prp->pr_domain->dom_family == PF_INET) || + (prp->pr_domain->dom_family == PF_ROUTE)) + so->so_fibnum = td->td_proc->p_fibnum; + else + so->so_fibnum = 0; so->so_proto = prp; #ifdef MAC mac_socket_create(cred, so); @@ -2027,6 +2033,20 @@ sosetopt(struct socket *so, struct sockopt *sopt) SOCK_UNLOCK(so); break; + case SO_SETFIB: + error = sooptcopyin(sopt, &optval, sizeof optval, + sizeof optval); + if (error == 0 && (optval < 0 || optval >= rt_numfibs)) + error = EINVAL; /* FIBs are numbered 0 .. rt_numfibs-1 */ + if (error != 0) + goto bad; + if ((so->so_proto->pr_domain->dom_family == PF_INET) || + (so->so_proto->pr_domain->dom_family == PF_ROUTE)) { + so->so_fibnum = optval; + } else { + so->so_fibnum = 0; + } + break; case SO_SNDBUF: case SO_RCVBUF: case SO_SNDLOWAT: diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c index e1d6187..7afe991 100644 --- a/sys/kern/vfs_export.c +++ b/sys/kern/vfs_export.c @@ -161,12 +161,25 @@ vfs_hang_addrlist(struct mount *mp, struct netexport *nep, * Seems silly to initialize every AF when most are not used, * do so on demand here */ - for (dom = domains; dom; dom = dom->dom_next) + for (dom = domains; dom; dom = dom->dom_next) { + KASSERT(((i == AF_INET) || (i == AF_INET6)), + ("unexpected protocol in vfs_hang_addrlist")); if (dom->dom_family == i && dom->dom_rtattach) { - dom->dom_rtattach((void **) &nep->ne_rtable[i], - dom->dom_rtoffset); + /* + * XXX MRT + * The INET and INET6 domains know the + * offset already. We don't need to send it + * So we just use it as a flag to say that + * we are or are not setting up a real routing + * table.
Only IP and IPV6 need have this + * be 0 so all other protocols can stay the + * same (ABI compatible). + */ + dom->dom_rtattach( + (void **) &nep->ne_rtable[i], 0); break; } + } if ((rnh = nep->ne_rtable[i]) == NULL) { error = ENOBUFS; vfs_mount_error(mp, "%s %s %d", diff --git a/sys/net/if.c b/sys/net/if.c index c3c367b..85306a4 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -740,11 +740,14 @@ if_detach(struct ifnet *ifp) * to this interface...oh well... */ for (i = 1; i <= AF_MAX; i++) { - if ((rnh = rt_tables[i]) == NULL) + int j; + for (j = 0; j < rt_numfibs; j++) { + if ((rnh = rt_tables[j][i]) == NULL) continue; RADIX_NODE_HEAD_LOCK(rnh); (void) rnh->rnh_walktree(rnh, if_rtdel, ifp); RADIX_NODE_HEAD_UNLOCK(rnh); + } } /* Announce that the interface is gone. */ @@ -1010,9 +1013,9 @@ if_rtdel(struct radix_node *rn, void *arg) if ((rt->rt_flags & RTF_UP) == 0) return (0); - err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, + err = rtrequest_fib(RTM_DELETE, rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, - (struct rtentry **) NULL); + (struct rtentry **) NULL, rt->rt_fibnum); if (err) { log(LOG_WARNING, "if_rtdel: error %d\n", err); } diff --git a/sys/net/if_atmsubr.c b/sys/net/if_atmsubr.c index 9d1a7fa..1564737 100644 --- a/sys/net/if_atmsubr.c +++ b/sys/net/if_atmsubr.c @@ -158,7 +158,8 @@ atm_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst, * check route */ if (rt0 != NULL) { - error = rt_check(&rt, &rt0, dst); + error = rt_check_fib(&rt, &rt0, + dst, rt0->rt_fibnum); if (error) goto bad; RT_UNLOCK(rt); diff --git a/sys/net/if_fwsubr.c b/sys/net/if_fwsubr.c index e001c29..65b2aff 100644 --- a/sys/net/if_fwsubr.c +++ b/sys/net/if_fwsubr.c @@ -103,7 +103,7 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, } if (rt0 != NULL) { - error = rt_check(&rt, &rt0, dst); + error = rt_check_fib(&rt, &rt0, dst, rt0->rt_fibnum); if (error) goto bad; RT_UNLOCK(rt); diff --git a/sys/net/if_gif.c b/sys/net/if_gif.c index 
63f3c7d..8310881 100644 --- a/sys/net/if_gif.c +++ b/sys/net/if_gif.c @@ -46,6 +46,7 @@ #include <sys/time.h> #include <sys/sysctl.h> #include <sys/syslog.h> +#include <sys/proc.h> #include <sys/protosw.h> #include <sys/conf.h> #include <machine/cpu.h> @@ -155,6 +156,7 @@ gif_clone_create(ifc, unit, params) struct gif_softc *sc; sc = malloc(sizeof(struct gif_softc), M_GIF, M_WAITOK | M_ZERO); + sc->gif_fibnum = curthread->td_proc->p_fibnum; GIF2IFP(sc) = if_alloc(IFT_GIF); if (GIF2IFP(sc) == NULL) { free(sc, M_GIF); @@ -441,6 +443,7 @@ gif_output(ifp, m, dst, rt) if (ifp->if_bridge) af = AF_LINK; + M_SETFIB(m, sc->gif_fibnum); /* inner AF-specific encapsulation */ /* XXX should we check if our outer source is legal? */ diff --git a/sys/net/if_gif.h b/sys/net/if_gif.h index 8e9ceb1..4e417fd 100644 --- a/sys/net/if_gif.h +++ b/sys/net/if_gif.h @@ -67,6 +67,7 @@ struct gif_softc { #endif } gifsc_gifscr; int gif_flags; + u_int gif_fibnum; const struct encaptab *encap_cookie4; const struct encaptab *encap_cookie6; void *gif_netgraph; /* ng_gif(4) netgraph node info */ diff --git a/sys/net/if_gre.c b/sys/net/if_gre.c index b4b42b9..9045f06 100644 --- a/sys/net/if_gre.c +++ b/sys/net/if_gre.c @@ -58,6 +58,7 @@ #include <sys/module.h> #include <sys/mbuf.h> #include <sys/priv.h> +#include <sys/proc.h> #include <sys/protosw.h> #include <sys/socket.h> #include <sys/sockio.h> @@ -201,6 +202,7 @@ gre_clone_create(ifc, unit, params) GRE2IFP(sc)->if_flags |= IFF_LINK0; sc->encap = NULL; sc->called = 0; + sc->gre_fibnum = curthread->td_proc->p_fibnum; sc->wccp_ver = WCCP_V1; if_attach(GRE2IFP(sc)); bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t)); @@ -395,6 +397,8 @@ gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, goto end; } + M_SETFIB(m, sc->gre_fibnum); /* The envelope may use a different FIB */ + gh = mtod(m, struct greip *); if (sc->g_proto == IPPROTO_GRE) { /* we don't have any GRE flags for now */ @@ -754,6 +758,7 @@ gre_compute_route(struct gre_softc 
*sc) * toggle last bit, so our interface is not found, but a less * specific route. I'd rather like to specify a shorter mask, * but this is not possible. Should work though. XXX + * XXX MRT Use a different FIB for the tunnel to solve this problem. */ if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0) { ((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr ^= @@ -765,7 +770,7 @@ gre_compute_route(struct gre_softc *sc) inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr)); #endif - rtalloc(ro); + rtalloc_fib(ro, sc->gre_fibnum); /* * check if this returned a route at all and this route is no diff --git a/sys/net/if_gre.h b/sys/net/if_gre.h index 6c8e853..3c34bec 100644 --- a/sys/net/if_gre.h +++ b/sys/net/if_gre.h @@ -59,6 +59,7 @@ struct gre_softc { LIST_ENTRY(gre_softc) sc_list; int gre_unit; int gre_flags; + u_int gre_fibnum; /* use this fib for envelopes */ struct in_addr g_src; /* source address of gre packets */ struct in_addr g_dst; /* destination address of gre packets */ struct route route; /* routing entry that determines, where a diff --git a/sys/net/if_iso88025subr.c b/sys/net/if_iso88025subr.c index dd50923..f56101e 100644 --- a/sys/net/if_iso88025subr.c +++ b/sys/net/if_iso88025subr.c @@ -259,7 +259,8 @@ iso88025_output(ifp, m, dst, rt0) /* Calculate routing info length based on arp table entry */ /* XXX any better way to do this ? 
*/ if (rt0 != NULL) { - error = rt_check(&rt, &rt0, dst); +/* XXX MRT *//* Guess only */ + error = rt_check_fib(&rt, &rt0, dst, rt0->rt_fibnum); if (error) goto bad; RT_UNLOCK(rt); diff --git a/sys/net/if_stf.c b/sys/net/if_stf.c index 8f70df6..f373eaa 100644 --- a/sys/net/if_stf.c +++ b/sys/net/if_stf.c @@ -87,6 +87,7 @@ #include <sys/kernel.h> #include <sys/module.h> #include <sys/protosw.h> +#include <sys/proc.h> #include <sys/queue.h> #include <machine/cpu.h> @@ -136,6 +137,7 @@ struct stf_softc { struct route_in6 __sc_ro6; /* just for safety */ } __sc_ro46; #define sc_ro __sc_ro46.__sc_ro4 + u_int sc_fibnum; const struct encaptab *encap_cookie; }; #define STF2IFP(sc) ((sc)->sc_ifp) @@ -219,6 +221,7 @@ stf_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) return (ENOSPC); } ifp->if_softc = sc; + sc->sc_fibnum = curthread->td_proc->p_fibnum; /* * Set the name manually rather then using if_initname because @@ -521,7 +524,7 @@ stf_output(ifp, m, dst, rt) } if (sc->sc_ro.ro_rt == NULL) { - rtalloc(&sc->sc_ro); + rtalloc_fib(&sc->sc_ro, sc->sc_fibnum); if (sc->sc_ro.ro_rt == NULL) { m_freem(m); ifp->if_oerrors++; @@ -529,6 +532,7 @@ stf_output(ifp, m, dst, rt) } } + M_SETFIB(m, sc->sc_fibnum); ifp->if_opackets++; return ip_output(m, NULL, &sc->sc_ro, 0, NULL, NULL); } @@ -599,7 +603,8 @@ stf_checkaddr4(sc, in, inifp) sin.sin_family = AF_INET; sin.sin_len = sizeof(struct sockaddr_in); sin.sin_addr = *in; - rt = rtalloc1((struct sockaddr *)&sin, 0, 0UL); + rt = rtalloc1_fib((struct sockaddr *)&sin, 0, + 0UL, sc->sc_fibnum); if (!rt || rt->rt_ifp != inifp) { #if 0 log(LOG_WARNING, "%s: packet from 0x%x dropped " diff --git a/sys/net/if_var.h b/sys/net/if_var.h index 8fbf729..d738e32 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -690,6 +690,8 @@ struct ifaddr *ifa_ifwithbroadaddr(struct sockaddr *); struct ifaddr *ifa_ifwithdstaddr(struct sockaddr *); struct ifaddr *ifa_ifwithnet(struct sockaddr *); struct ifaddr *ifa_ifwithroute(int, 
struct sockaddr *, struct sockaddr *); +struct ifaddr *ifa_ifwithroute_fib(int, struct sockaddr *, struct sockaddr *, u_int); + struct ifaddr *ifaof_ifpforaddr(struct sockaddr *, struct ifnet *); int if_simloop(struct ifnet *ifp, struct mbuf *m, int af, int hlen); diff --git a/sys/net/radix_mpath.c b/sys/net/radix_mpath.c index d1db258..b04b42a 100644 --- a/sys/net/radix_mpath.c +++ b/sys/net/radix_mpath.c @@ -255,7 +255,7 @@ different: } void -rtalloc_mpath(struct route *ro, int hash) +rtalloc_mpath_fib(struct route *ro, int hash, u_int fibnum) { struct radix_node *rn0, *rn; int n; @@ -266,7 +266,7 @@ rtalloc_mpath(struct route *ro, int hash) */ if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP)) return; /* XXX */ - ro->ro_rt = rtalloc1(&ro->ro_dst, 1, 0UL); + ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, 0UL, fibnum); /* if the route does not exist or it is not multipath, don't care */ if (ro->ro_rt == NULL) diff --git a/sys/net/radix_mpath.h b/sys/net/radix_mpath.h index 661aaf3..b9224c8 100644 --- a/sys/net/radix_mpath.h +++ b/sys/net/radix_mpath.h @@ -50,7 +50,8 @@ int rn_mpath_count(struct radix_node *); struct rtentry *rt_mpath_matchgate(struct rtentry *, struct sockaddr *); int rt_mpath_conflict(struct radix_node_head *, struct rtentry *, struct sockaddr *); -void rtalloc_mpath(struct route *, int); +void rtalloc_mpath_fib(struct route *, int, u_int); +#define rtalloc_mpath(_route, _hash) rtalloc_mpath_fib((_route), (_hash), 0) struct radix_node *rn_mpath_lookup(void *, void *, struct radix_node_head *); int rt_mpath_deldup(struct rtentry *, struct rtentry *); diff --git a/sys/net/route.c b/sys/net/route.c index d55c2f8..3ae5dbc 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -29,8 +29,13 @@ * @(#)route.c 8.3.1.1 (Berkeley) 2/23/95 * $FreeBSD$ */ +/************************************************************************ + * Note: In this file a 'fib' is a "forwarding information base" * + * Which is the new name for an in kernel routing 
(next hop) table. * + ***********************************************************************/ #include "opt_inet.h" +#include "opt_route.h" #include "opt_mrouting.h" #include "opt_mpath.h" @@ -39,6 +44,9 @@ #include <sys/malloc.h> #include <sys/mbuf.h> #include <sys/socket.h> +#include <sys/sysctl.h> +#include <sys/sysproto.h> +#include <sys/proc.h> #include <sys/domain.h> #include <sys/kernel.h> @@ -54,14 +62,45 @@ #include <vm/uma.h> +#ifndef ROUTETABLES + #define RT_NUMFIBS 1 + #define RT_MAXFIBS 1 +#else + /* while we use 4 bits in the mbuf flags, + * we are limited to 16 + */ + #if ROUTETABLES > RT_MAXFIBS + #define RT_NUMFIBS RT_MAXFIBS + #error "ROUTETABLES defined too big" + #else + #if ROUTETABLES == 0 + #define RT_NUMFIBS 1 + #else + #define RT_NUMFIBS ROUTETABLES + #endif + #endif +#endif + +u_int rt_numfibs = RT_NUMFIBS; +SYSCTL_INT(_net, OID_AUTO, fibs, CTLFLAG_RD, &rt_numfibs, 0, ""); +/* Eventually this will be a tunable */ +TUNABLE_INT("net.fibs", &rt_numfibs); + static struct rtstat rtstat; -struct radix_node_head *rt_tables[AF_MAX+1]; + +/* by default only the first 'row' of tables will be accessed. */ +/* + * XXXMRT When we fix netstat, and do this differently, + * we can allocate this dynamically. As long as we are keeping + * things backwards compatible we need to allocate this + * statically. + */ +struct radix_node_head *rt_tables[RT_MAXFIBS][AF_MAX+1]; static int rttrash; /* routes not in table but not freed */ static void rt_maskedcopy(struct sockaddr *, struct sockaddr *, struct sockaddr *); -static void rtable_init(void **); /* compare two sockaddr structures */ #define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0) @@ -78,25 +117,83 @@ static void rtable_init(void **); */ #define RNTORT(p) ((struct rtentry *)(p)) -static void -rtable_init(void **table) +static uma_zone_t rtzone; /* Routing table UMA zone.
*/ + +#if 0 +/* default fib for tunnels to use */ +u_int tunnel_fib = 0; +SYSCTL_INT(_net, OID_AUTO, tunnelfib, CTLFLAG_RD, &tunnel_fib, 0, ""); +#endif + +/* + * handler for net.my_fibnum + */ +static int +sysctl_my_fibnum(SYSCTL_HANDLER_ARGS) { - struct domain *dom; - for (dom = domains; dom; dom = dom->dom_next) - if (dom->dom_rtattach) - dom->dom_rtattach(&table[dom->dom_family], - dom->dom_rtoffset); + int fibnum; + int error; + + fibnum = curthread->td_proc->p_fibnum; + error = sysctl_handle_int(oidp, &fibnum, 0, req); + return (error); } -static uma_zone_t rtzone; /* Routing table UMA zone. */ +SYSCTL_PROC(_net, OID_AUTO, my_fibnum, CTLTYPE_INT|CTLFLAG_RD, + NULL, 0, &sysctl_my_fibnum, "I", "default FIB of caller"); static void route_init(void) { + int table; + struct domain *dom; + int fam; + + /* whack the tunable ints into line. */ + if (rt_numfibs > RT_MAXFIBS) + rt_numfibs = RT_MAXFIBS; + if (rt_numfibs == 0) + rt_numfibs = 1; rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); rn_init(); /* initialize all zeroes, all ones, mask table */ - rtable_init((void **)rt_tables); + + for (dom = domains; dom; dom = dom->dom_next) { + if (dom->dom_rtattach) { + for (table = 0; table < rt_numfibs; table++) { + if ( (fam = dom->dom_family) == AF_INET || + table == 0) { + /* for now only AF_INET has > 1 table */ + /* XXX MRT + * rtattach will be also called + * from vfs_export.c but the + * offset will be 0 + * (only for AF_INET and AF_INET6 + * which don't need it anyhow) + */ + dom->dom_rtattach( + (void **)&rt_tables[table][fam], + dom->dom_rtoffset); + } else { + break; + } + } + } + } +} + +#ifndef _SYS_SYSPROTO_H_ +struct setfib_args { + int fibnum; +}; +#endif +int +setfib(struct thread *td, struct setfib_args *uap) +{ + if (uap->fibnum < 0 || uap->fibnum >= rt_numfibs) + return EINVAL; + td->td_proc->p_fibnum = uap->fibnum; + return (0); } /* @@ -105,7 +202,13 @@ route_init(void) void rtalloc(struct route *ro) {
- rtalloc_ign(ro, 0UL); + rtalloc_ign_fib(ro, 0UL, 0); +} + +void +rtalloc_fib(struct route *ro, u_int fibnum) +{ + rtalloc_ign_fib(ro, 0UL, fibnum); } void @@ -119,7 +222,23 @@ rtalloc_ign(struct route *ro, u_long ignore) RTFREE(rt); ro->ro_rt = NULL; } - ro->ro_rt = rtalloc1(&ro->ro_dst, 1, ignore); + ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, 0); + if (ro->ro_rt) + RT_UNLOCK(ro->ro_rt); +} + +void +rtalloc_ign_fib(struct route *ro, u_long ignore, u_int fibnum) +{ + struct rtentry *rt; + + if ((rt = ro->ro_rt) != NULL) { + if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP) + return; + RTFREE(rt); + ro->ro_rt = NULL; + } + ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, fibnum); if (ro->ro_rt) RT_UNLOCK(ro->ro_rt); } @@ -133,7 +252,14 @@ rtalloc_ign(struct route *ro, u_long ignore) struct rtentry * rtalloc1(struct sockaddr *dst, int report, u_long ignflags) { - struct radix_node_head *rnh = rt_tables[dst->sa_family]; + return (rtalloc1_fib(dst, report, ignflags, 0)); +} + +struct rtentry * +rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags, + u_int fibnum) +{ + struct radix_node_head *rnh; struct rtentry *rt; struct radix_node *rn; struct rtentry *newrt; @@ -141,6 +267,10 @@ rtalloc1(struct sockaddr *dst, int report, u_long ignflags) u_long nflags; int err = 0, msgtype = RTM_MISS; + KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum")); + if (dst->sa_family != AF_INET) /* Only INET supports > 1 fib now */ + fibnum = 0; + rnh = rt_tables[fibnum][dst->sa_family]; newrt = NULL; /* * Look up the address in the table for that Address Family @@ -164,8 +294,8 @@ rtalloc1(struct sockaddr *dst, int report, u_long ignflags) * If it requires that it be cloned, do so. * (This implies it wasn't a HOST route.) 
*/ - err = rtrequest(RTM_RESOLVE, dst, NULL, - NULL, 0, &newrt); + err = rtrequest_fib(RTM_RESOLVE, dst, NULL, + NULL, 0, &newrt, fibnum); if (err) { /* * If the cloning didn't succeed, maybe @@ -237,7 +367,7 @@ rtfree(struct rtentry *rt) struct radix_node_head *rnh; KASSERT(rt != NULL,("%s: NULL rt", __func__)); - rnh = rt_tables[rt_key(rt)->sa_family]; + rnh = rt_tables[rt->rt_fibnum][rt_key(rt)->sa_family]; KASSERT(rnh != NULL,("%s: NULL rnh", __func__)); RT_LOCK_ASSERT(rt); @@ -323,6 +453,17 @@ rtredirect(struct sockaddr *dst, int flags, struct sockaddr *src) { + rtredirect_fib(dst, gateway, netmask, flags, src, 0); +} + +void +rtredirect_fib(struct sockaddr *dst, + struct sockaddr *gateway, + struct sockaddr *netmask, + int flags, + struct sockaddr *src, + u_int fibnum) +{ struct rtentry *rt, *rt0 = NULL; int error = 0; short *stat = NULL; @@ -334,7 +475,7 @@ rtredirect(struct sockaddr *dst, error = ENETUNREACH; goto out; } - rt = rtalloc1(dst, 0, 0UL); /* NB: rt is locked */ + rt = rtalloc1_fib(dst, 0, 0UL, fibnum); /* NB: rt is locked */ /* * If the redirect isn't from our current router for this dst, * it's either old or wrong. If it redirects us to ourselves, @@ -377,7 +518,7 @@ rtredirect(struct sockaddr *dst, info.rti_info[RTAX_NETMASK] = netmask; info.rti_ifa = ifa; info.rti_flags = flags; - error = rtrequest1(RTM_ADD, &info, &rt); + error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum); if (rt != NULL) { RT_LOCK(rt); EVENTHANDLER_INVOKE(route_redirect_event, rt0, rt, dst); @@ -423,11 +564,17 @@ out: rt_missmsg(RTM_REDIRECT, &info, flags, error); } +int +rtioctl(u_long req, caddr_t data) +{ + return (rtioctl_fib(req, data, 0)); +} + /* * Routing table ioctl interface. */ int -rtioctl(u_long req, caddr_t data) +rtioctl_fib(u_long req, caddr_t data, u_int fibnum) { /* @@ -438,7 +585,7 @@ rtioctl(u_long req, caddr_t data) */ #ifdef INET /* Multicast goop, grrr... */ - return mrt_ioctl ? mrt_ioctl(req, data) : EOPNOTSUPP; + return mrt_ioctl ? 
mrt_ioctl(req, data, fibnum) : EOPNOTSUPP; #else /* INET */ return ENXIO; #endif /* INET */ @@ -447,6 +594,13 @@ rtioctl(u_long req, caddr_t data) struct ifaddr * ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway) { + return (ifa_ifwithroute_fib(flags, dst, gateway, 0)); +} + +struct ifaddr * +ifa_ifwithroute_fib(int flags, struct sockaddr *dst, struct sockaddr *gateway, + u_int fibnum) +{ register struct ifaddr *ifa; int not_found = 0; @@ -474,7 +628,7 @@ ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway) if (ifa == NULL) ifa = ifa_ifwithnet(gateway); if (ifa == NULL) { - struct rtentry *rt = rtalloc1(gateway, 0, 0UL); + struct rtentry *rt = rtalloc1_fib(gateway, 0, 0UL, fibnum); if (rt == NULL) return (NULL); /* @@ -529,6 +683,18 @@ rtrequest(int req, int flags, struct rtentry **ret_nrt) { + return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt, 0)); +} + +int +rtrequest_fib(int req, + struct sockaddr *dst, + struct sockaddr *gateway, + struct sockaddr *netmask, + int flags, + struct rtentry **ret_nrt, + u_int fibnum) +{ struct rt_addrinfo info; if (dst->sa_len == 0) @@ -539,7 +705,7 @@ rtrequest(int req, info.rti_info[RTAX_DST] = dst; info.rti_info[RTAX_GATEWAY] = gateway; info.rti_info[RTAX_NETMASK] = netmask; - return rtrequest1(req, &info, ret_nrt); + return rtrequest1_fib(req, &info, ret_nrt, fibnum); } /* @@ -556,6 +722,12 @@ rtrequest(int req, int rt_getifa(struct rt_addrinfo *info) { + return (rt_getifa_fib(info, 0)); +} + +int +rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum) +{ struct ifaddr *ifa; int error = 0; @@ -577,9 +749,11 @@ rt_getifa(struct rt_addrinfo *info) if (sa != NULL && info->rti_ifp != NULL) info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp); else if (dst != NULL && gateway != NULL) - info->rti_ifa = ifa_ifwithroute(flags, dst, gateway); + info->rti_ifa = ifa_ifwithroute_fib(flags, dst, gateway, + fibnum); else if (sa != NULL) - info->rti_ifa = ifa_ifwithroute(flags, sa, 
sa); + info->rti_ifa = ifa_ifwithroute_fib(flags, sa, sa, + fibnum); } if ((ifa = info->rti_ifa) != NULL) { if (info->rti_ifp == NULL) @@ -613,7 +787,7 @@ rtexpunge(struct rtentry *rt) /* * Find the correct routing tree to use for this Address Family */ - rnh = rt_tables[rt_key(rt)->sa_family]; + rnh = rt_tables[rt->rt_fibnum][rt_key(rt)->sa_family]; if (rnh == NULL) return (EAFNOSUPPORT); @@ -680,6 +854,13 @@ bad: int rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt) { + return (rtrequest1_fib(req, info, ret_nrt, 0)); +} + +int +rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, + u_int fibnum) +{ int error = 0; register struct rtentry *rt; register struct radix_node *rn; @@ -688,10 +869,13 @@ rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt) struct sockaddr *ndst; #define senderr(x) { error = x ; goto bad; } + KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum")); + if (dst->sa_family != AF_INET) /* Only INET supports > 1 fib now */ + fibnum = 0; /* * Find the correct routing tree to use for this Address Family */ - rnh = rt_tables[dst->sa_family]; + rnh = rt_tables[fibnum][dst->sa_family]; if (rnh == NULL) return (EAFNOSUPPORT); RADIX_NODE_HEAD_LOCK(rnh); @@ -848,7 +1032,7 @@ deldone: (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK)) senderr(EINVAL); - if (info->rti_ifa == NULL && (error = rt_getifa(info))) + if (info->rti_ifa == NULL && (error = rt_getifa_fib(info, fibnum))) senderr(error); ifa = info->rti_ifa; @@ -858,6 +1042,7 @@ deldone: senderr(ENOBUFS); RT_LOCK_INIT(rt); rt->rt_flags = RTF_UP | flags; + rt->rt_fibnum = fibnum; /* * Add the gateway. Possibly re-malloc-ing the storage for it * also add the rt_gwroute if possible. @@ -918,7 +1103,7 @@ deldone: * then we just blow it away and retry the insertion * of the new one. 
*/ - rt2 = rtalloc1(dst, 0, 0); + rt2 = rtalloc1_fib(dst, 0, 0, fibnum); if (rt2 && rt2->rt_parent) { rtexpunge(rt2); RT_UNLOCK(rt2); @@ -1034,8 +1219,8 @@ rt_fixdelete(struct radix_node *rn, void *vp) if (rt->rt_parent == rt0 && !(rt->rt_flags & (RTF_PINNED | RTF_CLONING))) { - return rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt), - rt->rt_flags, NULL); + return rtrequest_fib(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt), + rt->rt_flags, NULL, rt->rt_fibnum); } return 0; } @@ -1099,15 +1284,15 @@ rt_fixchange(struct radix_node *rn, void *vp) * changed/added under the node's mask. So, get rid of it. */ delete_rt: - return rtrequest(RTM_DELETE, rt_key(rt), NULL, - rt_mask(rt), rt->rt_flags, NULL); + return rtrequest_fib(RTM_DELETE, rt_key(rt), NULL, + rt_mask(rt), rt->rt_flags, NULL, rt->rt_fibnum); } int rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) { /* XXX dst may be overwritten, can we move this to below */ - struct radix_node_head *rnh = rt_tables[dst->sa_family]; + struct radix_node_head *rnh = rt_tables[rt->rt_fibnum][dst->sa_family]; int dlen = SA_SIZE(dst), glen = SA_SIZE(gate); again: @@ -1138,7 +1323,7 @@ again: struct rtentry *gwrt; RT_UNLOCK(rt); /* XXX workaround LOR */ - gwrt = rtalloc1(gate, 1, 0); + gwrt = rtalloc1_fib(gate, 1, 0, rt->rt_fibnum); if (gwrt == rt) { RT_REMREF(rt); return (EADDRINUSE); /* failure */ @@ -1243,15 +1428,19 @@ rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netma * Set up a routing table entry, normally * for an interface. */ -int -rtinit(struct ifaddr *ifa, int cmd, int flags) +#define _SOCKADDR_TMPSIZE 128 /* Not too big.. 
kernel stack size is limited */ +static inline int +rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) { struct sockaddr *dst; struct sockaddr *netmask; - struct mbuf *m = NULL; struct rtentry *rt = NULL; struct rt_addrinfo info; - int error=0; + int error = 0; + int startfib, endfib; + char tempbuf[_SOCKADDR_TMPSIZE]; + int didwork = 0; + int a_failure = 0; if (flags & RTF_HOST) { dst = ifa->ifa_dstaddr; @@ -1260,126 +1449,190 @@ rtinit(struct ifaddr *ifa, int cmd, int flags) dst = ifa->ifa_addr; netmask = ifa->ifa_netmask; } + if ( dst->sa_family != AF_INET) + fibnum = 0; + if (fibnum == -1) { + startfib = 0; + endfib = rt_numfibs - 1; + } else { + KASSERT((fibnum < rt_numfibs), ("rtinit1: bad fibnum")); + startfib = fibnum; + endfib = fibnum; + } if (dst->sa_len == 0) return(EINVAL); /* - * If it's a delete, check that if it exists, it's on the correct - * interface or we might scrub a route to another ifa which would + * If it's a delete, check that if it exists, + * it's on the correct interface or we might scrub + * a route to another ifa which would * be confusing at best and possibly worse. */ if (cmd == RTM_DELETE) { - struct sockaddr *deldst; - struct radix_node_head *rnh; - struct radix_node *rn; - /* * It's a delete, so it should already exist.. * If it's a net, mask off the host bits * (Assuming we have a mask) + * XXX this is kinda inet specific.. */ if (netmask != NULL) { - m = m_get(M_DONTWAIT, MT_SONAME); - if (m == NULL) - return(ENOBUFS); - deldst = mtod(m, struct sockaddr *); - rt_maskedcopy(dst, deldst, netmask); - dst = deldst; + rt_maskedcopy(dst, (struct sockaddr *)tempbuf, netmask); + dst = (struct sockaddr *)tempbuf; } - /* - * Look up an rtentry that is in the routing tree and - * contains the correct info. - */ - if ((rnh = rt_tables[dst->sa_family]) == NULL) - goto bad; - RADIX_NODE_HEAD_LOCK(rnh); + } + /* + * Now go through all the requested tables (fibs) and do the + * requested action. 
Realistically, this will either be fib 0 + * for protocols that don't do multiple tables or all the + * tables for those that do. XXX For this version only AF_INET. + * When that changes code should be refactored to protocol + * independent parts and protocol dependent parts. + */ + for ( fibnum = startfib; fibnum <= endfib; fibnum++) { + if (cmd == RTM_DELETE) { + struct radix_node_head *rnh; + struct radix_node *rn; + /* + * Look up an rtentry that is in the routing tree and + * contains the correct info. + */ + if ((rnh = rt_tables[fibnum][dst->sa_family]) == NULL) + /* this table doesn't exist but others might */ + continue; + RADIX_NODE_HEAD_LOCK(rnh); #ifdef RADIX_MPATH - if (rn_mpath_capable(rnh)) { + if (rn_mpath_capable(rnh)) { - rn = rnh->rnh_matchaddr(dst, rnh); - if (rn == NULL) - error = ESRCH; - else { - rt = RNTORT(rn); - /* - * for interface route the rt->rt_gateway is - * sockaddr_intf for cloning ARP entries, so - * rt_mpath_matchgate must use the interface - * address - */ - rt = rt_mpath_matchgate(rt, ifa->ifa_addr); - if (!rt) + rn = rnh->rnh_matchaddr(dst, rnh); + if (rn == NULL) error = ESRCH; + else { + rt = RNTORT(rn); + /* + * for interface route the + * rt->rt_gateway is sockaddr_intf + * for cloning ARP entries, so + * rt_mpath_matchgate must use the + * interface address + */ + rt = rt_mpath_matchgate(rt, + ifa->ifa_addr); + if (!rt) + error = ESRCH; + } } - } - else + else #endif - error = ((rn = rnh->rnh_lookup(dst, netmask, rnh)) == NULL || - (rn->rn_flags & RNF_ROOT) || - RNTORT(rn)->rt_ifa != ifa || - !sa_equal((struct sockaddr *)rn->rn_key, dst)); - - RADIX_NODE_HEAD_UNLOCK(rnh); - if (error) { -bad: - if (m) - (void) m_free(m); - return (flags & RTF_HOST ? 
EHOSTUNREACH : ENETUNREACH); + rn = rnh->rnh_lookup(dst, netmask, rnh); + error = (rn == NULL || + (rn->rn_flags & RNF_ROOT) || + RNTORT(rn)->rt_ifa != ifa || + !sa_equal((struct sockaddr *)rn->rn_key, dst)); + RADIX_NODE_HEAD_UNLOCK(rnh); + if (error) { + /* this is only an error if bad on ALL tables */ + continue; + } } - } - /* - * Do the actual request - */ - bzero((caddr_t)&info, sizeof(info)); - info.rti_ifa = ifa; - info.rti_flags = flags | ifa->ifa_flags; - info.rti_info[RTAX_DST] = dst; - info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; - info.rti_info[RTAX_NETMASK] = netmask; - error = rtrequest1(cmd, &info, &rt); - if (error == 0 && rt != NULL) { /* - * notify any listening routing agents of the change + * Do the actual request */ - RT_LOCK(rt); + bzero((caddr_t)&info, sizeof(info)); + info.rti_ifa = ifa; + info.rti_flags = flags | ifa->ifa_flags; + info.rti_info[RTAX_DST] = dst; + info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; + info.rti_info[RTAX_NETMASK] = netmask; + error = rtrequest1_fib(cmd, &info, &rt, fibnum); + if (error == 0 && rt != NULL) { + /* + * notify any listening routing agents of the change + */ + RT_LOCK(rt); #ifdef RADIX_MPATH - /* - * in case address alias finds the first address - * e.g. ifconfig bge0 192.103.54.246/24 - * e.g. ifconfig bge0 192.103.54.247/24 - * the address set in the route is 192.103.54.246 - * so we need to replace it with 192.103.54.247 - */ - if (memcmp(rt->rt_ifa->ifa_addr, ifa->ifa_addr, ifa->ifa_addr->sa_len)) { - IFAFREE(rt->rt_ifa); - IFAREF(ifa); - rt->rt_ifp = ifa->ifa_ifp; - rt->rt_ifa = ifa; - } -#endif - rt_newaddrmsg(cmd, ifa, error, rt); - if (cmd == RTM_DELETE) { /* - * If we are deleting, and we found an entry, then - * it's been removed from the tree.. now throw it away. + * in case address alias finds the first address + * e.g. ifconfig bge0 192.103.54.246/24 + * e.g. 
ifconfig bge0 192.103.54.247/24 + * the address set in the route is 192.103.54.246 + * so we need to replace it with 192.103.54.247 */ - RTFREE_LOCKED(rt); - } else { - if (cmd == RTM_ADD) { + if (memcmp(rt->rt_ifa->ifa_addr, + ifa->ifa_addr, ifa->ifa_addr->sa_len)) { + IFAFREE(rt->rt_ifa); + IFAREF(ifa); + rt->rt_ifp = ifa->ifa_ifp; + rt->rt_ifa = ifa; + } +#endif + rt_newaddrmsg(cmd, ifa, error, rt); + if (cmd == RTM_DELETE) { /* - * We just wanted to add it.. we don't actually - * need a reference. + * If we are deleting, and we found an entry, + * then it's been removed from the tree.. + * now throw it away. */ - RT_REMREF(rt); + RTFREE_LOCKED(rt); + } else { + if (cmd == RTM_ADD) { + /* + * We just wanted to add it.. + * we don't actually need a reference. + */ + RT_REMREF(rt); + } + RT_UNLOCK(rt); } - RT_UNLOCK(rt); + didwork = 1; + } + if (error) + a_failure = error; + } + if (cmd == RTM_DELETE) { + if (didwork) { + error = 0; + } else { + /* we only give an error if it wasn't in any table */ + error = ((flags & RTF_HOST) ? + EHOSTUNREACH : ENETUNREACH); + } + } else { + if (a_failure) { + /* return an error if any of them failed */ + error = a_failure; } } - if (m) - (void) m_free(m); return (error); } +/* special one for inet internal use. may not use. */ +int +rtinit_fib(struct ifaddr *ifa, int cmd, int flags) +{ + return (rtinit1(ifa, cmd, flags, -1)); +} + +/* + * Set up a routing table entry, normally + * for an interface. + */ +int +rtinit(struct ifaddr *ifa, int cmd, int flags) +{ + struct sockaddr *dst; + int fib = 0; + + if (flags & RTF_HOST) { + dst = ifa->ifa_dstaddr; + } else { + dst = ifa->ifa_addr; + } + + if (dst->sa_family == AF_INET) + fib = -1; + return (rtinit1(ifa, cmd, flags, fib)); +} + /* * rt_check() is invoked on each layer 2 output path, prior to * encapsulating outbound packets. 
@@ -1399,6 +1652,7 @@ bad: * final destination if directly reachable); * *lrt0 points to the cached route to the final destination; * *lrt is not meaningful; + * fibnum is the index to the correct network fib for this packet * * === Operation === * If the route is marked down try to find a new route. If the route @@ -1415,6 +1669,13 @@ bad: int rt_check(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst) { + return (rt_check_fib(lrt, lrt0, dst, 0)); +} + +int +rt_check_fib(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst, + u_int fibnum) +{ struct rtentry *rt; struct rtentry *rt0; int error; @@ -1426,7 +1687,7 @@ rt_check(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst) RT_LOCK(rt); if ((rt->rt_flags & RTF_UP) == 0) { RT_UNLOCK(rt); - rt = rtalloc1(dst, 1, 0UL); + rt = rtalloc1_fib(dst, 1, 0UL, fibnum); if (rt != NULL) { RT_REMREF(rt); /* XXX what about if change? */ @@ -1446,7 +1707,8 @@ rt_check(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst) rt0->rt_gwroute = NULL; lookup: RT_UNLOCK(rt0); - rt = rtalloc1(rt->rt_gateway, 1, 0UL); +/* XXX MRT link level looked up in table 0 */ + rt = rtalloc1_fib(rt->rt_gateway, 1, 0UL, 0); if (rt == rt0) { RT_REMREF(rt0); RT_UNLOCK(rt0); diff --git a/sys/net/route.h b/sys/net/route.h index e9f4980..7b8c460 100644 --- a/sys/net/route.h +++ b/sys/net/route.h @@ -82,6 +82,10 @@ struct rt_metrics { #define RTM_RTTUNIT 1000000 /* units for rtt, rttvar, as units per sec */ #define RTTTOPRHZ(r) ((r) / (RTM_RTTUNIT / PR_SLOWHZ)) +#define RT_MAXFIBS 16 +extern u_int rt_numfibs; /* number fo usable routing tables */ +extern u_int tunnel_fib; /* tunnels use these */ +extern u_int fwd_fib; /* packets being forwarded use these routes */ /* * XXX kernel function pointer `rt_output' is visible to applications. 
*/ @@ -120,6 +124,7 @@ struct rtentry { caddr_t rt_llinfo; /* pointer to link level info cache */ struct rtentry *rt_gwroute; /* implied entry for gatewayed routes */ struct rtentry *rt_parent; /* cloning parent of this route */ + u_int rt_fibnum; /* which FIB */ #ifdef _KERNEL /* XXX ugly, user apps use this definition but don't have a mtx def */ struct mtx rt_mtx; /* mutex for routing entry */ @@ -325,11 +330,10 @@ struct rt_addrinfo { RTFREE_LOCKED(_rt); \ } while (0) -extern struct radix_node_head *rt_tables[AF_MAX+1]; +extern struct radix_node_head *rt_tables[RT_MAXFIBS][AF_MAX+1]; struct ifmultiaddr; -int rt_getifa(struct rt_addrinfo *); void rt_ieee80211msg(struct ifnet *, int, void *, size_t); void rt_ifannouncemsg(struct ifnet *, int); void rt_ifmsg(struct ifnet *); @@ -350,11 +354,15 @@ int rt_setgate(struct rtentry *, struct sockaddr *, struct sockaddr *); * RTFREE() uses an unlocked entry. */ +int rtexpunge(struct rtentry *); +void rtfree(struct rtentry *); + +/* XXX MRT COMPAT VERSIONS THAT SET UNIVERSE to 0 */ +/* Thes are used by old code not yet converted to use multiple FIBS */ +int rt_getifa(struct rt_addrinfo *); void rtalloc_ign(struct route *ro, u_long ignflags); void rtalloc(struct route *ro); /* XXX deprecated, use rtalloc_ign(ro, 0) */ struct rtentry *rtalloc1(struct sockaddr *, int, u_long); -int rtexpunge(struct rtentry *); -void rtfree(struct rtentry *); int rtinit(struct ifaddr *, int, int); int rtioctl(u_long, caddr_t); void rtredirect(struct sockaddr *, struct sockaddr *, @@ -364,6 +372,25 @@ int rtrequest(int, struct sockaddr *, int rtrequest1(int, struct rt_addrinfo *, struct rtentry **); int rt_check(struct rtentry **, struct rtentry **, struct sockaddr *); +/* defaults to "all" FIBs */ +int rtinit_fib(struct ifaddr *, int, int); + +/* XXX MRT NEW VERSIONS THAT USE FIBs + * For now the protocol indepedent versions are the same as the AF_INET ones + * but this will change.. 
+ */ +int rt_getifa_fib(struct rt_addrinfo *, u_int fibnum); +void rtalloc_ign_fib(struct route *ro, u_long ignflags, u_int fibnum); +void rtalloc_fib(struct route *ro, u_int fibnum); +struct rtentry *rtalloc1_fib(struct sockaddr *, int, u_long, u_int); +int rtioctl_fib(u_long, caddr_t, u_int); +void rtredirect_fib(struct sockaddr *, struct sockaddr *, + struct sockaddr *, int, struct sockaddr *, u_int); +int rtrequest_fib(int, struct sockaddr *, + struct sockaddr *, struct sockaddr *, int, struct rtentry **, u_int); +int rtrequest1_fib(int, struct rt_addrinfo *, struct rtentry **, u_int); +int rt_check_fib(struct rtentry **, struct rtentry **, struct sockaddr *, u_int); + #include <sys/eventhandler.h> typedef void (*rtevent_arp_update_fn)(void *, struct rtentry *, uint8_t *, struct sockaddr *); typedef void (*rtevent_redirect_fn)(void *, struct rtentry *, struct rtentry *, struct sockaddr *); diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c index 5ea93d3..9511035 100644 --- a/sys/net/rtsock.c +++ b/sys/net/rtsock.c @@ -182,6 +182,7 @@ rts_attach(struct socket *so, int proto, struct thread *td) */ s = splnet(); so->so_pcb = (caddr_t)rp; + so->so_fibnum = td->td_proc->p_fibnum; error = raw_attach(so, proto); rp = sotorawcb(so); if (error) { @@ -387,7 +388,8 @@ route_output(struct mbuf *m, struct socket *so) if (info.rti_info[RTAX_GATEWAY] == NULL) senderr(EINVAL); saved_nrt = NULL; - error = rtrequest1(RTM_ADD, &info, &saved_nrt); + error = rtrequest1_fib(RTM_ADD, &info, &saved_nrt, + so->so_fibnum); if (error == 0 && saved_nrt) { RT_LOCK(saved_nrt); rt_setmetrics(rtm->rtm_inits, @@ -401,7 +403,8 @@ route_output(struct mbuf *m, struct socket *so) case RTM_DELETE: saved_nrt = NULL; - error = rtrequest1(RTM_DELETE, &info, &saved_nrt); + error = rtrequest1_fib(RTM_DELETE, &info, &saved_nrt, + so->so_fibnum); if (error == 0) { RT_LOCK(saved_nrt); rt = saved_nrt; @@ -412,7 +415,7 @@ route_output(struct mbuf *m, struct socket *so) case RTM_GET: case RTM_CHANGE: case 
RTM_LOCK: - rnh = rt_tables[info.rti_info[RTAX_DST]->sa_family]; + rnh = rt_tables[so->so_fibnum][info.rti_info[RTAX_DST]->sa_family]; if (rnh == NULL) senderr(EAFNOSUPPORT); RADIX_NODE_HEAD_LOCK(rnh); @@ -530,7 +533,8 @@ route_output(struct mbuf *m, struct socket *so) !sa_equal(info.rti_info[RTAX_IFA], rt->rt_ifa->ifa_addr))) { RT_UNLOCK(rt); - if ((error = rt_getifa(&info)) != 0) + if ((error = rt_getifa_fib(&info, + rt->rt_fibnum)) != 0) senderr(error); RT_LOCK(rt); } @@ -1278,7 +1282,7 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS) } else /* dump only one table */ i = lim = af; for (error = 0; error == 0 && i <= lim; i++) - if ((rnh = rt_tables[i]) != NULL) { + if ((rnh = rt_tables[curthread->td_proc->p_fibnum][i]) != NULL) { RADIX_NODE_HEAD_LOCK(rnh); error = rnh->rnh_walktree(rnh, sysctl_dumpentry, &w); diff --git a/sys/netatalk/at_extern.h b/sys/netatalk/at_extern.h index d81a877..cf11017 100644 --- a/sys/netatalk/at_extern.h +++ b/sys/netatalk/at_extern.h @@ -55,6 +55,7 @@ u_short at_cksum(struct mbuf *m, int skip); int at_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td); struct at_ifaddr *at_ifawithnet(struct sockaddr_at *); +int at_inithead(void**, int); void ddp_init(void); int ddp_output(struct mbuf *m, struct socket *so); int ddp_route(struct mbuf *m, struct route *ro); diff --git a/sys/netatalk/at_proto.c b/sys/netatalk/at_proto.c index 39b0dd6..f62df59 100644 --- a/sys/netatalk/at_proto.c +++ b/sys/netatalk/at_proto.c @@ -56,7 +56,7 @@ static struct domain atalkdomain = { .dom_name = "appletalk", .dom_protosw = atalksw, .dom_protoswNPROTOSW = &atalksw[sizeof(atalksw)/sizeof(atalksw[0])], - .dom_rtattach = rn_inithead, + .dom_rtattach = at_inithead, .dom_rtoffset = offsetof(struct sockaddr_at, sat_addr) << 3, .dom_maxrtkey = sizeof(struct sockaddr_at), }; diff --git a/sys/netgraph/netflow/netflow.c b/sys/netgraph/netflow/netflow.c index 0f981ff..51a87dd 100644 --- a/sys/netgraph/netflow/netflow.c +++ 
b/sys/netgraph/netflow/netflow.c @@ -269,7 +269,8 @@ hash_insert(priv_p priv, struct flow_hash_entry *hsh, struct flow_rec *r, sin.sin_len = sizeof(struct sockaddr_in); sin.sin_family = AF_INET; sin.sin_addr = fle->f.r.r_dst; - rt = rtalloc1((struct sockaddr *)&sin, 0, RTF_CLONING); + /* XXX MRT 0 as a default.. need the m here to get fib */ + rt = rtalloc1_fib((struct sockaddr *)&sin, 0, RTF_CLONING, 0); if (rt != NULL) { fle->f.fle_o_ifx = rt->rt_ifp->if_index; @@ -293,7 +294,8 @@ hash_insert(priv_p priv, struct flow_hash_entry *hsh, struct flow_rec *r, sin.sin_len = sizeof(struct sockaddr_in); sin.sin_family = AF_INET; sin.sin_addr = fle->f.r.r_src; - rt = rtalloc1((struct sockaddr *)&sin, 0, RTF_CLONING); + /* XXX MRT 0 as a default revisit. need the mbuf for fib*/ + rt = rtalloc1_fib((struct sockaddr *)&sin, 0, RTF_CLONING, 0); if (rt != NULL) { if (rt_mask(rt)) fle->f.src_mask = bitcount32(((struct sockaddr_in *) diff --git a/sys/netinet/if_atm.c b/sys/netinet/if_atm.c index d19dea8..065f0c4 100644 --- a/sys/netinet/if_atm.c +++ b/sys/netinet/if_atm.c @@ -327,7 +327,7 @@ atmresolve(struct rtentry *rt, struct mbuf *m, struct sockaddr *dst, } if (rt == NULL) { - rt = RTALLOC1(dst, 0); + rt = RTALLOC1(dst, 0); /* link level on table 0 XXX MRT */ if (rt == NULL) goto bad; /* failed */ RT_REMREF(rt); /* don't keep LL references */ diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c index b1133c9..6939dbb 100644 --- a/sys/netinet/if_ether.c +++ b/sys/netinet/if_ether.c @@ -116,7 +116,7 @@ static void arprequest(struct ifnet *, static void arpintr(struct mbuf *); static void arptimer(void *); static struct rtentry - *arplookup(u_long, int, int); + *arplookup(u_long, int, int, int); #ifdef INET static void in_arpinput(struct mbuf *); #endif @@ -138,7 +138,8 @@ arptimer(void *arg) */ RT_UNLOCK(rt); - rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt), 0, NULL); + in_rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt), 0, NULL, + rt->rt_fibnum); } /* @@ 
-362,6 +363,7 @@ arpresolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m, struct rtentry *rt = NULL; struct sockaddr_dl *sdl; int error; + int fibnum = 0; if (m) { if (m->m_flags & M_BCAST) { @@ -375,10 +377,14 @@ arpresolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m, ETHER_MAP_IP_MULTICAST(&SIN(dst)->sin_addr, desten); return (0); } + fibnum = M_GETFIB(m); } if (rt0 != NULL) { - error = rt_check(&rt, &rt0, dst); + /* Look for a cached arp (ll) entry. */ + if (m == NULL) + fibnum = rt0->rt_fibnum; + error = in_rt_check(&rt, &rt0, dst, fibnum); if (error) { m_freem(m); return error; @@ -389,10 +395,14 @@ arpresolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m, } if (la == NULL) { /* - * We enter this block in case if rt0 was NULL, - * or if rt found by rt_check() didn't have llinfo. + * We enter this block if rt0 was NULL, + * or if rt found by in_rt_check() didn't have llinfo. + * we should get a cloned route, which since it should + * come from the local interface should have a ll entry. + * if may be incoplete but that's ok. + * XXXMRT if we haven't found a fibnum is that OK? */ - rt = arplookup(SIN(dst)->sin_addr.s_addr, 1, 0); + rt = arplookup(SIN(dst)->sin_addr.s_addr, 1, 0, fibnum); if (rt == NULL) { log(LOG_DEBUG, "arpresolve: can't allocate route for %s\n", @@ -582,6 +592,9 @@ in_arpinput(struct mbuf *m) int op, rif_len; int req_len; int bridged = 0; + u_int fibnum; + u_int goodfib = 0; + int firstpass = 1; #ifdef DEV_CARP int carp_match = 0; #endif @@ -674,133 +687,181 @@ match: } if (ifp->if_flags & IFF_STATICARP) goto reply; - rt = arplookup(isaddr.s_addr, itaddr.s_addr == myaddr.s_addr, 0); - if (rt != NULL) { - sin.sin_addr.s_addr = isaddr.s_addr; - EVENTHANDLER_INVOKE(route_arp_update_event, rt, - ar_sha(ah), (struct sockaddr *)&sin); + /* + * We look for any FIBs that has this address to find + * the interface etc. + * For sanity checks that are FIB independent we abort the loop. 
+ */ + for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { + rt = arplookup(isaddr.s_addr, + itaddr.s_addr == myaddr.s_addr, 0, fibnum); + if (rt == NULL) + continue; + + sdl = SDL(rt->rt_gateway); + /* Only call this once */ + if (firstpass) { + sin.sin_addr.s_addr = isaddr.s_addr; + EVENTHANDLER_INVOKE(route_arp_update_event, rt, + ar_sha(ah), (struct sockaddr *)&sin); + } la = (struct llinfo_arp *)rt->rt_llinfo; if (la == NULL) { RT_UNLOCK(rt); - goto reply; + continue; } - } else - goto reply; - /* The following is not an error when doing bridging. */ - if (!bridged && rt->rt_ifp != ifp + if (firstpass) { + /* The following is not an error when doing bridging. */ + if (!bridged && rt->rt_ifp != ifp #ifdef DEV_CARP - && (ifp->if_type != IFT_CARP || !carp_match) + && (ifp->if_type != IFT_CARP || !carp_match) #endif - ) { - if (log_arp_wrong_iface) - log(LOG_ERR, "arp: %s is on %s but got reply from %*D on %s\n", - inet_ntoa(isaddr), - rt->rt_ifp->if_xname, - ifp->if_addrlen, (u_char *)ar_sha(ah), ":", - ifp->if_xname); - RT_UNLOCK(rt); - goto reply; - } - sdl = SDL(rt->rt_gateway); - if (sdl->sdl_alen && - bcmp(ar_sha(ah), LLADDR(sdl), sdl->sdl_alen)) { - if (rt->rt_expire) { - if (log_arp_movements) - log(LOG_INFO, "arp: %s moved from %*D to %*D on %s\n", - inet_ntoa(isaddr), - ifp->if_addrlen, (u_char *)LLADDR(sdl), ":", - ifp->if_addrlen, (u_char *)ar_sha(ah), ":", - ifp->if_xname); - } else { - RT_UNLOCK(rt); - if (log_arp_permanent_modify) - log(LOG_ERR, "arp: %*D attempts to modify " - "permanent entry for %s on %s\n", - ifp->if_addrlen, (u_char *)ar_sha(ah), ":", - inet_ntoa(isaddr), ifp->if_xname); - goto reply; - } - } - /* - * sanity check for the address length. - * XXX this does not work for protocols with variable address - * length. 
-is - */ - if (sdl->sdl_alen && - sdl->sdl_alen != ah->ar_hln) { - log(LOG_WARNING, - "arp from %*D: new addr len %d, was %d", - ifp->if_addrlen, (u_char *) ar_sha(ah), ":", - ah->ar_hln, sdl->sdl_alen); - } - if (ifp->if_addrlen != ah->ar_hln) { - log(LOG_WARNING, - "arp from %*D: addr len: new %d, i/f %d (ignored)", - ifp->if_addrlen, (u_char *) ar_sha(ah), ":", - ah->ar_hln, ifp->if_addrlen); - RT_UNLOCK(rt); - goto reply; - } - (void)memcpy(LLADDR(sdl), ar_sha(ah), - sdl->sdl_alen = ah->ar_hln); - /* - * If we receive an arp from a token-ring station over - * a token-ring nic then try to save the source - * routing info. - */ - if (ifp->if_type == IFT_ISO88025) { - struct iso88025_header *th = NULL; - struct iso88025_sockaddr_dl_data *trld; - - th = (struct iso88025_header *)m->m_pkthdr.header; - trld = SDL_ISO88025(sdl); - rif_len = TR_RCF_RIFLEN(th->rcf); - if ((th->iso88025_shost[0] & TR_RII) && - (rif_len > 2)) { - trld->trld_rcf = th->rcf; - trld->trld_rcf ^= htons(TR_RCF_DIR); - memcpy(trld->trld_route, th->rd, rif_len - 2); - trld->trld_rcf &= ~htons(TR_RCF_BCST_MASK); + ) { + if (log_arp_wrong_iface) + log(LOG_ERR, "arp: %s is on %s " + "but got reply from %*D " + "on %s\n", + inet_ntoa(isaddr), + rt->rt_ifp->if_xname, + ifp->if_addrlen, + (u_char *)ar_sha(ah), ":", + ifp->if_xname); + RT_UNLOCK(rt); + break; + } + if (sdl->sdl_alen && + bcmp(ar_sha(ah), LLADDR(sdl), sdl->sdl_alen)) { + if (rt->rt_expire) { + if (log_arp_movements) + log(LOG_INFO, + "arp: %s moved from %*D to %*D " + "on %s\n", + inet_ntoa(isaddr), + ifp->if_addrlen, + (u_char *)LLADDR(sdl), ":", + ifp->if_addrlen, + (u_char *)ar_sha(ah), ":", + ifp->if_xname); + } else { + RT_UNLOCK(rt); + if (log_arp_permanent_modify) + log(LOG_ERR, + "arp: %*D attempts to " + "modify permanent entry " + "for %s on %s\n", + ifp->if_addrlen, + (u_char *)ar_sha(ah), ":", + inet_ntoa(isaddr), + ifp->if_xname); + break; + } + } /* - * Set up source routing information for - * reply packet (XXX) + * sanity 
check for the address length. + * XXX this does not work for protocols + * with variable address length. -is */ - m->m_data -= rif_len; - m->m_len += rif_len; - m->m_pkthdr.len += rif_len; - } else { - th->iso88025_shost[0] &= ~TR_RII; - trld->trld_rcf = 0; + if (sdl->sdl_alen && + sdl->sdl_alen != ah->ar_hln) { + log(LOG_WARNING, + "arp from %*D: new addr len %d, was %d", + ifp->if_addrlen, (u_char *) ar_sha(ah), + ":", ah->ar_hln, sdl->sdl_alen); + } + if (ifp->if_addrlen != ah->ar_hln) { + log(LOG_WARNING, + "arp from %*D: addr len: " + "new %d, i/f %d (ignored)", + ifp->if_addrlen, (u_char *) ar_sha(ah), + ":", ah->ar_hln, ifp->if_addrlen); + RT_UNLOCK(rt); + break; + } + firstpass = 0; + goodfib = fibnum; } - m->m_data -= 8; - m->m_len += 8; - m->m_pkthdr.len += 8; - th->rcf = trld->trld_rcf; - } - if (rt->rt_expire) { - rt->rt_expire = time_uptime + arpt_keep; - callout_reset(&la->la_timer, hz * arpt_keep, arptimer, rt); - } - la->la_asked = 0; - la->la_preempt = arp_maxtries; - hold = la->la_hold; - la->la_hold = NULL; - RT_UNLOCK(rt); - if (hold != NULL) - (*ifp->if_output)(ifp, hold, rt_key(rt), rt); + /* Copy in the information received. */ + (void)memcpy(LLADDR(sdl), ar_sha(ah), + sdl->sdl_alen = ah->ar_hln); + /* + * If we receive an arp from a token-ring station over + * a token-ring nic then try to save the source routing info. + * XXXMRT Only minimal Token Ring support for MRT. + * Only do this on the first pass as if modifies the mbuf. 
+ */ + if (ifp->if_type == IFT_ISO88025) { + struct iso88025_header *th = NULL; + struct iso88025_sockaddr_dl_data *trld; + + /* force the fib loop to end after this pass */ + fibnum = rt_numfibs - 1; + + th = (struct iso88025_header *)m->m_pkthdr.header; + trld = SDL_ISO88025(sdl); + rif_len = TR_RCF_RIFLEN(th->rcf); + if ((th->iso88025_shost[0] & TR_RII) && + (rif_len > 2)) { + trld->trld_rcf = th->rcf; + trld->trld_rcf ^= htons(TR_RCF_DIR); + memcpy(trld->trld_route, th->rd, rif_len - 2); + trld->trld_rcf &= ~htons(TR_RCF_BCST_MASK); + /* + * Set up source routing information for + * reply packet (XXX) + */ + m->m_data -= rif_len; + m->m_len += rif_len; + m->m_pkthdr.len += rif_len; + } else { + th->iso88025_shost[0] &= ~TR_RII; + trld->trld_rcf = 0; + } + m->m_data -= 8; + m->m_len += 8; + m->m_pkthdr.len += 8; + th->rcf = trld->trld_rcf; + } + + if (rt->rt_expire) { + rt->rt_expire = time_uptime + arpt_keep; + callout_reset(&la->la_timer, hz * arpt_keep, + arptimer, rt); + } + la->la_asked = 0; + la->la_preempt = arp_maxtries; + hold = la->la_hold; + la->la_hold = NULL; + RT_UNLOCK(rt); + if (hold != NULL) + (*ifp->if_output)(ifp, hold, rt_key(rt), rt); + } /* end of FIB loop */ reply: + + /* + * Decide if we have to respond to something. + */ if (op != ARPOP_REQUEST) goto drop; if (itaddr.s_addr == myaddr.s_addr) { - /* I am the target */ + /* Shortcut.. the receiving interface is the target. */ (void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln); (void)memcpy(ar_sha(ah), enaddr, ah->ar_hln); } else { - rt = arplookup(itaddr.s_addr, 0, SIN_PROXY); + /* It's not asking for our address. But it still may + * be something we should answer. + * + * XXX MRT + * We assume that link level info is independent of + * the table used and so we use whichever we can and don't + * have a better option. + */ + /* Have we been asked to proxy for the target. 
*/ + rt = arplookup(itaddr.s_addr, 0, SIN_PROXY, goodfib); if (rt == NULL) { + /* Nope, only intersted now if proxying everything. */ struct sockaddr_in sin; if (!arp_proxyall) @@ -811,7 +872,8 @@ reply: sin.sin_len = sizeof sin; sin.sin_addr = itaddr; - rt = rtalloc1((struct sockaddr *)&sin, 0, 0UL); + /* XXX MRT use table 0 for arp reply */ + rt = in_rtalloc1((struct sockaddr *)&sin, 0, 0UL, 0); if (!rt) goto drop; /* @@ -835,7 +897,8 @@ reply: */ sin.sin_addr = isaddr; - rt = rtalloc1((struct sockaddr *)&sin, 0, 0UL); + /* XXX MRT use table 0 for arp checks */ + rt = in_rtalloc1((struct sockaddr *)&sin, 0, 0UL, 0); if (!rt) goto drop; if (rt->rt_ifp != ifp) { @@ -905,7 +968,7 @@ drop: * Lookup or enter a new address in arptab. */ static struct rtentry * -arplookup(u_long addr, int create, int proxy) +arplookup(u_long addr, int create, int proxy, int fibnum) { struct rtentry *rt; struct sockaddr_inarp sin; @@ -917,7 +980,7 @@ arplookup(u_long addr, int create, int proxy) sin.sin_addr.s_addr = addr; if (proxy) sin.sin_other = SIN_PROXY; - rt = rtalloc1((struct sockaddr *)&sin, create, 0UL); + rt = in_rtalloc1((struct sockaddr *)&sin, create, 0UL, fibnum); if (rt == 0) return (0); diff --git a/sys/netinet/in_gif.c b/sys/netinet/in_gif.c index 69a34f8..55b4ec7 100644 --- a/sys/netinet/in_gif.c +++ b/sys/netinet/in_gif.c @@ -191,6 +191,8 @@ in_gif_output(struct ifnet *ifp, int family, struct mbuf *m) } bcopy(&iphdr, mtod(m, struct ip *), sizeof(struct ip)); + M_SETFIB(m, sc->gif_fibnum); + if (dst->sin_family != sin_dst->sin_family || dst->sin_addr.s_addr != sin_dst->sin_addr.s_addr) { /* cache route doesn't match */ @@ -208,7 +210,7 @@ in_gif_output(struct ifnet *ifp, int family, struct mbuf *m) } if (sc->gif_ro.ro_rt == NULL) { - rtalloc_ign(&sc->gif_ro, 0); + in_rtalloc_ign(&sc->gif_ro, 0, sc->gif_fibnum); if (sc->gif_ro.ro_rt == NULL) { m_freem(m); return ENETUNREACH; @@ -368,7 +370,9 @@ gif_validate4(const struct ip *ip, struct gif_softc *sc, struct ifnet *ifp) 
sin.sin_family = AF_INET; sin.sin_len = sizeof(struct sockaddr_in); sin.sin_addr = ip->ip_src; - rt = rtalloc1((struct sockaddr *)&sin, 0, 0UL); + /* XXX MRT check for the interface we would use on output */ + rt = in_rtalloc1((struct sockaddr *)&sin, 0, + 0UL, sc->gif_fibnum); if (!rt || rt->rt_ifp != ifp) { #if 0 log(LOG_WARNING, "%s: packet from 0x%x dropped " diff --git a/sys/netinet/in_mcast.c b/sys/netinet/in_mcast.c index be2208a..9f37f33 100644 --- a/sys/netinet/in_mcast.c +++ b/sys/netinet/in_mcast.c @@ -1025,7 +1025,8 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt) ro.ro_rt = NULL; *(struct sockaddr_in *)&ro.ro_dst = gsa->sin; - rtalloc_ign(&ro, RTF_CLONING); + in_rtalloc_ign(&ro, RTF_CLONING, + inp->inp_inc.inc_fibnum); if (ro.ro_rt != NULL) { ifp = ro.ro_rt->rt_ifp; KASSERT(ifp != NULL, ("%s: null ifp", diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index 9b0b6a5..a9702c5 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -186,6 +186,7 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo) bzero(inp, inp_zero_size); inp->inp_pcbinfo = pcbinfo; inp->inp_socket = so; + inp->inp_inc.inc_fibnum = so->so_fibnum; #ifdef MAC error = mac_inpcb_init(inp, M_NOWAIT); if (error != 0) @@ -605,7 +606,7 @@ in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam, * Find out route to destination */ if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0) - ia = ip_rtaddr(faddr); + ia = ip_rtaddr(faddr, inp->inp_inc.inc_fibnum); /* * If we found a route, use the address corresponding to * the outgoing interface. 
diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index afb4dd2..6e5c92e 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -101,7 +101,7 @@ struct in_endpoints { struct in_conninfo { u_int8_t inc_flags; u_int8_t inc_len; - u_int16_t inc_pad; /* XXX alignment for in_endpoints */ + u_int16_t inc_fibnum; /* XXX was pad, 16 bits is plenty */ /* protocol dependent part */ struct in_endpoints inc_ie; }; diff --git a/sys/netinet/in_rmx.c b/sys/netinet/in_rmx.c index 8a5f978..aabf57e 100644 --- a/sys/netinet/in_rmx.c +++ b/sys/netinet/in_rmx.c @@ -110,7 +110,8 @@ in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, * Find out if it is because of an * ARP entry and delete it if so. */ - rt2 = rtalloc1((struct sockaddr *)sin, 0, RTF_CLONING); + rt2 = in_rtalloc1((struct sockaddr *)sin, 0, + RTF_CLONING, rt->rt_fibnum); if (rt2) { if (rt2->rt_flags & RTF_LLINFO && rt2->rt_flags & RTF_HOST && @@ -225,10 +226,10 @@ in_rtqkill(struct radix_node *rn, void *rock) if (rt->rt_refcnt > 0) panic("rtqkill route really not free"); - err = rtrequest(RTM_DELETE, + err = in_rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt), rt->rt_gateway, rt_mask(rt), - rt->rt_flags, 0); + rt->rt_flags, 0, rt->rt_fibnum); if (err) { log(LOG_WARNING, "in_rtqkill: error %d\n", err); } else { @@ -253,12 +254,31 @@ in_rtqkill(struct radix_node *rn, void *rock) static int rtq_timeout = RTQ_TIMEOUT; static struct callout rtq_timer; +static void in_rtqtimo_one(void *rock); + static void in_rtqtimo(void *rock) { + int fibnum; + void *newrock; + struct timeval atv; + + KASSERT((rock == (void *)rt_tables[0][AF_INET]), + ("in_rtqtimo: unexpected arg")); + for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { + if ((newrock = rt_tables[fibnum][AF_INET]) != NULL) + in_rtqtimo_one(newrock); + } + atv.tv_usec = 0; + atv.tv_sec = rtq_timeout; + callout_reset(&rtq_timer, tvtohz(&atv), in_rtqtimo, rock); +} + +static void +in_rtqtimo_one(void *rock) +{ struct radix_node_head *rnh = rock; 
struct rtqk_arg arg; - struct timeval atv; static time_t last_adjusted_timeout = 0; arg.found = arg.killed = 0; @@ -297,27 +317,29 @@ in_rtqtimo(void *rock) RADIX_NODE_HEAD_UNLOCK(rnh); } - atv.tv_usec = 0; - atv.tv_sec = arg.nextstop - time_uptime; - callout_reset(&rtq_timer, tvtohz(&atv), in_rtqtimo, rock); } void in_rtqdrain(void) { - struct radix_node_head *rnh = rt_tables[AF_INET]; + struct radix_node_head *rnh; struct rtqk_arg arg; + int fibnum; - arg.found = arg.killed = 0; - arg.rnh = rnh; - arg.nextstop = 0; - arg.draining = 1; - arg.updating = 0; - RADIX_NODE_HEAD_LOCK(rnh); - rnh->rnh_walktree(rnh, in_rtqkill, &arg); - RADIX_NODE_HEAD_UNLOCK(rnh); + for ( fibnum = 0; fibnum < rt_numfibs; fibnum++) { + rnh = rt_tables[fibnum][AF_INET]; + arg.found = arg.killed = 0; + arg.rnh = rnh; + arg.nextstop = 0; + arg.draining = 1; + arg.updating = 0; + RADIX_NODE_HEAD_LOCK(rnh); + rnh->rnh_walktree(rnh, in_rtqkill, &arg); + RADIX_NODE_HEAD_UNLOCK(rnh); + } } +static int _in_rt_was_here; /* * Initialize our routing tree. */ @@ -326,18 +348,29 @@ in_inithead(void **head, int off) { struct radix_node_head *rnh; - if (!rn_inithead(head, off)) + /* XXX MRT + * This can be called from vfs_export.c too in which case 'off' + * will be 0. We know the correct value so just use that and + * return directly if it was 0. + * This is a hack that replaces an even worse hack on a bad hack + * on a bad design. After RELENG_7 this should be fixed but that + * will change the ABI, so for now do it this way. + */ + if (!rn_inithead(head, 32)) return 0; - if (head != (void **)&rt_tables[AF_INET]) /* BOGUS! 
*/ - return 1; /* only do this for the real routing table */ + if (off == 0) /* XXX MRT see above */ + return 1; /* only do the rest for a real routing table */ rnh = *head; rnh->rnh_addaddr = in_addroute; rnh->rnh_matchaddr = in_matroute; rnh->rnh_close = in_clsroute; - callout_init(&rtq_timer, CALLOUT_MPSAFE); - in_rtqtimo(rnh); /* kick off timeout first time */ + if (_in_rt_was_here == 0 ) { + callout_init(&rtq_timer, CALLOUT_MPSAFE); + in_rtqtimo(rnh); /* kick off timeout first time */ + _in_rt_was_here = 1; + } return 1; } @@ -384,16 +417,81 @@ in_ifadown(struct ifaddr *ifa, int delete) { struct in_ifadown_arg arg; struct radix_node_head *rnh; + int fibnum; if (ifa->ifa_addr->sa_family != AF_INET) return 1; - rnh = rt_tables[AF_INET]; - arg.ifa = ifa; - arg.del = delete; - RADIX_NODE_HEAD_LOCK(rnh); - rnh->rnh_walktree(rnh, in_ifadownkill, &arg); - RADIX_NODE_HEAD_UNLOCK(rnh); - ifa->ifa_flags &= ~IFA_ROUTE; /* XXXlocking? */ + for ( fibnum = 0; fibnum < rt_numfibs; fibnum++) { + rnh = rt_tables[fibnum][AF_INET]; + arg.ifa = ifa; + arg.del = delete; + RADIX_NODE_HEAD_LOCK(rnh); + rnh->rnh_walktree(rnh, in_ifadownkill, &arg); + RADIX_NODE_HEAD_UNLOCK(rnh); + ifa->ifa_flags &= ~IFA_ROUTE; /* XXXlocking? */ + } return 0; } + +/* + * inet versions of rt functions. These have fib extensions and + * for now will just reference the _fib variants. 
+ * eventually this order will be reversed, + */ +void +in_rtalloc_ign(struct route *ro, u_long ignflags, u_int fibnum) +{ + rtalloc_ign_fib(ro, ignflags, fibnum); +} + +int +in_rtrequest( int req, + struct sockaddr *dst, + struct sockaddr *gateway, + struct sockaddr *netmask, + int flags, + struct rtentry **ret_nrt, + u_int fibnum) +{ + return (rtrequest_fib(req, dst, gateway, netmask, + flags, ret_nrt, fibnum)); +} + +struct rtentry * +in_rtalloc1(struct sockaddr *dst, int report, u_long ignflags, u_int fibnum) +{ + return (rtalloc1_fib(dst, report, ignflags, fibnum)); +} + +int +in_rt_check(struct rtentry **lrt, struct rtentry **lrt0, + struct sockaddr *dst, u_int fibnum) +{ + return (rt_check_fib(lrt, lrt0, dst, fibnum)); +} + +void +in_rtredirect(struct sockaddr *dst, + struct sockaddr *gateway, + struct sockaddr *netmask, + int flags, + struct sockaddr *src, + u_int fibnum) +{ + rtredirect_fib(dst, gateway, netmask, flags, src, fibnum); +} + +void +in_rtalloc(struct route *ro, u_int fibnum) +{ + rtalloc_ign_fib(ro, 0UL, fibnum); +} + +#if 0 +int in_rt_getifa(struct rt_addrinfo *, u_int fibnum); +int in_rtioctl(u_long, caddr_t, u_int); +int in_rtrequest1(int, struct rt_addrinfo *, struct rtentry **, u_int); +#endif + + diff --git a/sys/netinet/in_var.h b/sys/netinet/in_var.h index 47a160a..d7f1e28 100644 --- a/sys/netinet/in_var.h +++ b/sys/netinet/in_var.h @@ -287,6 +287,7 @@ do { \ IN_NEXT_MULTI((step), (inm)); \ } while(0) +struct rtentry; struct route; struct ip_moptions; @@ -305,6 +306,21 @@ int in_ifadown(struct ifaddr *ifa, int); void in_ifscrub(struct ifnet *, struct in_ifaddr *); struct mbuf *ip_fastforward(struct mbuf *); +/* XXX */ +void in_rtalloc_ign(struct route *ro, u_long ignflags, u_int fibnum); +void in_rtalloc(struct route *ro, u_int fibnum); +struct rtentry *in_rtalloc1(struct sockaddr *, int, u_long, u_int); +void in_rtredirect(struct sockaddr *, struct sockaddr *, + struct sockaddr *, int, struct sockaddr *, u_int); +int in_rtrequest(int, 
struct sockaddr *, + struct sockaddr *, struct sockaddr *, int, struct rtentry **, u_int); +int in_rt_check(struct rtentry **, struct rtentry **, struct sockaddr *, u_int); + +#if 0 +int in_rt_getifa(struct rt_addrinfo *, u_int fibnum); +int in_rtioctl(u_long, caddr_t, u_int); +int in_rtrequest1(int, struct rt_addrinfo *, struct rtentry **, u_int); +#endif #endif /* _KERNEL */ /* INET6 stuff */ diff --git a/sys/netinet/ip_fastfwd.c b/sys/netinet/ip_fastfwd.c index 97b823f..bb8c74a 100644 --- a/sys/netinet/ip_fastfwd.c +++ b/sys/netinet/ip_fastfwd.c @@ -123,7 +123,7 @@ ip_findroute(struct route *ro, struct in_addr dest, struct mbuf *m) dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr.s_addr = dest.s_addr; - rtalloc_ign(ro, RTF_CLONING); + in_rtalloc_ign(ro, RTF_CLONING, M_GETFIB(m)); /* * Route there and interface still up? diff --git a/sys/netinet/ip_fw.h b/sys/netinet/ip_fw.h index b41c037..5dcdbb3 100644 --- a/sys/netinet/ip_fw.h +++ b/sys/netinet/ip_fw.h @@ -161,6 +161,9 @@ enum ipfw_opcodes { /* arguments (4 byte each) */ O_TAG, /* arg1=tag number */ O_TAGGED, /* arg1=tag number */ + O_SETFIB, /* arg1=FIB number */ + O_FIB, /* arg1=FIB desired fib number */ + O_LAST_OPCODE /* not an opcode! */ }; @@ -465,6 +468,7 @@ struct ipfw_flow_id { u_int32_t src_ip; u_int16_t dst_port; u_int16_t src_port; + u_int8_t fib; u_int8_t proto; u_int8_t flags; /* protocol-specific flags */ uint8_t addr_type; /* 4 = ipv4, 6 = ipv6, 1=ether ? */ diff --git a/sys/netinet/ip_fw2.c b/sys/netinet/ip_fw2.c index 39baa71..2346df6 100644 --- a/sys/netinet/ip_fw2.c +++ b/sys/netinet/ip_fw2.c @@ -492,7 +492,7 @@ iface_match(struct ifnet *ifp, ipfw_insn_if *cmd) * multicast, or broadcast. 
*/ static int -verify_path(struct in_addr src, struct ifnet *ifp) +verify_path(struct in_addr src, struct ifnet *ifp, u_int fib) { struct route ro; struct sockaddr_in *dst; @@ -503,7 +503,7 @@ verify_path(struct in_addr src, struct ifnet *ifp) dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = src; - rtalloc_ign(&ro, RTF_CLONING); + in_rtalloc_ign(&ro, RTF_CLONING, fib); if (ro.ro_rt == NULL) return 0; @@ -593,6 +593,7 @@ verify_path6(struct in6_addr *src, struct ifnet *ifp) dst->sin6_family = AF_INET6; dst->sin6_len = sizeof(*dst); dst->sin6_addr = *src; + /* XXX MRT 0 for ipv6 at this time */ rtalloc_ign((struct route *)&ro, RTF_CLONING); if (ro.ro_rt == NULL) @@ -828,6 +829,10 @@ ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args, snprintf(SNPARGS(action2, 0), "Tee %d", cmd->arg1); break; + case O_SETFIB: + snprintf(SNPARGS(action2, 0), "SetFib %d", + cmd->arg1); + break; case O_SKIPTO: snprintf(SNPARGS(action2, 0), "SkipTo %d", cmd->arg1); @@ -1500,6 +1505,7 @@ install_state(struct ip_fw *rule, ipfw_insn_limit *cmd, id.dst_ip = id.src_ip = id.dst_port = id.src_port = 0; id.proto = args->f_id.proto; id.addr_type = args->f_id.addr_type; + id.fib = M_GETFIB(args->m); if (IS_IP6_FLOW_ID (&(args->f_id))) { if (limit_mask & DYN_SRC_ADDR) @@ -1601,6 +1607,7 @@ send_pkt(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t seq, return (NULL); m->m_pkthdr.rcvif = (struct ifnet *)0; + M_SETFIB(m, id->fib); #ifdef MAC if (replyto != NULL) mac_netinet_firewall_reply(replyto, m); @@ -2200,6 +2207,7 @@ ipfw_chk(struct ip_fw_args *args) return (IP_FW_PASS); /* accept */ pktlen = m->m_pkthdr.len; + args->f_id.fib = M_GETFIB(m); /* note mbuf not altered) */ proto = args->f_id.proto = 0; /* mark f_id invalid */ /* XXX 0 is a valid proto: IP/IPv6 Hop-by-Hop Option */ @@ -2911,7 +2919,8 @@ check_body: verify_path6(&(args->f_id.src_ip6), m->m_pkthdr.rcvif) : #endif - verify_path(src_ip, m->m_pkthdr.rcvif))); + verify_path(src_ip, 
m->m_pkthdr.rcvif, + args->f_id.fib))); break; case O_VERSRCREACH: @@ -2922,7 +2931,7 @@ check_body: verify_path6(&(args->f_id.src_ip6), NULL) : #endif - verify_path(src_ip, NULL))); + verify_path(src_ip, NULL, args->f_id.fib))); break; case O_ANTISPOOF: @@ -2941,7 +2950,8 @@ check_body: m->m_pkthdr.rcvif) : #endif verify_path(src_ip, - m->m_pkthdr.rcvif); + m->m_pkthdr.rcvif, + args->f_id.fib); else match = 1; break; @@ -3043,6 +3053,11 @@ check_body: break; } + case O_FIB: /* try match the specified fib */ + if (args->f_id.fib == cmd->arg1) + match = 1; + break; + case O_TAGGED: { uint32_t tag = (cmd->arg1 == IP_FW_TABLEARG) ? tablearg : cmd->arg1; @@ -3203,7 +3218,6 @@ check_body: IP_FW_DIVERT : IP_FW_TEE; goto done; } - case O_COUNT: case O_SKIPTO: f->pcnt++; /* update stats */ @@ -3283,6 +3297,14 @@ check_body: IP_FW_NETGRAPH : IP_FW_NGTEE; goto done; + case O_SETFIB: + f->pcnt++; /* update stats */ + f->bcnt += pktlen; + f->timestamp = time_uptime; + M_SETFIB(m, cmd->arg1); + args->f_id.fib = cmd->arg1; + goto next_rule; + case O_NAT: { struct cfg_nat *t; int nat_id; @@ -3793,6 +3815,26 @@ check_ipfw_struct(struct ip_fw *rule, int size) goto bad_size; break; + case O_FIB: + if (cmdlen != F_INSN_SIZE(ipfw_insn)) + goto bad_size; + if (cmd->arg1 >= rt_numfibs) { + printf("ipfw: invalid fib number %d\n", + cmd->arg1); + return EINVAL; + } + break; + + case O_SETFIB: + if (cmdlen != F_INSN_SIZE(ipfw_insn)) + goto bad_size; + if (cmd->arg1 >= rt_numfibs) { + printf("ipfw: invalid fib number %d\n", + cmd->arg1); + return EINVAL; + } + goto check_action; + case O_UID: case O_GID: case O_JAIL: diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c index 4f664bf..bed9536 100644 --- a/sys/netinet/ip_icmp.c +++ b/sys/netinet/ip_icmp.c @@ -227,6 +227,10 @@ stdreply: icmpelen = max(8, min(icmp_quotelen, oip->ip_len - oiphlen)); m_align(m, ICMP_MINLEN + icmplen); m->m_len = ICMP_MINLEN + icmplen; + /* XXX MRT make the outgoing packet use the same FIB + * that was 
associated with the incoming packet + */ + M_SETFIB(m, M_GETFIB(n)); icp = mtod(m, struct icmp *); icmpstat.icps_outhist[type]++; icp->icmp_type = type; @@ -295,6 +299,7 @@ icmp_input(struct mbuf *m, int off) int icmplen = ip->ip_len; int i, code; void (*ctlfunc)(int, struct sockaddr *, void *); + int fibnum; /* * Locate icmp structure in mbuf, and check @@ -576,10 +581,12 @@ reflect: } #endif icmpsrc.sin_addr = icp->icmp_ip.ip_dst; - rtredirect((struct sockaddr *)&icmpsrc, - (struct sockaddr *)&icmpdst, - (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST, - (struct sockaddr *)&icmpgw); + for ( fibnum = 0; fibnum < rt_numfibs; fibnum++) { + in_rtredirect((struct sockaddr *)&icmpsrc, + (struct sockaddr *)&icmpdst, + (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST, + (struct sockaddr *)&icmpgw, fibnum); + } pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&icmpsrc); #ifdef IPSEC key_sa_routechange((struct sockaddr *)&icmpsrc); @@ -693,7 +700,7 @@ icmp_reflect(struct mbuf *m) * When we don't have a route back to the packet source, stop here * and drop the packet. */ - ia = ip_rtaddr(ip->ip_dst); + ia = ip_rtaddr(ip->ip_dst, M_GETFIB(m)); if (ia == NULL) { m_freem(m); icmpstat.icps_noroute++; diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index 1eb9e4a..93ba871 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -1198,7 +1198,7 @@ ipproto_unregister(u_char ipproto) * return internet address info of interface to be used to get there. 
*/ struct in_ifaddr * -ip_rtaddr(struct in_addr dst) +ip_rtaddr(struct in_addr dst, u_int fibnum) { struct route sro; struct sockaddr_in *sin; @@ -1209,7 +1209,7 @@ ip_rtaddr(struct in_addr dst) sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); sin->sin_addr = dst; - rtalloc_ign(&sro, RTF_CLONING); + in_rtalloc_ign(&sro, RTF_CLONING, fibnum); if (sro.ro_rt == NULL) return (NULL); @@ -1269,7 +1269,7 @@ ip_forward(struct mbuf *m, int srcrt) } #endif - ia = ip_rtaddr(ip->ip_dst); + ia = ip_rtaddr(ip->ip_dst, M_GETFIB(m)); if (!srcrt && ia == NULL) { icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); return; @@ -1334,7 +1334,7 @@ ip_forward(struct mbuf *m, int srcrt) sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); sin->sin_addr = ip->ip_dst; - rtalloc_ign(&ro, RTF_CLONING); + in_rtalloc_ign(&ro, RTF_CLONING, M_GETFIB(m)); rt = ro.ro_rt; @@ -1363,7 +1363,7 @@ ip_forward(struct mbuf *m, int srcrt) * the ICMP_UNREACH_NEEDFRAG "Next-Hop MTU" field described in RFC1191. */ bzero(&ro, sizeof(ro)); - rtalloc_ign(&ro, RTF_CLONING); + rtalloc_ign_fib(&ro, RTF_CLONING, M_GETFIB(m)); error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL); diff --git a/sys/netinet/ip_mroute.c b/sys/netinet/ip_mroute.c index 6e0e124..d60e8bd 100644 --- a/sys/netinet/ip_mroute.c +++ b/sys/netinet/ip_mroute.c @@ -303,7 +303,7 @@ static int X_ip_mrouter_done(void); static int X_ip_mrouter_get(struct socket *so, struct sockopt *m); static int X_ip_mrouter_set(struct socket *so, struct sockopt *m); static int X_legal_vif_num(int vif); -static int X_mrt_ioctl(int cmd, caddr_t data); +static int X_mrt_ioctl(int cmd, caddr_t data, int fibnum); static int get_sg_cnt(struct sioc_sg_req *); static int get_vif_cnt(struct sioc_vif_req *); @@ -552,7 +552,7 @@ X_ip_mrouter_get(struct socket *so, struct sockopt *sopt) * Handle ioctl commands to obtain information from the cache */ static int -X_mrt_ioctl(int cmd, caddr_t data) +X_mrt_ioctl(int cmd, caddr_t data, int fibnum) { int error = 0; 
diff --git a/sys/netinet/ip_mroute.h b/sys/netinet/ip_mroute.h index c756d84..4043e44 100644 --- a/sys/netinet/ip_mroute.h +++ b/sys/netinet/ip_mroute.h @@ -359,7 +359,7 @@ struct sockopt; extern int (*ip_mrouter_set)(struct socket *, struct sockopt *); extern int (*ip_mrouter_get)(struct socket *, struct sockopt *); extern int (*ip_mrouter_done)(void); -extern int (*mrt_ioctl)(int, caddr_t); +extern int (*mrt_ioctl)(int, caddr_t, int); #endif /* _KERNEL */ diff --git a/sys/netinet/ip_options.c b/sys/netinet/ip_options.c index 72b6edd..0019f7a 100644 --- a/sys/netinet/ip_options.c +++ b/sys/netinet/ip_options.c @@ -233,7 +233,8 @@ dropit: if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == NULL) ia = (INA)ifa_ifwithnet((SA)&ipaddr); } else - ia = ip_rtaddr(ipaddr.sin_addr); +/* XXX MRT 0 for routing */ + ia = ip_rtaddr(ipaddr.sin_addr, M_GETFIB(m)); if (ia == NULL) { type = ICMP_UNREACH; code = ICMP_UNREACH_SRCFAIL; @@ -276,7 +277,7 @@ dropit: * same). */ if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == NULL && - (ia = ip_rtaddr(ipaddr.sin_addr)) == NULL) { + (ia = ip_rtaddr(ipaddr.sin_addr, M_GETFIB(m))) == NULL) { type = ICMP_UNREACH; code = ICMP_UNREACH_HOST; goto bad; diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 37995ef..231510a 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -230,10 +230,12 @@ again: */ if (ro->ro_rt == NULL) #ifdef RADIX_MPATH - rtalloc_mpath(ro, - ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr)); + rtalloc_mpath_fib(ro, + ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr), + inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m)); #else - rtalloc_ign(ro, 0); + in_rtalloc_ign(ro, 0, + inp ? 
inp->inp_inc.inc_fibnum : M_GETFIB(m)); #endif if (ro->ro_rt == NULL) { ipstat.ips_noroute++; diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h index eef4e1f..436a4a0 100644 --- a/sys/netinet/ip_var.h +++ b/sys/netinet/ip_var.h @@ -209,7 +209,7 @@ int ipproto_unregister(u_char); struct mbuf * ip_reass(struct mbuf *); struct in_ifaddr * - ip_rtaddr(struct in_addr); + ip_rtaddr(struct in_addr, u_int fibnum); void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *, struct mbuf *); void ip_slowtimo(void); diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index 23ab1fe..2e9366f 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -95,7 +95,7 @@ int (*ip_mrouter_get)(struct socket *, struct sockopt *); int (*ip_mrouter_done)(void); int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, struct ip_moptions *); -int (*mrt_ioctl)(int, caddr_t); +int (*mrt_ioctl)(int, caddr_t, int); int (*legal_vif_num)(int); u_long (*ip_mcast_src)(int); diff --git a/sys/netinet/sctp_os_bsd.h b/sys/netinet/sctp_os_bsd.h index b165943..01c0fcb 100644 --- a/sys/netinet/sctp_os_bsd.h +++ b/sys/netinet/sctp_os_bsd.h @@ -399,7 +399,7 @@ typedef struct callout sctp_os_timer_t; typedef struct route sctp_route_t; typedef struct rtentry sctp_rtentry_t; -#define SCTP_RTALLOC(ro, vrf_id) rtalloc_ign((struct route *)ro, 0UL) +#define SCTP_RTALLOC(ro, vrf_id) in_rtalloc_ign((struct route *)ro, 0UL, vrf_id) /* Future zero copy wakeup/send function */ #define SCTP_ZERO_COPY_EVENT(inp, so) diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index a344ae5..47763c1 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -453,6 +453,7 @@ findpcb: /* * If the INPCB does not exist then all data in the incoming * segment is discarded and an appropriate RST is sent back. + * XXX MRT Send RST using which routing table? 
*/ if (inp == NULL) { /* diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index aaac6d6..36422197 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -471,6 +471,10 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr)); flags = TH_ACK; } else { + /* + * reuse the mbuf. + * XXX MRT We inherrit the FIB, which is lucky. + */ m_freem(m->m_next); m->m_next = NULL; m->m_data = (caddr_t)ipgen; @@ -1199,6 +1203,8 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip) bzero(&inc, sizeof(inc)); inc.inc_flags = 0; /* IPv4 */ inc.inc_faddr = faddr; + inc.inc_fibnum = + inp->inp_inc.inc_fibnum; mtu = ntohs(icp->icmp_nextmtu); /* @@ -1595,7 +1601,7 @@ tcp_maxmtu(struct in_conninfo *inc, int *flags) dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = inc->inc_faddr; - rtalloc_ign(&sro, RTF_CLONING); + in_rtalloc_ign(&sro, RTF_CLONING, inc->inc_fibnum); } if (sro.ro_rt != NULL) { ifp = sro.ro_rt->rt_ifp; diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index d5694f3..e19f095 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -671,6 +671,8 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) #endif inp = sotoinpcb(so); + inp->inp_inc.inc_fibnum = sc->sc_inc.inc_fibnum; + so->so_fibnum = sc->sc_inc.inc_fibnum; INP_WLOCK(inp); /* Insert new socket into PCB hash list. */ @@ -941,6 +943,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, else tcpstat.tcps_sc_completed++; +/* how do we find the inp for the new socket? 
*/ if (sc != &scs) syncache_free(sc); return (1); @@ -1127,6 +1130,8 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, sc->sc_label = maclabel; #endif sc->sc_ipopts = ipopts; bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo)); + /* must follow the bcopy above, which overwrites all of sc_inc (inc_fibnum included) */ + sc->sc_inc.inc_fibnum = inp->inp_inc.inc_fibnum; #ifdef INET6 if (!inc->inc_isipv6) diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c index bcb634f..c24ca20 100644 --- a/sys/netinet6/in6.c +++ b/sys/netinet6/in6.c @@ -1708,7 +1708,8 @@ in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia, rtp = &rt; } - error = rtrequest(RTM_ADD, (struct sockaddr *)&ia->ia_dstaddr, + error = rtrequest(RTM_ADD, + (struct sockaddr *)&ia->ia_dstaddr, (struct sockaddr *)&ia->ia_addr, (struct sockaddr *)&ia->ia_prefixmask, ia->ia_flags | rtflags, rtp); diff --git a/sys/netinet6/in6_ifattach.c b/sys/netinet6/in6_ifattach.c index 6962deb..29dca4f 100644 --- a/sys/netinet6/in6_ifattach.c +++ b/sys/netinet6/in6_ifattach.c @@ -821,15 +821,15 @@ in6_ifdetach(struct ifnet *ifp) /* XXX: should not fail */ return; /* XXX grab lock first to avoid LOR */ - if (rt_tables[AF_INET6] != NULL) { - RADIX_NODE_HEAD_LOCK(rt_tables[AF_INET6]); + if (rt_tables[0][AF_INET6] != NULL) { + RADIX_NODE_HEAD_LOCK(rt_tables[0][AF_INET6]); rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL); if (rt) { if (rt->rt_ifp == ifp) rtexpunge(rt); RTFREE_LOCKED(rt); } - RADIX_NODE_HEAD_UNLOCK(rt_tables[AF_INET6]); + RADIX_NODE_HEAD_UNLOCK(rt_tables[0][AF_INET6]); } } diff --git a/sys/netinet6/in6_rmx.c b/sys/netinet6/in6_rmx.c index ea4c7ff..1c0164f 100644 --- a/sys/netinet6/in6_rmx.c +++ b/sys/netinet6/in6_rmx.c @@ -449,17 +449,21 @@ in6_rtqdrain(void) /* * Initialize our routing tree. + * XXX MRT When off == 0, we are being called from vfs_export.c + * so just set up their table and leave. (we know what the correct + * value should be so just use that)..
FIX AFTER RELENG_7 is MFC'd + * see also comments in in_inithead() vfs_export.c and domain.h */ int in6_inithead(void **head, int off) { struct radix_node_head *rnh; - if (!rn_inithead(head, off)) - return 0; + if (!rn_inithead(head, offsetof(struct sockaddr_in6, sin6_addr) << 3)) + return 0; /* See above */ - if (head != (void **)&rt_tables[AF_INET6]) /* BOGUS! */ - return 1; /* only do this for the real routing table */ + if (off == 0) /* See above */ + return 1; /* only do the rest for the real thing */ rnh = *head; rnh->rnh_addaddr = in6_addroute; diff --git a/sys/netinet6/nd6_rtr.c b/sys/netinet6/nd6_rtr.c index 08f9a83..39f5382 100644 --- a/sys/netinet6/nd6_rtr.c +++ b/sys/netinet6/nd6_rtr.c @@ -2019,7 +2019,8 @@ in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6) void rt6_flush(struct in6_addr *gateway, struct ifnet *ifp) { - struct radix_node_head *rnh = rt_tables[AF_INET6]; + + struct radix_node_head *rnh = rt_tables[0][AF_INET6]; int s = splnet(); /* We'll care only link-local addresses */ diff --git a/sys/netipx/ipx_proto.c b/sys/netipx/ipx_proto.c index a762ea7..56d091a 100644 --- a/sys/netipx/ipx_proto.c +++ b/sys/netipx/ipx_proto.c @@ -131,16 +131,26 @@ static struct protosw ipxsw[] = { }, }; +extern int ipx_inithead(void **, int); + static struct domain ipxdomain = { .dom_family = AF_IPX, .dom_name = "network systems", .dom_protosw = ipxsw, .dom_protoswNPROTOSW = &ipxsw[sizeof(ipxsw)/sizeof(ipxsw[0])], - .dom_rtattach = rn_inithead, + .dom_rtattach = ipx_inithead, .dom_rtoffset = 16, .dom_maxrtkey = sizeof(struct sockaddr_ipx) }; + +/* shim to adapt arguments */ +int +ipx_inithead(void **head, int offset) +{ + return rn_inithead(head, offset); +} + DOMAIN_SET(ipx); SYSCTL_NODE(_net, PF_IPX, ipx, CTLFLAG_RW, 0, "IPX/SPX"); diff --git a/sys/nfs4client/nfs4_vfsops.c b/sys/nfs4client/nfs4_vfsops.c index 6de47a9..2531f66 100644 --- a/sys/nfs4client/nfs4_vfsops.c +++ b/sys/nfs4client/nfs4_vfsops.c @@ -812,7 +812,8 @@ 
nfs4_do_setclientid(struct nfsmount *nmp, struct ucred *cred) #ifdef NFS4_USE_RPCCLNT ro.ro_dst = *nmp->nm_rpcclnt.rc_name; #endif - rtalloc(&ro); +/* XXX MRT NFS uses table 0 */ + in_rtalloc(&ro, 0); if (ro.ro_rt == NULL) { error = EHOSTUNREACH; goto nfsmout; diff --git a/sys/nfsclient/bootp_subr.c b/sys/nfsclient/bootp_subr.c index 44d4d97..a2c09c5 100644 --- a/sys/nfsclient/bootp_subr.c +++ b/sys/nfsclient/bootp_subr.c @@ -1137,11 +1137,12 @@ bootpc_adjust_interface(struct bootpc_ifcontext *ifctx, if (ifctx->gotgw != 0 || gctx->gotgw == 0) { clear_sinaddr(&defdst); clear_sinaddr(&defmask); - error = rtrequest(RTM_ADD, + /* XXX MRT just table 0 */ + error = rtrequest_fib(RTM_ADD, (struct sockaddr *) &defdst, (struct sockaddr *) gw, (struct sockaddr *) &defmask, - (RTF_UP | RTF_GATEWAY | RTF_STATIC), NULL); + (RTF_UP | RTF_GATEWAY | RTF_STATIC), NULL, 0); if (error != 0) { printf("bootpc_adjust_interface: " "add net route, error=%d\n", error); diff --git a/sys/nfsclient/nfs_vfsops.c b/sys/nfsclient/nfs_vfsops.c index aac2e00..f342211 100644 --- a/sys/nfsclient/nfs_vfsops.c +++ b/sys/nfsclient/nfs_vfsops.c @@ -476,6 +476,7 @@ nfs_mountroot(struct mount *mp, struct thread *td) sin = mask; sin.sin_family = AF_INET; sin.sin_len = sizeof(sin); + /* XXX MRT use table 0 for this sort of thing */ error = rtrequest(RTM_ADD, (struct sockaddr *)&sin, (struct sockaddr *)&nd->mygateway, (struct sockaddr *)&mask, diff --git a/sys/sys/domain.h b/sys/sys/domain.h index cf2e92c..c78e50b 100644 --- a/sys/sys/domain.h +++ b/sys/sys/domain.h @@ -57,6 +57,12 @@ struct domain { int (*dom_rtattach) /* initialize routing table */ (void **, int); int dom_rtoffset; /* an arg to rtattach, in bits */ + /* XXX MRT. + * rtoffset May be 0 if the domain supplies its own rtattach(), + * in which case, a 0 indicates it's being called from + * vfs_export.c (HACK) Only for AF_INET{,6} at this time. + * Temporary ABI compat hack.. 
fix post RELENG_7 + */ int dom_maxrtkey; /* for routing layer */ void *(*dom_ifattach)(struct ifnet *); void (*dom_ifdetach)(struct ifnet *, void *); diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index e0e685e..9229e29 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -192,6 +192,11 @@ struct mbuf { #define M_PROTO6 0x00080000 /* protocol-specific */ #define M_PROTO7 0x00100000 /* protocol-specific */ #define M_PROTO8 0x00200000 /* protocol-specific */ +/* + * For RELENG_{6,7} steal these flags for limited multiple routing table + * support. In RELENG_8 and beyond, use just one flag and a tag. + */ +#define M_FIB 0xF0000000 /* steal some bits to store fib number. */ #define M_NOTIFICATION M_PROTO5 /* SCTP notification */ @@ -206,7 +211,7 @@ struct mbuf { */ #define M_COPYFLAGS \ (M_PKTHDR|M_EOR|M_RDONLY|M_PROTOFLAGS|M_SKIP_FIREWALL|M_BCAST|M_MCAST|\ - M_FRAG|M_FIRSTFRAG|M_LASTFRAG|M_VLANTAG|M_PROMISC) + M_FRAG|M_FIRSTFRAG|M_LASTFRAG|M_VLANTAG|M_PROMISC|M_FIB) /* * External buffer types: identify ext_buf type. 
@@ -277,7 +282,7 @@ struct mbstat { u_long m_mlen; /* length of data in an mbuf */ u_long m_mhlen; /* length of data in a header mbuf */ - /* Number of mbtypes (gives # elems in mbtypes[] array: */ + /* Number of mbtypes (gives # elems in mbtypes[] array) */ short m_numtypes; /* XXX: Sendfile stats should eventually move to their own struct */ @@ -957,6 +962,20 @@ m_tag_find(struct mbuf *m, int type, struct m_tag *start) m_tag_locate(m, MTAG_ABI_COMPAT, type, start)); } +/* XXX temporary FIB methods probably eventually use tags.*/ +#define M_FIBSHIFT 28 +#define M_FIBMASK 0x0F + +/* get the fib from an mbuf and if it is not set, return the default */ +#define M_GETFIB(_m) \ + ((((_m)->m_flags & M_FIB) >> M_FIBSHIFT) & M_FIBMASK) + +/* store _fib in the M_FIB bits of the mbuf flags; _m is evaluated twice, and the shift is done unsigned so fibs 8-15 do not shift into the sign bit */ +#define M_SETFIB(_m, _fib) do { \ + (_m)->m_flags &= ~M_FIB; \ + (_m)->m_flags |= (((u_int)(_fib) << M_FIBSHIFT) & M_FIB); \ +} while (0) + #endif /* _KERNEL */ #ifdef MBUF_PROFILING diff --git a/sys/sys/proc.h b/sys/sys/proc.h index e320354..503f921 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -509,6 +509,7 @@ struct proc { struct pargs *p_args; /* (c) Process arguments. */ rlim_t p_cpulimit; /* (c) Current CPU limit in seconds. */ signed char p_nice; /* (c) Process "nice" value. */ + int p_fibnum; /* in this routing domain XXX MRT */ /* End area that is copied on creation.
*/ #define p_endcopy p_xstat diff --git a/sys/sys/socket.h b/sys/sys/socket.h index 713dd38..7ed9c70 100644 --- a/sys/sys/socket.h +++ b/sys/sys/socket.h @@ -138,6 +138,7 @@ typedef __uid_t uid_t; #define SO_LISTENQLIMIT 0x1011 /* socket's backlog limit */ #define SO_LISTENQLEN 0x1012 /* socket's complete queue length */ #define SO_LISTENINCQLEN 0x1013 /* socket's incomplete queue length */ +#define SO_SETFIB 0x1014 /* use this FIB to route */ #endif /* diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h index 40f3dd9..10338d8 100644 --- a/sys/sys/socketvar.h +++ b/sys/sys/socketvar.h @@ -146,6 +146,7 @@ struct socket { void *so_accept_filter_arg; /* saved filter args */ char *so_accept_filter_str; /* saved user args */ } *so_accf; + int so_fibnum; /* routing domain for this socket */ }; #define SB_EMPTY_FIXUP(sb) do { \ diff --git a/sys/sys/syscall.h b/sys/sys/syscall.h index 45c65ce..3b1b2c1 100644 --- a/sys/sys/syscall.h +++ b/sys/sys/syscall.h @@ -171,6 +171,7 @@ #define SYS_shmsys 171 #define SYS_freebsd6_pread 173 #define SYS_freebsd6_pwrite 174 +#define SYS_setfib 175 #define SYS_ntp_adjtime 176 #define SYS_setgid 181 #define SYS_setegid 182 diff --git a/sys/sys/syscall.mk b/sys/sys/syscall.mk index c227b63..b0172f4 100644 --- a/sys/sys/syscall.mk +++ b/sys/sys/syscall.mk @@ -123,6 +123,7 @@ MIASM = \ shmsys.o \ freebsd6_pread.o \ freebsd6_pwrite.o \ + setfib.o \ ntp_adjtime.o \ setgid.o \ setegid.o \ diff --git a/sys/sys/sysproto.h b/sys/sys/sysproto.h index 1a0a10f..feb582a 100644 --- a/sys/sys/sysproto.h +++ b/sys/sys/sysproto.h @@ -555,6 +555,9 @@ struct freebsd6_pwrite_args { char pad_l_[PADL_(int)]; int pad; char pad_r_[PADR_(int)]; char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)]; }; +struct setfib_args { + char fibnum_l_[PADL_(int)]; int fibnum; char fibnum_r_[PADR_(int)]; +}; struct ntp_adjtime_args { char tp_l_[PADL_(struct timex *)]; struct timex * tp; char tp_r_[PADR_(struct timex *)]; }; @@ -1744,6 +1747,7 @@ int 
msgsys(struct thread *, struct msgsys_args *); int shmsys(struct thread *, struct shmsys_args *); int freebsd6_pread(struct thread *, struct freebsd6_pread_args *); int freebsd6_pwrite(struct thread *, struct freebsd6_pwrite_args *); +int setfib(struct thread *, struct setfib_args *); int ntp_adjtime(struct thread *, struct ntp_adjtime_args *); int setgid(struct thread *, struct setgid_args *); int setegid(struct thread *, struct setegid_args *); @@ -2325,6 +2329,7 @@ int freebsd4_sigreturn(struct thread *, struct freebsd4_sigreturn_args *); #define SYS_AUE_shmsys AUE_SHMSYS #define SYS_AUE_freebsd6_pread AUE_PREAD #define SYS_AUE_freebsd6_pwrite AUE_PWRITE +#define SYS_AUE_setfib AUE_NULL #define SYS_AUE_ntp_adjtime AUE_NTP_ADJTIME #define SYS_AUE_setgid AUE_SETGID #define SYS_AUE_setegid AUE_SETEGID diff --git a/usr.sbin/setfib/Makefile b/usr.sbin/setfib/Makefile new file mode 100644 index 0000000..8508ed1 --- /dev/null +++ b/usr.sbin/setfib/Makefile @@ -0,0 +1,6 @@ +# @(#)Makefile 8.1 (Berkeley) 6/6/93 +# $FreeBSD$ + +PROG= setfib + +.include <bsd.prog.mk> diff --git a/usr.sbin/setfib/setfib.1 b/usr.sbin/setfib/setfib.1 new file mode 100644 index 0000000..29c4947 --- /dev/null +++ b/usr.sbin/setfib/setfib.1 @@ -0,0 +1,92 @@ +.\" Copyright (c) 2008 Cisco systems +.\" Author Julian Elischer. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. 
+.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd April 9, 2008 +.Dt SETFIB 1 +.Os +.Sh NAME +.Nm setfib +.Nd execute a utility with an altered default network view. +.Sh SYNOPSIS +.Nm +.Op Fl F +.Ar fib +.Ar utility +.Op Ar argument ... +.Sh DESCRIPTION +The +.Nm +utility runs +.Ar utility +with a different routing table. The table number +.Dq fib +will be used by default for all sockets started by this +process or descendants. +.Sh ENVIRONMENT +The +.Ev PATH +environment variable is used to locate the requested +.Ar utility +if the name contains no +.Ql / +characters. +.Sh EXIT STATUS +If +.Ar utility +is invoked, the exit status of +.Nm +is the exit status of +.Ar utility . +.Pp +An exit status of 126 indicates +.Ar utility +was found, but could not be executed. +An exit status of 127 indicates +.Ar utility +could not be found. +.Sh EXAMPLES +Execute utility +.Sq netstat +to view the second routing table. +.Pp +.Dl "setfib -F 1 netstat -rn" +or +.Dl "setfib 1 netstat -rn" +or +.Dl "setfib -1 netstat -rn" +.Sh SEE ALSO +.Xr setfib 2 , +.Xr setsockopt 2 +.Sh STANDARDS +The +.Nm +utility is a FreeBSD-specific extension, however many UNIX-like systems +have an equivalent function.
+.Sh HISTORY +The +.Nm +utility appeared in +.Fx 8.0 . diff --git a/usr.sbin/setfib/setfib.c b/usr.sbin/setfib/setfib.c new file mode 100644 index 0000000..3b15224 --- /dev/null +++ b/usr.sbin/setfib/setfib.c @@ -0,0 +1,103 @@ +/* + * Copyright (c) 1989, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * Copyright (c) 2008 Cisco Systems, All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * setfib file skelaton taken from nice.c + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + + +#include <errno.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/sysctl.h> + +void usage(void); + +int +main(int argc, char *argv[]) +{ + long fib = 0; + int ch; + char *ep; + int numfibs; + int intsize = sizeof(int); + + if (sysctlbyname("net.fibs", &numfibs, &intsize, NULL, 0) == -1) + errx(1, "Multiple FIBS not supported"); + if (argc < 2) + usage(); + ep = argv[1]; + /* + * convert -N or N to -FN. (N is a number) + */ + if (ep[0]== '-' && isdigit((unsigned char)ep[1])) + ep++; + if (isdigit((unsigned char)*ep)) + if (asprintf(&argv[1], "-F%s", ep) < 0) + err(1, "asprintf"); + + while ((ch = getopt(argc, argv, "F:")) != -1) { + switch (ch) { + case 'F': + errno = 0; + fib = strtol(optarg, &ep, 10); + if (ep == optarg || *ep != '\0' || errno || + fib < 0 || fib >= numfibs) + errx(1, "%s: invalid FIB (max %s)", + optarg, numfibs - 1); + break; + default: + usage(); + } + } + argc -= optind; + argv += optind; + + if (argc == 0) + usage(); + + errno = 0; + if (syscall(175, (int)fib)) + warn("setfib"); + execvp(*argv, argv); + err(errno == ENOENT ? 127 : 126, "%s", *argv); +} + +void +usage(void) +{ + + (void)fprintf(stderr, + "usage: setfib [-[F]]value command"); + exit(1); +} |