summary refs log tree commit diff stats
path: root/sys
diff options
context:
space:
mode:
Diffstat (limited to 'sys')
-rw-r--r-- sys/conf/NOTES 2
-rw-r--r-- sys/conf/options 1
-rw-r--r-- sys/contrib/ipfilter/netinet/ip_fil_freebsd.c 4
-rw-r--r-- sys/contrib/pf/net/pf.c 36
-rw-r--r-- sys/contrib/pf/net/pf_ioctl.c 4
-rw-r--r-- sys/kern/init_sysent.c 2
-rw-r--r-- sys/kern/sys_socket.c 2
-rw-r--r-- sys/kern/syscalls.c 2
-rw-r--r-- sys/kern/syscalls.master 2
-rw-r--r-- sys/kern/systrace_args.c 7
-rw-r--r-- sys/kern/uipc_socket.c 20
-rw-r--r-- sys/kern/vfs_export.c 19
-rw-r--r-- sys/net/if.c 9
-rw-r--r-- sys/net/if_atmsubr.c 3
-rw-r--r-- sys/net/if_fwsubr.c 2
-rw-r--r-- sys/net/if_gif.c 3
-rw-r--r-- sys/net/if_gif.h 1
-rw-r--r-- sys/net/if_gre.c 7
-rw-r--r-- sys/net/if_gre.h 1
-rw-r--r-- sys/net/if_iso88025subr.c 3
-rw-r--r-- sys/net/if_stf.c 9
-rw-r--r-- sys/net/if_var.h 2
-rw-r--r-- sys/net/radix_mpath.c 4
-rw-r--r-- sys/net/radix_mpath.h 3
-rw-r--r-- sys/net/route.c 518
-rw-r--r-- sys/net/route.h 35
-rw-r--r-- sys/net/rtsock.c 14
-rw-r--r-- sys/netatalk/at_extern.h 1
-rw-r--r-- sys/netatalk/at_proto.c 2
-rw-r--r-- sys/netgraph/netflow/netflow.c 6
-rw-r--r-- sys/netinet/if_atm.c 2
-rw-r--r-- sys/netinet/if_ether.c 297
-rw-r--r-- sys/netinet/in_gif.c 8
-rw-r--r-- sys/netinet/in_mcast.c 3
-rw-r--r-- sys/netinet/in_pcb.c 3
-rw-r--r-- sys/netinet/in_pcb.h 2
-rw-r--r-- sys/netinet/in_rmx.c 154
-rw-r--r-- sys/netinet/in_var.h 16
-rw-r--r-- sys/netinet/ip_fastfwd.c 2
-rw-r--r-- sys/netinet/ip_fw.h 4
-rw-r--r-- sys/netinet/ip_fw2.c 54
-rw-r--r-- sys/netinet/ip_icmp.c 17
-rw-r--r-- sys/netinet/ip_input.c 10
-rw-r--r-- sys/netinet/ip_mroute.c 4
-rw-r--r-- sys/netinet/ip_mroute.h 2
-rw-r--r-- sys/netinet/ip_options.c 5
-rw-r--r-- sys/netinet/ip_output.c 8
-rw-r--r-- sys/netinet/ip_var.h 2
-rw-r--r-- sys/netinet/raw_ip.c 2
-rw-r--r-- sys/netinet/sctp_os_bsd.h 2
-rw-r--r-- sys/netinet/tcp_input.c 1
-rw-r--r-- sys/netinet/tcp_subr.c 8
-rw-r--r-- sys/netinet/tcp_syncache.c 4
-rw-r--r-- sys/netinet6/in6.c 3
-rw-r--r-- sys/netinet6/in6_ifattach.c 6
-rw-r--r-- sys/netinet6/in6_rmx.c 12
-rw-r--r-- sys/netinet6/nd6_rtr.c 3
-rw-r--r-- sys/netipx/ipx_proto.c 12
-rw-r--r-- sys/nfs4client/nfs4_vfsops.c 3
-rw-r--r-- sys/nfsclient/bootp_subr.c 5
-rw-r--r-- sys/nfsclient/nfs_vfsops.c 1
-rw-r--r-- sys/sys/domain.h 6
-rw-r--r-- sys/sys/mbuf.h 22
-rw-r--r-- sys/sys/proc.h 1
-rw-r--r-- sys/sys/socket.h 1
-rw-r--r-- sys/sys/socketvar.h 1
-rw-r--r-- sys/sys/syscall.h 1
-rw-r--r-- sys/sys/syscall.mk 1
-rw-r--r-- sys/sys/sysproto.h 5
69 files changed, 1055 insertions, 362 deletions
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
index 0e290ab..8c90cc9 100644
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -509,6 +509,8 @@ options HWPMC_HOOKS # Other necessary kernel hooks
options INET #Internet communications protocols
options INET6 #IPv6 communications protocols
+options ROUTETABLES=2 # max 16. 1 is back compatible.
+
# In order to enable IPSEC you MUST also add device crypto to
# your kernel configuration
options IPSEC #IP security (requires device crypto)
diff --git a/sys/conf/options b/sys/conf/options
index 07a8f45..6ebfa92 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -364,6 +364,7 @@ ETHER_II opt_ef.h
ETHER_8023 opt_ef.h
ETHER_8022 opt_ef.h
ETHER_SNAP opt_ef.h
+ROUTETABLES opt_route.h
MROUTING opt_mrouting.h
INET opt_inet.h
INET6 opt_inet6.h
diff --git a/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c b/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c
index 4a279fa..0eb2632 100644
--- a/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c
+++ b/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c
@@ -970,7 +970,7 @@ frdest_t *fdp;
dst->sin_addr = fdp->fd_ip;
dst->sin_len = sizeof(*dst);
- rtalloc(ro);
+ in_rtalloc(ro, 0);
if ((ifp == NULL) && (ro->ro_rt != NULL))
ifp = ro->ro_rt->rt_ifp;
@@ -1158,7 +1158,7 @@ fr_info_t *fin;
dst->sin_len = sizeof(*dst);
dst->sin_family = AF_INET;
dst->sin_addr = fin->fin_src;
- rtalloc(&iproute);
+ in_rtalloc(&iproute, 0);
if (iproute.ro_rt == NULL)
return 0;
return (fin->fin_ifp == iproute.ro_rt->rt_ifp);
diff --git a/sys/contrib/pf/net/pf.c b/sys/contrib/pf/net/pf.c
index 96bf2de..fd8c395 100644
--- a/sys/contrib/pf/net/pf.c
+++ b/sys/contrib/pf/net/pf.c
@@ -1839,7 +1839,14 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af,
pf_mtag->tag = rtag;
if (r != NULL && r->rtableid >= 0)
+#ifdef __FreeBSD__
+ {
+ M_SETFIB(m, r->rtableid);
+#endif
pf_mtag->rtableid = r->rtableid;
+#ifdef __FreeBSD__
+ }
+#endif
#ifdef ALTQ
if (r != NULL && r->qid) {
pf_mtag->qid = r->qid;
@@ -2004,7 +2011,14 @@ pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
#endif
if (r->rtableid >= 0)
+#ifdef __FreeBSD__
+ {
+ M_SETFIB(m0, r->rtableid);
+#endif
pf_mtag->rtableid = r->rtableid;
+#ifdef __FreeBSD__
+ }
+#endif
#ifdef ALTQ
if (r->qid) {
@@ -2195,7 +2209,14 @@ pf_tag_packet(struct mbuf *m, struct pf_mtag *pf_mtag, int tag, int rtableid)
if (tag > 0)
pf_mtag->tag = tag;
if (rtableid >= 0)
+#ifdef __FreeBSD__
+ {
+ M_SETFIB(m, rtableid);
+#endif
pf_mtag->rtableid = rtableid;
+#ifdef __FreeBSD__
+ }
+#endif
return (0);
}
@@ -3141,7 +3162,7 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
#ifdef RTF_PRCLONING
rtalloc_ign(&ro, (RTF_CLONING | RTF_PRCLONING));
#else /* !RTF_PRCLONING */
- rtalloc_ign(&ro, RTF_CLONING);
+ in_rtalloc_ign(&ro, RTF_CLONING, 0);
#endif
#else /* ! __FreeBSD__ */
rtalloc_noclone(&ro, NO_CLONING);
@@ -5946,7 +5967,11 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif)
goto out;
#ifdef __FreeBSD__
- rtalloc_ign((struct route *)&ro, RTF_CLONING);
+/* XXX MRT not always INET */ /* stick with table 0 though */
+ if (af == AF_INET)
+ in_rtalloc_ign((struct route *)&ro, RTF_CLONING, 0);
+ else
+ rtalloc_ign((struct route *)&ro, RTF_CLONING);
#else /* ! __FreeBSD__ */
rtalloc_noclone((struct route *)&ro, NO_CLONING);
#endif
@@ -6025,7 +6050,10 @@ pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw)
# ifdef RTF_PRCLONING
rtalloc_ign((struct route *)&ro, (RTF_CLONING|RTF_PRCLONING));
# else /* !RTF_PRCLONING */
- rtalloc_ign((struct route *)&ro, RTF_CLONING);
+ if (af == AF_INET)
+ in_rtalloc_ign((struct route *)&ro, RTF_CLONING, 0);
+ else
+ rtalloc_ign((struct route *)&ro, RTF_CLONING);
# endif
#else /* ! __FreeBSD__ */
rtalloc_noclone((struct route *)&ro, NO_CLONING);
@@ -6105,7 +6133,7 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
dst->sin_addr = ip->ip_dst;
if (r->rt == PF_FASTROUTE) {
- rtalloc(ro);
+ in_rtalloc(ro, 0);
if (ro->ro_rt == 0) {
ipstat.ips_noroute++;
goto bad;
diff --git a/sys/contrib/pf/net/pf_ioctl.c b/sys/contrib/pf/net/pf_ioctl.c
index f9110cb..f765029 100644
--- a/sys/contrib/pf/net/pf_ioctl.c
+++ b/sys/contrib/pf/net/pf_ioctl.c
@@ -1532,7 +1532,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
}
#ifdef __FreeBSD__ /* ROUTEING */
- if (rule->rtableid > 0)
+ if (rule->rtableid > 0 && rule->rtableid >= rt_numfibs) /* no such FIB */
#else
if (rule->rtableid > 0 && !rtable_exists(rule->rtableid))
#endif
@@ -1795,7 +1795,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
if (newrule->rtableid > 0 &&
#ifdef __FreeBSD__ /* ROUTING */
- 1)
+ newrule->rtableid >= rt_numfibs) /* no such FIB */
#else
!rtable_exists(newrule->rtableid))
#endif
diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c
index 829a31f..22db8a8 100644
--- a/sys/kern/init_sysent.c
+++ b/sys/kern/init_sysent.c
@@ -203,7 +203,7 @@ struct sysent sysent[] = {
{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 }, /* 172 = nosys */
{ AS(freebsd6_pread_args), (sy_call_t *)freebsd6_pread, AUE_PREAD, NULL, 0, 0 }, /* 173 = freebsd6_pread */
{ AS(freebsd6_pwrite_args), (sy_call_t *)freebsd6_pwrite, AUE_PWRITE, NULL, 0, 0 }, /* 174 = freebsd6_pwrite */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 }, /* 175 = nosys */
+ { AS(setfib_args), (sy_call_t *)setfib, AUE_NULL, NULL, 0, 0 }, /* 175 = setfib */
{ AS(ntp_adjtime_args), (sy_call_t *)ntp_adjtime, AUE_NTP_ADJTIME, NULL, 0, 0 }, /* 176 = ntp_adjtime */
{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 }, /* 177 = sfork */
{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 }, /* 178 = getdescriptor */
diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c
index 8e6b5f2..aeeaf33 100644
--- a/sys/kern/sys_socket.c
+++ b/sys/kern/sys_socket.c
@@ -199,7 +199,7 @@ soo_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred,
if (IOCGROUP(cmd) == 'i')
error = ifioctl(so, cmd, data, td);
else if (IOCGROUP(cmd) == 'r')
- error = rtioctl(cmd, data);
+ error = rtioctl_fib(cmd, data, so->so_fibnum);
else
error = ((*so->so_proto->pr_usrreqs->pru_control)
(so, cmd, data, 0, td));
diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c
index b3a5ff9..8fb0127 100644
--- a/sys/kern/syscalls.c
+++ b/sys/kern/syscalls.c
@@ -182,7 +182,7 @@ const char *syscallnames[] = {
"#172", /* 172 = nosys */
"freebsd6_pread", /* 173 = freebsd6_pread */
"freebsd6_pwrite", /* 174 = freebsd6_pwrite */
- "#175", /* 175 = nosys */
+ "setfib", /* 175 = setfib */
"ntp_adjtime", /* 176 = ntp_adjtime */
"#177", /* 177 = sfork */
"#178", /* 178 = getdescriptor */
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
index 1e98317..4cb55fa 100644
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -340,7 +340,7 @@
174 AUE_PWRITE STD { ssize_t freebsd6_pwrite(int fd, \
const void *buf, \
size_t nbyte, int pad, off_t offset); }
-175 AUE_NULL UNIMPL nosys
+175 AUE_NULL STD { int setfib(int fibnum); }
176 AUE_NTP_ADJTIME STD { int ntp_adjtime(struct timex *tp); }
177 AUE_NULL UNIMPL sfork (BSD/OS 2.x)
178 AUE_NULL UNIMPL getdescriptor (BSD/OS 2.x)
diff --git a/sys/kern/systrace_args.c b/sys/kern/systrace_args.c
index b20f1ed..98558cc 100644
--- a/sys/kern/systrace_args.c
+++ b/sys/kern/systrace_args.c
@@ -959,6 +959,13 @@ systrace_args(int sysnum, void *params, u_int64_t *uarg, int *n_args)
*n_args = 5;
break;
}
+ /* setfib */
+ case 175: {
+ struct setfib_args *p = params;
+ iarg[0] = p->fibnum; /* int */
+ *n_args = 1;
+ break;
+ }
/* ntp_adjtime */
case 176: {
struct ntp_adjtime_args *p = params;
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index b93cb2f..c9b6076 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -122,6 +122,7 @@ __FBSDID("$FreeBSD$");
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
+#include <net/route.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/sx.h>
@@ -360,6 +361,11 @@ socreate(int dom, struct socket **aso, int type, int proto,
TAILQ_INIT(&so->so_comp);
so->so_type = type;
so->so_cred = crhold(cred);
+ if ((prp->pr_domain->dom_family == PF_INET) ||
+ (prp->pr_domain->dom_family == PF_ROUTE))
+ so->so_fibnum = td->td_proc->p_fibnum;
+ else
+ so->so_fibnum = 0;
so->so_proto = prp;
#ifdef MAC
mac_socket_create(cred, so);
@@ -2027,6 +2033,20 @@ sosetopt(struct socket *so, struct sockopt *sopt)
SOCK_UNLOCK(so);
break;
+	case SO_SETFIB:
+		error = sooptcopyin(sopt, &optval, sizeof optval,
+		    sizeof optval);
+		/* Valid FIBs are 0 .. rt_numfibs - 1, as enforced by setfib(). */
+		if (error == 0 && (optval < 0 || optval >= rt_numfibs))
+			error = EINVAL;
+		if (error)	/* copyin failed or FIB out of range */
+			goto bad;
+		if ((so->so_proto->pr_domain->dom_family == PF_INET) ||
+		    (so->so_proto->pr_domain->dom_family == PF_ROUTE))
+			so->so_fibnum = optval;
+		else	/* other domains always use FIB 0 */
+			so->so_fibnum = 0;
+		break;
case SO_SNDBUF:
case SO_RCVBUF:
case SO_SNDLOWAT:
diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c
index e1d6187..7afe991 100644
--- a/sys/kern/vfs_export.c
+++ b/sys/kern/vfs_export.c
@@ -161,12 +161,25 @@ vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
* Seems silly to initialize every AF when most are not used,
* do so on demand here
*/
- for (dom = domains; dom; dom = dom->dom_next)
+ for (dom = domains; dom; dom = dom->dom_next) {
+ KASSERT(((i == AF_INET) || (i == AF_INET6)),
+ ("unexpected protocol in vfs_hang_addrlist"));
if (dom->dom_family == i && dom->dom_rtattach) {
- dom->dom_rtattach((void **) &nep->ne_rtable[i],
- dom->dom_rtoffset);
+ /*
+ * XXX MRT
+ * The INET and INET6 domains know the
+ * offset already. We don't need to send it
+ * So we just use it as a flag to say that
+ * we are or are not setting up a real routing
+ * table. Only IP and IPV6 need have this
+ * be 0 so all other protocols can stay the
+ * same (ABI compatible).
+ */
+ dom->dom_rtattach(
+ (void **) &nep->ne_rtable[i], 0);
break;
}
+ }
if ((rnh = nep->ne_rtable[i]) == NULL) {
error = ENOBUFS;
vfs_mount_error(mp, "%s %s %d",
diff --git a/sys/net/if.c b/sys/net/if.c
index c3c367b..85306a4 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -740,11 +740,14 @@ if_detach(struct ifnet *ifp)
* to this interface...oh well...
*/
for (i = 1; i <= AF_MAX; i++) {
- if ((rnh = rt_tables[i]) == NULL)
+ int j;
+ for (j = 0; j < rt_numfibs; j++) {
+ if ((rnh = rt_tables[j][i]) == NULL)
continue;
RADIX_NODE_HEAD_LOCK(rnh);
(void) rnh->rnh_walktree(rnh, if_rtdel, ifp);
RADIX_NODE_HEAD_UNLOCK(rnh);
+ }
}
/* Announce that the interface is gone. */
@@ -1010,9 +1013,9 @@ if_rtdel(struct radix_node *rn, void *arg)
if ((rt->rt_flags & RTF_UP) == 0)
return (0);
- err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
+ err = rtrequest_fib(RTM_DELETE, rt_key(rt), rt->rt_gateway,
rt_mask(rt), rt->rt_flags,
- (struct rtentry **) NULL);
+ (struct rtentry **) NULL, rt->rt_fibnum);
if (err) {
log(LOG_WARNING, "if_rtdel: error %d\n", err);
}
diff --git a/sys/net/if_atmsubr.c b/sys/net/if_atmsubr.c
index 9d1a7fa..1564737 100644
--- a/sys/net/if_atmsubr.c
+++ b/sys/net/if_atmsubr.c
@@ -158,7 +158,8 @@ atm_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
* check route
*/
if (rt0 != NULL) {
- error = rt_check(&rt, &rt0, dst);
+ error = rt_check_fib(&rt, &rt0,
+ dst, rt0->rt_fibnum);
if (error)
goto bad;
RT_UNLOCK(rt);
diff --git a/sys/net/if_fwsubr.c b/sys/net/if_fwsubr.c
index e001c29..65b2aff 100644
--- a/sys/net/if_fwsubr.c
+++ b/sys/net/if_fwsubr.c
@@ -103,7 +103,7 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
}
if (rt0 != NULL) {
- error = rt_check(&rt, &rt0, dst);
+ error = rt_check_fib(&rt, &rt0, dst, rt0->rt_fibnum);
if (error)
goto bad;
RT_UNLOCK(rt);
diff --git a/sys/net/if_gif.c b/sys/net/if_gif.c
index 63f3c7d..8310881 100644
--- a/sys/net/if_gif.c
+++ b/sys/net/if_gif.c
@@ -46,6 +46,7 @@
#include <sys/time.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
+#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/conf.h>
#include <machine/cpu.h>
@@ -155,6 +156,7 @@ gif_clone_create(ifc, unit, params)
struct gif_softc *sc;
sc = malloc(sizeof(struct gif_softc), M_GIF, M_WAITOK | M_ZERO);
+ sc->gif_fibnum = curthread->td_proc->p_fibnum;
GIF2IFP(sc) = if_alloc(IFT_GIF);
if (GIF2IFP(sc) == NULL) {
free(sc, M_GIF);
@@ -441,6 +443,7 @@ gif_output(ifp, m, dst, rt)
if (ifp->if_bridge)
af = AF_LINK;
+ M_SETFIB(m, sc->gif_fibnum);
/* inner AF-specific encapsulation */
/* XXX should we check if our outer source is legal? */
diff --git a/sys/net/if_gif.h b/sys/net/if_gif.h
index 8e9ceb1..4e417fd 100644
--- a/sys/net/if_gif.h
+++ b/sys/net/if_gif.h
@@ -67,6 +67,7 @@ struct gif_softc {
#endif
} gifsc_gifscr;
int gif_flags;
+ u_int gif_fibnum;
const struct encaptab *encap_cookie4;
const struct encaptab *encap_cookie6;
void *gif_netgraph; /* ng_gif(4) netgraph node info */
diff --git a/sys/net/if_gre.c b/sys/net/if_gre.c
index b4b42b9..9045f06 100644
--- a/sys/net/if_gre.c
+++ b/sys/net/if_gre.c
@@ -58,6 +58,7 @@
#include <sys/module.h>
#include <sys/mbuf.h>
#include <sys/priv.h>
+#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/sockio.h>
@@ -201,6 +202,7 @@ gre_clone_create(ifc, unit, params)
GRE2IFP(sc)->if_flags |= IFF_LINK0;
sc->encap = NULL;
sc->called = 0;
+ sc->gre_fibnum = curthread->td_proc->p_fibnum;
sc->wccp_ver = WCCP_V1;
if_attach(GRE2IFP(sc));
bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t));
@@ -395,6 +397,8 @@ gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
goto end;
}
+ M_SETFIB(m, sc->gre_fibnum); /* The envelope may use a different FIB */
+
gh = mtod(m, struct greip *);
if (sc->g_proto == IPPROTO_GRE) {
/* we don't have any GRE flags for now */
@@ -754,6 +758,7 @@ gre_compute_route(struct gre_softc *sc)
* toggle last bit, so our interface is not found, but a less
* specific route. I'd rather like to specify a shorter mask,
* but this is not possible. Should work though. XXX
+ * XXX MRT Use a different FIB for the tunnel to solve this problem.
*/
if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0) {
((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr ^=
@@ -765,7 +770,7 @@ gre_compute_route(struct gre_softc *sc)
inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr));
#endif
- rtalloc(ro);
+ rtalloc_fib(ro, sc->gre_fibnum);
/*
* check if this returned a route at all and this route is no
diff --git a/sys/net/if_gre.h b/sys/net/if_gre.h
index 6c8e853..3c34bec 100644
--- a/sys/net/if_gre.h
+++ b/sys/net/if_gre.h
@@ -59,6 +59,7 @@ struct gre_softc {
LIST_ENTRY(gre_softc) sc_list;
int gre_unit;
int gre_flags;
+ u_int gre_fibnum; /* use this fib for envelopes */
struct in_addr g_src; /* source address of gre packets */
struct in_addr g_dst; /* destination address of gre packets */
struct route route; /* routing entry that determines, where a
diff --git a/sys/net/if_iso88025subr.c b/sys/net/if_iso88025subr.c
index dd50923..f56101e 100644
--- a/sys/net/if_iso88025subr.c
+++ b/sys/net/if_iso88025subr.c
@@ -259,7 +259,8 @@ iso88025_output(ifp, m, dst, rt0)
/* Calculate routing info length based on arp table entry */
/* XXX any better way to do this ? */
if (rt0 != NULL) {
- error = rt_check(&rt, &rt0, dst);
+/* XXX MRT *//* Guess only */
+ error = rt_check_fib(&rt, &rt0, dst, rt0->rt_fibnum);
if (error)
goto bad;
RT_UNLOCK(rt);
diff --git a/sys/net/if_stf.c b/sys/net/if_stf.c
index 8f70df6..f373eaa 100644
--- a/sys/net/if_stf.c
+++ b/sys/net/if_stf.c
@@ -87,6 +87,7 @@
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/protosw.h>
+#include <sys/proc.h>
#include <sys/queue.h>
#include <machine/cpu.h>
@@ -136,6 +137,7 @@ struct stf_softc {
struct route_in6 __sc_ro6; /* just for safety */
} __sc_ro46;
#define sc_ro __sc_ro46.__sc_ro4
+ u_int sc_fibnum;
const struct encaptab *encap_cookie;
};
#define STF2IFP(sc) ((sc)->sc_ifp)
@@ -219,6 +221,7 @@ stf_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
return (ENOSPC);
}
ifp->if_softc = sc;
+ sc->sc_fibnum = curthread->td_proc->p_fibnum;
/*
* Set the name manually rather then using if_initname because
@@ -521,7 +524,7 @@ stf_output(ifp, m, dst, rt)
}
if (sc->sc_ro.ro_rt == NULL) {
- rtalloc(&sc->sc_ro);
+ rtalloc_fib(&sc->sc_ro, sc->sc_fibnum);
if (sc->sc_ro.ro_rt == NULL) {
m_freem(m);
ifp->if_oerrors++;
@@ -529,6 +532,7 @@ stf_output(ifp, m, dst, rt)
}
}
+ M_SETFIB(m, sc->sc_fibnum);
ifp->if_opackets++;
return ip_output(m, NULL, &sc->sc_ro, 0, NULL, NULL);
}
@@ -599,7 +603,8 @@ stf_checkaddr4(sc, in, inifp)
sin.sin_family = AF_INET;
sin.sin_len = sizeof(struct sockaddr_in);
sin.sin_addr = *in;
- rt = rtalloc1((struct sockaddr *)&sin, 0, 0UL);
+ rt = rtalloc1_fib((struct sockaddr *)&sin, 0,
+ 0UL, sc->sc_fibnum);
if (!rt || rt->rt_ifp != inifp) {
#if 0
log(LOG_WARNING, "%s: packet from 0x%x dropped "
diff --git a/sys/net/if_var.h b/sys/net/if_var.h
index 8fbf729..d738e32 100644
--- a/sys/net/if_var.h
+++ b/sys/net/if_var.h
@@ -690,6 +690,8 @@ struct ifaddr *ifa_ifwithbroadaddr(struct sockaddr *);
struct ifaddr *ifa_ifwithdstaddr(struct sockaddr *);
struct ifaddr *ifa_ifwithnet(struct sockaddr *);
struct ifaddr *ifa_ifwithroute(int, struct sockaddr *, struct sockaddr *);
+struct ifaddr *ifa_ifwithroute_fib(int, struct sockaddr *, struct sockaddr *, u_int);
+
struct ifaddr *ifaof_ifpforaddr(struct sockaddr *, struct ifnet *);
int if_simloop(struct ifnet *ifp, struct mbuf *m, int af, int hlen);
diff --git a/sys/net/radix_mpath.c b/sys/net/radix_mpath.c
index d1db258..b04b42a 100644
--- a/sys/net/radix_mpath.c
+++ b/sys/net/radix_mpath.c
@@ -255,7 +255,7 @@ different:
}
void
-rtalloc_mpath(struct route *ro, int hash)
+rtalloc_mpath_fib(struct route *ro, int hash, u_int fibnum)
{
struct radix_node *rn0, *rn;
int n;
@@ -266,7 +266,7 @@ rtalloc_mpath(struct route *ro, int hash)
*/
if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP))
return; /* XXX */
- ro->ro_rt = rtalloc1(&ro->ro_dst, 1, 0UL);
+ ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, 0UL, fibnum);
/* if the route does not exist or it is not multipath, don't care */
if (ro->ro_rt == NULL)
diff --git a/sys/net/radix_mpath.h b/sys/net/radix_mpath.h
index 661aaf3..b9224c8 100644
--- a/sys/net/radix_mpath.h
+++ b/sys/net/radix_mpath.h
@@ -50,7 +50,8 @@ int rn_mpath_count(struct radix_node *);
struct rtentry *rt_mpath_matchgate(struct rtentry *, struct sockaddr *);
int rt_mpath_conflict(struct radix_node_head *, struct rtentry *,
struct sockaddr *);
-void rtalloc_mpath(struct route *, int);
+void rtalloc_mpath_fib(struct route *, int, u_int);
+#define rtalloc_mpath(_route, _hash) rtalloc_mpath_fib((_route), (_hash), 0)
struct radix_node *rn_mpath_lookup(void *, void *,
struct radix_node_head *);
int rt_mpath_deldup(struct rtentry *, struct rtentry *);
diff --git a/sys/net/route.c b/sys/net/route.c
index d55c2f8..3ae5dbc 100644
--- a/sys/net/route.c
+++ b/sys/net/route.c
@@ -29,8 +29,13 @@
* @(#)route.c 8.3.1.1 (Berkeley) 2/23/95
* $FreeBSD$
*/
+/************************************************************************
+ * Note: In this file a 'fib' is a "forwarding information base" *
+ * Which is the new name for an in kernel routing (next hop) table. *
+ ***********************************************************************/
#include "opt_inet.h"
+#include "opt_route.h"
#include "opt_mrouting.h"
#include "opt_mpath.h"
@@ -39,6 +44,9 @@
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <sys/sysproto.h>
+#include <sys/proc.h>
#include <sys/domain.h>
#include <sys/kernel.h>
@@ -54,14 +62,45 @@
#include <vm/uma.h>
+#ifndef ROUTETABLES
+ #define RT_NUMFIBS 1
+ #define RT_MAXFIBS 1
+#else
+ /* while we use 4 bits in the mbuf flags,
+ * we are limited to 16
+ */
+ #if ROUTETABLES > RT_MAXFIBS
+ #define RT_NUMFIBS RT_MAXFIBS
+ #error "ROUTETABLES defined too big"
+ #else
+ #if ROUTETABLES == 0
+ #define RT_NUMFIBS 1
+ #else
+ #define RT_NUMFIBS ROUTETABLES
+ #endif
+ #endif
+#endif
+
+u_int rt_numfibs = RT_NUMFIBS;
+SYSCTL_INT(_net, OID_AUTO, fibs, CTLFLAG_RD, &rt_numfibs, 0, "");
+/* Eventually this will be a tunable */
+TUNABLE_INT("net.fibs", &rt_numfibs);
+
static struct rtstat rtstat;
-struct radix_node_head *rt_tables[AF_MAX+1];
+
+/* by default only the first 'row' of tables will be accessed. */
+/*
+ * XXXMRT When we fix netstat, and do this differently,
+ * we can allocate this dynamically. As long as we are keeping
+ * things backwards compatible we need to allocate this
+ * statically.
+ */
+struct radix_node_head *rt_tables[RT_MAXFIBS][AF_MAX+1];
static int rttrash; /* routes not in table but not freed */
static void rt_maskedcopy(struct sockaddr *,
struct sockaddr *, struct sockaddr *);
-static void rtable_init(void **);
/* compare two sockaddr structures */
#define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)
@@ -78,25 +117,83 @@ static void rtable_init(void **);
*/
#define RNTORT(p) ((struct rtentry *)(p))
-static void
-rtable_init(void **table)
+static uma_zone_t rtzone; /* Routing table UMA zone. */
+
+#if 0
+/* default fib for tunnels to use */
+u_int tunnel_fib = 0;
+SYSCTL_INT(_net, OID_AUTO, tunnelfib, CTLFLAG_RD, &tunnel_fib, 0, "");
+#endif
+
+/*
+ * handler for net.my_fibnum
+ */
+static int
+sysctl_my_fibnum(SYSCTL_HANDLER_ARGS)
{
- struct domain *dom;
- for (dom = domains; dom; dom = dom->dom_next)
- if (dom->dom_rtattach)
- dom->dom_rtattach(&table[dom->dom_family],
- dom->dom_rtoffset);
+ int fibnum;
+ int error;
+
+ fibnum = curthread->td_proc->p_fibnum;
+ error = sysctl_handle_int(oidp, &fibnum, 0, req);
+ return (error);
}
-static uma_zone_t rtzone; /* Routing table UMA zone. */
+SYSCTL_PROC(_net, OID_AUTO, my_fibnum, CTLTYPE_INT|CTLFLAG_RD,
+ NULL, 0, &sysctl_my_fibnum, "I", "default FIB of caller");
static void
route_init(void)
{
+ int table;
+ struct domain *dom;
+ int fam;
+
+ /* whack the tunable ints into line. */
+ if (rt_numfibs > RT_MAXFIBS)
+ rt_numfibs = RT_MAXFIBS;
+ if (rt_numfibs == 0)
+ rt_numfibs = 1;
rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), NULL, NULL,
NULL, NULL, UMA_ALIGN_PTR, 0);
rn_init(); /* initialize all zeroes, all ones, mask table */
- rtable_init((void **)rt_tables);
+
+ for (dom = domains; dom; dom = dom->dom_next) {
+ if (dom->dom_rtattach) {
+ for (table = 0; table < rt_numfibs; table++) {
+ if ( (fam = dom->dom_family) == AF_INET ||
+ table == 0) {
+ /* for now only AF_INET has > 1 table */
+ /* XXX MRT
+ * rtattach will be also called
+ * from vfs_export.c but the
+ * offset will be 0
+ * (only for AF_INET and AF_INET6
+ * which don't need it anyhow)
+ */
+ dom->dom_rtattach(
+ (void **)&rt_tables[table][fam],
+ dom->dom_rtoffset);
+ } else {
+ break;
+ }
+ }
+ }
+ }
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct setfib_args {
+ int fibnum;
+};
+#endif
+int
+setfib(struct thread *td, struct setfib_args *uap)
+{
+ if (uap->fibnum < 0 || uap->fibnum >= rt_numfibs)
+ return EINVAL;
+ td->td_proc->p_fibnum = uap->fibnum;
+ return (0);
}
/*
@@ -105,7 +202,13 @@ route_init(void)
void
rtalloc(struct route *ro)
{
- rtalloc_ign(ro, 0UL);
+ rtalloc_ign_fib(ro, 0UL, 0);
+}
+
+void
+rtalloc_fib(struct route *ro, u_int fibnum)
+{
+ rtalloc_ign_fib(ro, 0UL, fibnum);
}
void
@@ -119,7 +222,23 @@ rtalloc_ign(struct route *ro, u_long ignore)
RTFREE(rt);
ro->ro_rt = NULL;
}
- ro->ro_rt = rtalloc1(&ro->ro_dst, 1, ignore);
+ ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, 0);
+ if (ro->ro_rt)
+ RT_UNLOCK(ro->ro_rt);
+}
+
+void
+rtalloc_ign_fib(struct route *ro, u_long ignore, u_int fibnum)
+{
+ struct rtentry *rt;
+
+ if ((rt = ro->ro_rt) != NULL) {
+ if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP)
+ return;
+ RTFREE(rt);
+ ro->ro_rt = NULL;
+ }
+ ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, fibnum);
if (ro->ro_rt)
RT_UNLOCK(ro->ro_rt);
}
@@ -133,7 +252,14 @@ rtalloc_ign(struct route *ro, u_long ignore)
struct rtentry *
rtalloc1(struct sockaddr *dst, int report, u_long ignflags)
{
- struct radix_node_head *rnh = rt_tables[dst->sa_family];
+ return (rtalloc1_fib(dst, report, ignflags, 0));
+}
+
+struct rtentry *
+rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags,
+ u_int fibnum)
+{
+ struct radix_node_head *rnh;
struct rtentry *rt;
struct radix_node *rn;
struct rtentry *newrt;
@@ -141,6 +267,10 @@ rtalloc1(struct sockaddr *dst, int report, u_long ignflags)
u_long nflags;
int err = 0, msgtype = RTM_MISS;
+ KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum"));
+ if (dst->sa_family != AF_INET) /* Only INET supports > 1 fib now */
+ fibnum = 0;
+ rnh = rt_tables[fibnum][dst->sa_family];
newrt = NULL;
/*
* Look up the address in the table for that Address Family
@@ -164,8 +294,8 @@ rtalloc1(struct sockaddr *dst, int report, u_long ignflags)
* If it requires that it be cloned, do so.
* (This implies it wasn't a HOST route.)
*/
- err = rtrequest(RTM_RESOLVE, dst, NULL,
- NULL, 0, &newrt);
+ err = rtrequest_fib(RTM_RESOLVE, dst, NULL,
+ NULL, 0, &newrt, fibnum);
if (err) {
/*
* If the cloning didn't succeed, maybe
@@ -237,7 +367,7 @@ rtfree(struct rtentry *rt)
struct radix_node_head *rnh;
KASSERT(rt != NULL,("%s: NULL rt", __func__));
- rnh = rt_tables[rt_key(rt)->sa_family];
+ rnh = rt_tables[rt->rt_fibnum][rt_key(rt)->sa_family];
KASSERT(rnh != NULL,("%s: NULL rnh", __func__));
RT_LOCK_ASSERT(rt);
@@ -323,6 +453,17 @@ rtredirect(struct sockaddr *dst,
int flags,
struct sockaddr *src)
{
+ rtredirect_fib(dst, gateway, netmask, flags, src, 0);
+}
+
+void
+rtredirect_fib(struct sockaddr *dst,
+ struct sockaddr *gateway,
+ struct sockaddr *netmask,
+ int flags,
+ struct sockaddr *src,
+ u_int fibnum)
+{
struct rtentry *rt, *rt0 = NULL;
int error = 0;
short *stat = NULL;
@@ -334,7 +475,7 @@ rtredirect(struct sockaddr *dst,
error = ENETUNREACH;
goto out;
}
- rt = rtalloc1(dst, 0, 0UL); /* NB: rt is locked */
+ rt = rtalloc1_fib(dst, 0, 0UL, fibnum); /* NB: rt is locked */
/*
* If the redirect isn't from our current router for this dst,
* it's either old or wrong. If it redirects us to ourselves,
@@ -377,7 +518,7 @@ rtredirect(struct sockaddr *dst,
info.rti_info[RTAX_NETMASK] = netmask;
info.rti_ifa = ifa;
info.rti_flags = flags;
- error = rtrequest1(RTM_ADD, &info, &rt);
+ error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum);
if (rt != NULL) {
RT_LOCK(rt);
EVENTHANDLER_INVOKE(route_redirect_event, rt0, rt, dst);
@@ -423,11 +564,17 @@ out:
rt_missmsg(RTM_REDIRECT, &info, flags, error);
}
+int
+rtioctl(u_long req, caddr_t data)
+{
+ return (rtioctl_fib(req, data, 0));
+}
+
/*
* Routing table ioctl interface.
*/
int
-rtioctl(u_long req, caddr_t data)
+rtioctl_fib(u_long req, caddr_t data, u_int fibnum)
{
/*
@@ -438,7 +585,7 @@ rtioctl(u_long req, caddr_t data)
*/
#ifdef INET
/* Multicast goop, grrr... */
- return mrt_ioctl ? mrt_ioctl(req, data) : EOPNOTSUPP;
+ return mrt_ioctl ? mrt_ioctl(req, data, fibnum) : EOPNOTSUPP;
#else /* INET */
return ENXIO;
#endif /* INET */
@@ -447,6 +594,13 @@ rtioctl(u_long req, caddr_t data)
struct ifaddr *
ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway)
{
+ return (ifa_ifwithroute_fib(flags, dst, gateway, 0));
+}
+
+struct ifaddr *
+ifa_ifwithroute_fib(int flags, struct sockaddr *dst, struct sockaddr *gateway,
+ u_int fibnum)
+{
register struct ifaddr *ifa;
int not_found = 0;
@@ -474,7 +628,7 @@ ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway)
if (ifa == NULL)
ifa = ifa_ifwithnet(gateway);
if (ifa == NULL) {
- struct rtentry *rt = rtalloc1(gateway, 0, 0UL);
+ struct rtentry *rt = rtalloc1_fib(gateway, 0, 0UL, fibnum);
if (rt == NULL)
return (NULL);
/*
@@ -529,6 +683,18 @@ rtrequest(int req,
int flags,
struct rtentry **ret_nrt)
{
+ return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt, 0));
+}
+
+int
+rtrequest_fib(int req,
+ struct sockaddr *dst,
+ struct sockaddr *gateway,
+ struct sockaddr *netmask,
+ int flags,
+ struct rtentry **ret_nrt,
+ u_int fibnum)
+{
struct rt_addrinfo info;
if (dst->sa_len == 0)
@@ -539,7 +705,7 @@ rtrequest(int req,
info.rti_info[RTAX_DST] = dst;
info.rti_info[RTAX_GATEWAY] = gateway;
info.rti_info[RTAX_NETMASK] = netmask;
- return rtrequest1(req, &info, ret_nrt);
+ return rtrequest1_fib(req, &info, ret_nrt, fibnum);
}
/*
@@ -556,6 +722,12 @@ rtrequest(int req,
int
rt_getifa(struct rt_addrinfo *info)
{
+ return (rt_getifa_fib(info, 0));
+}
+
+int
+rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum)
+{
struct ifaddr *ifa;
int error = 0;
@@ -577,9 +749,11 @@ rt_getifa(struct rt_addrinfo *info)
if (sa != NULL && info->rti_ifp != NULL)
info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp);
else if (dst != NULL && gateway != NULL)
- info->rti_ifa = ifa_ifwithroute(flags, dst, gateway);
+ info->rti_ifa = ifa_ifwithroute_fib(flags, dst, gateway,
+ fibnum);
else if (sa != NULL)
- info->rti_ifa = ifa_ifwithroute(flags, sa, sa);
+ info->rti_ifa = ifa_ifwithroute_fib(flags, sa, sa,
+ fibnum);
}
if ((ifa = info->rti_ifa) != NULL) {
if (info->rti_ifp == NULL)
@@ -613,7 +787,7 @@ rtexpunge(struct rtentry *rt)
/*
* Find the correct routing tree to use for this Address Family
*/
- rnh = rt_tables[rt_key(rt)->sa_family];
+ rnh = rt_tables[rt->rt_fibnum][rt_key(rt)->sa_family];
if (rnh == NULL)
return (EAFNOSUPPORT);
@@ -680,6 +854,13 @@ bad:
int
rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt)
{
+ return (rtrequest1_fib(req, info, ret_nrt, 0));
+}
+
+int
+rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
+ u_int fibnum)
+{
int error = 0;
register struct rtentry *rt;
register struct radix_node *rn;
@@ -688,10 +869,13 @@ rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt)
struct sockaddr *ndst;
#define senderr(x) { error = x ; goto bad; }
+ KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum"));
+ if (dst->sa_family != AF_INET) /* Only INET supports > 1 fib now */
+ fibnum = 0;
/*
* Find the correct routing tree to use for this Address Family
*/
- rnh = rt_tables[dst->sa_family];
+ rnh = rt_tables[fibnum][dst->sa_family];
if (rnh == NULL)
return (EAFNOSUPPORT);
RADIX_NODE_HEAD_LOCK(rnh);
@@ -848,7 +1032,7 @@ deldone:
(gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK))
senderr(EINVAL);
- if (info->rti_ifa == NULL && (error = rt_getifa(info)))
+ if (info->rti_ifa == NULL && (error = rt_getifa_fib(info, fibnum)))
senderr(error);
ifa = info->rti_ifa;
@@ -858,6 +1042,7 @@ deldone:
senderr(ENOBUFS);
RT_LOCK_INIT(rt);
rt->rt_flags = RTF_UP | flags;
+ rt->rt_fibnum = fibnum;
/*
* Add the gateway. Possibly re-malloc-ing the storage for it
* also add the rt_gwroute if possible.
@@ -918,7 +1103,7 @@ deldone:
* then we just blow it away and retry the insertion
* of the new one.
*/
- rt2 = rtalloc1(dst, 0, 0);
+ rt2 = rtalloc1_fib(dst, 0, 0, fibnum);
if (rt2 && rt2->rt_parent) {
rtexpunge(rt2);
RT_UNLOCK(rt2);
@@ -1034,8 +1219,8 @@ rt_fixdelete(struct radix_node *rn, void *vp)
if (rt->rt_parent == rt0 &&
!(rt->rt_flags & (RTF_PINNED | RTF_CLONING))) {
- return rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt),
- rt->rt_flags, NULL);
+ return rtrequest_fib(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt),
+ rt->rt_flags, NULL, rt->rt_fibnum);
}
return 0;
}
@@ -1099,15 +1284,15 @@ rt_fixchange(struct radix_node *rn, void *vp)
* changed/added under the node's mask. So, get rid of it.
*/
delete_rt:
- return rtrequest(RTM_DELETE, rt_key(rt), NULL,
- rt_mask(rt), rt->rt_flags, NULL);
+ return rtrequest_fib(RTM_DELETE, rt_key(rt), NULL,
+ rt_mask(rt), rt->rt_flags, NULL, rt->rt_fibnum);
}
int
rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
{
/* XXX dst may be overwritten, can we move this to below */
- struct radix_node_head *rnh = rt_tables[dst->sa_family];
+ struct radix_node_head *rnh = rt_tables[rt->rt_fibnum][dst->sa_family];
int dlen = SA_SIZE(dst), glen = SA_SIZE(gate);
again:
@@ -1138,7 +1323,7 @@ again:
struct rtentry *gwrt;
RT_UNLOCK(rt); /* XXX workaround LOR */
- gwrt = rtalloc1(gate, 1, 0);
+ gwrt = rtalloc1_fib(gate, 1, 0, rt->rt_fibnum);
if (gwrt == rt) {
RT_REMREF(rt);
return (EADDRINUSE); /* failure */
@@ -1243,15 +1428,19 @@ rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netma
* Set up a routing table entry, normally
* for an interface.
*/
-int
-rtinit(struct ifaddr *ifa, int cmd, int flags)
+#define _SOCKADDR_TMPSIZE 128 /* Not too big.. kernel stack size is limited */
+static inline int
+rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
{
struct sockaddr *dst;
struct sockaddr *netmask;
- struct mbuf *m = NULL;
struct rtentry *rt = NULL;
struct rt_addrinfo info;
- int error=0;
+ int error = 0;
+ int startfib, endfib;
+ char tempbuf[_SOCKADDR_TMPSIZE];
+ int didwork = 0;
+ int a_failure = 0;
if (flags & RTF_HOST) {
dst = ifa->ifa_dstaddr;
@@ -1260,126 +1449,190 @@ rtinit(struct ifaddr *ifa, int cmd, int flags)
dst = ifa->ifa_addr;
netmask = ifa->ifa_netmask;
}
+ if ( dst->sa_family != AF_INET)
+ fibnum = 0;
+ if (fibnum == -1) {
+ startfib = 0;
+ endfib = rt_numfibs - 1;
+ } else {
+ KASSERT((fibnum < rt_numfibs), ("rtinit1: bad fibnum"));
+ startfib = fibnum;
+ endfib = fibnum;
+ }
if (dst->sa_len == 0)
return(EINVAL);
/*
- * If it's a delete, check that if it exists, it's on the correct
- * interface or we might scrub a route to another ifa which would
+ * If it's a delete, check that if it exists,
+ * it's on the correct interface or we might scrub
+ * a route to another ifa which would
* be confusing at best and possibly worse.
*/
if (cmd == RTM_DELETE) {
- struct sockaddr *deldst;
- struct radix_node_head *rnh;
- struct radix_node *rn;
-
/*
* It's a delete, so it should already exist..
* If it's a net, mask off the host bits
* (Assuming we have a mask)
+ * XXX this is kinda inet specific..
*/
if (netmask != NULL) {
- m = m_get(M_DONTWAIT, MT_SONAME);
- if (m == NULL)
- return(ENOBUFS);
- deldst = mtod(m, struct sockaddr *);
- rt_maskedcopy(dst, deldst, netmask);
- dst = deldst;
+ rt_maskedcopy(dst, (struct sockaddr *)tempbuf, netmask);
+ dst = (struct sockaddr *)tempbuf;
}
- /*
- * Look up an rtentry that is in the routing tree and
- * contains the correct info.
- */
- if ((rnh = rt_tables[dst->sa_family]) == NULL)
- goto bad;
- RADIX_NODE_HEAD_LOCK(rnh);
+ }
+ /*
+ * Now go through all the requested tables (fibs) and do the
+ * requested action. Realistically, this will either be fib 0
+ * for protocols that don't do multiple tables or all the
+ * tables for those that do. XXX For this version only AF_INET.
+ * When that changes code should be refactored to protocol
+ * independent parts and protocol dependent parts.
+ */
+ for ( fibnum = startfib; fibnum <= endfib; fibnum++) {
+ if (cmd == RTM_DELETE) {
+ struct radix_node_head *rnh;
+ struct radix_node *rn;
+ /*
+ * Look up an rtentry that is in the routing tree and
+ * contains the correct info.
+ */
+ if ((rnh = rt_tables[fibnum][dst->sa_family]) == NULL)
+ /* this table doesn't exist but others might */
+ continue;
+ RADIX_NODE_HEAD_LOCK(rnh);
#ifdef RADIX_MPATH
- if (rn_mpath_capable(rnh)) {
+ if (rn_mpath_capable(rnh)) {
- rn = rnh->rnh_matchaddr(dst, rnh);
- if (rn == NULL)
- error = ESRCH;
- else {
- rt = RNTORT(rn);
- /*
- * for interface route the rt->rt_gateway is
- * sockaddr_intf for cloning ARP entries, so
- * rt_mpath_matchgate must use the interface
- * address
- */
- rt = rt_mpath_matchgate(rt, ifa->ifa_addr);
- if (!rt)
+ rn = rnh->rnh_matchaddr(dst, rnh);
+ if (rn == NULL)
error = ESRCH;
+ else {
+ rt = RNTORT(rn);
+ /*
+ * for interface route the
+ * rt->rt_gateway is sockaddr_intf
+ * for cloning ARP entries, so
+ * rt_mpath_matchgate must use the
+ * interface address
+ */
+ rt = rt_mpath_matchgate(rt,
+ ifa->ifa_addr);
+ if (!rt)
+ error = ESRCH;
+ }
}
- }
- else
+ else
#endif
- error = ((rn = rnh->rnh_lookup(dst, netmask, rnh)) == NULL ||
- (rn->rn_flags & RNF_ROOT) ||
- RNTORT(rn)->rt_ifa != ifa ||
- !sa_equal((struct sockaddr *)rn->rn_key, dst));
-
- RADIX_NODE_HEAD_UNLOCK(rnh);
- if (error) {
-bad:
- if (m)
- (void) m_free(m);
- return (flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
+ rn = rnh->rnh_lookup(dst, netmask, rnh);
+ error = (rn == NULL ||
+ (rn->rn_flags & RNF_ROOT) ||
+ RNTORT(rn)->rt_ifa != ifa ||
+ !sa_equal((struct sockaddr *)rn->rn_key, dst));
+ RADIX_NODE_HEAD_UNLOCK(rnh);
+ if (error) {
+ /* this is only an error if bad on ALL tables */
+ continue;
+ }
}
- }
- /*
- * Do the actual request
- */
- bzero((caddr_t)&info, sizeof(info));
- info.rti_ifa = ifa;
- info.rti_flags = flags | ifa->ifa_flags;
- info.rti_info[RTAX_DST] = dst;
- info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
- info.rti_info[RTAX_NETMASK] = netmask;
- error = rtrequest1(cmd, &info, &rt);
- if (error == 0 && rt != NULL) {
/*
- * notify any listening routing agents of the change
+ * Do the actual request
*/
- RT_LOCK(rt);
+ bzero((caddr_t)&info, sizeof(info));
+ info.rti_ifa = ifa;
+ info.rti_flags = flags | ifa->ifa_flags;
+ info.rti_info[RTAX_DST] = dst;
+ info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
+ info.rti_info[RTAX_NETMASK] = netmask;
+ error = rtrequest1_fib(cmd, &info, &rt, fibnum);
+ if (error == 0 && rt != NULL) {
+ /*
+ * notify any listening routing agents of the change
+ */
+ RT_LOCK(rt);
#ifdef RADIX_MPATH
- /*
- * in case address alias finds the first address
- * e.g. ifconfig bge0 192.103.54.246/24
- * e.g. ifconfig bge0 192.103.54.247/24
- * the address set in the route is 192.103.54.246
- * so we need to replace it with 192.103.54.247
- */
- if (memcmp(rt->rt_ifa->ifa_addr, ifa->ifa_addr, ifa->ifa_addr->sa_len)) {
- IFAFREE(rt->rt_ifa);
- IFAREF(ifa);
- rt->rt_ifp = ifa->ifa_ifp;
- rt->rt_ifa = ifa;
- }
-#endif
- rt_newaddrmsg(cmd, ifa, error, rt);
- if (cmd == RTM_DELETE) {
/*
- * If we are deleting, and we found an entry, then
- * it's been removed from the tree.. now throw it away.
+ * in case address alias finds the first address
+ * e.g. ifconfig bge0 192.103.54.246/24
+ * e.g. ifconfig bge0 192.103.54.247/24
+ * the address set in the route is 192.103.54.246
+ * so we need to replace it with 192.103.54.247
*/
- RTFREE_LOCKED(rt);
- } else {
- if (cmd == RTM_ADD) {
+ if (memcmp(rt->rt_ifa->ifa_addr,
+ ifa->ifa_addr, ifa->ifa_addr->sa_len)) {
+ IFAFREE(rt->rt_ifa);
+ IFAREF(ifa);
+ rt->rt_ifp = ifa->ifa_ifp;
+ rt->rt_ifa = ifa;
+ }
+#endif
+ rt_newaddrmsg(cmd, ifa, error, rt);
+ if (cmd == RTM_DELETE) {
/*
- * We just wanted to add it.. we don't actually
- * need a reference.
+ * If we are deleting, and we found an entry,
+ * then it's been removed from the tree..
+ * now throw it away.
*/
- RT_REMREF(rt);
+ RTFREE_LOCKED(rt);
+ } else {
+ if (cmd == RTM_ADD) {
+ /*
+ * We just wanted to add it..
+ * we don't actually need a reference.
+ */
+ RT_REMREF(rt);
+ }
+ RT_UNLOCK(rt);
}
- RT_UNLOCK(rt);
+ didwork = 1;
+ }
+ if (error)
+ a_failure = error;
+ }
+ if (cmd == RTM_DELETE) {
+ if (didwork) {
+ error = 0;
+ } else {
+ /* we only give an error if it wasn't in any table */
+ error = ((flags & RTF_HOST) ?
+ EHOSTUNREACH : ENETUNREACH);
+ }
+ } else {
+ if (a_failure) {
+ /* return an error if any of them failed */
+ error = a_failure;
}
}
- if (m)
- (void) m_free(m);
return (error);
}
+/* special one for inet internal use. may not use. */
+int
+rtinit_fib(struct ifaddr *ifa, int cmd, int flags)
+{
+ return (rtinit1(ifa, cmd, flags, -1));
+}
+
+/*
+ * Set up a routing table entry, normally
+ * for an interface.
+ */
+int
+rtinit(struct ifaddr *ifa, int cmd, int flags)
+{
+ struct sockaddr *dst;
+ int fib = 0;
+
+ if (flags & RTF_HOST) {
+ dst = ifa->ifa_dstaddr;
+ } else {
+ dst = ifa->ifa_addr;
+ }
+
+ if (dst->sa_family == AF_INET)
+ fib = -1;
+ return (rtinit1(ifa, cmd, flags, fib));
+}
+
/*
* rt_check() is invoked on each layer 2 output path, prior to
* encapsulating outbound packets.
@@ -1399,6 +1652,7 @@ bad:
* final destination if directly reachable);
* *lrt0 points to the cached route to the final destination;
* *lrt is not meaningful;
+ * fibnum is the index to the correct network fib for this packet
*
* === Operation ===
* If the route is marked down try to find a new route. If the route
@@ -1415,6 +1669,13 @@ bad:
int
rt_check(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst)
{
+ return (rt_check_fib(lrt, lrt0, dst, 0));
+}
+
+int
+rt_check_fib(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst,
+ u_int fibnum)
+{
struct rtentry *rt;
struct rtentry *rt0;
int error;
@@ -1426,7 +1687,7 @@ rt_check(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst)
RT_LOCK(rt);
if ((rt->rt_flags & RTF_UP) == 0) {
RT_UNLOCK(rt);
- rt = rtalloc1(dst, 1, 0UL);
+ rt = rtalloc1_fib(dst, 1, 0UL, fibnum);
if (rt != NULL) {
RT_REMREF(rt);
/* XXX what about if change? */
@@ -1446,7 +1707,8 @@ rt_check(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst)
rt0->rt_gwroute = NULL;
lookup:
RT_UNLOCK(rt0);
- rt = rtalloc1(rt->rt_gateway, 1, 0UL);
+/* XXX MRT link level looked up in table 0 */
+ rt = rtalloc1_fib(rt->rt_gateway, 1, 0UL, 0);
if (rt == rt0) {
RT_REMREF(rt0);
RT_UNLOCK(rt0);
diff --git a/sys/net/route.h b/sys/net/route.h
index e9f4980..7b8c460 100644
--- a/sys/net/route.h
+++ b/sys/net/route.h
@@ -82,6 +82,10 @@ struct rt_metrics {
#define RTM_RTTUNIT 1000000 /* units for rtt, rttvar, as units per sec */
#define RTTTOPRHZ(r) ((r) / (RTM_RTTUNIT / PR_SLOWHZ))
+#define RT_MAXFIBS 16
+extern u_int rt_numfibs; /* number of usable routing tables */
+extern u_int tunnel_fib; /* tunnels use these */
+extern u_int fwd_fib; /* packets being forwarded use these routes */
/*
* XXX kernel function pointer `rt_output' is visible to applications.
*/
@@ -120,6 +124,7 @@ struct rtentry {
caddr_t rt_llinfo; /* pointer to link level info cache */
struct rtentry *rt_gwroute; /* implied entry for gatewayed routes */
struct rtentry *rt_parent; /* cloning parent of this route */
+ u_int rt_fibnum; /* which FIB */
#ifdef _KERNEL
/* XXX ugly, user apps use this definition but don't have a mtx def */
struct mtx rt_mtx; /* mutex for routing entry */
@@ -325,11 +330,10 @@ struct rt_addrinfo {
RTFREE_LOCKED(_rt); \
} while (0)
-extern struct radix_node_head *rt_tables[AF_MAX+1];
+extern struct radix_node_head *rt_tables[RT_MAXFIBS][AF_MAX+1];
struct ifmultiaddr;
-int rt_getifa(struct rt_addrinfo *);
void rt_ieee80211msg(struct ifnet *, int, void *, size_t);
void rt_ifannouncemsg(struct ifnet *, int);
void rt_ifmsg(struct ifnet *);
@@ -350,11 +354,15 @@ int rt_setgate(struct rtentry *, struct sockaddr *, struct sockaddr *);
* RTFREE() uses an unlocked entry.
*/
+int rtexpunge(struct rtentry *);
+void rtfree(struct rtentry *);
+
+/* XXX MRT COMPAT VERSIONS THAT SET UNIVERSE to 0 */
+/* These are used by old code not yet converted to use multiple FIBS */
+int rt_getifa(struct rt_addrinfo *);
void rtalloc_ign(struct route *ro, u_long ignflags);
void rtalloc(struct route *ro); /* XXX deprecated, use rtalloc_ign(ro, 0) */
struct rtentry *rtalloc1(struct sockaddr *, int, u_long);
-int rtexpunge(struct rtentry *);
-void rtfree(struct rtentry *);
int rtinit(struct ifaddr *, int, int);
int rtioctl(u_long, caddr_t);
void rtredirect(struct sockaddr *, struct sockaddr *,
@@ -364,6 +372,25 @@ int rtrequest(int, struct sockaddr *,
int rtrequest1(int, struct rt_addrinfo *, struct rtentry **);
int rt_check(struct rtentry **, struct rtentry **, struct sockaddr *);
+/* defaults to "all" FIBs */
+int rtinit_fib(struct ifaddr *, int, int);
+
+/* XXX MRT NEW VERSIONS THAT USE FIBs
+ * For now the protocol independent versions are the same as the AF_INET ones
+ * but this will change..
+ */
+int rt_getifa_fib(struct rt_addrinfo *, u_int fibnum);
+void rtalloc_ign_fib(struct route *ro, u_long ignflags, u_int fibnum);
+void rtalloc_fib(struct route *ro, u_int fibnum);
+struct rtentry *rtalloc1_fib(struct sockaddr *, int, u_long, u_int);
+int rtioctl_fib(u_long, caddr_t, u_int);
+void rtredirect_fib(struct sockaddr *, struct sockaddr *,
+ struct sockaddr *, int, struct sockaddr *, u_int);
+int rtrequest_fib(int, struct sockaddr *,
+ struct sockaddr *, struct sockaddr *, int, struct rtentry **, u_int);
+int rtrequest1_fib(int, struct rt_addrinfo *, struct rtentry **, u_int);
+int rt_check_fib(struct rtentry **, struct rtentry **, struct sockaddr *, u_int);
+
#include <sys/eventhandler.h>
typedef void (*rtevent_arp_update_fn)(void *, struct rtentry *, uint8_t *, struct sockaddr *);
typedef void (*rtevent_redirect_fn)(void *, struct rtentry *, struct rtentry *, struct sockaddr *);
diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c
index 5ea93d3..9511035 100644
--- a/sys/net/rtsock.c
+++ b/sys/net/rtsock.c
@@ -182,6 +182,7 @@ rts_attach(struct socket *so, int proto, struct thread *td)
*/
s = splnet();
so->so_pcb = (caddr_t)rp;
+ so->so_fibnum = td->td_proc->p_fibnum;
error = raw_attach(so, proto);
rp = sotorawcb(so);
if (error) {
@@ -387,7 +388,8 @@ route_output(struct mbuf *m, struct socket *so)
if (info.rti_info[RTAX_GATEWAY] == NULL)
senderr(EINVAL);
saved_nrt = NULL;
- error = rtrequest1(RTM_ADD, &info, &saved_nrt);
+ error = rtrequest1_fib(RTM_ADD, &info, &saved_nrt,
+ so->so_fibnum);
if (error == 0 && saved_nrt) {
RT_LOCK(saved_nrt);
rt_setmetrics(rtm->rtm_inits,
@@ -401,7 +403,8 @@ route_output(struct mbuf *m, struct socket *so)
case RTM_DELETE:
saved_nrt = NULL;
- error = rtrequest1(RTM_DELETE, &info, &saved_nrt);
+ error = rtrequest1_fib(RTM_DELETE, &info, &saved_nrt,
+ so->so_fibnum);
if (error == 0) {
RT_LOCK(saved_nrt);
rt = saved_nrt;
@@ -412,7 +415,7 @@ route_output(struct mbuf *m, struct socket *so)
case RTM_GET:
case RTM_CHANGE:
case RTM_LOCK:
- rnh = rt_tables[info.rti_info[RTAX_DST]->sa_family];
+ rnh = rt_tables[so->so_fibnum][info.rti_info[RTAX_DST]->sa_family];
if (rnh == NULL)
senderr(EAFNOSUPPORT);
RADIX_NODE_HEAD_LOCK(rnh);
@@ -530,7 +533,8 @@ route_output(struct mbuf *m, struct socket *so)
!sa_equal(info.rti_info[RTAX_IFA],
rt->rt_ifa->ifa_addr))) {
RT_UNLOCK(rt);
- if ((error = rt_getifa(&info)) != 0)
+ if ((error = rt_getifa_fib(&info,
+ rt->rt_fibnum)) != 0)
senderr(error);
RT_LOCK(rt);
}
@@ -1278,7 +1282,7 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS)
} else /* dump only one table */
i = lim = af;
for (error = 0; error == 0 && i <= lim; i++)
- if ((rnh = rt_tables[i]) != NULL) {
+ if ((rnh = rt_tables[curthread->td_proc->p_fibnum][i]) != NULL) {
RADIX_NODE_HEAD_LOCK(rnh);
error = rnh->rnh_walktree(rnh,
sysctl_dumpentry, &w);
diff --git a/sys/netatalk/at_extern.h b/sys/netatalk/at_extern.h
index d81a877..cf11017 100644
--- a/sys/netatalk/at_extern.h
+++ b/sys/netatalk/at_extern.h
@@ -55,6 +55,7 @@ u_short at_cksum(struct mbuf *m, int skip);
int at_control(struct socket *so, u_long cmd, caddr_t data,
struct ifnet *ifp, struct thread *td);
struct at_ifaddr *at_ifawithnet(struct sockaddr_at *);
+int at_inithead(void**, int);
void ddp_init(void);
int ddp_output(struct mbuf *m, struct socket *so);
int ddp_route(struct mbuf *m, struct route *ro);
diff --git a/sys/netatalk/at_proto.c b/sys/netatalk/at_proto.c
index 39b0dd6..f62df59 100644
--- a/sys/netatalk/at_proto.c
+++ b/sys/netatalk/at_proto.c
@@ -56,7 +56,7 @@ static struct domain atalkdomain = {
.dom_name = "appletalk",
.dom_protosw = atalksw,
.dom_protoswNPROTOSW = &atalksw[sizeof(atalksw)/sizeof(atalksw[0])],
- .dom_rtattach = rn_inithead,
+ .dom_rtattach = at_inithead,
.dom_rtoffset = offsetof(struct sockaddr_at, sat_addr) << 3,
.dom_maxrtkey = sizeof(struct sockaddr_at),
};
diff --git a/sys/netgraph/netflow/netflow.c b/sys/netgraph/netflow/netflow.c
index 0f981ff..51a87dd 100644
--- a/sys/netgraph/netflow/netflow.c
+++ b/sys/netgraph/netflow/netflow.c
@@ -269,7 +269,8 @@ hash_insert(priv_p priv, struct flow_hash_entry *hsh, struct flow_rec *r,
sin.sin_len = sizeof(struct sockaddr_in);
sin.sin_family = AF_INET;
sin.sin_addr = fle->f.r.r_dst;
- rt = rtalloc1((struct sockaddr *)&sin, 0, RTF_CLONING);
+ /* XXX MRT 0 as a default.. need the m here to get fib */
+ rt = rtalloc1_fib((struct sockaddr *)&sin, 0, RTF_CLONING, 0);
if (rt != NULL) {
fle->f.fle_o_ifx = rt->rt_ifp->if_index;
@@ -293,7 +294,8 @@ hash_insert(priv_p priv, struct flow_hash_entry *hsh, struct flow_rec *r,
sin.sin_len = sizeof(struct sockaddr_in);
sin.sin_family = AF_INET;
sin.sin_addr = fle->f.r.r_src;
- rt = rtalloc1((struct sockaddr *)&sin, 0, RTF_CLONING);
+ /* XXX MRT 0 as a default revisit. need the mbuf for fib*/
+ rt = rtalloc1_fib((struct sockaddr *)&sin, 0, RTF_CLONING, 0);
if (rt != NULL) {
if (rt_mask(rt))
fle->f.src_mask = bitcount32(((struct sockaddr_in *)
diff --git a/sys/netinet/if_atm.c b/sys/netinet/if_atm.c
index d19dea8..065f0c4 100644
--- a/sys/netinet/if_atm.c
+++ b/sys/netinet/if_atm.c
@@ -327,7 +327,7 @@ atmresolve(struct rtentry *rt, struct mbuf *m, struct sockaddr *dst,
}
if (rt == NULL) {
- rt = RTALLOC1(dst, 0);
+ rt = RTALLOC1(dst, 0); /* link level on table 0 XXX MRT */
if (rt == NULL)
goto bad; /* failed */
RT_REMREF(rt); /* don't keep LL references */
diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c
index b1133c9..6939dbb 100644
--- a/sys/netinet/if_ether.c
+++ b/sys/netinet/if_ether.c
@@ -116,7 +116,7 @@ static void arprequest(struct ifnet *,
static void arpintr(struct mbuf *);
static void arptimer(void *);
static struct rtentry
- *arplookup(u_long, int, int);
+ *arplookup(u_long, int, int, int);
#ifdef INET
static void in_arpinput(struct mbuf *);
#endif
@@ -138,7 +138,8 @@ arptimer(void *arg)
*/
RT_UNLOCK(rt);
- rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt), 0, NULL);
+ in_rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt), 0, NULL,
+ rt->rt_fibnum);
}
/*
@@ -362,6 +363,7 @@ arpresolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m,
struct rtentry *rt = NULL;
struct sockaddr_dl *sdl;
int error;
+ int fibnum = 0;
if (m) {
if (m->m_flags & M_BCAST) {
@@ -375,10 +377,14 @@ arpresolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m,
ETHER_MAP_IP_MULTICAST(&SIN(dst)->sin_addr, desten);
return (0);
}
+ fibnum = M_GETFIB(m);
}
if (rt0 != NULL) {
- error = rt_check(&rt, &rt0, dst);
+ /* Look for a cached arp (ll) entry. */
+ if (m == NULL)
+ fibnum = rt0->rt_fibnum;
+ error = in_rt_check(&rt, &rt0, dst, fibnum);
if (error) {
m_freem(m);
return error;
@@ -389,10 +395,14 @@ arpresolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m,
}
if (la == NULL) {
/*
- * We enter this block in case if rt0 was NULL,
- * or if rt found by rt_check() didn't have llinfo.
+ * We enter this block if rt0 was NULL,
+ * or if rt found by in_rt_check() didn't have llinfo.
+ * we should get a cloned route, which since it should
+ * come from the local interface should have a ll entry.
+ * it may be incomplete but that's ok.
+ * XXXMRT if we haven't found a fibnum is that OK?
*/
- rt = arplookup(SIN(dst)->sin_addr.s_addr, 1, 0);
+ rt = arplookup(SIN(dst)->sin_addr.s_addr, 1, 0, fibnum);
if (rt == NULL) {
log(LOG_DEBUG,
"arpresolve: can't allocate route for %s\n",
@@ -582,6 +592,9 @@ in_arpinput(struct mbuf *m)
int op, rif_len;
int req_len;
int bridged = 0;
+ u_int fibnum;
+ u_int goodfib = 0;
+ int firstpass = 1;
#ifdef DEV_CARP
int carp_match = 0;
#endif
@@ -674,133 +687,181 @@ match:
}
if (ifp->if_flags & IFF_STATICARP)
goto reply;
- rt = arplookup(isaddr.s_addr, itaddr.s_addr == myaddr.s_addr, 0);
- if (rt != NULL) {
- sin.sin_addr.s_addr = isaddr.s_addr;
- EVENTHANDLER_INVOKE(route_arp_update_event, rt,
- ar_sha(ah), (struct sockaddr *)&sin);
+ /*
+ * We look for any FIB that has this address to find
+ * the interface etc.
+ * For sanity checks that are FIB independent we abort the loop.
+ */
+ for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
+ rt = arplookup(isaddr.s_addr,
+ itaddr.s_addr == myaddr.s_addr, 0, fibnum);
+ if (rt == NULL)
+ continue;
+
+ sdl = SDL(rt->rt_gateway);
+ /* Only call this once */
+ if (firstpass) {
+ sin.sin_addr.s_addr = isaddr.s_addr;
+ EVENTHANDLER_INVOKE(route_arp_update_event, rt,
+ ar_sha(ah), (struct sockaddr *)&sin);
+ }
la = (struct llinfo_arp *)rt->rt_llinfo;
if (la == NULL) {
RT_UNLOCK(rt);
- goto reply;
+ continue;
}
- } else
- goto reply;
- /* The following is not an error when doing bridging. */
- if (!bridged && rt->rt_ifp != ifp
+ if (firstpass) {
+ /* The following is not an error when doing bridging. */
+ if (!bridged && rt->rt_ifp != ifp
#ifdef DEV_CARP
- && (ifp->if_type != IFT_CARP || !carp_match)
+ && (ifp->if_type != IFT_CARP || !carp_match)
#endif
- ) {
- if (log_arp_wrong_iface)
- log(LOG_ERR, "arp: %s is on %s but got reply from %*D on %s\n",
- inet_ntoa(isaddr),
- rt->rt_ifp->if_xname,
- ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
- ifp->if_xname);
- RT_UNLOCK(rt);
- goto reply;
- }
- sdl = SDL(rt->rt_gateway);
- if (sdl->sdl_alen &&
- bcmp(ar_sha(ah), LLADDR(sdl), sdl->sdl_alen)) {
- if (rt->rt_expire) {
- if (log_arp_movements)
- log(LOG_INFO, "arp: %s moved from %*D to %*D on %s\n",
- inet_ntoa(isaddr),
- ifp->if_addrlen, (u_char *)LLADDR(sdl), ":",
- ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
- ifp->if_xname);
- } else {
- RT_UNLOCK(rt);
- if (log_arp_permanent_modify)
- log(LOG_ERR, "arp: %*D attempts to modify "
- "permanent entry for %s on %s\n",
- ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
- inet_ntoa(isaddr), ifp->if_xname);
- goto reply;
- }
- }
- /*
- * sanity check for the address length.
- * XXX this does not work for protocols with variable address
- * length. -is
- */
- if (sdl->sdl_alen &&
- sdl->sdl_alen != ah->ar_hln) {
- log(LOG_WARNING,
- "arp from %*D: new addr len %d, was %d",
- ifp->if_addrlen, (u_char *) ar_sha(ah), ":",
- ah->ar_hln, sdl->sdl_alen);
- }
- if (ifp->if_addrlen != ah->ar_hln) {
- log(LOG_WARNING,
- "arp from %*D: addr len: new %d, i/f %d (ignored)",
- ifp->if_addrlen, (u_char *) ar_sha(ah), ":",
- ah->ar_hln, ifp->if_addrlen);
- RT_UNLOCK(rt);
- goto reply;
- }
- (void)memcpy(LLADDR(sdl), ar_sha(ah),
- sdl->sdl_alen = ah->ar_hln);
- /*
- * If we receive an arp from a token-ring station over
- * a token-ring nic then try to save the source
- * routing info.
- */
- if (ifp->if_type == IFT_ISO88025) {
- struct iso88025_header *th = NULL;
- struct iso88025_sockaddr_dl_data *trld;
-
- th = (struct iso88025_header *)m->m_pkthdr.header;
- trld = SDL_ISO88025(sdl);
- rif_len = TR_RCF_RIFLEN(th->rcf);
- if ((th->iso88025_shost[0] & TR_RII) &&
- (rif_len > 2)) {
- trld->trld_rcf = th->rcf;
- trld->trld_rcf ^= htons(TR_RCF_DIR);
- memcpy(trld->trld_route, th->rd, rif_len - 2);
- trld->trld_rcf &= ~htons(TR_RCF_BCST_MASK);
+ ) {
+ if (log_arp_wrong_iface)
+ log(LOG_ERR, "arp: %s is on %s "
+ "but got reply from %*D "
+ "on %s\n",
+ inet_ntoa(isaddr),
+ rt->rt_ifp->if_xname,
+ ifp->if_addrlen,
+ (u_char *)ar_sha(ah), ":",
+ ifp->if_xname);
+ RT_UNLOCK(rt);
+ break;
+ }
+ if (sdl->sdl_alen &&
+ bcmp(ar_sha(ah), LLADDR(sdl), sdl->sdl_alen)) {
+ if (rt->rt_expire) {
+ if (log_arp_movements)
+ log(LOG_INFO,
+ "arp: %s moved from %*D to %*D "
+ "on %s\n",
+ inet_ntoa(isaddr),
+ ifp->if_addrlen,
+ (u_char *)LLADDR(sdl), ":",
+ ifp->if_addrlen,
+ (u_char *)ar_sha(ah), ":",
+ ifp->if_xname);
+ } else {
+ RT_UNLOCK(rt);
+ if (log_arp_permanent_modify)
+ log(LOG_ERR,
+ "arp: %*D attempts to "
+ "modify permanent entry "
+ "for %s on %s\n",
+ ifp->if_addrlen,
+ (u_char *)ar_sha(ah), ":",
+ inet_ntoa(isaddr),
+ ifp->if_xname);
+ break;
+ }
+ }
/*
- * Set up source routing information for
- * reply packet (XXX)
+ * sanity check for the address length.
+ * XXX this does not work for protocols
+ * with variable address length. -is
*/
- m->m_data -= rif_len;
- m->m_len += rif_len;
- m->m_pkthdr.len += rif_len;
- } else {
- th->iso88025_shost[0] &= ~TR_RII;
- trld->trld_rcf = 0;
+ if (sdl->sdl_alen &&
+ sdl->sdl_alen != ah->ar_hln) {
+ log(LOG_WARNING,
+ "arp from %*D: new addr len %d, was %d",
+ ifp->if_addrlen, (u_char *) ar_sha(ah),
+ ":", ah->ar_hln, sdl->sdl_alen);
+ }
+ if (ifp->if_addrlen != ah->ar_hln) {
+ log(LOG_WARNING,
+ "arp from %*D: addr len: "
+ "new %d, i/f %d (ignored)",
+ ifp->if_addrlen, (u_char *) ar_sha(ah),
+ ":", ah->ar_hln, ifp->if_addrlen);
+ RT_UNLOCK(rt);
+ break;
+ }
+ firstpass = 0;
+ goodfib = fibnum;
}
- m->m_data -= 8;
- m->m_len += 8;
- m->m_pkthdr.len += 8;
- th->rcf = trld->trld_rcf;
- }
- if (rt->rt_expire) {
- rt->rt_expire = time_uptime + arpt_keep;
- callout_reset(&la->la_timer, hz * arpt_keep, arptimer, rt);
- }
- la->la_asked = 0;
- la->la_preempt = arp_maxtries;
- hold = la->la_hold;
- la->la_hold = NULL;
- RT_UNLOCK(rt);
- if (hold != NULL)
- (*ifp->if_output)(ifp, hold, rt_key(rt), rt);
+ /* Copy in the information received. */
+ (void)memcpy(LLADDR(sdl), ar_sha(ah),
+ sdl->sdl_alen = ah->ar_hln);
+ /*
+ * If we receive an arp from a token-ring station over
+ * a token-ring nic then try to save the source routing info.
+ * XXXMRT Only minimal Token Ring support for MRT.
+ * Only do this on the first pass as it modifies the mbuf.
+ */
+ if (ifp->if_type == IFT_ISO88025) {
+ struct iso88025_header *th = NULL;
+ struct iso88025_sockaddr_dl_data *trld;
+
+ /* force the fib loop to end after this pass */
+ fibnum = rt_numfibs - 1;
+
+ th = (struct iso88025_header *)m->m_pkthdr.header;
+ trld = SDL_ISO88025(sdl);
+ rif_len = TR_RCF_RIFLEN(th->rcf);
+ if ((th->iso88025_shost[0] & TR_RII) &&
+ (rif_len > 2)) {
+ trld->trld_rcf = th->rcf;
+ trld->trld_rcf ^= htons(TR_RCF_DIR);
+ memcpy(trld->trld_route, th->rd, rif_len - 2);
+ trld->trld_rcf &= ~htons(TR_RCF_BCST_MASK);
+ /*
+ * Set up source routing information for
+ * reply packet (XXX)
+ */
+ m->m_data -= rif_len;
+ m->m_len += rif_len;
+ m->m_pkthdr.len += rif_len;
+ } else {
+ th->iso88025_shost[0] &= ~TR_RII;
+ trld->trld_rcf = 0;
+ }
+ m->m_data -= 8;
+ m->m_len += 8;
+ m->m_pkthdr.len += 8;
+ th->rcf = trld->trld_rcf;
+ }
+
+ if (rt->rt_expire) {
+ rt->rt_expire = time_uptime + arpt_keep;
+ callout_reset(&la->la_timer, hz * arpt_keep,
+ arptimer, rt);
+ }
+ la->la_asked = 0;
+ la->la_preempt = arp_maxtries;
+ hold = la->la_hold;
+ la->la_hold = NULL;
+ RT_UNLOCK(rt);
+ if (hold != NULL)
+ (*ifp->if_output)(ifp, hold, rt_key(rt), rt);
+ } /* end of FIB loop */
reply:
+
+ /*
+ * Decide if we have to respond to something.
+ */
if (op != ARPOP_REQUEST)
goto drop;
if (itaddr.s_addr == myaddr.s_addr) {
- /* I am the target */
+ /* Shortcut.. the receiving interface is the target. */
(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
(void)memcpy(ar_sha(ah), enaddr, ah->ar_hln);
} else {
- rt = arplookup(itaddr.s_addr, 0, SIN_PROXY);
+ /* It's not asking for our address. But it still may
+ * be something we should answer.
+ *
+ * XXX MRT
+ * We assume that link level info is independent of
+ * the table used and so we use whichever we can and don't
+ * have a better option.
+ */
+ /* Have we been asked to proxy for the target. */
+ rt = arplookup(itaddr.s_addr, 0, SIN_PROXY, goodfib);
if (rt == NULL) {
+ /* Nope, only interested now if proxying everything. */
struct sockaddr_in sin;
if (!arp_proxyall)
@@ -811,7 +872,8 @@ reply:
sin.sin_len = sizeof sin;
sin.sin_addr = itaddr;
- rt = rtalloc1((struct sockaddr *)&sin, 0, 0UL);
+ /* XXX MRT use table 0 for arp reply */
+ rt = in_rtalloc1((struct sockaddr *)&sin, 0, 0UL, 0);
if (!rt)
goto drop;
/*
@@ -835,7 +897,8 @@ reply:
*/
sin.sin_addr = isaddr;
- rt = rtalloc1((struct sockaddr *)&sin, 0, 0UL);
+ /* XXX MRT use table 0 for arp checks */
+ rt = in_rtalloc1((struct sockaddr *)&sin, 0, 0UL, 0);
if (!rt)
goto drop;
if (rt->rt_ifp != ifp) {
@@ -905,7 +968,7 @@ drop:
* Lookup or enter a new address in arptab.
*/
static struct rtentry *
-arplookup(u_long addr, int create, int proxy)
+arplookup(u_long addr, int create, int proxy, int fibnum)
{
struct rtentry *rt;
struct sockaddr_inarp sin;
@@ -917,7 +980,7 @@ arplookup(u_long addr, int create, int proxy)
sin.sin_addr.s_addr = addr;
if (proxy)
sin.sin_other = SIN_PROXY;
- rt = rtalloc1((struct sockaddr *)&sin, create, 0UL);
+ rt = in_rtalloc1((struct sockaddr *)&sin, create, 0UL, fibnum);
if (rt == 0)
return (0);
diff --git a/sys/netinet/in_gif.c b/sys/netinet/in_gif.c
index 69a34f8..55b4ec7 100644
--- a/sys/netinet/in_gif.c
+++ b/sys/netinet/in_gif.c
@@ -191,6 +191,8 @@ in_gif_output(struct ifnet *ifp, int family, struct mbuf *m)
}
bcopy(&iphdr, mtod(m, struct ip *), sizeof(struct ip));
+ M_SETFIB(m, sc->gif_fibnum);
+
if (dst->sin_family != sin_dst->sin_family ||
dst->sin_addr.s_addr != sin_dst->sin_addr.s_addr) {
/* cache route doesn't match */
@@ -208,7 +210,7 @@ in_gif_output(struct ifnet *ifp, int family, struct mbuf *m)
}
if (sc->gif_ro.ro_rt == NULL) {
- rtalloc_ign(&sc->gif_ro, 0);
+ in_rtalloc_ign(&sc->gif_ro, 0, sc->gif_fibnum);
if (sc->gif_ro.ro_rt == NULL) {
m_freem(m);
return ENETUNREACH;
@@ -368,7 +370,9 @@ gif_validate4(const struct ip *ip, struct gif_softc *sc, struct ifnet *ifp)
sin.sin_family = AF_INET;
sin.sin_len = sizeof(struct sockaddr_in);
sin.sin_addr = ip->ip_src;
- rt = rtalloc1((struct sockaddr *)&sin, 0, 0UL);
+ /* XXX MRT check for the interface we would use on output */
+ rt = in_rtalloc1((struct sockaddr *)&sin, 0,
+ 0UL, sc->gif_fibnum);
if (!rt || rt->rt_ifp != ifp) {
#if 0
log(LOG_WARNING, "%s: packet from 0x%x dropped "
diff --git a/sys/netinet/in_mcast.c b/sys/netinet/in_mcast.c
index be2208a..9f37f33 100644
--- a/sys/netinet/in_mcast.c
+++ b/sys/netinet/in_mcast.c
@@ -1025,7 +1025,8 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)
ro.ro_rt = NULL;
*(struct sockaddr_in *)&ro.ro_dst = gsa->sin;
- rtalloc_ign(&ro, RTF_CLONING);
+ in_rtalloc_ign(&ro, RTF_CLONING,
+ inp->inp_inc.inc_fibnum);
if (ro.ro_rt != NULL) {
ifp = ro.ro_rt->rt_ifp;
KASSERT(ifp != NULL, ("%s: null ifp",
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
index 9b0b6a5..a9702c5 100644
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -186,6 +186,7 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
bzero(inp, inp_zero_size);
inp->inp_pcbinfo = pcbinfo;
inp->inp_socket = so;
+ inp->inp_inc.inc_fibnum = so->so_fibnum;
#ifdef MAC
error = mac_inpcb_init(inp, M_NOWAIT);
if (error != 0)
@@ -605,7 +606,7 @@ in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam,
* Find out route to destination
*/
if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0)
- ia = ip_rtaddr(faddr);
+ ia = ip_rtaddr(faddr, inp->inp_inc.inc_fibnum);
/*
* If we found a route, use the address corresponding to
* the outgoing interface.
diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
index afb4dd2..6e5c92e 100644
--- a/sys/netinet/in_pcb.h
+++ b/sys/netinet/in_pcb.h
@@ -101,7 +101,7 @@ struct in_endpoints {
struct in_conninfo {
u_int8_t inc_flags;
u_int8_t inc_len;
- u_int16_t inc_pad; /* XXX alignment for in_endpoints */
+ u_int16_t inc_fibnum; /* XXX was pad, 16 bits is plenty */
/* protocol dependent part */
struct in_endpoints inc_ie;
};
diff --git a/sys/netinet/in_rmx.c b/sys/netinet/in_rmx.c
index 8a5f978..aabf57e 100644
--- a/sys/netinet/in_rmx.c
+++ b/sys/netinet/in_rmx.c
@@ -110,7 +110,8 @@ in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
* Find out if it is because of an
* ARP entry and delete it if so.
*/
- rt2 = rtalloc1((struct sockaddr *)sin, 0, RTF_CLONING);
+ rt2 = in_rtalloc1((struct sockaddr *)sin, 0,
+ RTF_CLONING, rt->rt_fibnum);
if (rt2) {
if (rt2->rt_flags & RTF_LLINFO &&
rt2->rt_flags & RTF_HOST &&
@@ -225,10 +226,10 @@ in_rtqkill(struct radix_node *rn, void *rock)
if (rt->rt_refcnt > 0)
panic("rtqkill route really not free");
- err = rtrequest(RTM_DELETE,
+ err = in_rtrequest(RTM_DELETE,
(struct sockaddr *)rt_key(rt),
rt->rt_gateway, rt_mask(rt),
- rt->rt_flags, 0);
+ rt->rt_flags, 0, rt->rt_fibnum);
if (err) {
log(LOG_WARNING, "in_rtqkill: error %d\n", err);
} else {
@@ -253,12 +254,31 @@ in_rtqkill(struct radix_node *rn, void *rock)
static int rtq_timeout = RTQ_TIMEOUT;
static struct callout rtq_timer;
+static void in_rtqtimo_one(void *rock);
+
static void
in_rtqtimo(void *rock)
{
+ int fibnum;
+ void *newrock;
+ struct timeval atv;
+
+ KASSERT((rock == (void *)rt_tables[0][AF_INET]),
+ ("in_rtqtimo: unexpected arg"));
+ for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { /* one pass per FIB */
+ if ((newrock = rt_tables[fibnum][AF_INET]) != NULL) /* skip FIBs with no AF_INET table */
+ in_rtqtimo_one(newrock);
+ }
+ atv.tv_usec = 0;
+ atv.tv_sec = rtq_timeout;
+ callout_reset(&rtq_timer, tvtohz(&atv), in_rtqtimo, rock); /* re-arm after rtq_timeout seconds, same argument */
+}
+
+static void
+in_rtqtimo_one(void *rock)
+{
struct radix_node_head *rnh = rock;
struct rtqk_arg arg;
- struct timeval atv;
static time_t last_adjusted_timeout = 0;
arg.found = arg.killed = 0;
@@ -297,27 +317,29 @@ in_rtqtimo(void *rock)
RADIX_NODE_HEAD_UNLOCK(rnh);
}
- atv.tv_usec = 0;
- atv.tv_sec = arg.nextstop - time_uptime;
- callout_reset(&rtq_timer, tvtohz(&atv), in_rtqtimo, rock);
}
void
in_rtqdrain(void)
{
- struct radix_node_head *rnh = rt_tables[AF_INET];
+ struct radix_node_head *rnh;
struct rtqk_arg arg;
+ int fibnum;
- arg.found = arg.killed = 0;
- arg.rnh = rnh;
- arg.nextstop = 0;
- arg.draining = 1;
- arg.updating = 0;
- RADIX_NODE_HEAD_LOCK(rnh);
- rnh->rnh_walktree(rnh, in_rtqkill, &arg);
- RADIX_NODE_HEAD_UNLOCK(rnh);
+ for ( fibnum = 0; fibnum < rt_numfibs; fibnum++) { /* walk the AF_INET table of every FIB */
+ rnh = rt_tables[fibnum][AF_INET]; /* NOTE(review): not NULL-checked, unlike in_rtqtimo() - confirm every FIB has a table here */
+ arg.found = arg.killed = 0;
+ arg.rnh = rnh;
+ arg.nextstop = 0;
+ arg.draining = 1;
+ arg.updating = 0;
+ RADIX_NODE_HEAD_LOCK(rnh);
+ rnh->rnh_walktree(rnh, in_rtqkill, &arg); /* in_rtqkill is called per node with &arg */
+ RADIX_NODE_HEAD_UNLOCK(rnh);
+ }
}
+static int _in_rt_was_here;
/*
* Initialize our routing tree.
*/
@@ -326,18 +348,29 @@ in_inithead(void **head, int off)
{
struct radix_node_head *rnh;
- if (!rn_inithead(head, off))
+ /* XXX MRT
+ * This can be called from vfs_export.c too in which case 'off'
+ * will be 0. We know the correct value so just use that and
+ * return directly if it was 0.
+ * This is a hack that replaces an even worse hack on a bad hack
+ * on a bad design. After RELENG_7 this should be fixed but that
+ * will change the ABI, so for now do it this way.
+ */
+ if (!rn_inithead(head, 32))
return 0;
- if (head != (void **)&rt_tables[AF_INET]) /* BOGUS! */
- return 1; /* only do this for the real routing table */
+ if (off == 0) /* XXX MRT see above */
+ return 1; /* only do the rest for a real routing table */
rnh = *head;
rnh->rnh_addaddr = in_addroute;
rnh->rnh_matchaddr = in_matroute;
rnh->rnh_close = in_clsroute;
- callout_init(&rtq_timer, CALLOUT_MPSAFE);
- in_rtqtimo(rnh); /* kick off timeout first time */
+ if (_in_rt_was_here == 0 ) {
+ callout_init(&rtq_timer, CALLOUT_MPSAFE);
+ in_rtqtimo(rnh); /* kick off timeout first time */
+ _in_rt_was_here = 1;
+ }
return 1;
}
@@ -384,16 +417,81 @@ in_ifadown(struct ifaddr *ifa, int delete)
{
struct in_ifadown_arg arg;
struct radix_node_head *rnh;
+ int fibnum;
if (ifa->ifa_addr->sa_family != AF_INET)
return 1;
- rnh = rt_tables[AF_INET];
- arg.ifa = ifa;
- arg.del = delete;
- RADIX_NODE_HEAD_LOCK(rnh);
- rnh->rnh_walktree(rnh, in_ifadownkill, &arg);
- RADIX_NODE_HEAD_UNLOCK(rnh);
- ifa->ifa_flags &= ~IFA_ROUTE; /* XXXlocking? */
+ for ( fibnum = 0; fibnum < rt_numfibs; fibnum++) {
+ rnh = rt_tables[fibnum][AF_INET];
+ arg.ifa = ifa;
+ arg.del = delete;
+ RADIX_NODE_HEAD_LOCK(rnh);
+ rnh->rnh_walktree(rnh, in_ifadownkill, &arg);
+ RADIX_NODE_HEAD_UNLOCK(rnh);
+ ifa->ifa_flags &= ~IFA_ROUTE; /* XXXlocking? */
+ }
return 0;
}
+
+/*
+ * inet versions of the rt functions. These take an explicit fib number
+ * and for now just call the corresponding _fib variants.
+ * Eventually this order will be reversed.
+ */
+void
+in_rtalloc_ign(struct route *ro, u_long ignflags, u_int fibnum)
+{
+ rtalloc_ign_fib(ro, ignflags, fibnum); /* all three arguments are passed through unchanged */
+}
+/* Forwards every argument unchanged to rtrequest_fib() and returns its result. */
+int
+in_rtrequest( int req,
+ struct sockaddr *dst,
+ struct sockaddr *gateway,
+ struct sockaddr *netmask,
+ int flags,
+ struct rtentry **ret_nrt,
+ u_int fibnum)
+{
+ return (rtrequest_fib(req, dst, gateway, netmask,
+ flags, ret_nrt, fibnum));
+}
+/* Returns whatever rtalloc1_fib() returns for the same arguments. */
+struct rtentry *
+in_rtalloc1(struct sockaddr *dst, int report, u_long ignflags, u_int fibnum)
+{
+ return (rtalloc1_fib(dst, report, ignflags, fibnum));
+}
+/* Returns the result of rt_check_fib() called with the same arguments. */
+int
+in_rt_check(struct rtentry **lrt, struct rtentry **lrt0,
+ struct sockaddr *dst, u_int fibnum)
+{
+ return (rt_check_fib(lrt, lrt0, dst, fibnum));
+}
+/* Forwards every argument unchanged to rtredirect_fib(). */
+void
+in_rtredirect(struct sockaddr *dst,
+ struct sockaddr *gateway,
+ struct sockaddr *netmask,
+ int flags,
+ struct sockaddr *src,
+ u_int fibnum)
+{
+ rtredirect_fib(dst, gateway, netmask, flags, src, fibnum);
+}
+/* Same call as in_rtalloc_ign() makes, with the ignore-flags argument fixed at 0. */
+void
+in_rtalloc(struct route *ro, u_int fibnum)
+{
+ rtalloc_ign_fib(ro, 0UL, fibnum);
+}
+
+#if 0
+int in_rt_getifa(struct rt_addrinfo *, u_int fibnum);
+int in_rtioctl(u_long, caddr_t, u_int);
+int in_rtrequest1(int, struct rt_addrinfo *, struct rtentry **, u_int);
+#endif
+
+
diff --git a/sys/netinet/in_var.h b/sys/netinet/in_var.h
index 47a160a..d7f1e28 100644
--- a/sys/netinet/in_var.h
+++ b/sys/netinet/in_var.h
@@ -287,6 +287,7 @@ do { \
IN_NEXT_MULTI((step), (inm)); \
} while(0)
+struct rtentry;
struct route;
struct ip_moptions;
@@ -305,6 +306,21 @@ int in_ifadown(struct ifaddr *ifa, int);
void in_ifscrub(struct ifnet *, struct in_ifaddr *);
struct mbuf *ip_fastforward(struct mbuf *);
+/* XXX */
+void in_rtalloc_ign(struct route *ro, u_long ignflags, u_int fibnum);
+void in_rtalloc(struct route *ro, u_int fibnum);
+struct rtentry *in_rtalloc1(struct sockaddr *, int, u_long, u_int);
+void in_rtredirect(struct sockaddr *, struct sockaddr *,
+ struct sockaddr *, int, struct sockaddr *, u_int);
+int in_rtrequest(int, struct sockaddr *,
+ struct sockaddr *, struct sockaddr *, int, struct rtentry **, u_int);
+int in_rt_check(struct rtentry **, struct rtentry **, struct sockaddr *, u_int);
+
+#if 0
+int in_rt_getifa(struct rt_addrinfo *, u_int fibnum);
+int in_rtioctl(u_long, caddr_t, u_int);
+int in_rtrequest1(int, struct rt_addrinfo *, struct rtentry **, u_int);
+#endif
#endif /* _KERNEL */
/* INET6 stuff */
diff --git a/sys/netinet/ip_fastfwd.c b/sys/netinet/ip_fastfwd.c
index 97b823f..bb8c74a 100644
--- a/sys/netinet/ip_fastfwd.c
+++ b/sys/netinet/ip_fastfwd.c
@@ -123,7 +123,7 @@ ip_findroute(struct route *ro, struct in_addr dest, struct mbuf *m)
dst->sin_family = AF_INET;
dst->sin_len = sizeof(*dst);
dst->sin_addr.s_addr = dest.s_addr;
- rtalloc_ign(ro, RTF_CLONING);
+ in_rtalloc_ign(ro, RTF_CLONING, M_GETFIB(m));
/*
* Route there and interface still up?
diff --git a/sys/netinet/ip_fw.h b/sys/netinet/ip_fw.h
index b41c037..5dcdbb3 100644
--- a/sys/netinet/ip_fw.h
+++ b/sys/netinet/ip_fw.h
@@ -161,6 +161,9 @@ enum ipfw_opcodes { /* arguments (4 byte each) */
O_TAG, /* arg1=tag number */
O_TAGGED, /* arg1=tag number */
+ O_SETFIB, /* arg1=FIB number */
+ O_FIB, /* arg1=FIB desired fib number */
+
O_LAST_OPCODE /* not an opcode! */
};
@@ -465,6 +468,7 @@ struct ipfw_flow_id {
u_int32_t src_ip;
u_int16_t dst_port;
u_int16_t src_port;
+ u_int8_t fib;
u_int8_t proto;
u_int8_t flags; /* protocol-specific flags */
uint8_t addr_type; /* 4 = ipv4, 6 = ipv6, 1=ether ? */
diff --git a/sys/netinet/ip_fw2.c b/sys/netinet/ip_fw2.c
index 39baa71..2346df6 100644
--- a/sys/netinet/ip_fw2.c
+++ b/sys/netinet/ip_fw2.c
@@ -492,7 +492,7 @@ iface_match(struct ifnet *ifp, ipfw_insn_if *cmd)
* multicast, or broadcast.
*/
static int
-verify_path(struct in_addr src, struct ifnet *ifp)
+verify_path(struct in_addr src, struct ifnet *ifp, u_int fib)
{
struct route ro;
struct sockaddr_in *dst;
@@ -503,7 +503,7 @@ verify_path(struct in_addr src, struct ifnet *ifp)
dst->sin_family = AF_INET;
dst->sin_len = sizeof(*dst);
dst->sin_addr = src;
- rtalloc_ign(&ro, RTF_CLONING);
+ in_rtalloc_ign(&ro, RTF_CLONING, fib);
if (ro.ro_rt == NULL)
return 0;
@@ -593,6 +593,7 @@ verify_path6(struct in6_addr *src, struct ifnet *ifp)
dst->sin6_family = AF_INET6;
dst->sin6_len = sizeof(*dst);
dst->sin6_addr = *src;
+ /* XXX MRT 0 for ipv6 at this time */
rtalloc_ign((struct route *)&ro, RTF_CLONING);
if (ro.ro_rt == NULL)
@@ -828,6 +829,10 @@ ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args,
snprintf(SNPARGS(action2, 0), "Tee %d",
cmd->arg1);
break;
+ case O_SETFIB:
+ snprintf(SNPARGS(action2, 0), "SetFib %d",
+ cmd->arg1);
+ break;
case O_SKIPTO:
snprintf(SNPARGS(action2, 0), "SkipTo %d",
cmd->arg1);
@@ -1500,6 +1505,7 @@ install_state(struct ip_fw *rule, ipfw_insn_limit *cmd,
id.dst_ip = id.src_ip = id.dst_port = id.src_port = 0;
id.proto = args->f_id.proto;
id.addr_type = args->f_id.addr_type;
+ id.fib = M_GETFIB(args->m);
if (IS_IP6_FLOW_ID (&(args->f_id))) {
if (limit_mask & DYN_SRC_ADDR)
@@ -1601,6 +1607,7 @@ send_pkt(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t seq,
return (NULL);
m->m_pkthdr.rcvif = (struct ifnet *)0;
+ M_SETFIB(m, id->fib);
#ifdef MAC
if (replyto != NULL)
mac_netinet_firewall_reply(replyto, m);
@@ -2200,6 +2207,7 @@ ipfw_chk(struct ip_fw_args *args)
return (IP_FW_PASS); /* accept */
pktlen = m->m_pkthdr.len;
+ args->f_id.fib = M_GETFIB(m); /* note: mbuf not altered */
proto = args->f_id.proto = 0; /* mark f_id invalid */
/* XXX 0 is a valid proto: IP/IPv6 Hop-by-Hop Option */
@@ -2911,7 +2919,8 @@ check_body:
verify_path6(&(args->f_id.src_ip6),
m->m_pkthdr.rcvif) :
#endif
- verify_path(src_ip, m->m_pkthdr.rcvif)));
+ verify_path(src_ip, m->m_pkthdr.rcvif,
+ args->f_id.fib)));
break;
case O_VERSRCREACH:
@@ -2922,7 +2931,7 @@ check_body:
verify_path6(&(args->f_id.src_ip6),
NULL) :
#endif
- verify_path(src_ip, NULL)));
+ verify_path(src_ip, NULL, args->f_id.fib)));
break;
case O_ANTISPOOF:
@@ -2941,7 +2950,8 @@ check_body:
m->m_pkthdr.rcvif) :
#endif
verify_path(src_ip,
- m->m_pkthdr.rcvif);
+ m->m_pkthdr.rcvif,
+ args->f_id.fib);
else
match = 1;
break;
@@ -3043,6 +3053,11 @@ check_body:
break;
}
+ case O_FIB: /* try match the specified fib */
+ if (args->f_id.fib == cmd->arg1)
+ match = 1;
+ break;
+
case O_TAGGED: {
uint32_t tag = (cmd->arg1 == IP_FW_TABLEARG) ?
tablearg : cmd->arg1;
@@ -3203,7 +3218,6 @@ check_body:
IP_FW_DIVERT : IP_FW_TEE;
goto done;
}
-
case O_COUNT:
case O_SKIPTO:
f->pcnt++; /* update stats */
@@ -3283,6 +3297,14 @@ check_body:
IP_FW_NETGRAPH : IP_FW_NGTEE;
goto done;
+ case O_SETFIB:
+ f->pcnt++; /* update stats */
+ f->bcnt += pktlen;
+ f->timestamp = time_uptime;
+ M_SETFIB(m, cmd->arg1);
+ args->f_id.fib = cmd->arg1;
+ goto next_rule;
+
case O_NAT: {
struct cfg_nat *t;
int nat_id;
@@ -3793,6 +3815,26 @@ check_ipfw_struct(struct ip_fw *rule, int size)
goto bad_size;
break;
+ case O_FIB:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn))
+ goto bad_size;
+ if (cmd->arg1 >= rt_numfibs) {
+ printf("ipfw: invalid fib number %d\n",
+ cmd->arg1);
+ return EINVAL;
+ }
+ break;
+
+ case O_SETFIB:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn))
+ goto bad_size;
+ if (cmd->arg1 >= rt_numfibs) {
+ printf("ipfw: invalid fib number %d\n",
+ cmd->arg1);
+ return EINVAL;
+ }
+ goto check_action;
+
case O_UID:
case O_GID:
case O_JAIL:
diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c
index 4f664bf..bed9536 100644
--- a/sys/netinet/ip_icmp.c
+++ b/sys/netinet/ip_icmp.c
@@ -227,6 +227,10 @@ stdreply: icmpelen = max(8, min(icmp_quotelen, oip->ip_len - oiphlen));
m_align(m, ICMP_MINLEN + icmplen);
m->m_len = ICMP_MINLEN + icmplen;
+ /* XXX MRT make the outgoing packet use the same FIB
+ * that was associated with the incoming packet
+ */
+ M_SETFIB(m, M_GETFIB(n));
icp = mtod(m, struct icmp *);
icmpstat.icps_outhist[type]++;
icp->icmp_type = type;
@@ -295,6 +299,7 @@ icmp_input(struct mbuf *m, int off)
int icmplen = ip->ip_len;
int i, code;
void (*ctlfunc)(int, struct sockaddr *, void *);
+ int fibnum;
/*
* Locate icmp structure in mbuf, and check
@@ -576,10 +581,12 @@ reflect:
}
#endif
icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
- rtredirect((struct sockaddr *)&icmpsrc,
- (struct sockaddr *)&icmpdst,
- (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
- (struct sockaddr *)&icmpgw);
+ for ( fibnum = 0; fibnum < rt_numfibs; fibnum++) {
+ in_rtredirect((struct sockaddr *)&icmpsrc,
+ (struct sockaddr *)&icmpdst,
+ (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
+ (struct sockaddr *)&icmpgw, fibnum);
+ }
pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&icmpsrc);
#ifdef IPSEC
key_sa_routechange((struct sockaddr *)&icmpsrc);
@@ -693,7 +700,7 @@ icmp_reflect(struct mbuf *m)
* When we don't have a route back to the packet source, stop here
* and drop the packet.
*/
- ia = ip_rtaddr(ip->ip_dst);
+ ia = ip_rtaddr(ip->ip_dst, M_GETFIB(m));
if (ia == NULL) {
m_freem(m);
icmpstat.icps_noroute++;
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
index 1eb9e4a..93ba871 100644
--- a/sys/netinet/ip_input.c
+++ b/sys/netinet/ip_input.c
@@ -1198,7 +1198,7 @@ ipproto_unregister(u_char ipproto)
* return internet address info of interface to be used to get there.
*/
struct in_ifaddr *
-ip_rtaddr(struct in_addr dst)
+ip_rtaddr(struct in_addr dst, u_int fibnum)
{
struct route sro;
struct sockaddr_in *sin;
@@ -1209,7 +1209,7 @@ ip_rtaddr(struct in_addr dst)
sin->sin_family = AF_INET;
sin->sin_len = sizeof(*sin);
sin->sin_addr = dst;
- rtalloc_ign(&sro, RTF_CLONING);
+ in_rtalloc_ign(&sro, RTF_CLONING, fibnum);
if (sro.ro_rt == NULL)
return (NULL);
@@ -1269,7 +1269,7 @@ ip_forward(struct mbuf *m, int srcrt)
}
#endif
- ia = ip_rtaddr(ip->ip_dst);
+ ia = ip_rtaddr(ip->ip_dst, M_GETFIB(m));
if (!srcrt && ia == NULL) {
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
return;
@@ -1334,7 +1334,7 @@ ip_forward(struct mbuf *m, int srcrt)
sin->sin_family = AF_INET;
sin->sin_len = sizeof(*sin);
sin->sin_addr = ip->ip_dst;
- rtalloc_ign(&ro, RTF_CLONING);
+ in_rtalloc_ign(&ro, RTF_CLONING, M_GETFIB(m));
rt = ro.ro_rt;
@@ -1363,7 +1363,7 @@ ip_forward(struct mbuf *m, int srcrt)
* the ICMP_UNREACH_NEEDFRAG "Next-Hop MTU" field described in RFC1191.
*/
bzero(&ro, sizeof(ro));
- rtalloc_ign(&ro, RTF_CLONING);
+ rtalloc_ign_fib(&ro, RTF_CLONING, M_GETFIB(m));
error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL);
diff --git a/sys/netinet/ip_mroute.c b/sys/netinet/ip_mroute.c
index 6e0e124..d60e8bd 100644
--- a/sys/netinet/ip_mroute.c
+++ b/sys/netinet/ip_mroute.c
@@ -303,7 +303,7 @@ static int X_ip_mrouter_done(void);
static int X_ip_mrouter_get(struct socket *so, struct sockopt *m);
static int X_ip_mrouter_set(struct socket *so, struct sockopt *m);
static int X_legal_vif_num(int vif);
-static int X_mrt_ioctl(int cmd, caddr_t data);
+static int X_mrt_ioctl(int cmd, caddr_t data, int fibnum);
static int get_sg_cnt(struct sioc_sg_req *);
static int get_vif_cnt(struct sioc_vif_req *);
@@ -552,7 +552,7 @@ X_ip_mrouter_get(struct socket *so, struct sockopt *sopt)
* Handle ioctl commands to obtain information from the cache
*/
static int
-X_mrt_ioctl(int cmd, caddr_t data)
+X_mrt_ioctl(int cmd, caddr_t data, int fibnum)
{
int error = 0;
diff --git a/sys/netinet/ip_mroute.h b/sys/netinet/ip_mroute.h
index c756d84..4043e44 100644
--- a/sys/netinet/ip_mroute.h
+++ b/sys/netinet/ip_mroute.h
@@ -359,7 +359,7 @@ struct sockopt;
extern int (*ip_mrouter_set)(struct socket *, struct sockopt *);
extern int (*ip_mrouter_get)(struct socket *, struct sockopt *);
extern int (*ip_mrouter_done)(void);
-extern int (*mrt_ioctl)(int, caddr_t);
+extern int (*mrt_ioctl)(int, caddr_t, int);
#endif /* _KERNEL */
diff --git a/sys/netinet/ip_options.c b/sys/netinet/ip_options.c
index 72b6edd..0019f7a 100644
--- a/sys/netinet/ip_options.c
+++ b/sys/netinet/ip_options.c
@@ -233,7 +233,8 @@ dropit:
if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == NULL)
ia = (INA)ifa_ifwithnet((SA)&ipaddr);
} else
- ia = ip_rtaddr(ipaddr.sin_addr);
+/* XXX MRT route using the FIB carried by the packet's mbuf */
+ ia = ip_rtaddr(ipaddr.sin_addr, M_GETFIB(m));
if (ia == NULL) {
type = ICMP_UNREACH;
code = ICMP_UNREACH_SRCFAIL;
@@ -276,7 +277,7 @@ dropit:
* same).
*/
if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == NULL &&
- (ia = ip_rtaddr(ipaddr.sin_addr)) == NULL) {
+ (ia = ip_rtaddr(ipaddr.sin_addr, M_GETFIB(m))) == NULL) {
type = ICMP_UNREACH;
code = ICMP_UNREACH_HOST;
goto bad;
diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c
index 37995ef..231510a 100644
--- a/sys/netinet/ip_output.c
+++ b/sys/netinet/ip_output.c
@@ -230,10 +230,12 @@ again:
*/
if (ro->ro_rt == NULL)
#ifdef RADIX_MPATH
- rtalloc_mpath(ro,
- ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr));
+ rtalloc_mpath_fib(ro,
+ ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
+ inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m));
#else
- rtalloc_ign(ro, 0);
+ in_rtalloc_ign(ro, 0,
+ inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m));
#endif
if (ro->ro_rt == NULL) {
ipstat.ips_noroute++;
diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h
index eef4e1f..436a4a0 100644
--- a/sys/netinet/ip_var.h
+++ b/sys/netinet/ip_var.h
@@ -209,7 +209,7 @@ int ipproto_unregister(u_char);
struct mbuf *
ip_reass(struct mbuf *);
struct in_ifaddr *
- ip_rtaddr(struct in_addr);
+ ip_rtaddr(struct in_addr, u_int fibnum);
void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
struct mbuf *);
void ip_slowtimo(void);
diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
index 23ab1fe..2e9366f 100644
--- a/sys/netinet/raw_ip.c
+++ b/sys/netinet/raw_ip.c
@@ -95,7 +95,7 @@ int (*ip_mrouter_get)(struct socket *, struct sockopt *);
int (*ip_mrouter_done)(void);
int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
struct ip_moptions *);
-int (*mrt_ioctl)(int, caddr_t);
+int (*mrt_ioctl)(int, caddr_t, int);
int (*legal_vif_num)(int);
u_long (*ip_mcast_src)(int);
diff --git a/sys/netinet/sctp_os_bsd.h b/sys/netinet/sctp_os_bsd.h
index b165943..01c0fcb 100644
--- a/sys/netinet/sctp_os_bsd.h
+++ b/sys/netinet/sctp_os_bsd.h
@@ -399,7 +399,7 @@ typedef struct callout sctp_os_timer_t;
typedef struct route sctp_route_t;
typedef struct rtentry sctp_rtentry_t;
-#define SCTP_RTALLOC(ro, vrf_id) rtalloc_ign((struct route *)ro, 0UL)
+#define SCTP_RTALLOC(ro, vrf_id) in_rtalloc_ign((struct route *)ro, 0UL, vrf_id)
/* Future zero copy wakeup/send function */
#define SCTP_ZERO_COPY_EVENT(inp, so)
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index a344ae5..47763c1 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -453,6 +453,7 @@ findpcb:
/*
* If the INPCB does not exist then all data in the incoming
* segment is discarded and an appropriate RST is sent back.
+ * XXX MRT Send RST using which routing table?
*/
if (inp == NULL) {
/*
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index aaac6d6..36422197 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -471,6 +471,10 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr));
flags = TH_ACK;
} else {
+ /*
+ * reuse the mbuf.
+ * XXX MRT We inherit the FIB, which is lucky.
+ */
m_freem(m->m_next);
m->m_next = NULL;
m->m_data = (caddr_t)ipgen;
@@ -1199,6 +1203,8 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
bzero(&inc, sizeof(inc));
inc.inc_flags = 0; /* IPv4 */
inc.inc_faddr = faddr;
+ inc.inc_fibnum =
+ inp->inp_inc.inc_fibnum;
mtu = ntohs(icp->icmp_nextmtu);
/*
@@ -1595,7 +1601,7 @@ tcp_maxmtu(struct in_conninfo *inc, int *flags)
dst->sin_family = AF_INET;
dst->sin_len = sizeof(*dst);
dst->sin_addr = inc->inc_faddr;
- rtalloc_ign(&sro, RTF_CLONING);
+ in_rtalloc_ign(&sro, RTF_CLONING, inc->inc_fibnum);
}
if (sro.ro_rt != NULL) {
ifp = sro.ro_rt->rt_ifp;
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index d5694f3..e19f095 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -671,6 +671,8 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
#endif
inp = sotoinpcb(so);
+ inp->inp_inc.inc_fibnum = sc->sc_inc.inc_fibnum;
+ so->so_fibnum = sc->sc_inc.inc_fibnum;
INP_WLOCK(inp);
/* Insert new socket into PCB hash list. */
@@ -941,6 +943,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
else
tcpstat.tcps_sc_completed++;
+/* how do we find the inp for the new socket? */
if (sc != &scs)
syncache_free(sc);
return (1);
@@ -1127,6 +1130,7 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
sc->sc_label = maclabel;
#endif
sc->sc_ipopts = ipopts;
bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo));
+ sc->sc_inc.inc_fibnum = inp->inp_inc.inc_fibnum; /* set after bcopy(), which overwrites all of sc_inc including inc_fibnum */
#ifdef INET6
if (!inc->inc_isipv6)
diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c
index bcb634f..c24ca20 100644
--- a/sys/netinet6/in6.c
+++ b/sys/netinet6/in6.c
@@ -1708,7 +1708,8 @@ in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia,
rtp = &rt;
}
- error = rtrequest(RTM_ADD, (struct sockaddr *)&ia->ia_dstaddr,
+ error = rtrequest(RTM_ADD,
+ (struct sockaddr *)&ia->ia_dstaddr,
(struct sockaddr *)&ia->ia_addr,
(struct sockaddr *)&ia->ia_prefixmask,
ia->ia_flags | rtflags, rtp);
diff --git a/sys/netinet6/in6_ifattach.c b/sys/netinet6/in6_ifattach.c
index 6962deb..29dca4f 100644
--- a/sys/netinet6/in6_ifattach.c
+++ b/sys/netinet6/in6_ifattach.c
@@ -821,15 +821,15 @@ in6_ifdetach(struct ifnet *ifp)
/* XXX: should not fail */
return;
/* XXX grab lock first to avoid LOR */
- if (rt_tables[AF_INET6] != NULL) {
- RADIX_NODE_HEAD_LOCK(rt_tables[AF_INET6]);
+ if (rt_tables[0][AF_INET6] != NULL) {
+ RADIX_NODE_HEAD_LOCK(rt_tables[0][AF_INET6]);
rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL);
if (rt) {
if (rt->rt_ifp == ifp)
rtexpunge(rt);
RTFREE_LOCKED(rt);
}
- RADIX_NODE_HEAD_UNLOCK(rt_tables[AF_INET6]);
+ RADIX_NODE_HEAD_UNLOCK(rt_tables[0][AF_INET6]);
}
}
diff --git a/sys/netinet6/in6_rmx.c b/sys/netinet6/in6_rmx.c
index ea4c7ff..1c0164f 100644
--- a/sys/netinet6/in6_rmx.c
+++ b/sys/netinet6/in6_rmx.c
@@ -449,17 +449,21 @@ in6_rtqdrain(void)
/*
* Initialize our routing tree.
+ * XXX MRT When off == 0, we are being called from vfs_export.c
+ * so just set up their table and leave. (we know what the correct
+ * value should be so just use that).. FIX AFTER RELENG_7 is MFC'd
+ * see also comments in in_inithead() vfs_export.c and domain.h
*/
int
in6_inithead(void **head, int off)
{
struct radix_node_head *rnh;
- if (!rn_inithead(head, off))
- return 0;
+ if (!rn_inithead(head, offsetof(struct sockaddr_in6, sin6_addr) << 3))
+ return 0; /* See above */
- if (head != (void **)&rt_tables[AF_INET6]) /* BOGUS! */
- return 1; /* only do this for the real routing table */
+ if (off == 0) /* See above */
+ return 1; /* only do the rest for the real thing */
rnh = *head;
rnh->rnh_addaddr = in6_addroute;
diff --git a/sys/netinet6/nd6_rtr.c b/sys/netinet6/nd6_rtr.c
index 08f9a83..39f5382 100644
--- a/sys/netinet6/nd6_rtr.c
+++ b/sys/netinet6/nd6_rtr.c
@@ -2019,7 +2019,8 @@ in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6)
void
rt6_flush(struct in6_addr *gateway, struct ifnet *ifp)
{
- struct radix_node_head *rnh = rt_tables[AF_INET6];
+
+ struct radix_node_head *rnh = rt_tables[0][AF_INET6];
int s = splnet();
/* We'll care only link-local addresses */
diff --git a/sys/netipx/ipx_proto.c b/sys/netipx/ipx_proto.c
index a762ea7..56d091a 100644
--- a/sys/netipx/ipx_proto.c
+++ b/sys/netipx/ipx_proto.c
@@ -131,16 +131,26 @@ static struct protosw ipxsw[] = {
},
};
+extern int ipx_inithead(void **, int);
+
static struct domain ipxdomain = {
.dom_family = AF_IPX,
.dom_name = "network systems",
.dom_protosw = ipxsw,
.dom_protoswNPROTOSW = &ipxsw[sizeof(ipxsw)/sizeof(ipxsw[0])],
- .dom_rtattach = rn_inithead,
+ .dom_rtattach = ipx_inithead,
.dom_rtoffset = 16,
.dom_maxrtkey = sizeof(struct sockaddr_ipx)
};
+
+/* shim installed as dom_rtattach: passes head and offset straight to rn_inithead() */
+int
+ipx_inithead(void **head, int offset)
+{
+ return rn_inithead(head, offset);
+}
+
DOMAIN_SET(ipx);
SYSCTL_NODE(_net, PF_IPX, ipx, CTLFLAG_RW, 0,
"IPX/SPX");
diff --git a/sys/nfs4client/nfs4_vfsops.c b/sys/nfs4client/nfs4_vfsops.c
index 6de47a9..2531f66 100644
--- a/sys/nfs4client/nfs4_vfsops.c
+++ b/sys/nfs4client/nfs4_vfsops.c
@@ -812,7 +812,8 @@ nfs4_do_setclientid(struct nfsmount *nmp, struct ucred *cred)
#ifdef NFS4_USE_RPCCLNT
ro.ro_dst = *nmp->nm_rpcclnt.rc_name;
#endif
- rtalloc(&ro);
+/* XXX MRT NFS uses table 0 */
+ in_rtalloc(&ro, 0);
if (ro.ro_rt == NULL) {
error = EHOSTUNREACH;
goto nfsmout;
diff --git a/sys/nfsclient/bootp_subr.c b/sys/nfsclient/bootp_subr.c
index 44d4d97..a2c09c5 100644
--- a/sys/nfsclient/bootp_subr.c
+++ b/sys/nfsclient/bootp_subr.c
@@ -1137,11 +1137,12 @@ bootpc_adjust_interface(struct bootpc_ifcontext *ifctx,
if (ifctx->gotgw != 0 || gctx->gotgw == 0) {
clear_sinaddr(&defdst);
clear_sinaddr(&defmask);
- error = rtrequest(RTM_ADD,
+ /* XXX MRT just table 0 */
+ error = rtrequest_fib(RTM_ADD,
(struct sockaddr *) &defdst,
(struct sockaddr *) gw,
(struct sockaddr *) &defmask,
- (RTF_UP | RTF_GATEWAY | RTF_STATIC), NULL);
+ (RTF_UP | RTF_GATEWAY | RTF_STATIC), NULL, 0);
if (error != 0) {
printf("bootpc_adjust_interface: "
"add net route, error=%d\n", error);
diff --git a/sys/nfsclient/nfs_vfsops.c b/sys/nfsclient/nfs_vfsops.c
index aac2e00..f342211 100644
--- a/sys/nfsclient/nfs_vfsops.c
+++ b/sys/nfsclient/nfs_vfsops.c
@@ -476,6 +476,7 @@ nfs_mountroot(struct mount *mp, struct thread *td)
sin = mask;
sin.sin_family = AF_INET;
sin.sin_len = sizeof(sin);
+ /* XXX MRT use table 0 for this sort of thing */
error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
(struct sockaddr *)&nd->mygateway,
(struct sockaddr *)&mask,
diff --git a/sys/sys/domain.h b/sys/sys/domain.h
index cf2e92c..c78e50b 100644
--- a/sys/sys/domain.h
+++ b/sys/sys/domain.h
@@ -57,6 +57,12 @@ struct domain {
int (*dom_rtattach) /* initialize routing table */
(void **, int);
int dom_rtoffset; /* an arg to rtattach, in bits */
+ /* XXX MRT.
+ * rtoffset May be 0 if the domain supplies its own rtattach(),
+ * in which case, a 0 indicates it's being called from
+ * vfs_export.c (HACK) Only for AF_INET{,6} at this time.
+ * Temporary ABI compat hack.. fix post RELENG_7
+ */
int dom_maxrtkey; /* for routing layer */
void *(*dom_ifattach)(struct ifnet *);
void (*dom_ifdetach)(struct ifnet *, void *);
diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h
index e0e685e..9229e29 100644
--- a/sys/sys/mbuf.h
+++ b/sys/sys/mbuf.h
@@ -192,6 +192,11 @@ struct mbuf {
#define M_PROTO6 0x00080000 /* protocol-specific */
#define M_PROTO7 0x00100000 /* protocol-specific */
#define M_PROTO8 0x00200000 /* protocol-specific */
+/*
+ * For RELENG_{6,7} steal these flags for limited multiple routing table
+ * support. In RELENG_8 and beyond, use just one flag and a tag.
+ */
+#define M_FIB 0xF0000000 /* steal some bits to store fib number. */
#define M_NOTIFICATION M_PROTO5 /* SCTP notification */
@@ -206,7 +211,7 @@ struct mbuf {
*/
#define M_COPYFLAGS \
(M_PKTHDR|M_EOR|M_RDONLY|M_PROTOFLAGS|M_SKIP_FIREWALL|M_BCAST|M_MCAST|\
- M_FRAG|M_FIRSTFRAG|M_LASTFRAG|M_VLANTAG|M_PROMISC)
+ M_FRAG|M_FIRSTFRAG|M_LASTFRAG|M_VLANTAG|M_PROMISC|M_FIB)
/*
* External buffer types: identify ext_buf type.
@@ -277,7 +282,7 @@ struct mbstat {
u_long m_mlen; /* length of data in an mbuf */
u_long m_mhlen; /* length of data in a header mbuf */
- /* Number of mbtypes (gives # elems in mbtypes[] array: */
+ /* Number of mbtypes (gives # elems in mbtypes[] array) */
short m_numtypes;
/* XXX: Sendfile stats should eventually move to their own struct */
@@ -957,6 +962,19 @@ m_tag_find(struct mbuf *m, int type, struct m_tag *start)
m_tag_locate(m, MTAG_ABI_COMPAT, type, start));
}
+/* XXX temporary FIB methods; these will probably use mbuf tags eventually. */
+#define M_FIBSHIFT 28
+#define M_FIBMASK 0x0F
+
+/* get the fib number kept in the M_FIB bits of m_flags (0 when none are set) */
+#define M_GETFIB(_m) \
+ ((((_m)->m_flags & M_FIB) >> M_FIBSHIFT) & M_FIBMASK)
+/* store _fib in the M_FIB bits; _m is evaluated twice, so pass no side effects */
+#define M_SETFIB(_m, _fib) do { \
+ (_m)->m_flags &= ~M_FIB; \
+ (_m)->m_flags |= (((u_int)(_fib) << M_FIBSHIFT) & M_FIB); \
+} while (0)
+
#endif /* _KERNEL */
#ifdef MBUF_PROFILING
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index e320354..503f921 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -509,6 +509,7 @@ struct proc {
struct pargs *p_args; /* (c) Process arguments. */
rlim_t p_cpulimit; /* (c) Current CPU limit in seconds. */
signed char p_nice; /* (c) Process "nice" value. */
+ int p_fibnum; /* in this routing domain XXX MRT */
/* End area that is copied on creation. */
#define p_endcopy p_xstat
diff --git a/sys/sys/socket.h b/sys/sys/socket.h
index 713dd38..7ed9c70 100644
--- a/sys/sys/socket.h
+++ b/sys/sys/socket.h
@@ -138,6 +138,7 @@ typedef __uid_t uid_t;
#define SO_LISTENQLIMIT 0x1011 /* socket's backlog limit */
#define SO_LISTENQLEN 0x1012 /* socket's complete queue length */
#define SO_LISTENINCQLEN 0x1013 /* socket's incomplete queue length */
+#define SO_SETFIB 0x1014 /* use this FIB to route */
#endif
/*
diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h
index 40f3dd9..10338d8 100644
--- a/sys/sys/socketvar.h
+++ b/sys/sys/socketvar.h
@@ -146,6 +146,7 @@ struct socket {
void *so_accept_filter_arg; /* saved filter args */
char *so_accept_filter_str; /* saved user args */
} *so_accf;
+ int so_fibnum; /* routing domain for this socket */
};
#define SB_EMPTY_FIXUP(sb) do { \
diff --git a/sys/sys/syscall.h b/sys/sys/syscall.h
index 45c65ce..3b1b2c1 100644
--- a/sys/sys/syscall.h
+++ b/sys/sys/syscall.h
@@ -171,6 +171,7 @@
#define SYS_shmsys 171
#define SYS_freebsd6_pread 173
#define SYS_freebsd6_pwrite 174
+#define SYS_setfib 175
#define SYS_ntp_adjtime 176
#define SYS_setgid 181
#define SYS_setegid 182
diff --git a/sys/sys/syscall.mk b/sys/sys/syscall.mk
index c227b63..b0172f4 100644
--- a/sys/sys/syscall.mk
+++ b/sys/sys/syscall.mk
@@ -123,6 +123,7 @@ MIASM = \
shmsys.o \
freebsd6_pread.o \
freebsd6_pwrite.o \
+ setfib.o \
ntp_adjtime.o \
setgid.o \
setegid.o \
diff --git a/sys/sys/sysproto.h b/sys/sys/sysproto.h
index 1a0a10f..feb582a 100644
--- a/sys/sys/sysproto.h
+++ b/sys/sys/sysproto.h
@@ -555,6 +555,9 @@ struct freebsd6_pwrite_args {
char pad_l_[PADL_(int)]; int pad; char pad_r_[PADR_(int)];
char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)];
};
+struct setfib_args {
+ char fibnum_l_[PADL_(int)]; int fibnum; char fibnum_r_[PADR_(int)];
+};
struct ntp_adjtime_args {
char tp_l_[PADL_(struct timex *)]; struct timex * tp; char tp_r_[PADR_(struct timex *)];
};
@@ -1744,6 +1747,7 @@ int msgsys(struct thread *, struct msgsys_args *);
int shmsys(struct thread *, struct shmsys_args *);
int freebsd6_pread(struct thread *, struct freebsd6_pread_args *);
int freebsd6_pwrite(struct thread *, struct freebsd6_pwrite_args *);
+int setfib(struct thread *, struct setfib_args *);
int ntp_adjtime(struct thread *, struct ntp_adjtime_args *);
int setgid(struct thread *, struct setgid_args *);
int setegid(struct thread *, struct setegid_args *);
@@ -2325,6 +2329,7 @@ int freebsd4_sigreturn(struct thread *, struct freebsd4_sigreturn_args *);
#define SYS_AUE_shmsys AUE_SHMSYS
#define SYS_AUE_freebsd6_pread AUE_PREAD
#define SYS_AUE_freebsd6_pwrite AUE_PWRITE
+#define SYS_AUE_setfib AUE_NULL
#define SYS_AUE_ntp_adjtime AUE_NTP_ADJTIME
#define SYS_AUE_setgid AUE_SETGID
#define SYS_AUE_setegid AUE_SETEGID
OpenPOWER on IntegriCloud