diff options
-rw-r--r-- | sys/conf/files | 1 | ||||
-rw-r--r-- | sys/conf/options | 1 | ||||
-rw-r--r-- | sys/net/radix.c | 22 | ||||
-rw-r--r-- | sys/net/radix.h | 1 | ||||
-rw-r--r-- | sys/net/route.c | 122 | ||||
-rw-r--r-- | sys/net/route.h | 3 | ||||
-rw-r--r-- | sys/net/rtsock.c | 20 | ||||
-rw-r--r-- | sys/netinet/in_proto.c | 8 | ||||
-rw-r--r-- | sys/netinet/ip_output.c | 9 | ||||
-rw-r--r-- | sys/netinet6/in6_proto.c | 8 | ||||
-rw-r--r-- | sys/netinet6/in6_src.c | 9 | ||||
-rw-r--r-- | sys/netinet6/nd6_nbr.c | 14 |
12 files changed, 217 insertions, 1 deletion
diff --git a/sys/conf/files b/sys/conf/files index 25232bf..e386327 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1670,6 +1670,7 @@ net/ppp_deflate.c optional ppp_deflate net/ppp_tty.c optional ppp net/pfil.c optional ether | inet net/radix.c standard +net/radix_mpath.c standard net/raw_cb.c standard net/raw_usrreq.c standard net/route.c standard diff --git a/sys/conf/options b/sys/conf/options index 8d2a521..09acf41 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -393,6 +393,7 @@ NETATALK opt_atalk.h PPP_BSDCOMP opt_ppp.h PPP_DEFLATE opt_ppp.h PPP_FILTER opt_ppp.h +RADIX_MPATH opt_mpath.h SLIP_IFF_OPTS opt_slip.h TCPDEBUG TCP_SIGNATURE opt_inet.h diff --git a/sys/net/radix.c b/sys/net/radix.c index 0f718b7..030aa58 100644 --- a/sys/net/radix.c +++ b/sys/net/radix.c @@ -48,6 +48,13 @@ #include <net/radix.h> #endif +#include "opt_mpath.h" + +#ifdef RADIX_MPATH +#include <net/radix_mpath.h> +#endif + + static int rn_walktree_from(struct radix_node_head *h, void *a, void *m, walktree_f_t *f, void *w); static int rn_walktree(struct radix_node_head *, walktree_f_t *, void *); @@ -630,6 +637,21 @@ rn_addroute(v_arg, n_arg, head, treenodes) saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes); if (keyduplicated) { for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) { +#ifdef RADIX_MPATH + /* permit multipath, if enabled for the family */ + if (rn_mpath_capable(head) && netmask == tt->rn_mask) { + /* + * go down to the end of multipaths, so that + * new entry goes into the end of rn_dupedkey + * chain. 
+ */ + do { + t = tt; + tt = tt->rn_dupedkey; + } while (tt && t->rn_mask == tt->rn_mask); + break; + } +#endif if (tt->rn_mask == netmask) return (0); if (netmask == 0 || diff --git a/sys/net/radix.h b/sys/net/radix.h index ca53095..376fdda 100644 --- a/sys/net/radix.h +++ b/sys/net/radix.h @@ -130,6 +130,7 @@ struct radix_node_head { void (*rnh_close) /* do something when the last ref drops */ (struct radix_node *rn, struct radix_node_head *head); struct radix_node rnh_nodes[3]; /* empty tree for common case */ + int rnh_multipath; /* multipath capable ? */ #ifdef _KERNEL struct mtx rnh_mtx; /* locks entire radix tree */ #endif diff --git a/sys/net/route.c b/sys/net/route.c index 757ed6d..c41af97 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -32,6 +32,7 @@ #include "opt_inet.h" #include "opt_mrouting.h" +#include "opt_mpath.h" #include <sys/param.h> #include <sys/systm.h> @@ -44,6 +45,10 @@ #include <net/if.h> #include <net/route.h> +#ifdef RADIX_MPATH +#include <net/radix_mpath.h> +#endif + #include <netinet/in.h> #include <netinet/ip_mroute.h> @@ -700,6 +705,67 @@ rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt) } switch (req) { case RTM_DELETE: +#ifdef RADIX_MPATH + /* + * if we got multipath routes, we require users to specify + * a matching RTAX_GATEWAY. + */ + if (rn_mpath_capable(rnh)) { + struct rtentry *rto = NULL; + + rn = rnh->rnh_matchaddr(dst, rnh); + if (rn == NULL) + senderr(ESRCH); + rto = rt = RNTORT(rn); + rt = rt_mpath_matchgate(rt, gateway); + if (!rt) + senderr(ESRCH); + /* + * this is the first entry in the chain + */ + if (rto == rt) { + rn = rn_mpath_next((struct radix_node *)rt); + /* + * there is another entry, now it's active + */ + if (rn) { + rto = RNTORT(rn); + RT_LOCK(rto); + rto->rt_flags |= RTF_UP; + RT_UNLOCK(rto); + } else if (rt->rt_flags & RTF_GATEWAY) { + /* + * For gateway routes, we need to + * make sure that we are deleting + * the correct gateway. 
+ * rt_mpath_matchgate() does not + * check the case when there is only + * one route in the chain. + */ + if (gateway && + (rt->rt_gateway->sa_len != gateway->sa_len || + memcmp(rt->rt_gateway, gateway, gateway->sa_len))) + senderr(ESRCH); + } + /* + * use the normal delete code to remove + * the first entry + */ + goto normal_rtdel; + } + /* + * if the entry is 2nd and on up + */ + if (!rt_mpath_deldup(rto, rt)) + panic ("rtrequest1: rt_mpath_deldup"); + RT_LOCK(rt); + RT_ADDREF(rt); + rt->rt_flags &= ~RTF_UP; + goto deldone; /* done with the RTM_DELETE command */ + } +#endif + +normal_rtdel: /* * Remove the item from the tree and return it. * Complain if it is not there and do no more processing. @@ -740,6 +806,7 @@ rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt) if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) ifa->ifa_rtrequest(RTM_DELETE, rt, info); +deldone: /* * One more rtentry floating around that is not * linked to the routing table. rttrash will be decremented @@ -822,6 +889,22 @@ rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt) rt->rt_ifa = ifa; rt->rt_ifp = ifa->ifa_ifp; +#ifdef RADIX_MPATH + /* do not permit exactly the same dst/mask/gw pair */ + if (rn_mpath_capable(rnh) && + rt_mpath_conflict(rnh, rt, netmask)) { + if (rt->rt_gwroute) + RTFREE(rt->rt_gwroute); + if (rt->rt_ifa) { + IFAFREE(rt->rt_ifa); + } + Free(rt_key(rt)); + RT_LOCK_DESTROY(rt); + uma_zfree(rtzone, rt); + senderr(EEXIST); + } +#endif + /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */ rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes); if (rn == NULL) { @@ -1166,7 +1249,7 @@ rtinit(struct ifaddr *ifa, int cmd, int flags) struct mbuf *m = NULL; struct rtentry *rt = NULL; struct rt_addrinfo info; - int error; + int error=0; if (flags & RTF_HOST) { dst = ifa->ifa_dstaddr; @@ -1208,10 +1291,32 @@ rtinit(struct ifaddr *ifa, int cmd, int flags) if ((rnh = rt_tables[dst->sa_family]) == NULL) goto bad; 
RADIX_NODE_HEAD_LOCK(rnh); +#ifdef RADIX_MPATH + if (rn_mpath_capable(rnh)) { + + rn = rnh->rnh_matchaddr(dst, rnh); + if (rn == NULL) + error = ESRCH; + else { + rt = RNTORT(rn); + /* + * for interface route the rt->rt_gateway is + * sockaddr_intf for cloning ARP entries, so + * rt_mpath_matchgate must use the interface + * address + */ + rt = rt_mpath_matchgate(rt, ifa->ifa_addr); + if (!rt) + error = ESRCH; + } + } + else +#endif error = ((rn = rnh->rnh_lookup(dst, netmask, rnh)) == NULL || (rn->rn_flags & RNF_ROOT) || RNTORT(rn)->rt_ifa != ifa || !sa_equal((struct sockaddr *)rn->rn_key, dst)); + RADIX_NODE_HEAD_UNLOCK(rnh); if (error) { bad: @@ -1235,6 +1340,21 @@ bad: * notify any listening routing agents of the change */ RT_LOCK(rt); +#ifdef RADIX_MPATH + /* + * in case address alias finds the first address + * e.g. ifconfig bge0 192.103.54.246/24 + * e.g. ifconfig bge0 192.103.54.247/24 + * the address set in the route is 192.103.54.246 + * so we need to replace it with 192.103.54.247 + */ + if (memcmp(rt->rt_ifa->ifa_addr, ifa->ifa_addr, ifa->ifa_addr->sa_len)) { + IFAFREE(rt->rt_ifa); + IFAREF(ifa); + rt->rt_ifp = ifa->ifa_ifp; + rt->rt_ifa = ifa; + } +#endif rt_newaddrmsg(cmd, ifa, error, rt); if (cmd == RTM_DELETE) { /* diff --git a/sys/net/route.h b/sys/net/route.h index 01b2957..e9f4980 100644 --- a/sys/net/route.h +++ b/sys/net/route.h @@ -97,6 +97,9 @@ struct mbuf; */ #ifndef RNF_NORMAL #include <net/radix.h> +#ifdef RADIX_MPATH +#include <net/radix_mpath.h> +#endif #endif struct rtentry { struct radix_node rt_nodes[2]; /* tree glue, and other values */ diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c index 2893f4b..5ea93d3 100644 --- a/sys/net/rtsock.c +++ b/sys/net/rtsock.c @@ -30,6 +30,8 @@ * $FreeBSD$ */ #include "opt_sctp.h" +#include "opt_mpath.h" + #include <sys/param.h> #include <sys/domain.h> #include <sys/kernel.h> @@ -420,6 +422,24 @@ route_output(struct mbuf *m, struct socket *so) RADIX_NODE_HEAD_UNLOCK(rnh); senderr(ESRCH); } +#ifdef 
RADIX_MPATH + /* + * for RTM_CHANGE/LOCK, if we got multipath routes, + * we require users to specify a matching RTAX_GATEWAY. + * + * for RTM_GET, gate is optional even with multipath. + * if gate == NULL the first match is returned. + * (no need to call rt_mpath_matchgate if gate == NULL) + */ + if (rn_mpath_capable(rnh) && + (rtm->rtm_type != RTM_GET || info.rti_info[RTAX_GATEWAY])) { + rt = rt_mpath_matchgate(rt, info.rti_info[RTAX_GATEWAY]); + if (!rt) { + RADIX_NODE_HEAD_UNLOCK(rnh); + senderr(ESRCH); + } + } +#endif RT_LOCK(rt); RT_ADDREF(rt); RADIX_NODE_HEAD_UNLOCK(rnh); diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c index 480da0e..aac2104 100644 --- a/sys/netinet/in_proto.c +++ b/sys/netinet/in_proto.c @@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$"); #include "opt_pf.h" #include "opt_carp.h" #include "opt_sctp.h" +#include "opt_mpath.h" #include <sys/param.h> #include <sys/systm.h> @@ -51,6 +52,9 @@ __FBSDID("$FreeBSD$"); #include <net/if.h> #include <net/route.h> +#ifdef RADIX_MPATH +#include <net/radix_mpath.h> +#endif #include <netinet/in.h> #include <netinet/in_systm.h> @@ -352,7 +356,11 @@ struct domain inetdomain = { .dom_name = "internet", .dom_protosw = inetsw, .dom_protoswNPROTOSW = &inetsw[sizeof(inetsw)/sizeof(inetsw[0])], +#ifdef RADIX_MPATH + .dom_rtattach = rn4_mpath_inithead, +#else .dom_rtattach = in_inithead, +#endif .dom_rtoffset = 32, .dom_maxrtkey = sizeof(struct sockaddr_in) }; diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 78aff7f..7ce2104 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$"); #include "opt_ipsec.h" #include "opt_mac.h" #include "opt_mbuf_stress_test.h" +#include "opt_mpath.h" #include <sys/param.h> #include <sys/systm.h> @@ -54,6 +55,9 @@ __FBSDID("$FreeBSD$"); #include <net/netisr.h> #include <net/pfil.h> #include <net/route.h> +#ifdef RADIX_MPATH +#include <net/radix_mpath.h> +#endif #include <netinet/in.h> #include 
<netinet/in_systm.h> @@ -225,7 +229,12 @@ again: * operation (as it is for ARP). */ if (ro->ro_rt == NULL) +#ifdef RADIX_MPATH + rtalloc_mpath(ro, + ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr)); +#else rtalloc_ign(ro, 0); +#endif if (ro->ro_rt == NULL) { ipstat.ips_noroute++; error = EHOSTUNREACH; diff --git a/sys/netinet6/in6_proto.c b/sys/netinet6/in6_proto.c index 7e6a75e..9cf8091 100644 --- a/sys/netinet6/in6_proto.c +++ b/sys/netinet6/in6_proto.c @@ -69,6 +69,7 @@ __FBSDID("$FreeBSD$"); #include "opt_ipstealth.h" #include "opt_carp.h" #include "opt_sctp.h" +#include "opt_mpath.h" #include <sys/param.h> #include <sys/socket.h> @@ -83,6 +84,9 @@ __FBSDID("$FreeBSD$"); #include <net/if.h> #include <net/radix.h> #include <net/route.h> +#ifdef RADIX_MPATH +#include <net/radix_mpath.h> +#endif #include <netinet/in.h> #include <netinet/in_systm.h> @@ -347,7 +351,11 @@ struct domain inet6domain = { .dom_protosw = (struct protosw *)inet6sw, .dom_protoswNPROTOSW = (struct protosw *) &inet6sw[sizeof(inet6sw)/sizeof(inet6sw[0])], +#ifdef RADIX_MPATH + .dom_rtattach = rn6_mpath_inithead, +#else .dom_rtattach = in6_inithead, +#endif .dom_rtoffset = offsetof(struct sockaddr_in6, sin6_addr) << 3, .dom_maxrtkey = sizeof(struct sockaddr_in6), .dom_ifattach = in6_domifattach, diff --git a/sys/netinet6/in6_src.c b/sys/netinet6/in6_src.c index 0508db2..eeba82b 100644 --- a/sys/netinet6/in6_src.c +++ b/sys/netinet6/in6_src.c @@ -65,6 +65,7 @@ __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" +#include "opt_mpath.h" #include <sys/param.h> #include <sys/systm.h> @@ -84,6 +85,9 @@ __FBSDID("$FreeBSD$"); #include <net/if.h> #include <net/route.h> +#ifdef RADIX_MPATH +#include <net/radix_mpath.h> +#endif #include <netinet/in.h> #include <netinet/in_var.h> @@ -568,7 +572,12 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, sa6->sin6_scope_id = 0; if (clone) { +#ifdef RADIX_MPATH + rtalloc_mpath((struct route *)ro, + 
ntohl(sa6->sin6_addr.s6_addr32[3])); +#else rtalloc((struct route *)ro); +#endif } else { ro->ro_rt = rtalloc1(&((struct route *)ro) ->ro_dst, 0, 0UL); diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c index fce616b..86f6ff8 100644 --- a/sys/netinet6/nd6_nbr.c +++ b/sys/netinet6/nd6_nbr.c @@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_carp.h" +#include "opt_mpath.h" #include <sys/param.h> #include <sys/systm.h> @@ -55,6 +56,9 @@ __FBSDID("$FreeBSD$"); #include <net/if_dl.h> #include <net/if_var.h> #include <net/route.h> +#ifdef RADIX_MPATH +#include <net/radix_mpath.h> +#endif #include <netinet/in.h> #include <netinet/in_var.h> @@ -208,13 +212,23 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len) struct rtentry *rt; struct sockaddr_in6 tsin6; int need_proxy; +#ifdef RADIX_MPATH + struct route_in6 ro; +#endif bzero(&tsin6, sizeof tsin6); tsin6.sin6_len = sizeof(struct sockaddr_in6); tsin6.sin6_family = AF_INET6; tsin6.sin6_addr = taddr6; +#ifdef RADIX_MPATH + bzero(&ro, sizeof(ro)); + ro.ro_dst = tsin6; + rtalloc_mpath((struct route *)&ro, RTF_ANNOUNCE); + rt = ro.ro_rt; +#else rt = rtalloc1((struct sockaddr *)&tsin6, 0, 0); +#endif need_proxy = (rt && (rt->rt_flags & RTF_ANNOUNCE) != 0 && rt->rt_gateway->sa_family == AF_LINK); if (rt) |