summaryrefslogtreecommitdiffstats
path: root/sys/netinet6/in6.c
diff options
context:
space:
mode:
authorqingli <qingli@FreeBSD.org>2008-12-15 06:10:57 +0000
committerqingli <qingli@FreeBSD.org>2008-12-15 06:10:57 +0000
commitec826ad5c7f97de814529d3b3bae7950f91d9a5d (patch)
tree281ff6a89cacadf7e72f506b037ca41229a23bf6 /sys/netinet6/in6.c
parent664c3aeb0118ccccb068f485e30353a47923b4d0 (diff)
downloadFreeBSD-src-ec826ad5c7f97de814529d3b3bae7950f91d9a5d.zip
FreeBSD-src-ec826ad5c7f97de814529d3b3bae7950f91d9a5d.tar.gz
This main goals of this project are:
1. separating L2 tables (ARP, NDP) from the L3 routing tables 2. removing as much locking dependencies among these layers as possible to allow for some parallelism in the search operations 3. simplify the logic in the routing code, The most notable end result is the obsolescent of the route cloning (RTF_CLONING) concept, which translated into code reduction in both IPv4 ARP and IPv6 NDP related modules, and size reduction in struct rtentry{}. The change in design obsoletes the semantics of RTF_CLONING, RTF_WASCLONE and RTF_LLINFO routing flags. The userland applications such as "arp" and "ndp" have been modified to reflect those changes. The output from "netstat -r" shows only the routing entries. Quite a few developers have contributed to this project in the past: Glebius Smirnoff, Luigi Rizzo, Alessandro Cerri, and Andre Oppermann. And most recently: - Kip Macy revised the locking code completely, thus completing the last piece of the puzzle, Kip has also been conducting active functional testing - Sam Leffler has helped me improving/refactoring the code, and provided valuable reviews - Julian Elischer setup the perforce tree for me and has helped me maintaining that branch before the svn conversion
Diffstat (limited to 'sys/netinet6/in6.c')
-rw-r--r--sys/netinet6/in6.c446
1 files changed, 254 insertions, 192 deletions
diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c
index c784845..8127281 100644
--- a/sys/netinet6/in6.c
+++ b/sys/netinet6/in6.c
@@ -88,6 +88,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/in.h>
#include <netinet/in_var.h>
+#include <net/if_llatbl.h>
#include <netinet/if_ether.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
@@ -135,152 +136,7 @@ static void in6_unlink_ifa(struct in6_ifaddr *, struct ifnet *);
struct in6_multihead in6_multihead; /* XXX BSS initialization */
int (*faithprefix_p)(struct in6_addr *);
-/*
- * Subroutine for in6_ifaddloop() and in6_ifremloop().
- * This routine does actual work.
- */
-static void
-in6_ifloop_request(int cmd, struct ifaddr *ifa)
-{
- struct sockaddr_in6 all1_sa;
- struct rtentry *nrt = NULL;
- int e;
- char ip6buf[INET6_ADDRSTRLEN];
-
- bzero(&all1_sa, sizeof(all1_sa));
- all1_sa.sin6_family = AF_INET6;
- all1_sa.sin6_len = sizeof(struct sockaddr_in6);
- all1_sa.sin6_addr = in6mask128;
-
- /*
- * We specify the address itself as the gateway, and set the
- * RTF_LLINFO flag, so that the corresponding host route would have
- * the flag, and thus applications that assume traditional behavior
- * would be happy. Note that we assume the caller of the function
- * (probably implicitly) set nd6_rtrequest() to ifa->ifa_rtrequest,
- * which changes the outgoing interface to the loopback interface.
- */
- e = rtrequest(cmd, ifa->ifa_addr, ifa->ifa_addr,
- (struct sockaddr *)&all1_sa, RTF_UP|RTF_HOST|RTF_LLINFO, &nrt);
- if (e != 0) {
- /* XXX need more descriptive message */
-
- log(LOG_ERR, "in6_ifloop_request: "
- "%s operation failed for %s (errno=%d)\n",
- cmd == RTM_ADD ? "ADD" : "DELETE",
- ip6_sprintf(ip6buf,
- &((struct in6_ifaddr *)ifa)->ia_addr.sin6_addr), e);
- }
-
- /*
- * Report the addition/removal of the address to the routing socket.
- * XXX: since we called rtinit for a p2p interface with a destination,
- * we end up reporting twice in such a case. Should we rather
- * omit the second report?
- */
- if (nrt) {
- RT_LOCK(nrt);
- /*
- * Make sure rt_ifa be equal to IFA, the second argument of
- * the function. We need this because when we refer to
- * rt_ifa->ia6_flags in ip6_input, we assume that the rt_ifa
- * points to the address instead of the loopback address.
- */
- if (cmd == RTM_ADD && ifa != nrt->rt_ifa) {
- IFAFREE(nrt->rt_ifa);
- IFAREF(ifa);
- nrt->rt_ifa = ifa;
- }
-
- rt_newaddrmsg(cmd, ifa, e, nrt);
- if (cmd == RTM_DELETE)
- RTFREE_LOCKED(nrt);
- else {
- /* the cmd must be RTM_ADD here */
- RT_REMREF(nrt);
- RT_UNLOCK(nrt);
- }
- }
-}
-
-/*
- * Add ownaddr as loopback rtentry. We previously add the route only if
- * necessary (ex. on a p2p link). However, since we now manage addresses
- * separately from prefixes, we should always add the route. We can't
- * rely on the cloning mechanism from the corresponding interface route
- * any more.
- */
-void
-in6_ifaddloop(struct ifaddr *ifa)
-{
- struct rtentry *rt;
- int need_loop;
-
- /* If there is no loopback entry, allocate one. */
- rt = rtalloc1(ifa->ifa_addr, 0, 0);
- need_loop = (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 ||
- (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0);
- if (rt)
- RTFREE_LOCKED(rt);
- if (need_loop)
- in6_ifloop_request(RTM_ADD, ifa);
-}
-
-/*
- * Remove loopback rtentry of ownaddr generated by in6_ifaddloop(),
- * if it exists.
- */
-void
-in6_ifremloop(struct ifaddr *ifa)
-{
- INIT_VNET_INET6(curvnet);
- struct in6_ifaddr *ia;
- struct rtentry *rt;
- int ia_count = 0;
-
- /*
- * Some of BSD variants do not remove cloned routes
- * from an interface direct route, when removing the direct route
- * (see comments in net/net_osdep.h). Even for variants that do remove
- * cloned routes, they could fail to remove the cloned routes when
- * we handle multple addresses that share a common prefix.
- * So, we should remove the route corresponding to the deleted address.
- */
- /*
- * Delete the entry only if exact one ifa exists. More than one ifa
- * can exist if we assign a same single address to multiple
- * (probably p2p) interfaces.
- * XXX: we should avoid such a configuration in IPv6...
- */
- for (ia = V_in6_ifaddr; ia; ia = ia->ia_next) {
- if (IN6_ARE_ADDR_EQUAL(IFA_IN6(ifa), &ia->ia_addr.sin6_addr)) {
- ia_count++;
- if (ia_count > 1)
- break;
- }
- }
-
- if (ia_count == 1) {
- /*
- * Before deleting, check if a corresponding loopbacked host
- * route surely exists. With this check, we can avoid to
- * delete an interface direct route whose destination is same
- * as the address being removed. This can happen when removing
- * a subnet-router anycast address on an interface attahced
- * to a shared medium.
- */
- rt = rtalloc1(ifa->ifa_addr, 0, 0);
- if (rt != NULL) {
- if ((rt->rt_flags & RTF_HOST) != 0 &&
- (rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) {
- RTFREE_LOCKED(rt);
- in6_ifloop_request(RTM_DELETE, ifa);
- } else
- RT_UNLOCK(rt);
- }
- }
-}
int
in6_mask2len(struct in6_addr *mask, u_char *lim0)
@@ -1131,10 +987,9 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
}
}
if (!rt) {
- /* XXX: we need RTF_CLONING to fake nd6_rtrequest */
error = rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr,
(struct sockaddr *)&ia->ia_addr,
- (struct sockaddr *)&mltmask, RTF_UP | RTF_CLONING,
+ (struct sockaddr *)&mltmask, RTF_UP,
(struct rtentry **)0);
if (error)
goto cleanup;
@@ -1208,7 +1063,7 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
if (!rt) {
error = rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr,
(struct sockaddr *)&ia->ia_addr,
- (struct sockaddr *)&mltmask, RTF_UP | RTF_CLONING,
+ (struct sockaddr *)&mltmask, RTF_UP,
(struct rtentry **)0);
if (error)
goto cleanup;
@@ -1287,34 +1142,16 @@ in6_purgeaddr(struct ifaddr *ifa)
{
struct ifnet *ifp = ifa->ifa_ifp;
struct in6_ifaddr *ia = (struct in6_ifaddr *) ifa;
- char ip6buf[INET6_ADDRSTRLEN];
struct in6_multi_mship *imm;
/* stop DAD processing */
nd6_dad_stop(ifa);
- /*
- * delete route to the destination of the address being purged.
- * The interface must be p2p or loopback in this case.
- */
- if ((ia->ia_flags & IFA_ROUTE) != 0 && ia->ia_dstaddr.sin6_len != 0) {
- int e;
-
- if ((e = rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST))
- != 0) {
- log(LOG_ERR, "in6_purgeaddr: failed to remove "
- "a route to the p2p destination: %s on %s, "
- "errno=%d\n",
- ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
- if_name(ifp), e);
- /* proceed anyway... */
- } else
- ia->ia_flags &= ~IFA_ROUTE;
- }
-
- /* Remove ownaddr's loopback rtentry, if it exists. */
- in6_ifremloop(&(ia->ia_ifa));
-
+ IF_AFDATA_LOCK(ifp);
+ lla_lookup(LLTABLE6(ifp), (LLE_DELETE | LLE_IFADDR),
+ (struct sockaddr *)&ia->ia_addr);
+ IF_AFDATA_UNLOCK(ifp);
+
/*
* leave from multicast groups we have joined for the interface
*/
@@ -1688,26 +1525,15 @@ in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia,
/* we could do in(6)_socktrim here, but just omit it at this moment. */
- if (newhost) {
- /*
- * set the rtrequest function to create llinfo. It also
- * adjust outgoing interface of the route for the local
- * address when called via in6_ifaddloop() below.
- */
- ia->ia_ifa.ifa_rtrequest = nd6_rtrequest;
- }
-
/*
* Special case:
* If a new destination address is specified for a point-to-point
* interface, install a route to the destination as an interface
- * direct route. In addition, if the link is expected to have neighbor
- * cache entries, specify RTF_LLINFO so that a cache entry for the
- * destination address will be created.
- * created
+ * direct route.
* XXX: the logic below rejects assigning multiple addresses on a p2p
* interface that share the same destination.
*/
+#if 0 /* QL - verify */
plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */
if (!(ia->ia_flags & IFA_ROUTE) && plen == 128 &&
ia->ia_dstaddr.sin6_family == AF_INET6) {
@@ -1715,7 +1541,6 @@ in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia,
struct rtentry *rt = NULL, **rtp = NULL;
if (nd6_need_cache(ifp) != 0) {
- rtflags |= RTF_LLINFO;
rtp = &rt;
}
@@ -1744,16 +1569,36 @@ in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia,
}
ia->ia_flags |= IFA_ROUTE;
}
- if (plen < 128) {
- /*
- * The RTF_CLONING flag is necessary for in6_is_ifloop_auto().
- */
- ia->ia_ifa.ifa_flags |= RTF_CLONING;
+#else
+ plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */
+ if (!(ia->ia_flags & IFA_ROUTE) && plen == 128 &&
+ ia->ia_dstaddr.sin6_family == AF_INET6) {
+ if ((error = rtinit(&(ia->ia_ifa), (int)RTM_ADD,
+ RTF_UP | RTF_HOST)) != 0)
+ return (error);
+ ia->ia_flags |= IFA_ROUTE;
}
+#endif
/* Add ownaddr as loopback rtentry, if necessary (ex. on p2p link). */
- if (newhost)
- in6_ifaddloop(&(ia->ia_ifa));
+ if (newhost) {
+ struct llentry *ln;
+
+ IF_AFDATA_LOCK(ifp);
+ ia->ia_ifa.ifa_rtrequest = NULL;
+
+ /* XXX QL
+ * we need to report rt_newaddrmsg
+ */
+ ln = lla_lookup(LLTABLE6(ifp), (LLE_CREATE | LLE_IFADDR | LLE_EXCLUSIVE),
+ (struct sockaddr *)&ia->ia_addr);
+ IF_AFDATA_UNLOCK(ifp);
+ if (ln) {
+ ln->la_expire = 0; /* for IPv6 this means permanent */
+ ln->ln_state = ND6_LLINFO_REACHABLE;
+ LLE_WUNLOCK(ln);
+ }
+ }
return (error);
}
@@ -2237,6 +2082,214 @@ in6_if2idlen(struct ifnet *ifp)
}
}
+#include <sys/sysctl.h>
+
+struct in6_llentry {
+ struct llentry base;
+ struct sockaddr_in6 l3_addr6;
+};
+
+static struct llentry *
+in6_lltable_new(const struct sockaddr *l3addr, u_int flags)
+{
+ struct in6_llentry *lle;
+
+ lle = malloc(sizeof(struct in6_llentry), M_LLTABLE,
+ M_DONTWAIT | M_ZERO);
+ if (lle == NULL) /* NB: caller generates msg */
+ return NULL;
+
+ callout_init(&lle->base.ln_timer_ch, CALLOUT_MPSAFE);
+ lle->l3_addr6 = *(const struct sockaddr_in6 *)l3addr;
+ lle->base.lle_refcnt = 1;
+ LLE_LOCK_INIT(&lle->base);
+ return &lle->base;
+}
+
+/*
+ * Deletes an address from the address table.
+ * This function is called by the timer functions
+ * such as arptimer() and nd6_llinfo_timer(), and
+ * the caller does the locking.
+ */
+static void
+in6_lltable_free(struct lltable *llt, struct llentry *lle)
+{
+ free(lle, M_LLTABLE);
+}
+
+static int
+in6_lltable_rtcheck(struct ifnet *ifp, const struct sockaddr *l3addr)
+{
+ struct rtentry *rt;
+ char ip6buf[INET6_ADDRSTRLEN];
+
+ KASSERT(l3addr->sa_family == AF_INET6,
+ ("sin_family %d", l3addr->sa_family));
+
+ /* XXX rtalloc1 should take a const param */
+ rt = rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0);
+ if (rt == NULL || (rt->rt_flags & RTF_GATEWAY) || rt->rt_ifp != ifp) {
+ struct ifaddr *ifa;
+ /*
+ * Create an ND6 cache for an IPv6 neighbor
+ * that is not covered by our own prefix.
+ */
+ /* XXX ifaof_ifpforaddr should take a const param */
+ ifa = ifaof_ifpforaddr(__DECONST(struct sockaddr *, l3addr), ifp);
+ if (ifa != NULL) {
+ if (rt != NULL)
+ rtfree(rt);
+ return 0;
+ }
+ log(LOG_INFO, "IPv6 address: \"%s\" is not on the network\n",
+ ip6_sprintf(ip6buf, &((const struct sockaddr_in6 *)l3addr)->sin6_addr));
+ if (rt != NULL)
+ rtfree(rt);
+ return EINVAL;
+ }
+ rtfree(rt);
+ return 0;
+}
+
+static struct llentry *
+in6_lltable_lookup(struct lltable *llt, u_int flags,
+ const struct sockaddr *l3addr)
+{
+ const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr;
+ struct ifnet *ifp = llt->llt_ifp;
+ struct llentry *lle;
+ struct llentries *lleh;
+ u_int hashkey;
+
+ IF_AFDATA_LOCK_ASSERT(ifp);
+ KASSERT(l3addr->sa_family == AF_INET6,
+ ("sin_family %d", l3addr->sa_family));
+
+ hashkey = sin6->sin6_addr.s6_addr32[3];
+ lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)];
+ LIST_FOREACH(lle, lleh, lle_next) {
+ if (lle->la_flags & LLE_DELETED)
+ continue;
+ if (bcmp(L3_ADDR(lle), l3addr, l3addr->sa_len) == 0)
+ break;
+ }
+
+ if (lle == NULL) {
+ if (!(flags & LLE_CREATE))
+ return (NULL);
+ /*
+ * A route that covers the given address must have
+ * been installed 1st because we are doing a resolution,
+ * verify this.
+ */
+ if (!(flags & LLE_IFADDR) &&
+ in6_lltable_rtcheck(ifp, l3addr) != 0)
+ return NULL;
+
+ lle = in6_lltable_new(l3addr, flags);
+ if (lle == NULL) {
+ log(LOG_INFO, "lla_lookup: new lle malloc failed\n");
+ return NULL;
+ }
+ lle->la_flags = flags & ~LLE_CREATE;
+ if ((flags & (LLE_CREATE | LLE_IFADDR)) == (LLE_CREATE | LLE_IFADDR)) {
+ bcopy(IF_LLADDR(ifp), &lle->ll_addr, ifp->if_addrlen);
+ lle->la_flags |= (LLE_VALID | LLE_STATIC);
+ }
+
+ lle->lle_tbl = llt;
+ lle->lle_head = lleh;
+ LIST_INSERT_HEAD(lleh, lle, lle_next);
+ } else if (flags & LLE_DELETE) {
+ LLE_WLOCK(lle);
+ lle->la_flags = LLE_DELETED;
+ LLE_WUNLOCK(lle);
+#ifdef DIAGNOSTICS
+ log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
+#endif
+ lle = (void *)-1;
+ }
+ if (LLE_IS_VALID(lle)) {
+ if (flags & LLE_EXCLUSIVE)
+ LLE_WLOCK(lle);
+ else
+ LLE_RLOCK(lle);
+ }
+ return (lle);
+}
+
+static int
+in6_lltable_dump(struct lltable *llt, struct sysctl_req *wr)
+{
+ struct ifnet *ifp = llt->llt_ifp;
+ struct llentry *lle;
+ /* XXX stack use */
+ struct {
+ struct rt_msghdr rtm;
+ struct sockaddr_in6 sin6;
+ /*
+ * ndp.c assumes that sdl is word aligned
+ */
+#ifdef __LP64__
+ uint32_t pad;
+#endif
+ struct sockaddr_dl sdl;
+ } ndpc;
+ int i, error;
+
+ /* XXXXX
+ * current IFNET_RLOCK() is mapped to IFNET_WLOCK()
+ * so it is okay to use this ASSERT, change it when
+ * IFNET lock is finalized
+ */
+ IFNET_WLOCK_ASSERT();
+
+ error = 0;
+ for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
+ LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
+ struct sockaddr_dl *sdl;
+
+ /* skip deleted or invalid entries */
+ if ((lle->la_flags & (LLE_DELETED|LLE_VALID)) != LLE_VALID)
+ continue;
+ /*
+ * produce a msg made of:
+ * struct rt_msghdr;
+ * struct sockaddr_in6 (IPv6)
+ * struct sockaddr_dl;
+ */
+ bzero(&ndpc, sizeof(ndpc));
+ ndpc.rtm.rtm_msglen = sizeof(ndpc);
+ ndpc.sin6.sin6_family = AF_INET6;
+ ndpc.sin6.sin6_len = sizeof(ndpc.sin6);
+ bcopy(L3_ADDR(lle), &ndpc.sin6, L3_ADDR_LEN(lle));
+
+ /* publish */
+ if (lle->la_flags & LLE_PUB)
+ ndpc.rtm.rtm_flags |= RTF_ANNOUNCE;
+
+ sdl = &ndpc.sdl;
+ sdl->sdl_family = AF_LINK;
+ sdl->sdl_len = sizeof(*sdl);
+ sdl->sdl_alen = ifp->if_addrlen;
+ sdl->sdl_index = ifp->if_index;
+ sdl->sdl_type = ifp->if_type;
+ bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
+ ndpc.rtm.rtm_rmx.rmx_expire =
+ lle->la_flags & LLE_STATIC ? 0 : lle->la_expire;
+ ndpc.rtm.rtm_flags |= RTF_HOST;
+ if (lle->la_flags & LLE_STATIC)
+ ndpc.rtm.rtm_flags |= RTF_STATIC;
+ ndpc.rtm.rtm_index = ifp->if_index;
+ error = SYSCTL_OUT(wr, &ndpc, sizeof(ndpc));
+ if (error)
+ break;
+ }
+ }
+ return error;
+}
+
void *
in6_domifattach(struct ifnet *ifp)
{
@@ -2256,6 +2309,14 @@ in6_domifattach(struct ifnet *ifp)
ext->nd_ifinfo = nd6_ifattach(ifp);
ext->scope6_id = scope6_ifattach(ifp);
+ ext->lltable = lltable_init(ifp, AF_INET6);
+ if (ext->lltable != NULL) {
+ ext->lltable->llt_new = in6_lltable_new;
+ ext->lltable->llt_free = in6_lltable_free;
+ ext->lltable->llt_rtcheck = in6_lltable_rtcheck;
+ ext->lltable->llt_lookup = in6_lltable_lookup;
+ ext->lltable->llt_dump = in6_lltable_dump;
+ }
return ext;
}
@@ -2266,6 +2327,7 @@ in6_domifdetach(struct ifnet *ifp, void *aux)
scope6_ifdetach(ext->scope6_id);
nd6_ifdetach(ext->nd_ifinfo);
+ lltable_free(ext->lltable);
free(ext->in6_ifstat, M_IFADDR);
free(ext->icmp6_ifstat, M_IFADDR);
free(ext, M_IFADDR);
OpenPOWER on IntegriCloud