diff options
Diffstat (limited to 'sys')
49 files changed, 1510 insertions, 2227 deletions
diff --git a/sys/conf/NOTES b/sys/conf/NOTES index 4c66d14..6825dd4 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -1958,7 +1958,7 @@ device fatm #Fore PCA200E device hatm #Fore/Marconi HE155/622 device patm #IDT77252 cards (ProATM and IDT) device utopia #ATM PHY driver -options NATM #native ATM +#options NATM #native ATM options LIBMBPOOL #needed by patm, iatm diff --git a/sys/conf/files b/sys/conf/files index d09ead4..633b411 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -2172,6 +2172,7 @@ net/if_gre.c optional gre net/if_iso88025subr.c optional token net/if_lagg.c optional lagg net/if_loop.c optional loop +net/if_llatbl.c standard net/if_media.c standard net/if_mib.c standard net/if_ppp.c optional ppp diff --git a/sys/contrib/pf/net/pf.c b/sys/contrib/pf/net/pf.c index 2959830..d69372b 100644 --- a/sys/contrib/pf/net/pf.c +++ b/sys/contrib/pf/net/pf.c @@ -3162,7 +3162,7 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer) #ifdef RTF_PRCLONING rtalloc_ign(&ro, (RTF_CLONING | RTF_PRCLONING)); #else /* !RTF_PRCLONING */ - in_rtalloc_ign(&ro, RTF_CLONING, 0); + in_rtalloc_ign(&ro, 0, 0); #endif #else /* ! __FreeBSD__ */ rtalloc_noclone(&ro, NO_CLONING); @@ -3183,7 +3183,7 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer) rtalloc_ign((struct route *)&ro6, (RTF_CLONING | RTF_PRCLONING)); #else /* !RTF_PRCLONING */ - rtalloc_ign((struct route *)&ro6, RTF_CLONING); + rtalloc_ign((struct route *)&ro6, 0); #endif #else /* ! __FreeBSD__ */ rtalloc_noclone((struct route *)&ro6, NO_CLONING); @@ -5986,9 +5986,9 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif) #ifdef __FreeBSD__ /* XXX MRT not always INET */ /* stick with table 0 though */ if (af == AF_INET) - in_rtalloc_ign((struct route *)&ro, RTF_CLONING, 0); + in_rtalloc_ign((struct route *)&ro, 0, 0); else - rtalloc_ign((struct route *)&ro, RTF_CLONING); + rtalloc_ign((struct route *)&ro, 0); #else /* ! __FreeBSD__ */ rtalloc_noclone((struct route *)&ro, NO_CLONING); #endif @@ -6068,9 +6068,9 @@ pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw) rtalloc_ign((struct route *)&ro, (RTF_CLONING|RTF_PRCLONING)); # else /* !RTF_PRCLONING */ if (af == AF_INET) - in_rtalloc_ign((struct route *)&ro, RTF_CLONING, 0); + in_rtalloc_ign((struct route *)&ro, 0, 0); else - rtalloc_ign((struct route *)&ro, RTF_CLONING); + rtalloc_ign((struct route *)&ro, 0); # endif #else /* ! __FreeBSD__ */ rtalloc_noclone((struct route *)&ro, NO_CLONING); diff --git a/sys/contrib/rdma/rdma_addr.c b/sys/contrib/rdma/rdma_addr.c index e052b80..971b4de 100644 --- a/sys/contrib/rdma/rdma_addr.c +++ b/sys/contrib/rdma/rdma_addr.c @@ -163,6 +163,7 @@ static void addr_send_arp(struct sockaddr_in *dst_in) struct route iproute; struct sockaddr_in *dst = (struct sockaddr_in *)&iproute.ro_dst; char dmac[ETHER_ADDR_LEN]; + struct llentry *lle; bzero(&iproute, sizeof iproute); *dst = *dst_in; @@ -172,7 +173,7 @@ static void addr_send_arp(struct sockaddr_in *dst_in) return; arpresolve(iproute.ro_rt->rt_ifp, iproute.ro_rt, NULL, - rt_key(iproute.ro_rt), dmac); + rt_key(iproute.ro_rt), dmac, &lle); RTFREE(iproute.ro_rt); } @@ -186,6 +187,7 @@ static int addr_resolve_remote(struct sockaddr_in *src_in, struct route iproute; struct sockaddr_in *dst = (struct sockaddr_in *)&iproute.ro_dst; char dmac[ETHER_ADDR_LEN]; + struct llentry *lle; bzero(&iproute, sizeof iproute); *dst = *dst_in; @@ -202,7 +204,7 @@ static int addr_resolve_remote(struct sockaddr_in *src_in, goto put; } ret = arpresolve(iproute.ro_rt->rt_ifp, iproute.ro_rt, NULL, - rt_key(iproute.ro_rt), dmac); + rt_key(iproute.ro_rt), dmac, &lle); if (ret) { goto put; } diff --git a/sys/dev/cxgb/ulp/tom/cxgb_l2t.c b/sys/dev/cxgb/ulp/tom/cxgb_l2t.c index 67856e6..2484923 100644 --- a/sys/dev/cxgb/ulp/tom/cxgb_l2t.c +++ b/sys/dev/cxgb/ulp/tom/cxgb_l2t.c @@ -93,15 +93,15 @@ arp_hash(u32 key, int ifindex, const struct l2t_data *d) } static inline void -neigh_replace(struct l2t_entry *e, struct rtentry *rt) +neigh_replace(struct l2t_entry *e, struct llentry *neigh) { - RT_LOCK(rt); - RT_ADDREF(rt); - RT_UNLOCK(rt); + LLE_WLOCK(neigh); + LLE_ADDREF(neigh); + LLE_WUNLOCK(neigh); if (e->neigh) - RTFREE(e->neigh); - e->neigh = rt; + LLE_FREE(e->neigh); + e->neigh = neigh; } /* @@ -164,7 +164,7 @@ arpq_enqueue(struct l2t_entry *e, struct mbuf *m) int t3_l2t_send_slow(struct t3cdev *dev, struct mbuf *m, struct l2t_entry *e) { - struct rtentry *rt = e->neigh; + struct llentry *lle = e->neigh; struct sockaddr_in sin; bzero(&sin, sizeof(struct sockaddr_in)); @@ -177,7 +177,7 @@ again: switch (e->state) { case L2T_STATE_STALE: /* entry is stale, kick off revalidation */ arpresolve(rt->rt_ifp, rt, NULL, - (struct sockaddr *)&sin, e->dmac); + (struct sockaddr *)&sin, e->dmac, &lle); mtx_lock(&e->lock); if (e->state == L2T_STATE_STALE) e->state = L2T_STATE_VALID; @@ -201,7 +201,7 @@ again: * entries when there's no memory. */ if (arpresolve(rt->rt_ifp, rt, NULL, - (struct sockaddr *)&sin, e->dmac) == 0) { + (struct sockaddr *)&sin, e->dmac, &lle) == 0) { CTR6(KTR_CXGB, "mac=%x:%x:%x:%x:%x:%x\n", e->dmac[0], e->dmac[1], e->dmac[2], e->dmac[3], e->dmac[4], e->dmac[5]); @@ -222,12 +222,12 @@ again: void t3_l2t_send_event(struct t3cdev *dev, struct l2t_entry *e) { - struct rtentry *rt; struct mbuf *m0; struct sockaddr_in sin; sin.sin_family = AF_INET; sin.sin_len = sizeof(struct sockaddr_in); sin.sin_addr.s_addr = e->addr; + struct llentry *lle; if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) return; @@ -237,7 +237,7 @@ again: switch (e->state) { case L2T_STATE_STALE: /* entry is stale, kick off revalidation */ arpresolve(rt->rt_ifp, rt, NULL, - (struct sockaddr *)&sin, e->dmac); + (struct sockaddr *)&sin, e->dmac, &lle); mtx_lock(&e->lock); if (e->state == L2T_STATE_STALE) { e->state = L2T_STATE_VALID; @@ -263,7 +263,7 @@ again: * entries when there's no memory. */ arpresolve(rt->rt_ifp, rt, NULL, - (struct sockaddr *)&sin, e->dmac); + (struct sockaddr *)&sin, e->dmac, &lle); } return; @@ -321,18 +321,18 @@ found: void t3_l2e_free(struct l2t_data *d, struct l2t_entry *e) { - struct rtentry *rt = NULL; - + struct llentry *lle; + mtx_lock(&e->lock); if (atomic_load_acq_int(&e->refcnt) == 0) { /* hasn't been recycled */ - rt = e->neigh; + lle = e->neigh; e->neigh = NULL; } mtx_unlock(&e->lock); atomic_add_int(&d->nfree, 1); - if (rt) - RTFREE(rt); + if (lle) + LLE_FREE(lle); } @@ -341,11 +341,8 @@ t3_l2e_free(struct l2t_data *d, struct l2t_entry *e) * Must be called with softirqs disabled. */ static inline void -reuse_entry(struct l2t_entry *e, struct rtentry *neigh) +reuse_entry(struct l2t_entry *e, struct llentry *neigh) { - struct llinfo_arp *la; - - la = (struct llinfo_arp *)neigh->rt_llinfo; mtx_lock(&e->lock); /* avoid race with t3_l2t_free */ if (neigh != e->neigh) @@ -362,13 +359,13 @@ reuse_entry(struct l2t_entry *e, struct rtentry *neigh) } struct l2t_entry * -t3_l2t_get(struct t3cdev *dev, struct rtentry *neigh, struct ifnet *ifp, +t3_l2t_get(struct t3cdev *dev, struct llentry *neigh, struct ifnet *ifp, struct sockaddr *sa) { struct l2t_entry *e; struct l2t_data *d = L2DATA(dev); u32 addr = ((struct sockaddr_in *)sa)->sin_addr.s_addr; - int ifidx = neigh->rt_ifp->if_index; + int ifidx = ifp->if_index; int hash = arp_hash(addr, ifidx, d); unsigned int smt_idx = ((struct port_info *)ifp->if_softc)->port_id; @@ -448,20 +445,19 @@ handle_failed_resolution(struct t3cdev *dev, struct mbuf *arpq) } void -t3_l2t_update(struct t3cdev *dev, struct rtentry *neigh, +t3_l2t_update(struct t3cdev *dev, struct llentry *neigh, uint8_t *enaddr, struct sockaddr *sa) { struct l2t_entry *e; struct mbuf *arpq = NULL; struct l2t_data *d = L2DATA(dev); u32 addr = *(u32 *) &((struct sockaddr_in *)sa)->sin_addr; - int ifidx = neigh->rt_ifp->if_index; int hash = arp_hash(addr, ifidx, d); struct llinfo_arp *la; rw_rlock(&d->lock); for (e = d->l2tab[hash].first; e; e = e->next) - if (e->addr == addr && e->ifindex == ifidx) { + if (e->addr == addr) { mtx_lock(&e->lock); goto found; } diff --git a/sys/dev/cxgb/ulp/tom/cxgb_l2t.h b/sys/dev/cxgb/ulp/tom/cxgb_l2t.h index 1d547d3..308ba66 100644 --- a/sys/dev/cxgb/ulp/tom/cxgb_l2t.h +++ b/sys/dev/cxgb/ulp/tom/cxgb_l2t.h @@ -68,7 +68,7 @@ struct l2t_entry { int ifindex; /* neighbor's net_device's ifindex */ uint16_t smt_idx; /* SMT index */ uint16_t vlan; /* VLAN TCI (id: bits 0-11, prio: 13-15 */ - struct rtentry *neigh; /* associated neighbour */ + struct llentry *neigh; /* associated neighbour */ struct l2t_entry *first; /* start of hash chain */ struct l2t_entry *next; /* next l2t_entry on chain */ struct mbuf *arpq_head; /* queue of packets awaiting resolution */ diff --git a/sys/modules/cxgb/Makefile b/sys/modules/cxgb/Makefile index 7d56988..71cbb7a 100644 --- a/sys/modules/cxgb/Makefile +++ b/sys/modules/cxgb/Makefile @@ -25,7 +25,7 @@ _toe_header = ${_sysdir}/netinet/toedev.h .if exists(${_toe_header}) _toecore = toecore -_tom = tom +#_tom = tom .endif .if ${MACHINE_ARCH} == "i386" && exists(${_toe_header}) diff --git a/sys/net/if.c b/sys/net/if.c index 5142a5c..6f829b7 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -60,6 +60,7 @@ #include <sys/jail.h> #include <sys/vimage.h> #include <machine/stdarg.h> +#include <vm/uma.h> #include <net/if.h> #include <net/if_arp.h> @@ -1366,6 +1367,9 @@ done: return (ifa); } +#include <net/route.h> +#include <net/if_llatbl.h> + /* * Default action when installing a route with a Link Level gateway. * Lookup an appropriate real ifa to point to. diff --git a/sys/net/if_arcsubr.c b/sys/net/if_arcsubr.c index 0f5fef3..023b647 100644 --- a/sys/net/if_arcsubr.c +++ b/sys/net/if_arcsubr.c @@ -64,6 +64,7 @@ #include <net/if_arc.h> #include <net/if_arp.h> #include <net/bpf.h> +#include <net/if_llatbl.h> #if defined(INET) || defined(INET6) #include <netinet/in.h> @@ -108,6 +109,7 @@ arc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, u_int8_t atype, adst; int loop_copy = 0; int isphds; + struct llentry *lle; if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) @@ -127,7 +129,7 @@ arc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, else if (ifp->if_flags & IFF_NOARP) adst = ntohl(SIN(dst)->sin_addr.s_addr) & 0xFF; else { - error = arpresolve(ifp, rt0, m, dst, &adst); + error = arpresolve(ifp, rt0, m, dst, &adst, &lle); if (error) return (error == EWOULDBLOCK ? 0 : error); } @@ -165,7 +167,7 @@ arc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, #endif #ifdef INET6 case AF_INET6: - error = nd6_storelladdr(ifp, rt0, m, dst, (u_char *)&adst); + error = nd6_storelladdr(ifp, rt0, m, dst, (u_char *)&adst, &lle); if (error) return (error); atype = ARCTYPE_INET6; diff --git a/sys/net/if_atmsubr.c b/sys/net/if_atmsubr.c index 9d1a7fa..7e36187 100644 --- a/sys/net/if_atmsubr.c +++ b/sys/net/if_atmsubr.c @@ -153,22 +153,11 @@ atm_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst, case AF_INET: case AF_INET6: { - struct rtentry *rt = NULL; - /* - * check route - */ - if (rt0 != NULL) { - error = rt_check(&rt, &rt0, dst); - if (error) - goto bad; - RT_UNLOCK(rt); - } - if (dst->sa_family == AF_INET6) etype = ETHERTYPE_IPV6; else etype = ETHERTYPE_IP; - if (!atmresolve(rt, m, dst, &atmdst)) { + if (!atmresolve(rt0, m, dst, &atmdst)) { m = NULL; /* XXX: atmresolve already free'd it */ senderr(EHOSTUNREACH); diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index 4524fdd..e5978c6 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -64,6 +64,7 @@ #include <net/ethernet.h> #include <net/if_bridgevar.h> #include <net/if_vlan_var.h> +#include <net/if_llatbl.h> #include <net/pf_mtag.h> #include <net/vnet.h> @@ -87,6 +88,7 @@ #include <netipx/ipx.h> #include <netipx/ipx_if.h> #endif + int (*ef_inputp)(struct ifnet*, struct ether_header *eh, struct mbuf *m); int (*ef_outputp)(struct ifnet *ifp, struct mbuf **mp, struct sockaddr *dst, short *tp, int *hlen); @@ -151,6 +153,7 @@ static int ether_ipfw; #endif #endif + /* * Ethernet output routine. * Encapsulate a packet of type family for the local net. @@ -164,6 +167,7 @@ ether_output(struct ifnet *ifp, struct mbuf *m, short type; int error, hdrcmplt = 0; u_char esrc[ETHER_ADDR_LEN], edst[ETHER_ADDR_LEN]; + struct llentry *lle = NULL; struct ether_header *eh; struct pf_mtag *t; int loop_copy = 1; @@ -186,7 +190,7 @@ ether_output(struct ifnet *ifp, struct mbuf *m, switch (dst->sa_family) { #ifdef INET case AF_INET: - error = arpresolve(ifp, rt0, m, dst, edst); + error = arpresolve(ifp, rt0, m, dst, edst, &lle); if (error) return (error == EWOULDBLOCK ? 0 : error); type = htons(ETHERTYPE_IP); @@ -221,7 +225,7 @@ ether_output(struct ifnet *ifp, struct mbuf *m, #endif #ifdef INET6 case AF_INET6: - error = nd6_storelladdr(ifp, rt0, m, dst, (u_char *)edst); + error = nd6_storelladdr(ifp, rt0, m, dst, (u_char *)edst, &lle); if (error) return error; type = htons(ETHERTYPE_IPV6); @@ -289,6 +293,17 @@ ether_output(struct ifnet *ifp, struct mbuf *m, senderr(EAFNOSUPPORT); } + if (lle != NULL && (lle->la_flags & LLE_IFADDR)) { + int csum_flags = 0; + if (m->m_pkthdr.csum_flags & CSUM_IP) + csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID); + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) + csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR); + m->m_pkthdr.csum_flags |= csum_flags; + m->m_pkthdr.csum_data = 0xffff; + return (if_simloop(ifp, m, dst->sa_family, 0)); + } + /* * Add local net header. If no space in first mbuf, * allocate another. diff --git a/sys/net/if_fddisubr.c b/sys/net/if_fddisubr.c index e37d7ae..26d64e0 100644 --- a/sys/net/if_fddisubr.c +++ b/sys/net/if_fddisubr.c @@ -55,6 +55,7 @@ #include <net/if_dl.h> #include <net/if_llc.h> #include <net/if_types.h> +#include <net/if_llatbl.h> #include <net/ethernet.h> #include <net/netisr.h> @@ -120,6 +121,7 @@ fddi_output(ifp, m, dst, rt0) int loop_copy = 0, error = 0, hdrcmplt = 0; u_char esrc[FDDI_ADDR_LEN], edst[FDDI_ADDR_LEN]; struct fddi_header *fh; + struct llentry *lle; #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); @@ -137,7 +139,7 @@ fddi_output(ifp, m, dst, rt0) switch (dst->sa_family) { #ifdef INET case AF_INET: { - error = arpresolve(ifp, rt0, m, dst, edst); + error = arpresolve(ifp, rt0, m, dst, edst, &lle); if (error) return (error == EWOULDBLOCK ? 0 : error); type = htons(ETHERTYPE_IP); @@ -173,7 +175,7 @@ fddi_output(ifp, m, dst, rt0) #endif /* INET */ #ifdef INET6 case AF_INET6: - error = nd6_storelladdr(ifp, rt0, m, dst, (u_char *)edst); + error = nd6_storelladdr(ifp, rt0, m, dst, (u_char *)edst, &lle); if (error) return (error); /* Something bad happened */ type = htons(ETHERTYPE_IPV6); diff --git a/sys/net/if_fwsubr.c b/sys/net/if_fwsubr.c index 77e7b70..b087370 100644 --- a/sys/net/if_fwsubr.c +++ b/sys/net/if_fwsubr.c @@ -51,6 +51,7 @@ #include <net/if_types.h> #include <net/bpf.h> #include <net/firewire.h> +#include <net/if_llatbl.h> #if defined(INET) || defined(INET6) #include <netinet/in.h> @@ -80,7 +81,6 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, { struct fw_com *fc = IFP2FWC(ifp); int error, type; - struct rtentry *rt = NULL; struct m_tag *mtag; union fw_encap *enc; struct fw_hwaddr *destfw; @@ -89,6 +89,7 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, struct mbuf *mtail; int unicast, dgl, foff; static int next_dgl; + struct llentry *lle; #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); @@ -102,13 +103,6 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, goto bad; } - if (rt0 != NULL) { - error = rt_check(&rt, &rt0, dst); - if (error) - goto bad; - RT_UNLOCK(rt); - } - /* * For unicast, we make a tag to store the lladdr of the * destination. This might not be the first time we have seen @@ -144,7 +138,7 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, * doesn't fit into the arp model. */ if (unicast) { - error = arpresolve(ifp, rt, m, dst, (u_char *) destfw); + error = arpresolve(ifp, rt0, m, dst, (u_char *) destfw, &lle); if (error) return (error == EWOULDBLOCK ? 0 : error); } @@ -173,8 +167,8 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, #ifdef INET6 case AF_INET6: if (unicast) { - error = nd6_storelladdr(fc->fc_ifp, rt, m, dst, - (u_char *) destfw); + error = nd6_storelladdr(fc->fc_ifp, rt0, m, dst, + (u_char *) destfw, &lle); if (error) return (error); } diff --git a/sys/net/if_iso88025subr.c b/sys/net/if_iso88025subr.c index 00e323a..2bc336c 100644 --- a/sys/net/if_iso88025subr.c +++ b/sys/net/if_iso88025subr.c @@ -59,6 +59,7 @@ #include <net/if_dl.h> #include <net/if_llc.h> #include <net/if_types.h> +#include <net/if_llatbl.h> #include <net/ethernet.h> #include <net/netisr.h> @@ -243,7 +244,7 @@ iso88025_output(ifp, m, dst, rt0) struct iso88025_header *th; struct iso88025_header gen_th; struct sockaddr_dl *sdl = NULL; - struct rtentry *rt = NULL; + struct llentry *lle; #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); @@ -260,14 +261,8 @@ iso88025_output(ifp, m, dst, rt0) /* Calculate routing info length based on arp table entry */ /* XXX any better way to do this ? */ - if (rt0 != NULL) { - error = rt_check(&rt, &rt0, dst); - if (error) - goto bad; - RT_UNLOCK(rt); - } - if (rt && (sdl = (struct sockaddr_dl *)rt->rt_gateway)) + if (rt0 && (sdl = (struct sockaddr_dl *)rt0->rt_gateway)) if (SDL_ISO88025(sdl)->trld_rcf != 0) rif_len = TR_RCF_RIFLEN(SDL_ISO88025(sdl)->trld_rcf); @@ -289,7 +284,7 @@ iso88025_output(ifp, m, dst, rt0) switch (dst->sa_family) { #ifdef INET case AF_INET: - error = arpresolve(ifp, rt0, m, dst, edst); + error = arpresolve(ifp, rt0, m, dst, edst, &lle); if (error) return (error == EWOULDBLOCK ? 0 : error); snap_type = ETHERTYPE_IP; @@ -324,7 +319,7 @@ iso88025_output(ifp, m, dst, rt0) #endif /* INET */ #ifdef INET6 case AF_INET6: - error = nd6_storelladdr(ifp, rt0, m, dst, (u_char *)edst); + error = nd6_storelladdr(ifp, rt0, m, dst, (u_char *)edst, &lle); if (error) return (error); snap_type = ETHERTYPE_IPV6; diff --git a/sys/net/if_var.h b/sys/net/if_var.h index 62c2b99..c4cb4f6 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -169,8 +169,6 @@ struct ifnet { void *if_bridge; /* bridge glue */ - struct lltable *lltables; /* list of L3-L2 resolution tables */ - struct label *if_label; /* interface MAC label */ /* these are only used by IPv6 */ @@ -181,6 +179,7 @@ struct ifnet { struct task if_starttask; /* task for IFF_NEEDSGIANT */ struct task if_linktask; /* task for link change events */ struct mtx if_addr_mtx; /* mutex to protect address lists */ + LIST_ENTRY(ifnet) if_clones; /* interfaces of a cloner */ TAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if */ /* protected by if_addr_mtx */ @@ -365,6 +364,9 @@ EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t); #define IF_AFDATA_UNLOCK(ifp) mtx_unlock(&(ifp)->if_afdata_mtx) #define IF_AFDATA_DESTROY(ifp) mtx_destroy(&(ifp)->if_afdata_mtx) +#define IF_AFDATA_LOCK_ASSERT(ifp) mtx_assert(&(ifp)->if_afdata_mtx, MA_OWNED) +#define IF_AFDATA_UNLOCK_ASSERT(ifp) mtx_assert(&(ifp)->if_afdata_mtx, MA_NOTOWNED) + #define IFF_LOCKGIANT(ifp) do { \ if ((ifp)->if_flags & IFF_NEEDSGIANT) \ mtx_lock(&Giant); \ diff --git a/sys/net/radix_mpath.c b/sys/net/radix_mpath.c index 68233fe..8d94d01 100644 --- a/sys/net/radix_mpath.c +++ b/sys/net/radix_mpath.c @@ -266,8 +266,8 @@ rtalloc_mpath_fib(struct route *ro, u_int32_t hash, u_int fibnum) * be done for sendto(3) case? */ if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP)) - return; /* XXX */ - ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, RTF_CLONING, fibnum); + return; + ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, 0, fibnum); /* if the route does not exist or it is not multipath, don't care */ if (ro->ro_rt == NULL) diff --git a/sys/net/route.c b/sys/net/route.c index be71e13..9359514 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -41,6 +41,7 @@ #include <sys/param.h> #include <sys/systm.h> +#include <sys/syslog.h> #include <sys/malloc.h> #include <sys/mbuf.h> #include <sys/socket.h> @@ -53,6 +54,7 @@ #include <sys/vimage.h> #include <net/if.h> +#include <net/if_dl.h> #include <net/route.h> #ifdef RADIX_MPATH @@ -269,8 +271,7 @@ rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags, struct radix_node *rn; struct rtentry *newrt; struct rt_addrinfo info; - u_long nflags; - int needresolve = 0, err = 0, msgtype = RTM_MISS; + int err = 0, msgtype = RTM_MISS; int needlock; KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum")); @@ -283,10 +284,9 @@ rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags, */ if (rnh == NULL) { V_rtstat.rts_unreach++; - goto miss2; + goto miss; } needlock = !(ignflags & RTF_RNH_LOCKED); -retry: if (needlock) RADIX_NODE_HEAD_RLOCK(rnh); #ifdef INVARIANTS @@ -295,103 +295,33 @@ retry: #endif rn = rnh->rnh_matchaddr(dst, rnh); if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) { - newrt = rt = RNTORT(rn); - nflags = rt->rt_flags & ~ignflags; - if (report && (nflags & RTF_CLONING)) { - if (needlock && !RADIX_NODE_HEAD_LOCK_TRY_UPGRADE(rnh)) { - RADIX_NODE_HEAD_RUNLOCK(rnh); - RADIX_NODE_HEAD_LOCK(rnh); - /* - * lookup again to make sure it wasn't changed - */ - rn = rnh->rnh_matchaddr(dst, rnh); - if (!(rn && ((rn->rn_flags & RNF_ROOT) == 0))) { - RADIX_NODE_HEAD_UNLOCK(rnh); - needresolve = 0; - log(LOG_INFO, "retrying route lookup ...\n"); - goto retry; - } - } - needresolve = 1; - } else { - RT_LOCK(newrt); - RT_ADDREF(newrt); - if (needlock) - RADIX_NODE_HEAD_RUNLOCK(rnh); - goto done; - } - } + RT_LOCK(newrt); + RT_ADDREF(newrt); + if (needlock) + RADIX_NODE_HEAD_RUNLOCK(rnh); + goto done; + + } else if (needlock) + RADIX_NODE_HEAD_RUNLOCK(rnh); + /* - * if needresolve is set then we have the exclusive lock - * and we need to keep it held for the benefit of rtrequest_fib + * Either we hit the root or couldn't find any match, + * Which basically means + * "caint get there frm here" */ - if (!needresolve && needlock) - RADIX_NODE_HEAD_RUNLOCK(rnh); - - if (needresolve) { - RADIX_NODE_HEAD_WLOCK_ASSERT(rnh); + V_rtstat.rts_unreach++; +miss: + if (report) { /* - * We are apparently adding (report = 0 in delete). - * If it requires that it be cloned, do so. - * (This implies it wasn't a HOST route.) + * If required, report the failure to the supervising + * Authorities. + * For a delete, this is not an error. (report == 0) */ - err = rtrequest_fib(RTM_RESOLVE, dst, NULL, - NULL, RTF_RNH_LOCKED, &newrt, fibnum); - if (err) { - /* - * If the cloning didn't succeed, maybe - * what we have will do. Return that. - */ - newrt = rt; /* existing route */ - RT_LOCK(newrt); - RT_ADDREF(newrt); - goto miss; - } - KASSERT(newrt, ("no route and no error")); - RT_LOCK(newrt); - if (newrt->rt_flags & RTF_XRESOLVE) { - /* - * If the new route specifies it be - * externally resolved, then go do that. - */ - msgtype = RTM_RESOLVE; - goto miss; - } - /* Inform listeners of the new route. */ bzero(&info, sizeof(info)); - info.rti_info[RTAX_DST] = rt_key(newrt); - info.rti_info[RTAX_NETMASK] = rt_mask(newrt); - info.rti_info[RTAX_GATEWAY] = newrt->rt_gateway; - if (newrt->rt_ifp != NULL) { - info.rti_info[RTAX_IFP] = - newrt->rt_ifp->if_addr->ifa_addr; - info.rti_info[RTAX_IFA] = newrt->rt_ifa->ifa_addr; - } - rt_missmsg(RTM_ADD, &info, newrt->rt_flags, 0); - if (needlock) - RADIX_NODE_HEAD_UNLOCK(rnh); - } else { - /* - * Either we hit the root or couldn't find any match, - * Which basically means - * "caint get there frm here" - */ - V_rtstat.rts_unreach++; - miss: - if (needlock && needresolve) - RADIX_NODE_HEAD_UNLOCK(rnh); - miss2: if (report) { - /* - * If required, report the failure to the supervising - * Authorities. - * For a delete, this is not an error. (report == 0) - */ - bzero(&info, sizeof(info)); - info.rti_info[RTAX_DST] = dst; - rt_missmsg(msgtype, &info, 0, err); - } - } + info.rti_info[RTAX_DST] = dst; + rt_missmsg(msgtype, &info, 0, err); + } done: if (newrt) RT_LOCK_ASSERT(newrt); @@ -420,7 +350,7 @@ rtfree(struct rtentry *rt) */ RT_REMREF(rt); if (rt->rt_refcnt > 0) { - printf("%s: %p has %lu refs\n", __func__, rt, rt->rt_refcnt); + log(LOG_DEBUG, "%s: %p has %d refs\t", __func__, rt, rt->rt_refcnt); goto done; } @@ -461,8 +391,6 @@ rtfree(struct rtentry *rt) */ if (rt->rt_ifa) IFAFREE(rt->rt_ifa); - rt->rt_parent = NULL; /* NB: no refcnt on parent */ - /* * The key is separatly alloc'd so free it (see rt_setgate()). * This also frees the gateway, as they are always malloc'd @@ -715,14 +643,6 @@ ifa_ifwithroute_fib(int flags, struct sockaddr *dst, struct sockaddr *gateway, return (ifa); } -static walktree_f_t rt_fixdelete; -static walktree_f_t rt_fixchange; - -struct rtfc_arg { - struct rtentry *rt0; - struct radix_node_head *rnh; -}; - /* * Do appropriate manipulations of a routing tree given * all the bits of info needed @@ -828,8 +748,13 @@ rtexpunge(struct rtentry *rt) struct ifaddr *ifa; int error = 0; + /* + * Find the correct routing tree to use for this Address Family + */ rnh = V_rt_tables[rt->rt_fibnum][rt_key(rt)->sa_family]; RT_LOCK_ASSERT(rt); + if (rnh == NULL) + return (EAFNOSUPPORT); RADIX_NODE_HEAD_LOCK_ASSERT(rnh); #if 0 /* @@ -840,13 +765,6 @@ rtexpunge(struct rtentry *rt) KASSERT(rt->rt_refcnt <= 1, ("bogus refcnt %ld", rt->rt_refcnt)); #endif /* - * Find the correct routing tree to use for this Address Family - */ - rnh = V_rt_tables[rt->rt_fibnum][rt_key(rt)->sa_family]; - if (rnh == NULL) - return (EAFNOSUPPORT); - - /* * Remove the item from the tree; it should be there, * but when callers invoke us blindly it may not (sigh). */ @@ -863,24 +781,6 @@ rtexpunge(struct rtentry *rt) rt->rt_flags &= ~RTF_UP; /* - * Now search what's left of the subtree for any cloned - * routes which might have been formed from this node. - */ - if ((rt->rt_flags & RTF_CLONING) && rt_mask(rt)) - rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt), - rt_fixdelete, rt); - - /* - * Remove any external references we may have. - * This might result in another rtentry being freed if - * we held its last reference. - */ - if (rt->rt_gwroute) { - RTFREE(rt->rt_gwroute); - rt->rt_gwroute = NULL; - } - - /* * Give the protocol a chance to keep things in sync. */ if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) { @@ -904,12 +804,6 @@ bad: } int -rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt) -{ - return (rtrequest1_fib(req, info, ret_nrt, 0)); -} - -int rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, u_int fibnum) { @@ -941,10 +835,9 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, * If we are adding a host route then we don't want to put * a netmask in the tree, nor do we want to clone it. */ - if (flags & RTF_HOST) { + if (flags & RTF_HOST) netmask = NULL; - flags &= ~RTF_CLONING; - } + switch (req) { case RTM_DELETE: #ifdef RADIX_MPATH @@ -1023,26 +916,6 @@ normal_rtdel: rt->rt_flags &= ~RTF_UP; /* - * Now search what's left of the subtree for any cloned - * routes which might have been formed from this node. - */ - if ((rt->rt_flags & RTF_CLONING) && - rt_mask(rt)) { - rnh->rnh_walktree_from(rnh, dst, rt_mask(rt), - rt_fixdelete, rt); - } - - /* - * Remove any external references we may have. - * This might result in another rtentry being freed if - * we held its last reference. - */ - if (rt->rt_gwroute) { - RTFREE(rt->rt_gwroute); - rt->rt_gwroute = NULL; - } - - /* * give the protocol a chance to keep things in sync. */ if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) @@ -1069,20 +942,12 @@ deldone: } else RTFREE_LOCKED(rt); break; - case RTM_RESOLVE: - if (ret_nrt == NULL || (rt = *ret_nrt) == NULL) - senderr(EINVAL); - ifa = rt->rt_ifa; - /* XXX locking? */ - flags = rt->rt_flags & - ~(RTF_CLONING | RTF_STATIC); - flags |= RTF_WASCLONED; - gateway = rt->rt_gateway; - if ((netmask = rt->rt_genmask) == NULL) - flags |= RTF_HOST; - goto makeroute; - + /* + * resolve was only used for route cloning + * here for compat + */ + break; case RTM_ADD: if ((flags & RTF_GATEWAY) && !gateway) senderr(EINVAL); @@ -1093,8 +958,6 @@ deldone: if (info->rti_ifa == NULL && (error = rt_getifa_fib(info, fibnum))) senderr(error); ifa = info->rti_ifa; - - makeroute: rt = uma_zalloc(rtzone, M_NOWAIT | M_ZERO); if (rt == NULL) senderr(ENOBUFS); @@ -1103,7 +966,7 @@ deldone: rt->rt_fibnum = fibnum; /* * Add the gateway. Possibly re-malloc-ing the storage for it - * also add the rt_gwroute if possible. + * */ RT_LOCK(rt); if ((error = rt_setgate(rt, dst, gateway)) != 0) { @@ -1138,8 +1001,6 @@ deldone: /* do not permit exactly the same dst/mask/gw pair */ if (rn_mpath_capable(rnh) && rt_mpath_conflict(rnh, rt, netmask)) { - if (rt->rt_gwroute) - RTFREE(rt->rt_gwroute); if (rt->rt_ifa) { IFAFREE(rt->rt_ifa); } @@ -1152,34 +1013,11 @@ deldone: /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */ rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes); - if (rn == NULL) { - struct rtentry *rt2; - /* - * Uh-oh, we already have one of these in the tree. - * We do a special hack: if the route that's already - * there was generated by the cloning mechanism - * then we just blow it away and retry the insertion - * of the new one. - */ - rt2 = rtalloc1_fib(dst, 0, RTF_RNH_LOCKED, fibnum); - if (rt2 && rt2->rt_parent) { - rtexpunge(rt2); - RT_UNLOCK(rt2); - rn = rnh->rnh_addaddr(ndst, netmask, - rnh, rt->rt_nodes); - } else if (rt2) { - /* undo the extra ref we got */ - RTFREE_LOCKED(rt2); - } - } - /* * If it still failed to go into the tree, * then un-make it (this should be a function) */ if (rn == NULL) { - if (rt->rt_gwroute) - RTFREE(rt->rt_gwroute); if (rt->rt_ifa) IFAFREE(rt->rt_ifa); Free(rt_key(rt)); @@ -1188,33 +1026,6 @@ deldone: senderr(EEXIST); } - rt->rt_parent = NULL; - - /* - * If we got here from RESOLVE, then we are cloning - * so clone the rest, and note that we - * are a clone (and increment the parent's references) - */ - if (req == RTM_RESOLVE) { - KASSERT(ret_nrt && *ret_nrt, - ("no route to clone from")); - rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */ - rt->rt_rmx.rmx_pksent = 0; /* reset packet counter */ - if ((*ret_nrt)->rt_flags & RTF_CLONING) { - /* - * NB: We do not bump the refcnt on the parent - * entry under the assumption that it will - * remain so long as we do. This is - * important when deleting the parent route - * as this operation requires traversing - * the tree to delete all clones and futzing - * with refcnts requires us to double-lock - * parent through this back reference. - */ - rt->rt_parent = *ret_nrt; - } - } - /* * If this protocol has something to add to this then * allow it to do that as well. @@ -1223,20 +1034,6 @@ deldone: ifa->ifa_rtrequest(req, rt, info); /* - * We repeat the same procedure from rt_setgate() here because - * it doesn't fire when we call it there because the node - * hasn't been added to the tree yet. - */ - if (req == RTM_ADD && - !(rt->rt_flags & RTF_HOST) && rt_mask(rt) != NULL) { - struct rtfc_arg arg; - arg.rnh = rnh; - arg.rt0 = rt; - rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt), - rt_fixchange, &arg); - } - - /* * actually return a resultant rtentry and * give the caller a single reference. */ @@ -1263,159 +1060,21 @@ bad: #undef ifpaddr #undef flags -/* - * Called from rtrequest(RTM_DELETE, ...) to fix up the route's ``family'' - * (i.e., the routes related to it by the operation of cloning). This - * routine is iterated over all potential former-child-routes by way of - * rnh->rnh_walktree_from() above, and those that actually are children of - * the late parent (passed in as VP here) are themselves deleted. - */ -static int -rt_fixdelete(struct radix_node *rn, void *vp) -{ - struct rtentry *rt = RNTORT(rn); - struct rtentry *rt0 = vp; - - if (rt->rt_parent == rt0 && - !(rt->rt_flags & (RTF_PINNED | RTF_CLONING))) { - return rtrequest_fib(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt), - rt->rt_flags|RTF_RNH_LOCKED, NULL, rt->rt_fibnum); - } - return 0; -} - -/* - * This routine is called from rt_setgate() to do the analogous thing for - * adds and changes. There is the added complication in this case of a - * middle insert; i.e., insertion of a new network route between an older - * network route and (cloned) host routes. For this reason, a simple check - * of rt->rt_parent is insufficient; each candidate route must be tested - * against the (mask, value) of the new route (passed as before in vp) - * to see if the new route matches it. - * - * XXX - it may be possible to do fixdelete() for changes and reserve this - * routine just for adds. I'm not sure why I thought it was necessary to do - * changes this way. - */ - -static int -rt_fixchange(struct radix_node *rn, void *vp) -{ - struct rtentry *rt = RNTORT(rn); - struct rtfc_arg *ap = vp; - struct rtentry *rt0 = ap->rt0; - struct radix_node_head *rnh = ap->rnh; - u_char *xk1, *xm1, *xk2, *xmp; - int i, len, mlen; - - /* make sure we have a parent, and route is not pinned or cloning */ - if (!rt->rt_parent || - (rt->rt_flags & (RTF_PINNED | RTF_CLONING))) - return 0; - - if (rt->rt_parent == rt0) /* parent match */ - goto delete_rt; - /* - * There probably is a function somewhere which does this... - * if not, there should be. - */ - len = imin(rt_key(rt0)->sa_len, rt_key(rt)->sa_len); - - xk1 = (u_char *)rt_key(rt0); - xm1 = (u_char *)rt_mask(rt0); - xk2 = (u_char *)rt_key(rt); - - /* avoid applying a less specific route */ - xmp = (u_char *)rt_mask(rt->rt_parent); - mlen = rt_key(rt->rt_parent)->sa_len; - if (mlen > rt_key(rt0)->sa_len) /* less specific route */ - return 0; - for (i = rnh->rnh_treetop->rn_offset; i < mlen; i++) - if ((xmp[i] & ~(xmp[i] ^ xm1[i])) != xmp[i]) - return 0; /* less specific route */ - - for (i = rnh->rnh_treetop->rn_offset; i < len; i++) - if ((xk2[i] & xm1[i]) != xk1[i]) - return 0; /* no match */ - - /* - * OK, this node is a clone, and matches the node currently being - * changed/added under the node's mask. So, get rid of it. - */ -delete_rt: - return rtrequest_fib(RTM_DELETE, rt_key(rt), NULL, - rt_mask(rt), rt->rt_flags, NULL, rt->rt_fibnum); -} - int rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) { INIT_VNET_NET(curvnet); /* XXX dst may be overwritten, can we move this to below */ + int dlen = SA_SIZE(dst), glen = SA_SIZE(gate); +#ifdef INVARIANTS struct radix_node_head *rnh = V_rt_tables[rt->rt_fibnum][dst->sa_family]; - int dlen = SA_SIZE(dst), glen = SA_SIZE(gate); +#endif -again: RT_LOCK_ASSERT(rt); RADIX_NODE_HEAD_LOCK_ASSERT(rnh); /* - * A host route with the destination equal to the gateway - * will interfere with keeping LLINFO in the routing - * table, so disallow it. - */ - if (((rt->rt_flags & (RTF_HOST|RTF_GATEWAY|RTF_LLINFO)) == - (RTF_HOST|RTF_GATEWAY)) && - dst->sa_len == gate->sa_len && - bcmp(dst, gate, dst->sa_len) == 0) { - /* - * The route might already exist if this is an RTM_CHANGE - * or a routing redirect, so try to delete it. - */ - if (rt_key(rt)) - rtexpunge(rt); - return EADDRNOTAVAIL; - } - - /* - * Cloning loop avoidance in case of bad configuration. - */ - if (rt->rt_flags & RTF_GATEWAY) { - struct rtentry *gwrt; - - RT_UNLOCK(rt); /* XXX workaround LOR */ - gwrt = rtalloc1_fib(gate, 1, RTF_RNH_LOCKED, rt->rt_fibnum); - if (gwrt == rt) { - RT_REMREF(rt); - return (EADDRINUSE); /* failure */ - } - /* - * Try to reacquire the lock on rt, and if it fails, - * clean state and restart from scratch. - */ - if (!RT_TRYLOCK(rt)) { - RTFREE_LOCKED(gwrt); - RT_LOCK(rt); - goto again; - } - /* - * If there is already a gwroute, then drop it. If we - * are asked to replace route with itself, then do - * not leak its refcounter. - */ - if (rt->rt_gwroute != NULL) { - if (rt->rt_gwroute == gwrt) { - RT_REMREF(rt->rt_gwroute); - } else - RTFREE(rt->rt_gwroute); - } - - if ((rt->rt_gwroute = gwrt) != NULL) - RT_UNLOCK(rt->rt_gwroute); - } - - /* * Prepare to store the gateway in rt->rt_gateway. * Both dst and gateway are stored one after the other in the same * malloc'd chunk. If we have room, we can reuse the old buffer, @@ -1446,21 +1105,7 @@ again: */ bcopy(gate, rt->rt_gateway, glen); - /* - * This isn't going to do anything useful for host routes, so - * don't bother. Also make sure we have a reasonable mask - * (we don't yet have one during adds). - */ - if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != 0) { - struct rtfc_arg arg; - - arg.rnh = rnh; - arg.rt0 = rt; - rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt), - rt_fixchange, &arg); - } - - return 0; + return (0); } static void @@ -1500,6 +1145,7 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) char tempbuf[_SOCKADDR_TMPSIZE]; int didwork = 0; int a_failure = 0; + static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; if (flags & RTF_HOST) { dst = ifa->ifa_dstaddr; @@ -1604,7 +1250,14 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) info.rti_ifa = ifa; info.rti_flags = flags | ifa->ifa_flags; info.rti_info[RTAX_DST] = dst; - info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; + /* + * doing this for compatibility reasons + */ + if (cmd == RTM_ADD) + info.rti_info[RTAX_GATEWAY] = + (struct sockaddr *)&null_sdl; + else + info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; info.rti_info[RTAX_NETMASK] = netmask; error = rtrequest1_fib(cmd, &info, &rt, fibnum); if (error == 0 && rt != NULL) { @@ -1628,6 +1281,15 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) rt->rt_ifa = ifa; } #endif + /* + * doing this for compatibility reasons + */ + if (cmd == RTM_ADD) { + ((struct sockaddr_dl *)rt->rt_gateway)->sdl_type = + rt->rt_ifp->if_type; + ((struct sockaddr_dl *)rt->rt_gateway)->sdl_index = + rt->rt_ifp->if_index; + } rt_newaddrmsg(cmd, ifa, error, rt); if (cmd == RTM_DELETE) { /* @@ -1696,147 +1358,5 @@ rtinit(struct ifaddr *ifa, int cmd, int flags) return (rtinit1(ifa, cmd, flags, fib)); } -/* - * rt_check() is invoked on each layer 2 output path, prior to - * encapsulating outbound packets. - * - * The function is mostly used to find a routing entry for the gateway, - * which in some protocol families could also point to the link-level - * address for the gateway itself (the side effect of revalidating the - * route to the destination is rather pointless at this stage, we did it - * already a moment before in the pr_output() routine to locate the ifp - * and gateway to use). - * - * When we remove the layer-3 to layer-2 mapping tables from the - * routing table, this function can be removed. - * - * === On input === - * *dst is the address of the NEXT HOP (which coincides with the - * final destination if directly reachable); - * *lrt0 points to the cached route to the final destination; - * *lrt is not meaningful; - * (*lrt0 has no ref held on it by us so REMREF is not needed. - * Refs only account for major structural references and not usages, - * which is actually a bit of a problem.) - * - * === Operation === - * If the route is marked down try to find a new route. If the route - * to the gateway is gone, try to setup a new route. Otherwise, - * if the route is marked for packets to be rejected, enforce that. - * Note that rtalloc returns an rtentry with an extra REF that we may - * need to lose. - * - * === On return === - * *dst is unchanged; - * *lrt0 points to the (possibly new) route to the final destination - * *lrt points to the route to the next hop [LOCKED] - * - * Their values are meaningful ONLY if no error is returned. - * - * To follow this you have to remember that: - * RT_REMREF reduces the reference count by 1 but doesn't check it for 0 (!) - * RTFREE_LOCKED includes an RT_REMREF (or an rtfree if refs == 1) - * and an RT_UNLOCK - * RTFREE does an RT_LOCK and an RTFREE_LOCKED - * The gwroute pointer counts as a reference on the rtentry to which it points. - * so when we add it we use the ref that rtalloc gives us and when we lose it - * we need to remove the reference. - * RT_TEMP_UNLOCK does an RT_ADDREF before freeing the lock, and - * RT_RELOCK locks it (it can't have gone away due to the ref) and - * drops the ref, possibly freeing it and zeroing the pointer if - * the ref goes to 0 (unlocking in the process). - */ -int -rt_check(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst) -{ - struct rtentry *rt; - struct rtentry *rt0; - u_int fibnum; - - KASSERT(*lrt0 != NULL, ("rt_check")); - rt0 = *lrt0; - rt = NULL; - fibnum = rt0->rt_fibnum; - - /* NB: the locking here is tortuous... */ - RT_LOCK(rt0); -retry: - if (rt0 && (rt0->rt_flags & RTF_UP) == 0) { - /* Current rt0 is useless, try get a replacement. */ - RT_UNLOCK(rt0); - rt0 = NULL; - } - if (rt0 == NULL) { - rt0 = rtalloc1_fib(dst, 1, 0UL, fibnum); - if (rt0 == NULL) { - return (EHOSTUNREACH); - } - RT_REMREF(rt0); /* don't need the reference. */ - } - - if (rt0->rt_flags & RTF_GATEWAY) { - if ((rt = rt0->rt_gwroute) != NULL) { - RT_LOCK(rt); /* NB: gwroute */ - if ((rt->rt_flags & RTF_UP) == 0) { - /* gw route is dud. ignore/lose it */ - RTFREE_LOCKED(rt); /* unref (&unlock) gwroute */ - rt = rt0->rt_gwroute = NULL; - } - } - - if (rt == NULL) { /* NOT AN ELSE CLAUSE */ - RT_TEMP_UNLOCK(rt0); /* MUST return to undo this */ - rt = rtalloc1_fib(rt0->rt_gateway, 1, 0UL, fibnum); - if ((rt == rt0) || (rt == NULL)) { - /* the best we can do is not good enough */ - if (rt) { - RT_REMREF(rt); /* assumes ref > 0 */ - RT_UNLOCK(rt); - } - RTFREE(rt0); /* lock, unref, (unlock) */ - return (ENETUNREACH); - } - /* - * Relock it and lose the added reference. - * All sorts of things could have happenned while we - * had no lock on it, so check for them. - */ - RT_RELOCK(rt0); - if (rt0 == NULL || ((rt0->rt_flags & RTF_UP) == 0)) - /* Ru-roh.. what we had is no longer any good */ - goto retry; - /* - * While we were away, someone replaced the gateway. - * Since a reference count is involved we can't just - * overwrite it. - */ - if (rt0->rt_gwroute) { - if (rt0->rt_gwroute != rt) { - RTFREE_LOCKED(rt); - goto retry; - } - } else { - rt0->rt_gwroute = rt; - } - } - RT_LOCK_ASSERT(rt); - RT_UNLOCK(rt0); - } else { - /* think of rt as having the lock from now on.. */ - rt = rt0; - } - /* XXX why are we inspecting rmx_expire? */ - if ((rt->rt_flags & RTF_REJECT) && - (rt->rt_rmx.rmx_expire == 0 || - time_uptime < rt->rt_rmx.rmx_expire)) { - RT_UNLOCK(rt); - return (rt == rt0 ? EHOSTDOWN : EHOSTUNREACH); - } - - *lrt = rt; - *lrt0 = rt0; - return (0); -} - /* This must be before ip6_init2(), which is now SI_ORDER_MIDDLE */ SYSINIT(route, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0); diff --git a/sys/net/route.h b/sys/net/route.h index fb8df39..5062198 100644 --- a/sys/net/route.h +++ b/sys/net/route.h @@ -135,15 +135,11 @@ struct rtentry { #define rt_key(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_key))) #define rt_mask(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_mask))) struct sockaddr *rt_gateway; /* value */ - u_long rt_flags; /* up/down?, host/net */ + int rt_flags; /* up/down?, host/net */ + int rt_refcnt; /* # held references */ struct ifnet *rt_ifp; /* the answer: interface to use */ struct ifaddr *rt_ifa; /* the answer: interface address to use */ struct rt_metrics_lite rt_rmx; /* metrics used by rx'ing protocols */ - long rt_refcnt; /* # held references */ - struct sockaddr *rt_genmask; /* for generation of cloned routes */ - caddr_t rt_llinfo; /* pointer to link level info cache */ - struct rtentry *rt_gwroute; /* implied entry for gatewayed routes */ - struct rtentry *rt_parent; /* cloning parent of this route */ u_int rt_fibnum; /* which FIB */ #ifdef _KERNEL /* XXX ugly, user apps use this definition but don't have a mtx def */ @@ -175,9 +171,9 @@ struct ortentry { #define RTF_MODIFIED 0x20 /* modified dynamically (by redirect) */ #define RTF_DONE 0x40 /* message confirmed */ /* 0x80 unused, was RTF_DELCLONE */ -#define RTF_CLONING 0x100 /* generate new routes on use */ +/* 0x100 unused, was RTF_CLONING */ #define RTF_XRESOLVE 0x200 /* external daemon resolves name */ -#define RTF_LLINFO 0x400 /* generated by link layer (e.g. ARP) */ +/* 0x400 unused, was RTF_LLINFO */ #define RTF_STATIC 0x800 /* manually added */ #define RTF_BLACKHOLE 0x1000 /* just discard pkts (during updates) */ #define RTF_PROTO2 0x4000 /* protocol specific routing flag */ @@ -188,7 +184,7 @@ struct ortentry { #define RTF_PRCLONING 0x10000 /* unused, for compatibility */ #endif -#define RTF_WASCLONED 0x20000 /* route generated through cloning */ +/* 0x20000 unused, was RTF_WASCLONED */ #define RTF_PROTO3 0x40000 /* protocol specific routing flag */ /* 0x80000 unused */ #define RTF_PINNED 0x100000 /* future use */ @@ -326,14 +322,14 @@ struct rt_addrinfo { #define RT_ADDREF(_rt) do { \ RT_LOCK_ASSERT(_rt); \ KASSERT((_rt)->rt_refcnt >= 0, \ - ("negative refcnt %ld", (_rt)->rt_refcnt)); \ + ("negative refcnt %d", (_rt)->rt_refcnt)); \ (_rt)->rt_refcnt++; \ } while (0) #define RT_REMREF(_rt) do { \ RT_LOCK_ASSERT(_rt); \ KASSERT((_rt)->rt_refcnt > 0, \ - ("bogus refcnt %ld", (_rt)->rt_refcnt)); \ + ("bogus refcnt %d", (_rt)->rt_refcnt)); \ (_rt)->rt_refcnt--; \ } while (0) @@ -409,7 +405,6 @@ void rtredirect(struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct sockaddr *); int rtrequest(int, struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct rtentry **); -int rtrequest1(int, struct rt_addrinfo *, struct rtentry **); /* defaults to "all" FIBs */ int rtinit_fib(struct ifaddr *, int, int); diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c index 5003b03..30591c7 100644 --- a/sys/net/rtsock.c +++ b/sys/net/rtsock.c @@ -53,6 +53,7 @@ #include <sys/vimage.h> #include <net/if.h> +#include <net/if_llatbl.h> #include <net/netisr.h> #include <net/raw_cb.h> #include <net/route.h> @@ -496,19 +497,6 @@ route_output(struct mbuf *m, struct socket *so) (info.rti_info[RTAX_GATEWAY] != NULL && info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX)) senderr(EINVAL); - if (info.rti_info[RTAX_GENMASK]) { - struct radix_node *t; - t = rn_addmask((caddr_t) info.rti_info[RTAX_GENMASK], 0, 1); - if (t != NULL && - bcmp((char *)(void *)info.rti_info[RTAX_GENMASK] + 1, - (char *)(void *)t->rn_key + 1, - ((struct sockaddr *)t->rn_key)->sa_len - 1) == 0) - info.rti_info[RTAX_GENMASK] = - (struct sockaddr *)t->rn_key; - else - senderr(ENOBUFS); - } - /* * Verify that the caller has the appropriate privilege; RTM_GET * is the only operation the non-superuser is allowed. @@ -526,6 +514,11 @@ route_output(struct mbuf *m, struct socket *so) if (info.rti_info[RTAX_GATEWAY] == NULL) senderr(EINVAL); saved_nrt = NULL; + /* support for new ARP code */ + if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK) { + error = lla_rt_output(rtm, &info); + break; + } error = rtrequest1_fib(RTM_ADD, &info, &saved_nrt, so->so_fibnum); if (error == 0 && saved_nrt) { @@ -534,13 +527,18 @@ route_output(struct mbuf *m, struct socket *so) &rtm->rtm_rmx, &saved_nrt->rt_rmx); rtm->rtm_index = saved_nrt->rt_ifp->if_index; RT_REMREF(saved_nrt); - saved_nrt->rt_genmask = info.rti_info[RTAX_GENMASK]; RT_UNLOCK(saved_nrt); } break; case RTM_DELETE: saved_nrt = NULL; + /* support for new ARP code */ + if (info.rti_info[RTAX_GATEWAY] && + (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK)) { + error = lla_rt_output(rtm, &info); + break; + } error = rtrequest1_fib(RTM_DELETE, &info, &saved_nrt, so->so_fibnum); if (error == 0) { @@ -612,7 +610,7 @@ route_output(struct mbuf *m, struct socket *so) info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); - info.rti_info[RTAX_GENMASK] = rt->rt_genmask; + info.rti_info[RTAX_GENMASK] = 0; if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) { ifp = rt->rt_ifp; if (ifp) { @@ -699,8 +697,7 @@ route_output(struct mbuf *m, struct socket *so) RT_UNLOCK(rt); senderr(error); } - if (!(rt->rt_flags & RTF_LLINFO)) - rt->rt_flags |= RTF_GATEWAY; + rt->rt_flags |= RTF_GATEWAY; } if (info.rti_ifa != NULL && info.rti_ifa != rt->rt_ifa) { @@ -718,8 +715,6 @@ route_output(struct mbuf *m, struct socket *so) rtm->rtm_index = rt->rt_ifp->if_index; if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest) rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info); - if (info.rti_info[RTAX_GENMASK]) - rt->rt_genmask = info.rti_info[RTAX_GENMASK]; /* FALLTHROUGH */ case RTM_LOCK: /* We don't support locks anymore */ @@ -1261,7 +1256,7 @@ sysctl_dumpentry(struct radix_node *rn, void *vw) info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); - info.rti_info[RTAX_GENMASK] = rt->rt_genmask; + info.rti_info[RTAX_GENMASK] = 0; if (rt->rt_ifp) { info.rti_info[RTAX_IFP] = rt->rt_ifp->if_addr->ifa_addr; info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; @@ -1440,6 +1435,11 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS) RADIX_NODE_HEAD_UNLOCK(rnh); } else if (af != 0) error = EAFNOSUPPORT; + /* + * take care of llinfo entries + */ + if (w.w_op == NET_RT_FLAGS) + error = lltable_sysctl_dumparp(af, w.w_req); break; case NET_RT_IFLIST: diff --git a/sys/netgraph/netflow/netflow.c b/sys/netgraph/netflow/netflow.c index 5443629..411730a 100644 --- a/sys/netgraph/netflow/netflow.c +++ b/sys/netgraph/netflow/netflow.c @@ -270,7 +270,7 @@ hash_insert(priv_p priv, struct flow_hash_entry *hsh, struct flow_rec *r, sin.sin_family = AF_INET; sin.sin_addr = fle->f.r.r_dst; /* XXX MRT 0 as a default.. need the m here to get fib */ - rt = rtalloc1_fib((struct sockaddr *)&sin, 0, RTF_CLONING, 0); + rt = rtalloc1_fib((struct sockaddr *)&sin, 0, 0, 0); if (rt != NULL) { fle->f.fle_o_ifx = rt->rt_ifp->if_index; @@ -295,7 +295,7 @@ hash_insert(priv_p priv, struct flow_hash_entry *hsh, struct flow_rec *r, sin.sin_family = AF_INET; sin.sin_addr = fle->f.r.r_src; /* XXX MRT 0 as a default revisit. need the mbuf for fib*/ - rt = rtalloc1_fib((struct sockaddr *)&sin, 0, RTF_CLONING, 0); + rt = rtalloc1_fib((struct sockaddr *)&sin, 0, 0, 0); if (rt != NULL) { if (rt_mask(rt)) fle->f.src_mask = bitcount32(((struct sockaddr_in *) diff --git a/sys/netinet/if_atm.c b/sys/netinet/if_atm.c index 065f0c4..d534a18 100644 --- a/sys/netinet/if_atm.c +++ b/sys/netinet/if_atm.c @@ -127,10 +127,6 @@ atm_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info) break; } - if ((rt->rt_flags & RTF_CLONING) != 0) { - printf("atm_rtrequest: cloning route detected?\n"); - break; - } if (gate->sa_family != AF_LINK || gate->sa_len < sizeof(null_sdl)) { log(LOG_DEBUG, "atm_rtrequest: bad gateway value"); @@ -332,8 +328,6 @@ atmresolve(struct rtentry *rt, struct mbuf *m, struct sockaddr *dst, goto bad; /* failed */ RT_REMREF(rt); /* don't keep LL references */ if ((rt->rt_flags & RTF_GATEWAY) != 0 || - (rt->rt_flags & RTF_LLINFO) == 0 || - /* XXX: are we using LLINFO? */ rt->rt_gateway->sa_family != AF_LINK) { RT_UNLOCK(rt); goto bad; diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c index da7946d..9a67712 100644 --- a/sys/netinet/if_ether.c +++ b/sys/netinet/if_ether.c @@ -65,6 +65,7 @@ __FBSDID("$FreeBSD$"); #include <netinet/in.h> #include <netinet/in_var.h> +#include <net/if_llatbl.h> #include <netinet/if_ether.h> #include <netinet/vinet.h> @@ -79,6 +80,7 @@ __FBSDID("$FreeBSD$"); #define SIN(s) ((struct sockaddr_in *)s) #define SDL(s) ((struct sockaddr_dl *)s) +#define LLTABLE(ifp) ((struct lltable *)(ifp)->if_afdata[AF_INET]) SYSCTL_DECL(_net_link_ether); SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, ""); @@ -87,23 +89,13 @@ SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, ""); #ifdef VIMAGE_GLOBALS static int arpt_keep; /* once resolved, good for 20 more minutes */ static int arp_maxtries; -static int useloopback; /* use loopback interface for local traffic */ +int useloopback; /* use loopback interface for local traffic */ static int arp_proxyall; #endif SYSCTL_V_INT(V_NET, vnet_inet, _net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_RW, arpt_keep, 0, "ARP entry lifetime in seconds"); -#define rt_expire rt_rmx.rmx_expire - -struct llinfo_arp { - struct callout la_timer; - struct rtentry *la_rt; - struct mbuf *la_hold; /* last packet until resolved/timeout */ - u_short la_preempt; /* countdown for pre-expiry arps */ - u_short la_asked; /* # requests sent */ -}; - static struct ifqueue arpintrq; SYSCTL_V_INT(V_NET, vnet_inet, _net_link_ether_inet, OID_AUTO, maxtries, @@ -117,196 +109,65 @@ SYSCTL_V_INT(V_NET, vnet_inet, _net_link_ether_inet, OID_AUTO, proxyall, "Enable proxy ARP for all suitable requests"); static void arp_init(void); -static void arp_rtrequest(int, struct rtentry *, struct rt_addrinfo *); -static void arprequest(struct ifnet *, +void arprequest(struct ifnet *, struct in_addr *, struct in_addr *, u_char *); static void arpintr(struct mbuf *); static void arptimer(void *); -static struct rtentry - *arplookup(u_long, int, int, int); #ifdef INET static void in_arpinput(struct mbuf *); #endif +#ifdef AF_INET +void arp_ifscrub(struct ifnet *ifp, uint32_t addr); + /* - * Timeout routine. + * called by in_ifscrub to remove entry from the table when + * the interface goes away */ -static void -arptimer(void *arg) +void +arp_ifscrub(struct ifnet *ifp, uint32_t addr) { - struct rtentry *rt = (struct rtentry *)arg; - - RT_LOCK_ASSERT(rt); - /* - * The lock is needed to close a theoretical race - * between spontaneous expiry and intentional removal. - * We still got an extra reference on rtentry, so can - * safely pass pointers to its contents. - */ - RT_UNLOCK(rt); - - in_rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt), 0, NULL, - rt->rt_fibnum); + struct sockaddr_in addr4; + + bzero((void *)&addr4, sizeof(addr4)); + addr4.sin_len = sizeof(addr4); + addr4.sin_family = AF_INET; + addr4.sin_addr.s_addr = addr; + IF_AFDATA_LOCK(ifp); + lla_lookup(LLTABLE(ifp), (LLE_DELETE | LLE_IFADDR), + (struct sockaddr *)&addr4); + IF_AFDATA_UNLOCK(ifp); } +#endif /* - * Parallel to llc_rtrequest. + * Timeout routine. Age arp_tab entries periodically. */ static void -arp_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info) +arptimer(void *arg) { - INIT_VNET_NET(curvnet); - INIT_VNET_INET(curvnet); - struct sockaddr *gate; - struct llinfo_arp *la; - static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; - struct in_ifaddr *ia; - struct ifaddr *ifa; - - RT_LOCK_ASSERT(rt); + struct ifnet *ifp; + struct llentry *lle = (struct llentry *)arg; - if (rt->rt_flags & RTF_GATEWAY) + if (lle == NULL) { + panic("%s: NULL entry!\n", __func__); return; - gate = rt->rt_gateway; - la = (struct llinfo_arp *)rt->rt_llinfo; - switch (req) { - - case RTM_ADD: - /* - * XXX: If this is a manually added route to interface - * such as older version of routed or gated might provide, - * restore cloning bit. - */ - if ((rt->rt_flags & RTF_HOST) == 0 && - rt_mask(rt) != NULL && - SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff) - rt->rt_flags |= RTF_CLONING; - if (rt->rt_flags & RTF_CLONING) { - /* - * Case 1: This route should come from a route to iface. - */ - rt_setgate(rt, rt_key(rt), - (struct sockaddr *)&null_sdl); - gate = rt->rt_gateway; - SDL(gate)->sdl_type = rt->rt_ifp->if_type; - SDL(gate)->sdl_index = rt->rt_ifp->if_index; - rt->rt_expire = time_uptime; - break; - } - /* Announce a new entry if requested. */ - if (rt->rt_flags & RTF_ANNOUNCE) - arprequest(rt->rt_ifp, - &SIN(rt_key(rt))->sin_addr, - &SIN(rt_key(rt))->sin_addr, - (u_char *)LLADDR(SDL(gate))); - /*FALLTHROUGH*/ - case RTM_RESOLVE: - if (gate->sa_family != AF_LINK || - gate->sa_len < sizeof(null_sdl)) { - log(LOG_DEBUG, "%s: bad gateway %s%s\n", __func__, - inet_ntoa(SIN(rt_key(rt))->sin_addr), - (gate->sa_family != AF_LINK) ? - " (!AF_LINK)": ""); - break; - } - SDL(gate)->sdl_type = rt->rt_ifp->if_type; - SDL(gate)->sdl_index = rt->rt_ifp->if_index; - if (la != 0) - break; /* This happens on a route change */ - /* - * Case 2: This route may come from cloning, or a manual route - * add with a LL address. - */ - R_Zalloc(la, struct llinfo_arp *, sizeof(*la)); - rt->rt_llinfo = (caddr_t)la; - if (la == 0) { - log(LOG_DEBUG, "%s: malloc failed\n", __func__); - break; - } - /* - * We are storing a route entry outside of radix tree. So, - * it can be found and accessed by other means than radix - * lookup. The routing code assumes that any rtentry detached - * from radix can be destroyed safely. To prevent this, we - * add an additional reference. - */ - RT_ADDREF(rt); - la->la_rt = rt; - rt->rt_flags |= RTF_LLINFO; - callout_init_mtx(&la->la_timer, &rt->rt_mtx, - CALLOUT_RETURNUNLOCKED); - -#ifdef INET + } + ifp = lle->lle_tbl->llt_ifp; + IF_AFDATA_LOCK(ifp); + LLE_WLOCK(lle); + if ((lle->la_flags & LLE_DELETED) || + (time_second >= lle->la_expire)) { + if (!callout_pending(&lle->la_timer) && + callout_active(&lle->la_timer)) + (void) llentry_free(lle); + } else { /* - * This keeps the multicast addresses from showing up - * in `arp -a' listings as unresolved. It's not actually - * functional. Then the same for broadcast. + * Still valid, just drop our reference */ - if (IN_MULTICAST(ntohl(SIN(rt_key(rt))->sin_addr.s_addr)) && - rt->rt_ifp->if_type != IFT_ARCNET) { - ETHER_MAP_IP_MULTICAST(&SIN(rt_key(rt))->sin_addr, - LLADDR(SDL(gate))); - SDL(gate)->sdl_alen = 6; - rt->rt_expire = 0; - } - if (in_broadcast(SIN(rt_key(rt))->sin_addr, rt->rt_ifp)) { - memcpy(LLADDR(SDL(gate)), rt->rt_ifp->if_broadcastaddr, - rt->rt_ifp->if_addrlen); - SDL(gate)->sdl_alen = rt->rt_ifp->if_addrlen; - rt->rt_expire = 0; - } -#endif - - TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { - if (ia->ia_ifp == rt->rt_ifp && - SIN(rt_key(rt))->sin_addr.s_addr == - (IA_SIN(ia))->sin_addr.s_addr) - break; - } - if (ia) { - /* - * This test used to be - * if (loif.if_flags & IFF_UP) - * It allowed local traffic to be forced - * through the hardware by configuring the loopback down. - * However, it causes problems during network configuration - * for boards that can't receive packets they send. - * It is now necessary to clear "useloopback" and remove - * the route to force traffic out to the hardware. - */ - rt->rt_expire = 0; - bcopy(IF_LLADDR(rt->rt_ifp), LLADDR(SDL(gate)), - SDL(gate)->sdl_alen = rt->rt_ifp->if_addrlen); - if (V_useloopback) { - rt->rt_ifp = V_loif; - rt->rt_rmx.rmx_mtu = V_loif->if_mtu; - } - - /* - * make sure to set rt->rt_ifa to the interface - * address we are using, otherwise we will have trouble - * with source address selection. - */ - ifa = &ia->ia_ifa; - if (ifa != rt->rt_ifa) { - IFAFREE(rt->rt_ifa); - IFAREF(ifa); - rt->rt_ifa = ifa; - } - } - break; - - case RTM_DELETE: - if (la == NULL) /* XXX: at least CARP does this. */ - break; - callout_stop(&la->la_timer); - rt->rt_llinfo = NULL; - rt->rt_flags &= ~RTF_LLINFO; - RT_REMREF(rt); - if (la->la_hold) - m_freem(la->la_hold); - Free((caddr_t)la); + LLE_FREE_LOCKED(lle); } + IF_AFDATA_UNLOCK(ifp); } /* @@ -315,14 +176,37 @@ arp_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info) * - arp header target ip address * - arp header source ethernet address */ -static void -arprequest(struct ifnet *ifp, struct in_addr *sip, struct in_addr *tip, +void +arprequest(struct ifnet *ifp, struct in_addr *sip, struct in_addr *tip, u_char *enaddr) { struct mbuf *m; struct arphdr *ah; struct sockaddr sa; + if (sip == NULL) { + /* XXX don't believe this can happen (or explain why) */ + /* + * The caller did not supply a source address, try to find + * a compatible one among those assigned to this interface. + */ + struct ifaddr *ifa; + + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (!ifa->ifa_addr || + ifa->ifa_addr->sa_family != AF_INET) + continue; + sip = &SIN(ifa->ifa_addr)->sin_addr; + if (0 == ((sip->s_addr ^ tip->s_addr) & + SIN(ifa->ifa_netmask)->sin_addr.s_addr) ) + break; /* found it. */ + } + if (sip == NULL) { + printf("%s: cannot find matching address\n", __func__); + return; + } + } + if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL) return; m->m_len = sizeof(*ah) + 2*sizeof(struct in_addr) + @@ -345,8 +229,6 @@ arprequest(struct ifnet *ifp, struct in_addr *sip, struct in_addr *tip, sa.sa_len = 2; m->m_flags |= M_BCAST; (*ifp->if_output)(ifp, m, &sa, (struct rtentry *)0); - - return; } /* @@ -365,16 +247,15 @@ arprequest(struct ifnet *ifp, struct in_addr *sip, struct in_addr *tip, */ int arpresolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m, - struct sockaddr *dst, u_char *desten) + struct sockaddr *dst, u_char *desten, struct llentry **lle) { INIT_VNET_INET(ifp->if_vnet); - struct llinfo_arp *la = NULL; - struct rtentry *rt = NULL; - struct sockaddr_dl *sdl; - int error; - int fibnum = -1; + struct llentry *la = 0; + u_int flags; + int error, renew; - if (m) { + *lle = NULL; + if (m != NULL) { if (m->m_flags & M_BCAST) { /* broadcast */ (void)memcpy(desten, @@ -386,110 +267,78 @@ arpresolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m, ETHER_MAP_IP_MULTICAST(&SIN(dst)->sin_addr, desten); return (0); } - fibnum = M_GETFIB(m); } - if (rt0 != NULL) { - /* Look for a cached arp (ll) entry. */ - if (m == NULL) - fibnum = rt0->rt_fibnum; - error = rt_check(&rt, &rt0, dst); - if (error) { - m_freem(m); - return error; - } - la = (struct llinfo_arp *)rt->rt_llinfo; - if (la == NULL) - RT_UNLOCK(rt); - } + flags = (ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) ? 0 : LLE_CREATE; - /* - * If we had no mbuf and no route, then hope the caller - * has a fib in mind because we are running out of ideas. - * I think this should not happen in current code. - * (kmacy would know). + /* XXXXX + * Since this function returns an llentry, the + * lock is held by the caller. + * XXX if caller is required to hold lock, assert it */ - if (fibnum == -1) - fibnum = curthread->td_proc->p_fibnum; /* last gasp */ - +retry: + IF_AFDATA_LOCK(ifp); + la = lla_lookup(LLTABLE(ifp), flags, dst); + IF_AFDATA_UNLOCK(ifp); if (la == NULL) { - /* - * We enter this block if rt0 was NULL, - * or if rt found by rt_check() didn't have llinfo. - * we should get a cloned route, which since it should - * come from the local interface should have a ll entry. - * It may be incomplete but that's ok. - */ - rt = arplookup(SIN(dst)->sin_addr.s_addr, 1, 0, fibnum); - if (rt == NULL) { - log(LOG_DEBUG, - "arpresolve: can't allocate route for %s\n", - inet_ntoa(SIN(dst)->sin_addr)); - m_freem(m); - return (EINVAL); /* XXX */ - } - la = (struct llinfo_arp *)rt->rt_llinfo; - if (la == NULL) { - RT_UNLOCK(rt); + if (flags & LLE_CREATE) log(LOG_DEBUG, "arpresolve: can't allocate llinfo for %s\n", inet_ntoa(SIN(dst)->sin_addr)); - m_freem(m); - return (EINVAL); /* XXX */ - } - } - sdl = SDL(rt->rt_gateway); - /* - * Check the address family and length is valid, the address - * is resolved; otherwise, try to resolve. - */ - if ((rt->rt_expire == 0 || rt->rt_expire > time_uptime) && - sdl->sdl_family == AF_LINK && sdl->sdl_alen != 0) { - - bcopy(LLADDR(sdl), desten, sdl->sdl_alen); + m_freem(m); + return (EINVAL); + } + if ((la->la_flags & LLE_VALID) && + ((la->la_flags & LLE_STATIC) || la->la_expire > time_uptime)) { + bcopy(&la->ll_addr, desten, ifp->if_addrlen); /* * If entry has an expiry time and it is approaching, - * send an ARP request. + * see if we need to send an ARP request within this + * arpt_down interval. */ - if ((rt->rt_expire != 0) && - (time_uptime + la->la_preempt > rt->rt_expire)) { - struct in_addr sin = - SIN(rt->rt_ifa->ifa_addr)->sin_addr; + if (!(la->la_flags & LLE_STATIC) && + time_uptime + la->la_preempt > la->la_expire) { + arprequest(ifp, NULL, + &SIN(dst)->sin_addr, IF_LLADDR(ifp)); la->la_preempt--; - RT_UNLOCK(rt); - arprequest(ifp, &sin, &SIN(dst)->sin_addr, - IF_LLADDR(ifp)); - return (0); - } - - RT_UNLOCK(rt); - return (0); - } - /* - * If ARP is disabled or static on this interface, stop. - * XXX - * Probably should not allocate empty llinfo struct if we are - * not going to be sending out an arp request. - */ - if (ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) { - RT_UNLOCK(rt); + } + + *lle = la; + error = 0; + goto done; + } + + if (la->la_flags & LLE_STATIC) { /* should not happen! */ + log(LOG_DEBUG, "arpresolve: ouch, empty static llinfo for %s\n", + inet_ntoa(SIN(dst)->sin_addr)); m_freem(m); - return (EINVAL); + error = EINVAL; + goto done; + } + + renew = (la->la_asked == 0 || la->la_expire != time_uptime); + if ((renew || m != NULL) && (flags & LLE_EXCLUSIVE) == 0) { + flags |= LLE_EXCLUSIVE; + LLE_RUNLOCK(la); + goto retry; } /* * There is an arptab entry, but no ethernet address * response yet. Replace the held mbuf with this * latest one. */ - if (m) { - if (la->la_hold) + if (m != NULL) { + if (la->la_hold != NULL) m_freem(la->la_hold); la->la_hold = m; + if (renew == 0 && (flags & LLE_EXCLUSIVE)) { + flags &= ~LLE_EXCLUSIVE; + LLE_DOWNGRADE(la); + } + } - KASSERT(rt->rt_expire > 0, ("sending ARP request for static entry")); - /* * Return EWOULDBLOCK if we have tried less than arp_maxtries. It * will be masked by ether_output(). Return EHOSTDOWN/EHOSTUNREACH @@ -499,22 +348,24 @@ arpresolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m, if (la->la_asked < V_arp_maxtries) error = EWOULDBLOCK; /* First request. */ else - error = (rt == rt0) ? EHOSTDOWN : EHOSTUNREACH; + error = + (rt0->rt_flags & RTF_GATEWAY) ? EHOSTDOWN : EHOSTUNREACH; - if (la->la_asked == 0 || rt->rt_expire != time_uptime) { - struct in_addr sin = - SIN(rt->rt_ifa->ifa_addr)->sin_addr; - - rt->rt_expire = time_uptime; - callout_reset(&la->la_timer, hz, arptimer, rt); + if (renew) { + LLE_ADDREF(la); + la->la_expire = time_uptime; + callout_reset(&la->la_timer, hz, arptimer, la); la->la_asked++; - RT_UNLOCK(rt); - - arprequest(ifp, &sin, &SIN(dst)->sin_addr, + LLE_WUNLOCK(la); + arprequest(ifp, NULL, &SIN(dst)->sin_addr, IF_LLADDR(ifp)); - } else - RT_UNLOCK(rt); - + return (error); + } +done: + if (flags & LLE_EXCLUSIVE) + LLE_WUNLOCK(la); + else + LLE_RUNLOCK(la); return (error); } @@ -598,21 +449,17 @@ in_arpinput(struct mbuf *m) { struct arphdr *ah; struct ifnet *ifp = m->m_pkthdr.rcvif; - struct llinfo_arp *la; + struct llentry *la = NULL; struct rtentry *rt; struct ifaddr *ifa; struct in_ifaddr *ia; - struct sockaddr_dl *sdl; struct sockaddr sa; struct in_addr isaddr, itaddr, myaddr; - struct mbuf *hold; u_int8_t *enaddr = NULL; - int op, rif_len; + int op, flags; + struct mbuf *m0; int req_len; int bridged = 0, is_bridge = 0; - u_int fibnum; - u_int goodfib = 0; - int firstpass = 1; #ifdef DEV_CARP int carp_match = 0; #endif @@ -649,7 +496,7 @@ in_arpinput(struct mbuf *m) */ LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) { if (((bridged && ia->ia_ifp->if_bridge != NULL) || - (ia->ia_ifp == ifp)) && + ia->ia_ifp == ifp) && itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) goto match; #ifdef DEV_CARP @@ -663,7 +510,7 @@ in_arpinput(struct mbuf *m) } LIST_FOREACH(ia, INADDR_HASH(isaddr.s_addr), ia_hash) if (((bridged && ia->ia_ifp->if_bridge != NULL) || - (ia->ia_ifp == ifp)) && + ia->ia_ifp == ifp) && isaddr.s_addr == ia->ia_addr.sin_addr.s_addr) goto match; @@ -729,191 +576,94 @@ match: } if (ifp->if_flags & IFF_STATICARP) goto reply; - /* - * We look for any FIB that has this address to find - * the interface etc. - * For sanity checks that are FIB independent we abort the loop. - */ - for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { - rt = arplookup(isaddr.s_addr, - itaddr.s_addr == myaddr.s_addr, 0, fibnum); - if (rt == NULL) - continue; - - sdl = SDL(rt->rt_gateway); - /* Only call this once */ - if (firstpass) { - sin.sin_addr.s_addr = isaddr.s_addr; - EVENTHANDLER_INVOKE(route_arp_update_event, rt, - ar_sha(ah), (struct sockaddr *)&sin); - } - - la = (struct llinfo_arp *)rt->rt_llinfo; - if (la == NULL) { - RT_UNLOCK(rt); - continue; - } - if (firstpass) { - /* The following is not an error when doing bridging. */ - if (!bridged && rt->rt_ifp != ifp + bzero(&sin, sizeof(sin)); + sin.sin_len = sizeof(struct sockaddr_in); + sin.sin_family = AF_INET; + sin.sin_addr = isaddr; + flags = (itaddr.s_addr == myaddr.s_addr) ? LLE_CREATE : 0; + flags |= LLE_EXCLUSIVE; + IF_AFDATA_LOCK(ifp); + la = lla_lookup(LLTABLE(ifp), flags, (struct sockaddr *)&sin); + IF_AFDATA_UNLOCK(ifp); + if (la != NULL) { + /* the following is not an error when doing bridging */ + if (!bridged && la->lle_tbl->llt_ifp != ifp #ifdef DEV_CARP - && (ifp->if_type != IFT_CARP || !carp_match) + && (ifp->if_type != IFT_CARP || !carp_match) #endif - ) { - if (log_arp_wrong_iface) - log(LOG_ERR, "arp: %s is on %s " - "but got reply from %*D " - "on %s\n", - inet_ntoa(isaddr), - rt->rt_ifp->if_xname, - ifp->if_addrlen, - (u_char *)ar_sha(ah), ":", - ifp->if_xname); - RT_UNLOCK(rt); - break; - } - if (sdl->sdl_alen && - bcmp(ar_sha(ah), LLADDR(sdl), sdl->sdl_alen)) { - if (rt->rt_expire) { - if (log_arp_movements) - log(LOG_INFO, - "arp: %s moved from %*D to %*D " - "on %s\n", - inet_ntoa(isaddr), - ifp->if_addrlen, - (u_char *)LLADDR(sdl), ":", - ifp->if_addrlen, - (u_char *)ar_sha(ah), ":", - ifp->if_xname); - } else { - RT_UNLOCK(rt); - if (log_arp_permanent_modify) - log(LOG_ERR, - "arp: %*D attempts to " - "modify permanent entry " - "for %s on %s\n", - ifp->if_addrlen, - (u_char *)ar_sha(ah), ":", - inet_ntoa(isaddr), - ifp->if_xname); - break; - } - } - /* - * sanity check for the address length. - * XXX this does not work for protocols - * with variable address length. -is - */ - if (sdl->sdl_alen && - sdl->sdl_alen != ah->ar_hln) { - log(LOG_WARNING, - "arp from %*D: new addr len %d, was %d", - ifp->if_addrlen, (u_char *) ar_sha(ah), - ":", ah->ar_hln, sdl->sdl_alen); + ) { + if (log_arp_wrong_iface) + log(LOG_ERR, "arp: %s is on %s " + "but got reply from %*D on %s\n", + inet_ntoa(isaddr), + la->lle_tbl->llt_ifp->if_xname, + ifp->if_addrlen, (u_char *)ar_sha(ah), ":", + ifp->if_xname); + goto reply; + } + if ((la->la_flags & LLE_VALID) && + bcmp(ar_sha(ah), &la->ll_addr, ifp->if_addrlen)) { + if (la->la_flags & LLE_STATIC) { + log(LOG_ERR, + "arp: %*D attempts to modify permanent " + "entry for %s on %s\n", + ifp->if_addrlen, (u_char *)ar_sha(ah), ":", + inet_ntoa(isaddr), ifp->if_xname); + goto reply; } - if (ifp->if_addrlen != ah->ar_hln) { - log(LOG_WARNING, - "arp from %*D: addr len: " - "new %d, i/f %d (ignored)", - ifp->if_addrlen, (u_char *) ar_sha(ah), - ":", ah->ar_hln, ifp->if_addrlen); - RT_UNLOCK(rt); - break; + if (log_arp_movements) { + log(LOG_INFO, "arp: %s moved from %*D " + "to %*D on %s\n", + inet_ntoa(isaddr), + ifp->if_addrlen, + (u_char *)&la->ll_addr, ":", + ifp->if_addrlen, (u_char *)ar_sha(ah), ":", + ifp->if_xname); } - firstpass = 0; - goodfib = fibnum; } - - /* Copy in the information received. */ - (void)memcpy(LLADDR(sdl), ar_sha(ah), - sdl->sdl_alen = ah->ar_hln); - /* - * If we receive an arp from a token-ring station over - * a token-ring nic then try to save the source routing info. - * XXXMRT Only minimal Token Ring support for MRT. - * Only do this on the first pass as if modifies the mbuf. - */ - if (ifp->if_type == IFT_ISO88025) { - struct iso88025_header *th = NULL; - struct iso88025_sockaddr_dl_data *trld; - - /* force the fib loop to end after this pass */ - fibnum = rt_numfibs - 1; - - th = (struct iso88025_header *)m->m_pkthdr.header; - trld = SDL_ISO88025(sdl); - rif_len = TR_RCF_RIFLEN(th->rcf); - if ((th->iso88025_shost[0] & TR_RII) && - (rif_len > 2)) { - trld->trld_rcf = th->rcf; - trld->trld_rcf ^= htons(TR_RCF_DIR); - memcpy(trld->trld_route, th->rd, rif_len - 2); - trld->trld_rcf &= ~htons(TR_RCF_BCST_MASK); - /* - * Set up source routing information for - * reply packet (XXX) - */ - m->m_data -= rif_len; - m->m_len += rif_len; - m->m_pkthdr.len += rif_len; - } else { - th->iso88025_shost[0] &= ~TR_RII; - trld->trld_rcf = 0; - } - m->m_data -= 8; - m->m_len += 8; - m->m_pkthdr.len += 8; - th->rcf = trld->trld_rcf; + + if (ifp->if_addrlen != ah->ar_hln) { + log(LOG_WARNING, + "arp from %*D: addr len: new %d, i/f %d (ignored)", + ifp->if_addrlen, (u_char *) ar_sha(ah), ":", + ah->ar_hln, ifp->if_addrlen); + goto reply; } + (void)memcpy(&la->ll_addr, ar_sha(ah), ifp->if_addrlen); + la->la_flags |= LLE_VALID; - if (rt->rt_expire) { - rt->rt_expire = time_uptime + V_arpt_keep; + if (!(la->la_flags & LLE_STATIC)) { + la->la_expire = time_uptime + V_arpt_keep; callout_reset(&la->la_timer, hz * V_arpt_keep, - arptimer, rt); + arptimer, la); } la->la_asked = 0; la->la_preempt = V_arp_maxtries; - hold = la->la_hold; - la->la_hold = NULL; - RT_UNLOCK(rt); - if (hold != NULL) - (*ifp->if_output)(ifp, hold, rt_key(rt), rt); - } /* end of FIB loop */ + if (la->la_hold != NULL) { + m0 = la->la_hold; + la->la_hold = 0; + memcpy(&sa, L3_ADDR(la), sizeof(sa)); + LLE_WUNLOCK(la); + + (*ifp->if_output)(ifp, m0, &sa, NULL); + return; + } + } reply: - - /* - * Decide if we have to respond to something. - */ if (op != ARPOP_REQUEST) goto drop; + if (itaddr.s_addr == myaddr.s_addr) { /* Shortcut.. the receiving interface is the target. */ (void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln); (void)memcpy(ar_sha(ah), enaddr, ah->ar_hln); } else { - /* It's not asking for our address. But it still may - * be something we should answer. - * - * XXX MRT - * We assume that link level info is independent of - * the table used and so we use whichever we can and don't - * have a better option. - */ - /* Have we been asked to proxy for the target. */ - rt = arplookup(itaddr.s_addr, 0, SIN_PROXY, goodfib); - if (rt == NULL) { - /* Nope, only intersted now if proxying everything. */ - struct sockaddr_in sin; - + if (la == NULL) { if (!V_arp_proxyall) goto drop; - bzero(&sin, sizeof sin); - sin.sin_family = AF_INET; - sin.sin_len = sizeof sin; sin.sin_addr = itaddr; - /* XXX MRT use table 0 for arp reply */ rt = in_rtalloc1((struct sockaddr *)&sin, 0, 0UL, 0); if (!rt) @@ -964,19 +714,18 @@ reply: * Otherwise we may conflict with the host we are * proxying for. */ - if (rt->rt_ifp != ifp && - (rt->rt_ifp->if_bridge != ifp->if_bridge || + if (la->lle_tbl->llt_ifp != ifp && + (la->lle_tbl->llt_ifp->if_bridge != ifp->if_bridge || ifp->if_bridge == NULL)) { - RT_UNLOCK(rt); goto drop; } - sdl = SDL(rt->rt_gateway); (void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln); - (void)memcpy(ar_sha(ah), LLADDR(sdl), ah->ar_hln); - RT_UNLOCK(rt); + (void)memcpy(ar_sha(ah), &la->ll_addr, ah->ar_hln); } } + if (la != NULL) + LLE_WUNLOCK(la); if (itaddr.s_addr == myaddr.s_addr && IN_LINKLOCAL(ntohl(itaddr.s_addr))) { /* RFC 3927 link-local IPv4; always reply by broadcast. */ @@ -1002,68 +751,34 @@ reply: return; drop: + if (la != NULL) + LLE_WUNLOCK(la); m_freem(m); } #endif -/* - * Lookup or enter a new address in arptab. - */ -static struct rtentry * -arplookup(u_long addr, int create, int proxy, int fibnum) -{ - struct rtentry *rt; - struct sockaddr_inarp sin; - const char *why = 0; - - bzero(&sin, sizeof(sin)); - sin.sin_len = sizeof(sin); - sin.sin_family = AF_INET; - sin.sin_addr.s_addr = addr; - if (proxy) - sin.sin_other = SIN_PROXY; - rt = in_rtalloc1((struct sockaddr *)&sin, create, 0UL, fibnum); - if (rt == 0) - return (0); - - if (rt->rt_flags & RTF_GATEWAY) - why = "host is not on local network"; - else if ((rt->rt_flags & RTF_LLINFO) == 0) - why = "could not allocate llinfo"; - else if (rt->rt_gateway->sa_family != AF_LINK) - why = "gateway route is not ours"; - - if (why) { -#define ISDYNCLONE(_rt) \ - (((_rt)->rt_flags & (RTF_STATIC | RTF_WASCLONED)) == RTF_WASCLONED) - if (create) - log(LOG_DEBUG, "arplookup %s failed: %s\n", - inet_ntoa(sin.sin_addr), why); - /* - * If there are no references to this Layer 2 route, - * and it is a cloned route, and not static, and - * arplookup() is creating the route, then purge - * it from the routing table as it is probably bogus. - */ - if (rt->rt_refcnt == 1 && ISDYNCLONE(rt)) - rtexpunge(rt); - RTFREE_LOCKED(rt); - return (0); -#undef ISDYNCLONE - } else { - RT_REMREF(rt); - return (rt); - } -} - void arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa) { + struct llentry *lle; + if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY) arprequest(ifp, &IA_SIN(ifa)->sin_addr, &IA_SIN(ifa)->sin_addr, IF_LLADDR(ifp)); - ifa->ifa_rtrequest = arp_rtrequest; - ifa->ifa_flags |= RTF_CLONING; + /* + * interface address is considered static entry + * because the output of the arp utility shows + * that L2 entry as permanent + */ + IF_AFDATA_LOCK(ifp); + lle = lla_lookup(LLTABLE(ifp), (LLE_CREATE | LLE_IFADDR | LLE_STATIC), + (struct sockaddr *)IA_SIN(ifa)); + IF_AFDATA_UNLOCK(ifp); + if (lle == NULL) + log(LOG_INFO, "arp_ifinit: cannot create arp " + "entry for interface address\n"); + LLE_RUNLOCK(lle); + ifa->ifa_rtrequest = NULL; } void @@ -1072,8 +787,7 @@ arp_ifinit2(struct ifnet *ifp, struct ifaddr *ifa, u_char *enaddr) if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY) arprequest(ifp, &IA_SIN(ifa)->sin_addr, &IA_SIN(ifa)->sin_addr, enaddr); - ifa->ifa_rtrequest = arp_rtrequest; - ifa->ifa_flags |= RTF_CLONING; + ifa->ifa_rtrequest = NULL; } static void diff --git a/sys/netinet/if_ether.h b/sys/netinet/if_ether.h index 14df15f..ba4e2f6 100644 --- a/sys/netinet/if_ether.h +++ b/sys/netinet/if_ether.h @@ -109,8 +109,11 @@ struct sockaddr_inarp { extern u_char ether_ipmulticast_min[ETHER_ADDR_LEN]; extern u_char ether_ipmulticast_max[ETHER_ADDR_LEN]; +struct llentry; + int arpresolve(struct ifnet *ifp, struct rtentry *rt, - struct mbuf *m, struct sockaddr *dst, u_char *desten); + struct mbuf *m, struct sockaddr *dst, u_char *desten, + struct llentry **lle); void arp_ifinit(struct ifnet *, struct ifaddr *); void arp_ifinit2(struct ifnet *, struct ifaddr *, u_char *); #endif diff --git a/sys/netinet/in.c b/sys/netinet/in.c index 9b6dd48..852ef6d 100644 --- a/sys/netinet/in.c +++ b/sys/netinet/in.c @@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$"); #include <sys/vimage.h> #include <net/if.h> +#include <net/if_llatbl.h> #include <net/if_types.h> #include <net/route.h> @@ -871,6 +872,8 @@ in_addprefix(struct in_ifaddr *target, int flags) return (error); } +extern void arp_ifscrub(struct ifnet *ifp, uint32_t addr); + /* * If there is no other address in the system that can serve a route to the * same prefix, remove the route. Hand over the route to the new address @@ -893,6 +896,8 @@ in_scrubprefix(struct in_ifaddr *target) prefix = target->ia_addr.sin_addr; mask = target->ia_sockmask.sin_addr; prefix.s_addr &= mask.s_addr; + /* remove arp cache */ + arp_ifscrub(target->ia_ifp, IA_SIN(target)->sin_addr.s_addr); } TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { @@ -1015,3 +1020,238 @@ in_ifdetach(struct ifnet *ifp) in_pcbpurgeif0(&V_udbinfo, ifp); in_purgemaddrs(ifp); } + +#include <sys/syslog.h> +#include <net/if_dl.h> +#include <netinet/if_ether.h> + +struct in_llentry { + struct llentry base; + struct sockaddr_in l3_addr4; +}; + +static struct llentry * +in_lltable_new(const struct sockaddr *l3addr, u_int flags) +{ + struct in_llentry *lle; + + lle = malloc(sizeof(struct in_llentry), M_LLTABLE, M_DONTWAIT | M_ZERO); + if (lle == NULL) /* NB: caller generates msg */ + return NULL; + + callout_init(&lle->base.la_timer, CALLOUT_MPSAFE); + /* + * For IPv4 this will trigger "arpresolve" to generate + * an ARP request. + */ + lle->base.la_expire = time_second; /* mark expired */ + lle->l3_addr4 = *(const struct sockaddr_in *)l3addr; + lle->base.lle_refcnt = 1; + LLE_LOCK_INIT(&lle->base); + return &lle->base; +} + +/* + * Deletes an address from the address table. + * This function is called by the timer functions + * such as arptimer() and nd6_llinfo_timer(), and + * the caller does the locking. + */ +static void +in_lltable_free(struct lltable *llt, struct llentry *lle) +{ + free(lle, M_LLTABLE); +} + +static int +in_lltable_rtcheck(struct ifnet *ifp, const struct sockaddr *l3addr) +{ + struct rtentry *rt; + + KASSERT(l3addr->sa_family == AF_INET, + ("sin_family %d", l3addr->sa_family)); + + /* XXX rtalloc1 should take a const param */ + rt = rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0); + if (rt == NULL || (rt->rt_flags & RTF_GATEWAY) || rt->rt_ifp != ifp) { + log(LOG_INFO, "IPv4 address: \"%s\" is not on the network\n", + inet_ntoa(((const struct sockaddr_in *)l3addr)->sin_addr)); + if (rt != NULL) + RTFREE_LOCKED(rt); + return (EINVAL); + } + RTFREE_LOCKED(rt); + return 0; +} + +/* + * Return NULL if not found or marked for deletion. + * If found return lle read locked. + */ +static struct llentry * +in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr) +{ + const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr; + struct ifnet *ifp = llt->llt_ifp; + struct llentry *lle; + struct llentries *lleh; + u_int hashkey; + + IF_AFDATA_LOCK_ASSERT(ifp); + KASSERT(l3addr->sa_family == AF_INET, + ("sin_family %d", l3addr->sa_family)); + + hashkey = sin->sin_addr.s_addr; + lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)]; + LIST_FOREACH(lle, lleh, lle_next) { + if (lle->la_flags & LLE_DELETED) + continue; + if (bcmp(L3_ADDR(lle), l3addr, sizeof(struct sockaddr_in)) == 0) + break; + } + if (lle == NULL) { +#ifdef DIAGNOSTICS + if (flags & LLE_DELETE) + log(LOG_INFO, "interface address is missing from cache = %p in delete\n", lle); +#endif + if (!(flags & LLE_CREATE)) + return (NULL); + /* + * A route that covers the given address must have + * been installed 1st because we are doing a resolution, + * verify this. + */ + if (!(flags & LLE_IFADDR) && + in_lltable_rtcheck(ifp, l3addr) != 0) + goto done; + + lle = in_lltable_new(l3addr, flags); + if (lle == NULL) { + log(LOG_INFO, "lla_lookup: new lle malloc failed\n"); + goto done; + } + lle->la_flags = flags & ~LLE_CREATE; + if ((flags & (LLE_CREATE | LLE_IFADDR)) == (LLE_CREATE | LLE_IFADDR)) { + bcopy(IF_LLADDR(ifp), &lle->ll_addr, ifp->if_addrlen); + lle->la_flags |= (LLE_VALID | LLE_STATIC); + } + + lle->lle_tbl = llt; + lle->lle_head = lleh; + LIST_INSERT_HEAD(lleh, lle, lle_next); + } else if (flags & LLE_DELETE) { + if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) { + LLE_WLOCK(lle); + lle->la_flags = LLE_DELETED; + LLE_WUNLOCK(lle); +#ifdef DIAGNOSTICS + log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle); +#endif + } + lle = (void *)-1; + + } + if (lle != NULL && lle != (void *)-1) { + if (flags & LLE_EXCLUSIVE) + LLE_WLOCK(lle); + else + LLE_RLOCK(lle); + } +done: + return (lle); +} + +static int +in_lltable_dump(struct lltable *llt, struct sysctl_req *wr) +{ +#define SIN(lle) ((struct sockaddr_in *) L3_ADDR(lle)) + struct ifnet *ifp = llt->llt_ifp; + struct llentry *lle; + /* XXX stack use */ + struct { + struct rt_msghdr rtm; + struct sockaddr_inarp sin; + struct sockaddr_dl sdl; + } arpc; + int error, i; + + /* XXXXX + * current IFNET_RLOCK() is mapped to IFNET_WLOCK() + * so it is okay to use this ASSERT, change it when + * IFNET lock is finalized + */ + IFNET_WLOCK_ASSERT(); + + error = 0; + for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) { + LIST_FOREACH(lle, &llt->lle_head[i], lle_next) { + struct sockaddr_dl *sdl; + + /* skip deleted entries */ + if ((lle->la_flags & (LLE_DELETED|LLE_VALID)) != LLE_VALID) + continue; + /* + * produce a msg made of: + * struct rt_msghdr; + * struct sockaddr_inarp; (IPv4) + * struct sockaddr_dl; + */ + bzero(&arpc, sizeof(arpc)); + arpc.rtm.rtm_msglen = sizeof(arpc); + arpc.sin.sin_family = AF_INET; + arpc.sin.sin_len = sizeof(arpc.sin); + arpc.sin.sin_addr.s_addr = SIN(lle)->sin_addr.s_addr; + + /* publish */ + if (lle->la_flags & LLE_PUB) { + arpc.rtm.rtm_flags |= RTF_ANNOUNCE; + /* proxy only */ + if (lle->la_flags & LLE_PROXY) + arpc.sin.sin_other = SIN_PROXY; + } + + sdl = &arpc.sdl; + sdl->sdl_family = AF_LINK; + sdl->sdl_len = sizeof(*sdl); + sdl->sdl_alen = ifp->if_addrlen; + sdl->sdl_index = ifp->if_index; + sdl->sdl_type = ifp->if_type; + bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen); + + arpc.rtm.rtm_rmx.rmx_expire = + lle->la_flags & LLE_STATIC ? 0 : lle->la_expire; + arpc.rtm.rtm_flags |= RTF_HOST; + if (lle->la_flags & LLE_STATIC) + arpc.rtm.rtm_flags |= RTF_STATIC; + arpc.rtm.rtm_index = ifp->if_index; + error = SYSCTL_OUT(wr, &arpc, sizeof(arpc)); + if (error) + break; + } + } + return error; +#undef SIN +} + +void * +in_domifattach(struct ifnet *ifp) +{ + struct lltable *llt = lltable_init(ifp, AF_INET); + + if (llt != NULL) { + llt->llt_new = in_lltable_new; + llt->llt_free = in_lltable_free; + llt->llt_rtcheck = in_lltable_rtcheck; + llt->llt_lookup = in_lltable_lookup; + llt->llt_dump = in_lltable_dump; + } + return (llt); +} + +void +in_domifdetach(struct ifnet *ifp __unused, void *aux) +{ + struct lltable *llt = (struct lltable *)aux; + + lltable_free(llt); +} diff --git a/sys/netinet/in_mcast.c b/sys/netinet/in_mcast.c index e7916dd..6f5dfe1 100644 --- a/sys/netinet/in_mcast.c +++ b/sys/netinet/in_mcast.c @@ -1036,7 +1036,7 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt) ro.ro_rt = NULL; *(struct sockaddr_in *)&ro.ro_dst = gsa->sin; - in_rtalloc_ign(&ro, RTF_CLONING, + in_rtalloc_ign(&ro, 0, inp->inp_inc.inc_fibnum); if (ro.ro_rt != NULL) { ifp = ro.ro_rt->rt_ifp; diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index c5515da..84bff92 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -570,7 +570,7 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr, * Find out route to destination. */ if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0) - in_rtalloc_ign(&sro, RTF_CLONING, inp->inp_inc.inc_fibnum); + in_rtalloc_ign(&sro, 0, inp->inp_inc.inc_fibnum); /* * If we found a route, use the address corresponding to diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c index a93f1f2..f3fbe0c 100644 --- a/sys/netinet/in_proto.c +++ b/sys/netinet/in_proto.c @@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$"); #include <netinet/in.h> #include <netinet/in_systm.h> +#include <netinet/in_var.h> #include <netinet/ip.h> #include <netinet/ip_var.h> #include <netinet/ip_icmp.h> @@ -364,7 +365,9 @@ struct domain inetdomain = { .dom_rtattach = in_inithead, #endif .dom_rtoffset = 32, - .dom_maxrtkey = sizeof(struct sockaddr_in) + .dom_maxrtkey = sizeof(struct sockaddr_in), + .dom_ifattach = in_domifattach, + .dom_ifdetach = in_domifdetach }; DOMAIN_SET(inet); diff --git a/sys/netinet/in_rmx.c b/sys/netinet/in_rmx.c index e61e36a..f9e9d98 100644 --- a/sys/netinet/in_rmx.c +++ b/sys/netinet/in_rmx.c @@ -75,8 +75,8 @@ in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, { struct rtentry *rt = (struct rtentry *)treenodes; struct sockaddr_in *sin = (struct sockaddr_in *)rt_key(rt); - struct radix_node *ret; + RADIX_NODE_HEAD_WLOCK_ASSERT(head); /* * A little bit of help for both IP output and input: * For host routes, we make sure that RTF_BROADCAST @@ -106,31 +106,7 @@ in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, if (!rt->rt_rmx.rmx_mtu && rt->rt_ifp) rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; - ret = rn_addroute(v_arg, n_arg, head, treenodes); - if (ret == NULL && rt->rt_flags & RTF_HOST) { - struct rtentry *rt2; - /* - * We are trying to add a host route, but can't. - * Find out if it is because of an - * ARP entry and delete it if so. - */ - rt2 = in_rtalloc1((struct sockaddr *)sin, 0, - RTF_CLONING|RTF_RNH_LOCKED, rt->rt_fibnum); - if (rt2) { - if (rt2->rt_flags & RTF_LLINFO && - rt2->rt_flags & RTF_HOST && - rt2->rt_gateway && - rt2->rt_gateway->sa_family == AF_LINK) { - rtexpunge(rt2); - RTFREE_LOCKED(rt2); - ret = rn_addroute(v_arg, n_arg, head, - treenodes); - } else - RTFREE_LOCKED(rt2); - } - } - - return ret; + return (rn_addroute(v_arg, n_arg, head, treenodes)); } /* @@ -187,13 +163,10 @@ in_clsroute(struct radix_node *rn, struct radix_node_head *head) if (!(rt->rt_flags & RTF_UP)) return; /* prophylactic measures */ - if ((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST) - return; - if (rt->rt_flags & RTPRF_OURS) return; - if (!(rt->rt_flags & (RTF_WASCLONED | RTF_DYNAMIC))) + if (!(rt->rt_flags & RTF_DYNAMIC)) return; /* @@ -434,7 +407,6 @@ in_ifadownkill(struct radix_node *rn, void *xap) * the routes that rtrequest() would have in any case, * so that behavior is not needed there. */ - rt->rt_flags &= ~RTF_CLONING; rtexpunge(rt); } RT_UNLOCK(rt); diff --git a/sys/netinet/in_var.h b/sys/netinet/in_var.h index 7facfa8..73868ad 100644 --- a/sys/netinet/in_var.h +++ b/sys/netinet/in_var.h @@ -321,6 +321,9 @@ void ip_input(struct mbuf *); int in_ifadown(struct ifaddr *ifa, int); void in_ifscrub(struct ifnet *, struct in_ifaddr *); struct mbuf *ip_fastforward(struct mbuf *); +void *in_domifattach(struct ifnet *); +void in_domifdetach(struct ifnet *, void *); + /* XXX */ void in_rtalloc_ign(struct route *ro, u_long ignflags, u_int fibnum); diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c index 9f6bc31..6d96163 100644 --- a/sys/netinet/ip_carp.c +++ b/sys/netinet/ip_carp.c @@ -371,14 +371,6 @@ carp_setroute(struct carp_softc *sc, int cmd) (cmd == RTM_DELETE && count == 0)) rtinit(ifa, cmd, RTF_UP | RTF_HOST); } -#ifdef INET6 - if (ifa->ifa_addr->sa_family == AF_INET6) { - if (cmd == RTM_ADD) - in6_ifaddloop(ifa); - else - in6_ifremloop(ifa); - } -#endif /* INET6 */ } splx(s); } diff --git a/sys/netinet/ip_fastfwd.c b/sys/netinet/ip_fastfwd.c index 407b08c..ea106df 100644 --- a/sys/netinet/ip_fastfwd.c +++ b/sys/netinet/ip_fastfwd.c @@ -128,7 +128,7 @@ ip_findroute(struct route *ro, struct in_addr dest, struct mbuf *m) dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr.s_addr = dest.s_addr; - in_rtalloc_ign(ro, RTF_CLONING, M_GETFIB(m)); + in_rtalloc_ign(ro, 0, M_GETFIB(m)); /* * Route there and interface still up? diff --git a/sys/netinet/ip_fw2.c b/sys/netinet/ip_fw2.c index e91fc9b..eae49c8 100644 --- a/sys/netinet/ip_fw2.c +++ b/sys/netinet/ip_fw2.c @@ -528,7 +528,7 @@ verify_path(struct in_addr src, struct ifnet *ifp, u_int fib) dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = src; - in_rtalloc_ign(&ro, RTF_CLONING, fib); + in_rtalloc_ign(&ro, 0, fib); if (ro.ro_rt == NULL) return 0; @@ -620,7 +620,7 @@ verify_path6(struct in6_addr *src, struct ifnet *ifp) dst->sin6_len = sizeof(*dst); dst->sin6_addr = *src; /* XXX MRT 0 for ipv6 at this time */ - rtalloc_ign((struct route *)&ro, RTF_CLONING); + rtalloc_ign((struct route *)&ro, 0); if (ro.ro_rt == NULL) return 0; diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index c99c53e..4a169ff 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -1286,7 +1286,7 @@ ip_rtaddr(struct in_addr dst, u_int fibnum) sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); sin->sin_addr = dst; - in_rtalloc_ign(&sro, RTF_CLONING, fibnum); + in_rtalloc_ign(&sro, 0, fibnum); if (sro.ro_rt == NULL) return (NULL); @@ -1412,7 +1412,7 @@ ip_forward(struct mbuf *m, int srcrt) sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); sin->sin_addr = ip->ip_dst; - in_rtalloc_ign(&ro, RTF_CLONING, M_GETFIB(m)); + in_rtalloc_ign(&ro, 0, M_GETFIB(m)); rt = ro.ro_rt; diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 03cf56f..6cda8aa 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -567,7 +567,6 @@ passout: * to avoid confusing lower layers. */ m->m_flags &= ~(M_PROTOFLAGS); - error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, ro->ro_rt); goto done; diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 2c2337e..e7a6dff 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -1659,7 +1659,7 @@ tcp_maxmtu(struct in_conninfo *inc, int *flags) dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = inc->inc_faddr; - in_rtalloc_ign(&sro, RTF_CLONING, inc->inc_fibnum); + in_rtalloc_ign(&sro, 0, inc->inc_fibnum); } if (sro.ro_rt != NULL) { ifp = sro.ro_rt->rt_ifp; @@ -1694,7 +1694,7 @@ tcp_maxmtu6(struct in_conninfo *inc, int *flags) sro6.ro_dst.sin6_family = AF_INET6; sro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6); sro6.ro_dst.sin6_addr = inc->inc6_faddr; - rtalloc_ign((struct route *)&sro6, RTF_CLONING); + rtalloc_ign((struct route *)&sro6, 0); } if (sro6.ro_rt != NULL) { ifp = sro6.ro_rt->rt_ifp; diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c index 59bc95a..229aaa2 100644 --- a/sys/netinet6/icmp6.c +++ b/sys/netinet6/icmp6.c @@ -85,6 +85,7 @@ __FBSDID("$FreeBSD$"); #include <net/if.h> #include <net/if_dl.h> +#include <net/if_llatbl.h> #include <net/if_types.h> #include <net/route.h> #include <net/vnet.h> @@ -2573,32 +2574,32 @@ icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt) { /* target lladdr option */ - struct rtentry *rt_router = NULL; int len; - struct sockaddr_dl *sdl; + struct llentry *ln; struct nd_opt_hdr *nd_opt; char *lladdr; - rt_router = nd6_lookup(router_ll6, 0, ifp); - if (!rt_router) + IF_AFDATA_LOCK(ifp); + ln = nd6_lookup(router_ll6, 0, ifp); + IF_AFDATA_UNLOCK(ifp); + if (!ln) goto nolladdropt; + len = sizeof(*nd_opt) + ifp->if_addrlen; len = (len + 7) & ~7; /* round by 8 */ /* safety check */ if (len + (p - (u_char *)ip6) > maxlen) goto nolladdropt; - if (!(rt_router->rt_flags & RTF_GATEWAY) && - (rt_router->rt_flags & RTF_LLINFO) && - (rt_router->rt_gateway->sa_family == AF_LINK) && - (sdl = (struct sockaddr_dl *)rt_router->rt_gateway) && - sdl->sdl_alen) { + + if (ln->la_flags & LLE_VALID) { nd_opt = (struct nd_opt_hdr *)p; nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; nd_opt->nd_opt_len = len >> 3; lladdr = (char *)(nd_opt + 1); - bcopy(LLADDR(sdl), lladdr, ifp->if_addrlen); + bcopy(&ln->ll_addr, lladdr, ifp->if_addrlen); p += len; } + LLE_RUNLOCK(ln); } nolladdropt:; diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c index c784845..8127281 100644 --- a/sys/netinet6/in6.c +++ b/sys/netinet6/in6.c @@ -88,6 +88,7 @@ __FBSDID("$FreeBSD$"); #include <netinet/in.h> #include <netinet/in_var.h> +#include <net/if_llatbl.h> #include <netinet/if_ether.h> #include <netinet/in_systm.h> #include <netinet/ip.h> @@ -135,152 +136,7 @@ static void in6_unlink_ifa(struct in6_ifaddr *, struct ifnet *); struct in6_multihead in6_multihead; /* XXX BSS initialization */ int (*faithprefix_p)(struct in6_addr *); -/* - * Subroutine for in6_ifaddloop() and in6_ifremloop(). - * This routine does actual work. - */ -static void -in6_ifloop_request(int cmd, struct ifaddr *ifa) -{ - struct sockaddr_in6 all1_sa; - struct rtentry *nrt = NULL; - int e; - char ip6buf[INET6_ADDRSTRLEN]; - - bzero(&all1_sa, sizeof(all1_sa)); - all1_sa.sin6_family = AF_INET6; - all1_sa.sin6_len = sizeof(struct sockaddr_in6); - all1_sa.sin6_addr = in6mask128; - - /* - * We specify the address itself as the gateway, and set the - * RTF_LLINFO flag, so that the corresponding host route would have - * the flag, and thus applications that assume traditional behavior - * would be happy. Note that we assume the caller of the function - * (probably implicitly) set nd6_rtrequest() to ifa->ifa_rtrequest, - * which changes the outgoing interface to the loopback interface. - */ - e = rtrequest(cmd, ifa->ifa_addr, ifa->ifa_addr, - (struct sockaddr *)&all1_sa, RTF_UP|RTF_HOST|RTF_LLINFO, &nrt); - if (e != 0) { - /* XXX need more descriptive message */ - - log(LOG_ERR, "in6_ifloop_request: " - "%s operation failed for %s (errno=%d)\n", - cmd == RTM_ADD ? "ADD" : "DELETE", - ip6_sprintf(ip6buf, - &((struct in6_ifaddr *)ifa)->ia_addr.sin6_addr), e); - } - - /* - * Report the addition/removal of the address to the routing socket. - * XXX: since we called rtinit for a p2p interface with a destination, - * we end up reporting twice in such a case. Should we rather - * omit the second report? - */ - if (nrt) { - RT_LOCK(nrt); - /* - * Make sure rt_ifa be equal to IFA, the second argument of - * the function. We need this because when we refer to - * rt_ifa->ia6_flags in ip6_input, we assume that the rt_ifa - * points to the address instead of the loopback address. - */ - if (cmd == RTM_ADD && ifa != nrt->rt_ifa) { - IFAFREE(nrt->rt_ifa); - IFAREF(ifa); - nrt->rt_ifa = ifa; - } - - rt_newaddrmsg(cmd, ifa, e, nrt); - if (cmd == RTM_DELETE) - RTFREE_LOCKED(nrt); - else { - /* the cmd must be RTM_ADD here */ - RT_REMREF(nrt); - RT_UNLOCK(nrt); - } - } -} - -/* - * Add ownaddr as loopback rtentry. We previously add the route only if - * necessary (ex. on a p2p link). However, since we now manage addresses - * separately from prefixes, we should always add the route. We can't - * rely on the cloning mechanism from the corresponding interface route - * any more. - */ -void -in6_ifaddloop(struct ifaddr *ifa) -{ - struct rtentry *rt; - int need_loop; - - /* If there is no loopback entry, allocate one. */ - rt = rtalloc1(ifa->ifa_addr, 0, 0); - need_loop = (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 || - (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0); - if (rt) - RTFREE_LOCKED(rt); - if (need_loop) - in6_ifloop_request(RTM_ADD, ifa); -} - -/* - * Remove loopback rtentry of ownaddr generated by in6_ifaddloop(), - * if it exists. - */ -void -in6_ifremloop(struct ifaddr *ifa) -{ - INIT_VNET_INET6(curvnet); - struct in6_ifaddr *ia; - struct rtentry *rt; - int ia_count = 0; - - /* - * Some of BSD variants do not remove cloned routes - * from an interface direct route, when removing the direct route - * (see comments in net/net_osdep.h). Even for variants that do remove - * cloned routes, they could fail to remove the cloned routes when - * we handle multple addresses that share a common prefix. - * So, we should remove the route corresponding to the deleted address. - */ - /* - * Delete the entry only if exact one ifa exists. More than one ifa - * can exist if we assign a same single address to multiple - * (probably p2p) interfaces. - * XXX: we should avoid such a configuration in IPv6... - */ - for (ia = V_in6_ifaddr; ia; ia = ia->ia_next) { - if (IN6_ARE_ADDR_EQUAL(IFA_IN6(ifa), &ia->ia_addr.sin6_addr)) { - ia_count++; - if (ia_count > 1) - break; - } - } - - if (ia_count == 1) { - /* - * Before deleting, check if a corresponding loopbacked host - * route surely exists. With this check, we can avoid to - * delete an interface direct route whose destination is same - * as the address being removed. This can happen when removing - * a subnet-router anycast address on an interface attahced - * to a shared medium. - */ - rt = rtalloc1(ifa->ifa_addr, 0, 0); - if (rt != NULL) { - if ((rt->rt_flags & RTF_HOST) != 0 && - (rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) { - RTFREE_LOCKED(rt); - in6_ifloop_request(RTM_DELETE, ifa); - } else - RT_UNLOCK(rt); - } - } -} int in6_mask2len(struct in6_addr *mask, u_char *lim0) @@ -1131,10 +987,9 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, } } if (!rt) { - /* XXX: we need RTF_CLONING to fake nd6_rtrequest */ error = rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr, (struct sockaddr *)&ia->ia_addr, - (struct sockaddr *)&mltmask, RTF_UP | RTF_CLONING, + (struct sockaddr *)&mltmask, RTF_UP, (struct rtentry **)0); if (error) goto cleanup; @@ -1208,7 +1063,7 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, if (!rt) { error = rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr, (struct sockaddr *)&ia->ia_addr, - (struct sockaddr *)&mltmask, RTF_UP | RTF_CLONING, + (struct sockaddr *)&mltmask, RTF_UP, (struct rtentry **)0); if (error) goto cleanup; @@ -1287,34 +1142,16 @@ in6_purgeaddr(struct ifaddr *ifa) { struct ifnet *ifp = ifa->ifa_ifp; struct in6_ifaddr *ia = (struct in6_ifaddr *) ifa; - char ip6buf[INET6_ADDRSTRLEN]; struct in6_multi_mship *imm; /* stop DAD processing */ nd6_dad_stop(ifa); - /* - * delete route to the destination of the address being purged. - * The interface must be p2p or loopback in this case. - */ - if ((ia->ia_flags & IFA_ROUTE) != 0 && ia->ia_dstaddr.sin6_len != 0) { - int e; - - if ((e = rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST)) - != 0) { - log(LOG_ERR, "in6_purgeaddr: failed to remove " - "a route to the p2p destination: %s on %s, " - "errno=%d\n", - ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), - if_name(ifp), e); - /* proceed anyway... */ - } else - ia->ia_flags &= ~IFA_ROUTE; - } - - /* Remove ownaddr's loopback rtentry, if it exists. */ - in6_ifremloop(&(ia->ia_ifa)); - + IF_AFDATA_LOCK(ifp); + lla_lookup(LLTABLE6(ifp), (LLE_DELETE | LLE_IFADDR), + (struct sockaddr *)&ia->ia_addr); + IF_AFDATA_UNLOCK(ifp); + /* * leave from multicast groups we have joined for the interface */ @@ -1688,26 +1525,15 @@ in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia, /* we could do in(6)_socktrim here, but just omit it at this moment. */ - if (newhost) { - /* - * set the rtrequest function to create llinfo. It also - * adjust outgoing interface of the route for the local - * address when called via in6_ifaddloop() below. - */ - ia->ia_ifa.ifa_rtrequest = nd6_rtrequest; - } - /* * Special case: * If a new destination address is specified for a point-to-point * interface, install a route to the destination as an interface - * direct route. In addition, if the link is expected to have neighbor - * cache entries, specify RTF_LLINFO so that a cache entry for the - * destination address will be created. - * created + * direct route. * XXX: the logic below rejects assigning multiple addresses on a p2p * interface that share the same destination. */ +#if 0 /* QL - verify */ plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */ if (!(ia->ia_flags & IFA_ROUTE) && plen == 128 && ia->ia_dstaddr.sin6_family == AF_INET6) { @@ -1715,7 +1541,6 @@ in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia, struct rtentry *rt = NULL, **rtp = NULL; if (nd6_need_cache(ifp) != 0) { - rtflags |= RTF_LLINFO; rtp = &rt; } @@ -1744,16 +1569,36 @@ in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia, } ia->ia_flags |= IFA_ROUTE; } - if (plen < 128) { - /* - * The RTF_CLONING flag is necessary for in6_is_ifloop_auto(). - */ - ia->ia_ifa.ifa_flags |= RTF_CLONING; +#else + plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */ + if (!(ia->ia_flags & IFA_ROUTE) && plen == 128 && + ia->ia_dstaddr.sin6_family == AF_INET6) { + if ((error = rtinit(&(ia->ia_ifa), (int)RTM_ADD, + RTF_UP | RTF_HOST)) != 0) + return (error); + ia->ia_flags |= IFA_ROUTE; } +#endif /* Add ownaddr as loopback rtentry, if necessary (ex. on p2p link). */ - if (newhost) - in6_ifaddloop(&(ia->ia_ifa)); + if (newhost) { + struct llentry *ln; + + IF_AFDATA_LOCK(ifp); + ia->ia_ifa.ifa_rtrequest = NULL; + + /* XXX QL + * we need to report rt_newaddrmsg + */ + ln = lla_lookup(LLTABLE6(ifp), (LLE_CREATE | LLE_IFADDR | LLE_EXCLUSIVE), + (struct sockaddr *)&ia->ia_addr); + IF_AFDATA_UNLOCK(ifp); + if (ln) { + ln->la_expire = 0; /* for IPv6 this means permanent */ + ln->ln_state = ND6_LLINFO_REACHABLE; + LLE_WUNLOCK(ln); + } + } return (error); } @@ -2237,6 +2082,214 @@ in6_if2idlen(struct ifnet *ifp) } } +#include <sys/sysctl.h> + +struct in6_llentry { + struct llentry base; + struct sockaddr_in6 l3_addr6; +}; + +static struct llentry * +in6_lltable_new(const struct sockaddr *l3addr, u_int flags) +{ + struct in6_llentry *lle; + + lle = malloc(sizeof(struct in6_llentry), M_LLTABLE, + M_DONTWAIT | M_ZERO); + if (lle == NULL) /* NB: caller generates msg */ + return NULL; + + callout_init(&lle->base.ln_timer_ch, CALLOUT_MPSAFE); + lle->l3_addr6 = *(const struct sockaddr_in6 *)l3addr; + lle->base.lle_refcnt = 1; + LLE_LOCK_INIT(&lle->base); + return &lle->base; +} + +/* + * Deletes an address from the address table. + * This function is called by the timer functions + * such as arptimer() and nd6_llinfo_timer(), and + * the caller does the locking. + */ +static void +in6_lltable_free(struct lltable *llt, struct llentry *lle) +{ + free(lle, M_LLTABLE); +} + +static int +in6_lltable_rtcheck(struct ifnet *ifp, const struct sockaddr *l3addr) +{ + struct rtentry *rt; + char ip6buf[INET6_ADDRSTRLEN]; + + KASSERT(l3addr->sa_family == AF_INET6, + ("sin_family %d", l3addr->sa_family)); + + /* XXX rtalloc1 should take a const param */ + rt = rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0); + if (rt == NULL || (rt->rt_flags & RTF_GATEWAY) || rt->rt_ifp != ifp) { + struct ifaddr *ifa; + /* + * Create an ND6 cache for an IPv6 neighbor + * that is not covered by our own prefix. + */ + /* XXX ifaof_ifpforaddr should take a const param */ + ifa = ifaof_ifpforaddr(__DECONST(struct sockaddr *, l3addr), ifp); + if (ifa != NULL) { + if (rt != NULL) + rtfree(rt); + return 0; + } + log(LOG_INFO, "IPv6 address: \"%s\" is not on the network\n", + ip6_sprintf(ip6buf, &((const struct sockaddr_in6 *)l3addr)->sin6_addr)); + if (rt != NULL) + rtfree(rt); + return EINVAL; + } + rtfree(rt); + return 0; +} + +static struct llentry * +in6_lltable_lookup(struct lltable *llt, u_int flags, + const struct sockaddr *l3addr) +{ + const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr; + struct ifnet *ifp = llt->llt_ifp; + struct llentry *lle; + struct llentries *lleh; + u_int hashkey; + + IF_AFDATA_LOCK_ASSERT(ifp); + KASSERT(l3addr->sa_family == AF_INET6, + ("sin_family %d", l3addr->sa_family)); + + hashkey = sin6->sin6_addr.s6_addr32[3]; + lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)]; + LIST_FOREACH(lle, lleh, lle_next) { + if (lle->la_flags & LLE_DELETED) + continue; + if (bcmp(L3_ADDR(lle), l3addr, l3addr->sa_len) == 0) + break; + } + + if (lle == NULL) { + if (!(flags & LLE_CREATE)) + return (NULL); + /* + * A route that covers the given address must have + * been installed 1st because we are doing a resolution, + * verify this. + */ + if (!(flags & LLE_IFADDR) && + in6_lltable_rtcheck(ifp, l3addr) != 0) + return NULL; + + lle = in6_lltable_new(l3addr, flags); + if (lle == NULL) { + log(LOG_INFO, "lla_lookup: new lle malloc failed\n"); + return NULL; + } + lle->la_flags = flags & ~LLE_CREATE; + if ((flags & (LLE_CREATE | LLE_IFADDR)) == (LLE_CREATE | LLE_IFADDR)) { + bcopy(IF_LLADDR(ifp), &lle->ll_addr, ifp->if_addrlen); + lle->la_flags |= (LLE_VALID | LLE_STATIC); + } + + lle->lle_tbl = llt; + lle->lle_head = lleh; + LIST_INSERT_HEAD(lleh, lle, lle_next); + } else if (flags & LLE_DELETE) { + LLE_WLOCK(lle); + lle->la_flags = LLE_DELETED; + LLE_WUNLOCK(lle); +#ifdef DIAGNOSTICS + log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle); +#endif + lle = (void *)-1; + } + if (LLE_IS_VALID(lle)) { + if (flags & LLE_EXCLUSIVE) + LLE_WLOCK(lle); + else + LLE_RLOCK(lle); + } + return (lle); +} + +static int +in6_lltable_dump(struct lltable *llt, struct sysctl_req *wr) +{ + struct ifnet *ifp = llt->llt_ifp; + struct llentry *lle; + /* XXX stack use */ + struct { + struct rt_msghdr rtm; + struct sockaddr_in6 sin6; + /* + * ndp.c assumes that sdl is word aligned + */ +#ifdef __LP64__ + uint32_t pad; +#endif + struct sockaddr_dl sdl; + } ndpc; + int i, error; + + /* XXXXX + * current IFNET_RLOCK() is mapped to IFNET_WLOCK() + * so it is okay to use this ASSERT, change it when + * IFNET lock is finalized + */ + IFNET_WLOCK_ASSERT(); + + error = 0; + for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) { + LIST_FOREACH(lle, &llt->lle_head[i], lle_next) { + struct sockaddr_dl *sdl; + + /* skip deleted or invalid entries */ + if ((lle->la_flags & (LLE_DELETED|LLE_VALID)) != LLE_VALID) + continue; + /* + * produce a msg made of: + * struct rt_msghdr; + * struct sockaddr_in6 (IPv6) + * struct sockaddr_dl; + */ + bzero(&ndpc, sizeof(ndpc)); + ndpc.rtm.rtm_msglen = sizeof(ndpc); + ndpc.sin6.sin6_family = AF_INET6; + ndpc.sin6.sin6_len = sizeof(ndpc.sin6); + bcopy(L3_ADDR(lle), &ndpc.sin6, L3_ADDR_LEN(lle)); + + /* publish */ + if (lle->la_flags & LLE_PUB) + ndpc.rtm.rtm_flags |= RTF_ANNOUNCE; + + sdl = &ndpc.sdl; + sdl->sdl_family = AF_LINK; + sdl->sdl_len = sizeof(*sdl); + sdl->sdl_alen = ifp->if_addrlen; + sdl->sdl_index = ifp->if_index; + sdl->sdl_type = ifp->if_type; + bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen); + ndpc.rtm.rtm_rmx.rmx_expire = + lle->la_flags & LLE_STATIC ? 0 : lle->la_expire; + ndpc.rtm.rtm_flags |= RTF_HOST; + if (lle->la_flags & LLE_STATIC) + ndpc.rtm.rtm_flags |= RTF_STATIC; + ndpc.rtm.rtm_index = ifp->if_index; + error = SYSCTL_OUT(wr, &ndpc, sizeof(ndpc)); + if (error) + break; + } + } + return error; +} + void * in6_domifattach(struct ifnet *ifp) { @@ -2256,6 +2309,14 @@ in6_domifattach(struct ifnet *ifp) ext->nd_ifinfo = nd6_ifattach(ifp); ext->scope6_id = scope6_ifattach(ifp); + ext->lltable = lltable_init(ifp, AF_INET6); + if (ext->lltable != NULL) { + ext->lltable->llt_new = in6_lltable_new; + ext->lltable->llt_free = in6_lltable_free; + ext->lltable->llt_rtcheck = in6_lltable_rtcheck; + ext->lltable->llt_lookup = in6_lltable_lookup; + ext->lltable->llt_dump = in6_lltable_dump; + } return ext; } @@ -2266,6 +2327,7 @@ in6_domifdetach(struct ifnet *ifp, void *aux) scope6_ifdetach(ext->scope6_id); nd6_ifdetach(ext->nd_ifinfo); + lltable_free(ext->lltable); free(ext->in6_ifstat, M_IFADDR); free(ext->icmp6_ifstat, M_IFADDR); free(ext, M_IFADDR); diff --git a/sys/netinet6/in6_rmx.c b/sys/netinet6/in6_rmx.c index bb54341..42980f7 100644 --- a/sys/netinet6/in6_rmx.c +++ b/sys/netinet6/in6_rmx.c @@ -124,6 +124,7 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)rt_key(rt); struct radix_node *ret; + RADIX_NODE_HEAD_WLOCK_ASSERT(head); if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) rt->rt_flags |= RTF_MULTICAST; @@ -153,27 +154,7 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, rt->rt_rmx.rmx_mtu = IN6_LINKMTU(rt->rt_ifp); ret = rn_addroute(v_arg, n_arg, head, treenodes); - if (ret == NULL && rt->rt_flags & RTF_HOST) { - struct rtentry *rt2; - /* - * We are trying to add a host route, but can't. - * Find out if it is because of an - * ARP entry and delete it if so. - */ - rt2 = rtalloc1((struct sockaddr *)sin6, 0, RTF_RNH_LOCKED|RTF_CLONING); - if (rt2) { - if (rt2->rt_flags & RTF_LLINFO && - rt2->rt_flags & RTF_HOST && - rt2->rt_gateway && - rt2->rt_gateway->sa_family == AF_LINK) { - rtexpunge(rt2); - RTFREE_LOCKED(rt2); - ret = rn_addroute(v_arg, n_arg, head, - treenodes); - } else - RTFREE_LOCKED(rt2); - } - } else if (ret == NULL && rt->rt_flags & RTF_CLONING) { + if (ret == NULL) { struct rtentry *rt2; /* * We are trying to add a net route, but can't. @@ -187,10 +168,9 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, * net route entry, 3ffe:0501:: -> if0. * This case should not raise an error. */ - rt2 = rtalloc1((struct sockaddr *)sin6, 0, RTF_RNH_LOCKED|RTF_CLONING); + rt2 = rtalloc1((struct sockaddr *)sin6, 0, RTF_RNH_LOCKED); if (rt2) { - if ((rt2->rt_flags & (RTF_CLONING|RTF_HOST|RTF_GATEWAY)) - == RTF_CLONING + if (((rt2->rt_flags & (RTF_HOST|RTF_GATEWAY)) == 0) && rt2->rt_gateway && rt2->rt_gateway->sa_family == AF_LINK && rt2->rt_ifp == rt->rt_ifp) { @@ -199,7 +179,7 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, RTFREE_LOCKED(rt2); } } - return ret; + return (ret); } /* @@ -255,12 +235,6 @@ in6_clsroute(struct radix_node *rn, struct radix_node_head *head) if (!(rt->rt_flags & RTF_UP)) return; /* prophylactic measures */ - if ((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST) - return; - - if ((rt->rt_flags & (RTF_WASCLONED | RTPRF_OURS)) != RTF_WASCLONED) - return; - /* * As requested by David Greenman: * If rtq_reallyold6 is 0, just delete the route without @@ -307,7 +281,7 @@ in6_rtqkill(struct radix_node *rn, void *rock) err = rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt), rt->rt_gateway, rt_mask(rt), - rt->rt_flags, 0); + rt->rt_flags|RTF_RNH_LOCKED, 0); if (err) { log(LOG_WARNING, "in6_rtqkill: error %d", err); } else { diff --git a/sys/netinet6/in6_src.c b/sys/netinet6/in6_src.c index 4d7723a..c343634 100644 --- a/sys/netinet6/in6_src.c +++ b/sys/netinet6/in6_src.c @@ -87,6 +87,7 @@ __FBSDID("$FreeBSD$"); #include <net/if.h> #include <net/route.h> +#include <net/if_llatbl.h> #ifdef RADIX_MPATH #include <net/radix_mpath.h> #endif @@ -131,7 +132,7 @@ int ip6_prefer_tempaddr; static int selectroute __P((struct sockaddr_in6 *, struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *, struct ifnet **, - struct rtentry **, int, int)); + struct rtentry **, int)); static int in6_selectif __P((struct sockaddr_in6 *, struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *ro, struct ifnet **)); @@ -479,8 +480,7 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, static int selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route_in6 *ro, - struct ifnet **retifp, struct rtentry **retrt, int clone, - int norouteok) + struct ifnet **retifp, struct rtentry **retrt, int norouteok) { INIT_VNET_INET6(curvnet); int error = 0; @@ -536,9 +536,10 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, */ if (opts && opts->ip6po_nexthop) { struct route_in6 *ron; - + struct llentry *la; + sin6_next = satosin6(opts->ip6po_nexthop); - + /* at this moment, we only support AF_INET6 next hops */ if (sin6_next->sin6_family != AF_INET6) { error = EAFNOSUPPORT; /* or should we proceed? */ @@ -550,6 +551,36 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, * by that address must be a neighbor of the sending host. */ ron = &opts->ip6po_nextroute; + /* + * XXX what do we do here? + * PLZ to be fixing + */ + + + if (ron->ro_rt == NULL) { + rtalloc((struct route *)ron); /* multi path case? */ + if (ron->ro_rt == NULL) { + if (ron->ro_rt) { + RTFREE(ron->ro_rt); + ron->ro_rt = NULL; + } + error = EHOSTUNREACH; + goto done; + } + } + + rt = ron->ro_rt; + ifp = rt->rt_ifp; + IF_AFDATA_LOCK(ifp); + la = lla_lookup(LLTABLE6(ifp), 0, (struct sockaddr *)&sin6_next->sin6_addr); + IF_AFDATA_UNLOCK(ifp); + if (la) + LLE_RUNLOCK(la); + else { + error = EHOSTUNREACH; + goto done; + } +#if 0 if ((ron->ro_rt && (ron->ro_rt->rt_flags & (RTF_UP | RTF_LLINFO)) != (RTF_UP | RTF_LLINFO)) || @@ -573,16 +604,14 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, goto done; } } - rt = ron->ro_rt; - ifp = rt->rt_ifp; +#endif /* * When cloning is required, try to allocate a route to the * destination so that the caller can store path MTU * information. */ - if (!clone) - goto done; + goto done; } /* @@ -608,21 +637,17 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, *sa6 = *dstsock; sa6->sin6_scope_id = 0; - if (clone) { #ifdef RADIX_MPATH rtalloc_mpath((struct route *)ro, ntohl(sa6->sin6_addr.s6_addr32[3])); -#else - rtalloc((struct route *)ro); -#endif - } else { +#else ro->ro_rt = rtalloc1(&((struct route *)ro) - ->ro_dst, 0, 0UL); + ->ro_dst, 0, 0UL); if (ro->ro_rt) RT_UNLOCK(ro->ro_rt); - } +#endif } - + /* * do not care about the result if we have the nexthop * explicitly specified. @@ -693,7 +718,7 @@ in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, } if ((error = selectroute(dstsock, opts, mopts, ro, retifp, - &rt, 0, 1)) != 0) { + &rt, 1)) != 0) { if (ro == &sro && rt && rt == sro.ro_rt) RTFREE(rt); return (error); @@ -745,11 +770,11 @@ in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, int in6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route_in6 *ro, - struct ifnet **retifp, struct rtentry **retrt, int clone) + struct ifnet **retifp, struct rtentry **retrt) { return (selectroute(dstsock, opts, mopts, ro, retifp, - retrt, clone, 0)); + retrt, 0)); } /* diff --git a/sys/netinet6/in6_var.h b/sys/netinet6/in6_var.h index 957155a..b5dba5e 100644 --- a/sys/netinet6/in6_var.h +++ b/sys/netinet6/in6_var.h @@ -88,13 +88,17 @@ struct in6_addrlifetime { struct nd_ifinfo; struct scope6_id; +struct lltable; struct in6_ifextra { struct in6_ifstat *in6_ifstat; struct icmp6_ifstat *icmp6_ifstat; struct nd_ifinfo *nd_ifinfo; struct scope6_id *scope6_id; + struct lltable *lltable; }; +#define LLTABLE6(ifp) (((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->lltable) + struct in6_ifaddr { struct ifaddr ia_ifa; /* protocol-independent info */ #define ia_ifp ia_ifa.ifa_ifp diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c index 0c259e3..475888f 100644 --- a/sys/netinet6/ip6_input.c +++ b/sys/netinet6/ip6_input.c @@ -92,6 +92,7 @@ __FBSDID("$FreeBSD$"); #include <netinet/in.h> #include <netinet/in_systm.h> +#include <net/if_llatbl.h> #ifdef INET #include <netinet/ip.h> #include <netinet/ip_icmp.h> @@ -311,9 +312,11 @@ ip6_input(struct mbuf *m) u_int32_t plen; u_int32_t rtalert = ~0; int nxt, ours = 0; - struct ifnet *deliverifp = NULL; + struct ifnet *deliverifp = NULL, *ifp = NULL; struct in6_addr odst; int srcrt = 0; + struct llentry *lle = NULL; + struct sockaddr_in6 dst6; #ifdef IPSEC /* @@ -548,6 +551,24 @@ passin: /* * Unicast check */ + + bzero(&dst6, sizeof(dst6)); + dst6.sin6_family = AF_INET6; + dst6.sin6_len = sizeof(struct sockaddr_in6); + dst6.sin6_addr = ip6->ip6_dst; + ifp = m->m_pkthdr.rcvif; + IF_AFDATA_LOCK(ifp); + lle = lla_lookup(LLTABLE6(ifp), 0, + (struct sockaddr *)&dst6); + IF_AFDATA_UNLOCK(ifp); + if ((lle != NULL) && (lle->la_flags & LLE_IFADDR)) { + ours = 1; + deliverifp = ifp; + LLE_RUNLOCK(lle); + goto hbhcheck; + } + LLE_RUNLOCK(lle); + if (V_ip6_forward_rt.ro_rt != NULL && (V_ip6_forward_rt.ro_rt->rt_flags & RTF_UP) != 0 && IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index 6e5d0d0..c0f614f 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -615,7 +615,7 @@ again: dst_sa.sin6_len = sizeof(dst_sa); dst_sa.sin6_addr = ip6->ip6_dst; if ((error = in6_selectroute(&dst_sa, opt, im6o, ro, - &ifp, &rt, 0)) != 0) { + &ifp, &rt)) != 0) { switch (error) { case EHOSTUNREACH: V_ip6stat.ip6s_noroute++; diff --git a/sys/netinet6/ip6_var.h b/sys/netinet6/ip6_var.h index 5975cc9..55bc5db 100644 --- a/sys/netinet6/ip6_var.h +++ b/sys/netinet6/ip6_var.h @@ -398,7 +398,7 @@ struct in6_addr *in6_selectsrc __P((struct sockaddr_in6 *, struct ip6_pktopts *, struct ifnet **, int *)); int in6_selectroute __P((struct sockaddr_in6 *, struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *, struct ifnet **, - struct rtentry **, int)); + struct rtentry **)); u_int32_t ip6_randomid __P((void)); u_int32_t ip6_randomflowlabel __P((void)); #endif /* _KERNEL */ diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c index 18e81c3..5095d23 100644 --- a/sys/netinet6/nd6.c +++ b/sys/netinet6/nd6.c @@ -48,6 +48,8 @@ __FBSDID("$FreeBSD$"); #include <sys/protosw.h> #include <sys/errno.h> #include <sys/syslog.h> +#include <sys/lock.h> +#include <sys/rwlock.h> #include <sys/queue.h> #include <sys/sysctl.h> @@ -61,6 +63,8 @@ __FBSDID("$FreeBSD$"); #include <net/vnet.h> #include <netinet/in.h> +#include <net/if_llatbl.h> +#define L3_ADDR_SIN6(le) ((struct sockaddr_in6 *) L3_ADDR(le)) #include <netinet/if_ether.h> #include <netinet6/in6_var.h> #include <netinet/ip6.h> @@ -98,8 +102,9 @@ int nd6_maxqueuelen; int nd6_debug; /* for debugging? */ +#if 0 static int nd6_inuse, nd6_allocated; -struct llinfo_nd6 llinfo_nd6; +#endif struct nd_drhead nd_defrouter; struct nd_prhead nd_prefix; @@ -114,9 +119,9 @@ static int nd6_is_new_addr_neighbor __P((struct sockaddr_in6 *, static void nd6_setmtu0(struct ifnet *, struct nd_ifinfo *); static void nd6_slowtimo(void *); static int regen_tmpaddr(struct in6_ifaddr *); -static struct llinfo_nd6 *nd6_free(struct rtentry *, int); +static struct llentry *nd6_free(struct llentry *, int); static void nd6_llinfo_timer(void *); -static void clear_llinfo_pqueue(struct llinfo_nd6 *); +static void clear_llinfo_pqueue(struct llentry *); #ifdef VIMAGE_GLOBALS struct callout nd6_slowtimo_ch; @@ -162,8 +167,13 @@ nd6_init(void) V_dad_ignore_ns = 0; /* ignore NS in DAD - specwise incorrect*/ V_dad_maxtry = 15; /* max # of *tries* to transmit DAD packet */ + /* + * XXX just to get this to compile KMM + */ +#ifdef notyet V_llinfo_nd6.ln_next = &V_llinfo_nd6; V_llinfo_nd6.ln_prev = &V_llinfo_nd6; +#endif LIST_INIT(&V_nd_prefix); V_ip6_use_tempaddr = 0; @@ -424,14 +434,23 @@ skip1: * ND6 timer routine to handle ND6 entries */ void -nd6_llinfo_settimer(struct llinfo_nd6 *ln, long tick) +nd6_llinfo_settimer_locked(struct llentry *ln, long tick) { if (tick < 0) { - ln->ln_expire = 0; + ln->la_expire = 0; ln->ln_ntick = 0; callout_stop(&ln->ln_timer_ch); + /* + * XXX - do we know that there is + * callout installed? i.e. are we + * guaranteed that we're not dropping + * a reference that we did not add? + * KMM + */ + LLE_REMREF(ln); } else { - ln->ln_expire = time_second + tick / hz; + ln->la_expire = time_second + tick / hz; + LLE_ADDREF(ln); if (tick > INT_MAX) { ln->ln_ntick = tick - INT_MAX; callout_reset(&ln->ln_timer_ch, INT_MAX, @@ -444,16 +463,34 @@ nd6_llinfo_settimer(struct llinfo_nd6 *ln, long tick) } } +void +nd6_llinfo_settimer(struct llentry *ln, long tick) +{ + + LLE_WLOCK(ln); + nd6_llinfo_settimer_locked(ln, tick); + LLE_WUNLOCK(ln); +} + static void nd6_llinfo_timer(void *arg) { - struct llinfo_nd6 *ln; - struct rtentry *rt; + struct llentry *ln; struct in6_addr *dst; struct ifnet *ifp; struct nd_ifinfo *ndi = NULL; - ln = (struct llinfo_nd6 *)arg; + ln = (struct llentry *)arg; + if (ln == NULL) { + panic("%s: NULL entry!\n", __func__); + return; + } + + if ((ifp = ((ln->lle_tbl != NULL) ? ln->lle_tbl->llt_ifp : NULL)) == NULL) + panic("ln ifp == NULL"); + + CURVNET_SET(ifp->if_vnet); + INIT_VNET_INET6(curvnet); if (ln->ln_ntick > 0) { if (ln->ln_ntick > INT_MAX) { @@ -463,52 +500,44 @@ nd6_llinfo_timer(void *arg) ln->ln_ntick = 0; nd6_llinfo_settimer(ln, ln->ln_ntick); } - return; + goto done; } - if ((rt = ln->ln_rt) == NULL) - panic("ln->ln_rt == NULL"); - if ((ifp = rt->rt_ifp) == NULL) - panic("ln->ln_rt->rt_ifp == NULL"); ndi = ND_IFINFO(ifp); + dst = &L3_ADDR_SIN6(ln)->sin6_addr; + if ((ln->la_flags & LLE_STATIC) || (ln->la_expire > time_second)) { + goto done; + } - CURVNET_SET(ifp->if_vnet); - INIT_VNET_INET6(curvnet); - - /* sanity check */ - if (rt->rt_llinfo && (struct llinfo_nd6 *)rt->rt_llinfo != ln) - panic("rt_llinfo(%p) is not equal to ln(%p)", - rt->rt_llinfo, ln); - if (rt_key(rt) == NULL) - panic("rt key is NULL in nd6_timer(ln=%p)", ln); - - dst = &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr; + if (ln->la_flags & LLE_DELETED) { + (void)nd6_free(ln, 0); + goto done; + } switch (ln->ln_state) { case ND6_LLINFO_INCOMPLETE: - if (ln->ln_asked < V_nd6_mmaxtries) { - ln->ln_asked++; + if (ln->la_asked < V_nd6_mmaxtries) { + ln->la_asked++; nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000); nd6_ns_output(ifp, NULL, dst, ln, 0); } else { - struct mbuf *m = ln->ln_hold; + struct mbuf *m = ln->la_hold; if (m) { struct mbuf *m0; /* - * assuming every packet in ln_hold has the + * assuming every packet in la_hold has the * same IP header */ m0 = m->m_nextpkt; m->m_nextpkt = NULL; icmp6_error2(m, ICMP6_DST_UNREACH, - ICMP6_DST_UNREACH_ADDR, 0, rt->rt_ifp); + ICMP6_DST_UNREACH_ADDR, 0, ifp); - ln->ln_hold = m0; + ln->la_hold = m0; clear_llinfo_pqueue(ln); } - if (rt && rt->rt_llinfo) - (void)nd6_free(rt, 0); + (void)nd6_free(ln, 0); ln = NULL; } break; @@ -522,8 +551,7 @@ nd6_llinfo_timer(void *arg) case ND6_LLINFO_STALE: /* Garbage Collection(RFC 2461 5.3) */ if (!ND6_LLINFO_PERMANENT(ln)) { - if (rt && rt->rt_llinfo) - (void)nd6_free(rt, 1); + (void)nd6_free(ln, 1); ln = NULL; } break; @@ -531,7 +559,7 @@ nd6_llinfo_timer(void *arg) case ND6_LLINFO_DELAY: if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) { /* We need NUD */ - ln->ln_asked = 1; + ln->la_asked = 1; ln->ln_state = ND6_LLINFO_PROBE; nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000); nd6_ns_output(ifp, dst, dst, ln, 0); @@ -541,31 +569,20 @@ nd6_llinfo_timer(void *arg) } break; case ND6_LLINFO_PROBE: - if (ln->ln_asked < V_nd6_umaxtries) { - ln->ln_asked++; + if (ln->la_asked < V_nd6_umaxtries) { + ln->la_asked++; nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000); nd6_ns_output(ifp, dst, dst, ln, 0); - } else if (rt->rt_ifa != NULL && - rt->rt_ifa->ifa_addr->sa_family == AF_INET6 && - (((struct in6_ifaddr *)rt->rt_ifa)->ia_flags & IFA_ROUTE)) { - /* - * This is an unreachable neighbor whose address is - * specified as the destination of a p2p interface - * (see in6_ifinit()). We should not free the entry - * since this is sort of a "static" entry generated - * via interface address configuration. - */ - ln->ln_asked = 0; - ln->ln_expire = 0; /* make it permanent */ - ln->ln_state = ND6_LLINFO_STALE; } else { - if (rt && rt->rt_llinfo) - (void)nd6_free(rt, 0); + (void)nd6_free(ln, 0); ln = NULL; } break; } CURVNET_RESTORE(); +done: + if (ln != NULL) + LLE_FREE(ln); } @@ -772,7 +789,6 @@ void nd6_purge(struct ifnet *ifp) { INIT_VNET_INET6(ifp->if_vnet); - struct llinfo_nd6 *ln, *nln; struct nd_defrouter *dr, *ndr; struct nd_prefix *pr, *npr; @@ -829,132 +845,54 @@ nd6_purge(struct ifnet *ifp) nd6_setdefaultiface(0); if (!V_ip6_forwarding && V_ip6_accept_rtadv) { /* XXX: too restrictive? */ - /* refresh default router list */ + /* refresh default router list + * + * + */ defrouter_select(); + } - /* - * Nuke neighbor cache entries for the ifp. - * Note that rt->rt_ifp may not be the same as ifp, - * due to KAME goto ours hack. See RTM_RESOLVE case in - * nd6_rtrequest(), and ip6_input(). + /* XXXXX + * We do not nuke the neighbor cache entries here any more + * because the neighbor cache is kept in if_afdata[AF_INET6]. + * nd6_purge() is invoked by in6_ifdetach() which is called + * from if_detach() where everything gets purged. So let + * in6_domifdetach() do the actual L2 table purging work. */ - ln = V_llinfo_nd6.ln_next; - while (ln && ln != &V_llinfo_nd6) { - struct rtentry *rt; - struct sockaddr_dl *sdl; - - nln = ln->ln_next; - rt = ln->ln_rt; - if (rt && rt->rt_gateway && - rt->rt_gateway->sa_family == AF_LINK) { - sdl = (struct sockaddr_dl *)rt->rt_gateway; - if (sdl->sdl_index == ifp->if_index) - nln = nd6_free(rt, 0); - } - ln = nln; - } } -struct rtentry * -nd6_lookup(struct in6_addr *addr6, int create, struct ifnet *ifp) +/* + * the caller acquires and releases the lock on the lltbls + * Returns the llentry locked + */ +struct llentry * +nd6_lookup(struct in6_addr *addr6, int flags, struct ifnet *ifp) { INIT_VNET_INET6(curvnet); - struct rtentry *rt; struct sockaddr_in6 sin6; - char ip6buf[INET6_ADDRSTRLEN]; - + struct llentry *ln; + int llflags = 0; + bzero(&sin6, sizeof(sin6)); sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_family = AF_INET6; sin6.sin6_addr = *addr6; - rt = rtalloc1((struct sockaddr *)&sin6, create, 0UL); - if (rt) { - if ((rt->rt_flags & RTF_LLINFO) == 0 && create) { - /* - * This is the case for the default route. - * If we want to create a neighbor cache for the - * address, we should free the route for the - * destination and allocate an interface route. - */ - RTFREE_LOCKED(rt); - rt = NULL; - } - } - if (rt == NULL) { - if (create && ifp) { - int e; - /* - * If no route is available and create is set, - * we allocate a host route for the destination - * and treat it like an interface route. - * This hack is necessary for a neighbor which can't - * be covered by our own prefix. - */ - struct ifaddr *ifa = - ifaof_ifpforaddr((struct sockaddr *)&sin6, ifp); - if (ifa == NULL) - return (NULL); + IF_AFDATA_LOCK_ASSERT(ifp); - /* - * Create a new route. RTF_LLINFO is necessary - * to create a Neighbor Cache entry for the - * destination in nd6_rtrequest which will be - * called in rtrequest via ifa->ifa_rtrequest. - */ - if ((e = rtrequest(RTM_ADD, (struct sockaddr *)&sin6, - ifa->ifa_addr, (struct sockaddr *)&all1_sa, - (ifa->ifa_flags | RTF_HOST | RTF_LLINFO) & - ~RTF_CLONING, &rt)) != 0) { - log(LOG_ERR, - "nd6_lookup: failed to add route for a " - "neighbor(%s), errno=%d\n", - ip6_sprintf(ip6buf, addr6), e); - } - if (rt == NULL) - return (NULL); - RT_LOCK(rt); - if (rt->rt_llinfo) { - struct llinfo_nd6 *ln = - (struct llinfo_nd6 *)rt->rt_llinfo; - ln->ln_state = ND6_LLINFO_NOSTATE; - } - } else - return (NULL); - } - RT_LOCK_ASSERT(rt); - RT_REMREF(rt); - /* - * Validation for the entry. - * Note that the check for rt_llinfo is necessary because a cloned - * route from a parent route that has the L flag (e.g. the default - * route to a p2p interface) may have the flag, too, while the - * destination is not actually a neighbor. - * XXX: we can't use rt->rt_ifp to check for the interface, since - * it might be the loopback interface if the entry is for our - * own address on a non-loopback interface. Instead, we should - * use rt->rt_ifa->ifa_ifp, which would specify the REAL - * interface. - * Note also that ifa_ifp and ifp may differ when we connect two - * interfaces to a same link, install a link prefix to an interface, - * and try to install a neighbor cache on an interface that does not - * have a route to the prefix. - */ - if ((rt->rt_flags & RTF_GATEWAY) || (rt->rt_flags & RTF_LLINFO) == 0 || - rt->rt_gateway->sa_family != AF_LINK || rt->rt_llinfo == NULL || - (ifp && rt->rt_ifa->ifa_ifp != ifp)) { - if (create) { - nd6log((LOG_DEBUG, - "nd6_lookup: failed to lookup %s (if = %s)\n", - ip6_sprintf(ip6buf, addr6), - ifp ? if_name(ifp) : "unspec")); - } - RT_UNLOCK(rt); - return (NULL); + if (flags & ND6_CREATE) + llflags |= LLE_CREATE; + if (flags & ND6_EXCLUSIVE) + llflags |= LLE_EXCLUSIVE; + + ln = lla_lookup(LLTABLE6(ifp), llflags, (struct sockaddr *)&sin6); + if ((ln != NULL) && (flags & LLE_CREATE)) { + ln->ln_state = ND6_LLINFO_NOSTATE; + callout_init(&ln->ln_timer_ch, 0); } - RT_UNLOCK(rt); /* XXX not ready to return rt locked */ - return (rt); + + return (ln); } /* @@ -1040,7 +978,10 @@ nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp) int nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp) { + struct llentry *lle; + int rc = 0; + IF_AFDATA_UNLOCK_ASSERT(ifp); if (nd6_is_new_addr_neighbor(addr, ifp)) return (1); @@ -1048,10 +989,13 @@ nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp) * Even if the address matches none of our addresses, it might be * in the neighbor cache. */ - if (nd6_lookup(&addr->sin6_addr, 0, ifp) != NULL) - return (1); - - return (0); + IF_AFDATA_LOCK(ifp); + if ((lle = nd6_lookup(&addr->sin6_addr, 0, ifp)) != NULL) { + LLE_RUNLOCK(lle); + rc = 1; + } + IF_AFDATA_UNLOCK(ifp); + return (rc); } /* @@ -1060,13 +1004,13 @@ nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp) * make it global, unless you have a strong reason for the change, and are sure * that the change is safe. */ -static struct llinfo_nd6 * -nd6_free(struct rtentry *rt, int gc) +static struct llentry * +nd6_free(struct llentry *ln, int gc) { INIT_VNET_INET6(curvnet); - struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo, *next; - struct in6_addr in6 = ((struct sockaddr_in6 *)rt_key(rt))->sin6_addr; + struct llentry *next; struct nd_defrouter *dr; + struct ifnet *ifp=NULL; /* * we used to have pfctlinput(PRC_HOSTDEAD) here. @@ -1079,8 +1023,7 @@ nd6_free(struct rtentry *rt, int gc) if (!V_ip6_forwarding) { int s; s = splnet(); - dr = defrouter_lookup(&((struct sockaddr_in6 *)rt_key(rt))->sin6_addr, - rt->rt_ifp); + dr = defrouter_lookup(&L3_ADDR_SIN6(ln)->sin6_addr, ln->lle_tbl->llt_ifp); if (dr != NULL && dr->expire && ln->ln_state == ND6_LLINFO_STALE && gc) { @@ -1102,7 +1045,7 @@ nd6_free(struct rtentry *rt, int gc) else nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz); splx(s); - return (ln->ln_next); + return (LIST_NEXT(ln, lle_next)); } if (ln->ln_router || dr) { @@ -1111,7 +1054,7 @@ nd6_free(struct rtentry *rt, int gc) * is in the Default Router List. * See a corresponding comment in nd6_na_input(). */ - rt6_flush(&in6, rt->rt_ifp); + rt6_flush(&L3_ADDR_SIN6(ln)->sin6_addr, ln->lle_tbl->llt_ifp); } if (dr) { @@ -1152,15 +1095,13 @@ nd6_free(struct rtentry *rt, int gc) * might have freed other entries (particularly the old next entry) as * a side effect (XXX). */ - next = ln->ln_next; + next = LIST_NEXT(ln, lle_next); - /* - * Detach the route from the routing tree and the list of neighbor - * caches, and disable the route entry not to be used in already - * cached routes. - */ - rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0, - rt_mask(rt), 0, (struct rtentry **)0); + ifp = ln->lle_tbl->llt_ifp; + IF_AFDATA_LOCK(ifp); + LLE_WLOCK(ln); + llentry_free(ln); + IF_AFDATA_UNLOCK(ifp); return (next); } @@ -1174,297 +1115,42 @@ void nd6_nud_hint(struct rtentry *rt, struct in6_addr *dst6, int force) { INIT_VNET_INET6(curvnet); - struct llinfo_nd6 *ln; + struct llentry *ln; + struct ifnet *ifp; - /* - * If the caller specified "rt", use that. Otherwise, resolve the - * routing table by supplied "dst6". - */ - if (rt == NULL) { - if (dst6 == NULL) - return; - if ((rt = nd6_lookup(dst6, 0, NULL)) == NULL) - return; - } - - if ((rt->rt_flags & RTF_GATEWAY) != 0 || - (rt->rt_flags & RTF_LLINFO) == 0 || - rt->rt_llinfo == NULL || rt->rt_gateway == NULL || - rt->rt_gateway->sa_family != AF_LINK) { - /* This is not a host route. */ + if ((dst6 == NULL) || (rt == NULL)) return; - } - ln = (struct llinfo_nd6 *)rt->rt_llinfo; - if (ln->ln_state < ND6_LLINFO_REACHABLE) + ifp = rt->rt_ifp; + IF_AFDATA_LOCK(ifp); + ln = nd6_lookup(dst6, ND6_EXCLUSIVE, NULL); + IF_AFDATA_UNLOCK(ifp); + if (ln == NULL) return; + if (ln->ln_state < ND6_LLINFO_REACHABLE) + goto done; + /* * if we get upper-layer reachability confirmation many times, * it is possible we have false information. */ if (!force) { ln->ln_byhint++; - if (ln->ln_byhint > V_nd6_maxnudhint) - return; + if (ln->ln_byhint > V_nd6_maxnudhint) { + goto done; + } } - ln->ln_state = ND6_LLINFO_REACHABLE; + ln->ln_state = ND6_LLINFO_REACHABLE; if (!ND6_LLINFO_PERMANENT(ln)) { nd6_llinfo_settimer(ln, (long)ND_IFINFO(rt->rt_ifp)->reachable * hz); } +done: + LLE_WUNLOCK(ln); } -/* - * info - XXX unused - */ -void -nd6_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info) -{ - struct sockaddr *gate = rt->rt_gateway; - struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo; - static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; - struct ifnet *ifp = rt->rt_ifp; - struct ifaddr *ifa; - INIT_VNET_NET(ifp->if_vnet); - INIT_VNET_INET6(ifp->if_vnet); - - RT_LOCK_ASSERT(rt); - - if ((rt->rt_flags & RTF_GATEWAY) != 0) - return; - - if (nd6_need_cache(ifp) == 0 && (rt->rt_flags & RTF_HOST) == 0) { - /* - * This is probably an interface direct route for a link - * which does not need neighbor caches (e.g. fe80::%lo0/64). - * We do not need special treatment below for such a route. - * Moreover, the RTF_LLINFO flag which would be set below - * would annoy the ndp(8) command. - */ - return; - } - - if (req == RTM_RESOLVE && - (nd6_need_cache(ifp) == 0 || /* stf case */ - !nd6_is_new_addr_neighbor((struct sockaddr_in6 *)rt_key(rt), - ifp))) { - /* - * FreeBSD and BSD/OS often make a cloned host route based - * on a less-specific route (e.g. the default route). - * If the less specific route does not have a "gateway" - * (this is the case when the route just goes to a p2p or an - * stf interface), we'll mistakenly make a neighbor cache for - * the host route, and will see strange neighbor solicitation - * for the corresponding destination. In order to avoid the - * confusion, we check if the destination of the route is - * a neighbor in terms of neighbor discovery, and stop the - * process if not. Additionally, we remove the LLINFO flag - * so that ndp(8) will not try to get the neighbor information - * of the destination. - */ - rt->rt_flags &= ~RTF_LLINFO; - return; - } - - switch (req) { - case RTM_ADD: - /* - * There is no backward compatibility :) - * - * if ((rt->rt_flags & RTF_HOST) == 0 && - * SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff) - * rt->rt_flags |= RTF_CLONING; - */ - if ((rt->rt_flags & RTF_CLONING) || - ((rt->rt_flags & RTF_LLINFO) && ln == NULL)) { - /* - * Case 1: This route should come from a route to - * interface (RTF_CLONING case) or the route should be - * treated as on-link but is currently not - * (RTF_LLINFO && ln == NULL case). - */ - rt_setgate(rt, rt_key(rt), - (struct sockaddr *)&null_sdl); - gate = rt->rt_gateway; - SDL(gate)->sdl_type = ifp->if_type; - SDL(gate)->sdl_index = ifp->if_index; - if (ln) - nd6_llinfo_settimer(ln, 0); - if ((rt->rt_flags & RTF_CLONING) != 0) - break; - } - /* - * In IPv4 code, we try to annonuce new RTF_ANNOUNCE entry here. - * We don't do that here since llinfo is not ready yet. - * - * There are also couple of other things to be discussed: - * - unsolicited NA code needs improvement beforehand - * - RFC2461 says we MAY send multicast unsolicited NA - * (7.2.6 paragraph 4), however, it also says that we - * SHOULD provide a mechanism to prevent multicast NA storm. - * we don't have anything like it right now. - * note that the mechanism needs a mutual agreement - * between proxies, which means that we need to implement - * a new protocol, or a new kludge. - * - from RFC2461 6.2.4, host MUST NOT send an unsolicited NA. - * we need to check ip6forwarding before sending it. - * (or should we allow proxy ND configuration only for - * routers? there's no mention about proxy ND from hosts) - */ - /* FALLTHROUGH */ - case RTM_RESOLVE: - if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) == 0) { - /* - * Address resolution isn't necessary for a point to - * point link, so we can skip this test for a p2p link. - */ - if (gate->sa_family != AF_LINK || - gate->sa_len < sizeof(null_sdl)) { - log(LOG_DEBUG, - "nd6_rtrequest: bad gateway value: %s\n", - if_name(ifp)); - break; - } - SDL(gate)->sdl_type = ifp->if_type; - SDL(gate)->sdl_index = ifp->if_index; - } - if (ln != NULL) - break; /* This happens on a route change */ - /* - * Case 2: This route may come from cloning, or a manual route - * add with a LL address. - */ - R_Malloc(ln, struct llinfo_nd6 *, sizeof(*ln)); - rt->rt_llinfo = (caddr_t)ln; - if (ln == NULL) { - log(LOG_DEBUG, "nd6_rtrequest: malloc failed\n"); - break; - } - V_nd6_inuse++; - V_nd6_allocated++; - bzero(ln, sizeof(*ln)); - RT_ADDREF(rt); - ln->ln_rt = rt; - callout_init(&ln->ln_timer_ch, 0); - - /* this is required for "ndp" command. - shin */ - if (req == RTM_ADD) { - /* - * gate should have some valid AF_LINK entry, - * and ln->ln_expire should have some lifetime - * which is specified by ndp command. - */ - ln->ln_state = ND6_LLINFO_REACHABLE; - ln->ln_byhint = 0; - } else { - /* - * When req == RTM_RESOLVE, rt is created and - * initialized in rtrequest(), so rt_expire is 0. - */ - ln->ln_state = ND6_LLINFO_NOSTATE; - nd6_llinfo_settimer(ln, 0); - } - rt->rt_flags |= RTF_LLINFO; - ln->ln_next = V_llinfo_nd6.ln_next; - V_llinfo_nd6.ln_next = ln; - ln->ln_prev = &V_llinfo_nd6; - ln->ln_next->ln_prev = ln; - - /* - * check if rt_key(rt) is one of my address assigned - * to the interface. - */ - ifa = (struct ifaddr *)in6ifa_ifpwithaddr(rt->rt_ifp, - &SIN6(rt_key(rt))->sin6_addr); - if (ifa) { - caddr_t macp = nd6_ifptomac(ifp); - nd6_llinfo_settimer(ln, -1); - ln->ln_state = ND6_LLINFO_REACHABLE; - ln->ln_byhint = 0; - if (macp) { - bcopy(macp, LLADDR(SDL(gate)), ifp->if_addrlen); - SDL(gate)->sdl_alen = ifp->if_addrlen; - } - if (V_nd6_useloopback) { - rt->rt_ifp = &V_loif[0]; /* XXX */ - /* - * Make sure rt_ifa be equal to the ifaddr - * corresponding to the address. - * We need this because when we refer - * rt_ifa->ia6_flags in ip6_input, we assume - * that the rt_ifa points to the address instead - * of the loopback address. - */ - if (ifa != rt->rt_ifa) { - IFAFREE(rt->rt_ifa); - IFAREF(ifa); - rt->rt_ifa = ifa; - } - } - } else if (rt->rt_flags & RTF_ANNOUNCE) { - nd6_llinfo_settimer(ln, -1); - ln->ln_state = ND6_LLINFO_REACHABLE; - ln->ln_byhint = 0; - - /* join solicited node multicast for proxy ND */ - if (ifp->if_flags & IFF_MULTICAST) { - struct in6_addr llsol; - int error; - - llsol = SIN6(rt_key(rt))->sin6_addr; - llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL; - llsol.s6_addr32[1] = 0; - llsol.s6_addr32[2] = htonl(1); - llsol.s6_addr8[12] = 0xff; - if (in6_setscope(&llsol, ifp, NULL)) - break; - if (in6_addmulti(&llsol, ifp, - &error, 0) == NULL) { - char ip6buf[INET6_ADDRSTRLEN]; - nd6log((LOG_ERR, "%s: failed to join " - "%s (errno=%d)\n", if_name(ifp), - ip6_sprintf(ip6buf, &llsol), - error)); - } - } - } - break; - - case RTM_DELETE: - if (ln == NULL) - break; - /* leave from solicited node multicast for proxy ND */ - if ((rt->rt_flags & RTF_ANNOUNCE) != 0 && - (ifp->if_flags & IFF_MULTICAST) != 0) { - struct in6_addr llsol; - struct in6_multi *in6m; - - llsol = SIN6(rt_key(rt))->sin6_addr; - llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL; - llsol.s6_addr32[1] = 0; - llsol.s6_addr32[2] = htonl(1); - llsol.s6_addr8[12] = 0xff; - if (in6_setscope(&llsol, ifp, NULL) == 0) { - IN6_LOOKUP_MULTI(llsol, ifp, in6m); - if (in6m) - in6_delmulti(in6m); - } else - ; /* XXX: should not happen. bark here? */ - } - V_nd6_inuse--; - ln->ln_next->ln_prev = ln->ln_prev; - ln->ln_prev->ln_next = ln->ln_next; - ln->ln_prev = NULL; - nd6_llinfo_settimer(ln, -1); - RT_REMREF(rt); - rt->rt_llinfo = 0; - rt->rt_flags &= ~RTF_LLINFO; - clear_llinfo_pqueue(ln); - Free((caddr_t)ln); - } -} int nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) @@ -1477,7 +1163,6 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) struct in6_ndifreq *ndif = (struct in6_ndifreq *)data; struct nd_defrouter *dr; struct nd_prefix *pr; - struct rtentry *rt; int i = 0, error = 0; int s; @@ -1667,25 +1352,25 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) } case SIOCGNBRINFO_IN6: { - struct llinfo_nd6 *ln; + struct llentry *ln; struct in6_addr nb_addr = nbi->addr; /* make local for safety */ if ((error = in6_setscope(&nb_addr, ifp, NULL)) != 0) return (error); - s = splnet(); - if ((rt = nd6_lookup(&nb_addr, 0, ifp)) == NULL) { + IF_AFDATA_LOCK(ifp); + ln = nd6_lookup(&nb_addr, 0, ifp); + IF_AFDATA_UNLOCK(ifp); + + if (ln == NULL) { error = EINVAL; - splx(s); break; } - ln = (struct llinfo_nd6 *)rt->rt_llinfo; nbi->state = ln->ln_state; - nbi->asked = ln->ln_asked; + nbi->asked = ln->la_asked; nbi->isrouter = ln->ln_router; - nbi->expire = ln->ln_expire; - splx(s); - + nbi->expire = ln->la_expire; + LLE_RUNLOCK(ln); break; } case SIOCGDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */ @@ -1703,20 +1388,27 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) * * type - ICMP6 type * code - type dependent information + * + * XXXXX + * The caller of this function already acquired the ndp + * cache table lock because the cache entry is returned. */ -struct rtentry * +struct llentry * nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr, int lladdrlen, int type, int code) { INIT_VNET_INET6(curvnet); - struct rtentry *rt = NULL; - struct llinfo_nd6 *ln = NULL; + struct llentry *ln = NULL; int is_newentry; - struct sockaddr_dl *sdl = NULL; int do_update; int olladdr; int llchange; + int flags = 0; int newstate = 0; + struct sockaddr_in6 sin6; + struct mbuf *chain = NULL; + + IF_AFDATA_UNLOCK_ASSERT(ifp); if (ifp == NULL) panic("ifp == NULL in nd6_cache_lladdr"); @@ -1736,40 +1428,29 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr, * Spec says nothing in sections for RA, RS and NA. There's small * description on it in NS section (RFC 2461 7.2.3). */ - - rt = nd6_lookup(from, 0, ifp); - if (rt == NULL) { - rt = nd6_lookup(from, 1, ifp); + flags |= lladdr ? ND6_EXCLUSIVE : 0; + IF_AFDATA_LOCK(ifp); + ln = nd6_lookup(from, flags, ifp); + if (ln) + IF_AFDATA_UNLOCK(ifp); + if (ln == NULL) { + flags |= LLE_EXCLUSIVE; + ln = nd6_lookup(from, flags |ND6_CREATE, ifp); + IF_AFDATA_UNLOCK(ifp); is_newentry = 1; } else { /* do nothing if static ndp is set */ - if (rt->rt_flags & RTF_STATIC) - return NULL; + if (ln->la_flags & LLE_STATIC) + goto done; is_newentry = 0; } - - if (rt == NULL) - return NULL; - if ((rt->rt_flags & (RTF_GATEWAY | RTF_LLINFO)) != RTF_LLINFO) { -fail: - (void)nd6_free(rt, 0); - return NULL; - } - ln = (struct llinfo_nd6 *)rt->rt_llinfo; if (ln == NULL) - goto fail; - if (rt->rt_gateway == NULL) - goto fail; - if (rt->rt_gateway->sa_family != AF_LINK) - goto fail; - sdl = SDL(rt->rt_gateway); - - olladdr = (sdl->sdl_alen) ? 1 : 0; + return (NULL); + + olladdr = (ln->la_flags & LLE_VALID) ? 1 : 0; if (olladdr && lladdr) { - if (bcmp(lladdr, LLADDR(sdl), ifp->if_addrlen)) - llchange = 1; - else - llchange = 0; + llchange = bcmp(lladdr, &ln->ll_addr, + ifp->if_addrlen); } else llchange = 0; @@ -1789,8 +1470,8 @@ fail: * Record source link-layer address * XXX is it dependent to ifp->if_type? */ - sdl->sdl_alen = ifp->if_addrlen; - bcopy(lladdr, LLADDR(sdl), ifp->if_addrlen); + bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen); + ln->la_flags |= LLE_VALID; } if (!is_newentry) { @@ -1821,17 +1502,17 @@ fail: * we must set the timer now, although it is actually * meaningless. */ - nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz); + nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); - if (ln->ln_hold) { + if (ln->la_hold) { struct mbuf *m_hold, *m_hold_next; /* - * reset the ln_hold in advance, to explicitly - * prevent a ln_hold lookup in nd6_output() + * reset the la_hold in advance, to explicitly + * prevent a la_hold lookup in nd6_output() * (wouldn't happen, though...) */ - for (m_hold = ln->ln_hold, ln->ln_hold = NULL; + for (m_hold = ln->la_hold, ln->la_hold = NULL; m_hold; m_hold = m_hold_next) { m_hold_next = m_hold->m_nextpkt; m_hold->m_nextpkt = NULL; @@ -1841,14 +1522,14 @@ fail: * just set the 2nd argument as the * 1st one. */ - nd6_output(ifp, ifp, m_hold, - (struct sockaddr_in6 *)rt_key(rt), - rt); + nd6_output_lle(ifp, ifp, m_hold, L3_ADDR_SIN6(ln), NULL, ln, &chain); } + if (chain) + memcpy(&sin6, L3_ADDR_SIN6(ln), sizeof(sin6)); } } else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) { /* probe right away */ - nd6_llinfo_settimer((void *)ln, 0); + nd6_llinfo_settimer_locked((void *)ln, 0); } } @@ -1917,6 +1598,17 @@ fail: break; } + if (ln) { + if (flags & ND6_EXCLUSIVE) + LLE_WUNLOCK(ln); + else + LLE_RUNLOCK(ln); + if (ln->la_flags & LLE_STATIC) + ln = NULL; + } + if (chain) + nd6_output_flush(ifp, ifp, chain, &sin6, NULL); + /* * When the link-layer address of a router changes, select the * best router again. In particular, when the neighbor entry is newly @@ -1932,10 +1624,24 @@ fail: * for those are not autoconfigured hosts, we explicitly avoid such * cases for safety. */ - if (do_update && ln->ln_router && !V_ip6_forwarding && V_ip6_accept_rtadv) + if (do_update && ln->ln_router && !V_ip6_forwarding && V_ip6_accept_rtadv) { + /* + * guaranteed recursion + */ defrouter_select(); - - return rt; + } + + return (ln); +done: + if (ln) { + if (flags & ND6_EXCLUSIVE) + LLE_WUNLOCK(ln); + else + LLE_RUNLOCK(ln); + if (ln->la_flags & LLE_STATIC) + ln = NULL; + } + return (ln); } static void @@ -1969,18 +1675,45 @@ nd6_slowtimo(void *arg) CURVNET_RESTORE(); } -#define senderr(e) { error = (e); goto bad;} int nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, struct sockaddr_in6 *dst, struct rtentry *rt0) { + + return (nd6_output_lle(ifp, origifp, m0, dst, rt0, NULL, NULL)); +} + + +/* + * Note that I'm not enforcing any global serialization + * lle state or asked changes here as the logic is too + * complicated to avoid having to always acquire an exclusive + * lock + * KMM + * + */ +#define senderr(e) { error = (e); goto bad;} + +int +nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, + struct sockaddr_in6 *dst, struct rtentry *rt0, struct llentry *lle, + struct mbuf **tail) +{ INIT_VNET_INET6(curvnet); struct mbuf *m = m0; struct rtentry *rt = rt0; - struct sockaddr_in6 *gw6 = NULL; - struct llinfo_nd6 *ln = NULL; + struct llentry *ln = lle; int error = 0; + int flags = 0; + +#ifdef INVARIANTS + if (lle) { + + LLE_WLOCK_ASSERT(lle); + KASSERT(tail != NULL, (" lle locked but no tail pointer passed")); + } +#endif if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr)) goto sendpkt; @@ -1990,81 +1723,6 @@ nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, /* * next hop determination. This routine is derived from ether_output. */ - /* NB: the locking here is tortuous... */ - if (rt != NULL) - RT_LOCK(rt); -again: - if (rt != NULL) { - if ((rt->rt_flags & RTF_UP) == 0) { - RT_UNLOCK(rt); - rt0 = rt = rtalloc1((struct sockaddr *)dst, 1, 0UL); - if (rt != NULL) { - RT_REMREF(rt); - if (rt->rt_ifp != ifp) - /* - * XXX maybe we should update ifp too, - * but the original code didn't and I - * don't know what is correct here. - */ - goto again; - } else - senderr(EHOSTUNREACH); - } - - if (rt->rt_flags & RTF_GATEWAY) { - gw6 = (struct sockaddr_in6 *)rt->rt_gateway; - - /* - * We skip link-layer address resolution and NUD - * if the gateway is not a neighbor from ND point - * of view, regardless of the value of nd_ifinfo.flags. - * The second condition is a bit tricky; we skip - * if the gateway is our own address, which is - * sometimes used to install a route to a p2p link. - */ - if (!nd6_is_addr_neighbor(gw6, ifp) || - in6ifa_ifpwithaddr(ifp, &gw6->sin6_addr)) { - RT_UNLOCK(rt); - /* - * We allow this kind of tricky route only - * when the outgoing interface is p2p. - * XXX: we may need a more generic rule here. - */ - if ((ifp->if_flags & IFF_POINTOPOINT) == 0) - senderr(EHOSTUNREACH); - - goto sendpkt; - } - - if (rt->rt_gwroute == NULL) - goto lookup; - rt = rt->rt_gwroute; - RT_LOCK(rt); /* NB: gwroute */ - if ((rt->rt_flags & RTF_UP) == 0) { - RTFREE_LOCKED(rt); /* unlock gwroute */ - rt = rt0; - rt0->rt_gwroute = NULL; - lookup: - RT_UNLOCK(rt0); - rt = rtalloc1(rt->rt_gateway, 1, 0UL); - if (rt == rt0) { - RT_REMREF(rt0); - RT_UNLOCK(rt0); - senderr(EHOSTUNREACH); - } - RT_LOCK(rt0); - if (rt0->rt_gwroute != NULL) - RTFREE(rt0->rt_gwroute); - rt0->rt_gwroute = rt; - if (rt == NULL) { - RT_UNLOCK(rt0); - senderr(EHOSTUNREACH); - } - } - RT_UNLOCK(rt0); - } - RT_UNLOCK(rt); - } /* * Address resolution or Neighbor Unreachability Detection @@ -2073,20 +1731,25 @@ again: * or an anycast address(i.e. not a multicast). */ - /* Look up the neighbor cache for the nexthop */ - if (rt && (rt->rt_flags & RTF_LLINFO) != 0) - ln = (struct llinfo_nd6 *)rt->rt_llinfo; - else { - /* - * Since nd6_is_addr_neighbor() internally calls nd6_lookup(), - * the condition below is not very efficient. But we believe - * it is tolerable, because this should be a rare case. - */ - if (nd6_is_addr_neighbor(dst, ifp) && - (rt = nd6_lookup(&dst->sin6_addr, 1, ifp)) != NULL) - ln = (struct llinfo_nd6 *)rt->rt_llinfo; - } - if (ln == NULL || rt == NULL) { + flags = ((m != NULL) || (lle != NULL)) ? LLE_EXCLUSIVE : 0; + if (ln == NULL) { + retry: + IF_AFDATA_LOCK(rt->rt_ifp); + ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)dst); + IF_AFDATA_UNLOCK(rt->rt_ifp); + if ((ln == NULL) && nd6_is_addr_neighbor(dst, ifp)) { + /* + * Since nd6_is_addr_neighbor() internally calls nd6_lookup(), + * the condition below is not very efficient. But we believe + * it is tolerable, because this should be a rare case. + */ + flags = ND6_CREATE | (m ? ND6_EXCLUSIVE : 0); + IF_AFDATA_LOCK(rt->rt_ifp); + ln = nd6_lookup(&dst->sin6_addr, flags, ifp); + IF_AFDATA_UNLOCK(rt->rt_ifp); + } + } + if (ln == NULL) { if ((ifp->if_flags & IFF_POINTOPOINT) == 0 && !(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) { char ip6buf[INET6_ADDRSTRLEN]; @@ -2096,15 +1759,18 @@ again: ip6_sprintf(ip6buf, &dst->sin6_addr), ln, rt); senderr(EIO); /* XXX: good error? */ } - goto sendpkt; /* send anyway */ } /* We don't have to do link-layer address resolution on a p2p link. */ if ((ifp->if_flags & IFF_POINTOPOINT) != 0 && ln->ln_state < ND6_LLINFO_REACHABLE) { + if ((flags & LLE_EXCLUSIVE) == 0) { + flags |= LLE_EXCLUSIVE; + goto retry; + } ln->ln_state = ND6_LLINFO_STALE; - nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz); + nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); } /* @@ -2115,9 +1781,14 @@ again: * (RFC 2461 7.3.3) */ if (ln->ln_state == ND6_LLINFO_STALE) { - ln->ln_asked = 0; + if ((flags & LLE_EXCLUSIVE) == 0) { + flags |= LLE_EXCLUSIVE; + LLE_RUNLOCK(ln); + goto retry; + } + ln->la_asked = 0; ln->ln_state = ND6_LLINFO_DELAY; - nd6_llinfo_settimer(ln, (long)V_nd6_delay * hz); + nd6_llinfo_settimer_locked(ln, (long)V_nd6_delay * hz); } /* @@ -2137,12 +1808,18 @@ again: */ if (ln->ln_state == ND6_LLINFO_NOSTATE) ln->ln_state = ND6_LLINFO_INCOMPLETE; - if (ln->ln_hold) { + + if ((flags & LLE_EXCLUSIVE) == 0) { + flags |= LLE_EXCLUSIVE; + LLE_RUNLOCK(ln); + goto retry; + } + if (ln->la_hold) { struct mbuf *m_hold; int i; - + i = 0; - for (m_hold = ln->ln_hold; m_hold; m_hold = m_hold->m_nextpkt) { + for (m_hold = ln->la_hold; m_hold; m_hold = m_hold->m_nextpkt) { i++; if (m_hold->m_nextpkt == NULL) { m_hold->m_nextpkt = m; @@ -2150,21 +1827,32 @@ again: } } while (i >= V_nd6_maxqueuelen) { - m_hold = ln->ln_hold; - ln->ln_hold = ln->ln_hold->m_nextpkt; + m_hold = ln->la_hold; + ln->la_hold = ln->la_hold->m_nextpkt; m_freem(m_hold); i--; } } else { - ln->ln_hold = m; + ln->la_hold = m; } - + /* + * We did the lookup (no lle arg) so we + * need to do the unlock here + */ + if (lle == NULL) { + if (flags & LLE_EXCLUSIVE) + LLE_WUNLOCK(ln); + else + LLE_RUNLOCK(ln); + } + /* * If there has been no NS for the neighbor after entering the * INCOMPLETE state, send the first solicitation. */ - if (!ND6_LLINFO_PERMANENT(ln) && ln->ln_asked == 0) { - ln->ln_asked++; + if (!ND6_LLINFO_PERMANENT(ln) && ln->la_asked == 0) { + ln->la_asked++; + nd6_llinfo_settimer(ln, (long)ND_IFINFO(ifp)->retrans * hz / 1000); nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0); @@ -2177,23 +1865,82 @@ again: error = ENETDOWN; /* better error? */ goto bad; } + /* + * ln is valid and the caller did not pass in + * an llentry + */ + if (ln && (lle == NULL)) { + if (flags & LLE_EXCLUSIVE) + LLE_WUNLOCK(ln); + else + LLE_RUNLOCK(ln); + } #ifdef MAC mac_netinet6_nd6_send(ifp, m); #endif + if (lle != NULL) { + if (*tail == NULL) + *tail = m; + else + (*tail)->m_nextpkt = m; + return (error); + } if ((ifp->if_flags & IFF_LOOPBACK) != 0) { return ((*ifp->if_output)(origifp, m, (struct sockaddr *)dst, rt)); } - return ((*ifp->if_output)(ifp, m, (struct sockaddr *)dst, rt)); + error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, rt); + return (error); bad: + /* + * ln is valid and the caller did not pass in + * an llentry + */ + if (ln && (lle == NULL)) { + if (flags & LLE_EXCLUSIVE) + LLE_WUNLOCK(ln); + else + LLE_RUNLOCK(ln); + } if (m) m_freem(m); return (error); } #undef senderr + +int +nd6_output_flush(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain, + struct sockaddr_in6 *dst, struct rtentry *rt) +{ + struct mbuf *m, *m_head; + struct ifnet *outifp; + int error = 0; + + m_head = chain; + if ((ifp->if_flags & IFF_LOOPBACK) != 0) + outifp = origifp; + else + outifp = ifp; + + while (m_head) { + m = m_head; + m_head = m_head->m_nextpkt; + error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, rt); + } + + /* + * XXX + * note that intermediate errors are blindly ignored - but this is + * the same convention as used with nd6_output when called by + * nd6_cache_lladdr + */ + return (error); +} + + int nd6_need_cache(struct ifnet *ifp) { @@ -2229,14 +1976,18 @@ nd6_need_cache(struct ifnet *ifp) } } +/* + * the callers of this function need to be re-worked to drop + * the lle lock, drop here for now + */ int nd6_storelladdr(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m, - struct sockaddr *dst, u_char *desten) + struct sockaddr *dst, u_char *desten, struct llentry **lle) { - struct sockaddr_dl *sdl; - struct rtentry *rt; - int error; + struct llentry *ln; + *lle = NULL; + IF_AFDATA_UNLOCK_ASSERT(ifp); if (m->m_flags & M_MCAST) { int i; @@ -2271,48 +2022,42 @@ nd6_storelladdr(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m, } } - if (rt0 == NULL) { - /* this could happen, if we could not allocate memory */ - m_freem(m); - return (ENOMEM); - } - - error = rt_check(&rt, &rt0, dst); - if (error) { - m_freem(m); - return (error); - } - RT_UNLOCK(rt); - if (rt->rt_gateway->sa_family != AF_LINK) { - printf("nd6_storelladdr: something odd happens\n"); - m_freem(m); - return (EINVAL); - } - sdl = SDL(rt->rt_gateway); - if (sdl->sdl_alen == 0) { - /* this should be impossible, but we bark here for debugging */ - printf("nd6_storelladdr: sdl_alen == 0\n"); + /* + * the entry should have been created in nd6_store_lladdr + */ + IF_AFDATA_LOCK(ifp); + ln = lla_lookup(LLTABLE6(ifp), 0, dst); + IF_AFDATA_UNLOCK(ifp); + if ((ln == NULL) || !(ln->la_flags & LLE_VALID)) { + if (ln) + LLE_RUNLOCK(ln); + /* this could happen, if we could not allocate memory */ m_freem(m); - return (EINVAL); + return (1); } - bcopy(LLADDR(sdl), desten, sdl->sdl_alen); + bcopy(&ln->ll_addr, desten, ifp->if_addrlen); + *lle = ln; + LLE_RUNLOCK(ln); + /* + * A *small* use after free race exists here + */ return (0); } -static void -clear_llinfo_pqueue(struct llinfo_nd6 *ln) +static void +clear_llinfo_pqueue(struct llentry *ln) { struct mbuf *m_hold, *m_hold_next; - for (m_hold = ln->ln_hold; m_hold; m_hold = m_hold_next) { + for (m_hold = ln->la_hold; m_hold; m_hold = m_hold_next) { m_hold_next = m_hold->m_nextpkt; m_hold->m_nextpkt = NULL; m_freem(m_hold); } - ln->ln_hold = NULL; + ln->la_hold = NULL; return; } diff --git a/sys/netinet6/nd6.h b/sys/netinet6/nd6.h index 4d3c06b..f4ccd07 100644 --- a/sys/netinet6/nd6.h +++ b/sys/netinet6/nd6.h @@ -41,20 +41,7 @@ #include <sys/queue.h> #include <sys/callout.h> -struct llinfo_nd6 { - struct llinfo_nd6 *ln_next; - struct llinfo_nd6 *ln_prev; - struct rtentry *ln_rt; - struct mbuf *ln_hold; /* last packet until resolved/timeout */ - long ln_asked; /* number of queries already sent for this addr */ - u_long ln_expire; /* lifetime for NDP state transition */ - short ln_state; /* reachability state */ - short ln_router; /* 2^0: ND6 router bit */ - int ln_byhint; /* # of times we made it reachable by UL hint */ - - long ln_ntick; - struct callout ln_timer_ch; -}; +struct llentry; #define ND6_LLINFO_NOSTATE -2 /* @@ -72,7 +59,7 @@ struct llinfo_nd6 { #define ND6_LLINFO_PROBE 4 #define ND6_IS_LLINFO_PROBREACH(n) ((n)->ln_state > ND6_LLINFO_INCOMPLETE) -#define ND6_LLINFO_PERMANENT(n) (((n)->ln_expire == 0) && ((n)->ln_state > ND6_LLINFO_INCOMPLETE)) +#define ND6_LLINFO_PERMANENT(n) (((n)->la_expire == 0) && ((n)->ln_state > ND6_LLINFO_INCOMPLETE)) struct nd_ifinfo { u_int32_t linkmtu; /* LinkMTU */ @@ -98,6 +85,9 @@ struct nd_ifinfo { */ #define ND6_IFF_DONT_SET_IFROUTE 0x10 +#define ND6_CREATE LLE_CREATE +#define ND6_EXCLUSIVE LLE_EXCLUSIVE + #ifdef _KERNEL #define ND_IFINFO(ifp) \ (((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->nd_ifinfo) @@ -336,7 +326,6 @@ extern int nd6_mmaxtries; extern int nd6_useloopback; extern int nd6_maxnudhint; extern int nd6_gctimer; -extern struct llinfo_nd6 llinfo_nd6; extern struct nd_drhead nd_defrouter; extern struct nd_prhead nd_prefix; extern int nd6_debug; @@ -388,23 +377,28 @@ int nd6_is_addr_neighbor __P((struct sockaddr_in6 *, struct ifnet *)); void nd6_option_init __P((void *, int, union nd_opts *)); struct nd_opt_hdr *nd6_option __P((union nd_opts *)); int nd6_options __P((union nd_opts *)); -struct rtentry *nd6_lookup __P((struct in6_addr *, int, struct ifnet *)); +struct llentry *nd6_lookup __P((struct in6_addr *, int, struct ifnet *)); void nd6_setmtu __P((struct ifnet *)); -void nd6_llinfo_settimer __P((struct llinfo_nd6 *, long)); +void nd6_llinfo_settimer __P((struct llentry *, long)); +void nd6_llinfo_settimer_locked __P((struct llentry *, long)); void nd6_timer __P((void *)); void nd6_purge __P((struct ifnet *)); void nd6_nud_hint __P((struct rtentry *, struct in6_addr *, int)); int nd6_resolve __P((struct ifnet *, struct rtentry *, struct mbuf *, struct sockaddr *, u_char *)); -void nd6_rtrequest __P((int, struct rtentry *, struct rt_addrinfo *)); int nd6_ioctl __P((u_long, caddr_t, struct ifnet *)); -struct rtentry *nd6_cache_lladdr __P((struct ifnet *, struct in6_addr *, +struct llentry *nd6_cache_lladdr __P((struct ifnet *, struct in6_addr *, char *, int, int, int)); int nd6_output __P((struct ifnet *, struct ifnet *, struct mbuf *, struct sockaddr_in6 *, struct rtentry *)); +int nd6_output_lle __P((struct ifnet *, struct ifnet *, struct mbuf *, + struct sockaddr_in6 *, struct rtentry *, struct llentry *, + struct mbuf **)); +int nd6_output_flush __P((struct ifnet *, struct ifnet *, struct mbuf *, + struct sockaddr_in6 *, struct rtentry *)); int nd6_need_cache __P((struct ifnet *)); int nd6_storelladdr __P((struct ifnet *, struct rtentry *, struct mbuf *, - struct sockaddr *, u_char *)); + struct sockaddr *, u_char *, struct llentry **)); /* nd6_nbr.c */ void nd6_na_input __P((struct mbuf *, int, int)); @@ -412,7 +406,7 @@ void nd6_na_output __P((struct ifnet *, const struct in6_addr *, const struct in6_addr *, u_long, int, struct sockaddr *)); void nd6_ns_input __P((struct mbuf *, int, int)); void nd6_ns_output __P((struct ifnet *, const struct in6_addr *, - const struct in6_addr *, struct llinfo_nd6 *, int)); + const struct in6_addr *, struct llentry *, int)); caddr_t nd6_ifptomac __P((struct ifnet *)); void nd6_dad_start __P((struct ifaddr *, int)); void nd6_dad_stop __P((struct ifaddr *)); diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c index ecfad0e..2dcabdb 100644 --- a/sys/netinet6/nd6_nbr.c +++ b/sys/netinet6/nd6_nbr.c @@ -41,6 +41,8 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> #include <sys/malloc.h> +#include <sys/lock.h> +#include <sys/rwlock.h> #include <sys/mbuf.h> #include <sys/socket.h> #include <sys/sockio.h> @@ -63,6 +65,8 @@ __FBSDID("$FreeBSD$"); #include <netinet/in.h> #include <netinet/in_var.h> +#include <net/if_llatbl.h> +#define L3_ADDR_SIN6(le) ((struct sockaddr_in6 *) L3_ADDR(le)) #include <netinet6/in6_var.h> #include <netinet6/in6_ifattach.h> #include <netinet/ip6.h> @@ -167,7 +171,7 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len) src_sa6.sin6_family = AF_INET6; src_sa6.sin6_len = sizeof(src_sa6); src_sa6.sin6_addr = saddr6; - if (!nd6_is_addr_neighbor(&src_sa6, ifp)) { + if (nd6_is_addr_neighbor(&src_sa6, ifp) == 0) { nd6log((LOG_INFO, "nd6_ns_input: " "NS packet from non-neighbor\n")); goto bad; @@ -378,8 +382,8 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len) * dad - duplicate address detection */ void -nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6, - const struct in6_addr *taddr6, struct llinfo_nd6 *ln, int dad) +nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6, + const struct in6_addr *taddr6, struct llentry *ln, int dad) { INIT_VNET_INET6(ifp->if_vnet); struct mbuf *m; @@ -470,14 +474,14 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6, struct ip6_hdr *hip6; /* hold ip6 */ struct in6_addr *hsrc = NULL; - if (ln && ln->ln_hold) { + if (ln && ln->la_hold) { /* - * assuming every packet in ln_hold has the same IP + * assuming every packet in la_hold has the same IP * header */ - hip6 = mtod(ln->ln_hold, struct ip6_hdr *); + hip6 = mtod(ln->la_hold, struct ip6_hdr *); /* XXX pullup? */ - if (sizeof(*hip6) < ln->ln_hold->m_len) + if (sizeof(*hip6) < ln->la_hold->m_len) hsrc = &hip6->ip6_src; else hsrc = NULL; @@ -600,10 +604,10 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) char *lladdr = NULL; int lladdrlen = 0; struct ifaddr *ifa; - struct llinfo_nd6 *ln; - struct rtentry *rt; - struct sockaddr_dl *sdl; + struct llentry *ln = NULL; union nd_opts ndopts; + struct mbuf *chain = NULL; + struct sockaddr_in6 sin6; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; if (ip6->ip6_hlim != 255) { @@ -697,35 +701,37 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) * If no neighbor cache entry is found, NA SHOULD silently be * discarded. */ - rt = nd6_lookup(&taddr6, 0, ifp); - if ((rt == NULL) || - ((ln = (struct llinfo_nd6 *)rt->rt_llinfo) == NULL) || - ((sdl = SDL(rt->rt_gateway)) == NULL)) + IF_AFDATA_LOCK(ifp); + ln = nd6_lookup(&taddr6, LLE_EXCLUSIVE, ifp); + IF_AFDATA_UNLOCK(ifp); + if (ln == NULL) { goto freeit; + } if (ln->ln_state == ND6_LLINFO_INCOMPLETE) { /* * If the link-layer has address, and no lladdr option came, * discard the packet. */ - if (ifp->if_addrlen && lladdr == NULL) + if (ifp->if_addrlen && lladdr == NULL) { goto freeit; + } /* * Record link-layer address, and update the state. */ - sdl->sdl_alen = ifp->if_addrlen; - bcopy(lladdr, LLADDR(sdl), ifp->if_addrlen); + bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen); + ln->la_flags |= LLE_VALID; if (is_solicited) { ln->ln_state = ND6_LLINFO_REACHABLE; ln->ln_byhint = 0; if (!ND6_LLINFO_PERMANENT(ln)) { - nd6_llinfo_settimer(ln, - (long)ND_IFINFO(rt->rt_ifp)->reachable * hz); + nd6_llinfo_settimer_locked(ln, + (long)ND_IFINFO(ln->lle_tbl->llt_ifp)->reachable * hz); } } else { ln->ln_state = ND6_LLINFO_STALE; - nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz); + nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); } if ((ln->ln_router = is_router) != 0) { /* @@ -744,8 +750,8 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) if (lladdr == NULL) llchange = 0; else { - if (sdl->sdl_alen) { - if (bcmp(lladdr, LLADDR(sdl), ifp->if_addrlen)) + if (ln->la_flags & LLE_VALID) { + if (bcmp(lladdr, &ln->ll_addr, ifp->if_addrlen)) llchange = 1; else llchange = 0; @@ -779,7 +785,7 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) */ if (ln->ln_state == ND6_LLINFO_REACHABLE) { ln->ln_state = ND6_LLINFO_STALE; - nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz); + nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); } goto freeit; } else if (is_override /* (2a) */ @@ -789,8 +795,8 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) * Update link-local address, if any. */ if (lladdr != NULL) { - sdl->sdl_alen = ifp->if_addrlen; - bcopy(lladdr, LLADDR(sdl), ifp->if_addrlen); + bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen); + ln->la_flags |= LLE_VALID; } /* @@ -802,13 +808,13 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) ln->ln_state = ND6_LLINFO_REACHABLE; ln->ln_byhint = 0; if (!ND6_LLINFO_PERMANENT(ln)) { - nd6_llinfo_settimer(ln, + nd6_llinfo_settimer_locked(ln, (long)ND_IFINFO(ifp)->reachable * hz); } } else { if (lladdr != NULL && llchange) { ln->ln_state = ND6_LLINFO_STALE; - nd6_llinfo_settimer(ln, + nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); } } @@ -822,9 +828,8 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) */ struct nd_defrouter *dr; struct in6_addr *in6; - int s; - in6 = &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr; + in6 = &L3_ADDR_SIN6(ln)->sin6_addr; /* * Lock to protect the default router list. @@ -832,8 +837,7 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) * is only called under the network software interrupt * context. However, we keep it just for safety. */ - s = splnet(); - dr = defrouter_lookup(in6, ifp); + dr = defrouter_lookup(in6, ln->lle_tbl->llt_ifp); if (dr) defrtrlist_del(dr); else if (!V_ip6_forwarding) { @@ -846,21 +850,23 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) */ rt6_flush(&ip6->ip6_src, ifp); } - splx(s); } ln->ln_router = is_router; } - rt->rt_flags &= ~RTF_REJECT; - ln->ln_asked = 0; - if (ln->ln_hold) { + /* XXX - QL + * Does this matter? + * rt->rt_flags &= ~RTF_REJECT; + */ + ln->la_asked = 0; + if (ln->la_hold) { struct mbuf *m_hold, *m_hold_next; /* - * reset the ln_hold in advance, to explicitly - * prevent a ln_hold lookup in nd6_output() + * reset the la_hold in advance, to explicitly + * prevent a la_hold lookup in nd6_output() * (wouldn't happen, though...) */ - for (m_hold = ln->ln_hold; + for (m_hold = ln->la_hold, ln->la_hold = NULL; m_hold; m_hold = m_hold_next) { m_hold_next = m_hold->m_nextpkt; m_hold->m_nextpkt = NULL; @@ -868,17 +874,25 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) * we assume ifp is not a loopback here, so just set * the 2nd argument as the 1st one. */ - nd6_output(ifp, ifp, m_hold, - (struct sockaddr_in6 *)rt_key(rt), rt); + nd6_output_lle(ifp, ifp, m_hold, L3_ADDR_SIN6(ln), NULL, ln, &chain); } - ln->ln_hold = NULL; } - freeit: + if (ln) { + if (chain) + memcpy(&sin6, L3_ADDR_SIN6(ln), sizeof(sin6)); + LLE_WUNLOCK(ln); + + if (chain) + nd6_output_flush(ifp, ifp, chain, &sin6, NULL); + } m_freem(m); return; bad: + if (ln) + LLE_WUNLOCK(ln); + V_icmp6stat.icp6s_badna++; m_freem(m); } diff --git a/sys/netinet6/nd6_rtr.c b/sys/netinet6/nd6_rtr.c index c9ed36d..7b4af3c 100644 --- a/sys/netinet6/nd6_rtr.c +++ b/sys/netinet6/nd6_rtr.c @@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$"); #include <net/vnet.h> #include <netinet/in.h> +#include <net/if_llatbl.h> #include <netinet6/in6_var.h> #include <netinet6/in6_ifattach.h> #include <netinet/ip6.h> @@ -471,10 +472,8 @@ defrouter_addreq(struct nd_defrouter *new) (struct sockaddr *)&gate, (struct sockaddr *)&mask, RTF_GATEWAY, &newrt); if (newrt) { - RT_LOCK(newrt); nd6_rtmsg(RTM_ADD, newrt); /* tell user process */ - RT_REMREF(newrt); - RT_UNLOCK(newrt); + RTFREE(newrt); } if (error == 0) new->installed = 1; @@ -615,8 +614,7 @@ defrouter_select(void) INIT_VNET_INET6(curvnet); int s = splnet(); struct nd_defrouter *dr, *selected_dr = NULL, *installed_dr = NULL; - struct rtentry *rt = NULL; - struct llinfo_nd6 *ln = NULL; + struct llentry *ln = NULL; /* * This function should be called only when acting as an autoconfigured @@ -648,12 +646,13 @@ defrouter_select(void) */ for (dr = TAILQ_FIRST(&V_nd_defrouter); dr; dr = TAILQ_NEXT(dr, dr_entry)) { + IF_AFDATA_LOCK(dr->ifp); if (selected_dr == NULL && - (rt = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) && - (ln = (struct llinfo_nd6 *)rt->rt_llinfo) && + (ln = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) && ND6_IS_LLINFO_PROBREACH(ln)) { selected_dr = dr; } + IF_AFDATA_UNLOCK(dr->ifp); if (dr->installed && installed_dr == NULL) installed_dr = dr; @@ -676,12 +675,14 @@ defrouter_select(void) selected_dr = TAILQ_FIRST(&V_nd_defrouter); else selected_dr = TAILQ_NEXT(installed_dr, dr_entry); - } else if (installed_dr && - (rt = nd6_lookup(&installed_dr->rtaddr, 0, installed_dr->ifp)) && - (ln = (struct llinfo_nd6 *)rt->rt_llinfo) && - ND6_IS_LLINFO_PROBREACH(ln) && - rtpref(selected_dr) <= rtpref(installed_dr)) { - selected_dr = installed_dr; + } else if (installed_dr) { + IF_AFDATA_LOCK(installed_dr->ifp); + if ((ln = nd6_lookup(&installed_dr->rtaddr, 0, installed_dr->ifp)) && + ND6_IS_LLINFO_PROBREACH(ln) && + rtpref(selected_dr) <= rtpref(installed_dr)) { + selected_dr = installed_dr; + } + IF_AFDATA_UNLOCK(installed_dr->ifp); } /* @@ -1323,18 +1324,19 @@ static struct nd_pfxrouter * find_pfxlist_reachable_router(struct nd_prefix *pr) { struct nd_pfxrouter *pfxrtr; - struct rtentry *rt; - struct llinfo_nd6 *ln; + struct llentry *ln; for (pfxrtr = LIST_FIRST(&pr->ndpr_advrtrs); pfxrtr; pfxrtr = LIST_NEXT(pfxrtr, pfr_entry)) { - if ((rt = nd6_lookup(&pfxrtr->router->rtaddr, 0, + IF_AFDATA_LOCK(pfxrtr->router->ifp); + if ((ln = nd6_lookup(&pfxrtr->router->rtaddr, 0, pfxrtr->router->ifp)) && - (ln = (struct llinfo_nd6 *)rt->rt_llinfo) && - ND6_IS_LLINFO_PROBREACH(ln)) + ND6_IS_LLINFO_PROBREACH(ln)) { + IF_AFDATA_UNLOCK(pfxrtr->router->ifp); break; /* found */ + } + IF_AFDATA_UNLOCK(pfxrtr->router->ifp); } - return (pfxrtr); } @@ -1541,8 +1543,10 @@ nd6_prefix_onlink(struct nd_prefix *pr) struct nd_prefix *opr; u_long rtflags; int error = 0; + struct radix_node_head *rnh; struct rtentry *rt = NULL; char ip6buf[INET6_ADDRSTRLEN]; + struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; /* sanity check */ if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) { @@ -1609,21 +1613,24 @@ nd6_prefix_onlink(struct nd_prefix *pr) bzero(&mask6, sizeof(mask6)); mask6.sin6_len = sizeof(mask6); mask6.sin6_addr = pr->ndpr_mask; - rtflags = ifa->ifa_flags | RTF_CLONING | RTF_UP; - if (nd6_need_cache(ifp)) { - /* explicitly set in case ifa_flags does not set the flag. */ - rtflags |= RTF_CLONING; - } else { - /* - * explicitly clear the cloning bit in case ifa_flags sets it. - */ - rtflags &= ~RTF_CLONING; - } + rtflags = ifa->ifa_flags | RTF_UP; error = rtrequest(RTM_ADD, (struct sockaddr *)&pr->ndpr_prefix, ifa->ifa_addr, (struct sockaddr *)&mask6, rtflags, &rt); if (error == 0) { - if (rt != NULL) /* this should be non NULL, though */ + if (rt != NULL) /* this should be non NULL, though */ { + rnh = V_rt_tables[rt->rt_fibnum][AF_INET6]; + RADIX_NODE_HEAD_LOCK(rnh); + RT_LOCK(rt); + if (!rt_setgate(rt, rt_key(rt), (struct sockaddr *)&null_sdl)) { + ((struct sockaddr_dl *)rt->rt_gateway)->sdl_type = + rt->rt_ifp->if_type; + ((struct sockaddr_dl *)rt->rt_gateway)->sdl_index = + rt->rt_ifp->if_index; + } + RADIX_NODE_HEAD_UNLOCK(rnh); nd6_rtmsg(RTM_ADD, rt); + RT_UNLOCK(rt); + } pr->ndpr_stateflags |= NDPRF_ONLINK; } else { char ip6bufg[INET6_ADDRSTRLEN], ip6bufm[INET6_ADDRSTRLEN]; diff --git a/sys/netinet6/vinet6.h b/sys/netinet6/vinet6.h index d6c3f33..e271d4f 100644 --- a/sys/netinet6/vinet6.h +++ b/sys/netinet6/vinet6.h @@ -78,7 +78,6 @@ struct vnet_inet6 { int _nd6_inuse; int _nd6_allocated; int _nd6_onlink_ns_rfc4861; - struct llinfo_nd6 _llinfo_nd6; struct nd_drhead _nd_defrouter; struct nd_prhead _nd_prefix; struct ifnet * _nd6_defifp; diff --git a/sys/sys/param.h b/sys/sys/param.h index 497b362..f8a4b2a 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -57,7 +57,7 @@ * is created, otherwise 1. */ #undef __FreeBSD_version -#define __FreeBSD_version 800058 /* Master, propagated to newvers */ +#define __FreeBSD_version 800059 /* Master, propagated to newvers */ #ifndef LOCORE #include <sys/types.h> |