diff options
Diffstat (limited to 'sys/net')
-rw-r--r-- | sys/net/bpf_filter.c | 2 | ||||
-rw-r--r-- | sys/net/if.c | 18 | ||||
-rw-r--r-- | sys/net/if.h | 2 | ||||
-rw-r--r-- | sys/net/if_arcsubr.c | 1 | ||||
-rw-r--r-- | sys/net/if_atmsubr.c | 1 | ||||
-rw-r--r-- | sys/net/if_debug.c | 1 | ||||
-rw-r--r-- | sys/net/if_ethersubr.c | 2 | ||||
-rw-r--r-- | sys/net/if_fddisubr.c | 1 | ||||
-rw-r--r-- | sys/net/if_fwsubr.c | 1 | ||||
-rw-r--r-- | sys/net/if_gif.c | 1 | ||||
-rw-r--r-- | sys/net/if_gre.c | 83 | ||||
-rw-r--r-- | sys/net/if_gre.h | 2 | ||||
-rw-r--r-- | sys/net/if_iso88025subr.c | 1 | ||||
-rw-r--r-- | sys/net/if_lagg.c | 3 | ||||
-rw-r--r-- | sys/net/if_llatbl.h | 3 | ||||
-rw-r--r-- | sys/net/if_spppfr.c | 2 | ||||
-rw-r--r-- | sys/net/if_spppsubr.c | 1 | ||||
-rw-r--r-- | sys/net/if_stf.c | 911 | ||||
-rw-r--r-- | sys/net/if_tun.c | 1 | ||||
-rw-r--r-- | sys/net/if_var.h | 5 | ||||
-rw-r--r-- | sys/net/route.c | 7 | ||||
-rw-r--r-- | sys/net/route.h | 2 | ||||
-rw-r--r-- | sys/net/rtsock.c | 14 |
23 files changed, 301 insertions, 764 deletions
diff --git a/sys/net/bpf_filter.c b/sys/net/bpf_filter.c index 1bd1609..672dfaa 100644 --- a/sys/net/bpf_filter.c +++ b/sys/net/bpf_filter.c @@ -177,6 +177,8 @@ bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen) bpf_u_int32 k; u_int32_t mem[BPF_MEMWORDS]; + bzero(mem, sizeof(mem)); + if (pc == NULL) /* * No filter means accept all. diff --git a/sys/net/if.c b/sys/net/if.c index b7c2ad1..2b7a24a 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -58,6 +58,8 @@ #include <sys/taskqueue.h> #include <sys/domain.h> #include <sys/jail.h> +#include <sys/priv.h> + #include <machine/stdarg.h> #include <vm/uma.h> @@ -2135,6 +2137,20 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) free(odescrbuf, M_IFDESCR); break; + case SIOCGIFFIB: + ifr->ifr_fib = ifp->if_fib; + break; + + case SIOCSIFFIB: + error = priv_check(td, PRIV_NET_SETIFFIB); + if (error) + return (error); + if (ifr->ifr_fib >= rt_numfibs) + return (EINVAL); + + ifp->if_fib = ifr->ifr_fib; + break; + case SIOCSIFFLAGS: error = priv_check(td, PRIV_NET_SETIFFLAGS); if (error) @@ -2467,6 +2483,8 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td) error = ifconf(SIOCGIFCONF, (void *)&ifc); CURVNET_RESTORE(); + if (error == 0) + ifc32->ifc_len = ifc.ifc_len; return (error); } #endif diff --git a/sys/net/if.h b/sys/net/if.h index 06521cb..d1f3883 100644 --- a/sys/net/if.h +++ b/sys/net/if.h @@ -325,6 +325,7 @@ struct ifreq { int ifru_media; caddr_t ifru_data; int ifru_cap[2]; + u_int ifru_fib; } ifr_ifru; #define ifr_addr ifr_ifru.ifru_addr /* address */ #define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-to-p link */ @@ -341,6 +342,7 @@ struct ifreq { #define ifr_reqcap ifr_ifru.ifru_cap[0] /* requested capabilities */ #define ifr_curcap ifr_ifru.ifru_cap[1] /* current capabilities */ #define ifr_index ifr_ifru.ifru_index /* interface index */ +#define ifr_fib ifr_ifru.ifru_fib /* interface fib */ }; #define _SIZEOF_ADDR_IFREQ(ifr) \ diff --git a/sys/net/if_arcsubr.c b/sys/net/if_arcsubr.c index 0e23b24..2a58d5a 100644 --- a/sys/net/if_arcsubr.c +++ b/sys/net/if_arcsubr.c @@ -608,6 +608,7 @@ arc_input(struct ifnet *ifp, struct mbuf *m) m_freem(m); return; } + M_SETFIB(m, ifp->if_fib); netisr_dispatch(isr, m); } diff --git a/sys/net/if_atmsubr.c b/sys/net/if_atmsubr.c index d8a3313..fb63fd5 100644 --- a/sys/net/if_atmsubr.c +++ b/sys/net/if_atmsubr.c @@ -332,6 +332,7 @@ atm_input(struct ifnet *ifp, struct atm_pseudohdr *ah, struct mbuf *m, return; } } + M_SETFIB(m, ifp->if_fib); netisr_dispatch(isr, m); } diff --git a/sys/net/if_debug.c b/sys/net/if_debug.c index dcf504e..e7319374 100644 --- a/sys/net/if_debug.c +++ b/sys/net/if_debug.c @@ -86,6 +86,7 @@ if_show_ifnet(struct ifnet *ifp) IF_DB_PRINTF("%d", if_snd.ifq_drv_maxlen); IF_DB_PRINTF("%d", if_snd.altq_type); IF_DB_PRINTF("%x", if_snd.altq_flags); + IF_DB_PRINTF("%u", if_fib); #undef IF_DB_PRINTF } diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index be90f5a..aa8113b 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -692,6 +692,8 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m) m_adj(m, ETHER_VLAN_ENCAP_LEN); } + M_SETFIB(m, ifp->if_fib); + /* Allow ng_ether(4) to claim this frame. */ if (IFP2AC(ifp)->ac_netgraph != NULL) { KASSERT(ng_ether_input_p != NULL, diff --git a/sys/net/if_fddisubr.c b/sys/net/if_fddisubr.c index 13fdd72..d98029c 100644 --- a/sys/net/if_fddisubr.c +++ b/sys/net/if_fddisubr.c @@ -550,6 +550,7 @@ fddi_input(ifp, m) ifp->if_noproto++; goto dropanyway; } + M_SETFIB(m, ifp->if_fib); netisr_dispatch(isr, m); return; diff --git a/sys/net/if_fwsubr.c b/sys/net/if_fwsubr.c index eac049a..7c57feb 100644 --- a/sys/net/if_fwsubr.c +++ b/sys/net/if_fwsubr.c @@ -627,6 +627,7 @@ firewire_input(struct ifnet *ifp, struct mbuf *m, uint16_t src) return; } + M_SETFIB(m, ifp->if_fib); netisr_dispatch(isr, m); } diff --git a/sys/net/if_gif.c b/sys/net/if_gif.c index 4a8df34..2b6b2b4 100644 --- a/sys/net/if_gif.c +++ b/sys/net/if_gif.c @@ -609,6 +609,7 @@ gif_input(m, af, ifp) ifp->if_ipackets++; ifp->if_ibytes += m->m_pkthdr.len; + M_SETFIB(m, ifp->if_fib); netisr_dispatch(isr, m); } diff --git a/sys/net/if_gre.c b/sys/net/if_gre.c index de968af..5f8156b 100644 --- a/sys/net/if_gre.c +++ b/sys/net/if_gre.c @@ -48,6 +48,7 @@ #include <sys/param.h> #include <sys/jail.h> #include <sys/kernel.h> +#include <sys/libkern.h> #include <sys/malloc.h> #include <sys/module.h> #include <sys/mbuf.h> @@ -91,6 +92,14 @@ #define GRENAME "gre" +#define MTAG_COOKIE_GRE 1307983903 +#define MTAG_GRE_NESTING 1 +struct mtag_gre_nesting { + uint16_t count; + uint16_t max; + struct ifnet *ifp[]; +}; + /* * gre_mtx protects all global variables in if_gre.c. * XXX: gre_softc data not protected yet. @@ -196,7 +205,6 @@ gre_clone_create(ifc, unit, params) sc->g_proto = IPPROTO_GRE; GRE2IFP(sc)->if_flags |= IFF_LINK0; sc->encap = NULL; - sc->called = 0; sc->gre_fibnum = curthread->td_proc->p_fibnum; sc->wccp_ver = WCCP_V1; sc->key = 0; @@ -240,23 +248,77 @@ gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, struct gre_softc *sc = ifp->if_softc; struct greip *gh; struct ip *ip; + struct m_tag *mtag; + struct mtag_gre_nesting *gt; + size_t len; u_short gre_ip_id = 0; uint8_t gre_ip_tos = 0; u_int16_t etype = 0; struct mobile_h mob_h; u_int32_t af; - int extra = 0; + int extra = 0, max; /* - * gre may cause infinite recursion calls when misconfigured. - * We'll prevent this by introducing upper limit. + * gre may cause infinite recursion calls when misconfigured. High + * nesting level may cause stack exhaustion. We'll prevent this by + * detecting loops and by introducing upper limit. */ - if (++(sc->called) > max_gre_nesting) { - printf("%s: gre_output: recursively called too many " - "times(%d)\n", if_name(GRE2IFP(sc)), sc->called); - m_freem(m); - error = EIO; /* is there better errno? */ - goto end; + mtag = m_tag_locate(m, MTAG_COOKIE_GRE, MTAG_GRE_NESTING, NULL); + if (mtag != NULL) { + struct ifnet **ifp2; + + gt = (struct mtag_gre_nesting *)(mtag + 1); + gt->count++; + if (gt->count > min(gt->max,max_gre_nesting)) { + printf("%s: hit maximum recursion limit %u on %s\n", + __func__, gt->count - 1, ifp->if_xname); + m_freem(m); + error = EIO; /* is there better errno? */ + goto end; + } + + ifp2 = gt->ifp; + for (max = gt->count - 1; max > 0; max--) { + if (*ifp2 == ifp) + break; + ifp2++; + } + if (*ifp2 == ifp) { + printf("%s: detected loop with nexting %u on %s\n", + __func__, gt->count-1, ifp->if_xname); + m_freem(m); + error = EIO; /* is there better errno? */ + goto end; + } + *ifp2 = ifp; + + } else { + /* + * Given that people should NOT increase max_gre_nesting beyond + * their real needs, we allocate once per packet rather than + * allocating an mtag once per passing through gre. + * + * Note: the sysctl does not actually check for saneness, so we + * limit the maximum numbers of possible recursions here. + */ + max = imin(max_gre_nesting, 256); + /* If someone sets the sysctl <= 0, we want at least 1. */ + max = imax(max, 1); + len = sizeof(struct mtag_gre_nesting) + + max * sizeof(struct ifnet *); + mtag = m_tag_alloc(MTAG_COOKIE_GRE, MTAG_GRE_NESTING, len, + M_NOWAIT); + if (mtag == NULL) { + m_freem(m); + error = ENOMEM; + goto end; + } + gt = (struct mtag_gre_nesting *)(mtag + 1); + bzero(gt, len); + gt->count = 1; + gt->max = max; + *gt->ifp = ifp; + m_tag_prepend(m, mtag); } if (!((ifp->if_flags & IFF_UP) && @@ -444,7 +506,6 @@ gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, error = ip_output(m, NULL, &sc->route, IP_FORWARDING, (struct ip_moptions *)NULL, (struct inpcb *)NULL); end: - sc->called = 0; if (error) ifp->if_oerrors++; return (error); diff --git a/sys/net/if_gre.h b/sys/net/if_gre.h index e23daef..74d16b1 100644 --- a/sys/net/if_gre.h +++ b/sys/net/if_gre.h @@ -61,8 +61,6 @@ struct gre_softc { const struct encaptab *encap; /* encapsulation cookie */ - int called; /* infinite recursion preventer */ - uint32_t key; /* key included in outgoing GRE packets */ /* zero means none */ diff --git a/sys/net/if_iso88025subr.c b/sys/net/if_iso88025subr.c index 933065e..7961df0 100644 --- a/sys/net/if_iso88025subr.c +++ b/sys/net/if_iso88025subr.c @@ -680,6 +680,7 @@ iso88025_input(ifp, m) break; } + M_SETFIB(m, ifp->if_fib); netisr_dispatch(isr, m); return; diff --git a/sys/net/if_lagg.c b/sys/net/if_lagg.c index 2548c65..d540966 100644 --- a/sys/net/if_lagg.c +++ b/sys/net/if_lagg.c @@ -1221,14 +1221,15 @@ lagg_input(struct ifnet *ifp, struct mbuf *m) struct lagg_softc *sc = lp->lp_softc; struct ifnet *scifp = sc->sc_ifp; + LAGG_RLOCK(sc); if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || (lp->lp_flags & LAGG_PORT_DISABLED) || sc->sc_proto == LAGG_PROTO_NONE) { + LAGG_RUNLOCK(sc); m_freem(m); return (NULL); } - LAGG_RLOCK(sc); ETHER_BPF_MTAP(scifp, m); m = (*sc->sc_input)(sc, lp, m); diff --git a/sys/net/if_llatbl.h b/sys/net/if_llatbl.h index 1f30f37..4b18353 100644 --- a/sys/net/if_llatbl.h +++ b/sys/net/if_llatbl.h @@ -151,7 +151,6 @@ struct lltable { int llt_af; struct ifnet *llt_ifp; - struct llentry * (*llt_new)(const struct sockaddr *, u_int); void (*llt_free)(struct lltable *, struct llentry *); void (*llt_prefix_free)(struct lltable *, const struct sockaddr *prefix, @@ -159,8 +158,6 @@ struct lltable { u_int flags); struct llentry * (*llt_lookup)(struct lltable *, u_int flags, const struct sockaddr *l3addr); - int (*llt_rtcheck)(struct ifnet *, u_int flags, - const struct sockaddr *); int (*llt_dump)(struct lltable *, struct sysctl_req *); }; diff --git a/sys/net/if_spppfr.c b/sys/net/if_spppfr.c index ae4f959..ca84656 100644 --- a/sys/net/if_spppfr.c +++ b/sys/net/if_spppfr.c @@ -280,6 +280,8 @@ drop: ++ifp->if_ierrors; if (! (ifp->if_flags & IFF_UP)) goto drop; + M_SETFIB(m, ifp->if_fib); + /* Check queue. */ if (netisr_queue(isr, m)) { /* (0) on success. */ if (debug) diff --git a/sys/net/if_spppsubr.c b/sys/net/if_spppsubr.c index 3f0faaf..363d9aa 100644 --- a/sys/net/if_spppsubr.c +++ b/sys/net/if_spppsubr.c @@ -737,6 +737,7 @@ sppp_input(struct ifnet *ifp, struct mbuf *m) goto drop; SPPP_UNLOCK(sp); + M_SETFIB(m, ifp->if_fib); /* Check queue. */ if (netisr_queue(isr, m)) { /* (0) on success. */ if (debug) diff --git a/sys/net/if_stf.c b/sys/net/if_stf.c index 7d136fd..e32956e 100644 --- a/sys/net/if_stf.c +++ b/sys/net/if_stf.c @@ -3,7 +3,6 @@ /*- * Copyright (C) 2000 WIDE Project. - * Copyright (c) 2010-2011 Hiroki Sato <hrs@FreeBSD.org> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -32,7 +31,7 @@ */ /* - * 6to4 interface, based on RFC 3056 + 6rd (RFC 5969) support. + * 6to4 interface, based on RFC3056. * * 6to4 interface is NOT capable of link-layer (I mean, IPv4) multicasting. * There is no address mapping defined from IPv6 multicast address to IPv4 @@ -61,7 +60,7 @@ * ICMPv6: * - Redirects cannot be used due to the lack of link-local address. * - * stf interface does not have, and will not need, a link-local address. + * stf interface does not have, and will not need, a link-local address. * It seems to have no real benefit and does not help the above symptoms much. * Even if we assign link-locals to interface, we cannot really * use link-local unicast/multicast on top of 6to4 cloud (since there's no @@ -73,11 +72,6 @@ * http://playground.iijlab.net/i-d/draft-itojun-ipv6-transition-abuse-00.txt * for details. The code tries to filter out some of malicious packets. * Note that there is no way to be 100% secure. - * - * 6rd (RFC 5969) extension is enabled when an IPv6 GUA other than - * 2002::/16 is assigned. The stf(4) calculates a 6rd delegated - * prefix from a 6rd prefix and an IPv4 address. - * */ #include "opt_inet.h" @@ -126,41 +120,15 @@ #include <security/mac/mac_framework.h> -#define STF_DEBUG 1 -#define ip_sprintf(buf, a) \ - sprintf(buf, "%d.%d.%d.%d", \ - (ntohl((a)->s_addr)>>24)&0xFF, \ - (ntohl((a)->s_addr)>>16)&0xFF, \ - (ntohl((a)->s_addr)>>8)&0xFF, \ - (ntohl((a)->s_addr))&0xFF); -#if STF_DEBUG -#define DEBUG_PRINTF(a, ...) \ - do { \ - if (V_stf_debug >= a) \ - printf(__VA_ARGS__); \ - } while (0) -#else -#define DEBUG_PRINTF(a, ...) -#endif - SYSCTL_DECL(_net_link); SYSCTL_NODE(_net_link, IFT_STF, stf, CTLFLAG_RW, 0, "6to4 Interface"); -static VNET_DEFINE(int, stf_route_cache) = 1; -#define V_stf_route_cache VNET(stf_route_cache) -SYSCTL_VNET_INT(_net_link_stf, OID_AUTO, route_cache, CTLFLAG_RW, - &VNET_NAME(stf_route_cache), 0, - "Enable caching of IPv4 routes for 6to4 output."); - -#if STF_DEBUG -static VNET_DEFINE(int, stf_debug) = 0; -#define V_stf_debug VNET(stf_debug) -SYSCTL_VNET_INT(_net_link_stf, OID_AUTO, stf_debug, CTLFLAG_RW, - &VNET_NAME(stf_debug), 0, - "Enable displaying verbose debug message of stf interfaces"); -#endif +static int stf_route_cache = 1; +SYSCTL_INT(_net_link_stf, OID_AUTO, route_cache, CTLFLAG_RW, + &stf_route_cache, 0, "Caching of IPv4 routes for 6to4 Output"); #define STFNAME "stf" +#define STFUNIT 0 #define IN6_IS_ADDR_6TO4(x) (ntohs((x)->s6_addr16[0]) == 0x2002) @@ -177,27 +145,17 @@ struct stf_softc { struct route_in6 __sc_ro6; /* just for safety */ } __sc_ro46; #define sc_ro __sc_ro46.__sc_ro4 - struct mtx sc_mtx; + struct mtx sc_ro_mtx; u_int sc_fibnum; const struct encaptab *encap_cookie; - u_int sc_flags; - eventhandler_tag sc_ifaddr_event_tag; - LIST_ENTRY(stf_softc) stf_list; }; #define STF2IFP(sc) ((sc)->sc_ifp) -static struct mtx stf_mtx; +/* + * Note that mutable fields in the softc are not currently locked. + * We do lock sc_ro in stf_output though. + */ static MALLOC_DEFINE(M_STF, STFNAME, "6to4 Tunnel Interface"); -static VNET_DEFINE(LIST_HEAD(, stf_softc), stf_softc_list); -#define V_stf_softc_list VNET(stf_softc_list) - -#define STF_LOCK_INIT(sc) mtx_init(&(sc)->sc_mtx, "stf softc", \ - NULL, MTX_DEF); -#define STF_LOCK_DESTROY(sc) mtx_destroy(&(sc)->sc_mtx) -#define STF_LOCK(sc) mtx_lock(&(sc)->sc_mtx) -#define STF_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) -#define STF_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) - static const int ip_stf_ttl = 40; extern struct domain inetdomain; @@ -212,6 +170,8 @@ struct protosw in_stf_protosw = { .pr_usrreqs = &rip_usrreqs }; +static char *stfnames[] = {"stf0", "stf", "6to4", NULL}; + static int stfmodevent(module_t, int, void *); static int stf_encapcheck(const struct mbuf *, int, int, void *); static struct in6_ifaddr *stf_getsrcifa6(struct ifnet *); @@ -224,47 +184,68 @@ static int stf_checkaddr6(struct stf_softc *, struct in6_addr *, struct ifnet *); static void stf_rtrequest(int, struct rtentry *, struct rt_addrinfo *); static int stf_ioctl(struct ifnet *, u_long, caddr_t); -static int stf_is_up(struct ifnet *); -#define STF_GETIN4_USE_CACHE 1 -static struct sockaddr_in *stf_getin4addr(struct sockaddr_in *, - struct ifaddr *, - int); -static struct sockaddr_in *stf_getin4addr_in6(struct sockaddr_in *, - struct ifaddr *, - struct in6_addr *); -static struct sockaddr_in *stf_getin4addr_sin6(struct sockaddr_in *, - struct ifaddr *, - struct sockaddr_in6 *); -static void stf_ifaddr_change(void *, struct ifnet *); +static int stf_clone_match(struct if_clone *, const char *); +static int stf_clone_create(struct if_clone *, char *, size_t, caddr_t); +static int stf_clone_destroy(struct if_clone *, struct ifnet *); +struct if_clone stf_cloner = IFC_CLONE_INITIALIZER(STFNAME, NULL, 0, + NULL, stf_clone_match, stf_clone_create, stf_clone_destroy); -static int stf_clone_create(struct if_clone *, int, caddr_t); -static void stf_clone_destroy(struct ifnet *); +static int +stf_clone_match(struct if_clone *ifc, const char *name) +{ + int i; -IFC_SIMPLE_DECLARE(stf, 0); + for(i = 0; stfnames[i] != NULL; i++) { + if (strcmp(stfnames[i], name) == 0) + return (1); + } + + return (0); +} static int -stf_clone_create(struct if_clone *ifc, int unit, caddr_t params) +stf_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) { + int err, unit; struct stf_softc *sc; struct ifnet *ifp; + /* + * We can only have one unit, but since unit allocation is + * already locked, we use it to keep from allocating extra + * interfaces. + */ + unit = STFUNIT; + err = ifc_alloc_unit(ifc, &unit); + if (err != 0) + return (err); + sc = malloc(sizeof(struct stf_softc), M_STF, M_WAITOK | M_ZERO); - sc->sc_fibnum = curthread->td_proc->p_fibnum; ifp = STF2IFP(sc) = if_alloc(IFT_STF); - if (sc->sc_ifp == NULL) { + if (ifp == NULL) { free(sc, M_STF); - return (ENOMEM); + ifc_free_unit(ifc, unit); + return (ENOSPC); } - STF_LOCK_INIT(sc); ifp->if_softc = sc; - if_initname(ifp, ifc->ifc_name, unit); + sc->sc_fibnum = curthread->td_proc->p_fibnum; + /* + * Set the name manually rather then using if_initname because + * we don't conform to the default naming convention for interfaces. + */ + strlcpy(ifp->if_xname, name, IFNAMSIZ); + ifp->if_dname = ifc->ifc_name; + ifp->if_dunit = IF_DUNIT_NONE; + + mtx_init(&(sc)->sc_ro_mtx, "stf ro", NULL, MTX_DEF); sc->encap_cookie = encap_attach_func(AF_INET, IPPROTO_IPV6, stf_encapcheck, &in_stf_protosw, sc); if (sc->encap_cookie == NULL) { if_printf(ifp, "attach failed\n"); free(sc, M_STF); + ifc_free_unit(ifc, unit); return (ENOMEM); } @@ -274,63 +255,41 @@ stf_clone_create(struct if_clone *ifc, int unit, caddr_t params) ifp->if_snd.ifq_maxlen = ifqmaxlen; if_attach(ifp); bpfattach(ifp, DLT_NULL, sizeof(u_int32_t)); - - mtx_lock(&stf_mtx); - LIST_INSERT_HEAD(&V_stf_softc_list, sc, stf_list); - mtx_unlock(&stf_mtx); - - sc->sc_ifaddr_event_tag = - EVENTHANDLER_REGISTER(ifaddr_event, stf_ifaddr_change, NULL, - EVENTHANDLER_PRI_ANY); - return (0); } -static void -stf_clone_destroy(struct ifnet *ifp) +static int +stf_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) { struct stf_softc *sc = ifp->if_softc; int err; - mtx_lock(&stf_mtx); - LIST_REMOVE(sc, stf_list); - mtx_unlock(&stf_mtx); - - EVENTHANDLER_DEREGISTER(ifaddr_event, sc->sc_ifaddr_event_tag); - err = encap_detach(sc->encap_cookie); KASSERT(err == 0, ("Unexpected error detaching encap_cookie")); + mtx_destroy(&(sc)->sc_ro_mtx); bpfdetach(ifp); if_detach(ifp); if_free(ifp); - STF_LOCK_DESTROY(sc); free(sc, M_STF); + ifc_free_unit(ifc, STFUNIT); - return; -} - -static void -vnet_stf_init(const void *unused __unused) -{ - - LIST_INIT(&V_stf_softc_list); + return (0); } -VNET_SYSINIT(vnet_stf_init, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, vnet_stf_init, - NULL); static int -stfmodevent(module_t mod, int type, void *data) +stfmodevent(mod, type, data) + module_t mod; + int type; + void *data; { switch (type) { case MOD_LOAD: - mtx_init(&stf_mtx, "stf_mtx", NULL, MTX_DEF); if_clone_attach(&stf_cloner); break; case MOD_UNLOAD: if_clone_detach(&stf_cloner); - mtx_destroy(&stf_mtx); break; default: return (EOPNOTSUPP); @@ -346,31 +305,28 @@ static moduledata_t stf_mod = { }; DECLARE_MODULE(if_stf, stf_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); -MODULE_VERSION(if_stf, 1); static int -stf_encapcheck(const struct mbuf *m, int off, int proto, void *arg) +stf_encapcheck(m, off, proto, arg) + const struct mbuf *m; + int off; + int proto; + void *arg; { struct ip ip; struct in6_ifaddr *ia6; - struct sockaddr_in ia6_in4addr; - struct sockaddr_in ia6_in4mask; - struct sockaddr_in *sin; struct stf_softc *sc; - struct ifnet *ifp; - int ret = 0; + struct in_addr a, b, mask; - DEBUG_PRINTF(1, "%s: enter\n", __func__); sc = (struct stf_softc *)arg; if (sc == NULL) return 0; - ifp = STF2IFP(sc); - if ((ifp->if_flags & IFF_UP) == 0) + if ((STF2IFP(sc)->if_flags & IFF_UP) == 0) return 0; /* IFF_LINK0 means "no decapsulation" */ - if ((ifp->if_flags & IFF_LINK0) != 0) + if ((STF2IFP(sc)->if_flags & IFF_LINK0) != 0) return 0; if (proto != IPPROTO_IPV6) @@ -382,162 +338,86 @@ stf_encapcheck(const struct mbuf *m, int off, int proto, void *arg) if (ip.ip_v != 4) return 0; - /* Lookup an ia6 whose IPv4 addr encoded in the IPv6 addr is valid. */ - ia6 = stf_getsrcifa6(ifp); + ia6 = stf_getsrcifa6(STF2IFP(sc)); if (ia6 == NULL) return 0; - sin = stf_getin4addr(&ia6_in4addr, &ia6->ia_ifa, STF_GETIN4_USE_CACHE); - if (sin == NULL) - return 0; -#if STF_DEBUG - { - char buf[INET6_ADDRSTRLEN + 1]; - memset(&buf, 0, sizeof(buf)); - - ip6_sprintf(buf, &satosin6(ia6->ia_ifa.ifa_addr)->sin6_addr); - DEBUG_PRINTF(1, "%s: ia6->ia_ifa.ifa_addr = %s\n", __func__, buf); - ip6_sprintf(buf, &ia6->ia_addr.sin6_addr); - DEBUG_PRINTF(1, "%s: ia6->ia_addr = %s\n", __func__, buf); - ip6_sprintf(buf, &satosin6(ia6->ia_ifa.ifa_netmask)->sin6_addr); - DEBUG_PRINTF(1, "%s: ia6->ia_ifa.ifa_netmask = %s\n", __func__, buf); - ip6_sprintf(buf, &ia6->ia_prefixmask.sin6_addr); - DEBUG_PRINTF(1, "%s: ia6->ia_prefixmask = %s\n", __func__, buf); - - ip_sprintf(buf, &ia6_in4addr.sin_addr); - DEBUG_PRINTF(1, "%s: ia6_in4addr.sin_addr = %s\n", __func__, buf); - ip_sprintf(buf, &ip.ip_src); - DEBUG_PRINTF(1, "%s: ip.ip_src = %s\n", __func__, buf); - ip_sprintf(buf, &ip.ip_dst); - DEBUG_PRINTF(1, "%s: ip.ip_dst = %s\n", __func__, buf); - } -#endif /* * check if IPv4 dst matches the IPv4 address derived from the * local 6to4 address. * success on: dst = 10.1.1.1, ia6->ia_addr = 2002:0a01:0101:... */ - DEBUG_PRINTF(1, "%s: check1: ia6_in4addr.sin_addr == ip.ip_dst?\n", __func__); - if (ia6_in4addr.sin_addr.s_addr != ip.ip_dst.s_addr) { - DEBUG_PRINTF(1, "%s: check1: false. Ignore this packet.\n", __func__); - goto freeit; + if (bcmp(GET_V4(&ia6->ia_addr.sin6_addr), &ip.ip_dst, + sizeof(ip.ip_dst)) != 0) { + ifa_free(&ia6->ia_ifa); + return 0; } - DEBUG_PRINTF(1, "%s: check2: ia6->ia_addr is 2002::/16?\n", __func__); - if (IN6_IS_ADDR_6TO4(&ia6->ia_addr.sin6_addr)) { - /* 6to4 (RFC 3056) */ - /* - * check if IPv4 src matches the IPv4 address derived - * from the local 6to4 address masked by prefixmask. - * success on: src = 10.1.1.1, ia6->ia_addr = 2002:0a00:.../24 - * fail on: src = 10.1.1.1, ia6->ia_addr = 2002:0b00:.../24 - */ - DEBUG_PRINTF(1, "%s: check2: true.\n", __func__); - - memcpy(&ia6_in4mask.sin_addr, - GET_V4(&ia6->ia_prefixmask.sin6_addr), - sizeof(ia6_in4mask)); -#if STF_DEBUG - { - char buf[INET6_ADDRSTRLEN + 1]; - memset(&buf, 0, sizeof(buf)); - - ip_sprintf(buf, &ia6_in4addr.sin_addr); - DEBUG_PRINTF(1, "%s: ia6->ia_addr = %s\n", - __func__, buf); - ip_sprintf(buf, &ip.ip_src); - DEBUG_PRINTF(1, "%s: ip.ip_src = %s\n", - __func__, buf); - ip_sprintf(buf, &ia6_in4mask.sin_addr); - DEBUG_PRINTF(1, "%s: ia6->ia_prefixmask = %s\n", - __func__, buf); - - DEBUG_PRINTF(1, "%s: check3: ia6_in4addr.sin_addr & mask == ip.ip_src & mask\n", - __func__); - } -#endif - - if ((ia6_in4addr.sin_addr.s_addr & ia6_in4mask.sin_addr.s_addr) != - (ip.ip_src.s_addr & ia6_in4mask.sin_addr.s_addr)) { - DEBUG_PRINTF(1, "%s: check3: false. Ignore this packet.\n", - __func__); - goto freeit; - } - } else { - /* 6rd (RFC 5569) */ - DEBUG_PRINTF(1, "%s: check2: false. 6rd.\n", __func__); - /* - * No restriction on the src address in the case of - * 6rd because the stf(4) interface always has a - * prefix which covers whole of IPv4 src address - * range. So, stf_output() will catch all of - * 6rd-capsuled IPv4 traffic with suspicious inner dst - * IPv4 address (i.e. the IPv6 destination address is - * one the admin does not like to route to outside), - * and then it discard them silently. - */ - } - DEBUG_PRINTF(1, "%s: all clear!\n", __func__); - /* stf interface makes single side match only */ - ret = 32; -freeit: + /* + * check if IPv4 src matches the IPv4 address derived from the + * local 6to4 address masked by prefixmask. + * success on: src = 10.1.1.1, ia6->ia_addr = 2002:0a00:.../24 + * fail on: src = 10.1.1.1, ia6->ia_addr = 2002:0b00:.../24 + */ + bzero(&a, sizeof(a)); + bcopy(GET_V4(&ia6->ia_addr.sin6_addr), &a, sizeof(a)); + bcopy(GET_V4(&ia6->ia_prefixmask.sin6_addr), &mask, sizeof(mask)); ifa_free(&ia6->ia_ifa); + a.s_addr &= mask.s_addr; + b = ip.ip_src; + b.s_addr &= mask.s_addr; + if (a.s_addr != b.s_addr) + return 0; - return (ret); + /* stf interface makes single side match only */ + return 32; } static struct in6_ifaddr * -stf_getsrcifa6(struct ifnet *ifp) +stf_getsrcifa6(ifp) + struct ifnet *ifp; { - struct ifaddr *ifa; + struct ifaddr *ia; struct in_ifaddr *ia4; - struct sockaddr_in *sin; - struct sockaddr_in in4; + struct sockaddr_in6 *sin6; + struct in_addr in; if_addr_rlock(ifp); - TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - if (ifa->ifa_addr->sa_family != AF_INET6) + TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) { + if (ia->ifa_addr->sa_family != AF_INET6) continue; - if ((sin = stf_getin4addr(&in4, ifa, - STF_GETIN4_USE_CACHE)) == NULL) + sin6 = (struct sockaddr_in6 *)ia->ifa_addr; + if (!IN6_IS_ADDR_6TO4(&sin6->sin6_addr)) continue; - LIST_FOREACH(ia4, INADDR_HASH(sin->sin_addr.s_addr), ia_hash) - if (ia4->ia_addr.sin_addr.s_addr == sin->sin_addr.s_addr) + + bcopy(GET_V4(&sin6->sin6_addr), &in, sizeof(in)); + LIST_FOREACH(ia4, INADDR_HASH(in.s_addr), ia_hash) + if (ia4->ia_addr.sin_addr.s_addr == in.s_addr) break; if (ia4 == NULL) continue; -#if STF_DEBUG - { - char buf[INET6_ADDRSTRLEN + 1]; - memset(&buf, 0, sizeof(buf)); - - ip6_sprintf(buf, &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr); - DEBUG_PRINTF(1, "%s: ifa->ifa_addr->sin6_addr = %s\n", - __func__, buf); - ip_sprintf(buf, &ia4->ia_addr.sin_addr); - DEBUG_PRINTF(1, "%s: ia4->ia_addr.sin_addr = %s\n", - __func__, buf); - } -#endif - ifa_ref(ifa); + ifa_ref(ia); if_addr_runlock(ifp); - return (ifatoia6(ifa)); + return (struct in6_ifaddr *)ia; } if_addr_runlock(ifp); - return NULL; } static int -stf_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, struct route *ro) +stf_output(ifp, m, dst, ro) + struct ifnet *ifp; + struct mbuf *m; + struct sockaddr *dst; + struct route *ro; { struct stf_softc *sc; struct sockaddr_in6 *dst6; struct route *cached_route; - struct sockaddr_in *sin; - struct sockaddr_in in4; + struct in_addr in4; + caddr_t ptr; struct sockaddr_in *dst4; u_int8_t tos; struct ip *ip; @@ -599,28 +479,20 @@ stf_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, struct route /* * Pickup the right outer dst addr from the list of candidates. * ip6_dst has priority as it may be able to give us shorter IPv4 hops. - * ip6_dst: destination addr in the packet header. - * dst6: destination addr specified in function argument. */ - DEBUG_PRINTF(1, "%s: dst addr selection\n", __func__); - sin = stf_getin4addr_in6(&in4, &ia6->ia_ifa, &ip6->ip6_dst); - if (sin == NULL) - sin = stf_getin4addr_in6(&in4, &ia6->ia_ifa, &dst6->sin6_addr); - if (sin == NULL) { + ptr = NULL; + if (IN6_IS_ADDR_6TO4(&ip6->ip6_dst)) + ptr = GET_V4(&ip6->ip6_dst); + else if (IN6_IS_ADDR_6TO4(&dst6->sin6_addr)) + ptr = GET_V4(&dst6->sin6_addr); + else { ifa_free(&ia6->ia_ifa); m_freem(m); ifp->if_oerrors++; return ENETUNREACH; } -#if STF_DEBUG - { - char buf[INET6_ADDRSTRLEN + 1]; - memset(&buf, 0, sizeof(buf)); + bcopy(ptr, &in4, sizeof(in4)); - ip_sprintf(buf, &sin->sin_addr); - DEBUG_PRINTF(1, "%s: ip_dst = %s\n", __func__, buf); - } -#endif if (bpf_peers_present(ifp->if_bpf)) { /* * We need to prepend the address family as @@ -644,26 +516,11 @@ stf_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, struct route ip = mtod(m, struct ip *); bzero(ip, sizeof(*ip)); - bcopy(&in4.sin_addr, &ip->ip_dst, sizeof(ip->ip_dst)); - - sin = stf_getin4addr_sin6(&in4, &ia6->ia_ifa, &ia6->ia_addr); - if (sin == NULL) { - ifa_free(&ia6->ia_ifa); - m_freem(m); - ifp->if_oerrors++; - return ENETUNREACH; - } - bcopy(&in4.sin_addr, &ip->ip_src, sizeof(ip->ip_src)); -#if STF_DEBUG - { - char buf[INET6_ADDRSTRLEN + 1]; - memset(&buf, 0, sizeof(buf)); - ip_sprintf(buf, &ip->ip_src); - DEBUG_PRINTF(1, "%s: ip_src = %s\n", __func__, buf); - } -#endif + bcopy(GET_V4(&((struct sockaddr_in6 *)&ia6->ia_addr)->sin6_addr), + &ip->ip_src, sizeof(ip->ip_src)); ifa_free(&ia6->ia_ifa); + bcopy(&in4, &ip->ip_dst, sizeof(ip->ip_dst)); ip->ip_p = IPPROTO_IPV6; ip->ip_ttl = ip_stf_ttl; ip->ip_len = m->m_pkthdr.len; /*host order*/ @@ -672,7 +529,7 @@ stf_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, struct route else ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos); - if (!V_stf_route_cache) { + if (!stf_route_cache) { cached_route = NULL; goto sendit; } @@ -680,7 +537,7 @@ stf_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, struct route /* * Do we have a cached route? */ - STF_LOCK(sc); + mtx_lock(&(sc)->sc_ro_mtx); dst4 = (struct sockaddr_in *)&sc->sc_ro.ro_dst; if (dst4->sin_family != AF_INET || bcmp(&dst4->sin_addr, &ip->ip_dst, sizeof(ip->ip_dst)) != 0) { @@ -698,15 +555,8 @@ stf_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, struct route rtalloc_fib(&sc->sc_ro, sc->sc_fibnum); if (sc->sc_ro.ro_rt == NULL) { m_freem(m); + mtx_unlock(&(sc)->sc_ro_mtx); ifp->if_oerrors++; - STF_UNLOCK(sc); - return ENETUNREACH; - } - if (sc->sc_ro.ro_rt->rt_ifp == ifp) { - /* infinite loop detection */ - m_free(m); - ifp->if_oerrors++; - STF_UNLOCK(sc); return ENETUNREACH; } } @@ -715,31 +565,34 @@ stf_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, struct route sendit: M_SETFIB(m, sc->sc_fibnum); ifp->if_opackets++; - DEBUG_PRINTF(1, "%s: ip_output dispatch.\n", __func__); error = ip_output(m, NULL, cached_route, 0, NULL, NULL); if (cached_route != NULL) - STF_UNLOCK(sc); + mtx_unlock(&(sc)->sc_ro_mtx); return error; } static int -isrfc1918addr(struct in_addr *in) +isrfc1918addr(in) + struct in_addr *in; { /* * returns 1 if private address range: * 10.0.0.0/8 172.16.0.0/12 192.168.0.0/16 */ - if ((ntohl(in->s_addr) & 0xff000000) == 10 << 24 || - (ntohl(in->s_addr) & 0xfff00000) == (172 * 256 + 16) << 16 || - (ntohl(in->s_addr) & 0xffff0000) == (192 * 256 + 168) << 16 ) + if ((ntohl(in->s_addr) & 0xff000000) >> 24 == 10 || + (ntohl(in->s_addr) & 0xfff00000) >> 16 == 172 * 256 + 16 || + (ntohl(in->s_addr) & 0xffff0000) >> 16 == 192 * 256 + 168) return 1; return 0; } static int -stf_checkaddr4(struct stf_softc *sc, struct in_addr *in, struct ifnet *inifp) +stf_checkaddr4(sc, in, inifp) + struct stf_softc *sc; + struct in_addr *in; + struct ifnet *inifp; /* incoming interface */ { struct in_ifaddr *ia4; @@ -755,10 +608,20 @@ stf_checkaddr4(struct stf_softc *sc, struct in_addr *in, struct ifnet *inifp) } /* + * reject packets with private address range. + * (requirement from RFC3056 section 2 1st paragraph) + */ + if (isrfc1918addr(in)) + return -1; + + /* * reject packets with broadcast */ IN_IFADDR_RLOCK(); - TAILQ_FOREACH(ia4, &V_in_ifaddrhead, ia_link) { + for (ia4 = TAILQ_FIRST(&V_in_ifaddrhead); + ia4; + ia4 = TAILQ_NEXT(ia4, ia_link)) + { if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0) continue; if (in->s_addr == ia4->ia_broadaddr.sin_addr.s_addr) { @@ -777,7 +640,7 @@ stf_checkaddr4(struct stf_softc *sc, struct in_addr *in, struct ifnet *inifp) bzero(&sin, sizeof(sin)); sin.sin_family = AF_INET; - sin.sin_len = sizeof(sin); + sin.sin_len = sizeof(struct sockaddr_in); sin.sin_addr = *in; rt = rtalloc1_fib((struct sockaddr *)&sin, 0, 0UL, sc->sc_fibnum); @@ -798,7 +661,10 @@ stf_checkaddr4(struct stf_softc *sc, struct in_addr *in, struct ifnet *inifp) } static int -stf_checkaddr6(struct stf_softc *sc, struct in6_addr *in6, struct ifnet *inifp) +stf_checkaddr6(sc, in6, inifp) + struct stf_softc *sc; + struct in6_addr *in6; + struct ifnet *inifp; /* incoming interface */ { /* * check 6to4 addresses @@ -822,7 +688,9 @@ stf_checkaddr6(struct stf_softc *sc, struct in6_addr *in6, struct ifnet *inifp) } void -in_stf_input(struct mbuf *m, int off) +in_stf_input(m, off) + struct mbuf *m; + int off; { int proto; struct stf_softc *sc; @@ -830,7 +698,6 @@ in_stf_input(struct mbuf *m, int off) struct ip6_hdr *ip6; u_int8_t otos, itos; struct ifnet *ifp; - struct route_in6 rin6; proto = mtod(m, struct ip *)->ip_p; @@ -854,17 +721,6 @@ in_stf_input(struct mbuf *m, int off) mac_ifnet_create_mbuf(ifp, m); #endif -#if STF_DEBUG - { - char buf[INET6_ADDRSTRLEN + 1]; - memset(&buf, 0, sizeof(buf)); - - ip_sprintf(buf, &ip->ip_dst); - DEBUG_PRINTF(1, "%s: ip->ip_dst = %s\n", __func__, buf); - ip_sprintf(buf, &ip->ip_src); - DEBUG_PRINTF(1, "%s: ip->ip_src = %s\n", __func__, buf); - } -#endif /* * perform sanity check against outer src/dst. * for source, perform ingress filter as well. @@ -885,17 +741,6 @@ in_stf_input(struct mbuf *m, int off) } ip6 = mtod(m, struct ip6_hdr *); -#if STF_DEBUG - { - char buf[INET6_ADDRSTRLEN + 1]; - memset(&buf, 0, sizeof(buf)); - - ip6_sprintf(buf, &ip6->ip6_dst); - DEBUG_PRINTF(1, "%s: ip6->ip6_dst = %s\n", __func__, buf); - ip6_sprintf(buf, &ip6->ip6_src); - DEBUG_PRINTF(1, "%s: ip6->ip6_src = %s\n", __func__, buf); - } -#endif /* * perform sanity check against inner src/dst. * for source, perform ingress filter as well. @@ -906,41 +751,6 @@ in_stf_input(struct mbuf *m, int off) return; } - /* - * reject packets with private address range. - * (requirement from RFC3056 section 2 1st paragraph) - */ - if ((IN6_IS_ADDR_6TO4(&ip6->ip6_src) && isrfc1918addr(&ip->ip_src)) || - (IN6_IS_ADDR_6TO4(&ip6->ip6_dst) && isrfc1918addr(&ip->ip_dst))) { - m_freem(m); - return; - } - - /* - * Ignore if the destination is the same stf interface because - * all of valid IPv6 outgoing traffic should go interfaces - * except for it. - */ - memset(&rin6, 0, sizeof(rin6)); - rin6.ro_dst.sin6_len = sizeof(rin6.ro_dst); - rin6.ro_dst.sin6_family = AF_INET6; - memcpy(&rin6.ro_dst.sin6_addr, &ip6->ip6_dst, - sizeof(rin6.ro_dst.sin6_addr)); - rtalloc((struct route *)&rin6); - if (rin6.ro_rt == NULL) { - DEBUG_PRINTF(1, "%s: no IPv6 dst. Ignored.\n", __func__); - m_free(m); - return; - } - if ((rin6.ro_rt->rt_ifp == ifp) && - (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &rin6.ro_dst.sin6_addr))) { - DEBUG_PRINTF(1, "%s: IPv6 dst is the same stf. Ignored.\n", __func__); - RTFREE(rin6.ro_rt); - m_free(m); - return; - } - RTFREE(rin6.ro_rt); - itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; if ((ifp->if_flags & IFF_LINK1) != 0) ip_ecn_egress(ECN_ALLOWED, &otos, &itos); @@ -950,7 +760,7 @@ in_stf_input(struct mbuf *m, int off) ip6->ip6_flow |= htonl((u_int32_t)itos << 20); m->m_pkthdr.rcvif = ifp; - + if (bpf_peers_present(ifp->if_bpf)) { /* * We need to prepend the address family as @@ -963,7 +773,6 @@ in_stf_input(struct mbuf *m, int off) bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m); } - DEBUG_PRINTF(1, "%s: netisr_dispatch(NETISR_IPV6)\n", __func__); /* * Put the packet to the network layer input queue according to the * specified address family. @@ -972,414 +781,56 @@ in_stf_input(struct mbuf *m, int off) */ ifp->if_ipackets++; ifp->if_ibytes += m->m_pkthdr.len; + M_SETFIB(m, ifp->if_fib); netisr_dispatch(NETISR_IPV6, m); } /* ARGSUSED */ static void -stf_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info) +stf_rtrequest(cmd, rt, info) + int cmd; + struct rtentry *rt; + struct rt_addrinfo *info; { - RT_LOCK_ASSERT(rt); rt->rt_rmx.rmx_mtu = IPV6_MMTU; } -/* Check whether we have at least one instance with IFF_UP. */ static int -stf_is_up(struct ifnet *ifp) -{ - struct stf_softc *scp; - struct stf_softc *sc_cur; - struct stf_softc *sc_is_up; - - sc_is_up = NULL; - if ((sc_cur = ifp->if_softc) == NULL) - return (EINVAL); - - mtx_lock(&stf_mtx); - LIST_FOREACH(scp, &V_stf_softc_list, stf_list) { - if (scp == sc_cur) - continue; - if ((STF2IFP(scp)->if_flags & IFF_UP) != 0) { - sc_is_up = scp; - break; - } - } - mtx_unlock(&stf_mtx); - - /* We already has at least one instance with IFF_UP. */ - if (stf_is_up != NULL) - return (ENOSPC); - - return (0); -} - -static struct sockaddr_in * -stf_getin4addr_in6(struct sockaddr_in *sin, - struct ifaddr *ifa, - struct in6_addr *in6) -{ - struct sockaddr_in6 sin6; - - DEBUG_PRINTF(1, "%s: enter.\n", __func__); - if (ifa == NULL || in6 == NULL) - return NULL; - - memset(&sin6, 0, sizeof(sin6)); - memcpy(&sin6.sin6_addr, in6, sizeof(sin6.sin6_addr)); - sin6.sin6_len = sizeof(sin6); - sin6.sin6_family = AF_INET6; - - return(stf_getin4addr_sin6(sin, ifa, &sin6)); -} - -static struct sockaddr_in * -stf_getin4addr_sin6(struct sockaddr_in *sin, - struct ifaddr *ifa, - struct sockaddr_in6 *sin6) -{ - struct in6_ifaddr ia6; - int i; - - DEBUG_PRINTF(1, "%s: enter.\n", __func__); - if (ifa == NULL || sin6 == NULL) - return NULL; - - memset(&ia6, 0, sizeof(ia6)); - memcpy(&ia6, ifatoia6(ifa), sizeof(ia6)); - - /* - * Use prefixmask information from ifa, and - * address information from sin6. - */ - ia6.ia_addr.sin6_family = AF_INET6; - ia6.ia_ifa.ifa_addr = (struct sockaddr *)&ia6.ia_addr; - ia6.ia_ifa.ifa_dstaddr = NULL; - ia6.ia_ifa.ifa_netmask = (struct sockaddr *)&ia6.ia_prefixmask; - -#if STF_DEBUG - { - char buf[INET6_ADDRSTRLEN + 1]; - memset(&buf, 0, sizeof(buf)); - - ip6_sprintf(buf, &sin6->sin6_addr); - DEBUG_PRINTF(1, "%s: sin6->sin6_addr = %s\n", __func__, buf); - ip6_sprintf(buf, &ia6.ia_addr.sin6_addr); - DEBUG_PRINTF(1, "%s: ia6.ia_addr.sin6_addr = %s\n", __func__, buf); - ip6_sprintf(buf, &ia6.ia_prefixmask.sin6_addr); - DEBUG_PRINTF(1, "%s: ia6.ia_prefixmask.sin6_addr = %s\n", __func__, buf); - } -#endif - - /* - * When (src addr & src mask) != (dst (sin6) addr & src mask), - * the dst is not in the 6rd domain. The IPv4 address must - * not be used. - */ - for (i = 0; i < sizeof(ia6.ia_addr.sin6_addr); i++) { - if ((((u_char *)&ia6.ia_addr.sin6_addr)[i] & - ((u_char *)&ia6.ia_prefixmask.sin6_addr)[i]) - != - (((u_char *)&sin6->sin6_addr)[i] & - ((u_char *)&ia6.ia_prefixmask.sin6_addr)[i])) - return NULL; - } - - /* After the mask check, overwrite ia6.ia_addr with sin6. */ - memcpy(&ia6.ia_addr, sin6, sizeof(ia6.ia_addr)); - return(stf_getin4addr(sin, (struct ifaddr *)&ia6, 0)); -} - -static struct sockaddr_in * -stf_getin4addr(struct sockaddr_in *sin, - struct ifaddr *ifa, - int flags) -{ - struct in_addr *in; - struct sockaddr_in6 *sin6; - struct sockaddr_in6 *sin6d; - struct in6_ifaddr *ia6; - - DEBUG_PRINTF(1, "%s: enter.\n", __func__); - if (ifa == NULL || - ifa->ifa_addr == NULL || - ifa->ifa_addr->sa_family != AF_INET6) - return NULL; - - sin6 = satosin6(ifa->ifa_addr); - ia6 = ifatoia6(ifa); - - if (ifa->ifa_dstaddr != NULL) { - switch (ifa->ifa_dstaddr->sa_family) { - case AF_INET6: - sin6d = satosin6(ifa->ifa_dstaddr); - if (IN6_IS_ADDR_UNSPECIFIED(&sin6d->sin6_addr)) - break; - if (IN6_IS_ADDR_V4COMPAT(&sin6d->sin6_addr)) { - memset(sin, 0, sizeof(*sin)); - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = - *(const u_int32_t *)(const void *)(&sin6d->sin6_addr.s6_addr[12]); - if (flags & STF_GETIN4_USE_CACHE) { - /* - * XXX: ifa_dstaddr is used as a cache of the - * extracted IPv4 address. - */ - memcpy(sin, satosin(ifa->ifa_dstaddr), sizeof(*sin)); - ifa->ifa_dstaddr->sa_family = AF_INET; - } -#if STF_DEBUG - { - char buf[INET6_ADDRSTRLEN + 1]; - memset(&buf, 0, sizeof(buf)); - - ip_sprintf(buf, &sin->sin_addr); - DEBUG_PRINTF(1, "%s: specified dst address was used = %s\n", __func__, buf); - } -#endif - return (sin); - } else { - DEBUG_PRINTF(1, "Not a V4COMPAT address!\n"); - return (NULL); - } - /* NOT REACHED */ - break; - case AF_INET: - if (flags & STF_GETIN4_USE_CACHE) { - /* - * XXX: ifa_dstaddr is used as a cache of the - * extracted IPv4 address. - */ - memcpy(sin, satosin(ifa->ifa_dstaddr), sizeof(*sin)); - ifa->ifa_dstaddr->sa_family = AF_INET; -#if STF_DEBUG - { - char buf[INET6_ADDRSTRLEN + 1]; - memset(&buf, 0, sizeof(buf)); - - ip_sprintf(buf, &sin->sin_addr); - DEBUG_PRINTF(1, "%s: cached address was used = %s\n", __func__, buf); - } -#endif - return (sin); - } - } - } - memset(sin, 0, sizeof(*sin)); - in = &sin->sin_addr; - -#if STF_DEBUG - { - char buf[INET6_ADDRSTRLEN + 1]; - memset(&buf, 0, sizeof(buf)); - - ip6_sprintf(buf, &sin6->sin6_addr); - DEBUG_PRINTF(1, "%s: sin6->sin6_addr = %s\n", __func__, buf); - } -#endif - - if (IN6_IS_ADDR_6TO4(&sin6->sin6_addr)) { - /* 6to4 (RFC 3056) */ - bcopy(GET_V4(&sin6->sin6_addr), in, sizeof(*in)); - if (isrfc1918addr(in)) - return NULL; - } else { - /* 6rd (RFC 5569) */ - struct in6_addr buf; - u_char *p = (u_char *)&buf; - u_char *q = (u_char *)in; - u_int residue = 0; - u_char mask; - int i; - u_int plen; - - /* - * 6rd-relays IPv6 prefix is located at a 32-bit just - * after the prefix edge. - */ - plen = in6_mask2len(&satosin6(ifa->ifa_netmask)->sin6_addr, NULL); - if (32 < plen) - return NULL; - - memcpy(&buf, &sin6->sin6_addr, sizeof(buf)); - p += plen / 8; - residue = plen % 8; - mask = ~((u_char)(-1) >> residue); - - /* - * The p points head of the IPv4 address part in - * bytes. The residue is a bit-shift factor when - * prefixlen is not a multiple of 8. - */ - for (i = 0; i < 4; i++) { - DEBUG_PRINTF(2, "p[%d] = %d\n", i, p[i]); - DEBUG_PRINTF(2, "residue = %d\n", residue); - if (residue) { - p[i] <<= residue; - DEBUG_PRINTF(2, "p[%d] << residue = %d\n", - i, p[i]); - DEBUG_PRINTF(2, "mask = %x\n", - mask); - DEBUG_PRINTF(2, "p[%d + 1] & mask = %d\n", - i, p[i + 1] & mask); - DEBUG_PRINTF(2, "p[%d + 1] & mask >> (8 - residue) = %d\n", - i, (p[i + 1] & mask) >> (8-residue)); - p[i] |= ((p[i+1] & mask) >> (8 - residue)); - } - q[i] = p[i]; - } - } -#if STF_DEBUG - { - char buf[INET6_ADDRSTRLEN + 1]; - memset(&buf, 0, sizeof(buf)); - - ip_sprintf(buf, in); - DEBUG_PRINTF(1, "%s: in->in_addr = %s\n", __func__, buf); - DEBUG_PRINTF(1, "%s: leave\n", __func__); - } -#endif - if (flags & STF_GETIN4_USE_CACHE) { - DEBUG_PRINTF(1, "%s: try to memset 0 to ia_dstaddr.\n", __func__); - memset(&ia6->ia_dstaddr, 0, sizeof(ia6->ia_dstaddr)); - DEBUG_PRINTF(1, "%s: try to access ifa->ifa_dstaddr.\n", __func__); - ifa->ifa_dstaddr = (struct sockaddr *)&ia6->ia_dstaddr; - DEBUG_PRINTF(1, "%s: try to memcpy ifa->ifa_dstaddr.\n", __func__); - memcpy((struct sockaddr_in *)ifa->ifa_dstaddr, - sin, sizeof(struct sockaddr_in)); - DEBUG_PRINTF(1, "%s: try to set sa_family.\n", __func__); - ifa->ifa_dstaddr->sa_family = AF_INET; - DEBUG_PRINTF(1, "%s: in->in_addr is stored in ifa_dstaddr.\n", - __func__); - } - return (sin); -} - -static void -stf_ifaddr_change(void *arg __unused, struct ifnet *ifp) -{ - struct sockaddr_in in4; - struct ifaddr *ifa; - - DEBUG_PRINTF(1, "%s: enter.\n", __func__); - - if_addr_rlock(ifp); - TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - if (ifa->ifa_addr == NULL) - continue; - if (ifa->ifa_addr->sa_family != AF_INET6) - continue; - if (ifa->ifa_dstaddr != NULL) { - DEBUG_PRINTF(1, "%s: ifa->ifa_dstaddr != NULL!.\n", __func__); - -#if STF_DEBUG - { - char buf[INET6_ADDRSTRLEN + 1]; - memset(&buf, 0, sizeof(buf)); - - ip6_sprintf(buf, &satosin6(ifa->ifa_addr)->sin6_addr); - DEBUG_PRINTF(1, "%s: ifa_addr = %s\n", __func__, buf); - } -#endif - switch (ifa->ifa_dstaddr->sa_family) { - case AF_INET: -#if STF_DEBUG - { - char buf[INET6_ADDRSTRLEN + 1]; - memset(&buf, 0, sizeof(buf)); - - ip_sprintf(buf, &satosin(ifa->ifa_dstaddr)->sin_addr); - DEBUG_PRINTF(1, "%s: ifa_dstaddr = %s\n", __func__, buf); - } -#endif - continue; - case AF_INET6: -#if STF_DEBUG - { - char buf[INET6_ADDRSTRLEN + 1]; - memset(&buf, 0, sizeof(buf)); - - ip6_sprintf(buf, &satosin6(ifa->ifa_dstaddr)->sin6_addr); - DEBUG_PRINTF(1, "%s: ifa_dstaddr = %s\n", __func__, buf); - } -#endif - if (IN6_IS_ADDR_V4COMPAT(&satosin6(ifa->ifa_dstaddr)->sin6_addr)) { - } - if (!IN6_IS_ADDR_UNSPECIFIED(&satosin6(ifa->ifa_dstaddr)->sin6_addr)) - continue; - } - } - DEBUG_PRINTF(1, "%s: ifa->ifa_dstaddr == NULL or ::!.\n", __func__); - /* - * Extract IPv4 address from IPv6 address, - * then store it into ifa_dstaddr as the - * destination. - */ - if (stf_getin4addr(&in4, ifa, STF_GETIN4_USE_CACHE) == NULL) { - ifatoia6(ifa)->ia_flags |= IN6_IFF_DETACHED; - continue; - } - } - if_addr_runlock(ifp); -} - -static int -stf_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +stf_ioctl(ifp, cmd, data) + struct ifnet *ifp; + u_long cmd; + caddr_t data; { struct ifaddr *ifa; struct ifreq *ifr; -/* - struct in6_aliasreq *ifra6; -*/ - struct in6_aliasreq ifra; -/* - struct sockaddr_in6 *sa6; -*/ + struct sockaddr_in6 *sin6; + struct in_addr addr; int error; - memset(&ifra, 0, sizeof(ifra)); - /* - * Sanity check: if more than two interfaces have IFF_UP, do - * if_down() for all of them except for the specified one. - */ - if (ifp->if_flags & IFF_UP) { - struct stf_softc *sc_cur = ifp->if_softc; - struct stf_softc *sc; - - mtx_lock(&stf_mtx); - LIST_FOREACH(sc, &V_stf_softc_list, stf_list) { - if (sc == sc_cur) - continue; - if ((STF2IFP(sc)->if_flags & IFF_UP) != 0) { - if_printf(STF2IFP(sc), - "marked as DOWN because at least " - "one instance of stf(4) is already " - "working.\n"); - if_down(STF2IFP(sc)); - } - } - mtx_unlock(&stf_mtx); - } - error = 0; switch (cmd) { case SIOCSIFADDR: - DEBUG_PRINTF(1, "enter SIOCSIFADDR.\n"); ifa = (struct ifaddr *)data; - if (ifa == NULL) { + if (ifa == NULL || ifa->ifa_addr->sa_family != AF_INET6) { error = EAFNOSUPPORT; break; } - if (ifa->ifa_addr->sa_family == AF_INET6 && - ifa->ifa_dstaddr->sa_family == AF_INET && - ifa->ifa_netmask->sa_family == AF_INET6) { - ifa->ifa_rtrequest = stf_rtrequest; - ifp->if_flags |= IFF_UP; - } else { + sin6 = (struct sockaddr_in6 *)ifa->ifa_addr; + if (!IN6_IS_ADDR_6TO4(&sin6->sin6_addr)) { error = EINVAL; break; } + bcopy(GET_V4(&sin6->sin6_addr), &addr, sizeof(addr)); + if (isrfc1918addr(&addr)) { + error = EINVAL; + break; + } + + ifa->ifa_rtrequest = stf_rtrequest; + ifp->if_flags |= IFF_UP; break; + case SIOCADDMULTI: case SIOCDELMULTI: ifr = (struct ifreq *)data; diff --git a/sys/net/if_tun.c b/sys/net/if_tun.c index 49a5249..d74c9fe 100644 --- a/sys/net/if_tun.c +++ b/sys/net/if_tun.c @@ -929,6 +929,7 @@ tunwrite(struct cdev *dev, struct uio *uio, int flag) ifp->if_ibytes += m->m_pkthdr.len; ifp->if_ipackets++; CURVNET_SET(ifp->if_vnet); + M_SETFIB(m, ifp->if_fib); netisr_dispatch(isr, m); CURVNET_RESTORE(); return (0); diff --git a/sys/net/if_var.h b/sys/net/if_var.h index b3ecb7d..d6eb7f1 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -195,14 +195,15 @@ struct ifnet { /* protected by if_addr_mtx */ void *if_pf_kif; void *if_lagg; /* lagg glue */ - u_char if_alloctype; /* if_type at time of allocation */ + u_char if_alloctype; /* if_type at time of allocation */ + u_int if_fib; /* interface FIB */ /* * Spare fields are added so that we can modify sensitive data * structures without changing the kernel binary interface, and must * be used with care where binary compatibility is required. */ - char if_cspare[3]; + char if_cspare[3]; char *if_description; /* interface description */ void *if_pspare[7]; int if_ispare[4]; diff --git a/sys/net/route.c b/sys/net/route.c index a41efa9..9c7dfac 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -116,12 +116,6 @@ VNET_DEFINE(int, rttrash); /* routes not in table but not freed */ static VNET_DEFINE(uma_zone_t, rtzone); /* Routing table UMA zone. */ #define V_rtzone VNET(rtzone) -#if 0 -/* default fib for tunnels to use */ -u_int tunnel_fib = 0; -SYSCTL_INT(_net, OID_AUTO, tunnelfib, CTLFLAG_RD, &tunnel_fib, 0, ""); -#endif - /* * handler for net.my_fibnum */ @@ -1189,6 +1183,7 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, rt0 = NULL; /* XXX * "flow-table" only support IPv4 at the moment. + * XXX-BZ as of r205066 it would support IPv6. */ #ifdef INET if (dst->sa_family == AF_INET) { diff --git a/sys/net/route.h b/sys/net/route.h index 5031533..0bc72d7 100644 --- a/sys/net/route.h +++ b/sys/net/route.h @@ -108,8 +108,6 @@ struct rt_metrics { #endif extern u_int rt_numfibs; /* number fo usable routing tables */ -extern u_int tunnel_fib; /* tunnels use these */ -extern u_int fwd_fib; /* packets being forwarded use these routes */ /* * XXX kernel function pointer `rt_output' is visible to applications. */ diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c index 72d36b1..999557a 100644 --- a/sys/net/rtsock.c +++ b/sys/net/rtsock.c @@ -373,14 +373,14 @@ rts_sockaddr(struct socket *so, struct sockaddr **nam) static struct pr_usrreqs route_usrreqs = { .pru_abort = rts_abort, .pru_attach = rts_attach, - .pru_bind = raw_bind, - .pru_connect = raw_connect, + .pru_bind = rts_bind, + .pru_connect = rts_connect, .pru_detach = rts_detach, - .pru_disconnect = raw_disconnect, - .pru_peeraddr = raw_peeraddr, - .pru_send = raw_send, - .pru_shutdown = raw_shutdown, - .pru_sockaddr = raw_sockaddr, + .pru_disconnect = rts_disconnect, + .pru_peeraddr = rts_peeraddr, + .pru_send = rts_send, + .pru_shutdown = rts_shutdown, + .pru_sockaddr = rts_sockaddr, .pru_close = rts_close, }; |