diff options
Diffstat (limited to 'sys/net')
-rw-r--r-- | sys/net/if_gre.c | 1369 | ||||
-rw-r--r-- | sys/net/if_gre.h | 212 | ||||
-rw-r--r-- | sys/net/if_me.c | 647 |
3 files changed, 1422 insertions, 806 deletions
diff --git a/sys/net/if_gre.c b/sys/net/if_gre.c index 8954f5c..4953348 100644 --- a/sys/net/if_gre.c +++ b/sys/net/if_gre.c @@ -1,8 +1,6 @@ -/* $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $ */ -/* $FreeBSD$ */ - /*- * Copyright (c) 1998 The NetBSD Foundation, Inc. + * Copyright (c) 2014 Andrey V. Elsukov <ae@FreeBSD.org> * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -30,16 +28,12 @@ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. + * + * $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $ */ -/* - * Encapsulate L3 protocols into IP - * See RFC 2784 (successor of RFC 1701 and 1702) for more details. - * If_gre is compatible with Cisco GRE tunnels, so you can - * have a NetBSD box as the other end of a tunnel interface of a Cisco - * router. See gre(4) for more details. - * Also supported: IP in IP encaps (proto 55) as of RFC 2004 - */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" @@ -47,6 +41,7 @@ #include <sys/param.h> #include <sys/jail.h> #include <sys/kernel.h> +#include <sys/lock.h> #include <sys/libkern.h> #include <sys/malloc.h> #include <sys/module.h> @@ -54,9 +49,12 @@ #include <sys/priv.h> #include <sys/proc.h> #include <sys/protosw.h> +#include <sys/rmlock.h> #include <sys/socket.h> #include <sys/sockio.h> +#include <sys/sx.h> #include <sys/sysctl.h> +#include <sys/syslog.h> #include <sys/systm.h> #include <net/ethernet.h> @@ -64,85 +62,71 @@ #include <net/if_var.h> #include <net/if_clone.h> #include <net/if_types.h> -#include <net/route.h> +#include <net/netisr.h> #include <net/vnet.h> -#ifdef INET #include <netinet/in.h> +#ifdef INET #include <netinet/in_systm.h> #include <netinet/in_var.h> #include <netinet/ip.h> -#include <netinet/ip_gre.h> #include <netinet/ip_var.h> -#include <netinet/ip_encap.h> -#else -#error "Huh? if_gre without inet?" #endif -#include <net/bpf.h> +#ifdef INET6 +#include <netinet/ip6.h> +#include <netinet6/in6_var.h> +#include <netinet6/ip6_var.h> +#include <netinet6/scope6_var.h> +#endif +#include <netinet/ip_encap.h> +#include <net/bpf.h> #include <net/if_gre.h> -/* - * It is not easy to calculate the right value for a GRE MTU. - * We leave this task to the admin and use the same default that - * other vendors use. - */ -#define GREMTU 1476 - -#define MTAG_COOKIE_GRE 1307983903 -#define MTAG_GRE_NESTING 1 -struct mtag_gre_nesting { - uint16_t count; - uint16_t max; - struct ifnet *ifp[]; -}; - -/* - * gre_mtx protects all global variables in if_gre.c. - * XXX: gre_softc data not protected yet. - */ -VNET_DEFINE(struct mtx, gre_mtx); -VNET_DEFINE(struct gre_softc_head, gre_softc_list); +#include <machine/in_cksum.h> +#include <security/mac/mac_framework.h> +#define GREMTU 1500 static const char grename[] = "gre"; static MALLOC_DEFINE(M_GRE, grename, "Generic Routing Encapsulation"); +static VNET_DEFINE(struct mtx, gre_mtx); +#define V_gre_mtx VNET(gre_mtx) +#define GRE_LIST_LOCK_INIT(x) mtx_init(&V_gre_mtx, "gre_mtx", NULL, \ + MTX_DEF) +#define GRE_LIST_LOCK_DESTROY(x) mtx_destroy(&V_gre_mtx) +#define GRE_LIST_LOCK(x) mtx_lock(&V_gre_mtx) +#define GRE_LIST_UNLOCK(x) mtx_unlock(&V_gre_mtx) + +static VNET_DEFINE(LIST_HEAD(, gre_softc), gre_softc_list); +#define V_gre_softc_list VNET(gre_softc_list) +static struct sx gre_ioctl_sx; +SX_SYSINIT(gre_ioctl_sx, &gre_ioctl_sx, "gre_ioctl"); static int gre_clone_create(struct if_clone *, int, caddr_t); static void gre_clone_destroy(struct ifnet *); static VNET_DEFINE(struct if_clone *, gre_cloner); #define V_gre_cloner VNET(gre_cloner) +static void gre_qflush(struct ifnet *); +static int gre_transmit(struct ifnet *, struct mbuf *); static int gre_ioctl(struct ifnet *, u_long, caddr_t); static int gre_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); -static int gre_compute_route(struct gre_softc *sc); +static void gre_updatehdr(struct gre_softc *); +static int gre_set_tunnel(struct ifnet *, struct sockaddr *, + struct sockaddr *); +static void gre_delete_tunnel(struct ifnet *); +int gre_input(struct mbuf **, int *, int); #ifdef INET -extern struct domain inetdomain; -static const struct protosw in_gre_protosw = { - .pr_type = SOCK_RAW, - .pr_domain = &inetdomain, - .pr_protocol = IPPROTO_GRE, - .pr_flags = PR_ATOMIC|PR_ADDR, - .pr_input = gre_input, - .pr_output = rip_output, - .pr_ctlinput = rip_ctlinput, - .pr_ctloutput = rip_ctloutput, - .pr_usrreqs = &rip_usrreqs -}; -static const struct protosw in_mobile_protosw = { - .pr_type = SOCK_RAW, - .pr_domain = &inetdomain, - .pr_protocol = IPPROTO_MOBILE, - .pr_flags = PR_ATOMIC|PR_ADDR, - .pr_input = gre_mobile_input, - .pr_output = rip_output, - .pr_ctlinput = rip_ctlinput, - .pr_ctloutput = rip_ctloutput, - .pr_usrreqs = &rip_usrreqs -}; +extern int in_gre_attach(struct gre_softc *); +extern int in_gre_output(struct mbuf *, int, int); +#endif +#ifdef INET6 +extern int in6_gre_attach(struct gre_softc *); +extern int in6_gre_output(struct mbuf *, int, int); #endif SYSCTL_DECL(_net_link); @@ -159,6 +143,7 @@ static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0, */ #define MAX_GRE_NEST 1 #endif + static VNET_DEFINE(int, max_gre_nesting) = MAX_GRE_NEST; #define V_max_gre_nesting VNET(max_gre_nesting) SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET, @@ -191,34 +176,22 @@ gre_clone_create(struct if_clone *ifc, int unit, caddr_t params) struct gre_softc *sc; sc = malloc(sizeof(struct gre_softc), M_GRE, M_WAITOK | M_ZERO); - + sc->gre_fibnum = curthread->td_proc->p_fibnum; GRE2IFP(sc) = if_alloc(IFT_TUNNEL); - if (GRE2IFP(sc) == NULL) { - free(sc, M_GRE); - return (ENOSPC); - } - + GRE_LOCK_INIT(sc); GRE2IFP(sc)->if_softc = sc; if_initname(GRE2IFP(sc), grename, unit); - GRE2IFP(sc)->if_snd.ifq_maxlen = ifqmaxlen; - GRE2IFP(sc)->if_addrlen = 0; - GRE2IFP(sc)->if_hdrlen = 24; /* IP + GRE */ - GRE2IFP(sc)->if_mtu = GREMTU; + GRE2IFP(sc)->if_mtu = sc->gre_mtu = GREMTU; GRE2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST; GRE2IFP(sc)->if_output = gre_output; GRE2IFP(sc)->if_ioctl = gre_ioctl; - sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY; - sc->g_proto = IPPROTO_GRE; - GRE2IFP(sc)->if_flags |= IFF_LINK0; - sc->encap = NULL; - sc->gre_fibnum = curthread->td_proc->p_fibnum; - sc->wccp_ver = WCCP_V1; - sc->key = 0; + GRE2IFP(sc)->if_transmit = gre_transmit; + GRE2IFP(sc)->if_qflush = gre_qflush; if_attach(GRE2IFP(sc)); bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t)); GRE_LIST_LOCK(); - LIST_INSERT_HEAD(&V_gre_softc_list, sc, sc_list); + LIST_INSERT_HEAD(&V_gre_softc_list, sc, gre_list); GRE_LIST_UNLOCK(); return (0); } @@ -226,687 +199,749 @@ gre_clone_create(struct if_clone *ifc, int unit, caddr_t params) static void gre_clone_destroy(struct ifnet *ifp) { - struct gre_softc *sc = ifp->if_softc; + struct gre_softc *sc; + sx_xlock(&gre_ioctl_sx); + sc = ifp->if_softc; + gre_delete_tunnel(ifp); GRE_LIST_LOCK(); - LIST_REMOVE(sc, sc_list); + LIST_REMOVE(sc, gre_list); GRE_LIST_UNLOCK(); - -#ifdef INET - if (sc->encap != NULL) - encap_detach(sc->encap); -#endif bpfdetach(ifp); if_detach(ifp); + ifp->if_softc = NULL; + sx_xunlock(&gre_ioctl_sx); + if_free(ifp); + GRE_LOCK_DESTROY(sc); free(sc, M_GRE); } -/* - * The output routine. Takes a packet and encapsulates it in the protocol - * given by sc->g_proto. See also RFC 1701 and RFC 2004 - */ static int -gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, - struct route *ro) +gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { - int error = 0; - struct gre_softc *sc = ifp->if_softc; - struct greip *gh; - struct ip *ip; - struct m_tag *mtag; - struct mtag_gre_nesting *gt; - size_t len; - u_short gre_ip_id = 0; - uint8_t gre_ip_tos = 0; - u_int16_t etype = 0; - struct mobile_h mob_h; - u_int32_t af; - int extra = 0, max; - - /* - * gre may cause infinite recursion calls when misconfigured. High - * nesting level may cause stack exhaustion. We'll prevent this by - * detecting loops and by introducing upper limit. - */ - mtag = m_tag_locate(m, MTAG_COOKIE_GRE, MTAG_GRE_NESTING, NULL); - if (mtag != NULL) { - struct ifnet **ifp2; - - gt = (struct mtag_gre_nesting *)(mtag + 1); - gt->count++; - if (gt->count > min(gt->max, V_max_gre_nesting)) { - printf("%s: hit maximum recursion limit %u on %s\n", - __func__, gt->count - 1, ifp->if_xname); - m_freem(m); - error = EIO; /* is there better errno? */ - goto end; - } - - ifp2 = gt->ifp; - for (max = gt->count - 1; max > 0; max--) { - if (*ifp2 == ifp) - break; - ifp2++; - } - if (*ifp2 == ifp) { - printf("%s: detected loop with nexting %u on %s\n", - __func__, gt->count-1, ifp->if_xname); - m_freem(m); - error = EIO; /* is there better errno? */ - goto end; - } - *ifp2 = ifp; + GRE_RLOCK_TRACKER; + struct ifreq *ifr = (struct ifreq *)data; + struct sockaddr *src, *dst; + struct gre_softc *sc; +#ifdef INET + struct sockaddr_in *sin = NULL; +#endif +#ifdef INET6 + struct sockaddr_in6 *sin6 = NULL; +#endif + uint32_t opt; + int error; - } else { - /* - * Given that people should NOT increase max_gre_nesting beyond - * their real needs, we allocate once per packet rather than - * allocating an mtag once per passing through gre. - * - * Note: the sysctl does not actually check for saneness, so we - * limit the maximum numbers of possible recursions here. - */ - max = imin(V_max_gre_nesting, 256); - /* If someone sets the sysctl <= 0, we want at least 1. */ - max = imax(max, 1); - len = sizeof(struct mtag_gre_nesting) + - max * sizeof(struct ifnet *); - mtag = m_tag_alloc(MTAG_COOKIE_GRE, MTAG_GRE_NESTING, len, - M_NOWAIT); - if (mtag == NULL) { - m_freem(m); - error = ENOMEM; - goto end; - } - gt = (struct mtag_gre_nesting *)(mtag + 1); - bzero(gt, len); - gt->count = 1; - gt->max = max; - *gt->ifp = ifp; - m_tag_prepend(m, mtag); + switch (cmd) { + case SIOCSIFMTU: + /* XXX: */ + if (ifr->ifr_mtu < 576) + return (EINVAL); + break; + case SIOCSIFADDR: + ifp->if_flags |= IFF_UP; + case SIOCSIFFLAGS: + case SIOCADDMULTI: + case SIOCDELMULTI: + return (0); + case GRESADDRS: + case GRESADDRD: + case GREGADDRS: + case GREGADDRD: + case GRESPROTO: + case GREGPROTO: + return (EOPNOTSUPP); } - - if (!((ifp->if_flags & IFF_UP) && - (ifp->if_drv_flags & IFF_DRV_RUNNING)) || - sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) { - m_freem(m); - error = ENETDOWN; + src = dst = NULL; + sx_xlock(&gre_ioctl_sx); + sc = ifp->if_softc; + if (sc == NULL) { + error = ENXIO; goto end; } - - gh = NULL; - ip = NULL; - - /* BPF writes need to be handled specially. */ - if (dst->sa_family == AF_UNSPEC) - bcopy(dst->sa_data, &af, sizeof(af)); - else - af = dst->sa_family; - - if (bpf_peers_present(ifp->if_bpf)) - bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m); - - if ((ifp->if_flags & IFF_MONITOR) != 0) { - m_freem(m); - error = ENETDOWN; + error = 0; + switch (cmd) { + case SIOCSIFMTU: + GRE_WLOCK(sc); + sc->gre_mtu = ifr->ifr_mtu; + gre_updatehdr(sc); + GRE_WUNLOCK(sc); goto end; - } - - m->m_flags &= ~(M_BCAST|M_MCAST); - - if (sc->g_proto == IPPROTO_MOBILE) { - if (af == AF_INET) { - struct mbuf *m0; - int msiz; - - ip = mtod(m, struct ip *); - - /* - * RFC2004 specifies that fragmented diagrams shouldn't - * be encapsulated. - */ - if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) { - if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); - m_freem(m); - error = EINVAL; /* is there better errno? */ - goto end; - } - memset(&mob_h, 0, MOB_H_SIZ_L); - mob_h.proto = (ip->ip_p) << 8; - mob_h.odst = ip->ip_dst.s_addr; - ip->ip_dst.s_addr = sc->g_dst.s_addr; - - /* - * If the packet comes from our host, we only change - * the destination address in the IP header. - * Else we also need to save and change the source - */ - if (in_hosteq(ip->ip_src, sc->g_src)) { - msiz = MOB_H_SIZ_S; - } else { - mob_h.proto |= MOB_H_SBIT; - mob_h.osrc = ip->ip_src.s_addr; - ip->ip_src.s_addr = sc->g_src.s_addr; - msiz = MOB_H_SIZ_L; - } - mob_h.proto = htons(mob_h.proto); - mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz); - - if ((m->m_data - msiz) < m->m_pktdat) { - m0 = m_gethdr(M_NOWAIT, MT_DATA); - if (m0 == NULL) { - if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); - m_freem(m); - error = ENOBUFS; - goto end; - } - m0->m_next = m; - m->m_data += sizeof(struct ip); - m->m_len -= sizeof(struct ip); - m0->m_pkthdr.len = m->m_pkthdr.len + msiz; - m0->m_len = msiz + sizeof(struct ip); - m0->m_data += max_linkhdr; - memcpy(mtod(m0, caddr_t), (caddr_t)ip, - sizeof(struct ip)); - m = m0; - } else { /* we have some space left in the old one */ - m->m_data -= msiz; - m->m_len += msiz; - m->m_pkthdr.len += msiz; - bcopy(ip, mtod(m, caddr_t), - sizeof(struct ip)); - } - ip = mtod(m, struct ip *); - memcpy((caddr_t)(ip + 1), &mob_h, (unsigned)msiz); - ip->ip_len = htons(ntohs(ip->ip_len) + msiz); - } else { /* AF_INET */ - if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); - m_freem(m); - error = EINVAL; - goto end; - } - } else if (sc->g_proto == IPPROTO_GRE) { - switch (af) { - case AF_INET: - ip = mtod(m, struct ip *); - gre_ip_tos = ip->ip_tos; - gre_ip_id = ip->ip_id; - if (sc->wccp_ver == WCCP_V2) { - extra = sizeof(uint32_t); - etype = WCCP_PROTOCOL_TYPE; - } else { - etype = ETHERTYPE_IP; - } + case SIOCSIFPHYADDR: +#ifdef INET6 + case SIOCSIFPHYADDR_IN6: +#endif + error = EINVAL; + switch (cmd) { +#ifdef INET + case SIOCSIFPHYADDR: + src = (struct sockaddr *) + &(((struct in_aliasreq *)data)->ifra_addr); + dst = (struct sockaddr *) + &(((struct in_aliasreq *)data)->ifra_dstaddr); break; +#endif #ifdef INET6 - case AF_INET6: - gre_ip_id = ip_newid(); - etype = ETHERTYPE_IPV6; + case SIOCSIFPHYADDR_IN6: + src = (struct sockaddr *) + &(((struct in6_aliasreq *)data)->ifra_addr); + dst = (struct sockaddr *) + &(((struct in6_aliasreq *)data)->ifra_dstaddr); break; #endif default: - if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); - m_freem(m); error = EAFNOSUPPORT; goto end; } - - /* Reserve space for GRE header + optional GRE key */ - int hdrlen = sizeof(struct greip) + extra; - if (sc->key) - hdrlen += sizeof(uint32_t); - M_PREPEND(m, hdrlen, M_NOWAIT); - } else { - if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); - m_freem(m); - error = EINVAL; - goto end; - } - - if (m == NULL) { /* mbuf allocation failed */ - if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); - error = ENOBUFS; - goto end; - } - - M_SETFIB(m, sc->gre_fibnum); /* The envelope may use a different FIB */ - - gh = mtod(m, struct greip *); - if (sc->g_proto == IPPROTO_GRE) { - uint32_t *options = gh->gi_options; - - memset((void *)gh, 0, sizeof(struct greip) + extra); - gh->gi_ptype = htons(etype); - gh->gi_flags = 0; - - /* Add key option */ - if (sc->key) - { - gh->gi_flags |= htons(GRE_KP); - *(options++) = htonl(sc->key); - } - } - - gh->gi_pr = sc->g_proto; - if (sc->g_proto != IPPROTO_MOBILE) { - gh->gi_src = sc->g_src; - gh->gi_dst = sc->g_dst; - ((struct ip*)gh)->ip_v = IPPROTO_IPV4; - ((struct ip*)gh)->ip_hl = (sizeof(struct ip)) >> 2; - ((struct ip*)gh)->ip_ttl = GRE_TTL; - ((struct ip*)gh)->ip_tos = gre_ip_tos; - ((struct ip*)gh)->ip_id = gre_ip_id; - gh->gi_len = htons(m->m_pkthdr.len); - } - - if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); - if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); - /* - * Send it off and with IP_FORWARD flag to prevent it from - * overwriting the ip_id again. ip_id is already set to the - * ip_id of the encapsulated packet. - */ - error = ip_output(m, NULL, &sc->route, IP_FORWARDING, - (struct ip_moptions *)NULL, (struct inpcb *)NULL); - end: - if (error) - if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); - return (error); -} - -static int -gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) -{ - struct ifreq *ifr = (struct ifreq *)data; - struct in_aliasreq *aifr = (struct in_aliasreq *)data; - struct gre_softc *sc = ifp->if_softc; - struct sockaddr_in si; - struct sockaddr *sa = NULL; - int error, adj; - struct sockaddr_in sp, sm, dp, dm; - uint32_t key; - - error = 0; - adj = 0; + /* sa_family must be equal */ + if (src->sa_family != dst->sa_family || + src->sa_len != dst->sa_len) + goto end; - switch (cmd) { - case SIOCSIFADDR: - ifp->if_flags |= IFF_UP; - break; - case SIOCSIFFLAGS: - /* - * XXXRW: Isn't this priv_check() redundant to the ifnet - * layer check? - */ - if ((error = priv_check(curthread, PRIV_NET_SETIFFLAGS)) != 0) - break; - if ((ifr->ifr_flags & IFF_LINK0) != 0) - sc->g_proto = IPPROTO_GRE; - else - sc->g_proto = IPPROTO_MOBILE; - if ((ifr->ifr_flags & IFF_LINK2) != 0) - sc->wccp_ver = WCCP_V2; - else - sc->wccp_ver = WCCP_V1; - goto recompute; - case SIOCSIFMTU: - /* - * XXXRW: Isn't this priv_check() redundant to the ifnet - * layer check? - */ - if ((error = priv_check(curthread, PRIV_NET_SETIFMTU)) != 0) - break; - if (ifr->ifr_mtu < 576) { - error = EINVAL; - break; - } - ifp->if_mtu = ifr->ifr_mtu; - break; - case SIOCGIFMTU: - ifr->ifr_mtu = GRE2IFP(sc)->if_mtu; - break; - case SIOCADDMULTI: - /* - * XXXRW: Isn't this priv_checkr() redundant to the ifnet - * layer check? - */ - if ((error = priv_check(curthread, PRIV_NET_ADDMULTI)) != 0) - break; - if (ifr == 0) { - error = EAFNOSUPPORT; - break; - } - switch (ifr->ifr_addr.sa_family) { + /* validate sa_len */ + switch (src->sa_family) { #ifdef INET case AF_INET: + if (src->sa_len != sizeof(struct sockaddr_in)) + goto end; break; #endif #ifdef INET6 case AF_INET6: + if (src->sa_len != sizeof(struct sockaddr_in6)) + goto end; break; #endif default: error = EAFNOSUPPORT; - break; + goto end; } - break; - case SIOCDELMULTI: - /* - * XXXRW: Isn't this priv_check() redundant to the ifnet - * layer check? - */ - if ((error = priv_check(curthread, PRIV_NET_DELIFGROUP)) != 0) - break; - if (ifr == 0) { - error = EAFNOSUPPORT; - break; + /* check sa_family looks sane for the cmd */ + error = EAFNOSUPPORT; + switch (cmd) { +#ifdef INET + case SIOCSIFPHYADDR: + if (src->sa_family == AF_INET) + break; + goto end; +#endif +#ifdef INET6 + case SIOCSIFPHYADDR_IN6: + if (src->sa_family == AF_INET6) + break; + goto end; +#endif } - switch (ifr->ifr_addr.sa_family) { + error = EADDRNOTAVAIL; + switch (src->sa_family) { #ifdef INET case AF_INET: + if (satosin(src)->sin_addr.s_addr == INADDR_ANY || + satosin(dst)->sin_addr.s_addr == INADDR_ANY) + goto end; break; #endif #ifdef INET6 case AF_INET6: - break; -#endif - default: - error = EAFNOSUPPORT; - break; - } - break; - case GRESPROTO: - /* - * XXXRW: Isn't this priv_check() redundant to the ifnet - * layer check? - */ - if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) - break; - sc->g_proto = ifr->ifr_flags; - switch (sc->g_proto) { - case IPPROTO_GRE: - ifp->if_flags |= IFF_LINK0; - break; - case IPPROTO_MOBILE: - ifp->if_flags &= ~IFF_LINK0; - break; - default: - error = EPROTONOSUPPORT; - break; - } - goto recompute; - case GREGPROTO: - ifr->ifr_flags = sc->g_proto; - break; - case GRESADDRS: - case GRESADDRD: - error = priv_check(curthread, PRIV_NET_GRE); - if (error) - return (error); - /* - * set tunnel endpoints, compute a less specific route - * to the remote end and mark if as up - */ - sa = &ifr->ifr_addr; - if (cmd == GRESADDRS) - sc->g_src = (satosin(sa))->sin_addr; - if (cmd == GRESADDRD) - sc->g_dst = (satosin(sa))->sin_addr; - recompute: -#ifdef INET - if (sc->encap != NULL) { - encap_detach(sc->encap); - sc->encap = NULL; - } + if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr) + || + IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr)) + goto end; + /* + * Check validity of the scope zone ID of the + * addresses, and convert it into the kernel + * internal form if necessary. + */ + error = sa6_embedscope(satosin6(src), 0); + if (error != 0) + goto end; + error = sa6_embedscope(satosin6(dst), 0); + if (error != 0) + goto end; #endif - if ((sc->g_src.s_addr != INADDR_ANY) && - (sc->g_dst.s_addr != INADDR_ANY)) { - bzero(&sp, sizeof(sp)); - bzero(&sm, sizeof(sm)); - bzero(&dp, sizeof(dp)); - bzero(&dm, sizeof(dm)); - sp.sin_len = sm.sin_len = dp.sin_len = dm.sin_len = - sizeof(struct sockaddr_in); - sp.sin_family = sm.sin_family = dp.sin_family = - dm.sin_family = AF_INET; - sp.sin_addr = sc->g_src; - dp.sin_addr = sc->g_dst; - sm.sin_addr.s_addr = dm.sin_addr.s_addr = - INADDR_BROADCAST; -#ifdef INET - sc->encap = encap_attach(AF_INET, sc->g_proto, - sintosa(&sp), sintosa(&sm), sintosa(&dp), - sintosa(&dm), (sc->g_proto == IPPROTO_GRE) ? - &in_gre_protosw : &in_mobile_protosw, sc); - if (sc->encap == NULL) - printf("%s: unable to attach encap\n", - if_name(GRE2IFP(sc))); -#endif - if (sc->route.ro_rt != 0) /* free old route */ - RTFREE(sc->route.ro_rt); - if (gre_compute_route(sc) == 0) - ifp->if_drv_flags |= IFF_DRV_RUNNING; - else - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - } - break; - case GREGADDRS: - memset(&si, 0, sizeof(si)); - si.sin_family = AF_INET; - si.sin_len = sizeof(struct sockaddr_in); - si.sin_addr.s_addr = sc->g_src.s_addr; - sa = sintosa(&si); - error = prison_if(curthread->td_ucred, sa); - if (error != 0) - break; - ifr->ifr_addr = *sa; - break; - case GREGADDRD: - memset(&si, 0, sizeof(si)); - si.sin_family = AF_INET; - si.sin_len = sizeof(struct sockaddr_in); - si.sin_addr.s_addr = sc->g_dst.s_addr; - sa = sintosa(&si); - error = prison_if(curthread->td_ucred, sa); - if (error != 0) - break; - ifr->ifr_addr = *sa; + }; + error = gre_set_tunnel(ifp, src, dst); break; - case SIOCSIFPHYADDR: - /* - * XXXRW: Isn't this priv_check() redundant to the ifnet - * layer check? - */ - if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0) - break; - if (aifr->ifra_addr.sin_family != AF_INET || - aifr->ifra_dstaddr.sin_family != AF_INET) { - error = EAFNOSUPPORT; - break; - } - if (aifr->ifra_addr.sin_len != sizeof(si) || - aifr->ifra_dstaddr.sin_len != sizeof(si)) { - error = EINVAL; - break; - } - sc->g_src = aifr->ifra_addr.sin_addr; - sc->g_dst = aifr->ifra_dstaddr.sin_addr; - goto recompute; case SIOCDIFPHYADDR: - /* - * XXXRW: Isn't this priv_check() redundant to the ifnet - * layer check? - */ - if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0) - break; - sc->g_src.s_addr = INADDR_ANY; - sc->g_dst.s_addr = INADDR_ANY; - goto recompute; + gre_delete_tunnel(ifp); + break; case SIOCGIFPSRCADDR: + case SIOCGIFPDSTADDR: #ifdef INET6 case SIOCGIFPSRCADDR_IN6: + case SIOCGIFPDSTADDR_IN6: #endif - if (sc->g_src.s_addr == INADDR_ANY) { + if (sc->gre_family == 0) { error = EADDRNOTAVAIL; break; } - memset(&si, 0, sizeof(si)); - si.sin_family = AF_INET; - si.sin_len = sizeof(struct sockaddr_in); - si.sin_addr.s_addr = sc->g_src.s_addr; - error = prison_if(curthread->td_ucred, (struct sockaddr *)&si); - if (error != 0) + GRE_RLOCK(sc); + switch (cmd) { +#ifdef INET + case SIOCGIFPSRCADDR: + case SIOCGIFPDSTADDR: + if (sc->gre_family != AF_INET) { + error = EADDRNOTAVAIL; + break; + } + sin = (struct sockaddr_in *)&ifr->ifr_addr; + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_INET; + sin->sin_len = sizeof(*sin); break; - bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr)); - break; - case SIOCGIFPDSTADDR: -#ifdef INET6 - case SIOCGIFPDSTADDR_IN6: #endif - if (sc->g_dst.s_addr == INADDR_ANY) { - error = EADDRNOTAVAIL; +#ifdef INET6 + case SIOCGIFPSRCADDR_IN6: + case SIOCGIFPDSTADDR_IN6: + if (sc->gre_family != AF_INET6) { + error = EADDRNOTAVAIL; + break; + } + sin6 = (struct sockaddr_in6 *) + &(((struct in6_ifreq *)data)->ifr_addr); + memset(sin6, 0, sizeof(*sin6)); + sin6->sin6_family = AF_INET6; + sin6->sin6_len = sizeof(*sin6); break; +#endif } - memset(&si, 0, sizeof(si)); - si.sin_family = AF_INET; - si.sin_len = sizeof(struct sockaddr_in); - si.sin_addr.s_addr = sc->g_dst.s_addr; - error = prison_if(curthread->td_ucred, (struct sockaddr *)&si); + if (error == 0) { + switch (cmd) { +#ifdef INET + case SIOCGIFPSRCADDR: + sin->sin_addr = sc->gre_oip.ip_src; + break; + case SIOCGIFPDSTADDR: + sin->sin_addr = sc->gre_oip.ip_dst; + break; +#endif +#ifdef INET6 + case SIOCGIFPSRCADDR_IN6: + sin6->sin6_addr = sc->gre_oip6.ip6_src; + break; + case SIOCGIFPDSTADDR_IN6: + sin6->sin6_addr = sc->gre_oip6.ip6_dst; + break; +#endif + } + } + GRE_RUNLOCK(sc); if (error != 0) break; - bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr)); + switch (cmd) { +#ifdef INET + case SIOCGIFPSRCADDR: + case SIOCGIFPDSTADDR: + error = prison_if(curthread->td_ucred, + (struct sockaddr *)sin); + if (error != 0) + memset(sin, 0, sizeof(*sin)); + break; +#endif +#ifdef INET6 + case SIOCGIFPSRCADDR_IN6: + case SIOCGIFPDSTADDR_IN6: + error = prison_if(curthread->td_ucred, + (struct sockaddr *)sin6); + if (error == 0) + error = sa6_recoverscope(sin6); + if (error != 0) + memset(sin6, 0, sizeof(*sin6)); +#endif + } break; case GRESKEY: - error = priv_check(curthread, PRIV_NET_GRE); - if (error) - break; - error = copyin(ifr->ifr_data, &key, sizeof(key)); - if (error) + if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) break; - /* adjust MTU for option header */ - if (key == 0 && sc->key != 0) /* clear */ - adj += sizeof(key); - else if (key != 0 && sc->key == 0) /* set */ - adj -= sizeof(key); - - if (ifp->if_mtu + adj < 576) { - error = EINVAL; + if ((error = copyin(ifr->ifr_data, &opt, sizeof(opt))) != 0) break; + if (sc->gre_key != opt) { + GRE_WLOCK(sc); + sc->gre_key = opt; + gre_updatehdr(sc); + GRE_WUNLOCK(sc); } - ifp->if_mtu += adj; - sc->key = key; break; case GREGKEY: - error = copyout(&sc->key, ifr->ifr_data, sizeof(sc->key)); + error = copyout(&sc->gre_key, ifr->ifr_data, sizeof(sc->gre_key)); + break; + case GRESOPTS: + if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) + break; + if ((error = copyin(ifr->ifr_data, &opt, sizeof(opt))) != 0) + break; + if (opt & ~GRE_OPTMASK) + error = EINVAL; + else { + if (sc->gre_options != opt) { + GRE_WLOCK(sc); + sc->gre_options = opt; + gre_updatehdr(sc); + GRE_WUNLOCK(sc); + } + } break; + case GREGOPTS: + error = copyout(&sc->gre_options, ifr->ifr_data, + sizeof(sc->gre_options)); + break; default: error = EINVAL; break; } - +end: + sx_xunlock(&gre_ioctl_sx); return (error); } -/* - * computes a route to our destination that is not the one - * which would be taken by ip_output(), as this one will loop back to - * us. If the interface is p2p as a--->b, then a routing entry exists - * If we now send a packet to b (e.g. ping b), this will come down here - * gets src=a, dst=b tacked on and would from ip_output() sent back to - * if_gre. - * Goal here is to compute a route to b that is less specific than - * a-->b. We know that this one exists as in normal operation we have - * at least a default route which matches. - */ -static int -gre_compute_route(struct gre_softc *sc) +static void +gre_updatehdr(struct gre_softc *sc) +{ + struct grehdr *gh = NULL; + uint32_t *opts; + uint16_t flags; + + GRE_WLOCK_ASSERT(sc); + switch (sc->gre_family) { +#ifdef INET + case AF_INET: + sc->gre_hlen = sizeof(struct greip); + sc->gre_oip.ip_v = IPPROTO_IPV4; + sc->gre_oip.ip_hl = sizeof(struct ip) >> 2; + sc->gre_oip.ip_p = IPPROTO_GRE; + gh = &sc->gre_gihdr->gi_gre; + break; +#endif +#ifdef INET6 + case AF_INET6: + sc->gre_hlen = sizeof(struct greip6); + sc->gre_oip6.ip6_vfc = IPV6_VERSION; + sc->gre_oip6.ip6_nxt = IPPROTO_GRE; + gh = &sc->gre_gi6hdr->gi6_gre; + break; +#endif + default: + return; + } + flags = 0; + opts = gh->gre_opts; + if (sc->gre_options & GRE_ENABLE_CSUM) { + flags |= GRE_FLAGS_CP; + sc->gre_hlen += 2 * sizeof(uint16_t); + *opts++ = 0; + } + if (sc->gre_key != 0) { + flags |= GRE_FLAGS_KP; + sc->gre_hlen += sizeof(uint32_t); + *opts++ = htonl(sc->gre_key); + } + if (sc->gre_options & GRE_ENABLE_SEQ) { + flags |= GRE_FLAGS_SP; + sc->gre_hlen += sizeof(uint32_t); + *opts++ = 0; + } else + sc->gre_oseq = 0; + gh->gre_flags = htons(flags); + GRE2IFP(sc)->if_mtu = sc->gre_mtu - sc->gre_hlen; +} + +static void +gre_detach(struct gre_softc *sc) { - struct route *ro; - ro = &sc->route; + sx_assert(&gre_ioctl_sx, SA_XLOCKED); + if (sc->gre_ecookie != NULL) + encap_detach(sc->gre_ecookie); + sc->gre_ecookie = NULL; +} - memset(ro, 0, sizeof(struct route)); - ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst; - ro->ro_dst.sa_family = AF_INET; - ro->ro_dst.sa_len = sizeof(ro->ro_dst); +static int +gre_set_tunnel(struct ifnet *ifp, struct sockaddr *src, + struct sockaddr *dst) +{ + struct gre_softc *sc, *tsc; +#ifdef INET6 + struct ip6_hdr *ip6; +#endif +#ifdef INET + struct ip *ip; +#endif + void *hdr; + int error; - /* - * toggle last bit, so our interface is not found, but a less - * specific route. I'd rather like to specify a shorter mask, - * but this is not possible. Should work though. XXX - * XXX MRT Use a different FIB for the tunnel to solve this problem. - */ - if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0) { - ((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr ^= - htonl(0x01); + sx_assert(&gre_ioctl_sx, SA_XLOCKED); + GRE_LIST_LOCK(); + sc = ifp->if_softc; + LIST_FOREACH(tsc, &V_gre_softc_list, gre_list) { + if (tsc == sc || tsc->gre_family != src->sa_family) + continue; +#ifdef INET + if (tsc->gre_family == AF_INET && + tsc->gre_oip.ip_src.s_addr == + satosin(src)->sin_addr.s_addr && + tsc->gre_oip.ip_dst.s_addr == + satosin(dst)->sin_addr.s_addr) { + GRE_LIST_UNLOCK(); + return (EADDRNOTAVAIL); + } +#endif +#ifdef INET6 + if (tsc->gre_family == AF_INET6 && + IN6_ARE_ADDR_EQUAL(&tsc->gre_oip6.ip6_src, + &satosin6(src)->sin6_addr) && + IN6_ARE_ADDR_EQUAL(&tsc->gre_oip6.ip6_dst, + &satosin6(dst)->sin6_addr)) { + GRE_LIST_UNLOCK(); + return (EADDRNOTAVAIL); + } +#endif } + GRE_LIST_UNLOCK(); -#ifdef DIAGNOSTIC - printf("%s: searching for a route to %s", if_name(GRE2IFP(sc)), - inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr)); + switch (src->sa_family) { +#ifdef INET + case AF_INET: + hdr = ip = malloc(sizeof(struct greip) + + 3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO); + ip->ip_src = satosin(src)->sin_addr; + ip->ip_dst = satosin(dst)->sin_addr; + break; +#endif +#ifdef INET6 + case AF_INET6: + hdr = ip6 = malloc(sizeof(struct greip6) + + 3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO); + ip6->ip6_src = satosin6(src)->sin6_addr; + ip6->ip6_dst = satosin6(dst)->sin6_addr; + break; +#endif + default: + return (EAFNOSUPPORT); + } + if (sc->gre_family != src->sa_family) + gre_detach(sc); + GRE_WLOCK(sc); + if (sc->gre_family != 0) + free(sc->gre_hdr, M_GRE); + sc->gre_family = src->sa_family; + sc->gre_hdr = hdr; + sc->gre_oseq = 0; + sc->gre_iseq = UINT32_MAX; + gre_updatehdr(sc); + GRE_WUNLOCK(sc); + + switch (src->sa_family) { +#ifdef INET + case AF_INET: + error = in_gre_attach(sc); + break; +#endif +#ifdef INET6 + case AF_INET6: + error = in6_gre_attach(sc); + break; #endif + } + if (error == 0) + ifp->if_drv_flags |= IFF_DRV_RUNNING; + return (error); +} - rtalloc_fib(ro, sc->gre_fibnum); +static void +gre_delete_tunnel(struct ifnet *ifp) +{ + struct gre_softc *sc = ifp->if_softc; + int family; + + GRE_WLOCK(sc); + family = sc->gre_family; + sc->gre_family = 0; + GRE_WUNLOCK(sc); + if (family != 0) { + gre_detach(sc); + free(sc->gre_hdr, M_GRE); + } + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; +} +int +gre_input(struct mbuf **mp, int *offp, int proto) +{ + struct gre_softc *sc; + struct grehdr *gh; + struct ifnet *ifp; + struct mbuf *m; + uint32_t *opts, key; + uint16_t flags; + int hlen, isr, af; + + m = *mp; + sc = encap_getarg(m); + KASSERT(sc != NULL, ("encap_getarg returned NULL")); + + ifp = GRE2IFP(sc); + gh = (struct grehdr *)mtodo(m, *offp); + flags = ntohs(gh->gre_flags); + if (flags & ~GRE_FLAGS_MASK) + goto drop; + opts = gh->gre_opts; + hlen = 2 * sizeof(uint16_t); + if (flags & GRE_FLAGS_CP) { + /* reserved1 field must be zero */ + if (((uint16_t *)opts)[1] != 0) + goto drop; + if (in_cksum_skip(m, m->m_pkthdr.len, *offp) != 0) + goto drop; + hlen += 2 * sizeof(uint16_t); + opts++; + } + if (flags & GRE_FLAGS_KP) { + key = ntohl(*opts); + hlen += sizeof(uint32_t); + opts++; + } else + key = 0; /* - * check if this returned a route at all and this route is no - * recursion to ourself - */ - if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) { -#ifdef DIAGNOSTIC - if (ro->ro_rt == NULL) - printf(" - no route found!\n"); - else - printf(" - route loops back to ourself!\n"); -#endif - return EADDRNOTAVAIL; + if (sc->gre_key != 0 && (key != sc->gre_key || key != 0)) + goto drop; + */ + if (flags & GRE_FLAGS_SP) { + /* seq = ntohl(*opts); */ + hlen += sizeof(uint32_t); } + switch (ntohs(gh->gre_proto)) { + case ETHERTYPE_WCCP: + /* + * For WCCP skip an additional 4 bytes if after GRE header + * doesn't follow an IP header. + */ + if (flags == 0 && (*(uint8_t *)gh->gre_opts & 0xF0) != 0x40) + hlen += sizeof(uint32_t); + /* FALLTHROUGH */ + case ETHERTYPE_IP: + isr = NETISR_IP; + af = AF_INET; + break; + case ETHERTYPE_IPV6: + isr = NETISR_IPV6; + af = AF_INET6; + break; + default: + goto drop; + } + m_adj(m, *offp + hlen); + m_clrprotoflags(m); + m->m_pkthdr.rcvif = ifp; + M_SETFIB(m, sc->gre_fibnum); +#ifdef MAC + mac_ifnet_create_mbuf(ifp, m); +#endif + BPF_MTAP2(ifp, &af, sizeof(af), m); + if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); + if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); + if ((ifp->if_flags & IFF_MONITOR) != 0) + m_freem(m); + else + netisr_dispatch(isr, m); + return (IPPROTO_DONE); +drop: + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); + m_freem(m); + return (IPPROTO_DONE); +} - /* - * now change it back - else ip_output will just drop - * the route and search one to this interface ... - */ - if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0) - ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst; +#define MTAG_GRE 1307983903 +static int +gre_check_nesting(struct ifnet *ifp, struct mbuf *m) +{ + struct m_tag *mtag; + int count; + + count = 1; + mtag = NULL; + while ((mtag = m_tag_locate(m, MTAG_GRE, 0, NULL)) != NULL) { + if (*(struct ifnet **)(mtag + 1) == ifp) { + log(LOG_NOTICE, "%s: loop detected\n", ifp->if_xname); + return (EIO); + } + count++; + } + if (count > V_max_gre_nesting) { + log(LOG_NOTICE, + "%s: if_output recursively called too many times(%d)\n", + ifp->if_xname, count); + return (EIO); + } + mtag = m_tag_alloc(MTAG_GRE, 0, sizeof(struct ifnet *), M_NOWAIT); + if (mtag == NULL) + return (ENOMEM); + *(struct ifnet **)(mtag + 1) = ifp; + m_tag_prepend(m, mtag); + return (0); +} + +static int +gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, + struct route *ro) +{ + uint32_t af; + int error; -#ifdef DIAGNOSTIC - printf(", choosing %s with gateway %s", if_name(ro->ro_rt->rt_ifp), - inet_ntoa(((struct sockaddr_in *)(ro->ro_rt->rt_gateway))->sin_addr)); - printf("\n"); +#ifdef MAC + error = mac_ifnet_check_transmit(ifp, m); + if (error != 0) + goto drop; #endif + if ((ifp->if_flags & IFF_MONITOR) != 0 || + (ifp->if_flags & IFF_UP) == 0) { + error = ENETDOWN; + goto drop; + } + + error = gre_check_nesting(ifp, m); + if (error != 0) + goto drop; - return 0; + m->m_flags &= ~(M_BCAST|M_MCAST); + if (dst->sa_family == AF_UNSPEC) + bcopy(dst->sa_data, &af, sizeof(af)); + else + af = dst->sa_family; + BPF_MTAP2(ifp, &af, sizeof(af), m); + m->m_pkthdr.csum_data = af; /* save af for if_transmit */ + return (ifp->if_transmit(ifp, m)); +drop: + m_freem(m); + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); + return (error); } -/* - * do a checksum of a buffer - much like in_cksum, which operates on - * mbufs. - */ -u_int16_t -gre_in_cksum(u_int16_t *p, u_int len) +static void +gre_setseqn(struct grehdr *gh, uint32_t seq) +{ + uint32_t *opts; + uint16_t flags; + + opts = gh->gre_opts; + flags = ntohs(gh->gre_flags); + KASSERT((flags & GRE_FLAGS_SP) != 0, + ("gre_setseqn called, but GRE_FLAGS_SP isn't set ")); + if (flags & GRE_FLAGS_CP) + opts++; + if (flags & GRE_FLAGS_KP) + opts++; + *opts = htonl(seq); +} + +static int +gre_transmit(struct ifnet *ifp, struct mbuf *m) { - u_int32_t sum = 0; - int nwords = len >> 1; - - while (nwords-- != 0) - sum += *p++; - - if (len & 1) { - union { - u_short w; - u_char c[2]; - } u; - u.c[0] = *(u_char *)p; - u.c[1] = 0; - sum += u.w; + GRE_RLOCK_TRACKER; + struct gre_softc *sc; + struct grehdr *gh; + uint32_t iaf, oaf, oseq; + int error, hlen, olen, plen; + int want_seq, want_csum; + + plen = 0; + sc = ifp->if_softc; + if (sc == NULL) { + error = ENETDOWN; + m_freem(m); + goto drop; + } + GRE_RLOCK(sc); + if (sc->gre_family == 0) { + GRE_RUNLOCK(sc); + error = ENETDOWN; + m_freem(m); + goto drop; + } + iaf = m->m_pkthdr.csum_data; + oaf = sc->gre_family; + hlen = sc->gre_hlen; + want_seq = (sc->gre_options & GRE_ENABLE_SEQ) != 0; + if (want_seq) + oseq = sc->gre_oseq++; /* XXX */ + want_csum = (sc->gre_options & GRE_ENABLE_CSUM) != 0; + M_SETFIB(m, sc->gre_fibnum); + M_PREPEND(m, hlen, M_NOWAIT); + if (m == NULL) { + GRE_RUNLOCK(sc); + error = ENOBUFS; + goto drop; + } + bcopy(sc->gre_hdr, mtod(m, void *), hlen); + GRE_RUNLOCK(sc); + switch (oaf) { +#ifdef INET + case AF_INET: + olen = sizeof(struct ip); + break; +#endif +#ifdef INET6 + case AF_INET6: + olen = sizeof(struct ip6_hdr); + break; +#endif + default: + error = ENETDOWN; + goto drop; } + gh = (struct grehdr *)mtodo(m, olen); + switch (iaf) { +#ifdef INET + case AF_INET: + gh->gre_proto = htons(ETHERTYPE_IP); + break; +#endif +#ifdef INET6 + case AF_INET6: + gh->gre_proto = htons(ETHERTYPE_IPV6); + break; +#endif + default: + error = ENETDOWN; + goto drop; + } + if (want_seq) + gre_setseqn(gh, oseq); + if (want_csum) { + *(uint16_t *)gh->gre_opts = in_cksum_skip(m, + m->m_pkthdr.len, olen); + } + plen = m->m_pkthdr.len - hlen; + switch (oaf) { +#ifdef INET + case AF_INET: + error = in_gre_output(m, iaf, hlen); + break; +#endif +#ifdef INET6 + case AF_INET6: + error = in6_gre_output(m, iaf, hlen); + break; +#endif + default: + m_freem(m); + error = ENETDOWN; + }; +drop: + if (error) + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); + else { + if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); + if_inc_counter(ifp, IFCOUNTER_OBYTES, plen); + } + return (error); +} + +static void +gre_qflush(struct ifnet *ifp __unused) +{ - /* end-around-carry */ - sum = (sum >> 16) + (sum & 0xffff); - sum += (sum >> 16); - return (~sum); } static int diff --git a/sys/net/if_gre.h b/sys/net/if_gre.h index cb2a44b..3a48efe 100644 --- a/sys/net/if_gre.h +++ b/sys/net/if_gre.h @@ -1,8 +1,6 @@ -/* $NetBSD: if_gre.h,v 1.13 2003/11/10 08:51:52 wiz Exp $ */ -/* $FreeBSD$ */ - /*- * Copyright (c) 1998 The NetBSD Foundation, Inc. + * Copyright (c) 2014 Andrey V. Elsukov <ae@FreeBSD.org> * All rights reserved * * This code is derived from software contributed to The NetBSD Foundation @@ -28,166 +26,102 @@ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. + * + * $NetBSD: if_gre.h,v 1.13 2003/11/10 08:51:52 wiz Exp $ + * $FreeBSD$ */ -#ifndef _NET_IF_GRE_H -#define _NET_IF_GRE_H +#ifndef _NET_IF_GRE_H_ +#define _NET_IF_GRE_H_ -#include <sys/ioccom.h> #ifdef _KERNEL -#include <sys/queue.h> - -/* - * Version of the WCCP, need to be configured manually since - * header for version 2 is the same but IP payload is prepended - * with additional 4-bytes field. - */ -typedef enum { - WCCP_V1 = 0, - WCCP_V2 -} wccp_ver_t; - -struct gre_softc { - struct ifnet *sc_ifp; - LIST_ENTRY(gre_softc) sc_list; - int gre_unit; - int gre_flags; - u_int gre_fibnum; /* use this fib for envelopes */ - struct in_addr g_src; /* source address of gre packets */ - struct in_addr g_dst; /* destination address of gre packets */ - struct route route; /* routing entry that determines, where a - encapsulated packet should go */ - u_char g_proto; /* protocol of encapsulator */ - - const struct encaptab *encap; /* encapsulation cookie */ - - uint32_t key; /* key included in outgoing GRE packets */ - /* zero means none */ - - wccp_ver_t wccp_ver; /* version of the WCCP */ -}; -#define GRE2IFP(sc) ((sc)->sc_ifp) - - -struct gre_h { - u_int16_t flags; /* GRE flags */ - u_int16_t ptype; /* protocol type of payload typically - Ether protocol type*/ - uint32_t options[0]; /* optional options */ -/* - * from here on: fields are optional, presence indicated by flags - * - u_int_16 checksum checksum (one-complements of GRE header - and payload - Present if (ck_pres | rt_pres == 1). - Valid if (ck_pres == 1). - u_int_16 offset offset from start of routing filed to - first octet of active SRE (see below). - Present if (ck_pres | rt_pres == 1). - Valid if (rt_pres == 1). - u_int_32 key inserted by encapsulator e.g. for - authentication - Present if (key_pres ==1 ). - u_int_32 seq_num Sequence number to allow for packet order - Present if (seq_pres ==1 ). - struct gre_sre[] routing Routing fileds (see below) - Present if (rt_pres == 1) - */ +/* GRE header according to RFC 2784 and RFC 2890 */ +struct grehdr { + uint16_t gre_flags; /* GRE flags */ +#define GRE_FLAGS_CP 0x8000 /* checksum present */ +#define GRE_FLAGS_KP 0x2000 /* key present */ +#define GRE_FLAGS_SP 0x1000 /* sequence present */ +#define GRE_FLAGS_MASK (GRE_FLAGS_CP|GRE_FLAGS_KP|GRE_FLAGS_SP) + uint16_t gre_proto; /* protocol type */ + uint32_t gre_opts[0]; /* optional fields */ } __packed; +#ifdef INET struct greip { - struct ip gi_i; - struct gre_h gi_g; + struct ip gi_ip; + struct grehdr gi_gre; } __packed; +#endif -#define gi_pr gi_i.ip_p -#define gi_len gi_i.ip_len -#define gi_src gi_i.ip_src -#define gi_dst gi_i.ip_dst -#define gi_ptype gi_g.ptype -#define gi_flags gi_g.flags -#define gi_options gi_g.options +#ifdef INET6 +struct greip6 { + struct ip6_hdr gi6_ip6; + struct grehdr gi6_gre; +} __packed; +#endif -#define GRE_CP 0x8000 /* Checksum Present */ -#define GRE_RP 0x4000 /* Routing Present */ -#define GRE_KP 0x2000 /* Key Present */ -#define GRE_SP 0x1000 /* Sequence Present */ -#define GRE_SS 0x0800 /* Strict Source Route */ +struct gre_softc { + struct ifnet *gre_ifp; + LIST_ENTRY(gre_softc) gre_list; + struct rmlock gre_lock; + int gre_family; /* AF of delivery header */ + uint32_t gre_iseq; + uint32_t gre_oseq; + uint32_t gre_key; + uint32_t gre_options; + uint32_t gre_mtu; + u_int gre_fibnum; + u_int gre_hlen; /* header size */ + union { + void *hdr; +#ifdef INET + struct greip *gihdr; +#endif +#ifdef INET6 + struct greip6 *gi6hdr; +#endif + } gre_uhdr; + const struct encaptab *gre_ecookie; +}; +#define GRE2IFP(sc) ((sc)->gre_ifp) +#define GRE_LOCK_INIT(sc) rm_init(&(sc)->gre_lock, "gre softc") +#define GRE_LOCK_DESTROY(sc) rm_destroy(&(sc)->gre_lock) +#define GRE_RLOCK_TRACKER struct rm_priotracker gre_tracker +#define GRE_RLOCK(sc) rm_rlock(&(sc)->gre_lock, &gre_tracker) +#define GRE_RUNLOCK(sc) rm_runlock(&(sc)->gre_lock, &gre_tracker) +#define GRE_RLOCK_ASSERT(sc) rm_assert(&(sc)->gre_lock, RA_RLOCKED) +#define GRE_WLOCK(sc) rm_wlock(&(sc)->gre_lock) +#define GRE_WUNLOCK(sc) rm_wunlock(&(sc)->gre_lock) +#define GRE_WLOCK_ASSERT(sc) rm_assert(&(sc)->gre_lock, RA_WLOCKED) + +#define gre_hdr gre_uhdr.hdr +#define gre_gihdr gre_uhdr.gihdr +#define gre_gi6hdr gre_uhdr.gi6hdr +#define gre_oip gre_gihdr->gi_ip +#define gre_oip6 gre_gi6hdr->gi6_ip6 /* * CISCO uses special type for GRE tunnel created as part of WCCP * connection, while in fact those packets are just IPv4 encapsulated * into GRE. */ -#define WCCP_PROTOCOL_TYPE 0x883E - -/* - * gre_sre defines a Source route Entry. These are needed if packets - * should be routed over more than one tunnel hop by hop - */ -struct gre_sre { - u_int16_t sre_family; /* address family */ - u_char sre_offset; /* offset to first octet of active entry */ - u_char sre_length; /* number of octets in the SRE. - sre_lengthl==0 -> last entry. */ - u_char *sre_rtinfo; /* the routing information */ -}; - -struct greioctl { - int unit; - struct in_addr addr; -}; - -/* for mobile encaps */ - -struct mobile_h { - u_int16_t proto; /* protocol and S-bit */ - u_int16_t hcrc; /* header checksum */ - u_int32_t odst; /* original destination address */ - u_int32_t osrc; /* original source addr, if S-bit set */ -} __packed; - -struct mobip_h { - struct ip mi; - struct mobile_h mh; -} __packed; - - -#define MOB_H_SIZ_S (sizeof(struct mobile_h) - sizeof(u_int32_t)) -#define MOB_H_SIZ_L (sizeof(struct mobile_h)) -#define MOB_H_SBIT 0x0080 - -#define GRE_TTL 30 - +#define ETHERTYPE_WCCP 0x883E #endif /* _KERNEL */ -/* - * ioctls needed to manipulate the interface - */ - #define GRESADDRS _IOW('i', 101, struct ifreq) #define GRESADDRD _IOW('i', 102, struct ifreq) #define GREGADDRS _IOWR('i', 103, struct ifreq) #define GREGADDRD _IOWR('i', 104, struct ifreq) #define GRESPROTO _IOW('i' , 105, struct ifreq) #define GREGPROTO _IOWR('i', 106, struct ifreq) -#define GREGKEY _IOWR('i', 107, struct ifreq) -#define GRESKEY _IOW('i', 108, struct ifreq) -#ifdef _KERNEL -LIST_HEAD(gre_softc_head, gre_softc); -VNET_DECLARE(struct gre_softc_head, gre_softc_list); -#define V_gre_softc_list VNET(gre_softc_list) +#define GREGKEY _IOWR('i', 107, struct ifreq) +#define GRESKEY _IOW('i', 108, struct ifreq) +#define GREGOPTS _IOWR('i', 109, struct ifreq) +#define GRESOPTS _IOW('i', 110, struct ifreq) -VNET_DECLARE(struct mtx, gre_mtx); -#define V_gre_mtx VNET(gre_mtx) -#define GRE_LIST_LOCK_INIT(x) mtx_init(&V_gre_mtx, "gre_mtx", NULL, \ - MTX_DEF) -#define GRE_LIST_LOCK_DESTROY(x) mtx_destroy(&V_gre_mtx) -#define GRE_LIST_LOCK(x) mtx_lock(&V_gre_mtx) -#define GRE_LIST_UNLOCK(x) mtx_unlock(&V_gre_mtx) +#define GRE_ENABLE_CSUM 0x0001 +#define GRE_ENABLE_SEQ 0x0002 +#define GRE_OPTMASK (GRE_ENABLE_CSUM|GRE_ENABLE_SEQ) -u_int16_t gre_in_cksum(u_int16_t *, u_int); -#endif /* _KERNEL */ - -#endif +#endif /* _NET_IF_GRE_H_ */ diff --git a/sys/net/if_me.c b/sys/net/if_me.c new file mode 100644 index 0000000..a00bdd2 --- /dev/null +++ b/sys/net/if_me.c @@ -0,0 +1,647 @@ +/*- + * Copyright (c) 2014 Andrey V. Elsukov <ae@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/jail.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/libkern.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/mbuf.h> +#include <sys/priv.h> +#include <sys/proc.h> +#include <sys/protosw.h> +#include <sys/rmlock.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <sys/sx.h> +#include <sys/sysctl.h> +#include <sys/syslog.h> +#include <sys/systm.h> + +#include <net/bpf.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <net/if_var.h> +#include <net/if_clone.h> +#include <net/if_types.h> +#include <net/netisr.h> +#include <net/vnet.h> + +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/in_var.h> +#include <netinet/ip.h> +#include <netinet/ip_var.h> +#include <netinet/ip_encap.h> + +#include <machine/in_cksum.h> +#include <security/mac/mac_framework.h> + +#define MEMTU 1500 +static const char mename[] = "me"; +static MALLOC_DEFINE(M_IFME, mename, "Minimal Encapsulation for IP"); +static VNET_DEFINE(struct mtx, me_mtx); +#define V_me_mtx VNET(me_mtx) +/* Minimal forwarding header RFC 2004 */ +struct mobhdr { + uint8_t mob_proto; /* protocol */ + uint8_t mob_flags; /* flags */ +#define MOB_FLAGS_SP 0x80 /* source present */ + uint16_t mob_csum; /* header checksum */ + struct in_addr mob_dst; /* original destination address */ + struct in_addr mob_src; /* original source addr (optional) */ +} __packed; + +struct me_softc { + struct ifnet *me_ifp; + LIST_ENTRY(me_softc) me_list; + struct rmlock me_lock; + u_int me_fibnum; + const struct encaptab *me_ecookie; + struct in_addr me_src; + struct in_addr me_dst; +}; +#define ME2IFP(sc) ((sc)->me_ifp) +#define ME_READY(sc) ((sc)->me_src.s_addr != 0) +#define ME_LOCK_INIT(sc) rm_init(&(sc)->me_lock, "me softc") +#define ME_LOCK_DESTROY(sc) rm_destroy(&(sc)->me_lock) +#define ME_RLOCK_TRACKER struct rm_priotracker me_tracker +#define ME_RLOCK(sc) rm_rlock(&(sc)->me_lock, &me_tracker) +#define ME_RUNLOCK(sc) rm_runlock(&(sc)->me_lock, &me_tracker) +#define ME_RLOCK_ASSERT(sc) rm_assert(&(sc)->me_lock, RA_RLOCKED) +#define ME_WLOCK(sc) rm_wlock(&(sc)->me_lock) +#define ME_WUNLOCK(sc) rm_wunlock(&(sc)->me_lock) +#define ME_WLOCK_ASSERT(sc) rm_assert(&(sc)->me_lock, RA_WLOCKED) + +#define ME_LIST_LOCK_INIT(x) mtx_init(&V_me_mtx, "me_mtx", NULL, MTX_DEF) +#define ME_LIST_LOCK_DESTROY(x) mtx_destroy(&V_me_mtx) +#define ME_LIST_LOCK(x) mtx_lock(&V_me_mtx) +#define ME_LIST_UNLOCK(x) mtx_unlock(&V_me_mtx) + +static VNET_DEFINE(LIST_HEAD(, me_softc), me_softc_list); +#define V_me_softc_list VNET(me_softc_list) +static struct sx me_ioctl_sx; +SX_SYSINIT(me_ioctl_sx, &me_ioctl_sx, "me_ioctl"); + +static int me_clone_create(struct if_clone *, int, caddr_t); +static void me_clone_destroy(struct ifnet *); +static VNET_DEFINE(struct if_clone *, me_cloner); +#define V_me_cloner VNET(me_cloner) + +static void me_qflush(struct ifnet *); +static int me_transmit(struct ifnet *, struct mbuf *); +static int me_ioctl(struct ifnet *, u_long, caddr_t); +static int me_output(struct ifnet *, struct mbuf *, + const struct sockaddr *, struct route *); +static int me_input(struct mbuf **, int *, int); + +static int me_set_tunnel(struct ifnet *, struct sockaddr_in *, + struct sockaddr_in *); +static void me_delete_tunnel(struct ifnet *); + +SYSCTL_DECL(_net_link); +static SYSCTL_NODE(_net_link, IFT_TUNNEL, me, CTLFLAG_RW, 0, + "Minimal Encapsulation for IP (RFC 2004)"); +#ifndef MAX_ME_NEST +#define MAX_ME_NEST 1 +#endif + +static VNET_DEFINE(int, max_me_nesting) = MAX_ME_NEST; +#define V_max_me_nesting VNET(max_me_nesting) +SYSCTL_INT(_net_link_me, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET, + &VNET_NAME(max_me_nesting), 0, "Max nested tunnels"); + +extern struct domain inetdomain; +static const struct protosw in_mobile_protosw = { + .pr_type = SOCK_RAW, + .pr_domain = &inetdomain, + .pr_protocol = IPPROTO_MOBILE, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = me_input, + .pr_output = rip_output, + .pr_ctlinput = rip_ctlinput, + .pr_ctloutput = rip_ctloutput, + .pr_usrreqs = &rip_usrreqs +}; + +static void +vnet_me_init(const void *unused __unused) +{ + LIST_INIT(&V_me_softc_list); + ME_LIST_LOCK_INIT(); + V_me_cloner = if_clone_simple(mename, me_clone_create, + me_clone_destroy, 0); +} +VNET_SYSINIT(vnet_me_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_me_init, NULL); + +static void +vnet_me_uninit(const void *unused __unused) +{ + + if_clone_detach(V_me_cloner); + ME_LIST_LOCK_DESTROY(); +} +VNET_SYSUNINIT(vnet_me_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_me_uninit, NULL); + +static int +me_clone_create(struct if_clone *ifc, int unit, caddr_t params) +{ + struct me_softc *sc; + + sc = malloc(sizeof(struct me_softc), M_IFME, M_WAITOK | M_ZERO); + sc->me_fibnum = curthread->td_proc->p_fibnum; + ME2IFP(sc) = if_alloc(IFT_TUNNEL); + ME_LOCK_INIT(sc); + ME2IFP(sc)->if_softc = sc; + if_initname(ME2IFP(sc), mename, unit); + + ME2IFP(sc)->if_mtu = MEMTU - sizeof(struct mobhdr); + ME2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST; + ME2IFP(sc)->if_output = me_output; + ME2IFP(sc)->if_ioctl = me_ioctl; + ME2IFP(sc)->if_transmit = me_transmit; + ME2IFP(sc)->if_qflush = me_qflush; + if_attach(ME2IFP(sc)); + bpfattach(ME2IFP(sc), DLT_NULL, sizeof(u_int32_t)); + ME_LIST_LOCK(); + LIST_INSERT_HEAD(&V_me_softc_list, sc, me_list); + ME_LIST_UNLOCK(); + return (0); +} + +static void +me_clone_destroy(struct ifnet *ifp) +{ + struct me_softc *sc; + + sx_xlock(&me_ioctl_sx); + sc = ifp->if_softc; + me_delete_tunnel(ifp); + ME_LIST_LOCK(); + LIST_REMOVE(sc, me_list); + ME_LIST_UNLOCK(); + bpfdetach(ifp); + if_detach(ifp); + ifp->if_softc = NULL; + sx_xunlock(&me_ioctl_sx); + + if_free(ifp); + ME_LOCK_DESTROY(sc); + free(sc, M_IFME); +} + +static int +me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + ME_RLOCK_TRACKER; + struct ifreq *ifr = (struct ifreq *)data; + struct sockaddr_in *src, *dst; + struct me_softc *sc; + int error; + + switch (cmd) { + case SIOCSIFMTU: + if (ifr->ifr_mtu < 576) + return (EINVAL); + ifp->if_mtu = ifr->ifr_mtu - sizeof(struct mobhdr); + return (0); + case SIOCSIFADDR: + ifp->if_flags |= IFF_UP; + case SIOCSIFFLAGS: + case SIOCADDMULTI: + case SIOCDELMULTI: + return (0); + } + sx_xlock(&me_ioctl_sx); + sc = ifp->if_softc; + if (sc == NULL) { + error = ENXIO; + goto end; + } + error = 0; + switch (cmd) { + case SIOCSIFPHYADDR: + src = (struct sockaddr_in *) + &(((struct in_aliasreq *)data)->ifra_addr); + dst = (struct sockaddr_in *) + &(((struct in_aliasreq *)data)->ifra_dstaddr); + if (src->sin_family != dst->sin_family || + src->sin_family != AF_INET || + src->sin_len != dst->sin_len || + src->sin_len != sizeof(struct sockaddr_in)) { + error = EINVAL; + break; + } + if (src->sin_addr.s_addr == INADDR_ANY || + dst->sin_addr.s_addr == INADDR_ANY) { + error = EADDRNOTAVAIL; + break; + } + error = me_set_tunnel(ifp, src, dst); + break; + case SIOCDIFPHYADDR: + me_delete_tunnel(ifp); + break; + case SIOCGIFPSRCADDR: + case SIOCGIFPDSTADDR: + ME_RLOCK(sc); + if (!ME_READY(sc)) { + error = EADDRNOTAVAIL; + ME_RUNLOCK(sc); + break; + } + src = (struct sockaddr_in *)&ifr->ifr_addr; + memset(src, 0, sizeof(*src)); + src->sin_family = AF_INET; + src->sin_len = sizeof(*src); + switch (cmd) { + case SIOCGIFPSRCADDR: + src->sin_addr = sc->me_src; + break; + case SIOCGIFPDSTADDR: + src->sin_addr = sc->me_dst; + break; + } + ME_RUNLOCK(sc); + error = prison_if(curthread->td_ucred, sintosa(src)); + if (error != 0) + memset(src, 0, sizeof(*src)); + break; + default: + error = EINVAL; + break; + } +end: + sx_xunlock(&me_ioctl_sx); + return (error); +} + +static int +me_encapcheck(const struct mbuf *m, int off, int proto, void *arg) +{ + ME_RLOCK_TRACKER; + struct me_softc *sc; + struct ip *ip; + int ret; + + sc = (struct me_softc *)arg; + if ((ME2IFP(sc)->if_flags & IFF_UP) == 0) + return (0); + + M_ASSERTPKTHDR(m); + + if (m->m_pkthdr.len < sizeof(struct ip) + sizeof(struct mobhdr) - + sizeof(struct in_addr)) + return (0); + + ret = 0; + ME_RLOCK(sc); + if (ME_READY(sc)) { + ip = mtod(m, struct ip *); + if (sc->me_src.s_addr == ip->ip_dst.s_addr && + sc->me_dst.s_addr == ip->ip_src.s_addr) + ret = 32 * 2; + } + ME_RUNLOCK(sc); + return (ret); +} + +static int +me_set_tunnel(struct ifnet *ifp, struct sockaddr_in *src, + struct sockaddr_in *dst) +{ + struct me_softc *sc, *tsc; + + sx_assert(&me_ioctl_sx, SA_XLOCKED); + ME_LIST_LOCK(); + sc = ifp->if_softc; + LIST_FOREACH(tsc, &V_me_softc_list, me_list) { + if (tsc == sc || !ME_READY(tsc)) + continue; + if (tsc->me_src.s_addr == src->sin_addr.s_addr && + tsc->me_dst.s_addr == dst->sin_addr.s_addr) { + ME_LIST_UNLOCK(); + return (EADDRNOTAVAIL); + } + } + ME_LIST_UNLOCK(); + + ME_WLOCK(sc); + sc->me_dst = dst->sin_addr; + sc->me_src = src->sin_addr; + ME_WUNLOCK(sc); + + if (sc->me_ecookie == NULL) + sc->me_ecookie = encap_attach_func(AF_INET, IPPROTO_MOBILE, + me_encapcheck, &in_mobile_protosw, sc); + if (sc->me_ecookie != NULL) + ifp->if_drv_flags |= IFF_DRV_RUNNING; + return (0); +} + +static void +me_delete_tunnel(struct ifnet *ifp) +{ + struct me_softc *sc = ifp->if_softc; + + sx_assert(&me_ioctl_sx, SA_XLOCKED); + if (sc->me_ecookie != NULL) + encap_detach(sc->me_ecookie); + sc->me_ecookie = NULL; + ME_WLOCK(sc); + sc->me_src.s_addr = 0; + sc->me_dst.s_addr = 0; + ME_WUNLOCK(sc); + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; +} + +static uint16_t +me_in_cksum(uint16_t *p, int nwords) +{ + uint32_t sum = 0; + + while (nwords-- > 0) + sum += *p++; + sum = (sum >> 16) + (sum & 0xffff); + sum += (sum >> 16); + return (~sum); +} + +int +me_input(struct mbuf **mp, int *offp, int proto) +{ + struct me_softc *sc; + struct mobhdr *mh; + struct ifnet *ifp; + struct mbuf *m; + struct ip *ip; + int hlen; + + m = *mp; + sc = encap_getarg(m); + KASSERT(sc != NULL, ("encap_getarg returned NULL")); + + ifp = ME2IFP(sc); + /* checks for short packets */ + hlen = sizeof(struct mobhdr); + if (m->m_pkthdr.len < sizeof(struct ip) + hlen) + hlen -= sizeof(struct in_addr); + if (m->m_len < sizeof(struct ip) + hlen) + m = m_pullup(m, sizeof(struct ip) + hlen); + if (m == NULL) + goto drop; + mh = (struct mobhdr *)mtodo(m, sizeof(struct ip)); + /* check for wrong flags */ + if (mh->mob_flags & (~MOB_FLAGS_SP)) { + m_freem(m); + goto drop; + } + if (mh->mob_flags) { + if (hlen != sizeof(struct mobhdr)) { + m_freem(m); + goto drop; + } + } else + hlen = sizeof(struct mobhdr) - sizeof(struct in_addr); + /* check mobile header checksum */ + if (me_in_cksum((uint16_t *)mh, hlen / sizeof(uint16_t)) != 0) { + m_freem(m); + goto drop; + } +#ifdef MAC + mac_ifnet_create_mbuf(ifp, m); +#endif + ip = mtod(m, struct ip *); + ip->ip_dst = mh->mob_dst; + ip->ip_p = mh->mob_proto; + ip->ip_sum = 0; + ip->ip_len = htons(m->m_pkthdr.len - hlen); + if (mh->mob_flags) + ip->ip_src = mh->mob_src; + memmove(mtodo(m, hlen), ip, sizeof(struct ip)); + m_adj(m, hlen); + m_clrprotoflags(m); + m->m_pkthdr.rcvif = ifp; + m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); + M_SETFIB(m, sc->me_fibnum); + hlen = AF_INET; + BPF_MTAP2(ifp, &hlen, sizeof(hlen), m); + if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); + if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); + if ((ifp->if_flags & IFF_MONITOR) != 0) + m_freem(m); + else + netisr_dispatch(NETISR_IP, m); + return (IPPROTO_DONE); +drop: + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); + return (IPPROTO_DONE); +} + +#define MTAG_ME 1414491977 +static int +me_check_nesting(struct ifnet *ifp, struct mbuf *m) +{ + struct m_tag *mtag; + int count; + + count = 1; + mtag = NULL; + while ((mtag = m_tag_locate(m, MTAG_ME, 0, NULL)) != NULL) { + if (*(struct ifnet **)(mtag + 1) == ifp) { + log(LOG_NOTICE, "%s: loop detected\n", ifp->if_xname); + return (EIO); + } + count++; + } + if (count > V_max_me_nesting) { + log(LOG_NOTICE, + "%s: if_output recursively called too many times(%d)\n", + ifp->if_xname, count); + return (EIO); + } + mtag = m_tag_alloc(MTAG_ME, 0, sizeof(struct ifnet *), M_NOWAIT); + if (mtag == NULL) + return (ENOMEM); + *(struct ifnet **)(mtag + 1) = ifp; + m_tag_prepend(m, mtag); + return (0); +} + +static int +me_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, + struct route *ro) +{ + uint32_t af; + int error; + +#ifdef MAC + error = mac_ifnet_check_transmit(ifp, m); + if (error != 0) + goto drop; +#endif + if ((ifp->if_flags & IFF_MONITOR) != 0 || + (ifp->if_flags & IFF_UP) == 0) { + error = ENETDOWN; + goto drop; + } + + error = me_check_nesting(ifp, m); + if (error != 0) + goto drop; + + m->m_flags &= ~(M_BCAST|M_MCAST); + if (dst->sa_family == AF_UNSPEC) + bcopy(dst->sa_data, &af, sizeof(af)); + else + af = dst->sa_family; + if (af != AF_INET) { + error = EAFNOSUPPORT; + goto drop; + } + BPF_MTAP2(ifp, &af, sizeof(af), m); + return (ifp->if_transmit(ifp, m)); +drop: + m_freem(m); + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); + return (error); +} + +static int +me_transmit(struct ifnet *ifp, struct mbuf *m) +{ + ME_RLOCK_TRACKER; + struct mobhdr mh; + struct me_softc *sc; + struct ip *ip; + int error, hlen, plen; + + sc = ifp->if_softc; + if (sc == NULL) { + error = ENETDOWN; + m_freem(m); + goto drop; + } + if (m->m_len < sizeof(struct ip)) + m = m_pullup(m, sizeof(struct ip)); + if (m == NULL) { + error = ENOBUFS; + goto drop; + } + ip = mtod(m, struct ip *); + /* Fragmented datagramms shouldn't be encapsulated */ + if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) { + error = EINVAL; + m_freem(m); + goto drop; + } + mh.mob_proto = ip->ip_p; + mh.mob_src = ip->ip_src; + mh.mob_dst = ip->ip_dst; + ME_RLOCK(sc); + if (!ME_READY(sc)) { + ME_RUNLOCK(sc); + error = ENETDOWN; + m_freem(m); + goto drop; + } + if (in_hosteq(sc->me_src, ip->ip_src)) { + hlen = sizeof(struct mobhdr) - sizeof(struct in_addr); + mh.mob_flags = 0; + } else { + hlen = sizeof(struct mobhdr); + mh.mob_flags = MOB_FLAGS_SP; + } + plen = m->m_pkthdr.len; + ip->ip_src = sc->me_src; + ip->ip_dst = sc->me_dst; + M_SETFIB(m, sc->me_fibnum); + ME_RUNLOCK(sc); + M_PREPEND(m, hlen, M_NOWAIT); + if (m == NULL) { + error = ENOBUFS; + goto drop; + } + if (m->m_len < sizeof(struct ip) + hlen) + m = m_pullup(m, sizeof(struct ip) + hlen); + if (m == NULL) { + error = ENOBUFS; + goto drop; + } + memmove(mtod(m, void *), mtodo(m, hlen), sizeof(struct ip)); + ip = mtod(m, struct ip *); + ip->ip_len = htons(m->m_pkthdr.len); + ip->ip_p = IPPROTO_MOBILE; + ip->ip_sum = 0; + mh.mob_csum = 0; + mh.mob_csum = me_in_cksum((uint16_t *)&mh, hlen / sizeof(uint16_t)); + bcopy(&mh, mtodo(m, sizeof(struct ip)), hlen); + error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL); +drop: + if (error) + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); + else { + if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); + if_inc_counter(ifp, IFCOUNTER_OBYTES, plen); + } + return (error); +} + +static void +me_qflush(struct ifnet *ifp __unused) +{ + +} + +static int +memodevent(module_t mod, int type, void *data) +{ + + switch (type) { + case MOD_LOAD: + case MOD_UNLOAD: + break; + default: + return (EOPNOTSUPP); + } + return (0); +} + +static moduledata_t me_mod = { + "if_me", + memodevent, + 0 +}; + +DECLARE_MODULE(if_me, me_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); +MODULE_VERSION(if_me, 1); |