summaryrefslogtreecommitdiffstats
path: root/sys/net
diff options
context:
space:
mode:
Diffstat (limited to 'sys/net')
-rw-r--r--sys/net/if_gre.c1369
-rw-r--r--sys/net/if_gre.h212
-rw-r--r--sys/net/if_me.c647
3 files changed, 1422 insertions, 806 deletions
diff --git a/sys/net/if_gre.c b/sys/net/if_gre.c
index 8954f5c..4953348 100644
--- a/sys/net/if_gre.c
+++ b/sys/net/if_gre.c
@@ -1,8 +1,6 @@
-/* $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $ */
-/* $FreeBSD$ */
-
/*-
* Copyright (c) 1998 The NetBSD Foundation, Inc.
+ * Copyright (c) 2014 Andrey V. Elsukov <ae@FreeBSD.org>
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -30,16 +28,12 @@
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $
*/
-/*
- * Encapsulate L3 protocols into IP
- * See RFC 2784 (successor of RFC 1701 and 1702) for more details.
- * If_gre is compatible with Cisco GRE tunnels, so you can
- * have a NetBSD box as the other end of a tunnel interface of a Cisco
- * router. See gre(4) for more details.
- * Also supported: IP in IP encaps (proto 55) as of RFC 2004
- */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -47,6 +41,7 @@
#include <sys/param.h>
#include <sys/jail.h>
#include <sys/kernel.h>
+#include <sys/lock.h>
#include <sys/libkern.h>
#include <sys/malloc.h>
#include <sys/module.h>
@@ -54,9 +49,12 @@
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
+#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/sockio.h>
+#include <sys/sx.h>
#include <sys/sysctl.h>
+#include <sys/syslog.h>
#include <sys/systm.h>
#include <net/ethernet.h>
@@ -64,85 +62,71 @@
#include <net/if_var.h>
#include <net/if_clone.h>
#include <net/if_types.h>
-#include <net/route.h>
+#include <net/netisr.h>
#include <net/vnet.h>
-#ifdef INET
#include <netinet/in.h>
+#ifdef INET
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
-#include <netinet/ip_gre.h>
#include <netinet/ip_var.h>
-#include <netinet/ip_encap.h>
-#else
-#error "Huh? if_gre without inet?"
#endif
-#include <net/bpf.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/scope6_var.h>
+#endif
+#include <netinet/ip_encap.h>
+#include <net/bpf.h>
#include <net/if_gre.h>
-/*
- * It is not easy to calculate the right value for a GRE MTU.
- * We leave this task to the admin and use the same default that
- * other vendors use.
- */
-#define GREMTU 1476
-
-#define MTAG_COOKIE_GRE 1307983903
-#define MTAG_GRE_NESTING 1
-struct mtag_gre_nesting {
- uint16_t count;
- uint16_t max;
- struct ifnet *ifp[];
-};
-
-/*
- * gre_mtx protects all global variables in if_gre.c.
- * XXX: gre_softc data not protected yet.
- */
-VNET_DEFINE(struct mtx, gre_mtx);
-VNET_DEFINE(struct gre_softc_head, gre_softc_list);
+#include <machine/in_cksum.h>
+#include <security/mac/mac_framework.h>
+#define GREMTU 1500
static const char grename[] = "gre";
static MALLOC_DEFINE(M_GRE, grename, "Generic Routing Encapsulation");
+static VNET_DEFINE(struct mtx, gre_mtx);
+#define V_gre_mtx VNET(gre_mtx)
+#define GRE_LIST_LOCK_INIT(x) mtx_init(&V_gre_mtx, "gre_mtx", NULL, \
+ MTX_DEF)
+#define GRE_LIST_LOCK_DESTROY(x) mtx_destroy(&V_gre_mtx)
+#define GRE_LIST_LOCK(x) mtx_lock(&V_gre_mtx)
+#define GRE_LIST_UNLOCK(x) mtx_unlock(&V_gre_mtx)
+
+static VNET_DEFINE(LIST_HEAD(, gre_softc), gre_softc_list);
+#define V_gre_softc_list VNET(gre_softc_list)
+static struct sx gre_ioctl_sx;
+SX_SYSINIT(gre_ioctl_sx, &gre_ioctl_sx, "gre_ioctl");
static int gre_clone_create(struct if_clone *, int, caddr_t);
static void gre_clone_destroy(struct ifnet *);
static VNET_DEFINE(struct if_clone *, gre_cloner);
#define V_gre_cloner VNET(gre_cloner)
+static void gre_qflush(struct ifnet *);
+static int gre_transmit(struct ifnet *, struct mbuf *);
static int gre_ioctl(struct ifnet *, u_long, caddr_t);
static int gre_output(struct ifnet *, struct mbuf *,
const struct sockaddr *, struct route *);
-static int gre_compute_route(struct gre_softc *sc);
+static void gre_updatehdr(struct gre_softc *);
+static int gre_set_tunnel(struct ifnet *, struct sockaddr *,
+ struct sockaddr *);
+static void gre_delete_tunnel(struct ifnet *);
+int gre_input(struct mbuf **, int *, int);
#ifdef INET
-extern struct domain inetdomain;
-static const struct protosw in_gre_protosw = {
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = IPPROTO_GRE,
- .pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = gre_input,
- .pr_output = rip_output,
- .pr_ctlinput = rip_ctlinput,
- .pr_ctloutput = rip_ctloutput,
- .pr_usrreqs = &rip_usrreqs
-};
-static const struct protosw in_mobile_protosw = {
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = IPPROTO_MOBILE,
- .pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = gre_mobile_input,
- .pr_output = rip_output,
- .pr_ctlinput = rip_ctlinput,
- .pr_ctloutput = rip_ctloutput,
- .pr_usrreqs = &rip_usrreqs
-};
+extern int in_gre_attach(struct gre_softc *);
+extern int in_gre_output(struct mbuf *, int, int);
+#endif
+#ifdef INET6
+extern int in6_gre_attach(struct gre_softc *);
+extern int in6_gre_output(struct mbuf *, int, int);
#endif
SYSCTL_DECL(_net_link);
@@ -159,6 +143,7 @@ static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0,
*/
#define MAX_GRE_NEST 1
#endif
+
static VNET_DEFINE(int, max_gre_nesting) = MAX_GRE_NEST;
#define V_max_gre_nesting VNET(max_gre_nesting)
SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET,
@@ -191,34 +176,22 @@ gre_clone_create(struct if_clone *ifc, int unit, caddr_t params)
struct gre_softc *sc;
sc = malloc(sizeof(struct gre_softc), M_GRE, M_WAITOK | M_ZERO);
-
+ sc->gre_fibnum = curthread->td_proc->p_fibnum;
GRE2IFP(sc) = if_alloc(IFT_TUNNEL);
- if (GRE2IFP(sc) == NULL) {
- free(sc, M_GRE);
- return (ENOSPC);
- }
-
+ GRE_LOCK_INIT(sc);
GRE2IFP(sc)->if_softc = sc;
if_initname(GRE2IFP(sc), grename, unit);
- GRE2IFP(sc)->if_snd.ifq_maxlen = ifqmaxlen;
- GRE2IFP(sc)->if_addrlen = 0;
- GRE2IFP(sc)->if_hdrlen = 24; /* IP + GRE */
- GRE2IFP(sc)->if_mtu = GREMTU;
+ GRE2IFP(sc)->if_mtu = sc->gre_mtu = GREMTU;
GRE2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
GRE2IFP(sc)->if_output = gre_output;
GRE2IFP(sc)->if_ioctl = gre_ioctl;
- sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
- sc->g_proto = IPPROTO_GRE;
- GRE2IFP(sc)->if_flags |= IFF_LINK0;
- sc->encap = NULL;
- sc->gre_fibnum = curthread->td_proc->p_fibnum;
- sc->wccp_ver = WCCP_V1;
- sc->key = 0;
+ GRE2IFP(sc)->if_transmit = gre_transmit;
+ GRE2IFP(sc)->if_qflush = gre_qflush;
if_attach(GRE2IFP(sc));
bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t));
GRE_LIST_LOCK();
- LIST_INSERT_HEAD(&V_gre_softc_list, sc, sc_list);
+ LIST_INSERT_HEAD(&V_gre_softc_list, sc, gre_list);
GRE_LIST_UNLOCK();
return (0);
}
@@ -226,687 +199,749 @@ gre_clone_create(struct if_clone *ifc, int unit, caddr_t params)
static void
gre_clone_destroy(struct ifnet *ifp)
{
- struct gre_softc *sc = ifp->if_softc;
+ struct gre_softc *sc;
+ sx_xlock(&gre_ioctl_sx);
+ sc = ifp->if_softc;
+ gre_delete_tunnel(ifp);
GRE_LIST_LOCK();
- LIST_REMOVE(sc, sc_list);
+ LIST_REMOVE(sc, gre_list);
GRE_LIST_UNLOCK();
-
-#ifdef INET
- if (sc->encap != NULL)
- encap_detach(sc->encap);
-#endif
bpfdetach(ifp);
if_detach(ifp);
+ ifp->if_softc = NULL;
+ sx_xunlock(&gre_ioctl_sx);
+
if_free(ifp);
+ GRE_LOCK_DESTROY(sc);
free(sc, M_GRE);
}
-/*
- * The output routine. Takes a packet and encapsulates it in the protocol
- * given by sc->g_proto. See also RFC 1701 and RFC 2004
- */
static int
-gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
- struct route *ro)
+gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
- int error = 0;
- struct gre_softc *sc = ifp->if_softc;
- struct greip *gh;
- struct ip *ip;
- struct m_tag *mtag;
- struct mtag_gre_nesting *gt;
- size_t len;
- u_short gre_ip_id = 0;
- uint8_t gre_ip_tos = 0;
- u_int16_t etype = 0;
- struct mobile_h mob_h;
- u_int32_t af;
- int extra = 0, max;
-
- /*
- * gre may cause infinite recursion calls when misconfigured. High
- * nesting level may cause stack exhaustion. We'll prevent this by
- * detecting loops and by introducing upper limit.
- */
- mtag = m_tag_locate(m, MTAG_COOKIE_GRE, MTAG_GRE_NESTING, NULL);
- if (mtag != NULL) {
- struct ifnet **ifp2;
-
- gt = (struct mtag_gre_nesting *)(mtag + 1);
- gt->count++;
- if (gt->count > min(gt->max, V_max_gre_nesting)) {
- printf("%s: hit maximum recursion limit %u on %s\n",
- __func__, gt->count - 1, ifp->if_xname);
- m_freem(m);
- error = EIO; /* is there better errno? */
- goto end;
- }
-
- ifp2 = gt->ifp;
- for (max = gt->count - 1; max > 0; max--) {
- if (*ifp2 == ifp)
- break;
- ifp2++;
- }
- if (*ifp2 == ifp) {
- printf("%s: detected loop with nexting %u on %s\n",
- __func__, gt->count-1, ifp->if_xname);
- m_freem(m);
- error = EIO; /* is there better errno? */
- goto end;
- }
- *ifp2 = ifp;
+ GRE_RLOCK_TRACKER;
+ struct ifreq *ifr = (struct ifreq *)data;
+ struct sockaddr *src, *dst;
+ struct gre_softc *sc;
+#ifdef INET
+ struct sockaddr_in *sin = NULL;
+#endif
+#ifdef INET6
+ struct sockaddr_in6 *sin6 = NULL;
+#endif
+ uint32_t opt;
+ int error;
- } else {
- /*
- * Given that people should NOT increase max_gre_nesting beyond
- * their real needs, we allocate once per packet rather than
- * allocating an mtag once per passing through gre.
- *
- * Note: the sysctl does not actually check for saneness, so we
- * limit the maximum numbers of possible recursions here.
- */
- max = imin(V_max_gre_nesting, 256);
- /* If someone sets the sysctl <= 0, we want at least 1. */
- max = imax(max, 1);
- len = sizeof(struct mtag_gre_nesting) +
- max * sizeof(struct ifnet *);
- mtag = m_tag_alloc(MTAG_COOKIE_GRE, MTAG_GRE_NESTING, len,
- M_NOWAIT);
- if (mtag == NULL) {
- m_freem(m);
- error = ENOMEM;
- goto end;
- }
- gt = (struct mtag_gre_nesting *)(mtag + 1);
- bzero(gt, len);
- gt->count = 1;
- gt->max = max;
- *gt->ifp = ifp;
- m_tag_prepend(m, mtag);
+ switch (cmd) {
+ case SIOCSIFMTU:
+ /* XXX: */
+ if (ifr->ifr_mtu < 576)
+ return (EINVAL);
+ break;
+ case SIOCSIFADDR:
+ ifp->if_flags |= IFF_UP;
+ case SIOCSIFFLAGS:
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ return (0);
+ case GRESADDRS:
+ case GRESADDRD:
+ case GREGADDRS:
+ case GREGADDRD:
+ case GRESPROTO:
+ case GREGPROTO:
+ return (EOPNOTSUPP);
}
-
- if (!((ifp->if_flags & IFF_UP) &&
- (ifp->if_drv_flags & IFF_DRV_RUNNING)) ||
- sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
- m_freem(m);
- error = ENETDOWN;
+ src = dst = NULL;
+ sx_xlock(&gre_ioctl_sx);
+ sc = ifp->if_softc;
+ if (sc == NULL) {
+ error = ENXIO;
goto end;
}
-
- gh = NULL;
- ip = NULL;
-
- /* BPF writes need to be handled specially. */
- if (dst->sa_family == AF_UNSPEC)
- bcopy(dst->sa_data, &af, sizeof(af));
- else
- af = dst->sa_family;
-
- if (bpf_peers_present(ifp->if_bpf))
- bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
-
- if ((ifp->if_flags & IFF_MONITOR) != 0) {
- m_freem(m);
- error = ENETDOWN;
+ error = 0;
+ switch (cmd) {
+ case SIOCSIFMTU:
+ GRE_WLOCK(sc);
+ sc->gre_mtu = ifr->ifr_mtu;
+ gre_updatehdr(sc);
+ GRE_WUNLOCK(sc);
goto end;
- }
-
- m->m_flags &= ~(M_BCAST|M_MCAST);
-
- if (sc->g_proto == IPPROTO_MOBILE) {
- if (af == AF_INET) {
- struct mbuf *m0;
- int msiz;
-
- ip = mtod(m, struct ip *);
-
- /*
- * RFC2004 specifies that fragmented diagrams shouldn't
- * be encapsulated.
- */
- if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) {
- if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- m_freem(m);
- error = EINVAL; /* is there better errno? */
- goto end;
- }
- memset(&mob_h, 0, MOB_H_SIZ_L);
- mob_h.proto = (ip->ip_p) << 8;
- mob_h.odst = ip->ip_dst.s_addr;
- ip->ip_dst.s_addr = sc->g_dst.s_addr;
-
- /*
- * If the packet comes from our host, we only change
- * the destination address in the IP header.
- * Else we also need to save and change the source
- */
- if (in_hosteq(ip->ip_src, sc->g_src)) {
- msiz = MOB_H_SIZ_S;
- } else {
- mob_h.proto |= MOB_H_SBIT;
- mob_h.osrc = ip->ip_src.s_addr;
- ip->ip_src.s_addr = sc->g_src.s_addr;
- msiz = MOB_H_SIZ_L;
- }
- mob_h.proto = htons(mob_h.proto);
- mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz);
-
- if ((m->m_data - msiz) < m->m_pktdat) {
- m0 = m_gethdr(M_NOWAIT, MT_DATA);
- if (m0 == NULL) {
- if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- m_freem(m);
- error = ENOBUFS;
- goto end;
- }
- m0->m_next = m;
- m->m_data += sizeof(struct ip);
- m->m_len -= sizeof(struct ip);
- m0->m_pkthdr.len = m->m_pkthdr.len + msiz;
- m0->m_len = msiz + sizeof(struct ip);
- m0->m_data += max_linkhdr;
- memcpy(mtod(m0, caddr_t), (caddr_t)ip,
- sizeof(struct ip));
- m = m0;
- } else { /* we have some space left in the old one */
- m->m_data -= msiz;
- m->m_len += msiz;
- m->m_pkthdr.len += msiz;
- bcopy(ip, mtod(m, caddr_t),
- sizeof(struct ip));
- }
- ip = mtod(m, struct ip *);
- memcpy((caddr_t)(ip + 1), &mob_h, (unsigned)msiz);
- ip->ip_len = htons(ntohs(ip->ip_len) + msiz);
- } else { /* AF_INET */
- if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- m_freem(m);
- error = EINVAL;
- goto end;
- }
- } else if (sc->g_proto == IPPROTO_GRE) {
- switch (af) {
- case AF_INET:
- ip = mtod(m, struct ip *);
- gre_ip_tos = ip->ip_tos;
- gre_ip_id = ip->ip_id;
- if (sc->wccp_ver == WCCP_V2) {
- extra = sizeof(uint32_t);
- etype = WCCP_PROTOCOL_TYPE;
- } else {
- etype = ETHERTYPE_IP;
- }
+ case SIOCSIFPHYADDR:
+#ifdef INET6
+ case SIOCSIFPHYADDR_IN6:
+#endif
+ error = EINVAL;
+ switch (cmd) {
+#ifdef INET
+ case SIOCSIFPHYADDR:
+ src = (struct sockaddr *)
+ &(((struct in_aliasreq *)data)->ifra_addr);
+ dst = (struct sockaddr *)
+ &(((struct in_aliasreq *)data)->ifra_dstaddr);
break;
+#endif
#ifdef INET6
- case AF_INET6:
- gre_ip_id = ip_newid();
- etype = ETHERTYPE_IPV6;
+ case SIOCSIFPHYADDR_IN6:
+ src = (struct sockaddr *)
+ &(((struct in6_aliasreq *)data)->ifra_addr);
+ dst = (struct sockaddr *)
+ &(((struct in6_aliasreq *)data)->ifra_dstaddr);
break;
#endif
default:
- if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- m_freem(m);
error = EAFNOSUPPORT;
goto end;
}
-
- /* Reserve space for GRE header + optional GRE key */
- int hdrlen = sizeof(struct greip) + extra;
- if (sc->key)
- hdrlen += sizeof(uint32_t);
- M_PREPEND(m, hdrlen, M_NOWAIT);
- } else {
- if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- m_freem(m);
- error = EINVAL;
- goto end;
- }
-
- if (m == NULL) { /* mbuf allocation failed */
- if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- error = ENOBUFS;
- goto end;
- }
-
- M_SETFIB(m, sc->gre_fibnum); /* The envelope may use a different FIB */
-
- gh = mtod(m, struct greip *);
- if (sc->g_proto == IPPROTO_GRE) {
- uint32_t *options = gh->gi_options;
-
- memset((void *)gh, 0, sizeof(struct greip) + extra);
- gh->gi_ptype = htons(etype);
- gh->gi_flags = 0;
-
- /* Add key option */
- if (sc->key)
- {
- gh->gi_flags |= htons(GRE_KP);
- *(options++) = htonl(sc->key);
- }
- }
-
- gh->gi_pr = sc->g_proto;
- if (sc->g_proto != IPPROTO_MOBILE) {
- gh->gi_src = sc->g_src;
- gh->gi_dst = sc->g_dst;
- ((struct ip*)gh)->ip_v = IPPROTO_IPV4;
- ((struct ip*)gh)->ip_hl = (sizeof(struct ip)) >> 2;
- ((struct ip*)gh)->ip_ttl = GRE_TTL;
- ((struct ip*)gh)->ip_tos = gre_ip_tos;
- ((struct ip*)gh)->ip_id = gre_ip_id;
- gh->gi_len = htons(m->m_pkthdr.len);
- }
-
- if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
- if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
- /*
- * Send it off and with IP_FORWARD flag to prevent it from
- * overwriting the ip_id again. ip_id is already set to the
- * ip_id of the encapsulated packet.
- */
- error = ip_output(m, NULL, &sc->route, IP_FORWARDING,
- (struct ip_moptions *)NULL, (struct inpcb *)NULL);
- end:
- if (error)
- if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- return (error);
-}
-
-static int
-gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
-{
- struct ifreq *ifr = (struct ifreq *)data;
- struct in_aliasreq *aifr = (struct in_aliasreq *)data;
- struct gre_softc *sc = ifp->if_softc;
- struct sockaddr_in si;
- struct sockaddr *sa = NULL;
- int error, adj;
- struct sockaddr_in sp, sm, dp, dm;
- uint32_t key;
-
- error = 0;
- adj = 0;
+ /* sa_family must be equal */
+ if (src->sa_family != dst->sa_family ||
+ src->sa_len != dst->sa_len)
+ goto end;
- switch (cmd) {
- case SIOCSIFADDR:
- ifp->if_flags |= IFF_UP;
- break;
- case SIOCSIFFLAGS:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_SETIFFLAGS)) != 0)
- break;
- if ((ifr->ifr_flags & IFF_LINK0) != 0)
- sc->g_proto = IPPROTO_GRE;
- else
- sc->g_proto = IPPROTO_MOBILE;
- if ((ifr->ifr_flags & IFF_LINK2) != 0)
- sc->wccp_ver = WCCP_V2;
- else
- sc->wccp_ver = WCCP_V1;
- goto recompute;
- case SIOCSIFMTU:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_SETIFMTU)) != 0)
- break;
- if (ifr->ifr_mtu < 576) {
- error = EINVAL;
- break;
- }
- ifp->if_mtu = ifr->ifr_mtu;
- break;
- case SIOCGIFMTU:
- ifr->ifr_mtu = GRE2IFP(sc)->if_mtu;
- break;
- case SIOCADDMULTI:
- /*
- * XXXRW: Isn't this priv_checkr() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_ADDMULTI)) != 0)
- break;
- if (ifr == 0) {
- error = EAFNOSUPPORT;
- break;
- }
- switch (ifr->ifr_addr.sa_family) {
+ /* validate sa_len */
+ switch (src->sa_family) {
#ifdef INET
case AF_INET:
+ if (src->sa_len != sizeof(struct sockaddr_in))
+ goto end;
break;
#endif
#ifdef INET6
case AF_INET6:
+ if (src->sa_len != sizeof(struct sockaddr_in6))
+ goto end;
break;
#endif
default:
error = EAFNOSUPPORT;
- break;
+ goto end;
}
- break;
- case SIOCDELMULTI:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_DELIFGROUP)) != 0)
- break;
- if (ifr == 0) {
- error = EAFNOSUPPORT;
- break;
+ /* check sa_family looks sane for the cmd */
+ error = EAFNOSUPPORT;
+ switch (cmd) {
+#ifdef INET
+ case SIOCSIFPHYADDR:
+ if (src->sa_family == AF_INET)
+ break;
+ goto end;
+#endif
+#ifdef INET6
+ case SIOCSIFPHYADDR_IN6:
+ if (src->sa_family == AF_INET6)
+ break;
+ goto end;
+#endif
}
- switch (ifr->ifr_addr.sa_family) {
+ error = EADDRNOTAVAIL;
+ switch (src->sa_family) {
#ifdef INET
case AF_INET:
+ if (satosin(src)->sin_addr.s_addr == INADDR_ANY ||
+ satosin(dst)->sin_addr.s_addr == INADDR_ANY)
+ goto end;
break;
#endif
#ifdef INET6
case AF_INET6:
- break;
-#endif
- default:
- error = EAFNOSUPPORT;
- break;
- }
- break;
- case GRESPROTO:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
- break;
- sc->g_proto = ifr->ifr_flags;
- switch (sc->g_proto) {
- case IPPROTO_GRE:
- ifp->if_flags |= IFF_LINK0;
- break;
- case IPPROTO_MOBILE:
- ifp->if_flags &= ~IFF_LINK0;
- break;
- default:
- error = EPROTONOSUPPORT;
- break;
- }
- goto recompute;
- case GREGPROTO:
- ifr->ifr_flags = sc->g_proto;
- break;
- case GRESADDRS:
- case GRESADDRD:
- error = priv_check(curthread, PRIV_NET_GRE);
- if (error)
- return (error);
- /*
- * set tunnel endpoints, compute a less specific route
- * to the remote end and mark if as up
- */
- sa = &ifr->ifr_addr;
- if (cmd == GRESADDRS)
- sc->g_src = (satosin(sa))->sin_addr;
- if (cmd == GRESADDRD)
- sc->g_dst = (satosin(sa))->sin_addr;
- recompute:
-#ifdef INET
- if (sc->encap != NULL) {
- encap_detach(sc->encap);
- sc->encap = NULL;
- }
+ if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)
+ ||
+ IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr))
+ goto end;
+ /*
+ * Check validity of the scope zone ID of the
+ * addresses, and convert it into the kernel
+ * internal form if necessary.
+ */
+ error = sa6_embedscope(satosin6(src), 0);
+ if (error != 0)
+ goto end;
+ error = sa6_embedscope(satosin6(dst), 0);
+ if (error != 0)
+ goto end;
#endif
- if ((sc->g_src.s_addr != INADDR_ANY) &&
- (sc->g_dst.s_addr != INADDR_ANY)) {
- bzero(&sp, sizeof(sp));
- bzero(&sm, sizeof(sm));
- bzero(&dp, sizeof(dp));
- bzero(&dm, sizeof(dm));
- sp.sin_len = sm.sin_len = dp.sin_len = dm.sin_len =
- sizeof(struct sockaddr_in);
- sp.sin_family = sm.sin_family = dp.sin_family =
- dm.sin_family = AF_INET;
- sp.sin_addr = sc->g_src;
- dp.sin_addr = sc->g_dst;
- sm.sin_addr.s_addr = dm.sin_addr.s_addr =
- INADDR_BROADCAST;
-#ifdef INET
- sc->encap = encap_attach(AF_INET, sc->g_proto,
- sintosa(&sp), sintosa(&sm), sintosa(&dp),
- sintosa(&dm), (sc->g_proto == IPPROTO_GRE) ?
- &in_gre_protosw : &in_mobile_protosw, sc);
- if (sc->encap == NULL)
- printf("%s: unable to attach encap\n",
- if_name(GRE2IFP(sc)));
-#endif
- if (sc->route.ro_rt != 0) /* free old route */
- RTFREE(sc->route.ro_rt);
- if (gre_compute_route(sc) == 0)
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- else
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
- }
- break;
- case GREGADDRS:
- memset(&si, 0, sizeof(si));
- si.sin_family = AF_INET;
- si.sin_len = sizeof(struct sockaddr_in);
- si.sin_addr.s_addr = sc->g_src.s_addr;
- sa = sintosa(&si);
- error = prison_if(curthread->td_ucred, sa);
- if (error != 0)
- break;
- ifr->ifr_addr = *sa;
- break;
- case GREGADDRD:
- memset(&si, 0, sizeof(si));
- si.sin_family = AF_INET;
- si.sin_len = sizeof(struct sockaddr_in);
- si.sin_addr.s_addr = sc->g_dst.s_addr;
- sa = sintosa(&si);
- error = prison_if(curthread->td_ucred, sa);
- if (error != 0)
- break;
- ifr->ifr_addr = *sa;
+ };
+ error = gre_set_tunnel(ifp, src, dst);
break;
- case SIOCSIFPHYADDR:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
- break;
- if (aifr->ifra_addr.sin_family != AF_INET ||
- aifr->ifra_dstaddr.sin_family != AF_INET) {
- error = EAFNOSUPPORT;
- break;
- }
- if (aifr->ifra_addr.sin_len != sizeof(si) ||
- aifr->ifra_dstaddr.sin_len != sizeof(si)) {
- error = EINVAL;
- break;
- }
- sc->g_src = aifr->ifra_addr.sin_addr;
- sc->g_dst = aifr->ifra_dstaddr.sin_addr;
- goto recompute;
case SIOCDIFPHYADDR:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
- break;
- sc->g_src.s_addr = INADDR_ANY;
- sc->g_dst.s_addr = INADDR_ANY;
- goto recompute;
+ gre_delete_tunnel(ifp);
+ break;
case SIOCGIFPSRCADDR:
+ case SIOCGIFPDSTADDR:
#ifdef INET6
case SIOCGIFPSRCADDR_IN6:
+ case SIOCGIFPDSTADDR_IN6:
#endif
- if (sc->g_src.s_addr == INADDR_ANY) {
+ if (sc->gre_family == 0) {
error = EADDRNOTAVAIL;
break;
}
- memset(&si, 0, sizeof(si));
- si.sin_family = AF_INET;
- si.sin_len = sizeof(struct sockaddr_in);
- si.sin_addr.s_addr = sc->g_src.s_addr;
- error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
- if (error != 0)
+ GRE_RLOCK(sc);
+ switch (cmd) {
+#ifdef INET
+ case SIOCGIFPSRCADDR:
+ case SIOCGIFPDSTADDR:
+ if (sc->gre_family != AF_INET) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ sin = (struct sockaddr_in *)&ifr->ifr_addr;
+ memset(sin, 0, sizeof(*sin));
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
break;
- bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr));
- break;
- case SIOCGIFPDSTADDR:
-#ifdef INET6
- case SIOCGIFPDSTADDR_IN6:
#endif
- if (sc->g_dst.s_addr == INADDR_ANY) {
- error = EADDRNOTAVAIL;
+#ifdef INET6
+ case SIOCGIFPSRCADDR_IN6:
+ case SIOCGIFPDSTADDR_IN6:
+ if (sc->gre_family != AF_INET6) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ sin6 = (struct sockaddr_in6 *)
+ &(((struct in6_ifreq *)data)->ifr_addr);
+ memset(sin6, 0, sizeof(*sin6));
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(*sin6);
break;
+#endif
}
- memset(&si, 0, sizeof(si));
- si.sin_family = AF_INET;
- si.sin_len = sizeof(struct sockaddr_in);
- si.sin_addr.s_addr = sc->g_dst.s_addr;
- error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
+ if (error == 0) {
+ switch (cmd) {
+#ifdef INET
+ case SIOCGIFPSRCADDR:
+ sin->sin_addr = sc->gre_oip.ip_src;
+ break;
+ case SIOCGIFPDSTADDR:
+ sin->sin_addr = sc->gre_oip.ip_dst;
+ break;
+#endif
+#ifdef INET6
+ case SIOCGIFPSRCADDR_IN6:
+ sin6->sin6_addr = sc->gre_oip6.ip6_src;
+ break;
+ case SIOCGIFPDSTADDR_IN6:
+ sin6->sin6_addr = sc->gre_oip6.ip6_dst;
+ break;
+#endif
+ }
+ }
+ GRE_RUNLOCK(sc);
if (error != 0)
break;
- bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr));
+ switch (cmd) {
+#ifdef INET
+ case SIOCGIFPSRCADDR:
+ case SIOCGIFPDSTADDR:
+ error = prison_if(curthread->td_ucred,
+ (struct sockaddr *)sin);
+ if (error != 0)
+ memset(sin, 0, sizeof(*sin));
+ break;
+#endif
+#ifdef INET6
+ case SIOCGIFPSRCADDR_IN6:
+ case SIOCGIFPDSTADDR_IN6:
+ error = prison_if(curthread->td_ucred,
+ (struct sockaddr *)sin6);
+ if (error == 0)
+ error = sa6_recoverscope(sin6);
+ if (error != 0)
+ memset(sin6, 0, sizeof(*sin6));
+#endif
+ }
break;
case GRESKEY:
- error = priv_check(curthread, PRIV_NET_GRE);
- if (error)
- break;
- error = copyin(ifr->ifr_data, &key, sizeof(key));
- if (error)
+ if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
break;
- /* adjust MTU for option header */
- if (key == 0 && sc->key != 0) /* clear */
- adj += sizeof(key);
- else if (key != 0 && sc->key == 0) /* set */
- adj -= sizeof(key);
-
- if (ifp->if_mtu + adj < 576) {
- error = EINVAL;
+ if ((error = copyin(ifr->ifr_data, &opt, sizeof(opt))) != 0)
break;
+ if (sc->gre_key != opt) {
+ GRE_WLOCK(sc);
+ sc->gre_key = opt;
+ gre_updatehdr(sc);
+ GRE_WUNLOCK(sc);
}
- ifp->if_mtu += adj;
- sc->key = key;
break;
case GREGKEY:
- error = copyout(&sc->key, ifr->ifr_data, sizeof(sc->key));
+ error = copyout(&sc->gre_key, ifr->ifr_data, sizeof(sc->gre_key));
+ break;
+ case GRESOPTS:
+ if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
+ break;
+ if ((error = copyin(ifr->ifr_data, &opt, sizeof(opt))) != 0)
+ break;
+ if (opt & ~GRE_OPTMASK)
+ error = EINVAL;
+ else {
+ if (sc->gre_options != opt) {
+ GRE_WLOCK(sc);
+ sc->gre_options = opt;
+ gre_updatehdr(sc);
+ GRE_WUNLOCK(sc);
+ }
+ }
break;
+ case GREGOPTS:
+ error = copyout(&sc->gre_options, ifr->ifr_data,
+ sizeof(sc->gre_options));
+ break;
default:
error = EINVAL;
break;
}
-
+end:
+ sx_xunlock(&gre_ioctl_sx);
return (error);
}
-/*
- * computes a route to our destination that is not the one
- * which would be taken by ip_output(), as this one will loop back to
- * us. If the interface is p2p as a--->b, then a routing entry exists
- * If we now send a packet to b (e.g. ping b), this will come down here
- * gets src=a, dst=b tacked on and would from ip_output() sent back to
- * if_gre.
- * Goal here is to compute a route to b that is less specific than
- * a-->b. We know that this one exists as in normal operation we have
- * at least a default route which matches.
- */
-static int
-gre_compute_route(struct gre_softc *sc)
+static void
+gre_updatehdr(struct gre_softc *sc)
+{
+ struct grehdr *gh = NULL;
+ uint32_t *opts;
+ uint16_t flags;
+
+ GRE_WLOCK_ASSERT(sc);
+ switch (sc->gre_family) {
+#ifdef INET
+ case AF_INET:
+ sc->gre_hlen = sizeof(struct greip);
+ sc->gre_oip.ip_v = IPPROTO_IPV4;
+ sc->gre_oip.ip_hl = sizeof(struct ip) >> 2;
+ sc->gre_oip.ip_p = IPPROTO_GRE;
+ gh = &sc->gre_gihdr->gi_gre;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ sc->gre_hlen = sizeof(struct greip6);
+ sc->gre_oip6.ip6_vfc = IPV6_VERSION;
+ sc->gre_oip6.ip6_nxt = IPPROTO_GRE;
+ gh = &sc->gre_gi6hdr->gi6_gre;
+ break;
+#endif
+ default:
+ return;
+ }
+ flags = 0;
+ opts = gh->gre_opts;
+ if (sc->gre_options & GRE_ENABLE_CSUM) {
+ flags |= GRE_FLAGS_CP;
+ sc->gre_hlen += 2 * sizeof(uint16_t);
+ *opts++ = 0;
+ }
+ if (sc->gre_key != 0) {
+ flags |= GRE_FLAGS_KP;
+ sc->gre_hlen += sizeof(uint32_t);
+ *opts++ = htonl(sc->gre_key);
+ }
+ if (sc->gre_options & GRE_ENABLE_SEQ) {
+ flags |= GRE_FLAGS_SP;
+ sc->gre_hlen += sizeof(uint32_t);
+ *opts++ = 0;
+ } else
+ sc->gre_oseq = 0;
+ gh->gre_flags = htons(flags);
+ GRE2IFP(sc)->if_mtu = sc->gre_mtu - sc->gre_hlen;
+}
+
+static void
+gre_detach(struct gre_softc *sc)
{
- struct route *ro;
- ro = &sc->route;
+ sx_assert(&gre_ioctl_sx, SA_XLOCKED);
+ if (sc->gre_ecookie != NULL)
+ encap_detach(sc->gre_ecookie);
+ sc->gre_ecookie = NULL;
+}
- memset(ro, 0, sizeof(struct route));
- ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
- ro->ro_dst.sa_family = AF_INET;
- ro->ro_dst.sa_len = sizeof(ro->ro_dst);
+static int
+gre_set_tunnel(struct ifnet *ifp, struct sockaddr *src,
+ struct sockaddr *dst)
+{
+ struct gre_softc *sc, *tsc;
+#ifdef INET6
+ struct ip6_hdr *ip6;
+#endif
+#ifdef INET
+ struct ip *ip;
+#endif
+ void *hdr;
+ int error;
- /*
- * toggle last bit, so our interface is not found, but a less
- * specific route. I'd rather like to specify a shorter mask,
- * but this is not possible. Should work though. XXX
- * XXX MRT Use a different FIB for the tunnel to solve this problem.
- */
- if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0) {
- ((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr ^=
- htonl(0x01);
+ sx_assert(&gre_ioctl_sx, SA_XLOCKED);
+ GRE_LIST_LOCK();
+ sc = ifp->if_softc;
+ LIST_FOREACH(tsc, &V_gre_softc_list, gre_list) {
+ if (tsc == sc || tsc->gre_family != src->sa_family)
+ continue;
+#ifdef INET
+ if (tsc->gre_family == AF_INET &&
+ tsc->gre_oip.ip_src.s_addr ==
+ satosin(src)->sin_addr.s_addr &&
+ tsc->gre_oip.ip_dst.s_addr ==
+ satosin(dst)->sin_addr.s_addr) {
+ GRE_LIST_UNLOCK();
+ return (EADDRNOTAVAIL);
+ }
+#endif
+#ifdef INET6
+ if (tsc->gre_family == AF_INET6 &&
+ IN6_ARE_ADDR_EQUAL(&tsc->gre_oip6.ip6_src,
+ &satosin6(src)->sin6_addr) &&
+ IN6_ARE_ADDR_EQUAL(&tsc->gre_oip6.ip6_dst,
+ &satosin6(dst)->sin6_addr)) {
+ GRE_LIST_UNLOCK();
+ return (EADDRNOTAVAIL);
+ }
+#endif
}
+ GRE_LIST_UNLOCK();
-#ifdef DIAGNOSTIC
- printf("%s: searching for a route to %s", if_name(GRE2IFP(sc)),
- inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr));
+ switch (src->sa_family) {
+#ifdef INET
+ case AF_INET:
+ hdr = ip = malloc(sizeof(struct greip) +
+ 3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO);
+ ip->ip_src = satosin(src)->sin_addr;
+ ip->ip_dst = satosin(dst)->sin_addr;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ hdr = ip6 = malloc(sizeof(struct greip6) +
+ 3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO);
+ ip6->ip6_src = satosin6(src)->sin6_addr;
+ ip6->ip6_dst = satosin6(dst)->sin6_addr;
+ break;
+#endif
+ default:
+ return (EAFNOSUPPORT);
+ }
+ if (sc->gre_family != src->sa_family)
+ gre_detach(sc);
+ GRE_WLOCK(sc);
+ if (sc->gre_family != 0)
+ free(sc->gre_hdr, M_GRE);
+ sc->gre_family = src->sa_family;
+ sc->gre_hdr = hdr;
+ sc->gre_oseq = 0;
+ sc->gre_iseq = UINT32_MAX;
+ gre_updatehdr(sc);
+ GRE_WUNLOCK(sc);
+
+ switch (src->sa_family) {
+#ifdef INET
+ case AF_INET:
+ error = in_gre_attach(sc);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ error = in6_gre_attach(sc);
+ break;
#endif
+ }
+ if (error == 0)
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ return (error);
+}
- rtalloc_fib(ro, sc->gre_fibnum);
+static void
+gre_delete_tunnel(struct ifnet *ifp)
+{
+ struct gre_softc *sc = ifp->if_softc;
+ int family;
+
+ GRE_WLOCK(sc);
+ family = sc->gre_family;
+ sc->gre_family = 0;
+ GRE_WUNLOCK(sc);
+ if (family != 0) {
+ gre_detach(sc);
+ free(sc->gre_hdr, M_GRE);
+ }
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+}
+int
+gre_input(struct mbuf **mp, int *offp, int proto)
+{
+ struct gre_softc *sc;
+ struct grehdr *gh;
+ struct ifnet *ifp;
+ struct mbuf *m;
+ uint32_t *opts, key;
+ uint16_t flags;
+ int hlen, isr, af;
+
+ m = *mp;
+ sc = encap_getarg(m);
+ KASSERT(sc != NULL, ("encap_getarg returned NULL"));
+
+ ifp = GRE2IFP(sc);
+ gh = (struct grehdr *)mtodo(m, *offp);
+ flags = ntohs(gh->gre_flags);
+ if (flags & ~GRE_FLAGS_MASK)
+ goto drop;
+ opts = gh->gre_opts;
+ hlen = 2 * sizeof(uint16_t);
+ if (flags & GRE_FLAGS_CP) {
+ /* reserved1 field must be zero */
+ if (((uint16_t *)opts)[1] != 0)
+ goto drop;
+ if (in_cksum_skip(m, m->m_pkthdr.len, *offp) != 0)
+ goto drop;
+ hlen += 2 * sizeof(uint16_t);
+ opts++;
+ }
+ if (flags & GRE_FLAGS_KP) {
+ key = ntohl(*opts);
+ hlen += sizeof(uint32_t);
+ opts++;
+ } else
+ key = 0;
/*
- * check if this returned a route at all and this route is no
- * recursion to ourself
- */
- if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) {
-#ifdef DIAGNOSTIC
- if (ro->ro_rt == NULL)
- printf(" - no route found!\n");
- else
- printf(" - route loops back to ourself!\n");
-#endif
- return EADDRNOTAVAIL;
+ if (sc->gre_key != 0 && (key != sc->gre_key || key != 0))
+ goto drop;
+ */
+ if (flags & GRE_FLAGS_SP) {
+ /* seq = ntohl(*opts); */
+ hlen += sizeof(uint32_t);
}
+ switch (ntohs(gh->gre_proto)) {
+ case ETHERTYPE_WCCP:
+ /*
+ * For WCCP skip an additional 4 bytes if after GRE header
+ * doesn't follow an IP header.
+ */
+ if (flags == 0 && (*(uint8_t *)gh->gre_opts & 0xF0) != 0x40)
+ hlen += sizeof(uint32_t);
+ /* FALLTHROUGH */
+ case ETHERTYPE_IP:
+ isr = NETISR_IP;
+ af = AF_INET;
+ break;
+ case ETHERTYPE_IPV6:
+ isr = NETISR_IPV6;
+ af = AF_INET6;
+ break;
+ default:
+ goto drop;
+ }
+ m_adj(m, *offp + hlen);
+ m_clrprotoflags(m);
+ m->m_pkthdr.rcvif = ifp;
+ M_SETFIB(m, sc->gre_fibnum);
+#ifdef MAC
+ mac_ifnet_create_mbuf(ifp, m);
+#endif
+ BPF_MTAP2(ifp, &af, sizeof(af), m);
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
+ if ((ifp->if_flags & IFF_MONITOR) != 0)
+ m_freem(m);
+ else
+ netisr_dispatch(isr, m);
+ return (IPPROTO_DONE);
+drop:
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+ m_freem(m);
+ return (IPPROTO_DONE);
+}
- /*
- * now change it back - else ip_output will just drop
- * the route and search one to this interface ...
- */
- if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0)
- ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
+#define MTAG_GRE 1307983903
+static int
+gre_check_nesting(struct ifnet *ifp, struct mbuf *m)
+{
+ struct m_tag *mtag;
+ int count;
+
+ count = 1;
+ mtag = NULL;
+ while ((mtag = m_tag_locate(m, MTAG_GRE, 0, NULL)) != NULL) {
+ if (*(struct ifnet **)(mtag + 1) == ifp) {
+ log(LOG_NOTICE, "%s: loop detected\n", ifp->if_xname);
+ return (EIO);
+ }
+ count++;
+ }
+ if (count > V_max_gre_nesting) {
+ log(LOG_NOTICE,
+ "%s: if_output recursively called too many times(%d)\n",
+ ifp->if_xname, count);
+ return (EIO);
+ }
+ mtag = m_tag_alloc(MTAG_GRE, 0, sizeof(struct ifnet *), M_NOWAIT);
+ if (mtag == NULL)
+ return (ENOMEM);
+ *(struct ifnet **)(mtag + 1) = ifp;
+ m_tag_prepend(m, mtag);
+ return (0);
+}
+
+static int
+gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+ struct route *ro)
+{
+ uint32_t af;
+ int error;
-#ifdef DIAGNOSTIC
- printf(", choosing %s with gateway %s", if_name(ro->ro_rt->rt_ifp),
- inet_ntoa(((struct sockaddr_in *)(ro->ro_rt->rt_gateway))->sin_addr));
- printf("\n");
+#ifdef MAC
+ error = mac_ifnet_check_transmit(ifp, m);
+ if (error != 0)
+ goto drop;
#endif
+ if ((ifp->if_flags & IFF_MONITOR) != 0 ||
+ (ifp->if_flags & IFF_UP) == 0) {
+ error = ENETDOWN;
+ goto drop;
+ }
+
+ error = gre_check_nesting(ifp, m);
+ if (error != 0)
+ goto drop;
- return 0;
+ m->m_flags &= ~(M_BCAST|M_MCAST);
+ if (dst->sa_family == AF_UNSPEC)
+ bcopy(dst->sa_data, &af, sizeof(af));
+ else
+ af = dst->sa_family;
+ BPF_MTAP2(ifp, &af, sizeof(af), m);
+ m->m_pkthdr.csum_data = af; /* save af for if_transmit */
+ return (ifp->if_transmit(ifp, m));
+drop:
+ m_freem(m);
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ return (error);
}
-/*
- * do a checksum of a buffer - much like in_cksum, which operates on
- * mbufs.
- */
-u_int16_t
-gre_in_cksum(u_int16_t *p, u_int len)
+static void
+gre_setseqn(struct grehdr *gh, uint32_t seq)
+{
+ uint32_t *opts;
+ uint16_t flags;
+
+ opts = gh->gre_opts;
+ flags = ntohs(gh->gre_flags);
+ KASSERT((flags & GRE_FLAGS_SP) != 0,
+ ("gre_setseqn called, but GRE_FLAGS_SP isn't set "));
+ if (flags & GRE_FLAGS_CP)
+ opts++;
+ if (flags & GRE_FLAGS_KP)
+ opts++;
+ *opts = htonl(seq);
+}
+
+static int
+gre_transmit(struct ifnet *ifp, struct mbuf *m)
{
- u_int32_t sum = 0;
- int nwords = len >> 1;
-
- while (nwords-- != 0)
- sum += *p++;
-
- if (len & 1) {
- union {
- u_short w;
- u_char c[2];
- } u;
- u.c[0] = *(u_char *)p;
- u.c[1] = 0;
- sum += u.w;
+ GRE_RLOCK_TRACKER;
+ struct gre_softc *sc;
+ struct grehdr *gh;
+ uint32_t iaf, oaf, oseq;
+ int error, hlen, olen, plen;
+ int want_seq, want_csum;
+
+ plen = 0;
+ sc = ifp->if_softc;
+ if (sc == NULL) {
+ error = ENETDOWN;
+ m_freem(m);
+ goto drop;
+ }
+ GRE_RLOCK(sc);
+ if (sc->gre_family == 0) {
+ GRE_RUNLOCK(sc);
+ error = ENETDOWN;
+ m_freem(m);
+ goto drop;
+ }
+ iaf = m->m_pkthdr.csum_data;
+ oaf = sc->gre_family;
+ hlen = sc->gre_hlen;
+ want_seq = (sc->gre_options & GRE_ENABLE_SEQ) != 0;
+ if (want_seq)
+ oseq = sc->gre_oseq++; /* XXX */
+ want_csum = (sc->gre_options & GRE_ENABLE_CSUM) != 0;
+ M_SETFIB(m, sc->gre_fibnum);
+ M_PREPEND(m, hlen, M_NOWAIT);
+ if (m == NULL) {
+ GRE_RUNLOCK(sc);
+ error = ENOBUFS;
+ goto drop;
+ }
+ bcopy(sc->gre_hdr, mtod(m, void *), hlen);
+ GRE_RUNLOCK(sc);
+ switch (oaf) {
+#ifdef INET
+ case AF_INET:
+ olen = sizeof(struct ip);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ olen = sizeof(struct ip6_hdr);
+ break;
+#endif
+ default:
+ error = ENETDOWN;
+ goto drop;
}
+ gh = (struct grehdr *)mtodo(m, olen);
+ switch (iaf) {
+#ifdef INET
+ case AF_INET:
+ gh->gre_proto = htons(ETHERTYPE_IP);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ gh->gre_proto = htons(ETHERTYPE_IPV6);
+ break;
+#endif
+ default:
+ error = ENETDOWN;
+ goto drop;
+ }
+ if (want_seq)
+ gre_setseqn(gh, oseq);
+ if (want_csum) {
+ *(uint16_t *)gh->gre_opts = in_cksum_skip(m,
+ m->m_pkthdr.len, olen);
+ }
+ plen = m->m_pkthdr.len - hlen;
+ switch (oaf) {
+#ifdef INET
+ case AF_INET:
+ error = in_gre_output(m, iaf, hlen);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ error = in6_gre_output(m, iaf, hlen);
+ break;
+#endif
+ default:
+ m_freem(m);
+ error = ENETDOWN;
+ };
+drop:
+ if (error)
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ else {
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
+ }
+ return (error);
+}
+
+static void
+gre_qflush(struct ifnet *ifp __unused)
+{
- /* end-around-carry */
- sum = (sum >> 16) + (sum & 0xffff);
- sum += (sum >> 16);
- return (~sum);
}
static int
diff --git a/sys/net/if_gre.h b/sys/net/if_gre.h
index cb2a44b..3a48efe 100644
--- a/sys/net/if_gre.h
+++ b/sys/net/if_gre.h
@@ -1,8 +1,6 @@
-/* $NetBSD: if_gre.h,v 1.13 2003/11/10 08:51:52 wiz Exp $ */
-/* $FreeBSD$ */
-
/*-
* Copyright (c) 1998 The NetBSD Foundation, Inc.
+ * Copyright (c) 2014 Andrey V. Elsukov <ae@FreeBSD.org>
* All rights reserved
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -28,166 +26,102 @@
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $NetBSD: if_gre.h,v 1.13 2003/11/10 08:51:52 wiz Exp $
+ * $FreeBSD$
*/
-#ifndef _NET_IF_GRE_H
-#define _NET_IF_GRE_H
+#ifndef _NET_IF_GRE_H_
+#define _NET_IF_GRE_H_
-#include <sys/ioccom.h>
#ifdef _KERNEL
-#include <sys/queue.h>
-
-/*
- * Version of the WCCP, need to be configured manually since
- * header for version 2 is the same but IP payload is prepended
- * with additional 4-bytes field.
- */
-typedef enum {
- WCCP_V1 = 0,
- WCCP_V2
-} wccp_ver_t;
-
-struct gre_softc {
- struct ifnet *sc_ifp;
- LIST_ENTRY(gre_softc) sc_list;
- int gre_unit;
- int gre_flags;
- u_int gre_fibnum; /* use this fib for envelopes */
- struct in_addr g_src; /* source address of gre packets */
- struct in_addr g_dst; /* destination address of gre packets */
- struct route route; /* routing entry that determines, where a
- encapsulated packet should go */
- u_char g_proto; /* protocol of encapsulator */
-
- const struct encaptab *encap; /* encapsulation cookie */
-
- uint32_t key; /* key included in outgoing GRE packets */
- /* zero means none */
-
- wccp_ver_t wccp_ver; /* version of the WCCP */
-};
-#define GRE2IFP(sc) ((sc)->sc_ifp)
-
-
-struct gre_h {
- u_int16_t flags; /* GRE flags */
- u_int16_t ptype; /* protocol type of payload typically
- Ether protocol type*/
- uint32_t options[0]; /* optional options */
-/*
- * from here on: fields are optional, presence indicated by flags
- *
- u_int_16 checksum checksum (one-complements of GRE header
- and payload
- Present if (ck_pres | rt_pres == 1).
- Valid if (ck_pres == 1).
- u_int_16 offset offset from start of routing filed to
- first octet of active SRE (see below).
- Present if (ck_pres | rt_pres == 1).
- Valid if (rt_pres == 1).
- u_int_32 key inserted by encapsulator e.g. for
- authentication
- Present if (key_pres ==1 ).
- u_int_32 seq_num Sequence number to allow for packet order
- Present if (seq_pres ==1 ).
- struct gre_sre[] routing Routing fileds (see below)
- Present if (rt_pres == 1)
- */
+/* GRE header according to RFC 2784 and RFC 2890 */
+struct grehdr {
+ uint16_t gre_flags; /* GRE flags */
+#define GRE_FLAGS_CP 0x8000 /* checksum present */
+#define GRE_FLAGS_KP 0x2000 /* key present */
+#define GRE_FLAGS_SP 0x1000 /* sequence present */
+#define GRE_FLAGS_MASK (GRE_FLAGS_CP|GRE_FLAGS_KP|GRE_FLAGS_SP)
+ uint16_t gre_proto; /* protocol type */
+ uint32_t gre_opts[0]; /* optional fields */
} __packed;
+#ifdef INET
struct greip {
- struct ip gi_i;
- struct gre_h gi_g;
+ struct ip gi_ip;
+ struct grehdr gi_gre;
} __packed;
+#endif
-#define gi_pr gi_i.ip_p
-#define gi_len gi_i.ip_len
-#define gi_src gi_i.ip_src
-#define gi_dst gi_i.ip_dst
-#define gi_ptype gi_g.ptype
-#define gi_flags gi_g.flags
-#define gi_options gi_g.options
+#ifdef INET6
+struct greip6 {
+ struct ip6_hdr gi6_ip6;
+ struct grehdr gi6_gre;
+} __packed;
+#endif
-#define GRE_CP 0x8000 /* Checksum Present */
-#define GRE_RP 0x4000 /* Routing Present */
-#define GRE_KP 0x2000 /* Key Present */
-#define GRE_SP 0x1000 /* Sequence Present */
-#define GRE_SS 0x0800 /* Strict Source Route */
+struct gre_softc {
+ struct ifnet *gre_ifp;
+ LIST_ENTRY(gre_softc) gre_list;
+ struct rmlock gre_lock;
+ int gre_family; /* AF of delivery header */
+ uint32_t gre_iseq;
+ uint32_t gre_oseq;
+ uint32_t gre_key;
+ uint32_t gre_options;
+ uint32_t gre_mtu;
+ u_int gre_fibnum;
+ u_int gre_hlen; /* header size */
+ union {
+ void *hdr;
+#ifdef INET
+ struct greip *gihdr;
+#endif
+#ifdef INET6
+ struct greip6 *gi6hdr;
+#endif
+ } gre_uhdr;
+ const struct encaptab *gre_ecookie;
+};
+#define GRE2IFP(sc) ((sc)->gre_ifp)
+#define GRE_LOCK_INIT(sc) rm_init(&(sc)->gre_lock, "gre softc")
+#define GRE_LOCK_DESTROY(sc) rm_destroy(&(sc)->gre_lock)
+#define GRE_RLOCK_TRACKER struct rm_priotracker gre_tracker
+#define GRE_RLOCK(sc) rm_rlock(&(sc)->gre_lock, &gre_tracker)
+#define GRE_RUNLOCK(sc) rm_runlock(&(sc)->gre_lock, &gre_tracker)
+#define GRE_RLOCK_ASSERT(sc) rm_assert(&(sc)->gre_lock, RA_RLOCKED)
+#define GRE_WLOCK(sc) rm_wlock(&(sc)->gre_lock)
+#define GRE_WUNLOCK(sc) rm_wunlock(&(sc)->gre_lock)
+#define GRE_WLOCK_ASSERT(sc) rm_assert(&(sc)->gre_lock, RA_WLOCKED)
+
+#define gre_hdr gre_uhdr.hdr
+#define gre_gihdr gre_uhdr.gihdr
+#define gre_gi6hdr gre_uhdr.gi6hdr
+#define gre_oip gre_gihdr->gi_ip
+#define gre_oip6 gre_gi6hdr->gi6_ip6
/*
* CISCO uses special type for GRE tunnel created as part of WCCP
* connection, while in fact those packets are just IPv4 encapsulated
* into GRE.
*/
-#define WCCP_PROTOCOL_TYPE 0x883E
-
-/*
- * gre_sre defines a Source route Entry. These are needed if packets
- * should be routed over more than one tunnel hop by hop
- */
-struct gre_sre {
- u_int16_t sre_family; /* address family */
- u_char sre_offset; /* offset to first octet of active entry */
- u_char sre_length; /* number of octets in the SRE.
- sre_lengthl==0 -> last entry. */
- u_char *sre_rtinfo; /* the routing information */
-};
-
-struct greioctl {
- int unit;
- struct in_addr addr;
-};
-
-/* for mobile encaps */
-
-struct mobile_h {
- u_int16_t proto; /* protocol and S-bit */
- u_int16_t hcrc; /* header checksum */
- u_int32_t odst; /* original destination address */
- u_int32_t osrc; /* original source addr, if S-bit set */
-} __packed;
-
-struct mobip_h {
- struct ip mi;
- struct mobile_h mh;
-} __packed;
-
-
-#define MOB_H_SIZ_S (sizeof(struct mobile_h) - sizeof(u_int32_t))
-#define MOB_H_SIZ_L (sizeof(struct mobile_h))
-#define MOB_H_SBIT 0x0080
-
-#define GRE_TTL 30
-
+#define ETHERTYPE_WCCP 0x883E
#endif /* _KERNEL */
-/*
- * ioctls needed to manipulate the interface
- */
-
#define GRESADDRS _IOW('i', 101, struct ifreq)
#define GRESADDRD _IOW('i', 102, struct ifreq)
#define GREGADDRS _IOWR('i', 103, struct ifreq)
#define GREGADDRD _IOWR('i', 104, struct ifreq)
#define GRESPROTO _IOW('i' , 105, struct ifreq)
#define GREGPROTO _IOWR('i', 106, struct ifreq)
-#define GREGKEY _IOWR('i', 107, struct ifreq)
-#define GRESKEY _IOW('i', 108, struct ifreq)
-#ifdef _KERNEL
-LIST_HEAD(gre_softc_head, gre_softc);
-VNET_DECLARE(struct gre_softc_head, gre_softc_list);
-#define V_gre_softc_list VNET(gre_softc_list)
+#define GREGKEY _IOWR('i', 107, struct ifreq)
+#define GRESKEY _IOW('i', 108, struct ifreq)
+#define GREGOPTS _IOWR('i', 109, struct ifreq)
+#define GRESOPTS _IOW('i', 110, struct ifreq)
-VNET_DECLARE(struct mtx, gre_mtx);
-#define V_gre_mtx VNET(gre_mtx)
-#define GRE_LIST_LOCK_INIT(x) mtx_init(&V_gre_mtx, "gre_mtx", NULL, \
- MTX_DEF)
-#define GRE_LIST_LOCK_DESTROY(x) mtx_destroy(&V_gre_mtx)
-#define GRE_LIST_LOCK(x) mtx_lock(&V_gre_mtx)
-#define GRE_LIST_UNLOCK(x) mtx_unlock(&V_gre_mtx)
+#define GRE_ENABLE_CSUM 0x0001
+#define GRE_ENABLE_SEQ 0x0002
+#define GRE_OPTMASK (GRE_ENABLE_CSUM|GRE_ENABLE_SEQ)
-u_int16_t gre_in_cksum(u_int16_t *, u_int);
-#endif /* _KERNEL */
-
-#endif
+#endif /* _NET_IF_GRE_H_ */
diff --git a/sys/net/if_me.c b/sys/net/if_me.c
new file mode 100644
index 0000000..a00bdd2
--- /dev/null
+++ b/sys/net/if_me.c
@@ -0,0 +1,647 @@
+/*-
+ * Copyright (c) 2014 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/jail.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/libkern.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mbuf.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/protosw.h>
+#include <sys/rmlock.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/sx.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/systm.h>
+
+#include <net/bpf.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_clone.h>
+#include <net/if_types.h>
+#include <net/netisr.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_encap.h>
+
+#include <machine/in_cksum.h>
+#include <security/mac/mac_framework.h>
+
+#define MEMTU 1500
+static const char mename[] = "me";
+static MALLOC_DEFINE(M_IFME, mename, "Minimal Encapsulation for IP");
+static VNET_DEFINE(struct mtx, me_mtx);
+#define V_me_mtx VNET(me_mtx)
+/* Minimal forwarding header RFC 2004 */
+struct mobhdr {
+ uint8_t mob_proto; /* protocol */
+ uint8_t mob_flags; /* flags */
+#define MOB_FLAGS_SP 0x80 /* source present */
+ uint16_t mob_csum; /* header checksum */
+ struct in_addr mob_dst; /* original destination address */
+ struct in_addr mob_src; /* original source addr (optional) */
+} __packed;
+
+struct me_softc {
+ struct ifnet *me_ifp;
+ LIST_ENTRY(me_softc) me_list;
+ struct rmlock me_lock;
+ u_int me_fibnum;
+ const struct encaptab *me_ecookie;
+ struct in_addr me_src;
+ struct in_addr me_dst;
+};
+#define ME2IFP(sc) ((sc)->me_ifp)
+#define ME_READY(sc) ((sc)->me_src.s_addr != 0)
+#define ME_LOCK_INIT(sc) rm_init(&(sc)->me_lock, "me softc")
+#define ME_LOCK_DESTROY(sc) rm_destroy(&(sc)->me_lock)
+#define ME_RLOCK_TRACKER struct rm_priotracker me_tracker
+#define ME_RLOCK(sc) rm_rlock(&(sc)->me_lock, &me_tracker)
+#define ME_RUNLOCK(sc) rm_runlock(&(sc)->me_lock, &me_tracker)
+#define ME_RLOCK_ASSERT(sc) rm_assert(&(sc)->me_lock, RA_RLOCKED)
+#define ME_WLOCK(sc) rm_wlock(&(sc)->me_lock)
+#define ME_WUNLOCK(sc) rm_wunlock(&(sc)->me_lock)
+#define ME_WLOCK_ASSERT(sc) rm_assert(&(sc)->me_lock, RA_WLOCKED)
+
+#define ME_LIST_LOCK_INIT(x) mtx_init(&V_me_mtx, "me_mtx", NULL, MTX_DEF)
+#define ME_LIST_LOCK_DESTROY(x) mtx_destroy(&V_me_mtx)
+#define ME_LIST_LOCK(x) mtx_lock(&V_me_mtx)
+#define ME_LIST_UNLOCK(x) mtx_unlock(&V_me_mtx)
+
+static VNET_DEFINE(LIST_HEAD(, me_softc), me_softc_list);
+#define V_me_softc_list VNET(me_softc_list)
+static struct sx me_ioctl_sx;
+SX_SYSINIT(me_ioctl_sx, &me_ioctl_sx, "me_ioctl");
+
+static int me_clone_create(struct if_clone *, int, caddr_t);
+static void me_clone_destroy(struct ifnet *);
+static VNET_DEFINE(struct if_clone *, me_cloner);
+#define V_me_cloner VNET(me_cloner)
+
+static void me_qflush(struct ifnet *);
+static int me_transmit(struct ifnet *, struct mbuf *);
+static int me_ioctl(struct ifnet *, u_long, caddr_t);
+static int me_output(struct ifnet *, struct mbuf *,
+ const struct sockaddr *, struct route *);
+static int me_input(struct mbuf **, int *, int);
+
+static int me_set_tunnel(struct ifnet *, struct sockaddr_in *,
+ struct sockaddr_in *);
+static void me_delete_tunnel(struct ifnet *);
+
+SYSCTL_DECL(_net_link);
+static SYSCTL_NODE(_net_link, IFT_TUNNEL, me, CTLFLAG_RW, 0,
+ "Minimal Encapsulation for IP (RFC 2004)");
+#ifndef MAX_ME_NEST
+#define MAX_ME_NEST 1
+#endif
+
+static VNET_DEFINE(int, max_me_nesting) = MAX_ME_NEST;
+#define V_max_me_nesting VNET(max_me_nesting)
+SYSCTL_INT(_net_link_me, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET,
+ &VNET_NAME(max_me_nesting), 0, "Max nested tunnels");
+
+extern struct domain inetdomain;
+static const struct protosw in_mobile_protosw = {
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_MOBILE,
+ .pr_flags = PR_ATOMIC|PR_ADDR,
+ .pr_input = me_input,
+ .pr_output = rip_output,
+ .pr_ctlinput = rip_ctlinput,
+ .pr_ctloutput = rip_ctloutput,
+ .pr_usrreqs = &rip_usrreqs
+};
+
+static void
+vnet_me_init(const void *unused __unused)
+{
+ LIST_INIT(&V_me_softc_list);
+ ME_LIST_LOCK_INIT();
+ V_me_cloner = if_clone_simple(mename, me_clone_create,
+ me_clone_destroy, 0);
+}
+VNET_SYSINIT(vnet_me_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_me_init, NULL);
+
+static void
+vnet_me_uninit(const void *unused __unused)
+{
+
+ if_clone_detach(V_me_cloner);
+ ME_LIST_LOCK_DESTROY();
+}
+VNET_SYSUNINIT(vnet_me_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_me_uninit, NULL);
+
+static int
+me_clone_create(struct if_clone *ifc, int unit, caddr_t params)
+{
+ struct me_softc *sc;
+
+ sc = malloc(sizeof(struct me_softc), M_IFME, M_WAITOK | M_ZERO);
+ sc->me_fibnum = curthread->td_proc->p_fibnum;
+ ME2IFP(sc) = if_alloc(IFT_TUNNEL);
+ ME_LOCK_INIT(sc);
+ ME2IFP(sc)->if_softc = sc;
+ if_initname(ME2IFP(sc), mename, unit);
+
+ ME2IFP(sc)->if_mtu = MEMTU - sizeof(struct mobhdr);
+ ME2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
+ ME2IFP(sc)->if_output = me_output;
+ ME2IFP(sc)->if_ioctl = me_ioctl;
+ ME2IFP(sc)->if_transmit = me_transmit;
+ ME2IFP(sc)->if_qflush = me_qflush;
+ if_attach(ME2IFP(sc));
+ bpfattach(ME2IFP(sc), DLT_NULL, sizeof(u_int32_t));
+ ME_LIST_LOCK();
+ LIST_INSERT_HEAD(&V_me_softc_list, sc, me_list);
+ ME_LIST_UNLOCK();
+ return (0);
+}
+
+static void
+me_clone_destroy(struct ifnet *ifp)
+{
+ struct me_softc *sc;
+
+ sx_xlock(&me_ioctl_sx);
+ sc = ifp->if_softc;
+ me_delete_tunnel(ifp);
+ ME_LIST_LOCK();
+ LIST_REMOVE(sc, me_list);
+ ME_LIST_UNLOCK();
+ bpfdetach(ifp);
+ if_detach(ifp);
+ ifp->if_softc = NULL;
+ sx_xunlock(&me_ioctl_sx);
+
+ if_free(ifp);
+ ME_LOCK_DESTROY(sc);
+ free(sc, M_IFME);
+}
+
+static int
+me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ ME_RLOCK_TRACKER;
+ struct ifreq *ifr = (struct ifreq *)data;
+ struct sockaddr_in *src, *dst;
+ struct me_softc *sc;
+ int error;
+
+ switch (cmd) {
+ case SIOCSIFMTU:
+ if (ifr->ifr_mtu < 576)
+ return (EINVAL);
+ ifp->if_mtu = ifr->ifr_mtu - sizeof(struct mobhdr);
+ return (0);
+ case SIOCSIFADDR:
+ ifp->if_flags |= IFF_UP;
+ case SIOCSIFFLAGS:
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ return (0);
+ }
+ sx_xlock(&me_ioctl_sx);
+ sc = ifp->if_softc;
+ if (sc == NULL) {
+ error = ENXIO;
+ goto end;
+ }
+ error = 0;
+ switch (cmd) {
+ case SIOCSIFPHYADDR:
+ src = (struct sockaddr_in *)
+ &(((struct in_aliasreq *)data)->ifra_addr);
+ dst = (struct sockaddr_in *)
+ &(((struct in_aliasreq *)data)->ifra_dstaddr);
+ if (src->sin_family != dst->sin_family ||
+ src->sin_family != AF_INET ||
+ src->sin_len != dst->sin_len ||
+ src->sin_len != sizeof(struct sockaddr_in)) {
+ error = EINVAL;
+ break;
+ }
+ if (src->sin_addr.s_addr == INADDR_ANY ||
+ dst->sin_addr.s_addr == INADDR_ANY) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ error = me_set_tunnel(ifp, src, dst);
+ break;
+ case SIOCDIFPHYADDR:
+ me_delete_tunnel(ifp);
+ break;
+ case SIOCGIFPSRCADDR:
+ case SIOCGIFPDSTADDR:
+ ME_RLOCK(sc);
+ if (!ME_READY(sc)) {
+ error = EADDRNOTAVAIL;
+ ME_RUNLOCK(sc);
+ break;
+ }
+ src = (struct sockaddr_in *)&ifr->ifr_addr;
+ memset(src, 0, sizeof(*src));
+ src->sin_family = AF_INET;
+ src->sin_len = sizeof(*src);
+ switch (cmd) {
+ case SIOCGIFPSRCADDR:
+ src->sin_addr = sc->me_src;
+ break;
+ case SIOCGIFPDSTADDR:
+ src->sin_addr = sc->me_dst;
+ break;
+ }
+ ME_RUNLOCK(sc);
+ error = prison_if(curthread->td_ucred, sintosa(src));
+ if (error != 0)
+ memset(src, 0, sizeof(*src));
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+end:
+ sx_xunlock(&me_ioctl_sx);
+ return (error);
+}
+
+static int
+me_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
+{
+ ME_RLOCK_TRACKER;
+ struct me_softc *sc;
+ struct ip *ip;
+ int ret;
+
+ sc = (struct me_softc *)arg;
+ if ((ME2IFP(sc)->if_flags & IFF_UP) == 0)
+ return (0);
+
+ M_ASSERTPKTHDR(m);
+
+ if (m->m_pkthdr.len < sizeof(struct ip) + sizeof(struct mobhdr) -
+ sizeof(struct in_addr))
+ return (0);
+
+ ret = 0;
+ ME_RLOCK(sc);
+ if (ME_READY(sc)) {
+ ip = mtod(m, struct ip *);
+ if (sc->me_src.s_addr == ip->ip_dst.s_addr &&
+ sc->me_dst.s_addr == ip->ip_src.s_addr)
+ ret = 32 * 2;
+ }
+ ME_RUNLOCK(sc);
+ return (ret);
+}
+
+static int
+me_set_tunnel(struct ifnet *ifp, struct sockaddr_in *src,
+ struct sockaddr_in *dst)
+{
+ struct me_softc *sc, *tsc;
+
+ sx_assert(&me_ioctl_sx, SA_XLOCKED);
+ ME_LIST_LOCK();
+ sc = ifp->if_softc;
+ LIST_FOREACH(tsc, &V_me_softc_list, me_list) {
+ if (tsc == sc || !ME_READY(tsc))
+ continue;
+ if (tsc->me_src.s_addr == src->sin_addr.s_addr &&
+ tsc->me_dst.s_addr == dst->sin_addr.s_addr) {
+ ME_LIST_UNLOCK();
+ return (EADDRNOTAVAIL);
+ }
+ }
+ ME_LIST_UNLOCK();
+
+ ME_WLOCK(sc);
+ sc->me_dst = dst->sin_addr;
+ sc->me_src = src->sin_addr;
+ ME_WUNLOCK(sc);
+
+ if (sc->me_ecookie == NULL)
+ sc->me_ecookie = encap_attach_func(AF_INET, IPPROTO_MOBILE,
+ me_encapcheck, &in_mobile_protosw, sc);
+ if (sc->me_ecookie != NULL)
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ return (0);
+}
+
+static void
+me_delete_tunnel(struct ifnet *ifp)
+{
+ struct me_softc *sc = ifp->if_softc;
+
+ sx_assert(&me_ioctl_sx, SA_XLOCKED);
+ if (sc->me_ecookie != NULL)
+ encap_detach(sc->me_ecookie);
+ sc->me_ecookie = NULL;
+ ME_WLOCK(sc);
+ sc->me_src.s_addr = 0;
+ sc->me_dst.s_addr = 0;
+ ME_WUNLOCK(sc);
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+}
+
+static uint16_t
+me_in_cksum(uint16_t *p, int nwords)
+{
+ uint32_t sum = 0;
+
+ while (nwords-- > 0)
+ sum += *p++;
+ sum = (sum >> 16) + (sum & 0xffff);
+ sum += (sum >> 16);
+ return (~sum);
+}
+
+int
+me_input(struct mbuf **mp, int *offp, int proto)
+{
+ struct me_softc *sc;
+ struct mobhdr *mh;
+ struct ifnet *ifp;
+ struct mbuf *m;
+ struct ip *ip;
+ int hlen;
+
+ m = *mp;
+ sc = encap_getarg(m);
+ KASSERT(sc != NULL, ("encap_getarg returned NULL"));
+
+ ifp = ME2IFP(sc);
+ /* checks for short packets */
+ hlen = sizeof(struct mobhdr);
+ if (m->m_pkthdr.len < sizeof(struct ip) + hlen)
+ hlen -= sizeof(struct in_addr);
+ if (m->m_len < sizeof(struct ip) + hlen)
+ m = m_pullup(m, sizeof(struct ip) + hlen);
+ if (m == NULL)
+ goto drop;
+ mh = (struct mobhdr *)mtodo(m, sizeof(struct ip));
+ /* check for wrong flags */
+ if (mh->mob_flags & (~MOB_FLAGS_SP)) {
+ m_freem(m);
+ goto drop;
+ }
+ if (mh->mob_flags) {
+ if (hlen != sizeof(struct mobhdr)) {
+ m_freem(m);
+ goto drop;
+ }
+ } else
+ hlen = sizeof(struct mobhdr) - sizeof(struct in_addr);
+ /* check mobile header checksum */
+ if (me_in_cksum((uint16_t *)mh, hlen / sizeof(uint16_t)) != 0) {
+ m_freem(m);
+ goto drop;
+ }
+#ifdef MAC
+ mac_ifnet_create_mbuf(ifp, m);
+#endif
+ ip = mtod(m, struct ip *);
+ ip->ip_dst = mh->mob_dst;
+ ip->ip_p = mh->mob_proto;
+ ip->ip_sum = 0;
+ ip->ip_len = htons(m->m_pkthdr.len - hlen);
+ if (mh->mob_flags)
+ ip->ip_src = mh->mob_src;
+ memmove(mtodo(m, hlen), ip, sizeof(struct ip));
+ m_adj(m, hlen);
+ m_clrprotoflags(m);
+ m->m_pkthdr.rcvif = ifp;
+ m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID);
+ M_SETFIB(m, sc->me_fibnum);
+ hlen = AF_INET;
+ BPF_MTAP2(ifp, &hlen, sizeof(hlen), m);
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
+ if ((ifp->if_flags & IFF_MONITOR) != 0)
+ m_freem(m);
+ else
+ netisr_dispatch(NETISR_IP, m);
+ return (IPPROTO_DONE);
+drop:
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+ return (IPPROTO_DONE);
+}
+
+#define MTAG_ME 1414491977
+static int
+me_check_nesting(struct ifnet *ifp, struct mbuf *m)
+{
+ struct m_tag *mtag;
+ int count;
+
+ count = 1;
+ mtag = NULL;
+ while ((mtag = m_tag_locate(m, MTAG_ME, 0, NULL)) != NULL) {
+ if (*(struct ifnet **)(mtag + 1) == ifp) {
+ log(LOG_NOTICE, "%s: loop detected\n", ifp->if_xname);
+ return (EIO);
+ }
+ count++;
+ }
+ if (count > V_max_me_nesting) {
+ log(LOG_NOTICE,
+ "%s: if_output recursively called too many times(%d)\n",
+ ifp->if_xname, count);
+ return (EIO);
+ }
+ mtag = m_tag_alloc(MTAG_ME, 0, sizeof(struct ifnet *), M_NOWAIT);
+ if (mtag == NULL)
+ return (ENOMEM);
+ *(struct ifnet **)(mtag + 1) = ifp;
+ m_tag_prepend(m, mtag);
+ return (0);
+}
+
+static int
+me_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+ struct route *ro)
+{
+ uint32_t af;
+ int error;
+
+#ifdef MAC
+ error = mac_ifnet_check_transmit(ifp, m);
+ if (error != 0)
+ goto drop;
+#endif
+ if ((ifp->if_flags & IFF_MONITOR) != 0 ||
+ (ifp->if_flags & IFF_UP) == 0) {
+ error = ENETDOWN;
+ goto drop;
+ }
+
+ error = me_check_nesting(ifp, m);
+ if (error != 0)
+ goto drop;
+
+ m->m_flags &= ~(M_BCAST|M_MCAST);
+ if (dst->sa_family == AF_UNSPEC)
+ bcopy(dst->sa_data, &af, sizeof(af));
+ else
+ af = dst->sa_family;
+ if (af != AF_INET) {
+ error = EAFNOSUPPORT;
+ goto drop;
+ }
+ BPF_MTAP2(ifp, &af, sizeof(af), m);
+ return (ifp->if_transmit(ifp, m));
+drop:
+ m_freem(m);
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ return (error);
+}
+
+static int
+me_transmit(struct ifnet *ifp, struct mbuf *m)
+{
+ ME_RLOCK_TRACKER;
+ struct mobhdr mh;
+ struct me_softc *sc;
+ struct ip *ip;
+ int error, hlen, plen;
+
+ sc = ifp->if_softc;
+ if (sc == NULL) {
+ error = ENETDOWN;
+ m_freem(m);
+ goto drop;
+ }
+ if (m->m_len < sizeof(struct ip))
+ m = m_pullup(m, sizeof(struct ip));
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto drop;
+ }
+ ip = mtod(m, struct ip *);
+ /* Fragmented datagramms shouldn't be encapsulated */
+ if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) {
+ error = EINVAL;
+ m_freem(m);
+ goto drop;
+ }
+ mh.mob_proto = ip->ip_p;
+ mh.mob_src = ip->ip_src;
+ mh.mob_dst = ip->ip_dst;
+ ME_RLOCK(sc);
+ if (!ME_READY(sc)) {
+ ME_RUNLOCK(sc);
+ error = ENETDOWN;
+ m_freem(m);
+ goto drop;
+ }
+ if (in_hosteq(sc->me_src, ip->ip_src)) {
+ hlen = sizeof(struct mobhdr) - sizeof(struct in_addr);
+ mh.mob_flags = 0;
+ } else {
+ hlen = sizeof(struct mobhdr);
+ mh.mob_flags = MOB_FLAGS_SP;
+ }
+ plen = m->m_pkthdr.len;
+ ip->ip_src = sc->me_src;
+ ip->ip_dst = sc->me_dst;
+ M_SETFIB(m, sc->me_fibnum);
+ ME_RUNLOCK(sc);
+ M_PREPEND(m, hlen, M_NOWAIT);
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto drop;
+ }
+ if (m->m_len < sizeof(struct ip) + hlen)
+ m = m_pullup(m, sizeof(struct ip) + hlen);
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto drop;
+ }
+ memmove(mtod(m, void *), mtodo(m, hlen), sizeof(struct ip));
+ ip = mtod(m, struct ip *);
+ ip->ip_len = htons(m->m_pkthdr.len);
+ ip->ip_p = IPPROTO_MOBILE;
+ ip->ip_sum = 0;
+ mh.mob_csum = 0;
+ mh.mob_csum = me_in_cksum((uint16_t *)&mh, hlen / sizeof(uint16_t));
+ bcopy(&mh, mtodo(m, sizeof(struct ip)), hlen);
+ error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
+drop:
+ if (error)
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ else {
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
+ }
+ return (error);
+}
+
+static void
+me_qflush(struct ifnet *ifp __unused)
+{
+
+}
+
+static int
+memodevent(module_t mod, int type, void *data)
+{
+
+ switch (type) {
+ case MOD_LOAD:
+ case MOD_UNLOAD:
+ break;
+ default:
+ return (EOPNOTSUPP);
+ }
+ return (0);
+}
+
+static moduledata_t me_mod = {
+ "if_me",
+ memodevent,
+ 0
+};
+
+DECLARE_MODULE(if_me, me_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(if_me, 1);
OpenPOWER on IntegriCloud