summaryrefslogtreecommitdiffstats
path: root/sys/net
diff options
context:
space:
mode:
authorae <ae@FreeBSD.org>2014-11-07 19:13:19 +0000
committerae <ae@FreeBSD.org>2014-11-07 19:13:19 +0000
commit7144dc8bc249ce3b8a458f377cd5f00ce98d91b9 (patch)
tree63c3d4ed6b0c4afb2640b0e3d10ed1b045f2d63f /sys/net
parent6306f795601112ca48fd1c75441b82ff98ca8b57 (diff)
downloadFreeBSD-src-7144dc8bc249ce3b8a458f377cd5f00ce98d91b9.zip
FreeBSD-src-7144dc8bc249ce3b8a458f377cd5f00ce98d91b9.tar.gz
Overhaul if_gre(4).
Split it into two modules: if_gre(4) for GRE encapsulation and if_me(4) for minimal encapsulation within IP. gre(4) changes: * convert to if_transmit; * rework locking: protect access to softc with rmlock, protect from concurrent ioctls with sx lock; * correct interface accounting for outgoing datagrams (count only payload size); * implement generic support for using IPv6 as delivery header; * make implementation conform to RFC 2784 and partially to RFC 2890; * add support for GRE checksums - calculate for outgoing datagrams and check for incoming datagrams; * add support for sending sequence number in GRE header; * remove support of cached routes. This fixes the problem where gre(4) doesn't work at system startup. But this also removes support for having tunnels with the same addresses for inner and outer header. * deprecate support for various GREXXX ioctls that aren't used in FreeBSD. Use our standard ioctls for tunnels. me(4): * implementation conforms to RFC 2004; * use if_transmit; * use the same locking model as gre(4); PR: 164475 Differential Revision: D1023 No objections from: net@ Relnotes: yes Sponsored by: Yandex LLC
Diffstat (limited to 'sys/net')
-rw-r--r--sys/net/if_gre.c1369
-rw-r--r--sys/net/if_gre.h212
-rw-r--r--sys/net/if_me.c647
3 files changed, 1422 insertions, 806 deletions
diff --git a/sys/net/if_gre.c b/sys/net/if_gre.c
index 8954f5c..4953348 100644
--- a/sys/net/if_gre.c
+++ b/sys/net/if_gre.c
@@ -1,8 +1,6 @@
-/* $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $ */
-/* $FreeBSD$ */
-
/*-
* Copyright (c) 1998 The NetBSD Foundation, Inc.
+ * Copyright (c) 2014 Andrey V. Elsukov <ae@FreeBSD.org>
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -30,16 +28,12 @@
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $
*/
-/*
- * Encapsulate L3 protocols into IP
- * See RFC 2784 (successor of RFC 1701 and 1702) for more details.
- * If_gre is compatible with Cisco GRE tunnels, so you can
- * have a NetBSD box as the other end of a tunnel interface of a Cisco
- * router. See gre(4) for more details.
- * Also supported: IP in IP encaps (proto 55) as of RFC 2004
- */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -47,6 +41,7 @@
#include <sys/param.h>
#include <sys/jail.h>
#include <sys/kernel.h>
+#include <sys/lock.h>
#include <sys/libkern.h>
#include <sys/malloc.h>
#include <sys/module.h>
@@ -54,9 +49,12 @@
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
+#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/sockio.h>
+#include <sys/sx.h>
#include <sys/sysctl.h>
+#include <sys/syslog.h>
#include <sys/systm.h>
#include <net/ethernet.h>
@@ -64,85 +62,71 @@
#include <net/if_var.h>
#include <net/if_clone.h>
#include <net/if_types.h>
-#include <net/route.h>
+#include <net/netisr.h>
#include <net/vnet.h>
-#ifdef INET
#include <netinet/in.h>
+#ifdef INET
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
-#include <netinet/ip_gre.h>
#include <netinet/ip_var.h>
-#include <netinet/ip_encap.h>
-#else
-#error "Huh? if_gre without inet?"
#endif
-#include <net/bpf.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/scope6_var.h>
+#endif
+#include <netinet/ip_encap.h>
+#include <net/bpf.h>
#include <net/if_gre.h>
-/*
- * It is not easy to calculate the right value for a GRE MTU.
- * We leave this task to the admin and use the same default that
- * other vendors use.
- */
-#define GREMTU 1476
-
-#define MTAG_COOKIE_GRE 1307983903
-#define MTAG_GRE_NESTING 1
-struct mtag_gre_nesting {
- uint16_t count;
- uint16_t max;
- struct ifnet *ifp[];
-};
-
-/*
- * gre_mtx protects all global variables in if_gre.c.
- * XXX: gre_softc data not protected yet.
- */
-VNET_DEFINE(struct mtx, gre_mtx);
-VNET_DEFINE(struct gre_softc_head, gre_softc_list);
+#include <machine/in_cksum.h>
+#include <security/mac/mac_framework.h>
+#define GREMTU 1500
static const char grename[] = "gre";
static MALLOC_DEFINE(M_GRE, grename, "Generic Routing Encapsulation");
+static VNET_DEFINE(struct mtx, gre_mtx);
+#define V_gre_mtx VNET(gre_mtx)
+#define GRE_LIST_LOCK_INIT(x) mtx_init(&V_gre_mtx, "gre_mtx", NULL, \
+ MTX_DEF)
+#define GRE_LIST_LOCK_DESTROY(x) mtx_destroy(&V_gre_mtx)
+#define GRE_LIST_LOCK(x) mtx_lock(&V_gre_mtx)
+#define GRE_LIST_UNLOCK(x) mtx_unlock(&V_gre_mtx)
+
+static VNET_DEFINE(LIST_HEAD(, gre_softc), gre_softc_list);
+#define V_gre_softc_list VNET(gre_softc_list)
+static struct sx gre_ioctl_sx;
+SX_SYSINIT(gre_ioctl_sx, &gre_ioctl_sx, "gre_ioctl");
static int gre_clone_create(struct if_clone *, int, caddr_t);
static void gre_clone_destroy(struct ifnet *);
static VNET_DEFINE(struct if_clone *, gre_cloner);
#define V_gre_cloner VNET(gre_cloner)
+static void gre_qflush(struct ifnet *);
+static int gre_transmit(struct ifnet *, struct mbuf *);
static int gre_ioctl(struct ifnet *, u_long, caddr_t);
static int gre_output(struct ifnet *, struct mbuf *,
const struct sockaddr *, struct route *);
-static int gre_compute_route(struct gre_softc *sc);
+static void gre_updatehdr(struct gre_softc *);
+static int gre_set_tunnel(struct ifnet *, struct sockaddr *,
+ struct sockaddr *);
+static void gre_delete_tunnel(struct ifnet *);
+int gre_input(struct mbuf **, int *, int);
#ifdef INET
-extern struct domain inetdomain;
-static const struct protosw in_gre_protosw = {
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = IPPROTO_GRE,
- .pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = gre_input,
- .pr_output = rip_output,
- .pr_ctlinput = rip_ctlinput,
- .pr_ctloutput = rip_ctloutput,
- .pr_usrreqs = &rip_usrreqs
-};
-static const struct protosw in_mobile_protosw = {
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = IPPROTO_MOBILE,
- .pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = gre_mobile_input,
- .pr_output = rip_output,
- .pr_ctlinput = rip_ctlinput,
- .pr_ctloutput = rip_ctloutput,
- .pr_usrreqs = &rip_usrreqs
-};
+extern int in_gre_attach(struct gre_softc *);
+extern int in_gre_output(struct mbuf *, int, int);
+#endif
+#ifdef INET6
+extern int in6_gre_attach(struct gre_softc *);
+extern int in6_gre_output(struct mbuf *, int, int);
#endif
SYSCTL_DECL(_net_link);
@@ -159,6 +143,7 @@ static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0,
*/
#define MAX_GRE_NEST 1
#endif
+
static VNET_DEFINE(int, max_gre_nesting) = MAX_GRE_NEST;
#define V_max_gre_nesting VNET(max_gre_nesting)
SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET,
@@ -191,34 +176,22 @@ gre_clone_create(struct if_clone *ifc, int unit, caddr_t params)
struct gre_softc *sc;
sc = malloc(sizeof(struct gre_softc), M_GRE, M_WAITOK | M_ZERO);
-
+ sc->gre_fibnum = curthread->td_proc->p_fibnum;
GRE2IFP(sc) = if_alloc(IFT_TUNNEL);
- if (GRE2IFP(sc) == NULL) {
- free(sc, M_GRE);
- return (ENOSPC);
- }
-
+ GRE_LOCK_INIT(sc);
GRE2IFP(sc)->if_softc = sc;
if_initname(GRE2IFP(sc), grename, unit);
- GRE2IFP(sc)->if_snd.ifq_maxlen = ifqmaxlen;
- GRE2IFP(sc)->if_addrlen = 0;
- GRE2IFP(sc)->if_hdrlen = 24; /* IP + GRE */
- GRE2IFP(sc)->if_mtu = GREMTU;
+ GRE2IFP(sc)->if_mtu = sc->gre_mtu = GREMTU;
GRE2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
GRE2IFP(sc)->if_output = gre_output;
GRE2IFP(sc)->if_ioctl = gre_ioctl;
- sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
- sc->g_proto = IPPROTO_GRE;
- GRE2IFP(sc)->if_flags |= IFF_LINK0;
- sc->encap = NULL;
- sc->gre_fibnum = curthread->td_proc->p_fibnum;
- sc->wccp_ver = WCCP_V1;
- sc->key = 0;
+ GRE2IFP(sc)->if_transmit = gre_transmit;
+ GRE2IFP(sc)->if_qflush = gre_qflush;
if_attach(GRE2IFP(sc));
bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t));
GRE_LIST_LOCK();
- LIST_INSERT_HEAD(&V_gre_softc_list, sc, sc_list);
+ LIST_INSERT_HEAD(&V_gre_softc_list, sc, gre_list);
GRE_LIST_UNLOCK();
return (0);
}
@@ -226,687 +199,749 @@ gre_clone_create(struct if_clone *ifc, int unit, caddr_t params)
static void
gre_clone_destroy(struct ifnet *ifp)
{
- struct gre_softc *sc = ifp->if_softc;
+ struct gre_softc *sc;
+ sx_xlock(&gre_ioctl_sx);
+ sc = ifp->if_softc;
+ gre_delete_tunnel(ifp);
GRE_LIST_LOCK();
- LIST_REMOVE(sc, sc_list);
+ LIST_REMOVE(sc, gre_list);
GRE_LIST_UNLOCK();
-
-#ifdef INET
- if (sc->encap != NULL)
- encap_detach(sc->encap);
-#endif
bpfdetach(ifp);
if_detach(ifp);
+ ifp->if_softc = NULL;
+ sx_xunlock(&gre_ioctl_sx);
+
if_free(ifp);
+ GRE_LOCK_DESTROY(sc);
free(sc, M_GRE);
}
-/*
- * The output routine. Takes a packet and encapsulates it in the protocol
- * given by sc->g_proto. See also RFC 1701 and RFC 2004
- */
static int
-gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
- struct route *ro)
+gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
- int error = 0;
- struct gre_softc *sc = ifp->if_softc;
- struct greip *gh;
- struct ip *ip;
- struct m_tag *mtag;
- struct mtag_gre_nesting *gt;
- size_t len;
- u_short gre_ip_id = 0;
- uint8_t gre_ip_tos = 0;
- u_int16_t etype = 0;
- struct mobile_h mob_h;
- u_int32_t af;
- int extra = 0, max;
-
- /*
- * gre may cause infinite recursion calls when misconfigured. High
- * nesting level may cause stack exhaustion. We'll prevent this by
- * detecting loops and by introducing upper limit.
- */
- mtag = m_tag_locate(m, MTAG_COOKIE_GRE, MTAG_GRE_NESTING, NULL);
- if (mtag != NULL) {
- struct ifnet **ifp2;
-
- gt = (struct mtag_gre_nesting *)(mtag + 1);
- gt->count++;
- if (gt->count > min(gt->max, V_max_gre_nesting)) {
- printf("%s: hit maximum recursion limit %u on %s\n",
- __func__, gt->count - 1, ifp->if_xname);
- m_freem(m);
- error = EIO; /* is there better errno? */
- goto end;
- }
-
- ifp2 = gt->ifp;
- for (max = gt->count - 1; max > 0; max--) {
- if (*ifp2 == ifp)
- break;
- ifp2++;
- }
- if (*ifp2 == ifp) {
- printf("%s: detected loop with nexting %u on %s\n",
- __func__, gt->count-1, ifp->if_xname);
- m_freem(m);
- error = EIO; /* is there better errno? */
- goto end;
- }
- *ifp2 = ifp;
+ GRE_RLOCK_TRACKER;
+ struct ifreq *ifr = (struct ifreq *)data;
+ struct sockaddr *src, *dst;
+ struct gre_softc *sc;
+#ifdef INET
+ struct sockaddr_in *sin = NULL;
+#endif
+#ifdef INET6
+ struct sockaddr_in6 *sin6 = NULL;
+#endif
+ uint32_t opt;
+ int error;
- } else {
- /*
- * Given that people should NOT increase max_gre_nesting beyond
- * their real needs, we allocate once per packet rather than
- * allocating an mtag once per passing through gre.
- *
- * Note: the sysctl does not actually check for saneness, so we
- * limit the maximum numbers of possible recursions here.
- */
- max = imin(V_max_gre_nesting, 256);
- /* If someone sets the sysctl <= 0, we want at least 1. */
- max = imax(max, 1);
- len = sizeof(struct mtag_gre_nesting) +
- max * sizeof(struct ifnet *);
- mtag = m_tag_alloc(MTAG_COOKIE_GRE, MTAG_GRE_NESTING, len,
- M_NOWAIT);
- if (mtag == NULL) {
- m_freem(m);
- error = ENOMEM;
- goto end;
- }
- gt = (struct mtag_gre_nesting *)(mtag + 1);
- bzero(gt, len);
- gt->count = 1;
- gt->max = max;
- *gt->ifp = ifp;
- m_tag_prepend(m, mtag);
+ switch (cmd) {
+ case SIOCSIFMTU:
+ /* XXX: */
+ if (ifr->ifr_mtu < 576)
+ return (EINVAL);
+ break;
+ case SIOCSIFADDR:
+ ifp->if_flags |= IFF_UP;
+ case SIOCSIFFLAGS:
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ return (0);
+ case GRESADDRS:
+ case GRESADDRD:
+ case GREGADDRS:
+ case GREGADDRD:
+ case GRESPROTO:
+ case GREGPROTO:
+ return (EOPNOTSUPP);
}
-
- if (!((ifp->if_flags & IFF_UP) &&
- (ifp->if_drv_flags & IFF_DRV_RUNNING)) ||
- sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
- m_freem(m);
- error = ENETDOWN;
+ src = dst = NULL;
+ sx_xlock(&gre_ioctl_sx);
+ sc = ifp->if_softc;
+ if (sc == NULL) {
+ error = ENXIO;
goto end;
}
-
- gh = NULL;
- ip = NULL;
-
- /* BPF writes need to be handled specially. */
- if (dst->sa_family == AF_UNSPEC)
- bcopy(dst->sa_data, &af, sizeof(af));
- else
- af = dst->sa_family;
-
- if (bpf_peers_present(ifp->if_bpf))
- bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
-
- if ((ifp->if_flags & IFF_MONITOR) != 0) {
- m_freem(m);
- error = ENETDOWN;
+ error = 0;
+ switch (cmd) {
+ case SIOCSIFMTU:
+ GRE_WLOCK(sc);
+ sc->gre_mtu = ifr->ifr_mtu;
+ gre_updatehdr(sc);
+ GRE_WUNLOCK(sc);
goto end;
- }
-
- m->m_flags &= ~(M_BCAST|M_MCAST);
-
- if (sc->g_proto == IPPROTO_MOBILE) {
- if (af == AF_INET) {
- struct mbuf *m0;
- int msiz;
-
- ip = mtod(m, struct ip *);
-
- /*
- * RFC2004 specifies that fragmented diagrams shouldn't
- * be encapsulated.
- */
- if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) {
- if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- m_freem(m);
- error = EINVAL; /* is there better errno? */
- goto end;
- }
- memset(&mob_h, 0, MOB_H_SIZ_L);
- mob_h.proto = (ip->ip_p) << 8;
- mob_h.odst = ip->ip_dst.s_addr;
- ip->ip_dst.s_addr = sc->g_dst.s_addr;
-
- /*
- * If the packet comes from our host, we only change
- * the destination address in the IP header.
- * Else we also need to save and change the source
- */
- if (in_hosteq(ip->ip_src, sc->g_src)) {
- msiz = MOB_H_SIZ_S;
- } else {
- mob_h.proto |= MOB_H_SBIT;
- mob_h.osrc = ip->ip_src.s_addr;
- ip->ip_src.s_addr = sc->g_src.s_addr;
- msiz = MOB_H_SIZ_L;
- }
- mob_h.proto = htons(mob_h.proto);
- mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz);
-
- if ((m->m_data - msiz) < m->m_pktdat) {
- m0 = m_gethdr(M_NOWAIT, MT_DATA);
- if (m0 == NULL) {
- if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- m_freem(m);
- error = ENOBUFS;
- goto end;
- }
- m0->m_next = m;
- m->m_data += sizeof(struct ip);
- m->m_len -= sizeof(struct ip);
- m0->m_pkthdr.len = m->m_pkthdr.len + msiz;
- m0->m_len = msiz + sizeof(struct ip);
- m0->m_data += max_linkhdr;
- memcpy(mtod(m0, caddr_t), (caddr_t)ip,
- sizeof(struct ip));
- m = m0;
- } else { /* we have some space left in the old one */
- m->m_data -= msiz;
- m->m_len += msiz;
- m->m_pkthdr.len += msiz;
- bcopy(ip, mtod(m, caddr_t),
- sizeof(struct ip));
- }
- ip = mtod(m, struct ip *);
- memcpy((caddr_t)(ip + 1), &mob_h, (unsigned)msiz);
- ip->ip_len = htons(ntohs(ip->ip_len) + msiz);
- } else { /* AF_INET */
- if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- m_freem(m);
- error = EINVAL;
- goto end;
- }
- } else if (sc->g_proto == IPPROTO_GRE) {
- switch (af) {
- case AF_INET:
- ip = mtod(m, struct ip *);
- gre_ip_tos = ip->ip_tos;
- gre_ip_id = ip->ip_id;
- if (sc->wccp_ver == WCCP_V2) {
- extra = sizeof(uint32_t);
- etype = WCCP_PROTOCOL_TYPE;
- } else {
- etype = ETHERTYPE_IP;
- }
+ case SIOCSIFPHYADDR:
+#ifdef INET6
+ case SIOCSIFPHYADDR_IN6:
+#endif
+ error = EINVAL;
+ switch (cmd) {
+#ifdef INET
+ case SIOCSIFPHYADDR:
+ src = (struct sockaddr *)
+ &(((struct in_aliasreq *)data)->ifra_addr);
+ dst = (struct sockaddr *)
+ &(((struct in_aliasreq *)data)->ifra_dstaddr);
break;
+#endif
#ifdef INET6
- case AF_INET6:
- gre_ip_id = ip_newid();
- etype = ETHERTYPE_IPV6;
+ case SIOCSIFPHYADDR_IN6:
+ src = (struct sockaddr *)
+ &(((struct in6_aliasreq *)data)->ifra_addr);
+ dst = (struct sockaddr *)
+ &(((struct in6_aliasreq *)data)->ifra_dstaddr);
break;
#endif
default:
- if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- m_freem(m);
error = EAFNOSUPPORT;
goto end;
}
-
- /* Reserve space for GRE header + optional GRE key */
- int hdrlen = sizeof(struct greip) + extra;
- if (sc->key)
- hdrlen += sizeof(uint32_t);
- M_PREPEND(m, hdrlen, M_NOWAIT);
- } else {
- if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- m_freem(m);
- error = EINVAL;
- goto end;
- }
-
- if (m == NULL) { /* mbuf allocation failed */
- if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- error = ENOBUFS;
- goto end;
- }
-
- M_SETFIB(m, sc->gre_fibnum); /* The envelope may use a different FIB */
-
- gh = mtod(m, struct greip *);
- if (sc->g_proto == IPPROTO_GRE) {
- uint32_t *options = gh->gi_options;
-
- memset((void *)gh, 0, sizeof(struct greip) + extra);
- gh->gi_ptype = htons(etype);
- gh->gi_flags = 0;
-
- /* Add key option */
- if (sc->key)
- {
- gh->gi_flags |= htons(GRE_KP);
- *(options++) = htonl(sc->key);
- }
- }
-
- gh->gi_pr = sc->g_proto;
- if (sc->g_proto != IPPROTO_MOBILE) {
- gh->gi_src = sc->g_src;
- gh->gi_dst = sc->g_dst;
- ((struct ip*)gh)->ip_v = IPPROTO_IPV4;
- ((struct ip*)gh)->ip_hl = (sizeof(struct ip)) >> 2;
- ((struct ip*)gh)->ip_ttl = GRE_TTL;
- ((struct ip*)gh)->ip_tos = gre_ip_tos;
- ((struct ip*)gh)->ip_id = gre_ip_id;
- gh->gi_len = htons(m->m_pkthdr.len);
- }
-
- if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
- if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
- /*
- * Send it off and with IP_FORWARD flag to prevent it from
- * overwriting the ip_id again. ip_id is already set to the
- * ip_id of the encapsulated packet.
- */
- error = ip_output(m, NULL, &sc->route, IP_FORWARDING,
- (struct ip_moptions *)NULL, (struct inpcb *)NULL);
- end:
- if (error)
- if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- return (error);
-}
-
-static int
-gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
-{
- struct ifreq *ifr = (struct ifreq *)data;
- struct in_aliasreq *aifr = (struct in_aliasreq *)data;
- struct gre_softc *sc = ifp->if_softc;
- struct sockaddr_in si;
- struct sockaddr *sa = NULL;
- int error, adj;
- struct sockaddr_in sp, sm, dp, dm;
- uint32_t key;
-
- error = 0;
- adj = 0;
+ /* sa_family must be equal */
+ if (src->sa_family != dst->sa_family ||
+ src->sa_len != dst->sa_len)
+ goto end;
- switch (cmd) {
- case SIOCSIFADDR:
- ifp->if_flags |= IFF_UP;
- break;
- case SIOCSIFFLAGS:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_SETIFFLAGS)) != 0)
- break;
- if ((ifr->ifr_flags & IFF_LINK0) != 0)
- sc->g_proto = IPPROTO_GRE;
- else
- sc->g_proto = IPPROTO_MOBILE;
- if ((ifr->ifr_flags & IFF_LINK2) != 0)
- sc->wccp_ver = WCCP_V2;
- else
- sc->wccp_ver = WCCP_V1;
- goto recompute;
- case SIOCSIFMTU:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_SETIFMTU)) != 0)
- break;
- if (ifr->ifr_mtu < 576) {
- error = EINVAL;
- break;
- }
- ifp->if_mtu = ifr->ifr_mtu;
- break;
- case SIOCGIFMTU:
- ifr->ifr_mtu = GRE2IFP(sc)->if_mtu;
- break;
- case SIOCADDMULTI:
- /*
- * XXXRW: Isn't this priv_checkr() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_ADDMULTI)) != 0)
- break;
- if (ifr == 0) {
- error = EAFNOSUPPORT;
- break;
- }
- switch (ifr->ifr_addr.sa_family) {
+ /* validate sa_len */
+ switch (src->sa_family) {
#ifdef INET
case AF_INET:
+ if (src->sa_len != sizeof(struct sockaddr_in))
+ goto end;
break;
#endif
#ifdef INET6
case AF_INET6:
+ if (src->sa_len != sizeof(struct sockaddr_in6))
+ goto end;
break;
#endif
default:
error = EAFNOSUPPORT;
- break;
+ goto end;
}
- break;
- case SIOCDELMULTI:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_DELIFGROUP)) != 0)
- break;
- if (ifr == 0) {
- error = EAFNOSUPPORT;
- break;
+ /* check sa_family looks sane for the cmd */
+ error = EAFNOSUPPORT;
+ switch (cmd) {
+#ifdef INET
+ case SIOCSIFPHYADDR:
+ if (src->sa_family == AF_INET)
+ break;
+ goto end;
+#endif
+#ifdef INET6
+ case SIOCSIFPHYADDR_IN6:
+ if (src->sa_family == AF_INET6)
+ break;
+ goto end;
+#endif
}
- switch (ifr->ifr_addr.sa_family) {
+ error = EADDRNOTAVAIL;
+ switch (src->sa_family) {
#ifdef INET
case AF_INET:
+ if (satosin(src)->sin_addr.s_addr == INADDR_ANY ||
+ satosin(dst)->sin_addr.s_addr == INADDR_ANY)
+ goto end;
break;
#endif
#ifdef INET6
case AF_INET6:
- break;
-#endif
- default:
- error = EAFNOSUPPORT;
- break;
- }
- break;
- case GRESPROTO:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
- break;
- sc->g_proto = ifr->ifr_flags;
- switch (sc->g_proto) {
- case IPPROTO_GRE:
- ifp->if_flags |= IFF_LINK0;
- break;
- case IPPROTO_MOBILE:
- ifp->if_flags &= ~IFF_LINK0;
- break;
- default:
- error = EPROTONOSUPPORT;
- break;
- }
- goto recompute;
- case GREGPROTO:
- ifr->ifr_flags = sc->g_proto;
- break;
- case GRESADDRS:
- case GRESADDRD:
- error = priv_check(curthread, PRIV_NET_GRE);
- if (error)
- return (error);
- /*
- * set tunnel endpoints, compute a less specific route
- * to the remote end and mark if as up
- */
- sa = &ifr->ifr_addr;
- if (cmd == GRESADDRS)
- sc->g_src = (satosin(sa))->sin_addr;
- if (cmd == GRESADDRD)
- sc->g_dst = (satosin(sa))->sin_addr;
- recompute:
-#ifdef INET
- if (sc->encap != NULL) {
- encap_detach(sc->encap);
- sc->encap = NULL;
- }
+ if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)
+ ||
+ IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr))
+ goto end;
+ /*
+ * Check validity of the scope zone ID of the
+ * addresses, and convert it into the kernel
+ * internal form if necessary.
+ */
+ error = sa6_embedscope(satosin6(src), 0);
+ if (error != 0)
+ goto end;
+ error = sa6_embedscope(satosin6(dst), 0);
+ if (error != 0)
+ goto end;
#endif
- if ((sc->g_src.s_addr != INADDR_ANY) &&
- (sc->g_dst.s_addr != INADDR_ANY)) {
- bzero(&sp, sizeof(sp));
- bzero(&sm, sizeof(sm));
- bzero(&dp, sizeof(dp));
- bzero(&dm, sizeof(dm));
- sp.sin_len = sm.sin_len = dp.sin_len = dm.sin_len =
- sizeof(struct sockaddr_in);
- sp.sin_family = sm.sin_family = dp.sin_family =
- dm.sin_family = AF_INET;
- sp.sin_addr = sc->g_src;
- dp.sin_addr = sc->g_dst;
- sm.sin_addr.s_addr = dm.sin_addr.s_addr =
- INADDR_BROADCAST;
-#ifdef INET
- sc->encap = encap_attach(AF_INET, sc->g_proto,
- sintosa(&sp), sintosa(&sm), sintosa(&dp),
- sintosa(&dm), (sc->g_proto == IPPROTO_GRE) ?
- &in_gre_protosw : &in_mobile_protosw, sc);
- if (sc->encap == NULL)
- printf("%s: unable to attach encap\n",
- if_name(GRE2IFP(sc)));
-#endif
- if (sc->route.ro_rt != 0) /* free old route */
- RTFREE(sc->route.ro_rt);
- if (gre_compute_route(sc) == 0)
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- else
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
- }
- break;
- case GREGADDRS:
- memset(&si, 0, sizeof(si));
- si.sin_family = AF_INET;
- si.sin_len = sizeof(struct sockaddr_in);
- si.sin_addr.s_addr = sc->g_src.s_addr;
- sa = sintosa(&si);
- error = prison_if(curthread->td_ucred, sa);
- if (error != 0)
- break;
- ifr->ifr_addr = *sa;
- break;
- case GREGADDRD:
- memset(&si, 0, sizeof(si));
- si.sin_family = AF_INET;
- si.sin_len = sizeof(struct sockaddr_in);
- si.sin_addr.s_addr = sc->g_dst.s_addr;
- sa = sintosa(&si);
- error = prison_if(curthread->td_ucred, sa);
- if (error != 0)
- break;
- ifr->ifr_addr = *sa;
+ };
+ error = gre_set_tunnel(ifp, src, dst);
break;
- case SIOCSIFPHYADDR:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
- break;
- if (aifr->ifra_addr.sin_family != AF_INET ||
- aifr->ifra_dstaddr.sin_family != AF_INET) {
- error = EAFNOSUPPORT;
- break;
- }
- if (aifr->ifra_addr.sin_len != sizeof(si) ||
- aifr->ifra_dstaddr.sin_len != sizeof(si)) {
- error = EINVAL;
- break;
- }
- sc->g_src = aifr->ifra_addr.sin_addr;
- sc->g_dst = aifr->ifra_dstaddr.sin_addr;
- goto recompute;
case SIOCDIFPHYADDR:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
- break;
- sc->g_src.s_addr = INADDR_ANY;
- sc->g_dst.s_addr = INADDR_ANY;
- goto recompute;
+ gre_delete_tunnel(ifp);
+ break;
case SIOCGIFPSRCADDR:
+ case SIOCGIFPDSTADDR:
#ifdef INET6
case SIOCGIFPSRCADDR_IN6:
+ case SIOCGIFPDSTADDR_IN6:
#endif
- if (sc->g_src.s_addr == INADDR_ANY) {
+ if (sc->gre_family == 0) {
error = EADDRNOTAVAIL;
break;
}
- memset(&si, 0, sizeof(si));
- si.sin_family = AF_INET;
- si.sin_len = sizeof(struct sockaddr_in);
- si.sin_addr.s_addr = sc->g_src.s_addr;
- error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
- if (error != 0)
+ GRE_RLOCK(sc);
+ switch (cmd) {
+#ifdef INET
+ case SIOCGIFPSRCADDR:
+ case SIOCGIFPDSTADDR:
+ if (sc->gre_family != AF_INET) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ sin = (struct sockaddr_in *)&ifr->ifr_addr;
+ memset(sin, 0, sizeof(*sin));
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
break;
- bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr));
- break;
- case SIOCGIFPDSTADDR:
-#ifdef INET6
- case SIOCGIFPDSTADDR_IN6:
#endif
- if (sc->g_dst.s_addr == INADDR_ANY) {
- error = EADDRNOTAVAIL;
+#ifdef INET6
+ case SIOCGIFPSRCADDR_IN6:
+ case SIOCGIFPDSTADDR_IN6:
+ if (sc->gre_family != AF_INET6) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ sin6 = (struct sockaddr_in6 *)
+ &(((struct in6_ifreq *)data)->ifr_addr);
+ memset(sin6, 0, sizeof(*sin6));
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(*sin6);
break;
+#endif
}
- memset(&si, 0, sizeof(si));
- si.sin_family = AF_INET;
- si.sin_len = sizeof(struct sockaddr_in);
- si.sin_addr.s_addr = sc->g_dst.s_addr;
- error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
+ if (error == 0) {
+ switch (cmd) {
+#ifdef INET
+ case SIOCGIFPSRCADDR:
+ sin->sin_addr = sc->gre_oip.ip_src;
+ break;
+ case SIOCGIFPDSTADDR:
+ sin->sin_addr = sc->gre_oip.ip_dst;
+ break;
+#endif
+#ifdef INET6
+ case SIOCGIFPSRCADDR_IN6:
+ sin6->sin6_addr = sc->gre_oip6.ip6_src;
+ break;
+ case SIOCGIFPDSTADDR_IN6:
+ sin6->sin6_addr = sc->gre_oip6.ip6_dst;
+ break;
+#endif
+ }
+ }
+ GRE_RUNLOCK(sc);
if (error != 0)
break;
- bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr));
+ switch (cmd) {
+#ifdef INET
+ case SIOCGIFPSRCADDR:
+ case SIOCGIFPDSTADDR:
+ error = prison_if(curthread->td_ucred,
+ (struct sockaddr *)sin);
+ if (error != 0)
+ memset(sin, 0, sizeof(*sin));
+ break;
+#endif
+#ifdef INET6
+ case SIOCGIFPSRCADDR_IN6:
+ case SIOCGIFPDSTADDR_IN6:
+ error = prison_if(curthread->td_ucred,
+ (struct sockaddr *)sin6);
+ if (error == 0)
+ error = sa6_recoverscope(sin6);
+ if (error != 0)
+ memset(sin6, 0, sizeof(*sin6));
+#endif
+ }
break;
case GRESKEY:
- error = priv_check(curthread, PRIV_NET_GRE);
- if (error)
- break;
- error = copyin(ifr->ifr_data, &key, sizeof(key));
- if (error)
+ if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
break;
- /* adjust MTU for option header */
- if (key == 0 && sc->key != 0) /* clear */
- adj += sizeof(key);
- else if (key != 0 && sc->key == 0) /* set */
- adj -= sizeof(key);
-
- if (ifp->if_mtu + adj < 576) {
- error = EINVAL;
+ if ((error = copyin(ifr->ifr_data, &opt, sizeof(opt))) != 0)
break;
+ if (sc->gre_key != opt) {
+ GRE_WLOCK(sc);
+ sc->gre_key = opt;
+ gre_updatehdr(sc);
+ GRE_WUNLOCK(sc);
}
- ifp->if_mtu += adj;
- sc->key = key;
break;
case GREGKEY:
- error = copyout(&sc->key, ifr->ifr_data, sizeof(sc->key));
+ error = copyout(&sc->gre_key, ifr->ifr_data, sizeof(sc->gre_key));
+ break;
+ case GRESOPTS:
+ if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
+ break;
+ if ((error = copyin(ifr->ifr_data, &opt, sizeof(opt))) != 0)
+ break;
+ if (opt & ~GRE_OPTMASK)
+ error = EINVAL;
+ else {
+ if (sc->gre_options != opt) {
+ GRE_WLOCK(sc);
+ sc->gre_options = opt;
+ gre_updatehdr(sc);
+ GRE_WUNLOCK(sc);
+ }
+ }
break;
+ case GREGOPTS:
+ error = copyout(&sc->gre_options, ifr->ifr_data,
+ sizeof(sc->gre_options));
+ break;
default:
error = EINVAL;
break;
}
-
+end:
+ sx_xunlock(&gre_ioctl_sx);
return (error);
}
-/*
- * computes a route to our destination that is not the one
- * which would be taken by ip_output(), as this one will loop back to
- * us. If the interface is p2p as a--->b, then a routing entry exists
- * If we now send a packet to b (e.g. ping b), this will come down here
- * gets src=a, dst=b tacked on and would from ip_output() sent back to
- * if_gre.
- * Goal here is to compute a route to b that is less specific than
- * a-->b. We know that this one exists as in normal operation we have
- * at least a default route which matches.
- */
-static int
-gre_compute_route(struct gre_softc *sc)
+static void
+gre_updatehdr(struct gre_softc *sc)
+{
+ struct grehdr *gh = NULL;
+ uint32_t *opts;
+ uint16_t flags;
+
+ GRE_WLOCK_ASSERT(sc);
+ switch (sc->gre_family) {
+#ifdef INET
+ case AF_INET:
+ sc->gre_hlen = sizeof(struct greip);
+ sc->gre_oip.ip_v = IPPROTO_IPV4;
+ sc->gre_oip.ip_hl = sizeof(struct ip) >> 2;
+ sc->gre_oip.ip_p = IPPROTO_GRE;
+ gh = &sc->gre_gihdr->gi_gre;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ sc->gre_hlen = sizeof(struct greip6);
+ sc->gre_oip6.ip6_vfc = IPV6_VERSION;
+ sc->gre_oip6.ip6_nxt = IPPROTO_GRE;
+ gh = &sc->gre_gi6hdr->gi6_gre;
+ break;
+#endif
+ default:
+ return;
+ }
+ flags = 0;
+ opts = gh->gre_opts;
+ if (sc->gre_options & GRE_ENABLE_CSUM) {
+ flags |= GRE_FLAGS_CP;
+ sc->gre_hlen += 2 * sizeof(uint16_t);
+ *opts++ = 0;
+ }
+ if (sc->gre_key != 0) {
+ flags |= GRE_FLAGS_KP;
+ sc->gre_hlen += sizeof(uint32_t);
+ *opts++ = htonl(sc->gre_key);
+ }
+ if (sc->gre_options & GRE_ENABLE_SEQ) {
+ flags |= GRE_FLAGS_SP;
+ sc->gre_hlen += sizeof(uint32_t);
+ *opts++ = 0;
+ } else
+ sc->gre_oseq = 0;
+ gh->gre_flags = htons(flags);
+ GRE2IFP(sc)->if_mtu = sc->gre_mtu - sc->gre_hlen;
+}
+
+static void
+gre_detach(struct gre_softc *sc)
{
- struct route *ro;
- ro = &sc->route;
+ sx_assert(&gre_ioctl_sx, SA_XLOCKED);
+ if (sc->gre_ecookie != NULL)
+ encap_detach(sc->gre_ecookie);
+ sc->gre_ecookie = NULL;
+}
- memset(ro, 0, sizeof(struct route));
- ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
- ro->ro_dst.sa_family = AF_INET;
- ro->ro_dst.sa_len = sizeof(ro->ro_dst);
+static int
+gre_set_tunnel(struct ifnet *ifp, struct sockaddr *src,
+ struct sockaddr *dst)
+{
+ struct gre_softc *sc, *tsc;
+#ifdef INET6
+ struct ip6_hdr *ip6;
+#endif
+#ifdef INET
+ struct ip *ip;
+#endif
+ void *hdr;
+ int error;
- /*
- * toggle last bit, so our interface is not found, but a less
- * specific route. I'd rather like to specify a shorter mask,
- * but this is not possible. Should work though. XXX
- * XXX MRT Use a different FIB for the tunnel to solve this problem.
- */
- if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0) {
- ((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr ^=
- htonl(0x01);
+ sx_assert(&gre_ioctl_sx, SA_XLOCKED);
+ GRE_LIST_LOCK();
+ sc = ifp->if_softc;
+ LIST_FOREACH(tsc, &V_gre_softc_list, gre_list) {
+ if (tsc == sc || tsc->gre_family != src->sa_family)
+ continue;
+#ifdef INET
+ if (tsc->gre_family == AF_INET &&
+ tsc->gre_oip.ip_src.s_addr ==
+ satosin(src)->sin_addr.s_addr &&
+ tsc->gre_oip.ip_dst.s_addr ==
+ satosin(dst)->sin_addr.s_addr) {
+ GRE_LIST_UNLOCK();
+ return (EADDRNOTAVAIL);
+ }
+#endif
+#ifdef INET6
+ if (tsc->gre_family == AF_INET6 &&
+ IN6_ARE_ADDR_EQUAL(&tsc->gre_oip6.ip6_src,
+ &satosin6(src)->sin6_addr) &&
+ IN6_ARE_ADDR_EQUAL(&tsc->gre_oip6.ip6_dst,
+ &satosin6(dst)->sin6_addr)) {
+ GRE_LIST_UNLOCK();
+ return (EADDRNOTAVAIL);
+ }
+#endif
}
+ GRE_LIST_UNLOCK();
-#ifdef DIAGNOSTIC
- printf("%s: searching for a route to %s", if_name(GRE2IFP(sc)),
- inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr));
+ switch (src->sa_family) {
+#ifdef INET
+ case AF_INET:
+ hdr = ip = malloc(sizeof(struct greip) +
+ 3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO);
+ ip->ip_src = satosin(src)->sin_addr;
+ ip->ip_dst = satosin(dst)->sin_addr;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ hdr = ip6 = malloc(sizeof(struct greip6) +
+ 3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO);
+ ip6->ip6_src = satosin6(src)->sin6_addr;
+ ip6->ip6_dst = satosin6(dst)->sin6_addr;
+ break;
+#endif
+ default:
+ return (EAFNOSUPPORT);
+ }
+ if (sc->gre_family != src->sa_family)
+ gre_detach(sc);
+ GRE_WLOCK(sc);
+ if (sc->gre_family != 0)
+ free(sc->gre_hdr, M_GRE);
+ sc->gre_family = src->sa_family;
+ sc->gre_hdr = hdr;
+ sc->gre_oseq = 0;
+ sc->gre_iseq = UINT32_MAX;
+ gre_updatehdr(sc);
+ GRE_WUNLOCK(sc);
+
+ switch (src->sa_family) {
+#ifdef INET
+ case AF_INET:
+ error = in_gre_attach(sc);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ error = in6_gre_attach(sc);
+ break;
#endif
+ }
+ if (error == 0)
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ return (error);
+}
- rtalloc_fib(ro, sc->gre_fibnum);
+/*
+ * Remove the tunnel configuration from ifp.  The delivery family is reset
+ * to 0 under the softc write lock; if a tunnel was configured, the softc
+ * is detached and its prebuilt header is freed.  IFF_DRV_RUNNING is
+ * cleared in every case.
+ */
+static void
+gre_delete_tunnel(struct ifnet *ifp)
+{
+	struct gre_softc *sc;
+	int oaf;
+
+	sc = ifp->if_softc;
+	/* gre_transmit() treats a family of 0 as "no tunnel configured". */
+	GRE_WLOCK(sc);
+	oaf = sc->gre_family;
+	sc->gre_family = 0;
+	GRE_WUNLOCK(sc);
+	if (oaf != 0) {
+		gre_detach(sc);
+		free(sc->gre_hdr, M_GRE);
+	}
+	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+}
+/*
+ * Input handler for GRE packets: validate the GRE header, strip the
+ * delivery and GRE headers and hand the inner packet to the netisr of its
+ * address family.
+ *
+ * *mp is the received packet and *offp the offset of the GRE header in
+ * it; proto is not used.  The softc is obtained with encap_getarg().
+ * The mbuf is always consumed and IPPROTO_DONE is always returned;
+ * dropped packets are counted in IFCOUNTER_IERRORS.
+ *
+ * NOTE(review): the GRE header is read through mtodo() with no m_pullup()
+ * or length check visible here -- presumably the encap check guarantees
+ * it is contiguous; confirm.
+ */
+int
+gre_input(struct mbuf **mp, int *offp, int proto)
+{
+ struct gre_softc *sc;
+ struct grehdr *gh;
+ struct ifnet *ifp;
+ struct mbuf *m;
+ uint32_t *opts, key;
+ uint16_t flags;
+ int hlen, isr, af;
+
+ m = *mp;
+ sc = encap_getarg(m);
+ KASSERT(sc != NULL, ("encap_getarg returned NULL"));
+
+ ifp = GRE2IFP(sc);
+ gh = (struct grehdr *)mtodo(m, *offp);
+ flags = ntohs(gh->gre_flags);
+ /* Only the checksum, key and sequence bits are accepted. */
+ if (flags & ~GRE_FLAGS_MASK)
+ goto drop;
+ opts = gh->gre_opts;
+ /* Mandatory part of the header: flags + protocol type. */
+ hlen = 2 * sizeof(uint16_t);
+ if (flags & GRE_FLAGS_CP) {
+ /* reserved1 field must be zero */
+ if (((uint16_t *)opts)[1] != 0)
+ goto drop;
+ /* The checksum covers the packet from the GRE header onwards. */
+ if (in_cksum_skip(m, m->m_pkthdr.len, *offp) != 0)
+ goto drop;
+ hlen += 2 * sizeof(uint16_t);
+ opts++;
+ }
+ if (flags & GRE_FLAGS_KP) {
+ key = ntohl(*opts);
+ hlen += sizeof(uint32_t);
+ opts++;
+ } else
+ key = 0;
+ /* The key is parsed, but the match against sc->gre_key is disabled. */
 /*
- * check if this returned a route at all and this route is no
- * recursion to ourself
- */
- if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) {
-#ifdef DIAGNOSTIC
- if (ro->ro_rt == NULL)
- printf(" - no route found!\n");
- else
- printf(" - route loops back to ourself!\n");
-#endif
- return EADDRNOTAVAIL;
+ if (sc->gre_key != 0 && (key != sc->gre_key || key != 0))
+ goto drop;
+ */
+ if (flags & GRE_FLAGS_SP) {
+ /* The sequence number is skipped, not verified. */
+ /* seq = ntohl(*opts); */
+ hlen += sizeof(uint32_t);
 }
+ switch (ntohs(gh->gre_proto)) {
+ case ETHERTYPE_WCCP:
+ /*
+ * For WCCP, skip an additional 4 bytes when the GRE header is
+ * not directly followed by an IPv4 header (version nibble 4).
+ */
+ if (flags == 0 && (*(uint8_t *)gh->gre_opts & 0xF0) != 0x40)
+ hlen += sizeof(uint32_t);
+ /* FALLTHROUGH */
+ case ETHERTYPE_IP:
+ isr = NETISR_IP;
+ af = AF_INET;
+ break;
+ case ETHERTYPE_IPV6:
+ isr = NETISR_IPV6;
+ af = AF_INET6;
+ break;
+ default:
+ goto drop;
+ }
+ /* Strip everything up to and including the GRE header. */
+ m_adj(m, *offp + hlen);
+ m_clrprotoflags(m);
+ m->m_pkthdr.rcvif = ifp;
+ M_SETFIB(m, sc->gre_fibnum);
+#ifdef MAC
+ mac_ifnet_create_mbuf(ifp, m);
+#endif
+ BPF_MTAP2(ifp, &af, sizeof(af), m);
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
+ /* With IFF_MONITOR set the packet is tapped and counted, not delivered. */
+ if ((ifp->if_flags & IFF_MONITOR) != 0)
+ m_freem(m);
+ else
+ netisr_dispatch(isr, m);
+ return (IPPROTO_DONE);
+drop:
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+ m_freem(m);
+ return (IPPROTO_DONE);
+}
- /*
- * now change it back - else ip_output will just drop
- * the route and search one to this interface ...
- */
- if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0)
- ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
+#define MTAG_GRE 1307983903
+/*
+ * Guard against recursive encapsulation through gre(4) interfaces.
+ *
+ * Every gre interface a packet passes through attaches an MTAG_GRE tag
+ * holding its ifnet pointer.  Returns EIO if ifp is already recorded on
+ * the mbuf (loop) or if the nesting depth would exceed V_max_gre_nesting,
+ * ENOMEM if a new tag cannot be allocated, and 0 after tagging m with ifp.
+ * The mbuf is not freed here.
+ */
+static int
+gre_check_nesting(struct ifnet *ifp, struct mbuf *m)
+{
+	struct m_tag *mtag;
+	int count;
+
+	count = 1;
+	mtag = NULL;
+	/*
+	 * Resume each lookup after the tag found last time.  Restarting
+	 * from NULL would return the same first tag on every iteration and
+	 * the loop would never end once the mbuf carries a tag that belongs
+	 * to another gre interface.
+	 */
+	while ((mtag = m_tag_locate(m, MTAG_GRE, 0, mtag)) != NULL) {
+		/* The tag payload is the ifnet pointer stored below. */
+		if (*(struct ifnet **)(mtag + 1) == ifp) {
+			log(LOG_NOTICE, "%s: loop detected\n", ifp->if_xname);
+			return (EIO);
+		}
+		count++;
+	}
+	if (count > V_max_gre_nesting) {
+		log(LOG_NOTICE,
+		    "%s: if_output recursively called too many times(%d)\n",
+		    ifp->if_xname, count);
+		return (EIO);
+	}
+	mtag = m_tag_alloc(MTAG_GRE, 0, sizeof(struct ifnet *), M_NOWAIT);
+	if (mtag == NULL)
+		return (ENOMEM);
+	*(struct ifnet **)(mtag + 1) = ifp;
+	m_tag_prepend(m, mtag);
+	return (0);
+}
+
+/*
+ * if_output handler: perform the MAC, interface-state and nesting checks,
+ * record the payload address family and pass the packet to if_transmit.
+ *
+ * For AF_UNSPEC destinations the family is read from dst->sa_data.
+ * On failure the mbuf is freed, IFCOUNTER_OERRORS is incremented and the
+ * errno value is returned; otherwise the result of if_transmit is
+ * returned.  ro is not used.
+ */
+static int
+gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+ struct route *ro)
+{
+ uint32_t af;
+ int error;
-#ifdef DIAGNOSTIC
- printf(", choosing %s with gateway %s", if_name(ro->ro_rt->rt_ifp),
- inet_ntoa(((struct sockaddr_in *)(ro->ro_rt->rt_gateway))->sin_addr));
- printf("\n");
+#ifdef MAC
+ error = mac_ifnet_check_transmit(ifp, m);
+ if (error != 0)
+ goto drop;
 #endif
+ /* Refuse to send in monitor mode or while the interface is down. */
+ if ((ifp->if_flags & IFF_MONITOR) != 0 ||
+ (ifp->if_flags & IFF_UP) == 0) {
+ error = ENETDOWN;
+ goto drop;
+ }
+
+ error = gre_check_nesting(ifp, m);
+ if (error != 0)
+ goto drop;
- return 0;
+ m->m_flags &= ~(M_BCAST|M_MCAST);
+ if (dst->sa_family == AF_UNSPEC)
+ bcopy(dst->sa_data, &af, sizeof(af));
+ else
+ af = dst->sa_family;
+ BPF_MTAP2(ifp, &af, sizeof(af), m);
+ /* gre_transmit() reads the family back from csum_data. */
+ m->m_pkthdr.csum_data = af; /* save af for if_transmit */
+ return (ifp->if_transmit(ifp, m));
+drop:
+ m_freem(m);
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ return (error);
 }
-/*
- * do a checksum of a buffer - much like in_cksum, which operates on
- * mbufs.
- */
-u_int16_t
-gre_in_cksum(u_int16_t *p, u_int len)
+/*
+ * Store seq, in network byte order, into the sequence number field of the
+ * GRE header gh.  The field follows the optional checksum and key words,
+ * so its index in gre_opts depends on which of those flags are set.
+ * GRE_FLAGS_SP must already be set in the header.
+ */
+static void
+gre_setseqn(struct grehdr *gh, uint32_t seq)
+{
+	uint16_t hflags;
+	int idx;
+
+	hflags = ntohs(gh->gre_flags);
+	KASSERT((hflags & GRE_FLAGS_SP) != 0,
+	    ("gre_setseqn called, but GRE_FLAGS_SP isn't set "));
+	/* Count the optional 32-bit words that precede the sequence number. */
+	idx = 0;
+	if ((hflags & GRE_FLAGS_CP) != 0)
+		idx++;
+	if ((hflags & GRE_FLAGS_KP) != 0)
+		idx++;
+	gh->gre_opts[idx] = htonl(seq);
+}
+
+/*
+ * if_transmit handler: prepend the prebuilt delivery + GRE header to m,
+ * fill in the per-packet GRE fields (protocol type, optional sequence
+ * number and checksum) and pass the result to the IPv4 or IPv6 output
+ * routine.
+ *
+ * The payload address family is taken from m->m_pkthdr.csum_data, where
+ * gre_output() stored it.  Returns 0 or an errno value.  Failures are
+ * counted in IFCOUNTER_OERRORS, successes in IFCOUNTER_OPACKETS and
+ * IFCOUNTER_OBYTES (payload bytes only).  Every error path in this
+ * function frees the mbuf; in_gre_output()/in6_gre_output() are expected
+ * to consume it otherwise.
+ */
+static int
+gre_transmit(struct ifnet *ifp, struct mbuf *m)
 {
- u_int32_t sum = 0;
- int nwords = len >> 1;
-
- while (nwords-- != 0)
- sum += *p++;
-
- if (len & 1) {
- union {
- u_short w;
- u_char c[2];
- } u;
- u.c[0] = *(u_char *)p;
- u.c[1] = 0;
- sum += u.w;
+	GRE_RLOCK_TRACKER;
+	struct gre_softc *sc;
+	struct grehdr *gh;
+	uint32_t iaf, oaf, oseq;
+	int error, hlen, olen, plen;
+	int want_seq, want_csum;
+
+	plen = 0;
+	sc = ifp->if_softc;
+	if (sc == NULL) {
+		error = ENETDOWN;
+		m_freem(m);
+		goto drop;
+	}
+	GRE_RLOCK(sc);
+	/* A family of 0 means that no tunnel is configured. */
+	if (sc->gre_family == 0) {
+		GRE_RUNLOCK(sc);
+		error = ENETDOWN;
+		m_freem(m);
+		goto drop;
+	}
+	iaf = m->m_pkthdr.csum_data;
+	oaf = sc->gre_family;
+	hlen = sc->gre_hlen;
+	want_seq = (sc->gre_options & GRE_ENABLE_SEQ) != 0;
+	if (want_seq)
+		oseq = sc->gre_oseq++; /* XXX: updated under the read lock */
+	want_csum = (sc->gre_options & GRE_ENABLE_CSUM) != 0;
+	M_SETFIB(m, sc->gre_fibnum);
+	M_PREPEND(m, hlen, M_NOWAIT);
+	if (m == NULL) {
+		/* M_PREPEND already disposed of the chain. */
+		GRE_RUNLOCK(sc);
+		error = ENOBUFS;
+		goto drop;
+	}
+	/* Copy the header template while the softc is still locked. */
+	bcopy(sc->gre_hdr, mtod(m, void *), hlen);
+	GRE_RUNLOCK(sc);
+	switch (oaf) {
+#ifdef INET
+	case AF_INET:
+		olen = sizeof(struct ip);
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		olen = sizeof(struct ip6_hdr);
+		break;
+#endif
+	default:
+		/* We still own the mbuf here: free it instead of leaking it. */
+		m_freem(m);
+		error = ENETDOWN;
+		goto drop;
 }
+	gh = (struct grehdr *)mtodo(m, olen);
+	switch (iaf) {
+#ifdef INET
+	case AF_INET:
+		gh->gre_proto = htons(ETHERTYPE_IP);
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		gh->gre_proto = htons(ETHERTYPE_IPV6);
+		break;
+#endif
+	default:
+		/* Unsupported payload family: free the mbuf before bailing. */
+		m_freem(m);
+		error = ENETDOWN;
+		goto drop;
+	}
+	if (want_seq)
+		gre_setseqn(gh, oseq);
+	if (want_csum) {
+		/* The checksum covers the GRE header and the payload. */
+		*(uint16_t *)gh->gre_opts = in_cksum_skip(m,
+		    m->m_pkthdr.len, olen);
+	}
+	/* Account only the payload, not the headers added above. */
+	plen = m->m_pkthdr.len - hlen;
+	switch (oaf) {
+#ifdef INET
+	case AF_INET:
+		error = in_gre_output(m, iaf, hlen);
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		error = in6_gre_output(m, iaf, hlen);
+		break;
+#endif
+	default:
+		m_freem(m);
+		error = ENETDOWN;
+	}
+drop:
+	if (error)
+		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+	else {
+		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+		if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
+	}
+	return (error);
+}
+
+/*
+ * if_qflush handler.  Intentionally empty: no queue is flushed here
+ * (gre_transmit() hands each packet to the output routine directly).
+ */
+static void
+gre_qflush(struct ifnet *ifp __unused)
+{
- /* end-around-carry */
- sum = (sum >> 16) + (sum & 0xffff);
- sum += (sum >> 16);
- return (~sum);
 }
static int
diff --git a/sys/net/if_gre.h b/sys/net/if_gre.h
index cb2a44b..3a48efe 100644
--- a/sys/net/if_gre.h
+++ b/sys/net/if_gre.h
@@ -1,8 +1,6 @@
-/* $NetBSD: if_gre.h,v 1.13 2003/11/10 08:51:52 wiz Exp $ */
-/* $FreeBSD$ */
-
/*-
* Copyright (c) 1998 The NetBSD Foundation, Inc.
+ * Copyright (c) 2014 Andrey V. Elsukov <ae@FreeBSD.org>
* All rights reserved
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -28,166 +26,102 @@
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $NetBSD: if_gre.h,v 1.13 2003/11/10 08:51:52 wiz Exp $
+ * $FreeBSD$
*/
-#ifndef _NET_IF_GRE_H
-#define _NET_IF_GRE_H
+#ifndef _NET_IF_GRE_H_
+#define _NET_IF_GRE_H_
-#include <sys/ioccom.h>
#ifdef _KERNEL
-#include <sys/queue.h>
-
-/*
- * Version of the WCCP, need to be configured manually since
- * header for version 2 is the same but IP payload is prepended
- * with additional 4-bytes field.
- */
-typedef enum {
- WCCP_V1 = 0,
- WCCP_V2
-} wccp_ver_t;
-
-struct gre_softc {
- struct ifnet *sc_ifp;
- LIST_ENTRY(gre_softc) sc_list;
- int gre_unit;
- int gre_flags;
- u_int gre_fibnum; /* use this fib for envelopes */
- struct in_addr g_src; /* source address of gre packets */
- struct in_addr g_dst; /* destination address of gre packets */
- struct route route; /* routing entry that determines, where a
- encapsulated packet should go */
- u_char g_proto; /* protocol of encapsulator */
-
- const struct encaptab *encap; /* encapsulation cookie */
-
- uint32_t key; /* key included in outgoing GRE packets */
- /* zero means none */
-
- wccp_ver_t wccp_ver; /* version of the WCCP */
-};
-#define GRE2IFP(sc) ((sc)->sc_ifp)
-
-
-struct gre_h {
- u_int16_t flags; /* GRE flags */
- u_int16_t ptype; /* protocol type of payload typically
- Ether protocol type*/
- uint32_t options[0]; /* optional options */
-/*
- * from here on: fields are optional, presence indicated by flags
- *
- u_int_16 checksum checksum (one-complements of GRE header
- and payload
- Present if (ck_pres | rt_pres == 1).
- Valid if (ck_pres == 1).
- u_int_16 offset offset from start of routing filed to
- first octet of active SRE (see below).
- Present if (ck_pres | rt_pres == 1).
- Valid if (rt_pres == 1).
- u_int_32 key inserted by encapsulator e.g. for
- authentication
- Present if (key_pres ==1 ).
- u_int_32 seq_num Sequence number to allow for packet order
- Present if (seq_pres ==1 ).
- struct gre_sre[] routing Routing fileds (see below)
- Present if (rt_pres == 1)
- */
+/*
+ * GRE header according to RFC 2784 and RFC 2890.
+ *
+ * gre_flags and gre_proto are kept in network byte order.  The optional
+ * 32-bit words follow in gre_opts in this order, each one present only
+ * when its flag is set: checksum + reserved1, key, sequence number.
+ */
+struct grehdr {
+ uint16_t gre_flags; /* GRE flags */
+#define GRE_FLAGS_CP 0x8000 /* checksum present */
+#define GRE_FLAGS_KP 0x2000 /* key present */
+#define GRE_FLAGS_SP 0x1000 /* sequence present */
+ /* Any flag bit outside this mask makes gre_input() drop the packet. */
+#define GRE_FLAGS_MASK (GRE_FLAGS_CP|GRE_FLAGS_KP|GRE_FLAGS_SP)
+ uint16_t gre_proto; /* protocol type */
+ uint32_t gre_opts[0]; /* optional fields */
 } __packed;
+#ifdef INET
+/* IPv4 delivery header immediately followed by the GRE header. */
 struct greip {
- struct ip gi_i;
- struct gre_h gi_g;
+ struct ip gi_ip;
+ struct grehdr gi_gre;
 } __packed;
+#endif
-#define gi_pr gi_i.ip_p
-#define gi_len gi_i.ip_len
-#define gi_src gi_i.ip_src
-#define gi_dst gi_i.ip_dst
-#define gi_ptype gi_g.ptype
-#define gi_flags gi_g.flags
-#define gi_options gi_g.options
+#ifdef INET6
+/* IPv6 delivery header immediately followed by the GRE header. */
+struct greip6 {
+ struct ip6_hdr gi6_ip6;
+ struct grehdr gi6_gre;
+} __packed;
+#endif
-#define GRE_CP 0x8000 /* Checksum Present */
-#define GRE_RP 0x4000 /* Routing Present */
-#define GRE_KP 0x2000 /* Key Present */
-#define GRE_SP 0x1000 /* Sequence Present */
-#define GRE_SS 0x0800 /* Strict Source Route */
+/*
+ * Per-interface state of a gre(4) tunnel.  Accessed under gre_lock via
+ * the GRE_RLOCK()/GRE_WLOCK() macros.
+ */
+struct gre_softc {
+ struct ifnet *gre_ifp; /* owning interface, see GRE2IFP() */
+ LIST_ENTRY(gre_softc) gre_list; /* linkage on V_gre_softc_list */
+ struct rmlock gre_lock;
+ int gre_family; /* AF of delivery header */
+ /* 0 while no tunnel is configured */
+ uint32_t gre_iseq; /* reset to UINT32_MAX by gre_set_tunnel() */
+ uint32_t gre_oseq; /* next outgoing sequence number */
+ uint32_t gre_key; /* presumably the tunnel key -- confirm */
+ uint32_t gre_options; /* GRE_ENABLE_* bits */
+ uint32_t gre_mtu;
+ u_int gre_fibnum; /* FIB set on packets with M_SETFIB() */
+ u_int gre_hlen; /* header size */
+ /* Prebuilt delivery + GRE header that gre_transmit() copies. */
+ union {
+ void *hdr;
+#ifdef INET
+ struct greip *gihdr;
+#endif
+#ifdef INET6
+ struct greip6 *gi6hdr;
+#endif
+ } gre_uhdr;
+ const struct encaptab *gre_ecookie; /* presumably the encap attachment cookie -- confirm */
+};
+#define GRE2IFP(sc) ((sc)->gre_ifp)
+#define GRE_LOCK_INIT(sc) rm_init(&(sc)->gre_lock, "gre softc")
+#define GRE_LOCK_DESTROY(sc) rm_destroy(&(sc)->gre_lock)
+#define GRE_RLOCK_TRACKER struct rm_priotracker gre_tracker
+#define GRE_RLOCK(sc) rm_rlock(&(sc)->gre_lock, &gre_tracker)
+#define GRE_RUNLOCK(sc) rm_runlock(&(sc)->gre_lock, &gre_tracker)
+#define GRE_RLOCK_ASSERT(sc) rm_assert(&(sc)->gre_lock, RA_RLOCKED)
+#define GRE_WLOCK(sc) rm_wlock(&(sc)->gre_lock)
+#define GRE_WUNLOCK(sc) rm_wunlock(&(sc)->gre_lock)
+#define GRE_WLOCK_ASSERT(sc) rm_assert(&(sc)->gre_lock, RA_WLOCKED)
+
+#define gre_hdr gre_uhdr.hdr
+#define gre_gihdr gre_uhdr.gihdr
+#define gre_gi6hdr gre_uhdr.gi6hdr
+#define gre_oip gre_gihdr->gi_ip
+#define gre_oip6 gre_gi6hdr->gi6_ip6
/*
* CISCO uses special type for GRE tunnel created as part of WCCP
* connection, while in fact those packets are just IPv4 encapsulated
* into GRE.
*/
-#define WCCP_PROTOCOL_TYPE 0x883E
-
-/*
- * gre_sre defines a Source route Entry. These are needed if packets
- * should be routed over more than one tunnel hop by hop
- */
-struct gre_sre {
- u_int16_t sre_family; /* address family */
- u_char sre_offset; /* offset to first octet of active entry */
- u_char sre_length; /* number of octets in the SRE.
- sre_lengthl==0 -> last entry. */
- u_char *sre_rtinfo; /* the routing information */
-};
-
-struct greioctl {
- int unit;
- struct in_addr addr;
-};
-
-/* for mobile encaps */
-
-struct mobile_h {
- u_int16_t proto; /* protocol and S-bit */
- u_int16_t hcrc; /* header checksum */
- u_int32_t odst; /* original destination address */
- u_int32_t osrc; /* original source addr, if S-bit set */
-} __packed;
-
-struct mobip_h {
- struct ip mi;
- struct mobile_h mh;
-} __packed;
-
-
-#define MOB_H_SIZ_S (sizeof(struct mobile_h) - sizeof(u_int32_t))
-#define MOB_H_SIZ_L (sizeof(struct mobile_h))
-#define MOB_H_SBIT 0x0080
-
-#define GRE_TTL 30
-
+#define ETHERTYPE_WCCP 0x883E
#endif /* _KERNEL */
-/*
- * ioctls needed to manipulate the interface
- */
-
#define GRESADDRS _IOW('i', 101, struct ifreq)
#define GRESADDRD _IOW('i', 102, struct ifreq)
#define GREGADDRS _IOWR('i', 103, struct ifreq)
#define GREGADDRD _IOWR('i', 104, struct ifreq)
#define GRESPROTO _IOW('i' , 105, struct ifreq)
#define GREGPROTO _IOWR('i', 106, struct ifreq)
-#define GREGKEY _IOWR('i', 107, struct ifreq)
-#define GRESKEY _IOW('i', 108, struct ifreq)
-#ifdef _KERNEL
-LIST_HEAD(gre_softc_head, gre_softc);
-VNET_DECLARE(struct gre_softc_head, gre_softc_list);
-#define V_gre_softc_list VNET(gre_softc_list)
+#define GREGKEY _IOWR('i', 107, struct ifreq)
+#define GRESKEY _IOW('i', 108, struct ifreq)
+#define GREGOPTS _IOWR('i', 109, struct ifreq)
+#define GRESOPTS _IOW('i', 110, struct ifreq)
-VNET_DECLARE(struct mtx, gre_mtx);
-#define V_gre_mtx VNET(gre_mtx)
-#define GRE_LIST_LOCK_INIT(x) mtx_init(&V_gre_mtx, "gre_mtx", NULL, \
- MTX_DEF)
-#define GRE_LIST_LOCK_DESTROY(x) mtx_destroy(&V_gre_mtx)
-#define GRE_LIST_LOCK(x) mtx_lock(&V_gre_mtx)
-#define GRE_LIST_UNLOCK(x) mtx_unlock(&V_gre_mtx)
+/* Option bits tested against gre_softc.gre_options in gre_transmit(). */
+#define GRE_ENABLE_CSUM 0x0001
+#define GRE_ENABLE_SEQ 0x0002
+#define GRE_OPTMASK (GRE_ENABLE_CSUM|GRE_ENABLE_SEQ)
-u_int16_t gre_in_cksum(u_int16_t *, u_int);
-#endif /* _KERNEL */
-
-#endif
+#endif /* _NET_IF_GRE_H_ */
diff --git a/sys/net/if_me.c b/sys/net/if_me.c
new file mode 100644
index 0000000..a00bdd2
--- /dev/null
+++ b/sys/net/if_me.c
@@ -0,0 +1,647 @@
+/*-
+ * Copyright (c) 2014 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/jail.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/libkern.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mbuf.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/protosw.h>
+#include <sys/rmlock.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/sx.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/systm.h>
+
+#include <net/bpf.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_clone.h>
+#include <net/if_types.h>
+#include <net/netisr.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_encap.h>
+
+#include <machine/in_cksum.h>
+#include <security/mac/mac_framework.h>
+
+#define MEMTU 1500
+static const char mename[] = "me";
+static MALLOC_DEFINE(M_IFME, mename, "Minimal Encapsulation for IP");
+static VNET_DEFINE(struct mtx, me_mtx);
+#define V_me_mtx VNET(me_mtx)
+/*
+ * Minimal forwarding header (RFC 2004).  mob_src is on the wire only when
+ * MOB_FLAGS_SP is set; without it the header is
+ * sizeof(struct mobhdr) - sizeof(struct in_addr) bytes long.
+ */
+struct mobhdr {
+ uint8_t mob_proto; /* protocol */
+ uint8_t mob_flags; /* flags */
+#define MOB_FLAGS_SP 0x80 /* source present */
+ uint16_t mob_csum; /* header checksum */
+ struct in_addr mob_dst; /* original destination address */
+ struct in_addr mob_src; /* original source addr (optional) */
+} __packed;
+
+/*
+ * Per-interface state of a me(4) tunnel.  me_src and me_dst are accessed
+ * under me_lock; a zero me_src means no tunnel is configured (ME_READY).
+ */
+struct me_softc {
+ struct ifnet *me_ifp; /* owning interface, see ME2IFP() */
+ LIST_ENTRY(me_softc) me_list; /* linkage on V_me_softc_list */
+ struct rmlock me_lock;
+ u_int me_fibnum; /* FIB set on packets with M_SETFIB() */
+ const struct encaptab *me_ecookie; /* encap_attach_func() result */
+ struct in_addr me_src; /* outer source address */
+ struct in_addr me_dst; /* outer destination address */
+};
+#define ME2IFP(sc) ((sc)->me_ifp)
+#define ME_READY(sc) ((sc)->me_src.s_addr != 0)
+#define ME_LOCK_INIT(sc) rm_init(&(sc)->me_lock, "me softc")
+#define ME_LOCK_DESTROY(sc) rm_destroy(&(sc)->me_lock)
+#define ME_RLOCK_TRACKER struct rm_priotracker me_tracker
+#define ME_RLOCK(sc) rm_rlock(&(sc)->me_lock, &me_tracker)
+#define ME_RUNLOCK(sc) rm_runlock(&(sc)->me_lock, &me_tracker)
+#define ME_RLOCK_ASSERT(sc) rm_assert(&(sc)->me_lock, RA_RLOCKED)
+#define ME_WLOCK(sc) rm_wlock(&(sc)->me_lock)
+#define ME_WUNLOCK(sc) rm_wunlock(&(sc)->me_lock)
+#define ME_WLOCK_ASSERT(sc) rm_assert(&(sc)->me_lock, RA_WLOCKED)
+
+#define ME_LIST_LOCK_INIT(x) mtx_init(&V_me_mtx, "me_mtx", NULL, MTX_DEF)
+#define ME_LIST_LOCK_DESTROY(x) mtx_destroy(&V_me_mtx)
+#define ME_LIST_LOCK(x) mtx_lock(&V_me_mtx)
+#define ME_LIST_UNLOCK(x) mtx_unlock(&V_me_mtx)
+
+static VNET_DEFINE(LIST_HEAD(, me_softc), me_softc_list);
+#define V_me_softc_list VNET(me_softc_list)
+static struct sx me_ioctl_sx;
+SX_SYSINIT(me_ioctl_sx, &me_ioctl_sx, "me_ioctl");
+
+static int me_clone_create(struct if_clone *, int, caddr_t);
+static void me_clone_destroy(struct ifnet *);
+static VNET_DEFINE(struct if_clone *, me_cloner);
+#define V_me_cloner VNET(me_cloner)
+
+static void me_qflush(struct ifnet *);
+static int me_transmit(struct ifnet *, struct mbuf *);
+static int me_ioctl(struct ifnet *, u_long, caddr_t);
+static int me_output(struct ifnet *, struct mbuf *,
+ const struct sockaddr *, struct route *);
+static int me_input(struct mbuf **, int *, int);
+
+static int me_set_tunnel(struct ifnet *, struct sockaddr_in *,
+ struct sockaddr_in *);
+static void me_delete_tunnel(struct ifnet *);
+
+SYSCTL_DECL(_net_link);
+static SYSCTL_NODE(_net_link, IFT_TUNNEL, me, CTLFLAG_RW, 0,
+ "Minimal Encapsulation for IP (RFC 2004)");
+#ifndef MAX_ME_NEST
+#define MAX_ME_NEST 1
+#endif
+
+static VNET_DEFINE(int, max_me_nesting) = MAX_ME_NEST;
+#define V_max_me_nesting VNET(max_me_nesting)
+SYSCTL_INT(_net_link_me, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET,
+ &VNET_NAME(max_me_nesting), 0, "Max nested tunnels");
+
+extern struct domain inetdomain;
+/*
+ * Protocol switch entry handed to encap_attach_func() in me_set_tunnel():
+ * input goes to me_input(), the remaining handlers are the raw IP ones.
+ */
+static const struct protosw in_mobile_protosw = {
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_MOBILE,
+ .pr_flags = PR_ATOMIC|PR_ADDR,
+ .pr_input = me_input,
+ .pr_output = rip_output,
+ .pr_ctlinput = rip_ctlinput,
+ .pr_ctloutput = rip_ctloutput,
+ .pr_usrreqs = &rip_usrreqs
+};
+
+/*
+ * Per-vnet initialization: set up the softc list and its mutex, then
+ * register the "me" interface cloner.
+ */
+static void
+vnet_me_init(const void *unused __unused)
+{
+
+	ME_LIST_LOCK_INIT();
+	LIST_INIT(&V_me_softc_list);
+	V_me_cloner = if_clone_simple(mename, me_clone_create,
+	    me_clone_destroy, 0);
+}
+VNET_SYSINIT(vnet_me_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+    vnet_me_init, NULL);
+
+/*
+ * Per-vnet teardown: detach the cloner, then destroy the list mutex.
+ * The mutex goes last because me_clone_destroy() takes ME_LIST_LOCK;
+ * presumably if_clone_detach() destroys the remaining clones -- confirm.
+ */
+static void
+vnet_me_uninit(const void *unused __unused)
+{
+
+ if_clone_detach(V_me_cloner);
+ ME_LIST_LOCK_DESTROY();
+}
+VNET_SYSUNINIT(vnet_me_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_me_uninit, NULL);
+
+/*
+ * Cloner callback: allocate and attach a new me(4) interface with the
+ * given unit number.  ifc and params are not used.
+ *
+ * Returns 0 on success, or ENOSPC if no ifnet could be allocated; in that
+ * case nothing is left allocated.
+ */
+static int
+me_clone_create(struct if_clone *ifc, int unit, caddr_t params)
+{
+	struct me_softc *sc;
+	struct ifnet *ifp;
+
+	sc = malloc(sizeof(struct me_softc), M_IFME, M_WAITOK | M_ZERO);
+	sc->me_fibnum = curthread->td_proc->p_fibnum;
+	ifp = if_alloc(IFT_TUNNEL);
+	if (ifp == NULL) {
+		/* Do not dereference a failed allocation below. */
+		free(sc, M_IFME);
+		return (ENOSPC);
+	}
+	ME2IFP(sc) = ifp;
+	ME_LOCK_INIT(sc);
+	ifp->if_softc = sc;
+	if_initname(ifp, mename, unit);
+
+	/* Leave room for the largest minimal forwarding header. */
+	ifp->if_mtu = MEMTU - sizeof(struct mobhdr);
+	ifp->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
+	ifp->if_output = me_output;
+	ifp->if_ioctl = me_ioctl;
+	ifp->if_transmit = me_transmit;
+	ifp->if_qflush = me_qflush;
+	if_attach(ifp);
+	bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
+	ME_LIST_LOCK();
+	LIST_INSERT_HEAD(&V_me_softc_list, sc, me_list);
+	ME_LIST_UNLOCK();
+	return (0);
+}
+
+/*
+ * Cloner callback: destroy a me(4) interface.  The tunnel is deleted and
+ * the interface detached under me_ioctl_sx; if_softc is cleared before
+ * the lock is dropped so that me_ioctl() and me_transmit() see NULL and
+ * bail out.  The ifnet and softc are freed after the lock is released.
+ */
+static void
+me_clone_destroy(struct ifnet *ifp)
+{
+ struct me_softc *sc;
+
+ sx_xlock(&me_ioctl_sx);
+ sc = ifp->if_softc;
+ me_delete_tunnel(ifp);
+ ME_LIST_LOCK();
+ LIST_REMOVE(sc, me_list);
+ ME_LIST_UNLOCK();
+ bpfdetach(ifp);
+ if_detach(ifp);
+ ifp->if_softc = NULL;
+ sx_xunlock(&me_ioctl_sx);
+
+ if_free(ifp);
+ ME_LOCK_DESTROY(sc);
+ free(sc, M_IFME);
+}
+
+/*
+ * if_ioctl handler for me(4).
+ *
+ * SIOCSIFMTU, SIOCSIFADDR, SIOCSIFFLAGS, SIOCADDMULTI and SIOCDELMULTI are
+ * handled without locking.  The tunnel requests (SIOCSIFPHYADDR,
+ * SIOCDIFPHYADDR, SIOCGIFPSRCADDR, SIOCGIFPDSTADDR) run under
+ * me_ioctl_sx.  Returns 0 or an errno value: ENXIO if the softc is
+ * already gone, EINVAL for malformed or unknown requests, EADDRNOTAVAIL
+ * for wildcard addresses or when no tunnel is configured.
+ */
+static int
+me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ ME_RLOCK_TRACKER;
+ struct ifreq *ifr = (struct ifreq *)data;
+ struct sockaddr_in *src, *dst;
+ struct me_softc *sc;
+ int error;
+
+ switch (cmd) {
+ case SIOCSIFMTU:
+ if (ifr->ifr_mtu < 576)
+ return (EINVAL);
+ /* The stored MTU excludes the minimal forwarding header. */
+ ifp->if_mtu = ifr->ifr_mtu - sizeof(struct mobhdr);
+ return (0);
+ case SIOCSIFADDR:
+ ifp->if_flags |= IFF_UP;
+ /* FALLTHROUGH */
+ case SIOCSIFFLAGS:
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ return (0);
+ }
+ sx_xlock(&me_ioctl_sx);
+ sc = ifp->if_softc;
+ /* me_clone_destroy() clears if_softc under the same lock. */
+ if (sc == NULL) {
+ error = ENXIO;
+ goto end;
+ }
+ error = 0;
+ switch (cmd) {
+ case SIOCSIFPHYADDR:
+ src = (struct sockaddr_in *)
+ &(((struct in_aliasreq *)data)->ifra_addr);
+ dst = (struct sockaddr_in *)
+ &(((struct in_aliasreq *)data)->ifra_dstaddr);
+ /* Both endpoints must be well-formed AF_INET addresses. */
+ if (src->sin_family != dst->sin_family ||
+ src->sin_family != AF_INET ||
+ src->sin_len != dst->sin_len ||
+ src->sin_len != sizeof(struct sockaddr_in)) {
+ error = EINVAL;
+ break;
+ }
+ if (src->sin_addr.s_addr == INADDR_ANY ||
+ dst->sin_addr.s_addr == INADDR_ANY) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ error = me_set_tunnel(ifp, src, dst);
+ break;
+ case SIOCDIFPHYADDR:
+ me_delete_tunnel(ifp);
+ break;
+ case SIOCGIFPSRCADDR:
+ case SIOCGIFPDSTADDR:
+ ME_RLOCK(sc);
+ if (!ME_READY(sc)) {
+ error = EADDRNOTAVAIL;
+ ME_RUNLOCK(sc);
+ break;
+ }
+ /* "src" is reused as the output buffer for either address. */
+ src = (struct sockaddr_in *)&ifr->ifr_addr;
+ memset(src, 0, sizeof(*src));
+ src->sin_family = AF_INET;
+ src->sin_len = sizeof(*src);
+ switch (cmd) {
+ case SIOCGIFPSRCADDR:
+ src->sin_addr = sc->me_src;
+ break;
+ case SIOCGIFPDSTADDR:
+ src->sin_addr = sc->me_dst;
+ break;
+ }
+ ME_RUNLOCK(sc);
+ /* Wipe the answer again if prison_if() rejects the address. */
+ error = prison_if(curthread->td_ucred, sintosa(src));
+ if (error != 0)
+ memset(src, 0, sizeof(*src));
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+end:
+ sx_xunlock(&me_ioctl_sx);
+ return (error);
+}
+
+/*
+ * Encapsulation match callback registered in me_set_tunnel().  arg is the
+ * softc.  Returns 0 when the packet does not belong to this tunnel
+ * (interface down, packet too short, tunnel not configured or addresses
+ * differ) and 32 * 2 when the outer source/destination match the
+ * configured tunnel endpoints.  off and proto are not used.
+ */
+static int
+me_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
+{
+	ME_RLOCK_TRACKER;
+	struct me_softc *sc;
+	struct ip *oip;
+	int prio;
+
+	sc = (struct me_softc *)arg;
+	if ((ME2IFP(sc)->if_flags & IFF_UP) == 0)
+		return (0);
+
+	M_ASSERTPKTHDR(m);
+
+	/* Need an IP header plus the short (no source) mobile header. */
+	if (m->m_pkthdr.len < sizeof(struct ip) + sizeof(struct mobhdr) -
+	    sizeof(struct in_addr))
+		return (0);
+
+	prio = 0;
+	ME_RLOCK(sc);
+	if (ME_READY(sc)) {
+		oip = mtod(m, struct ip *);
+		/* Incoming packets carry the endpoints swapped. */
+		if (oip->ip_dst.s_addr == sc->me_src.s_addr &&
+		    oip->ip_src.s_addr == sc->me_dst.s_addr)
+			prio = 32 * 2;
+	}
+	ME_RUNLOCK(sc);
+	return (prio);
+}
+
+/*
+ * Configure the tunnel endpoints of ifp.  Must be called with me_ioctl_sx
+ * exclusively held.
+ *
+ * Returns EADDRNOTAVAIL if another configured me(4) interface already
+ * uses the same src/dst pair, otherwise 0.  IFF_DRV_RUNNING is set only
+ * when an encap attachment exists afterwards.
+ */
+static int
+me_set_tunnel(struct ifnet *ifp, struct sockaddr_in *src,
+    struct sockaddr_in *dst)
+{
+	struct me_softc *sc, *other;
+	int inuse;
+
+	sx_assert(&me_ioctl_sx, SA_XLOCKED);
+	inuse = 0;
+	ME_LIST_LOCK();
+	sc = ifp->if_softc;
+	LIST_FOREACH(other, &V_me_softc_list, me_list) {
+		if (other != sc && ME_READY(other) &&
+		    other->me_src.s_addr == src->sin_addr.s_addr &&
+		    other->me_dst.s_addr == dst->sin_addr.s_addr) {
+			inuse = 1;
+			break;
+		}
+	}
+	ME_LIST_UNLOCK();
+	if (inuse)
+		return (EADDRNOTAVAIL);
+
+	ME_WLOCK(sc);
+	sc->me_dst = dst->sin_addr;
+	sc->me_src = src->sin_addr;
+	ME_WUNLOCK(sc);
+
+	/* Attach to the encap layer once; later calls reuse the cookie. */
+	if (sc->me_ecookie == NULL) {
+		sc->me_ecookie = encap_attach_func(AF_INET, IPPROTO_MOBILE,
+		    me_encapcheck, &in_mobile_protosw, sc);
+	}
+	if (sc->me_ecookie != NULL)
+		ifp->if_drv_flags |= IFF_DRV_RUNNING;
+	return (0);
+}
+
+/*
+ * Remove the tunnel configuration from ifp: drop the encap attachment,
+ * zero both endpoint addresses and clear IFF_DRV_RUNNING.  Must be called
+ * with me_ioctl_sx exclusively held.
+ */
+static void
+me_delete_tunnel(struct ifnet *ifp)
+{
+	struct me_softc *sc;
+
+	sx_assert(&me_ioctl_sx, SA_XLOCKED);
+	sc = ifp->if_softc;
+	if (sc->me_ecookie != NULL) {
+		encap_detach(sc->me_ecookie);
+		sc->me_ecookie = NULL;
+	}
+	/* A zero source address makes ME_READY() false. */
+	ME_WLOCK(sc);
+	sc->me_src.s_addr = 0;
+	sc->me_dst.s_addr = 0;
+	ME_WUNLOCK(sc);
+	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+}
+
+/*
+ * Sum nwords 16-bit words starting at p, fold the carries back into the
+ * low 16 bits and return the complement of the result.  A non-positive
+ * nwords yields 0xffff.
+ */
+static uint16_t
+me_in_cksum(uint16_t *p, int nwords)
+{
+	uint32_t acc;
+	int i;
+
+	acc = 0;
+	for (i = 0; i < nwords; i++)
+		acc += p[i];
+	/* Fold twice: the first fold can itself produce a carry. */
+	acc = (acc & 0xffff) + (acc >> 16);
+	acc += (acc >> 16);
+	return (~acc);
+}
+
+/*
+ * Input handler for IPPROTO_MOBILE packets: verify the minimal forwarding
+ * header, restore the original IP header fields from it, remove it and
+ * pass the packet to the IPv4 netisr.
+ *
+ * *mp is the received packet; offp and proto are not used.  The mbuf is
+ * always consumed and IPPROTO_DONE is always returned; dropped packets
+ * are counted in IFCOUNTER_IERRORS.
+ *
+ * NOTE(review): the mobile header is located at sizeof(struct ip) rather
+ * than at *offp, so this looks like it assumes an outer header without
+ * IP options -- confirm.
+ */
+int
+me_input(struct mbuf **mp, int *offp, int proto)
+{
+ struct me_softc *sc;
+ struct mobhdr *mh;
+ struct ifnet *ifp;
+ struct mbuf *m;
+ struct ip *ip;
+ int hlen;
+
+ m = *mp;
+ sc = encap_getarg(m);
+ KASSERT(sc != NULL, ("encap_getarg returned NULL"));
+
+ ifp = ME2IFP(sc);
+ /* checks for short packets */
+ hlen = sizeof(struct mobhdr);
+ /* Too short for the long header: it can only be the short form. */
+ if (m->m_pkthdr.len < sizeof(struct ip) + hlen)
+ hlen -= sizeof(struct in_addr);
+ if (m->m_len < sizeof(struct ip) + hlen)
+ m = m_pullup(m, sizeof(struct ip) + hlen);
+ /* m_pullup() frees the chain on failure, so drop must not. */
+ if (m == NULL)
+ goto drop;
+ mh = (struct mobhdr *)mtodo(m, sizeof(struct ip));
+ /* check for wrong flags */
+ if (mh->mob_flags & (~MOB_FLAGS_SP)) {
+ m_freem(m);
+ goto drop;
+ }
+ if (mh->mob_flags) {
+ /* Source present, but the packet only fits the short header. */
+ if (hlen != sizeof(struct mobhdr)) {
+ m_freem(m);
+ goto drop;
+ }
+ } else
+ hlen = sizeof(struct mobhdr) - sizeof(struct in_addr);
+ /* check mobile header checksum */
+ if (me_in_cksum((uint16_t *)mh, hlen / sizeof(uint16_t)) != 0) {
+ m_freem(m);
+ goto drop;
+ }
+#ifdef MAC
+ mac_ifnet_create_mbuf(ifp, m);
+#endif
+ /* Restore the original header fields saved in the mobile header. */
+ ip = mtod(m, struct ip *);
+ ip->ip_dst = mh->mob_dst;
+ ip->ip_p = mh->mob_proto;
+ ip->ip_sum = 0;
+ ip->ip_len = htons(m->m_pkthdr.len - hlen);
+ if (mh->mob_flags)
+ ip->ip_src = mh->mob_src;
+ /* Slide the IP header over the mobile header, then trim the front. */
+ memmove(mtodo(m, hlen), ip, sizeof(struct ip));
+ m_adj(m, hlen);
+ m_clrprotoflags(m);
+ m->m_pkthdr.rcvif = ifp;
+ m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID);
+ M_SETFIB(m, sc->me_fibnum);
+ /* hlen is reused to pass the address family to BPF. */
+ hlen = AF_INET;
+ BPF_MTAP2(ifp, &hlen, sizeof(hlen), m);
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
+ /* With IFF_MONITOR set the packet is tapped and counted, not delivered. */
+ if ((ifp->if_flags & IFF_MONITOR) != 0)
+ m_freem(m);
+ else
+ netisr_dispatch(NETISR_IP, m);
+ return (IPPROTO_DONE);
+drop:
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+ return (IPPROTO_DONE);
+}
+
+#define MTAG_ME 1414491977
+/*
+ * Guard against recursive encapsulation through me(4) interfaces.
+ *
+ * Every me interface a packet passes through attaches an MTAG_ME tag
+ * holding its ifnet pointer.  Returns EIO if ifp is already recorded on
+ * the mbuf (loop) or if the nesting depth would exceed V_max_me_nesting,
+ * ENOMEM if a new tag cannot be allocated, and 0 after tagging m with ifp.
+ * The mbuf is not freed here.
+ */
+static int
+me_check_nesting(struct ifnet *ifp, struct mbuf *m)
+{
+	struct m_tag *mtag;
+	int count;
+
+	count = 1;
+	mtag = NULL;
+	/*
+	 * Resume each lookup after the tag found last time.  Restarting
+	 * from NULL would return the same first tag on every iteration and
+	 * the loop would never end once the mbuf carries a tag that belongs
+	 * to another me interface.
+	 */
+	while ((mtag = m_tag_locate(m, MTAG_ME, 0, mtag)) != NULL) {
+		/* The tag payload is the ifnet pointer stored below. */
+		if (*(struct ifnet **)(mtag + 1) == ifp) {
+			log(LOG_NOTICE, "%s: loop detected\n", ifp->if_xname);
+			return (EIO);
+		}
+		count++;
+	}
+	if (count > V_max_me_nesting) {
+		log(LOG_NOTICE,
+		    "%s: if_output recursively called too many times(%d)\n",
+		    ifp->if_xname, count);
+		return (EIO);
+	}
+	mtag = m_tag_alloc(MTAG_ME, 0, sizeof(struct ifnet *), M_NOWAIT);
+	if (mtag == NULL)
+		return (ENOMEM);
+	*(struct ifnet **)(mtag + 1) = ifp;
+	m_tag_prepend(m, mtag);
+	return (0);
+}
+
+/*
+ * if_output handler: perform the MAC, interface-state and nesting checks,
+ * accept only AF_INET payloads and pass the packet to if_transmit.
+ *
+ * For AF_UNSPEC destinations the family is read from dst->sa_data.
+ * On failure the mbuf is freed, IFCOUNTER_OERRORS is incremented and the
+ * errno value is returned; otherwise the result of if_transmit is
+ * returned.  ro is not used.
+ */
+static int
+me_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+    struct route *ro)
+{
+	uint32_t family;
+	int error;
+
+#ifdef MAC
+	if ((error = mac_ifnet_check_transmit(ifp, m)) != 0)
+		goto drop;
+#endif
+	/* Send only when the interface is up and not in monitor mode. */
+	if ((ifp->if_flags & (IFF_MONITOR | IFF_UP)) != IFF_UP) {
+		error = ENETDOWN;
+		goto drop;
+	}
+
+	if ((error = me_check_nesting(ifp, m)) != 0)
+		goto drop;
+
+	m->m_flags &= ~(M_BCAST|M_MCAST);
+	if (dst->sa_family != AF_UNSPEC)
+		family = dst->sa_family;
+	else
+		bcopy(dst->sa_data, &family, sizeof(family));
+	if (family != AF_INET) {
+		error = EAFNOSUPPORT;
+		goto drop;
+	}
+	BPF_MTAP2(ifp, &family, sizeof(family), m);
+	return (ifp->if_transmit(ifp, m));
+drop:
+	m_freem(m);
+	if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+	return (error);
+}
+
+/*
+ * if_transmit handler: insert a minimal forwarding header between the IP
+ * header and the payload of m, rewrite the IP header to the tunnel
+ * endpoints and send the packet with ip_output().
+ *
+ * The original source address is carried in the mobile header only when
+ * it differs from the tunnel source.  Returns 0 or an errno value.
+ * Failures are counted in IFCOUNTER_OERRORS, successes in
+ * IFCOUNTER_OPACKETS and IFCOUNTER_OBYTES (size before encapsulation).
+ * Every error path in this function disposes of the mbuf.
+ */
+static int
+me_transmit(struct ifnet *ifp, struct mbuf *m)
+{
+ ME_RLOCK_TRACKER;
+ struct mobhdr mh;
+ struct me_softc *sc;
+ struct ip *ip;
+ int error, hlen, plen;
+
+ sc = ifp->if_softc;
+ if (sc == NULL) {
+ error = ENETDOWN;
+ m_freem(m);
+ goto drop;
+ }
+ if (m->m_len < sizeof(struct ip))
+ m = m_pullup(m, sizeof(struct ip));
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto drop;
+ }
+ ip = mtod(m, struct ip *);
+ /* Fragmented datagrams shouldn't be encapsulated */
+ if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) {
+ error = EINVAL;
+ m_freem(m);
+ goto drop;
+ }
+ /* Save the original fields before the header is rewritten below. */
+ mh.mob_proto = ip->ip_p;
+ mh.mob_src = ip->ip_src;
+ mh.mob_dst = ip->ip_dst;
+ ME_RLOCK(sc);
+ if (!ME_READY(sc)) {
+ ME_RUNLOCK(sc);
+ error = ENETDOWN;
+ m_freem(m);
+ goto drop;
+ }
+ /* Use the short header when the source equals the tunnel source. */
+ if (in_hosteq(sc->me_src, ip->ip_src)) {
+ hlen = sizeof(struct mobhdr) - sizeof(struct in_addr);
+ mh.mob_flags = 0;
+ } else {
+ hlen = sizeof(struct mobhdr);
+ mh.mob_flags = MOB_FLAGS_SP;
+ }
+ plen = m->m_pkthdr.len;
+ ip->ip_src = sc->me_src;
+ ip->ip_dst = sc->me_dst;
+ M_SETFIB(m, sc->me_fibnum);
+ ME_RUNLOCK(sc);
+ M_PREPEND(m, hlen, M_NOWAIT);
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto drop;
+ }
+ if (m->m_len < sizeof(struct ip) + hlen)
+ m = m_pullup(m, sizeof(struct ip) + hlen);
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto drop;
+ }
+ /* Move the IP header to the front, opening a gap of hlen bytes. */
+ memmove(mtod(m, void *), mtodo(m, hlen), sizeof(struct ip));
+ ip = mtod(m, struct ip *);
+ ip->ip_len = htons(m->m_pkthdr.len);
+ ip->ip_p = IPPROTO_MOBILE;
+ ip->ip_sum = 0;
+ /* The checksum is computed with its own field set to zero. */
+ mh.mob_csum = 0;
+ mh.mob_csum = me_in_cksum((uint16_t *)&mh, hlen / sizeof(uint16_t));
+ bcopy(&mh, mtodo(m, sizeof(struct ip)), hlen);
+ error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
+drop:
+ if (error)
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ else {
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
+ }
+ return (error);
+}
+
+/*
+ * if_qflush handler.  Intentionally empty: me_transmit() passes each
+ * packet to ip_output() directly, so no queue is flushed here.
+ */
+static void
+me_qflush(struct ifnet *ifp __unused)
+{
+
+}
+
+/*
+ * Module event handler.  Load and unload need no work here and succeed;
+ * every other event type is rejected with EOPNOTSUPP.  mod and data are
+ * not used.
+ */
+static int
+memodevent(module_t mod, int type, void *data)
+{
+
+	if (type == MOD_LOAD || type == MOD_UNLOAD)
+		return (0);
+	return (EOPNOTSUPP);
+}
+
+/* Module descriptor: name, event handler and (unused) extra argument. */
+static moduledata_t me_mod = {
+ "if_me",
+ memodevent,
+ 0
+};
+
+/* Register the module and publish its version. */
+DECLARE_MODULE(if_me, me_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(if_me, 1);
OpenPOWER on IntegriCloud