summaryrefslogtreecommitdiffstats
path: root/sys/net/if_vxlan.c
diff options
context:
space:
mode:
authorbryanv <bryanv@FreeBSD.org>2014-10-20 14:42:42 +0000
committerbryanv <bryanv@FreeBSD.org>2014-10-20 14:42:42 +0000
commit783bd6e089f7c74d39b735a0a59a22ff53973c5b (patch)
tree75ca7abb57cdcb5af2ee6b56f62b7d987b1edded /sys/net/if_vxlan.c
parentc08eaa2d436024d7b17ad8bbbaaae10b3870d561 (diff)
downloadFreeBSD-src-783bd6e089f7c74d39b735a0a59a22ff53973c5b.zip
FreeBSD-src-783bd6e089f7c74d39b735a0a59a22ff53973c5b.tar.gz
Add vxlan interface
vxlan creates a virtual LAN by encapsulating the inner Ethernet frame in a UDP packet. This implementation is based on RFC7348. Currently, the IPv6 support is not fully compliant with the specification: we should be able to receive UPDv6 packets with a zero checksum, but we need to support RFC6935 first. Patches for this should come soon. Encapsulation protocols such as vxlan emphasize the need for the FreeBSD network stack to support batching, GRO, and GSO. Each frame has to make two trips through the network stack, and each frame will be at most MTU sized. Performance suffers accordingly. Some latest generation NICs have begun to support vxlan HW offloads that we should also take advantage of. VIMAGE support should also be added soon. Differential Revision: https://reviews.freebsd.org/D384 Reviewed by: gnn Relnotes: yes
Diffstat (limited to 'sys/net/if_vxlan.c')
-rw-r--r--sys/net/if_vxlan.c3089
1 files changed, 3089 insertions, 0 deletions
diff --git a/sys/net/if_vxlan.c b/sys/net/if_vxlan.c
new file mode 100644
index 0000000..8e93069
--- /dev/null
+++ b/sys/net/if_vxlan.c
@@ -0,0 +1,3089 @@
+/*-
+ * Copyright (c) 2014, Bryan Venteicher <bryanv@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/eventhandler.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/hash.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/refcount.h>
+#include <sys/rmlock.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/sbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sockio.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+
+#include <net/bpf.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_clone.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+#include <net/if_vxlan.h>
+#include <net/netisr.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/ip_var.h>
+#include <netinet6/ip6_var.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+
+struct vxlan_softc;
+LIST_HEAD(vxlan_softc_head, vxlan_softc);
+
+struct vxlan_socket_mc_info {
+ union vxlan_sockaddr vxlsomc_saddr;
+ union vxlan_sockaddr vxlsomc_gaddr;
+ int vxlsomc_ifidx;
+ int vxlsomc_users;
+};
+
+#define VXLAN_SO_MC_MAX_GROUPS 32
+
+#define VXLAN_SO_VNI_HASH_SHIFT 6
+#define VXLAN_SO_VNI_HASH_SIZE (1 << VXLAN_SO_VNI_HASH_SHIFT)
+#define VXLAN_SO_VNI_HASH(_vni) ((_vni) % VXLAN_SO_VNI_HASH_SIZE)
+
+struct vxlan_socket {
+ struct socket *vxlso_sock;
+ struct rmlock vxlso_lock;
+ u_int vxlso_refcnt;
+ union vxlan_sockaddr vxlso_laddr;
+ LIST_ENTRY(vxlan_socket) vxlso_entry;
+ struct vxlan_softc_head vxlso_vni_hash[VXLAN_SO_VNI_HASH_SIZE];
+ struct vxlan_socket_mc_info vxlso_mc[VXLAN_SO_MC_MAX_GROUPS];
+};
+
+#define VXLAN_SO_RLOCK(_vso, _p) rm_rlock(&(_vso)->vxlso_lock, (_p))
+#define VXLAN_SO_RUNLOCK(_vso, _p) rm_runlock(&(_vso)->vxlso_lock, (_p))
+#define VXLAN_SO_WLOCK(_vso) rm_wlock(&(_vso)->vxlso_lock)
+#define VXLAN_SO_WUNLOCK(_vso) rm_wunlock(&(_vso)->vxlso_lock)
+#define VXLAN_SO_LOCK_ASSERT(_vso) \
+ rm_assert(&(_vso)->vxlso_lock, RA_LOCKED)
+#define VXLAN_SO_LOCK_WASSERT(_vso) \
+ rm_assert(&(_vso)->vxlso_lock, RA_WLOCKED)
+
+#define VXLAN_SO_ACQUIRE(_vso) refcount_acquire(&(_vso)->vxlso_refcnt)
+#define VXLAN_SO_RELEASE(_vso) refcount_release(&(_vso)->vxlso_refcnt)
+
+struct vxlan_ftable_entry {
+ LIST_ENTRY(vxlan_ftable_entry) vxlfe_hash;
+ uint16_t vxlfe_flags;
+ uint8_t vxlfe_mac[ETHER_ADDR_LEN];
+ union vxlan_sockaddr vxlfe_raddr;
+ time_t vxlfe_expire;
+};
+
+#define VXLAN_FE_FLAG_DYNAMIC 0x01
+#define VXLAN_FE_FLAG_STATIC 0x02
+
+#define VXLAN_FE_IS_DYNAMIC(_fe) \
+ ((_fe)->vxlfe_flags & VXLAN_FE_FLAG_DYNAMIC)
+
+#define VXLAN_SC_FTABLE_SHIFT 9
+#define VXLAN_SC_FTABLE_SIZE (1 << VXLAN_SC_FTABLE_SHIFT)
+#define VXLAN_SC_FTABLE_MASK (VXLAN_SC_FTABLE_SIZE - 1)
+#define VXLAN_SC_FTABLE_HASH(_sc, _mac) \
+ (vxlan_mac_hash(_sc, _mac) % VXLAN_SC_FTABLE_SIZE)
+
+LIST_HEAD(vxlan_ftable_head, vxlan_ftable_entry);
+
+struct vxlan_statistics {
+ uint32_t ftable_nospace;
+ uint32_t ftable_lock_upgrade_failed;
+};
+
+struct vxlan_softc {
+ struct ifnet *vxl_ifp;
+ struct vxlan_socket *vxl_sock;
+ uint32_t vxl_vni;
+ union vxlan_sockaddr vxl_src_addr;
+ union vxlan_sockaddr vxl_dst_addr;
+ uint32_t vxl_flags;
+#define VXLAN_FLAG_INIT 0x0001
+#define VXLAN_FLAG_TEARDOWN 0x0002
+#define VXLAN_FLAG_LEARN 0x0004
+
+ uint32_t vxl_port_hash_key;
+ uint16_t vxl_min_port;
+ uint16_t vxl_max_port;
+ uint8_t vxl_ttl;
+
+ /* Lookup table from MAC address to forwarding entry. */
+ uint32_t vxl_ftable_cnt;
+ uint32_t vxl_ftable_max;
+ uint32_t vxl_ftable_timeout;
+ uint32_t vxl_ftable_hash_key;
+ struct vxlan_ftable_head *vxl_ftable;
+
+ /* Derived from vxl_dst_addr. */
+ struct vxlan_ftable_entry vxl_default_fe;
+
+ struct ip_moptions *vxl_im4o;
+ struct ip6_moptions *vxl_im6o;
+
+ struct rmlock vxl_lock;
+ volatile u_int vxl_refcnt;
+
+ int vxl_unit;
+ int vxl_vso_mc_index;
+ struct vxlan_statistics vxl_stats;
+ struct sysctl_oid *vxl_sysctl_node;
+ struct sysctl_ctx_list vxl_sysctl_ctx;
+ struct callout vxl_callout;
+ uint8_t vxl_hwaddr[ETHER_ADDR_LEN];
+ int vxl_mc_ifindex;
+ struct ifnet *vxl_mc_ifp;
+ char vxl_mc_ifname[IFNAMSIZ];
+ LIST_ENTRY(vxlan_softc) vxl_entry;
+ LIST_ENTRY(vxlan_softc) vxl_ifdetach_list;
+};
+
+#define VXLAN_RLOCK(_sc, _p) rm_rlock(&(_sc)->vxl_lock, (_p))
+#define VXLAN_RUNLOCK(_sc, _p) rm_runlock(&(_sc)->vxl_lock, (_p))
+#define VXLAN_WLOCK(_sc) rm_wlock(&(_sc)->vxl_lock)
+#define VXLAN_WUNLOCK(_sc) rm_wunlock(&(_sc)->vxl_lock)
+#define VXLAN_LOCK_WOWNED(_sc) rm_wowned(&(_sc)->vxl_lock)
+#define VXLAN_LOCK_ASSERT(_sc) rm_assert(&(_sc)->vxl_lock, RA_LOCKED)
+#define VXLAN_LOCK_WASSERT(_sc) rm_assert(&(_sc)->vxl_lock, RA_WLOCKED)
+#define VXLAN_UNLOCK(_sc, _p) do { \
+ if (VXLAN_LOCK_WOWNED(_sc)) \
+ VXLAN_WUNLOCK(_sc); \
+ else \
+ VXLAN_RUNLOCK(_sc, _p); \
+} while (0)
+
+#define VXLAN_ACQUIRE(_sc) refcount_acquire(&(_sc)->vxl_refcnt)
+#define VXLAN_RELEASE(_sc) refcount_release(&(_sc)->vxl_refcnt)
+
+#define satoconstsin(sa) ((const struct sockaddr_in *)(sa))
+#define satoconstsin6(sa) ((const struct sockaddr_in6 *)(sa))
+
+struct vxlanudphdr {
+ struct udphdr vxlh_udp;
+ struct vxlan_header vxlh_hdr;
+} __packed;
+
+static int vxlan_ftable_addr_cmp(const uint8_t *, const uint8_t *);
+static void vxlan_ftable_init(struct vxlan_softc *);
+static void vxlan_ftable_fini(struct vxlan_softc *);
+static void vxlan_ftable_flush(struct vxlan_softc *, int);
+static void vxlan_ftable_expire(struct vxlan_softc *);
+static int vxlan_ftable_update_locked(struct vxlan_softc *,
+ const struct sockaddr *, const uint8_t *,
+ struct rm_priotracker *);
+static int vxlan_ftable_update(struct vxlan_softc *,
+ const struct sockaddr *, const uint8_t *);
+static int vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS);
+
+static struct vxlan_ftable_entry *
+ vxlan_ftable_entry_alloc(void);
+static void vxlan_ftable_entry_free(struct vxlan_ftable_entry *);
+static void vxlan_ftable_entry_init(struct vxlan_softc *,
+ struct vxlan_ftable_entry *, const uint8_t *,
+ const struct sockaddr *, uint32_t);
+static void vxlan_ftable_entry_destroy(struct vxlan_softc *,
+ struct vxlan_ftable_entry *);
+static int vxlan_ftable_entry_insert(struct vxlan_softc *,
+ struct vxlan_ftable_entry *);
+static struct vxlan_ftable_entry *
+ vxlan_ftable_entry_lookup(struct vxlan_softc *,
+ const uint8_t *);
+static void vxlan_ftable_entry_dump(struct vxlan_ftable_entry *,
+ struct sbuf *);
+
+static struct vxlan_socket *
+ vxlan_socket_alloc(const union vxlan_sockaddr *);
+static void vxlan_socket_destroy(struct vxlan_socket *);
+static void vxlan_socket_release(struct vxlan_socket *);
+static struct vxlan_socket *
+ vxlan_socket_lookup(union vxlan_sockaddr *vxlsa);
+static void vxlan_socket_insert(struct vxlan_socket *);
+static int vxlan_socket_init(struct vxlan_socket *, struct ifnet *);
+static int vxlan_socket_bind(struct vxlan_socket *, struct ifnet *);
+static int vxlan_socket_create(struct ifnet *, int,
+ const union vxlan_sockaddr *, struct vxlan_socket **);
+static void vxlan_socket_ifdetach(struct vxlan_socket *,
+ struct ifnet *, struct vxlan_softc_head *);
+
+static struct vxlan_socket *
+ vxlan_socket_mc_lookup(const union vxlan_sockaddr *);
+static int vxlan_sockaddr_mc_info_match(
+ const struct vxlan_socket_mc_info *,
+ const union vxlan_sockaddr *,
+ const union vxlan_sockaddr *, int);
+static int vxlan_socket_mc_join_group(struct vxlan_socket *,
+ const union vxlan_sockaddr *, const union vxlan_sockaddr *,
+ int *, union vxlan_sockaddr *);
+static int vxlan_socket_mc_leave_group(struct vxlan_socket *,
+ const union vxlan_sockaddr *,
+ const union vxlan_sockaddr *, int);
+static int vxlan_socket_mc_add_group(struct vxlan_socket *,
+ const union vxlan_sockaddr *, const union vxlan_sockaddr *,
+ int, int *);
+static void vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *,
+ int);
+
+static struct vxlan_softc *
+ vxlan_socket_lookup_softc_locked(struct vxlan_socket *,
+ uint32_t);
+static struct vxlan_softc *
+ vxlan_socket_lookup_softc(struct vxlan_socket *, uint32_t);
+static int vxlan_socket_insert_softc(struct vxlan_socket *,
+ struct vxlan_softc *);
+static void vxlan_socket_remove_softc(struct vxlan_socket *,
+ struct vxlan_softc *);
+
+static struct ifnet *
+ vxlan_multicast_if_ref(struct vxlan_softc *, int);
+static void vxlan_free_multicast(struct vxlan_softc *);
+static int vxlan_setup_multicast_interface(struct vxlan_softc *);
+
+static int vxlan_setup_multicast(struct vxlan_softc *);
+static int vxlan_setup_socket(struct vxlan_softc *);
+static void vxlan_setup_interface(struct vxlan_softc *);
+static int vxlan_valid_init_config(struct vxlan_softc *);
+static void vxlan_init_wait(struct vxlan_softc *);
+static void vxlan_init_complete(struct vxlan_softc *);
+static void vxlan_init(void *);
+static void vxlan_release(struct vxlan_softc *);
+static void vxlan_teardown_wait(struct vxlan_softc *);
+static void vxlan_teardown_complete(struct vxlan_softc *);
+static void vxlan_teardown_locked(struct vxlan_softc *);
+static void vxlan_teardown(struct vxlan_softc *);
+static void vxlan_ifdetach(struct vxlan_softc *, struct ifnet *,
+ struct vxlan_softc_head *);
+static void vxlan_timer(void *);
+
+static int vxlan_ctrl_get_config(struct vxlan_softc *, void *);
+static int vxlan_ctrl_set_vni(struct vxlan_softc *, void *);
+static int vxlan_ctrl_set_local_addr(struct vxlan_softc *, void *);
+static int vxlan_ctrl_set_remote_addr(struct vxlan_softc *, void *);
+static int vxlan_ctrl_set_local_port(struct vxlan_softc *, void *);
+static int vxlan_ctrl_set_remote_port(struct vxlan_softc *, void *);
+static int vxlan_ctrl_set_port_range(struct vxlan_softc *, void *);
+static int vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *, void *);
+static int vxlan_ctrl_set_ftable_max(struct vxlan_softc *, void *);
+static int vxlan_ctrl_set_multicast_if(struct vxlan_softc * , void *);
+static int vxlan_ctrl_set_ttl(struct vxlan_softc *, void *);
+static int vxlan_ctrl_set_learn(struct vxlan_softc *, void *);
+static int vxlan_ctrl_ftable_entry_add(struct vxlan_softc *, void *);
+static int vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *, void *);
+static int vxlan_ctrl_flush(struct vxlan_softc *, void *);
+static int vxlan_ioctl_drvspec(struct vxlan_softc *,
+ struct ifdrv *, int);
+static int vxlan_ioctl_ifflags(struct vxlan_softc *);
+static int vxlan_ioctl(struct ifnet *, u_long, caddr_t);
+
+#if defined(INET) || defined(INET6)
+static uint16_t vxlan_pick_source_port(struct vxlan_softc *, struct mbuf *);
+static void vxlan_encap_header(struct vxlan_softc *, struct mbuf *,
+ int, uint16_t, uint16_t);
+#endif
+static int vxlan_encap4(struct vxlan_softc *,
+ const union vxlan_sockaddr *, struct mbuf *);
+static int vxlan_encap6(struct vxlan_softc *,
+ const union vxlan_sockaddr *, struct mbuf *);
+static int vxlan_transmit(struct ifnet *, struct mbuf *);
+static void vxlan_qflush(struct ifnet *);
+static void vxlan_rcv_udp_packet(struct mbuf *, int, struct inpcb *,
+ const struct sockaddr *, void *);
+static int vxlan_input(struct vxlan_socket *, uint32_t, struct mbuf **,
+ const struct sockaddr *);
+
+static void vxlan_set_default_config(struct vxlan_softc *);
+static int vxlan_set_user_config(struct vxlan_softc *,
+ struct ifvxlanparam *);
+static int vxlan_clone_create(struct if_clone *, int, caddr_t);
+static void vxlan_clone_destroy(struct ifnet *);
+
+static uint32_t vxlan_mac_hash(struct vxlan_softc *, const uint8_t *);
+static void vxlan_fakeaddr(struct vxlan_softc *);
+
+static int vxlan_sockaddr_cmp(const union vxlan_sockaddr *,
+ const struct sockaddr *);
+static void vxlan_sockaddr_copy(union vxlan_sockaddr *,
+ const struct sockaddr *);
+static int vxlan_sockaddr_in_equal(const union vxlan_sockaddr *,
+ const struct sockaddr *);
+static void vxlan_sockaddr_in_copy(union vxlan_sockaddr *,
+ const struct sockaddr *);
+static int vxlan_sockaddr_supported(const union vxlan_sockaddr *, int);
+static int vxlan_sockaddr_in_any(const union vxlan_sockaddr *);
+static int vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *);
+
+static int vxlan_can_change_config(struct vxlan_softc *);
+static int vxlan_check_vni(uint32_t);
+static int vxlan_check_ttl(int);
+static int vxlan_check_ftable_timeout(uint32_t);
+static int vxlan_check_ftable_max(uint32_t);
+
+static void vxlan_sysctl_setup(struct vxlan_softc *);
+static void vxlan_sysctl_destroy(struct vxlan_softc *);
+static int vxlan_tunable_int(struct vxlan_softc *, const char *, int);
+
+static void vxlan_ifdetach_event(void *, struct ifnet *);
+static void vxlan_load(void);
+static void vxlan_unload(void);
+static int vxlan_modevent(module_t, int, void *);
+
+static const char vxlan_name[] = "vxlan";
+static MALLOC_DEFINE(M_VXLAN, vxlan_name,
+ "Virtual eXtensible LAN Interface");
+static struct if_clone *vxlan_cloner;
+static struct mtx vxlan_list_mtx;
+static LIST_HEAD(, vxlan_socket) vxlan_socket_list;
+
+static eventhandler_tag vxlan_ifdetach_event_tag;
+
+SYSCTL_DECL(_net_link);
+SYSCTL_NODE(_net_link, OID_AUTO, vxlan, CTLFLAG_RW, 0,
+ "Virtual eXtensible Local Area Network");
+
+static int vxlan_legacy_port = 0;
+TUNABLE_INT("net.link.vxlan.legacy_port", &vxlan_legacy_port);
+static int vxlan_reuse_port = 0;
+TUNABLE_INT("net.link.vxlan.reuse_port", &vxlan_reuse_port);
+
+/* Default maximum number of addresses in the forwarding table. */
+#ifndef VXLAN_FTABLE_MAX
+#define VXLAN_FTABLE_MAX 2000
+#endif
+
+/* Timeout (in seconds) of addresses learned in the forwarding table. */
+#ifndef VXLAN_FTABLE_TIMEOUT
+#define VXLAN_FTABLE_TIMEOUT (20 * 60)
+#endif
+
+/*
+ * Maximum timeout (in seconds) of addresses learned in the forwarding
+ * table.
+ */
+#ifndef VXLAN_FTABLE_MAX_TIMEOUT
+#define VXLAN_FTABLE_MAX_TIMEOUT (60 * 60 * 24)
+#endif
+
+/* Number of seconds between pruning attempts of the forwarding table. */
+#ifndef VXLAN_FTABLE_PRUNE
+#define VXLAN_FTABLE_PRUNE (5 * 60)
+#endif
+
+static int vxlan_ftable_prune_period = VXLAN_FTABLE_PRUNE;
+
+struct vxlan_control {
+ int (*vxlc_func)(struct vxlan_softc *, void *);
+ int vxlc_argsize;
+ int vxlc_flags;
+#define VXLAN_CTRL_FLAG_COPYIN 0x01
+#define VXLAN_CTRL_FLAG_COPYOUT 0x02
+#define VXLAN_CTRL_FLAG_SUSER 0x04
+};
+
+static const struct vxlan_control vxlan_control_table[] = {
+ [VXLAN_CMD_GET_CONFIG] =
+ { vxlan_ctrl_get_config, sizeof(struct ifvxlancfg),
+ VXLAN_CTRL_FLAG_COPYOUT
+ },
+
+ [VXLAN_CMD_SET_VNI] =
+ { vxlan_ctrl_set_vni, sizeof(struct ifvxlancmd),
+ VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+ },
+
+ [VXLAN_CMD_SET_LOCAL_ADDR] =
+ { vxlan_ctrl_set_local_addr, sizeof(struct ifvxlancmd),
+ VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+ },
+
+ [VXLAN_CMD_SET_REMOTE_ADDR] =
+ { vxlan_ctrl_set_remote_addr, sizeof(struct ifvxlancmd),
+ VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+ },
+
+ [VXLAN_CMD_SET_LOCAL_PORT] =
+ { vxlan_ctrl_set_local_port, sizeof(struct ifvxlancmd),
+ VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+ },
+
+ [VXLAN_CMD_SET_REMOTE_PORT] =
+ { vxlan_ctrl_set_remote_port, sizeof(struct ifvxlancmd),
+ VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+ },
+
+ [VXLAN_CMD_SET_PORT_RANGE] =
+ { vxlan_ctrl_set_port_range, sizeof(struct ifvxlancmd),
+ VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+ },
+
+ [VXLAN_CMD_SET_FTABLE_TIMEOUT] =
+ { vxlan_ctrl_set_ftable_timeout, sizeof(struct ifvxlancmd),
+ VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+ },
+
+ [VXLAN_CMD_SET_FTABLE_MAX] =
+ { vxlan_ctrl_set_ftable_max, sizeof(struct ifvxlancmd),
+ VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+ },
+
+ [VXLAN_CMD_SET_MULTICAST_IF] =
+ { vxlan_ctrl_set_multicast_if, sizeof(struct ifvxlancmd),
+ VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+ },
+
+ [VXLAN_CMD_SET_TTL] =
+ { vxlan_ctrl_set_ttl, sizeof(struct ifvxlancmd),
+ VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+ },
+
+ [VXLAN_CMD_SET_LEARN] =
+ { vxlan_ctrl_set_learn, sizeof(struct ifvxlancmd),
+ VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+ },
+
+ [VXLAN_CMD_FTABLE_ENTRY_ADD] =
+ { vxlan_ctrl_ftable_entry_add, sizeof(struct ifvxlancmd),
+ VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+ },
+
+ [VXLAN_CMD_FTABLE_ENTRY_REM] =
+ { vxlan_ctrl_ftable_entry_rem, sizeof(struct ifvxlancmd),
+ VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+ },
+
+ [VXLAN_CMD_FLUSH] =
+ { vxlan_ctrl_flush, sizeof(struct ifvxlancmd),
+ VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
+ },
+};
+
+static const int vxlan_control_table_size = nitems(vxlan_control_table);
+
+static int
+vxlan_ftable_addr_cmp(const uint8_t *a, const uint8_t *b)
+{
+ int i, d;
+
+ for (i = 0, d = 0; i < ETHER_ADDR_LEN && d == 0; i++)
+ d = ((int)a[i]) - ((int)b[i]);
+
+ return (d);
+}
+
+static void
+vxlan_ftable_init(struct vxlan_softc *sc)
+{
+ int i;
+
+ sc->vxl_ftable = malloc(sizeof(struct vxlan_ftable_head) *
+ VXLAN_SC_FTABLE_SIZE, M_VXLAN, M_ZERO | M_WAITOK);
+
+ for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++)
+ LIST_INIT(&sc->vxl_ftable[i]);
+ sc->vxl_ftable_hash_key = arc4random();
+}
+
+static void
+vxlan_ftable_fini(struct vxlan_softc *sc)
+{
+ int i;
+
+ for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
+ KASSERT(LIST_EMPTY(&sc->vxl_ftable[i]),
+ ("%s: vxlan %p ftable[%d] not empty", __func__, sc, i));
+ }
+ MPASS(sc->vxl_ftable_cnt == 0);
+
+ free(sc->vxl_ftable, M_VXLAN);
+ sc->vxl_ftable = NULL;
+}
+
+static void
+vxlan_ftable_flush(struct vxlan_softc *sc, int all)
+{
+ struct vxlan_ftable_entry *fe, *tfe;
+ int i;
+
+ for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
+ LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) {
+ if (all || VXLAN_FE_IS_DYNAMIC(fe))
+ vxlan_ftable_entry_destroy(sc, fe);
+ }
+ }
+}
+
+static void
+vxlan_ftable_expire(struct vxlan_softc *sc)
+{
+ struct vxlan_ftable_entry *fe, *tfe;
+ int i;
+
+ VXLAN_LOCK_WASSERT(sc);
+
+ for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
+ LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) {
+ if (VXLAN_FE_IS_DYNAMIC(fe) &&
+ time_uptime >= fe->vxlfe_expire)
+ vxlan_ftable_entry_destroy(sc, fe);
+ }
+ }
+}
+
+static int
+vxlan_ftable_update_locked(struct vxlan_softc *sc, const struct sockaddr *sa,
+ const uint8_t *mac, struct rm_priotracker *tracker)
+{
+ union vxlan_sockaddr vxlsa;
+ struct vxlan_ftable_entry *fe;
+ int error;
+
+ VXLAN_LOCK_ASSERT(sc);
+
+again:
+ /*
+ * A forwarding entry for this MAC address might already exist. If
+ * so, update it, otherwise create a new one. We may have to upgrade
+ * the lock if we have to change or create an entry.
+ */
+ fe = vxlan_ftable_entry_lookup(sc, mac);
+ if (fe != NULL) {
+ fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;
+
+ if (!VXLAN_FE_IS_DYNAMIC(fe) ||
+ vxlan_sockaddr_in_equal(&fe->vxlfe_raddr, sa))
+ return (0);
+ if (!VXLAN_LOCK_WOWNED(sc)) {
+ VXLAN_RUNLOCK(sc, tracker);
+ VXLAN_WLOCK(sc);
+ sc->vxl_stats.ftable_lock_upgrade_failed++;
+ goto again;
+ }
+ vxlan_sockaddr_in_copy(&fe->vxlfe_raddr, sa);
+ return (0);
+ }
+
+ if (!VXLAN_LOCK_WOWNED(sc)) {
+ VXLAN_RUNLOCK(sc, tracker);
+ VXLAN_WLOCK(sc);
+ sc->vxl_stats.ftable_lock_upgrade_failed++;
+ goto again;
+ }
+
+ if (sc->vxl_ftable_cnt >= sc->vxl_ftable_max) {
+ sc->vxl_stats.ftable_nospace++;
+ return (ENOSPC);
+ }
+
+ fe = vxlan_ftable_entry_alloc();
+ if (fe == NULL)
+ return (ENOMEM);
+
+ /*
+ * The source port may be randomly select by the remove host, so
+ * use the port of the default destination address.
+ */
+ vxlan_sockaddr_copy(&vxlsa, sa);
+ vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;
+
+ vxlan_ftable_entry_init(sc, fe, mac, &vxlsa.sa,
+ VXLAN_FE_FLAG_DYNAMIC);
+
+ /* The prior lookup failed, so the insert should not. */
+ error = vxlan_ftable_entry_insert(sc, fe);
+ MPASS(error == 0);
+
+ return (0);
+}
+
+static int
+vxlan_ftable_update(struct vxlan_softc *sc, const struct sockaddr *sa,
+ const uint8_t *mac)
+{
+ struct rm_priotracker tracker;
+ int error;
+
+ VXLAN_RLOCK(sc, &tracker);
+ error = vxlan_ftable_update_locked(sc, sa, mac, &tracker);
+ VXLAN_UNLOCK(sc, &tracker);
+
+ return (error);
+}
+
+static int
+vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS)
+{
+ struct rm_priotracker tracker;
+ struct sbuf sb;
+ struct vxlan_softc *sc;
+ struct vxlan_ftable_entry *fe;
+ size_t size;
+ int i, error;
+
+ /*
+ * This is mostly intended for debugging during development. It is
+ * not practical to dump an entire large table this way.
+ */
+
+ sc = arg1;
+ size = PAGE_SIZE; /* Calculate later. */
+
+ sbuf_new(&sb, NULL, size, SBUF_FIXEDLEN);
+ sbuf_putc(&sb, '\n');
+
+ VXLAN_RLOCK(sc, &tracker);
+ for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
+ LIST_FOREACH(fe, &sc->vxl_ftable[i], vxlfe_hash) {
+ if (sbuf_error(&sb) != 0)
+ break;
+ vxlan_ftable_entry_dump(fe, &sb);
+ }
+ }
+ VXLAN_RUNLOCK(sc, &tracker);
+
+ if (sbuf_len(&sb) == 1)
+ sbuf_setpos(&sb, 0);
+
+ sbuf_finish(&sb);
+ error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
+ sbuf_delete(&sb);
+
+ return (error);
+}
+
+static struct vxlan_ftable_entry *
+vxlan_ftable_entry_alloc(void)
+{
+ struct vxlan_ftable_entry *fe;
+
+ fe = malloc(sizeof(*fe), M_VXLAN, M_ZERO | M_NOWAIT);
+
+ return (fe);
+}
+
+static void
+vxlan_ftable_entry_free(struct vxlan_ftable_entry *fe)
+{
+
+ free(fe, M_VXLAN);
+}
+
+static void
+vxlan_ftable_entry_init(struct vxlan_softc *sc, struct vxlan_ftable_entry *fe,
+ const uint8_t *mac, const struct sockaddr *sa, uint32_t flags)
+{
+
+ fe->vxlfe_flags = flags;
+ fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;
+ memcpy(fe->vxlfe_mac, mac, ETHER_HDR_LEN);
+ vxlan_sockaddr_copy(&fe->vxlfe_raddr, sa);
+}
+
+static void
+vxlan_ftable_entry_destroy(struct vxlan_softc *sc,
+ struct vxlan_ftable_entry *fe)
+{
+
+ sc->vxl_ftable_cnt--;
+ LIST_REMOVE(fe, vxlfe_hash);
+ vxlan_ftable_entry_free(fe);
+}
+
+static int
+vxlan_ftable_entry_insert(struct vxlan_softc *sc,
+ struct vxlan_ftable_entry *fe)
+{
+ struct vxlan_ftable_entry *lfe;
+ uint32_t hash;
+ int dir;
+
+ VXLAN_LOCK_WASSERT(sc);
+ hash = VXLAN_SC_FTABLE_HASH(sc, fe->vxlfe_mac);
+
+ lfe = LIST_FIRST(&sc->vxl_ftable[hash]);
+ if (lfe == NULL) {
+ LIST_INSERT_HEAD(&sc->vxl_ftable[hash], fe, vxlfe_hash);
+ goto out;
+ }
+
+ do {
+ dir = vxlan_ftable_addr_cmp(fe->vxlfe_mac, lfe->vxlfe_mac);
+ if (dir == 0)
+ return (EEXIST);
+ if (dir > 0) {
+ LIST_INSERT_BEFORE(lfe, fe, vxlfe_hash);
+ goto out;
+ } else if (LIST_NEXT(lfe, vxlfe_hash) == NULL) {
+ LIST_INSERT_AFTER(lfe, fe, vxlfe_hash);
+ goto out;
+ } else
+ lfe = LIST_NEXT(lfe, vxlfe_hash);
+ } while (lfe != NULL);
+
+out:
+ sc->vxl_ftable_cnt++;
+
+ return (0);
+}
+
+static struct vxlan_ftable_entry *
+vxlan_ftable_entry_lookup(struct vxlan_softc *sc, const uint8_t *mac)
+{
+ struct vxlan_ftable_entry *fe;
+ uint32_t hash;
+ int dir;
+
+ VXLAN_LOCK_ASSERT(sc);
+ hash = VXLAN_SC_FTABLE_HASH(sc, mac);
+
+ LIST_FOREACH(fe, &sc->vxl_ftable[hash], vxlfe_hash) {
+ dir = vxlan_ftable_addr_cmp(fe->vxlfe_mac, mac);
+ if (dir == 0)
+ return (fe);
+ if (dir > 0)
+ break;
+ }
+
+ return (NULL);
+}
+
+static void
+vxlan_ftable_entry_dump(struct vxlan_ftable_entry *fe, struct sbuf *sb)
+{
+ char buf[64];
+ const union vxlan_sockaddr *sa;
+ const void *addr;
+ int i, len, af, width;
+
+ sa = &fe->vxlfe_raddr;
+ af = sa->sa.sa_family;
+ len = sbuf_len(sb);
+
+ sbuf_printf(sb, "%c 0x%02X ", VXLAN_FE_IS_DYNAMIC(fe) ? 'D' : 'S',
+ fe->vxlfe_flags);
+
+ for (i = 0; i < ETHER_ADDR_LEN - 1; i++)
+ sbuf_printf(sb, "%02X:", fe->vxlfe_mac[i]);
+ sbuf_printf(sb, "%02X ", fe->vxlfe_mac[i]);
+
+ if (af == AF_INET) {
+ addr = &sa->in4.sin_addr;
+ width = INET_ADDRSTRLEN - 1;
+ } else {
+ addr = &sa->in6.sin6_addr;
+ width = INET6_ADDRSTRLEN - 1;
+ }
+ inet_ntop(af, addr, buf, sizeof(buf));
+ sbuf_printf(sb, "%*s ", width, buf);
+
+ sbuf_printf(sb, "%08jd", (intmax_t)fe->vxlfe_expire);
+
+ sbuf_putc(sb, '\n');
+
+ /* Truncate a partial line. */
+ if (sbuf_error(sb) != 0)
+ sbuf_setpos(sb, len);
+}
+
+static struct vxlan_socket *
+vxlan_socket_alloc(const union vxlan_sockaddr *sa)
+{
+ struct vxlan_socket *vso;
+ int i;
+
+ vso = malloc(sizeof(*vso), M_VXLAN, M_WAITOK | M_ZERO);
+ rm_init(&vso->vxlso_lock, "vxlansorm");
+ refcount_init(&vso->vxlso_refcnt, 0);
+ for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++)
+ LIST_INIT(&vso->vxlso_vni_hash[i]);
+ vso->vxlso_laddr = *sa;
+
+ return (vso);
+}
+
+static void
+vxlan_socket_destroy(struct vxlan_socket *vso)
+{
+ struct socket *so;
+ struct vxlan_socket_mc_info *mc;
+ int i;
+
+ for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
+ mc = &vso->vxlso_mc[i];
+ KASSERT(mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC,
+ ("%s: socket %p mc[%d] still has address",
+ __func__, vso, i));
+ }
+
+ for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
+ KASSERT(LIST_EMPTY(&vso->vxlso_vni_hash[i]),
+ ("%s: socket %p vni_hash[%d] not empty",
+ __func__, vso, i));
+ }
+
+ so = vso->vxlso_sock;
+ if (so != NULL) {
+ vso->vxlso_sock = NULL;
+ soclose(so);
+ }
+
+ rm_destroy(&vso->vxlso_lock);
+ free(vso, M_VXLAN);
+}
+
+static void
+vxlan_socket_release(struct vxlan_socket *vso)
+{
+ int destroy;
+
+ mtx_lock(&vxlan_list_mtx);
+ destroy = VXLAN_SO_RELEASE(vso);
+ if (destroy != 0)
+ LIST_REMOVE(vso, vxlso_entry);
+ mtx_unlock(&vxlan_list_mtx);
+
+ if (destroy != 0)
+ vxlan_socket_destroy(vso);
+}
+
+static struct vxlan_socket *
+vxlan_socket_lookup(union vxlan_sockaddr *vxlsa)
+{
+ struct vxlan_socket *vso;
+
+ mtx_lock(&vxlan_list_mtx);
+ LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry) {
+ if (vxlan_sockaddr_cmp(&vso->vxlso_laddr, &vxlsa->sa) == 0) {
+ VXLAN_SO_ACQUIRE(vso);
+ break;
+ }
+ }
+ mtx_unlock(&vxlan_list_mtx);
+
+ return (vso);
+}
+
+static void
+vxlan_socket_insert(struct vxlan_socket *vso)
+{
+
+ mtx_lock(&vxlan_list_mtx);
+ VXLAN_SO_ACQUIRE(vso);
+ LIST_INSERT_HEAD(&vxlan_socket_list, vso, vxlso_entry);
+ mtx_unlock(&vxlan_list_mtx);
+}
+
+static int
+vxlan_socket_init(struct vxlan_socket *vso, struct ifnet *ifp)
+{
+ struct thread *td;
+ int error;
+
+ td = curthread;
+
+ error = socreate(vso->vxlso_laddr.sa.sa_family, &vso->vxlso_sock,
+ SOCK_DGRAM, IPPROTO_UDP, td->td_ucred, td);
+ if (error) {
+ if_printf(ifp, "cannot create socket: %d\n", error);
+ return (error);
+ }
+
+ error = udp_set_kernel_tunneling(vso->vxlso_sock,
+ vxlan_rcv_udp_packet, vso);
+ if (error) {
+ if_printf(ifp, "cannot set tunneling function: %d\n", error);
+ return (error);
+ }
+
+ if (vxlan_reuse_port != 0) {
+ struct sockopt sopt;
+ int val = 1;
+
+ bzero(&sopt, sizeof(sopt));
+ sopt.sopt_dir = SOPT_SET;
+ sopt.sopt_level = IPPROTO_IP;
+ sopt.sopt_name = SO_REUSEPORT;
+ sopt.sopt_val = &val;
+ sopt.sopt_valsize = sizeof(val);
+ error = sosetopt(vso->vxlso_sock, &sopt);
+ if (error) {
+ if_printf(ifp,
+ "cannot set REUSEADDR socket opt: %d\n", error);
+ return (error);
+ }
+ }
+
+ return (0);
+}
+
+static int
+vxlan_socket_bind(struct vxlan_socket *vso, struct ifnet *ifp)
+{
+ union vxlan_sockaddr laddr;
+ struct thread *td;
+ int error;
+
+ td = curthread;
+ laddr = vso->vxlso_laddr;
+
+ error = sobind(vso->vxlso_sock, &laddr.sa, td);
+ if (error) {
+ if (error != EADDRINUSE)
+ if_printf(ifp, "cannot bind socket: %d\n", error);
+ return (error);
+ }
+
+ return (0);
+}
+
+static int
+vxlan_socket_create(struct ifnet *ifp, int multicast,
+ const union vxlan_sockaddr *saddr, struct vxlan_socket **vsop)
+{
+ union vxlan_sockaddr laddr;
+ struct vxlan_socket *vso;
+ int error;
+
+ laddr = *saddr;
+
+ /*
+ * If this socket will be multicast, then only the local port
+ * must be specified when binding.
+ */
+ if (multicast != 0) {
+ if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
+ laddr.in4.sin_addr.s_addr = INADDR_ANY;
+#ifdef INET6
+ else
+ laddr.in6.sin6_addr = in6addr_any;
+#endif
+ }
+
+ vso = vxlan_socket_alloc(&laddr);
+ if (vso == NULL)
+ return (ENOMEM);
+
+ error = vxlan_socket_init(vso, ifp);
+ if (error)
+ goto fail;
+
+ error = vxlan_socket_bind(vso, ifp);
+ if (error)
+ goto fail;
+
+ /*
+ * There is a small window between the bind completing and
+ * inserting the socket, so that a concurrent create may fail.
+ * Let's not worry about that for now.
+ */
+ vxlan_socket_insert(vso);
+ *vsop = vso;
+
+ return (0);
+
+fail:
+ vxlan_socket_destroy(vso);
+
+ return (error);
+}
+
+static void
+vxlan_socket_ifdetach(struct vxlan_socket *vso, struct ifnet *ifp,
+ struct vxlan_softc_head *list)
+{
+ struct rm_priotracker tracker;
+ struct vxlan_softc *sc;
+ int i;
+
+ VXLAN_SO_RLOCK(vso, &tracker);
+ for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
+ LIST_FOREACH(sc, &vso->vxlso_vni_hash[i], vxl_entry)
+ vxlan_ifdetach(sc, ifp, list);
+ }
+ VXLAN_SO_RUNLOCK(vso, &tracker);
+}
+
+static struct vxlan_socket *
+vxlan_socket_mc_lookup(const union vxlan_sockaddr *vxlsa)
+{
+ struct vxlan_socket *vso;
+ union vxlan_sockaddr laddr;
+
+ laddr = *vxlsa;
+
+ if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
+ laddr.in4.sin_addr.s_addr = INADDR_ANY;
+#ifdef INET6
+ else
+ laddr.in6.sin6_addr = in6addr_any;
+#endif
+
+ vso = vxlan_socket_lookup(&laddr);
+
+ return (vso);
+}
+
+static int
+vxlan_sockaddr_mc_info_match(const struct vxlan_socket_mc_info *mc,
+ const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
+ int ifidx)
+{
+
+ if (!vxlan_sockaddr_in_any(local) &&
+ !vxlan_sockaddr_in_equal(&mc->vxlsomc_saddr, &local->sa))
+ return (0);
+ if (!vxlan_sockaddr_in_equal(&mc->vxlsomc_gaddr, &group->sa))
+ return (0);
+ if (ifidx != 0 && ifidx != mc->vxlsomc_ifidx)
+ return (0);
+
+ return (1);
+}
+
+static int
+vxlan_socket_mc_join_group(struct vxlan_socket *vso,
+ const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
+ int *ifidx, union vxlan_sockaddr *source)
+{
+ struct sockopt sopt;
+ int error;
+
+ *source = *local;
+
+ if (VXLAN_SOCKADDR_IS_IPV4(group)) {
+ struct ip_mreq mreq;
+
+ mreq.imr_multiaddr = group->in4.sin_addr;
+ mreq.imr_interface = local->in4.sin_addr;
+
+ bzero(&sopt, sizeof(sopt));
+ sopt.sopt_dir = SOPT_SET;
+ sopt.sopt_level = IPPROTO_IP;
+ sopt.sopt_name = IP_ADD_MEMBERSHIP;
+ sopt.sopt_val = &mreq;
+ sopt.sopt_valsize = sizeof(mreq);
+ error = sosetopt(vso->vxlso_sock, &sopt);
+ if (error)
+ return (error);
+
+ /*
+ * BMV: Ideally, there would be a formal way for us to get
+ * the local interface that was selected based on the
+ * imr_interface address. We could then update *ifidx so
+ * vxlan_sockaddr_mc_info_match() would return a match for
+ * later creates that explicitly set the multicast interface.
+ *
+ * If we really need to, we can of course look in the INP's
+ * membership list:
+ * sotoinpcb(vso->vxlso_sock)->inp_moptions->
+ * imo_membership[]->inm_ifp
+ * similarly to imo_match_group().
+ */
+ source->in4.sin_addr = local->in4.sin_addr;
+
+ } else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
+ struct ipv6_mreq mreq;
+
+ mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
+ mreq.ipv6mr_interface = *ifidx;
+
+ bzero(&sopt, sizeof(sopt));
+ sopt.sopt_dir = SOPT_SET;
+ sopt.sopt_level = IPPROTO_IPV6;
+ sopt.sopt_name = IPV6_JOIN_GROUP;
+ sopt.sopt_val = &mreq;
+ sopt.sopt_valsize = sizeof(mreq);
+ error = sosetopt(vso->vxlso_sock, &sopt);
+ if (error)
+ return (error);
+
+ /*
+ * BMV: As with IPv4, we would really like to know what
+ * interface in6p_lookup_mcast_ifp() selected.
+ */
+ } else
+ error = EAFNOSUPPORT;
+
+ return (error);
+}
+
+static int
+vxlan_socket_mc_leave_group(struct vxlan_socket *vso,
+ const union vxlan_sockaddr *group, const union vxlan_sockaddr *source,
+ int ifidx)
+{
+ struct sockopt sopt;
+ int error;
+
+ bzero(&sopt, sizeof(sopt));
+ sopt.sopt_dir = SOPT_SET;
+
+ if (VXLAN_SOCKADDR_IS_IPV4(group)) {
+ struct ip_mreq mreq;
+
+ mreq.imr_multiaddr = group->in4.sin_addr;
+ mreq.imr_interface = source->in4.sin_addr;
+
+ sopt.sopt_level = IPPROTO_IP;
+ sopt.sopt_name = IP_DROP_MEMBERSHIP;
+ sopt.sopt_val = &mreq;
+ sopt.sopt_valsize = sizeof(mreq);
+ error = sosetopt(vso->vxlso_sock, &sopt);
+
+ } else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
+ struct ipv6_mreq mreq;
+
+ mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
+ mreq.ipv6mr_interface = ifidx;
+
+ sopt.sopt_level = IPPROTO_IPV6;
+ sopt.sopt_name = IPV6_LEAVE_GROUP;
+ sopt.sopt_val = &mreq;
+ sopt.sopt_valsize = sizeof(mreq);
+ error = sosetopt(vso->vxlso_sock, &sopt);
+
+ } else
+ error = EAFNOSUPPORT;
+
+ return (error);
+}
+
+static int
+vxlan_socket_mc_add_group(struct vxlan_socket *vso,
+ const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
+ int ifidx, int *idx)
+{
+ union vxlan_sockaddr source;
+ struct vxlan_socket_mc_info *mc;
+ int i, empty, error;
+
+ /*
+ * Within a socket, the same multicast group may be used by multiple
+ * interfaces, each with a different network identifier. But a socket
+ * may only join a multicast group once, so keep track of the users
+ * here.
+ */
+
+ VXLAN_SO_WLOCK(vso);
+ for (empty = 0, i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
+ mc = &vso->vxlso_mc[i];
+
+ if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
+ empty++;
+ continue;
+ }
+
+ if (vxlan_sockaddr_mc_info_match(mc, group, local, ifidx))
+ goto out;
+ }
+ VXLAN_SO_WUNLOCK(vso);
+
+ if (empty == 0)
+ return (ENOSPC);
+
+ error = vxlan_socket_mc_join_group(vso, group, local, &ifidx, &source);
+ if (error)
+ return (error);
+
+ VXLAN_SO_WLOCK(vso);
+ for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
+ mc = &vso->vxlso_mc[i];
+
+ if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
+ vxlan_sockaddr_copy(&mc->vxlsomc_gaddr, &group->sa);
+ vxlan_sockaddr_copy(&mc->vxlsomc_saddr, &source.sa);
+ mc->vxlsomc_ifidx = ifidx;
+ goto out;
+ }
+ }
+ VXLAN_SO_WUNLOCK(vso);
+
+ error = vxlan_socket_mc_leave_group(vso, group, &source, ifidx);
+ MPASS(error == 0);
+
+ return (ENOSPC);
+
+out:
+ mc->vxlsomc_users++;
+ VXLAN_SO_WUNLOCK(vso);
+
+ *idx = i;
+
+ return (0);
+}
+
+static void
+vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *vso, int idx)
+{
+ union vxlan_sockaddr group, source;
+ struct vxlan_socket_mc_info *mc;
+ int ifidx, leave;
+
+ KASSERT(idx >= 0 && idx < VXLAN_SO_MC_MAX_GROUPS,
+ ("%s: vso %p idx %d out of bounds", __func__, vso, idx));
+
+ leave = 0;
+ mc = &vso->vxlso_mc[idx];
+
+ VXLAN_SO_WLOCK(vso);
+ mc->vxlsomc_users--;
+ if (mc->vxlsomc_users == 0) {
+ group = mc->vxlsomc_gaddr;
+ source = mc->vxlsomc_saddr;
+ ifidx = mc->vxlsomc_ifidx;
+ bzero(mc, sizeof(*mc));
+ leave = 1;
+ }
+ VXLAN_SO_WUNLOCK(vso);
+
+ if (leave != 0) {
+ /*
+ * Our socket's membership in this group may have already
+ * been removed if we joined through an interface that's
+ * been detached.
+ */
+ vxlan_socket_mc_leave_group(vso, &group, &source, ifidx);
+ }
+}
+
+static struct vxlan_softc *
+vxlan_socket_lookup_softc_locked(struct vxlan_socket *vso, uint32_t vni)
+{
+ struct vxlan_softc *sc;
+ uint32_t hash;
+
+ VXLAN_SO_LOCK_ASSERT(vso);
+ hash = VXLAN_SO_VNI_HASH(vni);
+
+ LIST_FOREACH(sc, &vso->vxlso_vni_hash[hash], vxl_entry) {
+ if (sc->vxl_vni == vni) {
+ VXLAN_ACQUIRE(sc);
+ break;
+ }
+ }
+
+ return (sc);
+}
+
+static struct vxlan_softc *
+vxlan_socket_lookup_softc(struct vxlan_socket *vso, uint32_t vni)
+{
+ struct rm_priotracker tracker;
+ struct vxlan_softc *sc;
+
+ VXLAN_SO_RLOCK(vso, &tracker);
+ sc = vxlan_socket_lookup_softc_locked(vso, vni);
+ VXLAN_SO_RUNLOCK(vso, &tracker);
+
+ return (sc);
+}
+
+static int
+vxlan_socket_insert_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
+{
+ struct vxlan_softc *tsc;
+ uint32_t vni, hash;
+
+ vni = sc->vxl_vni;
+ hash = VXLAN_SO_VNI_HASH(vni);
+
+ VXLAN_SO_WLOCK(vso);
+ tsc = vxlan_socket_lookup_softc_locked(vso, vni);
+ if (tsc != NULL) {
+ VXLAN_SO_WUNLOCK(vso);
+ vxlan_release(tsc);
+ return (EEXIST);
+ }
+
+ VXLAN_ACQUIRE(sc);
+ LIST_INSERT_HEAD(&vso->vxlso_vni_hash[hash], sc, vxl_entry);
+ VXLAN_SO_WUNLOCK(vso);
+
+ return (0);
+}
+
+static void
+vxlan_socket_remove_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
+{
+
+ VXLAN_SO_WLOCK(vso);
+ LIST_REMOVE(sc, vxl_entry);
+ VXLAN_SO_WUNLOCK(vso);
+
+ vxlan_release(sc);
+}
+
+static struct ifnet *
+vxlan_multicast_if_ref(struct vxlan_softc *sc, int ipv4)
+{
+ struct ifnet *ifp;
+
+ VXLAN_LOCK_ASSERT(sc);
+
+ if (ipv4 && sc->vxl_im4o != NULL)
+ ifp = sc->vxl_im4o->imo_multicast_ifp;
+ else if (!ipv4 && sc->vxl_im6o != NULL)
+ ifp = sc->vxl_im6o->im6o_multicast_ifp;
+ else
+ ifp = NULL;
+
+ if (ifp != NULL)
+ if_ref(ifp);
+
+ return (ifp);
+}
+
+static void
+vxlan_free_multicast(struct vxlan_softc *sc)
+{
+
+ if (sc->vxl_mc_ifp != NULL) {
+ if_rele(sc->vxl_mc_ifp);
+ sc->vxl_mc_ifp = NULL;
+ sc->vxl_mc_ifindex = 0;
+ }
+
+ if (sc->vxl_im4o != NULL) {
+ free(sc->vxl_im4o, M_VXLAN);
+ sc->vxl_im4o = NULL;
+ }
+
+ if (sc->vxl_im6o != NULL) {
+ free(sc->vxl_im6o, M_VXLAN);
+ sc->vxl_im6o = NULL;
+ }
+}
+
+static int
+vxlan_setup_multicast_interface(struct vxlan_softc *sc)
+{
+ struct ifnet *ifp;
+
+ ifp = ifunit_ref(sc->vxl_mc_ifname);
+ if (ifp == NULL) {
+ if_printf(sc->vxl_ifp, "multicast interfaces %s does "
+ "not exist\n", sc->vxl_mc_ifname);
+ return (ENOENT);
+ }
+
+ if ((ifp->if_flags & IFF_MULTICAST) == 0) {
+ if_printf(sc->vxl_ifp, "interface %s does not support "
+ "multicast\n", sc->vxl_mc_ifname);
+ if_rele(ifp);
+ return (ENOTSUP);
+ }
+
+ sc->vxl_mc_ifp = ifp;
+ sc->vxl_mc_ifindex = ifp->if_index;
+
+ return (0);
+}
+
+static int
+vxlan_setup_multicast(struct vxlan_softc *sc)
+{
+ const union vxlan_sockaddr *group;
+ int error;
+
+ group = &sc->vxl_dst_addr;
+ error = 0;
+
+ if (sc->vxl_mc_ifname[0] != '\0') {
+ error = vxlan_setup_multicast_interface(sc);
+ if (error)
+ return (error);
+ }
+
+ /*
+ * Initialize an multicast options structure that is sufficiently
+ * populated for use in the respective IP output routine. This
+ * structure is typically stored in the socket, but our sockets
+ * may be shared among multiple interfaces.
+ */
+ if (VXLAN_SOCKADDR_IS_IPV4(group)) {
+ sc->vxl_im4o = malloc(sizeof(struct ip_moptions), M_VXLAN,
+ M_ZERO | M_WAITOK);
+ sc->vxl_im4o->imo_multicast_ifp = sc->vxl_mc_ifp;
+ sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
+ sc->vxl_im4o->imo_multicast_vif = -1;
+ } else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
+ sc->vxl_im6o = malloc(sizeof(struct ip6_moptions), M_VXLAN,
+ M_ZERO | M_WAITOK);
+ sc->vxl_im6o->im6o_multicast_ifp = sc->vxl_mc_ifp;
+ sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
+ }
+
+ return (error);
+}
+
+static int
+vxlan_setup_socket(struct vxlan_softc *sc)
+{
+ struct vxlan_socket *vso;
+ struct ifnet *ifp;
+ union vxlan_sockaddr *saddr, *daddr;
+ int multicast, error;
+
+ vso = NULL;
+ ifp = sc->vxl_ifp;
+ saddr = &sc->vxl_src_addr;
+ daddr = &sc->vxl_dst_addr;
+
+ multicast = vxlan_sockaddr_in_multicast(daddr);
+ MPASS(multicast != -1);
+ sc->vxl_vso_mc_index = -1;
+
+ /*
+ * Try to create the socket. If that fails, attempt to use an
+ * existing socket.
+ */
+ error = vxlan_socket_create(ifp, multicast, saddr, &vso);
+ if (error) {
+ if (multicast != 0)
+ vso = vxlan_socket_mc_lookup(saddr);
+ else
+ vso = vxlan_socket_lookup(saddr);
+
+ if (vso == NULL) {
+ if_printf(ifp, "cannot create socket (error: %d), "
+ "and no existing socket found\n", error);
+ goto out;
+ }
+ }
+
+ if (multicast != 0) {
+ error = vxlan_setup_multicast(sc);
+ if (error)
+ goto out;
+
+ error = vxlan_socket_mc_add_group(vso, daddr, saddr,
+ sc->vxl_mc_ifindex, &sc->vxl_vso_mc_index);
+ if (error)
+ goto out;
+ }
+
+ sc->vxl_sock = vso;
+ error = vxlan_socket_insert_softc(vso, sc);
+ if (error) {
+ sc->vxl_sock = NULL;
+ if_printf(ifp, "network identifier %d already exists in "
+ "this socket\n", sc->vxl_vni);
+ goto out;
+ }
+
+ return (0);
+
+out:
+ if (vso != NULL) {
+ if (sc->vxl_vso_mc_index != -1) {
+ vxlan_socket_mc_release_group_by_idx(vso,
+ sc->vxl_vso_mc_index);
+ sc->vxl_vso_mc_index = -1;
+ }
+ if (multicast != 0)
+ vxlan_free_multicast(sc);
+ vxlan_socket_release(vso);
+ }
+
+ return (error);
+}
+
+static void
+vxlan_setup_interface(struct vxlan_softc *sc)
+{
+ struct ifnet *ifp;
+
+ ifp = sc->vxl_ifp;
+ ifp->if_hdrlen = ETHER_HDR_LEN + sizeof(struct vxlanudphdr);
+
+ if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr) != 0)
+ ifp->if_hdrlen += sizeof(struct ip);
+ else if (VXLAN_SOCKADDR_IS_IPV6(&sc->vxl_dst_addr) != 0)
+ ifp->if_hdrlen += sizeof(struct ip6_hdr);
+}
+
+static int
+vxlan_valid_init_config(struct vxlan_softc *sc)
+{
+ const char *reason;
+
+ if (vxlan_check_vni(sc->vxl_vni) != 0) {
+ reason = "invalid virtual network identifier specified";
+ goto fail;
+ }
+
+ if (vxlan_sockaddr_supported(&sc->vxl_src_addr, 1) == 0) {
+ reason = "source address type is not supported";
+ goto fail;
+ }
+
+ if (vxlan_sockaddr_supported(&sc->vxl_dst_addr, 0) == 0) {
+ reason = "destination address type is not supported";
+ goto fail;
+ }
+
+ if (vxlan_sockaddr_in_any(&sc->vxl_dst_addr) != 0) {
+ reason = "no valid destination address specified";
+ goto fail;
+ }
+
+ if (vxlan_sockaddr_in_multicast(&sc->vxl_dst_addr) == 0 &&
+ sc->vxl_mc_ifname[0] != '\0') {
+ reason = "can only specify interface with a group address";
+ goto fail;
+ }
+
+ if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0) {
+ if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_src_addr) ^
+ VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr)) {
+ reason = "source and destination address must both "
+ "be either IPv4 or IPv6";
+ goto fail;
+ }
+ }
+
+ if (sc->vxl_src_addr.in4.sin_port == 0) {
+ reason = "local port not specified";
+ goto fail;
+ }
+
+ if (sc->vxl_dst_addr.in4.sin_port == 0) {
+ reason = "remote port not specified";
+ goto fail;
+ }
+
+ return (0);
+
+fail:
+ if_printf(sc->vxl_ifp, "cannot initialize interface: %s\n", reason);
+ return (EINVAL);
+}
+
+static void
+vxlan_init_wait(struct vxlan_softc *sc)
+{
+
+ VXLAN_LOCK_WASSERT(sc);
+ while (sc->vxl_flags & VXLAN_FLAG_INIT)
+ rm_sleep(sc, &sc->vxl_lock, 0, "vxlint", hz);
+}
+
+static void
+vxlan_init_complete(struct vxlan_softc *sc)
+{
+
+ VXLAN_WLOCK(sc);
+ sc->vxl_flags &= ~VXLAN_FLAG_INIT;
+ wakeup(sc);
+ VXLAN_WUNLOCK(sc);
+}
+
+static void
+vxlan_init(void *xsc)
+{
+ static const uint8_t empty_mac[ETHER_ADDR_LEN];
+ struct vxlan_softc *sc;
+ struct ifnet *ifp;
+
+ sc = xsc;
+ ifp = sc->vxl_ifp;
+
+ VXLAN_WLOCK(sc);
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ VXLAN_WUNLOCK(sc);
+ return;
+ }
+ sc->vxl_flags |= VXLAN_FLAG_INIT;
+ VXLAN_WUNLOCK(sc);
+
+ if (vxlan_valid_init_config(sc) != 0)
+ goto out;
+
+ vxlan_setup_interface(sc);
+
+ if (vxlan_setup_socket(sc) != 0)
+ goto out;
+
+ /* Initialize the default forwarding entry. */
+ vxlan_ftable_entry_init(sc, &sc->vxl_default_fe, empty_mac,
+ &sc->vxl_dst_addr.sa, VXLAN_FE_FLAG_STATIC);
+
+ VXLAN_WLOCK(sc);
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ callout_reset(&sc->vxl_callout, vxlan_ftable_prune_period * hz,
+ vxlan_timer, sc);
+ VXLAN_WUNLOCK(sc);
+
+out:
+ vxlan_init_complete(sc);
+}
+
+static void
+vxlan_release(struct vxlan_softc *sc)
+{
+
+ /*
+ * The softc may be destroyed as soon as we release our reference,
+ * so we cannot serialize the wakeup with the softc lock. We use a
+ * timeout in our sleeps so a missed wakeup is unfortunate but not
+ * fatal.
+ */
+ if (VXLAN_RELEASE(sc) != 0)
+ wakeup(sc);
+}
+
+static void
+vxlan_teardown_wait(struct vxlan_softc *sc)
+{
+
+ VXLAN_LOCK_WASSERT(sc);
+ while (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
+ rm_sleep(sc, &sc->vxl_lock, 0, "vxltrn", hz);
+}
+
+static void
+vxlan_teardown_complete(struct vxlan_softc *sc)
+{
+
+ VXLAN_WLOCK(sc);
+ sc->vxl_flags &= ~VXLAN_FLAG_TEARDOWN;
+ wakeup(sc);
+ VXLAN_WUNLOCK(sc);
+}
+
+static void
+vxlan_teardown_locked(struct vxlan_softc *sc)
+{
+ struct ifnet *ifp;
+ struct vxlan_socket *vso;
+
+ ifp = sc->vxl_ifp;
+
+ VXLAN_LOCK_WASSERT(sc);
+ MPASS(sc->vxl_flags & VXLAN_FLAG_TEARDOWN);
+
+ ifp->if_flags &= ~IFF_UP;
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ callout_stop(&sc->vxl_callout);
+ vso = sc->vxl_sock;
+ sc->vxl_sock = NULL;
+
+ VXLAN_WUNLOCK(sc);
+
+ if (vso != NULL) {
+ vxlan_socket_remove_softc(vso, sc);
+
+ if (sc->vxl_vso_mc_index != -1) {
+ vxlan_socket_mc_release_group_by_idx(vso,
+ sc->vxl_vso_mc_index);
+ sc->vxl_vso_mc_index = -1;
+ }
+ }
+
+ VXLAN_WLOCK(sc);
+ while (sc->vxl_refcnt != 0)
+ rm_sleep(sc, &sc->vxl_lock, 0, "vxldrn", hz);
+ VXLAN_WUNLOCK(sc);
+
+ callout_drain(&sc->vxl_callout);
+
+ vxlan_free_multicast(sc);
+ if (vso != NULL)
+ vxlan_socket_release(vso);
+
+ vxlan_teardown_complete(sc);
+}
+
+static void
+vxlan_teardown(struct vxlan_softc *sc)
+{
+
+ VXLAN_WLOCK(sc);
+ if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN) {
+ vxlan_teardown_wait(sc);
+ VXLAN_WUNLOCK(sc);
+ return;
+ }
+
+ sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
+ vxlan_teardown_locked(sc);
+}
+
+static void
+vxlan_ifdetach(struct vxlan_softc *sc, struct ifnet *ifp,
+ struct vxlan_softc_head *list)
+{
+
+ VXLAN_WLOCK(sc);
+
+ if (sc->vxl_mc_ifp != ifp)
+ goto out;
+ if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
+ goto out;
+
+ sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
+ LIST_INSERT_HEAD(list, sc, vxl_ifdetach_list);
+
+out:
+ VXLAN_WUNLOCK(sc);
+}
+
+static void
+vxlan_timer(void *xsc)
+{
+ struct vxlan_softc *sc;
+
+ sc = xsc;
+ VXLAN_LOCK_WASSERT(sc);
+
+ vxlan_ftable_expire(sc);
+ callout_schedule(&sc->vxl_callout, vxlan_ftable_prune_period * hz);
+}
+
+static int
+vxlan_ioctl_ifflags(struct vxlan_softc *sc)
+{
+ struct ifnet *ifp;
+
+ ifp = sc->vxl_ifp;
+
+ if (ifp->if_flags & IFF_UP) {
+ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+ vxlan_init(sc);
+ } else {
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING)
+ vxlan_teardown(sc);
+ }
+
+ return (0);
+}
+
+static int
+vxlan_ctrl_get_config(struct vxlan_softc *sc, void *arg)
+{
+ struct rm_priotracker tracker;
+ struct ifvxlancfg *cfg;
+
+ cfg = arg;
+ bzero(cfg, sizeof(*cfg));
+
+ VXLAN_RLOCK(sc, &tracker);
+ cfg->vxlc_vni = sc->vxl_vni;
+ memcpy(&cfg->vxlc_local_sa, &sc->vxl_src_addr,
+ sizeof(union vxlan_sockaddr));
+ memcpy(&cfg->vxlc_remote_sa, &sc->vxl_dst_addr,
+ sizeof(union vxlan_sockaddr));
+ cfg->vxlc_mc_ifindex = sc->vxl_mc_ifindex;
+ cfg->vxlc_ftable_cnt = sc->vxl_ftable_cnt;
+ cfg->vxlc_ftable_max = sc->vxl_ftable_max;
+ cfg->vxlc_ftable_timeout = sc->vxl_ftable_timeout;
+ cfg->vxlc_port_min = sc->vxl_min_port;
+ cfg->vxlc_port_max = sc->vxl_max_port;
+ cfg->vxlc_learn = (sc->vxl_flags & VXLAN_FLAG_LEARN) != 0;
+ cfg->vxlc_ttl = sc->vxl_ttl;
+ VXLAN_RUNLOCK(sc, &tracker);
+
+ return (0);
+}
+
+static int
+vxlan_ctrl_set_vni(struct vxlan_softc *sc, void *arg)
+{
+ struct ifvxlancmd *cmd;
+ int error;
+
+ cmd = arg;
+
+ if (vxlan_check_vni(cmd->vxlcmd_vni) != 0)
+ return (EINVAL);
+
+ VXLAN_WLOCK(sc);
+ if (vxlan_can_change_config(sc)) {
+ sc->vxl_vni = cmd->vxlcmd_vni;
+ error = 0;
+ } else
+ error = EBUSY;
+ VXLAN_WUNLOCK(sc);
+
+ return (error);
+}
+
+static int
+vxlan_ctrl_set_local_addr(struct vxlan_softc *sc, void *arg)
+{
+ struct ifvxlancmd *cmd;
+ union vxlan_sockaddr *vxlsa;
+ int error;
+
+ cmd = arg;
+ vxlsa = &cmd->vxlcmd_sa;
+
+ if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
+ return (EINVAL);
+ if (vxlan_sockaddr_in_multicast(vxlsa) != 0)
+ return (EINVAL);
+
+ VXLAN_WLOCK(sc);
+ if (vxlan_can_change_config(sc)) {
+ vxlan_sockaddr_in_copy(&sc->vxl_src_addr, &vxlsa->sa);
+ error = 0;
+ } else
+ error = EBUSY;
+ VXLAN_WUNLOCK(sc);
+
+ return (error);
+}
+
+static int
+vxlan_ctrl_set_remote_addr(struct vxlan_softc *sc, void *arg)
+{
+ struct ifvxlancmd *cmd;
+ union vxlan_sockaddr *vxlsa;
+ int error;
+
+ cmd = arg;
+ vxlsa = &cmd->vxlcmd_sa;
+
+ if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
+ return (EINVAL);
+
+ VXLAN_WLOCK(sc);
+ if (vxlan_can_change_config(sc)) {
+ vxlan_sockaddr_in_copy(&sc->vxl_dst_addr, &vxlsa->sa);
+ error = 0;
+ } else
+ error = EBUSY;
+ VXLAN_WUNLOCK(sc);
+
+ return (error);
+}
+
+static int
+vxlan_ctrl_set_local_port(struct vxlan_softc *sc, void *arg)
+{
+ struct ifvxlancmd *cmd;
+ int error;
+
+ cmd = arg;
+
+ if (cmd->vxlcmd_port == 0)
+ return (EINVAL);
+
+ VXLAN_WLOCK(sc);
+ if (vxlan_can_change_config(sc)) {
+ sc->vxl_src_addr.in4.sin_port = htons(cmd->vxlcmd_port);
+ error = 0;
+ } else
+ error = EBUSY;
+ VXLAN_WUNLOCK(sc);
+
+ return (error);
+}
+
+static int
+vxlan_ctrl_set_remote_port(struct vxlan_softc *sc, void *arg)
+{
+ struct ifvxlancmd *cmd;
+ int error;
+
+ cmd = arg;
+
+ if (cmd->vxlcmd_port == 0)
+ return (EINVAL);
+
+ VXLAN_WLOCK(sc);
+ if (vxlan_can_change_config(sc)) {
+ sc->vxl_dst_addr.in4.sin_port = htons(cmd->vxlcmd_port);
+ error = 0;
+ } else
+ error = EBUSY;
+ VXLAN_WUNLOCK(sc);
+
+ return (error);
+}
+
+static int
+vxlan_ctrl_set_port_range(struct vxlan_softc *sc, void *arg)
+{
+ struct ifvxlancmd *cmd;
+ uint16_t min, max;
+ int error;
+
+ cmd = arg;
+ min = cmd->vxlcmd_port_min;
+ max = cmd->vxlcmd_port_max;
+
+ if (max < min)
+ return (EINVAL);
+
+ VXLAN_WLOCK(sc);
+ if (vxlan_can_change_config(sc)) {
+ sc->vxl_min_port = min;
+ sc->vxl_max_port = max;
+ error = 0;
+ } else
+ error = EBUSY;
+ VXLAN_WUNLOCK(sc);
+
+ return (error);
+}
+
+static int
+vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *sc, void *arg)
+{
+ struct ifvxlancmd *cmd;
+ int error;
+
+ cmd = arg;
+
+ VXLAN_WLOCK(sc);
+ if (vxlan_check_ftable_timeout(cmd->vxlcmd_ftable_timeout) == 0) {
+ sc->vxl_ftable_timeout = cmd->vxlcmd_ftable_timeout;
+ error = 0;
+ } else
+ error = EINVAL;
+ VXLAN_WUNLOCK(sc);
+
+ return (error);
+}
+
+static int
+vxlan_ctrl_set_ftable_max(struct vxlan_softc *sc, void *arg)
+{
+ struct ifvxlancmd *cmd;
+ int error;
+
+ cmd = arg;
+
+ VXLAN_WLOCK(sc);
+ if (vxlan_check_ftable_max(cmd->vxlcmd_ftable_max) == 0) {
+ sc->vxl_ftable_max = cmd->vxlcmd_ftable_max;
+ error = 0;
+ } else
+ error = EINVAL;
+ VXLAN_WUNLOCK(sc);
+
+ return (error);
+}
+
+static int
+vxlan_ctrl_set_multicast_if(struct vxlan_softc * sc, void *arg)
+{
+ struct ifvxlancmd *cmd;
+ int error;
+
+ cmd = arg;
+
+ VXLAN_WLOCK(sc);
+ if (vxlan_can_change_config(sc)) {
+ strlcpy(sc->vxl_mc_ifname, cmd->vxlcmd_ifname, IFNAMSIZ);
+ error = 0;
+ } else
+ error = EBUSY;
+ VXLAN_WUNLOCK(sc);
+
+ return (error);
+}
+
+static int
+vxlan_ctrl_set_ttl(struct vxlan_softc *sc, void *arg)
+{
+ struct ifvxlancmd *cmd;
+ int error;
+
+ cmd = arg;
+
+ VXLAN_WLOCK(sc);
+ if (vxlan_check_ttl(cmd->vxlcmd_ttl) == 0) {
+ sc->vxl_ttl = cmd->vxlcmd_ttl;
+ if (sc->vxl_im4o != NULL)
+ sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
+ if (sc->vxl_im6o != NULL)
+ sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
+ error = 0;
+ } else
+ error = EINVAL;
+ VXLAN_WUNLOCK(sc);
+
+ return (error);
+}
+
+static int
+vxlan_ctrl_set_learn(struct vxlan_softc *sc, void *arg)
+{
+ struct ifvxlancmd *cmd;
+
+ cmd = arg;
+
+ VXLAN_WLOCK(sc);
+ if (cmd->vxlcmd_flags & VXLAN_CMD_FLAG_LEARN)
+ sc->vxl_flags |= VXLAN_FLAG_LEARN;
+ else
+ sc->vxl_flags &= ~VXLAN_FLAG_LEARN;
+ VXLAN_WUNLOCK(sc);
+
+ return (0);
+}
+
+static int
+vxlan_ctrl_ftable_entry_add(struct vxlan_softc *sc, void *arg)
+{
+ union vxlan_sockaddr vxlsa;
+ struct ifvxlancmd *cmd;
+ struct vxlan_ftable_entry *fe;
+ int error;
+
+ cmd = arg;
+ vxlsa = cmd->vxlcmd_sa;
+
+ if (!VXLAN_SOCKADDR_IS_IPV46(&vxlsa))
+ return (EINVAL);
+ if (vxlan_sockaddr_in_any(&vxlsa) != 0)
+ return (EINVAL);
+ if (vxlan_sockaddr_in_multicast(&vxlsa) != 0)
+ return (EINVAL);
+ /* BMV: We could support both IPv4 and IPv6 later. */
+ if (vxlsa.sa.sa_family != sc->vxl_dst_addr.sa.sa_family)
+ return (EAFNOSUPPORT);
+
+ fe = vxlan_ftable_entry_alloc();
+ if (fe == NULL)
+ return (ENOMEM);
+
+ if (vxlsa.in4.sin_port == 0)
+ vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;
+
+ vxlan_ftable_entry_init(sc, fe, cmd->vxlcmd_mac, &vxlsa.sa,
+ VXLAN_FE_FLAG_STATIC);
+
+ VXLAN_WLOCK(sc);
+ error = vxlan_ftable_entry_insert(sc, fe);
+ VXLAN_WUNLOCK(sc);
+
+ if (error)
+ vxlan_ftable_entry_free(fe);
+
+ return (error);
+}
+
+static int
+vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *sc, void *arg)
+{
+ struct ifvxlancmd *cmd;
+ struct vxlan_ftable_entry *fe;
+ int error;
+
+ cmd = arg;
+
+ VXLAN_WLOCK(sc);
+ fe = vxlan_ftable_entry_lookup(sc, cmd->vxlcmd_mac);
+ if (fe != NULL) {
+ vxlan_ftable_entry_destroy(sc, fe);
+ error = 0;
+ } else
+ error = ENOENT;
+ VXLAN_WUNLOCK(sc);
+
+ return (error);
+}
+
+static int
+vxlan_ctrl_flush(struct vxlan_softc *sc, void *arg)
+{
+ struct ifvxlancmd *cmd;
+ int all;
+
+ cmd = arg;
+ all = cmd->vxlcmd_flags & VXLAN_CMD_FLAG_FLUSH_ALL;
+
+ VXLAN_WLOCK(sc);
+ vxlan_ftable_flush(sc, all);
+ VXLAN_WUNLOCK(sc);
+
+ return (0);
+}
+
+static int
+vxlan_ioctl_drvspec(struct vxlan_softc *sc, struct ifdrv *ifd, int get)
+{
+ const struct vxlan_control *vc;
+ union {
+ struct ifvxlancfg cfg;
+ struct ifvxlancmd cmd;
+ } args;
+ int out, error;
+
+ if (ifd->ifd_cmd >= vxlan_control_table_size)
+ return (EINVAL);
+
+ bzero(&args, sizeof(args));
+ vc = &vxlan_control_table[ifd->ifd_cmd];
+ out = (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) != 0;
+
+ if ((get != 0 && out == 0) || (get == 0 && out != 0))
+ return (EINVAL);
+
+ if (vc->vxlc_flags & VXLAN_CTRL_FLAG_SUSER) {
+ error = priv_check(curthread, PRIV_NET_VXLAN);
+ if (error)
+ return (error);
+ }
+
+ if (ifd->ifd_len != vc->vxlc_argsize ||
+ ifd->ifd_len > sizeof(args))
+ return (EINVAL);
+
+ if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYIN) {
+ error = copyin(ifd->ifd_data, &args, ifd->ifd_len);
+ if (error)
+ return (error);
+ }
+
+ error = vc->vxlc_func(sc, &args);
+ if (error)
+ return (error);
+
+ if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) {
+ error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
+ if (error)
+ return (error);
+ }
+
+ return (0);
+}
+
+static int
+vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ struct vxlan_softc *sc;
+ struct ifreq *ifr;
+ struct ifdrv *ifd;
+ int error;
+
+ sc = ifp->if_softc;
+ ifr = (struct ifreq *) data;
+ ifd = (struct ifdrv *) data;
+
+ switch (cmd) {
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ error = 0;
+ break;
+
+ case SIOCGDRVSPEC:
+ case SIOCSDRVSPEC:
+ error = vxlan_ioctl_drvspec(sc, ifd, cmd == SIOCGDRVSPEC);
+ break;
+
+ case SIOCSIFFLAGS:
+ error = vxlan_ioctl_ifflags(sc);
+ break;
+ default:
+ error = ether_ioctl(ifp, cmd, data);
+ break;
+ }
+
+ return (error);
+}
+
+#if defined(INET) || defined(INET6)
+static uint16_t
+vxlan_pick_source_port(struct vxlan_softc *sc, struct mbuf *m)
+{
+ int range;
+ uint32_t hash;
+
+ range = sc->vxl_max_port - sc->vxl_min_port + 1;
+
+ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE &&
+ M_HASHTYPE_GET(m) != M_HASHTYPE_OPAQUE)
+ hash = m->m_pkthdr.flowid;
+ else
+ hash = jenkins_hash(m->m_data, ETHER_HDR_LEN,
+ sc->vxl_port_hash_key);
+
+ return (sc->vxl_min_port + (hash % range));
+}
+
+static void
+vxlan_encap_header(struct vxlan_softc *sc, struct mbuf *m, int ipoff,
+ uint16_t srcport, uint16_t dstport)
+{
+ struct vxlanudphdr *hdr;
+ struct udphdr *udph;
+ struct vxlan_header *vxh;
+ int len;
+
+ len = m->m_pkthdr.len - ipoff;
+ MPASS(len >= sizeof(struct vxlanudphdr));
+ hdr = mtodo(m, ipoff);
+
+ udph = &hdr->vxlh_udp;
+ udph->uh_sport = srcport;
+ udph->uh_dport = dstport;
+ udph->uh_ulen = htons(len);
+ udph->uh_sum = 0;
+
+ vxh = &hdr->vxlh_hdr;
+ vxh->vxlh_flags = htonl(VXLAN_HDR_FLAGS_VALID_VNI);
+ vxh->vxlh_vni = htonl(sc->vxl_vni << VXLAN_HDR_VNI_SHIFT);
+}
+#endif
+
+static int
+vxlan_encap4(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa,
+ struct mbuf *m)
+{
+#ifdef INET
+ struct ifnet *ifp;
+ struct ip *ip;
+ struct in_addr srcaddr, dstaddr;
+ uint16_t srcport, dstport;
+ int len, mcast, error;
+
+ ifp = sc->vxl_ifp;
+ srcaddr = sc->vxl_src_addr.in4.sin_addr;
+ srcport = vxlan_pick_source_port(sc, m);
+ dstaddr = fvxlsa->in4.sin_addr;
+ dstport = fvxlsa->in4.sin_port;
+
+ M_PREPEND(m, sizeof(struct ip) + sizeof(struct vxlanudphdr),
+ M_NOWAIT);
+ if (m == NULL) {
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ return (ENOBUFS);
+ }
+
+ len = m->m_pkthdr.len;
+
+ ip = mtod(m, struct ip *);
+ ip->ip_tos = 0;
+ ip->ip_len = htons(len);
+ ip->ip_off = 0;
+ ip->ip_ttl = sc->vxl_ttl;
+ ip->ip_p = IPPROTO_UDP;
+ ip->ip_sum = 0;
+ ip->ip_src = srcaddr;
+ ip->ip_dst = dstaddr;
+
+ vxlan_encap_header(sc, m, sizeof(struct ip), srcport, dstport);
+
+ mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
+ m->m_flags &= ~(M_MCAST | M_BCAST);
+
+ error = ip_output(m, NULL, NULL, 0, sc->vxl_im4o, NULL);
+ if (error == 0) {
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
+ if (mcast != 0)
+ if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
+ } else
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+
+ return (error);
+#else
+ m_freem(m);
+ return (ENOTSUP);
+#endif
+}
+
+static int
+vxlan_encap6(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa,
+ struct mbuf *m)
+{
+#ifdef INET6
+ struct ifnet *ifp;
+ struct ip6_hdr *ip6;
+ const struct in6_addr *srcaddr, *dstaddr;
+ uint16_t srcport, dstport;
+ int len, mcast, error;
+
+ ifp = sc->vxl_ifp;
+ srcaddr = &sc->vxl_src_addr.in6.sin6_addr;
+ srcport = vxlan_pick_source_port(sc, m);
+ dstaddr = &fvxlsa->in6.sin6_addr;
+ dstport = fvxlsa->in6.sin6_port;
+
+ M_PREPEND(m, sizeof(struct ip6_hdr) + sizeof(struct vxlanudphdr),
+ M_NOWAIT);
+ if (m == NULL) {
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ return (ENOBUFS);
+ }
+
+ len = m->m_pkthdr.len;
+
+ ip6 = mtod(m, struct ip6_hdr *);
+ ip6->ip6_flow = 0; /* BMV: Keep in forwarding entry? */
+ ip6->ip6_vfc = IPV6_VERSION;
+ ip6->ip6_plen = 0;
+ ip6->ip6_nxt = IPPROTO_UDP;
+ ip6->ip6_hlim = sc->vxl_ttl;
+ ip6->ip6_src = *srcaddr;
+ ip6->ip6_dst = *dstaddr;
+
+ vxlan_encap_header(sc, m, sizeof(struct ip6_hdr), srcport, dstport);
+
+ /*
+ * XXX BMV We need support for RFC6935 before we can send and
+ * receive IPv6 UDP packets with a zero checksum.
+ */
+ {
+ struct udphdr *hdr = mtodo(m, sizeof(struct ip6_hdr));
+ hdr->uh_sum = in6_cksum_pseudo(ip6,
+ m->m_pkthdr.len - sizeof(struct ip6_hdr), IPPROTO_UDP, 0);
+ m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
+ m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+ }
+
+ mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
+ m->m_flags &= ~(M_MCAST | M_BCAST);
+
+ error = ip6_output(m, NULL, NULL, 0, sc->vxl_im6o, NULL, NULL);
+ if (error == 0) {
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
+ if (mcast != 0)
+ if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
+ } else
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+
+ return (error);
+#else
+ m_freem(m);
+ return (ENOTSUP);
+#endif
+}
+
+static int
+vxlan_transmit(struct ifnet *ifp, struct mbuf *m)
+{
+ struct rm_priotracker tracker;
+ union vxlan_sockaddr vxlsa;
+ struct vxlan_softc *sc;
+ struct vxlan_ftable_entry *fe;
+ struct ifnet *mcifp;
+ struct ether_header *eh;
+ int ipv4, error;
+
+ sc = ifp->if_softc;
+ eh = mtod(m, struct ether_header *);
+ fe = NULL;
+ mcifp = NULL;
+
+ ETHER_BPF_MTAP(ifp, m);
+
+ VXLAN_RLOCK(sc, &tracker);
+ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
+ VXLAN_RUNLOCK(sc, &tracker);
+ m_freem(m);
+ return (ENETDOWN);
+ }
+
+ if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
+ fe = vxlan_ftable_entry_lookup(sc, eh->ether_dhost);
+ if (fe == NULL)
+ fe = &sc->vxl_default_fe;
+ vxlan_sockaddr_copy(&vxlsa, &fe->vxlfe_raddr.sa);
+
+ ipv4 = VXLAN_SOCKADDR_IS_IPV4(&vxlsa) != 0;
+ if (vxlan_sockaddr_in_multicast(&vxlsa) != 0)
+ mcifp = vxlan_multicast_if_ref(sc, ipv4);
+
+ VXLAN_ACQUIRE(sc);
+ VXLAN_RUNLOCK(sc, &tracker);
+
+ if (ipv4 != 0)
+ error = vxlan_encap4(sc, &vxlsa, m);
+ else
+ error = vxlan_encap6(sc, &vxlsa, m);
+
+ vxlan_release(sc);
+ if (mcifp != NULL)
+ if_rele(mcifp);
+
+ return (error);
+}
+
+static void
+vxlan_qflush(struct ifnet *ifp __unused)
+{
+}
+
+static void
+vxlan_rcv_udp_packet(struct mbuf *m, int offset, struct inpcb *inpcb,
+ const struct sockaddr *srcsa, void *xvso)
+{
+ struct vxlan_socket *vso;
+ struct vxlan_header *vxh, vxlanhdr;
+ uint32_t vni;
+ int error;
+
+ M_ASSERTPKTHDR(m);
+ vso = xvso;
+ offset += sizeof(struct udphdr);
+
+ if (m->m_pkthdr.len < offset + sizeof(struct vxlan_header))
+ goto out;
+
+ if (__predict_false(m->m_len < offset + sizeof(struct vxlan_header))) {
+ m_copydata(m, offset, sizeof(struct vxlan_header),
+ (caddr_t) &vxlanhdr);
+ vxh = &vxlanhdr;
+ } else
+ vxh = mtodo(m, offset);
+
+ /*
+ * Drop if there is a reserved bit set in either the flags or VNI
+ * fields of the header. This goes against the specification, but
+ * a bit set may indicate an unsupported new feature. This matches
+ * the behavior of the Linux implementation.
+ */
+ if (vxh->vxlh_flags != htonl(VXLAN_HDR_FLAGS_VALID_VNI) ||
+ vxh->vxlh_vni & ~htonl(VXLAN_VNI_MASK))
+ goto out;
+
+ vni = ntohl(vxh->vxlh_vni) >> VXLAN_HDR_VNI_SHIFT;
+ /* Adjust to the start of the inner Ethernet frame. */
+ m_adj(m, offset + sizeof(struct vxlan_header));
+
+ error = vxlan_input(vso, vni, &m, srcsa);
+ MPASS(error != 0 || m == NULL);
+
+out:
+ if (m != NULL)
+ m_freem(m);
+}
+
+static int
+vxlan_input(struct vxlan_socket *vso, uint32_t vni, struct mbuf **m0,
+ const struct sockaddr *sa)
+{
+ struct vxlan_softc *sc;
+ struct ifnet *ifp;
+ struct mbuf *m;
+ struct ether_header *eh;
+ int error;
+
+ sc = vxlan_socket_lookup_softc(vso, vni);
+ if (sc == NULL)
+ return (ENOENT);
+
+ ifp = sc->vxl_ifp;
+ m = *m0;
+ eh = mtod(m, struct ether_header *);
+
+ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
+ error = ENETDOWN;
+ goto out;
+ } else if (ifp == m->m_pkthdr.rcvif) {
+ /* XXX Does not catch more complex loops. */
+ error = EDEADLK;
+ goto out;
+ }
+
+ if (sc->vxl_flags & VXLAN_FLAG_LEARN)
+ vxlan_ftable_update(sc, sa, eh->ether_shost);
+
+ m_clrprotoflags(m);
+ m->m_pkthdr.rcvif = ifp;
+ M_SETFIB(m, ifp->if_fib);
+
+ error = netisr_queue_src(NETISR_ETHER, 0, m);
+ *m0 = NULL;
+
+out:
+ vxlan_release(sc);
+ return (error);
+}
+
+static void
+vxlan_set_default_config(struct vxlan_softc *sc)
+{
+
+ sc->vxl_flags |= VXLAN_FLAG_LEARN;
+
+ sc->vxl_vni = VXLAN_VNI_MAX;
+ sc->vxl_ttl = IPDEFTTL;
+
+ if (!vxlan_tunable_int(sc, "legacy_port", vxlan_legacy_port)) {
+ sc->vxl_src_addr.in4.sin_port = htons(VXLAN_PORT);
+ sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_PORT);
+ } else {
+ sc->vxl_src_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT);
+ sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT);
+ }
+
+ sc->vxl_min_port = V_ipport_firstauto;
+ sc->vxl_max_port = V_ipport_lastauto;
+
+ sc->vxl_ftable_max = VXLAN_FTABLE_MAX;
+ sc->vxl_ftable_timeout = VXLAN_FTABLE_TIMEOUT;
+}
+
+static int
+vxlan_set_user_config(struct vxlan_softc *sc, struct ifvxlanparam *vxlp)
+{
+
+#ifndef INET
+ if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR4 |
+ VXLAN_PARAM_WITH_REMOTE_ADDR4))
+ return (EAFNOSUPPORT);
+#endif
+
+#ifndef INET6
+ if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR6 |
+ VXLAN_PARAM_WITH_REMOTE_ADDR6))
+ return (EAFNOSUPPORT);
+#endif
+
+ if (vxlp->vxlp_with & VXLAN_PARAM_WITH_VNI) {
+ if (vxlan_check_vni(vxlp->vxlp_vni) == 0)
+ sc->vxl_vni = vxlp->vxlp_vni;
+ }
+
+ if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR4) {
+ sc->vxl_src_addr.in4.sin_len = sizeof(struct sockaddr_in);
+ sc->vxl_src_addr.in4.sin_family = AF_INET;
+ sc->vxl_src_addr.in4.sin_addr = vxlp->vxlp_local_in4;
+ } else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) {
+ sc->vxl_src_addr.in6.sin6_len = sizeof(struct sockaddr_in6);
+ sc->vxl_src_addr.in6.sin6_family = AF_INET6;
+ sc->vxl_src_addr.in6.sin6_addr = vxlp->vxlp_local_in6;
+ }
+
+ if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR4) {
+ sc->vxl_dst_addr.in4.sin_len = sizeof(struct sockaddr_in);
+ sc->vxl_dst_addr.in4.sin_family = AF_INET;
+ sc->vxl_dst_addr.in4.sin_addr = vxlp->vxlp_remote_in4;
+ } else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) {
+ sc->vxl_dst_addr.in6.sin6_len = sizeof(struct sockaddr_in6);
+ sc->vxl_dst_addr.in6.sin6_family = AF_INET6;
+ sc->vxl_dst_addr.in6.sin6_addr = vxlp->vxlp_remote_in6;
+ }
+
+ if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_PORT)
+ sc->vxl_src_addr.in4.sin_port = htons(vxlp->vxlp_local_port);
+ if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_PORT)
+ sc->vxl_dst_addr.in4.sin_port = htons(vxlp->vxlp_remote_port);
+
+ if (vxlp->vxlp_with & VXLAN_PARAM_WITH_PORT_RANGE) {
+ if (vxlp->vxlp_min_port <= vxlp->vxlp_max_port) {
+ sc->vxl_min_port = vxlp->vxlp_min_port;
+ sc->vxl_max_port = vxlp->vxlp_max_port;
+ }
+ }
+
+ if (vxlp->vxlp_with & VXLAN_PARAM_WITH_MULTICAST_IF)
+ strlcpy(sc->vxl_mc_ifname, vxlp->vxlp_mc_ifname, IFNAMSIZ);
+
+ if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_TIMEOUT) {
+ if (vxlan_check_ftable_timeout(vxlp->vxlp_ftable_timeout) == 0)
+ sc->vxl_ftable_timeout = vxlp->vxlp_ftable_timeout;
+ }
+
+ if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_MAX) {
+ if (vxlan_check_ftable_max(vxlp->vxlp_ftable_max) == 0)
+ sc->vxl_ftable_max = vxlp->vxlp_ftable_max;
+ }
+
+ if (vxlp->vxlp_with & VXLAN_PARAM_WITH_TTL) {
+ if (vxlan_check_ttl(vxlp->vxlp_ttl) == 0)
+ sc->vxl_ttl = vxlp->vxlp_ttl;
+ }
+
+ if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LEARN) {
+ if (vxlp->vxlp_learn == 0)
+ sc->vxl_flags &= ~VXLAN_FLAG_LEARN;
+ }
+
+ return (0);
+}
+
+static int
+vxlan_clone_create(struct if_clone *ifc, int unit, caddr_t params)
+{
+ struct vxlan_softc *sc;
+ struct ifnet *ifp;
+ struct ifvxlanparam vxlp;
+ int error;
+
+ sc = malloc(sizeof(struct vxlan_softc), M_VXLAN, M_WAITOK | M_ZERO);
+ sc->vxl_unit = unit;
+ vxlan_set_default_config(sc);
+
+ if (params != 0) {
+ error = copyin(params, &vxlp, sizeof(vxlp));
+ if (error)
+ goto fail;
+
+ error = vxlan_set_user_config(sc, &vxlp);
+ if (error)
+ goto fail;
+ }
+
+ ifp = if_alloc(IFT_ETHER);
+ if (ifp == NULL) {
+ error = ENOSPC;
+ goto fail;
+ }
+
+ sc->vxl_ifp = ifp;
+ rm_init(&sc->vxl_lock, "vxlanrm");
+ callout_init_rw(&sc->vxl_callout, &sc->vxl_lock, 0);
+ sc->vxl_port_hash_key = arc4random();
+ vxlan_ftable_init(sc);
+
+ vxlan_sysctl_setup(sc);
+
+ ifp->if_softc = sc;
+ if_initname(ifp, vxlan_name, unit);
+ ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+ ifp->if_init = vxlan_init;
+ ifp->if_ioctl = vxlan_ioctl;
+ ifp->if_transmit = vxlan_transmit;
+ ifp->if_qflush = vxlan_qflush;
+
+ vxlan_fakeaddr(sc);
+ ether_ifattach(ifp, sc->vxl_hwaddr);
+
+ ifp->if_baudrate = 0;
+ ifp->if_hdrlen = 0;
+
+ return (0);
+
+fail:
+ free(sc, M_VXLAN);
+ return (error);
+}
+
+static void
+vxlan_clone_destroy(struct ifnet *ifp)
+{
+ struct vxlan_softc *sc;
+
+ sc = ifp->if_softc;
+
+ vxlan_teardown(sc);
+
+ vxlan_ftable_flush(sc, 1);
+
+ ether_ifdetach(ifp);
+ if_free(ifp);
+
+ vxlan_ftable_fini(sc);
+
+ vxlan_sysctl_destroy(sc);
+ rm_destroy(&sc->vxl_lock);
+ free(sc, M_VXLAN);
+}
+
+/* BMV: Taken from if_bridge. */
+static uint32_t
+vxlan_mac_hash(struct vxlan_softc *sc, const uint8_t *addr)
+{
+ uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->vxl_ftable_hash_key;
+
+ b += addr[5] << 8;
+ b += addr[4];
+ a += addr[3] << 24;
+ a += addr[2] << 16;
+ a += addr[1] << 8;
+ a += addr[0];
+
+/*
+ * The following hash function is adapted from "Hash Functions" by Bob Jenkins
+ * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
+ */
+#define mix(a, b, c) \
+do { \
+ a -= b; a -= c; a ^= (c >> 13); \
+ b -= c; b -= a; b ^= (a << 8); \
+ c -= a; c -= b; c ^= (b >> 13); \
+ a -= b; a -= c; a ^= (c >> 12); \
+ b -= c; b -= a; b ^= (a << 16); \
+ c -= a; c -= b; c ^= (b >> 5); \
+ a -= b; a -= c; a ^= (c >> 3); \
+ b -= c; b -= a; b ^= (a << 10); \
+ c -= a; c -= b; c ^= (b >> 15); \
+} while (0)
+
+ mix(a, b, c);
+
+#undef mix
+
+ return (c);
+}
+
+static void
+vxlan_fakeaddr(struct vxlan_softc *sc)
+{
+
+ /*
+ * Generate a non-multicast, locally administered address.
+ *
+ * BMV: Should we use the FreeBSD OUI range instead?
+ */
+ arc4rand(sc->vxl_hwaddr, ETHER_ADDR_LEN, 1);
+ sc->vxl_hwaddr[0] &= ~1;
+ sc->vxl_hwaddr[0] |= 2;
+}
+
+static int
+vxlan_sockaddr_cmp(const union vxlan_sockaddr *vxladdr,
+ const struct sockaddr *sa)
+{
+
+ return (bcmp(&vxladdr->sa, sa, vxladdr->sa.sa_len));
+}
+
+static void
+vxlan_sockaddr_copy(union vxlan_sockaddr *vxladdr,
+ const struct sockaddr *sa)
+{
+
+ MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
+ bzero(vxladdr, sizeof(*vxladdr));
+
+ if (sa->sa_family == AF_INET) {
+ vxladdr->in4 = *satoconstsin(sa);
+ vxladdr->in4.sin_len = sizeof(struct sockaddr_in);
+ } else if (sa->sa_family == AF_INET6) {
+ vxladdr->in6 = *satoconstsin6(sa);
+ vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6);
+ }
+}
+
+static int
+vxlan_sockaddr_in_equal(const union vxlan_sockaddr *vxladdr,
+ const struct sockaddr *sa)
+{
+ int equal;
+
+ if (sa->sa_family == AF_INET) {
+ const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
+ equal = in4->s_addr == vxladdr->in4.sin_addr.s_addr;
+ } else if (sa->sa_family == AF_INET6) {
+ const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
+ equal = IN6_ARE_ADDR_EQUAL(in6, &vxladdr->in6.sin6_addr);
+ } else
+ equal = 0;
+
+ return (equal);
+}
+
+static void
+vxlan_sockaddr_in_copy(union vxlan_sockaddr *vxladdr,
+ const struct sockaddr *sa)
+{
+
+ MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
+
+ if (sa->sa_family == AF_INET) {
+ const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
+ vxladdr->in4.sin_family = AF_INET;
+ vxladdr->in4.sin_len = sizeof(struct sockaddr_in);
+ vxladdr->in4.sin_addr = *in4;
+ } else if (sa->sa_family == AF_INET6) {
+ const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
+ vxladdr->in6.sin6_family = AF_INET6;
+ vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6);
+ vxladdr->in6.sin6_addr = *in6;
+ }
+}
+
+static int
+vxlan_sockaddr_supported(const union vxlan_sockaddr *vxladdr, int unspec)
+{
+ const struct sockaddr *sa;
+ int supported;
+
+ sa = &vxladdr->sa;
+ supported = 0;
+
+ if (sa->sa_family == AF_UNSPEC && unspec != 0) {
+ supported = 1;
+ } else if (sa->sa_family == AF_INET) {
+#ifdef INET
+ supported = 1;
+#endif
+ } else if (sa->sa_family == AF_INET6) {
+#ifdef INET6
+ supported = 1;
+#endif
+ }
+
+ return (supported);
+}
+
+static int
+vxlan_sockaddr_in_any(const union vxlan_sockaddr *vxladdr)
+{
+ const struct sockaddr *sa;
+ int any;
+
+ sa = &vxladdr->sa;
+
+ if (sa->sa_family == AF_INET) {
+ const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
+ any = in4->s_addr == INADDR_ANY;
+ } else if (sa->sa_family == AF_INET6) {
+ const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
+ any = IN6_IS_ADDR_UNSPECIFIED(in6);
+ } else
+ any = -1;
+
+ return (any);
+}
+
+static int
+vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *vxladdr)
+{
+ const struct sockaddr *sa;
+ int mc;
+
+ sa = &vxladdr->sa;
+
+ if (sa->sa_family == AF_INET) {
+ const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
+ mc = IN_MULTICAST(ntohl(in4->s_addr));
+ } else if (sa->sa_family == AF_INET6) {
+ const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
+ mc = IN6_IS_ADDR_MULTICAST(in6);
+ } else
+ mc = -1;
+
+ return (mc);
+}
+
+static int
+vxlan_can_change_config(struct vxlan_softc *sc)
+{
+ struct ifnet *ifp;
+
+ ifp = sc->vxl_ifp;
+ VXLAN_LOCK_ASSERT(sc);
+
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING)
+ return (0);
+ if (sc->vxl_flags & (VXLAN_FLAG_INIT | VXLAN_FLAG_TEARDOWN))
+ return (0);
+
+ return (1);
+}
+
+static int
+vxlan_check_vni(uint32_t vni)
+{
+
+ return (vni >= VXLAN_VNI_MAX);
+}
+
+static int
+vxlan_check_ttl(int ttl)
+{
+
+ return (ttl > MAXTTL);
+}
+
+static int
+vxlan_check_ftable_timeout(uint32_t timeout)
+{
+
+ return (timeout > VXLAN_FTABLE_MAX_TIMEOUT);
+}
+
+static int
+vxlan_check_ftable_max(uint32_t max)
+{
+
+ return (max > VXLAN_FTABLE_MAX);
+}
+
+static void
+vxlan_sysctl_setup(struct vxlan_softc *sc)
+{
+ struct sysctl_ctx_list *ctx;
+ struct sysctl_oid *node;
+ struct vxlan_statistics *stats;
+ char namebuf[8];
+
+ ctx = &sc->vxl_sysctl_ctx;
+ stats = &sc->vxl_stats;
+ snprintf(namebuf, sizeof(namebuf), "%d", sc->vxl_unit);
+
+ sysctl_ctx_init(ctx);
+ sc->vxl_sysctl_node = SYSCTL_ADD_NODE(ctx,
+ SYSCTL_STATIC_CHILDREN(_net_link_vxlan), OID_AUTO, namebuf,
+ CTLFLAG_RD, NULL, "");
+
+ node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node),
+ OID_AUTO, "ftable", CTLFLAG_RD, NULL, "");
+ SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "count",
+ CTLFLAG_RD, &sc->vxl_ftable_cnt, 0,
+ "Number of entries in fowarding table");
+ SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "max",
+ CTLFLAG_RD, &sc->vxl_ftable_max, 0,
+ "Maximum number of entries allowed in fowarding table");
+ SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "timeout",
+ CTLFLAG_RD, &sc->vxl_ftable_timeout, 0,
+ "Number of seconds between prunes of the forwarding table");
+ SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "dump",
+ CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_SKIP,
+ sc, 0, vxlan_ftable_sysctl_dump, "A",
+ "Dump the forwarding table entries");
+
+ node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node),
+ OID_AUTO, "stats", CTLFLAG_RD, NULL, "");
+ SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "ftable_nospace", CTLFLAG_RD, &stats->ftable_nospace, 0,
+ "Fowarding table reached maximum entries");
+ SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "ftable_lock_upgrade_failed", CTLFLAG_RD,
+ &stats->ftable_lock_upgrade_failed, 0,
+ "Forwarding table update required lock upgrade");
+}
+
+static void
+vxlan_sysctl_destroy(struct vxlan_softc *sc)
+{
+
+ sysctl_ctx_free(&sc->vxl_sysctl_ctx);
+ sc->vxl_sysctl_node = NULL;
+}
+
+static int
+vxlan_tunable_int(struct vxlan_softc *sc, const char *knob, int def)
+{
+ char path[64];
+
+ snprintf(path, sizeof(path), "net.link.vxlan.%d.%s",
+ sc->vxl_unit, knob);
+ TUNABLE_INT_FETCH(path, &def);
+
+ return (def);
+}
+
+static void
+vxlan_ifdetach_event(void *arg __unused, struct ifnet *ifp)
+{
+ struct vxlan_softc_head list;
+ struct vxlan_socket *vso;
+ struct vxlan_softc *sc, *tsc;
+
+ LIST_INIT(&list);
+
+ if (ifp->if_flags & IFF_RENAMING)
+ return;
+ if ((ifp->if_flags & IFF_MULTICAST) == 0)
+ return;
+
+ mtx_lock(&vxlan_list_mtx);
+ LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry)
+ vxlan_socket_ifdetach(vso, ifp, &list);
+ mtx_unlock(&vxlan_list_mtx);
+
+ LIST_FOREACH_SAFE(sc, &list, vxl_ifdetach_list, tsc) {
+ LIST_REMOVE(sc, vxl_ifdetach_list);
+
+ VXLAN_WLOCK(sc);
+ if (sc->vxl_flags & VXLAN_FLAG_INIT)
+ vxlan_init_wait(sc);
+ vxlan_teardown_locked(sc);
+ }
+}
+
+static void
+vxlan_load(void)
+{
+
+ mtx_init(&vxlan_list_mtx, "vxlan list", NULL, MTX_DEF);
+ LIST_INIT(&vxlan_socket_list);
+ vxlan_ifdetach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
+ vxlan_ifdetach_event, NULL, EVENTHANDLER_PRI_ANY);
+ vxlan_cloner = if_clone_simple(vxlan_name, vxlan_clone_create,
+ vxlan_clone_destroy, 0);
+}
+
+static void
+vxlan_unload(void)
+{
+
+ EVENTHANDLER_DEREGISTER(ifnet_departure_event,
+ vxlan_ifdetach_event_tag);
+ if_clone_detach(vxlan_cloner);
+ mtx_destroy(&vxlan_list_mtx);
+ MPASS(LIST_EMPTY(&vxlan_socket_list));
+}
+
+static int
+vxlan_modevent(module_t mod, int type, void *unused)
+{
+ int error;
+
+ error = 0;
+
+ switch (type) {
+ case MOD_LOAD:
+ vxlan_load();
+ break;
+ case MOD_UNLOAD:
+ vxlan_unload();
+ break;
+ default:
+ error = ENOTSUP;
+ break;
+ }
+
+ return (error);
+}
+
+static moduledata_t vxlan_mod = {
+ "if_vxlan",
+ vxlan_modevent,
+ 0
+};
+
+DECLARE_MODULE(if_vxlan, vxlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(if_vxlan, 1);
OpenPOWER on IntegriCloud