summaryrefslogtreecommitdiffstats
path: root/sys/netpfil/ipfw
diff options
context:
space:
mode:
authorae <ae@FreeBSD.org>2017-04-03 08:50:54 +0000
committerae <ae@FreeBSD.org>2017-04-03 08:50:54 +0000
commit1f5f6e71571eeaab683b65e91887222f4a6ece97 (patch)
treeff473621e6678586ebca72423c488eb34dc474e8 /sys/netpfil/ipfw
parentae659ef435156d011821295969f63c5f4dcfb1d2 (diff)
downloadFreeBSD-src-1f5f6e71571eeaab683b65e91887222f4a6ece97.zip
FreeBSD-src-1f5f6e71571eeaab683b65e91887222f4a6ece97.tar.gz
MFC r304041:
Move logging via BPF support into separate file. * make interface cloner VNET-aware; * simplify cloner code and use if_clone_simple(); * migrate LOGIF_LOCK() to rmlock; * add ipfw_bpf_mtap2() function to pass mbuf to BPF; * introduce new additional ipfwlog0 pseudo interface. It differs from ipfw0 by DLT type used in bpfattach. This interface is intended to be used by ipfw modules to dump packets with additional info attached. Currently pflog format is used. ipfw_bpf_mtap2() function uses its second argument to determine which interface to use for dumping. If dlen is equal to ETHER_HDR_LEN it uses the old ipfw0 interface, if dlen is equal to PFLOG_HDRLEN - ipfwlog0 will be used. Obtained from: Yandex LLC Sponsored by: Yandex LLC MFC r304043: Add three helper functions to manage tables from external modules. ipfw_objhash_lookup_table_kidx looks up a table by its kernel index; ipfw_ref_table/ipfw_unref_table take and release a reference to a table. Obtained from: Yandex LLC Sponsored by: Yandex LLC MFC r304046, 304108: Add ipfw_nat64 module that implements stateless and stateful NAT64. The module works together with ipfw(4) and is implemented as its external action module. Stateless NAT64 registers external action with name nat64stl. This keyword should be used to create NAT64 instance and to address this instance in rules. Stateless NAT64 uses two lookup tables with mapped IPv4->IPv6 and IPv6->IPv4 addresses to perform translation. A configuration of an instance should look like this: 1. Create lookup tables: # ipfw table T46 create type addr valtype ipv6 # ipfw table T64 create type addr valtype ipv4 2. Fill T46 and T64 tables. 3. Add rule to allow neighbor solicitation and advertisement: # ipfw add allow icmp6 from any to any icmp6types 135,136 4. Create NAT64 instance: # ipfw nat64stl NAT create table4 T46 table6 T64 5. Add rules that match the traffic: # ipfw add nat64stl NAT ip from any to table(T46) # ipfw add nat64stl NAT ip from table(T64) to 64:ff9b::/96 6. 
Configure DNS64 for IPv6 clients and add route to 64:ff9b::/96 via NAT64 host. Stateful NAT64 registers external action with name nat64lsn. The only option required to create a nat64lsn instance is prefix4. It defines the pool of IPv4 addresses used for translation. A configuration of an instance should look like this: 1. Add rule to allow neighbor solicitation and advertisement: # ipfw add allow icmp6 from any to any icmp6types 135,136 2. Create NAT64 instance: # ipfw nat64lsn NAT create prefix4 A.B.C.D/28 3. Add rules that match the traffic: # ipfw add nat64lsn NAT ip from any to A.B.C.D/28 # ipfw add nat64lsn NAT ip6 from any to 64:ff9b::/96 4. Configure DNS64 for IPv6 clients and add route to 64:ff9b::/96 via NAT64 host. Obtained from: Yandex LLC Relnotes: yes Sponsored by: Yandex LLC Differential Revision: https://reviews.freebsd.org/D6434 MFC r304048: Replace __noinline with special debug macro NAT64NOINLINE. MFC r304061: Use %ju to print unsigned 64-bit value. MFC r304076: Make statistics nat64lsn, nat64stl and nptv6 output netstat-like: "@value @description" and fix build due to -Wformat errors. MFC r304378 (by bz): Try to fix gcc compilation errors (which are right). nat64_getlasthdr() returns an int, which can be -1 in case of error, storing the result in an uint8_t and then comparing to < 0 is not helpful. Do what is done in the rest of the code and make proto an int here as well. MFC r309187: Fix ICMPv6 Time Exceeded error message translation. MFC r314718: Use new ipfw_lookup_table() in the nat64 too. MFC r315204,315233: Use memset with structure size.
Diffstat (limited to 'sys/netpfil/ipfw')
-rw-r--r--sys/netpfil/ipfw/ip_fw2.c5
-rw-r--r--sys/netpfil/ipfw/ip_fw_bpf.c209
-rw-r--r--sys/netpfil/ipfw/ip_fw_log.c177
-rw-r--r--sys/netpfil/ipfw/ip_fw_private.h8
-rw-r--r--sys/netpfil/ipfw/ip_fw_table.c51
-rw-r--r--sys/netpfil/ipfw/nat64/ip_fw_nat64.c129
-rw-r--r--sys/netpfil/ipfw/nat64/ip_fw_nat64.h117
-rw-r--r--sys/netpfil/ipfw/nat64/nat64_translate.c1572
-rw-r--r--sys/netpfil/ipfw/nat64/nat64_translate.h116
-rw-r--r--sys/netpfil/ipfw/nat64/nat64lsn.c1770
-rw-r--r--sys/netpfil/ipfw/nat64/nat64lsn.h351
-rw-r--r--sys/netpfil/ipfw/nat64/nat64lsn_control.c917
-rw-r--r--sys/netpfil/ipfw/nat64/nat64stl.c262
-rw-r--r--sys/netpfil/ipfw/nat64/nat64stl.h58
-rw-r--r--sys/netpfil/ipfw/nat64/nat64stl_control.c621
15 files changed, 6190 insertions, 173 deletions
diff --git a/sys/netpfil/ipfw/ip_fw2.c b/sys/netpfil/ipfw/ip_fw2.c
index 02c0c94..2710aaf 100644
--- a/sys/netpfil/ipfw/ip_fw2.c
+++ b/sys/netpfil/ipfw/ip_fw2.c
@@ -2846,6 +2846,7 @@ vnet_ipfw_init(const void *unused)
#ifdef LINEAR_SKIPTO
ipfw_init_skipto_cache(chain);
#endif
+ ipfw_bpf_init(first);
/* First set up some values that are compile time options */
V_ipfw_vnet_ready = 1; /* Open for business */
@@ -2864,7 +2865,6 @@ vnet_ipfw_init(const void *unused)
* is checked on each packet because there are no pfil hooks.
*/
V_ip_fw_ctl_ptr = ipfw_ctl3;
- ipfw_log_bpf(1); /* init */
error = ipfw_attach_hooks(1);
return (error);
}
@@ -2888,8 +2888,6 @@ vnet_ipfw_uninit(const void *unused)
(void)ipfw_attach_hooks(0 /* detach */);
V_ip_fw_ctl_ptr = NULL;
- ipfw_log_bpf(0); /* uninit */
-
last = IS_DEFAULT_VNET(curvnet) ? 1 : 0;
IPFW_UH_WLOCK(chain);
@@ -2918,6 +2916,7 @@ vnet_ipfw_uninit(const void *unused)
IPFW_LOCK_DESTROY(chain);
ipfw_dyn_uninit(1); /* free the remaining parts */
ipfw_destroy_counters();
+ ipfw_bpf_uninit(last);
return (0);
}
diff --git a/sys/netpfil/ipfw/ip_fw_bpf.c b/sys/netpfil/ipfw/ip_fw_bpf.c
new file mode 100644
index 0000000..6f8aa39
--- /dev/null
+++ b/sys/netpfil/ipfw/ip_fw_bpf.c
@@ -0,0 +1,209 @@
+/*-
+ * Copyright (c) 2016 Yandex LLC
+ * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/socket.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_pflog.h>
+#include <net/if_var.h>
+#include <net/if_clone.h>
+#include <net/if_types.h>
+#include <net/vnet.h>
+#include <net/bpf.h>
+
+#include <netinet/in.h>
+#include <netinet/ip_fw.h>
+#include <netinet/ip_var.h>
+#include <netpfil/ipfw/ip_fw_private.h>
+
+/*
+ * Per-VNET state.  Each variable is followed by its V_ accessor.
+ * log_if is the interface tapped for ETHER_HDR_LEN-framed packets and
+ * pflog_if the one tapped for PFLOG_HDRLEN-framed packets; the two
+ * cloners are the handles returned by if_clone_simple().
+ */
+static VNET_DEFINE(struct ifnet *, log_if);
+#define	V_log_if		VNET(log_if)
+static VNET_DEFINE(struct ifnet *, pflog_if);
+#define	V_pflog_if		VNET(pflog_if)
+static VNET_DEFINE(struct if_clone *, ipfw_cloner);
+#define	V_ipfw_cloner		VNET(ipfw_cloner)
+static VNET_DEFINE(struct if_clone *, ipfwlog_cloner);
+#define	V_ipfwlog_cloner	VNET(ipfwlog_cloner)
+
+/*
+ * One rmlock (a plain static, not a VNET variable) guards V_log_if and
+ * V_pflog_if.  The macro argument is unused.  A function that takes the
+ * read lock must declare LOGIF_RLOCK_TRACKER among its locals first.
+ */
+static struct rmlock log_if_lock;
+#define	LOGIF_LOCK_INIT(x)	rm_init(&log_if_lock, "ipfw log_if lock")
+#define	LOGIF_LOCK_DESTROY(x)	rm_destroy(&log_if_lock)
+#define	LOGIF_WLOCK(x)		rm_wlock(&log_if_lock)
+#define	LOGIF_WUNLOCK(x)	rm_wunlock(&log_if_lock)
+#define	LOGIF_RLOCK_TRACKER	struct rm_priotracker _log_tracker
+#define	LOGIF_RLOCK(x)		rm_rlock(&log_if_lock, &_log_tracker)
+#define	LOGIF_RUNLOCK(x)	rm_runlock(&log_if_lock, &_log_tracker)
+
+/* Name prefixes of the two cloned interface families. */
+static const char ipfwname[] = "ipfw";
+static const char ipfwlogname[] = "ipfwlog";
+
+/*
+ * if_ioctl handler of the logging pseudo-interfaces.
+ * No request is supported: every call fails with EINVAL.
+ */
+static int
+ipfw_bpf_ioctl(struct ifnet *ifp __unused, u_long cmd __unused,
+    caddr_t addr __unused)
+{
+
+	return (EINVAL);
+}
+
+/*
+ * if_output handler of the logging pseudo-interfaces.
+ * Nothing is ever transmitted: a non-NULL mbuf is released with
+ * FREE_PKT() and 0 is returned in every case.
+ */
+static int
+ipfw_bpf_output(struct ifnet *ifp __unused, struct mbuf *m,
+    const struct sockaddr *dst __unused, struct route *ro __unused)
+{
+
+	if (m == NULL)
+		return (0);
+	FREE_PKT(m);
+	return (0);
+}
+
+/*
+ * Destroy callback shared by both cloners.
+ * The per-VNET pointer to clear is chosen from if_hdrlen: ETHER_HDR_LEN
+ * identifies the ipfw interface, anything else the ipfwlog one.  The
+ * pointer is cleared under the write lock before the ifnet is detached
+ * from BPF, detached from the stack and freed.
+ */
+static void
+ipfw_clone_destroy(struct ifnet *ifp)
+{
+
+	LOGIF_WLOCK();
+	if (ifp->if_hdrlen != ETHER_HDR_LEN)
+		V_pflog_if = NULL;
+	else
+		V_log_if = NULL;
+	LOGIF_WUNLOCK();
+
+	bpfdetach(ifp);
+	if_detach(ifp);
+	if_free(ifp);
+}
+
+/*
+ * Create callback for the "ipfw" cloner.
+ * Allocates an IFT_PFLOG ifnet named ipfw<unit>, attaches it to the
+ * stack and to BPF as DLT_EN10MB with an ETHER_HDR_LEN header, then
+ * publishes it in V_log_if.
+ *
+ * Returns 0 on success, ENOSPC when if_alloc() fails, or EEXIST when
+ * V_log_if is already set; in the EEXIST case the freshly attached
+ * ifnet is detached and freed again.
+ */
+static int
+ipfw_clone_create(struct if_clone *ifc __unused, int unit,
+    caddr_t params __unused)
+{
+	struct ifnet *ifp;
+
+	if ((ifp = if_alloc(IFT_PFLOG)) == NULL)
+		return (ENOSPC);
+	if_initname(ifp, ipfwname, unit);
+	ifp->if_mtu = 65536;
+	ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST;
+	ifp->if_hdrlen = ETHER_HDR_LEN;
+	ifp->if_output = ipfw_bpf_output;
+	ifp->if_ioctl = ipfw_bpf_ioctl;
+	if_attach(ifp);
+	bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
+
+	LOGIF_WLOCK();
+	if (V_log_if == NULL) {
+		V_log_if = ifp;
+		LOGIF_WUNLOCK();
+		return (0);
+	}
+	LOGIF_WUNLOCK();
+
+	/* The slot is already taken: undo the attach above. */
+	bpfdetach(ifp);
+	if_detach(ifp);
+	if_free(ifp);
+	return (EEXIST);
+}
+
+/*
+ * Create callback for the "ipfwlog" cloner.
+ * Same procedure as ipfw_clone_create(), except that the ifnet is named
+ * ipfwlog<unit>, is attached to BPF as DLT_PFLOG with a PFLOG_HDRLEN
+ * header, and is published in V_pflog_if.
+ *
+ * Returns 0 on success, ENOSPC when if_alloc() fails, or EEXIST when
+ * V_pflog_if is already set; in the EEXIST case the freshly attached
+ * ifnet is detached and freed again.
+ */
+static int
+ipfwlog_clone_create(struct if_clone *ifc __unused, int unit,
+    caddr_t params __unused)
+{
+	struct ifnet *ifp;
+
+	if ((ifp = if_alloc(IFT_PFLOG)) == NULL)
+		return (ENOSPC);
+	if_initname(ifp, ipfwlogname, unit);
+	ifp->if_mtu = 65536;
+	ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST;
+	ifp->if_hdrlen = PFLOG_HDRLEN;
+	ifp->if_output = ipfw_bpf_output;
+	ifp->if_ioctl = ipfw_bpf_ioctl;
+	if_attach(ifp);
+	bpfattach(ifp, DLT_PFLOG, PFLOG_HDRLEN);
+
+	LOGIF_WLOCK();
+	if (V_pflog_if == NULL) {
+		V_pflog_if = ifp;
+		LOGIF_WUNLOCK();
+		return (0);
+	}
+	LOGIF_WUNLOCK();
+
+	/* The slot is already taken: undo the attach above. */
+	bpfdetach(ifp);
+	if_detach(ifp);
+	if_free(ifp);
+	return (EEXIST);
+}
+
+/*
+ * Hand mbuf @m, prefixed by the @dlen bytes at @data, to BPF.
+ * @dlen selects the interface: ETHER_HDR_LEN taps V_log_if and
+ * PFLOG_HDRLEN taps V_pflog_if.  Nothing is tapped for any other
+ * @dlen or when the selected interface does not exist.
+ * The interface pointer is read and used under the read lock.
+ */
+void
+ipfw_bpf_mtap2(void *data, u_int dlen, struct mbuf *m)
+{
+	struct ifnet *ifp;
+	LOGIF_RLOCK_TRACKER;
+
+	LOGIF_RLOCK();
+	if (dlen == ETHER_HDR_LEN)
+		ifp = V_log_if;
+	else if (dlen == PFLOG_HDRLEN)
+		ifp = V_pflog_if;
+	else
+		ifp = NULL;
+	if (ifp != NULL)
+		BPF_MTAP2(ifp, data, dlen, m);
+	LOGIF_RUNLOCK();
+}
+
+/*
+ * Set up BPF logging for the current VNET.
+ * When @first is non-zero the shared lock is initialised and the
+ * interface pointers are reset.  In every case the "ipfw" and
+ * "ipfwlog" cloners are registered and their handles stored per VNET.
+ */
+void
+ipfw_bpf_init(int first)
+{
+
+	if (first != 0) {
+		LOGIF_LOCK_INIT();
+		V_pflog_if = NULL;
+		V_log_if = NULL;
+	}
+
+	V_ipfw_cloner = if_clone_simple(ipfwname,
+	    ipfw_clone_create, ipfw_clone_destroy, 0);
+	V_ipfwlog_cloner = if_clone_simple(ipfwlogname,
+	    ipfwlog_clone_create, ipfw_clone_destroy, 0);
+}
+
+/*
+ * Tear down BPF logging for the current VNET.
+ * Both cloners are detached; the shared lock is destroyed only when
+ * @last is non-zero.
+ */
+void
+ipfw_bpf_uninit(int last)
+{
+
+	if_clone_detach(V_ipfw_cloner);
+	if_clone_detach(V_ipfwlog_cloner);
+	if (last == 0)
+		return;
+	LOGIF_LOCK_DESTROY();
+}
+
+
diff --git a/sys/netpfil/ipfw/ip_fw_log.c b/sys/netpfil/ipfw/ip_fw_log.c
index 7ef92cd..a8e53fe 100644
--- a/sys/netpfil/ipfw/ip_fw_log.c
+++ b/sys/netpfil/ipfw/ip_fw_log.c
@@ -40,20 +40,14 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
-#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
-#include <sys/lock.h>
-#include <sys/rwlock.h>
#include <net/ethernet.h> /* for ETHERTYPE_IP */
#include <net/if.h>
#include <net/if_var.h>
-#include <net/if_clone.h>
#include <net/vnet.h>
-#include <net/if_types.h> /* for IFT_PFLOG */
-#include <net/bpf.h> /* for BPF */
#include <netinet/in.h>
#include <netinet/ip.h>
@@ -96,155 +90,6 @@ __FBSDID("$FreeBSD$");
#define SNP(buf) buf, sizeof(buf)
#endif /* !__APPLE__ */
-#ifdef WITHOUT_BPF
-void
-ipfw_log_bpf(int onoff)
-{
-}
-#else /* !WITHOUT_BPF */
-static VNET_DEFINE(struct ifnet *, log_if); /* hook to attach to bpf */
-#define V_log_if VNET(log_if)
-static struct rwlock log_if_lock;
-#define LOGIF_LOCK_INIT(x) rw_init(&log_if_lock, "ipfw log_if lock")
-#define LOGIF_LOCK_DESTROY(x) rw_destroy(&log_if_lock)
-#define LOGIF_RLOCK(x) rw_rlock(&log_if_lock)
-#define LOGIF_RUNLOCK(x) rw_runlock(&log_if_lock)
-#define LOGIF_WLOCK(x) rw_wlock(&log_if_lock)
-#define LOGIF_WUNLOCK(x) rw_wunlock(&log_if_lock)
-
-static const char ipfwname[] = "ipfw";
-
-/* we use this dummy function for all ifnet callbacks */
-static int
-log_dummy(struct ifnet *ifp, u_long cmd, caddr_t addr)
-{
- return EINVAL;
-}
-
-static int
-ipfw_log_output(struct ifnet *ifp, struct mbuf *m,
- const struct sockaddr *dst, struct route *ro)
-{
- if (m != NULL)
- FREE_PKT(m);
- return EINVAL;
-}
-
-static void
-ipfw_log_start(struct ifnet* ifp)
-{
- panic("ipfw_log_start() must not be called");
-}
-
-static const u_char ipfwbroadcastaddr[6] =
- { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
-
-static int
-ipfw_log_clone_match(struct if_clone *ifc, const char *name)
-{
-
- return (strncmp(name, ipfwname, sizeof(ipfwname) - 1) == 0);
-}
-
-static int
-ipfw_log_clone_create(struct if_clone *ifc, char *name, size_t len,
- caddr_t params)
-{
- int error;
- int unit;
- struct ifnet *ifp;
-
- error = ifc_name2unit(name, &unit);
- if (error)
- return (error);
-
- error = ifc_alloc_unit(ifc, &unit);
- if (error)
- return (error);
-
- ifp = if_alloc(IFT_PFLOG);
- if (ifp == NULL) {
- ifc_free_unit(ifc, unit);
- return (ENOSPC);
- }
- ifp->if_dname = ipfwname;
- ifp->if_dunit = unit;
- snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", ipfwname, unit);
- strlcpy(name, ifp->if_xname, len);
- ifp->if_mtu = 65536;
- ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST;
- ifp->if_init = (void *)log_dummy;
- ifp->if_ioctl = log_dummy;
- ifp->if_start = ipfw_log_start;
- ifp->if_output = ipfw_log_output;
- ifp->if_addrlen = 6;
- ifp->if_hdrlen = 14;
- ifp->if_broadcastaddr = ipfwbroadcastaddr;
- ifp->if_baudrate = IF_Mbps(10);
-
- LOGIF_WLOCK();
- if (V_log_if == NULL)
- V_log_if = ifp;
- else {
- LOGIF_WUNLOCK();
- if_free(ifp);
- ifc_free_unit(ifc, unit);
- return (EEXIST);
- }
- LOGIF_WUNLOCK();
- if_attach(ifp);
- bpfattach(ifp, DLT_EN10MB, 14);
-
- return (0);
-}
-
-static int
-ipfw_log_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
-{
- int unit;
-
- if (ifp == NULL)
- return (0);
-
- LOGIF_WLOCK();
- if (V_log_if != NULL && ifp == V_log_if)
- V_log_if = NULL;
- else {
- LOGIF_WUNLOCK();
- return (EINVAL);
- }
- LOGIF_WUNLOCK();
-
- unit = ifp->if_dunit;
- bpfdetach(ifp);
- if_detach(ifp);
- if_free(ifp);
- ifc_free_unit(ifc, unit);
-
- return (0);
-}
-
-static VNET_DEFINE(struct if_clone *, ipfw_log_cloner);
-#define V_ipfw_log_cloner VNET(ipfw_log_cloner)
-
-void
-ipfw_log_bpf(int onoff)
-{
-
- if (onoff) {
- if (IS_DEFAULT_VNET(curvnet))
- LOGIF_LOCK_INIT();
- V_ipfw_log_cloner = if_clone_advanced(ipfwname, 0,
- ipfw_log_clone_match, ipfw_log_clone_create,
- ipfw_log_clone_destroy);
- } else {
- if_clone_detach(V_ipfw_log_cloner);
- if (IS_DEFAULT_VNET(curvnet))
- LOGIF_LOCK_DESTROY();
- }
-}
-#endif /* !WITHOUT_BPF */
-
#define TARG(k, f) IP_FW_ARG_TABLEARG(chain, k, f)
/*
* We enter here when we have a rule with O_LOG.
@@ -260,29 +105,23 @@ ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen,
char action2[92], proto[128], fragment[32];
if (V_fw_verbose == 0) {
-#ifndef WITHOUT_BPF
- LOGIF_RLOCK();
- if (V_log_if == NULL || V_log_if->if_bpf == NULL) {
- LOGIF_RUNLOCK();
- return;
- }
-
if (args->eh) /* layer2, use orig hdr */
- BPF_MTAP2(V_log_if, args->eh, ETHER_HDR_LEN, m);
+ ipfw_bpf_mtap2(args->eh, ETHER_HDR_LEN, m);
else {
/* Add fake header. Later we will store
* more info in the header.
*/
if (ip->ip_v == 4)
- BPF_MTAP2(V_log_if, "DDDDDDSSSSSS\x08\x00", ETHER_HDR_LEN, m);
- else if (ip->ip_v == 6)
- BPF_MTAP2(V_log_if, "DDDDDDSSSSSS\x86\xdd", ETHER_HDR_LEN, m);
+ ipfw_bpf_mtap2("DDDDDDSSSSSS\x08\x00",
+ ETHER_HDR_LEN, m);
+ else if (ip->ip_v == 6)
+ ipfw_bpf_mtap2("DDDDDDSSSSSS\x86\xdd",
+ ETHER_HDR_LEN, m);
else
/* Obviously bogus EtherType. */
- BPF_MTAP2(V_log_if, "DDDDDDSSSSSS\xff\xff", ETHER_HDR_LEN, m);
+ ipfw_bpf_mtap2("DDDDDDSSSSSS\xff\xff",
+ ETHER_HDR_LEN, m);
}
- LOGIF_RUNLOCK();
-#endif /* !WITHOUT_BPF */
return;
}
/* the old 'log' function */
diff --git a/sys/netpfil/ipfw/ip_fw_private.h b/sys/netpfil/ipfw/ip_fw_private.h
index bbc0114..d670a49 100644
--- a/sys/netpfil/ipfw/ip_fw_private.h
+++ b/sys/netpfil/ipfw/ip_fw_private.h
@@ -154,7 +154,9 @@ void ipfw_nat_destroy(void);
/* In ip_fw_log.c */
struct ip;
struct ip_fw_chain;
-void ipfw_log_bpf(int);
+void ipfw_bpf_init(int);
+void ipfw_bpf_uninit(int);
+void ipfw_bpf_mtap2(void *, u_int, struct mbuf *);
void ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen,
struct ip_fw_args *args, struct mbuf *m, struct ifnet *oif,
u_short offset, uint32_t tablearg, struct ip *ip);
@@ -741,6 +743,10 @@ typedef int (table_lookup_t)(struct table_info *ti, void *key, uint32_t keylen,
int ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen,
void *paddr, uint32_t *val);
+struct named_object *ipfw_objhash_lookup_table_kidx(struct ip_fw_chain *ch,
+ uint16_t kidx);
+int ipfw_ref_table(struct ip_fw_chain *ch, ipfw_obj_ntlv *ntlv, uint16_t *kidx);
+void ipfw_unref_table(struct ip_fw_chain *ch, uint16_t kidx);
int ipfw_init_tables(struct ip_fw_chain *ch, int first);
int ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables);
int ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int nsets);
diff --git a/sys/netpfil/ipfw/ip_fw_table.c b/sys/netpfil/ipfw/ip_fw_table.c
index 7638b17..313abda 100644
--- a/sys/netpfil/ipfw/ip_fw_table.c
+++ b/sys/netpfil/ipfw/ip_fw_table.c
@@ -1606,6 +1606,57 @@ ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables)
}
/*
+ * Lookup table's named object by its @kidx.
+ */
+struct named_object *
+ipfw_objhash_lookup_table_kidx(struct ip_fw_chain *ch, uint16_t kidx)
+{
+	struct namedobj_instance *ni;
+
+	/* Tables live in the chain's named-object instance. */
+	ni = CHAIN_TO_NI(ch);
+	return (ipfw_objhash_lookup_kidx(ni, kidx));
+}
+
+/*
+ * Take a reference on the table described by @ntlv and store its
+ * kernel index in @kidx.
+ *
+ * The caller must hold the UH write lock (asserted).
+ * Returns 0 on success, the error from find_table_err() when the
+ * lookup fails, or ESRCH when the lookup succeeds without finding a
+ * table.  @kidx is written only on success.
+ */
+int
+ipfw_ref_table(struct ip_fw_chain *ch, ipfw_obj_ntlv *ntlv, uint16_t *kidx)
+{
+	struct table_config *tc;
+	struct tid_info ti;
+	int error;
+
+	IPFW_UH_WLOCK_ASSERT(ch);
+
+	ntlv_to_ti(ntlv, &ti);
+	error = find_table_err(CHAIN_TO_NI(ch), &ti, &tc);
+	if (error == 0 && tc == NULL)
+		error = ESRCH;
+	if (error != 0)
+		return (error);
+
+	tc_ref(tc);
+	*kidx = tc->no.kidx;
+	return (0);
+}
+
+/*
+ * Drop one reference from the table with kernel index @kidx.
+ * The caller must hold the UH write lock (asserted).  A missing table
+ * is treated as a programming error and checked with KASSERT only.
+ */
+void
+ipfw_unref_table(struct ip_fw_chain *ch, uint16_t kidx)
+{
+	struct named_object *no;
+
+	IPFW_UH_WLOCK_ASSERT(ch);
+	no = ipfw_objhash_lookup_kidx(CHAIN_TO_NI(ch), kidx);
+	KASSERT(no != NULL, ("Table with index %d not found", kidx));
+	no->refcnt--;
+}
+
+/*
* Lookup an arbtrary key @paddr of legth @plen in table @tbl.
* Stores found value in @val.
*
diff --git a/sys/netpfil/ipfw/nat64/ip_fw_nat64.c b/sys/netpfil/ipfw/nat64/ip_fw_nat64.c
new file mode 100644
index 0000000..58c4427
--- /dev/null
+++ b/sys/netpfil/ipfw/nat64/ip_fw_nat64.c
@@ -0,0 +1,129 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/nat64/ip_fw_nat64.h>
+#include <netpfil/ipfw/nat64/nat64_translate.h>
+
+
+int nat64_debug = 0; /* read/write sysctl nat64_debug under _net_inet_ip_fw; tested as a bitmask by DPRINTF() */
+SYSCTL_DECL(_net_inet_ip_fw);
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, nat64_debug, CTLFLAG_RW,
+ &nat64_debug, 0, "Debug level for NAT64 module");
+
+int nat64_allow_private = 0; /* read/write sysctl nat64_allow_private; off by default */
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, nat64_allow_private, CTLFLAG_RW,
+ &nat64_allow_private, 0,
+ "Allow use of non-global IPv4 addresses with NAT64");
+
+/*
+ * Per-VNET initialisation: bring up the stateless (nat64stl) and then
+ * the stateful (nat64lsn) translator on this VNET's layer-3 chain.
+ * If the second step fails the first one is rolled back.
+ * Returns 0 on success or the error of the step that failed.
+ */
+static int
+vnet_ipfw_nat64_init(const void *arg __unused)
+{
+	struct ip_fw_chain *chain;
+	int error, first;
+
+	chain = &V_layer3_chain;
+	first = IS_DEFAULT_VNET(curvnet) ? 1: 0;
+
+	error = nat64stl_init(chain, first);
+	if (error == 0) {
+		error = nat64lsn_init(chain, first);
+		if (error != 0)
+			nat64stl_uninit(chain, first);
+	}
+	return (error);
+}
+
+/*
+ * Per-VNET teardown: shut down nat64stl and then nat64lsn on this
+ * VNET's layer-3 chain.  Always returns 0.
+ */
+static int
+vnet_ipfw_nat64_uninit(const void *arg __unused)
+{
+	struct ip_fw_chain *chain;
+	int last;
+
+	chain = &V_layer3_chain;
+	last = IS_DEFAULT_VNET(curvnet) ? 1: 0;
+
+	nat64stl_uninit(chain, last);
+	nat64lsn_uninit(chain, last);
+	return (0);
+}
+
+/*
+ * Module event handler.
+ * MOD_LOAD and MOD_UNLOAD are accepted and need no work here; every
+ * other event type is rejected with EOPNOTSUPP.
+ */
+static int
+ipfw_nat64_modevent(module_t mod __unused, int type, void *unused __unused)
+{
+
+	if (type != MOD_LOAD && type != MOD_UNLOAD)
+		return (EOPNOTSUPP);
+	return (0);
+}
+
+static moduledata_t ipfw_nat64_mod = {
+ "ipfw_nat64", /* module name */
+ ipfw_nat64_modevent, /* event handler */
+ 0 /* no private data for the handler */
+};
+
+/* Define startup order: the per-VNET init/uninit hooks below run at IPFW_NAT64_VNET_ORDER within IPFW_NAT64_SI_SUB_FIREWALL. */
+#define IPFW_NAT64_SI_SUB_FIREWALL SI_SUB_PROTO_IFATTACHDOMAIN
+#define IPFW_NAT64_MODEVENT_ORDER (SI_ORDER_ANY - 128) /* after ipfw */
+#define IPFW_NAT64_MODULE_ORDER (IPFW_NAT64_MODEVENT_ORDER + 1)
+#define IPFW_NAT64_VNET_ORDER (IPFW_NAT64_MODEVENT_ORDER + 2)
+
+DECLARE_MODULE(ipfw_nat64, ipfw_nat64_mod, IPFW_NAT64_SI_SUB_FIREWALL,
+ SI_ORDER_ANY); /* NOTE(review): IPFW_NAT64_MODULE_ORDER is defined above but SI_ORDER_ANY is used here - confirm this is intended */
+MODULE_DEPEND(ipfw_nat64, ipfw, 3, 3, 3);
+MODULE_VERSION(ipfw_nat64, 1);
+
+VNET_SYSINIT(vnet_ipfw_nat64_init, IPFW_NAT64_SI_SUB_FIREWALL,
+ IPFW_NAT64_VNET_ORDER, vnet_ipfw_nat64_init, NULL);
+VNET_SYSUNINIT(vnet_ipfw_nat64_uninit, IPFW_NAT64_SI_SUB_FIREWALL,
+ IPFW_NAT64_VNET_ORDER, vnet_ipfw_nat64_uninit, NULL);
diff --git a/sys/netpfil/ipfw/nat64/ip_fw_nat64.h b/sys/netpfil/ipfw/nat64/ip_fw_nat64.h
new file mode 100644
index 0000000..1d2bb77
--- /dev/null
+++ b/sys/netpfil/ipfw/nat64/ip_fw_nat64.h
@@ -0,0 +1,117 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _IP_FW_NAT64_H_
+#define _IP_FW_NAT64_H_
+
+#define DPRINTF(mask, fmt, ...) \
+ if (nat64_debug & (mask)) \
+ printf("NAT64: %s: " fmt "\n", __func__, ## __VA_ARGS__)
+#define DP_GENERIC 0x0001
+#define DP_OBJ 0x0002
+#define DP_JQUEUE 0x0004
+#define DP_STATE 0x0008
+#define DP_DROPS 0x0010
+#define DP_ALL 0xFFFF
+extern int nat64_debug;
+
+#if 0 /* debug aid: change to 1 to make NAT64NOINLINE expand to __noinline */
+#define NAT64NOINLINE __noinline
+#else
+#define NAT64NOINLINE
+#endif
+
+int nat64stl_init(struct ip_fw_chain *ch, int first);
+void nat64stl_uninit(struct ip_fw_chain *ch, int last);
+int nat64lsn_init(struct ip_fw_chain *ch, int first);
+void nat64lsn_uninit(struct ip_fw_chain *ch, int last);
+
+struct ip_fw_nat64_stats { /* field offsets are turned into nat64_stats_block.stats[] indexes by NAT64STAT_*(): every member must stay a counter_u64_t */
+ counter_u64_t opcnt64; /* number of 6to4 packets translated */
+ counter_u64_t opcnt46; /* number of 4to6 packets translated */
+ counter_u64_t ofrags; /* number of fragments generated */
+ counter_u64_t ifrags; /* number of fragments received */
+ counter_u64_t oerrors; /* number of output errors */
+ counter_u64_t noroute4; /* presumably: no IPv4 route found - TODO confirm */
+ counter_u64_t noroute6; /* presumably: no IPv6 route found - TODO confirm */
+ counter_u64_t nomatch4; /* No addr/port match */
+ counter_u64_t noproto; /* Protocol not supported */
+ counter_u64_t nomem; /* mbufs allocation failed */
+ counter_u64_t dropped; /* number of packets silently
+ * dropped due to some errors/
+ * unsupported/etc.
+ */
+
+ counter_u64_t jrequests; /* number of jobs requests queued */
+ counter_u64_t jcalls; /* number of jobs handler calls */
+ counter_u64_t jhostsreq; /* number of hosts requests */
+ counter_u64_t jportreq; /* presumably: number of port requests - TODO confirm */
+ counter_u64_t jhostfails; /* presumably: failed hosts requests - TODO confirm */
+ counter_u64_t jportfails; /* presumably: failed port requests - TODO confirm */
+ counter_u64_t jmaxlen;
+ counter_u64_t jnomem; /* presumably: job allocation failures - TODO confirm */
+ counter_u64_t jreinjected;
+
+ counter_u64_t screated; /* presumably: states created - TODO confirm */
+ counter_u64_t sdeleted; /* presumably: states deleted - TODO confirm */
+ counter_u64_t spgcreated;
+ counter_u64_t spgdeleted;
+};
+
+#define IPFW_NAT64_VERSION 1
+/*
+ * struct ip_fw_nat64_stats is used only as a layout template: each of
+ * its members names one slot of nat64_stats_block.stats[].
+ *
+ * The slot count and the slot index are computed with
+ * sizeof(counter_u64_t), the type of the members.  Dividing by
+ * sizeof(uint64_t) instead is only correct where both sizes are equal;
+ * where counter_u64_t is smaller, the array would be too short and
+ * neighbouring members would share one slot.
+ */
+#define NAT64STATS \
+	(sizeof(struct ip_fw_nat64_stats) / sizeof(counter_u64_t))
+typedef struct _nat64_stats_block {
+	counter_u64_t stats[NAT64STATS];
+} nat64_stats_block;
+/* Index of member @f of struct ip_fw_nat64_stats within stats[]. */
+#define NAT64STAT_IDX(f) \
+	(offsetof(struct ip_fw_nat64_stats, f) / sizeof(counter_u64_t))
+/* Add @v to counter @f of the stats block pointed to by @s. */
+#define NAT64STAT_ADD(s, f, v) \
+	counter_u64_add((s)->stats[NAT64STAT_IDX(f)], (v))
+/* Increment counter @f of the stats block pointed to by @s. */
+#define NAT64STAT_INC(s, f) NAT64STAT_ADD(s, f, 1)
+/* Fetch the current value of counter @f of the stats block @s. */
+#define NAT64STAT_FETCH(s, f) \
+	counter_u64_fetch((s)->stats[NAT64STAT_IDX(f)])
+
+#define L3HDR(_ip, _t) ((_t)((u_int32_t *)(_ip) + (_ip)->ip_hl)) /* address ip_hl 32-bit words past _ip, cast to _t */
+#define TCP(p) ((struct tcphdr *)(p)) /* view p as a TCP header */
+#define UDP(p) ((struct udphdr *)(p)) /* view p as a UDP header */
+#define ICMP(p) ((struct icmphdr *)(p)) /* view p as an ICMP header */
+#define ICMP6(p) ((struct icmp6_hdr *)(p)) /* view p as an ICMPv6 header */
+
+#define NAT64SKIP 0 /* NOTE(review): NAT64SKIP/NAT64RETURN/NAT64MFREE look like result codes; their users are not visible here */
+#define NAT64RETURN 1
+#define NAT64MFREE -1
+
+/* Well-known prefix 64:ff9b::/96: first 32-bit word is 0x0064ff9b in network order, the next two words are zero */
+#define IPV6_ADDR_INT32_WKPFX htonl(0x64ff9b)
+#define IN6_IS_ADDR_WKPFX(a) \
+ ((a)->s6_addr32[0] == IPV6_ADDR_INT32_WKPFX && \
+ (a)->s6_addr32[1] == 0 && (a)->s6_addr32[2] == 0)
+
+#endif
+
diff --git a/sys/netpfil/ipfw/nat64/nat64_translate.c b/sys/netpfil/ipfw/nat64/nat64_translate.c
new file mode 100644
index 0000000..aefd0f9
--- /dev/null
+++ b/sys/netpfil/ipfw/nat64/nat64_translate.c
@@ -0,0 +1,1572 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "opt_ipfw.h"
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/rmlock.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_pflog.h>
+#include <net/pfil.h>
+#include <net/netisr.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
+#include <netinet/ip6.h>
+#include <netinet/icmp6.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/ip6_var.h>
+
+#include <netpfil/pf/pf.h>
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/nat64/ip_fw_nat64.h>
+#include <netpfil/ipfw/nat64/nat64_translate.h>
+#include <machine/in_cksum.h>
+
+/*
+ * Stamp the pflog header with the address family and the outbound
+ * direction, then pass header and packet to ipfw_bpf_mtap2().
+ */
+static void
+nat64_log(struct pfloghdr *logdata, struct mbuf *m, sa_family_t family)
+{
+
+	logdata->af = family;
+	logdata->dir = PF_OUT;
+	ipfw_bpf_mtap2(logdata, PFLOG_HDRLEN, m);
+}
+#ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT
+static NAT64NOINLINE struct sockaddr* nat64_find_route4(struct route *ro,
+ in_addr_t dest, struct mbuf *m);
+static NAT64NOINLINE struct sockaddr* nat64_find_route6(struct route_in6 *ro,
+ struct in6_addr *dest, struct mbuf *m);
+
+/*
+ * Direct-output variant: optionally log the packet, then hand it to the
+ * interface's if_output method.  A non-zero result is counted in the
+ * "oerrors" statistic and returned to the caller.
+ */
+static NAT64NOINLINE int
+nat64_output(struct ifnet *ifp, struct mbuf *m,
+    struct sockaddr *dst, struct route *ro, nat64_stats_block *stats,
+    void *logdata)
+{
+	int rc;
+
+	if (logdata != NULL)
+		nat64_log(logdata, m, dst->sa_family);
+	rc = (*ifp->if_output)(ifp, m, dst, ro);
+	if (rc == 0)
+		return (0);
+	NAT64STAT_INC(stats, oerrors);
+	return (rc);
+}
+
+/*
+ * Direct-output variant: send a single, already translated packet.
+ * The IP version is read from the first byte of the packet, a route is
+ * looked up for its destination address and the mbuf is passed to the
+ * if_output method of the route's interface.
+ *
+ * The mbuf is freed here when the IP version is unknown or no route is
+ * found.  Returns EAFNOSUPPORT, EHOSTUNREACH or the if_output result.
+ */
+static NAT64NOINLINE int
+nat64_output_one(struct mbuf *m, nat64_stats_block *stats, void *logdata)
+{
+ struct route_in6 ro6;
+ struct route ro4, *ro;
+ struct sockaddr *dst;
+ struct ifnet *ifp;
+ struct ip6_hdr *ip6;
+ struct ip *ip4;
+ int error;
+
+ ip4 = mtod(m, struct ip *);
+ switch (ip4->ip_v) {
+ case IPVERSION:
+ ro = &ro4;
+ dst = nat64_find_route4(&ro4, ip4->ip_dst.s_addr, m);
+ if (dst == NULL)
+ NAT64STAT_INC(stats, noroute4);
+ break;
+ case (IPV6_VERSION >> 4):
+ ip6 = (struct ip6_hdr *)ip4;
+ ro = (struct route *)&ro6;
+ dst = nat64_find_route6(&ro6, &ip6->ip6_dst, m);
+ if (dst == NULL)
+ NAT64STAT_INC(stats, noroute6);
+ break;
+ default:
+ /* No route was looked up on this path, so there is nothing to release. */
+ m_freem(m);
+ NAT64STAT_INC(stats, dropped);
+ DPRINTF(DP_DROPS, "dropped due to unknown IP version");
+ return (EAFNOSUPPORT);
+ }
+ if (dst == NULL) {
+ /* The matching noroute4/noroute6 counter was already bumped above. */
+ FREE_ROUTE(ro);
+ m_freem(m);
+ return (EHOSTUNREACH);
+ }
+ if (logdata != NULL)
+ nat64_log(logdata, m, dst->sa_family);
+ ifp = ro->ro_rt->rt_ifp;
+ error = (*ifp->if_output)(ifp, m, dst, ro);
+ if (error != 0)
+ NAT64STAT_INC(stats, oerrors);
+ FREE_ROUTE(ro);
+ return (error);
+}
+#else /* !IPFIREWALL_NAT64_DIRECT_OUTPUT */
+/*
+ * netisr variant: the ifp, dst and ro arguments are not used.  The IP
+ * version found in the packet selects the netisr protocol the mbuf is
+ * queued to.  Packets of unknown version are freed and counted as
+ * dropped; a netisr_queue() failure is counted in "oerrors".
+ */
+static NAT64NOINLINE int
+nat64_output(struct ifnet *ifp, struct mbuf *m,
+    struct sockaddr *dst, struct route *ro, nat64_stats_block *stats,
+    void *logdata)
+{
+	struct ip *hdr;
+	int af, error, isr;
+
+	hdr = mtod(m, struct ip *);
+	if (hdr->ip_v == IPVERSION) {
+		af = AF_INET;
+		isr = NETISR_IP;
+	} else if (hdr->ip_v == (IPV6_VERSION >> 4)) {
+		af = AF_INET6;
+		isr = NETISR_IPV6;
+	} else {
+		m_freem(m);
+		NAT64STAT_INC(stats, dropped);
+		DPRINTF(DP_DROPS, "unknown IP version");
+		return (EAFNOSUPPORT);
+	}
+	if (logdata != NULL)
+		nat64_log(logdata, m, af);
+	error = netisr_queue(isr, m);
+	if (error != 0)
+		NAT64STAT_INC(stats, oerrors);
+	return (error);
+}
+
+/*
+ * netisr variant: no route lookup is needed, so this simply forwards to
+ * nat64_output() with the interface, destination and route left NULL.
+ */
+static NAT64NOINLINE int
+nat64_output_one(struct mbuf *m, nat64_stats_block *stats, void *logdata)
+{
+	int error;
+
+	error = nat64_output(NULL, m, NULL, NULL, stats, logdata);
+	return (error);
+}
+#endif /* !IPFIREWALL_NAT64_DIRECT_OUTPUT */
+
+
+#if 0
+/*
+ * Debug helper (currently compiled out by the enclosing "#if 0"):
+ * format "<src> -> <dst> <next header>" for an IPv6 header into buf.
+ */
+void print_ipv6_header(struct ip6_hdr *ip6, char *buf, size_t bufsize);
+
+void
+print_ipv6_header(struct ip6_hdr *ip6, char *buf, size_t bufsize)
+{
+ char sbuf[INET6_ADDRSTRLEN], dbuf[INET6_ADDRSTRLEN];
+
+ inet_ntop(AF_INET6, &ip6->ip6_src, sbuf, sizeof(sbuf));
+ inet_ntop(AF_INET6, &ip6->ip6_dst, dbuf, sizeof(dbuf));
+ snprintf(buf, bufsize, "%s -> %s %d", sbuf, dbuf, ip6->ip6_nxt);
+}
+
+
+/*
+ * Compiled out by the enclosing "#if 0".
+ * Build an IPv4-embedded IPv6 address in *ip6: start from cfg->prefix and
+ * place the IPv4 address ia at the position selected by cfg->plen
+ * (32, 40, 48, 56, 64 or 96).  Byte 8 of the result is always cleared.
+ * Returns 1 on success and 0 for any other prefix length.
+ */
+static NAT64NOINLINE int
+nat64_embed_ip4(struct nat64_cfg *cfg, in_addr_t ia, struct in6_addr *ip6)
+{
+
+ /* assume the prefix is properly filled with zeros */
+ bcopy(&cfg->prefix, ip6, sizeof(*ip6));
+ switch (cfg->plen) {
+ case 32:
+ case 96:
+ /* The address fills exactly one 32-bit word. */
+ ip6->s6_addr32[cfg->plen / 32] = ia;
+ break;
+ case 40:
+ case 48:
+ case 56:
+ /* The address is split across words 1 and 2. */
+#if BYTE_ORDER == BIG_ENDIAN
+ ip6->s6_addr32[1] = cfg->prefix.s6_addr32[1] |
+ (ia >> (cfg->plen % 32));
+ ip6->s6_addr32[2] = ia << (24 - cfg->plen % 32);
+#elif BYTE_ORDER == LITTLE_ENDIAN
+ ip6->s6_addr32[1] = cfg->prefix.s6_addr32[1] |
+ (ia << (cfg->plen % 32));
+ ip6->s6_addr32[2] = ia >> (24 - cfg->plen % 32);
+#endif
+ break;
+ case 64:
+ /* The address is split across words 2 and 3, skipping byte 8. */
+#if BYTE_ORDER == BIG_ENDIAN
+ ip6->s6_addr32[2] = ia >> 8;
+ ip6->s6_addr32[3] = ia << 24;
+#elif BYTE_ORDER == LITTLE_ENDIAN
+ ip6->s6_addr32[2] = ia << 8;
+ ip6->s6_addr32[3] = ia >> 24;
+#endif
+ break;
+ default:
+ return (0);
+ };
+ ip6->s6_addr8[8] = 0;
+ return (1);
+}
+
+/*
+ * Compiled out by the enclosing "#if 0".
+ * Extract the IPv4 address embedded in *ip6 for the given prefix length.
+ * The first switch rejects addresses whose suffix bits (and byte 8 where
+ * checked) are non-zero; the second switch reassembles the address.
+ * Returns the address, or 0 when the prefix length is unsupported, the
+ * suffix check fails, or nat64_check_ip4()/nat64_check_private_ip4()
+ * reject the result.
+ */
+static NAT64NOINLINE in_addr_t
+nat64_extract_ip4(struct in6_addr *ip6, int plen)
+{
+ in_addr_t ia;
+
+ /*
+ * According to RFC 6052 p2.2:
+ * IPv4-embedded IPv6 addresses are composed of a variable-length
+ * prefix, the embedded IPv4 address, and a variable length suffix.
+ * The suffix bits are reserved for future extensions and SHOULD
+ * be set to zero.
+ */
+ switch (plen) {
+ case 32:
+ if (ip6->s6_addr32[3] != 0 || ip6->s6_addr32[2] != 0)
+ goto badip6;
+ break;
+ case 40:
+ if (ip6->s6_addr32[3] != 0 ||
+ (ip6->s6_addr32[2] & htonl(0xff00ffff)) != 0)
+ goto badip6;
+ break;
+ case 48:
+ if (ip6->s6_addr32[3] != 0 ||
+ (ip6->s6_addr32[2] & htonl(0xff0000ff)) != 0)
+ goto badip6;
+ break;
+ case 56:
+ if (ip6->s6_addr32[3] != 0 || ip6->s6_addr8[8] != 0)
+ goto badip6;
+ break;
+ case 64:
+ if (ip6->s6_addr8[8] != 0 ||
+ (ip6->s6_addr32[3] & htonl(0x00ffffff)) != 0)
+ goto badip6;
+ };
+ /* Suffix is clean (or plen is 96/unsupported): rebuild the address. */
+ switch (plen) {
+ case 32:
+ case 96:
+ ia = ip6->s6_addr32[plen / 32];
+ break;
+ case 40:
+ case 48:
+ case 56:
+#if BYTE_ORDER == BIG_ENDIAN
+ ia = (ip6->s6_addr32[1] << (plen % 32)) |
+ (ip6->s6_addr32[2] >> (24 - plen % 32));
+#elif BYTE_ORDER == LITTLE_ENDIAN
+ ia = (ip6->s6_addr32[1] >> (plen % 32)) |
+ (ip6->s6_addr32[2] << (24 - plen % 32));
+#endif
+ break;
+ case 64:
+#if BYTE_ORDER == BIG_ENDIAN
+ ia = (ip6->s6_addr32[2] << 8) | (ip6->s6_addr32[3] >> 24);
+#elif BYTE_ORDER == LITTLE_ENDIAN
+ ia = (ip6->s6_addr32[2] >> 8) | (ip6->s6_addr32[3] << 24);
+#endif
+ break;
+ default:
+ return (0);
+ };
+ if (nat64_check_ip4(ia) != 0 ||
+ nat64_check_private_ip4(ia) != 0)
+ goto badip4;
+
+ return (ia);
+badip4:
+ DPRINTF(DP_GENERIC, "invalid destination address: %08x", ia);
+ return (0);
+badip6:
+ DPRINTF(DP_GENERIC, "invalid IPv4-embedded IPv6 address");
+ return (0);
+}
+#endif
+
+/*
+ * According to RFC 1624 the equation for incremental checksum update is:
+ * HC' = ~(~HC + ~m + m') -- [Eqn. 3]
+ * HC' = HC - ~m - m' -- [Eqn. 4]
+ * So, when we replace IPv4 addresses with IPv6 addresses, we can
+ * assume that the new bytes were previously zeros, and vice versa -
+ * when we replace IPv6 addresses with IPv4 addresses, the now unused
+ * bytes become zeros. The payload length in the pseudo header is wider,
+ * but one half of it should be zero. Using equation 4 we get:
+ * HC' = HC - (~m0 + m0') -- m0 is first changed word
+ * HC' = (HC - (~m0 + m0')) - (~m1 + m1') -- m1 is second changed word
+ * HC' = HC - ~m0 - m0' - ~m1 - m1' - ... =
+ * = HC - sum(~m[i] + m'[i])
+ *
+ * The function result should be used as follows:
+ * IPv6 to IPv4: HC' = cksum_add(HC, result)
+ * IPv4 to IPv6: HC' = cksum_add(HC, ~result)
+ */
+/*
+ * Return the 16-bit folded sum of the one's complement of the two IPv4
+ * addresses (as 16-bit halves) plus the sixteen 16-bit words of the IPv6
+ * source and destination addresses, which are read as one contiguous
+ * 32-byte region starting at ip6_src.
+ */
+static NAT64NOINLINE uint16_t
+nat64_cksum_convert(struct ip6_hdr *ip6, struct ip *ip)
+{
+	uint32_t acc, ndst, nsrc;
+	uint16_t *end, *word;
+
+	nsrc = ~ip->ip_src.s_addr;
+	ndst = ~ip->ip_dst.s_addr;
+	acc = nsrc >> 16;
+	acc += nsrc & 0xffff;
+	acc += ndst >> 16;
+	acc += ndst & 0xffff;
+
+	word = (uint16_t *)&ip6->ip6_src;
+	end = (uint16_t *)(&ip6->ip6_src + 2);
+	while (word < end)
+		acc += *word++;
+
+	/* Fold the carries back into the low 16 bits. */
+	while ((acc >> 16) != 0)
+		acc = (acc >> 16) + (acc & 0xffff);
+	return (acc);
+}
+
+#if __FreeBSD_version < 1100000
+#define ip_fillid(ip) (ip)->ip_id = ip_newid()
+#endif
+/*
+ * Fill in the IPv4 header *ip for a packet translated from IPv6.
+ *
+ *  ip6   - original IPv6 header (traffic class and hop limit are taken
+ *          from it);
+ *  frag  - IPv6 fragment header of the original packet, or NULL when the
+ *          packet is not fragmented;
+ *  plen  - payload length that follows the new IPv4 header;
+ *  proto - upper-layer protocol; IPPROTO_ICMPV6 is mapped to IPPROTO_ICMP;
+ *  ip    - header to initialize; ip_src/ip_dst must already be set.
+ *
+ * The header checksum is computed last, after every other field is final.
+ */
+static NAT64NOINLINE void
+nat64_init_ip4hdr(const struct ip6_hdr *ip6, const struct ip6_frag *frag,
+    uint16_t plen, uint8_t proto, struct ip *ip)
+{
+
+	/* assume addresses are already initialized */
+	ip->ip_v = IPVERSION;
+	ip->ip_hl = sizeof(*ip) >> 2;
+	ip->ip_tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
+	ip->ip_len = htons(sizeof(*ip) + plen);
+#ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT
+	ip->ip_ttl = ip6->ip6_hlim - IPV6_HLIMDEC;
+#else
+	/* Forwarding code will decrement TTL. */
+	ip->ip_ttl = ip6->ip6_hlim;
+#endif
+	ip->ip_sum = 0;
+	ip->ip_p = (proto == IPPROTO_ICMPV6) ? IPPROTO_ICMP: proto;
+	if (frag != NULL) {
+		ip->ip_off = htons(ntohs(frag->ip6f_offlg) >> 3);
+		if (frag->ip6f_offlg & IP6F_MORE_FRAG)
+			ip->ip_off |= htons(IP_MF);
+		/*
+		 * All fragments of one datagram must carry the same IPv4 ID,
+		 * so derive it from the low-order 16 bits of the IPv6
+		 * fragment identification (the reverse of what
+		 * nat64_fragment6() does) instead of generating a new one
+		 * for every fragment.
+		 */
+		ip->ip_id = htons((uint16_t)(ntohl(frag->ip6f_ident) &
+		    0xffff));
+	} else {
+		/*
+		 * Set ip_off before asking for an ID, so that an
+		 * ip_fillid() implementation that looks at the DF bit
+		 * sees its final value.
+		 */
+		ip->ip_off = htons(IP_DF);
+		ip_fillid(ip);
+	}
+	ip->ip_sum = in_cksum_hdr(ip);
+}
+
+#define FRAGSZ(mtu) ((mtu) - sizeof(struct ip6_hdr) - sizeof(struct ip6_frag))
+/*
+ * Prepend the IPv6 header *ip6 to the payload in m and queue the result
+ * on mq, splitting the payload into IPv6 fragments when needed.
+ *
+ *  stats  - statistics block to account errors and fragments in;
+ *  ip6    - prepared IPv6 header; ip6_plen and ip6_nxt are modified here
+ *           when fragmenting;
+ *  mq     - output queue for the resulting packet(s);
+ *  m      - payload (without any IP header); always consumed;
+ *  mtu    - MTU of the outgoing path;
+ *  ip_id  - ID of the original IPv4 packet, network byte order;
+ *  ip_off - offset/MF bits of the original IPv4 packet, network byte
+ *           order; non-zero means the original was itself a fragment.
+ *
+ * Returns 0 on success, ENOMEM when an mbuf operation fails and ENOBUFS
+ * when mq overflows.  On failure in the fragmenting path mq is drained.
+ */
+static NAT64NOINLINE int
+nat64_fragment6(nat64_stats_block *stats, struct ip6_hdr *ip6, struct mbufq *mq,
+    struct mbuf *m, uint32_t mtu, uint16_t ip_id, uint16_t ip_off)
+{
+	struct ip6_frag ip6f;
+	struct mbuf *n;
+	uint16_t hlen, len, offset;
+	int error, plen;
+
+	plen = ntohs(ip6->ip6_plen);
+	hlen = sizeof(struct ip6_hdr);
+
+	/* Fragmentation isn't needed */
+	if (ip_off == 0 && plen <= mtu - hlen) {
+		M_PREPEND(m, hlen, M_NOWAIT);
+		if (m == NULL) {
+			NAT64STAT_INC(stats, nomem);
+			return (ENOMEM);
+		}
+		bcopy(ip6, mtod(m, void *), hlen);
+		if (mbufq_enqueue(mq, m) != 0) {
+			m_freem(m);
+			NAT64STAT_INC(stats, dropped);
+			DPRINTF(DP_DROPS, "dropped due to mbufq overflow");
+			return (ENOBUFS);
+		}
+		return (0);
+	}
+
+	hlen += sizeof(struct ip6_frag);
+	ip6f.ip6f_reserved = 0;
+	ip6f.ip6f_nxt = ip6->ip6_nxt;
+	ip6->ip6_nxt = IPPROTO_FRAGMENT;
+	if (ip_off != 0) {
+		/*
+		 * We have got an IPv4 fragment.
+		 * Use offset value and ip_id from original fragment.
+		 */
+		ip6f.ip6f_ident = htonl(ntohs(ip_id));
+		offset = (ntohs(ip_off) & IP_OFFMASK) << 3;
+		NAT64STAT_INC(stats, ifrags);
+	} else {
+		/* The packet size exceeds interface MTU */
+		ip6f.ip6f_ident = htonl(ip6_randomid());
+		offset = 0; /* First fragment*/
+	}
+	while (plen > 0 && m != NULL) {
+		n = NULL;
+		/* Fragment payload size must be a multiple of 8 octets. */
+		len = FRAGSZ(mtu) & ~7;
+		if (len > plen)
+			len = plen;
+		ip6->ip6_plen = htons(len + sizeof(ip6f));
+		/* Host to network order (offset is a byte count, low 3 bits 0). */
+		ip6f.ip6f_offlg = htons(offset);
+		if (len < plen || (ip_off & htons(IP_MF)) != 0)
+			ip6f.ip6f_offlg |= IP6F_MORE_FRAG;
+		offset += len;
+		plen -= len;
+		if (plen > 0) {
+			/* Keep the remainder in n for the next iteration. */
+			n = m_split(m, len, M_NOWAIT);
+			if (n == NULL) {
+				error = ENOMEM;
+				goto fail;
+			}
+		}
+		M_PREPEND(m, hlen, M_NOWAIT);
+		if (m == NULL) {
+			error = ENOMEM;
+			goto fail;
+		}
+		bcopy(ip6, mtod(m, void *), sizeof(struct ip6_hdr));
+		bcopy(&ip6f, mtodo(m, sizeof(struct ip6_hdr)),
+		    sizeof(struct ip6_frag));
+		if (mbufq_enqueue(mq, m) != 0) {
+			error = ENOBUFS;
+			goto fail;
+		}
+		m = n;
+	}
+	NAT64STAT_ADD(stats, ofrags, mbufq_len(mq));
+	return (0);
+fail:
+	if (m != NULL)
+		m_freem(m);
+	if (n != NULL)
+		m_freem(n);
+	mbufq_drain(mq);
+	/*
+	 * Account queue overflow the same way as the unfragmented path
+	 * above does, instead of reporting it as a memory shortage.
+	 */
+	if (error == ENOBUFS) {
+		NAT64STAT_INC(stats, dropped);
+		DPRINTF(DP_DROPS, "dropped due to mbufq overflow");
+	} else {
+		NAT64STAT_INC(stats, nomem);
+	}
+	return (error);
+}
+
+#if __FreeBSD_version < 1100000
+#define rt_expire rt_rmx.rmx_expire
+#define rt_mtu rt_rmx.rmx_mtu
+#endif
+/*
+ * Look up an IPv6 route to dest in the FIB of mbuf m and store it in *ro.
+ * Returns the next-hop address (the gateway for RTF_GATEWAY routes,
+ * otherwise the destination kept in ro->ro_dst), or NULL when there is no
+ * usable route: no entry, route or interface not up, interface not
+ * running, an active RTF_REJECT route, or link state down.
+ * The caller has to release *ro in either case.
+ */
+static NAT64NOINLINE struct sockaddr*
+nat64_find_route6(struct route_in6 *ro, struct in6_addr *dest, struct mbuf *m)
+{
+	struct sockaddr_in6 *sin6;
+	struct rtentry *rt;
+
+	bzero(ro, sizeof(*ro));
+	sin6 = (struct sockaddr_in6 *)&ro->ro_dst;
+	sin6->sin6_family = AF_INET6;
+	sin6->sin6_len = sizeof(*sin6);
+	sin6->sin6_addr = *dest;
+	IN6_LOOKUP_ROUTE(ro, M_GETFIB(m));
+
+	rt = ro->ro_rt;
+	if (rt == NULL || (rt->rt_flags & RTF_UP) == 0 ||
+	    (rt->rt_ifp->if_flags & IFF_UP) == 0 ||
+	    (rt->rt_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+		return (NULL);
+	if (rt->rt_flags & RTF_GATEWAY)
+		sin6 = (struct sockaddr_in6 *)rt->rt_gateway;
+	/* A reject route counts while it has no expiry or is not expired. */
+	if ((rt->rt_flags & RTF_REJECT) &&
+	    (rt->rt_expire == 0 || time_uptime < rt->rt_expire))
+		return (NULL);
+	if (rt->rt_ifp->if_link_state == LINK_STATE_DOWN)
+		return (NULL);
+	return ((struct sockaddr *)sin6);
+}
+
+#define NAT64_ICMP6_PLEN 64
+/*
+ * Send an ICMPv6 error (type, code, mtu) back to the source of the IPv6
+ * packet in m.  The new datagram carries at most NAT64_ICMP6_PLEN bytes
+ * of the original packet and is passed to nat64_output_one().
+ *
+ * The original mbuf is always consumed.  Nothing is sent (and "dropped"
+ * is incremented) when the headers are not contiguous, when the original
+ * is itself an ICMPv6 error or redirect, or when type is not one of
+ * DST_UNREACH, PACKET_TOO_BIG, TIME_EXCEEDED or PARAM_PROB.
+ */
+static NAT64NOINLINE void
+nat64_icmp6_reflect(struct mbuf *m, uint8_t type, uint8_t code, uint32_t mtu,
+ nat64_stats_block *stats, void *logdata)
+{
+ struct icmp6_hdr *icmp6;
+ struct ip6_hdr *ip6, *oip6;
+ struct mbuf *n;
+ int len, plen;
+
+ len = 0;
+ /* plen temporarily holds the upper-layer protocol, len its offset. */
+ plen = nat64_getlasthdr(m, &len);
+ if (plen < 0) {
+ DPRINTF(DP_DROPS, "mbuf isn't contigious");
+ goto freeit;
+ }
+ /*
+ * Do not send ICMPv6 in reply to ICMPv6 errors.
+ */
+ if (plen == IPPROTO_ICMPV6) {
+ if (m->m_len < len + sizeof(*icmp6)) {
+ DPRINTF(DP_DROPS, "mbuf isn't contigious");
+ goto freeit;
+ }
+ icmp6 = mtodo(m, len);
+ if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST ||
+ icmp6->icmp6_type == ND_REDIRECT) {
+ DPRINTF(DP_DROPS, "do not send ICMPv6 in reply to "
+ "ICMPv6 errors");
+ goto freeit;
+ }
+ }
+ /*
+ if (icmp6_ratelimit(&ip6->ip6_src, type, code))
+ goto freeit;
+ */
+ ip6 = mtod(m, struct ip6_hdr *);
+ switch (type) {
+ case ICMP6_DST_UNREACH:
+ case ICMP6_PACKET_TOO_BIG:
+ case ICMP6_TIME_EXCEEDED:
+ case ICMP6_PARAM_PROB:
+ break;
+ default:
+ goto freeit;
+ }
+ /* Calculate length of ICMPv6 payload */
+ len = (m->m_pkthdr.len > NAT64_ICMP6_PLEN) ? NAT64_ICMP6_PLEN:
+ m->m_pkthdr.len;
+
+ /* Create new ICMPv6 datagram */
+ plen = len + sizeof(struct icmp6_hdr);
+ n = m_get2(sizeof(struct ip6_hdr) + plen + max_hdr, M_NOWAIT,
+ MT_HEADER, M_PKTHDR);
+ if (n == NULL) {
+ NAT64STAT_INC(stats, nomem);
+ m_freem(m);
+ return;
+ }
+ /*
+ * Move pkthdr from original mbuf. We should have initialized some
+ * fields, because we can reinject this mbuf to netisr and it will
+ * go trough input path (it requires at least rcvif should be set).
+ * Also do M_ALIGN() to reduce chances of need to allocate new mbuf
+ * in the chain, when we will do M_PREPEND() or make some type of
+ * tunneling.
+ */
+ m_move_pkthdr(n, m);
+ M_ALIGN(n, sizeof(struct ip6_hdr) + plen + max_hdr);
+
+ n->m_len = n->m_pkthdr.len = sizeof(struct ip6_hdr) + plen;
+ /* The reply goes back to the sender: swap source and destination. */
+ oip6 = mtod(n, struct ip6_hdr *);
+ oip6->ip6_src = ip6->ip6_dst;
+ oip6->ip6_dst = ip6->ip6_src;
+ oip6->ip6_nxt = IPPROTO_ICMPV6;
+ oip6->ip6_flow = 0;
+ oip6->ip6_vfc |= IPV6_VERSION;
+ oip6->ip6_hlim = V_ip6_defhlim;
+ oip6->ip6_plen = htons(plen);
+
+ icmp6 = mtodo(n, sizeof(struct ip6_hdr));
+ icmp6->icmp6_cksum = 0;
+ icmp6->icmp6_type = type;
+ icmp6->icmp6_code = code;
+ icmp6->icmp6_mtu = htonl(mtu);
+
+ /* Quote the beginning of the offending packet after the ICMPv6 header. */
+ m_copydata(m, 0, len, mtodo(n, sizeof(struct ip6_hdr) +
+ sizeof(struct icmp6_hdr)));
+ icmp6->icmp6_cksum = in6_cksum(n, IPPROTO_ICMPV6,
+ sizeof(struct ip6_hdr), plen);
+ m_freem(m);
+ nat64_output_one(n, stats, logdata);
+ return;
+freeit:
+ NAT64STAT_INC(stats, dropped);
+ m_freem(m);
+}
+
+/*
+ * Look up an IPv4 route to dest in the FIB of mbuf m and store it in *ro.
+ * Returns the next-hop address (the gateway for RTF_GATEWAY routes,
+ * otherwise the destination kept in ro->ro_dst), or NULL when there is no
+ * usable route: no entry, route or interface not up, interface not
+ * running, an active RTF_REJECT route, or link state down.
+ * The caller has to release *ro in either case.
+ */
+static NAT64NOINLINE struct sockaddr*
+nat64_find_route4(struct route *ro, in_addr_t dest, struct mbuf *m)
+{
+	struct sockaddr_in *sin;
+	struct rtentry *rt;
+
+	bzero(ro, sizeof(*ro));
+	sin = (struct sockaddr_in *)&ro->ro_dst;
+	sin->sin_family = AF_INET;
+	sin->sin_len = sizeof(*sin);
+	sin->sin_addr.s_addr = dest;
+	IN_LOOKUP_ROUTE(ro, M_GETFIB(m));
+
+	rt = ro->ro_rt;
+	if (rt == NULL || (rt->rt_flags & RTF_UP) == 0 ||
+	    (rt->rt_ifp->if_flags & IFF_UP) == 0 ||
+	    (rt->rt_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+		return (NULL);
+	if (rt->rt_flags & RTF_GATEWAY)
+		sin = (struct sockaddr_in *)rt->rt_gateway;
+	/* A reject route counts while it has no expiry or is not expired. */
+	if ((rt->rt_flags & RTF_REJECT) &&
+	    (rt->rt_expire == 0 || time_uptime < rt->rt_expire))
+		return (NULL);
+	if (rt->rt_ifp->if_link_state == LINK_STATE_DOWN)
+		return (NULL);
+	return ((struct sockaddr *)sin);
+}
+
+#define NAT64_ICMP_PLEN 64
+/*
+ * Send an ICMPv4 error (type, code, next-hop mtu) back to the source of
+ * the IPv4 packet in m.  The new datagram is passed to
+ * nat64_output_one().
+ *
+ * The original mbuf is always consumed.  Nothing is sent (and "dropped"
+ * is incremented) when the original is not a first fragment, when it is
+ * itself an ICMP error, when its ICMP header is not contiguous in the
+ * first mbuf, or when type is not UNREACH, TIMXCEED or PARAMPROB.
+ */
+static NAT64NOINLINE void
+nat64_icmp_reflect(struct mbuf *m, uint8_t type,
+    uint8_t code, uint16_t mtu, nat64_stats_block *stats, void *logdata)
+{
+	struct icmp *icmp;
+	struct ip *ip, *oip;
+	struct mbuf *n;
+	int len, plen;
+
+	ip = mtod(m, struct ip *);
+	/* Do not send ICMP error if packet is not the first fragment */
+	if (ip->ip_off & ~ntohs(IP_MF|IP_DF)) {
+		DPRINTF(DP_DROPS, "not first fragment");
+		goto freeit;
+	}
+	/* Do not send ICMP in reply to ICMP errors */
+	if (ip->ip_p == IPPROTO_ICMP) {
+		/*
+		 * icmp_type is read below, so the ICMP header itself (not
+		 * only the IP header in front of it) must be in this mbuf.
+		 */
+		if (m->m_len < (ip->ip_hl << 2) + ICMP_MINLEN) {
+			DPRINTF(DP_DROPS, "mbuf isn't contigious");
+			goto freeit;
+		}
+		icmp = mtodo(m, ip->ip_hl << 2);
+		if (!ICMP_INFOTYPE(icmp->icmp_type)) {
+			DPRINTF(DP_DROPS, "do not send ICMP in reply to "
+			    "ICMP errors");
+			goto freeit;
+		}
+	}
+	switch (type) {
+	case ICMP_UNREACH:
+	case ICMP_TIMXCEED:
+	case ICMP_PARAMPROB:
+		break;
+	default:
+		goto freeit;
+	}
+	/* Calculate length of ICMP payload */
+	len = (m->m_pkthdr.len > NAT64_ICMP_PLEN) ? (ip->ip_hl << 2) + 8:
+	    m->m_pkthdr.len;
+	/*
+	 * With IP options the header plus 8 bytes may be up to 68 bytes,
+	 * which can be more than a packet that is only slightly longer
+	 * than NAT64_ICMP_PLEN; never copy past the end of the packet.
+	 */
+	if (len > m->m_pkthdr.len)
+		len = m->m_pkthdr.len;
+
+	/* Create new ICMPv4 datagram */
+	plen = len + sizeof(struct icmphdr) + sizeof(uint32_t);
+	n = m_get2(sizeof(struct ip) + plen + max_hdr, M_NOWAIT,
+	    MT_HEADER, M_PKTHDR);
+	if (n == NULL) {
+		NAT64STAT_INC(stats, nomem);
+		m_freem(m);
+		return;
+	}
+	m_move_pkthdr(n, m);
+	M_ALIGN(n, sizeof(struct ip) + plen + max_hdr);
+
+	n->m_len = n->m_pkthdr.len = sizeof(struct ip) + plen;
+	oip = mtod(n, struct ip *);
+	oip->ip_v = IPVERSION;
+	oip->ip_hl = sizeof(struct ip) >> 2;
+	oip->ip_tos = 0;
+	oip->ip_len = htons(n->m_pkthdr.len);
+	oip->ip_ttl = V_ip_defttl;
+	oip->ip_p = IPPROTO_ICMP;
+	/*
+	 * Set ip_off before asking for an ID, so that an ip_fillid()
+	 * implementation that looks at the DF bit sees its final value.
+	 */
+	oip->ip_off = htons(IP_DF);
+	ip_fillid(oip);
+	/* The reply goes back to the sender: swap source and destination. */
+	oip->ip_src = ip->ip_dst;
+	oip->ip_dst = ip->ip_src;
+	oip->ip_sum = 0;
+	oip->ip_sum = in_cksum_hdr(oip);
+
+	icmp = mtodo(n, sizeof(struct ip));
+	icmp->icmp_type = type;
+	icmp->icmp_code = code;
+	icmp->icmp_cksum = 0;
+	icmp->icmp_pmvoid = 0;
+	icmp->icmp_nextmtu = htons(mtu);
+	/* Quote the beginning of the offending packet after the ICMP header. */
+	m_copydata(m, 0, len, mtodo(n, sizeof(struct ip) +
+	    sizeof(struct icmphdr) + sizeof(uint32_t)));
+	icmp->icmp_cksum = in_cksum_skip(n, sizeof(struct ip) + plen,
+	    sizeof(struct ip));
+	m_freem(m);
+	nat64_output_one(n, stats, logdata);
+	return;
+freeit:
+	NAT64STAT_INC(stats, dropped);
+	m_freem(m);
+}
+
+/*
+ * Rewrite an ICMP echo request/reply header in place as ICMPv6: store the
+ * new type, optionally replace the identifier (when id is non-zero) and
+ * update the checksum incrementally for both changes.  Finally the IPv6
+ * pseudo header of ip6 is folded into the checksum.
+ */
+static void
+nat64_icmp_handle_echo(struct ip6_hdr *ip6, struct icmp6_hdr *icmp6,
+    uint16_t id, uint8_t type)
+{
+	uint16_t *typecode, prev;
+
+	/* Type and code are adjacent; treat them as one 16-bit word. */
+	typecode = (uint16_t *)icmp6;
+	prev = *typecode;
+	icmp6->icmp6_type = type;
+	/* Reflect ICMPv6 -> ICMPv4 type translation in the cksum */
+	icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum, prev,
+	    *typecode);
+	if (id != 0) {
+		prev = icmp6->icmp6_id;
+		icmp6->icmp6_id = id;
+		/* Reflect ICMP id translation in the cksum */
+		icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum, prev,
+		    id);
+	}
+	/* Reflect IPv6 pseudo header in the cksum */
+	icmp6->icmp6_cksum = ~in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen),
+	    IPPROTO_ICMPV6, ~icmp6->icmp6_cksum);
+}
+
+/*
+ * Translate the ICMPv4 message that starts at byte `offset` of m into
+ * ICMPv6.
+ *
+ *  m      - IPv4 packet; may be replaced by m_pullup();
+ *  ip6    - IPv6 header prepared by the caller for the translated packet;
+ *           its ip6_plen is updated when a new datagram is built;
+ *  icmpid - if non-zero, replaces the echo identifier, or the source
+ *           port / echo identifier of the packet quoted in an error;
+ *  offset - offset of the ICMP header in m;
+ *  stats  - statistics block.
+ *
+ * Echo request/reply are rewritten in place and m itself is returned.
+ * For error messages a new mbuf is built that holds the ICMPv6 header, a
+ * translated inner IPv6 header and the start of the quoted payload, all
+ * placed at `offset`; the first `offset` bytes of the new mbuf are not
+ * written here.  The original m is freed in that case.
+ *
+ * Returns NULL when the message cannot be translated; m has been freed
+ * then and the failure has been accounted in stats.
+ */
+static NAT64NOINLINE struct mbuf *
+nat64_icmp_translate(struct mbuf *m, struct ip6_hdr *ip6, uint16_t icmpid,
+ int offset, nat64_stats_block *stats)
+{
+ struct ip ip;
+ struct icmp *icmp;
+ struct tcphdr *tcp;
+ struct udphdr *udp;
+ struct ip6_hdr *eip6;
+ struct mbuf *n;
+ uint32_t mtu;
+ int len, hlen, plen;
+ uint8_t type, code;
+
+ if (m->m_len < offset + ICMP_MINLEN)
+ m = m_pullup(m, offset + ICMP_MINLEN);
+ if (m == NULL) {
+ NAT64STAT_INC(stats, nomem);
+ return (m);
+ }
+ /* mtu doubles as the ICMPv6 pointer value for parameter problems. */
+ mtu = 0;
+ icmp = mtodo(m, offset);
+ /* RFC 7915 p4.2 */
+ switch (icmp->icmp_type) {
+ case ICMP_ECHOREPLY:
+ type = ICMP6_ECHO_REPLY;
+ code = 0;
+ break;
+ case ICMP_UNREACH:
+ type = ICMP6_DST_UNREACH;
+ switch (icmp->icmp_code) {
+ case ICMP_UNREACH_NET:
+ case ICMP_UNREACH_HOST:
+ case ICMP_UNREACH_SRCFAIL:
+ case ICMP_UNREACH_NET_UNKNOWN:
+ case ICMP_UNREACH_HOST_UNKNOWN:
+ case ICMP_UNREACH_TOSNET:
+ case ICMP_UNREACH_TOSHOST:
+ code = ICMP6_DST_UNREACH_NOROUTE;
+ break;
+ case ICMP_UNREACH_PROTOCOL:
+ type = ICMP6_PARAM_PROB;
+ code = ICMP6_PARAMPROB_NEXTHEADER;
+ break;
+ case ICMP_UNREACH_PORT:
+ code = ICMP6_DST_UNREACH_NOPORT;
+ break;
+ case ICMP_UNREACH_NEEDFRAG:
+ type = ICMP6_PACKET_TOO_BIG;
+ code = 0;
+ /* XXX: needs an additional look */
+ mtu = max(IPV6_MMTU, ntohs(icmp->icmp_nextmtu) + 20);
+ break;
+ case ICMP_UNREACH_NET_PROHIB:
+ case ICMP_UNREACH_HOST_PROHIB:
+ case ICMP_UNREACH_FILTER_PROHIB:
+ case ICMP_UNREACH_PRECEDENCE_CUTOFF:
+ code = ICMP6_DST_UNREACH_ADMIN;
+ break;
+ default:
+ DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d",
+ icmp->icmp_type, icmp->icmp_code);
+ goto freeit;
+ }
+ break;
+ case ICMP_TIMXCEED:
+ type = ICMP6_TIME_EXCEEDED;
+ code = icmp->icmp_code;
+ break;
+ case ICMP_ECHO:
+ type = ICMP6_ECHO_REQUEST;
+ code = 0;
+ break;
+ case ICMP_PARAMPROB:
+ type = ICMP6_PARAM_PROB;
+ switch (icmp->icmp_code) {
+ case ICMP_PARAMPROB_ERRATPTR:
+ case ICMP_PARAMPROB_LENGTH:
+ code = ICMP6_PARAMPROB_HEADER;
+ /* Map the IPv4 header offset to the matching IPv6 header offset. */
+ switch (icmp->icmp_pptr) {
+ case 0: /* Version/IHL */
+ case 1: /* Type Of Service */
+ mtu = icmp->icmp_pptr;
+ break;
+ case 2: /* Total Length */
+ case 3: mtu = 4; /* Payload Length */
+ break;
+ case 8: /* Time to Live */
+ mtu = 7; /* Hop Limit */
+ break;
+ case 9: /* Protocol */
+ mtu = 6; /* Next Header */
+ break;
+ case 12: /* Source address */
+ case 13:
+ case 14:
+ case 15:
+ mtu = 8;
+ break;
+ case 16: /* Destination address */
+ case 17:
+ case 18:
+ case 19:
+ mtu = 24;
+ break;
+ default: /* Silently drop */
+ DPRINTF(DP_DROPS, "Unsupported ICMP type %d,"
+ " code %d, pptr %d", icmp->icmp_type,
+ icmp->icmp_code, icmp->icmp_pptr);
+ goto freeit;
+ }
+ break;
+ default:
+ DPRINTF(DP_DROPS, "Unsupported ICMP type %d,"
+ " code %d, pptr %d", icmp->icmp_type,
+ icmp->icmp_code, icmp->icmp_pptr);
+ goto freeit;
+ }
+ break;
+ default:
+ DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d",
+ icmp->icmp_type, icmp->icmp_code);
+ goto freeit;
+ }
+ /*
+ * For echo request/reply we can use original payload,
+ * but we need adjust icmp_cksum, because ICMPv6 cksum covers
+ * IPv6 pseudo header and ICMPv6 types differs from ICMPv4.
+ */
+ if (type == ICMP6_ECHO_REQUEST || type == ICMP6_ECHO_REPLY) {
+ nat64_icmp_handle_echo(ip6, ICMP6(icmp), icmpid, type);
+ return (m);
+ }
+ /*
+ * For other types of ICMP messages we need to translate inner
+ * IPv4 header to IPv6 header.
+ * Assume ICMP src is the same as payload dst
+ * E.g. we have ( GWsrc1 , NATIP1 ) in outer header
+ * and ( NATIP1, Hostdst1 ) in ICMP copy header.
+ * In that case, we already have map for NATIP1 and GWsrc1.
+ * The only thing we need is to copy IPv6 map prefix to
+ * Hostdst1.
+ */
+ hlen = offset + ICMP_MINLEN;
+ if (m->m_pkthdr.len < hlen + sizeof(struct ip) + ICMP_MINLEN) {
+ DPRINTF(DP_DROPS, "Message is too short %d",
+ m->m_pkthdr.len);
+ goto freeit;
+ }
+ /* Work on a local copy of the quoted (inner) IPv4 header. */
+ m_copydata(m, hlen, sizeof(struct ip), (char *)&ip);
+ if (ip.ip_v != IPVERSION) {
+ DPRINTF(DP_DROPS, "Wrong IP version %d", ip.ip_v);
+ goto freeit;
+ }
+ hlen += ip.ip_hl << 2; /* Skip inner IP header */
+ if (nat64_check_ip4(ip.ip_src.s_addr) != 0 ||
+ nat64_check_ip4(ip.ip_dst.s_addr) != 0 ||
+ nat64_check_private_ip4(ip.ip_src.s_addr) != 0 ||
+ nat64_check_private_ip4(ip.ip_dst.s_addr) != 0) {
+ DPRINTF(DP_DROPS, "IP addresses checks failed %04x -> %04x",
+ ntohl(ip.ip_src.s_addr), ntohl(ip.ip_dst.s_addr));
+ goto freeit;
+ }
+ if (m->m_pkthdr.len < hlen + ICMP_MINLEN) {
+ DPRINTF(DP_DROPS, "Message is too short %d",
+ m->m_pkthdr.len);
+ goto freeit;
+ }
+#if 0
+ /*
+ * Check that inner source matches the outer destination.
+ * XXX: We need some method to convert IPv4 into IPv6 address here,
+ * and compare IPv6 addresses.
+ */
+ if (ip.ip_src.s_addr != nat64_get_ip4(&ip6->ip6_dst)) {
+ DPRINTF(DP_GENERIC, "Inner source doesn't match destination ",
+ "%04x vs %04x", ip.ip_src.s_addr,
+ nat64_get_ip4(&ip6->ip6_dst));
+ goto freeit;
+ }
+#endif
+ /*
+ * Create new mbuf for ICMPv6 datagram.
+ * NOTE: len is data length just after inner IP header.
+ */
+ len = m->m_pkthdr.len - hlen;
+ /* Keep ICMPv6 header + inner IPv6 header + payload within the limit. */
+ if (sizeof(struct ip6_hdr) +
+ sizeof(struct icmp6_hdr) + len > NAT64_ICMP6_PLEN)
+ len = NAT64_ICMP6_PLEN - sizeof(struct icmp6_hdr) -
+ sizeof(struct ip6_hdr);
+ plen = sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr) + len;
+ n = m_get2(offset + plen + max_hdr, M_NOWAIT, MT_HEADER, M_PKTHDR);
+ if (n == NULL) {
+ NAT64STAT_INC(stats, nomem);
+ m_freem(m);
+ return (NULL);
+ }
+ m_move_pkthdr(n, m);
+ M_ALIGN(n, offset + plen + max_hdr);
+ n->m_len = n->m_pkthdr.len = offset + plen;
+ /* Adjust ip6_plen in outer header */
+ ip6->ip6_plen = htons(plen);
+ /* Construct new inner IPv6 header */
+ eip6 = mtodo(n, offset + sizeof(struct icmp6_hdr));
+ eip6->ip6_src = ip6->ip6_dst;
+ /* Use the fact that we have single /96 prefix for IPv4 map */
+ eip6->ip6_dst = ip6->ip6_src;
+ nat64_set_ip4(&eip6->ip6_dst, ip.ip_dst.s_addr);
+
+ eip6->ip6_flow = htonl(ip.ip_tos << 20);
+ eip6->ip6_vfc |= IPV6_VERSION;
+ eip6->ip6_hlim = ip.ip_ttl;
+ eip6->ip6_plen = htons(ntohs(ip.ip_len) - (ip.ip_hl << 2));
+ eip6->ip6_nxt = (ip.ip_p == IPPROTO_ICMP) ? IPPROTO_ICMPV6: ip.ip_p;
+ m_copydata(m, hlen, len, (char *)(eip6 + 1));
+ /*
+ * We need to translate source port in the inner ULP header,
+ * and adjust ULP checksum.
+ */
+ switch (ip.ip_p) {
+ case IPPROTO_TCP:
+ /*
+ * NOTE(review): len is capped above at 16 bytes, and th_sum
+ * presumably sits at offset 16 of struct tcphdr, so the
+ * checksum updates below appear to touch bytes just past the
+ * copied payload -- confirm.
+ */
+ if (len < offsetof(struct tcphdr, th_sum))
+ break;
+ tcp = TCP(eip6 + 1);
+ if (icmpid != 0) {
+ tcp->th_sum = cksum_adjust(tcp->th_sum,
+ tcp->th_sport, icmpid);
+ tcp->th_sport = icmpid;
+ }
+ tcp->th_sum = cksum_add(tcp->th_sum,
+ ~nat64_cksum_convert(eip6, &ip));
+ break;
+ case IPPROTO_UDP:
+ if (len < offsetof(struct udphdr, uh_sum))
+ break;
+ udp = UDP(eip6 + 1);
+ if (icmpid != 0) {
+ udp->uh_sum = cksum_adjust(udp->uh_sum,
+ udp->uh_sport, icmpid);
+ udp->uh_sport = icmpid;
+ }
+ udp->uh_sum = cksum_add(udp->uh_sum,
+ ~nat64_cksum_convert(eip6, &ip));
+ break;
+ case IPPROTO_ICMP:
+ /*
+ * Check if this is an ICMP error message for echo request
+ * that we sent. I.e. ULP in the data containing invoking
+ * packet is IPPROTO_ICMP and its type is ICMP_ECHO.
+ */
+ icmp = (struct icmp *)(eip6 + 1);
+ if (icmp->icmp_type != ICMP_ECHO) {
+ m_freem(n);
+ goto freeit;
+ }
+ /*
+ * For our client this original datagram should looks
+ * like it was ICMPv6 datagram with type ICMP6_ECHO_REQUEST.
+ * Thus we need adjust icmp_cksum and convert type from
+ * ICMP_ECHO to ICMP6_ECHO_REQUEST.
+ */
+ nat64_icmp_handle_echo(eip6, ICMP6(icmp), icmpid,
+ ICMP6_ECHO_REQUEST);
+ }
+ /* Everything needed from the original packet has been copied. */
+ m_freem(m);
+ /* Convert ICMPv4 into ICMPv6 header */
+ icmp = mtodo(n, offset);
+ ICMP6(icmp)->icmp6_type = type;
+ ICMP6(icmp)->icmp6_code = code;
+ ICMP6(icmp)->icmp6_mtu = htonl(mtu);
+ ICMP6(icmp)->icmp6_cksum = 0;
+ ICMP6(icmp)->icmp6_cksum = cksum_add(
+ ~in6_cksum_pseudo(ip6, plen, IPPROTO_ICMPV6, 0),
+ in_cksum_skip(n, n->m_pkthdr.len, offset));
+ return (n);
+freeit:
+ m_freem(m);
+ NAT64STAT_INC(stats, dropped);
+ return (NULL);
+}
+
+/*
+ * Walk the IPv6 header found at *offset in m (offset 0 when offset is
+ * NULL) and skip Hop-by-Hop, Routing and Destination Options extension
+ * headers.
+ *
+ * Returns the protocol number of the first header that is not one of
+ * those and, when offset is not NULL, stores the byte offset of that
+ * header in *offset.  Returns -1 when the headers are not contiguous in
+ * the first mbuf or when a Hop-by-Hop header is present together with a
+ * zero payload length (Jumbo payload is not supported).
+ */
+int
+nat64_getlasthdr(struct mbuf *m, int *offset)
+{
+	struct ip6_hdr *ip6;
+	struct ip6_hbh *hbh;
+	int proto, hlen;
+
+	if (offset != NULL)
+		hlen = *offset;
+	else
+		hlen = 0;
+
+	if (m->m_len < hlen + sizeof(*ip6))
+		return (-1);
+
+	ip6 = mtodo(m, hlen);
+	hlen += sizeof(*ip6);
+	proto = ip6->ip6_nxt;
+	/* Skip extension headers */
+	while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING ||
+	    proto == IPPROTO_DSTOPTS) {
+		/*
+		 * We expect mbuf has contigious data up to
+		 * upper level header.  The next-header and length bytes
+		 * read below must be inside the mbuf as well.
+		 */
+		if (m->m_len < hlen + (int)sizeof(*hbh))
+			return (-1);
+		hbh = mtodo(m, hlen);
+		/*
+		 * We doesn't support Jumbo payload option,
+		 * so return error.
+		 */
+		if (proto == IPPROTO_HOPOPTS && ip6->ip6_plen == 0)
+			return (-1);
+		proto = hbh->ip6h_nxt;
+		/*
+		 * The length field counts 8-octet units and does not
+		 * include the first 8 octets, so a value of 0 still means
+		 * an 8 byte header.  Without the "+ 1" a zero length
+		 * header would never be skipped and the loop could spin
+		 * on it forever.
+		 */
+		hlen += (hbh->ip6h_len + 1) << 3;
+	}
+	if (offset != NULL)
+		*offset = hlen;
+	return (proto);
+}
+
+/*
+ * Translate the IPv4 packet in m to IPv6 and send it.
+ *
+ *  m       - IPv4 packet;
+ *  saddr   - IPv6 source address for the translated packet;
+ *  daddr   - IPv6 destination address for the translated packet;
+ *  lport   - if non-zero, replaces the TCP/UDP destination port; for ICMP
+ *            it is handed to nat64_icmp_translate();
+ *  stats   - statistics block;
+ *  logdata - passed through to the output/reflect routines.
+ *
+ * Returns NAT64MFREE for fragmented ICMP; the mbuf is not touched on that
+ * path.  Every other path hands the mbuf to a callee (ICMP reflection,
+ * ICMP translation or fragmentation/output) and returns NAT64RETURN.
+ */
+int
+nat64_do_handle_ip4(struct mbuf *m, struct in6_addr *saddr,
+ struct in6_addr *daddr, uint16_t lport, nat64_stats_block *stats,
+ void *logdata)
+{
+ struct route_in6 ro;
+ struct ip6_hdr ip6;
+ struct ifnet *ifp;
+ struct ip *ip;
+ struct mbufq mq;
+ struct sockaddr *dst;
+ uint32_t mtu;
+ uint16_t ip_id, ip_off;
+ uint16_t *csum;
+ int plen, hlen;
+ uint8_t proto;
+
+ ip = mtod(m, struct ip*);
+
+ if (ip->ip_ttl <= IPTTLDEC) {
+ nat64_icmp_reflect(m, ICMP_TIMXCEED,
+ ICMP_TIMXCEED_INTRANS, 0, stats, logdata);
+ return (NAT64RETURN);
+ }
+
+ ip6.ip6_dst = *daddr;
+ ip6.ip6_src = *saddr;
+
+ hlen = ip->ip_hl << 2;
+ plen = ntohs(ip->ip_len) - hlen;
+ proto = ip->ip_p;
+
+ /* Save ip_id and ip_off, both are in network byte order */
+ ip_id = ip->ip_id;
+ ip_off = ip->ip_off & htons(IP_OFFMASK | IP_MF);
+
+ /* Fragment length must be multiple of 8 octets */
+ if ((ip->ip_off & htons(IP_MF)) != 0 && (plen & 0x7) != 0) {
+ nat64_icmp_reflect(m, ICMP_PARAMPROB,
+ ICMP_PARAMPROB_LENGTH, 0, stats, logdata);
+ return (NAT64RETURN);
+ }
+ /* Fragmented ICMP is unsupported */
+ if (proto == IPPROTO_ICMP && ip_off != 0) {
+ DPRINTF(DP_DROPS, "dropped due to fragmented ICMP");
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+
+ /* From here on every exit path has to release the route in ro. */
+ dst = nat64_find_route6(&ro, &ip6.ip6_dst, m);
+ if (dst == NULL) {
+ FREE_ROUTE(&ro);
+ NAT64STAT_INC(stats, noroute6);
+ nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0,
+ stats, logdata);
+ return (NAT64RETURN);
+ }
+ ifp = ro.ro_rt->rt_ifp;
+ /* Use the smaller of route MTU (when set) and interface MTU. */
+ if (ro.ro_rt->rt_mtu != 0)
+ mtu = min(ro.ro_rt->rt_mtu, ifp->if_mtu);
+ else
+ mtu = ifp->if_mtu;
+ if (mtu < plen + sizeof(ip6) && (ip->ip_off & htons(IP_DF)) != 0) {
+ FREE_ROUTE(&ro);
+ nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
+ FRAGSZ(mtu) + sizeof(struct ip), stats, logdata);
+ return (NAT64RETURN);
+ }
+
+ ip6.ip6_flow = htonl(ip->ip_tos << 20);
+ ip6.ip6_vfc |= IPV6_VERSION;
+#ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT
+ ip6.ip6_hlim = ip->ip_ttl - IPTTLDEC;
+#else
+ /* Forwarding code will decrement HLIM. */
+ ip6.ip6_hlim = ip->ip_ttl;
+#endif
+ ip6.ip6_plen = htons(plen);
+ ip6.ip6_nxt = (proto == IPPROTO_ICMP) ? IPPROTO_ICMPV6: proto;
+ /* Convert checksums. */
+ switch (proto) {
+ case IPPROTO_TCP:
+ csum = &TCP(mtodo(m, hlen))->th_sum;
+ if (lport != 0) {
+ struct tcphdr *tcp = TCP(mtodo(m, hlen));
+ *csum = cksum_adjust(*csum, tcp->th_dport, lport);
+ tcp->th_dport = lport;
+ }
+ *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip));
+ break;
+ case IPPROTO_UDP:
+ csum = &UDP(mtodo(m, hlen))->uh_sum;
+ if (lport != 0) {
+ struct udphdr *udp = UDP(mtodo(m, hlen));
+ *csum = cksum_adjust(*csum, udp->uh_dport, lport);
+ udp->uh_dport = lport;
+ }
+ *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip));
+ break;
+ case IPPROTO_ICMP:
+ /* May return a different mbuf; ip is not used after this point. */
+ m = nat64_icmp_translate(m, &ip6, lport, hlen, stats);
+ if (m == NULL) {
+ FREE_ROUTE(&ro);
+ /* stats already accounted */
+ return (NAT64RETURN);
+ }
+ }
+
+ /* Strip the IPv4 header; nat64_fragment6() prepends the IPv6 one. */
+ m_adj(m, hlen);
+ mbufq_init(&mq, 255);
+ nat64_fragment6(stats, &ip6, &mq, m, mtu, ip_id, ip_off);
+ /* On fragmentation failure the queue is empty and nothing is sent. */
+ while ((m = mbufq_dequeue(&mq)) != NULL) {
+ if (nat64_output(ifp, m, dst, (struct route *)&ro, stats,
+ logdata) != 0)
+ break;
+ NAT64STAT_INC(stats, opcnt46);
+ }
+ mbufq_drain(&mq);
+ FREE_ROUTE(&ro);
+ return (NAT64RETURN);
+}
+
+/*
+ * Translate an ICMPv6 error message into its ICMPv4 counterpart (RFC 7915)
+ * and hand the result to nat64_icmp_reflect().
+ *
+ * m       - packet, starting with the outer IPv6 header.
+ * hlen    - offset of the ICMPv6 header inside m, or 0 to have the outer
+ *           addresses checked and the offset located with
+ *           nat64_getlasthdr().
+ * aaddr   - IPv4 alias address; stored as destination of the fake inner
+ *           IPv4 header.
+ * aport   - not used by this function.
+ * stats   - counters updated on drops and on m_pullup() failure.
+ * logdata - passed through to nat64_icmp_reflect().
+ *
+ * Returns NAT64SKIP when the outer addresses fail nat64_check_ip6() (only
+ * checked when hlen == 0), NAT64MFREE when the message is dropped before
+ * m_pullup() could have replaced the mbuf, and NAT64RETURN once the mbuf
+ * has been freed here or handed to nat64_icmp_reflect().
+ */
+int
+nat64_handle_icmp6(struct mbuf *m, int hlen, uint32_t aaddr, uint16_t aport,
+    nat64_stats_block *stats, void *logdata)
+{
+	struct ip ip;
+	struct icmp6_hdr *icmp6;
+	struct ip6_frag *ip6f;
+	struct ip6_hdr *ip6, *ip6i;
+	uint32_t mtu;
+	int plen, proto;
+	uint8_t type, code;
+
+	if (hlen == 0) {
+		ip6 = mtod(m, struct ip6_hdr *);
+		if (nat64_check_ip6(&ip6->ip6_src) != 0 ||
+		    nat64_check_ip6(&ip6->ip6_dst) != 0)
+			return (NAT64SKIP);
+
+		proto = nat64_getlasthdr(m, &hlen);
+		if (proto != IPPROTO_ICMPV6) {
+			DPRINTF(DP_DROPS,
+			    "dropped due to mbuf isn't contigious");
+			NAT64STAT_INC(stats, dropped);
+			return (NAT64MFREE);
+		}
+	}
+
+	/*
+	 * Translate ICMPv6 type and code to ICMPv4 (RFC7915).
+	 * NOTE: ICMPv6 echo handled by nat64_do_handle_ip6().
+	 *
+	 * "mtu" carries the extra 16-bit value given to nat64_icmp_reflect():
+	 * the adjusted MTU for "packet too big", the translated pointer for
+	 * "parameter problem", 0 otherwise.
+	 */
+	icmp6 = mtodo(m, hlen);
+	mtu = 0;
+	switch (icmp6->icmp6_type) {
+	case ICMP6_DST_UNREACH:
+		type = ICMP_UNREACH;
+		switch (icmp6->icmp6_code) {
+		case ICMP6_DST_UNREACH_NOROUTE:
+		case ICMP6_DST_UNREACH_BEYONDSCOPE:
+		case ICMP6_DST_UNREACH_ADDR:
+			code = ICMP_UNREACH_HOST;
+			break;
+		case ICMP6_DST_UNREACH_ADMIN:
+			code = ICMP_UNREACH_HOST_PROHIB;
+			break;
+		case ICMP6_DST_UNREACH_NOPORT:
+			code = ICMP_UNREACH_PORT;
+			break;
+		default:
+			DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
+			    " code %d", icmp6->icmp6_type,
+			    icmp6->icmp6_code);
+			NAT64STAT_INC(stats, dropped);
+			return (NAT64MFREE);
+		}
+		break;
+	case ICMP6_PACKET_TOO_BIG:
+		type = ICMP_UNREACH;
+		code = ICMP_UNREACH_NEEDFRAG;
+		mtu = ntohl(icmp6->icmp6_mtu);
+		if (mtu < IPV6_MMTU) {
+			DPRINTF(DP_DROPS, "Wrong MTU %d in ICMPv6 type %d,"
+			    " code %d", mtu, icmp6->icmp6_type,
+			    icmp6->icmp6_code);
+			NAT64STAT_INC(stats, dropped);
+			return (NAT64MFREE);
+		}
+		/*
+		 * Adjust MTU to reflect difference between
+		 * IPv6 and IPv4 headers.
+		 */
+		mtu -= sizeof(struct ip6_hdr) - sizeof(struct ip);
+		break;
+	case ICMP6_TIME_EXCEEDED:
+		type = ICMP_TIMXCEED;
+		code = icmp6->icmp6_code;
+		break;
+	case ICMP6_PARAM_PROB:
+		switch (icmp6->icmp6_code) {
+		case ICMP6_PARAMPROB_HEADER:
+			type = ICMP_PARAMPROB;
+			code = ICMP_PARAMPROB_ERRATPTR;
+			/* Map the IPv6 header offset to the IPv4 one. */
+			mtu = ntohl(icmp6->icmp6_pptr);
+			switch (mtu) {
+			case 0: /* Version/Traffic Class */
+			case 1: /* Traffic Class/Flow Label */
+				break;
+			case 4: /* Payload Length */
+			case 5:
+				mtu = 2;
+				break;
+			case 6: /* Next Header */
+				mtu = 9;
+				break;
+			case 7: /* Hop Limit */
+				mtu = 8;
+				break;
+			default:
+				if (mtu >= 8 && mtu <= 23) {
+					mtu = 12; /* Source address */
+					break;
+				}
+				if (mtu >= 24 && mtu <= 39) {
+					mtu = 16; /* Destination address */
+					break;
+				}
+				DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
+				    " code %d, pptr %d", icmp6->icmp6_type,
+				    icmp6->icmp6_code, mtu);
+				NAT64STAT_INC(stats, dropped);
+				return (NAT64MFREE);
+			}
+			/*
+			 * Do not fall through: the next case would replace
+			 * the parameter-problem type/code chosen above with
+			 * "protocol unreachable".
+			 */
+			break;
+		case ICMP6_PARAMPROB_NEXTHEADER:
+			type = ICMP_UNREACH;
+			code = ICMP_UNREACH_PROTOCOL;
+			break;
+		default:
+			DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
+			    " code %d, pptr %d", icmp6->icmp6_type,
+			    icmp6->icmp6_code, ntohl(icmp6->icmp6_pptr));
+			NAT64STAT_INC(stats, dropped);
+			return (NAT64MFREE);
+		}
+		break;
+	default:
+		DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d, code %d",
+		    icmp6->icmp6_type, icmp6->icmp6_code);
+		NAT64STAT_INC(stats, dropped);
+		return (NAT64MFREE);
+	}
+
+	/* From here on hlen is the offset of the embedded IPv6 header. */
+	hlen += sizeof(struct icmp6_hdr);
+	if (m->m_pkthdr.len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) {
+		NAT64STAT_INC(stats, dropped);
+		DPRINTF(DP_DROPS, "Message is too short %d",
+		    m->m_pkthdr.len);
+		return (NAT64MFREE);
+	}
+	/*
+	 * We need at least ICMP_MINLEN bytes of original datagram payload
+	 * to generate ICMP message. It is nice that ICMP_MINLEN is equal
+	 * to sizeof(struct ip6_frag). So, if embedded datagram had a fragment
+	 * header we will not have to do m_pullup() again.
+	 *
+	 * What we have here:
+	 * Outer header: (IPv6iGW, v4mapPRefix+v4exthost)
+	 * Inner header: (v4mapPRefix+v4host, IPv6iHost) [sport, dport]
+	 * We need to translate it to:
+	 *
+	 * Outer header: (alias_host, v4exthost)
+	 * Inner header: (v4exthost, alias_host) [sport, alias_port]
+	 *
+	 * Assume caller function has checked if v4mapPRefix+v4host
+	 * matches configured prefix.
+	 * The only two things we should be provided with are mapping between
+	 * IPv6iHost <> alias_host and between dport and alias_port.
+	 */
+	if (m->m_len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN)
+		m = m_pullup(m, hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN);
+	if (m == NULL) {
+		NAT64STAT_INC(stats, nomem);
+		return (NAT64RETURN);
+	}
+	ip6 = mtod(m, struct ip6_hdr *);	/* outer header */
+	ip6i = mtodo(m, hlen);			/* embedded header */
+	ip6f = NULL;
+	proto = ip6i->ip6_nxt;
+	plen = ntohs(ip6i->ip6_plen);
+	hlen += sizeof(struct ip6_hdr);
+	if (proto == IPPROTO_FRAGMENT) {
+		if (m->m_pkthdr.len < hlen + sizeof(struct ip6_frag) +
+		    ICMP_MINLEN)
+			goto fail;
+		ip6f = mtodo(m, hlen);
+		proto = ip6f->ip6f_nxt;
+		plen -= sizeof(struct ip6_frag);
+		hlen += sizeof(struct ip6_frag);
+		/* Adjust MTU to reflect frag header size */
+		if (type == ICMP_UNREACH && code == ICMP_UNREACH_NEEDFRAG)
+			mtu -= sizeof(struct ip6_frag);
+	}
+	if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) {
+		DPRINTF(DP_DROPS, "Unsupported proto %d in the inner header",
+		    proto);
+		goto fail;
+	}
+	if (nat64_check_ip6(&ip6i->ip6_src) != 0 ||
+	    nat64_check_ip6(&ip6i->ip6_dst) != 0) {
+		DPRINTF(DP_DROPS, "Inner addresses do not passes the check");
+		goto fail;
+	}
+	/* Check if outer dst is the same as inner src */
+	if (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6i->ip6_src)) {
+		DPRINTF(DP_DROPS, "Inner src doesn't match outer dst");
+		goto fail;
+	}
+
+	/* Now we need to make a fake IPv4 packet to generate ICMP message */
+	ip.ip_dst.s_addr = aaddr;
+	ip.ip_src.s_addr = nat64_get_ip4(&ip6i->ip6_src);
+	/* XXX: Make fake ulp header */
+#ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT
+	ip6i->ip6_hlim += IPV6_HLIMDEC; /* init_ip4hdr will decrement it */
+#endif
+	nat64_init_ip4hdr(ip6i, ip6f, plen, proto, &ip);
+	/* Trim everything in front of the place the fake header goes. */
+	m_adj(m, hlen - sizeof(struct ip));
+	bcopy(&ip, mtod(m, void *), sizeof(ip));
+	nat64_icmp_reflect(m, type, code, (uint16_t)mtu, stats, logdata);
+	return (NAT64RETURN);
+fail:
+	/*
+	 * We must call m_freem() because mbuf pointer could be
+	 * changed with m_pullup().
+	 */
+	m_freem(m);
+	NAT64STAT_INC(stats, dropped);
+	return (NAT64RETURN);
+}
+/*
+ * Translate an IPv6 packet into IPv4 and transmit it with nat64_output().
+ *
+ * m       - IPv6 packet (see the XXX note below about what is expected
+ *           to be contiguous).
+ * aaddr   - stored as-is into ip_src.s_addr of the new IPv4 header.
+ * aport   - when non-zero, replaces the TCP/UDP source port or the ICMP
+ *           echo id, with the checksum adjusted accordingly.
+ * stats   - counters updated on drops, routing failures and output.
+ * logdata - passed through to the reflect/output helpers.
+ *
+ * ICMPv6 messages other than echo request/reply are delegated to
+ * nat64_handle_icmp6().
+ *
+ * Returns NAT64SKIP if either IPv6 address fails nat64_check_ip6(),
+ * NAT64MFREE when the packet is dropped, NAT64RETURN otherwise.
+ * NOTE(review): presumably NAT64MFREE leaves freeing m to the caller and
+ * NAT64RETURN means m was consumed - confirm against the callers.
+ */
+int
+nat64_do_handle_ip6(struct mbuf *m, uint32_t aaddr, uint16_t aport,
+ nat64_stats_block *stats, void *logdata)
+{
+ struct route ro;
+ struct ip ip;
+ struct ifnet *ifp;
+ struct ip6_frag *frag;
+ struct ip6_hdr *ip6;
+ struct icmp6_hdr *icmp6;
+ struct sockaddr *dst;
+ uint16_t *csum;
+ uint32_t mtu;
+ int plen, hlen, proto;
+
+ /*
+ * XXX: we expect ipfw_chk() did m_pullup() up to upper level
+ * protocol's headers. Also we skip some checks, that ip6_input(),
+ * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did.
+ */
+ ip6 = mtod(m, struct ip6_hdr *);
+ if (nat64_check_ip6(&ip6->ip6_src) != 0 ||
+ nat64_check_ip6(&ip6->ip6_dst) != 0) {
+ return (NAT64SKIP);
+ }
+
+ /* Starting from this point we must not return zero */
+ ip.ip_src.s_addr = aaddr;
+ if (nat64_check_ip4(ip.ip_src.s_addr) != 0) {
+ DPRINTF(DP_GENERIC, "invalid source address: %08x",
+ ip.ip_src.s_addr);
+ /* XXX: stats? */
+ return (NAT64MFREE);
+ }
+
+ ip.ip_dst.s_addr = nat64_get_ip4(&ip6->ip6_dst); /* last 32 bits of the IPv6 destination */
+ if (ip.ip_dst.s_addr == 0) {
+ /* XXX: stats? */
+ return (NAT64MFREE);
+ }
+
+ if (ip6->ip6_hlim <= IPV6_HLIMDEC) { /* hop limit would run out: report it to the sender */
+ nat64_icmp6_reflect(m, ICMP6_TIME_EXCEEDED,
+ ICMP6_TIME_EXCEED_TRANSIT, 0, stats, logdata);
+ return (NAT64RETURN);
+ }
+
+ hlen = 0;
+ plen = ntohs(ip6->ip6_plen);
+ proto = nat64_getlasthdr(m, &hlen); /* hlen becomes the offset of the returned header */
+ if (proto < 0) {
+ DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious");
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+ frag = NULL;
+ if (proto == IPPROTO_FRAGMENT) {
+ /* ipfw_chk should m_pullup up to frag header */
+ if (m->m_len < hlen + sizeof(*frag)) {
+ DPRINTF(DP_DROPS,
+ "dropped due to mbuf isn't contigious");
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+ frag = mtodo(m, hlen);
+ proto = frag->ip6f_nxt;
+ hlen += sizeof(*frag);
+ /* Fragmented ICMPv6 is unsupported */
+ if (proto == IPPROTO_ICMPV6) {
+ DPRINTF(DP_DROPS, "dropped due to fragmented ICMPv6");
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+ /* Fragment length must be multiple of 8 octets */
+ if ((frag->ip6f_offlg & IP6F_MORE_FRAG) != 0 &&
+ ((plen + sizeof(struct ip6_hdr) - hlen) & 0x7) != 0) {
+ nat64_icmp6_reflect(m, ICMP6_PARAM_PROB,
+ ICMP6_PARAMPROB_HEADER,
+ offsetof(struct ip6_hdr, ip6_plen), stats,
+ logdata);
+ return (NAT64RETURN);
+ }
+ }
+ plen -= hlen - sizeof(struct ip6_hdr); /* plen now excludes the headers skipped above */
+ if (plen < 0 || m->m_pkthdr.len < plen + hlen) {
+ DPRINTF(DP_DROPS, "plen %d, pkthdr.len %d, hlen %d",
+ plen, m->m_pkthdr.len, hlen);
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+
+ icmp6 = NULL; /* Make gcc happy */
+ if (proto == IPPROTO_ICMPV6) {
+ icmp6 = mtodo(m, hlen);
+ if (icmp6->icmp6_type != ICMP6_ECHO_REQUEST &&
+ icmp6->icmp6_type != ICMP6_ECHO_REPLY)
+ return (nat64_handle_icmp6(m, hlen, aaddr, aport,
+ stats, logdata));
+ }
+ dst = nat64_find_route4(&ro, ip.ip_dst.s_addr, m);
+ if (dst == NULL) {
+ FREE_ROUTE(&ro);
+ NAT64STAT_INC(stats, noroute4);
+ nat64_icmp6_reflect(m, ICMP6_DST_UNREACH,
+ ICMP6_DST_UNREACH_NOROUTE, 0, stats, logdata);
+ return (NAT64RETURN);
+ }
+
+ ifp = ro.ro_rt->rt_ifp;
+ if (ro.ro_rt->rt_mtu != 0)
+ mtu = min(ro.ro_rt->rt_mtu, ifp->if_mtu);
+ else
+ mtu = ifp->if_mtu;
+ if (mtu < plen + sizeof(ip)) { /* translated packet would not fit the outgoing MTU */
+ FREE_ROUTE(&ro);
+ nat64_icmp6_reflect(m, ICMP6_PACKET_TOO_BIG, 0, mtu, stats,
+ logdata);
+ return (NAT64RETURN);
+ }
+ nat64_init_ip4hdr(ip6, frag, plen, proto, &ip);
+ /* Convert checksums. */
+ switch (proto) {
+ case IPPROTO_TCP:
+ csum = &TCP(mtodo(m, hlen))->th_sum;
+ if (aport != 0) {
+ struct tcphdr *tcp = TCP(mtodo(m, hlen));
+ *csum = cksum_adjust(*csum, tcp->th_sport, aport);
+ tcp->th_sport = aport;
+ }
+ *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip));
+ break;
+ case IPPROTO_UDP:
+ csum = &UDP(mtodo(m, hlen))->uh_sum;
+ if (aport != 0) {
+ struct udphdr *udp = UDP(mtodo(m, hlen));
+ *csum = cksum_adjust(*csum, udp->uh_sport, aport);
+ udp->uh_sport = aport;
+ }
+ *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip));
+ break;
+ case IPPROTO_ICMPV6:
+ /* Checksum in ICMPv6 covers pseudo header */
+ csum = &icmp6->icmp6_cksum;
+ *csum = cksum_add(*csum, in6_cksum_pseudo(ip6, plen,
+ IPPROTO_ICMPV6, 0));
+ /* Convert ICMPv6 types to ICMP */
+ mtu = *(uint16_t *)icmp6; /* mtu reused as scratch: save old word for cksum_adjust */
+ if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST)
+ icmp6->icmp6_type = ICMP_ECHO;
+ else /* ICMP6_ECHO_REPLY */
+ icmp6->icmp6_type = ICMP_ECHOREPLY;
+ *csum = cksum_adjust(*csum, (uint16_t)mtu, *(uint16_t *)icmp6);
+ if (aport != 0) {
+ uint16_t old_id = icmp6->icmp6_id;
+ icmp6->icmp6_id = aport;
+ *csum = cksum_adjust(*csum, old_id, aport);
+ }
+ break;
+ };
+
+ m_adj(m, hlen - sizeof(ip)); /* leave exactly sizeof(ip) bytes in front of the payload */
+ bcopy(&ip, mtod(m, void *), sizeof(ip)); /* ...and overwrite them with the IPv4 header */
+ if (nat64_output(ifp, m, dst, &ro, stats, logdata) == 0)
+ NAT64STAT_INC(stats, opcnt64);
+ FREE_ROUTE(&ro);
+ return (NAT64RETURN);
+}
+
diff --git a/sys/netpfil/ipfw/nat64/nat64_translate.h b/sys/netpfil/ipfw/nat64/nat64_translate.h
new file mode 100644
index 0000000..9f65395
--- /dev/null
+++ b/sys/netpfil/ipfw/nat64/nat64_translate.h
@@ -0,0 +1,116 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _IP_FW_NAT64_TRANSLATE_H_
+#define _IP_FW_NAT64_TRANSLATE_H_
+
+#ifdef RTALLOC_NOLOCK /* selects the *_nolock lookup functions */
+#define IN_LOOKUP_ROUTE(ro, fib) rtalloc_fib_nolock((ro), 0, (fib))
+#define IN6_LOOKUP_ROUTE(ro, fib) in6_rtalloc_nolock((ro), (fib))
+#define FREE_ROUTE(ro) /* expands to nothing in this variant */
+#else
+#define IN_LOOKUP_ROUTE(ro, fib) rtalloc_ign_fib((ro), 0, (fib))
+#define IN6_LOOKUP_ROUTE(ro, fib) in6_rtalloc((ro), (fib))
+#define FREE_ROUTE(ro) RO_RTFREE((ro)) /* release what the lookup stored in ro */
+#endif
+
+/*
+ * Reject IPv6 addresses the translator must not handle.
+ * Returns 1 when @addr has an all-zero first 16-bit word, or is a
+ * multicast or link-local address; 0 otherwise.
+ */
+static inline int
+nat64_check_ip6(struct in6_addr *addr)
+{
+
+	/*
+	 * XXX: We should really check /8 (0000::/8 is reserved by IETF),
+	 * but only the first 16 bits are compared here.
+	 */
+	if (addr->s6_addr16[0] == 0)
+		return (1);
+	if (IN6_IS_ADDR_MULTICAST(addr))
+		return (1);
+	return (IN6_IS_ADDR_LINKLOCAL(addr) ? 1 : 0);
+}
+
+extern int nat64_allow_private;
+/*
+ * Tell whether IPv4 address @ia (network byte order) belongs to a private
+ * or otherwise non-global range.
+ *
+ * Returns 1 if it does, 0 if it does not.  Always returns 0 when
+ * nat64_allow_private is non-zero.
+ */
+static inline int
+nat64_check_private_ip4(in_addr_t ia)
+{
+
+	if (nat64_allow_private)
+		return (0);
+	/*
+	 * WKPFX must not be used to represent non-global IPv4 addresses.
+	 * XXX: the author left "if (cfg->flags & NAT64_WKPFX)" commented
+	 * out around the checks below; no cfg is available here, so they
+	 * are applied unconditionally.
+	 */
+	/* IN_PRIVATE: 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16 */
+	if ((ia & htonl(0xff000000)) == htonl(0x0a000000) ||
+	    (ia & htonl(0xfff00000)) == htonl(0xac100000) ||
+	    (ia & htonl(0xffff0000)) == htonl(0xc0a80000))
+		return (1);
+	/*
+	 * RFC 5735:
+	 *  192.0.0.0/24 - reserved for IETF protocol assignments
+	 *  192.88.99.0/24 - for use as 6to4 relay anycast addresses
+	 *  198.18.0.0/15 - for use in benchmark tests
+	 *  192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24 - for use
+	 *  in documentation and example code
+	 *
+	 * Each mask below matches the prefix length listed above: /15 is
+	 * 0xfffe0000 and /24 is 0xffffff00.
+	 */
+	if ((ia & htonl(0xffffff00)) == htonl(0xc0000000) ||
+	    (ia & htonl(0xffffff00)) == htonl(0xc0586300) ||
+	    (ia & htonl(0xfffe0000)) == htonl(0xc6120000) ||
+	    (ia & htonl(0xffffff00)) == htonl(0xc0000200) ||
+	    (ia & htonl(0xffffff00)) == htonl(0xc6336400) ||
+	    (ia & htonl(0xffffff00)) == htonl(0xcb007100))
+		return (1);
+	return (0);
+}
+
+/*
+ * Reject IPv4 addresses (network byte order) that can never be used
+ * by the translator.  Returns 1 for such an address, 0 otherwise.
+ */
+static inline int
+nat64_check_ip4(in_addr_t ia)
+{
+
+	return (
+	    /* IN_LOOPBACK: 127.0.0.0/8 */
+	    (ia & htonl(0xff000000)) == htonl(0x7f000000) ||
+	    /* IN_LINKLOCAL: 169.254.0.0/16 */
+	    (ia & htonl(0xffff0000)) == htonl(0xa9fe0000) ||
+	    /* IN_MULTICAST & IN_EXPERIMENTAL: 224.0.0.0/3 */
+	    (ia & htonl(0xe0000000)) == htonl(0xe0000000));
+}
+
+#define nat64_get_ip4(_ip6) ((_ip6)->s6_addr32[3]) /* read the last 32-bit word of an in6_addr */
+#define nat64_set_ip4(_ip6, _ip4) (_ip6)->s6_addr32[3] = (_ip4) /* store into that word; not parenthesized, use as a statement */
+
+int nat64_getlasthdr(struct mbuf *m, int *offset);
+int nat64_do_handle_ip4(struct mbuf *m, struct in6_addr *saddr,
+ struct in6_addr *daddr, uint16_t lport, nat64_stats_block *stats,
+ void *logdata);
+int nat64_do_handle_ip6(struct mbuf *m, uint32_t aaddr, uint16_t aport,
+ nat64_stats_block *stats, void *logdata);
+int nat64_handle_icmp6(struct mbuf *m, int hlen, uint32_t aaddr, uint16_t aport,
+ nat64_stats_block *stats, void *logdata);
+
+#endif
+
diff --git a/sys/netpfil/ipfw/nat64/nat64lsn.c b/sys/netpfil/ipfw/nat64/nat64lsn.c
new file mode 100644
index 0000000..d615f58
--- /dev/null
+++ b/sys/netpfil/ipfw/nat64/nat64lsn.c
@@ -0,0 +1,1770 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
+ * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/rmlock.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/queue.h>
+#include <sys/syslog.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_pflog.h>
+#include <net/pfil.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
+#include <netinet/ip6.h>
+#include <netinet/icmp6.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/ip_fw_nat64.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/nat64/ip_fw_nat64.h>
+#include <netpfil/ipfw/nat64/nat64lsn.h>
+#include <netpfil/ipfw/nat64/nat64_translate.h>
+#include <netpfil/pf/pf.h>
+
+MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN");
+
+static void nat64lsn_periodic(void *data);
+#define PERIODIC_DELAY 4 /* NOTE(review): unit and use are not visible in this excerpt - confirm */
+static uint8_t nat64lsn_proto_map[256]; /* indexed by IP protocol number; 0 is treated as "unsupported" by nat64lsn_translate4() */
+uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO];
+
+#define NAT64_FLAG_FIN 0x01 /* FIN was seen */
+#define NAT64_FLAG_SYN 0x02 /* First syn in->out */
+#define NAT64_FLAG_ESTAB 0x04 /* Packet with Ack */
+#define NAT64_FLAGS_TCP (NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN) /* all three TCP state bits */
+
+#define NAT64_FLAG_RDR 0x80 /* Port redirect */
+#define NAT64_LOOKUP(chain, cmd) \
+ (struct nat64lsn_cfg *)SRV_OBJECT((chain), (cmd)->arg1)
+/*
+ * Delayed job queue, used to create new hosts
+ * and new portgroups
+ */
+enum nat64lsn_jtype { /* kind of deferred job stored in nat64lsn_job_item.jtype */
+ JTYPE_NEWHOST = 1, /* presumably: create a new host record - confirm at the job consumer */
+ JTYPE_NEWPORTGROUP, /* presumably: allocate a new portgroup - confirm */
+ JTYPE_DELPORTGROUP, /* presumably: delete portgroups - confirm */
+};
+
+struct nat64lsn_job_item { /* one entry of the delayed job queue */
+ TAILQ_ENTRY(nat64lsn_job_item) next; /* queue linkage */
+ enum nat64lsn_jtype jtype; /* which kind of job this is */
+ struct nat64lsn_host *nh;
+ struct nat64lsn_portgroup *pg;
+ void *spare_idx;
+ struct in6_addr haddr;
+ uint8_t nat_proto;
+ uint8_t done;
+ int needs_idx;
+ int delcount;
+ unsigned int fhash; /* Flow hash */
+ uint32_t aaddr; /* Last used address (net) */
+ struct mbuf *m; /* NOTE(review): presumably the packet that triggered the job; ownership is not visible here - confirm */
+ struct ipfw_flow_id f_id;
+ uint64_t delmask[NAT64LSN_PGPTRNMASK]; /* same shape as the delmask built in nat64lsn_periodic_chkhost() */
+};
+
+static struct mtx jmtx; /* the single mutex behind the JQUEUE_* macros below */
+#define JQUEUE_LOCK_INIT() mtx_init(&jmtx, "qlock", NULL, MTX_DEF)
+#define JQUEUE_LOCK_DESTROY() mtx_destroy(&jmtx)
+#define JQUEUE_LOCK() mtx_lock(&jmtx)
+#define JQUEUE_UNLOCK() mtx_unlock(&jmtx)
+/*
+ * Forward declarations of file-local helpers.  nat64lsn_translate4() is
+ * defined further down; the others are presumably defined later in this
+ * file as well.
+ */
+static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_job_item *ji);
+static void nat64lsn_enqueue_jobs(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_job_head *jhead, int jlen);
+
+static struct nat64lsn_job_item *nat64lsn_create_job(struct nat64lsn_cfg *cfg,
+ const struct ipfw_flow_id *f_id, int jtype);
+static int nat64lsn_request_portgroup(struct nat64lsn_cfg *cfg,
+ const struct ipfw_flow_id *f_id, struct mbuf **pm, uint32_t aaddr,
+ int needs_idx);
+static int nat64lsn_request_host(struct nat64lsn_cfg *cfg,
+ const struct ipfw_flow_id *f_id, struct mbuf **pm);
+static int nat64lsn_translate4(struct nat64lsn_cfg *cfg,
+ const struct ipfw_flow_id *f_id, struct mbuf **pm);
+static int nat64lsn_translate6(struct nat64lsn_cfg *cfg,
+ struct ipfw_flow_id *f_id, struct mbuf **pm);
+
+static int alloc_portgroup(struct nat64lsn_job_item *ji);
+static void destroy_portgroup(struct nat64lsn_portgroup *pg);
+static void destroy_host6(struct nat64lsn_host *nh);
+static int alloc_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji);
+
+static int attach_portgroup(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_job_item *ji);
+static int attach_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji);
+
+
+/*
+ * XXX tmp
+ * UMA zones; going by the names they back host, portgroup and
+ * portgroup-index objects (the allocation sites are outside this excerpt).
+ */
+static uma_zone_t nat64lsn_host_zone;
+static uma_zone_t nat64lsn_pg_zone;
+static uma_zone_t nat64lsn_pgidx_zone;
+
+static unsigned int nat64lsn_periodic_chkstates(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_host *nh);
+
+#define I6_hash(x) (djb_hash((const unsigned char *)(x), 16)) /* hash over 16 bytes, the size of an IPv6 address */
+#define I6_first(_ph, h) (_ph)[h] /* bucket h of table _ph */
+#define I6_next(x) (x)->next
+#define I6_val(x) (&(x)->addr)
+#define I6_cmp(a, b) IN6_ARE_ADDR_EQUAL(a, b)
+#define I6_lock(a, b) /* expands to nothing */
+#define I6_unlock(a, b) /* expands to nothing */
+
+#define I6HASH_FIND(_cfg, _res, _a) \
+ CHT_FIND(_cfg->ih, _cfg->ihsize, I6_, _res, _a)
+#define I6HASH_INSERT(_cfg, _i) \
+ CHT_INSERT_HEAD(_cfg->ih, _cfg->ihsize, I6_, _i)
+#define I6HASH_REMOVE(_cfg, _res, _tmp, _a) \
+ CHT_REMOVE(_cfg->ih, _cfg->ihsize, I6_, _res, _tmp, _a)
+
+#define I6HASH_FOREACH_SAFE(_cfg, _x, _tmp, _cb, _arg) \
+ CHT_FOREACH_SAFE(_cfg->ih, _cfg->ihsize, I6_, _x, _tmp, _cb, _arg)
+
+#define HASH_IN4(x) djb_hash((const unsigned char *)(x), 8) /* hashes 8 bytes starting at x */
+
+/*
+ * Hash @len bytes starting at @h: for every byte the accumulator is
+ * multiplied by 33 and then XORed with the byte.  Returns 0 when
+ * @len is not positive.
+ */
+static unsigned
+djb_hash(const unsigned char *h, const int len)
+{
+	const unsigned char *cur;
+	unsigned int acc;
+	int left;
+
+	acc = 0;
+	cur = h;
+	for (left = len; left > 0; left--) {
+		acc *= 33;
+		acc ^= *cur++;
+	}
+
+	return (acc);
+}
+
+/*
+static size_t
+bitmask_size(size_t num, int *level)
+{
+ size_t x;
+ int c;
+
+ for (c = 0, x = num; num > 1; num /= 64, c++)
+ ;
+
+ return (x);
+}
+
+static void
+bitmask_prepare(uint64_t *pmask, size_t bufsize, int level)
+{
+ size_t x, z;
+
+ memset(pmask, 0xFF, bufsize);
+ for (x = 0, z = 1; level > 1; x += z, z *= 64, level--)
+ ;
+ pmask[x] ~= 0x01;
+}
+*/
+
+/*
+ * Fill @plog with a pflog header describing a NAT event and pass it,
+ * together with packet @m, to ipfw_bpf_mtap2().
+ *
+ * @family goes into the af field; @n and @sn are stored (via htonl())
+ * as rule and subrule numbers.
+ */
+static void
+nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
+    uint32_t n, uint32_t sn)
+{
+
+	/* Start from an all-zero header, then set what we know. */
+	memset(plog, 0, sizeof(*plog));
+	strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname));
+	plog->ruleset[0] = '\0';
+	plog->rulenr = htonl(n);
+	plog->subrulenr = htonl(sn);
+	plog->dir = PF_IN;
+	plog->action = PF_NAT;
+	plog->af = family;
+	plog->length = PFLOG_REAL_HDRLEN;
+
+	ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
+}
+/*
+ * Inspects icmp packets to see if the message contains different
+ * packet header so we need to alter @addr and @port.
+ *
+ * Echo request/reply: @port is set to the ICMP id (host byte order).
+ * ICMP_UNREACH/ICMP_TIMXCEED: the embedded IPv4 header is examined;
+ * @addr is set to its source address (host byte order) and, for an
+ * embedded TCP or UDP header, @nat_proto and @port are set to that
+ * protocol and its source port.  For an embedded ICMP header only an
+ * echo request is accepted and @port is set to its id.
+ *
+ * *m may be replaced by m_pullup().
+ *
+ * Returns 0 on success, ENOMEM when m_pullup() fails (*m is NULL then),
+ * EINVAL when the packet is too short to hold the embedded header and
+ * EOPNOTSUPP for any other ICMP type or embedded protocol.
+ */
+static int
+inspect_icmp_mbuf(struct mbuf **m, uint8_t *nat_proto, uint32_t *addr,
+ uint16_t *port)
+{
+ struct ip *ip;
+ struct tcphdr *tcp;
+ struct udphdr *udp;
+ struct icmphdr *icmp;
+ int off;
+ uint8_t proto;
+
+ ip = mtod(*m, struct ip *); /* Outer IP header */
+ off = (ip->ip_hl << 2) + ICMP_MINLEN; /* end of the outer ICMP header */
+ if ((*m)->m_len < off)
+ *m = m_pullup(*m, off);
+ if (*m == NULL)
+ return (ENOMEM);
+
+ ip = mtod(*m, struct ip *); /* Outer IP header */
+ icmp = L3HDR(ip, struct icmphdr *);
+ switch (icmp->icmp_type) {
+ case ICMP_ECHO:
+ case ICMP_ECHOREPLY:
+ /* Use icmp ID as distinguisher */
+ *port = ntohs(*((uint16_t *)(icmp + 1)));
+ return (0);
+ case ICMP_UNREACH:
+ case ICMP_TIMXCEED:
+ break;
+ default:
+ return (EOPNOTSUPP);
+ }
+ /*
+ * ICMP_UNREACH and ICMP_TIMXCEED contains IP header + 64 bits
+ * of ULP header.
+ */
+ if ((*m)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN)
+ return (EINVAL);
+ if ((*m)->m_len < off + sizeof(struct ip) + ICMP_MINLEN)
+ *m = m_pullup(*m, off + sizeof(struct ip) + ICMP_MINLEN);
+ if (*m == NULL)
+ return (ENOMEM);
+ ip = mtodo(*m, off); /* Inner IP header */
+ proto = ip->ip_p;
+ off += ip->ip_hl << 2; /* Skip inner IP header */
+ *addr = ntohl(ip->ip_src.s_addr);
+ if ((*m)->m_len < off + ICMP_MINLEN) /* inner header may carry options, so pull up again */
+ *m = m_pullup(*m, off + ICMP_MINLEN);
+ if (*m == NULL)
+ return (ENOMEM);
+ switch (proto) {
+ case IPPROTO_TCP:
+ tcp = mtodo(*m, off);
+ *nat_proto = NAT_PROTO_TCP;
+ *port = ntohs(tcp->th_sport);
+ return (0);
+ case IPPROTO_UDP:
+ udp = mtodo(*m, off);
+ *nat_proto = NAT_PROTO_UDP;
+ *port = ntohs(udp->uh_sport);
+ return (0);
+ case IPPROTO_ICMP:
+ /*
+ * We will translate only ICMP errors for our ICMP
+ * echo requests.
+ */
+ icmp = mtodo(*m, off);
+ if (icmp->icmp_type != ICMP_ECHO)
+ return (EOPNOTSUPP);
+ *port = ntohs(*((uint16_t *)(icmp + 1)));
+ return (0);
+ };
+ return (EOPNOTSUPP);
+}
+
+/*
+ * Fold TCP header flags into state flag bits: FIN and SYN keep their
+ * bit positions, RST and ACK are shifted right by two bits.
+ */
+static inline uint8_t
+convert_tcp_flags(uint8_t flags)
+{
+	uint8_t kept, shifted;
+
+	kept = flags & (TH_FIN|TH_SYN);
+	/* Treat RST as FIN and ACK as estab */
+	shifted = (flags & (TH_RST|TH_ACK)) >> 2;
+
+	return (kept | shifted);
+}
+/*
+ * Handle an IPv4 packet whose destination should belong to the NAT pool:
+ * look up the portgroup for (address, protocol, port), refresh the
+ * matching state and pass the packet to nat64_do_handle_ip4().
+ *
+ * For ICMP the address/port/protocol used for the lookup are taken from
+ * the message by inspect_icmp_mbuf(), which may replace *pm.
+ *
+ * Returns cfg->nomatch_verdict when the packet does not match (address
+ * outside the pool range, unsupported protocol, no portgroup, or
+ * inspect_icmp_mbuf() failed), IP_FW_PASS when the translator returns
+ * NAT64SKIP, and IP_FW_DENY otherwise; in the last case *pm is set to
+ * NULL, after m_freem() if the translator returned NAT64MFREE.
+ */
+static NAT64NOINLINE int
+nat64lsn_translate4(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id,
+ struct mbuf **pm)
+{
+ struct pfloghdr loghdr, *logdata;
+ struct in6_addr src6;
+ struct nat64lsn_portgroup *pg;
+ struct nat64lsn_host *nh;
+ struct nat64lsn_state *st;
+ struct ip *ip;
+ uint32_t addr;
+ uint16_t state_flags, state_ts;
+ uint16_t port, lport;
+ uint8_t nat_proto;
+ int ret;
+
+ addr = f_id->dst_ip;
+ port = f_id->dst_port;
+ if (addr < cfg->prefix4 || addr > cfg->pmask4) { /* destination outside [prefix4, pmask4] */
+ NAT64STAT_INC(&cfg->stats, nomatch4);
+ return (cfg->nomatch_verdict);
+ }
+
+ /* Check if protocol is supported and get its short id */
+ nat_proto = nat64lsn_proto_map[f_id->proto];
+ if (nat_proto == 0) {
+ NAT64STAT_INC(&cfg->stats, noproto);
+ return (cfg->nomatch_verdict);
+ }
+
+ /* We might need to handle icmp differently */
+ if (nat_proto == NAT_PROTO_ICMP) {
+ ret = inspect_icmp_mbuf(pm, &nat_proto, &addr, &port);
+ if (ret != 0) {
+ if (ret == ENOMEM)
+ NAT64STAT_INC(&cfg->stats, nomem);
+ else
+ NAT64STAT_INC(&cfg->stats, noproto);
+ return (cfg->nomatch_verdict);
+ }
+ /* XXX: Check addr for validity */
+ if (addr < cfg->prefix4 || addr > cfg->pmask4) {
+ NAT64STAT_INC(&cfg->stats, nomatch4);
+ return (cfg->nomatch_verdict);
+ }
+ }
+
+ /* Calc portgroup offset w.r.t protocol */
+ pg = GET_PORTGROUP(cfg, addr, nat_proto, port);
+
+ /* Check if this port is occupied by any portgroup */
+ if (pg == NULL) {
+ NAT64STAT_INC(&cfg->stats, nomatch4);
+#if 0
+ DPRINTF(DP_STATE, "NOMATCH %u %d %d (%d)", addr, nat_proto, port,
+ _GET_PORTGROUP_IDX(cfg, addr, nat_proto, port));
+#endif
+ return (cfg->nomatch_verdict);
+ }
+
+ /* TODO: Check flags to see if we need to do some static mapping */
+ nh = pg->host;
+
+ /* Prepare some fields we might need to update */
+ SET_AGE(state_ts);
+ ip = mtod(*pm, struct ip *);
+ if (ip->ip_p == IPPROTO_TCP)
+ state_flags = convert_tcp_flags(
+ L3HDR(ip, struct tcphdr *)->th_flags);
+ else
+ state_flags = 0;
+
+ /* Lock host and get port mapping */
+ NAT64_LOCK(nh);
+
+ st = &pg->states[port & (NAT64_CHUNK_SIZE - 1)]; /* state slot = port modulo chunk size */
+ if (st->timestamp != state_ts) /* store only when the value actually changes */
+ st->timestamp = state_ts;
+ if ((st->flags & state_flags) != state_flags)
+ st->flags |= state_flags;
+ lport = htons(st->u.s.lport);
+
+ NAT64_UNLOCK(nh);
+
+ if (cfg->flags & NAT64_LOG) {
+ logdata = &loghdr;
+ nat64lsn_log(logdata, *pm, AF_INET, pg->idx, st->cur.off);
+ } else
+ logdata = NULL;
+
+ /* IPv6 source: first 96 bits of prefix6 followed by the IPv4 source */
+ src6.s6_addr32[0] = cfg->prefix6.s6_addr32[0];
+ src6.s6_addr32[1] = cfg->prefix6.s6_addr32[1];
+ src6.s6_addr32[2] = cfg->prefix6.s6_addr32[2];
+ src6.s6_addr32[3] = htonl(f_id->src_ip);
+
+ ret = nat64_do_handle_ip4(*pm, &src6, &nh->addr, lport,
+ &cfg->stats, logdata);
+
+ if (ret == NAT64SKIP)
+ return (IP_FW_PASS);
+ if (ret == NAT64MFREE)
+ m_freem(*pm);
+ *pm = NULL;
+
+ return (IP_FW_DENY);
+}
+/*
+ * Debug helper: print one line describing state @st of portgroup @pg
+ * through DPRINTF(DP_STATE, ...).  Does nothing unless DP_STATE is set
+ * in nat64_debug.  @px is printed as prefix, @off is printed and added
+ * to pg->aport.  @cfg is not used.
+ */
+void
+nat64lsn_dump_state(const struct nat64lsn_cfg *cfg,
+ const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st,
+ const char *px, int off)
+{
+ char s[INET6_ADDRSTRLEN], a[INET_ADDRSTRLEN], d[INET_ADDRSTRLEN];
+
+ if ((nat64_debug & DP_STATE) == 0)
+ return;
+ inet_ntop(AF_INET6, &pg->host->addr, s, sizeof(s));
+ inet_ntop(AF_INET, &pg->aaddr, a, sizeof(a)); /* NOTE(review): byte order of aaddr is not visible here - confirm */
+ inet_ntop(AF_INET, &st->u.s.faddr, d, sizeof(d));
+
+ DPRINTF(DP_STATE, "%s: PG %d ST [%p|%d]: %s:%d/%d <%s:%d> "
+ "%s:%d AGE %d", px, pg->idx, st, off,
+ s, st->u.s.lport, pg->nat_proto, a, pg->aport + off,
+ d, st->u.s.fport, GET_AGE(st->timestamp));
+}
+
+/*
+ * Decide whether a TCP state has outlived its time-to-live.
+ * The TTL depends on the state flags: FIN takes precedence over ESTAB,
+ * and everything else uses the SYN TTL.
+ * Return 1 if @age exceeds the TTL, 0 otherwise.
+ */
+static int
+nat64lsn_periodic_check_tcp(const struct nat64lsn_cfg *cfg,
+    const struct nat64lsn_state *st, int age)
+{
+	int limit;
+
+	/* Neither closing nor established: SYN TTL applies. */
+	limit = cfg->st_syn_ttl;
+	if (st->flags & NAT64_FLAG_FIN)
+		limit = cfg->st_close_ttl;
+	else if (st->flags & NAT64_FLAG_ESTAB)
+		limit = cfg->st_estab_ttl;
+
+	return (age > limit ? 1 : 0);
+}
+
+/*
+ * Decide whether nat state @st of portgroup @pg has expired.
+ * States flagged NAT64_FLAG_RDR never expire; for the rest the age is
+ * compared with the per-protocol TTL from @cfg.
+ * Return 1 if the state should be deleted, 0 otherwise.
+ */
+static NAT64NOINLINE int
+nat64lsn_periodic_chkstate(const struct nat64lsn_cfg *cfg,
+    const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st)
+{
+	int idle;
+
+	idle = GET_AGE(st->timestamp);
+
+	/* Skip immutable records */
+	if (st->flags & NAT64_FLAG_RDR)
+		return (0);
+
+	switch (pg->nat_proto) {
+	case NAT_PROTO_TCP:
+		return (nat64lsn_periodic_check_tcp(cfg, st, idle));
+	case NAT_PROTO_UDP:
+		return (idle > cfg->st_udp_ttl ? 1 : 0);
+	case NAT_PROTO_ICMP:
+		return (idle > cfg->st_icmp_ttl ? 1 : 0);
+	}
+
+	/* Unknown protocol: keep the state. */
+	return (0);
+}
+
+
+/*
+ * The following structures and functions
+ * are used to perform SLIST_FOREACH_SAFE()
+ * analog for states identified by struct st_ptr.
+ *
+ * struct st_idx is the iteration cursor: st_first() and st_next() fill
+ * it in, and clear it completely when the end of the chain is reached.
+ */
+
+struct st_idx {
+ struct nat64lsn_portgroup *pg; /* portgroup holding the current state */
+ struct nat64lsn_state *st; /* current state; NULL at end of chain */
+ struct st_ptr sidx_next; /* copy of st->next, taken before the state may be cleared */
+};
+
+/*
+ * Position cursor @si on the state referenced by @sidx.
+ * An index of 0 in @sidx marks an empty chain; the cursor is zeroed then.
+ * Returns @si.
+ */
+static struct st_idx *
+st_first(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh,
+    struct st_ptr *sidx, struct st_idx *si)
+{
+	struct nat64lsn_portgroup *grp;
+	struct nat64lsn_state *cur;
+
+	if (sidx->idx != 0) {
+		grp = PORTGROUP_BYSIDX(cfg, nh, sidx->idx);
+		cur = &grp->states[sidx->off];
+
+		si->pg = grp;
+		si->st = cur;
+		/* Remember the successor now, the state may get wiped. */
+		si->sidx_next = cur->next;
+	} else
+		memset(si, 0, sizeof(*si));
+
+	return (si);
+}
+
+/*
+ * Advance cursor @si to the state recorded in si->sidx_next.
+ * When there is no next state the cursor is zeroed (st and pg NULL).
+ * Returns @si.
+ */
+static struct st_idx *
+st_next(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh,
+    struct st_idx *si)
+{
+	struct nat64lsn_portgroup *grp;
+	struct nat64lsn_state *cur;
+	struct st_ptr nxt;
+
+	nxt = si->sidx_next;
+	if (nxt.idx != 0) {
+		grp = PORTGROUP_BYSIDX(cfg, nh, nxt.idx);
+		cur = &grp->states[nxt.off];
+
+		si->pg = grp;
+		si->st = cur;
+		si->sidx_next = cur->next;
+		return (si);
+	}
+
+	/* End of chain */
+	memset(si, 0, sizeof(*si));
+	si->st = NULL;
+	si->pg = NULL;
+	return (si);
+}
+
+/*
+ * Copy cursor @si into @si_dst, but only while @si still points at a
+ * state (si->st != NULL).  Returns @si_dst.
+ */
+static struct st_idx *
+st_save_cond(struct st_idx *si_dst, struct st_idx *si)
+{
+
+	if (si->st == NULL)
+		return (si_dst);
+
+	*si_dst = *si;
+	return (si_dst);
+}
+/*
+ * Walk every hash chain of host @nh and delete the states for which
+ * nat64lsn_periodic_chkstate() returns non-zero: the state is unlinked
+ * from its chain, its index is marked free in the portgroup, the state
+ * is zeroed and the portgroup timestamp is refreshed.
+ *
+ * Adds the number of deleted states to the sdeleted counter and
+ * returns it.
+ * NOTE(review): the only caller visible here holds NAT64_LOCK(nh) -
+ * confirm that this is required of every caller.
+ */
+unsigned int
+nat64lsn_periodic_chkstates(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh)
+{
+ struct st_idx si, si_prev;
+ int i;
+ unsigned int delcount;
+
+ delcount = 0;
+ for (i = 0; i < nh->hsize; i++) {
+ memset(&si_prev, 0, sizeof(si_prev));
+ for (st_first(cfg, nh, &nh->phash[i], &si);
+ si.st != NULL;
+ st_save_cond(&si_prev, &si), st_next(cfg, nh, &si)) {
+ if (nat64lsn_periodic_chkstate(cfg, si.pg, si.st) == 0)
+ continue;
+ nat64lsn_dump_state(cfg, si.pg, si.st, "DELETE STATE",
+ si.st->cur.off);
+ /* Unlink from hash */
+ if (si_prev.st != NULL)
+ si_prev.st->next = si.st->next;
+ else
+ nh->phash[i] = si.st->next;
+ /* Delete state and free its data */
+ PG_MARK_FREE_IDX(si.pg, si.st->cur.off);
+ memset(si.st, 0, sizeof(struct nat64lsn_state));
+ si.st = NULL; /* keeps st_save_cond() from recording the deleted state as predecessor */
+ delcount++;
+
+ /* Update portgroup timestamp */
+ SET_AGE(si.pg->timestamp);
+ }
+ }
+ NAT64STAT_ADD(&cfg->stats, sdeleted, delcount);
+ return (delcount);
+}
+
+/*
+ * Tells whether portgroup @pg can be deleted: PG_IS_EMPTY() must hold
+ * and the age of its timestamp must have reached cfg->pg_delete_delay.
+ * Returns 1 if stale, 0 otherwise.
+ */
+static int
+stale_pg(const struct nat64lsn_cfg *cfg, const struct nat64lsn_portgroup *pg)
+{
+
+	if (!PG_IS_EMPTY(pg) || GET_AGE(pg->timestamp) < cfg->pg_delete_delay)
+		return (0);
+	return (1);
+}
+
+/*
+ * Tells whether host record @nh can be deleted: it must have no used
+ * portgroup slots and the age of its timestamp must have reached
+ * cfg->nh_delete_delay.
+ * Returns 1 if stale, 0 otherwise.
+ */
+static int
+stale_nh(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh)
+{
+
+	if (nh->pg_used != 0 || GET_AGE(nh->timestamp) < cfg->nh_delete_delay)
+		return (0);
+	return (1);
+}
+
+/* Context handed to nat64lsn_periodic_chkhost() for each visited host. */
+struct nat64lsn_periodic_data {
+ struct nat64lsn_cfg *cfg; /* instance being maintained */
+ struct nat64lsn_job_head jhead; /* jobs collected during the pass */
+ int jlen; /* number of jobs linked into jhead */
+};
+
+/*
+ * Per-host step of the periodic maintenance.
+ * For a non-stale host it expires states, then builds a bitmask of the
+ * portgroups that stale_pg() reports as deletable.  If there is
+ * anything to delete, or the host itself is stale, a
+ * JTYPE_DELPORTGROUP job is appended to d->jhead.
+ * Always returns 0.
+ */
+static NAT64NOINLINE int
+nat64lsn_periodic_chkhost(struct nat64lsn_host *nh,
+ struct nat64lsn_periodic_data *d)
+{
+ char a[INET6_ADDRSTRLEN];
+ struct nat64lsn_portgroup *pg;
+ struct nat64lsn_job_item *ji;
+ uint64_t delmask[NAT64LSN_PGPTRNMASK];
+ int delcount, i;
+
+ delcount = 0;
+ memset(delmask, 0, sizeof(delmask));
+
+ inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
+ DPRINTF(DP_JQUEUE, "Checking %s host %s on cpu %d",
+ stale_nh(d->cfg, nh) ? "stale" : "non-stale", a, curcpu);
+ if (!stale_nh(d->cfg, nh)) {
+ /* Non-stale host. Inspect internals */
+ NAT64_LOCK(nh);
+
+ /* Stage 1: Check&expire states */
+ if (nat64lsn_periodic_chkstates(d->cfg, nh) != 0)
+ SET_AGE(nh->timestamp);
+
+ /* Stage 2: Check if we need to expire */
+ for (i = 0; i < nh->pg_used; i++) {
+ /* PORTGROUP_BYSIDX() takes a 1-based index */
+ pg = PORTGROUP_BYSIDX(d->cfg, nh, i + 1);
+ if (pg == NULL)
+ continue;
+
+ /* Check if we can delete portgroup */
+ if (stale_pg(d->cfg, pg) == 0)
+ continue;
+
+ DPRINTF(DP_JQUEUE, "Check PG %d", i);
+ /* Bit i of the mask selects slot i (0-based) */
+ delmask[i / 64] |= ((uint64_t)1 << (i % 64));
+ delcount++;
+ }
+
+ NAT64_UNLOCK(nh);
+ if (delcount == 0)
+ return (0);
+ }
+
+ /* A stale host gets here with an empty mask and delcount == 0 */
+ DPRINTF(DP_JQUEUE, "Queueing %d portgroups for deleting", delcount);
+ /* We have something to delete - add it to queue */
+ ji = nat64lsn_create_job(d->cfg, NULL, JTYPE_DELPORTGROUP);
+ if (ji == NULL)
+ return (0);
+
+ ji->haddr = nh->addr;
+ ji->delcount = delcount;
+ memcpy(ji->delmask, delmask, sizeof(ji->delmask));
+
+ /* Jobs are only collected here; the caller enqueues them in bulk */
+ TAILQ_INSERT_TAIL(&d->jhead, ji, next);
+ d->jlen++;
+ return (0);
+}
+
+/*
+ * Callout handler that performs periodic maintenance of the host hash.
+ * It runs nat64lsn_periodic_chkhost() on every host, enqueues the jobs
+ * collected by it and re-arms itself to run again in
+ * hz * PERIODIC_DELAY ticks.
+ * @data is the struct nat64lsn_cfg of the instance.
+ */
+static void
+nat64lsn_periodic(void *data)
+{
+ struct ip_fw_chain *ch;
+ IPFW_RLOCK_TRACKER;
+ struct nat64lsn_cfg *cfg;
+ struct nat64lsn_periodic_data d;
+ struct nat64lsn_host *nh, *tmp;
+
+ cfg = (struct nat64lsn_cfg *) data;
+ ch = cfg->ch;
+ CURVNET_SET(cfg->vp);
+
+ memset(&d, 0, sizeof(d));
+ d.cfg = cfg;
+ TAILQ_INIT(&d.jhead);
+
+ IPFW_RLOCK(ch);
+
+ /* Stage 1: foreach host, check all its portgroups */
+ I6HASH_FOREACH_SAFE(cfg, nh, tmp, nat64lsn_periodic_chkhost, &d);
+
+ /* Enqueue everything we have requested */
+ nat64lsn_enqueue_jobs(cfg, &d.jhead, d.jlen);
+
+ /* Re-arm ourselves */
+ callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY);
+
+ IPFW_RUNLOCK(ch);
+
+ CURVNET_RESTORE();
+}
+
+/*
+ * Disposes of the mbuf attached to a processed job @ji.
+ * If the job completed (ji->done != 0) and the flow is IPv6, the packet
+ * is passed to nat64lsn_translate6() again; otherwise it is freed and
+ * counted as dropped.  Does nothing when the job carries no mbuf.
+ */
+static NAT64NOINLINE void
+reinject_mbuf(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
+{
+
+	if (ji->m == NULL)
+		return;
+
+	if (ji->f_id.addr_type == 6 && ji->done != 0) {
+		/*
+		 * XXX: Limit recursion level
+		 */
+		NAT64STAT_INC(&cfg->stats, jreinjected);
+		DPRINTF(DP_JQUEUE, "Reinject mbuf");
+		nat64lsn_translate6(cfg, &ji->f_id, &ji->m);
+		return;
+	}
+
+	/* Request has failed or packet type is wrong */
+	m_freem(ji->m);
+	ji->m = NULL;
+	NAT64STAT_INC(&cfg->stats, dropped);
+	DPRINTF(DP_DROPS, "mbuf dropped: type %d, done %d",
+	    ji->jtype, ji->done);
+}
+
+/*
+ * Returns portgroup @pg to nat64lsn_pg_zone.
+ * The caller is responsible for removing all references to it first.
+ */
+static void
+destroy_portgroup(struct nat64lsn_portgroup *pg)
+{
+
+ DPRINTF(DP_OBJ, "DESTROY PORTGROUP %d %p", pg->idx, pg);
+ uma_zfree(nat64lsn_pg_zone, pg);
+}
+
+/*
+ * Pre-allocates a portgroup for job @ji and stores it in ji->pg.
+ * The freemask is filled with ones and nat_proto is taken from the job.
+ * When ji->needs_idx is set, a spare chunk of portgroup pointers is
+ * requested as well; ji->spare_idx may end up NULL.
+ * Returns 0 on success, 1 if the portgroup could not be allocated.
+ */
+static NAT64NOINLINE int
+alloc_portgroup(struct nat64lsn_job_item *ji)
+{
+	struct nat64lsn_portgroup *newpg;
+
+	newpg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT);
+	if (newpg == NULL)
+		return (1);
+
+	memset(&newpg->freemask, 0xFF, sizeof(newpg->freemask));
+	newpg->nat_proto = ji->nat_proto;
+	ji->pg = newpg;
+
+	/* Failed alloc isn't always fatal, so don't check */
+	if (ji->needs_idx != 0)
+		ji->spare_idx = uma_zalloc(nat64lsn_pgidx_zone, M_NOWAIT);
+
+	return (0);
+}
+
+/*
+ * Frees host record @nh: destroys its lock, returns every allocated
+ * chunk of portgroup pointers to nat64lsn_pgidx_zone and the record
+ * itself to nat64lsn_host_zone.  Portgroups are not freed here.
+ */
+static void
+destroy_host6(struct nat64lsn_host *nh)
+{
+ char a[INET6_ADDRSTRLEN];
+ int i;
+
+ inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
+ DPRINTF(DP_OBJ, "DESTROY HOST %s %p (pg used %d)", a, nh,
+ nh->pg_used);
+ NAT64_LOCK_DESTROY(nh);
+ /* pg_allocated grows in steps of NAT64LSN_PGIDX_CHUNK */
+ for (i = 0; i < nh->pg_allocated / NAT64LSN_PGIDX_CHUNK; i++)
+ uma_zfree(nat64lsn_pgidx_zone, PORTGROUP_CHUNK(nh, i));
+ uma_zfree(nat64lsn_host_zone, nh);
+}
+
+/*
+ * Pre-allocates everything a JTYPE_NEWHOST job needs: the host record,
+ * its first chunk of portgroup pointers and, via alloc_portgroup(),
+ * a portgroup.  On success the initialized host is stored in ji->nh.
+ * Returns 0 on success, or 1, 2 or 3 when allocation of the host, the
+ * pointer chunk or the portgroup failed respectively.
+ */
+static NAT64NOINLINE int
+alloc_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
+{
+	struct nat64lsn_host *nh;
+	char a[INET6_ADDRSTRLEN];
+	int error;
+
+	nh = uma_zalloc(nat64lsn_host_zone, M_NOWAIT);
+	if (nh == NULL)
+		return (1);
+
+	PORTGROUP_CHUNK(nh, 0) = uma_zalloc(nat64lsn_pgidx_zone, M_NOWAIT);
+	if (PORTGROUP_CHUNK(nh, 0) == NULL) {
+		error = 2;
+		goto free_host;
+	}
+	if (alloc_portgroup(ji) != 0) {
+		NAT64STAT_INC(&cfg->stats, jportfails);
+		error = 3;
+		goto free_chunk;
+	}
+
+	NAT64_LOCK_INIT(nh);
+	nh->addr = ji->haddr;
+	nh->hsize = NAT64LSN_HSIZE; /* XXX: hardcoded size */
+	nh->pg_allocated = NAT64LSN_PGIDX_CHUNK;
+	nh->pg_used = 0;
+	ji->nh = nh;
+
+	inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
+	DPRINTF(DP_OBJ, "ALLOC HOST %s %p", a, ji->nh);
+	return (0);
+
+free_chunk:
+	uma_zfree(nat64lsn_pgidx_zone, PORTGROUP_CHUNK(nh, 0));
+free_host:
+	uma_zfree(nat64lsn_host_zone, nh);
+	return (error);
+}
+
+/*
+ * Finds the first empty portgroup slot of host @nh among the
+ * nh->pg_allocated slots and stores its 0-based number in @idx.
+ * Returns 0 if a slot was found, 1 otherwise.
+ */
+static NAT64NOINLINE int
+find_nh_pg_idx(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh, int *idx)
+{
+	int slot;
+
+	slot = 0;
+	while (slot < nh->pg_allocated) {
+		/* PORTGROUP_BYSIDX() takes a 1-based index */
+		if (PORTGROUP_BYSIDX(cfg, nh, slot + 1) == NULL) {
+			*idx = slot;
+			return (0);
+		}
+		slot++;
+	}
+	return (1);
+}
+
+/*
+ * Links the host pre-allocated for job @ji into the instance hash and
+ * tries to attach the pre-allocated portgroup to it.
+ * If a host with the same address is already present, the job is just
+ * marked done and keeps its nh/pg pointers.
+ * Returns 0 on success, 1 if the portgroup could not be attached.
+ */
+static NAT64NOINLINE int
+attach_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
+{
+	char a[INET6_ADDRSTRLEN];
+	struct nat64lsn_host *nh;
+
+	I6HASH_FIND(cfg, nh, &ji->haddr);
+	if (nh != NULL) {
+		/*
+		 * This probably means we had several simultaneous host
+		 * requests and a previous one has already attached this
+		 * host.  Mark the job as done, but leave nh and pg
+		 * pointers unchanged, so nat64lsn_do_request() will
+		 * release all allocated resources.
+		 */
+		inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
+		DPRINTF(DP_OBJ, "%s %p is already attached as %p",
+		    a, ji->nh, nh);
+		ji->done = 1;
+		return (0);
+	}
+
+	/* Add new host to list */
+	nh = ji->nh;
+	I6HASH_INSERT(cfg, nh);
+	cfg->ihcount++;
+	ji->nh = NULL;
+
+	inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
+	DPRINTF(DP_OBJ, "ATTACH HOST %s %p", a, nh);
+
+	/*
+	 * Try to add portgroup.  Note it will automatically set
+	 * 'done' on ji if successful.
+	 */
+	if (attach_portgroup(cfg, ji) == 0)
+		return (0);
+
+	DPRINTF(DP_DROPS, "%s %p failed to attach PG",
+	    a, nh);
+	NAT64STAT_INC(&cfg->stats, jportfails);
+	return (1);
+}
+
+/*
+ * Looks for an unused portgroup slot in cfg->pg[] within the range
+ * that belongs to alias address number @addr_off and protocol
+ * @nat_proto, skipping the chunks below NAT64_MIN_CHUNK.
+ * On success stores the first port of the chunk in @aport and the
+ * slot index in @ppg_idx.
+ * Returns 1 if a slot was found, 0 otherwise.
+ */
+static NAT64NOINLINE int
+find_pg_place_addr(const struct nat64lsn_cfg *cfg, int addr_off,
+    int nat_proto, uint16_t *aport, int *ppg_idx)
+{
+	int base, chunk;
+
+	base = addr_off * _ADDR_PG_COUNT +
+	    (nat_proto - 1) * _ADDR_PG_PROTO_COUNT;
+
+	for (chunk = NAT64_MIN_CHUNK; chunk < _ADDR_PG_PROTO_COUNT; chunk++) {
+		if (cfg->pg[base + chunk] == NULL) {
+			*aport = chunk * NAT64_CHUNK_SIZE;
+			*ppg_idx = base + chunk;
+			return (1);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Chooses an alias address and a free port chunk for a new portgroup.
+ * Candidates are tried in this order: the address suggested by
+ * ji->aaddr (if any), the address selected by ji->fhash, then every
+ * address of the IPv4 prefix in turn.
+ * On success @aaddr, @aport and @ppg_idx are filled in.
+ * Returns 0 if a place was found, 1 otherwise.
+ *
+ * XXX: This function needs to be rewritten to
+ * use free bitmask for faster pg finding,
+ * additionally, it should take into consideration
+ * a) randomization and
+ * b) previous addresses allocated to given nat instance
+ */
+static NAT64NOINLINE int
+find_portgroup_place(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji,
+    uint32_t *aaddr, uint16_t *aport, int *ppg_idx)
+{
+	int i, naddr, nat_proto;
+
+	/*
+	 * XXX: Use bitmask index to be able to find/check if IP address
+	 * has some spare pg's
+	 */
+	nat_proto = ji->nat_proto;
+
+	/* First, try to use same address */
+	if (ji->aaddr != 0) {
+		i = ntohl(ji->aaddr) - cfg->prefix4;
+		if (find_pg_place_addr(cfg, i, nat_proto, aport,
+		    ppg_idx) != 0)
+			goto found;
+	}
+
+	/* Number of addresses in the IPv4 prefix */
+	naddr = 1 << (32 - cfg->plen4);
+
+	/* Next, try to use random address based on flow hash */
+	i = ji->fhash % naddr;
+	if (find_pg_place_addr(cfg, i, nat_proto, aport, ppg_idx) != 0)
+		goto found;
+
+	/* Last one: simply find ANY available */
+	for (i = 0; i < naddr; i++) {
+		if (find_pg_place_addr(cfg, i, nat_proto, aport,
+		    ppg_idx) != 0)
+			goto found;
+	}
+
+	return (1);
+
+found:
+	*aaddr = htonl(cfg->prefix4 + i);
+	return (0);
+}
+
+/*
+ * Binds the portgroup pre-allocated for job @ji to its host and to a
+ * free slot of cfg->pg[].
+ * On success ownership of the portgroup moves from the job to the
+ * instance (ji->pg is cleared) and the job is marked done.
+ * Returns 0 on success, 1 if the host is not in the hash, 2 if no free
+ * port chunk was found, 3 if the host has no free portgroup slot.
+ */
+static NAT64NOINLINE int
+attach_portgroup(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
+{
+ char a[INET6_ADDRSTRLEN];
+ struct nat64lsn_portgroup *pg;
+ struct nat64lsn_host *nh;
+ uint32_t aaddr;
+ uint16_t aport;
+ int nh_pg_idx, pg_idx;
+
+ pg = ji->pg;
+
+ /*
+ * Find source host and bind: we can't rely on
+ * pg->host
+ */
+ I6HASH_FIND(cfg, nh, &ji->haddr);
+ if (nh == NULL)
+ return (1);
+
+ /* Find spare port chunk */
+ if (find_portgroup_place(cfg, ji, &aaddr, &aport, &pg_idx) != 0) {
+ inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
+ DPRINTF(DP_OBJ | DP_DROPS, "empty PG not found for %s", a);
+ return (2);
+ }
+
+ /* Expand PG indexes if needed */
+ if (nh->pg_allocated < cfg->max_chunks && ji->spare_idx != NULL) {
+ /* The spare chunk now belongs to the host */
+ PORTGROUP_CHUNK(nh, nh->pg_allocated / NAT64LSN_PGIDX_CHUNK) =
+ ji->spare_idx;
+ nh->pg_allocated += NAT64LSN_PGIDX_CHUNK;
+ ji->spare_idx = NULL;
+ }
+
+ /* Find empty index to store PG in the @nh */
+ if (find_nh_pg_idx(cfg, nh, &nh_pg_idx) != 0) {
+ inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
+ DPRINTF(DP_OBJ | DP_DROPS, "free PG index not found for %s",
+ a);
+ return (3);
+ }
+
+ cfg->pg[pg_idx] = pg;
+ cfg->protochunks[pg->nat_proto]++;
+ NAT64STAT_INC(&cfg->stats, spgcreated);
+
+ pg->aaddr = aaddr;
+ pg->aport = aport;
+ pg->host = nh;
+ pg->idx = pg_idx;
+ SET_AGE(pg->timestamp);
+
+ /* nh_pg_idx is 0-based, PORTGROUP_BYSIDX() takes a 1-based index */
+ PORTGROUP_BYSIDX(cfg, nh, nh_pg_idx + 1) = pg;
+ /* pg_used only grows when the slot right after the used ones is taken */
+ if (nh->pg_used == nh_pg_idx)
+ nh->pg_used++;
+ SET_AGE(nh->timestamp);
+
+ ji->pg = NULL;
+ ji->done = 1;
+
+ return (0);
+}
+
+/*
+ * Handles a JTYPE_DELPORTGROUP job: re-checks every portgroup selected
+ * by ji->delmask and deletes those that are still stale.  If the host
+ * becomes stale as a result, it is removed from the hash and handed to
+ * the job (ji->nh) so that the caller frees it.
+ *
+ * NOTE(review): a host that already has pg_used == 0 returns early
+ * below and is therefore never removed through this path, although
+ * nat64lsn_periodic_chkhost() queues jobs for such hosts - confirm
+ * whether that is intended.
+ */
+static NAT64NOINLINE void
+consider_del_portgroup(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
+{
+	struct nat64lsn_host *nh, *nh_tmp;
+	struct nat64lsn_portgroup *pg, *pg_list[256];
+	int i, pg_lidx, idx;
+
+	/* Find source host */
+	I6HASH_FIND(cfg, nh, &ji->haddr);
+	if (nh == NULL || nh->pg_used == 0)
+		return;
+
+	memset(pg_list, 0, sizeof(pg_list));
+	pg_lidx = 0;
+
+	NAT64_LOCK(nh);
+
+	for (i = nh->pg_used - 1; i >= 0; i--) {
+		if ((ji->delmask[i / 64] & ((uint64_t)1 << (i % 64))) == 0)
+			continue;
+		pg = PORTGROUP_BYSIDX(cfg, nh, i + 1);
+
+		/*
+		 * The mask was built when the job was queued; the slot
+		 * may have been emptied since then.
+		 */
+		if (pg == NULL)
+			continue;
+
+		/* Check that PG isn't busy. */
+		if (stale_pg(cfg, pg) == 0)
+			continue;
+
+		/* DO delete */
+		pg_list[pg_lidx++] = pg;
+		PORTGROUP_BYSIDX(cfg, nh, i + 1) = NULL;
+
+		idx = _GET_PORTGROUP_IDX(cfg, ntohl(pg->aaddr), pg->nat_proto,
+		    pg->aport);
+		KASSERT(cfg->pg[idx] == pg, ("Non matched pg"));
+		cfg->pg[idx] = NULL;
+		cfg->protochunks[pg->nat_proto]--;
+		/* Counted once here, at the moment of unlinking */
+		NAT64STAT_INC(&cfg->stats, spgdeleted);
+
+		/* Decrease pg_used */
+		while (nh->pg_used > 0 &&
+		    PORTGROUP_BYSIDX(cfg, nh, nh->pg_used) == NULL)
+			nh->pg_used--;
+
+		/* Check if on-stack buffer has ended */
+		if (pg_lidx == nitems(pg_list))
+			break;
+	}
+
+	NAT64_UNLOCK(nh);
+
+	if (stale_nh(cfg, nh)) {
+		I6HASH_REMOVE(cfg, nh, nh_tmp, &ji->haddr);
+		KASSERT(nh != NULL, ("Unable to find address"));
+		cfg->ihcount--;
+		/* The caller frees whatever is left in ji->nh */
+		ji->nh = nh;
+		I6HASH_FIND(cfg, nh, &ji->haddr);
+		KASSERT(nh == NULL, ("Failed to delete address"));
+	}
+
+	/* TODO: Delay freeing portgroups */
+	while (pg_lidx > 0) {
+		pg_lidx--;
+		destroy_portgroup(pg_list[pg_lidx]);
+	}
+}
+
+/*
+ * Main request handler, run as the cfg->jcallout callout.
+ * Responsible for handling jqueue, e.g.
+ * creating new hosts, adding/deleting portgroups.
+ *
+ * Works in three phases: detach the pending job list, pre-allocate
+ * memory for all jobs, apply the jobs while holding the
+ * IPFW_UH_WLOCK and IPFW_WLOCK write locks, and finally release
+ * whatever the jobs still own.
+ * @data is the struct nat64lsn_cfg of the instance.
+ */
+static NAT64NOINLINE void
+nat64lsn_do_request(void *data)
+{
+ IPFW_RLOCK_TRACKER;
+ struct nat64lsn_job_head jhead;
+ struct nat64lsn_job_item *ji;
+ int jcount, nhsize;
+ struct nat64lsn_cfg *cfg = (struct nat64lsn_cfg *) data;
+ struct ip_fw_chain *ch;
+ int delcount;
+
+ CURVNET_SET(cfg->vp);
+
+ TAILQ_INIT(&jhead);
+
+ /* XXX: We're running unlocked here */
+
+ ch = cfg->ch;
+ delcount = 0;
+ IPFW_RLOCK(ch);
+
+ /* Grab queue */
+ JQUEUE_LOCK();
+ TAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item, next);
+ jcount = cfg->jlen;
+ cfg->jlen = 0;
+ JQUEUE_UNLOCK();
+
+ /* check if we need to resize hash */
+ /* NOTE(review): the result is discarded, nhsize is reset to 0 below */
+ nhsize = 0;
+ if (cfg->ihcount > cfg->ihsize && cfg->ihsize < 65536) {
+ nhsize = cfg->ihsize;
+ for ( ; cfg->ihcount > nhsize && nhsize < 65536; nhsize *= 2)
+ ;
+ } else if (cfg->ihcount < cfg->ihsize * 4) {
+ nhsize = cfg->ihsize;
+ for ( ; cfg->ihcount < nhsize * 4 && nhsize > 32; nhsize /= 2)
+ ;
+ }
+
+ IPFW_RUNLOCK(ch);
+
+ if (TAILQ_EMPTY(&jhead)) {
+ CURVNET_RESTORE();
+ return;
+ }
+
+ NAT64STAT_INC(&cfg->stats, jcalls);
+ DPRINTF(DP_JQUEUE, "count=%d", jcount);
+
+ /*
+ * TODO:
+ * What we should do here is to build a hash
+ * to ensure we don't have lots of duplicate requests.
+ * Skip this for now.
+ *
+ * TODO: Limit per-call number of items
+ */
+
+ /* Pre-allocate everything for entire chain */
+ TAILQ_FOREACH(ji, &jhead, next) {
+ switch (ji->jtype) {
+ case JTYPE_NEWHOST:
+ if (alloc_host6(cfg, ji) != 0)
+ NAT64STAT_INC(&cfg->stats, jhostfails);
+ break;
+ case JTYPE_NEWPORTGROUP:
+ if (alloc_portgroup(ji) != 0)
+ NAT64STAT_INC(&cfg->stats, jportfails);
+ break;
+ case JTYPE_DELPORTGROUP:
+ /* delcount is accumulated but not used further in this function */
+ delcount += ji->delcount;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /*
+ * TODO: Alloc new hash
+ */
+ nhsize = 0;
+ if (nhsize > 0) {
+ /* XXX: */
+ }
+
+ /* Apply all changes in batch */
+ IPFW_UH_WLOCK(ch);
+ IPFW_WLOCK(ch);
+
+ TAILQ_FOREACH(ji, &jhead, next) {
+ switch (ji->jtype) {
+ case JTYPE_NEWHOST:
+ /* Skip jobs whose pre-allocation has failed */
+ if (ji->nh != NULL)
+ attach_host6(cfg, ji);
+ break;
+ case JTYPE_NEWPORTGROUP:
+ if (ji->pg != NULL &&
+ attach_portgroup(cfg, ji) != 0)
+ NAT64STAT_INC(&cfg->stats, jportfails);
+ break;
+ case JTYPE_DELPORTGROUP:
+ consider_del_portgroup(cfg, ji);
+ break;
+ }
+ }
+
+ if (nhsize > 0) {
+ /* XXX: Move everything to new hash */
+ }
+
+ IPFW_WUNLOCK(ch);
+ IPFW_UH_WUNLOCK(ch);
+
+ /* Flush unused entries */
+ while (!TAILQ_EMPTY(&jhead)) {
+ ji = TAILQ_FIRST(&jhead);
+ TAILQ_REMOVE(&jhead, ji, next);
+ /* Objects still referenced by the job were not attached (or were detached) */
+ if (ji->nh != NULL)
+ destroy_host6(ji->nh);
+ if (ji->pg != NULL)
+ destroy_portgroup(ji->pg);
+ if (ji->m != NULL)
+ reinject_mbuf(cfg, ji);
+ if (ji->spare_idx != NULL)
+ uma_zfree(nat64lsn_pgidx_zone, ji->spare_idx);
+ free(ji, M_IPFW);
+ }
+ CURVNET_RESTORE();
+}
+
+/*
+ * Allocates a zeroed job item of type @jtype.
+ * When @f_id is given, the flow id, its source IPv6 address and the
+ * mapped NAT protocol are recorded in the job.
+ * Returns NULL if the queue limit is reached, the protocol of @f_id is
+ * not supported, or memory allocation fails.
+ */
+static NAT64NOINLINE struct nat64lsn_job_item *
+nat64lsn_create_job(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id,
+    int jtype)
+{
+	struct nat64lsn_job_item *job;
+	uint8_t nat_proto;
+
+	/*
+	 * Do not try to lock possibly contested mutex if we're near the limit.
+	 * Drop packet instead.
+	 */
+	if (cfg->jlen >= cfg->jmaxlen) {
+		NAT64STAT_INC(&cfg->stats, jmaxlen);
+		return (NULL);
+	}
+
+	nat_proto = 0;
+	if (f_id != NULL) {
+		nat_proto = nat64lsn_proto_map[f_id->proto];
+
+		DPRINTF(DP_JQUEUE, "REQUEST pg nat_proto %d on proto %d",
+		    nat_proto, f_id->proto);
+
+		if (nat_proto == 0)
+			return (NULL);
+	}
+
+	job = malloc(sizeof(*job), M_IPFW, M_NOWAIT | M_ZERO);
+	if (job == NULL) {
+		NAT64STAT_INC(&cfg->stats, jnomem);
+		return (NULL);
+	}
+
+	job->jtype = jtype;
+	if (f_id != NULL) {
+		job->f_id = *f_id;
+		job->haddr = f_id->src_ip6;
+		job->nat_proto = nat_proto;
+	}
+
+	return (job);
+}
+
+/*
+ * Appends job @ji to the instance job queue and arms the job callout
+ * (one tick from now) unless it is already pending.
+ * A NULL @ji is ignored.
+ */
+static NAT64NOINLINE void
+nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
+{
+
+	if (ji != NULL) {
+		JQUEUE_LOCK();
+		TAILQ_INSERT_TAIL(&cfg->jhead, ji, next);
+		cfg->jlen++;
+		NAT64STAT_INC(&cfg->stats, jrequests);
+
+		if (!callout_pending(&cfg->jcallout))
+			callout_reset(&cfg->jcallout, 1, nat64lsn_do_request,
+			    cfg);
+		JQUEUE_UNLOCK();
+	}
+}
+
+/*
+ * Moves all @jlen jobs from the list @jhead to the tail of the
+ * instance job queue and arms the job callout (one tick from now)
+ * unless it is already pending.  An empty list is ignored.
+ */
+static NAT64NOINLINE void
+nat64lsn_enqueue_jobs(struct nat64lsn_cfg *cfg,
+    struct nat64lsn_job_head *jhead, int jlen)
+{
+
+	if (!TAILQ_EMPTY(jhead)) {
+		/* Attach current queue to execution one */
+		JQUEUE_LOCK();
+		TAILQ_CONCAT(&cfg->jhead, jhead, next);
+		cfg->jlen += jlen;
+		NAT64STAT_ADD(&cfg->stats, jrequests, jlen);
+
+		if (!callout_pending(&cfg->jcallout))
+			callout_reset(&cfg->jcallout, 1, nat64lsn_do_request,
+			    cfg);
+		JQUEUE_UNLOCK();
+	}
+}
+
+/*
+ * Computes a hash of an IPv6 flow over its destination address,
+ * source address, destination port and source port.
+ * The key buffer layout is: dst_ip6 (16 bytes), src_ip6 (16 bytes),
+ * dst_port (2 bytes), src_port (2 bytes).
+ */
+static unsigned int
+flow6_hash(const struct ipfw_flow_id *f_id)
+{
+	unsigned char hbuf[36];
+
+	memcpy(hbuf, &f_id->dst_ip6, 16);
+	memcpy(&hbuf[16], &f_id->src_ip6, 16);
+	memcpy(&hbuf[32], &f_id->dst_port, 2);
+	/*
+	 * The source port occupies the last two bytes; writing it at
+	 * offset 32 would overwrite dst_port and leave bytes 34-35
+	 * uninitialized.
+	 */
+	memcpy(&hbuf[34], &f_id->src_port, 2);
+
+	return (djb_hash(hbuf, sizeof(hbuf)));
+}
+
+/*
+ * Queues a JTYPE_NEWHOST job for flow @f_id and hands the packet over
+ * to it.  The mbuf is always taken away from the caller (*pm is set to
+ * NULL); it is freed here if the job cannot be created.
+ * Always returns IP_FW_PASS.
+ */
+static NAT64NOINLINE int
+nat64lsn_request_host(struct nat64lsn_cfg *cfg,
+    const struct ipfw_flow_id *f_id, struct mbuf **pm)
+{
+	struct nat64lsn_job_item *ji;
+	struct mbuf *m;
+
+	m = *pm;
+	*pm = NULL;
+
+	ji = nat64lsn_create_job(cfg, f_id, JTYPE_NEWHOST);
+	if (ji != NULL) {
+		ji->m = m;
+		/* Provide pseudo-random value based on flow */
+		ji->fhash = flow6_hash(f_id);
+		nat64lsn_enqueue_job(cfg, ji);
+		NAT64STAT_INC(&cfg->stats, jhostsreq);
+		return (IP_FW_PASS);
+	}
+
+	m_freem(m);
+	NAT64STAT_INC(&cfg->stats, dropped);
+	DPRINTF(DP_DROPS, "failed to create job");
+	return (IP_FW_PASS);
+}
+
+/*
+ * Queues a JTYPE_NEWPORTGROUP job for flow @f_id and hands the packet
+ * over to it.  @aaddr is the preferred alias address (0 if none) and
+ * @needs_idx asks for a spare chunk of portgroup pointers.
+ * The mbuf is always taken away from the caller (*pm is set to NULL);
+ * it is freed here if the job cannot be created.
+ * Always returns IP_FW_PASS.
+ */
+static NAT64NOINLINE int
+nat64lsn_request_portgroup(struct nat64lsn_cfg *cfg,
+    const struct ipfw_flow_id *f_id, struct mbuf **pm, uint32_t aaddr,
+    int needs_idx)
+{
+	struct nat64lsn_job_item *ji;
+	struct mbuf *m;
+
+	m = *pm;
+	*pm = NULL;
+
+	ji = nat64lsn_create_job(cfg, f_id, JTYPE_NEWPORTGROUP);
+	if (ji != NULL) {
+		ji->m = m;
+		/* Provide pseudo-random value based on flow */
+		ji->fhash = flow6_hash(f_id);
+		ji->aaddr = aaddr;
+		ji->needs_idx = needs_idx;
+		nat64lsn_enqueue_job(cfg, ji);
+		NAT64STAT_INC(&cfg->stats, jportreq);
+		return (IP_FW_PASS);
+	}
+
+	m_freem(m);
+	NAT64STAT_INC(&cfg->stats, dropped);
+	DPRINTF(DP_DROPS, "failed to create job");
+	return (IP_FW_PASS);
+}
+
+/*
+ * Allocates a state for key @kst in the first portgroup of host @nh
+ * that matches @nat_proto and has a free slot, and links it into the
+ * host hash.
+ * Returns the new state, or NULL if no matching portgroup has a free
+ * slot.  *aaddr is updated with alias addresses of the visited
+ * portgroups so that the caller can ask for a new portgroup on the
+ * same address.
+ */
+static NAT64NOINLINE struct nat64lsn_state *
+nat64lsn_create_state(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh,
+ int nat_proto, struct nat64lsn_state *kst, uint32_t *aaddr)
+{
+ struct nat64lsn_portgroup *pg;
+ struct nat64lsn_state *st;
+ int i, hval, off;
+
+ /* XXX: create additional bitmask for selecting proper portgroup */
+ for (i = 0; i < nh->pg_used; i++) {
+ pg = PORTGROUP_BYSIDX(cfg, nh, i + 1);
+ if (pg == NULL)
+ continue;
+ if (*aaddr == 0)
+ *aaddr = pg->aaddr;
+ if (pg->nat_proto != nat_proto)
+ continue;
+
+ /* 0 means no free slot, otherwise it is the slot number + 1 */
+ off = PG_GET_FREE_IDX(pg);
+ if (off != 0) {
+ /* We have found spare state. Use it */
+ off--;
+ PG_MARK_BUSY_IDX(pg, off);
+ st = &pg->states[off];
+
+ /*
+ * Fill in new info. Assume state was zeroed.
+ * Timestamp and flags will be filled by caller.
+ */
+ st->u.s = kst->u.s;
+ st->cur.idx = i + 1;
+ st->cur.off = off;
+
+ /* Insert into host hash table */
+ hval = HASH_IN4(&st->u.hkey) & (nh->hsize - 1);
+ st->next = nh->phash[hval];
+ nh->phash[hval] = st->cur;
+
+ nat64lsn_dump_state(cfg, pg, st, "ALLOC STATE", off);
+
+ NAT64STAT_INC(&cfg->stats, screated);
+
+ return (st);
+ }
+ /* Save last used alias address */
+ *aaddr = pg->aaddr;
+ }
+
+ return (NULL);
+}
+
+/*
+ * Translates an IPv6 packet of flow @f_id into IPv4.
+ *
+ * Looks up the source host and the state matching the flow, creating
+ * the state if needed.  When the host is unknown or has no free state,
+ * the packet is handed over to a queued job (new host / new portgroup)
+ * and IP_FW_PASS is returned with *pm set to NULL.
+ *
+ * Returns IP_FW_PASS when nat64_do_handle_ip6() asks to skip the
+ * packet (mbuf is left in *pm), IP_FW_DENY otherwise; in the latter
+ * case *pm is NULL because the mbuf has been consumed or freed.
+ */
+static NAT64NOINLINE int
+nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id,
+    struct mbuf **pm)
+{
+	struct pfloghdr loghdr, *logdata;
+	char a[INET6_ADDRSTRLEN];
+	struct nat64lsn_host *nh;
+	struct st_ptr sidx;
+	struct nat64lsn_state *st, kst;
+	struct nat64lsn_portgroup *pg;
+	struct icmp6_hdr *icmp6;
+	uint32_t aaddr;
+	int action, hval, k, nat_proto, proto;
+	uint16_t aport, state_ts, state_flags;
+
+	/* Check if af/protocol is supported and get it short id */
+	nat_proto = nat64lsn_proto_map[f_id->proto];
+	if (nat_proto == 0) {
+		/*
+		 * Since we can be called from jobs handler, we need
+		 * to free mbuf by self, do not leave this task to
+		 * ipfw_check_packet().
+		 */
+		NAT64STAT_INC(&cfg->stats, noproto);
+		m_freem(*pm);
+		*pm = NULL;
+		return (IP_FW_DENY);
+	}
+
+	/* Try to find host first */
+	I6HASH_FIND(cfg, nh, &f_id->src_ip6);
+
+	if (nh == NULL)
+		return (nat64lsn_request_host(cfg, f_id, pm));
+
+	/* Fill-in on-stack state structure */
+	kst.u.s.faddr = f_id->dst_ip6.s6_addr32[3];
+	kst.u.s.fport = f_id->dst_port;
+	kst.u.s.lport = f_id->src_port;
+
+	/* Prepare some fields we might need to update */
+	hval = 0;
+	proto = nat64_getlasthdr(*pm, &hval);
+	if (proto < 0) {
+		NAT64STAT_INC(&cfg->stats, dropped);
+		DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious");
+		m_freem(*pm);
+		*pm = NULL;
+		return (IP_FW_DENY);
+	}
+
+	SET_AGE(state_ts);
+	if (proto == IPPROTO_TCP)
+		state_flags = convert_tcp_flags(
+		    TCP(mtodo(*pm, hval))->th_flags);
+	else
+		state_flags = 0;
+	if (proto == IPPROTO_ICMPV6) {
+		/* Alter local port data */
+		icmp6 = mtodo(*pm, hval);
+		if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST ||
+		    icmp6->icmp6_type == ICMP6_ECHO_REPLY)
+			kst.u.s.lport = ntohs(icmp6->icmp6_id);
+	}
+
+	hval = HASH_IN4(&kst.u.hkey) & (nh->hsize - 1);
+	pg = NULL;
+	st = NULL;
+
+	/* OK, let's find state in host hash */
+	NAT64_LOCK(nh);
+	sidx = nh->phash[hval];
+	k = 0;
+	while (sidx.idx != 0) {
+		pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx);
+		st = &pg->states[sidx.off];
+		if (st->u.hkey == kst.u.hkey && pg->nat_proto == nat_proto)
+			break;
+		/* Diagnostics only: report suspiciously long chains */
+		if (k++ > 1000) {
+			DPRINTF(DP_ALL, "XXX: too long %d/%d %d/%d\n",
+			    sidx.idx, sidx.off, st->next.idx, st->next.off);
+			inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
+			DPRINTF(DP_GENERIC, "TR host %s %p on cpu %d",
+			    a, nh, curcpu);
+			k = 0;
+		}
+		sidx = st->next;
+	}
+
+	if (sidx.idx == 0) {
+		aaddr = 0;
+		st = nat64lsn_create_state(cfg, nh, nat_proto, &kst, &aaddr);
+		if (st == NULL) {
+			/* No free states. Request more if we can */
+			if (nh->pg_used >= cfg->max_chunks) {
+				/* Limit reached */
+				NAT64STAT_INC(&cfg->stats, dropped);
+				inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
+				DPRINTF(DP_DROPS, "PG limit reached "
+				    " for host %s (used %u, allocated %u, "
+				    "limit %u)", a,
+				    nh->pg_used * NAT64_CHUNK_SIZE,
+				    nh->pg_allocated * NAT64_CHUNK_SIZE,
+				    cfg->max_chunks * NAT64_CHUNK_SIZE);
+				m_freem(*pm);
+				*pm = NULL;
+				NAT64_UNLOCK(nh);
+				return (IP_FW_DENY);
+			}
+			if ((nh->pg_allocated <=
+			    nh->pg_used + NAT64LSN_REMAININGPG) &&
+			    nh->pg_allocated < cfg->max_chunks)
+				action = 1; /* Request new indexes */
+			else
+				action = 0;
+			NAT64_UNLOCK(nh);
+			return (nat64lsn_request_portgroup(cfg, f_id,
+			    pm, aaddr, action));
+		}
+
+		/* We've got new state. */
+		sidx = st->cur;
+		pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx);
+	}
+
+	/* Okay, state found */
+
+	/* Update necessary fields */
+	if (st->timestamp != state_ts)
+		st->timestamp = state_ts;
+	/*
+	 * Record flags that are not set in the state yet.  Testing the
+	 * intersection against 0 would never store anything into a
+	 * freshly created (zeroed) state.
+	 */
+	if ((st->flags & state_flags) != state_flags)
+		st->flags |= state_flags;
+
+	/* Copy needed state data */
+	aaddr = pg->aaddr;
+	aport = htons(pg->aport + sidx.off);
+
+	NAT64_UNLOCK(nh);
+
+	if (cfg->flags & NAT64_LOG) {
+		logdata = &loghdr;
+		nat64lsn_log(logdata, *pm, AF_INET6, pg->idx, st->cur.off);
+	} else
+		logdata = NULL;
+
+	action = nat64_do_handle_ip6(*pm, aaddr, aport, &cfg->stats, logdata);
+	if (action == NAT64SKIP)
+		return (IP_FW_PASS);
+	if (action == NAT64MFREE)
+		m_freem(*pm);
+	*pm = NULL;	/* mark mbuf as consumed */
+	return (IP_FW_DENY);
+}
+
+/*
+ * Main dataplane entry point.
+ * Validates that @cmd is this module's external action followed by an
+ * instance opcode, looks the instance up and dispatches the packet to
+ * the IPv4 or IPv6 translator depending on the flow address type.
+ * *done is always set to 1.  Returns 0 when the opcodes do not match,
+ * the instance is not found or the address type is neither 4 nor 6;
+ * otherwise returns the translator's result.
+ */
+int
+ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
+    ipfw_insn *cmd, int *done)
+{
+	ipfw_insn *icmd;
+	struct nat64lsn_cfg *cfg;
+
+	IPFW_RLOCK_ASSERT(ch);
+
+	*done = 1; /* terminate the search */
+	icmd = cmd + 1;
+	if (cmd->opcode != O_EXTERNAL_ACTION ||
+	    cmd->arg1 != V_nat64lsn_eid ||
+	    icmd->opcode != O_EXTERNAL_INSTANCE)
+		return (0);
+
+	cfg = NAT64_LOOKUP(ch, icmd);
+	if (cfg == NULL)
+		return (0);
+
+	if (args->f_id.addr_type == 4)
+		return (nat64lsn_translate4(cfg, &args->f_id, &args->m));
+	if (args->f_id.addr_type == 6)
+		return (nat64lsn_translate6(cfg, &args->f_id, &args->m));
+
+	return (0);
+}
+
+/*
+ * Constructor of the hosts zone: clears the portgroup pointer array
+ * and the state hash of a host record.  Other fields are left as is.
+ * Always returns 0.
+ */
+static int
+nat64lsn_ctor_host(void *mem, int size, void *arg, int flags)
+{
+	struct nat64lsn_host *host = mem;
+
+	memset(host->pg_ptr, 0, sizeof(host->pg_ptr));
+	memset(host->phash, 0, sizeof(host->phash));
+	return (0);
+}
+
+/*
+ * Constructor of the portgroup indexes zone: zeroes the whole item,
+ * so every pointer of a fresh chunk starts out as NULL.
+ * Always returns 0.
+ */
+static int
+nat64lsn_ctor_pgidx(void *mem, int size, void *arg, int flags)
+{
+
+ memset(mem, 0, size);
+ return (0);
+}
+
+/*
+ * Module-wide initialization: fills the protocol translation maps,
+ * initializes the job queue lock and creates the UMA zones for hosts,
+ * portgroups and chunks of portgroup pointers.
+ */
+void
+nat64lsn_init_internal(void)
+{
+
+ memset(nat64lsn_proto_map, 0, sizeof(nat64lsn_proto_map));
+ /* Set up supported protocol map */
+ nat64lsn_proto_map[IPPROTO_TCP] = NAT_PROTO_TCP;
+ nat64lsn_proto_map[IPPROTO_UDP] = NAT_PROTO_UDP;
+ /* Both ICMP flavours share one NAT protocol id */
+ nat64lsn_proto_map[IPPROTO_ICMP] = NAT_PROTO_ICMP;
+ nat64lsn_proto_map[IPPROTO_ICMPV6] = NAT_PROTO_ICMP;
+ /* Fill in reverse proto map */
+ memset(nat64lsn_rproto_map, 0, sizeof(nat64lsn_rproto_map));
+ nat64lsn_rproto_map[NAT_PROTO_TCP] = IPPROTO_TCP;
+ nat64lsn_rproto_map[NAT_PROTO_UDP] = IPPROTO_UDP;
+ nat64lsn_rproto_map[NAT_PROTO_ICMP] = IPPROTO_ICMPV6;
+
+ JQUEUE_LOCK_INIT();
+ nat64lsn_host_zone = uma_zcreate("NAT64 hosts zone",
+ sizeof(struct nat64lsn_host), nat64lsn_ctor_host, NULL,
+ NULL, NULL, UMA_ALIGN_PTR, 0);
+ nat64lsn_pg_zone = uma_zcreate("NAT64 portgroups zone",
+ sizeof(struct nat64lsn_portgroup), NULL, NULL, NULL, NULL,
+ UMA_ALIGN_PTR, 0);
+ /* One item holds NAT64LSN_PGIDX_CHUNK portgroup pointers */
+ nat64lsn_pgidx_zone = uma_zcreate("NAT64 portgroup indexes zone",
+ sizeof(struct nat64lsn_portgroup *) * NAT64LSN_PGIDX_CHUNK,
+ nat64lsn_ctor_pgidx, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+}
+
+/*
+ * Module-wide cleanup, the counterpart of nat64lsn_init_internal():
+ * destroys the job queue lock and the three UMA zones.
+ */
+void
+nat64lsn_uninit_internal(void)
+{
+
+ JQUEUE_LOCK_DESTROY();
+ uma_zdestroy(nat64lsn_host_zone);
+ uma_zdestroy(nat64lsn_pg_zone);
+ uma_zdestroy(nat64lsn_pgidx_zone);
+}
+
+/*
+ * Starts instance @cfg by arming its periodic maintenance callout;
+ * nat64lsn_periodic() first runs in hz * PERIODIC_DELAY ticks.
+ */
+void
+nat64lsn_start_instance(struct nat64lsn_cfg *cfg)
+{
+
+ callout_reset(&cfg->periodic, hz * PERIODIC_DELAY,
+ nat64lsn_periodic, cfg);
+}
+
+/*
+ * Allocates and initializes a new instance bound to chain @ch and to
+ * the current vnet.  @numaddr is the number of alias IPv4 addresses;
+ * it determines the size of the cfg->pg[] portgroup table.
+ * All allocations use M_WAITOK.  The callouts are initialized but not
+ * armed; see nat64lsn_start_instance().
+ * Returns the new instance.
+ */
+struct nat64lsn_cfg *
+nat64lsn_init_instance(struct ip_fw_chain *ch, size_t numaddr)
+{
+ struct nat64lsn_cfg *cfg;
+
+ cfg = malloc(sizeof(struct nat64lsn_cfg), M_IPFW, M_WAITOK | M_ZERO);
+ TAILQ_INIT(&cfg->jhead);
+ cfg->vp = curvnet;
+ cfg->ch = ch;
+ COUNTER_ARRAY_ALLOC(cfg->stats.stats, NAT64STATS, M_WAITOK);
+
+ /* Hosts hash */
+ cfg->ihsize = NAT64LSN_HSIZE;
+ cfg->ih = malloc(sizeof(void *) * cfg->ihsize, M_IPFW,
+ M_WAITOK | M_ZERO);
+
+ /* _ADDR_PG_COUNT portgroup slots per alias address */
+ cfg->pg = malloc(sizeof(void *) * numaddr * _ADDR_PG_COUNT, M_IPFW,
+ M_WAITOK | M_ZERO);
+
+ callout_init(&cfg->periodic, CALLOUT_MPSAFE);
+ callout_init(&cfg->jcallout, CALLOUT_MPSAFE);
+
+ return (cfg);
+}
+
+/*
+ * Destroy all hosts callback.
+ * Called on module unload when all activity already finished, so
+ * can work without any locks.
+ * Frees every portgroup of host @nh, clears its slot in cfg->pg[],
+ * then frees the host itself and decrements the host counter.
+ * Always returns 0.
+ */
+static NAT64NOINLINE int
+nat64lsn_destroy_host(struct nat64lsn_host *nh, struct nat64lsn_cfg *cfg)
+{
+ struct nat64lsn_portgroup *pg;
+ int i;
+
+ /* i is used as a 1-based slot index here */
+ for (i = nh->pg_used; i > 0; i--) {
+ pg = PORTGROUP_BYSIDX(cfg, nh, i);
+ if (pg == NULL)
+ continue;
+ cfg->pg[pg->idx] = NULL;
+ destroy_portgroup(pg);
+ nh->pg_used--;
+ }
+ destroy_host6(nh);
+ cfg->ihcount--;
+ return (0);
+}
+
+/*
+ * Stops the callouts of instance @cfg, destroys all its hosts and
+ * portgroups and frees the instance itself.
+ */
+void
+nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg)
+{
+	struct nat64lsn_host *nh, *tmp;
+
+	/*
+	 * Stop the periodic callout first: nat64lsn_periodic() may arm
+	 * the job callout via nat64lsn_enqueue_jobs().
+	 */
+	callout_drain(&cfg->periodic);
+	/*
+	 * Do not hold the job queue lock while draining:
+	 * nat64lsn_do_request() acquires it, so waiting for a running
+	 * handler with the lock held would deadlock.
+	 */
+	callout_drain(&cfg->jcallout);
+
+	I6HASH_FOREACH_SAFE(cfg, nh, tmp, nat64lsn_destroy_host, cfg);
+	DPRINTF(DP_OBJ, "instance %s: hosts %d", cfg->name, cfg->ihcount);
+
+	COUNTER_ARRAY_FREE(cfg->stats.stats, NAT64STATS);
+	free(cfg->ih, M_IPFW);
+	free(cfg->pg, M_IPFW);
+	free(cfg, M_IPFW);
+}
+
diff --git a/sys/netpfil/ipfw/nat64/nat64lsn.h b/sys/netpfil/ipfw/nat64/nat64lsn.h
new file mode 100644
index 0000000..e6ceb1d
--- /dev/null
+++ b/sys/netpfil/ipfw/nat64/nat64lsn.h
@@ -0,0 +1,351 @@
+/*-
+ * Copyright (c) 2015 Yandex LLC
+ * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
+ * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _IP_FW_NAT64LSN_H_
+#define _IP_FW_NAT64LSN_H_
+
+#define NAT64_CHUNK_SIZE_BITS 6 /* log2 of the chunk size: 64 ports */
+#define NAT64_CHUNK_SIZE (1 << NAT64_CHUNK_SIZE_BITS) /* ports per chunk (portgroup) */
+
+#define NAT64_MIN_PORT 1024 /* min_port used by nat64lsn_create() when the request leaves it 0 */
+#define NAT64_MIN_CHUNK (NAT64_MIN_PORT >> NAT64_CHUNK_SIZE_BITS) /* chunk number containing NAT64_MIN_PORT */
+
+struct st_ptr {
+ uint8_t idx; /* 1-based portgroup index of a host;
+ * PORTGROUP_BYSIDX() maps it into nh->pg_ptr.
+ * NOTE: it starts from 1.
+ */
+ uint8_t off; /* NOTE(review): presumably the state slot inside that portgroup -- confirm */
+};
+#define NAT64LSN_MAXPGPTR ((1 << (sizeof(uint8_t) * NBBY)) - 1) /* largest value st_ptr.idx can hold */
+#define NAT64LSN_PGPTRMASKBITS (sizeof(uint64_t) * NBBY) /* bits in one uint64_t */
+#define NAT64LSN_PGPTRNMASK (roundup(NAT64LSN_MAXPGPTR, \
+ NAT64LSN_PGPTRMASKBITS) / NAT64LSN_PGPTRMASKBITS) /* uint64_t words needed for NAT64LSN_MAXPGPTR bits */
+
+struct nat64lsn_portgroup;
+/* sizeof(struct nat64lsn_host) = 64 + 64x2 + 8x8 = 256 bytes (figures assume 8-byte pointers) */
+struct nat64lsn_host {
+ struct rwlock h_lock; /* Host states lock */
+
+ struct in6_addr addr; /* IPv6 address of this host */
+ struct nat64lsn_host *next; /* NOTE(review): presumably the host hash chain link -- confirm */
+ uint16_t timestamp; /* Last altered */
+ uint16_t hsize; /* ports hash size */
+ uint16_t pg_used; /* Number of portgroups used */
+#define NAT64LSN_REMAININGPG 8 /* Number of remaining PG before
+ * requesting of new chunk of indexes.
+ */
+ uint16_t pg_allocated; /* Number of portgroup indexes
+ * allocated.
+ */
+#define NAT64LSN_HSIZE 64
+ struct st_ptr phash[NAT64LSN_HSIZE]; /* XXX: hardcoded size */
+ /*
+ * PG indexes are stored in chunks of 32 elements.
+ * The maximum count is limited to 255 because st_ptr->idx is a uint8_t.
+ */
+#define NAT64LSN_PGIDX_CHUNK 32
+#define NAT64LSN_PGNIDX (roundup(NAT64LSN_MAXPGPTR, \
+ NAT64LSN_PGIDX_CHUNK) / NAT64LSN_PGIDX_CHUNK)
+ struct nat64lsn_portgroup **pg_ptr[NAT64LSN_PGNIDX]; /* PG indexes, one pointer per chunk */
+};
+
+/*
+ * Per-host rwlock wrappers.  Every macro takes a pointer to an object
+ * with an h_lock member (struct nat64lsn_host) and operates on that lock.
+ */
+#define	NAT64_LOCK_INIT(h)	do {				\
+	rw_init(&(h)->h_lock, "NAT64 host lock");		\
+} while (0)
+
+#define	NAT64_LOCK_DESTROY(h)	do {				\
+	rw_destroy(&(h)->h_lock);				\
+} while (0)
+
+/* Shared (read) side. */
+#define	NAT64_RLOCK(h)		rw_rlock(&(h)->h_lock)
+#define	NAT64_RUNLOCK(h)	rw_runlock(&(h)->h_lock)
+#define	NAT64_RLOCK_ASSERT(h)	rw_assert(&(h)->h_lock, RA_RLOCKED)
+
+/* Exclusive (write) side. */
+#define	NAT64_WLOCK(h)		rw_wlock(&(h)->h_lock)
+#define	NAT64_WUNLOCK(h)	rw_wunlock(&(h)->h_lock)
+#define	NAT64_WLOCK_ASSERT(h)	rw_assert(&(h)->h_lock, RA_WLOCKED)
+
+/* The plain LOCK/UNLOCK pair is the exclusive lock. */
+#define	NAT64_LOCK(h)		rw_wlock(&(h)->h_lock)
+#define	NAT64_UNLOCK(h)		rw_wunlock(&(h)->h_lock)
+
+/* Internal proto index; value 0 is not assigned to a protocol here */
+#define NAT_PROTO_TCP 1
+#define NAT_PROTO_UDP 2
+#define NAT_PROTO_ICMP 3
+
+#define NAT_MAX_PROTO 4 /* size of arrays indexed by the internal proto index */
+extern uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO]; /* internal proto index -> value exported in ipfw_nat64lsn_stg.proto */
+
+VNET_DECLARE(uint16_t, nat64lsn_eid);
+#define V_nat64lsn_eid VNET(nat64lsn_eid)
+#define IPFW_TLV_NAT64LSN_NAME IPFW_TLV_EACTION_NAME(V_nat64lsn_eid) /* object type used for nat64lsn named-object lookups */
+
+/*
+ * Timestamp macros.  Stamps are 16-bit: _CT is time_uptime modulo 65536,
+ * SET_AGE() stores it into x, and GET_AGE() yields the distance between
+ * the current _CT and x, adding 65536 when _CT is smaller than x (one
+ * wrap-around).  SET_AGE() is fully parenthesized so that it stays a
+ * single expression wherever it is expanded.
+ */
+#define	_CT		((int)time_uptime % 65536)
+#define	SET_AGE(x)	((x) = _CT)
+#define	GET_AGE(x)	((_CT >= (x)) ? _CT - (x) :	\
+	(int)65536 + _CT - (x))
+
+#ifdef __LP64__
+/*
+ * ffsl() is capable of checking 64-bit ints; with _FFS64 defined the
+ * portgroup free mask is kept in a single uint64_t instead of two
+ * uint32_t words.
+ */
+#define _FFS64
+#endif
+
+/* 16 bytes */
+struct nat64lsn_state {
+ union {
+ struct {
+ in_addr_t faddr; /* Remote IPv4 address */
+ uint16_t fport; /* Remote IPv4 port */
+ uint16_t lport; /* Local IPv6 port */
+ }s;
+ uint64_t hkey; /* the three fields of 's' viewed as one 64-bit value */
+ } u;
+ uint8_t nat_proto; /* NOTE(review): presumably an internal proto index (NAT_PROTO_*) -- confirm */
+ uint8_t flags; /* exported unfiltered by export_pg_states() */
+ uint16_t timestamp; /* stamp read with GET_AGE() to report idle time */
+ struct st_ptr cur; /* Index of portgroup in nat64lsn_host */
+ struct st_ptr next; /* Next entry index */
+};
+
+/*
+ * 1024+32 bytes per 64 states, used to store state
+ * AND for outside-in state lookup
+ */
+struct nat64lsn_portgroup {
+ struct nat64lsn_host *host; /* IPv6 source host info */
+ in_addr_t aaddr; /* Alias addr, network format */
+ uint16_t aport; /* Base port; state slot i is alias port aport + i */
+ uint16_t timestamp;
+ uint8_t nat_proto; /* internal proto index; indexes nat64lsn_rproto_map[] */
+ uint8_t spare[3];
+ uint32_t idx; /* position of this portgroup in cfg->pg[] */
+#ifdef _FFS64
+ uint64_t freemask; /* Mask of free entries (bit set = slot free) */
+#else
+ uint32_t freemask[2]; /* Mask of free entries (bit set = slot free) */
+#endif
+ struct nat64lsn_state states[NAT64_CHUNK_SIZE]; /* State storage */
+};
+/*
+ * Free-mask helpers for struct nat64lsn_portgroup.  A set bit marks a free
+ * state slot, so a portgroup whose mask has every bit set is empty.
+ * PG_GET_FREE_IDX() returns the 1-based position of the lowest free slot,
+ * or 0 when no slot is free.  The MARK macros are parenthesized so that
+ * each expands to a single expression.
+ */
+#ifdef _FFS64
+#define	PG_MARK_BUSY_IDX(_pg, _idx)					\
+	((_pg)->freemask &= ~((uint64_t)1<<(_idx)))
+#define	PG_MARK_FREE_IDX(_pg, _idx)					\
+	((_pg)->freemask |= ((uint64_t)1<<(_idx)))
+#define	PG_IS_FREE_IDX(_pg, _idx)	((_pg)->freemask & ((uint64_t)1<<(_idx)))
+#define	PG_IS_BUSY_IDX(_pg, _idx)	(PG_IS_FREE_IDX(_pg, _idx) == 0)
+#define	PG_GET_FREE_IDX(_pg)		(ffsll((_pg)->freemask))
+#define	PG_IS_EMPTY(_pg)		(((_pg)->freemask + 1) == 0)
+#else
+#define	PG_MARK_BUSY_IDX(_pg, _idx)					\
+	((_pg)->freemask[(_idx) / 32] &= ~((u_long)1<<((_idx) % 32)))
+#define	PG_MARK_FREE_IDX(_pg, _idx)					\
+	((_pg)->freemask[(_idx) / 32] |= ((u_long)1<<((_idx) % 32)))
+#define	PG_IS_FREE_IDX(_pg, _idx)					\
+	((_pg)->freemask[(_idx) / 32] & ((u_long)1<<((_idx) % 32)))
+#define	PG_IS_BUSY_IDX(_pg, _idx)	(PG_IS_FREE_IDX(_pg, _idx) == 0)
+#define	PG_GET_FREE_IDX(_pg)		_pg_get_free_idx(_pg)
+#define	PG_IS_EMPTY(_pg)						\
+	((((_pg)->freemask[0] + 1) == 0 && ((_pg)->freemask[1] + 1) == 0))
+
+/*
+ * Two-word variant of the free-slot search: the low word is checked
+ * first, a hit in the high word is offset by 32.
+ */
+static inline int
+_pg_get_free_idx(const struct nat64lsn_portgroup *pg)
+{
+	int i;
+
+	if ((i = ffsl(pg->freemask[0])) != 0)
+		return (i);
+	if ((i = ffsl(pg->freemask[1])) != 0)
+		return (i + 32);
+	return (0);
+}
+
+#endif
+
+TAILQ_HEAD(nat64lsn_job_head, nat64lsn_job_item);
+
+#define NAT64LSN_FLAGSMASK (NAT64_LOG) /* flag bits accepted from / exported to userland */
+struct nat64lsn_cfg {
+ struct named_object no; /* first member: the object hash entry is cast to the cfg */
+ //struct nat64_exthost *ex; /* Pointer to external addr array */
+ struct nat64lsn_portgroup **pg; /* Flat portgroup index, see GET_PORTGROUP() */
+ struct nat64lsn_host **ih; /* Host hash */
+ uint32_t prefix4; /* IPv4 prefix, host byte order */
+ uint32_t pmask4; /* Last address of the IPv4 prefix
+ * (prefix4 with all host bits set),
+ * not a netmask.
+ */
+ uint32_t ihsize; /* IPv6 host hash size */
+ uint8_t plen4; /* IPv4 prefix length */
+ uint8_t plen6; /* IPv6 prefix length; nat64lsn_create() stores 96 */
+ uint8_t nomatch_verdict;/* What to return to ipfw on no-match */
+ uint8_t nomatch_final; /* Exit outer loop? */
+ struct in6_addr prefix6; /* IPv6 prefix to embed IPv4 hosts */
+
+ uint32_t ihcount; /* Number of items in host hash */
+ int max_chunks; /* Max chunks per client */
+ int agg_prefix_len; /* Prefix length to count */
+ int agg_prefix_max; /* Max hosts per agg prefix */
+ uint32_t jmaxlen; /* Max jobqueue length */
+ uint32_t flags; /* NAT64LSN_FLAGSMASK bits */
+ uint16_t min_chunk; /* Min port group # to use */
+ uint16_t max_chunk; /* Max port group # to use */
+ uint16_t nh_delete_delay; /* Stale host delete delay */
+ uint16_t pg_delete_delay; /* Stale portgroup del delay */
+ uint16_t st_syn_ttl; /* TCP syn expire */
+ uint16_t st_close_ttl; /* TCP fin expire */
+ uint16_t st_estab_ttl; /* TCP established expire */
+ uint16_t st_udp_ttl; /* UDP expire */
+ uint16_t st_icmp_ttl; /* ICMP expire */
+ uint32_t protochunks[NAT_MAX_PROTO];/* Number of chunks used */
+
+ struct callout periodic; /* armed by nat64lsn_start_instance() */
+ struct callout jcallout; /* NOTE(review): presumably the job queue callout -- confirm */
+ struct ip_fw_chain *ch; /* owning ipfw chain */
+ struct vnet *vp; /* curvnet at instance creation */
+ struct nat64lsn_job_head jhead; /* job queue head */
+ int jlen; /* NOTE(review): presumably the current job queue length -- confirm */
+ char name[64]; /* Nat instance name */
+ nat64_stats_block stats; /* per-instance counters */
+};
+
+struct nat64lsn_cfg *nat64lsn_init_instance(struct ip_fw_chain *ch,
+ size_t numaddr);
+void nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg);
+void nat64lsn_start_instance(struct nat64lsn_cfg *cfg);
+void nat64lsn_init_internal(void);
+void nat64lsn_uninit_internal(void);
+int ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
+ ipfw_insn *cmd, int *done);
+
+void
+nat64lsn_dump_state(const struct nat64lsn_cfg *cfg,
+ const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st,
+ const char *px, int off);
+/*
+ * Portgroup layout
+ * addr x nat_proto x port_off
+ *
+ * cfg->pg[] is a flat array.  Each alias address (counted from
+ * cfg->prefix4) owns _ADDR_PG_COUNT consecutive slots: one run of
+ * _ADDR_PG_PROTO_COUNT port chunks per internal proto index.  The proto
+ * index is 1-based, hence the "- 1".
+ *
+ * All macro arguments and multi-term expansions are parenthesized so the
+ * macros evaluate the same way inside any surrounding expression.
+ */
+
+#define	_ADDR_PG_PROTO_COUNT	(65536 >> NAT64_CHUNK_SIZE_BITS)
+#define	_ADDR_PG_COUNT		(_ADDR_PG_PROTO_COUNT * NAT_MAX_PROTO)
+
+#define	GET_ADDR_IDX(_cfg, _addr)	((_addr) - ((_cfg)->prefix4))
+#define	__GET_PORTGROUP_IDX(_proto, _port)			\
+	(((_proto) - 1) * _ADDR_PG_PROTO_COUNT +		\
+	((_port) >> NAT64_CHUNK_SIZE_BITS))
+
+#define	_GET_PORTGROUP_IDX(_cfg, _addr, _proto, _port)		\
+	(GET_ADDR_IDX(_cfg, _addr) * _ADDR_PG_COUNT +		\
+	__GET_PORTGROUP_IDX(_proto, _port))
+#define	GET_PORTGROUP(_cfg, _addr, _proto, _port)		\
+	((_cfg)->pg[_GET_PORTGROUP_IDX(_cfg, _addr, _proto, _port)])
+
+/* Chunk _idx of a host's portgroup pointer table. */
+#define	PORTGROUP_CHUNK(_nh, _idx)				\
+	((_nh)->pg_ptr[(_idx)])
+/* Portgroup of host _nh addressed by the 1-based index _idx. */
+#define	PORTGROUP_BYSIDX(_cfg, _nh, _idx)			\
+	(PORTGROUP_CHUNK(_nh, ((_idx) - 1) / NAT64LSN_PGIDX_CHUNK) \
+	[((_idx) - 1) % NAT64LSN_PGIDX_CHUNK])
+
+
+/*
+ * Chained hash table.
+ *
+ * The table flavour is selected by the _PX prefix, which must provide
+ * _PX##hash, _PX##cmp, _PX##val, _PX##first, _PX##next, _PX##lock and
+ * _PX##unlock.
+ *
+ * CHT_FIND: look _key up.  On a hit _x points to the element and the
+ * bucket is left locked; on a miss _x is NULL and the bucket has been
+ * unlocked.
+ */
+#define	CHT_FIND(_ph, _hsize, _PX, _x, _key) do {			\
+	unsigned int _buck = _PX##hash(_key) & (_hsize - 1);		\
+	_PX##lock(_ph, _buck);						\
+	_x = _PX##first(_ph, _buck);					\
+	while (_x != NULL && !(_PX##cmp(_key, _PX##val(_x))))		\
+		_x = _PX##next(_x);					\
+	if (_x == NULL)							\
+		_PX##unlock(_ph, _buck);				\
+} while(0)
+
+/* Unlock bucket _buck of table _ph; the expansion is a full statement. */
+#define	CHT_UNLOCK_BUCK(_ph, _PX, _buck)				\
+	_PX##unlock(_ph, _buck);
+
+/* Unlock the bucket that _key hashes to. */
+#define	CHT_UNLOCK_KEY(_ph, _hsize, _PX, _key) do {			\
+	unsigned int _slot = _PX##hash(_key) & (_hsize - 1);		\
+	_PX##unlock(_ph, _slot);					\
+} while(0)
+
+/*
+ * Link element _i at the head of the chain selected by its own key.
+ * The bucket is locked for the duration of the insertion.
+ */
+#define	CHT_INSERT_HEAD(_ph, _hsize, _PX, _i) do {			\
+	unsigned int _slot = _PX##hash(_PX##val(_i)) & (_hsize - 1);	\
+	_PX##lock(_ph, _slot);						\
+	_PX##next(_i) = _PX##first(_ph, _slot);				\
+	_PX##first(_ph, _slot) = _i;					\
+	_PX##unlock(_ph, _slot);					\
+} while(0)
+
+/*
+ * Unlink the element matching _key, if any.  Afterwards _x points to the
+ * unlinked element (NULL when nothing matched) and _tmp to its former
+ * predecessor (NULL when it was the chain head).  The bucket is locked
+ * while the chain is searched and modified.
+ */
+#define	CHT_REMOVE(_ph, _hsize, _PX, _x, _tmp, _key) do {		\
+	unsigned int _buck = _PX##hash(_key) & (_hsize - 1);		\
+	_PX##lock(_ph, _buck);						\
+	_tmp = NULL;							\
+	_x = _PX##first(_ph, _buck);					\
+	while (_x != NULL && !(_PX##cmp(_key, _PX##val(_x)))) {		\
+		_tmp = _x;						\
+		_x = _PX##next(_x);					\
+	}								\
+	if (_x != NULL && _tmp != NULL)					\
+		_PX##next(_tmp) = _PX##next(_x);			\
+	else if (_x != NULL)						\
+		_PX##first(_ph, _buck) = _PX##next(_x);			\
+	_PX##unlock(_ph, _buck);					\
+} while(0)
+
+/*
+ * Walk every chain of the table, bucket by bucket with the bucket locked,
+ * and call _cb(_x, _arg) for each element.  A non-zero return unlinks the
+ * element from its chain; a zero return keeps it linked.
+ *
+ * The successor is fetched before the callback runs, so the walk never
+ * reads an element after the callback has handled it.  _tmp tracks the
+ * last element that stayed linked in the current chain and is the one
+ * patched when a later element is unlinked.  A callback that releases an
+ * element and still returns 0 must therefore do so for every element of
+ * the chain.
+ */
+#define	CHT_FOREACH_SAFE(_ph, _hsize, _PX, _x, _tmp, _cb, _arg)	do {	\
+	for (unsigned int _i = 0; _i < (_hsize); _i++) {		\
+		_PX##lock(_ph, _i);					\
+		_x = _PX##first(_ph, _i);				\
+		_tmp = NULL;						\
+		while (_x != NULL) {					\
+			__typeof(_x) _nx = _PX##next(_x);		\
+			if (_cb(_x, _arg) == 0)				\
+				_tmp = _x;				\
+			else if (_tmp == NULL)				\
+				_PX##first(_ph, _i) = _nx;		\
+			else						\
+				_PX##next(_tmp) = _nx;			\
+			_x = _nx;					\
+		}							\
+		_PX##unlock(_ph, _i);					\
+	}								\
+} while(0)
+
+/*
+ * Move every element of table _ph (_hsize buckets) into table _nph
+ * (_nhsize buckets), rehashing each element by its own key and linking it
+ * at the head of its new chain.  _x is the element being moved and _y
+ * holds its successor in the old chain, saved before the link is
+ * overwritten.  No bucket locks are taken and the old bucket heads are
+ * left untouched.
+ */
+#define	CHT_RESIZE(_ph, _hsize, _nph, _nhsize, _PX, _x, _y) do {	\
+	unsigned int _buck;						\
+	for (unsigned int _i = 0; _i < (_hsize); _i++) {		\
+		_y = NULL;						\
+		for (_x = _PX##first(_ph, _i); _x != NULL; _x = _y) {	\
+			_y = _PX##next(_x);				\
+			_buck = _PX##hash(_PX##val(_x)) &		\
+			    ((_nhsize) - 1);				\
+			_PX##next(_x) = _PX##first(_nph, _buck);	\
+			_PX##first(_nph, _buck) = _x;			\
+		}							\
+	}								\
+} while(0)
+
+#endif /* _IP_FW_NAT64LSN_H_ */
+
diff --git a/sys/netpfil/ipfw/nat64/nat64lsn_control.c b/sys/netpfil/ipfw/nat64/nat64lsn_control.c
new file mode 100644
index 0000000..3d79085
--- /dev/null
+++ b/sys/netpfil/ipfw/nat64/nat64lsn_control.c
@@ -0,0 +1,917 @@
+/*-
+ * Copyright (c) 2015 Yandex LLC
+ * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
+ * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/rmlock.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/sockopt.h>
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <net/pfil.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/nat64/ip_fw_nat64.h>
+#include <netpfil/ipfw/nat64/nat64lsn.h>
+#include <netinet6/ip_fw_nat64.h>
+
+VNET_DEFINE(uint16_t, nat64lsn_eid) = 0;
+
+/*
+ * Look a nat64lsn instance up by name and set in the named-object hash ni.
+ * Returns the instance, or NULL when the lookup finds nothing.
+ */
+static struct nat64lsn_cfg *
+nat64lsn_find(struct namedobj_instance *ni, const char *name, uint8_t set)
+{
+
+	return ((struct nat64lsn_cfg *)ipfw_objhash_lookup_name_type(ni,
+	    set, IPFW_TLV_NAT64LSN_NAME, name));
+}
+
+static void
+nat64lsn_default_config(ipfw_nat64lsn_cfg *uc)
+{
+
+ if (uc->max_ports == 0)
+ uc->max_ports = NAT64LSN_MAX_PORTS;
+ else
+ uc->max_ports = roundup(uc->max_ports, NAT64_CHUNK_SIZE);
+ if (uc->max_ports > NAT64_CHUNK_SIZE * NAT64LSN_MAXPGPTR)
+ uc->max_ports = NAT64_CHUNK_SIZE * NAT64LSN_MAXPGPTR;
+ if (uc->jmaxlen == 0)
+ uc->jmaxlen = NAT64LSN_JMAXLEN;
+ if (uc->jmaxlen > 65536)
+ uc->jmaxlen = 65536;
+ if (uc->nh_delete_delay == 0)
+ uc->nh_delete_delay = NAT64LSN_HOST_AGE;
+ if (uc->pg_delete_delay == 0)
+ uc->pg_delete_delay = NAT64LSN_PG_AGE;
+ if (uc->st_syn_ttl == 0)
+ uc->st_syn_ttl = NAT64LSN_TCP_SYN_AGE;
+ if (uc->st_close_ttl == 0)
+ uc->st_close_ttl = NAT64LSN_TCP_FIN_AGE;
+ if (uc->st_estab_ttl == 0)
+ uc->st_estab_ttl = NAT64LSN_TCP_EST_AGE;
+ if (uc->st_udp_ttl == 0)
+ uc->st_udp_ttl = NAT64LSN_UDP_AGE;
+ if (uc->st_icmp_ttl == 0)
+ uc->st_icmp_ttl = NAT64LSN_ICMP_AGE;
+}
+
+/*
+ * Creates new nat64lsn instance.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_lheader ipfw_nat64lsn_cfg ]
+ *
+ * The request is validated, zero tunables are replaced by defaults, and
+ * the instance is allocated outside the UH lock.  The name is checked for
+ * duplicates before the allocation and again under the UH write lock just
+ * before the instance is linked and started.
+ *
+ * Returns 0 on success, EINVAL for a malformed request, EEXIST when an
+ * instance with the same name and set already exists, and ENOSPC when no
+ * kernel index could be allocated.
+ */
+static int
+nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	ipfw_obj_lheader *olh;
+	ipfw_nat64lsn_cfg *uc;
+	struct nat64lsn_cfg *cfg;
+	struct namedobj_instance *ni;
+	uint32_t addr4, mask4;
+
+	if (sd->valsize != sizeof(*olh) + sizeof(*uc))
+		return (EINVAL);
+
+	olh = (ipfw_obj_lheader *)sd->kbuf;
+	uc = (ipfw_nat64lsn_cfg *)(olh + 1);
+
+	if (ipfw_check_object_name_generic(uc->name) != 0)
+		return (EINVAL);
+
+	if (uc->agg_prefix_len > 127 || uc->set >= IPFW_MAX_SETS)
+		return (EINVAL);
+
+	/*
+	 * A /0 alias prefix cannot be handled: the mask computation below
+	 * would shift by the full width of the type (undefined behaviour)
+	 * and the pool would have to cover 2^32 addresses.
+	 */
+	if (uc->plen4 == 0 || uc->plen4 > 32)
+		return (EINVAL);
+	if (uc->plen6 > 128 || ((uc->plen6 % 8) != 0))
+		return (EINVAL);
+
+	/* XXX: Check prefix4 to be global */
+	addr4 = ntohl(uc->prefix4.s_addr);
+	/* Unsigned arithmetic: a /1 prefix needs bit 31. */
+	mask4 = ~(((uint32_t)1 << (32 - uc->plen4)) - 1);
+	if ((addr4 & mask4) != addr4)
+		return (EINVAL);
+
+	/* XXX: Check prefix6 */
+	if (uc->min_port == 0)
+		uc->min_port = NAT64_MIN_PORT;
+	if (uc->max_port == 0)
+		uc->max_port = 65535;
+	if (uc->min_port > uc->max_port)
+		return (EINVAL);
+	/*
+	 * NOTE(review): if min_port/max_port are 16-bit fields, rounding
+	 * 65535 up to 65536 wraps to 0 -- confirm against the definition
+	 * of ipfw_nat64lsn_cfg.
+	 */
+	uc->min_port = roundup(uc->min_port, NAT64_CHUNK_SIZE);
+	uc->max_port = roundup(uc->max_port, NAT64_CHUNK_SIZE);
+
+	nat64lsn_default_config(uc);
+
+	/* Cheap duplicate check before allocating anything. */
+	ni = CHAIN_TO_SRV(ch);
+	IPFW_UH_RLOCK(ch);
+	if (nat64lsn_find(ni, uc->name, uc->set) != NULL) {
+		IPFW_UH_RUNLOCK(ch);
+		return (EEXIST);
+	}
+	IPFW_UH_RUNLOCK(ch);
+
+	/* One alias address per host value of the IPv4 prefix. */
+	cfg = nat64lsn_init_instance(ch, (size_t)1 << (32 - uc->plen4));
+	strlcpy(cfg->name, uc->name, sizeof(cfg->name));
+	cfg->no.name = cfg->name;
+	cfg->no.etlv = IPFW_TLV_NAT64LSN_NAME;
+	cfg->no.set = uc->set;
+
+	cfg->prefix4 = addr4;
+	/* Last address of the prefix, not a netmask. */
+	cfg->pmask4 = addr4 | ~mask4;
+	/* XXX: Copy 96 bits */
+	cfg->plen6 = 96;
+	memcpy(&cfg->prefix6, &uc->prefix6, cfg->plen6 / 8);
+	cfg->plen4 = uc->plen4;
+	cfg->flags = uc->flags & NAT64LSN_FLAGSMASK;
+	cfg->max_chunks = uc->max_ports / NAT64_CHUNK_SIZE;
+	cfg->agg_prefix_len = uc->agg_prefix_len;
+	cfg->agg_prefix_max = uc->agg_prefix_max;
+
+	cfg->min_chunk = uc->min_port / NAT64_CHUNK_SIZE;
+	cfg->max_chunk = uc->max_port / NAT64_CHUNK_SIZE;
+
+	cfg->jmaxlen = uc->jmaxlen;
+	cfg->nh_delete_delay = uc->nh_delete_delay;
+	cfg->pg_delete_delay = uc->pg_delete_delay;
+	cfg->st_syn_ttl = uc->st_syn_ttl;
+	cfg->st_close_ttl = uc->st_close_ttl;
+	cfg->st_estab_ttl = uc->st_estab_ttl;
+	cfg->st_udp_ttl = uc->st_udp_ttl;
+	cfg->st_icmp_ttl = uc->st_icmp_ttl;
+
+	cfg->nomatch_verdict = IP_FW_DENY;
+	cfg->nomatch_final = 1;	/* Exit outer loop by default */
+
+	IPFW_UH_WLOCK(ch);
+
+	/* Somebody may have created the same name while we were unlocked. */
+	if (nat64lsn_find(ni, uc->name, uc->set) != NULL) {
+		IPFW_UH_WUNLOCK(ch);
+		nat64lsn_destroy_instance(cfg);
+		return (EEXIST);
+	}
+
+	if (ipfw_objhash_alloc_idx(CHAIN_TO_SRV(ch), &cfg->no.kidx) != 0) {
+		IPFW_UH_WUNLOCK(ch);
+		nat64lsn_destroy_instance(cfg);
+		return (ENOSPC);
+	}
+	ipfw_objhash_add(CHAIN_TO_SRV(ch), &cfg->no);
+
+	/* Okay, let's link data */
+	IPFW_WLOCK(ch);
+	SRV_OBJECT(ch, cfg->no.kidx) = cfg;
+	IPFW_WUNLOCK(ch);
+
+	nat64lsn_start_instance(cfg);
+
+	IPFW_UH_WUNLOCK(ch);
+	return (0);
+}
+
+/*
+ * Remove an instance from the chain's named-object hash and give its
+ * kernel index back.  The caller must hold the UH write lock.
+ */
+static void
+nat64lsn_detach_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg)
+{
+	struct namedobj_instance *ni;
+
+	IPFW_UH_WLOCK_ASSERT(ch);
+
+	ni = CHAIN_TO_SRV(ch);
+	ipfw_objhash_del(ni, &cfg->no);
+	ipfw_objhash_free_idx(ni, cfg->no.kidx);
+}
+
+/*
+ * Destroys nat64 instance.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ]
+ *
+ * Returns 0 on success, EINVAL when the request size is wrong, ESRCH
+ * when no such instance exists and EBUSY while the instance is still
+ * referenced.
+ */
+static int
+nat64lsn_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	ipfw_obj_header *oh;
+	struct nat64lsn_cfg *cfg;
+	int error;
+
+	if (sd->valsize != sizeof(*oh))
+		return (EINVAL);
+
+	oh = (ipfw_obj_header *)op3;
+
+	error = 0;
+	IPFW_UH_WLOCK(ch);
+	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
+	if (cfg == NULL)
+		error = ESRCH;
+	else if (cfg->no.refcnt > 0)
+		error = EBUSY;
+	if (error != 0) {
+		IPFW_UH_WUNLOCK(ch);
+		return (error);
+	}
+
+	/* Unhook the instance from the chain before detaching it. */
+	IPFW_WLOCK(ch);
+	SRV_OBJECT(ch, cfg->no.kidx) = NULL;
+	IPFW_WUNLOCK(ch);
+
+	nat64lsn_detach_config(ch, cfg);
+	IPFW_UH_WUNLOCK(ch);
+
+	/* Nothing can look the instance up any more; free it unlocked. */
+	nat64lsn_destroy_instance(cfg);
+	return (0);
+}
+
+/* Copy one counter of the instance statistics block into *_stats. */
+#define	NAT64LSN_EXPORT_STAT(_cfg, _stats, _field)	\
+	(_stats)->_field = NAT64STAT_FETCH(&(_cfg)->stats, _field)
+
+/*
+ * Fill *stats from an instance: the counters are fetched one by one and
+ * the host count and per-protocol chunk counts are copied from cfg.
+ */
+static void
+export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
+    struct ipfw_nat64lsn_stats *stats)
+{
+
+	/* Translation counters. */
+	NAT64LSN_EXPORT_STAT(cfg, stats, opcnt64);
+	NAT64LSN_EXPORT_STAT(cfg, stats, opcnt46);
+	NAT64LSN_EXPORT_STAT(cfg, stats, ofrags);
+	NAT64LSN_EXPORT_STAT(cfg, stats, ifrags);
+	NAT64LSN_EXPORT_STAT(cfg, stats, oerrors);
+	NAT64LSN_EXPORT_STAT(cfg, stats, noroute4);
+	NAT64LSN_EXPORT_STAT(cfg, stats, noroute6);
+	NAT64LSN_EXPORT_STAT(cfg, stats, nomatch4);
+	NAT64LSN_EXPORT_STAT(cfg, stats, noproto);
+	NAT64LSN_EXPORT_STAT(cfg, stats, nomem);
+	NAT64LSN_EXPORT_STAT(cfg, stats, dropped);
+
+	/* Job queue and state counters. */
+	NAT64LSN_EXPORT_STAT(cfg, stats, jcalls);
+	NAT64LSN_EXPORT_STAT(cfg, stats, jrequests);
+	NAT64LSN_EXPORT_STAT(cfg, stats, jhostsreq);
+	NAT64LSN_EXPORT_STAT(cfg, stats, jportreq);
+	NAT64LSN_EXPORT_STAT(cfg, stats, jhostfails);
+	NAT64LSN_EXPORT_STAT(cfg, stats, jportfails);
+	NAT64LSN_EXPORT_STAT(cfg, stats, jmaxlen);
+	NAT64LSN_EXPORT_STAT(cfg, stats, jnomem);
+	NAT64LSN_EXPORT_STAT(cfg, stats, jreinjected);
+	NAT64LSN_EXPORT_STAT(cfg, stats, screated);
+	NAT64LSN_EXPORT_STAT(cfg, stats, sdeleted);
+	NAT64LSN_EXPORT_STAT(cfg, stats, spgcreated);
+	NAT64LSN_EXPORT_STAT(cfg, stats, spgdeleted);
+
+	/* Gauges kept directly in the instance. */
+	stats->hostcount = cfg->ihcount;
+	stats->tcpchunks = cfg->protochunks[NAT_PROTO_TCP];
+	stats->udpchunks = cfg->protochunks[NAT_PROTO_UDP];
+	stats->icmpchunks = cfg->protochunks[NAT_PROTO_ICMP];
+}
+#undef NAT64LSN_EXPORT_STAT
+
+/*
+ * Convert the kernel view of an instance (cfg) into the userland
+ * structure uc.  Chunk counts are turned back into port counts and the
+ * IPv4 prefix into network byte order.
+ */
+static void
+nat64lsn_export_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
+    ipfw_nat64lsn_cfg *uc)
+{
+
+	/* Identity. */
+	strlcpy(uc->name, cfg->no.name, sizeof(uc->name));
+	uc->set = cfg->no.set;
+
+	/* Prefixes. */
+	uc->prefix4.s_addr = htonl(cfg->prefix4);
+	uc->plen4 = cfg->plen4;
+	uc->prefix6 = cfg->prefix6;
+	uc->plen6 = cfg->plen6;
+
+	/* Limits and flags. */
+	uc->flags = cfg->flags & NAT64LSN_FLAGSMASK;
+	uc->max_ports = cfg->max_chunks * NAT64_CHUNK_SIZE;
+	uc->agg_prefix_len = cfg->agg_prefix_len;
+	uc->agg_prefix_max = cfg->agg_prefix_max;
+	uc->jmaxlen = cfg->jmaxlen;
+
+	/* Expiry timers. */
+	uc->nh_delete_delay = cfg->nh_delete_delay;
+	uc->pg_delete_delay = cfg->pg_delete_delay;
+	uc->st_syn_ttl = cfg->st_syn_ttl;
+	uc->st_close_ttl = cfg->st_close_ttl;
+	uc->st_estab_ttl = cfg->st_estab_ttl;
+	uc->st_udp_ttl = cfg->st_udp_ttl;
+	uc->st_icmp_ttl = cfg->st_icmp_ttl;
+}
+
+struct nat64_dump_arg { /* argument handed to export_config_cb() */
+ struct ip_fw_chain *ch; /* chain being listed */
+ struct sockopt_data *sd; /* reply buffer the records are appended to */
+};
+
+/*
+ * Per-object callback used when listing instances: reserves room for
+ * one ipfw_nat64lsn_cfg record in the reply buffer and fills it from the
+ * instance behind no.  arg is a struct nat64_dump_arg.  Always returns 0.
+ */
+static int
+export_config_cb(struct namedobj_instance *ni, struct named_object *no,
+    void *arg)
+{
+	struct nat64_dump_arg *dump;
+	ipfw_nat64lsn_cfg *rec;
+
+	dump = (struct nat64_dump_arg *)arg;
+	rec = (ipfw_nat64lsn_cfg *)ipfw_get_sopt_space(dump->sd,
+	    sizeof(*rec));
+	nat64lsn_export_config(dump->ch, (struct nat64lsn_cfg *)no, rec);
+	return (0);
+}
+
+/*
+ * Lists all nat64 lsn instances currently available in kernel.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_lheader ]
+ * Reply: [ ipfw_obj_lheader ipfw_nat64lsn_cfg x N ]
+ *
+ * The header is always filled with the object count, the object size
+ * and the total size needed.  Returns 0 on success, EINVAL when the
+ * buffer cannot hold the header and ENOMEM when it cannot hold all
+ * records.
+ */
+static int
+nat64lsn_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	struct nat64_dump_arg da;
+	ipfw_obj_lheader *olh;
+	int error;
+
+	/* Check minimum header size */
+	if (sd->valsize < sizeof(*olh))
+		return (EINVAL);
+
+	olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh));
+
+	error = 0;
+	IPFW_UH_RLOCK(ch);
+	olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch),
+	    IPFW_TLV_NAT64LSN_NAME);
+	olh->objsize = sizeof(ipfw_nat64lsn_cfg);
+	olh->size = sizeof(*olh) + olh->count * olh->objsize;
+
+	if (sd->valsize >= olh->size) {
+		memset(&da, 0, sizeof(da));
+		da.ch = ch;
+		da.sd = sd;
+		ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb,
+		    &da, IPFW_TLV_NAT64LSN_NAME);
+	} else
+		error = ENOMEM;
+	IPFW_UH_RUNLOCK(ch);
+
+	return (error);
+}
+
+/*
+ * Change existing nat64lsn instance configuration.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ipfw_nat64lsn_cfg ]
+ * Reply: [ ipfw_obj_header ipfw_nat64lsn_cfg ]
+ *
+ * A GET request exports the current configuration into the reply.  A SET
+ * request first replaces zero tunables with their defaults and then
+ * updates the changeable subset of the instance under the UH write lock.
+ *
+ * Returns 0 on success, EINVAL for a malformed request and ESRCH when
+ * the named instance does not exist (the same code the other handlers in
+ * this file use for a failed lookup).
+ */
+static int
+nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op,
+    struct sockopt_data *sd)
+{
+	ipfw_obj_header *oh;
+	ipfw_nat64lsn_cfg *uc;
+	struct nat64lsn_cfg *cfg;
+	struct namedobj_instance *ni;
+
+	if (sd->valsize != sizeof(*oh) + sizeof(*uc))
+		return (EINVAL);
+
+	oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd,
+	    sizeof(*oh) + sizeof(*uc));
+	uc = (ipfw_nat64lsn_cfg *)(oh + 1);
+
+	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
+	    oh->ntlv.set >= IPFW_MAX_SETS)
+		return (EINVAL);
+
+	ni = CHAIN_TO_SRV(ch);
+	if (sd->sopt->sopt_dir == SOPT_GET) {
+		IPFW_UH_RLOCK(ch);
+		cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
+		if (cfg == NULL) {
+			IPFW_UH_RUNLOCK(ch);
+			return (ESRCH);
+		}
+		nat64lsn_export_config(ch, cfg, uc);
+		IPFW_UH_RUNLOCK(ch);
+		return (0);
+	}
+
+	nat64lsn_default_config(uc);
+
+	IPFW_UH_WLOCK(ch);
+	cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
+	if (cfg == NULL) {
+		IPFW_UH_WUNLOCK(ch);
+		return (ESRCH);
+	}
+
+	/*
+	 * For now allow to change only following values:
+	 * jmaxlen, nh_del_age, pg_del_age, tcp_syn_age, tcp_close_age,
+	 * tcp_est_age, udp_age, icmp_age, flags, max_ports.
+	 */
+
+	cfg->max_chunks = uc->max_ports / NAT64_CHUNK_SIZE;
+	cfg->jmaxlen = uc->jmaxlen;
+	cfg->nh_delete_delay = uc->nh_delete_delay;
+	cfg->pg_delete_delay = uc->pg_delete_delay;
+	cfg->st_syn_ttl = uc->st_syn_ttl;
+	cfg->st_close_ttl = uc->st_close_ttl;
+	cfg->st_estab_ttl = uc->st_estab_ttl;
+	cfg->st_udp_ttl = uc->st_udp_ttl;
+	cfg->st_icmp_ttl = uc->st_icmp_ttl;
+	cfg->flags = uc->flags & NAT64LSN_FLAGSMASK;
+
+	IPFW_UH_WUNLOCK(ch);
+
+	return (0);
+}
+
+/*
+ * Get nat64lsn statistics.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ]
+ * Reply: [ ipfw_obj_header ipfw_counter_tlv ]
+ *
+ * The statistics are snapshotted under the UH read lock and written
+ * behind the object header as a counter TLV of uint64_t values.
+ *
+ * Returns 0 on success, EINVAL when the buffer size is not a multiple of
+ * 8 or the header cannot be obtained, ENOMEM when the buffer is too small
+ * and ESRCH when the instance does not exist.
+ */
+static int
+nat64lsn_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
+    struct sockopt_data *sd)
+{
+	struct ipfw_nat64lsn_stats stats;
+	struct nat64lsn_cfg *cfg;
+	ipfw_obj_header *oh;
+	ipfw_obj_ctlv *ctlv;
+	size_t need;
+
+	need = sizeof(*oh) + sizeof(*ctlv) + sizeof(stats);
+	if ((sd->valsize % sizeof(uint64_t)) != 0)
+		return (EINVAL);
+	if (sd->valsize < need)
+		return (ENOMEM);
+	oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, need);
+	if (oh == NULL)
+		return (EINVAL);
+	memset(&stats, 0, sizeof(stats));
+
+	IPFW_UH_RLOCK(ch);
+	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
+	if (cfg != NULL)
+		export_stats(ch, cfg, &stats);
+	IPFW_UH_RUNLOCK(ch);
+	if (cfg == NULL)
+		return (ESRCH);
+
+	/* Counter TLV header followed by the raw counters. */
+	ctlv = (ipfw_obj_ctlv *)(oh + 1);
+	memset(ctlv, 0, sizeof(*ctlv));
+	ctlv->head.type = IPFW_TLV_COUNTERS;
+	ctlv->head.length = need - sizeof(*oh);
+	ctlv->count = sizeof(stats) / sizeof(uint64_t);
+	ctlv->objsize = sizeof(uint64_t);
+	ctlv->version = IPFW_NAT64_VERSION;
+	memcpy(ctlv + 1, &stats, sizeof(stats));
+	return (0);
+}
+
+/*
+ * Reset nat64lsn statistics.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ]
+ *
+ * Zeroes the counter array of the named instance under the UH write
+ * lock.  Returns 0 on success, EINVAL for a malformed request and ESRCH
+ * when the instance does not exist.
+ */
+static int
+nat64lsn_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
+    struct sockopt_data *sd)
+{
+	ipfw_obj_header *oh;
+	struct nat64lsn_cfg *cfg;
+	int error;
+
+	if (sd->valsize != sizeof(*oh))
+		return (EINVAL);
+	oh = (ipfw_obj_header *)sd->kbuf;
+	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0)
+		return (EINVAL);
+	if (oh->ntlv.set >= IPFW_MAX_SETS)
+		return (EINVAL);
+
+	error = ESRCH;
+	IPFW_UH_WLOCK(ch);
+	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
+	if (cfg != NULL) {
+		COUNTER_ARRAY_ZERO(cfg->stats.stats, NAT64STATS);
+		error = 0;
+	}
+	IPFW_UH_WUNLOCK(ch);
+	return (error);
+}
+
+/*
+ * Reply: [ ipfw_obj_header ipfw_obj_data [ ipfw_nat64lsn_stg
+ * ipfw_nat64lsn_state x count, ... ] ]
+ *
+ * Export the busy states of one portgroup.  The owning host is locked
+ * while the free mask is scanned and the states are copied.  With no busy
+ * state only stg->count is set (to 0).  Otherwise room for the state
+ * records is reserved in sd, the group header stg is filled and one
+ * record is written per busy slot.
+ *
+ * Returns 0 on success and 1 when the reply buffer has no room left.
+ */
+static int
+export_pg_states(struct nat64lsn_cfg *cfg, struct nat64lsn_portgroup *pg,
+    ipfw_nat64lsn_stg *stg, struct sockopt_data *sd)
+{
+	ipfw_nat64lsn_state *out;
+	struct nat64lsn_state *st;
+	int busy, error, slot;
+
+	error = 0;
+	NAT64_LOCK(pg->host);
+
+	/* First pass: count the busy slots. */
+	busy = 0;
+	for (slot = 0; slot < NAT64_CHUNK_SIZE; slot++)
+		if (PG_IS_BUSY_IDX(pg, slot))
+			busy++;
+	DPRINTF(DP_STATE, "EXPORT PG %d, count %d", pg->idx, busy);
+
+	if (busy == 0) {
+		stg->count = 0;
+		goto done;
+	}
+	out = (ipfw_nat64lsn_state *)ipfw_get_sopt_space(sd,
+	    busy * sizeof(ipfw_nat64lsn_state));
+	if (out == NULL) {
+		error = 1;
+		goto done;
+	}
+
+	stg->alias4.s_addr = pg->aaddr;
+	stg->proto = nat64lsn_rproto_map[pg->nat_proto];
+	stg->flags = 0;
+	stg->host6 = pg->host->addr;
+	stg->count = busy;
+
+	/* Second pass: one record per busy slot. */
+	for (slot = 0; slot < NAT64_CHUNK_SIZE; slot++) {
+		if (PG_IS_BUSY_IDX(pg, slot)) {
+			st = &pg->states[slot];
+			out->daddr.s_addr = st->u.s.faddr;
+			out->dport = st->u.s.fport;
+			out->aport = pg->aport + slot;
+			out->sport = st->u.s.lport;
+			out->flags = st->flags; /* XXX filter flags */
+			out->idle = GET_AGE(st->timestamp);
+			out++;
+		}
+	}
+done:
+	NAT64_UNLOCK(pg->host);
+	return (error);
+}
+
+/*
+ * Advance the (addr, nat_proto, port) cursor to the next portgroup
+ * position.  The port moves forward one chunk at a time; when it runs out
+ * it restarts at 0 and the proto index is bumped; when that runs out it
+ * restarts at 1 and the address is bumped, up to cfg->pmask4.
+ *
+ * Returns 0 when the cursor was advanced and 1 at the end of the space
+ * (port and proto have been reset by then).
+ */
+static int
+get_next_idx(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto,
+    uint16_t *port)
+{
+
+	if (*port >= 65536 - NAT64_CHUNK_SIZE) {
+		*port = 0;
+		if (*nat_proto >= NAT_MAX_PROTO - 1) {
+			*nat_proto = 1;
+			if (*addr >= cfg->pmask4) {
+				/* End of space. */
+				return (1);
+			}
+			*addr += 1;
+		} else
+			*nat_proto += 1;
+	} else
+		*port += NAT64_CHUNK_SIZE;
+
+	return (0);
+}
+
+#define PACK_IDX(addr, proto, port) \
+ ((uint64_t)addr << 32) | ((uint32_t)port << 16) | (proto << 8)
+#define UNPACK_IDX(idx, addr, proto, port) \
+ (addr) = (uint32_t)((idx) >> 32); \
+ (port) = (uint16_t)(((idx) >> 16) & 0xFFFF); \
+ (proto) = (uint8_t)(((idx) >> 8) & 0xFF)
+
+/*
+ * Advance the cursor until it points at an allocated portgroup.
+ * Returns that portgroup, or NULL when the end of the space is reached
+ * first.  The cursor is updated in place in both cases.
+ */
+static struct nat64lsn_portgroup *
+get_next_pg(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto,
+    uint16_t *port)
+{
+	struct nat64lsn_portgroup *found;
+	uint64_t before, after;
+
+	before = PACK_IDX(*addr, *nat_proto, *port);
+	do {
+		if (get_next_idx(cfg, addr, nat_proto, port) != 0) {
+			/* End of states */
+			return (NULL);
+		}
+		found = GET_PORTGROUP(cfg, *addr, *nat_proto, *port);
+	} while (found == NULL);
+
+	/* Debug aid: the packed cursor is expected to have changed. */
+	after = PACK_IDX(*addr, *nat_proto, *port);
+	if (before == after)
+		DPRINTF(DP_STATE, "XXX: PACK_IDX %u %d %d",
+		    *addr, *nat_proto, *port);
+	return (found);
+}
+
+/*
+ * Return the portgroup at the cursor position if one is allocated
+ * there; otherwise advance to the next allocated portgroup.  Returns NULL
+ * when there is none.
+ */
+static NAT64NOINLINE struct nat64lsn_portgroup *
+get_first_pg(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto,
+    uint16_t *port)
+{
+	struct nat64lsn_portgroup *here;
+
+	here = GET_PORTGROUP(cfg, *addr, *nat_proto, *port);
+	if (here != NULL)
+		return (here);
+	return (get_next_pg(cfg, addr, nat_proto, port));
+}
+
+/*
+ * Lists nat64lsn states.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]]
+ * Reply: [ ipfw_obj_header ipfw_obj_data [
+ * ipfw_nat64lsn_stg ipfw_nat64lsn_state x N] ]
+ *
+ * The uint64_t in the request is a packed cursor (see PACK_IDX()); an
+ * address of 0 means "start from the beginning".  Each exported group
+ * header carries next_idx, the cursor of the following portgroup, or
+ * 0xFF when there is none, so the caller can resume where the buffer ran
+ * out.
+ *
+ * Returns 0 on success, EINVAL for a malformed request or cursor, ESRCH
+ * when the instance does not exist and ENOMEM when the buffer cannot
+ * hold even the first group.  The UH read lock is released on every
+ * return path.
+ */
+static int
+nat64lsn_states(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	ipfw_obj_header *oh;
+	ipfw_obj_data *od;
+	ipfw_nat64lsn_stg *stg;
+	struct nat64lsn_cfg *cfg;
+	struct nat64lsn_portgroup *pg, *pg_next;
+	uint64_t next_idx;
+	size_t sz;
+	uint32_t addr, states;
+	uint16_t port;
+	uint8_t nat_proto;
+
+	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
+	    sizeof(uint64_t);
+	/* Check minimum header size */
+	if (sd->valsize < sz)
+		return (EINVAL);
+
+	oh = (ipfw_obj_header *)sd->kbuf;
+	od = (ipfw_obj_data *)(oh + 1);
+	if (od->head.type != IPFW_TLV_OBJDATA ||
+	    od->head.length != sz - sizeof(ipfw_obj_header))
+		return (EINVAL);
+
+	next_idx = *(uint64_t *)(od + 1);
+	/* Translate index to the request position to start from */
+	UNPACK_IDX(next_idx, addr, nat_proto, port);
+	if (nat_proto >= NAT_MAX_PROTO)
+		return (EINVAL);
+	if (nat_proto == 0 && addr != 0)
+		return (EINVAL);
+
+	IPFW_UH_RLOCK(ch);
+	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
+	if (cfg == NULL) {
+		IPFW_UH_RUNLOCK(ch);
+		return (ESRCH);
+	}
+	/* Fill in starting point */
+	if (addr == 0) {
+		addr = cfg->prefix4;
+		nat_proto = 1;
+		port = 0;
+	}
+	if (addr < cfg->prefix4 || addr > cfg->pmask4) {
+		IPFW_UH_RUNLOCK(ch);
+		DPRINTF(DP_GENERIC | DP_STATE, "XXX: %ju %u %u",
+		    (uintmax_t)next_idx, addr, cfg->pmask4);
+		return (EINVAL);
+	}
+
+	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
+	    sizeof(ipfw_nat64lsn_stg);
+	if (sd->valsize < sz) {
+		/* Do not leave the UH read lock held on this error path. */
+		IPFW_UH_RUNLOCK(ch);
+		return (ENOMEM);
+	}
+	oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, sz);
+	od = (ipfw_obj_data *)(oh + 1);
+	od->head.type = IPFW_TLV_OBJDATA;
+	od->head.length = sz - sizeof(ipfw_obj_header);
+	stg = (ipfw_nat64lsn_stg *)(od + 1);
+
+	pg = get_first_pg(cfg, &addr, &nat_proto, &port);
+	if (pg == NULL) {
+		/* No states */
+		stg->next_idx = 0xFF;
+		stg->count = 0;
+		IPFW_UH_RUNLOCK(ch);
+		return (0);
+	}
+	states = 0;
+	pg_next = NULL;
+	while (pg != NULL) {
+		/* Look ahead so the group header can carry the next cursor. */
+		pg_next = get_next_pg(cfg, &addr, &nat_proto, &port);
+		if (pg_next == NULL)
+			stg->next_idx = 0xFF;
+		else
+			stg->next_idx = PACK_IDX(addr, nat_proto, port);
+
+		if (export_pg_states(cfg, pg, stg, sd) != 0) {
+			/* Out of room: fail only if nothing was exported. */
+			IPFW_UH_RUNLOCK(ch);
+			return (states == 0 ? ENOMEM: 0);
+		}
+		states += stg->count;
+		od->head.length += stg->count * sizeof(ipfw_nat64lsn_state);
+		sz += stg->count * sizeof(ipfw_nat64lsn_state);
+		if (pg_next != NULL) {
+			/* Reserve the header of the next group, if it fits. */
+			sz += sizeof(ipfw_nat64lsn_stg);
+			if (sd->valsize < sz)
+				break;
+			stg = (ipfw_nat64lsn_stg *)ipfw_get_sopt_space(sd,
+			    sizeof(ipfw_nat64lsn_stg));
+		}
+		pg = pg_next;
+	}
+	IPFW_UH_RUNLOCK(ch);
+	return (0);
+}
+
+/*
+ * Socket option handlers of the nat64lsn module.  nat64lsn_init() registers
+ * this table with IPFW_ADD_SOPT_HANDLER() and nat64lsn_uninit() removes it
+ * with IPFW_DEL_SOPT_HANDLER().  Each row names an IP_FW_NAT64LSN_* opcode,
+ * a HDIR_* direction constant and the handler function; the second column
+ * is presumably the opcode version -- confirm against the declaration of
+ * struct ipfw_sopt_handler.
+ */
+static struct ipfw_sopt_handler scodes[] = {
+ { IP_FW_NAT64LSN_CREATE, 0, HDIR_BOTH, nat64lsn_create },
+ { IP_FW_NAT64LSN_DESTROY,0, HDIR_SET, nat64lsn_destroy },
+ { IP_FW_NAT64LSN_CONFIG, 0, HDIR_BOTH, nat64lsn_config },
+ { IP_FW_NAT64LSN_LIST, 0, HDIR_GET, nat64lsn_list },
+ { IP_FW_NAT64LSN_STATS, 0, HDIR_GET, nat64lsn_stats },
+ { IP_FW_NAT64LSN_RESET_STATS,0, HDIR_SET, nat64lsn_reset_stats },
+ { IP_FW_NAT64LSN_LIST_STATES,0, HDIR_GET, nat64lsn_states },
+};
+
+/*
+ * Classifier of the object rewriter: decides whether the instruction cmd
+ * refers to a nat64lsn instance.  It does so only when the instruction
+ * right before it is O_EXTERNAL_ACTION carrying our eaction id.
+ *
+ * Returns 0 and fills *puidx (from cmd->arg1) and *ptype (always 0) on a
+ * match, 1 otherwise.
+ */
+static int
+nat64lsn_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
+{
+	ipfw_insn *action;
+
+	action = cmd - 1;
+	if (action->opcode == O_EXTERNAL_ACTION &&
+	    action->arg1 == V_nat64lsn_eid) {
+		*puidx = cmd->arg1;
+		*ptype = 0;
+		return (0);
+	}
+	return (1);
+}
+
+/*
+ * Update callback of the object rewriter: stores the index idx in the
+ * arg1 field of the instruction cmd.
+ */
+static void
+nat64lsn_update_arg1(ipfw_insn *cmd, uint16_t idx)
+{
+
+ cmd->arg1 = idx;
+}
+
+/*
+ * Lookup-by-name callback of the object rewriter.  Delegates to
+ * ipfw_objhash_find_type() restricted to the nat64lsn name TLV type and
+ * hands its result back unchanged; the object found is stored in *pno.
+ */
+static int
+nat64lsn_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
+    struct named_object **pno)
+{
+
+	return (ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti,
+	    IPFW_TLV_NAT64LSN_NAME, pno));
+}
+
+/*
+ * Lookup-by-index callback of the object rewriter.  Asserts that the UH
+ * write lock is held, then returns the named object registered under the
+ * kernel index idx; a missing object trips the KASSERT.
+ */
+static struct named_object *
+nat64lsn_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
+{
+	struct named_object *obj;
+
+	IPFW_UH_WLOCK_ASSERT(ch);
+	obj = ipfw_objhash_lookup_kidx(CHAIN_TO_SRV(ch), idx);
+	KASSERT(obj != NULL, ("NAT64LSN with index %d not found", idx));
+
+	return (obj);
+}
+
+/*
+ * Set-management callback of the object rewriter.  Forwards the request
+ * (set, new_set, cmd) to ipfw_obj_manage_sets() for objects of the
+ * nat64lsn name TLV type and returns its result.
+ */
+static int
+nat64lsn_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
+ enum ipfw_sets_cmd cmd)
+{
+
+ return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NAT64LSN_NAME,
+ set, new_set, cmd));
+}
+
+/*
+ * Object rewriter description for the O_EXTERNAL_INSTANCE opcode, wiring
+ * in the nat64lsn callbacks defined above.  Registered by nat64lsn_init()
+ * via IPFW_ADD_OBJ_REWRITER() and removed by nat64lsn_uninit().
+ */
+static struct opcode_obj_rewrite opcodes[] = {
+ {
+ .opcode = O_EXTERNAL_INSTANCE,
+ .etlv = IPFW_TLV_EACTION /* just show it isn't table */,
+ .classifier = nat64lsn_classify,
+ .update = nat64lsn_update_arg1,
+ .find_byname = nat64lsn_findbyname,
+ .find_bykidx = nat64lsn_findbykidx,
+ .manage_sets = nat64lsn_manage_sets,
+ },
+};
+
+/*
+ * ipfw_objhash_foreach_type() callback used on module unload.  arg is the
+ * chain; the instance stored in the chain's service object slot no->kidx
+ * is unlinked from that slot, detached and destroyed.  Always returns 0.
+ */
+static int
+destroy_config_cb(struct namedobj_instance *ni, struct named_object *no,
+    void *arg)
+{
+	struct ip_fw_chain *chain;
+	struct nat64lsn_cfg *instance;
+
+	chain = (struct ip_fw_chain *)arg;
+	instance = (struct nat64lsn_cfg *)SRV_OBJECT(chain, no->kidx);
+	/* Clear the slot before the instance goes away. */
+	SRV_OBJECT(chain, no->kidx) = NULL;
+	nat64lsn_detach_config(chain, instance);
+	nat64lsn_destroy_instance(instance);
+	return (0);
+}
+
+/*
+ * Attaches the nat64lsn module to the chain ch.  When first is non-zero
+ * the module-wide state is set up through nat64lsn_init_internal().  The
+ * "nat64lsn" external action is then registered and, on success, the
+ * sockopt handlers and the object rewriter are added.
+ *
+ * Returns 0 on success or ENXIO when no eaction id could be obtained.
+ */
+int
+nat64lsn_init(struct ip_fw_chain *ch, int first)
+{
+
+	if (first != 0)
+		nat64lsn_init_internal();
+
+	V_nat64lsn_eid = ipfw_add_eaction(ch, ipfw_nat64lsn, "nat64lsn");
+	if (V_nat64lsn_eid != 0) {
+		IPFW_ADD_SOPT_HANDLER(first, scodes);
+		IPFW_ADD_OBJ_REWRITER(first, opcodes);
+		return (0);
+	}
+	return (ENXIO);
+}
+
+/*
+ * Detaches the nat64lsn module from the chain ch: removes the object
+ * rewriter and the sockopt handlers, deregisters the external action and
+ * destroys every nat64lsn instance of the chain under the UH write lock.
+ * When last is non-zero the module-wide state is released as well.
+ */
+void
+nat64lsn_uninit(struct ip_fw_chain *ch, int last)
+{
+
+ IPFW_DEL_OBJ_REWRITER(last, opcodes);
+ IPFW_DEL_SOPT_HANDLER(last, scodes);
+ ipfw_del_eaction(ch, V_nat64lsn_eid);
+ /*
+ * Since we already have deregistered external action,
+ * our named objects become inaccessible via rules, because
+ * all rules were truncated by ipfw_del_eaction().
+ * So, we can unlink and destroy our named objects without holding
+ * IPFW_WLOCK().
+ */
+ IPFW_UH_WLOCK(ch);
+ ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch,
+ IPFW_TLV_NAT64LSN_NAME);
+ V_nat64lsn_eid = 0;
+ IPFW_UH_WUNLOCK(ch);
+ if (last != 0)
+ nat64lsn_uninit_internal();
+}
+
diff --git a/sys/netpfil/ipfw/nat64/nat64stl.c b/sys/netpfil/ipfw/nat64/nat64stl.c
new file mode 100644
index 0000000..3a13aba
--- /dev/null
+++ b/sys/netpfil/ipfw/nat64/nat64stl.c
@@ -0,0 +1,262 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/rmlock.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_pflog.h>
+#include <net/pfil.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
+#include <netinet/ip6.h>
+#include <netinet/icmp6.h>
+#include <netinet6/ip_fw_nat64.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/nat64/ip_fw_nat64.h>
+#include <netpfil/ipfw/nat64/nat64_translate.h>
+#include <netpfil/ipfw/nat64/nat64stl.h>
+#include <netpfil/pf/pf.h>
+
+/*
+ * Fetches the nat64stl instance stored in the chain's service object slot
+ * named by cmd->arg1.  The whole expansion is parenthesized so that the
+ * cast binds before any operator the caller applies to the result
+ * (e.g. NAT64_LOOKUP(ch, cmd)->flags).
+ */
+#define	NAT64_LOOKUP(chain, cmd)	\
+	((struct nat64stl_cfg *)SRV_OBJECT((chain), (cmd)->arg1))
+
+/*
+ * Prepares the pflog header plog for the packet m and hands both to
+ * ipfw_bpf_mtap2().  The header is zeroed first; rulenr receives the
+ * instance index kidx and subrulenr a counter that is bumped on every
+ * call, both converted with htonl().
+ */
+static void
+nat64stl_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
+    uint32_t kidx)
+{
+	static uint32_t pktid = 0;
+
+	memset(plog, 0, sizeof(*plog));
+
+	/* Fixed part of the header. */
+	strlcpy(plog->ifname, "NAT64STL", sizeof(plog->ifname));
+	plog->ruleset[0] = '\0';
+	plog->length = PFLOG_REAL_HDRLEN;
+	plog->action = PF_NAT;
+	plog->dir = PF_IN;
+
+	/* Per-packet part of the header. */
+	plog->af = family;
+	plog->rulenr = htonl(kidx);
+	plog->subrulenr = htonl(++pktid);
+
+	ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
+}
+
+/*
+ * Handles an IPv4 packet whose destination matched the map46 table.
+ * tablearg indexes the table value that supplies the IPv6 destination;
+ * the IPv6 source is built from the configured prefix6 and the IPv4
+ * source via nat64_set_ip4().
+ *
+ * Returns NAT64SKIP when one of the IPv4 address checks fails, NAT64MFREE
+ * when the IPv6 destination fails nat64_check_ip6(), otherwise the result
+ * of nat64_do_handle_ip4().
+ */
+static int
+nat64stl_handle_ip4(struct ip_fw_chain *chain, struct nat64stl_cfg *cfg,
+    struct mbuf *m, uint32_t tablearg)
+{
+	struct pfloghdr loghdr, *logdata;
+	struct in6_addr src6, dst6;
+	struct ip *ip4;
+
+	ip4 = mtod(m, struct ip*);
+	if (nat64_check_ip4(ip4->ip_src.s_addr) != 0 ||
+	    nat64_check_ip4(ip4->ip_dst.s_addr) != 0 ||
+	    nat64_check_private_ip4(ip4->ip_src.s_addr) != 0 ||
+	    nat64_check_private_ip4(ip4->ip_dst.s_addr) != 0)
+		return (NAT64SKIP);
+
+	dst6 = TARG_VAL(chain, tablearg, nh6);
+	if (nat64_check_ip6(&dst6) != 0)
+		return (NAT64MFREE);
+
+	src6 = cfg->prefix6;
+	nat64_set_ip4(&src6, ip4->ip_src.s_addr);
+
+	/* Log only when the instance asks for it. */
+	logdata = NULL;
+	if (cfg->flags & NAT64_LOG) {
+		logdata = &loghdr;
+		nat64stl_log(logdata, m, AF_INET, cfg->no.kidx);
+	}
+	return (nat64_do_handle_ip4(m, &src6, &dst6, 0, &cfg->stats,
+	    logdata));
+}
+
+/*
+ * Handles an IPv6 packet whose source matched the map64 table.  tablearg
+ * indexes the table value that supplies the IPv4 address passed on to
+ * nat64_do_handle_ip6().
+ *
+ * Returns NAT64SKIP when the leading plen6 / 8 bytes of the destination
+ * differ from the configured prefix6, otherwise the result of
+ * nat64_do_handle_ip6().
+ */
+static int
+nat64stl_handle_ip6(struct ip_fw_chain *chain, struct nat64stl_cfg *cfg,
+    struct mbuf *m, uint32_t tablearg)
+{
+	struct pfloghdr loghdr, *logdata;
+	struct ip6_hdr *hdr6;
+	uint32_t addr4;
+
+	addr4 = htonl(TARG_VAL(chain, tablearg, nh4));
+
+	/*
+	 * NOTE: we expect ipfw_chk() did m_pullup() up to upper level
+	 * protocol's headers. Also we skip some checks, that ip6_input(),
+	 * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did.
+	 */
+	hdr6 = mtod(m, struct ip6_hdr *);
+	/* The destination has to lie within the configured prefix. */
+	if (bcmp(&hdr6->ip6_dst, &cfg->prefix6, cfg->plen6 / 8) != 0)
+		return (NAT64SKIP);
+
+	/* Log only when the instance asks for it. */
+	logdata = NULL;
+	if (cfg->flags & NAT64_LOG) {
+		logdata = &loghdr;
+		nat64stl_log(logdata, m, AF_INET6, cfg->no.kidx);
+	}
+	return (nat64_do_handle_ip6(m, addr4, 0, &cfg->stats, logdata));
+}
+
+/*
+ * Handles an ICMPv6 message for which the caller's map64 lookup on the
+ * outer source address failed.  Only the four ICMPv6 types listed in the
+ * switch below are accepted; the destination address of the IPv6 header
+ * embedded in the message is then looked up in map64 and the packet is
+ * passed to nat64_handle_icmp6().
+ *
+ * Returns NAT64MFREE when the packet is rejected and still has to be
+ * freed, NAT64RETURN when the mbuf is no longer available here (m_pullup()
+ * returned NULL, or the chain was freed after the table lookup found no
+ * entry), otherwise the result of nat64_handle_icmp6().
+ */
+static int
+nat64stl_handle_icmp6(struct ip_fw_chain *chain, struct nat64stl_cfg *cfg,
+ struct mbuf *m)
+{
+ struct pfloghdr loghdr, *logdata;
+ nat64_stats_block *stats;
+ struct ip6_hdr *ip6i;
+ struct icmp6_hdr *icmp6;
+ uint32_t tablearg;
+ int hlen, proto;
+
+ hlen = 0;
+ stats = &cfg->stats;
+ /* On return hlen is used as the offset of the ICMPv6 header. */
+ proto = nat64_getlasthdr(m, &hlen);
+ if (proto != IPPROTO_ICMPV6) {
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+ icmp6 = mtodo(m, hlen);
+ switch (icmp6->icmp6_type) {
+ case ICMP6_DST_UNREACH:
+ case ICMP6_PACKET_TOO_BIG:
+ case ICMP6_TIME_EXCEED_TRANSIT:
+ case ICMP6_PARAM_PROB:
+ break;
+ default:
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+ /* From here on hlen is the offset of the embedded IPv6 header. */
+ hlen += sizeof(struct icmp6_hdr);
+ if (m->m_pkthdr.len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) {
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+ /*
+ * NOTE(review): m_pullup() may hand back a different mbuf than the
+ * one the caller still references through args->m -- confirm that a
+ * later NAT64MFREE result cannot make the caller free a stale pointer.
+ */
+ if (m->m_len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN)
+ m = m_pullup(m, hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN);
+ if (m == NULL) {
+ NAT64STAT_INC(stats, nomem);
+ return (NAT64RETURN);
+ }
+ /*
+ * Use destination address from inner IPv6 header to determine
+ * IPv4 mapped address.
+ */
+ ip6i = mtodo(m, hlen);
+ if (ipfw_lookup_table(chain, cfg->map64,
+ sizeof(struct in6_addr), &ip6i->ip6_dst, &tablearg) == 0) {
+ /* No mapping: the packet is freed here, hence NAT64RETURN. */
+ m_freem(m);
+ return (NAT64RETURN);
+ }
+ if (cfg->flags & NAT64_LOG) {
+ logdata = &loghdr;
+ nat64stl_log(logdata, m, AF_INET6, cfg->no.kidx);
+ } else
+ logdata = NULL;
+ return (nat64_handle_icmp6(m, 0,
+ htonl(TARG_VAL(chain, tablearg, nh4)), 0, stats, logdata));
+}
+
+/*
+ * External action handler of the stateless NAT64 translator.
+ *
+ * cmd has to be O_EXTERNAL_ACTION carrying our eaction id, directly
+ * followed by O_EXTERNAL_INSTANCE naming a configured instance.  IPv4
+ * packets are matched by destination against map46, IPv6 packets by
+ * source against map64.  An IPv6 ICMPv6 packet that misses map64 is
+ * given to nat64stl_handle_icmp6().
+ *
+ * Returns 0 with *done == 0 when the rule does not apply to the packet
+ * (the rule search continues).  Otherwise *done is set to 1, args->m is
+ * cleared (after m_freem() when the handler returned NAT64MFREE) and
+ * IP_FW_DENY is returned.
+ */
+int
+ipfw_nat64stl(struct ip_fw_chain *chain, struct ip_fw_args *args,
+    ipfw_insn *cmd, int *done)
+{
+	ipfw_insn *icmd;
+	struct nat64stl_cfg *cfg;
+	in_addr_t dst4;
+	uint32_t tablearg;
+	int ret;
+
+	IPFW_RLOCK_ASSERT(chain);
+
+	*done = 0; /* try next rule if not matched */
+	icmd = cmd + 1;
+	if (cmd->opcode != O_EXTERNAL_ACTION ||
+	    cmd->arg1 != V_nat64stl_eid ||
+	    icmd->opcode != O_EXTERNAL_INSTANCE ||
+	    (cfg = NAT64_LOOKUP(chain, icmd)) == NULL)
+		return (0);
+
+	switch (args->f_id.addr_type) {
+	case 4:
+		dst4 = htonl(args->f_id.dst_ip);
+		ret = ipfw_lookup_table(chain, cfg->map46, sizeof(in_addr_t),
+		    &dst4, &tablearg);
+		break;
+	case 6:
+		ret = ipfw_lookup_table(chain, cfg->map64,
+		    sizeof(struct in6_addr), &args->f_id.src_ip6, &tablearg);
+		break;
+	default:
+		return (0);
+	}
+	if (ret == 0) {
+		/*
+		 * In case when packet is ICMPv6 message from an intermediate
+		 * router, the source address of message will not match the
+		 * addresses from our map64 table.
+		 *
+		 * This applies to IPv6 packets only: an IPv4 packet that
+		 * merely carries protocol number 58 must not be parsed as
+		 * IPv6 by the ICMPv6 handler.
+		 */
+		if (args->f_id.addr_type != 6 ||
+		    args->f_id.proto != IPPROTO_ICMPV6)
+			return (0);
+
+		ret = nat64stl_handle_icmp6(chain, cfg, args->m);
+	} else {
+		if (args->f_id.addr_type == 4)
+			ret = nat64stl_handle_ip4(chain, cfg, args->m,
+			    tablearg);
+		else
+			ret = nat64stl_handle_ip6(chain, cfg, args->m,
+			    tablearg);
+	}
+	if (ret == NAT64SKIP)
+		return (0);
+
+	*done = 1; /* terminate the search */
+	if (ret == NAT64MFREE)
+		m_freem(args->m);
+	args->m = NULL;
+	return (IP_FW_DENY);
+}
+
+
diff --git a/sys/netpfil/ipfw/nat64/nat64stl.h b/sys/netpfil/ipfw/nat64/nat64stl.h
new file mode 100644
index 0000000..42ec20e
--- /dev/null
+++ b/sys/netpfil/ipfw/nat64/nat64stl.h
@@ -0,0 +1,58 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _IP_FW_NAT64STL_H_
+#define _IP_FW_NAT64STL_H_
+
+/*
+ * Kernel-side state of one stateless NAT64 instance.
+ */
+struct nat64stl_cfg {
+	struct named_object	no;
+
+	uint16_t		map64;	/* table with 6to4 mapping */
+	uint16_t		map46;	/* table with 4to6 mapping */
+
+	struct in6_addr		prefix6;/* IPv6 prefix */
+	uint8_t			plen6;	/* prefix length */
+	/*
+	 * Has to be wide enough for the NAT64STL_* bits below: they start
+	 * at 0x0100 and would be truncated away in an 8-bit field, which
+	 * would make every test for them evaluate to false.
+	 */
+	uint16_t		flags;	/* flags for internal use */
+#define	NAT64STL_KIDX		0x0100
+#define	NAT64STL_46T		0x0200
+#define	NAT64STL_64T		0x0400
+#define	NAT64STL_FLAGSMASK	(NAT64_LOG) /* flags to pass to userland */
+	char			name[64];
+	nat64_stats_block	stats;
+};
+
+/*
+ * Id of the "nat64stl" external action as returned by ipfw_add_eaction();
+ * declared per VNET and accessed through V_nat64stl_eid.  The name TLV
+ * type of nat64stl instances is derived from it.
+ */
+VNET_DECLARE(uint16_t, nat64stl_eid);
+#define V_nat64stl_eid VNET(nat64stl_eid)
+#define IPFW_TLV_NAT64STL_NAME IPFW_TLV_EACTION_NAME(V_nat64stl_eid)
+
+/* External action handler, implemented in nat64stl.c. */
+int ipfw_nat64stl(struct ip_fw_chain *chain, struct ip_fw_args *args,
+ ipfw_insn *cmd, int *done);
+
+#endif
+
diff --git a/sys/netpfil/ipfw/nat64/nat64stl_control.c b/sys/netpfil/ipfw/nat64/nat64stl_control.c
new file mode 100644
index 0000000..d8599d9
--- /dev/null
+++ b/sys/netpfil/ipfw/nat64/nat64stl_control.c
@@ -0,0 +1,621 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/rmlock.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/sockopt.h>
+#include <sys/queue.h>
+#include <sys/syslog.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/pfil.h>
+#include <net/route.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/ip6_var.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/nat64/ip_fw_nat64.h>
+#include <netpfil/ipfw/nat64/nat64stl.h>
+#include <netinet6/ip_fw_nat64.h>
+
+/* Per-VNET id of the "nat64stl" external action; 0 while not registered. */
+VNET_DEFINE(uint16_t, nat64stl_eid) = 0;
+
+/* Forward declarations of the instance helpers defined below. */
+static struct nat64stl_cfg *nat64stl_alloc_config(const char *name, uint8_t set);
+static void nat64stl_free_config(struct nat64stl_cfg *cfg);
+static struct nat64stl_cfg *nat64stl_find(struct namedobj_instance *ni,
+ const char *name, uint8_t set);
+
+/*
+ * Allocates a zeroed instance together with its statistics counters and
+ * initializes the embedded named object: its name points at the copy of
+ * name kept in the instance, its type is the nat64stl name TLV and its
+ * set is set.  Both allocations use M_WAITOK.
+ */
+static struct nat64stl_cfg *
+nat64stl_alloc_config(const char *name, uint8_t set)
+{
+	struct nat64stl_cfg *instance;
+
+	instance = malloc(sizeof(*instance), M_IPFW, M_WAITOK | M_ZERO);
+	COUNTER_ARRAY_ALLOC(instance->stats.stats, NAT64STATS, M_WAITOK);
+
+	strlcpy(instance->name, name, sizeof(instance->name));
+	instance->no.name = instance->name;
+	instance->no.set = set;
+	instance->no.etlv = IPFW_TLV_NAT64STL_NAME;
+	return (instance);
+}
+
+/*
+ * Releases the statistics counters of the instance and then the instance
+ * itself; counterpart of nat64stl_alloc_config().
+ */
+static void
+nat64stl_free_config(struct nat64stl_cfg *cfg)
+{
+
+ COUNTER_ARRAY_FREE(cfg->stats.stats, NAT64STATS);
+ free(cfg, M_IPFW);
+}
+
+/*
+ * Copies the userland-visible part of the instance cfg into uc: prefix,
+ * prefix length, set, name, the flags selected by NAT64STL_FLAGSMASK and
+ * the name TLVs of both lookup tables.
+ */
+static void
+nat64stl_export_config(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg,
+    ipfw_nat64stl_cfg *uc)
+{
+
+	strlcpy(uc->name, cfg->no.name, sizeof(uc->name));
+	uc->set = cfg->no.set;
+	uc->flags = cfg->flags & NAT64STL_FLAGSMASK;
+	uc->plen6 = cfg->plen6;
+	uc->prefix6 = cfg->prefix6;
+
+	/* The tables are referenced by kernel index; export their names. */
+	ipfw_export_obj_ntlv(ipfw_objhash_lookup_table_kidx(ch, cfg->map64),
+	    &uc->ntlv6);
+	ipfw_export_obj_ntlv(ipfw_objhash_lookup_table_kidx(ch, cfg->map46),
+	    &uc->ntlv4);
+}
+
+/* Argument bundle handed to export_config_cb() by nat64stl_list(). */
+struct nat64stl_dump_arg {
+ struct ip_fw_chain *ch; /* chain the instances belong to */
+ struct sockopt_data *sd; /* reply buffer the records are written to */
+};
+
+/*
+ * ipfw_objhash_foreach_type() callback used by nat64stl_list(): obtains
+ * room for one ipfw_nat64stl_cfg record in the reply buffer and exports
+ * the instance no into it.  Always returns 0.
+ */
+static int
+export_config_cb(struct namedobj_instance *ni, struct named_object *no,
+    void *arg)
+{
+	struct nat64stl_dump_arg *dump;
+	ipfw_nat64stl_cfg *record;
+
+	dump = (struct nat64stl_dump_arg *)arg;
+	record = (ipfw_nat64stl_cfg *)ipfw_get_sopt_space(dump->sd,
+	    sizeof(ipfw_nat64stl_cfg));
+	nat64stl_export_config(dump->ch, (struct nat64stl_cfg *)no, record);
+	return (0);
+}
+
+/*
+ * Looks up the nat64stl instance called name in the set set.  Returns
+ * whatever ipfw_objhash_lookup_name_type() yields for the nat64stl name
+ * TLV type, cast to the instance type.
+ */
+static struct nat64stl_cfg *
+nat64stl_find(struct namedobj_instance *ni, const char *name, uint8_t set)
+{
+
+	return ((struct nat64stl_cfg *)ipfw_objhash_lookup_name_type(ni, set,
+	    IPFW_TLV_NAT64STL_NAME, name));
+}
+
+
+/*
+ * Acquires the resources of a new instance with the UH write lock held:
+ * a kernel index, a reference on the IPv4->IPv6 table named by i->ntlv4
+ * and one on the IPv6->IPv4 table named by i->ntlv6, and finally links
+ * the named object into the hash.  Each completed step is recorded in
+ * cfg->flags (NAT64STL_KIDX, NAT64STL_46T, NAT64STL_64T); nothing is
+ * undone here on failure, the caller uses those flags for cleanup.
+ *
+ * Returns 0 on success, ENOSPC when no index is available and EINVAL
+ * when a table reference cannot be taken.
+ */
+static int
+nat64stl_create_internal(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg,
+ ipfw_nat64stl_cfg *i)
+{
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+
+ if (ipfw_objhash_alloc_idx(CHAIN_TO_SRV(ch), &cfg->no.kidx) != 0)
+ return (ENOSPC);
+ cfg->flags |= NAT64STL_KIDX;
+
+ if (ipfw_ref_table(ch, &i->ntlv4, &cfg->map46) != 0)
+ return (EINVAL);
+ cfg->flags |= NAT64STL_46T;
+
+ if (ipfw_ref_table(ch, &i->ntlv6, &cfg->map64) != 0)
+ return (EINVAL);
+ cfg->flags |= NAT64STL_64T;
+
+ ipfw_objhash_add(CHAIN_TO_SRV(ch), &cfg->no);
+
+ return (0);
+}
+
+/*
+ * Creates new nat64 instance.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_lheader ipfw_nat64stl_cfg ]
+ *
+ * The request is rejected with EINVAL unless its size matches exactly,
+ * the name passes ipfw_check_object_name_generic(), the prefix satisfies
+ * IN6_IS_ADDR_WKPFX(), the prefix length is 96 and the set is below
+ * IPFW_MAX_SETS.  EEXIST is returned when an instance with the same name
+ * and set already exists.
+ *
+ * Returns 0 on success
+ */
+static int
+nat64stl_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ ipfw_obj_lheader *olh;
+ ipfw_nat64stl_cfg *uc;
+ struct namedobj_instance *ni;
+ struct nat64stl_cfg *cfg;
+ int error;
+
+ if (sd->valsize != sizeof(*olh) + sizeof(*uc))
+ return (EINVAL);
+
+ olh = (ipfw_obj_lheader *)sd->kbuf;
+ uc = (ipfw_nat64stl_cfg *)(olh + 1);
+
+ if (ipfw_check_object_name_generic(uc->name) != 0)
+ return (EINVAL);
+ if (!IN6_IS_ADDR_WKPFX(&uc->prefix6))
+ return (EINVAL);
+ if (uc->plen6 != 96 || uc->set >= IPFW_MAX_SETS)
+ return (EINVAL);
+
+ /* XXX: check types of tables */
+
+ ni = CHAIN_TO_SRV(ch);
+ error = 0;
+
+ /* Cheap existence check before the M_WAITOK allocation below. */
+ IPFW_UH_RLOCK(ch);
+ if (nat64stl_find(ni, uc->name, uc->set) != NULL) {
+ IPFW_UH_RUNLOCK(ch);
+ return (EEXIST);
+ }
+ IPFW_UH_RUNLOCK(ch);
+
+ cfg = nat64stl_alloc_config(uc->name, uc->set);
+ cfg->prefix6 = uc->prefix6;
+ cfg->plen6 = uc->plen6;
+ cfg->flags = uc->flags & NAT64STL_FLAGSMASK;
+
+ IPFW_UH_WLOCK(ch);
+
+ /* The lock was dropped in between, so the name is checked again. */
+ if (nat64stl_find(ni, uc->name, uc->set) != NULL) {
+ IPFW_UH_WUNLOCK(ch);
+ nat64stl_free_config(cfg);
+ return (EEXIST);
+ }
+ error = nat64stl_create_internal(ch, cfg, uc);
+ if (error == 0) {
+ /* Okay, let's link data */
+ IPFW_WLOCK(ch);
+ SRV_OBJECT(ch, cfg->no.kidx) = cfg;
+ IPFW_WUNLOCK(ch);
+
+ IPFW_UH_WUNLOCK(ch);
+ return (0);
+ }
+
+ /* Undo whatever nat64stl_create_internal() recorded as acquired. */
+ if (cfg->flags & NAT64STL_KIDX)
+ ipfw_objhash_free_idx(ni, cfg->no.kidx);
+ if (cfg->flags & NAT64STL_46T)
+ ipfw_unref_table(ch, cfg->map46);
+ if (cfg->flags & NAT64STL_64T)
+ ipfw_unref_table(ch, cfg->map64);
+
+ IPFW_UH_WUNLOCK(ch);
+ nat64stl_free_config(cfg);
+ return (error);
+}
+
+/*
+ * Change existing nat64stl instance configuration.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ipfw_nat64stl_cfg ]
+ * Reply: [ ipfw_obj_header ipfw_nat64stl_cfg ]
+ *
+ * For SOPT_GET the current configuration is exported into the reply.
+ * Otherwise only the flags selected by NAT64STL_FLAGSMASK are taken from
+ * the request; every other flag bit of the instance is left untouched.
+ *
+ * Returns 0 on success, EINVAL for a request of the wrong size, an
+ * invalid name or an out-of-range set, and ESRCH when the instance does
+ * not exist (the same error the destroy, stats and reset-stats handlers
+ * report for a missing instance).
+ */
+static int
+nat64stl_config(struct ip_fw_chain *ch, ip_fw3_opheader *op,
+    struct sockopt_data *sd)
+{
+	ipfw_obj_header *oh;
+	ipfw_nat64stl_cfg *uc;
+	struct nat64stl_cfg *cfg;
+	struct namedobj_instance *ni;
+
+	if (sd->valsize != sizeof(*oh) + sizeof(*uc))
+		return (EINVAL);
+
+	oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd,
+	    sizeof(*oh) + sizeof(*uc));
+	uc = (ipfw_nat64stl_cfg *)(oh + 1);
+
+	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
+	    oh->ntlv.set >= IPFW_MAX_SETS)
+		return (EINVAL);
+
+	ni = CHAIN_TO_SRV(ch);
+	if (sd->sopt->sopt_dir == SOPT_GET) {
+		IPFW_UH_RLOCK(ch);
+		cfg = nat64stl_find(ni, oh->ntlv.name, oh->ntlv.set);
+		if (cfg == NULL) {
+			IPFW_UH_RUNLOCK(ch);
+			return (ESRCH);
+		}
+		nat64stl_export_config(ch, cfg, uc);
+		IPFW_UH_RUNLOCK(ch);
+		return (0);
+	}
+
+	IPFW_UH_WLOCK(ch);
+	cfg = nat64stl_find(ni, oh->ntlv.name, oh->ntlv.set);
+	if (cfg == NULL) {
+		IPFW_UH_WUNLOCK(ch);
+		return (ESRCH);
+	}
+
+	/*
+	 * For now allow to change only following values:
+	 * flags.
+	 * Bits outside NAT64STL_FLAGSMASK are internal and are preserved.
+	 */
+	cfg->flags = (cfg->flags & ~NAT64STL_FLAGSMASK) |
+	    (uc->flags & NAT64STL_FLAGSMASK);
+	IPFW_UH_WUNLOCK(ch);
+	return (0);
+}
+
+/*
+ * Unlinks the instance from the chain with the UH write lock held: the
+ * named object is removed from the hash, its kernel index is freed and
+ * the references on both lookup tables are dropped.  The instance memory
+ * itself is not freed here.
+ */
+static void
+nat64stl_detach_config(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg)
+{
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+
+ ipfw_objhash_del(CHAIN_TO_SRV(ch), &cfg->no);
+ ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), cfg->no.kidx);
+ ipfw_unref_table(ch, cfg->map46);
+ ipfw_unref_table(ch, cfg->map64);
+}
+
+/*
+ * Destroys nat64 instance.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ]
+ *
+ * Returns 0 on success, EINVAL for a request of the wrong size or an
+ * invalid name, ESRCH when the instance is not found and EBUSY while the
+ * instance is still referenced.
+ */
+static int
+nat64stl_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	ipfw_obj_header *oh;
+	struct nat64stl_cfg *cfg;
+	int error;
+
+	if (sd->valsize != sizeof(*oh))
+		return (EINVAL);
+
+	oh = (ipfw_obj_header *)sd->kbuf;
+	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0)
+		return (EINVAL);
+
+	IPFW_UH_WLOCK(ch);
+	cfg = nat64stl_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
+	if (cfg == NULL)
+		error = ESRCH;
+	else if (cfg->no.refcnt > 0)
+		error = EBUSY;
+	else {
+		/* Hide the instance from the packet path first. */
+		IPFW_WLOCK(ch);
+		SRV_OBJECT(ch, cfg->no.kidx) = NULL;
+		IPFW_WUNLOCK(ch);
+
+		nat64stl_detach_config(ch, cfg);
+		error = 0;
+	}
+	IPFW_UH_WUNLOCK(ch);
+
+	/* The memory is released only after the lock has been dropped. */
+	if (error == 0)
+		nat64stl_free_config(cfg);
+	return (error);
+}
+
+/*
+ * Lists all nat64stl instances currently available in kernel.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_lheader ]
+ * Reply: [ ipfw_obj_lheader ipfw_nat64stl_cfg x N ]
+ *
+ * The list header is always filled with the number of instances, the
+ * record size and the total size needed, so that a caller receiving
+ * ENOMEM can learn from olh->size how large the buffer has to be.
+ *
+ * Returns 0 on success, EINVAL when the request is smaller than the list
+ * header or the header cannot be obtained from the buffer, and ENOMEM
+ * when the buffer cannot hold all records.
+ */
+static int
+nat64stl_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	ipfw_obj_lheader *olh;
+	struct nat64stl_dump_arg da;
+
+	/* Check minimum header size */
+	if (sd->valsize < sizeof(ipfw_obj_lheader))
+		return (EINVAL);
+
+	olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh));
+	/* Same guard as nat64stl_stats() applies to its header. */
+	if (olh == NULL)
+		return (EINVAL);
+
+	IPFW_UH_RLOCK(ch);
+	olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch),
+	    IPFW_TLV_NAT64STL_NAME);
+	olh->objsize = sizeof(ipfw_nat64stl_cfg);
+	olh->size = sizeof(*olh) + olh->count * olh->objsize;
+
+	if (sd->valsize < olh->size) {
+		IPFW_UH_RUNLOCK(ch);
+		return (ENOMEM);
+	}
+	memset(&da, 0, sizeof(da));
+	da.ch = ch;
+	da.sd = sd;
+	ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb,
+	    &da, IPFW_TLV_NAT64STL_NAME);
+	IPFW_UH_RUNLOCK(ch);
+
+	return (0);
+}
+
+/* Fetches one counter of the instance into the same-named stats field. */
+#define __COPY_STAT_FIELD(_cfg, _stats, _field) \
+ (_stats)->_field = NAT64STAT_FETCH(&(_cfg)->stats, _field)
+/*
+ * Copies the ten counters listed below from the instance cfg into the
+ * userland statistics structure stats; fields not listed are not written.
+ */
+static void
+export_stats(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg,
+ struct ipfw_nat64stl_stats *stats)
+{
+
+ __COPY_STAT_FIELD(cfg, stats, opcnt64);
+ __COPY_STAT_FIELD(cfg, stats, opcnt46);
+ __COPY_STAT_FIELD(cfg, stats, ofrags);
+ __COPY_STAT_FIELD(cfg, stats, ifrags);
+ __COPY_STAT_FIELD(cfg, stats, oerrors);
+ __COPY_STAT_FIELD(cfg, stats, noroute4);
+ __COPY_STAT_FIELD(cfg, stats, noroute6);
+ __COPY_STAT_FIELD(cfg, stats, noproto);
+ __COPY_STAT_FIELD(cfg, stats, nomem);
+ __COPY_STAT_FIELD(cfg, stats, dropped);
+}
+
+/*
+ * Get nat64stl statistics.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ]
+ * Reply: [ ipfw_obj_header ipfw_obj_ctlv [ uint64_t x N ]]
+ *
+ * Returns 0 on success, EINVAL when the buffer size is not a multiple of
+ * sizeof(uint64_t) or the header cannot be obtained, ENOMEM when the
+ * buffer is too small for the reply and ESRCH when the instance is not
+ * found.
+ */
+static int
+nat64stl_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
+    struct sockopt_data *sd)
+{
+	struct ipfw_nat64stl_stats stats;
+	struct nat64stl_cfg *cfg;
+	ipfw_obj_header *oh;
+	ipfw_obj_ctlv *ctlv;
+	size_t need;
+
+	need = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) +
+	    sizeof(stats);
+	if ((sd->valsize % sizeof(uint64_t)) != 0)
+		return (EINVAL);
+	if (sd->valsize < need)
+		return (ENOMEM);
+	oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, need);
+	if (oh == NULL)
+		return (EINVAL);
+	memset(&stats, 0, sizeof(stats));
+
+	/* Snapshot the counters while the instance cannot go away. */
+	IPFW_UH_RLOCK(ch);
+	cfg = nat64stl_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
+	if (cfg != NULL)
+		export_stats(ch, cfg, &stats);
+	IPFW_UH_RUNLOCK(ch);
+	if (cfg == NULL)
+		return (ESRCH);
+
+	/* The counters follow the ctlv header as an array of uint64_t. */
+	ctlv = (ipfw_obj_ctlv *)(oh + 1);
+	memset(ctlv, 0, sizeof(*ctlv));
+	ctlv->head.type = IPFW_TLV_COUNTERS;
+	ctlv->head.length = need - sizeof(ipfw_obj_header);
+	ctlv->version = IPFW_NAT64_VERSION;
+	ctlv->objsize = sizeof(uint64_t);
+	ctlv->count = sizeof(stats) / sizeof(uint64_t);
+	memcpy(ctlv + 1, &stats, sizeof(stats));
+	return (0);
+}
+
+/*
+ * Reset nat64stl statistics.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ]
+ *
+ * Returns 0 on success, EINVAL for a request of the wrong size, an
+ * invalid name or an out-of-range set, and ESRCH when the instance is
+ * not found.
+ */
+static int
+nat64stl_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
+    struct sockopt_data *sd)
+{
+	struct nat64stl_cfg *cfg;
+	ipfw_obj_header *oh;
+
+	if (sd->valsize != sizeof(*oh))
+		return (EINVAL);
+	oh = (ipfw_obj_header *)sd->kbuf;
+	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0)
+		return (EINVAL);
+	if (oh->ntlv.set >= IPFW_MAX_SETS)
+		return (EINVAL);
+
+	IPFW_UH_WLOCK(ch);
+	cfg = nat64stl_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
+	if (cfg != NULL) {
+		COUNTER_ARRAY_ZERO(cfg->stats.stats, NAT64STATS);
+	}
+	IPFW_UH_WUNLOCK(ch);
+
+	return (cfg != NULL ? 0 : ESRCH);
+}
+
+/*
+ * Socket option handlers of the nat64stl module.  nat64stl_init() registers
+ * this table with IPFW_ADD_SOPT_HANDLER() and nat64stl_uninit() removes it
+ * with IPFW_DEL_SOPT_HANDLER().  Each row names an IP_FW_NAT64STL_* opcode,
+ * a HDIR_* direction constant and the handler function; the second column
+ * is presumably the opcode version -- confirm against the declaration of
+ * struct ipfw_sopt_handler.
+ */
+static struct ipfw_sopt_handler scodes[] = {
+
+ { IP_FW_NAT64STL_CREATE, 0, HDIR_SET, nat64stl_create },
+ { IP_FW_NAT64STL_DESTROY,0, HDIR_SET, nat64stl_destroy },
+ { IP_FW_NAT64STL_CONFIG, 0, HDIR_BOTH, nat64stl_config },
+ { IP_FW_NAT64STL_LIST, 0, HDIR_GET, nat64stl_list },
+ { IP_FW_NAT64STL_STATS, 0, HDIR_GET, nat64stl_stats },
+ { IP_FW_NAT64STL_RESET_STATS,0, HDIR_SET, nat64stl_reset_stats },
+};
+
+/*
+ * Classifier of the object rewriter: the instruction cmd refers to a
+ * nat64stl instance only when the instruction right before it is
+ * O_EXTERNAL_ACTION carrying our eaction id.
+ *
+ * Returns 0 and fills *puidx (from cmd->arg1) and *ptype (always 0) on a
+ * match, 1 otherwise.
+ */
+static int
+nat64stl_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
+{
+	ipfw_insn *action;
+
+	action = cmd - 1;
+	if (action->opcode == O_EXTERNAL_ACTION &&
+	    action->arg1 == V_nat64stl_eid) {
+		*puidx = cmd->arg1;
+		*ptype = 0;
+		return (0);
+	}
+	return (1);
+}
+
+/*
+ * Update callback of the object rewriter: stores the index idx in the
+ * arg1 field of the instruction cmd.
+ */
+static void
+nat64stl_update_arg1(ipfw_insn *cmd, uint16_t idx)
+{
+
+ cmd->arg1 = idx;
+}
+
+/*
+ * Lookup-by-name callback of the object rewriter.  Delegates to
+ * ipfw_objhash_find_type() restricted to the nat64stl name TLV type and
+ * hands its result back unchanged; the object found is stored in *pno.
+ */
+static int
+nat64stl_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
+    struct named_object **pno)
+{
+
+	return (ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti,
+	    IPFW_TLV_NAT64STL_NAME, pno));
+}
+
+/*
+ * Lookup-by-index callback of the object rewriter.  Asserts that the UH
+ * write lock is held, then returns the named object registered under the
+ * kernel index idx; a missing object trips the KASSERT.
+ */
+static struct named_object *
+nat64stl_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
+{
+	struct named_object *obj;
+
+	IPFW_UH_WLOCK_ASSERT(ch);
+	obj = ipfw_objhash_lookup_kidx(CHAIN_TO_SRV(ch), idx);
+	KASSERT(obj != NULL, ("NAT with index %d not found", idx));
+
+	return (obj);
+}
+
+/*
+ * Set-management callback of the object rewriter.  Forwards the request
+ * (set, new_set, cmd) to ipfw_obj_manage_sets() for objects of the
+ * nat64stl name TLV type and returns its result.
+ */
+static int
+nat64stl_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
+ enum ipfw_sets_cmd cmd)
+{
+
+ return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NAT64STL_NAME,
+ set, new_set, cmd));
+}
+
+/*
+ * Object rewriter description for the O_EXTERNAL_INSTANCE opcode, wiring
+ * in the nat64stl callbacks defined above.  Registered by nat64stl_init()
+ * via IPFW_ADD_OBJ_REWRITER() and removed by nat64stl_uninit().
+ */
+static struct opcode_obj_rewrite opcodes[] = {
+ {
+ .opcode = O_EXTERNAL_INSTANCE,
+ .etlv = IPFW_TLV_EACTION /* just show it isn't table */,
+ .classifier = nat64stl_classify,
+ .update = nat64stl_update_arg1,
+ .find_byname = nat64stl_findbyname,
+ .find_bykidx = nat64stl_findbykidx,
+ .manage_sets = nat64stl_manage_sets,
+ },
+};
+
+/*
+ * ipfw_objhash_foreach_type() callback used on module unload.  arg is the
+ * chain; the instance stored in the chain's service object slot no->kidx
+ * is unlinked from that slot, detached and freed.  Always returns 0.
+ */
+static int
+destroy_config_cb(struct namedobj_instance *ni, struct named_object *no,
+    void *arg)
+{
+	struct ip_fw_chain *chain;
+	struct nat64stl_cfg *instance;
+
+	chain = (struct ip_fw_chain *)arg;
+	instance = (struct nat64stl_cfg *)SRV_OBJECT(chain, no->kidx);
+	/* Clear the slot before the instance goes away. */
+	SRV_OBJECT(chain, no->kidx) = NULL;
+	nat64stl_detach_config(chain, instance);
+	nat64stl_free_config(instance);
+	return (0);
+}
+
+/*
+ * Attaches the nat64stl module to the chain ch: registers the "nat64stl"
+ * external action and, on success, adds the sockopt handlers and the
+ * object rewriter (both macros receive first).
+ *
+ * Returns 0 on success or ENXIO when no eaction id could be obtained.
+ */
+int
+nat64stl_init(struct ip_fw_chain *ch, int first)
+{
+
+	V_nat64stl_eid = ipfw_add_eaction(ch, ipfw_nat64stl, "nat64stl");
+	if (V_nat64stl_eid != 0) {
+		IPFW_ADD_SOPT_HANDLER(first, scodes);
+		IPFW_ADD_OBJ_REWRITER(first, opcodes);
+		return (0);
+	}
+	return (ENXIO);
+}
+
+/*
+ * Detaches the nat64stl module from the chain ch: removes the object
+ * rewriter and the sockopt handlers, deregisters the external action and
+ * destroys every nat64stl instance of the chain under the UH write lock.
+ */
+void
+nat64stl_uninit(struct ip_fw_chain *ch, int last)
+{
+
+ IPFW_DEL_OBJ_REWRITER(last, opcodes);
+ IPFW_DEL_SOPT_HANDLER(last, scodes);
+ ipfw_del_eaction(ch, V_nat64stl_eid);
+ /*
+ * Since we already have deregistered external action,
+ * our named objects become inaccessible via rules, because
+ * all rules were truncated by ipfw_del_eaction().
+ * So, we can unlink and destroy our named objects without holding
+ * IPFW_WLOCK().
+ */
+ IPFW_UH_WLOCK(ch);
+ ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch,
+ IPFW_TLV_NAT64STL_NAME);
+ V_nat64stl_eid = 0;
+ IPFW_UH_WUNLOCK(ch);
+}
+
OpenPOWER on IntegriCloud