summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorae <ae@FreeBSD.org>2017-04-03 08:50:54 +0000
committerLuiz Souza <luiz@netgate.com>2017-07-15 11:15:04 -0500
commit6d4158777f7427cfa95335e8af0fab0dba5b25be (patch)
tree3eb57c07ab2de85902601fb548ec834ff3070a3e
parentc60851b98c5af5414d2d531586b9af15755469e3 (diff)
downloadFreeBSD-src-6d4158777f7427cfa95335e8af0fab0dba5b25be.zip
FreeBSD-src-6d4158777f7427cfa95335e8af0fab0dba5b25be.tar.gz
MFC r304041:
Move logging via BPF support into separate file. * make interface cloner VNET-aware; * simplify cloner code and use if_clone_simple(); * migrate LOGIF_LOCK() to rmlock; * add ipfw_bpf_mtap2() function to pass mbuf to BPF; * introduce new additional ipfwlog0 pseudo interface. It differs from ipfw0 by DLT type used in bpfattach. This interface is intended to used by ipfw modules to dump packets with additional info attached. Currently pflog format is used. ipfw_bpf_mtap2() function uses second argument to determine which interface use for dumping. If dlen is equal to ETHER_HDR_LEN it uses old ipfw0 interface, if dlen is equal to PFLOG_HDRLEN - ipfwlog0 will be used. Obtained from: Yandex LLC Sponsored by: Yandex LLC MFC r304043: Add three helper function to manage tables from external modules. ipfw_objhash_lookup_table_kidx does lookup kernel index of table; ipfw_ref_table/ipfw_unref_table takes and releases reference to table. Obtained from: Yandex LLC Sponsored by: Yandex LLC MFC r304046, 304108: Add ipfw_nat64 module that implements stateless and stateful NAT64. The module works together with ipfw(4) and implemented as its external action module. Stateless NAT64 registers external action with name nat64stl. This keyword should be used to create NAT64 instance and to address this instance in rules. Stateless NAT64 uses two lookup tables with mapped IPv4->IPv6 and IPv6->IPv4 addresses to perform translation. A configuration of instance should looks like this: 1. Create lookup tables: # ipfw table T46 create type addr valtype ipv6 # ipfw table T64 create type addr valtype ipv4 2. Fill T46 and T64 tables. 3. Add rule to allow neighbor solicitation and advertisement: # ipfw add allow icmp6 from any to any icmp6types 135,136 4. Create NAT64 instance: # ipfw nat64stl NAT create table4 T46 table6 T64 5. Add rules that matches the traffic: # ipfw add nat64stl NAT ip from any to table(T46) # ipfw add nat64stl NAT ip from table(T64) to 64:ff9b::/96 6. 
Configure DNS64 for IPv6 clients and add route to 64:ff9b::/96 via NAT64 host. Stateful NAT64 registers external action with name nat64lsn. The only one option required to create nat64lsn instance - prefix4. It defines the pool of IPv4 addresses used for translation. A configuration of instance should looks like this: 1. Add rule to allow neighbor solicitation and advertisement: # ipfw add allow icmp6 from any to any icmp6types 135,136 2. Create NAT64 instance: # ipfw nat64lsn NAT create prefix4 A.B.C.D/28 3. Add rules that matches the traffic: # ipfw add nat64lsn NAT ip from any to A.B.C.D/28 # ipfw add nat64lsn NAT ip6 from any to 64:ff9b::/96 4. Configure DNS64 for IPv6 clients and add route to 64:ff9b::/96 via NAT64 host. Obtained from: Yandex LLC Relnotes: yes Sponsored by: Yandex LLC Differential Revision: https://reviews.freebsd.org/D6434 MFC r304048: Replace __noinline with special debug macro NAT64NOINLINE. MFC r304061: Use %ju to print unsigned 64-bit value. MFC r304076: Make statistics nat64lsn, nat64stl an nptv6 output netstat-like: "@value @description" and fix build due to -Wformat errors. MFC r304378 (by bz): Try to fix gcc compilation errors (which are right). nat64_getlasthdr() returns an int, which can be -1 in case of error, storing the result in an uint8_t and then comparing to < 0 is not helpful. Do what is done in the rest of the code and make proto an int here as well. MFC r309187: Fix ICMPv6 Time Exceeded error message translation. MFC r314718: Use new ipfw_lookup_table() in the nat64 too. MFC r315204,315233: Use memset with structure size. (cherry picked from commit 1f5f6e71571eeaab683b65e91887222f4a6ece97)
-rw-r--r--sbin/ipfw/Makefile2
-rw-r--r--sbin/ipfw/ipfw.8223
-rw-r--r--sbin/ipfw/ipfw2.c2
-rw-r--r--sbin/ipfw/ipfw2.h28
-rw-r--r--sbin/ipfw/main.c4
-rw-r--r--sbin/ipfw/nat64lsn.c881
-rw-r--r--sbin/ipfw/nat64stl.c535
-rw-r--r--sbin/ipfw/tables.c8
-rw-r--r--sys/conf/NOTES3
-rw-r--r--sys/conf/files13
-rw-r--r--sys/conf/options2
-rw-r--r--sys/modules/Makefile4
-rw-r--r--sys/modules/ipfw/Makefile2
-rw-r--r--sys/modules/ipfw_nat64/Makefile11
-rw-r--r--sys/netinet/ip_fw.h21
-rw-r--r--sys/netinet6/ip_fw_nat64.h154
-rw-r--r--sys/netpfil/ipfw/ip_fw2.c5
-rw-r--r--sys/netpfil/ipfw/ip_fw_bpf.c209
-rw-r--r--sys/netpfil/ipfw/ip_fw_log.c177
-rw-r--r--sys/netpfil/ipfw/ip_fw_private.h8
-rw-r--r--sys/netpfil/ipfw/ip_fw_table.c51
-rw-r--r--sys/netpfil/ipfw/nat64/ip_fw_nat64.c129
-rw-r--r--sys/netpfil/ipfw/nat64/ip_fw_nat64.h117
-rw-r--r--sys/netpfil/ipfw/nat64/nat64_translate.c1572
-rw-r--r--sys/netpfil/ipfw/nat64/nat64_translate.h116
-rw-r--r--sys/netpfil/ipfw/nat64/nat64lsn.c1770
-rw-r--r--sys/netpfil/ipfw/nat64/nat64lsn.h351
-rw-r--r--sys/netpfil/ipfw/nat64/nat64lsn_control.c917
-rw-r--r--sys/netpfil/ipfw/nat64/nat64stl.c262
-rw-r--r--sys/netpfil/ipfw/nat64/nat64stl.h58
-rw-r--r--sys/netpfil/ipfw/nat64/nat64stl_control.c621
31 files changed, 8075 insertions, 181 deletions
diff --git a/sbin/ipfw/Makefile b/sbin/ipfw/Makefile
index 54809b2..06ffb48 100644
--- a/sbin/ipfw/Makefile
+++ b/sbin/ipfw/Makefile
@@ -5,7 +5,7 @@
PACKAGE=ipfw
PROG= ipfw
SRCS= ipfw2.c dummynet.c ipv6.c main.c nat.c tables.c
-SRCS+= nptv6.c
+SRCS+= nat64lsn.c nat64stl.c nptv6.c
WARNS?= 2
.if ${MK_PF} != "no"
diff --git a/sbin/ipfw/ipfw.8 b/sbin/ipfw/ipfw.8
index 2035637..9f11bbb 100644
--- a/sbin/ipfw/ipfw.8
+++ b/sbin/ipfw/ipfw.8
@@ -113,6 +113,37 @@ in-kernel NAT.
.Oc
.Oc
.Ar pathname
+.Ss STATEFUL IPv6/IPv4 NETWORK ADDRESS AND PROTOCOL TRANSLATION
+.Nm
+.Oo Cm set Ar N Oc Cm nat64lsn Ar name Cm create Ar create-options
+.Nm
+.Oo Cm set Ar N Oc Cm nat64lsn Ar name Cm config Ar config-options
+.Nm
+.Oo Cm set Ar N Oc Cm nat64lsn
+.Brq Ar name | all
+.Brq Cm list | show
+.Op Cm states
+.Nm
+.Oo Cm set Ar N Oc Cm nat64lsn
+.Brq Ar name | all
+.Cm destroy
+.Nm
+.Oo Cm set Ar N Oc Cm nat64lsn Ar name Cm stats Op Cm reset
+.Ss STATELESS IPv6/IPv4 NETWORK ADDRESS AND PROTOCOL TRANSLATION
+.Nm
+.Oo Cm set Ar N Oc Cm nat64stl Ar name Cm create Ar create-options
+.Nm
+.Oo Cm set Ar N Oc Cm nat64stl Ar name Cm config Ar config-options
+.Nm
+.Oo Cm set Ar N Oc Cm nat64stl
+.Brq Ar name | all
+.Brq Cm list | show
+.Nm
+.Oo Cm set Ar N Oc Cm nat64stl
+.Brq Ar name | all
+.Cm destroy
+.Nm
+.Oo Cm set Ar N Oc Cm nat64stl Ar name Cm stats Op Cm reset
.Ss IPv6-to-IPv6 NETWORK PREFIX TRANSLATION
.Nm
.Oo Cm set Ar N Oc Cm nptv6 Ar name Cm create Ar create-options
@@ -837,6 +868,16 @@ nat instance
see the
.Sx NETWORK ADDRESS TRANSLATION (NAT)
Section for further information.
+.It Cm nat64lsn Ar name
+Pass packet to a stateful NAT64 instance (for IPv6/IPv4 network address and
+protocol translation): see the
+.Sx IPv6/IPv4 NETWORK ADDRESS AND PROTOCOL TRANSLATION
+Section for further information.
+.It Cm nat64stl Ar name
+Pass packet to a stateless NAT64 instance (for IPv6/IPv4 network address and
+protocol translation): see the
+.Sx IPv6/IPv4 NETWORK ADDRESS AND PROTOCOL TRANSLATION
+Section for further information.
.It Cm nptv6 Ar name
Pass packet to a NPTv6 instance (for IPv6-to-IPv6 network prefix translation):
see the
@@ -2927,9 +2968,189 @@ instances.
See
.Sx SYSCTL VARIABLES
for more info.
+.Sh IPv6/IPv4 NETWORK ADDRESS AND PROTOCOL TRANSLATION
+.Nm
+supports in-kernel IPv6/IPv4 network address and protocol translation.
+Stateful NAT64 translation allows IPv6-only clients to contact IPv4 servers
+using unicast TCP, UDP or ICMP protocols.
+One or more IPv4 addresses assigned to a stateful NAT64 translator are shared
+among several IPv6-only clients.
+When stateful NAT64 is used in conjunction with DNS64, no changes are usually
+required in the IPv6 client or the IPv4 server.
+The kernel module
+.Cm ipfw_nat64
+should be loaded or kernel should have
+.Cm options IPFIREWALL_NAT64
+to be able to use stateful NAT64 translator.
+.Pp
+Stateful NAT64 uses a bunch of memory for several types of objects.
+When IPv6 client initiates connection, NAT64 translator creates a host entry
+in the states table.
+Each host entry has a number of ports group entries allocated on demand.
+Ports group entries contain connection state entries.
+There are several options to control limits and lifetime for these objects.
+.Pp
+NAT64 translator follows RFC7915 when doing ICMPv6/ICMP translation,
+unsupported message types will be silently dropped.
+IPv6 needs several ICMPv6 message types to be explicitly allowed for correct
+operation.
+Make sure that ND6 neighbor solicitation (ICMPv6 type 135) and neighbor
+advertisement (ICMPv6 type 136) messages will not be handled by translation
+rules.
+.Pp
+After translation NAT64 translator sends packets through corresponding netisr
+queue.
+Thus translator host should be configured as IPv4 and IPv6 router.
+.Pp
+Currently both stateful and stateless NAT64 translators use Well-Known IPv6
+Prefix
+.Ar 64:ff9b::/96
+to represent IPv4 addresses in the IPv6 address.
+Thus DNS64 service and routing should be configured to use Well-Known IPv6
+Prefix.
+.Pp
+The stateful NAT64 configuration command is the following:
+.Bd -ragged -offset indent
+.Bk -words
+.Cm nat64lsn
+.Ar name
+.Cm create
+.Ar create-options
+.Ek
+.Ed
+.Pp
+The following parameters can be configured:
+.Bl -tag -width indent
+.It Cm prefix4 Ar ipv4_prefix/mask
+The IPv4 prefix with mask defines the pool of IPv4 addresses used as
+source address after translation.
+Stateful NAT64 module translates IPv6 source address of client to one
+IPv4 address from this pool.
+Note that incoming IPv4 packets that don't have corresponding state entry
+in the states table will be dropped by translator.
+Make sure that translation rules handle packets, destined to configured prefix.
+.It Cm max_ports Ar number
+Maximum number of ports reserved for upper level protocols to one IPv6 client.
+All reserved ports are divided into chunks between supported protocols.
+The number of connections from one IPv6 client is limited by this option.
+Note that closed TCP connections still remain in the list of connections until
+.Cm tcp_close_age
+interval expires.
+Default value is
+.Ar 2048 .
+.It Cm host_del_age Ar seconds
+The number of seconds until the host entry for an IPv6 client will be deleted
+and all its resources will be released due to inactivity.
+Default value is
+.Ar 3600 .
+.It Cm pg_del_age Ar seconds
+The number of seconds until a ports group with unused state entries will
+be released.
+Default value is
+.Ar 900 .
+.It Cm tcp_syn_age Ar seconds
+The number of seconds while a state entry for TCP connection with only SYN
+sent will be kept.
+If TCP connection establishment is not finished,
+state entry will be deleted.
+Default value is
+.Ar 10 .
+.It Cm tcp_est_age Ar seconds
+The number of seconds while a state entry for established TCP connection
+will be kept.
+Default value is
+.Ar 7200 .
+.It Cm tcp_close_age Ar seconds
+The number of seconds while a state entry for closed TCP connection
+will be kept.
+Keeping state entries for closed connections is needed, because IPv4 servers
+typically keep closed connections in a TIME_WAIT state for several minutes.
+Since translator's IPv4 addresses are shared among all IPv6 clients,
+new connections from the same addresses and ports may be rejected by server,
+because these connections are still in a TIME_WAIT state.
+Keeping them in translator's state table protects from such rejects.
+Default value is
+.Ar 180 .
+.It Cm udp_age Ar seconds
+The number of seconds the translator keeps a state entry while waiting for a
+reply to the sent UDP datagram.
+Default value is
+.Ar 120 .
+.It Cm icmp_age Ar seconds
+The number of seconds the translator keeps a state entry while waiting for a
+reply to the sent ICMP message.
+Default value is
+.Ar 60 .
+.It Cm log
+Turn on logging of all handled packets via BPF through
+.Ar ipfwlog0
+interface.
+.Ar ipfwlog0
+is a pseudo interface and can be created after a boot manually with
+.Cm ifconfig
+command.
+Note that it has different purpose than
+.Ar ipfw0
+interface.
+Translators send additional information to BPF with each packet.
+With
+.Cm tcpdump
+you are able to see each handled packet before and after translation.
+.It Cm -log
+Turn off logging of all handled packets via BPF.
+.El
+.Pp
+To inspect a states table of stateful NAT64 the following command can be used:
+.Bd -ragged -offset indent
+.Bk -words
+.Cm nat64lsn
+.Ar name
+.Cm show Cm states
+.Ek
+.Ed
+.Pp
+.Pp
+Stateless NAT64 translator doesn't use a states table for translation
+and converts IPv4 addresses to IPv6 and vice versa solely based on the
+mappings taken from configured lookup tables.
+Since a states table is not used by the stateless translator,
+it can be configured to pass IPv4 clients to IPv6-only servers.
+.Pp
+The stateless NAT64 configuration command is the following:
+.Bd -ragged -offset indent
+.Bk -words
+.Cm nat64stl
+.Ar name
+.Cm create
+.Ar create-options
+.Ek
+.Ed
+.Pp
+The following parameters can be configured:
+.Bl -tag -width indent
+.It Cm table4 Ar table46
+The lookup table
+.Ar table46
+contains mapping how IPv4 addresses should be translated to IPv6 addresses.
+.It Cm table6 Ar table64
+The lookup table
+.Ar table64
+contains mapping how IPv6 addresses should be translated to IPv4 addresses.
+.It Cm log
+Turn on logging of all handled packets via BPF through
+.Ar ipfwlog0
+interface.
+.It Cm -log
+Turn off logging of all handled packets via BPF.
+.El
+.Pp
+Note that the behavior of stateless translator with respect to not matched
+packets differs from stateful translator.
+If corresponding addresses were not found in the lookup tables, the packet
+will not be dropped and the search continues.
.Sh IPv6-to-IPv6 NETWORK PREFIX TRANSLATION (NPTv6)
.Nm
-support in-kernel IPv6-to-IPv6 network prefix translation as described
+supports in-kernel IPv6-to-IPv6 network prefix translation as described
in RFC6296.
The kernel module
.Cm ipfw_nptv6
diff --git a/sbin/ipfw/ipfw2.c b/sbin/ipfw/ipfw2.c
index 0faff36..fd3ad2a 100644
--- a/sbin/ipfw/ipfw2.c
+++ b/sbin/ipfw/ipfw2.c
@@ -235,6 +235,8 @@ static struct _s_x ether_types[] = {
};
static struct _s_x rule_eactions[] = {
+ { "nat64lsn", TOK_NAT64LSN },
+ { "nat64stl", TOK_NAT64STL },
{ "nptv6", TOK_NPTV6 },
{ NULL, 0 } /* terminator */
};
diff --git a/sbin/ipfw/ipfw2.h b/sbin/ipfw/ipfw2.h
index 39b7dd9..a311458 100644
--- a/sbin/ipfw/ipfw2.h
+++ b/sbin/ipfw/ipfw2.h
@@ -254,7 +254,30 @@ enum tokens {
TOK_UNLOCK,
TOK_VLIST,
TOK_OLIST,
+
+ /* NAT64 tokens */
+ TOK_NAT64STL,
+ TOK_NAT64LSN,
TOK_STATS,
+ TOK_STATES,
+ TOK_CONFIG,
+ TOK_TABLE4,
+ TOK_TABLE6,
+ TOK_PREFIX4,
+ TOK_PREFIX6,
+ TOK_AGG_LEN,
+ TOK_AGG_COUNT,
+ TOK_MAX_PORTS,
+ TOK_JMAXLEN,
+ TOK_PORT_RANGE,
+ TOK_HOST_DEL_AGE,
+ TOK_PG_DEL_AGE,
+ TOK_TCP_SYN_AGE,
+ TOK_TCP_CLOSE_AGE,
+ TOK_TCP_EST_AGE,
+ TOK_UDP_AGE,
+ TOK_ICMP_AGE,
+ TOK_LOGOFF,
/* NPTv6 tokens */
TOK_NPTV6,
@@ -349,6 +372,8 @@ void ipfw_flush(int force);
void ipfw_zero(int ac, char *av[], int optname);
void ipfw_list(int ac, char *av[], int show_counters);
void ipfw_internal_handler(int ac, char *av[]);
+void ipfw_nat64lsn_handler(int ac, char *av[]);
+void ipfw_nat64stl_handler(int ac, char *av[]);
void ipfw_nptv6_handler(int ac, char *av[]);
int ipfw_check_object_name(const char *name);
@@ -386,7 +411,10 @@ void bp_flush(struct buf_pr *b);
/* tables.c */
struct _ipfw_obj_ctlv;
+struct _ipfw_obj_ntlv;
int table_check_name(const char *tablename);
void ipfw_list_ta(int ac, char *av[]);
void ipfw_list_values(int ac, char *av[]);
+void table_fill_ntlv(struct _ipfw_obj_ntlv *ntlv, const char *name,
+ uint8_t set, uint16_t uidx);
diff --git a/sbin/ipfw/main.c b/sbin/ipfw/main.c
index cb4168f..b7ff07a 100644
--- a/sbin/ipfw/main.c
+++ b/sbin/ipfw/main.c
@@ -425,6 +425,10 @@ ipfw_main(int oldac, char **oldav)
if (co.use_set || try_next) {
if (_substrcmp(*av, "delete") == 0)
ipfw_delete(av);
+ else if (!strncmp(*av, "nat64stl", strlen(*av)))
+ ipfw_nat64stl_handler(ac, av);
+ else if (!strncmp(*av, "nat64lsn", strlen(*av)))
+ ipfw_nat64lsn_handler(ac, av);
else if (!strncmp(*av, "nptv6", strlen(*av)))
ipfw_nptv6_handler(ac, av);
else if (_substrcmp(*av, "flush") == 0)
diff --git a/sbin/ipfw/nat64lsn.c b/sbin/ipfw/nat64lsn.c
new file mode 100644
index 0000000..7fd3c77
--- /dev/null
+++ b/sbin/ipfw/nat64lsn.c
@@ -0,0 +1,881 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Alexander V. Chernikov <melifaro@FreeBSD.org>
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include "ipfw2.h"
+
+#include <ctype.h>
+#include <err.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <netdb.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip_fw.h>
+#include <netinet6/ip_fw_nat64.h>
+#include <arpa/inet.h>
+
+/*
+ * Forward declarations for helpers used before their definition.
+ * nat64lsn_cb_t is the per-instance callback type taken by
+ * nat64lsn_foreach(); the *_cb functions below share that signature.
+ */
+static void nat64lsn_fill_ntlv(ipfw_obj_ntlv *ntlv, const char *name,
+    uint8_t set);
+typedef int (nat64lsn_cb_t)(ipfw_nat64lsn_cfg *cfg, const char *name,
+    uint8_t set);
+static int nat64lsn_foreach(nat64lsn_cb_t *f, const char *name, uint8_t set,
+    int sort);
+
+static void nat64lsn_create(const char *name, uint8_t set, int ac, char **av);
+static void nat64lsn_config(const char *name, uint8_t set, int ac, char **av);
+static void nat64lsn_destroy(const char *name, uint8_t set);
+static void nat64lsn_stats(const char *name, uint8_t set);
+static void nat64lsn_reset_stats(const char *name, uint8_t set);
+static int nat64lsn_show_cb(ipfw_nat64lsn_cfg *cfg, const char *name,
+    uint8_t set);
+static int nat64lsn_destroy_cb(ipfw_nat64lsn_cfg *cfg, const char *name,
+    uint8_t set);
+static int nat64lsn_states_cb(ipfw_nat64lsn_cfg *cfg, const char *name,
+    uint8_t set);
+
+/*
+ * Sub-commands looked up by ipfw_nat64lsn_handler() after the instance
+ * name.  "show" is an alias of "list": both map to TOK_LIST.
+ */
+static struct _s_x nat64cmds[] = {
+      { "create",	TOK_CREATE },
+      { "config",	TOK_CONFIG },
+      { "destroy",	TOK_DESTROY },
+      { "list",		TOK_LIST },
+      { "show",		TOK_LIST },
+      { "stats",	TOK_STATS },
+      { NULL, 0 }
+};
+
+/*
+ * Prints the state groups contained in one reply to the
+ * IP_FW_NAT64LSN_LIST_STATES request.
+ *
+ * @buf starts with ipfw_obj_header followed by ipfw_obj_data;
+ * od->head.length gives the amount of data to walk.  The payload is a
+ * sequence of ipfw_nat64lsn_stg group headers, each immediately followed
+ * by stg->count ipfw_nat64lsn_state records.
+ *
+ * Returns the next_idx of the last group header examined (0 if none was
+ * examined).  The walk also stops once a group reports next_idx 0xFF.
+ */
+static uint64_t
+nat64lsn_print_states(void *buf)
+{
+	char s[INET6_ADDRSTRLEN], a[INET_ADDRSTRLEN], f[INET_ADDRSTRLEN];
+	char sflags[4], *sf;
+	const char *proto;
+	ipfw_obj_header *oh;
+	ipfw_obj_data *od;
+	ipfw_nat64lsn_stg *stg;
+	ipfw_nat64lsn_state *ste;
+	uint64_t next_idx;
+	int i, sz;
+
+	oh = (ipfw_obj_header *)buf;
+	od = (ipfw_obj_data *)(oh + 1);
+	stg = (ipfw_nat64lsn_stg *)(od + 1);
+	sz = od->head.length - sizeof(*od);
+	next_idx = 0;
+	/*
+	 * Require room for a complete group header (and, below, for a
+	 * complete state record) so that a short reply is never read
+	 * past the advertised length.
+	 */
+	while (sz >= (int)sizeof(*stg) && next_idx != 0xFF) {
+		next_idx = stg->next_idx;
+		sz -= sizeof(*stg);
+		if (stg->count == 0) {
+			stg++;
+			continue;
+		}
+		switch (stg->proto) {
+		case IPPROTO_TCP:
+			proto = "TCP";
+			break;
+		case IPPROTO_UDP:
+			proto = "UDP";
+			break;
+		case IPPROTO_ICMPV6:
+			proto = "ICMPv6";
+			break;
+		default:
+			/* Unknown protocols are printed numerically below. */
+			proto = NULL;
+			break;
+		}
+		inet_ntop(AF_INET6, &stg->host6, s, sizeof(s));
+		inet_ntop(AF_INET, &stg->alias4, a, sizeof(a));
+		ste = (ipfw_nat64lsn_state *)(stg + 1);
+		for (i = 0; i < stg->count && sz >= (int)sizeof(*ste); i++) {
+			sf = sflags;
+			inet_ntop(AF_INET, &ste->daddr, f, sizeof(f));
+			/*
+			 * One letter per flag bit set in the TCP state
+			 * (presumably SYN / established / FIN - confirm
+			 * against the kernel side).  At most three letters
+			 * plus the terminator fit sflags[4].
+			 */
+			if (stg->proto == IPPROTO_TCP) {
+				if (ste->flags & 0x02)
+					*sf++ = 'S';
+				if (ste->flags & 0x04)
+					*sf++ = 'E';
+				if (ste->flags & 0x01)
+					*sf++ = 'F';
+			}
+			*sf = '\0';
+			switch (stg->proto) {
+			case IPPROTO_TCP:
+			case IPPROTO_UDP:
+				printf("%s:%d\t%s:%d\t%s\t%s\t%d\t%s:%d\n",
+				    s, ste->sport, a, ste->aport, proto,
+				    sflags, ste->idle, f, ste->dport);
+				break;
+			case IPPROTO_ICMPV6:
+				printf("%s\t%s\t%s\t\t%d\t%s\n",
+				    s, a, proto, ste->idle, f);
+				break;
+			default:
+				printf("%s\t%s\t%d\t\t%d\t%s\n",
+				    s, a, stg->proto, ste->idle, f);
+			}
+			ste++;
+			sz -= sizeof(*ste);
+		}
+		/* The next group header follows the last state record. */
+		stg = (ipfw_nat64lsn_stg *)ste;
+	}
+	return (next_idx);
+}
+
+/*
+ * nat64lsn_foreach() callback that dumps the states of one instance.
+ *
+ * Returns ESRCH when @cfg does not match the requested @name (if any)
+ * or the requested non-zero @set; otherwise fetches the states in
+ * chunks with IP_FW_NAT64LSN_LIST_STATES, printing every chunk, until
+ * nat64lsn_print_states() reports index 0xFF, and returns 0.
+ */
+static int
+nat64lsn_states_cb(ipfw_nat64lsn_cfg *cfg, const char *name, uint8_t set)
+{
+	const size_t bufsz = 4096;
+	ipfw_obj_header *oh;
+	ipfw_obj_data *od;
+	uint64_t next_idx;
+	size_t sz;
+	void *buf;
+
+	if (name != NULL && strcmp(cfg->name, name) != 0)
+		return (ESRCH);
+	if (set != 0 && cfg->set != set)
+		return (ESRCH);
+
+	buf = calloc(1, bufsz);
+	if (buf == NULL)
+		err(EX_OSERR, NULL);
+
+	oh = (ipfw_obj_header *)buf;
+	od = (ipfw_obj_data *)(oh + 1);
+	next_idx = 0;
+	for (;;) {
+		/* Request: object header, data TLV, index to resume from. */
+		sz = bufsz;
+		nat64lsn_fill_ntlv(&oh->ntlv, cfg->name, set);
+		od->head.type = IPFW_TLV_OBJDATA;
+		od->head.length = sizeof(*od) + sizeof(next_idx);
+		*((uint64_t *)(od + 1)) = next_idx;
+		if (do_get3(IP_FW_NAT64LSN_LIST_STATES, &oh->opheader, &sz))
+			err(EX_OSERR, "Error reading nat64lsn states");
+		next_idx = nat64lsn_print_states(buf);
+		if (next_idx == 0xFF)
+			break;
+		/* Start the next request from a clean buffer. */
+		memset(buf, 0, bufsz);
+	}
+
+	free(buf);
+	return (0);
+}
+
+/* Optional keyword accepted after "stats". */
+static struct _s_x nat64statscmds[] = {
+      { "reset",	TOK_RESET },
+      { NULL, 0 }
+};
+
+/*
+ * Handles "nat64lsn NAME stats [reset]": without further arguments the
+ * statistics are shown, with "reset" they are reset.
+ */
+static void
+ipfw_nat64lsn_stats_handler(const char *name, uint8_t set, int ac, char *av[])
+{
+	int tcmd;
+
+	if (ac != 0) {
+		NEED1("nat64lsn stats needs command");
+		tcmd = get_token(nat64statscmds, *av,
+		    "nat64lsn stats command");
+		if (tcmd == TOK_RESET)
+			nat64lsn_reset_stats(name, set);
+		return;
+	}
+	nat64lsn_stats(name, set);
+}
+
+/* Optional keywords accepted after "list" / "show". */
+static struct _s_x nat64listcmds[] = {
+      { "states",	TOK_STATES },
+      { "config",	TOK_CONFIG },
+      { NULL, 0 }
+};
+
+/*
+ * Handles "nat64lsn {NAME | all} {list | show} [config | states]".
+ * The configuration is shown when no keyword or "config" is given;
+ * "states" dumps the states table instead.
+ */
+static void
+ipfw_nat64lsn_list_handler(const char *name, uint8_t set, int ac, char *av[])
+{
+	nat64lsn_cb_t *cb;
+	int tcmd;
+
+	cb = nat64lsn_show_cb;
+	if (ac != 0) {
+		NEED1("nat64lsn list needs command");
+		tcmd = get_token(nat64listcmds, *av, "nat64lsn list command");
+		switch (tcmd) {
+		case TOK_STATES:
+			cb = nat64lsn_states_cb;
+			break;
+		case TOK_CONFIG:
+			break;
+		default:
+			return;
+		}
+	}
+	nat64lsn_foreach(cb, name, set, 1);
+}
+
+/*
+ * Entry point for all nat64lsn-related commands:
+ * ipfw [set N] nat64lsn NAME {create | config} ...
+ * ipfw [set N] nat64lsn NAME stats [reset]
+ * ipfw [set N] nat64lsn {NAME | all} destroy
+ * ipfw [set N] nat64lsn {NAME | all} {list | show} [config | states]
+ *
+ * av[0] is the "nat64lsn" keyword itself.  The special name "all" is
+ * passed on as a NULL name and is only accepted by destroy and list.
+ */
+#define	nat64lsn_check_name	table_check_name
+void
+ipfw_nat64lsn_handler(int ac, char *av[])
+{
+	const char *name;
+	uint8_t set;
+	int tcmd;
+
+	set = (co.use_set != 0) ? co.use_set - 1 : 0;
+	ac--; av++;
+
+	NEED1("nat64lsn needs instance name");
+	name = *av;
+	if (nat64lsn_check_name(name) != 0) {
+		if (strcmp(name, "all") != 0)
+			errx(EX_USAGE, "nat64lsn instance name %s is invalid",
+			    name);
+		name = NULL;
+	}
+	ac--; av++;
+	NEED1("nat64lsn needs command");
+
+	tcmd = get_token(nat64cmds, *av, "nat64lsn command");
+	if (name == NULL && !(tcmd == TOK_DESTROY || tcmd == TOK_LIST))
+		errx(EX_USAGE, "nat64lsn instance name required");
+
+	/* Step over the command word; destroy takes no further arguments. */
+	ac--; av++;
+	switch (tcmd) {
+	case TOK_CREATE:
+		nat64lsn_create(name, set, ac, av);
+		break;
+	case TOK_CONFIG:
+		nat64lsn_config(name, set, ac, av);
+		break;
+	case TOK_LIST:
+		ipfw_nat64lsn_list_handler(name, set, ac, av);
+		break;
+	case TOK_DESTROY:
+		if (name != NULL)
+			nat64lsn_destroy(name, set);
+		else
+			nat64lsn_foreach(nat64lsn_destroy_cb, NULL, set, 0);
+		break;
+	case TOK_STATS:
+		ipfw_nat64lsn_stats_handler(name, set, ac, av);
+		break;
+	}
+}
+
+/*
+ * Fills @ntlv so that it names the instance @name in set @set.
+ * The name is copied with truncation to the size of ntlv->name.
+ */
+static void
+nat64lsn_fill_ntlv(ipfw_obj_ntlv *ntlv, const char *name, uint8_t set)
+{
+
+	strlcpy(ntlv->name, name, sizeof(ntlv->name));
+	ntlv->set = set;
+	ntlv->idx = 1;
+	ntlv->head.length = sizeof(*ntlv);
+	ntlv->head.type = IPFW_TLV_EACTION_NAME(1); /* it doesn't matter */
+}
+
+/*
+ * Clears the host bits of @prefix in place, keeping the first @plen bits.
+ *
+ * @af selects the address family: for AF_INET @prefix points to a
+ * struct in_addr, for AF_INET6 to a struct in6_addr.  Other families
+ * are left untouched.
+ *
+ * The IPv4 mask is built with unsigned arithmetic and the boundary
+ * lengths are handled explicitly: shifting a signed 1 by 31 bits, or
+ * shifting by 32 or more bits, is undefined behaviour in C.
+ */
+static void
+nat64lsn_apply_mask(int af, void *prefix, uint16_t plen)
+{
+	struct in6_addr mask6, *p6;
+	struct in_addr *p4;
+	uint32_t mask4;
+
+	if (af == AF_INET) {
+		p4 = (struct in_addr *)prefix;
+		if (plen == 0)
+			mask4 = 0;
+		else if (plen >= 32)
+			mask4 = 0xffffffffU;
+		else
+			mask4 = ~(((uint32_t)1 << (32 - plen)) - 1);
+		p4->s_addr &= htonl(mask4);
+	} else if (af == AF_INET6) {
+		p6 = (struct in6_addr *)prefix;
+		n2mask(&mask6, plen);
+		APPLY_MASK(p6, &mask6);
+	}
+}
+
+/*
+ * Parses "address/length" from @arg for address family @af.
+ *
+ * On success the address, with host bits cleared, is stored in @prefix
+ * and the length in @plen.  The length must be 1..32 for AF_INET and
+ * 1..96 for AF_INET6.  Any malformed input terminates the program with
+ * EX_USAGE.
+ *
+ * The length is range-checked as a long before it is narrowed, so that
+ * values such as "65537" or "-65535" are rejected instead of being
+ * truncated into a valid-looking length.
+ */
+static void
+nat64lsn_parse_prefix(const char *arg, int af, void *prefix, uint16_t *plen)
+{
+	char *p, *l, *end;
+	long len;
+
+	p = strdup(arg);
+	if (p == NULL)
+		err(EX_OSERR, NULL);
+	if ((l = strchr(p, '/')) == NULL)
+		errx(EX_USAGE, "Prefix length required");
+	*l++ = '\0';
+	if (inet_pton(af, p, prefix) != 1)
+		errx(EX_USAGE, "Bad prefix: %s", p);
+	len = strtol(l, &end, 10);
+	if (end == l || *end != '\0' || len <= 0 ||
+	    (af == AF_INET && len > 32) ||
+	    (af == AF_INET6 && len > 96))
+		errx(EX_USAGE, "Bad prefix length: %s", arg);
+	*plen = (uint16_t)len;
+	nat64lsn_apply_mask(af, prefix, *plen);
+	free(p);
+}
+
+/*
+ * Parses @arg as a non-negative decimal number that fits in 32 bits.
+ *
+ * @desc names the option being parsed and is only used in the error
+ * message.  Empty strings, trailing garbage, negative numbers and
+ * values above UINT32_MAX terminate the program with EX_USAGE instead
+ * of being silently converted.
+ *
+ * Note that several callers narrow the result further to uint16_t.
+ */
+static uint32_t
+nat64lsn_parse_int(const char *arg, const char *desc)
+{
+	char *p;
+	long long val;
+
+	errno = 0;
+	val = strtoll(arg, &p, 10);
+	if (p == arg || *p != '\0' || errno == ERANGE ||
+	    val < 0 || val > (long long)UINT32_MAX)
+		/* errx() appends the newline itself. */
+		errx(EX_USAGE, "Invalid %s value: %s", desc, arg);
+	return ((uint32_t)val);
+}
+
+/*
+ * Option keywords looked up by both nat64lsn_create() and
+ * nat64lsn_config().  Not every keyword is acted upon by both functions.
+ */
+static struct _s_x nat64newcmds[] = {
+      { "prefix6",	TOK_PREFIX6 },
+      { "agg_len",	TOK_AGG_LEN }, /* not yet */
+      { "agg_count",	TOK_AGG_COUNT }, /* not yet */
+      { "port_range",	TOK_PORT_RANGE }, /* not yet */
+      { "jmaxlen",	TOK_JMAXLEN },
+      { "prefix4",	TOK_PREFIX4 },
+      { "max_ports",	TOK_MAX_PORTS },
+      { "host_del_age",	TOK_HOST_DEL_AGE },
+      { "pg_del_age",	TOK_PG_DEL_AGE },
+      { "tcp_syn_age",	TOK_TCP_SYN_AGE },
+      { "tcp_close_age",TOK_TCP_CLOSE_AGE },
+      { "tcp_est_age",	TOK_TCP_EST_AGE },
+      { "udp_age",	TOK_UDP_AGE },
+      { "icmp_age",	TOK_ICMP_AGE },
+      { "log",		TOK_LOG },
+      { "-log",		TOK_LOGOFF },
+      { NULL, 0 }
+};
+
+/*
+ * Creates new nat64lsn instance
+ * ipfw nat64lsn <NAME> create
+ *	prefix4 <ADDR/LEN>
+ *	[ max_ports <N> ] [ host_del_age <N> ] [ pg_del_age <N> ]
+ *	[ tcp_syn_age <N> ] [ tcp_est_age <N> ] [ tcp_close_age <N> ]
+ *	[ udp_age <N> ] [ icmp_age <N> ] [ log | -log ]
+ * Request: [ ipfw_obj_lheader ipfw_nat64lsn_cfg ]
+ *
+ * The IPv6 prefix defaults to 64:ff9b::/96 and the limits and ages to
+ * the NAT64LSN_* constants.  prefix4 is mandatory.  Keywords that are
+ * present in the option table but not handled here are rejected with
+ * EX_USAGE; previously they were silently skipped, which made their
+ * value be parsed as the next option.
+ */
+#define	NAT64LSN_HAS_PREFIX4	0x01
+#define	NAT64LSN_HAS_PREFIX6	0x02
+static void
+nat64lsn_create(const char *name, uint8_t set, int ac, char **av)
+{
+	char buf[sizeof(ipfw_obj_lheader) + sizeof(ipfw_nat64lsn_cfg)];
+	ipfw_nat64lsn_cfg *cfg;
+	ipfw_obj_lheader *olh;
+	int tcmd, flags;
+	char *opt;
+
+	memset(&buf, 0, sizeof(buf));
+	olh = (ipfw_obj_lheader *)buf;
+	cfg = (ipfw_nat64lsn_cfg *)(olh + 1);
+
+	/* Some reasonable defaults */
+	inet_pton(AF_INET6, "64:ff9b::", &cfg->prefix6);
+	cfg->plen6 = 96;
+	cfg->set = set;
+	cfg->max_ports = NAT64LSN_MAX_PORTS;
+	cfg->jmaxlen = NAT64LSN_JMAXLEN;
+	cfg->nh_delete_delay = NAT64LSN_HOST_AGE;
+	cfg->pg_delete_delay = NAT64LSN_PG_AGE;
+	cfg->st_syn_ttl = NAT64LSN_TCP_SYN_AGE;
+	cfg->st_estab_ttl = NAT64LSN_TCP_EST_AGE;
+	cfg->st_close_ttl = NAT64LSN_TCP_FIN_AGE;
+	cfg->st_udp_ttl = NAT64LSN_UDP_AGE;
+	cfg->st_icmp_ttl = NAT64LSN_ICMP_AGE;
+	flags = NAT64LSN_HAS_PREFIX6;
+	while (ac > 0) {
+		tcmd = get_token(nat64newcmds, *av, "option");
+		opt = *av;
+		ac--; av++;
+
+		switch (tcmd) {
+		case TOK_PREFIX4:
+			NEED1("IPv4 prefix required");
+			nat64lsn_parse_prefix(*av, AF_INET, &cfg->prefix4,
+			    &cfg->plen4);
+			flags |= NAT64LSN_HAS_PREFIX4;
+			ac--; av++;
+			break;
+#if 0
+		case TOK_PREFIX6:
+			NEED1("IPv6 prefix required");
+			nat64lsn_parse_prefix(*av, AF_INET6, &cfg->prefix6,
+			    &cfg->plen6);
+			ac--; av++;
+			break;
+		case TOK_AGG_LEN:
+			NEED1("Aggregation prefix len required");
+			cfg->agg_prefix_len = nat64lsn_parse_int(*av, opt);
+			ac--; av++;
+			break;
+		case TOK_AGG_COUNT:
+			NEED1("Max per-prefix count required");
+			cfg->agg_prefix_max = nat64lsn_parse_int(*av, opt);
+			ac--; av++;
+			break;
+		case TOK_PORT_RANGE:
+			NEED1("port range x[:y] required");
+			if ((p = strchr(*av, ':')) == NULL)
+				cfg->min_port = (uint16_t)nat64lsn_parse_int(
+				    *av, opt);
+			else {
+				*p++ = '\0';
+				cfg->min_port = (uint16_t)nat64lsn_parse_int(
+				    *av, opt);
+				cfg->max_port = (uint16_t)nat64lsn_parse_int(
+				    p, opt);
+			}
+			ac--; av++;
+			break;
+		case TOK_JMAXLEN:
+			NEED1("job queue length required");
+			cfg->jmaxlen = nat64lsn_parse_int(*av, opt);
+			ac--; av++;
+			break;
+#endif
+		case TOK_MAX_PORTS:
+			NEED1("Max per-user ports required");
+			cfg->max_ports = nat64lsn_parse_int(*av, opt);
+			ac--; av++;
+			break;
+		case TOK_HOST_DEL_AGE:
+			NEED1("host delete delay required");
+			cfg->nh_delete_delay = (uint16_t)nat64lsn_parse_int(
+			    *av, opt);
+			ac--; av++;
+			break;
+		case TOK_PG_DEL_AGE:
+			NEED1("portgroup delete delay required");
+			cfg->pg_delete_delay = (uint16_t)nat64lsn_parse_int(
+			    *av, opt);
+			ac--; av++;
+			break;
+		case TOK_TCP_SYN_AGE:
+			NEED1("tcp syn age required");
+			cfg->st_syn_ttl = (uint16_t)nat64lsn_parse_int(
+			    *av, opt);
+			ac--; av++;
+			break;
+		case TOK_TCP_CLOSE_AGE:
+			NEED1("tcp close age required");
+			cfg->st_close_ttl = (uint16_t)nat64lsn_parse_int(
+			    *av, opt);
+			ac--; av++;
+			break;
+		case TOK_TCP_EST_AGE:
+			NEED1("tcp est age required");
+			cfg->st_estab_ttl = (uint16_t)nat64lsn_parse_int(
+			    *av, opt);
+			ac--; av++;
+			break;
+		case TOK_UDP_AGE:
+			NEED1("udp age required");
+			cfg->st_udp_ttl = (uint16_t)nat64lsn_parse_int(
+			    *av, opt);
+			ac--; av++;
+			break;
+		case TOK_ICMP_AGE:
+			NEED1("icmp age required");
+			cfg->st_icmp_ttl = (uint16_t)nat64lsn_parse_int(
+			    *av, opt);
+			ac--; av++;
+			break;
+		case TOK_LOG:
+			cfg->flags |= NAT64_LOG;
+			break;
+		case TOK_LOGOFF:
+			cfg->flags &= ~NAT64_LOG;
+			break;
+		default:
+			/*
+			 * Known keyword without a handler here (e.g. the
+			 * ones disabled above): fail loudly rather than
+			 * misparse its argument as an option.
+			 */
+			errx(EX_USAGE, "Option %s is not supported", opt);
+		}
+	}
+
+	/* Check validness */
+	if ((flags & NAT64LSN_HAS_PREFIX4) != NAT64LSN_HAS_PREFIX4)
+		errx(EX_USAGE, "prefix4 required");
+
+	olh->count = 1;
+	olh->objsize = sizeof(*cfg);
+	olh->size = sizeof(buf);
+	strlcpy(cfg->name, name, sizeof(cfg->name));
+	if (do_set3(IP_FW_NAT64LSN_CREATE, &olh->opheader, sizeof(buf)) != 0)
+		err(EX_OSERR, "nat64lsn instance creation failed");
+}
+
+/*
+ * Configures existing nat64lsn instance
+ * ipfw nat64lsn <NAME> config <options>
+ * Request: [ ipfw_obj_header ipfw_nat64lsn_cfg ]
+ *
+ * The current configuration is fetched first, the options given on the
+ * command line are applied on top of it, and the result is written back.
+ * At least one option is required; options that are not handled in the
+ * switch below are rejected with EX_USAGE.
+ */
+static void
+nat64lsn_config(const char *name, uint8_t set, int ac, char **av)
+{
+	char buf[sizeof(ipfw_obj_header) + sizeof(ipfw_nat64lsn_cfg)];
+	ipfw_nat64lsn_cfg *cfg;
+	ipfw_obj_header *oh;
+	size_t sz;
+	char *opt;
+	int tcmd;
+
+	if (ac == 0)
+		errx(EX_USAGE, "config options required");
+	memset(&buf, 0, sizeof(buf));
+	oh = (ipfw_obj_header *)buf;
+	cfg = (ipfw_nat64lsn_cfg *)(oh + 1);
+	sz = sizeof(buf);
+
+	/* Read the current settings so that unspecified ones are kept. */
+	nat64lsn_fill_ntlv(&oh->ntlv, name, set);
+	if (do_get3(IP_FW_NAT64LSN_CONFIG, &oh->opheader, &sz) != 0)
+		err(EX_OSERR, "failed to get config for instance %s", name);
+
+	while (ac > 0) {
+		tcmd = get_token(nat64newcmds, *av, "option");
+		/* Remember the keyword for error messages. */
+		opt = *av;
+		ac--; av++;
+
+		switch (tcmd) {
+		case TOK_MAX_PORTS:
+			NEED1("Max per-user ports required");
+			cfg->max_ports = nat64lsn_parse_int(*av, opt);
+			ac--; av++;
+			break;
+		case TOK_JMAXLEN:
+			NEED1("job queue length required");
+			cfg->jmaxlen = nat64lsn_parse_int(*av, opt);
+			ac--; av++;
+			break;
+		case TOK_HOST_DEL_AGE:
+			NEED1("host delete delay required");
+			cfg->nh_delete_delay = (uint16_t)nat64lsn_parse_int(
+			    *av, opt);
+			ac--; av++;
+			break;
+		case TOK_PG_DEL_AGE:
+			NEED1("portgroup delete delay required");
+			cfg->pg_delete_delay = (uint16_t)nat64lsn_parse_int(
+			    *av, opt);
+			ac--; av++;
+			break;
+		case TOK_TCP_SYN_AGE:
+			NEED1("tcp syn age required");
+			cfg->st_syn_ttl = (uint16_t)nat64lsn_parse_int(
+			    *av, opt);
+			ac--; av++;
+			break;
+		case TOK_TCP_CLOSE_AGE:
+			NEED1("tcp close age required");
+			cfg->st_close_ttl = (uint16_t)nat64lsn_parse_int(
+			    *av, opt);
+			ac--; av++;
+			break;
+		case TOK_TCP_EST_AGE:
+			NEED1("tcp est age required");
+			cfg->st_estab_ttl = (uint16_t)nat64lsn_parse_int(
+			    *av, opt);
+			ac--; av++;
+			break;
+		case TOK_UDP_AGE:
+			NEED1("udp age required");
+			cfg->st_udp_ttl = (uint16_t)nat64lsn_parse_int(
+			    *av, opt);
+			ac--; av++;
+			break;
+		case TOK_ICMP_AGE:
+			NEED1("icmp age required");
+			cfg->st_icmp_ttl = (uint16_t)nat64lsn_parse_int(
+			    *av, opt);
+			ac--; av++;
+			break;
+		/* "log" and "-log" are flags and take no argument. */
+		case TOK_LOG:
+			cfg->flags |= NAT64_LOG;
+			break;
+		case TOK_LOGOFF:
+			cfg->flags &= ~NAT64_LOG;
+			break;
+		default:
+			errx(EX_USAGE, "Can't change %s option", opt);
+		}
+	}
+
+	/* Write the modified configuration back. */
+	if (do_set3(IP_FW_NAT64LSN_CONFIG, &oh->opheader, sizeof(buf)) != 0)
+		err(EX_OSERR, "nat64lsn instance configuration failed");
+}
+
+/*
+ * Requests a statistics reset for the nat64lsn instance identified by
+ * @name and @set.  Exits via err(3) if the request fails.
+ * Request: [ ipfw_obj_header ]
+ */
+static void
+nat64lsn_reset_stats(const char *name, uint8_t set)
+{
+	ipfw_obj_header req;
+	int error;
+
+	memset(&req, 0, sizeof(req));
+	nat64lsn_fill_ntlv(&req.ntlv, name, set);
+	error = do_set3(IP_FW_NAT64LSN_RESET_STATS, &req.opheader,
+	    sizeof(req));
+	if (error != 0)
+		err(EX_OSERR, "failed to reset stats for instance %s", name);
+}
+
+/*
+ * Requests destruction of the nat64lsn instance identified by @name and
+ * @set.  Exits via err(3) if the request fails.
+ * Request: [ ipfw_obj_header ]
+ */
+static void
+nat64lsn_destroy(const char *name, uint8_t set)
+{
+	ipfw_obj_header req;
+	int error;
+
+	memset(&req, 0, sizeof(req));
+	nat64lsn_fill_ntlv(&req.ntlv, name, set);
+	error = do_set3(IP_FW_NAT64LSN_DESTROY, &req.opheader, sizeof(req));
+	if (error != 0)
+		err(EX_OSERR, "failed to destroy nat instance %s", name);
+}
+
+/*
+ * Get nat64lsn instance statistics.
+ * Request: [ ipfw_obj_header ]
+ * Reply: [ ipfw_obj_header ipfw_obj_ctlv [ uint64_t x N ] ]
+ *
+ * On success the counters that follow the ipfw_obj_ctlv in the reply are
+ * copied into @stats and 0 is returned.  On failure -1 is returned and
+ * errno is left as set by the failing call, so that the caller can report
+ * it with err(3).
+ */
+static int
+nat64lsn_get_stats(const char *name, uint8_t set,
+    struct ipfw_nat64lsn_stats *stats)
+{
+	ipfw_obj_header *oh;
+	ipfw_obj_ctlv *oc;
+	size_t sz;
+	int error, serrno;
+
+	sz = sizeof(*oh) + sizeof(*oc) + sizeof(*stats);
+	/* Do not hand a NULL buffer to nat64lsn_fill_ntlv(). */
+	if ((oh = calloc(1, sz)) == NULL)
+		return (-1);
+	nat64lsn_fill_ntlv(&oh->ntlv, name, set);
+	error = do_get3(IP_FW_NAT64LSN_STATS, &oh->opheader, &sz);
+	if (error == 0) {
+		oc = (ipfw_obj_ctlv *)(oh + 1);
+		memcpy(stats, oc + 1, sizeof(*stats));
+	}
+	/* free() is not guaranteed to preserve errno on every libc. */
+	serrno = errno;
+	free(oh);
+	errno = serrno;
+	return (error == 0 ? 0 : -1);
+}
+
+static void
+nat64lsn_stats(const char *name, uint8_t set)
+{
+ struct ipfw_nat64lsn_stats stats;
+
+ if (nat64lsn_get_stats(name, set, &stats) != 0)
+ err(EX_OSERR, "Error retrieving stats");
+
+ if (co.use_set != 0 || set != 0)
+ printf("set %u ", set);
+ printf("nat64lsn %s\n", name);
+ printf("\t%ju packets translated from IPv6 to IPv4\n",
+ (uintmax_t)stats.opcnt64);
+ printf("\t%ju packets translated from IPv4 to IPv6\n",
+ (uintmax_t)stats.opcnt46);
+ printf("\t%ju IPv6 fragments created\n",
+ (uintmax_t)stats.ofrags);
+ printf("\t%ju IPv4 fragments received\n",
+ (uintmax_t)stats.ifrags);
+ printf("\t%ju output packets dropped due to no bufs, etc.\n",
+ (uintmax_t)stats.oerrors);
+ printf("\t%ju output packets discarded due to no IPv4 route\n",
+ (uintmax_t)stats.noroute4);
+ printf("\t%ju output packets discarded due to no IPv6 route\n",
+ (uintmax_t)stats.noroute6);
+ printf("\t%ju packets discarded due to unsupported protocol\n",
+ (uintmax_t)stats.noproto);
+ printf("\t%ju packets discarded due to memory allocation problems\n",
+ (uintmax_t)stats.nomem);
+ printf("\t%ju packets discarded due to some errors\n",
+ (uintmax_t)stats.dropped);
+ printf("\t%ju packets not matched with IPv4 prefix\n",
+ (uintmax_t)stats.nomatch4);
+
+ printf("\t%ju mbufs queued for post processing\n",
+ (uintmax_t)stats.jreinjected);
+ printf("\t%ju times the job queue was processed\n",
+ (uintmax_t)stats.jcalls);
+ printf("\t%ju job requests queued\n",
+ (uintmax_t)stats.jrequests);
+ printf("\t%ju job requests queue limit reached\n",
+ (uintmax_t)stats.jmaxlen);
+ printf("\t%ju job requests failed due to memory allocation problems\n",
+ (uintmax_t)stats.jnomem);
+
+ printf("\t%ju hosts allocated\n", (uintmax_t)stats.hostcount);
+ printf("\t%ju hosts requested\n", (uintmax_t)stats.jhostsreq);
+ printf("\t%ju host requests failed\n", (uintmax_t)stats.jhostfails);
+
+ printf("\t%ju portgroups requested\n", (uintmax_t)stats.jportreq);
+ printf("\t%ju portgroups allocated\n", (uintmax_t)stats.spgcreated);
+ printf("\t%ju portgroups deleted\n", (uintmax_t)stats.spgdeleted);
+ printf("\t%ju portgroup requests failed\n",
+ (uintmax_t)stats.jportfails);
+ printf("\t%ju portgroups allocated for TCP\n",
+ (uintmax_t)stats.tcpchunks);
+ printf("\t%ju portgroups allocated for UDP\n",
+ (uintmax_t)stats.udpchunks);
+ printf("\t%ju portgroups allocated for ICMP\n",
+ (uintmax_t)stats.icmpchunks);
+
+ printf("\t%ju states created\n", (uintmax_t)stats.screated);
+ printf("\t%ju states deleted\n", (uintmax_t)stats.sdeleted);
+}
+
+/*
+ * Prints the configuration of @cfg on a single line.
+ *
+ * Returns ESRCH without printing anything when @name is given and does not
+ * match the instance, or when a set was selected on the command line and
+ * the instance belongs to another set; returns 0 otherwise.
+ * Limits and timers are printed only when they differ from the NAT64LSN_*
+ * constant they are compared against.
+ */
+static int
+nat64lsn_show_cb(ipfw_nat64lsn_cfg *cfg, const char *name, uint8_t set)
+{
+	char abuf[INET6_ADDRSTRLEN];
+	int name_differs, set_differs;
+
+/* Print cfg->field with @fmt unless it equals @dflt. */
+#define	SHOW_UNLESS(field, dflt, fmt) do {				\
+	if (cfg->field != (dflt))					\
+		printf(fmt, cfg->field);				\
+} while (0)
+
+	name_differs = (name != NULL && strcmp(cfg->name, name) != 0);
+	set_differs = (co.use_set != 0 && cfg->set != set);
+	if (name_differs || set_differs)
+		return (ESRCH);
+
+	if (co.use_set != 0 || cfg->set != 0)
+		printf("set %u ", cfg->set);
+	inet_ntop(AF_INET, &cfg->prefix4, abuf, sizeof(abuf));
+	printf("nat64lsn %s prefix4 %s/%u ", cfg->name, abuf, cfg->plen4);
+#if 0
+	inet_ntop(AF_INET6, &cfg->prefix6, abuf, sizeof(abuf));
+	printf("prefix6 %s/%u", abuf, cfg->plen6);
+	printf("agg_len %u agg_count %u ", cfg->agg_prefix_len,
+	    cfg->agg_prefix_max);
+	if (cfg->min_port != NAT64LSN_PORT_MIN ||
+	    cfg->max_port != NAT64LSN_PORT_MAX)
+		printf(" port_range %u:%u", cfg->min_port, cfg->max_port);
+	if (cfg->jmaxlen != NAT64LSN_JMAXLEN)
+		printf(" jmaxlen %u ", cfg->jmaxlen);
+#endif
+	SHOW_UNLESS(max_ports, NAT64LSN_MAX_PORTS, " max_ports %u");
+	SHOW_UNLESS(nh_delete_delay, NAT64LSN_HOST_AGE, " host_del_age %u");
+	SHOW_UNLESS(pg_delete_delay, NAT64LSN_PG_AGE, " pg_del_age %u ");
+	SHOW_UNLESS(st_syn_ttl, NAT64LSN_TCP_SYN_AGE, " tcp_syn_age %u");
+	SHOW_UNLESS(st_close_ttl, NAT64LSN_TCP_FIN_AGE, " tcp_close_age %u");
+	SHOW_UNLESS(st_estab_ttl, NAT64LSN_TCP_EST_AGE, " tcp_est_age %u");
+	SHOW_UNLESS(st_udp_ttl, NAT64LSN_UDP_AGE, " udp_age %u");
+	SHOW_UNLESS(st_icmp_ttl, NAT64LSN_ICMP_AGE, " icmp_age %u");
+#undef SHOW_UNLESS
+	if (cfg->flags & NAT64_LOG)
+		printf(" log");
+	printf("\n");
+	return (0);
+}
+
+/*
+ * Per-instance callback that destroys @cfg.  When a set was selected on
+ * the command line, instances of other sets are left alone and ESRCH is
+ * returned.  @name is not used.
+ */
+static int
+nat64lsn_destroy_cb(ipfw_nat64lsn_cfg *cfg, const char *name, uint8_t set)
+{
+
+	if (co.use_set == 0 || cfg->set == set) {
+		nat64lsn_destroy(cfg->name, cfg->set);
+		return (0);
+	}
+	return (ESRCH);
+}
+
+
+/*
+ * qsort(3) comparator for ipfw_nat64lsn_cfg records: orders by set number
+ * first and by instance name (using stringnum_cmp()) second.
+ */
+static int
+nat64name_cmp(const void *a, const void *b)
+{
+	const ipfw_nat64lsn_cfg *lhs = a;
+	const ipfw_nat64lsn_cfg *rhs = b;
+
+	if (lhs->set != rhs->set)
+		return (lhs->set > rhs->set ? 1 : -1);
+	return (stringnum_cmp(lhs->name, rhs->name));
+}
+
+/*
+ * Retrieves nat64lsn instance list from kernel,
+ * optionally sorts it and calls requested function for each instance.
+ *
+ * Request: [ ipfw_obj_lheader ]
+ * Reply: [ ipfw_obj_lheader ipfw_nat64lsn_cfg x N ]
+ *
+ * @f is called as f(cfg, name, set) for every returned record; its return
+ * value is ignored.  @sort != 0 sorts the records with nat64name_cmp()
+ * first.  Returns 0 on success or an errno value if the list could not be
+ * retrieved.
+ */
+static int
+nat64lsn_foreach(nat64lsn_cb_t *f, const char *name, uint8_t set, int sort)
+{
+	ipfw_obj_lheader *olh;
+	ipfw_nat64lsn_cfg *cfg;
+	size_t sz;
+	uint32_t i;
+	int error;
+
+	/* Start with reasonable default */
+	sz = sizeof(*olh) + 16 * sizeof(ipfw_nat64lsn_cfg);
+
+	for (;;) {
+		if ((olh = calloc(1, sz)) == NULL)
+			return (ENOMEM);
+
+		olh->size = sz;
+		if (do_get3(IP_FW_NAT64LSN_LIST, &olh->opheader, &sz) == 0)
+			break;
+
+		/* Latch errno before free() gets a chance to change it. */
+		error = errno;
+		/* On ENOMEM retry with the size reported back in the header. */
+		sz = olh->size;
+		free(olh);
+		if (error != ENOMEM)
+			return (error);
+	}
+
+	if (sort != 0)
+		qsort(olh + 1, olh->count, olh->objsize, nat64name_cmp);
+
+	/* Records are olh->objsize bytes apart, not sizeof(*cfg). */
+	cfg = (ipfw_nat64lsn_cfg *)(olh + 1);
+	for (i = 0; i < olh->count; i++) {
+		(void)f(cfg, name, set); /* Ignore errors for now */
+		cfg = (ipfw_nat64lsn_cfg *)((caddr_t)cfg + olh->objsize);
+	}
+	free(olh);
+	return (0);
+}
+
diff --git a/sbin/ipfw/nat64stl.c b/sbin/ipfw/nat64stl.c
new file mode 100644
index 0000000..6cd936c
--- /dev/null
+++ b/sbin/ipfw/nat64stl.c
@@ -0,0 +1,535 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include "ipfw2.h"
+
+#include <ctype.h>
+#include <err.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <netdb.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip_fw.h>
+#include <netinet6/ip_fw_nat64.h>
+#include <arpa/inet.h>
+
+static int nat64stl_check_prefix(struct in6_addr *prefix, int length);
+typedef int (nat64stl_cb_t)(ipfw_nat64stl_cfg *i, const char *name,
+ uint8_t set);
+static int nat64stl_foreach(nat64stl_cb_t *f, const char *name, uint8_t set,
+ int sort);
+
+static void nat64stl_create(const char *name, uint8_t set, int ac, char **av);
+static void nat64stl_config(const char *name, uint8_t set, int ac, char **av);
+static void nat64stl_destroy(const char *name, uint8_t set);
+static void nat64stl_stats(const char *name, uint8_t set);
+static void nat64stl_reset_stats(const char *name, uint8_t set);
+static int nat64stl_show_cb(ipfw_nat64stl_cfg *cfg, const char *name,
+ uint8_t set);
+static int nat64stl_destroy_cb(ipfw_nat64stl_cfg *cfg, const char *name,
+ uint8_t set);
+
+/*
+ * Sub-commands accepted after the instance name; looked up with
+ * get_token() in ipfw_nat64stl_handler().  "show" is an alias of "list".
+ */
+static struct _s_x nat64cmds[] = {
+ { "create", TOK_CREATE },
+ { "config", TOK_CONFIG },
+ { "destroy", TOK_DESTROY },
+ { "list", TOK_LIST },
+ { "show", TOK_LIST },
+ { "stats", TOK_STATS },
+ { NULL, 0 }
+};
+
+/* First 32-bit word of the 64:ff9b::/96 prefix, in network byte order. */
+#define	IPV6_ADDR_INT32_WKPFX	htonl(0x64ff9b)
+/* True when the first 96 bits of @a equal 64:ff9b::. */
+#define	IN6_IS_ADDR_WKPFX(a)	\
+	((a)->__u6_addr.__u6_addr32[0] == IPV6_ADDR_INT32_WKPFX && \
+	(a)->__u6_addr.__u6_addr32[1] == 0 && \
+	(a)->__u6_addr.__u6_addr32[2] == 0)
+/*
+ * Validates an IPv6 translation prefix.
+ * Returns 0 if @prefix/@length is acceptable and 1 otherwise.  With the
+ * generic checks below compiled out, only 64:ff9b::/96 is accepted.
+ */
+static int
+nat64stl_check_prefix(struct in6_addr *prefix, int length)
+{
+	int is_wkpfx96;
+
+	is_wkpfx96 = (length == 96 && IN6_IS_ADDR_WKPFX(prefix));
+	if (is_wkpfx96)
+		return (0);
+#if 0
+	switch (length) {
+	case 32:
+	case 40:
+	case 48:
+	case 56:
+	case 64:
+		/* Well-known prefix has 96 prefix length */
+		if (IN6_IS_ADDR_WKPFX(prefix))
+			return (1);
+		/* FALLTHROUGH */
+	case 96:
+		/* Bits 64 to 71 must be set to zero */
+		if (prefix->__u6_addr.__u6_addr8[8] != 0)
+			return (1);
+		/* XXX: looks incorrect */
+		if (IN6_IS_ADDR_MULTICAST(prefix) ||
+		    IN6_IS_ADDR_UNSPECIFIED(prefix) ||
+		    IN6_IS_ADDR_LOOPBACK(prefix))
+			return (1);
+		return (0);
+	}
+#endif
+	return (1);
+}
+
+/*
+ * Optional keyword accepted after "stats"; looked up with get_token() in
+ * ipfw_nat64stl_handler().
+ */
+static struct _s_x nat64statscmds[] = {
+ { "reset", TOK_RESET },
+ { NULL, 0 }
+};
+
+/*
+ * Entry point for all nat64stl-related commands:
+ * ipfw [set N] nat64stl NAME {create | config} ...
+ * ipfw [set N] nat64stl NAME stats [reset]
+ * ipfw [set N] nat64stl {NAME | all} destroy
+ * ipfw [set N] nat64stl {NAME | all} {list | show}
+ *
+ * av[0] is skipped; av[1] is the instance name (or "all") and av[2] the
+ * sub-command.  Usage errors terminate the program via errx(3).
+ */
+#define	nat64stl_check_name	table_check_name
+void
+ipfw_nat64stl_handler(int ac, char *av[])
+{
+	const char *name;
+	uint8_t set;
+	int cmd;
+
+	set = (co.use_set != 0) ? co.use_set - 1 : 0;
+	ac--; av++;
+
+	NEED1("nat64stl needs instance name");
+	name = *av;
+	if (nat64stl_check_name(name) != 0) {
+		/*
+		 * A name that fails validation is tolerated only when it is
+		 * the keyword "all"; it is represented by a NULL name.
+		 */
+		if (strcmp(name, "all") != 0)
+			errx(EX_USAGE, "nat64stl instance name %s is invalid",
+			    name);
+		name = NULL;
+	}
+	ac--; av++;
+	NEED1("nat64stl needs command");
+
+	cmd = get_token(nat64cmds, *av, "nat64stl command");
+	/* Only destroy and list/show may operate on every instance. */
+	if (name == NULL && cmd != TOK_DESTROY && cmd != TOK_LIST)
+		errx(EX_USAGE, "nat64stl instance name required");
+
+	switch (cmd) {
+	case TOK_CREATE:
+		ac--; av++;
+		nat64stl_create(name, set, ac, av);
+		break;
+	case TOK_CONFIG:
+		ac--; av++;
+		nat64stl_config(name, set, ac, av);
+		break;
+	case TOK_LIST:
+		nat64stl_foreach(nat64stl_show_cb, name, set, 1);
+		break;
+	case TOK_DESTROY:
+		if (name != NULL)
+			nat64stl_destroy(name, set);
+		else
+			nat64stl_foreach(nat64stl_destroy_cb, NULL, set, 0);
+		break;
+	case TOK_STATS:
+		ac--; av++;
+		if (ac == 0)
+			nat64stl_stats(name, set);
+		else if (get_token(nat64statscmds, *av,
+		    "stats command") == TOK_RESET)
+			nat64stl_reset_stats(name, set);
+		break;
+	}
+}
+
+
+/*
+ * Fills @ntlv so that it names the instance @name in set @set.
+ * The name is copied with strlcpy(), i.e. truncated if it does not fit.
+ */
+static void
+nat64stl_fill_ntlv(ipfw_obj_ntlv *ntlv, const char *name, uint8_t set)
+{
+
+	strlcpy(ntlv->name, name, sizeof(ntlv->name));
+	ntlv->set = set;
+	ntlv->idx = 1;
+	ntlv->head.length = sizeof(*ntlv);
+	ntlv->head.type = IPFW_TLV_EACTION_NAME(1); /* it doesn't matter */
+}
+
+/*
+ * Option keywords parsed by nat64stl_create() and nat64stl_config().
+ * nat64stl_config() only acts on "log" and "-log" and rejects the rest.
+ */
+static struct _s_x nat64newcmds[] = {
+ { "table4", TOK_TABLE4 },
+ { "table6", TOK_TABLE6 },
+ { "prefix6", TOK_PREFIX6 },
+ { "log", TOK_LOG },
+ { "-log", TOK_LOGOFF },
+ { NULL, 0 }
+};
+
+/*
+ * Creates new nat64stl instance
+ * ipfw nat64stl <NAME> create table4 <name> table6 <name> [ prefix6 <prefix>]
+ * Request: [ ipfw_obj_lheader ipfw_nat64stl_cfg ]
+ *
+ * table4 and table6 are mandatory.  prefix6 defaults to 64:ff9b::/96 and,
+ * when given, must have the form <address>/<length>.  Invalid input
+ * terminates the program via errx(3), a failed request via err(3).
+ */
+#define	NAT64STL_HAS_TABLE4	0x01
+#define	NAT64STL_HAS_TABLE6	0x02
+#define	NAT64STL_HAS_PREFIX6	0x04
+static void
+nat64stl_create(const char *name, uint8_t set, int ac, char *av[])
+{
+	char buf[sizeof(ipfw_obj_lheader) + sizeof(ipfw_nat64stl_cfg)];
+	ipfw_nat64stl_cfg *cfg;
+	ipfw_obj_lheader *olh;
+	int tcmd, flags;
+	char *p, *end;
+	long plen;
+
+	memset(buf, 0, sizeof(buf));
+	olh = (ipfw_obj_lheader *)buf;
+	cfg = (ipfw_nat64stl_cfg *)(olh + 1);
+
+	/* Some reasonable defaults */
+	inet_pton(AF_INET6, "64:ff9b::", &cfg->prefix6);
+	cfg->plen6 = 96;
+	cfg->set = set;
+	flags = NAT64STL_HAS_PREFIX6;
+	while (ac > 0) {
+		tcmd = get_token(nat64newcmds, *av, "option");
+		ac--; av++;
+
+		switch (tcmd) {
+		case TOK_TABLE4:
+			NEED1("table name required");
+			table_fill_ntlv(&cfg->ntlv4, *av, set, 4);
+			flags |= NAT64STL_HAS_TABLE4;
+			ac--; av++;
+			break;
+		case TOK_TABLE6:
+			NEED1("table name required");
+			table_fill_ntlv(&cfg->ntlv6, *av, set, 6);
+			flags |= NAT64STL_HAS_TABLE6;
+			ac--; av++;
+			break;
+		case TOK_PREFIX6:
+			NEED1("IPv6 prefix6 required");
+			/*
+			 * The length part is mandatory; without this check
+			 * an argument lacking '/' would hand a NULL pointer
+			 * to strtol().
+			 */
+			if ((p = strchr(*av, '/')) == NULL)
+				errx(EX_USAGE,
+				    "Prefix length required: %s", *av);
+			*p++ = '\0';
+			if (inet_pton(AF_INET6, *av, &cfg->prefix6) != 1)
+				errx(EX_USAGE,
+				    "Bad prefix: %s", *av);
+			/*
+			 * Reject empty or non-numeric lengths and range-check
+			 * the value while it is still a long, in case plen6
+			 * is narrower and would silently truncate it.
+			 */
+			plen = strtol(p, &end, 10);
+			if (end == p || *end != '\0' ||
+			    plen < 0 || plen > 128 ||
+			    nat64stl_check_prefix(&cfg->prefix6,
+			    (int)plen) != 0)
+				errx(EX_USAGE,
+				    "Bad prefix length: %s", p);
+			cfg->plen6 = plen;
+			flags |= NAT64STL_HAS_PREFIX6;
+			ac--; av++;
+			break;
+		case TOK_LOG:
+			cfg->flags |= NAT64_LOG;
+			break;
+		case TOK_LOGOFF:
+			cfg->flags &= ~NAT64_LOG;
+			break;
+		}
+	}
+
+	/* Check validness */
+	if ((flags & NAT64STL_HAS_TABLE4) != NAT64STL_HAS_TABLE4)
+		errx(EX_USAGE, "table4 required");
+	if ((flags & NAT64STL_HAS_TABLE6) != NAT64STL_HAS_TABLE6)
+		errx(EX_USAGE, "table6 required");
+	if ((flags & NAT64STL_HAS_PREFIX6) != NAT64STL_HAS_PREFIX6)
+		errx(EX_USAGE, "prefix6 required");
+
+	olh->count = 1;
+	olh->objsize = sizeof(*cfg);
+	olh->size = sizeof(buf);
+	strlcpy(cfg->name, name, sizeof(cfg->name));
+	if (do_set3(IP_FW_NAT64STL_CREATE, &olh->opheader, sizeof(buf)) != 0)
+		err(EX_OSERR, "nat64stl instance creation failed");
+}
+
+/*
+ * Configures existing nat64stl instance
+ * ipfw nat64stl <NAME> config <options>
+ * Request: [ ipfw_obj_header ipfw_nat64stl_cfg ]
+ *
+ * The instance configuration is first fetched with do_get3(), the options
+ * from the command line are applied to that copy and the result is sent
+ * back with do_set3().  Only "log" and "-log" can be changed; any other
+ * option is rejected.
+ */
+static void
+nat64stl_config(const char *name, uint8_t set, int ac, char **av)
+{
+	char buf[sizeof(ipfw_obj_header) + sizeof(ipfw_nat64stl_cfg)];
+	ipfw_obj_header *oh = (ipfw_obj_header *)buf;
+	ipfw_nat64stl_cfg *cfg = (ipfw_nat64stl_cfg *)(oh + 1);
+	size_t len = sizeof(buf);
+	char *optname;
+	int tok;
+
+	if (ac == 0)
+		errx(EX_USAGE, "config options required");
+	memset(buf, 0, sizeof(buf));
+
+	nat64stl_fill_ntlv(&oh->ntlv, name, set);
+	if (do_get3(IP_FW_NAT64STL_CONFIG, &oh->opheader, &len) != 0)
+		err(EX_OSERR, "failed to get config for instance %s", name);
+
+	while (ac > 0) {
+		/* Remember the keyword itself for error messages. */
+		optname = *av;
+		tok = get_token(nat64newcmds, optname, "option");
+		ac--; av++;
+
+		switch (tok) {
+#if 0
+		case TOK_TABLE4:
+			NEED1("table name required");
+			table_fill_ntlv(&cfg->ntlv4, *av, set, 4);
+			ac--; av++;
+			break;
+		case TOK_TABLE6:
+			NEED1("table name required");
+			table_fill_ntlv(&cfg->ntlv6, *av, set, 6);
+			ac--; av++;
+			break;
+#endif
+		case TOK_LOG:
+			cfg->flags |= NAT64_LOG;
+			break;
+		case TOK_LOGOFF:
+			cfg->flags &= ~NAT64_LOG;
+			break;
+		default:
+			errx(EX_USAGE, "Can't change %s option", optname);
+		}
+	}
+
+	if (do_set3(IP_FW_NAT64STL_CONFIG, &oh->opheader, sizeof(buf)) != 0)
+		err(EX_OSERR, "nat64stl instance configuration failed");
+}
+
+/*
+ * Requests destruction of the nat64stl instance identified by @name and
+ * @set.  Exits via err(3) if the request fails.
+ * Request: [ ipfw_obj_header ]
+ */
+static void
+nat64stl_destroy(const char *name, uint8_t set)
+{
+	ipfw_obj_header req;
+	int error;
+
+	memset(&req, 0, sizeof(req));
+	nat64stl_fill_ntlv(&req.ntlv, name, set);
+	error = do_set3(IP_FW_NAT64STL_DESTROY, &req.opheader, sizeof(req));
+	if (error != 0)
+		err(EX_OSERR, "failed to destroy nat instance %s", name);
+}
+
+/*
+ * Get nat64stl instance statistics.
+ * Request: [ ipfw_obj_header ]
+ * Reply: [ ipfw_obj_header ipfw_obj_ctlv [ uint64_t x N ] ]
+ *
+ * On success the counters that follow the ipfw_obj_ctlv in the reply are
+ * copied into @stats and 0 is returned.  On failure -1 is returned and
+ * errno is left as set by the failing call, so that the caller can report
+ * it with err(3).
+ */
+static int
+nat64stl_get_stats(const char *name, uint8_t set,
+    struct ipfw_nat64stl_stats *stats)
+{
+	ipfw_obj_header *oh;
+	ipfw_obj_ctlv *oc;
+	size_t sz;
+	int error, serrno;
+
+	sz = sizeof(*oh) + sizeof(*oc) + sizeof(*stats);
+	/* Do not hand a NULL buffer to nat64stl_fill_ntlv(). */
+	if ((oh = calloc(1, sz)) == NULL)
+		return (-1);
+	nat64stl_fill_ntlv(&oh->ntlv, name, set);
+	error = do_get3(IP_FW_NAT64STL_STATS, &oh->opheader, &sz);
+	if (error == 0) {
+		oc = (ipfw_obj_ctlv *)(oh + 1);
+		memcpy(stats, oc + 1, sizeof(*stats));
+	}
+	/* free() is not guaranteed to preserve errno on every libc. */
+	serrno = errno;
+	free(oh);
+	errno = serrno;
+	return (error == 0 ? 0 : -1);
+}
+
+static void
+nat64stl_stats(const char *name, uint8_t set)
+{
+ struct ipfw_nat64stl_stats stats;
+
+ if (nat64stl_get_stats(name, set, &stats) != 0)
+ err(EX_OSERR, "Error retrieving stats");
+
+ if (co.use_set != 0 || set != 0)
+ printf("set %u ", set);
+ printf("nat64stl %s\n", name);
+
+ printf("\t%ju packets translated from IPv6 to IPv4\n",
+ (uintmax_t)stats.opcnt64);
+ printf("\t%ju packets translated from IPv4 to IPv6\n",
+ (uintmax_t)stats.opcnt46);
+ printf("\t%ju IPv6 fragments created\n",
+ (uintmax_t)stats.ofrags);
+ printf("\t%ju IPv4 fragments received\n",
+ (uintmax_t)stats.ifrags);
+ printf("\t%ju output packets dropped due to no bufs, etc.\n",
+ (uintmax_t)stats.oerrors);
+ printf("\t%ju output packets discarded due to no IPv4 route\n",
+ (uintmax_t)stats.noroute4);
+ printf("\t%ju output packets discarded due to no IPv6 route\n",
+ (uintmax_t)stats.noroute6);
+ printf("\t%ju packets discarded due to unsupported protocol\n",
+ (uintmax_t)stats.noproto);
+ printf("\t%ju packets discarded due to memory allocation problems\n",
+ (uintmax_t)stats.nomem);
+ printf("\t%ju packets discarded due to some errors\n",
+ (uintmax_t)stats.dropped);
+}
+
+/*
+ * Requests a statistics reset for the nat64stl instance identified by
+ * @name and @set.  Exits via err(3) if the request fails.
+ * Request: [ ipfw_obj_header ]
+ */
+static void
+nat64stl_reset_stats(const char *name, uint8_t set)
+{
+	ipfw_obj_header req;
+	int error;
+
+	memset(&req, 0, sizeof(req));
+	nat64stl_fill_ntlv(&req.ntlv, name, set);
+	error = do_set3(IP_FW_NAT64STL_RESET_STATS, &req.opheader,
+	    sizeof(req));
+	if (error != 0)
+		err(EX_OSERR, "failed to reset stats for instance %s", name);
+}
+
+/*
+ * nat64stl_foreach() callback that prints the configuration of @cfg on a
+ * single line.
+ *
+ * Returns ESRCH without printing anything when @name is given and does not
+ * match the instance, or when a set was selected on the command line and
+ * the instance belongs to another set; returns 0 otherwise.
+ */
+static int
+nat64stl_show_cb(ipfw_nat64stl_cfg *cfg, const char *name, uint8_t set)
+{
+	int name_differs, set_differs;
+
+	name_differs = (name != NULL && strcmp(cfg->name, name) != 0);
+	set_differs = (co.use_set != 0 && cfg->set != set);
+	if (name_differs || set_differs)
+		return (ESRCH);
+
+	/* The set number is shown if one was requested or it is non-zero. */
+	if (co.use_set != 0 || cfg->set != 0)
+		printf("set %u ", cfg->set);
+	printf("nat64stl %s table4 %s table6 %s",
+	    cfg->name, cfg->ntlv4.name, cfg->ntlv6.name);
+	if (cfg->flags & NAT64_LOG)
+		printf(" log");
+	printf("\n");
+	return (0);
+}
+
+/*
+ * nat64stl_foreach() callback that destroys @cfg.  When a set was selected
+ * on the command line, instances of other sets are left alone and ESRCH is
+ * returned.  @name is not used.
+ */
+static int
+nat64stl_destroy_cb(ipfw_nat64stl_cfg *cfg, const char *name, uint8_t set)
+{
+
+	if (co.use_set == 0 || cfg->set == set) {
+		nat64stl_destroy(cfg->name, cfg->set);
+		return (0);
+	}
+	return (ESRCH);
+}
+
+
+/*
+ * qsort(3) comparator for ipfw_nat64stl_cfg records: orders by set number
+ * first and by instance name (using stringnum_cmp()) second.
+ */
+static int
+nat64name_cmp(const void *a, const void *b)
+{
+	const ipfw_nat64stl_cfg *lhs = a;
+	const ipfw_nat64stl_cfg *rhs = b;
+
+	if (lhs->set != rhs->set)
+		return (lhs->set > rhs->set ? 1 : -1);
+	return (stringnum_cmp(lhs->name, rhs->name));
+}
+
+/*
+ * Retrieves nat64stl instance list from kernel,
+ * optionally sorts it and calls requested function for each instance.
+ *
+ * Request: [ ipfw_obj_lheader ]
+ * Reply: [ ipfw_obj_lheader ipfw_nat64stl_cfg x N ]
+ *
+ * @f is called as f(cfg, name, set) for every returned record; its return
+ * value is ignored.  @sort != 0 sorts the records with nat64name_cmp()
+ * first.  Returns 0 on success or an errno value if the list could not be
+ * retrieved.
+ */
+static int
+nat64stl_foreach(nat64stl_cb_t *f, const char *name, uint8_t set, int sort)
+{
+	ipfw_obj_lheader *olh;
+	ipfw_nat64stl_cfg *cfg;
+	size_t sz;
+	uint32_t i;
+	int error;
+
+	/* Start with reasonable default */
+	sz = sizeof(*olh) + 16 * sizeof(*cfg);
+	for (;;) {
+		if ((olh = calloc(1, sz)) == NULL)
+			return (ENOMEM);
+
+		olh->size = sz;
+		if (do_get3(IP_FW_NAT64STL_LIST, &olh->opheader, &sz) == 0)
+			break;
+
+		/* Latch errno before free() gets a chance to change it. */
+		error = errno;
+		/* On ENOMEM retry with the size reported back in the header. */
+		sz = olh->size;
+		free(olh);
+		if (error != ENOMEM)
+			return (error);
+	}
+
+	if (sort != 0)
+		qsort(olh + 1, olh->count, olh->objsize, nat64name_cmp);
+
+	/* Records are olh->objsize bytes apart, not sizeof(*cfg). */
+	cfg = (ipfw_nat64stl_cfg *)(olh + 1);
+	for (i = 0; i < olh->count; i++) {
+		(void)f(cfg, name, set); /* Ignore errors for now */
+		cfg = (ipfw_nat64stl_cfg *)((caddr_t)cfg + olh->objsize);
+	}
+	free(olh);
+	return (0);
+}
+
diff --git a/sbin/ipfw/tables.c b/sbin/ipfw/tables.c
index 39e4dbc..85350a0 100644
--- a/sbin/ipfw/tables.c
+++ b/sbin/ipfw/tables.c
@@ -54,8 +54,6 @@ static void table_lock(ipfw_obj_header *oh, int lock);
static int table_swap(ipfw_obj_header *oh, char *second);
static int table_get_info(ipfw_obj_header *oh, ipfw_xtable_info *i);
static int table_show_info(ipfw_xtable_info *i, void *arg);
-static void table_fill_ntlv(ipfw_obj_ntlv *ntlv, const char *name,
- uint32_t set, uint16_t uidx);
static int table_flush_one(ipfw_xtable_info *i, void *arg);
static int table_show_one(ipfw_xtable_info *i, void *arg);
@@ -157,7 +155,7 @@ ipfw_table_handler(int ac, char *av[])
ipfw_xtable_info i;
ipfw_obj_header oh;
char *tablename;
- uint32_t set;
+ uint8_t set;
void *arg;
memset(&oh, 0, sizeof(oh));
@@ -294,8 +292,8 @@ ipfw_table_handler(int ac, char *av[])
}
}
-static void
-table_fill_ntlv(ipfw_obj_ntlv *ntlv, const char *name, uint32_t set,
+void
+table_fill_ntlv(ipfw_obj_ntlv *ntlv, const char *name, uint8_t set,
uint16_t uidx)
{
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
index 00cdd65..fa4202d 100644
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -965,6 +965,8 @@ device lagg
# IPFIREWALL_NAT adds support for in kernel nat in ipfw, and it requires
# LIBALIAS.
#
+# IPFIREWALL_NAT64 adds support for in kernel NAT64 in ipfw.
+#
# IPFIREWALL_NPTV6 adds support for in kernel NPTv6 in ipfw.
#
# IPSTEALTH enables code to support stealth forwarding (i.e., forwarding
@@ -988,6 +990,7 @@ options IPFIREWALL_VERBOSE #enable logging to syslogd(8)
options IPFIREWALL_VERBOSE_LIMIT=100 #limit verbosity
options IPFIREWALL_DEFAULT_TO_ACCEPT #allow everything by default
options IPFIREWALL_NAT #ipfw kernel nat support
+options IPFIREWALL_NAT64 #ipfw kernel NAT64 support
options IPFIREWALL_NPTV6 #ipfw kernel IPv6 NPT support
options IPDIVERT #divert sockets
options IPFILTER #ipfilter support
diff --git a/sys/conf/files b/sys/conf/files
index e13e374..8478a15 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3869,6 +3869,7 @@ netpfil/ipfw/ip_dummynet.c optional inet dummynet
netpfil/ipfw/ip_dn_io.c optional inet dummynet
netpfil/ipfw/ip_dn_glue.c optional inet dummynet
netpfil/ipfw/ip_fw2.c optional inet ipfirewall
+netpfil/ipfw/ip_fw_bpf.c optional inet ipfirewall
netpfil/ipfw/ip_fw_dynamic.c optional inet ipfirewall
netpfil/ipfw/ip_fw_eaction.c optional inet ipfirewall
netpfil/ipfw/ip_fw_log.c optional inet ipfirewall
@@ -3879,6 +3880,18 @@ netpfil/ipfw/ip_fw_table_algo.c optional inet ipfirewall
netpfil/ipfw/ip_fw_table_value.c optional inet ipfirewall
netpfil/ipfw/ip_fw_iface.c optional inet ipfirewall
netpfil/ipfw/ip_fw_nat.c optional inet ipfirewall_nat
+netpfil/ipfw/nat64/ip_fw_nat64.c optional inet inet6 ipfirewall \
+ ipfirewall_nat64
+netpfil/ipfw/nat64/nat64lsn.c optional inet inet6 ipfirewall \
+ ipfirewall_nat64
+netpfil/ipfw/nat64/nat64lsn_control.c optional inet inet6 ipfirewall \
+ ipfirewall_nat64
+netpfil/ipfw/nat64/nat64stl.c optional inet inet6 ipfirewall \
+ ipfirewall_nat64
+netpfil/ipfw/nat64/nat64stl_control.c optional inet inet6 ipfirewall \
+ ipfirewall_nat64
+netpfil/ipfw/nat64/nat64_translate.c optional inet inet6 ipfirewall \
+ ipfirewall_nat64
netpfil/ipfw/nptv6/ip_fw_nptv6.c optional inet inet6 ipfirewall \
ipfirewall_nptv6
netpfil/ipfw/nptv6/nptv6.c optional inet inet6 ipfirewall \
diff --git a/sys/conf/options b/sys/conf/options
index d54bff2..b5257a9 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -417,6 +417,8 @@ IPFILTER_LOOKUP opt_ipfilter.h
IPFIREWALL opt_ipfw.h
IPFIREWALL_DEFAULT_TO_ACCEPT opt_ipfw.h
IPFIREWALL_NAT opt_ipfw.h
+IPFIREWALL_NAT64 opt_ipfw.h
+IPFIREWALL_NAT64_DIRECT_OUTPUT opt_ipfw.h
IPFIREWALL_NPTV6 opt_ipfw.h
IPFIREWALL_VERBOSE opt_ipfw.h
IPFIREWALL_VERBOSE_LIMIT opt_ipfw.h
diff --git a/sys/modules/Makefile b/sys/modules/Makefile
index 71bc99c..803538f 100644
--- a/sys/modules/Makefile
+++ b/sys/modules/Makefile
@@ -167,6 +167,7 @@ SUBDIR= \
${_ipfilter} \
${_ipfw} \
ipfw_nat \
+ ${_ipfw_nat64} \
${_ipfw_nptv6} \
${_ipmi} \
ip6_mroute_mod \
@@ -459,6 +460,9 @@ _if_stf= if_stf
_if_me= if_me
_ipdivert= ipdivert
_ipfw= ipfw
+.if ${MK_INET6_SUPPORT} != "no" || defined(ALL_MODULES)
+_ipfw_nat64= ipfw_nat64
+.endif
.endif
.if ${MK_INET6_SUPPORT} != "no" || defined(ALL_MODULES)
diff --git a/sys/modules/ipfw/Makefile b/sys/modules/ipfw/Makefile
index f25762d..a8348bd 100644
--- a/sys/modules/ipfw/Makefile
+++ b/sys/modules/ipfw/Makefile
@@ -3,7 +3,7 @@
.PATH: ${.CURDIR}/../../netpfil/ipfw
KMOD= ipfw
-SRCS= ip_fw2.c ip_fw_pfil.c
+SRCS= ip_fw2.c ip_fw_pfil.c ip_fw_bpf.c
SRCS+= ip_fw_dynamic.c ip_fw_log.c ip_fw_eaction.c
SRCS+= ip_fw_sockopt.c ip_fw_table.c ip_fw_table_algo.c ip_fw_iface.c
SRCS+= ip_fw_table_value.c
diff --git a/sys/modules/ipfw_nat64/Makefile b/sys/modules/ipfw_nat64/Makefile
new file mode 100644
index 0000000..c647845
--- /dev/null
+++ b/sys/modules/ipfw_nat64/Makefile
@@ -0,0 +1,11 @@
+# $FreeBSD$
+
+.PATH: ${.CURDIR}/../../netpfil/ipfw/nat64
+
+KMOD= ipfw_nat64
+SRCS= ip_fw_nat64.c nat64_translate.c
+SRCS+= nat64lsn.c nat64lsn_control.c
+SRCS+= nat64stl.c nat64stl_control.c
+SRCS+= opt_ipfw.h
+
+.include <bsd.kmod.mk>
diff --git a/sys/netinet/ip_fw.h b/sys/netinet/ip_fw.h
index 6e65c70..3b67029 100644
--- a/sys/netinet/ip_fw.h
+++ b/sys/netinet/ip_fw.h
@@ -110,6 +110,21 @@ typedef struct _ip_fw3_opheader {
#define IP_FW_DUMP_SOPTCODES 116 /* Dump available sopts/versions */
#define IP_FW_DUMP_SRVOBJECTS 117 /* Dump existing named objects */
+#define IP_FW_NAT64STL_CREATE 130 /* Create stateless NAT64 instance */
+#define IP_FW_NAT64STL_DESTROY 131 /* Destroy stateless NAT64 instance */
+#define IP_FW_NAT64STL_CONFIG 132 /* Modify stateless NAT64 instance */
+#define IP_FW_NAT64STL_LIST 133 /* List stateless NAT64 instances */
+#define IP_FW_NAT64STL_STATS 134 /* Get NAT64STL instance statistics */
+#define IP_FW_NAT64STL_RESET_STATS 135 /* Reset NAT64STL instance statistics */
+
+#define IP_FW_NAT64LSN_CREATE 140 /* Create stateful NAT64 instance */
+#define IP_FW_NAT64LSN_DESTROY 141 /* Destroy stateful NAT64 instance */
+#define IP_FW_NAT64LSN_CONFIG 142 /* Modify stateful NAT64 instance */
+#define IP_FW_NAT64LSN_LIST 143 /* List stateful NAT64 instances */
+#define IP_FW_NAT64LSN_STATS 144 /* Get NAT64LSN instance statistics */
+#define IP_FW_NAT64LSN_LIST_STATES 145 /* Get stateful NAT64 states */
+#define IP_FW_NAT64LSN_RESET_STATS 146 /* Reset NAT64LSN instance statistics */
+
#define IP_FW_NPTV6_CREATE 150 /* Create NPTv6 instance */
#define IP_FW_NPTV6_DESTROY 151 /* Destroy NPTv6 instance */
#define IP_FW_NPTV6_CONFIG 152 /* Modify NPTv6 instance */
@@ -796,11 +811,17 @@ typedef struct _ipfw_obj_tlv {
#define IPFW_TLV_RANGE 9
#define IPFW_TLV_EACTION 10
#define IPFW_TLV_COUNTERS 11
+#define IPFW_TLV_OBJDATA 12
#define IPFW_TLV_STATE_NAME 14
#define IPFW_TLV_EACTION_BASE 1000
#define IPFW_TLV_EACTION_NAME(arg) (IPFW_TLV_EACTION_BASE + (arg))
+typedef struct _ipfw_obj_data {
+ ipfw_obj_tlv head;
+ void *data[0];
+} ipfw_obj_data;
+
/* Object name TLV */
typedef struct _ipfw_obj_ntlv {
ipfw_obj_tlv head; /* TLV header */
diff --git a/sys/netinet6/ip_fw_nat64.h b/sys/netinet6/ip_fw_nat64.h
new file mode 100644
index 0000000..a5c38b2
--- /dev/null
+++ b/sys/netinet6/ip_fw_nat64.h
@@ -0,0 +1,154 @@
+/*-
+ * Copyright (c) 2015 Yandex LLC
+ * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
+ * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET6_IP_FW_NAT64_H_
+#define _NETINET6_IP_FW_NAT64_H_
+
+struct ipfw_nat64stl_stats {
+ uint64_t opcnt64; /* number of 6to4 packets translated */
+ uint64_t opcnt46; /* number of 4to6 packets translated */
+ uint64_t ofrags; /* number of fragments generated */
+ uint64_t ifrags; /* number of fragments received */
+ uint64_t oerrors; /* number of output errors */
+ uint64_t noroute4; /* presumably: no IPv4 route found -- confirm */
+ uint64_t noroute6; /* presumably: no IPv6 route found -- confirm */
+ uint64_t noproto; /* Protocol not supported */
+ uint64_t nomem; /* mbuf allocation failed */
+ uint64_t dropped; /* dropped due to some errors */
+};
+
+struct ipfw_nat64lsn_stats {
+ uint64_t opcnt64; /* number of 6to4 packets translated */
+ uint64_t opcnt46; /* number of 4to6 packets translated */
+ uint64_t ofrags; /* number of fragments generated */
+ uint64_t ifrags; /* number of fragments received */
+ uint64_t oerrors; /* number of output errors */
+ uint64_t noroute4; /* presumably: no IPv4 route found -- confirm */
+ uint64_t noroute6; /* presumably: no IPv6 route found -- confirm */
+ uint64_t noproto; /* Protocol not supported */
+ uint64_t nomem; /* mbuf allocation failed */
+ uint64_t dropped; /* dropped due to some errors */
+
+ uint64_t nomatch4; /* No addr/port match */
+ uint64_t jcalls; /* Number of job handler calls */
+ uint64_t jrequests; /* Number of job requests */
+ uint64_t jhostsreq; /* Number of job host requests */
+ uint64_t jportreq; /* Number of portgroup requests */
+ uint64_t jhostfails; /* Number of failed host allocs */
+ uint64_t jportfails; /* Number of failed portgroup allocs */
+ uint64_t jreinjected; /* Number of packets reinjected to q */
+ uint64_t jmaxlen; /* Max queue length reached */
+ uint64_t jnomem; /* No memory to alloc queue item */
+
+ uint64_t screated; /* Number of states created */
+ uint64_t sdeleted; /* Number of states deleted */
+ uint64_t spgcreated; /* Number of portgroups created */
+ uint64_t spgdeleted; /* Number of portgroups deleted */
+ uint64_t hostcount; /* Number of hosts */
+ uint64_t tcpchunks; /* Number of TCP chunks */
+ uint64_t udpchunks; /* Number of UDP chunks */
+ uint64_t icmpchunks; /* Number of ICMP chunks */
+
+ uint64_t _reserved[4]; /* reserved slots; no use is visible in this header */
+};
+
+#define NAT64_LOG 0x0001 /* Enable logging via BPF */
+
+typedef struct _ipfw_nat64stl_cfg {
+ char name[64]; /* NAT name */
+ ipfw_obj_ntlv ntlv6; /* object name tlv; presumably names the table6 lookup table -- confirm */
+ ipfw_obj_ntlv ntlv4; /* object name tlv; presumably names the table4 lookup table -- confirm */
+ struct in6_addr prefix6; /* NAT64 prefix */
+ uint8_t plen6; /* Prefix length */
+ uint8_t set; /* Named instance set [0..31] */
+ uint8_t spare[2]; /* spare bytes; presumably padding */
+ uint32_t flags; /* presumably NAT64_* flag bits such as NAT64_LOG -- confirm */
+} ipfw_nat64stl_cfg;
+
+/*
+ * NAT64LSN default configuration values
+ */
+#define NAT64LSN_MAX_PORTS 2048 /* Max number of ports per host */
+#define NAT64LSN_JMAXLEN 2048 /* Max outstanding requests. */
+#define NAT64LSN_TCP_SYN_AGE 10 /* State's TTL after SYN received. */
+#define NAT64LSN_TCP_EST_AGE (2 * 3600) /* TTL for established connection */
+#define NAT64LSN_TCP_FIN_AGE 180 /* State's TTL after FIN/RST received */
+#define NAT64LSN_UDP_AGE 120 /* TTL for UDP states */
+#define NAT64LSN_ICMP_AGE 60 /* TTL for ICMP states */
+#define NAT64LSN_HOST_AGE 3600 /* TTL for stale host entry */
+#define NAT64LSN_PG_AGE 900 /* TTL for stale ports groups */
+
+typedef struct _ipfw_nat64lsn_cfg {
+ char name[64]; /* NAT name */
+ uint32_t flags; /* presumably NAT64_* flag bits such as NAT64_LOG -- confirm */
+ uint32_t max_ports; /* Max ports per client */
+ uint32_t agg_prefix_len; /* Prefix length to count */
+ uint32_t agg_prefix_max; /* Max hosts per agg prefix */
+ struct in_addr prefix4; /* IPv4 prefix; presumably paired with plen4 */
+ uint16_t plen4; /* Prefix length */
+ uint16_t plen6; /* Prefix length */
+ struct in6_addr prefix6; /* NAT64 prefix */
+ uint32_t jmaxlen; /* Max jobqueue length */
+ uint16_t min_port; /* Min port group # to use */
+ uint16_t max_port; /* Max port group # to use */
+ uint16_t nh_delete_delay;/* Stale host delete delay */
+ uint16_t pg_delete_delay;/* Stale portgroup delete delay */
+ uint16_t st_syn_ttl; /* TCP syn expire */
+ uint16_t st_close_ttl; /* TCP fin expire */
+ uint16_t st_estab_ttl; /* TCP established expire */
+ uint16_t st_udp_ttl; /* UDP expire */
+ uint16_t st_icmp_ttl; /* ICMP expire */
+ uint8_t set; /* Named instance set [0..31] */
+ uint8_t spare; /* spare byte; presumably padding */
+} ipfw_nat64lsn_cfg;
+
+typedef struct _ipfw_nat64lsn_state {
+ struct in_addr daddr; /* Remote IPv4 address */
+ uint16_t dport; /* Remote destination port */
+ uint16_t aport; /* Local alias port */
+ uint16_t sport; /* Source port */
+ uint8_t flags; /* State flags */
+ uint8_t spare[3]; /* spare bytes; presumably padding */
+ uint16_t idle; /* Last used time */
+} ipfw_nat64lsn_state;
+
+typedef struct _ipfw_nat64lsn_stg {
+ uint64_t next_idx; /* next state index */
+ struct in_addr alias4; /* IPv4 alias address */
+ uint8_t proto; /* protocol */
+ uint8_t flags; /* flag bits; meaning is not defined in this header */
+ uint16_t spare; /* spare field; presumably padding */
+ struct in6_addr host6; /* Bound IPv6 host */
+ uint32_t count; /* Number of states */
+ uint32_t spare2; /* spare field; presumably padding */
+} ipfw_nat64lsn_stg;
+
+#endif /* _NETINET6_IP_FW_NAT64_H_ */
+
diff --git a/sys/netpfil/ipfw/ip_fw2.c b/sys/netpfil/ipfw/ip_fw2.c
index 5054d7b..2c5eabf 100644
--- a/sys/netpfil/ipfw/ip_fw2.c
+++ b/sys/netpfil/ipfw/ip_fw2.c
@@ -2854,6 +2854,7 @@ vnet_ipfw_init(const void *unused)
#ifdef LINEAR_SKIPTO
ipfw_init_skipto_cache(chain);
#endif
+ ipfw_bpf_init(first);
/* First set up some values that are compile time options */
V_ipfw_vnet_ready = 1; /* Open for business */
@@ -2872,7 +2873,6 @@ vnet_ipfw_init(const void *unused)
* is checked on each packet because there are no pfil hooks.
*/
V_ip_fw_ctl_ptr = ipfw_ctl3;
- ipfw_log_bpf(1); /* init */
error = ipfw_attach_hooks(1);
return (error);
}
@@ -2896,8 +2896,6 @@ vnet_ipfw_uninit(const void *unused)
(void)ipfw_attach_hooks(0 /* detach */);
V_ip_fw_ctl_ptr = NULL;
- ipfw_log_bpf(0); /* uninit */
-
last = IS_DEFAULT_VNET(curvnet) ? 1 : 0;
IPFW_UH_WLOCK(chain);
@@ -2926,6 +2924,7 @@ vnet_ipfw_uninit(const void *unused)
IPFW_LOCK_DESTROY(chain);
ipfw_dyn_uninit(1); /* free the remaining parts */
ipfw_destroy_counters();
+ ipfw_bpf_uninit(last);
return (0);
}
diff --git a/sys/netpfil/ipfw/ip_fw_bpf.c b/sys/netpfil/ipfw/ip_fw_bpf.c
new file mode 100644
index 0000000..6f8aa39
--- /dev/null
+++ b/sys/netpfil/ipfw/ip_fw_bpf.c
@@ -0,0 +1,209 @@
+/*-
+ * Copyright (c) 2016 Yandex LLC
+ * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/socket.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_pflog.h>
+#include <net/if_var.h>
+#include <net/if_clone.h>
+#include <net/if_types.h>
+#include <net/vnet.h>
+#include <net/bpf.h>
+
+#include <netinet/in.h>
+#include <netinet/ip_fw.h>
+#include <netinet/ip_var.h>
+#include <netpfil/ipfw/ip_fw_private.h>
+
+static VNET_DEFINE(struct ifnet *, log_if);
+static VNET_DEFINE(struct ifnet *, pflog_if);
+static VNET_DEFINE(struct if_clone *, ipfw_cloner);
+static VNET_DEFINE(struct if_clone *, ipfwlog_cloner);
+#define V_ipfw_cloner VNET(ipfw_cloner)
+#define V_ipfwlog_cloner VNET(ipfwlog_cloner)
+#define V_log_if VNET(log_if)
+#define V_pflog_if VNET(pflog_if)
+
+static struct rmlock log_if_lock;
+#define LOGIF_LOCK_INIT(x) rm_init(&log_if_lock, "ipfw log_if lock")
+#define LOGIF_LOCK_DESTROY(x) rm_destroy(&log_if_lock)
+#define LOGIF_RLOCK_TRACKER struct rm_priotracker _log_tracker
+#define LOGIF_RLOCK(x) rm_rlock(&log_if_lock, &_log_tracker)
+#define LOGIF_RUNLOCK(x) rm_runlock(&log_if_lock, &_log_tracker)
+#define LOGIF_WLOCK(x) rm_wlock(&log_if_lock)
+#define LOGIF_WUNLOCK(x) rm_wunlock(&log_if_lock)
+
+static const char ipfwname[] = "ipfw";
+static const char ipfwlogname[] = "ipfwlog";
+
+static int
+ipfw_bpf_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
+{
+
+ return (EINVAL); /* every ioctl request is rejected, arguments are ignored */
+}
+
+static int
+ipfw_bpf_output(struct ifnet *ifp, struct mbuf *m,
+ const struct sockaddr *dst, struct route *ro)
+{
+
+ if (m != NULL) /* nothing is transmitted: the packet is only freed */
+ FREE_PKT(m);
+ return (0); /* success is reported even though the packet was discarded */
+}
+
+static void
+ipfw_clone_destroy(struct ifnet *ifp)
+{
+
+ LOGIF_WLOCK();
+ if (ifp->if_hdrlen == ETHER_HDR_LEN) /* header length set by ipfw_clone_create() */
+ V_log_if = NULL;
+ else /* any other header length is treated as the pflog-format interface */
+ V_pflog_if = NULL;
+ LOGIF_WUNLOCK();
+
+ bpfdetach(ifp); /* teardown starts only after the per-vnet pointer was cleared */
+ if_detach(ifp);
+ if_free(ifp);
+}
+
+static int
+ipfw_clone_create(struct if_clone *ifc, int unit, caddr_t params)
+{
+ struct ifnet *ifp;
+
+ ifp = if_alloc(IFT_PFLOG);
+ if (ifp == NULL)
+ return (ENOSPC);
+ if_initname(ifp, ipfwname, unit);
+ ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST;
+ ifp->if_mtu = 65536;
+ ifp->if_ioctl = ipfw_bpf_ioctl;
+ ifp->if_output = ipfw_bpf_output;
+ ifp->if_hdrlen = ETHER_HDR_LEN; /* ipfw_clone_destroy() uses this to pick the slot to clear */
+ if_attach(ifp);
+ bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
+ LOGIF_WLOCK(); /* NOTE(review): presumably published only after bpfattach() so readers never see a half-built ifp */
+ if (V_log_if != NULL) { /* this vnet already has such an interface: undo everything */
+ LOGIF_WUNLOCK();
+ bpfdetach(ifp);
+ if_detach(ifp);
+ if_free(ifp);
+ return (EEXIST);
+ }
+ V_log_if = ifp; /* from now on ipfw_bpf_mtap2() taps Ethernet-sized headers here */
+ LOGIF_WUNLOCK();
+ return (0);
+}
+
+static int
+ipfwlog_clone_create(struct if_clone *ifc, int unit, caddr_t params)
+{
+ struct ifnet *ifp;
+
+ ifp = if_alloc(IFT_PFLOG);
+ if (ifp == NULL)
+ return (ENOSPC);
+ if_initname(ifp, ipfwlogname, unit);
+ ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST;
+ ifp->if_mtu = 65536;
+ ifp->if_ioctl = ipfw_bpf_ioctl;
+ ifp->if_output = ipfw_bpf_output;
+ ifp->if_hdrlen = PFLOG_HDRLEN; /* differs from ETHER_HDR_LEN, which ipfw_clone_destroy() tests for */
+ if_attach(ifp);
+ bpfattach(ifp, DLT_PFLOG, PFLOG_HDRLEN);
+ LOGIF_WLOCK(); /* NOTE(review): presumably published only after bpfattach() so readers never see a half-built ifp */
+ if (V_pflog_if != NULL) { /* this vnet already has such an interface: undo everything */
+ LOGIF_WUNLOCK();
+ bpfdetach(ifp);
+ if_detach(ifp);
+ if_free(ifp);
+ return (EEXIST);
+ }
+ V_pflog_if = ifp; /* from now on ipfw_bpf_mtap2() taps pflog-sized headers here */
+ LOGIF_WUNLOCK();
+ return (0);
+}
+
+void
+ipfw_bpf_mtap2(void *data, u_int dlen, struct mbuf *m)
+{
+ LOGIF_RLOCK_TRACKER;
+ struct ifnet *tap_if;
+
+ /*
+ * The header length picks the target: ETHER_HDR_LEN selects
+ * V_log_if and PFLOG_HDRLEN selects V_pflog_if. Any other length,
+ * or a target that does not exist, means nothing is tapped.
+ */
+ tap_if = NULL;
+ LOGIF_RLOCK();
+ if (dlen == ETHER_HDR_LEN)
+ tap_if = V_log_if;
+ else if (dlen == PFLOG_HDRLEN)
+ tap_if = V_pflog_if;
+ if (tap_if != NULL)
+ BPF_MTAP2(tap_if, data, dlen, m);
+ LOGIF_RUNLOCK();
+}
+
+void
+ipfw_bpf_init(int first)
+{
+
+ if (first) { /* one-time setup: log_if_lock is a plain static, not a per-vnet variable */
+ LOGIF_LOCK_INIT();
+ V_log_if = NULL;
+ V_pflog_if = NULL;
+ }
+ V_ipfw_cloner = if_clone_simple(ipfwname, ipfw_clone_create,
+ ipfw_clone_destroy, 0); /* cloner for "ipfw" interfaces; handle kept for ipfw_bpf_uninit() */
+ V_ipfwlog_cloner = if_clone_simple(ipfwlogname, ipfwlog_clone_create,
+ ipfw_clone_destroy, 0); /* cloner for "ipfwlog" interfaces; shares the destroy handler */
+}
+
+void
+ipfw_bpf_uninit(int last)
+{
+
+ if_clone_detach(V_ipfw_cloner); /* detach both cloners registered by ipfw_bpf_init() */
+ if_clone_detach(V_ipfwlog_cloner);
+ if (last) /* the shared lock is destroyed only when the caller flags the last teardown */
+ LOGIF_LOCK_DESTROY();
+}
+
diff --git a/sys/netpfil/ipfw/ip_fw_log.c b/sys/netpfil/ipfw/ip_fw_log.c
index 7ef92cd..a8e53fe 100644
--- a/sys/netpfil/ipfw/ip_fw_log.c
+++ b/sys/netpfil/ipfw/ip_fw_log.c
@@ -40,20 +40,14 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
-#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
-#include <sys/lock.h>
-#include <sys/rwlock.h>
#include <net/ethernet.h> /* for ETHERTYPE_IP */
#include <net/if.h>
#include <net/if_var.h>
-#include <net/if_clone.h>
#include <net/vnet.h>
-#include <net/if_types.h> /* for IFT_PFLOG */
-#include <net/bpf.h> /* for BPF */
#include <netinet/in.h>
#include <netinet/ip.h>
@@ -96,155 +90,6 @@ __FBSDID("$FreeBSD$");
#define SNP(buf) buf, sizeof(buf)
#endif /* !__APPLE__ */
-#ifdef WITHOUT_BPF
-void
-ipfw_log_bpf(int onoff)
-{
-}
-#else /* !WITHOUT_BPF */
-static VNET_DEFINE(struct ifnet *, log_if); /* hook to attach to bpf */
-#define V_log_if VNET(log_if)
-static struct rwlock log_if_lock;
-#define LOGIF_LOCK_INIT(x) rw_init(&log_if_lock, "ipfw log_if lock")
-#define LOGIF_LOCK_DESTROY(x) rw_destroy(&log_if_lock)
-#define LOGIF_RLOCK(x) rw_rlock(&log_if_lock)
-#define LOGIF_RUNLOCK(x) rw_runlock(&log_if_lock)
-#define LOGIF_WLOCK(x) rw_wlock(&log_if_lock)
-#define LOGIF_WUNLOCK(x) rw_wunlock(&log_if_lock)
-
-static const char ipfwname[] = "ipfw";
-
-/* we use this dummy function for all ifnet callbacks */
-static int
-log_dummy(struct ifnet *ifp, u_long cmd, caddr_t addr)
-{
- return EINVAL;
-}
-
-static int
-ipfw_log_output(struct ifnet *ifp, struct mbuf *m,
- const struct sockaddr *dst, struct route *ro)
-{
- if (m != NULL)
- FREE_PKT(m);
- return EINVAL;
-}
-
-static void
-ipfw_log_start(struct ifnet* ifp)
-{
- panic("ipfw_log_start() must not be called");
-}
-
-static const u_char ipfwbroadcastaddr[6] =
- { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
-
-static int
-ipfw_log_clone_match(struct if_clone *ifc, const char *name)
-{
-
- return (strncmp(name, ipfwname, sizeof(ipfwname) - 1) == 0);
-}
-
-static int
-ipfw_log_clone_create(struct if_clone *ifc, char *name, size_t len,
- caddr_t params)
-{
- int error;
- int unit;
- struct ifnet *ifp;
-
- error = ifc_name2unit(name, &unit);
- if (error)
- return (error);
-
- error = ifc_alloc_unit(ifc, &unit);
- if (error)
- return (error);
-
- ifp = if_alloc(IFT_PFLOG);
- if (ifp == NULL) {
- ifc_free_unit(ifc, unit);
- return (ENOSPC);
- }
- ifp->if_dname = ipfwname;
- ifp->if_dunit = unit;
- snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", ipfwname, unit);
- strlcpy(name, ifp->if_xname, len);
- ifp->if_mtu = 65536;
- ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST;
- ifp->if_init = (void *)log_dummy;
- ifp->if_ioctl = log_dummy;
- ifp->if_start = ipfw_log_start;
- ifp->if_output = ipfw_log_output;
- ifp->if_addrlen = 6;
- ifp->if_hdrlen = 14;
- ifp->if_broadcastaddr = ipfwbroadcastaddr;
- ifp->if_baudrate = IF_Mbps(10);
-
- LOGIF_WLOCK();
- if (V_log_if == NULL)
- V_log_if = ifp;
- else {
- LOGIF_WUNLOCK();
- if_free(ifp);
- ifc_free_unit(ifc, unit);
- return (EEXIST);
- }
- LOGIF_WUNLOCK();
- if_attach(ifp);
- bpfattach(ifp, DLT_EN10MB, 14);
-
- return (0);
-}
-
-static int
-ipfw_log_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
-{
- int unit;
-
- if (ifp == NULL)
- return (0);
-
- LOGIF_WLOCK();
- if (V_log_if != NULL && ifp == V_log_if)
- V_log_if = NULL;
- else {
- LOGIF_WUNLOCK();
- return (EINVAL);
- }
- LOGIF_WUNLOCK();
-
- unit = ifp->if_dunit;
- bpfdetach(ifp);
- if_detach(ifp);
- if_free(ifp);
- ifc_free_unit(ifc, unit);
-
- return (0);
-}
-
-static VNET_DEFINE(struct if_clone *, ipfw_log_cloner);
-#define V_ipfw_log_cloner VNET(ipfw_log_cloner)
-
-void
-ipfw_log_bpf(int onoff)
-{
-
- if (onoff) {
- if (IS_DEFAULT_VNET(curvnet))
- LOGIF_LOCK_INIT();
- V_ipfw_log_cloner = if_clone_advanced(ipfwname, 0,
- ipfw_log_clone_match, ipfw_log_clone_create,
- ipfw_log_clone_destroy);
- } else {
- if_clone_detach(V_ipfw_log_cloner);
- if (IS_DEFAULT_VNET(curvnet))
- LOGIF_LOCK_DESTROY();
- }
-}
-#endif /* !WITHOUT_BPF */
-
#define TARG(k, f) IP_FW_ARG_TABLEARG(chain, k, f)
/*
* We enter here when we have a rule with O_LOG.
@@ -260,29 +105,23 @@ ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen,
char action2[92], proto[128], fragment[32];
if (V_fw_verbose == 0) {
-#ifndef WITHOUT_BPF
- LOGIF_RLOCK();
- if (V_log_if == NULL || V_log_if->if_bpf == NULL) {
- LOGIF_RUNLOCK();
- return;
- }
-
if (args->eh) /* layer2, use orig hdr */
- BPF_MTAP2(V_log_if, args->eh, ETHER_HDR_LEN, m);
+ ipfw_bpf_mtap2(args->eh, ETHER_HDR_LEN, m);
else {
/* Add fake header. Later we will store
* more info in the header.
*/
if (ip->ip_v == 4)
- BPF_MTAP2(V_log_if, "DDDDDDSSSSSS\x08\x00", ETHER_HDR_LEN, m);
- else if (ip->ip_v == 6)
- BPF_MTAP2(V_log_if, "DDDDDDSSSSSS\x86\xdd", ETHER_HDR_LEN, m);
+ ipfw_bpf_mtap2("DDDDDDSSSSSS\x08\x00",
+ ETHER_HDR_LEN, m);
+ else if (ip->ip_v == 6)
+ ipfw_bpf_mtap2("DDDDDDSSSSSS\x86\xdd",
+ ETHER_HDR_LEN, m);
else
/* Obviously bogus EtherType. */
- BPF_MTAP2(V_log_if, "DDDDDDSSSSSS\xff\xff", ETHER_HDR_LEN, m);
+ ipfw_bpf_mtap2("DDDDDDSSSSSS\xff\xff",
+ ETHER_HDR_LEN, m);
}
- LOGIF_RUNLOCK();
-#endif /* !WITHOUT_BPF */
return;
}
/* the old 'log' function */
diff --git a/sys/netpfil/ipfw/ip_fw_private.h b/sys/netpfil/ipfw/ip_fw_private.h
index bbc0114..d670a49 100644
--- a/sys/netpfil/ipfw/ip_fw_private.h
+++ b/sys/netpfil/ipfw/ip_fw_private.h
@@ -154,7 +154,9 @@ void ipfw_nat_destroy(void);
/* In ip_fw_log.c */
struct ip;
struct ip_fw_chain;
-void ipfw_log_bpf(int);
+void ipfw_bpf_init(int);
+void ipfw_bpf_uninit(int);
+void ipfw_bpf_mtap2(void *, u_int, struct mbuf *);
void ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen,
struct ip_fw_args *args, struct mbuf *m, struct ifnet *oif,
u_short offset, uint32_t tablearg, struct ip *ip);
@@ -741,6 +743,10 @@ typedef int (table_lookup_t)(struct table_info *ti, void *key, uint32_t keylen,
int ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen,
void *paddr, uint32_t *val);
+struct named_object *ipfw_objhash_lookup_table_kidx(struct ip_fw_chain *ch,
+ uint16_t kidx);
+int ipfw_ref_table(struct ip_fw_chain *ch, ipfw_obj_ntlv *ntlv, uint16_t *kidx);
+void ipfw_unref_table(struct ip_fw_chain *ch, uint16_t kidx);
int ipfw_init_tables(struct ip_fw_chain *ch, int first);
int ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables);
int ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int nsets);
diff --git a/sys/netpfil/ipfw/ip_fw_table.c b/sys/netpfil/ipfw/ip_fw_table.c
index c932e7e..d187211 100644
--- a/sys/netpfil/ipfw/ip_fw_table.c
+++ b/sys/netpfil/ipfw/ip_fw_table.c
@@ -1606,6 +1606,57 @@ ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables)
}
/*
+ * Lookup table's named object by its @kidx.
+ */
+struct named_object *
+ipfw_objhash_lookup_table_kidx(struct ip_fw_chain *ch, uint16_t kidx)
+{
+
+ return (ipfw_objhash_lookup_kidx(CHAIN_TO_NI(ch), kidx)); /* thin wrapper: search the chain's named-object instance */
+}
+
+/*
+ * Look up the table named by @ntlv and take a reference on it.
+ * Returns 0 and stores the table's kernel index in @kidx on success,
+ * the lookup error if the lookup fails, or ESRCH if no table matches.
+ * The UH write lock is asserted on entry.
+ */
+int
+ipfw_ref_table(struct ip_fw_chain *ch, ipfw_obj_ntlv *ntlv, uint16_t *kidx)
+{
+ struct table_config *tbl;
+ struct tid_info tinfo;
+ int rc;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+
+ ntlv_to_ti(ntlv, &tinfo);
+ rc = find_table_err(CHAIN_TO_NI(ch), &tinfo, &tbl);
+ if (rc == 0 && tbl == NULL)
+ rc = ESRCH;
+ if (rc == 0) {
+ /* Found: reference the table and report its index. */
+ tc_ref(tbl);
+ *kidx = tbl->no.kidx;
+ }
+ return (rc);
+}
+
+/*
+ * Drop one reference from the table whose kernel index is @kidx.
+ */
+void
+ipfw_unref_table(struct ip_fw_chain *ch, uint16_t kidx)
+{
+ struct named_object *tbl;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+ tbl = ipfw_objhash_lookup_kidx(CHAIN_TO_NI(ch), kidx);
+ KASSERT(tbl != NULL, ("Table with index %d not found", kidx));
+ tbl->refcnt--;
+}
+
+/*
* Lookup an arbtrary key @paddr of legth @plen in table @tbl.
* Stores found value in @val.
*
diff --git a/sys/netpfil/ipfw/nat64/ip_fw_nat64.c b/sys/netpfil/ipfw/nat64/ip_fw_nat64.c
new file mode 100644
index 0000000..58c4427
--- /dev/null
+++ b/sys/netpfil/ipfw/nat64/ip_fw_nat64.c
@@ -0,0 +1,129 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/nat64/ip_fw_nat64.h>
+#include <netpfil/ipfw/nat64/nat64_translate.h>
+
+
+int nat64_debug = 0;
+SYSCTL_DECL(_net_inet_ip_fw);
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, nat64_debug, CTLFLAG_RW,
+ &nat64_debug, 0, "Debug level for NAT64 module");
+
+int nat64_allow_private = 0;
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, nat64_allow_private, CTLFLAG_RW,
+ &nat64_allow_private, 0,
+ "Allow use of non-global IPv4 addresses with NAT64");
+
+static int
+vnet_ipfw_nat64_init(const void *arg __unused)
+{
+ struct ip_fw_chain *chain;
+ int err, is_first;
+
+ chain = &V_layer3_chain;
+ is_first = IS_DEFAULT_VNET(curvnet) ? 1 : 0;
+ /* Initialize the nat64stl part first, then the nat64lsn part. */
+ err = nat64stl_init(chain, is_first);
+ if (err == 0) {
+ err = nat64lsn_init(chain, is_first);
+ /* Undo the nat64stl setup when nat64lsn could not start. */
+ if (err != 0)
+ nat64stl_uninit(chain, is_first);
+ }
+ return (err);
+}
+
+static int
+vnet_ipfw_nat64_uninit(const void *arg __unused)
+{
+ struct ip_fw_chain *chain;
+ int is_last;
+
+ is_last = IS_DEFAULT_VNET(curvnet) ? 1 : 0; /* set for the default vnet only */
+ chain = &V_layer3_chain;
+ nat64stl_uninit(chain, is_last);
+ nat64lsn_uninit(chain, is_last);
+ return (0); /* always reports success */
+}
+
+static int
+ipfw_nat64_modevent(module_t mod, int type, void *unused)
+{
+
+ /*
+ * Nothing is done in this handler on load or unload, so both
+ * simply succeed; any other event is reported as unsupported.
+ */
+ if (type == MOD_LOAD || type == MOD_UNLOAD)
+ return (0);
+
+ return (EOPNOTSUPP);
+}
+
+static moduledata_t ipfw_nat64_mod = { /* passed to DECLARE_MODULE() below */
+ "ipfw_nat64", /* module name */
+ ipfw_nat64_modevent, /* module event handler */
+ 0 /* no extra argument for the handler */
+};
+
+/* Define startup order. */
+#define IPFW_NAT64_SI_SUB_FIREWALL SI_SUB_PROTO_IFATTACHDOMAIN
+#define IPFW_NAT64_MODEVENT_ORDER (SI_ORDER_ANY - 128) /* after ipfw */
+#define IPFW_NAT64_MODULE_ORDER (IPFW_NAT64_MODEVENT_ORDER + 1)
+#define IPFW_NAT64_VNET_ORDER (IPFW_NAT64_MODEVENT_ORDER + 2)
+
+DECLARE_MODULE(ipfw_nat64, ipfw_nat64_mod, IPFW_NAT64_SI_SUB_FIREWALL,
+ SI_ORDER_ANY);
+MODULE_DEPEND(ipfw_nat64, ipfw, 3, 3, 3);
+MODULE_VERSION(ipfw_nat64, 1);
+
+VNET_SYSINIT(vnet_ipfw_nat64_init, IPFW_NAT64_SI_SUB_FIREWALL,
+ IPFW_NAT64_VNET_ORDER, vnet_ipfw_nat64_init, NULL);
+VNET_SYSUNINIT(vnet_ipfw_nat64_uninit, IPFW_NAT64_SI_SUB_FIREWALL,
+ IPFW_NAT64_VNET_ORDER, vnet_ipfw_nat64_uninit, NULL);
diff --git a/sys/netpfil/ipfw/nat64/ip_fw_nat64.h b/sys/netpfil/ipfw/nat64/ip_fw_nat64.h
new file mode 100644
index 0000000..1d2bb77
--- /dev/null
+++ b/sys/netpfil/ipfw/nat64/ip_fw_nat64.h
@@ -0,0 +1,117 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _IP_FW_NAT64_H_
+#define _IP_FW_NAT64_H_
+
+#define DPRINTF(mask, fmt, ...) do { /* one statement: safe inside if/else */ \
+ if (nat64_debug & (mask)) \
+ printf("NAT64: %s: " fmt "\n", __func__, ## __VA_ARGS__); } while (0)
+#define DP_GENERIC 0x0001
+#define DP_OBJ 0x0002
+#define DP_JQUEUE 0x0004
+#define DP_STATE 0x0008
+#define DP_DROPS 0x0010
+#define DP_ALL 0xFFFF
+extern int nat64_debug;
+
+#if 0
+#define NAT64NOINLINE __noinline
+#else
+#define NAT64NOINLINE
+#endif
+
+int nat64stl_init(struct ip_fw_chain *ch, int first);
+void nat64stl_uninit(struct ip_fw_chain *ch, int last);
+int nat64lsn_init(struct ip_fw_chain *ch, int first);
+void nat64lsn_uninit(struct ip_fw_chain *ch, int last);
+
+struct ip_fw_nat64_stats {
+ counter_u64_t opcnt64; /* number of 6to4 packets translated */
+ counter_u64_t opcnt46; /* number of 4to6 packets translated */
+ counter_u64_t ofrags; /* number of fragments generated */
+ counter_u64_t ifrags; /* number of fragments received */
+ counter_u64_t oerrors; /* number of output errors */
+ counter_u64_t noroute4; /* presumably: no IPv4 route found -- confirm */
+ counter_u64_t noroute6; /* presumably: no IPv6 route found -- confirm */
+ counter_u64_t nomatch4; /* No addr/port match */
+ counter_u64_t noproto; /* Protocol not supported */
+ counter_u64_t nomem; /* mbufs allocation failed */
+ counter_u64_t dropped; /* number of packets silently
+ * dropped due to some errors/
+ * unsupported/etc.
+ */
+
+ counter_u64_t jrequests; /* number of jobs requests queued */
+ counter_u64_t jcalls; /* number of jobs handler calls */
+ counter_u64_t jhostsreq; /* number of hosts requests */
+ counter_u64_t jportreq; /* presumably: number of portgroup requests */
+ counter_u64_t jhostfails; /* presumably: number of failed host allocs */
+ counter_u64_t jportfails; /* presumably: number of failed portgroup allocs */
+ counter_u64_t jmaxlen; /* presumably: max queue length reached */
+ counter_u64_t jnomem; /* presumably: no memory to alloc queue item */
+ counter_u64_t jreinjected; /* presumably: packets reinjected to the queue */
+
+ counter_u64_t screated; /* presumably: number of states created */
+ counter_u64_t sdeleted; /* presumably: number of states deleted */
+ counter_u64_t spgcreated; /* presumably: number of portgroups created */
+ counter_u64_t spgdeleted; /* presumably: number of portgroups deleted */
+};
+
+#define IPFW_NAT64_VERSION 1
+#define NAT64STATS (sizeof(struct ip_fw_nat64_stats) / sizeof(counter_u64_t))
+typedef struct _nat64_stats_block {
+ counter_u64_t stats[NAT64STATS]; /* one slot per ip_fw_nat64_stats field */
+} nat64_stats_block;
+#define NAT64STAT_ADD(s, f, v) /* slot index = field offset / field size */ \
+ counter_u64_add((s)->stats[ \
+ offsetof(struct ip_fw_nat64_stats, f) / sizeof(counter_u64_t)], (v))
+#define NAT64STAT_INC(s, f) NAT64STAT_ADD(s, f, 1)
+#define NAT64STAT_FETCH(s, f) /* same indexing as NAT64STAT_ADD */ \
+ counter_u64_fetch((s)->stats[ \
+ offsetof(struct ip_fw_nat64_stats, f) / sizeof(counter_u64_t)])
+
+#define L3HDR(_ip, _t) ((_t)((u_int32_t *)(_ip) + (_ip)->ip_hl))
+#define TCP(p) ((struct tcphdr *)(p))
+#define UDP(p) ((struct udphdr *)(p))
+#define ICMP(p) ((struct icmphdr *)(p))
+#define ICMP6(p) ((struct icmp6_hdr *)(p))
+
+#define NAT64SKIP 0
+#define NAT64RETURN 1
+#define NAT64MFREE -1
+
+/* Well-known prefix 64:ff9b::/96 */
+#define IPV6_ADDR_INT32_WKPFX htonl(0x64ff9b)
+#define IN6_IS_ADDR_WKPFX(a) \
+ ((a)->s6_addr32[0] == IPV6_ADDR_INT32_WKPFX && \
+ (a)->s6_addr32[1] == 0 && (a)->s6_addr32[2] == 0)
+
+#endif
+
diff --git a/sys/netpfil/ipfw/nat64/nat64_translate.c b/sys/netpfil/ipfw/nat64/nat64_translate.c
new file mode 100644
index 0000000..aefd0f9
--- /dev/null
+++ b/sys/netpfil/ipfw/nat64/nat64_translate.c
@@ -0,0 +1,1572 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "opt_ipfw.h"
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/rmlock.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_pflog.h>
+#include <net/pfil.h>
+#include <net/netisr.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
+#include <netinet/ip6.h>
+#include <netinet/icmp6.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/ip6_var.h>
+
+#include <netpfil/pf/pf.h>
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/nat64/ip_fw_nat64.h>
+#include <netpfil/ipfw/nat64/nat64_translate.h>
+#include <machine/in_cksum.h>
+
+/*
+ * Pass a packet to the ipfw BPF tap together with its pflog header.
+ * The header is stamped with the outbound direction and with the
+ * address family supplied by the caller before it is handed over.
+ */
+static void
+nat64_log(struct pfloghdr *logdata, struct mbuf *m, sa_family_t family)
+{
+
+	logdata->af = family;
+	logdata->dir = PF_OUT;
+	ipfw_bpf_mtap2(logdata, PFLOG_HDRLEN, m);
+}
+#ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT
+static NAT64NOINLINE struct sockaddr* nat64_find_route4(struct route *ro,
+ in_addr_t dest, struct mbuf *m); /* defined later in this file; used by nat64_output_one() */
+static NAT64NOINLINE struct sockaddr* nat64_find_route6(struct route_in6 *ro,
+ struct in6_addr *dest, struct mbuf *m); /* defined later in this file; used by nat64_output_one() */
+
+/*
+ * Direct-output variant: transmit the packet through the if_output
+ * method of the given interface.  When logdata is not NULL the packet is
+ * logged first, using the address family of the destination sockaddr.
+ * A non-zero result from if_output is counted in the oerrors statistic
+ * and returned to the caller; 0 is returned on success.
+ */
+static NAT64NOINLINE int
+nat64_output(struct ifnet *ifp, struct mbuf *m,
+    struct sockaddr *dst, struct route *ro, nat64_stats_block *stats,
+    void *logdata)
+{
+	int rc;
+
+	if (logdata != NULL)
+		nat64_log(logdata, m, dst->sa_family);
+	rc = (*ifp->if_output)(ifp, m, dst, ro);
+	if (rc == 0)
+		return (0);
+	NAT64STAT_INC(stats, oerrors);
+	return (rc);
+}
+
+/*
+ * Direct-output variant: look up a route for a single, fully built
+ * packet and transmit it on the interface of that route.
+ *
+ * The IP version is taken from the first header nibble.  An unknown
+ * version frees the mbuf, bumps the dropped counter and returns
+ * EAFNOSUPPORT.  When no usable route exists the matching noroute4 or
+ * noroute6 counter is bumped, the mbuf is freed and EHOSTUNREACH is
+ * returned.  Otherwise the result of if_output is returned and a failure
+ * is counted in oerrors.  The route is released on every path that
+ * performed a lookup.
+ */
+static NAT64NOINLINE int
+nat64_output_one(struct mbuf *m, nat64_stats_block *stats, void *logdata)
+{
+	struct route_in6 ro6;
+	struct route ro4, *ro;
+	struct sockaddr *dst;
+	struct ip6_hdr *ip6;
+	struct ifnet *ifp;
+	struct ip *ip4;
+	int error;
+
+	ip4 = mtod(m, struct ip *);
+	if (ip4->ip_v == IPVERSION) {
+		ro = &ro4;
+		dst = nat64_find_route4(&ro4, ip4->ip_dst.s_addr, m);
+		if (dst == NULL)
+			NAT64STAT_INC(stats, noroute4);
+	} else if (ip4->ip_v == (IPV6_VERSION >> 4)) {
+		ip6 = (struct ip6_hdr *)ip4;
+		ro = (struct route *)&ro6;
+		dst = nat64_find_route6(&ro6, &ip6->ip6_dst, m);
+		if (dst == NULL)
+			NAT64STAT_INC(stats, noroute6);
+	} else {
+		m_freem(m);
+		NAT64STAT_INC(stats, dropped);
+		DPRINTF(DP_DROPS, "dropped due to unknown IP version");
+		return (EAFNOSUPPORT);
+	}
+
+	if (dst == NULL) {
+		FREE_ROUTE(ro);
+		m_freem(m);
+		return (EHOSTUNREACH);
+	}
+
+	if (logdata != NULL)
+		nat64_log(logdata, m, dst->sa_family);
+	ifp = ro->ro_rt->rt_ifp;
+	error = (*ifp->if_output)(ifp, m, dst, ro);
+	if (error != 0)
+		NAT64STAT_INC(stats, oerrors);
+	FREE_ROUTE(ro);
+	return (error);
+}
+#else /* !IPFIREWALL_NAT64_DIRECT_OUTPUT */
+/*
+ * netisr variant: instead of transmitting directly, queue the packet to
+ * the IPv4 or IPv6 netisr so that it re-enters the input path.  The ifp,
+ * dst and ro arguments are not used by this variant.
+ *
+ * The IP version is read from the first header nibble; an unknown
+ * version frees the mbuf, bumps the dropped counter and returns
+ * EAFNOSUPPORT.  A non-zero result of netisr_queue() is counted in
+ * oerrors and returned.
+ */
+static NAT64NOINLINE int
+nat64_output(struct ifnet *ifp, struct mbuf *m,
+    struct sockaddr *dst, struct route *ro, nat64_stats_block *stats,
+    void *logdata)
+{
+	int af, error, isr;
+
+	switch (mtod(m, struct ip *)->ip_v) {
+	case IPVERSION:
+		isr = NETISR_IP;
+		af = AF_INET;
+		break;
+	case (IPV6_VERSION >> 4):
+		isr = NETISR_IPV6;
+		af = AF_INET6;
+		break;
+	default:
+		m_freem(m);
+		NAT64STAT_INC(stats, dropped);
+		DPRINTF(DP_DROPS, "unknown IP version");
+		return (EAFNOSUPPORT);
+	}
+
+	if (logdata != NULL)
+		nat64_log(logdata, m, af);
+	error = netisr_queue(isr, m);
+	if (error != 0)
+		NAT64STAT_INC(stats, oerrors);
+	return (error);
+}
+
+/*
+ * netisr variant: a single packet needs no route lookup here, so this
+ * simply forwards to nat64_output() without interface, destination or
+ * route and returns its result.
+ */
+static NAT64NOINLINE int
+nat64_output_one(struct mbuf *m, nat64_stats_block *stats, void *logdata)
+{
+	int error;
+
+	error = nat64_output(NULL, m, NULL, NULL, stats, logdata);
+	return (error);
+}
+#endif /* !IPFIREWALL_NAT64_DIRECT_OUTPUT */
+
+
+#if 0
+void print_ipv6_header(struct ip6_hdr *ip6, char *buf, size_t bufsize);
+/*
+ * Debug helper (located inside an "#if 0" region, so it is not compiled).
+ * Formats "<source> -> <destination> <next header>" for the given IPv6
+ * header into buf, writing at most bufsize bytes.
+ */
+void
+print_ipv6_header(struct ip6_hdr *ip6, char *buf, size_t bufsize)
+{
+ char sbuf[INET6_ADDRSTRLEN], dbuf[INET6_ADDRSTRLEN];
+
+ inet_ntop(AF_INET6, &ip6->ip6_src, sbuf, sizeof(sbuf));
+ inet_ntop(AF_INET6, &ip6->ip6_dst, dbuf, sizeof(dbuf));
+ snprintf(buf, bufsize, "%s -> %s %d", sbuf, dbuf, ip6->ip6_nxt);
+}
+
+/*
+ * Build an IPv4-embedded IPv6 address (located inside an "#if 0" region,
+ * so it is not compiled).
+ *
+ * The configured prefix cfg->prefix is copied to *ip6 and the IPv4
+ * address ia is inserted at a position selected by cfg->plen.  Prefix
+ * lengths 32, 40, 48, 56, 64 and 96 are handled and yield 1; any other
+ * length yields 0.  On success byte 8 of the result is cleared after the
+ * address has been inserted.
+ * NOTE(review): the BYTE_ORDER-specific shifts suggest ia is expected in
+ * network byte order -- confirm against callers.
+ */
+static NAT64NOINLINE int
+nat64_embed_ip4(struct nat64_cfg *cfg, in_addr_t ia, struct in6_addr *ip6)
+{
+
+ /* assume the prefix is properly filled with zeros */
+ bcopy(&cfg->prefix, ip6, sizeof(*ip6));
+ switch (cfg->plen) {
+ case 32:
+ case 96:
+ ip6->s6_addr32[cfg->plen / 32] = ia; /* address fills one whole 32-bit word */
+ break;
+ case 40:
+ case 48:
+ case 56:
+#if BYTE_ORDER == BIG_ENDIAN
+ ip6->s6_addr32[1] = cfg->prefix.s6_addr32[1] |
+ (ia >> (cfg->plen % 32));
+ ip6->s6_addr32[2] = ia << (24 - cfg->plen % 32);
+#elif BYTE_ORDER == LITTLE_ENDIAN
+ ip6->s6_addr32[1] = cfg->prefix.s6_addr32[1] |
+ (ia << (cfg->plen % 32));
+ ip6->s6_addr32[2] = ia >> (24 - cfg->plen % 32);
+#endif
+ break;
+ case 64:
+#if BYTE_ORDER == BIG_ENDIAN
+ ip6->s6_addr32[2] = ia >> 8;
+ ip6->s6_addr32[3] = ia << 24;
+#elif BYTE_ORDER == LITTLE_ENDIAN
+ ip6->s6_addr32[2] = ia << 8;
+ ip6->s6_addr32[3] = ia >> 24;
+#endif
+ break;
+ default:
+ return (0);
+ };
+ ip6->s6_addr8[8] = 0; /* byte 8 is never part of the embedded address */
+ return (1);
+}
+/*
+ * Extract the IPv4 address embedded in an IPv6 address for the given
+ * prefix length (located inside an "#if 0" region, so it is not
+ * compiled).
+ *
+ * The first switch rejects addresses whose suffix bits are not zero, the
+ * second one reassembles the IPv4 address for prefix lengths 32, 40, 48,
+ * 56, 64 and 96.  The result must also pass nat64_check_ip4() and
+ * nat64_check_private_ip4().  Returns the extracted address, or 0 when
+ * the prefix length is unsupported or any of the checks fails.
+ */
+static NAT64NOINLINE in_addr_t
+nat64_extract_ip4(struct in6_addr *ip6, int plen)
+{
+ in_addr_t ia;
+
+ /*
+ * According to RFC 6052 p2.2:
+ * IPv4-embedded IPv6 addresses are composed of a variable-length
+ * prefix, the embedded IPv4 address, and a variable length suffix.
+ * The suffix bits are reserved for future extensions and SHOULD
+ * be set to zero.
+ */
+ switch (plen) {
+ case 32:
+ if (ip6->s6_addr32[3] != 0 || ip6->s6_addr32[2] != 0)
+ goto badip6;
+ break;
+ case 40:
+ if (ip6->s6_addr32[3] != 0 ||
+ (ip6->s6_addr32[2] & htonl(0xff00ffff)) != 0)
+ goto badip6;
+ break;
+ case 48:
+ if (ip6->s6_addr32[3] != 0 ||
+ (ip6->s6_addr32[2] & htonl(0xff0000ff)) != 0)
+ goto badip6;
+ break;
+ case 56:
+ if (ip6->s6_addr32[3] != 0 || ip6->s6_addr8[8] != 0)
+ goto badip6;
+ break;
+ case 64:
+ if (ip6->s6_addr8[8] != 0 ||
+ (ip6->s6_addr32[3] & htonl(0x00ffffff)) != 0)
+ goto badip6;
+ }; /* any other prefix length gets no suffix check here */
+ switch (plen) {
+ case 32:
+ case 96:
+ ia = ip6->s6_addr32[plen / 32]; /* address occupies one whole 32-bit word */
+ break;
+ case 40:
+ case 48:
+ case 56:
+#if BYTE_ORDER == BIG_ENDIAN
+ ia = (ip6->s6_addr32[1] << (plen % 32)) |
+ (ip6->s6_addr32[2] >> (24 - plen % 32));
+#elif BYTE_ORDER == LITTLE_ENDIAN
+ ia = (ip6->s6_addr32[1] >> (plen % 32)) |
+ (ip6->s6_addr32[2] << (24 - plen % 32));
+#endif
+ break;
+ case 64:
+#if BYTE_ORDER == BIG_ENDIAN
+ ia = (ip6->s6_addr32[2] << 8) | (ip6->s6_addr32[3] >> 24);
+#elif BYTE_ORDER == LITTLE_ENDIAN
+ ia = (ip6->s6_addr32[2] >> 8) | (ip6->s6_addr32[3] << 24);
+#endif
+ break;
+ default:
+ return (0);
+ };
+ if (nat64_check_ip4(ia) != 0 ||
+ nat64_check_private_ip4(ia) != 0)
+ goto badip4;
+
+ return (ia);
+badip4:
+ DPRINTF(DP_GENERIC, "invalid destination address: %08x", ia);
+ return (0);
+badip6:
+ DPRINTF(DP_GENERIC, "invalid IPv4-embedded IPv6 address");
+ return (0);
+}
+#endif
+
+/*
+ * RFC 1624 gives two equivalent forms of the incremental checksum
+ * update:
+ *	HC' = ~(~HC + ~m + m')	-- [Eqn. 3]
+ *	HC' = HC - ~m - m'	-- [Eqn. 4]
+ * When IPv4 addresses are replaced by IPv6 ones, the additional bytes can
+ * be treated as if they had been zeros before, and vice versa: when IPv6
+ * addresses are replaced by IPv4 ones, the bytes that are no longer used
+ * become zeros.  The payload length in the pseudo header is wider for
+ * IPv6, but its upper half is expected to be zero.  Applying equation 4
+ * to every changed word in turn gives:
+ *	HC' = HC - sum(~m[i] + m'[i])
+ *
+ * This function returns that folded 16-bit sum, computed from the
+ * complemented IPv4 source and destination addresses and from the IPv6
+ * source and destination addresses.  It is meant to be used as follows:
+ *	IPv6 to IPv4: HC' = cksum_add(HC, result)
+ *	IPv4 to IPv6: HC' = cksum_add(HC, ~result)
+ */
+static NAT64NOINLINE uint16_t
+nat64_cksum_convert(struct ip6_hdr *ip6, struct ip *ip)
+{
+	const uint16_t *word, *end;
+	uint32_t acc;
+
+	/* Complemented IPv4 addresses, taken 16 bits at a time. */
+	acc = (~ip->ip_src.s_addr >> 16) + (~ip->ip_src.s_addr & 0xffff);
+	acc += (~ip->ip_dst.s_addr >> 16) + (~ip->ip_dst.s_addr & 0xffff);
+
+	/* ip6_src and ip6_dst are adjacent: walk both as 16-bit words. */
+	word = (const uint16_t *)&ip6->ip6_src;
+	end = (const uint16_t *)(&ip6->ip6_src + 2);
+	while (word < end)
+		acc += *word++;
+
+	/* Fold the carries back into the low 16 bits. */
+	while ((acc >> 16) != 0)
+		acc = (acc >> 16) + (acc & 0xffff);
+	return (acc);
+}
+
+#if __FreeBSD_version < 1100000
+#define ip_fillid(ip) (ip)->ip_id = ip_newid()
+#endif
+/*
+ * Fill in an IPv4 header from the fields of an IPv6 header.
+ *
+ *	ip6	- source IPv6 header (traffic class and hop limit are used);
+ *	frag	- IPv6 fragment header, or NULL if the packet is not a
+ *		  fragment;
+ *	plen	- payload length following the IPv4 header;
+ *	proto	- upper level protocol; ICMPv6 is mapped to ICMP;
+ *	ip	- IPv4 header to fill in.
+ *
+ * The source and destination addresses of *ip are not touched and must
+ * be set by the caller.  Unfragmented packets get DF set; for fragments
+ * the offset and the "more fragments" flag are taken from the fragment
+ * header.  The header checksum is computed last.
+ */
+static NAT64NOINLINE void
+nat64_init_ip4hdr(const struct ip6_hdr *ip6, const struct ip6_frag *frag,
+    uint16_t plen, uint8_t proto, struct ip *ip)
+{
+
+	/* assume addresses are already initialized */
+	ip->ip_v = IPVERSION;
+	ip->ip_hl = sizeof(*ip) >> 2;
+	ip->ip_tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
+	ip->ip_len = htons(sizeof(*ip) + plen);
+#ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT
+	ip->ip_ttl = ip6->ip6_hlim - IPV6_HLIMDEC;
+#else
+	/* Forwarding code will decrement TTL. */
+	ip->ip_ttl = ip6->ip6_hlim;
+#endif
+	ip->ip_p = (proto == IPPROTO_ICMPV6) ? IPPROTO_ICMP: proto;
+	if (frag != NULL) {
+		/* Fragment offset is kept in 8-octet units in IPv4. */
+		ip->ip_off = htons(ntohs(frag->ip6f_offlg) >> 3);
+		if (frag->ip6f_offlg & IP6F_MORE_FRAG)
+			ip->ip_off |= htons(IP_MF);
+	} else {
+		ip->ip_off = htons(IP_DF);
+	}
+	/*
+	 * Choose the ID only after ip_off is valid: on FreeBSD 11 and
+	 * later ip_fillid() inspects the DF bit of ip_off, so calling it
+	 * earlier would make it read an uninitialized field.
+	 */
+	ip_fillid(ip);
+	ip->ip_sum = 0;
+	ip->ip_sum = in_cksum_hdr(ip);
+}
+
+#define FRAGSZ(mtu) ((mtu) - sizeof(struct ip6_hdr) - sizeof(struct ip6_frag))
+/*
+ * Prepend the IPv6 header *ip6 to the payload in m, fragmenting the
+ * packet when needed, and queue the resulting packet(s) on mq.
+ *
+ *	stats	- statistics block to account into;
+ *	ip6	- prepared IPv6 header; ip6_nxt and ip6_plen are rewritten
+ *		  when fragments are produced;
+ *	mq	- queue that receives the resulting packets;
+ *	m	- payload without any IP header;
+ *	mtu	- MTU of the outgoing path;
+ *	ip_id	- ID of the original IPv4 packet (network byte order);
+ *	ip_off	- offset and MF bits of the original IPv4 packet (network
+ *		  byte order), 0 if it was not a fragment.
+ *
+ * Returns 0 on success.  If the unfragmented packet cannot be built or
+ * queued, ENOMEM or ENOBUFS is returned.  If anything fails while
+ * fragmenting, the remaining mbufs are freed, mq is drained and ENOMEM is
+ * returned.  The mbuf is consumed in all cases.
+ */
+static NAT64NOINLINE int
+nat64_fragment6(nat64_stats_block *stats, struct ip6_hdr *ip6, struct mbufq *mq,
+    struct mbuf *m, uint32_t mtu, uint16_t ip_id, uint16_t ip_off)
+{
+	struct ip6_frag ip6f;
+	struct mbuf *n;
+	uint16_t fragsz, hlen, len, offset;
+	int plen;
+
+	plen = ntohs(ip6->ip6_plen);
+	hlen = sizeof(struct ip6_hdr);
+
+	/* Fragmentation isn't needed */
+	if (ip_off == 0 && plen <= mtu - hlen) {
+		M_PREPEND(m, hlen, M_NOWAIT);
+		if (m == NULL) {
+			NAT64STAT_INC(stats, nomem);
+			return (ENOMEM);
+		}
+		bcopy(ip6, mtod(m, void *), hlen);
+		if (mbufq_enqueue(mq, m) != 0) {
+			m_freem(m);
+			NAT64STAT_INC(stats, dropped);
+			DPRINTF(DP_DROPS, "dropped due to mbufq overflow");
+			return (ENOBUFS);
+		}
+		return (0);
+	}
+
+	hlen += sizeof(struct ip6_frag);
+	ip6f.ip6f_reserved = 0;
+	ip6f.ip6f_nxt = ip6->ip6_nxt;
+	ip6->ip6_nxt = IPPROTO_FRAGMENT;
+	if (ip_off != 0) {
+		/*
+		 * We have got an IPv4 fragment.
+		 * Use offset value and ip_id from original fragment.
+		 */
+		ip6f.ip6f_ident = htonl(ntohs(ip_id));
+		offset = (ntohs(ip_off) & IP_OFFMASK) << 3;
+		NAT64STAT_INC(stats, ifrags);
+	} else {
+		/* The packet size exceeds interface MTU */
+		ip6f.ip6f_ident = htonl(ip6_randomid());
+		offset = 0; /* First fragment */
+	}
+	/* Largest fragment payload: a multiple of 8 that fits the MTU. */
+	fragsz = FRAGSZ(mtu) & ~7;
+	while (plen > 0 && m != NULL) {
+		n = NULL;
+		len = fragsz;
+		if (len > plen)
+			len = plen;
+		ip6->ip6_plen = htons(len + sizeof(ip6f));
+		/* offset is a host-order byte count; store it in wire order */
+		ip6f.ip6f_offlg = htons(offset);
+		if (len < plen || (ip_off & htons(IP_MF)) != 0)
+			ip6f.ip6f_offlg |= IP6F_MORE_FRAG;
+		offset += len;
+		plen -= len;
+		if (plen > 0) {
+			/* Keep the rest of the payload for the next round. */
+			n = m_split(m, len, M_NOWAIT);
+			if (n == NULL)
+				goto fail;
+		}
+		M_PREPEND(m, hlen, M_NOWAIT);
+		if (m == NULL)
+			goto fail;
+		bcopy(ip6, mtod(m, void *), sizeof(struct ip6_hdr));
+		bcopy(&ip6f, mtodo(m, sizeof(struct ip6_hdr)),
+		    sizeof(struct ip6_frag));
+		if (mbufq_enqueue(mq, m) != 0)
+			goto fail;
+		m = n;
+	}
+	NAT64STAT_ADD(stats, ofrags, mbufq_len(mq));
+	return (0);
+fail:
+	if (m != NULL)
+		m_freem(m);
+	if (n != NULL)
+		m_freem(n);
+	mbufq_drain(mq);
+	NAT64STAT_INC(stats, nomem);
+	return (ENOMEM);
+}
+
+#if __FreeBSD_version < 1100000
+#define rt_expire rt_rmx.rmx_expire
+#define rt_mtu rt_rmx.rmx_mtu
+#endif
+/*
+ * Look up an IPv6 route to dest in the FIB of mbuf m.
+ *
+ * *ro is zeroed and filled in by the lookup.  NULL is returned when
+ * there is no route, when the route or its interface is not up and
+ * running, when the route is a still valid reject route, or when the
+ * link of the interface is down.  Otherwise the next hop is returned:
+ * the gateway address for gateway routes, the destination stored in
+ * ro->ro_dst in all other cases.
+ */
+static NAT64NOINLINE struct sockaddr*
+nat64_find_route6(struct route_in6 *ro, struct in6_addr *dest, struct mbuf *m)
+{
+	struct sockaddr_in6 *nexthop;
+	struct rtentry *rt;
+
+	bzero(ro, sizeof(*ro));
+	nexthop = (struct sockaddr_in6 *)&ro->ro_dst;
+	nexthop->sin6_family = AF_INET6;
+	nexthop->sin6_len = sizeof(*nexthop);
+	nexthop->sin6_addr = *dest;
+	IN6_LOOKUP_ROUTE(ro, M_GETFIB(m));
+
+	rt = ro->ro_rt;
+	if (rt == NULL || (rt->rt_flags & RTF_UP) == 0 ||
+	    (rt->rt_ifp->if_flags & IFF_UP) == 0 ||
+	    (rt->rt_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+		return (NULL);
+	/* A reject route counts only while it has not expired. */
+	if ((rt->rt_flags & RTF_REJECT) != 0 &&
+	    (rt->rt_expire == 0 || time_uptime < rt->rt_expire))
+		return (NULL);
+	if (rt->rt_ifp->if_link_state == LINK_STATE_DOWN)
+		return (NULL);
+	if ((rt->rt_flags & RTF_GATEWAY) != 0)
+		nexthop = (struct sockaddr_in6 *)rt->rt_gateway;
+	return ((struct sockaddr *)nexthop);
+}
+/*
+ * NAT64_ICMP6_PLEN is the maximum number of bytes of an offending packet
+ * that is quoted in a generated ICMPv6 error.
+ *
+ * nat64_icmp6_reflect() answers the IPv6 packet in m with an ICMPv6 error
+ * of the given type and code; mtu is stored in the icmp6_mtu field.  The
+ * reply is sent back to the source of m via nat64_output_one().  No reply
+ * is generated (and the dropped counter is bumped) when the headers of m
+ * are not contiguous, when m itself is an ICMPv6 error or redirect, or
+ * when type is not one of the four supported error types.  The mbuf m is
+ * always consumed.
+ */
+#define NAT64_ICMP6_PLEN 64
+static NAT64NOINLINE void
+nat64_icmp6_reflect(struct mbuf *m, uint8_t type, uint8_t code, uint32_t mtu,
+ nat64_stats_block *stats, void *logdata)
+{
+ struct icmp6_hdr *icmp6;
+ struct ip6_hdr *ip6, *oip6;
+ struct mbuf *n;
+ int len, plen;
+
+ len = 0;
+ plen = nat64_getlasthdr(m, &len); /* plen: upper level protocol, len: its offset */
+ if (plen < 0) {
+ DPRINTF(DP_DROPS, "mbuf isn't contigious");
+ goto freeit;
+ }
+ /*
+ * Do not send ICMPv6 in reply to ICMPv6 errors.
+ */
+ if (plen == IPPROTO_ICMPV6) {
+ if (m->m_len < len + sizeof(*icmp6)) {
+ DPRINTF(DP_DROPS, "mbuf isn't contigious");
+ goto freeit;
+ }
+ icmp6 = mtodo(m, len);
+ if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST ||
+ icmp6->icmp6_type == ND_REDIRECT) {
+ DPRINTF(DP_DROPS, "do not send ICMPv6 in reply to "
+ "ICMPv6 errors");
+ goto freeit;
+ }
+ }
+ /*
+ * Rate limiting is currently disabled:
+ if (icmp6_ratelimit(&ip6->ip6_src, type, code))
+ goto freeit;
+ */
+ ip6 = mtod(m, struct ip6_hdr *);
+ switch (type) {
+ case ICMP6_DST_UNREACH:
+ case ICMP6_PACKET_TOO_BIG:
+ case ICMP6_TIME_EXCEEDED:
+ case ICMP6_PARAM_PROB:
+ break;
+ default:
+ goto freeit;
+ }
+ /* Calculate length of ICMPv6 payload */
+ len = (m->m_pkthdr.len > NAT64_ICMP6_PLEN) ? NAT64_ICMP6_PLEN:
+ m->m_pkthdr.len;
+
+ /* Create new ICMPv6 datagram */
+ plen = len + sizeof(struct icmp6_hdr);
+ n = m_get2(sizeof(struct ip6_hdr) + plen + max_hdr, M_NOWAIT,
+ MT_HEADER, M_PKTHDR);
+ if (n == NULL) {
+ NAT64STAT_INC(stats, nomem);
+ m_freem(m);
+ return;
+ }
+ /*
+ * Move pkthdr from original mbuf. We should have initialized some
+ * fields, because we can reinject this mbuf to netisr and it will
+ * go through input path (it requires at least rcvif should be set).
+ * Also do M_ALIGN() to reduce chances of need to allocate new mbuf
+ * in the chain, when we will do M_PREPEND() or make some type of
+ * tunneling.
+ */
+ m_move_pkthdr(n, m);
+ M_ALIGN(n, sizeof(struct ip6_hdr) + plen + max_hdr);
+
+ n->m_len = n->m_pkthdr.len = sizeof(struct ip6_hdr) + plen;
+ oip6 = mtod(n, struct ip6_hdr *);
+ oip6->ip6_src = ip6->ip6_dst; /* reply goes back to the sender */
+ oip6->ip6_dst = ip6->ip6_src;
+ oip6->ip6_nxt = IPPROTO_ICMPV6;
+ oip6->ip6_flow = 0;
+ oip6->ip6_vfc |= IPV6_VERSION;
+ oip6->ip6_hlim = V_ip6_defhlim;
+ oip6->ip6_plen = htons(plen);
+
+ icmp6 = mtodo(n, sizeof(struct ip6_hdr));
+ icmp6->icmp6_cksum = 0;
+ icmp6->icmp6_type = type;
+ icmp6->icmp6_code = code;
+ icmp6->icmp6_mtu = htonl(mtu);
+
+ m_copydata(m, 0, len, mtodo(n, sizeof(struct ip6_hdr) +
+ sizeof(struct icmp6_hdr))); /* quote the start of the offending packet */
+ icmp6->icmp6_cksum = in6_cksum(n, IPPROTO_ICMPV6,
+ sizeof(struct ip6_hdr), plen);
+ m_freem(m);
+ nat64_output_one(n, stats, logdata);
+ return;
+freeit:
+ NAT64STAT_INC(stats, dropped);
+ m_freem(m);
+}
+
+/*
+ * Look up an IPv4 route to dest in the FIB of mbuf m.
+ *
+ * *ro is zeroed and filled in by the lookup.  NULL is returned when
+ * there is no route, when the route or its interface is not up and
+ * running, when the route is a still valid reject route, or when the
+ * link of the interface is down.  Otherwise the next hop is returned:
+ * the gateway address for gateway routes, the destination stored in
+ * ro->ro_dst in all other cases.
+ */
+static NAT64NOINLINE struct sockaddr*
+nat64_find_route4(struct route *ro, in_addr_t dest, struct mbuf *m)
+{
+	struct sockaddr_in *nexthop;
+	struct rtentry *rt;
+
+	bzero(ro, sizeof(*ro));
+	nexthop = (struct sockaddr_in *)&ro->ro_dst;
+	nexthop->sin_family = AF_INET;
+	nexthop->sin_len = sizeof(*nexthop);
+	nexthop->sin_addr.s_addr = dest;
+	IN_LOOKUP_ROUTE(ro, M_GETFIB(m));
+
+	rt = ro->ro_rt;
+	if (rt == NULL || (rt->rt_flags & RTF_UP) == 0 ||
+	    (rt->rt_ifp->if_flags & IFF_UP) == 0 ||
+	    (rt->rt_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+		return (NULL);
+	/* A reject route counts only while it has not expired. */
+	if ((rt->rt_flags & RTF_REJECT) != 0 &&
+	    (rt->rt_expire == 0 || time_uptime < rt->rt_expire))
+		return (NULL);
+	if (rt->rt_ifp->if_link_state == LINK_STATE_DOWN)
+		return (NULL);
+	if ((rt->rt_flags & RTF_GATEWAY) != 0)
+		nexthop = (struct sockaddr_in *)rt->rt_gateway;
+	return ((struct sockaddr *)nexthop);
+}
+
+#define NAT64_ICMP_PLEN 64
+/*
+ * Answer the IPv4 packet in m with an ICMP error of the given type and
+ * code; mtu is stored in the "next MTU" field.  The reply is sent back to
+ * the source of m via nat64_output_one().
+ *
+ * No reply is generated (and the dropped counter is bumped) when m is
+ * not the first fragment, when m is itself an ICMP error, when the ICMP
+ * header of m is not contiguous, or when type is not ICMP_UNREACH,
+ * ICMP_TIMXCEED or ICMP_PARAMPROB.  Packets of up to NAT64_ICMP_PLEN
+ * bytes are quoted completely, longer ones are quoted up to the IP
+ * header plus 8 bytes.  The mbuf m is always consumed.
+ */
+static NAT64NOINLINE void
+nat64_icmp_reflect(struct mbuf *m, uint8_t type,
+    uint8_t code, uint16_t mtu, nat64_stats_block *stats, void *logdata)
+{
+	struct icmp *icmp;
+	struct ip *ip, *oip;
+	struct mbuf *n;
+	int len, plen;
+
+	ip = mtod(m, struct ip *);
+	/* Do not send ICMP error if packet is not the first fragment */
+	if (ip->ip_off & ~htons(IP_MF|IP_DF)) {
+		DPRINTF(DP_DROPS, "not first fragment");
+		goto freeit;
+	}
+	/* Do not send ICMP in reply to ICMP errors */
+	if (ip->ip_p == IPPROTO_ICMP) {
+		/* The ICMP header itself must be readable, not just IP. */
+		if (m->m_len < (ip->ip_hl << 2) + ICMP_MINLEN) {
+			DPRINTF(DP_DROPS, "mbuf isn't contigious");
+			goto freeit;
+		}
+		icmp = mtodo(m, ip->ip_hl << 2);
+		if (!ICMP_INFOTYPE(icmp->icmp_type)) {
+			DPRINTF(DP_DROPS, "do not send ICMP in reply to "
+			    "ICMP errors");
+			goto freeit;
+		}
+	}
+	switch (type) {
+	case ICMP_UNREACH:
+	case ICMP_TIMXCEED:
+	case ICMP_PARAMPROB:
+		break;
+	default:
+		goto freeit;
+	}
+	/* Calculate length of ICMP payload */
+	len = (m->m_pkthdr.len > NAT64_ICMP_PLEN) ? (ip->ip_hl << 2) + 8:
+	    m->m_pkthdr.len;
+	/*
+	 * With a long options area "header + 8" can exceed a packet that
+	 * is only slightly larger than NAT64_ICMP_PLEN; never quote more
+	 * than the packet holds.
+	 */
+	if (len > m->m_pkthdr.len)
+		len = m->m_pkthdr.len;
+
+	/* Create new ICMPv4 datagram */
+	plen = len + sizeof(struct icmphdr) + sizeof(uint32_t);
+	n = m_get2(sizeof(struct ip) + plen + max_hdr, M_NOWAIT,
+	    MT_HEADER, M_PKTHDR);
+	if (n == NULL) {
+		NAT64STAT_INC(stats, nomem);
+		m_freem(m);
+		return;
+	}
+	m_move_pkthdr(n, m);
+	M_ALIGN(n, sizeof(struct ip) + plen + max_hdr);
+
+	n->m_len = n->m_pkthdr.len = sizeof(struct ip) + plen;
+	oip = mtod(n, struct ip *);
+	oip->ip_v = IPVERSION;
+	oip->ip_hl = sizeof(struct ip) >> 2;
+	oip->ip_tos = 0;
+	oip->ip_len = htons(n->m_pkthdr.len);
+	oip->ip_ttl = V_ip_defttl;
+	oip->ip_p = IPPROTO_ICMP;
+	oip->ip_off = htons(IP_DF);
+	/*
+	 * Choose the ID only after ip_off is valid: on FreeBSD 11 and
+	 * later ip_fillid() inspects the DF bit of ip_off.
+	 */
+	ip_fillid(oip);
+	/* The reply goes back to the sender of the offending packet. */
+	oip->ip_src = ip->ip_dst;
+	oip->ip_dst = ip->ip_src;
+	oip->ip_sum = 0;
+	oip->ip_sum = in_cksum_hdr(oip);
+
+	icmp = mtodo(n, sizeof(struct ip));
+	icmp->icmp_type = type;
+	icmp->icmp_code = code;
+	icmp->icmp_cksum = 0;
+	icmp->icmp_pmvoid = 0;
+	icmp->icmp_nextmtu = htons(mtu);
+	/* Quote the beginning of the offending packet. */
+	m_copydata(m, 0, len, mtodo(n, sizeof(struct ip) +
+	    sizeof(struct icmphdr) + sizeof(uint32_t)));
+	icmp->icmp_cksum = in_cksum_skip(n, sizeof(struct ip) + plen,
+	    sizeof(struct ip));
+	m_freem(m);
+	nat64_output_one(n, stats, logdata);
+	return;
+freeit:
+	NAT64STAT_INC(stats, dropped);
+	m_freem(m);
+}
+
+/*
+ * Turn an ICMPv4 echo request/reply header into its ICMPv6 counterpart
+ * in place.
+ *
+ * The type field is replaced by the given ICMPv6 type and, when id is not
+ * zero, the identifier is replaced by id.  The checksum is updated
+ * incrementally for both changes and finally extended with the IPv6
+ * pseudo header built from ip6, which an ICMPv6 checksum must cover.
+ */
+static void
+nat64_icmp_handle_echo(struct ip6_hdr *ip6, struct icmp6_hdr *icmp6,
+    uint16_t id, uint8_t type)
+{
+	uint16_t *typecode, before, sum;
+
+	/* Type and code share the first 16-bit word of the header. */
+	typecode = (uint16_t *)icmp6;
+	before = *typecode;
+	icmp6->icmp6_type = type;
+	/* Account for the changed type in the checksum. */
+	sum = cksum_adjust(icmp6->icmp6_cksum, before, *typecode);
+	if (id != 0) {
+		before = icmp6->icmp6_id;
+		icmp6->icmp6_id = id;
+		/* Account for the changed identifier in the checksum. */
+		sum = cksum_adjust(sum, before, id);
+	}
+	/* Add the IPv6 pseudo header to the checksum. */
+	icmp6->icmp6_cksum = ~in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen),
+	    IPPROTO_ICMPV6, ~sum);
+}
+/*
+ * Translate the ICMPv4 message found at the given offset of m into
+ * ICMPv6.
+ *
+ *	m	- packet with the IPv4 header still in front;
+ *	ip6	- prepared outer IPv6 header; ip6_plen is rewritten when a
+ *		  new ICMPv6 error message is built;
+ *	icmpid	- if not zero, replaces the echo identifier or the source
+ *		  port found in the quoted packet;
+ *	offset	- offset of the ICMP header within m;
+ *	stats	- statistics block to account into.
+ *
+ * Echo request/reply messages are converted in place and m is returned.
+ * For supported error messages a new mbuf is built that holds room for
+ * offset bytes, the ICMPv6 header, a translated inner IPv6 header and the
+ * beginning of the quoted payload; m is freed and the new mbuf is
+ * returned.  On any failure or unsupported message m is freed and NULL
+ * is returned (counted as nomem or dropped).
+ */
+static NAT64NOINLINE struct mbuf *
+nat64_icmp_translate(struct mbuf *m, struct ip6_hdr *ip6, uint16_t icmpid,
+ int offset, nat64_stats_block *stats)
+{
+ struct ip ip;
+ struct icmp *icmp;
+ struct tcphdr *tcp;
+ struct udphdr *udp;
+ struct ip6_hdr *eip6;
+ struct mbuf *n;
+ uint32_t mtu;
+ int len, hlen, plen;
+ uint8_t type, code;
+
+ if (m->m_len < offset + ICMP_MINLEN)
+ m = m_pullup(m, offset + ICMP_MINLEN);
+ if (m == NULL) {
+ NAT64STAT_INC(stats, nomem);
+ return (m);
+ }
+ mtu = 0; /* also carries the ICMPv6 pointer value for parameter problems */
+ icmp = mtodo(m, offset);
+ /* RFC 7915 p4.2 */
+ switch (icmp->icmp_type) {
+ case ICMP_ECHOREPLY:
+ type = ICMP6_ECHO_REPLY;
+ code = 0;
+ break;
+ case ICMP_UNREACH:
+ type = ICMP6_DST_UNREACH;
+ switch (icmp->icmp_code) {
+ case ICMP_UNREACH_NET:
+ case ICMP_UNREACH_HOST:
+ case ICMP_UNREACH_SRCFAIL:
+ case ICMP_UNREACH_NET_UNKNOWN:
+ case ICMP_UNREACH_HOST_UNKNOWN:
+ case ICMP_UNREACH_TOSNET:
+ case ICMP_UNREACH_TOSHOST:
+ code = ICMP6_DST_UNREACH_NOROUTE;
+ break;
+ case ICMP_UNREACH_PROTOCOL:
+ type = ICMP6_PARAM_PROB;
+ code = ICMP6_PARAMPROB_NEXTHEADER;
+ break;
+ case ICMP_UNREACH_PORT:
+ code = ICMP6_DST_UNREACH_NOPORT;
+ break;
+ case ICMP_UNREACH_NEEDFRAG:
+ type = ICMP6_PACKET_TOO_BIG;
+ code = 0;
+ /* XXX: needs an additional look */
+ mtu = max(IPV6_MMTU, ntohs(icmp->icmp_nextmtu) + 20);
+ break;
+ case ICMP_UNREACH_NET_PROHIB:
+ case ICMP_UNREACH_HOST_PROHIB:
+ case ICMP_UNREACH_FILTER_PROHIB:
+ case ICMP_UNREACH_PRECEDENCE_CUTOFF:
+ code = ICMP6_DST_UNREACH_ADMIN;
+ break;
+ default:
+ DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d",
+ icmp->icmp_type, icmp->icmp_code);
+ goto freeit;
+ }
+ break;
+ case ICMP_TIMXCEED:
+ type = ICMP6_TIME_EXCEEDED;
+ code = icmp->icmp_code;
+ break;
+ case ICMP_ECHO:
+ type = ICMP6_ECHO_REQUEST;
+ code = 0;
+ break;
+ case ICMP_PARAMPROB:
+ type = ICMP6_PARAM_PROB;
+ switch (icmp->icmp_code) {
+ case ICMP_PARAMPROB_ERRATPTR:
+ case ICMP_PARAMPROB_LENGTH:
+ code = ICMP6_PARAMPROB_HEADER;
+ switch (icmp->icmp_pptr) { /* map IPv4 header offset to IPv6 header offset */
+ case 0: /* Version/IHL */
+ case 1: /* Type Of Service */
+ mtu = icmp->icmp_pptr;
+ break;
+ case 2: /* Total Length */
+ case 3: mtu = 4; /* Payload Length */
+ break;
+ case 8: /* Time to Live */
+ mtu = 7; /* Hop Limit */
+ break;
+ case 9: /* Protocol */
+ mtu = 6; /* Next Header */
+ break;
+ case 12: /* Source address */
+ case 13:
+ case 14:
+ case 15:
+ mtu = 8;
+ break;
+ case 16: /* Destination address */
+ case 17:
+ case 18:
+ case 19:
+ mtu = 24;
+ break;
+ default: /* Silently drop */
+ DPRINTF(DP_DROPS, "Unsupported ICMP type %d,"
+ " code %d, pptr %d", icmp->icmp_type,
+ icmp->icmp_code, icmp->icmp_pptr);
+ goto freeit;
+ }
+ break;
+ default:
+ DPRINTF(DP_DROPS, "Unsupported ICMP type %d,"
+ " code %d, pptr %d", icmp->icmp_type,
+ icmp->icmp_code, icmp->icmp_pptr);
+ goto freeit;
+ }
+ break;
+ default:
+ DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d",
+ icmp->icmp_type, icmp->icmp_code);
+ goto freeit;
+ }
+ /*
+ * For echo request/reply we can use original payload,
+ * but we need adjust icmp_cksum, because ICMPv6 cksum covers
+ * IPv6 pseudo header and ICMPv6 types differs from ICMPv4.
+ */
+ if (type == ICMP6_ECHO_REQUEST || type == ICMP6_ECHO_REPLY) {
+ nat64_icmp_handle_echo(ip6, ICMP6(icmp), icmpid, type);
+ return (m);
+ }
+ /*
+ * For other types of ICMP messages we need to translate inner
+ * IPv4 header to IPv6 header.
+ * Assume ICMP src is the same as payload dst
+ * E.g. we have ( GWsrc1 , NATIP1 ) in outer header
+ * and ( NATIP1, Hostdst1 ) in ICMP copy header.
+ * In that case, we already have map for NATIP1 and GWsrc1.
+ * The only thing we need is to copy IPv6 map prefix to
+ * Hostdst1.
+ */
+ hlen = offset + ICMP_MINLEN;
+ if (m->m_pkthdr.len < hlen + sizeof(struct ip) + ICMP_MINLEN) {
+ DPRINTF(DP_DROPS, "Message is too short %d",
+ m->m_pkthdr.len);
+ goto freeit;
+ }
+ m_copydata(m, hlen, sizeof(struct ip), (char *)&ip); /* local copy of the quoted IPv4 header */
+ if (ip.ip_v != IPVERSION) {
+ DPRINTF(DP_DROPS, "Wrong IP version %d", ip.ip_v);
+ goto freeit;
+ }
+ hlen += ip.ip_hl << 2; /* Skip inner IP header */
+ if (nat64_check_ip4(ip.ip_src.s_addr) != 0 ||
+ nat64_check_ip4(ip.ip_dst.s_addr) != 0 ||
+ nat64_check_private_ip4(ip.ip_src.s_addr) != 0 ||
+ nat64_check_private_ip4(ip.ip_dst.s_addr) != 0) {
+ DPRINTF(DP_DROPS, "IP addresses checks failed %04x -> %04x",
+ ntohl(ip.ip_src.s_addr), ntohl(ip.ip_dst.s_addr));
+ goto freeit;
+ }
+ if (m->m_pkthdr.len < hlen + ICMP_MINLEN) {
+ DPRINTF(DP_DROPS, "Message is too short %d",
+ m->m_pkthdr.len);
+ goto freeit;
+ }
+#if 0
+ /*
+ * Check that inner source matches the outer destination.
+ * XXX: We need some method to convert IPv4 into IPv6 address here,
+ * and compare IPv6 addresses.
+ */
+ if (ip.ip_src.s_addr != nat64_get_ip4(&ip6->ip6_dst)) {
+ DPRINTF(DP_GENERIC, "Inner source doesn't match destination ",
+ "%04x vs %04x", ip.ip_src.s_addr,
+ nat64_get_ip4(&ip6->ip6_dst));
+ goto freeit;
+ }
+#endif
+ /*
+ * Create new mbuf for ICMPv6 datagram.
+ * NOTE: len is data length just after inner IP header.
+ */
+ len = m->m_pkthdr.len - hlen;
+ if (sizeof(struct ip6_hdr) +
+ sizeof(struct icmp6_hdr) + len > NAT64_ICMP6_PLEN)
+ len = NAT64_ICMP6_PLEN - sizeof(struct icmp6_hdr) -
+ sizeof(struct ip6_hdr);
+ plen = sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr) + len;
+ n = m_get2(offset + plen + max_hdr, M_NOWAIT, MT_HEADER, M_PKTHDR);
+ if (n == NULL) {
+ NAT64STAT_INC(stats, nomem);
+ m_freem(m);
+ return (NULL);
+ }
+ m_move_pkthdr(n, m);
+ M_ALIGN(n, offset + plen + max_hdr);
+ n->m_len = n->m_pkthdr.len = offset + plen;
+ /* Adjust ip6_plen in outer header */
+ ip6->ip6_plen = htons(plen);
+ /* Construct new inner IPv6 header */
+ eip6 = mtodo(n, offset + sizeof(struct icmp6_hdr));
+ eip6->ip6_src = ip6->ip6_dst;
+ /* Use the fact that we have single /96 prefix for IPv4 map */
+ eip6->ip6_dst = ip6->ip6_src;
+ nat64_set_ip4(&eip6->ip6_dst, ip.ip_dst.s_addr);
+
+ eip6->ip6_flow = htonl(ip.ip_tos << 20);
+ eip6->ip6_vfc |= IPV6_VERSION;
+ eip6->ip6_hlim = ip.ip_ttl;
+ eip6->ip6_plen = htons(ntohs(ip.ip_len) - (ip.ip_hl << 2));
+ eip6->ip6_nxt = (ip.ip_p == IPPROTO_ICMP) ? IPPROTO_ICMPV6: ip.ip_p;
+ m_copydata(m, hlen, len, (char *)(eip6 + 1));
+ /*
+ * We need to translate source port in the inner ULP header,
+ * and adjust ULP checksum.
+ */
+ switch (ip.ip_p) {
+ case IPPROTO_TCP: /*
+ * NOTE(review): the guard below compares len with the offset of
+ * th_sum, not with the end of that field, while len is capped by
+ * NAT64_ICMP6_PLEN above -- confirm th_sum is always inside the
+ * copied data.  The same question applies to uh_sum in the UDP
+ * case.
+ */
+ if (len < offsetof(struct tcphdr, th_sum))
+ break;
+ tcp = TCP(eip6 + 1);
+ if (icmpid != 0) {
+ tcp->th_sum = cksum_adjust(tcp->th_sum,
+ tcp->th_sport, icmpid);
+ tcp->th_sport = icmpid;
+ }
+ tcp->th_sum = cksum_add(tcp->th_sum,
+ ~nat64_cksum_convert(eip6, &ip));
+ break;
+ case IPPROTO_UDP:
+ if (len < offsetof(struct udphdr, uh_sum))
+ break;
+ udp = UDP(eip6 + 1);
+ if (icmpid != 0) {
+ udp->uh_sum = cksum_adjust(udp->uh_sum,
+ udp->uh_sport, icmpid);
+ udp->uh_sport = icmpid;
+ }
+ udp->uh_sum = cksum_add(udp->uh_sum,
+ ~nat64_cksum_convert(eip6, &ip));
+ break;
+ case IPPROTO_ICMP:
+ /*
+ * Check if this is an ICMP error message for echo request
+ * that we sent. I.e. ULP in the data containing invoking
+ * packet is IPPROTO_ICMP and its type is ICMP_ECHO.
+ */
+ icmp = (struct icmp *)(eip6 + 1);
+ if (icmp->icmp_type != ICMP_ECHO) {
+ m_freem(n);
+ goto freeit;
+ }
+ /*
+ * For our client this original datagram should looks
+ * like it was ICMPv6 datagram with type ICMP6_ECHO_REQUEST.
+ * Thus we need adjust icmp_cksum and convert type from
+ * ICMP_ECHO to ICMP6_ECHO_REQUEST.
+ */
+ nat64_icmp_handle_echo(eip6, ICMP6(icmp), icmpid,
+ ICMP6_ECHO_REQUEST);
+ }
+ m_freem(m);
+ /* Convert ICMPv4 into ICMPv6 header */
+ icmp = mtodo(n, offset);
+ ICMP6(icmp)->icmp6_type = type;
+ ICMP6(icmp)->icmp6_code = code;
+ ICMP6(icmp)->icmp6_mtu = htonl(mtu);
+ ICMP6(icmp)->icmp6_cksum = 0;
+ ICMP6(icmp)->icmp6_cksum = cksum_add(
+ ~in6_cksum_pseudo(ip6, plen, IPPROTO_ICMPV6, 0),
+ in_cksum_skip(n, n->m_pkthdr.len, offset));
+ return (n);
+freeit:
+ m_freem(m);
+ NAT64STAT_INC(stats, dropped);
+ return (NULL);
+}
+
+/*
+ * Walk the IPv6 header chain of m and find the upper level protocol.
+ *
+ *	m	- packet; the headers are expected in the first mbuf;
+ *	offset	- optional in/out argument: on entry the offset of the IPv6
+ *		  header within m (0 if the pointer is NULL), on success
+ *		  the offset of the first header that is not a hop-by-hop,
+ *		  routing or destination options header.
+ *
+ * Returns the protocol number of that header, or -1 if the IPv6 header or
+ * the fixed part of an extension header is not contained in the first
+ * mbuf, or if the packet carries a hop-by-hop header together with a zero
+ * payload length (Jumbo payload, which is not supported).  The returned
+ * offset itself is not checked against the mbuf length.
+ */
+int
+nat64_getlasthdr(struct mbuf *m, int *offset)
+{
+	struct ip6_hdr *ip6;
+	struct ip6_hbh *hbh;
+	int proto, hlen;
+
+	if (offset != NULL)
+		hlen = *offset;
+	else
+		hlen = 0;
+
+	if (m->m_len < hlen + sizeof(*ip6))
+		return (-1);
+
+	ip6 = mtodo(m, hlen);
+	hlen += sizeof(*ip6);
+	proto = ip6->ip6_nxt;
+	/* Skip extension headers */
+	while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING ||
+	    proto == IPPROTO_DSTOPTS) {
+		/*
+		 * We expect mbuf has contiguous data up to
+		 * upper level header; the next header and length
+		 * fields must be readable before they are used.
+		 */
+		if (m->m_len < hlen + sizeof(*hbh))
+			return (-1);
+		/*
+		 * We don't support Jumbo payload option,
+		 * so return error.
+		 */
+		if (proto == IPPROTO_HOPOPTS && ip6->ip6_plen == 0)
+			return (-1);
+		hbh = mtodo(m, hlen);
+		proto = hbh->ip6h_nxt;
+		/*
+		 * The length field counts 8-octet units and does not
+		 * include the first 8 octets of the header.
+		 */
+		hlen += (hbh->ip6h_len + 1) << 3;
+	}
+	if (offset != NULL)
+		*offset = hlen;
+	return (proto);
+}
+/*
+ * Translate the IPv4 packet in m into IPv6 and send it out.
+ *
+ *	m	- IPv4 packet, header first;
+ *	saddr	- IPv6 source address to use;
+ *	daddr	- IPv6 destination address to use;
+ *	lport	- if not zero, replaces the TCP/UDP destination port (it is
+ *		  also passed to the ICMP translation as identifier);
+ *	stats	- statistics block to account into;
+ *	logdata	- optional log header handed on to the output routines.
+ *
+ * Returns NAT64RETURN when the mbuf has been handed off or freed here:
+ * either it was translated and passed to the output path, or it was
+ * answered with an ICMP error (TTL expired, bad fragment length, no
+ * route, DF set and packet too big), or the ICMP translation failed.
+ * Returns NAT64MFREE for a fragmented ICMP packet; the mbuf is left
+ * untouched in that case.
+ */
+int
+nat64_do_handle_ip4(struct mbuf *m, struct in6_addr *saddr,
+ struct in6_addr *daddr, uint16_t lport, nat64_stats_block *stats,
+ void *logdata)
+{
+ struct route_in6 ro;
+ struct ip6_hdr ip6;
+ struct ifnet *ifp;
+ struct ip *ip;
+ struct mbufq mq;
+ struct sockaddr *dst;
+ uint32_t mtu;
+ uint16_t ip_id, ip_off;
+ uint16_t *csum;
+ int plen, hlen;
+ uint8_t proto;
+
+ ip = mtod(m, struct ip*);
+
+ if (ip->ip_ttl <= IPTTLDEC) {
+ nat64_icmp_reflect(m, ICMP_TIMXCEED,
+ ICMP_TIMXCEED_INTRANS, 0, stats, logdata);
+ return (NAT64RETURN);
+ }
+
+ ip6.ip6_dst = *daddr;
+ ip6.ip6_src = *saddr;
+
+ hlen = ip->ip_hl << 2;
+ plen = ntohs(ip->ip_len) - hlen;
+ proto = ip->ip_p;
+
+ /* Save ip_id and ip_off, both are in network byte order */
+ ip_id = ip->ip_id;
+ ip_off = ip->ip_off & htons(IP_OFFMASK | IP_MF);
+
+ /* Fragment length must be multiple of 8 octets */
+ if ((ip->ip_off & htons(IP_MF)) != 0 && (plen & 0x7) != 0) {
+ nat64_icmp_reflect(m, ICMP_PARAMPROB,
+ ICMP_PARAMPROB_LENGTH, 0, stats, logdata);
+ return (NAT64RETURN);
+ }
+ /* Fragmented ICMP is unsupported */
+ if (proto == IPPROTO_ICMP && ip_off != 0) {
+ DPRINTF(DP_DROPS, "dropped due to fragmented ICMP");
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+
+ dst = nat64_find_route6(&ro, &ip6.ip6_dst, m);
+ if (dst == NULL) {
+ FREE_ROUTE(&ro);
+ NAT64STAT_INC(stats, noroute6);
+ nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0,
+ stats, logdata);
+ return (NAT64RETURN);
+ }
+ ifp = ro.ro_rt->rt_ifp;
+ if (ro.ro_rt->rt_mtu != 0) /* a route MTU, when set, can only lower the interface MTU */
+ mtu = min(ro.ro_rt->rt_mtu, ifp->if_mtu);
+ else
+ mtu = ifp->if_mtu;
+ if (mtu < plen + sizeof(ip6) && (ip->ip_off & htons(IP_DF)) != 0) {
+ FREE_ROUTE(&ro);
+ nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
+ FRAGSZ(mtu) + sizeof(struct ip), stats, logdata);
+ return (NAT64RETURN);
+ }
+
+ ip6.ip6_flow = htonl(ip->ip_tos << 20);
+ ip6.ip6_vfc |= IPV6_VERSION;
+#ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT
+ ip6.ip6_hlim = ip->ip_ttl - IPTTLDEC;
+#else
+ /* Forwarding code will decrement HLIM. */
+ ip6.ip6_hlim = ip->ip_ttl;
+#endif
+ ip6.ip6_plen = htons(plen);
+ ip6.ip6_nxt = (proto == IPPROTO_ICMP) ? IPPROTO_ICMPV6: proto;
+ /* Convert checksums. */
+ switch (proto) {
+ case IPPROTO_TCP:
+ csum = &TCP(mtodo(m, hlen))->th_sum;
+ if (lport != 0) {
+ struct tcphdr *tcp = TCP(mtodo(m, hlen));
+ *csum = cksum_adjust(*csum, tcp->th_dport, lport);
+ tcp->th_dport = lport;
+ }
+ *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip));
+ break;
+ case IPPROTO_UDP:
+ csum = &UDP(mtodo(m, hlen))->uh_sum;
+ if (lport != 0) {
+ struct udphdr *udp = UDP(mtodo(m, hlen));
+ *csum = cksum_adjust(*csum, udp->uh_dport, lport);
+ udp->uh_dport = lport;
+ }
+ *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip));
+ break;
+ case IPPROTO_ICMP:
+ m = nat64_icmp_translate(m, &ip6, lport, hlen, stats); /* may return a different mbuf */
+ if (m == NULL) {
+ FREE_ROUTE(&ro);
+ /* stats already accounted */
+ return (NAT64RETURN);
+ }
+ }
+
+ m_adj(m, hlen); /* strip the IPv4 header */
+ mbufq_init(&mq, 255);
+ nat64_fragment6(stats, &ip6, &mq, m, mtu, ip_id, ip_off); /* result ignored: on failure nothing is left in mq */
+ while ((m = mbufq_dequeue(&mq)) != NULL) {
+ if (nat64_output(ifp, m, dst, (struct route *)&ro, stats,
+ logdata) != 0)
+ break;
+ NAT64STAT_INC(stats, opcnt46);
+ }
+ mbufq_drain(&mq); /* free whatever was not sent after an output error */
+ FREE_ROUTE(&ro);
+ return (NAT64RETURN);
+}
+
+/*
+ * Translate an ICMPv6 error message into an ICMPv4 error (RFC 7915) and
+ * hand it to nat64_icmp_reflect().
+ *
+ * m       - packet, starting with the outer IPv6 header.
+ * hlen    - offset of the ICMPv6 header inside m.  When 0, the outer
+ *           addresses are validated with nat64_check_ip6() and the offset
+ *           is looked up with nat64_getlasthdr().
+ * aaddr   - IPv4 address stored as destination of the fake IPv4 header
+ *           that replaces the embedded IPv6 header.
+ * aport   - not used by this function at present.
+ * stats   - statistics block updated on drops and allocation failures.
+ * logdata - passed through to nat64_icmp_reflect().
+ *
+ * Returns NAT64SKIP when hlen == 0 and the outer addresses fail the check,
+ * NAT64MFREE on the drop paths taken before m_pullup() (the mbuf is left
+ * for the caller), and NAT64RETURN once the mbuf has been consumed here
+ * (passed to nat64_icmp_reflect(), freed, or lost in m_pullup()).
+ */
+int
+nat64_handle_icmp6(struct mbuf *m, int hlen, uint32_t aaddr, uint16_t aport,
+    nat64_stats_block *stats, void *logdata)
+{
+	struct ip ip;
+	struct icmp6_hdr *icmp6;
+	struct ip6_frag *ip6f;
+	struct ip6_hdr *ip6, *ip6i;
+	uint32_t mtu;
+	int plen, proto;
+	uint8_t type, code;
+
+	if (hlen == 0) {
+		ip6 = mtod(m, struct ip6_hdr *);
+		if (nat64_check_ip6(&ip6->ip6_src) != 0 ||
+		    nat64_check_ip6(&ip6->ip6_dst) != 0)
+			return (NAT64SKIP);
+
+		proto = nat64_getlasthdr(m, &hlen);
+		if (proto != IPPROTO_ICMPV6) {
+			DPRINTF(DP_DROPS,
+			    "dropped due to mbuf isn't contigious");
+			NAT64STAT_INC(stats, dropped);
+			return (NAT64MFREE);
+		}
+	}
+
+	/*
+	 * Translate ICMPv6 type and code to ICMPv4 (RFC7915).
+	 * NOTE: ICMPv6 echo handled by nat64_do_handle_ip6().
+	 *
+	 * "mtu" carries the type-specific 32-bit value: the next-hop MTU
+	 * for Packet Too Big, the translated pointer for Parameter Problem.
+	 */
+	icmp6 = mtodo(m, hlen);
+	mtu = 0;
+	switch (icmp6->icmp6_type) {
+	case ICMP6_DST_UNREACH:
+		type = ICMP_UNREACH;
+		switch (icmp6->icmp6_code) {
+		case ICMP6_DST_UNREACH_NOROUTE:
+		case ICMP6_DST_UNREACH_BEYONDSCOPE:
+		case ICMP6_DST_UNREACH_ADDR:
+			code = ICMP_UNREACH_HOST;
+			break;
+		case ICMP6_DST_UNREACH_ADMIN:
+			code = ICMP_UNREACH_HOST_PROHIB;
+			break;
+		case ICMP6_DST_UNREACH_NOPORT:
+			code = ICMP_UNREACH_PORT;
+			break;
+		default:
+			DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
+			    " code %d", icmp6->icmp6_type,
+			    icmp6->icmp6_code);
+			NAT64STAT_INC(stats, dropped);
+			return (NAT64MFREE);
+		}
+		break;
+	case ICMP6_PACKET_TOO_BIG:
+		type = ICMP_UNREACH;
+		code = ICMP_UNREACH_NEEDFRAG;
+		mtu = ntohl(icmp6->icmp6_mtu);
+		if (mtu < IPV6_MMTU) {
+			DPRINTF(DP_DROPS, "Wrong MTU %d in ICMPv6 type %d,"
+			    " code %d", mtu, icmp6->icmp6_type,
+			    icmp6->icmp6_code);
+			NAT64STAT_INC(stats, dropped);
+			return (NAT64MFREE);
+		}
+		/*
+		 * Adjust MTU to reflect difference between
+		 * IPv6 and IPv4 headers.
+		 */
+		mtu -= sizeof(struct ip6_hdr) - sizeof(struct ip);
+		break;
+	case ICMP6_TIME_EXCEEDED:
+		type = ICMP_TIMXCEED;
+		code = icmp6->icmp6_code;
+		break;
+	case ICMP6_PARAM_PROB:
+		switch (icmp6->icmp6_code) {
+		case ICMP6_PARAMPROB_HEADER:
+			type = ICMP_PARAMPROB;
+			code = ICMP_PARAMPROB_ERRATPTR;
+			/* Map the IPv6 header offset to the IPv4 one. */
+			mtu = ntohl(icmp6->icmp6_pptr);
+			switch (mtu) {
+			case 0: /* Version/Traffic Class */
+			case 1: /* Traffic Class/Flow Label */
+				break;
+			case 4: /* Payload Length */
+			case 5:
+				mtu = 2;
+				break;
+			case 6: /* Next Header */
+				mtu = 9;
+				break;
+			case 7: /* Hop Limit */
+				mtu = 8;
+				break;
+			default:
+				if (mtu >= 8 && mtu <= 23) {
+					mtu = 12; /* Source address */
+					break;
+				}
+				if (mtu >= 24 && mtu <= 39) {
+					mtu = 16; /* Destination address */
+					break;
+				}
+				DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
+				    " code %d, pptr %d", icmp6->icmp6_type,
+				    icmp6->icmp6_code, mtu);
+				NAT64STAT_INC(stats, dropped);
+				return (NAT64MFREE);
+			}
+			/*
+			 * Do not fall through: an erroneous header field
+			 * stays a Parameter Problem with the translated
+			 * pointer; only "unrecognized Next Header" becomes
+			 * Protocol Unreachable.
+			 */
+			break;
+		case ICMP6_PARAMPROB_NEXTHEADER:
+			type = ICMP_UNREACH;
+			code = ICMP_UNREACH_PROTOCOL;
+			break;
+		default:
+			DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
+			    " code %d, pptr %d", icmp6->icmp6_type,
+			    icmp6->icmp6_code, ntohl(icmp6->icmp6_pptr));
+			NAT64STAT_INC(stats, dropped);
+			return (NAT64MFREE);
+		}
+		break;
+	default:
+		DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d, code %d",
+		    icmp6->icmp6_type, icmp6->icmp6_code);
+		NAT64STAT_INC(stats, dropped);
+		return (NAT64MFREE);
+	}
+
+	hlen += sizeof(struct icmp6_hdr);
+	if (m->m_pkthdr.len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) {
+		NAT64STAT_INC(stats, dropped);
+		DPRINTF(DP_DROPS, "Message is too short %d",
+		    m->m_pkthdr.len);
+		return (NAT64MFREE);
+	}
+	/*
+	 * We need at least ICMP_MINLEN bytes of original datagram payload
+	 * to generate ICMP message. It is nice that ICMP_MINLEN is equal
+	 * to sizeof(struct ip6_frag). So, if embedded datagram had a fragment
+	 * header we will not have to do m_pullup() again.
+	 *
+	 * What we have here:
+	 * Outer header: (IPv6iGW, v4mapPRefix+v4exthost)
+	 * Inner header: (v4mapPRefix+v4host, IPv6iHost) [sport, dport]
+	 * We need to translate it to:
+	 *
+	 * Outer header: (alias_host, v4exthost)
+	 * Inner header: (v4exthost, alias_host) [sport, alias_port]
+	 *
+	 * Assume caller function has checked if v4mapPRefix+v4host
+	 * matches configured prefix.
+	 * The only two things we should be provided with are mapping between
+	 * IPv6iHost <> alias_host and between dport and alias_port.
+	 */
+	if (m->m_len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN)
+		m = m_pullup(m, hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN);
+	if (m == NULL) {
+		NAT64STAT_INC(stats, nomem);
+		return (NAT64RETURN);
+	}
+	/* From here on the caller's mbuf pointer may be stale. */
+	ip6 = mtod(m, struct ip6_hdr *);
+	ip6i = mtodo(m, hlen);
+	ip6f = NULL;
+	proto = ip6i->ip6_nxt;
+	plen = ntohs(ip6i->ip6_plen);
+	hlen += sizeof(struct ip6_hdr);
+	if (proto == IPPROTO_FRAGMENT) {
+		if (m->m_pkthdr.len < hlen + sizeof(struct ip6_frag) +
+		    ICMP_MINLEN)
+			goto fail;
+		ip6f = mtodo(m, hlen);
+		proto = ip6f->ip6f_nxt;
+		plen -= sizeof(struct ip6_frag);
+		hlen += sizeof(struct ip6_frag);
+		/* Adjust MTU to reflect frag header size */
+		if (type == ICMP_UNREACH && code == ICMP_UNREACH_NEEDFRAG)
+			mtu -= sizeof(struct ip6_frag);
+	}
+	if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) {
+		DPRINTF(DP_DROPS, "Unsupported proto %d in the inner header",
+		    proto);
+		goto fail;
+	}
+	if (nat64_check_ip6(&ip6i->ip6_src) != 0 ||
+	    nat64_check_ip6(&ip6i->ip6_dst) != 0) {
+		DPRINTF(DP_DROPS, "Inner addresses do not passes the check");
+		goto fail;
+	}
+	/* Check if outer dst is the same as inner src */
+	if (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6i->ip6_src)) {
+		DPRINTF(DP_DROPS, "Inner src doesn't match outer dst");
+		goto fail;
+	}
+
+	/* Now we need to make a fake IPv4 packet to generate ICMP message */
+	ip.ip_dst.s_addr = aaddr;
+	ip.ip_src.s_addr = nat64_get_ip4(&ip6i->ip6_src);
+	/* XXX: Make fake ulp header */
+#ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT
+	ip6i->ip6_hlim += IPV6_HLIMDEC; /* init_ip4hdr will decrement it */
+#endif
+	nat64_init_ip4hdr(ip6i, ip6f, plen, proto, &ip);
+	/* Strip everything up to the inner ULP, leaving room for struct ip. */
+	m_adj(m, hlen - sizeof(struct ip));
+	bcopy(&ip, mtod(m, void *), sizeof(ip));
+	nat64_icmp_reflect(m, type, code, (uint16_t)mtu, stats, logdata);
+	return (NAT64RETURN);
+fail:
+	/*
+	 * We must call m_freem() because mbuf pointer could be
+	 * changed with m_pullup().
+	 */
+	m_freem(m);
+	NAT64STAT_INC(stats, dropped);
+	return (NAT64RETURN);
+}
+
+/*
+ * Translate an IPv6 packet into IPv4 and send it out.
+ *
+ * aaddr is stored as the IPv4 source address; the IPv4 destination is
+ * taken from the last 32 bits of the IPv6 destination (nat64_get_ip4()).
+ * When aport is non-zero it replaces the TCP/UDP source port or the ICMP
+ * echo identifier and the checksum is adjusted accordingly.  ICMPv6
+ * messages other than echo request/reply are delegated to
+ * nat64_handle_icmp6().
+ *
+ * Returns NAT64SKIP when the IPv6 addresses fail nat64_check_ip6(),
+ * NAT64MFREE on the drop paths that return without consuming the mbuf,
+ * and NAT64RETURN once the mbuf has been given to nat64_output() or to
+ * nat64_icmp6_reflect().
+ */
+int
+nat64_do_handle_ip6(struct mbuf *m, uint32_t aaddr, uint16_t aport,
+ nat64_stats_block *stats, void *logdata)
+{
+ struct route ro;
+ struct ip ip;
+ struct ifnet *ifp;
+ struct ip6_frag *frag;
+ struct ip6_hdr *ip6;
+ struct icmp6_hdr *icmp6;
+ struct sockaddr *dst;
+ uint16_t *csum;
+ uint32_t mtu;
+ int plen, hlen, proto;
+
+ /*
+ * XXX: we expect ipfw_chk() did m_pullup() up to upper level
+ * protocol's headers. Also we skip some checks, that ip6_input(),
+ * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did.
+ */
+ ip6 = mtod(m, struct ip6_hdr *);
+ if (nat64_check_ip6(&ip6->ip6_src) != 0 ||
+ nat64_check_ip6(&ip6->ip6_dst) != 0) {
+ return (NAT64SKIP);
+ }
+
+ /* Starting from this point we must not return zero */
+ ip.ip_src.s_addr = aaddr;
+ if (nat64_check_ip4(ip.ip_src.s_addr) != 0) {
+ DPRINTF(DP_GENERIC, "invalid source address: %08x",
+ ip.ip_src.s_addr);
+ /* XXX: stats? */
+ return (NAT64MFREE);
+ }
+
+ ip.ip_dst.s_addr = nat64_get_ip4(&ip6->ip6_dst);
+ if (ip.ip_dst.s_addr == 0) {
+ /* XXX: stats? */
+ return (NAT64MFREE);
+ }
+
+ /* Hop limit would expire on translation: report it to the sender. */
+ if (ip6->ip6_hlim <= IPV6_HLIMDEC) {
+ nat64_icmp6_reflect(m, ICMP6_TIME_EXCEEDED,
+ ICMP6_TIME_EXCEED_TRANSIT, 0, stats, logdata);
+ return (NAT64RETURN);
+ }
+
+ hlen = 0;
+ plen = ntohs(ip6->ip6_plen);
+ proto = nat64_getlasthdr(m, &hlen);
+ if (proto < 0) {
+ DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious");
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+ frag = NULL;
+ if (proto == IPPROTO_FRAGMENT) {
+ /* ipfw_chk should m_pullup up to frag header */
+ if (m->m_len < hlen + sizeof(*frag)) {
+ DPRINTF(DP_DROPS,
+ "dropped due to mbuf isn't contigious");
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+ frag = mtodo(m, hlen);
+ proto = frag->ip6f_nxt;
+ hlen += sizeof(*frag);
+ /* Fragmented ICMPv6 is unsupported */
+ if (proto == IPPROTO_ICMPV6) {
+ DPRINTF(DP_DROPS, "dropped due to fragmented ICMPv6");
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+ /* Fragment length must be multiple of 8 octets */
+ if ((frag->ip6f_offlg & IP6F_MORE_FRAG) != 0 &&
+ ((plen + sizeof(struct ip6_hdr) - hlen) & 0x7) != 0) {
+ nat64_icmp6_reflect(m, ICMP6_PARAM_PROB,
+ ICMP6_PARAMPROB_HEADER,
+ offsetof(struct ip6_hdr, ip6_plen), stats,
+ logdata);
+ return (NAT64RETURN);
+ }
+ }
+ /* plen becomes the upper-layer payload length (headers after ip6_hdr removed). */
+ plen -= hlen - sizeof(struct ip6_hdr);
+ if (plen < 0 || m->m_pkthdr.len < plen + hlen) {
+ DPRINTF(DP_DROPS, "plen %d, pkthdr.len %d, hlen %d",
+ plen, m->m_pkthdr.len, hlen);
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+
+ icmp6 = NULL; /* Make gcc happy */
+ if (proto == IPPROTO_ICMPV6) {
+ icmp6 = mtodo(m, hlen);
+ /* Only echo request/reply are translated here; the rest are ICMPv6 errors. */
+ if (icmp6->icmp6_type != ICMP6_ECHO_REQUEST &&
+ icmp6->icmp6_type != ICMP6_ECHO_REPLY)
+ return (nat64_handle_icmp6(m, hlen, aaddr, aport,
+ stats, logdata));
+ }
+ dst = nat64_find_route4(&ro, ip.ip_dst.s_addr, m);
+ if (dst == NULL) {
+ FREE_ROUTE(&ro);
+ NAT64STAT_INC(stats, noroute4);
+ nat64_icmp6_reflect(m, ICMP6_DST_UNREACH,
+ ICMP6_DST_UNREACH_NOROUTE, 0, stats, logdata);
+ return (NAT64RETURN);
+ }
+
+ /* Effective MTU: the smaller of route MTU (if set) and interface MTU. */
+ ifp = ro.ro_rt->rt_ifp;
+ if (ro.ro_rt->rt_mtu != 0)
+ mtu = min(ro.ro_rt->rt_mtu, ifp->if_mtu);
+ else
+ mtu = ifp->if_mtu;
+ if (mtu < plen + sizeof(ip)) {
+ FREE_ROUTE(&ro);
+ nat64_icmp6_reflect(m, ICMP6_PACKET_TOO_BIG, 0, mtu, stats,
+ logdata);
+ return (NAT64RETURN);
+ }
+ nat64_init_ip4hdr(ip6, frag, plen, proto, &ip);
+ /* Convert checksums. */
+ switch (proto) {
+ case IPPROTO_TCP:
+ csum = &TCP(mtodo(m, hlen))->th_sum;
+ if (aport != 0) {
+ struct tcphdr *tcp = TCP(mtodo(m, hlen));
+ *csum = cksum_adjust(*csum, tcp->th_sport, aport);
+ tcp->th_sport = aport;
+ }
+ *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip));
+ break;
+ case IPPROTO_UDP:
+ csum = &UDP(mtodo(m, hlen))->uh_sum;
+ if (aport != 0) {
+ struct udphdr *udp = UDP(mtodo(m, hlen));
+ *csum = cksum_adjust(*csum, udp->uh_sport, aport);
+ udp->uh_sport = aport;
+ }
+ *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip));
+ break;
+ case IPPROTO_ICMPV6:
+ /* Checksum in ICMPv6 covers pseudo header */
+ csum = &icmp6->icmp6_cksum;
+ *csum = cksum_add(*csum, in6_cksum_pseudo(ip6, plen,
+ IPPROTO_ICMPV6, 0));
+ /* Convert ICMPv6 types to ICMP */
+ /* NOTE: "mtu" is reused as scratch for the old type/code word. */
+ mtu = *(uint16_t *)icmp6; /* save old word for cksum_adjust */
+ if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST)
+ icmp6->icmp6_type = ICMP_ECHO;
+ else /* ICMP6_ECHO_REPLY */
+ icmp6->icmp6_type = ICMP_ECHOREPLY;
+ *csum = cksum_adjust(*csum, (uint16_t)mtu, *(uint16_t *)icmp6);
+ if (aport != 0) {
+ uint16_t old_id = icmp6->icmp6_id;
+ icmp6->icmp6_id = aport;
+ *csum = cksum_adjust(*csum, old_id, aport);
+ }
+ break;
+ };
+
+ /* Trim the IPv6 headers, keeping sizeof(ip) bytes for the new IPv4 header. */
+ m_adj(m, hlen - sizeof(ip));
+ bcopy(&ip, mtod(m, void *), sizeof(ip));
+ if (nat64_output(ifp, m, dst, &ro, stats, logdata) == 0)
+ NAT64STAT_INC(stats, opcnt64);
+ FREE_ROUTE(&ro);
+ return (NAT64RETURN);
+}
+
diff --git a/sys/netpfil/ipfw/nat64/nat64_translate.h b/sys/netpfil/ipfw/nat64/nat64_translate.h
new file mode 100644
index 0000000..9f65395
--- /dev/null
+++ b/sys/netpfil/ipfw/nat64/nat64_translate.h
@@ -0,0 +1,116 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _IP_FW_NAT64_TRANSLATE_H_
+#define _IP_FW_NAT64_TRANSLATE_H_
+
+/*
+ * Route lookup/release wrappers.  With RTALLOC_NOLOCK the *_nolock lookup
+ * variants are used and FREE_ROUTE() expands to nothing; otherwise the
+ * regular lookups are used and FREE_ROUTE() releases the route with
+ * RO_RTFREE().
+ */
+#ifdef RTALLOC_NOLOCK
+#define IN_LOOKUP_ROUTE(ro, fib) rtalloc_fib_nolock((ro), 0, (fib))
+#define IN6_LOOKUP_ROUTE(ro, fib) in6_rtalloc_nolock((ro), (fib))
+#define FREE_ROUTE(ro)
+#else
+#define IN_LOOKUP_ROUTE(ro, fib) rtalloc_ign_fib((ro), 0, (fib))
+#define IN6_LOOKUP_ROUTE(ro, fib) in6_rtalloc((ro), (fib))
+#define FREE_ROUTE(ro) RO_RTFREE((ro))
+#endif
+
+/*
+ * Reject IPv6 addresses that must not be translated.
+ * Returns 1 when the first 16 bits are zero, or the address is multicast
+ * or link-local; returns 0 otherwise.
+ */
+static inline int
+nat64_check_ip6(struct in6_addr *addr)
+{
+
+	/* XXX: We should really check /8 */
+	if (addr->s6_addr16[0] == 0)	/* 0000::/8 Reserved by IETF */
+		return (1);
+	if (IN6_IS_ADDR_MULTICAST(addr))
+		return (1);
+	if (IN6_IS_ADDR_LINKLOCAL(addr))
+		return (1);
+	return (0);
+}
+
+extern int nat64_allow_private;
+
+/*
+ * Check whether IPv4 address @ia (network byte order) lies in a private or
+ * special-purpose range that must not be represented through NAT64.
+ *
+ * Returns 0 unconditionally when nat64_allow_private is non-zero.
+ * Otherwise returns 1 for addresses in the ranges listed below and 0 for
+ * everything else.
+ */
+static inline int
+nat64_check_private_ip4(in_addr_t ia)
+{
+
+	if (nat64_allow_private)
+		return (0);
+	/*
+	 * WKPFX must not be used to represent non-global IPv4 addresses.
+	 * NOTE: the check is applied to every instance; limiting it to
+	 * instances with the NAT64_WKPFX flag would need access to the
+	 * instance configuration, which this helper does not receive.
+	 */
+	/* IN_PRIVATE: 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16 */
+	if ((ia & htonl(0xff000000)) == htonl(0x0a000000) ||
+	    (ia & htonl(0xfff00000)) == htonl(0xac100000) ||
+	    (ia & htonl(0xffff0000)) == htonl(0xc0a80000))
+		return (1);
+	/*
+	 * RFC 5735:
+	 * 192.0.0.0/24 - reserved for IETF protocol assignments
+	 * 192.88.99.0/24 - for use as 6to4 relay anycast addresses
+	 * 198.18.0.0/15 - for use in benchmark tests
+	 * 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24 - for use
+	 * in documentation and example code
+	 *
+	 * The masks must match the prefix lengths above: /15 is
+	 * 0xfffe0000 and /24 is 0xffffff00.
+	 */
+	if ((ia & htonl(0xffffff00)) == htonl(0xc0000000) ||
+	    (ia & htonl(0xffffff00)) == htonl(0xc0586300) ||
+	    (ia & htonl(0xfffe0000)) == htonl(0xc6120000) ||
+	    (ia & htonl(0xffffff00)) == htonl(0xc0000200) ||
+	    (ia & htonl(0xffffff00)) == htonl(0xc6336400) ||
+	    (ia & htonl(0xffffff00)) == htonl(0xcb007100))
+		return (1);
+	return (0);
+}
+
+/*
+ * Reject IPv4 addresses (network byte order) that must never appear in a
+ * translated packet.  Returns 1 for loopback (127.0.0.0/8), link-local
+ * (169.254.0.0/16) and multicast/experimental (224.0.0.0/3) addresses,
+ * 0 for anything else.
+ */
+static inline int
+nat64_check_ip4(in_addr_t ia)
+{
+	const in_addr_t top8 = ia & htonl(0xff000000);
+	const in_addr_t top16 = ia & htonl(0xffff0000);
+	const in_addr_t top3 = ia & htonl(0xe0000000);
+
+	if (top8 == htonl(0x7f000000) ||	/* IN_LOOPBACK */
+	    top16 == htonl(0xa9fe0000) ||	/* IN_LINKLOCAL */
+	    top3 == htonl(0xe0000000))	/* IN_MULTICAST & IN_EXPERIMENTAL */
+		return (1);
+	return (0);
+}
+
+/*
+ * Read / write the IPv4 address kept in the last 32 bits of an IPv6
+ * address.  Both expansions are fully parenthesized so they can be used
+ * safely inside larger expressions.
+ */
+#define	nat64_get_ip4(_ip6)		((_ip6)->s6_addr32[3])
+#define	nat64_set_ip4(_ip6, _ip4)	((_ip6)->s6_addr32[3] = (_ip4))
+
+int nat64_getlasthdr(struct mbuf *m, int *offset);
+int nat64_do_handle_ip4(struct mbuf *m, struct in6_addr *saddr,
+ struct in6_addr *daddr, uint16_t lport, nat64_stats_block *stats,
+ void *logdata);
+int nat64_do_handle_ip6(struct mbuf *m, uint32_t aaddr, uint16_t aport,
+ nat64_stats_block *stats, void *logdata);
+int nat64_handle_icmp6(struct mbuf *m, int hlen, uint32_t aaddr, uint16_t aport,
+ nat64_stats_block *stats, void *logdata);
+
+#endif
+
diff --git a/sys/netpfil/ipfw/nat64/nat64lsn.c b/sys/netpfil/ipfw/nat64/nat64lsn.c
new file mode 100644
index 0000000..d615f58
--- /dev/null
+++ b/sys/netpfil/ipfw/nat64/nat64lsn.c
@@ -0,0 +1,1770 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
+ * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/rmlock.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/queue.h>
+#include <sys/syslog.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_pflog.h>
+#include <net/pfil.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
+#include <netinet/ip6.h>
+#include <netinet/icmp6.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/ip_fw_nat64.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/nat64/ip_fw_nat64.h>
+#include <netpfil/ipfw/nat64/nat64lsn.h>
+#include <netpfil/ipfw/nat64/nat64_translate.h>
+#include <netpfil/pf/pf.h>
+
+MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN");
+
+static void nat64lsn_periodic(void *data);
+#define PERIODIC_DELAY 4
+static uint8_t nat64lsn_proto_map[256];
+uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO];
+
+#define NAT64_FLAG_FIN 0x01 /* FIN was seen */
+#define NAT64_FLAG_SYN 0x02 /* First syn in->out */
+#define NAT64_FLAG_ESTAB 0x04 /* Packet with Ack */
+#define NAT64_FLAGS_TCP (NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN)
+
+#define NAT64_FLAG_RDR 0x80 /* Port redirect */
+#define NAT64_LOOKUP(chain, cmd) \
+ (struct nat64lsn_cfg *)SRV_OBJECT((chain), (cmd)->arg1)
+/*
+ * Delayed job queue, used to create new hosts
+ * and new portgroups.
+ *
+ * Job types start at 1, so a zeroed job item has no valid type.
+ */
+enum nat64lsn_jtype {
+ /* Create a new host entry. */
+ JTYPE_NEWHOST = 1,
+ /* Create a new portgroup. */
+ JTYPE_NEWPORTGROUP,
+ /* Presumably: delete portgroups (see delmask/delcount in the job item) - confirm. */
+ JTYPE_DELPORTGROUP,
+};
+
+/*
+ * One entry of the delayed job queue.
+ * NOTE(review): which fields are meaningful presumably depends on jtype;
+ * confirm against the job handlers.
+ */
+struct nat64lsn_job_item {
+ /* Tail queue linkage. */
+ TAILQ_ENTRY(nat64lsn_job_item) next;
+ enum nat64lsn_jtype jtype;
+ struct nat64lsn_host *nh;
+ struct nat64lsn_portgroup *pg;
+ void *spare_idx;
+ struct in6_addr haddr;
+ uint8_t nat_proto;
+ uint8_t done;
+ int needs_idx;
+ int delcount;
+ unsigned int fhash; /* Flow hash */
+ uint32_t aaddr; /* Last used address (net) */
+ /* Packet associated with the job, if any - TODO confirm ownership rules. */
+ struct mbuf *m;
+ struct ipfw_flow_id f_id;
+ uint64_t delmask[NAT64LSN_PGPTRNMASK];
+};
+
+/*
+ * File-scope mutex ("qlock", MTX_DEF) and its init/destroy/lock/unlock
+ * wrappers; by its naming it serializes access to the job queue.
+ */
+static struct mtx jmtx;
+#define JQUEUE_LOCK_INIT() mtx_init(&jmtx, "qlock", NULL, MTX_DEF)
+#define JQUEUE_LOCK_DESTROY() mtx_destroy(&jmtx)
+#define JQUEUE_LOCK() mtx_lock(&jmtx)
+#define JQUEUE_UNLOCK() mtx_unlock(&jmtx)
+
+static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_job_item *ji);
+static void nat64lsn_enqueue_jobs(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_job_head *jhead, int jlen);
+
+static struct nat64lsn_job_item *nat64lsn_create_job(struct nat64lsn_cfg *cfg,
+ const struct ipfw_flow_id *f_id, int jtype);
+static int nat64lsn_request_portgroup(struct nat64lsn_cfg *cfg,
+ const struct ipfw_flow_id *f_id, struct mbuf **pm, uint32_t aaddr,
+ int needs_idx);
+static int nat64lsn_request_host(struct nat64lsn_cfg *cfg,
+ const struct ipfw_flow_id *f_id, struct mbuf **pm);
+static int nat64lsn_translate4(struct nat64lsn_cfg *cfg,
+ const struct ipfw_flow_id *f_id, struct mbuf **pm);
+static int nat64lsn_translate6(struct nat64lsn_cfg *cfg,
+ struct ipfw_flow_id *f_id, struct mbuf **pm);
+
+static int alloc_portgroup(struct nat64lsn_job_item *ji);
+static void destroy_portgroup(struct nat64lsn_portgroup *pg);
+static void destroy_host6(struct nat64lsn_host *nh);
+static int alloc_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji);
+
+static int attach_portgroup(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_job_item *ji);
+static int attach_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji);
+
+
+/* XXX tmp */
+static uma_zone_t nat64lsn_host_zone;
+static uma_zone_t nat64lsn_pg_zone;
+static uma_zone_t nat64lsn_pgidx_zone;
+
+static unsigned int nat64lsn_periodic_chkstates(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_host *nh);
+
+/*
+ * Accessors with the "I6_" prefix that are handed to the generic CHT_*
+ * hash table macros for the table stored in cfg->ih / cfg->ihsize.
+ * Keys are 16-byte IPv6 addresses hashed with djb_hash(); the lock and
+ * unlock hooks expand to nothing.
+ */
+#define I6_hash(x) (djb_hash((const unsigned char *)(x), 16))
+#define I6_first(_ph, h) (_ph)[h]
+#define I6_next(x) (x)->next
+#define I6_val(x) (&(x)->addr)
+#define I6_cmp(a, b) IN6_ARE_ADDR_EQUAL(a, b)
+#define I6_lock(a, b)
+#define I6_unlock(a, b)
+
+/* Find, insert and remove wrappers for the table above. */
+#define I6HASH_FIND(_cfg, _res, _a) \
+ CHT_FIND(_cfg->ih, _cfg->ihsize, I6_, _res, _a)
+#define I6HASH_INSERT(_cfg, _i) \
+ CHT_INSERT_HEAD(_cfg->ih, _cfg->ihsize, I6_, _i)
+#define I6HASH_REMOVE(_cfg, _res, _tmp, _a) \
+ CHT_REMOVE(_cfg->ih, _cfg->ihsize, I6_, _res, _tmp, _a)
+
+/* Iterate over the table, invoking _cb with _arg (see CHT_FOREACH_SAFE). */
+#define I6HASH_FOREACH_SAFE(_cfg, _x, _tmp, _cb, _arg) \
+ CHT_FOREACH_SAFE(_cfg->ih, _cfg->ihsize, I6_, _x, _tmp, _cb, _arg)
+
+/* Hash of the first 8 bytes at x. */
+#define HASH_IN4(x) djb_hash((const unsigned char *)(x), 8)
+
+/*
+ * XOR variant of the "times 33" byte hash: for every input byte the
+ * accumulator is multiplied by 33 and XORed with the byte.
+ * Returns 0 when len is not positive.
+ */
+static unsigned
+djb_hash(const unsigned char *h, const int len)
+{
+	unsigned int acc;
+	int left;
+
+	acc = 0;
+	left = len;
+	while (left-- > 0)
+		acc = (acc * 33) ^ *h++;
+
+	return (acc);
+}
+
+/*
+static size_t
+bitmask_size(size_t num, int *level)
+{
+ size_t x;
+ int c;
+
+ for (c = 0, x = num; num > 1; num /= 64, c++)
+ ;
+
+ return (x);
+}
+
+static void
+bitmask_prepare(uint64_t *pmask, size_t bufsize, int level)
+{
+ size_t x, z;
+
+ memset(pmask, 0xFF, bufsize);
+ for (x = 0, z = 1; level > 1; x += z, z *= 64, level--)
+ ;
+ pmask[x] ~= 0x01;
+}
+*/
+
+/*
+ * Fill @plog with a pflog header describing packet @m and pass both to
+ * ipfw_bpf_mtap2().  @family is stored as the address family, @n and @sn
+ * are stored as rule and sub-rule numbers after htonl().
+ */
+static void
+nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
+    uint32_t n, uint32_t sn)
+{
+
+	/* Start from a zeroed header, then set the fields we report. */
+	memset(plog, 0, sizeof(*plog));
+	strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname));
+	plog->ruleset[0] = '\0';
+	plog->rulenr = htonl(n);
+	plog->subrulenr = htonl(sn);
+	plog->dir = PF_IN;
+	plog->action = PF_NAT;
+	plog->af = family;
+	plog->length = PFLOG_REAL_HDRLEN;
+
+	ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
+}
+/*
+ * Inspects icmp packets to see if the message contains different
+ * packet header so we need to alter @addr and @port.
+ *
+ * For echo request/reply only @port is set (ICMP id, host byte order).
+ * For ICMP_UNREACH and ICMP_TIMXCEED the embedded IPv4 header is parsed:
+ * @addr receives its source address (host byte order); for embedded
+ * TCP/UDP @nat_proto and @port (source port) are set, for an embedded
+ * ICMP echo request only @port (ICMP id) is set.
+ *
+ * *m may be replaced by m_pullup().  Returns 0 on success, ENOMEM when
+ * m_pullup() fails (*m is NULL then), EINVAL when the packet is too
+ * short to hold an embedded header, EOPNOTSUPP for other ICMP types or
+ * embedded protocols.
+ */
+static int
+inspect_icmp_mbuf(struct mbuf **m, uint8_t *nat_proto, uint32_t *addr,
+ uint16_t *port)
+{
+ struct ip *ip;
+ struct tcphdr *tcp;
+ struct udphdr *udp;
+ struct icmphdr *icmp;
+ int off;
+ uint8_t proto;
+
+ ip = mtod(*m, struct ip *); /* Outer IP header */
+ off = (ip->ip_hl << 2) + ICMP_MINLEN;
+ if ((*m)->m_len < off)
+ *m = m_pullup(*m, off);
+ if (*m == NULL)
+ return (ENOMEM);
+
+ /* Re-read the header: m_pullup() may have changed the mbuf. */
+ ip = mtod(*m, struct ip *); /* Outer IP header */
+ icmp = L3HDR(ip, struct icmphdr *);
+ switch (icmp->icmp_type) {
+ case ICMP_ECHO:
+ case ICMP_ECHOREPLY:
+ /* Use icmp ID as distinguisher */
+ *port = ntohs(*((uint16_t *)(icmp + 1)));
+ return (0);
+ case ICMP_UNREACH:
+ case ICMP_TIMXCEED:
+ break;
+ default:
+ return (EOPNOTSUPP);
+ }
+ /*
+ * ICMP_UNREACH and ICMP_TIMXCEED contains IP header + 64 bits
+ * of ULP header.
+ */
+ if ((*m)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN)
+ return (EINVAL);
+ if ((*m)->m_len < off + sizeof(struct ip) + ICMP_MINLEN)
+ *m = m_pullup(*m, off + sizeof(struct ip) + ICMP_MINLEN);
+ if (*m == NULL)
+ return (ENOMEM);
+ ip = mtodo(*m, off); /* Inner IP header */
+ /* Save what we need now; "ip" may be stale after the next m_pullup(). */
+ proto = ip->ip_p;
+ off += ip->ip_hl << 2; /* Skip inner IP header */
+ *addr = ntohl(ip->ip_src.s_addr);
+ if ((*m)->m_len < off + ICMP_MINLEN)
+ *m = m_pullup(*m, off + ICMP_MINLEN);
+ if (*m == NULL)
+ return (ENOMEM);
+ switch (proto) {
+ case IPPROTO_TCP:
+ tcp = mtodo(*m, off);
+ *nat_proto = NAT_PROTO_TCP;
+ *port = ntohs(tcp->th_sport);
+ return (0);
+ case IPPROTO_UDP:
+ udp = mtodo(*m, off);
+ *nat_proto = NAT_PROTO_UDP;
+ *port = ntohs(udp->uh_sport);
+ return (0);
+ case IPPROTO_ICMP:
+ /*
+ * We will translate only ICMP errors for our ICMP
+ * echo requests.
+ */
+ icmp = mtodo(*m, off);
+ if (icmp->icmp_type != ICMP_ECHO)
+ return (EOPNOTSUPP);
+ *port = ntohs(*((uint16_t *)(icmp + 1)));
+ return (0);
+ };
+ return (EOPNOTSUPP);
+}
+
+/*
+ * Fold TCP header flags into the bits kept in a NAT state:
+ * FIN and SYN are passed through unchanged, RST and ACK are each shifted
+ * right by two bits.
+ */
+static inline uint8_t
+convert_tcp_flags(uint8_t flags)
+{
+	uint8_t fin_syn, rst_bits, ack_bits;
+
+	fin_syn = flags & (TH_FIN|TH_SYN);
+	rst_bits = (flags & TH_RST) >> 2;	/* Treat RST as FIN */
+	ack_bits = (flags & TH_ACK) >> 2;	/* Treat ACK as estab */
+
+	return (fin_syn | rst_bits | ack_bits);
+}
+
+/*
+ * Handle an IPv4 packet: look up the portgroup/state by the packet's
+ * destination address and port (from @f_id, possibly replaced by values
+ * found inside an ICMP message), refresh the state under the host lock and
+ * pass the packet to nat64_do_handle_ip4().
+ *
+ * Returns cfg->nomatch_verdict when the packet does not belong to this
+ * instance or cannot be inspected, IP_FW_PASS when the translator returns
+ * NAT64SKIP, and IP_FW_DENY otherwise (with *pm set to NULL; the mbuf is
+ * freed here when the translator returns NAT64MFREE).
+ *
+ * NOTE(review): when inspect_icmp_mbuf() fails with ENOMEM, *pm is NULL
+ * but nomatch_verdict is still returned - confirm callers cope with that.
+ */
+static NAT64NOINLINE int
+nat64lsn_translate4(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id,
+ struct mbuf **pm)
+{
+ struct pfloghdr loghdr, *logdata;
+ struct in6_addr src6;
+ struct nat64lsn_portgroup *pg;
+ struct nat64lsn_host *nh;
+ struct nat64lsn_state *st;
+ struct ip *ip;
+ uint32_t addr;
+ uint16_t state_flags, state_ts;
+ uint16_t port, lport;
+ uint8_t nat_proto;
+ int ret;
+
+ addr = f_id->dst_ip;
+ port = f_id->dst_port;
+ /* Destination must lie inside [prefix4, pmask4]. */
+ if (addr < cfg->prefix4 || addr > cfg->pmask4) {
+ NAT64STAT_INC(&cfg->stats, nomatch4);
+ return (cfg->nomatch_verdict);
+ }
+
+ /* Check if protocol is supported and get its short id */
+ nat_proto = nat64lsn_proto_map[f_id->proto];
+ if (nat_proto == 0) {
+ NAT64STAT_INC(&cfg->stats, noproto);
+ return (cfg->nomatch_verdict);
+ }
+
+ /* We might need to handle icmp differently */
+ if (nat_proto == NAT_PROTO_ICMP) {
+ ret = inspect_icmp_mbuf(pm, &nat_proto, &addr, &port);
+ if (ret != 0) {
+ if (ret == ENOMEM)
+ NAT64STAT_INC(&cfg->stats, nomem);
+ else
+ NAT64STAT_INC(&cfg->stats, noproto);
+ return (cfg->nomatch_verdict);
+ }
+ /* XXX: Check addr for validity */
+ if (addr < cfg->prefix4 || addr > cfg->pmask4) {
+ NAT64STAT_INC(&cfg->stats, nomatch4);
+ return (cfg->nomatch_verdict);
+ }
+ }
+
+ /* Calc portgroup offset w.r.t protocol */
+ pg = GET_PORTGROUP(cfg, addr, nat_proto, port);
+
+ /* Check if this port is occupied by any portgroup */
+ if (pg == NULL) {
+ NAT64STAT_INC(&cfg->stats, nomatch4);
+#if 0
+ DPRINTF(DP_STATE, "NOMATCH %u %d %d (%d)", addr, nat_proto, port,
+ _GET_PORTGROUP_IDX(cfg, addr, nat_proto, port));
+#endif
+ return (cfg->nomatch_verdict);
+ }
+
+ /* TODO: Check flags to see if we need to do some static mapping */
+ nh = pg->host;
+
+ /* Prepare some fields we might need to update */
+ SET_AGE(state_ts);
+ ip = mtod(*pm, struct ip *);
+ if (ip->ip_p == IPPROTO_TCP)
+ state_flags = convert_tcp_flags(
+ L3HDR(ip, struct tcphdr *)->th_flags);
+ else
+ state_flags = 0;
+
+ /* Lock host and get port mapping */
+ NAT64_LOCK(nh);
+
+ /* State slot is selected by the low bits of the port. */
+ st = &pg->states[port & (NAT64_CHUNK_SIZE - 1)];
+ /* Write only when the value changes. */
+ if (st->timestamp != state_ts)
+ st->timestamp = state_ts;
+ if ((st->flags & state_flags) != state_flags)
+ st->flags |= state_flags;
+ lport = htons(st->u.s.lport);
+
+ NAT64_UNLOCK(nh);
+
+ /* NOTE(review): st->cur.off is read below after the host lock is dropped. */
+ if (cfg->flags & NAT64_LOG) {
+ logdata = &loghdr;
+ nat64lsn_log(logdata, *pm, AF_INET, pg->idx, st->cur.off);
+ } else
+ logdata = NULL;
+
+ /* IPv6 source: first 96 bits of prefix6 followed by the IPv4 source. */
+ src6.s6_addr32[0] = cfg->prefix6.s6_addr32[0];
+ src6.s6_addr32[1] = cfg->prefix6.s6_addr32[1];
+ src6.s6_addr32[2] = cfg->prefix6.s6_addr32[2];
+ src6.s6_addr32[3] = htonl(f_id->src_ip);
+
+ ret = nat64_do_handle_ip4(*pm, &src6, &nh->addr, lport,
+ &cfg->stats, logdata);
+
+ if (ret == NAT64SKIP)
+ return (IP_FW_PASS);
+ if (ret == NAT64MFREE)
+ m_freem(*pm);
+ *pm = NULL;
+
+ return (IP_FW_DENY);
+}
+
+/*
+ * Debug helper: print one NAT state via DPRINTF() when DP_STATE debugging
+ * is enabled in nat64_debug; does nothing otherwise.  @px is a message
+ * prefix, @off is the state's offset inside the portgroup @pg.
+ */
+void
+nat64lsn_dump_state(const struct nat64lsn_cfg *cfg,
+    const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st,
+    const char *px, int off)
+{
+	char s[INET6_ADDRSTRLEN], a[INET_ADDRSTRLEN], d[INET_ADDRSTRLEN];
+
+	if ((nat64_debug & DP_STATE) != 0) {
+		/* Textual forms of host, alias and foreign addresses. */
+		inet_ntop(AF_INET6, &pg->host->addr, s, sizeof(s));
+		inet_ntop(AF_INET, &pg->aaddr, a, sizeof(a));
+		inet_ntop(AF_INET, &st->u.s.faddr, d, sizeof(d));
+
+		DPRINTF(DP_STATE, "%s: PG %d ST [%p|%d]: %s:%d/%d <%s:%d> "
+		    "%s:%d AGE %d", px, pg->idx, st, off,
+		    s, st->u.s.lport, pg->nat_proto, a, pg->aport + off,
+		    d, st->u.s.fport, GET_AGE(st->timestamp));
+	}
+}
+
+/*
+ * Decide whether a TCP state of the given @age has outlived its lifetime.
+ * The lifetime is picked from @cfg by the state's flags: FIN seen ->
+ * st_close_ttl, established -> st_estab_ttl, anything else -> st_syn_ttl.
+ * Returns 1 if the state is stale, 0 otherwise.
+ */
+static int
+nat64lsn_periodic_check_tcp(const struct nat64lsn_cfg *cfg,
+    const struct nat64lsn_state *st, int age)
+{
+	int ttl;
+
+	if ((st->flags & NAT64_FLAG_FIN) != 0)
+		ttl = cfg->st_close_ttl;
+	else if ((st->flags & NAT64_FLAG_ESTAB) != 0)
+		ttl = cfg->st_estab_ttl;
+	else
+		ttl = cfg->st_syn_ttl;	/* SYN seen, or no flags at all */
+
+	return (age > ttl ? 1 : 0);
+}
+
+/*
+ * Decide whether nat state @st of portgroup @pg is stale.
+ * States flagged NAT64_FLAG_RDR are never stale; for the rest the age is
+ * compared with the per-protocol lifetime from @cfg.
+ * Returns 1 if the state should be deleted, 0 otherwise.
+ */
+static NAT64NOINLINE int
+nat64lsn_periodic_chkstate(const struct nat64lsn_cfg *cfg,
+    const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st)
+{
+	int age;
+
+	age = GET_AGE(st->timestamp);
+
+	/* Skip immutable records */
+	if ((st->flags & NAT64_FLAG_RDR) != 0)
+		return (0);
+
+	switch (pg->nat_proto) {
+	case NAT_PROTO_TCP:
+		return (nat64lsn_periodic_check_tcp(cfg, st, age));
+	case NAT_PROTO_UDP:
+		return (age > cfg->st_udp_ttl ? 1 : 0);
+	case NAT_PROTO_ICMP:
+		return (age > cfg->st_icmp_ttl ? 1 : 0);
+	}
+
+	/* Unknown protocol: keep the state. */
+	return (0);
+}
+
+
+/*
+ * The following structures and functions
+ * are used to perform SLIST_FOREACH_SAFE()
+ * analog for states identified by struct st_ptr.
+ */
+
+/*
+ * Iteration cursor: the current state, the portgroup it lives in, and a
+ * saved copy of its "next" link so that iteration can continue after the
+ * current state has been cleared.  st == NULL marks the end of the chain.
+ */
+struct st_idx {
+ struct nat64lsn_portgroup *pg;
+ struct nat64lsn_state *st;
+ struct st_ptr sidx_next;
+};
+
+/*
+ * Position cursor @si on the state referenced by chain head @sidx.
+ * An idx of 0 means an empty chain; the cursor is zeroed in that case.
+ * Returns @si.
+ */
+static struct st_idx *
+st_first(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh,
+    struct st_ptr *sidx, struct st_idx *si)
+{
+	struct nat64lsn_portgroup *group;
+	struct nat64lsn_state *state;
+
+	if (sidx->idx != 0) {
+		group = PORTGROUP_BYSIDX(cfg, nh, sidx->idx);
+		state = &group->states[sidx->off];
+
+		si->pg = group;
+		si->st = state;
+		/* Remember the successor before the caller touches the state. */
+		si->sidx_next = state->next;
+	} else
+		memset(si, 0, sizeof(*si));
+
+	return (si);
+}
+
+/*
+ * Advance cursor @si to the state referenced by its saved "next" link.
+ * When the link's idx is 0 the chain has ended: the cursor is zeroed and
+ * its st/pg pointers are set to NULL.  Returns @si.
+ */
+static struct st_idx *
+st_next(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh,
+    struct st_idx *si)
+{
+	struct st_ptr link;
+	struct nat64lsn_portgroup *group;
+	struct nat64lsn_state *state;
+
+	link = si->sidx_next;
+	if (link.idx != 0) {
+		group = PORTGROUP_BYSIDX(cfg, nh, link.idx);
+		state = &group->states[link.off];
+
+		si->pg = group;
+		si->st = state;
+		si->sidx_next = state->next;
+	} else {
+		memset(si, 0, sizeof(*si));
+		si->st = NULL;
+		si->pg = NULL;
+	}
+
+	return (si);
+}
+
+/*
+ * Copy cursor @si into @si_dst, but only while @si still points at a
+ * state (si->st != NULL).  Returns @si_dst.
+ */
+static struct st_idx *
+st_save_cond(struct st_idx *si_dst, struct st_idx *si)
+{
+
+	if (si->st == NULL)
+		return (si_dst);
+
+	*si_dst = *si;
+	return (si_dst);
+}
+
+/*
+ * Walk every state chain of host @nh (nh->phash[0 .. hsize-1]) and delete
+ * the states that nat64lsn_periodic_chkstate() reports as stale.
+ * A deleted state is unlinked from its chain, its index is marked free in
+ * the portgroup, the state is zeroed and the portgroup timestamp is
+ * refreshed.  The number of deleted states is added to the "sdeleted"
+ * counter and returned.
+ *
+ * NOTE(review): the visible caller holds NAT64_LOCK(nh) around this call;
+ * presumably that is required - confirm.
+ */
+unsigned int
+nat64lsn_periodic_chkstates(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh)
+{
+ struct st_idx si, si_prev;
+ int i;
+ unsigned int delcount;
+
+ delcount = 0;
+ for (i = 0; i < nh->hsize; i++) {
+ memset(&si_prev, 0, sizeof(si_prev));
+ /*
+ * si_prev follows si but is only updated while si.st != NULL
+ * (st_save_cond()), so it keeps pointing at the last state
+ * that survived; st_next() continues from the "next" link
+ * saved in the cursor before the state was cleared.
+ */
+ for (st_first(cfg, nh, &nh->phash[i], &si);
+ si.st != NULL;
+ st_save_cond(&si_prev, &si), st_next(cfg, nh, &si)) {
+ if (nat64lsn_periodic_chkstate(cfg, si.pg, si.st) == 0)
+ continue;
+ nat64lsn_dump_state(cfg, si.pg, si.st, "DELETE STATE",
+ si.st->cur.off);
+ /* Unlink from hash */
+ if (si_prev.st != NULL)
+ si_prev.st->next = si.st->next;
+ else
+ nh->phash[i] = si.st->next;
+ /* Delete state and free its data */
+ PG_MARK_FREE_IDX(si.pg, si.st->cur.off);
+ memset(si.st, 0, sizeof(struct nat64lsn_state));
+ /* Keeps st_save_cond() from recording the deleted state. */
+ si.st = NULL;
+ delcount++;
+
+ /* Update portgroup timestamp */
+ SET_AGE(si.pg->timestamp);
+ }
+ }
+ NAT64STAT_ADD(&cfg->stats, sdeleted, delcount);
+ return (delcount);
+}
+
+/*
+ * Tells whether portgroup @pg is unused and can be deleted: it has to
+ * satisfy PG_IS_EMPTY() and the age of its timestamp must have reached
+ * cfg->pg_delete_delay.
+ * Returns 1 if stale, 0 otherwise.
+ */
+static int
+stale_pg(const struct nat64lsn_cfg *cfg, const struct nat64lsn_portgroup *pg)
+{
+
+	if (PG_IS_EMPTY(pg) &&
+	    GET_AGE(pg->timestamp) >= cfg->pg_delete_delay)
+		return (1);
+	return (0);
+}
+
+/*
+ * Tells whether host record @nh is unused and can be deleted: it must
+ * have no portgroups in use and the age of its timestamp must have
+ * reached cfg->nh_delete_delay.
+ * Returns 1 if stale, 0 otherwise.
+ */
+static int
+stale_nh(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh)
+{
+
+	if (nh->pg_used == 0 &&
+	    GET_AGE(nh->timestamp) >= cfg->nh_delete_delay)
+		return (1);
+	return (0);
+}
+
+/*
+ * Context handed to nat64lsn_periodic_chkhost(): the instance being
+ * scanned and a local queue of the jobs collected during the scan.
+ */
+struct nat64lsn_periodic_data {
+ struct nat64lsn_cfg *cfg;
+ struct nat64lsn_job_head jhead; /* jobs created by this run */
+ int jlen; /* number of items in jhead */
+};
+
+/*
+ * Per-host step of the periodic run.
+ * For a non-stale host it expires states and collects, in a bitmask,
+ * the portgroups that stale_pg() reports as deletable; if none are
+ * found nothing is queued. Otherwise, and also when the host itself is
+ * stale, a JTYPE_DELPORTGROUP job carrying the host address and the
+ * bitmask is appended to the local queue in @d.
+ * Always returns 0.
+ */
+static NAT64NOINLINE int
+nat64lsn_periodic_chkhost(struct nat64lsn_host *nh,
+ struct nat64lsn_periodic_data *d)
+{
+ char a[INET6_ADDRSTRLEN];
+ struct nat64lsn_portgroup *pg;
+ struct nat64lsn_job_item *ji;
+ uint64_t delmask[NAT64LSN_PGPTRNMASK];
+ int delcount, i;
+
+ delcount = 0;
+ memset(delmask, 0, sizeof(delmask));
+
+ inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
+ DPRINTF(DP_JQUEUE, "Checking %s host %s on cpu %d",
+ stale_nh(d->cfg, nh) ? "stale" : "non-stale", a, curcpu);
+ if (!stale_nh(d->cfg, nh)) {
+ /* Non-stale host. Inspect internals */
+ NAT64_LOCK(nh);
+
+ /* Stage 1: Check&expire states */
+ if (nat64lsn_periodic_chkstates(d->cfg, nh) != 0)
+ SET_AGE(nh->timestamp);
+
+ /* Stage 2: Check if we need to expire */
+ for (i = 0; i < nh->pg_used; i++) {
+ /* Portgroup indexes are 1-based, bit numbers 0-based */
+ pg = PORTGROUP_BYSIDX(d->cfg, nh, i + 1);
+ if (pg == NULL)
+ continue;
+
+ /* Check if we can delete portgroup */
+ if (stale_pg(d->cfg, pg) == 0)
+ continue;
+
+ DPRINTF(DP_JQUEUE, "Check PG %d", i);
+ delmask[i / 64] |= ((uint64_t)1 << (i % 64));
+ delcount++;
+ }
+
+ NAT64_UNLOCK(nh);
+ if (delcount == 0)
+ return (0);
+ }
+
+ /* Reached with delcount == 0 and an empty mask for a stale host */
+ DPRINTF(DP_JQUEUE, "Queueing %d portgroups for deleting", delcount);
+ /* We have something to delete - add it to queue */
+ ji = nat64lsn_create_job(d->cfg, NULL, JTYPE_DELPORTGROUP);
+ if (ji == NULL)
+ return (0);
+
+ ji->haddr = nh->addr;
+ ji->delcount = delcount;
+ memcpy(ji->delmask, delmask, sizeof(ji->delmask));
+
+ TAILQ_INSERT_TAIL(&d->jhead, ji, next);
+ d->jlen++;
+ return (0);
+}
+
+/*
+ * This procedure is used to perform various maintenance
+ * on dynamic hash list. It is a callout handler that re-arms itself
+ * with a delay of hz * PERIODIC_DELAY ticks.
+ * @data is the struct nat64lsn_cfg of the instance to scan.
+ */
+static void
+nat64lsn_periodic(void *data)
+{
+ struct ip_fw_chain *ch;
+ IPFW_RLOCK_TRACKER;
+ struct nat64lsn_cfg *cfg;
+ struct nat64lsn_periodic_data d;
+ struct nat64lsn_host *nh, *tmp;
+
+ cfg = (struct nat64lsn_cfg *) data;
+ ch = cfg->ch;
+ CURVNET_SET(cfg->vp);
+
+ /* Jobs are gathered on a local list and enqueued in one go below */
+ memset(&d, 0, sizeof(d));
+ d.cfg = cfg;
+ TAILQ_INIT(&d.jhead);
+
+ IPFW_RLOCK(ch);
+
+ /* Stage 1: foreach host, check all its portgroups */
+ I6HASH_FOREACH_SAFE(cfg, nh, tmp, nat64lsn_periodic_chkhost, &d);
+
+ /* Enqueue everything we have requested */
+ nat64lsn_enqueue_jobs(cfg, &d.jhead, d.jlen);
+
+ callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY);
+
+ IPFW_RUNLOCK(ch);
+
+ CURVNET_RESTORE();
+}
+
+/*
+ * Resubmits the packet held by a processed job to the IPv6 translation
+ * path. The packet is dropped instead when the job did not complete
+ * (ji->done == 0) or the flow is not IPv6.
+ * On return ji->m is always NULL: the mbuf has either been consumed by
+ * the translation code or freed here.
+ */
+static NAT64NOINLINE void
+reinject_mbuf(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
+{
+
+	if (ji->m == NULL)
+		return;
+
+	/* Request has failed or packet type is wrong */
+	if (ji->f_id.addr_type != 6 || ji->done == 0) {
+		m_freem(ji->m);
+		ji->m = NULL;
+		NAT64STAT_INC(&cfg->stats, dropped);
+		DPRINTF(DP_DROPS, "mbuf dropped: type %d, done %d",
+		    ji->jtype, ji->done);
+		return;
+	}
+
+	/*
+	 * XXX: Limit recursion level
+	 */
+
+	NAT64STAT_INC(&cfg->stats, jreinjected);
+	DPRINTF(DP_JQUEUE, "Reinject mbuf");
+	nat64lsn_translate6(cfg, &ji->f_id, &ji->m);
+
+	/*
+	 * nat64lsn_translate6() leaves the mbuf in place when it asks for
+	 * the packet to be passed on untranslated. Nothing continues to
+	 * process the packet after a job, so release it here rather than
+	 * leak it when the job item is freed.
+	 */
+	if (ji->m != NULL) {
+		m_freem(ji->m);
+		ji->m = NULL;
+	}
+}
+
+/*
+ * Logs the event and returns portgroup @pg to nat64lsn_pg_zone.
+ */
+static void
+destroy_portgroup(struct nat64lsn_portgroup *pg)
+{
+
+ DPRINTF(DP_OBJ, "DESTROY PORTGROUP %d %p", pg->idx, pg);
+ uma_zfree(nat64lsn_pg_zone, pg);
+}
+
+/*
+ * Pre-allocates a portgroup for job @ji and, when ji->needs_idx is set,
+ * a spare chunk of portgroup index pointers.
+ * The portgroup is returned zeroed with all states marked free and is
+ * stored in ji->pg.
+ * Returns 0 on success, 1 if the portgroup could not be allocated.
+ */
+static NAT64NOINLINE int
+alloc_portgroup(struct nat64lsn_job_item *ji)
+{
+	struct nat64lsn_portgroup *pg;
+
+	/*
+	 * The zone has no constructor, so ask for zeroed memory:
+	 * nat64lsn_create_state() assumes that unused states are zeroed.
+	 */
+	pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT | M_ZERO);
+	if (pg == NULL)
+		return (1);
+
+	if (ji->needs_idx != 0) {
+		ji->spare_idx = uma_zalloc(nat64lsn_pgidx_zone, M_NOWAIT);
+		/* Failed alloc isn't always fatal, so don't check */
+	}
+	/* A set bit in freemask marks a free state */
+	memset(&pg->freemask, 0xFF, sizeof(pg->freemask));
+	pg->nat_proto = ji->nat_proto;
+	ji->pg = pg;
+	return (0);
+}
+
+/*
+ * Releases host @nh: destroys its lock, frees every allocated chunk of
+ * portgroup index pointers and then the host structure itself.
+ * The portgroups referenced from those chunks are not freed here.
+ */
+static void
+destroy_host6(struct nat64lsn_host *nh)
+{
+ char a[INET6_ADDRSTRLEN];
+ int i;
+
+ inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
+ DPRINTF(DP_OBJ, "DESTROY HOST %s %p (pg used %d)", a, nh,
+ nh->pg_used);
+ NAT64_LOCK_DESTROY(nh);
+ /* pg_allocated grows in steps of NAT64LSN_PGIDX_CHUNK */
+ for (i = 0; i < nh->pg_allocated / NAT64LSN_PGIDX_CHUNK; i++)
+ uma_zfree(nat64lsn_pgidx_zone, PORTGROUP_CHUNK(nh, i));
+ uma_zfree(nat64lsn_host_zone, nh);
+}
+
+/*
+ * Pre-allocates everything a JTYPE_NEWHOST job needs: the host record,
+ * its first chunk of portgroup index pointers and one portgroup.
+ * The host is stored in ji->nh (the portgroup in ji->pg).
+ * Returns 0 on success; 1, 2 or 3 when the host, the index chunk or
+ * the portgroup allocation failed, respectively.
+ */
+static NAT64NOINLINE int
+alloc_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
+{
+	struct nat64lsn_host *nh;
+	char a[INET6_ADDRSTRLEN];
+	int error;
+
+	nh = uma_zalloc(nat64lsn_host_zone, M_NOWAIT);
+	if (nh == NULL)
+		return (1);
+
+	PORTGROUP_CHUNK(nh, 0) = uma_zalloc(nat64lsn_pgidx_zone, M_NOWAIT);
+	if (PORTGROUP_CHUNK(nh, 0) == NULL) {
+		error = 2;
+		goto free_host;
+	}
+
+	if (alloc_portgroup(ji) != 0) {
+		NAT64STAT_INC(&cfg->stats, jportfails);
+		error = 3;
+		goto free_chunk;
+	}
+
+	NAT64_LOCK_INIT(nh);
+	nh->addr = ji->haddr;
+	nh->hsize = NAT64LSN_HSIZE; /* XXX: hardcoded size */
+	nh->pg_allocated = NAT64LSN_PGIDX_CHUNK;
+	nh->pg_used = 0;
+	ji->nh = nh;
+
+	inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
+	DPRINTF(DP_OBJ, "ALLOC HOST %s %p", a, ji->nh);
+	return (0);
+
+	/* Unwind in reverse order of allocation */
+free_chunk:
+	uma_zfree(nat64lsn_pgidx_zone, PORTGROUP_CHUNK(nh, 0));
+free_host:
+	uma_zfree(nat64lsn_host_zone, nh);
+	return (error);
+}
+
+/*
+ * Looks for the first unoccupied portgroup slot of host @nh among the
+ * nh->pg_allocated slots.
+ * On success stores its 0-based number in @idx and returns 0;
+ * returns 1 when every allocated slot is taken.
+ */
+static NAT64NOINLINE int
+find_nh_pg_idx(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh, int *idx)
+{
+	int slot;
+
+	slot = 0;
+	/* PORTGROUP_BYSIDX() takes 1-based indexes */
+	while (slot < nh->pg_allocated &&
+	    PORTGROUP_BYSIDX(cfg, nh, slot + 1) != NULL)
+		slot++;
+
+	if (slot >= nh->pg_allocated)
+		return (1);
+
+	*idx = slot;
+	return (0);
+}
+
+/*
+ * Inserts the pre-allocated host ji->nh into the instance hash and
+ * tries to attach the pre-allocated portgroup to it.
+ * If a host with the same address is already present, the job is only
+ * marked as done and ji->nh / ji->pg are left set.
+ * Returns 0 on success, 1 if the portgroup could not be attached.
+ */
+static NAT64NOINLINE int
+attach_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
+{
+	char a[INET6_ADDRSTRLEN];
+	struct nat64lsn_host *nh;
+
+	I6HASH_FIND(cfg, nh, &ji->haddr);
+	if (nh != NULL) {
+		/*
+		 * This probably means we had several simultaneous host
+		 * requests and a previous one has already attached this
+		 * host. Requeue attached mbuf and mark job as done, but
+		 * leave nh and pg pointers not changed, so
+		 * nat64lsn_do_request() will release all allocated
+		 * resources.
+		 */
+		inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
+		DPRINTF(DP_OBJ, "%s %p is already attached as %p",
+		    a, ji->nh, nh);
+		ji->done = 1;
+		return (0);
+	}
+
+	/* Add new host to list; the hash owns it from now on */
+	nh = ji->nh;
+	I6HASH_INSERT(cfg, nh);
+	cfg->ihcount++;
+	ji->nh = NULL;
+
+	inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
+	DPRINTF(DP_OBJ, "ATTACH HOST %s %p", a, nh);
+
+	/*
+	 * Try to add portgroup.
+	 * Note it will automatically set
+	 * 'done' on ji if successful.
+	 */
+	if (attach_portgroup(cfg, ji) == 0)
+		return (0);
+
+	DPRINTF(DP_DROPS, "%s %p failed to attach PG",
+	    a, nh);
+	NAT64STAT_INC(&cfg->stats, jportfails);
+	return (1);
+}
+
+/*
+ * Searches the cfg->pg[] slots that belong to alias address number
+ * @addr_off and protocol @nat_proto for an unused port chunk, starting
+ * at chunk NAT64_MIN_CHUNK.
+ * On success stores the first port of the chunk in @aport and the
+ * cfg->pg[] index in @ppg_idx and returns 1; returns 0 if all chunks
+ * are taken.
+ */
+static NAT64NOINLINE int
+find_pg_place_addr(const struct nat64lsn_cfg *cfg, int addr_off,
+    int nat_proto, uint16_t *aport, int *ppg_idx)
+{
+	int base, chunk;
+
+	base = addr_off * _ADDR_PG_COUNT +
+	    (nat_proto - 1) * _ADDR_PG_PROTO_COUNT;
+
+	chunk = NAT64_MIN_CHUNK;
+	while (chunk < _ADDR_PG_PROTO_COUNT && cfg->pg[base + chunk] != NULL)
+		chunk++;
+
+	if (chunk >= _ADDR_PG_PROTO_COUNT)
+		return (0);
+
+	*aport = chunk * NAT64_CHUNK_SIZE;
+	*ppg_idx = base + chunk;
+	return (1);
+}
+
+/*
+ * Picks an alias address and a free port chunk for a new portgroup.
+ * Candidates are tried in this order: the address suggested in
+ * ji->aaddr (if any), an address derived from the flow hash, then every
+ * address of the IPv4 prefix in turn.
+ * On success stores the address (network byte order) in @aaddr, the
+ * first port in @aport and the cfg->pg[] index in @ppg_idx and
+ * returns 0; returns 1 when nothing is free.
+ *
+ * XXX: This function needs to be rewritten to
+ * use free bitmask for faster pg finding,
+ * additionally, it should take into consideration
+ * a) randomization and
+ * b) previous addresses allocated to given nat instance
+ */
+static NAT64NOINLINE int
+find_portgroup_place(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji,
+    uint32_t *aaddr, uint16_t *aport, int *ppg_idx)
+{
+	int i, naddrs, nat_proto;
+
+	/*
+	 * XXX: Use bitmask index to be able to find/check if IP address
+	 * has some spare pg's
+	 */
+	nat_proto = ji->nat_proto;
+
+	/* First, try to use same address */
+	if (ji->aaddr != 0) {
+		i = ntohl(ji->aaddr) - cfg->prefix4;
+		if (find_pg_place_addr(cfg, i, nat_proto, aport,
+		    ppg_idx) != 0)
+			goto found;
+	}
+
+	/* Number of addresses covered by the IPv4 prefix */
+	naddrs = 1 << (32 - cfg->plen4);
+
+	/* Next, try to use random address based on flow hash */
+	i = ji->fhash % naddrs;
+	if (find_pg_place_addr(cfg, i, nat_proto, aport, ppg_idx) != 0)
+		goto found;
+
+	/* Last one: simply find ANY available */
+	for (i = 0; i < naddrs; i++) {
+		if (find_pg_place_addr(cfg, i, nat_proto, aport,
+		    ppg_idx) != 0)
+			goto found;
+	}
+
+	return (1);
+
+found:
+	*aaddr = htonl(cfg->prefix4 + i);
+	return (0);
+}
+
+/*
+ * Binds the pre-allocated portgroup ji->pg to the host with address
+ * ji->haddr.
+ * On success the portgroup is recorded both in cfg->pg[] and in the
+ * host's index, ji->pg is cleared and ji->done is set.
+ * Returns 0 on success, 1 if the host is not in the hash, 2 if no free
+ * port chunk was found, 3 if the host has no free portgroup index.
+ */
+static NAT64NOINLINE int
+attach_portgroup(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
+{
+ char a[INET6_ADDRSTRLEN];
+ struct nat64lsn_portgroup *pg;
+ struct nat64lsn_host *nh;
+ uint32_t aaddr;
+ uint16_t aport;
+ int nh_pg_idx, pg_idx;
+
+ pg = ji->pg;
+
+ /*
+ * Find source host and bind: we can't rely on
+ * pg->host
+ */
+ I6HASH_FIND(cfg, nh, &ji->haddr);
+ if (nh == NULL)
+ return (1);
+
+ /* Find spare port chunk */
+ if (find_portgroup_place(cfg, ji, &aaddr, &aport, &pg_idx) != 0) {
+ inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
+ DPRINTF(DP_OBJ | DP_DROPS, "empty PG not found for %s", a);
+ return (2);
+ }
+
+ /* Expand PG indexes if needed */
+ if (nh->pg_allocated < cfg->max_chunks && ji->spare_idx != NULL) {
+ /* The host takes ownership of the spare chunk */
+ PORTGROUP_CHUNK(nh, nh->pg_allocated / NAT64LSN_PGIDX_CHUNK) =
+ ji->spare_idx;
+ nh->pg_allocated += NAT64LSN_PGIDX_CHUNK;
+ ji->spare_idx = NULL;
+ }
+
+ /* Find empty index to store PG in the @nh */
+ if (find_nh_pg_idx(cfg, nh, &nh_pg_idx) != 0) {
+ inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
+ DPRINTF(DP_OBJ | DP_DROPS, "free PG index not found for %s",
+ a);
+ return (3);
+ }
+
+ cfg->pg[pg_idx] = pg;
+ cfg->protochunks[pg->nat_proto]++;
+ NAT64STAT_INC(&cfg->stats, spgcreated);
+
+ pg->aaddr = aaddr;
+ pg->aport = aport;
+ pg->host = nh;
+ pg->idx = pg_idx;
+ SET_AGE(pg->timestamp);
+
+ PORTGROUP_BYSIDX(cfg, nh, nh_pg_idx + 1) = pg;
+ /* pg_used only grows when the slot just past the used range is taken */
+ if (nh->pg_used == nh_pg_idx)
+ nh->pg_used++;
+ SET_AGE(nh->timestamp);
+
+ /* Ownership has moved to the host; nothing left to free in the job */
+ ji->pg = NULL;
+ ji->done = 1;
+
+ return (0);
+}
+
+/*
+ * Handles a JTYPE_DELPORTGROUP job for the host with address ji->haddr.
+ * Every portgroup selected in ji->delmask that is still stale is
+ * unlinked from the host and from cfg->pg[] and then freed. If the host
+ * itself is stale afterwards it is removed from the hash and handed
+ * over in ji->nh, so that nat64lsn_do_request() destroys it.
+ * A job for a host without portgroups (queued by the periodic run for
+ * a stale host) only performs the host removal step.
+ */
+static NAT64NOINLINE void
+consider_del_portgroup(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
+{
+	struct nat64lsn_host *nh, *nh_tmp;
+	struct nat64lsn_portgroup *pg, *pg_list[256];
+	int i, pg_lidx, idx;
+
+	/* Find source host */
+	I6HASH_FIND(cfg, nh, &ji->haddr);
+	if (nh == NULL)
+		return;
+
+	memset(pg_list, 0, sizeof(pg_list));
+	pg_lidx = 0;
+
+	NAT64_LOCK(nh);
+
+	/* Runs zero times when the host has no portgroups in use */
+	for (i = (int)nh->pg_used - 1; i >= 0; i--) {
+		if ((ji->delmask[i / 64] & ((uint64_t)1 << (i % 64))) == 0)
+			continue;
+		pg = PORTGROUP_BYSIDX(cfg, nh, i + 1);
+
+		/*
+		 * The mask was built earlier under a read lock; the slot
+		 * may have been emptied by another job since then.
+		 */
+		if (pg == NULL)
+			continue;
+
+		/* Check that PG isn't busy. */
+		if (stale_pg(cfg, pg) == 0)
+			continue;
+
+		/* DO delete */
+		pg_list[pg_lidx++] = pg;
+		PORTGROUP_BYSIDX(cfg, nh, i + 1) = NULL;
+
+		idx = _GET_PORTGROUP_IDX(cfg, ntohl(pg->aaddr), pg->nat_proto,
+		    pg->aport);
+		KASSERT(cfg->pg[idx] == pg, ("Non matched pg"));
+		cfg->pg[idx] = NULL;
+		cfg->protochunks[pg->nat_proto]--;
+		/* Counted once, here, where the portgroup is unlinked */
+		NAT64STAT_INC(&cfg->stats, spgdeleted);
+
+		/* Decrease pg_used */
+		while (nh->pg_used > 0 &&
+		    PORTGROUP_BYSIDX(cfg, nh, nh->pg_used) == NULL)
+			nh->pg_used--;
+
+		/* Check if on-stack buffer has ended */
+		if (pg_lidx == nitems(pg_list))
+			break;
+	}
+
+	NAT64_UNLOCK(nh);
+
+	if (stale_nh(cfg, nh)) {
+		I6HASH_REMOVE(cfg, nh, nh_tmp, &ji->haddr);
+		KASSERT(nh != NULL, ("Unable to find address"));
+		cfg->ihcount--;
+		/* The job now owns the host; it is destroyed by the caller */
+		ji->nh = nh;
+		I6HASH_FIND(cfg, nh, &ji->haddr);
+		KASSERT(nh == NULL, ("Failed to delete address"));
+	}
+
+	/* TODO: Delay freeing portgroups */
+	while (pg_lidx > 0) {
+		pg_lidx--;
+		destroy_portgroup(pg_list[pg_lidx]);
+	}
+}
+
+/*
+ * Main request handler.
+ * Responsible for handling jqueue, e.g.
+ * creating new hosts, adding/deleting portgroups.
+ *
+ * Callout handler; @data is the struct nat64lsn_cfg of the instance.
+ * Works in three phases: takes the whole pending queue and allocates
+ * memory for each job without the write lock, applies all jobs under
+ * the write locks, then releases whatever the jobs still hold and
+ * reinjects their packets.
+ */
+static NAT64NOINLINE void
+nat64lsn_do_request(void *data)
+{
+ IPFW_RLOCK_TRACKER;
+ struct nat64lsn_job_head jhead;
+ struct nat64lsn_job_item *ji;
+ int jcount, nhsize;
+ struct nat64lsn_cfg *cfg = (struct nat64lsn_cfg *) data;
+ struct ip_fw_chain *ch;
+ int delcount;
+
+ CURVNET_SET(cfg->vp);
+
+ TAILQ_INIT(&jhead);
+
+ /* XXX: We're running unlocked here */
+
+ ch = cfg->ch;
+ delcount = 0;
+ IPFW_RLOCK(ch);
+
+ /* Grab queue */
+ JQUEUE_LOCK();
+ TAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item, next);
+ jcount = cfg->jlen;
+ cfg->jlen = 0;
+ JQUEUE_UNLOCK();
+
+ /* check if we need to resize hash */
+ nhsize = 0;
+ if (cfg->ihcount > cfg->ihsize && cfg->ihsize < 65536) {
+ nhsize = cfg->ihsize;
+ for ( ; cfg->ihcount > nhsize && nhsize < 65536; nhsize *= 2)
+ ;
+ } else if (cfg->ihcount < cfg->ihsize * 4) {
+ nhsize = cfg->ihsize;
+ for ( ; cfg->ihcount < nhsize * 4 && nhsize > 32; nhsize /= 2)
+ ;
+ }
+
+ IPFW_RUNLOCK(ch);
+
+ if (TAILQ_EMPTY(&jhead)) {
+ CURVNET_RESTORE();
+ return;
+ }
+
+ NAT64STAT_INC(&cfg->stats, jcalls);
+ DPRINTF(DP_JQUEUE, "count=%d", jcount);
+
+ /*
+ * TODO:
+ * What we should do here is to build a hash
+ * to ensure we don't have lots of duplicate requests.
+ * Skip this for now.
+ *
+ * TODO: Limit per-call number of items
+ */
+
+ /* Pre-allocate everything for entire chain */
+ TAILQ_FOREACH(ji, &jhead, next) {
+ switch (ji->jtype) {
+ case JTYPE_NEWHOST:
+ if (alloc_host6(cfg, ji) != 0)
+ NAT64STAT_INC(&cfg->stats, jhostfails);
+ break;
+ case JTYPE_NEWPORTGROUP:
+ if (alloc_portgroup(ji) != 0)
+ NAT64STAT_INC(&cfg->stats, jportfails);
+ break;
+ case JTYPE_DELPORTGROUP:
+ /* Accumulated only; not used further in this function */
+ delcount += ji->delcount;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /*
+ * TODO: Alloc new hash
+ * Resizing is not implemented: the size computed above is
+ * discarded here, so both nhsize branches below are dead.
+ */
+ nhsize = 0;
+ if (nhsize > 0) {
+ /* XXX: */
+ }
+
+ /* Apply all changes in batch */
+ IPFW_UH_WLOCK(ch);
+ IPFW_WLOCK(ch);
+
+ TAILQ_FOREACH(ji, &jhead, next) {
+ switch (ji->jtype) {
+ case JTYPE_NEWHOST:
+ /* Skipped when the pre-allocation above failed */
+ if (ji->nh != NULL)
+ attach_host6(cfg, ji);
+ break;
+ case JTYPE_NEWPORTGROUP:
+ if (ji->pg != NULL &&
+ attach_portgroup(cfg, ji) != 0)
+ NAT64STAT_INC(&cfg->stats, jportfails);
+ break;
+ case JTYPE_DELPORTGROUP:
+ consider_del_portgroup(cfg, ji);
+ break;
+ }
+ }
+
+ if (nhsize > 0) {
+ /* XXX: Move everything to new hash */
+ }
+
+ IPFW_WUNLOCK(ch);
+ IPFW_UH_WUNLOCK(ch);
+
+ /* Flush unused entries */
+ while (!TAILQ_EMPTY(&jhead)) {
+ ji = TAILQ_FIRST(&jhead);
+ TAILQ_REMOVE(&jhead, ji, next);
+ /* Whatever a job still points to was not attached: free it */
+ if (ji->nh != NULL)
+ destroy_host6(ji->nh);
+ if (ji->pg != NULL)
+ destroy_portgroup(ji->pg);
+ if (ji->m != NULL)
+ reinject_mbuf(cfg, ji);
+ if (ji->spare_idx != NULL)
+ uma_zfree(nat64lsn_pgidx_zone, ji->spare_idx);
+ free(ji, M_IPFW);
+ }
+ CURVNET_RESTORE();
+}
+
+/*
+ * Allocates a zeroed job item of type @jtype.
+ * When @f_id is given, the flow id, its source IPv6 address and the
+ * mapped NAT protocol are copied into the job.
+ * Returns NULL if the job queue is at its limit, if the protocol of
+ * @f_id is not supported or if memory is exhausted.
+ */
+static NAT64NOINLINE struct nat64lsn_job_item *
+nat64lsn_create_job(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id,
+    int jtype)
+{
+	struct nat64lsn_job_item *job;
+	uint8_t nat_proto;
+
+	/*
+	 * Do not try to lock possibly contested mutex if we're near the limit.
+	 * Drop packet instead.
+	 */
+	if (cfg->jlen >= cfg->jmaxlen) {
+		NAT64STAT_INC(&cfg->stats, jmaxlen);
+		return (NULL);
+	}
+
+	nat_proto = 0;
+	if (f_id != NULL) {
+		nat_proto = nat64lsn_proto_map[f_id->proto];
+
+		DPRINTF(DP_JQUEUE, "REQUEST pg nat_proto %d on proto %d",
+		    nat_proto, f_id->proto);
+
+		/* Unsupported protocol: refuse before allocating */
+		if (nat_proto == 0)
+			return (NULL);
+	}
+
+	job = malloc(sizeof(*job), M_IPFW, M_NOWAIT | M_ZERO);
+	if (job == NULL) {
+		NAT64STAT_INC(&cfg->stats, jnomem);
+		return (NULL);
+	}
+
+	job->jtype = jtype;
+	if (f_id == NULL)
+		return (job);
+
+	job->f_id = *f_id;
+	job->haddr = f_id->src_ip6;
+	job->nat_proto = nat_proto;
+	return (job);
+}
+
+/*
+ * Appends job @ji to the instance queue and makes sure the job handler
+ * callout is armed. A NULL @ji is ignored.
+ */
+static NAT64NOINLINE void
+nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
+{
+
+	if (ji != NULL) {
+		JQUEUE_LOCK();
+		TAILQ_INSERT_TAIL(&cfg->jhead, ji, next);
+		cfg->jlen++;
+		NAT64STAT_INC(&cfg->stats, jrequests);
+
+		/* Arm the handler only if it is not already pending */
+		if (!callout_pending(&cfg->jcallout))
+			callout_reset(&cfg->jcallout, 1, nat64lsn_do_request,
+			    cfg);
+		JQUEUE_UNLOCK();
+	}
+}
+
+/*
+ * Moves all @jlen jobs from the list @jhead to the tail of the instance
+ * queue and makes sure the job handler callout is armed.
+ * An empty list is ignored.
+ */
+static NAT64NOINLINE void
+nat64lsn_enqueue_jobs(struct nat64lsn_cfg *cfg,
+    struct nat64lsn_job_head *jhead, int jlen)
+{
+
+	if (!TAILQ_EMPTY(jhead)) {
+		/* Attach current queue to execution one */
+		JQUEUE_LOCK();
+		TAILQ_CONCAT(&cfg->jhead, jhead, next);
+		cfg->jlen += jlen;
+		NAT64STAT_ADD(&cfg->stats, jrequests, jlen);
+
+		/* Arm the handler only if it is not already pending */
+		if (!callout_pending(&cfg->jcallout))
+			callout_reset(&cfg->jcallout, 1, nat64lsn_do_request,
+			    cfg);
+		JQUEUE_UNLOCK();
+	}
+}
+
+/*
+ * Computes a hash over the IPv6 flow: destination address, source
+ * address, destination port and source port, packed back to back into
+ * a 36-byte buffer.
+ */
+static unsigned int
+flow6_hash(const struct ipfw_flow_id *f_id)
+{
+	unsigned char hbuf[36];
+
+	memcpy(hbuf, &f_id->dst_ip6, 16);
+	memcpy(&hbuf[16], &f_id->src_ip6, 16);
+	memcpy(&hbuf[32], &f_id->dst_port, 2);
+	/* The source port goes after the destination port, not over it */
+	memcpy(&hbuf[34], &f_id->src_port, 2);
+
+	return (djb_hash(hbuf, sizeof(hbuf)));
+}
+
+/*
+ * Queues a JTYPE_NEWHOST job for the flow @f_id and hands the packet
+ * over to it. If the job cannot be created the packet is dropped.
+ * In both cases *pm is set to NULL and IP_FW_PASS is returned.
+ */
+static NAT64NOINLINE int
+nat64lsn_request_host(struct nat64lsn_cfg *cfg,
+    const struct ipfw_flow_id *f_id, struct mbuf **pm)
+{
+	struct nat64lsn_job_item *job;
+	struct mbuf *pkt;
+
+	/* Take the packet away from the caller */
+	pkt = *pm;
+	*pm = NULL;
+
+	job = nat64lsn_create_job(cfg, f_id, JTYPE_NEWHOST);
+	if (job == NULL) {
+		m_freem(pkt);
+		NAT64STAT_INC(&cfg->stats, dropped);
+		DPRINTF(DP_DROPS, "failed to create job");
+		return (IP_FW_PASS);
+	}
+
+	job->m = pkt;
+	/* Provide pseudo-random value based on flow */
+	job->fhash = flow6_hash(f_id);
+	nat64lsn_enqueue_job(cfg, job);
+	NAT64STAT_INC(&cfg->stats, jhostsreq);
+
+	return (IP_FW_PASS);
+}
+
+/*
+ * Queues a JTYPE_NEWPORTGROUP job for the flow @f_id and hands the
+ * packet over to it. @aaddr is the preferred alias address (0 if none)
+ * and @needs_idx asks for a spare chunk of portgroup indexes.
+ * If the job cannot be created the packet is dropped.
+ * In both cases *pm is set to NULL and IP_FW_PASS is returned.
+ */
+static NAT64NOINLINE int
+nat64lsn_request_portgroup(struct nat64lsn_cfg *cfg,
+    const struct ipfw_flow_id *f_id, struct mbuf **pm, uint32_t aaddr,
+    int needs_idx)
+{
+	struct nat64lsn_job_item *job;
+	struct mbuf *pkt;
+
+	/* Take the packet away from the caller */
+	pkt = *pm;
+	*pm = NULL;
+
+	job = nat64lsn_create_job(cfg, f_id, JTYPE_NEWPORTGROUP);
+	if (job == NULL) {
+		m_freem(pkt);
+		NAT64STAT_INC(&cfg->stats, dropped);
+		DPRINTF(DP_DROPS, "failed to create job");
+		return (IP_FW_PASS);
+	}
+
+	job->m = pkt;
+	/* Provide pseudo-random value based on flow */
+	job->fhash = flow6_hash(f_id);
+	job->aaddr = aaddr;
+	job->needs_idx = needs_idx;
+	nat64lsn_enqueue_job(cfg, job);
+	NAT64STAT_INC(&cfg->stats, jportreq);
+
+	return (IP_FW_PASS);
+}
+
+/*
+ * Allocates a state for key @kst in the first portgroup of @nh that
+ * matches @nat_proto and has a free slot, and links the state into the
+ * host hash.
+ * Returns the new state, or NULL when no matching portgroup has a free
+ * slot. @aaddr is updated along the way with alias addresses of the
+ * portgroups examined (see the inline comments).
+ */
+static NAT64NOINLINE struct nat64lsn_state *
+nat64lsn_create_state(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh,
+ int nat_proto, struct nat64lsn_state *kst, uint32_t *aaddr)
+{
+ struct nat64lsn_portgroup *pg;
+ struct nat64lsn_state *st;
+ int i, hval, off;
+
+ /* XXX: create additional bitmask for selecting proper portgroup */
+ for (i = 0; i < nh->pg_used; i++) {
+ pg = PORTGROUP_BYSIDX(cfg, nh, i + 1);
+ if (pg == NULL)
+ continue;
+ /* Seed with the first portgroup's address, whatever its protocol */
+ if (*aaddr == 0)
+ *aaddr = pg->aaddr;
+ if (pg->nat_proto != nat_proto)
+ continue;
+
+ /* PG_GET_FREE_IDX() result is 1-based here; 0 means no free slot */
+ off = PG_GET_FREE_IDX(pg);
+ if (off != 0) {
+ /* We have found spare state. Use it */
+ off--;
+ PG_MARK_BUSY_IDX(pg, off);
+ st = &pg->states[off];
+
+ /*
+ * Fill in new info. Assume state was zeroed.
+ * Timestamp and flags will be filled by caller.
+ */
+ st->u.s = kst->u.s;
+ st->cur.idx = i + 1;
+ st->cur.off = off;
+
+ /* Insert into host hash table */
+ hval = HASH_IN4(&st->u.hkey) & (nh->hsize - 1);
+ st->next = nh->phash[hval];
+ nh->phash[hval] = st->cur;
+
+ nat64lsn_dump_state(cfg, pg, st, "ALLOC STATE", off);
+
+ NAT64STAT_INC(&cfg->stats, screated);
+
+ return (st);
+ }
+ /* Save last used alias address */
+ *aaddr = pg->aaddr;
+ }
+
+ return (NULL);
+}
+
+/*
+ * Translates the IPv6 packet *pm described by @f_id.
+ * Looks up the source host and the state for the flow, creating the
+ * state if needed. When the host or a portgroup with a free state is
+ * missing, the packet is queued with a job that requests it.
+ *
+ * Returns IP_FW_DENY when the packet was dropped or consumed by the
+ * translation code, and IP_FW_PASS when it was queued with a job or
+ * when the translation code asked to skip it. *pm is set to NULL in
+ * every case except the skip one, where the mbuf stays with the caller.
+ */
+static NAT64NOINLINE int
+nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id,
+    struct mbuf **pm)
+{
+	struct pfloghdr loghdr, *logdata;
+	char a[INET6_ADDRSTRLEN];
+	struct nat64lsn_host *nh;
+	struct st_ptr sidx;
+	struct nat64lsn_state *st, kst;
+	struct nat64lsn_portgroup *pg;
+	struct icmp6_hdr *icmp6;
+	uint32_t aaddr;
+	int action, hval, k, nat_proto, proto;
+	uint16_t aport, state_ts, state_flags;
+
+	/* Check if af/protocol is supported and get it short id */
+	nat_proto = nat64lsn_proto_map[f_id->proto];
+	if (nat_proto == 0) {
+		/*
+		 * Since we can be called from jobs handler, we need
+		 * to free mbuf by self, do not leave this task to
+		 * ipfw_check_packet().
+		 */
+		NAT64STAT_INC(&cfg->stats, noproto);
+		m_freem(*pm);
+		*pm = NULL;
+		return (IP_FW_DENY);
+	}
+
+	/* Try to find host first */
+	I6HASH_FIND(cfg, nh, &f_id->src_ip6);
+
+	if (nh == NULL)
+		return (nat64lsn_request_host(cfg, f_id, pm));
+
+	/* Fill-in on-stack state structure */
+	kst.u.s.faddr = f_id->dst_ip6.s6_addr32[3];
+	kst.u.s.fport = f_id->dst_port;
+	kst.u.s.lport = f_id->src_port;
+
+	/* Prepare some fields we might need to update */
+	hval = 0;
+	proto = nat64_getlasthdr(*pm, &hval);
+	if (proto < 0) {
+		NAT64STAT_INC(&cfg->stats, dropped);
+		DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious");
+		m_freem(*pm);
+		*pm = NULL;
+		return (IP_FW_DENY);
+	}
+
+	SET_AGE(state_ts);
+	if (proto == IPPROTO_TCP)
+		state_flags = convert_tcp_flags(
+		    TCP(mtodo(*pm, hval))->th_flags);
+	else
+		state_flags = 0;
+	if (proto == IPPROTO_ICMPV6) {
+		/* Alter local port data */
+		icmp6 = mtodo(*pm, hval);
+		if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST ||
+		    icmp6->icmp6_type == ICMP6_ECHO_REPLY)
+			kst.u.s.lport = ntohs(icmp6->icmp6_id);
+	}
+
+	hval = HASH_IN4(&kst.u.hkey) & (nh->hsize - 1);
+	pg = NULL;
+	st = NULL;
+
+	/* OK, let's find state in host hash */
+	NAT64_LOCK(nh);
+	sidx = nh->phash[hval];
+	k = 0;
+	while (sidx.idx != 0) {
+		pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx);
+		st = &pg->states[sidx.off];
+		if (st->u.hkey == kst.u.hkey && pg->nat_proto == nat_proto)
+			break;
+		/* Complain about suspiciously long chains, keep walking */
+		if (k++ > 1000) {
+			DPRINTF(DP_ALL, "XXX: too long %d/%d %d/%d\n",
+			    sidx.idx, sidx.off, st->next.idx, st->next.off);
+			inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
+			DPRINTF(DP_GENERIC, "TR host %s %p on cpu %d",
+			    a, nh, curcpu);
+			k = 0;
+		}
+		sidx = st->next;
+	}
+
+	if (sidx.idx == 0) {
+		aaddr = 0;
+		st = nat64lsn_create_state(cfg, nh, nat_proto, &kst, &aaddr);
+		if (st == NULL) {
+			/* No free states. Request more if we can */
+			if (nh->pg_used >= cfg->max_chunks) {
+				/* Limit reached */
+				NAT64STAT_INC(&cfg->stats, dropped);
+				inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
+				DPRINTF(DP_DROPS, "PG limit reached "
+				    " for host %s (used %u, allocated %u, "
+				    "limit %u)", a,
+				    nh->pg_used * NAT64_CHUNK_SIZE,
+				    nh->pg_allocated * NAT64_CHUNK_SIZE,
+				    cfg->max_chunks * NAT64_CHUNK_SIZE);
+				m_freem(*pm);
+				*pm = NULL;
+				NAT64_UNLOCK(nh);
+				return (IP_FW_DENY);
+			}
+			if ((nh->pg_allocated <=
+			    nh->pg_used + NAT64LSN_REMAININGPG) &&
+			    nh->pg_allocated < cfg->max_chunks)
+				action = 1; /* Request new indexes */
+			else
+				action = 0;
+			NAT64_UNLOCK(nh);
+			return (nat64lsn_request_portgroup(cfg, f_id,
+			    pm, aaddr, action));
+		}
+
+		/* We've got new state. */
+		sidx = st->cur;
+		pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx);
+	}
+
+	/* Okay, state found */
+
+	/* Update necessary fields */
+	if (st->timestamp != state_ts)
+		st->timestamp = state_ts;
+	/* Record any flag of this packet that the state does not have yet */
+	if ((st->flags & state_flags) != state_flags)
+		st->flags |= state_flags;
+
+	/* Copy needed state data */
+	aaddr = pg->aaddr;
+	aport = htons(pg->aport + sidx.off);
+
+	NAT64_UNLOCK(nh);
+
+	if (cfg->flags & NAT64_LOG) {
+		logdata = &loghdr;
+		/* NOTE(review): pg and st are read here after the unlock */
+		nat64lsn_log(logdata, *pm, AF_INET6, pg->idx, st->cur.off);
+	} else
+		logdata = NULL;
+
+	action = nat64_do_handle_ip6(*pm, aaddr, aport, &cfg->stats, logdata);
+	if (action == NAT64SKIP)
+		return (IP_FW_PASS);
+	if (action == NAT64MFREE)
+		m_freem(*pm);
+	*pm = NULL; /* mark mbuf as consumed */
+	return (IP_FW_DENY);
+}
+
+/*
+ * Main dataplane entry point.
+ * Validates that @cmd is this module's external action followed by an
+ * instance opcode, resolves the instance and dispatches the packet to
+ * the IPv4 or IPv6 translation routine according to the address type
+ * of the flow. *done is always set to 1.
+ * Returns the translation routine's result, or 0 when the opcodes do
+ * not match, the instance is unknown or the address type is neither
+ * 4 nor 6.
+ */
+int
+ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
+    ipfw_insn *cmd, int *done)
+{
+	ipfw_insn *icmd;
+	struct nat64lsn_cfg *cfg;
+
+	IPFW_RLOCK_ASSERT(ch);
+
+	*done = 1; /* terminate the search */
+	icmd = cmd + 1;
+	if (cmd->opcode != O_EXTERNAL_ACTION ||
+	    cmd->arg1 != V_nat64lsn_eid ||
+	    icmd->opcode != O_EXTERNAL_INSTANCE)
+		return (0);
+
+	cfg = NAT64_LOOKUP(ch, icmd);
+	if (cfg == NULL)
+		return (0);
+
+	if (args->f_id.addr_type == 4)
+		return (nat64lsn_translate4(cfg, &args->f_id, &args->m));
+	if (args->f_id.addr_type == 6)
+		return (nat64lsn_translate6(cfg, &args->f_id, &args->m));
+	return (0);
+}
+
+/*
+ * Constructor for the host zone: clears the portgroup pointer array and
+ * the state hash of the new host. Other fields are left untouched.
+ * Always returns 0.
+ */
+static int
+nat64lsn_ctor_host(void *mem, int size, void *arg, int flags)
+{
+ struct nat64lsn_host *nh;
+
+ nh = (struct nat64lsn_host *)mem;
+ memset(nh->pg_ptr, 0, sizeof(nh->pg_ptr));
+ memset(nh->phash, 0, sizeof(nh->phash));
+ return (0);
+}
+
+/*
+ * Constructor for the portgroup index zone: zeroes the whole item, so
+ * that every slot of a new chunk reads as NULL.
+ * Always returns 0.
+ */
+static int
+nat64lsn_ctor_pgidx(void *mem, int size, void *arg, int flags)
+{
+
+ memset(mem, 0, size);
+ return (0);
+}
+
+/*
+ * One-time initialization of the state shared by all instances: fills
+ * the protocol maps, initializes the job queue lock and creates the
+ * UMA zones for hosts, portgroups and portgroup index chunks.
+ */
+void
+nat64lsn_init_internal(void)
+{
+
+ memset(nat64lsn_proto_map, 0, sizeof(nat64lsn_proto_map));
+ /* Set up supported protocol map */
+ nat64lsn_proto_map[IPPROTO_TCP] = NAT_PROTO_TCP;
+ nat64lsn_proto_map[IPPROTO_UDP] = NAT_PROTO_UDP;
+ nat64lsn_proto_map[IPPROTO_ICMP] = NAT_PROTO_ICMP;
+ nat64lsn_proto_map[IPPROTO_ICMPV6] = NAT_PROTO_ICMP;
+ /* Fill in reverse proto map */
+ memset(nat64lsn_rproto_map, 0, sizeof(nat64lsn_rproto_map));
+ nat64lsn_rproto_map[NAT_PROTO_TCP] = IPPROTO_TCP;
+ nat64lsn_rproto_map[NAT_PROTO_UDP] = IPPROTO_UDP;
+ /* Both ICMP flavours share one NAT protocol; it maps back to ICMPv6 */
+ nat64lsn_rproto_map[NAT_PROTO_ICMP] = IPPROTO_ICMPV6;
+
+ JQUEUE_LOCK_INIT();
+ nat64lsn_host_zone = uma_zcreate("NAT64 hosts zone",
+ sizeof(struct nat64lsn_host), nat64lsn_ctor_host, NULL,
+ NULL, NULL, UMA_ALIGN_PTR, 0);
+ /* No constructor: portgroup items are not initialized by the zone */
+ nat64lsn_pg_zone = uma_zcreate("NAT64 portgroups zone",
+ sizeof(struct nat64lsn_portgroup), NULL, NULL, NULL, NULL,
+ UMA_ALIGN_PTR, 0);
+ nat64lsn_pgidx_zone = uma_zcreate("NAT64 portgroup indexes zone",
+ sizeof(struct nat64lsn_portgroup *) * NAT64LSN_PGIDX_CHUNK,
+ nat64lsn_ctor_pgidx, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+}
+
+/*
+ * Counterpart of nat64lsn_init_internal(): destroys the job queue lock
+ * and the three UMA zones.
+ */
+void
+nat64lsn_uninit_internal(void)
+{
+
+ JQUEUE_LOCK_DESTROY();
+ uma_zdestroy(nat64lsn_host_zone);
+ uma_zdestroy(nat64lsn_pg_zone);
+ uma_zdestroy(nat64lsn_pgidx_zone);
+}
+
+/*
+ * Starts the periodic maintenance of instance @cfg by arming its
+ * 'periodic' callout with nat64lsn_periodic() as the handler.
+ */
+void
+nat64lsn_start_instance(struct nat64lsn_cfg *cfg)
+{
+
+ callout_reset(&cfg->periodic, hz * PERIODIC_DELAY,
+ nat64lsn_periodic, cfg);
+}
+
+/*
+ * Allocates and initializes a new instance bound to chain @ch and the
+ * current vnet. @numaddr sizes the cfg->pg[] array, which gets
+ * _ADDR_PG_COUNT slots per address.
+ * All allocations use M_WAITOK, so the results are not checked.
+ * The callouts are only initialized here; nat64lsn_start_instance()
+ * arms the periodic one.
+ * Returns the new instance.
+ */
+struct nat64lsn_cfg *
+nat64lsn_init_instance(struct ip_fw_chain *ch, size_t numaddr)
+{
+ struct nat64lsn_cfg *cfg;
+
+ cfg = malloc(sizeof(struct nat64lsn_cfg), M_IPFW, M_WAITOK | M_ZERO);
+ TAILQ_INIT(&cfg->jhead);
+ cfg->vp = curvnet;
+ cfg->ch = ch;
+ COUNTER_ARRAY_ALLOC(cfg->stats.stats, NAT64STATS, M_WAITOK);
+
+ cfg->ihsize = NAT64LSN_HSIZE;
+ cfg->ih = malloc(sizeof(void *) * cfg->ihsize, M_IPFW,
+ M_WAITOK | M_ZERO);
+
+ /* NOTE(review): the size product is not checked for overflow here */
+ cfg->pg = malloc(sizeof(void *) * numaddr * _ADDR_PG_COUNT, M_IPFW,
+ M_WAITOK | M_ZERO);
+
+ callout_init(&cfg->periodic, CALLOUT_MPSAFE);
+ callout_init(&cfg->jcallout, CALLOUT_MPSAFE);
+
+ return (cfg);
+}
+
+/*
+ * Destroy all hosts callback.
+ * Called on module unload when all activity already finished, so
+ * can work without any locks.
+ *
+ * Frees every portgroup still referenced by host @nh, clearing the
+ * matching cfg->pg[] slot, then destroys the host and decrements the
+ * host counter. Always returns 0.
+ */
+static NAT64NOINLINE int
+nat64lsn_destroy_host(struct nat64lsn_host *nh, struct nat64lsn_cfg *cfg)
+{
+ struct nat64lsn_portgroup *pg;
+ int i;
+
+ /* i is used directly as the 1-based portgroup index */
+ for (i = nh->pg_used; i > 0; i--) {
+ pg = PORTGROUP_BYSIDX(cfg, nh, i);
+ if (pg == NULL)
+ continue;
+ cfg->pg[pg->idx] = NULL;
+ destroy_portgroup(pg);
+ nh->pg_used--;
+ }
+ destroy_host6(nh);
+ cfg->ihcount--;
+ return (0);
+}
+
+/*
+ * Tears down instance @cfg: stops both callouts, destroys all hosts
+ * with their portgroups and frees the instance memory.
+ */
+void
+nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg)
+{
+	struct nat64lsn_host *nh, *tmp;
+
+	/*
+	 * Stop the periodic callout first: its handler enqueues jobs and
+	 * thereby re-arms the job callout.
+	 */
+	callout_drain(&cfg->periodic);
+	/*
+	 * Drain without holding the job queue lock: the handler takes that
+	 * lock itself, and callout_drain() waits for a running handler to
+	 * finish, so holding the lock here could deadlock.
+	 */
+	callout_drain(&cfg->jcallout);
+
+	I6HASH_FOREACH_SAFE(cfg, nh, tmp, nat64lsn_destroy_host, cfg);
+	DPRINTF(DP_OBJ, "instance %s: hosts %d", cfg->name, cfg->ihcount);
+
+	COUNTER_ARRAY_FREE(cfg->stats.stats, NAT64STATS);
+	free(cfg->ih, M_IPFW);
+	free(cfg->pg, M_IPFW);
+	free(cfg, M_IPFW);
+}
+
diff --git a/sys/netpfil/ipfw/nat64/nat64lsn.h b/sys/netpfil/ipfw/nat64/nat64lsn.h
new file mode 100644
index 0000000..e6ceb1d
--- /dev/null
+++ b/sys/netpfil/ipfw/nat64/nat64lsn.h
@@ -0,0 +1,351 @@
+/*-
+ * Copyright (c) 2015 Yandex LLC
+ * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
+ * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _IP_FW_NAT64LSN_H_
+#define _IP_FW_NAT64LSN_H_
+
+/* Size of one port chunk, expressed as a power of two. */
+#define NAT64_CHUNK_SIZE_BITS 6 /* 64 ports */
+#define NAT64_CHUNK_SIZE (1 << NAT64_CHUNK_SIZE_BITS)
+
+/* Lowest port number and the index of the chunk that contains it. */
+#define NAT64_MIN_PORT 1024
+#define NAT64_MIN_CHUNK (NAT64_MIN_PORT >> NAT64_CHUNK_SIZE_BITS)
+
+/*
+ * Two-byte reference consisting of a portgroup slot index and an offset.
+ * NOTE(review): off is presumably the state offset inside the portgroup;
+ * confirm against the users in nat64lsn.c.
+ */
+struct st_ptr {
+ uint8_t idx; /* index in nh->pg_ptr array.
+ * NOTE: it starts from 1.
+ */
+ uint8_t off;
+};
+/* Largest value that fits into st_ptr.idx. */
+#define NAT64LSN_MAXPGPTR ((1 << (sizeof(uint8_t) * NBBY)) - 1)
+/* Number of bits in one uint64_t mask word. */
+#define NAT64LSN_PGPTRMASKBITS (sizeof(uint64_t) * NBBY)
+/* Number of uint64_t words needed for one bit per portgroup index. */
+#define NAT64LSN_PGPTRNMASK (roundup(NAT64LSN_MAXPGPTR, \
+ NAT64LSN_PGPTRMASKBITS) / NAT64LSN_PGPTRMASKBITS)
+
+struct nat64lsn_portgroup;
+/*
+ * Per IPv6 host record.
+ * The portgroups of a host are referenced through pg_ptr[], an array of
+ * pointers to chunks holding NAT64LSN_PGIDX_CHUNK portgroup pointers each;
+ * see PORTGROUP_BYSIDX() for the 1-based slot lookup.
+ */
+/* sizeof(struct nat64lsn_host) = 64 + 64x2 + 8x8 = 256 bytes */
+struct nat64lsn_host {
+ struct rwlock h_lock; /* Host states lock */
+
+ /* IPv6 address of the host. */
+ struct in6_addr addr;
+ /* NOTE(review): presumably the next host in the same hash chain. */
+ struct nat64lsn_host *next;
+ uint16_t timestamp; /* Last altered */
+ uint16_t hsize; /* ports hash size */
+ uint16_t pg_used; /* Number of portgroups used */
+#define NAT64LSN_REMAININGPG 8 /* Number of remaining PG before
+ * requesting of new chunk of indexes.
+ */
+ uint16_t pg_allocated; /* Number of portgroups indexes
+ * allocated.
+ */
+#define NAT64LSN_HSIZE 64
+ struct st_ptr phash[NAT64LSN_HSIZE]; /* XXX: hardcoded size */
+ /*
+ * PG indexes are stored in chunks with 32 elements.
+ * The maximum count is limited to 255 due to st_ptr->idx is uint8_t.
+ */
+#define NAT64LSN_PGIDX_CHUNK 32
+#define NAT64LSN_PGNIDX (roundup(NAT64LSN_MAXPGPTR, \
+ NAT64LSN_PGIDX_CHUNK) / NAT64LSN_PGIDX_CHUNK)
+ struct nat64lsn_portgroup **pg_ptr[NAT64LSN_PGNIDX]; /* PG indexes */
+};
+
+/*
+ * Wrappers around the per-host rwlock (h_lock); h is a pointer to
+ * struct nat64lsn_host.
+ */
+#define NAT64_RLOCK_ASSERT(h) rw_assert(&(h)->h_lock, RA_RLOCKED)
+#define NAT64_WLOCK_ASSERT(h) rw_assert(&(h)->h_lock, RA_WLOCKED)
+
+#define NAT64_RLOCK(h) rw_rlock(&(h)->h_lock)
+#define NAT64_RUNLOCK(h) rw_runlock(&(h)->h_lock)
+#define NAT64_WLOCK(h) rw_wlock(&(h)->h_lock)
+#define NAT64_WUNLOCK(h) rw_wunlock(&(h)->h_lock)
+/* NAT64_LOCK/NAT64_UNLOCK are aliases for the write (exclusive) variants. */
+#define NAT64_LOCK(h) NAT64_WLOCK(h)
+#define NAT64_UNLOCK(h) NAT64_WUNLOCK(h)
+#define NAT64_LOCK_INIT(h) do { \
+ rw_init(&(h)->h_lock, "NAT64 host lock"); \
+ } while (0)
+
+#define NAT64_LOCK_DESTROY(h) do { \
+ rw_destroy(&(h)->h_lock); \
+ } while (0)
+
+/*
+ * Internal proto index.
+ * Values start at 1; NAT_MAX_PROTO is the size of arrays indexed by them
+ * (e.g. nat64lsn_rproto_map[], protochunks[]).
+ */
+#define NAT_PROTO_TCP 1
+#define NAT_PROTO_UDP 2
+#define NAT_PROTO_ICMP 3
+
+#define NAT_MAX_PROTO 4
+/* Table indexed by the internal proto index; defined in another file. */
+extern uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO];
+
+/* Per-VNET id used to build the TLV type for nat64lsn named objects. */
+VNET_DECLARE(uint16_t, nat64lsn_eid);
+#define V_nat64lsn_eid VNET(nat64lsn_eid)
+#define IPFW_TLV_NAT64LSN_NAME IPFW_TLV_EACTION_NAME(V_nat64lsn_eid)
+
+/*
+ * Timestamp macros.
+ * _CT is time_uptime reduced modulo 65536 so that it fits the 16-bit
+ * timestamp fields. SET_AGE() stores the current value into x and
+ * GET_AGE() returns how far x lies in the past, accounting for one
+ * wrap-around of the 16-bit counter.
+ *
+ * SET_AGE() is fully parenthesized so that it behaves as a single
+ * expression wherever it is expanded. Note that GET_AGE() evaluates
+ * its argument more than once.
+ */
+#define	_CT		((int)time_uptime % 65536)
+#define	SET_AGE(x)	((x) = _CT)
+#define	GET_AGE(x)	((_CT >= (x)) ? _CT - (x) :	\
+	(int)65536 + _CT - (x))
+
+/*
+ * _FFS64 selects the single uint64_t freemask representation of a
+ * portgroup and the PG_* macro variants that operate on it.
+ */
+#ifdef __LP64__
+/* ffsl() is capable of checking 64-bit ints */
+#define _FFS64
+#endif
+
+/*
+ * One translation state.
+ * The remote address/port and the local port overlay the 64-bit hkey, so
+ * the three fields can be read or compared as a single value.
+ */
+/* 16 bytes */
+struct nat64lsn_state {
+ union {
+ struct {
+ in_addr_t faddr; /* Remote IPv4 address */
+ uint16_t fport; /* Remote IPv4 port */
+ uint16_t lport; /* Local IPv6 port */
+ }s;
+ uint64_t hkey;
+ } u;
+ uint8_t nat_proto;
+ uint8_t flags;
+ uint16_t timestamp;
+ struct st_ptr cur; /* Index of portgroup in nat64lsn_host */
+ struct st_ptr next; /* Next entry index */
+};
+
+/*
+ * Portgroup: NAT64_CHUNK_SIZE states sharing one alias address and
+ * protocol, with consecutive alias ports starting at aport.
+ *
+ * 1024+32 bytes per 64 states, used to store state
+ * AND for outside-in state lookup
+ */
+struct nat64lsn_portgroup {
+ struct nat64lsn_host *host; /* IPv6 source host info */
+ in_addr_t aaddr; /* Alias addr, network format */
+ uint16_t aport; /* Base port */
+ uint16_t timestamp;
+ uint8_t nat_proto;
+ uint8_t spare[3];
+ /* Index of this portgroup in the instance-wide cfg->pg[] array. */
+ uint32_t idx;
+ /* A set bit in freemask marks the corresponding states[] slot as free. */
+#ifdef _FFS64
+ uint64_t freemask; /* Mask of free entries */
+#else
+ uint32_t freemask[2]; /* Mask of free entries */
+#endif
+ struct nat64lsn_state states[NAT64_CHUNK_SIZE]; /* State storage */
+};
+/*
+ * Accessors for the portgroup free mask. A set bit means "slot is free".
+ *   PG_MARK_BUSY_IDX / PG_MARK_FREE_IDX - clear / set the bit for _idx.
+ *   PG_IS_FREE_IDX  - non-zero when the bit for _idx is set.
+ *   PG_IS_BUSY_IDX  - true when the bit for _idx is clear.
+ *   PG_GET_FREE_IDX - 1-based position of the lowest set bit, or 0 when no
+ *                     bit is set (ffs-style result; subtract 1 for a slot).
+ *   PG_IS_EMPTY     - true when all bits are set, i.e. no slot is busy.
+ * Two implementations exist: one uint64_t mask (_FFS64) or two uint32_t
+ * words.
+ */
+#ifdef _FFS64
+#define PG_MARK_BUSY_IDX(_pg, _idx) (_pg)->freemask &= ~((uint64_t)1<<(_idx))
+#define PG_MARK_FREE_IDX(_pg, _idx) (_pg)->freemask |= ((uint64_t)1<<(_idx))
+#define PG_IS_FREE_IDX(_pg, _idx) ((_pg)->freemask & ((uint64_t)1<<(_idx)))
+#define PG_IS_BUSY_IDX(_pg, _idx) (PG_IS_FREE_IDX(_pg, _idx) == 0)
+#define PG_GET_FREE_IDX(_pg) (ffsll((_pg)->freemask))
+#define PG_IS_EMPTY(_pg) (((_pg)->freemask + 1) == 0)
+#else
+#define PG_MARK_BUSY_IDX(_pg, _idx) \
+ (_pg)->freemask[(_idx) / 32] &= ~((u_long)1<<((_idx) % 32))
+#define PG_MARK_FREE_IDX(_pg, _idx) \
+ (_pg)->freemask[(_idx) / 32] |= ((u_long)1<<((_idx) % 32))
+#define PG_IS_FREE_IDX(_pg, _idx) \
+ ((_pg)->freemask[(_idx) / 32] & ((u_long)1<<((_idx) % 32)))
+#define PG_IS_BUSY_IDX(_pg, _idx) (PG_IS_FREE_IDX(_pg, _idx) == 0)
+#define PG_GET_FREE_IDX(_pg) _pg_get_free_idx(_pg)
+#define PG_IS_EMPTY(_pg) \
+ ((((_pg)->freemask[0] + 1) == 0 && ((_pg)->freemask[1] + 1) == 0))
+
+/*
+ * Two-word variant of PG_GET_FREE_IDX(): scans freemask[0], then
+ * freemask[1] (offset by 32). Returns 0 when neither word has a set bit.
+ */
+static inline int
+_pg_get_free_idx(const struct nat64lsn_portgroup *pg)
+{
+ int i;
+
+ if ((i = ffsl(pg->freemask[0])) != 0)
+ return (i);
+ if ((i = ffsl(pg->freemask[1])) != 0)
+ return (i + 32);
+ return (0);
+}
+
+#endif
+
+/* Queue head type for pending job items. */
+TAILQ_HEAD(nat64lsn_job_head, nat64lsn_job_item);
+
+/* Flags that may be set from / exported to userland. */
+#define NAT64LSN_FLAGSMASK (NAT64_LOG)
+/*
+ * NAT64LSN instance.
+ * The named_object is the first member, which is why the control code can
+ * cast between struct named_object * and struct nat64lsn_cfg *.
+ */
+struct nat64lsn_cfg {
+ struct named_object no;
+ //struct nat64_exthost *ex; /* Pointer to external addr array */
+ struct nat64lsn_portgroup **pg; /* XXX: array of pointers */
+ struct nat64lsn_host **ih; /* Host hash */
+ uint32_t prefix4; /* IPv4 prefix */
+ uint32_t pmask4; /* IPv4 prefix mask */
+ uint32_t ihsize; /* IPv6 host hash size */
+ uint8_t plen4;
+ uint8_t plen6;
+ uint8_t nomatch_verdict;/* What to return to ipfw on no-match */
+ uint8_t nomatch_final; /* Exit outer loop? */
+ struct in6_addr prefix6; /* IPv6 prefix to embed IPv4 hosts */
+
+ uint32_t ihcount; /* Number of items in host hash */
+ int max_chunks; /* Max chunks per client */
+ int agg_prefix_len; /* Prefix length to count */
+ int agg_prefix_max; /* Max hosts per agg prefix */
+ uint32_t jmaxlen; /* Max jobqueue length */
+ uint32_t flags;
+ uint16_t min_chunk; /* Min port group # to use */
+ uint16_t max_chunk; /* Max port group # to use */
+ uint16_t nh_delete_delay; /* Stale host delete delay */
+ uint16_t pg_delete_delay; /* Stale portgroup del delay */
+ uint16_t st_syn_ttl; /* TCP syn expire */
+ uint16_t st_close_ttl; /* TCP fin expire */
+ uint16_t st_estab_ttl; /* TCP established expire */
+ uint16_t st_udp_ttl; /* UDP expire */
+ uint16_t st_icmp_ttl; /* ICMP expire */
+ uint32_t protochunks[NAT_MAX_PROTO];/* Number of chunks used */
+
+ struct callout periodic;
+ struct callout jcallout;
+ struct ip_fw_chain *ch;
+ struct vnet *vp;
+ struct nat64lsn_job_head jhead;
+ int jlen;
+ char name[64]; /* Nat instance name */
+ nat64_stats_block stats;
+};
+
+/*
+ * Instance life cycle: nat64lsn_init_instance() allocates an instance
+ * sized for numaddr alias addresses, nat64lsn_start_instance() arms its
+ * periodic callout and nat64lsn_destroy_instance() frees it.
+ */
+struct nat64lsn_cfg *nat64lsn_init_instance(struct ip_fw_chain *ch,
+ size_t numaddr);
+void nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg);
+void nat64lsn_start_instance(struct nat64lsn_cfg *cfg);
+/* Module-wide setup / teardown. */
+void nat64lsn_init_internal(void);
+void nat64lsn_uninit_internal(void);
+/* ipfw external action entry point. */
+int ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
+ ipfw_insn *cmd, int *done);
+
+void
+nat64lsn_dump_state(const struct nat64lsn_cfg *cfg,
+ const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st,
+ const char *px, int off);
+/*
+ * Portgroup layout
+ * addr x nat_proto x port_off
+ *
+ * The instance-wide cfg->pg[] array holds _ADDR_PG_COUNT slots per alias
+ * address: _ADDR_PG_PROTO_COUNT port chunks for each protocol index.
+ * Protocol indexes are 1-based (NAT_PROTO_*), slot indexes passed to
+ * PORTGROUP_BYSIDX() are 1-based as well.
+ *
+ * All macro arguments and expansions are parenthesized so that the
+ * macros stay correct when used with compound expressions or inside
+ * larger expressions.
+ */
+
+#define	_ADDR_PG_PROTO_COUNT	(65536 >> NAT64_CHUNK_SIZE_BITS)
+#define	_ADDR_PG_COUNT		(_ADDR_PG_PROTO_COUNT * NAT_MAX_PROTO)
+
+/* Offset of alias address _addr from the start of the IPv4 prefix. */
+#define	GET_ADDR_IDX(_cfg, _addr)	((_addr) - ((_cfg)->prefix4))
+/* Index of the (proto, port chunk) pair within one address. */
+#define	__GET_PORTGROUP_IDX(_proto, _port)			\
+	((((_proto) - 1) * _ADDR_PG_PROTO_COUNT) +		\
+	((_port) >> NAT64_CHUNK_SIZE_BITS))
+
+/* Index into cfg->pg[] for (address, proto, port). */
+#define	_GET_PORTGROUP_IDX(_cfg, _addr, _proto, _port)		\
+	((GET_ADDR_IDX(_cfg, _addr) * _ADDR_PG_COUNT) +		\
+	__GET_PORTGROUP_IDX(_proto, _port))
+#define	GET_PORTGROUP(_cfg, _addr, _proto, _port)		\
+	((_cfg)->pg[_GET_PORTGROUP_IDX(_cfg, _addr, _proto, _port)])
+
+/* Chunk number _idx of the host's portgroup pointer table. */
+#define	PORTGROUP_CHUNK(_nh, _idx)				\
+	((_nh)->pg_ptr[(_idx)])
+/* Portgroup pointer stored in the host's 1-based slot _idx. */
+#define	PORTGROUP_BYSIDX(_cfg, _nh, _idx)			\
+	(PORTGROUP_CHUNK(_nh, ((_idx) - 1) / NAT64LSN_PGIDX_CHUNK)	\
+	[((_idx) - 1) % NAT64LSN_PGIDX_CHUNK])
+
+
+
+/*
+ * Chained hash table
+ *
+ * Generic macros parameterized by a prefix _PX. The user must provide
+ * _PX##hash(key), _PX##lock(ph, buck), _PX##unlock(ph, buck),
+ * _PX##first(ph, buck), _PX##next(item), _PX##cmp(key, val) and
+ * _PX##val(item); first() and next() are also used as assignment targets.
+ * The bucket is selected by masking the hash with (_hsize - 1), so _hsize
+ * is expected to be a power of two.
+ */
+/*
+ * Look up _key and store the matching item (or NULL) in _x.
+ * NOTE: when an item is found the bucket is left locked; the caller
+ * releases it with CHT_UNLOCK_BUCK() or CHT_UNLOCK_KEY(). The bucket is
+ * unlocked here only when nothing was found.
+ */
+#define CHT_FIND(_ph, _hsize, _PX, _x, _key) do { \
+ unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \
+ _PX##lock(_ph, _buck); \
+ _x = _PX##first(_ph, _buck); \
+ for ( ; _x != NULL; _x = _PX##next(_x)) { \
+ if (_PX##cmp(_key, _PX##val(_x))) \
+ break; \
+ } \
+ if (_x == NULL) \
+ _PX##unlock(_ph, _buck); \
+} while(0)
+
+/* Unlock bucket _buck. The expansion already ends with a semicolon. */
+#define CHT_UNLOCK_BUCK(_ph, _PX, _buck) \
+ _PX##unlock(_ph, _buck);
+
+/* Unlock the bucket that _key hashes to. */
+#define CHT_UNLOCK_KEY(_ph, _hsize, _PX, _key) do { \
+ unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \
+ _PX##unlock(_ph, _buck); \
+} while(0)
+
+/* Insert item _i at the head of its bucket, under the bucket lock. */
+#define CHT_INSERT_HEAD(_ph, _hsize, _PX, _i) do { \
+ unsigned int _buck = _PX##hash(_PX##val(_i)) & (_hsize - 1); \
+ _PX##lock(_ph, _buck); \
+ _PX##next(_i) = _PX##first(_ph, _buck); \
+ _PX##first(_ph, _buck) = _i; \
+ _PX##unlock(_ph, _buck); \
+} while(0)
+
+/*
+ * Unlink the item matching _key from its bucket. On exit _x is the
+ * removed item or NULL when no item matched; _tmp is scratch (the
+ * predecessor of _x). The item itself is not freed.
+ */
+#define CHT_REMOVE(_ph, _hsize, _PX, _x, _tmp, _key) do { \
+ unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \
+ _PX##lock(_ph, _buck); \
+ _x = _PX##first(_ph, _buck); \
+ _tmp = NULL; \
+ for ( ; _x != NULL; _tmp = _x, _x = _PX##next(_x)) { \
+ if (_PX##cmp(_key, _PX##val(_x))) \
+ break; \
+ } \
+ if (_x != NULL) { \
+ if (_tmp == NULL) \
+ _PX##first(_ph, _buck) = _PX##next(_x); \
+ else \
+ _PX##next(_tmp) = _PX##next(_x); \
+ } \
+ _PX##unlock(_ph, _buck); \
+} while(0)
+
+/*
+ * Call _cb(_x, _arg) for every item of every bucket, holding the bucket
+ * lock while its chain is walked. When the callback returns non-zero the
+ * item is unlinked from its chain; when it returns 0 the item is kept.
+ * _x and _tmp are caller-supplied scratch variables of the item pointer
+ * type; _tmp tracks the last item that is still linked (NULL while at the
+ * head of the chain).
+ *
+ * Unlinking a non-head item updates the predecessor's next pointer, and
+ * _tmp is advanced only past items that stay in the chain, so consecutive
+ * removals keep the chain consistent.
+ * NOTE: the next pointer of _x is read after the callback returned.
+ */
+#define	CHT_FOREACH_SAFE(_ph, _hsize, _PX, _x, _tmp, _cb, _arg) do {	\
+	for (unsigned int _i = 0; _i < _hsize; _i++) {			\
+		_PX##lock(_ph, _i);					\
+		_x = _PX##first(_ph, _i);				\
+		_tmp = NULL;						\
+		for (; _x != NULL; _x = _PX##next(_x)) {		\
+			if (_cb(_x, _arg) == 0) {			\
+				_tmp = _x;				\
+				continue;				\
+			}						\
+			if (_tmp == NULL)				\
+				_PX##first(_ph, _i) = _PX##next(_x);	\
+			else						\
+				_PX##next(_tmp) = _PX##next(_x);	\
+		}							\
+		_PX##unlock(_ph, _i);					\
+	}								\
+} while(0)
+
+/*
+ * Move every item from the old table (_ph, _hsize buckets) into the new
+ * table (_nph, _nhsize buckets), re-hashing each item. No bucket locks
+ * are taken here. _x and _y are caller-supplied scratch variables of the
+ * item pointer type: _x is the item being moved and _y remembers its old
+ * successor, which is saved before the item's next pointer is rewritten.
+ * The old bucket heads are not cleared.
+ */
+#define	CHT_RESIZE(_ph, _hsize, _nph, _nhsize, _PX, _x, _y) do {	\
+	unsigned int _buck;						\
+	for (unsigned int _i = 0; _i < _hsize; _i++) {			\
+		for (_x = _PX##first(_ph, _i); _x != NULL; _x = _y) {	\
+			_y = _PX##next(_x);				\
+			_buck = _PX##hash(_PX##val(_x)) & (_nhsize - 1);\
+			_PX##next(_x) = _PX##first(_nph, _buck);	\
+			_PX##first(_nph, _buck) = _x;			\
+		}							\
+	}								\
+} while(0)
+
+#endif /* _IP_FW_NAT64LSN_H_ */
+
diff --git a/sys/netpfil/ipfw/nat64/nat64lsn_control.c b/sys/netpfil/ipfw/nat64/nat64lsn_control.c
new file mode 100644
index 0000000..3d79085
--- /dev/null
+++ b/sys/netpfil/ipfw/nat64/nat64lsn_control.c
@@ -0,0 +1,917 @@
+/*-
+ * Copyright (c) 2015 Yandex LLC
+ * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
+ * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/rmlock.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/sockopt.h>
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <net/pfil.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/nat64/ip_fw_nat64.h>
+#include <netpfil/ipfw/nat64/nat64lsn.h>
+#include <netinet6/ip_fw_nat64.h>
+
+/* Per-VNET id behind IPFW_TLV_NAT64LSN_NAME; starts out as 0. */
+VNET_DEFINE(uint16_t, nat64lsn_eid) = 0;
+
+/*
+ * Look up a nat64lsn instance by name within the given set.
+ * The result of the named-object lookup is returned as the instance
+ * pointer; callers treat NULL as "not found".
+ */
+static struct nat64lsn_cfg *
+nat64lsn_find(struct namedobj_instance *ni, const char *name, uint8_t set)
+{
+
+	return ((struct nat64lsn_cfg *)ipfw_objhash_lookup_name_type(ni, set,
+	    IPFW_TLV_NAT64LSN_NAME, name));
+}
+
+static void
+nat64lsn_default_config(ipfw_nat64lsn_cfg *uc)
+{
+
+ if (uc->max_ports == 0)
+ uc->max_ports = NAT64LSN_MAX_PORTS;
+ else
+ uc->max_ports = roundup(uc->max_ports, NAT64_CHUNK_SIZE);
+ if (uc->max_ports > NAT64_CHUNK_SIZE * NAT64LSN_MAXPGPTR)
+ uc->max_ports = NAT64_CHUNK_SIZE * NAT64LSN_MAXPGPTR;
+ if (uc->jmaxlen == 0)
+ uc->jmaxlen = NAT64LSN_JMAXLEN;
+ if (uc->jmaxlen > 65536)
+ uc->jmaxlen = 65536;
+ if (uc->nh_delete_delay == 0)
+ uc->nh_delete_delay = NAT64LSN_HOST_AGE;
+ if (uc->pg_delete_delay == 0)
+ uc->pg_delete_delay = NAT64LSN_PG_AGE;
+ if (uc->st_syn_ttl == 0)
+ uc->st_syn_ttl = NAT64LSN_TCP_SYN_AGE;
+ if (uc->st_close_ttl == 0)
+ uc->st_close_ttl = NAT64LSN_TCP_FIN_AGE;
+ if (uc->st_estab_ttl == 0)
+ uc->st_estab_ttl = NAT64LSN_TCP_EST_AGE;
+ if (uc->st_udp_ttl == 0)
+ uc->st_udp_ttl = NAT64LSN_UDP_AGE;
+ if (uc->st_icmp_ttl == 0)
+ uc->st_icmp_ttl = NAT64LSN_ICMP_AGE;
+}
+
+/*
+ * Creates new nat64lsn instance.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_lheader ipfw_nat64lsn_cfg ]
+ *
+ * The request is validated (name, set, prefix lengths, IPv4 prefix
+ * alignment, port range), defaults are applied, and the new instance is
+ * registered in the chain's named-object hash and started.
+ *
+ * Returns 0 on success, EINVAL for a malformed request, EEXIST when an
+ * instance with the same name and set already exists, ENOSPC when no
+ * kernel index could be allocated.
+ */
+static int
+nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	ipfw_obj_lheader *olh;
+	ipfw_nat64lsn_cfg *uc;
+	struct nat64lsn_cfg *cfg;
+	struct namedobj_instance *ni;
+	uint32_t addr4, mask4;
+
+	if (sd->valsize != sizeof(*olh) + sizeof(*uc))
+		return (EINVAL);
+
+	olh = (ipfw_obj_lheader *)sd->kbuf;
+	uc = (ipfw_nat64lsn_cfg *)(olh + 1);
+
+	if (ipfw_check_object_name_generic(uc->name) != 0)
+		return (EINVAL);
+
+	if (uc->agg_prefix_len > 127 || uc->set >= IPFW_MAX_SETS)
+		return (EINVAL);
+
+	/*
+	 * A zero prefix length would require a shift by the full width of
+	 * the type below (undefined behavior) and does not describe a
+	 * usable alias pool, so it is rejected together with lengths > 32.
+	 */
+	if (uc->plen4 == 0 || uc->plen4 > 32)
+		return (EINVAL);
+	if (uc->plen6 > 128 || ((uc->plen6 % 8) != 0))
+		return (EINVAL);
+
+	/* XXX: Check prefix4 to be global */
+	addr4 = ntohl(uc->prefix4.s_addr);
+	/* Unsigned shift: plen4 == 1 must not shift into the sign bit. */
+	mask4 = ~(((uint32_t)1 << (32 - uc->plen4)) - 1);
+	if ((addr4 & mask4) != addr4)
+		return (EINVAL);
+
+	/* XXX: Check prefix6 */
+	if (uc->min_port == 0)
+		uc->min_port = NAT64_MIN_PORT;
+	if (uc->max_port == 0)
+		uc->max_port = 65535;
+	if (uc->min_port > uc->max_port)
+		return (EINVAL);
+	uc->min_port = roundup(uc->min_port, NAT64_CHUNK_SIZE);
+	uc->max_port = roundup(uc->max_port, NAT64_CHUNK_SIZE);
+
+	nat64lsn_default_config(uc);
+
+	/* Cheap early check; repeated below under the write lock. */
+	ni = CHAIN_TO_SRV(ch);
+	IPFW_UH_RLOCK(ch);
+	if (nat64lsn_find(ni, uc->name, uc->set) != NULL) {
+		IPFW_UH_RUNLOCK(ch);
+		return (EEXIST);
+	}
+	IPFW_UH_RUNLOCK(ch);
+
+	/* One alias address per host address of the IPv4 prefix. */
+	cfg = nat64lsn_init_instance(ch, (size_t)1 << (32 - uc->plen4));
+	strlcpy(cfg->name, uc->name, sizeof(cfg->name));
+	cfg->no.name = cfg->name;
+	cfg->no.etlv = IPFW_TLV_NAT64LSN_NAME;
+	cfg->no.set = uc->set;
+
+	cfg->prefix4 = addr4;
+	/* pmask4 holds the last address of the prefix. */
+	cfg->pmask4 = addr4 | ~mask4;
+	/* XXX: Copy 96 bits */
+	cfg->plen6 = 96;
+	memcpy(&cfg->prefix6, &uc->prefix6, cfg->plen6 / 8);
+	cfg->plen4 = uc->plen4;
+	cfg->flags = uc->flags & NAT64LSN_FLAGSMASK;
+	cfg->max_chunks = uc->max_ports / NAT64_CHUNK_SIZE;
+	cfg->agg_prefix_len = uc->agg_prefix_len;
+	cfg->agg_prefix_max = uc->agg_prefix_max;
+
+	cfg->min_chunk = uc->min_port / NAT64_CHUNK_SIZE;
+	cfg->max_chunk = uc->max_port / NAT64_CHUNK_SIZE;
+
+	cfg->jmaxlen = uc->jmaxlen;
+	cfg->nh_delete_delay = uc->nh_delete_delay;
+	cfg->pg_delete_delay = uc->pg_delete_delay;
+	cfg->st_syn_ttl = uc->st_syn_ttl;
+	cfg->st_close_ttl = uc->st_close_ttl;
+	cfg->st_estab_ttl = uc->st_estab_ttl;
+	cfg->st_udp_ttl = uc->st_udp_ttl;
+	cfg->st_icmp_ttl = uc->st_icmp_ttl;
+
+	cfg->nomatch_verdict = IP_FW_DENY;
+	cfg->nomatch_final = 1;	/* Exit outer loop by default */
+
+	IPFW_UH_WLOCK(ch);
+
+	/* Somebody may have created the same instance in the meantime. */
+	if (nat64lsn_find(ni, uc->name, uc->set) != NULL) {
+		IPFW_UH_WUNLOCK(ch);
+		nat64lsn_destroy_instance(cfg);
+		return (EEXIST);
+	}
+
+	if (ipfw_objhash_alloc_idx(CHAIN_TO_SRV(ch), &cfg->no.kidx) != 0) {
+		IPFW_UH_WUNLOCK(ch);
+		nat64lsn_destroy_instance(cfg);
+		return (ENOSPC);
+	}
+	ipfw_objhash_add(CHAIN_TO_SRV(ch), &cfg->no);
+
+	/* Okay, let's link data */
+	IPFW_WLOCK(ch);
+	SRV_OBJECT(ch, cfg->no.kidx) = cfg;
+	IPFW_WUNLOCK(ch);
+
+	nat64lsn_start_instance(cfg);
+
+	IPFW_UH_WUNLOCK(ch);
+	return (0);
+}
+
+/*
+ * Unregister an instance from the chain's named-object hash and release
+ * its kernel index. The caller must hold the UH write lock.
+ */
+static void
+nat64lsn_detach_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg)
+{
+	struct namedobj_instance *ni;
+
+	IPFW_UH_WLOCK_ASSERT(ch);
+
+	ni = CHAIN_TO_SRV(ch);
+	ipfw_objhash_del(ni, &cfg->no);
+	ipfw_objhash_free_idx(ni, cfg->no.kidx);
+}
+
+/*
+ * Destroys nat64 instance.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ]
+ *
+ * Returns 0 on success, EINVAL for a malformed request (wrong size,
+ * invalid object name or set number), ESRCH when no such instance exists
+ * and EBUSY while the instance is still referenced.
+ */
+static int
+nat64lsn_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	struct nat64lsn_cfg *cfg;
+	ipfw_obj_header *oh;
+
+	if (sd->valsize != sizeof(*oh))
+		return (EINVAL);
+
+	oh = (ipfw_obj_header *)op3;
+	/*
+	 * Validate the user-supplied name and set before they are used
+	 * for the lookup, as the other handlers of this module do.
+	 */
+	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
+	    oh->ntlv.set >= IPFW_MAX_SETS)
+		return (EINVAL);
+
+	IPFW_UH_WLOCK(ch);
+	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
+	if (cfg == NULL) {
+		IPFW_UH_WUNLOCK(ch);
+		return (ESRCH);
+	}
+
+	if (cfg->no.refcnt > 0) {
+		IPFW_UH_WUNLOCK(ch);
+		return (EBUSY);
+	}
+
+	/* Unlink from the runtime table first, then from the object hash. */
+	IPFW_WLOCK(ch);
+	SRV_OBJECT(ch, cfg->no.kidx) = NULL;
+	IPFW_WUNLOCK(ch);
+
+	nat64lsn_detach_config(ch, cfg);
+	IPFW_UH_WUNLOCK(ch);
+
+	/* The instance is unreachable now; free it without locks held. */
+	nat64lsn_destroy_instance(cfg);
+	return (0);
+}
+
+/*
+ * Fill *stats from the counters and bookkeeping fields of an instance.
+ * Each counter-backed field is fetched with NAT64STAT_FETCH(); host and
+ * per-protocol chunk counts are copied from cfg directly.
+ */
+#define	NAT64LSN_EXPORT_STAT(_field)	\
+	stats->_field = NAT64STAT_FETCH(&cfg->stats, _field)
+static void
+export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
+    struct ipfw_nat64lsn_stats *stats)
+{
+
+	/* Translation counters. */
+	NAT64LSN_EXPORT_STAT(opcnt64);
+	NAT64LSN_EXPORT_STAT(opcnt46);
+	NAT64LSN_EXPORT_STAT(ofrags);
+	NAT64LSN_EXPORT_STAT(ifrags);
+	NAT64LSN_EXPORT_STAT(oerrors);
+	NAT64LSN_EXPORT_STAT(noroute4);
+	NAT64LSN_EXPORT_STAT(noroute6);
+	NAT64LSN_EXPORT_STAT(nomatch4);
+	NAT64LSN_EXPORT_STAT(noproto);
+	NAT64LSN_EXPORT_STAT(nomem);
+	NAT64LSN_EXPORT_STAT(dropped);
+
+	/* Job queue, state and portgroup counters. */
+	NAT64LSN_EXPORT_STAT(jcalls);
+	NAT64LSN_EXPORT_STAT(jrequests);
+	NAT64LSN_EXPORT_STAT(jhostsreq);
+	NAT64LSN_EXPORT_STAT(jportreq);
+	NAT64LSN_EXPORT_STAT(jhostfails);
+	NAT64LSN_EXPORT_STAT(jportfails);
+	NAT64LSN_EXPORT_STAT(jmaxlen);
+	NAT64LSN_EXPORT_STAT(jnomem);
+	NAT64LSN_EXPORT_STAT(jreinjected);
+	NAT64LSN_EXPORT_STAT(screated);
+	NAT64LSN_EXPORT_STAT(sdeleted);
+	NAT64LSN_EXPORT_STAT(spgcreated);
+	NAT64LSN_EXPORT_STAT(spgdeleted);
+
+	/* Plain fields kept in the instance itself. */
+	stats->hostcount = cfg->ihcount;
+	stats->tcpchunks = cfg->protochunks[NAT_PROTO_TCP];
+	stats->udpchunks = cfg->protochunks[NAT_PROTO_UDP];
+	stats->icmpchunks = cfg->protochunks[NAT_PROTO_ICMP];
+}
+#undef NAT64LSN_EXPORT_STAT
+
+/*
+ * Convert the kernel representation of an instance (cfg) into the
+ * userland configuration structure (uc).
+ */
+static void
+nat64lsn_export_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
+    ipfw_nat64lsn_cfg *uc)
+{
+
+	/* Identity. */
+	strlcpy(uc->name, cfg->no.name, sizeof(uc->name));
+	uc->set = cfg->no.set;
+
+	/* Prefixes; the IPv4 prefix is exported in network byte order. */
+	uc->prefix4.s_addr = htonl(cfg->prefix4);
+	uc->plen4 = cfg->plen4;
+	uc->prefix6 = cfg->prefix6;
+	uc->plen6 = cfg->plen6;
+
+	/* Limits and flags. */
+	uc->flags = cfg->flags & NAT64LSN_FLAGSMASK;
+	uc->max_ports = cfg->max_chunks * NAT64_CHUNK_SIZE;
+	uc->agg_prefix_len = cfg->agg_prefix_len;
+	uc->agg_prefix_max = cfg->agg_prefix_max;
+	uc->jmaxlen = cfg->jmaxlen;
+
+	/* Timeouts. */
+	uc->nh_delete_delay = cfg->nh_delete_delay;
+	uc->pg_delete_delay = cfg->pg_delete_delay;
+	uc->st_syn_ttl = cfg->st_syn_ttl;
+	uc->st_close_ttl = cfg->st_close_ttl;
+	uc->st_estab_ttl = cfg->st_estab_ttl;
+	uc->st_udp_ttl = cfg->st_udp_ttl;
+	uc->st_icmp_ttl = cfg->st_icmp_ttl;
+}
+
+/* Context handed to export_config_cb() while listing instances. */
+struct nat64_dump_arg {
+	struct ip_fw_chain *ch;
+	struct sockopt_data *sd;
+};
+
+/*
+ * Named-object iteration callback: reserve room for one
+ * ipfw_nat64lsn_cfg in the reply buffer and export the instance into it.
+ * Always returns 0.
+ */
+static int
+export_config_cb(struct namedobj_instance *ni, struct named_object *no,
+    void *arg)
+{
+	struct nat64_dump_arg *ctx;
+	ipfw_nat64lsn_cfg *out;
+
+	ctx = (struct nat64_dump_arg *)arg;
+	out = (ipfw_nat64lsn_cfg *)ipfw_get_sopt_space(ctx->sd,
+	    sizeof(*out));
+	nat64lsn_export_config(ctx->ch, (struct nat64lsn_cfg *)no, out);
+	return (0);
+}
+
+/*
+ * Lists all nat64 lsn instances currently available in kernel.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_lheader ]
+ * Reply: [ ipfw_obj_lheader ipfw_nat64lsn_cfg x N ]
+ *
+ * The header is always filled with the object count, the object size and
+ * the total size needed. Returns 0 on success, EINVAL when the request is
+ * shorter than the header and ENOMEM when the supplied buffer cannot hold
+ * all instances.
+ */
+static int
+nat64lsn_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	struct nat64_dump_arg da = { .ch = ch, .sd = sd };
+	ipfw_obj_lheader *olh;
+	int error;
+
+	/* Check minimum header size */
+	if (sd->valsize < sizeof(ipfw_obj_lheader))
+		return (EINVAL);
+
+	olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh));
+
+	IPFW_UH_RLOCK(ch);
+	olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch),
+	    IPFW_TLV_NAT64LSN_NAME);
+	olh->objsize = sizeof(ipfw_nat64lsn_cfg);
+	olh->size = sizeof(*olh) + olh->count * olh->objsize;
+
+	if (sd->valsize >= olh->size) {
+		ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb,
+		    &da, IPFW_TLV_NAT64LSN_NAME);
+		error = 0;
+	} else
+		error = ENOMEM;
+	IPFW_UH_RUNLOCK(ch);
+
+	return (error);
+}
+
+/*
+ * Change existing nat64lsn instance configuration.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ipfw_nat64lsn_cfg ]
+ * Reply: [ ipfw_obj_header ipfw_nat64lsn_cfg ]
+ *
+ * For SOPT_GET the current configuration is exported into the reply.
+ * Otherwise defaults are applied to the request and the changeable
+ * settings are stored into the instance.
+ *
+ * Returns 0 on success, EINVAL for a malformed request and ESRCH when
+ * the named instance does not exist (the same error the other handlers
+ * of this module use for a missing instance).
+ */
+static int
+nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op,
+    struct sockopt_data *sd)
+{
+	ipfw_obj_header *oh;
+	ipfw_nat64lsn_cfg *uc;
+	struct nat64lsn_cfg *cfg;
+	struct namedobj_instance *ni;
+
+	if (sd->valsize != sizeof(*oh) + sizeof(*uc))
+		return (EINVAL);
+
+	oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd,
+	    sizeof(*oh) + sizeof(*uc));
+	uc = (ipfw_nat64lsn_cfg *)(oh + 1);
+
+	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
+	    oh->ntlv.set >= IPFW_MAX_SETS)
+		return (EINVAL);
+
+	ni = CHAIN_TO_SRV(ch);
+	if (sd->sopt->sopt_dir == SOPT_GET) {
+		IPFW_UH_RLOCK(ch);
+		cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
+		if (cfg == NULL) {
+			IPFW_UH_RUNLOCK(ch);
+			return (ESRCH);
+		}
+		nat64lsn_export_config(ch, cfg, uc);
+		IPFW_UH_RUNLOCK(ch);
+		return (0);
+	}
+
+	/* Zero-valued fields in the request mean "use the default". */
+	nat64lsn_default_config(uc);
+
+	IPFW_UH_WLOCK(ch);
+	cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
+	if (cfg == NULL) {
+		IPFW_UH_WUNLOCK(ch);
+		return (ESRCH);
+	}
+
+	/*
+	 * For now allow to change only following values:
+	 * jmaxlen, nh_del_age, pg_del_age, tcp_syn_age, tcp_close_age,
+	 * tcp_est_age, udp_age, icmp_age, flags, max_ports.
+	 */
+
+	cfg->max_chunks = uc->max_ports / NAT64_CHUNK_SIZE;
+	cfg->jmaxlen = uc->jmaxlen;
+	cfg->nh_delete_delay = uc->nh_delete_delay;
+	cfg->pg_delete_delay = uc->pg_delete_delay;
+	cfg->st_syn_ttl = uc->st_syn_ttl;
+	cfg->st_close_ttl = uc->st_close_ttl;
+	cfg->st_estab_ttl = uc->st_estab_ttl;
+	cfg->st_udp_ttl = uc->st_udp_ttl;
+	cfg->st_icmp_ttl = uc->st_icmp_ttl;
+	cfg->flags = uc->flags & NAT64LSN_FLAGSMASK;
+
+	IPFW_UH_WUNLOCK(ch);
+
+	return (0);
+}
+
+/*
+ * Get nat64lsn statistics.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ]
+ * Reply: [ ipfw_obj_header ipfw_counter_tlv ]
+ *
+ * Returns 0 on success, EINVAL when the buffer size is not a multiple of
+ * 8 bytes or the header cannot be obtained, ENOMEM when the buffer is too
+ * small for the reply and ESRCH when the instance does not exist.
+ */
+static int
+nat64lsn_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
+    struct sockopt_data *sd)
+{
+	struct ipfw_nat64lsn_stats snapshot;
+	struct nat64lsn_cfg *cfg;
+	ipfw_obj_header *oh;
+	ipfw_obj_ctlv *ctlv;
+	size_t reply_len;
+
+	reply_len = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) +
+	    sizeof(snapshot);
+	if ((sd->valsize % sizeof(uint64_t)) != 0)
+		return (EINVAL);
+	if (sd->valsize < reply_len)
+		return (ENOMEM);
+	oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, reply_len);
+	if (oh == NULL)
+		return (EINVAL);
+	memset(&snapshot, 0, sizeof(snapshot));
+
+	/* Take the snapshot under the lock, build the reply outside it. */
+	IPFW_UH_RLOCK(ch);
+	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
+	if (cfg == NULL) {
+		IPFW_UH_RUNLOCK(ch);
+		return (ESRCH);
+	}
+	export_stats(ch, cfg, &snapshot);
+	IPFW_UH_RUNLOCK(ch);
+
+	/* The counters follow the ctlv header as an array of uint64_t. */
+	ctlv = (ipfw_obj_ctlv *)(oh + 1);
+	memset(ctlv, 0, sizeof(*ctlv));
+	ctlv->head.type = IPFW_TLV_COUNTERS;
+	ctlv->head.length = reply_len - sizeof(ipfw_obj_header);
+	ctlv->count = sizeof(snapshot) / sizeof(uint64_t);
+	ctlv->objsize = sizeof(uint64_t);
+	ctlv->version = IPFW_NAT64_VERSION;
+	memcpy(ctlv + 1, &snapshot, sizeof(snapshot));
+	return (0);
+}
+
+/*
+ * Reset nat64lsn statistics.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ]
+ *
+ * Returns 0 on success, EINVAL for a malformed request and ESRCH when
+ * the instance does not exist.
+ */
+static int
+nat64lsn_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
+    struct sockopt_data *sd)
+{
+	struct nat64lsn_cfg *cfg;
+	ipfw_obj_header *oh;
+	int error;
+
+	if (sd->valsize != sizeof(*oh))
+		return (EINVAL);
+	oh = (ipfw_obj_header *)sd->kbuf;
+	if (oh->ntlv.set >= IPFW_MAX_SETS ||
+	    ipfw_check_object_name_generic(oh->ntlv.name) != 0)
+		return (EINVAL);
+
+	IPFW_UH_WLOCK(ch);
+	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
+	if (cfg != NULL) {
+		COUNTER_ARRAY_ZERO(cfg->stats.stats, NAT64STATS);
+		error = 0;
+	} else
+		error = ESRCH;
+	IPFW_UH_WUNLOCK(ch);
+	return (error);
+}
+
+/*
+ * Export the busy states of one portgroup into the reply buffer.
+ * Reply: [ ipfw_obj_header ipfw_obj_data [ ipfw_nat64lsn_stg
+ *	ipfw_nat64lsn_state x count, ... ] ]
+ *
+ * stg is the already reserved group header for this portgroup; it is
+ * filled here and space for the individual states is taken from sd.
+ * The host lock is held for the whole export so that the counted and the
+ * exported states agree.
+ *
+ * Returns 0 on success (stg->count holds the number of exported states,
+ * possibly 0) and 1 when ipfw_get_sopt_space() could not provide room
+ * for the states.
+ */
+static int
+export_pg_states(struct nat64lsn_cfg *cfg, struct nat64lsn_portgroup *pg,
+    ipfw_nat64lsn_stg *stg, struct sockopt_data *sd)
+{
+	ipfw_nat64lsn_state *ste;
+	struct nat64lsn_state *st;
+	int i, count;
+
+	NAT64_LOCK(pg->host);
+	/* A portgroup holds NAT64_CHUNK_SIZE states; count the busy ones. */
+	count = 0;
+	for (i = 0; i < NAT64_CHUNK_SIZE; i++) {
+		if (PG_IS_BUSY_IDX(pg, i))
+			count++;
+	}
+	DPRINTF(DP_STATE, "EXPORT PG %d, count %d", pg->idx, count);
+
+	if (count == 0) {
+		stg->count = 0;
+		NAT64_UNLOCK(pg->host);
+		return (0);
+	}
+	ste = (ipfw_nat64lsn_state *)ipfw_get_sopt_space(sd,
+	    count * sizeof(ipfw_nat64lsn_state));
+	if (ste == NULL) {
+		NAT64_UNLOCK(pg->host);
+		return (1);
+	}
+
+	stg->alias4.s_addr = pg->aaddr;
+	stg->proto = nat64lsn_rproto_map[pg->nat_proto];
+	stg->flags = 0;
+	stg->host6 = pg->host->addr;
+	stg->count = count;
+	for (i = 0; i < NAT64_CHUNK_SIZE; i++) {
+		if (PG_IS_FREE_IDX(pg, i))
+			continue;
+		st = &pg->states[i];
+		ste->daddr.s_addr = st->u.s.faddr;
+		ste->dport = st->u.s.fport;
+		/* The alias port is the group's base port plus the slot. */
+		ste->aport = pg->aport + i;
+		ste->sport = st->u.s.lport;
+		ste->flags = st->flags; /* XXX filter flags */
+		ste->idle = GET_AGE(st->timestamp);
+		ste++;
+	}
+	NAT64_UNLOCK(pg->host);
+
+	return (0);
+}
+
+/*
+ * Advance the (addr, nat_proto, port) position to the next portgroup
+ * slot. The port moves in NAT64_CHUNK_SIZE steps; when it wraps, the
+ * protocol index is advanced, and when that wraps (back to 1) the
+ * address is advanced up to cfg->pmask4.
+ *
+ * Returns 0 when the position was advanced and 1 at the end of the
+ * space; in the latter case port is 0 and nat_proto is 1 on return.
+ */
+static int
+get_next_idx(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto,
+    uint16_t *port)
+{
+	int at_end;
+
+	at_end = 0;
+	if (*port < 65536 - NAT64_CHUNK_SIZE) {
+		/* Next chunk, same protocol and address. */
+		*port += NAT64_CHUNK_SIZE;
+	} else {
+		*port = 0;
+		if (*nat_proto < NAT_MAX_PROTO - 1) {
+			/* First chunk of the next protocol. */
+			*nat_proto += 1;
+		} else {
+			*nat_proto = 1;
+			if (*addr < cfg->pmask4)
+				*addr += 1;
+			else
+				at_end = 1;	/* End of space. */
+		}
+	}
+	return (at_end);
+}
+
+#define PACK_IDX(addr, proto, port) \
+ ((uint64_t)addr << 32) | ((uint32_t)port << 16) | (proto << 8)
+#define UNPACK_IDX(idx, addr, proto, port) \
+ (addr) = (uint32_t)((idx) >> 32); \
+ (port) = (uint16_t)(((idx) >> 16) & 0xFFFF); \
+ (proto) = (uint8_t)(((idx) >> 8) & 0xFF)
+
+/*
+ * Find the next allocated portgroup strictly after the position given by
+ * (addr, nat_proto, port). The position is advanced in place and on
+ * success describes the returned portgroup.
+ *
+ * Returns the portgroup, or NULL when the end of the space was reached.
+ */
+static struct nat64lsn_portgroup *
+get_next_pg(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto,
+    uint16_t *port)
+{
+	struct nat64lsn_portgroup *found;
+	uint64_t pos_before, pos_after;
+
+	pos_before = PACK_IDX(*addr, *nat_proto, *port);
+	do {
+		if (get_next_idx(cfg, addr, nat_proto, port) != 0) {
+			/* End of states */
+			return (NULL);
+		}
+		found = GET_PORTGROUP(cfg, *addr, *nat_proto, *port);
+	} while (found == NULL);
+
+	/* Debug aid: the position is expected to have moved. */
+	pos_after = PACK_IDX(*addr, *nat_proto, *port);
+	if (pos_before == pos_after)
+		DPRINTF(DP_STATE, "XXX: PACK_IDX %u %d %d",
+		    *addr, *nat_proto, *port);
+	return (found);
+}
+
+/*
+ * Return the portgroup at the given position, or, when that slot is
+ * empty, the next allocated one after it (advancing the position).
+ * Returns NULL when there is none.
+ */
+static NAT64NOINLINE struct nat64lsn_portgroup *
+get_first_pg(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto,
+    uint16_t *port)
+{
+	struct nat64lsn_portgroup *here;
+
+	here = GET_PORTGROUP(cfg, *addr, *nat_proto, *port);
+	if (here != NULL)
+		return (here);
+	return (get_next_pg(cfg, addr, nat_proto, port));
+}
+
+/*
+ * Lists nat64lsn states.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]]
+ * Reply: [ ipfw_obj_header ipfw_obj_data [
+ *		ipfw_nat64lsn_stg ipfw_nat64lsn_state x N] ]
+ *
+ * The uint64_t in the request is a position cookie (see PACK_IDX());
+ * 0 starts from the beginning. Each exported group carries in next_idx
+ * the cookie of the following portgroup, or 0xFF when there is none.
+ *
+ * Returns 0 on success, EINVAL for a malformed request or cookie, ESRCH
+ * when the instance does not exist and ENOMEM when the buffer cannot
+ * hold even the first group.
+ */
+static int
+nat64lsn_states(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	ipfw_obj_header *oh;
+	ipfw_obj_data *od;
+	ipfw_nat64lsn_stg *stg;
+	struct nat64lsn_cfg *cfg;
+	struct nat64lsn_portgroup *pg, *pg_next;
+	uint64_t next_idx;
+	size_t sz;
+	uint32_t addr, states;
+	uint16_t port;
+	uint8_t nat_proto;
+
+	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
+	    sizeof(uint64_t);
+	/* Check minimum header size */
+	if (sd->valsize < sz)
+		return (EINVAL);
+
+	oh = (ipfw_obj_header *)sd->kbuf;
+	od = (ipfw_obj_data *)(oh + 1);
+	if (od->head.type != IPFW_TLV_OBJDATA ||
+	    od->head.length != sz - sizeof(ipfw_obj_header))
+		return (EINVAL);
+
+	next_idx = *(uint64_t *)(od + 1);
+	/* Translate index to the request position to start from */
+	UNPACK_IDX(next_idx, addr, nat_proto, port);
+	if (nat_proto >= NAT_MAX_PROTO)
+		return (EINVAL);
+	if (nat_proto == 0 && addr != 0)
+		return (EINVAL);
+
+	IPFW_UH_RLOCK(ch);
+	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
+	if (cfg == NULL) {
+		IPFW_UH_RUNLOCK(ch);
+		return (ESRCH);
+	}
+	/* Fill in starting point */
+	if (addr == 0) {
+		addr = cfg->prefix4;
+		nat_proto = 1;
+		port = 0;
+	}
+	if (addr < cfg->prefix4 || addr > cfg->pmask4) {
+		IPFW_UH_RUNLOCK(ch);
+		DPRINTF(DP_GENERIC | DP_STATE, "XXX: %ju %u %u",
+		    (uintmax_t)next_idx, addr, cfg->pmask4);
+		return (EINVAL);
+	}
+
+	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
+	    sizeof(ipfw_nat64lsn_stg);
+	if (sd->valsize < sz) {
+		/* The UH read lock is still held here; release it. */
+		IPFW_UH_RUNLOCK(ch);
+		return (ENOMEM);
+	}
+	oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, sz);
+	od = (ipfw_obj_data *)(oh + 1);
+	od->head.type = IPFW_TLV_OBJDATA;
+	od->head.length = sz - sizeof(ipfw_obj_header);
+	stg = (ipfw_nat64lsn_stg *)(od + 1);
+
+	pg = get_first_pg(cfg, &addr, &nat_proto, &port);
+	if (pg == NULL) {
+		/* No states */
+		stg->next_idx = 0xFF;
+		stg->count = 0;
+		IPFW_UH_RUNLOCK(ch);
+		return (0);
+	}
+	states = 0;
+	pg_next = NULL;
+	while (pg != NULL) {
+		/*
+		 * Look ahead first: the current group header records where
+		 * the next request should continue.
+		 */
+		pg_next = get_next_pg(cfg, &addr, &nat_proto, &port);
+		if (pg_next == NULL)
+			stg->next_idx = 0xFF;
+		else
+			stg->next_idx = PACK_IDX(addr, nat_proto, port);
+
+		if (export_pg_states(cfg, pg, stg, sd) != 0) {
+			IPFW_UH_RUNLOCK(ch);
+			/* Out of space: an error only if nothing was sent. */
+			return (states == 0 ? ENOMEM: 0);
+		}
+		states += stg->count;
+		od->head.length += stg->count * sizeof(ipfw_nat64lsn_state);
+		sz += stg->count * sizeof(ipfw_nat64lsn_state);
+		if (pg_next != NULL) {
+			/* Reserve the header of the next group, if it fits. */
+			sz += sizeof(ipfw_nat64lsn_stg);
+			if (sd->valsize < sz)
+				break;
+			stg = (ipfw_nat64lsn_stg *)ipfw_get_sopt_space(sd,
+			    sizeof(ipfw_nat64lsn_stg));
+		}
+		pg = pg_next;
+	}
+	IPFW_UH_RUNLOCK(ch);
+	return (0);
+}
+
+/*
+ * Socket option handlers implemented by nat64lsn.  The table is passed
+ * to IPFW_ADD_SOPT_HANDLER()/IPFW_DEL_SOPT_HANDLER() from
+ * nat64lsn_init()/nat64lsn_uninit().  Each row names the opcode, a
+ * numeric field that is 0 for all entries (presumably the handler
+ * version -- confirm against struct ipfw_sopt_handler), the allowed
+ * direction (HDIR_*) and the handler function.
+ */
+static struct ipfw_sopt_handler scodes[] = {
+ { IP_FW_NAT64LSN_CREATE, 0, HDIR_BOTH, nat64lsn_create },
+ { IP_FW_NAT64LSN_DESTROY,0, HDIR_SET, nat64lsn_destroy },
+ { IP_FW_NAT64LSN_CONFIG, 0, HDIR_BOTH, nat64lsn_config },
+ { IP_FW_NAT64LSN_LIST, 0, HDIR_GET, nat64lsn_list },
+ { IP_FW_NAT64LSN_STATS, 0, HDIR_GET, nat64lsn_stats },
+ { IP_FW_NAT64LSN_RESET_STATS,0, HDIR_SET, nat64lsn_reset_stats },
+ { IP_FW_NAT64LSN_LIST_STATES,0, HDIR_GET, nat64lsn_states },
+};
+
+/*
+ * Classifier callback of the O_EXTERNAL_INSTANCE rewriter.
+ * The instruction belongs to nat64lsn only when the instruction right
+ * before it is O_EXTERNAL_ACTION carrying the nat64lsn eaction id.
+ * On a match the instance index (cmd->arg1) is stored in *puidx, *ptype
+ * is set to 0 and 0 is returned; otherwise 1 is returned and the
+ * output arguments are left untouched.
+ */
+static int
+nat64lsn_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
+{
+	ipfw_insn *prev = cmd - 1;
+
+	if (prev->opcode == O_EXTERNAL_ACTION &&
+	    prev->arg1 == V_nat64lsn_eid) {
+		*puidx = cmd->arg1;
+		*ptype = 0;
+		return (0);
+	}
+	return (1);
+}
+
+/*
+ * Rewriter "update" callback: stores the given index into arg1 of the
+ * instruction.
+ */
+static void
+nat64lsn_update_arg1(ipfw_insn *cmd, uint16_t idx)
+{
+
+ cmd->arg1 = idx;
+}
+
+/*
+ * Rewriter "find_byname" callback: looks up an object of type
+ * IPFW_TLV_NAT64LSN_NAME described by ti in the chain's service object
+ * hash and hands back the result of ipfw_objhash_find_type().
+ */
+static int
+nat64lsn_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
+    struct named_object **pno)
+{
+
+	return (ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti,
+	    IPFW_TLV_NAT64LSN_NAME, pno));
+}
+
+/*
+ * Rewriter "find_bykidx" callback: returns the named object registered
+ * under kernel index idx.  Asserts that the UH write lock is held and
+ * that the object exists.
+ */
+static struct named_object *
+nat64lsn_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
+{
+	struct named_object *obj;
+
+	IPFW_UH_WLOCK_ASSERT(ch);
+	obj = ipfw_objhash_lookup_kidx(CHAIN_TO_SRV(ch), idx);
+	KASSERT(obj != NULL, ("NAT64LSN with index %d not found", idx));
+	return (obj);
+}
+
+/*
+ * Rewriter "manage_sets" callback: forwards the set operation for
+ * objects of type IPFW_TLV_NAT64LSN_NAME to ipfw_obj_manage_sets().
+ */
+static int
+nat64lsn_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
+    enum ipfw_sets_cmd cmd)
+{
+	struct namedobj_instance *ni;
+
+	ni = CHAIN_TO_SRV(ch);
+	return (ipfw_obj_manage_sets(ni, IPFW_TLV_NAT64LSN_NAME, set,
+	    new_set, cmd));
+}
+
+/*
+ * Object rewriter for the O_EXTERNAL_INSTANCE opcode, wiring in the
+ * nat64lsn callbacks defined above.  Registered and unregistered via
+ * IPFW_ADD_OBJ_REWRITER()/IPFW_DEL_OBJ_REWRITER() in
+ * nat64lsn_init()/nat64lsn_uninit().
+ */
+static struct opcode_obj_rewrite opcodes[] = {
+ {
+ .opcode = O_EXTERNAL_INSTANCE,
+ .etlv = IPFW_TLV_EACTION /* just show it isn't table */,
+ .classifier = nat64lsn_classify,
+ .update = nat64lsn_update_arg1,
+ .find_byname = nat64lsn_findbyname,
+ .find_bykidx = nat64lsn_findbykidx,
+ .manage_sets = nat64lsn_manage_sets,
+ },
+};
+
+/*
+ * ipfw_objhash_foreach_type() callback used on unload: unlinks the
+ * instance from the chain's service object array, detaches it and
+ * destroys it.  arg is the owning struct ip_fw_chain.  Always returns 0.
+ */
+static int
+destroy_config_cb(struct namedobj_instance *ni, struct named_object *no,
+    void *arg)
+{
+	struct ip_fw_chain *chain = (struct ip_fw_chain *)arg;
+	struct nat64lsn_cfg *inst;
+
+	inst = (struct nat64lsn_cfg *)SRV_OBJECT(chain, no->kidx);
+	SRV_OBJECT(chain, no->kidx) = NULL;
+
+	nat64lsn_detach_config(chain, inst);
+	nat64lsn_destroy_instance(inst);
+	return (0);
+}
+
+/*
+ * Attaches nat64lsn to the chain: runs the one-time internal
+ * initialization when first is non-zero, registers the "nat64lsn"
+ * external action and then the sockopt handlers and opcode rewriter.
+ * Returns 0 on success or ENXIO when the external action could not be
+ * registered.
+ */
+int
+nat64lsn_init(struct ip_fw_chain *ch, int first)
+{
+
+	if (first != 0)
+		nat64lsn_init_internal();
+
+	V_nat64lsn_eid = ipfw_add_eaction(ch, ipfw_nat64lsn, "nat64lsn");
+	if (V_nat64lsn_eid != 0) {
+		IPFW_ADD_SOPT_HANDLER(first, scodes);
+		IPFW_ADD_OBJ_REWRITER(first, opcodes);
+		return (0);
+	}
+	return (ENXIO);
+}
+
+/*
+ * Detaches nat64lsn from the chain: removes the opcode rewriter and
+ * sockopt handlers, deregisters the external action, destroys every
+ * remaining nat64lsn instance and, when last is non-zero, runs the
+ * internal teardown.
+ */
+void
+nat64lsn_uninit(struct ip_fw_chain *ch, int last)
+{
+
+ IPFW_DEL_OBJ_REWRITER(last, opcodes);
+ IPFW_DEL_SOPT_HANDLER(last, scodes);
+ ipfw_del_eaction(ch, V_nat64lsn_eid);
+ /*
+ * Since we have already deregistered the external action,
+ * our named objects are no longer accessible via rules, because
+ * all rules were truncated by ipfw_del_eaction().
+ * So, we can unlink and destroy our named objects without holding
+ * IPFW_WLOCK().
+ */
+ IPFW_UH_WLOCK(ch);
+ ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch,
+ IPFW_TLV_NAT64LSN_NAME);
+ V_nat64lsn_eid = 0;
+ IPFW_UH_WUNLOCK(ch);
+ if (last != 0)
+ nat64lsn_uninit_internal();
+}
+
diff --git a/sys/netpfil/ipfw/nat64/nat64stl.c b/sys/netpfil/ipfw/nat64/nat64stl.c
new file mode 100644
index 0000000..3a13aba
--- /dev/null
+++ b/sys/netpfil/ipfw/nat64/nat64stl.c
@@ -0,0 +1,262 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/rmlock.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_pflog.h>
+#include <net/pfil.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
+#include <netinet/ip6.h>
+#include <netinet/icmp6.h>
+#include <netinet6/ip_fw_nat64.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/nat64/ip_fw_nat64.h>
+#include <netpfil/ipfw/nat64/nat64_translate.h>
+#include <netpfil/ipfw/nat64/nat64stl.h>
+#include <netpfil/pf/pf.h>
+
+/*
+ * Fetches the nat64stl instance stored in the chain's service object
+ * array at the index held in arg1 of the given instruction.
+ */
+#define NAT64_LOOKUP(chain, cmd) \
+ (struct nat64stl_cfg *)SRV_OBJECT((chain), (cmd)->arg1)
+
+/*
+ * Fills *plog with a pflog header describing the packet (address
+ * family, instance index in rulenr, a running packet counter in
+ * subrulenr, "NAT64STL" as interface name) and passes header and mbuf
+ * to ipfw_bpf_mtap2().
+ */
+static void
+nat64stl_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
+    uint32_t kidx)
+{
+	static uint32_t pktid = 0;
+
+	memset(plog, 0, sizeof(*plog));
+	strlcpy(plog->ifname, "NAT64STL", sizeof(plog->ifname));
+	plog->ruleset[0] = '\0';
+	plog->length = PFLOG_REAL_HDRLEN;
+	plog->af = family;
+	plog->action = PF_NAT;
+	plog->dir = PF_IN;
+	plog->rulenr = htonl(kidx);
+	/* Number packets starting from 1. */
+	pktid++;
+	plog->subrulenr = htonl(pktid);
+
+	ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
+}
+
+/*
+ * Translates an IPv4 packet to IPv6.
+ * The IPv6 destination is the nh6 table value selected by tablearg;
+ * the IPv6 source is the instance prefix with the IPv4 source address
+ * embedded by nat64_set_ip4().
+ *
+ * Returns NAT64SKIP when either IPv4 address is rejected by
+ * nat64_check_ip4()/nat64_check_private_ip4(), NAT64MFREE when the
+ * mapped IPv6 address fails nat64_check_ip6(), otherwise the result of
+ * nat64_do_handle_ip4().
+ */
+static int
+nat64stl_handle_ip4(struct ip_fw_chain *chain, struct nat64stl_cfg *cfg,
+    struct mbuf *m, uint32_t tablearg)
+{
+	struct pfloghdr loghdr, *logdata;
+	struct in6_addr saddr, daddr;
+	struct ip *ip;
+
+	ip = mtod(m, struct ip*);
+	if (nat64_check_ip4(ip->ip_src.s_addr) != 0 ||
+	    nat64_check_ip4(ip->ip_dst.s_addr) != 0)
+		return (NAT64SKIP);
+	if (nat64_check_private_ip4(ip->ip_src.s_addr) != 0 ||
+	    nat64_check_private_ip4(ip->ip_dst.s_addr) != 0)
+		return (NAT64SKIP);
+
+	daddr = TARG_VAL(chain, tablearg, nh6);
+	if (nat64_check_ip6(&daddr) != 0)
+		return (NAT64MFREE);
+
+	saddr = cfg->prefix6;
+	nat64_set_ip4(&saddr, ip->ip_src.s_addr);
+
+	/* Emit a log record only when the instance asks for it. */
+	logdata = NULL;
+	if (cfg->flags & NAT64_LOG) {
+		logdata = &loghdr;
+		nat64stl_log(logdata, m, AF_INET, cfg->no.kidx);
+	}
+	return (nat64_do_handle_ip4(m, &saddr, &daddr, 0, &cfg->stats,
+	    logdata));
+}
+
+/*
+ * Translates an IPv6 packet to IPv4, using the nh4 table value selected
+ * by tablearg as the translated address handed to
+ * nat64_do_handle_ip6().
+ *
+ * Returns NAT64SKIP when the IPv6 destination does not start with the
+ * configured prefix, otherwise the result of nat64_do_handle_ip6().
+ */
+static int
+nat64stl_handle_ip6(struct ip_fw_chain *chain, struct nat64stl_cfg *cfg,
+    struct mbuf *m, uint32_t tablearg)
+{
+	struct pfloghdr loghdr, *logdata;
+	struct ip6_hdr *ip6;
+	uint32_t aaddr;
+
+	aaddr = htonl(TARG_VAL(chain, tablearg, nh4));
+
+	/*
+	 * NOTE: ipfw_chk() is expected to have done m_pullup() up to the
+	 * upper level protocol's headers.  Checks already performed by
+	 * ip6_input(), ip6_forward(), ip6_fastfwd() and ipfw_chk() are
+	 * not repeated here.
+	 */
+	ip6 = mtod(m, struct ip6_hdr *);
+	/* The leading plen6 / 8 bytes of ip6_dst must equal the prefix. */
+	if (bcmp(&ip6->ip6_dst, &cfg->prefix6, cfg->plen6 / 8) != 0)
+		return (NAT64SKIP);
+
+	logdata = NULL;
+	if (cfg->flags & NAT64_LOG) {
+		logdata = &loghdr;
+		nat64stl_log(logdata, m, AF_INET6, cfg->no.kidx);
+	}
+	return (nat64_do_handle_ip6(m, aaddr, 0, &cfg->stats, logdata));
+}
+
+/*
+ * Handles an ICMPv6 message whose source address is not in the map64
+ * table.  Only ICMP6_DST_UNREACH, ICMP6_PACKET_TOO_BIG,
+ * ICMP6_TIME_EXCEED_TRANSIT and ICMP6_PARAM_PROB are accepted.  The
+ * destination address of the embedded IPv6 header is looked up in
+ * map64 to obtain the IPv4 address passed to nat64_handle_icmp6().
+ *
+ * Returns NAT64MFREE (counting the packet as dropped) for packets that
+ * are not ICMPv6, have another type or are too short; NAT64RETURN when
+ * m_pullup() fails or the inner destination is not in map64 (the mbuf
+ * has been freed in both cases); otherwise the result of
+ * nat64_handle_icmp6().
+ */
+static int
+nat64stl_handle_icmp6(struct ip_fw_chain *chain, struct nat64stl_cfg *cfg,
+    struct mbuf *m)
+{
+	struct pfloghdr loghdr, *logdata;
+	nat64_stats_block *stats;
+	struct ip6_hdr *ip6i;
+	struct icmp6_hdr *icmp6;
+	size_t need;
+	uint32_t tablearg;
+	int hlen, proto;
+
+	stats = &cfg->stats;
+	hlen = 0;
+	proto = nat64_getlasthdr(m, &hlen);
+	if (proto != IPPROTO_ICMPV6) {
+		NAT64STAT_INC(stats, dropped);
+		return (NAT64MFREE);
+	}
+
+	icmp6 = mtodo(m, hlen);
+	switch (icmp6->icmp6_type) {
+	case ICMP6_DST_UNREACH:
+	case ICMP6_PACKET_TOO_BIG:
+	case ICMP6_TIME_EXCEED_TRANSIT:
+	case ICMP6_PARAM_PROB:
+		break;
+	default:
+		NAT64STAT_INC(stats, dropped);
+		return (NAT64MFREE);
+	}
+
+	/* Bytes required: outer headers, inner IPv6 header, ICMP_MINLEN. */
+	hlen += sizeof(struct icmp6_hdr);
+	need = hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN;
+	if (m->m_pkthdr.len < need) {
+		NAT64STAT_INC(stats, dropped);
+		return (NAT64MFREE);
+	}
+	if (m->m_len < need) {
+		m = m_pullup(m, need);
+		if (m == NULL) {
+			NAT64STAT_INC(stats, nomem);
+			return (NAT64RETURN);
+		}
+	}
+
+	/*
+	 * The destination address of the inner IPv6 header selects the
+	 * IPv4 mapped address.
+	 */
+	ip6i = mtodo(m, hlen);
+	if (ipfw_lookup_table(chain, cfg->map64,
+	    sizeof(struct in6_addr), &ip6i->ip6_dst, &tablearg) == 0) {
+		m_freem(m);
+		return (NAT64RETURN);
+	}
+
+	logdata = NULL;
+	if (cfg->flags & NAT64_LOG) {
+		logdata = &loghdr;
+		nat64stl_log(logdata, m, AF_INET6, cfg->no.kidx);
+	}
+	return (nat64_handle_icmp6(m, 0,
+	    htonl(TARG_VAL(chain, tablearg, nh4)), 0, stats, logdata));
+}
+
+/*
+ * External action handler of nat64stl.
+ *
+ * cmd must be O_EXTERNAL_ACTION with the nat64stl eaction id, followed
+ * by O_EXTERNAL_INSTANCE naming an existing instance.  IPv4 packets are
+ * matched by destination address against map46, IPv6 packets by source
+ * address against map64; unmatched ICMPv6 packets get a second chance
+ * in nat64stl_handle_icmp6().
+ *
+ * Returns 0 with *done == 0 when the packet is not handled, so rule
+ * processing continues.  Otherwise sets *done to 1, clears args->m
+ * (freeing the mbuf when the translator asked for it with NAT64MFREE)
+ * and returns IP_FW_DENY.
+ */
+int
+ipfw_nat64stl(struct ip_fw_chain *chain, struct ip_fw_args *args,
+    ipfw_insn *cmd, int *done)
+{
+	ipfw_insn *icmd;
+	struct nat64stl_cfg *cfg;
+	in_addr_t dst4;
+	uint32_t tablearg;
+	int ret;
+
+	IPFW_RLOCK_ASSERT(chain);
+
+	*done = 0;	/* try next rule if not matched */
+	if (cmd->opcode != O_EXTERNAL_ACTION ||
+	    cmd->arg1 != V_nat64stl_eid)
+		return (0);
+	icmd = cmd + 1;
+	if (icmd->opcode != O_EXTERNAL_INSTANCE)
+		return (0);
+	cfg = NAT64_LOOKUP(chain, icmd);
+	if (cfg == NULL)
+		return (0);
+
+	switch (args->f_id.addr_type) {
+	case 4:
+		dst4 = htonl(args->f_id.dst_ip);
+		ret = ipfw_lookup_table(chain, cfg->map46, sizeof(in_addr_t),
+		    &dst4, &tablearg);
+		break;
+	case 6:
+		ret = ipfw_lookup_table(chain, cfg->map64,
+		    sizeof(struct in6_addr), &args->f_id.src_ip6, &tablearg);
+		break;
+	default:
+		return (0);
+	}
+
+	if (ret != 0) {
+		if (args->f_id.addr_type == 4)
+			ret = nat64stl_handle_ip4(chain, cfg, args->m,
+			    tablearg);
+		else
+			ret = nat64stl_handle_ip6(chain, cfg, args->m,
+			    tablearg);
+	} else {
+		/*
+		 * An ICMPv6 message sent by an intermediate router has a
+		 * source address that is not in our map64 table, so it
+		 * is examined separately.  Anything else is not ours.
+		 */
+		if (args->f_id.proto != IPPROTO_ICMPV6)
+			return (0);
+
+		ret = nat64stl_handle_icmp6(chain, cfg, args->m);
+	}
+	if (ret == NAT64SKIP)
+		return (0);
+
+	*done = 1;	/* terminate the search */
+	if (ret == NAT64MFREE)
+		m_freem(args->m);
+	args->m = NULL;
+	return (IP_FW_DENY);
+}
+
+
diff --git a/sys/netpfil/ipfw/nat64/nat64stl.h b/sys/netpfil/ipfw/nat64/nat64stl.h
new file mode 100644
index 0000000..42ec20e
--- /dev/null
+++ b/sys/netpfil/ipfw/nat64/nat64stl.h
@@ -0,0 +1,58 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _IP_FW_NAT64STL_H_
+#define _IP_FW_NAT64STL_H_
+
+/*
+ * Kernel state of one stateless NAT64 (nat64stl) instance.
+ */
+struct nat64stl_cfg {
+	struct named_object	no;
+
+	uint16_t		map64;	/* table with 6to4 mapping */
+	uint16_t		map46;	/* table with 4to6 mapping */
+
+	struct in6_addr		prefix6;/* IPv6 prefix */
+	uint8_t			plen6;	/* prefix length */
+	/*
+	 * The NAT64STL_* bits below start at 0x0100, so this field has
+	 * to be at least 16 bits wide; an 8-bit field silently drops
+	 * them and the error unwinding that tests these bits never
+	 * releases anything.
+	 */
+	uint16_t		flags;	/* flags for internal use */
+#define	NAT64STL_KIDX		0x0100
+#define	NAT64STL_46T		0x0200
+#define	NAT64STL_64T		0x0400
+#define	NAT64STL_FLAGSMASK	(NAT64_LOG) /* flags to pass to userland */
+	char			name[64];
+	nat64_stats_block	stats;
+};
+
+/*
+ * Per-VNET identifier of the "nat64stl" external action; it is assigned
+ * from ipfw_add_eaction() in nat64stl_init() and 0 while unregistered.
+ */
+VNET_DECLARE(uint16_t, nat64stl_eid);
+#define V_nat64stl_eid VNET(nat64stl_eid)
+/* Named object type used for nat64stl instances. */
+#define IPFW_TLV_NAT64STL_NAME IPFW_TLV_EACTION_NAME(V_nat64stl_eid)
+
+/* External action handler; see nat64stl.c. */
+int ipfw_nat64stl(struct ip_fw_chain *chain, struct ip_fw_args *args,
+ ipfw_insn *cmd, int *done);
+
+#endif
+
diff --git a/sys/netpfil/ipfw/nat64/nat64stl_control.c b/sys/netpfil/ipfw/nat64/nat64stl_control.c
new file mode 100644
index 0000000..d8599d9
--- /dev/null
+++ b/sys/netpfil/ipfw/nat64/nat64stl_control.c
@@ -0,0 +1,621 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/rmlock.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/sockopt.h>
+#include <sys/queue.h>
+#include <sys/syslog.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/pfil.h>
+#include <net/route.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/ip6_var.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/nat64/ip_fw_nat64.h>
+#include <netpfil/ipfw/nat64/nat64stl.h>
+#include <netinet6/ip_fw_nat64.h>
+
+/* External action id of nat64stl; 0 until nat64stl_init() registers it. */
+VNET_DEFINE(uint16_t, nat64stl_eid) = 0;
+
+/* Forward declarations of helpers defined below. */
+static struct nat64stl_cfg *nat64stl_alloc_config(const char *name, uint8_t set);
+static void nat64stl_free_config(struct nat64stl_cfg *cfg);
+static struct nat64stl_cfg *nat64stl_find(struct namedobj_instance *ni,
+ const char *name, uint8_t set);
+
+/*
+ * Allocates a zeroed instance together with its statistics counters
+ * and initializes the embedded named object (name, type and set).
+ * Both allocations use M_WAITOK.
+ */
+static struct nat64stl_cfg *
+nat64stl_alloc_config(const char *name, uint8_t set)
+{
+	struct nat64stl_cfg *inst;
+
+	inst = malloc(sizeof(*inst), M_IPFW, M_WAITOK | M_ZERO);
+	COUNTER_ARRAY_ALLOC(inst->stats.stats, NAT64STATS, M_WAITOK);
+
+	/* The named object points at the name stored inside the instance. */
+	strlcpy(inst->name, name, sizeof(inst->name));
+	inst->no.name = inst->name;
+	inst->no.etlv = IPFW_TLV_NAT64STL_NAME;
+	inst->no.set = set;
+	return (inst);
+}
+
+/*
+ * Releases the statistics counters of the instance and then the
+ * instance itself.
+ */
+static void
+nat64stl_free_config(struct nat64stl_cfg *cfg)
+{
+
+ COUNTER_ARRAY_FREE(cfg->stats.stats, NAT64STATS);
+ free(cfg, M_IPFW);
+}
+
+/*
+ * Copies the userland-visible part of an instance into *uc: prefix,
+ * prefix length, flags limited to NAT64STL_FLAGSMASK, set, name and
+ * the name TLVs of both lookup tables.
+ */
+static void
+nat64stl_export_config(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg,
+    ipfw_nat64stl_cfg *uc)
+{
+	struct named_object *tbl;
+
+	strlcpy(uc->name, cfg->no.name, sizeof(uc->name));
+	uc->set = cfg->no.set;
+	uc->flags = cfg->flags & NAT64STL_FLAGSMASK;
+	uc->plen6 = cfg->plen6;
+	uc->prefix6 = cfg->prefix6;
+
+	/* IPv6 -> IPv4 table first, then IPv4 -> IPv6. */
+	tbl = ipfw_objhash_lookup_table_kidx(ch, cfg->map64);
+	ipfw_export_obj_ntlv(tbl, &uc->ntlv6);
+	tbl = ipfw_objhash_lookup_table_kidx(ch, cfg->map46);
+	ipfw_export_obj_ntlv(tbl, &uc->ntlv4);
+}
+
+/* Context handed to export_config_cb() while listing instances. */
+struct nat64stl_dump_arg {
+ struct ip_fw_chain *ch; /* chain the instances belong to */
+ struct sockopt_data *sd; /* reply buffer being filled */
+};
+
+/*
+ * ipfw_objhash_foreach_type() callback of nat64stl_list(): reserves
+ * room for one ipfw_nat64stl_cfg in the reply and exports the instance
+ * into it.  arg is a struct nat64stl_dump_arg.  Always returns 0.
+ */
+static int
+export_config_cb(struct namedobj_instance *ni, struct named_object *no,
+    void *arg)
+{
+	struct nat64stl_dump_arg *ctx;
+	ipfw_nat64stl_cfg *out;
+
+	ctx = (struct nat64stl_dump_arg *)arg;
+	out = (ipfw_nat64stl_cfg *)ipfw_get_sopt_space(ctx->sd, sizeof(*out));
+	nat64stl_export_config(ctx->ch, (struct nat64stl_cfg *)no, out);
+	return (0);
+}
+
+/*
+ * Looks up a nat64stl instance by name and set; returns whatever
+ * ipfw_objhash_lookup_name_type() returns, cast to the instance type.
+ */
+static struct nat64stl_cfg *
+nat64stl_find(struct namedobj_instance *ni, const char *name, uint8_t set)
+{
+
+	return ((struct nat64stl_cfg *)ipfw_objhash_lookup_name_type(ni,
+	    set, IPFW_TLV_NAT64STL_NAME, name));
+}
+
+
+/*
+ * Acquires everything a new instance needs while the UH write lock is
+ * held: a kernel index, then references to the IPv4->IPv6 and
+ * IPv6->IPv4 tables named in *i.  After each successful step the
+ * matching NAT64STL_* bit is or'ed into cfg->flags.  When all steps
+ * succeed the instance is added to the object hash.
+ *
+ * Returns 0 on success, ENOSPC when no index is available and EINVAL
+ * when a table cannot be referenced.
+ */
+static int
+nat64stl_create_internal(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg,
+    ipfw_nat64stl_cfg *i)
+{
+	struct namedobj_instance *ni;
+
+	IPFW_UH_WLOCK_ASSERT(ch);
+
+	ni = CHAIN_TO_SRV(ch);
+	if (ipfw_objhash_alloc_idx(ni, &cfg->no.kidx) != 0)
+		return (ENOSPC);
+	cfg->flags |= NAT64STL_KIDX;
+
+	if (ipfw_ref_table(ch, &i->ntlv4, &cfg->map46) != 0)
+		return (EINVAL);
+	cfg->flags |= NAT64STL_46T;
+
+	if (ipfw_ref_table(ch, &i->ntlv6, &cfg->map64) != 0)
+		return (EINVAL);
+	cfg->flags |= NAT64STL_64T;
+
+	ipfw_objhash_add(ni, &cfg->no);
+	return (0);
+}
+
+/*
+ * Creates new nat64 instance.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_lheader ipfw_nat64stl_cfg ]
+ *
+ * The request must have exactly that size, a valid object name, a
+ * prefix accepted by IN6_IS_ADDR_WKPFX() with length 96 and a set
+ * number below IPFW_MAX_SETS.
+ *
+ * Returns 0 on success, EINVAL for a bad request, EEXIST when an
+ * instance with that name and set already exists, or the error
+ * reported by nat64stl_create_internal().
+ */
+static int
+nat64stl_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	ipfw_obj_lheader *olh;
+	ipfw_nat64stl_cfg *uc;
+	struct namedobj_instance *ni;
+	struct nat64stl_cfg *cfg;
+	int error;
+
+	if (sd->valsize != sizeof(*olh) + sizeof(*uc))
+		return (EINVAL);
+
+	olh = (ipfw_obj_lheader *)sd->kbuf;
+	uc = (ipfw_nat64stl_cfg *)(olh + 1);
+
+	if (ipfw_check_object_name_generic(uc->name) != 0)
+		return (EINVAL);
+	if (!IN6_IS_ADDR_WKPFX(&uc->prefix6))
+		return (EINVAL);
+	if (uc->plen6 != 96)
+		return (EINVAL);
+	if (uc->set >= IPFW_MAX_SETS)
+		return (EINVAL);
+
+	/* XXX: check types of tables */
+
+	ni = CHAIN_TO_SRV(ch);
+
+	/* Cheap early check before anything is allocated. */
+	IPFW_UH_RLOCK(ch);
+	cfg = nat64stl_find(ni, uc->name, uc->set);
+	IPFW_UH_RUNLOCK(ch);
+	if (cfg != NULL)
+		return (EEXIST);
+
+	cfg = nat64stl_alloc_config(uc->name, uc->set);
+	cfg->prefix6 = uc->prefix6;
+	cfg->plen6 = uc->plen6;
+	cfg->flags = uc->flags & NAT64STL_FLAGSMASK;
+
+	IPFW_UH_WLOCK(ch);
+
+	/* The lock was dropped in between, so look the name up again. */
+	if (nat64stl_find(ni, uc->name, uc->set) != NULL) {
+		IPFW_UH_WUNLOCK(ch);
+		nat64stl_free_config(cfg);
+		return (EEXIST);
+	}
+
+	error = nat64stl_create_internal(ch, cfg, uc);
+	if (error != 0) {
+		/* Give back whatever the flags say has been acquired. */
+		if (cfg->flags & NAT64STL_KIDX)
+			ipfw_objhash_free_idx(ni, cfg->no.kidx);
+		if (cfg->flags & NAT64STL_46T)
+			ipfw_unref_table(ch, cfg->map46);
+		if (cfg->flags & NAT64STL_64T)
+			ipfw_unref_table(ch, cfg->map64);
+
+		IPFW_UH_WUNLOCK(ch);
+		nat64stl_free_config(cfg);
+		return (error);
+	}
+
+	/* Okay, let's link data */
+	IPFW_WLOCK(ch);
+	SRV_OBJECT(ch, cfg->no.kidx) = cfg;
+	IPFW_WUNLOCK(ch);
+
+	IPFW_UH_WUNLOCK(ch);
+	return (0);
+}
+
+/*
+ * Change existing nat64stl instance configuration.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ipfw_nat64stl_cfg ]
+ * Reply: [ ipfw_obj_header ipfw_nat64stl_cfg ]
+ *
+ * For SOPT_GET the current configuration is exported into the reply.
+ * Otherwise only the flags are updated, limited to NAT64STL_FLAGSMASK.
+ *
+ * Returns 0 on success, EINVAL for a request of the wrong size or with
+ * an invalid name or set, and ESRCH when the instance does not exist
+ * (the same error the other nat64stl handlers use for that case).
+ */
+static int
+nat64stl_config(struct ip_fw_chain *ch, ip_fw3_opheader *op,
+    struct sockopt_data *sd)
+{
+	ipfw_obj_header *oh;
+	ipfw_nat64stl_cfg *uc;
+	struct nat64stl_cfg *cfg;
+	struct namedobj_instance *ni;
+
+	if (sd->valsize != sizeof(*oh) + sizeof(*uc))
+		return (EINVAL);
+
+	oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd,
+	    sizeof(*oh) + sizeof(*uc));
+	uc = (ipfw_nat64stl_cfg *)(oh + 1);
+
+	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
+	    oh->ntlv.set >= IPFW_MAX_SETS)
+		return (EINVAL);
+
+	ni = CHAIN_TO_SRV(ch);
+	if (sd->sopt->sopt_dir == SOPT_GET) {
+		IPFW_UH_RLOCK(ch);
+		cfg = nat64stl_find(ni, oh->ntlv.name, oh->ntlv.set);
+		if (cfg == NULL) {
+			IPFW_UH_RUNLOCK(ch);
+			return (ESRCH);
+		}
+		nat64stl_export_config(ch, cfg, uc);
+		IPFW_UH_RUNLOCK(ch);
+		return (0);
+	}
+
+	IPFW_UH_WLOCK(ch);
+	cfg = nat64stl_find(ni, oh->ntlv.name, oh->ntlv.set);
+	if (cfg == NULL) {
+		IPFW_UH_WUNLOCK(ch);
+		return (ESRCH);
+	}
+
+	/*
+	 * For now allow to change only following values:
+	 *  flags.
+	 */
+
+	cfg->flags = uc->flags & NAT64STL_FLAGSMASK;
+	IPFW_UH_WUNLOCK(ch);
+	return (0);
+}
+
+/*
+ * Unlinks an instance from the chain with the UH write lock held:
+ * removes it from the object hash, frees its kernel index and drops
+ * the references on both lookup tables.  The instance itself is not
+ * freed here.
+ */
+static void
+nat64stl_detach_config(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg)
+{
+	struct namedobj_instance *ni;
+
+	IPFW_UH_WLOCK_ASSERT(ch);
+
+	ni = CHAIN_TO_SRV(ch);
+	ipfw_objhash_del(ni, &cfg->no);
+	ipfw_objhash_free_idx(ni, cfg->no.kidx);
+
+	ipfw_unref_table(ch, cfg->map46);
+	ipfw_unref_table(ch, cfg->map64);
+}
+
+/*
+ * Destroys nat64 instance.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ]
+ *
+ * Returns 0 on success, EINVAL for a request of the wrong size or with
+ * an invalid name, ESRCH when the instance does not exist and EBUSY
+ * while it is still referenced.
+ */
+static int
+nat64stl_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	ipfw_obj_header *hdr;
+	struct nat64stl_cfg *inst;
+	int error;
+
+	if (sd->valsize != sizeof(*hdr))
+		return (EINVAL);
+
+	hdr = (ipfw_obj_header *)sd->kbuf;
+	if (ipfw_check_object_name_generic(hdr->ntlv.name) != 0)
+		return (EINVAL);
+
+	IPFW_UH_WLOCK(ch);
+	inst = nat64stl_find(CHAIN_TO_SRV(ch), hdr->ntlv.name,
+	    hdr->ntlv.set);
+	error = 0;
+	if (inst == NULL)
+		error = ESRCH;
+	else if (inst->no.refcnt > 0)
+		error = EBUSY;
+	if (error != 0) {
+		IPFW_UH_WUNLOCK(ch);
+		return (error);
+	}
+
+	/* Hide the instance from the packet path before unlinking it. */
+	IPFW_WLOCK(ch);
+	SRV_OBJECT(ch, inst->no.kidx) = NULL;
+	IPFW_WUNLOCK(ch);
+
+	nat64stl_detach_config(ch, inst);
+	IPFW_UH_WUNLOCK(ch);
+
+	nat64stl_free_config(inst);
+	return (0);
+}
+
+/*
+ * Lists all nat64stl instances currently available in kernel.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_lheader ]
+ * Reply: [ ipfw_obj_lheader ipfw_nat64stl_cfg x N ]
+ *
+ * The list header is always filled with the instance count, the object
+ * size and the total size required, so a caller that gets ENOMEM
+ * learns how large the buffer has to be.
+ *
+ * Returns 0 on success, EINVAL when the request is shorter than the
+ * list header and ENOMEM when the buffer cannot hold all instances.
+ */
+static int
+nat64stl_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	ipfw_obj_lheader *olh;
+	struct nat64stl_dump_arg da;
+	struct namedobj_instance *ni;
+
+	/* Check minimum header size */
+	if (sd->valsize < sizeof(ipfw_obj_lheader))
+		return (EINVAL);
+
+	olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh));
+	ni = CHAIN_TO_SRV(ch);
+
+	IPFW_UH_RLOCK(ch);
+	olh->count = ipfw_objhash_count_type(ni, IPFW_TLV_NAT64STL_NAME);
+	olh->objsize = sizeof(ipfw_nat64stl_cfg);
+	olh->size = sizeof(*olh) + olh->count * olh->objsize;
+
+	if (sd->valsize < olh->size) {
+		IPFW_UH_RUNLOCK(ch);
+		return (ENOMEM);
+	}
+
+	memset(&da, 0, sizeof(da));
+	da.sd = sd;
+	da.ch = ch;
+	ipfw_objhash_foreach_type(ni, export_config_cb, &da,
+	    IPFW_TLV_NAT64STL_NAME);
+	IPFW_UH_RUNLOCK(ch);
+
+	return (0);
+}
+
+/*
+ * Copies one counter, fetched with NAT64STAT_FETCH(), from the
+ * instance statistics into the field of the same name in *_stats.
+ */
+#define __COPY_STAT_FIELD(_cfg, _stats, _field) \
+ (_stats)->_field = NAT64STAT_FETCH(&(_cfg)->stats, _field)
+/*
+ * Fills *stats with the current values of the instance counters listed
+ * below.  ch is not used.
+ */
+static void
+export_stats(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg,
+ struct ipfw_nat64stl_stats *stats)
+{
+
+ __COPY_STAT_FIELD(cfg, stats, opcnt64);
+ __COPY_STAT_FIELD(cfg, stats, opcnt46);
+ __COPY_STAT_FIELD(cfg, stats, ofrags);
+ __COPY_STAT_FIELD(cfg, stats, ifrags);
+ __COPY_STAT_FIELD(cfg, stats, oerrors);
+ __COPY_STAT_FIELD(cfg, stats, noroute4);
+ __COPY_STAT_FIELD(cfg, stats, noroute6);
+ __COPY_STAT_FIELD(cfg, stats, noproto);
+ __COPY_STAT_FIELD(cfg, stats, nomem);
+ __COPY_STAT_FIELD(cfg, stats, dropped);
+}
+
+/*
+ * Get nat64stl statistics.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ]
+ * Reply: [ ipfw_obj_header ipfw_obj_ctlv [ uint64_t x N ]]
+ *
+ * Returns 0 on success, EINVAL when the buffer size is not a multiple
+ * of sizeof(uint64_t) or the header cannot be obtained, ENOMEM when
+ * the buffer is too small for the reply and ESRCH when the instance
+ * does not exist.
+ */
+static int
+nat64stl_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
+    struct sockopt_data *sd)
+{
+	struct ipfw_nat64stl_stats stats;
+	struct nat64stl_cfg *cfg;
+	ipfw_obj_header *oh;
+	ipfw_obj_ctlv *ctlv;
+	size_t sz;
+
+	if (sd->valsize % sizeof(uint64_t))
+		return (EINVAL);
+	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) + sizeof(stats);
+	if (sd->valsize < sz)
+		return (ENOMEM);
+	oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
+	if (oh == NULL)
+		return (EINVAL);
+
+	/* Snapshot the counters under the lock, build the reply after. */
+	memset(&stats, 0, sizeof(stats));
+	IPFW_UH_RLOCK(ch);
+	cfg = nat64stl_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
+	if (cfg == NULL) {
+		IPFW_UH_RUNLOCK(ch);
+		return (ESRCH);
+	}
+	export_stats(ch, cfg, &stats);
+	IPFW_UH_RUNLOCK(ch);
+
+	ctlv = (ipfw_obj_ctlv *)(oh + 1);
+	memset(ctlv, 0, sizeof(*ctlv));
+	ctlv->head.type = IPFW_TLV_COUNTERS;
+	ctlv->head.length = sz - sizeof(ipfw_obj_header);
+	ctlv->version = IPFW_NAT64_VERSION;
+	ctlv->objsize = sizeof(uint64_t);
+	ctlv->count = sizeof(stats) / sizeof(uint64_t);
+	memcpy(ctlv + 1, &stats, sizeof(stats));
+	return (0);
+}
+
+/*
+ * Reset nat64stl statistics.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ]
+ *
+ * Returns 0 on success, EINVAL for a request of the wrong size or with
+ * an invalid name or set, and ESRCH when the instance does not exist.
+ */
+static int
+nat64stl_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
+    struct sockopt_data *sd)
+{
+	struct nat64stl_cfg *inst;
+	ipfw_obj_header *hdr;
+
+	if (sd->valsize != sizeof(*hdr))
+		return (EINVAL);
+
+	hdr = (ipfw_obj_header *)sd->kbuf;
+	if (ipfw_check_object_name_generic(hdr->ntlv.name) != 0)
+		return (EINVAL);
+	if (hdr->ntlv.set >= IPFW_MAX_SETS)
+		return (EINVAL);
+
+	IPFW_UH_WLOCK(ch);
+	inst = nat64stl_find(CHAIN_TO_SRV(ch), hdr->ntlv.name,
+	    hdr->ntlv.set);
+	if (inst == NULL) {
+		IPFW_UH_WUNLOCK(ch);
+		return (ESRCH);
+	}
+	COUNTER_ARRAY_ZERO(inst->stats.stats, NAT64STATS);
+	IPFW_UH_WUNLOCK(ch);
+	return (0);
+}
+
+/*
+ * Socket option handlers implemented by nat64stl.  The table is passed
+ * to IPFW_ADD_SOPT_HANDLER()/IPFW_DEL_SOPT_HANDLER() from
+ * nat64stl_init()/nat64stl_uninit().  Each row names the opcode, a
+ * numeric field that is 0 for all entries (presumably the handler
+ * version -- confirm against struct ipfw_sopt_handler), the allowed
+ * direction (HDIR_*) and the handler function.
+ */
+static struct ipfw_sopt_handler scodes[] = {
+
+ { IP_FW_NAT64STL_CREATE, 0, HDIR_SET, nat64stl_create },
+ { IP_FW_NAT64STL_DESTROY,0, HDIR_SET, nat64stl_destroy },
+ { IP_FW_NAT64STL_CONFIG, 0, HDIR_BOTH, nat64stl_config },
+ { IP_FW_NAT64STL_LIST, 0, HDIR_GET, nat64stl_list },
+ { IP_FW_NAT64STL_STATS, 0, HDIR_GET, nat64stl_stats },
+ { IP_FW_NAT64STL_RESET_STATS,0, HDIR_SET, nat64stl_reset_stats },
+};
+
+/*
+ * Classifier callback of the O_EXTERNAL_INSTANCE rewriter.
+ * The instruction belongs to nat64stl only when the instruction right
+ * before it is O_EXTERNAL_ACTION carrying the nat64stl eaction id.
+ * On a match the instance index (cmd->arg1) is stored in *puidx, *ptype
+ * is set to 0 and 0 is returned; otherwise 1 is returned and the
+ * output arguments are left untouched.
+ */
+static int
+nat64stl_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
+{
+	ipfw_insn *prev = cmd - 1;
+
+	if (prev->opcode == O_EXTERNAL_ACTION &&
+	    prev->arg1 == V_nat64stl_eid) {
+		*puidx = cmd->arg1;
+		*ptype = 0;
+		return (0);
+	}
+	return (1);
+}
+
+/*
+ * Rewriter "update" callback: stores the given index into arg1 of the
+ * instruction.
+ */
+static void
+nat64stl_update_arg1(ipfw_insn *cmd, uint16_t idx)
+{
+
+ cmd->arg1 = idx;
+}
+
+/*
+ * Rewriter "find_byname" callback: looks up an object of type
+ * IPFW_TLV_NAT64STL_NAME described by ti in the chain's service object
+ * hash and hands back the result of ipfw_objhash_find_type().
+ */
+static int
+nat64stl_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
+    struct named_object **pno)
+{
+
+	return (ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti,
+	    IPFW_TLV_NAT64STL_NAME, pno));
+}
+
+/*
+ * Rewriter "find_bykidx" callback: returns the named object registered
+ * under kernel index idx.  Asserts that the UH write lock is held and
+ * that the object exists.
+ */
+static struct named_object *
+nat64stl_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
+{
+	struct named_object *obj;
+
+	IPFW_UH_WLOCK_ASSERT(ch);
+	obj = ipfw_objhash_lookup_kidx(CHAIN_TO_SRV(ch), idx);
+	KASSERT(obj != NULL, ("NAT with index %d not found", idx));
+	return (obj);
+}
+
+/*
+ * Rewriter "manage_sets" callback: forwards the set operation for
+ * objects of type IPFW_TLV_NAT64STL_NAME to ipfw_obj_manage_sets().
+ */
+static int
+nat64stl_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
+    enum ipfw_sets_cmd cmd)
+{
+	struct namedobj_instance *ni;
+
+	ni = CHAIN_TO_SRV(ch);
+	return (ipfw_obj_manage_sets(ni, IPFW_TLV_NAT64STL_NAME, set,
+	    new_set, cmd));
+}
+
+/*
+ * Object rewriter for the O_EXTERNAL_INSTANCE opcode, wiring in the
+ * nat64stl callbacks defined above.  Registered and unregistered via
+ * IPFW_ADD_OBJ_REWRITER()/IPFW_DEL_OBJ_REWRITER() in
+ * nat64stl_init()/nat64stl_uninit().
+ */
+static struct opcode_obj_rewrite opcodes[] = {
+ {
+ .opcode = O_EXTERNAL_INSTANCE,
+ .etlv = IPFW_TLV_EACTION /* just show it isn't table */,
+ .classifier = nat64stl_classify,
+ .update = nat64stl_update_arg1,
+ .find_byname = nat64stl_findbyname,
+ .find_bykidx = nat64stl_findbykidx,
+ .manage_sets = nat64stl_manage_sets,
+ },
+};
+
+/*
+ * ipfw_objhash_foreach_type() callback used on unload: unlinks the
+ * instance from the chain's service object array, detaches it and
+ * frees it.  arg is the owning struct ip_fw_chain.  Always returns 0.
+ */
+static int
+destroy_config_cb(struct namedobj_instance *ni, struct named_object *no,
+    void *arg)
+{
+	struct ip_fw_chain *chain = (struct ip_fw_chain *)arg;
+	struct nat64stl_cfg *inst;
+
+	inst = (struct nat64stl_cfg *)SRV_OBJECT(chain, no->kidx);
+	SRV_OBJECT(chain, no->kidx) = NULL;
+
+	nat64stl_detach_config(chain, inst);
+	nat64stl_free_config(inst);
+	return (0);
+}
+
+/*
+ * Attaches nat64stl to the chain: registers the "nat64stl" external
+ * action and then the sockopt handlers and opcode rewriter.
+ * Returns 0 on success or ENXIO when the external action could not be
+ * registered.
+ */
+int
+nat64stl_init(struct ip_fw_chain *ch, int first)
+{
+
+	V_nat64stl_eid = ipfw_add_eaction(ch, ipfw_nat64stl, "nat64stl");
+	if (V_nat64stl_eid != 0) {
+		IPFW_ADD_SOPT_HANDLER(first, scodes);
+		IPFW_ADD_OBJ_REWRITER(first, opcodes);
+		return (0);
+	}
+	return (ENXIO);
+}
+
+/*
+ * Detaches nat64stl from the chain: removes the opcode rewriter and
+ * sockopt handlers, deregisters the external action and destroys every
+ * remaining nat64stl instance.
+ */
+void
+nat64stl_uninit(struct ip_fw_chain *ch, int last)
+{
+
+ IPFW_DEL_OBJ_REWRITER(last, opcodes);
+ IPFW_DEL_SOPT_HANDLER(last, scodes);
+ ipfw_del_eaction(ch, V_nat64stl_eid);
+ /*
+ * Since we have already deregistered the external action,
+ * our named objects are no longer accessible via rules, because
+ * all rules were truncated by ipfw_del_eaction().
+ * So, we can unlink and destroy our named objects without holding
+ * IPFW_WLOCK().
+ */
+ IPFW_UH_WLOCK(ch);
+ ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch,
+ IPFW_TLV_NAT64STL_NAME);
+ V_nat64stl_eid = 0;
+ IPFW_UH_WUNLOCK(ch);
+}
+
OpenPOWER on IntegriCloud