author     glebius <glebius@FreeBSD.org>  2012-09-14 11:51:49 +0000
committer  glebius <glebius@FreeBSD.org>  2012-09-14 11:51:49 +0000
commit     0ccf4838d7a8b4da2c3beaac7ea1fd977aa0ed11 (patch)
tree       ec60da6e90cde2e87aa91ac9450c84ce3446233a  /sys/netpfil/pf
parent     f99fc207edf21e7c05c1147864077ce3fe1f3e2c (diff)
o Create directory sys/netpfil, where all packet filters should
reside, and move ipfw(4) and pf(4) there.
o Move most modified parts of pf out of contrib.
Actual movements:
sys/contrib/pf/net/*.c -> sys/netpfil/pf/
sys/contrib/pf/net/*.h -> sys/net/
contrib/pf/pfctl/*.c -> sbin/pfctl
contrib/pf/pfctl/*.h -> sbin/pfctl
contrib/pf/pfctl/pfctl.8 -> sbin/pfctl
contrib/pf/pfctl/*.4 -> share/man/man4
contrib/pf/pfctl/*.5 -> share/man/man5
sys/netinet/ipfw -> sys/netpfil/ipfw
The debatable move is pf/net/*.h -> sys/net. There are plans to
refactor the pf includes in the future, so I decided not to
break things twice.
Unmodified bits of pf are left in contrib: authpf, ftp-proxy,
tftp-proxy, pflogd.
The ipfw(4) move is planned to be merged to stable/9, so that
head and stable match.
Discussed with: bz, luigi
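For consumers the header relocation is transparent: the pf headers keep their <net/...> spelling, only their home in the source tree changes. A minimal sketch of the include block a pf-aware kernel source file uses after this change (the two pf headers shown are the ones named in the mapping above):

	#include <sys/param.h>
	#include <sys/kernel.h>
	#include <sys/module.h>

	#include <net/if.h>
	#include <net/pfvar.h>		/* was sys/contrib/pf/net/pfvar.h */
	#include <net/if_pflog.h>	/* was sys/contrib/pf/net/if_pflog.h */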
Diffstat (limited to 'sys/netpfil/pf')
 -rw-r--r--  sys/netpfil/pf/if_pflog.c   |  290
 -rw-r--r--  sys/netpfil/pf/if_pfsync.c  | 2397
 -rw-r--r--  sys/netpfil/pf/in4_cksum.c  |  120
 -rw-r--r--  sys/netpfil/pf/pf.c         | 6271
 -rw-r--r--  sys/netpfil/pf/pf_if.c      |  859
 -rw-r--r--  sys/netpfil/pf/pf_ioctl.c   | 3774
 -rw-r--r--  sys/netpfil/pf/pf_lb.c      |  663
 -rw-r--r--  sys/netpfil/pf/pf_norm.c    | 1999
 -rw-r--r--  sys/netpfil/pf/pf_osfp.c    |  526
 -rw-r--r--  sys/netpfil/pf/pf_ruleset.c |  424
 -rw-r--r--  sys/netpfil/pf/pf_table.c   | 2191
11 files changed, 19514 insertions(+), 0 deletions(-)
diff --git a/sys/netpfil/pf/if_pflog.c b/sys/netpfil/pf/if_pflog.c
new file mode 100644
index 0000000..20feea2
--- /dev/null
+++ b/sys/netpfil/pf/if_pflog.c
@@ -0,0 +1,290 @@
+/*	$OpenBSD: if_pflog.c,v 1.26 2007/10/18 21:58:18 mpf Exp $	*/
+/*
+ * The authors of this code are John Ioannidis (ji@tla.org),
+ * Angelos D. Keromytis (kermit@csd.uch.gr) and
+ * Niels Provos (provos@physnet.uni-hamburg.de).
+ *
+ * This code was written by John Ioannidis for BSD/OS in Athens, Greece,
+ * in November 1995.
+ *
+ * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
+ * by Angelos D. Keromytis.
+ *
+ * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
+ * and Niels Provos.
+ *
+ * Copyright (C) 1995, 1996, 1997, 1998 by John Ioannidis, Angelos D. Keromytis
+ * and Niels Provos.
+ * Copyright (c) 2001, Angelos D. Keromytis, Niels Provos.
+ *
+ * Permission to use, copy, and modify this software with or without fee
+ * is hereby granted, provided that this entire notice is included in
+ * all copies of any software which is or includes a copy or
+ * modification of this software.
+ * You may use this code under the GNU public license if you so wish. Please
+ * contribute changes back to the authors under this freer than GPL license
+ * so that we may further the use of strong encryption without limitations to
+ * all.
+ *
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
+ * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
+ * PURPOSE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+#include "opt_bpf.h"
+#include "opt_pf.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/proc.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+
+#include <net/bpf.h>
+#include <net/if.h>
+#include <net/if_clone.h>
+#include <net/if_pflog.h>
+#include <net/if_types.h>
+#include <net/pfvar.h>
+
+#if defined(INET) || defined(INET6)
+#include <netinet/in.h>
+#endif
+#ifdef INET
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#endif
+
+#ifdef INET6
+#include <netinet6/in6_var.h>
+#include <netinet6/nd6.h>
+#endif /* INET6 */
+
+#ifdef INET
+#include <machine/in_cksum.h>
+#endif /* INET */
+
+#define	PFLOGMTU	(32768 + MHLEN + MLEN)
+
+#ifdef PFLOGDEBUG
+#define	DPRINTF(x)	do { if (pflogdebug) printf x ; } while (0)
+#else
+#define	DPRINTF(x)
+#endif
+
+static int	pflogoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
+		    struct route *);
+static void	pflogattach(int);
+static int	pflogioctl(struct ifnet *, u_long, caddr_t);
+static void	pflogstart(struct ifnet *);
+static int	pflog_clone_create(struct if_clone *, int, caddr_t);
+static void	pflog_clone_destroy(struct ifnet *);
+
+IFC_SIMPLE_DECLARE(pflog, 1);
+
+struct ifnet	*pflogifs[PFLOGIFS_MAX];	/* for fast access */
+
+static void
+pflogattach(int npflog)
+{
+	int	i;
+	for (i = 0; i < PFLOGIFS_MAX; i++)
+		pflogifs[i] = NULL;
+	if_clone_attach(&pflog_cloner);
+}
+
+static int
+pflog_clone_create(struct if_clone *ifc, int unit, caddr_t param)
+{
+	struct ifnet *ifp;
+
+	if (unit >= PFLOGIFS_MAX)
+		return (EINVAL);
+
+	ifp = if_alloc(IFT_PFLOG);
+	if (ifp == NULL) {
+		return (ENOSPC);
+	}
+	if_initname(ifp, ifc->ifc_name, unit);
+	ifp->if_mtu = PFLOGMTU;
+	ifp->if_ioctl = pflogioctl;
+	ifp->if_output = pflogoutput;
+	ifp->if_start = pflogstart;
+	ifp->if_snd.ifq_maxlen = ifqmaxlen;
+	ifp->if_hdrlen = PFLOG_HDRLEN;
+	if_attach(ifp);
+
+	bpfattach(ifp, DLT_PFLOG, PFLOG_HDRLEN);
+
+	pflogifs[unit] = ifp;
+
+	return (0);
+}
+
+static void
+pflog_clone_destroy(struct ifnet *ifp)
+{
+	int i;
+
+	for (i = 0; i < PFLOGIFS_MAX; i++)
+		if (pflogifs[i] == ifp)
+			pflogifs[i] = NULL;
+
+	bpfdetach(ifp);
+	if_detach(ifp);
+	if_free(ifp);
+}
+
+/*
+ * Start output on the pflog interface.
+ */
+static void
+pflogstart(struct ifnet *ifp)
+{
+	struct mbuf *m;
+
+	for (;;) {
+		IF_LOCK(&ifp->if_snd);
+		_IF_DROP(&ifp->if_snd);
+		_IF_DEQUEUE(&ifp->if_snd, m);
+		IF_UNLOCK(&ifp->if_snd);
+
+		if (m == NULL)
+			return;
+		else
+			m_freem(m);
+	}
+}
+
+static int
+pflogoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+	struct route *rt)
+{
+	m_freem(m);
+	return (0);
+}
+
+/* ARGSUSED */
+static int
+pflogioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+	switch (cmd) {
+	case SIOCSIFFLAGS:
+		if (ifp->if_flags & IFF_UP)
+			ifp->if_drv_flags |= IFF_DRV_RUNNING;
+		else
+			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+		break;
+	default:
+		return (ENOTTY);
+	}
+
+	return (0);
+}
+
+static int
+pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir,
+    u_int8_t reason, struct pf_rule *rm, struct pf_rule *am,
+    struct pf_ruleset *ruleset, struct pf_pdesc *pd, int lookupsafe)
+{
+	struct ifnet *ifn;
+	struct pfloghdr hdr;
+
+	if (kif == NULL || m == NULL || rm == NULL || pd == NULL)
+		return (1);
+
+	if ((ifn = pflogifs[rm->logif]) == NULL || !ifn->if_bpf)
+		return (0);
+
+	bzero(&hdr, sizeof(hdr));
+	hdr.length = PFLOG_REAL_HDRLEN;
+	hdr.af = af;
+	hdr.action = rm->action;
+	hdr.reason = reason;
+	memcpy(hdr.ifname, kif->pfik_name, sizeof(hdr.ifname));
+
+	if (am == NULL) {
+		hdr.rulenr = htonl(rm->nr);
+		hdr.subrulenr = -1;
+	} else {
+		hdr.rulenr = htonl(am->nr);
+		hdr.subrulenr = htonl(rm->nr);
+		if (ruleset != NULL && ruleset->anchor != NULL)
+			strlcpy(hdr.ruleset, ruleset->anchor->name,
+			    sizeof(hdr.ruleset));
+	}
+	/*
+	 * XXXGL: we avoid pf_socket_lookup() when we are holding
+	 * state lock, since this leads to unsafe LOR.
+	 * These conditions are very very rare, however.
+	 */
+	if (rm->log & PF_LOG_SOCKET_LOOKUP && !pd->lookup.done && lookupsafe)
+		pd->lookup.done = pf_socket_lookup(dir, pd, m);
+	if (pd->lookup.done > 0)
+		hdr.uid = pd->lookup.uid;
+	else
+		hdr.uid = UID_MAX;
+	hdr.pid = NO_PID;
+	hdr.rule_uid = rm->cuid;
+	hdr.rule_pid = rm->cpid;
+	hdr.dir = dir;
+
+#ifdef INET
+	if (af == AF_INET && dir == PF_OUT) {
+		struct ip *ip;
+
+		ip = mtod(m, struct ip *);
+		ip->ip_sum = 0;
+		ip->ip_sum = in_cksum(m, ip->ip_hl << 2);
+	}
+#endif /* INET */
+
+	ifn->if_opackets++;
+	ifn->if_obytes += m->m_pkthdr.len;
+	BPF_MTAP2(ifn, &hdr, PFLOG_HDRLEN, m);
+
+	return (0);
+}
+
+static int
+pflog_modevent(module_t mod, int type, void *data)
+{
+	int error = 0;
+
+	switch (type) {
+	case MOD_LOAD:
+		pflogattach(1);
+		PF_RULES_WLOCK();
+		pflog_packet_ptr = pflog_packet;
+		PF_RULES_WUNLOCK();
+		break;
+	case MOD_UNLOAD:
+		PF_RULES_WLOCK();
+		pflog_packet_ptr = NULL;
+		PF_RULES_WUNLOCK();
+		if_clone_detach(&pflog_cloner);
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+
+	return error;
+}
+
+static moduledata_t pflog_mod = { "pflog", pflog_modevent, 0 };
+
+#define PFLOG_MODVER 1
+
+DECLARE_MODULE(pflog, pflog_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(pflog, PFLOG_MODVER);
+MODULE_DEPEND(pflog, pf, PF_MODVER, PF_MODVER, PF_MODVER);
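if_pflog.c never transmits anything itself: pflog_packet() only prepends a struct pfloghdr and hands the result to BPF via BPF_MTAP2(), so the log is consumed from userland through the pflog0 BPF tap (the pflogd daemon left in contrib does essentially this). A minimal sketch of such a reader using libpcap's DLT_PFLOG link type; error handling is pared down and the interface name is an assumption:

	#include <sys/types.h>
	#include <sys/socket.h>
	#include <net/if.h>
	#include <net/if_pflog.h>	/* struct pfloghdr */

	#include <arpa/inet.h>
	#include <pcap.h>
	#include <stdio.h>

	static void
	handler(u_char *user, const struct pcap_pkthdr *h, const u_char *bytes)
	{
		const struct pfloghdr *hdr = (const struct pfloghdr *)bytes;

		(void)user; (void)h;
		/* The logged packet itself follows the pflog header. */
		printf("rule %u on %s: action %d dir %d\n",
		    (unsigned)ntohl(hdr->rulenr), hdr->ifname,
		    hdr->action, hdr->dir);
	}

	int
	main(void)
	{
		char errbuf[PCAP_ERRBUF_SIZE];
		pcap_t *p;

		if ((p = pcap_open_live("pflog0", 65535, 1, 100, errbuf)) == NULL)
			return (1);
		if (pcap_datalink(p) != DLT_PFLOG)	/* expect pflog framing */
			return (1);
		return (pcap_loop(p, -1, handler, NULL) == 0 ? 0 : 1);
	}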
diff --git a/sys/netpfil/pf/if_pfsync.c b/sys/netpfil/pf/if_pfsync.c
new file mode 100644
index 0000000..28af641
--- /dev/null
+++ b/sys/netpfil/pf/if_pfsync.c
@@ -0,0 +1,2397 @@
+/*	$OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $	*/
+
+/*
+ * Copyright (c) 2002 Michael Shalayeff
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Revisions picked from OpenBSD after revision 1.110 import:
+ * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates
+ * 1.120, 1.175 - use monotonic time_uptime
+ * 1.122 - reduce number of updates for non-TCP sessions
+ * 1.128 - cleanups
+ * 1.146 - bzero() mbuf before sparsely filling it with data
+ * 1.170 - SIOCSIFMTU checks
+ * 1.126, 1.142 - deferred packets processing
+ * 1.173 - correct expire time processing
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+#include "opt_pf.h"
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/endian.h>
+#include <sys/interrupt.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/priv.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/sysctl.h>
+
+#include <net/bpf.h>
+#include <net/if.h>
+#include <net/if_clone.h>
+#include <net/if_types.h>
+#include <net/pfvar.h>
+#include <net/if_pfsync.h>
+
+#include <netinet/if_ether.h>
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/ip_carp.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+
+#define PFSYNC_MINPKT ( \
+	sizeof(struct ip) + \
+	sizeof(struct pfsync_header) + \
+	sizeof(struct pfsync_subheader) + \
+	sizeof(struct pfsync_eof))
+
+struct pfsync_pkt {
+	struct ip *ip;
+	struct in_addr src;
+	u_int8_t flags;
+};
+
+static int	pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
+		    struct pfsync_state_peer *);
+static int	pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int);
+static int	pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int);
+static int	pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int);
+static int	pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int);
+static int	pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int);
+static int	pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int);
+static int	pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int);
+static int	pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int);
+static int	pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int);
+static int	pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int);
+static int	pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int);
+static int	pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int);
+
+static int (*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = {
+	pfsync_in_clr,			/* PFSYNC_ACT_CLR */
+	pfsync_in_ins,			/* PFSYNC_ACT_INS */
+	pfsync_in_iack,			/* PFSYNC_ACT_INS_ACK */
+	pfsync_in_upd,			/* PFSYNC_ACT_UPD */
+	pfsync_in_upd_c,		/* PFSYNC_ACT_UPD_C */
+	pfsync_in_ureq,			/* PFSYNC_ACT_UPD_REQ */
+	pfsync_in_del,			/* PFSYNC_ACT_DEL */
+	pfsync_in_del_c,		/* PFSYNC_ACT_DEL_C */
+	pfsync_in_error,		/* PFSYNC_ACT_INS_F */
+	pfsync_in_error,		/* PFSYNC_ACT_DEL_F */
+	pfsync_in_bus,			/* PFSYNC_ACT_BUS */
+	pfsync_in_tdb,			/* PFSYNC_ACT_TDB */
+	pfsync_in_eof			/* PFSYNC_ACT_EOF */
+};
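pfsync_input() below walks the packet one subheader at a time and dispatches through this action-indexed table; each handler returns the number of payload bytes it consumed, or -1 once it has taken ownership of the mbuf. A tiny standalone model of that contract, with made-up handlers and message sizes:

	#include <stdio.h>

	/* Handlers return bytes consumed, or -1 to end the walk. */
	static int h_data(int len) { return (len); }
	static int h_eof(int len)  { (void)len; return (-1); }

	static int (*acts[])(int) = { h_data, h_eof };

	int
	main(void)
	{
		/* (action, length) records, as in the subheader walk */
		int msgs[][2] = { {0, 8}, {0, 4}, {1, 0} };
		int off = 0;

		for (size_t i = 0; i < sizeof(msgs) / sizeof(msgs[0]); i++) {
			int rv = (*acts[msgs[i][0]])(msgs[i][1]);
			if (rv == -1)
				break;		/* handler owns the packet now */
			off += rv;
		}
		printf("consumed %d payload bytes\n", off);	/* prints 12 */
		return (0);
	}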
+
+struct pfsync_q {
+	int		(*write)(struct pf_state *, struct mbuf *, int);
+	size_t		len;
+	u_int8_t	action;
+};
+
+/* we have one of these for every PFSYNC_S_ */
+static int	pfsync_out_state(struct pf_state *, struct mbuf *, int);
+static int	pfsync_out_iack(struct pf_state *, struct mbuf *, int);
+static int	pfsync_out_upd_c(struct pf_state *, struct mbuf *, int);
+static int	pfsync_out_del(struct pf_state *, struct mbuf *, int);
+
+static struct pfsync_q pfsync_qs[] = {
+	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
+	{ pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
+	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD },
+	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
+	{ pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C }
+};
+
+static void	pfsync_q_ins(struct pf_state *, int);
+static void	pfsync_q_del(struct pf_state *);
+
+static void	pfsync_update_state(struct pf_state *);
+
+struct pfsync_upd_req_item {
+	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;
+	struct pfsync_upd_req			ur_msg;
+};
+
+struct pfsync_deferral {
+	struct pfsync_softc		*pd_sc;
+	TAILQ_ENTRY(pfsync_deferral)	pd_entry;
+	u_int				pd_refs;
+	struct callout			pd_tmo;
+
+	struct pf_state			*pd_st;
+	struct mbuf			*pd_m;
+};
+
+struct pfsync_softc {
+	/* Configuration */
+	struct ifnet		*sc_ifp;
+	struct ifnet		*sc_sync_if;
+	struct ip_moptions	sc_imo;
+	struct in_addr		sc_sync_peer;
+	uint32_t		sc_flags;
+#define	PFSYNCF_OK		0x00000001
+#define	PFSYNCF_DEFER		0x00000002
+#define	PFSYNCF_PUSH		0x00000004
+	uint8_t			sc_maxupdates;
+	struct ip		sc_template;
+	struct callout		sc_tmo;
+	struct mtx		sc_mtx;
+
+	/* Queued data */
+	size_t			sc_len;
+	TAILQ_HEAD(, pf_state)			sc_qs[PFSYNC_S_COUNT];
+	TAILQ_HEAD(, pfsync_upd_req_item)	sc_upd_req_list;
+	TAILQ_HEAD(, pfsync_deferral)		sc_deferrals;
+	u_int			sc_deferred;
+	void			*sc_plus;
+	size_t			sc_pluslen;
+
+	/* Bulk update info */
+	struct mtx		sc_bulk_mtx;
+	uint32_t		sc_ureq_sent;
+	int			sc_bulk_tries;
+	uint32_t		sc_ureq_received;
+	int			sc_bulk_hashid;
+	uint64_t		sc_bulk_stateid;
+	uint32_t		sc_bulk_creatorid;
+	struct callout		sc_bulk_tmo;
+	struct callout		sc_bulkfail_tmo;
+};
+
+#define	PFSYNC_LOCK(sc)		mtx_lock(&(sc)->sc_mtx)
+#define	PFSYNC_UNLOCK(sc)	mtx_unlock(&(sc)->sc_mtx)
+#define	PFSYNC_LOCK_ASSERT(sc)	mtx_assert(&(sc)->sc_mtx, MA_OWNED)
+
+#define	PFSYNC_BLOCK(sc)	mtx_lock(&(sc)->sc_bulk_mtx)
+#define	PFSYNC_BUNLOCK(sc)	mtx_unlock(&(sc)->sc_bulk_mtx)
+#define	PFSYNC_BLOCK_ASSERT(sc)	mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED)
+
+static MALLOC_DEFINE(M_PFSYNC, "pfsync", "pfsync(4) data");
+static VNET_DEFINE(struct pfsync_softc *, pfsyncif) = NULL;
+#define	V_pfsyncif		VNET(pfsyncif)
+static VNET_DEFINE(void *, pfsync_swi_cookie) = NULL;
+#define	V_pfsync_swi_cookie	VNET(pfsync_swi_cookie)
+static VNET_DEFINE(struct pfsyncstats, pfsyncstats);
+#define	V_pfsyncstats		VNET(pfsyncstats)
+static VNET_DEFINE(int, pfsync_carp_adj) = CARP_MAXSKEW;
+#define	V_pfsync_carp_adj	VNET(pfsync_carp_adj)
+
+static void	pfsync_timeout(void *);
+static void	pfsync_push(struct pfsync_softc *);
+static void	pfsyncintr(void *);
+static int	pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *,
+		    void *);
+static void	pfsync_multicast_cleanup(struct pfsync_softc *);
+static int	pfsync_init(void);
+static void	pfsync_uninit(void);
+
+SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC");
+SYSCTL_VNET_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_RW,
+    &VNET_NAME(pfsyncstats), pfsyncstats,
+    "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)");
+SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_RW,
+    &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment");
+
+static int	pfsync_clone_create(struct if_clone *, int, caddr_t);
+static void	pfsync_clone_destroy(struct ifnet *);
+static int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
+		    struct pf_state_peer *);
+static int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
+		    struct route *);
+static int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
+
+static int	pfsync_defer(struct pf_state *, struct mbuf *);
+static void	pfsync_undefer(struct pfsync_deferral *, int);
+static void	pfsync_undefer_state(struct pf_state *, int);
+static void	pfsync_defer_tmo(void *);
+
+static void	pfsync_request_update(u_int32_t, u_int64_t);
+static void	pfsync_update_state_req(struct pf_state *);
+
+static void	pfsync_drop(struct pfsync_softc *);
+static void	pfsync_sendout(int);
+static void	pfsync_send_plus(void *, size_t);
+
+static void	pfsync_bulk_start(void);
+static void	pfsync_bulk_status(u_int8_t);
+static void	pfsync_bulk_update(void *);
+static void	pfsync_bulk_fail(void *);
+
+#ifdef IPSEC
+static void	pfsync_update_net_tdb(struct pfsync_tdb *);
+#endif
+
+#define PFSYNC_MAX_BULKTRIES	12
+
+VNET_DEFINE(struct ifc_simple_data, pfsync_cloner_data);
+VNET_DEFINE(struct if_clone, pfsync_cloner);
+#define	V_pfsync_cloner_data	VNET(pfsync_cloner_data)
+#define	V_pfsync_cloner		VNET(pfsync_cloner)
+IFC_SIMPLE_DECLARE(pfsync, 1);
+
+static int
+pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param)
+{
+	struct pfsync_softc *sc;
+	struct ifnet *ifp;
+	int q;
+
+	if (unit != 0)
+		return (EINVAL);
+
+	sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO);
+	sc->sc_flags |= PFSYNCF_OK;
+
+	for (q = 0; q < PFSYNC_S_COUNT; q++)
+		TAILQ_INIT(&sc->sc_qs[q]);
+
+	TAILQ_INIT(&sc->sc_upd_req_list);
+	TAILQ_INIT(&sc->sc_deferrals);
+
+	sc->sc_len = PFSYNC_MINPKT;
+	sc->sc_maxupdates = 128;
+
+	ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC);
+	if (ifp == NULL) {
+		free(sc, M_PFSYNC);
+		return (ENOSPC);
+	}
+	if_initname(ifp, ifc->ifc_name, unit);
+	ifp->if_softc = sc;
+	ifp->if_ioctl = pfsyncioctl;
+	ifp->if_output = pfsyncoutput;
+	ifp->if_type = IFT_PFSYNC;
+	ifp->if_snd.ifq_maxlen = ifqmaxlen;
+	ifp->if_hdrlen = sizeof(struct pfsync_header);
+	ifp->if_mtu = ETHERMTU;
+	mtx_init(&sc->sc_mtx, "pfsync", NULL, MTX_DEF);
+	mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF);
+	callout_init(&sc->sc_tmo, CALLOUT_MPSAFE);
+	callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0);
+	callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0);
+
+	if_attach(ifp);
+
+	bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
+
+	V_pfsyncif = sc;
+
+	return (0);
+}
+
+static void
+pfsync_clone_destroy(struct ifnet *ifp)
+{
+	struct pfsync_softc *sc = ifp->if_softc;
+
+	/*
+	 * At this stage, everything should have already been
+	 * cleared by pfsync_uninit(), and we have only to
+	 * drain callouts.
+	 */
+	while (sc->sc_deferred > 0) {
+		struct pfsync_deferral *pd = TAILQ_FIRST(&sc->sc_deferrals);
+
+		TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
+		sc->sc_deferred--;
+		if (callout_stop(&pd->pd_tmo)) {
+			pf_release_state(pd->pd_st);
+			m_freem(pd->pd_m);
+			free(pd, M_PFSYNC);
+		} else {
+			pd->pd_refs++;
+			callout_drain(&pd->pd_tmo);
+			free(pd, M_PFSYNC);
+		}
+	}
+
+	callout_drain(&sc->sc_tmo);
+	callout_drain(&sc->sc_bulkfail_tmo);
+	callout_drain(&sc->sc_bulk_tmo);
+
+	if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
+		(*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy");
+	bpfdetach(ifp);
+	if_detach(ifp);
+
+	pfsync_drop(sc);
+
+	if_free(ifp);
+	if (sc->sc_imo.imo_membership)
+		pfsync_multicast_cleanup(sc);
+	mtx_destroy(&sc->sc_mtx);
+	mtx_destroy(&sc->sc_bulk_mtx);
+	free(sc, M_PFSYNC);
+
+	V_pfsyncif = NULL;
+}
+
+static int
+pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
+    struct pf_state_peer *d)
+{
+	if (s->scrub.scrub_flag && d->scrub == NULL) {
+		d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO);
+		if (d->scrub == NULL)
+			return (ENOMEM);
+	}
+
+	return (0);
+}
+
+
+static int
+pfsync_state_import(struct pfsync_state *sp, u_int8_t flags)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	struct pf_state	*st = NULL;
+	struct pf_state_key *skw = NULL, *sks = NULL;
+	struct pf_rule *r = NULL;
+	struct pfi_kif	*kif;
+	int error;
+
+	PF_RULES_RASSERT();
+
+	if (sp->creatorid == 0 && V_pf_status.debug >= PF_DEBUG_MISC) {
+		printf("%s: invalid creator id: %08x\n", __func__,
+		    ntohl(sp->creatorid));
+		return (EINVAL);
+	}
+
+	if ((kif = pfi_kif_find(sp->ifname)) == NULL) {
+		if (V_pf_status.debug >= PF_DEBUG_MISC)
+			printf("%s: unknown interface: %s\n", __func__,
+			    sp->ifname);
+		if (flags & PFSYNC_SI_IOCTL)
+			return (EINVAL);
+		return (0);	/* skip this state */
+	}
+
+	/*
+	 * If the ruleset checksums match or the state is coming from the ioctl,
+	 * it's safe to associate the state with the rule of that number.
+	 */
+	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
+	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
+	    pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount)
+		r = pf_main_ruleset.rules[
+		    PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)];
+	else
+		r = &V_pf_default_rule;
+
+	if ((r->max_states && r->states_cur >= r->max_states))
+		goto cleanup;
+
+	/*
+	 * XXXGL: consider M_WAITOK in ioctl path after.
+	 */
+	if ((st = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO)) == NULL)
+		goto cleanup;
+
+	if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL)
+		goto cleanup;
+
+	if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
+	    &sp->key[PF_SK_STACK].addr[0], sp->af) ||
+	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
+	    &sp->key[PF_SK_STACK].addr[1], sp->af) ||
+	    sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
+	    sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1]) {
+		sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
+		if (sks == NULL)
+			goto cleanup;
+	} else
+		sks = skw;
+
+	/* allocate memory for scrub info */
+	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
+	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
+		goto cleanup;
+
+	/* copy to state key(s) */
+	skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
+	skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
+	skw->port[0] = sp->key[PF_SK_WIRE].port[0];
+	skw->port[1] = sp->key[PF_SK_WIRE].port[1];
+	skw->proto = sp->proto;
+	skw->af = sp->af;
+	if (sks != skw) {
+		sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
+		sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
+		sks->port[0] = sp->key[PF_SK_STACK].port[0];
+		sks->port[1] = sp->key[PF_SK_STACK].port[1];
+		sks->proto = sp->proto;
+		sks->af = sp->af;
+	}
+
+	/* copy to state */
+	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
+	st->creation = time_uptime - ntohl(sp->creation);
+	st->expire = time_uptime;
+	if (sp->expire) {
+		uint32_t timeout;
+
+		timeout = r->timeout[sp->timeout];
+		if (!timeout)
+			timeout = V_pf_default_rule.timeout[sp->timeout];
+
+		/* sp->expire may have been adaptively scaled by export. */
+		st->expire -= timeout - ntohl(sp->expire);
+	}
+
+	st->direction = sp->direction;
+	st->log = sp->log;
+	st->timeout = sp->timeout;
+	st->state_flags = sp->state_flags;
+
+	st->id = sp->id;
+	st->creatorid = sp->creatorid;
+	pf_state_peer_ntoh(&sp->src, &st->src);
+	pf_state_peer_ntoh(&sp->dst, &st->dst);
+
+	st->rule.ptr = r;
+	st->nat_rule.ptr = NULL;
+	st->anchor.ptr = NULL;
+	st->rt_kif = NULL;
+
+	st->pfsync_time = time_uptime;
+	st->sync_state = PFSYNC_S_NONE;
+
+	/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
+	r->states_cur++;
+	r->states_tot++;
+
+	if (!(flags & PFSYNC_SI_IOCTL))
+		st->state_flags |= PFSTATE_NOSYNC;
+
+	if ((error = pf_state_insert(kif, skw, sks, st)) != 0) {
+		/* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */
+		r->states_cur--;
+		goto cleanup_state;
+	}
+
+	if (!(flags & PFSYNC_SI_IOCTL)) {
+		st->state_flags &= ~PFSTATE_NOSYNC;
+		if (st->state_flags & PFSTATE_ACK) {
+			pfsync_q_ins(st, PFSYNC_S_IACK);
+			pfsync_push(sc);
+		}
+	}
+	st->state_flags &= ~PFSTATE_ACK;
+	PF_STATE_UNLOCK(st);
+
+	return (0);
+
+cleanup:
+	error = ENOMEM;
+	if (skw == sks)
+		sks = NULL;
+	if (skw != NULL)
+		uma_zfree(V_pf_state_key_z, skw);
+	if (sks != NULL)
+		uma_zfree(V_pf_state_key_z, sks);
+
+cleanup_state:	/* pf_state_insert() frees the state keys. */
+	if (st) {
+		if (st->dst.scrub)
+			uma_zfree(V_pf_state_scrub_z, st->dst.scrub);
+		if (st->src.scrub)
+			uma_zfree(V_pf_state_scrub_z, st->src.scrub);
+		uma_zfree(V_pf_state_z, st);
+	}
+	return (error);
+}
+
+static void
+pfsync_input(struct mbuf *m, __unused int off)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	struct pfsync_pkt pkt;
+	struct ip *ip = mtod(m, struct ip *);
+	struct pfsync_header *ph;
+	struct pfsync_subheader subh;
+
+	int offset;
+	int rv;
+	uint16_t count;
+
+	V_pfsyncstats.pfsyncs_ipackets++;
+
+	/* Verify that we have a sync interface configured. */
+	if (!sc || !sc->sc_sync_if || !V_pf_status.running ||
+	    (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+		goto done;
+
+	/* verify that the packet came in on the right interface */
+	if (sc->sc_sync_if != m->m_pkthdr.rcvif) {
+		V_pfsyncstats.pfsyncs_badif++;
+		goto done;
+	}
+
+	sc->sc_ifp->if_ipackets++;
+	sc->sc_ifp->if_ibytes += m->m_pkthdr.len;
+	/* verify that the IP TTL is 255. */
+	if (ip->ip_ttl != PFSYNC_DFLTTL) {
+		V_pfsyncstats.pfsyncs_badttl++;
+		goto done;
+	}
+
+	offset = ip->ip_hl << 2;
+	if (m->m_pkthdr.len < offset + sizeof(*ph)) {
+		V_pfsyncstats.pfsyncs_hdrops++;
+		goto done;
+	}
+
+	if (offset + sizeof(*ph) > m->m_len) {
+		if (m_pullup(m, offset + sizeof(*ph)) == NULL) {
+			V_pfsyncstats.pfsyncs_hdrops++;
+			return;
+		}
+		ip = mtod(m, struct ip *);
+	}
+	ph = (struct pfsync_header *)((char *)ip + offset);
+
+	/* verify the version */
+	if (ph->version != PFSYNC_VERSION) {
+		V_pfsyncstats.pfsyncs_badver++;
+		goto done;
+	}
+
+	/* Cheaper to grab this now than having to mess with mbufs later */
+	pkt.ip = ip;
+	pkt.src = ip->ip_src;
+	pkt.flags = 0;
+
+	/*
+	 * Trusting pf_chksum during packet processing, as well as seeking
+	 * in interface name tree, require holding PF_RULES_RLOCK().
+	 */
+	PF_RULES_RLOCK();
+	if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
+		pkt.flags |= PFSYNC_SI_CKSUM;
+
+	offset += sizeof(*ph);
+	for (;;) {
+		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
+		offset += sizeof(subh);
+
+		if (subh.action >= PFSYNC_ACT_MAX) {
+			V_pfsyncstats.pfsyncs_badact++;
+			PF_RULES_RUNLOCK();
+			goto done;
+		}
+
+		count = ntohs(subh.count);
+		V_pfsyncstats.pfsyncs_iacts[subh.action] += count;
+		rv = (*pfsync_acts[subh.action])(&pkt, m, offset, count);
+		if (rv == -1) {
+			PF_RULES_RUNLOCK();
+			return;
+		}
+
+		offset += rv;
+	}
+	PF_RULES_RUNLOCK();
+
+done:
+	m_freem(m);
+}
+
+static int
+pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+	struct pfsync_clr *clr;
+	struct mbuf *mp;
+	int len = sizeof(*clr) * count;
+	int i, offp;
+	u_int32_t creatorid;
+
+	mp = m_pulldown(m, offset, len, &offp);
+	if (mp == NULL) {
+		V_pfsyncstats.pfsyncs_badlen++;
+		return (-1);
+	}
+	clr = (struct pfsync_clr *)(mp->m_data + offp);
+
+	for (i = 0; i < count; i++) {
+		creatorid = clr[i].creatorid;
+
+		if (clr[i].ifname[0] != '\0' &&
+		    pfi_kif_find(clr[i].ifname) == NULL)
+			continue;
+
+		for (int i = 0; i <= V_pf_hashmask; i++) {
+			struct pf_idhash *ih = &V_pf_idhash[i];
+			struct pf_state *s;
+relock:
+			PF_HASHROW_LOCK(ih);
+			LIST_FOREACH(s, &ih->states, entry) {
+				if (s->creatorid == creatorid) {
+					s->state_flags |= PFSTATE_NOSYNC;
+					pf_unlink_state(s, PF_ENTER_LOCKED);
+					goto relock;
+				}
+			}
+			PF_HASHROW_UNLOCK(ih);
+		}
+	}
+
+	return (len);
+}
+
+static int
+pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+	struct mbuf *mp;
+	struct pfsync_state *sa, *sp;
+	int len = sizeof(*sp) * count;
+	int i, offp;
+
+	mp = m_pulldown(m, offset, len, &offp);
+	if (mp == NULL) {
+		V_pfsyncstats.pfsyncs_badlen++;
+		return (-1);
+	}
+	sa = (struct pfsync_state *)(mp->m_data + offp);
+
+	for (i = 0; i < count; i++) {
+		sp = &sa[i];
+
+		/* Check for invalid values. */
+		if (sp->timeout >= PFTM_MAX ||
+		    sp->src.state > PF_TCPS_PROXY_DST ||
+		    sp->dst.state > PF_TCPS_PROXY_DST ||
+		    sp->direction > PF_OUT ||
+		    (sp->af != AF_INET && sp->af != AF_INET6)) {
+			if (V_pf_status.debug >= PF_DEBUG_MISC)
+				printf("%s: invalid value\n", __func__);
+			V_pfsyncstats.pfsyncs_badval++;
+			continue;
+		}
+
+		if (pfsync_state_import(sp, pkt->flags) == ENOMEM)
+			/* Drop out, but process the rest of the actions. */
+			break;
+	}
+
+	return (len);
+}
+
+static int
+pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+	struct pfsync_ins_ack *ia, *iaa;
+	struct pf_state *st;
+
+	struct mbuf *mp;
+	int len = count * sizeof(*ia);
+	int offp, i;
+
+	mp = m_pulldown(m, offset, len, &offp);
+	if (mp == NULL) {
+		V_pfsyncstats.pfsyncs_badlen++;
+		return (-1);
+	}
+	iaa = (struct pfsync_ins_ack *)(mp->m_data + offp);
+
+	for (i = 0; i < count; i++) {
+		ia = &iaa[i];
+
+		st = pf_find_state_byid(ia->id, ia->creatorid);
+		if (st == NULL)
+			continue;
+
+		if (st->state_flags & PFSTATE_ACK) {
+			PFSYNC_LOCK(V_pfsyncif);
+			pfsync_undefer_state(st, 0);
+			PFSYNC_UNLOCK(V_pfsyncif);
+		}
+		PF_STATE_UNLOCK(st);
+	}
+	/*
+	 * XXX this is not yet implemented, but we know the size of the
+	 * message so we can skip it.
+	 */
+
+	return (count * sizeof(struct pfsync_ins_ack));
+}
+
+static int
+pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
+    struct pfsync_state_peer *dst)
+{
+	int sfail = 0;
+
+	PF_STATE_LOCK_ASSERT(st);
+
+	/*
+	 * The state should never go backwards except
+	 * for syn-proxy states.  Neither should the
+	 * sequence window slide backwards.
+	 */
+	if (st->src.state > src->state &&
+	    (st->src.state < PF_TCPS_PROXY_SRC ||
+	    src->state >= PF_TCPS_PROXY_SRC))
+		sfail = 1;
+	else if (SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))
+		sfail = 3;
+	else if (st->dst.state > dst->state) {
+		/* There might still be useful
+		 * information about the src state here,
+		 * so import that part of the update,
+		 * then "fail" so we send the updated
+		 * state back to the peer who is missing
+		 * what we know. */
+		pf_state_peer_ntoh(src, &st->src);
+		/* XXX do anything with timeouts? */
+		sfail = 7;
+	} else if (st->dst.state >= TCPS_SYN_SENT &&
+	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))
+		sfail = 4;
+
+	return (sfail);
+}
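pfsync_upd_tcp() relies on SEQ_GT() for its "never slide backwards" checks; the macro compares 32-bit sequence numbers in modular arithmetic, so it stays correct across wraparound. A standalone illustration (the macro body here matches the SEQ_GT() definition in <netinet/tcp_seq.h>):

	#include <stdint.h>
	#include <stdio.h>

	#define SEQ_GT(a, b)	((int32_t)((a) - (b)) > 0)

	int
	main(void)
	{
		uint32_t old = 0xfffffff0U;	/* just before wraparound */
		uint32_t new = 0x00000010U;	/* 0x20 bytes later, wrapped */

		/* Plain '>' gets this wrong; the modular compare does not. */
		printf("new > old: %d, SEQ_GT(new, old): %d\n",
		    new > old, SEQ_GT(new, old));	/* prints 0, 1 */
		return (0);
	}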
+
+static int
+pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	struct pfsync_state *sa, *sp;
+	struct pf_state_key *sk;
+	struct pf_state *st;
+	int sfail;
+
+	struct mbuf *mp;
+	int len = count * sizeof(*sp);
+	int offp, i;
+
+	mp = m_pulldown(m, offset, len, &offp);
+	if (mp == NULL) {
+		V_pfsyncstats.pfsyncs_badlen++;
+		return (-1);
+	}
+	sa = (struct pfsync_state *)(mp->m_data + offp);
+
+	for (i = 0; i < count; i++) {
+		sp = &sa[i];
+
+		/* check for invalid values */
+		if (sp->timeout >= PFTM_MAX ||
+		    sp->src.state > PF_TCPS_PROXY_DST ||
+		    sp->dst.state > PF_TCPS_PROXY_DST) {
+			if (V_pf_status.debug >= PF_DEBUG_MISC) {
+				printf("pfsync_input: PFSYNC_ACT_UPD: "
+				    "invalid value\n");
+			}
+			V_pfsyncstats.pfsyncs_badval++;
+			continue;
+		}
+
+		st = pf_find_state_byid(sp->id, sp->creatorid);
+		if (st == NULL) {
+			/* insert the update */
+			if (pfsync_state_import(sp, 0))
+				V_pfsyncstats.pfsyncs_badstate++;
+			continue;
+		}
+
+		if (st->state_flags & PFSTATE_ACK) {
+			PFSYNC_LOCK(sc);
+			pfsync_undefer_state(st, 1);
+			PFSYNC_UNLOCK(sc);
+		}
+
+		sk = st->key[PF_SK_WIRE];	/* XXX right one? */
+		sfail = 0;
+		if (sk->proto == IPPROTO_TCP)
+			sfail = pfsync_upd_tcp(st, &sp->src, &sp->dst);
+		else {
+			/*
+			 * Non-TCP protocol state machines always go
+			 * forwards
+			 */
+			if (st->src.state > sp->src.state)
+				sfail = 5;
+			else if (st->dst.state > sp->dst.state)
+				sfail = 6;
+		}
+
+		if (sfail) {
+			if (V_pf_status.debug >= PF_DEBUG_MISC) {
+				printf("pfsync: %s stale update (%d)"
+				    " id: %016llx creatorid: %08x\n",
+				    (sfail < 7 ? "ignoring" : "partial"),
+				    sfail, (unsigned long long)be64toh(st->id),
+				    ntohl(st->creatorid));
+			}
+			V_pfsyncstats.pfsyncs_stale++;
+
+			pfsync_update_state(st);
+			PF_STATE_UNLOCK(st);
+			PFSYNC_LOCK(sc);
+			pfsync_push(sc);
+			PFSYNC_UNLOCK(sc);
+			continue;
+		}
+		pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
+		pf_state_peer_ntoh(&sp->src, &st->src);
+		pf_state_peer_ntoh(&sp->dst, &st->dst);
+		st->expire = time_uptime;
+		st->timeout = sp->timeout;
+		st->pfsync_time = time_uptime;
+		PF_STATE_UNLOCK(st);
+	}
+
+	return (len);
+}
+
+static int
+pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	struct pfsync_upd_c *ua, *up;
+	struct pf_state_key *sk;
+	struct pf_state *st;
+
+	int len = count * sizeof(*up);
+	int sfail;
+
+	struct mbuf *mp;
+	int offp, i;
+
+	mp = m_pulldown(m, offset, len, &offp);
+	if (mp == NULL) {
+		V_pfsyncstats.pfsyncs_badlen++;
+		return (-1);
+	}
+	ua = (struct pfsync_upd_c *)(mp->m_data + offp);
+
+	for (i = 0; i < count; i++) {
+		up = &ua[i];
+
+		/* check for invalid values */
+		if (up->timeout >= PFTM_MAX ||
+		    up->src.state > PF_TCPS_PROXY_DST ||
+		    up->dst.state > PF_TCPS_PROXY_DST) {
+			if (V_pf_status.debug >= PF_DEBUG_MISC) {
+				printf("pfsync_input: "
+				    "PFSYNC_ACT_UPD_C: "
+				    "invalid value\n");
+			}
+			V_pfsyncstats.pfsyncs_badval++;
+			continue;
+		}
+
+		st = pf_find_state_byid(up->id, up->creatorid);
+		if (st == NULL) {
+			/* We don't have this state. Ask for it. */
+			PFSYNC_LOCK(sc);
+			pfsync_request_update(up->creatorid, up->id);
+			PFSYNC_UNLOCK(sc);
+			continue;
+		}
+
+		if (st->state_flags & PFSTATE_ACK) {
+			PFSYNC_LOCK(sc);
+			pfsync_undefer_state(st, 1);
+			PFSYNC_UNLOCK(sc);
+		}
+
+		sk = st->key[PF_SK_WIRE];	/* XXX right one? */
+		sfail = 0;
+		if (sk->proto == IPPROTO_TCP)
+			sfail = pfsync_upd_tcp(st, &up->src, &up->dst);
+		else {
+			/*
+			 * Non-TCP protocol state machines always go forwards
+			 */
+			if (st->src.state > up->src.state)
+				sfail = 5;
+			else if (st->dst.state > up->dst.state)
+				sfail = 6;
+		}
+
+		if (sfail) {
+			if (V_pf_status.debug >= PF_DEBUG_MISC) {
+				printf("pfsync: ignoring stale update "
+				    "(%d) id: %016llx "
+				    "creatorid: %08x\n", sfail,
+				    (unsigned long long)be64toh(st->id),
+				    ntohl(st->creatorid));
+			}
+			V_pfsyncstats.pfsyncs_stale++;
+
+			pfsync_update_state(st);
+			PF_STATE_UNLOCK(st);
+			PFSYNC_LOCK(sc);
+			pfsync_push(sc);
+			PFSYNC_UNLOCK(sc);
+			continue;
+		}
+		pfsync_alloc_scrub_memory(&up->dst, &st->dst);
+		pf_state_peer_ntoh(&up->src, &st->src);
+		pf_state_peer_ntoh(&up->dst, &st->dst);
+		st->expire = time_uptime;
+		st->timeout = up->timeout;
+		st->pfsync_time = time_uptime;
+		PF_STATE_UNLOCK(st);
+	}
+
+	return (len);
+}
+
+static int
+pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+	struct pfsync_upd_req *ur, *ura;
+	struct mbuf *mp;
+	int len = count * sizeof(*ur);
+	int i, offp;
+
+	struct pf_state *st;
+
+	mp = m_pulldown(m, offset, len, &offp);
+	if (mp == NULL) {
+		V_pfsyncstats.pfsyncs_badlen++;
+		return (-1);
+	}
+	ura = (struct pfsync_upd_req *)(mp->m_data + offp);
+
+	for (i = 0; i < count; i++) {
+		ur = &ura[i];
+
+		if (ur->id == 0 && ur->creatorid == 0)
+			pfsync_bulk_start();
+		else {
+			st = pf_find_state_byid(ur->id, ur->creatorid);
+			if (st == NULL) {
+				V_pfsyncstats.pfsyncs_badstate++;
+				continue;
+			}
+			if (st->state_flags & PFSTATE_NOSYNC) {
+				PF_STATE_UNLOCK(st);
+				continue;
+			}
+
+			pfsync_update_state_req(st);
+			PF_STATE_UNLOCK(st);
+		}
+	}
+
+	return (len);
+}
+
+static int
+pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+	struct mbuf *mp;
+	struct pfsync_state *sa, *sp;
+	struct pf_state *st;
+	int len = count * sizeof(*sp);
+	int offp, i;
+
+	mp = m_pulldown(m, offset, len, &offp);
+	if (mp == NULL) {
+		V_pfsyncstats.pfsyncs_badlen++;
+		return (-1);
+	}
+	sa = (struct pfsync_state *)(mp->m_data + offp);
+
+	for (i = 0; i < count; i++) {
+		sp = &sa[i];
+
+		st = pf_find_state_byid(sp->id, sp->creatorid);
+		if (st == NULL) {
+			V_pfsyncstats.pfsyncs_badstate++;
+			continue;
+		}
+		st->state_flags |= PFSTATE_NOSYNC;
+		pf_unlink_state(st, PF_ENTER_LOCKED);
+	}
+
+	return (len);
+}
+
+static int
+pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+	struct mbuf *mp;
+	struct pfsync_del_c *sa, *sp;
+	struct pf_state *st;
+	int len = count * sizeof(*sp);
+	int offp, i;
+
+	mp = m_pulldown(m, offset, len, &offp);
+	if (mp == NULL) {
+		V_pfsyncstats.pfsyncs_badlen++;
+		return (-1);
+	}
+	sa = (struct pfsync_del_c *)(mp->m_data + offp);
+
+	for (i = 0; i < count; i++) {
+		sp = &sa[i];
+
+		st = pf_find_state_byid(sp->id, sp->creatorid);
+		if (st == NULL) {
+			V_pfsyncstats.pfsyncs_badstate++;
+			continue;
+		}
+
+		st->state_flags |= PFSTATE_NOSYNC;
+		pf_unlink_state(st, PF_ENTER_LOCKED);
+	}
+
+	return (len);
+}
+
+static int
+pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	struct pfsync_bus *bus;
+	struct mbuf *mp;
+	int len = count * sizeof(*bus);
+	int offp;
+
+	PFSYNC_BLOCK(sc);
+
+	/* If we're not waiting for a bulk update, who cares. */
+	if (sc->sc_ureq_sent == 0) {
+		PFSYNC_BUNLOCK(sc);
+		return (len);
+	}
+
+	mp = m_pulldown(m, offset, len, &offp);
+	if (mp == NULL) {
+		PFSYNC_BUNLOCK(sc);
+		V_pfsyncstats.pfsyncs_badlen++;
+		return (-1);
+	}
+	bus = (struct pfsync_bus *)(mp->m_data + offp);
+
+	switch (bus->status) {
+	case PFSYNC_BUS_START:
+		callout_reset(&sc->sc_bulkfail_tmo, 4 * hz +
+		    V_pf_limits[PF_LIMIT_STATES].limit /
+		    ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) /
+		    sizeof(struct pfsync_state)),
+		    pfsync_bulk_fail, sc);
+		if (V_pf_status.debug >= PF_DEBUG_MISC)
+			printf("pfsync: received bulk update start\n");
+		break;
+
+	case PFSYNC_BUS_END:
+		if (time_uptime - ntohl(bus->endtime) >=
+		    sc->sc_ureq_sent) {
+			/* that's it, we're happy */
+			sc->sc_ureq_sent = 0;
+			sc->sc_bulk_tries = 0;
+			callout_stop(&sc->sc_bulkfail_tmo);
+			if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
+				(*carp_demote_adj_p)(-V_pfsync_carp_adj,
+				    "pfsync bulk done");
+			sc->sc_flags |= PFSYNCF_OK;
+			if (V_pf_status.debug >= PF_DEBUG_MISC)
+				printf("pfsync: received valid "
+				    "bulk update end\n");
+		} else {
+			if (V_pf_status.debug >= PF_DEBUG_MISC)
+				printf("pfsync: received invalid "
+				    "bulk update end: bad timestamp\n");
+		}
+		break;
+	}
+	PFSYNC_BUNLOCK(sc);
+
+	return (len);
+}
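The PFSYNC_BUS_START timeout above scales with how many packets a full state-table dump needs: 4*hz of fixed slack plus one tick per expected packet, where one packet carries roughly (MTU - PFSYNC_MINPKT) / sizeof(struct pfsync_state) states. A worked computation with assumed numbers (hz, the state limit, and both sizes below are illustrative, not authoritative):

	#include <stdio.h>

	int
	main(void)
	{
		int hz = 1000;		/* kern.hz; assumption */
		int limit = 10000;	/* PF_LIMIT_STATES limit; assumption */
		int mtu = 1500;
		int minpkt = 32;	/* stand-in for PFSYNC_MINPKT */
		int state_sz = 136;	/* stand-in for sizeof(struct pfsync_state) */

		int per_pkt = (mtu - minpkt) / state_sz;	/* states per packet */
		int ticks = 4 * hz + limit / per_pkt;		/* callout timeout */

		/* ~1 tick of slack per expected packet, on top of 4 seconds */
		printf("%d states/packet -> %d ticks (%.1f s at hz=%d)\n",
		    per_pkt, ticks, (double)ticks / hz, hz);
		return (0);
	}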
+
+static int
+pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+	int len = count * sizeof(struct pfsync_tdb);
+
+#if defined(IPSEC)
+	struct pfsync_tdb *tp;
+	struct mbuf *mp;
+	int offp;
+	int i;
+	int s;
+
+	mp = m_pulldown(m, offset, len, &offp);
+	if (mp == NULL) {
+		V_pfsyncstats.pfsyncs_badlen++;
+		return (-1);
+	}
+	tp = (struct pfsync_tdb *)(mp->m_data + offp);
+
+	for (i = 0; i < count; i++)
+		pfsync_update_net_tdb(&tp[i]);
+#endif
+
+	return (len);
+}
+
+#if defined(IPSEC)
+/* Update an in-kernel tdb. Silently fail if no tdb is found. */
+static void
+pfsync_update_net_tdb(struct pfsync_tdb *pt)
+{
+	struct tdb *tdb;
+	int s;
+
+	/* check for invalid values */
+	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
+	    (pt->dst.sa.sa_family != AF_INET &&
+	    pt->dst.sa.sa_family != AF_INET6))
+		goto bad;
+
+	tdb = gettdb(pt->spi, &pt->dst, pt->sproto);
+	if (tdb) {
+		pt->rpl = ntohl(pt->rpl);
+		pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes);
+
+		/* Neither replay nor byte counter should ever decrease. */
+		if (pt->rpl < tdb->tdb_rpl ||
+		    pt->cur_bytes < tdb->tdb_cur_bytes) {
+			goto bad;
+		}
+
+		tdb->tdb_rpl = pt->rpl;
+		tdb->tdb_cur_bytes = pt->cur_bytes;
+	}
+	return;
+
+bad:
+	if (V_pf_status.debug >= PF_DEBUG_MISC)
+		printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: "
+		    "invalid value\n");
+	V_pfsyncstats.pfsyncs_badstate++;
+	return;
+}
+#endif
+
+
+static int
+pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+	/* check if we are at the right place in the packet */
+	if (offset != m->m_pkthdr.len - sizeof(struct pfsync_eof))
+		V_pfsyncstats.pfsyncs_badact++;
+
+	/* we're done. free and let the caller return */
+	m_freem(m);
+	return (-1);
+}
+
+static int
+pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+	V_pfsyncstats.pfsyncs_badact++;
+
+	m_freem(m);
+	return (-1);
+}
+
+static int
+pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+	struct route *rt)
+{
+	m_freem(m);
+	return (0);
+}
+
+/* ARGSUSED */
+static int
+pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+	struct pfsync_softc *sc = ifp->if_softc;
+	struct ifreq *ifr = (struct ifreq *)data;
+	struct pfsyncreq pfsyncr;
+	int error;
+
+	switch (cmd) {
+	case SIOCSIFFLAGS:
+		PFSYNC_LOCK(sc);
+		if (ifp->if_flags & IFF_UP)
+			ifp->if_drv_flags |= IFF_DRV_RUNNING;
+		else
+			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+		PFSYNC_UNLOCK(sc);
+		break;
+	case SIOCSIFMTU:
+		if (!sc->sc_sync_if ||
+		    ifr->ifr_mtu <= PFSYNC_MINPKT ||
+		    ifr->ifr_mtu > sc->sc_sync_if->if_mtu)
+			return (EINVAL);
+		if (ifr->ifr_mtu < ifp->if_mtu) {
+			PFSYNC_LOCK(sc);
+			if (sc->sc_len > PFSYNC_MINPKT)
+				pfsync_sendout(1);
+			PFSYNC_UNLOCK(sc);
+		}
+		ifp->if_mtu = ifr->ifr_mtu;
+		break;
+	case SIOCGETPFSYNC:
+		bzero(&pfsyncr, sizeof(pfsyncr));
+		PFSYNC_LOCK(sc);
+		if (sc->sc_sync_if) {
+			strlcpy(pfsyncr.pfsyncr_syncdev,
+			    sc->sc_sync_if->if_xname, IFNAMSIZ);
+		}
+		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
+		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
+		pfsyncr.pfsyncr_defer = (PFSYNCF_DEFER ==
+		    (sc->sc_flags & PFSYNCF_DEFER));
+		PFSYNC_UNLOCK(sc);
+		return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));
+
+	case SIOCSETPFSYNC:
+	    {
+		struct ip_moptions *imo = &sc->sc_imo;
+		struct ifnet *sifp;
+		struct ip *ip;
+		void *mship = NULL;
+
+		if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0)
+			return (error);
+		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
+			return (error);
+
+		if (pfsyncr.pfsyncr_maxupdates > 255)
+			return (EINVAL);
+
+		if (pfsyncr.pfsyncr_syncdev[0] == 0)
+			sifp = NULL;
+		else if ((sifp = ifunit_ref(pfsyncr.pfsyncr_syncdev)) == NULL)
+			return (EINVAL);
+
+		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0 && sifp != NULL)
+			mship = malloc((sizeof(struct in_multi *) *
+			    IP_MIN_MEMBERSHIPS), M_PFSYNC, M_WAITOK | M_ZERO);
+
+		PFSYNC_LOCK(sc);
+		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
+			sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP);
+		else
+			sc->sc_sync_peer.s_addr =
+			    pfsyncr.pfsyncr_syncpeer.s_addr;
+
+		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
+		if (pfsyncr.pfsyncr_defer) {
+			sc->sc_flags |= PFSYNCF_DEFER;
+			pfsync_defer_ptr = pfsync_defer;
+		} else {
+			sc->sc_flags &= ~PFSYNCF_DEFER;
+			pfsync_defer_ptr = NULL;
+		}
+
+		if (sifp == NULL) {
+			if (sc->sc_sync_if)
+				if_rele(sc->sc_sync_if);
+			sc->sc_sync_if = NULL;
+			if (imo->imo_membership)
+				pfsync_multicast_cleanup(sc);
+			PFSYNC_UNLOCK(sc);
+			break;
+		}
+
+		if (sc->sc_len > PFSYNC_MINPKT &&
+		    (sifp->if_mtu < sc->sc_ifp->if_mtu ||
+		    (sc->sc_sync_if != NULL &&
+		    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
+		    sifp->if_mtu < MCLBYTES - sizeof(struct ip)))
+			pfsync_sendout(1);
+
+		if (imo->imo_membership)
+			pfsync_multicast_cleanup(sc);
+
+		if (sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) {
+			error = pfsync_multicast_setup(sc, sifp, mship);
+			if (error) {
+				if_rele(sifp);
+				free(mship, M_PFSYNC);
+				return (error);
+			}
+		}
+		if (sc->sc_sync_if)
+			if_rele(sc->sc_sync_if);
+		sc->sc_sync_if = sifp;
+
+		ip = &sc->sc_template;
+		bzero(ip, sizeof(*ip));
+		ip->ip_v = IPVERSION;
+		ip->ip_hl = sizeof(sc->sc_template) >> 2;
+		ip->ip_tos = IPTOS_LOWDELAY;
+		/* len and id are set later. */
+		ip->ip_off = IP_DF;
+		ip->ip_ttl = PFSYNC_DFLTTL;
+		ip->ip_p = IPPROTO_PFSYNC;
+		ip->ip_src.s_addr = INADDR_ANY;
+		ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;
+
+		/* Request a full state table update. */
+		if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
+			(*carp_demote_adj_p)(V_pfsync_carp_adj,
+			    "pfsync bulk start");
+		sc->sc_flags &= ~PFSYNCF_OK;
+		if (V_pf_status.debug >= PF_DEBUG_MISC)
+			printf("pfsync: requesting bulk update\n");
+		pfsync_request_update(0, 0);
+		PFSYNC_UNLOCK(sc);
+		PFSYNC_BLOCK(sc);
+		sc->sc_ureq_sent = time_uptime;
+		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail,
+		    sc);
+		PFSYNC_BUNLOCK(sc);
+
+		break;
+	    }
+	default:
+		return (ENOTTY);
+	}
+
+	return (0);
+}
+
+static int
+pfsync_out_state(struct pf_state *st, struct mbuf *m, int offset)
+{
+	struct pfsync_state *sp = (struct pfsync_state *)(m->m_data + offset);
+
+	pfsync_state_export(sp, st);
+
+	return (sizeof(*sp));
+}
+
+static int
+pfsync_out_iack(struct pf_state *st, struct mbuf *m, int offset)
+{
+	struct pfsync_ins_ack *iack =
+	    (struct pfsync_ins_ack *)(m->m_data + offset);
+
+	iack->id = st->id;
+	iack->creatorid = st->creatorid;
+
+	return (sizeof(*iack));
+}
+
+static int
+pfsync_out_upd_c(struct pf_state *st, struct mbuf *m, int offset)
+{
+	struct pfsync_upd_c *up = (struct pfsync_upd_c *)(m->m_data + offset);
+
+	bzero(up, sizeof(*up));
+	up->id = st->id;
+	pf_state_peer_hton(&st->src, &up->src);
+	pf_state_peer_hton(&st->dst, &up->dst);
+	up->creatorid = st->creatorid;
+	up->timeout = st->timeout;
+
+	return (sizeof(*up));
+}
+
+static int
+pfsync_out_del(struct pf_state *st, struct mbuf *m, int offset)
+{
+	struct pfsync_del_c *dp = (struct pfsync_del_c *)(m->m_data + offset);
+
+	dp->id = st->id;
+	dp->creatorid = st->creatorid;
+
+	st->state_flags |= PFSTATE_NOSYNC;
+
+	return (sizeof(*dp));
+}
+
+static void
+pfsync_drop(struct pfsync_softc *sc)
+{
+	struct pf_state *st, *next;
+	struct pfsync_upd_req_item *ur;
+	int q;
+
+	for (q = 0; q < PFSYNC_S_COUNT; q++) {
+		if (TAILQ_EMPTY(&sc->sc_qs[q]))
+			continue;
+
+		TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, next) {
+			KASSERT(st->sync_state == q,
+			    ("%s: st->sync_state == q", __func__));
+			st->sync_state = PFSYNC_S_NONE;
+			pf_release_state(st);
+		}
+		TAILQ_INIT(&sc->sc_qs[q]);
+	}
+
+	while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
+		TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
+		free(ur, M_PFSYNC);
+	}
+
+	sc->sc_plus = NULL;
+	sc->sc_len = PFSYNC_MINPKT;
+}
+
+static void
+pfsync_sendout(int schedswi)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	struct ifnet *ifp = sc->sc_ifp;
+	struct mbuf *m;
+	struct ip *ip;
+	struct pfsync_header *ph;
+	struct pfsync_subheader *subh;
+	struct pf_state *st, *next;
+	struct pfsync_upd_req_item *ur;
+	int offset;
+	int q, count = 0;
+
+	KASSERT(sc != NULL, ("%s: null sc", __func__));
+	KASSERT(sc->sc_len > PFSYNC_MINPKT,
+	    ("%s: sc_len %zu", __func__, sc->sc_len));
+	PFSYNC_LOCK_ASSERT(sc);
+
+	if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) {
+		pfsync_drop(sc);
+		return;
+	}
+
+	m = m_get2(M_NOWAIT, MT_DATA, M_PKTHDR, max_linkhdr + sc->sc_len);
+	if (m == NULL) {
+		sc->sc_ifp->if_oerrors++;
+		V_pfsyncstats.pfsyncs_onomem++;
+		return;
+	}
+	m->m_data += max_linkhdr;
+	m->m_len = m->m_pkthdr.len = sc->sc_len;
+
+	/* build the ip header */
+	ip = (struct ip *)m->m_data;
+	bcopy(&sc->sc_template, ip, sizeof(*ip));
+	offset = sizeof(*ip);
+
+	ip->ip_len = m->m_pkthdr.len;
+	ip->ip_id = htons(ip_randomid());
+
+	/* build the pfsync header */
+	ph = (struct pfsync_header *)(m->m_data + offset);
+	bzero(ph, sizeof(*ph));
+	offset += sizeof(*ph);
+
+	ph->version = PFSYNC_VERSION;
+	ph->len = htons(sc->sc_len - sizeof(*ip));
+	bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
+
+	/* walk the queues */
+	for (q = 0; q < PFSYNC_S_COUNT; q++) {
+		if (TAILQ_EMPTY(&sc->sc_qs[q]))
+			continue;
+
+		subh = (struct pfsync_subheader *)(m->m_data + offset);
+		offset += sizeof(*subh);
+
+		count = 0;
+		TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, next) {
+			KASSERT(st->sync_state == q,
+			    ("%s: st->sync_state == q", __func__));
+			/*
+			 * XXXGL: some of write methods do unlocked reads
+			 * of state data :(
+			 */
+			offset += pfsync_qs[q].write(st, m, offset);
+			st->sync_state = PFSYNC_S_NONE;
+			pf_release_state(st);
+			count++;
+		}
+		TAILQ_INIT(&sc->sc_qs[q]);
+
+		bzero(subh, sizeof(*subh));
+		subh->action = pfsync_qs[q].action;
+		subh->count = htons(count);
+		V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count;
+	}
+
+	if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
+		subh = (struct pfsync_subheader *)(m->m_data + offset);
+		offset += sizeof(*subh);
+
+		count = 0;
+		while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
+			TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
+
+			bcopy(&ur->ur_msg, m->m_data + offset,
+			    sizeof(ur->ur_msg));
+			offset += sizeof(ur->ur_msg);
+			free(ur, M_PFSYNC);
+			count++;
+		}
+
+		bzero(subh, sizeof(*subh));
+		subh->action = PFSYNC_ACT_UPD_REQ;
+		subh->count = htons(count);
+		V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count;
+	}
+
+	/* has someone built a custom region for us to add? */
+	if (sc->sc_plus != NULL) {
+		bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
+		offset += sc->sc_pluslen;
+
+		sc->sc_plus = NULL;
+	}
+
+	subh = (struct pfsync_subheader *)(m->m_data + offset);
+	offset += sizeof(*subh);
+
+	bzero(subh, sizeof(*subh));
+	subh->action = PFSYNC_ACT_EOF;
+	subh->count = htons(1);
+	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++;
+
+	/* XXX write checksum in EOF here */
+
+	/* we're done, let's put it on the wire */
+	if (ifp->if_bpf) {
+		m->m_data += sizeof(*ip);
+		m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
+		BPF_MTAP(ifp, m);
+		m->m_data -= sizeof(*ip);
+		m->m_len = m->m_pkthdr.len = sc->sc_len;
+	}
+
+	if (sc->sc_sync_if == NULL) {
+		sc->sc_len = PFSYNC_MINPKT;
+		m_freem(m);
+		return;
+	}
+
+	sc->sc_ifp->if_opackets++;
+	sc->sc_ifp->if_obytes += m->m_pkthdr.len;
+	sc->sc_len = PFSYNC_MINPKT;
+
+	if (!_IF_QFULL(&sc->sc_ifp->if_snd))
+		_IF_ENQUEUE(&sc->sc_ifp->if_snd, m);
+	else {
+		m_freem(m);
+		sc->sc_ifp->if_snd.ifq_drops++;
+	}
+	if (schedswi)
+		swi_sched(V_pfsync_swi_cookie, 0);
+}
+
+static void
+pfsync_insert_state(struct pf_state *st)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+
+	if (st->state_flags & PFSTATE_NOSYNC)
+		return;
+
+	if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) ||
+	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
+		st->state_flags |= PFSTATE_NOSYNC;
+		return;
+	}
+
+	KASSERT(st->sync_state == PFSYNC_S_NONE,
+	    ("%s: st->sync_state == PFSYNC_S_NONE", __func__));
+
+	PFSYNC_LOCK(sc);
+	if (sc->sc_len == PFSYNC_MINPKT)
+		callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif);
+
+	pfsync_q_ins(st, PFSYNC_S_INS);
+	PFSYNC_UNLOCK(sc);
+
+	st->sync_updates = 0;
+}
+
+static int
+pfsync_defer(struct pf_state *st, struct mbuf *m)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	struct pfsync_deferral *pd;
+
+	if (m->m_flags & (M_BCAST|M_MCAST))
+		return (0);
+
+	PFSYNC_LOCK(sc);
+
+	if (sc == NULL || !(sc->sc_ifp->if_flags & IFF_DRV_RUNNING) ||
+	    !(sc->sc_flags & PFSYNCF_DEFER)) {
+		PFSYNC_UNLOCK(sc);
+		return (0);
+	}
+
+	if (sc->sc_deferred >= 128)
+		pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);
+
+	pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT);
+	if (pd == NULL)
+		return (0);
+	sc->sc_deferred++;
+
+	m->m_flags |= M_SKIP_FIREWALL;
+	st->state_flags |= PFSTATE_ACK;
+
+	pd->pd_sc = sc;
+	pd->pd_refs = 0;
+	pd->pd_st = st;
+	pf_ref_state(st);
+	pd->pd_m = m;
+
+	TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
+	callout_init_mtx(&pd->pd_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
+	callout_reset(&pd->pd_tmo, 10, pfsync_defer_tmo, pd);
+
+	pfsync_push(sc);
+
+	return (1);
+}
+
+static void
+pfsync_undefer(struct pfsync_deferral *pd, int drop)
+{
+	struct pfsync_softc *sc = pd->pd_sc;
+	struct mbuf *m = pd->pd_m;
+	struct pf_state *st = pd->pd_st;
+
+	PFSYNC_LOCK_ASSERT(sc);
+
+	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
+	sc->sc_deferred--;
+	pd->pd_st->state_flags &= ~PFSTATE_ACK;	/* XXX: locking! */
+	free(pd, M_PFSYNC);
+	pf_release_state(st);
+
+	if (drop)
+		m_freem(m);
+	else {
+		_IF_ENQUEUE(&sc->sc_ifp->if_snd, m);
+		pfsync_push(sc);
+	}
+}
+
+static void
+pfsync_defer_tmo(void *arg)
+{
+	struct pfsync_deferral *pd = arg;
+	struct pfsync_softc *sc = pd->pd_sc;
+	struct mbuf *m = pd->pd_m;
+	struct pf_state *st = pd->pd_st;
+
+	PFSYNC_LOCK_ASSERT(sc);
+
+	CURVNET_SET(m->m_pkthdr.rcvif->if_vnet);
+
+	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
+	sc->sc_deferred--;
+	pd->pd_st->state_flags &= ~PFSTATE_ACK;	/* XXX: locking! */
+	if (pd->pd_refs == 0)
+		free(pd, M_PFSYNC);
+	PFSYNC_UNLOCK(sc);
+
+	ip_output(m, NULL, NULL, 0, NULL, NULL);
+
+	pf_release_state(st);
+
+	CURVNET_RESTORE();
+}
+
+static void
+pfsync_undefer_state(struct pf_state *st, int drop)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	struct pfsync_deferral *pd;
+
+	PFSYNC_LOCK_ASSERT(sc);
+
+	TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
+		if (pd->pd_st == st) {
+			if (callout_stop(&pd->pd_tmo))
+				pfsync_undefer(pd, drop);
+			return;
+		}
+	}
+
+	panic("%s: unable to find deferred state", __func__);
+}
+
+static void
+pfsync_update_state(struct pf_state *st)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	int sync = 0;
+
+	PF_STATE_LOCK_ASSERT(st);
+	PFSYNC_LOCK(sc);
+
+	if (st->state_flags & PFSTATE_ACK)
+		pfsync_undefer_state(st, 0);
+	if (st->state_flags & PFSTATE_NOSYNC) {
+		if (st->sync_state != PFSYNC_S_NONE)
+			pfsync_q_del(st);
+		PFSYNC_UNLOCK(sc);
+		return;
+	}
+
+	if (sc->sc_len == PFSYNC_MINPKT)
+		callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif);
+
+	switch (st->sync_state) {
+	case PFSYNC_S_UPD_C:
+	case PFSYNC_S_UPD:
+	case PFSYNC_S_INS:
+		/* we're already handling it */
+
+		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
+			st->sync_updates++;
+			if (st->sync_updates >= sc->sc_maxupdates)
+				sync = 1;
+		}
+		break;
+
+	case PFSYNC_S_IACK:
+		pfsync_q_del(st);
+	case PFSYNC_S_NONE:
+		pfsync_q_ins(st, PFSYNC_S_UPD_C);
+		st->sync_updates = 0;
+		break;
+
+	default:
+		panic("%s: unexpected sync state %d", __func__, st->sync_state);
+	}
+
+	if (sync || (time_uptime - st->pfsync_time) < 2)
+		pfsync_push(sc);
+
+	PFSYNC_UNLOCK(sc);
+}
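All of the queueing paths share one length budget: sc_len starts at PFSYNC_MINPKT, each queued record adds its own size plus a subheader when its queue was empty, and pfsync_sendout(1) is forced as soon as the next record would overflow the sync interface's MTU (see pfsync_request_update() and pfsync_q_ins() below). A toy userland model of that accounting, with made-up sizes:

	#include <stddef.h>
	#include <stdio.h>

	#define MTU	1500
	#define MINPKT	(20 + 4 + 4 + 4)	/* ip + header + subheader + eof; toy */

	static size_t pkt_len = MINPKT;

	/* Mirrors pfsync_q_ins() accounting: flush before overflowing the MTU. */
	static void
	q_ins(size_t rec_len, int queue_was_empty, int *flushes)
	{
		size_t nlen = rec_len + (queue_was_empty ? 4 : 0);	/* + subheader */

		if (pkt_len + nlen > MTU) {
			(*flushes)++;			/* pfsync_sendout(1) */
			pkt_len = MINPKT;
			nlen = 4 + rec_len;		/* fresh packet: subheader + record */
		}
		pkt_len += nlen;
	}

	int
	main(void)
	{
		int flushes = 0;

		for (int i = 0; i < 100; i++)
			q_ins(136, i == 0, &flushes);	/* 136: toy record size */
		/* prints: len 1396 after 100 inserts, 9 flushes */
		printf("len %zu after 100 inserts, %d flushes\n", pkt_len, flushes);
		return (0);
	}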
+
+static void
+pfsync_request_update(u_int32_t creatorid, u_int64_t id)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	struct pfsync_upd_req_item *item;
+	size_t nlen = sizeof(struct pfsync_upd_req);
+
+	PFSYNC_LOCK_ASSERT(sc);
+
+	/*
+	 * This code does nothing to prevent multiple update requests for the
+	 * same state being generated.
+	 */
+	item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT);
+	if (item == NULL)
+		return; /* XXX stats */
+
+	item->ur_msg.id = id;
+	item->ur_msg.creatorid = creatorid;
+
+	if (TAILQ_EMPTY(&sc->sc_upd_req_list))
+		nlen += sizeof(struct pfsync_subheader);
+
+	if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) {
+		pfsync_sendout(1);
+
+		nlen = sizeof(struct pfsync_subheader) +
+		    sizeof(struct pfsync_upd_req);
+	}
+
+	TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
+	sc->sc_len += nlen;
+
+	pfsync_push(sc);
+}
+
+static void
+pfsync_update_state_req(struct pf_state *st)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+
+	PF_STATE_LOCK_ASSERT(st);
+	PFSYNC_LOCK(sc);
+
+	if (st->state_flags & PFSTATE_NOSYNC) {
+		if (st->sync_state != PFSYNC_S_NONE)
+			pfsync_q_del(st);
+		PFSYNC_UNLOCK(sc);
+		return;
+	}
+
+	switch (st->sync_state) {
+	case PFSYNC_S_UPD_C:
+	case PFSYNC_S_IACK:
+		pfsync_q_del(st);
+	case PFSYNC_S_NONE:
+		pfsync_q_ins(st, PFSYNC_S_UPD);
+		pfsync_push(sc);
+		break;
+
+	case PFSYNC_S_INS:
+	case PFSYNC_S_UPD:
+	case PFSYNC_S_DEL:
+		/* we're already handling it */
+		break;
+
+	default:
+		panic("%s: unexpected sync state %d", __func__, st->sync_state);
+	}
+
+	PFSYNC_UNLOCK(sc);
+}
+
+static void
+pfsync_delete_state(struct pf_state *st)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+
+	PFSYNC_LOCK(sc);
+	if (st->state_flags & PFSTATE_ACK)
+		pfsync_undefer_state(st, 1);
+	if (st->state_flags & PFSTATE_NOSYNC) {
+		if (st->sync_state != PFSYNC_S_NONE)
+			pfsync_q_del(st);
+		PFSYNC_UNLOCK(sc);
+		return;
+	}
+
+	if (sc->sc_len == PFSYNC_MINPKT)
+		callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif);
+
+	switch (st->sync_state) {
+	case PFSYNC_S_INS:
+		/* We never got to tell the world so just forget about it. */
+		pfsync_q_del(st);
+		break;
+
+	case PFSYNC_S_UPD_C:
+	case PFSYNC_S_UPD:
+	case PFSYNC_S_IACK:
+		pfsync_q_del(st);
+		/* FALLTHROUGH to putting it on the del list */
+
+	case PFSYNC_S_NONE:
+		pfsync_q_ins(st, PFSYNC_S_DEL);
+		break;
+
+	default:
+		panic("%s: unexpected sync state %d", __func__, st->sync_state);
+	}
+	PFSYNC_UNLOCK(sc);
+}
+
+static void
+pfsync_clear_states(u_int32_t creatorid, const char *ifname)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	struct {
+		struct pfsync_subheader subh;
+		struct pfsync_clr clr;
+	} __packed r;
+
+	bzero(&r, sizeof(r));
+
+	r.subh.action = PFSYNC_ACT_CLR;
+	r.subh.count = htons(1);
+	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++;
+
+	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
+	r.clr.creatorid = creatorid;
+
+	PFSYNC_LOCK(sc);
+	pfsync_send_plus(&r, sizeof(r));
+	PFSYNC_UNLOCK(sc);
+}
+
+static void
+pfsync_q_ins(struct pf_state *st, int q)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	size_t nlen = pfsync_qs[q].len;
+
+	PFSYNC_LOCK_ASSERT(sc);
+
+	KASSERT(st->sync_state == PFSYNC_S_NONE,
+	    ("%s: st->sync_state == PFSYNC_S_NONE", __func__));
+	KASSERT(sc->sc_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu",
+	    sc->sc_len));
+
+	if (TAILQ_EMPTY(&sc->sc_qs[q]))
+		nlen += sizeof(struct pfsync_subheader);
+
+	if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) {
+		pfsync_sendout(1);
+
+		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
+	}
+
+	sc->sc_len += nlen;
+	TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
+	st->sync_state = q;
+	pf_ref_state(st);
+}
+
+static void
+pfsync_q_del(struct pf_state *st)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	int q = st->sync_state;
+
+	PFSYNC_LOCK_ASSERT(sc);
+	KASSERT(st->sync_state != PFSYNC_S_NONE,
+	    ("%s: st->sync_state != PFSYNC_S_NONE", __func__));
+
+	sc->sc_len -= pfsync_qs[q].len;
pfsync_qs[q].len; + TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list); + st->sync_state = PFSYNC_S_NONE; + pf_release_state(st); + + if (TAILQ_EMPTY(&sc->sc_qs[q])) + sc->sc_len -= sizeof(struct pfsync_subheader); +} + +static void +pfsync_bulk_start(void) +{ + struct pfsync_softc *sc = V_pfsyncif; + + if (V_pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: received bulk update request\n"); + + PFSYNC_BLOCK(sc); + + sc->sc_ureq_received = time_uptime; + sc->sc_bulk_hashid = 0; + sc->sc_bulk_stateid = 0; + pfsync_bulk_status(PFSYNC_BUS_START); + callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc); + PFSYNC_BUNLOCK(sc); +} + +static void +pfsync_bulk_update(void *arg) +{ + struct pfsync_softc *sc = arg; + struct pf_state *s; + int i, sent = 0; + + PFSYNC_BLOCK_ASSERT(sc); + CURVNET_SET(sc->sc_ifp->if_vnet); + + /* + * Start with last state from previous invocation. + * It may have gone; in that case start from the + * hash slot. + */ + s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid); + + if (s != NULL) + i = PF_IDHASH(s); + else + i = sc->sc_bulk_hashid; + + for (; i <= V_pf_hashmask; i++) { + struct pf_idhash *ih = &V_pf_idhash[i]; + + if (s != NULL) + PF_HASHROW_ASSERT(ih); + else { + PF_HASHROW_LOCK(ih); + s = LIST_FIRST(&ih->states); + } + + for (; s; s = LIST_NEXT(s, entry)) { + + if (sent > 1 && (sc->sc_ifp->if_mtu - sc->sc_len) < + sizeof(struct pfsync_state)) { + /* We've filled a packet. */ + sc->sc_bulk_hashid = i; + sc->sc_bulk_stateid = s->id; + sc->sc_bulk_creatorid = s->creatorid; + PF_HASHROW_UNLOCK(ih); + callout_reset(&sc->sc_bulk_tmo, 1, + pfsync_bulk_update, sc); + goto full; + } + + if (s->sync_state == PFSYNC_S_NONE && + s->timeout < PFTM_MAX && + s->pfsync_time <= sc->sc_ureq_received) { + PFSYNC_LOCK(sc); + pfsync_update_state_req(s); + PFSYNC_UNLOCK(sc); + sent++; + } + } + PF_HASHROW_UNLOCK(ih); + } + + /* We're done. */ + pfsync_bulk_status(PFSYNC_BUS_END); + +full: + CURVNET_RESTORE(); +} + +static void +pfsync_bulk_status(u_int8_t status) +{ + struct { + struct pfsync_subheader subh; + struct pfsync_bus bus; + } __packed r; + + struct pfsync_softc *sc = V_pfsyncif; + + bzero(&r, sizeof(r)); + + r.subh.action = PFSYNC_ACT_BUS; + r.subh.count = htons(1); + V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++; + + r.bus.creatorid = V_pf_status.hostid; + r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received); + r.bus.status = status; + + PFSYNC_LOCK(sc); + pfsync_send_plus(&r, sizeof(r)); + PFSYNC_UNLOCK(sc); +} + +static void +pfsync_bulk_fail(void *arg) +{ + struct pfsync_softc *sc = arg; + + CURVNET_SET(sc->sc_ifp->if_vnet); + + PFSYNC_BLOCK_ASSERT(sc); + + if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { + /* Try again */ + callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, + pfsync_bulk_fail, V_pfsyncif); + PFSYNC_LOCK(sc); + pfsync_request_update(0, 0); + PFSYNC_UNLOCK(sc); + } else { + /* Pretend the transfer was ok. 
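pfsync_bulk_update() above cannot hold a hash row lock across the whole state table, so each callout tick sends roughly a packet's worth of states and checkpoints its position for the next tick. A user-space sketch of such a resumable walk, not part of the patch: the buckets and the BUDGET cutoff are hypothetical stand-ins for the pf id hash and the MTU test, and where the kernel records the state id (re-finding it with pf_find_state_byid(), since a chain may change between ticks) the sketch keeps a plain bucket/index pair.

/* bulkwalk_sketch.c: resumable walk over a bucketed table. */
#include <stdio.h>

#define NBUCKETS 4
#define BUDGET   3                      /* "states per packet" */

static int buckets[NBUCKETS][5] = {     /* 0 terminates a bucket */
    { 1, 2, 3, 0 }, { 4, 0 }, { 5, 6, 7, 8, 0 }, { 9, 0 },
};
static int bulk_bucket, bulk_index;     /* resume checkpoint */

/* Send up to BUDGET states; return 1 once the table is done. */
static int
bulk_update(void)
{
    int sent = 0;

    for (int i = bulk_bucket; i < NBUCKETS; i++) {
        for (int j = (i == bulk_bucket) ? bulk_index : 0;
            buckets[i][j] != 0; j++) {
            if (sent == BUDGET) {
                bulk_bucket = i;        /* packet full: checkpoint */
                bulk_index = j;
                return (0);             /* and reschedule the callout */
            }
            printf("send state %d\n", buckets[i][j]);
            sent++;
        }
    }
    return (1);                         /* like PFSYNC_BUS_END */
}

int
main(void)
{
    while (!bulk_update())
        printf("next callout tick\n");
    return (0);
}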
*/ + sc->sc_ureq_sent = 0; + sc->sc_bulk_tries = 0; + PFSYNC_LOCK(sc); + if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) + (*carp_demote_adj_p)(-V_pfsync_carp_adj, + "pfsync bulk fail"); + sc->sc_flags |= PFSYNCF_OK; + PFSYNC_UNLOCK(sc); + if (V_pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: failed to receive bulk update\n"); + } + + CURVNET_RESTORE(); +} + +static void +pfsync_send_plus(void *plus, size_t pluslen) +{ + struct pfsync_softc *sc = V_pfsyncif; + + PFSYNC_LOCK_ASSERT(sc); + + if (sc->sc_len + pluslen > sc->sc_ifp->if_mtu) + pfsync_sendout(1); + + sc->sc_plus = plus; + sc->sc_len += (sc->sc_pluslen = pluslen); + + pfsync_sendout(1); +} + +static void +pfsync_timeout(void *arg) +{ + struct pfsync_softc *sc = arg; + + CURVNET_SET(sc->sc_ifp->if_vnet); + PFSYNC_LOCK(sc); + pfsync_push(sc); + PFSYNC_UNLOCK(sc); + CURVNET_RESTORE(); +} + +static void +pfsync_push(struct pfsync_softc *sc) +{ + + PFSYNC_LOCK_ASSERT(sc); + + sc->sc_flags |= PFSYNCF_PUSH; + swi_sched(V_pfsync_swi_cookie, 0); +} + +static void +pfsyncintr(void *arg) +{ + struct pfsync_softc *sc = arg; + struct mbuf *m, *n; + + CURVNET_SET(sc->sc_ifp->if_vnet); + + PFSYNC_LOCK(sc); + if ((sc->sc_flags & PFSYNCF_PUSH) && sc->sc_len > PFSYNC_MINPKT) { + pfsync_sendout(0); + sc->sc_flags &= ~PFSYNCF_PUSH; + } + _IF_DEQUEUE_ALL(&sc->sc_ifp->if_snd, m); + PFSYNC_UNLOCK(sc); + + for (; m != NULL; m = n) { + + n = m->m_nextpkt; + m->m_nextpkt = NULL; + + /* + * We distinguish between a deferral packet and our + * own pfsync packet based on M_SKIP_FIREWALL + * flag. This is XXX. + */ + if (m->m_flags & M_SKIP_FIREWALL) + ip_output(m, NULL, NULL, 0, NULL, NULL); + else if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, + NULL) == 0) + V_pfsyncstats.pfsyncs_opackets++; + else + V_pfsyncstats.pfsyncs_oerrors++; + } + CURVNET_RESTORE(); +} + +static int +pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, void *mship) +{ + struct ip_moptions *imo = &sc->sc_imo; + int error; + + if (!(ifp->if_flags & IFF_MULTICAST)) + return (EADDRNOTAVAIL); + + imo->imo_membership = (struct in_multi **)mship; + imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; + imo->imo_multicast_vif = -1; + + if ((error = in_joingroup(ifp, &sc->sc_sync_peer, NULL, + &imo->imo_membership[0])) != 0) { + imo->imo_membership = NULL; + return (error); + } + imo->imo_num_memberships++; + imo->imo_multicast_ifp = ifp; + imo->imo_multicast_ttl = PFSYNC_DFLTTL; + imo->imo_multicast_loop = 0; + + return (0); +} + +static void +pfsync_multicast_cleanup(struct pfsync_softc *sc) +{ + struct ip_moptions *imo = &sc->sc_imo; + + in_leavegroup(imo->imo_membership[0], NULL); + free(imo->imo_membership, M_PFSYNC); + imo->imo_membership = NULL; + imo->imo_multicast_ifp = NULL; +} + +#ifdef INET +extern struct domain inetdomain; +static struct protosw in_pfsync_protosw = { + .pr_type = SOCK_RAW, + .pr_domain = &inetdomain, + .pr_protocol = IPPROTO_PFSYNC, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = pfsync_input, + .pr_output = (pr_output_t *)rip_output, + .pr_ctloutput = rip_ctloutput, + .pr_usrreqs = &rip_usrreqs +}; +#endif + +static int +pfsync_init() +{ + VNET_ITERATOR_DECL(vnet_iter); + int error = 0; + + VNET_LIST_RLOCK(); + VNET_FOREACH(vnet_iter) { + CURVNET_SET(vnet_iter); + V_pfsync_cloner = pfsync_cloner; + V_pfsync_cloner_data = pfsync_cloner_data; + V_pfsync_cloner.ifc_data = &V_pfsync_cloner_data; + if_clone_attach(&V_pfsync_cloner); + error = swi_add(NULL, "pfsync", pfsyncintr, V_pfsyncif, + SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie); + 
CURVNET_RESTORE(); + if (error) + goto fail_locked; + } + VNET_LIST_RUNLOCK(); +#ifdef INET + error = pf_proto_register(PF_INET, &in_pfsync_protosw); + if (error) + goto fail; + error = ipproto_register(IPPROTO_PFSYNC); + if (error) { + pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW); + goto fail; + } +#endif + PF_RULES_WLOCK(); + pfsync_state_import_ptr = pfsync_state_import; + pfsync_insert_state_ptr = pfsync_insert_state; + pfsync_update_state_ptr = pfsync_update_state; + pfsync_delete_state_ptr = pfsync_delete_state; + pfsync_clear_states_ptr = pfsync_clear_states; + pfsync_defer_ptr = pfsync_defer; + PF_RULES_WUNLOCK(); + + return (0); + +fail: + VNET_LIST_RLOCK(); +fail_locked: + VNET_FOREACH(vnet_iter) { + CURVNET_SET(vnet_iter); + if (V_pfsync_swi_cookie) { + swi_remove(V_pfsync_swi_cookie); + if_clone_detach(&V_pfsync_cloner); + } + CURVNET_RESTORE(); + } + VNET_LIST_RUNLOCK(); + + return (error); +} + +static void +pfsync_uninit() +{ + VNET_ITERATOR_DECL(vnet_iter); + + PF_RULES_WLOCK(); + pfsync_state_import_ptr = NULL; + pfsync_insert_state_ptr = NULL; + pfsync_update_state_ptr = NULL; + pfsync_delete_state_ptr = NULL; + pfsync_clear_states_ptr = NULL; + pfsync_defer_ptr = NULL; + PF_RULES_WUNLOCK(); + + ipproto_unregister(IPPROTO_PFSYNC); + pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW); + VNET_LIST_RLOCK(); + VNET_FOREACH(vnet_iter) { + CURVNET_SET(vnet_iter); + if_clone_detach(&V_pfsync_cloner); + swi_remove(V_pfsync_swi_cookie); + CURVNET_RESTORE(); + } + VNET_LIST_RUNLOCK(); +} + +static int +pfsync_modevent(module_t mod, int type, void *data) +{ + int error = 0; + + switch (type) { + case MOD_LOAD: + error = pfsync_init(); + break; + case MOD_QUIESCE: + /* + * Module should not be unloaded due to race conditions. + */ + error = EPERM; + break; + case MOD_UNLOAD: + pfsync_uninit(); + break; + default: + error = EINVAL; + break; + } + + return (error); +} + +static moduledata_t pfsync_mod = { + "pfsync", + pfsync_modevent, + 0 +}; + +#define PFSYNC_MODVER 1 + +DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); +MODULE_VERSION(pfsync, PFSYNC_MODVER); +MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER); diff --git a/sys/netpfil/pf/in4_cksum.c b/sys/netpfil/pf/in4_cksum.c new file mode 100644 index 0000000..bf25baf --- /dev/null +++ b/sys/netpfil/pf/in4_cksum.c @@ -0,0 +1,120 @@ +/* $FreeBSD$ */ +/* $OpenBSD: in4_cksum.c,v 1.7 2003/06/02 23:28:13 millert Exp $ */ +/* $KAME: in4_cksum.c,v 1.10 2001/11/30 10:06:15 itojun Exp $ */ +/* $NetBSD: in_cksum.c,v 1.13 1996/10/13 02:03:03 christos Exp $ */ + +/* + * Copyright (C) 1999 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright (c) 1988, 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/mbuf.h> + +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#include <netinet/ip_var.h> + +#include <machine/in_cksum.h> + +#define ADDCARRY(x) (x > 65535 ? 
x -= 65535 : x) +#define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; (void)ADDCARRY(sum);} + +int in4_cksum(struct mbuf *, u_int8_t, int, int); + +int +in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len) +{ + union { + struct ipovly ipov; + u_int16_t w[10]; + } u; + union { + u_int16_t s[2]; + u_int32_t l; + } l_util; + + u_int16_t *w; + int psum; + int sum = 0; + + if (nxt != 0) { + /* pseudo header */ + if (off < sizeof(struct ipovly)) + panic("in4_cksum: offset too short"); + if (m->m_len < sizeof(struct ip)) + panic("in4_cksum: bad mbuf chain"); + bzero(&u.ipov, sizeof(u.ipov)); + u.ipov.ih_len = htons(len); + u.ipov.ih_pr = nxt; + u.ipov.ih_src = mtod(m, struct ip *)->ip_src; + u.ipov.ih_dst = mtod(m, struct ip *)->ip_dst; + w = u.w; + /* assumes sizeof(ipov) == 20 */ + sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; sum += w[4]; + sum += w[5]; sum += w[6]; sum += w[7]; sum += w[8]; sum += w[9]; + } + + psum = in_cksum_skip(m, len + off, off); + psum = ~psum & 0xffff; + sum += psum; + REDUCE; + return (~sum & 0xffff); +} diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c new file mode 100644 index 0000000..a61b87b --- /dev/null +++ b/sys/netpfil/pf/pf.c @@ -0,0 +1,6271 @@ +/* $OpenBSD: pf.c,v 1.634 2009/02/27 12:37:45 henning Exp $ */ + +/* + * Copyright (c) 2001 Daniel Hartmeier + * Copyright (c) 2002 - 2008 Henning Brauer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Effort sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F30602-01-2-0537. 
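in4_cksum() above seeds a ones-complement sum with the ten 16-bit words of the IPv4 pseudo-header and folds carries exactly as its REDUCE macro does. The same arithmetic in a standalone sketch, not part of the patch; the payload bytes are arbitrary, and a real transport checksum would sum the pseudo-header words into the seed first.

/* cksum_sketch.c: ones-complement checksum with carry folding. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static uint16_t
cksum(const void *data, size_t len, uint32_t sum)
{
    const uint16_t *w = data;

    while (len > 1) {                   /* sum 16-bit words */
        sum += *w++;
        len -= 2;
    }
    if (len == 1)                       /* odd trailing byte */
        sum += *(const uint8_t *)w;
    while (sum >> 16)                   /* fold carries, like REDUCE */
        sum = (sum & 0xffff) + (sum >> 16);
    return (~sum & 0xffff);
}

int
main(void)
{
    uint8_t buf[8] = { 0x45, 0x00, 0x00, 0x54, 0xbe, 0xef, 0x40, 0x00 };

    /* Seed 0 here; in4_cksum() seeds with the pseudo-header words. */
    printf("checksum: 0x%04x\n", cksum(buf, sizeof(buf), 0));
    return (0);
}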
+ * + */ + +#include <sys/cdefs.h> + +__FBSDID("$FreeBSD$"); + +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_bpf.h" +#include "opt_pf.h" + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/endian.h> +#include <sys/hash.h> +#include <sys/interrupt.h> +#include <sys/kernel.h> +#include <sys/kthread.h> +#include <sys/limits.h> +#include <sys/mbuf.h> +#include <sys/md5.h> +#include <sys/random.h> +#include <sys/refcount.h> +#include <sys/socket.h> +#include <sys/sysctl.h> +#include <sys/taskqueue.h> +#include <sys/ucred.h> + +#include <net/if.h> +#include <net/if_types.h> +#include <net/route.h> +#include <net/radix_mpath.h> +#include <net/vnet.h> + +#include <net/pfvar.h> +#include <net/pf_mtag.h> +#include <net/if_pflog.h> +#include <net/if_pfsync.h> + +#include <netinet/in_pcb.h> +#include <netinet/in_var.h> +#include <netinet/ip.h> +#include <netinet/ip_fw.h> +#include <netinet/ip_icmp.h> +#include <netinet/icmp_var.h> +#include <netinet/ip_var.h> +#include <netinet/tcp.h> +#include <netinet/tcp_fsm.h> +#include <netinet/tcp_seq.h> +#include <netinet/tcp_timer.h> +#include <netinet/tcp_var.h> +#include <netinet/udp.h> +#include <netinet/udp_var.h> + +#include <netpfil/ipfw/ip_fw_private.h> /* XXX: only for DIR_IN/DIR_OUT */ + +#ifdef INET6 +#include <netinet/ip6.h> +#include <netinet/icmp6.h> +#include <netinet6/nd6.h> +#include <netinet6/ip6_var.h> +#include <netinet6/in6_pcb.h> +#endif /* INET6 */ + +#include <machine/in_cksum.h> +#include <security/mac/mac_framework.h> + +#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x + +/* + * Global variables + */ + +/* state tables */ +VNET_DEFINE(struct pf_altqqueue, pf_altqs[2]); +VNET_DEFINE(struct pf_palist, pf_pabuf); +VNET_DEFINE(struct pf_altqqueue *, pf_altqs_active); +VNET_DEFINE(struct pf_altqqueue *, pf_altqs_inactive); +VNET_DEFINE(struct pf_status, pf_status); + +VNET_DEFINE(u_int32_t, ticket_altqs_active); +VNET_DEFINE(u_int32_t, ticket_altqs_inactive); +VNET_DEFINE(int, altqs_inactive_open); +VNET_DEFINE(u_int32_t, ticket_pabuf); + +VNET_DEFINE(MD5_CTX, pf_tcp_secret_ctx); +#define V_pf_tcp_secret_ctx VNET(pf_tcp_secret_ctx) +VNET_DEFINE(u_char, pf_tcp_secret[16]); +#define V_pf_tcp_secret VNET(pf_tcp_secret) +VNET_DEFINE(int, pf_tcp_secret_init); +#define V_pf_tcp_secret_init VNET(pf_tcp_secret_init) +VNET_DEFINE(int, pf_tcp_iss_off); +#define V_pf_tcp_iss_off VNET(pf_tcp_iss_off) + +struct pf_anchor_stackframe { + struct pf_ruleset *rs; + struct pf_rule *r; + struct pf_anchor_node *parent; + struct pf_anchor *child; +}; +VNET_DEFINE(struct pf_anchor_stackframe, pf_anchor_stack[64]); +#define V_pf_anchor_stack VNET(pf_anchor_stack) + +/* + * Queue for pf_intr() sends. + */ +static MALLOC_DEFINE(M_PFTEMP, "pf_temp", "pf(4) temporary allocations"); +struct pf_send_entry { + STAILQ_ENTRY(pf_send_entry) pfse_next; + struct mbuf *pfse_m; + enum { + PFSE_IP, + PFSE_IP6, + PFSE_ICMP, + PFSE_ICMP6, + } pfse_type; + union { + struct route ro; + struct { + int type; + int code; + int mtu; + } icmpopts; + } u; +#define pfse_ro u.ro +#define pfse_icmp_type u.icmpopts.type +#define pfse_icmp_code u.icmpopts.code +#define pfse_icmp_mtu u.icmpopts.mtu +}; + +STAILQ_HEAD(pf_send_head, pf_send_entry); +static VNET_DEFINE(struct pf_send_head, pf_sendqueue); +#define V_pf_sendqueue VNET(pf_sendqueue) + +static struct mtx pf_sendqueue_mtx; +#define PF_SENDQ_LOCK() mtx_lock(&pf_sendqueue_mtx) +#define PF_SENDQ_UNLOCK() mtx_unlock(&pf_sendqueue_mtx) + +/* + * Queue for pf_flush_task() tasks. 
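The send queue declared above is one half of a deferred-transmit pattern: pf_send(), later in this file, appends an entry under pf_sendqueue_mtx and schedules a software interrupt, and pf_intr() detaches the whole list in one short critical section, then walks it with no lock held. A user-space sketch, not part of the patch; a pthread mutex and a direct call stand in for the kernel mutex and swi_sched().

/* sendq_sketch.c: detach-and-drain queue; link with -lpthread. */
#include <sys/queue.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct entry {
    STAILQ_ENTRY(entry) next;
    int payload;                        /* stands in for the mbuf */
};

static STAILQ_HEAD(equeue, entry) sendq = STAILQ_HEAD_INITIALIZER(sendq);
static pthread_mutex_t sendq_mtx = PTHREAD_MUTEX_INITIALIZER;

/* Producer: enqueue and, in the kernel, swi_sched(). */
static void
send_enqueue(int payload)
{
    struct entry *e;

    if ((e = malloc(sizeof(*e))) == NULL)
        return;
    e->payload = payload;
    pthread_mutex_lock(&sendq_mtx);
    STAILQ_INSERT_TAIL(&sendq, e, next);
    pthread_mutex_unlock(&sendq_mtx);
}

/* Consumer: steal the whole queue, then process it unlocked. */
static void
send_drain(void)
{
    struct equeue queue;
    struct entry *e;

    pthread_mutex_lock(&sendq_mtx);
    queue = sendq;                      /* as pf_intr() copies the head */
    STAILQ_INIT(&sendq);
    pthread_mutex_unlock(&sendq_mtx);

    while ((e = STAILQ_FIRST(&queue)) != NULL) {
        STAILQ_REMOVE_HEAD(&queue, next);
        printf("sending %d\n", e->payload);     /* ip_output() etc. */
        free(e);
    }
}

int
main(void)
{
    for (int i = 0; i < 3; i++)
        send_enqueue(i);
    send_drain();
    return (0);
}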
+ */ +struct pf_flush_entry { + SLIST_ENTRY(pf_flush_entry) next; + struct pf_addr addr; + sa_family_t af; + uint8_t dir; + struct pf_rule *rule; /* never dereferenced */ +}; + +SLIST_HEAD(pf_flush_head, pf_flush_entry); +static VNET_DEFINE(struct pf_flush_head, pf_flushqueue); +#define V_pf_flushqueue VNET(pf_flushqueue) +static VNET_DEFINE(struct task, pf_flushtask); +#define V_pf_flushtask VNET(pf_flushtask) + +static struct mtx pf_flushqueue_mtx; +#define PF_FLUSHQ_LOCK() mtx_lock(&pf_flushqueue_mtx) +#define PF_FLUSHQ_UNLOCK() mtx_unlock(&pf_flushqueue_mtx) + +VNET_DEFINE(struct pf_rulequeue, pf_unlinked_rules); +struct mtx pf_unlnkdrules_mtx; + +static VNET_DEFINE(uma_zone_t, pf_sources_z); +#define V_pf_sources_z VNET(pf_sources_z) +static VNET_DEFINE(uma_zone_t, pf_mtag_z); +#define V_pf_mtag_z VNET(pf_mtag_z) +VNET_DEFINE(uma_zone_t, pf_state_z); +VNET_DEFINE(uma_zone_t, pf_state_key_z); + +VNET_DEFINE(uint64_t, pf_stateid[MAXCPU]); +#define PFID_CPUBITS 8 +#define PFID_CPUSHIFT (sizeof(uint64_t) * NBBY - PFID_CPUBITS) +#define PFID_CPUMASK ((uint64_t)((1 << PFID_CPUBITS) - 1) << PFID_CPUSHIFT) +#define PFID_MAXID (~PFID_CPUMASK) +CTASSERT((1 << PFID_CPUBITS) > MAXCPU); + +static void pf_src_tree_remove_state(struct pf_state *); +static void pf_init_threshold(struct pf_threshold *, u_int32_t, + u_int32_t); +static void pf_add_threshold(struct pf_threshold *); +static int pf_check_threshold(struct pf_threshold *); + +static void pf_change_ap(struct pf_addr *, u_int16_t *, + u_int16_t *, u_int16_t *, struct pf_addr *, + u_int16_t, u_int8_t, sa_family_t); +static int pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *, + struct tcphdr *, struct pf_state_peer *); +static void pf_change_icmp(struct pf_addr *, u_int16_t *, + struct pf_addr *, struct pf_addr *, u_int16_t, + u_int16_t *, u_int16_t *, u_int16_t *, + u_int16_t *, u_int8_t, sa_family_t); +static void pf_send_tcp(struct mbuf *, + const struct pf_rule *, sa_family_t, + const struct pf_addr *, const struct pf_addr *, + u_int16_t, u_int16_t, u_int32_t, u_int32_t, + u_int8_t, u_int16_t, u_int16_t, u_int8_t, int, + u_int16_t, struct ifnet *); +static void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, + sa_family_t, struct pf_rule *); +static void pf_detach_state(struct pf_state *); +static int pf_state_key_attach(struct pf_state_key *, + struct pf_state_key *, struct pf_state *); +static void pf_state_key_detach(struct pf_state *, int); +static int pf_state_key_ctor(void *, int, void *, int); +static u_int32_t pf_tcp_iss(struct pf_pdesc *); +static int pf_test_rule(struct pf_rule **, struct pf_state **, + int, struct pfi_kif *, struct mbuf *, int, + struct pf_pdesc *, struct pf_rule **, + struct pf_ruleset **, struct inpcb *); +static int pf_create_state(struct pf_rule *, struct pf_rule *, + struct pf_rule *, struct pf_pdesc *, + struct pf_src_node *, struct pf_state_key *, + struct pf_state_key *, struct mbuf *, int, + u_int16_t, u_int16_t, int *, struct pfi_kif *, + struct pf_state **, int, u_int16_t, u_int16_t, + int); +static int pf_test_fragment(struct pf_rule **, int, + struct pfi_kif *, struct mbuf *, void *, + struct pf_pdesc *, struct pf_rule **, + struct pf_ruleset **); +static int pf_tcp_track_full(struct pf_state_peer *, + struct pf_state_peer *, struct pf_state **, + struct pfi_kif *, struct mbuf *, int, + struct pf_pdesc *, u_short *, int *); +static int pf_tcp_track_sloppy(struct pf_state_peer *, + struct pf_state_peer *, struct pf_state **, + struct pf_pdesc *, u_short *); +static int pf_test_state_tcp(struct 
pf_state **, int, + struct pfi_kif *, struct mbuf *, int, + void *, struct pf_pdesc *, u_short *); +static int pf_test_state_udp(struct pf_state **, int, + struct pfi_kif *, struct mbuf *, int, + void *, struct pf_pdesc *); +static int pf_test_state_icmp(struct pf_state **, int, + struct pfi_kif *, struct mbuf *, int, + void *, struct pf_pdesc *, u_short *); +static int pf_test_state_other(struct pf_state **, int, + struct pfi_kif *, struct mbuf *, struct pf_pdesc *); +static u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t, + sa_family_t); +static u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t, + sa_family_t); +static u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t, + int, u_int16_t); +static void pf_set_rt_ifp(struct pf_state *, + struct pf_addr *); +static int pf_check_proto_cksum(struct mbuf *, int, int, + u_int8_t, sa_family_t); +static void pf_print_state_parts(struct pf_state *, + struct pf_state_key *, struct pf_state_key *); +static int pf_addr_wrap_neq(struct pf_addr_wrap *, + struct pf_addr_wrap *); +static struct pf_state *pf_find_state(struct pfi_kif *, + struct pf_state_key_cmp *, u_int); +static int pf_src_connlimit(struct pf_state **); +static void pf_flush_task(void *c, int pending); +static int pf_insert_src_node(struct pf_src_node **, + struct pf_rule *, struct pf_addr *, sa_family_t); +static int pf_purge_expired_states(int); +static void pf_purge_unlinked_rules(void); +static int pf_mtag_init(void *, int, int); +static void pf_mtag_free(struct m_tag *); +#ifdef INET +static void pf_route(struct mbuf **, struct pf_rule *, int, + struct ifnet *, struct pf_state *, + struct pf_pdesc *); +#endif /* INET */ +#ifdef INET6 +static void pf_change_a6(struct pf_addr *, u_int16_t *, + struct pf_addr *, u_int8_t); +static void pf_route6(struct mbuf **, struct pf_rule *, int, + struct ifnet *, struct pf_state *, + struct pf_pdesc *); +#endif /* INET6 */ + +int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len); + +VNET_DECLARE(int, pf_end_threads); + +VNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]); + +#define PACKET_LOOPED(pd) ((pd)->pf_mtag && \ + (pd)->pf_mtag->flags & PF_PACKET_LOOPED) + +#define STATE_LOOKUP(i, k, d, s, pd) \ + do { \ + (s) = pf_find_state((i), (k), (d)); \ + if ((s) == NULL || (s)->timeout == PFTM_PURGE) \ + return (PF_DROP); \ + if (PACKET_LOOPED(pd)) \ + return (PF_PASS); \ + if ((d) == PF_OUT && \ + (((s)->rule.ptr->rt == PF_ROUTETO && \ + (s)->rule.ptr->direction == PF_OUT) || \ + ((s)->rule.ptr->rt == PF_REPLYTO && \ + (s)->rule.ptr->direction == PF_IN)) && \ + (s)->rt_kif != NULL && \ + (s)->rt_kif != (i)) \ + return (PF_PASS); \ + } while (0) + +#define BOUND_IFACE(r, k) \ + ((r)->rule_flag & PFRULE_IFBOUND) ? 
(k) : V_pfi_all + +#define STATE_INC_COUNTERS(s) \ + do { \ + s->rule.ptr->states_cur++; \ + s->rule.ptr->states_tot++; \ + if (s->anchor.ptr != NULL) { \ + s->anchor.ptr->states_cur++; \ + s->anchor.ptr->states_tot++; \ + } \ + if (s->nat_rule.ptr != NULL) { \ + s->nat_rule.ptr->states_cur++; \ + s->nat_rule.ptr->states_tot++; \ + } \ + } while (0) + +#define STATE_DEC_COUNTERS(s) \ + do { \ + if (s->nat_rule.ptr != NULL) \ + s->nat_rule.ptr->states_cur--; \ + if (s->anchor.ptr != NULL) \ + s->anchor.ptr->states_cur--; \ + s->rule.ptr->states_cur--; \ + } while (0) + +static MALLOC_DEFINE(M_PFHASH, "pf_hash", "pf(4) hash header structures"); +VNET_DEFINE(struct pf_keyhash *, pf_keyhash); +VNET_DEFINE(struct pf_idhash *, pf_idhash); +VNET_DEFINE(u_long, pf_hashmask); +VNET_DEFINE(struct pf_srchash *, pf_srchash); +VNET_DEFINE(u_long, pf_srchashmask); + +SYSCTL_NODE(_net, OID_AUTO, pf, CTLFLAG_RW, 0, "pf(4)"); + +VNET_DEFINE(u_long, pf_hashsize); +#define V_pf_hashsize VNET(pf_hashsize) +SYSCTL_VNET_UINT(_net_pf, OID_AUTO, states_hashsize, CTLFLAG_RDTUN, + &VNET_NAME(pf_hashsize), 0, "Size of pf(4) states hashtable"); + +VNET_DEFINE(u_long, pf_srchashsize); +#define V_pf_srchashsize VNET(pf_srchashsize) +SYSCTL_VNET_UINT(_net_pf, OID_AUTO, source_nodes_hashsize, CTLFLAG_RDTUN, + &VNET_NAME(pf_srchashsize), 0, "Size of pf(4) source nodes hashtable"); + +VNET_DEFINE(void *, pf_swi_cookie); + +VNET_DEFINE(uint32_t, pf_hashseed); +#define V_pf_hashseed VNET(pf_hashseed) + +static __inline uint32_t +pf_hashkey(struct pf_state_key *sk) +{ + uint32_t h; + + h = jenkins_hash32((uint32_t *)sk, + sizeof(struct pf_state_key_cmp)/sizeof(uint32_t), + V_pf_hashseed); + + return (h & V_pf_hashmask); +} + +#ifdef INET6 +void +pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) +{ + switch (af) { +#ifdef INET + case AF_INET: + dst->addr32[0] = src->addr32[0]; + break; +#endif /* INET */ + case AF_INET6: + dst->addr32[0] = src->addr32[0]; + dst->addr32[1] = src->addr32[1]; + dst->addr32[2] = src->addr32[2]; + dst->addr32[3] = src->addr32[3]; + break; + } +} +#endif /* INET6 */ + +static void +pf_init_threshold(struct pf_threshold *threshold, + u_int32_t limit, u_int32_t seconds) +{ + threshold->limit = limit * PF_THRESHOLD_MULT; + threshold->seconds = seconds; + threshold->count = 0; + threshold->last = time_uptime; +} + +static void +pf_add_threshold(struct pf_threshold *threshold) +{ + u_int32_t t = time_uptime, diff = t - threshold->last; + + if (diff >= threshold->seconds) + threshold->count = 0; + else + threshold->count -= threshold->count * diff / + threshold->seconds; + threshold->count += PF_THRESHOLD_MULT; + threshold->last = t; +} + +static int +pf_check_threshold(struct pf_threshold *threshold) +{ + return (threshold->count > threshold->limit); +} + +static int +pf_src_connlimit(struct pf_state **state) +{ + struct pfr_addr p; + struct pf_flush_entry *pffe; + int bad = 0; + + PF_STATE_LOCK_ASSERT(*state); + + (*state)->src_node->conn++; + (*state)->src.tcp_est = 1; + pf_add_threshold(&(*state)->src_node->conn_rate); + + if ((*state)->rule.ptr->max_src_conn && + (*state)->rule.ptr->max_src_conn < + (*state)->src_node->conn) { + V_pf_status.lcounters[LCNT_SRCCONN]++; + bad++; + } + + if ((*state)->rule.ptr->max_src_conn_rate.limit && + pf_check_threshold(&(*state)->src_node->conn_rate)) { + V_pf_status.lcounters[LCNT_SRCCONNRATE]++; + bad++; + } + + if (!bad) + return (0); + + /* Kill this state. 
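pf_add_threshold() above tracks connection rates as a fixed-point counter: each event adds PF_THRESHOLD_MULT, and the count decays linearly with the time elapsed since the last event, relative to the configured window; pf_check_threshold() then compares against limit scaled by the same factor. The arithmetic in isolation, not part of the patch; MULT is a stand-in for PF_THRESHOLD_MULT and the timestamps simulate uptime seconds.

/* threshold_sketch.c: decaying rate counter. */
#include <stdint.h>
#include <stdio.h>

#define MULT 1000                       /* fixed-point scale */

struct threshold {
    uint32_t limit;                     /* max events, scaled */
    uint32_t seconds;                   /* measurement window */
    uint32_t count;                     /* decaying count, scaled */
    uint32_t last;                      /* time of last event */
};

static void
add_event(struct threshold *th, uint32_t now)
{
    uint32_t diff = now - th->last;

    if (diff >= th->seconds)
        th->count = 0;                  /* window fully elapsed */
    else
        th->count -= th->count * diff / th->seconds;    /* linear decay */
    th->count += MULT;                  /* one new event */
    th->last = now;
}

int
main(void)
{
    struct threshold th = { 3 * MULT, 10, 0, 0 };   /* 3 per 10 seconds */
    uint32_t times[] = { 1, 2, 3, 4, 20 };

    for (int i = 0; i < 5; i++) {
        add_event(&th, times[i]);
        printf("t=%2u count=%4u over=%s\n", times[i], th.count,
            th.count > th.limit ? "yes" : "no");
    }
    return (0);
}

With these inputs the fourth back-to-back event pushes the count over the limit, and the long gap before t=20 resets it, which is the behavior max_src_conn_rate relies on.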
*/ + (*state)->timeout = PFTM_PURGE; + (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; + + if ((*state)->rule.ptr->overload_tbl == NULL) + return (1); + + V_pf_status.lcounters[LCNT_OVERLOAD_TABLE]++; + if (V_pf_status.debug >= PF_DEBUG_MISC) { + printf("%s: blocking address ", __func__); + pf_print_host(&(*state)->src_node->addr, 0, + (*state)->key[PF_SK_WIRE]->af); + printf("\n"); + } + + bzero(&p, sizeof(p)); + p.pfra_af = (*state)->key[PF_SK_WIRE]->af; + switch ((*state)->key[PF_SK_WIRE]->af) { +#ifdef INET + case AF_INET: + p.pfra_net = 32; + p.pfra_ip4addr = (*state)->src_node->addr.v4; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + p.pfra_net = 128; + p.pfra_ip6addr = (*state)->src_node->addr.v6; + break; +#endif /* INET6 */ + } + + pfr_insert_kentry((*state)->rule.ptr->overload_tbl, &p, time_second); + + if ((*state)->rule.ptr->flush == 0) + return (1); + + /* Schedule flushing task. */ + pffe = malloc(sizeof(*pffe), M_PFTEMP, M_NOWAIT); + if (pffe == NULL) + return (1); /* too bad :( */ + + bcopy(&(*state)->src_node->addr, &pffe->addr, sizeof(pffe->addr)); + pffe->af = (*state)->key[PF_SK_WIRE]->af; + pffe->dir = (*state)->direction; + if ((*state)->rule.ptr->flush & PF_FLUSH_GLOBAL) + pffe->rule = NULL; + else + pffe->rule = (*state)->rule.ptr; + PF_FLUSHQ_LOCK(); + SLIST_INSERT_HEAD(&V_pf_flushqueue, pffe, next); + PF_FLUSHQ_UNLOCK(); + taskqueue_enqueue(taskqueue_swi, &V_pf_flushtask); + + return (1); +} + +static void +pf_flush_task(void *c, int pending) +{ + struct pf_flush_head queue; + struct pf_flush_entry *pffe, *pffe1; + uint32_t killed = 0; + + PF_FLUSHQ_LOCK(); + queue = *(struct pf_flush_head *)c; + SLIST_INIT((struct pf_flush_head *)c); + PF_FLUSHQ_UNLOCK(); + + V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++; + + for (int i = 0; i <= V_pf_hashmask; i++) { + struct pf_idhash *ih = &V_pf_idhash[i]; + struct pf_state_key *sk; + struct pf_state *s; + + PF_HASHROW_LOCK(ih); + LIST_FOREACH(s, &ih->states, entry) { + sk = s->key[PF_SK_WIRE]; + SLIST_FOREACH(pffe, &queue, next) + if (sk->af == pffe->af && (pffe->rule == NULL || + pffe->rule == s->rule.ptr) && + ((pffe->dir == PF_OUT && + PF_AEQ(&pffe->addr, &sk->addr[1], sk->af)) || + (pffe->dir == PF_IN && + PF_AEQ(&pffe->addr, &sk->addr[0], sk->af)))) { + s->timeout = PFTM_PURGE; + s->src.state = s->dst.state = TCPS_CLOSED; + killed++; + } + } + PF_HASHROW_UNLOCK(ih); + } + SLIST_FOREACH_SAFE(pffe, &queue, next, pffe1) + free(pffe, M_PFTEMP); + if (V_pf_status.debug >= PF_DEBUG_MISC) + printf("%s: %u states killed", __func__, killed); +} + +/* + * Can return locked on failure, so that we can consistently + * allocate and insert a new one. 
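The comment above states pf_find_src_node()'s contract: on a miss, when returnlocked is set, the function keeps the hash row locked so the caller can allocate and insert a node without a second lookup racing against other threads, which is exactly how pf_insert_src_node() uses it. A single-bucket user-space sketch of the pattern, not part of the patch; a pthread mutex stands in for the per-row hash lock.

/* lockedmiss_sketch.c: lookup that returns locked on a miss;
 * link with -lpthread. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
    struct node *next;
    int key;
};

static struct node *bucket;
static pthread_mutex_t bucket_mtx = PTHREAD_MUTEX_INITIALIZER;

/* Unlocks on a hit (or when the caller will not insert); keeps the
 * lock on a miss so the caller may insert atomically. */
static struct node *
find_node(int key, int returnlocked)
{
    struct node *n;

    pthread_mutex_lock(&bucket_mtx);
    for (n = bucket; n != NULL; n = n->next)
        if (n->key == key)
            break;
    if (n != NULL || !returnlocked)
        pthread_mutex_unlock(&bucket_mtx);
    return (n);
}

static struct node *
find_or_insert(int key)
{
    struct node *n;

    if ((n = find_node(key, 1)) != NULL)
        return (n);                     /* hit: already unlocked */
    /* Miss: the bucket lock is still held here. */
    if ((n = malloc(sizeof(*n))) != NULL) {
        n->key = key;
        n->next = bucket;
        bucket = n;
    }
    pthread_mutex_unlock(&bucket_mtx);
    return (n);
}

int
main(void)
{
    find_or_insert(7);
    printf("7 %s\n", find_node(7, 0) ? "found" : "missing");
    return (0);
}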
+ */ +struct pf_src_node * +pf_find_src_node(struct pf_addr *src, struct pf_rule *rule, sa_family_t af, + int returnlocked) +{ + struct pf_srchash *sh; + struct pf_src_node *n; + + V_pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; + + sh = &V_pf_srchash[pf_hashsrc(src, af)]; + PF_HASHROW_LOCK(sh); + LIST_FOREACH(n, &sh->nodes, entry) + if (n->rule.ptr == rule && n->af == af && + ((af == AF_INET && n->addr.v4.s_addr == src->v4.s_addr) || + (af == AF_INET6 && bcmp(&n->addr, src, sizeof(*src)) == 0))) + break; + if (n != NULL || returnlocked == 0) + PF_HASHROW_UNLOCK(sh); + + return (n); +} + +static int +pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule, + struct pf_addr *src, sa_family_t af) +{ + + KASSERT((rule->rule_flag & PFRULE_RULESRCTRACK || + rule->rpool.opts & PF_POOL_STICKYADDR), + ("%s for non-tracking rule %p", __func__, rule)); + + if (*sn == NULL) + *sn = pf_find_src_node(src, rule, af, 1); + + if (*sn == NULL) { + struct pf_srchash *sh = &V_pf_srchash[pf_hashsrc(src, af)]; + + PF_HASHROW_ASSERT(sh); + + if (!rule->max_src_nodes || + rule->src_nodes < rule->max_src_nodes) + (*sn) = uma_zalloc(V_pf_sources_z, M_NOWAIT | M_ZERO); + else + V_pf_status.lcounters[LCNT_SRCNODES]++; + if ((*sn) == NULL) { + PF_HASHROW_UNLOCK(sh); + return (-1); + } + + pf_init_threshold(&(*sn)->conn_rate, + rule->max_src_conn_rate.limit, + rule->max_src_conn_rate.seconds); + + (*sn)->af = af; + (*sn)->rule.ptr = rule; + PF_ACPY(&(*sn)->addr, src, af); + LIST_INSERT_HEAD(&sh->nodes, *sn, entry); + (*sn)->creation = time_uptime; + (*sn)->ruletype = rule->action; + if ((*sn)->rule.ptr != NULL) + (*sn)->rule.ptr->src_nodes++; + PF_HASHROW_UNLOCK(sh); + V_pf_status.scounters[SCNT_SRC_NODE_INSERT]++; + V_pf_status.src_nodes++; + } else { + if (rule->max_src_states && + (*sn)->states >= rule->max_src_states) { + V_pf_status.lcounters[LCNT_SRCSTATES]++; + return (-1); + } + } + return (0); +} + +static void +pf_remove_src_node(struct pf_src_node *src) +{ + struct pf_srchash *sh; + + sh = &V_pf_srchash[pf_hashsrc(&src->addr, src->af)]; + PF_HASHROW_LOCK(sh); + LIST_REMOVE(src, entry); + PF_HASHROW_UNLOCK(sh); +} + +/* Data storage structures initialization. */ +void +pf_initialize() +{ + struct pf_keyhash *kh; + struct pf_idhash *ih; + struct pf_srchash *sh; + u_int i; + + TUNABLE_ULONG_FETCH("net.pf.states_hashsize", &V_pf_hashsize); + if (V_pf_hashsize == 0 || !powerof2(V_pf_hashsize)) + V_pf_hashsize = PF_HASHSIZ; + TUNABLE_ULONG_FETCH("net.pf.source_nodes_hashsize", &V_pf_srchashsize); + if (V_pf_srchashsize == 0 || !powerof2(V_pf_srchashsize)) + V_pf_srchashsize = PF_HASHSIZ / 4; + + V_pf_hashseed = arc4random(); + + /* States and state keys storage. */ + V_pf_state_z = uma_zcreate("pf states", sizeof(struct pf_state), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + V_pf_limits[PF_LIMIT_STATES].zone = V_pf_state_z; + uma_zone_set_max(V_pf_state_z, PFSTATE_HIWAT); + + V_pf_state_key_z = uma_zcreate("pf state keys", + sizeof(struct pf_state_key), pf_state_key_ctor, NULL, NULL, NULL, + UMA_ALIGN_PTR, 0); + V_pf_keyhash = malloc(V_pf_hashsize * sizeof(struct pf_keyhash), + M_PFHASH, M_WAITOK | M_ZERO); + V_pf_idhash = malloc(V_pf_hashsize * sizeof(struct pf_idhash), + M_PFHASH, M_WAITOK | M_ZERO); + V_pf_hashmask = V_pf_hashsize - 1; + for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= V_pf_hashmask; + i++, kh++, ih++) { + mtx_init(&kh->lock, "pf_keyhash", NULL, MTX_DEF); + mtx_init(&ih->lock, "pf_idhash", NULL, MTX_DEF); + } + + /* Source nodes. 
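pf_initialize() above forces both table sizes to powers of two and stores size - 1 as the mask, which lets pf_hashkey() reduce a 32-bit Jenkins hash to a slot index with a single AND instead of a modulo. A short standalone illustration, not part of the patch; 32768 and the hash value are arbitrary examples.

/* hashmask_sketch.c: power-of-two table sizing. */
#include <stdint.h>
#include <stdio.h>

#define powerof2(x) ((((x) - 1) & (x)) == 0)    /* as in sys/param.h */

int
main(void)
{
    uint32_t size = 32768;              /* example table size */
    uint32_t mask = size - 1;           /* like V_pf_hashmask */
    uint32_t h = 0xdeadbeef;            /* some jenkins_hash32() output */

    if (!powerof2(size))
        return (1);
    /* For power-of-two sizes the two reductions agree, but the AND
     * needs no division. */
    printf("h %% size = %u, h & mask = %u\n", h % size, h & mask);
    return (0);
}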
*/ + V_pf_sources_z = uma_zcreate("pf source nodes", + sizeof(struct pf_src_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, + 0); + V_pf_limits[PF_LIMIT_SRC_NODES].zone = V_pf_sources_z; + uma_zone_set_max(V_pf_sources_z, PFSNODE_HIWAT); + V_pf_srchash = malloc(V_pf_srchashsize * sizeof(struct pf_srchash), + M_PFHASH, M_WAITOK|M_ZERO); + V_pf_srchashmask = V_pf_srchashsize - 1; + for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) + mtx_init(&sh->lock, "pf_srchash", NULL, MTX_DEF); + + /* ALTQ */ + TAILQ_INIT(&V_pf_altqs[0]); + TAILQ_INIT(&V_pf_altqs[1]); + TAILQ_INIT(&V_pf_pabuf); + V_pf_altqs_active = &V_pf_altqs[0]; + V_pf_altqs_inactive = &V_pf_altqs[1]; + + /* Mbuf tags */ + V_pf_mtag_z = uma_zcreate("pf mtags", sizeof(struct m_tag) + + sizeof(struct pf_mtag), NULL, NULL, pf_mtag_init, NULL, + UMA_ALIGN_PTR, 0); + + /* Send & flush queues. */ + STAILQ_INIT(&V_pf_sendqueue); + SLIST_INIT(&V_pf_flushqueue); + TASK_INIT(&V_pf_flushtask, 0, pf_flush_task, &V_pf_flushqueue); + mtx_init(&pf_sendqueue_mtx, "pf send queue", NULL, MTX_DEF); + mtx_init(&pf_flushqueue_mtx, "pf flush queue", NULL, MTX_DEF); + + /* Unlinked, but may be referenced rules. */ + TAILQ_INIT(&V_pf_unlinked_rules); + mtx_init(&pf_unlnkdrules_mtx, "pf unlinked rules", NULL, MTX_DEF); +} + +void +pf_cleanup() +{ + struct pf_keyhash *kh; + struct pf_idhash *ih; + struct pf_srchash *sh; + struct pf_send_entry *pfse, *next; + u_int i; + + for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= V_pf_hashmask; + i++, kh++, ih++) { + KASSERT(LIST_EMPTY(&kh->keys), ("%s: key hash not empty", + __func__)); + KASSERT(LIST_EMPTY(&ih->states), ("%s: id hash not empty", + __func__)); + mtx_destroy(&kh->lock); + mtx_destroy(&ih->lock); + } + free(V_pf_keyhash, M_PFHASH); + free(V_pf_idhash, M_PFHASH); + + for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) { + KASSERT(LIST_EMPTY(&sh->nodes), + ("%s: source node hash not empty", __func__)); + mtx_destroy(&sh->lock); + } + free(V_pf_srchash, M_PFHASH); + + STAILQ_FOREACH_SAFE(pfse, &V_pf_sendqueue, pfse_next, next) { + m_freem(pfse->pfse_m); + free(pfse, M_PFTEMP); + } + + mtx_destroy(&pf_sendqueue_mtx); + mtx_destroy(&pf_flushqueue_mtx); + mtx_destroy(&pf_unlnkdrules_mtx); + + uma_zdestroy(V_pf_mtag_z); + uma_zdestroy(V_pf_sources_z); + uma_zdestroy(V_pf_state_z); + uma_zdestroy(V_pf_state_key_z); +} + +static int +pf_mtag_init(void *mem, int size, int how) +{ + struct m_tag *t; + + t = (struct m_tag *)mem; + t->m_tag_cookie = MTAG_ABI_COMPAT; + t->m_tag_id = PACKET_TAG_PF; + t->m_tag_len = sizeof(struct pf_mtag); + t->m_tag_free = pf_mtag_free; + + return (0); +} + +static void +pf_mtag_free(struct m_tag *t) +{ + + uma_zfree(V_pf_mtag_z, t); +} + +struct pf_mtag * +pf_get_mtag(struct mbuf *m) +{ + struct m_tag *mtag; + + if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) != NULL) + return ((struct pf_mtag *)(mtag + 1)); + + mtag = uma_zalloc(V_pf_mtag_z, M_NOWAIT); + if (mtag == NULL) + return (NULL); + bzero(mtag + 1, sizeof(struct pf_mtag)); + m_tag_prepend(m, mtag); + + return ((struct pf_mtag *)(mtag + 1)); +} + +static int +pf_state_key_attach(struct pf_state_key *skw, struct pf_state_key *sks, + struct pf_state *s) +{ + struct pf_keyhash *kh; + struct pf_state_key *sk, *cur; + struct pf_state *si, *olds = NULL; + int idx; + + KASSERT(s->refs == 0, ("%s: state not pristine", __func__)); + KASSERT(s->key[PF_SK_WIRE] == NULL, ("%s: state has key", __func__)); + KASSERT(s->key[PF_SK_STACK] == NULL, ("%s: state has key", __func__)); + + /* + * First run: start with 
wire key. + */ + sk = skw; + idx = PF_SK_WIRE; + +keyattach: + kh = &V_pf_keyhash[pf_hashkey(sk)]; + + PF_HASHROW_LOCK(kh); + LIST_FOREACH(cur, &kh->keys, entry) + if (bcmp(cur, sk, sizeof(struct pf_state_key_cmp)) == 0) + break; + + if (cur != NULL) { + /* Key exists. Check for same kif, if none, add to key. */ + TAILQ_FOREACH(si, &cur->states[idx], key_list[idx]) { + struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(si)]; + + PF_HASHROW_LOCK(ih); + if (si->kif == s->kif && + si->direction == s->direction) { + if (sk->proto == IPPROTO_TCP && + si->src.state >= TCPS_FIN_WAIT_2 && + si->dst.state >= TCPS_FIN_WAIT_2) { + si->src.state = si->dst.state = + TCPS_CLOSED; + /* Unlink later or cur can go away. */ + pf_ref_state(si); + olds = si; + } else { + if (V_pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: %s key attach " + "failed on %s: ", + (idx == PF_SK_WIRE) ? + "wire" : "stack", + s->kif->pfik_name); + pf_print_state_parts(s, + (idx == PF_SK_WIRE) ? + sk : NULL, + (idx == PF_SK_STACK) ? + sk : NULL); + printf(", existing: "); + pf_print_state_parts(si, + (idx == PF_SK_WIRE) ? + sk : NULL, + (idx == PF_SK_STACK) ? + sk : NULL); + printf("\n"); + } + PF_HASHROW_UNLOCK(ih); + PF_HASHROW_UNLOCK(kh); + uma_zfree(V_pf_state_key_z, sk); + if (idx == PF_SK_STACK) + pf_detach_state(s); + return (-1); /* collision! */ + } + } + PF_HASHROW_UNLOCK(ih); + } + uma_zfree(V_pf_state_key_z, sk); + s->key[idx] = cur; + } else { + LIST_INSERT_HEAD(&kh->keys, sk, entry); + s->key[idx] = sk; + } + +stateattach: + /* List is sorted, if-bound states before floating. */ + if (s->kif == V_pfi_all) + TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], s, key_list[idx]); + else + TAILQ_INSERT_HEAD(&s->key[idx]->states[idx], s, key_list[idx]); + + /* + * Attach done. See how should we (or should not?) + * attach a second key. + */ + if (sks == skw) { + s->key[PF_SK_STACK] = s->key[PF_SK_WIRE]; + idx = PF_SK_STACK; + sks = NULL; + goto stateattach; + } else if (sks != NULL) { + PF_HASHROW_UNLOCK(kh); + if (olds) { + pf_unlink_state(olds, 0); + pf_release_state(olds); + olds = NULL; + } + /* + * Continue attaching with stack key. + */ + sk = sks; + idx = PF_SK_STACK; + sks = NULL; + goto keyattach; + } else + PF_HASHROW_UNLOCK(kh); + + if (olds) { + pf_unlink_state(olds, 0); + pf_release_state(olds); + } + + KASSERT(s->key[PF_SK_WIRE] != NULL && s->key[PF_SK_STACK] != NULL, + ("%s failure", __func__)); + + return (0); +} + +static void +pf_detach_state(struct pf_state *s) +{ + struct pf_state_key *sks = s->key[PF_SK_STACK]; + struct pf_keyhash *kh; + + if (sks != NULL) { + kh = &V_pf_keyhash[pf_hashkey(sks)]; + PF_HASHROW_LOCK(kh); + if (s->key[PF_SK_STACK] != NULL) + pf_state_key_detach(s, PF_SK_STACK); + /* + * If both point to same key, then we are done. 
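pf_state_key_attach() above keeps each key's state list partitioned by inserting interface-bound states at the head and floating ones at the tail; pf_find_state(), defined later, can therefore stop at the first entry whose kif is either the input interface or the V_pfi_all wildcard. A sketch of that insertion discipline, not part of the patch; ifname == NULL models a floating state.

/* keyorder_sketch.c: if-bound states sort before floating ones. */
#include <sys/queue.h>
#include <stdio.h>

struct state {
    TAILQ_ENTRY(state) entry;
    const char *ifname;                 /* NULL: floating (V_pfi_all) */
};

static TAILQ_HEAD(, state) keylist = TAILQ_HEAD_INITIALIZER(keylist);

static void
attach(struct state *s)
{
    if (s->ifname == NULL)
        TAILQ_INSERT_TAIL(&keylist, s, entry);  /* floating: tail */
    else
        TAILQ_INSERT_HEAD(&keylist, s, entry);  /* if-bound: head */
}

int
main(void)
{
    struct state flt = { .ifname = NULL };
    struct state em0 = { .ifname = "em0" };
    struct state *s;

    attach(&flt);
    attach(&em0);
    TAILQ_FOREACH(s, &keylist, entry)   /* prints em0, then floating */
        printf("%s\n", s->ifname != NULL ? s->ifname : "floating");
    return (0);
}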
+ */ + if (sks == s->key[PF_SK_WIRE]) { + pf_state_key_detach(s, PF_SK_WIRE); + PF_HASHROW_UNLOCK(kh); + return; + } + PF_HASHROW_UNLOCK(kh); + } + + if (s->key[PF_SK_WIRE] != NULL) { + kh = &V_pf_keyhash[pf_hashkey(s->key[PF_SK_WIRE])]; + PF_HASHROW_LOCK(kh); + if (s->key[PF_SK_WIRE] != NULL) + pf_state_key_detach(s, PF_SK_WIRE); + PF_HASHROW_UNLOCK(kh); + } +} + +static void +pf_state_key_detach(struct pf_state *s, int idx) +{ + struct pf_state_key *sk = s->key[idx]; +#ifdef INVARIANTS + struct pf_keyhash *kh = &V_pf_keyhash[pf_hashkey(sk)]; + + PF_HASHROW_ASSERT(kh); +#endif + TAILQ_REMOVE(&sk->states[idx], s, key_list[idx]); + s->key[idx] = NULL; + + if (TAILQ_EMPTY(&sk->states[0]) && TAILQ_EMPTY(&sk->states[1])) { + LIST_REMOVE(sk, entry); + uma_zfree(V_pf_state_key_z, sk); + } +} + +static int +pf_state_key_ctor(void *mem, int size, void *arg, int flags) +{ + struct pf_state_key *sk = mem; + + bzero(sk, sizeof(struct pf_state_key_cmp)); + TAILQ_INIT(&sk->states[PF_SK_WIRE]); + TAILQ_INIT(&sk->states[PF_SK_STACK]); + + return (0); +} + +struct pf_state_key * +pf_state_key_setup(struct pf_pdesc *pd, struct pf_addr *saddr, + struct pf_addr *daddr, u_int16_t sport, u_int16_t dport) +{ + struct pf_state_key *sk; + + sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT); + if (sk == NULL) + return (NULL); + + PF_ACPY(&sk->addr[pd->sidx], saddr, pd->af); + PF_ACPY(&sk->addr[pd->didx], daddr, pd->af); + sk->port[pd->sidx] = sport; + sk->port[pd->didx] = dport; + sk->proto = pd->proto; + sk->af = pd->af; + + return (sk); +} + +struct pf_state_key * +pf_state_key_clone(struct pf_state_key *orig) +{ + struct pf_state_key *sk; + + sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT); + if (sk == NULL) + return (NULL); + + bcopy(orig, sk, sizeof(struct pf_state_key_cmp)); + + return (sk); +} + +int +pf_state_insert(struct pfi_kif *kif, struct pf_state_key *skw, + struct pf_state_key *sks, struct pf_state *s) +{ + struct pf_idhash *ih; + struct pf_state *cur; + + KASSERT(TAILQ_EMPTY(&sks->states[0]) && TAILQ_EMPTY(&sks->states[1]), + ("%s: sks not pristine", __func__)); + KASSERT(TAILQ_EMPTY(&skw->states[0]) && TAILQ_EMPTY(&skw->states[1]), + ("%s: skw not pristine", __func__)); + KASSERT(s->refs == 0, ("%s: state not pristine", __func__)); + + s->kif = kif; + + if (pf_state_key_attach(skw, sks, s)) + return (-1); + + if (s->id == 0 && s->creatorid == 0) { + /* XXX: should be atomic, but probability of collision low */ + if ((s->id = V_pf_stateid[curcpu]++) == PFID_MAXID) + V_pf_stateid[curcpu] = 1; + s->id |= (uint64_t )curcpu << PFID_CPUSHIFT; + s->id = htobe64(s->id); + s->creatorid = V_pf_status.hostid; + } + + ih = &V_pf_idhash[PF_IDHASH(s)]; + PF_HASHROW_LOCK(ih); + LIST_FOREACH(cur, &ih->states, entry) + if (cur->id == s->id && cur->creatorid == s->creatorid) + break; + + if (cur != NULL) { + PF_HASHROW_UNLOCK(ih); + if (V_pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: state insert failed: " + "id: %016llx creatorid: %08x", + (unsigned long long)be64toh(s->id), + ntohl(s->creatorid)); + printf("\n"); + } + pf_detach_state(s); + return (-1); + } + LIST_INSERT_HEAD(&ih->states, s, entry); + /* One for keys, one for ID hash. */ + refcount_init(&s->refs, 2); + + V_pf_status.fcounters[FCNT_STATE_INSERT]++; + if (pfsync_insert_state_ptr != NULL) + pfsync_insert_state_ptr(s); + + /* Returns locked. */ + return (0); +} + +/* + * Find state by ID: returns with locked row on success. 
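pf_state_insert() above hands out state IDs without atomic operations: each CPU increments a private counter, and the CPU number is stamped into the top PFID_CPUBITS bits so the per-CPU streams can never collide. The scheme in isolation, not part of the patch; four counters model the per-CPU array, and the kernel's final htobe64() byte swap is left out.

/* stateid_sketch.c: per-CPU id allocation. */
#include <stdint.h>
#include <stdio.h>

#define CPUBITS  8                      /* like PFID_CPUBITS */
#define CPUSHIFT (64 - CPUBITS)         /* like PFID_CPUSHIFT */
#define MAXID    (~((uint64_t)0xff << CPUSHIFT))    /* like PFID_MAXID */

static uint64_t stateid[4];             /* one counter per "CPU" */

static uint64_t
alloc_id(unsigned cpu)
{
    uint64_t id;

    if ((id = stateid[cpu]++) == MAXID) /* wrap before the CPU bits */
        stateid[cpu] = 1;
    return (id | (uint64_t)cpu << CPUSHIFT);
}

int
main(void)
{
    printf("cpu0: %016llx\n", (unsigned long long)alloc_id(0));
    printf("cpu1: %016llx\n", (unsigned long long)alloc_id(1));
    printf("cpu1: %016llx\n", (unsigned long long)alloc_id(1));
    return (0);
}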
*/ +struct pf_state * +pf_find_state_byid(uint64_t id, uint32_t creatorid) +{ + struct pf_idhash *ih; + struct pf_state *s; + + V_pf_status.fcounters[FCNT_STATE_SEARCH]++; + + ih = &V_pf_idhash[(be64toh(id) % (V_pf_hashmask + 1))]; + + PF_HASHROW_LOCK(ih); + LIST_FOREACH(s, &ih->states, entry) + if (s->id == id && s->creatorid == creatorid) + break; + + if (s == NULL) + PF_HASHROW_UNLOCK(ih); + + return (s); +} + +/* + * Find state by key. + * Returns with ID hash slot locked on success. + */ +static struct pf_state * +pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir) +{ + struct pf_keyhash *kh; + struct pf_state_key *sk; + struct pf_state *s; + int idx; + + V_pf_status.fcounters[FCNT_STATE_SEARCH]++; + + kh = &V_pf_keyhash[pf_hashkey((struct pf_state_key *)key)]; + + PF_HASHROW_LOCK(kh); + LIST_FOREACH(sk, &kh->keys, entry) + if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0) + break; + if (sk == NULL) { + PF_HASHROW_UNLOCK(kh); + return (NULL); + } + + idx = (dir == PF_IN ? PF_SK_WIRE : PF_SK_STACK); + + /* List is sorted, if-bound states before floating ones. */ + TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) + if (s->kif == V_pfi_all || s->kif == kif) { + PF_STATE_LOCK(s); + PF_HASHROW_UNLOCK(kh); + if (s->timeout == PFTM_UNLINKED) { + /* + * State is being processed + * by pf_unlink_state() in + * another thread. + */ + PF_STATE_UNLOCK(s); + return (NULL); + } + return (s); + } + PF_HASHROW_UNLOCK(kh); + + return (NULL); +} + +struct pf_state * +pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more) +{ + struct pf_keyhash *kh; + struct pf_state_key *sk; + struct pf_state *s, *ret = NULL; + int idx, inout = 0; + + V_pf_status.fcounters[FCNT_STATE_SEARCH]++; + + kh = &V_pf_keyhash[pf_hashkey((struct pf_state_key *)key)]; + + PF_HASHROW_LOCK(kh); + LIST_FOREACH(sk, &kh->keys, entry) + if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0) + break; + if (sk == NULL) { + PF_HASHROW_UNLOCK(kh); + return (NULL); + } + switch (dir) { + case PF_IN: + idx = PF_SK_WIRE; + break; + case PF_OUT: + idx = PF_SK_STACK; + break; + case PF_INOUT: + idx = PF_SK_WIRE; + inout = 1; + break; + default: + panic("%s: dir %u", __func__, dir); + } +second_run: + TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) { + if (more == NULL) { + PF_HASHROW_UNLOCK(kh); + return (s); + } + + if (ret) + (*more)++; + else + ret = s; + } + if (inout == 1) { + inout = 0; + idx = PF_SK_STACK; + goto second_run; + } + PF_HASHROW_UNLOCK(kh); + + return (ret); +} + +/* END state table stuff */ + +static void +pf_send(struct pf_send_entry *pfse) +{ + + PF_SENDQ_LOCK(); + STAILQ_INSERT_TAIL(&V_pf_sendqueue, pfse, pfse_next); + PF_SENDQ_UNLOCK(); + swi_sched(V_pf_swi_cookie, 0); +} + +void +pf_intr(void *v) +{ + struct pf_send_head queue; + struct pf_send_entry *pfse, *next; + + CURVNET_SET((struct vnet *)v); + + PF_SENDQ_LOCK(); + queue = V_pf_sendqueue; + STAILQ_INIT(&V_pf_sendqueue); + PF_SENDQ_UNLOCK(); + + STAILQ_FOREACH_SAFE(pfse, &queue, pfse_next, next) { + switch (pfse->pfse_type) { +#ifdef INET + case PFSE_IP: + ip_output(pfse->pfse_m, NULL, NULL, 0, NULL, NULL); + break; + case PFSE_ICMP: + icmp_error(pfse->pfse_m, pfse->pfse_icmp_type, + pfse->pfse_icmp_code, 0, pfse->pfse_icmp_mtu); + break; +#endif /* INET */ +#ifdef INET6 + case PFSE_IP6: + ip6_output(pfse->pfse_m, NULL, NULL, 0, NULL, NULL, + NULL); + break; + case PFSE_ICMP6: + icmp6_error(pfse->pfse_m, pfse->pfse_icmp_type, + pfse->pfse_icmp_code, pfse->pfse_icmp_mtu); + break; +#endif /* INET6 */ + default: 
panic("%s: unknown type", __func__); + } + free(pfse, M_PFTEMP); + } + CURVNET_RESTORE(); +} + +void +pf_purge_thread(void *v) +{ + int fullrun; + + CURVNET_SET((struct vnet *)v); + + for (;;) { + PF_RULES_RLOCK(); + rw_sleep(pf_purge_thread, &pf_rules_lock, 0, "pftm", hz / 10); + + if (V_pf_end_threads) { + /* + * To clean up all kifs and rules we need + * two runs: the first one clears reference flags, + * then pf_purge_expired_states() doesn't + * raise them, and the second run frees. + */ + PF_RULES_RUNLOCK(); + pf_purge_unlinked_rules(); + pfi_kif_purge(); + + /* + * Now purge everything. + */ + pf_purge_expired_states(V_pf_hashmask + 1); + pf_purge_expired_fragments(); + pf_purge_expired_src_nodes(); + + /* + * Now all kifs & rules should be unreferenced, + * thus should be successfully freed. + */ + pf_purge_unlinked_rules(); + pfi_kif_purge(); + + /* + * Announce success and exit. + */ + PF_RULES_RLOCK(); + V_pf_end_threads++; + PF_RULES_RUNLOCK(); + wakeup(pf_purge_thread); + kproc_exit(0); + } + PF_RULES_RUNLOCK(); + + /* Process 1/interval fraction of the state table every run. */ + fullrun = pf_purge_expired_states(V_pf_hashmask / + (V_pf_default_rule.timeout[PFTM_INTERVAL] * 10)); + + /* Purge other expired types every PFTM_INTERVAL seconds. */ + if (fullrun) { + /* + * Order is important: + * - states and src nodes reference rules + * - states and rules reference kifs + */ + pf_purge_expired_fragments(); + pf_purge_expired_src_nodes(); + pf_purge_unlinked_rules(); + pfi_kif_purge(); + } + } + /* not reached */ + CURVNET_RESTORE(); +} + +u_int32_t +pf_state_expires(const struct pf_state *state) +{ + u_int32_t timeout; + u_int32_t start; + u_int32_t end; + u_int32_t states; + + /* handle all PFTM_* > PFTM_MAX here */ + if (state->timeout == PFTM_PURGE) + return (time_uptime); + if (state->timeout == PFTM_UNTIL_PACKET) + return (0); + KASSERT(state->timeout != PFTM_UNLINKED, + ("pf_state_expires: timeout == PFTM_UNLINKED")); + KASSERT((state->timeout < PFTM_MAX), + ("pf_state_expires: timeout > PFTM_MAX")); + timeout = state->rule.ptr->timeout[state->timeout]; + if (!timeout) + timeout = V_pf_default_rule.timeout[state->timeout]; + start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START]; + if (start) { + end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END]; + states = state->rule.ptr->states_cur; /* XXXGL */ + } else { + start = V_pf_default_rule.timeout[PFTM_ADAPTIVE_START]; + end = V_pf_default_rule.timeout[PFTM_ADAPTIVE_END]; + states = V_pf_status.states; + } + if (end && states > start && start < end) { + if (states < end) + return (state->expire + timeout * (end - states) / + (end - start)); + else + return (time_uptime); + } + return (state->expire + timeout); +} + +void +pf_purge_expired_src_nodes() +{ + struct pf_srchash *sh; + struct pf_src_node *cur, *next; + int i; + + for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) { + PF_HASHROW_LOCK(sh); + LIST_FOREACH_SAFE(cur, &sh->nodes, entry, next) + if (cur->states <= 0 && cur->expire <= time_uptime) { + if (cur->rule.ptr != NULL) + cur->rule.ptr->src_nodes--; + LIST_REMOVE(cur, entry); + V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; + V_pf_status.src_nodes--; + uma_zfree(V_pf_sources_z, cur); + } else if (cur->rule.ptr != NULL) + cur->rule.ptr->rule_flag |= PFRULE_REFS; + PF_HASHROW_UNLOCK(sh); + } +} + +static void +pf_src_tree_remove_state(struct pf_state *s) +{ + u_int32_t timeout; + + if (s->src_node != NULL) { + if (s->src.tcp_est) + --s->src_node->conn; + if (--s->src_node->states <= 0) { + timeout = 
s->rule.ptr->timeout[PFTM_SRC_NODE]; + if (!timeout) + timeout = + V_pf_default_rule.timeout[PFTM_SRC_NODE]; + s->src_node->expire = time_uptime + timeout; + } + } + if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) { + if (--s->nat_src_node->states <= 0) { + timeout = s->rule.ptr->timeout[PFTM_SRC_NODE]; + if (!timeout) + timeout = + V_pf_default_rule.timeout[PFTM_SRC_NODE]; + s->nat_src_node->expire = time_uptime + timeout; + } + } + s->src_node = s->nat_src_node = NULL; +} + +/* + * Unlink and potentially free a state. Function may be + * called with ID hash row locked, but always returns + * unlocked, since it needs to go through key hash locking. + */ +int +pf_unlink_state(struct pf_state *s, u_int flags) +{ + struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(s)]; + + if ((flags & PF_ENTER_LOCKED) == 0) + PF_HASHROW_LOCK(ih); + else + PF_HASHROW_ASSERT(ih); + + if (s->timeout == PFTM_UNLINKED) { + /* + * State is being processed + * by pf_unlink_state() in + * another thread. + */ + PF_HASHROW_UNLOCK(ih); + return (0); /* XXXGL: undefined actually */ + } + + s->timeout = PFTM_UNLINKED; + + if (s->src.state == PF_TCPS_PROXY_DST) { + /* XXX wire key the right one? */ + pf_send_tcp(NULL, s->rule.ptr, s->key[PF_SK_WIRE]->af, + &s->key[PF_SK_WIRE]->addr[1], + &s->key[PF_SK_WIRE]->addr[0], + s->key[PF_SK_WIRE]->port[1], + s->key[PF_SK_WIRE]->port[0], + s->src.seqhi, s->src.seqlo + 1, + TH_RST|TH_ACK, 0, 0, 0, 1, s->tag, NULL); + } + + LIST_REMOVE(s, entry); + pf_src_tree_remove_state(s); + PF_HASHROW_UNLOCK(ih); + + if (pfsync_delete_state_ptr != NULL) + pfsync_delete_state_ptr(s); + + pf_detach_state(s); + refcount_release(&s->refs); + + return (pf_release_state(s)); +} + +void +pf_free_state(struct pf_state *cur) +{ + + KASSERT(cur->refs == 0, ("%s: %p has refs", __func__, cur)); + KASSERT(cur->timeout == PFTM_UNLINKED, ("%s: timeout %u", __func__, + cur->timeout)); + --cur->rule.ptr->states_cur; + if (cur->nat_rule.ptr != NULL) + --cur->nat_rule.ptr->states_cur; + if (cur->anchor.ptr != NULL) + --cur->anchor.ptr->states_cur; + pf_normalize_tcp_cleanup(cur); + uma_zfree(V_pf_state_z, cur); + V_pf_status.fcounters[FCNT_STATE_REMOVALS]++; +} + +/* + * Called only from pf_purge_thread(), thus serialized. + */ +static int +pf_purge_expired_states(int maxcheck) +{ + static u_int i = 0; + + struct pf_idhash *ih; + struct pf_state *s; + int rv = 0; + + V_pf_status.states = uma_zone_get_cur(V_pf_state_z); + + /* + * Go through hash and unlink states that expire now. + */ + while (maxcheck > 0) { + + /* Wrap to start of hash when we hit the end. */ + if (i > V_pf_hashmask) { + i = 0; + rv = 1; + } + + ih = &V_pf_idhash[i]; +relock: + PF_HASHROW_LOCK(ih); + LIST_FOREACH(s, &ih->states, entry) { + if (pf_state_expires(s) <= time_uptime) { + V_pf_status.states -= + pf_unlink_state(s, PF_ENTER_LOCKED); + goto relock; + } + s->rule.ptr->rule_flag |= PFRULE_REFS; + if (s->nat_rule.ptr != NULL) + s->nat_rule.ptr->rule_flag |= PFRULE_REFS; + if (s->anchor.ptr != NULL) + s->anchor.ptr->rule_flag |= PFRULE_REFS; + s->kif->pfik_flags |= PFI_IFLAG_REFS; + if (s->rt_kif) + s->rt_kif->pfik_flags |= PFI_IFLAG_REFS; + } + PF_HASHROW_UNLOCK(ih); + i++; + maxcheck--; + } + + V_pf_status.states = uma_zone_get_cur(V_pf_state_z); + + return (rv); +} + +static void +pf_purge_unlinked_rules() +{ + struct pf_rulequeue tmpq; + struct pf_rule *r, *r1; + + /* + * Do naive mark-and-sweep garbage collecting of old rules. + * Reference flag is raised by pf_purge_expired_states() + * and pf_purge_expired_src_nodes(). 
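The mark-and-sweep described above needs two passes to free a rule: each sweep frees only entries whose reference flag has not been raised since the previous sweep, and clears the flag on the survivors. A user-space model, not part of the patch; mark() stands in for the purge functions raising PFRULE_REFS on rules still referenced by states or source nodes.

/* marksweep_sketch.c: two-pass rule reclamation. */
#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>

#define REFS 0x1                        /* like PFRULE_REFS */

struct rule {
    TAILQ_ENTRY(rule) entries;
    int flags;
    int id;
};

static TAILQ_HEAD(, rule) unlinked = TAILQ_HEAD_INITIALIZER(unlinked);

static void
mark(struct rule *r)
{
    r->flags |= REFS;                   /* "a state still uses me" */
}

/* Free unmarked rules; unmark the rest, so a rule must be re-marked
 * before the next sweep to survive it. */
static void
sweep(void)
{
    struct rule *r = TAILQ_FIRST(&unlinked), *next;

    while (r != NULL) {
        next = TAILQ_NEXT(r, entries);
        if (!(r->flags & REFS)) {
            TAILQ_REMOVE(&unlinked, r, entries);
            printf("freeing rule %d\n", r->id);
            free(r);
        } else
            r->flags &= ~REFS;
        r = next;
    }
}

int
main(void)
{
    for (int i = 0; i < 3; i++) {
        struct rule *r = calloc(1, sizeof(*r));

        if (r == NULL)
            return (1);
        r->id = i;
        TAILQ_INSERT_TAIL(&unlinked, r, entries);
    }
    mark(TAILQ_FIRST(&unlinked));       /* rule 0 still referenced */
    sweep();                            /* frees 1 and 2, unmarks 0 */
    sweep();                            /* now frees 0 */
    return (0);
}

This two-pass requirement is also why the shutdown path in pf_purge_thread() above calls pf_purge_unlinked_rules() twice.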
+ * + * To avoid LOR between PF_UNLNKDRULES_LOCK/PF_RULES_WLOCK, + * use a temporary queue. + */ + TAILQ_INIT(&tmpq); + PF_UNLNKDRULES_LOCK(); + TAILQ_FOREACH_SAFE(r, &V_pf_unlinked_rules, entries, r1) { + if (!(r->rule_flag & PFRULE_REFS)) { + TAILQ_REMOVE(&V_pf_unlinked_rules, r, entries); + TAILQ_INSERT_TAIL(&tmpq, r, entries); + } else + r->rule_flag &= ~PFRULE_REFS; + } + PF_UNLNKDRULES_UNLOCK(); + + if (!TAILQ_EMPTY(&tmpq)) { + PF_RULES_WLOCK(); + TAILQ_FOREACH_SAFE(r, &tmpq, entries, r1) { + TAILQ_REMOVE(&tmpq, r, entries); + pf_free_rule(r); + } + PF_RULES_WUNLOCK(); + } +} + +void +pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af) +{ + switch (af) { +#ifdef INET + case AF_INET: { + u_int32_t a = ntohl(addr->addr32[0]); + printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255, + (a>>8)&255, a&255); + if (p) { + p = ntohs(p); + printf(":%u", p); + } + break; + } +#endif /* INET */ +#ifdef INET6 + case AF_INET6: { + u_int16_t b; + u_int8_t i, curstart, curend, maxstart, maxend; + curstart = curend = maxstart = maxend = 255; + for (i = 0; i < 8; i++) { + if (!addr->addr16[i]) { + if (curstart == 255) + curstart = i; + curend = i; + } else { + if ((curend - curstart) > + (maxend - maxstart)) { + maxstart = curstart; + maxend = curend; + } + curstart = curend = 255; + } + } + if ((curend - curstart) > + (maxend - maxstart)) { + maxstart = curstart; + maxend = curend; + } + for (i = 0; i < 8; i++) { + if (i >= maxstart && i <= maxend) { + if (i == 0) + printf(":"); + if (i == maxend) + printf(":"); + } else { + b = ntohs(addr->addr16[i]); + printf("%x", b); + if (i < 7) + printf(":"); + } + } + if (p) { + p = ntohs(p); + printf("[%u]", p); + } + break; + } +#endif /* INET6 */ + } +} + +void +pf_print_state(struct pf_state *s) +{ + pf_print_state_parts(s, NULL, NULL); +} + +static void +pf_print_state_parts(struct pf_state *s, + struct pf_state_key *skwp, struct pf_state_key *sksp) +{ + struct pf_state_key *skw, *sks; + u_int8_t proto, dir; + + /* Do our best to fill these, but they're skipped if NULL */ + skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL); + sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL); + proto = skw ? skw->proto : (sks ? sks->proto : 0); + dir = s ? 
s->direction : 0; + + switch (proto) { + case IPPROTO_IPV4: + printf("IPv4"); + break; + case IPPROTO_IPV6: + printf("IPv6"); + break; + case IPPROTO_TCP: + printf("TCP"); + break; + case IPPROTO_UDP: + printf("UDP"); + break; + case IPPROTO_ICMP: + printf("ICMP"); + break; + case IPPROTO_ICMPV6: + printf("ICMPv6"); + break; + default: + printf("%u", skw->proto); + break; + } + switch (dir) { + case PF_IN: + printf(" in"); + break; + case PF_OUT: + printf(" out"); + break; + } + if (skw) { + printf(" wire: "); + pf_print_host(&skw->addr[0], skw->port[0], skw->af); + printf(" "); + pf_print_host(&skw->addr[1], skw->port[1], skw->af); + } + if (sks) { + printf(" stack: "); + if (sks != skw) { + pf_print_host(&sks->addr[0], sks->port[0], sks->af); + printf(" "); + pf_print_host(&sks->addr[1], sks->port[1], sks->af); + } else + printf("-"); + } + if (s) { + if (proto == IPPROTO_TCP) { + printf(" [lo=%u high=%u win=%u modulator=%u", + s->src.seqlo, s->src.seqhi, + s->src.max_win, s->src.seqdiff); + if (s->src.wscale && s->dst.wscale) + printf(" wscale=%u", + s->src.wscale & PF_WSCALE_MASK); + printf("]"); + printf(" [lo=%u high=%u win=%u modulator=%u", + s->dst.seqlo, s->dst.seqhi, + s->dst.max_win, s->dst.seqdiff); + if (s->src.wscale && s->dst.wscale) + printf(" wscale=%u", + s->dst.wscale & PF_WSCALE_MASK); + printf("]"); + } + printf(" %u:%u", s->src.state, s->dst.state); + } +} + +void +pf_print_flags(u_int8_t f) +{ + if (f) + printf(" "); + if (f & TH_FIN) + printf("F"); + if (f & TH_SYN) + printf("S"); + if (f & TH_RST) + printf("R"); + if (f & TH_PUSH) + printf("P"); + if (f & TH_ACK) + printf("A"); + if (f & TH_URG) + printf("U"); + if (f & TH_ECE) + printf("E"); + if (f & TH_CWR) + printf("W"); +} + +#define PF_SET_SKIP_STEPS(i) \ + do { \ + while (head[i] != cur) { \ + head[i]->skip[i].ptr = cur; \ + head[i] = TAILQ_NEXT(head[i], entries); \ + } \ + } while (0) + +void +pf_calc_skip_steps(struct pf_rulequeue *rules) +{ + struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT]; + int i; + + cur = TAILQ_FIRST(rules); + prev = cur; + for (i = 0; i < PF_SKIP_COUNT; ++i) + head[i] = cur; + while (cur != NULL) { + + if (cur->kif != prev->kif || cur->ifnot != prev->ifnot) + PF_SET_SKIP_STEPS(PF_SKIP_IFP); + if (cur->direction != prev->direction) + PF_SET_SKIP_STEPS(PF_SKIP_DIR); + if (cur->af != prev->af) + PF_SET_SKIP_STEPS(PF_SKIP_AF); + if (cur->proto != prev->proto) + PF_SET_SKIP_STEPS(PF_SKIP_PROTO); + if (cur->src.neg != prev->src.neg || + pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr)) + PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR); + if (cur->src.port[0] != prev->src.port[0] || + cur->src.port[1] != prev->src.port[1] || + cur->src.port_op != prev->src.port_op) + PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT); + if (cur->dst.neg != prev->dst.neg || + pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr)) + PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR); + if (cur->dst.port[0] != prev->dst.port[0] || + cur->dst.port[1] != prev->dst.port[1] || + cur->dst.port_op != prev->dst.port_op) + PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT); + + prev = cur; + cur = TAILQ_NEXT(cur, entries); + } + for (i = 0; i < PF_SKIP_COUNT; ++i) + PF_SET_SKIP_STEPS(i); +} + +static int +pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) +{ + if (aw1->type != aw2->type) + return (1); + switch (aw1->type) { + case PF_ADDR_ADDRMASK: + case PF_ADDR_RANGE: + if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0)) + return (1); + if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0)) + return (1); + return (0); + case PF_ADDR_DYNIFTL: + return 
(aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt); + case PF_ADDR_NOROUTE: + case PF_ADDR_URPFFAILED: + return (0); + case PF_ADDR_TABLE: + return (aw1->p.tbl != aw2->p.tbl); + default: + printf("invalid address type: %d\n", aw1->type); + return (1); + } +} + +u_int16_t +pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp) +{ + u_int32_t l; + + if (udp && !cksum) + return (0x0000); + l = cksum + old - new; + l = (l >> 16) + (l & 65535); + l = l & 65535; + if (udp && !l) + return (0xFFFF); + return (l); +} + +static void +pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc, + struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af) +{ + struct pf_addr ao; + u_int16_t po = *p; + + PF_ACPY(&ao, a, af); + PF_ACPY(a, an, af); + + *p = pn; + + switch (af) { +#ifdef INET + case AF_INET: + *ic = pf_cksum_fixup(pf_cksum_fixup(*ic, + ao.addr16[0], an->addr16[0], 0), + ao.addr16[1], an->addr16[1], 0); + *p = pn; + *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc, + ao.addr16[0], an->addr16[0], u), + ao.addr16[1], an->addr16[1], u), + po, pn, u); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc, + ao.addr16[0], an->addr16[0], u), + ao.addr16[1], an->addr16[1], u), + ao.addr16[2], an->addr16[2], u), + ao.addr16[3], an->addr16[3], u), + ao.addr16[4], an->addr16[4], u), + ao.addr16[5], an->addr16[5], u), + ao.addr16[6], an->addr16[6], u), + ao.addr16[7], an->addr16[7], u), + po, pn, u); + break; +#endif /* INET6 */ + } +} + + +/* Changes a u_int32_t. Uses a void * so there are no align restrictions */ +void +pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u) +{ + u_int32_t ao; + + memcpy(&ao, a, sizeof(ao)); + memcpy(a, &an, sizeof(u_int32_t)); + *c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u), + ao % 65536, an % 65536, u); +} + +#ifdef INET6 +static void +pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u) +{ + struct pf_addr ao; + + PF_ACPY(&ao, a, AF_INET6); + PF_ACPY(a, an, AF_INET6); + + *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(*c, + ao.addr16[0], an->addr16[0], u), + ao.addr16[1], an->addr16[1], u), + ao.addr16[2], an->addr16[2], u), + ao.addr16[3], an->addr16[3], u), + ao.addr16[4], an->addr16[4], u), + ao.addr16[5], an->addr16[5], u), + ao.addr16[6], an->addr16[6], u), + ao.addr16[7], an->addr16[7], u); +} +#endif /* INET6 */ + +static void +pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, + struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c, + u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af) +{ + struct pf_addr oia, ooa; + + PF_ACPY(&oia, ia, af); + if (oa) + PF_ACPY(&ooa, oa, af); + + /* Change inner protocol port, fix inner protocol checksum. */ + if (ip != NULL) { + u_int16_t oip = *ip; + u_int32_t opc; + + if (pc != NULL) + opc = *pc; + *ip = np; + if (pc != NULL) + *pc = pf_cksum_fixup(*pc, oip, *ip, u); + *ic = pf_cksum_fixup(*ic, oip, *ip, 0); + if (pc != NULL) + *ic = pf_cksum_fixup(*ic, opc, *pc, 0); + } + /* Change inner ip address, fix inner ip and icmp checksums. 
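+ * As an illustration of the fixup arithmetic: pf_cksum_fixup() + * folds (old - new) into the ones' complement sum, so rewriting + * a single 16-bit word from 0x1234 to 0x1235 lowers the stored + * checksum by exactly one, modulo 0xffff.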
*/ + PF_ACPY(ia, na, af); + switch (af) { +#ifdef INET + case AF_INET: { + u_int32_t oh2c = *h2c; + + *h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c, + oia.addr16[0], ia->addr16[0], 0), + oia.addr16[1], ia->addr16[1], 0); + *ic = pf_cksum_fixup(pf_cksum_fixup(*ic, + oia.addr16[0], ia->addr16[0], 0), + oia.addr16[1], ia->addr16[1], 0); + *ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0); + break; + } +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(*ic, + oia.addr16[0], ia->addr16[0], u), + oia.addr16[1], ia->addr16[1], u), + oia.addr16[2], ia->addr16[2], u), + oia.addr16[3], ia->addr16[3], u), + oia.addr16[4], ia->addr16[4], u), + oia.addr16[5], ia->addr16[5], u), + oia.addr16[6], ia->addr16[6], u), + oia.addr16[7], ia->addr16[7], u); + break; +#endif /* INET6 */ + } + /* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. */ + if (oa) { + PF_ACPY(oa, na, af); + switch (af) { +#ifdef INET + case AF_INET: + *hc = pf_cksum_fixup(pf_cksum_fixup(*hc, + ooa.addr16[0], oa->addr16[0], 0), + ooa.addr16[1], oa->addr16[1], 0); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(*ic, + ooa.addr16[0], oa->addr16[0], u), + ooa.addr16[1], oa->addr16[1], u), + ooa.addr16[2], oa->addr16[2], u), + ooa.addr16[3], oa->addr16[3], u), + ooa.addr16[4], oa->addr16[4], u), + ooa.addr16[5], oa->addr16[5], u), + ooa.addr16[6], oa->addr16[6], u), + ooa.addr16[7], oa->addr16[7], u); + break; +#endif /* INET6 */ + } + } +} + + +/* + * Need to modulate the sequence numbers in the TCP SACK option + * (credits to Krzysztof Pfaff for report and patch) + */ +static int +pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd, + struct tcphdr *th, struct pf_state_peer *dst) +{ + int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen; + u_int8_t opts[TCP_MAXOLEN], *opt = opts; + int copyback = 0, i, olen; + struct sackblk sack; + +#define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2) + if (hlen < TCPOLEN_SACKLEN || + !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af)) + return 0; + + while (hlen >= TCPOLEN_SACKLEN) { + olen = opt[1]; + switch (*opt) { + case TCPOPT_EOL: /* FALLTHROUGH */ + case TCPOPT_NOP: + opt++; + hlen--; + break; + case TCPOPT_SACK: + if (olen > hlen) + olen = hlen; + if (olen >= TCPOLEN_SACKLEN) { + for (i = 2; i + TCPOLEN_SACK <= olen; + i += TCPOLEN_SACK) { + memcpy(&sack, &opt[i], sizeof(sack)); + pf_change_a(&sack.start, &th->th_sum, + htonl(ntohl(sack.start) - + dst->seqdiff), 0); + pf_change_a(&sack.end, &th->th_sum, + htonl(ntohl(sack.end) - + dst->seqdiff), 0); + memcpy(&opt[i], &sack, sizeof(sack)); + } + copyback = 1; + } + /* FALLTHROUGH */ + default: + if (olen < 2) + olen = 2; + hlen -= olen; + opt += olen; + } + } + + if (copyback) + m_copyback(m, off + sizeof(*th), thoptlen, (caddr_t)opts); + return (copyback); +} + +static void +pf_send_tcp(struct mbuf *replyto, const struct pf_rule *r, sa_family_t af, + const struct pf_addr *saddr, const struct pf_addr *daddr, + u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, + u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, + u_int16_t rtag, struct ifnet *ifp) +{ + struct pf_send_entry *pfse; + struct mbuf *m; + int len, tlen; +#ifdef INET + struct ip *h = NULL; +#endif /* INET */ +#ifdef INET6 + struct ip6_hdr *h6 = NULL; +#endif 
/* INET6 */ + struct tcphdr *th; + char *opt; + struct pf_mtag *pf_mtag; + + len = 0; + th = NULL; + + /* maximum segment size tcp option */ + tlen = sizeof(struct tcphdr); + if (mss) + tlen += 4; + + switch (af) { +#ifdef INET + case AF_INET: + len = sizeof(struct ip) + tlen; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + len = sizeof(struct ip6_hdr) + tlen; + break; +#endif /* INET6 */ + default: + panic("%s: unsupported af %d", __func__, af); + } + + /* Allocate outgoing queue entry, mbuf and mbuf tag. */ + pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT); + if (pfse == NULL) + return; + m = m_gethdr(M_NOWAIT, MT_HEADER); + if (m == NULL) { + free(pfse, M_PFTEMP); + return; + } +#ifdef MAC + mac_netinet_firewall_send(m); +#endif + if ((pf_mtag = pf_get_mtag(m)) == NULL) { + free(pfse, M_PFTEMP); + m_freem(m); + return; + } + if (tag) + m->m_flags |= M_SKIP_FIREWALL; + pf_mtag->tag = rtag; + + if (r != NULL && r->rtableid >= 0) + M_SETFIB(m, r->rtableid); + +#ifdef ALTQ + if (r != NULL && r->qid) { + pf_mtag->qid = r->qid; + + /* add hints for ecn */ + pf_mtag->hdr = mtod(m, struct ip *); + } +#endif /* ALTQ */ + m->m_data += max_linkhdr; + m->m_pkthdr.len = m->m_len = len; + m->m_pkthdr.rcvif = NULL; + bzero(m->m_data, len); + switch (af) { +#ifdef INET + case AF_INET: + h = mtod(m, struct ip *); + + /* IP header fields included in the TCP checksum */ + h->ip_p = IPPROTO_TCP; + h->ip_len = htons(tlen); + h->ip_src.s_addr = saddr->v4.s_addr; + h->ip_dst.s_addr = daddr->v4.s_addr; + + th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip)); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + h6 = mtod(m, struct ip6_hdr *); + + /* IP header fields included in the TCP checksum */ + h6->ip6_nxt = IPPROTO_TCP; + h6->ip6_plen = htons(tlen); + memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr)); + memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr)); + + th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr)); + break; +#endif /* INET6 */ + } + + /* TCP header */ + th->th_sport = sport; + th->th_dport = dport; + th->th_seq = htonl(seq); + th->th_ack = htonl(ack); + th->th_off = tlen >> 2; + th->th_flags = flags; + th->th_win = htons(win); + + if (mss) { + opt = (char *)(th + 1); + opt[0] = TCPOPT_MAXSEG; + opt[1] = 4; + HTONS(mss); + bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2); + } + + switch (af) { +#ifdef INET + case AF_INET: + /* TCP checksum */ + th->th_sum = in_cksum(m, len); + + /* Finish the IP header */ + h->ip_v = 4; + h->ip_hl = sizeof(*h) >> 2; + h->ip_tos = IPTOS_LOWDELAY; + h->ip_off = V_path_mtu_discovery ? IP_DF : 0; + h->ip_len = len; + h->ip_ttl = ttl ? ttl : V_ip_defttl; + h->ip_sum = 0; + + pfse->pfse_type = PFSE_IP; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + /* TCP checksum */ + th->th_sum = in6_cksum(m, IPPROTO_TCP, + sizeof(struct ip6_hdr), tlen); + + h6->ip6_vfc |= IPV6_VERSION; + h6->ip6_hlim = IPV6_DEFHLIM; + + pfse->pfse_type = PFSE_IP6; + break; +#endif /* INET6 */ + } + pfse->pfse_m = m; + pf_send(pfse); +} + +static void +pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, + struct pf_rule *r) +{ + struct pf_send_entry *pfse; + struct mbuf *m0; + struct pf_mtag *pf_mtag; + + /* Allocate outgoing queue entry, mbuf and mbuf tag. 
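+ * The entry is only queued here; pf_send() defers the actual + * transmission out of the packet processing path.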
*/ + pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT); + if (pfse == NULL) + return; + + if ((m0 = m_copypacket(m, M_NOWAIT)) == NULL) { + free(pfse, M_PFTEMP); + return; + } + + if ((pf_mtag = pf_get_mtag(m0)) == NULL) { + free(pfse, M_PFTEMP); + return; + } + /* XXX: revisit */ + m0->m_flags |= M_SKIP_FIREWALL; + + if (r->rtableid >= 0) + M_SETFIB(m0, r->rtableid); + +#ifdef ALTQ + if (r->qid) { + pf_mtag->qid = r->qid; + /* add hints for ecn */ + pf_mtag->hdr = mtod(m0, struct ip *); + } +#endif /* ALTQ */ + + switch (af) { +#ifdef INET + case AF_INET: + { + struct ip *ip; + + /* icmp_error() expects host byte ordering */ + ip = mtod(m0, struct ip *); + NTOHS(ip->ip_len); + NTOHS(ip->ip_off); + + pfse->pfse_type = PFSE_ICMP; + break; + } +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + pfse->pfse_type = PFSE_ICMP6; + break; +#endif /* INET6 */ + } + pfse->pfse_m = m0; + pfse->pfse_icmp_type = type; + pfse->pfse_icmp_code = code; + pf_send(pfse); +} + +/* + * Return 1 if the addresses a and b match (with mask m), otherwise return 0. + * If n is 0, they match if they are equal. If n is != 0, they match if they + * are different. + */ +int +pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m, + struct pf_addr *b, sa_family_t af) +{ + int match = 0; + + switch (af) { +#ifdef INET + case AF_INET: + if ((a->addr32[0] & m->addr32[0]) == + (b->addr32[0] & m->addr32[0])) + match++; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (((a->addr32[0] & m->addr32[0]) == + (b->addr32[0] & m->addr32[0])) && + ((a->addr32[1] & m->addr32[1]) == + (b->addr32[1] & m->addr32[1])) && + ((a->addr32[2] & m->addr32[2]) == + (b->addr32[2] & m->addr32[2])) && + ((a->addr32[3] & m->addr32[3]) == + (b->addr32[3] & m->addr32[3]))) + match++; + break; +#endif /* INET6 */ + } + if (match) { + if (n) + return (0); + else + return (1); + } else { + if (n) + return (1); + else + return (0); + } +} + +/* + * Return 1 if b <= a <= e, otherwise return 0. 
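+ * E.g. with b = 10.0.0.1 and e = 10.0.0.9, a = 10.0.0.5 matches while + * a = 10.0.0.10 does not. For IPv6 the comparison walks the four + * 32-bit words from most to least significant.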
+ */ +int +pf_match_addr_range(struct pf_addr *b, struct pf_addr *e, + struct pf_addr *a, sa_family_t af) +{ + switch (af) { +#ifdef INET + case AF_INET: + if ((a->addr32[0] < b->addr32[0]) || + (a->addr32[0] > e->addr32[0])) + return (0); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: { + int i; + + /* check a >= b */ + for (i = 0; i < 4; ++i) + if (a->addr32[i] > b->addr32[i]) + break; + else if (a->addr32[i] < b->addr32[i]) + return (0); + /* check a <= e */ + for (i = 0; i < 4; ++i) + if (a->addr32[i] < e->addr32[i]) + break; + else if (a->addr32[i] > e->addr32[i]) + return (0); + break; + } +#endif /* INET6 */ + } + return (1); +} + +static int +pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p) +{ + switch (op) { + case PF_OP_IRG: + return ((p > a1) && (p < a2)); + case PF_OP_XRG: + return ((p < a1) || (p > a2)); + case PF_OP_RRG: + return ((p >= a1) && (p <= a2)); + case PF_OP_EQ: + return (p == a1); + case PF_OP_NE: + return (p != a1); + case PF_OP_LT: + return (p < a1); + case PF_OP_LE: + return (p <= a1); + case PF_OP_GT: + return (p > a1); + case PF_OP_GE: + return (p >= a1); + } + return (0); /* never reached */ +} + +int +pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p) +{ + NTOHS(a1); + NTOHS(a2); + NTOHS(p); + return (pf_match(op, a1, a2, p)); +} + +static int +pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) +{ + if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE) + return (0); + return (pf_match(op, a1, a2, u)); +} + +static int +pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) +{ + if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE) + return (0); + return (pf_match(op, a1, a2, g)); +} + +int +pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag, int mtag) +{ + if (*tag == -1) + *tag = mtag; + + return ((!r->match_tag_not && r->match_tag == *tag) || + (r->match_tag_not && r->match_tag != *tag)); +} + +int +pf_tag_packet(struct mbuf *m, struct pf_pdesc *pd, int tag) +{ + + KASSERT(tag > 0, ("%s: tag %d", __func__, tag)); + + if (pd->pf_mtag == NULL && ((pd->pf_mtag = pf_get_mtag(m)) == NULL)) + return (ENOMEM); + + pd->pf_mtag->tag = tag; + + return (0); +} + +void +pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n, + struct pf_rule **r, struct pf_rule **a, int *match) +{ + struct pf_anchor_stackframe *f; + + PF_RULES_RASSERT(); + + (*r)->anchor->match = 0; + if (match) + *match = 0; + if (*depth >= sizeof(V_pf_anchor_stack) / + sizeof(V_pf_anchor_stack[0])) { + printf("pf_step_into_anchor: stack overflow\n"); + *r = TAILQ_NEXT(*r, entries); + return; + } else if (*depth == 0 && a != NULL) + *a = *r; + f = V_pf_anchor_stack + (*depth)++; + f->rs = *rs; + f->r = *r; + if ((*r)->anchor_wildcard) { + f->parent = &(*r)->anchor->children; + if ((f->child = RB_MIN(pf_anchor_node, f->parent)) == + NULL) { + *r = NULL; + return; + } + *rs = &f->child->ruleset; + } else { + f->parent = NULL; + f->child = NULL; + *rs = &(*r)->anchor->ruleset; + } + *r = TAILQ_FIRST((*rs)->rules[n].active.ptr); +} + +int +pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n, + struct pf_rule **r, struct pf_rule **a, int *match) +{ + struct pf_anchor_stackframe *f; + int quick = 0; + + PF_RULES_RASSERT(); + + do { + if (*depth <= 0) + break; + f = V_pf_anchor_stack + *depth - 1; + if (f->parent != NULL && f->child != NULL) { + if (f->child->match || + (match != NULL && *match)) { + f->r->anchor->match = 1; + *match = 0; + } + f->child = RB_NEXT(pf_anchor_node, f->parent, f->child); + if (f->child != NULL) { + *rs = 
&f->child->ruleset; + *r = TAILQ_FIRST((*rs)->rules[n].active.ptr); + if (*r == NULL) + continue; + else + break; + } + } + (*depth)--; + if (*depth == 0 && a != NULL) + *a = NULL; + *rs = f->rs; + if (f->r->anchor->match || (match != NULL && *match)) + quick = f->r->quick; + *r = TAILQ_NEXT(f->r, entries); + } while (*r == NULL); + + return (quick); +} + +#ifdef INET6 +void +pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, + struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af) +{ + switch (af) { +#ifdef INET + case AF_INET: + naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | + ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); + break; +#endif /* INET */ + case AF_INET6: + naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | + ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); + naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) | + ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]); + naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) | + ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]); + naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) | + ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]); + break; + } +} + +void +pf_addr_inc(struct pf_addr *addr, sa_family_t af) +{ + switch (af) { +#ifdef INET + case AF_INET: + addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); + break; +#endif /* INET */ + case AF_INET6: + if (addr->addr32[3] == 0xffffffff) { + addr->addr32[3] = 0; + if (addr->addr32[2] == 0xffffffff) { + addr->addr32[2] = 0; + if (addr->addr32[1] == 0xffffffff) { + addr->addr32[1] = 0; + addr->addr32[0] = + htonl(ntohl(addr->addr32[0]) + 1); + } else + addr->addr32[1] = + htonl(ntohl(addr->addr32[1]) + 1); + } else + addr->addr32[2] = + htonl(ntohl(addr->addr32[2]) + 1); + } else + addr->addr32[3] = + htonl(ntohl(addr->addr32[3]) + 1); + break; + } +} +#endif /* INET6 */ + +int +pf_socket_lookup(int direction, struct pf_pdesc *pd, struct mbuf *m) +{ + struct pf_addr *saddr, *daddr; + u_int16_t sport, dport; + struct inpcbinfo *pi; + struct inpcb *inp; + + pd->lookup.uid = UID_MAX; + pd->lookup.gid = GID_MAX; + + switch (pd->proto) { + case IPPROTO_TCP: + if (pd->hdr.tcp == NULL) + return (-1); + sport = pd->hdr.tcp->th_sport; + dport = pd->hdr.tcp->th_dport; + pi = &V_tcbinfo; + break; + case IPPROTO_UDP: + if (pd->hdr.udp == NULL) + return (-1); + sport = pd->hdr.udp->uh_sport; + dport = pd->hdr.udp->uh_dport; + pi = &V_udbinfo; + break; + default: + return (-1); + } + if (direction == PF_IN) { + saddr = pd->src; + daddr = pd->dst; + } else { + u_int16_t p; + + p = sport; + sport = dport; + dport = p; + saddr = pd->dst; + daddr = pd->src; + } + switch (pd->af) { +#ifdef INET + case AF_INET: + inp = in_pcblookup_mbuf(pi, saddr->v4, sport, daddr->v4, + dport, INPLOOKUP_RLOCKPCB, NULL, m); + if (inp == NULL) { + inp = in_pcblookup_mbuf(pi, saddr->v4, sport, + daddr->v4, dport, INPLOOKUP_WILDCARD | + INPLOOKUP_RLOCKPCB, NULL, m); + if (inp == NULL) + return (-1); + } + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport, &daddr->v6, + dport, INPLOOKUP_RLOCKPCB, NULL, m); + if (inp == NULL) { + inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport, + &daddr->v6, dport, INPLOOKUP_WILDCARD | + INPLOOKUP_RLOCKPCB, NULL, m); + if (inp == NULL) + return (-1); + } + break; +#endif /* INET6 */ + + default: + return (-1); + } + INP_RLOCK_ASSERT(inp); + pd->lookup.uid = inp->inp_cred->cr_uid; + pd->lookup.gid = inp->inp_cred->cr_groups[0]; + INP_RUNLOCK(inp); + + return (1); 
+} + +static u_int8_t +pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) +{ + int hlen; + u_int8_t hdr[60]; + u_int8_t *opt, optlen; + u_int8_t wscale = 0; + + hlen = th_off << 2; /* hlen <= sizeof(hdr) */ + if (hlen <= sizeof(struct tcphdr)) + return (0); + if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af)) + return (0); + opt = hdr + sizeof(struct tcphdr); + hlen -= sizeof(struct tcphdr); + while (hlen >= 3) { + switch (*opt) { + case TCPOPT_EOL: + case TCPOPT_NOP: + ++opt; + --hlen; + break; + case TCPOPT_WINDOW: + wscale = opt[2]; + if (wscale > TCP_MAX_WINSHIFT) + wscale = TCP_MAX_WINSHIFT; + wscale |= PF_WSCALE_FLAG; + /* FALLTHROUGH */ + default: + optlen = opt[1]; + if (optlen < 2) + optlen = 2; + hlen -= optlen; + opt += optlen; + break; + } + } + return (wscale); +} + +static u_int16_t +pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) +{ + int hlen; + u_int8_t hdr[60]; + u_int8_t *opt, optlen; + u_int16_t mss = V_tcp_mssdflt; + + hlen = th_off << 2; /* hlen <= sizeof(hdr) */ + if (hlen <= sizeof(struct tcphdr)) + return (0); + if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af)) + return (0); + opt = hdr + sizeof(struct tcphdr); + hlen -= sizeof(struct tcphdr); + while (hlen >= TCPOLEN_MAXSEG) { + switch (*opt) { + case TCPOPT_EOL: + case TCPOPT_NOP: + ++opt; + --hlen; + break; + case TCPOPT_MAXSEG: + bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2); + NTOHS(mss); + /* FALLTHROUGH */ + default: + optlen = opt[1]; + if (optlen < 2) + optlen = 2; + hlen -= optlen; + opt += optlen; + break; + } + } + return (mss); +} + +static u_int16_t +pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) +{ +#ifdef INET + struct sockaddr_in *dst; + struct route ro; +#endif /* INET */ +#ifdef INET6 + struct sockaddr_in6 *dst6; + struct route_in6 ro6; +#endif /* INET6 */ + struct rtentry *rt = NULL; + int hlen = 0; + u_int16_t mss = V_tcp_mssdflt; + + switch (af) { +#ifdef INET + case AF_INET: + hlen = sizeof(struct ip); + bzero(&ro, sizeof(ro)); + dst = (struct sockaddr_in *)&ro.ro_dst; + dst->sin_family = AF_INET; + dst->sin_len = sizeof(*dst); + dst->sin_addr = addr->v4; + in_rtalloc_ign(&ro, 0, rtableid); + rt = ro.ro_rt; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + hlen = sizeof(struct ip6_hdr); + bzero(&ro6, sizeof(ro6)); + dst6 = (struct sockaddr_in6 *)&ro6.ro_dst; + dst6->sin6_family = AF_INET6; + dst6->sin6_len = sizeof(*dst6); + dst6->sin6_addr = addr->v6; + in6_rtalloc_ign(&ro6, 0, rtableid); + rt = ro6.ro_rt; + break; +#endif /* INET6 */ + } + + if (rt && rt->rt_ifp) { + mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr); + mss = max(V_tcp_mssdflt, mss); + RTFREE(rt); + } + mss = min(mss, offer); + mss = max(mss, 64); /* sanity - at least max opt space */ + return (mss); +} + +static void +pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr) +{ + struct pf_rule *r = s->rule.ptr; + struct pf_src_node *sn = NULL; + + s->rt_kif = NULL; + if (!r->rt || r->rt == PF_FASTROUTE) + return; + switch (s->key[PF_SK_WIRE]->af) { +#ifdef INET + case AF_INET: + pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, &sn); + s->rt_kif = r->rpool.cur->kif; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, &sn); + s->rt_kif = r->rpool.cur->kif; + break; +#endif /* INET6 */ + } +} + +static u_int32_t +pf_tcp_iss(struct pf_pdesc *pd) +{ + MD5_CTX ctx; + u_int32_t digest[4]; + + if (V_pf_tcp_secret_init == 0) { + read_random(&V_pf_tcp_secret, 
sizeof(V_pf_tcp_secret)); + MD5Init(&V_pf_tcp_secret_ctx); + MD5Update(&V_pf_tcp_secret_ctx, V_pf_tcp_secret, + sizeof(V_pf_tcp_secret)); + V_pf_tcp_secret_init = 1; + } + + ctx = V_pf_tcp_secret_ctx; + + MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof(u_short)); + MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof(u_short)); + if (pd->af == AF_INET6) { + MD5Update(&ctx, (char *)&pd->src->v6, sizeof(struct in6_addr)); + MD5Update(&ctx, (char *)&pd->dst->v6, sizeof(struct in6_addr)); + } else { + MD5Update(&ctx, (char *)&pd->src->v4, sizeof(struct in_addr)); + MD5Update(&ctx, (char *)&pd->dst->v4, sizeof(struct in_addr)); + } + MD5Final((u_char *)digest, &ctx); + V_pf_tcp_iss_off += 4096; +#define ISN_RANDOM_INCREMENT (4096 - 1) + return (digest[0] + (arc4random() & ISN_RANDOM_INCREMENT) + + V_pf_tcp_iss_off); +#undef ISN_RANDOM_INCREMENT +} + +static int +pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, + struct pfi_kif *kif, struct mbuf *m, int off, struct pf_pdesc *pd, + struct pf_rule **am, struct pf_ruleset **rsm, struct inpcb *inp) +{ + struct pf_rule *nr = NULL; + struct pf_addr * const saddr = pd->src; + struct pf_addr * const daddr = pd->dst; + sa_family_t af = pd->af; + struct pf_rule *r, *a = NULL; + struct pf_ruleset *ruleset = NULL; + struct pf_src_node *nsn = NULL; + struct tcphdr *th = pd->hdr.tcp; + struct pf_state_key *sk = NULL, *nk = NULL; + u_short reason; + int rewrite = 0, hdrlen = 0; + int tag = -1, rtableid = -1; + int asd = 0; + int match = 0; + int state_icmp = 0; + u_int16_t sport = 0, dport = 0; + u_int16_t bproto_sum = 0, bip_sum = 0; + u_int8_t icmptype = 0, icmpcode = 0; + + PF_RULES_RASSERT(); + + if (inp != NULL) { + INP_LOCK_ASSERT(inp); + pd->lookup.uid = inp->inp_cred->cr_uid; + pd->lookup.gid = inp->inp_cred->cr_groups[0]; + pd->lookup.done = 1; + } + + switch (pd->proto) { + case IPPROTO_TCP: + sport = th->th_sport; + dport = th->th_dport; + hdrlen = sizeof(*th); + break; + case IPPROTO_UDP: + sport = pd->hdr.udp->uh_sport; + dport = pd->hdr.udp->uh_dport; + hdrlen = sizeof(*pd->hdr.udp); + break; +#ifdef INET + case IPPROTO_ICMP: + if (pd->af != AF_INET) + break; + sport = dport = pd->hdr.icmp->icmp_id; + hdrlen = sizeof(*pd->hdr.icmp); + icmptype = pd->hdr.icmp->icmp_type; + icmpcode = pd->hdr.icmp->icmp_code; + + if (icmptype == ICMP_UNREACH || + icmptype == ICMP_SOURCEQUENCH || + icmptype == ICMP_REDIRECT || + icmptype == ICMP_TIMXCEED || + icmptype == ICMP_PARAMPROB) + state_icmp++; + break; +#endif /* INET */ +#ifdef INET6 + case IPPROTO_ICMPV6: + if (af != AF_INET6) + break; + sport = dport = pd->hdr.icmp6->icmp6_id; + hdrlen = sizeof(*pd->hdr.icmp6); + icmptype = pd->hdr.icmp6->icmp6_type; + icmpcode = pd->hdr.icmp6->icmp6_code; + + if (icmptype == ICMP6_DST_UNREACH || + icmptype == ICMP6_PACKET_TOO_BIG || + icmptype == ICMP6_TIME_EXCEEDED || + icmptype == ICMP6_PARAM_PROB) + state_icmp++; + break; +#endif /* INET6 */ + default: + sport = dport = hdrlen = 0; + break; + } + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); + + /* check packet for BINAT/NAT/RDR */ + if ((nr = pf_get_translation(pd, m, off, direction, kif, &nsn, &sk, + &nk, saddr, daddr, sport, dport)) != NULL) { + KASSERT(sk != NULL, ("%s: null sk", __func__)); + KASSERT(nk != NULL, ("%s: null nk", __func__)); + + if (pd->ip_sum) + bip_sum = *pd->ip_sum; + + switch (pd->proto) { + case IPPROTO_TCP: + bproto_sum = th->th_sum; + pd->proto_sum = &th->th_sum; + + if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) || + nk->port[pd->sidx] 
!= sport) { + pf_change_ap(saddr, &th->th_sport, pd->ip_sum, + &th->th_sum, &nk->addr[pd->sidx], + nk->port[pd->sidx], 0, af); + pd->sport = &th->th_sport; + sport = th->th_sport; + } + + if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) || + nk->port[pd->didx] != dport) { + pf_change_ap(daddr, &th->th_dport, pd->ip_sum, + &th->th_sum, &nk->addr[pd->didx], + nk->port[pd->didx], 0, af); + dport = th->th_dport; + pd->dport = &th->th_dport; + } + rewrite++; + break; + case IPPROTO_UDP: + bproto_sum = pd->hdr.udp->uh_sum; + pd->proto_sum = &pd->hdr.udp->uh_sum; + + if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) || + nk->port[pd->sidx] != sport) { + pf_change_ap(saddr, &pd->hdr.udp->uh_sport, + pd->ip_sum, &pd->hdr.udp->uh_sum, + &nk->addr[pd->sidx], + nk->port[pd->sidx], 1, af); + sport = pd->hdr.udp->uh_sport; + pd->sport = &pd->hdr.udp->uh_sport; + } + + if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) || + nk->port[pd->didx] != dport) { + pf_change_ap(daddr, &pd->hdr.udp->uh_dport, + pd->ip_sum, &pd->hdr.udp->uh_sum, + &nk->addr[pd->didx], + nk->port[pd->didx], 1, af); + dport = pd->hdr.udp->uh_dport; + pd->dport = &pd->hdr.udp->uh_dport; + } + rewrite++; + break; +#ifdef INET + case IPPROTO_ICMP: + nk->port[0] = nk->port[1]; + if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET)) + pf_change_a(&saddr->v4.s_addr, pd->ip_sum, + nk->addr[pd->sidx].v4.s_addr, 0); + + if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET)) + pf_change_a(&daddr->v4.s_addr, pd->ip_sum, + nk->addr[pd->didx].v4.s_addr, 0); + + if (nk->port[1] != pd->hdr.icmp->icmp_id) { + pd->hdr.icmp->icmp_cksum = pf_cksum_fixup( + pd->hdr.icmp->icmp_cksum, sport, + nk->port[1], 0); + pd->hdr.icmp->icmp_id = nk->port[1]; + pd->sport = &pd->hdr.icmp->icmp_id; + } + m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp); + break; +#endif /* INET */ +#ifdef INET6 + case IPPROTO_ICMPV6: + nk->port[0] = nk->port[1]; + if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET6)) + pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum, + &nk->addr[pd->sidx], 0); + + if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET6)) + pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum, + &nk->addr[pd->didx], 0); + rewrite++; + break; +#endif /* INET6 */ + default: + switch (af) { +#ifdef INET + case AF_INET: + if (PF_ANEQ(saddr, + &nk->addr[pd->sidx], AF_INET)) + pf_change_a(&saddr->v4.s_addr, + pd->ip_sum, + nk->addr[pd->sidx].v4.s_addr, 0); + + if (PF_ANEQ(daddr, + &nk->addr[pd->didx], AF_INET)) + pf_change_a(&daddr->v4.s_addr, + pd->ip_sum, + nk->addr[pd->didx].v4.s_addr, 0); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (PF_ANEQ(saddr, + &nk->addr[pd->sidx], AF_INET6)) + PF_ACPY(saddr, &nk->addr[pd->sidx], af); + + if (PF_ANEQ(daddr, + &nk->addr[pd->didx], AF_INET6)) + PF_ACPY(daddr, &nk->addr[pd->didx], af); + break; +#endif /* INET6 */ + } + break; + } + if (nr->natpass) + r = NULL; + pd->nat_rule = nr; + } + + while (r != NULL) { + r->evaluations++; + if (pfi_kif_match(r->kif, kif) == r->ifnot) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != direction) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != af) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != pd->proto) + r = r->skip[PF_SKIP_PROTO].ptr; + else if (PF_MISMATCHAW(&r->src.addr, saddr, af, + r->src.neg, kif, M_GETFIB(m))) + r = r->skip[PF_SKIP_SRC_ADDR].ptr; + /* tcp/udp only.
port_op always 0 in other cases */ + else if (r->src.port_op && !pf_match_port(r->src.port_op, + r->src.port[0], r->src.port[1], sport)) + r = r->skip[PF_SKIP_SRC_PORT].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, + r->dst.neg, NULL, M_GETFIB(m))) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + /* tcp/udp only. port_op always 0 in other cases */ + else if (r->dst.port_op && !pf_match_port(r->dst.port_op, + r->dst.port[0], r->dst.port[1], dport)) + r = r->skip[PF_SKIP_DST_PORT].ptr; + /* icmp only. type always 0 in other cases */ + else if (r->type && r->type != icmptype + 1) + r = TAILQ_NEXT(r, entries); + /* icmp only. type always 0 in other cases */ + else if (r->code && r->code != icmpcode + 1) + r = TAILQ_NEXT(r, entries); + else if (r->tos && !(r->tos == pd->tos)) + r = TAILQ_NEXT(r, entries); + else if (r->rule_flag & PFRULE_FRAGMENT) + r = TAILQ_NEXT(r, entries); + else if (pd->proto == IPPROTO_TCP && + (r->flagset & th->th_flags) != r->flags) + r = TAILQ_NEXT(r, entries); + /* tcp/udp only. uid.op always 0 in other cases */ + else if (r->uid.op && (pd->lookup.done || (pd->lookup.done = + pf_socket_lookup(direction, pd, m), 1)) && + !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], + pd->lookup.uid)) + r = TAILQ_NEXT(r, entries); + /* tcp/udp only. gid.op always 0 in other cases */ + else if (r->gid.op && (pd->lookup.done || (pd->lookup.done = + pf_socket_lookup(direction, pd, m), 1)) && + !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], + pd->lookup.gid)) + r = TAILQ_NEXT(r, entries); + else if (r->prob && + r->prob <= arc4random()) + r = TAILQ_NEXT(r, entries); + else if (r->match_tag && !pf_match_tag(m, r, &tag, + pd->pf_mtag ? pd->pf_mtag->tag : 0)) + r = TAILQ_NEXT(r, entries); + else if (r->os_fingerprint != PF_OSFP_ANY && + (pd->proto != IPPROTO_TCP || !pf_osfp_match( + pf_osfp_fingerprint(pd, m, off, th), + r->os_fingerprint))) + r = TAILQ_NEXT(r, entries); + else { + if (r->tag) + tag = r->tag; + if (r->rtableid >= 0) + rtableid = r->rtableid; + if (r->anchor == NULL) { + match = 1; + *rm = r; + *am = a; + *rsm = ruleset; + if ((*rm)->quick) + break; + r = TAILQ_NEXT(r, entries); + } else + pf_step_into_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match); + } + if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match)) + break; + } + r = *rm; + a = *am; + ruleset = *rsm; + + REASON_SET(&reason, PFRES_MATCH); + + if (r->log || (nr != NULL && nr->log)) { + if (rewrite) + m_copyback(m, off, hdrlen, pd->hdr.any); + PFLOG_PACKET(kif, m, af, direction, reason, r->log ? 
r : nr, a, + ruleset, pd, 1); + } + + if ((r->action == PF_DROP) && + ((r->rule_flag & PFRULE_RETURNRST) || + (r->rule_flag & PFRULE_RETURNICMP) || + (r->rule_flag & PFRULE_RETURN))) { + /* undo NAT changes, if they have taken place */ + if (nr != NULL) { + PF_ACPY(saddr, &sk->addr[pd->sidx], af); + PF_ACPY(daddr, &sk->addr[pd->didx], af); + if (pd->sport) + *pd->sport = sk->port[pd->sidx]; + if (pd->dport) + *pd->dport = sk->port[pd->didx]; + if (pd->proto_sum) + *pd->proto_sum = bproto_sum; + if (pd->ip_sum) + *pd->ip_sum = bip_sum; + m_copyback(m, off, hdrlen, pd->hdr.any); + } + if (pd->proto == IPPROTO_TCP && + ((r->rule_flag & PFRULE_RETURNRST) || + (r->rule_flag & PFRULE_RETURN)) && + !(th->th_flags & TH_RST)) { + u_int32_t ack = ntohl(th->th_seq) + pd->p_len; + int len = 0; +#ifdef INET + struct ip *h4; +#endif +#ifdef INET6 + struct ip6_hdr *h6; +#endif + + switch (af) { +#ifdef INET + case AF_INET: + h4 = mtod(m, struct ip *); + len = ntohs(h4->ip_len) - off; + break; +#endif +#ifdef INET6 + case AF_INET6: + h6 = mtod(m, struct ip6_hdr *); + len = ntohs(h6->ip6_plen) - (off - sizeof(*h6)); + break; +#endif + } + + if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af)) + REASON_SET(&reason, PFRES_PROTCKSUM); + else { + if (th->th_flags & TH_SYN) + ack++; + if (th->th_flags & TH_FIN) + ack++; + pf_send_tcp(m, r, af, pd->dst, + pd->src, th->th_dport, th->th_sport, + ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, + r->return_ttl, 1, 0, kif->pfik_ifp); + } + } else if (pd->proto != IPPROTO_ICMP && af == AF_INET && + r->return_icmp) + pf_send_icmp(m, r->return_icmp >> 8, + r->return_icmp & 255, af, r); + else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 && + r->return_icmp6) + pf_send_icmp(m, r->return_icmp6 >> 8, + r->return_icmp6 & 255, af, r); + } + + if (r->action == PF_DROP) + goto cleanup; + + if (tag > 0 && pf_tag_packet(m, pd, tag)) { + REASON_SET(&reason, PFRES_MEMORY); + goto cleanup; + } + if (rtableid >= 0) + M_SETFIB(m, rtableid); + + if (!state_icmp && (r->keep_state || nr != NULL || + (pd->flags & PFDESC_TCP_NORM))) { + int action; + action = pf_create_state(r, nr, a, pd, nsn, nk, sk, m, off, + sport, dport, &rewrite, kif, sm, tag, bproto_sum, bip_sum, + hdrlen); + if (action != PF_PASS) + return (action); + } else { + if (sk != NULL) + uma_zfree(V_pf_state_key_z, sk); + if (nk != NULL) + uma_zfree(V_pf_state_key_z, nk); + } + + /* copy back packet headers if we performed NAT operations */ + if (rewrite) + m_copyback(m, off, hdrlen, pd->hdr.any); + + if (*sm != NULL && !((*sm)->state_flags & PFSTATE_NOSYNC) && + direction == PF_OUT && + pfsync_defer_ptr != NULL && pfsync_defer_ptr(*sm, m)) + /* + * We want the state created, but we don't + * want to send this in case a partner + * firewall has to know about it to allow + * replies through it.
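+ * pfsync holds on to the mbuf and transmits it once the peer + * has acknowledged the state, or when the deferral times out.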
+ */ + return (PF_DEFER); + + return (PF_PASS); + +cleanup: + if (sk != NULL) + uma_zfree(V_pf_state_key_z, sk); + if (nk != NULL) + uma_zfree(V_pf_state_key_z, nk); + return (PF_DROP); +} + +static int +pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, + struct pf_pdesc *pd, struct pf_src_node *nsn, struct pf_state_key *nk, + struct pf_state_key *sk, struct mbuf *m, int off, u_int16_t sport, + u_int16_t dport, int *rewrite, struct pfi_kif *kif, struct pf_state **sm, + int tag, u_int16_t bproto_sum, u_int16_t bip_sum, int hdrlen) +{ + struct pf_state *s = NULL; + struct pf_src_node *sn = NULL; + struct tcphdr *th = pd->hdr.tcp; + u_int16_t mss = V_tcp_mssdflt; + u_short reason; + + /* check maximums */ + if (r->max_states && (r->states_cur >= r->max_states)) { + V_pf_status.lcounters[LCNT_STATES]++; + REASON_SET(&reason, PFRES_MAXSTATES); + return (PF_DROP); + } + /* src node for filter rule */ + if ((r->rule_flag & PFRULE_SRCTRACK || + r->rpool.opts & PF_POOL_STICKYADDR) && + pf_insert_src_node(&sn, r, pd->src, pd->af) != 0) { + REASON_SET(&reason, PFRES_SRCLIMIT); + goto csfailed; + } + /* src node for translation rule */ + if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && + pf_insert_src_node(&nsn, nr, &sk->addr[pd->sidx], pd->af)) { + REASON_SET(&reason, PFRES_SRCLIMIT); + goto csfailed; + } + s = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO); + if (s == NULL) { + REASON_SET(&reason, PFRES_MEMORY); + goto csfailed; + } + s->rule.ptr = r; + s->nat_rule.ptr = nr; + s->anchor.ptr = a; + STATE_INC_COUNTERS(s); + if (r->allow_opts) + s->state_flags |= PFSTATE_ALLOWOPTS; + if (r->rule_flag & PFRULE_STATESLOPPY) + s->state_flags |= PFSTATE_SLOPPY; + s->log = r->log & PF_LOG_ALL; + s->sync_state = PFSYNC_S_NONE; + if (nr != NULL) + s->log |= nr->log & PF_LOG_ALL; + switch (pd->proto) { + case IPPROTO_TCP: + s->src.seqlo = ntohl(th->th_seq); + s->src.seqhi = s->src.seqlo + pd->p_len + 1; + if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && + r->keep_state == PF_STATE_MODULATE) { + /* Generate sequence number modulator */ + if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) == + 0) + s->src.seqdiff = 1; + pf_change_a(&th->th_seq, &th->th_sum, + htonl(s->src.seqlo + s->src.seqdiff), 0); + *rewrite = 1; + } else + s->src.seqdiff = 0; + if (th->th_flags & TH_SYN) { + s->src.seqhi++; + s->src.wscale = pf_get_wscale(m, off, + th->th_off, pd->af); + } + s->src.max_win = MAX(ntohs(th->th_win), 1); + if (s->src.wscale & PF_WSCALE_MASK) { + /* Remove scale factor from initial window */ + int win = s->src.max_win; + win += 1 << (s->src.wscale & PF_WSCALE_MASK); + s->src.max_win = (win - 1) >> + (s->src.wscale & PF_WSCALE_MASK); + } + if (th->th_flags & TH_FIN) + s->src.seqhi++; + s->dst.seqhi = 1; + s->dst.max_win = 1; + s->src.state = TCPS_SYN_SENT; + s->dst.state = TCPS_CLOSED; + s->timeout = PFTM_TCP_FIRST_PACKET; + break; + case IPPROTO_UDP: + s->src.state = PFUDPS_SINGLE; + s->dst.state = PFUDPS_NO_TRAFFIC; + s->timeout = PFTM_UDP_FIRST_PACKET; + break; + case IPPROTO_ICMP: +#ifdef INET6 + case IPPROTO_ICMPV6: +#endif + s->timeout = PFTM_ICMP_FIRST_PACKET; + break; + default: + s->src.state = PFOTHERS_SINGLE; + s->dst.state = PFOTHERS_NO_TRAFFIC; + s->timeout = PFTM_OTHER_FIRST_PACKET; + } + + s->creation = time_uptime; + s->expire = time_uptime; + + if (sn != NULL) { + s->src_node = sn; + s->src_node->states++; + } + if (nsn != NULL) { + /* XXX We only modify one side for now. 
*/ + PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af); + s->nat_src_node = nsn; + s->nat_src_node->states++; + } + if (pd->proto == IPPROTO_TCP) { + if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m, + off, pd, th, &s->src, &s->dst)) { + REASON_SET(&reason, PFRES_MEMORY); + pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); + uma_zfree(V_pf_state_z, s); + return (PF_DROP); + } + if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub && + pf_normalize_tcp_stateful(m, off, pd, &reason, th, s, + &s->src, &s->dst, rewrite)) { + /* This really shouldn't happen!!! */ + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_normalize_tcp_stateful failed on first pkt")); + pf_normalize_tcp_cleanup(s); + pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); + uma_zfree(V_pf_state_z, s); + return (PF_DROP); + } + } + s->direction = pd->dir; + + /* + * sk/nk could already have been set up by pf_get_translation(). + */ + if (nr == NULL) { + KASSERT((sk == NULL && nk == NULL), ("%s: nr %p sk %p, nk %p", + __func__, nr, sk, nk)); + sk = pf_state_key_setup(pd, pd->src, pd->dst, sport, dport); + if (sk == NULL) + goto csfailed; + nk = sk; + } else + KASSERT((sk != NULL && nk != NULL), ("%s: nr %p sk %p, nk %p", + __func__, nr, sk, nk)); + + /* Swap sk/nk for PF_OUT. */ + if (pf_state_insert(BOUND_IFACE(r, kif), + (pd->dir == PF_IN) ? sk : nk, + (pd->dir == PF_IN) ? nk : sk, s)) { + if (pd->proto == IPPROTO_TCP) + pf_normalize_tcp_cleanup(s); + REASON_SET(&reason, PFRES_STATEINS); + pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); + uma_zfree(V_pf_state_z, s); + return (PF_DROP); + } else + *sm = s; + + pf_set_rt_ifp(s, pd->src); /* needs s->state_key set */ + if (tag > 0) + s->tag = tag; + if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) == + TH_SYN && r->keep_state == PF_STATE_SYNPROXY) { + s->src.state = PF_TCPS_PROXY_SRC; + /* undo NAT changes, if they have taken place */ + if (nr != NULL) { + struct pf_state_key *skt = s->key[PF_SK_WIRE]; + if (pd->dir == PF_OUT) + skt = s->key[PF_SK_STACK]; + PF_ACPY(pd->src, &skt->addr[pd->sidx], pd->af); + PF_ACPY(pd->dst, &skt->addr[pd->didx], pd->af); + if (pd->sport) + *pd->sport = skt->port[pd->sidx]; + if (pd->dport) + *pd->dport = skt->port[pd->didx]; + if (pd->proto_sum) + *pd->proto_sum = bproto_sum; + if (pd->ip_sum) + *pd->ip_sum = bip_sum; + m_copyback(m, off, hdrlen, pd->hdr.any); + } + s->src.seqhi = htonl(arc4random()); + /* Find mss option */ + int rtid = M_GETFIB(m); + mss = pf_get_mss(m, off, th->th_off, pd->af); + mss = pf_calc_mss(pd->src, pd->af, rtid, mss); + mss = pf_calc_mss(pd->dst, pd->af, rtid, mss); + s->src.mss = mss; + pf_send_tcp(NULL, r, pd->af, pd->dst, pd->src, th->th_dport, + th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, + TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL); + REASON_SET(&reason, PFRES_SYNPROXY); + return (PF_SYNPROXY_DROP); + } + + return (PF_PASS); + +csfailed: + if (sk != NULL) + uma_zfree(V_pf_state_key_z, sk); + if (nk != NULL) + uma_zfree(V_pf_state_key_z, nk); + + if (sn != NULL && sn->states == 0 && sn->expire == 0) { + pf_remove_src_node(sn); + V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; + V_pf_status.src_nodes--; + uma_zfree(V_pf_sources_z, sn); + } + if (nsn != sn && nsn != NULL && nsn->states == 0 && nsn->expire == 0) { + pf_remove_src_node(nsn); + V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; + V_pf_status.src_nodes--; + uma_zfree(V_pf_sources_z, nsn); + } + return (PF_DROP); +} + +static int +pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, + struct mbuf *m, void *h, struct 
pf_pdesc *pd, struct pf_rule **am, + struct pf_ruleset **rsm) +{ + struct pf_rule *r, *a = NULL; + struct pf_ruleset *ruleset = NULL; + sa_family_t af = pd->af; + u_short reason; + int tag = -1; + int asd = 0; + int match = 0; + + PF_RULES_RASSERT(); + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); + while (r != NULL) { + r->evaluations++; + if (pfi_kif_match(r->kif, kif) == r->ifnot) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != direction) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != af) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != pd->proto) + r = r->skip[PF_SKIP_PROTO].ptr; + else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, + r->src.neg, kif, M_GETFIB(m))) + r = r->skip[PF_SKIP_SRC_ADDR].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, + r->dst.neg, NULL, M_GETFIB(m))) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else if (r->tos && !(r->tos == pd->tos)) + r = TAILQ_NEXT(r, entries); + else if (r->os_fingerprint != PF_OSFP_ANY) + r = TAILQ_NEXT(r, entries); + else if (pd->proto == IPPROTO_UDP && + (r->src.port_op || r->dst.port_op)) + r = TAILQ_NEXT(r, entries); + else if (pd->proto == IPPROTO_TCP && + (r->src.port_op || r->dst.port_op || r->flagset)) + r = TAILQ_NEXT(r, entries); + else if ((pd->proto == IPPROTO_ICMP || + pd->proto == IPPROTO_ICMPV6) && + (r->type || r->code)) + r = TAILQ_NEXT(r, entries); + else if (r->prob && r->prob <= + (arc4random() % (UINT_MAX - 1) + 1)) + r = TAILQ_NEXT(r, entries); + else if (r->match_tag && !pf_match_tag(m, r, &tag, + pd->pf_mtag ? pd->pf_mtag->tag : 0)) + r = TAILQ_NEXT(r, entries); + else { + if (r->anchor == NULL) { + match = 1; + *rm = r; + *am = a; + *rsm = ruleset; + if ((*rm)->quick) + break; + r = TAILQ_NEXT(r, entries); + } else + pf_step_into_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match); + } + if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match)) + break; + } + r = *rm; + a = *am; + ruleset = *rsm; + + REASON_SET(&reason, PFRES_MATCH); + + if (r->log) + PFLOG_PACKET(kif, m, af, direction, reason, r, a, ruleset, pd, + 1); + + if (r->action != PF_PASS) + return (PF_DROP); + + if (tag > 0 && pf_tag_packet(m, pd, tag)) { + REASON_SET(&reason, PFRES_MEMORY); + return (PF_DROP); + } + + return (PF_PASS); +} + +static int +pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, + struct pf_state **state, struct pfi_kif *kif, struct mbuf *m, int off, + struct pf_pdesc *pd, u_short *reason, int *copyback) +{ + struct tcphdr *th = pd->hdr.tcp; + u_int16_t win = ntohs(th->th_win); + u_int32_t ack, end, seq, orig_seq; + u_int8_t sws, dws; + int ackskew; + + if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) { + sws = src->wscale & PF_WSCALE_MASK; + dws = dst->wscale & PF_WSCALE_MASK; + } else + sws = dws = 0; + + /* + * Sequence tracking algorithm from Guido van Rooij's paper: + * http://www.madison-gurkha.com/publications/tcp_filtering/ + * tcp_filtering.ps + */ + + orig_seq = seq = ntohl(th->th_seq); + if (src->seqlo == 0) { + /* First packet from this end. 
Set its state */ + + if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) && + src->scrub == NULL) { + if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) { + REASON_SET(reason, PFRES_MEMORY); + return (PF_DROP); + } + } + + /* Deferred generation of sequence number modulator */ + if (dst->seqdiff && !src->seqdiff) { + /* use random iss for the TCP server */ + while ((src->seqdiff = arc4random() - seq) == 0) + ; + ack = ntohl(th->th_ack) - dst->seqdiff; + pf_change_a(&th->th_seq, &th->th_sum, htonl(seq + + src->seqdiff), 0); + pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0); + *copyback = 1; + } else { + ack = ntohl(th->th_ack); + } + + end = seq + pd->p_len; + if (th->th_flags & TH_SYN) { + end++; + if (dst->wscale & PF_WSCALE_FLAG) { + src->wscale = pf_get_wscale(m, off, th->th_off, + pd->af); + if (src->wscale & PF_WSCALE_FLAG) { + /* Remove scale factor from initial + * window */ + sws = src->wscale & PF_WSCALE_MASK; + win = ((u_int32_t)win + (1 << sws) - 1) + >> sws; + dws = dst->wscale & PF_WSCALE_MASK; + } else { + /* fixup other window */ + dst->max_win <<= dst->wscale & + PF_WSCALE_MASK; + /* in case of a retrans SYN|ACK */ + dst->wscale = 0; + } + } + } + if (th->th_flags & TH_FIN) + end++; + + src->seqlo = seq; + if (src->state < TCPS_SYN_SENT) + src->state = TCPS_SYN_SENT; + + /* + * May need to slide the window (seqhi may have been set by + * the crappy stack check or if we picked up the connection + * after establishment) + */ + if (src->seqhi == 1 || + SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi)) + src->seqhi = end + MAX(1, dst->max_win << dws); + if (win > src->max_win) + src->max_win = win; + + } else { + ack = ntohl(th->th_ack) - dst->seqdiff; + if (src->seqdiff) { + /* Modulate sequence numbers */ + pf_change_a(&th->th_seq, &th->th_sum, htonl(seq + + src->seqdiff), 0); + pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0); + *copyback = 1; + } + end = seq + pd->p_len; + if (th->th_flags & TH_SYN) + end++; + if (th->th_flags & TH_FIN) + end++; + } + + if ((th->th_flags & TH_ACK) == 0) { + /* Let it pass through the ack skew check */ + ack = dst->seqlo; + } else if ((ack == 0 && + (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) || + /* broken tcp stacks do not set ack */ + (dst->state < TCPS_SYN_SENT)) { + /* + * Many stacks (ours included) will set the ACK number in a + * FIN|ACK if the SYN times out -- no sequence to ACK. + */ + ack = dst->seqlo; + } + + if (seq == end) { + /* Ease sequencing restrictions on no data packets */ + seq = src->seqlo; + end = seq; + } + + ackskew = dst->seqlo - ack; + + + /* + * Need to demodulate the sequence numbers in any TCP SACK options + * (Selective ACK). We could optionally validate the SACK values + * against the current ACK window, either forwards or backwards, but + * I'm not confident that SACK has been implemented properly + * everywhere. It wouldn't surprise me if several stacks accidentally + * SACK too far backwards of previously ACKed data. There really aren't + * any security implications of bad SACKing unless the target stack + * doesn't validate the option length correctly. Someone trying to + * spoof into a TCP connection won't bother blindly sending SACK + * options anyway.
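+ * Each SACK edge below is shifted by dst->seqdiff, matching the + * demodulation of th_ack above, and th_sum is fixed up + * incrementally for every rewritten block.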
+ */ + if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) { + if (pf_modulate_sack(m, off, pd, th, dst)) + *copyback = 1; + } + + +#define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */ + if (SEQ_GEQ(src->seqhi, end) && + /* Last octet inside other's window space */ + SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) && + /* Retrans: not more than one window back */ + (ackskew >= -MAXACKWINDOW) && + /* Acking not more than one reassembled fragment backwards */ + (ackskew <= (MAXACKWINDOW << sws)) && + /* Acking not more than one window forward */ + ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo || + (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) || + (pd->flags & PFDESC_IP_REAS) == 0)) { + /* Require an exact/+1 sequence match on resets when possible */ + + if (dst->scrub || src->scrub) { + if (pf_normalize_tcp_stateful(m, off, pd, reason, th, + *state, src, dst, copyback)) + return (PF_DROP); + } + + /* update max window */ + if (src->max_win < win) + src->max_win = win; + /* synchronize sequencing */ + if (SEQ_GT(end, src->seqlo)) + src->seqlo = end; + /* slide the window of what the other end can send */ + if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) + dst->seqhi = ack + MAX((win << sws), 1); + + + /* update states */ + if (th->th_flags & TH_SYN) + if (src->state < TCPS_SYN_SENT) + src->state = TCPS_SYN_SENT; + if (th->th_flags & TH_FIN) + if (src->state < TCPS_CLOSING) + src->state = TCPS_CLOSING; + if (th->th_flags & TH_ACK) { + if (dst->state == TCPS_SYN_SENT) { + dst->state = TCPS_ESTABLISHED; + if (src->state == TCPS_ESTABLISHED && + (*state)->src_node != NULL && + pf_src_connlimit(state)) { + REASON_SET(reason, PFRES_SRCLIMIT); + return (PF_DROP); + } + } else if (dst->state == TCPS_CLOSING) + dst->state = TCPS_FIN_WAIT_2; + } + if (th->th_flags & TH_RST) + src->state = dst->state = TCPS_TIME_WAIT; + + /* update expire time */ + (*state)->expire = time_uptime; + if (src->state >= TCPS_FIN_WAIT_2 && + dst->state >= TCPS_FIN_WAIT_2) + (*state)->timeout = PFTM_TCP_CLOSED; + else if (src->state >= TCPS_CLOSING && + dst->state >= TCPS_CLOSING) + (*state)->timeout = PFTM_TCP_FIN_WAIT; + else if (src->state < TCPS_ESTABLISHED || + dst->state < TCPS_ESTABLISHED) + (*state)->timeout = PFTM_TCP_OPENING; + else if (src->state >= TCPS_CLOSING || + dst->state >= TCPS_CLOSING) + (*state)->timeout = PFTM_TCP_CLOSING; + else + (*state)->timeout = PFTM_TCP_ESTABLISHED; + + /* Fall through to PASS packet */ + + } else if ((dst->state < TCPS_SYN_SENT || + dst->state >= TCPS_FIN_WAIT_2 || + src->state >= TCPS_FIN_WAIT_2) && + SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) && + /* Within a window forward of the originating packet */ + SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) { + /* Within a window backward of the originating packet */ + + /* + * This currently handles three situations: + * 1) Stupid stacks will shotgun SYNs before their peer + * replies. + * 2) When PF catches an already established stream (the + * firewall rebooted, the state table was flushed, routes + * changed...) + * 3) Packets get funky immediately after the connection + * closes (this should catch Solaris spurious ACK|FINs + * that web servers like to spew after a close) + * + * This must be a little more careful than the above code + * since packet floods will also be caught here. We don't + * update the TTL here to mitigate the damage of a packet + * flood and so the same code can handle awkward establishment + * and a loosened connection close. 
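+ * Concretely, the code below refreshes seqlo/seqhi and max_win + * but leaves the state's expire timestamp and timeout untouched.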
+ * In the establishment case, a correct peer response will + * validate the connection, go through the normal state code + * and keep updating the state TTL. + */ + + if (V_pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: loose state match: "); + pf_print_state(*state); + pf_print_flags(th->th_flags); + printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " + "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, + pd->p_len, ackskew, (unsigned long long)(*state)->packets[0], + (unsigned long long)(*state)->packets[1], + pd->dir == PF_IN ? "in" : "out", + pd->dir == (*state)->direction ? "fwd" : "rev"); + } + + if (dst->scrub || src->scrub) { + if (pf_normalize_tcp_stateful(m, off, pd, reason, th, + *state, src, dst, copyback)) + return (PF_DROP); + } + + /* update max window */ + if (src->max_win < win) + src->max_win = win; + /* synchronize sequencing */ + if (SEQ_GT(end, src->seqlo)) + src->seqlo = end; + /* slide the window of what the other end can send */ + if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) + dst->seqhi = ack + MAX((win << sws), 1); + + /* + * Cannot set dst->seqhi here since this could be a shotgunned + * SYN and not an already established connection. + */ + + if (th->th_flags & TH_FIN) + if (src->state < TCPS_CLOSING) + src->state = TCPS_CLOSING; + if (th->th_flags & TH_RST) + src->state = dst->state = TCPS_TIME_WAIT; + + /* Fall through to PASS packet */ + + } else { + if ((*state)->dst.state == TCPS_SYN_SENT && + (*state)->src.state == TCPS_SYN_SENT) { + /* Send RST for state mismatches during handshake */ + if (!(th->th_flags & TH_RST)) + pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, + pd->dst, pd->src, th->th_dport, + th->th_sport, ntohl(th->th_ack), 0, + TH_RST, 0, 0, + (*state)->rule.ptr->return_ttl, 1, 0, + kif->pfik_ifp); + src->seqlo = 0; + src->seqhi = 1; + src->max_win = 1; + } else if (V_pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: BAD state: "); + pf_print_state(*state); + pf_print_flags(th->th_flags); + printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " + "pkts=%llu:%llu dir=%s,%s\n", + seq, orig_seq, ack, pd->p_len, ackskew, + (unsigned long long)(*state)->packets[0], + (unsigned long long)(*state)->packets[1], + pd->dir == PF_IN ? "in" : "out", + pd->dir == (*state)->direction ? "fwd" : "rev"); + printf("pf: State failure on: %c %c %c %c | %c %c\n", + SEQ_GEQ(src->seqhi, end) ? ' ' : '1', + SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ? + ' ': '2', + (ackskew >= -MAXACKWINDOW) ? ' ' : '3', + (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4', + SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5', + SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' 
' :'6');
+		}
+		REASON_SET(reason, PFRES_BADSTATE);
+		return (PF_DROP);
+	}
+
+	return (PF_PASS);
+}
+
+static int
+pf_tcp_track_sloppy(struct pf_state_peer *src, struct pf_state_peer *dst,
+    struct pf_state **state, struct pf_pdesc *pd, u_short *reason)
+{
+	struct tcphdr *th = pd->hdr.tcp;
+
+	if (th->th_flags & TH_SYN)
+		if (src->state < TCPS_SYN_SENT)
+			src->state = TCPS_SYN_SENT;
+	if (th->th_flags & TH_FIN)
+		if (src->state < TCPS_CLOSING)
+			src->state = TCPS_CLOSING;
+	if (th->th_flags & TH_ACK) {
+		if (dst->state == TCPS_SYN_SENT) {
+			dst->state = TCPS_ESTABLISHED;
+			if (src->state == TCPS_ESTABLISHED &&
+			    (*state)->src_node != NULL &&
+			    pf_src_connlimit(state)) {
+				REASON_SET(reason, PFRES_SRCLIMIT);
+				return (PF_DROP);
+			}
+		} else if (dst->state == TCPS_CLOSING) {
+			dst->state = TCPS_FIN_WAIT_2;
+		} else if (src->state == TCPS_SYN_SENT &&
+		    dst->state < TCPS_SYN_SENT) {
+			/*
+			 * Handle a special sloppy case where we only see one
+			 * half of the connection. If there is an ACK after
+			 * the initial SYN without ever seeing a packet from
+			 * the destination, set the connection to established.
+			 */
+			dst->state = src->state = TCPS_ESTABLISHED;
+			if ((*state)->src_node != NULL &&
+			    pf_src_connlimit(state)) {
+				REASON_SET(reason, PFRES_SRCLIMIT);
+				return (PF_DROP);
+			}
+		} else if (src->state == TCPS_CLOSING &&
+		    dst->state == TCPS_ESTABLISHED &&
+		    dst->seqlo == 0) {
+			/*
+			 * Handle the closing of half connections where we
+			 * don't see the full bidirectional FIN/ACK+ACK
+			 * handshake.
+			 */
+			dst->state = TCPS_CLOSING;
+		}
+	}
+	if (th->th_flags & TH_RST)
+		src->state = dst->state = TCPS_TIME_WAIT;
+
+	/* update expire time */
+	(*state)->expire = time_uptime;
+	if (src->state >= TCPS_FIN_WAIT_2 &&
+	    dst->state >= TCPS_FIN_WAIT_2)
+		(*state)->timeout = PFTM_TCP_CLOSED;
+	else if (src->state >= TCPS_CLOSING &&
+	    dst->state >= TCPS_CLOSING)
+		(*state)->timeout = PFTM_TCP_FIN_WAIT;
+	else if (src->state < TCPS_ESTABLISHED ||
+	    dst->state < TCPS_ESTABLISHED)
+		(*state)->timeout = PFTM_TCP_OPENING;
+	else if (src->state >= TCPS_CLOSING ||
+	    dst->state >= TCPS_CLOSING)
+		(*state)->timeout = PFTM_TCP_CLOSING;
+	else
+		(*state)->timeout = PFTM_TCP_ESTABLISHED;
+
+	return (PF_PASS);
+}
+
+static int
+pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
+    struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
+    u_short *reason)
+{
+	struct pf_state_key_cmp	 key;
+	struct tcphdr		*th = pd->hdr.tcp;
+	int			 copyback = 0;
+	struct pf_state_peer	*src, *dst;
+	struct pf_state_key	*sk;
+
+	bzero(&key, sizeof(key));
+	key.af = pd->af;
+	key.proto = IPPROTO_TCP;
+	if (direction == PF_IN)	{	/* wire side, straight */
+		PF_ACPY(&key.addr[0], pd->src, key.af);
+		PF_ACPY(&key.addr[1], pd->dst, key.af);
+		key.port[0] = th->th_sport;
+		key.port[1] = th->th_dport;
+	} else {			/* stack side, reverse */
+		PF_ACPY(&key.addr[1], pd->src, key.af);
+		PF_ACPY(&key.addr[0], pd->dst, key.af);
+		key.port[1] = th->th_sport;
+		key.port[0] = th->th_dport;
+	}
+
+	STATE_LOOKUP(kif, &key, direction, *state, pd);
+
+	if (direction == (*state)->direction) {
+		src = &(*state)->src;
+		dst = &(*state)->dst;
+	} else {
+		src = &(*state)->dst;
+		dst = &(*state)->src;
+	}
+
+	sk = (*state)->key[pd->didx];
+
+	if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
+		if (direction != (*state)->direction) {
+			REASON_SET(reason, PFRES_SYNPROXY);
+			return (PF_SYNPROXY_DROP);
+		}
+		if (th->th_flags & TH_SYN) {
+			if (ntohl(th->th_seq) != (*state)->src.seqlo) {
+				REASON_SET(reason, PFRES_SYNPROXY);
+				return (PF_DROP);
+			}
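+			/*
+			 * First leg of the synproxy handshake: answer the
+			 * client's SYN ourselves with a SYN|ACK carrying
+			 * our own ISN (src.seqhi); the destination is not
+			 * contacted until the client's ACK proves it is
+			 * live.
+			 */
+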
pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst, + pd->src, th->th_dport, th->th_sport, + (*state)->src.seqhi, ntohl(th->th_seq) + 1, + TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, 0, NULL); + REASON_SET(reason, PFRES_SYNPROXY); + return (PF_SYNPROXY_DROP); + } else if (!(th->th_flags & TH_ACK) || + (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || + (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { + REASON_SET(reason, PFRES_SYNPROXY); + return (PF_DROP); + } else if ((*state)->src_node != NULL && + pf_src_connlimit(state)) { + REASON_SET(reason, PFRES_SRCLIMIT); + return (PF_DROP); + } else + (*state)->src.state = PF_TCPS_PROXY_DST; + } + if ((*state)->src.state == PF_TCPS_PROXY_DST) { + if (direction == (*state)->direction) { + if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || + (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || + (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { + REASON_SET(reason, PFRES_SYNPROXY); + return (PF_DROP); + } + (*state)->src.max_win = MAX(ntohs(th->th_win), 1); + if ((*state)->dst.seqhi == 1) + (*state)->dst.seqhi = htonl(arc4random()); + pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, + &sk->addr[pd->sidx], &sk->addr[pd->didx], + sk->port[pd->sidx], sk->port[pd->didx], + (*state)->dst.seqhi, 0, TH_SYN, 0, + (*state)->src.mss, 0, 0, (*state)->tag, NULL); + REASON_SET(reason, PFRES_SYNPROXY); + return (PF_SYNPROXY_DROP); + } else if (((th->th_flags & (TH_SYN|TH_ACK)) != + (TH_SYN|TH_ACK)) || + (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) { + REASON_SET(reason, PFRES_SYNPROXY); + return (PF_DROP); + } else { + (*state)->dst.max_win = MAX(ntohs(th->th_win), 1); + (*state)->dst.seqlo = ntohl(th->th_seq); + pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst, + pd->src, th->th_dport, th->th_sport, + ntohl(th->th_ack), ntohl(th->th_seq) + 1, + TH_ACK, (*state)->src.max_win, 0, 0, 0, + (*state)->tag, NULL); + pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, + &sk->addr[pd->sidx], &sk->addr[pd->didx], + sk->port[pd->sidx], sk->port[pd->didx], + (*state)->src.seqhi + 1, (*state)->src.seqlo + 1, + TH_ACK, (*state)->dst.max_win, 0, 0, 1, 0, NULL); + (*state)->src.seqdiff = (*state)->dst.seqhi - + (*state)->src.seqlo; + (*state)->dst.seqdiff = (*state)->src.seqhi - + (*state)->dst.seqlo; + (*state)->src.seqhi = (*state)->src.seqlo + + (*state)->dst.max_win; + (*state)->dst.seqhi = (*state)->dst.seqlo + + (*state)->src.max_win; + (*state)->src.wscale = (*state)->dst.wscale = 0; + (*state)->src.state = (*state)->dst.state = + TCPS_ESTABLISHED; + REASON_SET(reason, PFRES_SYNPROXY); + return (PF_SYNPROXY_DROP); + } + } + + if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) && + dst->state >= TCPS_FIN_WAIT_2 && + src->state >= TCPS_FIN_WAIT_2) { + if (V_pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: state reuse "); + pf_print_state(*state); + pf_print_flags(th->th_flags); + printf("\n"); + } + /* XXX make sure it's the same direction ?? 
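+		 * A fresh SYN on an already closed state: tear the old
+		 * state down now and drop the packet, so that the
+		 * retransmitted SYN can create a new state through the
+		 * ruleset.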
		 */
+		(*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
+		pf_unlink_state(*state, PF_ENTER_LOCKED);
+		*state = NULL;
+		return (PF_DROP);
+	}
+
+	if ((*state)->state_flags & PFSTATE_SLOPPY) {
+		if (pf_tcp_track_sloppy(src, dst, state, pd, reason) == PF_DROP)
+			return (PF_DROP);
+	} else {
+		if (pf_tcp_track_full(src, dst, state, kif, m, off, pd, reason,
+		    &copyback) == PF_DROP)
+			return (PF_DROP);
+	}
+
+	/* translate source/destination address, if necessary */
+	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
+		struct pf_state_key *nk = (*state)->key[pd->didx];
+
+		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
+		    nk->port[pd->sidx] != th->th_sport)
+			pf_change_ap(pd->src, &th->th_sport, pd->ip_sum,
+			    &th->th_sum, &nk->addr[pd->sidx],
+			    nk->port[pd->sidx], 0, pd->af);
+
+		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
+		    nk->port[pd->didx] != th->th_dport)
+			pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum,
+			    &th->th_sum, &nk->addr[pd->didx],
+			    nk->port[pd->didx], 0, pd->af);
+		copyback = 1;
+	}
+
+	/* Copyback sequence modulation or stateful scrub changes if needed */
+	if (copyback)
+		m_copyback(m, off, sizeof(*th), (caddr_t)th);
+
+	return (PF_PASS);
+}
+
+static int
+pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
+    struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
+{
+	struct pf_state_peer	*src, *dst;
+	struct pf_state_key_cmp	 key;
+	struct udphdr		*uh = pd->hdr.udp;
+
+	bzero(&key, sizeof(key));
+	key.af = pd->af;
+	key.proto = IPPROTO_UDP;
+	if (direction == PF_IN)	{	/* wire side, straight */
+		PF_ACPY(&key.addr[0], pd->src, key.af);
+		PF_ACPY(&key.addr[1], pd->dst, key.af);
+		key.port[0] = uh->uh_sport;
+		key.port[1] = uh->uh_dport;
+	} else {			/* stack side, reverse */
+		PF_ACPY(&key.addr[1], pd->src, key.af);
+		PF_ACPY(&key.addr[0], pd->dst, key.af);
+		key.port[1] = uh->uh_sport;
+		key.port[0] = uh->uh_dport;
+	}
+
+	STATE_LOOKUP(kif, &key, direction, *state, pd);
+
+	if (direction == (*state)->direction) {
+		src = &(*state)->src;
+		dst = &(*state)->dst;
+	} else {
+		src = &(*state)->dst;
+		dst = &(*state)->src;
+	}
+
+	/* update states */
+	if (src->state < PFUDPS_SINGLE)
+		src->state = PFUDPS_SINGLE;
+	if (dst->state == PFUDPS_SINGLE)
+		dst->state = PFUDPS_MULTIPLE;
+
+	/* update expire time */
+	(*state)->expire = time_uptime;
+	if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
+		(*state)->timeout = PFTM_UDP_MULTIPLE;
+	else
+		(*state)->timeout = PFTM_UDP_SINGLE;
+
+	/* translate source/destination address, if necessary */
+	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
+		struct pf_state_key *nk = (*state)->key[pd->didx];
+
+		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
+		    nk->port[pd->sidx] != uh->uh_sport)
+			pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum,
+			    &uh->uh_sum, &nk->addr[pd->sidx],
+			    nk->port[pd->sidx], 1, pd->af);
+
+		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
+		    nk->port[pd->didx] != uh->uh_dport)
+			pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum,
+			    &uh->uh_sum, &nk->addr[pd->didx],
+			    nk->port[pd->didx], 1, pd->af);
+		m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
+	}
+
+	return (PF_PASS);
+}
+
+static int
+pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
+    struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason)
+{
+	struct pf_addr  *saddr = pd->src, *daddr = pd->dst;
+	u_int16_t	 icmpid = 0, *icmpsum;
+	u_int8_t	 icmptype;
+	int		 state_icmp = 0;
+	struct pf_state_key_cmp key;
+
+	bzero(&key, sizeof(key));
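+
+	/*
+	 * Classify the message first: ICMP queries and replies carry
+	 * their own id and match an ICMP state directly, while error
+	 * messages (unreachable, time exceeded, ...) quote the packet
+	 * that triggered them and are matched against the state of the
+	 * embedded TCP/UDP/ICMP payload instead.
+	 */
+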
switch (pd->proto) { +#ifdef INET + case IPPROTO_ICMP: + icmptype = pd->hdr.icmp->icmp_type; + icmpid = pd->hdr.icmp->icmp_id; + icmpsum = &pd->hdr.icmp->icmp_cksum; + + if (icmptype == ICMP_UNREACH || + icmptype == ICMP_SOURCEQUENCH || + icmptype == ICMP_REDIRECT || + icmptype == ICMP_TIMXCEED || + icmptype == ICMP_PARAMPROB) + state_icmp++; + break; +#endif /* INET */ +#ifdef INET6 + case IPPROTO_ICMPV6: + icmptype = pd->hdr.icmp6->icmp6_type; + icmpid = pd->hdr.icmp6->icmp6_id; + icmpsum = &pd->hdr.icmp6->icmp6_cksum; + + if (icmptype == ICMP6_DST_UNREACH || + icmptype == ICMP6_PACKET_TOO_BIG || + icmptype == ICMP6_TIME_EXCEEDED || + icmptype == ICMP6_PARAM_PROB) + state_icmp++; + break; +#endif /* INET6 */ + } + + if (!state_icmp) { + + /* + * ICMP query/reply message not related to a TCP/UDP packet. + * Search for an ICMP state. + */ + key.af = pd->af; + key.proto = pd->proto; + key.port[0] = key.port[1] = icmpid; + if (direction == PF_IN) { /* wire side, straight */ + PF_ACPY(&key.addr[0], pd->src, key.af); + PF_ACPY(&key.addr[1], pd->dst, key.af); + } else { /* stack side, reverse */ + PF_ACPY(&key.addr[1], pd->src, key.af); + PF_ACPY(&key.addr[0], pd->dst, key.af); + } + + STATE_LOOKUP(kif, &key, direction, *state, pd); + + (*state)->expire = time_uptime; + (*state)->timeout = PFTM_ICMP_ERROR_REPLY; + + /* translate source/destination address, if necessary */ + if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = (*state)->key[pd->didx]; + + switch (pd->af) { +#ifdef INET + case AF_INET: + if (PF_ANEQ(pd->src, + &nk->addr[pd->sidx], AF_INET)) + pf_change_a(&saddr->v4.s_addr, + pd->ip_sum, + nk->addr[pd->sidx].v4.s_addr, 0); + + if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], + AF_INET)) + pf_change_a(&daddr->v4.s_addr, + pd->ip_sum, + nk->addr[pd->didx].v4.s_addr, 0); + + if (nk->port[0] != + pd->hdr.icmp->icmp_id) { + pd->hdr.icmp->icmp_cksum = + pf_cksum_fixup( + pd->hdr.icmp->icmp_cksum, icmpid, + nk->port[pd->sidx], 0); + pd->hdr.icmp->icmp_id = + nk->port[pd->sidx]; + } + + m_copyback(m, off, ICMP_MINLEN, + (caddr_t )pd->hdr.icmp); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (PF_ANEQ(pd->src, + &nk->addr[pd->sidx], AF_INET6)) + pf_change_a6(saddr, + &pd->hdr.icmp6->icmp6_cksum, + &nk->addr[pd->sidx], 0); + + if (PF_ANEQ(pd->dst, + &nk->addr[pd->didx], AF_INET6)) + pf_change_a6(daddr, + &pd->hdr.icmp6->icmp6_cksum, + &nk->addr[pd->didx], 0); + + m_copyback(m, off, sizeof(struct icmp6_hdr), + (caddr_t )pd->hdr.icmp6); + break; +#endif /* INET6 */ + } + } + return (PF_PASS); + + } else { + /* + * ICMP error message in response to a TCP/UDP packet. + * Extract the inner TCP/UDP header and search for that state. + */ + + struct pf_pdesc pd2; + bzero(&pd2, sizeof pd2); +#ifdef INET + struct ip h2; +#endif /* INET */ +#ifdef INET6 + struct ip6_hdr h2_6; + int terminal = 0; +#endif /* INET6 */ + int ipoff2 = 0; + int off2 = 0; + + pd2.af = pd->af; + /* Payload packet is from the opposite direction. */ + pd2.sidx = (direction == PF_IN) ? 1 : 0; + pd2.didx = (direction == PF_IN) ? 
0 : 1; + switch (pd->af) { +#ifdef INET + case AF_INET: + /* offset of h2 in mbuf chain */ + ipoff2 = off + ICMP_MINLEN; + + if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2), + NULL, reason, pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMP error message too short " + "(ip)\n")); + return (PF_DROP); + } + /* + * ICMP error messages don't refer to non-first + * fragments + */ + if (h2.ip_off & htons(IP_OFFMASK)) { + REASON_SET(reason, PFRES_FRAG); + return (PF_DROP); + } + + /* offset of protocol header that follows h2 */ + off2 = ipoff2 + (h2.ip_hl << 2); + + pd2.proto = h2.ip_p; + pd2.src = (struct pf_addr *)&h2.ip_src; + pd2.dst = (struct pf_addr *)&h2.ip_dst; + pd2.ip_sum = &h2.ip_sum; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + ipoff2 = off + sizeof(struct icmp6_hdr); + + if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6), + NULL, reason, pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMP error message too short " + "(ip6)\n")); + return (PF_DROP); + } + pd2.proto = h2_6.ip6_nxt; + pd2.src = (struct pf_addr *)&h2_6.ip6_src; + pd2.dst = (struct pf_addr *)&h2_6.ip6_dst; + pd2.ip_sum = NULL; + off2 = ipoff2 + sizeof(h2_6); + do { + switch (pd2.proto) { + case IPPROTO_FRAGMENT: + /* + * ICMPv6 error messages for + * non-first fragments + */ + REASON_SET(reason, PFRES_FRAG); + return (PF_DROP); + case IPPROTO_AH: + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: { + /* get next header and header length */ + struct ip6_ext opt6; + + if (!pf_pull_hdr(m, off2, &opt6, + sizeof(opt6), NULL, reason, + pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMPv6 short opt\n")); + return (PF_DROP); + } + if (pd2.proto == IPPROTO_AH) + off2 += (opt6.ip6e_len + 2) * 4; + else + off2 += (opt6.ip6e_len + 1) * 8; + pd2.proto = opt6.ip6e_nxt; + /* goto the next header */ + break; + } + default: + terminal++; + break; + } + } while (!terminal); + break; +#endif /* INET6 */ + } + + switch (pd2.proto) { + case IPPROTO_TCP: { + struct tcphdr th; + u_int32_t seq; + struct pf_state_peer *src, *dst; + u_int8_t dws; + int copyback = 0; + + /* + * Only the first 8 bytes of the TCP header can be + * expected. Don't access any TCP header fields after + * th_seq, an ackskew test is not possible. 
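+			 * (RFC 792 guarantees only that the IP header plus
+			 * the first 8 octets of the offending datagram are
+			 * quoted; that covers the TCP ports and sequence
+			 * number, but the acknowledgment field may be
+			 * missing.)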
+ */ + if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason, + pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMP error message too short " + "(tcp)\n")); + return (PF_DROP); + } + + key.af = pd2.af; + key.proto = IPPROTO_TCP; + PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); + PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + key.port[pd2.sidx] = th.th_sport; + key.port[pd2.didx] = th.th_dport; + + STATE_LOOKUP(kif, &key, direction, *state, pd); + + if (direction == (*state)->direction) { + src = &(*state)->dst; + dst = &(*state)->src; + } else { + src = &(*state)->src; + dst = &(*state)->dst; + } + + if (src->wscale && dst->wscale) + dws = dst->wscale & PF_WSCALE_MASK; + else + dws = 0; + + /* Demodulate sequence number */ + seq = ntohl(th.th_seq) - src->seqdiff; + if (src->seqdiff) { + pf_change_a(&th.th_seq, icmpsum, + htonl(seq), 0); + copyback = 1; + } + + if (!((*state)->state_flags & PFSTATE_SLOPPY) && + (!SEQ_GEQ(src->seqhi, seq) || + !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) { + if (V_pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: BAD ICMP %d:%d ", + icmptype, pd->hdr.icmp->icmp_code); + pf_print_host(pd->src, 0, pd->af); + printf(" -> "); + pf_print_host(pd->dst, 0, pd->af); + printf(" state: "); + pf_print_state(*state); + printf(" seq=%u\n", seq); + } + REASON_SET(reason, PFRES_BADSTATE); + return (PF_DROP); + } else { + if (V_pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: OK ICMP %d:%d ", + icmptype, pd->hdr.icmp->icmp_code); + pf_print_host(pd->src, 0, pd->af); + printf(" -> "); + pf_print_host(pd->dst, 0, pd->af); + printf(" state: "); + pf_print_state(*state); + printf(" seq=%u\n", seq); + } + } + + /* translate source/destination address, if necessary */ + if ((*state)->key[PF_SK_WIRE] != + (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = + (*state)->key[pd->didx]; + + if (PF_ANEQ(pd2.src, + &nk->addr[pd2.sidx], pd2.af) || + nk->port[pd2.sidx] != th.th_sport) + pf_change_icmp(pd2.src, &th.th_sport, + daddr, &nk->addr[pd2.sidx], + nk->port[pd2.sidx], NULL, + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, pd2.af); + + if (PF_ANEQ(pd2.dst, + &nk->addr[pd2.didx], pd2.af) || + nk->port[pd2.didx] != th.th_dport) + pf_change_icmp(pd2.dst, &th.th_dport, + NULL, /* XXX Inbound NAT? 
 */
+				    &nk->addr[pd2.didx],
+				    nk->port[pd2.didx], NULL,
+				    pd2.ip_sum, icmpsum,
+				    pd->ip_sum, 0, pd2.af);
+				copyback = 1;
+			}
+
+			if (copyback) {
+				switch (pd2.af) {
+#ifdef INET
+				case AF_INET:
+					m_copyback(m, off, ICMP_MINLEN,
+					    (caddr_t)pd->hdr.icmp);
+					m_copyback(m, ipoff2, sizeof(h2),
+					    (caddr_t)&h2);
+					break;
+#endif /* INET */
+#ifdef INET6
+				case AF_INET6:
+					m_copyback(m, off,
+					    sizeof(struct icmp6_hdr),
+					    (caddr_t)pd->hdr.icmp6);
+					m_copyback(m, ipoff2, sizeof(h2_6),
+					    (caddr_t)&h2_6);
+					break;
+#endif /* INET6 */
+				}
+				m_copyback(m, off2, 8, (caddr_t)&th);
+			}
+
+			return (PF_PASS);
+			break;
+		}
+		case IPPROTO_UDP: {
+			struct udphdr uh;
+
+			if (!pf_pull_hdr(m, off2, &uh, sizeof(uh),
+			    NULL, reason, pd2.af)) {
+				DPFPRINTF(PF_DEBUG_MISC,
+				    ("pf: ICMP error message too short "
+				    "(udp)\n"));
+				return (PF_DROP);
+			}
+
+			key.af = pd2.af;
+			key.proto = IPPROTO_UDP;
+			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
+			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
+			key.port[pd2.sidx] = uh.uh_sport;
+			key.port[pd2.didx] = uh.uh_dport;
+
+			STATE_LOOKUP(kif, &key, direction, *state, pd);
+
+			/* translate source/destination address, if necessary */
+			if ((*state)->key[PF_SK_WIRE] !=
+			    (*state)->key[PF_SK_STACK]) {
+				struct pf_state_key *nk =
+				    (*state)->key[pd->didx];
+
+				if (PF_ANEQ(pd2.src,
+				    &nk->addr[pd2.sidx], pd2.af) ||
+				    nk->port[pd2.sidx] != uh.uh_sport)
+					pf_change_icmp(pd2.src, &uh.uh_sport,
+					    daddr, &nk->addr[pd2.sidx],
+					    nk->port[pd2.sidx], &uh.uh_sum,
+					    pd2.ip_sum, icmpsum,
+					    pd->ip_sum, 1, pd2.af);
+
+				if (PF_ANEQ(pd2.dst,
+				    &nk->addr[pd2.didx], pd2.af) ||
+				    nk->port[pd2.didx] != uh.uh_dport)
+					pf_change_icmp(pd2.dst, &uh.uh_dport,
+					    NULL, /* XXX Inbound NAT? */
+					    &nk->addr[pd2.didx],
+					    nk->port[pd2.didx], &uh.uh_sum,
+					    pd2.ip_sum, icmpsum,
+					    pd->ip_sum, 1, pd2.af);
+
+				switch (pd2.af) {
+#ifdef INET
+				case AF_INET:
+					m_copyback(m, off, ICMP_MINLEN,
+					    (caddr_t)pd->hdr.icmp);
+					m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
+					break;
+#endif /* INET */
+#ifdef INET6
+				case AF_INET6:
+					m_copyback(m, off,
+					    sizeof(struct icmp6_hdr),
+					    (caddr_t)pd->hdr.icmp6);
+					m_copyback(m, ipoff2, sizeof(h2_6),
+					    (caddr_t)&h2_6);
+					break;
+#endif /* INET6 */
+				}
+				m_copyback(m, off2, sizeof(uh), (caddr_t)&uh);
+			}
+			return (PF_PASS);
+			break;
+		}
+#ifdef INET
+		case IPPROTO_ICMP: {
+			struct icmp		iih;
+
+			if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN,
+			    NULL, reason, pd2.af)) {
+				DPFPRINTF(PF_DEBUG_MISC,
+				    ("pf: ICMP error message too short "
+				    "(icmp)\n"));
+				return (PF_DROP);
+			}
+
+			key.af = pd2.af;
+			key.proto = IPPROTO_ICMP;
+			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
+			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
+			key.port[0] = key.port[1] = iih.icmp_id;
+
+			STATE_LOOKUP(kif, &key, direction, *state, pd);
+
+			/* translate source/destination address, if necessary */
+			if ((*state)->key[PF_SK_WIRE] !=
+			    (*state)->key[PF_SK_STACK]) {
+				struct pf_state_key *nk =
+				    (*state)->key[pd->didx];
+
+				if (PF_ANEQ(pd2.src,
+				    &nk->addr[pd2.sidx], pd2.af) ||
+				    nk->port[pd2.sidx] != iih.icmp_id)
+					pf_change_icmp(pd2.src, &iih.icmp_id,
+					    daddr, &nk->addr[pd2.sidx],
+					    nk->port[pd2.sidx], NULL,
+					    pd2.ip_sum, icmpsum,
+					    pd->ip_sum, 0, AF_INET);
+
+				if (PF_ANEQ(pd2.dst,
+				    &nk->addr[pd2.didx], pd2.af) ||
+				    nk->port[pd2.didx] != iih.icmp_id)
+					pf_change_icmp(pd2.dst, &iih.icmp_id,
+					    NULL, /* XXX Inbound NAT?
*/ + &nk->addr[pd2.didx], + nk->port[pd2.didx], NULL, + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, AF_INET); + + m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp); + m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); + m_copyback(m, off2, ICMP_MINLEN, (caddr_t)&iih); + } + return (PF_PASS); + break; + } +#endif /* INET */ +#ifdef INET6 + case IPPROTO_ICMPV6: { + struct icmp6_hdr iih; + + if (!pf_pull_hdr(m, off2, &iih, + sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMP error message too short " + "(icmp6)\n")); + return (PF_DROP); + } + + key.af = pd2.af; + key.proto = IPPROTO_ICMPV6; + PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); + PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + key.port[0] = key.port[1] = iih.icmp6_id; + + STATE_LOOKUP(kif, &key, direction, *state, pd); + + /* translate source/destination address, if necessary */ + if ((*state)->key[PF_SK_WIRE] != + (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = + (*state)->key[pd->didx]; + + if (PF_ANEQ(pd2.src, + &nk->addr[pd2.sidx], pd2.af) || + nk->port[pd2.sidx] != iih.icmp6_id) + pf_change_icmp(pd2.src, &iih.icmp6_id, + daddr, &nk->addr[pd2.sidx], + nk->port[pd2.sidx], NULL, + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, AF_INET6); + + if (PF_ANEQ(pd2.dst, + &nk->addr[pd2.didx], pd2.af) || + nk->port[pd2.didx] != iih.icmp6_id) + pf_change_icmp(pd2.dst, &iih.icmp6_id, + NULL, /* XXX Inbound NAT? */ + &nk->addr[pd2.didx], + nk->port[pd2.didx], NULL, + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, AF_INET6); + + m_copyback(m, off, sizeof(struct icmp6_hdr), + (caddr_t)pd->hdr.icmp6); + m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6); + m_copyback(m, off2, sizeof(struct icmp6_hdr), + (caddr_t)&iih); + } + return (PF_PASS); + break; + } +#endif /* INET6 */ + default: { + key.af = pd2.af; + key.proto = pd2.proto; + PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); + PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + key.port[0] = key.port[1] = 0; + + STATE_LOOKUP(kif, &key, direction, *state, pd); + + /* translate source/destination address, if necessary */ + if ((*state)->key[PF_SK_WIRE] != + (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = + (*state)->key[pd->didx]; + + if (PF_ANEQ(pd2.src, + &nk->addr[pd2.sidx], pd2.af)) + pf_change_icmp(pd2.src, NULL, daddr, + &nk->addr[pd2.sidx], 0, NULL, + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, pd2.af); + + if (PF_ANEQ(pd2.dst, + &nk->addr[pd2.didx], pd2.af)) + pf_change_icmp(pd2.src, NULL, + NULL, /* XXX Inbound NAT? 
 */
+				    &nk->addr[pd2.didx], 0, NULL,
+				    pd2.ip_sum, icmpsum,
+				    pd->ip_sum, 0, pd2.af);
+
+				switch (pd2.af) {
+#ifdef INET
+				case AF_INET:
+					m_copyback(m, off, ICMP_MINLEN,
+					    (caddr_t)pd->hdr.icmp);
+					m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
+					break;
+#endif /* INET */
+#ifdef INET6
+				case AF_INET6:
+					m_copyback(m, off,
+					    sizeof(struct icmp6_hdr),
+					    (caddr_t)pd->hdr.icmp6);
+					m_copyback(m, ipoff2, sizeof(h2_6),
+					    (caddr_t)&h2_6);
+					break;
+#endif /* INET6 */
+				}
+			}
+			return (PF_PASS);
+			break;
+		}
+		}
+	}
+}
+
+static int
+pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
+    struct mbuf *m, struct pf_pdesc *pd)
+{
+	struct pf_state_peer	*src, *dst;
+	struct pf_state_key_cmp	 key;
+
+	bzero(&key, sizeof(key));
+	key.af = pd->af;
+	key.proto = pd->proto;
+	if (direction == PF_IN)	{
+		PF_ACPY(&key.addr[0], pd->src, key.af);
+		PF_ACPY(&key.addr[1], pd->dst, key.af);
+		key.port[0] = key.port[1] = 0;
+	} else {
+		PF_ACPY(&key.addr[1], pd->src, key.af);
+		PF_ACPY(&key.addr[0], pd->dst, key.af);
+		key.port[1] = key.port[0] = 0;
+	}
+
+	STATE_LOOKUP(kif, &key, direction, *state, pd);
+
+	if (direction == (*state)->direction) {
+		src = &(*state)->src;
+		dst = &(*state)->dst;
+	} else {
+		src = &(*state)->dst;
+		dst = &(*state)->src;
+	}
+
+	/* update states */
+	if (src->state < PFOTHERS_SINGLE)
+		src->state = PFOTHERS_SINGLE;
+	if (dst->state == PFOTHERS_SINGLE)
+		dst->state = PFOTHERS_MULTIPLE;
+
+	/* update expire time */
+	(*state)->expire = time_uptime;
+	if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
+		(*state)->timeout = PFTM_OTHER_MULTIPLE;
+	else
+		(*state)->timeout = PFTM_OTHER_SINGLE;
+
+	/* translate source/destination address, if necessary */
+	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
+		struct pf_state_key *nk = (*state)->key[pd->didx];
+
+		KASSERT(nk, ("%s: nk is null", __func__));
+		KASSERT(pd, ("%s: pd is null", __func__));
+		KASSERT(pd->src, ("%s: pd->src is null", __func__));
+		KASSERT(pd->dst, ("%s: pd->dst is null", __func__));
+		switch (pd->af) {
+#ifdef INET
+		case AF_INET:
+			if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET))
+				pf_change_a(&pd->src->v4.s_addr,
+				    pd->ip_sum,
+				    nk->addr[pd->sidx].v4.s_addr,
+				    0);
+
+			if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET))
+				pf_change_a(&pd->dst->v4.s_addr,
+				    pd->ip_sum,
+				    nk->addr[pd->didx].v4.s_addr,
+				    0);
+
+			break;
+#endif /* INET */
+#ifdef INET6
+		case AF_INET6:
+			if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET6))
+				PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af);
+
+			if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET6))
+				PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af);
+#endif /* INET6 */
+		}
+	}
+	return (PF_PASS);
+}
+
+/*
+ * ipoff and off are measured from the start of the mbuf chain.
+ * h must be at "ipoff" on the mbuf chain.
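+ * On success, the requested header is copied into the caller-supplied
+ * buffer p and p is returned; on failure, NULL is returned and
+ * actionp/reasonp are set. A non-first fragment whose offset lies past
+ * the requested header is passed rather than dropped, since only the
+ * first fragment can carry it.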
+ */ +void * +pf_pull_hdr(struct mbuf *m, int off, void *p, int len, + u_short *actionp, u_short *reasonp, sa_family_t af) +{ + switch (af) { +#ifdef INET + case AF_INET: { + struct ip *h = mtod(m, struct ip *); + u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; + + if (fragoff) { + if (fragoff >= len) + ACTION_SET(actionp, PF_PASS); + else { + ACTION_SET(actionp, PF_DROP); + REASON_SET(reasonp, PFRES_FRAG); + } + return (NULL); + } + if (m->m_pkthdr.len < off + len || + ntohs(h->ip_len) < off + len) { + ACTION_SET(actionp, PF_DROP); + REASON_SET(reasonp, PFRES_SHORT); + return (NULL); + } + break; + } +#endif /* INET */ +#ifdef INET6 + case AF_INET6: { + struct ip6_hdr *h = mtod(m, struct ip6_hdr *); + + if (m->m_pkthdr.len < off + len || + (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) < + (unsigned)(off + len)) { + ACTION_SET(actionp, PF_DROP); + REASON_SET(reasonp, PFRES_SHORT); + return (NULL); + } + break; + } +#endif /* INET6 */ + } + m_copydata(m, off, len, p); + return (p); +} + +int +pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, + int rtableid) +{ +#ifdef RADIX_MPATH + struct radix_node_head *rnh; +#endif + struct sockaddr_in *dst; + int ret = 1; + int check_mpath; +#ifdef INET6 + struct sockaddr_in6 *dst6; + struct route_in6 ro; +#else + struct route ro; +#endif + struct radix_node *rn; + struct rtentry *rt; + struct ifnet *ifp; + + check_mpath = 0; +#ifdef RADIX_MPATH + /* XXX: stick to table 0 for now */ + rnh = rt_tables_get_rnh(0, af); + if (rnh != NULL && rn_mpath_capable(rnh)) + check_mpath = 1; +#endif + bzero(&ro, sizeof(ro)); + switch (af) { + case AF_INET: + dst = satosin(&ro.ro_dst); + dst->sin_family = AF_INET; + dst->sin_len = sizeof(*dst); + dst->sin_addr = addr->v4; + break; +#ifdef INET6 + case AF_INET6: + /* + * Skip check for addresses with embedded interface scope, + * as they would always match anyway. + */ + if (IN6_IS_SCOPE_EMBED(&addr->v6)) + goto out; + dst6 = (struct sockaddr_in6 *)&ro.ro_dst; + dst6->sin6_family = AF_INET6; + dst6->sin6_len = sizeof(*dst6); + dst6->sin6_addr = addr->v6; + break; +#endif /* INET6 */ + default: + return (0); + } + + /* Skip checks for ipsec interfaces */ + if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) + goto out; + + switch (af) { +#ifdef INET6 + case AF_INET6: + in6_rtalloc_ign(&ro, 0, rtableid); + break; +#endif +#ifdef INET + case AF_INET: + in_rtalloc_ign((struct route *)&ro, 0, rtableid); + break; +#endif + default: + rtalloc_ign((struct route *)&ro, 0); /* No/default FIB. 
 */
+		break;
+	}
+
+	if (ro.ro_rt != NULL) {
+		/* No interface given, this is a no-route check */
+		if (kif == NULL)
+			goto out;
+
+		if (kif->pfik_ifp == NULL) {
+			ret = 0;
+			goto out;
+		}
+
+		/* Perform uRPF check if passed input interface */
+		ret = 0;
+		rn = (struct radix_node *)ro.ro_rt;
+		do {
+			rt = (struct rtentry *)rn;
+			ifp = rt->rt_ifp;
+
+			if (kif->pfik_ifp == ifp)
+				ret = 1;
+#ifdef RADIX_MPATH
+			rn = rn_mpath_next(rn);
+#endif
+		} while (check_mpath == 1 && rn != NULL && ret == 0);
+	} else
+		ret = 0;
+out:
+	if (ro.ro_rt != NULL)
+		RTFREE(ro.ro_rt);
+	return (ret);
+}
+
+#ifdef INET
+static void
+pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
+    struct pf_state *s, struct pf_pdesc *pd)
+{
+	struct mbuf		*m0, *m1;
+	struct sockaddr_in	 dst;
+	struct ip		*ip;
+	struct ifnet		*ifp = NULL;
+	struct pf_addr		 naddr;
+	struct pf_src_node	*sn = NULL;
+	int			 error = 0;
+	int			 sw_csum;
+
+	KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__));
+	KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: invalid direction",
+	    __func__));
+
+	if ((pd->pf_mtag == NULL &&
+	    ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) ||
+	    pd->pf_mtag->routed++ > 3) {
+		m0 = *m;
+		*m = NULL;
+		goto bad_locked;
+	}
+
+	if (r->rt == PF_DUPTO) {
+		if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) {
+			if (s)
+				PF_STATE_UNLOCK(s);
+			return;
+		}
+	} else {
+		if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
+			if (s)
+				PF_STATE_UNLOCK(s);
+			return;
+		}
+		m0 = *m;
+	}
+
+	ip = mtod(m0, struct ip *);
+
+	bzero(&dst, sizeof(dst));
+	dst.sin_family = AF_INET;
+	dst.sin_len = sizeof(dst);
+	dst.sin_addr = ip->ip_dst;
+
+	if (r->rt == PF_FASTROUTE) {
+		struct rtentry *rt;
+
+		if (s)
+			PF_STATE_UNLOCK(s);
+		rt = rtalloc1_fib(sintosa(&dst), 0, 0, M_GETFIB(m0));
+		if (rt == NULL) {
+			KMOD_IPSTAT_INC(ips_noroute);
+			error = EHOSTUNREACH;
+			goto bad;
+		}
+
+		ifp = rt->rt_ifp;
+		rt->rt_rmx.rmx_pksent++;
+
+		if (rt->rt_flags & RTF_GATEWAY)
+			bcopy(satosin(rt->rt_gateway), &dst, sizeof(dst));
+		RTFREE_LOCKED(rt);
+	} else {
+		if (TAILQ_EMPTY(&r->rpool.list)) {
+			DPFPRINTF(PF_DEBUG_URGENT,
+			    ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__));
+			goto bad_locked;
+		}
+		if (s == NULL) {
+			pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
+			    &naddr, NULL, &sn);
+			if (!PF_AZERO(&naddr, AF_INET))
+				dst.sin_addr.s_addr = naddr.v4.s_addr;
+			ifp = r->rpool.cur->kif ?
+			    r->rpool.cur->kif->pfik_ifp : NULL;
+		} else {
+			if (!PF_AZERO(&s->rt_addr, AF_INET))
+				dst.sin_addr.s_addr =
+				    s->rt_addr.v4.s_addr;
+			ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
+			PF_STATE_UNLOCK(s);
+		}
+	}
+	if (ifp == NULL)
+		goto bad;
+
+	if (oifp != ifp) {
+		if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS)
+			goto bad;
+		else if (m0 == NULL)
+			goto done;
+		if (m0->m_len < sizeof(struct ip)) {
+			DPFPRINTF(PF_DEBUG_URGENT,
+			    ("%s: m0->m_len < sizeof(struct ip)\n", __func__));
+			goto bad;
+		}
+		ip = mtod(m0, struct ip *);
+	}
+
+	if (ifp->if_flags & IFF_LOOPBACK)
+		m0->m_flags |= M_SKIP_FIREWALL;
+
+	/* Back to host byte order. */
+	ip->ip_len = ntohs(ip->ip_len);
+	ip->ip_off = ntohs(ip->ip_off);
+
+	/* Copied from FreeBSD 10.0-CURRENT ip_output.
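+	 * Finalize whatever checksums the outgoing interface cannot
+	 * offload, then transmit directly if the packet fits the MTU
+	 * (or the hardware fragments/TSOs for us), and fragment it in
+	 * software otherwise.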
*/ + m0->m_pkthdr.csum_flags |= CSUM_IP; + sw_csum = m0->m_pkthdr.csum_flags & ~ifp->if_hwassist; + if (sw_csum & CSUM_DELAY_DATA) { + in_delayed_cksum(m0); + sw_csum &= ~CSUM_DELAY_DATA; + } +#ifdef SCTP + if (sw_csum & CSUM_SCTP) { + sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2)); + sw_csum &= ~CSUM_SCTP; + } +#endif + m0->m_pkthdr.csum_flags &= ifp->if_hwassist; + + /* + * If small enough for interface, or the interface will take + * care of the fragmentation for us, we can just send directly. + */ + if (ip->ip_len <= ifp->if_mtu || + (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 || + ((ip->ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) { + ip->ip_len = htons(ip->ip_len); + ip->ip_off = htons(ip->ip_off); + ip->ip_sum = 0; + if (sw_csum & CSUM_DELAY_IP) + ip->ip_sum = in_cksum(m0, ip->ip_hl << 2); + m0->m_flags &= ~(M_PROTOFLAGS); + error = (*ifp->if_output)(ifp, m0, sintosa(&dst), NULL); + goto done; + } + + /* Balk when DF bit is set or the interface didn't support TSO. */ + if ((ip->ip_off & IP_DF) || (m0->m_pkthdr.csum_flags & CSUM_TSO)) { + error = EMSGSIZE; + KMOD_IPSTAT_INC(ips_cantfrag); + if (r->rt != PF_DUPTO) { + icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, + ifp->if_mtu); + goto done; + } else + goto bad; + } + + error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist, sw_csum); + if (error) + goto bad; + + for (; m0; m0 = m1) { + m1 = m0->m_nextpkt; + m0->m_nextpkt = NULL; + if (error == 0) { + m0->m_flags &= ~(M_PROTOFLAGS); + error = (*ifp->if_output)(ifp, m0, sintosa(&dst), NULL); + } else + m_freem(m0); + } + + if (error == 0) + KMOD_IPSTAT_INC(ips_fragmented); + +done: + if (r->rt != PF_DUPTO) + *m = NULL; + return; + +bad_locked: + if (s) + PF_STATE_UNLOCK(s); +bad: + m_freem(m0); + goto done; +} +#endif /* INET */ + +#ifdef INET6 +static void +pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, + struct pf_state *s, struct pf_pdesc *pd) +{ + struct mbuf *m0; + struct sockaddr_in6 dst; + struct ip6_hdr *ip6; + struct ifnet *ifp = NULL; + struct pf_addr naddr; + struct pf_src_node *sn = NULL; + + KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__)); + KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: invalid direction", + __func__)); + + if ((pd->pf_mtag == NULL && + ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) || + pd->pf_mtag->routed++ > 3) { + m0 = *m; + *m = NULL; + goto bad_locked; + } + + if (r->rt == PF_DUPTO) { + if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) { + if (s) + PF_STATE_UNLOCK(s); + return; + } + } else { + if ((r->rt == PF_REPLYTO) == (r->direction == dir)) { + if (s) + PF_STATE_UNLOCK(s); + return; + } + m0 = *m; + } + + ip6 = mtod(m0, struct ip6_hdr *); + + bzero(&dst, sizeof(dst)); + dst.sin6_family = AF_INET6; + dst.sin6_len = sizeof(dst); + dst.sin6_addr = ip6->ip6_dst; + + /* Cheat. XXX why only in the v6 case??? */ + if (r->rt == PF_FASTROUTE) { + if (s) + PF_STATE_UNLOCK(s); + m0->m_flags |= M_SKIP_FIREWALL; + ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL); + return; + } + + if (TAILQ_EMPTY(&r->rpool.list)) { + DPFPRINTF(PF_DEBUG_URGENT, + ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__)); + goto bad_locked; + } + if (s == NULL) { + pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src, + &naddr, NULL, &sn); + if (!PF_AZERO(&naddr, AF_INET6)) + PF_ACPY((struct pf_addr *)&dst.sin6_addr, + &naddr, AF_INET6); + ifp = r->rpool.cur->kif ? 
r->rpool.cur->kif->pfik_ifp : NULL;
+	} else {
+		if (!PF_AZERO(&s->rt_addr, AF_INET6))
+			PF_ACPY((struct pf_addr *)&dst.sin6_addr,
+			    &s->rt_addr, AF_INET6);
+		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
+	}
+
+	if (s)
+		PF_STATE_UNLOCK(s);
+
+	if (ifp == NULL)
+		goto bad;
+
+	if (oifp != ifp) {
+		if (pf_test6(PF_OUT, ifp, &m0, NULL) != PF_PASS)
+			goto bad;
+		else if (m0 == NULL)
+			goto done;
+		if (m0->m_len < sizeof(struct ip6_hdr)) {
+			DPFPRINTF(PF_DEBUG_URGENT,
+			    ("%s: m0->m_len < sizeof(struct ip6_hdr)\n",
+			    __func__));
+			goto bad;
+		}
+		ip6 = mtod(m0, struct ip6_hdr *);
+	}
+
+	if (ifp->if_flags & IFF_LOOPBACK)
+		m0->m_flags |= M_SKIP_FIREWALL;
+
+	/*
+	 * If the packet is too large for the outgoing interface,
+	 * send back an icmp6 error.
+	 */
+	if (IN6_IS_SCOPE_EMBED(&dst.sin6_addr))
+		dst.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
+	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu)
+		nd6_output(ifp, ifp, m0, &dst, NULL);
+	else {
+		in6_ifstat_inc(ifp, ifs6_in_toobig);
+		if (r->rt != PF_DUPTO)
+			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
+		else
+			goto bad;
+	}
+
+done:
+	if (r->rt != PF_DUPTO)
+		*m = NULL;
+	return;
+
+bad_locked:
+	if (s)
+		PF_STATE_UNLOCK(s);
+bad:
+	m_freem(m0);
+	goto done;
+}
+#endif /* INET6 */
+
+/*
+ * FreeBSD supports cksum offloads for the following drivers.
+ *  em(4), fxp(4), ixgb(4), lge(4), ndis(4), nge(4), re(4),
+ *  ti(4), txp(4), xl(4)
+ *
+ * CSUM_DATA_VALID | CSUM_PSEUDO_HDR :
+ *  network driver performed cksum including pseudo header; csum_data
+ *  needs to be verified
+ * CSUM_DATA_VALID :
+ *  network driver performed cksum, needs an additional pseudo header
+ *  cksum computation with partial csum_data (i.e. lack of H/W support
+ *  for pseudo header, for instance hme(4), sk(4) and possibly gem(4))
+ *
+ * After validating the cksum of the packet, set both flags
+ * CSUM_DATA_VALID and CSUM_PSEUDO_HDR in order to avoid recomputation
+ * of the cksum in the upper TCP/UDP layer.
+ * Also, set csum_data to 0xffff to force cksum validation.
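+ * For example, a driver setting CSUM_DATA_VALID | CSUM_PSEUDO_HDR has
+ * verified the full TCP/UDP cksum and csum_data holds the final result,
+ * so a value of 0xffff means the packet was good; with CSUM_DATA_VALID
+ * alone, csum_data is a partial sum that still has to be folded with a
+ * software-computed pseudo header cksum below.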
+ */ +static int +pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af) +{ + u_int16_t sum = 0; + int hw_assist = 0; + struct ip *ip; + + if (off < sizeof(struct ip) || len < sizeof(struct udphdr)) + return (1); + if (m->m_pkthdr.len < off + len) + return (1); + + switch (p) { + case IPPROTO_TCP: + if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { + if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) { + sum = m->m_pkthdr.csum_data; + } else { + ip = mtod(m, struct ip *); + sum = in_pseudo(ip->ip_src.s_addr, + ip->ip_dst.s_addr, htonl((u_short)len + + m->m_pkthdr.csum_data + IPPROTO_TCP)); + } + sum ^= 0xffff; + ++hw_assist; + } + break; + case IPPROTO_UDP: + if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { + if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) { + sum = m->m_pkthdr.csum_data; + } else { + ip = mtod(m, struct ip *); + sum = in_pseudo(ip->ip_src.s_addr, + ip->ip_dst.s_addr, htonl((u_short)len + + m->m_pkthdr.csum_data + IPPROTO_UDP)); + } + sum ^= 0xffff; + ++hw_assist; + } + break; + case IPPROTO_ICMP: +#ifdef INET6 + case IPPROTO_ICMPV6: +#endif /* INET6 */ + break; + default: + return (1); + } + + if (!hw_assist) { + switch (af) { + case AF_INET: + if (p == IPPROTO_ICMP) { + if (m->m_len < off) + return (1); + m->m_data += off; + m->m_len -= off; + sum = in_cksum(m, len); + m->m_data -= off; + m->m_len += off; + } else { + if (m->m_len < sizeof(struct ip)) + return (1); + sum = in4_cksum(m, p, off, len); + } + break; +#ifdef INET6 + case AF_INET6: + if (m->m_len < sizeof(struct ip6_hdr)) + return (1); + sum = in6_cksum(m, p, off, len); + break; +#endif /* INET6 */ + default: + return (1); + } + } + if (sum) { + switch (p) { + case IPPROTO_TCP: + { + KMOD_TCPSTAT_INC(tcps_rcvbadsum); + break; + } + case IPPROTO_UDP: + { + KMOD_UDPSTAT_INC(udps_badsum); + break; + } +#ifdef INET + case IPPROTO_ICMP: + { + KMOD_ICMPSTAT_INC(icps_checksum); + break; + } +#endif +#ifdef INET6 + case IPPROTO_ICMPV6: + { + KMOD_ICMP6STAT_INC(icp6s_checksum); + break; + } +#endif /* INET6 */ + } + return (1); + } else { + if (p == IPPROTO_TCP || p == IPPROTO_UDP) { + m->m_pkthdr.csum_flags |= + (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); + m->m_pkthdr.csum_data = 0xffff; + } + } + return (0); +} + + +#ifdef INET +int +pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) +{ + struct pfi_kif *kif; + u_short action, reason = 0, log = 0; + struct mbuf *m = *m0; + struct ip *h = NULL; + struct m_tag *ipfwtag; + struct pf_rule *a = NULL, *r = &V_pf_default_rule, *tr, *nr; + struct pf_state *s = NULL; + struct pf_ruleset *ruleset = NULL; + struct pf_pdesc pd; + int off, dirndx, pqid = 0; + + M_ASSERTPKTHDR(m); + + if (!V_pf_status.running) + return (PF_PASS); + + memset(&pd, 0, sizeof(pd)); + + kif = (struct pfi_kif *)ifp->if_pf_kif; + + if (kif == NULL) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname)); + return (PF_DROP); + } + if (kif->pfik_flags & PFI_IFLAG_SKIP) + return (PF_PASS); + + if (m->m_flags & M_SKIP_FIREWALL) + return (PF_PASS); + + if (m->m_pkthdr.len < (int)sizeof(struct ip)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); + log = 1; + goto done; + } + + pd.pf_mtag = pf_find_mtag(m); + + PF_RULES_RLOCK(); + + if (ip_divert_ptr != NULL && + ((ipfwtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL)) != NULL)) { + struct ipfw_rule_ref *rr = (struct ipfw_rule_ref *)(ipfwtag+1); + if (rr->info & IPFW_IS_DIVERT && rr->rulenum == 0) { + if (pd.pf_mtag == NULL && + ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { + action = 
PF_DROP; + goto done; + } + pd.pf_mtag->flags |= PF_PACKET_LOOPED; + m_tag_delete(m, ipfwtag); + } + if (pd.pf_mtag && pd.pf_mtag->flags & PF_FASTFWD_OURS_PRESENT) { + m->m_flags |= M_FASTFWD_OURS; + pd.pf_mtag->flags &= ~PF_FASTFWD_OURS_PRESENT; + } + } else if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) { + /* We do IP header normalization and packet reassembly here */ + action = PF_DROP; + goto done; + } + m = *m0; /* pf_normalize messes with m0 */ + h = mtod(m, struct ip *); + + off = h->ip_hl << 2; + if (off < (int)sizeof(struct ip)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); + log = 1; + goto done; + } + + pd.src = (struct pf_addr *)&h->ip_src; + pd.dst = (struct pf_addr *)&h->ip_dst; + pd.sport = pd.dport = NULL; + pd.ip_sum = &h->ip_sum; + pd.proto_sum = NULL; + pd.proto = h->ip_p; + pd.dir = dir; + pd.sidx = (dir == PF_IN) ? 0 : 1; + pd.didx = (dir == PF_IN) ? 1 : 0; + pd.af = AF_INET; + pd.tos = h->ip_tos; + pd.tot_len = ntohs(h->ip_len); + + /* handle fragments that didn't get reassembled by normalization */ + if (h->ip_off & htons(IP_MF | IP_OFFMASK)) { + action = pf_test_fragment(&r, dir, kif, m, h, + &pd, &a, &ruleset); + goto done; + } + + switch (h->ip_p) { + + case IPPROTO_TCP: { + struct tcphdr th; + + pd.hdr.tcp = &th; + if (!pf_pull_hdr(m, off, &th, sizeof(th), + &action, &reason, AF_INET)) { + log = action != PF_PASS; + goto done; + } + pd.p_len = pd.tot_len - off - (th.th_off << 2); + if ((th.th_flags & TH_ACK) && pd.p_len == 0) + pqid = 1; + action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd); + if (action == PF_DROP) + goto done; + action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, + &reason); + if (action == PF_PASS) { + if (pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); + break; + } + + case IPPROTO_UDP: { + struct udphdr uh; + + pd.hdr.udp = &uh; + if (!pf_pull_hdr(m, off, &uh, sizeof(uh), + &action, &reason, AF_INET)) { + log = action != PF_PASS; + goto done; + } + if (uh.uh_dport == 0 || + ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || + ntohs(uh.uh_ulen) < sizeof(struct udphdr)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); + goto done; + } + action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); + if (action == PF_PASS) { + if (pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); + break; + } + + case IPPROTO_ICMP: { + struct icmp ih; + + pd.hdr.icmp = &ih; + if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN, + &action, &reason, AF_INET)) { + log = action != PF_PASS; + goto done; + } + action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, + &reason); + if (action == PF_PASS) { + if (pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); + break; + } + +#ifdef INET6 + case IPPROTO_ICMPV6: { + action = PF_DROP; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: dropping IPv4 packet with ICMPv6 payload\n")); + goto done; + } +#endif + + default: + action = pf_test_state_other(&s, dir, kif, m, &pd); + if (action == PF_PASS) { + if (pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); + r = s->rule.ptr; + a = s->anchor.ptr; + log 
= s->log; + } else if (s == NULL) + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); + break; + } + +done: + PF_RULES_RUNLOCK(); + if (action == PF_PASS && h->ip_hl > 5 && + !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_IPOPTIONS); + log = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: dropping packet with ip options\n")); + } + + if (s && s->tag > 0 && pf_tag_packet(m, &pd, s->tag)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + } + if (r->rtableid >= 0) + M_SETFIB(m, r->rtableid); + +#ifdef ALTQ + if (action == PF_PASS && r->qid) { + if (pd.pf_mtag == NULL && + ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + } + if (pqid || (pd.tos & IPTOS_LOWDELAY)) + pd.pf_mtag->qid = r->pqid; + else + pd.pf_mtag->qid = r->qid; + /* add hints for ecn */ + pd.pf_mtag->hdr = h; + + } +#endif /* ALTQ */ + + /* + * connections redirected to loopback should not match sockets + * bound specifically to loopback due to security implications, + * see tcp_input() and in_pcblookup_listen(). + */ + if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP || + pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && + (s->nat_rule.ptr->action == PF_RDR || + s->nat_rule.ptr->action == PF_BINAT) && + (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) + m->m_flags |= M_SKIP_FIREWALL; + + if (action == PF_PASS && r->divert.port && ip_divert_ptr != NULL && + !PACKET_LOOPED(&pd)) { + + ipfwtag = m_tag_alloc(MTAG_IPFW_RULE, 0, + sizeof(struct ipfw_rule_ref), M_NOWAIT | M_ZERO); + if (ipfwtag != NULL) { + ((struct ipfw_rule_ref *)(ipfwtag+1))->info = + ntohs(r->divert.port); + ((struct ipfw_rule_ref *)(ipfwtag+1))->rulenum = dir; + + if (s) + PF_STATE_UNLOCK(s); + + m_tag_prepend(m, ipfwtag); + if (m->m_flags & M_FASTFWD_OURS) { + if (pd.pf_mtag == NULL && + ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + log = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: failed to allocate tag\n")); + } + pd.pf_mtag->flags |= PF_FASTFWD_OURS_PRESENT; + m->m_flags &= ~M_FASTFWD_OURS; + } + ip_divert_ptr(*m0, dir == PF_IN ? DIR_IN : DIR_OUT); + *m0 = NULL; + + return (action); + } else { + /* XXX: ipfw has the same behaviour! */ + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + log = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: failed to allocate divert tag\n")); + } + } + + if (log) { + struct pf_rule *lr; + + if (s != NULL && s->nat_rule.ptr != NULL && + s->nat_rule.ptr->log & PF_LOG_ALL) + lr = s->nat_rule.ptr; + else + lr = r; + PFLOG_PACKET(kif, m, AF_INET, dir, reason, lr, a, ruleset, &pd, + (s == NULL)); + } + + kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len; + kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++; + + if (action == PF_PASS || r->action == PF_DROP) { + dirndx = (dir == PF_OUT); + r->packets[dirndx]++; + r->bytes[dirndx] += pd.tot_len; + if (a != NULL) { + a->packets[dirndx]++; + a->bytes[dirndx] += pd.tot_len; + } + if (s != NULL) { + if (s->nat_rule.ptr != NULL) { + s->nat_rule.ptr->packets[dirndx]++; + s->nat_rule.ptr->bytes[dirndx] += pd.tot_len; + } + if (s->src_node != NULL) { + s->src_node->packets[dirndx]++; + s->src_node->bytes[dirndx] += pd.tot_len; + } + if (s->nat_src_node != NULL) { + s->nat_src_node->packets[dirndx]++; + s->nat_src_node->bytes[dirndx] += pd.tot_len; + } + dirndx = (dir == s->direction) ? 
0 : 1; + s->packets[dirndx]++; + s->bytes[dirndx] += pd.tot_len; + } + tr = r; + nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; + if (nr != NULL && r == &V_pf_default_rule) + tr = nr; + if (tr->src.addr.type == PF_ADDR_TABLE) + pfr_update_stats(tr->src.addr.p.tbl, + (s == NULL) ? pd.src : + &s->key[(s->direction == PF_IN)]-> + addr[(s->direction == PF_OUT)], + pd.af, pd.tot_len, dir == PF_OUT, + r->action == PF_PASS, tr->src.neg); + if (tr->dst.addr.type == PF_ADDR_TABLE) + pfr_update_stats(tr->dst.addr.p.tbl, + (s == NULL) ? pd.dst : + &s->key[(s->direction == PF_IN)]-> + addr[(s->direction == PF_IN)], + pd.af, pd.tot_len, dir == PF_OUT, + r->action == PF_PASS, tr->dst.neg); + } + + switch (action) { + case PF_SYNPROXY_DROP: + m_freem(*m0); + case PF_DEFER: + *m0 = NULL; + action = PF_PASS; + break; + default: + /* pf_route() returns unlocked. */ + if (r->rt) { + pf_route(m0, r, dir, kif->pfik_ifp, s, &pd); + return (action); + } + break; + } + if (s) + PF_STATE_UNLOCK(s); + + return (action); +} +#endif /* INET */ + +#ifdef INET6 +int +pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) +{ + struct pfi_kif *kif; + u_short action, reason = 0, log = 0; + struct mbuf *m = *m0, *n = NULL; + struct ip6_hdr *h = NULL; + struct pf_rule *a = NULL, *r = &V_pf_default_rule, *tr, *nr; + struct pf_state *s = NULL; + struct pf_ruleset *ruleset = NULL; + struct pf_pdesc pd; + int off, terminal = 0, dirndx, rh_cnt = 0; + + M_ASSERTPKTHDR(m); + + if (!V_pf_status.running) + return (PF_PASS); + + memset(&pd, 0, sizeof(pd)); + pd.pf_mtag = pf_find_mtag(m); + + if (pd.pf_mtag && pd.pf_mtag->flags & PF_TAG_GENERATED) + return (PF_PASS); + + kif = (struct pfi_kif *)ifp->if_pf_kif; + if (kif == NULL) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname)); + return (PF_DROP); + } + if (kif->pfik_flags & PFI_IFLAG_SKIP) + return (PF_PASS); + + if (m->m_pkthdr.len < (int)sizeof(*h)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); + log = 1; + goto done; + } + + PF_RULES_RLOCK(); + + /* We do IP header normalization and packet reassembly here */ + if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) { + action = PF_DROP; + goto done; + } + m = *m0; /* pf_normalize messes with m0 */ + h = mtod(m, struct ip6_hdr *); + +#if 1 + /* + * we do not support jumbogram yet. if we keep going, zero ip6_plen + * will do something bad, so drop the packet for now. + */ + if (htons(h->ip6_plen) == 0) { + action = PF_DROP; + REASON_SET(&reason, PFRES_NORM); /*XXX*/ + goto done; + } +#endif + + pd.src = (struct pf_addr *)&h->ip6_src; + pd.dst = (struct pf_addr *)&h->ip6_dst; + pd.sport = pd.dport = NULL; + pd.ip_sum = NULL; + pd.proto_sum = NULL; + pd.dir = dir; + pd.sidx = (dir == PF_IN) ? 0 : 1; + pd.didx = (dir == PF_IN) ? 
1 : 0; + pd.af = AF_INET6; + pd.tos = 0; + pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); + + off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr); + pd.proto = h->ip6_nxt; + do { + switch (pd.proto) { + case IPPROTO_FRAGMENT: + action = pf_test_fragment(&r, dir, kif, m, h, + &pd, &a, &ruleset); + if (action == PF_DROP) + REASON_SET(&reason, PFRES_FRAG); + goto done; + case IPPROTO_ROUTING: { + struct ip6_rthdr rthdr; + + if (rh_cnt++) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: IPv6 more than one rthdr\n")); + action = PF_DROP; + REASON_SET(&reason, PFRES_IPOPTIONS); + log = 1; + goto done; + } + if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL, + &reason, pd.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: IPv6 short rthdr\n")); + action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); + log = 1; + goto done; + } + if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: IPv6 rthdr0\n")); + action = PF_DROP; + REASON_SET(&reason, PFRES_IPOPTIONS); + log = 1; + goto done; + } + /* FALLTHROUGH */ + } + case IPPROTO_AH: + case IPPROTO_HOPOPTS: + case IPPROTO_DSTOPTS: { + /* get next header and header length */ + struct ip6_ext opt6; + + if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6), + NULL, &reason, pd.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: IPv6 short opt\n")); + action = PF_DROP; + log = 1; + goto done; + } + if (pd.proto == IPPROTO_AH) + off += (opt6.ip6e_len + 2) * 4; + else + off += (opt6.ip6e_len + 1) * 8; + pd.proto = opt6.ip6e_nxt; + /* goto the next header */ + break; + } + default: + terminal++; + break; + } + } while (!terminal); + + /* if there's no routing header, use unmodified mbuf for checksumming */ + if (!n) + n = m; + + switch (pd.proto) { + + case IPPROTO_TCP: { + struct tcphdr th; + + pd.hdr.tcp = &th; + if (!pf_pull_hdr(m, off, &th, sizeof(th), + &action, &reason, AF_INET6)) { + log = action != PF_PASS; + goto done; + } + pd.p_len = pd.tot_len - off - (th.th_off << 2); + action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd); + if (action == PF_DROP) + goto done; + action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, + &reason); + if (action == PF_PASS) { + if (pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); + break; + } + + case IPPROTO_UDP: { + struct udphdr uh; + + pd.hdr.udp = &uh; + if (!pf_pull_hdr(m, off, &uh, sizeof(uh), + &action, &reason, AF_INET6)) { + log = action != PF_PASS; + goto done; + } + if (uh.uh_dport == 0 || + ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || + ntohs(uh.uh_ulen) < sizeof(struct udphdr)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); + goto done; + } + action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); + if (action == PF_PASS) { + if (pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); + break; + } + + case IPPROTO_ICMP: { + action = PF_DROP; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: dropping IPv6 packet with ICMPv4 payload\n")); + goto done; + } + + case IPPROTO_ICMPV6: { + struct icmp6_hdr ih; + + pd.hdr.icmp6 = &ih; + if (!pf_pull_hdr(m, off, &ih, sizeof(ih), + &action, &reason, AF_INET6)) { + log = action != PF_PASS; + goto done; + } + action = pf_test_state_icmp(&s, dir, kif, + m, off, h, &pd, &reason); + if (action == PF_PASS) { + if (pfsync_update_state_ptr 
!= NULL) + pfsync_update_state_ptr(s); + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); + break; + } + + default: + action = pf_test_state_other(&s, dir, kif, m, &pd); + if (action == PF_PASS) { + if (pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); + break; + } + +done: + PF_RULES_RUNLOCK(); + if (n != m) { + m_freem(n); + n = NULL; + } + + /* handle dangerous IPv6 extension headers. */ + if (action == PF_PASS && rh_cnt && + !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_IPOPTIONS); + log = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: dropping packet with dangerous v6 headers\n")); + } + + if (s && s->tag > 0 && pf_tag_packet(m, &pd, s->tag)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + } + if (r->rtableid >= 0) + M_SETFIB(m, r->rtableid); + +#ifdef ALTQ + if (action == PF_PASS && r->qid) { + if (pd.pf_mtag == NULL && + ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + } + if (pd.tos & IPTOS_LOWDELAY) + pd.pf_mtag->qid = r->pqid; + else + pd.pf_mtag->qid = r->qid; + /* add hints for ecn */ + pd.pf_mtag->hdr = h; + } +#endif /* ALTQ */ + + if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP || + pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && + (s->nat_rule.ptr->action == PF_RDR || + s->nat_rule.ptr->action == PF_BINAT) && + IN6_IS_ADDR_LOOPBACK(&pd.dst->v6)) + m->m_flags |= M_SKIP_FIREWALL; + + /* XXX: Anybody working on it?! */ + if (r->divert.port) + printf("pf: divert(9) is not supported for IPv6\n"); + + if (log) { + struct pf_rule *lr; + + if (s != NULL && s->nat_rule.ptr != NULL && + s->nat_rule.ptr->log & PF_LOG_ALL) + lr = s->nat_rule.ptr; + else + lr = r; + PFLOG_PACKET(kif, m, AF_INET6, dir, reason, lr, a, ruleset, + &pd, (s == NULL)); + } + + kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len; + kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++; + + if (action == PF_PASS || r->action == PF_DROP) { + dirndx = (dir == PF_OUT); + r->packets[dirndx]++; + r->bytes[dirndx] += pd.tot_len; + if (a != NULL) { + a->packets[dirndx]++; + a->bytes[dirndx] += pd.tot_len; + } + if (s != NULL) { + if (s->nat_rule.ptr != NULL) { + s->nat_rule.ptr->packets[dirndx]++; + s->nat_rule.ptr->bytes[dirndx] += pd.tot_len; + } + if (s->src_node != NULL) { + s->src_node->packets[dirndx]++; + s->src_node->bytes[dirndx] += pd.tot_len; + } + if (s->nat_src_node != NULL) { + s->nat_src_node->packets[dirndx]++; + s->nat_src_node->bytes[dirndx] += pd.tot_len; + } + dirndx = (dir == s->direction) ? 0 : 1; + s->packets[dirndx]++; + s->bytes[dirndx] += pd.tot_len; + } + tr = r; + nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; + if (nr != NULL && r == &V_pf_default_rule) + tr = nr; + if (tr->src.addr.type == PF_ADDR_TABLE) + pfr_update_stats(tr->src.addr.p.tbl, + (s == NULL) ? pd.src : + &s->key[(s->direction == PF_IN)]->addr[0], + pd.af, pd.tot_len, dir == PF_OUT, + r->action == PF_PASS, tr->src.neg); + if (tr->dst.addr.type == PF_ADDR_TABLE) + pfr_update_stats(tr->dst.addr.p.tbl, + (s == NULL) ? 
pd.dst : + &s->key[(s->direction == PF_IN)]->addr[1], + pd.af, pd.tot_len, dir == PF_OUT, + r->action == PF_PASS, tr->dst.neg); + } + + switch (action) { + case PF_SYNPROXY_DROP: + m_freem(*m0); + case PF_DEFER: + *m0 = NULL; + action = PF_PASS; + break; + default: + /* pf_route6() returns unlocked. */ + if (r->rt) { + pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd); + return (action); + } + break; + } + + if (s) + PF_STATE_UNLOCK(s); + + return (action); +} +#endif /* INET6 */ diff --git a/sys/netpfil/pf/pf_if.c b/sys/netpfil/pf/pf_if.c new file mode 100644 index 0000000..c010b65 --- /dev/null +++ b/sys/netpfil/pf/pf_if.c @@ -0,0 +1,859 @@ +/* $OpenBSD: pf_if.c,v 1.54 2008/06/14 16:55:28 mk Exp $ */ + +/* + * Copyright 2005 Henning Brauer <henning@openbsd.org> + * Copyright 2005 Ryan McBride <mcbride@openbsd.org> + * Copyright (c) 2001 Daniel Hartmeier + * Copyright (c) 2003 Cedric Berger + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> + +__FBSDID("$FreeBSD$"); + +#include "opt_inet.h" +#include "opt_inet6.h" + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/socket.h> + +#include <net/if.h> +#include <net/pfvar.h> +#include <net/route.h> + +VNET_DEFINE(struct pfi_kif *, pfi_all); +static VNET_DEFINE(long, pfi_update); +#define V_pfi_update VNET(pfi_update) +#define PFI_BUFFER_MAX 0x10000 + +static VNET_DEFINE(struct pfr_addr *, pfi_buffer); +static VNET_DEFINE(int, pfi_buffer_cnt); +static VNET_DEFINE(int, pfi_buffer_max); +#define V_pfi_buffer VNET(pfi_buffer) +#define V_pfi_buffer_cnt VNET(pfi_buffer_cnt) +#define V_pfi_buffer_max VNET(pfi_buffer_max) + +eventhandler_tag pfi_attach_cookie; +eventhandler_tag pfi_detach_cookie; +eventhandler_tag pfi_attach_group_cookie; +eventhandler_tag pfi_change_group_cookie; +eventhandler_tag pfi_detach_group_cookie; +eventhandler_tag pfi_ifaddr_event_cookie; + +static void pfi_attach_ifnet(struct ifnet *); +static void pfi_attach_ifgroup(struct ifg_group *); + +static void pfi_kif_update(struct pfi_kif *); +static void pfi_dynaddr_update(struct pfi_dynaddr *dyn); +static void pfi_table_update(struct pfr_ktable *, struct pfi_kif *, int, + int); +static void pfi_instance_add(struct ifnet *, int, int); +static void pfi_address_add(struct sockaddr *, int, int); +static int pfi_if_compare(struct pfi_kif *, struct pfi_kif *); +static int pfi_skip_if(const char *, struct pfi_kif *); +static int pfi_unmask(void *); +static void pfi_attach_ifnet_event(void * __unused, struct ifnet *); +static void pfi_detach_ifnet_event(void * __unused, struct ifnet *); +static void pfi_attach_group_event(void *, struct ifg_group *); +static void pfi_change_group_event(void *, char *); +static void pfi_detach_group_event(void *, struct ifg_group *); +static void pfi_ifaddr_event(void * __unused, struct ifnet *); + +RB_HEAD(pfi_ifhead, pfi_kif); +static RB_PROTOTYPE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); +static RB_GENERATE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); +static VNET_DEFINE(struct pfi_ifhead, pfi_ifs); +#define V_pfi_ifs VNET(pfi_ifs) + +#define PFI_BUFFER_MAX 0x10000 +MALLOC_DEFINE(PFI_MTYPE, "pf_ifnet", "pf(4) interface database"); + +LIST_HEAD(pfi_list, pfi_kif); +static VNET_DEFINE(struct pfi_list, pfi_unlinked_kifs); +#define V_pfi_unlinked_kifs VNET(pfi_unlinked_kifs) +static struct mtx pfi_unlnkdkifs_mtx; + +void +pfi_initialize(void) +{ + struct ifg_group *ifg; + struct ifnet *ifp; + struct pfi_kif *kif; + + V_pfi_buffer_max = 64; + V_pfi_buffer = malloc(V_pfi_buffer_max * sizeof(*V_pfi_buffer), + PFI_MTYPE, M_WAITOK); + + mtx_init(&pfi_unlnkdkifs_mtx, "pf unlinked interfaces", NULL, MTX_DEF); + + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); + PF_RULES_WLOCK(); + V_pfi_all = pfi_kif_attach(kif, IFG_ALL); + PF_RULES_WUNLOCK(); + + IFNET_RLOCK(); + TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next) + pfi_attach_ifgroup(ifg); + TAILQ_FOREACH(ifp, &V_ifnet, if_link) + pfi_attach_ifnet(ifp); + IFNET_RUNLOCK(); + + pfi_attach_cookie = EVENTHANDLER_REGISTER(ifnet_arrival_event, + pfi_attach_ifnet_event, NULL, EVENTHANDLER_PRI_ANY); + pfi_detach_cookie = EVENTHANDLER_REGISTER(ifnet_departure_event, + pfi_detach_ifnet_event, NULL, EVENTHANDLER_PRI_ANY); + pfi_attach_group_cookie = EVENTHANDLER_REGISTER(group_attach_event, + pfi_attach_group_event, curvnet, EVENTHANDLER_PRI_ANY); + pfi_change_group_cookie = EVENTHANDLER_REGISTER(group_change_event, + pfi_change_group_event, curvnet, EVENTHANDLER_PRI_ANY); + pfi_detach_group_cookie = 
EVENTHANDLER_REGISTER(group_detach_event, + pfi_detach_group_event, curvnet, EVENTHANDLER_PRI_ANY); + pfi_ifaddr_event_cookie = EVENTHANDLER_REGISTER(ifaddr_event, + pfi_ifaddr_event, NULL, EVENTHANDLER_PRI_ANY); +} + +void +pfi_cleanup(void) +{ + struct pfi_kif *p; + + EVENTHANDLER_DEREGISTER(ifnet_arrival_event, pfi_attach_cookie); + EVENTHANDLER_DEREGISTER(ifnet_departure_event, pfi_detach_cookie); + EVENTHANDLER_DEREGISTER(group_attach_event, pfi_attach_group_cookie); + EVENTHANDLER_DEREGISTER(group_change_event, pfi_change_group_cookie); + EVENTHANDLER_DEREGISTER(group_detach_event, pfi_detach_group_cookie); + EVENTHANDLER_DEREGISTER(ifaddr_event, pfi_ifaddr_event_cookie); + + V_pfi_all = NULL; + while ((p = RB_MIN(pfi_ifhead, &V_pfi_ifs))) { + RB_REMOVE(pfi_ifhead, &V_pfi_ifs, p); + free(p, PFI_MTYPE); + } + + while ((p = LIST_FIRST(&V_pfi_unlinked_kifs))) { + LIST_REMOVE(p, pfik_list); + free(p, PFI_MTYPE); + } + + mtx_destroy(&pfi_unlnkdkifs_mtx); + + free(V_pfi_buffer, PFI_MTYPE); +} + +struct pfi_kif * +pfi_kif_find(const char *kif_name) +{ + struct pfi_kif_cmp s; + + PF_RULES_ASSERT(); + + bzero(&s, sizeof(s)); + strlcpy(s.pfik_name, kif_name, sizeof(s.pfik_name)); + + return (RB_FIND(pfi_ifhead, &V_pfi_ifs, (struct pfi_kif *)&s)); +} + +struct pfi_kif * +pfi_kif_attach(struct pfi_kif *kif, const char *kif_name) +{ + struct pfi_kif *kif1; + + PF_RULES_WASSERT(); + KASSERT(kif != NULL, ("%s: null kif", __func__)); + + kif1 = pfi_kif_find(kif_name); + if (kif1 != NULL) { + free(kif, PFI_MTYPE); + return (kif1); + } + + bzero(kif, sizeof(*kif)); + strlcpy(kif->pfik_name, kif_name, sizeof(kif->pfik_name)); + /* + * It seems that the value of time_second is in an uninitialized state + * when pf sets interface statistics clear time in boot phase if pf + * was statically linked to kernel. Instead of setting a bogus + * time value, have pfi_get_ifaces handle this case. In + * pfi_get_ifaces it uses time_second if it sees the time is 0. + */ + kif->pfik_tzero = time_second > 1 ? time_second : 0; + TAILQ_INIT(&kif->pfik_dynaddrs); + + RB_INSERT(pfi_ifhead, &V_pfi_ifs, kif); + + return (kif); +} + +void +pfi_kif_ref(struct pfi_kif *kif) +{ + + PF_RULES_WASSERT(); + kif->pfik_rulerefs++; +} + +void +pfi_kif_unref(struct pfi_kif *kif) +{ + + PF_RULES_WASSERT(); + KASSERT(kif->pfik_rulerefs > 0, ("%s: %p has zero refs", __func__, kif)); + + kif->pfik_rulerefs--; + + if (kif->pfik_rulerefs > 0) + return; + + /* kif referencing an existing ifnet or group should exist. */ + if (kif->pfik_ifp != NULL || kif->pfik_group != NULL || kif == V_pfi_all) + return; + + RB_REMOVE(pfi_ifhead, &V_pfi_ifs, kif); + + kif->pfik_flags |= PFI_IFLAG_REFS; + + mtx_lock(&pfi_unlnkdkifs_mtx); + LIST_INSERT_HEAD(&V_pfi_unlinked_kifs, kif, pfik_list); + mtx_unlock(&pfi_unlnkdkifs_mtx); +} + +void +pfi_kif_purge(void) +{ + struct pfi_kif *kif, *kif1; + + /* + * Do naive mark-and-sweep garbage collecting of old kifs. + * Reference flag is raised by pf_purge_expired_states(). + */ + mtx_lock(&pfi_unlnkdkifs_mtx); + LIST_FOREACH_SAFE(kif, &V_pfi_unlinked_kifs, pfik_list, kif1) { + if (!(kif->pfik_flags & PFI_IFLAG_REFS)) { + LIST_REMOVE(kif, pfik_list); + free(kif, PFI_MTYPE); + } else + kif->pfik_flags &= ~PFI_IFLAG_REFS; + } + mtx_unlock(&pfi_unlnkdkifs_mtx); +} + +int +pfi_kif_match(struct pfi_kif *rule_kif, struct pfi_kif *packet_kif) +{ + struct ifg_list *p; + + if (rule_kif == NULL || rule_kif == packet_kif) + return (1); + + if (rule_kif->pfik_group != NULL) + /* XXXGL: locking?
*/ + TAILQ_FOREACH(p, &packet_kif->pfik_ifp->if_groups, ifgl_next) + if (p->ifgl_group == rule_kif->pfik_group) + return (1); + + return (0); +} + +static void +pfi_attach_ifnet(struct ifnet *ifp) +{ + struct pfi_kif *kif; + + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); + + PF_RULES_WLOCK(); + V_pfi_update++; + kif = pfi_kif_attach(kif, ifp->if_xname); + + kif->pfik_ifp = ifp; + ifp->if_pf_kif = kif; + + pfi_kif_update(kif); + PF_RULES_WUNLOCK(); +} + +static void +pfi_attach_ifgroup(struct ifg_group *ifg) +{ + struct pfi_kif *kif; + + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); + + PF_RULES_WLOCK(); + V_pfi_update++; + kif = pfi_kif_attach(kif, ifg->ifg_group); + + kif->pfik_group = ifg; + ifg->ifg_pf_kif = kif; + PF_RULES_WUNLOCK(); +} + +int +pfi_match_addr(struct pfi_dynaddr *dyn, struct pf_addr *a, sa_family_t af) +{ + switch (af) { +#ifdef INET + case AF_INET: + switch (dyn->pfid_acnt4) { + case 0: + return (0); + case 1: + return (PF_MATCHA(0, &dyn->pfid_addr4, + &dyn->pfid_mask4, a, AF_INET)); + default: + return (pfr_match_addr(dyn->pfid_kt, a, AF_INET)); + } + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + switch (dyn->pfid_acnt6) { + case 0: + return (0); + case 1: + return (PF_MATCHA(0, &dyn->pfid_addr6, + &dyn->pfid_mask6, a, AF_INET6)); + default: + return (pfr_match_addr(dyn->pfid_kt, a, AF_INET6)); + } + break; +#endif /* INET6 */ + default: + return (0); + } +} + +int +pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af) +{ + struct pfi_dynaddr *dyn; + char tblname[PF_TABLE_NAME_SIZE]; + struct pf_ruleset *ruleset = NULL; + struct pfi_kif *kif; + int rv = 0; + + PF_RULES_WASSERT(); + KASSERT(aw->type == PF_ADDR_DYNIFTL, ("%s: type %u", + __func__, aw->type)); + KASSERT(aw->p.dyn == NULL, ("%s: dyn is %p", __func__, aw->p.dyn)); + + if ((dyn = malloc(sizeof(*dyn), PFI_MTYPE, M_NOWAIT | M_ZERO)) == NULL) + return (ENOMEM); + + if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_NOWAIT)) == NULL) { + free(dyn, PFI_MTYPE); + return (ENOMEM); + } + + if (!strcmp(aw->v.ifname, "self")) + dyn->pfid_kif = pfi_kif_attach(kif, IFG_ALL); + else + dyn->pfid_kif = pfi_kif_attach(kif, aw->v.ifname); + pfi_kif_ref(dyn->pfid_kif); + + dyn->pfid_net = pfi_unmask(&aw->v.a.mask); + if (af == AF_INET && dyn->pfid_net == 32) + dyn->pfid_net = 128; + strlcpy(tblname, aw->v.ifname, sizeof(tblname)); + if (aw->iflags & PFI_AFLAG_NETWORK) + strlcat(tblname, ":network", sizeof(tblname)); + if (aw->iflags & PFI_AFLAG_BROADCAST) + strlcat(tblname, ":broadcast", sizeof(tblname)); + if (aw->iflags & PFI_AFLAG_PEER) + strlcat(tblname, ":peer", sizeof(tblname)); + if (aw->iflags & PFI_AFLAG_NOALIAS) + strlcat(tblname, ":0", sizeof(tblname)); + if (dyn->pfid_net != 128) + snprintf(tblname + strlen(tblname), + sizeof(tblname) - strlen(tblname), "/%d", dyn->pfid_net); + if ((ruleset = pf_find_or_create_ruleset(PF_RESERVED_ANCHOR)) == NULL) { + rv = ENOMEM; + goto _bad; + } + + if ((dyn->pfid_kt = pfr_attach_table(ruleset, tblname)) == NULL) { + rv = ENOMEM; + goto _bad; + } + + dyn->pfid_kt->pfrkt_flags |= PFR_TFLAG_ACTIVE; + dyn->pfid_iflags = aw->iflags; + dyn->pfid_af = af; + + TAILQ_INSERT_TAIL(&dyn->pfid_kif->pfik_dynaddrs, dyn, entry); + aw->p.dyn = dyn; + pfi_kif_update(dyn->pfid_kif); + + return (0); + +_bad: + if (dyn->pfid_kt != NULL) + pfr_detach_table(dyn->pfid_kt); + if (ruleset != NULL) + pf_remove_if_empty_ruleset(ruleset); + if (dyn->pfid_kif != NULL) + pfi_kif_unref(dyn->pfid_kif); + free(dyn, PFI_MTYPE); + + return (rv); +} + +static void +pfi_kif_update(struct pfi_kif 
*kif) +{ + struct ifg_list *ifgl; + struct pfi_dynaddr *p; + + PF_RULES_WASSERT(); + + /* update all dynaddr */ + TAILQ_FOREACH(p, &kif->pfik_dynaddrs, entry) + pfi_dynaddr_update(p); + + /* again for all groups kif is member of */ + if (kif->pfik_ifp != NULL) { + IF_ADDR_RLOCK(kif->pfik_ifp); + TAILQ_FOREACH(ifgl, &kif->pfik_ifp->if_groups, ifgl_next) + pfi_kif_update((struct pfi_kif *) + ifgl->ifgl_group->ifg_pf_kif); + IF_ADDR_RUNLOCK(kif->pfik_ifp); + } +} + +static void +pfi_dynaddr_update(struct pfi_dynaddr *dyn) +{ + struct pfi_kif *kif; + struct pfr_ktable *kt; + + PF_RULES_WASSERT(); + KASSERT(dyn && dyn->pfid_kif && dyn->pfid_kt, + ("%s: bad argument", __func__)); + + kif = dyn->pfid_kif; + kt = dyn->pfid_kt; + + if (kt->pfrkt_larg != V_pfi_update) { + /* this table needs to be brought up-to-date */ + pfi_table_update(kt, kif, dyn->pfid_net, dyn->pfid_iflags); + kt->pfrkt_larg = V_pfi_update; + } + pfr_dynaddr_update(kt, dyn); +} + +static void +pfi_table_update(struct pfr_ktable *kt, struct pfi_kif *kif, int net, int flags) +{ + int e, size2 = 0; + struct ifg_member *ifgm; + + V_pfi_buffer_cnt = 0; + + if (kif->pfik_ifp != NULL) + pfi_instance_add(kif->pfik_ifp, net, flags); + else if (kif->pfik_group != NULL) { + IFNET_RLOCK(); + TAILQ_FOREACH(ifgm, &kif->pfik_group->ifg_members, ifgm_next) + pfi_instance_add(ifgm->ifgm_ifp, net, flags); + IFNET_RUNLOCK(); + } + + if ((e = pfr_set_addrs(&kt->pfrkt_t, V_pfi_buffer, V_pfi_buffer_cnt, &size2, + NULL, NULL, NULL, 0, PFR_TFLAG_ALLMASK))) + printf("%s: cannot set %d new addresses into table %s: %d\n", + __func__, V_pfi_buffer_cnt, kt->pfrkt_name, e); +} + +static void +pfi_instance_add(struct ifnet *ifp, int net, int flags) +{ + struct ifaddr *ia; + int got4 = 0, got6 = 0; + int net2, af; + + IF_ADDR_RLOCK(ifp); + TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_list) { + if (ia->ifa_addr == NULL) + continue; + af = ia->ifa_addr->sa_family; + if (af != AF_INET && af != AF_INET6) + continue; + /* + * XXX: For point-to-point interfaces, (ifname:0) and IPv4, + * jump over addresses without a proper route to work + * around a problem with ppp not fully removing the + * address used during IPCP. 
+ */ + if ((ifp->if_flags & IFF_POINTOPOINT) && + !(ia->ifa_flags & IFA_ROUTE) && + (flags & PFI_AFLAG_NOALIAS) && (af == AF_INET)) + continue; + if ((flags & PFI_AFLAG_BROADCAST) && af == AF_INET6) + continue; + if ((flags & PFI_AFLAG_BROADCAST) && + !(ifp->if_flags & IFF_BROADCAST)) + continue; + if ((flags & PFI_AFLAG_PEER) && + !(ifp->if_flags & IFF_POINTOPOINT)) + continue; + if ((flags & PFI_AFLAG_NETWORK) && af == AF_INET6 && + IN6_IS_ADDR_LINKLOCAL( + &((struct sockaddr_in6 *)ia->ifa_addr)->sin6_addr)) + continue; + if (flags & PFI_AFLAG_NOALIAS) { + if (af == AF_INET && got4) + continue; + if (af == AF_INET6 && got6) + continue; + } + if (af == AF_INET) + got4 = 1; + else if (af == AF_INET6) + got6 = 1; + net2 = net; + if (net2 == 128 && (flags & PFI_AFLAG_NETWORK)) { + if (af == AF_INET) + net2 = pfi_unmask(&((struct sockaddr_in *) + ia->ifa_netmask)->sin_addr); + else if (af == AF_INET6) + net2 = pfi_unmask(&((struct sockaddr_in6 *) + ia->ifa_netmask)->sin6_addr); + } + if (af == AF_INET && net2 > 32) + net2 = 32; + if (flags & PFI_AFLAG_BROADCAST) + pfi_address_add(ia->ifa_broadaddr, af, net2); + else if (flags & PFI_AFLAG_PEER) + pfi_address_add(ia->ifa_dstaddr, af, net2); + else + pfi_address_add(ia->ifa_addr, af, net2); + } + IF_ADDR_RUNLOCK(ifp); +} + +static void +pfi_address_add(struct sockaddr *sa, int af, int net) +{ + struct pfr_addr *p; + int i; + + if (V_pfi_buffer_cnt >= V_pfi_buffer_max) { + int new_max = V_pfi_buffer_max * 2; + + if (new_max > PFI_BUFFER_MAX) { + printf("%s: address buffer full (%d/%d)\n", __func__, + V_pfi_buffer_cnt, PFI_BUFFER_MAX); + return; + } + p = malloc(new_max * sizeof(*V_pfi_buffer), PFI_MTYPE, + M_NOWAIT); + if (p == NULL) { + printf("%s: no memory to grow buffer (%d/%d)\n", + __func__, V_pfi_buffer_cnt, PFI_BUFFER_MAX); + return; + } + memcpy(p, V_pfi_buffer, V_pfi_buffer_cnt * sizeof(*V_pfi_buffer)); + /* no need to zero buffer */ + free(V_pfi_buffer, PFI_MTYPE); + V_pfi_buffer = p; + V_pfi_buffer_max = new_max; + } + if (af == AF_INET && net > 32) + net = 128; + p = V_pfi_buffer + V_pfi_buffer_cnt++; + bzero(p, sizeof(*p)); + p->pfra_af = af; + p->pfra_net = net; + if (af == AF_INET) + p->pfra_ip4addr = ((struct sockaddr_in *)sa)->sin_addr; + else if (af == AF_INET6) { + p->pfra_ip6addr = ((struct sockaddr_in6 *)sa)->sin6_addr; + if (IN6_IS_SCOPE_EMBED(&p->pfra_ip6addr)) + p->pfra_ip6addr.s6_addr16[1] = 0; + } + /* mask network address bits */ + if (net < 128) + ((caddr_t)p)[p->pfra_net/8] &= ~(0xFF >> (p->pfra_net%8)); + for (i = (p->pfra_net+7)/8; i < sizeof(p->pfra_u); i++) + ((caddr_t)p)[i] = 0; +} + +void +pfi_dynaddr_remove(struct pfi_dynaddr *dyn) +{ + + KASSERT(dyn->pfid_kif != NULL, ("%s: null pfid_kif", __func__)); + KASSERT(dyn->pfid_kt != NULL, ("%s: null pfid_kt", __func__)); + + TAILQ_REMOVE(&dyn->pfid_kif->pfik_dynaddrs, dyn, entry); + pfi_kif_unref(dyn->pfid_kif); + pfr_detach_table(dyn->pfid_kt); + free(dyn, PFI_MTYPE); +} + +void +pfi_dynaddr_copyout(struct pf_addr_wrap *aw) +{ + + KASSERT(aw->type == PF_ADDR_DYNIFTL, + ("%s: type %u", __func__, aw->type)); + + if (aw->p.dyn == NULL || aw->p.dyn->pfid_kif == NULL) + return; + aw->p.dyncnt = aw->p.dyn->pfid_acnt4 + aw->p.dyn->pfid_acnt6; +} + +static int +pfi_if_compare(struct pfi_kif *p, struct pfi_kif *q) +{ + return (strncmp(p->pfik_name, q->pfik_name, IFNAMSIZ)); +} + +void +pfi_update_status(const char *name, struct pf_status *pfs) +{ + struct pfi_kif *p; + struct pfi_kif_cmp key; + struct ifg_member p_member, *ifgm; + TAILQ_HEAD(, ifg_member) ifg_members; + 
int i, j, k; + + strlcpy(key.pfik_name, name, sizeof(key.pfik_name)); + p = RB_FIND(pfi_ifhead, &V_pfi_ifs, (struct pfi_kif *)&key); + if (p == NULL) + return; + + if (p->pfik_group != NULL) { + bcopy(&p->pfik_group->ifg_members, &ifg_members, + sizeof(ifg_members)); + } else { + /* build a temporary list for p only */ + bzero(&p_member, sizeof(p_member)); + p_member.ifgm_ifp = p->pfik_ifp; + TAILQ_INIT(&ifg_members); + TAILQ_INSERT_TAIL(&ifg_members, &p_member, ifgm_next); + } + if (pfs) { + bzero(pfs->pcounters, sizeof(pfs->pcounters)); + bzero(pfs->bcounters, sizeof(pfs->bcounters)); + } + TAILQ_FOREACH(ifgm, &ifg_members, ifgm_next) { + if (ifgm->ifgm_ifp == NULL) + continue; + p = (struct pfi_kif *)ifgm->ifgm_ifp->if_pf_kif; + + /* just clear statistics */ + if (pfs == NULL) { + bzero(p->pfik_packets, sizeof(p->pfik_packets)); + bzero(p->pfik_bytes, sizeof(p->pfik_bytes)); + p->pfik_tzero = time_second; + continue; + } + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) + for (k = 0; k < 2; k++) { + pfs->pcounters[i][j][k] += + p->pfik_packets[i][j][k]; + pfs->bcounters[i][j] += + p->pfik_bytes[i][j][k]; + } + } +} + +void +pfi_get_ifaces(const char *name, struct pfi_kif *buf, int *size) +{ + struct pfi_kif *p, *nextp; + int n = 0; + + for (p = RB_MIN(pfi_ifhead, &V_pfi_ifs); p; p = nextp) { + nextp = RB_NEXT(pfi_ifhead, &V_pfi_ifs, p); + if (pfi_skip_if(name, p)) + continue; + if (*size <= n++) + break; + if (!p->pfik_tzero) + p->pfik_tzero = time_second; + bcopy(p, buf++, sizeof(*buf)); + nextp = RB_NEXT(pfi_ifhead, &V_pfi_ifs, p); + } + *size = n; +} + +static int +pfi_skip_if(const char *filter, struct pfi_kif *p) +{ + int n; + + if (filter == NULL || !*filter) + return (0); + if (!strcmp(p->pfik_name, filter)) + return (0); /* exact match */ + n = strlen(filter); + if (n < 1 || n >= IFNAMSIZ) + return (1); /* sanity check */ + if (filter[n-1] >= '0' && filter[n-1] <= '9') + return (1); /* only do exact match in that case */ + if (strncmp(p->pfik_name, filter, n)) + return (1); /* prefix doesn't match */ + return (p->pfik_name[n] < '0' || p->pfik_name[n] > '9'); +} + +int +pfi_set_flags(const char *name, int flags) +{ + struct pfi_kif *p; + + RB_FOREACH(p, pfi_ifhead, &V_pfi_ifs) { + if (pfi_skip_if(name, p)) + continue; + p->pfik_flags |= flags; + } + return (0); +} + +int +pfi_clear_flags(const char *name, int flags) +{ + struct pfi_kif *p; + + RB_FOREACH(p, pfi_ifhead, &V_pfi_ifs) { + if (pfi_skip_if(name, p)) + continue; + p->pfik_flags &= ~flags; + } + return (0); +} + +/* from pf_print_state.c */ +static int +pfi_unmask(void *addr) +{ + struct pf_addr *m = addr; + int i = 31, j = 0, b = 0; + u_int32_t tmp; + + while (j < 4 && m->addr32[j] == 0xffffffff) { + b += 32; + j++; + } + if (j < 4) { + tmp = ntohl(m->addr32[j]); + for (i = 31; tmp & (1 << i); --i) + b++; + } + return (b); +} + +static void +pfi_attach_ifnet_event(void *arg __unused, struct ifnet *ifp) +{ + + CURVNET_SET(ifp->if_vnet); + pfi_attach_ifnet(ifp); +#ifdef ALTQ + PF_RULES_WLOCK(); + pf_altq_ifnet_event(ifp, 0); + PF_RULES_WUNLOCK(); +#endif + CURVNET_RESTORE(); +} + +static void +pfi_detach_ifnet_event(void *arg __unused, struct ifnet *ifp) +{ + struct pfi_kif *kif = (struct pfi_kif *)ifp->if_pf_kif; + + CURVNET_SET(ifp->if_vnet); + PF_RULES_WLOCK(); + V_pfi_update++; + pfi_kif_update(kif); + + kif->pfik_ifp = NULL; + ifp->if_pf_kif = NULL; +#ifdef ALTQ + pf_altq_ifnet_event(ifp, 1); +#endif + PF_RULES_WUNLOCK(); + CURVNET_RESTORE(); +} + +static void +pfi_attach_group_event(void *arg , struct ifg_group 
*ifg) +{ + + CURVNET_SET((struct vnet *)arg); + pfi_attach_ifgroup(ifg); + CURVNET_RESTORE(); +} + +static void +pfi_change_group_event(void *arg, char *gname) +{ + struct pfi_kif *kif; + + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); + + CURVNET_SET((struct vnet *)arg); + PF_RULES_WLOCK(); + V_pfi_update++; + kif = pfi_kif_attach(kif, gname); + pfi_kif_update(kif); + PF_RULES_WUNLOCK(); + CURVNET_RESTORE(); +} + +static void +pfi_detach_group_event(void *arg, struct ifg_group *ifg) +{ + struct pfi_kif *kif = (struct pfi_kif *)ifg->ifg_pf_kif; + + CURVNET_SET((struct vnet *)arg); + PF_RULES_WLOCK(); + V_pfi_update++; + + kif->pfik_group = NULL; + ifg->ifg_pf_kif = NULL; + PF_RULES_WUNLOCK(); + CURVNET_RESTORE(); +} + +static void +pfi_ifaddr_event(void *arg __unused, struct ifnet *ifp) +{ + + CURVNET_SET(ifp->if_vnet); + PF_RULES_WLOCK(); + if (ifp && ifp->if_pf_kif) { + V_pfi_update++; + pfi_kif_update(ifp->if_pf_kif); + } + PF_RULES_WUNLOCK(); + CURVNET_RESTORE(); +} diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c new file mode 100644 index 0000000..032f051 --- /dev/null +++ b/sys/netpfil/pf/pf_ioctl.c @@ -0,0 +1,3774 @@ +/* $OpenBSD: pf_ioctl.c,v 1.213 2009/02/15 21:46:12 mbalmer Exp $ */ + +/* + * Copyright (c) 2001 Daniel Hartmeier + * Copyright (c) 2002,2003 Henning Brauer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Effort sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F30602-01-2-0537. 
+ * + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_bpf.h" +#include "opt_pf.h" + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/conf.h> +#include <sys/endian.h> +#include <sys/fcntl.h> +#include <sys/filio.h> +#include <sys/interrupt.h> +#include <sys/jail.h> +#include <sys/kernel.h> +#include <sys/kthread.h> +#include <sys/mbuf.h> +#include <sys/module.h> +#include <sys/proc.h> +#include <sys/smp.h> +#include <sys/socket.h> +#include <sys/sysctl.h> +#include <sys/md5.h> +#include <sys/ucred.h> + +#include <net/if.h> +#include <net/route.h> +#include <net/pfil.h> +#include <net/pfvar.h> +#include <net/if_pfsync.h> +#include <net/if_pflog.h> + +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip_var.h> +#include <netinet/ip_icmp.h> + +#ifdef INET6 +#include <netinet/ip6.h> +#endif /* INET6 */ + +#ifdef ALTQ +#include <altq/altq.h> +#endif + +static int pfattach(void); +static struct pf_pool *pf_get_pool(char *, u_int32_t, u_int8_t, u_int32_t, + u_int8_t, u_int8_t, u_int8_t); + +static void pf_mv_pool(struct pf_palist *, struct pf_palist *); +static void pf_empty_pool(struct pf_palist *); +static int pfioctl(struct cdev *, u_long, caddr_t, int, + struct thread *); +#ifdef ALTQ +static int pf_begin_altq(u_int32_t *); +static int pf_rollback_altq(u_int32_t); +static int pf_commit_altq(u_int32_t); +static int pf_enable_altq(struct pf_altq *); +static int pf_disable_altq(struct pf_altq *); +static u_int32_t pf_qname2qid(char *); +static void pf_qid_unref(u_int32_t); +#endif /* ALTQ */ +static int pf_begin_rules(u_int32_t *, int, const char *); +static int pf_rollback_rules(u_int32_t, int, char *); +static int pf_setup_pfsync_matching(struct pf_ruleset *); +static void pf_hash_rule(MD5_CTX *, struct pf_rule *); +static void pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *); +static int pf_commit_rules(u_int32_t, int, char *); +static int pf_addr_setup(struct pf_ruleset *, + struct pf_addr_wrap *, sa_family_t); +static void pf_addr_copyout(struct pf_addr_wrap *); + +VNET_DEFINE(struct pf_rule, pf_default_rule); + +#ifdef ALTQ +static VNET_DEFINE(int, pf_altq_running); +#define V_pf_altq_running VNET(pf_altq_running) +#endif + +#define TAGID_MAX 50000 +struct pf_tagname { + TAILQ_ENTRY(pf_tagname) entries; + char name[PF_TAG_NAME_SIZE]; + uint16_t tag; + int ref; +}; + +TAILQ_HEAD(pf_tags, pf_tagname); +#define V_pf_tags VNET(pf_tags) +VNET_DEFINE(struct pf_tags, pf_tags); +#define V_pf_qids VNET(pf_qids) +VNET_DEFINE(struct pf_tags, pf_qids); +static MALLOC_DEFINE(M_PFTAG, "pf_tag", "pf(4) tag names"); +static MALLOC_DEFINE(M_PFALTQ, "pf_altq", "pf(4) altq configuration db"); +static MALLOC_DEFINE(M_PFRULE, "pf_rule", "pf(4) rules"); + +#if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE) +#error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE +#endif + +static u_int16_t tagname2tag(struct pf_tags *, char *); +static u_int16_t pf_tagname2tag(char *); +static void tag_unref(struct pf_tags *, u_int16_t); + +#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x + +struct cdev *pf_dev; + +/* + * XXX - These are new and need to be checked when moving to a new version + */ +static void pf_clear_states(void); +static int pf_clear_tables(void); +static void pf_clear_srcnodes(struct pf_src_node *); +static void pf_tbladdr_copyout(struct pf_addr_wrap *); + +/* + * Wrapper functions for pfil(9) hooks + */ +#ifdef INET +static int pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, + int dir, struct 
inpcb *inp); +static int pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, + int dir, struct inpcb *inp); +#endif +#ifdef INET6 +static int pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, + int dir, struct inpcb *inp); +static int pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, + int dir, struct inpcb *inp); +#endif + +static int hook_pf(void); +static int dehook_pf(void); +static int shutdown_pf(void); +static int pf_load(void); +static int pf_unload(void); + +static struct cdevsw pf_cdevsw = { + .d_ioctl = pfioctl, + .d_name = PF_NAME, + .d_version = D_VERSION, +}; + +static volatile VNET_DEFINE(int, pf_pfil_hooked); +#define V_pf_pfil_hooked VNET(pf_pfil_hooked) +VNET_DEFINE(int, pf_end_threads); + +struct rwlock pf_rules_lock; + +/* pfsync */ +pfsync_state_import_t *pfsync_state_import_ptr = NULL; +pfsync_insert_state_t *pfsync_insert_state_ptr = NULL; +pfsync_update_state_t *pfsync_update_state_ptr = NULL; +pfsync_delete_state_t *pfsync_delete_state_ptr = NULL; +pfsync_clear_states_t *pfsync_clear_states_ptr = NULL; +pfsync_defer_t *pfsync_defer_ptr = NULL; +/* pflog */ +pflog_packet_t *pflog_packet_ptr = NULL; + +static int +pfattach(void) +{ + u_int32_t *my_timeout = V_pf_default_rule.timeout; + int error; + + pf_initialize(); + pfr_initialize(); + pfi_initialize(); + pf_normalize_init(); + + V_pf_limits[PF_LIMIT_STATES].limit = PFSTATE_HIWAT; + V_pf_limits[PF_LIMIT_SRC_NODES].limit = PFSNODE_HIWAT; + + RB_INIT(&V_pf_anchors); + pf_init_ruleset(&pf_main_ruleset); + + /* default rule should never be garbage collected */ + V_pf_default_rule.entries.tqe_prev = &V_pf_default_rule.entries.tqe_next; + V_pf_default_rule.action = PF_PASS; + V_pf_default_rule.nr = -1; + V_pf_default_rule.rtableid = -1; + + /* initialize default timeouts */ + my_timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL; + my_timeout[PFTM_TCP_OPENING] = PFTM_TCP_OPENING_VAL; + my_timeout[PFTM_TCP_ESTABLISHED] = PFTM_TCP_ESTABLISHED_VAL; + my_timeout[PFTM_TCP_CLOSING] = PFTM_TCP_CLOSING_VAL; + my_timeout[PFTM_TCP_FIN_WAIT] = PFTM_TCP_FIN_WAIT_VAL; + my_timeout[PFTM_TCP_CLOSED] = PFTM_TCP_CLOSED_VAL; + my_timeout[PFTM_UDP_FIRST_PACKET] = PFTM_UDP_FIRST_PACKET_VAL; + my_timeout[PFTM_UDP_SINGLE] = PFTM_UDP_SINGLE_VAL; + my_timeout[PFTM_UDP_MULTIPLE] = PFTM_UDP_MULTIPLE_VAL; + my_timeout[PFTM_ICMP_FIRST_PACKET] = PFTM_ICMP_FIRST_PACKET_VAL; + my_timeout[PFTM_ICMP_ERROR_REPLY] = PFTM_ICMP_ERROR_REPLY_VAL; + my_timeout[PFTM_OTHER_FIRST_PACKET] = PFTM_OTHER_FIRST_PACKET_VAL; + my_timeout[PFTM_OTHER_SINGLE] = PFTM_OTHER_SINGLE_VAL; + my_timeout[PFTM_OTHER_MULTIPLE] = PFTM_OTHER_MULTIPLE_VAL; + my_timeout[PFTM_FRAG] = PFTM_FRAG_VAL; + my_timeout[PFTM_INTERVAL] = PFTM_INTERVAL_VAL; + my_timeout[PFTM_SRC_NODE] = PFTM_SRC_NODE_VAL; + my_timeout[PFTM_TS_DIFF] = PFTM_TS_DIFF_VAL; + my_timeout[PFTM_ADAPTIVE_START] = PFSTATE_ADAPT_START; + my_timeout[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END; + + bzero(&V_pf_status, sizeof(V_pf_status)); + V_pf_status.debug = PF_DEBUG_URGENT; + + V_pf_pfil_hooked = 0; + + /* XXX do our best to avoid a conflict */ + V_pf_status.hostid = arc4random(); + + if ((error = kproc_create(pf_purge_thread, curvnet, NULL, 0, 0, + "pf purge")) != 0) + /* XXXGL: leaked all above. */ + return (error); + if ((error = swi_add(NULL, "pf send", pf_intr, curvnet, SWI_NET, + INTR_MPSAFE, &V_pf_swi_cookie)) != 0) + /* XXXGL: leaked all above. 
*/ + return (error); + + return (0); +} + +static struct pf_pool * +pf_get_pool(char *anchor, u_int32_t ticket, u_int8_t rule_action, + u_int32_t rule_number, u_int8_t r_last, u_int8_t active, + u_int8_t check_ticket) +{ + struct pf_ruleset *ruleset; + struct pf_rule *rule; + int rs_num; + + ruleset = pf_find_ruleset(anchor); + if (ruleset == NULL) + return (NULL); + rs_num = pf_get_ruleset_number(rule_action); + if (rs_num >= PF_RULESET_MAX) + return (NULL); + if (active) { + if (check_ticket && ticket != + ruleset->rules[rs_num].active.ticket) + return (NULL); + if (r_last) + rule = TAILQ_LAST(ruleset->rules[rs_num].active.ptr, + pf_rulequeue); + else + rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); + } else { + if (check_ticket && ticket != + ruleset->rules[rs_num].inactive.ticket) + return (NULL); + if (r_last) + rule = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr, + pf_rulequeue); + else + rule = TAILQ_FIRST(ruleset->rules[rs_num].inactive.ptr); + } + if (!r_last) { + while ((rule != NULL) && (rule->nr != rule_number)) + rule = TAILQ_NEXT(rule, entries); + } + if (rule == NULL) + return (NULL); + + return (&rule->rpool); +} + +static void +pf_mv_pool(struct pf_palist *poola, struct pf_palist *poolb) +{ + struct pf_pooladdr *mv_pool_pa; + + while ((mv_pool_pa = TAILQ_FIRST(poola)) != NULL) { + TAILQ_REMOVE(poola, mv_pool_pa, entries); + TAILQ_INSERT_TAIL(poolb, mv_pool_pa, entries); + } +} + +static void +pf_empty_pool(struct pf_palist *poola) +{ + struct pf_pooladdr *pa; + + while ((pa = TAILQ_FIRST(poola)) != NULL) { + switch (pa->addr.type) { + case PF_ADDR_DYNIFTL: + pfi_dynaddr_remove(pa->addr.p.dyn); + break; + case PF_ADDR_TABLE: + pfr_detach_table(pa->addr.p.tbl); + break; + } + if (pa->kif) + pfi_kif_unref(pa->kif); + TAILQ_REMOVE(poola, pa, entries); + free(pa, M_PFRULE); + } +} + +static void +pf_unlink_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule) +{ + + PF_RULES_WASSERT(); + + TAILQ_REMOVE(rulequeue, rule, entries); + + PF_UNLNKDRULES_LOCK(); + rule->rule_flag |= PFRULE_REFS; + TAILQ_INSERT_TAIL(&V_pf_unlinked_rules, rule, entries); + PF_UNLNKDRULES_UNLOCK(); +} + +void +pf_free_rule(struct pf_rule *rule) +{ + + PF_RULES_WASSERT(); + + if (rule->tag) + tag_unref(&V_pf_tags, rule->tag); + if (rule->match_tag) + tag_unref(&V_pf_tags, rule->match_tag); +#ifdef ALTQ + if (rule->pqid != rule->qid) + pf_qid_unref(rule->pqid); + pf_qid_unref(rule->qid); +#endif + switch (rule->src.addr.type) { + case PF_ADDR_DYNIFTL: + pfi_dynaddr_remove(rule->src.addr.p.dyn); + break; + case PF_ADDR_TABLE: + pfr_detach_table(rule->src.addr.p.tbl); + break; + } + switch (rule->dst.addr.type) { + case PF_ADDR_DYNIFTL: + pfi_dynaddr_remove(rule->dst.addr.p.dyn); + break; + case PF_ADDR_TABLE: + pfr_detach_table(rule->dst.addr.p.tbl); + break; + } + if (rule->overload_tbl) + pfr_detach_table(rule->overload_tbl); + if (rule->kif) + pfi_kif_unref(rule->kif); + pf_anchor_remove(rule); + pf_empty_pool(&rule->rpool.list); + free(rule, M_PFRULE); +} + +static u_int16_t +tagname2tag(struct pf_tags *head, char *tagname) +{ + struct pf_tagname *tag, *p = NULL; + u_int16_t new_tagid = 1; + + PF_RULES_WASSERT(); + + TAILQ_FOREACH(tag, head, entries) + if (strcmp(tagname, tag->name) == 0) { + tag->ref++; + return (tag->tag); + } + + /* + * to avoid fragmentation, we do a linear search from the beginning + * and take the first free slot we find. if there is none or the list + * is empty, append a new entry at the end. 
+ */ + + /* new entry */ + if (!TAILQ_EMPTY(head)) + for (p = TAILQ_FIRST(head); p != NULL && + p->tag == new_tagid; p = TAILQ_NEXT(p, entries)) + new_tagid = p->tag + 1; + + if (new_tagid > TAGID_MAX) + return (0); + + /* allocate and fill new struct pf_tagname */ + tag = malloc(sizeof(*tag), M_PFTAG, M_NOWAIT|M_ZERO); + if (tag == NULL) + return (0); + strlcpy(tag->name, tagname, sizeof(tag->name)); + tag->tag = new_tagid; + tag->ref++; + + if (p != NULL) /* insert new entry before p */ + TAILQ_INSERT_BEFORE(p, tag, entries); + else /* either list empty or no free slot in between */ + TAILQ_INSERT_TAIL(head, tag, entries); + + return (tag->tag); +} + +static void +tag_unref(struct pf_tags *head, u_int16_t tag) +{ + struct pf_tagname *p, *next; + + PF_RULES_WASSERT(); + + for (p = TAILQ_FIRST(head); p != NULL; p = next) { + next = TAILQ_NEXT(p, entries); + if (tag == p->tag) { + if (--p->ref == 0) { + TAILQ_REMOVE(head, p, entries); + free(p, M_PFTAG); + } + break; + } + } +} + +static u_int16_t +pf_tagname2tag(char *tagname) +{ + return (tagname2tag(&V_pf_tags, tagname)); +} + +#ifdef ALTQ +static u_int32_t +pf_qname2qid(char *qname) +{ + return ((u_int32_t)tagname2tag(&V_pf_qids, qname)); +} + +static void +pf_qid_unref(u_int32_t qid) +{ + tag_unref(&V_pf_qids, (u_int16_t)qid); +} + +static int +pf_begin_altq(u_int32_t *ticket) +{ + struct pf_altq *altq; + int error = 0; + + PF_RULES_WASSERT(); + + /* Purge the old altq list */ + while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) { + TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries); + if (altq->qname[0] == 0 && + (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { + /* detach and destroy the discipline */ + error = altq_remove(altq); + } else + pf_qid_unref(altq->qid); + free(altq, M_PFALTQ); + } + if (error) + return (error); + *ticket = ++V_ticket_altqs_inactive; + V_altqs_inactive_open = 1; + return (0); +} + +static int +pf_rollback_altq(u_int32_t ticket) +{ + struct pf_altq *altq; + int error = 0; + + PF_RULES_WASSERT(); + + if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive) + return (0); + /* Purge the old altq list */ + while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) { + TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries); + if (altq->qname[0] == 0 && + (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { + /* detach and destroy the discipline */ + error = altq_remove(altq); + } else + pf_qid_unref(altq->qid); + free(altq, M_PFALTQ); + } + V_altqs_inactive_open = 0; + return (error); +} + +static int +pf_commit_altq(u_int32_t ticket) +{ + struct pf_altqqueue *old_altqs; + struct pf_altq *altq; + int err, error = 0; + + PF_RULES_WASSERT(); + + if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive) + return (EBUSY); + + /* swap altqs, keep the old. 
*/ + old_altqs = V_pf_altqs_active; + V_pf_altqs_active = V_pf_altqs_inactive; + V_pf_altqs_inactive = old_altqs; + V_ticket_altqs_active = V_ticket_altqs_inactive; + + /* Attach new disciplines */ + TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { + if (altq->qname[0] == 0 && + (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { + /* attach the discipline */ + error = altq_pfattach(altq); + if (error == 0 && V_pf_altq_running) + error = pf_enable_altq(altq); + if (error != 0) + return (error); + } + } + + /* Purge the old altq list */ + while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) { + TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries); + if (altq->qname[0] == 0 && + (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { + /* detach and destroy the discipline */ + if (V_pf_altq_running) + error = pf_disable_altq(altq); + err = altq_pfdetach(altq); + if (err != 0 && error == 0) + error = err; + err = altq_remove(altq); + if (err != 0 && error == 0) + error = err; + } else + pf_qid_unref(altq->qid); + free(altq, M_PFALTQ); + } + + V_altqs_inactive_open = 0; + return (error); +} + +static int +pf_enable_altq(struct pf_altq *altq) +{ + struct ifnet *ifp; + struct tb_profile tb; + int error = 0; + + if ((ifp = ifunit(altq->ifname)) == NULL) + return (EINVAL); + + if (ifp->if_snd.altq_type != ALTQT_NONE) + error = altq_enable(&ifp->if_snd); + + /* set tokenbucket regulator */ + if (error == 0 && ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) { + tb.rate = altq->ifbandwidth; + tb.depth = altq->tbrsize; + error = tbr_set(&ifp->if_snd, &tb); + } + + return (error); +} + +static int +pf_disable_altq(struct pf_altq *altq) +{ + struct ifnet *ifp; + struct tb_profile tb; + int error; + + if ((ifp = ifunit(altq->ifname)) == NULL) + return (EINVAL); + + /* + * when the discipline is no longer referenced, it was overridden + * by a new one. if so, just return. 
+ */ + if (altq->altq_disc != ifp->if_snd.altq_disc) + return (0); + + error = altq_disable(&ifp->if_snd); + + if (error == 0) { + /* clear tokenbucket regulator */ + tb.rate = 0; + error = tbr_set(&ifp->if_snd, &tb); + } + + return (error); +} + +void +pf_altq_ifnet_event(struct ifnet *ifp, int remove) +{ + struct ifnet *ifp1; + struct pf_altq *a1, *a2, *a3; + u_int32_t ticket; + int error = 0; + + /* Interrupt userland queue modifications */ + if (V_altqs_inactive_open) + pf_rollback_altq(V_ticket_altqs_inactive); + + /* Start new altq ruleset */ + if (pf_begin_altq(&ticket)) + return; + + /* Copy the current active set */ + TAILQ_FOREACH(a1, V_pf_altqs_active, entries) { + a2 = malloc(sizeof(*a2), M_PFALTQ, M_NOWAIT); + if (a2 == NULL) { + error = ENOMEM; + break; + } + bcopy(a1, a2, sizeof(struct pf_altq)); + + if (a2->qname[0] != 0) { + if ((a2->qid = pf_qname2qid(a2->qname)) == 0) { + error = EBUSY; + free(a2, M_PFALTQ); + break; + } + a2->altq_disc = NULL; + TAILQ_FOREACH(a3, V_pf_altqs_inactive, entries) { + if (strncmp(a3->ifname, a2->ifname, + IFNAMSIZ) == 0 && a3->qname[0] == 0) { + a2->altq_disc = a3->altq_disc; + break; + } + } + } + /* Deactivate the interface in question */ + a2->local_flags &= ~PFALTQ_FLAG_IF_REMOVED; + if ((ifp1 = ifunit(a2->ifname)) == NULL || + (remove && ifp1 == ifp)) { + a2->local_flags |= PFALTQ_FLAG_IF_REMOVED; + } else { + error = altq_add(a2); + + if (ticket != V_ticket_altqs_inactive) + error = EBUSY; + + if (error) { + free(a2, M_PFALTQ); + break; + } + } + + TAILQ_INSERT_TAIL(V_pf_altqs_inactive, a2, entries); + } + + if (error != 0) + pf_rollback_altq(ticket); + else + pf_commit_altq(ticket); +} +#endif /* ALTQ */ + +static int +pf_begin_rules(u_int32_t *ticket, int rs_num, const char *anchor) +{ + struct pf_ruleset *rs; + struct pf_rule *rule; + + PF_RULES_WASSERT(); + + if (rs_num < 0 || rs_num >= PF_RULESET_MAX) + return (EINVAL); + rs = pf_find_or_create_ruleset(anchor); + if (rs == NULL) + return (EINVAL); + while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) { + pf_unlink_rule(rs->rules[rs_num].inactive.ptr, rule); + rs->rules[rs_num].inactive.rcount--; + } + *ticket = ++rs->rules[rs_num].inactive.ticket; + rs->rules[rs_num].inactive.open = 1; + return (0); +} + +static int +pf_rollback_rules(u_int32_t ticket, int rs_num, char *anchor) +{ + struct pf_ruleset *rs; + struct pf_rule *rule; + + PF_RULES_WASSERT(); + + if (rs_num < 0 || rs_num >= PF_RULESET_MAX) + return (EINVAL); + rs = pf_find_ruleset(anchor); + if (rs == NULL || !rs->rules[rs_num].inactive.open || + rs->rules[rs_num].inactive.ticket != ticket) + return (0); + while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) { + pf_unlink_rule(rs->rules[rs_num].inactive.ptr, rule); + rs->rules[rs_num].inactive.rcount--; + } + rs->rules[rs_num].inactive.open = 0; + return (0); +} + +#define PF_MD5_UPD(st, elm) \ + MD5Update(ctx, (u_int8_t *) &(st)->elm, sizeof((st)->elm)) + +#define PF_MD5_UPD_STR(st, elm) \ + MD5Update(ctx, (u_int8_t *) (st)->elm, strlen((st)->elm)) + +#define PF_MD5_UPD_HTONL(st, elm, stor) do { \ + (stor) = htonl((st)->elm); \ + MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int32_t));\ +} while (0) + +#define PF_MD5_UPD_HTONS(st, elm, stor) do { \ + (stor) = htons((st)->elm); \ + MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int16_t));\ +} while (0) + +static void +pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr) +{ + PF_MD5_UPD(pfr, addr.type); + switch (pfr->addr.type) { + case PF_ADDR_DYNIFTL: + PF_MD5_UPD(pfr, 
addr.v.ifname); + PF_MD5_UPD(pfr, addr.iflags); + break; + case PF_ADDR_TABLE: + PF_MD5_UPD(pfr, addr.v.tblname); + break; + case PF_ADDR_ADDRMASK: + /* XXX ignore af? */ + PF_MD5_UPD(pfr, addr.v.a.addr.addr32); + PF_MD5_UPD(pfr, addr.v.a.mask.addr32); + break; + } + + PF_MD5_UPD(pfr, port[0]); + PF_MD5_UPD(pfr, port[1]); + PF_MD5_UPD(pfr, neg); + PF_MD5_UPD(pfr, port_op); +} + +static void +pf_hash_rule(MD5_CTX *ctx, struct pf_rule *rule) +{ + u_int16_t x; + u_int32_t y; + + pf_hash_rule_addr(ctx, &rule->src); + pf_hash_rule_addr(ctx, &rule->dst); + PF_MD5_UPD_STR(rule, label); + PF_MD5_UPD_STR(rule, ifname); + PF_MD5_UPD_STR(rule, match_tagname); + PF_MD5_UPD_HTONS(rule, match_tag, x); /* dup? */ + PF_MD5_UPD_HTONL(rule, os_fingerprint, y); + PF_MD5_UPD_HTONL(rule, prob, y); + PF_MD5_UPD_HTONL(rule, uid.uid[0], y); + PF_MD5_UPD_HTONL(rule, uid.uid[1], y); + PF_MD5_UPD(rule, uid.op); + PF_MD5_UPD_HTONL(rule, gid.gid[0], y); + PF_MD5_UPD_HTONL(rule, gid.gid[1], y); + PF_MD5_UPD(rule, gid.op); + PF_MD5_UPD_HTONL(rule, rule_flag, y); + PF_MD5_UPD(rule, action); + PF_MD5_UPD(rule, direction); + PF_MD5_UPD(rule, af); + PF_MD5_UPD(rule, quick); + PF_MD5_UPD(rule, ifnot); + PF_MD5_UPD(rule, match_tag_not); + PF_MD5_UPD(rule, natpass); + PF_MD5_UPD(rule, keep_state); + PF_MD5_UPD(rule, proto); + PF_MD5_UPD(rule, type); + PF_MD5_UPD(rule, code); + PF_MD5_UPD(rule, flags); + PF_MD5_UPD(rule, flagset); + PF_MD5_UPD(rule, allow_opts); + PF_MD5_UPD(rule, rt); + PF_MD5_UPD(rule, tos); +} + +static int +pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) +{ + struct pf_ruleset *rs; + struct pf_rule *rule, **old_array; + struct pf_rulequeue *old_rules; + int error; + u_int32_t old_rcount; + + PF_RULES_WASSERT(); + + if (rs_num < 0 || rs_num >= PF_RULESET_MAX) + return (EINVAL); + rs = pf_find_ruleset(anchor); + if (rs == NULL || !rs->rules[rs_num].inactive.open || + ticket != rs->rules[rs_num].inactive.ticket) + return (EBUSY); + + /* Calculate checksum for the main ruleset */ + if (rs == &pf_main_ruleset) { + error = pf_setup_pfsync_matching(rs); + if (error != 0) + return (error); + } + + /* Swap rules, keep the old. */ + old_rules = rs->rules[rs_num].active.ptr; + old_rcount = rs->rules[rs_num].active.rcount; + old_array = rs->rules[rs_num].active.ptr_array; + + rs->rules[rs_num].active.ptr = + rs->rules[rs_num].inactive.ptr; + rs->rules[rs_num].active.ptr_array = + rs->rules[rs_num].inactive.ptr_array; + rs->rules[rs_num].active.rcount = + rs->rules[rs_num].inactive.rcount; + rs->rules[rs_num].inactive.ptr = old_rules; + rs->rules[rs_num].inactive.ptr_array = old_array; + rs->rules[rs_num].inactive.rcount = old_rcount; + + rs->rules[rs_num].active.ticket = + rs->rules[rs_num].inactive.ticket; + pf_calc_skip_steps(rs->rules[rs_num].active.ptr); + + + /* Purge the old rule list. */ + while ((rule = TAILQ_FIRST(old_rules)) != NULL) + pf_unlink_rule(old_rules, rule); + if (rs->rules[rs_num].inactive.ptr_array) + free(rs->rules[rs_num].inactive.ptr_array, M_TEMP); + rs->rules[rs_num].inactive.ptr_array = NULL; + rs->rules[rs_num].inactive.rcount = 0; + rs->rules[rs_num].inactive.open = 0; + pf_remove_if_empty_ruleset(rs); + + return (0); +} + +static int +pf_setup_pfsync_matching(struct pf_ruleset *rs) +{ + MD5_CTX ctx; + struct pf_rule *rule; + int rs_cnt; + u_int8_t digest[PF_MD5_DIGEST_LENGTH]; + + MD5Init(&ctx); + for (rs_cnt = 0; rs_cnt < PF_RULESET_MAX; rs_cnt++) { + /* XXX PF_RULESET_SCRUB as well? 
*/ + if (rs_cnt == PF_RULESET_SCRUB) + continue; + + if (rs->rules[rs_cnt].inactive.ptr_array) + free(rs->rules[rs_cnt].inactive.ptr_array, M_TEMP); + rs->rules[rs_cnt].inactive.ptr_array = NULL; + + if (rs->rules[rs_cnt].inactive.rcount) { + rs->rules[rs_cnt].inactive.ptr_array = + malloc(sizeof(caddr_t) * + rs->rules[rs_cnt].inactive.rcount, + M_TEMP, M_NOWAIT); + + if (!rs->rules[rs_cnt].inactive.ptr_array) + return (ENOMEM); + } + + TAILQ_FOREACH(rule, rs->rules[rs_cnt].inactive.ptr, + entries) { + pf_hash_rule(&ctx, rule); + (rs->rules[rs_cnt].inactive.ptr_array)[rule->nr] = rule; + } + } + + MD5Final(digest, &ctx); + memcpy(V_pf_status.pf_chksum, digest, sizeof(V_pf_status.pf_chksum)); + return (0); +} + +static int +pf_addr_setup(struct pf_ruleset *ruleset, struct pf_addr_wrap *addr, + sa_family_t af) +{ + int error = 0; + + switch (addr->type) { + case PF_ADDR_TABLE: + addr->p.tbl = pfr_attach_table(ruleset, addr->v.tblname); + if (addr->p.tbl == NULL) + error = ENOMEM; + break; + case PF_ADDR_DYNIFTL: + error = pfi_dynaddr_setup(addr, af); + break; + } + + return (error); +} + +static void +pf_addr_copyout(struct pf_addr_wrap *addr) +{ + + switch (addr->type) { + case PF_ADDR_DYNIFTL: + pfi_dynaddr_copyout(addr); + break; + case PF_ADDR_TABLE: + pf_tbladdr_copyout(addr); + break; + } +} + +static int +pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td) +{ + int error = 0; + + CURVNET_SET(TD_TO_VNET(td)); + + /* XXX keep in sync with switch() below */ + if (securelevel_gt(td->td_ucred, 2)) + switch (cmd) { + case DIOCGETRULES: + case DIOCGETRULE: + case DIOCGETADDRS: + case DIOCGETADDR: + case DIOCGETSTATE: + case DIOCSETSTATUSIF: + case DIOCGETSTATUS: + case DIOCCLRSTATUS: + case DIOCNATLOOK: + case DIOCSETDEBUG: + case DIOCGETSTATES: + case DIOCGETTIMEOUT: + case DIOCCLRRULECTRS: + case DIOCGETLIMIT: + case DIOCGETALTQS: + case DIOCGETALTQ: + case DIOCGETQSTATS: + case DIOCGETRULESETS: + case DIOCGETRULESET: + case DIOCRGETTABLES: + case DIOCRGETTSTATS: + case DIOCRCLRTSTATS: + case DIOCRCLRADDRS: + case DIOCRADDADDRS: + case DIOCRDELADDRS: + case DIOCRSETADDRS: + case DIOCRGETADDRS: + case DIOCRGETASTATS: + case DIOCRCLRASTATS: + case DIOCRTSTADDRS: + case DIOCOSFPGET: + case DIOCGETSRCNODES: + case DIOCCLRSRCNODES: + case DIOCIGETIFACES: + case DIOCGIFSPEED: + case DIOCSETIFFLAG: + case DIOCCLRIFFLAG: + break; + case DIOCRCLRTABLES: + case DIOCRADDTABLES: + case DIOCRDELTABLES: + case DIOCRSETTFLAGS: + if (((struct pfioc_table *)addr)->pfrio_flags & + PFR_FLAG_DUMMY) + break; /* dummy operation ok */ + return (EPERM); + default: + return (EPERM); + } + + if (!(flags & FWRITE)) + switch (cmd) { + case DIOCGETRULES: + case DIOCGETADDRS: + case DIOCGETADDR: + case DIOCGETSTATE: + case DIOCGETSTATUS: + case DIOCGETSTATES: + case DIOCGETTIMEOUT: + case DIOCGETLIMIT: + case DIOCGETALTQS: + case DIOCGETALTQ: + case DIOCGETQSTATS: + case DIOCGETRULESETS: + case DIOCGETRULESET: + case DIOCNATLOOK: + case DIOCRGETTABLES: + case DIOCRGETTSTATS: + case DIOCRGETADDRS: + case DIOCRGETASTATS: + case DIOCRTSTADDRS: + case DIOCOSFPGET: + case DIOCGETSRCNODES: + case DIOCIGETIFACES: + case DIOCGIFSPEED: + break; + case DIOCRCLRTABLES: + case DIOCRADDTABLES: + case DIOCRDELTABLES: + case DIOCRCLRTSTATS: + case DIOCRCLRADDRS: + case DIOCRADDADDRS: + case DIOCRDELADDRS: + case DIOCRSETADDRS: + case DIOCRSETTFLAGS: + if (((struct pfioc_table *)addr)->pfrio_flags & + PFR_FLAG_DUMMY) { + flags |= FWRITE; /* need write lock for dummy */ + break; /* dummy operation ok */ + 
} + return (EACCES); + case DIOCGETRULE: + if (((struct pfioc_rule *)addr)->action == + PF_GET_CLR_CNTR) + return (EACCES); + break; + default: + return (EACCES); + } + + switch (cmd) { + case DIOCSTART: + PF_RULES_WLOCK(); + if (V_pf_status.running) + error = EEXIST; + else { + int cpu; + + PF_RULES_WUNLOCK(); + error = hook_pf(); + if (error) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: pfil registration failed\n")); + break; + } + PF_RULES_WLOCK(); + V_pf_status.running = 1; + V_pf_status.since = time_second; + + CPU_FOREACH(cpu) + V_pf_stateid[cpu] = time_second; + + DPFPRINTF(PF_DEBUG_MISC, ("pf: started\n")); + } + PF_RULES_WUNLOCK(); + break; + + case DIOCSTOP: + PF_RULES_WLOCK(); + if (!V_pf_status.running) + error = ENOENT; + else { + V_pf_status.running = 0; + PF_RULES_WUNLOCK(); + error = dehook_pf(); + if (error) { + V_pf_status.running = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: pfil unregistration failed\n")); + } + PF_RULES_WLOCK(); + V_pf_status.since = time_second; + DPFPRINTF(PF_DEBUG_MISC, ("pf: stopped\n")); + } + PF_RULES_WUNLOCK(); + break; + + case DIOCADDRULE: { + struct pfioc_rule *pr = (struct pfioc_rule *)addr; + struct pf_ruleset *ruleset; + struct pf_rule *rule, *tail; + struct pf_pooladdr *pa; + struct pfi_kif *kif = NULL; + int rs_num; + + if (pr->rule.return_icmp >> 8 > ICMP_MAXTYPE) { + error = EINVAL; + break; + } +#ifndef INET + if (pr->rule.af == AF_INET) { + error = EAFNOSUPPORT; + break; + } +#endif /* INET */ +#ifndef INET6 + if (pr->rule.af == AF_INET6) { + error = EAFNOSUPPORT; + break; + } +#endif /* INET6 */ + + rule = malloc(sizeof(*rule), M_PFRULE, M_WAITOK); + bcopy(&pr->rule, rule, sizeof(struct pf_rule)); + if (rule->ifname[0]) + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); + rule->cuid = td->td_ucred->cr_ruid; + rule->cpid = td->td_proc ? 
td->td_proc->p_pid : 0; + TAILQ_INIT(&rule->rpool.list); + +#define ERROUT(x) { error = (x); goto DIOCADDRULE_error; } + + PF_RULES_WLOCK(); + pr->anchor[sizeof(pr->anchor) - 1] = 0; + ruleset = pf_find_ruleset(pr->anchor); + if (ruleset == NULL) + ERROUT(EINVAL); + rs_num = pf_get_ruleset_number(pr->rule.action); + if (rs_num >= PF_RULESET_MAX) + ERROUT(EINVAL); + if (pr->ticket != ruleset->rules[rs_num].inactive.ticket) { + DPFPRINTF(PF_DEBUG_MISC, + ("ticket: %d != [%d]%d\n", pr->ticket, rs_num, + ruleset->rules[rs_num].inactive.ticket)); + ERROUT(EBUSY); + } + if (pr->pool_ticket != V_ticket_pabuf) { + DPFPRINTF(PF_DEBUG_MISC, + ("pool_ticket: %d != %d\n", pr->pool_ticket, + V_ticket_pabuf)); + ERROUT(EBUSY); + } + + tail = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr, + pf_rulequeue); + if (tail) + rule->nr = tail->nr + 1; + else + rule->nr = 0; + if (rule->ifname[0]) { + rule->kif = pfi_kif_attach(kif, rule->ifname); + pfi_kif_ref(rule->kif); + } else + rule->kif = NULL; + + if (rule->rtableid > 0 && rule->rtableid >= rt_numfibs) + error = EBUSY; + +#ifdef ALTQ + /* set queue IDs */ + if (rule->qname[0] != 0) { + if ((rule->qid = pf_qname2qid(rule->qname)) == 0) + error = EBUSY; + else if (rule->pqname[0] != 0) { + if ((rule->pqid = + pf_qname2qid(rule->pqname)) == 0) + error = EBUSY; + } else + rule->pqid = rule->qid; + } +#endif + if (rule->tagname[0]) + if ((rule->tag = pf_tagname2tag(rule->tagname)) == 0) + error = EBUSY; + if (rule->match_tagname[0]) + if ((rule->match_tag = + pf_tagname2tag(rule->match_tagname)) == 0) + error = EBUSY; + if (rule->rt && !rule->direction) + error = EINVAL; + if (!rule->log) + rule->logif = 0; + if (rule->logif >= PFLOGIFS_MAX) + error = EINVAL; + if (pf_addr_setup(ruleset, &rule->src.addr, rule->af)) + error = ENOMEM; + if (pf_addr_setup(ruleset, &rule->dst.addr, rule->af)) + error = ENOMEM; + if (pf_anchor_setup(rule, ruleset, pr->anchor_call)) + error = EINVAL; + TAILQ_FOREACH(pa, &V_pf_pabuf, entries) + if (pa->addr.type == PF_ADDR_TABLE) { + pa->addr.p.tbl = pfr_attach_table(ruleset, + pa->addr.v.tblname); + if (pa->addr.p.tbl == NULL) + error = ENOMEM; + } + + if (rule->overload_tblname[0]) { + if ((rule->overload_tbl = pfr_attach_table(ruleset, + rule->overload_tblname)) == NULL) + error = EINVAL; + else + rule->overload_tbl->pfrkt_flags |= + PFR_TFLAG_ACTIVE; + } + + pf_mv_pool(&V_pf_pabuf, &rule->rpool.list); + if (((((rule->action == PF_NAT) || (rule->action == PF_RDR) || + (rule->action == PF_BINAT)) && rule->anchor == NULL) || + (rule->rt > PF_FASTROUTE)) && + (TAILQ_FIRST(&rule->rpool.list) == NULL)) + error = EINVAL; + + if (error) { + pf_free_rule(rule); + PF_RULES_WUNLOCK(); + break; + } + + rule->rpool.cur = TAILQ_FIRST(&rule->rpool.list); + rule->evaluations = rule->packets[0] = rule->packets[1] = + rule->bytes[0] = rule->bytes[1] = 0; + TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr, + rule, entries); + ruleset->rules[rs_num].inactive.rcount++; + PF_RULES_WUNLOCK(); + break; + +#undef ERROUT +DIOCADDRULE_error: + PF_RULES_WUNLOCK(); + free(rule, M_PFRULE); + if (kif) + free(kif, PFI_MTYPE); + break; + } + + case DIOCGETRULES: { + struct pfioc_rule *pr = (struct pfioc_rule *)addr; + struct pf_ruleset *ruleset; + struct pf_rule *tail; + int rs_num; + + PF_RULES_WLOCK(); + pr->anchor[sizeof(pr->anchor) - 1] = 0; + ruleset = pf_find_ruleset(pr->anchor); + if (ruleset == NULL) { + PF_RULES_WUNLOCK(); + error = EINVAL; + break; + } + rs_num = pf_get_ruleset_number(pr->rule.action); + if (rs_num >= PF_RULESET_MAX) { + 
PF_RULES_WUNLOCK(); + error = EINVAL; + break; + } + tail = TAILQ_LAST(ruleset->rules[rs_num].active.ptr, + pf_rulequeue); + if (tail) + pr->nr = tail->nr + 1; + else + pr->nr = 0; + pr->ticket = ruleset->rules[rs_num].active.ticket; + PF_RULES_WUNLOCK(); + break; + } + + case DIOCGETRULE: { + struct pfioc_rule *pr = (struct pfioc_rule *)addr; + struct pf_ruleset *ruleset; + struct pf_rule *rule; + int rs_num, i; + + PF_RULES_WLOCK(); + pr->anchor[sizeof(pr->anchor) - 1] = 0; + ruleset = pf_find_ruleset(pr->anchor); + if (ruleset == NULL) { + PF_RULES_WUNLOCK(); + error = EINVAL; + break; + } + rs_num = pf_get_ruleset_number(pr->rule.action); + if (rs_num >= PF_RULESET_MAX) { + PF_RULES_WUNLOCK(); + error = EINVAL; + break; + } + if (pr->ticket != ruleset->rules[rs_num].active.ticket) { + PF_RULES_WUNLOCK(); + error = EBUSY; + break; + } + rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); + while ((rule != NULL) && (rule->nr != pr->nr)) + rule = TAILQ_NEXT(rule, entries); + if (rule == NULL) { + PF_RULES_WUNLOCK(); + error = EBUSY; + break; + } + bcopy(rule, &pr->rule, sizeof(struct pf_rule)); + if (pf_anchor_copyout(ruleset, rule, pr)) { + PF_RULES_WUNLOCK(); + error = EBUSY; + break; + } + pf_addr_copyout(&pr->rule.src.addr); + pf_addr_copyout(&pr->rule.dst.addr); + for (i = 0; i < PF_SKIP_COUNT; ++i) + if (rule->skip[i].ptr == NULL) + pr->rule.skip[i].nr = -1; + else + pr->rule.skip[i].nr = + rule->skip[i].ptr->nr; + + if (pr->action == PF_GET_CLR_CNTR) { + rule->evaluations = 0; + rule->packets[0] = rule->packets[1] = 0; + rule->bytes[0] = rule->bytes[1] = 0; + rule->states_tot = 0; + } + PF_RULES_WUNLOCK(); + break; + } + + case DIOCCHANGERULE: { + struct pfioc_rule *pcr = (struct pfioc_rule *)addr; + struct pf_ruleset *ruleset; + struct pf_rule *oldrule = NULL, *newrule = NULL; + struct pfi_kif *kif = NULL; + struct pf_pooladdr *pa; + u_int32_t nr = 0; + int rs_num; + + if (pcr->action < PF_CHANGE_ADD_HEAD || + pcr->action > PF_CHANGE_GET_TICKET) { + error = EINVAL; + break; + } + if (pcr->rule.return_icmp >> 8 > ICMP_MAXTYPE) { + error = EINVAL; + break; + } + + if (pcr->action != PF_CHANGE_REMOVE) { +#ifndef INET + if (pcr->rule.af == AF_INET) { + error = EAFNOSUPPORT; + break; + } +#endif /* INET */ +#ifndef INET6 + if (pcr->rule.af == AF_INET6) { + error = EAFNOSUPPORT; + break; + } +#endif /* INET6 */ + newrule = malloc(sizeof(*newrule), M_PFRULE, M_WAITOK); + bcopy(&pcr->rule, newrule, sizeof(struct pf_rule)); + newrule->cuid = td->td_ucred->cr_ruid; + newrule->cpid = td->td_proc ? td->td_proc->p_pid : 0; + TAILQ_INIT(&newrule->rpool.list); + /* Initialize refcounting. 
*/ + newrule->states_cur = 0; + newrule->entries.tqe_prev = NULL; + + if (newrule->ifname[0]) + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); + } + +#define ERROUT(x) { error = (x); goto DIOCCHANGERULE_error; } + + PF_RULES_WLOCK(); + if (!(pcr->action == PF_CHANGE_REMOVE || + pcr->action == PF_CHANGE_GET_TICKET) && + pcr->pool_ticket != V_ticket_pabuf) + ERROUT(EBUSY); + + ruleset = pf_find_ruleset(pcr->anchor); + if (ruleset == NULL) + ERROUT(EINVAL); + + rs_num = pf_get_ruleset_number(pcr->rule.action); + if (rs_num >= PF_RULESET_MAX) + ERROUT(EINVAL); + + if (pcr->action == PF_CHANGE_GET_TICKET) { + pcr->ticket = ++ruleset->rules[rs_num].active.ticket; + ERROUT(0); + } else if (pcr->ticket != + ruleset->rules[rs_num].active.ticket) + ERROUT(EINVAL); + + if (pcr->action != PF_CHANGE_REMOVE) { + if (newrule->ifname[0]) { + newrule->kif = pfi_kif_attach(kif, + newrule->ifname); + pfi_kif_ref(newrule->kif); + } else + newrule->kif = NULL; + + if (newrule->rtableid > 0 && + newrule->rtableid >= rt_numfibs) + error = EBUSY; + +#ifdef ALTQ + /* set queue IDs */ + if (newrule->qname[0] != 0) { + if ((newrule->qid = + pf_qname2qid(newrule->qname)) == 0) + error = EBUSY; + else if (newrule->pqname[0] != 0) { + if ((newrule->pqid = + pf_qname2qid(newrule->pqname)) == 0) + error = EBUSY; + } else + newrule->pqid = newrule->qid; + } +#endif /* ALTQ */ + if (newrule->tagname[0]) + if ((newrule->tag = + pf_tagname2tag(newrule->tagname)) == 0) + error = EBUSY; + if (newrule->match_tagname[0]) + if ((newrule->match_tag = pf_tagname2tag( + newrule->match_tagname)) == 0) + error = EBUSY; + if (newrule->rt && !newrule->direction) + error = EINVAL; + if (!newrule->log) + newrule->logif = 0; + if (newrule->logif >= PFLOGIFS_MAX) + error = EINVAL; + if (pf_addr_setup(ruleset, &newrule->src.addr, newrule->af)) + error = ENOMEM; + if (pf_addr_setup(ruleset, &newrule->dst.addr, newrule->af)) + error = ENOMEM; + if (pf_anchor_setup(newrule, ruleset, pcr->anchor_call)) + error = EINVAL; + TAILQ_FOREACH(pa, &V_pf_pabuf, entries) + if (pa->addr.type == PF_ADDR_TABLE) { + pa->addr.p.tbl = + pfr_attach_table(ruleset, + pa->addr.v.tblname); + if (pa->addr.p.tbl == NULL) + error = ENOMEM; + } + + if (newrule->overload_tblname[0]) { + if ((newrule->overload_tbl = pfr_attach_table( + ruleset, newrule->overload_tblname)) == + NULL) + error = EINVAL; + else + newrule->overload_tbl->pfrkt_flags |= + PFR_TFLAG_ACTIVE; + } + + pf_mv_pool(&V_pf_pabuf, &newrule->rpool.list); + if (((((newrule->action == PF_NAT) || + (newrule->action == PF_RDR) || + (newrule->action == PF_BINAT) || + (newrule->rt > PF_FASTROUTE)) && + !newrule->anchor)) && + (TAILQ_FIRST(&newrule->rpool.list) == NULL)) + error = EINVAL; + + if (error) { + pf_free_rule(newrule); + PF_RULES_WUNLOCK(); + break; + } + + newrule->rpool.cur = TAILQ_FIRST(&newrule->rpool.list); + newrule->evaluations = 0; + newrule->packets[0] = newrule->packets[1] = 0; + newrule->bytes[0] = newrule->bytes[1] = 0; + } + pf_empty_pool(&V_pf_pabuf); + + if (pcr->action == PF_CHANGE_ADD_HEAD) + oldrule = TAILQ_FIRST( + ruleset->rules[rs_num].active.ptr); + else if (pcr->action == PF_CHANGE_ADD_TAIL) + oldrule = TAILQ_LAST( + ruleset->rules[rs_num].active.ptr, pf_rulequeue); + else { + oldrule = TAILQ_FIRST( + ruleset->rules[rs_num].active.ptr); + while ((oldrule != NULL) && (oldrule->nr != pcr->nr)) + oldrule = TAILQ_NEXT(oldrule, entries); + if (oldrule == NULL) { + if (newrule != NULL) + pf_free_rule(newrule); + PF_RULES_WUNLOCK(); + error = EINVAL; + break; + } + } + + if 
(pcr->action == PF_CHANGE_REMOVE) {
+            pf_unlink_rule(ruleset->rules[rs_num].active.ptr,
+                oldrule);
+            ruleset->rules[rs_num].active.rcount--;
+        } else {
+            if (oldrule == NULL)
+                TAILQ_INSERT_TAIL(
+                    ruleset->rules[rs_num].active.ptr,
+                    newrule, entries);
+            else if (pcr->action == PF_CHANGE_ADD_HEAD ||
+                pcr->action == PF_CHANGE_ADD_BEFORE)
+                TAILQ_INSERT_BEFORE(oldrule, newrule, entries);
+            else
+                TAILQ_INSERT_AFTER(
+                    ruleset->rules[rs_num].active.ptr,
+                    oldrule, newrule, entries);
+            ruleset->rules[rs_num].active.rcount++;
+        }
+
+        nr = 0;
+        TAILQ_FOREACH(oldrule,
+            ruleset->rules[rs_num].active.ptr, entries)
+            oldrule->nr = nr++;
+
+        ruleset->rules[rs_num].active.ticket++;
+
+        pf_calc_skip_steps(ruleset->rules[rs_num].active.ptr);
+        pf_remove_if_empty_ruleset(ruleset);
+
+        PF_RULES_WUNLOCK();
+        break;
+
+#undef ERROUT
+DIOCCHANGERULE_error:
+        PF_RULES_WUNLOCK();
+        if (newrule != NULL)
+            free(newrule, M_PFRULE);
+        if (kif != NULL)
+            free(kif, PFI_MTYPE);
+        break;
+    }
+
+    case DIOCCLRSTATES: {
+        struct pf_state *s;
+        struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr;
+        u_int i, killed = 0;
+
+        for (i = 0; i <= V_pf_hashmask; i++) {
+            struct pf_idhash *ih = &V_pf_idhash[i];
+
+relock_DIOCCLRSTATES:
+            PF_HASHROW_LOCK(ih);
+            LIST_FOREACH(s, &ih->states, entry)
+                if (!psk->psk_ifname[0] ||
+                    !strcmp(psk->psk_ifname,
+                    s->kif->pfik_name)) {
+                    /*
+                     * Don't send out individual
+                     * delete messages.
+                     */
+                    s->state_flags |= PFSTATE_NOSYNC;
+                    pf_unlink_state(s, PF_ENTER_LOCKED);
+                    killed++;
+                    goto relock_DIOCCLRSTATES;
+                }
+            PF_HASHROW_UNLOCK(ih);
+        }
+        psk->psk_killed = killed;
+        if (pfsync_clear_states_ptr != NULL)
+            pfsync_clear_states_ptr(V_pf_status.hostid, psk->psk_ifname);
+        break;
+    }
+
+    case DIOCKILLSTATES: {
+        struct pf_state *s;
+        struct pf_state_key *sk;
+        struct pf_addr *srcaddr, *dstaddr;
+        u_int16_t srcport, dstport;
+        struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr;
+        u_int i, killed = 0;
+
+        if (psk->psk_pfcmp.id) {
+            if (psk->psk_pfcmp.creatorid == 0)
+                psk->psk_pfcmp.creatorid = V_pf_status.hostid;
+            if ((s = pf_find_state_byid(psk->psk_pfcmp.id,
+                psk->psk_pfcmp.creatorid))) {
+                pf_unlink_state(s, PF_ENTER_LOCKED);
+                psk->psk_killed = 1;
+            }
+            break;
+        }
+
+        for (i = 0; i <= V_pf_hashmask; i++) {
+            struct pf_idhash *ih = &V_pf_idhash[i];
+
+relock_DIOCKILLSTATES:
+            PF_HASHROW_LOCK(ih);
+            LIST_FOREACH(s, &ih->states, entry) {
+                sk = s->key[PF_SK_WIRE];
+                if (s->direction == PF_OUT) {
+                    srcaddr = &sk->addr[1];
+                    dstaddr = &sk->addr[0];
+                    srcport = sk->port[1];
+                    dstport = sk->port[0];
+                } else {
+                    srcaddr = &sk->addr[0];
+                    dstaddr = &sk->addr[1];
+                    srcport = sk->port[0];
+                    dstport = sk->port[1];
+                }
+
+                if ((!psk->psk_af || sk->af == psk->psk_af)
+                    && (!psk->psk_proto || psk->psk_proto ==
+                    sk->proto) &&
+                    PF_MATCHA(psk->psk_src.neg,
+                    &psk->psk_src.addr.v.a.addr,
+                    &psk->psk_src.addr.v.a.mask,
+                    srcaddr, sk->af) &&
+                    PF_MATCHA(psk->psk_dst.neg,
+                    &psk->psk_dst.addr.v.a.addr,
+                    &psk->psk_dst.addr.v.a.mask,
+                    dstaddr, sk->af) &&
+                    (psk->psk_src.port_op == 0 ||
+                    pf_match_port(psk->psk_src.port_op,
+                    psk->psk_src.port[0], psk->psk_src.port[1],
+                    srcport)) &&
+                    (psk->psk_dst.port_op == 0 ||
+                    pf_match_port(psk->psk_dst.port_op,
+                    psk->psk_dst.port[0], psk->psk_dst.port[1],
+                    dstport)) &&
+                    (!psk->psk_label[0] ||
+                    (s->rule.ptr->label[0] &&
+                    !strcmp(psk->psk_label,
+                    s->rule.ptr->label))) &&
+                    (!psk->psk_ifname[0] ||
+                    !strcmp(psk->psk_ifname,
+                    s->kif->pfik_name))) {
+                    pf_unlink_state(s, PF_ENTER_LOCKED);
+                    killed++;
+                    goto relock_DIOCKILLSTATES;
+                }
+            }
+            PF_HASHROW_UNLOCK(ih);
+        }
+        psk->psk_killed = killed;
+        break;
+    }
+
+    case DIOCADDSTATE: {
+        struct pfioc_state *ps = (struct pfioc_state *)addr;
+        struct pfsync_state *sp = &ps->state;
+
+        if (sp->timeout >= PFTM_MAX &&
+            sp->timeout != PFTM_UNTIL_PACKET) {
+            error = EINVAL;
+            break;
+        }
+        if (pfsync_state_import_ptr != NULL) {
+            PF_RULES_RLOCK();
+            error = pfsync_state_import_ptr(sp, PFSYNC_SI_IOCTL);
+            PF_RULES_RUNLOCK();
+        } else
+            error = EOPNOTSUPP;
+        break;
+    }
+
+    case DIOCGETSTATE: {
+        struct pfioc_state *ps = (struct pfioc_state *)addr;
+        struct pf_state *s;
+
+        s = pf_find_state_byid(ps->state.id, ps->state.creatorid);
+        if (s == NULL) {
+            error = ENOENT;
+            break;
+        }
+
+        pfsync_state_export(&ps->state, s);
+        PF_STATE_UNLOCK(s);
+        break;
+    }
+
+    case DIOCGETSTATES: {
+        struct pfioc_states *ps = (struct pfioc_states *)addr;
+        struct pf_state *s;
+        struct pfsync_state *pstore, *p;
+        int i, nr;
+
+        if (ps->ps_len == 0) {
+            nr = uma_zone_get_cur(V_pf_state_z);
+            ps->ps_len = sizeof(struct pfsync_state) * nr;
+            break;
+        }
+
+        p = pstore = malloc(ps->ps_len, M_TEMP, M_WAITOK);
+        nr = 0;
+
+        for (i = 0; i <= V_pf_hashmask; i++) {
+            struct pf_idhash *ih = &V_pf_idhash[i];
+
+            PF_HASHROW_LOCK(ih);
+            LIST_FOREACH(s, &ih->states, entry) {
+
+                if (s->timeout == PFTM_UNLINKED)
+                    continue;
+
+                if ((nr+1) * sizeof(*p) > ps->ps_len) {
+                    PF_HASHROW_UNLOCK(ih);
+                    goto DIOCGETSTATES_full;
+                }
+                pfsync_state_export(p, s);
+                p++;
+                nr++;
+            }
+            PF_HASHROW_UNLOCK(ih);
+        }
+DIOCGETSTATES_full:
+        error = copyout(pstore, ps->ps_states,
+            sizeof(struct pfsync_state) * nr);
+        if (error) {
+            free(pstore, M_TEMP);
+            break;
+        }
+        ps->ps_len = sizeof(struct pfsync_state) * nr;
+        free(pstore, M_TEMP);
+
+        break;
+    }
+
+    case DIOCGETSTATUS: {
+        struct pf_status *s = (struct pf_status *)addr;
+        PF_RULES_RLOCK();
+        bcopy(&V_pf_status, s, sizeof(struct pf_status));
+        pfi_update_status(s->ifname, s);
+        PF_RULES_RUNLOCK();
+        break;
+    }
+
+    case DIOCSETSTATUSIF: {
+        struct pfioc_if *pi = (struct pfioc_if *)addr;
+
+        if (pi->ifname[0] == 0) {
+            bzero(V_pf_status.ifname, IFNAMSIZ);
+            break;
+        }
+        PF_RULES_WLOCK();
+        strlcpy(V_pf_status.ifname, pi->ifname, IFNAMSIZ);
+        PF_RULES_WUNLOCK();
+        break;
+    }
+
+    case DIOCCLRSTATUS: {
+        PF_RULES_WLOCK();
+        bzero(V_pf_status.counters, sizeof(V_pf_status.counters));
+        bzero(V_pf_status.fcounters, sizeof(V_pf_status.fcounters));
+        bzero(V_pf_status.scounters, sizeof(V_pf_status.scounters));
+        V_pf_status.since = time_second;
+        if (*V_pf_status.ifname)
+            pfi_update_status(V_pf_status.ifname, NULL);
+        PF_RULES_WUNLOCK();
+        break;
+    }
+
+    case DIOCNATLOOK: {
+        struct pfioc_natlook *pnl = (struct pfioc_natlook *)addr;
+        struct pf_state_key *sk;
+        struct pf_state *state;
+        struct pf_state_key_cmp key;
+        int m = 0, direction = pnl->direction;
+        int sidx, didx;
+
+        /* NATLOOK src and dst are reversed, so reverse sidx/didx */
+        sidx = (direction == PF_IN) ? 1 : 0;
+        didx = (direction == PF_IN) ?
0 : 1; + + if (!pnl->proto || + PF_AZERO(&pnl->saddr, pnl->af) || + PF_AZERO(&pnl->daddr, pnl->af) || + ((pnl->proto == IPPROTO_TCP || + pnl->proto == IPPROTO_UDP) && + (!pnl->dport || !pnl->sport))) + error = EINVAL; + else { + key.af = pnl->af; + key.proto = pnl->proto; + PF_ACPY(&key.addr[sidx], &pnl->saddr, pnl->af); + key.port[sidx] = pnl->sport; + PF_ACPY(&key.addr[didx], &pnl->daddr, pnl->af); + key.port[didx] = pnl->dport; + + state = pf_find_state_all(&key, direction, &m); + + if (m > 1) + error = E2BIG; /* more than one state */ + else if (state != NULL) { + /* XXXGL: not locked read */ + sk = state->key[sidx]; + PF_ACPY(&pnl->rsaddr, &sk->addr[sidx], sk->af); + pnl->rsport = sk->port[sidx]; + PF_ACPY(&pnl->rdaddr, &sk->addr[didx], sk->af); + pnl->rdport = sk->port[didx]; + } else + error = ENOENT; + } + break; + } + + case DIOCSETTIMEOUT: { + struct pfioc_tm *pt = (struct pfioc_tm *)addr; + int old; + + if (pt->timeout < 0 || pt->timeout >= PFTM_MAX || + pt->seconds < 0) { + error = EINVAL; + break; + } + PF_RULES_WLOCK(); + old = V_pf_default_rule.timeout[pt->timeout]; + if (pt->timeout == PFTM_INTERVAL && pt->seconds == 0) + pt->seconds = 1; + V_pf_default_rule.timeout[pt->timeout] = pt->seconds; + if (pt->timeout == PFTM_INTERVAL && pt->seconds < old) + wakeup(pf_purge_thread); + pt->seconds = old; + PF_RULES_WUNLOCK(); + break; + } + + case DIOCGETTIMEOUT: { + struct pfioc_tm *pt = (struct pfioc_tm *)addr; + + if (pt->timeout < 0 || pt->timeout >= PFTM_MAX) { + error = EINVAL; + break; + } + PF_RULES_RLOCK(); + pt->seconds = V_pf_default_rule.timeout[pt->timeout]; + PF_RULES_RUNLOCK(); + break; + } + + case DIOCGETLIMIT: { + struct pfioc_limit *pl = (struct pfioc_limit *)addr; + + if (pl->index < 0 || pl->index >= PF_LIMIT_MAX) { + error = EINVAL; + break; + } + PF_RULES_RLOCK(); + pl->limit = V_pf_limits[pl->index].limit; + PF_RULES_RUNLOCK(); + break; + } + + case DIOCSETLIMIT: { + struct pfioc_limit *pl = (struct pfioc_limit *)addr; + int old_limit; + + PF_RULES_WLOCK(); + if (pl->index < 0 || pl->index >= PF_LIMIT_MAX || + V_pf_limits[pl->index].zone == NULL) { + PF_RULES_WUNLOCK(); + error = EINVAL; + break; + } + uma_zone_set_max(V_pf_limits[pl->index].zone, pl->limit); + old_limit = V_pf_limits[pl->index].limit; + V_pf_limits[pl->index].limit = pl->limit; + pl->limit = old_limit; + PF_RULES_WUNLOCK(); + break; + } + + case DIOCSETDEBUG: { + u_int32_t *level = (u_int32_t *)addr; + + PF_RULES_WLOCK(); + V_pf_status.debug = *level; + PF_RULES_WUNLOCK(); + break; + } + + case DIOCCLRRULECTRS: { + /* obsoleted by DIOCGETRULE with action=PF_GET_CLR_CNTR */ + struct pf_ruleset *ruleset = &pf_main_ruleset; + struct pf_rule *rule; + + PF_RULES_WLOCK(); + TAILQ_FOREACH(rule, + ruleset->rules[PF_RULESET_FILTER].active.ptr, entries) { + rule->evaluations = 0; + rule->packets[0] = rule->packets[1] = 0; + rule->bytes[0] = rule->bytes[1] = 0; + } + PF_RULES_WUNLOCK(); + break; + } + + case DIOCGIFSPEED: { + struct pf_ifspeed *psp = (struct pf_ifspeed *)addr; + struct pf_ifspeed ps; + struct ifnet *ifp; + + if (psp->ifname[0] != 0) { + /* Can we completely trust user-land? 
*/ + strlcpy(ps.ifname, psp->ifname, IFNAMSIZ); + ifp = ifunit(ps.ifname); + if (ifp != NULL) + psp->baudrate = ifp->if_baudrate; + else + error = EINVAL; + } else + error = EINVAL; + break; + } + +#ifdef ALTQ + case DIOCSTARTALTQ: { + struct pf_altq *altq; + + PF_RULES_WLOCK(); + /* enable all altq interfaces on active list */ + TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { + if (altq->qname[0] == 0 && (altq->local_flags & + PFALTQ_FLAG_IF_REMOVED) == 0) { + error = pf_enable_altq(altq); + if (error != 0) + break; + } + } + if (error == 0) + V_pf_altq_running = 1; + PF_RULES_WUNLOCK(); + DPFPRINTF(PF_DEBUG_MISC, ("altq: started\n")); + break; + } + + case DIOCSTOPALTQ: { + struct pf_altq *altq; + + PF_RULES_WLOCK(); + /* disable all altq interfaces on active list */ + TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { + if (altq->qname[0] == 0 && (altq->local_flags & + PFALTQ_FLAG_IF_REMOVED) == 0) { + error = pf_disable_altq(altq); + if (error != 0) + break; + } + } + if (error == 0) + V_pf_altq_running = 0; + PF_RULES_WUNLOCK(); + DPFPRINTF(PF_DEBUG_MISC, ("altq: stopped\n")); + break; + } + + case DIOCADDALTQ: { + struct pfioc_altq *pa = (struct pfioc_altq *)addr; + struct pf_altq *altq, *a; + struct ifnet *ifp; + + altq = malloc(sizeof(*altq), M_PFALTQ, M_WAITOK); + bcopy(&pa->altq, altq, sizeof(struct pf_altq)); + altq->local_flags = 0; + + PF_RULES_WLOCK(); + if (pa->ticket != V_ticket_altqs_inactive) { + PF_RULES_WUNLOCK(); + free(altq, M_PFALTQ); + error = EBUSY; + break; + } + + /* + * if this is for a queue, find the discipline and + * copy the necessary fields + */ + if (altq->qname[0] != 0) { + if ((altq->qid = pf_qname2qid(altq->qname)) == 0) { + PF_RULES_WUNLOCK(); + error = EBUSY; + free(altq, M_PFALTQ); + break; + } + altq->altq_disc = NULL; + TAILQ_FOREACH(a, V_pf_altqs_inactive, entries) { + if (strncmp(a->ifname, altq->ifname, + IFNAMSIZ) == 0 && a->qname[0] == 0) { + altq->altq_disc = a->altq_disc; + break; + } + } + } + + if ((ifp = ifunit(altq->ifname)) == NULL) + altq->local_flags |= PFALTQ_FLAG_IF_REMOVED; + else + error = altq_add(altq); + + if (error) { + PF_RULES_WUNLOCK(); + free(altq, M_PFALTQ); + break; + } + + TAILQ_INSERT_TAIL(V_pf_altqs_inactive, altq, entries); + bcopy(altq, &pa->altq, sizeof(struct pf_altq)); + PF_RULES_WUNLOCK(); + break; + } + + case DIOCGETALTQS: { + struct pfioc_altq *pa = (struct pfioc_altq *)addr; + struct pf_altq *altq; + + PF_RULES_RLOCK(); + pa->nr = 0; + TAILQ_FOREACH(altq, V_pf_altqs_active, entries) + pa->nr++; + pa->ticket = V_ticket_altqs_active; + PF_RULES_RUNLOCK(); + break; + } + + case DIOCGETALTQ: { + struct pfioc_altq *pa = (struct pfioc_altq *)addr; + struct pf_altq *altq; + u_int32_t nr; + + PF_RULES_RLOCK(); + if (pa->ticket != V_ticket_altqs_active) { + PF_RULES_RUNLOCK(); + error = EBUSY; + break; + } + nr = 0; + altq = TAILQ_FIRST(V_pf_altqs_active); + while ((altq != NULL) && (nr < pa->nr)) { + altq = TAILQ_NEXT(altq, entries); + nr++; + } + if (altq == NULL) { + PF_RULES_RUNLOCK(); + error = EBUSY; + break; + } + bcopy(altq, &pa->altq, sizeof(struct pf_altq)); + PF_RULES_RUNLOCK(); + break; + } + + case DIOCCHANGEALTQ: + /* CHANGEALTQ not supported yet! 
*/ + error = ENODEV; + break; + + case DIOCGETQSTATS: { + struct pfioc_qstats *pq = (struct pfioc_qstats *)addr; + struct pf_altq *altq; + u_int32_t nr; + int nbytes; + + PF_RULES_RLOCK(); + if (pq->ticket != V_ticket_altqs_active) { + PF_RULES_RUNLOCK(); + error = EBUSY; + break; + } + nbytes = pq->nbytes; + nr = 0; + altq = TAILQ_FIRST(V_pf_altqs_active); + while ((altq != NULL) && (nr < pq->nr)) { + altq = TAILQ_NEXT(altq, entries); + nr++; + } + if (altq == NULL) { + PF_RULES_RUNLOCK(); + error = EBUSY; + break; + } + + if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) != 0) { + PF_RULES_RUNLOCK(); + error = ENXIO; + break; + } + PF_RULES_RUNLOCK(); + error = altq_getqstats(altq, pq->buf, &nbytes); + if (error == 0) { + pq->scheduler = altq->scheduler; + pq->nbytes = nbytes; + } + break; + } +#endif /* ALTQ */ + + case DIOCBEGINADDRS: { + struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; + + PF_RULES_WLOCK(); + pf_empty_pool(&V_pf_pabuf); + pp->ticket = ++V_ticket_pabuf; + PF_RULES_WUNLOCK(); + break; + } + + case DIOCADDADDR: { + struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; + struct pf_pooladdr *pa; + struct pfi_kif *kif = NULL; + +#ifndef INET + if (pp->af == AF_INET) { + error = EAFNOSUPPORT; + break; + } +#endif /* INET */ +#ifndef INET6 + if (pp->af == AF_INET6) { + error = EAFNOSUPPORT; + break; + } +#endif /* INET6 */ + if (pp->addr.addr.type != PF_ADDR_ADDRMASK && + pp->addr.addr.type != PF_ADDR_DYNIFTL && + pp->addr.addr.type != PF_ADDR_TABLE) { + error = EINVAL; + break; + } + pa = malloc(sizeof(*pa), M_PFRULE, M_WAITOK); + bcopy(&pp->addr, pa, sizeof(struct pf_pooladdr)); + if (pa->ifname[0]) + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); + PF_RULES_WLOCK(); + if (pp->ticket != V_ticket_pabuf) { + PF_RULES_WUNLOCK(); + if (pa->ifname[0]) + free(kif, PFI_MTYPE); + free(pa, M_PFRULE); + error = EBUSY; + break; + } + if (pa->ifname[0]) { + pa->kif = pfi_kif_attach(kif, pa->ifname); + pfi_kif_ref(pa->kif); + } else + pa->kif = NULL; + if (pa->addr.type == PF_ADDR_DYNIFTL && ((error = + pfi_dynaddr_setup(&pa->addr, pp->af)) != 0)) { + if (pa->ifname[0]) + pfi_kif_unref(pa->kif); + PF_RULES_WUNLOCK(); + free(pa, M_PFRULE); + break; + } + TAILQ_INSERT_TAIL(&V_pf_pabuf, pa, entries); + PF_RULES_WUNLOCK(); + break; + } + + case DIOCGETADDRS: { + struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; + struct pf_pool *pool; + struct pf_pooladdr *pa; + + PF_RULES_RLOCK(); + pp->nr = 0; + pool = pf_get_pool(pp->anchor, pp->ticket, pp->r_action, + pp->r_num, 0, 1, 0); + if (pool == NULL) { + PF_RULES_RUNLOCK(); + error = EBUSY; + break; + } + TAILQ_FOREACH(pa, &pool->list, entries) + pp->nr++; + PF_RULES_RUNLOCK(); + break; + } + + case DIOCGETADDR: { + struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; + struct pf_pool *pool; + struct pf_pooladdr *pa; + u_int32_t nr = 0; + + PF_RULES_RLOCK(); + pool = pf_get_pool(pp->anchor, pp->ticket, pp->r_action, + pp->r_num, 0, 1, 1); + if (pool == NULL) { + PF_RULES_RUNLOCK(); + error = EBUSY; + break; + } + pa = TAILQ_FIRST(&pool->list); + while ((pa != NULL) && (nr < pp->nr)) { + pa = TAILQ_NEXT(pa, entries); + nr++; + } + if (pa == NULL) { + PF_RULES_RUNLOCK(); + error = EBUSY; + break; + } + bcopy(pa, &pp->addr, sizeof(struct pf_pooladdr)); + pf_addr_copyout(&pp->addr.addr); + PF_RULES_RUNLOCK(); + break; + } + + case DIOCCHANGEADDR: { + struct pfioc_pooladdr *pca = (struct pfioc_pooladdr *)addr; + struct pf_pool *pool; + struct pf_pooladdr *oldpa = NULL, *newpa = NULL; + struct pf_ruleset *ruleset; + 
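+        /*
+         * As in DIOCCHANGERULE above, allocations that may sleep
+         * (newpa, kif) are performed before PF_RULES_WLOCK() is
+         * taken below.
+         */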
struct pfi_kif *kif = NULL; + + if (pca->action < PF_CHANGE_ADD_HEAD || + pca->action > PF_CHANGE_REMOVE) { + error = EINVAL; + break; + } + if (pca->addr.addr.type != PF_ADDR_ADDRMASK && + pca->addr.addr.type != PF_ADDR_DYNIFTL && + pca->addr.addr.type != PF_ADDR_TABLE) { + error = EINVAL; + break; + } + + if (pca->action != PF_CHANGE_REMOVE) { +#ifndef INET + if (pca->af == AF_INET) { + error = EAFNOSUPPORT; + break; + } +#endif /* INET */ +#ifndef INET6 + if (pca->af == AF_INET6) { + error = EAFNOSUPPORT; + break; + } +#endif /* INET6 */ + newpa = malloc(sizeof(*newpa), M_PFRULE, M_WAITOK); + bcopy(&pca->addr, newpa, sizeof(struct pf_pooladdr)); + if (newpa->ifname[0]) + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); + } + +#define ERROUT(x) { error = (x); goto DIOCCHANGEADDR_error; } + PF_RULES_WLOCK(); + ruleset = pf_find_ruleset(pca->anchor); + if (ruleset == NULL) + ERROUT(EBUSY); + + pool = pf_get_pool(pca->anchor, pca->ticket, pca->r_action, + pca->r_num, pca->r_last, 1, 1); + if (pool == NULL) + ERROUT(EBUSY); + + if (pca->action != PF_CHANGE_REMOVE) { + if (newpa->ifname[0]) { + newpa->kif = pfi_kif_attach(kif, newpa->ifname); + pfi_kif_ref(newpa->kif); + } else + newpa->kif = NULL; + + switch (newpa->addr.type) { + case PF_ADDR_DYNIFTL: + error = pfi_dynaddr_setup(&newpa->addr, + pca->af); + break; + case PF_ADDR_TABLE: + newpa->addr.p.tbl = pfr_attach_table(ruleset, + newpa->addr.v.tblname); + if (newpa->addr.p.tbl == NULL) + error = ENOMEM; + break; + } + if (error) { + if (newpa->kif) + pfi_kif_unref(newpa->kif); + PF_RULES_WUNLOCK(); + free(newpa, M_PFRULE); + break; + } + } + + if (pca->action == PF_CHANGE_ADD_HEAD) + oldpa = TAILQ_FIRST(&pool->list); + else if (pca->action == PF_CHANGE_ADD_TAIL) + oldpa = TAILQ_LAST(&pool->list, pf_palist); + else { + int i = 0; + + oldpa = TAILQ_FIRST(&pool->list); + while ((oldpa != NULL) && (i < pca->nr)) { + oldpa = TAILQ_NEXT(oldpa, entries); + i++; + } + if (oldpa == NULL) { + PF_RULES_WUNLOCK(); + error = EINVAL; + break; + } + } + + if (pca->action == PF_CHANGE_REMOVE) { + TAILQ_REMOVE(&pool->list, oldpa, entries); + switch (oldpa->addr.type) { + case PF_ADDR_DYNIFTL: + pfi_dynaddr_remove(oldpa->addr.p.dyn); + break; + case PF_ADDR_TABLE: + pfr_detach_table(oldpa->addr.p.tbl); + break; + } + if (oldpa->kif) + pfi_kif_unref(oldpa->kif); + free(oldpa, M_PFRULE); + } else { + if (oldpa == NULL) + TAILQ_INSERT_TAIL(&pool->list, newpa, entries); + else if (pca->action == PF_CHANGE_ADD_HEAD || + pca->action == PF_CHANGE_ADD_BEFORE) + TAILQ_INSERT_BEFORE(oldpa, newpa, entries); + else + TAILQ_INSERT_AFTER(&pool->list, oldpa, + newpa, entries); + } + + pool->cur = TAILQ_FIRST(&pool->list); + PF_ACPY(&pool->counter, &pool->cur->addr.v.a.addr, + pca->af); + PF_RULES_WUNLOCK(); + break; + +#undef ERROUT +DIOCCHANGEADDR_error: + PF_RULES_WUNLOCK(); + if (newpa != NULL) + free(newpa, M_PFRULE); + if (kif != NULL) + free(kif, PFI_MTYPE); + break; + } + + case DIOCGETRULESETS: { + struct pfioc_ruleset *pr = (struct pfioc_ruleset *)addr; + struct pf_ruleset *ruleset; + struct pf_anchor *anchor; + + PF_RULES_RLOCK(); + pr->path[sizeof(pr->path) - 1] = 0; + if ((ruleset = pf_find_ruleset(pr->path)) == NULL) { + PF_RULES_RUNLOCK(); + error = ENOENT; + break; + } + pr->nr = 0; + if (ruleset->anchor == NULL) { + /* XXX kludge for pf_main_ruleset */ + RB_FOREACH(anchor, pf_anchor_global, &V_pf_anchors) + if (anchor->parent == NULL) + pr->nr++; + } else { + RB_FOREACH(anchor, pf_anchor_node, + &ruleset->anchor->children) + pr->nr++; + } + 
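+        /* pr->nr now holds the number of immediate sub-anchors. */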
PF_RULES_RUNLOCK(); + break; + } + + case DIOCGETRULESET: { + struct pfioc_ruleset *pr = (struct pfioc_ruleset *)addr; + struct pf_ruleset *ruleset; + struct pf_anchor *anchor; + u_int32_t nr = 0; + + PF_RULES_RLOCK(); + pr->path[sizeof(pr->path) - 1] = 0; + if ((ruleset = pf_find_ruleset(pr->path)) == NULL) { + PF_RULES_RUNLOCK(); + error = ENOENT; + break; + } + pr->name[0] = 0; + if (ruleset->anchor == NULL) { + /* XXX kludge for pf_main_ruleset */ + RB_FOREACH(anchor, pf_anchor_global, &V_pf_anchors) + if (anchor->parent == NULL && nr++ == pr->nr) { + strlcpy(pr->name, anchor->name, + sizeof(pr->name)); + break; + } + } else { + RB_FOREACH(anchor, pf_anchor_node, + &ruleset->anchor->children) + if (nr++ == pr->nr) { + strlcpy(pr->name, anchor->name, + sizeof(pr->name)); + break; + } + } + if (!pr->name[0]) + error = EBUSY; + PF_RULES_RUNLOCK(); + break; + } + + case DIOCRCLRTABLES: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != 0) { + error = ENODEV; + break; + } + PF_RULES_WLOCK(); + error = pfr_clr_tables(&io->pfrio_table, &io->pfrio_ndel, + io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + break; + } + + case DIOCRADDTABLES: { + struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_table *pfrts; + size_t totlen; + + if (io->pfrio_esize != sizeof(struct pfr_table)) { + error = ENODEV; + break; + } + totlen = io->pfrio_size * sizeof(struct pfr_table); + pfrts = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfrts, totlen); + if (error) { + free(pfrts, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_add_tables(pfrts, io->pfrio_size, + &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + free(pfrts, M_TEMP); + break; + } + + case DIOCRDELTABLES: { + struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_table *pfrts; + size_t totlen; + + if (io->pfrio_esize != sizeof(struct pfr_table)) { + error = ENODEV; + break; + } + totlen = io->pfrio_size * sizeof(struct pfr_table); + pfrts = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfrts, totlen); + if (error) { + free(pfrts, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_del_tables(pfrts, io->pfrio_size, + &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + free(pfrts, M_TEMP); + break; + } + + case DIOCRGETTABLES: { + struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_table *pfrts; + size_t totlen; + + if (io->pfrio_esize != sizeof(struct pfr_table)) { + error = ENODEV; + break; + } + totlen = io->pfrio_size * sizeof(struct pfr_table); + pfrts = malloc(totlen, M_TEMP, M_WAITOK); + PF_RULES_RLOCK(); + error = pfr_get_tables(&io->pfrio_table, pfrts, + &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_RUNLOCK(); + if (error == 0) + error = copyout(pfrts, io->pfrio_buffer, totlen); + free(pfrts, M_TEMP); + break; + } + + case DIOCRGETTSTATS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_tstats *pfrtstats; + size_t totlen; + + if (io->pfrio_esize != sizeof(struct pfr_tstats)) { + error = ENODEV; + break; + } + totlen = io->pfrio_size * sizeof(struct pfr_tstats); + pfrtstats = malloc(totlen, M_TEMP, M_WAITOK); + PF_RULES_WLOCK(); + error = pfr_get_tstats(&io->pfrio_table, pfrtstats, + &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + if (error == 0) + error = copyout(pfrtstats, io->pfrio_buffer, totlen); + free(pfrtstats, M_TEMP); + break; + } + + case DIOCRCLRTSTATS: { + 
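+        /*
+         * All DIOCR* table ioctls share one calling convention:
+         * pfrio_buffer points at an array of pfrio_size elements of
+         * pfrio_esize bytes each, checked at the top of every case.
+         * A minimal userland sketch (hypothetical names, no error
+         * handling; "dev" is an open /dev/pf descriptor):
+         *
+         *    struct pfr_table tbl = { .pfrt_name = "mytable" };
+         *    struct pfioc_table io = { .pfrio_buffer = &tbl,
+         *        .pfrio_esize = sizeof(tbl), .pfrio_size = 1 };
+         *    ioctl(dev, DIOCRADDTABLES, &io);
+         */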
struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_table *pfrts; + size_t totlen; + + if (io->pfrio_esize != sizeof(struct pfr_table)) { + error = ENODEV; + break; + } + totlen = io->pfrio_size * sizeof(struct pfr_table); + pfrts = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfrts, totlen); + if (error) { + free(pfrts, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_clr_tstats(pfrts, io->pfrio_size, + &io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + free(pfrts, M_TEMP); + break; + } + + case DIOCRSETTFLAGS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_table *pfrts; + size_t totlen; + + if (io->pfrio_esize != sizeof(struct pfr_table)) { + error = ENODEV; + break; + } + totlen = io->pfrio_size * sizeof(struct pfr_table); + pfrts = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfrts, totlen); + if (error) { + free(pfrts, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_set_tflags(pfrts, io->pfrio_size, + io->pfrio_setflag, io->pfrio_clrflag, &io->pfrio_nchange, + &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + free(pfrts, M_TEMP); + break; + } + + case DIOCRCLRADDRS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != 0) { + error = ENODEV; + break; + } + PF_RULES_WLOCK(); + error = pfr_clr_addrs(&io->pfrio_table, &io->pfrio_ndel, + io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + break; + } + + case DIOCRADDADDRS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_addr *pfras; + size_t totlen; + + if (io->pfrio_esize != sizeof(struct pfr_addr)) { + error = ENODEV; + break; + } + totlen = io->pfrio_size * sizeof(struct pfr_addr); + pfras = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfras, totlen); + if (error) { + free(pfras, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_add_addrs(&io->pfrio_table, pfras, + io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags | + PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) + error = copyout(pfras, io->pfrio_buffer, totlen); + free(pfras, M_TEMP); + break; + } + + case DIOCRDELADDRS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_addr *pfras; + size_t totlen; + + if (io->pfrio_esize != sizeof(struct pfr_addr)) { + error = ENODEV; + break; + } + totlen = io->pfrio_size * sizeof(struct pfr_addr); + pfras = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfras, totlen); + if (error) { + free(pfras, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_del_addrs(&io->pfrio_table, pfras, + io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags | + PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) + error = copyout(pfras, io->pfrio_buffer, totlen); + free(pfras, M_TEMP); + break; + } + + case DIOCRSETADDRS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_addr *pfras; + size_t totlen; + + if (io->pfrio_esize != sizeof(struct pfr_addr)) { + error = ENODEV; + break; + } + totlen = (io->pfrio_size + io->pfrio_size2) * + sizeof(struct pfr_addr); + pfras = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfras, totlen); + if (error) { + free(pfras, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_set_addrs(&io->pfrio_table, pfras, + io->pfrio_size, &io->pfrio_size2, &io->pfrio_nadd, + &io->pfrio_ndel, &io->pfrio_nchange, 
io->pfrio_flags | + PFR_FLAG_USERIOCTL, 0); + PF_RULES_WUNLOCK(); + if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) + error = copyout(pfras, io->pfrio_buffer, totlen); + free(pfras, M_TEMP); + break; + } + + case DIOCRGETADDRS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_addr *pfras; + size_t totlen; + + if (io->pfrio_esize != sizeof(struct pfr_addr)) { + error = ENODEV; + break; + } + totlen = io->pfrio_size * sizeof(struct pfr_addr); + pfras = malloc(totlen, M_TEMP, M_WAITOK); + PF_RULES_RLOCK(); + error = pfr_get_addrs(&io->pfrio_table, pfras, + &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_RUNLOCK(); + if (error == 0) + error = copyout(pfras, io->pfrio_buffer, totlen); + free(pfras, M_TEMP); + break; + } + + case DIOCRGETASTATS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_astats *pfrastats; + size_t totlen; + + if (io->pfrio_esize != sizeof(struct pfr_astats)) { + error = ENODEV; + break; + } + totlen = io->pfrio_size * sizeof(struct pfr_astats); + pfrastats = malloc(totlen, M_TEMP, M_WAITOK); + PF_RULES_RLOCK(); + error = pfr_get_astats(&io->pfrio_table, pfrastats, + &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_RUNLOCK(); + if (error == 0) + error = copyout(pfrastats, io->pfrio_buffer, totlen); + free(pfrastats, M_TEMP); + break; + } + + case DIOCRCLRASTATS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_addr *pfras; + size_t totlen; + + if (io->pfrio_esize != sizeof(struct pfr_addr)) { + error = ENODEV; + break; + } + totlen = io->pfrio_size * sizeof(struct pfr_addr); + pfras = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfras, totlen); + if (error) { + free(pfras, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_clr_astats(&io->pfrio_table, pfras, + io->pfrio_size, &io->pfrio_nzero, io->pfrio_flags | + PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) + error = copyout(pfras, io->pfrio_buffer, totlen); + free(pfras, M_TEMP); + break; + } + + case DIOCRTSTADDRS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_addr *pfras; + size_t totlen; + + if (io->pfrio_esize != sizeof(struct pfr_addr)) { + error = ENODEV; + break; + } + totlen = io->pfrio_size * sizeof(struct pfr_addr); + pfras = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfras, totlen); + if (error) { + free(pfras, M_TEMP); + break; + } + PF_RULES_RLOCK(); + error = pfr_tst_addrs(&io->pfrio_table, pfras, + io->pfrio_size, &io->pfrio_nmatch, io->pfrio_flags | + PFR_FLAG_USERIOCTL); + PF_RULES_RUNLOCK(); + if (error == 0) + error = copyout(pfras, io->pfrio_buffer, totlen); + free(pfras, M_TEMP); + break; + } + + case DIOCRINADEFINE: { + struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_addr *pfras; + size_t totlen; + + if (io->pfrio_esize != sizeof(struct pfr_addr)) { + error = ENODEV; + break; + } + totlen = io->pfrio_size * sizeof(struct pfr_addr); + pfras = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfras, totlen); + if (error) { + free(pfras, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_ina_define(&io->pfrio_table, pfras, + io->pfrio_size, &io->pfrio_nadd, &io->pfrio_naddr, + io->pfrio_ticket, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + free(pfras, M_TEMP); + break; + } + + case DIOCOSFPADD: { + struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr; + PF_RULES_WLOCK(); + error = pf_osfp_add(io); + 
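+        /* The new fingerprint entry is linked in under the writer lock. */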
PF_RULES_WUNLOCK(); + break; + } + + case DIOCOSFPGET: { + struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr; + PF_RULES_RLOCK(); + error = pf_osfp_get(io); + PF_RULES_RUNLOCK(); + break; + } + + case DIOCXBEGIN: { + struct pfioc_trans *io = (struct pfioc_trans *)addr; + struct pfioc_trans_e *ioes, *ioe; + size_t totlen; + int i; + + if (io->esize != sizeof(*ioe)) { + error = ENODEV; + break; + } + totlen = sizeof(struct pfioc_trans_e) * io->size; + ioes = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->array, ioes, totlen); + if (error) { + free(ioes, M_TEMP); + break; + } + PF_RULES_WLOCK(); + for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { + switch (ioe->rs_num) { +#ifdef ALTQ + case PF_RULESET_ALTQ: + if (ioe->anchor[0]) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + error = EINVAL; + goto fail; + } + if ((error = pf_begin_altq(&ioe->ticket))) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + goto fail; + } + break; +#endif /* ALTQ */ + case PF_RULESET_TABLE: + { + struct pfr_table table; + + bzero(&table, sizeof(table)); + strlcpy(table.pfrt_anchor, ioe->anchor, + sizeof(table.pfrt_anchor)); + if ((error = pfr_ina_begin(&table, + &ioe->ticket, NULL, 0))) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + goto fail; + } + break; + } + default: + if ((error = pf_begin_rules(&ioe->ticket, + ioe->rs_num, ioe->anchor))) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + goto fail; + } + break; + } + } + PF_RULES_WUNLOCK(); + error = copyout(ioes, io->array, totlen); + free(ioes, M_TEMP); + break; + } + + case DIOCXROLLBACK: { + struct pfioc_trans *io = (struct pfioc_trans *)addr; + struct pfioc_trans_e *ioe, *ioes; + size_t totlen; + int i; + + if (io->esize != sizeof(*ioe)) { + error = ENODEV; + break; + } + totlen = sizeof(struct pfioc_trans_e) * io->size; + ioes = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->array, ioes, totlen); + if (error) { + free(ioes, M_TEMP); + break; + } + PF_RULES_WLOCK(); + for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { + switch (ioe->rs_num) { +#ifdef ALTQ + case PF_RULESET_ALTQ: + if (ioe->anchor[0]) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + error = EINVAL; + goto fail; + } + if ((error = pf_rollback_altq(ioe->ticket))) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + goto fail; /* really bad */ + } + break; +#endif /* ALTQ */ + case PF_RULESET_TABLE: + { + struct pfr_table table; + + bzero(&table, sizeof(table)); + strlcpy(table.pfrt_anchor, ioe->anchor, + sizeof(table.pfrt_anchor)); + if ((error = pfr_ina_rollback(&table, + ioe->ticket, NULL, 0))) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + goto fail; /* really bad */ + } + break; + } + default: + if ((error = pf_rollback_rules(ioe->ticket, + ioe->rs_num, ioe->anchor))) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + goto fail; /* really bad */ + } + break; + } + } + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + break; + } + + case DIOCXCOMMIT: { + struct pfioc_trans *io = (struct pfioc_trans *)addr; + struct pfioc_trans_e *ioe, *ioes; + struct pf_ruleset *rs; + size_t totlen; + int i; + + if (io->esize != sizeof(*ioe)) { + error = ENODEV; + break; + } + totlen = sizeof(struct pfioc_trans_e) * io->size; + ioes = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->array, ioes, totlen); + if (error) { + free(ioes, M_TEMP); + break; + } + PF_RULES_WLOCK(); + /* First makes sure everything will succeed. 
*/ + for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { + switch (ioe->rs_num) { +#ifdef ALTQ + case PF_RULESET_ALTQ: + if (ioe->anchor[0]) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + error = EINVAL; + goto fail; + } + if (!V_altqs_inactive_open || ioe->ticket != + V_ticket_altqs_inactive) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + error = EBUSY; + goto fail; + } + break; +#endif /* ALTQ */ + case PF_RULESET_TABLE: + rs = pf_find_ruleset(ioe->anchor); + if (rs == NULL || !rs->topen || ioe->ticket != + rs->tticket) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + error = EBUSY; + goto fail; + } + break; + default: + if (ioe->rs_num < 0 || ioe->rs_num >= + PF_RULESET_MAX) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + error = EINVAL; + goto fail; + } + rs = pf_find_ruleset(ioe->anchor); + if (rs == NULL || + !rs->rules[ioe->rs_num].inactive.open || + rs->rules[ioe->rs_num].inactive.ticket != + ioe->ticket) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + error = EBUSY; + goto fail; + } + break; + } + } + /* Now do the commit - no errors should happen here. */ + for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { + switch (ioe->rs_num) { +#ifdef ALTQ + case PF_RULESET_ALTQ: + if ((error = pf_commit_altq(ioe->ticket))) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + goto fail; /* really bad */ + } + break; +#endif /* ALTQ */ + case PF_RULESET_TABLE: + { + struct pfr_table table; + + bzero(&table, sizeof(table)); + strlcpy(table.pfrt_anchor, ioe->anchor, + sizeof(table.pfrt_anchor)); + if ((error = pfr_ina_commit(&table, + ioe->ticket, NULL, NULL, 0))) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + goto fail; /* really bad */ + } + break; + } + default: + if ((error = pf_commit_rules(ioe->ticket, + ioe->rs_num, ioe->anchor))) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + goto fail; /* really bad */ + } + break; + } + } + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); + break; + } + + case DIOCGETSRCNODES: { + struct pfioc_src_nodes *psn = (struct pfioc_src_nodes *)addr; + struct pf_srchash *sh; + struct pf_src_node *n, *p, *pstore; + uint32_t i, nr = 0; + + if (psn->psn_len == 0) { + for (i = 0, sh = V_pf_srchash; i < V_pf_srchashmask; + i++, sh++) { + PF_HASHROW_LOCK(sh); + LIST_FOREACH(n, &sh->nodes, entry) + nr++; + PF_HASHROW_UNLOCK(sh); + } + psn->psn_len = sizeof(struct pf_src_node) * nr; + break; + } + + p = pstore = malloc(psn->psn_len, M_TEMP, M_WAITOK); + for (i = 0, sh = V_pf_srchash; i < V_pf_srchashmask; + i++, sh++) { + PF_HASHROW_LOCK(sh); + LIST_FOREACH(n, &sh->nodes, entry) { + int secs = time_uptime, diff; + + if ((nr + 1) * sizeof(*p) > (unsigned)psn->psn_len) + break; + + bcopy(n, p, sizeof(struct pf_src_node)); + if (n->rule.ptr != NULL) + p->rule.nr = n->rule.ptr->nr; + p->creation = secs - p->creation; + if (p->expire > secs) + p->expire -= secs; + else + p->expire = 0; + + /* Adjust the connection rate estimate. 
*/ + diff = secs - n->conn_rate.last; + if (diff >= n->conn_rate.seconds) + p->conn_rate.count = 0; + else + p->conn_rate.count -= + n->conn_rate.count * diff / + n->conn_rate.seconds; + p++; + nr++; + } + PF_HASHROW_UNLOCK(sh); + } + error = copyout(pstore, psn->psn_src_nodes, + sizeof(struct pf_src_node) * nr); + if (error) { + free(pstore, M_TEMP); + break; + } + psn->psn_len = sizeof(struct pf_src_node) * nr; + free(pstore, M_TEMP); + break; + } + + case DIOCCLRSRCNODES: { + + pf_clear_srcnodes(NULL); + pf_purge_expired_src_nodes(); + V_pf_status.src_nodes = 0; + break; + } + + case DIOCKILLSRCNODES: { + struct pfioc_src_node_kill *psnk = + (struct pfioc_src_node_kill *)addr; + struct pf_srchash *sh; + struct pf_src_node *sn; + u_int i, killed = 0; + + for (i = 0, sh = V_pf_srchash; i < V_pf_srchashmask; + i++, sh++) { + /* + * XXXGL: we don't ever acquire sources hash lock + * but if we ever do, the below call to pf_clear_srcnodes() + * would lead to a LOR. + */ + PF_HASHROW_LOCK(sh); + LIST_FOREACH(sn, &sh->nodes, entry) + if (PF_MATCHA(psnk->psnk_src.neg, + &psnk->psnk_src.addr.v.a.addr, + &psnk->psnk_src.addr.v.a.mask, + &sn->addr, sn->af) && + PF_MATCHA(psnk->psnk_dst.neg, + &psnk->psnk_dst.addr.v.a.addr, + &psnk->psnk_dst.addr.v.a.mask, + &sn->raddr, sn->af)) { + /* Handle state to src_node linkage */ + if (sn->states != 0) + pf_clear_srcnodes(sn); + sn->expire = 1; + killed++; + } + PF_HASHROW_UNLOCK(sh); + } + + if (killed > 0) + pf_purge_expired_src_nodes(); + + psnk->psnk_killed = killed; + break; + } + + case DIOCSETHOSTID: { + u_int32_t *hostid = (u_int32_t *)addr; + + PF_RULES_WLOCK(); + if (*hostid == 0) + V_pf_status.hostid = arc4random(); + else + V_pf_status.hostid = *hostid; + PF_RULES_WUNLOCK(); + break; + } + + case DIOCOSFPFLUSH: + PF_RULES_WLOCK(); + pf_osfp_flush(); + PF_RULES_WUNLOCK(); + break; + + case DIOCIGETIFACES: { + struct pfioc_iface *io = (struct pfioc_iface *)addr; + struct pfi_kif *ifstore; + size_t bufsiz; + + if (io->pfiio_esize != sizeof(struct pfi_kif)) { + error = ENODEV; + break; + } + + bufsiz = io->pfiio_size * sizeof(struct pfi_kif); + ifstore = malloc(bufsiz, M_TEMP, M_WAITOK); + PF_RULES_RLOCK(); + pfi_get_ifaces(io->pfiio_name, ifstore, &io->pfiio_size); + PF_RULES_RUNLOCK(); + error = copyout(ifstore, io->pfiio_buffer, bufsiz); + free(ifstore, M_TEMP); + break; + } + + case DIOCSETIFFLAG: { + struct pfioc_iface *io = (struct pfioc_iface *)addr; + + PF_RULES_WLOCK(); + error = pfi_set_flags(io->pfiio_name, io->pfiio_flags); + PF_RULES_WUNLOCK(); + break; + } + + case DIOCCLRIFFLAG: { + struct pfioc_iface *io = (struct pfioc_iface *)addr; + + PF_RULES_WLOCK(); + error = pfi_clear_flags(io->pfiio_name, io->pfiio_flags); + PF_RULES_WUNLOCK(); + break; + } + + default: + error = ENODEV; + break; + } +fail: + CURVNET_RESTORE(); + + return (error); +} + +void +pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) +{ + bzero(sp, sizeof(struct pfsync_state)); + + /* copy from state key */ + sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0]; + sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1]; + sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0]; + sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1]; + sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0]; + sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1]; + sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0]; + sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1]; + sp->proto = st->key[PF_SK_WIRE]->proto; + sp->af = 
st->key[PF_SK_WIRE]->af; + + /* copy from state */ + strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname)); + bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr)); + sp->creation = htonl(time_uptime - st->creation); + sp->expire = pf_state_expires(st); + if (sp->expire <= time_uptime) + sp->expire = htonl(0); + else + sp->expire = htonl(sp->expire - time_uptime); + + sp->direction = st->direction; + sp->log = st->log; + sp->timeout = st->timeout; + sp->state_flags = st->state_flags; + if (st->src_node) + sp->sync_flags |= PFSYNC_FLAG_SRCNODE; + if (st->nat_src_node) + sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE; + + sp->id = st->id; + sp->creatorid = st->creatorid; + pf_state_peer_hton(&st->src, &sp->src); + pf_state_peer_hton(&st->dst, &sp->dst); + + if (st->rule.ptr == NULL) + sp->rule = htonl(-1); + else + sp->rule = htonl(st->rule.ptr->nr); + if (st->anchor.ptr == NULL) + sp->anchor = htonl(-1); + else + sp->anchor = htonl(st->anchor.ptr->nr); + if (st->nat_rule.ptr == NULL) + sp->nat_rule = htonl(-1); + else + sp->nat_rule = htonl(st->nat_rule.ptr->nr); + + pf_state_counter_hton(st->packets[0], sp->packets[0]); + pf_state_counter_hton(st->packets[1], sp->packets[1]); + pf_state_counter_hton(st->bytes[0], sp->bytes[0]); + pf_state_counter_hton(st->bytes[1], sp->bytes[1]); + +} + +static void +pf_tbladdr_copyout(struct pf_addr_wrap *aw) +{ + struct pfr_ktable *kt; + + KASSERT(aw->type == PF_ADDR_TABLE, ("%s: type %u", __func__, aw->type)); + + kt = aw->p.tbl; + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) + kt = kt->pfrkt_root; + aw->p.tbl = NULL; + aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ? + kt->pfrkt_cnt : -1; +} + +/* + * XXX - Check for version missmatch!!! + */ +static void +pf_clear_states(void) +{ + struct pf_state *s; + u_int i; + + for (i = 0; i <= V_pf_hashmask; i++) { + struct pf_idhash *ih = &V_pf_idhash[i]; +relock: + PF_HASHROW_LOCK(ih); + LIST_FOREACH(s, &ih->states, entry) { + s->timeout = PFTM_PURGE; + /* Don't send out individual delete messages. */ + s->sync_state = PFSTATE_NOSYNC; + pf_unlink_state(s, PF_ENTER_LOCKED); + goto relock; + } + PF_HASHROW_UNLOCK(ih); + } +} + +static int +pf_clear_tables(void) +{ + struct pfioc_table io; + int error; + + bzero(&io, sizeof(io)); + + error = pfr_clr_tables(&io.pfrio_table, &io.pfrio_ndel, + io.pfrio_flags); + + return (error); +} + +static void +pf_clear_srcnodes(struct pf_src_node *n) +{ + struct pf_state *s; + int i; + + for (i = 0; i <= V_pf_hashmask; i++) { + struct pf_idhash *ih = &V_pf_idhash[i]; + + PF_HASHROW_LOCK(ih); + LIST_FOREACH(s, &ih->states, entry) { + if (n == NULL || n == s->src_node) + s->src_node = NULL; + if (n == NULL || n == s->nat_src_node) + s->nat_src_node = NULL; + } + PF_HASHROW_UNLOCK(ih); + } + + if (n == NULL) { + struct pf_srchash *sh; + + for (i = 0, sh = V_pf_srchash; i < V_pf_srchashmask; + i++, sh++) { + PF_HASHROW_LOCK(sh); + LIST_FOREACH(n, &sh->nodes, entry) { + n->expire = 1; + n->states = 0; + } + PF_HASHROW_UNLOCK(sh); + } + } else { + /* XXX: hash slot should already be locked here. */ + n->expire = 1; + n->states = 0; + } +} +/* + * XXX - Check for version missmatch!!! + */ + +/* + * Duplicate pfctl -Fa operation to get rid of as much as we can. 
+ */
+static int
+shutdown_pf(void)
+{
+    int error = 0;
+    u_int32_t t[5];
+    char nn = '\0';
+
+    V_pf_status.running = 0;
+    do {
+        if ((error = pf_begin_rules(&t[0], PF_RULESET_SCRUB, &nn))
+            != 0) {
+            DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: SCRUB\n"));
+            break;
+        }
+        if ((error = pf_begin_rules(&t[1], PF_RULESET_FILTER, &nn))
+            != 0) {
+            DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: FILTER\n"));
+            break;        /* XXX: rollback? */
+        }
+        if ((error = pf_begin_rules(&t[2], PF_RULESET_NAT, &nn))
+            != 0) {
+            DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: NAT\n"));
+            break;        /* XXX: rollback? */
+        }
+        if ((error = pf_begin_rules(&t[3], PF_RULESET_BINAT, &nn))
+            != 0) {
+            DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: BINAT\n"));
+            break;        /* XXX: rollback? */
+        }
+        if ((error = pf_begin_rules(&t[4], PF_RULESET_RDR, &nn))
+            != 0) {
+            DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: RDR\n"));
+            break;        /* XXX: rollback? */
+        }
+
+        /* XXX: these should always succeed here */
+        pf_commit_rules(t[0], PF_RULESET_SCRUB, &nn);
+        pf_commit_rules(t[1], PF_RULESET_FILTER, &nn);
+        pf_commit_rules(t[2], PF_RULESET_NAT, &nn);
+        pf_commit_rules(t[3], PF_RULESET_BINAT, &nn);
+        pf_commit_rules(t[4], PF_RULESET_RDR, &nn);
+
+        if ((error = pf_clear_tables()) != 0)
+            break;
+
+#ifdef ALTQ
+        if ((error = pf_begin_altq(&t[0])) != 0) {
+            DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: ALTQ\n"));
+            break;
+        }
+        pf_commit_altq(t[0]);
+#endif
+
+        pf_clear_states();
+
+        pf_clear_srcnodes(NULL);
+
+        /* status does not use malloced mem so no need to clean up */
+        /* fingerprints and interfaces have their own cleanup code */
+    } while(0);
+
+    return (error);
+}
+
+#ifdef INET
+static int
+pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
+    struct inpcb *inp)
+{
+    /*
+     * XXX Wed Jul 9 22:03:16 2003 UTC
+     * OpenBSD has changed its byte ordering convention on ip_len/ip_off
+     * in its network stack.  OpenBSD's network stack used to convert
+     * ip_len/ip_off to host byte order first, as FreeBSD does.  This is
+     * no longer the case, so we have to convert back to network byte
+     * order before handing the packet to pf.
+     */
+    struct ip *h = NULL;
+    int chk;
+
+    if ((*m)->m_pkthdr.len >= (int)sizeof(struct ip)) {
+        /* if m_pkthdr.len is less than ip header, pf will handle it. */
+        h = mtod(*m, struct ip *);
+        HTONS(h->ip_len);
+        HTONS(h->ip_off);
+    }
+    CURVNET_SET(ifp->if_vnet);
+    chk = pf_test(PF_IN, ifp, m, inp);
+    CURVNET_RESTORE();
+    if (chk && *m) {
+        m_freem(*m);
+        *m = NULL;
+    }
+    if (*m != NULL) {
+        /* pf_test can change ip header location */
+        h = mtod(*m, struct ip *);
+        NTOHS(h->ip_len);
+        NTOHS(h->ip_off);
+    }
+    return chk;
+}
+
+static int
+pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
+    struct inpcb *inp)
+{
+    /*
+     * XXX Wed Jul 9 22:03:16 2003 UTC
+     * OpenBSD has changed its byte ordering convention on ip_len/ip_off
+     * in its network stack.  OpenBSD's network stack used to convert
+     * ip_len/ip_off to host byte order first, as FreeBSD does.  This is
+     * no longer the case, so we have to convert back to network byte
+     * order before handing the packet to pf.
+     */
+    struct ip *h = NULL;
+    int chk;
+
+    /* We need a proper CSUM before we start (see OpenBSD ip_output) */
+    if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+        in_delayed_cksum(*m);
+        (*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+    }
+    if ((*m)->m_pkthdr.len >= (int)sizeof(*h)) {
+        /* if m_pkthdr.len is less than ip header, pf will handle it. */
+        h = mtod(*m, struct ip *);
+        HTONS(h->ip_len);
+        HTONS(h->ip_off);
+    }
+    CURVNET_SET(ifp->if_vnet);
+    chk = pf_test(PF_OUT, ifp, m, inp);
+    CURVNET_RESTORE();
+    if (chk && *m) {
+        m_freem(*m);
+        *m = NULL;
+    }
+    if (*m != NULL) {
+        /* pf_test can change ip header location */
+        h = mtod(*m, struct ip *);
+        NTOHS(h->ip_len);
+        NTOHS(h->ip_off);
+    }
+    return chk;
+}
+#endif
+
+#ifdef INET6
+static int
+pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
+    struct inpcb *inp)
+{
+
+    /*
+     * IPv6 is not affected by ip_len/ip_off byte order changes.
+     */
+    int chk;
+
+    /*
+     * In case of loopback traffic IPv6 uses the real interface in
+     * order to support scoped addresses. In order to support stateful
+     * filtering we change this to lo0, as is the case with IPv4.
+     */
+    CURVNET_SET(ifp->if_vnet);
+    chk = pf_test6(PF_IN, (*m)->m_flags & M_LOOP ? V_loif : ifp, m, inp);
+    CURVNET_RESTORE();
+    if (chk && *m) {
+        m_freem(*m);
+        *m = NULL;
+    }
+    return chk;
+}
+
+static int
+pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
+    struct inpcb *inp)
+{
+    /*
+     * IPv6 is not affected by ip_len/ip_off byte order changes.
+     */
+    int chk;
+
+    /* We need a proper CSUM before we start (see OpenBSD ip_output) */
+    if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+#ifdef INET
+        /* XXX-BZ copy&paste error from r126261? */
+        in_delayed_cksum(*m);
+#endif
+        (*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+    }
+    CURVNET_SET(ifp->if_vnet);
+    chk = pf_test6(PF_OUT, ifp, m, inp);
+    CURVNET_RESTORE();
+    if (chk && *m) {
+        m_freem(*m);
+        *m = NULL;
+    }
+    return chk;
+}
+#endif /* INET6 */
+
+static int
+hook_pf(void)
+{
+#ifdef INET
+    struct pfil_head *pfh_inet;
+#endif
+#ifdef INET6
+    struct pfil_head *pfh_inet6;
+#endif
+
+    if (V_pf_pfil_hooked)
+        return (0);
+
+#ifdef INET
+    pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET);
+    if (pfh_inet == NULL)
+        return (ESRCH); /* XXX */
+    pfil_add_hook(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet);
+    pfil_add_hook(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet);
+#endif
+#ifdef INET6
+    pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6);
+    if (pfh_inet6 == NULL) {
+#ifdef INET
+        pfil_remove_hook(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK,
+            pfh_inet);
+        pfil_remove_hook(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK,
+            pfh_inet);
+#endif
+        return (ESRCH); /* XXX */
+    }
+    pfil_add_hook(pf_check6_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet6);
+    pfil_add_hook(pf_check6_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet6);
+#endif
+
+    V_pf_pfil_hooked = 1;
+    return (0);
+}
+
+static int
+dehook_pf(void)
+{
+#ifdef INET
+    struct pfil_head *pfh_inet;
+#endif
+#ifdef INET6
+    struct pfil_head *pfh_inet6;
+#endif
+
+    if (V_pf_pfil_hooked == 0)
+        return (0);
+
+#ifdef INET
+    pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET);
+    if (pfh_inet == NULL)
+        return (ESRCH); /* XXX */
+    pfil_remove_hook(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK,
+        pfh_inet);
+    pfil_remove_hook(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK,
+        pfh_inet);
+#endif
+#ifdef INET6
+    pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6);
+    if (pfh_inet6 == NULL)
+        return (ESRCH); /* XXX */
+    pfil_remove_hook(pf_check6_in, NULL, PFIL_IN | PFIL_WAITOK,
+        pfh_inet6);
+    pfil_remove_hook(pf_check6_out, NULL, PFIL_OUT | PFIL_WAITOK,
+        pfh_inet6);
+#endif
+
+    V_pf_pfil_hooked = 0;
+    return (0);
+}
+
+static int
+pf_load(void)
+{
+    int error;
+
+    VNET_ITERATOR_DECL(vnet_iter);
+
+    VNET_LIST_RLOCK();
+    VNET_FOREACH(vnet_iter) {
+        CURVNET_SET(vnet_iter);
+        V_pf_pfil_hooked = 0;
+        V_pf_end_threads = 0;
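+        /* Start each vnet with clean per-vnet pf state. */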
TAILQ_INIT(&V_pf_tags); + TAILQ_INIT(&V_pf_qids); + CURVNET_RESTORE(); + } + VNET_LIST_RUNLOCK(); + + rw_init(&pf_rules_lock, "pf rulesets"); + + pf_dev = make_dev(&pf_cdevsw, 0, 0, 0, 0600, PF_NAME); + if ((error = pfattach()) != 0) + return (error); + + return (0); +} + +static int +pf_unload(void) +{ + int error = 0; + + PF_RULES_WLOCK(); + V_pf_status.running = 0; + PF_RULES_WUNLOCK(); + swi_remove(V_pf_swi_cookie); + error = dehook_pf(); + if (error) { + /* + * Should not happen! + * XXX Due to error code ESRCH, kldunload will show + * a message like 'No such process'. + */ + printf("%s : pfil unregisteration fail\n", __FUNCTION__); + return error; + } + PF_RULES_WLOCK(); + shutdown_pf(); + V_pf_end_threads = 1; + while (V_pf_end_threads < 2) { + wakeup_one(pf_purge_thread); + rw_sleep(pf_purge_thread, &pf_rules_lock, 0, "pftmo", 0); + } + pf_normalize_cleanup(); + pfi_cleanup(); + pfr_cleanup(); + pf_osfp_flush(); + pf_cleanup(); + PF_RULES_WUNLOCK(); + destroy_dev(pf_dev); + rw_destroy(&pf_rules_lock); + + return (error); +} + +static int +pf_modevent(module_t mod, int type, void *data) +{ + int error = 0; + + switch(type) { + case MOD_LOAD: + error = pf_load(); + break; + case MOD_QUIESCE: + /* + * Module should not be unloaded due to race conditions. + */ + error = EPERM; + break; + case MOD_UNLOAD: + error = pf_unload(); + break; + default: + error = EINVAL; + break; + } + + return (error); +} + +static moduledata_t pf_mod = { + "pf", + pf_modevent, + 0 +}; + +DECLARE_MODULE(pf, pf_mod, SI_SUB_PSEUDO, SI_ORDER_FIRST); +MODULE_VERSION(pf, PF_MODVER); diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c new file mode 100644 index 0000000..5b47852 --- /dev/null +++ b/sys/netpfil/pf/pf_lb.c @@ -0,0 +1,663 @@ +/* $OpenBSD: pf_lb.c,v 1.2 2009/02/12 02:13:15 sthen Exp $ */ + +/* + * Copyright (c) 2001 Daniel Hartmeier + * Copyright (c) 2002 - 2008 Henning Brauer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Effort sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F30602-01-2-0537. 
+ * + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include "opt_pf.h" +#include "opt_inet.h" +#include "opt_inet6.h" + +#include <sys/param.h> +#include <sys/socket.h> +#include <sys/sysctl.h> + +#include <net/if.h> +#include <net/pfvar.h> +#include <net/if_pflog.h> +#include <net/pf_mtag.h> + +#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x + +static void pf_hash(struct pf_addr *, struct pf_addr *, + struct pf_poolhashkey *, sa_family_t); +static struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *, + int, int, struct pfi_kif *, + struct pf_addr *, u_int16_t, struct pf_addr *, + u_int16_t, int); +static int pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *, + struct pf_addr *, struct pf_addr *, u_int16_t, + struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t, + struct pf_src_node **); + +#define mix(a,b,c) \ + do { \ + a -= b; a -= c; a ^= (c >> 13); \ + b -= c; b -= a; b ^= (a << 8); \ + c -= a; c -= b; c ^= (b >> 13); \ + a -= b; a -= c; a ^= (c >> 12); \ + b -= c; b -= a; b ^= (a << 16); \ + c -= a; c -= b; c ^= (b >> 5); \ + a -= b; a -= c; a ^= (c >> 3); \ + b -= c; b -= a; b ^= (a << 10); \ + c -= a; c -= b; c ^= (b >> 15); \ + } while (0) + +/* + * hash function based on bridge_hash in if_bridge.c + */ +static void +pf_hash(struct pf_addr *inaddr, struct pf_addr *hash, + struct pf_poolhashkey *key, sa_family_t af) +{ + u_int32_t a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0]; + + switch (af) { +#ifdef INET + case AF_INET: + a += inaddr->addr32[0]; + b += key->key32[1]; + mix(a, b, c); + hash->addr32[0] = c + key->key32[2]; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + a += inaddr->addr32[0]; + b += inaddr->addr32[2]; + mix(a, b, c); + hash->addr32[0] = c; + a += inaddr->addr32[1]; + b += inaddr->addr32[3]; + c += key->key32[1]; + mix(a, b, c); + hash->addr32[1] = c; + a += inaddr->addr32[2]; + b += inaddr->addr32[1]; + c += key->key32[2]; + mix(a, b, c); + hash->addr32[2] = c; + a += inaddr->addr32[3]; + b += inaddr->addr32[0]; + c += key->key32[3]; + mix(a, b, c); + hash->addr32[3] = c; + break; +#endif /* INET6 */ + } +} + +static struct pf_rule * +pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, + int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport, + struct pf_addr *daddr, u_int16_t dport, int rs_num) +{ + struct pf_rule *r, *rm = NULL; + struct pf_ruleset *ruleset = NULL; + int tag = -1; + int rtableid = -1; + int asd = 0; + + r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr); + while (r && rm == NULL) { + struct pf_rule_addr *src = NULL, *dst = NULL; + struct pf_addr_wrap *xdst = NULL; + + if (r->action == PF_BINAT && direction == PF_IN) { + src = &r->dst; + if (r->rpool.cur != NULL) + xdst = &r->rpool.cur->addr; + } else { + src = &r->src; + dst = &r->dst; + } + + r->evaluations++; + if (pfi_kif_match(r->kif, kif) == r->ifnot) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != direction) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != pd->af) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != pd->proto) + r = r->skip[PF_SKIP_PROTO].ptr; + else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, + src->neg, kif, M_GETFIB(m))) + r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR : + PF_SKIP_DST_ADDR].ptr; + else if (src->port_op && !pf_match_port(src->port_op, + src->port[0], src->port[1], sport)) + r = r->skip[src == &r->src ? 
PF_SKIP_SRC_PORT : + PF_SKIP_DST_PORT].ptr; + else if (dst != NULL && + PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL, + M_GETFIB(m))) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af, + 0, NULL, M_GETFIB(m))) + r = TAILQ_NEXT(r, entries); + else if (dst != NULL && dst->port_op && + !pf_match_port(dst->port_op, dst->port[0], + dst->port[1], dport)) + r = r->skip[PF_SKIP_DST_PORT].ptr; + else if (r->match_tag && !pf_match_tag(m, r, &tag, + pd->pf_mtag ? pd->pf_mtag->tag : 0)) + r = TAILQ_NEXT(r, entries); + else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto != + IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m, + off, pd->hdr.tcp), r->os_fingerprint))) + r = TAILQ_NEXT(r, entries); + else { + if (r->tag) + tag = r->tag; + if (r->rtableid >= 0) + rtableid = r->rtableid; + if (r->anchor == NULL) { + rm = r; + } else + pf_step_into_anchor(&asd, &ruleset, rs_num, + &r, NULL, NULL); + } + if (r == NULL) + pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r, + NULL, NULL); + } + + if (tag > 0 && pf_tag_packet(m, pd, tag)) + return (NULL); + if (rtableid >= 0) + M_SETFIB(m, rtableid); + + if (rm != NULL && (rm->action == PF_NONAT || + rm->action == PF_NORDR || rm->action == PF_NOBINAT)) + return (NULL); + return (rm); +} + +static int +pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, + struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport, + struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high, + struct pf_src_node **sn) +{ + struct pf_state_key_cmp key; + struct pf_addr init_addr; + u_int16_t cut; + + bzero(&init_addr, sizeof(init_addr)); + if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) + return (1); + + if (proto == IPPROTO_ICMP) { + low = 1; + high = 65535; + } + + do { + key.af = af; + key.proto = proto; + PF_ACPY(&key.addr[1], daddr, key.af); + PF_ACPY(&key.addr[0], naddr, key.af); + key.port[1] = dport; + + /* + * port search; start random, step; + * similar 2 portloop in in_pcbbind + */ + if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP || + proto == IPPROTO_ICMP)) { + key.port[0] = dport; + if (pf_find_state_all(&key, PF_IN, NULL) == NULL) + return (0); + } else if (low == 0 && high == 0) { + key.port[0] = *nport; + if (pf_find_state_all(&key, PF_IN, NULL) == NULL) + return (0); + } else if (low == high) { + key.port[0] = htons(low); + if (pf_find_state_all(&key, PF_IN, NULL) == NULL) { + *nport = htons(low); + return (0); + } + } else { + u_int16_t tmp; + + if (low > high) { + tmp = low; + low = high; + high = tmp; + } + /* low < high */ + cut = htonl(arc4random()) % (1 + high - low) + low; + /* low <= cut <= high */ + for (tmp = cut; tmp <= high; ++(tmp)) { + key.port[0] = htons(tmp); + if (pf_find_state_all(&key, PF_IN, NULL) == + NULL) { + *nport = htons(tmp); + return (0); + } + } + for (tmp = cut - 1; tmp >= low; --(tmp)) { + key.port[0] = htons(tmp); + if (pf_find_state_all(&key, PF_IN, NULL) == + NULL) { + *nport = htons(tmp); + return (0); + } + } + } + + switch (r->rpool.opts & PF_POOL_TYPEMASK) { + case PF_POOL_RANDOM: + case PF_POOL_ROUNDROBIN: + if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) + return (1); + break; + case PF_POOL_NONE: + case PF_POOL_SRCHASH: + case PF_POOL_BITMASK: + default: + return (1); + } + } while (! 
PF_AEQ(&init_addr, naddr, af)); + return (1); /* none available */ +} + +int +pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, + struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn) +{ + struct pf_pool *rpool = &r->rpool; + struct pf_addr *raddr = NULL, *rmask = NULL; + + if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR && + (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { + *sn = pf_find_src_node(saddr, r, af, 0); + if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) { + PF_ACPY(naddr, &(*sn)->raddr, af); + if (V_pf_status.debug >= PF_DEBUG_MISC) { + printf("pf_map_addr: src tracking maps "); + pf_print_host(saddr, 0, af); + printf(" to "); + pf_print_host(naddr, 0, af); + printf("\n"); + } + return (0); + } + } + + if (rpool->cur->addr.type == PF_ADDR_NOROUTE) + return (1); + if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { + switch (af) { +#ifdef INET + case AF_INET: + if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 && + (rpool->opts & PF_POOL_TYPEMASK) != + PF_POOL_ROUNDROBIN) + return (1); + raddr = &rpool->cur->addr.p.dyn->pfid_addr4; + rmask = &rpool->cur->addr.p.dyn->pfid_mask4; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 && + (rpool->opts & PF_POOL_TYPEMASK) != + PF_POOL_ROUNDROBIN) + return (1); + raddr = &rpool->cur->addr.p.dyn->pfid_addr6; + rmask = &rpool->cur->addr.p.dyn->pfid_mask6; + break; +#endif /* INET6 */ + } + } else if (rpool->cur->addr.type == PF_ADDR_TABLE) { + if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) + return (1); /* unsupported */ + } else { + raddr = &rpool->cur->addr.v.a.addr; + rmask = &rpool->cur->addr.v.a.mask; + } + + switch (rpool->opts & PF_POOL_TYPEMASK) { + case PF_POOL_NONE: + PF_ACPY(naddr, raddr, af); + break; + case PF_POOL_BITMASK: + PF_POOLMASK(naddr, raddr, rmask, saddr, af); + break; + case PF_POOL_RANDOM: + if (init_addr != NULL && PF_AZERO(init_addr, af)) { + switch (af) { +#ifdef INET + case AF_INET: + rpool->counter.addr32[0] = htonl(arc4random()); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (rmask->addr32[3] != 0xffffffff) + rpool->counter.addr32[3] = + htonl(arc4random()); + else + break; + if (rmask->addr32[2] != 0xffffffff) + rpool->counter.addr32[2] = + htonl(arc4random()); + else + break; + if (rmask->addr32[1] != 0xffffffff) + rpool->counter.addr32[1] = + htonl(arc4random()); + else + break; + if (rmask->addr32[0] != 0xffffffff) + rpool->counter.addr32[0] = + htonl(arc4random()); + break; +#endif /* INET6 */ + } + PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); + PF_ACPY(init_addr, naddr, af); + + } else { + PF_AINC(&rpool->counter, af); + PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); + } + break; + case PF_POOL_SRCHASH: + { + unsigned char hash[16]; + + pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af); + PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af); + break; + } + case PF_POOL_ROUNDROBIN: + { + struct pf_pooladdr *acur = rpool->cur; + + /* + * XXXGL: in the round-robin case we need to store + * the round-robin machine state in the rule, thus + * the forwarding thread needs to modify the rule. + * + * This is done w/o locking, because performance is assumed + * more important than round-robin precision. + * + * In the simplest case we just update the "rpool->cur" + * pointer. However, if the pool contains tables or dynamic + * addresses, then "tblidx" is also used to store machine + * state. Since "tblidx" is an int, concurrent access to it can't + * lead to inconsistency, only to loss of precision. + * + * Things get worse if the table contains not hosts but + * prefixes. In this case the counter also stores machine state, + * and for an IPv6 address the counter can't be updated + * atomically. Using round-robin on a table containing IPv6 + * (or even IPv4) prefixes would probably cause a panic. + */ + + if (rpool->cur->addr.type == PF_ADDR_TABLE) { + if (!pfr_pool_get(rpool->cur->addr.p.tbl, + &rpool->tblidx, &rpool->counter, af)) + goto get_addr; + } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { + if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, + &rpool->tblidx, &rpool->counter, af)) + goto get_addr; + } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af)) + goto get_addr; + + try_next: + if (TAILQ_NEXT(rpool->cur, entries) == NULL) + rpool->cur = TAILQ_FIRST(&rpool->list); + else + rpool->cur = TAILQ_NEXT(rpool->cur, entries); + if (rpool->cur->addr.type == PF_ADDR_TABLE) { + rpool->tblidx = -1; + if (pfr_pool_get(rpool->cur->addr.p.tbl, + &rpool->tblidx, &rpool->counter, af)) { + /* table contains no address of type 'af' */ + if (rpool->cur != acur) + goto try_next; + return (1); + } + } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { + rpool->tblidx = -1; + if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, + &rpool->tblidx, &rpool->counter, af)) { + /* table contains no address of type 'af' */ + if (rpool->cur != acur) + goto try_next; + return (1); + } + } else { + raddr = &rpool->cur->addr.v.a.addr; + rmask = &rpool->cur->addr.v.a.mask; + PF_ACPY(&rpool->counter, raddr, af); + } + + get_addr: + PF_ACPY(naddr, &rpool->counter, af); + if (init_addr != NULL && PF_AZERO(init_addr, af)) + PF_ACPY(init_addr, naddr, af); + PF_AINC(&rpool->counter, af); + break; + } + } + if (*sn != NULL) + PF_ACPY(&(*sn)->raddr, naddr, af); + + if (V_pf_status.debug >= PF_DEBUG_MISC && + (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { + printf("pf_map_addr: selected address "); + pf_print_host(naddr, 0, af); + printf("\n"); + } + + return (0); +} + +struct pf_rule * +pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, + struct pfi_kif *kif, struct pf_src_node **sn, + struct pf_state_key **skp, struct pf_state_key **nkp, + struct pf_addr *saddr, struct pf_addr *daddr, + u_int16_t sport, u_int16_t dport) +{ + struct pf_rule *r = NULL; + struct pf_addr *naddr; + uint16_t *nport; + + PF_RULES_RASSERT(); + KASSERT(*skp == NULL, ("*skp not NULL")); + KASSERT(*nkp == NULL, ("*nkp not NULL")); + + if (direction == PF_OUT) { + r = pf_match_translation(pd, m, off, direction, kif, saddr, + sport, daddr, dport, PF_RULESET_BINAT); + if (r == NULL) + r = pf_match_translation(pd, m, off, direction, kif, + saddr, sport, daddr, dport, PF_RULESET_NAT); + } else { + r = pf_match_translation(pd, m, off, direction, kif, saddr, + sport, daddr, dport, PF_RULESET_RDR); + if (r == NULL) + r = pf_match_translation(pd, m, off, direction, kif, + saddr, sport, daddr, dport, PF_RULESET_BINAT); + } + + if (r == NULL) + return (NULL); + + switch (r->action) { + case PF_NONAT: + case PF_NOBINAT: + case PF_NORDR: + return (NULL); + } + + *skp = pf_state_key_setup(pd, saddr, daddr, sport, dport); + if (*skp == NULL) + return (NULL); + *nkp = pf_state_key_clone(*skp); + if (*nkp == NULL) { + uma_zfree(V_pf_state_key_z, *skp); + *skp = NULL; + return (NULL); + } + + /* XXX We only modify one side for now.
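That is, only addr[1] and port[1] of the new key (*nkp) are rewritten below; the other side stays a copy of *skp.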
*/ + naddr = &(*nkp)->addr[1]; + nport = &(*nkp)->port[1]; + + switch (r->action) { + case PF_NAT: + if (pf_get_sport(pd->af, pd->proto, r, saddr, daddr, dport, + naddr, nport, r->rpool.proxy_port[0], + r->rpool.proxy_port[1], sn)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: NAT proxy port allocation (%u-%u) failed\n", + r->rpool.proxy_port[0], r->rpool.proxy_port[1])); + goto notrans; + } + break; + case PF_BINAT: + switch (direction) { + case PF_OUT: + if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){ + switch (pd->af) { +#ifdef INET + case AF_INET: + if (r->rpool.cur->addr.p.dyn-> + pfid_acnt4 < 1) + goto notrans; + PF_POOLMASK(naddr, + &r->rpool.cur->addr.p.dyn-> + pfid_addr4, + &r->rpool.cur->addr.p.dyn-> + pfid_mask4, saddr, AF_INET); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (r->rpool.cur->addr.p.dyn-> + pfid_acnt6 < 1) + goto notrans; + PF_POOLMASK(naddr, + &r->rpool.cur->addr.p.dyn-> + pfid_addr6, + &r->rpool.cur->addr.p.dyn-> + pfid_mask6, saddr, AF_INET6); + break; +#endif /* INET6 */ + } + } else + PF_POOLMASK(naddr, + &r->rpool.cur->addr.v.a.addr, + &r->rpool.cur->addr.v.a.mask, saddr, + pd->af); + break; + case PF_IN: + if (r->src.addr.type == PF_ADDR_DYNIFTL) { + switch (pd->af) { +#ifdef INET + case AF_INET: + if (r->src.addr.p.dyn-> pfid_acnt4 < 1) + goto notrans; + PF_POOLMASK(naddr, + &r->src.addr.p.dyn->pfid_addr4, + &r->src.addr.p.dyn->pfid_mask4, + daddr, AF_INET); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (r->src.addr.p.dyn->pfid_acnt6 < 1) + goto notrans; + PF_POOLMASK(naddr, + &r->src.addr.p.dyn->pfid_addr6, + &r->src.addr.p.dyn->pfid_mask6, + daddr, AF_INET6); + break; +#endif /* INET6 */ + } + } else + PF_POOLMASK(naddr, &r->src.addr.v.a.addr, + &r->src.addr.v.a.mask, daddr, pd->af); + break; + } + break; + case PF_RDR: { + if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn)) + goto notrans; + if ((r->rpool.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK) + PF_POOLMASK(naddr, naddr, &r->rpool.cur->addr.v.a.mask, + daddr, pd->af); + + if (r->rpool.proxy_port[1]) { + uint32_t tmp_nport; + + tmp_nport = ((ntohs(dport) - ntohs(r->dst.port[0])) % + (r->rpool.proxy_port[1] - r->rpool.proxy_port[0] + + 1)) + r->rpool.proxy_port[0]; + + /* Wrap around if necessary. */ + if (tmp_nport > 65535) + tmp_nport -= 65535; + *nport = htons((uint16_t)tmp_nport); + } else if (r->rpool.proxy_port[0]) + *nport = htons(r->rpool.proxy_port[0]); + break; + } + default: + panic("%s: unknown action %u", __func__, r->action); + } + + /* Return success only if translation really happened. */ + if (bcmp(*skp, *nkp, sizeof(struct pf_state_key_cmp))) + return (r); + +notrans: + uma_zfree(V_pf_state_key_z, *nkp); + uma_zfree(V_pf_state_key_z, *skp); + *skp = *nkp = NULL; + + return (NULL); +} diff --git a/sys/netpfil/pf/pf_norm.c b/sys/netpfil/pf/pf_norm.c new file mode 100644 index 0000000..9063fe8 --- /dev/null +++ b/sys/netpfil/pf/pf_norm.c @@ -0,0 +1,1999 @@ +/* $OpenBSD: pf_norm.c,v 1.114 2009/01/29 14:11:45 henning Exp $ */ + +/* + * Copyright 2001 Niels Provos <provos@citi.umich.edu> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_pf.h" + +#include <sys/param.h> +#include <sys/lock.h> +#include <sys/mbuf.h> +#include <sys/mutex.h> +#include <sys/refcount.h> +#include <sys/rwlock.h> +#include <sys/socket.h> + +#include <net/if.h> +#include <net/vnet.h> +#include <net/pfvar.h> +#include <net/pf_mtag.h> +#include <net/if_pflog.h> + +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip_var.h> +#include <netinet/tcp.h> +#include <netinet/tcp_fsm.h> +#include <netinet/tcp_seq.h> + +#ifdef INET6 +#include <netinet/ip6.h> +#endif /* INET6 */ + +struct pf_frent { + LIST_ENTRY(pf_frent) fr_next; + union { + struct { + struct ip *_fr_ip; + struct mbuf *_fr_m; + } _frag; + struct { + uint16_t _fr_off; + uint16_t _fr_end; + } _cache; + } _u; +}; +#define fr_ip _u._frag._fr_ip +#define fr_m _u._frag._fr_m +#define fr_off _u._cache._fr_off +#define fr_end _u._cache._fr_end + +struct pf_fragment { + RB_ENTRY(pf_fragment) fr_entry; + TAILQ_ENTRY(pf_fragment) frag_next; + struct in_addr fr_src; + struct in_addr fr_dst; + u_int8_t fr_p; /* protocol of this fragment */ + u_int8_t fr_flags; /* status flags */ +#define PFFRAG_SEENLAST 0x0001 /* Seen the last fragment for this */ +#define PFFRAG_NOBUFFER 0x0002 /* Non-buffering fragment cache */ +#define PFFRAG_DROP 0x0004 /* Drop all fragments */ +#define BUFFER_FRAGMENTS(fr) (!((fr)->fr_flags & PFFRAG_NOBUFFER)) + u_int16_t fr_id; /* fragment id for reassemble */ + u_int16_t fr_max; /* fragment data max */ + u_int32_t fr_timeout; + LIST_HEAD(, pf_frent) fr_queue; +}; + +static struct mtx pf_frag_mtx; +#define PF_FRAG_LOCK() mtx_lock(&pf_frag_mtx) +#define PF_FRAG_UNLOCK() mtx_unlock(&pf_frag_mtx) +#define PF_FRAG_ASSERT() mtx_assert(&pf_frag_mtx, MA_OWNED) + +VNET_DEFINE(uma_zone_t, pf_state_scrub_z); /* XXX: shared with pfsync */ + +static VNET_DEFINE(uma_zone_t, pf_frent_z); +#define V_pf_frent_z VNET(pf_frent_z) +static VNET_DEFINE(uma_zone_t, pf_frag_z); +#define V_pf_frag_z VNET(pf_frag_z) + +TAILQ_HEAD(pf_fragqueue, pf_fragment); +TAILQ_HEAD(pf_cachequeue, pf_fragment); +static VNET_DEFINE(struct pf_fragqueue, pf_fragqueue); +#define V_pf_fragqueue VNET(pf_fragqueue) +static VNET_DEFINE(struct pf_cachequeue, pf_cachequeue); +#define V_pf_cachequeue VNET(pf_cachequeue) +RB_HEAD(pf_frag_tree, pf_fragment); +static VNET_DEFINE(struct pf_frag_tree, pf_frag_tree); +#define V_pf_frag_tree VNET(pf_frag_tree) +static VNET_DEFINE(struct pf_frag_tree, pf_cache_tree); +#define V_pf_cache_tree VNET(pf_cache_tree) 
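+ +/* + * Fragment offsets arrive in ip_off in units of 8 bytes; everything + * below works in byte offsets, computed as in this compiled-out, + * purely illustrative helper (the name is hypothetical, not part of + * pf): + */ +#if 0 +static u_int16_t +example_frag_bounds(struct ip *ip, u_int16_t *end) +{ + u_int16_t off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; /* byte offset */ + u_int16_t len = ntohs(ip->ip_len) - (ip->ip_hl << 2); /* payload length */ + + *end = off + len; /* one past the last payload byte */ + return (off); +} +#endif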
+static int pf_frag_compare(struct pf_fragment *, + struct pf_fragment *); +static RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); +static RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); + +/* Private prototypes */ +static void pf_free_fragment(struct pf_fragment *); +static void pf_remove_fragment(struct pf_fragment *); +static int pf_normalize_tcpopt(struct pf_rule *, struct mbuf *, + struct tcphdr *, int, sa_family_t); +#ifdef INET +static void pf_ip2key(struct pf_fragment *, struct ip *); +static void pf_scrub_ip(struct mbuf **, u_int32_t, u_int8_t, + u_int8_t); +static void pf_flush_fragments(void); +static struct pf_fragment *pf_find_fragment(struct ip *, struct pf_frag_tree *); +static struct mbuf *pf_reassemble(struct mbuf **, struct pf_fragment **, + struct pf_frent *, int); +static struct mbuf *pf_fragcache(struct mbuf **, struct ip*, + struct pf_fragment **, int, int, int *); +#endif /* INET */ +#ifdef INET6 +static void pf_scrub_ip6(struct mbuf **, u_int8_t); +#endif +#define DPFPRINTF(x) do { \ + if (V_pf_status.debug >= PF_DEBUG_MISC) { \ + printf("%s: ", __func__); \ + printf x ; \ + } \ +} while(0) + +void +pf_normalize_init(void) +{ + + V_pf_frag_z = uma_zcreate("pf frags", sizeof(struct pf_fragment), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + V_pf_frent_z = uma_zcreate("pf frag entries", sizeof(struct pf_frent), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + V_pf_state_scrub_z = uma_zcreate("pf state scrubs", + sizeof(struct pf_state_scrub), NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, 0); + + V_pf_limits[PF_LIMIT_FRAGS].zone = V_pf_frent_z; + V_pf_limits[PF_LIMIT_FRAGS].limit = PFFRAG_FRENT_HIWAT; + uma_zone_set_max(V_pf_frent_z, PFFRAG_FRENT_HIWAT); + + mtx_init(&pf_frag_mtx, "pf fragments", NULL, MTX_DEF); + + TAILQ_INIT(&V_pf_fragqueue); + TAILQ_INIT(&V_pf_cachequeue); +} + +void +pf_normalize_cleanup(void) +{ + + uma_zdestroy(V_pf_state_scrub_z); + uma_zdestroy(V_pf_frent_z); + uma_zdestroy(V_pf_frag_z); + + mtx_destroy(&pf_frag_mtx); +} + +static int +pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b) +{ + int diff; + + if ((diff = a->fr_id - b->fr_id)) + return (diff); + else if ((diff = a->fr_p - b->fr_p)) + return (diff); + else if (a->fr_src.s_addr < b->fr_src.s_addr) + return (-1); + else if (a->fr_src.s_addr > b->fr_src.s_addr) + return (1); + else if (a->fr_dst.s_addr < b->fr_dst.s_addr) + return (-1); + else if (a->fr_dst.s_addr > b->fr_dst.s_addr) + return (1); + return (0); +} + +void +pf_purge_expired_fragments(void) +{ + struct pf_fragment *frag; + u_int32_t expire = time_uptime - + V_pf_default_rule.timeout[PFTM_FRAG]; + + PF_FRAG_LOCK(); + while ((frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue)) != NULL) { + KASSERT((BUFFER_FRAGMENTS(frag)), + ("BUFFER_FRAGMENTS(frag) == 0: %s", __FUNCTION__)); + if (frag->fr_timeout > expire) + break; + + DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag)); + pf_free_fragment(frag); + } + + while ((frag = TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue)) != NULL) { + KASSERT((!BUFFER_FRAGMENTS(frag)), + ("BUFFER_FRAGMENTS(frag) != 0: %s", __FUNCTION__)); + if (frag->fr_timeout > expire) + break; + + DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag)); + pf_free_fragment(frag); + KASSERT((TAILQ_EMPTY(&V_pf_cachequeue) || + TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue) != frag), + ("!(TAILQ_EMPTY() || TAILQ_LAST() == farg): %s", + __FUNCTION__)); + } + PF_FRAG_UNLOCK(); +} + +#ifdef INET +/* + * Try to flush old fragments to make space for new ones + */ +static void 
+pf_flush_fragments(void) +{ + struct pf_fragment *frag, *cache; + int goal; + + PF_FRAG_ASSERT(); + + goal = uma_zone_get_cur(V_pf_frent_z) * 9 / 10; + DPFPRINTF(("trying to free %d frag entries\n", goal)); + while (goal < uma_zone_get_cur(V_pf_frent_z)) { + frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue); + if (frag) + pf_free_fragment(frag); + cache = TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue); + if (cache) + pf_free_fragment(cache); + if (frag == NULL && cache == NULL) + break; + } +} +#endif /* INET */ + +/* Frees the fragments and all associated entries */ +static void +pf_free_fragment(struct pf_fragment *frag) +{ + struct pf_frent *frent; + + PF_FRAG_ASSERT(); + + /* Free all fragments */ + if (BUFFER_FRAGMENTS(frag)) { + for (frent = LIST_FIRST(&frag->fr_queue); frent; + frent = LIST_FIRST(&frag->fr_queue)) { + LIST_REMOVE(frent, fr_next); + + m_freem(frent->fr_m); + uma_zfree(V_pf_frent_z, frent); + } + } else { + for (frent = LIST_FIRST(&frag->fr_queue); frent; + frent = LIST_FIRST(&frag->fr_queue)) { + LIST_REMOVE(frent, fr_next); + + KASSERT((LIST_EMPTY(&frag->fr_queue) || + LIST_FIRST(&frag->fr_queue)->fr_off > + frent->fr_end), + ("! (LIST_EMPTY() || LIST_FIRST()->fr_off >" + " frent->fr_end): %s", __func__)); + + uma_zfree(V_pf_frent_z, frent); + } + } + + pf_remove_fragment(frag); +} + +#ifdef INET +static void +pf_ip2key(struct pf_fragment *key, struct ip *ip) +{ + key->fr_p = ip->ip_p; + key->fr_id = ip->ip_id; + key->fr_src.s_addr = ip->ip_src.s_addr; + key->fr_dst.s_addr = ip->ip_dst.s_addr; +} + +static struct pf_fragment * +pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree) +{ + struct pf_fragment key; + struct pf_fragment *frag; + + PF_FRAG_ASSERT(); + + pf_ip2key(&key, ip); + + frag = RB_FIND(pf_frag_tree, tree, &key); + if (frag != NULL) { + /* XXX Are we sure we want to update the timeout? */ + frag->fr_timeout = time_uptime; + if (BUFFER_FRAGMENTS(frag)) { + TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next); + TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next); + } else { + TAILQ_REMOVE(&V_pf_cachequeue, frag, frag_next); + TAILQ_INSERT_HEAD(&V_pf_cachequeue, frag, frag_next); + } + } + + return (frag); +} +#endif /* INET */ + +/* Removes a fragment from the fragment queue and frees the fragment */ + +static void +pf_remove_fragment(struct pf_fragment *frag) +{ + + PF_FRAG_ASSERT(); + + if (BUFFER_FRAGMENTS(frag)) { + RB_REMOVE(pf_frag_tree, &V_pf_frag_tree, frag); + TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next); + uma_zfree(V_pf_frag_z, frag); + } else { + RB_REMOVE(pf_frag_tree, &V_pf_cache_tree, frag); + TAILQ_REMOVE(&V_pf_cachequeue, frag, frag_next); + uma_zfree(V_pf_frag_z, frag); + } +} + +#ifdef INET +#define FR_IP_OFF(fr) ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3) +static struct mbuf * +pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, + struct pf_frent *frent, int mff) +{ + struct mbuf *m = *m0, *m2; + struct pf_frent *frea, *next; + struct pf_frent *frep = NULL; + struct ip *ip = frent->fr_ip; + int hlen = ip->ip_hl << 2; + u_int16_t off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; + u_int16_t ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4; + u_int16_t max = ip_len + off; + + PF_FRAG_ASSERT(); + KASSERT((*frag == NULL || BUFFER_FRAGMENTS(*frag)), + ("!
(*frag == NULL || BUFFER_FRAGMENTS(*frag)): %s", __FUNCTION__)); + + /* Strip off ip header */ + m->m_data += hlen; + m->m_len -= hlen; + + /* Create a new reassembly queue for this packet */ + if (*frag == NULL) { + *frag = uma_zalloc(V_pf_frag_z, M_NOWAIT); + if (*frag == NULL) { + pf_flush_fragments(); + *frag = uma_zalloc(V_pf_frag_z, M_NOWAIT); + if (*frag == NULL) + goto drop_fragment; + } + + (*frag)->fr_flags = 0; + (*frag)->fr_max = 0; + (*frag)->fr_src = frent->fr_ip->ip_src; + (*frag)->fr_dst = frent->fr_ip->ip_dst; + (*frag)->fr_p = frent->fr_ip->ip_p; + (*frag)->fr_id = frent->fr_ip->ip_id; + (*frag)->fr_timeout = time_uptime; + LIST_INIT(&(*frag)->fr_queue); + + RB_INSERT(pf_frag_tree, &V_pf_frag_tree, *frag); + TAILQ_INSERT_HEAD(&V_pf_fragqueue, *frag, frag_next); + + /* We do not have a previous fragment */ + frep = NULL; + goto insert; + } + + /* + * Find a fragment after the current one: + * - off contains the real shifted offset. + */ + LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) { + if (FR_IP_OFF(frea) > off) + break; + frep = frea; + } + + KASSERT((frep != NULL || frea != NULL), + ("!(frep != NULL || frea != NULL): %s", __FUNCTION__));; + + if (frep != NULL && + FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * + 4 > off) + { + u_int16_t precut; + + precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - + frep->fr_ip->ip_hl * 4 - off; + if (precut >= ip_len) + goto drop_fragment; + m_adj(frent->fr_m, precut); + DPFPRINTF(("overlap -%d\n", precut)); + /* Enforce 8 byte boundaries */ + ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3)); + off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; + ip_len -= precut; + ip->ip_len = htons(ip_len); + } + + for (; frea != NULL && ip_len + off > FR_IP_OFF(frea); + frea = next) + { + u_int16_t aftercut; + + aftercut = ip_len + off - FR_IP_OFF(frea); + DPFPRINTF(("adjust overlap %d\n", aftercut)); + if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl + * 4) + { + frea->fr_ip->ip_len = + htons(ntohs(frea->fr_ip->ip_len) - aftercut); + frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) + + (aftercut >> 3)); + m_adj(frea->fr_m, aftercut); + break; + } + + /* This fragment is completely overlapped, lose it */ + next = LIST_NEXT(frea, fr_next); + m_freem(frea->fr_m); + LIST_REMOVE(frea, fr_next); + uma_zfree(V_pf_frent_z, frea); + } + + insert: + /* Update maximum data size */ + if ((*frag)->fr_max < max) + (*frag)->fr_max = max; + /* This is the last segment */ + if (!mff) + (*frag)->fr_flags |= PFFRAG_SEENLAST; + + if (frep == NULL) + LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next); + else + LIST_INSERT_AFTER(frep, frent, fr_next); + + /* Check if we are completely reassembled */ + if (!((*frag)->fr_flags & PFFRAG_SEENLAST)) + return (NULL); + + /* Check if we have all the data */ + off = 0; + for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) { + next = LIST_NEXT(frep, fr_next); + + off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4; + if (off < (*frag)->fr_max && + (next == NULL || FR_IP_OFF(next) != off)) + { + DPFPRINTF(("missing fragment at %d, next %d, max %d\n", + off, next == NULL ? 
-1 : FR_IP_OFF(next), + (*frag)->fr_max)); + return (NULL); + } + } + DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max)); + if (off < (*frag)->fr_max) + return (NULL); + + /* We have all the data */ + frent = LIST_FIRST(&(*frag)->fr_queue); + KASSERT((frent != NULL), ("frent == NULL: %s", __FUNCTION__)); + if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) { + DPFPRINTF(("drop: too big: %d\n", off)); + pf_free_fragment(*frag); + *frag = NULL; + return (NULL); + } + next = LIST_NEXT(frent, fr_next); + + /* Magic from ip_input */ + ip = frent->fr_ip; + m = frent->fr_m; + m2 = m->m_next; + m->m_next = NULL; + m_cat(m, m2); + uma_zfree(V_pf_frent_z, frent); + for (frent = next; frent != NULL; frent = next) { + next = LIST_NEXT(frent, fr_next); + + m2 = frent->fr_m; + uma_zfree(V_pf_frent_z, frent); + m->m_pkthdr.csum_flags &= m2->m_pkthdr.csum_flags; + m->m_pkthdr.csum_data += m2->m_pkthdr.csum_data; + m_cat(m, m2); + } + + while (m->m_pkthdr.csum_data & 0xffff0000) + m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) + + (m->m_pkthdr.csum_data >> 16); + ip->ip_src = (*frag)->fr_src; + ip->ip_dst = (*frag)->fr_dst; + + /* Remove from fragment queue */ + pf_remove_fragment(*frag); + *frag = NULL; + + hlen = ip->ip_hl << 2; + ip->ip_len = htons(off + hlen); + m->m_len += hlen; + m->m_data -= hlen; + + /* some debugging cruft by sklower, below, will go away soon */ + /* XXX this should be done elsewhere */ + if (m->m_flags & M_PKTHDR) { + int plen = 0; + for (m2 = m; m2; m2 = m2->m_next) + plen += m2->m_len; + m->m_pkthdr.len = plen; + } + + DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len))); + return (m); + + drop_fragment: + /* Oops - fail safe - drop packet */ + uma_zfree(V_pf_frent_z, frent); + m_freem(m); + return (NULL); +} + +static struct mbuf * +pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, + int drop, int *nomem) +{ + struct mbuf *m = *m0; + struct pf_frent *frp, *fra, *cur = NULL; + int ip_len = ntohs(h->ip_len) - (h->ip_hl << 2); + u_int16_t off = ntohs(h->ip_off) << 3; + u_int16_t max = ip_len + off; + int hosed = 0; + + PF_FRAG_ASSERT(); + KASSERT((*frag == NULL || !BUFFER_FRAGMENTS(*frag)), + ("!(*frag == NULL || !BUFFER_FRAGMENTS(*frag)): %s", __FUNCTION__)); + + /* Create a new range queue for this packet */ + if (*frag == NULL) { + *frag = uma_zalloc(V_pf_frag_z, M_NOWAIT); + if (*frag == NULL) { + pf_flush_fragments(); + *frag = uma_zalloc(V_pf_frag_z, M_NOWAIT); + if (*frag == NULL) + goto no_mem; + } + + /* Get an entry for the queue */ + cur = uma_zalloc(V_pf_frent_z, M_NOWAIT); + if (cur == NULL) { + uma_zfree(V_pf_frag_z, *frag); + *frag = NULL; + goto no_mem; + } + + (*frag)->fr_flags = PFFRAG_NOBUFFER; + (*frag)->fr_max = 0; + (*frag)->fr_src = h->ip_src; + (*frag)->fr_dst = h->ip_dst; + (*frag)->fr_p = h->ip_p; + (*frag)->fr_id = h->ip_id; + (*frag)->fr_timeout = time_uptime; + + cur->fr_off = off; + cur->fr_end = max; + LIST_INIT(&(*frag)->fr_queue); + LIST_INSERT_HEAD(&(*frag)->fr_queue, cur, fr_next); + + RB_INSERT(pf_frag_tree, &V_pf_cache_tree, *frag); + TAILQ_INSERT_HEAD(&V_pf_cachequeue, *frag, frag_next); + + DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, max)); + + goto pass; + } + + /* + * Find a fragment after the current one: + * - off contains the real shifted offset. 
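+ * - frp is left as the last entry whose offset is at or before off, + * fra as the first entry past it; either may be NULL at the ends.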
+ */ + frp = NULL; + LIST_FOREACH(fra, &(*frag)->fr_queue, fr_next) { + if (fra->fr_off > off) + break; + frp = fra; + } + + KASSERT((frp != NULL || fra != NULL), + ("!(frp != NULL || fra != NULL): %s", __FUNCTION__)); + + if (frp != NULL) { + int precut; + + precut = frp->fr_end - off; + if (precut >= ip_len) { + /* Fragment is entirely a duplicate */ + DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n", + h->ip_id, frp->fr_off, frp->fr_end, off, max)); + goto drop_fragment; + } + if (precut == 0) { + /* They are adjacent. Fixup cache entry */ + DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n", + h->ip_id, frp->fr_off, frp->fr_end, off, max)); + frp->fr_end = max; + } else if (precut > 0) { + /* The first part of this payload overlaps with a + * fragment that has already been passed. + * Need to trim off the first part of the payload. + * But to do so easily, we need to create another + * mbuf to throw the original header into. + */ + + DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n", + h->ip_id, precut, frp->fr_off, frp->fr_end, off, + max)); + + off += precut; + max -= precut; + /* Update the previous frag to encompass this one */ + frp->fr_end = max; + + if (!drop) { + /* XXX Optimization opportunity + * This is a very heavy way to trim the payload. + * we could do it much faster by diddling mbuf + * internals but that would be even less legible + * than this mbuf magic. For my next trick, + * I'll pull a rabbit out of my laptop. + */ + *m0 = m_dup(m, M_NOWAIT); + if (*m0 == NULL) + goto no_mem; + /* From KAME Project : We have missed this! */ + m_adj(*m0, (h->ip_hl << 2) - + (*m0)->m_pkthdr.len); + + KASSERT(((*m0)->m_next == NULL), + ("(*m0)->m_next != NULL: %s", + __FUNCTION__)); + m_adj(m, precut + (h->ip_hl << 2)); + m_cat(*m0, m); + m = *m0; + if (m->m_flags & M_PKTHDR) { + int plen = 0; + struct mbuf *t; + for (t = m; t; t = t->m_next) + plen += t->m_len; + m->m_pkthdr.len = plen; + } + + + h = mtod(m, struct ip *); + + KASSERT(((int)m->m_len == + ntohs(h->ip_len) - precut), + ("m->m_len != ntohs(h->ip_len) - precut: %s", + __FUNCTION__)); + h->ip_off = htons(ntohs(h->ip_off) + + (precut >> 3)); + h->ip_len = htons(ntohs(h->ip_len) - precut); + } else { + hosed++; + } + } else { + /* There is a gap between fragments */ + + DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n", + h->ip_id, -precut, frp->fr_off, frp->fr_end, off, + max)); + + cur = uma_zalloc(V_pf_frent_z, M_NOWAIT); + if (cur == NULL) + goto no_mem; + + cur->fr_off = off; + cur->fr_end = max; + LIST_INSERT_AFTER(frp, cur, fr_next); + } + } + + if (fra != NULL) { + int aftercut; + int merge = 0; + + aftercut = max - fra->fr_off; + if (aftercut == 0) { + /* Adjacent fragments */ + DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n", + h->ip_id, off, max, fra->fr_off, fra->fr_end)); + fra->fr_off = off; + merge = 1; + } else if (aftercut > 0) { + /* Need to chop off the tail of this fragment */ + DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n", + h->ip_id, aftercut, off, max, fra->fr_off, + fra->fr_end)); + fra->fr_off = off; + max -= aftercut; + + merge = 1; + + if (!drop) { + m_adj(m, -aftercut); + if (m->m_flags & M_PKTHDR) { + int plen = 0; + struct mbuf *t; + for (t = m; t; t = t->m_next) + plen += t->m_len; + m->m_pkthdr.len = plen; + } + h = mtod(m, struct ip *); + KASSERT(((int)m->m_len == ntohs(h->ip_len) - aftercut), + ("m->m_len != ntohs(h->ip_len) - aftercut: %s", + __FUNCTION__)); + h->ip_len = htons(ntohs(h->ip_len) - aftercut); + } else { + hosed++; + } + } else if (frp == NULL) { + /* There 
is a gap between fragments */ + DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n", + h->ip_id, -aftercut, off, max, fra->fr_off, + fra->fr_end)); + + cur = uma_zalloc(V_pf_frent_z, M_NOWAIT); + if (cur == NULL) + goto no_mem; + + cur->fr_off = off; + cur->fr_end = max; + LIST_INSERT_BEFORE(fra, cur, fr_next); + } + + + /* Need to glue together two separate fragment descriptors */ + if (merge) { + if (cur && fra->fr_off <= cur->fr_end) { + /* Need to merge in a previous 'cur' */ + DPFPRINTF(("fragcache[%d]: adjacent(merge " + "%d-%d) %d-%d (%d-%d)\n", + h->ip_id, cur->fr_off, cur->fr_end, off, + max, fra->fr_off, fra->fr_end)); + fra->fr_off = cur->fr_off; + LIST_REMOVE(cur, fr_next); + uma_zfree(V_pf_frent_z, cur); + cur = NULL; + + } else if (frp && fra->fr_off <= frp->fr_end) { + /* Need to merge in a modified 'frp' */ + KASSERT((cur == NULL), ("cur != NULL: %s", + __FUNCTION__)); + DPFPRINTF(("fragcache[%d]: adjacent(merge " + "%d-%d) %d-%d (%d-%d)\n", + h->ip_id, frp->fr_off, frp->fr_end, off, + max, fra->fr_off, fra->fr_end)); + fra->fr_off = frp->fr_off; + LIST_REMOVE(frp, fr_next); + uma_zfree(V_pf_frent_z, frp); + frp = NULL; + + } + } + } + + if (hosed) { + /* + * We must keep tracking the overall fragment even when + * we're going to drop it anyway so that we know when to + * free the overall descriptor. Thus we drop the frag late. + */ + goto drop_fragment; + } + + + pass: + /* Update maximum data size */ + if ((*frag)->fr_max < max) + (*frag)->fr_max = max; + + /* This is the last segment */ + if (!mff) + (*frag)->fr_flags |= PFFRAG_SEENLAST; + + /* Check if we are completely reassembled */ + if (((*frag)->fr_flags & PFFRAG_SEENLAST) && + LIST_FIRST(&(*frag)->fr_queue)->fr_off == 0 && + LIST_FIRST(&(*frag)->fr_queue)->fr_end == (*frag)->fr_max) { + /* Remove from fragment queue */ + DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id, + (*frag)->fr_max)); + pf_free_fragment(*frag); + *frag = NULL; + } + + return (m); + + no_mem: + *nomem = 1; + + /* Still need to pay attention to !IP_MF */ + if (!mff && *frag != NULL) + (*frag)->fr_flags |= PFFRAG_SEENLAST; + + m_freem(m); + return (NULL); + + drop_fragment: + + /* Still need to pay attention to !IP_MF */ + if (!mff && *frag != NULL) + (*frag)->fr_flags |= PFFRAG_SEENLAST; + + if (drop) { + /* This fragment has been deemed bad. 
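The caller passed drop because a PFRULE_FRAGDROP scrub rule matched, so poison the whole reassembly via PFFRAG_DROP.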
Don't reass */ + if (((*frag)->fr_flags & PFFRAG_DROP) == 0) + DPFPRINTF(("fragcache[%d]: dropping overall fragment\n", + h->ip_id)); + (*frag)->fr_flags |= PFFRAG_DROP; + } + + m_freem(m); + return (NULL); +} + +int +pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, + struct pf_pdesc *pd) +{ + struct mbuf *m = *m0; + struct pf_rule *r; + struct pf_frent *frent; + struct pf_fragment *frag = NULL; + struct ip *h = mtod(m, struct ip *); + int mff = (ntohs(h->ip_off) & IP_MF); + int hlen = h->ip_hl << 2; + u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; + u_int16_t max; + int ip_len; + int ip_off; + int tag = -1; + + PF_RULES_RASSERT(); + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); + while (r != NULL) { + r->evaluations++; + if (pfi_kif_match(r->kif, kif) == r->ifnot) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != dir) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != AF_INET) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != h->ip_p) + r = r->skip[PF_SKIP_PROTO].ptr; + else if (PF_MISMATCHAW(&r->src.addr, + (struct pf_addr *)&h->ip_src.s_addr, AF_INET, + r->src.neg, kif, M_GETFIB(m))) + r = r->skip[PF_SKIP_SRC_ADDR].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, + (struct pf_addr *)&h->ip_dst.s_addr, AF_INET, + r->dst.neg, NULL, M_GETFIB(m))) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else if (r->match_tag && !pf_match_tag(m, r, &tag, + pd->pf_mtag ? pd->pf_mtag->tag : 0)) + r = TAILQ_NEXT(r, entries); + else + break; + } + + if (r == NULL || r->action == PF_NOSCRUB) + return (PF_PASS); + else { + r->packets[dir == PF_OUT]++; + r->bytes[dir == PF_OUT] += pd->tot_len; + } + + /* Check for illegal packets */ + if (hlen < (int)sizeof(struct ip)) + goto drop; + + if (hlen > ntohs(h->ip_len)) + goto drop; + + /* Clear IP_DF if the rule uses the no-df option */ + if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) { + u_int16_t ip_off = h->ip_off; + + h->ip_off &= htons(~IP_DF); + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0); + } + + /* We will need other tests here */ + if (!fragoff && !mff) + goto no_fragment; + + /* We're dealing with a fragment now. Don't allow fragments + * with IP_DF to enter the cache. If the flag was cleared by + * no-df above, fine. Otherwise drop it. 
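+ * A fragment carrying IP_DF is self-contradictory anyway, since DF + * asks routers not to fragment the packet (RFC 791).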
+ */ + if (h->ip_off & htons(IP_DF)) { + DPFPRINTF(("IP_DF\n")); + goto bad; + } + + ip_len = ntohs(h->ip_len) - hlen; + ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3; + + /* All fragments are 8 byte aligned */ + if (mff && (ip_len & 0x7)) { + DPFPRINTF(("mff and %d\n", ip_len)); + goto bad; + } + + /* Respect maximum length */ + if (fragoff + ip_len > IP_MAXPACKET) { + DPFPRINTF(("max packet %d\n", fragoff + ip_len)); + goto bad; + } + max = fragoff + ip_len; + + if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) { + + /* Fully buffer all of the fragments */ + PF_FRAG_LOCK(); + frag = pf_find_fragment(h, &V_pf_frag_tree); + + /* Check if we saw the last fragment already */ + if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) && + max > frag->fr_max) + goto bad; + + /* Get an entry for the fragment queue */ + frent = uma_zalloc(V_pf_frent_z, M_NOWAIT); + if (frent == NULL) { + PF_FRAG_UNLOCK(); + REASON_SET(reason, PFRES_MEMORY); + return (PF_DROP); + } + frent->fr_ip = h; + frent->fr_m = m; + + /* Might return a completely reassembled mbuf, or NULL */ + DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max)); + *m0 = m = pf_reassemble(m0, &frag, frent, mff); + PF_FRAG_UNLOCK(); + + if (m == NULL) + return (PF_DROP); + + /* use mtag from concatenated mbuf chain */ + pd->pf_mtag = pf_find_mtag(m); +#ifdef DIAGNOSTIC + if (pd->pf_mtag == NULL) { + printf("%s: pf_find_mtag returned NULL(1)\n", __func__); + if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) { + m_freem(m); + *m0 = NULL; + goto no_mem; + } + } +#endif + if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) + goto drop; + + h = mtod(m, struct ip *); + } else { + /* non-buffering fragment cache (drops or masks overlaps) */ + int nomem = 0; + + if (dir == PF_OUT && pd->pf_mtag->flags & PF_TAG_FRAGCACHE) { + /* + * Already passed the fragment cache in the + * input direction. If we continued, it would + * appear to be a dup and would be dropped. + */ + goto fragment_pass; + } + + PF_FRAG_LOCK(); + frag = pf_find_fragment(h, &V_pf_cache_tree); + + /* Check if we saw the last fragment already */ + if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) && + max > frag->fr_max) { + if (r->rule_flag & PFRULE_FRAGDROP) + frag->fr_flags |= PFFRAG_DROP; + goto bad; + } + + *m0 = m = pf_fragcache(m0, h, &frag, mff, + (r->rule_flag & PFRULE_FRAGDROP) ? 
1 : 0, &nomem); + PF_FRAG_UNLOCK(); + if (m == NULL) { + if (nomem) + goto no_mem; + goto drop; + } + + /* use mtag from copied and trimmed mbuf chain */ + pd->pf_mtag = pf_find_mtag(m); +#ifdef DIAGNOSTIC + if (pd->pf_mtag == NULL) { + printf("%s: pf_find_mtag returned NULL(2)\n", __func__); + if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) { + m_freem(m); + *m0 = NULL; + goto no_mem; + } + } +#endif + if (dir == PF_IN) + pd->pf_mtag->flags |= PF_TAG_FRAGCACHE; + + if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) + goto drop; + goto fragment_pass; + } + + no_fragment: + /* At this point, only IP_DF is allowed in ip_off */ + if (h->ip_off & ~htons(IP_DF)) { + u_int16_t ip_off = h->ip_off; + + h->ip_off &= htons(IP_DF); + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0); + } + + /* not missing a return here */ + + fragment_pass: + pf_scrub_ip(&m, r->rule_flag, r->min_ttl, r->set_tos); + + if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) + pd->flags |= PFDESC_IP_REAS; + return (PF_PASS); + + no_mem: + REASON_SET(reason, PFRES_MEMORY); + if (r != NULL && r->log) + PFLOG_PACKET(kif, m, AF_INET, dir, *reason, r, NULL, NULL, pd, + 1); + return (PF_DROP); + + drop: + REASON_SET(reason, PFRES_NORM); + if (r != NULL && r->log) + PFLOG_PACKET(kif, m, AF_INET, dir, *reason, r, NULL, NULL, pd, + 1); + return (PF_DROP); + + bad: + DPFPRINTF(("dropping bad fragment\n")); + + /* Free associated fragments */ + if (frag != NULL) { + pf_free_fragment(frag); + PF_FRAG_UNLOCK(); + } + + REASON_SET(reason, PFRES_FRAG); + if (r != NULL && r->log) + PFLOG_PACKET(kif, m, AF_INET, dir, *reason, r, NULL, NULL, pd, + 1); + + return (PF_DROP); +} +#endif + +#ifdef INET6 +int +pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, + u_short *reason, struct pf_pdesc *pd) +{ + struct mbuf *m = *m0; + struct pf_rule *r; + struct ip6_hdr *h = mtod(m, struct ip6_hdr *); + int off; + struct ip6_ext ext; + struct ip6_opt opt; + struct ip6_opt_jumbo jumbo; + struct ip6_frag frag; + u_int32_t jumbolen = 0, plen; + u_int16_t fragoff = 0; + int optend; + int ooff; + u_int8_t proto; + int terminal; + + PF_RULES_RASSERT(); + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); + while (r != NULL) { + r->evaluations++; + if (pfi_kif_match(r->kif, kif) == r->ifnot) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != dir) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != AF_INET6) + r = r->skip[PF_SKIP_AF].ptr; +#if 0 /* header chain! 
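r->proto can't be checked against ip6_nxt here: ip6_nxt may name an extension header rather than the upper-layer protocol, so the match is only valid after the header chain below has been walked.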
*/ + else if (r->proto && r->proto != h->ip6_nxt) + r = r->skip[PF_SKIP_PROTO].ptr; +#endif + else if (PF_MISMATCHAW(&r->src.addr, + (struct pf_addr *)&h->ip6_src, AF_INET6, + r->src.neg, kif, M_GETFIB(m))) + r = r->skip[PF_SKIP_SRC_ADDR].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, + (struct pf_addr *)&h->ip6_dst, AF_INET6, + r->dst.neg, NULL, M_GETFIB(m))) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else + break; + } + + if (r == NULL || r->action == PF_NOSCRUB) + return (PF_PASS); + else { + r->packets[dir == PF_OUT]++; + r->bytes[dir == PF_OUT] += pd->tot_len; + } + + /* Check for illegal packets */ + if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len) + goto drop; + + off = sizeof(struct ip6_hdr); + proto = h->ip6_nxt; + terminal = 0; + do { + switch (proto) { + case IPPROTO_FRAGMENT: + goto fragment; + break; + case IPPROTO_AH: + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: + if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL, + NULL, AF_INET6)) + goto shortpkt; + if (proto == IPPROTO_AH) + off += (ext.ip6e_len + 2) * 4; + else + off += (ext.ip6e_len + 1) * 8; + proto = ext.ip6e_nxt; + break; + case IPPROTO_HOPOPTS: + if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL, + NULL, AF_INET6)) + goto shortpkt; + optend = off + (ext.ip6e_len + 1) * 8; + ooff = off + sizeof(ext); + do { + if (!pf_pull_hdr(m, ooff, &opt.ip6o_type, + sizeof(opt.ip6o_type), NULL, NULL, + AF_INET6)) + goto shortpkt; + if (opt.ip6o_type == IP6OPT_PAD1) { + ooff++; + continue; + } + if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt), + NULL, NULL, AF_INET6)) + goto shortpkt; + if (ooff + sizeof(opt) + opt.ip6o_len > optend) + goto drop; + switch (opt.ip6o_type) { + case IP6OPT_JUMBO: + if (h->ip6_plen != 0) + goto drop; + if (!pf_pull_hdr(m, ooff, &jumbo, + sizeof(jumbo), NULL, NULL, + AF_INET6)) + goto shortpkt; + memcpy(&jumbolen, jumbo.ip6oj_jumbo_len, + sizeof(jumbolen)); + jumbolen = ntohl(jumbolen); + if (jumbolen <= IPV6_MAXPACKET) + goto drop; + if (sizeof(struct ip6_hdr) + jumbolen != + m->m_pkthdr.len) + goto drop; + break; + default: + break; + } + ooff += sizeof(opt) + opt.ip6o_len; + } while (ooff < optend); + + off = optend; + proto = ext.ip6e_nxt; + break; + default: + terminal = 1; + break; + } + } while (!terminal); + + /* jumbo payload option must be present, or plen > 0 */ + if (ntohs(h->ip6_plen) == 0) + plen = jumbolen; + else + plen = ntohs(h->ip6_plen); + if (plen == 0) + goto drop; + if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len) + goto shortpkt; + + pf_scrub_ip6(&m, r->min_ttl); + + return (PF_PASS); + + fragment: + if (ntohs(h->ip6_plen) == 0 || jumbolen) + goto drop; + plen = ntohs(h->ip6_plen); + + if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6)) + goto shortpkt; + fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK); + if (fragoff + (plen - off - sizeof(frag)) > IPV6_MAXPACKET) + goto badfrag; + + /* do something about it */ + /* remember to set pd->flags |= PFDESC_IP_REAS */ + return (PF_PASS); + + shortpkt: + REASON_SET(reason, PFRES_SHORT); + if (r != NULL && r->log) + PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd, + 1); + return (PF_DROP); + + drop: + REASON_SET(reason, PFRES_NORM); + if (r != NULL && r->log) + PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd, + 1); + return (PF_DROP); + + badfrag: + REASON_SET(reason, PFRES_FRAG); + if (r != NULL && r->log) + PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd, + 1); + return (PF_DROP); +} +#endif /* INET6 */ + +int +pf_normalize_tcp(int dir, struct pfi_kif *kif, struct 
mbuf *m, int ipoff, + int off, void *h, struct pf_pdesc *pd) +{ + struct pf_rule *r, *rm = NULL; + struct tcphdr *th = pd->hdr.tcp; + int rewrite = 0; + u_short reason; + u_int8_t flags; + sa_family_t af = pd->af; + + PF_RULES_RASSERT(); + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); + while (r != NULL) { + r->evaluations++; + if (pfi_kif_match(r->kif, kif) == r->ifnot) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != dir) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != af) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != pd->proto) + r = r->skip[PF_SKIP_PROTO].ptr; + else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, + r->src.neg, kif, M_GETFIB(m))) + r = r->skip[PF_SKIP_SRC_ADDR].ptr; + else if (r->src.port_op && !pf_match_port(r->src.port_op, + r->src.port[0], r->src.port[1], th->th_sport)) + r = r->skip[PF_SKIP_SRC_PORT].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, + r->dst.neg, NULL, M_GETFIB(m))) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else if (r->dst.port_op && !pf_match_port(r->dst.port_op, + r->dst.port[0], r->dst.port[1], th->th_dport)) + r = r->skip[PF_SKIP_DST_PORT].ptr; + else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match( + pf_osfp_fingerprint(pd, m, off, th), + r->os_fingerprint)) + r = TAILQ_NEXT(r, entries); + else { + rm = r; + break; + } + } + + if (rm == NULL || rm->action == PF_NOSCRUB) + return (PF_PASS); + else { + r->packets[dir == PF_OUT]++; + r->bytes[dir == PF_OUT] += pd->tot_len; + } + + if (rm->rule_flag & PFRULE_REASSEMBLE_TCP) + pd->flags |= PFDESC_TCP_NORM; + + flags = th->th_flags; + if (flags & TH_SYN) { + /* Illegal packet */ + if (flags & TH_RST) + goto tcp_drop; + + if (flags & TH_FIN) + flags &= ~TH_FIN; + } else { + /* Illegal packet */ + if (!(flags & (TH_ACK|TH_RST))) + goto tcp_drop; + } + + if (!(flags & TH_ACK)) { + /* These flags are only valid if ACK is set */ + if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG)) + goto tcp_drop; + } + + /* Check for illegal header length */ + if (th->th_off < (sizeof(struct tcphdr) >> 2)) + goto tcp_drop; + + /* If flags changed, or reserved data set, then adjust */ + if (flags != th->th_flags || th->th_x2 != 0) { + u_int16_t ov, nv; + + ov = *(u_int16_t *)(&th->th_ack + 1); + th->th_flags = flags; + th->th_x2 = 0; + nv = *(u_int16_t *)(&th->th_ack + 1); + + th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0); + rewrite = 1; + } + + /* Remove urgent pointer, if TH_URG is not set */ + if (!(flags & TH_URG) && th->th_urp) { + th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0); + th->th_urp = 0; + rewrite = 1; + } + + /* Process options */ + if (r->max_mss && pf_normalize_tcpopt(r, m, th, off, pd->af)) + rewrite = 1; + + /* copy back packet headers if we sanitized */ + if (rewrite) + m_copyback(m, off, sizeof(*th), (caddr_t)th); + + return (PF_PASS); + + tcp_drop: + REASON_SET(&reason, PFRES_NORM); + if (rm != NULL && r->log) + PFLOG_PACKET(kif, m, AF_INET, dir, reason, r, NULL, NULL, pd, + 1); + return (PF_DROP); +} + +int +pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd, + struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst) +{ + u_int32_t tsval, tsecr; + u_int8_t hdr[60]; + u_int8_t *opt; + + KASSERT((src->scrub == NULL), + ("pf_normalize_tcp_init: src->scrub != NULL")); + + src->scrub = uma_zalloc(V_pf_state_scrub_z, M_ZERO | M_NOWAIT); + if (src->scrub == NULL) + return (1); + + switch (pd->af) { +#ifdef INET + case AF_INET: { + struct ip *h = mtod(m, 
struct ip *); + src->scrub->pfss_ttl = h->ip_ttl; + break; + } +#endif /* INET */ +#ifdef INET6 + case AF_INET6: { + struct ip6_hdr *h = mtod(m, struct ip6_hdr *); + src->scrub->pfss_ttl = h->ip6_hlim; + break; + } +#endif /* INET6 */ + } + + + /* + * All normalizations below are only begun if we see the start of + * the connections. They must all set an enabled bit in pfss_flags + */ + if ((th->th_flags & TH_SYN) == 0) + return (0); + + + if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub && + pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) { + /* Diddle with TCP options */ + int hlen; + opt = hdr + sizeof(struct tcphdr); + hlen = (th->th_off << 2) - sizeof(struct tcphdr); + while (hlen >= TCPOLEN_TIMESTAMP) { + switch (*opt) { + case TCPOPT_EOL: /* FALLTHROUGH */ + case TCPOPT_NOP: + opt++; + hlen--; + break; + case TCPOPT_TIMESTAMP: + if (opt[1] >= TCPOLEN_TIMESTAMP) { + src->scrub->pfss_flags |= + PFSS_TIMESTAMP; + src->scrub->pfss_ts_mod = + htonl(arc4random()); + + /* note PFSS_PAWS not set yet */ + memcpy(&tsval, &opt[2], + sizeof(u_int32_t)); + memcpy(&tsecr, &opt[6], + sizeof(u_int32_t)); + src->scrub->pfss_tsval0 = ntohl(tsval); + src->scrub->pfss_tsval = ntohl(tsval); + src->scrub->pfss_tsecr = ntohl(tsecr); + getmicrouptime(&src->scrub->pfss_last); + } + /* FALLTHROUGH */ + default: + hlen -= MAX(opt[1], 2); + opt += MAX(opt[1], 2); + break; + } + } + } + + return (0); +} + +void +pf_normalize_tcp_cleanup(struct pf_state *state) +{ + if (state->src.scrub) + uma_zfree(V_pf_state_scrub_z, state->src.scrub); + if (state->dst.scrub) + uma_zfree(V_pf_state_scrub_z, state->dst.scrub); + + /* Someday... flush the TCP segment reassembly descriptors. */ +} + +int +pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, + u_short *reason, struct tcphdr *th, struct pf_state *state, + struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback) +{ + struct timeval uptime; + u_int32_t tsval, tsecr; + u_int tsval_from_last; + u_int8_t hdr[60]; + u_int8_t *opt; + int copyback = 0; + int got_ts = 0; + + KASSERT((src->scrub || dst->scrub), + ("%s: src->scrub && dst->scrub!", __func__)); + + /* + * Enforce the minimum TTL seen for this connection. Negate a common + * technique to evade an intrusion detection system and confuse + * firewall state code. + */ + switch (pd->af) { +#ifdef INET + case AF_INET: { + if (src->scrub) { + struct ip *h = mtod(m, struct ip *); + if (h->ip_ttl > src->scrub->pfss_ttl) + src->scrub->pfss_ttl = h->ip_ttl; + h->ip_ttl = src->scrub->pfss_ttl; + } + break; + } +#endif /* INET */ +#ifdef INET6 + case AF_INET6: { + if (src->scrub) { + struct ip6_hdr *h = mtod(m, struct ip6_hdr *); + if (h->ip6_hlim > src->scrub->pfss_ttl) + src->scrub->pfss_ttl = h->ip6_hlim; + h->ip6_hlim = src->scrub->pfss_ttl; + } + break; + } +#endif /* INET6 */ + } + + if (th->th_off > (sizeof(struct tcphdr) >> 2) && + ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) || + (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) && + pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) { + /* Diddle with TCP options */ + int hlen; + opt = hdr + sizeof(struct tcphdr); + hlen = (th->th_off << 2) - sizeof(struct tcphdr); + while (hlen >= TCPOLEN_TIMESTAMP) { + switch (*opt) { + case TCPOPT_EOL: /* FALLTHROUGH */ + case TCPOPT_NOP: + opt++; + hlen--; + break; + case TCPOPT_TIMESTAMP: + /* Modulate the timestamps. Can be used for + * NAT detection, OS uptime determination or + * reboot detection. + */ + + if (got_ts) { + /* Huh? 
Multiple timestamps!? */ + if (V_pf_status.debug >= PF_DEBUG_MISC) { + DPFPRINTF(("multiple TS??")); + pf_print_state(state); + printf("\n"); + } + REASON_SET(reason, PFRES_TS); + return (PF_DROP); + } + if (opt[1] >= TCPOLEN_TIMESTAMP) { + memcpy(&tsval, &opt[2], + sizeof(u_int32_t)); + if (tsval && src->scrub && + (src->scrub->pfss_flags & + PFSS_TIMESTAMP)) { + tsval = ntohl(tsval); + pf_change_a(&opt[2], + &th->th_sum, + htonl(tsval + + src->scrub->pfss_ts_mod), + 0); + copyback = 1; + } + + /* Modulate TS reply iff valid (!0) */ + memcpy(&tsecr, &opt[6], + sizeof(u_int32_t)); + if (tsecr && dst->scrub && + (dst->scrub->pfss_flags & + PFSS_TIMESTAMP)) { + tsecr = ntohl(tsecr) + - dst->scrub->pfss_ts_mod; + pf_change_a(&opt[6], + &th->th_sum, htonl(tsecr), + 0); + copyback = 1; + } + got_ts = 1; + } + /* FALLTHROUGH */ + default: + hlen -= MAX(opt[1], 2); + opt += MAX(opt[1], 2); + break; + } + } + if (copyback) { + /* Copyback the options, caller copys back header */ + *writeback = 1; + m_copyback(m, off + sizeof(struct tcphdr), + (th->th_off << 2) - sizeof(struct tcphdr), hdr + + sizeof(struct tcphdr)); + } + } + + + /* + * Must invalidate PAWS checks on connections idle for too long. + * The fastest allowed timestamp clock is 1ms. That turns out to + * be about 24 days before it wraps. XXX Right now our lowerbound + * TS echo check only works for the first 12 days of a connection + * when the TS has exhausted half its 32bit space + */ +#define TS_MAX_IDLE (24*24*60*60) +#define TS_MAX_CONN (12*24*60*60) /* XXX remove when better tsecr check */ + + getmicrouptime(&uptime); + if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) && + (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE || + time_uptime - state->creation > TS_MAX_CONN)) { + if (V_pf_status.debug >= PF_DEBUG_MISC) { + DPFPRINTF(("src idled out of PAWS\n")); + pf_print_state(state); + printf("\n"); + } + src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS) + | PFSS_PAWS_IDLED; + } + if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) && + uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) { + if (V_pf_status.debug >= PF_DEBUG_MISC) { + DPFPRINTF(("dst idled out of PAWS\n")); + pf_print_state(state); + printf("\n"); + } + dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS) + | PFSS_PAWS_IDLED; + } + + if (got_ts && src->scrub && dst->scrub && + (src->scrub->pfss_flags & PFSS_PAWS) && + (dst->scrub->pfss_flags & PFSS_PAWS)) { + /* Validate that the timestamps are "in-window". + * RFC1323 describes TCP Timestamp options that allow + * measurement of RTT (round trip time) and PAWS + * (protection against wrapped sequence numbers). PAWS + * gives us a set of rules for rejecting packets on + * long fat pipes (packets that were somehow delayed + * in transit longer than the time it took to send the + * full TCP sequence space of 4Gb). We can use these + * rules and infer a few others that will let us treat + * the 32bit timestamp and the 32bit echoed timestamp + * as sequence numbers to prevent a blind attacker from + * inserting packets into a connection. + * + * RFC1323 tells us: + * - The timestamp on this packet must be greater than + * or equal to the last value echoed by the other + * endpoint. The RFC says those will be discarded + * since it is a dup that has already been acked. + * This gives us a lowerbound on the timestamp. 
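 *   For example: if the peer last echoed timestamp 5000, a segment
 *   arriving now with tsval 4000 is either an ancient duplicate or a
 *   forgery, and is exactly what the SEQ_LT(tsval,
 *   dst->scrub->pfss_tsecr) test below rejects.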
+ * timestamp >= other last echoed timestamp + * - The timestamp will be less than or equal to + * the last timestamp plus the time between the + * last packet and now. The RFC defines the max + * clock rate as 1ms. We will allow clocks to be + * up to 10% fast and will allow a total difference + * or 30 seconds due to a route change. And this + * gives us an upperbound on the timestamp. + * timestamp <= last timestamp + max ticks + * We have to be careful here. Windows will send an + * initial timestamp of zero and then initialize it + * to a random value after the 3whs; presumably to + * avoid a DoS by having to call an expensive RNG + * during a SYN flood. Proof MS has at least one + * good security geek. + * + * - The TCP timestamp option must also echo the other + * endpoints timestamp. The timestamp echoed is the + * one carried on the earliest unacknowledged segment + * on the left edge of the sequence window. The RFC + * states that the host will reject any echoed + * timestamps that were larger than any ever sent. + * This gives us an upperbound on the TS echo. + * tescr <= largest_tsval + * - The lowerbound on the TS echo is a little more + * tricky to determine. The other endpoint's echoed + * values will not decrease. But there may be + * network conditions that re-order packets and + * cause our view of them to decrease. For now the + * only lowerbound we can safely determine is that + * the TS echo will never be less than the original + * TS. XXX There is probably a better lowerbound. + * Remove TS_MAX_CONN with better lowerbound check. + * tescr >= other original TS + * + * It is also important to note that the fastest + * timestamp clock of 1ms will wrap its 32bit space in + * 24 days. So we just disable TS checking after 24 + * days of idle time. We actually must use a 12d + * connection limit until we can come up with a better + * lowerbound to the TS echo check. + */ + struct timeval delta_ts; + int ts_fudge; + + + /* + * PFTM_TS_DIFF is how many seconds of leeway to allow + * a host's timestamp. This can happen if the previous + * packet got delayed in transit for much longer than + * this packet. + */ + if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0) + ts_fudge = V_pf_default_rule.timeout[PFTM_TS_DIFF]; + + /* Calculate max ticks since the last timestamp */ +#define TS_MAXFREQ 1100 /* RFC max TS freq of 1Khz + 10% skew */ +#define TS_MICROSECS 1000000 /* microseconds per second */ + delta_ts = uptime; + timevalsub(&delta_ts, &src->scrub->pfss_last); + tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ; + tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ); + + if ((src->state >= TCPS_ESTABLISHED && + dst->state >= TCPS_ESTABLISHED) && + (SEQ_LT(tsval, dst->scrub->pfss_tsecr) || + SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) || + (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) || + SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) { + /* Bad RFC1323 implementation or an insertion attack. + * + * - Solaris 2.6 and 2.7 are known to send another ACK + * after the FIN,FIN|ACK,ACK closing that carries + * an old timestamp. + */ + + DPFPRINTF(("Timestamp failed %c%c%c%c\n", + SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ', + SEQ_GT(tsval, src->scrub->pfss_tsval + + tsval_from_last) ? '1' : ' ', + SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ', + SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? 
'3' : ' ')); + DPFPRINTF((" tsval: %u tsecr: %u +ticks: %u " + "idle: %jus %lums\n", + tsval, tsecr, tsval_from_last, + (uintmax_t)delta_ts.tv_sec, + delta_ts.tv_usec / 1000)); + DPFPRINTF((" src->tsval: %u tsecr: %u\n", + src->scrub->pfss_tsval, src->scrub->pfss_tsecr)); + DPFPRINTF((" dst->tsval: %u tsecr: %u tsval0: %u" + "\n", dst->scrub->pfss_tsval, + dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0)); + if (V_pf_status.debug >= PF_DEBUG_MISC) { + pf_print_state(state); + pf_print_flags(th->th_flags); + printf("\n"); + } + REASON_SET(reason, PFRES_TS); + return (PF_DROP); + } + + /* XXX I'd really like to require tsecr but it's optional */ + + } else if (!got_ts && (th->th_flags & TH_RST) == 0 && + ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED) + || pd->p_len > 0 || (th->th_flags & TH_SYN)) && + src->scrub && dst->scrub && + (src->scrub->pfss_flags & PFSS_PAWS) && + (dst->scrub->pfss_flags & PFSS_PAWS)) { + /* Didn't send a timestamp. Timestamps aren't really useful + * when: + * - connection opening or closing (often not even sent). + * but we must not let an attacker to put a FIN on a + * data packet to sneak it through our ESTABLISHED check. + * - on a TCP reset. RFC suggests not even looking at TS. + * - on an empty ACK. The TS will not be echoed so it will + * probably not help keep the RTT calculation in sync and + * there isn't as much danger when the sequence numbers + * got wrapped. So some stacks don't include TS on empty + * ACKs :-( + * + * To minimize the disruption to mostly RFC1323 conformant + * stacks, we will only require timestamps on data packets. + * + * And what do ya know, we cannot require timestamps on data + * packets. There appear to be devices that do legitimate + * TCP connection hijacking. There are HTTP devices that allow + * a 3whs (with timestamps) and then buffer the HTTP request. + * If the intermediate device has the HTTP response cache, it + * will spoof the response but not bother timestamping its + * packets. So we can look for the presence of a timestamp in + * the first data packet and if there, require it in all future + * packets. + */ + + if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) { + /* + * Hey! Someone tried to sneak a packet in. Or the + * stack changed its RFC1323 behavior?!?! + */ + if (V_pf_status.debug >= PF_DEBUG_MISC) { + DPFPRINTF(("Did not receive expected RFC1323 " + "timestamp\n")); + pf_print_state(state); + pf_print_flags(th->th_flags); + printf("\n"); + } + REASON_SET(reason, PFRES_TS); + return (PF_DROP); + } + } + + + /* + * We will note if a host sends his data packets with or without + * timestamps. And require all data packets to contain a timestamp + * if the first does. PAWS implicitly requires that all data packets be + * timestamped. But I think there are middle-man devices that hijack + * TCP streams immediately after the 3whs and don't timestamp their + * packets (seen in a WWW accelerator or cache). + */ + if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags & + (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) { + if (got_ts) + src->scrub->pfss_flags |= PFSS_DATA_TS; + else { + src->scrub->pfss_flags |= PFSS_DATA_NOTS; + if (V_pf_status.debug >= PF_DEBUG_MISC && dst->scrub && + (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) { + /* Don't warn if other host rejected RFC1323 */ + DPFPRINTF(("Broken RFC1323 stack did not " + "timestamp data packet. 
Disabled PAWS " + "security.\n")); + pf_print_state(state); + pf_print_flags(th->th_flags); + printf("\n"); + } + } + } + + + /* + * Update PAWS values + */ + if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags & + (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) { + getmicrouptime(&src->scrub->pfss_last); + if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) || + (src->scrub->pfss_flags & PFSS_PAWS) == 0) + src->scrub->pfss_tsval = tsval; + + if (tsecr) { + if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) || + (src->scrub->pfss_flags & PFSS_PAWS) == 0) + src->scrub->pfss_tsecr = tsecr; + + if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 && + (SEQ_LT(tsval, src->scrub->pfss_tsval0) || + src->scrub->pfss_tsval0 == 0)) { + /* tsval0 MUST be the lowest timestamp */ + src->scrub->pfss_tsval0 = tsval; + } + + /* Only fully initialized after a TS gets echoed */ + if ((src->scrub->pfss_flags & PFSS_PAWS) == 0) + src->scrub->pfss_flags |= PFSS_PAWS; + } + } + + /* I have a dream.... TCP segment reassembly.... */ + return (0); +} + +static int +pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th, + int off, sa_family_t af) +{ + u_int16_t *mss; + int thoff; + int opt, cnt, optlen = 0; + int rewrite = 0; + u_char opts[TCP_MAXOLEN]; + u_char *optp = opts; + + thoff = th->th_off << 2; + cnt = thoff - sizeof(struct tcphdr); + + if (cnt > 0 && !pf_pull_hdr(m, off + sizeof(*th), opts, cnt, + NULL, NULL, af)) + return (rewrite); + + for (; cnt > 0; cnt -= optlen, optp += optlen) { + opt = optp[0]; + if (opt == TCPOPT_EOL) + break; + if (opt == TCPOPT_NOP) + optlen = 1; + else { + if (cnt < 2) + break; + optlen = optp[1]; + if (optlen < 2 || optlen > cnt) + break; + } + switch (opt) { + case TCPOPT_MAXSEG: + mss = (u_int16_t *)(optp + 2); + if ((ntohs(*mss)) > r->max_mss) { + th->th_sum = pf_cksum_fixup(th->th_sum, + *mss, htons(r->max_mss), 0); + *mss = htons(r->max_mss); + rewrite = 1; + } + break; + default: + break; + } + } + + if (rewrite) + m_copyback(m, off + sizeof(*th), thoff - sizeof(*th), opts); + + return (rewrite); +} + +#ifdef INET +static void +pf_scrub_ip(struct mbuf **m0, u_int32_t flags, u_int8_t min_ttl, u_int8_t tos) +{ + struct mbuf *m = *m0; + struct ip *h = mtod(m, struct ip *); + + /* Clear IP_DF if no-df was requested */ + if (flags & PFRULE_NODF && h->ip_off & htons(IP_DF)) { + u_int16_t ip_off = h->ip_off; + + h->ip_off &= htons(~IP_DF); + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0); + } + + /* Enforce a minimum ttl, may cause endless packet loops */ + if (min_ttl && h->ip_ttl < min_ttl) { + u_int16_t ip_ttl = h->ip_ttl; + + h->ip_ttl = min_ttl; + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0); + } + + /* Enforce tos */ + if (flags & PFRULE_SET_TOS) { + u_int16_t ov, nv; + + ov = *(u_int16_t *)h; + h->ip_tos = tos; + nv = *(u_int16_t *)h; + + h->ip_sum = pf_cksum_fixup(h->ip_sum, ov, nv, 0); + } + + /* random-id, but not for fragments */ + if (flags & PFRULE_RANDOMID && !(h->ip_off & ~htons(IP_DF))) { + u_int16_t ip_id = h->ip_id; + + h->ip_id = ip_randomid(); + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0); + } +} +#endif /* INET */ + +#ifdef INET6 +static void +pf_scrub_ip6(struct mbuf **m0, u_int8_t min_ttl) +{ + struct mbuf *m = *m0; + struct ip6_hdr *h = mtod(m, struct ip6_hdr *); + + /* Enforce a minimum ttl, may cause endless packet loops */ + if (min_ttl && h->ip6_hlim < min_ttl) + h->ip6_hlim = min_ttl; +} +#endif diff --git a/sys/netpfil/pf/pf_osfp.c b/sys/netpfil/pf/pf_osfp.c new file mode 100644 index 0000000..29d4a40 
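An aside on a pattern used throughout pf_norm.c above: pf_scrub_ip() and pf_normalize_tcpopt() never recompute a checksum over the whole packet; each time a 16-bit header word changes they repair the existing sum with pf_cksum_fixup(). The helper below is a minimal sketch of that incremental update (RFC 1624, eq. 3). The name cksum_fixup16 and the three-argument form are inventions for this sketch; pf's pf_cksum_fixup() takes a fourth argument, 0 in every call above, covering UDP's zero-means-no-checksum case.

	/*
	 * Sketch: incremental Internet checksum update (RFC 1624, eq. 3):
	 *
	 *	HC' = ~(~HC + ~m + m')
	 *
	 * where HC is the old checksum, m the old 16-bit word and m' its
	 * replacement.
	 */
	static u_int16_t
	cksum_fixup16(u_int16_t cksum, u_int16_t old, u_int16_t new)
	{
		u_int32_t l;

		l = (u_int32_t)(u_int16_t)~cksum + (u_int16_t)~old + new;
		l = (l >> 16) + (l & 0xffff);	/* fold carries back in, */
		l = (l >> 16) + (l & 0xffff);	/* possibly twice */
		return (~(u_int16_t)l);
	}

With it, the IP_DF-clearing step of pf_scrub_ip() would read: h->ip_sum = cksum_fixup16(h->ip_sum, ip_off, h->ip_off), where ip_off holds the pre-modification value, exactly as the original code saves it.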
--- /dev/null +++ b/sys/netpfil/pf/pf_osfp.c @@ -0,0 +1,526 @@ +/* $OpenBSD: pf_osfp.c,v 1.14 2008/06/12 18:17:01 henning Exp $ */ + +/* + * Copyright (c) 2003 Mike Frantzen <frantzen@w4g.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/socket.h> + +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/tcp.h> + +#include <net/if.h> +#include <net/pfvar.h> + +#include <netinet/ip6.h> +#include <netinet6/in6_var.h> + +static MALLOC_DEFINE(M_PFOSFP, "pf_osfp", "pf(4) operating system fingerprints"); +#define DPFPRINTF(format, x...) \ + if (V_pf_status.debug >= PF_DEBUG_NOISY) \ + printf(format , ##x) + +SLIST_HEAD(pf_osfp_list, pf_os_fingerprint); +static VNET_DEFINE(struct pf_osfp_list, pf_osfp_list) = + SLIST_HEAD_INITIALIZER(); +#define V_pf_osfp_list VNET(pf_osfp_list) + +static struct pf_osfp_enlist *pf_osfp_fingerprint_hdr(const struct ip *, + const struct ip6_hdr *, + const struct tcphdr *); +static struct pf_os_fingerprint *pf_osfp_find(struct pf_osfp_list *, + struct pf_os_fingerprint *, u_int8_t); +static struct pf_os_fingerprint *pf_osfp_find_exact(struct pf_osfp_list *, + struct pf_os_fingerprint *); +static void pf_osfp_insert(struct pf_osfp_list *, + struct pf_os_fingerprint *); +#ifdef PFDEBUG +static struct pf_os_fingerprint *pf_osfp_validate(void); +#endif + +/* + * Passively fingerprint the OS of the host (IPv4 TCP SYN packets only) + * Returns the list of possible OSes. 
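 * (Despite the IPv4-only wording above, the body below also accepts
 * IPv6 SYNs and marks them PF_OSFP_INET6.)  The TCP header and its
 * options are pulled into a local buffer and handed to
 * pf_osfp_fingerprint_hdr() for the actual matching; NULL is returned
 * when the packet is not TCP or the header cannot be pulled.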
+ */ +struct pf_osfp_enlist * +pf_osfp_fingerprint(struct pf_pdesc *pd, struct mbuf *m, int off, + const struct tcphdr *tcp) +{ + struct ip *ip; + struct ip6_hdr *ip6; + char hdr[60]; + + if ((pd->af != PF_INET && pd->af != PF_INET6) || + pd->proto != IPPROTO_TCP || (tcp->th_off << 2) < sizeof(*tcp)) + return (NULL); + + if (pd->af == PF_INET) { + ip = mtod(m, struct ip *); + ip6 = (struct ip6_hdr *)NULL; + } else { + ip = (struct ip *)NULL; + ip6 = mtod(m, struct ip6_hdr *); + } + if (!pf_pull_hdr(m, off, hdr, tcp->th_off << 2, NULL, NULL, + pd->af)) return (NULL); + + return (pf_osfp_fingerprint_hdr(ip, ip6, (struct tcphdr *)hdr)); +} + +static struct pf_osfp_enlist * +pf_osfp_fingerprint_hdr(const struct ip *ip, const struct ip6_hdr *ip6, const struct tcphdr *tcp) +{ + struct pf_os_fingerprint fp, *fpresult; + int cnt, optlen = 0; + const u_int8_t *optp; + char srcname[128]; + + if ((tcp->th_flags & (TH_SYN|TH_ACK)) != TH_SYN) + return (NULL); + if (ip) { + if ((ip->ip_off & htons(IP_OFFMASK)) != 0) + return (NULL); + } + + memset(&fp, 0, sizeof(fp)); + + if (ip) { + fp.fp_psize = ntohs(ip->ip_len); + fp.fp_ttl = ip->ip_ttl; + if (ip->ip_off & htons(IP_DF)) + fp.fp_flags |= PF_OSFP_DF; + strlcpy(srcname, inet_ntoa(ip->ip_src), sizeof(srcname)); + } +#ifdef INET6 + else if (ip6) { + /* jumbo payload? */ + fp.fp_psize = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen); + fp.fp_ttl = ip6->ip6_hlim; + fp.fp_flags |= PF_OSFP_DF; + fp.fp_flags |= PF_OSFP_INET6; + strlcpy(srcname, ip6_sprintf((struct in6_addr *)&ip6->ip6_src), + sizeof(srcname)); + } +#endif + else + return (NULL); + fp.fp_wsize = ntohs(tcp->th_win); + + + cnt = (tcp->th_off << 2) - sizeof(*tcp); + optp = (const u_int8_t *)((const char *)tcp + sizeof(*tcp)); + for (; cnt > 0; cnt -= optlen, optp += optlen) { + if (*optp == TCPOPT_EOL) + break; + + fp.fp_optcnt++; + if (*optp == TCPOPT_NOP) { + fp.fp_tcpopts = (fp.fp_tcpopts << PF_OSFP_TCPOPT_BITS) | + PF_OSFP_TCPOPT_NOP; + optlen = 1; + } else { + if (cnt < 2) + return (NULL); + optlen = optp[1]; + if (optlen > cnt || optlen < 2) + return (NULL); + switch (*optp) { + case TCPOPT_MAXSEG: + if (optlen >= TCPOLEN_MAXSEG) + memcpy(&fp.fp_mss, &optp[2], + sizeof(fp.fp_mss)); + fp.fp_tcpopts = (fp.fp_tcpopts << + PF_OSFP_TCPOPT_BITS) | PF_OSFP_TCPOPT_MSS; + NTOHS(fp.fp_mss); + break; + case TCPOPT_WINDOW: + if (optlen >= TCPOLEN_WINDOW) + memcpy(&fp.fp_wscale, &optp[2], + sizeof(fp.fp_wscale)); + NTOHS(fp.fp_wscale); + fp.fp_tcpopts = (fp.fp_tcpopts << + PF_OSFP_TCPOPT_BITS) | + PF_OSFP_TCPOPT_WSCALE; + break; + case TCPOPT_SACK_PERMITTED: + fp.fp_tcpopts = (fp.fp_tcpopts << + PF_OSFP_TCPOPT_BITS) | PF_OSFP_TCPOPT_SACK; + break; + case TCPOPT_TIMESTAMP: + if (optlen >= TCPOLEN_TIMESTAMP) { + u_int32_t ts; + memcpy(&ts, &optp[2], sizeof(ts)); + if (ts == 0) + fp.fp_flags |= PF_OSFP_TS0; + + } + fp.fp_tcpopts = (fp.fp_tcpopts << + PF_OSFP_TCPOPT_BITS) | PF_OSFP_TCPOPT_TS; + break; + default: + return (NULL); + } + } + optlen = MAX(optlen, 1); /* paranoia */ + } + + DPFPRINTF("fingerprinted %s:%d %d:%d:%d:%d:%llx (%d) " + "(TS=%s,M=%s%d,W=%s%d)\n", + srcname, ntohs(tcp->th_sport), + fp.fp_wsize, fp.fp_ttl, (fp.fp_flags & PF_OSFP_DF) != 0, + fp.fp_psize, (long long int)fp.fp_tcpopts, fp.fp_optcnt, + (fp.fp_flags & PF_OSFP_TS0) ? "0" : "", + (fp.fp_flags & PF_OSFP_MSS_MOD) ? "%" : + (fp.fp_flags & PF_OSFP_MSS_DC) ? "*" : "", + fp.fp_mss, + (fp.fp_flags & PF_OSFP_WSCALE_MOD) ? "%" : + (fp.fp_flags & PF_OSFP_WSCALE_DC) ? 
"*" : "", + fp.fp_wscale); + + if ((fpresult = pf_osfp_find(&V_pf_osfp_list, &fp, + PF_OSFP_MAXTTL_OFFSET))) + return (&fpresult->fp_oses); + return (NULL); +} + +/* Match a fingerprint ID against a list of OSes */ +int +pf_osfp_match(struct pf_osfp_enlist *list, pf_osfp_t os) +{ + struct pf_osfp_entry *entry; + int os_class, os_version, os_subtype; + int en_class, en_version, en_subtype; + + if (os == PF_OSFP_ANY) + return (1); + if (list == NULL) { + DPFPRINTF("osfp no match against %x\n", os); + return (os == PF_OSFP_UNKNOWN); + } + PF_OSFP_UNPACK(os, os_class, os_version, os_subtype); + SLIST_FOREACH(entry, list, fp_entry) { + PF_OSFP_UNPACK(entry->fp_os, en_class, en_version, en_subtype); + if ((os_class == PF_OSFP_ANY || en_class == os_class) && + (os_version == PF_OSFP_ANY || en_version == os_version) && + (os_subtype == PF_OSFP_ANY || en_subtype == os_subtype)) { + DPFPRINTF("osfp matched %s %s %s %x==%x\n", + entry->fp_class_nm, entry->fp_version_nm, + entry->fp_subtype_nm, os, entry->fp_os); + return (1); + } + } + DPFPRINTF("fingerprint 0x%x didn't match\n", os); + return (0); +} + +/* Flush the fingerprint list */ +void +pf_osfp_flush(void) +{ + struct pf_os_fingerprint *fp; + struct pf_osfp_entry *entry; + + while ((fp = SLIST_FIRST(&V_pf_osfp_list))) { + SLIST_REMOVE_HEAD(&V_pf_osfp_list, fp_next); + while ((entry = SLIST_FIRST(&fp->fp_oses))) { + SLIST_REMOVE_HEAD(&fp->fp_oses, fp_entry); + free(entry, M_PFOSFP); + } + free(fp, M_PFOSFP); + } +} + + +/* Add a fingerprint */ +int +pf_osfp_add(struct pf_osfp_ioctl *fpioc) +{ + struct pf_os_fingerprint *fp, fpadd; + struct pf_osfp_entry *entry; + + PF_RULES_WASSERT(); + + memset(&fpadd, 0, sizeof(fpadd)); + fpadd.fp_tcpopts = fpioc->fp_tcpopts; + fpadd.fp_wsize = fpioc->fp_wsize; + fpadd.fp_psize = fpioc->fp_psize; + fpadd.fp_mss = fpioc->fp_mss; + fpadd.fp_flags = fpioc->fp_flags; + fpadd.fp_optcnt = fpioc->fp_optcnt; + fpadd.fp_wscale = fpioc->fp_wscale; + fpadd.fp_ttl = fpioc->fp_ttl; + +#if 0 /* XXX RYAN wants to fix logging */ + DPFPRINTF("adding osfp %s %s %s = %s%d:%d:%d:%s%d:0x%llx %d " + "(TS=%s,M=%s%d,W=%s%d) %x\n", + fpioc->fp_os.fp_class_nm, fpioc->fp_os.fp_version_nm, + fpioc->fp_os.fp_subtype_nm, + (fpadd.fp_flags & PF_OSFP_WSIZE_MOD) ? "%" : + (fpadd.fp_flags & PF_OSFP_WSIZE_MSS) ? "S" : + (fpadd.fp_flags & PF_OSFP_WSIZE_MTU) ? "T" : + (fpadd.fp_flags & PF_OSFP_WSIZE_DC) ? "*" : "", + fpadd.fp_wsize, + fpadd.fp_ttl, + (fpadd.fp_flags & PF_OSFP_DF) ? 1 : 0, + (fpadd.fp_flags & PF_OSFP_PSIZE_MOD) ? "%" : + (fpadd.fp_flags & PF_OSFP_PSIZE_DC) ? "*" : "", + fpadd.fp_psize, + (long long int)fpadd.fp_tcpopts, fpadd.fp_optcnt, + (fpadd.fp_flags & PF_OSFP_TS0) ? "0" : "", + (fpadd.fp_flags & PF_OSFP_MSS_MOD) ? "%" : + (fpadd.fp_flags & PF_OSFP_MSS_DC) ? "*" : "", + fpadd.fp_mss, + (fpadd.fp_flags & PF_OSFP_WSCALE_MOD) ? "%" : + (fpadd.fp_flags & PF_OSFP_WSCALE_DC) ? 
"*" : "", + fpadd.fp_wscale, + fpioc->fp_os.fp_os); +#endif + + if ((fp = pf_osfp_find_exact(&V_pf_osfp_list, &fpadd))) { + SLIST_FOREACH(entry, &fp->fp_oses, fp_entry) { + if (PF_OSFP_ENTRY_EQ(entry, &fpioc->fp_os)) + return (EEXIST); + } + if ((entry = malloc(sizeof(*entry), M_PFOSFP, M_NOWAIT)) + == NULL) + return (ENOMEM); + } else { + if ((fp = malloc(sizeof(*fp), M_PFOSFP, M_ZERO | M_NOWAIT)) + == NULL) + return (ENOMEM); + fp->fp_tcpopts = fpioc->fp_tcpopts; + fp->fp_wsize = fpioc->fp_wsize; + fp->fp_psize = fpioc->fp_psize; + fp->fp_mss = fpioc->fp_mss; + fp->fp_flags = fpioc->fp_flags; + fp->fp_optcnt = fpioc->fp_optcnt; + fp->fp_wscale = fpioc->fp_wscale; + fp->fp_ttl = fpioc->fp_ttl; + SLIST_INIT(&fp->fp_oses); + if ((entry = malloc(sizeof(*entry), M_PFOSFP, M_NOWAIT)) + == NULL) { + free(fp, M_PFOSFP); + return (ENOMEM); + } + pf_osfp_insert(&V_pf_osfp_list, fp); + } + memcpy(entry, &fpioc->fp_os, sizeof(*entry)); + + /* Make sure the strings are NUL terminated */ + entry->fp_class_nm[sizeof(entry->fp_class_nm)-1] = '\0'; + entry->fp_version_nm[sizeof(entry->fp_version_nm)-1] = '\0'; + entry->fp_subtype_nm[sizeof(entry->fp_subtype_nm)-1] = '\0'; + + SLIST_INSERT_HEAD(&fp->fp_oses, entry, fp_entry); + +#ifdef PFDEBUG + if ((fp = pf_osfp_validate())) + printf("Invalid fingerprint list\n"); +#endif /* PFDEBUG */ + return (0); +} + + +/* Find a fingerprint in the list */ +static struct pf_os_fingerprint * +pf_osfp_find(struct pf_osfp_list *list, struct pf_os_fingerprint *find, + u_int8_t ttldiff) +{ + struct pf_os_fingerprint *f; + +#define MATCH_INT(_MOD, _DC, _field) \ + if ((f->fp_flags & _DC) == 0) { \ + if ((f->fp_flags & _MOD) == 0) { \ + if (f->_field != find->_field) \ + continue; \ + } else { \ + if (f->_field == 0 || find->_field % f->_field) \ + continue; \ + } \ + } + + SLIST_FOREACH(f, list, fp_next) { + if (f->fp_tcpopts != find->fp_tcpopts || + f->fp_optcnt != find->fp_optcnt || + f->fp_ttl < find->fp_ttl || + f->fp_ttl - find->fp_ttl > ttldiff || + (f->fp_flags & (PF_OSFP_DF|PF_OSFP_TS0)) != + (find->fp_flags & (PF_OSFP_DF|PF_OSFP_TS0))) + continue; + + MATCH_INT(PF_OSFP_PSIZE_MOD, PF_OSFP_PSIZE_DC, fp_psize) + MATCH_INT(PF_OSFP_MSS_MOD, PF_OSFP_MSS_DC, fp_mss) + MATCH_INT(PF_OSFP_WSCALE_MOD, PF_OSFP_WSCALE_DC, fp_wscale) + if ((f->fp_flags & PF_OSFP_WSIZE_DC) == 0) { + if (f->fp_flags & PF_OSFP_WSIZE_MSS) { + if (find->fp_mss == 0) + continue; + +/* + * Some "smart" NAT devices and DSL routers will tweak the MSS size and + * will set it to whatever is suitable for the link type. 
+ */ +#define SMART_MSS 1460 + if ((find->fp_wsize % find->fp_mss || + find->fp_wsize / find->fp_mss != + f->fp_wsize) && + (find->fp_wsize % SMART_MSS || + find->fp_wsize / SMART_MSS != + f->fp_wsize)) + continue; + } else if (f->fp_flags & PF_OSFP_WSIZE_MTU) { + if (find->fp_mss == 0) + continue; + +#define MTUOFF (sizeof(struct ip) + sizeof(struct tcphdr)) +#define SMART_MTU (SMART_MSS + MTUOFF) + if ((find->fp_wsize % (find->fp_mss + MTUOFF) || + find->fp_wsize / (find->fp_mss + MTUOFF) != + f->fp_wsize) && + (find->fp_wsize % SMART_MTU || + find->fp_wsize / SMART_MTU != + f->fp_wsize)) + continue; + } else if (f->fp_flags & PF_OSFP_WSIZE_MOD) { + if (f->fp_wsize == 0 || find->fp_wsize % + f->fp_wsize) + continue; + } else { + if (f->fp_wsize != find->fp_wsize) + continue; + } + } + return (f); + } + + return (NULL); +} + +/* Find an exact fingerprint in the list */ +static struct pf_os_fingerprint * +pf_osfp_find_exact(struct pf_osfp_list *list, struct pf_os_fingerprint *find) +{ + struct pf_os_fingerprint *f; + + SLIST_FOREACH(f, list, fp_next) { + if (f->fp_tcpopts == find->fp_tcpopts && + f->fp_wsize == find->fp_wsize && + f->fp_psize == find->fp_psize && + f->fp_mss == find->fp_mss && + f->fp_flags == find->fp_flags && + f->fp_optcnt == find->fp_optcnt && + f->fp_wscale == find->fp_wscale && + f->fp_ttl == find->fp_ttl) + return (f); + } + + return (NULL); +} + +/* Insert a fingerprint into the list */ +static void +pf_osfp_insert(struct pf_osfp_list *list, struct pf_os_fingerprint *ins) +{ + struct pf_os_fingerprint *f, *prev = NULL; + + /* XXX need to go semi tree based. can key on tcp options */ + + SLIST_FOREACH(f, list, fp_next) + prev = f; + if (prev) + SLIST_INSERT_AFTER(prev, ins, fp_next); + else + SLIST_INSERT_HEAD(list, ins, fp_next); +} + +/* Fill a fingerprint by its number (from an ioctl) */ +int +pf_osfp_get(struct pf_osfp_ioctl *fpioc) +{ + struct pf_os_fingerprint *fp; + struct pf_osfp_entry *entry; + int num = fpioc->fp_getnum; + int i = 0; + + + memset(fpioc, 0, sizeof(*fpioc)); + SLIST_FOREACH(fp, &V_pf_osfp_list, fp_next) { + SLIST_FOREACH(entry, &fp->fp_oses, fp_entry) { + if (i++ == num) { + fpioc->fp_mss = fp->fp_mss; + fpioc->fp_wsize = fp->fp_wsize; + fpioc->fp_flags = fp->fp_flags; + fpioc->fp_psize = fp->fp_psize; + fpioc->fp_ttl = fp->fp_ttl; + fpioc->fp_wscale = fp->fp_wscale; + fpioc->fp_getnum = num; + memcpy(&fpioc->fp_os, entry, + sizeof(fpioc->fp_os)); + return (0); + } + } + } + + return (EBUSY); +} + + +#ifdef PFDEBUG +/* Validate that each signature is reachable */ +static struct pf_os_fingerprint * +pf_osfp_validate(void) +{ + struct pf_os_fingerprint *f, *f2, find; + + SLIST_FOREACH(f, &V_pf_osfp_list, fp_next) { + memcpy(&find, f, sizeof(find)); + + /* We do a few MSS/th_win percolations to make things unique */ + if (find.fp_mss == 0) + find.fp_mss = 128; + if (f->fp_flags & PF_OSFP_WSIZE_MSS) + find.fp_wsize *= find.fp_mss; + else if (f->fp_flags & PF_OSFP_WSIZE_MTU) + find.fp_wsize *= (find.fp_mss + 40); + else if (f->fp_flags & PF_OSFP_WSIZE_MOD) + find.fp_wsize *= 2; + if (f != (f2 = pf_osfp_find(&V_pf_osfp_list, &find, 0))) { + if (f2) + printf("Found \"%s %s %s\" instead of " + "\"%s %s %s\"\n", + SLIST_FIRST(&f2->fp_oses)->fp_class_nm, + SLIST_FIRST(&f2->fp_oses)->fp_version_nm, + SLIST_FIRST(&f2->fp_oses)->fp_subtype_nm, + SLIST_FIRST(&f->fp_oses)->fp_class_nm, + SLIST_FIRST(&f->fp_oses)->fp_version_nm, + SLIST_FIRST(&f->fp_oses)->fp_subtype_nm); + else + printf("Couldn't find \"%s %s %s\"\n", + 
SLIST_FIRST(&f->fp_oses)->fp_class_nm, + SLIST_FIRST(&f->fp_oses)->fp_version_nm, + SLIST_FIRST(&f->fp_oses)->fp_subtype_nm); + return (f); + } + } + return (NULL); +} +#endif /* PFDEBUG */ diff --git a/sys/netpfil/pf/pf_ruleset.c b/sys/netpfil/pf/pf_ruleset.c new file mode 100644 index 0000000..77652a6 --- /dev/null +++ b/sys/netpfil/pf/pf_ruleset.c @@ -0,0 +1,424 @@ +/* $OpenBSD: pf_ruleset.c,v 1.2 2008/12/18 15:31:37 dhill Exp $ */ + +/* + * Copyright (c) 2001 Daniel Hartmeier + * Copyright (c) 2002,2003 Henning Brauer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Effort sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F30602-01-2-0537. + * + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/socket.h> +#ifdef _KERNEL +# include <sys/systm.h> +# include <sys/refcount.h> +#endif /* _KERNEL */ +#include <sys/mbuf.h> + +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#include <netinet/tcp.h> + +#include <net/if.h> +#include <net/pfvar.h> + +#ifdef INET6 +#include <netinet/ip6.h> +#endif /* INET6 */ + + +#ifdef _KERNEL +#define DPFPRINTF(format, x...) \ + if (V_pf_status.debug >= PF_DEBUG_NOISY) \ + printf(format , ##x) +#define rs_malloc(x) malloc(x, M_TEMP, M_NOWAIT|M_ZERO) +#define rs_free(x) free(x, M_TEMP) + +#else +/* Userland equivalents so we can lend code to pfctl et al. */ + +#include <arpa/inet.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#define rs_malloc(x) calloc(1, x) +#define rs_free(x) free(x) + +#ifdef PFDEBUG +#include <sys/stdarg.h> +#define DPFPRINTF(format, x...) fprintf(stderr, format , ##x) +#else +#define DPFPRINTF(format, x...) ((void)0) +#endif /* PFDEBUG */ +#endif /* _KERNEL */ + +#ifdef _KERNEL +VNET_DEFINE(struct pf_anchor_global, pf_anchors); +VNET_DEFINE(struct pf_anchor, pf_main_anchor); +#else /* ! 
_KERNEL */ +struct pf_anchor_global pf_anchors; +struct pf_anchor pf_main_anchor; +#undef V_pf_anchors +#define V_pf_anchors pf_anchors +#undef pf_main_ruleset +#define pf_main_ruleset pf_main_anchor.ruleset +#endif /* _KERNEL */ + +static __inline int pf_anchor_compare(struct pf_anchor *, struct pf_anchor *); + +static struct pf_anchor *pf_find_anchor(const char *); + +RB_GENERATE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare); +RB_GENERATE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare); + +static __inline int +pf_anchor_compare(struct pf_anchor *a, struct pf_anchor *b) +{ + int c = strcmp(a->path, b->path); + + return (c ? (c < 0 ? -1 : 1) : 0); +} + +int +pf_get_ruleset_number(u_int8_t action) +{ + switch (action) { + case PF_SCRUB: + case PF_NOSCRUB: + return (PF_RULESET_SCRUB); + break; + case PF_PASS: + case PF_DROP: + return (PF_RULESET_FILTER); + break; + case PF_NAT: + case PF_NONAT: + return (PF_RULESET_NAT); + break; + case PF_BINAT: + case PF_NOBINAT: + return (PF_RULESET_BINAT); + break; + case PF_RDR: + case PF_NORDR: + return (PF_RULESET_RDR); + break; + default: + return (PF_RULESET_MAX); + break; + } +} + +void +pf_init_ruleset(struct pf_ruleset *ruleset) +{ + int i; + + memset(ruleset, 0, sizeof(struct pf_ruleset)); + for (i = 0; i < PF_RULESET_MAX; i++) { + TAILQ_INIT(&ruleset->rules[i].queues[0]); + TAILQ_INIT(&ruleset->rules[i].queues[1]); + ruleset->rules[i].active.ptr = &ruleset->rules[i].queues[0]; + ruleset->rules[i].inactive.ptr = &ruleset->rules[i].queues[1]; + } +} + +static struct pf_anchor * +pf_find_anchor(const char *path) +{ + struct pf_anchor *key, *found; + + key = (struct pf_anchor *)rs_malloc(sizeof(*key)); + if (key == NULL) + return (NULL); + strlcpy(key->path, path, sizeof(key->path)); + found = RB_FIND(pf_anchor_global, &V_pf_anchors, key); + rs_free(key); + return (found); +} + +struct pf_ruleset * +pf_find_ruleset(const char *path) +{ + struct pf_anchor *anchor; + + while (*path == '/') + path++; + if (!*path) + return (&pf_main_ruleset); + anchor = pf_find_anchor(path); + if (anchor == NULL) + return (NULL); + else + return (&anchor->ruleset); +} + +struct pf_ruleset * +pf_find_or_create_ruleset(const char *path) +{ + char *p, *q, *r; + struct pf_ruleset *ruleset; + struct pf_anchor *anchor = NULL, *dup, *parent = NULL; + + if (path[0] == 0) + return (&pf_main_ruleset); + while (*path == '/') + path++; + ruleset = pf_find_ruleset(path); + if (ruleset != NULL) + return (ruleset); + p = (char *)rs_malloc(MAXPATHLEN); + if (p == NULL) + return (NULL); + strlcpy(p, path, MAXPATHLEN); + while (parent == NULL && (q = strrchr(p, '/')) != NULL) { + *q = 0; + if ((ruleset = pf_find_ruleset(p)) != NULL) { + parent = ruleset->anchor; + break; + } + } + if (q == NULL) + q = p; + else + q++; + strlcpy(p, path, MAXPATHLEN); + if (!*q) { + rs_free(p); + return (NULL); + } + while ((r = strchr(q, '/')) != NULL || *q) { + if (r != NULL) + *r = 0; + if (!*q || strlen(q) >= PF_ANCHOR_NAME_SIZE || + (parent != NULL && strlen(parent->path) >= + MAXPATHLEN - PF_ANCHOR_NAME_SIZE - 1)) { + rs_free(p); + return (NULL); + } + anchor = (struct pf_anchor *)rs_malloc(sizeof(*anchor)); + if (anchor == NULL) { + rs_free(p); + return (NULL); + } + RB_INIT(&anchor->children); + strlcpy(anchor->name, q, sizeof(anchor->name)); + if (parent != NULL) { + strlcpy(anchor->path, parent->path, + sizeof(anchor->path)); + strlcat(anchor->path, "/", sizeof(anchor->path)); + } + strlcat(anchor->path, anchor->name, sizeof(anchor->path)); + if ((dup = 
RB_INSERT(pf_anchor_global, &V_pf_anchors, anchor)) != + NULL) { + printf("pf_find_or_create_ruleset: RB_INSERT1 " + "'%s' '%s' collides with '%s' '%s'\n", + anchor->path, anchor->name, dup->path, dup->name); + rs_free(anchor); + rs_free(p); + return (NULL); + } + if (parent != NULL) { + anchor->parent = parent; + if ((dup = RB_INSERT(pf_anchor_node, &parent->children, + anchor)) != NULL) { + printf("pf_find_or_create_ruleset: " + "RB_INSERT2 '%s' '%s' collides with " + "'%s' '%s'\n", anchor->path, anchor->name, + dup->path, dup->name); + RB_REMOVE(pf_anchor_global, &V_pf_anchors, + anchor); + rs_free(anchor); + rs_free(p); + return (NULL); + } + } + pf_init_ruleset(&anchor->ruleset); + anchor->ruleset.anchor = anchor; + parent = anchor; + if (r != NULL) + q = r + 1; + else + *q = 0; + } + rs_free(p); + return (&anchor->ruleset); +} + +void +pf_remove_if_empty_ruleset(struct pf_ruleset *ruleset) +{ + struct pf_anchor *parent; + int i; + + while (ruleset != NULL) { + if (ruleset == &pf_main_ruleset || ruleset->anchor == NULL || + !RB_EMPTY(&ruleset->anchor->children) || + ruleset->anchor->refcnt > 0 || ruleset->tables > 0 || + ruleset->topen) + return; + for (i = 0; i < PF_RULESET_MAX; ++i) + if (!TAILQ_EMPTY(ruleset->rules[i].active.ptr) || + !TAILQ_EMPTY(ruleset->rules[i].inactive.ptr) || + ruleset->rules[i].inactive.open) + return; + RB_REMOVE(pf_anchor_global, &V_pf_anchors, ruleset->anchor); + if ((parent = ruleset->anchor->parent) != NULL) + RB_REMOVE(pf_anchor_node, &parent->children, + ruleset->anchor); + rs_free(ruleset->anchor); + if (parent == NULL) + return; + ruleset = &parent->ruleset; + } +} + +int +pf_anchor_setup(struct pf_rule *r, const struct pf_ruleset *s, + const char *name) +{ + char *p, *path; + struct pf_ruleset *ruleset; + + r->anchor = NULL; + r->anchor_relative = 0; + r->anchor_wildcard = 0; + if (!name[0]) + return (0); + path = (char *)rs_malloc(MAXPATHLEN); + if (path == NULL) + return (1); + if (name[0] == '/') + strlcpy(path, name + 1, MAXPATHLEN); + else { + /* relative path */ + r->anchor_relative = 1; + if (s->anchor == NULL || !s->anchor->path[0]) + path[0] = 0; + else + strlcpy(path, s->anchor->path, MAXPATHLEN); + while (name[0] == '.' && name[1] == '.' && name[2] == '/') { + if (!path[0]) { + printf("pf_anchor_setup: .. 
beyond root\n"); + rs_free(path); + return (1); + } + if ((p = strrchr(path, '/')) != NULL) + *p = 0; + else + path[0] = 0; + r->anchor_relative++; + name += 3; + } + if (path[0]) + strlcat(path, "/", MAXPATHLEN); + strlcat(path, name, MAXPATHLEN); + } + if ((p = strrchr(path, '/')) != NULL && !strcmp(p, "/*")) { + r->anchor_wildcard = 1; + *p = 0; + } + ruleset = pf_find_or_create_ruleset(path); + rs_free(path); + if (ruleset == NULL || ruleset->anchor == NULL) { + printf("pf_anchor_setup: ruleset\n"); + return (1); + } + r->anchor = ruleset->anchor; + r->anchor->refcnt++; + return (0); +} + +int +pf_anchor_copyout(const struct pf_ruleset *rs, const struct pf_rule *r, + struct pfioc_rule *pr) +{ + pr->anchor_call[0] = 0; + if (r->anchor == NULL) + return (0); + if (!r->anchor_relative) { + strlcpy(pr->anchor_call, "/", sizeof(pr->anchor_call)); + strlcat(pr->anchor_call, r->anchor->path, + sizeof(pr->anchor_call)); + } else { + char *a, *p; + int i; + + a = (char *)rs_malloc(MAXPATHLEN); + if (a == NULL) + return (1); + if (rs->anchor == NULL) + a[0] = 0; + else + strlcpy(a, rs->anchor->path, MAXPATHLEN); + for (i = 1; i < r->anchor_relative; ++i) { + if ((p = strrchr(a, '/')) == NULL) + p = a; + *p = 0; + strlcat(pr->anchor_call, "../", + sizeof(pr->anchor_call)); + } + if (strncmp(a, r->anchor->path, strlen(a))) { + printf("pf_anchor_copyout: '%s' '%s'\n", a, + r->anchor->path); + rs_free(a); + return (1); + } + if (strlen(r->anchor->path) > strlen(a)) + strlcat(pr->anchor_call, r->anchor->path + (a[0] ? + strlen(a) + 1 : 0), sizeof(pr->anchor_call)); + rs_free(a); + } + if (r->anchor_wildcard) + strlcat(pr->anchor_call, pr->anchor_call[0] ? "/*" : "*", + sizeof(pr->anchor_call)); + return (0); +} + +void +pf_anchor_remove(struct pf_rule *r) +{ + if (r->anchor == NULL) + return; + if (r->anchor->refcnt <= 0) { + printf("pf_anchor_remove: broken refcount\n"); + r->anchor = NULL; + return; + } + if (!--r->anchor->refcnt) + pf_remove_if_empty_ruleset(&r->anchor->ruleset); + r->anchor = NULL; +} diff --git a/sys/netpfil/pf/pf_table.c b/sys/netpfil/pf/pf_table.c new file mode 100644 index 0000000..fa88045 --- /dev/null +++ b/sys/netpfil/pf/pf_table.c @@ -0,0 +1,2191 @@ +/* $OpenBSD: pf_table.c,v 1.79 2008/10/08 06:24:50 mcbride Exp $ */ + +/* + * Copyright (c) 2002 Cedric Berger + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include "opt_inet.h" +#include "opt_inet6.h" + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mutex.h> +#include <sys/refcount.h> +#include <sys/rwlock.h> +#include <sys/socket.h> +#include <vm/uma.h> + +#include <net/if.h> +#include <net/vnet.h> +#include <net/pfvar.h> + +#define ACCEPT_FLAGS(flags, oklist) \ + do { \ + if ((flags & ~(oklist)) & \ + PFR_FLAG_ALLMASK) \ + return (EINVAL); \ + } while (0) + +#define FILLIN_SIN(sin, addr) \ + do { \ + (sin).sin_len = sizeof(sin); \ + (sin).sin_family = AF_INET; \ + (sin).sin_addr = (addr); \ + } while (0) + +#define FILLIN_SIN6(sin6, addr) \ + do { \ + (sin6).sin6_len = sizeof(sin6); \ + (sin6).sin6_family = AF_INET6; \ + (sin6).sin6_addr = (addr); \ + } while (0) + +#define SWAP(type, a1, a2) \ + do { \ + type tmp = a1; \ + a1 = a2; \ + a2 = tmp; \ + } while (0) + +#define SUNION2PF(su, af) (((af)==AF_INET) ? \ + (struct pf_addr *)&(su)->sin.sin_addr : \ + (struct pf_addr *)&(su)->sin6.sin6_addr) + +#define AF_BITS(af) (((af)==AF_INET)?32:128) +#define ADDR_NETWORK(ad) ((ad)->pfra_net < AF_BITS((ad)->pfra_af)) +#define KENTRY_NETWORK(ke) ((ke)->pfrke_net < AF_BITS((ke)->pfrke_af)) +#define KENTRY_RNF_ROOT(ke) \ + ((((struct radix_node *)(ke))->rn_flags & RNF_ROOT) != 0) + +#define NO_ADDRESSES (-1) +#define ENQUEUE_UNMARKED_ONLY (1) +#define INVERT_NEG_FLAG (1) + +struct pfr_walktree { + enum pfrw_op { + PFRW_MARK, + PFRW_SWEEP, + PFRW_ENQUEUE, + PFRW_GET_ADDRS, + PFRW_GET_ASTATS, + PFRW_POOL_GET, + PFRW_DYNADDR_UPDATE + } pfrw_op; + union { + struct pfr_addr *pfrw1_addr; + struct pfr_astats *pfrw1_astats; + struct pfr_kentryworkq *pfrw1_workq; + struct pfr_kentry *pfrw1_kentry; + struct pfi_dynaddr *pfrw1_dyn; + } pfrw_1; + int pfrw_free; +}; +#define pfrw_addr pfrw_1.pfrw1_addr +#define pfrw_astats pfrw_1.pfrw1_astats +#define pfrw_workq pfrw_1.pfrw1_workq +#define pfrw_kentry pfrw_1.pfrw1_kentry +#define pfrw_dyn pfrw_1.pfrw1_dyn +#define pfrw_cnt pfrw_free + +#define senderr(e) do { rv = (e); goto _bad; } while (0) + +static MALLOC_DEFINE(M_PFTABLE, "pf_table", "pf(4) tables structures"); +static VNET_DEFINE(uma_zone_t, pfr_kentry_z); +#define V_pfr_kentry_z VNET(pfr_kentry_z) +static VNET_DEFINE(uma_zone_t, pfr_kcounters_z); +#define V_pfr_kcounters_z VNET(pfr_kcounters_z) + +static struct pf_addr pfr_ffaddr = { + .addr32 = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff } +}; + +static void pfr_copyout_addr(struct pfr_addr *, + struct pfr_kentry *ke); +static int pfr_validate_addr(struct pfr_addr *); +static void pfr_enqueue_addrs(struct pfr_ktable *, + struct pfr_kentryworkq *, int *, int); +static void pfr_mark_addrs(struct pfr_ktable *); +static struct pfr_kentry + *pfr_lookup_addr(struct pfr_ktable *, + struct pfr_addr *, int); +static struct pfr_kentry *pfr_create_kentry(struct pfr_addr *); +static void pfr_destroy_kentries(struct pfr_kentryworkq *); +static void 
pfr_destroy_kentry(struct pfr_kentry *); +static void pfr_insert_kentries(struct pfr_ktable *, + struct pfr_kentryworkq *, long); +static void pfr_remove_kentries(struct pfr_ktable *, + struct pfr_kentryworkq *); +static void pfr_clstats_kentries(struct pfr_kentryworkq *, long, + int); +static void pfr_reset_feedback(struct pfr_addr *, int); +static void pfr_prepare_network(union sockaddr_union *, int, int); +static int pfr_route_kentry(struct pfr_ktable *, + struct pfr_kentry *); +static int pfr_unroute_kentry(struct pfr_ktable *, + struct pfr_kentry *); +static int pfr_walktree(struct radix_node *, void *); +static int pfr_validate_table(struct pfr_table *, int, int); +static int pfr_fix_anchor(char *); +static void pfr_commit_ktable(struct pfr_ktable *, long); +static void pfr_insert_ktables(struct pfr_ktableworkq *); +static void pfr_insert_ktable(struct pfr_ktable *); +static void pfr_setflags_ktables(struct pfr_ktableworkq *); +static void pfr_setflags_ktable(struct pfr_ktable *, int); +static void pfr_clstats_ktables(struct pfr_ktableworkq *, long, + int); +static void pfr_clstats_ktable(struct pfr_ktable *, long, int); +static struct pfr_ktable + *pfr_create_ktable(struct pfr_table *, long, int); +static void pfr_destroy_ktables(struct pfr_ktableworkq *, int); +static void pfr_destroy_ktable(struct pfr_ktable *, int); +static int pfr_ktable_compare(struct pfr_ktable *, + struct pfr_ktable *); +static struct pfr_ktable + *pfr_lookup_table(struct pfr_table *); +static void pfr_clean_node_mask(struct pfr_ktable *, + struct pfr_kentryworkq *); +static int pfr_table_count(struct pfr_table *, int); +static int pfr_skip_table(struct pfr_table *, + struct pfr_ktable *, int); +static struct pfr_kentry + *pfr_kentry_byidx(struct pfr_ktable *, int, int); + +static RB_PROTOTYPE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare); +static RB_GENERATE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare); + +struct pfr_ktablehead pfr_ktables; +struct pfr_table pfr_nulltable; +int pfr_ktable_cnt; + +void +pfr_initialize(void) +{ + + V_pfr_kentry_z = uma_zcreate("pf table entries", + sizeof(struct pfr_kentry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, + 0); + V_pfr_kcounters_z = uma_zcreate("pf table counters", + sizeof(struct pfr_kcounters), NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, 0); + V_pf_limits[PF_LIMIT_TABLE_ENTRIES].zone = V_pfr_kentry_z; + V_pf_limits[PF_LIMIT_TABLE_ENTRIES].limit = PFR_KENTRY_HIWAT; +} + +void +pfr_cleanup(void) +{ + + uma_zdestroy(V_pfr_kentry_z); + uma_zdestroy(V_pfr_kcounters_z); +} + +int +pfr_clr_addrs(struct pfr_table *tbl, int *ndel, int flags) +{ + struct pfr_ktable *kt; + struct pfr_kentryworkq workq; + + PF_RULES_WASSERT(); + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); + if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + if (kt->pfrkt_flags & PFR_TFLAG_CONST) + return (EPERM); + pfr_enqueue_addrs(kt, &workq, ndel, 0); + + if (!(flags & PFR_FLAG_DUMMY)) { + pfr_remove_kentries(kt, &workq); + KASSERT(kt->pfrkt_cnt == 0, ("%s: non-null pfrkt_cnt", __func__)); + } + return (0); +} + +int +pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, + int *nadd, int flags) +{ + struct pfr_ktable *kt, *tmpkt; + struct pfr_kentryworkq workq; + struct pfr_kentry *p, *q; + struct pfr_addr *ad; + int i, rv, xadd = 0; + long tzero = time_second; + + PF_RULES_WASSERT(); + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | 
PFR_FLAG_FEEDBACK); + if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + if (kt->pfrkt_flags & PFR_TFLAG_CONST) + return (EPERM); + tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0); + if (tmpkt == NULL) + return (ENOMEM); + SLIST_INIT(&workq); + for (i = 0, ad = addr; i < size; i++, ad++) { + if (pfr_validate_addr(ad)) + senderr(EINVAL); + p = pfr_lookup_addr(kt, ad, 1); + q = pfr_lookup_addr(tmpkt, ad, 1); + if (flags & PFR_FLAG_FEEDBACK) { + if (q != NULL) + ad->pfra_fback = PFR_FB_DUPLICATE; + else if (p == NULL) + ad->pfra_fback = PFR_FB_ADDED; + else if (p->pfrke_not != ad->pfra_not) + ad->pfra_fback = PFR_FB_CONFLICT; + else + ad->pfra_fback = PFR_FB_NONE; + } + if (p == NULL && q == NULL) { + p = pfr_create_kentry(ad); + if (p == NULL) + senderr(ENOMEM); + if (pfr_route_kentry(tmpkt, p)) { + pfr_destroy_kentry(p); + ad->pfra_fback = PFR_FB_NONE; + } else { + SLIST_INSERT_HEAD(&workq, p, pfrke_workq); + xadd++; + } + } + } + pfr_clean_node_mask(tmpkt, &workq); + if (!(flags & PFR_FLAG_DUMMY)) + pfr_insert_kentries(kt, &workq, tzero); + else + pfr_destroy_kentries(&workq); + if (nadd != NULL) + *nadd = xadd; + pfr_destroy_ktable(tmpkt, 0); + return (0); +_bad: + pfr_clean_node_mask(tmpkt, &workq); + pfr_destroy_kentries(&workq); + if (flags & PFR_FLAG_FEEDBACK) + pfr_reset_feedback(addr, size); + pfr_destroy_ktable(tmpkt, 0); + return (rv); +} + +int +pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, + int *ndel, int flags) +{ + struct pfr_ktable *kt; + struct pfr_kentryworkq workq; + struct pfr_kentry *p; + struct pfr_addr *ad; + int i, rv, xdel = 0, log = 1; + + PF_RULES_WASSERT(); + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK); + if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + if (kt->pfrkt_flags & PFR_TFLAG_CONST) + return (EPERM); + /* + * there are two algorithms to choose from here. + * with: + * n: number of addresses to delete + * N: number of addresses in the table + * + * one is O(N) and is better for large 'n' + * one is O(n*LOG(N)) and is better for small 'n' + * + * following code try to decide which one is best. 
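 * (The shift loop below computes log ~ log2(N) + 2; for a table of
 * N = 65536 entries it yields log = 18, so deleting more than
 * 65536 / 18 = 3640 addresses takes the O(N) full-table mark pass
 * instead of per-address lookups.)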
+ */ + for (i = kt->pfrkt_cnt; i > 0; i >>= 1) + log++; + if (size > kt->pfrkt_cnt/log) { + /* full table scan */ + pfr_mark_addrs(kt); + } else { + /* iterate over addresses to delete */ + for (i = 0, ad = addr; i < size; i++, ad++) { + if (pfr_validate_addr(ad)) + return (EINVAL); + p = pfr_lookup_addr(kt, ad, 1); + if (p != NULL) + p->pfrke_mark = 0; + } + } + SLIST_INIT(&workq); + for (i = 0, ad = addr; i < size; i++, ad++) { + if (pfr_validate_addr(ad)) + senderr(EINVAL); + p = pfr_lookup_addr(kt, ad, 1); + if (flags & PFR_FLAG_FEEDBACK) { + if (p == NULL) + ad->pfra_fback = PFR_FB_NONE; + else if (p->pfrke_not != ad->pfra_not) + ad->pfra_fback = PFR_FB_CONFLICT; + else if (p->pfrke_mark) + ad->pfra_fback = PFR_FB_DUPLICATE; + else + ad->pfra_fback = PFR_FB_DELETED; + } + if (p != NULL && p->pfrke_not == ad->pfra_not && + !p->pfrke_mark) { + p->pfrke_mark = 1; + SLIST_INSERT_HEAD(&workq, p, pfrke_workq); + xdel++; + } + } + if (!(flags & PFR_FLAG_DUMMY)) + pfr_remove_kentries(kt, &workq); + if (ndel != NULL) + *ndel = xdel; + return (0); +_bad: + if (flags & PFR_FLAG_FEEDBACK) + pfr_reset_feedback(addr, size); + return (rv); +} + +int +pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, + int *size2, int *nadd, int *ndel, int *nchange, int flags, + u_int32_t ignore_pfrt_flags) +{ + struct pfr_ktable *kt, *tmpkt; + struct pfr_kentryworkq addq, delq, changeq; + struct pfr_kentry *p, *q; + struct pfr_addr ad; + int i, rv, xadd = 0, xdel = 0, xchange = 0; + long tzero = time_second; + + PF_RULES_WASSERT(); + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK); + if (pfr_validate_table(tbl, ignore_pfrt_flags, flags & + PFR_FLAG_USERIOCTL)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + if (kt->pfrkt_flags & PFR_TFLAG_CONST) + return (EPERM); + tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0); + if (tmpkt == NULL) + return (ENOMEM); + pfr_mark_addrs(kt); + SLIST_INIT(&addq); + SLIST_INIT(&delq); + SLIST_INIT(&changeq); + for (i = 0; i < size; i++) { + /* + * XXXGL: undertand pf_if usage of this function + * and make ad a moving pointer + */ + bcopy(addr + i, &ad, sizeof(ad)); + if (pfr_validate_addr(&ad)) + senderr(EINVAL); + ad.pfra_fback = PFR_FB_NONE; + p = pfr_lookup_addr(kt, &ad, 1); + if (p != NULL) { + if (p->pfrke_mark) { + ad.pfra_fback = PFR_FB_DUPLICATE; + goto _skip; + } + p->pfrke_mark = 1; + if (p->pfrke_not != ad.pfra_not) { + SLIST_INSERT_HEAD(&changeq, p, pfrke_workq); + ad.pfra_fback = PFR_FB_CHANGED; + xchange++; + } + } else { + q = pfr_lookup_addr(tmpkt, &ad, 1); + if (q != NULL) { + ad.pfra_fback = PFR_FB_DUPLICATE; + goto _skip; + } + p = pfr_create_kentry(&ad); + if (p == NULL) + senderr(ENOMEM); + if (pfr_route_kentry(tmpkt, p)) { + pfr_destroy_kentry(p); + ad.pfra_fback = PFR_FB_NONE; + } else { + SLIST_INSERT_HEAD(&addq, p, pfrke_workq); + ad.pfra_fback = PFR_FB_ADDED; + xadd++; + } + } +_skip: + if (flags & PFR_FLAG_FEEDBACK) + bcopy(&ad, addr + i, sizeof(ad)); + } + pfr_enqueue_addrs(kt, &delq, &xdel, ENQUEUE_UNMARKED_ONLY); + if ((flags & PFR_FLAG_FEEDBACK) && *size2) { + if (*size2 < size+xdel) { + *size2 = size+xdel; + senderr(0); + } + i = 0; + SLIST_FOREACH(p, &delq, pfrke_workq) { + pfr_copyout_addr(&ad, p); + ad.pfra_fback = PFR_FB_DELETED; + bcopy(&ad, addr + size + i, sizeof(ad)); + i++; + } + } + pfr_clean_node_mask(tmpkt, &addq); + if (!(flags & PFR_FLAG_DUMMY)) { + pfr_insert_kentries(kt, &addq, tzero); + pfr_remove_kentries(kt, &delq); + 
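/* Changed entries stay routed; pfr_clstats_kentries() with INVERT_NEG_FLAG flips pfrke_not and restarts their stats at tzero. */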
pfr_clstats_kentries(&changeq, tzero, INVERT_NEG_FLAG); + } else + pfr_destroy_kentries(&addq); + if (nadd != NULL) + *nadd = xadd; + if (ndel != NULL) + *ndel = xdel; + if (nchange != NULL) + *nchange = xchange; + if ((flags & PFR_FLAG_FEEDBACK) && size2) + *size2 = size+xdel; + pfr_destroy_ktable(tmpkt, 0); + return (0); +_bad: + pfr_clean_node_mask(tmpkt, &addq); + pfr_destroy_kentries(&addq); + if (flags & PFR_FLAG_FEEDBACK) + pfr_reset_feedback(addr, size); + pfr_destroy_ktable(tmpkt, 0); + return (rv); +} + +int +pfr_tst_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, + int *nmatch, int flags) +{ + struct pfr_ktable *kt; + struct pfr_kentry *p; + struct pfr_addr *ad; + int i, xmatch = 0; + + PF_RULES_RASSERT(); + + ACCEPT_FLAGS(flags, PFR_FLAG_REPLACE); + if (pfr_validate_table(tbl, 0, 0)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + + for (i = 0, ad = addr; i < size; i++, ad++) { + if (pfr_validate_addr(ad)) + return (EINVAL); + if (ADDR_NETWORK(ad)) + return (EINVAL); + p = pfr_lookup_addr(kt, ad, 0); + if (flags & PFR_FLAG_REPLACE) + pfr_copyout_addr(ad, p); + ad->pfra_fback = (p == NULL) ? PFR_FB_NONE : + (p->pfrke_not ? PFR_FB_NOTMATCH : PFR_FB_MATCH); + if (p != NULL && !p->pfrke_not) + xmatch++; + } + if (nmatch != NULL) + *nmatch = xmatch; + return (0); +} + +int +pfr_get_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int *size, + int flags) +{ + struct pfr_ktable *kt; + struct pfr_walktree w; + int rv; + + PF_RULES_RASSERT(); + + ACCEPT_FLAGS(flags, 0); + if (pfr_validate_table(tbl, 0, 0)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + if (kt->pfrkt_cnt > *size) { + *size = kt->pfrkt_cnt; + return (0); + } + + bzero(&w, sizeof(w)); + w.pfrw_op = PFRW_GET_ADDRS; + w.pfrw_addr = addr; + w.pfrw_free = kt->pfrkt_cnt; + rv = kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w); + if (!rv) + rv = kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, + &w); + if (rv) + return (rv); + + KASSERT(w.pfrw_free == 0, ("%s: corruption detected (%d)", __func__, + w.pfrw_free)); + + *size = kt->pfrkt_cnt; + return (0); +} + +int +pfr_get_astats(struct pfr_table *tbl, struct pfr_astats *addr, int *size, + int flags) +{ + struct pfr_ktable *kt; + struct pfr_walktree w; + struct pfr_kentryworkq workq; + int rv; + long tzero = time_second; + + PF_RULES_RASSERT(); + + /* XXX PFR_FLAG_CLSTATS disabled */ + ACCEPT_FLAGS(flags, 0); + if (pfr_validate_table(tbl, 0, 0)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + if (kt->pfrkt_cnt > *size) { + *size = kt->pfrkt_cnt; + return (0); + } + + bzero(&w, sizeof(w)); + w.pfrw_op = PFRW_GET_ASTATS; + w.pfrw_astats = addr; + w.pfrw_free = kt->pfrkt_cnt; + rv = kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w); + if (!rv) + rv = kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, + &w); + if (!rv && (flags & PFR_FLAG_CLSTATS)) { + pfr_enqueue_addrs(kt, &workq, NULL, 0); + pfr_clstats_kentries(&workq, tzero, 0); + } + if (rv) + return (rv); + + if (w.pfrw_free) { + printf("pfr_get_astats: corruption detected (%d).\n", + w.pfrw_free); + return (ENOTTY); + } + *size = kt->pfrkt_cnt; + return (0); +} + +int +pfr_clr_astats(struct pfr_table *tbl, struct pfr_addr *addr, int size, + int *nzero, int flags) +{ + struct pfr_ktable *kt; + struct pfr_kentryworkq workq; + struct 
pfr_kentry *p; + struct pfr_addr *ad; + int i, rv, xzero = 0; + + PF_RULES_WASSERT(); + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK); + if (pfr_validate_table(tbl, 0, 0)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + SLIST_INIT(&workq); + for (i = 0, ad = addr; i < size; i++, ad++) { + if (pfr_validate_addr(ad)) + senderr(EINVAL); + p = pfr_lookup_addr(kt, ad, 1); + if (flags & PFR_FLAG_FEEDBACK) { + ad->pfra_fback = (p != NULL) ? + PFR_FB_CLEARED : PFR_FB_NONE; + } + if (p != NULL) { + SLIST_INSERT_HEAD(&workq, p, pfrke_workq); + xzero++; + } + } + + if (!(flags & PFR_FLAG_DUMMY)) + pfr_clstats_kentries(&workq, 0, 0); + if (nzero != NULL) + *nzero = xzero; + return (0); +_bad: + if (flags & PFR_FLAG_FEEDBACK) + pfr_reset_feedback(addr, size); + return (rv); +} + +static int +pfr_validate_addr(struct pfr_addr *ad) +{ + int i; + + switch (ad->pfra_af) { +#ifdef INET + case AF_INET: + if (ad->pfra_net > 32) + return (-1); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (ad->pfra_net > 128) + return (-1); + break; +#endif /* INET6 */ + default: + return (-1); + } + if (ad->pfra_net < 128 && + (((caddr_t)ad)[ad->pfra_net/8] & (0xFF >> (ad->pfra_net%8)))) + return (-1); + for (i = (ad->pfra_net+7)/8; i < sizeof(ad->pfra_u); i++) + if (((caddr_t)ad)[i]) + return (-1); + if (ad->pfra_not && ad->pfra_not != 1) + return (-1); + if (ad->pfra_fback) + return (-1); + return (0); +} + +static void +pfr_enqueue_addrs(struct pfr_ktable *kt, struct pfr_kentryworkq *workq, + int *naddr, int sweep) +{ + struct pfr_walktree w; + + SLIST_INIT(workq); + bzero(&w, sizeof(w)); + w.pfrw_op = sweep ? PFRW_SWEEP : PFRW_ENQUEUE; + w.pfrw_workq = workq; + if (kt->pfrkt_ip4 != NULL) + if (kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, + &w)) + printf("pfr_enqueue_addrs: IPv4 walktree failed.\n"); + if (kt->pfrkt_ip6 != NULL) + if (kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, + &w)) + printf("pfr_enqueue_addrs: IPv6 walktree failed.\n"); + if (naddr != NULL) + *naddr = w.pfrw_cnt; +} + +static void +pfr_mark_addrs(struct pfr_ktable *kt) +{ + struct pfr_walktree w; + + bzero(&w, sizeof(w)); + w.pfrw_op = PFRW_MARK; + if (kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w)) + printf("pfr_mark_addrs: IPv4 walktree failed.\n"); + if (kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, &w)) + printf("pfr_mark_addrs: IPv6 walktree failed.\n"); +} + + +static struct pfr_kentry * +pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact) +{ + union sockaddr_union sa, mask; + struct radix_node_head *head = NULL; + struct pfr_kentry *ke; + + bzero(&sa, sizeof(sa)); + if (ad->pfra_af == AF_INET) { + FILLIN_SIN(sa.sin, ad->pfra_ip4addr); + head = kt->pfrkt_ip4; + } else if ( ad->pfra_af == AF_INET6 ) { + FILLIN_SIN6(sa.sin6, ad->pfra_ip6addr); + head = kt->pfrkt_ip6; + } + if (ADDR_NETWORK(ad)) { + pfr_prepare_network(&mask, ad->pfra_af, ad->pfra_net); + ke = (struct pfr_kentry *)rn_lookup(&sa, &mask, head); + if (ke && KENTRY_RNF_ROOT(ke)) + ke = NULL; + } else { + ke = (struct pfr_kentry *)rn_match(&sa, head); + if (ke && KENTRY_RNF_ROOT(ke)) + ke = NULL; + if (exact && ke && KENTRY_NETWORK(ke)) + ke = NULL; + } + return (ke); +} + +static struct pfr_kentry * +pfr_create_kentry(struct pfr_addr *ad) +{ + struct pfr_kentry *ke; + + ke = uma_zalloc(V_pfr_kentry_z, M_NOWAIT | M_ZERO); + if (ke == NULL) + return (NULL); + + if (ad->pfra_af == AF_INET) + 
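+		/* keep the address in sockaddr form; pfr_route_kentry()
+		 * keys the radix tree on pfrke_sa */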
FILLIN_SIN(ke->pfrke_sa.sin, ad->pfra_ip4addr); + else if (ad->pfra_af == AF_INET6) + FILLIN_SIN6(ke->pfrke_sa.sin6, ad->pfra_ip6addr); + ke->pfrke_af = ad->pfra_af; + ke->pfrke_net = ad->pfra_net; + ke->pfrke_not = ad->pfra_not; + return (ke); +} + +static void +pfr_destroy_kentries(struct pfr_kentryworkq *workq) +{ + struct pfr_kentry *p, *q; + + for (p = SLIST_FIRST(workq); p != NULL; p = q) { + q = SLIST_NEXT(p, pfrke_workq); + pfr_destroy_kentry(p); + } +} + +static void +pfr_destroy_kentry(struct pfr_kentry *ke) +{ + if (ke->pfrke_counters) + uma_zfree(V_pfr_kcounters_z, ke->pfrke_counters); + uma_zfree(V_pfr_kentry_z, ke); +} + +static void +pfr_insert_kentries(struct pfr_ktable *kt, + struct pfr_kentryworkq *workq, long tzero) +{ + struct pfr_kentry *p; + int rv, n = 0; + + SLIST_FOREACH(p, workq, pfrke_workq) { + rv = pfr_route_kentry(kt, p); + if (rv) { + printf("pfr_insert_kentries: cannot route entry " + "(code=%d).\n", rv); + break; + } + p->pfrke_tzero = tzero; + n++; + } + kt->pfrkt_cnt += n; +} + +int +pfr_insert_kentry(struct pfr_ktable *kt, struct pfr_addr *ad, long tzero) +{ + struct pfr_kentry *p; + int rv; + + p = pfr_lookup_addr(kt, ad, 1); + if (p != NULL) + return (0); + p = pfr_create_kentry(ad); + if (p == NULL) + return (EINVAL); + + rv = pfr_route_kentry(kt, p); + if (rv) + return (rv); + + p->pfrke_tzero = tzero; + kt->pfrkt_cnt++; + + return (0); +} + +static void +pfr_remove_kentries(struct pfr_ktable *kt, + struct pfr_kentryworkq *workq) +{ + struct pfr_kentry *p; + int n = 0; + + SLIST_FOREACH(p, workq, pfrke_workq) { + pfr_unroute_kentry(kt, p); + n++; + } + kt->pfrkt_cnt -= n; + pfr_destroy_kentries(workq); +} + +static void +pfr_clean_node_mask(struct pfr_ktable *kt, + struct pfr_kentryworkq *workq) +{ + struct pfr_kentry *p; + + SLIST_FOREACH(p, workq, pfrke_workq) + pfr_unroute_kentry(kt, p); +} + +static void +pfr_clstats_kentries(struct pfr_kentryworkq *workq, long tzero, int negchange) +{ + struct pfr_kentry *p; + + SLIST_FOREACH(p, workq, pfrke_workq) { + if (negchange) + p->pfrke_not = !p->pfrke_not; + if (p->pfrke_counters) { + uma_zfree(V_pfr_kcounters_z, p->pfrke_counters); + p->pfrke_counters = NULL; + } + p->pfrke_tzero = tzero; + } +} + +static void +pfr_reset_feedback(struct pfr_addr *addr, int size) +{ + struct pfr_addr *ad; + int i; + + for (i = 0, ad = addr; i < size; i++, ad++) + ad->pfra_fback = PFR_FB_NONE; +} + +static void +pfr_prepare_network(union sockaddr_union *sa, int af, int net) +{ + int i; + + bzero(sa, sizeof(*sa)); + if (af == AF_INET) { + sa->sin.sin_len = sizeof(sa->sin); + sa->sin.sin_family = AF_INET; + sa->sin.sin_addr.s_addr = net ? htonl(-1 << (32-net)) : 0; + } else if (af == AF_INET6) { + sa->sin6.sin6_len = sizeof(sa->sin6); + sa->sin6.sin6_family = AF_INET6; + for (i = 0; i < 4; i++) { + if (net <= 32) { + sa->sin6.sin6_addr.s6_addr32[i] = + net ? 
htonl(-1 << (32-net)) : 0; + break; + } + sa->sin6.sin6_addr.s6_addr32[i] = 0xFFFFFFFF; + net -= 32; + } + } +} + +static int +pfr_route_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) +{ + union sockaddr_union mask; + struct radix_node *rn; + struct radix_node_head *head = NULL; + + bzero(ke->pfrke_node, sizeof(ke->pfrke_node)); + if (ke->pfrke_af == AF_INET) + head = kt->pfrkt_ip4; + else if (ke->pfrke_af == AF_INET6) + head = kt->pfrkt_ip6; + + if (KENTRY_NETWORK(ke)) { + pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net); + rn = rn_addroute(&ke->pfrke_sa, &mask, head, ke->pfrke_node); + } else + rn = rn_addroute(&ke->pfrke_sa, NULL, head, ke->pfrke_node); + + return (rn == NULL ? -1 : 0); +} + +static int +pfr_unroute_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) +{ + union sockaddr_union mask; + struct radix_node *rn; + struct radix_node_head *head = NULL; + + if (ke->pfrke_af == AF_INET) + head = kt->pfrkt_ip4; + else if (ke->pfrke_af == AF_INET6) + head = kt->pfrkt_ip6; + + if (KENTRY_NETWORK(ke)) { + pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net); + rn = rn_delete(&ke->pfrke_sa, &mask, head); + } else + rn = rn_delete(&ke->pfrke_sa, NULL, head); + + if (rn == NULL) { + printf("pfr_unroute_kentry: delete failed.\n"); + return (-1); + } + return (0); +} + +static void +pfr_copyout_addr(struct pfr_addr *ad, struct pfr_kentry *ke) +{ + bzero(ad, sizeof(*ad)); + if (ke == NULL) + return; + ad->pfra_af = ke->pfrke_af; + ad->pfra_net = ke->pfrke_net; + ad->pfra_not = ke->pfrke_not; + if (ad->pfra_af == AF_INET) + ad->pfra_ip4addr = ke->pfrke_sa.sin.sin_addr; + else if (ad->pfra_af == AF_INET6) + ad->pfra_ip6addr = ke->pfrke_sa.sin6.sin6_addr; +} + +static int +pfr_walktree(struct radix_node *rn, void *arg) +{ + struct pfr_kentry *ke = (struct pfr_kentry *)rn; + struct pfr_walktree *w = arg; + + switch (w->pfrw_op) { + case PFRW_MARK: + ke->pfrke_mark = 0; + break; + case PFRW_SWEEP: + if (ke->pfrke_mark) + break; + /* FALLTHROUGH */ + case PFRW_ENQUEUE: + SLIST_INSERT_HEAD(w->pfrw_workq, ke, pfrke_workq); + w->pfrw_cnt++; + break; + case PFRW_GET_ADDRS: + if (w->pfrw_free-- > 0) { + pfr_copyout_addr(w->pfrw_addr, ke); + w->pfrw_addr++; + } + break; + case PFRW_GET_ASTATS: + if (w->pfrw_free-- > 0) { + struct pfr_astats as; + + pfr_copyout_addr(&as.pfras_a, ke); + + if (ke->pfrke_counters) { + bcopy(ke->pfrke_counters->pfrkc_packets, + as.pfras_packets, sizeof(as.pfras_packets)); + bcopy(ke->pfrke_counters->pfrkc_bytes, + as.pfras_bytes, sizeof(as.pfras_bytes)); + } else { + bzero(as.pfras_packets, sizeof(as.pfras_packets)); + bzero(as.pfras_bytes, sizeof(as.pfras_bytes)); + as.pfras_a.pfra_fback = PFR_FB_NOCOUNT; + } + as.pfras_tzero = ke->pfrke_tzero; + + bcopy(&as, w->pfrw_astats, sizeof(as)); + w->pfrw_astats++; + } + break; + case PFRW_POOL_GET: + if (ke->pfrke_not) + break; /* negative entries are ignored */ + if (!w->pfrw_cnt--) { + w->pfrw_kentry = ke; + return (1); /* finish search */ + } + break; + case PFRW_DYNADDR_UPDATE: + { + union sockaddr_union pfr_mask; + + if (ke->pfrke_af == AF_INET) { + if (w->pfrw_dyn->pfid_acnt4++ > 0) + break; + pfr_prepare_network(&pfr_mask, AF_INET, ke->pfrke_net); + w->pfrw_dyn->pfid_addr4 = *SUNION2PF(&ke->pfrke_sa, + AF_INET); + w->pfrw_dyn->pfid_mask4 = *SUNION2PF(&pfr_mask, + AF_INET); + } else if (ke->pfrke_af == AF_INET6){ + if (w->pfrw_dyn->pfid_acnt6++ > 0) + break; + pfr_prepare_network(&pfr_mask, AF_INET6, ke->pfrke_net); + w->pfrw_dyn->pfid_addr6 = *SUNION2PF(&ke->pfrke_sa, + AF_INET6); + 
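+			/* as in the IPv4 case above, only the first
+			 * entry per family is cached */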
w->pfrw_dyn->pfid_mask6 = *SUNION2PF(&pfr_mask, + AF_INET6); + } + break; + } + } + return (0); +} + +int +pfr_clr_tables(struct pfr_table *filter, int *ndel, int flags) +{ + struct pfr_ktableworkq workq; + struct pfr_ktable *p; + int xdel = 0; + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_ALLRSETS); + if (pfr_fix_anchor(filter->pfrt_anchor)) + return (EINVAL); + if (pfr_table_count(filter, flags) < 0) + return (ENOENT); + + SLIST_INIT(&workq); + RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { + if (pfr_skip_table(filter, p, flags)) + continue; + if (!strcmp(p->pfrkt_anchor, PF_RESERVED_ANCHOR)) + continue; + if (!(p->pfrkt_flags & PFR_TFLAG_ACTIVE)) + continue; + p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_ACTIVE; + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + xdel++; + } + if (!(flags & PFR_FLAG_DUMMY)) + pfr_setflags_ktables(&workq); + if (ndel != NULL) + *ndel = xdel; + return (0); +} + +int +pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags) +{ + struct pfr_ktableworkq addq, changeq; + struct pfr_ktable *p, *q, *r, key; + int i, rv, xadd = 0; + long tzero = time_second; + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); + SLIST_INIT(&addq); + SLIST_INIT(&changeq); + for (i = 0; i < size; i++) { + bcopy(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t)); + if (pfr_validate_table(&key.pfrkt_t, PFR_TFLAG_USRMASK, + flags & PFR_FLAG_USERIOCTL)) + senderr(EINVAL); + key.pfrkt_flags |= PFR_TFLAG_ACTIVE; + p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (p == NULL) { + p = pfr_create_ktable(&key.pfrkt_t, tzero, 1); + if (p == NULL) + senderr(ENOMEM); + SLIST_FOREACH(q, &addq, pfrkt_workq) { + if (!pfr_ktable_compare(p, q)) + goto _skip; + } + SLIST_INSERT_HEAD(&addq, p, pfrkt_workq); + xadd++; + if (!key.pfrkt_anchor[0]) + goto _skip; + + /* find or create root table */ + bzero(key.pfrkt_anchor, sizeof(key.pfrkt_anchor)); + r = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (r != NULL) { + p->pfrkt_root = r; + goto _skip; + } + SLIST_FOREACH(q, &addq, pfrkt_workq) { + if (!pfr_ktable_compare(&key, q)) { + p->pfrkt_root = q; + goto _skip; + } + } + key.pfrkt_flags = 0; + r = pfr_create_ktable(&key.pfrkt_t, 0, 1); + if (r == NULL) + senderr(ENOMEM); + SLIST_INSERT_HEAD(&addq, r, pfrkt_workq); + p->pfrkt_root = r; + } else if (!(p->pfrkt_flags & PFR_TFLAG_ACTIVE)) { + SLIST_FOREACH(q, &changeq, pfrkt_workq) + if (!pfr_ktable_compare(&key, q)) + goto _skip; + p->pfrkt_nflags = (p->pfrkt_flags & + ~PFR_TFLAG_USRMASK) | key.pfrkt_flags; + SLIST_INSERT_HEAD(&changeq, p, pfrkt_workq); + xadd++; + } +_skip: + ; + } + if (!(flags & PFR_FLAG_DUMMY)) { + pfr_insert_ktables(&addq); + pfr_setflags_ktables(&changeq); + } else + pfr_destroy_ktables(&addq, 0); + if (nadd != NULL) + *nadd = xadd; + return (0); +_bad: + pfr_destroy_ktables(&addq, 0); + return (rv); +} + +int +pfr_del_tables(struct pfr_table *tbl, int size, int *ndel, int flags) +{ + struct pfr_ktableworkq workq; + struct pfr_ktable *p, *q, key; + int i, xdel = 0; + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); + SLIST_INIT(&workq); + for (i = 0; i < size; i++) { + bcopy(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t)); + if (pfr_validate_table(&key.pfrkt_t, 0, + flags & PFR_FLAG_USERIOCTL)) + return (EINVAL); + p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (p != NULL && (p->pfrkt_flags & PFR_TFLAG_ACTIVE)) { + SLIST_FOREACH(q, &workq, pfrkt_workq) + if (!pfr_ktable_compare(p, q)) + goto _skip; + p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_ACTIVE; + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + xdel++; + } +_skip: + ; + } + + if 
(!(flags & PFR_FLAG_DUMMY)) + pfr_setflags_ktables(&workq); + if (ndel != NULL) + *ndel = xdel; + return (0); +} + +int +pfr_get_tables(struct pfr_table *filter, struct pfr_table *tbl, int *size, + int flags) +{ + struct pfr_ktable *p; + int n, nn; + + PF_RULES_RASSERT(); + + ACCEPT_FLAGS(flags, PFR_FLAG_ALLRSETS); + if (pfr_fix_anchor(filter->pfrt_anchor)) + return (EINVAL); + n = nn = pfr_table_count(filter, flags); + if (n < 0) + return (ENOENT); + if (n > *size) { + *size = n; + return (0); + } + RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { + if (pfr_skip_table(filter, p, flags)) + continue; + if (n-- <= 0) + continue; + bcopy(&p->pfrkt_t, tbl++, sizeof(*tbl)); + } + + KASSERT(n == 0, ("%s: corruption detected (%d)", __func__, n)); + + *size = nn; + return (0); +} + +int +pfr_get_tstats(struct pfr_table *filter, struct pfr_tstats *tbl, int *size, + int flags) +{ + struct pfr_ktable *p; + struct pfr_ktableworkq workq; + int n, nn; + long tzero = time_second; + + /* XXX PFR_FLAG_CLSTATS disabled */ + ACCEPT_FLAGS(flags, PFR_FLAG_ALLRSETS); + if (pfr_fix_anchor(filter->pfrt_anchor)) + return (EINVAL); + n = nn = pfr_table_count(filter, flags); + if (n < 0) + return (ENOENT); + if (n > *size) { + *size = n; + return (0); + } + SLIST_INIT(&workq); + RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { + if (pfr_skip_table(filter, p, flags)) + continue; + if (n-- <= 0) + continue; + bcopy(&p->pfrkt_ts, tbl++, sizeof(*tbl)); + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + } + if (flags & PFR_FLAG_CLSTATS) + pfr_clstats_ktables(&workq, tzero, + flags & PFR_FLAG_ADDRSTOO); + + KASSERT(n == 0, ("%s: corruption detected (%d)", __func__, n)); + + *size = nn; + return (0); +} + +int +pfr_clr_tstats(struct pfr_table *tbl, int size, int *nzero, int flags) +{ + struct pfr_ktableworkq workq; + struct pfr_ktable *p, key; + int i, xzero = 0; + long tzero = time_second; + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_ADDRSTOO); + SLIST_INIT(&workq); + for (i = 0; i < size; i++) { + bcopy(tbl + i, &key.pfrkt_t, sizeof(key.pfrkt_t)); + if (pfr_validate_table(&key.pfrkt_t, 0, 0)) + return (EINVAL); + p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (p != NULL) { + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + xzero++; + } + } + if (!(flags & PFR_FLAG_DUMMY)) + pfr_clstats_ktables(&workq, tzero, flags & PFR_FLAG_ADDRSTOO); + if (nzero != NULL) + *nzero = xzero; + return (0); +} + +int +pfr_set_tflags(struct pfr_table *tbl, int size, int setflag, int clrflag, + int *nchange, int *ndel, int flags) +{ + struct pfr_ktableworkq workq; + struct pfr_ktable *p, *q, key; + int i, xchange = 0, xdel = 0; + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); + if ((setflag & ~PFR_TFLAG_USRMASK) || + (clrflag & ~PFR_TFLAG_USRMASK) || + (setflag & clrflag)) + return (EINVAL); + SLIST_INIT(&workq); + for (i = 0; i < size; i++) { + bcopy(tbl + i, &key.pfrkt_t, sizeof(key.pfrkt_t)); + if (pfr_validate_table(&key.pfrkt_t, 0, + flags & PFR_FLAG_USERIOCTL)) + return (EINVAL); + p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (p != NULL && (p->pfrkt_flags & PFR_TFLAG_ACTIVE)) { + p->pfrkt_nflags = (p->pfrkt_flags | setflag) & + ~clrflag; + if (p->pfrkt_nflags == p->pfrkt_flags) + goto _skip; + SLIST_FOREACH(q, &workq, pfrkt_workq) + if (!pfr_ktable_compare(p, q)) + goto _skip; + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + if ((p->pfrkt_flags & PFR_TFLAG_PERSIST) && + (clrflag & PFR_TFLAG_PERSIST) && + !(p->pfrkt_flags & PFR_TFLAG_REFERENCED)) + xdel++; + else + xchange++; + } +_skip: + ; + } + if (!(flags & PFR_FLAG_DUMMY)) + 
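+		/* commit the flag words staged in pfrkt_nflags above */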
pfr_setflags_ktables(&workq); + if (nchange != NULL) + *nchange = xchange; + if (ndel != NULL) + *ndel = xdel; + return (0); +} + +int +pfr_ina_begin(struct pfr_table *trs, u_int32_t *ticket, int *ndel, int flags) +{ + struct pfr_ktableworkq workq; + struct pfr_ktable *p; + struct pf_ruleset *rs; + int xdel = 0; + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); + rs = pf_find_or_create_ruleset(trs->pfrt_anchor); + if (rs == NULL) + return (ENOMEM); + SLIST_INIT(&workq); + RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { + if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) || + pfr_skip_table(trs, p, 0)) + continue; + p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_INACTIVE; + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + xdel++; + } + if (!(flags & PFR_FLAG_DUMMY)) { + pfr_setflags_ktables(&workq); + if (ticket != NULL) + *ticket = ++rs->tticket; + rs->topen = 1; + } else + pf_remove_if_empty_ruleset(rs); + if (ndel != NULL) + *ndel = xdel; + return (0); +} + +int +pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size, + int *nadd, int *naddr, u_int32_t ticket, int flags) +{ + struct pfr_ktableworkq tableq; + struct pfr_kentryworkq addrq; + struct pfr_ktable *kt, *rt, *shadow, key; + struct pfr_kentry *p; + struct pfr_addr *ad; + struct pf_ruleset *rs; + int i, rv, xadd = 0, xaddr = 0; + + PF_RULES_WASSERT(); + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_ADDRSTOO); + if (size && !(flags & PFR_FLAG_ADDRSTOO)) + return (EINVAL); + if (pfr_validate_table(tbl, PFR_TFLAG_USRMASK, + flags & PFR_FLAG_USERIOCTL)) + return (EINVAL); + rs = pf_find_ruleset(tbl->pfrt_anchor); + if (rs == NULL || !rs->topen || ticket != rs->tticket) + return (EBUSY); + tbl->pfrt_flags |= PFR_TFLAG_INACTIVE; + SLIST_INIT(&tableq); + kt = RB_FIND(pfr_ktablehead, &pfr_ktables, (struct pfr_ktable *)tbl); + if (kt == NULL) { + kt = pfr_create_ktable(tbl, 0, 1); + if (kt == NULL) + return (ENOMEM); + SLIST_INSERT_HEAD(&tableq, kt, pfrkt_workq); + xadd++; + if (!tbl->pfrt_anchor[0]) + goto _skip; + + /* find or create root table */ + bzero(&key, sizeof(key)); + strlcpy(key.pfrkt_name, tbl->pfrt_name, sizeof(key.pfrkt_name)); + rt = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (rt != NULL) { + kt->pfrkt_root = rt; + goto _skip; + } + rt = pfr_create_ktable(&key.pfrkt_t, 0, 1); + if (rt == NULL) { + pfr_destroy_ktables(&tableq, 0); + return (ENOMEM); + } + SLIST_INSERT_HEAD(&tableq, rt, pfrkt_workq); + kt->pfrkt_root = rt; + } else if (!(kt->pfrkt_flags & PFR_TFLAG_INACTIVE)) + xadd++; +_skip: + shadow = pfr_create_ktable(tbl, 0, 0); + if (shadow == NULL) { + pfr_destroy_ktables(&tableq, 0); + return (ENOMEM); + } + SLIST_INIT(&addrq); + for (i = 0, ad = addr; i < size; i++, ad++) { + if (pfr_validate_addr(ad)) + senderr(EINVAL); + if (pfr_lookup_addr(shadow, ad, 1) != NULL) + continue; + p = pfr_create_kentry(ad); + if (p == NULL) + senderr(ENOMEM); + if (pfr_route_kentry(shadow, p)) { + pfr_destroy_kentry(p); + continue; + } + SLIST_INSERT_HEAD(&addrq, p, pfrke_workq); + xaddr++; + } + if (!(flags & PFR_FLAG_DUMMY)) { + if (kt->pfrkt_shadow != NULL) + pfr_destroy_ktable(kt->pfrkt_shadow, 1); + kt->pfrkt_flags |= PFR_TFLAG_INACTIVE; + pfr_insert_ktables(&tableq); + shadow->pfrkt_cnt = (flags & PFR_FLAG_ADDRSTOO) ? 
+ xaddr : NO_ADDRESSES; + kt->pfrkt_shadow = shadow; + } else { + pfr_clean_node_mask(shadow, &addrq); + pfr_destroy_ktable(shadow, 0); + pfr_destroy_ktables(&tableq, 0); + pfr_destroy_kentries(&addrq); + } + if (nadd != NULL) + *nadd = xadd; + if (naddr != NULL) + *naddr = xaddr; + return (0); +_bad: + pfr_destroy_ktable(shadow, 0); + pfr_destroy_ktables(&tableq, 0); + pfr_destroy_kentries(&addrq); + return (rv); +} + +int +pfr_ina_rollback(struct pfr_table *trs, u_int32_t ticket, int *ndel, int flags) +{ + struct pfr_ktableworkq workq; + struct pfr_ktable *p; + struct pf_ruleset *rs; + int xdel = 0; + + PF_RULES_WASSERT(); + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); + rs = pf_find_ruleset(trs->pfrt_anchor); + if (rs == NULL || !rs->topen || ticket != rs->tticket) + return (0); + SLIST_INIT(&workq); + RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { + if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) || + pfr_skip_table(trs, p, 0)) + continue; + p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_INACTIVE; + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + xdel++; + } + if (!(flags & PFR_FLAG_DUMMY)) { + pfr_setflags_ktables(&workq); + rs->topen = 0; + pf_remove_if_empty_ruleset(rs); + } + if (ndel != NULL) + *ndel = xdel; + return (0); +} + +int +pfr_ina_commit(struct pfr_table *trs, u_int32_t ticket, int *nadd, + int *nchange, int flags) +{ + struct pfr_ktable *p, *q; + struct pfr_ktableworkq workq; + struct pf_ruleset *rs; + int xadd = 0, xchange = 0; + long tzero = time_second; + + PF_RULES_WASSERT(); + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); + rs = pf_find_ruleset(trs->pfrt_anchor); + if (rs == NULL || !rs->topen || ticket != rs->tticket) + return (EBUSY); + + SLIST_INIT(&workq); + RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { + if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) || + pfr_skip_table(trs, p, 0)) + continue; + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + if (p->pfrkt_flags & PFR_TFLAG_ACTIVE) + xchange++; + else + xadd++; + } + + if (!(flags & PFR_FLAG_DUMMY)) { + for (p = SLIST_FIRST(&workq); p != NULL; p = q) { + q = SLIST_NEXT(p, pfrkt_workq); + pfr_commit_ktable(p, tzero); + } + rs->topen = 0; + pf_remove_if_empty_ruleset(rs); + } + if (nadd != NULL) + *nadd = xadd; + if (nchange != NULL) + *nchange = xchange; + + return (0); +} + +static void +pfr_commit_ktable(struct pfr_ktable *kt, long tzero) +{ + struct pfr_ktable *shadow = kt->pfrkt_shadow; + int nflags; + + PF_RULES_WASSERT(); + + if (shadow->pfrkt_cnt == NO_ADDRESSES) { + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + pfr_clstats_ktable(kt, tzero, 1); + } else if (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) { + /* kt might contain addresses */ + struct pfr_kentryworkq addrq, addq, changeq, delq, garbageq; + struct pfr_kentry *p, *q, *next; + struct pfr_addr ad; + + pfr_enqueue_addrs(shadow, &addrq, NULL, 0); + pfr_mark_addrs(kt); + SLIST_INIT(&addq); + SLIST_INIT(&changeq); + SLIST_INIT(&delq); + SLIST_INIT(&garbageq); + pfr_clean_node_mask(shadow, &addrq); + for (p = SLIST_FIRST(&addrq); p != NULL; p = next) { + next = SLIST_NEXT(p, pfrke_workq); /* XXX */ + pfr_copyout_addr(&ad, p); + q = pfr_lookup_addr(kt, &ad, 1); + if (q != NULL) { + if (q->pfrke_not != p->pfrke_not) + SLIST_INSERT_HEAD(&changeq, q, + pfrke_workq); + q->pfrke_mark = 1; + SLIST_INSERT_HEAD(&garbageq, p, pfrke_workq); + } else { + p->pfrke_tzero = tzero; + SLIST_INSERT_HEAD(&addq, p, pfrke_workq); + } + } + pfr_enqueue_addrs(kt, &delq, NULL, ENQUEUE_UNMARKED_ONLY); + pfr_insert_kentries(kt, &addq, tzero); + pfr_remove_kentries(kt, &delq); + pfr_clstats_kentries(&changeq, tzero, 
INVERT_NEG_FLAG); + pfr_destroy_kentries(&garbageq); + } else { + /* kt cannot contain addresses */ + SWAP(struct radix_node_head *, kt->pfrkt_ip4, + shadow->pfrkt_ip4); + SWAP(struct radix_node_head *, kt->pfrkt_ip6, + shadow->pfrkt_ip6); + SWAP(int, kt->pfrkt_cnt, shadow->pfrkt_cnt); + pfr_clstats_ktable(kt, tzero, 1); + } + nflags = ((shadow->pfrkt_flags & PFR_TFLAG_USRMASK) | + (kt->pfrkt_flags & PFR_TFLAG_SETMASK) | PFR_TFLAG_ACTIVE) + & ~PFR_TFLAG_INACTIVE; + pfr_destroy_ktable(shadow, 0); + kt->pfrkt_shadow = NULL; + pfr_setflags_ktable(kt, nflags); +} + +static int +pfr_validate_table(struct pfr_table *tbl, int allowedflags, int no_reserved) +{ + int i; + + if (!tbl->pfrt_name[0]) + return (-1); + if (no_reserved && !strcmp(tbl->pfrt_anchor, PF_RESERVED_ANCHOR)) + return (-1); + if (tbl->pfrt_name[PF_TABLE_NAME_SIZE-1]) + return (-1); + for (i = strlen(tbl->pfrt_name); i < PF_TABLE_NAME_SIZE; i++) + if (tbl->pfrt_name[i]) + return (-1); + if (pfr_fix_anchor(tbl->pfrt_anchor)) + return (-1); + if (tbl->pfrt_flags & ~allowedflags) + return (-1); + return (0); +} + +/* + * Rewrite anchors referenced by tables to remove slashes + * and check for validity. + */ +static int +pfr_fix_anchor(char *anchor) +{ + size_t siz = MAXPATHLEN; + int i; + + if (anchor[0] == '/') { + char *path; + int off; + + path = anchor; + off = 1; + while (*++path == '/') + off++; + bcopy(path, anchor, siz - off); + memset(anchor + siz - off, 0, off); + } + if (anchor[siz - 1]) + return (-1); + for (i = strlen(anchor); i < siz; i++) + if (anchor[i]) + return (-1); + return (0); +} + +static int +pfr_table_count(struct pfr_table *filter, int flags) +{ + struct pf_ruleset *rs; + + PF_RULES_ASSERT(); + + if (flags & PFR_FLAG_ALLRSETS) + return (pfr_ktable_cnt); + if (filter->pfrt_anchor[0]) { + rs = pf_find_ruleset(filter->pfrt_anchor); + return ((rs != NULL) ? 
rs->tables : -1); + } + return (pf_main_ruleset.tables); +} + +static int +pfr_skip_table(struct pfr_table *filter, struct pfr_ktable *kt, int flags) +{ + if (flags & PFR_FLAG_ALLRSETS) + return (0); + if (strcmp(filter->pfrt_anchor, kt->pfrkt_anchor)) + return (1); + return (0); +} + +static void +pfr_insert_ktables(struct pfr_ktableworkq *workq) +{ + struct pfr_ktable *p; + + SLIST_FOREACH(p, workq, pfrkt_workq) + pfr_insert_ktable(p); +} + +static void +pfr_insert_ktable(struct pfr_ktable *kt) +{ + + PF_RULES_WASSERT(); + + RB_INSERT(pfr_ktablehead, &pfr_ktables, kt); + pfr_ktable_cnt++; + if (kt->pfrkt_root != NULL) + if (!kt->pfrkt_root->pfrkt_refcnt[PFR_REFCNT_ANCHOR]++) + pfr_setflags_ktable(kt->pfrkt_root, + kt->pfrkt_root->pfrkt_flags|PFR_TFLAG_REFDANCHOR); +} + +static void +pfr_setflags_ktables(struct pfr_ktableworkq *workq) +{ + struct pfr_ktable *p, *q; + + for (p = SLIST_FIRST(workq); p; p = q) { + q = SLIST_NEXT(p, pfrkt_workq); + pfr_setflags_ktable(p, p->pfrkt_nflags); + } +} + +static void +pfr_setflags_ktable(struct pfr_ktable *kt, int newf) +{ + struct pfr_kentryworkq addrq; + + PF_RULES_WASSERT(); + + if (!(newf & PFR_TFLAG_REFERENCED) && + !(newf & PFR_TFLAG_PERSIST)) + newf &= ~PFR_TFLAG_ACTIVE; + if (!(newf & PFR_TFLAG_ACTIVE)) + newf &= ~PFR_TFLAG_USRMASK; + if (!(newf & PFR_TFLAG_SETMASK)) { + RB_REMOVE(pfr_ktablehead, &pfr_ktables, kt); + if (kt->pfrkt_root != NULL) + if (!--kt->pfrkt_root->pfrkt_refcnt[PFR_REFCNT_ANCHOR]) + pfr_setflags_ktable(kt->pfrkt_root, + kt->pfrkt_root->pfrkt_flags & + ~PFR_TFLAG_REFDANCHOR); + pfr_destroy_ktable(kt, 1); + pfr_ktable_cnt--; + return; + } + if (!(newf & PFR_TFLAG_ACTIVE) && kt->pfrkt_cnt) { + pfr_enqueue_addrs(kt, &addrq, NULL, 0); + pfr_remove_kentries(kt, &addrq); + } + if (!(newf & PFR_TFLAG_INACTIVE) && kt->pfrkt_shadow != NULL) { + pfr_destroy_ktable(kt->pfrkt_shadow, 1); + kt->pfrkt_shadow = NULL; + } + kt->pfrkt_flags = newf; +} + +static void +pfr_clstats_ktables(struct pfr_ktableworkq *workq, long tzero, int recurse) +{ + struct pfr_ktable *p; + + SLIST_FOREACH(p, workq, pfrkt_workq) + pfr_clstats_ktable(p, tzero, recurse); +} + +static void +pfr_clstats_ktable(struct pfr_ktable *kt, long tzero, int recurse) +{ + struct pfr_kentryworkq addrq; + + if (recurse) { + pfr_enqueue_addrs(kt, &addrq, NULL, 0); + pfr_clstats_kentries(&addrq, tzero, 0); + } + bzero(kt->pfrkt_packets, sizeof(kt->pfrkt_packets)); + bzero(kt->pfrkt_bytes, sizeof(kt->pfrkt_bytes)); + kt->pfrkt_match = kt->pfrkt_nomatch = 0; + kt->pfrkt_tzero = tzero; +} + +static struct pfr_ktable * +pfr_create_ktable(struct pfr_table *tbl, long tzero, int attachruleset) +{ + struct pfr_ktable *kt; + struct pf_ruleset *rs; + + PF_RULES_WASSERT(); + + kt = malloc(sizeof(*kt), M_PFTABLE, M_NOWAIT|M_ZERO); + if (kt == NULL) + return (NULL); + kt->pfrkt_t = *tbl; + + if (attachruleset) { + rs = pf_find_or_create_ruleset(tbl->pfrt_anchor); + if (!rs) { + pfr_destroy_ktable(kt, 0); + return (NULL); + } + kt->pfrkt_rs = rs; + rs->tables++; + } + + if (!rn_inithead((void **)&kt->pfrkt_ip4, + offsetof(struct sockaddr_in, sin_addr) * 8) || + !rn_inithead((void **)&kt->pfrkt_ip6, + offsetof(struct sockaddr_in6, sin6_addr) * 8)) { + pfr_destroy_ktable(kt, 0); + return (NULL); + } + kt->pfrkt_tzero = tzero; + + return (kt); +} + +static void +pfr_destroy_ktables(struct pfr_ktableworkq *workq, int flushaddr) +{ + struct pfr_ktable *p, *q; + + for (p = SLIST_FIRST(workq); p; p = q) { + q = SLIST_NEXT(p, pfrkt_workq); + pfr_destroy_ktable(p, flushaddr); + } +} + +static void 
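+/*
+ * Tear down a ktable: optionally flush its entries, free both radix
+ * heads, recurse into any shadow table and drop the ruleset reference.
+ */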
+pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr) +{ + struct pfr_kentryworkq addrq; + + if (flushaddr) { + pfr_enqueue_addrs(kt, &addrq, NULL, 0); + pfr_clean_node_mask(kt, &addrq); + pfr_destroy_kentries(&addrq); + } + if (kt->pfrkt_ip4 != NULL) { + RADIX_NODE_HEAD_DESTROY(kt->pfrkt_ip4); + free((caddr_t)kt->pfrkt_ip4, M_RTABLE); + } + if (kt->pfrkt_ip6 != NULL) { + RADIX_NODE_HEAD_DESTROY(kt->pfrkt_ip6); + free((caddr_t)kt->pfrkt_ip6, M_RTABLE); + } + if (kt->pfrkt_shadow != NULL) + pfr_destroy_ktable(kt->pfrkt_shadow, flushaddr); + if (kt->pfrkt_rs != NULL) { + kt->pfrkt_rs->tables--; + pf_remove_if_empty_ruleset(kt->pfrkt_rs); + } + free(kt, M_PFTABLE); +} + +static int +pfr_ktable_compare(struct pfr_ktable *p, struct pfr_ktable *q) +{ + int d; + + if ((d = strncmp(p->pfrkt_name, q->pfrkt_name, PF_TABLE_NAME_SIZE))) + return (d); + return (strcmp(p->pfrkt_anchor, q->pfrkt_anchor)); +} + +static struct pfr_ktable * +pfr_lookup_table(struct pfr_table *tbl) +{ + /* struct pfr_ktable start like a struct pfr_table */ + return (RB_FIND(pfr_ktablehead, &pfr_ktables, + (struct pfr_ktable *)tbl)); +} + +int +pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af) +{ + struct pfr_kentry *ke = NULL; + int match; + + PF_RULES_RASSERT(); + + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) + kt = kt->pfrkt_root; + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (0); + + switch (af) { +#ifdef INET + case AF_INET: + { + struct sockaddr_in sin; + + bzero(&sin, sizeof(sin)); + sin.sin_len = sizeof(sin); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = a->addr32[0]; + ke = (struct pfr_kentry *)rn_match(&sin, kt->pfrkt_ip4); + if (ke && KENTRY_RNF_ROOT(ke)) + ke = NULL; + break; + } +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + { + struct sockaddr_in6 sin6; + + bzero(&sin6, sizeof(sin6)); + sin6.sin6_len = sizeof(sin6); + sin6.sin6_family = AF_INET6; + bcopy(a, &sin6.sin6_addr, sizeof(sin6.sin6_addr)); + ke = (struct pfr_kentry *)rn_match(&sin6, kt->pfrkt_ip6); + if (ke && KENTRY_RNF_ROOT(ke)) + ke = NULL; + break; + } +#endif /* INET6 */ + } + match = (ke && !ke->pfrke_not); + if (match) + kt->pfrkt_match++; + else + kt->pfrkt_nomatch++; + return (match); +} + +void +pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af, + u_int64_t len, int dir_out, int op_pass, int notrule) +{ + struct pfr_kentry *ke = NULL; + + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) + kt = kt->pfrkt_root; + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return; + + switch (af) { +#ifdef INET + case AF_INET: + { + struct sockaddr_in sin; + + sin.sin_len = sizeof(sin); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = a->addr32[0]; + ke = (struct pfr_kentry *)rn_match(&sin, kt->pfrkt_ip4); + if (ke && KENTRY_RNF_ROOT(ke)) + ke = NULL; + break; + } +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + { + struct sockaddr_in6 sin6; + + sin6.sin6_len = sizeof(sin6); + sin6.sin6_family = AF_INET6; + bcopy(a, &sin6.sin6_addr, sizeof(sin6.sin6_addr)); + ke = (struct pfr_kentry *)rn_match(&sin6, kt->pfrkt_ip6); + if (ke && KENTRY_RNF_ROOT(ke)) + ke = NULL; + break; + } +#endif /* INET6 */ + default: + ; + } + if ((ke == NULL || ke->pfrke_not) != notrule) { + if (op_pass != PFR_OP_PASS) + printf("pfr_update_stats: assertion failed.\n"); + op_pass = PFR_OP_XPASS; + } + kt->pfrkt_packets[dir_out][op_pass]++; + kt->pfrkt_bytes[dir_out][op_pass] += len; + if (ke != NULL && op_pass != PFR_OP_XPASS && + (kt->pfrkt_flags & PFR_TFLAG_COUNTERS)) { 
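+		/* per-entry counters are allocated lazily on first use */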
+ if (ke->pfrke_counters == NULL) + ke->pfrke_counters = uma_zalloc(V_pfr_kcounters_z, + M_NOWAIT | M_ZERO); + if (ke->pfrke_counters != NULL) { + ke->pfrke_counters->pfrkc_packets[dir_out][op_pass]++; + ke->pfrke_counters->pfrkc_bytes[dir_out][op_pass] += len; + } + } +} + +struct pfr_ktable * +pfr_attach_table(struct pf_ruleset *rs, char *name) +{ + struct pfr_ktable *kt, *rt; + struct pfr_table tbl; + struct pf_anchor *ac = rs->anchor; + + PF_RULES_WASSERT(); + + bzero(&tbl, sizeof(tbl)); + strlcpy(tbl.pfrt_name, name, sizeof(tbl.pfrt_name)); + if (ac != NULL) + strlcpy(tbl.pfrt_anchor, ac->path, sizeof(tbl.pfrt_anchor)); + kt = pfr_lookup_table(&tbl); + if (kt == NULL) { + kt = pfr_create_ktable(&tbl, time_second, 1); + if (kt == NULL) + return (NULL); + if (ac != NULL) { + bzero(tbl.pfrt_anchor, sizeof(tbl.pfrt_anchor)); + rt = pfr_lookup_table(&tbl); + if (rt == NULL) { + rt = pfr_create_ktable(&tbl, 0, 1); + if (rt == NULL) { + pfr_destroy_ktable(kt, 0); + return (NULL); + } + pfr_insert_ktable(rt); + } + kt->pfrkt_root = rt; + } + pfr_insert_ktable(kt); + } + if (!kt->pfrkt_refcnt[PFR_REFCNT_RULE]++) + pfr_setflags_ktable(kt, kt->pfrkt_flags|PFR_TFLAG_REFERENCED); + return (kt); +} + +void +pfr_detach_table(struct pfr_ktable *kt) +{ + + PF_RULES_WASSERT(); + KASSERT(kt->pfrkt_refcnt[PFR_REFCNT_RULE] > 0, ("%s: refcount %d\n", + __func__, kt->pfrkt_refcnt[PFR_REFCNT_RULE])); + + if (!--kt->pfrkt_refcnt[PFR_REFCNT_RULE]) + pfr_setflags_ktable(kt, kt->pfrkt_flags&~PFR_TFLAG_REFERENCED); +} + +int +pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter, + sa_family_t af) +{ + struct pf_addr *addr, *cur, *mask; + union sockaddr_union uaddr, umask; + struct pfr_kentry *ke, *ke2 = NULL; + int idx = -1, use_counter = 0; + + switch (af) { + case AF_INET: + uaddr.sin.sin_len = sizeof(struct sockaddr_in); + uaddr.sin.sin_family = AF_INET; + break; + case AF_INET6: + uaddr.sin6.sin6_len = sizeof(struct sockaddr_in6); + uaddr.sin6.sin6_family = AF_INET6; + break; + } + addr = SUNION2PF(&uaddr, af); + + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) + kt = kt->pfrkt_root; + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (-1); + + if (pidx != NULL) + idx = *pidx; + if (counter != NULL && idx >= 0) + use_counter = 1; + if (idx < 0) + idx = 0; + +_next_block: + ke = pfr_kentry_byidx(kt, idx, af); + if (ke == NULL) { + kt->pfrkt_nomatch++; + return (1); + } + pfr_prepare_network(&umask, af, ke->pfrke_net); + cur = SUNION2PF(&ke->pfrke_sa, af); + mask = SUNION2PF(&umask, af); + + if (use_counter) { + /* is supplied address within block? 
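+		 * (if so, resume the scan from the stored counter instead
+		 * of restarting at the block's first address)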
+		 */
+		if (!PF_MATCHA(0, cur, mask, counter, af)) {
+			/* no, go to next block in table */
+			idx++;
+			use_counter = 0;
+			goto _next_block;
+		}
+		PF_ACPY(addr, counter, af);
+	} else {
+		/* use first address of block */
+		PF_ACPY(addr, cur, af);
+	}
+
+	if (!KENTRY_NETWORK(ke)) {
+		/* this is a single IP address - no possible nested block */
+		PF_ACPY(counter, addr, af);
+		*pidx = idx;
+		kt->pfrkt_match++;
+		return (0);
+	}
+	for (;;) {
+		/* we don't want to use a nested block */
+		switch (af) {
+		case AF_INET:
+			ke2 = (struct pfr_kentry *)rn_match(&uaddr,
+			    kt->pfrkt_ip4);
+			break;
+		case AF_INET6:
+			ke2 = (struct pfr_kentry *)rn_match(&uaddr,
+			    kt->pfrkt_ip6);
+			break;
+		}
+		/* no need to check KENTRY_RNF_ROOT() here */
+		if (ke2 == ke) {
+			/* lookup returned the same block - perfect */
+			PF_ACPY(counter, addr, af);
+			*pidx = idx;
+			kt->pfrkt_match++;
+			return (0);
+		}
+
+		/* we need to increase the counter past the nested block */
+		pfr_prepare_network(&umask, af, ke2->pfrke_net);
+		PF_POOLMASK(addr, addr, SUNION2PF(&umask, af), &pfr_ffaddr, af);
+		PF_AINC(addr, af);
+		if (!PF_MATCHA(0, cur, mask, addr, af)) {
+			/* ok, we reached the end of our main block */
+			/* go to next block in table */
+			idx++;
+			use_counter = 0;
+			goto _next_block;
+		}
+	}
+}
+
+static struct pfr_kentry *
+pfr_kentry_byidx(struct pfr_ktable *kt, int idx, int af)
+{
+	struct pfr_walktree w;
+
+	bzero(&w, sizeof(w));
+	w.pfrw_op = PFRW_POOL_GET;
+	w.pfrw_cnt = idx;
+
+	switch (af) {
+#ifdef INET
+	case AF_INET:
+		kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w);
+		return (w.pfrw_kentry);
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6:
+		kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, &w);
+		return (w.pfrw_kentry);
+#endif /* INET6 */
+	default:
+		return (NULL);
+	}
+}
+
+void
+pfr_dynaddr_update(struct pfr_ktable *kt, struct pfi_dynaddr *dyn)
+{
+	struct pfr_walktree w;
+
+	bzero(&w, sizeof(w));
+	w.pfrw_op = PFRW_DYNADDR_UPDATE;
+	w.pfrw_dyn = dyn;
+
+	dyn->pfid_acnt4 = 0;
+	dyn->pfid_acnt6 = 0;
+	if (!dyn->pfid_af || dyn->pfid_af == AF_INET)
+		kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w);
+	if (!dyn->pfid_af || dyn->pfid_af == AF_INET6)
+		kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, &w);
+}