From e15f804c7b67f7cac4a68d0f6b6d0418e9f7309a Mon Sep 17 00:00:00 2001 From: bz Date: Tue, 28 Jun 2011 11:57:25 +0000 Subject: Update packet filter (pf) code to OpenBSD 4.5. You need to update userland (world and ports) tools to be in sync with the kernel. Submitted by: mlaier Submitted by: eri --- sys/contrib/altq/altq/altq_red.c | 11 +- sys/contrib/pf/net/if_pflog.c | 47 +- sys/contrib/pf/net/if_pflog.h | 26 +- sys/contrib/pf/net/if_pflow.h | 126 + sys/contrib/pf/net/if_pfsync.c | 4023 +++++++++++++++---------- sys/contrib/pf/net/if_pfsync.h | 493 ++-- sys/contrib/pf/net/pf.c | 5970 +++++++++++++++++++------------------- sys/contrib/pf/net/pf_if.c | 309 +- sys/contrib/pf/net/pf_ioctl.c | 1394 ++++++--- sys/contrib/pf/net/pf_lb.c | 792 +++++ sys/contrib/pf/net/pf_mtag.h | 6 +- sys/contrib/pf/net/pf_norm.c | 411 ++- sys/contrib/pf/net/pf_osfp.c | 140 +- sys/contrib/pf/net/pf_ruleset.c | 94 +- sys/contrib/pf/net/pf_subr.c | 168 -- sys/contrib/pf/net/pf_table.c | 430 ++- sys/contrib/pf/net/pfvar.h | 798 +++-- 17 files changed, 9262 insertions(+), 5976 deletions(-) create mode 100644 sys/contrib/pf/net/if_pflow.h create mode 100644 sys/contrib/pf/net/pf_lb.c delete mode 100644 sys/contrib/pf/net/pf_subr.c (limited to 'sys/contrib') diff --git a/sys/contrib/altq/altq/altq_red.c b/sys/contrib/altq/altq/altq_red.c index cd216bd..b461ce0 100644 --- a/sys/contrib/altq/altq/altq_red.c +++ b/sys/contrib/altq/altq/altq_red.c @@ -514,11 +514,9 @@ mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags) struct mbuf *m0; struct pf_mtag *at; void *hdr; - int af; at = pf_find_mtag(m); if (at != NULL) { - af = at->af; hdr = at->hdr; #ifdef ALTQ3_COMPAT } else if (pktattr != NULL) { @@ -528,9 +526,6 @@ mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags) } else return (0); - if (af != AF_INET && af != AF_INET6) - return (0); - /* verify that pattr_hdr is within the mbuf data */ for (m0 = m; m0 != NULL; m0 = m0->m_next) if (((caddr_t)hdr >= m0->m_data) && @@ 
-541,8 +536,8 @@ mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags) return (0); } - switch (af) { - case AF_INET: + switch (((struct ip *)hdr)->ip_v) { + case IPVERSION: if (flags & REDF_ECN4) { struct ip *ip = hdr; u_int8_t otos; @@ -575,7 +570,7 @@ mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags) } break; #ifdef INET6 - case AF_INET6: + case (IPV6_VERSION >> 4): if (flags & REDF_ECN6) { struct ip6_hdr *ip6 = hdr; u_int32_t flowlabel; diff --git a/sys/contrib/pf/net/if_pflog.c b/sys/contrib/pf/net/if_pflog.c index d16a09b..0893e8d 100644 --- a/sys/contrib/pf/net/if_pflog.c +++ b/sys/contrib/pf/net/if_pflog.c @@ -1,4 +1,4 @@ -/* $OpenBSD: if_pflog.c,v 1.22 2006/12/15 09:31:20 otto Exp $ */ +/* $OpenBSD: if_pflog.c,v 1.26 2007/10/18 21:58:18 mpf Exp $ */ /* * The authors of this code are John Ioannidis (ji@tla.org), * Angelos D. Keromytis (kermit@csd.uch.gr) and @@ -99,11 +99,11 @@ __FBSDID("$FreeBSD$"); #include #include -#ifdef INET #ifdef __FreeBSD__ +#ifdef INET #include -#endif -#endif +#endif /* INET */ +#endif /* __FreeBSD__ */ #define PFLOGMTU (32768 + MHLEN + MLEN) @@ -115,7 +115,11 @@ __FBSDID("$FreeBSD$"); void pflogattach(int); int pflogoutput(struct ifnet *, struct mbuf *, struct sockaddr *, - struct route *); +#ifdef __FreeBSD__ + struct route *); +#else + struct rtentry *); +#endif int pflogioctl(struct ifnet *, u_long, caddr_t); void pflogstart(struct ifnet *); #ifdef __FreeBSD__ @@ -128,7 +132,7 @@ int pflog_clone_destroy(struct ifnet *); LIST_HEAD(, pflog_softc) pflogif_list; #ifdef __FreeBSD__ -IFC_SIMPLE_DECLARE(pflog, 1); +IFC_SIMPLE_DECLARE(pflog, 1); #else struct if_clone pflog_cloner = IF_CLONE_INITIALIZER("pflog", pflog_clone_create, pflog_clone_destroy); @@ -136,10 +140,6 @@ struct if_clone pflog_cloner = struct ifnet *pflogifs[PFLOGIFS_MAX]; /* for fast access */ -#ifndef __FreeBSD__ -extern int ifqmaxlen; -#endif - void pflogattach(int npflog) { @@ -147,9 +147,6 @@ pflogattach(int npflog) 
LIST_INIT(&pflogif_list); for (i = 0; i < PFLOGIFS_MAX; i++) pflogifs[i] = NULL; -#ifndef __FreeBSD__ - (void) pflog_clone_create(&pflog_cloner, 0); -#endif if_clone_attach(&pflog_cloner); } @@ -168,9 +165,9 @@ pflog_clone_create(struct if_clone *ifc, int unit) if (unit >= PFLOGIFS_MAX) return (EINVAL); - if ((pflogif = malloc(sizeof(*pflogif), M_DEVBUF, M_NOWAIT)) == NULL) + if ((pflogif = malloc(sizeof(*pflogif), + M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL) return (ENOMEM); - bzero(pflogif, sizeof(*pflogif)); pflogif->sc_unit = unit; #ifdef __FreeBSD__ @@ -209,6 +206,7 @@ pflog_clone_create(struct if_clone *ifc, int unit) s = splnet(); #ifdef __FreeBSD__ + /* XXX: Why pf(4) lock?! Better add a pflog lock?! */ PF_LOCK(); #endif LIST_INSERT_HEAD(&pflogif_list, pflogif, sc_list); @@ -289,7 +287,11 @@ pflogstart(struct ifnet *ifp) int pflogoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, - struct route *ro) +#ifdef __FreeBSD__ + struct route *rt) +#else + struct rtentry *rt) +#endif { m_freem(m); return (0); @@ -300,9 +302,6 @@ int pflogioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { switch (cmd) { - case SIOCSIFADDR: - case SIOCAIFADDR: - case SIOCSIFDSTADDR: case SIOCSIFFLAGS: #ifdef __FreeBSD__ if (ifp->if_flags & IFF_UP) @@ -317,7 +316,7 @@ pflogioctl(struct ifnet *ifp, u_long cmd, caddr_t data) #endif break; default: - return (EINVAL); + return (ENOTTY); } return (0); @@ -333,7 +332,7 @@ pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, struct pfloghdr hdr; if (kif == NULL || m == NULL || rm == NULL || pd == NULL) - return (-1); + return ( 1); if ((ifn = pflogifs[rm->logif]) == NULL || !ifn->if_bpf) return (0); @@ -347,7 +346,7 @@ pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, if (am == NULL) { hdr.rulenr = htonl(rm->nr); - hdr.subrulenr = -1; + hdr.subrulenr = 1; } else { hdr.rulenr = htonl(am->nr); hdr.subrulenr = htonl(rm->nr); @@ -357,11 +356,11 @@ pflog_packet(struct pfi_kif *kif, 
struct mbuf *m, sa_family_t af, u_int8_t dir, } if (rm->log & PF_LOG_SOCKET_LOOKUP && !pd->lookup.done) #ifdef __FreeBSD__ - /* + /* * XXX: This should not happen as we force an early lookup * via debug.pfugidhack */ - ; /* empty */ + ; /* empty */ #else pd->lookup.done = pf_socket_lookup(dir, pd); #endif diff --git a/sys/contrib/pf/net/if_pflog.h b/sys/contrib/pf/net/if_pflog.h index a3c74d1..5f48f6c 100644 --- a/sys/contrib/pf/net/if_pflog.h +++ b/sys/contrib/pf/net/if_pflog.h @@ -1,5 +1,4 @@ -/* $FreeBSD$ */ -/* $OpenBSD: if_pflog.h,v 1.14 2006/10/25 11:27:01 henning Exp $ */ +/* $OpenBSD: if_pflog.h,v 1.13 2006/10/23 12:46:09 henning Exp $ */ /* * Copyright 2001 Niels Provos * All rights reserved. @@ -26,11 +25,10 @@ */ #ifndef _NET_IF_PFLOG_H_ -#define _NET_IF_PFLOG_H_ +#define _NET_IF_PFLOG_H_ #define PFLOGIFS_MAX 16 -#ifdef _KERNEL struct pflog_softc { #ifdef __FreeBSD__ struct ifnet *sc_ifp; /* the interface pointer */ @@ -40,9 +38,8 @@ struct pflog_softc { int sc_unit; LIST_ENTRY(pflog_softc) sc_list; }; -#endif /* _KERNEL */ -#define PFLOG_RULESET_NAME_SIZE 16 +#define PFLOG_RULESET_NAME_SIZE 16 struct pfloghdr { u_int8_t length; @@ -61,9 +58,9 @@ struct pfloghdr { u_int8_t pad[3]; }; -#define PFLOG_HDRLEN sizeof(struct pfloghdr) +#define PFLOG_HDRLEN sizeof(struct pfloghdr) /* minus pad, also used as a signature */ -#define PFLOG_REAL_HDRLEN offsetof(struct pfloghdr, pad) +#define PFLOG_REAL_HDRLEN offsetof(struct pfloghdr, pad) /* XXX remove later when old format logs are no longer needed */ struct old_pfloghdr { @@ -74,23 +71,24 @@ struct old_pfloghdr { u_short action; u_short dir; }; -#define OLD_PFLOG_HDRLEN sizeof(struct old_pfloghdr) +#define OLD_PFLOG_HDRLEN sizeof(struct old_pfloghdr) #ifdef _KERNEL - #ifdef __FreeBSD__ struct pf_rule; struct pf_ruleset; struct pfi_kif; struct pf_pdesc; +#if 0 typedef int pflog_packet_t(struct pfi_kif *, struct mbuf *, sa_family_t, u_int8_t, u_int8_t, struct pf_rule *, struct pf_rule *, struct pf_ruleset *, 
struct pf_pdesc *); extern pflog_packet_t *pflog_packet_ptr; -#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g,h) do { \ - if (pflog_packet_ptr != NULL) \ - pflog_packet_ptr(i,a,b,c,d,e,f,g,h); \ +#endif +#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g,h) do { \ + if (pflog_packet_ptr != NULL) \ + pflog_packet_ptr(i,a,b,c,d,e,f,g,h); \ } while (0) #else /* ! __FreeBSD__ */ #if NPFLOG > 0 @@ -98,6 +96,6 @@ extern pflog_packet_t *pflog_packet_ptr; #else #define PFLOG_PACKET(i,x,a,b,c,d,e,f,g,h) ((void)0) #endif /* NPFLOG > 0 */ -#endif /* __FreeBSD__ */ +#endif #endif /* _KERNEL */ #endif /* _NET_IF_PFLOG_H_ */ diff --git a/sys/contrib/pf/net/if_pflow.h b/sys/contrib/pf/net/if_pflow.h new file mode 100644 index 0000000..35ccbeb --- /dev/null +++ b/sys/contrib/pf/net/if_pflow.h @@ -0,0 +1,126 @@ +/* $OpenBSD: if_pflow.h,v 1.5 2009/02/27 11:09:36 gollo Exp $ */ + +/* + * Copyright (c) 2008 Henning Brauer + * Copyright (c) 2008 Joerg Goltermann + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT + * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ * + * $FreeBSD$ + */ + +#ifndef _NET_IF_PFLOW_H_ +#define _NET_IF_PFLOW_H_ + +#define PFLOW_ID_LEN sizeof(u_int64_t) + +#define PFLOW_MAXFLOWS 30 +#define PFLOW_VERSION 5 +#define PFLOW_ENGINE_TYPE 42 +#define PFLOW_ENGINE_ID 42 +#define PFLOW_MAXBYTES 0xffffffff +#define PFLOW_TIMEOUT 30 + +struct pflow_flow { + u_int32_t src_ip; + u_int32_t dest_ip; + u_int32_t nexthop_ip; + u_int16_t if_index_in; + u_int16_t if_index_out; + u_int32_t flow_packets; + u_int32_t flow_octets; + u_int32_t flow_start; + u_int32_t flow_finish; + u_int16_t src_port; + u_int16_t dest_port; + u_int8_t pad1; + u_int8_t tcp_flags; + u_int8_t protocol; + u_int8_t tos; + u_int16_t src_as; + u_int16_t dest_as; + u_int8_t src_mask; + u_int8_t dest_mask; + u_int16_t pad2; +} __packed; + +#ifdef _KERNEL + +extern int pflow_ok; + +struct pflow_softc { + struct ifnet sc_if; + struct ifnet *sc_pflow_ifp; + + unsigned int sc_count; + unsigned int sc_maxcount; + u_int64_t sc_gcounter; + struct ip_moptions sc_imo; +#ifdef __FreeBSD__ + struct callout sc_tmo; +#else + struct timeout sc_tmo; +#endif + struct in_addr sc_sender_ip; + u_int16_t sc_sender_port; + struct in_addr sc_receiver_ip; + u_int16_t sc_receiver_port; + struct mbuf *sc_mbuf; /* current cumulative mbuf */ + SLIST_ENTRY(pflow_softc) sc_next; +}; + +extern struct pflow_softc *pflowif; + +#endif /* _KERNEL */ + +struct pflow_header { + u_int16_t version; + u_int16_t count; + u_int32_t uptime_ms; + u_int32_t time_sec; + u_int32_t time_nanosec; + u_int32_t flow_sequence; + u_int8_t engine_type; + u_int8_t engine_id; + u_int8_t reserved1; + u_int8_t reserved2; +} __packed; + +#define PFLOW_HDRLEN sizeof(struct pflow_header) + +struct pflowstats { + u_int64_t pflow_flows; + u_int64_t pflow_packets; + u_int64_t pflow_onomem; + u_int64_t pflow_oerrors; +}; + +/* + * Configuration structure for SIOCSETPFLOW SIOCGETPFLOW + */ +struct pflowreq { + struct in_addr sender_ip; + struct in_addr receiver_ip; + u_int16_t receiver_port; + u_int16_t 
addrmask; +#define PFLOW_MASK_SRCIP 0x01 +#define PFLOW_MASK_DSTIP 0x02 +#define PFLOW_MASK_DSTPRT 0x04 +}; + +#ifdef _KERNEL +int export_pflow(struct pf_state *); +int pflow_sysctl(int *, u_int, void *, size_t *, void *, size_t); +#endif /* _KERNEL */ + +#endif /* _NET_IF_PFLOW_H_ */ diff --git a/sys/contrib/pf/net/if_pfsync.c b/sys/contrib/pf/net/if_pfsync.c index 9466118..ba8a348 100644 --- a/sys/contrib/pf/net/if_pfsync.c +++ b/sys/contrib/pf/net/if_pfsync.c @@ -1,4 +1,4 @@ -/* $OpenBSD: if_pfsync.c,v 1.73 2006/11/16 13:13:38 henning Exp $ */ +/* $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $ */ /* * Copyright (c) 2002 Michael Shalayeff @@ -26,10 +26,25 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ +/* + * Copyright (c) 2009 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + #ifdef __FreeBSD__ #include "opt_inet.h" #include "opt_inet6.h" -#include "opt_carp.h" #include "opt_bpf.h" #include "opt_pf.h" @@ -56,7 +71,10 @@ __FBSDID("$FreeBSD$"); #endif /* __FreeBSD__ */ #include +#include #ifdef __FreeBSD__ +#include +#include #include #endif #include @@ -72,12 +90,14 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #else #include #include #endif -#include +#include +#ifndef __FreeBSD__ +#include +#endif #include #ifdef __FreeBSD__ @@ -86,6 +106,11 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include +#ifdef __FreeBSD__ +#include +#endif + #include #include #include @@ -117,23 +142,191 @@ __FBSDID("$FreeBSD$"); #include "pfsync.h" #endif -#define PFSYNC_MINMTU \ - (sizeof(struct pfsync_header) + sizeof(struct pf_state)) +#define PFSYNC_MINPKT ( \ + sizeof(struct ip) + \ + sizeof(struct pfsync_header) + \ + sizeof(struct pfsync_subheader) + \ + sizeof(struct pfsync_eof)) + +struct pfsync_pkt { + struct ip *ip; + struct in_addr src; + u_int8_t flags; +}; + +int pfsync_input_hmac(struct mbuf *, int); + +int pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *, + struct pfsync_state_peer *); + +int pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int); +int pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int); +int pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int); +int pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int); +int pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int); +int pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int); +int pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int); +int pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int); +int pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int); +int pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int); +int pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int); + +int pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int); + +int 
(*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = { + pfsync_in_clr, /* PFSYNC_ACT_CLR */ + pfsync_in_ins, /* PFSYNC_ACT_INS */ + pfsync_in_iack, /* PFSYNC_ACT_INS_ACK */ + pfsync_in_upd, /* PFSYNC_ACT_UPD */ + pfsync_in_upd_c, /* PFSYNC_ACT_UPD_C */ + pfsync_in_ureq, /* PFSYNC_ACT_UPD_REQ */ + pfsync_in_del, /* PFSYNC_ACT_DEL */ + pfsync_in_del_c, /* PFSYNC_ACT_DEL_C */ + pfsync_in_error, /* PFSYNC_ACT_INS_F */ + pfsync_in_error, /* PFSYNC_ACT_DEL_F */ + pfsync_in_bus, /* PFSYNC_ACT_BUS */ + pfsync_in_tdb, /* PFSYNC_ACT_TDB */ + pfsync_in_eof /* PFSYNC_ACT_EOF */ +}; + +struct pfsync_q { + int (*write)(struct pf_state *, struct mbuf *, int); + size_t len; + u_int8_t action; +}; + +/* we have one of these for every PFSYNC_S_ */ +int pfsync_out_state(struct pf_state *, struct mbuf *, int); +int pfsync_out_iack(struct pf_state *, struct mbuf *, int); +int pfsync_out_upd_c(struct pf_state *, struct mbuf *, int); +int pfsync_out_del(struct pf_state *, struct mbuf *, int); + +struct pfsync_q pfsync_qs[] = { + { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_INS }, + { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, + { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_UPD }, + { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C }, + { pfsync_out_del, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C } +}; + +void pfsync_q_ins(struct pf_state *, int); +void pfsync_q_del(struct pf_state *); + +struct pfsync_upd_req_item { + TAILQ_ENTRY(pfsync_upd_req_item) ur_entry; + struct pfsync_upd_req ur_msg; +}; +TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item); + +struct pfsync_deferral { + TAILQ_ENTRY(pfsync_deferral) pd_entry; + struct pf_state *pd_st; + struct mbuf *pd_m; +#ifdef __FreeBSD__ + struct callout pd_tmo; +#else + struct timeout pd_tmo; +#endif +}; +TAILQ_HEAD(pfsync_deferrals, pfsync_deferral); + +#define PFSYNC_PLSIZE MAX(sizeof(struct pfsync_upd_req_item), \ + sizeof(struct pfsync_deferral)) + 
+#ifdef notyet +int pfsync_out_tdb(struct tdb *, struct mbuf *, int); +#endif + +struct pfsync_softc { +#ifdef __FreeBSD__ + struct ifnet *sc_ifp; +#else + struct ifnet sc_if; +#endif + struct ifnet *sc_sync_if; + +#ifdef __FreeBSD__ + uma_zone_t sc_pool; +#else + struct pool sc_pool; +#endif + + struct ip_moptions sc_imo; + + struct in_addr sc_sync_peer; + u_int8_t sc_maxupdates; +#ifdef __FreeBSD__ + int pfsync_sync_ok; +#endif + + struct ip sc_template; -#ifdef PFSYNCDEBUG -#define DPRINTF(x) do { if (pfsyncdebug) printf x ; } while (0) -int pfsyncdebug; + struct pf_state_queue sc_qs[PFSYNC_S_COUNT]; + size_t sc_len; + + struct pfsync_upd_reqs sc_upd_req_list; + + struct pfsync_deferrals sc_deferrals; + u_int sc_deferred; + + void *sc_plus; + size_t sc_pluslen; + + u_int32_t sc_ureq_sent; + int sc_bulk_tries; +#ifdef __FreeBSD__ + struct callout sc_bulkfail_tmo; +#else + struct timeout sc_bulkfail_tmo; +#endif + + u_int32_t sc_ureq_received; + struct pf_state *sc_bulk_next; + struct pf_state *sc_bulk_last; +#ifdef __FreeBSD__ + struct callout sc_bulk_tmo; +#else + struct timeout sc_bulk_tmo; +#endif + + TAILQ_HEAD(, tdb) sc_tdb_q; + +#ifdef __FreeBSD__ + struct callout sc_tmo; #else -#define DPRINTF(x) + struct timeout sc_tmo; +#endif +#ifdef __FreeBSD__ + eventhandler_tag sc_detachtag; #endif +}; + +#ifdef __FreeBSD__ +static VNET_DEFINE(struct pfsync_softc *, pfsyncif) = NULL; +#define V_pfsyncif VNET(pfsyncif) + +static VNET_DEFINE(struct pfsyncstats, pfsyncstats); +#define V_pfsyncstats VNET(pfsyncstats) + +SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC"); +SYSCTL_VNET_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_RW, + &VNET_NAME(pfsyncstats), pfsyncstats, + "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)"); +#else struct pfsync_softc *pfsyncif = NULL; struct pfsyncstats pfsyncstats; +#define V_pfsyncstats pfsyncstats +#endif + #ifdef __FreeBSD__ -SYSCTL_DECL(_net_inet_pfsync); -SYSCTL_STRUCT(_net_inet_pfsync, 0, stats, CTLFLAG_RW, - 
&pfsyncstats, pfsyncstats, - "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)"); +static void pfsyncintr(void *); +struct pfsync_swi { + void * pfsync_swi_cookie; +}; +static struct pfsync_swi pfsync_swi; +#define schednetisr(p) swi_sched(pfsync_swi.pfsync_swi_cookie, 0) +#define NETISR_PFSYNC #endif void pfsyncattach(int); @@ -144,45 +337,53 @@ void pfsync_clone_destroy(struct ifnet *); int pfsync_clone_create(struct if_clone *, int); int pfsync_clone_destroy(struct ifnet *); #endif -void pfsync_setmtu(struct pfsync_softc *, int); int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, struct pf_state_peer *); -int pfsync_insert_net_state(struct pfsync_state *, u_int8_t); -#ifdef PFSYNC_TDB void pfsync_update_net_tdb(struct pfsync_tdb *); -#endif int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *, +#ifdef __FreeBSD__ struct route *); +#else + struct rtentry *); +#endif int pfsyncioctl(struct ifnet *, u_long, caddr_t); void pfsyncstart(struct ifnet *); -struct mbuf *pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **); -int pfsync_request_update(struct pfsync_state_upd *, struct in_addr *); -int pfsync_sendout(struct pfsync_softc *); -#ifdef PFSYNC_TDB +struct mbuf *pfsync_if_dequeue(struct ifnet *); +struct mbuf *pfsync_get_mbuf(struct pfsync_softc *); + +void pfsync_deferred(struct pf_state *, int); +void pfsync_undefer(struct pfsync_deferral *, int); +void pfsync_defer_tmo(void *); + +void pfsync_request_update(u_int32_t, u_int64_t); +void pfsync_update_state_req(struct pf_state *); + +void pfsync_drop(struct pfsync_softc *); +void pfsync_sendout(void); +void pfsync_send_plus(void *, size_t); int pfsync_tdb_sendout(struct pfsync_softc *); -#endif int pfsync_sendout_mbuf(struct pfsync_softc *, struct mbuf *); void pfsync_timeout(void *); -#ifdef PFSYNC_TDB void pfsync_tdb_timeout(void *); -#endif void pfsync_send_bus(struct pfsync_softc *, u_int8_t); + +void pfsync_bulk_start(void); +void pfsync_bulk_status(u_int8_t); void 
pfsync_bulk_update(void *); -void pfsync_bulkfail(void *); +void pfsync_bulk_fail(void *); #ifdef __FreeBSD__ void pfsync_ifdetach(void *, struct ifnet *); -void pfsync_senddef(void *, int); /* XXX: ugly */ #define betoh64 (unsigned long long)be64toh #define timeout_del callout_stop #endif -int pfsync_sync_ok; +#define PFSYNC_MAX_BULKTRIES 12 #ifndef __FreeBSD__ -extern int ifqmaxlen; +int pfsync_sync_ok; #endif #ifdef __FreeBSD__ @@ -197,7 +398,6 @@ pfsyncattach(int npfsync) { if_clone_attach(&pfsync_cloner); } - int #ifdef __FreeBSD__ pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) @@ -205,95 +405,103 @@ pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) pfsync_clone_create(struct if_clone *ifc, int unit) #endif { + struct pfsync_softc *sc; struct ifnet *ifp; + int q; if (unit != 0) return (EINVAL); +#ifndef __FreeBSD__ pfsync_sync_ok = 1; - if ((pfsyncif = malloc(sizeof(*pfsyncif), M_DEVBUF, M_NOWAIT)) == NULL) +#endif + + sc = malloc(sizeof(struct pfsync_softc), M_DEVBUF, M_NOWAIT | M_ZERO); + if (sc == NULL) return (ENOMEM); - bzero(pfsyncif, sizeof(*pfsyncif)); + + for (q = 0; q < PFSYNC_S_COUNT; q++) + TAILQ_INIT(&sc->sc_qs[q]); + #ifdef __FreeBSD__ - if ((pfsyncif->sc_imo.imo_membership = (struct in_multi **)malloc( - (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_DEVBUF, - M_NOWAIT)) == NULL) { - free(pfsyncif, M_DEVBUF); - return (ENOSPC); + sc->pfsync_sync_ok = 1; + sc->sc_pool = uma_zcreate("pfsync", PFSYNC_PLSIZE, + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + if (sc->sc_pool == NULL) { + free(sc, M_DEVBUF); + return (ENOMEM); } - pfsyncif->sc_imo.imo_mfilters = NULL; - pfsyncif->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; - pfsyncif->sc_imo.imo_multicast_vif = -1; +#else + pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, 0, 0, "pfsync", NULL); +#endif + TAILQ_INIT(&sc->sc_upd_req_list); + TAILQ_INIT(&sc->sc_deferrals); + sc->sc_deferred = 0; + + TAILQ_INIT(&sc->sc_tdb_q); + + sc->sc_len = PFSYNC_MINPKT; + 
sc->sc_maxupdates = 128; + +#ifdef __FreeBSD__ + sc->sc_imo.imo_membership = (struct in_multi **)malloc( + (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_DEVBUF, + M_NOWAIT | M_ZERO); + sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; + sc->sc_imo.imo_multicast_vif = -1; +#else + sc->sc_imo.imo_membership = (struct in_multi **)malloc( + (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS, + M_WAITOK | M_ZERO); + sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; +#endif - ifp = pfsyncif->sc_ifp = if_alloc(IFT_PFSYNC); +#ifdef __FreeBSD__ + ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC); if (ifp == NULL) { - free(pfsyncif->sc_imo.imo_membership, M_DEVBUF); - free(pfsyncif, M_DEVBUF); + free(sc->sc_imo.imo_membership, M_DEVBUF); + uma_zdestroy(sc->sc_pool); + free(sc, M_DEVBUF); return (ENOSPC); } if_initname(ifp, ifc->ifc_name, unit); - pfsyncif->sc_detachtag = EVENTHANDLER_REGISTER(ifnet_departure_event, + sc->sc_detachtag = EVENTHANDLER_REGISTER(ifnet_departure_event, +#ifdef __FreeBSD__ + pfsync_ifdetach, V_pfsyncif, EVENTHANDLER_PRI_ANY); +#else pfsync_ifdetach, pfsyncif, EVENTHANDLER_PRI_ANY); - if (pfsyncif->sc_detachtag == NULL) { +#endif + if (sc->sc_detachtag == NULL) { if_free(ifp); - free(pfsyncif->sc_imo.imo_membership, M_DEVBUF); - free(pfsyncif, M_DEVBUF); + free(sc->sc_imo.imo_membership, M_DEVBUF); + uma_zdestroy(sc->sc_pool); + free(sc, M_DEVBUF); return (ENOSPC); } - - pfsyncif->sc_ifq.ifq_maxlen = ifqmaxlen; - mtx_init(&pfsyncif->sc_ifq.ifq_mtx, ifp->if_xname, - "pfsync send queue", MTX_DEF); - TASK_INIT(&pfsyncif->sc_send_task, 0, pfsync_senddef, pfsyncif); -#endif - pfsyncif->sc_mbuf = NULL; - pfsyncif->sc_mbuf_net = NULL; -#ifdef PFSYNC_TDB - pfsyncif->sc_mbuf_tdb = NULL; -#endif - pfsyncif->sc_statep.s = NULL; - pfsyncif->sc_statep_net.s = NULL; -#ifdef PFSYNC_TDB - pfsyncif->sc_statep_tdb.t = NULL; -#endif - pfsyncif->sc_maxupdates = 128; -#ifdef __FreeBSD__ - pfsyncif->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP); - 
pfsyncif->sc_sendaddr.s_addr = htonl(INADDR_PFSYNC_GROUP); #else - pfsyncif->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP; - pfsyncif->sc_sendaddr.s_addr = INADDR_PFSYNC_GROUP; -#endif - pfsyncif->sc_ureq_received = 0; - pfsyncif->sc_ureq_sent = 0; - pfsyncif->sc_bulk_send_next = NULL; - pfsyncif->sc_bulk_terminator = NULL; -#ifndef __FreeBSD__ - ifp = &pfsyncif->sc_if; + ifp = &sc->sc_if; snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit); #endif - ifp->if_softc = pfsyncif; + ifp->if_softc = sc; ifp->if_ioctl = pfsyncioctl; ifp->if_output = pfsyncoutput; ifp->if_start = pfsyncstart; ifp->if_type = IFT_PFSYNC; ifp->if_snd.ifq_maxlen = ifqmaxlen; - ifp->if_hdrlen = PFSYNC_HDRLEN; - pfsync_setmtu(pfsyncif, ETHERMTU); + ifp->if_hdrlen = sizeof(struct pfsync_header); + ifp->if_mtu = 1500; /* XXX */ #ifdef __FreeBSD__ - callout_init(&pfsyncif->sc_tmo, CALLOUT_MPSAFE); -#ifdef PFSYNC_TDB - callout_init(&pfsyncif->sc_tdb_tmo, CALLOUT_MPSAFE); -#endif - callout_init(&pfsyncif->sc_bulk_tmo, CALLOUT_MPSAFE); - callout_init(&pfsyncif->sc_bulkfail_tmo, CALLOUT_MPSAFE); + callout_init(&sc->sc_tmo, CALLOUT_MPSAFE); + callout_init(&sc->sc_bulk_tmo, CALLOUT_MPSAFE); + callout_init(&sc->sc_bulkfail_tmo, CALLOUT_MPSAFE); #else - timeout_set(&pfsyncif->sc_tmo, pfsync_timeout, pfsyncif); - timeout_set(&pfsyncif->sc_tdb_tmo, pfsync_tdb_timeout, pfsyncif); - timeout_set(&pfsyncif->sc_bulk_tmo, pfsync_bulk_update, pfsyncif); - timeout_set(&pfsyncif->sc_bulkfail_tmo, pfsync_bulkfail, pfsyncif); + ifp->if_hardmtu = MCLBYTES; /* XXX */ + timeout_set(&sc->sc_tmo, pfsync_timeout, sc); + timeout_set(&sc->sc_bulk_tmo, pfsync_bulk_update, sc); + timeout_set(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc); #endif + if_attach(ifp); #ifndef __FreeBSD__ if_alloc_sadl(ifp); @@ -307,10 +515,16 @@ pfsync_clone_create(struct if_clone *ifc, int unit) #ifdef __FreeBSD__ bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN); #else - bpfattach(&pfsyncif->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN); + 
bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN); #endif #endif +#ifdef __FreeBSD__ + V_pfsyncif = sc; +#else + pfsyncif = sc; +#endif + return (0); } @@ -321,60 +535,92 @@ int #endif pfsync_clone_destroy(struct ifnet *ifp) { + struct pfsync_softc *sc = ifp->if_softc; + +#ifdef __FreeBSD__ + EVENTHANDLER_DEREGISTER(ifnet_departure_event, sc->sc_detachtag); +#endif + timeout_del(&sc->sc_bulk_tmo); + timeout_del(&sc->sc_tmo); +#if NCARP > 0 +#ifdef notyet #ifdef __FreeBSD__ - EVENTHANDLER_DEREGISTER(ifnet_departure_event, pfsyncif->sc_detachtag); - callout_stop(&pfsyncif->sc_tmo); -#ifdef PFSYNC_TDB - callout_stop(&pfsyncif->sc_tdb_tmo); + if (!sc->pfsync_sync_ok) +#else + if (!pfsync_sync_ok) +#endif + carp_group_demote_adj(&sc->sc_if, -1); #endif - callout_stop(&pfsyncif->sc_bulk_tmo); - callout_stop(&pfsyncif->sc_bulkfail_tmo); - /* XXX: more? */ #endif - #if NBPFILTER > 0 bpfdetach(ifp); #endif if_detach(ifp); + + pfsync_drop(sc); + + while (sc->sc_deferred > 0) + pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0); + +#ifdef __FreeBSD__ + UMA_DESTROY(sc->sc_pool); +#else + pool_destroy(&sc->sc_pool); +#endif #ifdef __FreeBSD__ if_free(ifp); - free(pfsyncif->sc_imo.imo_membership, M_DEVBUF); + free(sc->sc_imo.imo_membership, M_DEVBUF); +#else + free(sc->sc_imo.imo_membership, M_IPMOPTS); #endif - free(pfsyncif, M_DEVBUF); + free(sc, M_DEVBUF); + +#ifdef __FreeBSD__ + V_pfsyncif = NULL; +#else pfsyncif = NULL; +#endif + #ifndef __FreeBSD__ return (0); #endif } -/* - * Start output on the pfsync interface. 
- */ -void -pfsyncstart(struct ifnet *ifp) +struct mbuf * +pfsync_if_dequeue(struct ifnet *ifp) { struct mbuf *m; #ifndef __FreeBSD__ int s; #endif - for (;;) { #ifdef __FreeBSD__ - IF_LOCK(&ifp->if_snd); - _IF_DROP(&ifp->if_snd); - _IF_DEQUEUE(&ifp->if_snd, m); - IF_UNLOCK(&ifp->if_snd); + IF_LOCK(&ifp->if_snd); + _IF_DROP(&ifp->if_snd); + _IF_DEQUEUE(&ifp->if_snd, m); + IF_UNLOCK(&ifp->if_snd); #else - s = splnet(); - IF_DROP(&ifp->if_snd); - IF_DEQUEUE(&ifp->if_snd, m); - splx(s); + s = splnet(); + IF_DEQUEUE(&ifp->if_snd, m); + splx(s); #endif - if (m == NULL) - return; - else - m_freem(m); + return (m); +} + +/* + * Start output on the pfsync interface. + */ +void +pfsyncstart(struct ifnet *ifp) +{ + struct mbuf *m; + + while ((m = pfsync_if_dequeue(ifp)) != NULL) { +#ifndef __FreeBSD__ + IF_DROP(&ifp->if_snd); +#endif + m_freem(m); } } @@ -383,85 +629,196 @@ pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, struct pf_state_peer *d) { if (s->scrub.scrub_flag && d->scrub == NULL) { - d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT); +#ifdef __FreeBSD__ + d->scrub = pool_get(&V_pf_state_scrub_pl, PR_NOWAIT | PR_ZERO); +#else + d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO); +#endif if (d->scrub == NULL) return (ENOMEM); - bzero(d->scrub, sizeof(*d->scrub)); } return (0); } +#ifndef __FreeBSD__ +void +pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) +{ + bzero(sp, sizeof(struct pfsync_state)); + + /* copy from state key */ + sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0]; + sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1]; + sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0]; + sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1]; + sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0]; + sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1]; + sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0]; + sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1]; + 
sp->proto = st->key[PF_SK_WIRE]->proto; + sp->af = st->key[PF_SK_WIRE]->af; + + /* copy from state */ + strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname)); + bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr)); + sp->creation = htonl(time_second - st->creation); + sp->expire = pf_state_expires(st); + if (sp->expire <= time_second) + sp->expire = htonl(0); + else + sp->expire = htonl(sp->expire - time_second); + + sp->direction = st->direction; + sp->log = st->log; + sp->timeout = st->timeout; + sp->state_flags = st->state_flags; + if (st->src_node) + sp->sync_flags |= PFSYNC_FLAG_SRCNODE; + if (st->nat_src_node) + sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE; + + bcopy(&st->id, &sp->id, sizeof(sp->id)); + sp->creatorid = st->creatorid; + pf_state_peer_hton(&st->src, &sp->src); + pf_state_peer_hton(&st->dst, &sp->dst); + + if (st->rule.ptr == NULL) + sp->rule = htonl(-1); + else + sp->rule = htonl(st->rule.ptr->nr); + if (st->anchor.ptr == NULL) + sp->anchor = htonl(-1); + else + sp->anchor = htonl(st->anchor.ptr->nr); + if (st->nat_rule.ptr == NULL) + sp->nat_rule = htonl(-1); + else + sp->nat_rule = htonl(st->nat_rule.ptr->nr); + + pf_state_counter_hton(st->packets[0], sp->packets[0]); + pf_state_counter_hton(st->packets[1], sp->packets[1]); + pf_state_counter_hton(st->bytes[0], sp->bytes[0]); + pf_state_counter_hton(st->bytes[1], sp->bytes[1]); + +} +#endif + int -pfsync_insert_net_state(struct pfsync_state *sp, u_int8_t chksum_flag) +pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) { struct pf_state *st = NULL; + struct pf_state_key *skw = NULL, *sks = NULL; struct pf_rule *r = NULL; struct pfi_kif *kif; + int pool_flags; + int error; +#ifdef __FreeBSD__ + if (sp->creatorid == 0 && V_pf_status.debug >= PF_DEBUG_MISC) { +#else if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) { - printf("pfsync_insert_net_state: invalid creator id:" +#endif + printf("pfsync_state_import: invalid creator id:" " %08x\n", ntohl(sp->creatorid)); return 
(EINVAL); } - kif = pfi_kif_get(sp->ifname); - if (kif == NULL) { + if ((kif = pfi_kif_get(sp->ifname)) == NULL) { +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) +#else if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync_insert_net_state: " +#endif + printf("pfsync_state_import: " "unknown interface: %s\n", sp->ifname); - /* skip this state */ - return (0); + if (flags & PFSYNC_SI_IOCTL) + return (EINVAL); + return (0); /* skip this state */ } /* - * If the ruleset checksums match, it's safe to associate the state - * with the rule of that number. + * If the ruleset checksums match or the state is coming from the ioctl, + * it's safe to associate the state with the rule of that number. */ - if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && chksum_flag) + if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && + (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) < + pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) r = pf_main_ruleset.rules[ PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; else +#ifdef __FreeBSD__ + r = &V_pf_default_rule; +#else r = &pf_default_rule; +#endif - if (!r->max_states || r->states < r->max_states) - st = pool_get(&pf_state_pl, PR_NOWAIT); - if (st == NULL) { - pfi_kif_unref(kif, PFI_KIF_REF_NONE); - return (ENOMEM); - } - bzero(st, sizeof(*st)); + if ((r->max_states && r->states_cur >= r->max_states)) + goto cleanup; - /* allocate memory for scrub info */ - if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || - pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) { - pfi_kif_unref(kif, PFI_KIF_REF_NONE); - if (st->src.scrub) - pool_put(&pf_state_scrub_pl, st->src.scrub); - pool_put(&pf_state_pl, st); - return (ENOMEM); - } +#ifdef __FreeBSD__ + if (flags & PFSYNC_SI_IOCTL) + pool_flags = PR_WAITOK | PR_ZERO; + else + pool_flags = PR_ZERO; - st->rule.ptr = r; - /* XXX get pointers to nat_rule and anchor */ + if ((st = pool_get(&V_pf_state_pl, pool_flags)) == NULL) + goto cleanup; +#else + if 
(flags & PFSYNC_SI_IOCTL) + pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO; + else + pool_flags = PR_LIMITFAIL | PR_ZERO; - /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ - r->states++; + if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL) + goto cleanup; +#endif - /* fill in the rest of the state entry */ - pf_state_host_ntoh(&sp->lan, &st->lan); - pf_state_host_ntoh(&sp->gwy, &st->gwy); - pf_state_host_ntoh(&sp->ext, &st->ext); + if ((skw = pf_alloc_state_key(pool_flags)) == NULL) + goto cleanup; - pf_state_peer_ntoh(&sp->src, &st->src); - pf_state_peer_ntoh(&sp->dst, &st->dst); + if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0], + &sp->key[PF_SK_STACK].addr[0], sp->af) || + PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1], + &sp->key[PF_SK_STACK].addr[1], sp->af) || + sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] || + sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1]) { + if ((sks = pf_alloc_state_key(pool_flags)) == NULL) + goto cleanup; + } else + sks = skw; + + /* allocate memory for scrub info */ + if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || + pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) + goto cleanup; + + /* copy to state key(s) */ + skw->addr[0] = sp->key[PF_SK_WIRE].addr[0]; + skw->addr[1] = sp->key[PF_SK_WIRE].addr[1]; + skw->port[0] = sp->key[PF_SK_WIRE].port[0]; + skw->port[1] = sp->key[PF_SK_WIRE].port[1]; + skw->proto = sp->proto; + skw->af = sp->af; + if (sks != skw) { + sks->addr[0] = sp->key[PF_SK_STACK].addr[0]; + sks->addr[1] = sp->key[PF_SK_STACK].addr[1]; + sks->port[0] = sp->key[PF_SK_STACK].port[0]; + sks->port[1] = sp->key[PF_SK_STACK].port[1]; + sks->proto = sp->proto; + sks->af = sp->af; + } + /* copy to state */ bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); st->creation = time_second - ntohl(sp->creation); - st->expire = ntohl(sp->expire) + time_second; + st->expire = time_second; + if (sp->expire) { + /* XXX No adaptive scaling. 
*/ + st->expire -= r->timeout[sp->timeout] - ntohl(sp->expire); + } - st->af = sp->af; - st->proto = sp->proto; + st->expire = ntohl(sp->expire) + time_second; st->direction = sp->direction; st->log = sp->log; st->timeout = sp->timeout; @@ -469,621 +826,942 @@ pfsync_insert_net_state(struct pfsync_state *sp, u_int8_t chksum_flag) bcopy(sp->id, &st->id, sizeof(st->id)); st->creatorid = sp->creatorid; - st->sync_flags = PFSTATE_FROMSYNC; + pf_state_peer_ntoh(&sp->src, &st->src); + pf_state_peer_ntoh(&sp->dst, &st->dst); - if (pf_insert_state(kif, st)) { - pfi_kif_unref(kif, PFI_KIF_REF_NONE); + st->rule.ptr = r; + st->nat_rule.ptr = NULL; + st->anchor.ptr = NULL; + st->rt_kif = NULL; + + st->pfsync_time = time_second; + st->sync_state = PFSYNC_S_NONE; + + /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ + r->states_cur++; + r->states_tot++; + + if (!ISSET(flags, PFSYNC_SI_IOCTL)) + SET(st->state_flags, PFSTATE_NOSYNC); + + if ((error = pf_state_insert(kif, skw, sks, st)) != 0) { /* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */ - r->states--; - if (st->dst.scrub) - pool_put(&pf_state_scrub_pl, st->dst.scrub); - if (st->src.scrub) - pool_put(&pf_state_scrub_pl, st->src.scrub); - pool_put(&pf_state_pl, st); - return (EINVAL); + r->states_cur--; + goto cleanup_state; } + if (!ISSET(flags, PFSYNC_SI_IOCTL)) { + CLR(st->state_flags, PFSTATE_NOSYNC); + if (ISSET(st->state_flags, PFSTATE_ACK)) { + pfsync_q_ins(st, PFSYNC_S_IACK); + schednetisr(NETISR_PFSYNC); + } + } + CLR(st->state_flags, PFSTATE_ACK); + return (0); -} -void +cleanup: + error = ENOMEM; + if (skw == sks) + sks = NULL; #ifdef __FreeBSD__ -pfsync_input(struct mbuf *m, __unused int off) + if (skw != NULL) + pool_put(&V_pf_state_key_pl, skw); + if (sks != NULL) + pool_put(&V_pf_state_key_pl, sks); #else -pfsync_input(struct mbuf *m, ...) 
+ if (skw != NULL) + pool_put(&pf_state_key_pl, skw); + if (sks != NULL) + pool_put(&pf_state_key_pl, sks); #endif -{ - struct ip *ip = mtod(m, struct ip *); - struct pfsync_header *ph; - struct pfsync_softc *sc = pfsyncif; - struct pf_state *st; - struct pf_state_cmp key; - struct pfsync_state *sp; - struct pfsync_state_upd *up; - struct pfsync_state_del *dp; - struct pfsync_state_clr *cp; - struct pfsync_state_upd_req *rup; - struct pfsync_state_bus *bus; -#ifdef PFSYNC_TDB - struct pfsync_tdb *pt; + +cleanup_state: /* pf_state_insert frees the state keys */ + if (st) { +#ifdef __FreeBSD__ + if (st->dst.scrub) + pool_put(&V_pf_state_scrub_pl, st->dst.scrub); + if (st->src.scrub) + pool_put(&V_pf_state_scrub_pl, st->src.scrub); + pool_put(&V_pf_state_pl, st); +#else + if (st->dst.scrub) + pool_put(&pf_state_scrub_pl, st->dst.scrub); + if (st->src.scrub) + pool_put(&pf_state_scrub_pl, st->src.scrub); + pool_put(&pf_state_pl, st); #endif - struct in_addr src; - struct mbuf *mp; - int iplen, action, error, i, s, count, offp, sfail, stale = 0; - u_int8_t chksum_flag = 0; + } + return (error); +} + +void +#ifdef __FreeBSD__ +pfsync_input(struct mbuf *m, __unused int off) +#else +pfsync_input(struct mbuf *m, ...) 
+#endif +{ +#ifdef __FreeBSD__ + struct pfsync_softc *sc = V_pfsyncif; +#else + struct pfsync_softc *sc = pfsyncif; +#endif + struct pfsync_pkt pkt; + struct ip *ip = mtod(m, struct ip *); + struct pfsync_header *ph; + struct pfsync_subheader subh; - pfsyncstats.pfsyncs_ipackets++; + int offset; + int rv; + + V_pfsyncstats.pfsyncs_ipackets++; /* verify that we have a sync interface configured */ - if (!sc || !sc->sc_sync_ifp || !pf_status.running) +#ifdef __FreeBSD__ + if (!sc || !sc->sc_sync_if || !V_pf_status.running) +#else + if (!sc || !sc->sc_sync_if || !pf_status.running) +#endif goto done; /* verify that the packet came in on the right interface */ - if (sc->sc_sync_ifp != m->m_pkthdr.rcvif) { - pfsyncstats.pfsyncs_badif++; + if (sc->sc_sync_if != m->m_pkthdr.rcvif) { + V_pfsyncstats.pfsyncs_badif++; goto done; } - /* verify that the IP TTL is 255. */ +#ifdef __FreeBSD__ + sc->sc_ifp->if_ipackets++; + sc->sc_ifp->if_ibytes += m->m_pkthdr.len; +#else + sc->sc_if.if_ipackets++; + sc->sc_if.if_ibytes += m->m_pkthdr.len; +#endif + /* verify that the IP TTL is 255. 
*/ if (ip->ip_ttl != PFSYNC_DFLTTL) { - pfsyncstats.pfsyncs_badttl++; + V_pfsyncstats.pfsyncs_badttl++; goto done; } - iplen = ip->ip_hl << 2; - - if (m->m_pkthdr.len < iplen + sizeof(*ph)) { - pfsyncstats.pfsyncs_hdrops++; + offset = ip->ip_hl << 2; + if (m->m_pkthdr.len < offset + sizeof(*ph)) { + V_pfsyncstats.pfsyncs_hdrops++; goto done; } - if (iplen + sizeof(*ph) > m->m_len) { - if ((m = m_pullup(m, iplen + sizeof(*ph))) == NULL) { - pfsyncstats.pfsyncs_hdrops++; - goto done; + if (offset + sizeof(*ph) > m->m_len) { + if (m_pullup(m, offset + sizeof(*ph)) == NULL) { + V_pfsyncstats.pfsyncs_hdrops++; + return; } ip = mtod(m, struct ip *); } - ph = (struct pfsync_header *)((char *)ip + iplen); + ph = (struct pfsync_header *)((char *)ip + offset); /* verify the version */ if (ph->version != PFSYNC_VERSION) { - pfsyncstats.pfsyncs_badver++; + V_pfsyncstats.pfsyncs_badver++; goto done; } - action = ph->action; - count = ph->count; - - /* make sure it's a valid action code */ - if (action >= PFSYNC_ACT_MAX) { - pfsyncstats.pfsyncs_badact++; +#if 0 + if (pfsync_input_hmac(m, offset) != 0) { + /* XXX stats */ goto done; } +#endif /* Cheaper to grab this now than having to mess with mbufs later */ - src = ip->ip_src; - - if (!bcmp(&ph->pf_chksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) - chksum_flag++; - - switch (action) { - case PFSYNC_ACT_CLR: { - struct pf_state *nexts; - struct pfi_kif *kif; - u_int32_t creatorid; - if ((mp = m_pulldown(m, iplen + sizeof(*ph), - sizeof(*cp), &offp)) == NULL) { - pfsyncstats.pfsyncs_badlen++; - return; + pkt.ip = ip; + pkt.src = ip->ip_src; + pkt.flags = 0; + +#ifdef __FreeBSD__ + if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) +#else + if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) +#endif + pkt.flags |= PFSYNC_SI_CKSUM; + + offset += sizeof(*ph); + for (;;) { + m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); + offset += sizeof(subh); + + if (subh.action >= PFSYNC_ACT_MAX) { 
+ V_pfsyncstats.pfsyncs_badact++; + goto done; } - cp = (struct pfsync_state_clr *)(mp->m_data + offp); - creatorid = cp->creatorid; - s = splsoftnet(); + rv = (*pfsync_acts[subh.action])(&pkt, m, offset, + ntohs(subh.count)); + if (rv == -1) + return; + + offset += rv; + } + +done: + m_freem(m); +} + +int +pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) +{ + struct pfsync_clr *clr; + struct mbuf *mp; + int len = sizeof(*clr) * count; + int i, offp; + + struct pf_state *st, *nexts; + struct pf_state_key *sk, *nextsk; + struct pf_state_item *si; + u_int32_t creatorid; + int s; + + mp = m_pulldown(m, offset, len, &offp); + if (mp == NULL) { + V_pfsyncstats.pfsyncs_badlen++; + return (-1); + } + clr = (struct pfsync_clr *)(mp->m_data + offp); + + s = splsoftnet(); #ifdef __FreeBSD__ - PF_LOCK(); + PF_LOCK(); #endif - if (cp->ifname[0] == '\0') { + for (i = 0; i < count; i++) { + creatorid = clr[i].creatorid; + + if (clr[i].ifname[0] == '\0') { +#ifdef __FreeBSD__ + for (st = RB_MIN(pf_state_tree_id, &V_tree_id); + st; st = nexts) { + nexts = RB_NEXT(pf_state_tree_id, &V_tree_id, st); +#else for (st = RB_MIN(pf_state_tree_id, &tree_id); st; st = nexts) { nexts = RB_NEXT(pf_state_tree_id, &tree_id, st); +#endif if (st->creatorid == creatorid) { - st->sync_flags |= PFSTATE_FROMSYNC; + SET(st->state_flags, PFSTATE_NOSYNC); pf_unlink_state(st); } } } else { - if ((kif = pfi_kif_get(cp->ifname)) == NULL) { + if (pfi_kif_get(clr[i].ifname) == NULL) + continue; + + /* XXX correct? 
*/ #ifdef __FreeBSD__ - PF_UNLOCK(); + for (sk = RB_MIN(pf_state_tree, &V_pf_statetbl); +#else + for (sk = RB_MIN(pf_state_tree, &pf_statetbl); #endif - splx(s); - return; - } - for (st = RB_MIN(pf_state_tree_lan_ext, - &kif->pfik_lan_ext); st; st = nexts) { - nexts = RB_NEXT(pf_state_tree_lan_ext, - &kif->pfik_lan_ext, st); - if (st->creatorid == creatorid) { - st->sync_flags |= PFSTATE_FROMSYNC; - pf_unlink_state(st); + sk; sk = nextsk) { + nextsk = RB_NEXT(pf_state_tree, +#ifdef __FreeBSD__ + &V_pf_statetbl, sk); +#else + &pf_statetbl, sk); +#endif + TAILQ_FOREACH(si, &sk->states, entry) { + if (si->s->creatorid == creatorid) { + SET(si->s->state_flags, + PFSTATE_NOSYNC); + pf_unlink_state(si->s); + } } } } + } #ifdef __FreeBSD__ - PF_UNLOCK(); + PF_UNLOCK(); #endif - splx(s); + splx(s); - break; + return (len); +} + +int +pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) +{ + struct mbuf *mp; + struct pfsync_state *sa, *sp; + int len = sizeof(*sp) * count; + int i, offp; + + int s; + + mp = m_pulldown(m, offset, len, &offp); + if (mp == NULL) { + V_pfsyncstats.pfsyncs_badlen++; + return (-1); } - case PFSYNC_ACT_INS: - if ((mp = m_pulldown(m, iplen + sizeof(*ph), - count * sizeof(*sp), &offp)) == NULL) { - pfsyncstats.pfsyncs_badlen++; - return; - } + sa = (struct pfsync_state *)(mp->m_data + offp); - s = splsoftnet(); + s = splsoftnet(); #ifdef __FreeBSD__ - PF_LOCK(); + PF_LOCK(); #endif - for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); - i < count; i++, sp++) { - /* check for invalid values */ - if (sp->timeout >= PFTM_MAX || - sp->src.state > PF_TCPS_PROXY_DST || - sp->dst.state > PF_TCPS_PROXY_DST || - sp->direction > PF_OUT || - (sp->af != AF_INET && sp->af != AF_INET6)) { - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync_insert: PFSYNC_ACT_INS: " - "invalid value\n"); - pfsyncstats.pfsyncs_badstate++; - continue; - } + for (i = 0; i < count; i++) { + sp = &sa[i]; - if ((error = pfsync_insert_net_state(sp, - 
chksum_flag))) { - if (error == ENOMEM) { + /* check for invalid values */ + if (sp->timeout >= PFTM_MAX || + sp->src.state > PF_TCPS_PROXY_DST || + sp->dst.state > PF_TCPS_PROXY_DST || + sp->direction > PF_OUT || + (sp->af != AF_INET && sp->af != AF_INET6)) { #ifdef __FreeBSD__ - PF_UNLOCK(); + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else + if (pf_status.debug >= PF_DEBUG_MISC) { #endif - splx(s); - goto done; - } - continue; + printf("pfsync_input: PFSYNC5_ACT_INS: " + "invalid value\n"); } + V_pfsyncstats.pfsyncs_badval++; + continue; } + + if (pfsync_state_import(sp, pkt->flags) == ENOMEM) { + /* drop out, but process the rest of the actions */ + break; + } + } #ifdef __FreeBSD__ - PF_UNLOCK(); + PF_UNLOCK(); #endif - splx(s); - break; - case PFSYNC_ACT_UPD: - if ((mp = m_pulldown(m, iplen + sizeof(*ph), - count * sizeof(*sp), &offp)) == NULL) { - pfsyncstats.pfsyncs_badlen++; - return; - } + splx(s); + + return (len); +} + +int +pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) +{ + struct pfsync_ins_ack *ia, *iaa; + struct pf_state_cmp id_key; + struct pf_state *st; + + struct mbuf *mp; + int len = count * sizeof(*ia); + int offp, i; + int s; - s = splsoftnet(); + mp = m_pulldown(m, offset, len, &offp); + if (mp == NULL) { + V_pfsyncstats.pfsyncs_badlen++; + return (-1); + } + iaa = (struct pfsync_ins_ack *)(mp->m_data + offp); + + s = splsoftnet(); #ifdef __FreeBSD__ - PF_LOCK(); + PF_LOCK(); #endif - for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); - i < count; i++, sp++) { - int flags = PFSYNC_FLAG_STALE; - - /* check for invalid values */ - if (sp->timeout >= PFTM_MAX || - sp->src.state > PF_TCPS_PROXY_DST || - sp->dst.state > PF_TCPS_PROXY_DST) { - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync_insert: PFSYNC_ACT_UPD: " - "invalid value\n"); - pfsyncstats.pfsyncs_badstate++; - continue; - } + for (i = 0; i < count; i++) { + ia = &iaa[i]; - bcopy(sp->id, &key.id, sizeof(key.id)); - key.creatorid = 
sp->creatorid; + bcopy(&ia->id, &id_key.id, sizeof(id_key.id)); + id_key.creatorid = ia->creatorid; - st = pf_find_state_byid(&key); - if (st == NULL) { - /* insert the update */ - if (pfsync_insert_net_state(sp, chksum_flag)) - pfsyncstats.pfsyncs_badstate++; - continue; - } - sfail = 0; - if (st->proto == IPPROTO_TCP) { - /* - * The state should never go backwards except - * for syn-proxy states. Neither should the - * sequence window slide backwards. - */ - if (st->src.state > sp->src.state && - (st->src.state < PF_TCPS_PROXY_SRC || - sp->src.state >= PF_TCPS_PROXY_SRC)) - sfail = 1; - else if (SEQ_GT(st->src.seqlo, - ntohl(sp->src.seqlo))) - sfail = 3; - else if (st->dst.state > sp->dst.state) { - /* There might still be useful - * information about the src state here, - * so import that part of the update, - * then "fail" so we send the updated - * state back to the peer who is missing - * our what we know. */ - pf_state_peer_ntoh(&sp->src, &st->src); - /* XXX do anything with timeouts? */ - sfail = 7; - flags = 0; - } else if (st->dst.state >= TCPS_SYN_SENT && - SEQ_GT(st->dst.seqlo, ntohl(sp->dst.seqlo))) - sfail = 4; - } else { - /* - * Non-TCP protocol state machine always go - * forwards - */ - if (st->src.state > sp->src.state) - sfail = 5; - else if (st->dst.state > sp->dst.state) - sfail = 6; - } - if (sfail) { - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync: %s stale update " - "(%d) id: %016llx " - "creatorid: %08x\n", - (sfail < 7 ? 
"ignoring" - : "partial"), sfail, - betoh64(st->id), - ntohl(st->creatorid)); - pfsyncstats.pfsyncs_badstate++; - - if (!(sp->sync_flags & PFSTATE_STALE)) { - /* we have a better state, send it */ - if (sc->sc_mbuf != NULL && !stale) - pfsync_sendout(sc); - stale++; - if (!st->sync_flags) - pfsync_pack_state( - PFSYNC_ACT_UPD, st, flags); - } - continue; - } - pfsync_alloc_scrub_memory(&sp->dst, &st->dst); - pf_state_peer_ntoh(&sp->src, &st->src); - pf_state_peer_ntoh(&sp->dst, &st->dst); - st->expire = ntohl(sp->expire) + time_second; - st->timeout = sp->timeout; - } - if (stale && sc->sc_mbuf != NULL) - pfsync_sendout(sc); + st = pf_find_state_byid(&id_key); + if (st == NULL) + continue; + + if (ISSET(st->state_flags, PFSTATE_ACK)) + pfsync_deferred(st, 0); + } #ifdef __FreeBSD__ - PF_UNLOCK(); + PF_UNLOCK(); #endif - splx(s); - break; + splx(s); /* - * It's not strictly necessary for us to support the "uncompressed" - * delete action, but it's relatively simple and maintains consistency. + * XXX this is not yet implemented, but we know the size of the + * message so we can skip it. */ - case PFSYNC_ACT_DEL: - if ((mp = m_pulldown(m, iplen + sizeof(*ph), - count * sizeof(*sp), &offp)) == NULL) { - pfsyncstats.pfsyncs_badlen++; - return; - } - s = splsoftnet(); + return (count * sizeof(struct pfsync_ins_ack)); +} + +int +pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src, + struct pfsync_state_peer *dst) +{ + int sfail = 0; + + /* + * The state should never go backwards except + * for syn-proxy states. Neither should the + * sequence window slide backwards. 
+ */ + if (st->src.state > src->state && + (st->src.state < PF_TCPS_PROXY_SRC || + src->state >= PF_TCPS_PROXY_SRC)) + sfail = 1; + else if (SEQ_GT(st->src.seqlo, ntohl(src->seqlo))) + sfail = 3; + else if (st->dst.state > dst->state) { + /* There might still be useful + * information about the src state here, + * so import that part of the update, + * then "fail" so we send the updated + * state back to the peer who is missing + * our what we know. */ + pf_state_peer_ntoh(src, &st->src); + /* XXX do anything with timeouts? */ + sfail = 7; + } else if (st->dst.state >= TCPS_SYN_SENT && + SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))) + sfail = 4; + + return (sfail); +} + +int +pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) +{ + struct pfsync_state *sa, *sp; + struct pf_state_cmp id_key; + struct pf_state_key *sk; + struct pf_state *st; + int sfail; + + struct mbuf *mp; + int len = count * sizeof(*sp); + int offp, i; + int s; + + mp = m_pulldown(m, offset, len, &offp); + if (mp == NULL) { + V_pfsyncstats.pfsyncs_badlen++; + return (-1); + } + sa = (struct pfsync_state *)(mp->m_data + offp); + + s = splsoftnet(); #ifdef __FreeBSD__ - PF_LOCK(); + PF_LOCK(); #endif - for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); - i < count; i++, sp++) { - bcopy(sp->id, &key.id, sizeof(key.id)); - key.creatorid = sp->creatorid; + for (i = 0; i < count; i++) { + sp = &sa[i]; - st = pf_find_state_byid(&key); - if (st == NULL) { - pfsyncstats.pfsyncs_badstate++; - continue; - } - st->sync_flags |= PFSTATE_FROMSYNC; - pf_unlink_state(st); - } + /* check for invalid values */ + if (sp->timeout >= PFTM_MAX || + sp->src.state > PF_TCPS_PROXY_DST || + sp->dst.state > PF_TCPS_PROXY_DST) { #ifdef __FreeBSD__ - PF_UNLOCK(); + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else + if (pf_status.debug >= PF_DEBUG_MISC) { #endif - splx(s); - break; - case PFSYNC_ACT_UPD_C: { - int update_requested = 0; + printf("pfsync_input: PFSYNC_ACT_UPD: " + "invalid 
value\n"); + } + V_pfsyncstats.pfsyncs_badval++; + continue; + } - if ((mp = m_pulldown(m, iplen + sizeof(*ph), - count * sizeof(*up), &offp)) == NULL) { - pfsyncstats.pfsyncs_badlen++; - return; + bcopy(sp->id, &id_key.id, sizeof(id_key.id)); + id_key.creatorid = sp->creatorid; + + st = pf_find_state_byid(&id_key); + if (st == NULL) { + /* insert the update */ + if (pfsync_state_import(sp, 0)) + V_pfsyncstats.pfsyncs_badstate++; + continue; } - s = splsoftnet(); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif - for (i = 0, up = (struct pfsync_state_upd *)(mp->m_data + offp); - i < count; i++, up++) { - /* check for invalid values */ - if (up->timeout >= PFTM_MAX || - up->src.state > PF_TCPS_PROXY_DST || - up->dst.state > PF_TCPS_PROXY_DST) { - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync_insert: " - "PFSYNC_ACT_UPD_C: " - "invalid value\n"); - pfsyncstats.pfsyncs_badstate++; - continue; - } + if (ISSET(st->state_flags, PFSTATE_ACK)) + pfsync_deferred(st, 1); - bcopy(up->id, &key.id, sizeof(key.id)); - key.creatorid = up->creatorid; + sk = st->key[PF_SK_WIRE]; /* XXX right one? */ + sfail = 0; + if (sk->proto == IPPROTO_TCP) + sfail = pfsync_upd_tcp(st, &sp->src, &sp->dst); + else { + /* + * Non-TCP protocol state machine always go + * forwards + */ + if (st->src.state > sp->src.state) + sfail = 5; + else if (st->dst.state > sp->dst.state) + sfail = 6; + } - st = pf_find_state_byid(&key); - if (st == NULL) { - /* We don't have this state. Ask for it. */ - error = pfsync_request_update(up, &src); - if (error == ENOMEM) { + if (sfail) { #ifdef __FreeBSD__ - PF_UNLOCK(); + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else + if (pf_status.debug >= PF_DEBUG_MISC) { #endif - splx(s); - goto done; - } - update_requested = 1; - pfsyncstats.pfsyncs_badstate++; - continue; - } - sfail = 0; - if (st->proto == IPPROTO_TCP) { - /* - * The state should never go backwards except - * for syn-proxy states. Neither should the - * sequence window slide backwards. 
- */ - if (st->src.state > up->src.state && - (st->src.state < PF_TCPS_PROXY_SRC || - up->src.state >= PF_TCPS_PROXY_SRC)) - sfail = 1; - else if (st->dst.state > up->dst.state) - sfail = 2; - else if (SEQ_GT(st->src.seqlo, - ntohl(up->src.seqlo))) - sfail = 3; - else if (st->dst.state >= TCPS_SYN_SENT && - SEQ_GT(st->dst.seqlo, ntohl(up->dst.seqlo))) - sfail = 4; - } else { - /* - * Non-TCP protocol state machine always go - * forwards - */ - if (st->src.state > up->src.state) - sfail = 5; - else if (st->dst.state > up->dst.state) - sfail = 6; - } - if (sfail) { - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync: ignoring stale update " - "(%d) id: %016llx " - "creatorid: %08x\n", sfail, - betoh64(st->id), - ntohl(st->creatorid)); - pfsyncstats.pfsyncs_badstate++; - - /* we have a better state, send it out */ - if ((!stale || update_requested) && - sc->sc_mbuf != NULL) { - pfsync_sendout(sc); - update_requested = 0; - } - stale++; - if (!st->sync_flags) - pfsync_pack_state(PFSYNC_ACT_UPD, st, - PFSYNC_FLAG_STALE); - continue; + printf("pfsync: %s stale update (%d)" + " id: %016llx creatorid: %08x\n", + (sfail < 7 ? 
"ignoring" : "partial"), + sfail, betoh64(st->id), + ntohl(st->creatorid)); } - pfsync_alloc_scrub_memory(&up->dst, &st->dst); - pf_state_peer_ntoh(&up->src, &st->src); - pf_state_peer_ntoh(&up->dst, &st->dst); - st->expire = ntohl(up->expire) + time_second; - st->timeout = up->timeout; + V_pfsyncstats.pfsyncs_stale++; + + pfsync_update_state(st); + schednetisr(NETISR_PFSYNC); + continue; } - if ((update_requested || stale) && sc->sc_mbuf) - pfsync_sendout(sc); + pfsync_alloc_scrub_memory(&sp->dst, &st->dst); + pf_state_peer_ntoh(&sp->src, &st->src); + pf_state_peer_ntoh(&sp->dst, &st->dst); + st->expire = ntohl(sp->expire) + time_second; + st->timeout = sp->timeout; + st->pfsync_time = time_second; + } #ifdef __FreeBSD__ - PF_UNLOCK(); + PF_UNLOCK(); #endif - splx(s); - break; + splx(s); + + return (len); +} + +int +pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) +{ + struct pfsync_upd_c *ua, *up; + struct pf_state_key *sk; + struct pf_state_cmp id_key; + struct pf_state *st; + + int len = count * sizeof(*up); + int sfail; + + struct mbuf *mp; + int offp, i; + int s; + + mp = m_pulldown(m, offset, len, &offp); + if (mp == NULL) { + V_pfsyncstats.pfsyncs_badlen++; + return (-1); } - case PFSYNC_ACT_DEL_C: - if ((mp = m_pulldown(m, iplen + sizeof(*ph), - count * sizeof(*dp), &offp)) == NULL) { - pfsyncstats.pfsyncs_badlen++; - return; - } + ua = (struct pfsync_upd_c *)(mp->m_data + offp); - s = splsoftnet(); + s = splsoftnet(); #ifdef __FreeBSD__ - PF_LOCK(); + PF_LOCK(); #endif - for (i = 0, dp = (struct pfsync_state_del *)(mp->m_data + offp); - i < count; i++, dp++) { - bcopy(dp->id, &key.id, sizeof(key.id)); - key.creatorid = dp->creatorid; + for (i = 0; i < count; i++) { + up = &ua[i]; - st = pf_find_state_byid(&key); - if (st == NULL) { - pfsyncstats.pfsyncs_badstate++; - continue; - } - st->sync_flags |= PFSTATE_FROMSYNC; - pf_unlink_state(st); - } + /* check for invalid values */ + if (up->timeout >= PFTM_MAX || + up->src.state 
> PF_TCPS_PROXY_DST || + up->dst.state > PF_TCPS_PROXY_DST) { #ifdef __FreeBSD__ - PF_UNLOCK(); + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else + if (pf_status.debug >= PF_DEBUG_MISC) { #endif - splx(s); - break; - case PFSYNC_ACT_INS_F: - case PFSYNC_ACT_DEL_F: - /* not implemented */ - break; - case PFSYNC_ACT_UREQ: - if ((mp = m_pulldown(m, iplen + sizeof(*ph), - count * sizeof(*rup), &offp)) == NULL) { - pfsyncstats.pfsyncs_badlen++; - return; + printf("pfsync_input: " + "PFSYNC_ACT_UPD_C: " + "invalid value\n"); + } + V_pfsyncstats.pfsyncs_badval++; + continue; } - s = splsoftnet(); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif - if (sc->sc_mbuf != NULL) - pfsync_sendout(sc); - for (i = 0, - rup = (struct pfsync_state_upd_req *)(mp->m_data + offp); - i < count; i++, rup++) { - bcopy(rup->id, &key.id, sizeof(key.id)); - key.creatorid = rup->creatorid; - - if (key.id == 0 && key.creatorid == 0) { - sc->sc_ureq_received = time_uptime; - if (sc->sc_bulk_send_next == NULL) - sc->sc_bulk_send_next = - TAILQ_FIRST(&state_list); - sc->sc_bulk_terminator = sc->sc_bulk_send_next; - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync: received " - "bulk update request\n"); - pfsync_send_bus(sc, PFSYNC_BUS_START); -#ifdef __FreeBSD__ - callout_reset(&sc->sc_bulk_tmo, 1 * hz, - pfsync_bulk_update, pfsyncif); -#else - timeout_add(&sc->sc_bulk_tmo, 1 * hz); -#endif - } else { - st = pf_find_state_byid(&key); - if (st == NULL) { - pfsyncstats.pfsyncs_badstate++; - continue; - } - if (!st->sync_flags) - pfsync_pack_state(PFSYNC_ACT_UPD, - st, 0); - } + bcopy(&up->id, &id_key.id, sizeof(id_key.id)); + id_key.creatorid = up->creatorid; + + st = pf_find_state_byid(&id_key); + if (st == NULL) { + /* We don't have this state. Ask for it. 
*/ + pfsync_request_update(id_key.creatorid, id_key.id); + continue; } - if (sc->sc_mbuf != NULL) - pfsync_sendout(sc); -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - splx(s); - break; - case PFSYNC_ACT_BUS: - /* If we're not waiting for a bulk update, who cares. */ - if (sc->sc_ureq_sent == 0) - break; - if ((mp = m_pulldown(m, iplen + sizeof(*ph), - sizeof(*bus), &offp)) == NULL) { - pfsyncstats.pfsyncs_badlen++; - return; + if (ISSET(st->state_flags, PFSTATE_ACK)) + pfsync_deferred(st, 1); + + sk = st->key[PF_SK_WIRE]; /* XXX right one? */ + sfail = 0; + if (sk->proto == IPPROTO_TCP) + sfail = pfsync_upd_tcp(st, &up->src, &up->dst); + else { + /* + * Non-TCP protocol state machine always go forwards + */ + if (st->src.state > up->src.state) + sfail = 5; + else if (st->dst.state > up->dst.state) + sfail = 6; } - bus = (struct pfsync_state_bus *)(mp->m_data + offp); - switch (bus->status) { - case PFSYNC_BUS_START: + + if (sfail) { #ifdef __FreeBSD__ - callout_reset(&sc->sc_bulkfail_tmo, - pf_pool_limits[PF_LIMIT_STATES].limit / - (PFSYNC_BULKPACKETS * sc->sc_maxcount), - pfsync_bulkfail, pfsyncif); + if (V_pf_status.debug >= PF_DEBUG_MISC) { #else - timeout_add(&sc->sc_bulkfail_tmo, - pf_pool_limits[PF_LIMIT_STATES].limit / - (PFSYNC_BULKPACKETS * sc->sc_maxcount)); + if (pf_status.debug >= PF_DEBUG_MISC) { #endif - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync: received bulk " - "update start\n"); - break; - case PFSYNC_BUS_END: - if (time_uptime - ntohl(bus->endtime) >= - sc->sc_ureq_sent) { - /* that's it, we're happy */ - sc->sc_ureq_sent = 0; - sc->sc_bulk_tries = 0; - timeout_del(&sc->sc_bulkfail_tmo); + printf("pfsync: ignoring stale update " + "(%d) id: %016llx " + "creatorid: %08x\n", sfail, + betoh64(st->id), + ntohl(st->creatorid)); + } + V_pfsyncstats.pfsyncs_stale++; + + pfsync_update_state(st); + schednetisr(NETISR_PFSYNC); + continue; + } + pfsync_alloc_scrub_memory(&up->dst, &st->dst); + pf_state_peer_ntoh(&up->src, &st->src); + 
pf_state_peer_ntoh(&up->dst, &st->dst); + st->expire = ntohl(up->expire) + time_second; + st->timeout = up->timeout; + st->pfsync_time = time_second; + } +#ifdef __FreeBSD__ + PF_UNLOCK(); +#endif + splx(s); + + return (len); +} + +int +pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) +{ + struct pfsync_upd_req *ur, *ura; + struct mbuf *mp; + int len = count * sizeof(*ur); + int i, offp; + + struct pf_state_cmp id_key; + struct pf_state *st; + + mp = m_pulldown(m, offset, len, &offp); + if (mp == NULL) { + V_pfsyncstats.pfsyncs_badlen++; + return (-1); + } + ura = (struct pfsync_upd_req *)(mp->m_data + offp); + + for (i = 0; i < count; i++) { + ur = &ura[i]; + + bcopy(&ur->id, &id_key.id, sizeof(id_key.id)); + id_key.creatorid = ur->creatorid; + + if (id_key.id == 0 && id_key.creatorid == 0) + pfsync_bulk_start(); + else { + st = pf_find_state_byid(&id_key); + if (st == NULL) { + V_pfsyncstats.pfsyncs_badstate++; + continue; + } + if (ISSET(st->state_flags, PFSTATE_NOSYNC)) + continue; + + pfsync_update_state_req(st); + } + } + + return (len); +} + +int +pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) +{ + struct mbuf *mp; + struct pfsync_state *sa, *sp; + struct pf_state_cmp id_key; + struct pf_state *st; + int len = count * sizeof(*sp); + int offp, i; + int s; + + mp = m_pulldown(m, offset, len, &offp); + if (mp == NULL) { + V_pfsyncstats.pfsyncs_badlen++; + return (-1); + } + sa = (struct pfsync_state *)(mp->m_data + offp); + + s = splsoftnet(); +#ifdef __FreeBSD__ + PF_LOCK(); +#endif + for (i = 0; i < count; i++) { + sp = &sa[i]; + + bcopy(sp->id, &id_key.id, sizeof(id_key.id)); + id_key.creatorid = sp->creatorid; + + st = pf_find_state_byid(&id_key); + if (st == NULL) { + V_pfsyncstats.pfsyncs_badstate++; + continue; + } + SET(st->state_flags, PFSTATE_NOSYNC); + pf_unlink_state(st); + } +#ifdef __FreeBSD__ + PF_UNLOCK(); +#endif + splx(s); + + return (len); +} + +int +pfsync_in_del_c(struct pfsync_pkt 
*pkt, struct mbuf *m, int offset, int count) +{ + struct mbuf *mp; + struct pfsync_del_c *sa, *sp; + struct pf_state_cmp id_key; + struct pf_state *st; + int len = count * sizeof(*sp); + int offp, i; + int s; + + mp = m_pulldown(m, offset, len, &offp); + if (mp == NULL) { + V_pfsyncstats.pfsyncs_badlen++; + return (-1); + } + sa = (struct pfsync_del_c *)(mp->m_data + offp); + + s = splsoftnet(); +#ifdef __FreeBSD__ + PF_LOCK(); +#endif + for (i = 0; i < count; i++) { + sp = &sa[i]; + + bcopy(&sp->id, &id_key.id, sizeof(id_key.id)); + id_key.creatorid = sp->creatorid; + + st = pf_find_state_byid(&id_key); + if (st == NULL) { + V_pfsyncstats.pfsyncs_badstate++; + continue; + } + + SET(st->state_flags, PFSTATE_NOSYNC); + pf_unlink_state(st); + } +#ifdef __FreeBSD__ + PF_LOCK(); +#endif + splx(s); + + return (len); +} + +int +pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) +{ +#ifdef __FreeBSD__ + struct pfsync_softc *sc = V_pfsyncif; +#else + struct pfsync_softc *sc = pfsyncif; +#endif + struct pfsync_bus *bus; + struct mbuf *mp; + int len = count * sizeof(*bus); + int offp; + + /* If we're not waiting for a bulk update, who cares. 
*/ + if (sc->sc_ureq_sent == 0) + return (len); + + mp = m_pulldown(m, offset, len, &offp); + if (mp == NULL) { + V_pfsyncstats.pfsyncs_badlen++; + return (-1); + } + bus = (struct pfsync_bus *)(mp->m_data + offp); + + switch (bus->status) { + case PFSYNC_BUS_START: +#ifdef __FreeBSD__ + callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, + V_pfsyncif); +#else + timeout_add_sec(&sc->sc_bulkfail_tmo, 5); /* XXX magic */ +#endif +#ifdef XXX + pf_pool_limits[PF_LIMIT_STATES].limit / + (PFSYNC_BULKPACKETS * sc->sc_maxcount)); +#endif +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) +#else + if (pf_status.debug >= PF_DEBUG_MISC) +#endif + printf("pfsync: received bulk update start\n"); + break; + + case PFSYNC_BUS_END: + if (time_uptime - ntohl(bus->endtime) >= + sc->sc_ureq_sent) { + /* that's it, we're happy */ + sc->sc_ureq_sent = 0; + sc->sc_bulk_tries = 0; + timeout_del(&sc->sc_bulkfail_tmo); #if NCARP > 0 - if (!pfsync_sync_ok) +#ifdef notyet #ifdef __FreeBSD__ -#ifdef CARP_ADVANCED - carp_group_demote_adj(sc->sc_ifp, -1); + if (!sc->pfsync_sync_ok) +#else + if (!pfsync_sync_ok) +#endif + carp_group_demote_adj(&sc->sc_if, -1); +#endif #endif +#ifdef __FreeBSD__ + sc->pfsync_sync_ok = 1; #else - carp_group_demote_adj(&sc->sc_if, -1); + pfsync_sync_ok = 1; #endif +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) +#else + if (pf_status.debug >= PF_DEBUG_MISC) #endif - pfsync_sync_ok = 1; - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync: received valid " - "bulk update end\n"); - } else { - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync: received invalid " - "bulk update end: bad timestamp\n"); - } - break; + printf("pfsync: received valid " + "bulk update end\n"); + } else { +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) +#else + if (pf_status.debug >= PF_DEBUG_MISC) +#endif + printf("pfsync: received invalid " + "bulk update end: bad timestamp\n"); } break; -#ifdef PFSYNC_TDB - case 
PFSYNC_ACT_TDB_UPD: - if ((mp = m_pulldown(m, iplen + sizeof(*ph), - count * sizeof(*pt), &offp)) == NULL) { - pfsyncstats.pfsyncs_badlen++; - return; - } - s = splsoftnet(); + } + + return (len); +} + +int +pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) +{ + int len = count * sizeof(struct pfsync_tdb); + +#if defined(IPSEC) + struct pfsync_tdb *tp; + struct mbuf *mp; + int offp; + int i; + int s; + + mp = m_pulldown(m, offset, len, &offp); + if (mp == NULL) { + V_pfsyncstats.pfsyncs_badlen++; + return (-1); + } + tp = (struct pfsync_tdb *)(mp->m_data + offp); + + s = splsoftnet(); #ifdef __FreeBSD__ - PF_LOCK(); + PF_LOCK(); #endif - for (i = 0, pt = (struct pfsync_tdb *)(mp->m_data + offp); - i < count; i++, pt++) - pfsync_update_net_tdb(pt); + for (i = 0; i < count; i++) + pfsync_update_net_tdb(&tp[i]); #ifdef __FreeBSD__ - PF_UNLOCK(); + PF_UNLOCK(); #endif - splx(s); - break; + splx(s); #endif + + return (len); +} + +#if defined(IPSEC) +/* Update an in-kernel tdb. Silently fail if no tdb is found. */ +void +pfsync_update_net_tdb(struct pfsync_tdb *pt) +{ + struct tdb *tdb; + int s; + + /* check for invalid values */ + if (ntohl(pt->spi) <= SPI_RESERVED_MAX || + (pt->dst.sa.sa_family != AF_INET && + pt->dst.sa.sa_family != AF_INET6)) + goto bad; + + s = spltdb(); + tdb = gettdb(pt->spi, &pt->dst, pt->sproto); + if (tdb) { + pt->rpl = ntohl(pt->rpl); + pt->cur_bytes = betoh64(pt->cur_bytes); + + /* Neither replay nor byte counter should ever decrease. 
*/ + if (pt->rpl < tdb->tdb_rpl || + pt->cur_bytes < tdb->tdb_cur_bytes) { + splx(s); + goto bad; + } + + tdb->tdb_rpl = pt->rpl; + tdb->tdb_cur_bytes = pt->cur_bytes; } + splx(s); + return; -done: - if (m) - m_freem(m); +bad: +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) +#else + if (pf_status.debug >= PF_DEBUG_MISC) +#endif + printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " + "invalid value\n"); + V_pfsyncstats.pfsyncs_badstate++; + return; +} +#endif + + +int +pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) +{ + /* check if we are at the right place in the packet */ + if (offset != m->m_pkthdr.len - sizeof(struct pfsync_eof)) + V_pfsyncstats.pfsyncs_badact++; + + /* we're done. free and let the caller return */ + m_freem(m); + return (-1); +} + +int +pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) +{ + V_pfsyncstats.pfsyncs_badact++; + + m_freem(m); + return (-1); } int pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, - struct route *ro) +#ifdef __FreeBSD__ + struct route *rt) +#else + struct rtentry *rt) +#endif { m_freem(m); return (0); @@ -1101,12 +1779,15 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) struct ip_moptions *imo = &sc->sc_imo; struct pfsyncreq pfsyncr; struct ifnet *sifp; + struct ip *ip; int s, error; switch (cmd) { +#if 0 case SIOCSIFADDR: case SIOCAIFADDR: case SIOCSIFDSTADDR: +#endif case SIOCSIFFLAGS: #ifdef __FreeBSD__ if (ifp->if_flags & IFF_UP) @@ -1121,32 +1802,33 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) #endif break; case SIOCSIFMTU: - if (ifr->ifr_mtu < PFSYNC_MINMTU) + if (ifr->ifr_mtu <= PFSYNC_MINPKT) return (EINVAL); - if (ifr->ifr_mtu > MCLBYTES) + if (ifr->ifr_mtu > MCLBYTES) /* XXX could be bigger */ ifr->ifr_mtu = MCLBYTES; - s = splnet(); + if (ifr->ifr_mtu < ifp->if_mtu) { + s = splnet(); #ifdef __FreeBSD__ - PF_LOCK(); + PF_LOCK(); #endif - if (ifr->ifr_mtu < ifp->if_mtu) - pfsync_sendout(sc); - 
pfsync_setmtu(sc, ifr->ifr_mtu); + pfsync_sendout(); #ifdef __FreeBSD__ - PF_UNLOCK(); + PF_UNLOCK(); #endif - splx(s); + splx(s); + } + ifp->if_mtu = ifr->ifr_mtu; break; case SIOCGETPFSYNC: bzero(&pfsyncr, sizeof(pfsyncr)); - if (sc->sc_sync_ifp) + if (sc->sc_sync_if) { strlcpy(pfsyncr.pfsyncr_syncdev, - sc->sc_sync_ifp->if_xname, IFNAMSIZ); + sc->sc_sync_if->if_xname, IFNAMSIZ); + } pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; - if ((error = copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)))) - return (error); - break; + return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))); + case SIOCSETPFSYNC: #ifdef __FreeBSD__ if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) @@ -1182,20 +1864,13 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; if (pfsyncr.pfsyncr_syncdev[0] == 0) { - sc->sc_sync_ifp = NULL; - if (sc->sc_mbuf_net != NULL) { - /* Don't keep stale pfsync packets around. 
*/ - s = splnet(); - m_freem(sc->sc_mbuf_net); - sc->sc_mbuf_net = NULL; - sc->sc_statep_net.s = NULL; - splx(s); - } + sc->sc_sync_if = NULL; #ifdef __FreeBSD__ PF_UNLOCK(); #endif if (imo->imo_num_memberships > 0) { - in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); + in_delmulti(imo->imo_membership[ + --imo->imo_num_memberships]); imo->imo_multicast_ifp = NULL; } break; @@ -1206,27 +1881,21 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) #endif if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) return (EINVAL); + #ifdef __FreeBSD__ PF_LOCK(); #endif - s = splnet(); #ifdef __FreeBSD__ if (sifp->if_mtu < sc->sc_ifp->if_mtu || #else if (sifp->if_mtu < sc->sc_if.if_mtu || #endif - (sc->sc_sync_ifp != NULL && - sifp->if_mtu < sc->sc_sync_ifp->if_mtu) || + (sc->sc_sync_if != NULL && + sifp->if_mtu < sc->sc_sync_if->if_mtu) || sifp->if_mtu < MCLBYTES - sizeof(struct ip)) - pfsync_sendout(sc); - sc->sc_sync_ifp = sifp; - -#ifdef __FreeBSD__ - pfsync_setmtu(sc, sc->sc_ifp->if_mtu); -#else - pfsync_setmtu(sc, sc->sc_if.if_mtu); -#endif + pfsync_sendout(); + sc->sc_sync_if = sifp; if (imo->imo_num_memberships > 0) { #ifdef __FreeBSD__ @@ -1239,7 +1908,7 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) imo->imo_multicast_ifp = NULL; } - if (sc->sc_sync_ifp && + if (sc->sc_sync_if && #ifdef __FreeBSD__ sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) { #else @@ -1247,8 +1916,8 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) #endif struct in_addr addr; - if (!(sc->sc_sync_ifp->if_flags & IFF_MULTICAST)) { - sc->sc_sync_ifp = NULL; + if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) { + sc->sc_sync_if = NULL; #ifdef __FreeBSD__ PF_UNLOCK(); #endif @@ -1266,8 +1935,8 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) PF_UNLOCK(); #endif if ((imo->imo_membership[0] = - in_addmulti(&addr, sc->sc_sync_ifp)) == NULL) { - sc->sc_sync_ifp = NULL; + in_addmulti(&addr, sc->sc_sync_if)) == NULL) { + sc->sc_sync_if = NULL; 
splx(s); return (ENOBUFS); } @@ -1275,966 +1944,1359 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) PF_LOCK(); #endif imo->imo_num_memberships++; - imo->imo_multicast_ifp = sc->sc_sync_ifp; + imo->imo_multicast_ifp = sc->sc_sync_if; imo->imo_multicast_ttl = PFSYNC_DFLTTL; imo->imo_multicast_loop = 0; } - if (sc->sc_sync_ifp || -#ifdef __FreeBSD__ - sc->sc_sendaddr.s_addr != htonl(INADDR_PFSYNC_GROUP)) { -#else - sc->sc_sendaddr.s_addr != INADDR_PFSYNC_GROUP) { -#endif + ip = &sc->sc_template; + bzero(ip, sizeof(*ip)); + ip->ip_v = IPVERSION; + ip->ip_hl = sizeof(sc->sc_template) >> 2; + ip->ip_tos = IPTOS_LOWDELAY; + /* len and id are set later */ + ip->ip_off = htons(IP_DF); + ip->ip_ttl = PFSYNC_DFLTTL; + ip->ip_p = IPPROTO_PFSYNC; + ip->ip_src.s_addr = INADDR_ANY; + ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr; + + if (sc->sc_sync_if) { /* Request a full state table update. */ sc->sc_ureq_sent = time_uptime; #if NCARP > 0 +#ifdef notyet +#ifdef __FreeBSD__ + if (sc->pfsync_sync_ok) +#else if (pfsync_sync_ok) +#endif + carp_group_demote_adj(&sc->sc_if, 1); +#endif +#endif +#ifdef __FreeBSD__ + sc->pfsync_sync_ok = 0; +#else + pfsync_sync_ok = 0; +#endif +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) +#else + if (pf_status.debug >= PF_DEBUG_MISC) +#endif + printf("pfsync: requesting bulk update\n"); +#ifdef __FreeBSD__ + callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, + pfsync_bulk_fail, V_pfsyncif); +#else + timeout_add_sec(&sc->sc_bulkfail_tmo, 5); +#endif + pfsync_request_update(0, 0); + } +#ifdef __FreeBSD__ + PF_UNLOCK(); +#endif + splx(s); + + break; + + default: + return (ENOTTY); + } + + return (0); +} + +int +pfsync_out_state(struct pf_state *st, struct mbuf *m, int offset) +{ + struct pfsync_state *sp = (struct pfsync_state *)(m->m_data + offset); + + pfsync_state_export(sp, st); + + return (sizeof(*sp)); +} + +int +pfsync_out_iack(struct pf_state *st, struct mbuf *m, int offset) +{ + struct pfsync_ins_ack *iack = + (struct 
pfsync_ins_ack *)(m->m_data + offset); + + iack->id = st->id; + iack->creatorid = st->creatorid; + + return (sizeof(*iack)); +} + +int +pfsync_out_upd_c(struct pf_state *st, struct mbuf *m, int offset) +{ + struct pfsync_upd_c *up = (struct pfsync_upd_c *)(m->m_data + offset); + + up->id = st->id; + pf_state_peer_hton(&st->src, &up->src); + pf_state_peer_hton(&st->dst, &up->dst); + up->creatorid = st->creatorid; + + up->expire = pf_state_expires(st); + if (up->expire <= time_second) + up->expire = htonl(0); + else + up->expire = htonl(up->expire - time_second); + up->timeout = st->timeout; + + bzero(up->_pad, sizeof(up->_pad)); /* XXX */ + + return (sizeof(*up)); +} + +int +pfsync_out_del(struct pf_state *st, struct mbuf *m, int offset) +{ + struct pfsync_del_c *dp = (struct pfsync_del_c *)(m->m_data + offset); + + dp->id = st->id; + dp->creatorid = st->creatorid; + + SET(st->state_flags, PFSTATE_NOSYNC); + + return (sizeof(*dp)); +} + +void +pfsync_drop(struct pfsync_softc *sc) +{ + struct pf_state *st; + struct pfsync_upd_req_item *ur; +#ifdef notyet + struct tdb *t; +#endif + int q; + + for (q = 0; q < PFSYNC_S_COUNT; q++) { + if (TAILQ_EMPTY(&sc->sc_qs[q])) + continue; + + TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) { +#ifdef PFSYNC_DEBUG +#ifdef __FreeBSD__ + KASSERT(st->sync_state == q, + ("%s: st->sync_state == q", + __FUNCTION__)); +#else + KASSERT(st->sync_state == q); +#endif +#endif + st->sync_state = PFSYNC_S_NONE; + } + TAILQ_INIT(&sc->sc_qs[q]); + } + + while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { + TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); + pool_put(&sc->sc_pool, ur); + } + + sc->sc_plus = NULL; + +#ifdef notyet + if (!TAILQ_EMPTY(&sc->sc_tdb_q)) { + TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) + CLR(t->tdb_flags, TDBF_PFSYNC); + + TAILQ_INIT(&sc->sc_tdb_q); + } +#endif + + sc->sc_len = PFSYNC_MINPKT; +} + +void +pfsync_sendout(void) +{ +#ifdef __FreeBSD__ + struct pfsync_softc *sc = V_pfsyncif; +#else + struct pfsync_softc 
*sc = pfsyncif; +#endif +#if NBPFILTER > 0 +#ifdef __FreeBSD__ + struct ifnet *ifp = sc->sc_ifp; +#else + struct ifnet *ifp = &sc->sc_if; +#endif + struct mbuf *m; +#endif + struct ip *ip; + struct pfsync_header *ph; + struct pfsync_subheader *subh; + struct pf_state *st; + struct pfsync_upd_req_item *ur; +#ifdef notyet + struct tdb *t; +#endif +#ifdef __FreeBSD__ + size_t pktlen; +#endif + int offset; + int q, count = 0; + +#ifdef __FreeBSD__ + PF_ASSERT(MA_OWNED); +#else + splassert(IPL_NET); +#endif + + if (sc == NULL || sc->sc_len == PFSYNC_MINPKT) + return; + +#if NBPFILTER > 0 + if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) { +#else + if (sc->sc_sync_if == NULL) { +#endif + pfsync_drop(sc); + return; + } + + MGETHDR(m, M_DONTWAIT, MT_DATA); + if (m == NULL) { +#ifdef __FreeBSD__ + sc->sc_ifp->if_oerrors++; +#else + sc->sc_if.if_oerrors++; +#endif + V_pfsyncstats.pfsyncs_onomem++; + pfsync_drop(sc); + return; + } + +#ifdef __FreeBSD__ + pktlen = max_linkhdr + sc->sc_len; + if (pktlen > MHLEN) { + /* Find the right pool to allocate from. */ + /* XXX: This is ugly. */ + m_cljget(m, M_DONTWAIT, pktlen <= MSIZE ? MSIZE : + pktlen <= MCLBYTES ? MCLBYTES : +#if MJUMPAGESIZE != MCLBYTES + pktlen <= MJUMPAGESIZE ? MJUMPAGESIZE : +#endif + pktlen <= MJUM9BYTES ? 
MJUM9BYTES : MJUM16BYTES); +#else + if (max_linkhdr + sc->sc_len > MHLEN) { + MCLGETI(m, M_DONTWAIT, NULL, max_linkhdr + sc->sc_len); +#endif + if (!ISSET(m->m_flags, M_EXT)) { + m_free(m); +#ifdef __FreeBSD__ + sc->sc_ifp->if_oerrors++; +#else + sc->sc_if.if_oerrors++; +#endif + V_pfsyncstats.pfsyncs_onomem++; + pfsync_drop(sc); + return; + } + } + m->m_data += max_linkhdr; + m->m_len = m->m_pkthdr.len = sc->sc_len; + + /* build the ip header */ + ip = (struct ip *)m->m_data; + bcopy(&sc->sc_template, ip, sizeof(*ip)); + offset = sizeof(*ip); + + ip->ip_len = htons(m->m_pkthdr.len); + ip->ip_id = htons(ip_randomid()); + + /* build the pfsync header */ + ph = (struct pfsync_header *)(m->m_data + offset); + bzero(ph, sizeof(*ph)); + offset += sizeof(*ph); + + ph->version = PFSYNC_VERSION; + ph->len = htons(sc->sc_len - sizeof(*ip)); +#ifdef __FreeBSD__ + bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); +#else + bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); +#endif + + /* walk the queues */ + for (q = 0; q < PFSYNC_S_COUNT; q++) { + if (TAILQ_EMPTY(&sc->sc_qs[q])) + continue; + + subh = (struct pfsync_subheader *)(m->m_data + offset); + offset += sizeof(*subh); + + count = 0; + TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) { +#ifdef PFSYNC_DEBUG +#ifdef __FreeBSD__ + KASSERT(st->sync_state == q, + ("%s: st->sync_state == q", + __FUNCTION__)); +#else + KASSERT(st->sync_state == q); +#endif +#endif + + offset += pfsync_qs[q].write(st, m, offset); + st->sync_state = PFSYNC_S_NONE; + count++; + } + TAILQ_INIT(&sc->sc_qs[q]); + + bzero(subh, sizeof(*subh)); + subh->action = pfsync_qs[q].action; + subh->count = htons(count); + } + + if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) { + subh = (struct pfsync_subheader *)(m->m_data + offset); + offset += sizeof(*subh); + + count = 0; + while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { + TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); + + bcopy(&ur->ur_msg, m->m_data + offset, + 
sizeof(ur->ur_msg)); + offset += sizeof(ur->ur_msg); + + pool_put(&sc->sc_pool, ur); + + count++; + } + + bzero(subh, sizeof(*subh)); + subh->action = PFSYNC_ACT_UPD_REQ; + subh->count = htons(count); + } + + /* has someone built a custom region for us to add? */ + if (sc->sc_plus != NULL) { + bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen); + offset += sc->sc_pluslen; + + sc->sc_plus = NULL; + } + +#ifdef notyet + if (!TAILQ_EMPTY(&sc->sc_tdb_q)) { + subh = (struct pfsync_subheader *)(m->m_data + offset); + offset += sizeof(*subh); + + count = 0; + TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) { + offset += pfsync_out_tdb(t, m, offset); + CLR(t->tdb_flags, TDBF_PFSYNC); + + count++; + } + TAILQ_INIT(&sc->sc_tdb_q); + + bzero(subh, sizeof(*subh)); + subh->action = PFSYNC_ACT_TDB; + subh->count = htons(count); + } +#endif + + subh = (struct pfsync_subheader *)(m->m_data + offset); + offset += sizeof(*subh); + + bzero(subh, sizeof(*subh)); + subh->action = PFSYNC_ACT_EOF; + subh->count = htons(1); + + /* XXX write checksum in EOF here */ + + /* we're done, let's put it on the wire */ +#if NBPFILTER > 0 + if (ifp->if_bpf) { + m->m_data += sizeof(*ip); + m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip); +#ifdef __FreeBSD__ + BPF_MTAP(ifp, m); +#else + bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); +#endif + m->m_data -= sizeof(*ip); + m->m_len = m->m_pkthdr.len = sc->sc_len; + } + + if (sc->sc_sync_if == NULL) { + sc->sc_len = PFSYNC_MINPKT; + m_freem(m); + return; + } +#endif + +#ifdef __FreeBSD__ + sc->sc_ifp->if_opackets++; + sc->sc_ifp->if_obytes += m->m_pkthdr.len; +#else + sc->sc_if.if_opackets++; + sc->sc_if.if_obytes += m->m_pkthdr.len; +#endif + +#ifdef __FreeBSD__ + PF_UNLOCK(); +#endif + if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL) == 0) +#ifdef __FreeBSD__ + { + PF_LOCK(); +#endif + V_pfsyncstats.pfsyncs_opackets++; +#ifdef __FreeBSD__ + } +#endif + else +#ifdef __FreeBSD__ + { + PF_LOCK(); +#endif + 
V_pfsyncstats.pfsyncs_oerrors++; +#ifdef __FreeBSD__ + } +#endif + + /* start again */ + sc->sc_len = PFSYNC_MINPKT; +} + +void +pfsync_insert_state(struct pf_state *st) +{ +#ifdef __FreeBSD__ + struct pfsync_softc *sc = V_pfsyncif; +#else + struct pfsync_softc *sc = pfsyncif; +#endif + +#ifdef __FreeBSD__ + PF_ASSERT(MA_OWNED); +#else + splassert(IPL_SOFTNET); +#endif + + if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) || + st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) { + SET(st->state_flags, PFSTATE_NOSYNC); + return; + } + + if (sc == NULL || ISSET(st->state_flags, PFSTATE_NOSYNC)) + return; + +#ifdef PFSYNC_DEBUG +#ifdef __FreeBSD__ + KASSERT(st->sync_state == PFSYNC_S_NONE, + ("%s: st->sync_state == PFSYNC_S_NONE", __FUNCTION__)); +#else + KASSERT(st->sync_state == PFSYNC_S_NONE); +#endif +#endif + + if (sc->sc_len == PFSYNC_MINPKT) +#ifdef __FreeBSD__ + callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, + V_pfsyncif); +#else + timeout_add_sec(&sc->sc_tmo, 1); +#endif + + pfsync_q_ins(st, PFSYNC_S_INS); + + if (ISSET(st->state_flags, PFSTATE_ACK)) + schednetisr(NETISR_PFSYNC); + else + st->sync_updates = 0; +} + +int defer = 10; + +int +pfsync_defer(struct pf_state *st, struct mbuf *m) +{ +#ifdef __FreeBSD__ + struct pfsync_softc *sc = V_pfsyncif; +#else + struct pfsync_softc *sc = pfsyncif; +#endif + struct pfsync_deferral *pd; + +#ifdef __FreeBSD__ + PF_ASSERT(MA_OWNED); +#else + splassert(IPL_SOFTNET); +#endif + + if (sc->sc_deferred >= 128) + pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0); + + pd = pool_get(&sc->sc_pool, M_NOWAIT); + if (pd == NULL) + return (0); + sc->sc_deferred++; + +#ifdef __FreeBSD__ + m->m_flags |= M_SKIP_FIREWALL; +#else + m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; +#endif + SET(st->state_flags, PFSTATE_ACK); + + pd->pd_st = st; + pd->pd_m = m; + + TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry); +#ifdef __FreeBSD__ + callout_init(&pd->pd_tmo, CALLOUT_MPSAFE); + callout_reset(&pd->pd_tmo, defer, pfsync_defer_tmo, + pd); 
+#else + timeout_set(&pd->pd_tmo, pfsync_defer_tmo, pd); + timeout_add(&pd->pd_tmo, defer); +#endif + + return (1); +} + +void +pfsync_undefer(struct pfsync_deferral *pd, int drop) +{ +#ifdef __FreeBSD__ + struct pfsync_softc *sc = V_pfsyncif; +#else + struct pfsync_softc *sc = pfsyncif; +#endif + int s; + +#ifdef __FreeBSD__ + PF_ASSERT(MA_OWNED); +#else + splassert(IPL_SOFTNET); +#endif + + TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); + sc->sc_deferred--; + + CLR(pd->pd_st->state_flags, PFSTATE_ACK); + timeout_del(&pd->pd_tmo); /* bah */ + if (drop) + m_freem(pd->pd_m); + else { + s = splnet(); +#ifdef __FreeBSD__ + /* XXX: use pf_defered?! */ + PF_UNLOCK(); +#endif + ip_output(pd->pd_m, (void *)NULL, (void *)NULL, 0, + (void *)NULL, (void *)NULL); +#ifdef __FreeBSD__ + PF_LOCK(); +#endif + splx(s); + } + + pool_put(&sc->sc_pool, pd); +} + +void +pfsync_defer_tmo(void *arg) +{ +#if defined(__FreeBSD__) && defined(VIMAGE) + struct pfsync_deferral *pd = arg; +#endif + int s; + + s = splsoftnet(); +#ifdef __FreeBSD__ + CURVNET_SET(pd->pd_m->m_pkthdr.rcvif->if_vnet); /* XXX */ + PF_LOCK(); +#endif + pfsync_undefer(arg, 0); +#ifdef __FreeBSD__ + PF_UNLOCK(); + CURVNET_RESTORE(); +#endif + splx(s); +} + +void +pfsync_deferred(struct pf_state *st, int drop) +{ +#ifdef __FreeBSD__ + struct pfsync_softc *sc = V_pfsyncif; +#else + struct pfsync_softc *sc = pfsyncif; +#endif + struct pfsync_deferral *pd; + + TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) { + if (pd->pd_st == st) { + pfsync_undefer(pd, drop); + return; + } + } + + panic("pfsync_send_deferred: unable to find deferred state"); +} + +u_int pfsync_upds = 0; + +void +pfsync_update_state(struct pf_state *st) +{ +#ifdef __FreeBSD__ + struct pfsync_softc *sc = V_pfsyncif; +#else + struct pfsync_softc *sc = pfsyncif; +#endif + int sync = 0; + +#ifdef __FreeBSD__ + PF_ASSERT(MA_OWNED); +#else + splassert(IPL_SOFTNET); +#endif + + if (sc == NULL) + return; + + if (ISSET(st->state_flags, PFSTATE_ACK)) + 
pfsync_deferred(st, 0); + if (ISSET(st->state_flags, PFSTATE_NOSYNC)) { + if (st->sync_state != PFSYNC_S_NONE) + pfsync_q_del(st); + return; + } + + if (sc->sc_len == PFSYNC_MINPKT) +#ifdef __FreeBSD__ + callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, + V_pfsyncif); +#else + timeout_add_sec(&sc->sc_tmo, 1); +#endif + + switch (st->sync_state) { + case PFSYNC_S_UPD_C: + case PFSYNC_S_UPD: + case PFSYNC_S_INS: + /* we're already handling it */ + + st->sync_updates++; + if (st->sync_updates >= sc->sc_maxupdates) + sync = 1; + break; + + case PFSYNC_S_IACK: + pfsync_q_del(st); + case PFSYNC_S_NONE: + pfsync_q_ins(st, PFSYNC_S_UPD_C); + st->sync_updates = 0; + break; + + default: + panic("pfsync_update_state: unexpected sync state %d", + st->sync_state); + } + + if (sync || (time_second - st->pfsync_time) < 2) { + pfsync_upds++; + schednetisr(NETISR_PFSYNC); + } +} + +void +pfsync_request_update(u_int32_t creatorid, u_int64_t id) +{ +#ifdef __FreeBSD__ + struct pfsync_softc *sc = V_pfsyncif; +#else + struct pfsync_softc *sc = pfsyncif; +#endif + struct pfsync_upd_req_item *item; + size_t nlen = sizeof(struct pfsync_upd_req); + int s; + + /* + * this code does nothing to prevent multiple update requests for the + * same state being generated. 
+ */ + + item = pool_get(&sc->sc_pool, PR_NOWAIT); + if (item == NULL) { + /* XXX stats */ + return; + } + + item->ur_msg.id = id; + item->ur_msg.creatorid = creatorid; + + if (TAILQ_EMPTY(&sc->sc_upd_req_list)) + nlen += sizeof(struct pfsync_subheader); + +#ifdef __FreeBSD__ + if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) { +#else + if (sc->sc_len + nlen > sc->sc_if.if_mtu) { +#endif + s = splnet(); + pfsync_sendout(); + splx(s); + + nlen = sizeof(struct pfsync_subheader) + + sizeof(struct pfsync_upd_req); + } + + TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry); + sc->sc_len += nlen; + + schednetisr(NETISR_PFSYNC); +} + +void +pfsync_update_state_req(struct pf_state *st) +{ #ifdef __FreeBSD__ -#ifdef CARP_ADVANCED - carp_group_demote_adj(sc->sc_ifp, 1); -#endif + struct pfsync_softc *sc = V_pfsyncif; #else - carp_group_demote_adj(&sc->sc_if, 1); -#endif + struct pfsync_softc *sc = pfsyncif; #endif - pfsync_sync_ok = 0; - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync: requesting bulk update\n"); + + if (sc == NULL) + panic("pfsync_update_state_req: nonexistant instance"); + + if (ISSET(st->state_flags, PFSTATE_NOSYNC)) { + if (st->sync_state != PFSYNC_S_NONE) + pfsync_q_del(st); + return; + } + + switch (st->sync_state) { + case PFSYNC_S_UPD_C: + case PFSYNC_S_IACK: + pfsync_q_del(st); + case PFSYNC_S_NONE: + pfsync_q_ins(st, PFSYNC_S_UPD); + schednetisr(NETISR_PFSYNC); + return; + + case PFSYNC_S_INS: + case PFSYNC_S_UPD: + case PFSYNC_S_DEL: + /* we're already handling it */ + return; + + default: + panic("pfsync_update_state_req: unexpected sync state %d", + st->sync_state); + } +} + +void +pfsync_delete_state(struct pf_state *st) +{ #ifdef __FreeBSD__ - callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, - pfsync_bulkfail, pfsyncif); + struct pfsync_softc *sc = V_pfsyncif; #else - timeout_add(&sc->sc_bulkfail_tmo, 5 * hz); + struct pfsync_softc *sc = pfsyncif; #endif - error = pfsync_request_update(NULL, NULL); - if (error == ENOMEM) { + #ifdef 
__FreeBSD__ - PF_UNLOCK(); + PF_ASSERT(MA_OWNED); +#else + splassert(IPL_SOFTNET); #endif - splx(s); - return (ENOMEM); - } - pfsync_sendout(sc); - } + + if (sc == NULL) + return; + + if (ISSET(st->state_flags, PFSTATE_ACK)) + pfsync_deferred(st, 1); + if (ISSET(st->state_flags, PFSTATE_NOSYNC)) { + if (st->sync_state != PFSYNC_S_NONE) + pfsync_q_del(st); + return; + } + + if (sc->sc_len == PFSYNC_MINPKT) #ifdef __FreeBSD__ - PF_UNLOCK(); + callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, + V_pfsyncif); +#else + timeout_add_sec(&sc->sc_tmo, 1); #endif - splx(s); - break; + switch (st->sync_state) { + case PFSYNC_S_INS: + /* we never got to tell the world so just forget about it */ + pfsync_q_del(st); + return; + + case PFSYNC_S_UPD_C: + case PFSYNC_S_UPD: + case PFSYNC_S_IACK: + pfsync_q_del(st); + /* FALLTHROUGH to putting it on the del list */ + + case PFSYNC_S_NONE: + pfsync_q_ins(st, PFSYNC_S_DEL); + return; default: - return (ENOTTY); + panic("pfsync_delete_state: unexpected sync state %d", + st->sync_state); } - - return (0); } void -pfsync_setmtu(struct pfsync_softc *sc, int mtu_req) +pfsync_clear_states(u_int32_t creatorid, const char *ifname) { - int mtu; + struct { + struct pfsync_subheader subh; + struct pfsync_clr clr; + } __packed r; - if (sc->sc_sync_ifp && sc->sc_sync_ifp->if_mtu < mtu_req) - mtu = sc->sc_sync_ifp->if_mtu; - else - mtu = mtu_req; +#ifdef __FreeBSD__ + struct pfsync_softc *sc = V_pfsyncif; +#else + struct pfsync_softc *sc = pfsyncif; +#endif - sc->sc_maxcount = (mtu - sizeof(struct pfsync_header)) / - sizeof(struct pfsync_state); - if (sc->sc_maxcount > 254) - sc->sc_maxcount = 254; #ifdef __FreeBSD__ - sc->sc_ifp->if_mtu = sizeof(struct pfsync_header) + + PF_ASSERT(MA_OWNED); #else - sc->sc_if.if_mtu = sizeof(struct pfsync_header) + + splassert(IPL_SOFTNET); #endif - sc->sc_maxcount * sizeof(struct pfsync_state); + + if (sc == NULL) + return; + + bzero(&r, sizeof(r)); + + r.subh.action = PFSYNC_ACT_CLR; + r.subh.count = htons(1); + 
+ strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname)); + r.clr.creatorid = creatorid; + + pfsync_send_plus(&r, sizeof(r)); } -struct mbuf * -pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp) +void +pfsync_q_ins(struct pf_state *st, int q) { - struct pfsync_header *h; - struct mbuf *m; - int len; - - MGETHDR(m, M_DONTWAIT, MT_DATA); - if (m == NULL) { #ifdef __FreeBSD__ - sc->sc_ifp->if_oerrors++; + struct pfsync_softc *sc = V_pfsyncif; #else - sc->sc_if.if_oerrors++; + struct pfsync_softc *sc = pfsyncif; #endif - return (NULL); - } + size_t nlen = pfsync_qs[q].len; + int s; - switch (action) { - case PFSYNC_ACT_CLR: - len = sizeof(struct pfsync_header) + - sizeof(struct pfsync_state_clr); - break; - case PFSYNC_ACT_UPD_C: - len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd)) + - sizeof(struct pfsync_header); - break; - case PFSYNC_ACT_DEL_C: - len = (sc->sc_maxcount * sizeof(struct pfsync_state_del)) + - sizeof(struct pfsync_header); - break; - case PFSYNC_ACT_UREQ: - len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd_req)) + - sizeof(struct pfsync_header); - break; - case PFSYNC_ACT_BUS: - len = sizeof(struct pfsync_header) + - sizeof(struct pfsync_state_bus); - break; -#ifdef PFSYNC_TDB - case PFSYNC_ACT_TDB_UPD: - len = (sc->sc_maxcount * sizeof(struct pfsync_tdb)) + - sizeof(struct pfsync_header); - break; +#ifdef __FreeBSD__ + KASSERT(st->sync_state == PFSYNC_S_NONE, + ("%s: st->sync_state == PFSYNC_S_NONE", __FUNCTION__)); +#else + KASSERT(st->sync_state == PFSYNC_S_NONE); #endif - default: - len = (sc->sc_maxcount * sizeof(struct pfsync_state)) + - sizeof(struct pfsync_header); - break; - } - if (len > MHLEN) { - MCLGET(m, M_DONTWAIT); - if ((m->m_flags & M_EXT) == 0) { - m_free(m); +#if 1 || defined(PFSYNC_DEBUG) + if (sc->sc_len < PFSYNC_MINPKT) #ifdef __FreeBSD__ - sc->sc_ifp->if_oerrors++; + panic("pfsync pkt len is too low %zu", sc->sc_len); #else - sc->sc_if.if_oerrors++; + panic("pfsync pkt len is too low %d", 
sc->sc_len); #endif - return (NULL); - } - m->m_data += (MCLBYTES - len) &~ (sizeof(long) - 1); - } else - MH_ALIGN(m, len); - - m->m_pkthdr.rcvif = NULL; - m->m_pkthdr.len = m->m_len = sizeof(struct pfsync_header); - h = mtod(m, struct pfsync_header *); - h->version = PFSYNC_VERSION; - h->af = 0; - h->count = 0; - h->action = action; -#ifndef PFSYNC_TDB - if (action != PFSYNC_ACT_TDB_UPD) #endif - bcopy(&pf_status.pf_chksum, &h->pf_chksum, - PF_MD5_DIGEST_LENGTH); + if (TAILQ_EMPTY(&sc->sc_qs[q])) + nlen += sizeof(struct pfsync_subheader); - *sp = (void *)((char *)h + PFSYNC_HDRLEN); -#ifdef PFSYNC_TDB - if (action == PFSYNC_ACT_TDB_UPD) #ifdef __FreeBSD__ - callout_reset(&sc->sc_tdb_tmo, hz, pfsync_tdb_timeout, - pfsyncif); + if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) { #else - timeout_add(&sc->sc_tdb_tmo, hz); + if (sc->sc_len + nlen > sc->sc_if.if_mtu) { #endif - else + s = splnet(); +#ifdef __FreeBSD__ + PF_LOCK(); #endif + pfsync_sendout(); #ifdef __FreeBSD__ - callout_reset(&sc->sc_tmo, hz, pfsync_timeout, pfsyncif); -#else - timeout_add(&sc->sc_tmo, hz); + PF_UNLOCK(); #endif - return (m); + splx(s); + + nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len; + } + + sc->sc_len += nlen; + TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list); + st->sync_state = q; } -int -pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags) +void +pfsync_q_del(struct pf_state *st) { - struct ifnet *ifp = NULL; - struct pfsync_softc *sc = pfsyncif; - struct pfsync_header *h, *h_net; - struct pfsync_state *sp = NULL; - struct pfsync_state_upd *up = NULL; - struct pfsync_state_del *dp = NULL; - struct pf_rule *r; - u_long secs; - int s, ret = 0; - u_int8_t i = 255, newaction = 0; - - if (sc == NULL) - return (0); #ifdef __FreeBSD__ - ifp = sc->sc_ifp; + struct pfsync_softc *sc = V_pfsyncif; #else - ifp = &sc->sc_if; + struct pfsync_softc *sc = pfsyncif; #endif + int q = st->sync_state; - /* - * If a packet falls in the forest and there's nobody around to - * hear, 
does it make a sound? - */ - if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL && #ifdef __FreeBSD__ - sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) { + KASSERT(st->sync_state != PFSYNC_S_NONE, + ("%s: st->sync_state != PFSYNC_S_NONE", __FUNCTION__)); #else - sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { + KASSERT(st->sync_state != PFSYNC_S_NONE); #endif - /* Don't leave any stale pfsync packets hanging around. */ - if (sc->sc_mbuf != NULL) { - m_freem(sc->sc_mbuf); - sc->sc_mbuf = NULL; - sc->sc_statep.s = NULL; - } - return (0); - } - if (action >= PFSYNC_ACT_MAX) - return (EINVAL); + sc->sc_len -= pfsync_qs[q].len; + TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list); + st->sync_state = PFSYNC_S_NONE; - s = splnet(); + if (TAILQ_EMPTY(&sc->sc_qs[q])) + sc->sc_len -= sizeof(struct pfsync_subheader); +} + +#ifdef notyet +void +pfsync_update_tdb(struct tdb *t, int output) +{ #ifdef __FreeBSD__ - PF_ASSERT(MA_OWNED); + struct pfsync_softc *sc = V_pfsyncif; +#else + struct pfsync_softc *sc = pfsyncif; #endif - if (sc->sc_mbuf == NULL) { - if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action, - (void *)&sc->sc_statep.s)) == NULL) { - splx(s); - return (ENOMEM); - } - h = mtod(sc->sc_mbuf, struct pfsync_header *); - } else { - h = mtod(sc->sc_mbuf, struct pfsync_header *); - if (h->action != action) { - pfsync_sendout(sc); - if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action, - (void *)&sc->sc_statep.s)) == NULL) { - splx(s); - return (ENOMEM); - } - h = mtod(sc->sc_mbuf, struct pfsync_header *); - } else { - /* - * If it's an update, look in the packet to see if - * we already have an update for the state. 
- */ - if (action == PFSYNC_ACT_UPD && sc->sc_maxupdates) { - struct pfsync_state *usp = - (void *)((char *)h + PFSYNC_HDRLEN); - - for (i = 0; i < h->count; i++) { - if (!memcmp(usp->id, &st->id, - PFSYNC_ID_LEN) && - usp->creatorid == st->creatorid) { - sp = usp; - sp->updates++; - break; - } - usp++; - } - } - } - } - - secs = time_second; - - st->pfsync_time = time_uptime; - - if (sp == NULL) { - /* not a "duplicate" update */ - i = 255; - sp = sc->sc_statep.s++; - sc->sc_mbuf->m_pkthdr.len = - sc->sc_mbuf->m_len += sizeof(struct pfsync_state); - h->count++; - bzero(sp, sizeof(*sp)); - - bcopy(&st->id, sp->id, sizeof(sp->id)); - sp->creatorid = st->creatorid; - - strlcpy(sp->ifname, st->u.s.kif->pfik_name, sizeof(sp->ifname)); - pf_state_host_hton(&st->lan, &sp->lan); - pf_state_host_hton(&st->gwy, &sp->gwy); - pf_state_host_hton(&st->ext, &sp->ext); - - bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr)); - - sp->creation = htonl(secs - st->creation); - pf_state_counter_hton(st->packets[0], sp->packets[0]); - pf_state_counter_hton(st->packets[1], sp->packets[1]); - pf_state_counter_hton(st->bytes[0], sp->bytes[0]); - pf_state_counter_hton(st->bytes[1], sp->bytes[1]); - if ((r = st->rule.ptr) == NULL) - sp->rule = htonl(-1); - else - sp->rule = htonl(r->nr); - if ((r = st->anchor.ptr) == NULL) - sp->anchor = htonl(-1); - else - sp->anchor = htonl(r->nr); - sp->af = st->af; - sp->proto = st->proto; - sp->direction = st->direction; - sp->log = st->log; - sp->state_flags = st->state_flags; - sp->timeout = st->timeout; + size_t nlen = sizeof(struct pfsync_tdb); + int s; - if (flags & PFSYNC_FLAG_STALE) - sp->sync_flags |= PFSTATE_STALE; - } + if (sc == NULL) + return; - pf_state_peer_hton(&st->src, &sp->src); - pf_state_peer_hton(&st->dst, &sp->dst); + if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) { + if (TAILQ_EMPTY(&sc->sc_tdb_q)) + nlen += sizeof(struct pfsync_subheader); - if (st->expire <= secs) - sp->expire = htonl(0); - else - sp->expire = htonl(st->expire - 
secs); + if (sc->sc_len + nlen > sc->sc_if.if_mtu) { + s = splnet(); + pfsync_sendout(); + splx(s); - /* do we need to build "compressed" actions for network transfer? */ - if (sc->sc_sync_ifp && flags & PFSYNC_FLAG_COMPRESS) { - switch (action) { - case PFSYNC_ACT_UPD: - newaction = PFSYNC_ACT_UPD_C; - break; - case PFSYNC_ACT_DEL: - newaction = PFSYNC_ACT_DEL_C; - break; - default: - /* by default we just send the uncompressed states */ - break; + nlen = sizeof(struct pfsync_subheader) + + sizeof(struct pfsync_tdb); } - } - if (newaction) { - if (sc->sc_mbuf_net == NULL) { - if ((sc->sc_mbuf_net = pfsync_get_mbuf(sc, newaction, - (void *)&sc->sc_statep_net.s)) == NULL) { - splx(s); - return (ENOMEM); - } - } - h_net = mtod(sc->sc_mbuf_net, struct pfsync_header *); - - switch (newaction) { - case PFSYNC_ACT_UPD_C: - if (i != 255) { - up = (void *)((char *)h_net + - PFSYNC_HDRLEN + (i * sizeof(*up))); - up->updates++; - } else { - h_net->count++; - sc->sc_mbuf_net->m_pkthdr.len = - sc->sc_mbuf_net->m_len += sizeof(*up); - up = sc->sc_statep_net.u++; - - bzero(up, sizeof(*up)); - bcopy(&st->id, up->id, sizeof(up->id)); - up->creatorid = st->creatorid; - } - up->timeout = st->timeout; - up->expire = sp->expire; - up->src = sp->src; - up->dst = sp->dst; - break; - case PFSYNC_ACT_DEL_C: - sc->sc_mbuf_net->m_pkthdr.len = - sc->sc_mbuf_net->m_len += sizeof(*dp); - dp = sc->sc_statep_net.d++; - h_net->count++; - - bzero(dp, sizeof(*dp)); - bcopy(&st->id, dp->id, sizeof(dp->id)); - dp->creatorid = st->creatorid; - break; - } + sc->sc_len += nlen; + TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry); + SET(t->tdb_flags, TDBF_PFSYNC); + t->tdb_updates = 0; + } else { + if (++t->tdb_updates >= sc->sc_maxupdates) + schednetisr(NETISR_PFSYNC); } - if (h->count == sc->sc_maxcount || - (sc->sc_maxupdates && (sp->updates >= sc->sc_maxupdates))) - ret = pfsync_sendout(sc); - - splx(s); - return (ret); + if (output) + SET(t->tdb_flags, TDBF_PFSYNC_RPL); + else + CLR(t->tdb_flags, 
TDBF_PFSYNC_RPL); } -/* This must be called in splnet() */ -int -pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src) +void +pfsync_delete_tdb(struct tdb *t) { - struct ifnet *ifp = NULL; - struct pfsync_header *h; - struct pfsync_softc *sc = pfsyncif; - struct pfsync_state_upd_req *rup; - int ret = 0; - - if (sc == NULL) - return (0); - #ifdef __FreeBSD__ - ifp = sc->sc_ifp; + struct pfsync_softc *sc = V_pfsyncif; #else - ifp = &sc->sc_if; + struct pfsync_softc *sc = pfsyncif; #endif - if (sc->sc_mbuf == NULL) { - if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ, - (void *)&sc->sc_statep.s)) == NULL) - return (ENOMEM); - h = mtod(sc->sc_mbuf, struct pfsync_header *); - } else { - h = mtod(sc->sc_mbuf, struct pfsync_header *); - if (h->action != PFSYNC_ACT_UREQ) { - pfsync_sendout(sc); - if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ, - (void *)&sc->sc_statep.s)) == NULL) - return (ENOMEM); - h = mtod(sc->sc_mbuf, struct pfsync_header *); - } - } - if (src != NULL) - sc->sc_sendaddr = *src; - sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*rup); - h->count++; - rup = sc->sc_statep.r++; - bzero(rup, sizeof(*rup)); - if (up != NULL) { - bcopy(up->id, rup->id, sizeof(rup->id)); - rup->creatorid = up->creatorid; - } + if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC)) + return; - if (h->count == sc->sc_maxcount) - ret = pfsync_sendout(sc); + sc->sc_len -= sizeof(struct pfsync_tdb); + TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry); + CLR(t->tdb_flags, TDBF_PFSYNC); - return (ret); + if (TAILQ_EMPTY(&sc->sc_tdb_q)) + sc->sc_len -= sizeof(struct pfsync_subheader); } int -pfsync_clear_states(u_int32_t creatorid, char *ifname) +pfsync_out_tdb(struct tdb *t, struct mbuf *m, int offset) { - struct ifnet *ifp = NULL; - struct pfsync_softc *sc = pfsyncif; - struct pfsync_state_clr *cp; - int s, ret; - - if (sc == NULL) - return (0); + struct pfsync_tdb *ut = (struct pfsync_tdb *)(m->m_data + offset); -#ifdef __FreeBSD__ - ifp = 
sc->sc_ifp; -#else - ifp = &sc->sc_if; -#endif -#ifdef __FreeBSD__ - PF_ASSERT(MA_OWNED); -#endif - s = splnet(); - if (sc->sc_mbuf != NULL) - pfsync_sendout(sc); - if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR, - (void *)&sc->sc_statep.c)) == NULL) { - splx(s); - return (ENOMEM); - } - sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*cp); - cp = sc->sc_statep.c; - cp->creatorid = creatorid; - if (ifname != NULL) - strlcpy(cp->ifname, ifname, IFNAMSIZ); + bzero(ut, sizeof(*ut)); + ut->spi = t->tdb_spi; + bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst)); + /* + * When a failover happens, the master's rpl is probably above + * what we see here (we may be up to a second late), so + * increase it a bit for outbound tdbs to manage most such + * situations. + * + * For now, just add an offset that is likely to be larger + * than the number of packets we can see in one second. The RFC + * just says the next packet must have a higher seq value. + * + * XXX What is a good algorithm for this? We could use + * a rate-determined increase, but to know it, we would have + * to extend struct tdb. + * XXX pt->rpl can wrap over MAXINT, but if so the real tdb + * will soon be replaced anyway. For now, just don't handle + * this edge case. + */ +#define RPL_INCR 16384 + ut->rpl = htonl(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ? 
+ RPL_INCR : 0)); + ut->cur_bytes = htobe64(t->tdb_cur_bytes); + ut->sproto = t->tdb_sproto; - ret = (pfsync_sendout(sc)); - splx(s); - return (ret); + return (sizeof(*ut)); } +#endif void -pfsync_timeout(void *v) +pfsync_bulk_start(void) { - struct pfsync_softc *sc = v; - int s; +#ifdef __FreeBSD__ + struct pfsync_softc *sc = V_pfsyncif; +#else + struct pfsync_softc *sc = pfsyncif; +#endif - s = splnet(); + sc->sc_ureq_received = time_uptime; + + if (sc->sc_bulk_next == NULL) #ifdef __FreeBSD__ - PF_LOCK(); + sc->sc_bulk_next = TAILQ_FIRST(&V_state_list); +#else + sc->sc_bulk_next = TAILQ_FIRST(&state_list); #endif - pfsync_sendout(sc); + sc->sc_bulk_last = sc->sc_bulk_next; + #ifdef __FreeBSD__ - PF_UNLOCK(); + if (V_pf_status.debug >= PF_DEBUG_MISC) +#else + if (pf_status.debug >= PF_DEBUG_MISC) #endif - splx(s); + printf("pfsync: received bulk update request\n"); + + pfsync_bulk_status(PFSYNC_BUS_START); + pfsync_bulk_update(sc); } -#ifdef PFSYNC_TDB void -pfsync_tdb_timeout(void *v) +pfsync_bulk_update(void *arg) { - struct pfsync_softc *sc = v; + struct pfsync_softc *sc = arg; + struct pf_state *st = sc->sc_bulk_next; + int i = 0; int s; - s = splnet(); + s = splsoftnet(); #ifdef __FreeBSD__ + CURVNET_SET(sc->sc_ifp->if_vnet); PF_LOCK(); #endif - pfsync_tdb_sendout(sc); + do { + if (st->sync_state == PFSYNC_S_NONE && + st->timeout < PFTM_MAX && + st->pfsync_time <= sc->sc_ureq_received) { + pfsync_update_state_req(st); + i++; + } + + st = TAILQ_NEXT(st, entry_list); + if (st == NULL) #ifdef __FreeBSD__ - PF_UNLOCK(); + st = TAILQ_FIRST(&V_state_list); +#else + st = TAILQ_FIRST(&state_list); #endif - splx(s); -} + + if (i > 0 && TAILQ_EMPTY(&sc->sc_qs[PFSYNC_S_UPD])) { + sc->sc_bulk_next = st; +#ifdef __FreeBSD__ + callout_reset(&sc->sc_bulk_tmo, 1, + pfsync_bulk_fail, sc); +#else + timeout_add(&sc->sc_bulk_tmo, 1); #endif + goto out; + } + } while (st != sc->sc_bulk_last); -/* This must be called in splnet() */ -void -pfsync_send_bus(struct pfsync_softc *sc, 
u_int8_t status) -{ - struct pfsync_state_bus *bus; + /* we're done */ + sc->sc_bulk_next = NULL; + sc->sc_bulk_last = NULL; + pfsync_bulk_status(PFSYNC_BUS_END); +out: #ifdef __FreeBSD__ - PF_ASSERT(MA_OWNED); + PF_UNLOCK(); + CURVNET_RESTORE(); #endif - if (sc->sc_mbuf != NULL) - pfsync_sendout(sc); - - if (pfsync_sync_ok && - (sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_BUS, - (void *)&sc->sc_statep.b)) != NULL) { - sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*bus); - bus = sc->sc_statep.b; - bus->creatorid = pf_status.hostid; - bus->status = status; - bus->endtime = htonl(time_uptime - sc->sc_ureq_received); - pfsync_sendout(sc); - } + splx(s); } void -pfsync_bulk_update(void *v) +pfsync_bulk_status(u_int8_t status) { - struct pfsync_softc *sc = v; - int s, i = 0; - struct pf_state *state; - - s = splnet(); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif - if (sc->sc_mbuf != NULL) - pfsync_sendout(sc); - - /* - * Grab at most PFSYNC_BULKPACKETS worth of states which have not - * been sent since the latest request was made. 
- */ - state = sc->sc_bulk_send_next; - if (state) - do { - /* send state update if syncable and not already sent */ - if (!state->sync_flags - && state->timeout < PFTM_MAX - && state->pfsync_time <= sc->sc_ureq_received) { - pfsync_pack_state(PFSYNC_ACT_UPD, state, 0); - i++; - } - - /* figure next state to send */ - state = TAILQ_NEXT(state, u.s.entry_list); - - /* wrap to start of list if we hit the end */ - if (!state) - state = TAILQ_FIRST(&state_list); - } while (i < sc->sc_maxcount * PFSYNC_BULKPACKETS && - state != sc->sc_bulk_terminator); - - if (!state || state == sc->sc_bulk_terminator) { - /* we're done */ - pfsync_send_bus(sc, PFSYNC_BUS_END); - sc->sc_ureq_received = 0; - sc->sc_bulk_send_next = NULL; - sc->sc_bulk_terminator = NULL; - timeout_del(&sc->sc_bulk_tmo); - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync: bulk update complete\n"); - } else { - /* look again for more in a bit */ + struct { + struct pfsync_subheader subh; + struct pfsync_bus bus; + } __packed r; + #ifdef __FreeBSD__ - callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, - pfsyncif); + struct pfsync_softc *sc = V_pfsyncif; #else - timeout_add(&sc->sc_bulk_tmo, 1); + struct pfsync_softc *sc = pfsyncif; #endif - sc->sc_bulk_send_next = state; - } - if (sc->sc_mbuf != NULL) - pfsync_sendout(sc); - splx(s); + + bzero(&r, sizeof(r)); + + r.subh.action = PFSYNC_ACT_BUS; + r.subh.count = htons(1); + #ifdef __FreeBSD__ - PF_UNLOCK(); + r.bus.creatorid = V_pf_status.hostid; +#else + r.bus.creatorid = pf_status.hostid; #endif + r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received); + r.bus.status = status; + + pfsync_send_plus(&r, sizeof(r)); } void -pfsync_bulkfail(void *v) +pfsync_bulk_fail(void *arg) { - struct pfsync_softc *sc = v; - int s, error; + struct pfsync_softc *sc = arg; #ifdef __FreeBSD__ - PF_LOCK(); + CURVNET_SET(sc->sc_ifp->if_vnet); #endif + if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { - /* Try again in a bit */ + /* Try again */ #ifdef __FreeBSD__ - 
callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulkfail, - pfsyncif); + callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, + pfsync_bulk_fail, V_pfsyncif); #else - timeout_add(&sc->sc_bulkfail_tmo, 5 * hz); + timeout_add_sec(&sc->sc_bulkfail_tmo, 5); #endif - s = splnet(); - error = pfsync_request_update(NULL, NULL); - if (error == ENOMEM) { - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync: cannot allocate mbufs for " - "bulk update\n"); - } else - pfsync_sendout(sc); - splx(s); + pfsync_request_update(0, 0); } else { /* Pretend like the transfer was ok */ sc->sc_ureq_sent = 0; sc->sc_bulk_tries = 0; #if NCARP > 0 - if (!pfsync_sync_ok) +#ifdef notyet #ifdef __FreeBSD__ -#ifdef CARP_ADVANCED - carp_group_demote_adj(sc->sc_ifp, -1); -#endif + if (!sc->pfsync_sync_ok) #else + if (!pfsync_sync_ok) +#endif carp_group_demote_adj(&sc->sc_if, -1); #endif #endif - pfsync_sync_ok = 1; - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync: failed to receive " - "bulk update status\n"); - timeout_del(&sc->sc_bulkfail_tmo); - } #ifdef __FreeBSD__ - PF_UNLOCK(); + sc->pfsync_sync_ok = 1; +#else + pfsync_sync_ok = 1; #endif -} - -/* This must be called in splnet() */ -int -pfsync_sendout(struct pfsync_softc *sc) -{ -#if NBPFILTER > 0 #ifdef __FreeBSD__ - struct ifnet *ifp = sc->sc_ifp; + if (V_pf_status.debug >= PF_DEBUG_MISC) #else - struct ifnet *ifp = &sc->sc_if; -#endif + if (pf_status.debug >= PF_DEBUG_MISC) #endif - struct mbuf *m; + printf("pfsync: failed to receive bulk update\n"); + } #ifdef __FreeBSD__ - PF_ASSERT(MA_OWNED); + CURVNET_RESTORE(); #endif - timeout_del(&sc->sc_tmo); - - if (sc->sc_mbuf == NULL) - return (0); - m = sc->sc_mbuf; - sc->sc_mbuf = NULL; - sc->sc_statep.s = NULL; +} -#if NBPFILTER > 0 - if (ifp->if_bpf) +void +pfsync_send_plus(void *plus, size_t pluslen) +{ #ifdef __FreeBSD__ - BPF_MTAP(ifp, m); + struct pfsync_softc *sc = V_pfsyncif; #else - bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); -#endif + struct pfsync_softc *sc = pfsyncif; 
#endif + int s; - if (sc->sc_mbuf_net) { - m_freem(m); - m = sc->sc_mbuf_net; - sc->sc_mbuf_net = NULL; - sc->sc_statep_net.s = NULL; - } - - return pfsync_sendout_mbuf(sc, m); -} - -#ifdef PFSYNC_TDB -int -pfsync_tdb_sendout(struct pfsync_softc *sc) -{ -#if NBPFILTER > 0 #ifdef __FreeBSD__ - struct ifnet *ifp = sc->sc_ifp; + if (sc->sc_len + pluslen > sc->sc_ifp->if_mtu) { #else - struct ifnet *ifp = &sc->sc_if; + if (sc->sc_len + pluslen > sc->sc_if.if_mtu) { #endif + s = splnet(); +#ifdef __FreeBSD__ + PF_LOCK(); #endif - struct mbuf *m; - + pfsync_sendout(); #ifdef __FreeBSD__ - PF_ASSERT(MA_OWNED); + PF_UNLOCK(); #endif - timeout_del(&sc->sc_tdb_tmo); + splx(s); + } - if (sc->sc_mbuf_tdb == NULL) - return (0); - m = sc->sc_mbuf_tdb; - sc->sc_mbuf_tdb = NULL; - sc->sc_statep_tdb.t = NULL; + sc->sc_plus = plus; + sc->sc_len += (sc->sc_pluslen = pluslen); -#if NBPFILTER > 0 - if (ifp->if_bpf) + s = splnet(); #ifdef __FreeBSD__ - BPF_MTAP(ifp, m); -#else - bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); + PF_LOCK(); #endif + pfsync_sendout(); +#ifdef __FreeBSD__ + PF_UNLOCK(); #endif - - return pfsync_sendout_mbuf(sc, m); + splx(s); } -#endif int -pfsync_sendout_mbuf(struct pfsync_softc *sc, struct mbuf *m) +pfsync_up(void) { - struct sockaddr sa; - struct ip *ip; - -#ifdef __FreeBSD__ - PF_ASSERT(MA_OWNED); -#endif - if (sc->sc_sync_ifp || -#ifdef __FreeBSD__ - sc->sc_sync_peer.s_addr != htonl(INADDR_PFSYNC_GROUP)) { -#else - sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) { -#endif - M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); - if (m == NULL) { - pfsyncstats.pfsyncs_onomem++; - return (0); - } - ip = mtod(m, struct ip *); - ip->ip_v = IPVERSION; - ip->ip_hl = sizeof(*ip) >> 2; - ip->ip_tos = IPTOS_LOWDELAY; #ifdef __FreeBSD__ - ip->ip_len = m->m_pkthdr.len; + struct pfsync_softc *sc = V_pfsyncif; #else - ip->ip_len = htons(m->m_pkthdr.len); + struct pfsync_softc *sc = pfsyncif; #endif - ip->ip_id = htons(ip_randomid()); + #ifdef __FreeBSD__ - ip->ip_off = IP_DF; 
+ if (sc == NULL || !ISSET(sc->sc_ifp->if_flags, IFF_DRV_RUNNING)) #else - ip->ip_off = htons(IP_DF); + if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING)) #endif - ip->ip_ttl = PFSYNC_DFLTTL; - ip->ip_p = IPPROTO_PFSYNC; - ip->ip_sum = 0; + return (0); - bzero(&sa, sizeof(sa)); - ip->ip_src.s_addr = INADDR_ANY; + return (1); +} +int +pfsync_state_in_use(struct pf_state *st) +{ #ifdef __FreeBSD__ - if (sc->sc_sendaddr.s_addr == htonl(INADDR_PFSYNC_GROUP)) + struct pfsync_softc *sc = V_pfsyncif; #else - if (sc->sc_sendaddr.s_addr == INADDR_PFSYNC_GROUP) + struct pfsync_softc *sc = pfsyncif; #endif - m->m_flags |= M_MCAST; - ip->ip_dst = sc->sc_sendaddr; - sc->sc_sendaddr.s_addr = sc->sc_sync_peer.s_addr; - pfsyncstats.pfsyncs_opackets++; + if (sc == NULL) + return (0); -#ifdef __FreeBSD__ - if (!IF_HANDOFF(&sc->sc_ifq, m, NULL)) - pfsyncstats.pfsyncs_oerrors++; - taskqueue_enqueue(taskqueue_thread, &pfsyncif->sc_send_task); -#else - if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) - pfsyncstats.pfsyncs_oerrors++; -#endif - } else - m_freem(m); + if (st->sync_state != PFSYNC_S_NONE) + return (1); - return (0); + if (sc->sc_bulk_next == NULL && sc->sc_bulk_last == NULL) + return (0); + + return (1); } -#ifdef PFSYNC_TDB -/* Update an in-kernel tdb. Silently fail if no tdb is found. 
*/ +u_int pfsync_ints; +u_int pfsync_tmos; + void -pfsync_update_net_tdb(struct pfsync_tdb *pt) +pfsync_timeout(void *arg) { - struct tdb *tdb; - int s; - - /* check for invalid values */ - if (ntohl(pt->spi) <= SPI_RESERVED_MAX || - (pt->dst.sa.sa_family != AF_INET && - pt->dst.sa.sa_family != AF_INET6)) - goto bad; +#if defined(__FreeBSD__) && defined(VIMAGE) + struct pfsync_softc *sc = arg; +#endif + int s; - s = spltdb(); - tdb = gettdb(pt->spi, &pt->dst, pt->sproto); - if (tdb) { - pt->rpl = ntohl(pt->rpl); - pt->cur_bytes = betoh64(pt->cur_bytes); +#ifdef __FreeBSD__ + CURVNET_SET(sc->sc_ifp->if_vnet); +#endif - /* Neither replay nor byte counter should ever decrease. */ - if (pt->rpl < tdb->tdb_rpl || - pt->cur_bytes < tdb->tdb_cur_bytes) { - splx(s); - goto bad; - } + pfsync_tmos++; - tdb->tdb_rpl = pt->rpl; - tdb->tdb_cur_bytes = pt->cur_bytes; - } + s = splnet(); +#ifdef __FreeBSD__ + PF_LOCK(); +#endif + pfsync_sendout(); +#ifdef __FreeBSD__ + PF_UNLOCK(); +#endif splx(s); - return; - bad: - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " - "invalid value\n"); - pfsyncstats.pfsyncs_badstate++; - return; +#ifdef __FreeBSD__ + CURVNET_RESTORE(); +#endif } -/* One of our local tdbs have been updated, need to sync rpl with others */ -int -pfsync_update_tdb(struct tdb *tdb, int output) +/* this is a softnet/netisr handler */ +void +#ifdef __FreeBSD__ +pfsyncintr(void *arg) +#else +pfsyncintr(void) +#endif { - struct ifnet *ifp = NULL; - struct pfsync_softc *sc = pfsyncif; - struct pfsync_header *h; - struct pfsync_tdb *pt = NULL; - int s, i, ret; +#ifdef __FreeBSD__ + struct pfsync_softc *sc = arg; +#endif + int s; +#ifdef __FreeBSD__ if (sc == NULL) - return (0); + return; + CURVNET_SET(sc->sc_ifp->if_vnet); +#endif + pfsync_ints++; + + s = splnet(); #ifdef __FreeBSD__ - ifp = sc->sc_ifp; -#else - ifp = &sc->sc_if; + PF_LOCK(); #endif - if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL && + pfsync_sendout(); #ifdef 
__FreeBSD__ - sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) { -#else - sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { + PF_UNLOCK(); #endif - /* Don't leave any stale pfsync packets hanging around. */ - if (sc->sc_mbuf_tdb != NULL) { - m_freem(sc->sc_mbuf_tdb); - sc->sc_mbuf_tdb = NULL; - sc->sc_statep_tdb.t = NULL; - } - return (0); - } + splx(s); #ifdef __FreeBSD__ - PF_ASSERT(MA_OWNED); + CURVNET_RESTORE(); #endif - s = splnet(); - if (sc->sc_mbuf_tdb == NULL) { - if ((sc->sc_mbuf_tdb = pfsync_get_mbuf(sc, PFSYNC_ACT_TDB_UPD, - (void *)&sc->sc_statep_tdb.t)) == NULL) { - splx(s); - return (ENOMEM); - } - h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *); - } else { - h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *); - if (h->action != PFSYNC_ACT_TDB_UPD) { - /* - * XXX will never happen as long as there's - * only one "TDB action". - */ - pfsync_tdb_sendout(sc); - sc->sc_mbuf_tdb = pfsync_get_mbuf(sc, - PFSYNC_ACT_TDB_UPD, (void *)&sc->sc_statep_tdb.t); - if (sc->sc_mbuf_tdb == NULL) { - splx(s); - return (ENOMEM); - } - h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *); - } else if (sc->sc_maxupdates) { - /* - * If it's an update, look in the packet to see if - * we already have an update for the state. 
- */ - struct pfsync_tdb *u = - (void *)((char *)h + PFSYNC_HDRLEN); - - for (i = 0; !pt && i < h->count; i++) { - if (tdb->tdb_spi == u->spi && - tdb->tdb_sproto == u->sproto && - !bcmp(&tdb->tdb_dst, &u->dst, - SA_LEN(&u->dst.sa))) { - pt = u; - pt->updates++; - } - u++; - } - } - } +} - if (pt == NULL) { - /* not a "duplicate" update */ - pt = sc->sc_statep_tdb.t++; - sc->sc_mbuf_tdb->m_pkthdr.len = - sc->sc_mbuf_tdb->m_len += sizeof(struct pfsync_tdb); - h->count++; - bzero(pt, sizeof(*pt)); +int +pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) +{ - pt->spi = tdb->tdb_spi; - memcpy(&pt->dst, &tdb->tdb_dst, sizeof pt->dst); - pt->sproto = tdb->tdb_sproto; +#ifdef notyet + /* All sysctl names at this level are terminal. */ + if (namelen != 1) + return (ENOTDIR); + + switch (name[0]) { + case PFSYNCCTL_STATS: + if (newp != NULL) + return (EPERM); + return (sysctl_struct(oldp, oldlenp, newp, newlen, + &V_pfsyncstats, sizeof(V_pfsyncstats))); } - - /* - * When a failover happens, the master's rpl is probably above - * what we see here (we may be up to a second late), so - * increase it a bit for outbound tdbs to manage most such - * situations. - * - * For now, just add an offset that is likely to be larger - * than the number of packets we can see in one second. The RFC - * just says the next packet must have a higher seq value. - * - * XXX What is a good algorithm for this? We could use - * a rate-determined increase, but to know it, we would have - * to extend struct tdb. - * XXX pt->rpl can wrap over MAXINT, but if so the real tdb - * will soon be replaced anyway. For now, just don't handle - * this edge case. - */ -#define RPL_INCR 16384 - pt->rpl = htonl(tdb->tdb_rpl + (output ? 
RPL_INCR : 0)); - pt->cur_bytes = htobe64(tdb->tdb_cur_bytes); - - if (h->count == sc->sc_maxcount || - (sc->sc_maxupdates && (pt->updates >= sc->sc_maxupdates))) - ret = pfsync_tdb_sendout(sc); - - splx(s); - return (ret); +#endif + return (ENOPROTOOPT); } -#endif /* PFSYNC_TDB */ #ifdef __FreeBSD__ void @@ -2243,18 +3305,15 @@ pfsync_ifdetach(void *arg, struct ifnet *ifp) struct pfsync_softc *sc = (struct pfsync_softc *)arg; struct ip_moptions *imo; - if (sc == NULL || sc->sc_sync_ifp != ifp) + if (sc == NULL || sc->sc_sync_if != ifp) return; /* not for us; unlocked read */ + CURVNET_SET(sc->sc_ifp->if_vnet); + PF_LOCK(); /* Deal with a member interface going away from under us. */ - sc->sc_sync_ifp = NULL; - if (sc->sc_mbuf_net != NULL) { - m_freem(sc->sc_mbuf_net); - sc->sc_mbuf_net = NULL; - sc->sc_statep_net.s = NULL; - } + sc->sc_sync_if = NULL; imo = &sc->sc_imo; if (imo->imo_num_memberships > 0) { KASSERT(imo->imo_num_memberships == 1, @@ -2272,29 +3331,73 @@ pfsync_ifdetach(void *arg, struct ifnet *ifp) } PF_UNLOCK(); + + CURVNET_RESTORE(); } -void -pfsync_senddef(void *arg, __unused int pending) +static int +vnet_pfsync_init(const void *unused) { - struct pfsync_softc *sc = (struct pfsync_softc *)arg; - struct mbuf *m; + int error = 0; - for(;;) { - IF_DEQUEUE(&sc->sc_ifq, m); - if (m == NULL) - break; - /* Deal with a member interface going away from under us. 
*/ - if (sc->sc_sync_ifp == NULL) { - pfsyncstats.pfsyncs_oerrors++; - m_freem(m); - continue; - } - if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) - pfsyncstats.pfsyncs_oerrors++; - } + pfsyncattach(0); + + error = swi_add(NULL, "pfsync", pfsyncintr, V_pfsyncif, + SWI_NET, INTR_MPSAFE, &pfsync_swi.pfsync_swi_cookie); + if (error) + panic("%s: swi_add %d", __func__, error); + + pfsync_state_import_ptr = pfsync_state_import; + pfsync_up_ptr = pfsync_up; + pfsync_insert_state_ptr = pfsync_insert_state; + pfsync_update_state_ptr = pfsync_update_state; + pfsync_delete_state_ptr = pfsync_delete_state; + pfsync_clear_states_ptr = pfsync_clear_states; + pfsync_state_in_use_ptr = pfsync_state_in_use; + pfsync_defer_ptr = pfsync_defer; + + return (0); +} + +static int +vnet_pfsync_uninit(const void *unused) +{ + + swi_remove(pfsync_swi.pfsync_swi_cookie); + + pfsync_state_import_ptr = NULL; + pfsync_up_ptr = NULL; + pfsync_insert_state_ptr = NULL; + pfsync_update_state_ptr = NULL; + pfsync_delete_state_ptr = NULL; + pfsync_clear_states_ptr = NULL; + pfsync_state_in_use_ptr = NULL; + pfsync_defer_ptr = NULL; + + if_clone_detach(&pfsync_cloner); + + return (0); } +/* Define startup order. */ +#define PFSYNC_SYSINIT_ORDER SI_SUB_PROTO_BEGIN +#define PFSYNC_MODEVENT_ORDER (SI_ORDER_FIRST) /* On boot slot in here. */ +#define PFSYNC_VNET_ORDER (PFSYNC_MODEVENT_ORDER + 2) /* Later still. */ + +/* + * Starting up. + * VNET_SYSINIT is called for each existing vnet and each new vnet. + */ +VNET_SYSINIT(vnet_pfsync_init, PFSYNC_SYSINIT_ORDER, PFSYNC_VNET_ORDER, + vnet_pfsync_init, NULL); + +/* + * Closing up shop. These are done in REVERSE ORDER, + * Not called on reboot. + * VNET_SYSUNINIT is called for each exiting vnet as it exits. 
+ */ +VNET_SYSUNINIT(vnet_pfsync_uninit, PFSYNC_SYSINIT_ORDER, PFSYNC_VNET_ORDER, + vnet_pfsync_uninit, NULL); static int pfsync_modevent(module_t mod, int type, void *data) { @@ -2302,10 +3405,14 @@ pfsync_modevent(module_t mod, int type, void *data) switch (type) { case MOD_LOAD: +#ifndef __FreeBSD__ pfsyncattach(0); +#endif break; case MOD_UNLOAD: +#ifndef __FreeBSD__ if_clone_detach(&pfsync_cloner); +#endif break; default: error = EINVAL; @@ -2325,5 +3432,5 @@ static moduledata_t pfsync_mod = { DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); MODULE_VERSION(pfsync, PFSYNC_MODVER); -MODULE_DEPEND(pflog, pf, PF_MODVER, PF_MODVER, PF_MODVER); +MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER); #endif /* __FreeBSD__ */ diff --git a/sys/contrib/pf/net/if_pfsync.h b/sys/contrib/pf/net/if_pfsync.h index f306610..17259b7 100644 --- a/sys/contrib/pf/net/if_pfsync.h +++ b/sys/contrib/pf/net/if_pfsync.h @@ -1,5 +1,4 @@ -/* $FreeBSD$ */ -/* $OpenBSD: if_pfsync.h,v 1.30 2006/10/31 14:49:01 henning Exp $ */ +/* $OpenBSD: if_pfsync.h,v 1.35 2008/06/29 08:42:15 mcbride Exp $ */ /* * Copyright (c) 2001 Michael Shalayeff @@ -27,227 +26,217 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ +/* + * Copyright (c) 2008 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + #ifndef _NET_IF_PFSYNC_H_ -#define _NET_IF_PFSYNC_H_ +#define _NET_IF_PFSYNC_H_ +#define PFSYNC_VERSION 5 +#define PFSYNC_DFLTTL 255 -#define PFSYNC_ID_LEN sizeof(u_int64_t) +#define PFSYNC_ACT_CLR 0 /* clear all states */ +#define PFSYNC_ACT_INS 1 /* insert state */ +#define PFSYNC_ACT_INS_ACK 2 /* ack of insterted state */ +#define PFSYNC_ACT_UPD 3 /* update state */ +#define PFSYNC_ACT_UPD_C 4 /* "compressed" update state */ +#define PFSYNC_ACT_UPD_REQ 5 /* request "uncompressed" state */ +#define PFSYNC_ACT_DEL 6 /* delete state */ +#define PFSYNC_ACT_DEL_C 7 /* "compressed" delete state */ +#define PFSYNC_ACT_INS_F 8 /* insert fragment */ +#define PFSYNC_ACT_DEL_F 9 /* delete fragments */ +#define PFSYNC_ACT_BUS 10 /* bulk update status */ +#define PFSYNC_ACT_TDB 11 /* TDB replay counter update */ +#define PFSYNC_ACT_EOF 12 /* end of frame */ +#define PFSYNC_ACT_MAX 13 + +#define PFSYNC_ACTIONS "CLR ST", \ + "INS ST", \ + "INS ST ACK", \ + "UPD ST", \ + "UPD ST COMP", \ + "UPD ST REQ", \ + "DEL ST", \ + "DEL ST COMP", \ + "INS FR", \ + "DEL FR", \ + "BULK UPD STAT", \ + "TDB UPD", \ + "EOF" + +#define PFSYNC_HMAC_LEN 20 -struct pfsync_state_scrub { - u_int16_t pfss_flags; - u_int8_t pfss_ttl; /* stashed TTL */ -#define PFSYNC_SCRUB_FLAG_VALID 0x01 - u_int8_t scrub_flag; - u_int32_t pfss_ts_mod; /* timestamp modulation */ -} __packed; +/* + * A pfsync frame is built from a header followed by several sections which + * are all prefixed with their own subheaders. Frames must be terminated with + * an EOF subheader. + * + * | ... 
| + * | IP header | + * +============================+ + * | pfsync_header | + * +----------------------------+ + * | pfsync_subheader | + * +----------------------------+ + * | first action fields | + * | ... | + * +----------------------------+ + * | pfsync_subheader | + * +----------------------------+ + * | second action fields | + * | ... | + * +----------------------------+ + * | EOF pfsync_subheader | + * +----------------------------+ + * | HMAC | + * +============================+ + */ -struct pfsync_state_host { - struct pf_addr addr; - u_int16_t port; - u_int16_t pad[3]; +/* + * Frame header + */ + +struct pfsync_header { + u_int8_t version; + u_int8_t _pad; + u_int16_t len; + u_int8_t pfcksum[PF_MD5_DIGEST_LENGTH]; } __packed; -struct pfsync_state_peer { - struct pfsync_state_scrub scrub; /* state is scrubbed */ - u_int32_t seqlo; /* Max sequence number sent */ - u_int32_t seqhi; /* Max the other end ACKd + win */ - u_int32_t seqdiff; /* Sequence number modulator */ - u_int16_t max_win; /* largest window (pre scaling) */ - u_int16_t mss; /* Maximum segment size option */ - u_int8_t state; /* active state level */ - u_int8_t wscale; /* window scaling factor */ - u_int8_t pad[6]; +/* + * Frame region subheader + */ + +struct pfsync_subheader { + u_int8_t action; + u_int8_t _pad; + u_int16_t count; } __packed; -struct pfsync_state { - u_int32_t id[2]; - char ifname[IFNAMSIZ]; - struct pfsync_state_host lan; - struct pfsync_state_host gwy; - struct pfsync_state_host ext; - struct pfsync_state_peer src; - struct pfsync_state_peer dst; - struct pf_addr rt_addr; - u_int32_t rule; - u_int32_t anchor; - u_int32_t nat_rule; - u_int32_t creation; - u_int32_t expire; - u_int32_t packets[2][2]; - u_int32_t bytes[2][2]; - u_int32_t creatorid; - sa_family_t af; - u_int8_t proto; - u_int8_t direction; - u_int8_t log; - u_int8_t state_flags; - u_int8_t timeout; - u_int8_t sync_flags; - u_int8_t updates; +/* + * CLR + */ + +struct pfsync_clr { + char ifname[IFNAMSIZ]; + 
u_int32_t creatorid; } __packed; -#define PFSYNC_FLAG_COMPRESS 0x01 -#define PFSYNC_FLAG_STALE 0x02 +/* + * INS, UPD, DEL + */ + +/* these use struct pfsync_state in pfvar.h */ -#ifdef PFSYNC_TDB -struct pfsync_tdb { - u_int32_t spi; - union sockaddr_union dst; - u_int32_t rpl; - u_int64_t cur_bytes; - u_int8_t sproto; - u_int8_t updates; - u_int8_t pad[2]; +/* + * INS_ACK + */ + +struct pfsync_ins_ack { + u_int64_t id; + u_int32_t creatorid; } __packed; -#endif -struct pfsync_state_upd { - u_int32_t id[2]; +/* + * UPD_C + */ + +struct pfsync_upd_c { + u_int64_t id; struct pfsync_state_peer src; struct pfsync_state_peer dst; - u_int32_t creatorid; - u_int32_t expire; - u_int8_t timeout; - u_int8_t updates; - u_int8_t pad[6]; + u_int32_t creatorid; + u_int32_t expire; + u_int8_t timeout; + u_int8_t _pad[3]; } __packed; -struct pfsync_state_del { - u_int32_t id[2]; - u_int32_t creatorid; - struct { - u_int8_t state; - } src; - struct { - u_int8_t state; - } dst; - u_int8_t pad[2]; -} __packed; +/* + * UPD_REQ + */ -struct pfsync_state_upd_req { - u_int32_t id[2]; - u_int32_t creatorid; - u_int32_t pad; +struct pfsync_upd_req { + u_int64_t id; + u_int32_t creatorid; } __packed; -struct pfsync_state_clr { - char ifname[IFNAMSIZ]; - u_int32_t creatorid; - u_int32_t pad; -} __packed; +/* + * DEL_C + */ -struct pfsync_state_bus { - u_int32_t creatorid; - u_int32_t endtime; - u_int8_t status; -#define PFSYNC_BUS_START 1 -#define PFSYNC_BUS_END 2 - u_int8_t pad[7]; +struct pfsync_del_c { + u_int64_t id; + u_int32_t creatorid; } __packed; -#ifdef _KERNEL - -union sc_statep { - struct pfsync_state *s; - struct pfsync_state_upd *u; - struct pfsync_state_del *d; - struct pfsync_state_clr *c; - struct pfsync_state_bus *b; - struct pfsync_state_upd_req *r; -}; +/* + * INS_F, DEL_F + */ -#ifdef PFSYNC_TDB -union sc_tdb_statep { - struct pfsync_tdb *t; -}; -#endif +/* not implemented (yet) */ -extern int pfsync_sync_ok; +/* + * BUS + */ -struct pfsync_softc { -#ifdef __FreeBSD__ - 
struct ifnet *sc_ifp; -#else - struct ifnet sc_if; -#endif - struct ifnet *sc_sync_ifp; +struct pfsync_bus { + u_int32_t creatorid; + u_int32_t endtime; + u_int8_t status; +#define PFSYNC_BUS_START 1 +#define PFSYNC_BUS_END 2 + u_int8_t _pad[3]; +} __packed; - struct ip_moptions sc_imo; -#ifdef __FreeBSD__ - struct callout sc_tmo; -#ifdef PFSYNC_TDB - struct callout sc_tdb_tmo; -#endif - struct callout sc_bulk_tmo; - struct callout sc_bulkfail_tmo; -#else - struct timeout sc_tmo; - struct timeout sc_tdb_tmo; - struct timeout sc_bulk_tmo; - struct timeout sc_bulkfail_tmo; -#endif - struct in_addr sc_sync_peer; - struct in_addr sc_sendaddr; - struct mbuf *sc_mbuf; /* current cumulative mbuf */ - struct mbuf *sc_mbuf_net; /* current cumulative mbuf */ -#ifdef PFSYNC_TDB - struct mbuf *sc_mbuf_tdb; /* dito for TDB updates */ -#endif -#ifdef __FreeBSD__ - struct ifqueue sc_ifq; - struct task sc_send_task; -#endif - union sc_statep sc_statep; - union sc_statep sc_statep_net; -#ifdef PFSYNC_TDB - union sc_tdb_statep sc_statep_tdb; -#endif - u_int32_t sc_ureq_received; - u_int32_t sc_ureq_sent; - struct pf_state *sc_bulk_send_next; - struct pf_state *sc_bulk_terminator; - int sc_bulk_tries; - int sc_maxcount; /* number of states in mtu */ - int sc_maxupdates; /* number of updates/state */ -#ifdef __FreeBSD__ - eventhandler_tag sc_detachtag; -#endif -}; +/* + * TDB + */ -extern struct pfsync_softc *pfsyncif; -#endif +struct pfsync_tdb { + u_int32_t spi; + union sockaddr_union dst; + u_int32_t rpl; + u_int64_t cur_bytes; + u_int8_t sproto; + u_int8_t updates; + u_int8_t _pad[2]; +} __packed; +/* + * EOF + */ -struct pfsync_header { - u_int8_t version; -#define PFSYNC_VERSION 3 - u_int8_t af; - u_int8_t action; -#define PFSYNC_ACT_CLR 0 /* clear all states */ -#define PFSYNC_ACT_INS 1 /* insert state */ -#define PFSYNC_ACT_UPD 2 /* update state */ -#define PFSYNC_ACT_DEL 3 /* delete state */ -#define PFSYNC_ACT_UPD_C 4 /* "compressed" state update */ -#define PFSYNC_ACT_DEL_C 
5 /* "compressed" state delete */ -#define PFSYNC_ACT_INS_F 6 /* insert fragment */ -#define PFSYNC_ACT_DEL_F 7 /* delete fragments */ -#define PFSYNC_ACT_UREQ 8 /* request "uncompressed" state */ -#define PFSYNC_ACT_BUS 9 /* Bulk Update Status */ -#define PFSYNC_ACT_TDB_UPD 10 /* TDB replay counter update */ -#define PFSYNC_ACT_MAX 11 - u_int8_t count; - u_int8_t pf_chksum[PF_MD5_DIGEST_LENGTH]; +struct pfsync_eof { + u_int8_t hmac[PFSYNC_HMAC_LEN]; } __packed; -#define PFSYNC_BULKPACKETS 1 /* # of packets per timeout */ -#define PFSYNC_MAX_BULKTRIES 12 -#define PFSYNC_HDRLEN sizeof(struct pfsync_header) -#define PFSYNC_ACTIONS \ - "CLR ST", "INS ST", "UPD ST", "DEL ST", \ - "UPD ST COMP", "DEL ST COMP", "INS FR", "DEL FR", \ - "UPD REQ", "BLK UPD STAT", "TDB UPD" +#define PFSYNC_HDRLEN sizeof(struct pfsync_header) + -#define PFSYNC_DFLTTL 255 + +/* + * Names for PFSYNC sysctl objects + */ +#define PFSYNCCTL_STATS 1 /* PFSYNC stats */ +#define PFSYNCCTL_MAXID 2 + +#define PFSYNCCTL_NAMES { \ + { 0, 0 }, \ + { "stats", CTLTYPE_STRUCT }, \ +} struct pfsyncstats { u_int64_t pfsyncs_ipackets; /* total input packets, IPv4 */ @@ -280,96 +269,56 @@ struct pfsyncreq { }; #ifdef __FreeBSD__ -#define SIOCSETPFSYNC _IOW('i', 247, struct ifreq) -#define SIOCGETPFSYNC _IOWR('i', 248, struct ifreq) +#define SIOCSETPFSYNC _IOW('i', 247, struct ifreq) +#define SIOCGETPFSYNC _IOWR('i', 248, struct ifreq) #endif -#define pf_state_peer_hton(s,d) do { \ - (d)->seqlo = htonl((s)->seqlo); \ - (d)->seqhi = htonl((s)->seqhi); \ - (d)->seqdiff = htonl((s)->seqdiff); \ - (d)->max_win = htons((s)->max_win); \ - (d)->mss = htons((s)->mss); \ - (d)->state = (s)->state; \ - (d)->wscale = (s)->wscale; \ - if ((s)->scrub) { \ - (d)->scrub.pfss_flags = \ - htons((s)->scrub->pfss_flags & PFSS_TIMESTAMP); \ - (d)->scrub.pfss_ttl = (s)->scrub->pfss_ttl; \ - (d)->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);\ - (d)->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID; \ - } \ -} while (0) - -#define 
pf_state_peer_ntoh(s,d) do { \ - (d)->seqlo = ntohl((s)->seqlo); \ - (d)->seqhi = ntohl((s)->seqhi); \ - (d)->seqdiff = ntohl((s)->seqdiff); \ - (d)->max_win = ntohs((s)->max_win); \ - (d)->mss = ntohs((s)->mss); \ - (d)->state = (s)->state; \ - (d)->wscale = (s)->wscale; \ - if ((s)->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID && \ - (d)->scrub != NULL) { \ - (d)->scrub->pfss_flags = \ - ntohs((s)->scrub.pfss_flags) & PFSS_TIMESTAMP; \ - (d)->scrub->pfss_ttl = (s)->scrub.pfss_ttl; \ - (d)->scrub->pfss_ts_mod = ntohl((s)->scrub.pfss_ts_mod);\ - } \ -} while (0) - -#define pf_state_host_hton(s,d) do { \ - bcopy(&(s)->addr, &(d)->addr, sizeof((d)->addr)); \ - (d)->port = (s)->port; \ -} while (0) - -#define pf_state_host_ntoh(s,d) do { \ - bcopy(&(s)->addr, &(d)->addr, sizeof((d)->addr)); \ - (d)->port = (s)->port; \ -} while (0) - -#define pf_state_counter_hton(s,d) do { \ - d[0] = htonl((s>>32)&0xffffffff); \ - d[1] = htonl(s&0xffffffff); \ -} while (0) - -#define pf_state_counter_ntoh(s,d) do { \ - d = ntohl(s[0]); \ - d = d<<32; \ - d += ntohl(s[1]); \ -} while (0) - #ifdef _KERNEL + +/* + * this shows where a pf state is with respect to the syncing. 
+ */ +#define PFSYNC_S_INS 0x00 +#define PFSYNC_S_IACK 0x01 +#define PFSYNC_S_UPD 0x02 +#define PFSYNC_S_UPD_C 0x03 +#define PFSYNC_S_DEL 0x04 +#define PFSYNC_S_COUNT 0x05 + +#define PFSYNC_S_DEFER 0xfe +#define PFSYNC_S_NONE 0xff + #ifdef __FreeBSD__ -void pfsync_input(struct mbuf *, __unused int); +void pfsync_input(struct mbuf *, __unused int); #else -void pfsync_input(struct mbuf *, ...); +void pfsync_input(struct mbuf *, ...); #endif -int pfsync_clear_states(u_int32_t, char *); -int pfsync_pack_state(u_int8_t, struct pf_state *, int); -#define pfsync_insert_state(st) do { \ - if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) || \ - (st->proto == IPPROTO_PFSYNC)) \ - st->sync_flags |= PFSTATE_NOSYNC; \ - else if (!st->sync_flags) \ - pfsync_pack_state(PFSYNC_ACT_INS, (st), \ - PFSYNC_FLAG_COMPRESS); \ - st->sync_flags &= ~PFSTATE_FROMSYNC; \ -} while (0) -#define pfsync_update_state(st) do { \ - if (!st->sync_flags) \ - pfsync_pack_state(PFSYNC_ACT_UPD, (st), \ - PFSYNC_FLAG_COMPRESS); \ - st->sync_flags &= ~PFSTATE_FROMSYNC; \ -} while (0) -#define pfsync_delete_state(st) do { \ - if (!st->sync_flags) \ - pfsync_pack_state(PFSYNC_ACT_DEL, (st), \ - PFSYNC_FLAG_COMPRESS); \ -} while (0) -#ifdef PFSYNC_TDB -int pfsync_update_tdb(struct tdb *, int); +int pfsync_sysctl(int *, u_int, void *, size_t *, + void *, size_t); + +#define PFSYNC_SI_IOCTL 0x01 +#define PFSYNC_SI_CKSUM 0x02 +#define PFSYNC_SI_ACK 0x04 +int pfsync_state_import(struct pfsync_state *, u_int8_t); +#ifndef __FreeBSD__ +void pfsync_state_export(struct pfsync_state *, + struct pf_state *); #endif + +void pfsync_insert_state(struct pf_state *); +void pfsync_update_state(struct pf_state *); +void pfsync_delete_state(struct pf_state *); +void pfsync_clear_states(u_int32_t, const char *); + +#ifdef notyet +void pfsync_update_tdb(struct tdb *, int); +void pfsync_delete_tdb(struct tdb *); +#endif + +int pfsync_defer(struct pf_state *, struct mbuf *); + +int pfsync_up(void); +int pfsync_state_in_use(struct 
pf_state *); #endif #endif /* _NET_IF_PFSYNC_H_ */ diff --git a/sys/contrib/pf/net/pf.c b/sys/contrib/pf/net/pf.c index 135d734..5efd651 100644 --- a/sys/contrib/pf/net/pf.c +++ b/sys/contrib/pf/net/pf.c @@ -1,9 +1,8 @@ -/* $OpenBSD: pf.c,v 1.527 2007/02/22 15:23:23 pyr Exp $ */ -/* add: $OpenBSD: pf.c,v 1.559 2007/09/18 18:45:59 markus Exp $ */ +/* $OpenBSD: pf.c,v 1.634 2009/02/27 12:37:45 henning Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier - * Copyright (c) 2002,2003 Henning Brauer + * Copyright (c) 2002 - 2008 Henning Brauer * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -66,10 +65,17 @@ __FBSDID("$FreeBSD$"); #define NPFSYNC 0 #endif +#ifdef DEV_PFLOW +#define NPFLOW DEV_PFLOW +#else +#define NPFLOW 0 +#endif + #else #include "bpfilter.h" #include "pflog.h" #include "pfsync.h" +#include "pflow.h" #endif #include @@ -81,8 +87,10 @@ __FBSDID("$FreeBSD$"); #include #include #ifdef __FreeBSD__ +#include #include #include +#define betoh64 be64toh #else #include #endif @@ -95,11 +103,21 @@ __FBSDID("$FreeBSD$"); #include #endif +#ifdef __FreeBSD__ +#include +#else +#include +#endif + #include #include #include #include -#ifndef __FreeBSD__ +#ifdef __FreeBSD__ +#ifdef RADIX_MPATH +#include +#endif +#else #include #endif @@ -118,16 +136,18 @@ __FBSDID("$FreeBSD$"); #include #include #include +#ifdef __FreeBSD__ +#include +#include /* XXX: only for DIR_IN/DIR_OUT */ +#endif #ifndef __FreeBSD__ #include #endif #include #include - -#if NPFSYNC > 0 +#include #include -#endif /* NPFSYNC > 0 */ #ifdef INET6 #include @@ -147,15 +167,61 @@ __FBSDID("$FreeBSD$"); #include extern int ip_optcopy(struct ip *, struct ip *); -extern int debug_pfugidhack; #endif -#define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x +#ifdef __FreeBSD__ +#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x +#else +#define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x +#endif /* * Global variables */ +/* state tables */ 
+#ifdef __FreeBSD__ +VNET_DEFINE(struct pf_state_tree, pf_statetbl); + +VNET_DEFINE(struct pf_altqqueue, pf_altqs[2]); +VNET_DEFINE(struct pf_palist, pf_pabuf); +VNET_DEFINE(struct pf_altqqueue *, pf_altqs_active); +VNET_DEFINE(struct pf_altqqueue *, pf_altqs_inactive); +VNET_DEFINE(struct pf_status, pf_status); + +VNET_DEFINE(u_int32_t, ticket_altqs_active); +VNET_DEFINE(u_int32_t, ticket_altqs_inactive); +VNET_DEFINE(int, altqs_inactive_open); +VNET_DEFINE(u_int32_t, ticket_pabuf); + +VNET_DEFINE(MD5_CTX, pf_tcp_secret_ctx); +#define V_pf_tcp_secret_ctx VNET(pf_tcp_secret_ctx) +VNET_DEFINE(u_char, pf_tcp_secret[16]); +#define V_pf_tcp_secret VNET(pf_tcp_secret) +VNET_DEFINE(int, pf_tcp_secret_init); +#define V_pf_tcp_secret_init VNET(pf_tcp_secret_init) +VNET_DEFINE(int, pf_tcp_iss_off); +#define V_pf_tcp_iss_off VNET(pf_tcp_iss_off) + +struct pf_anchor_stackframe { + struct pf_ruleset *rs; + struct pf_rule *r; + struct pf_anchor_node *parent; + struct pf_anchor *child; +}; +VNET_DEFINE(struct pf_anchor_stackframe, pf_anchor_stack[64]); +#define V_pf_anchor_stack VNET(pf_anchor_stack) + +VNET_DEFINE(uma_zone_t, pf_src_tree_pl); +VNET_DEFINE(uma_zone_t, pf_rule_pl); +VNET_DEFINE(uma_zone_t, pf_pooladdr_pl); +VNET_DEFINE(uma_zone_t, pf_state_pl); +VNET_DEFINE(uma_zone_t, pf_state_key_pl); +VNET_DEFINE(uma_zone_t, pf_state_item_pl); +VNET_DEFINE(uma_zone_t, pf_altq_pl); +#else +struct pf_state_tree pf_statetbl; + struct pf_altqqueue pf_altqs[2]; struct pf_palist pf_pabuf; struct pf_altqqueue *pf_altqs_active; @@ -167,6 +233,11 @@ u_int32_t ticket_altqs_inactive; int altqs_inactive_open; u_int32_t ticket_pabuf; +MD5_CTX pf_tcp_secret_ctx; +u_char pf_tcp_secret[16]; +int pf_tcp_secret_init; +int pf_tcp_iss_off; + struct pf_anchor_stackframe { struct pf_ruleset *rs; struct pf_rule *r; @@ -174,16 +245,11 @@ struct pf_anchor_stackframe { struct pf_anchor *child; } pf_anchor_stack[64]; -#ifdef __FreeBSD__ -uma_zone_t pf_src_tree_pl, pf_rule_pl; -uma_zone_t pf_state_pl, 
pf_altq_pl, pf_pooladdr_pl; -#else -struct pool pf_src_tree_pl, pf_rule_pl; -struct pool pf_state_pl, pf_altq_pl, pf_pooladdr_pl; +struct pool pf_src_tree_pl, pf_rule_pl, pf_pooladdr_pl; +struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl; +struct pool pf_altq_pl; #endif -void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t); - void pf_init_threshold(struct pf_threshold *, u_int32_t, u_int32_t); void pf_add_threshold(struct pf_threshold *); @@ -212,18 +278,12 @@ void pf_send_tcp(const struct pf_rule *, sa_family_t, u_int16_t, u_int16_t, u_int32_t, u_int32_t, u_int8_t, u_int16_t, u_int16_t, u_int8_t, int, u_int16_t, struct ether_header *, struct ifnet *); -void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, +static void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, sa_family_t, struct pf_rule *); -struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *, - int, int, struct pfi_kif *, - struct pf_addr *, u_int16_t, struct pf_addr *, - u_int16_t, int); -struct pf_rule *pf_get_translation(struct pf_pdesc *, struct mbuf *, - int, int, struct pfi_kif *, struct pf_src_node **, - struct pf_addr *, u_int16_t, - struct pf_addr *, u_int16_t, - struct pf_addr *, u_int16_t *); -int pf_test_tcp(struct pf_rule **, struct pf_state **, +void pf_detach_state(struct pf_state *); +void pf_state_key_detach(struct pf_state *, int); +u_int32_t pf_tcp_iss(struct pf_pdesc *); +int pf_test_rule(struct pf_rule **, struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, struct pf_rule **, #ifdef __FreeBSD__ @@ -232,23 +292,14 @@ int pf_test_tcp(struct pf_rule **, struct pf_state **, #else struct pf_ruleset **, struct ifqueue *); #endif -int pf_test_udp(struct pf_rule **, struct pf_state **, - int, struct pfi_kif *, struct mbuf *, int, - void *, struct pf_pdesc *, struct pf_rule **, -#ifdef __FreeBSD__ - struct pf_ruleset **, struct ifqueue *, - struct inpcb *); -#else - struct pf_ruleset **, struct ifqueue *); -#endif -int 
pf_test_icmp(struct pf_rule **, struct pf_state **, - int, struct pfi_kif *, struct mbuf *, int, - void *, struct pf_pdesc *, struct pf_rule **, - struct pf_ruleset **, struct ifqueue *); -int pf_test_other(struct pf_rule **, struct pf_state **, - int, struct pfi_kif *, struct mbuf *, int, void *, - struct pf_pdesc *, struct pf_rule **, - struct pf_ruleset **, struct ifqueue *); +static __inline int pf_create_state(struct pf_rule *, struct pf_rule *, + struct pf_rule *, struct pf_pdesc *, + struct pf_src_node *, struct pf_state_key *, + struct pf_state_key *, struct pf_state_key *, + struct pf_state_key *, struct mbuf *, int, + u_int16_t, u_int16_t, int *, struct pfi_kif *, + struct pf_state **, int, u_int16_t, u_int16_t, + int); int pf_test_fragment(struct pf_rule **, int, struct pfi_kif *, struct mbuf *, void *, struct pf_pdesc *, struct pf_rule **, @@ -257,7 +308,7 @@ int pf_tcp_track_full(struct pf_state_peer *, struct pf_state_peer *, struct pf_state **, struct pfi_kif *, struct mbuf *, int, struct pf_pdesc *, u_short *, int *); -int pf_tcp_track_sloppy(struct pf_state_peer *, +int pf_tcp_track_sloppy(struct pf_state_peer *, struct pf_state_peer *, struct pf_state **, struct pf_pdesc *, u_short *); int pf_test_state_tcp(struct pf_state **, int, @@ -270,30 +321,14 @@ int pf_test_state_icmp(struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, u_short *); int pf_test_state_other(struct pf_state **, int, - struct pfi_kif *, struct pf_pdesc *); -int pf_match_tag(struct mbuf *, struct pf_rule *, - struct pf_mtag *, int *); -int pf_step_out_of_anchor(int *, struct pf_ruleset **, - int, struct pf_rule **, struct pf_rule **, - int *); -void pf_hash(struct pf_addr *, struct pf_addr *, - struct pf_poolhashkey *, sa_family_t); -int pf_map_addr(u_int8_t, struct pf_rule *, - struct pf_addr *, struct pf_addr *, - struct pf_addr *, struct pf_src_node **); -int pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *, - struct pf_addr *, struct 
pf_addr *, u_int16_t, - struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t, - struct pf_src_node **); + struct pfi_kif *, struct mbuf *, struct pf_pdesc *); void pf_route(struct mbuf **, struct pf_rule *, int, struct ifnet *, struct pf_state *, struct pf_pdesc *); void pf_route6(struct mbuf **, struct pf_rule *, int, struct ifnet *, struct pf_state *, struct pf_pdesc *); -#ifdef __FreeBSD__ -/* XXX: import */ -#else +#ifndef __FreeBSD__ int pf_socket_lookup(int, struct pf_pdesc *); #endif u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t, @@ -306,19 +341,32 @@ void pf_set_rt_ifp(struct pf_state *, struct pf_addr *); int pf_check_proto_cksum(struct mbuf *, int, int, u_int8_t, sa_family_t); +#ifndef __FreeBSD__ +struct pf_divert *pf_get_divert(struct mbuf *); +#endif +void pf_print_state_parts(struct pf_state *, + struct pf_state_key *, struct pf_state_key *); int pf_addr_wrap_neq(struct pf_addr_wrap *, struct pf_addr_wrap *); -struct pf_state *pf_find_state_recurse(struct pfi_kif *, - struct pf_state_cmp *, u_int8_t); +int pf_compare_state_keys(struct pf_state_key *, + struct pf_state_key *, struct pfi_kif *, u_int); +#ifdef __FreeBSD__ +struct pf_state *pf_find_state(struct pfi_kif *, + struct pf_state_key_cmp *, u_int, struct mbuf *, + struct pf_mtag *); +#else +struct pf_state *pf_find_state(struct pfi_kif *, + struct pf_state_key_cmp *, u_int, struct mbuf *); +#endif int pf_src_connlimit(struct pf_state **); int pf_check_congestion(struct ifqueue *); #ifdef __FreeBSD__ int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len); -extern int pf_end_threads; +VNET_DECLARE(int, pf_end_threads); -struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX]; +VNET_DEFINE(struct pf_pool_limit, pf_pool_limits[PF_LIMIT_MAX]); #else extern struct pool pfr_ktable_pl; extern struct pool pfr_kentry_pl; @@ -332,80 +380,101 @@ struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = { }; #endif -#define STATE_LOOKUP() \ +#ifdef __FreeBSD__ +#define PPACKET_LOOPED() \ + 
(pd->pf_mtag->flags & PF_PACKET_LOOPED) + +#define PACKET_LOOPED() \ + (pd.pf_mtag->flags & PF_PACKET_LOOPED) + +#define STATE_LOOKUP(i, k, d, s, m, pt) \ do { \ - if (direction == PF_IN) \ - *state = pf_find_state_recurse( \ - kif, &key, PF_EXT_GWY); \ - else \ - *state = pf_find_state_recurse( \ - kif, &key, PF_LAN_EXT); \ - if (*state == NULL || (*state)->timeout == PFTM_PURGE) \ + s = pf_find_state(i, k, d, m, pt); \ + if (s == NULL || (s)->timeout == PFTM_PURGE) \ return (PF_DROP); \ - if (direction == PF_OUT && \ - (((*state)->rule.ptr->rt == PF_ROUTETO && \ - (*state)->rule.ptr->direction == PF_OUT) || \ - ((*state)->rule.ptr->rt == PF_REPLYTO && \ - (*state)->rule.ptr->direction == PF_IN)) && \ - (*state)->rt_kif != NULL && \ - (*state)->rt_kif != kif) \ + if (PPACKET_LOOPED()) \ + return (PF_PASS); \ + if (d == PF_OUT && \ + (((s)->rule.ptr->rt == PF_ROUTETO && \ + (s)->rule.ptr->direction == PF_OUT) || \ + ((s)->rule.ptr->rt == PF_REPLYTO && \ + (s)->rule.ptr->direction == PF_IN)) && \ + (s)->rt_kif != NULL && \ + (s)->rt_kif != i) \ return (PF_PASS); \ } while (0) +#else +#define STATE_LOOKUP(i, k, d, s, m) \ + do { \ + s = pf_find_state(i, k, d, m); \ + if (s == NULL || (s)->timeout == PFTM_PURGE) \ + return (PF_DROP); \ + if (d == PF_OUT && \ + (((s)->rule.ptr->rt == PF_ROUTETO && \ + (s)->rule.ptr->direction == PF_OUT) || \ + ((s)->rule.ptr->rt == PF_REPLYTO && \ + (s)->rule.ptr->direction == PF_IN)) && \ + (s)->rt_kif != NULL && \ + (s)->rt_kif != i) \ + return (PF_PASS); \ + } while (0) +#endif -#define STATE_TRANSLATE(s) \ - (s)->lan.addr.addr32[0] != (s)->gwy.addr.addr32[0] || \ - ((s)->af == AF_INET6 && \ - ((s)->lan.addr.addr32[1] != (s)->gwy.addr.addr32[1] || \ - (s)->lan.addr.addr32[2] != (s)->gwy.addr.addr32[2] || \ - (s)->lan.addr.addr32[3] != (s)->gwy.addr.addr32[3])) || \ - (s)->lan.port != (s)->gwy.port - -#define BOUND_IFACE(r, k) \ +#ifdef __FreeBSD__ +#define BOUND_IFACE(r, k) \ + ((r)->rule_flag & PFRULE_IFBOUND) ? 
(k) : V_pfi_all +#else +#define BOUND_IFACE(r, k) \ ((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all +#endif -#define STATE_INC_COUNTERS(s) \ +#define STATE_INC_COUNTERS(s) \ do { \ - s->rule.ptr->states++; \ - if (s->anchor.ptr != NULL) \ - s->anchor.ptr->states++; \ - if (s->nat_rule.ptr != NULL) \ - s->nat_rule.ptr->states++; \ + s->rule.ptr->states_cur++; \ + s->rule.ptr->states_tot++; \ + if (s->anchor.ptr != NULL) { \ + s->anchor.ptr->states_cur++; \ + s->anchor.ptr->states_tot++; \ + } \ + if (s->nat_rule.ptr != NULL) { \ + s->nat_rule.ptr->states_cur++; \ + s->nat_rule.ptr->states_tot++; \ + } \ } while (0) -#define STATE_DEC_COUNTERS(s) \ +#define STATE_DEC_COUNTERS(s) \ do { \ if (s->nat_rule.ptr != NULL) \ - s->nat_rule.ptr->states--; \ + s->nat_rule.ptr->states_cur--; \ if (s->anchor.ptr != NULL) \ - s->anchor.ptr->states--; \ - s->rule.ptr->states--; \ + s->anchor.ptr->states_cur--; \ + s->rule.ptr->states_cur--; \ } while (0) +static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *); +static __inline int pf_state_compare_key(struct pf_state_key *, + struct pf_state_key *); +static __inline int pf_state_compare_id(struct pf_state *, + struct pf_state *); + +#ifdef __FreeBSD__ +VNET_DEFINE(struct pf_src_tree, tree_src_tracking); + +VNET_DEFINE(struct pf_state_tree_id, tree_id); +VNET_DEFINE(struct pf_state_queue, state_list); +#else struct pf_src_tree tree_src_tracking; struct pf_state_tree_id tree_id; struct pf_state_queue state_list; - -#ifdef __FreeBSD__ -static int pf_src_compare(struct pf_src_node *, struct pf_src_node *); -static int pf_state_compare_lan_ext(struct pf_state *, struct pf_state *); -static int pf_state_compare_ext_gwy(struct pf_state *, struct pf_state *); -static int pf_state_compare_id(struct pf_state *, struct pf_state *); #endif RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare); -RB_GENERATE(pf_state_tree_lan_ext, pf_state, - u.s.entry_lan_ext, pf_state_compare_lan_ext); 
-RB_GENERATE(pf_state_tree_ext_gwy, pf_state, - u.s.entry_ext_gwy, pf_state_compare_ext_gwy); +RB_GENERATE(pf_state_tree, pf_state_key, entry, pf_state_compare_key); RB_GENERATE(pf_state_tree_id, pf_state, - u.s.entry_id, pf_state_compare_id); + entry_id, pf_state_compare_id); -#ifdef __FreeBSD__ -static int -#else static __inline int -#endif pf_src_compare(struct pf_src_node *a, struct pf_src_node *b) { int diff; @@ -449,169 +518,6 @@ pf_src_compare(struct pf_src_node *a, struct pf_src_node *b) return (0); } -#ifdef __FreeBSD__ -static int -#else -static __inline int -#endif -pf_state_compare_lan_ext(struct pf_state *a, struct pf_state *b) -{ - int diff; - - if ((diff = a->proto - b->proto) != 0) - return (diff); - if ((diff = a->af - b->af) != 0) - return (diff); - switch (a->af) { -#ifdef INET - case AF_INET: - if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0]) - return (1); - if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0]) - return (-1); - if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) - return (1); - if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) - return (-1); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - if (a->lan.addr.addr32[3] > b->lan.addr.addr32[3]) - return (1); - if (a->lan.addr.addr32[3] < b->lan.addr.addr32[3]) - return (-1); - if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3]) - return (1); - if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3]) - return (-1); - if (a->lan.addr.addr32[2] > b->lan.addr.addr32[2]) - return (1); - if (a->lan.addr.addr32[2] < b->lan.addr.addr32[2]) - return (-1); - if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2]) - return (1); - if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2]) - return (-1); - if (a->lan.addr.addr32[1] > b->lan.addr.addr32[1]) - return (1); - if (a->lan.addr.addr32[1] < b->lan.addr.addr32[1]) - return (-1); - if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1]) - return (1); - if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1]) - return (-1); - if (a->lan.addr.addr32[0] > 
b->lan.addr.addr32[0]) - return (1); - if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0]) - return (-1); - if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) - return (1); - if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) - return (-1); - break; -#endif /* INET6 */ - } - - if ((diff = a->lan.port - b->lan.port) != 0) - return (diff); - if ((diff = a->ext.port - b->ext.port) != 0) - return (diff); - - return (0); -} - -#ifdef __FreeBSD__ -static int -#else -static __inline int -#endif -pf_state_compare_ext_gwy(struct pf_state *a, struct pf_state *b) -{ - int diff; - - if ((diff = a->proto - b->proto) != 0) - return (diff); - if ((diff = a->af - b->af) != 0) - return (diff); - switch (a->af) { -#ifdef INET - case AF_INET: - if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) - return (1); - if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) - return (-1); - if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0]) - return (1); - if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0]) - return (-1); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3]) - return (1); - if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3]) - return (-1); - if (a->gwy.addr.addr32[3] > b->gwy.addr.addr32[3]) - return (1); - if (a->gwy.addr.addr32[3] < b->gwy.addr.addr32[3]) - return (-1); - if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2]) - return (1); - if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2]) - return (-1); - if (a->gwy.addr.addr32[2] > b->gwy.addr.addr32[2]) - return (1); - if (a->gwy.addr.addr32[2] < b->gwy.addr.addr32[2]) - return (-1); - if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1]) - return (1); - if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1]) - return (-1); - if (a->gwy.addr.addr32[1] > b->gwy.addr.addr32[1]) - return (1); - if (a->gwy.addr.addr32[1] < b->gwy.addr.addr32[1]) - return (-1); - if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) - return (1); - if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) - 
return (-1); - if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0]) - return (1); - if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0]) - return (-1); - break; -#endif /* INET6 */ - } - - if ((diff = a->ext.port - b->ext.port) != 0) - return (diff); - if ((diff = a->gwy.port - b->gwy.port) != 0) - return (diff); - - return (0); -} - -#ifdef __FreeBSD__ -static int -#else -static __inline int -#endif -pf_state_compare_id(struct pf_state *a, struct pf_state *b) -{ - if (a->id > b->id) - return (1); - if (a->id < b->id) - return (-1); - if (a->creatorid > b->creatorid) - return (1); - if (a->creatorid < b->creatorid) - return (-1); - - return (0); -} - #ifdef INET6 void pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) @@ -632,80 +538,6 @@ pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) } #endif /* INET6 */ -struct pf_state * -pf_find_state_byid(struct pf_state_cmp *key) -{ - pf_status.fcounters[FCNT_STATE_SEARCH]++; - return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key)); -} - -struct pf_state * -pf_find_state_recurse(struct pfi_kif *kif, struct pf_state_cmp *key, u_int8_t tree) -{ - struct pf_state *s; - - pf_status.fcounters[FCNT_STATE_SEARCH]++; - - switch (tree) { - case PF_LAN_EXT: - if ((s = RB_FIND(pf_state_tree_lan_ext, &kif->pfik_lan_ext, - (struct pf_state *)key)) != NULL) - return (s); - if ((s = RB_FIND(pf_state_tree_lan_ext, &pfi_all->pfik_lan_ext, - (struct pf_state *)key)) != NULL) - return (s); - return (NULL); - case PF_EXT_GWY: - if ((s = RB_FIND(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, - (struct pf_state *)key)) != NULL) - return (s); - if ((s = RB_FIND(pf_state_tree_ext_gwy, &pfi_all->pfik_ext_gwy, - (struct pf_state *)key)) != NULL) - return (s); - return (NULL); - default: - panic("pf_find_state_recurse"); - } -} - -struct pf_state * -pf_find_state_all(struct pf_state_cmp *key, u_int8_t tree, int *more) -{ - struct pf_state *s, *ss = NULL; - struct pfi_kif *kif; - - 
pf_status.fcounters[FCNT_STATE_SEARCH]++; - - switch (tree) { - case PF_LAN_EXT: - TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) { - s = RB_FIND(pf_state_tree_lan_ext, - &kif->pfik_lan_ext, (struct pf_state *)key); - if (s == NULL) - continue; - if (more == NULL) - return (s); - ss = s; - (*more)++; - } - return (ss); - case PF_EXT_GWY: - TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) { - s = RB_FIND(pf_state_tree_ext_gwy, - &kif->pfik_ext_gwy, (struct pf_state *)key); - if (s == NULL) - continue; - if (more == NULL) - return (s); - ss = s; - (*more)++; - } - return (ss); - default: - panic("pf_find_state_all"); - } -} - void pf_init_threshold(struct pf_threshold *threshold, u_int32_t limit, u_int32_t seconds) @@ -739,7 +571,6 @@ pf_check_threshold(struct pf_threshold *threshold) int pf_src_connlimit(struct pf_state **state) { - struct pf_state *s; int bad = 0; (*state)->src_node->conn++; @@ -749,13 +580,21 @@ pf_src_connlimit(struct pf_state **state) if ((*state)->rule.ptr->max_src_conn && (*state)->rule.ptr->max_src_conn < (*state)->src_node->conn) { +#ifdef __FreeBSD__ + V_pf_status.lcounters[LCNT_SRCCONN]++; +#else pf_status.lcounters[LCNT_SRCCONN]++; +#endif bad++; } if ((*state)->rule.ptr->max_src_conn_rate.limit && pf_check_threshold(&(*state)->src_node->conn_rate)) { +#ifdef __FreeBSD__ + V_pf_status.lcounters[LCNT_SRCCONNRATE]++; +#else pf_status.lcounters[LCNT_SRCCONNRATE]++; +#endif bad++; } @@ -766,16 +605,21 @@ pf_src_connlimit(struct pf_state **state) struct pfr_addr p; u_int32_t killed = 0; +#ifdef __FreeBSD__ + V_pf_status.lcounters[LCNT_OVERLOAD_TABLE]++; + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else pf_status.lcounters[LCNT_OVERLOAD_TABLE]++; if (pf_status.debug >= PF_DEBUG_MISC) { +#endif printf("pf_src_connlimit: blocking address "); pf_print_host(&(*state)->src_node->addr, 0, - (*state)->af); + (*state)->key[PF_SK_WIRE]->af); } bzero(&p, sizeof(p)); - p.pfra_af = (*state)->af; - switch ((*state)->af) { + p.pfra_af = 
(*state)->key[PF_SK_WIRE]->af; + switch ((*state)->key[PF_SK_WIRE]->af) { #ifdef INET case AF_INET: p.pfra_net = 32; @@ -795,34 +639,51 @@ pf_src_connlimit(struct pf_state **state) /* kill existing states if that's required. */ if ((*state)->rule.ptr->flush) { - pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++; + struct pf_state_key *sk; + struct pf_state *st; - RB_FOREACH(s, pf_state_tree_id, &tree_id) { +#ifdef __FreeBSD__ + V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++; + RB_FOREACH(st, pf_state_tree_id, &V_tree_id) { +#else + pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++; + RB_FOREACH(st, pf_state_tree_id, &tree_id) { +#endif + sk = st->key[PF_SK_WIRE]; /* * Kill states from this source. (Only those * from the same rule if PF_FLUSH_GLOBAL is not * set) */ - if (s->af == (*state)->af && + if (sk->af == + (*state)->key[PF_SK_WIRE]->af && (((*state)->direction == PF_OUT && PF_AEQ(&(*state)->src_node->addr, - &s->lan.addr, s->af)) || + &sk->addr[0], sk->af)) || ((*state)->direction == PF_IN && PF_AEQ(&(*state)->src_node->addr, - &s->ext.addr, s->af))) && + &sk->addr[1], sk->af))) && ((*state)->rule.ptr->flush & PF_FLUSH_GLOBAL || - (*state)->rule.ptr == s->rule.ptr)) { - s->timeout = PFTM_PURGE; - s->src.state = s->dst.state = + (*state)->rule.ptr == st->rule.ptr)) { + st->timeout = PFTM_PURGE; + st->src.state = st->dst.state = TCPS_CLOSED; killed++; } } +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) +#else if (pf_status.debug >= PF_DEBUG_MISC) +#endif printf(", %u states killed", killed); } +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) +#else if (pf_status.debug >= PF_DEBUG_MISC) +#endif printf("\n"); } @@ -846,18 +707,30 @@ pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule, k.rule.ptr = rule; else k.rule.ptr = NULL; +#ifdef __FreeBSD__ + V_pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; + *sn = RB_FIND(pf_src_tree, &V_tree_src_tracking, &k); +#else pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; *sn = RB_FIND(pf_src_tree, 
&tree_src_tracking, &k); +#endif } if (*sn == NULL) { if (!rule->max_src_nodes || rule->src_nodes < rule->max_src_nodes) - (*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT); +#ifdef __FreeBSD__ + (*sn) = pool_get(&V_pf_src_tree_pl, PR_NOWAIT | PR_ZERO); +#else + (*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT | PR_ZERO); +#endif else +#ifdef __FreeBSD__ + V_pf_status.lcounters[LCNT_SRCNODES]++; +#else pf_status.lcounters[LCNT_SRCNODES]++; +#endif if ((*sn) == NULL) return (-1); - bzero(*sn, sizeof(struct pf_src_node)); pf_init_threshold(&(*sn)->conn_rate, rule->max_src_conn_rate.limit, @@ -871,296 +744,805 @@ pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule, (*sn)->rule.ptr = NULL; PF_ACPY(&(*sn)->addr, src, af); if (RB_INSERT(pf_src_tree, +#ifdef __FreeBSD__ + &V_tree_src_tracking, *sn) != NULL) { + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else &tree_src_tracking, *sn) != NULL) { if (pf_status.debug >= PF_DEBUG_MISC) { +#endif printf("pf: src_tree insert failed: "); pf_print_host(&(*sn)->addr, 0, af); printf("\n"); } +#ifdef __FreeBSD__ + pool_put(&V_pf_src_tree_pl, *sn); +#else pool_put(&pf_src_tree_pl, *sn); +#endif return (-1); } (*sn)->creation = time_second; (*sn)->ruletype = rule->action; if ((*sn)->rule.ptr != NULL) (*sn)->rule.ptr->src_nodes++; +#ifdef __FreeBSD__ + V_pf_status.scounters[SCNT_SRC_NODE_INSERT]++; + V_pf_status.src_nodes++; +#else pf_status.scounters[SCNT_SRC_NODE_INSERT]++; pf_status.src_nodes++; +#endif } else { if (rule->max_src_states && (*sn)->states >= rule->max_src_states) { +#ifdef __FreeBSD__ + V_pf_status.lcounters[LCNT_SRCSTATES]++; +#else pf_status.lcounters[LCNT_SRCSTATES]++; +#endif return (-1); } } return (0); } -int -pf_insert_state(struct pfi_kif *kif, struct pf_state *state) +/* state table stuff */ + +static __inline int +pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b) { - /* Thou MUST NOT insert multiple duplicate keys */ - state->u.s.kif = kif; - if (RB_INSERT(pf_state_tree_lan_ext, 
&kif->pfik_lan_ext, state)) { - if (pf_status.debug >= PF_DEBUG_MISC) { - printf("pf: state insert failed: tree_lan_ext"); - printf(" lan: "); - pf_print_host(&state->lan.addr, state->lan.port, - state->af); - printf(" gwy: "); - pf_print_host(&state->gwy.addr, state->gwy.port, - state->af); - printf(" ext: "); - pf_print_host(&state->ext.addr, state->ext.port, - state->af); - if (state->sync_flags & PFSTATE_FROMSYNC) - printf(" (from sync)"); - printf("\n"); - } - return (-1); + int diff; + + if ((diff = a->proto - b->proto) != 0) + return (diff); + if ((diff = a->af - b->af) != 0) + return (diff); + switch (a->af) { +#ifdef INET + case AF_INET: + if (a->addr[0].addr32[0] > b->addr[0].addr32[0]) + return (1); + if (a->addr[0].addr32[0] < b->addr[0].addr32[0]) + return (-1); + if (a->addr[1].addr32[0] > b->addr[1].addr32[0]) + return (1); + if (a->addr[1].addr32[0] < b->addr[1].addr32[0]) + return (-1); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (a->addr[0].addr32[3] > b->addr[0].addr32[3]) + return (1); + if (a->addr[0].addr32[3] < b->addr[0].addr32[3]) + return (-1); + if (a->addr[1].addr32[3] > b->addr[1].addr32[3]) + return (1); + if (a->addr[1].addr32[3] < b->addr[1].addr32[3]) + return (-1); + if (a->addr[0].addr32[2] > b->addr[0].addr32[2]) + return (1); + if (a->addr[0].addr32[2] < b->addr[0].addr32[2]) + return (-1); + if (a->addr[1].addr32[2] > b->addr[1].addr32[2]) + return (1); + if (a->addr[1].addr32[2] < b->addr[1].addr32[2]) + return (-1); + if (a->addr[0].addr32[1] > b->addr[0].addr32[1]) + return (1); + if (a->addr[0].addr32[1] < b->addr[0].addr32[1]) + return (-1); + if (a->addr[1].addr32[1] > b->addr[1].addr32[1]) + return (1); + if (a->addr[1].addr32[1] < b->addr[1].addr32[1]) + return (-1); + if (a->addr[0].addr32[0] > b->addr[0].addr32[0]) + return (1); + if (a->addr[0].addr32[0] < b->addr[0].addr32[0]) + return (-1); + if (a->addr[1].addr32[0] > b->addr[1].addr32[0]) + return (1); + if (a->addr[1].addr32[0] < 
b->addr[1].addr32[0]) + return (-1); + break; +#endif /* INET6 */ } - if (RB_INSERT(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state)) { - if (pf_status.debug >= PF_DEBUG_MISC) { - printf("pf: state insert failed: tree_ext_gwy"); - printf(" lan: "); - pf_print_host(&state->lan.addr, state->lan.port, - state->af); - printf(" gwy: "); - pf_print_host(&state->gwy.addr, state->gwy.port, - state->af); - printf(" ext: "); - pf_print_host(&state->ext.addr, state->ext.port, - state->af); - if (state->sync_flags & PFSTATE_FROMSYNC) - printf(" (from sync)"); - printf("\n"); - } - RB_REMOVE(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state); + if ((diff = a->port[0] - b->port[0]) != 0) + return (diff); + if ((diff = a->port[1] - b->port[1]) != 0) + return (diff); + + return (0); +} + +static __inline int +pf_state_compare_id(struct pf_state *a, struct pf_state *b) +{ + if (a->id > b->id) + return (1); + if (a->id < b->id) + return (-1); + if (a->creatorid > b->creatorid) + return (1); + if (a->creatorid < b->creatorid) return (-1); - } - if (state->id == 0 && state->creatorid == 0) { - state->id = htobe64(pf_status.stateid++); - state->creatorid = pf_status.hostid; - } - if (RB_INSERT(pf_state_tree_id, &tree_id, state) != NULL) { - if (pf_status.debug >= PF_DEBUG_MISC) { + return (0); +} + +int +pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx) +{ + struct pf_state_item *si; + struct pf_state_key *cur; + struct pf_state *olds = NULL; + #ifdef __FreeBSD__ - printf("pf: state insert failed: " - "id: %016llx creatorid: %08x", - (long long)be64toh(state->id), - ntohl(state->creatorid)); + KASSERT(s->key[idx] == NULL, ("%s: key is null!", __FUNCTION__)); #else - printf("pf: state insert failed: " - "id: %016llx creatorid: %08x", - betoh64(state->id), ntohl(state->creatorid)); + KASSERT(s->key[idx] == NULL); /* XXX handle this? 
*/ #endif - if (state->sync_flags & PFSTATE_FROMSYNC) - printf(" (from sync)"); - printf("\n"); - } - RB_REMOVE(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state); - RB_REMOVE(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state); + +#ifdef __FreeBSD__ + if ((cur = RB_INSERT(pf_state_tree, &V_pf_statetbl, sk)) != NULL) { +#else + if ((cur = RB_INSERT(pf_state_tree, &pf_statetbl, sk)) != NULL) { +#endif + /* key exists. check for same kif, if none, add to key */ + TAILQ_FOREACH(si, &cur->states, entry) + if (si->s->kif == s->kif && + si->s->direction == s->direction) { + if (sk->proto == IPPROTO_TCP && + si->s->src.state >= TCPS_FIN_WAIT_2 && + si->s->dst.state >= TCPS_FIN_WAIT_2) { + si->s->src.state = si->s->dst.state = + TCPS_CLOSED; + /* unlink late or sks can go away */ + olds = si->s; + } else { +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else + if (pf_status.debug >= PF_DEBUG_MISC) { +#endif + printf("pf: %s key attach " + "failed on %s: ", + (idx == PF_SK_WIRE) ? + "wire" : "stack", + s->kif->pfik_name); + pf_print_state_parts(s, + (idx == PF_SK_WIRE) ? + sk : NULL, + (idx == PF_SK_STACK) ? + sk : NULL); + printf(", existing: "); + pf_print_state_parts(si->s, + (idx == PF_SK_WIRE) ? + sk : NULL, + (idx == PF_SK_STACK) ? + sk : NULL); + printf("\n"); + } +#ifdef __FreeBSD__ + pool_put(&V_pf_state_key_pl, sk); +#else + pool_put(&pf_state_key_pl, sk); +#endif + return (-1); /* collision! 
*/ + } + } +#ifdef __FreeBSD__ + pool_put(&V_pf_state_key_pl, sk); +#else + pool_put(&pf_state_key_pl, sk); +#endif + s->key[idx] = cur; + } else + s->key[idx] = sk; + +#ifdef __FreeBSD__ + if ((si = pool_get(&V_pf_state_item_pl, PR_NOWAIT)) == NULL) { +#else + if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) { +#endif + pf_state_key_detach(s, idx); return (-1); } - TAILQ_INSERT_TAIL(&state_list, state, u.s.entry_list); - pf_status.fcounters[FCNT_STATE_INSERT]++; - pf_status.states++; - pfi_kif_ref(kif, PFI_KIF_REF_STATE); -#if NPFSYNC - pfsync_insert_state(state); + si->s = s; + + /* list is sorted, if-bound states before floating */ +#ifdef __FreeBSD__ + if (s->kif == V_pfi_all) +#else + if (s->kif == pfi_all) #endif + TAILQ_INSERT_TAIL(&s->key[idx]->states, si, entry); + else + TAILQ_INSERT_HEAD(&s->key[idx]->states, si, entry); + + if (olds) + pf_unlink_state(olds); + return (0); } void -pf_purge_thread(void *v) +pf_detach_state(struct pf_state *s) { - int nloops = 0, s; -#ifdef __FreeBSD__ - int locked; -#endif + if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK]) + s->key[PF_SK_WIRE] = NULL; - for (;;) { - tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz); + if (s->key[PF_SK_STACK] != NULL) + pf_state_key_detach(s, PF_SK_STACK); -#ifdef __FreeBSD__ - sx_slock(&pf_consistency_lock); - PF_LOCK(); - locked = 0; + if (s->key[PF_SK_WIRE] != NULL) + pf_state_key_detach(s, PF_SK_WIRE); +} - if (pf_end_threads) { - PF_UNLOCK(); - sx_sunlock(&pf_consistency_lock); - sx_xlock(&pf_consistency_lock); - PF_LOCK(); - pf_purge_expired_states(pf_status.states, 1); - pf_purge_expired_fragments(); - pf_purge_expired_src_nodes(1); - pf_end_threads++; +void +pf_state_key_detach(struct pf_state *s, int idx) +{ + struct pf_state_item *si; - sx_xunlock(&pf_consistency_lock); - PF_UNLOCK(); - wakeup(pf_purge_thread); - kproc_exit(0); - } -#endif - s = splsoftnet(); + si = TAILQ_FIRST(&s->key[idx]->states); + while (si && si->s != s) + si = TAILQ_NEXT(si, entry); - /* process a 
fraction of the state table every second */ + if (si) { + TAILQ_REMOVE(&s->key[idx]->states, si, entry); #ifdef __FreeBSD__ - if(!pf_purge_expired_states(1 + (pf_status.states - / pf_default_rule.timeout[PFTM_INTERVAL]), 0)) { - PF_UNLOCK(); - sx_sunlock(&pf_consistency_lock); - sx_xlock(&pf_consistency_lock); - PF_LOCK(); - locked = 1; - - pf_purge_expired_states(1 + (pf_status.states - / pf_default_rule.timeout[PFTM_INTERVAL]), 1); - } + pool_put(&V_pf_state_item_pl, si); #else - pf_purge_expired_states(1 + (pf_status.states - / pf_default_rule.timeout[PFTM_INTERVAL])); + pool_put(&pf_state_item_pl, si); #endif + } - /* purge other expired types every PFTM_INTERVAL seconds */ - if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) { - pf_purge_expired_fragments(); - if (!pf_purge_expired_src_nodes(locked)) { - PF_UNLOCK(); - sx_sunlock(&pf_consistency_lock); - sx_xlock(&pf_consistency_lock); - PF_LOCK(); - locked = 1; - pf_purge_expired_src_nodes(1); - } - nloops = 0; - } - - splx(s); + if (TAILQ_EMPTY(&s->key[idx]->states)) { #ifdef __FreeBSD__ - PF_UNLOCK(); - if (locked) - sx_xunlock(&pf_consistency_lock); - else - sx_sunlock(&pf_consistency_lock); + RB_REMOVE(pf_state_tree, &V_pf_statetbl, s->key[idx]); +#else + RB_REMOVE(pf_state_tree, &pf_statetbl, s->key[idx]); +#endif + if (s->key[idx]->reverse) + s->key[idx]->reverse->reverse = NULL; +#ifdef __FreeBSD__ + /* XXX: implement this */ +#else + if (s->key[idx]->inp) + s->key[idx]->inp->inp_pf_sk = NULL; +#endif +#ifdef __FreeBSD__ + pool_put(&V_pf_state_key_pl, s->key[idx]); +#else + pool_put(&pf_state_key_pl, s->key[idx]); #endif } + s->key[idx] = NULL; } -u_int32_t -pf_state_expires(const struct pf_state *state) +struct pf_state_key * +pf_alloc_state_key(int pool_flags) { - u_int32_t timeout; - u_int32_t start; - u_int32_t end; - u_int32_t states; + struct pf_state_key *sk; - /* handle all PFTM_* > PFTM_MAX here */ - if (state->timeout == PFTM_PURGE) - return (time_second); - if (state->timeout == 
PFTM_UNTIL_PACKET) - return (0); -#ifdef __FreeBSD__ - KASSERT(state->timeout != PFTM_UNLINKED, - ("pf_state_expires: timeout == PFTM_UNLINKED")); - KASSERT((state->timeout < PFTM_MAX), - ("pf_state_expires: timeout > PFTM_MAX")); +#ifdef __FreeBSD__ + if ((sk = pool_get(&V_pf_state_key_pl, pool_flags)) == NULL) #else - KASSERT(state->timeout != PFTM_UNLINKED); - KASSERT(state->timeout < PFTM_MAX); + if ((sk = pool_get(&pf_state_key_pl, pool_flags)) == NULL) #endif - timeout = state->rule.ptr->timeout[state->timeout]; - if (!timeout) - timeout = pf_default_rule.timeout[state->timeout]; - start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START]; - if (start) { - end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END]; - states = state->rule.ptr->states; + return (NULL); + TAILQ_INIT(&sk->states); + + return (sk); +} + +int +pf_state_key_setup(struct pf_pdesc *pd, struct pf_rule *nr, + struct pf_state_key **skw, struct pf_state_key **sks, + struct pf_state_key **skp, struct pf_state_key **nkp, + struct pf_addr *saddr, struct pf_addr *daddr, + u_int16_t sport, u_int16_t dport) +{ +#ifdef __FreeBSD__ + KASSERT((*skp == NULL && *nkp == NULL), + ("%s: skp == NULL && nkp == NULL", __FUNCTION__)); +#else + KASSERT((*skp == NULL && *nkp == NULL)); +#endif + + if ((*skp = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) + return (ENOMEM); + + PF_ACPY(&(*skp)->addr[pd->sidx], saddr, pd->af); + PF_ACPY(&(*skp)->addr[pd->didx], daddr, pd->af); + (*skp)->port[pd->sidx] = sport; + (*skp)->port[pd->didx] = dport; + (*skp)->proto = pd->proto; + (*skp)->af = pd->af; + + if (nr != NULL) { + if ((*nkp = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) + return (ENOMEM); /* caller must handle cleanup */ + + /* XXX maybe just bcopy and TAILQ_INIT(&(*nkp)->states) */ + PF_ACPY(&(*nkp)->addr[0], &(*skp)->addr[0], pd->af); + PF_ACPY(&(*nkp)->addr[1], &(*skp)->addr[1], pd->af); + (*nkp)->port[0] = (*skp)->port[0]; + (*nkp)->port[1] = (*skp)->port[1]; + (*nkp)->proto = pd->proto; + (*nkp)->af = 
pd->af; + } else + *nkp = *skp; + + if (pd->dir == PF_IN) { + *skw = *skp; + *sks = *nkp; } else { - start = pf_default_rule.timeout[PFTM_ADAPTIVE_START]; - end = pf_default_rule.timeout[PFTM_ADAPTIVE_END]; - states = pf_status.states; - } - if (end && states > start && start < end) { - if (states < end) - return (state->expire + timeout * (end - states) / - (end - start)); - else - return (time_second); + *sks = *skp; + *skw = *nkp; } - return (state->expire + timeout); + return (0); } -#ifdef __FreeBSD__ + int -pf_purge_expired_src_nodes(int waslocked) -#else -void -pf_purge_expired_src_nodes(int waslocked) -#endif +pf_state_insert(struct pfi_kif *kif, struct pf_state_key *skw, + struct pf_state_key *sks, struct pf_state *s) { - struct pf_src_node *cur, *next; - int locked = waslocked; +#ifndef __FreeBSD__ + splassert(IPL_SOFTNET); +#endif - for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) { - next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur); + s->kif = kif; - if (cur->states <= 0 && cur->expire <= time_second) { - if (! 
locked) { + if (skw == sks) { + if (pf_state_key_attach(skw, s, PF_SK_WIRE)) + return (-1); + s->key[PF_SK_STACK] = s->key[PF_SK_WIRE]; + } else { + if (pf_state_key_attach(skw, s, PF_SK_WIRE)) { #ifdef __FreeBSD__ - if (!sx_try_upgrade(&pf_consistency_lock)) - return (0); + pool_put(&V_pf_state_key_pl, sks); #else - rw_enter_write(&pf_consistency_lock); + pool_put(&pf_state_key_pl, sks); #endif - next = RB_NEXT(pf_src_tree, - &tree_src_tracking, cur); - locked = 1; - } - if (cur->rule.ptr != NULL) { - cur->rule.ptr->src_nodes--; - if (cur->rule.ptr->states <= 0 && - cur->rule.ptr->max_src_nodes <= 0) - pf_rm_rule(NULL, cur->rule.ptr); - } - RB_REMOVE(pf_src_tree, &tree_src_tracking, cur); - pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; - pf_status.src_nodes--; - pool_put(&pf_src_tree_pl, cur); - } - } + return (-1); + } + if (pf_state_key_attach(sks, s, PF_SK_STACK)) { + pf_state_key_detach(s, PF_SK_WIRE); + return (-1); + } + } - if (locked && !waslocked) + if (s->id == 0 && s->creatorid == 0) { #ifdef __FreeBSD__ - sx_downgrade(&pf_consistency_lock); + s->id = htobe64(V_pf_status.stateid++); + s->creatorid = V_pf_status.hostid; #else - rw_exit_write(&pf_consistency_lock); + s->id = htobe64(pf_status.stateid++); + s->creatorid = pf_status.hostid; #endif - + } #ifdef __FreeBSD__ - return (1); + if (RB_INSERT(pf_state_tree_id, &V_tree_id, s) != NULL) { + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else + if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) { + if (pf_status.debug >= PF_DEBUG_MISC) { +#endif + printf("pf: state insert failed: " + "id: %016llx creatorid: %08x", +#ifdef __FreeBSD__ + (unsigned long long)betoh64(s->id), ntohl(s->creatorid)); +#else + betoh64(s->id), ntohl(s->creatorid)); +#endif + printf("\n"); + } + pf_detach_state(s); + return (-1); + } +#ifdef __FreeBSD__ + TAILQ_INSERT_TAIL(&V_state_list, s, entry_list); + V_pf_status.fcounters[FCNT_STATE_INSERT]++; + V_pf_status.states++; +#else + TAILQ_INSERT_TAIL(&state_list, s, entry_list); + 
pf_status.fcounters[FCNT_STATE_INSERT]++; + pf_status.states++; +#endif + pfi_kif_ref(kif, PFI_KIF_REF_STATE); +#if NPFSYNC > 0 +#ifdef __FreeBSD__ + if (pfsync_insert_state_ptr != NULL) + pfsync_insert_state_ptr(s); +#else + pfsync_insert_state(s); #endif +#endif + return (0); } -void -pf_src_tree_remove_state(struct pf_state *s) +struct pf_state * +pf_find_state_byid(struct pf_state_cmp *key) { - u_int32_t timeout; +#ifdef __FreeBSD__ + V_pf_status.fcounters[FCNT_STATE_SEARCH]++; - if (s->src_node != NULL) { - if (s->proto == IPPROTO_TCP) { - if (s->src.tcp_est) - --s->src_node->conn; - } + return (RB_FIND(pf_state_tree_id, &V_tree_id, (struct pf_state *)key)); +#else + pf_status.fcounters[FCNT_STATE_SEARCH]++; + + return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key)); +#endif +} + +/* XXX debug function, intended to be removed one day */ +int +pf_compare_state_keys(struct pf_state_key *a, struct pf_state_key *b, + struct pfi_kif *kif, u_int dir) +{ + /* a (from hdr) and b (new) must be exact opposites of each other */ + if (a->af == b->af && a->proto == b->proto && + PF_AEQ(&a->addr[0], &b->addr[1], a->af) && + PF_AEQ(&a->addr[1], &b->addr[0], a->af) && + a->port[0] == b->port[1] && + a->port[1] == b->port[0]) + return (0); + else { + /* mismatch. must not happen. */ + printf("pf: state key linking mismatch! dir=%s, " + "if=%s, stored af=%u, a0: ", + dir == PF_OUT ? 
"OUT" : "IN", kif->pfik_name, a->af); + pf_print_host(&a->addr[0], a->port[0], a->af); + printf(", a1: "); + pf_print_host(&a->addr[1], a->port[1], a->af); + printf(", proto=%u", a->proto); + printf(", found af=%u, a0: ", b->af); + pf_print_host(&b->addr[0], b->port[0], b->af); + printf(", a1: "); + pf_print_host(&b->addr[1], b->port[1], b->af); + printf(", proto=%u", b->proto); + printf(".\n"); + return (-1); + } +} + +struct pf_state * +#ifdef __FreeBSD__ +pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir, + struct mbuf *m, struct pf_mtag *pftag) +#else +pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir, + struct mbuf *m) +#endif +{ + struct pf_state_key *sk; + struct pf_state_item *si; + +#ifdef __FreeBSD__ + V_pf_status.fcounters[FCNT_STATE_SEARCH]++; +#else + pf_status.fcounters[FCNT_STATE_SEARCH]++; +#endif + +#ifdef __FreeBSD__ + if (dir == PF_OUT && pftag->statekey && + ((struct pf_state_key *)pftag->statekey)->reverse) + sk = ((struct pf_state_key *)pftag->statekey)->reverse; + else { +#ifdef __FreeBSD__ + if ((sk = RB_FIND(pf_state_tree, &V_pf_statetbl, +#else + if ((sk = RB_FIND(pf_state_tree, &pf_statetbl, +#endif + (struct pf_state_key *)key)) == NULL) + return (NULL); + if (dir == PF_OUT && pftag->statekey && + pf_compare_state_keys(pftag->statekey, sk, + kif, dir) == 0) { + ((struct pf_state_key *) + pftag->statekey)->reverse = sk; + sk->reverse = pftag->statekey; + } + } +#else + if (dir == PF_OUT && m->m_pkthdr.pf.statekey && + ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse) + sk = ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse; + else { +#ifdef __FreeBSD__ + if ((sk = RB_FIND(pf_state_tree, &V_pf_statetbl, +#else + if ((sk = RB_FIND(pf_state_tree, &pf_statetbl, +#endif + (struct pf_state_key *)key)) == NULL) + return (NULL); + if (dir == PF_OUT && m->m_pkthdr.pf.statekey && + pf_compare_state_keys(m->m_pkthdr.pf.statekey, sk, + kif, dir) == 0) { + ((struct pf_state_key *) + 
m->m_pkthdr.pf.statekey)->reverse = sk; + sk->reverse = m->m_pkthdr.pf.statekey; + } + } +#endif + + if (dir == PF_OUT) +#ifdef __FreeBSD__ + pftag->statekey = NULL; +#else + m->m_pkthdr.pf.statekey = NULL; +#endif + + /* list is sorted, if-bound states before floating ones */ + TAILQ_FOREACH(si, &sk->states, entry) +#ifdef __FreeBSD__ + if ((si->s->kif == V_pfi_all || si->s->kif == kif) && +#else + if ((si->s->kif == pfi_all || si->s->kif == kif) && +#endif + sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] : + si->s->key[PF_SK_STACK])) + return (si->s); + + return (NULL); +} + +struct pf_state * +pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more) +{ + struct pf_state_key *sk; + struct pf_state_item *si, *ret = NULL; + +#ifdef __FreeBSD__ + V_pf_status.fcounters[FCNT_STATE_SEARCH]++; +#else + pf_status.fcounters[FCNT_STATE_SEARCH]++; +#endif + +#ifdef __FreeBSD__ + sk = RB_FIND(pf_state_tree, &V_pf_statetbl, (struct pf_state_key *)key); +#else + sk = RB_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key); +#endif + if (sk != NULL) { + TAILQ_FOREACH(si, &sk->states, entry) + if (dir == PF_INOUT || + (sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] : + si->s->key[PF_SK_STACK]))) { + if (more == NULL) + return (si->s); + + if (ret) + (*more)++; + else + ret = si; + } + } + return (ret ? 
ret->s : NULL); +} + +/* END state table stuff */ + + +void +pf_purge_thread(void *v) +{ + int nloops = 0, s; +#ifdef __FreeBSD__ + int locked; +#endif + + CURVNET_SET((struct vnet *)v); + + for (;;) { + tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz); + +#ifdef __FreeBSD__ + sx_slock(&V_pf_consistency_lock); + PF_LOCK(); + locked = 0; + + if (V_pf_end_threads) { + PF_UNLOCK(); + sx_sunlock(&V_pf_consistency_lock); + sx_xlock(&V_pf_consistency_lock); + PF_LOCK(); + + pf_purge_expired_states(V_pf_status.states, 1); + pf_purge_expired_fragments(); + pf_purge_expired_src_nodes(1); + V_pf_end_threads++; + + sx_xunlock(&V_pf_consistency_lock); + PF_UNLOCK(); + wakeup(pf_purge_thread); + kproc_exit(0); + } +#endif + s = splsoftnet(); + + /* process a fraction of the state table every second */ +#ifdef __FreeBSD__ + if (!pf_purge_expired_states(1 + (V_pf_status.states / + V_pf_default_rule.timeout[PFTM_INTERVAL]), 0)) { + PF_UNLOCK(); + sx_sunlock(&V_pf_consistency_lock); + sx_xlock(&V_pf_consistency_lock); + PF_LOCK(); + locked = 1; + + pf_purge_expired_states(1 + (V_pf_status.states / + V_pf_default_rule.timeout[PFTM_INTERVAL]), 1); + } +#else + pf_purge_expired_states(1 + (pf_status.states + / pf_default_rule.timeout[PFTM_INTERVAL])); +#endif + + /* purge other expired types every PFTM_INTERVAL seconds */ +#ifdef __FreeBSD__ + if (++nloops >= V_pf_default_rule.timeout[PFTM_INTERVAL]) { +#else + if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) { +#endif + pf_purge_expired_fragments(); + pf_purge_expired_src_nodes(0); + nloops = 0; + } + + splx(s); +#ifdef __FreeBSD__ + PF_UNLOCK(); + if (locked) + sx_xunlock(&V_pf_consistency_lock); + else + sx_sunlock(&V_pf_consistency_lock); +#endif + } + CURVNET_RESTORE(); +} + +u_int32_t +pf_state_expires(const struct pf_state *state) +{ + u_int32_t timeout; + u_int32_t start; + u_int32_t end; + u_int32_t states; + + /* handle all PFTM_* > PFTM_MAX here */ + if (state->timeout == PFTM_PURGE) + return (time_second); + if 
(state->timeout == PFTM_UNTIL_PACKET) + return (0); +#ifdef __FreeBSD__ + KASSERT(state->timeout != PFTM_UNLINKED, + ("pf_state_expires: timeout == PFTM_UNLINKED")); + KASSERT((state->timeout < PFTM_MAX), + ("pf_state_expires: timeout > PFTM_MAX")); +#else + KASSERT(state->timeout != PFTM_UNLINKED); + KASSERT(state->timeout < PFTM_MAX); +#endif + timeout = state->rule.ptr->timeout[state->timeout]; + if (!timeout) +#ifdef __FreeBSD__ + timeout = V_pf_default_rule.timeout[state->timeout]; +#else + timeout = pf_default_rule.timeout[state->timeout]; +#endif + start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START]; + if (start) { + end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END]; + states = state->rule.ptr->states_cur; + } else { +#ifdef __FreeBSD__ + start = V_pf_default_rule.timeout[PFTM_ADAPTIVE_START]; + end = V_pf_default_rule.timeout[PFTM_ADAPTIVE_END]; + states = V_pf_status.states; +#else + start = pf_default_rule.timeout[PFTM_ADAPTIVE_START]; + end = pf_default_rule.timeout[PFTM_ADAPTIVE_END]; + states = pf_status.states; +#endif + } + if (end && states > start && start < end) { + if (states < end) + return (state->expire + timeout * (end - states) / + (end - start)); + else + return (time_second); + } + return (state->expire + timeout); +} + +#ifdef __FreeBSD__ +int +pf_purge_expired_src_nodes(int waslocked) +#else +void +pf_purge_expired_src_nodes(int waslocked) +#endif +{ + struct pf_src_node *cur, *next; + int locked = waslocked; + +#ifdef __FreeBSD__ + for (cur = RB_MIN(pf_src_tree, &V_tree_src_tracking); cur; cur = next) { + next = RB_NEXT(pf_src_tree, &V_tree_src_tracking, cur); +#else + for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) { + next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur); +#endif + + if (cur->states <= 0 && cur->expire <= time_second) { + if (! 
locked) { +#ifdef __FreeBSD__ + if (!sx_try_upgrade(&V_pf_consistency_lock)) + return (0); +#else + rw_enter_write(&pf_consistency_lock); +#endif + next = RB_NEXT(pf_src_tree, +#ifdef __FreeBSD__ + &V_tree_src_tracking, cur); +#else + &tree_src_tracking, cur); +#endif + locked = 1; + } + if (cur->rule.ptr != NULL) { + cur->rule.ptr->src_nodes--; + if (cur->rule.ptr->states_cur <= 0 && + cur->rule.ptr->max_src_nodes <= 0) + pf_rm_rule(NULL, cur->rule.ptr); + } +#ifdef __FreeBSD__ + RB_REMOVE(pf_src_tree, &V_tree_src_tracking, cur); + V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; + V_pf_status.src_nodes--; + pool_put(&V_pf_src_tree_pl, cur); +#else + RB_REMOVE(pf_src_tree, &tree_src_tracking, cur); + pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; + pf_status.src_nodes--; + pool_put(&pf_src_tree_pl, cur); +#endif + } + } + + if (locked && !waslocked) +#ifdef __FreeBSD__ + { + sx_downgrade(&V_pf_consistency_lock); + } + return (1); +#else + rw_exit_write(&pf_consistency_lock); +#endif +} + +void +pf_src_tree_remove_state(struct pf_state *s) +{ + u_int32_t timeout; + + if (s->src_node != NULL) { + if (s->src.tcp_est) + --s->src_node->conn; if (--s->src_node->states <= 0) { timeout = s->rule.ptr->timeout[PFTM_SRC_NODE]; if (!timeout) timeout = +#ifdef __FreeBSD__ + V_pf_default_rule.timeout[PFTM_SRC_NODE]; +#else pf_default_rule.timeout[PFTM_SRC_NODE]; +#endif s->src_node->expire = time_second + timeout; } } @@ -1169,7 +1551,11 @@ pf_src_tree_remove_state(struct pf_state *s) timeout = s->rule.ptr->timeout[PFTM_SRC_NODE]; if (!timeout) timeout = +#ifdef __FreeBSD__ + V_pf_default_rule.timeout[PFTM_SRC_NODE]; +#else pf_default_rule.timeout[PFTM_SRC_NODE]; +#endif s->nat_src_node->expire = time_second + timeout; } } @@ -1184,29 +1570,49 @@ pf_unlink_state(struct pf_state *cur) if (cur->local_flags & PFSTATE_EXPIRING) return; cur->local_flags |= PFSTATE_EXPIRING; +#else + splassert(IPL_SOFTNET); #endif + if (cur->src.state == PF_TCPS_PROXY_DST) { + /* XXX wire key the 
right one? */ #ifdef __FreeBSD__ - pf_send_tcp(NULL, cur->rule.ptr, cur->af, + pf_send_tcp(NULL, cur->rule.ptr, cur->key[PF_SK_WIRE]->af, #else - pf_send_tcp(cur->rule.ptr, cur->af, + pf_send_tcp(cur->rule.ptr, cur->key[PF_SK_WIRE]->af, #endif - &cur->ext.addr, &cur->lan.addr, - cur->ext.port, cur->lan.port, + &cur->key[PF_SK_WIRE]->addr[1], + &cur->key[PF_SK_WIRE]->addr[0], + cur->key[PF_SK_WIRE]->port[1], + cur->key[PF_SK_WIRE]->port[0], cur->src.seqhi, cur->src.seqlo + 1, TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL); } - RB_REMOVE(pf_state_tree_ext_gwy, - &cur->u.s.kif->pfik_ext_gwy, cur); - RB_REMOVE(pf_state_tree_lan_ext, - &cur->u.s.kif->pfik_lan_ext, cur); +#ifdef __FreeBSD__ + RB_REMOVE(pf_state_tree_id, &V_tree_id, cur); +#else RB_REMOVE(pf_state_tree_id, &tree_id, cur); -#if NPFSYNC - if (cur->creatorid == pf_status.hostid) - pfsync_delete_state(cur); +#endif +#if NPFLOW > 0 + if (cur->state_flags & PFSTATE_PFLOW) +#ifdef __FreeBSD__ + if (export_pflow_ptr != NULL) + export_pflow_ptr(cur); +#else + export_pflow(cur); +#endif +#endif +#if NPFSYNC > 0 +#ifdef __FreeBSD__ + if (pfsync_delete_state_ptr != NULL) + pfsync_delete_state_ptr(cur); +#else + pfsync_delete_state(cur); +#endif #endif cur->timeout = PFTM_UNLINKED; pf_src_tree_remove_state(cur); + pf_detach_state(cur); } /* callers should be at splsoftnet and hold the @@ -1214,10 +1620,17 @@ pf_unlink_state(struct pf_state *cur) void pf_free_state(struct pf_state *cur) { -#if NPFSYNC - if (pfsyncif != NULL && - (pfsyncif->sc_bulk_send_next == cur || - pfsyncif->sc_bulk_terminator == cur)) +#ifndef __FreeBSD__ + splassert(IPL_SOFTNET); +#endif + +#if NPFSYNC > 0 +#ifdef __FreeBSD__ + if (pfsync_state_in_use_ptr != NULL) + pfsync_state_in_use_ptr(cur); +#else + if (pfsync_state_in_use(cur)) +#endif return; #endif #ifdef __FreeBSD__ @@ -1226,24 +1639,34 @@ pf_free_state(struct pf_state *cur) #else KASSERT(cur->timeout == PFTM_UNLINKED); #endif - if (--cur->rule.ptr->states <= 0 && + if 
(--cur->rule.ptr->states_cur <= 0 && cur->rule.ptr->src_nodes <= 0) pf_rm_rule(NULL, cur->rule.ptr); if (cur->nat_rule.ptr != NULL) - if (--cur->nat_rule.ptr->states <= 0 && + if (--cur->nat_rule.ptr->states_cur <= 0 && cur->nat_rule.ptr->src_nodes <= 0) pf_rm_rule(NULL, cur->nat_rule.ptr); if (cur->anchor.ptr != NULL) - if (--cur->anchor.ptr->states <= 0) + if (--cur->anchor.ptr->states_cur <= 0) pf_rm_rule(NULL, cur->anchor.ptr); pf_normalize_tcp_cleanup(cur); - pfi_kif_unref(cur->u.s.kif, PFI_KIF_REF_STATE); - TAILQ_REMOVE(&state_list, cur, u.s.entry_list); + pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE); +#ifdef __FreeBSD__ + TAILQ_REMOVE(&V_state_list, cur, entry_list); +#else + TAILQ_REMOVE(&state_list, cur, entry_list); +#endif if (cur->tag) pf_tag_unref(cur->tag); +#ifdef __FreeBSD__ + pool_put(&V_pf_state_pl, cur); + V_pf_status.fcounters[FCNT_STATE_REMOVALS]++; + V_pf_status.states--; +#else pool_put(&pf_state_pl, cur); pf_status.fcounters[FCNT_STATE_REMOVALS]++; pf_status.states--; +#endif } #ifdef __FreeBSD__ @@ -1257,28 +1680,32 @@ pf_purge_expired_states(u_int32_t maxcheck) static struct pf_state *cur = NULL; struct pf_state *next; #ifdef __FreeBSD__ - int locked = waslocked; + int locked = waslocked; #else - int locked = 0; + int locked = 0; #endif while (maxcheck--) { /* wrap to start of list when we hit the end */ if (cur == NULL) { +#ifdef __FreeBSD__ + cur = TAILQ_FIRST(&V_state_list); +#else cur = TAILQ_FIRST(&state_list); +#endif if (cur == NULL) break; /* list empty */ } /* get next state, as cur may get deleted */ - next = TAILQ_NEXT(cur, u.s.entry_list); + next = TAILQ_NEXT(cur, entry_list); if (cur->timeout == PFTM_UNLINKED) { /* free unlinked state */ if (! 
locked) { #ifdef __FreeBSD__ - if (!sx_try_upgrade(&pf_consistency_lock)) - return (0); + if (!sx_try_upgrade(&V_pf_consistency_lock)) + return (0); #else rw_enter_write(&pf_consistency_lock); #endif @@ -1290,8 +1717,8 @@ pf_purge_expired_states(u_int32_t maxcheck) pf_unlink_state(cur); if (! locked) { #ifdef __FreeBSD__ - if (!sx_try_upgrade(&pf_consistency_lock)) - return (0); + if (!sx_try_upgrade(&V_pf_consistency_lock)) + return (0); #else rw_enter_write(&pf_consistency_lock); #endif @@ -1304,7 +1731,7 @@ pf_purge_expired_states(u_int32_t maxcheck) #ifdef __FreeBSD__ if (!waslocked && locked) - sx_downgrade(&pf_consistency_lock); + sx_downgrade(&V_pf_consistency_lock); return (1); #else @@ -1318,7 +1745,7 @@ pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw) { if (aw->type != PF_ADDR_TABLE) return (0); - if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL) + if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname, 1)) == NULL) return (1); return (0); } @@ -1365,34 +1792,33 @@ pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af) #ifdef INET6 case AF_INET6: { u_int16_t b; - u_int8_t i, curstart = 255, curend = 0, - maxstart = 0, maxend = 0; + u_int8_t i, curstart, curend, maxstart, maxend; + curstart = curend = maxstart = maxend = 255; for (i = 0; i < 8; i++) { if (!addr->addr16[i]) { if (curstart == 255) curstart = i; - else - curend = i; + curend = i; } else { - if (curstart) { - if ((curend - curstart) > - (maxend - maxstart)) { - maxstart = curstart; - maxend = curend; - curstart = 255; - } + if ((curend - curstart) > + (maxend - maxstart)) { + maxstart = curstart; + maxend = curend; } + curstart = curend = 255; } } + if ((curend - curstart) > + (maxend - maxstart)) { + maxstart = curstart; + maxend = curend; + } for (i = 0; i < 8; i++) { if (i >= maxstart && i <= maxend) { - if (maxend != 7) { - if (i == maxstart) - printf(":"); - } else { - if (i == maxend) - printf(":"); - } + if (i == 0) + printf(":"); + if (i == maxend) + 
printf(":"); } else { b = ntohs(addr->addr16[i]); printf("%x", b); @@ -1413,39 +1839,87 @@ pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af) void pf_print_state(struct pf_state *s) { - switch (s->proto) { + pf_print_state_parts(s, NULL, NULL); +} + +void +pf_print_state_parts(struct pf_state *s, + struct pf_state_key *skwp, struct pf_state_key *sksp) +{ + struct pf_state_key *skw, *sks; + u_int8_t proto, dir; + + /* Do our best to fill these, but they're skipped if NULL */ + skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL); + sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL); + proto = skw ? skw->proto : (sks ? sks->proto : 0); + dir = s ? s->direction : 0; + + switch (proto) { + case IPPROTO_IPV4: + printf("IPv4"); + break; + case IPPROTO_IPV6: + printf("IPv6"); + break; case IPPROTO_TCP: - printf("TCP "); + printf("TCP"); break; case IPPROTO_UDP: - printf("UDP "); + printf("UDP"); break; case IPPROTO_ICMP: - printf("ICMP "); + printf("ICMP"); break; case IPPROTO_ICMPV6: - printf("ICMPV6 "); + printf("ICMPv6"); break; default: - printf("%u ", s->proto); + printf("%u", skw->proto); + break; + } + switch (dir) { + case PF_IN: + printf(" in"); break; + case PF_OUT: + printf(" out"); + break; + } + if (skw) { + printf(" wire: "); + pf_print_host(&skw->addr[0], skw->port[0], skw->af); + printf(" "); + pf_print_host(&skw->addr[1], skw->port[1], skw->af); + } + if (sks) { + printf(" stack: "); + if (sks != skw) { + pf_print_host(&sks->addr[0], sks->port[0], sks->af); + printf(" "); + pf_print_host(&sks->addr[1], sks->port[1], sks->af); + } else + printf("-"); + } + if (s) { + if (proto == IPPROTO_TCP) { + printf(" [lo=%u high=%u win=%u modulator=%u", + s->src.seqlo, s->src.seqhi, + s->src.max_win, s->src.seqdiff); + if (s->src.wscale && s->dst.wscale) + printf(" wscale=%u", + s->src.wscale & PF_WSCALE_MASK); + printf("]"); + printf(" [lo=%u high=%u win=%u modulator=%u", + s->dst.seqlo, s->dst.seqhi, + s->dst.max_win, s->dst.seqdiff); + if 
(s->src.wscale && s->dst.wscale) + printf(" wscale=%u", + s->dst.wscale & PF_WSCALE_MASK); + printf("]"); + } + printf(" %u:%u", s->src.state, s->dst.state); } - pf_print_host(&s->lan.addr, s->lan.port, s->af); - printf(" "); - pf_print_host(&s->gwy.addr, s->gwy.port, s->af); - printf(" "); - pf_print_host(&s->ext.addr, s->ext.port, s->af); - printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo, - s->src.seqhi, s->src.max_win, s->src.seqdiff); - if (s->src.wscale && s->dst.wscale) - printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK); - printf("]"); - printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo, - s->dst.seqhi, s->dst.max_win, s->dst.seqdiff); - if (s->src.wscale && s->dst.wscale) - printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK); - printf("]"); - printf(" %u:%u", s->src.state, s->dst.state); } void @@ -1528,6 +2002,7 @@ pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) return (1); switch (aw1->type) { case PF_ADDR_ADDRMASK: + case PF_ADDR_RANGE: if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0)) return (1); if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0)) @@ -1651,12 +2126,13 @@ pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, struct pf_addr oia, ooa; PF_ACPY(&oia, ia, af); - PF_ACPY(&ooa, oa, af); + if (oa) + PF_ACPY(&ooa, oa, af); /* Change inner protocol port, fix inner protocol checksum. */ if (ip != NULL) { u_int16_t oip = *ip; - u_int32_t opc = 0; /* make the compiler happy */ + u_int32_t opc; if (pc != NULL) opc = *pc; @@ -1700,31 +2176,33 @@ pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, break; #endif /* INET6 */ } - /* Change outer ip address, fix outer ip or icmpv6 checksum. */ - PF_ACPY(oa, na, af); - switch (af) { + /* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. 
*/ + if (oa) { + PF_ACPY(oa, na, af); + switch (af) { #ifdef INET - case AF_INET: - *hc = pf_cksum_fixup(pf_cksum_fixup(*hc, - ooa.addr16[0], oa->addr16[0], 0), - ooa.addr16[1], oa->addr16[1], 0); - break; + case AF_INET: + *hc = pf_cksum_fixup(pf_cksum_fixup(*hc, + ooa.addr16[0], oa->addr16[0], 0), + ooa.addr16[1], oa->addr16[1], 0); + break; #endif /* INET */ #ifdef INET6 - case AF_INET6: - *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( - pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( - pf_cksum_fixup(pf_cksum_fixup(*ic, - ooa.addr16[0], oa->addr16[0], u), - ooa.addr16[1], oa->addr16[1], u), - ooa.addr16[2], oa->addr16[2], u), - ooa.addr16[3], oa->addr16[3], u), - ooa.addr16[4], oa->addr16[4], u), - ooa.addr16[5], oa->addr16[5], u), - ooa.addr16[6], oa->addr16[6], u), - ooa.addr16[7], oa->addr16[7], u); - break; + case AF_INET6: + *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(*ic, + ooa.addr16[0], oa->addr16[0], u), + ooa.addr16[1], oa->addr16[1], u), + ooa.addr16[2], oa->addr16[2], u), + ooa.addr16[3], oa->addr16[3], u), + ooa.addr16[4], oa->addr16[4], u), + ooa.addr16[5], oa->addr16[5], u), + ooa.addr16[6], oa->addr16[6], u), + ooa.addr16[7], oa->addr16[7], u); + break; #endif /* INET6 */ + } } } @@ -1746,7 +2224,7 @@ pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd, int copyback = 0, i, olen; struct sackblk sack; -#define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2) +#define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2) if (hlen < TCPOLEN_SACKLEN || !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af)) return 0; @@ -1815,9 +2293,9 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, #endif /* INET6 */ struct tcphdr *th; char *opt; - struct pf_mtag *pf_mtag; - #ifdef __FreeBSD__ + struct pf_mtag *pf_mtag; + KASSERT( #ifdef INET af == AF_INET @@ -1839,7 +2317,7 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, #ifdef INET6 h6 = NULL; #endif -#endif +#endif /* 
__FreeBSD__ */ /* maximum segment size tcp option */ tlen = sizeof(struct tcphdr); @@ -1865,42 +2343,46 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, return; #ifdef __FreeBSD__ #ifdef MAC - if (replyto) - mac_netinet_firewall_reply(replyto, m); - else - mac_netinet_firewall_send(m); -#else - (void)replyto; -#endif + mac_netinet_firewall_send(m); #endif if ((pf_mtag = pf_get_mtag(m)) == NULL) { m_freem(m); return; } +#endif if (tag) #ifdef __FreeBSD__ m->m_flags |= M_SKIP_FIREWALL; + pf_mtag->tag = rtag; #else - pf_mtag->flags |= PF_TAG_GENERATED; + m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; + m->m_pkthdr.pf.tag = rtag; #endif - pf_mtag->tag = rtag; - if (r != NULL && r->rtableid >= 0) #ifdef __FreeBSD__ { M_SETFIB(m, r->rtableid); -#endif pf_mtag->rtableid = r->rtableid; +#else + m->m_pkthdr.pf.rtableid = r->rtableid; +#endif #ifdef __FreeBSD__ } #endif + #ifdef ALTQ if (r != NULL && r->qid) { +#ifdef __FreeBSD__ pf_mtag->qid = r->qid; + /* add hints for ecn */ - pf_mtag->af = af; pf_mtag->hdr = mtod(m, struct ip *); +#else + m->m_pkthdr.pf.qid = r->qid; + /* add hints for ecn */ + m->m_pkthdr.pf.hdr = mtod(m, struct ip *); +#endif } #endif /* ALTQ */ m->m_data += max_linkhdr; @@ -1966,18 +2448,19 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, #ifdef __FreeBSD__ h->ip_off = V_path_mtu_discovery ? IP_DF : 0; h->ip_len = len; + h->ip_ttl = ttl ? ttl : V_ip_defttl; #else - h->ip_off = htons(ip_mtudisc ? IP_DF : 0); h->ip_len = htons(len); + h->ip_off = htons(ip_mtudisc ? IP_DF : 0); + h->ip_ttl = ttl ? ttl : ip_defttl; #endif - h->ip_ttl = ttl ? ttl : V_ip_defttl; h->ip_sum = 0; if (eh == NULL) { #ifdef __FreeBSD__ - PF_UNLOCK(); - ip_output(m, (void *)NULL, (void *)NULL, 0, - (void *)NULL, (void *)NULL); - PF_LOCK(); + PF_UNLOCK(); + ip_output(m, (void *)NULL, (void *)NULL, 0, + (void *)NULL, (void *)NULL); + PF_LOCK(); #else /* ! 
__FreeBSD__ */ ip_output(m, (void *)NULL, (void *)NULL, 0, (void *)NULL, (void *)NULL); @@ -2025,23 +2508,23 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); PF_LOCK(); #else - ip6_output(m, NULL, NULL, 0, NULL, NULL); + ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); #endif break; #endif /* INET6 */ } } -void +static void pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, struct pf_rule *r) { - struct pf_mtag *pf_mtag; struct mbuf *m0; #ifdef __FreeBSD__ #ifdef INET struct ip *ip; #endif + struct pf_mtag *pf_mtag; #endif #ifdef __FreeBSD__ @@ -2049,33 +2532,42 @@ pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, if (m0 == NULL) return; #else - m0 = m_copy(m, 0, M_COPYALL); + if ((m0 = m_copy(m, 0, M_COPYALL)) == NULL) + return; #endif + +#ifdef __FreeBSD__ if ((pf_mtag = pf_get_mtag(m0)) == NULL) return; -#ifdef __FreeBSD__ /* XXX: revisit */ m0->m_flags |= M_SKIP_FIREWALL; #else - pf_mtag->flags |= PF_TAG_GENERATED; + m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED; #endif if (r->rtableid >= 0) #ifdef __FreeBSD__ { M_SETFIB(m0, r->rtableid); -#endif pf_mtag->rtableid = r->rtableid; +#else + m0->m_pkthdr.pf.rtableid = r->rtableid; +#endif #ifdef __FreeBSD__ } #endif #ifdef ALTQ if (r->qid) { +#ifdef __FreeBSD__ pf_mtag->qid = r->qid; /* add hints for ecn */ - pf_mtag->af = af; pf_mtag->hdr = mtod(m0, struct ip *); +#else + m0->m_pkthdr.pf.qid = r->qid; + /* add hints for ecn */ + m0->m_pkthdr.pf.hdr = mtod(m0, struct ip *); +#endif } #endif /* ALTQ */ @@ -2155,6 +2647,44 @@ pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m, } } +/* + * Return 1 if b <= a <= e, otherwise return 0. 
+ */ +int +pf_match_addr_range(struct pf_addr *b, struct pf_addr *e, + struct pf_addr *a, sa_family_t af) +{ + switch (af) { +#ifdef INET + case AF_INET: + if ((a->addr32[0] < b->addr32[0]) || + (a->addr32[0] > e->addr32[0])) + return (0); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: { + int i; + + /* check a >= b */ + for (i = 0; i < 4; ++i) + if (a->addr32[i] > b->addr32[i]) + break; + else if (a->addr32[i] < b->addr32[i]) + return (0); + /* check a <= e */ + for (i = 0; i < 4; ++i) + if (a->addr32[i] < e->addr32[i]) + break; + else if (a->addr32[i] > e->addr32[i]) + return (0); + break; + } +#endif /* INET6 */ + } + return (1); +} + int pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p) { @@ -2206,88 +2736,80 @@ pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) return (pf_match(op, a1, a2, g)); } -#ifndef __FreeBSD__ -struct pf_mtag * -pf_find_mtag(struct mbuf *m) -{ - struct m_tag *mtag; - - if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) == NULL) - return (NULL); - - return ((struct pf_mtag *)(mtag + 1)); -} - -struct pf_mtag * -pf_get_mtag(struct mbuf *m) -{ - struct m_tag *mtag; - - if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) == NULL) { - mtag = m_tag_get(PACKET_TAG_PF, sizeof(struct pf_mtag), - M_NOWAIT); - if (mtag == NULL) - return (NULL); - bzero(mtag + 1, sizeof(struct pf_mtag)); - m_tag_prepend(m, mtag); - } - - return ((struct pf_mtag *)(mtag + 1)); -} -#endif - int -pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_mtag *pf_mtag, - int *tag) +#ifdef __FreeBSD__ +pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag, + struct pf_mtag *pf_mtag) +#else +pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag) +#endif { if (*tag == -1) +#ifdef __FreeBSD__ *tag = pf_mtag->tag; +#else + *tag = m->m_pkthdr.pf.tag; +#endif return ((!r->match_tag_not && r->match_tag == *tag) || (r->match_tag_not && r->match_tag != *tag)); } int -pf_tag_packet(struct mbuf *m, struct pf_mtag *pf_mtag, int tag, int rtableid) +#ifdef 
__FreeBSD__ +pf_tag_packet(struct mbuf *m, int tag, int rtableid, + struct pf_mtag *pf_mtag) +#else +pf_tag_packet(struct mbuf *m, int tag, int rtableid) +#endif { if (tag <= 0 && rtableid < 0) return (0); - if (pf_mtag == NULL) - if ((pf_mtag = pf_get_mtag(m)) == NULL) - return (1); if (tag > 0) +#ifdef __FreeBSD__ pf_mtag->tag = tag; +#else + m->m_pkthdr.pf.tag = tag; +#endif if (rtableid >= 0) #ifdef __FreeBSD__ { M_SETFIB(m, rtableid); -#endif - pf_mtag->rtableid = rtableid; -#ifdef __FreeBSD__ } +#else + m->m_pkthdr.pf.rtableid = rtableid; #endif return (0); } -static void +void pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n, - struct pf_rule **r, struct pf_rule **a, int *match) + struct pf_rule **r, struct pf_rule **a, int *match) { struct pf_anchor_stackframe *f; (*r)->anchor->match = 0; if (match) *match = 0; +#ifdef __FreeBSD__ + if (*depth >= sizeof(V_pf_anchor_stack) / + sizeof(V_pf_anchor_stack[0])) { +#else if (*depth >= sizeof(pf_anchor_stack) / sizeof(pf_anchor_stack[0])) { +#endif printf("pf_step_into_anchor: stack overflow\n"); *r = TAILQ_NEXT(*r, entries); return; } else if (*depth == 0 && a != NULL) *a = *r; +#ifdef __FreeBSD__ + f = V_pf_anchor_stack + (*depth)++; +#else f = pf_anchor_stack + (*depth)++; +#endif f->rs = *rs; f->r = *r; if ((*r)->anchor_wildcard) { @@ -2316,7 +2838,11 @@ pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n, do { if (*depth <= 0) break; +#ifdef __FreeBSD__ + f = V_pf_anchor_stack + *depth - 1; +#else f = pf_anchor_stack + *depth - 1; +#endif if (f->parent != NULL && f->child != NULL) { if (f->child->match || (match != NULL && *match)) { @@ -2337,7 +2863,7 @@ pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n, if (*depth == 0 && a != NULL) *a = NULL; *rs = f->rs; - if (f->r->anchor->match || (match != NULL && *match)) + if (f->r->anchor->match || (match != NULL && *match)) quick = f->r->quick; *r = TAILQ_NEXT(f->r, entries); } while (*r == NULL); @@ -2402,627 +2928,68 @@ 
pf_addr_inc(struct pf_addr *addr, sa_family_t af) } #endif /* INET6 */ -#define mix(a,b,c) \ - do { \ - a -= b; a -= c; a ^= (c >> 13); \ - b -= c; b -= a; b ^= (a << 8); \ - c -= a; c -= b; c ^= (b >> 13); \ - a -= b; a -= c; a ^= (c >> 12); \ - b -= c; b -= a; b ^= (a << 16); \ - c -= a; c -= b; c ^= (b >> 5); \ - a -= b; a -= c; a ^= (c >> 3); \ - b -= c; b -= a; b ^= (a << 10); \ - c -= a; c -= b; c ^= (b >> 15); \ - } while (0) - -/* - * hash function based on bridge_hash in if_bridge.c - */ -void -pf_hash(struct pf_addr *inaddr, struct pf_addr *hash, - struct pf_poolhashkey *key, sa_family_t af) -{ - u_int32_t a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0]; - - switch (af) { -#ifdef INET - case AF_INET: - a += inaddr->addr32[0]; - b += key->key32[1]; - mix(a, b, c); - hash->addr32[0] = c + key->key32[2]; - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - a += inaddr->addr32[0]; - b += inaddr->addr32[2]; - mix(a, b, c); - hash->addr32[0] = c; - a += inaddr->addr32[1]; - b += inaddr->addr32[3]; - c += key->key32[1]; - mix(a, b, c); - hash->addr32[1] = c; - a += inaddr->addr32[2]; - b += inaddr->addr32[1]; - c += key->key32[2]; - mix(a, b, c); - hash->addr32[2] = c; - a += inaddr->addr32[3]; - b += inaddr->addr32[0]; - c += key->key32[3]; - mix(a, b, c); - hash->addr32[3] = c; - break; -#endif /* INET6 */ - } -} - int -pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, - struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn) +#ifdef __FreeBSD__ +pf_socket_lookup(int direction, struct pf_pdesc *pd, struct inpcb *inp_arg) +#else +pf_socket_lookup(int direction, struct pf_pdesc *pd) +#endif { - unsigned char hash[16]; - struct pf_pool *rpool = &r->rpool; - struct pf_addr *raddr = &rpool->cur->addr.v.a.addr; - struct pf_addr *rmask = &rpool->cur->addr.v.a.mask; - struct pf_pooladdr *acur = rpool->cur; - struct pf_src_node k; - - if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR && - (r->rpool.opts & 
PF_POOL_TYPEMASK) != PF_POOL_NONE) { - k.af = af; - PF_ACPY(&k.addr, saddr, af); - if (r->rule_flag & PFRULE_RULESRCTRACK || - r->rpool.opts & PF_POOL_STICKYADDR) - k.rule.ptr = r; - else - k.rule.ptr = NULL; - pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; - *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k); - if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) { - PF_ACPY(naddr, &(*sn)->raddr, af); - if (pf_status.debug >= PF_DEBUG_MISC) { - printf("pf_map_addr: src tracking maps "); - pf_print_host(&k.addr, 0, af); - printf(" to "); - pf_print_host(naddr, 0, af); - printf("\n"); - } - return (0); - } - } + struct pf_addr *saddr, *daddr; + u_int16_t sport, dport; +#ifdef __FreeBSD__ + struct inpcbinfo *pi; +#else + struct inpcbtable *tb; +#endif + struct inpcb *inp; + + if (pd == NULL) + return (-1); + pd->lookup.uid = UID_MAX; + pd->lookup.gid = GID_MAX; + pd->lookup.pid = NO_PID; - if (rpool->cur->addr.type == PF_ADDR_NOROUTE) +#ifdef __FreeBSD__ + if (inp_arg != NULL) { + INP_LOCK_ASSERT(inp_arg); + pd->lookup.uid = inp_arg->inp_cred->cr_uid; + pd->lookup.gid = inp_arg->inp_cred->cr_groups[0]; return (1); - if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { - switch (af) { -#ifdef INET - case AF_INET: - if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 && - (rpool->opts & PF_POOL_TYPEMASK) != - PF_POOL_ROUNDROBIN) - return (1); - raddr = &rpool->cur->addr.p.dyn->pfid_addr4; - rmask = &rpool->cur->addr.p.dyn->pfid_mask4; - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 && - (rpool->opts & PF_POOL_TYPEMASK) != - PF_POOL_ROUNDROBIN) - return (1); - raddr = &rpool->cur->addr.p.dyn->pfid_addr6; - rmask = &rpool->cur->addr.p.dyn->pfid_mask6; - break; -#endif /* INET6 */ - } - } else if (rpool->cur->addr.type == PF_ADDR_TABLE) { - if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) - return (1); /* unsupported */ - } else { - raddr = &rpool->cur->addr.v.a.addr; - rmask = &rpool->cur->addr.v.a.mask; } +#endif - switch 
(rpool->opts & PF_POOL_TYPEMASK) { - case PF_POOL_NONE: - PF_ACPY(naddr, raddr, af); + switch (pd->proto) { + case IPPROTO_TCP: + if (pd->hdr.tcp == NULL) + return (-1); + sport = pd->hdr.tcp->th_sport; + dport = pd->hdr.tcp->th_dport; +#ifdef __FreeBSD__ + pi = &V_tcbinfo; +#else + tb = &tcbtable; +#endif break; - case PF_POOL_BITMASK: - PF_POOLMASK(naddr, raddr, rmask, saddr, af); + case IPPROTO_UDP: + if (pd->hdr.udp == NULL) + return (-1); + sport = pd->hdr.udp->uh_sport; + dport = pd->hdr.udp->uh_dport; +#ifdef __FreeBSD__ + pi = &V_udbinfo; +#else + tb = &udbtable; +#endif break; - case PF_POOL_RANDOM: - if (init_addr != NULL && PF_AZERO(init_addr, af)) { - switch (af) { -#ifdef INET - case AF_INET: - rpool->counter.addr32[0] = htonl(arc4random()); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - if (rmask->addr32[3] != 0xffffffff) - rpool->counter.addr32[3] = - htonl(arc4random()); - else - break; - if (rmask->addr32[2] != 0xffffffff) - rpool->counter.addr32[2] = - htonl(arc4random()); - else - break; - if (rmask->addr32[1] != 0xffffffff) - rpool->counter.addr32[1] = - htonl(arc4random()); - else - break; - if (rmask->addr32[0] != 0xffffffff) - rpool->counter.addr32[0] = - htonl(arc4random()); - break; -#endif /* INET6 */ - } - PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); - PF_ACPY(init_addr, naddr, af); - - } else { - PF_AINC(&rpool->counter, af); - PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); - } - break; - case PF_POOL_SRCHASH: - pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af); - PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af); - break; - case PF_POOL_ROUNDROBIN: - if (rpool->cur->addr.type == PF_ADDR_TABLE) { - if (!pfr_pool_get(rpool->cur->addr.p.tbl, - &rpool->tblidx, &rpool->counter, - &raddr, &rmask, af)) - goto get_addr; - } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { - if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, - &rpool->tblidx, &rpool->counter, - &raddr, &rmask, af)) - goto 
get_addr; - } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af)) - goto get_addr; - - try_next: - if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL) - rpool->cur = TAILQ_FIRST(&rpool->list); - if (rpool->cur->addr.type == PF_ADDR_TABLE) { - rpool->tblidx = -1; - if (pfr_pool_get(rpool->cur->addr.p.tbl, - &rpool->tblidx, &rpool->counter, - &raddr, &rmask, af)) { - /* table contains no address of type 'af' */ - if (rpool->cur != acur) - goto try_next; - return (1); - } - } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { - rpool->tblidx = -1; - if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, - &rpool->tblidx, &rpool->counter, - &raddr, &rmask, af)) { - /* table contains no address of type 'af' */ - if (rpool->cur != acur) - goto try_next; - return (1); - } - } else { - raddr = &rpool->cur->addr.v.a.addr; - rmask = &rpool->cur->addr.v.a.mask; - PF_ACPY(&rpool->counter, raddr, af); - } - - get_addr: - PF_ACPY(naddr, &rpool->counter, af); - if (init_addr != NULL && PF_AZERO(init_addr, af)) - PF_ACPY(init_addr, naddr, af); - PF_AINC(&rpool->counter, af); - break; - } - if (*sn != NULL) - PF_ACPY(&(*sn)->raddr, naddr, af); - - if (pf_status.debug >= PF_DEBUG_MISC && - (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { - printf("pf_map_addr: selected address "); - pf_print_host(naddr, 0, af); - printf("\n"); - } - - return (0); -} - -int -pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, - struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport, - struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high, - struct pf_src_node **sn) -{ - struct pf_state_cmp key; - struct pf_addr init_addr; - u_int16_t cut; - - bzero(&init_addr, sizeof(init_addr)); - if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) - return (1); - - if (proto == IPPROTO_ICMP) { - low = 1; - high = 65535; - } - - do { - key.af = af; - key.proto = proto; - PF_ACPY(&key.ext.addr, daddr, key.af); - PF_ACPY(&key.gwy.addr, naddr, key.af); - 
key.ext.port = dport; - - /* - * port search; start random, step; - * similar 2 portloop in in_pcbbind - */ - if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP || - proto == IPPROTO_ICMP)) { - key.gwy.port = dport; - if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) - return (0); - } else if (low == 0 && high == 0) { - key.gwy.port = *nport; - if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) - return (0); - } else if (low == high) { - key.gwy.port = htons(low); - if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) { - *nport = htons(low); - return (0); - } - } else { - u_int16_t tmp; - - if (low > high) { - tmp = low; - low = high; - high = tmp; - } - /* low < high */ - cut = htonl(arc4random()) % (1 + high - low) + low; - /* low <= cut <= high */ - for (tmp = cut; tmp <= high; ++(tmp)) { - key.gwy.port = htons(tmp); - if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == - NULL) { - *nport = htons(tmp); - return (0); - } - } - for (tmp = cut - 1; tmp >= low; --(tmp)) { - key.gwy.port = htons(tmp); - if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == - NULL) { - *nport = htons(tmp); - return (0); - } - } - } - - switch (r->rpool.opts & PF_POOL_TYPEMASK) { - case PF_POOL_RANDOM: - case PF_POOL_ROUNDROBIN: - if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) - return (1); - break; - case PF_POOL_NONE: - case PF_POOL_SRCHASH: - case PF_POOL_BITMASK: - default: - return (1); - } - } while (! 
PF_AEQ(&init_addr, naddr, af) ); - - return (1); /* none available */ -} - -struct pf_rule * -pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, - int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport, - struct pf_addr *daddr, u_int16_t dport, int rs_num) -{ - struct pf_rule *r, *rm = NULL; - struct pf_ruleset *ruleset = NULL; - int tag = -1; - int rtableid = -1; - int asd = 0; - - r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr); - while (r && rm == NULL) { - struct pf_rule_addr *src = NULL, *dst = NULL; - struct pf_addr_wrap *xdst = NULL; - - if (r->action == PF_BINAT && direction == PF_IN) { - src = &r->dst; - if (r->rpool.cur != NULL) - xdst = &r->rpool.cur->addr; - } else { - src = &r->src; - dst = &r->dst; - } - - r->evaluations++; - if (pfi_kif_match(r->kif, kif) == r->ifnot) - r = r->skip[PF_SKIP_IFP].ptr; - else if (r->direction && r->direction != direction) - r = r->skip[PF_SKIP_DIR].ptr; - else if (r->af && r->af != pd->af) - r = r->skip[PF_SKIP_AF].ptr; - else if (r->proto && r->proto != pd->proto) - r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, - src->neg, kif)) - r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR : - PF_SKIP_DST_ADDR].ptr; - else if (src->port_op && !pf_match_port(src->port_op, - src->port[0], src->port[1], sport)) - r = r->skip[src == &r->src ? 
PF_SKIP_SRC_PORT : - PF_SKIP_DST_PORT].ptr; - else if (dst != NULL && - PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL)) - r = r->skip[PF_SKIP_DST_ADDR].ptr; - else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af, - 0, NULL)) - r = TAILQ_NEXT(r, entries); - else if (dst != NULL && dst->port_op && - !pf_match_port(dst->port_op, dst->port[0], - dst->port[1], dport)) - r = r->skip[PF_SKIP_DST_PORT].ptr; - else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) - r = TAILQ_NEXT(r, entries); - else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto != - IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m, - off, pd->hdr.tcp), r->os_fingerprint))) - r = TAILQ_NEXT(r, entries); - else { - if (r->tag) - tag = r->tag; - if (r->rtableid >= 0) - rtableid = r->rtableid; - if (r->anchor == NULL) { - rm = r; - } else - pf_step_into_anchor(&asd, &ruleset, rs_num, - &r, NULL, NULL); - } - if (r == NULL) - pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r, - NULL, NULL); - } - if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) - return (NULL); - if (rm != NULL && (rm->action == PF_NONAT || - rm->action == PF_NORDR || rm->action == PF_NOBINAT)) - return (NULL); - return (rm); -} - -struct pf_rule * -pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, - struct pfi_kif *kif, struct pf_src_node **sn, - struct pf_addr *saddr, u_int16_t sport, - struct pf_addr *daddr, u_int16_t dport, - struct pf_addr *naddr, u_int16_t *nport) -{ - struct pf_rule *r = NULL; - - if (direction == PF_OUT) { - r = pf_match_translation(pd, m, off, direction, kif, saddr, - sport, daddr, dport, PF_RULESET_BINAT); - if (r == NULL) - r = pf_match_translation(pd, m, off, direction, kif, - saddr, sport, daddr, dport, PF_RULESET_NAT); - } else { - r = pf_match_translation(pd, m, off, direction, kif, saddr, - sport, daddr, dport, PF_RULESET_RDR); - if (r == NULL) - r = pf_match_translation(pd, m, off, direction, kif, - saddr, sport, daddr, dport, 
PF_RULESET_BINAT); - } - - if (r != NULL) { - switch (r->action) { - case PF_NONAT: - case PF_NOBINAT: - case PF_NORDR: - return (NULL); - case PF_NAT: - if (pf_get_sport(pd->af, pd->proto, r, saddr, - daddr, dport, naddr, nport, r->rpool.proxy_port[0], - r->rpool.proxy_port[1], sn)) { - DPFPRINTF(PF_DEBUG_MISC, - ("pf: NAT proxy port allocation " - "(%u-%u) failed\n", - r->rpool.proxy_port[0], - r->rpool.proxy_port[1])); - return (NULL); - } - break; - case PF_BINAT: - switch (direction) { - case PF_OUT: - if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){ - switch (pd->af) { -#ifdef INET - case AF_INET: - if (r->rpool.cur->addr.p.dyn-> - pfid_acnt4 < 1) - return (NULL); - PF_POOLMASK(naddr, - &r->rpool.cur->addr.p.dyn-> - pfid_addr4, - &r->rpool.cur->addr.p.dyn-> - pfid_mask4, - saddr, AF_INET); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - if (r->rpool.cur->addr.p.dyn-> - pfid_acnt6 < 1) - return (NULL); - PF_POOLMASK(naddr, - &r->rpool.cur->addr.p.dyn-> - pfid_addr6, - &r->rpool.cur->addr.p.dyn-> - pfid_mask6, - saddr, AF_INET6); - break; -#endif /* INET6 */ - } - } else - PF_POOLMASK(naddr, - &r->rpool.cur->addr.v.a.addr, - &r->rpool.cur->addr.v.a.mask, - saddr, pd->af); - break; - case PF_IN: - if (r->src.addr.type == PF_ADDR_DYNIFTL) { - switch (pd->af) { -#ifdef INET - case AF_INET: - if (r->src.addr.p.dyn-> - pfid_acnt4 < 1) - return (NULL); - PF_POOLMASK(naddr, - &r->src.addr.p.dyn-> - pfid_addr4, - &r->src.addr.p.dyn-> - pfid_mask4, - daddr, AF_INET); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - if (r->src.addr.p.dyn-> - pfid_acnt6 < 1) - return (NULL); - PF_POOLMASK(naddr, - &r->src.addr.p.dyn-> - pfid_addr6, - &r->src.addr.p.dyn-> - pfid_mask6, - daddr, AF_INET6); - break; -#endif /* INET6 */ - } - } else - PF_POOLMASK(naddr, - &r->src.addr.v.a.addr, - &r->src.addr.v.a.mask, daddr, - pd->af); - break; - } - break; - case PF_RDR: { - if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn)) - return (NULL); - if ((r->rpool.opts & 
PF_POOL_TYPEMASK) == - PF_POOL_BITMASK) - PF_POOLMASK(naddr, naddr, - &r->rpool.cur->addr.v.a.mask, daddr, - pd->af); - - if (r->rpool.proxy_port[1]) { - u_int32_t tmp_nport; - - tmp_nport = ((ntohs(dport) - - ntohs(r->dst.port[0])) % - (r->rpool.proxy_port[1] - - r->rpool.proxy_port[0] + 1)) + - r->rpool.proxy_port[0]; - - /* wrap around if necessary */ - if (tmp_nport > 65535) - tmp_nport -= 65535; - *nport = htons((u_int16_t)tmp_nport); - } else if (r->rpool.proxy_port[0]) - *nport = htons(r->rpool.proxy_port[0]); - break; - } - default: - return (NULL); - } - } - - return (r); -} - -int -#ifdef __FreeBSD__ -pf_socket_lookup(int direction, struct pf_pdesc *pd, struct inpcb *inp_arg) -#else -pf_socket_lookup(int direction, struct pf_pdesc *pd) -#endif -{ - struct pf_addr *saddr, *daddr; - u_int16_t sport, dport; -#ifdef __FreeBSD__ - struct inpcbinfo *pi; -#else - struct inpcbtable *tb; -#endif - struct inpcb *inp; - - if (pd == NULL) - return (-1); - pd->lookup.uid = UID_MAX; - pd->lookup.gid = GID_MAX; - pd->lookup.pid = NO_PID; /* XXX: revisit */ -#ifdef __FreeBSD__ - if (inp_arg != NULL) { - INP_LOCK_ASSERT(inp_arg); - pd->lookup.uid = inp_arg->inp_cred->cr_uid; - pd->lookup.gid = inp_arg->inp_cred->cr_groups[0]; - return (1); - } -#endif - switch (pd->proto) { - case IPPROTO_TCP: - if (pd->hdr.tcp == NULL) - return (-1); - sport = pd->hdr.tcp->th_sport; - dport = pd->hdr.tcp->th_dport; -#ifdef __FreeBSD__ - pi = &V_tcbinfo; -#else - tb = &tcbtable; -#endif - break; - case IPPROTO_UDP: - if (pd->hdr.udp == NULL) - return (-1); - sport = pd->hdr.udp->uh_sport; - dport = pd->hdr.udp->uh_dport; -#ifdef __FreeBSD__ - pi = &V_udbinfo; -#else - tb = &udbtable; -#endif - break; - default: - return (-1); - } - if (direction == PF_IN) { - saddr = pd->src; - daddr = pd->dst; - } else { - u_int16_t p; + default: + return (-1); + } + if (direction == PF_IN) { + saddr = pd->src; + daddr = pd->dst; + } else { + u_int16_t p; p = sport; sport = dport; @@ -3050,7 +3017,8 @@ 
pf_socket_lookup(int direction, struct pf_pdesc *pd) #else inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport); if (inp == NULL) { - inp = in_pcblookup_listen(tb, daddr->v4, dport, 0); + inp = in_pcblookup_listen(tb, daddr->v4, dport, 0, + NULL); if (inp == NULL) return (-1); } @@ -3077,7 +3045,8 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd) inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6, dport); if (inp == NULL) { - inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 0); + inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 0, + NULL); if (inp == NULL) return (-1); } @@ -3147,7 +3116,11 @@ pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) int hlen; u_int8_t hdr[60]; u_int8_t *opt, optlen; +#ifdef __FreeBSD__ u_int16_t mss = V_tcp_mssdflt; +#else + u_int16_t mss = tcp_mssdflt; +#endif hlen = th_off << 2; /* hlen <= sizeof(hdr) */ if (hlen <= sizeof(struct tcphdr)) @@ -3191,8 +3164,13 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer) struct route_in6 ro6; #endif /* INET6 */ struct rtentry *rt = NULL; - int hlen = 0; /* make the compiler happy */ +#ifdef __FreeBSD__ + int hlen = 0; u_int16_t mss = V_tcp_mssdflt; +#else + int hlen; + u_int16_t mss = tcp_mssdflt; +#endif switch (af) { #ifdef INET @@ -3240,7 +3218,11 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer) if (rt && rt->rt_ifp) { mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr); +#ifdef __FreeBSD__ mss = max(V_tcp_mssdflt, mss); +#else + mss = max(tcp_mssdflt, mss); +#endif RTFREE(rt); } mss = min(mss, offer); @@ -3252,55 +3234,113 @@ void pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr) { struct pf_rule *r = s->rule.ptr; + struct pf_src_node *sn = NULL; s->rt_kif = NULL; if (!r->rt || r->rt == PF_FASTROUTE) return; - switch (s->af) { + switch (s->key[PF_SK_WIRE]->af) { #ifdef INET case AF_INET: - pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, - &s->nat_src_node); + pf_map_addr(AF_INET, r, saddr, 
&s->rt_addr, NULL, &sn); s->rt_kif = r->rpool.cur->kif; break; #endif /* INET */ #ifdef INET6 case AF_INET6: - pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, - &s->nat_src_node); + pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, &sn); s->rt_kif = r->rpool.cur->kif; break; #endif /* INET6 */ } } +u_int32_t +pf_tcp_iss(struct pf_pdesc *pd) +{ + MD5_CTX ctx; + u_int32_t digest[4]; + +#ifdef __FreeBSD__ + if (V_pf_tcp_secret_init == 0) { + read_random(&V_pf_tcp_secret, sizeof(V_pf_tcp_secret)); + MD5Init(&V_pf_tcp_secret_ctx); + MD5Update(&V_pf_tcp_secret_ctx, V_pf_tcp_secret, + sizeof(V_pf_tcp_secret)); + V_pf_tcp_secret_init = 1; + } + + ctx = V_pf_tcp_secret_ctx; +#else + if (pf_tcp_secret_init == 0) { + arc4random_buf(pf_tcp_secret, sizeof(pf_tcp_secret)); + MD5Init(&pf_tcp_secret_ctx); + MD5Update(&pf_tcp_secret_ctx, pf_tcp_secret, + sizeof(pf_tcp_secret)); + pf_tcp_secret_init = 1; + } + + ctx = pf_tcp_secret_ctx; +#endif + + MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof(u_short)); + MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof(u_short)); + if (pd->af == AF_INET6) { + MD5Update(&ctx, (char *)&pd->src->v6, sizeof(struct in6_addr)); + MD5Update(&ctx, (char *)&pd->dst->v6, sizeof(struct in6_addr)); + } else { + MD5Update(&ctx, (char *)&pd->src->v4, sizeof(struct in_addr)); + MD5Update(&ctx, (char *)&pd->dst->v4, sizeof(struct in_addr)); + } + MD5Final((u_char *)digest, &ctx); +#ifdef __FreeBSD__ + V_pf_tcp_iss_off += 4096; +#define ISN_RANDOM_INCREMENT (4096 - 1) + return (digest[0] + (arc4random() & ISN_RANDOM_INCREMENT) + + V_pf_tcp_iss_off); +#undef ISN_RANDOM_INCREMENT +#else + pf_tcp_iss_off += 4096; + return (digest[0] + tcp_iss + pf_tcp_iss_off); +#endif +} + int -pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, +pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, -#ifdef __FreeBSD__ struct pf_pdesc *pd, struct pf_rule **am, struct 
pf_ruleset **rsm, +#ifdef __FreeBSD__ struct ifqueue *ifq, struct inpcb *inp) #else - struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm, struct ifqueue *ifq) #endif { struct pf_rule *nr = NULL; struct pf_addr *saddr = pd->src, *daddr = pd->dst; - struct tcphdr *th = pd->hdr.tcp; - u_int16_t bport, nport = 0; sa_family_t af = pd->af; struct pf_rule *r, *a = NULL; struct pf_ruleset *ruleset = NULL; struct pf_src_node *nsn = NULL; + struct tcphdr *th = pd->hdr.tcp; + struct pf_state_key *skw = NULL, *sks = NULL; + struct pf_state_key *sk = NULL, *nk = NULL; u_short reason; - int rewrite = 0; + int rewrite = 0, hdrlen = 0; int tag = -1, rtableid = -1; - u_int16_t mss = V_tcp_mssdflt; int asd = 0; int match = 0; + int state_icmp = 0; +#ifdef __FreeBSD__ + u_int16_t sport = 0, dport = 0; + u_int16_t bproto_sum = 0, bip_sum = 0; +#else + u_int16_t sport, dport; + u_int16_t bproto_sum = 0, bip_sum; +#endif + u_int8_t icmptype = 0, icmpcode = 0; + - if (pf_check_congestion(ifq)) { + if (direction == PF_IN && pf_check_congestion(ifq)) { REASON_SET(&reason, PFRES_CONGEST); return (PF_DROP); } @@ -3308,746 +3348,33 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, #ifdef __FreeBSD__ if (inp != NULL) pd->lookup.done = pf_socket_lookup(direction, pd, inp); - else if (debug_pfugidhack) { + else if (V_debug_pfugidhack) { PF_UNLOCK(); DPFPRINTF(PF_DEBUG_MISC, ("pf: unlocked lookup\n")); - pd->lookup.done = pf_socket_lookup(direction, pd, inp); + pd->lookup.done = pf_socket_lookup(direction, pd, inp); PF_LOCK(); } #endif - r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); - - if (direction == PF_OUT) { - bport = nport = th->th_sport; - /* check outgoing packet for BINAT/NAT */ - if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn, - saddr, th->th_sport, daddr, th->th_dport, - &pd->naddr, &nport)) != NULL) { - PF_ACPY(&pd->baddr, saddr, af); - pf_change_ap(saddr, &th->th_sport, pd->ip_sum, - &th->th_sum, &pd->naddr, 
nport, 0, af); - rewrite++; - if (nr->natpass) - r = NULL; - pd->nat_rule = nr; - } - } else { - bport = nport = th->th_dport; - /* check incoming packet for BINAT/RDR */ - if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn, - saddr, th->th_sport, daddr, th->th_dport, - &pd->naddr, &nport)) != NULL) { - PF_ACPY(&pd->baddr, daddr, af); - pf_change_ap(daddr, &th->th_dport, pd->ip_sum, - &th->th_sum, &pd->naddr, nport, 0, af); - rewrite++; - if (nr->natpass) - r = NULL; - pd->nat_rule = nr; - } - } - - while (r != NULL) { - r->evaluations++; - if (pfi_kif_match(r->kif, kif) == r->ifnot) - r = r->skip[PF_SKIP_IFP].ptr; - else if (r->direction && r->direction != direction) - r = r->skip[PF_SKIP_DIR].ptr; - else if (r->af && r->af != af) - r = r->skip[PF_SKIP_AF].ptr; - else if (r->proto && r->proto != IPPROTO_TCP) - r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, saddr, af, - r->src.neg, kif)) - r = r->skip[PF_SKIP_SRC_ADDR].ptr; - else if (r->src.port_op && !pf_match_port(r->src.port_op, - r->src.port[0], r->src.port[1], th->th_sport)) - r = r->skip[PF_SKIP_SRC_PORT].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, - r->dst.neg, NULL)) - r = r->skip[PF_SKIP_DST_ADDR].ptr; - else if (r->dst.port_op && !pf_match_port(r->dst.port_op, - r->dst.port[0], r->dst.port[1], th->th_dport)) - r = r->skip[PF_SKIP_DST_PORT].ptr; - else if (r->tos && !(r->tos == pd->tos)) - r = TAILQ_NEXT(r, entries); - else if (r->rule_flag & PFRULE_FRAGMENT) - r = TAILQ_NEXT(r, entries); - else if ((r->flagset & th->th_flags) != r->flags) - r = TAILQ_NEXT(r, entries); - else if (r->uid.op && (pd->lookup.done || (pd->lookup.done = -#ifdef __FreeBSD__ - pf_socket_lookup(direction, pd, inp), 1)) && -#else - pf_socket_lookup(direction, pd), 1)) && -#endif - !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], - pd->lookup.uid)) - r = TAILQ_NEXT(r, entries); - else if (r->gid.op && (pd->lookup.done || (pd->lookup.done = -#ifdef __FreeBSD__ - 
pf_socket_lookup(direction, pd, inp), 1)) && -#else - pf_socket_lookup(direction, pd), 1)) && -#endif - !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], - pd->lookup.gid)) - r = TAILQ_NEXT(r, entries); - else if (r->prob && r->prob <= arc4random()) - r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) - r = TAILQ_NEXT(r, entries); - else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match( - pf_osfp_fingerprint(pd, m, off, th), r->os_fingerprint)) - r = TAILQ_NEXT(r, entries); - else { - if (r->tag) - tag = r->tag; - if (r->rtableid >= 0) - rtableid = r->rtableid; - if (r->anchor == NULL) { - match = 1; - *rm = r; - *am = a; - *rsm = ruleset; - if ((*rm)->quick) - break; - r = TAILQ_NEXT(r, entries); - } else - pf_step_into_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a, &match); - } - if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a, &match)) - break; - } - r = *rm; - a = *am; - ruleset = *rsm; - - REASON_SET(&reason, PFRES_MATCH); - - if (r->log || (nr != NULL && nr->natpass && nr->log)) { - if (rewrite) -#ifdef __FreeBSD__ - m_copyback(m, off, sizeof(*th), (caddr_t)th); -#else - m_copyback(m, off, sizeof(*th), th); -#endif - PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? 
r : nr, - a, ruleset, pd); - } - - if ((r->action == PF_DROP) && - ((r->rule_flag & PFRULE_RETURNRST) || - (r->rule_flag & PFRULE_RETURNICMP) || - (r->rule_flag & PFRULE_RETURN))) { - /* undo NAT changes, if they have taken place */ - if (nr != NULL) { - if (direction == PF_OUT) { - pf_change_ap(saddr, &th->th_sport, pd->ip_sum, - &th->th_sum, &pd->baddr, bport, 0, af); - rewrite++; - } else { - pf_change_ap(daddr, &th->th_dport, pd->ip_sum, - &th->th_sum, &pd->baddr, bport, 0, af); - rewrite++; - } - } - if (((r->rule_flag & PFRULE_RETURNRST) || - (r->rule_flag & PFRULE_RETURN)) && - !(th->th_flags & TH_RST)) { - u_int32_t ack = ntohl(th->th_seq) + pd->p_len; - - if (th->th_flags & TH_SYN) - ack++; - if (th->th_flags & TH_FIN) - ack++; -#ifdef __FreeBSD__ - pf_send_tcp(m, r, af, pd->dst, -#else - pf_send_tcp(r, af, pd->dst, -#endif - pd->src, th->th_dport, th->th_sport, - ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, - r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp); - } else if ((af == AF_INET) && r->return_icmp) - pf_send_icmp(m, r->return_icmp >> 8, - r->return_icmp & 255, af, r); - else if ((af == AF_INET6) && r->return_icmp6) - pf_send_icmp(m, r->return_icmp6 >> 8, - r->return_icmp6 & 255, af, r); - } - - if (r->action == PF_DROP) - return (PF_DROP); - - if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) { - REASON_SET(&reason, PFRES_MEMORY); - return (PF_DROP); - } - - if (r->keep_state || nr != NULL || - (pd->flags & PFDESC_TCP_NORM)) { - /* create new state */ - u_int16_t len; - struct pf_state *s = NULL; - struct pf_src_node *sn = NULL; - - len = pd->tot_len - off - (th->th_off << 2); - - /* check maximums */ - if (r->max_states && (r->states >= r->max_states)) { - pf_status.lcounters[LCNT_STATES]++; - REASON_SET(&reason, PFRES_MAXSTATES); - goto cleanup; - } - /* src node for filter rule */ - if ((r->rule_flag & PFRULE_SRCTRACK || - r->rpool.opts & PF_POOL_STICKYADDR) && - pf_insert_src_node(&sn, r, saddr, af) != 0) { - REASON_SET(&reason, PFRES_SRCLIMIT); - 
goto cleanup; - } - /* src node for translation rule */ - if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && - ((direction == PF_OUT && - pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) || - (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) { - REASON_SET(&reason, PFRES_SRCLIMIT); - goto cleanup; - } - s = pool_get(&pf_state_pl, PR_NOWAIT); - if (s == NULL) { - REASON_SET(&reason, PFRES_MEMORY); -cleanup: - if (sn != NULL && sn->states == 0 && sn->expire == 0) { - RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); - pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; - pf_status.src_nodes--; - pool_put(&pf_src_tree_pl, sn); - } - if (nsn != sn && nsn != NULL && nsn->states == 0 && - nsn->expire == 0) { - RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn); - pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; - pf_status.src_nodes--; - pool_put(&pf_src_tree_pl, nsn); - } - return (PF_DROP); - } - bzero(s, sizeof(*s)); - s->rule.ptr = r; - s->nat_rule.ptr = nr; - s->anchor.ptr = a; - STATE_INC_COUNTERS(s); - if (r->allow_opts) - s->state_flags |= PFSTATE_ALLOWOPTS; - if (r->rule_flag & PFRULE_STATESLOPPY) - s->state_flags |= PFSTATE_SLOPPY; - s->log = r->log & PF_LOG_ALL; - if (nr != NULL) - s->log |= nr->log & PF_LOG_ALL; - s->proto = IPPROTO_TCP; - s->direction = direction; - s->af = af; - if (direction == PF_OUT) { - PF_ACPY(&s->gwy.addr, saddr, af); - s->gwy.port = th->th_sport; /* sport */ - PF_ACPY(&s->ext.addr, daddr, af); - s->ext.port = th->th_dport; - if (nr != NULL) { - PF_ACPY(&s->lan.addr, &pd->baddr, af); - s->lan.port = bport; - } else { - PF_ACPY(&s->lan.addr, &s->gwy.addr, af); - s->lan.port = s->gwy.port; - } - } else { - PF_ACPY(&s->lan.addr, daddr, af); - s->lan.port = th->th_dport; - PF_ACPY(&s->ext.addr, saddr, af); - s->ext.port = th->th_sport; - if (nr != NULL) { - PF_ACPY(&s->gwy.addr, &pd->baddr, af); - s->gwy.port = bport; - } else { - PF_ACPY(&s->gwy.addr, &s->lan.addr, af); - s->gwy.port = s->lan.port; - } - } - - s->src.seqlo = 
ntohl(th->th_seq); - s->src.seqhi = s->src.seqlo + len + 1; - if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && - r->keep_state == PF_STATE_MODULATE) { - /* Generate sequence number modulator */ -#ifdef __FreeBSD__ - while ((s->src.seqdiff = - pf_new_isn(s) - s->src.seqlo) == 0) - ; -#else - while ((s->src.seqdiff = - tcp_rndiss_next() - s->src.seqlo) == 0) - ; -#endif - pf_change_a(&th->th_seq, &th->th_sum, - htonl(s->src.seqlo + s->src.seqdiff), 0); - rewrite = 1; - } else - s->src.seqdiff = 0; - if (th->th_flags & TH_SYN) { - s->src.seqhi++; - s->src.wscale = pf_get_wscale(m, off, th->th_off, af); - } - s->src.max_win = MAX(ntohs(th->th_win), 1); - if (s->src.wscale & PF_WSCALE_MASK) { - /* Remove scale factor from initial window */ - int win = s->src.max_win; - win += 1 << (s->src.wscale & PF_WSCALE_MASK); - s->src.max_win = (win - 1) >> - (s->src.wscale & PF_WSCALE_MASK); - } - if (th->th_flags & TH_FIN) - s->src.seqhi++; - s->dst.seqhi = 1; - s->dst.max_win = 1; - s->src.state = TCPS_SYN_SENT; - s->dst.state = TCPS_CLOSED; - s->creation = time_second; - s->expire = time_second; - s->timeout = PFTM_TCP_FIRST_PACKET; - pf_set_rt_ifp(s, saddr); - if (sn != NULL) { - s->src_node = sn; - s->src_node->states++; - } - if (nsn != NULL) { - PF_ACPY(&nsn->raddr, &pd->naddr, af); - s->nat_src_node = nsn; - s->nat_src_node->states++; - } - if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m, - off, pd, th, &s->src, &s->dst)) { - REASON_SET(&reason, PFRES_MEMORY); - pf_src_tree_remove_state(s); - STATE_DEC_COUNTERS(s); - pool_put(&pf_state_pl, s); - return (PF_DROP); - } - if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub && - pf_normalize_tcp_stateful(m, off, pd, &reason, th, s, - &s->src, &s->dst, &rewrite)) { - /* This really shouldn't happen!!! 
*/ - DPFPRINTF(PF_DEBUG_URGENT, - ("pf_normalize_tcp_stateful failed on first pkt")); - pf_normalize_tcp_cleanup(s); - pf_src_tree_remove_state(s); - STATE_DEC_COUNTERS(s); - pool_put(&pf_state_pl, s); - return (PF_DROP); - } - if (pf_insert_state(BOUND_IFACE(r, kif), s)) { - pf_normalize_tcp_cleanup(s); - REASON_SET(&reason, PFRES_STATEINS); - pf_src_tree_remove_state(s); - STATE_DEC_COUNTERS(s); - pool_put(&pf_state_pl, s); - return (PF_DROP); - } else - *sm = s; - if (tag > 0) { - pf_tag_ref(tag); - s->tag = tag; - } - if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && - r->keep_state == PF_STATE_SYNPROXY) { - s->src.state = PF_TCPS_PROXY_SRC; - if (nr != NULL) { - if (direction == PF_OUT) { - pf_change_ap(saddr, &th->th_sport, - pd->ip_sum, &th->th_sum, &pd->baddr, - bport, 0, af); - } else { - pf_change_ap(daddr, &th->th_dport, - pd->ip_sum, &th->th_sum, &pd->baddr, - bport, 0, af); - } - } - s->src.seqhi = htonl(arc4random()); - /* Find mss option */ - mss = pf_get_mss(m, off, th->th_off, af); - mss = pf_calc_mss(saddr, af, mss); - mss = pf_calc_mss(daddr, af, mss); - s->src.mss = mss; -#ifdef __FreeBSD__ - pf_send_tcp(NULL, r, af, daddr, saddr, th->th_dport, -#else - pf_send_tcp(r, af, daddr, saddr, th->th_dport, -#endif - th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, - TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL); - REASON_SET(&reason, PFRES_SYNPROXY); - return (PF_SYNPROXY_DROP); - } - } - - /* copy back packet headers if we performed NAT operations */ - if (rewrite) - m_copyback(m, off, sizeof(*th), (caddr_t)th); - - return (PF_PASS); -} - -int -pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, - struct pfi_kif *kif, struct mbuf *m, int off, void *h, -#ifdef __FreeBSD__ - struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm, - struct ifqueue *ifq, struct inpcb *inp) -#else - struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm, - struct ifqueue *ifq) -#endif -{ - struct pf_rule *nr = NULL; - struct 
pf_addr *saddr = pd->src, *daddr = pd->dst; - struct udphdr *uh = pd->hdr.udp; - u_int16_t bport, nport = 0; - sa_family_t af = pd->af; - struct pf_rule *r, *a = NULL; - struct pf_ruleset *ruleset = NULL; - struct pf_src_node *nsn = NULL; - u_short reason; - int rewrite = 0; - int tag = -1, rtableid = -1; - int asd = 0; - int match = 0; - - if (pf_check_congestion(ifq)) { - REASON_SET(&reason, PFRES_CONGEST); - return (PF_DROP); - } - -#ifdef __FreeBSD__ - if (inp != NULL) - pd->lookup.done = pf_socket_lookup(direction, pd, inp); - else if (debug_pfugidhack) { - PF_UNLOCK(); - DPFPRINTF(PF_DEBUG_MISC, ("pf: unlocked lookup\n")); - pd->lookup.done = pf_socket_lookup(direction, pd, inp); - PF_LOCK(); - } -#endif - - r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); - - if (direction == PF_OUT) { - bport = nport = uh->uh_sport; - /* check outgoing packet for BINAT/NAT */ - if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn, - saddr, uh->uh_sport, daddr, uh->uh_dport, - &pd->naddr, &nport)) != NULL) { - PF_ACPY(&pd->baddr, saddr, af); - pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum, - &uh->uh_sum, &pd->naddr, nport, 1, af); - rewrite++; - if (nr->natpass) - r = NULL; - pd->nat_rule = nr; - } - } else { - bport = nport = uh->uh_dport; - /* check incoming packet for BINAT/RDR */ - if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn, - saddr, uh->uh_sport, daddr, uh->uh_dport, &pd->naddr, - &nport)) != NULL) { - PF_ACPY(&pd->baddr, daddr, af); - pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum, - &uh->uh_sum, &pd->naddr, nport, 1, af); - rewrite++; - if (nr->natpass) - r = NULL; - pd->nat_rule = nr; - } - } - - while (r != NULL) { - r->evaluations++; - if (pfi_kif_match(r->kif, kif) == r->ifnot) - r = r->skip[PF_SKIP_IFP].ptr; - else if (r->direction && r->direction != direction) - r = r->skip[PF_SKIP_DIR].ptr; - else if (r->af && r->af != af) - r = r->skip[PF_SKIP_AF].ptr; - else if (r->proto && r->proto != IPPROTO_UDP) - r = 
r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, saddr, af, - r->src.neg, kif)) - r = r->skip[PF_SKIP_SRC_ADDR].ptr; - else if (r->src.port_op && !pf_match_port(r->src.port_op, - r->src.port[0], r->src.port[1], uh->uh_sport)) - r = r->skip[PF_SKIP_SRC_PORT].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, - r->dst.neg, NULL)) - r = r->skip[PF_SKIP_DST_ADDR].ptr; - else if (r->dst.port_op && !pf_match_port(r->dst.port_op, - r->dst.port[0], r->dst.port[1], uh->uh_dport)) - r = r->skip[PF_SKIP_DST_PORT].ptr; - else if (r->tos && !(r->tos == pd->tos)) - r = TAILQ_NEXT(r, entries); - else if (r->rule_flag & PFRULE_FRAGMENT) - r = TAILQ_NEXT(r, entries); - else if (r->uid.op && (pd->lookup.done || (pd->lookup.done = -#ifdef __FreeBSD__ - pf_socket_lookup(direction, pd, inp), 1)) && -#else - pf_socket_lookup(direction, pd), 1)) && -#endif - !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], - pd->lookup.uid)) - r = TAILQ_NEXT(r, entries); - else if (r->gid.op && (pd->lookup.done || (pd->lookup.done = -#ifdef __FreeBSD__ - pf_socket_lookup(direction, pd, inp), 1)) && -#else - pf_socket_lookup(direction, pd), 1)) && -#endif - !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], - pd->lookup.gid)) - r = TAILQ_NEXT(r, entries); - else if (r->prob && r->prob <= arc4random()) - r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) - r = TAILQ_NEXT(r, entries); - else if (r->os_fingerprint != PF_OSFP_ANY) - r = TAILQ_NEXT(r, entries); - else { - if (r->tag) - tag = r->tag; - if (r->rtableid >= 0) - rtableid = r->rtableid; - if (r->anchor == NULL) { - match = 1; - *rm = r; - *am = a; - *rsm = ruleset; - if ((*rm)->quick) - break; - r = TAILQ_NEXT(r, entries); - } else - pf_step_into_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a, &match); - } - if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a, &match)) - break; - } - r = *rm; - a = *am; - ruleset = *rsm; - - 
REASON_SET(&reason, PFRES_MATCH); - - if (r->log || (nr != NULL && nr->natpass && nr->log)) { - if (rewrite) -#ifdef __FreeBSD__ - m_copyback(m, off, sizeof(*uh), (caddr_t)uh); -#else - m_copyback(m, off, sizeof(*uh), uh); -#endif - PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr, - a, ruleset, pd); - } - - if ((r->action == PF_DROP) && - ((r->rule_flag & PFRULE_RETURNICMP) || - (r->rule_flag & PFRULE_RETURN))) { - /* undo NAT changes, if they have taken place */ - if (nr != NULL) { - if (direction == PF_OUT) { - pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum, - &uh->uh_sum, &pd->baddr, bport, 1, af); - rewrite++; - } else { - pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum, - &uh->uh_sum, &pd->baddr, bport, 1, af); - rewrite++; - } - } - if ((af == AF_INET) && r->return_icmp) - pf_send_icmp(m, r->return_icmp >> 8, - r->return_icmp & 255, af, r); - else if ((af == AF_INET6) && r->return_icmp6) - pf_send_icmp(m, r->return_icmp6 >> 8, - r->return_icmp6 & 255, af, r); - } - - if (r->action == PF_DROP) - return (PF_DROP); - - if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) { - REASON_SET(&reason, PFRES_MEMORY); - return (PF_DROP); - } - - if (r->keep_state || nr != NULL) { - /* create new state */ - struct pf_state *s = NULL; - struct pf_src_node *sn = NULL; - - /* check maximums */ - if (r->max_states && (r->states >= r->max_states)) { - pf_status.lcounters[LCNT_STATES]++; - REASON_SET(&reason, PFRES_MAXSTATES); - goto cleanup; - } - /* src node for filter rule */ - if ((r->rule_flag & PFRULE_SRCTRACK || - r->rpool.opts & PF_POOL_STICKYADDR) && - pf_insert_src_node(&sn, r, saddr, af) != 0) { - REASON_SET(&reason, PFRES_SRCLIMIT); - goto cleanup; - } - /* src node for translation rule */ - if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && - ((direction == PF_OUT && - pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) || - (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) { - REASON_SET(&reason, PFRES_SRCLIMIT); - goto cleanup; - } - s = 
pool_get(&pf_state_pl, PR_NOWAIT); - if (s == NULL) { - REASON_SET(&reason, PFRES_MEMORY); -cleanup: - if (sn != NULL && sn->states == 0 && sn->expire == 0) { - RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); - pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; - pf_status.src_nodes--; - pool_put(&pf_src_tree_pl, sn); - } - if (nsn != sn && nsn != NULL && nsn->states == 0 && - nsn->expire == 0) { - RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn); - pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; - pf_status.src_nodes--; - pool_put(&pf_src_tree_pl, nsn); - } - return (PF_DROP); - } - bzero(s, sizeof(*s)); - s->rule.ptr = r; - s->nat_rule.ptr = nr; - s->anchor.ptr = a; - STATE_INC_COUNTERS(s); - if (r->allow_opts) - s->state_flags |= PFSTATE_ALLOWOPTS; - if (r->rule_flag & PFRULE_STATESLOPPY) - s->state_flags |= PFSTATE_SLOPPY; - s->log = r->log & PF_LOG_ALL; - if (nr != NULL) - s->log |= nr->log & PF_LOG_ALL; - s->proto = IPPROTO_UDP; - s->direction = direction; - s->af = af; - if (direction == PF_OUT) { - PF_ACPY(&s->gwy.addr, saddr, af); - s->gwy.port = uh->uh_sport; - PF_ACPY(&s->ext.addr, daddr, af); - s->ext.port = uh->uh_dport; - if (nr != NULL) { - PF_ACPY(&s->lan.addr, &pd->baddr, af); - s->lan.port = bport; - } else { - PF_ACPY(&s->lan.addr, &s->gwy.addr, af); - s->lan.port = s->gwy.port; - } - } else { - PF_ACPY(&s->lan.addr, daddr, af); - s->lan.port = uh->uh_dport; - PF_ACPY(&s->ext.addr, saddr, af); - s->ext.port = uh->uh_sport; - if (nr != NULL) { - PF_ACPY(&s->gwy.addr, &pd->baddr, af); - s->gwy.port = bport; - } else { - PF_ACPY(&s->gwy.addr, &s->lan.addr, af); - s->gwy.port = s->lan.port; - } - } - s->src.state = PFUDPS_SINGLE; - s->dst.state = PFUDPS_NO_TRAFFIC; - s->creation = time_second; - s->expire = time_second; - s->timeout = PFTM_UDP_FIRST_PACKET; - pf_set_rt_ifp(s, saddr); - if (sn != NULL) { - s->src_node = sn; - s->src_node->states++; - } - if (nsn != NULL) { - PF_ACPY(&nsn->raddr, &pd->naddr, af); - s->nat_src_node = nsn; - 
s->nat_src_node->states++; - } - if (pf_insert_state(BOUND_IFACE(r, kif), s)) { - REASON_SET(&reason, PFRES_STATEINS); - pf_src_tree_remove_state(s); - STATE_DEC_COUNTERS(s); - pool_put(&pf_state_pl, s); - return (PF_DROP); - } else - *sm = s; - if (tag > 0) { - pf_tag_ref(tag); - s->tag = tag; - } - } - - /* copy back packet headers if we performed NAT operations */ - if (rewrite) - m_copyback(m, off, sizeof(*uh), (caddr_t)uh); - - return (PF_PASS); -} - -int -pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, - struct pfi_kif *kif, struct mbuf *m, int off, void *h, - struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm, - struct ifqueue *ifq) -{ - struct pf_rule *nr = NULL; - struct pf_addr *saddr = pd->src, *daddr = pd->dst; - struct pf_rule *r, *a = NULL; - struct pf_ruleset *ruleset = NULL; - struct pf_src_node *nsn = NULL; - u_short reason; - u_int16_t icmpid = 0, bport, nport = 0; - sa_family_t af = pd->af; - u_int8_t icmptype = 0; /* make the compiler happy */ - u_int8_t icmpcode = 0; /* make the compiler happy */ - int state_icmp = 0; - int tag = -1, rtableid = -1; -#ifdef INET6 - int rewrite = 0; -#endif /* INET6 */ - int asd = 0; - int match = 0; - - if (pf_check_congestion(ifq)) { - REASON_SET(&reason, PFRES_CONGEST); - return (PF_DROP); - } - switch (pd->proto) { + case IPPROTO_TCP: + sport = th->th_sport; + dport = th->th_dport; + hdrlen = sizeof(*th); + break; + case IPPROTO_UDP: + sport = pd->hdr.udp->uh_sport; + dport = pd->hdr.udp->uh_dport; + hdrlen = sizeof(*pd->hdr.udp); + break; #ifdef INET case IPPROTO_ICMP: + if (pd->af != AF_INET) + break; + sport = dport = pd->hdr.icmp->icmp_id; + hdrlen = sizeof(*pd->hdr.icmp); icmptype = pd->hdr.icmp->icmp_type; icmpcode = pd->hdr.icmp->icmp_code; - icmpid = pd->hdr.icmp->icmp_id; if (icmptype == ICMP_UNREACH || icmptype == ICMP_SOURCEQUENCH || @@ -4059,9 +3386,12 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, #endif /* INET */ #ifdef INET6 case 
IPPROTO_ICMPV6: + if (af != AF_INET6) + break; + sport = dport = pd->hdr.icmp6->icmp6_id; + hdrlen = sizeof(*pd->hdr.icmp6); icmptype = pd->hdr.icmp6->icmp6_type; icmpcode = pd->hdr.icmp6->icmp6_code; - icmpid = pd->hdr.icmp6->icmp6_id; if (icmptype == ICMP6_DST_UNREACH || icmptype == ICMP6_PACKET_TOO_BIG || @@ -4070,67 +3400,141 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, state_icmp++; break; #endif /* INET6 */ + default: + sport = dport = hdrlen = 0; + break; } r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); - if (direction == PF_OUT) { - bport = nport = icmpid; - /* check outgoing packet for BINAT/NAT */ - if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn, - saddr, icmpid, daddr, icmpid, &pd->naddr, &nport)) != - NULL) { - PF_ACPY(&pd->baddr, saddr, af); - switch (af) { + /* check packet for BINAT/NAT/RDR */ + if ((nr = pf_get_translation(pd, m, off, direction, kif, &nsn, + &skw, &sks, &sk, &nk, saddr, daddr, sport, dport)) != NULL) { + if (nk == NULL || sk == NULL) { + REASON_SET(&reason, PFRES_MEMORY); + goto cleanup; + } + + if (pd->ip_sum) + bip_sum = *pd->ip_sum; + + switch (pd->proto) { + case IPPROTO_TCP: + bproto_sum = th->th_sum; + pd->proto_sum = &th->th_sum; + + if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) || + nk->port[pd->sidx] != sport) { + pf_change_ap(saddr, &th->th_sport, pd->ip_sum, + &th->th_sum, &nk->addr[pd->sidx], + nk->port[pd->sidx], 0, af); + pd->sport = &th->th_sport; + sport = th->th_sport; + } + + if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) || + nk->port[pd->didx] != dport) { + pf_change_ap(daddr, &th->th_dport, pd->ip_sum, + &th->th_sum, &nk->addr[pd->didx], + nk->port[pd->didx], 0, af); + dport = th->th_dport; + pd->dport = &th->th_dport; + } + rewrite++; + break; + case IPPROTO_UDP: + bproto_sum = pd->hdr.udp->uh_sum; + pd->proto_sum = &pd->hdr.udp->uh_sum; + + if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) || + nk->port[pd->sidx] != sport) { + pf_change_ap(saddr, 
&pd->hdr.udp->uh_sport, + pd->ip_sum, &pd->hdr.udp->uh_sum, + &nk->addr[pd->sidx], + nk->port[pd->sidx], 1, af); + sport = pd->hdr.udp->uh_sport; + pd->sport = &pd->hdr.udp->uh_sport; + } + + if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) || + nk->port[pd->didx] != dport) { + pf_change_ap(daddr, &pd->hdr.udp->uh_dport, + pd->ip_sum, &pd->hdr.udp->uh_sum, + &nk->addr[pd->didx], + nk->port[pd->didx], 1, af); + dport = pd->hdr.udp->uh_dport; + pd->dport = &pd->hdr.udp->uh_dport; + } + rewrite++; + break; #ifdef INET - case AF_INET: + case IPPROTO_ICMP: + nk->port[0] = nk->port[1]; + if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET)) pf_change_a(&saddr->v4.s_addr, pd->ip_sum, - pd->naddr.v4.s_addr, 0); + nk->addr[pd->sidx].v4.s_addr, 0); + + if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET)) + pf_change_a(&daddr->v4.s_addr, pd->ip_sum, + nk->addr[pd->didx].v4.s_addr, 0); + + if (nk->port[1] != pd->hdr.icmp->icmp_id) { pd->hdr.icmp->icmp_cksum = pf_cksum_fixup( - pd->hdr.icmp->icmp_cksum, icmpid, nport, 0); - pd->hdr.icmp->icmp_id = nport; - m_copyback(m, off, ICMP_MINLEN, - (caddr_t)pd->hdr.icmp); - break; + pd->hdr.icmp->icmp_cksum, sport, + nk->port[1], 0); + pd->hdr.icmp->icmp_id = nk->port[1]; + pd->sport = &pd->hdr.icmp->icmp_id; + } + m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp); + break; #endif /* INET */ #ifdef INET6 - case AF_INET6: + case IPPROTO_ICMPV6: + nk->port[0] = nk->port[1]; + if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET6)) pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum, - &pd->naddr, 0); - rewrite++; - break; -#endif /* INET6 */ - } - if (nr->natpass) - r = NULL; - pd->nat_rule = nr; - } - } else { - bport = nport = icmpid; - /* check incoming packet for BINAT/RDR */ - if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn, - saddr, icmpid, daddr, icmpid, &pd->naddr, &nport)) != - NULL) { - PF_ACPY(&pd->baddr, daddr, af); + &nk->addr[pd->sidx], 0); + + if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET6)) + pf_change_a6(daddr, 
&pd->hdr.icmp6->icmp6_cksum, + &nk->addr[pd->didx], 0); + rewrite++; + break; +#endif /* INET */ + default: switch (af) { #ifdef INET case AF_INET: - pf_change_a(&daddr->v4.s_addr, - pd->ip_sum, pd->naddr.v4.s_addr, 0); + if (PF_ANEQ(saddr, + &nk->addr[pd->sidx], AF_INET)) + pf_change_a(&saddr->v4.s_addr, + pd->ip_sum, + nk->addr[pd->sidx].v4.s_addr, 0); + + if (PF_ANEQ(daddr, + &nk->addr[pd->didx], AF_INET)) + pf_change_a(&daddr->v4.s_addr, + pd->ip_sum, + nk->addr[pd->didx].v4.s_addr, 0); break; #endif /* INET */ #ifdef INET6 case AF_INET6: - pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum, - &pd->naddr, 0); - rewrite++; + if (PF_ANEQ(saddr, + &nk->addr[pd->sidx], AF_INET6)) + PF_ACPY(saddr, &nk->addr[pd->sidx], af); + + if (PF_ANEQ(daddr, + &nk->addr[pd->didx], AF_INET6)) + PF_ACPY(saddr, &nk->addr[pd->didx], af); break; -#endif /* INET6 */ +#endif /* INET */ } - if (nr->natpass) - r = NULL; - pd->nat_rule = nr; + break; } + if (nr->natpass) + r = NULL; + pd->nat_rule = nr; } while (r != NULL) { @@ -4146,284 +3550,67 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; + /* tcp/udp only. port_op always 0 in other cases */ + else if (r->src.port_op && !pf_match_port(r->src.port_op, + r->src.port[0], r->src.port[1], sport)) + r = r->skip[PF_SKIP_SRC_PORT].ptr; else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; + /* tcp/udp only. port_op always 0 in other cases */ + else if (r->dst.port_op && !pf_match_port(r->dst.port_op, + r->dst.port[0], r->dst.port[1], dport)) + r = r->skip[PF_SKIP_DST_PORT].ptr; + /* icmp only. type always 0 in other cases */ else if (r->type && r->type != icmptype + 1) r = TAILQ_NEXT(r, entries); + /* icmp only. 
type always 0 in other cases */ else if (r->code && r->code != icmpcode + 1) r = TAILQ_NEXT(r, entries); else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); else if (r->rule_flag & PFRULE_FRAGMENT) r = TAILQ_NEXT(r, entries); - else if (r->prob && r->prob <= arc4random()) - r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) - r = TAILQ_NEXT(r, entries); - else if (r->os_fingerprint != PF_OSFP_ANY) + else if (pd->proto == IPPROTO_TCP && + (r->flagset & th->th_flags) != r->flags) r = TAILQ_NEXT(r, entries); - else { - if (r->tag) - tag = r->tag; - if (r->rtableid >= 0) - rtableid = r->rtableid; - if (r->anchor == NULL) { - match = 1; - *rm = r; - *am = a; - *rsm = ruleset; - if ((*rm)->quick) - break; - r = TAILQ_NEXT(r, entries); - } else - pf_step_into_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a, &match); - } - if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a, &match)) - break; - } - r = *rm; - a = *am; - ruleset = *rsm; - - REASON_SET(&reason, PFRES_MATCH); - - if (r->log || (nr != NULL && nr->natpass && nr->log)) { -#ifdef INET6 - if (rewrite) - m_copyback(m, off, sizeof(struct icmp6_hdr), - (caddr_t)pd->hdr.icmp6); -#endif /* INET6 */ - PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? 
r : nr, - a, ruleset, pd); - } - - if (r->action != PF_PASS) - return (PF_DROP); - - if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) { - REASON_SET(&reason, PFRES_MEMORY); - return (PF_DROP); - } - - if (!state_icmp && (r->keep_state || nr != NULL)) { - /* create new state */ - struct pf_state *s = NULL; - struct pf_src_node *sn = NULL; - - /* check maximums */ - if (r->max_states && (r->states >= r->max_states)) { - pf_status.lcounters[LCNT_STATES]++; - REASON_SET(&reason, PFRES_MAXSTATES); - goto cleanup; - } - /* src node for filter rule */ - if ((r->rule_flag & PFRULE_SRCTRACK || - r->rpool.opts & PF_POOL_STICKYADDR) && - pf_insert_src_node(&sn, r, saddr, af) != 0) { - REASON_SET(&reason, PFRES_SRCLIMIT); - goto cleanup; - } - /* src node for translation rule */ - if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && - ((direction == PF_OUT && - pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) || - (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) { - REASON_SET(&reason, PFRES_SRCLIMIT); - goto cleanup; - } - s = pool_get(&pf_state_pl, PR_NOWAIT); - if (s == NULL) { - REASON_SET(&reason, PFRES_MEMORY); -cleanup: - if (sn != NULL && sn->states == 0 && sn->expire == 0) { - RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); - pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; - pf_status.src_nodes--; - pool_put(&pf_src_tree_pl, sn); - } - if (nsn != sn && nsn != NULL && nsn->states == 0 && - nsn->expire == 0) { - RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn); - pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; - pf_status.src_nodes--; - pool_put(&pf_src_tree_pl, nsn); - } - return (PF_DROP); - } - bzero(s, sizeof(*s)); - s->rule.ptr = r; - s->nat_rule.ptr = nr; - s->anchor.ptr = a; - STATE_INC_COUNTERS(s); - if (r->allow_opts) - s->state_flags |= PFSTATE_ALLOWOPTS; - if (r->rule_flag & PFRULE_STATESLOPPY) - s->state_flags |= PFSTATE_SLOPPY; - s->log = r->log & PF_LOG_ALL; - if (nr != NULL) - s->log |= nr->log & PF_LOG_ALL; - s->proto = pd->proto; - s->direction = 
direction; - s->af = af; - if (direction == PF_OUT) { - PF_ACPY(&s->gwy.addr, saddr, af); - s->gwy.port = nport; - PF_ACPY(&s->ext.addr, daddr, af); - s->ext.port = 0; - if (nr != NULL) { - PF_ACPY(&s->lan.addr, &pd->baddr, af); - s->lan.port = bport; - } else { - PF_ACPY(&s->lan.addr, &s->gwy.addr, af); - s->lan.port = s->gwy.port; - } - } else { - PF_ACPY(&s->lan.addr, daddr, af); - s->lan.port = nport; - PF_ACPY(&s->ext.addr, saddr, af); - s->ext.port = 0; - if (nr != NULL) { - PF_ACPY(&s->gwy.addr, &pd->baddr, af); - s->gwy.port = bport; - } else { - PF_ACPY(&s->gwy.addr, &s->lan.addr, af); - s->gwy.port = s->lan.port; - } - } - s->creation = time_second; - s->expire = time_second; - s->timeout = PFTM_ICMP_FIRST_PACKET; - pf_set_rt_ifp(s, saddr); - if (sn != NULL) { - s->src_node = sn; - s->src_node->states++; - } - if (nsn != NULL) { - PF_ACPY(&nsn->raddr, &pd->naddr, af); - s->nat_src_node = nsn; - s->nat_src_node->states++; - } - if (pf_insert_state(BOUND_IFACE(r, kif), s)) { - REASON_SET(&reason, PFRES_STATEINS); - pf_src_tree_remove_state(s); - STATE_DEC_COUNTERS(s); - pool_put(&pf_state_pl, s); - return (PF_DROP); - } else - *sm = s; - if (tag > 0) { - pf_tag_ref(tag); - s->tag = tag; - } - } - -#ifdef INET6 - /* copy back packet headers if we performed IPv6 NAT operations */ - if (rewrite) - m_copyback(m, off, sizeof(struct icmp6_hdr), - (caddr_t)pd->hdr.icmp6); -#endif /* INET6 */ - - return (PF_PASS); -} - -int -pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, - struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, - struct pf_rule **am, struct pf_ruleset **rsm, struct ifqueue *ifq) -{ - struct pf_rule *nr = NULL; - struct pf_rule *r, *a = NULL; - struct pf_ruleset *ruleset = NULL; - struct pf_src_node *nsn = NULL; - struct pf_addr *saddr = pd->src, *daddr = pd->dst; - sa_family_t af = pd->af; - u_short reason; - int tag = -1, rtableid = -1; - int asd = 0; - int match = 0; - - if (pf_check_congestion(ifq)) { 
- REASON_SET(&reason, PFRES_CONGEST); - return (PF_DROP); - } - - r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); - - if (direction == PF_OUT) { - /* check outgoing packet for BINAT/NAT */ - if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn, - saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) { - PF_ACPY(&pd->baddr, saddr, af); - switch (af) { -#ifdef INET - case AF_INET: - pf_change_a(&saddr->v4.s_addr, pd->ip_sum, - pd->naddr.v4.s_addr, 0); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - PF_ACPY(saddr, &pd->naddr, af); - break; -#endif /* INET6 */ - } - if (nr->natpass) - r = NULL; - pd->nat_rule = nr; - } - } else { - /* check incoming packet for BINAT/RDR */ - if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn, - saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) { - PF_ACPY(&pd->baddr, daddr, af); - switch (af) { -#ifdef INET - case AF_INET: - pf_change_a(&daddr->v4.s_addr, - pd->ip_sum, pd->naddr.v4.s_addr, 0); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - PF_ACPY(daddr, &pd->naddr, af); - break; -#endif /* INET6 */ - } - if (nr->natpass) - r = NULL; - pd->nat_rule = nr; - } - } - - while (r != NULL) { - r->evaluations++; - if (pfi_kif_match(r->kif, kif) == r->ifnot) - r = r->skip[PF_SKIP_IFP].ptr; - else if (r->direction && r->direction != direction) - r = r->skip[PF_SKIP_DIR].ptr; - else if (r->af && r->af != af) - r = r->skip[PF_SKIP_AF].ptr; - else if (r->proto && r->proto != pd->proto) - r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, - r->src.neg, kif)) - r = r->skip[PF_SKIP_SRC_ADDR].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, - r->dst.neg, NULL)) - r = r->skip[PF_SKIP_DST_ADDR].ptr; - else if (r->tos && !(r->tos == pd->tos)) + /* tcp/udp only. 
uid.op always 0 in other cases */ + else if (r->uid.op && (pd->lookup.done || (pd->lookup.done = +#ifdef __FreeBSD__ + pf_socket_lookup(direction, pd, inp), 1)) && +#else + pf_socket_lookup(direction, pd), 1)) && +#endif + !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], + pd->lookup.uid)) r = TAILQ_NEXT(r, entries); - else if (r->rule_flag & PFRULE_FRAGMENT) + /* tcp/udp only. gid.op always 0 in other cases */ + else if (r->gid.op && (pd->lookup.done || (pd->lookup.done = +#ifdef __FreeBSD__ + pf_socket_lookup(direction, pd, inp), 1)) && +#else + pf_socket_lookup(direction, pd), 1)) && +#endif + !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], + pd->lookup.gid)) r = TAILQ_NEXT(r, entries); - else if (r->prob && r->prob <= arc4random()) + else if (r->prob && +#ifdef __FreeBSD__ + r->prob <= arc4random()) +#else + r->prob <= arc4random_uniform(UINT_MAX - 1) + 1) +#endif r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) +#ifdef __FreeBSD__ + else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag)) +#else + else if (r->match_tag && !pf_match_tag(m, r, &tag)) +#endif r = TAILQ_NEXT(r, entries); - else if (r->os_fingerprint != PF_OSFP_ANY) + else if (r->os_fingerprint != PF_OSFP_ANY && + (pd->proto != IPPROTO_TCP || !pf_osfp_match( + pf_osfp_fingerprint(pd, m, off, th), + r->os_fingerprint))) r = TAILQ_NEXT(r, entries); else { if (r->tag) @@ -4452,157 +3639,428 @@ pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, REASON_SET(&reason, PFRES_MATCH); - if (r->log || (nr != NULL && nr->natpass && nr->log)) + if (r->log || (nr != NULL && nr->log)) { + if (rewrite) + m_copyback(m, off, hdrlen, pd->hdr.any); PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? 
r : nr, a, ruleset, pd); + } if ((r->action == PF_DROP) && - ((r->rule_flag & PFRULE_RETURNICMP) || + ((r->rule_flag & PFRULE_RETURNRST) || + (r->rule_flag & PFRULE_RETURNICMP) || (r->rule_flag & PFRULE_RETURN))) { - struct pf_addr *a = NULL; - + /* undo NAT changes, if they have taken place */ if (nr != NULL) { - if (direction == PF_OUT) - a = saddr; - else - a = daddr; - } - if (a != NULL) { + PF_ACPY(saddr, &sk->addr[pd->sidx], af); + PF_ACPY(daddr, &sk->addr[pd->didx], af); + if (pd->sport) + *pd->sport = sk->port[pd->sidx]; + if (pd->dport) + *pd->dport = sk->port[pd->didx]; + if (pd->proto_sum) + *pd->proto_sum = bproto_sum; + if (pd->ip_sum) + *pd->ip_sum = bip_sum; + m_copyback(m, off, hdrlen, pd->hdr.any); + } + if (pd->proto == IPPROTO_TCP && + ((r->rule_flag & PFRULE_RETURNRST) || + (r->rule_flag & PFRULE_RETURN)) && + !(th->th_flags & TH_RST)) { + u_int32_t ack = ntohl(th->th_seq) + pd->p_len; + int len = 0; +#ifdef INET + struct ip *h4; +#endif +#ifdef INET6 + struct ip6_hdr *h6; +#endif + switch (af) { #ifdef INET case AF_INET: - pf_change_a(&a->v4.s_addr, pd->ip_sum, - pd->baddr.v4.s_addr, 0); + h4 = mtod(m, struct ip *); + len = ntohs(h4->ip_len) - off; break; -#endif /* INET */ +#endif #ifdef INET6 case AF_INET6: - PF_ACPY(a, &pd->baddr, af); + h6 = mtod(m, struct ip6_hdr *); + len = ntohs(h6->ip6_plen) - (off - sizeof(*h6)); break; -#endif /* INET6 */ +#endif + } + + if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af)) + REASON_SET(&reason, PFRES_PROTCKSUM); + else { + if (th->th_flags & TH_SYN) + ack++; + if (th->th_flags & TH_FIN) + ack++; +#ifdef __FreeBSD__ + pf_send_tcp(m, r, af, pd->dst, +#else + pf_send_tcp(r, af, pd->dst, +#endif + pd->src, th->th_dport, th->th_sport, + ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, + r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp); } - } - if ((af == AF_INET) && r->return_icmp) + } else if (pd->proto != IPPROTO_ICMP && af == AF_INET && + r->return_icmp) pf_send_icmp(m, r->return_icmp >> 8, r->return_icmp & 
255, af, r); - else if ((af == AF_INET6) && r->return_icmp6) + else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 && + r->return_icmp6) pf_send_icmp(m, r->return_icmp6 >> 8, r->return_icmp6 & 255, af, r); } - if (r->action != PF_PASS) - return (PF_DROP); + if (r->action == PF_DROP) + goto cleanup; - if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) { +#ifdef __FreeBSD__ + if (pf_tag_packet(m, tag, rtableid, pd->pf_mtag)) { +#else + if (pf_tag_packet(m, tag, rtableid)) { +#endif REASON_SET(&reason, PFRES_MEMORY); - return (PF_DROP); + goto cleanup; } - if (r->keep_state || nr != NULL) { - /* create new state */ - struct pf_state *s = NULL; - struct pf_src_node *sn = NULL; + if (!state_icmp && (r->keep_state || nr != NULL || + (pd->flags & PFDESC_TCP_NORM))) { + int action; + action = pf_create_state(r, nr, a, pd, nsn, skw, sks, nk, sk, m, + off, sport, dport, &rewrite, kif, sm, tag, bproto_sum, + bip_sum, hdrlen); + if (action != PF_PASS) + return (action); + } else { +#ifdef __FreeBSD__ + if (sk != NULL) + pool_put(&V_pf_state_key_pl, sk); + if (nk != NULL) + pool_put(&V_pf_state_key_pl, nk); +#else + if (sk != NULL) + pool_put(&pf_state_key_pl, sk); + if (nk != NULL) + pool_put(&pf_state_key_pl, nk); +#endif + } + + /* copy back packet headers if we performed NAT operations */ + if (rewrite) + m_copyback(m, off, hdrlen, pd->hdr.any); + +#if NPFSYNC > 0 + if (*sm != NULL && !ISSET((*sm)->state_flags, PFSTATE_NOSYNC) && +#ifdef __FreeBSD__ + direction == PF_OUT && pfsync_up_ptr != NULL && pfsync_up_ptr()) { +#else + direction == PF_OUT && pfsync_up()) { +#endif + /* + * We want the state created, but we dont + * want to send this in case a partner + * firewall has to know about it to allow + * replies through it. 
+ */ +#ifdef __FreeBSD__ + if (pfsync_defer_ptr != NULL) + pfsync_defer_ptr(*sm, m); +#else + if (pfsync_defer(*sm, m)) +#endif + return (PF_DEFER); + } +#endif + + return (PF_PASS); - /* check maximums */ - if (r->max_states && (r->states >= r->max_states)) { - pf_status.lcounters[LCNT_STATES]++; - REASON_SET(&reason, PFRES_MAXSTATES); - goto cleanup; - } - /* src node for filter rule */ - if ((r->rule_flag & PFRULE_SRCTRACK || - r->rpool.opts & PF_POOL_STICKYADDR) && - pf_insert_src_node(&sn, r, saddr, af) != 0) { - REASON_SET(&reason, PFRES_SRCLIMIT); - goto cleanup; - } - /* src node for translation rule */ - if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && - ((direction == PF_OUT && - pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) || - (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) { - REASON_SET(&reason, PFRES_SRCLIMIT); - goto cleanup; - } - s = pool_get(&pf_state_pl, PR_NOWAIT); - if (s == NULL) { - REASON_SET(&reason, PFRES_MEMORY); cleanup: - if (sn != NULL && sn->states == 0 && sn->expire == 0) { - RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); - pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; - pf_status.src_nodes--; - pool_put(&pf_src_tree_pl, sn); - } - if (nsn != sn && nsn != NULL && nsn->states == 0 && - nsn->expire == 0) { - RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn); - pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; - pf_status.src_nodes--; - pool_put(&pf_src_tree_pl, nsn); - } - return (PF_DROP); +#ifdef __FreeBSD__ + if (sk != NULL) + pool_put(&V_pf_state_key_pl, sk); + if (nk != NULL) + pool_put(&V_pf_state_key_pl, nk); +#else + if (sk != NULL) + pool_put(&pf_state_key_pl, sk); + if (nk != NULL) + pool_put(&pf_state_key_pl, nk); +#endif + return (PF_DROP); +} + +static __inline int +pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, + struct pf_pdesc *pd, struct pf_src_node *nsn, struct pf_state_key *skw, + struct pf_state_key *sks, struct pf_state_key *nk, struct pf_state_key *sk, + struct mbuf *m, 
int off, u_int16_t sport, u_int16_t dport, int *rewrite, + struct pfi_kif *kif, struct pf_state **sm, int tag, u_int16_t bproto_sum, + u_int16_t bip_sum, int hdrlen) +{ + struct pf_state *s = NULL; + struct pf_src_node *sn = NULL; + struct tcphdr *th = pd->hdr.tcp; +#ifdef __FreeBSD__ + u_int16_t mss = V_tcp_mssdflt; +#else + u_int16_t mss = tcp_mssdflt; +#endif + u_short reason; + + /* check maximums */ + if (r->max_states && (r->states_cur >= r->max_states)) { +#ifdef __FreeBSD__ + V_pf_status.lcounters[LCNT_STATES]++; +#else + pf_status.lcounters[LCNT_STATES]++; +#endif + REASON_SET(&reason, PFRES_MAXSTATES); + return (PF_DROP); + } + /* src node for filter rule */ + if ((r->rule_flag & PFRULE_SRCTRACK || + r->rpool.opts & PF_POOL_STICKYADDR) && + pf_insert_src_node(&sn, r, pd->src, pd->af) != 0) { + REASON_SET(&reason, PFRES_SRCLIMIT); + goto csfailed; + } + /* src node for translation rule */ + if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && + pf_insert_src_node(&nsn, nr, &sk->addr[pd->sidx], pd->af)) { + REASON_SET(&reason, PFRES_SRCLIMIT); + goto csfailed; + } +#ifdef __FreeBSD__ + s = pool_get(&V_pf_state_pl, PR_NOWAIT | PR_ZERO); +#else + s = pool_get(&pf_state_pl, PR_NOWAIT | PR_ZERO); +#endif + if (s == NULL) { + REASON_SET(&reason, PFRES_MEMORY); + goto csfailed; + } + s->rule.ptr = r; + s->nat_rule.ptr = nr; + s->anchor.ptr = a; + STATE_INC_COUNTERS(s); + if (r->allow_opts) + s->state_flags |= PFSTATE_ALLOWOPTS; + if (r->rule_flag & PFRULE_STATESLOPPY) + s->state_flags |= PFSTATE_SLOPPY; + if (r->rule_flag & PFRULE_PFLOW) + s->state_flags |= PFSTATE_PFLOW; + s->log = r->log & PF_LOG_ALL; + s->sync_state = PFSYNC_S_NONE; + if (nr != NULL) + s->log |= nr->log & PF_LOG_ALL; + switch (pd->proto) { + case IPPROTO_TCP: + s->src.seqlo = ntohl(th->th_seq); + s->src.seqhi = s->src.seqlo + pd->p_len + 1; + if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && + r->keep_state == PF_STATE_MODULATE) { + /* Generate sequence number modulator */ + if 
((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) == + 0) + s->src.seqdiff = 1; + pf_change_a(&th->th_seq, &th->th_sum, + htonl(s->src.seqlo + s->src.seqdiff), 0); + *rewrite = 1; + } else + s->src.seqdiff = 0; + if (th->th_flags & TH_SYN) { + s->src.seqhi++; + s->src.wscale = pf_get_wscale(m, off, + th->th_off, pd->af); } - bzero(s, sizeof(*s)); - s->rule.ptr = r; - s->nat_rule.ptr = nr; - s->anchor.ptr = a; - STATE_INC_COUNTERS(s); - if (r->allow_opts) - s->state_flags |= PFSTATE_ALLOWOPTS; - if (r->rule_flag & PFRULE_STATESLOPPY) - s->state_flags |= PFSTATE_SLOPPY; - s->log = r->log & PF_LOG_ALL; - if (nr != NULL) - s->log |= nr->log & PF_LOG_ALL; - s->proto = pd->proto; - s->direction = direction; - s->af = af; - if (direction == PF_OUT) { - PF_ACPY(&s->gwy.addr, saddr, af); - PF_ACPY(&s->ext.addr, daddr, af); - if (nr != NULL) - PF_ACPY(&s->lan.addr, &pd->baddr, af); - else - PF_ACPY(&s->lan.addr, &s->gwy.addr, af); - } else { - PF_ACPY(&s->lan.addr, daddr, af); - PF_ACPY(&s->ext.addr, saddr, af); - if (nr != NULL) - PF_ACPY(&s->gwy.addr, &pd->baddr, af); - else - PF_ACPY(&s->gwy.addr, &s->lan.addr, af); + s->src.max_win = MAX(ntohs(th->th_win), 1); + if (s->src.wscale & PF_WSCALE_MASK) { + /* Remove scale factor from initial window */ + int win = s->src.max_win; + win += 1 << (s->src.wscale & PF_WSCALE_MASK); + s->src.max_win = (win - 1) >> + (s->src.wscale & PF_WSCALE_MASK); } + if (th->th_flags & TH_FIN) + s->src.seqhi++; + s->dst.seqhi = 1; + s->dst.max_win = 1; + s->src.state = TCPS_SYN_SENT; + s->dst.state = TCPS_CLOSED; + s->timeout = PFTM_TCP_FIRST_PACKET; + break; + case IPPROTO_UDP: + s->src.state = PFUDPS_SINGLE; + s->dst.state = PFUDPS_NO_TRAFFIC; + s->timeout = PFTM_UDP_FIRST_PACKET; + break; + case IPPROTO_ICMP: +#ifdef INET6 + case IPPROTO_ICMPV6: +#endif + s->timeout = PFTM_ICMP_FIRST_PACKET; + break; + default: s->src.state = PFOTHERS_SINGLE; s->dst.state = PFOTHERS_NO_TRAFFIC; - s->creation = time_second; - s->expire = time_second; s->timeout 
= PFTM_OTHER_FIRST_PACKET; - pf_set_rt_ifp(s, saddr); - if (sn != NULL) { - s->src_node = sn; - s->src_node->states++; - } - if (nsn != NULL) { - PF_ACPY(&nsn->raddr, &pd->naddr, af); - s->nat_src_node = nsn; - s->nat_src_node->states++; + } + + s->creation = time_second; + s->expire = time_second; + + if (sn != NULL) { + s->src_node = sn; + s->src_node->states++; + } + if (nsn != NULL) { + /* XXX We only modify one side for now. */ + PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af); + s->nat_src_node = nsn; + s->nat_src_node->states++; + } + if (pd->proto == IPPROTO_TCP) { + if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m, + off, pd, th, &s->src, &s->dst)) { + REASON_SET(&reason, PFRES_MEMORY); + pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); +#ifdef __FreeBSD__ + pool_put(&V_pf_state_pl, s); +#else + pool_put(&pf_state_pl, s); +#endif + return (PF_DROP); } - if (pf_insert_state(BOUND_IFACE(r, kif), s)) { - REASON_SET(&reason, PFRES_STATEINS); + if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub && + pf_normalize_tcp_stateful(m, off, pd, &reason, th, s, + &s->src, &s->dst, rewrite)) { + /* This really shouldn't happen!!! 
*/ + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_normalize_tcp_stateful failed on first pkt")); + pf_normalize_tcp_cleanup(s); pf_src_tree_remove_state(s); STATE_DEC_COUNTERS(s); +#ifdef __FreeBSD__ + pool_put(&V_pf_state_pl, s); +#else pool_put(&pf_state_pl, s); +#endif return (PF_DROP); - } else - *sm = s; - if (tag > 0) { - pf_tag_ref(tag); - s->tag = tag; } } + s->direction = pd->dir; + + if (sk == NULL && pf_state_key_setup(pd, nr, &skw, &sks, &sk, &nk, + pd->src, pd->dst, sport, dport)) + goto csfailed; + + if (pf_state_insert(BOUND_IFACE(r, kif), skw, sks, s)) { + if (pd->proto == IPPROTO_TCP) + pf_normalize_tcp_cleanup(s); + REASON_SET(&reason, PFRES_STATEINS); + pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); +#ifdef __FreeBSD__ + pool_put(&V_pf_state_pl, s); +#else + pool_put(&pf_state_pl, s); +#endif + return (PF_DROP); + } else + *sm = s; + + pf_set_rt_ifp(s, pd->src); /* needs s->state_key set */ + if (tag > 0) { + pf_tag_ref(tag); + s->tag = tag; + } + if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) == + TH_SYN && r->keep_state == PF_STATE_SYNPROXY) { + s->src.state = PF_TCPS_PROXY_SRC; + /* undo NAT changes, if they have taken place */ + if (nr != NULL) { + struct pf_state_key *skt = s->key[PF_SK_WIRE]; + if (pd->dir == PF_OUT) + skt = s->key[PF_SK_STACK]; + PF_ACPY(pd->src, &skt->addr[pd->sidx], pd->af); + PF_ACPY(pd->dst, &skt->addr[pd->didx], pd->af); + if (pd->sport) + *pd->sport = skt->port[pd->sidx]; + if (pd->dport) + *pd->dport = skt->port[pd->didx]; + if (pd->proto_sum) + *pd->proto_sum = bproto_sum; + if (pd->ip_sum) + *pd->ip_sum = bip_sum; + m_copyback(m, off, hdrlen, pd->hdr.any); + } + s->src.seqhi = htonl(arc4random()); + /* Find mss option */ + mss = pf_get_mss(m, off, th->th_off, pd->af); + mss = pf_calc_mss(pd->src, pd->af, mss); + mss = pf_calc_mss(pd->dst, pd->af, mss); + s->src.mss = mss; +#ifdef __FreeBSD__ + pf_send_tcp(NULL, r, pd->af, pd->dst, pd->src, th->th_dport, +#else + pf_send_tcp(r, pd->af, pd->dst, 
pd->src, th->th_dport, +#endif + th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, + TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL); + REASON_SET(&reason, PFRES_SYNPROXY); + return (PF_SYNPROXY_DROP); + } return (PF_PASS); + +csfailed: +#ifdef __FreeBSD__ + if (sk != NULL) + pool_put(&V_pf_state_key_pl, sk); + if (nk != NULL) + pool_put(&V_pf_state_key_pl, nk); +#else + if (sk != NULL) + pool_put(&pf_state_key_pl, sk); + if (nk != NULL) + pool_put(&pf_state_key_pl, nk); +#endif + + if (sn != NULL && sn->states == 0 && sn->expire == 0) { +#ifdef __FreeBSD__ + RB_REMOVE(pf_src_tree, &V_tree_src_tracking, sn); + V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; + V_pf_status.src_nodes--; + pool_put(&V_pf_src_tree_pl, sn); +#else + RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); + pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; + pf_status.src_nodes--; + pool_put(&pf_src_tree_pl, sn); +#endif + } + if (nsn != sn && nsn != NULL && nsn->states == 0 && nsn->expire == 0) { +#ifdef __FreeBSD__ + RB_REMOVE(pf_src_tree, &V_tree_src_tracking, nsn); + V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; + V_pf_status.src_nodes--; + pool_put(&V_pf_src_tree_pl, nsn); +#else + RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn); + pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; + pf_status.src_nodes--; + pool_put(&pf_src_tree_pl, nsn); +#endif + } + return (PF_DROP); } int @@ -4649,9 +4107,14 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, pd->proto == IPPROTO_ICMPV6) && (r->type || r->code)) r = TAILQ_NEXT(r, entries); - else if (r->prob && r->prob <= arc4random()) + else if (r->prob && r->prob <= + (arc4random() % (UINT_MAX - 1) + 1)) r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) +#ifdef __FreeBSD__ + else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag)) +#else + else if (r->match_tag && !pf_match_tag(m, r, &tag)) +#endif r = TAILQ_NEXT(r, entries); else { if (r->anchor == NULL) { @@ -4683,7 
+4146,11 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, if (r->action != PF_PASS) return (PF_DROP); - if (pf_tag_packet(m, pd->pf_mtag, tag, -1)) { +#ifdef __FreeBSD__ + if (pf_tag_packet(m, tag, -1, pd->pf_mtag)) { +#else + if (pf_tag_packet(m, tag, -1)) { +#endif REASON_SET(&reason, PFRES_MEMORY); return (PF_DROP); } @@ -4696,11 +4163,11 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, struct pf_state **state, struct pfi_kif *kif, struct mbuf *m, int off, struct pf_pdesc *pd, u_short *reason, int *copyback) { - struct tcphdr *th = pd->hdr.tcp; - u_int16_t win = ntohs(th->th_win); - u_int32_t ack, end, seq, orig_seq; - u_int8_t sws, dws; - int ackskew; + struct tcphdr *th = pd->hdr.tcp; + u_int16_t win = ntohs(th->th_win); + u_int32_t ack, end, seq, orig_seq; + u_int8_t sws, dws; + int ackskew; if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) { sws = src->wscale & PF_WSCALE_MASK; @@ -4728,13 +4195,9 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, /* Deferred generation of sequence number modulator */ if (dst->seqdiff && !src->seqdiff) { -#ifdef __FreeBSD__ - while ((src->seqdiff = pf_new_isn(*state) - seq) == 0) - ; -#else - while ((src->seqdiff = tcp_rndiss_next() - seq) == 0) + /* use random iss for the TCP server */ + while ((src->seqdiff = arc4random() - seq) == 0) ; -#endif ack = ntohl(th->th_ack) - dst->seqdiff; pf_change_a(&th->th_seq, &th->th_sum, htonl(seq + src->seqdiff), 0); @@ -4841,7 +4304,7 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, } -#define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */ +#define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */ if (SEQ_GEQ(src->seqhi, end) && /* Last octet inside other's window space */ SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) && @@ -4851,7 +4314,8 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, (ackskew <= 
(MAXACKWINDOW << sws)) && /* Acking not more than one window forward */ ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo || - (orig_seq == src->seqlo + 1) || (pd->flags & PFDESC_IP_REAS) == 0)) { + (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) || + (pd->flags & PFDESC_IP_REAS) == 0)) { /* Require an exact/+1 sequence match on resets when possible */ if (dst->scrub || src->scrub) { @@ -4941,19 +4405,25 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, * and keep updating the state TTL. */ +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else if (pf_status.debug >= PF_DEBUG_MISC) { +#endif printf("pf: loose state match: "); pf_print_state(*state); pf_print_flags(th->th_flags); printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " - "pkts=%llu:%llu\n", seq, orig_seq, ack, pd->p_len, + "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, #ifdef __FreeBSD__ - ackskew, (unsigned long long)(*state)->packets[0], - (unsigned long long)(*state)->packets[1]); + pd->p_len, ackskew, (unsigned long long)(*state)->packets[0], + (unsigned long long)(*state)->packets[1], #else - ackskew, (*state)->packets[0], - (*state)->packets[1]); + pd->p_len, ackskew, (*state)->packets[0], + (*state)->packets[1], #endif + pd->dir == PF_IN ? "in" : "out", + pd->dir == (*state)->direction ? 
"fwd" : "rev"); } if (dst->scrub || src->scrub) { @@ -4991,7 +4461,7 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, /* Send RST for state mismatches during handshake */ if (!(th->th_flags & TH_RST)) #ifdef __FreeBSD__ - pf_send_tcp(m, (*state)->rule.ptr, pd->af, + pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, #else pf_send_tcp((*state)->rule.ptr, pd->af, #endif @@ -5003,16 +4473,16 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, src->seqlo = 0; src->seqhi = 1; src->max_win = 1; +#ifdef __FreeBSD__ + } else if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else } else if (pf_status.debug >= PF_DEBUG_MISC) { +#endif printf("pf: BAD state: "); pf_print_state(*state); pf_print_flags(th->th_flags); printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " -#ifdef notyet "pkts=%llu:%llu dir=%s,%s\n", -#else - "pkts=%llu:%llu%s\n", -#endif seq, orig_seq, ack, pd->p_len, ackskew, #ifdef __FreeBSD__ (unsigned long long)(*state)->packets[0], @@ -5020,12 +4490,8 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, #else (*state)->packets[0], (*state)->packets[1], #endif -#ifdef notyet - direction == PF_IN ? "in" : "out", - direction == (*state)->direction ? "fwd" : "rev"); -#else - ""); -#endif + pd->dir == PF_IN ? "in" : "out", + pd->dir == (*state)->direction ? "fwd" : "rev"); printf("pf: State failure on: %c %c %c %c | %c %c\n", SEQ_GEQ(src->seqhi, end) ? ' ' : '1', SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ? 
@@ -5039,7 +4505,6 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, return (PF_DROP); } - /* Any packets which have gotten here are to be passed */ return (PF_PASS); } @@ -5114,32 +4579,36 @@ pf_tcp_track_sloppy(struct pf_state_peer *src, struct pf_state_peer *dst, return (PF_PASS); } - int pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason) { - struct pf_state_cmp key; + struct pf_state_key_cmp key; struct tcphdr *th = pd->hdr.tcp; int copyback = 0; struct pf_state_peer *src, *dst; + struct pf_state_key *sk; key.af = pd->af; key.proto = IPPROTO_TCP; - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd->src, key.af); - PF_ACPY(&key.gwy.addr, pd->dst, key.af); - key.ext.port = th->th_sport; - key.gwy.port = th->th_dport; - } else { - PF_ACPY(&key.lan.addr, pd->src, key.af); - PF_ACPY(&key.ext.addr, pd->dst, key.af); - key.lan.port = th->th_sport; - key.ext.port = th->th_dport; + if (direction == PF_IN) { /* wire side, straight */ + PF_ACPY(&key.addr[0], pd->src, key.af); + PF_ACPY(&key.addr[1], pd->dst, key.af); + key.port[0] = th->th_sport; + key.port[1] = th->th_dport; + } else { /* stack side, reverse */ + PF_ACPY(&key.addr[1], pd->src, key.af); + PF_ACPY(&key.addr[0], pd->dst, key.af); + key.port[1] = th->th_sport; + key.port[0] = th->th_dport; } - STATE_LOOKUP(); +#ifdef __FreeBSD__ + STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); +#else + STATE_LOOKUP(kif, &key, direction, *state, m); +#endif if (direction == (*state)->direction) { src = &(*state)->src; @@ -5149,6 +4618,8 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, dst = &(*state)->src; } + sk = (*state)->key[pd->didx]; + if ((*state)->src.state == PF_TCPS_PROXY_SRC) { if (direction != (*state)->direction) { REASON_SET(reason, PFRES_SYNPROXY); @@ -5183,15 +4654,6 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct 
pfi_kif *kif, (*state)->src.state = PF_TCPS_PROXY_DST; } if ((*state)->src.state == PF_TCPS_PROXY_DST) { - struct pf_state_host *src, *dst; - - if (direction == PF_OUT) { - src = &(*state)->gwy; - dst = &(*state)->ext; - } else { - src = &(*state)->ext; - dst = &(*state)->lan; - } if (direction == (*state)->direction) { if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || @@ -5204,11 +4666,11 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, (*state)->dst.seqhi = htonl(arc4random()); #ifdef __FreeBSD__ pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, - &src->addr, #else - pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr, + pf_send_tcp((*state)->rule.ptr, pd->af, #endif - &dst->addr, src->port, dst->port, + &sk->addr[pd->sidx], &sk->addr[pd->didx], + sk->port[pd->sidx], sk->port[pd->didx], (*state)->dst.seqhi, 0, TH_SYN, 0, (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL); REASON_SET(reason, PFRES_SYNPROXY); @@ -5232,11 +4694,11 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, (*state)->tag, NULL, NULL); #ifdef __FreeBSD__ pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, - &src->addr, #else - pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr, + pf_send_tcp((*state)->rule.ptr, pd->af, #endif - &dst->addr, src->port, dst->port, + &sk->addr[pd->sidx], &sk->addr[pd->didx], + sk->port[pd->sidx], sk->port[pd->didx], (*state)->src.seqhi + 1, (*state)->src.seqlo + 1, TH_ACK, (*state)->dst.max_win, 0, 0, 1, 0, NULL, NULL); @@ -5259,7 +4721,11 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) && dst->state >= TCPS_FIN_WAIT_2 && src->state >= TCPS_FIN_WAIT_2) { +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else if (pf_status.debug >= PF_DEBUG_MISC) { +#endif printf("pf: state reuse "); pf_print_state(*state); pf_print_flags(th->th_flags); @@ -5282,21 +4748,31 @@ 
pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, } /* translate source/destination address, if necessary */ - if (STATE_TRANSLATE(*state)) { - if (direction == PF_OUT) + if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = (*state)->key[pd->didx]; + + if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) || + nk->port[pd->sidx] != th->th_sport) pf_change_ap(pd->src, &th->th_sport, pd->ip_sum, - &th->th_sum, &(*state)->gwy.addr, - (*state)->gwy.port, 0, pd->af); - else + &th->th_sum, &nk->addr[pd->sidx], + nk->port[pd->sidx], 0, pd->af); + + if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) || + nk->port[pd->didx] != th->th_dport) pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum, - &th->th_sum, &(*state)->lan.addr, - (*state)->lan.port, 0, pd->af); - m_copyback(m, off, sizeof(*th), (caddr_t)th); - } else if (copyback) { - /* Copyback sequence modulation or stateful scrub changes */ - m_copyback(m, off, sizeof(*th), (caddr_t)th); + &th->th_sum, &nk->addr[pd->didx], + nk->port[pd->didx], 0, pd->af); + copyback = 1; } + /* Copyback sequence modulation or stateful scrub changes if needed */ + if (copyback) +#ifdef __FreeBSD__ + m_copyback(m, off, sizeof(*th), (caddr_t)th); +#else + m_copyback(m, off, sizeof(*th), th); +#endif + return (PF_PASS); } @@ -5305,24 +4781,28 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd) { struct pf_state_peer *src, *dst; - struct pf_state_cmp key; + struct pf_state_key_cmp key; struct udphdr *uh = pd->hdr.udp; key.af = pd->af; key.proto = IPPROTO_UDP; - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd->src, key.af); - PF_ACPY(&key.gwy.addr, pd->dst, key.af); - key.ext.port = uh->uh_sport; - key.gwy.port = uh->uh_dport; - } else { - PF_ACPY(&key.lan.addr, pd->src, key.af); - PF_ACPY(&key.ext.addr, pd->dst, key.af); - key.lan.port = uh->uh_sport; - key.ext.port = uh->uh_dport; + if 
(direction == PF_IN) { /* wire side, straight */ + PF_ACPY(&key.addr[0], pd->src, key.af); + PF_ACPY(&key.addr[1], pd->dst, key.af); + key.port[0] = uh->uh_sport; + key.port[1] = uh->uh_dport; + } else { /* stack side, reverse */ + PF_ACPY(&key.addr[1], pd->src, key.af); + PF_ACPY(&key.addr[0], pd->dst, key.af); + key.port[1] = uh->uh_sport; + key.port[0] = uh->uh_dport; } - STATE_LOOKUP(); +#ifdef __FreeBSD__ + STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); +#else + STATE_LOOKUP(kif, &key, direction, *state, m); +#endif if (direction == (*state)->direction) { src = &(*state)->src; @@ -5346,16 +4826,25 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, (*state)->timeout = PFTM_UDP_SINGLE; /* translate source/destination address, if necessary */ - if (STATE_TRANSLATE(*state)) { - if (direction == PF_OUT) + if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = (*state)->key[pd->didx]; + + if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) || + nk->port[pd->sidx] != uh->uh_sport) pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum, - &uh->uh_sum, &(*state)->gwy.addr, - (*state)->gwy.port, 1, pd->af); - else + &uh->uh_sum, &nk->addr[pd->sidx], + nk->port[pd->sidx], 1, pd->af); + + if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) || + nk->port[pd->didx] != uh->uh_dport) pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum, - &uh->uh_sum, &(*state)->lan.addr, - (*state)->lan.port, 1, pd->af); + &uh->uh_sum, &nk->addr[pd->didx], + nk->port[pd->didx], 1, pd->af); +#ifdef __FreeBSD__ m_copyback(m, off, sizeof(*uh), (caddr_t)uh); +#else + m_copyback(m, off, sizeof(*uh), uh); +#endif } return (PF_PASS); @@ -5365,12 +4854,15 @@ int pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason) { - struct pf_addr *saddr = pd->src, *daddr = pd->dst; - u_int16_t icmpid = 0; /* make the compiler happy */ - u_int16_t 
*icmpsum = NULL; /* make the compiler happy */ - u_int8_t icmptype = 0; /* make the compiler happy */ + struct pf_addr *saddr = pd->src, *daddr = pd->dst; +#ifdef __FreeBSD__ + u_int16_t icmpid = 0, *icmpsum; +#else + u_int16_t icmpid, *icmpsum; +#endif + u_int8_t icmptype; int state_icmp = 0; - struct pf_state_cmp key; + struct pf_state_key_cmp key; switch (pd->proto) { #ifdef INET @@ -5410,84 +4902,84 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, */ key.af = pd->af; key.proto = pd->proto; - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd->src, key.af); - PF_ACPY(&key.gwy.addr, pd->dst, key.af); - key.ext.port = 0; - key.gwy.port = icmpid; - } else { - PF_ACPY(&key.lan.addr, pd->src, key.af); - PF_ACPY(&key.ext.addr, pd->dst, key.af); - key.lan.port = icmpid; - key.ext.port = 0; + key.port[0] = key.port[1] = icmpid; + if (direction == PF_IN) { /* wire side, straight */ + PF_ACPY(&key.addr[0], pd->src, key.af); + PF_ACPY(&key.addr[1], pd->dst, key.af); + } else { /* stack side, reverse */ + PF_ACPY(&key.addr[1], pd->src, key.af); + PF_ACPY(&key.addr[0], pd->dst, key.af); } - STATE_LOOKUP(); +#ifdef __FreeBSD__ + STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); +#else + STATE_LOOKUP(kif, &key, direction, *state, m); +#endif (*state)->expire = time_second; (*state)->timeout = PFTM_ICMP_ERROR_REPLY; /* translate source/destination address, if necessary */ - if (STATE_TRANSLATE(*state)) { - if (direction == PF_OUT) { - switch (pd->af) { -#ifdef INET - case AF_INET: - pf_change_a(&saddr->v4.s_addr, - pd->ip_sum, - (*state)->gwy.addr.v4.s_addr, 0); - pd->hdr.icmp->icmp_cksum = - pf_cksum_fixup( - pd->hdr.icmp->icmp_cksum, icmpid, - (*state)->gwy.port, 0); - pd->hdr.icmp->icmp_id = - (*state)->gwy.port; - m_copyback(m, off, ICMP_MINLEN, - (caddr_t)pd->hdr.icmp); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - pf_change_a6(saddr, - &pd->hdr.icmp6->icmp6_cksum, - &(*state)->gwy.addr, 0); - m_copyback(m, off, 
- sizeof(struct icmp6_hdr), - (caddr_t)pd->hdr.icmp6); - break; -#endif /* INET6 */ - } - } else { - switch (pd->af) { + if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = (*state)->key[pd->didx]; + + switch (pd->af) { #ifdef INET - case AF_INET: + case AF_INET: + if (PF_ANEQ(pd->src, + &nk->addr[pd->sidx], AF_INET)) + pf_change_a(&saddr->v4.s_addr, + pd->ip_sum, + nk->addr[pd->sidx].v4.s_addr, 0); + + if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], + AF_INET)) pf_change_a(&daddr->v4.s_addr, pd->ip_sum, - (*state)->lan.addr.v4.s_addr, 0); + nk->addr[pd->didx].v4.s_addr, 0); + + if (nk->port[0] != + pd->hdr.icmp->icmp_id) { pd->hdr.icmp->icmp_cksum = pf_cksum_fixup( pd->hdr.icmp->icmp_cksum, icmpid, - (*state)->lan.port, 0); + nk->port[pd->sidx], 0); pd->hdr.icmp->icmp_id = - (*state)->lan.port; - m_copyback(m, off, ICMP_MINLEN, - (caddr_t)pd->hdr.icmp); - break; + nk->port[pd->sidx]; + } + + m_copyback(m, off, ICMP_MINLEN, +#ifdef __FreeBSD__ + (caddr_t) +#endif + pd->hdr.icmp); + break; #endif /* INET */ #ifdef INET6 - case AF_INET6: + case AF_INET6: + if (PF_ANEQ(pd->src, + &nk->addr[pd->sidx], AF_INET6)) + pf_change_a6(saddr, + &pd->hdr.icmp6->icmp6_cksum, + &nk->addr[pd->sidx], 0); + + if (PF_ANEQ(pd->dst, + &nk->addr[pd->didx], AF_INET6)) pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum, - &(*state)->lan.addr, 0); - m_copyback(m, off, - sizeof(struct icmp6_hdr), - (caddr_t)pd->hdr.icmp6); - break; + &nk->addr[pd->didx], 0); + + m_copyback(m, off, + sizeof(struct icmp6_hdr), +#ifdef __FreeBSD__ + (caddr_t) +#endif + pd->hdr.icmp6); + break; #endif /* INET6 */ - } } } - return (PF_PASS); } else { @@ -5497,6 +4989,9 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, */ struct pf_pdesc pd2; +#ifdef __FreeBSD__ + bzero(&pd2, sizeof pd2); +#endif #ifdef INET struct ip h2; #endif /* INET */ @@ -5504,10 +4999,18 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, struct 
ip6_hdr h2_6; int terminal = 0; #endif /* INET6 */ - int ipoff2 = 0; /* make the compiler happy */ - int off2 = 0; /* make the compiler happy */ +#ifdef __FreeBSD__ + int ipoff2 = 0; + int off2 = 0; +#else + int ipoff2; + int off2; +#endif pd2.af = pd->af; + /* Payload packet is from the opposite direction. */ + pd2.sidx = (direction == PF_IN) ? 1 : 0; + pd2.didx = (direction == PF_IN) ? 0 : 1; switch (pd->af) { #ifdef INET case AF_INET: @@ -5593,10 +5096,6 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, } while (!terminal); break; #endif /* INET6 */ -#ifdef __FreeBSD__ - default: - panic("AF not supported: %d", pd->af); -#endif } switch (pd2.proto) { @@ -5622,19 +5121,16 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, key.af = pd2.af; key.proto = IPPROTO_TCP; - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd2.dst, key.af); - PF_ACPY(&key.gwy.addr, pd2.src, key.af); - key.ext.port = th.th_dport; - key.gwy.port = th.th_sport; - } else { - PF_ACPY(&key.lan.addr, pd2.dst, key.af); - PF_ACPY(&key.ext.addr, pd2.src, key.af); - key.lan.port = th.th_dport; - key.ext.port = th.th_sport; - } + PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); + PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + key.port[pd2.sidx] = th.th_sport; + key.port[pd2.didx] = th.th_dport; - STATE_LOOKUP(); +#ifdef __FreeBSD__ + STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); +#else + STATE_LOOKUP(kif, &key, direction, *state, m); +#endif if (direction == (*state)->direction) { src = &(*state)->dst; @@ -5660,7 +5156,11 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, if (!((*state)->state_flags & PFSTATE_SLOPPY) && (!SEQ_GEQ(src->seqhi, seq) || !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) { +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else if (pf_status.debug >= PF_DEBUG_MISC) { +#endif printf("pf: BAD ICMP %d:%d ", icmptype, pd->hdr.icmp->icmp_code); 
pf_print_host(pd->src, 0, pd->af); @@ -5672,22 +5172,47 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, } REASON_SET(reason, PFRES_BADSTATE); return (PF_DROP); + } else { +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else + if (pf_status.debug >= PF_DEBUG_MISC) { +#endif + printf("pf: OK ICMP %d:%d ", + icmptype, pd->hdr.icmp->icmp_code); + pf_print_host(pd->src, 0, pd->af); + printf(" -> "); + pf_print_host(pd->dst, 0, pd->af); + printf(" state: "); + pf_print_state(*state); + printf(" seq=%u\n", seq); + } } - if (STATE_TRANSLATE(*state)) { - if (direction == PF_IN) { + /* translate source/destination address, if necessary */ + if ((*state)->key[PF_SK_WIRE] != + (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = + (*state)->key[pd->didx]; + + if (PF_ANEQ(pd2.src, + &nk->addr[pd2.sidx], pd2.af) || + nk->port[pd2.sidx] != th.th_sport) pf_change_icmp(pd2.src, &th.th_sport, - daddr, &(*state)->lan.addr, - (*state)->lan.port, NULL, + daddr, &nk->addr[pd2.sidx], + nk->port[pd2.sidx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, pd2.af); - } else { + + if (PF_ANEQ(pd2.dst, + &nk->addr[pd2.didx], pd2.af) || + nk->port[pd2.didx] != th.th_dport) pf_change_icmp(pd2.dst, &th.th_dport, - saddr, &(*state)->gwy.addr, - (*state)->gwy.port, NULL, + NULL, /* XXX Inbound NAT? 
*/ + &nk->addr[pd2.didx], + nk->port[pd2.didx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, pd2.af); - } copyback = 1; } @@ -5696,22 +5221,38 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, #ifdef INET case AF_INET: m_copyback(m, off, ICMP_MINLEN, - (caddr_t)pd->hdr.icmp); +#ifdef __FreeBSD__ + (caddr_t) +#endif + pd->hdr.icmp); m_copyback(m, ipoff2, sizeof(h2), - (caddr_t)&h2); +#ifdef __FreeBSD__ + (caddr_t) +#endif + &h2); break; #endif /* INET */ #ifdef INET6 case AF_INET6: m_copyback(m, off, sizeof(struct icmp6_hdr), - (caddr_t)pd->hdr.icmp6); +#ifdef __FreeBSD__ + (caddr_t) +#endif + pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), - (caddr_t)&h2_6); +#ifdef __FreeBSD__ + (caddr_t) +#endif + &h2_6); break; #endif /* INET6 */ } +#ifdef __FreeBSD__ m_copyback(m, off2, 8, (caddr_t)&th); +#else + m_copyback(m, off2, 8, &th); +#endif } return (PF_PASS); @@ -5730,57 +5271,79 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, key.af = pd2.af; key.proto = IPPROTO_UDP; - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd2.dst, key.af); - PF_ACPY(&key.gwy.addr, pd2.src, key.af); - key.ext.port = uh.uh_dport; - key.gwy.port = uh.uh_sport; - } else { - PF_ACPY(&key.lan.addr, pd2.dst, key.af); - PF_ACPY(&key.ext.addr, pd2.src, key.af); - key.lan.port = uh.uh_dport; - key.ext.port = uh.uh_sport; - } + PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); + PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + key.port[pd2.sidx] = uh.uh_sport; + key.port[pd2.didx] = uh.uh_dport; + +#ifdef __FreeBSD__ + STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); +#else + STATE_LOOKUP(kif, &key, direction, *state, m); +#endif - STATE_LOOKUP(); + /* translate source/destination address, if necessary */ + if ((*state)->key[PF_SK_WIRE] != + (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = + (*state)->key[pd->didx]; - if (STATE_TRANSLATE(*state)) { - if (direction == PF_IN) { + if (PF_ANEQ(pd2.src, + 
&nk->addr[pd2.sidx], pd2.af) || + nk->port[pd2.sidx] != uh.uh_sport) pf_change_icmp(pd2.src, &uh.uh_sport, - daddr, &(*state)->lan.addr, - (*state)->lan.port, &uh.uh_sum, + daddr, &nk->addr[pd2.sidx], + nk->port[pd2.sidx], &uh.uh_sum, pd2.ip_sum, icmpsum, pd->ip_sum, 1, pd2.af); - } else { + + if (PF_ANEQ(pd2.dst, + &nk->addr[pd2.didx], pd2.af) || + nk->port[pd2.didx] != uh.uh_dport) pf_change_icmp(pd2.dst, &uh.uh_dport, - saddr, &(*state)->gwy.addr, - (*state)->gwy.port, &uh.uh_sum, + NULL, /* XXX Inbound NAT? */ + &nk->addr[pd2.didx], + nk->port[pd2.didx], &uh.uh_sum, pd2.ip_sum, icmpsum, pd->ip_sum, 1, pd2.af); - } + switch (pd2.af) { #ifdef INET case AF_INET: m_copyback(m, off, ICMP_MINLEN, - (caddr_t)pd->hdr.icmp); - m_copyback(m, ipoff2, sizeof(h2), - (caddr_t)&h2); +#ifdef __FreeBSD__ + (caddr_t) +#endif + pd->hdr.icmp); +#ifdef __FreeBSD__ + m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); +#else + m_copyback(m, ipoff2, sizeof(h2), &h2); +#endif break; #endif /* INET */ #ifdef INET6 case AF_INET6: m_copyback(m, off, sizeof(struct icmp6_hdr), - (caddr_t)pd->hdr.icmp6); +#ifdef __FreeBSD__ + (caddr_t) +#endif + pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), - (caddr_t)&h2_6); +#ifdef __FreeBSD__ + (caddr_t) +#endif + &h2_6); break; #endif /* INET6 */ } - m_copyback(m, off2, sizeof(uh), - (caddr_t)&uh); +#ifdef __FreeBSD__ + m_copyback(m, off2, sizeof(uh), (caddr_t)&uh); +#else + m_copyback(m, off2, sizeof(uh), &uh); +#endif } - return (PF_PASS); break; } @@ -5798,42 +5361,51 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, key.af = pd2.af; key.proto = IPPROTO_ICMP; - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd2.dst, key.af); - PF_ACPY(&key.gwy.addr, pd2.src, key.af); - key.ext.port = 0; - key.gwy.port = iih.icmp_id; - } else { - PF_ACPY(&key.lan.addr, pd2.dst, key.af); - PF_ACPY(&key.ext.addr, pd2.src, key.af); - key.lan.port = iih.icmp_id; - key.ext.port = 0; - } + PF_ACPY(&key.addr[pd2.sidx], pd2.src, 
key.af); + PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + key.port[0] = key.port[1] = iih.icmp_id; + +#ifdef __FreeBSD__ + STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); +#else + STATE_LOOKUP(kif, &key, direction, *state, m); +#endif - STATE_LOOKUP(); + /* translate source/destination address, if necessary */ + if ((*state)->key[PF_SK_WIRE] != + (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = + (*state)->key[pd->didx]; - if (STATE_TRANSLATE(*state)) { - if (direction == PF_IN) { + if (PF_ANEQ(pd2.src, + &nk->addr[pd2.sidx], pd2.af) || + nk->port[pd2.sidx] != iih.icmp_id) pf_change_icmp(pd2.src, &iih.icmp_id, - daddr, &(*state)->lan.addr, - (*state)->lan.port, NULL, + daddr, &nk->addr[pd2.sidx], + nk->port[pd2.sidx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET); - } else { + + if (PF_ANEQ(pd2.dst, + &nk->addr[pd2.didx], pd2.af) || + nk->port[pd2.didx] != iih.icmp_id) pf_change_icmp(pd2.dst, &iih.icmp_id, - saddr, &(*state)->gwy.addr, - (*state)->gwy.port, NULL, + NULL, /* XXX Inbound NAT? 
*/ + &nk->addr[pd2.didx], + nk->port[pd2.didx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET); - } - m_copyback(m, off, ICMP_MINLEN, - (caddr_t)pd->hdr.icmp); - m_copyback(m, ipoff2, sizeof(h2), - (caddr_t)&h2); - m_copyback(m, off2, ICMP_MINLEN, - (caddr_t)&iih); - } +#ifdef __FreeBSD__ + m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp); + m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); + m_copyback(m, off2, ICMP_MINLEN, (caddr_t)&iih); +#else + m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp); + m_copyback(m, ipoff2, sizeof(h2), &h2); + m_copyback(m, off2, ICMP_MINLEN, &iih); +#endif + } return (PF_PASS); break; } @@ -5852,42 +5424,55 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, key.af = pd2.af; key.proto = IPPROTO_ICMPV6; - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd2.dst, key.af); - PF_ACPY(&key.gwy.addr, pd2.src, key.af); - key.ext.port = 0; - key.gwy.port = iih.icmp6_id; - } else { - PF_ACPY(&key.lan.addr, pd2.dst, key.af); - PF_ACPY(&key.ext.addr, pd2.src, key.af); - key.lan.port = iih.icmp6_id; - key.ext.port = 0; - } + PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); + PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + key.port[0] = key.port[1] = iih.icmp6_id; + +#ifdef __FreeBSD__ + STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); +#else + STATE_LOOKUP(kif, &key, direction, *state, m); +#endif - STATE_LOOKUP(); + /* translate source/destination address, if necessary */ + if ((*state)->key[PF_SK_WIRE] != + (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = + (*state)->key[pd->didx]; - if (STATE_TRANSLATE(*state)) { - if (direction == PF_IN) { + if (PF_ANEQ(pd2.src, + &nk->addr[pd2.sidx], pd2.af) || + nk->port[pd2.sidx] != iih.icmp6_id) pf_change_icmp(pd2.src, &iih.icmp6_id, - daddr, &(*state)->lan.addr, - (*state)->lan.port, NULL, + daddr, &nk->addr[pd2.sidx], + nk->port[pd2.sidx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET6); - } else { + + if (PF_ANEQ(pd2.dst, + 
&nk->addr[pd2.didx], pd2.af) || + nk->port[pd2.didx] != iih.icmp6_id) pf_change_icmp(pd2.dst, &iih.icmp6_id, - saddr, &(*state)->gwy.addr, - (*state)->gwy.port, NULL, + NULL, /* XXX Inbound NAT? */ + &nk->addr[pd2.didx], + nk->port[pd2.didx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET6); - } + +#ifdef __FreeBSD__ m_copyback(m, off, sizeof(struct icmp6_hdr), (caddr_t)pd->hdr.icmp6); - m_copyback(m, ipoff2, sizeof(h2_6), - (caddr_t)&h2_6); + m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6); m_copyback(m, off2, sizeof(struct icmp6_hdr), (caddr_t)&iih); +#else + m_copyback(m, off, sizeof(struct icmp6_hdr), + pd->hdr.icmp6); + m_copyback(m, ipoff2, sizeof(h2_6), &h2_6); + m_copyback(m, off2, sizeof(struct icmp6_hdr), + &iih); +#endif } - return (PF_PASS); break; } @@ -5895,55 +5480,68 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, default: { key.af = pd2.af; key.proto = pd2.proto; - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd2.dst, key.af); - PF_ACPY(&key.gwy.addr, pd2.src, key.af); - key.ext.port = 0; - key.gwy.port = 0; - } else { - PF_ACPY(&key.lan.addr, pd2.dst, key.af); - PF_ACPY(&key.ext.addr, pd2.src, key.af); - key.lan.port = 0; - key.ext.port = 0; - } + PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); + PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + key.port[0] = key.port[1] = 0; + +#ifdef __FreeBSD__ + STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); +#else + STATE_LOOKUP(kif, &key, direction, *state, m); +#endif - STATE_LOOKUP(); + /* translate source/destination address, if necessary */ + if ((*state)->key[PF_SK_WIRE] != + (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = + (*state)->key[pd->didx]; - if (STATE_TRANSLATE(*state)) { - if (direction == PF_IN) { - pf_change_icmp(pd2.src, NULL, - daddr, &(*state)->lan.addr, - 0, NULL, + if (PF_ANEQ(pd2.src, + &nk->addr[pd2.sidx], pd2.af)) + pf_change_icmp(pd2.src, NULL, daddr, + &nk->addr[pd2.sidx], 0, NULL, pd2.ip_sum, icmpsum, 
pd->ip_sum, 0, pd2.af); - } else { - pf_change_icmp(pd2.dst, NULL, - saddr, &(*state)->gwy.addr, - 0, NULL, + + if (PF_ANEQ(pd2.dst, + &nk->addr[pd2.didx], pd2.af)) + pf_change_icmp(pd2.src, NULL, + NULL, /* XXX Inbound NAT? */ + &nk->addr[pd2.didx], 0, NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, pd2.af); - } + switch (pd2.af) { #ifdef INET case AF_INET: +#ifdef __FreeBSD__ m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp); - m_copyback(m, ipoff2, sizeof(h2), - (caddr_t)&h2); + m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); +#else + m_copyback(m, off, ICMP_MINLEN, + pd->hdr.icmp); + m_copyback(m, ipoff2, sizeof(h2), &h2); +#endif break; #endif /* INET */ #ifdef INET6 case AF_INET6: m_copyback(m, off, sizeof(struct icmp6_hdr), - (caddr_t)pd->hdr.icmp6); +#ifdef __FreeBSD__ + (caddr_t) +#endif + pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), - (caddr_t)&h2_6); +#ifdef __FreeBSD__ + (caddr_t) +#endif + &h2_6); break; #endif /* INET6 */ } } - return (PF_PASS); break; } @@ -5953,26 +5551,28 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, int pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, - struct pf_pdesc *pd) + struct mbuf *m, struct pf_pdesc *pd) { struct pf_state_peer *src, *dst; - struct pf_state_cmp key; + struct pf_state_key_cmp key; key.af = pd->af; key.proto = pd->proto; if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd->src, key.af); - PF_ACPY(&key.gwy.addr, pd->dst, key.af); - key.ext.port = 0; - key.gwy.port = 0; + PF_ACPY(&key.addr[0], pd->src, key.af); + PF_ACPY(&key.addr[1], pd->dst, key.af); + key.port[0] = key.port[1] = 0; } else { - PF_ACPY(&key.lan.addr, pd->src, key.af); - PF_ACPY(&key.ext.addr, pd->dst, key.af); - key.lan.port = 0; - key.ext.port = 0; + PF_ACPY(&key.addr[1], pd->src, key.af); + PF_ACPY(&key.addr[0], pd->dst, key.af); + key.port[1] = key.port[0] = 0; } - STATE_LOOKUP(); +#ifdef __FreeBSD__ + STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); 
+#else + STATE_LOOKUP(kif, &key, direction, *state, m); +#endif if (direction == (*state)->direction) { src = &(*state)->src; @@ -5996,39 +5596,48 @@ pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, (*state)->timeout = PFTM_OTHER_SINGLE; /* translate source/destination address, if necessary */ - if (STATE_TRANSLATE(*state)) { - if (direction == PF_OUT) - switch (pd->af) { + if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = (*state)->key[pd->didx]; + +#ifdef __FreeBSD__ + KASSERT(nk, ("%s: nk is null", __FUNCTION__)); + KASSERT(pd, ("%s: pd is null", __FUNCTION__)); + KASSERT(pd->src, ("%s: pd->src is null", __FUNCTION__)); + KASSERT(pd->dst, ("%s: pd->dst is null", __FUNCTION__)); +#else + KASSERT(nk); + KASSERT(pd); + KASSERT(pd->src); + KASSERT(pd->dst); +#endif + switch (pd->af) { #ifdef INET - case AF_INET: + case AF_INET: + if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET)) pf_change_a(&pd->src->v4.s_addr, - pd->ip_sum, (*state)->gwy.addr.v4.s_addr, + pd->ip_sum, + nk->addr[pd->sidx].v4.s_addr, 0); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - PF_ACPY(pd->src, &(*state)->gwy.addr, pd->af); - break; -#endif /* INET6 */ - } - else - switch (pd->af) { -#ifdef INET - case AF_INET: + + + if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET)) pf_change_a(&pd->dst->v4.s_addr, - pd->ip_sum, (*state)->lan.addr.v4.s_addr, + pd->ip_sum, + nk->addr[pd->didx].v4.s_addr, 0); + break; #endif /* INET */ #ifdef INET6 - case AF_INET6: - PF_ACPY(pd->dst, &(*state)->lan.addr, pd->af); - break; + case AF_INET6: + if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET)) + PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af); + + if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET)) + PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af); #endif /* INET6 */ - } + } } - return (PF_PASS); } @@ -6086,6 +5695,11 @@ pf_pull_hdr(struct mbuf *m, int off, void *p, int len, int pf_routable(struct pf_addr *addr, sa_family_t af, 
struct pfi_kif *kif) { +#ifdef __FreeBSD__ +#ifdef RADIX_MPATH + struct radix_node_head *rnh; +#endif +#endif struct sockaddr_in *dst; int ret = 1; int check_mpath; @@ -6106,6 +5720,14 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif) struct ifnet *ifp; check_mpath = 0; +#ifdef __FreeBSD__ +#ifdef RADIX_MPATH + /* XXX: stick to table 0 for now */ + rnh = rt_tables_get_rnh(0, af); + if (rnh != NULL && rn_mpath_capable(rnh)) + check_mpath = 1; +#endif +#endif bzero(&ro, sizeof(ro)); switch (af) { case AF_INET: @@ -6113,18 +5735,24 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif) dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = addr->v4; -#ifndef __FreeBSD__ /* MULTIPATH_ROUTING */ +#ifndef __FreeBSD__ if (ipmultipath) check_mpath = 1; #endif break; #ifdef INET6 case AF_INET6: + /* + * Skip check for addresses with embedded interface scope, + * as they would always match anyway. + */ + if (IN6_IS_SCOPE_EMBED(&addr->v6)) + goto out; dst6 = (struct sockaddr_in6 *)&ro.ro_dst; dst6->sin6_family = AF_INET6; dst6->sin6_len = sizeof(*dst6); dst6->sin6_addr = addr->v6; -#ifndef __FreeBSD__ /* MULTIPATH_ROUTING */ +#ifndef __FreeBSD__ if (ip6_multipath) check_mpath = 1; #endif @@ -6174,11 +5802,13 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif) if (kif->pfik_ifp == ifp) ret = 1; -#ifdef __FreeBSD__ /* MULTIPATH_ROUTING */ - rn = NULL; -#else +#ifdef __FreeBSD__ +#ifdef RADIX_MPATH rn = rn_mpath_next(rn); #endif +#else + rn = rn_mpath_next(rn, 0); +#endif } while (check_mpath == 1 && rn != NULL && ret == 0); } else ret = 0; @@ -6249,7 +5879,6 @@ pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw) } #ifdef INET - void pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, struct pf_state *s, struct pf_pdesc *pd) @@ -6274,7 +5903,11 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, (dir != PF_IN && dir != 
PF_OUT) || oifp == NULL) panic("pf_route: invalid parameters"); +#ifdef __FreeBSD__ if (pd->pf_mtag->routed++ > 3) { +#else + if ((*m)->m_pkthdr.pf.routed++ > 3) { +#endif m0 = *m; *m = NULL; goto bad; @@ -6309,9 +5942,17 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, dst->sin_addr = ip->ip_dst; if (r->rt == PF_FASTROUTE) { +#ifdef __FreeBSD__ in_rtalloc(ro, 0); +#else + rtalloc(ro); +#endif if (ro->ro_rt == 0) { +#ifdef __FreeBSD__ KMOD_IPSTAT_INC(ips_noroute); +#else + ipstat.ips_noroute++; +#endif goto bad; } @@ -6377,7 +6018,7 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, * XXX: in_delayed_cksum assumes HBO for ip->ip_len (at least) */ NTOHS(ip->ip_len); - NTOHS(ip->ip_off); /* XXX: needed? */ + NTOHS(ip->ip_off); /* XXX: needed? */ in_delayed_cksum(m0); HTONS(ip->ip_len); HTONS(ip->ip_off); @@ -6386,9 +6027,8 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, m0->m_pkthdr.csum_flags &= ifp->if_hwassist; if (ntohs(ip->ip_len) <= ifp->if_mtu || - (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 || (ifp->if_hwassist & CSUM_FRAGMENT && - ((ip->ip_off & htons(IP_DF)) == 0))) { + ((ip->ip_off & htons(IP_DF)) == 0))) { /* * ip->ip_len = htons(ip->ip_len); * ip->ip_off = htons(ip->ip_off); @@ -6408,7 +6048,6 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, PF_LOCK(); goto done; } - #else /* Copied from ip_output. 
*/ #ifdef IPSEC @@ -6429,25 +6068,28 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, if (!(ifp->if_capabilities & IFCAP_CSUM_TCPv4) || ifp->if_bridge != NULL) { in_delayed_cksum(m0); - m0->m_pkthdr.csum_flags &= ~M_TCPV4_CSUM_OUT; /* Clear */ + m0->m_pkthdr.csum_flags &= ~M_TCPV4_CSUM_OUT; /* Clr */ } } else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT) { if (!(ifp->if_capabilities & IFCAP_CSUM_UDPv4) || ifp->if_bridge != NULL) { in_delayed_cksum(m0); - m0->m_pkthdr.csum_flags &= ~M_UDPV4_CSUM_OUT; /* Clear */ + m0->m_pkthdr.csum_flags &= ~M_UDPV4_CSUM_OUT; /* Clr */ } } if (ntohs(ip->ip_len) <= ifp->if_mtu) { + ip->ip_sum = 0; if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) && ifp->if_bridge == NULL) { m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; +#ifdef __FreeBSD__ KMOD_IPSTAT_INC(ips_outhwcsum); - } else { - ip->ip_sum = 0; +#else + ipstat.ips_outhwcsum++; +#endif + } else ip->ip_sum = in_cksum(m0, ip->ip_hl << 2); - } /* Update relevant hardware checksum stats for TCP/UDP */ if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT) KMOD_TCPSTAT_INC(tcps_outhwcsum); @@ -6457,12 +6099,17 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, goto done; } #endif + /* * Too large for interface; fragment if possible. * Must be able to put at least 8 bytes per fragment. 
*/ - if (ip->ip_off & htons(IP_DF) || (m0->m_pkthdr.csum_flags & CSUM_TSO)) { + if (ip->ip_off & htons(IP_DF)) { +#ifdef __FreeBSD__ KMOD_IPSTAT_INC(ips_cantfrag); +#else + ipstat.ips_cantfrag++; +#endif if (r->rt != PF_DUPTO) { #ifdef __FreeBSD__ /* icmp_error() expects host byte ordering */ @@ -6493,7 +6140,7 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, error = ip_fragment(m0, ifp, ifp->if_mtu); #endif if (error) { -#ifndef __FreeBSD__ /* ip_fragment does not do m_freem() on FreeBSD */ +#ifndef __FreeBSD__ /* ip_fragment does not do m_freem() on FreeBSD */ m0 = NULL; #endif goto bad; @@ -6519,7 +6166,11 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, } if (error == 0) +#ifdef __FreeBSD__ KMOD_IPSTAT_INC(ips_fragmented); +#else + ipstat.ips_fragmented++; +#endif done: if (r->rt != PF_DUPTO) @@ -6547,13 +6198,16 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, struct ifnet *ifp = NULL; struct pf_addr naddr; struct pf_src_node *sn = NULL; - int error = 0; if (m == NULL || *m == NULL || r == NULL || (dir != PF_IN && dir != PF_OUT) || oifp == NULL) panic("pf_route6: invalid parameters"); +#ifdef __FreeBSD__ if (pd->pf_mtag->routed++ > 3) { +#else + if ((*m)->m_pkthdr.pf.routed++ > 3) { +#endif m0 = *m; *m = NULL; goto bad; @@ -6592,14 +6246,9 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, m0->m_flags |= M_SKIP_FIREWALL; PF_UNLOCK(); ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL); - PF_LOCK(); #else - mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT); - if (mtag == NULL) - goto bad; - m_tag_prepend(m0, mtag); - pd->pf_mtag->flags |= PF_TAG_GENERATED; - ip6_output(m0, NULL, NULL, 0, NULL, NULL); + m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED; + ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL); #endif return; } @@ -6660,7 +6309,7 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, #ifdef __FreeBSD__ PF_UNLOCK(); #endif - 
error = nd6_output(ifp, ifp, m0, dst, NULL); + nd6_output(ifp, ifp, m0, dst, NULL); #ifdef __FreeBSD__ PF_LOCK(); #endif @@ -6671,7 +6320,7 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, PF_UNLOCK(); icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); PF_LOCK(); - } else + } else #else if (r->rt != PF_DUPTO) icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); @@ -6691,7 +6340,6 @@ bad: } #endif /* INET6 */ - #ifdef __FreeBSD__ /* * FreeBSD supports cksum offloads for the following drivers. @@ -6729,10 +6377,10 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t a if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) { sum = m->m_pkthdr.csum_data; } else { - ip = mtod(m, struct ip *); + ip = mtod(m, struct ip *); sum = in_pseudo(ip->ip_src.s_addr, - ip->ip_dst.s_addr, htonl((u_short)len + - m->m_pkthdr.csum_data + IPPROTO_TCP)); + ip->ip_dst.s_addr, htonl((u_short)len + + m->m_pkthdr.csum_data + IPPROTO_TCP)); } sum ^= 0xffff; ++hw_assist; @@ -6743,14 +6391,14 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t a if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) { sum = m->m_pkthdr.csum_data; } else { - ip = mtod(m, struct ip *); + ip = mtod(m, struct ip *); sum = in_pseudo(ip->ip_src.s_addr, - ip->ip_dst.s_addr, htonl((u_short)len + - m->m_pkthdr.csum_data + IPPROTO_UDP)); + ip->ip_dst.s_addr, htonl((u_short)len + + m->m_pkthdr.csum_data + IPPROTO_UDP)); } sum ^= 0xffff; ++hw_assist; - } + } break; case IPPROTO_ICMP: #ifdef INET6 @@ -6827,6 +6475,7 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t a return (0); } #else /* !__FreeBSD__ */ + /* * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag * off is the offset where the protocol header starts @@ -6919,7 +6568,37 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, m->m_pkthdr.csum_flags |= flag_ok; return (0); } -#endif /* __FreeBSD__ */ +#endif + +#ifndef __FreeBSD__ 
+struct pf_divert * +pf_find_divert(struct mbuf *m) +{ + struct m_tag *mtag; + + if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) + return (NULL); + + return ((struct pf_divert *)(mtag + 1)); +} + +struct pf_divert * +pf_get_divert(struct mbuf *m) +{ + struct m_tag *mtag; + + if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) { + mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert), + M_NOWAIT); + if (mtag == NULL) + return (NULL); + bzero(mtag + 1, sizeof(struct pf_divert)); + m_tag_prepend(m, mtag); + } + + return ((struct pf_divert *)(mtag + 1)); +} +#endif #ifdef INET int @@ -6934,8 +6613,14 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct pfi_kif *kif; u_short action, reason = 0, log = 0; struct mbuf *m = *m0; - struct ip *h = NULL; /* make the compiler happy */ +#ifdef __FreeBSD__ + struct ip *h = NULL; + struct m_tag *ipfwtag; + struct pf_rule *a = NULL, *r = &V_pf_default_rule, *tr, *nr; +#else + struct ip *h; struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr; +#endif struct pf_state *s = NULL; struct pf_ruleset *ruleset = NULL; struct pf_pdesc pd; @@ -6943,44 +6628,32 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, #ifdef __FreeBSD__ PF_LOCK(); -#endif - if (!pf_status.running) -#ifdef __FreeBSD__ + if (!V_pf_status.running) { PF_UNLOCK(); -#endif return (PF_PASS); -#ifdef __FreeBSD__ } +#else + if (!pf_status.running) + return (PF_PASS); #endif memset(&pd, 0, sizeof(pd)); - if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) { #ifdef __FreeBSD__ + if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) { PF_UNLOCK(); -#endif DPFPRINTF(PF_DEBUG_URGENT, ("pf_test: pf_get_mtag returned NULL\n")); return (PF_DROP); } -#ifdef __FreeBSD__ - if (m->m_flags & M_SKIP_FIREWALL) { - PF_UNLOCK(); - return (PF_PASS); - } -#else - if (pd.pf_mtag->flags & PF_TAG_GENERATED) - return (PF_PASS); #endif - -#ifdef __FreeBSD__ - /* XXX_IMPORT: later */ -#else +#ifndef __FreeBSD__ if (ifp->if_type == IFT_CARP && ifp->if_carpdev) 
- ifp = ifp->if_carpdev; + kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif; + else #endif + kif = (struct pfi_kif *)ifp->if_pf_kif; - kif = (struct pfi_kif *)ifp->if_pf_kif; if (kif == NULL) { #ifdef __FreeBSD__ PF_UNLOCK(); @@ -6989,12 +6662,15 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname)); return (PF_DROP); } - if (kif->pfik_flags & PFI_IFLAG_SKIP) { + if (kif->pfik_flags & PFI_IFLAG_SKIP) #ifdef __FreeBSD__ + { PF_UNLOCK(); #endif return (PF_PASS); +#ifdef __FreeBSD__ } +#endif #ifdef __FreeBSD__ M_ASSERTPKTHDR(m); @@ -7003,7 +6679,7 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, if ((m->m_flags & M_PKTHDR) == 0) panic("non-M_PKTHDR is passed to pf_test"); #endif /* DIAGNOSTIC */ -#endif /* __FreeBSD__ */ +#endif if (m->m_pkthdr.len < (int)sizeof(*h)) { action = PF_DROP; @@ -7012,12 +6688,36 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, goto done; } +#ifdef __FreeBSD__ + if (m->m_flags & M_SKIP_FIREWALL) { + PF_UNLOCK(); + return (PF_PASS); + } +#else + if (m->m_pkthdr.pf.flags & PF_TAG_GENERATED) + return (PF_PASS); +#endif + +#ifdef __FreeBSD__ + if (ip_divert_ptr != NULL && + ((ipfwtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL)) != NULL)) { + struct ipfw_rule_ref *rr = (struct ipfw_rule_ref *)(ipfwtag+1); + if (rr->info & IPFW_IS_DIVERT && rr->rulenum == 0) { + pd.pf_mtag->flags |= PF_PACKET_LOOPED; + m_tag_delete(m, ipfwtag); + } + if (pd.pf_mtag->flags & PF_FASTFWD_OURS_PRESENT) { + m->m_flags |= M_FASTFWD_OURS; + pd.pf_mtag->flags &= ~PF_FASTFWD_OURS_PRESENT; + } + } else +#endif /* We do IP header normalization and packet reassembly here */ if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) { action = PF_DROP; goto done; } - m = *m0; + m = *m0; /* pf_normalize messes with m0 */ h = mtod(m, struct ip *); off = h->ip_hl << 2; @@ -7030,9 +6730,13 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, pd.src = (struct pf_addr *)&h->ip_src; pd.dst = (struct 
pf_addr *)&h->ip_dst; - PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET); + pd.sport = pd.dport = NULL; pd.ip_sum = &h->ip_sum; + pd.proto_sum = NULL; pd.proto = h->ip_p; + pd.dir = dir; + pd.sidx = (dir == PF_IN) ? 0 : 1; + pd.didx = (dir == PF_IN) ? 1 : 0; pd.af = AF_INET; pd.tos = h->ip_tos; pd.tot_len = ntohs(h->ip_len); @@ -7056,12 +6760,6 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, log = action != PF_PASS; goto done; } - if (dir == PF_IN && pf_check_proto_cksum(m, off, - ntohs(h->ip_len) - off, IPPROTO_TCP, AF_INET)) { - REASON_SET(&reason, PFRES_PROTCKSUM); - action = PF_DROP; - goto done; - } pd.p_len = pd.tot_len - off - (th.th_off << 2); if ((th.th_flags & TH_ACK) && pd.p_len == 0) pqid = 1; @@ -7071,18 +6769,23 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { -#if NPFSYNC +#if NPFSYNC > 0 +#ifdef __FreeBSD__ + if (pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); +#else pfsync_update_state(s); +#endif #endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) #ifdef __FreeBSD__ - action = pf_test_tcp(&r, &s, dir, kif, + action = pf_test_rule(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, NULL, inp); #else - action = pf_test_tcp(&r, &s, dir, kif, + action = pf_test_rule(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, &ipintrq); #endif break; @@ -7097,12 +6800,6 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, log = action != PF_PASS; goto done; } - if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m, - off, ntohs(h->ip_len) - off, IPPROTO_UDP, AF_INET)) { - action = PF_DROP; - REASON_SET(&reason, PFRES_PROTCKSUM); - goto done; - } if (uh.uh_dport == 0 || ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || ntohs(uh.uh_ulen) < sizeof(struct udphdr)) { @@ -7112,18 +6809,23 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, } action = pf_test_state_udp(&s, dir, kif, m, off, h, 
&pd); if (action == PF_PASS) { -#if NPFSYNC +#if NPFSYNC > 0 +#ifdef __FreeBSD__ + if (pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); +#else pfsync_update_state(s); +#endif #endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) #ifdef __FreeBSD__ - action = pf_test_udp(&r, &s, dir, kif, + action = pf_test_rule(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, NULL, inp); #else - action = pf_test_udp(&r, &s, dir, kif, + action = pf_test_rule(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, &ipintrq); #endif break; @@ -7138,47 +6840,60 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, log = action != PF_PASS; goto done; } - if (dir == PF_IN && pf_check_proto_cksum(m, off, - ntohs(h->ip_len) - off, IPPROTO_ICMP, AF_INET)) { - action = PF_DROP; - REASON_SET(&reason, PFRES_PROTCKSUM); - goto done; - } action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { -#if NPFSYNC +#if NPFSYNC > 0 +#ifdef __FreeBSD__ + if (pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); +#else pfsync_update_state(s); +#endif #endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) #ifdef __FreeBSD__ - action = pf_test_icmp(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, NULL); + action = pf_test_rule(&r, &s, dir, kif, + m, off, h, &pd, &a, &ruleset, NULL, inp); #else - action = pf_test_icmp(&r, &s, dir, kif, + action = pf_test_rule(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, &ipintrq); #endif break; } +#ifdef INET6 + case IPPROTO_ICMPV6: { + action = PF_DROP; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: dropping IPv4 packet with ICMPv6 payload\n")); + goto done; + } +#endif + default: - action = pf_test_state_other(&s, dir, kif, &pd); + action = pf_test_state_other(&s, dir, kif, m, &pd); if (action == PF_PASS) { -#if NPFSYNC +#if NPFSYNC > 0 +#ifdef __FreeBSD__ + if (pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); +#else pfsync_update_state(s); 
+#endif #endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) #ifdef __FreeBSD__ - action = pf_test_other(&r, &s, dir, kif, m, off, h, - &pd, &a, &ruleset, NULL); + action = pf_test_rule(&r, &s, dir, kif, m, off, h, + &pd, &a, &ruleset, NULL, inp); #else - action = pf_test_other(&r, &s, dir, kif, m, off, h, + action = pf_test_rule(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, &ipintrq); #endif break; @@ -7195,17 +6910,37 @@ done: } if ((s && s->tag) || r->rtableid) - pf_tag_packet(m, pd.pf_mtag, s ? s->tag : 0, r->rtableid); +#ifdef __FreeBSD__ + pf_tag_packet(m, s ? s->tag : 0, r->rtableid, pd.pf_mtag); +#else + pf_tag_packet(m, s ? s->tag : 0, r->rtableid); +#endif + + if (dir == PF_IN && s && s->key[PF_SK_STACK]) +#ifdef __FreeBSD__ + pd.pf_mtag->statekey = s->key[PF_SK_STACK]; +#else + m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK]; +#endif #ifdef ALTQ if (action == PF_PASS && r->qid) { +#ifdef __FreeBSD__ if (pqid || (pd.tos & IPTOS_LOWDELAY)) pd.pf_mtag->qid = r->pqid; else pd.pf_mtag->qid = r->qid; /* add hints for ecn */ - pd.pf_mtag->af = AF_INET; pd.pf_mtag->hdr = h; + +#else + if (pqid || (pd.tos & IPTOS_LOWDELAY)) + m->m_pkthdr.pf.qid = r->pqid; + else + m->m_pkthdr.pf.qid = r->qid; + /* add hints for ecn */ + m->m_pkthdr.pf.hdr = h; +#endif } #endif /* ALTQ */ @@ -7219,7 +6954,55 @@ done: (s->nat_rule.ptr->action == PF_RDR || s->nat_rule.ptr->action == PF_BINAT) && (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) - pd.pf_mtag->flags |= PF_TAG_TRANSLATE_LOCALHOST; +#ifdef __FreeBSD__ + m->m_flags |= M_SKIP_FIREWALL; +#else + m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST; +#endif + +#ifdef __FreeBSD__ + if (action == PF_PASS && r->divert.port && + ip_divert_ptr != NULL && !PACKET_LOOPED()) { + + ipfwtag = m_tag_alloc(MTAG_IPFW_RULE, 0, + sizeof(struct ipfw_rule_ref), M_NOWAIT | M_ZERO); + if (ipfwtag != NULL) { + ((struct ipfw_rule_ref *)(ipfwtag+1))->info = r->divert.port; + ((struct 
ipfw_rule_ref *)(ipfwtag+1))->rulenum = dir; + + m_tag_prepend(m, ipfwtag); + + PF_UNLOCK(); + + if (m->m_flags & M_FASTFWD_OURS) { + pd.pf_mtag->flags |= PF_FASTFWD_OURS_PRESENT; + m->m_flags &= ~M_FASTFWD_OURS; + } + + ip_divert_ptr(*m0, + dir == PF_IN ? DIR_IN : DIR_OUT); + *m0 = NULL; + return (action); + } else { + /* XXX: ipfw has the same behaviour! */ + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + log = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: failed to allocate divert tag\n")); + } + } +#else + if (dir == PF_IN && action == PF_PASS && r->divert.port) { + struct pf_divert *divert; + + if ((divert = pf_get_divert(m))) { + m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED; + divert->port = r->divert.port; + divert->addr.ipv4 = r->divert.addr.v4; + } + } +#endif if (log) { struct pf_rule *lr; @@ -7263,53 +7046,44 @@ done: } tr = r; nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; - if (nr != NULL) { - struct pf_addr *x; - /* - * XXX: we need to make sure that the addresses - * passed to pfr_update_stats() are the same than - * the addresses used during matching (pfr_match) - */ - if (r == &pf_default_rule) { - tr = nr; - x = (s == NULL || s->direction == dir) ? - &pd.baddr : &pd.naddr; - } else - x = (s == NULL || s->direction == dir) ? - &pd.naddr : &pd.baddr; - if (x == &pd.baddr || s == NULL) { - /* we need to change the address */ - if (dir == PF_OUT) - pd.src = x; - else - pd.dst = x; - } - } +#ifdef __FreeBSD__ + if (nr != NULL && r == &V_pf_default_rule) +#else + if (nr != NULL && r == &pf_default_rule) +#endif + tr = nr; if (tr->src.addr.type == PF_ADDR_TABLE) - pfr_update_stats(tr->src.addr.p.tbl, (s == NULL || - s->direction == dir) ? pd.src : pd.dst, pd.af, - pd.tot_len, dir == PF_OUT, r->action == PF_PASS, - tr->src.neg); + pfr_update_stats(tr->src.addr.p.tbl, + (s == NULL) ? 
pd.src : + &s->key[(s->direction == PF_IN)]-> + addr[(s->direction == PF_OUT)], + pd.af, pd.tot_len, dir == PF_OUT, + r->action == PF_PASS, tr->src.neg); if (tr->dst.addr.type == PF_ADDR_TABLE) - pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL || - s->direction == dir) ? pd.dst : pd.src, pd.af, - pd.tot_len, dir == PF_OUT, r->action == PF_PASS, - tr->dst.neg); + pfr_update_stats(tr->dst.addr.p.tbl, + (s == NULL) ? pd.dst : + &s->key[(s->direction == PF_IN)]-> + addr[(s->direction == PF_IN)], + pd.af, pd.tot_len, dir == PF_OUT, + r->action == PF_PASS, tr->dst.neg); } - - if (action == PF_SYNPROXY_DROP) { + switch (action) { + case PF_SYNPROXY_DROP: m_freem(*m0); + case PF_DEFER: *m0 = NULL; action = PF_PASS; - } else if (r->rt) + break; + default: /* pf_route can free the mbuf causing *m0 to become NULL */ - pf_route(m0, r, dir, ifp, s, &pd); - + if (r->rt) + pf_route(m0, r, dir, kif->pfik_ifp, s, &pd); + break; + } #ifdef __FreeBSD__ PF_UNLOCK(); #endif - return (action); } #endif /* INET */ @@ -7327,8 +7101,13 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct pfi_kif *kif; u_short action, reason = 0, log = 0; struct mbuf *m = *m0, *n = NULL; +#ifdef __FreeBSD__ + struct ip6_hdr *h = NULL; + struct pf_rule *a = NULL, *r = &V_pf_default_rule, *tr, *nr; +#else struct ip6_hdr *h; struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr; +#endif struct pf_state *s = NULL; struct pf_ruleset *ruleset = NULL; struct pf_pdesc pd; @@ -7336,38 +7115,31 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, #ifdef __FreeBSD__ PF_LOCK(); -#endif - - if (!pf_status.running) -#ifdef __FreeBSD__ - { + if (!V_pf_status.running) { PF_UNLOCK(); -#endif return (PF_PASS); -#ifdef __FreeBSD__ } +#else + if (!pf_status.running) + return (PF_PASS); #endif memset(&pd, 0, sizeof(pd)); - if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) { #ifdef __FreeBSD__ + if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) { PF_UNLOCK(); -#endif DPFPRINTF(PF_DEBUG_URGENT, - ("pf_test6: pf_get_mtag 
returned NULL\n")); + ("pf_test: pf_get_mtag returned NULL\n")); return (PF_DROP); } - if (pd.pf_mtag->flags & PF_TAG_GENERATED) - return (PF_PASS); - -#ifdef __FreeBSD__ - /* XXX_IMPORT: later */ -#else +#endif +#ifndef __FreeBSD__ if (ifp->if_type == IFT_CARP && ifp->if_carpdev) - ifp = ifp->if_carpdev; + kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif; + else #endif + kif = (struct pfi_kif *)ifp->if_pf_kif; - kif = (struct pfi_kif *)ifp->if_pf_kif; if (kif == NULL) { #ifdef __FreeBSD__ PF_UNLOCK(); @@ -7376,12 +7148,15 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname)); return (PF_DROP); } - if (kif->pfik_flags & PFI_IFLAG_SKIP) { + if (kif->pfik_flags & PFI_IFLAG_SKIP) #ifdef __FreeBSD__ + { PF_UNLOCK(); #endif return (PF_PASS); +#ifdef __FreeBSD__ } +#endif #ifdef __FreeBSD__ M_ASSERTPKTHDR(m); @@ -7392,10 +7167,6 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, #endif /* DIAGNOSTIC */ #endif -#ifdef __FreeBSD__ - h = NULL; /* make the compiler happy */ -#endif - if (m->m_pkthdr.len < (int)sizeof(*h)) { action = PF_DROP; REASON_SET(&reason, PFRES_SHORT); @@ -7403,12 +7174,19 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, goto done; } +#ifdef __FreeBSD__ + if (pd.pf_mtag->flags & PF_TAG_GENERATED) +#else + if (m->m_pkthdr.pf.flags & PF_TAG_GENERATED) +#endif + return (PF_PASS); + /* We do IP header normalization and packet reassembly here */ if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) { action = PF_DROP; goto done; } - m = *m0; + m = *m0; /* pf_normalize messes with m0 */ h = mtod(m, struct ip6_hdr *); #if 1 @@ -7425,8 +7203,12 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, pd.src = (struct pf_addr *)&h->ip6_src; pd.dst = (struct pf_addr *)&h->ip6_dst; - PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET6); + pd.sport = pd.dport = NULL; pd.ip_sum = NULL; + pd.proto_sum = NULL; + pd.dir = dir; + pd.sidx = (dir == PF_IN) ? 
0 : 1; + pd.didx = (dir == PF_IN) ? 1 : 0; pd.af = AF_INET6; pd.tos = 0; pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); @@ -7470,7 +7252,7 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, log = 1; goto done; } - /* fallthrough */ + /* FALLTHROUGH */ } case IPPROTO_AH: case IPPROTO_HOPOPTS: @@ -7515,13 +7297,6 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, log = action != PF_PASS; goto done; } - if (dir == PF_IN && pf_check_proto_cksum(n, off, - ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)), - IPPROTO_TCP, AF_INET6)) { - action = PF_DROP; - REASON_SET(&reason, PFRES_PROTCKSUM); - goto done; - } pd.p_len = pd.tot_len - off - (th.th_off << 2); action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd); if (action == PF_DROP) @@ -7529,18 +7304,23 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { -#if NPFSYNC +#if NPFSYNC > 0 +#ifdef __FreeBSD__ + if (pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); +#else pfsync_update_state(s); +#endif #endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) #ifdef __FreeBSD__ - action = pf_test_tcp(&r, &s, dir, kif, + action = pf_test_rule(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, NULL, inp); #else - action = pf_test_tcp(&r, &s, dir, kif, + action = pf_test_rule(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, &ip6intrq); #endif break; @@ -7555,13 +7335,6 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, log = action != PF_PASS; goto done; } - if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(n, - off, ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)), - IPPROTO_UDP, AF_INET6)) { - action = PF_DROP; - REASON_SET(&reason, PFRES_PROTCKSUM); - goto done; - } if (uh.uh_dport == 0 || ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || ntohs(uh.uh_ulen) < sizeof(struct udphdr)) { @@ -7571,23 +7344,35 @@ pf_test6(int dir, struct ifnet *ifp, struct 
mbuf **m0, } action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); if (action == PF_PASS) { -#if NPFSYNC +#if NPFSYNC > 0 +#ifdef __FreeBSD__ + if (pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); +#else pfsync_update_state(s); +#endif #endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) #ifdef __FreeBSD__ - action = pf_test_udp(&r, &s, dir, kif, + action = pf_test_rule(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, NULL, inp); #else - action = pf_test_udp(&r, &s, dir, kif, + action = pf_test_rule(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, &ip6intrq); #endif break; } + case IPPROTO_ICMP: { + action = PF_DROP; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: dropping IPv6 packet with ICMPv4 payload\n")); + goto done; + } + case IPPROTO_ICMPV6: { struct icmp6_hdr ih; @@ -7597,54 +7382,62 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, log = action != PF_PASS; goto done; } - if (dir == PF_IN && pf_check_proto_cksum(n, off, - ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)), - IPPROTO_ICMPV6, AF_INET6)) { - action = PF_DROP; - REASON_SET(&reason, PFRES_PROTCKSUM); - goto done; - } action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { -#if NPFSYNC +#if NPFSYNC > 0 +#ifdef __FreeBSD__ + if (pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); +#else pfsync_update_state(s); +#endif #endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) #ifdef __FreeBSD__ - action = pf_test_icmp(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, NULL); + action = pf_test_rule(&r, &s, dir, kif, + m, off, h, &pd, &a, &ruleset, NULL, inp); #else - action = pf_test_icmp(&r, &s, dir, kif, + action = pf_test_rule(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, &ip6intrq); #endif break; } default: - action = pf_test_state_other(&s, dir, kif, &pd); + action = pf_test_state_other(&s, dir, kif, m, &pd); if (action == PF_PASS) { -#if NPFSYNC +#if NPFSYNC 
> 0 +#ifdef __FreeBSD__ + if (pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); +#else pfsync_update_state(s); +#endif #endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) #ifdef __FreeBSD__ - action = pf_test_other(&r, &s, dir, kif, m, off, h, - &pd, &a, &ruleset, NULL); + action = pf_test_rule(&r, &s, dir, kif, m, off, h, + &pd, &a, &ruleset, NULL, inp); #else - action = pf_test_other(&r, &s, dir, kif, m, off, h, + action = pf_test_rule(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, &ip6intrq); #endif break; } done: + if (n != m) { + m_freem(n); + n = NULL; + } + /* handle dangerous IPv6 extension headers. */ if (action == PF_PASS && rh_cnt && !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) { @@ -7656,17 +7449,36 @@ done: } if ((s && s->tag) || r->rtableid) - pf_tag_packet(m, pd.pf_mtag, s ? s->tag : 0, r->rtableid); +#ifdef __FreeBSD__ + pf_tag_packet(m, s ? s->tag : 0, r->rtableid, pd.pf_mtag); +#else + pf_tag_packet(m, s ? 
s->tag : 0, r->rtableid); +#endif + + if (dir == PF_IN && s && s->key[PF_SK_STACK]) +#ifdef __FreeBSD__ + pd.pf_mtag->statekey = s->key[PF_SK_STACK]; +#else + m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK]; +#endif #ifdef ALTQ if (action == PF_PASS && r->qid) { +#ifdef __FreeBSD__ if (pd.tos & IPTOS_LOWDELAY) pd.pf_mtag->qid = r->pqid; else pd.pf_mtag->qid = r->qid; /* add hints for ecn */ - pd.pf_mtag->af = AF_INET6; pd.pf_mtag->hdr = h; +#else + if (pd.tos & IPTOS_LOWDELAY) + m->m_pkthdr.pf.qid = r->pqid; + else + m->m_pkthdr.pf.qid = r->qid; + /* add hints for ecn */ + m->m_pkthdr.pf.hdr = h; +#endif } #endif /* ALTQ */ @@ -7675,7 +7487,27 @@ done: (s->nat_rule.ptr->action == PF_RDR || s->nat_rule.ptr->action == PF_BINAT) && IN6_IS_ADDR_LOOPBACK(&pd.dst->v6)) - pd.pf_mtag->flags |= PF_TAG_TRANSLATE_LOCALHOST; +#ifdef __FreeBSD__ + m->m_flags |= M_SKIP_FIREWALL; +#else + m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST; +#endif + +#ifdef __FreeBSD__ + /* XXX: Anybody working on it?! */ + if (r->divert.port) + printf("pf: divert(9) is not supported for IPv6\n"); +#else + if (dir == PF_IN && action == PF_PASS && r->divert.port) { + struct pf_divert *divert; + + if ((divert = pf_get_divert(m))) { + m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED; + divert->port = r->divert.port; + divert->addr.ipv6 = r->divert.addr.v6; + } + } +#endif if (log) { struct pf_rule *lr; @@ -7719,48 +7551,39 @@ done: } tr = r; nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; - if (nr != NULL) { - struct pf_addr *x; - /* - * XXX: we need to make sure that the addresses - * passed to pfr_update_stats() are the same than - * the addresses used during matching (pfr_match) - */ - if (r == &pf_default_rule) { - tr = nr; - x = (s == NULL || s->direction == dir) ? - &pd.baddr : &pd.naddr; - } else { - x = (s == NULL || s->direction == dir) ? 
- &pd.naddr : &pd.baddr; - } - if (x == &pd.baddr || s == NULL) { - if (dir == PF_OUT) - pd.src = x; - else - pd.dst = x; - } - } +#ifdef __FreeBSD__ + if (nr != NULL && r == &V_pf_default_rule) +#else + if (nr != NULL && r == &pf_default_rule) +#endif + tr = nr; if (tr->src.addr.type == PF_ADDR_TABLE) - pfr_update_stats(tr->src.addr.p.tbl, (s == NULL || - s->direction == dir) ? pd.src : pd.dst, pd.af, - pd.tot_len, dir == PF_OUT, r->action == PF_PASS, - tr->src.neg); + pfr_update_stats(tr->src.addr.p.tbl, + (s == NULL) ? pd.src : + &s->key[(s->direction == PF_IN)]->addr[0], + pd.af, pd.tot_len, dir == PF_OUT, + r->action == PF_PASS, tr->src.neg); if (tr->dst.addr.type == PF_ADDR_TABLE) - pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL || - s->direction == dir) ? pd.dst : pd.src, pd.af, - pd.tot_len, dir == PF_OUT, r->action == PF_PASS, - tr->dst.neg); + pfr_update_stats(tr->dst.addr.p.tbl, + (s == NULL) ? pd.dst : + &s->key[(s->direction == PF_IN)]->addr[1], + pd.af, pd.tot_len, dir == PF_OUT, + r->action == PF_PASS, tr->dst.neg); } - - if (action == PF_SYNPROXY_DROP) { + switch (action) { + case PF_SYNPROXY_DROP: m_freem(*m0); + case PF_DEFER: *m0 = NULL; action = PF_PASS; - } else if (r->rt) + break; + default: /* pf_route6 can free the mbuf causing *m0 to become NULL */ - pf_route6(m0, r, dir, ifp, s, &pd); + if (r->rt) + pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd); + break; + } #ifdef __FreeBSD__ PF_UNLOCK(); @@ -7782,3 +7605,20 @@ pf_check_congestion(struct ifqueue *ifq) return (0); #endif } + +/* + * must be called whenever any addressing information such as + * address, port, protocol has changed + */ +void +pf_pkt_addr_changed(struct mbuf *m) +{ +#ifdef __FreeBSD__ + struct pf_mtag *pf_tag; + + if ((pf_tag = pf_find_mtag(m)) != NULL) + pf_tag->statekey = NULL; +#else + m->m_pkthdr.pf.statekey = NULL; +#endif +} diff --git a/sys/contrib/pf/net/pf_if.c b/sys/contrib/pf/net/pf_if.c index 156fb22..6336c79 100644 --- a/sys/contrib/pf/net/pf_if.c +++ 
b/sys/contrib/pf/net/pf_if.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_if.c,v 1.46 2006/12/13 09:01:59 itojun Exp $ */ +/* $OpenBSD: pf_if.c,v 1.54 2008/06/14 16:55:28 mk Exp $ */ /* * Copyright 2005 Henning Brauer @@ -54,6 +54,9 @@ __FBSDID("$FreeBSD$"); #include #endif #include +#ifndef __FreeBSD__ +#include +#endif #include #include @@ -73,25 +76,35 @@ __FBSDID("$FreeBSD$"); #include #endif /* INET6 */ -struct pfi_kif *pfi_all = NULL; -struct pfi_statehead pfi_statehead; #ifdef __FreeBSD__ -uma_zone_t pfi_addr_pl; +VNET_DEFINE(struct pfi_kif *, pfi_all); +VNET_DEFINE(uma_zone_t, pfi_addr_pl); +VNET_DEFINE(struct pfi_ifhead, pfi_ifs); +#define V_pfi_ifs VNET(pfi_ifs) +VNET_DEFINE(long, pfi_update); +#define V_pfi_update VNET(pfi_update) +VNET_DEFINE(struct pfr_addr *, pfi_buffer); +#define V_pfi_buffer VNET(pfi_buffer) +VNET_DEFINE(int, pfi_buffer_cnt); +#define V_pfi_buffer_cnt VNET(pfi_buffer_cnt) +VNET_DEFINE(int, pfi_buffer_max); +#define V_pfi_buffer_max VNET(pfi_buffer_max) #else +struct pfi_kif *pfi_all = NULL; struct pool pfi_addr_pl; -#endif struct pfi_ifhead pfi_ifs; long pfi_update = 1; struct pfr_addr *pfi_buffer; int pfi_buffer_cnt; int pfi_buffer_max; +#endif #ifdef __FreeBSD__ -eventhandler_tag pfi_attach_cookie = NULL; -eventhandler_tag pfi_detach_cookie = NULL; -eventhandler_tag pfi_attach_group_cookie = NULL; -eventhandler_tag pfi_change_group_cookie = NULL; -eventhandler_tag pfi_detach_group_cookie = NULL; -eventhandler_tag pfi_ifaddr_event_cookie = NULL; +eventhandler_tag pfi_attach_cookie; +eventhandler_tag pfi_detach_cookie; +eventhandler_tag pfi_attach_group_cookie; +eventhandler_tag pfi_change_group_cookie; +eventhandler_tag pfi_detach_group_cookie; +eventhandler_tag pfi_ifaddr_event_cookie; #endif void pfi_kif_update(struct pfi_kif *); @@ -107,11 +120,10 @@ int pfi_unmask(void *); #ifdef __FreeBSD__ void pfi_attach_ifnet_event(void * __unused, struct ifnet *); void pfi_detach_ifnet_event(void * __unused, struct ifnet *); -void 
pfi_attach_group_event(void * __unused, struct ifg_group *); -void pfi_change_group_event(void * __unused, char *); -void pfi_detach_group_event(void * __unused, struct ifg_group *); +void pfi_attach_group_event(void *, struct ifg_group *); +void pfi_change_group_event(void *, char *); +void pfi_detach_group_event(void *, struct ifg_group *); void pfi_ifaddr_event(void * __unused, struct ifnet *); - #endif RB_PROTOTYPE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); @@ -123,22 +135,31 @@ RB_GENERATE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); void pfi_initialize(void) { - +#ifdef __FreeBSD__ + if (V_pfi_all != NULL) /* already initialized */ +#else if (pfi_all != NULL) /* already initialized */ +#endif return; - TAILQ_INIT(&pfi_statehead); #ifndef __FreeBSD__ - pool_init(&pfi_addr_pl, sizeof(struct pfi_dynaddr), 0, 0, 0, + pool_init(&V_pfi_addr_pl, sizeof(struct pfi_dynaddr), 0, 0, 0, "pfiaddrpl", &pool_allocator_nointr); #endif +#ifdef __FreeBSD__ + V_pfi_buffer_max = 64; + V_pfi_buffer = malloc(V_pfi_buffer_max * sizeof(*V_pfi_buffer), + PFI_MTYPE, M_WAITOK); + + if ((V_pfi_all = pfi_kif_get(IFG_ALL)) == NULL) +#else pfi_buffer_max = 64; pfi_buffer = malloc(pfi_buffer_max * sizeof(*pfi_buffer), PFI_MTYPE, M_WAITOK); if ((pfi_all = pfi_kif_get(IFG_ALL)) == NULL) +#endif panic("pfi_kif_get for pfi_all failed"); - #ifdef __FreeBSD__ struct ifg_group *ifg; struct ifnet *ifp; @@ -155,11 +176,11 @@ pfi_initialize(void) pfi_detach_cookie = EVENTHANDLER_REGISTER(ifnet_departure_event, pfi_detach_ifnet_event, NULL, EVENTHANDLER_PRI_ANY); pfi_attach_group_cookie = EVENTHANDLER_REGISTER(group_attach_event, - pfi_attach_group_event, NULL, EVENTHANDLER_PRI_ANY); + pfi_attach_group_event, curvnet, EVENTHANDLER_PRI_ANY); pfi_change_group_cookie = EVENTHANDLER_REGISTER(group_change_event, - pfi_change_group_event, NULL, EVENTHANDLER_PRI_ANY); + pfi_change_group_event, curvnet, EVENTHANDLER_PRI_ANY); pfi_detach_group_cookie = EVENTHANDLER_REGISTER(group_detach_event, - 
pfi_detach_group_event, NULL, EVENTHANDLER_PRI_ANY); + pfi_detach_group_event, curvnet, EVENTHANDLER_PRI_ANY); pfi_ifaddr_event_cookie = EVENTHANDLER_REGISTER(ifaddr_event, pfi_ifaddr_event, NULL, EVENTHANDLER_PRI_ANY); #endif @@ -180,18 +201,18 @@ pfi_cleanup(void) EVENTHANDLER_DEREGISTER(ifaddr_event, pfi_ifaddr_event_cookie); PF_LOCK(); - pfi_all = NULL; - while ((p = RB_MIN(pfi_ifhead, &pfi_ifs))) { + V_pfi_all = NULL; + while ((p = RB_MIN(pfi_ifhead, &V_pfi_ifs))) { if (p->pfik_rules || p->pfik_states) { printf("pfi_cleanup: dangling refs for %s\n", p->pfik_name); } - RB_REMOVE(pfi_ifhead, &pfi_ifs, p); + RB_REMOVE(pfi_ifhead, &V_pfi_ifs, p); free(p, PFI_MTYPE); } - free(pfi_buffer, PFI_MTYPE); + free(V_pfi_buffer, PFI_MTYPE); } #endif @@ -203,18 +224,21 @@ pfi_kif_get(const char *kif_name) bzero(&s, sizeof(s)); strlcpy(s.pfik_name, kif_name, sizeof(s.pfik_name)); +#ifdef __FreeBSD__ + if ((kif = RB_FIND(pfi_ifhead, &V_pfi_ifs, (struct pfi_kif *)&s)) != NULL) +#else if ((kif = RB_FIND(pfi_ifhead, &pfi_ifs, (struct pfi_kif *)&s)) != NULL) +#endif return (kif); /* create new one */ #ifdef __FreeBSD__ - if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_NOWAIT)) == NULL) + if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_NOWAIT | M_ZERO)) == NULL) #else - if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_DONTWAIT)) == NULL) + if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_DONTWAIT|M_ZERO)) == NULL) #endif return (NULL); - bzero(kif, sizeof(*kif)); strlcpy(kif->pfik_name, kif_name, sizeof(kif->pfik_name)); #ifdef __FreeBSD__ /* @@ -230,7 +254,12 @@ pfi_kif_get(const char *kif_name) #endif TAILQ_INIT(&kif->pfik_dynaddrs); +#ifdef __FreeBSD__ + RB_INSERT(pfi_ifhead, &V_pfi_ifs, kif); +#else RB_INSERT(pfi_ifhead, &pfi_ifs, kif); +#endif + return (kif); } @@ -242,8 +271,7 @@ pfi_kif_ref(struct pfi_kif *kif, enum pfi_kif_refs what) kif->pfik_rules++; break; case PFI_KIF_REF_STATE: - if (!kif->pfik_states++) - TAILQ_INSERT_TAIL(&pfi_statehead, kif, pfik_w_states); + kif->pfik_states++; 
break; default: panic("pfi_kif_ref with unknown type"); @@ -271,20 +299,27 @@ pfi_kif_unref(struct pfi_kif *kif, enum pfi_kif_refs what) printf("pfi_kif_unref: state refcount <= 0\n"); return; } - if (!--kif->pfik_states) - TAILQ_REMOVE(&pfi_statehead, kif, pfik_w_states); + kif->pfik_states--; break; default: panic("pfi_kif_unref with unknown type"); } +#ifdef __FreeBSD__ + if (kif->pfik_ifp != NULL || kif->pfik_group != NULL || kif == V_pfi_all) +#else if (kif->pfik_ifp != NULL || kif->pfik_group != NULL || kif == pfi_all) +#endif return; if (kif->pfik_rules || kif->pfik_states) return; +#ifdef __FreeBSD__ + RB_REMOVE(pfi_ifhead, &V_pfi_ifs, kif); +#else RB_REMOVE(pfi_ifhead, &pfi_ifs, kif); +#endif free(kif, PFI_MTYPE); } @@ -312,7 +347,11 @@ pfi_attach_ifnet(struct ifnet *ifp) pfi_initialize(); s = splsoftnet(); +#ifdef __FreeBSD__ + V_pfi_update++; +#else pfi_update++; +#endif if ((kif = pfi_kif_get(ifp->if_xname)) == NULL) panic("pfi_kif_get failed"); @@ -341,7 +380,11 @@ pfi_detach_ifnet(struct ifnet *ifp) return; s = splsoftnet(); +#ifdef __FreeBSD__ + V_pfi_update++; +#else pfi_update++; +#endif #ifndef __FreeBSD__ hook_disestablish(ifp->if_addrhooks, kif->pfik_ah_cookie); #endif @@ -361,7 +404,11 @@ pfi_attach_ifgroup(struct ifg_group *ifg) pfi_initialize(); s = splsoftnet(); +#ifdef __FreeBSD__ + V_pfi_update++; +#else pfi_update++; +#endif if ((kif = pfi_kif_get(ifg->ifg_group)) == NULL) panic("pfi_kif_get failed"); @@ -381,7 +428,11 @@ pfi_detach_ifgroup(struct ifg_group *ifg) return; s = splsoftnet(); +#ifdef __FreeBSD__ + V_pfi_update++; +#else pfi_update++; +#endif kif->pfik_group = NULL; ifg->ifg_pf_kif = NULL; @@ -396,7 +447,11 @@ pfi_group_change(const char *group) int s; s = splsoftnet(); +#ifdef __FreeBSD__ + V_pfi_update++; +#else pfi_update++; +#endif if ((kif = pfi_kif_get(group)) == NULL) panic("pfi_kif_get failed"); @@ -450,9 +505,14 @@ pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af) if (aw->type != PF_ADDR_DYNIFTL) return (0); 
- if ((dyn = pool_get(&pfi_addr_pl, PR_NOWAIT)) == NULL) +#ifdef __FreeBSD__ + /* XXX: revisit! */ + if ((dyn = pool_get(&V_pfi_addr_pl, PR_WAITOK | PR_ZERO)) +#else + if ((dyn = pool_get(&pfi_addr_pl, PR_WAITOK | PR_LIMITFAIL | PR_ZERO)) +#endif + == NULL) return (1); - bzero(dyn, sizeof(*dyn)); s = splsoftnet(); if (!strcmp(aw->v.ifname, "self")) @@ -485,7 +545,7 @@ pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af) goto _bad; } - if ((dyn->pfid_kt = pfr_attach_table(ruleset, tblname)) == NULL) { + if ((dyn->pfid_kt = pfr_attach_table(ruleset, tblname, 1)) == NULL) { rv = 1; goto _bad; } @@ -507,7 +567,11 @@ _bad: pf_remove_if_empty_ruleset(ruleset); if (dyn->pfid_kif != NULL) pfi_kif_unref(dyn->pfid_kif, PFI_KIF_REF_RULE); +#ifdef __FreeBSD__ + pool_put(&V_pfi_addr_pl, dyn); +#else pool_put(&pfi_addr_pl, dyn); +#endif splx(s); return (rv); } @@ -541,10 +605,18 @@ pfi_dynaddr_update(struct pfi_dynaddr *dyn) kif = dyn->pfid_kif; kt = dyn->pfid_kt; +#ifdef __FreeBSD__ + if (kt->pfrkt_larg != V_pfi_update) { +#else if (kt->pfrkt_larg != pfi_update) { +#endif /* this table needs to be brought up-to-date */ pfi_table_update(kt, kif, dyn->pfid_net, dyn->pfid_iflags); +#ifdef __FreeBSD__ + kt->pfrkt_larg = V_pfi_update; +#else kt->pfrkt_larg = pfi_update; +#endif } pfr_dynaddr_update(kt, dyn); } @@ -555,7 +627,11 @@ pfi_table_update(struct pfr_ktable *kt, struct pfi_kif *kif, int net, int flags) int e, size2 = 0; struct ifg_member *ifgm; +#ifdef __FreeBSD__ + V_pfi_buffer_cnt = 0; +#else pfi_buffer_cnt = 0; +#endif if (kif->pfik_ifp != NULL) pfi_instance_add(kif->pfik_ifp, net, flags); @@ -563,10 +639,17 @@ pfi_table_update(struct pfr_ktable *kt, struct pfi_kif *kif, int net, int flags) TAILQ_FOREACH(ifgm, &kif->pfik_group->ifg_members, ifgm_next) pfi_instance_add(ifgm->ifgm_ifp, net, flags); +#ifdef __FreeBSD__ + if ((e = pfr_set_addrs(&kt->pfrkt_t, V_pfi_buffer, V_pfi_buffer_cnt, &size2, + NULL, NULL, NULL, 0, PFR_TFLAG_ALLMASK))) + printf("pfi_table_update: 
cannot set %d new addresses " + "into table %s: %d\n", V_pfi_buffer_cnt, kt->pfrkt_name, e); +#else if ((e = pfr_set_addrs(&kt->pfrkt_t, pfi_buffer, pfi_buffer_cnt, &size2, NULL, NULL, NULL, 0, PFR_TFLAG_ALLMASK))) printf("pfi_table_update: cannot set %d new addresses " "into table %s: %d\n", pfi_buffer_cnt, kt->pfrkt_name, e); +#endif } void @@ -587,9 +670,9 @@ pfi_instance_add(struct ifnet *ifp, int net, int flags) #ifdef __FreeBSD__ /* * XXX: For point-to-point interfaces, (ifname:0) and IPv4, - * jump over addresses without a proper route to work - * around a problem with ppp not fully removing the - * address used during IPCP. + * jump over addresses without a proper route to work + * around a problem with ppp not fully removing the + * address used during IPCP. */ if ((ifp->if_flags & IFF_POINTOPOINT) && !(ia->ifa_flags & IFA_ROUTE) && @@ -644,15 +727,24 @@ pfi_address_add(struct sockaddr *sa, int af, int net) struct pfr_addr *p; int i; +#ifdef __FreeBSD__ + if (V_pfi_buffer_cnt >= V_pfi_buffer_max) { + int new_max = V_pfi_buffer_max * 2; +#else if (pfi_buffer_cnt >= pfi_buffer_max) { int new_max = pfi_buffer_max * 2; +#endif if (new_max > PFI_BUFFER_MAX) { printf("pfi_address_add: address buffer full (%d/%d)\n", +#ifdef __FreeBSD__ + V_pfi_buffer_cnt, PFI_BUFFER_MAX); +#else pfi_buffer_cnt, PFI_BUFFER_MAX); +#endif return; } - p = malloc(new_max * sizeof(*pfi_buffer), PFI_MTYPE, + p = malloc(new_max * sizeof(*V_pfi_buffer), PFI_MTYPE, #ifdef __FreeBSD__ M_NOWAIT); #else @@ -660,18 +752,34 @@ pfi_address_add(struct sockaddr *sa, int af, int net) #endif if (p == NULL) { printf("pfi_address_add: no memory to grow buffer " +#ifdef __FreeBSD__ + "(%d/%d)\n", V_pfi_buffer_cnt, PFI_BUFFER_MAX); +#else "(%d/%d)\n", pfi_buffer_cnt, PFI_BUFFER_MAX); +#endif return; } - memcpy(p, pfi_buffer, pfi_buffer_max * sizeof(*pfi_buffer)); +#ifdef __FreeBSD__ + memcpy(V_pfi_buffer, p, V_pfi_buffer_cnt * sizeof(*V_pfi_buffer)); + /* no need to zero buffer */ + free(V_pfi_buffer, 
PFI_MTYPE); + V_pfi_buffer = p; + V_pfi_buffer_max = new_max; +#else + memcpy(pfi_buffer, p, pfi_buffer_cnt * sizeof(*pfi_buffer)); /* no need to zero buffer */ free(pfi_buffer, PFI_MTYPE); pfi_buffer = p; pfi_buffer_max = new_max; +#endif } if (af == AF_INET && net > 32) net = 128; +#ifdef __FreeBSD__ + p = V_pfi_buffer + V_pfi_buffer_cnt++; +#else p = pfi_buffer + pfi_buffer_cnt++; +#endif bzero(p, sizeof(*p)); p->pfra_af = af; p->pfra_net = net; @@ -704,7 +812,11 @@ pfi_dynaddr_remove(struct pf_addr_wrap *aw) aw->p.dyn->pfid_kif = NULL; pfr_detach_table(aw->p.dyn->pfid_kt); aw->p.dyn->pfid_kt = NULL; +#ifdef __FreeBSD__ + pool_put(&V_pfi_addr_pl, aw->p.dyn); +#else pool_put(&pfi_addr_pl, aw->p.dyn); +#endif aw->p.dyn = NULL; splx(s); } @@ -725,7 +837,11 @@ pfi_kifaddr_update(void *v) struct pfi_kif *kif = (struct pfi_kif *)v; s = splsoftnet(); +#ifdef __FreeBSD__ + V_pfi_update++; +#else pfi_update++; +#endif pfi_kif_update(kif); splx(s); } @@ -737,49 +853,61 @@ pfi_if_compare(struct pfi_kif *p, struct pfi_kif *q) } void -pfi_fill_oldstatus(struct pf_status *pfs) +pfi_update_status(const char *name, struct pf_status *pfs) { struct pfi_kif *p; - struct pfi_kif_cmp key; + struct pfi_kif_cmp key; + struct ifg_member p_member, *ifgm; + TAILQ_HEAD(, ifg_member) ifg_members; int i, j, k, s; - strlcpy(key.pfik_name, pfs->ifname, sizeof(key.pfik_name)); + strlcpy(key.pfik_name, name, sizeof(key.pfik_name)); s = splsoftnet(); +#ifdef __FreeBSD__ + p = RB_FIND(pfi_ifhead, &V_pfi_ifs, (struct pfi_kif *)&key); +#else p = RB_FIND(pfi_ifhead, &pfi_ifs, (struct pfi_kif *)&key); +#endif if (p == NULL) { splx(s); return; } - bzero(pfs->pcounters, sizeof(pfs->pcounters)); - bzero(pfs->bcounters, sizeof(pfs->bcounters)); - for (i = 0; i < 2; i++) - for (j = 0; j < 2; j++) - for (k = 0; k < 2; k++) { - pfs->pcounters[i][j][k] = - p->pfik_packets[i][j][k]; - pfs->bcounters[i][j] += - p->pfik_bytes[i][j][k]; - } - splx(s); -} - -int -pfi_clr_istats(const char *name) -{ - struct 
pfi_kif *p; - int s; + if (p->pfik_group != NULL) { + bcopy(&p->pfik_group->ifg_members, &ifg_members, + sizeof(ifg_members)); + } else { + /* build a temporary list for p only */ + bzero(&p_member, sizeof(p_member)); + p_member.ifgm_ifp = p->pfik_ifp; + TAILQ_INIT(&ifg_members); + TAILQ_INSERT_TAIL(&ifg_members, &p_member, ifgm_next); + } + if (pfs) { + bzero(pfs->pcounters, sizeof(pfs->pcounters)); + bzero(pfs->bcounters, sizeof(pfs->bcounters)); + } + TAILQ_FOREACH(ifgm, &ifg_members, ifgm_next) { + if (ifgm->ifgm_ifp == NULL) + continue; + p = (struct pfi_kif *)ifgm->ifgm_ifp->if_pf_kif; - s = splsoftnet(); - RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { - if (pfi_skip_if(name, p)) + /* just clear statistics */ + if (pfs == NULL) { + bzero(p->pfik_packets, sizeof(p->pfik_packets)); + bzero(p->pfik_bytes, sizeof(p->pfik_bytes)); + p->pfik_tzero = time_second; continue; - bzero(p->pfik_packets, sizeof(p->pfik_packets)); - bzero(p->pfik_bytes, sizeof(p->pfik_bytes)); - p->pfik_tzero = time_second; + } + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) + for (k = 0; k < 2; k++) { + pfs->pcounters[i][j][k] += + p->pfik_packets[i][j][k]; + pfs->bcounters[i][j] += + p->pfik_bytes[i][j][k]; + } } splx(s); - - return (0); } int @@ -792,8 +920,13 @@ pfi_get_ifaces(const char *name, struct pfi_kif *buf, int *size) #endif s = splsoftnet(); +#ifdef __FreeBSD__ + for (p = RB_MIN(pfi_ifhead, &V_pfi_ifs); p; p = nextp) { + nextp = RB_NEXT(pfi_ifhead, &V_pfi_ifs, p); +#else for (p = RB_MIN(pfi_ifhead, &pfi_ifs); p; p = nextp) { nextp = RB_NEXT(pfi_ifhead, &pfi_ifs, p); +#endif if (pfi_skip_if(name, p)) continue; if (*size > n++) { @@ -810,7 +943,11 @@ pfi_get_ifaces(const char *name, struct pfi_kif *buf, int *size) splx(s); return (EFAULT); } +#ifdef __FreeBSD__ + nextp = RB_NEXT(pfi_ifhead, &V_pfi_ifs, p); +#else nextp = RB_NEXT(pfi_ifhead, &pfi_ifs, p); +#endif pfi_kif_unref(p, PFI_KIF_REF_RULE); } } @@ -845,7 +982,11 @@ pfi_set_flags(const char *name, int flags) int s; s = 
splsoftnet(); +#ifdef __FreeBSD__ + RB_FOREACH(p, pfi_ifhead, &V_pfi_ifs) { +#else RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { +#endif if (pfi_skip_if(name, p)) continue; p->pfik_flags |= flags; @@ -861,7 +1002,11 @@ pfi_clear_flags(const char *name, int flags) int s; s = splsoftnet(); +#ifdef __FreeBSD__ + RB_FOREACH(p, pfi_ifhead, &V_pfi_ifs) { +#else RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { +#endif if (pfi_skip_if(name, p)) continue; p->pfik_flags &= ~flags; @@ -894,55 +1039,73 @@ pfi_unmask(void *addr) void pfi_attach_ifnet_event(void *arg __unused, struct ifnet *ifp) { + + CURVNET_SET(ifp->if_vnet); PF_LOCK(); pfi_attach_ifnet(ifp); #ifdef ALTQ pf_altq_ifnet_event(ifp, 0); #endif PF_UNLOCK(); + CURVNET_RESTORE(); } void pfi_detach_ifnet_event(void *arg __unused, struct ifnet *ifp) { + + CURVNET_SET(ifp->if_vnet); PF_LOCK(); pfi_detach_ifnet(ifp); #ifdef ALTQ pf_altq_ifnet_event(ifp, 1); #endif PF_UNLOCK(); + CURVNET_RESTORE(); } void -pfi_attach_group_event(void *arg __unused, struct ifg_group *ifg) +pfi_attach_group_event(void *arg , struct ifg_group *ifg) { + + CURVNET_SET((struct vnet *)arg); PF_LOCK(); pfi_attach_ifgroup(ifg); PF_UNLOCK(); + CURVNET_RESTORE(); } void -pfi_change_group_event(void *arg __unused, char *gname) +pfi_change_group_event(void *arg, char *gname) { + + CURVNET_SET((struct vnet *)arg); PF_LOCK(); pfi_group_change(gname); PF_UNLOCK(); + CURVNET_RESTORE(); } void -pfi_detach_group_event(void *arg __unused, struct ifg_group *ifg) +pfi_detach_group_event(void *arg, struct ifg_group *ifg) { + + CURVNET_SET((struct vnet *)arg); PF_LOCK(); pfi_detach_ifgroup(ifg); PF_UNLOCK(); + CURVNET_RESTORE(); } void pfi_ifaddr_event(void *arg __unused, struct ifnet *ifp) { + + CURVNET_SET(ifp->if_vnet); PF_LOCK(); if (ifp && ifp->if_pf_kif) pfi_kifaddr_update(ifp->if_pf_kif); PF_UNLOCK(); + CURVNET_RESTORE(); } #endif /* __FreeBSD__ */ diff --git a/sys/contrib/pf/net/pf_ioctl.c b/sys/contrib/pf/net/pf_ioctl.c index c41fcc6..2ca1630 100644 --- 
a/sys/contrib/pf/net/pf_ioctl.c +++ b/sys/contrib/pf/net/pf_ioctl.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_ioctl.c,v 1.175 2007/02/26 22:47:43 deraadt Exp $ */ +/* $OpenBSD: pf_ioctl.c,v 1.213 2009/02/15 21:46:12 mbalmer Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -45,27 +45,26 @@ __FBSDID("$FreeBSD$"); #include "opt_pf.h" #ifdef DEV_BPF -#define NBPFILTER DEV_BPF +#define NBPFILTER DEV_BPF #else -#define NBPFILTER 0 +#define NBPFILTER 0 #endif #ifdef DEV_PFLOG -#define NPFLOG DEV_PFLOG +#define NPFLOG DEV_PFLOG #else -#define NPFLOG 0 +#define NPFLOG 0 #endif #ifdef DEV_PFSYNC -#define NPFSYNC DEV_PFSYNC +#define NPFSYNC DEV_PFSYNC #else -#define NPFSYNC 0 +#define NPFSYNC 0 #endif #else -#include "bpfilter.h" -#include "pflog.h" #include "pfsync.h" +#include "pflog.h" #endif #include @@ -77,8 +76,9 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #ifdef __FreeBSD__ +#include +#include #include #include #include @@ -100,6 +100,7 @@ __FBSDID("$FreeBSD$"); #ifdef __FreeBSD__ #include #endif +#include #include #include @@ -116,11 +117,11 @@ __FBSDID("$FreeBSD$"); #endif #include -#if NPFSYNC > 0 #include -#endif /* NPFSYNC > 0 */ +#if NPFLOG > 0 #include +#endif /* NPFLOG > 0 */ #ifdef INET6 #include @@ -156,7 +157,7 @@ void pf_empty_pool(struct pf_palist *); #ifdef __FreeBSD__ int pfioctl(struct cdev *, u_long, caddr_t, int, struct thread *); #else -int pfioctl(struct cdev *, u_long, caddr_t, int, struct proc *); +int pfioctl(dev_t, u_long, caddr_t, int, struct proc *); #endif #ifdef ALTQ int pf_begin_altq(u_int32_t *); @@ -171,25 +172,43 @@ int pf_setup_pfsync_matching(struct pf_ruleset *); void pf_hash_rule(MD5_CTX *, struct pf_rule *); void pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *); int pf_commit_rules(u_int32_t, int, char *); +int pf_addr_setup(struct pf_ruleset *, + struct pf_addr_wrap *, sa_family_t); +void pf_addr_copyout(struct pf_addr_wrap *); + +#define TAGID_MAX 50000 -struct pf_rule pf_default_rule; #ifdef __FreeBSD__ -struct 
sx pf_consistency_lock; -SX_SYSINIT(pf_consistency_lock, &pf_consistency_lock, "pf_statetbl_lock"); -#else -struct rwlock pf_consistency_lock = RWLOCK_INITIALIZER; +VNET_DEFINE(struct pf_rule, pf_default_rule); +VNET_DEFINE(struct sx, pf_consistency_lock); + +#ifdef ALTQ +static VNET_DEFINE(int, pf_altq_running); +#define V_pf_altq_running VNET(pf_altq_running) #endif + +TAILQ_HEAD(pf_tags, pf_tagname); + +#define V_pf_tags VNET(pf_tags) +VNET_DEFINE(struct pf_tags, pf_tags); +#define V_pf_qids VNET(pf_qids) +VNET_DEFINE(struct pf_tags, pf_qids); + +#else /* !__FreeBSD__ */ +struct pf_rule pf_default_rule; +struct rwlock pf_consistency_lock = RWLOCK_INITIALIZER("pfcnslk"); #ifdef ALTQ static int pf_altq_running; #endif -#define TAGID_MAX 50000 TAILQ_HEAD(pf_tags, pf_tagname) pf_tags = TAILQ_HEAD_INITIALIZER(pf_tags), pf_qids = TAILQ_HEAD_INITIALIZER(pf_qids); +#endif /* __FreeBSD__ */ #if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE) #error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE #endif + u_int16_t tagname2tag(struct pf_tags *, char *); void tag2tagname(struct pf_tags *, u_int16_t, char *); void tag_unref(struct pf_tags *, u_int16_t); @@ -197,12 +216,15 @@ int pf_rtlabel_add(struct pf_addr_wrap *); void pf_rtlabel_remove(struct pf_addr_wrap *); void pf_rtlabel_copyout(struct pf_addr_wrap *); +#ifdef __FreeBSD__ +#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x +#else #define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x - +#endif #ifdef __FreeBSD__ -static struct cdev *pf_dev; - +struct cdev *pf_dev; + /* * XXX - These are new and need to be checked when moveing to a new version */ @@ -218,22 +240,22 @@ static void pf_clear_srcnodes(void); */ #ifdef INET static int pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, - int dir, struct inpcb *inp); + int dir, struct inpcb *inp); static int pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, - int dir, struct inpcb *inp); + int dir, struct inpcb *inp); #endif #ifdef INET6 static int 
pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, - int dir, struct inpcb *inp); + int dir, struct inpcb *inp); static int pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, - int dir, struct inpcb *inp); + int dir, struct inpcb *inp); #endif - -static int hook_pf(void); -static int dehook_pf(void); -static int shutdown_pf(void); -static int pf_load(void); -static int pf_unload(void); + +static int hook_pf(void); +static int dehook_pf(void); +static int shutdown_pf(void); +static int pf_load(void); +static int pf_unload(void); static struct cdevsw pf_cdevsw = { .d_ioctl = pfioctl, @@ -241,78 +263,97 @@ static struct cdevsw pf_cdevsw = { .d_version = D_VERSION, }; -static volatile int pf_pfil_hooked = 0; -int pf_end_threads = 0; -struct mtx pf_task_mtx; -pflog_packet_t *pflog_packet_ptr = NULL; - -int debug_pfugidhack = 0; -SYSCTL_INT(_debug, OID_AUTO, pfugidhack, CTLFLAG_RW, &debug_pfugidhack, 0, - "Enable/disable pf user/group rules mpsafe hack"); +static volatile VNET_DEFINE(int, pf_pfil_hooked); +#define V_pf_pfil_hooked VNET(pf_pfil_hooked) +VNET_DEFINE(int, pf_end_threads); +VNET_DEFINE(struct mtx, pf_task_mtx); + +/* pfsync */ +pfsync_state_import_t *pfsync_state_import_ptr = NULL; +pfsync_insert_state_t *pfsync_insert_state_ptr = NULL; +pfsync_update_state_t *pfsync_update_state_ptr = NULL; +pfsync_delete_state_t *pfsync_delete_state_ptr = NULL; +pfsync_clear_states_t *pfsync_clear_states_ptr = NULL; +pfsync_state_in_use_t *pfsync_state_in_use_ptr = NULL; +pfsync_defer_t *pfsync_defer_ptr = NULL; +pfsync_up_t *pfsync_up_ptr = NULL; +/* pflow */ +export_pflow_t *export_pflow_ptr = NULL; +/* pflog */ +pflog_packet_t *pflog_packet_ptr = NULL; + +VNET_DEFINE(int, debug_pfugidhack); +SYSCTL_VNET_INT(_debug, OID_AUTO, pfugidhack, CTLFLAG_RW, + &VNET_NAME(debug_pfugidhack), 0, + "Enable/disable pf user/group rules mpsafe hack"); void init_pf_mutex(void) { - mtx_init(&pf_task_mtx, "pf task mtx", NULL, MTX_DEF); + + mtx_init(&V_pf_task_mtx, "pf 
task mtx", NULL, MTX_DEF); } void destroy_pf_mutex(void) { - mtx_destroy(&pf_task_mtx); -} + mtx_destroy(&V_pf_task_mtx); +} void init_zone_var(void) { - pf_src_tree_pl = pf_rule_pl = NULL; - pf_state_pl = pf_altq_pl = pf_pooladdr_pl = NULL; - pf_frent_pl = pf_frag_pl = pf_cache_pl = pf_cent_pl = NULL; - pf_state_scrub_pl = NULL; - pfr_ktable_pl = pfr_kentry_pl = NULL; + V_pf_src_tree_pl = V_pf_rule_pl = NULL; + V_pf_state_pl = V_pf_state_key_pl = V_pf_state_item_pl = NULL; + V_pf_altq_pl = V_pf_pooladdr_pl = NULL; + V_pf_frent_pl = V_pf_frag_pl = V_pf_cache_pl = V_pf_cent_pl = NULL; + V_pf_state_scrub_pl = NULL; + V_pfr_ktable_pl = V_pfr_kentry_pl = NULL; } void cleanup_pf_zone(void) { - UMA_DESTROY(pf_src_tree_pl); - UMA_DESTROY(pf_rule_pl); - UMA_DESTROY(pf_state_pl); - UMA_DESTROY(pf_altq_pl); - UMA_DESTROY(pf_pooladdr_pl); - UMA_DESTROY(pf_frent_pl); - UMA_DESTROY(pf_frag_pl); - UMA_DESTROY(pf_cache_pl); - UMA_DESTROY(pf_cent_pl); - UMA_DESTROY(pfr_ktable_pl); - UMA_DESTROY(pfr_kentry_pl2); - UMA_DESTROY(pfr_kentry_pl); - UMA_DESTROY(pf_state_scrub_pl); - UMA_DESTROY(pfi_addr_pl); + UMA_DESTROY(V_pf_src_tree_pl); + UMA_DESTROY(V_pf_rule_pl); + UMA_DESTROY(V_pf_state_pl); + UMA_DESTROY(V_pf_state_key_pl); + UMA_DESTROY(V_pf_state_item_pl); + UMA_DESTROY(V_pf_altq_pl); + UMA_DESTROY(V_pf_pooladdr_pl); + UMA_DESTROY(V_pf_frent_pl); + UMA_DESTROY(V_pf_frag_pl); + UMA_DESTROY(V_pf_cache_pl); + UMA_DESTROY(V_pf_cent_pl); + UMA_DESTROY(V_pfr_ktable_pl); + UMA_DESTROY(V_pfr_kentry_pl); + UMA_DESTROY(V_pf_state_scrub_pl); + UMA_DESTROY(V_pfi_addr_pl); } int pfattach(void) { - u_int32_t *my_timeout = pf_default_rule.timeout; + u_int32_t *my_timeout = V_pf_default_rule.timeout; int error = 1; do { - UMA_CREATE(pf_src_tree_pl,struct pf_src_node, "pfsrctrpl"); - UMA_CREATE(pf_rule_pl, struct pf_rule, "pfrulepl"); - UMA_CREATE(pf_state_pl, struct pf_state, "pfstatepl"); - UMA_CREATE(pf_altq_pl, struct pf_altq, "pfaltqpl"); - UMA_CREATE(pf_pooladdr_pl, struct pf_pooladdr, 
"pfpooladdrpl"); - UMA_CREATE(pfr_ktable_pl, struct pfr_ktable, "pfrktable"); - UMA_CREATE(pfr_kentry_pl, struct pfr_kentry, "pfrkentry"); - UMA_CREATE(pfr_kentry_pl2, struct pfr_kentry, "pfrkentry2"); - UMA_CREATE(pf_frent_pl, struct pf_frent, "pffrent"); - UMA_CREATE(pf_frag_pl, struct pf_fragment, "pffrag"); - UMA_CREATE(pf_cache_pl, struct pf_fragment, "pffrcache"); - UMA_CREATE(pf_cent_pl, struct pf_frcache, "pffrcent"); - UMA_CREATE(pf_state_scrub_pl, struct pf_state_scrub, + UMA_CREATE(V_pf_src_tree_pl, struct pf_src_node, "pfsrctrpl"); + UMA_CREATE(V_pf_rule_pl, struct pf_rule, "pfrulepl"); + UMA_CREATE(V_pf_state_pl, struct pf_state, "pfstatepl"); + UMA_CREATE(V_pf_state_key_pl, struct pf_state, "pfstatekeypl"); + UMA_CREATE(V_pf_state_item_pl, struct pf_state, "pfstateitempl"); + UMA_CREATE(V_pf_altq_pl, struct pf_altq, "pfaltqpl"); + UMA_CREATE(V_pf_pooladdr_pl, struct pf_pooladdr, "pfpooladdrpl"); + UMA_CREATE(V_pfr_ktable_pl, struct pfr_ktable, "pfrktable"); + UMA_CREATE(V_pfr_kentry_pl, struct pfr_kentry, "pfrkentry"); + UMA_CREATE(V_pf_frent_pl, struct pf_frent, "pffrent"); + UMA_CREATE(V_pf_frag_pl, struct pf_fragment, "pffrag"); + UMA_CREATE(V_pf_cache_pl, struct pf_fragment, "pffrcache"); + UMA_CREATE(V_pf_cent_pl, struct pf_frcache, "pffrcent"); + UMA_CREATE(V_pf_state_scrub_pl, struct pf_state_scrub, "pfstatescrub"); - UMA_CREATE(pfi_addr_pl, struct pfi_dynaddr, "pfiaddrpl"); + UMA_CREATE(V_pfi_addr_pl, struct pfi_dynaddr, "pfiaddrpl"); error = 0; } while(0); if (error) { @@ -327,34 +368,35 @@ pfattach(void) return (error); } - pf_pool_limits[PF_LIMIT_STATES].pp = pf_state_pl; - pf_pool_limits[PF_LIMIT_STATES].limit = PFSTATE_HIWAT; - pf_pool_limits[PF_LIMIT_SRC_NODES].pp = pf_src_tree_pl; - pf_pool_limits[PF_LIMIT_SRC_NODES].limit = PFSNODE_HIWAT; - pf_pool_limits[PF_LIMIT_FRAGS].pp = pf_frent_pl; - pf_pool_limits[PF_LIMIT_FRAGS].limit = PFFRAG_FRENT_HIWAT; - pf_pool_limits[PF_LIMIT_TABLES].pp = pfr_ktable_pl; - 
pf_pool_limits[PF_LIMIT_TABLES].limit = PFR_KTABLE_HIWAT; - pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].pp = pfr_kentry_pl; - pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].limit = PFR_KENTRY_HIWAT; - uma_zone_set_max(pf_pool_limits[PF_LIMIT_STATES].pp, - pf_pool_limits[PF_LIMIT_STATES].limit); - - RB_INIT(&tree_src_tracking); - RB_INIT(&pf_anchors); + V_pf_pool_limits[PF_LIMIT_STATES].pp = V_pf_state_pl; + V_pf_pool_limits[PF_LIMIT_STATES].limit = PFSTATE_HIWAT; + V_pf_pool_limits[PF_LIMIT_SRC_NODES].pp = V_pf_src_tree_pl; + V_pf_pool_limits[PF_LIMIT_SRC_NODES].limit = PFSNODE_HIWAT; + V_pf_pool_limits[PF_LIMIT_FRAGS].pp = V_pf_frent_pl; + V_pf_pool_limits[PF_LIMIT_FRAGS].limit = PFFRAG_FRENT_HIWAT; + V_pf_pool_limits[PF_LIMIT_TABLES].pp = V_pfr_ktable_pl; + V_pf_pool_limits[PF_LIMIT_TABLES].limit = PFR_KTABLE_HIWAT; + V_pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].pp = V_pfr_kentry_pl; + V_pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].limit = PFR_KENTRY_HIWAT; + uma_zone_set_max(V_pf_pool_limits[PF_LIMIT_STATES].pp, + V_pf_pool_limits[PF_LIMIT_STATES].limit); + + RB_INIT(&V_tree_src_tracking); + RB_INIT(&V_pf_anchors); pf_init_ruleset(&pf_main_ruleset); - TAILQ_INIT(&pf_altqs[0]); - TAILQ_INIT(&pf_altqs[1]); - TAILQ_INIT(&pf_pabuf); - pf_altqs_active = &pf_altqs[0]; - pf_altqs_inactive = &pf_altqs[1]; - TAILQ_INIT(&state_list); + + TAILQ_INIT(&V_pf_altqs[0]); + TAILQ_INIT(&V_pf_altqs[1]); + TAILQ_INIT(&V_pf_pabuf); + V_pf_altqs_active = &V_pf_altqs[0]; + V_pf_altqs_inactive = &V_pf_altqs[1]; + TAILQ_INIT(&V_state_list); /* default rule should never be garbage collected */ - pf_default_rule.entries.tqe_prev = &pf_default_rule.entries.tqe_next; - pf_default_rule.action = PF_PASS; - pf_default_rule.nr = -1; - pf_default_rule.rtableid = -1; + V_pf_default_rule.entries.tqe_prev = &V_pf_default_rule.entries.tqe_next; + V_pf_default_rule.action = PF_PASS; + V_pf_default_rule.nr = -1; + V_pf_default_rule.rtableid = -1; /* initialize default timeouts */ my_timeout[PFTM_TCP_FIRST_PACKET] = 
PFTM_TCP_FIRST_PACKET_VAL; @@ -379,20 +421,24 @@ pfattach(void) my_timeout[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END; pf_normalize_init(); - bzero(&pf_status, sizeof(pf_status)); - pf_status.debug = PF_DEBUG_URGENT; - pf_pfil_hooked = 0; + bzero(&V_pf_status, sizeof(V_pf_status)); + V_pf_status.debug = PF_DEBUG_URGENT; + + V_pf_pfil_hooked = 0; /* XXX do our best to avoid a conflict */ - pf_status.hostid = arc4random(); + V_pf_status.hostid = arc4random(); - if (kproc_create(pf_purge_thread, NULL, NULL, 0, 0, "pfpurge")) + if (kproc_create(pf_purge_thread, curvnet, NULL, 0, 0, "pfpurge")) return (ENXIO); + m_addr_chg_pf_p = pf_pkt_addr_changed; + return (error); } #else /* !__FreeBSD__ */ + void pfattach(int num) { @@ -404,6 +450,10 @@ pfattach(int num) "pfsrctrpl", NULL); pool_init(&pf_state_pl, sizeof(struct pf_state), 0, 0, 0, "pfstatepl", NULL); + pool_init(&pf_state_key_pl, sizeof(struct pf_state_key), 0, 0, 0, + "pfstatekeypl", NULL); + pool_init(&pf_state_item_pl, sizeof(struct pf_state_item), 0, 0, 0, + "pfstateitempl", NULL); pool_init(&pf_altq_pl, sizeof(struct pf_altq), 0, 0, 0, "pfaltqpl", &pool_allocator_nointr); pool_init(&pf_pooladdr_pl, sizeof(struct pf_pooladdr), 0, 0, 0, @@ -415,7 +465,7 @@ pfattach(int num) pool_sethardlimit(pf_pool_limits[PF_LIMIT_STATES].pp, pf_pool_limits[PF_LIMIT_STATES].limit, NULL, 0); - if (ctob(physmem) <= 100*1024*1024) + if (physmem <= atop(100*1024*1024)) pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].limit = PFR_KENTRY_HIWAT_SMALL; @@ -465,32 +515,32 @@ pfattach(int num) pf_status.hostid = arc4random(); /* require process context to purge states, so perform in a thread */ - kproc_create_deferred(pf_thread_create, NULL); + kthread_create_deferred(pf_thread_create, NULL); } void pf_thread_create(void *v) { - if (kproc_create(pf_purge_thread, NULL, NULL, "pfpurge")) + if (kthread_create(pf_purge_thread, NULL, NULL, "pfpurge")) panic("pfpurge thread"); } int -pfopen(struct cdev *dev, int flags, int fmt, struct proc *p) +pfopen(dev_t 
dev, int flags, int fmt, struct proc *p) { - if (dev2unit(dev) >= 1) + if (minor(dev) >= 1) return (ENXIO); return (0); } int -pfclose(struct cdev *dev, int flags, int fmt, struct proc *p) +pfclose(dev_t dev, int flags, int fmt, struct proc *p) { - if (dev2unit(dev) >= 1) + if (minor(dev) >= 1) return (ENXIO); return (0); } -#endif /* __FreeBSD__ */ +#endif struct pf_pool * pf_get_pool(char *anchor, u_int32_t ticket, u_int8_t rule_action, @@ -557,7 +607,11 @@ pf_empty_pool(struct pf_palist *poola) pf_tbladdr_remove(&empty_pool_pa->addr); pfi_kif_unref(empty_pool_pa->kif, PFI_KIF_REF_RULE); TAILQ_REMOVE(poola, empty_pool_pa, entries); +#ifdef __FreeBSD__ + pool_put(&V_pf_pooladdr_pl, empty_pool_pa); +#else pool_put(&pf_pooladdr_pl, empty_pool_pa); +#endif } } @@ -565,7 +619,7 @@ void pf_rm_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule) { if (rulequeue != NULL) { - if (rule->states <= 0) { + if (rule->states_cur <= 0) { /* * XXX - we need to remove the table *before* detaching * the rule to make sure the table code does not delete @@ -581,7 +635,7 @@ pf_rm_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule) rule->nr = -1; } - if (rule->states > 0 || rule->src_nodes > 0 || + if (rule->states_cur > 0 || rule->src_nodes > 0 || rule->entries.tqe_prev != NULL) return; pf_tag_unref(rule->tag); @@ -604,7 +658,11 @@ pf_rm_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule) pfi_kif_unref(rule->kif, PFI_KIF_REF_RULE); pf_anchor_remove(rule); pf_empty_pool(&rule->rpool.list); +#ifdef __FreeBSD__ + pool_put(&V_pf_rule_pl, rule); +#else pool_put(&pf_rule_pl, rule); +#endif } u_int16_t @@ -635,11 +693,9 @@ tagname2tag(struct pf_tags *head, char *tagname) return (0); /* allocate and fill new struct pf_tagname */ - tag = (struct pf_tagname *)malloc(sizeof(struct pf_tagname), - M_TEMP, M_NOWAIT); + tag = malloc(sizeof(*tag), M_TEMP, M_NOWAIT|M_ZERO); if (tag == NULL) return (0); - bzero(tag, sizeof(struct pf_tagname)); strlcpy(tag->name, tagname, 
sizeof(tag->name)); tag->tag = new_tagid; tag->ref++; @@ -687,13 +743,21 @@ tag_unref(struct pf_tags *head, u_int16_t tag) u_int16_t pf_tagname2tag(char *tagname) { +#ifdef __FreeBSD__ + return (tagname2tag(&V_pf_tags, tagname)); +#else return (tagname2tag(&pf_tags, tagname)); +#endif } void pf_tag2tagname(u_int16_t tagid, char *p) { +#ifdef __FreeBSD__ + tag2tagname(&V_pf_tags, tagid, p); +#else tag2tagname(&pf_tags, tagid, p); +#endif } void @@ -701,7 +765,11 @@ pf_tag_ref(u_int16_t tag) { struct pf_tagname *t; +#ifdef __FreeBSD__ + TAILQ_FOREACH(t, &V_pf_tags, entries) +#else TAILQ_FOREACH(t, &pf_tags, entries) +#endif if (t->tag == tag) break; if (t != NULL) @@ -711,7 +779,11 @@ pf_tag_ref(u_int16_t tag) void pf_tag_unref(u_int16_t tag) { +#ifdef __FreeBSD__ + tag_unref(&V_pf_tags, tag); +#else tag_unref(&pf_tags, tag); +#endif } int @@ -764,19 +836,31 @@ pf_rtlabel_copyout(struct pf_addr_wrap *a) u_int32_t pf_qname2qid(char *qname) { +#ifdef __FreeBSD__ + return ((u_int32_t)tagname2tag(&V_pf_qids, qname)); +#else return ((u_int32_t)tagname2tag(&pf_qids, qname)); +#endif } void pf_qid2qname(u_int32_t qid, char *p) { +#ifdef __FreeBSD__ + tag2tagname(&V_pf_qids, (u_int16_t)qid, p); +#else tag2tagname(&pf_qids, (u_int16_t)qid, p); +#endif } void pf_qid_unref(u_int32_t qid) { +#ifdef __FreeBSD__ + tag_unref(&V_pf_qids, (u_int16_t)qid); +#else tag_unref(&pf_qids, (u_int16_t)qid); +#endif } int @@ -786,24 +870,35 @@ pf_begin_altq(u_int32_t *ticket) int error = 0; /* Purge the old altq list */ - while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) { - TAILQ_REMOVE(pf_altqs_inactive, altq, entries); #ifdef __FreeBSD__ + while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) { + TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries); if (altq->qname[0] == 0 && (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { #else + while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) { + TAILQ_REMOVE(pf_altqs_inactive, altq, entries); if (altq->qname[0] == 0) { #endif /* detach 
and destroy the discipline */ error = altq_remove(altq); } else pf_qid_unref(altq->qid); +#ifdef __FreeBSD__ + pool_put(&V_pf_altq_pl, altq); +#else pool_put(&pf_altq_pl, altq); +#endif } if (error) return (error); +#ifdef __FreeBSD__ + *ticket = ++V_ticket_altqs_inactive; + V_altqs_inactive_open = 1; +#else *ticket = ++ticket_altqs_inactive; altqs_inactive_open = 1; +#endif return (0); } @@ -813,24 +908,37 @@ pf_rollback_altq(u_int32_t ticket) struct pf_altq *altq; int error = 0; +#ifdef __FreeBSD__ + if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive) + return (0); + /* Purge the old altq list */ + while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) { + TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries); + if (altq->qname[0] == 0 && + (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { +#else if (!altqs_inactive_open || ticket != ticket_altqs_inactive) return (0); /* Purge the old altq list */ while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) { TAILQ_REMOVE(pf_altqs_inactive, altq, entries); -#ifdef __FreeBSD__ - if (altq->qname[0] == 0 && - (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { -#else if (altq->qname[0] == 0) { #endif /* detach and destroy the discipline */ error = altq_remove(altq); } else pf_qid_unref(altq->qid); +#ifdef __FreeBSD__ + pool_put(&V_pf_altq_pl, altq); +#else pool_put(&pf_altq_pl, altq); +#endif } +#ifdef __FreeBSD__ + V_altqs_inactive_open = 0; +#else altqs_inactive_open = 0; +#endif return (error); } @@ -841,27 +949,43 @@ pf_commit_altq(u_int32_t ticket) struct pf_altq *altq; int s, err, error = 0; +#ifdef __FreeBSD__ + if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive) +#else if (!altqs_inactive_open || ticket != ticket_altqs_inactive) +#endif return (EBUSY); /* swap altqs, keep the old. 
*/ s = splsoftnet(); +#ifdef __FreeBSD__ + old_altqs = V_pf_altqs_active; + V_pf_altqs_active = V_pf_altqs_inactive; + V_pf_altqs_inactive = old_altqs; + V_ticket_altqs_active = V_ticket_altqs_inactive; +#else old_altqs = pf_altqs_active; pf_altqs_active = pf_altqs_inactive; pf_altqs_inactive = old_altqs; ticket_altqs_active = ticket_altqs_inactive; +#endif /* Attach new disciplines */ - TAILQ_FOREACH(altq, pf_altqs_active, entries) { #ifdef __FreeBSD__ - if (altq->qname[0] == 0 && - (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { + TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { + if (altq->qname[0] == 0 && + (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { #else + TAILQ_FOREACH(altq, pf_altqs_active, entries) { if (altq->qname[0] == 0) { #endif /* attach the discipline */ error = altq_pfattach(altq); +#ifdef __FreeBSD__ + if (error == 0 && V_pf_altq_running) +#else if (error == 0 && pf_altq_running) +#endif error = pf_enable_altq(altq); if (error != 0) { splx(s); @@ -871,16 +995,22 @@ pf_commit_altq(u_int32_t ticket) } /* Purge the old altq list */ - while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) { - TAILQ_REMOVE(pf_altqs_inactive, altq, entries); #ifdef __FreeBSD__ + while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) { + TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries); if (altq->qname[0] == 0 && (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { #else + while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) { + TAILQ_REMOVE(pf_altqs_inactive, altq, entries); if (altq->qname[0] == 0) { #endif /* detach and destroy the discipline */ +#ifdef __FreeBSD__ + if (V_pf_altq_running) +#else if (pf_altq_running) +#endif error = pf_disable_altq(altq); err = altq_pfdetach(altq); if (err != 0 && error == 0) @@ -890,11 +1020,19 @@ pf_commit_altq(u_int32_t ticket) error = err; } else pf_qid_unref(altq->qid); +#ifdef __FreeBSD__ + pool_put(&V_pf_altq_pl, altq); +#else pool_put(&pf_altq_pl, altq); +#endif } splx(s); +#ifdef __FreeBSD__ + 
V_altqs_inactive_open = 0; +#else altqs_inactive_open = 0; +#endif return (error); } @@ -969,22 +1107,32 @@ pf_disable_altq(struct pf_altq *altq) void pf_altq_ifnet_event(struct ifnet *ifp, int remove) { - struct ifnet *ifp1; - struct pf_altq *a1, *a2, *a3; - u_int32_t ticket; - int error = 0; + struct ifnet *ifp1; + struct pf_altq *a1, *a2, *a3; + u_int32_t ticket; + int error = 0; /* Interrupt userland queue modifications */ +#ifdef __FreeBSD__ + if (V_altqs_inactive_open) + pf_rollback_altq(V_ticket_altqs_inactive); +#else if (altqs_inactive_open) pf_rollback_altq(ticket_altqs_inactive); +#endif /* Start new altq ruleset */ if (pf_begin_altq(&ticket)) return; /* Copy the current active set */ +#ifdef __FreeBSD__ + TAILQ_FOREACH(a1, V_pf_altqs_active, entries) { + a2 = pool_get(&V_pf_altq_pl, PR_NOWAIT); +#else TAILQ_FOREACH(a1, pf_altqs_active, entries) { a2 = pool_get(&pf_altq_pl, PR_NOWAIT); +#endif if (a2 == NULL) { error = ENOMEM; break; @@ -994,11 +1142,19 @@ pf_altq_ifnet_event(struct ifnet *ifp, int remove) if (a2->qname[0] != 0) { if ((a2->qid = pf_qname2qid(a2->qname)) == 0) { error = EBUSY; +#ifdef __FreeBSD__ + pool_put(&V_pf_altq_pl, a2); +#else pool_put(&pf_altq_pl, a2); +#endif break; } a2->altq_disc = NULL; +#ifdef __FreeBSD__ + TAILQ_FOREACH(a3, V_pf_altqs_inactive, entries) { +#else TAILQ_FOREACH(a3, pf_altqs_inactive, entries) { +#endif if (strncmp(a3->ifname, a2->ifname, IFNAMSIZ) == 0 && a3->qname[0] == 0) { a2->altq_disc = a3->altq_disc; @@ -1016,23 +1172,35 @@ pf_altq_ifnet_event(struct ifnet *ifp, int remove) error = altq_add(a2); PF_LOCK(); +#ifdef __FreeBSD__ + if (ticket != V_ticket_altqs_inactive) +#else if (ticket != ticket_altqs_inactive) +#endif error = EBUSY; if (error) { +#ifdef __FreeBSD__ + pool_put(&V_pf_altq_pl, a2); +#else pool_put(&pf_altq_pl, a2); +#endif break; } } +#ifdef __FreeBSD__ + TAILQ_INSERT_TAIL(V_pf_altqs_inactive, a2, entries); +#else TAILQ_INSERT_TAIL(pf_altqs_inactive, a2, entries); +#endif } if (error != 0) 
pf_rollback_altq(ticket); else pf_commit_altq(ticket); -} + } #endif #endif /* ALTQ */ @@ -1252,10 +1420,33 @@ pf_setup_pfsync_matching(struct pf_ruleset *rs) } MD5Final(digest, &ctx); +#ifdef __FreeBSD__ + memcpy(V_pf_status.pf_chksum, digest, sizeof(V_pf_status.pf_chksum)); +#else memcpy(pf_status.pf_chksum, digest, sizeof(pf_status.pf_chksum)); +#endif + return (0); +} + +int +pf_addr_setup(struct pf_ruleset *ruleset, struct pf_addr_wrap *addr, + sa_family_t af) +{ + if (pfi_dynaddr_setup(addr, af) || + pf_tbladdr_setup(ruleset, addr)) + return (EINVAL); + return (0); } +void +pf_addr_copyout(struct pf_addr_wrap *addr) +{ + pfi_dynaddr_copyout(addr); + pf_tbladdr_copyout(addr); + pf_rtlabel_copyout(addr); +} + int #ifdef __FreeBSD__ pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td) @@ -1270,6 +1461,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) #endif int error = 0; + CURVNET_SET(TD_TO_VNET(td)); + /* XXX keep in sync with switch() below */ #ifdef __FreeBSD__ if (securelevel_gt(td->td_ucred, 2)) @@ -1373,7 +1566,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } return (EACCES); case DIOCGETRULE: - if (((struct pfioc_rule *)addr)->action == PF_GET_CLR_CNTR) + if (((struct pfioc_rule *)addr)->action == + PF_GET_CLR_CNTR) return (EACCES); break; default: @@ -1382,9 +1576,9 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (flags & FWRITE) #ifdef __FreeBSD__ - sx_xlock(&pf_consistency_lock); + sx_xlock(&V_pf_consistency_lock); else - sx_slock(&pf_consistency_lock); + sx_slock(&V_pf_consistency_lock); #else rw_enter_write(&pf_consistency_lock); else @@ -1399,7 +1593,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) switch (cmd) { case DIOCSTART: +#ifdef __FreeBSD__ + if (V_pf_status.running) +#else if (pf_status.running) +#endif error = EEXIST; else { #ifdef __FreeBSD__ @@ -1411,33 +1609,48 @@ pfioctl(dev_t dev, u_long 
cmd, caddr_t addr, int flags, struct proc *p) ("pf: pfil registeration fail\n")); break; } -#endif + V_pf_status.running = 1; + V_pf_status.since = time_second; + + if (V_pf_status.stateid == 0) { + V_pf_status.stateid = time_second; + V_pf_status.stateid = V_pf_status.stateid << 32; + } +#else pf_status.running = 1; pf_status.since = time_second; + if (pf_status.stateid == 0) { pf_status.stateid = time_second; pf_status.stateid = pf_status.stateid << 32; } +#endif DPFPRINTF(PF_DEBUG_MISC, ("pf: started\n")); } break; case DIOCSTOP: - if (!pf_status.running) +#ifdef __FreeBSD__ + if (!V_pf_status.running) error = ENOENT; else { - pf_status.running = 0; -#ifdef __FreeBSD__ + V_pf_status.running = 0; PF_UNLOCK(); error = dehook_pf(); PF_LOCK(); if (error) { - pf_status.running = 1; + V_pf_status.running = 1; DPFPRINTF(PF_DEBUG_MISC, - ("pf: pfil unregisteration failed\n")); + ("pf: pfil unregisteration failed\n")); } -#endif + V_pf_status.since = time_second; +#else + if (!pf_status.running) + error = ENOENT; + else { + pf_status.running = 0; pf_status.since = time_second; +#endif DPFPRINTF(PF_DEBUG_MISC, ("pf: stopped\n")); } break; @@ -1473,16 +1686,22 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EBUSY; break; } - if (pr->pool_ticket != ticket_pabuf) { #ifdef __FreeBSD__ + if (pr->pool_ticket != V_ticket_pabuf) { DPFPRINTF(PF_DEBUG_MISC, ("pool_ticket: %d != %d\n", pr->pool_ticket, - ticket_pabuf)); + V_ticket_pabuf)); +#else + if (pr->pool_ticket != ticket_pabuf) { #endif error = EBUSY; break; } - rule = pool_get(&pf_rule_pl, PR_NOWAIT); +#ifdef __FreeBSD__ + rule = pool_get(&V_pf_rule_pl, PR_NOWAIT); +#else + rule = pool_get(&pf_rule_pl, PR_WAITOK|PR_LIMITFAIL); +#endif if (rule == NULL) { error = ENOMEM; break; @@ -1499,19 +1718,27 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) rule->kif = NULL; TAILQ_INIT(&rule->rpool.list); /* initialize refcounting */ - rule->states = 0; + rule->states_cur = 0; 
rule->src_nodes = 0; rule->entries.tqe_prev = NULL; #ifndef INET if (rule->af == AF_INET) { +#ifdef __FreeBSD__ + pool_put(&V_pf_rule_pl, rule); +#else pool_put(&pf_rule_pl, rule); +#endif error = EAFNOSUPPORT; break; } #endif /* INET */ #ifndef INET6 if (rule->af == AF_INET6) { +#ifdef __FreeBSD__ + pool_put(&V_pf_rule_pl, rule); +#else pool_put(&pf_rule_pl, rule); +#endif error = EAFNOSUPPORT; break; } @@ -1525,7 +1752,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (rule->ifname[0]) { rule->kif = pfi_kif_get(rule->ifname); if (rule->kif == NULL) { +#ifdef __FreeBSD__ + pool_put(&V_pf_rule_pl, rule); +#else pool_put(&pf_rule_pl, rule); +#endif error = EINVAL; break; } @@ -1562,40 +1793,42 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (rule->rt && !rule->direction) error = EINVAL; #if NPFLOG > 0 -#ifdef __FreeBSD__ if (!rule->log) rule->logif = 0; -#endif if (rule->logif >= PFLOGIFS_MAX) error = EINVAL; #endif if (pf_rtlabel_add(&rule->src.addr) || pf_rtlabel_add(&rule->dst.addr)) error = EBUSY; - if (pfi_dynaddr_setup(&rule->src.addr, rule->af)) + if (pf_addr_setup(ruleset, &rule->src.addr, rule->af)) error = EINVAL; - if (pfi_dynaddr_setup(&rule->dst.addr, rule->af)) - error = EINVAL; - if (pf_tbladdr_setup(ruleset, &rule->src.addr)) - error = EINVAL; - if (pf_tbladdr_setup(ruleset, &rule->dst.addr)) + if (pf_addr_setup(ruleset, &rule->dst.addr, rule->af)) error = EINVAL; if (pf_anchor_setup(rule, ruleset, pr->anchor_call)) error = EINVAL; +#ifdef __FreeBSD__ + TAILQ_FOREACH(pa, &V_pf_pabuf, entries) +#else TAILQ_FOREACH(pa, &pf_pabuf, entries) +#endif if (pf_tbladdr_setup(ruleset, &pa->addr)) error = EINVAL; if (rule->overload_tblname[0]) { if ((rule->overload_tbl = pfr_attach_table(ruleset, - rule->overload_tblname)) == NULL) + rule->overload_tblname, 0)) == NULL) error = EINVAL; else rule->overload_tbl->pfrkt_flags |= PFR_TFLAG_ACTIVE; } +#ifdef __FreeBSD__ + pf_mv_pool(&V_pf_pabuf, 
&rule->rpool.list); +#else pf_mv_pool(&pf_pabuf, &rule->rpool.list); +#endif if (((((rule->action == PF_NAT) || (rule->action == PF_RDR) || (rule->action == PF_BINAT)) && rule->anchor == NULL) || (rule->rt > PF_FASTROUTE)) && @@ -1608,14 +1841,13 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } #ifdef __FreeBSD__ - if (!debug_pfugidhack && (rule->uid.op || rule->gid.op || + if (!V_debug_pfugidhack && (rule->uid.op || rule->gid.op || rule->log & PF_LOG_SOCKET_LOOKUP)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: debug.pfugidhack enabled\n")); - debug_pfugidhack = 1; + V_debug_pfugidhack = 1; } #endif - rule->rpool.cur = TAILQ_FIRST(&rule->rpool.list); rule->evaluations = rule->packets[0] = rule->packets[1] = rule->bytes[0] = rule->bytes[1] = 0; @@ -1685,12 +1917,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EBUSY; break; } - pfi_dynaddr_copyout(&pr->rule.src.addr); - pfi_dynaddr_copyout(&pr->rule.dst.addr); - pf_tbladdr_copyout(&pr->rule.src.addr); - pf_tbladdr_copyout(&pr->rule.dst.addr); - pf_rtlabel_copyout(&pr->rule.src.addr); - pf_rtlabel_copyout(&pr->rule.dst.addr); + pf_addr_copyout(&pr->rule.src.addr); + pf_addr_copyout(&pr->rule.dst.addr); for (i = 0; i < PF_SKIP_COUNT; ++i) if (rule->skip[i].ptr == NULL) pr->rule.skip[i].nr = -1; @@ -1702,6 +1930,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) rule->evaluations = 0; rule->packets[0] = rule->packets[1] = 0; rule->bytes[0] = rule->bytes[1] = 0; + rule->states_tot = 0; } break; } @@ -1715,7 +1944,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (!(pcr->action == PF_CHANGE_REMOVE || pcr->action == PF_CHANGE_GET_TICKET) && +#ifdef __FreeBSD__ + pcr->pool_ticket != V_ticket_pabuf) { +#else pcr->pool_ticket != ticket_pabuf) { +#endif error = EBUSY; break; } @@ -1752,7 +1985,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } if (pcr->action != PF_CHANGE_REMOVE) { - 
newrule = pool_get(&pf_rule_pl, PR_NOWAIT); +#ifdef __FreeBSD__ + newrule = pool_get(&V_pf_rule_pl, PR_NOWAIT); +#else + newrule = pool_get(&pf_rule_pl, PR_WAITOK|PR_LIMITFAIL); +#endif if (newrule == NULL) { error = ENOMEM; break; @@ -1767,18 +2004,26 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) #endif TAILQ_INIT(&newrule->rpool.list); /* initialize refcounting */ - newrule->states = 0; + newrule->states_cur = 0; newrule->entries.tqe_prev = NULL; #ifndef INET if (newrule->af == AF_INET) { +#ifdef __FreeBSD__ + pool_put(&V_pf_rule_pl, newrule); +#else pool_put(&pf_rule_pl, newrule); +#endif error = EAFNOSUPPORT; break; } #endif /* INET */ #ifndef INET6 if (newrule->af == AF_INET6) { +#ifdef __FreeBSD__ + pool_put(&V_pf_rule_pl, newrule); +#else pool_put(&pf_rule_pl, newrule); +#endif error = EAFNOSUPPORT; break; } @@ -1786,7 +2031,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (newrule->ifname[0]) { newrule->kif = pfi_kif_get(newrule->ifname); if (newrule->kif == NULL) { +#ifdef __FreeBSD__ + pool_put(&V_pf_rule_pl, newrule); +#else pool_put(&pf_rule_pl, newrule); +#endif error = EINVAL; break; } @@ -1826,34 +2075,32 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EBUSY; if (newrule->rt && !newrule->direction) error = EINVAL; -#ifdef __FreeBSD__ #if NPFLOG > 0 if (!newrule->log) newrule->logif = 0; if (newrule->logif >= PFLOGIFS_MAX) error = EINVAL; #endif -#endif if (pf_rtlabel_add(&newrule->src.addr) || pf_rtlabel_add(&newrule->dst.addr)) error = EBUSY; - if (pfi_dynaddr_setup(&newrule->src.addr, newrule->af)) - error = EINVAL; - if (pfi_dynaddr_setup(&newrule->dst.addr, newrule->af)) - error = EINVAL; - if (pf_tbladdr_setup(ruleset, &newrule->src.addr)) + if (pf_addr_setup(ruleset, &newrule->src.addr, newrule->af)) error = EINVAL; - if (pf_tbladdr_setup(ruleset, &newrule->dst.addr)) + if (pf_addr_setup(ruleset, &newrule->dst.addr, newrule->af)) error = EINVAL; if 
(pf_anchor_setup(newrule, ruleset, pcr->anchor_call)) error = EINVAL; +#ifdef __FreeBSD__ + TAILQ_FOREACH(pa, &V_pf_pabuf, entries) +#else TAILQ_FOREACH(pa, &pf_pabuf, entries) +#endif if (pf_tbladdr_setup(ruleset, &pa->addr)) error = EINVAL; if (newrule->overload_tblname[0]) { if ((newrule->overload_tbl = pfr_attach_table( - ruleset, newrule->overload_tblname)) == + ruleset, newrule->overload_tblname, 0)) == NULL) error = EINVAL; else @@ -1861,7 +2108,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) PFR_TFLAG_ACTIVE; } +#ifdef __FreeBSD__ + pf_mv_pool(&V_pf_pabuf, &newrule->rpool.list); +#else pf_mv_pool(&pf_pabuf, &newrule->rpool.list); +#endif if (((((newrule->action == PF_NAT) || (newrule->action == PF_RDR) || (newrule->action == PF_BINAT) || @@ -1876,12 +2127,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } #ifdef __FreeBSD__ - if (!debug_pfugidhack && (newrule->uid.op || + if (!V_debug_pfugidhack && (newrule->uid.op || newrule->gid.op || newrule->log & PF_LOG_SOCKET_LOOKUP)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: debug.pfugidhack enabled\n")); - debug_pfugidhack = 1; + V_debug_pfugidhack = 1; } #endif @@ -1890,7 +2141,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) newrule->packets[0] = newrule->packets[1] = 0; newrule->bytes[0] = newrule->bytes[1] = 0; } +#ifdef __FreeBSD__ + pf_empty_pool(&V_pf_pabuf); +#else pf_empty_pool(&pf_pabuf); +#endif if (pcr->action == PF_CHANGE_ADD_HEAD) oldrule = TAILQ_FIRST( @@ -1943,166 +2198,164 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } case DIOCCLRSTATES: { - struct pf_state *state, *nexts; + struct pf_state *s, *nexts; struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr; - int killed = 0; + u_int killed = 0; - for (state = RB_MIN(pf_state_tree_id, &tree_id); state; - state = nexts) { - nexts = RB_NEXT(pf_state_tree_id, &tree_id, state); +#ifdef __FreeBSD__ + for (s = RB_MIN(pf_state_tree_id, 
&V_tree_id); s; s = nexts) { + nexts = RB_NEXT(pf_state_tree_id, &V_tree_id, s); +#else + for (s = RB_MIN(pf_state_tree_id, &tree_id); s; s = nexts) { + nexts = RB_NEXT(pf_state_tree_id, &tree_id, s); +#endif if (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname, - state->u.s.kif->pfik_name)) { -#if NPFSYNC + s->kif->pfik_name)) { +#if NPFSYNC > 0 /* don't send out individual delete messages */ - state->sync_flags = PFSTATE_NOSYNC; + SET(s->state_flags, PFSTATE_NOSYNC); #endif - pf_unlink_state(state); + pf_unlink_state(s); killed++; } } - psk->psk_af = killed; -#if NPFSYNC + psk->psk_killed = killed; +#if NPFSYNC > 0 +#ifdef __FreeBSD__ + if (pfsync_clear_states_ptr != NULL) + pfsync_clear_states_ptr(V_pf_status.hostid, psk->psk_ifname); +#else pfsync_clear_states(pf_status.hostid, psk->psk_ifname); #endif +#endif break; } case DIOCKILLSTATES: { - struct pf_state *state, *nexts; - struct pf_state_host *src, *dst; + struct pf_state *s, *nexts; + struct pf_state_key *sk; + struct pf_addr *srcaddr, *dstaddr; + u_int16_t srcport, dstport; struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr; - int killed = 0; - - for (state = RB_MIN(pf_state_tree_id, &tree_id); state; - state = nexts) { - nexts = RB_NEXT(pf_state_tree_id, &tree_id, state); + u_int killed = 0; - if (state->direction == PF_OUT) { - src = &state->lan; - dst = &state->ext; + if (psk->psk_pfcmp.id) { + if (psk->psk_pfcmp.creatorid == 0) +#ifdef __FreeBSD__ + psk->psk_pfcmp.creatorid = V_pf_status.hostid; +#else + psk->psk_pfcmp.creatorid = pf_status.hostid; +#endif + if ((s = pf_find_state_byid(&psk->psk_pfcmp))) { + pf_unlink_state(s); + psk->psk_killed = 1; + } + break; + } + +#ifdef __FreeBSD__ + for (s = RB_MIN(pf_state_tree_id, &V_tree_id); s; + s = nexts) { + nexts = RB_NEXT(pf_state_tree_id, &V_tree_id, s); +#else + for (s = RB_MIN(pf_state_tree_id, &tree_id); s; + s = nexts) { + nexts = RB_NEXT(pf_state_tree_id, &tree_id, s); +#endif + sk = s->key[PF_SK_WIRE]; + + if (s->direction == PF_OUT) 
{ + srcaddr = &sk->addr[1]; + dstaddr = &sk->addr[0]; + srcport = sk->port[0]; + dstport = sk->port[0]; } else { - src = &state->ext; - dst = &state->lan; + srcaddr = &sk->addr[0]; + dstaddr = &sk->addr[1]; + srcport = sk->port[0]; + dstport = sk->port[0]; } - if ((!psk->psk_af || state->af == psk->psk_af) + if ((!psk->psk_af || sk->af == psk->psk_af) && (!psk->psk_proto || psk->psk_proto == - state->proto) && + sk->proto) && PF_MATCHA(psk->psk_src.neg, &psk->psk_src.addr.v.a.addr, &psk->psk_src.addr.v.a.mask, - &src->addr, state->af) && + srcaddr, sk->af) && PF_MATCHA(psk->psk_dst.neg, &psk->psk_dst.addr.v.a.addr, &psk->psk_dst.addr.v.a.mask, - &dst->addr, state->af) && + dstaddr, sk->af) && (psk->psk_src.port_op == 0 || pf_match_port(psk->psk_src.port_op, psk->psk_src.port[0], psk->psk_src.port[1], - src->port)) && + srcport)) && (psk->psk_dst.port_op == 0 || pf_match_port(psk->psk_dst.port_op, psk->psk_dst.port[0], psk->psk_dst.port[1], - dst->port)) && + dstport)) && + (!psk->psk_label[0] || (s->rule.ptr->label[0] && + !strcmp(psk->psk_label, s->rule.ptr->label))) && (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname, - state->u.s.kif->pfik_name))) { -#if NPFSYNC > 0 - /* send immediate delete of state */ - pfsync_delete_state(state); - state->sync_flags |= PFSTATE_NOSYNC; -#endif - pf_unlink_state(state); + s->kif->pfik_name))) { + pf_unlink_state(s); killed++; } } - psk->psk_af = killed; + psk->psk_killed = killed; break; } case DIOCADDSTATE: { struct pfioc_state *ps = (struct pfioc_state *)addr; - struct pf_state *state; - struct pfi_kif *kif; + struct pfsync_state *sp = &ps->state; - if (ps->state.timeout >= PFTM_MAX && - ps->state.timeout != PFTM_UNTIL_PACKET) { + if (sp->timeout >= PFTM_MAX && + sp->timeout != PFTM_UNTIL_PACKET) { error = EINVAL; break; } - state = pool_get(&pf_state_pl, PR_NOWAIT); - if (state == NULL) { - error = ENOMEM; - break; - } - kif = pfi_kif_get(ps->state.u.ifname); - if (kif == NULL) { - pool_put(&pf_state_pl, state); - error = 
ENOENT; - break; - } - bcopy(&ps->state, state, sizeof(struct pf_state)); - bzero(&state->u, sizeof(state->u)); - state->rule.ptr = &pf_default_rule; - state->nat_rule.ptr = NULL; - state->anchor.ptr = NULL; - state->rt_kif = NULL; - state->creation = time_second; - state->pfsync_time = 0; - state->packets[0] = state->packets[1] = 0; - state->bytes[0] = state->bytes[1] = 0; - - if (pf_insert_state(kif, state)) { - pfi_kif_unref(kif, PFI_KIF_REF_NONE); - pool_put(&pf_state_pl, state); - error = ENOMEM; - } +#ifdef __FreeBSD__ + if (pfsync_state_import_ptr != NULL) + error = pfsync_state_import_ptr(sp, PFSYNC_SI_IOCTL); +#else + error = pfsync_state_import(sp, PFSYNC_SI_IOCTL); +#endif break; } case DIOCGETSTATE: { struct pfioc_state *ps = (struct pfioc_state *)addr; - struct pf_state *state; - u_int32_t nr; - int secs; + struct pf_state *s; + struct pf_state_cmp id_key; - nr = 0; - RB_FOREACH(state, pf_state_tree_id, &tree_id) { - if (nr >= ps->nr) - break; - nr++; - } - if (state == NULL) { - error = EBUSY; + bcopy(ps->state.id, &id_key.id, sizeof(id_key.id)); + id_key.creatorid = ps->state.creatorid; + + s = pf_find_state_byid(&id_key); + if (s == NULL) { + error = ENOENT; break; } - secs = time_second; - bcopy(state, &ps->state, sizeof(ps->state)); - strlcpy(ps->state.u.ifname, state->u.s.kif->pfik_name, - sizeof(ps->state.u.ifname)); - ps->state.rule.nr = state->rule.ptr->nr; - ps->state.nat_rule.nr = (state->nat_rule.ptr == NULL) ? - -1 : state->nat_rule.ptr->nr; - ps->state.anchor.nr = (state->anchor.ptr == NULL) ? 
- -1 : state->anchor.ptr->nr; - ps->state.creation = secs - ps->state.creation; - ps->state.expire = pf_state_expires(state); - if (ps->state.expire > secs) - ps->state.expire -= secs; - else - ps->state.expire = 0; + + pfsync_state_export(&ps->state, s); break; } case DIOCGETSTATES: { struct pfioc_states *ps = (struct pfioc_states *)addr; struct pf_state *state; - struct pf_state *p, *pstore; + struct pfsync_state *p, *pstore; u_int32_t nr = 0; - int space = ps->ps_len; - if (space == 0) { + if (ps->ps_len == 0) { +#ifdef __FreeBSD__ + nr = V_pf_status.states; +#else nr = pf_status.states; - ps->ps_len = sizeof(struct pf_state) * nr; +#endif + ps->ps_len = sizeof(struct pfsync_state) * nr; break; } @@ -2116,29 +2369,16 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) p = ps->ps_states; +#ifdef __FreeBSD__ + state = TAILQ_FIRST(&V_state_list); +#else state = TAILQ_FIRST(&state_list); +#endif while (state) { if (state->timeout != PFTM_UNLINKED) { - int secs = time_second; - if ((nr+1) * sizeof(*p) > (unsigned)ps->ps_len) break; - - bcopy(state, pstore, sizeof(*pstore)); - strlcpy(pstore->u.ifname, - state->u.s.kif->pfik_name, - sizeof(pstore->u.ifname)); - pstore->rule.nr = state->rule.ptr->nr; - pstore->nat_rule.nr = (state->nat_rule.ptr == - NULL) ? -1 : state->nat_rule.ptr->nr; - pstore->anchor.nr = (state->anchor.ptr == - NULL) ? 
-1 : state->anchor.ptr->nr; - pstore->creation = secs - pstore->creation; - pstore->expire = pf_state_expires(state); - if (pstore->expire > secs) - pstore->expire -= secs; - else - pstore->expire = 0; + pfsync_state_export(pstore, state); #ifdef __FreeBSD__ PF_COPYOUT(pstore, p, sizeof(*p), error); #else @@ -2151,10 +2391,10 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) p++; nr++; } - state = TAILQ_NEXT(state, u.s.entry_list); + state = TAILQ_NEXT(state, entry_list); } - ps->ps_len = sizeof(struct pf_state) * nr; + ps->ps_len = sizeof(struct pfsync_state) * nr; free(pstore, M_TEMP); break; @@ -2162,8 +2402,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCGETSTATUS: { struct pf_status *s = (struct pf_status *)addr; +#ifdef __FreeBSD__ + bcopy(&V_pf_status, s, sizeof(struct pf_status)); +#else bcopy(&pf_status, s, sizeof(struct pf_status)); - pfi_fill_oldstatus(s); +#endif + pfi_update_status(s->ifname, s); break; } @@ -2171,35 +2415,51 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pfioc_if *pi = (struct pfioc_if *)addr; if (pi->ifname[0] == 0) { +#ifdef __FreeBSD__ + bzero(V_pf_status.ifname, IFNAMSIZ); +#else bzero(pf_status.ifname, IFNAMSIZ); +#endif break; } - if (ifunit(pi->ifname) == NULL) { - error = EINVAL; - break; - } +#ifdef __FreeBSD__ + strlcpy(V_pf_status.ifname, pi->ifname, IFNAMSIZ); +#else strlcpy(pf_status.ifname, pi->ifname, IFNAMSIZ); +#endif break; } case DIOCCLRSTATUS: { +#ifdef __FreeBSD__ + bzero(V_pf_status.counters, sizeof(V_pf_status.counters)); + bzero(V_pf_status.fcounters, sizeof(V_pf_status.fcounters)); + bzero(V_pf_status.scounters, sizeof(V_pf_status.scounters)); + V_pf_status.since = time_second; + if (*V_pf_status.ifname) + pfi_update_status(V_pf_status.ifname, NULL); +#else bzero(pf_status.counters, sizeof(pf_status.counters)); bzero(pf_status.fcounters, sizeof(pf_status.fcounters)); bzero(pf_status.scounters, 
sizeof(pf_status.scounters)); pf_status.since = time_second; if (*pf_status.ifname) - pfi_clr_istats(pf_status.ifname); + pfi_update_status(pf_status.ifname, NULL); +#endif break; } case DIOCNATLOOK: { struct pfioc_natlook *pnl = (struct pfioc_natlook *)addr; + struct pf_state_key *sk; struct pf_state *state; - struct pf_state_cmp key; + struct pf_state_key_cmp key; int m = 0, direction = pnl->direction; + int sidx, didx; - key.af = pnl->af; - key.proto = pnl->proto; + /* NATLOOK src and dst are reversed, so reverse sidx/didx */ + sidx = (direction == PF_IN) ? 1 : 0; + didx = (direction == PF_IN) ? 0 : 1; if (!pnl->proto || PF_AZERO(&pnl->saddr, pnl->af) || @@ -2209,43 +2469,23 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) (!pnl->dport || !pnl->sport))) error = EINVAL; else { - /* - * userland gives us source and dest of connection, - * reverse the lookup so we ask for what happens with - * the return traffic, enabling us to find it in the - * state tree. - */ - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, &pnl->daddr, pnl->af); - key.ext.port = pnl->dport; - PF_ACPY(&key.gwy.addr, &pnl->saddr, pnl->af); - key.gwy.port = pnl->sport; - state = pf_find_state_all(&key, PF_EXT_GWY, &m); - } else { - PF_ACPY(&key.lan.addr, &pnl->daddr, pnl->af); - key.lan.port = pnl->dport; - PF_ACPY(&key.ext.addr, &pnl->saddr, pnl->af); - key.ext.port = pnl->sport; - state = pf_find_state_all(&key, PF_LAN_EXT, &m); - } + key.af = pnl->af; + key.proto = pnl->proto; + PF_ACPY(&key.addr[sidx], &pnl->saddr, pnl->af); + key.port[sidx] = pnl->sport; + PF_ACPY(&key.addr[didx], &pnl->daddr, pnl->af); + key.port[didx] = pnl->dport; + + state = pf_find_state_all(&key, direction, &m); + if (m > 1) error = E2BIG; /* more than one state */ else if (state != NULL) { - if (direction == PF_IN) { - PF_ACPY(&pnl->rsaddr, &state->lan.addr, - state->af); - pnl->rsport = state->lan.port; - PF_ACPY(&pnl->rdaddr, &pnl->daddr, - pnl->af); - pnl->rdport = pnl->dport; - } else 
{ - PF_ACPY(&pnl->rdaddr, &state->gwy.addr, - state->af); - pnl->rdport = state->gwy.port; - PF_ACPY(&pnl->rsaddr, &pnl->saddr, - pnl->af); - pnl->rsport = pnl->sport; - } + sk = state->key[sidx]; + PF_ACPY(&pnl->rsaddr, &sk->addr[sidx], sk->af); + pnl->rsport = sk->port[sidx]; + PF_ACPY(&pnl->rdaddr, &sk->addr[didx], sk->af); + pnl->rdport = sk->port[didx]; } else error = ENOENT; } @@ -2261,10 +2501,18 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EINVAL; goto fail; } +#ifdef __FreeBSD__ + old = V_pf_default_rule.timeout[pt->timeout]; +#else old = pf_default_rule.timeout[pt->timeout]; +#endif if (pt->timeout == PFTM_INTERVAL && pt->seconds == 0) pt->seconds = 1; +#ifdef __FreeBSD__ + V_pf_default_rule.timeout[pt->timeout] = pt->seconds; +#else pf_default_rule.timeout[pt->timeout] = pt->seconds; +#endif if (pt->timeout == PFTM_INTERVAL && pt->seconds < old) wakeup(pf_purge_thread); pt->seconds = old; @@ -2278,7 +2526,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EINVAL; goto fail; } +#ifdef __FreeBSD__ + pt->seconds = V_pf_default_rule.timeout[pt->timeout]; +#else pt->seconds = pf_default_rule.timeout[pt->timeout]; +#endif break; } @@ -2289,7 +2541,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EINVAL; goto fail; } +#ifdef __FreeBSD__ + pl->limit = V_pf_pool_limits[pl->index].limit; +#else pl->limit = pf_pool_limits[pl->index].limit; +#endif break; } @@ -2298,29 +2554,40 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) int old_limit; if (pl->index < 0 || pl->index >= PF_LIMIT_MAX || +#ifdef __FreeBSD__ + V_pf_pool_limits[pl->index].pp == NULL) { +#else pf_pool_limits[pl->index].pp == NULL) { +#endif error = EINVAL; goto fail; } #ifdef __FreeBSD__ - uma_zone_set_max(pf_pool_limits[pl->index].pp, pl->limit); + uma_zone_set_max(V_pf_pool_limits[pl->index].pp, pl->limit); + old_limit = V_pf_pool_limits[pl->index].limit; + 
V_pf_pool_limits[pl->index].limit = pl->limit; + pl->limit = old_limit; #else if (pool_sethardlimit(pf_pool_limits[pl->index].pp, pl->limit, NULL, 0) != 0) { error = EBUSY; goto fail; } -#endif old_limit = pf_pool_limits[pl->index].limit; pf_pool_limits[pl->index].limit = pl->limit; pl->limit = old_limit; +#endif break; } case DIOCSETDEBUG: { u_int32_t *level = (u_int32_t *)addr; +#ifdef __FreeBSD__ + V_pf_status.debug = *level; +#else pf_status.debug = *level; +#endif break; } @@ -2363,11 +2630,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pf_altq *altq; /* enable all altq interfaces on active list */ - TAILQ_FOREACH(altq, pf_altqs_active, entries) { #ifdef __FreeBSD__ + TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { if (altq->qname[0] == 0 && (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { #else + TAILQ_FOREACH(altq, pf_altqs_active, entries) { if (altq->qname[0] == 0) { #endif error = pf_enable_altq(altq); @@ -2376,7 +2644,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } } if (error == 0) +#ifdef __FreeBSD__ + V_pf_altq_running = 1; +#else pf_altq_running = 1; +#endif DPFPRINTF(PF_DEBUG_MISC, ("altq: started\n")); break; } @@ -2385,11 +2657,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pf_altq *altq; /* disable all altq interfaces on active list */ - TAILQ_FOREACH(altq, pf_altqs_active, entries) { #ifdef __FreeBSD__ + TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { if (altq->qname[0] == 0 && (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { #else + TAILQ_FOREACH(altq, pf_altqs_active, entries) { if (altq->qname[0] == 0) { #endif error = pf_disable_altq(altq); @@ -2398,7 +2671,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } } if (error == 0) +#ifdef __FreeBSD__ + V_pf_altq_running = 0; +#else pf_altq_running = 0; +#endif DPFPRINTF(PF_DEBUG_MISC, ("altq: stopped\n")); break; } @@ -2407,11 +2684,19 @@ 
pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pfioc_altq *pa = (struct pfioc_altq *)addr; struct pf_altq *altq, *a; +#ifdef __FreeBSD__ + if (pa->ticket != V_ticket_altqs_inactive) { +#else if (pa->ticket != ticket_altqs_inactive) { +#endif error = EBUSY; break; } - altq = pool_get(&pf_altq_pl, PR_NOWAIT); +#ifdef __FreeBSD__ + altq = pool_get(&V_pf_altq_pl, PR_NOWAIT); +#else + altq = pool_get(&pf_altq_pl, PR_WAITOK|PR_LIMITFAIL); +#endif if (altq == NULL) { error = ENOMEM; break; @@ -2428,11 +2713,19 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (altq->qname[0] != 0) { if ((altq->qid = pf_qname2qid(altq->qname)) == 0) { error = EBUSY; +#ifdef __FreeBSD__ + pool_put(&V_pf_altq_pl, altq); +#else pool_put(&pf_altq_pl, altq); +#endif break; } altq->altq_disc = NULL; +#ifdef __FreeBSD__ + TAILQ_FOREACH(a, V_pf_altqs_inactive, entries) { +#else TAILQ_FOREACH(a, pf_altqs_inactive, entries) { +#endif if (strncmp(a->ifname, altq->ifname, IFNAMSIZ) == 0 && a->qname[0] == 0) { altq->altq_disc = a->altq_disc; @@ -2448,18 +2741,26 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) altq->local_flags |= PFALTQ_FLAG_IF_REMOVED; } else { PF_UNLOCK(); -#endif +#endif error = altq_add(altq); #ifdef __FreeBSD__ PF_LOCK(); } #endif if (error) { +#ifdef __FreeBSD__ + pool_put(&V_pf_altq_pl, altq); +#else pool_put(&pf_altq_pl, altq); +#endif break; } +#ifdef __FreeBSD__ + TAILQ_INSERT_TAIL(V_pf_altqs_inactive, altq, entries); +#else TAILQ_INSERT_TAIL(pf_altqs_inactive, altq, entries); +#endif bcopy(altq, &pa->altq, sizeof(struct pf_altq)); break; } @@ -2469,9 +2770,15 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pf_altq *altq; pa->nr = 0; +#ifdef __FreeBSD__ + TAILQ_FOREACH(altq, V_pf_altqs_active, entries) + pa->nr++; + pa->ticket = V_ticket_altqs_active; +#else TAILQ_FOREACH(altq, pf_altqs_active, entries) pa->nr++; pa->ticket = ticket_altqs_active; +#endif 
break; } @@ -2480,12 +2787,20 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pf_altq *altq; u_int32_t nr; +#ifdef __FreeBSD__ + if (pa->ticket != V_ticket_altqs_active) { +#else if (pa->ticket != ticket_altqs_active) { +#endif error = EBUSY; break; } nr = 0; +#ifdef __FreeBSD__ + altq = TAILQ_FIRST(V_pf_altqs_active); +#else altq = TAILQ_FIRST(pf_altqs_active); +#endif while ((altq != NULL) && (nr < pa->nr)) { altq = TAILQ_NEXT(altq, entries); nr++; @@ -2509,13 +2824,21 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) u_int32_t nr; int nbytes; +#ifdef __FreeBSD__ + if (pq->ticket != V_ticket_altqs_active) { +#else if (pq->ticket != ticket_altqs_active) { +#endif error = EBUSY; break; } nbytes = pq->nbytes; nr = 0; +#ifdef __FreeBSD__ + altq = TAILQ_FIRST(V_pf_altqs_active); +#else altq = TAILQ_FIRST(pf_altqs_active); +#endif while ((altq != NULL) && (nr < pq->nr)) { altq = TAILQ_NEXT(altq, entries); nr++; @@ -2524,6 +2847,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EBUSY; break; } + #ifdef __FreeBSD__ if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) != 0) { error = ENXIO; @@ -2546,15 +2870,24 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCBEGINADDRS: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; +#ifdef __FreeBSD__ + pf_empty_pool(&V_pf_pabuf); + pp->ticket = ++V_ticket_pabuf; +#else pf_empty_pool(&pf_pabuf); pp->ticket = ++ticket_pabuf; +#endif break; } case DIOCADDADDR: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; +#ifdef __FreeBSD__ + if (pp->ticket != V_ticket_pabuf) { +#else if (pp->ticket != ticket_pabuf) { +#endif error = EBUSY; break; } @@ -2576,7 +2909,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EINVAL; break; } - pa = pool_get(&pf_pooladdr_pl, PR_NOWAIT); +#ifdef __FreeBSD__ + pa = pool_get(&V_pf_pooladdr_pl, PR_NOWAIT); +#else + pa = 
pool_get(&pf_pooladdr_pl, PR_WAITOK|PR_LIMITFAIL); +#endif if (pa == NULL) { error = ENOMEM; break; @@ -2585,7 +2922,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (pa->ifname[0]) { pa->kif = pfi_kif_get(pa->ifname); if (pa->kif == NULL) { +#ifdef __FreeBSD__ + pool_put(&V_pf_pooladdr_pl, pa); +#else pool_put(&pf_pooladdr_pl, pa); +#endif error = EINVAL; break; } @@ -2594,11 +2935,19 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (pfi_dynaddr_setup(&pa->addr, pp->af)) { pfi_dynaddr_remove(&pa->addr); pfi_kif_unref(pa->kif, PFI_KIF_REF_RULE); +#ifdef __FreeBSD__ + pool_put(&V_pf_pooladdr_pl, pa); +#else pool_put(&pf_pooladdr_pl, pa); +#endif error = EINVAL; break; } +#ifdef __FreeBSD__ + TAILQ_INSERT_TAIL(&V_pf_pabuf, pa, entries); +#else TAILQ_INSERT_TAIL(&pf_pabuf, pa, entries); +#endif break; } @@ -2637,9 +2986,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } bcopy(pa, &pp->addr, sizeof(struct pf_pooladdr)); - pfi_dynaddr_copyout(&pp->addr.addr); - pf_tbladdr_copyout(&pp->addr.addr); - pf_rtlabel_copyout(&pp->addr.addr); + pf_addr_copyout(&pp->addr.addr); break; } @@ -2672,7 +3019,13 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } if (pca->action != PF_CHANGE_REMOVE) { - newpa = pool_get(&pf_pooladdr_pl, PR_NOWAIT); +#ifdef __FreeBSD__ + newpa = pool_get(&V_pf_pooladdr_pl, + PR_NOWAIT); +#else + newpa = pool_get(&pf_pooladdr_pl, + PR_WAITOK|PR_LIMITFAIL); +#endif if (newpa == NULL) { error = ENOMEM; break; @@ -2680,14 +3033,22 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) bcopy(&pca->addr, newpa, sizeof(struct pf_pooladdr)); #ifndef INET if (pca->af == AF_INET) { +#ifdef __FreeBSD__ + pool_put(&V_pf_pooladdr_pl, newpa); +#else pool_put(&pf_pooladdr_pl, newpa); +#endif error = EAFNOSUPPORT; break; } #endif /* INET */ #ifndef INET6 if (pca->af == AF_INET6) { +#ifdef __FreeBSD__ + 
pool_put(&V_pf_pooladdr_pl, newpa); +#else pool_put(&pf_pooladdr_pl, newpa); +#endif error = EAFNOSUPPORT; break; } @@ -2695,7 +3056,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (newpa->ifname[0]) { newpa->kif = pfi_kif_get(newpa->ifname); if (newpa->kif == NULL) { +#ifdef __FreeBSD__ + pool_put(&V_pf_pooladdr_pl, newpa); +#else pool_put(&pf_pooladdr_pl, newpa); +#endif error = EINVAL; break; } @@ -2706,7 +3071,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) pf_tbladdr_setup(ruleset, &newpa->addr)) { pfi_dynaddr_remove(&newpa->addr); pfi_kif_unref(newpa->kif, PFI_KIF_REF_RULE); +#ifdef __FreeBSD__ + pool_put(&V_pf_pooladdr_pl, newpa); +#else pool_put(&pf_pooladdr_pl, newpa); +#endif error = EINVAL; break; } @@ -2735,7 +3104,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) pfi_dynaddr_remove(&oldpa->addr); pf_tbladdr_remove(&oldpa->addr); pfi_kif_unref(oldpa->kif, PFI_KIF_REF_RULE); +#ifdef __FreeBSD__ + pool_put(&V_pf_pooladdr_pl, oldpa); +#else pool_put(&pf_pooladdr_pl, oldpa); +#endif } else { if (oldpa == NULL) TAILQ_INSERT_TAIL(&pool->list, newpa, entries); @@ -2766,7 +3139,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) pr->nr = 0; if (ruleset->anchor == NULL) { /* XXX kludge for pf_main_ruleset */ +#ifdef __FreeBSD__ + RB_FOREACH(anchor, pf_anchor_global, &V_pf_anchors) +#else RB_FOREACH(anchor, pf_anchor_global, &pf_anchors) +#endif if (anchor->parent == NULL) pr->nr++; } else { @@ -2791,7 +3168,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) pr->name[0] = 0; if (ruleset->anchor == NULL) { /* XXX kludge for pf_main_ruleset */ +#ifdef __FreeBSD__ + RB_FOREACH(anchor, pf_anchor_global, &V_pf_anchors) +#else RB_FOREACH(anchor, pf_anchor_global, &pf_anchors) +#endif if (anchor->parent == NULL && nr++ == pr->nr) { strlcpy(pr->name, anchor->name, sizeof(pr->name)); @@ -3036,17 +3417,15 @@ pfioctl(dev_t dev, 
u_long cmd, caddr_t addr, int flags, struct proc *p) #ifdef __FreeBSD__ PF_UNLOCK(); #endif - ioe = (struct pfioc_trans_e *)malloc(sizeof(*ioe), - M_TEMP, M_WAITOK); - table = (struct pfr_table *)malloc(sizeof(*table), - M_TEMP, M_WAITOK); + ioe = malloc(sizeof(*ioe), M_TEMP, M_WAITOK); + table = malloc(sizeof(*table), M_TEMP, M_WAITOK); #ifdef __FreeBSD__ PF_LOCK(); #endif for (i = 0; i < io->size; i++) { #ifdef __FreeBSD__ - PF_COPYIN(io->array+i, ioe, sizeof(*ioe), error); - if (error) { + PF_COPYIN(io->array+i, ioe, sizeof(*ioe), error); + if (error) { #else if (copyin(io->array+i, ioe, sizeof(*ioe))) { #endif @@ -3122,10 +3501,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) #ifdef __FreeBSD__ PF_UNLOCK(); #endif - ioe = (struct pfioc_trans_e *)malloc(sizeof(*ioe), - M_TEMP, M_WAITOK); - table = (struct pfr_table *)malloc(sizeof(*table), - M_TEMP, M_WAITOK); + ioe = malloc(sizeof(*ioe), M_TEMP, M_WAITOK); + table = malloc(sizeof(*table), M_TEMP, M_WAITOK); #ifdef __FreeBSD__ PF_LOCK(); #endif @@ -3197,10 +3574,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) #ifdef __FreeBSD__ PF_UNLOCK(); #endif - ioe = (struct pfioc_trans_e *)malloc(sizeof(*ioe), - M_TEMP, M_WAITOK); - table = (struct pfr_table *)malloc(sizeof(*table), - M_TEMP, M_WAITOK); + ioe = malloc(sizeof(*ioe), M_TEMP, M_WAITOK); + table = malloc(sizeof(*table), M_TEMP, M_WAITOK); #ifdef __FreeBSD__ PF_LOCK(); #endif @@ -3226,8 +3601,13 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EINVAL; goto fail; } +#ifdef __FreeBSD__ + if (!V_altqs_inactive_open || ioe->ticket != + V_ticket_altqs_inactive) { +#else if (!altqs_inactive_open || ioe->ticket != ticket_altqs_inactive) { +#endif free(table, M_TEMP); free(ioe, M_TEMP); error = EBUSY; @@ -3238,7 +3618,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case PF_RULESET_TABLE: rs = pf_find_ruleset(ioe->anchor); if (rs == NULL || !rs->topen 
|| ioe->ticket != - rs->tticket) { + rs->tticket) { free(table, M_TEMP); free(ioe, M_TEMP); error = EBUSY; @@ -3322,7 +3702,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) int space = psn->psn_len; if (space == 0) { +#ifdef __FreeBSD__ + RB_FOREACH(n, pf_src_tree, &V_tree_src_tracking) +#else RB_FOREACH(n, pf_src_tree, &tree_src_tracking) +#endif nr++; psn->psn_len = sizeof(struct pf_src_node) * nr; break; @@ -3335,9 +3719,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) #ifdef __FreeBSD__ PF_LOCK(); #endif - p = psn->psn_src_nodes; +#ifdef __FreeBSD__ + RB_FOREACH(n, pf_src_tree, &V_tree_src_tracking) { +#else RB_FOREACH(n, pf_src_tree, &tree_src_tracking) { +#endif int secs = time_second, diff; if ((nr + 1) * sizeof(*p) > (unsigned)psn->psn_len) @@ -3383,39 +3770,59 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pf_src_node *n; struct pf_state *state; +#ifdef __FreeBSD__ + RB_FOREACH(state, pf_state_tree_id, &V_tree_id) { +#else RB_FOREACH(state, pf_state_tree_id, &tree_id) { +#endif state->src_node = NULL; state->nat_src_node = NULL; } +#ifdef __FreeBSD__ + RB_FOREACH(n, pf_src_tree, &V_tree_src_tracking) { +#else RB_FOREACH(n, pf_src_tree, &tree_src_tracking) { +#endif n->expire = 1; n->states = 0; } pf_purge_expired_src_nodes(1); +#ifdef __FreeBSD__ + V_pf_status.src_nodes = 0; +#else pf_status.src_nodes = 0; +#endif break; } case DIOCKILLSRCNODES: { struct pf_src_node *sn; struct pf_state *s; - struct pfioc_src_node_kill *psnk = \ - (struct pfioc_src_node_kill *) addr; - int killed = 0; + struct pfioc_src_node_kill *psnk = + (struct pfioc_src_node_kill *)addr; + u_int killed = 0; +#ifdef __FreeBSD__ + RB_FOREACH(sn, pf_src_tree, &V_tree_src_tracking) { +#else RB_FOREACH(sn, pf_src_tree, &tree_src_tracking) { - if (PF_MATCHA(psnk->psnk_src.neg, \ - &psnk->psnk_src.addr.v.a.addr, \ - &psnk->psnk_src.addr.v.a.mask, \ - &sn->addr, sn->af) && - 
PF_MATCHA(psnk->psnk_dst.neg, \ - &psnk->psnk_dst.addr.v.a.addr, \ - &psnk->psnk_dst.addr.v.a.mask, \ - &sn->raddr, sn->af)) { +#endif + if (PF_MATCHA(psnk->psnk_src.neg, + &psnk->psnk_src.addr.v.a.addr, + &psnk->psnk_src.addr.v.a.mask, + &sn->addr, sn->af) && + PF_MATCHA(psnk->psnk_dst.neg, + &psnk->psnk_dst.addr.v.a.addr, + &psnk->psnk_dst.addr.v.a.mask, + &sn->raddr, sn->af)) { /* Handle state to src_node linkage */ if (sn->states != 0) { - RB_FOREACH(s, pf_state_tree_id, + RB_FOREACH(s, pf_state_tree_id, +#ifdef __FreeBSD__ + &V_tree_id) { +#else &tree_id) { +#endif if (s->src_node == sn) s->src_node = NULL; if (s->nat_src_node == sn) @@ -3431,17 +3838,24 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (killed > 0) pf_purge_expired_src_nodes(1); - psnk->psnk_af = killed; + psnk->psnk_killed = killed; break; } case DIOCSETHOSTID: { u_int32_t *hostid = (u_int32_t *)addr; +#ifdef __FreeBSD__ + if (*hostid == 0) + V_pf_status.hostid = arc4random(); + else + V_pf_status.hostid = *hostid; +#else if (*hostid == 0) pf_status.hostid = arc4random(); else pf_status.hostid = *hostid; +#endif break; } @@ -3484,43 +3898,110 @@ fail: PF_UNLOCK(); if (flags & FWRITE) - sx_xunlock(&pf_consistency_lock); + sx_xunlock(&V_pf_consistency_lock); else - sx_sunlock(&pf_consistency_lock); + sx_sunlock(&V_pf_consistency_lock); #else splx(s); - /* XXX: Lock order? 
*/ if (flags & FWRITE) rw_exit_write(&pf_consistency_lock); else rw_exit_read(&pf_consistency_lock); #endif + + CURVNET_RESTORE(); + return (error); } #ifdef __FreeBSD__ +void +pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) +{ + bzero(sp, sizeof(struct pfsync_state)); + + /* copy from state key */ + sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0]; + sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1]; + sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0]; + sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1]; + sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0]; + sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1]; + sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0]; + sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1]; + sp->proto = st->key[PF_SK_WIRE]->proto; + sp->af = st->key[PF_SK_WIRE]->af; + + /* copy from state */ + strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname)); + bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr)); + sp->creation = htonl(time_second - st->creation); + sp->expire = pf_state_expires(st); + if (sp->expire <= time_second) + sp->expire = htonl(0); + else + sp->expire = htonl(sp->expire - time_second); + + sp->direction = st->direction; + sp->log = st->log; + sp->timeout = st->timeout; + sp->state_flags = st->state_flags; + if (st->src_node) + sp->sync_flags |= PFSYNC_FLAG_SRCNODE; + if (st->nat_src_node) + sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE; + + bcopy(&st->id, &sp->id, sizeof(sp->id)); + sp->creatorid = st->creatorid; + pf_state_peer_hton(&st->src, &sp->src); + pf_state_peer_hton(&st->dst, &sp->dst); + + if (st->rule.ptr == NULL) + sp->rule = htonl(-1); + else + sp->rule = htonl(st->rule.ptr->nr); + if (st->anchor.ptr == NULL) + sp->anchor = htonl(-1); + else + sp->anchor = htonl(st->anchor.ptr->nr); + if (st->nat_rule.ptr == NULL) + sp->nat_rule = htonl(-1); + else + sp->nat_rule = htonl(st->nat_rule.ptr->nr); + + 
pf_state_counter_hton(st->packets[0], sp->packets[0]); + pf_state_counter_hton(st->packets[1], sp->packets[1]); + pf_state_counter_hton(st->bytes[0], sp->bytes[0]); + pf_state_counter_hton(st->bytes[1], sp->bytes[1]); + +} + /* * XXX - Check for version missmatch!!! */ static void pf_clear_states(void) { - struct pf_state *state; - + struct pf_state *state; + +#ifdef __FreeBSD__ + RB_FOREACH(state, pf_state_tree_id, &V_tree_id) { +#else RB_FOREACH(state, pf_state_tree_id, &tree_id) { +#endif state->timeout = PFTM_PURGE; #if NPFSYNC /* don't send out individual delete messages */ - state->sync_flags = PFSTATE_NOSYNC; + state->sync_state = PFSTATE_NOSYNC; #endif pf_unlink_state(state); } - + #if 0 /* NPFSYNC */ /* * XXX This is called on module unload, we do not want to sync that over? */ */ - pfsync_clear_states(pf_status.hostid, psk->psk_ifname); + pfsync_clear_states(V_pf_status.hostid, psk->psk_ifname); #endif } @@ -3544,11 +4025,19 @@ pf_clear_srcnodes(void) struct pf_src_node *n; struct pf_state *state; +#ifdef __FreeBSD__ + RB_FOREACH(state, pf_state_tree_id, &V_tree_id) { +#else RB_FOREACH(state, pf_state_tree_id, &tree_id) { +#endif state->src_node = NULL; state->nat_src_node = NULL; } +#ifdef __FreeBSD__ + RB_FOREACH(n, pf_src_tree, &V_tree_src_tracking) { +#else RB_FOREACH(n, pf_src_tree, &tree_src_tracking) { +#endif n->expire = 1; n->states = 0; } @@ -3566,8 +4055,8 @@ shutdown_pf(void) int error = 0; u_int32_t t[5]; char nn = '\0'; - - pf_status.running = 0; + + V_pf_status.running = 0; do { if ((error = pf_begin_rules(&t[0], PF_RULESET_SCRUB, &nn)) != 0) { @@ -3577,22 +4066,22 @@ shutdown_pf(void) if ((error = pf_begin_rules(&t[1], PF_RULESET_FILTER, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: FILTER\n")); - break; /* XXX: rollback? */ + break; /* XXX: rollback? */ } if ((error = pf_begin_rules(&t[2], PF_RULESET_NAT, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: NAT\n")); - break; /* XXX: rollback? */ + break; /* XXX: rollback? 
*/ } if ((error = pf_begin_rules(&t[3], PF_RULESET_BINAT, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: BINAT\n")); - break; /* XXX: rollback? */ + break; /* XXX: rollback? */ } if ((error = pf_begin_rules(&t[4], PF_RULESET_RDR, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: RDR\n")); - break; /* XXX: rollback? */ + break; /* XXX: rollback? */ } /* XXX: these should always succeed here */ @@ -3605,13 +4094,13 @@ shutdown_pf(void) if ((error = pf_clear_tables()) != 0) break; -#ifdef ALTQ + #ifdef ALTQ if ((error = pf_begin_altq(&t[0])) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: ALTQ\n")); break; } pf_commit_altq(t[0]); -#endif + #endif pf_clear_states(); @@ -3621,7 +4110,7 @@ shutdown_pf(void) /* fingerprints and interfaces have thier own cleanup code */ } while(0); - return (error); + return (error); } #ifdef INET @@ -3643,10 +4132,12 @@ pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, if ((*m)->m_pkthdr.len >= (int)sizeof(struct ip)) { /* if m_pkthdr.len is less than ip header, pf will handle. */ h = mtod(*m, struct ip *); - HTONS(h->ip_len); - HTONS(h->ip_off); + HTONS(h->ip_len); + HTONS(h->ip_off); } + CURVNET_SET(ifp->if_vnet); chk = pf_test(PF_IN, ifp, m, NULL, inp); + CURVNET_RESTORE(); if (chk && *m) { m_freem(*m); *m = NULL; @@ -3683,10 +4174,12 @@ pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, if ((*m)->m_pkthdr.len >= (int)sizeof(*h)) { /* if m_pkthdr.len is less than ip header, pf will handle. */ h = mtod(*m, struct ip *); - HTONS(h->ip_len); - HTONS(h->ip_off); + HTONS(h->ip_len); + HTONS(h->ip_off); } + CURVNET_SET(ifp->if_vnet); chk = pf_test(PF_OUT, ifp, m, NULL, inp); + CURVNET_RESTORE(); if (chk && *m) { m_freem(*m); *m = NULL; @@ -3717,8 +4210,10 @@ pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, * order to support scoped addresses. In order to support stateful * filtering we have change this to lo0 as it is the case in IPv4. 
*/ + CURVNET_SET(ifp->if_vnet); chk = pf_test6(PF_IN, (*m)->m_flags & M_LOOP ? V_loif : ifp, m, NULL, inp); + CURVNET_RESTORE(); if (chk && *m) { m_freem(*m); *m = NULL; @@ -3743,7 +4238,9 @@ pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, #endif (*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } + CURVNET_SET(ifp->if_vnet); chk = pf_test6(PF_OUT, ifp, m, NULL, inp); + CURVNET_RESTORE(); if (chk && *m) { m_freem(*m); *m = NULL; @@ -3755,20 +4252,22 @@ pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, static int hook_pf(void) { +#ifdef INET struct pfil_head *pfh_inet; +#endif #ifdef INET6 struct pfil_head *pfh_inet6; #endif - + PF_ASSERT(MA_NOTOWNED); - if (pf_pfil_hooked) + if (V_pf_pfil_hooked) return (0); - + +#ifdef INET pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); if (pfh_inet == NULL) return (ESRCH); /* XXX */ -#ifdef INET pfil_add_hook(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet); pfil_add_hook(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet); #endif @@ -3787,27 +4286,29 @@ hook_pf(void) pfil_add_hook(pf_check6_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet6); #endif - pf_pfil_hooked = 1; + V_pf_pfil_hooked = 1; return (0); } static int dehook_pf(void) { +#ifdef INET struct pfil_head *pfh_inet; +#endif #ifdef INET6 struct pfil_head *pfh_inet6; #endif PF_ASSERT(MA_NOTOWNED); - if (pf_pfil_hooked == 0) + if (V_pf_pfil_hooked == 0) return (0); +#ifdef INET pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); if (pfh_inet == NULL) return (ESRCH); /* XXX */ -#ifdef INET pfil_remove_hook(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet); pfil_remove_hook(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK, @@ -3823,21 +4324,69 @@ dehook_pf(void) pfh_inet6); #endif - pf_pfil_hooked = 0; + V_pf_pfil_hooked = 0; + return (0); +} + +/* Vnet accessors */ +static int +vnet_pf_init(const void *unused) +{ + + V_pf_pfil_hooked = 0; + V_pf_end_threads = 0; + + V_debug_pfugidhack = 0; + + TAILQ_INIT(&V_pf_tags); + 
TAILQ_INIT(&V_pf_qids); + + pf_load(); + return (0); } static int +vnet_pf_uninit(const void *unused) +{ + + pf_unload(); + + return (0); +} + +/* Define startup order. */ +#define PF_SYSINIT_ORDER SI_SUB_PROTO_BEGIN +#define PF_MODEVENT_ORDER (SI_ORDER_FIRST) /* On boot slot in here. */ +#define PF_VNET_ORDER (PF_MODEVENT_ORDER + 2) /* Later still. */ + +/* + * Starting up. + * VNET_SYSINIT is called for each existing vnet and each new vnet. + */ +VNET_SYSINIT(vnet_pf_init, PF_SYSINIT_ORDER, PF_VNET_ORDER, + vnet_pf_init, NULL); + +/* + * Closing up shop. These are done in REVERSE ORDER, + * Not called on reboot. + * VNET_SYSUNINIT is called for each exiting vnet as it exits. + */ +VNET_SYSUNINIT(vnet_pf_uninit, PF_SYSINIT_ORDER, PF_VNET_ORDER, + vnet_pf_uninit, NULL); + +static int pf_load(void) { + init_zone_var(); + sx_init(&V_pf_consistency_lock, "pf_statetbl_lock"); init_pf_mutex(); - pf_dev = make_dev(&pf_cdevsw, 0, 0, 0, 0600, PF_NAME); if (pfattach() < 0) { - destroy_dev(pf_dev); destroy_pf_mutex(); return (ENOMEM); } + return (0); } @@ -3847,7 +4396,7 @@ pf_unload(void) int error = 0; PF_LOCK(); - pf_status.running = 0; + V_pf_status.running = 0; PF_UNLOCK(); error = dehook_pf(); if (error) { @@ -3861,18 +4410,18 @@ pf_unload(void) } PF_LOCK(); shutdown_pf(); - pf_end_threads = 1; - while (pf_end_threads < 2) { + V_pf_end_threads = 1; + while (V_pf_end_threads < 2) { wakeup_one(pf_purge_thread); - msleep(pf_purge_thread, &pf_task_mtx, 0, "pftmo", hz); + msleep(pf_purge_thread, &V_pf_task_mtx, 0, "pftmo", hz); } pfi_cleanup(); pf_osfp_flush(); pf_osfp_cleanup(); cleanup_pf_zone(); PF_UNLOCK(); - destroy_dev(pf_dev); destroy_pf_mutex(); + sx_destroy(&V_pf_consistency_lock); return error; } @@ -3883,11 +4432,10 @@ pf_modevent(module_t mod, int type, void *data) switch(type) { case MOD_LOAD: - error = pf_load(); + pf_dev = make_dev(&pf_cdevsw, 0, 0, 0, 0600, PF_NAME); break; - case MOD_UNLOAD: - error = pf_unload(); + destroy_dev(pf_dev); break; default: 
error = EINVAL; @@ -3895,7 +4443,7 @@ pf_modevent(module_t mod, int type, void *data) } return error; } - + static moduledata_t pf_mod = { "pf", pf_modevent, @@ -3904,4 +4452,4 @@ static moduledata_t pf_mod = { DECLARE_MODULE(pf, pf_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST); MODULE_VERSION(pf, PF_MODVER); -#endif /* __FreeBSD__ */ +#endif /* __FreeBSD__ */ diff --git a/sys/contrib/pf/net/pf_lb.c b/sys/contrib/pf/net/pf_lb.c new file mode 100644 index 0000000..f4c9a00 --- /dev/null +++ b/sys/contrib/pf/net/pf_lb.c @@ -0,0 +1,792 @@ +/* $OpenBSD: pf_lb.c,v 1.2 2009/02/12 02:13:15 sthen Exp $ */ + +/* + * Copyright (c) 2001 Daniel Hartmeier + * Copyright (c) 2002 - 2008 Henning Brauer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Effort sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F30602-01-2-0537. + * + */ + +#ifdef __FreeBSD__ +#include "opt_inet.h" +#include "opt_inet6.h" + +#include +__FBSDID("$FreeBSD$"); +#endif + +#ifdef __FreeBSD__ +#include "opt_bpf.h" +#include "opt_pf.h" + +#ifdef DEV_BPF +#define NBPFILTER DEV_BPF +#else +#define NBPFILTER 0 +#endif + +#ifdef DEV_PFLOG +#define NPFLOG DEV_PFLOG +#else +#define NPFLOG 0 +#endif + +#ifdef DEV_PFSYNC +#define NPFSYNC DEV_PFSYNC +#else +#define NPFSYNC 0 +#endif + +#ifdef DEV_PFLOW +#define NPFLOW DEV_PFLOW +#else +#define NPFLOW 0 +#endif + +#else +#include "bpfilter.h" +#include "pflog.h" +#include "pfsync.h" +#include "pflow.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef __FreeBSD__ +#include +#endif +#ifndef __FreeBSD__ +#include +#endif +#include +#ifdef __FreeBSD__ +#include +#include +#include +#else +#include +#endif + +#ifdef __FreeBSD__ +#include +#else +#include +#endif + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef __FreeBSD__ +#include +#endif +#include +#include +#include + +#if NPFSYNC > 0 +#include +#endif /* NPFSYNC > 0 */ + 
+#ifdef INET6
+#include
+#include
+#include
+#include
+#endif /* INET6 */
+
+
+#ifdef __FreeBSD__
+#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x
+#else
+#define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x
+#endif
+
+/*
+ * Global variables
+ */
+
+void pf_hash(struct pf_addr *, struct pf_addr *,
+ struct pf_poolhashkey *, sa_family_t);
+struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *,
+ int, int, struct pfi_kif *,
+ struct pf_addr *, u_int16_t, struct pf_addr *,
+ u_int16_t, int);
+int pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *,
+ struct pf_addr *, struct pf_addr *, u_int16_t,
+ struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t,
+ struct pf_src_node **);
+
+#define mix(a,b,c) \
+	do { \
+		a -= b; a -= c; a ^= (c >> 13); \
+		b -= c; b -= a; b ^= (a << 8); \
+		c -= a; c -= b; c ^= (b >> 13); \
+		a -= b; a -= c; a ^= (c >> 12); \
+		b -= c; b -= a; b ^= (a << 16); \
+		c -= a; c -= b; c ^= (b >> 5); \
+		a -= b; a -= c; a ^= (c >> 3); \
+		b -= c; b -= a; b ^= (a << 10); \
+		c -= a; c -= b; c ^= (b >> 15); \
+	} while (0)
+
+/*
+ * hash function based on bridge_hash in if_bridge.c
+ *
+ * Mixes the address 'inaddr' with the pool key 'key' and stores the result
+ * in 'hash'.  For AF_INET only hash->addr32[0] is written; for AF_INET6 all
+ * four 32-bit words are written.  The switch has no default case, so any
+ * other address family leaves 'hash' untouched.
+ */
+void
+pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
+    struct pf_poolhashkey *key, sa_family_t af)
+{
+	u_int32_t a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
+
+	switch (af) {
+#ifdef INET
+	case AF_INET:
+		a += inaddr->addr32[0];
+		b += key->key32[1];
+		mix(a, b, c);
+		hash->addr32[0] = c + key->key32[2];
+		break;
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6:
+		a += inaddr->addr32[0];
+		b += inaddr->addr32[2];
+		mix(a, b, c);
+		hash->addr32[0] = c;
+		a += inaddr->addr32[1];
+		b += inaddr->addr32[3];
+		c += key->key32[1];
+		mix(a, b, c);
+		hash->addr32[1] = c;
+		a += inaddr->addr32[2];
+		b += inaddr->addr32[1];
+		c += key->key32[2];
+		mix(a, b, c);
+		hash->addr32[2] = c;
+		a += inaddr->addr32[3];
+		b += inaddr->addr32[0];
+		c += key->key32[3];
+		mix(a, b, c);
+		hash->addr32[3] = c;
+		break;
+#endif /* INET6 */
+	}
+}
+/*
+ * Walk the active rules of translation ruleset 'rs_num' in the main ruleset,
+ * stepping into and out of anchors, and stop at the first non-anchor rule
+ * whose interface, direction, address family, protocol, addresses, ports,
+ * tag and OS fingerprint criteria all match.  Rules that fail a criterion
+ * are passed over, via the rule's skip pointers where one exists for that
+ * criterion and via TAILQ_NEXT() otherwise.
+ *
+ * The last tag / rtableid seen on a fully matching rule (including anchor
+ * rules) is handed to pf_tag_packet() after the walk.
+ *
+ * Returns the matching rule, or NULL when no rule matched, when
+ * pf_tag_packet() returns non-zero, or when the matching rule's action is
+ * PF_NONAT, PF_NORDR or PF_NOBINAT.
+ */
+struct pf_rule *
+pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
+    int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport,
+    struct pf_addr *daddr, u_int16_t dport, int rs_num)
+{
+	struct pf_rule *r, *rm = NULL;
+	struct pf_ruleset *ruleset = NULL;
+	int tag = -1;
+	int rtableid = -1;
+	int asd = 0;
+
+	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
+	while (r && rm == NULL) {
+		struct pf_rule_addr *src = NULL, *dst = NULL;
+		struct pf_addr_wrap *xdst = NULL;
+		/*
+		 * For an inbound packet checked against a binat rule the
+		 * packet source is compared with the rule's dst, and the
+		 * packet destination with the current pool address (xdst)
+		 * instead of a rule address.
+		 */
+		if (r->action == PF_BINAT && direction == PF_IN) {
+			src = &r->dst;
+			if (r->rpool.cur != NULL)
+				xdst = &r->rpool.cur->addr;
+		} else {
+			src = &r->src;
+			dst = &r->dst;
+		}
+
+		r->evaluations++;
+		if (pfi_kif_match(r->kif, kif) == r->ifnot)
+			r = r->skip[PF_SKIP_IFP].ptr;
+		else if (r->direction && r->direction != direction)
+			r = r->skip[PF_SKIP_DIR].ptr;
+		else if (r->af && r->af != pd->af)
+			r = r->skip[PF_SKIP_AF].ptr;
+		else if (r->proto && r->proto != pd->proto)
+			r = r->skip[PF_SKIP_PROTO].ptr;
+		else if (PF_MISMATCHAW(&src->addr, saddr, pd->af,
+		    src->neg, kif))
+			r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
+			    PF_SKIP_DST_ADDR].ptr;
+		else if (src->port_op && !pf_match_port(src->port_op,
+		    src->port[0], src->port[1], sport))
+			r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
+			    PF_SKIP_DST_PORT].ptr;
+		else if (dst != NULL &&
+		    PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL))
+			r = r->skip[PF_SKIP_DST_ADDR].ptr;
+		else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
+		    0, NULL))
+			r = TAILQ_NEXT(r, entries);
+		else if (dst != NULL && dst->port_op &&
+		    !pf_match_port(dst->port_op, dst->port[0],
+		    dst->port[1], dport))
+			r = r->skip[PF_SKIP_DST_PORT].ptr;
+#ifdef __FreeBSD__
+		else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag))
+#else
+		else if (r->match_tag && !pf_match_tag(m, r, &tag))
+#endif
+			r = TAILQ_NEXT(r, entries);
+		else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
+		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
+		    off, pd->hdr.tcp), r->os_fingerprint)))
+			r = TAILQ_NEXT(r, entries);
+		else {
+			if (r->tag)
+				tag = r->tag;
+			if (r->rtableid >= 0)
+				rtableid = r->rtableid;
+			if (r->anchor == NULL) {
+				rm = r;	/* ends the loop (rm != NULL) */
+			} else
+				pf_step_into_anchor(&asd, &ruleset, rs_num,
+				    &r, NULL, NULL);
+		}
+		if (r == NULL)
+			pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r,
+			    NULL, NULL);
+	}
+#ifdef __FreeBSD__
+	if (pf_tag_packet(m, tag, rtableid, pd->pf_mtag))
+#else
+	if (pf_tag_packet(m, tag, rtableid))
+#endif
+		return (NULL);
+	if (rm != NULL && (rm->action == PF_NONAT ||
+	    rm->action == PF_NORDR || rm->action == PF_NOBINAT))
+		return (NULL);
+	return (rm);
+}
+/*
+ * Choose the translated source address and port for rule 'r'.
+ *
+ * The address is obtained from pf_map_addr().  A candidate is accepted as
+ * soon as pf_find_state_all() finds no state for the key built from
+ * (naddr, candidate port) and (daddr, dport); a port picked from the
+ * low..high range is stored in *nport via htons().  Protocols other than
+ * TCP, UDP and ICMP are looked up with dport on both sides and *nport is
+ * not written; with low == high == 0 the caller's *nport is tested as is.
+ * For ICMP the range is forced to 1-65535.
+ *
+ * If nothing is free for the current address, random and round-robin pools
+ * ask pf_map_addr() for another address and retry until the first address
+ * (init_addr) comes around again; all other pool types give up at once.
+ *
+ * Returns 0 on success, 1 if pf_map_addr() fails or nothing is free.
+ *
+ * NOTE(review): tmp is u_int16_t.  With high == 65535 (always the case for
+ * ICMP) "tmp <= high" can never become false, and "tmp >= low" could not
+ * either if low were 0, so those loops would only terminate by finding a
+ * free port.  Confirm whether callers can reach these cases.
+ */
+int
+pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
+    struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport,
+    struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
+    struct pf_src_node **sn)
+{
+	struct pf_state_key_cmp key;
+	struct pf_addr init_addr;
+	u_int16_t cut;
+
+	bzero(&init_addr, sizeof(init_addr));
+	if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
+		return (1);
+
+	if (proto == IPPROTO_ICMP) {
+		low = 1;
+		high = 65535;
+	}
+
+	do {
+		key.af = af;
+		key.proto = proto;
+		PF_ACPY(&key.addr[1], daddr, key.af);
+		PF_ACPY(&key.addr[0], naddr, key.af);
+		key.port[1] = dport;
+
+		/*
+		 * port search; start random, step;
+		 * similar to portloop in in_pcbbind
+		 */
+		if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
+		    proto == IPPROTO_ICMP)) {
+			key.port[0] = dport;
+			if (pf_find_state_all(&key, PF_IN, NULL) == NULL)
+				return (0);
+		} else if (low == 0 && high == 0) {
+			key.port[0] = *nport;
+			if (pf_find_state_all(&key, PF_IN, NULL) == NULL)
+				return (0);
+		} else if (low == high) {
+			key.port[0] = htons(low);
+			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
+				*nport = htons(low);
+				return (0);
+			}
+		} else {
+			u_int16_t tmp;
+
+			if (low > high) {
+				tmp = low;
+				low = high;
+				high = tmp;
+			}
+			/* low < high */
+#ifdef __FreeBSD__
+			cut = htonl(arc4random()) % (1 + high - low) + low;
+#else
+			cut = arc4random_uniform(1 + high - low) + low;
+#endif
+			/* low <= cut <= high */
+			for (tmp = cut; tmp <= high; ++(tmp)) {
+				key.port[0] = htons(tmp);
+				if (pf_find_state_all(&key, PF_IN, NULL) ==
+#ifdef __FreeBSD__
+				    NULL) {
+#else
+				    NULL && !in_baddynamic(tmp, proto)) {
+#endif
+					*nport = htons(tmp);
+					return (0);
+				}
+			}
+			for (tmp = cut - 1; tmp >= low; --(tmp)) {
+				key.port[0] = htons(tmp);
+				if (pf_find_state_all(&key, PF_IN, NULL) ==
+#ifdef __FreeBSD__
+				    NULL) {
+#else
+				    NULL && !in_baddynamic(tmp, proto)) {
+#endif
+					*nport = htons(tmp);
+					return (0);
+				}
+			}
+		}
+
+		switch (r->rpool.opts & PF_POOL_TYPEMASK) {
+		case PF_POOL_RANDOM:
+		case PF_POOL_ROUNDROBIN:
+			if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
+				return (1);
+			break;
+		case PF_POOL_NONE:
+		case PF_POOL_SRCHASH:
+		case PF_POOL_BITMASK:
+		default:
+			return (1);
+		}
+	} while (! PF_AEQ(&init_addr, naddr, af) );
+	return (1);					/* none available */
+}
+/*
+ * Select the translation address 'naddr' from the address pool of rule 'r'
+ * according to the pool type (none, bitmask, random, source-hash or
+ * round-robin).
+ *
+ * With PF_POOL_STICKYADDR set, a pool type other than PF_POOL_NONE and
+ * *sn == NULL, the source tracking tree is searched for 'saddr'; if a node
+ * with a non-zero raddr is found, that address is reused and 0 is returned
+ * immediately.  Otherwise, when *sn is non-NULL on the way out, the chosen
+ * address is recorded in (*sn)->raddr.
+ *
+ * For random and round-robin pools, a non-NULL 'init_addr' that is still
+ * zero receives the first address chosen; pf_get_sport() uses it to notice
+ * when the pool has come full circle.
+ *
+ * Returns 0 on success and 1 when no address can be provided (no-route pool
+ * entry, dynamic interface without an address of family 'af', a table used
+ * with a pool type other than round-robin, or no pool entry holding an
+ * address of family 'af').
+ */
+int
+pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
+    struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
+{
+	unsigned char hash[16];
+	struct pf_pool *rpool = &r->rpool;
+	struct pf_addr *raddr = &rpool->cur->addr.v.a.addr;
+	struct pf_addr *rmask = &rpool->cur->addr.v.a.mask;
+	struct pf_pooladdr *acur = rpool->cur;
+	struct pf_src_node k;
+
+	if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
+	    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
+		k.af = af;
+		PF_ACPY(&k.addr, saddr, af);
+		if (r->rule_flag & PFRULE_RULESRCTRACK ||
+		    r->rpool.opts & PF_POOL_STICKYADDR)
+			k.rule.ptr = r;
+		else	/* NOTE(review): looks unreachable, STICKYADDR is required by the enclosing if */
+			k.rule.ptr = NULL;
+#ifdef __FreeBSD__
+		V_pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
+		*sn = RB_FIND(pf_src_tree, &V_tree_src_tracking, &k);
+#else
+		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
+		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
+#endif
+		if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
+			PF_ACPY(naddr, &(*sn)->raddr, af);
+#ifdef __FreeBSD__
+			if (V_pf_status.debug >= PF_DEBUG_MISC) {
+#else
+			if (pf_status.debug >= PF_DEBUG_MISC) {
+#endif
+				printf("pf_map_addr: src tracking maps ");
+				pf_print_host(&k.addr, 0, af);
+				printf(" to ");
+				pf_print_host(naddr, 0, af);
+				printf("\n");
+			}
+			return (0);
+		}
+	}
+
+	if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
+		return (1);
+	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
+		switch (af) {
+#ifdef INET
+		case AF_INET:
+			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
+			    (rpool->opts & PF_POOL_TYPEMASK) !=
+			    PF_POOL_ROUNDROBIN)
+				return (1);
+			raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
+			rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
+			break;
+#endif /* INET */
+#ifdef INET6
+		case AF_INET6:
+			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
+			    (rpool->opts & PF_POOL_TYPEMASK) !=
+			    PF_POOL_ROUNDROBIN)
+				return (1);
+			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
+			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
+			break;
+#endif /* INET6 */
+		}
+	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
+		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
+			return (1); /* unsupported */
+	} else {
+		raddr = &rpool->cur->addr.v.a.addr;
+		rmask = &rpool->cur->addr.v.a.mask;
+	}
+
+	switch (rpool->opts & PF_POOL_TYPEMASK) {
+	case PF_POOL_NONE:
+		PF_ACPY(naddr, raddr, af);
+		break;
+	case PF_POOL_BITMASK:
+		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
+		break;
+	case PF_POOL_RANDOM:
+		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
+			switch (af) {
+#ifdef INET
+			case AF_INET:
+				rpool->counter.addr32[0] = htonl(arc4random());
+				break;
+#endif /* INET */
+#ifdef INET6
+			case AF_INET6:
+				if (rmask->addr32[3] != 0xffffffff)
+					rpool->counter.addr32[3] =
+					    htonl(arc4random());
+				else
+					break;
+				if (rmask->addr32[2] != 0xffffffff)
+					rpool->counter.addr32[2] =
+					    htonl(arc4random());
+				else
+					break;
+				if (rmask->addr32[1] != 0xffffffff)
+					rpool->counter.addr32[1] =
+					    htonl(arc4random());
+				else
+					break;
+				if (rmask->addr32[0] != 0xffffffff)
+					rpool->counter.addr32[0] =
+					    htonl(arc4random());
+				break;
+#endif /* INET6 */
+			}
+			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
+			PF_ACPY(init_addr, naddr, af);
+
+		} else {
+			PF_AINC(&rpool->counter, af);
+			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
+		}
+		break;
+	case PF_POOL_SRCHASH:
+		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
+		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
+		break;
+	case PF_POOL_ROUNDROBIN:
+		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
+			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
+			    &rpool->tblidx, &rpool->counter,
+			    &raddr, &rmask, af))
+				goto get_addr;
+		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
+			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
+			    &rpool->tblidx, &rpool->counter,
+			    &raddr, &rmask, af))
+				goto get_addr;
+		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
+			goto get_addr;
+
+	try_next:
+		if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL)
+			rpool->cur = TAILQ_FIRST(&rpool->list);
+		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
+			rpool->tblidx = -1;
+			if (pfr_pool_get(rpool->cur->addr.p.tbl,
+			    &rpool->tblidx, &rpool->counter,
+			    &raddr, &rmask, af)) {
+				/* table contains no address of type 'af' */
+				if (rpool->cur != acur)
+					goto try_next;
+				return (1);
+			}
+		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
+			rpool->tblidx = -1;
+			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
+			    &rpool->tblidx, &rpool->counter,
+			    &raddr, &rmask, af)) {
+				/* table contains no address of type 'af' */
+				if (rpool->cur != acur)
+					goto try_next;
+				return (1);
+			}
+		} else {
+			raddr = &rpool->cur->addr.v.a.addr;
+			rmask = &rpool->cur->addr.v.a.mask;
+			PF_ACPY(&rpool->counter, raddr, af);
+		}
+
+	get_addr:
+		PF_ACPY(naddr, &rpool->counter, af);
+		if (init_addr != NULL && PF_AZERO(init_addr, af))
+			PF_ACPY(init_addr, naddr, af);
+		PF_AINC(&rpool->counter, af);
+		break;
+	}
+	if (*sn != NULL)
+		PF_ACPY(&(*sn)->raddr, naddr, af);
+
+#ifdef __FreeBSD__
+	if (V_pf_status.debug >= PF_DEBUG_MISC &&
+#else
+	if (pf_status.debug >= PF_DEBUG_MISC &&
+#endif
+	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
+		printf("pf_map_addr: selected address ");
+		pf_print_host(naddr, 0, af);
+		printf("\n");
+	}
+
+	return (0);
+}
+/*
+ * Find the translation rule that applies to a packet and compute the
+ * translated address and port.
+ *
+ * When a rule matches, pf_state_key_setup() is called; if it returns
+ * non-zero the rule is returned straight away without any translation
+ * being computed.  Otherwise the translated address and port are written to
+ * (*nkp)->addr[1] and (*nkp)->port[1] depending on the rule action (nat,
+ * binat or rdr).
+ *
+ * Returns the matching rule, or NULL when no rule matched, the rule is a
+ * PF_NONAT / PF_NOBINAT / PF_NORDR rule, address or port selection failed,
+ * or the first sizeof(struct pf_state_key_cmp) bytes of *skp and *nkp are
+ * equal after translation.  In that last case both keys are handed back
+ * with pool_put() and *skw, *sks, *skp and *nkp are set to NULL.
+ */
+struct pf_rule *
+pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
+    struct pfi_kif *kif, struct pf_src_node **sn,
+    struct pf_state_key **skw, struct pf_state_key **sks,
+    struct pf_state_key **skp, struct pf_state_key **nkp,
+    struct pf_addr *saddr, struct pf_addr *daddr,
+    u_int16_t sport, u_int16_t dport)
+{
+	struct pf_rule *r = NULL;
+
+	/* Outbound: try binat rules, then nat.  Inbound: rdr, then binat. */
+	if (direction == PF_OUT) {
+		r = pf_match_translation(pd, m, off, direction, kif, saddr,
+		    sport, daddr, dport, PF_RULESET_BINAT);
+		if (r == NULL)
+			r = pf_match_translation(pd, m, off, direction, kif,
+			    saddr, sport, daddr, dport, PF_RULESET_NAT);
+	} else {
+		r = pf_match_translation(pd, m, off, direction, kif, saddr,
+		    sport, daddr, dport, PF_RULESET_RDR);
+		if (r == NULL)
+			r = pf_match_translation(pd, m, off, direction, kif,
+			    saddr, sport, daddr, dport, PF_RULESET_BINAT);
+	}
+
+	if (r != NULL) {
+		struct pf_addr *naddr;
+		u_int16_t *nport;
+
+		if (pf_state_key_setup(pd, r, skw, sks, skp, nkp,
+		    saddr, daddr, sport, dport))
+			return r;
+
+		/* XXX We only modify one side for now. */
+		naddr = &(*nkp)->addr[1];
+		nport = &(*nkp)->port[1];
+
+		switch (r->action) {
+		case PF_NONAT:
+		case PF_NOBINAT:
+		case PF_NORDR:
+			return (NULL);
+		case PF_NAT:
+			if (pf_get_sport(pd->af, pd->proto, r, saddr,
+			    daddr, dport, naddr, nport, r->rpool.proxy_port[0],
+			    r->rpool.proxy_port[1], sn)) {
+				DPFPRINTF(PF_DEBUG_MISC,
+				    ("pf: NAT proxy port allocation "
+				    "(%u-%u) failed\n",
+				    r->rpool.proxy_port[0],
+				    r->rpool.proxy_port[1]));
+				return (NULL);
+			}
+			break;
+		case PF_BINAT:
+			switch (direction) {
+			case PF_OUT:
+				if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
+					switch (pd->af) {
+#ifdef INET
+					case AF_INET:
+						if (r->rpool.cur->addr.p.dyn->
+						    pfid_acnt4 < 1)
+							return (NULL);
+						PF_POOLMASK(naddr,
+						    &r->rpool.cur->addr.p.dyn->
+						    pfid_addr4,
+						    &r->rpool.cur->addr.p.dyn->
+						    pfid_mask4,
+						    saddr, AF_INET);
+						break;
+#endif /* INET */
+#ifdef INET6
+					case AF_INET6:
+						if (r->rpool.cur->addr.p.dyn->
+						    pfid_acnt6 < 1)
+							return (NULL);
+						PF_POOLMASK(naddr,
+						    &r->rpool.cur->addr.p.dyn->
+						    pfid_addr6,
+						    &r->rpool.cur->addr.p.dyn->
+						    pfid_mask6,
+						    saddr, AF_INET6);
+						break;
+#endif /* INET6 */
+					}
+				} else
+					PF_POOLMASK(naddr,
+					    &r->rpool.cur->addr.v.a.addr,
+					    &r->rpool.cur->addr.v.a.mask,
+					    saddr, pd->af);
+				break;
+			case PF_IN:
+				if (r->src.addr.type == PF_ADDR_DYNIFTL) {
+					switch (pd->af) {
+#ifdef INET
+					case AF_INET:
+						if (r->src.addr.p.dyn->
+						    pfid_acnt4 < 1)
+							return (NULL);
+						PF_POOLMASK(naddr,
+						    &r->src.addr.p.dyn->
+						    pfid_addr4,
+						    &r->src.addr.p.dyn->
+						    pfid_mask4,
+						    daddr, AF_INET);
+						break;
+#endif /* INET */
+#ifdef INET6
+					case AF_INET6:
+						if (r->src.addr.p.dyn->
+						    pfid_acnt6 < 1)
+							return (NULL);
+						PF_POOLMASK(naddr,
+						    &r->src.addr.p.dyn->
+						    pfid_addr6,
+						    &r->src.addr.p.dyn->
+						    pfid_mask6,
+						    daddr, AF_INET6);
+						break;
+#endif /* INET6 */
+					}
+				} else
+					PF_POOLMASK(naddr,
+					    &r->src.addr.v.a.addr,
+					    &r->src.addr.v.a.mask, daddr,
+					    pd->af);
+				break;
+			}
+			break;
+		case PF_RDR: {
+			if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn))
+				return (NULL);
+			if ((r->rpool.opts & PF_POOL_TYPEMASK) ==
+			    PF_POOL_BITMASK)
+				PF_POOLMASK(naddr, naddr,
+				    &r->rpool.cur->addr.v.a.mask, daddr,
+				    pd->af);
+
+			if (r->rpool.proxy_port[1]) {
+				u_int32_t tmp_nport;
+
+				tmp_nport = ((ntohs(dport) -
+				    ntohs(r->dst.port[0])) %
+				    (r->rpool.proxy_port[1] -
+				    r->rpool.proxy_port[0] + 1)) +
+				    r->rpool.proxy_port[0];
+
+				/* wrap around if necessary */
+				if (tmp_nport > 65535)
+					tmp_nport -= 65535;
+				*nport = htons((u_int16_t)tmp_nport);
+			} else if (r->rpool.proxy_port[0])
+				*nport = htons(r->rpool.proxy_port[0]);
+			break;
+		}
+		default:
+			return (NULL);
+		}
+		/*
+		 * Translation was a NOP.
+		 * Pretend there was no match.
+		 */
+		if (!bcmp(*skp, *nkp, sizeof(struct pf_state_key_cmp))) {
+#ifdef __FreeBSD__
+			pool_put(&V_pf_state_key_pl, *nkp);
+			pool_put(&V_pf_state_key_pl, *skp);
+#else
+			pool_put(&pf_state_key_pl, *nkp);
+			pool_put(&pf_state_key_pl, *skp);
+#endif
+			*skw = *sks = *nkp = *skp = NULL;
+			return (NULL);
+		}
+	}
+
+	return (r);
+}
+
diff --git a/sys/contrib/pf/net/pf_mtag.h b/sys/contrib/pf/net/pf_mtag.h
index a0ebf7e..141a867 100644
--- a/sys/contrib/pf/net/pf_mtag.h
+++ b/sys/contrib/pf/net/pf_mtag.h
@@ -37,15 +37,17 @@
 #define PF_TAG_GENERATED 0x01
 #define PF_TAG_FRAGCACHE 0x02
 #define PF_TAG_TRANSLATE_LOCALHOST 0x04
+#define PF_PACKET_LOOPED 0x08
+#define PF_FASTFWD_OURS_PRESENT 0x10
 struct pf_mtag {
 	void *hdr; /* saved hdr pos in mbuf, for ECN */
-	u_int rtableid; /* alternate routing table id */
+	void *statekey; /* pf stackside statekey */
 	u_int32_t qid; /* queue id */
+	u_int rtableid; /* alternate routing table id */
 	u_int16_t tag; /* tag id */
 	u_int8_t flags;
 	u_int8_t routed;
-	sa_family_t af; /* for ECN */
 };
 static __inline struct pf_mtag *pf_find_mtag(struct mbuf *);
diff --git a/sys/contrib/pf/net/pf_norm.c b/sys/contrib/pf/net/pf_norm.c
index cd6d65a..6c04eee 100644
--- a/sys/contrib/pf/net/pf_norm.c
+++ b/sys/contrib/pf/net/pf_norm.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pf_norm.c,v 1.107 2006/04/16 00:59:52 pascoe Exp $ */
+/* $OpenBSD: pf_norm.c,v 1.114 2009/01/29 14:11:45 henning Exp $ */
 /*
  * Copyright 2001 Niels Provos
@@ -34,9 +34,9 @@ __FBSDID("$FreeBSD$");
 #ifdef DEV_PFLOG
-#define NPFLOG DEV_PFLOG
+#define NPFLOG DEV_PFLOG
 #else
-#define NPFLOG 0
+#define NPFLOG 0
 #endif
 #else
 #include "pflog.h"
@@ -78,8 +78,6 @@ __FBSDID("$FreeBSD$");
 #include
 #ifndef __FreeBSD__
-#include
-
 struct pf_frent {
 	LIST_ENTRY(pf_frent) fr_next;
 	struct ip *fr_ip;
@@ -118,17 +116,35 @@ struct pf_fragment {
 };
 #endif
+#ifdef __FreeBSD__
+TAILQ_HEAD(pf_fragqueue, pf_fragment);
+TAILQ_HEAD(pf_cachequeue, pf_fragment);
+VNET_DEFINE(struct pf_fragqueue, pf_fragqueue);
+#define
V_pf_fragqueue VNET(pf_fragqueue) +VNET_DEFINE(struct pf_cachequeue, pf_cachequeue); +#define V_pf_cachequeue VNET(pf_cachequeue) +#else TAILQ_HEAD(pf_fragqueue, pf_fragment) pf_fragqueue; TAILQ_HEAD(pf_cachequeue, pf_fragment) pf_cachequeue; +#endif #ifndef __FreeBSD__ static __inline int pf_frag_compare(struct pf_fragment *, struct pf_fragment *); #else -static int pf_frag_compare(struct pf_fragment *, +static int pf_frag_compare(struct pf_fragment *, struct pf_fragment *); #endif + +#ifdef __FreeBSD__ +RB_HEAD(pf_frag_tree, pf_fragment); +VNET_DEFINE(struct pf_frag_tree, pf_frag_tree); +#define V_pf_frag_tree VNET(pf_frag_tree) +VNET_DEFINE(struct pf_frag_tree, pf_cache_tree); +#define V_pf_cache_tree VNET(pf_cache_tree) +#else RB_HEAD(pf_frag_tree, pf_fragment) pf_frag_tree, pf_cache_tree; +#endif RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); @@ -143,24 +159,45 @@ struct mbuf *pf_reassemble(struct mbuf **, struct pf_fragment **, struct mbuf *pf_fragcache(struct mbuf **, struct ip*, struct pf_fragment **, int, int, int *); int pf_normalize_tcpopt(struct pf_rule *, struct mbuf *, - struct tcphdr *, int); - + struct tcphdr *, int, sa_family_t); +void pf_scrub_ip(struct mbuf **, u_int32_t, u_int8_t, + u_int8_t); +#ifdef INET6 +void pf_scrub_ip6(struct mbuf **, u_int8_t); +#endif +#ifdef __FreeBSD__ +#define DPFPRINTF(x) do { \ + if (V_pf_status.debug >= PF_DEBUG_MISC) { \ + printf("%s: ", __func__); \ + printf x ; \ + } \ +} while(0) +#else #define DPFPRINTF(x) do { \ if (pf_status.debug >= PF_DEBUG_MISC) { \ printf("%s: ", __func__); \ printf x ; \ } \ } while(0) +#endif /* Globals */ #ifdef __FreeBSD__ -uma_zone_t pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl; -uma_zone_t pf_state_scrub_pl; +VNET_DEFINE(uma_zone_t, pf_frent_pl); +VNET_DEFINE(uma_zone_t, pf_frag_pl); +VNET_DEFINE(uma_zone_t, pf_cache_pl); +VNET_DEFINE(uma_zone_t, pf_cent_pl); +VNET_DEFINE(uma_zone_t, 
pf_state_scrub_pl); + +VNET_DEFINE(int, pf_nfrents); +#define V_pf_nfrents VNET(pf_nfrents) +VNET_DEFINE(int, pf_ncache); +#define V_pf_ncache VNET(pf_ncache) #else struct pool pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl; struct pool pf_state_scrub_pl; -#endif int pf_nfrents, pf_ncache; +#endif void pf_normalize_init(void) @@ -171,9 +208,9 @@ pf_normalize_init(void) * No high water mark support(It's hint not hard limit). * uma_zone_set_max(pf_frag_pl, PFFRAG_FRAG_HIWAT); */ - uma_zone_set_max(pf_frent_pl, PFFRAG_FRENT_HIWAT); - uma_zone_set_max(pf_cache_pl, PFFRAG_FRCACHE_HIWAT); - uma_zone_set_max(pf_cent_pl, PFFRAG_FRCENT_HIWAT); + uma_zone_set_max(V_pf_frent_pl, PFFRAG_FRENT_HIWAT); + uma_zone_set_max(V_pf_cache_pl, PFFRAG_FRCACHE_HIWAT); + uma_zone_set_max(V_pf_cent_pl, PFFRAG_FRCENT_HIWAT); #else pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent", NULL); @@ -192,8 +229,13 @@ pf_normalize_init(void) pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0); #endif +#ifdef __FreeBSD__ + TAILQ_INIT(&V_pf_fragqueue); + TAILQ_INIT(&V_pf_cachequeue); +#else TAILQ_INIT(&pf_fragqueue); TAILQ_INIT(&pf_cachequeue); +#endif } #ifdef __FreeBSD__ @@ -224,14 +266,20 @@ void pf_purge_expired_fragments(void) { struct pf_fragment *frag; +#ifdef __FreeBSD__ + u_int32_t expire = time_second - + V_pf_default_rule.timeout[PFTM_FRAG]; +#else u_int32_t expire = time_second - pf_default_rule.timeout[PFTM_FRAG]; +#endif - while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) { #ifdef __FreeBSD__ + while ((frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue)) != NULL) { KASSERT((BUFFER_FRAGMENTS(frag)), - ("BUFFER_FRAGMENTS(frag) == 0: %s", __FUNCTION__)); + ("BUFFER_FRAGMENTS(frag) == 0: %s", __FUNCTION__)); #else + while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) { KASSERT(BUFFER_FRAGMENTS(frag)); #endif if (frag->fr_timeout > expire) @@ -241,11 +289,12 @@ pf_purge_expired_fragments(void) pf_free_fragment(frag); } - while ((frag = 
TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) { #ifdef __FreeBSD__ + while ((frag = TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue)) != NULL) { KASSERT((!BUFFER_FRAGMENTS(frag)), - ("BUFFER_FRAGMENTS(frag) != 0: %s", __FUNCTION__)); + ("BUFFER_FRAGMENTS(frag) != 0: %s", __FUNCTION__)); #else + while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) { KASSERT(!BUFFER_FRAGMENTS(frag)); #endif if (frag->fr_timeout > expire) @@ -254,8 +303,8 @@ pf_purge_expired_fragments(void) DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag)); pf_free_fragment(frag); #ifdef __FreeBSD__ - KASSERT((TAILQ_EMPTY(&pf_cachequeue) || - TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag), + KASSERT((TAILQ_EMPTY(&V_pf_cachequeue) || + TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue) != frag), ("!(TAILQ_EMPTY() || TAILQ_LAST() == farg): %s", __FUNCTION__)); #else @@ -275,22 +324,44 @@ pf_flush_fragments(void) struct pf_fragment *frag; int goal; +#ifdef __FreeBSD__ + goal = V_pf_nfrents * 9 / 10; + DPFPRINTF(("trying to free > %d frents\n", + V_pf_nfrents - goal)); + while (goal < V_pf_nfrents) { +#else goal = pf_nfrents * 9 / 10; DPFPRINTF(("trying to free > %d frents\n", pf_nfrents - goal)); while (goal < pf_nfrents) { +#endif +#ifdef __FreeBSD__ + frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue); +#else frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue); +#endif if (frag == NULL) break; pf_free_fragment(frag); } +#ifdef __FreeBSD__ + goal = V_pf_ncache * 9 / 10; + DPFPRINTF(("trying to free > %d cache entries\n", + V_pf_ncache - goal)); + while (goal < V_pf_ncache) { +#else goal = pf_ncache * 9 / 10; DPFPRINTF(("trying to free > %d cache entries\n", pf_ncache - goal)); while (goal < pf_ncache) { +#endif +#ifdef __FreeBSD__ + frag = TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue); +#else frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue); +#endif if (frag == NULL) break; pf_free_fragment(frag); @@ -312,8 +383,13 @@ pf_free_fragment(struct pf_fragment *frag) LIST_REMOVE(frent, fr_next); 
m_freem(frent->fr_m); +#ifdef __FreeBSD__ + pool_put(&V_pf_frent_pl, frent); + V_pf_nfrents--; +#else pool_put(&pf_frent_pl, frent); pf_nfrents--; +#endif } } else { for (frcache = LIST_FIRST(&frag->fr_cache); frcache; @@ -325,15 +401,18 @@ pf_free_fragment(struct pf_fragment *frag) LIST_FIRST(&frag->fr_cache)->fr_off > frcache->fr_end), ("! (LIST_EMPTY() || LIST_FIRST()->fr_off >" - " frcache->fr_end): %s", __FUNCTION__)); + " frcache->fr_end): %s", __FUNCTION__)); + + pool_put(&V_pf_cent_pl, frcache); + V_pf_ncache--; #else KASSERT(LIST_EMPTY(&frag->fr_cache) || LIST_FIRST(&frag->fr_cache)->fr_off > frcache->fr_end); -#endif pool_put(&pf_cent_pl, frcache); pf_ncache--; +#endif } } @@ -362,11 +441,21 @@ pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree) /* XXX Are we sure we want to update the timeout? */ frag->fr_timeout = time_second; if (BUFFER_FRAGMENTS(frag)) { +#ifdef __FreeBSD__ + TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next); + TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next); +#else TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next); +#endif } else { +#ifdef __FreeBSD__ + TAILQ_REMOVE(&V_pf_cachequeue, frag, frag_next); + TAILQ_INSERT_HEAD(&V_pf_cachequeue, frag, frag_next); +#else TAILQ_REMOVE(&pf_cachequeue, frag, frag_next); TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next); +#endif } } @@ -379,13 +468,25 @@ void pf_remove_fragment(struct pf_fragment *frag) { if (BUFFER_FRAGMENTS(frag)) { +#ifdef __FreeBSD__ + RB_REMOVE(pf_frag_tree, &V_pf_frag_tree, frag); + TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next); + pool_put(&V_pf_frag_pl, frag); +#else RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag); TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); pool_put(&pf_frag_pl, frag); +#endif } else { +#ifdef __FreeBSD__ + RB_REMOVE(pf_frag_tree, &V_pf_cache_tree, frag); + TAILQ_REMOVE(&V_pf_cachequeue, frag, frag_next); + pool_put(&V_pf_cache_pl, frag); +#else RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag); 
TAILQ_REMOVE(&pf_cachequeue, frag, frag_next); pool_put(&pf_cache_pl, frag); +#endif } } @@ -416,10 +517,18 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, /* Create a new reassembly queue for this packet */ if (*frag == NULL) { +#ifdef __FreeBSD__ + *frag = pool_get(&V_pf_frag_pl, PR_NOWAIT); +#else *frag = pool_get(&pf_frag_pl, PR_NOWAIT); +#endif if (*frag == NULL) { pf_flush_fragments(); +#ifdef __FreeBSD__ + *frag = pool_get(&V_pf_frag_pl, PR_NOWAIT); +#else *frag = pool_get(&pf_frag_pl, PR_NOWAIT); +#endif if (*frag == NULL) goto drop_fragment; } @@ -433,8 +542,13 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, (*frag)->fr_timeout = time_second; LIST_INIT(&(*frag)->fr_queue); +#ifdef __FreeBSD__ + RB_INSERT(pf_frag_tree, &V_pf_frag_tree, *frag); + TAILQ_INSERT_HEAD(&V_pf_fragqueue, *frag, frag_next); +#else RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag); TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next); +#endif /* We do not have a previous fragment */ frep = NULL; @@ -499,8 +613,13 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, next = LIST_NEXT(frea, fr_next); m_freem(frea->fr_m); LIST_REMOVE(frea, fr_next); +#ifdef __FreeBSD__ + pool_put(&V_pf_frent_pl, frea); + V_pf_nfrents--; +#else pool_put(&pf_frent_pl, frea); pf_nfrents--; +#endif } insert: @@ -560,26 +679,36 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, m2 = m->m_next; m->m_next = NULL; m_cat(m, m2); +#ifdef __FreeBSD__ + pool_put(&V_pf_frent_pl, frent); + V_pf_nfrents--; +#else pool_put(&pf_frent_pl, frent); pf_nfrents--; +#endif for (frent = next; frent != NULL; frent = next) { next = LIST_NEXT(frent, fr_next); m2 = frent->fr_m; +#ifdef __FreeBSD__ + pool_put(&V_pf_frent_pl, frent); + V_pf_nfrents--; +#else pool_put(&pf_frent_pl, frent); pf_nfrents--; +#endif #ifdef __FreeBSD__ m->m_pkthdr.csum_flags &= m2->m_pkthdr.csum_flags; m->m_pkthdr.csum_data += m2->m_pkthdr.csum_data; #endif m_cat(m, m2); } + #ifdef __FreeBSD__ while 
(m->m_pkthdr.csum_data & 0xffff0000) m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) + (m->m_pkthdr.csum_data >> 16); #endif - ip->ip_src = (*frag)->fr_src; ip->ip_dst = (*frag)->fr_dst; @@ -606,8 +735,13 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, drop_fragment: /* Oops - fail safe - drop packet */ +#ifdef __FreeBSD__ + pool_put(&V_pf_frent_pl, frent); + V_pf_nfrents--; +#else pool_put(&pf_frent_pl, frent); pf_nfrents--; +#endif m_freem(m); return (NULL); } @@ -632,22 +766,40 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, /* Create a new range queue for this packet */ if (*frag == NULL) { +#ifdef __FreeBSD__ + *frag = pool_get(&V_pf_cache_pl, PR_NOWAIT); +#else *frag = pool_get(&pf_cache_pl, PR_NOWAIT); +#endif if (*frag == NULL) { pf_flush_fragments(); +#ifdef __FreeBSD__ + *frag = pool_get(&V_pf_cache_pl, PR_NOWAIT); +#else *frag = pool_get(&pf_cache_pl, PR_NOWAIT); +#endif if (*frag == NULL) goto no_mem; } /* Get an entry for the queue */ +#ifdef __FreeBSD__ + cur = pool_get(&V_pf_cent_pl, PR_NOWAIT); + if (cur == NULL) { + pool_put(&V_pf_cache_pl, *frag); +#else cur = pool_get(&pf_cent_pl, PR_NOWAIT); if (cur == NULL) { pool_put(&pf_cache_pl, *frag); +#endif *frag = NULL; goto no_mem; } +#ifdef __FreeBSD__ + V_pf_ncache++; +#else pf_ncache++; +#endif (*frag)->fr_flags = PFFRAG_NOBUFFER; (*frag)->fr_max = 0; @@ -662,8 +814,13 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, LIST_INIT(&(*frag)->fr_cache); LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next); +#ifdef __FreeBSD__ + RB_INSERT(pf_frag_tree, &V_pf_cache_tree, *frag); + TAILQ_INSERT_HEAD(&V_pf_cachequeue, *frag, frag_next); +#else RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag); TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next); +#endif DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, max)); @@ -782,10 +939,18 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, 
h->ip_id, -precut, frp->fr_off, frp->fr_end, off, max)); +#ifdef __FreeBSD__ + cur = pool_get(&V_pf_cent_pl, PR_NOWAIT); +#else cur = pool_get(&pf_cent_pl, PR_NOWAIT); +#endif if (cur == NULL) goto no_mem; +#ifdef __FreeBSD__ + V_pf_ncache++; +#else pf_ncache++; +#endif cur->fr_off = off; cur->fr_end = max; @@ -842,10 +1007,18 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, h->ip_id, -aftercut, off, max, fra->fr_off, fra->fr_end)); +#ifdef __FreeBSD__ + cur = pool_get(&V_pf_cent_pl, PR_NOWAIT); +#else cur = pool_get(&pf_cent_pl, PR_NOWAIT); +#endif if (cur == NULL) goto no_mem; +#ifdef __FreeBSD__ + V_pf_ncache++; +#else pf_ncache++; +#endif cur->fr_off = off; cur->fr_end = max; @@ -863,8 +1036,13 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, max, fra->fr_off, fra->fr_end)); fra->fr_off = cur->fr_off; LIST_REMOVE(cur, fr_next); +#ifdef __FreeBSD__ + pool_put(&V_pf_cent_pl, cur); + V_pf_ncache--; +#else pool_put(&pf_cent_pl, cur); pf_ncache--; +#endif cur = NULL; } else if (frp && fra->fr_off <= frp->fr_end) { @@ -881,8 +1059,13 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, max, fra->fr_off, fra->fr_end)); fra->fr_off = frp->fr_off; LIST_REMOVE(frp, fr_next); +#ifdef __FreeBSD__ + pool_put(&V_pf_cent_pl, frp); + V_pf_ncache--; +#else pool_put(&pf_cent_pl, frp); pf_ncache--; +#endif frp = NULL; } @@ -965,6 +1148,7 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, u_int16_t max; int ip_len; int ip_off; + int tag = -1; r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); while (r != NULL) { @@ -985,6 +1169,12 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, (struct pf_addr *)&h->ip_dst.s_addr, AF_INET, r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; +#ifdef __FreeBSD__ + else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag)) +#else + else if (r->match_tag && 
!pf_match_tag(m, r, &tag)) +#endif + r = TAILQ_NEXT(r, entries); else break; } @@ -1043,7 +1233,11 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) { /* Fully buffer all of the fragments */ +#ifdef __FreeBSD__ + frag = pf_find_fragment(h, &V_pf_frag_tree); +#else frag = pf_find_fragment(h, &pf_frag_tree); +#endif /* Check if we saw the last fragment already */ if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) && @@ -1051,12 +1245,20 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, goto bad; /* Get an entry for the fragment queue */ +#ifdef __FreeBSD__ + frent = pool_get(&V_pf_frent_pl, PR_NOWAIT); +#else frent = pool_get(&pf_frent_pl, PR_NOWAIT); +#endif if (frent == NULL) { REASON_SET(reason, PFRES_MEMORY); return (PF_DROP); } +#ifdef __FreeBSD__ + V_pf_nfrents++; +#else pf_nfrents++; +#endif frent->fr_ip = h; frent->fr_m = m; @@ -1087,7 +1289,11 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, /* non-buffering fragment cache (drops or masks overlaps) */ int nomem = 0; +#ifdef __FreeBSD__ if (dir == PF_OUT && pd->pf_mtag->flags & PF_TAG_FRAGCACHE) { +#else + if (dir == PF_OUT && m->m_pkthdr.pf.flags & PF_TAG_FRAGCACHE) { +#endif /* * Already passed the fragment cache in the * input direction. 
If we continued, it would @@ -1096,7 +1302,11 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, goto fragment_pass; } +#ifdef __FreeBSD__ + frag = pf_find_fragment(h, &V_pf_cache_tree); +#else frag = pf_find_fragment(h, &pf_cache_tree); +#endif /* Check if we saw the last fragment already */ if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) && @@ -1127,7 +1337,11 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, } #endif if (dir == PF_IN) +#ifdef __FreeBSD__ pd->pf_mtag->flags |= PF_TAG_FRAGCACHE; +#else + m->m_pkthdr.pf.flags |= PF_TAG_FRAGCACHE; +#endif if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) goto drop; @@ -1143,33 +1357,11 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0); } - /* Enforce a minimum ttl, may cause endless packet loops */ - if (r->min_ttl && h->ip_ttl < r->min_ttl) { - u_int16_t ip_ttl = h->ip_ttl; - - h->ip_ttl = r->min_ttl; - h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0); - } - - if (r->rule_flag & PFRULE_RANDOMID) { - u_int16_t ip_id = h->ip_id; - - h->ip_id = ip_randomid(); - h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0); - } - if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) - pd->flags |= PFDESC_IP_REAS; - - return (PF_PASS); + /* not missing a return here */ fragment_pass: - /* Enforce a minimum ttl, may cause endless packet loops */ - if (r->min_ttl && h->ip_ttl < r->min_ttl) { - u_int16_t ip_ttl = h->ip_ttl; + pf_scrub_ip(&m, r->rule_flag, r->min_ttl, r->set_tos); - h->ip_ttl = r->min_ttl; - h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0); - } if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) pd->flags |= PFDESC_IP_REAS; return (PF_PASS); @@ -1339,9 +1531,7 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len) goto shortpkt; - /* Enforce 
a minimum ttl, may cause endless packet loops */ - if (r->min_ttl && h->ip6_hlim < r->min_ttl) - h->ip6_hlim = r->min_ttl; + pf_scrub_ip6(&m, r->min_ttl); return (PF_PASS); @@ -1479,12 +1669,16 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, } /* Process options */ - if (r->max_mss && pf_normalize_tcpopt(r, m, th, off)) + if (r->max_mss && pf_normalize_tcpopt(r, m, th, off, pd->af)) rewrite = 1; /* copy back packet headers if we sanitized */ if (rewrite) +#ifdef __FreeBSD__ m_copyback(m, off, sizeof(*th), (caddr_t)th); +#else + m_copyback(m, off, sizeof(*th), th); +#endif return (PF_PASS); @@ -1506,11 +1700,13 @@ pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd, #ifdef __FreeBSD__ KASSERT((src->scrub == NULL), ("pf_normalize_tcp_init: src->scrub != NULL")); + + src->scrub = pool_get(&V_pf_state_scrub_pl, PR_NOWAIT); #else KASSERT(src->scrub == NULL); -#endif src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT); +#endif if (src->scrub == NULL) return (1); bzero(src->scrub, sizeof(*src->scrub)); @@ -1586,10 +1782,17 @@ pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd, void pf_normalize_tcp_cleanup(struct pf_state *state) { +#ifdef __FreeBSD__ + if (state->src.scrub) + pool_put(&V_pf_state_scrub_pl, state->src.scrub); + if (state->dst.scrub) + pool_put(&V_pf_state_scrub_pl, state->dst.scrub); +#else if (state->src.scrub) pool_put(&pf_state_scrub_pl, state->src.scrub); if (state->dst.scrub) pool_put(&pf_state_scrub_pl, state->dst.scrub); +#endif /* Someday... flush the TCP segment reassembly descriptors. */ } @@ -1667,7 +1870,11 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, if (got_ts) { /* Huh? Multiple timestamps!? 
*/ +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else if (pf_status.debug >= PF_DEBUG_MISC) { +#endif DPFPRINTF(("multiple TS??")); pf_print_state(state); printf("\n"); @@ -1736,7 +1943,11 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) && (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE || time_second - state->creation > TS_MAX_CONN)) { +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else if (pf_status.debug >= PF_DEBUG_MISC) { +#endif DPFPRINTF(("src idled out of PAWS\n")); pf_print_state(state); printf("\n"); @@ -1746,7 +1957,11 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, } if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) && uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) { +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else if (pf_status.debug >= PF_DEBUG_MISC) { +#endif DPFPRINTF(("dst idled out of PAWS\n")); pf_print_state(state); printf("\n"); @@ -1807,7 +2022,7 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, * network conditions that re-order packets and * cause our view of them to decrease. For now the * only lowerbound we can safely determine is that - * the TS echo will never be less than the orginal + * the TS echo will never be less than the original * TS. XXX There is probably a better lowerbound. * Remove TS_MAX_CONN with better lowerbound check. * tescr >= other original TS @@ -1830,7 +2045,11 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, * this packet. 
*/ if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0) +#ifdef __FreeBSD__ + ts_fudge = V_pf_default_rule.timeout[PFTM_TS_DIFF]; +#else ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF]; +#endif /* Calculate max ticks since the last timestamp */ @@ -1838,7 +2057,7 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, #define TS_MICROSECS 1000000 /* microseconds per second */ #ifdef __FreeBSD__ #ifndef timersub -#define timersub(tvp, uvp, vvp) \ +#define timersub(tvp, uvp, vvp) \ do { \ (vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec; \ (vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec; \ @@ -1895,7 +2114,11 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, "\n", dst->scrub->pfss_tsval, dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0)); #endif +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else if (pf_status.debug >= PF_DEBUG_MISC) { +#endif pf_print_state(state); pf_print_flags(th->th_flags); printf("\n"); @@ -1943,7 +2166,11 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, * Hey! Someone tried to sneak a packet in. Or the * stack changed its RFC1323 behavior?!?! 
*/ +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC) { +#else if (pf_status.debug >= PF_DEBUG_MISC) { +#endif DPFPRINTF(("Did not receive expected RFC1323 " "timestamp\n")); pf_print_state(state); @@ -1970,7 +2197,11 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, src->scrub->pfss_flags |= PFSS_DATA_TS; else { src->scrub->pfss_flags |= PFSS_DATA_NOTS; +#ifdef __FreeBSD__ + if (V_pf_status.debug >= PF_DEBUG_MISC && dst->scrub && +#else if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub && +#endif (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) { /* Don't warn if other host rejected RFC1323 */ DPFPRINTF(("Broken RFC1323 stack did not " @@ -2018,17 +2249,25 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, int pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th, - int off) + int off, sa_family_t af) { u_int16_t *mss; int thoff; int opt, cnt, optlen = 0; int rewrite = 0; - u_char *optp; +#ifdef __FreeBSD__ + u_char opts[TCP_MAXOLEN]; +#else + u_char opts[MAX_TCPOPTLEN]; +#endif + u_char *optp = opts; thoff = th->th_off << 2; cnt = thoff - sizeof(struct tcphdr); - optp = mtod(m, caddr_t) + off + sizeof(struct tcphdr); + + if (cnt > 0 && !pf_pull_hdr(m, off + sizeof(*th), opts, cnt, + NULL, NULL, af)) + return (rewrite); for (; cnt > 0; cnt -= optlen, optp += optlen) { opt = optp[0]; @@ -2058,5 +2297,63 @@ pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th, } } + if (rewrite) + m_copyback(m, off + sizeof(*th), thoff - sizeof(*th), opts); + return (rewrite); } + +void +pf_scrub_ip(struct mbuf **m0, u_int32_t flags, u_int8_t min_ttl, u_int8_t tos) +{ + struct mbuf *m = *m0; + struct ip *h = mtod(m, struct ip *); + + /* Clear IP_DF if no-df was requested */ + if (flags & PFRULE_NODF && h->ip_off & htons(IP_DF)) { + u_int16_t ip_off = h->ip_off; + + h->ip_off &= htons(~IP_DF); + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0); + } + + /* Enforce a minimum ttl, 
may cause endless packet loops */ + if (min_ttl && h->ip_ttl < min_ttl) { + u_int16_t ip_ttl = h->ip_ttl; + + h->ip_ttl = min_ttl; + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0); + } + + /* Enforce tos */ + if (flags & PFRULE_SET_TOS) { + u_int16_t ov, nv; + + ov = *(u_int16_t *)h; + h->ip_tos = tos; + nv = *(u_int16_t *)h; + + h->ip_sum = pf_cksum_fixup(h->ip_sum, ov, nv, 0); + } + + /* random-id, but not for fragments */ + if (flags & PFRULE_RANDOMID && !(h->ip_off & ~htons(IP_DF))) { + u_int16_t ip_id = h->ip_id; + + h->ip_id = ip_randomid(); + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0); + } +} + +#ifdef INET6 +void +pf_scrub_ip6(struct mbuf **m0, u_int8_t min_ttl) +{ + struct mbuf *m = *m0; + struct ip6_hdr *h = mtod(m, struct ip6_hdr *); + + /* Enforce a minimum ttl, may cause endless packet loops */ + if (min_ttl && h->ip6_hlim < min_ttl) + h->ip6_hlim = min_ttl; +} +#endif diff --git a/sys/contrib/pf/net/pf_osfp.c b/sys/contrib/pf/net/pf_osfp.c index 225528d..dcd8af7 100644 --- a/sys/contrib/pf/net/pf_osfp.c +++ b/sys/contrib/pf/net/pf_osfp.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_osfp.c,v 1.12 2006/12/13 18:14:10 itojun Exp $ */ +/* $OpenBSD: pf_osfp.c,v 1.14 2008/06/12 18:17:01 henning Exp $ */ /* * Copyright (c) 2003 Mike Frantzen @@ -25,7 +25,10 @@ __FBSDID("$FreeBSD$"); #include #include #ifdef _KERNEL -# include +#include +#ifndef __FreeBSD__ +#include +#endif #endif /* _KERNEL */ #include @@ -42,10 +45,17 @@ __FBSDID("$FreeBSD$"); #include #endif + #ifdef _KERNEL -# define DPFPRINTF(format, x...) \ +#ifdef __FreeBSD__ +#define DPFPRINTF(format, x...) \ + if (V_pf_status.debug >= PF_DEBUG_NOISY) \ + printf(format , ##x) +#else +#define DPFPRINTF(format, x...) \ if (pf_status.debug >= PF_DEBUG_NOISY) \ printf(format , ##x) +#endif #ifdef __FreeBSD__ typedef uma_zone_t pool_t; #else @@ -55,33 +65,43 @@ typedef struct pool pool_t; #else /* Userland equivalents so we can lend code to tcpdump et al. 
*/ -# include -# include -# include -# include -# include -# include -# define pool_t int -# define pool_get(pool, flags) malloc(*(pool)) -# define pool_put(pool, item) free(item) -# define pool_init(pool, size, a, ao, f, m, p) (*(pool)) = (size) - -# ifdef __FreeBSD__ -# define NTOHS(x) (x) = ntohs((u_int16_t)(x)) -# endif - -# ifdef PFDEBUG -# include -# define DPFPRINTF(format, x...) fprintf(stderr, format , ##x) -# else -# define DPFPRINTF(format, x...) ((void)0) -# endif /* PFDEBUG */ +#include +#include +#include +#include +#include +#include +#define pool_t int +#define pool_get(pool, flags) malloc(*(pool)) +#define pool_put(pool, item) free(item) +#define pool_init(pool, size, a, ao, f, m, p) (*(pool)) = (size) + +#ifdef __FreeBSD__ +#define NTOHS(x) (x) = ntohs((u_int16_t)(x)) +#endif + +#ifdef PFDEBUG +#include +#define DPFPRINTF(format, x...) fprintf(stderr, format , ##x) +#else +#define DPFPRINTF(format, x...) ((void)0) +#endif /* PFDEBUG */ #endif /* _KERNEL */ +#ifdef __FreeBSD__ +SLIST_HEAD(pf_osfp_list, pf_os_fingerprint); +VNET_DEFINE(struct pf_osfp_list, pf_osfp_list); +#define V_pf_osfp_list VNET(pf_osfp_list) +VNET_DEFINE(pool_t, pf_osfp_entry_pl); +#define pf_osfp_entry_pl VNET(pf_osfp_entry_pl) +VNET_DEFINE(pool_t, pf_osfp_pl); +#define pf_osfp_pl VNET(pf_osfp_pl) +#else SLIST_HEAD(pf_osfp_list, pf_os_fingerprint) pf_osfp_list; pool_t pf_osfp_entry_pl; pool_t pf_osfp_pl; +#endif struct pf_os_fingerprint *pf_osfp_find(struct pf_osfp_list *, struct pf_os_fingerprint *, u_int8_t); @@ -264,7 +284,11 @@ pf_osfp_fingerprint_hdr(const struct ip *ip, const struct ip6_hdr *ip6, const st (fp.fp_flags & PF_OSFP_WSCALE_DC) ? 
"*" : "", fp.fp_wscale); +#ifdef __FreeBSD__ + if ((fpresult = pf_osfp_find(&V_pf_osfp_list, &fp, +#else if ((fpresult = pf_osfp_find(&pf_osfp_list, &fp, +#endif PF_OSFP_MAXTTL_OFFSET))) return (&fpresult->fp_oses); return (NULL); @@ -310,20 +334,23 @@ pf_osfp_initialize(void) { #if defined(__FreeBSD__) && defined(_KERNEL) int error = ENOMEM; - + do { pf_osfp_entry_pl = pf_osfp_pl = NULL; UMA_CREATE(pf_osfp_entry_pl, struct pf_osfp_entry, "pfospfen"); UMA_CREATE(pf_osfp_pl, struct pf_os_fingerprint, "pfosfp"); error = 0; } while(0); + + SLIST_INIT(&V_pf_osfp_list); #else pool_init(&pf_osfp_entry_pl, sizeof(struct pf_osfp_entry), 0, 0, 0, "pfosfpen", &pool_allocator_nointr); pool_init(&pf_osfp_pl, sizeof(struct pf_os_fingerprint), 0, 0, 0, "pfosfp", &pool_allocator_nointr); -#endif SLIST_INIT(&pf_osfp_list); +#endif + #ifdef __FreeBSD__ #ifdef _KERNEL return (error); @@ -337,6 +364,7 @@ pf_osfp_initialize(void) void pf_osfp_cleanup(void) { + UMA_DESTROY(pf_osfp_entry_pl); UMA_DESTROY(pf_osfp_pl); } @@ -349,8 +377,13 @@ pf_osfp_flush(void) struct pf_os_fingerprint *fp; struct pf_osfp_entry *entry; +#ifdef __FreeBSD__ + while ((fp = SLIST_FIRST(&V_pf_osfp_list))) { + SLIST_REMOVE_HEAD(&V_pf_osfp_list, fp_next); +#else while ((fp = SLIST_FIRST(&pf_osfp_list))) { SLIST_REMOVE_HEAD(&pf_osfp_list, fp_next); +#endif while ((entry = SLIST_FIRST(&fp->fp_oses))) { SLIST_REMOVE_HEAD(&fp->fp_oses, fp_entry); pool_put(&pf_osfp_entry_pl, entry); @@ -377,6 +410,7 @@ pf_osfp_add(struct pf_osfp_ioctl *fpioc) fpadd.fp_wscale = fpioc->fp_wscale; fpadd.fp_ttl = fpioc->fp_ttl; +#if 0 /* XXX RYAN wants to fix logging */ DPFPRINTF("adding osfp %s %s %s = %s%d:%d:%d:%s%d:0x%llx %d " "(TS=%s,M=%s%d,W=%s%d) %x\n", fpioc->fp_os.fp_class_nm, fpioc->fp_os.fp_version_nm, @@ -400,17 +434,31 @@ pf_osfp_add(struct pf_osfp_ioctl *fpioc) (fpadd.fp_flags & PF_OSFP_WSCALE_DC) ? 
"*" : "", fpadd.fp_wscale, fpioc->fp_os.fp_os); +#endif - +#ifdef __FreeBSD__ + if ((fp = pf_osfp_find_exact(&V_pf_osfp_list, &fpadd))) { +#else if ((fp = pf_osfp_find_exact(&pf_osfp_list, &fpadd))) { +#endif SLIST_FOREACH(entry, &fp->fp_oses, fp_entry) { if (PF_OSFP_ENTRY_EQ(entry, &fpioc->fp_os)) return (EEXIST); } - if ((entry = pool_get(&pf_osfp_entry_pl, PR_NOWAIT)) == NULL) + if ((entry = pool_get(&pf_osfp_entry_pl, +#ifdef __FreeBSD__ + PR_NOWAIT)) == NULL) +#else + PR_WAITOK|PR_LIMITFAIL)) == NULL) +#endif return (ENOMEM); } else { - if ((fp = pool_get(&pf_osfp_pl, PR_NOWAIT)) == NULL) + if ((fp = pool_get(&pf_osfp_pl, +#ifdef __FreeBSD__ + PR_NOWAIT)) == NULL) +#else + PR_WAITOK|PR_LIMITFAIL)) == NULL) +#endif return (ENOMEM); memset(fp, 0, sizeof(*fp)); fp->fp_tcpopts = fpioc->fp_tcpopts; @@ -422,11 +470,20 @@ pf_osfp_add(struct pf_osfp_ioctl *fpioc) fp->fp_wscale = fpioc->fp_wscale; fp->fp_ttl = fpioc->fp_ttl; SLIST_INIT(&fp->fp_oses); - if ((entry = pool_get(&pf_osfp_entry_pl, PR_NOWAIT)) == NULL) { + if ((entry = pool_get(&pf_osfp_entry_pl, +#ifdef __FreeBSD__ + PR_NOWAIT)) == NULL) { +#else + PR_WAITOK|PR_LIMITFAIL)) == NULL) { +#endif pool_put(&pf_osfp_pl, fp); return (ENOMEM); } +#ifdef __FreeBSD__ + pf_osfp_insert(&V_pf_osfp_list, fp); +#else pf_osfp_insert(&pf_osfp_list, fp); +#endif } memcpy(entry, &fpioc->fp_os, sizeof(*entry)); @@ -452,7 +509,7 @@ pf_osfp_find(struct pf_osfp_list *list, struct pf_os_fingerprint *find, { struct pf_os_fingerprint *f; -#define MATCH_INT(_MOD, _DC, _field) \ +#define MATCH_INT(_MOD, _DC, _field) \ if ((f->fp_flags & _DC) == 0) { \ if ((f->fp_flags & _MOD) == 0) { \ if (f->_field != find->_field) \ @@ -480,10 +537,11 @@ pf_osfp_find(struct pf_osfp_list *list, struct pf_os_fingerprint *find, if (find->fp_mss == 0) continue; -/* Some "smart" NAT devices and DSL routers will tweak the MSS size and +/* + * Some "smart" NAT devices and DSL routers will tweak the MSS size and * will set it to whatever is suitable for the 
link type. */ -#define SMART_MSS 1460 +#define SMART_MSS 1460 if ((find->fp_wsize % find->fp_mss || find->fp_wsize / find->fp_mss != f->fp_wsize) && @@ -495,8 +553,8 @@ pf_osfp_find(struct pf_osfp_list *list, struct pf_os_fingerprint *find, if (find->fp_mss == 0) continue; -#define MTUOFF (sizeof(struct ip) + sizeof(struct tcphdr)) -#define SMART_MTU (SMART_MSS + MTUOFF) +#define MTUOFF (sizeof(struct ip) + sizeof(struct tcphdr)) +#define SMART_MTU (SMART_MSS + MTUOFF) if ((find->fp_wsize % (find->fp_mss + MTUOFF) || find->fp_wsize / (find->fp_mss + MTUOFF) != f->fp_wsize) && @@ -567,7 +625,11 @@ pf_osfp_get(struct pf_osfp_ioctl *fpioc) memset(fpioc, 0, sizeof(*fpioc)); +#ifdef __FreeBSD__ + SLIST_FOREACH(fp, &V_pf_osfp_list, fp_next) { +#else SLIST_FOREACH(fp, &pf_osfp_list, fp_next) { +#endif SLIST_FOREACH(entry, &fp->fp_oses, fp_entry) { if (i++ == num) { fpioc->fp_mss = fp->fp_mss; @@ -594,7 +656,11 @@ pf_osfp_validate(void) { struct pf_os_fingerprint *f, *f2, find; +#ifdef __FreeBSD__ + SLIST_FOREACH(f, &V_pf_osfp_list, fp_next) { +#else SLIST_FOREACH(f, &pf_osfp_list, fp_next) { +#endif memcpy(&find, f, sizeof(find)); /* We do a few MSS/th_win percolations to make things unique */ @@ -606,7 +672,11 @@ pf_osfp_validate(void) find.fp_wsize *= (find.fp_mss + 40); else if (f->fp_flags & PF_OSFP_WSIZE_MOD) find.fp_wsize *= 2; +#ifdef __FreeBSD__ + if (f != (f2 = pf_osfp_find(&V_pf_osfp_list, &find, 0))) { +#else if (f != (f2 = pf_osfp_find(&pf_osfp_list, &find, 0))) { +#endif if (f2) printf("Found \"%s %s %s\" instead of " "\"%s %s %s\"\n", diff --git a/sys/contrib/pf/net/pf_ruleset.c b/sys/contrib/pf/net/pf_ruleset.c index 7ff6a77..ca8667c 100644 --- a/sys/contrib/pf/net/pf_ruleset.c +++ b/sys/contrib/pf/net/pf_ruleset.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_ruleset.c,v 1.1 2006/10/27 13:56:51 mcbride Exp $ */ +/* $OpenBSD: pf_ruleset.c,v 1.2 2008/12/18 15:31:37 dhill Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -61,48 +61,55 @@ __FBSDID("$FreeBSD$"); #ifdef 
_KERNEL -# define DPFPRINTF(format, x...) \ - if (pf_status.debug >= PF_DEBUG_NOISY) \ +#ifdef __FreeBSD__ +#define DPFPRINTF(format, x...) \ + if (V_pf_status.debug >= PF_DEBUG_NOISY) \ + printf(format , ##x) +#else +#define DPFPRINTF(format, x...) \ + if (pf_status.debug >= PF_DEBUG_NOISY) \ printf(format , ##x) +#endif #ifdef __FreeBSD__ -#define rs_malloc(x) malloc(x, M_TEMP, M_NOWAIT) +#define rs_malloc(x) malloc(x, M_TEMP, M_NOWAIT|M_ZERO) #else -#define rs_malloc(x) malloc(x, M_TEMP, M_WAITOK) +#define rs_malloc(x) malloc(x, M_TEMP, M_WAITOK|M_CANFAIL|M_ZERO) #endif #define rs_free(x) free(x, M_TEMP) #else /* Userland equivalents so we can lend code to pfctl et al. */ -# include -# include -# include -# include -# include -# define rs_malloc(x) malloc(x) -# define rs_free(x) free(x) - -# ifdef PFDEBUG -# include -# define DPFPRINTF(format, x...) fprintf(stderr, format , ##x) -# else -# define DPFPRINTF(format, x...) ((void)0) -# endif /* PFDEBUG */ +#include +#include +#include +#include +#include +#define rs_malloc(x) calloc(1, x) +#define rs_free(x) free(x) + +#ifdef PFDEBUG +#include +#define DPFPRINTF(format, x...) fprintf(stderr, format , ##x) +#else +#define DPFPRINTF(format, x...) ((void)0) +#endif /* PFDEBUG */ #endif /* _KERNEL */ +#if defined(__FreeBSD__) && !defined(_KERNEL) +#undef V_pf_anchors +#define V_pf_anchors pf_anchors + +#undef pf_main_ruleset +#define pf_main_ruleset pf_main_anchor.ruleset +#endif +#if defined(__FreeBSD__) && defined(_KERNEL) +VNET_DEFINE(struct pf_anchor_global, pf_anchors); +VNET_DEFINE(struct pf_anchor, pf_main_anchor); +#else struct pf_anchor_global pf_anchors; struct pf_anchor pf_main_anchor; - -#ifndef __FreeBSD__ -/* XXX: hum? 
*/ -int pf_get_ruleset_number(u_int8_t); -void pf_init_ruleset(struct pf_ruleset *); -int pf_anchor_setup(struct pf_rule *, - const struct pf_ruleset *, const char *); -int pf_anchor_copyout(const struct pf_ruleset *, - const struct pf_rule *, struct pfioc_rule *); -void pf_anchor_remove(struct pf_rule *); #endif static __inline int pf_anchor_compare(struct pf_anchor *, struct pf_anchor *); @@ -168,9 +175,14 @@ pf_find_anchor(const char *path) struct pf_anchor *key, *found; key = (struct pf_anchor *)rs_malloc(sizeof(*key)); - memset(key, 0, sizeof(*key)); + if (key == NULL) + return (NULL); strlcpy(key->path, path, sizeof(key->path)); +#ifdef __FreeBSD__ + found = RB_FIND(pf_anchor_global, &V_pf_anchors, key); +#else found = RB_FIND(pf_anchor_global, &pf_anchors, key); +#endif rs_free(key); return (found); } @@ -210,7 +222,8 @@ pf_find_or_create_ruleset(const char *path) if (ruleset != NULL) return (ruleset); p = (char *)rs_malloc(MAXPATHLEN); - bzero(p, MAXPATHLEN); + if (p == NULL) + return (NULL); strlcpy(p, path, MAXPATHLEN); while (parent == NULL && (q = strrchr(p, '/')) != NULL) { *q = 0; @@ -242,7 +255,6 @@ pf_find_or_create_ruleset(const char *path) rs_free(p); return (NULL); } - memset(anchor, 0, sizeof(*anchor)); RB_INIT(&anchor->children); strlcpy(anchor->name, q, sizeof(anchor->name)); if (parent != NULL) { @@ -251,7 +263,11 @@ pf_find_or_create_ruleset(const char *path) strlcat(anchor->path, "/", sizeof(anchor->path)); } strlcat(anchor->path, anchor->name, sizeof(anchor->path)); +#ifdef __FreeBSD__ + if ((dup = RB_INSERT(pf_anchor_global, &V_pf_anchors, anchor)) != +#else if ((dup = RB_INSERT(pf_anchor_global, &pf_anchors, anchor)) != +#endif NULL) { printf("pf_find_or_create_ruleset: RB_INSERT1 " "'%s' '%s' collides with '%s' '%s'\n", @@ -268,7 +284,11 @@ pf_find_or_create_ruleset(const char *path) "RB_INSERT2 '%s' '%s' collides with " "'%s' '%s'\n", anchor->path, anchor->name, dup->path, dup->name); +#ifdef __FreeBSD__ + RB_REMOVE(pf_anchor_global, 
&V_pf_anchors, +#else RB_REMOVE(pf_anchor_global, &pf_anchors, +#endif anchor); rs_free(anchor); rs_free(p); @@ -304,7 +324,11 @@ pf_remove_if_empty_ruleset(struct pf_ruleset *ruleset) !TAILQ_EMPTY(ruleset->rules[i].inactive.ptr) || ruleset->rules[i].inactive.open) return; +#ifdef __FreeBSD__ + RB_REMOVE(pf_anchor_global, &V_pf_anchors, ruleset->anchor); +#else RB_REMOVE(pf_anchor_global, &pf_anchors, ruleset->anchor); +#endif if ((parent = ruleset->anchor->parent) != NULL) RB_REMOVE(pf_anchor_node, &parent->children, ruleset->anchor); @@ -328,7 +352,8 @@ pf_anchor_setup(struct pf_rule *r, const struct pf_ruleset *s, if (!name[0]) return (0); path = (char *)rs_malloc(MAXPATHLEN); - bzero(path, MAXPATHLEN); + if (path == NULL) + return (1); if (name[0] == '/') strlcpy(path, name + 1, MAXPATHLEN); else { @@ -386,7 +411,8 @@ pf_anchor_copyout(const struct pf_ruleset *rs, const struct pf_rule *r, int i; a = (char *)rs_malloc(MAXPATHLEN); - bzero(a, MAXPATHLEN); + if (a == NULL) + return (1); if (rs->anchor == NULL) a[0] = 0; else diff --git a/sys/contrib/pf/net/pf_subr.c b/sys/contrib/pf/net/pf_subr.c deleted file mode 100644 index 07f5295..0000000 --- a/sys/contrib/pf/net/pf_subr.c +++ /dev/null @@ -1,168 +0,0 @@ -/*- - * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - */ - -#include "opt_inet.h" -#include "opt_inet6.h" - -#include -__FBSDID("$FreeBSD$"); - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Following is where TCP initial sequence number generation occurs. - * - * There are two places where we must use initial sequence numbers: - * 1. In SYN-ACK packets. - * 2. In SYN packets. - * - * All ISNs for SYN-ACK packets are generated by the syncache. See - * tcp_syncache.c for details. - * - * The ISNs in SYN packets must be monotonic; TIME_WAIT recycling - * depends on this property. In addition, these ISNs should be - * unguessable so as to prevent connection hijacking. To satisfy - * the requirements of this situation, the algorithm outlined in - * RFC 1948 is used, with only small modifications. 
- * - * Implementation details: - * - * Time is based off the system timer, and is corrected so that it - * increases by one megabyte per second. This allows for proper - * recycling on high speed LANs while still leaving over an hour - * before rollover. - * - * As reading the *exact* system time is too expensive to be done - * whenever setting up a TCP connection, we increment the time - * offset in two ways. First, a small random positive increment - * is added to isn_offset for each connection that is set up. - * Second, the function tcp_isn_tick fires once per clock tick - * and increments isn_offset as necessary so that sequence numbers - * are incremented at approximately ISN_BYTES_PER_SECOND. The - * random positive increments serve only to ensure that the same - * exact sequence number is never sent out twice (as could otherwise - * happen when a port is recycled in less than the system tick - * interval.) - * - * net.inet.tcp.isn_reseed_interval controls the number of seconds - * between seeding of isn_secret. This is normally set to zero, - * as reseeding should not be necessary. - * - * Locking of the global variables isn_secret, isn_last_reseed, isn_offset, - * isn_offset_old, and isn_ctx is performed using the TCP pcbinfo lock. In - * general, this means holding an exclusive (write) lock. - */ - -#define ISN_BYTES_PER_SECOND 1048576 -#define ISN_STATIC_INCREMENT 4096 -#define ISN_RANDOM_INCREMENT (4096 - 1) - -static u_char pf_isn_secret[32]; -static int pf_isn_last_reseed; -static u_int32_t pf_isn_offset; - -u_int32_t -pf_new_isn(struct pf_state *s) -{ - MD5_CTX isn_ctx; - u_int32_t md5_buffer[4]; - u_int32_t new_isn; - struct pf_state_host *src, *dst; - - /* Seed if this is the first use, reseed if requested. 
*/ - if (pf_isn_last_reseed == 0) { - read_random(&pf_isn_secret, sizeof(pf_isn_secret)); - pf_isn_last_reseed = ticks; - } - - if (s->direction == PF_IN) { - src = &s->ext; - dst = &s->gwy; - } else { - src = &s->lan; - dst = &s->ext; - } - - /* Compute the md5 hash and return the ISN. */ - MD5Init(&isn_ctx); - MD5Update(&isn_ctx, (u_char *) &dst->port, sizeof(u_short)); - MD5Update(&isn_ctx, (u_char *) &src->port, sizeof(u_short)); -#ifdef INET6 - if (s->af == AF_INET6) { - MD5Update(&isn_ctx, (u_char *) &dst->addr, - sizeof(struct in6_addr)); - MD5Update(&isn_ctx, (u_char *) &src->addr, - sizeof(struct in6_addr)); - } else -#endif - { - MD5Update(&isn_ctx, (u_char *) &dst->addr, - sizeof(struct in_addr)); - MD5Update(&isn_ctx, (u_char *) &src->addr, - sizeof(struct in_addr)); - } - MD5Update(&isn_ctx, (u_char *) &pf_isn_secret, sizeof(pf_isn_secret)); - MD5Final((u_char *) &md5_buffer, &isn_ctx); - new_isn = (tcp_seq) md5_buffer[0]; - pf_isn_offset += ISN_STATIC_INCREMENT + - (arc4random() & ISN_RANDOM_INCREMENT); - new_isn += pf_isn_offset; - return (new_isn); -} diff --git a/sys/contrib/pf/net/pf_table.c b/sys/contrib/pf/net/pf_table.c index 40c9f67..010dc50 100644 --- a/sys/contrib/pf/net/pf_table.c +++ b/sys/contrib/pf/net/pf_table.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_table.c,v 1.68 2006/05/02 10:08:45 dhartmei Exp $ */ +/* $OpenBSD: pf_table.c,v 1.79 2008/10/08 06:24:50 mcbride Exp $ */ /* * Copyright (c) 2002 Cedric Berger @@ -43,10 +43,10 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include -#include #ifdef __FreeBSD__ #include +#else +#include #endif #include @@ -55,10 +55,9 @@ __FBSDID("$FreeBSD$"); #ifndef __FreeBSD__ #include #endif - #include -#define ACCEPT_FLAGS(oklist) \ +#define ACCEPT_FLAGS(flags, oklist) \ do { \ if ((flags & ~(oklist)) & \ PFR_FLAG_ALLMASK) \ @@ -90,28 +89,26 @@ _copyout(const void *uaddr, void *kaddr, size_t len) return (r); } -#define COPYIN(from, to, size) \ +#define COPYIN(from, to, size, flags) \ ((flags & 
PFR_FLAG_USERIOCTL) ? \ _copyin((from), (to), (size)) : \ (bcopy((from), (to), (size)), 0)) -#define COPYOUT(from, to, size) \ +#define COPYOUT(from, to, size, flags) \ ((flags & PFR_FLAG_USERIOCTL) ? \ _copyout((from), (to), (size)) : \ (bcopy((from), (to), (size)), 0)) #else - -#define COPYIN(from, to, size) \ +#define COPYIN(from, to, size, flags) \ ((flags & PFR_FLAG_USERIOCTL) ? \ copyin((from), (to), (size)) : \ (bcopy((from), (to), (size)), 0)) -#define COPYOUT(from, to, size) \ +#define COPYOUT(from, to, size, flags) \ ((flags & PFR_FLAG_USERIOCTL) ? \ copyout((from), (to), (size)) : \ (bcopy((from), (to), (size)), 0)) - #endif #define FILLIN_SIN(sin, addr) \ @@ -128,26 +125,26 @@ _copyout(const void *uaddr, void *kaddr, size_t len) (sin6).sin6_addr = (addr); \ } while (0) -#define SWAP(type, a1, a2) \ +#define SWAP(type, a1, a2) \ do { \ type tmp = a1; \ a1 = a2; \ a2 = tmp; \ } while (0) -#define SUNION2PF(su, af) (((af)==AF_INET) ? \ +#define SUNION2PF(su, af) (((af)==AF_INET) ? 
\ (struct pf_addr *)&(su)->sin.sin_addr : \ (struct pf_addr *)&(su)->sin6.sin6_addr) #define AF_BITS(af) (((af)==AF_INET)?32:128) #define ADDR_NETWORK(ad) ((ad)->pfra_net < AF_BITS((ad)->pfra_af)) #define KENTRY_NETWORK(ke) ((ke)->pfrke_net < AF_BITS((ke)->pfrke_af)) -#define KENTRY_RNF_ROOT(ke) \ +#define KENTRY_RNF_ROOT(ke) \ ((((struct radix_node *)(ke))->rn_flags & RNF_ROOT) != 0) -#define NO_ADDRESSES (-1) -#define ENQUEUE_UNMARKED_ONLY (1) -#define INVERT_NEG_FLAG (1) +#define NO_ADDRESSES (-1) +#define ENQUEUE_UNMARKED_ONLY (1) +#define INVERT_NEG_FLAG (1) struct pfr_walktree { enum pfrw_op { @@ -169,28 +166,37 @@ struct pfr_walktree { int pfrw_free; int pfrw_flags; }; -#define pfrw_addr pfrw_1.pfrw1_addr -#define pfrw_astats pfrw_1.pfrw1_astats -#define pfrw_workq pfrw_1.pfrw1_workq -#define pfrw_kentry pfrw_1.pfrw1_kentry -#define pfrw_dyn pfrw_1.pfrw1_dyn -#define pfrw_cnt pfrw_free +#define pfrw_addr pfrw_1.pfrw1_addr +#define pfrw_astats pfrw_1.pfrw1_astats +#define pfrw_workq pfrw_1.pfrw1_workq +#define pfrw_kentry pfrw_1.pfrw1_kentry +#define pfrw_dyn pfrw_1.pfrw1_dyn +#define pfrw_cnt pfrw_free -#define senderr(e) do { rv = (e); goto _bad; } while (0) +#define senderr(e) do { rv = (e); goto _bad; } while (0) #ifdef __FreeBSD__ -uma_zone_t pfr_ktable_pl; -uma_zone_t pfr_kentry_pl; -uma_zone_t pfr_kentry_pl2; +VNET_DEFINE(uma_zone_t, pfr_ktable_pl); +VNET_DEFINE(uma_zone_t, pfr_kentry_pl); +VNET_DEFINE(uma_zone_t, pfr_kcounters_pl); +#define V_pfr_kcounters_pl VNET(pfr_kcounters_pl) +VNET_DEFINE(struct sockaddr_in, pfr_sin); +#define V_pfr_sin VNET(pfr_sin) +VNET_DEFINE(struct sockaddr_in6, pfr_sin6); +#define V_pfr_sin6 VNET(pfr_sin6) +VNET_DEFINE(union sockaddr_union, pfr_mask); +#define V_pfr_mask VNET(pfr_mask) +VNET_DEFINE(struct pf_addr, pfr_ffaddr); +#define V_pfr_ffaddr VNET(pfr_ffaddr) #else struct pool pfr_ktable_pl; struct pool pfr_kentry_pl; -struct pool pfr_kentry_pl2; -#endif +struct pool pfr_kcounters_pl; struct sockaddr_in pfr_sin; 
struct sockaddr_in6 pfr_sin6; union sockaddr_union pfr_mask; struct pf_addr pfr_ffaddr; +#endif void pfr_copyout_addr(struct pfr_addr *, struct pfr_kentry *ke); @@ -226,7 +232,7 @@ void pfr_setflags_ktable(struct pfr_ktable *, int); void pfr_clstats_ktables(struct pfr_ktableworkq *, long, int); void pfr_clstats_ktable(struct pfr_ktable *, long, int); -struct pfr_ktable *pfr_create_ktable(struct pfr_table *, long, int); +struct pfr_ktable *pfr_create_ktable(struct pfr_table *, long, int, int); void pfr_destroy_ktables(struct pfr_ktableworkq *, int); void pfr_destroy_ktable(struct pfr_ktable *, int); int pfr_ktable_compare(struct pfr_ktable *, @@ -251,12 +257,11 @@ pfr_initialize(void) { #ifndef __FreeBSD__ pool_init(&pfr_ktable_pl, sizeof(struct pfr_ktable), 0, 0, 0, - "pfrktable", &pool_allocator_oldnointr); + "pfrktable", NULL); pool_init(&pfr_kentry_pl, sizeof(struct pfr_kentry), 0, 0, 0, - "pfrkentry", &pool_allocator_oldnointr); - pool_init(&pfr_kentry_pl2, sizeof(struct pfr_kentry), 0, 0, 0, - "pfrkentry2", NULL); -#endif + "pfrkentry", NULL); + pool_init(&pfr_kcounters_pl, sizeof(struct pfr_kcounters), 0, 0, 0, + "pfrkcounters", NULL); pfr_sin.sin_len = sizeof(pfr_sin); pfr_sin.sin_family = AF_INET; @@ -264,6 +269,14 @@ pfr_initialize(void) pfr_sin6.sin6_family = AF_INET6; memset(&pfr_ffaddr, 0xff, sizeof(pfr_ffaddr)); +#else + V_pfr_sin.sin_len = sizeof(V_pfr_sin); + V_pfr_sin.sin_family = AF_INET; + V_pfr_sin6.sin6_len = sizeof(V_pfr_sin6); + V_pfr_sin6.sin6_family = AF_INET6; + + memset(&V_pfr_ffaddr, 0xff, sizeof(V_pfr_ffaddr)); +#endif } int @@ -273,7 +286,7 @@ pfr_clr_addrs(struct pfr_table *tbl, int *ndel, int flags) struct pfr_kentryworkq workq; int s; - ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY); + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY); if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) return (EINVAL); kt = pfr_lookup_table(tbl); @@ -284,7 +297,6 @@ pfr_clr_addrs(struct pfr_table *tbl, int *ndel, int flags) 
pfr_enqueue_addrs(kt, &workq, ndel, 0); if (!(flags & PFR_FLAG_DUMMY)) { - s = 0; if (flags & PFR_FLAG_ATOMIC) s = splsoftnet(); pfr_remove_kentries(kt, &workq); @@ -307,10 +319,11 @@ pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, struct pfr_kentryworkq workq; struct pfr_kentry *p, *q; struct pfr_addr ad; - int i, rv, s = 0, xadd = 0; + int i, rv, s, xadd = 0; long tzero = time_second; - ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK); + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY | + PFR_FLAG_FEEDBACK); if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) return (EINVAL); kt = pfr_lookup_table(tbl); @@ -318,12 +331,13 @@ pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, return (ESRCH); if (kt->pfrkt_flags & PFR_TFLAG_CONST) return (EPERM); - tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0); + tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0, + !(flags & PFR_FLAG_USERIOCTL)); if (tmpkt == NULL) return (ENOMEM); SLIST_INIT(&workq); for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad))) + if (COPYIN(addr+i, &ad, sizeof(ad), flags)) senderr(EFAULT); if (pfr_validate_addr(&ad)) senderr(EINVAL); @@ -340,7 +354,8 @@ pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, ad.pfra_fback = PFR_FB_NONE; } if (p == NULL && q == NULL) { - p = pfr_create_kentry(&ad, 0); + p = pfr_create_kentry(&ad, + !(flags & PFR_FLAG_USERIOCTL)); if (p == NULL) senderr(ENOMEM); if (pfr_route_kentry(tmpkt, p)) { @@ -351,10 +366,9 @@ pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, xadd++; } } - if (flags & PFR_FLAG_FEEDBACK) { - if (COPYOUT(&ad, addr+i, sizeof(ad))) + if (flags & PFR_FLAG_FEEDBACK) + if (COPYOUT(&ad, addr+i, sizeof(ad), flags)) senderr(EFAULT); - } } pfr_clean_node_mask(tmpkt, &workq); if (!(flags & PFR_FLAG_DUMMY)) { @@ -386,9 +400,10 @@ pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, struct pfr_kentryworkq workq; struct pfr_kentry *p; 
struct pfr_addr ad; - int i, rv, s = 0, xdel = 0, log = 1; + int i, rv, s, xdel = 0, log = 1; - ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK); + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY | + PFR_FLAG_FEEDBACK); if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) return (EINVAL); kt = pfr_lookup_table(tbl); @@ -415,7 +430,7 @@ pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, } else { /* iterate over addresses to delete */ for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad))) + if (COPYIN(addr+i, &ad, sizeof(ad), flags)) return (EFAULT); if (pfr_validate_addr(&ad)) return (EINVAL); @@ -426,7 +441,7 @@ pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, } SLIST_INIT(&workq); for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad))) + if (COPYIN(addr+i, &ad, sizeof(ad), flags)) senderr(EFAULT); if (pfr_validate_addr(&ad)) senderr(EINVAL); @@ -448,7 +463,7 @@ pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, xdel++; } if (flags & PFR_FLAG_FEEDBACK) - if (COPYOUT(&ad, addr+i, sizeof(ad))) + if (COPYOUT(&ad, addr+i, sizeof(ad), flags)) senderr(EFAULT); } if (!(flags & PFR_FLAG_DUMMY)) { @@ -476,10 +491,11 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, struct pfr_kentryworkq addq, delq, changeq; struct pfr_kentry *p, *q; struct pfr_addr ad; - int i, rv, s = 0, xadd = 0, xdel = 0, xchange = 0; + int i, rv, s, xadd = 0, xdel = 0, xchange = 0; long tzero = time_second; - ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK); + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY | + PFR_FLAG_FEEDBACK); if (pfr_validate_table(tbl, ignore_pfrt_flags, flags & PFR_FLAG_USERIOCTL)) return (EINVAL); @@ -488,7 +504,8 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, return (ESRCH); if (kt->pfrkt_flags & PFR_TFLAG_CONST) return (EPERM); - tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0); + tmpkt = 
pfr_create_ktable(&pfr_nulltable, 0, 0, + !(flags & PFR_FLAG_USERIOCTL)); if (tmpkt == NULL) return (ENOMEM); pfr_mark_addrs(kt); @@ -496,7 +513,7 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, SLIST_INIT(&delq); SLIST_INIT(&changeq); for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad))) + if (COPYIN(addr+i, &ad, sizeof(ad), flags)) senderr(EFAULT); if (pfr_validate_addr(&ad)) senderr(EINVAL); @@ -519,7 +536,8 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, ad.pfra_fback = PFR_FB_DUPLICATE; goto _skip; } - p = pfr_create_kentry(&ad, 0); + p = pfr_create_kentry(&ad, + !(flags & PFR_FLAG_USERIOCTL)); if (p == NULL) senderr(ENOMEM); if (pfr_route_kentry(tmpkt, p)) { @@ -533,7 +551,7 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, } _skip: if (flags & PFR_FLAG_FEEDBACK) - if (COPYOUT(&ad, addr+i, sizeof(ad))) + if (COPYOUT(&ad, addr+i, sizeof(ad), flags)) senderr(EFAULT); } pfr_enqueue_addrs(kt, &delq, &xdel, ENQUEUE_UNMARKED_ONLY); @@ -546,7 +564,7 @@ _skip: SLIST_FOREACH(p, &delq, pfrke_workq) { pfr_copyout_addr(&ad, p); ad.pfra_fback = PFR_FB_DELETED; - if (COPYOUT(&ad, addr+size+i, sizeof(ad))) + if (COPYOUT(&ad, addr+size+i, sizeof(ad), flags)) senderr(EFAULT); i++; } @@ -590,7 +608,7 @@ pfr_tst_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, struct pfr_addr ad; int i, xmatch = 0; - ACCEPT_FLAGS(PFR_FLAG_REPLACE); + ACCEPT_FLAGS(flags, PFR_FLAG_REPLACE); if (pfr_validate_table(tbl, 0, 0)) return (EINVAL); kt = pfr_lookup_table(tbl); @@ -598,7 +616,7 @@ pfr_tst_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, return (ESRCH); for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad))) + if (COPYIN(addr+i, &ad, sizeof(ad), flags)) return (EFAULT); if (pfr_validate_addr(&ad)) return (EINVAL); @@ -611,7 +629,7 @@ pfr_tst_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, (p->pfrke_not ? 
PFR_FB_NOTMATCH : PFR_FB_MATCH); if (p != NULL && !p->pfrke_not) xmatch++; - if (COPYOUT(&ad, addr+i, sizeof(ad))) + if (COPYOUT(&ad, addr+i, sizeof(ad), flags)) return (EFAULT); } if (nmatch != NULL) @@ -627,7 +645,7 @@ pfr_get_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int *size, struct pfr_walktree w; int rv; - ACCEPT_FLAGS(0); + ACCEPT_FLAGS(flags, 0); if (pfr_validate_table(tbl, 0, 0)) return (EINVAL); kt = pfr_lookup_table(tbl); @@ -650,7 +668,7 @@ pfr_get_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int *size, #endif if (!rv) #ifdef __FreeBSD__ - rv = kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, + rv = kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, &w); #else rv = rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w); @@ -674,10 +692,11 @@ pfr_get_astats(struct pfr_table *tbl, struct pfr_astats *addr, int *size, struct pfr_ktable *kt; struct pfr_walktree w; struct pfr_kentryworkq workq; - int rv, s = 0; + int rv, s; long tzero = time_second; - ACCEPT_FLAGS(PFR_FLAG_ATOMIC); /* XXX PFR_FLAG_CLSTATS disabled */ + /* XXX PFR_FLAG_CLSTATS disabled */ + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC); if (pfr_validate_table(tbl, 0, 0)) return (EINVAL); kt = pfr_lookup_table(tbl); @@ -733,9 +752,10 @@ pfr_clr_astats(struct pfr_table *tbl, struct pfr_addr *addr, int size, struct pfr_kentryworkq workq; struct pfr_kentry *p; struct pfr_addr ad; - int i, rv, s = 0, xzero = 0; + int i, rv, s, xzero = 0; - ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK); + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY | + PFR_FLAG_FEEDBACK); if (pfr_validate_table(tbl, 0, 0)) return (EINVAL); kt = pfr_lookup_table(tbl); @@ -743,7 +763,7 @@ pfr_clr_astats(struct pfr_table *tbl, struct pfr_addr *addr, int size, return (ESRCH); SLIST_INIT(&workq); for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad))) + if (COPYIN(addr+i, &ad, sizeof(ad), flags)) senderr(EFAULT); if (pfr_validate_addr(&ad)) senderr(EINVAL); @@ -751,7 +771,7 @@ 
pfr_clr_astats(struct pfr_table *tbl, struct pfr_addr *addr, int size, if (flags & PFR_FLAG_FEEDBACK) { ad.pfra_fback = (p != NULL) ? PFR_FB_CLEARED : PFR_FB_NONE; - if (COPYOUT(&ad, addr+i, sizeof(ad))) + if (COPYOUT(&ad, addr+i, sizeof(ad), flags)) senderr(EFAULT); } if (p != NULL) { @@ -866,7 +886,11 @@ struct pfr_kentry * pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact) { union sockaddr_union sa, mask; - struct radix_node_head *head = NULL; /* make the compiler happy */ +#ifdef __FreeBSD__ + struct radix_node_head *head = NULL; +#else + struct radix_node_head *head; +#endif struct pfr_kentry *ke; int s; @@ -904,12 +928,19 @@ pfr_create_kentry(struct pfr_addr *ad, int intr) struct pfr_kentry *ke; if (intr) - ke = pool_get(&pfr_kentry_pl2, PR_NOWAIT); +#ifdef __FreeBSD__ + ke = pool_get(&V_pfr_kentry_pl, PR_NOWAIT | PR_ZERO); +#else + ke = pool_get(&pfr_kentry_pl, PR_NOWAIT | PR_ZERO); +#endif else - ke = pool_get(&pfr_kentry_pl, PR_NOWAIT); +#ifdef __FreeBSD__ + ke = pool_get(&V_pfr_kentry_pl, PR_WAITOK|PR_ZERO); +#else + ke = pool_get(&pfr_kentry_pl, PR_WAITOK|PR_ZERO|PR_LIMITFAIL); +#endif if (ke == NULL) return (NULL); - bzero(ke, sizeof(*ke)); if (ad->pfra_af == AF_INET) FILLIN_SIN(ke->pfrke_sa.sin, ad->pfra_ip4addr); @@ -918,7 +949,6 @@ pfr_create_kentry(struct pfr_addr *ad, int intr) ke->pfrke_af = ad->pfra_af; ke->pfrke_net = ad->pfra_net; ke->pfrke_not = ad->pfra_not; - ke->pfrke_intrpool = intr; return (ke); } @@ -936,10 +966,14 @@ pfr_destroy_kentries(struct pfr_kentryworkq *workq) void pfr_destroy_kentry(struct pfr_kentry *ke) { - if (ke->pfrke_intrpool) - pool_put(&pfr_kentry_pl2, ke); - else - pool_put(&pfr_kentry_pl, ke); + if (ke->pfrke_counters) +#ifdef __FreeBSD__ + pool_put(&V_pfr_kcounters_pl, ke->pfrke_counters); + pool_put(&V_pfr_kentry_pl, ke); +#else + pool_put(&pfr_kcounters_pl, ke->pfrke_counters); + pool_put(&pfr_kentry_pl, ke); +#endif } void @@ -1020,8 +1054,14 @@ pfr_clstats_kentries(struct pfr_kentryworkq 
*workq, long tzero, int negchange) s = splsoftnet(); if (negchange) p->pfrke_not = !p->pfrke_not; - bzero(p->pfrke_packets, sizeof(p->pfrke_packets)); - bzero(p->pfrke_bytes, sizeof(p->pfrke_bytes)); + if (p->pfrke_counters) { +#ifdef __FreeBSD__ + pool_put(&V_pfr_kcounters_pl, p->pfrke_counters); +#else + pool_put(&pfr_kcounters_pl, p->pfrke_counters); +#endif + p->pfrke_counters = NULL; + } splx(s); p->pfrke_tzero = tzero; } @@ -1034,10 +1074,10 @@ pfr_reset_feedback(struct pfr_addr *addr, int size, int flags) int i; for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad))) + if (COPYIN(addr+i, &ad, sizeof(ad), flags)) break; ad.pfra_fback = PFR_FB_NONE; - if (COPYOUT(&ad, addr+i, sizeof(ad))) + if (COPYOUT(&ad, addr+i, sizeof(ad), flags)) break; } } @@ -1072,7 +1112,11 @@ pfr_route_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) { union sockaddr_union mask; struct radix_node *rn; - struct radix_node_head *head = NULL; /* make the compiler happy */ +#ifdef __FreeBSD__ + struct radix_node_head *head = NULL; +#else + struct radix_node_head *head; +#endif int s; bzero(ke->pfrke_node, sizeof(ke->pfrke_node)); @@ -1087,9 +1131,17 @@ pfr_route_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) #endif if (KENTRY_NETWORK(ke)) { pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net); +#ifdef __FreeBSD__ rn = rn_addroute(&ke->pfrke_sa, &mask, head, ke->pfrke_node); +#else + rn = rn_addroute(&ke->pfrke_sa, &mask, head, ke->pfrke_node, 0); +#endif } else +#ifdef __FreeBSD__ rn = rn_addroute(&ke->pfrke_sa, NULL, head, ke->pfrke_node); +#else + rn = rn_addroute(&ke->pfrke_sa, NULL, head, ke->pfrke_node, 0); +#endif splx(s); return (rn == NULL ? 
-1 : 0); @@ -1100,7 +1152,11 @@ pfr_unroute_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) { union sockaddr_union mask; struct radix_node *rn; - struct radix_node_head *head = NULL; /* make the compiler happy */ +#ifdef __FreeBSD__ + struct radix_node_head *head = NULL; +#else + struct radix_node_head *head; +#endif int s; if (ke->pfrke_af == AF_INET) @@ -1173,7 +1229,7 @@ pfr_walktree(struct radix_node *rn, void *arg) struct pfr_addr ad; pfr_copyout_addr(&ad, ke); - if (COPYOUT(&ad, w->pfrw_addr, sizeof(ad))) + if (copyout(&ad, w->pfrw_addr, sizeof(ad))) return (EFAULT); w->pfrw_addr++; } @@ -1185,14 +1241,20 @@ pfr_walktree(struct radix_node *rn, void *arg) pfr_copyout_addr(&as.pfras_a, ke); s = splsoftnet(); - bcopy(ke->pfrke_packets, as.pfras_packets, - sizeof(as.pfras_packets)); - bcopy(ke->pfrke_bytes, as.pfras_bytes, - sizeof(as.pfras_bytes)); + if (ke->pfrke_counters) { + bcopy(ke->pfrke_counters->pfrkc_packets, + as.pfras_packets, sizeof(as.pfras_packets)); + bcopy(ke->pfrke_counters->pfrkc_bytes, + as.pfras_bytes, sizeof(as.pfras_bytes)); + } else { + bzero(as.pfras_packets, sizeof(as.pfras_packets)); + bzero(as.pfras_bytes, sizeof(as.pfras_bytes)); + as.pfras_a.pfra_fback = PFR_FB_NOCOUNT; + } splx(s); as.pfras_tzero = ke->pfrke_tzero; - if (COPYOUT(&as, w->pfrw_astats, sizeof(as))) + if (COPYOUT(&as, w->pfrw_astats, sizeof(as), flags)) return (EFAULT); w->pfrw_astats++; } @@ -1209,19 +1271,35 @@ pfr_walktree(struct radix_node *rn, void *arg) if (ke->pfrke_af == AF_INET) { if (w->pfrw_dyn->pfid_acnt4++ > 0) break; +#ifdef __FreeBSD__ + pfr_prepare_network(&V_pfr_mask, AF_INET, ke->pfrke_net); +#else pfr_prepare_network(&pfr_mask, AF_INET, ke->pfrke_net); +#endif w->pfrw_dyn->pfid_addr4 = *SUNION2PF( &ke->pfrke_sa, AF_INET); w->pfrw_dyn->pfid_mask4 = *SUNION2PF( +#ifdef __FreeBSD__ + &V_pfr_mask, AF_INET); +#else &pfr_mask, AF_INET); +#endif } else if (ke->pfrke_af == AF_INET6){ if (w->pfrw_dyn->pfid_acnt6++ > 0) break; +#ifdef __FreeBSD__ + 
pfr_prepare_network(&V_pfr_mask, AF_INET6, ke->pfrke_net); +#else pfr_prepare_network(&pfr_mask, AF_INET6, ke->pfrke_net); +#endif w->pfrw_dyn->pfid_addr6 = *SUNION2PF( &ke->pfrke_sa, AF_INET6); w->pfrw_dyn->pfid_mask6 = *SUNION2PF( +#ifdef __FreeBSD__ + &V_pfr_mask, AF_INET6); +#else &pfr_mask, AF_INET6); +#endif } break; } @@ -1233,9 +1311,10 @@ pfr_clr_tables(struct pfr_table *filter, int *ndel, int flags) { struct pfr_ktableworkq workq; struct pfr_ktable *p; - int s = 0, xdel = 0; + int s, xdel = 0; - ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_ALLRSETS); + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY | + PFR_FLAG_ALLRSETS); if (pfr_fix_anchor(filter->pfrt_anchor)) return (EINVAL); if (pfr_table_count(filter, flags) < 0) @@ -1270,14 +1349,14 @@ pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags) { struct pfr_ktableworkq addq, changeq; struct pfr_ktable *p, *q, *r, key; - int i, rv, s = 0, xadd = 0; + int i, rv, s, xadd = 0; long tzero = time_second; - ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY); + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY); SLIST_INIT(&addq); SLIST_INIT(&changeq); for (i = 0; i < size; i++) { - if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t))) + if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags)) senderr(EFAULT); if (pfr_validate_table(&key.pfrkt_t, PFR_TFLAG_USRMASK, flags & PFR_FLAG_USERIOCTL)) @@ -1285,7 +1364,8 @@ pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags) key.pfrkt_flags |= PFR_TFLAG_ACTIVE; p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); if (p == NULL) { - p = pfr_create_ktable(&key.pfrkt_t, tzero, 1); + p = pfr_create_ktable(&key.pfrkt_t, tzero, 1, + !(flags & PFR_FLAG_USERIOCTL)); if (p == NULL) senderr(ENOMEM); SLIST_FOREACH(q, &addq, pfrkt_workq) { @@ -1311,7 +1391,8 @@ pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags) } } key.pfrkt_flags = 0; - r = pfr_create_ktable(&key.pfrkt_t, 0, 1); + r = pfr_create_ktable(&key.pfrkt_t, 0, 
1, + !(flags & PFR_FLAG_USERIOCTL)); if (r == NULL) senderr(ENOMEM); SLIST_INSERT_HEAD(&addq, r, pfrkt_workq); @@ -1350,12 +1431,12 @@ pfr_del_tables(struct pfr_table *tbl, int size, int *ndel, int flags) { struct pfr_ktableworkq workq; struct pfr_ktable *p, *q, key; - int i, s = 0, xdel = 0; + int i, s, xdel = 0; - ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY); + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY); SLIST_INIT(&workq); for (i = 0; i < size; i++) { - if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t))) + if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags)) return (EFAULT); if (pfr_validate_table(&key.pfrkt_t, 0, flags & PFR_FLAG_USERIOCTL)) @@ -1392,7 +1473,7 @@ pfr_get_tables(struct pfr_table *filter, struct pfr_table *tbl, int *size, struct pfr_ktable *p; int n, nn; - ACCEPT_FLAGS(PFR_FLAG_ALLRSETS); + ACCEPT_FLAGS(flags, PFR_FLAG_ALLRSETS); if (pfr_fix_anchor(filter->pfrt_anchor)) return (EINVAL); n = nn = pfr_table_count(filter, flags); @@ -1407,7 +1488,7 @@ pfr_get_tables(struct pfr_table *filter, struct pfr_table *tbl, int *size, continue; if (n-- <= 0) continue; - if (COPYOUT(&p->pfrkt_t, tbl++, sizeof(*tbl))) + if (COPYOUT(&p->pfrkt_t, tbl++, sizeof(*tbl), flags)) return (EFAULT); } if (n) { @@ -1424,11 +1505,11 @@ pfr_get_tstats(struct pfr_table *filter, struct pfr_tstats *tbl, int *size, { struct pfr_ktable *p; struct pfr_ktableworkq workq; - int s = 0, n, nn; + int s, n, nn; long tzero = time_second; - ACCEPT_FLAGS(PFR_FLAG_ATOMIC|PFR_FLAG_ALLRSETS); - /* XXX PFR_FLAG_CLSTATS disabled */ + /* XXX PFR_FLAG_CLSTATS disabled */ + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_ALLRSETS); if (pfr_fix_anchor(filter->pfrt_anchor)) return (EINVAL); n = nn = pfr_table_count(filter, flags); @@ -1448,9 +1529,8 @@ pfr_get_tstats(struct pfr_table *filter, struct pfr_tstats *tbl, int *size, continue; if (!(flags & PFR_FLAG_ATOMIC)) s = splsoftnet(); - if (COPYOUT(&p->pfrkt_ts, tbl++, sizeof(*tbl))) { - if (!(flags & PFR_FLAG_ATOMIC)) - 
splx(s); + if (COPYOUT(&p->pfrkt_ts, tbl++, sizeof(*tbl), flags)) { + splx(s); return (EFAULT); } if (!(flags & PFR_FLAG_ATOMIC)) @@ -1475,13 +1555,14 @@ pfr_clr_tstats(struct pfr_table *tbl, int size, int *nzero, int flags) { struct pfr_ktableworkq workq; struct pfr_ktable *p, key; - int i, s = 0, xzero = 0; + int i, s, xzero = 0; long tzero = time_second; - ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_ADDRSTOO); + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY | + PFR_FLAG_ADDRSTOO); SLIST_INIT(&workq); for (i = 0; i < size; i++) { - if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t))) + if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags)) return (EFAULT); if (pfr_validate_table(&key.pfrkt_t, 0, 0)) return (EINVAL); @@ -1509,16 +1590,16 @@ pfr_set_tflags(struct pfr_table *tbl, int size, int setflag, int clrflag, { struct pfr_ktableworkq workq; struct pfr_ktable *p, *q, key; - int i, s = 0, xchange = 0, xdel = 0; + int i, s, xchange = 0, xdel = 0; - ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY); + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY); if ((setflag & ~PFR_TFLAG_USRMASK) || (clrflag & ~PFR_TFLAG_USRMASK) || (setflag & clrflag)) return (EINVAL); SLIST_INIT(&workq); for (i = 0; i < size; i++) { - if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t))) + if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags)) return (EFAULT); if (pfr_validate_table(&key.pfrkt_t, 0, flags & PFR_FLAG_USERIOCTL)) @@ -1565,7 +1646,7 @@ pfr_ina_begin(struct pfr_table *trs, u_int32_t *ticket, int *ndel, int flags) struct pf_ruleset *rs; int xdel = 0; - ACCEPT_FLAGS(PFR_FLAG_DUMMY); + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); rs = pf_find_or_create_ruleset(trs->pfrt_anchor); if (rs == NULL) return (ENOMEM); @@ -1602,7 +1683,7 @@ pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size, struct pf_ruleset *rs; int i, rv, xadd = 0, xaddr = 0; - ACCEPT_FLAGS(PFR_FLAG_DUMMY|PFR_FLAG_ADDRSTOO); + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | 
PFR_FLAG_ADDRSTOO); if (size && !(flags & PFR_FLAG_ADDRSTOO)) return (EINVAL); if (pfr_validate_table(tbl, PFR_TFLAG_USRMASK, @@ -1615,7 +1696,8 @@ pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size, SLIST_INIT(&tableq); kt = RB_FIND(pfr_ktablehead, &pfr_ktables, (struct pfr_ktable *)tbl); if (kt == NULL) { - kt = pfr_create_ktable(tbl, 0, 1); + kt = pfr_create_ktable(tbl, 0, 1, + !(flags & PFR_FLAG_USERIOCTL)); if (kt == NULL) return (ENOMEM); SLIST_INSERT_HEAD(&tableq, kt, pfrkt_workq); @@ -1631,7 +1713,8 @@ pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size, kt->pfrkt_root = rt; goto _skip; } - rt = pfr_create_ktable(&key.pfrkt_t, 0, 1); + rt = pfr_create_ktable(&key.pfrkt_t, 0, 1, + !(flags & PFR_FLAG_USERIOCTL)); if (rt == NULL) { pfr_destroy_ktables(&tableq, 0); return (ENOMEM); @@ -1641,14 +1724,14 @@ pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size, } else if (!(kt->pfrkt_flags & PFR_TFLAG_INACTIVE)) xadd++; _skip: - shadow = pfr_create_ktable(tbl, 0, 0); + shadow = pfr_create_ktable(tbl, 0, 0, !(flags & PFR_FLAG_USERIOCTL)); if (shadow == NULL) { pfr_destroy_ktables(&tableq, 0); return (ENOMEM); } SLIST_INIT(&addrq); for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad))) + if (COPYIN(addr+i, &ad, sizeof(ad), flags)) senderr(EFAULT); if (pfr_validate_addr(&ad)) senderr(EINVAL); @@ -1698,7 +1781,7 @@ pfr_ina_rollback(struct pfr_table *trs, u_int32_t ticket, int *ndel, int flags) struct pf_ruleset *rs; int xdel = 0; - ACCEPT_FLAGS(PFR_FLAG_DUMMY); + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); rs = pf_find_ruleset(trs->pfrt_anchor); if (rs == NULL || !rs->topen || ticket != rs->tticket) return (0); @@ -1728,10 +1811,10 @@ pfr_ina_commit(struct pfr_table *trs, u_int32_t ticket, int *nadd, struct pfr_ktable *p, *q; struct pfr_ktableworkq workq; struct pf_ruleset *rs; - int s = 0, xadd = 0, xchange = 0; + int s, xadd = 0, xchange = 0; long tzero = time_second; - 
ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY); + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY); rs = pf_find_ruleset(trs->pfrt_anchor); if (rs == NULL || !rs->topen || ticket != rs->tticket) return (EBUSY); @@ -1992,15 +2075,26 @@ pfr_clstats_ktable(struct pfr_ktable *kt, long tzero, int recurse) } struct pfr_ktable * -pfr_create_ktable(struct pfr_table *tbl, long tzero, int attachruleset) +pfr_create_ktable(struct pfr_table *tbl, long tzero, int attachruleset, + int intr) { struct pfr_ktable *kt; struct pf_ruleset *rs; - kt = pool_get(&pfr_ktable_pl, PR_NOWAIT); + if (intr) +#ifdef __FreeBSD__ + kt = pool_get(&V_pfr_ktable_pl, PR_NOWAIT|PR_ZERO); +#else + kt = pool_get(&pfr_ktable_pl, PR_NOWAIT|PR_ZERO|PR_LIMITFAIL); +#endif + else +#ifdef __FreeBSD__ + kt = pool_get(&V_pfr_ktable_pl, PR_WAITOK|PR_ZERO); +#else + kt = pool_get(&pfr_ktable_pl, PR_WAITOK|PR_ZERO|PR_LIMITFAIL); +#endif if (kt == NULL) return (NULL); - bzero(kt, sizeof(*kt)); kt->pfrkt_t = *tbl; if (attachruleset) { @@ -2067,7 +2161,11 @@ pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr) kt->pfrkt_rs->tables--; pf_remove_if_empty_ruleset(kt->pfrkt_rs); } +#ifdef __FreeBSD__ + pool_put(&V_pfr_ktable_pl, kt); +#else pool_put(&pfr_ktable_pl, kt); +#endif } int @@ -2102,16 +2200,26 @@ pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af) switch (af) { #ifdef INET case AF_INET: +#ifdef __FreeBSD__ + V_pfr_sin.sin_addr.s_addr = a->addr32[0]; + ke = (struct pfr_kentry *)rn_match(&V_pfr_sin, kt->pfrkt_ip4); +#else pfr_sin.sin_addr.s_addr = a->addr32[0]; ke = (struct pfr_kentry *)rn_match(&pfr_sin, kt->pfrkt_ip4); +#endif if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; break; #endif /* INET */ #ifdef INET6 case AF_INET6: +#ifdef __FreeBSD__ + bcopy(a, &V_pfr_sin6.sin6_addr, sizeof(V_pfr_sin6.sin6_addr)); + ke = (struct pfr_kentry *)rn_match(&V_pfr_sin6, kt->pfrkt_ip6); +#else bcopy(a, &pfr_sin6.sin6_addr, sizeof(pfr_sin6.sin6_addr)); ke = (struct pfr_kentry *)rn_match(&pfr_sin6, 
kt->pfrkt_ip6); +#endif if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; break; @@ -2139,16 +2247,26 @@ pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af, switch (af) { #ifdef INET case AF_INET: +#ifdef __FreeBSD__ + V_pfr_sin.sin_addr.s_addr = a->addr32[0]; + ke = (struct pfr_kentry *)rn_match(&V_pfr_sin, kt->pfrkt_ip4); +#else pfr_sin.sin_addr.s_addr = a->addr32[0]; ke = (struct pfr_kentry *)rn_match(&pfr_sin, kt->pfrkt_ip4); +#endif if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; break; #endif /* INET */ #ifdef INET6 case AF_INET6: +#ifdef __FreeBSD__ + bcopy(a, &V_pfr_sin6.sin6_addr, sizeof(V_pfr_sin6.sin6_addr)); + ke = (struct pfr_kentry *)rn_match(&V_pfr_sin6, kt->pfrkt_ip6); +#else bcopy(a, &pfr_sin6.sin6_addr, sizeof(pfr_sin6.sin6_addr)); ke = (struct pfr_kentry *)rn_match(&pfr_sin6, kt->pfrkt_ip6); +#endif if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; break; @@ -2163,14 +2281,24 @@ pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af, } kt->pfrkt_packets[dir_out][op_pass]++; kt->pfrkt_bytes[dir_out][op_pass] += len; - if (ke != NULL && op_pass != PFR_OP_XPASS) { - ke->pfrke_packets[dir_out][op_pass]++; - ke->pfrke_bytes[dir_out][op_pass] += len; + if (ke != NULL && op_pass != PFR_OP_XPASS && + (kt->pfrkt_flags & PFR_TFLAG_COUNTERS)) { + if (ke->pfrke_counters == NULL) +#ifdef __FreeBSD__ + ke->pfrke_counters = pool_get(&V_pfr_kcounters_pl, +#else + ke->pfrke_counters = pool_get(&pfr_kcounters_pl, +#endif + PR_NOWAIT | PR_ZERO); + if (ke->pfrke_counters != NULL) { + ke->pfrke_counters->pfrkc_packets[dir_out][op_pass]++; + ke->pfrke_counters->pfrkc_bytes[dir_out][op_pass] += len; + } } } struct pfr_ktable * -pfr_attach_table(struct pf_ruleset *rs, char *name) +pfr_attach_table(struct pf_ruleset *rs, char *name, int intr) { struct pfr_ktable *kt, *rt; struct pfr_table tbl; @@ -2182,14 +2310,14 @@ pfr_attach_table(struct pf_ruleset *rs, char *name) strlcpy(tbl.pfrt_anchor, ac->path, sizeof(tbl.pfrt_anchor)); kt = 
pfr_lookup_table(&tbl); if (kt == NULL) { - kt = pfr_create_ktable(&tbl, time_second, 1); + kt = pfr_create_ktable(&tbl, time_second, 1, intr); if (kt == NULL) return (NULL); if (ac != NULL) { bzero(tbl.pfrt_anchor, sizeof(tbl.pfrt_anchor)); rt = pfr_lookup_table(&tbl); if (rt == NULL) { - rt = pfr_create_ktable(&tbl, 0, 1); + rt = pfr_create_ktable(&tbl, 0, 1, intr); if (rt == NULL) { pfr_destroy_ktable(kt, 0); return (NULL); @@ -2215,20 +2343,31 @@ pfr_detach_table(struct pfr_ktable *kt) pfr_setflags_ktable(kt, kt->pfrkt_flags&~PFR_TFLAG_REFERENCED); } - int pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter, struct pf_addr **raddr, struct pf_addr **rmask, sa_family_t af) { +#ifdef __FreeBSD__ struct pfr_kentry *ke, *ke2 = NULL; struct pf_addr *addr = NULL; +#else + struct pfr_kentry *ke, *ke2; + struct pf_addr *addr; +#endif union sockaddr_union mask; int idx = -1, use_counter = 0; +#ifdef __FreeBSD__ + if (af == AF_INET) + addr = (struct pf_addr *)&V_pfr_sin.sin_addr; + else if (af == AF_INET6) + addr = (struct pf_addr *)&V_pfr_sin6.sin6_addr; +#else if (af == AF_INET) addr = (struct pf_addr *)&pfr_sin.sin_addr; else if (af == AF_INET6) addr = (struct pf_addr *)&pfr_sin6.sin6_addr; +#endif if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) kt = kt->pfrkt_root; if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) @@ -2243,11 +2382,21 @@ pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter, _next_block: ke = pfr_kentry_byidx(kt, idx, af); - if (ke == NULL) + if (ke == NULL) { + kt->pfrkt_nomatch++; return (1); + } +#ifdef __FreeBSD__ + pfr_prepare_network(&V_pfr_mask, af, ke->pfrke_net); +#else pfr_prepare_network(&pfr_mask, af, ke->pfrke_net); +#endif *raddr = SUNION2PF(&ke->pfrke_sa, af); +#ifdef __FreeBSD__ + *rmask = SUNION2PF(&V_pfr_mask, af); +#else *rmask = SUNION2PF(&pfr_mask, af); +#endif if (use_counter) { /* is supplied address within block? 
*/ @@ -2267,27 +2416,42 @@ _next_block: /* this is a single IP address - no possible nested block */ PF_ACPY(counter, addr, af); *pidx = idx; + kt->pfrkt_match++; return (0); } for (;;) { /* we don't want to use a nested block */ +#ifdef __FreeBSD__ + if (af == AF_INET) + ke2 = (struct pfr_kentry *)rn_match(&V_pfr_sin, + kt->pfrkt_ip4); + else if (af == AF_INET6) + ke2 = (struct pfr_kentry *)rn_match(&V_pfr_sin6, + kt->pfrkt_ip6); +#else if (af == AF_INET) ke2 = (struct pfr_kentry *)rn_match(&pfr_sin, kt->pfrkt_ip4); else if (af == AF_INET6) ke2 = (struct pfr_kentry *)rn_match(&pfr_sin6, kt->pfrkt_ip6); +#endif /* no need to check KENTRY_RNF_ROOT() here */ if (ke2 == ke) { /* lookup return the same block - perfect */ PF_ACPY(counter, addr, af); *pidx = idx; + kt->pfrkt_match++; return (0); } /* we need to increase the counter past the nested block */ pfr_prepare_network(&mask, AF_INET, ke2->pfrke_net); +#ifdef __FreeBSD__ + PF_POOLMASK(addr, addr, SUNION2PF(&mask, af), &V_pfr_ffaddr, af); +#else PF_POOLMASK(addr, addr, SUNION2PF(&mask, af), &pfr_ffaddr, af); +#endif PF_AINC(addr, af); if (!PF_MATCHA(0, *raddr, *rmask, addr, af)) { /* ok, we reached the end of our main block */ diff --git a/sys/contrib/pf/net/pfvar.h b/sys/contrib/pf/net/pfvar.h index de175b1..2aa4bbf 100644 --- a/sys/contrib/pf/net/pfvar.h +++ b/sys/contrib/pf/net/pfvar.h @@ -1,5 +1,4 @@ -/* $FreeBSD$ */ -/* $OpenBSD: pfvar.h,v 1.244 2007/02/23 21:31:51 deraadt Exp $ */ +/* $OpenBSD: pfvar.h,v 1.282 2009/01/29 15:12:28 pyr Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -78,9 +77,8 @@ struct inpcb; #endif enum { PF_INOUT, PF_IN, PF_OUT }; -enum { PF_LAN_EXT, PF_EXT_GWY, PF_ID }; enum { PF_PASS, PF_DROP, PF_SCRUB, PF_NOSCRUB, PF_NAT, PF_NONAT, - PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP }; + PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP, PF_DEFER }; enum { PF_RULESET_SCRUB, PF_RULESET_FILTER, PF_RULESET_NAT, PF_RULESET_BINAT, PF_RULESET_RDR, PF_RULESET_MAX }; enum { 
PF_OP_NONE, PF_OP_IRG, PF_OP_EQ, PF_OP_NE, PF_OP_LT, @@ -90,6 +88,7 @@ enum { PF_CHANGE_NONE, PF_CHANGE_ADD_HEAD, PF_CHANGE_ADD_TAIL, PF_CHANGE_ADD_BEFORE, PF_CHANGE_ADD_AFTER, PF_CHANGE_REMOVE, PF_CHANGE_GET_TICKET }; enum { PF_GET_NONE, PF_GET_CLR_CNTR }; +enum { PF_SK_WIRE, PF_SK_STACK, PF_SK_BOTH }; /* * Note about PFTM_*: real indices into pf_rule.timeout[] come before @@ -132,7 +131,8 @@ enum { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS, enum { PF_POOL_NONE, PF_POOL_BITMASK, PF_POOL_RANDOM, PF_POOL_SRCHASH, PF_POOL_ROUNDROBIN }; enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL, - PF_ADDR_TABLE, PF_ADDR_RTLABEL, PF_ADDR_URPFFAILED }; + PF_ADDR_TABLE, PF_ADDR_RTLABEL, PF_ADDR_URPFFAILED, + PF_ADDR_RANGE }; #define PF_POOL_TYPEMASK 0x0f #define PF_POOL_STICKYADDR 0x20 #define PF_WSCALE_FLAG 0x80 @@ -212,87 +212,106 @@ struct pfi_dynaddr { */ #ifdef __FreeBSD__ -#define splsoftnet() splnet() +#define splsoftnet() splnet() #define HTONL(x) (x) = htonl((__uint32_t)(x)) #define HTONS(x) (x) = htons((__uint16_t)(x)) #define NTOHL(x) (x) = ntohl((__uint32_t)(x)) #define NTOHS(x) (x) = ntohs((__uint16_t)(x)) -#define PF_NAME "pf" +#define PF_NAME "pf" -#define PR_NOWAIT M_NOWAIT -#define pool_get(p, f) uma_zalloc(*(p), (f)) -#define pool_put(p, o) uma_zfree(*(p), (o)) +#define PR_NOWAIT M_NOWAIT +#define PR_WAITOK M_WAIT +#define PR_ZERO M_ZERO +#define pool_get(p, f) uma_zalloc(*(p), (f)) +#define pool_put(p, o) uma_zfree(*(p), (o)) -#define UMA_CREATE(var, type, desc) \ - var = uma_zcreate(desc, sizeof(type), \ - NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); \ - if (var == NULL) break -#define UMA_DESTROY(var) \ - if(var) uma_zdestroy(var) +#define UMA_CREATE(var, type, desc) \ + var = uma_zcreate(desc, sizeof(type), \ + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); \ + if (var == NULL) \ + break +#define UMA_DESTROY(var) \ + if (var) \ + uma_zdestroy(var) +#ifdef __FreeBSD__ +VNET_DECLARE(struct mtx, pf_task_mtx); +#define V_pf_task_mtx VNET(pf_task_mtx) 
+ +#define PF_ASSERT(h) mtx_assert(&V_pf_task_mtx, (h)) + +#define PF_LOCK() do { \ + PF_ASSERT(MA_NOTOWNED); \ + mtx_lock(&V_pf_task_mtx); \ +} while(0) +#define PF_UNLOCK() do { \ + PF_ASSERT(MA_OWNED); \ + mtx_unlock(&V_pf_task_mtx); \ +} while(0) +#else extern struct mtx pf_task_mtx; -#define PF_ASSERT(h) mtx_assert(&pf_task_mtx, (h)) +#define PF_ASSERT(h) mtx_assert(&pf_task_mtx, (h)) -#define PF_LOCK() do { \ - PF_ASSERT(MA_NOTOWNED); \ - mtx_lock(&pf_task_mtx); \ +#define PF_LOCK() do { \ + PF_ASSERT(MA_NOTOWNED); \ + mtx_lock(&pf_task_mtx); \ } while(0) -#define PF_UNLOCK() do { \ - PF_ASSERT(MA_OWNED); \ - mtx_unlock(&pf_task_mtx); \ +#define PF_UNLOCK() do { \ + PF_ASSERT(MA_OWNED); \ + mtx_unlock(&pf_task_mtx); \ } while(0) +#endif -#define PF_COPYIN(uaddr, kaddr, len, r) do { \ - PF_UNLOCK(); \ - r = copyin((uaddr), (kaddr), (len)); \ - PF_LOCK(); \ +#define PF_COPYIN(uaddr, kaddr, len, r) do { \ + PF_UNLOCK(); \ + r = copyin((uaddr), (kaddr), (len)); \ + PF_LOCK(); \ } while(0) -#define PF_COPYOUT(kaddr, uaddr, len, r) do { \ - PF_UNLOCK(); \ - r = copyout((kaddr), (uaddr), (len)); \ - PF_LOCK(); \ +#define PF_COPYOUT(kaddr, uaddr, len, r) do { \ + PF_UNLOCK(); \ + r = copyout((kaddr), (uaddr), (len)); \ + PF_LOCK(); \ } while(0) extern void init_pf_mutex(void); extern void destroy_pf_mutex(void); -#define PF_MODVER 1 -#define PFLOG_MODVER 1 -#define PFSYNC_MODVER 1 - -#define PFLOG_MINVER 1 -#define PFLOG_PREFVER PFLOG_MODVER -#define PFLOG_MAXVER 1 -#define PFSYNC_MINVER 1 -#define PFSYNC_PREFVER PFSYNC_MODVER -#define PFSYNC_MAXVER 1 -#endif /* __FreeBSD__ */ - +#define PF_MODVER 1 +#define PFLOG_MODVER 1 +#define PFSYNC_MODVER 1 + +#define PFLOG_MINVER 1 +#define PFLOG_PREFVER PFLOG_MODVER +#define PFLOG_MAXVER 1 +#define PFSYNC_MINVER 1 +#define PFSYNC_PREFVER PFSYNC_MODVER +#define PFSYNC_MAXVER 1 +#endif /* __FreeBSD__ */ #ifdef INET #ifndef INET6 -#define PF_INET_ONLY +#define PF_INET_ONLY #endif /* ! 
INET6 */ #endif /* INET */ #ifdef INET6 #ifndef INET -#define PF_INET6_ONLY +#define PF_INET6_ONLY #endif /* ! INET */ #endif /* INET6 */ #ifdef INET #ifdef INET6 -#define PF_INET_INET6 +#define PF_INET_INET6 #endif /* INET6 */ #endif /* INET */ #else -#define PF_INET_INET6 +#define PF_INET_INET6 #endif /* _KERNEL */ @@ -412,7 +431,10 @@ extern void destroy_pf_mutex(void); ((aw)->type == PF_ADDR_TABLE && \ !pfr_match_addr((aw)->p.tbl, (x), (af))) || \ ((aw)->type == PF_ADDR_DYNIFTL && \ - !pfi_match_addr((aw)->p.dyn, (x), (af))) || \ + !pfi_match_addr((aw)->p.dyn, (x), (af))) || \ + ((aw)->type == PF_ADDR_RANGE && \ + !pf_match_addr_range(&(aw)->v.a.addr, \ + &(aw)->v.a.mask, (x), (af))) || \ ((aw)->type == PF_ADDR_ADDRMASK && \ !PF_AZERO(&(aw)->v.a.mask, (af)) && \ !PF_MATCHA(0, &(aw)->v.a.addr, \ @@ -619,12 +641,13 @@ struct pf_rule { int rtableid; u_int32_t timeout[PFTM_MAX]; - u_int32_t states; + u_int32_t states_cur; + u_int32_t states_tot; u_int32_t max_states; u_int32_t src_nodes; u_int32_t max_src_nodes; u_int32_t max_src_states; - u_int32_t spare1; /* netgraph */ + u_int32_t spare1; /* netgraph */ u_int32_t max_src_conn; struct { u_int32_t limit; @@ -643,7 +666,7 @@ struct pf_rule { u_int16_t max_mss; u_int16_t tag; u_int16_t match_tag; - u_int16_t spare2; /* netgraph */ + u_int16_t spare2; /* netgraph */ struct pf_rule_uid uid; struct pf_rule_gid gid; @@ -673,12 +696,18 @@ struct pf_rule { u_int8_t rt; u_int8_t return_ttl; u_int8_t tos; + u_int8_t set_tos; u_int8_t anchor_relative; u_int8_t anchor_wildcard; #define PF_FLUSH 0x01 #define PF_FLUSH_GLOBAL 0x02 u_int8_t flush; + + struct { + struct pf_addr addr; + u_int16_t port; + } divert; }; /* rule flags */ @@ -697,10 +726,12 @@ struct pf_rule { #define PFRULE_FRAGDROP 0x0400 /* drop funny fragments */ #define PFRULE_RANDOMID 0x0800 #define PFRULE_REASSEMBLE_TCP 0x1000 +#define PFRULE_SET_TOS 0x2000 /* rule flags again */ #define PFRULE_IFBOUND 0x00010000 /* if-bound */ #define PFRULE_STATESLOPPY 
0x00020000 /* sloppy state tracking */ +#define PFRULE_PFLOW 0x00040000 #define PFSTATE_HIWAT 10000 /* default state table size */ #define PFSTATE_ADAPT_START 6000 /* default adaptive timeout start */ @@ -758,83 +789,268 @@ struct pf_state_host { }; struct pf_state_peer { + struct pf_state_scrub *scrub; /* state is scrubbed */ u_int32_t seqlo; /* Max sequence number sent */ u_int32_t seqhi; /* Max the other end ACKd + win */ u_int32_t seqdiff; /* Sequence number modulator */ u_int16_t max_win; /* largest window (pre scaling) */ + u_int16_t mss; /* Maximum segment size option */ u_int8_t state; /* active state level */ u_int8_t wscale; /* window scaling factor */ - u_int16_t mss; /* Maximum segment size option */ u_int8_t tcp_est; /* Did we reach TCPS_ESTABLISHED */ - struct pf_state_scrub *scrub; /* state is scrubbed */ - u_int8_t pad[3]; + u_int8_t pad[1]; }; TAILQ_HEAD(pf_state_queue, pf_state); -/* keep synced with struct pf_state, used in RB_FIND */ -struct pf_state_cmp { - u_int64_t id; - u_int32_t creatorid; - struct pf_state_host lan; - struct pf_state_host gwy; - struct pf_state_host ext; +/* keep synced with struct pf_state_key, used in RB_FIND */ +struct pf_state_key_cmp { + struct pf_addr addr[2]; + u_int16_t port[2]; sa_family_t af; u_int8_t proto; - u_int8_t direction; - u_int8_t pad; + u_int8_t pad[2]; +}; + +struct pf_state_item { + TAILQ_ENTRY(pf_state_item) entry; + struct pf_state *s; +}; + +TAILQ_HEAD(pf_statelisthead, pf_state_item); + +struct pf_state_key { + struct pf_addr addr[2]; + u_int16_t port[2]; + sa_family_t af; + u_int8_t proto; + u_int8_t pad[2]; + + RB_ENTRY(pf_state_key) entry; + struct pf_statelisthead states; + struct pf_state_key *reverse; + struct inpcb *inp; +}; + +/* keep synced with struct pf_state, used in RB_FIND */ +struct pf_state_cmp { + u_int64_t id; + u_int32_t creatorid; + u_int8_t direction; + u_int8_t pad[3]; }; struct pf_state { - u_int64_t id; + u_int64_t id; + u_int32_t creatorid; + u_int8_t direction; +#ifdef 
__FreeBSD__ + u_int8_t pad[2]; + u_int8_t local_flags; +#define PFSTATE_EXPIRING 0x01 +#else + u_int8_t pad[3]; +#endif + + TAILQ_ENTRY(pf_state) sync_list; + TAILQ_ENTRY(pf_state) entry_list; + RB_ENTRY(pf_state) entry_id; + struct pf_state_peer src; + struct pf_state_peer dst; + union pf_rule_ptr rule; + union pf_rule_ptr anchor; + union pf_rule_ptr nat_rule; + struct pf_addr rt_addr; + struct pf_state_key *key[2]; /* addresses stack and wire */ + struct pfi_kif *kif; + struct pfi_kif *rt_kif; + struct pf_src_node *src_node; + struct pf_src_node *nat_src_node; + u_int64_t packets[2]; + u_int64_t bytes[2]; + u_int32_t creation; + u_int32_t expire; + u_int32_t pfsync_time; + u_int16_t tag; + u_int8_t log; + u_int8_t state_flags; +#define PFSTATE_ALLOWOPTS 0x01 +#define PFSTATE_SLOPPY 0x02 +#define PFSTATE_PFLOW 0x04 +#define PFSTATE_NOSYNC 0x08 +#define PFSTATE_ACK 0x10 + u_int8_t timeout; + u_int8_t sync_state; /* PFSYNC_S_x */ + + /* XXX */ + u_int8_t sync_updates; + u_int8_t _tail[3]; +}; + +/* + * Unified state structures for pulling states out of the kernel + * used by pfsync(4) and the pf(4) ioctl. 
+ */ +struct pfsync_state_scrub { + u_int16_t pfss_flags; + u_int8_t pfss_ttl; /* stashed TTL */ +#define PFSYNC_SCRUB_FLAG_VALID 0x01 + u_int8_t scrub_flag; + u_int32_t pfss_ts_mod; /* timestamp modulation */ +} __packed; + +struct pfsync_state_peer { + struct pfsync_state_scrub scrub; /* state is scrubbed */ + u_int32_t seqlo; /* Max sequence number sent */ + u_int32_t seqhi; /* Max the other end ACKd + win */ + u_int32_t seqdiff; /* Sequence number modulator */ + u_int16_t max_win; /* largest window (pre scaling) */ + u_int16_t mss; /* Maximum segment size option */ + u_int8_t state; /* active state level */ + u_int8_t wscale; /* window scaling factor */ + u_int8_t pad[6]; +} __packed; + +struct pfsync_state_key { + struct pf_addr addr[2]; + u_int16_t port[2]; +}; + +struct pfsync_state { + u_int32_t id[2]; + char ifname[IFNAMSIZ]; + struct pfsync_state_key key[2]; + struct pfsync_state_peer src; + struct pfsync_state_peer dst; + struct pf_addr rt_addr; + u_int32_t rule; + u_int32_t anchor; + u_int32_t nat_rule; + u_int32_t creation; + u_int32_t expire; + u_int32_t packets[2][2]; + u_int32_t bytes[2][2]; u_int32_t creatorid; - struct pf_state_host lan; - struct pf_state_host gwy; - struct pf_state_host ext; sa_family_t af; u_int8_t proto; u_int8_t direction; #ifdef __FreeBSD__ u_int8_t local_flags; -#define PFSTATE_EXPIRING 0x01 -#else +#define PFSTATE_EXPIRING 0x01 u_int8_t pad; #endif u_int8_t log; u_int8_t state_flags; -#define PFSTATE_ALLOWOPTS 0x01 -#define PFSTATE_SLOPPY 0x02 u_int8_t timeout; u_int8_t sync_flags; -#define PFSTATE_NOSYNC 0x01 -#define PFSTATE_FROMSYNC 0x02 -#define PFSTATE_STALE 0x04 - union { - struct { - RB_ENTRY(pf_state) entry_lan_ext; - RB_ENTRY(pf_state) entry_ext_gwy; - RB_ENTRY(pf_state) entry_id; - TAILQ_ENTRY(pf_state) entry_list; - struct pfi_kif *kif; - } s; - char ifname[IFNAMSIZ]; - } u; - struct pf_state_peer src; - struct pf_state_peer dst; - union pf_rule_ptr rule; - union pf_rule_ptr anchor; - union pf_rule_ptr nat_rule; 
- struct pf_addr rt_addr; - struct pfi_kif *rt_kif; - struct pf_src_node *src_node; - struct pf_src_node *nat_src_node; - u_int64_t packets[2]; - u_int64_t bytes[2]; - u_int32_t creation; - u_int32_t expire; - u_int32_t pfsync_time; - u_int16_t tag; -}; + u_int8_t updates; +} __packed; + +#ifdef __FreeBSD__ +#ifdef _KERNEL +/* pfsync */ +typedef int pfsync_state_import_t(struct pfsync_state *, u_int8_t); +typedef void pfsync_insert_state_t(struct pf_state *); +typedef void pfsync_update_state_t(struct pf_state *); +typedef void pfsync_delete_state_t(struct pf_state *); +typedef void pfsync_clear_states_t(u_int32_t, const char *); +typedef int pfsync_state_in_use_t(struct pf_state *); +typedef int pfsync_defer_t(struct pf_state *, struct mbuf *); +typedef int pfsync_up_t(void); + +extern pfsync_state_import_t *pfsync_state_import_ptr; +extern pfsync_insert_state_t *pfsync_insert_state_ptr; +extern pfsync_update_state_t *pfsync_update_state_ptr; +extern pfsync_delete_state_t *pfsync_delete_state_ptr; +extern pfsync_clear_states_t *pfsync_clear_states_ptr; +extern pfsync_state_in_use_t *pfsync_state_in_use_ptr; +extern pfsync_defer_t *pfsync_defer_ptr; +extern pfsync_up_t *pfsync_up_ptr; + +void pfsync_state_export(struct pfsync_state *, + struct pf_state *); + +/* pflow */ +typedef int export_pflow_t(struct pf_state *); + +extern export_pflow_t *export_pflow_ptr; + +/* pflog */ +struct pf_ruleset; +struct pf_pdesc; +typedef int pflog_packet_t(struct pfi_kif *, struct mbuf *, sa_family_t, + u_int8_t, u_int8_t, struct pf_rule *, struct pf_rule *, + struct pf_ruleset *, struct pf_pdesc *); + +extern pflog_packet_t *pflog_packet_ptr; + +/* pf uid hack */ +VNET_DECLARE(int, debug_pfugidhack); +#define V_debug_pfugidhack VNET(debug_pfugidhack) + +#define V_pf_end_threads VNET(pf_end_threads) +#endif + +/* Macros to set/clear/test flags. 
*/ +#ifdef _KERNEL +#define SET(t, f) ((t) |= (f)) +#define CLR(t, f) ((t) &= ~(f)) +#define ISSET(t, f) ((t) & (f)) +#endif +#endif + +#define PFSYNC_FLAG_SRCNODE 0x04 +#define PFSYNC_FLAG_NATSRCNODE 0x08 + +/* for copies to/from network byte order */ +/* ioctl interface also uses network byte order */ +#define pf_state_peer_hton(s,d) do { \ + (d)->seqlo = htonl((s)->seqlo); \ + (d)->seqhi = htonl((s)->seqhi); \ + (d)->seqdiff = htonl((s)->seqdiff); \ + (d)->max_win = htons((s)->max_win); \ + (d)->mss = htons((s)->mss); \ + (d)->state = (s)->state; \ + (d)->wscale = (s)->wscale; \ + if ((s)->scrub) { \ + (d)->scrub.pfss_flags = \ + htons((s)->scrub->pfss_flags & PFSS_TIMESTAMP); \ + (d)->scrub.pfss_ttl = (s)->scrub->pfss_ttl; \ + (d)->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);\ + (d)->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID; \ + } \ +} while (0) + +#define pf_state_peer_ntoh(s,d) do { \ + (d)->seqlo = ntohl((s)->seqlo); \ + (d)->seqhi = ntohl((s)->seqhi); \ + (d)->seqdiff = ntohl((s)->seqdiff); \ + (d)->max_win = ntohs((s)->max_win); \ + (d)->mss = ntohs((s)->mss); \ + (d)->state = (s)->state; \ + (d)->wscale = (s)->wscale; \ + if ((s)->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID && \ + (d)->scrub != NULL) { \ + (d)->scrub->pfss_flags = \ + ntohs((s)->scrub.pfss_flags) & PFSS_TIMESTAMP; \ + (d)->scrub->pfss_ttl = (s)->scrub.pfss_ttl; \ + (d)->scrub->pfss_ts_mod = ntohl((s)->scrub.pfss_ts_mod);\ + } \ +} while (0) + +#define pf_state_counter_hton(s,d) do { \ + d[0] = htonl((s>>32)&0xffffffff); \ + d[1] = htonl(s&0xffffffff); \ +} while (0) + +#define pf_state_counter_from_pfsync(s) \ + (((u_int64_t)(s[0])<<32) | (u_int64_t)(s[1])) + +#define pf_state_counter_ntoh(s,d) do { \ + d = ntohl(s[0]); \ + d = d<<32; \ + d += ntohl(s[1]); \ +} while (0) TAILQ_HEAD(pf_rulequeue, pf_rule); @@ -881,9 +1097,11 @@ RB_PROTOTYPE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare); #define PFR_TFLAG_INACTIVE 0x00000008 #define PFR_TFLAG_REFERENCED 0x00000010 
#define PFR_TFLAG_REFDANCHOR 0x00000020 -#define PFR_TFLAG_USRMASK 0x00000003 +#define PFR_TFLAG_COUNTERS 0x00000040 +/* Adjust masks below when adding flags. */ +#define PFR_TFLAG_USRMASK 0x00000043 #define PFR_TFLAG_SETMASK 0x0000003C -#define PFR_TFLAG_ALLMASK 0x0000003F +#define PFR_TFLAG_ALLMASK 0x0000007F struct pfr_table { char pfrt_anchor[MAXPATHLEN]; @@ -894,7 +1112,7 @@ struct pfr_table { enum { PFR_FB_NONE, PFR_FB_MATCH, PFR_FB_ADDED, PFR_FB_DELETED, PFR_FB_CHANGED, PFR_FB_CLEARED, PFR_FB_DUPLICATE, - PFR_FB_NOTMATCH, PFR_FB_CONFLICT, PFR_FB_MAX }; + PFR_FB_NOTMATCH, PFR_FB_CONFLICT, PFR_FB_NOCOUNT, PFR_FB_MAX }; struct pfr_addr { union { @@ -944,20 +1162,32 @@ union sockaddr_union { }; #endif /* _SOCKADDR_UNION_DEFINED */ +struct pfr_kcounters { + u_int64_t pfrkc_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; + u_int64_t pfrkc_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; +}; + SLIST_HEAD(pfr_kentryworkq, pfr_kentry); struct pfr_kentry { struct radix_node pfrke_node[2]; union sockaddr_union pfrke_sa; - u_int64_t pfrke_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; - u_int64_t pfrke_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; SLIST_ENTRY(pfr_kentry) pfrke_workq; + union { + + struct pfr_kcounters *pfrke_counters; +#if 0 + struct pfr_kroute *pfrke_route; +#endif + } u; long pfrke_tzero; u_int8_t pfrke_af; u_int8_t pfrke_net; u_int8_t pfrke_not; u_int8_t pfrke_mark; - u_int8_t pfrke_intrpool; }; +#define pfrke_counters u.pfrke_counters +#define pfrke_route u.pfrke_route + SLIST_HEAD(pfr_ktableworkq, pfr_ktable); RB_HEAD(pfr_ktablehead, pfr_ktable); @@ -986,17 +1216,25 @@ struct pfr_ktable { #define pfrkt_nomatch pfrkt_ts.pfrts_nomatch #define pfrkt_tzero pfrkt_ts.pfrts_tzero -RB_HEAD(pf_state_tree_lan_ext, pf_state); -RB_PROTOTYPE(pf_state_tree_lan_ext, pf_state, - u.s.entry_lan_ext, pf_state_compare_lan_ext); +RB_HEAD(pf_state_tree, pf_state_key); +RB_PROTOTYPE(pf_state_tree, pf_state_key, entry, pf_state_compare_key); -RB_HEAD(pf_state_tree_ext_gwy, pf_state); 
-RB_PROTOTYPE(pf_state_tree_ext_gwy, pf_state, - u.s.entry_ext_gwy, pf_state_compare_ext_gwy); +RB_HEAD(pf_state_tree_ext_gwy, pf_state_key); +RB_PROTOTYPE(pf_state_tree_ext_gwy, pf_state_key, + entry_ext_gwy, pf_state_compare_ext_gwy); -TAILQ_HEAD(pfi_statehead, pfi_kif); RB_HEAD(pfi_ifhead, pfi_kif); +/* state tables */ +#ifdef __FreeBSD__ +#ifdef _KERNEL +VNET_DECLARE(struct pf_state_tree, pf_statetbl); +#define V_pf_statetbl VNET(pf_statetbl) +#endif +#else +extern struct pf_state_tree pf_statetbl; +#endif + /* keep synced with pfi_kif, used in RB_FIND */ struct pfi_kif_cmp { char pfik_name[IFNAMSIZ]; @@ -1009,12 +1247,7 @@ struct pfi_kif { u_int64_t pfik_bytes[2][2][2]; u_int32_t pfik_tzero; int pfik_flags; - struct pf_state_tree_lan_ext pfik_lan_ext; - struct pf_state_tree_ext_gwy pfik_ext_gwy; - TAILQ_ENTRY(pfi_kif) pfik_w_states; -#ifndef __FreeBSD__ void *pfik_ah_cookie; -#endif struct ifnet *pfik_ifp; struct ifg_group *pfik_group; int pfik_states; @@ -1029,9 +1262,6 @@ enum pfi_kif_refs { }; #define PFI_IFLAG_SKIP 0x0100 /* skip filtering on interface */ -/* XXX: revisist */ -#define PFI_IFLAG_SETABLE_MASK 0x0100 /* setable via DIOC{SET,CLR}IFFLAG */ -#define PFI_IFLAG_PLACEHOLDER 0x8000 /* placeholder group/interface */ struct pf_pdesc { struct { @@ -1050,16 +1280,22 @@ struct pf_pdesc { #endif /* INET6 */ void *any; } hdr; - struct pf_addr baddr; /* address before translation */ - struct pf_addr naddr; /* address after translation */ + struct pf_rule *nat_rule; /* nat/rdr rule applied to packet */ - struct pf_addr *src; - struct pf_addr *dst; struct ether_header *eh; + struct pf_addr *src; /* src address */ + struct pf_addr *dst; /* dst address */ + u_int16_t *sport; + u_int16_t *dport; +#ifdef __FreeBSD__ struct pf_mtag *pf_mtag; - u_int16_t *ip_sum; +#endif + u_int32_t p_len; /* total length of payload */ + + u_int16_t *ip_sum; + u_int16_t *proto_sum; u_int16_t flags; /* Let SCRUB trigger behavior in * state code. 
Easier than tags */ #define PFDESC_TCP_NORM 0x0001 /* TCP shall be statefully scrubbed */ @@ -1067,6 +1303,9 @@ struct pf_pdesc { sa_family_t af; u_int8_t proto; u_int8_t tos; + u_int8_t dir; /* direction */ + u_int8_t sidx; /* key index for source */ + u_int8_t didx; /* key index for destination */ }; /* flags for RDR options */ @@ -1175,6 +1414,15 @@ struct pf_pdesc { *(a) = (x); \ } while (0) +#ifdef __FreeBSD__ +#define REASON_SET(a, x) \ + do { \ + if ((a) != NULL) \ + *(a) = (x); \ + if (x < PFRES_MAX) \ + V_pf_status.counters[x]++; \ + } while (0) +#else #define REASON_SET(a, x) \ do { \ if ((a) != NULL) \ @@ -1182,6 +1430,7 @@ struct pf_pdesc { if (x < PFRES_MAX) \ pf_status.counters[x]++; \ } while (0) +#endif struct pf_status { u_int64_t counters[PFRES_MAX]; @@ -1265,27 +1514,6 @@ struct pf_altq { u_int32_t qid; /* return value */ }; -#ifndef __FreeBSD__ - -#define PF_TAG_GENERATED 0x01 -#define PF_TAG_FRAGCACHE 0x02 -#define PF_TAG_TRANSLATE_LOCALHOST 0x04 - -struct pf_mtag { - void *hdr; /* saved hdr pos in mbuf, for ECN */ - u_int rtableid; /* alternate routing table id */ - u_int32_t qid; /* queue id */ - u_int16_t tag; /* tag id */ - u_int8_t flags; - u_int8_t routed; - sa_family_t af; /* for ECN */ -}; -#endif - -struct pf_tag { - u_int16_t tag; /* tag id */ -}; - struct pf_tagname { TAILQ_ENTRY(pf_tagname) entries; char name[PF_TAG_NAME_SIZE]; @@ -1293,6 +1521,14 @@ struct pf_tagname { int ref; }; +struct pf_divert { + union { + struct in_addr ipv4; + struct in6_addr ipv6; + } addr; + u_int16_t port; +}; + #define PFFRAG_FRENT_HIWAT 5000 /* Number of fragment entries */ #define PFFRAG_FRAG_HIWAT 1000 /* Number of fragmented packets */ #define PFFRAG_FRCENT_HIWAT 50000 /* Number of fragment cache entries */ @@ -1343,31 +1579,32 @@ struct pfioc_natlook { }; struct pfioc_state { - u_int32_t nr; - struct pf_state state; + struct pfsync_state state; }; struct pfioc_src_node_kill { - /* XXX returns the number of src nodes killed in psnk_af */ sa_family_t 
psnk_af; struct pf_rule_addr psnk_src; struct pf_rule_addr psnk_dst; + u_int psnk_killed; }; struct pfioc_state_kill { - /* XXX returns the number of states killed in psk_af */ + struct pf_state_cmp psk_pfcmp; sa_family_t psk_af; int psk_proto; struct pf_rule_addr psk_src; struct pf_rule_addr psk_dst; char psk_ifname[IFNAMSIZ]; + char psk_label[PF_RULE_LABEL_SIZE]; + u_int psk_killed; }; struct pfioc_states { int ps_len; union { - caddr_t psu_buf; - struct pf_state *psu_states; + caddr_t psu_buf; + struct pfsync_state *psu_states; } ps_u; #define ps_buf ps_u.psu_buf #define ps_states ps_u.psu_states @@ -1518,55 +1755,97 @@ struct pfioc_iface { #define DIOCRDELTABLES _IOWR('D', 62, struct pfioc_table) #define DIOCRGETTABLES _IOWR('D', 63, struct pfioc_table) #define DIOCRGETTSTATS _IOWR('D', 64, struct pfioc_table) -#define DIOCRCLRTSTATS _IOWR('D', 65, struct pfioc_table) +#define DIOCRCLRTSTATS _IOWR('D', 65, struct pfioc_table) #define DIOCRCLRADDRS _IOWR('D', 66, struct pfioc_table) #define DIOCRADDADDRS _IOWR('D', 67, struct pfioc_table) #define DIOCRDELADDRS _IOWR('D', 68, struct pfioc_table) #define DIOCRSETADDRS _IOWR('D', 69, struct pfioc_table) #define DIOCRGETADDRS _IOWR('D', 70, struct pfioc_table) #define DIOCRGETASTATS _IOWR('D', 71, struct pfioc_table) -#define DIOCRCLRASTATS _IOWR('D', 72, struct pfioc_table) +#define DIOCRCLRASTATS _IOWR('D', 72, struct pfioc_table) #define DIOCRTSTADDRS _IOWR('D', 73, struct pfioc_table) #define DIOCRSETTFLAGS _IOWR('D', 74, struct pfioc_table) -#define DIOCRINADEFINE _IOWR('D', 77, struct pfioc_table) -#define DIOCOSFPFLUSH _IO('D', 78) -#define DIOCOSFPADD _IOWR('D', 79, struct pf_osfp_ioctl) -#define DIOCOSFPGET _IOWR('D', 80, struct pf_osfp_ioctl) -#define DIOCXBEGIN _IOWR('D', 81, struct pfioc_trans) -#define DIOCXCOMMIT _IOWR('D', 82, struct pfioc_trans) -#define DIOCXROLLBACK _IOWR('D', 83, struct pfioc_trans) -#define DIOCGETSRCNODES _IOWR('D', 84, struct pfioc_src_nodes) -#define DIOCCLRSRCNODES _IO('D', 
85) -#define DIOCSETHOSTID _IOWR('D', 86, u_int32_t) -#define DIOCIGETIFACES _IOWR('D', 87, struct pfioc_iface) -#define DIOCSETIFFLAG _IOWR('D', 89, struct pfioc_iface) -#define DIOCCLRIFFLAG _IOWR('D', 90, struct pfioc_iface) -#define DIOCKILLSRCNODES _IOWR('D', 91, struct pfioc_src_node_kill) +#define DIOCRINADEFINE _IOWR('D', 77, struct pfioc_table) +#define DIOCOSFPFLUSH _IO('D', 78) +#define DIOCOSFPADD _IOWR('D', 79, struct pf_osfp_ioctl) +#define DIOCOSFPGET _IOWR('D', 80, struct pf_osfp_ioctl) +#define DIOCXBEGIN _IOWR('D', 81, struct pfioc_trans) +#define DIOCXCOMMIT _IOWR('D', 82, struct pfioc_trans) +#define DIOCXROLLBACK _IOWR('D', 83, struct pfioc_trans) +#define DIOCGETSRCNODES _IOWR('D', 84, struct pfioc_src_nodes) +#define DIOCCLRSRCNODES _IO('D', 85) +#define DIOCSETHOSTID _IOWR('D', 86, u_int32_t) +#define DIOCIGETIFACES _IOWR('D', 87, struct pfioc_iface) +#define DIOCSETIFFLAG _IOWR('D', 89, struct pfioc_iface) +#define DIOCCLRIFFLAG _IOWR('D', 90, struct pfioc_iface) +#define DIOCKILLSRCNODES _IOWR('D', 91, struct pfioc_src_node_kill) #ifdef __FreeBSD__ struct pf_ifspeed { char ifname[IFNAMSIZ]; u_int32_t baudrate; }; -#define DIOCGIFSPEED _IOWR('D', 92, struct pf_ifspeed) +#define DIOCGIFSPEED _IOWR('D', 92, struct pf_ifspeed) #endif #ifdef _KERNEL RB_HEAD(pf_src_tree, pf_src_node); RB_PROTOTYPE(pf_src_tree, pf_src_node, entry, pf_src_compare); +#ifdef __FreeBSD__ +VNET_DECLARE(struct pf_src_tree, tree_src_tracking); +#define V_tree_src_tracking VNET(tree_src_tracking) +#else extern struct pf_src_tree tree_src_tracking; +#endif RB_HEAD(pf_state_tree_id, pf_state); RB_PROTOTYPE(pf_state_tree_id, pf_state, entry_id, pf_state_compare_id); +#ifdef __FreeBSD__ +VNET_DECLARE(struct pf_state_tree_id, tree_id); +#define V_tree_id VNET(tree_id) +VNET_DECLARE(struct pf_state_queue, state_list); +#define V_state_list VNET(state_list) +#else extern struct pf_state_tree_id tree_id; extern struct pf_state_queue state_list; +#endif TAILQ_HEAD(pf_poolqueue, 
pf_pool); +#ifdef __FreeBSD__ +VNET_DECLARE(struct pf_poolqueue, pf_pools[2]); +#define V_pf_pools VNET(pf_pools) +#else extern struct pf_poolqueue pf_pools[2]; +#endif TAILQ_HEAD(pf_altqqueue, pf_altq); +#ifdef __FreeBSD__ +VNET_DECLARE(struct pf_altqqueue, pf_altqs[2]); +#define V_pf_altqs VNET(pf_altqs) +VNET_DECLARE(struct pf_palist, pf_pabuf); +#define V_pf_pabuf VNET(pf_pabuf) +#else extern struct pf_altqqueue pf_altqs[2]; extern struct pf_palist pf_pabuf; +#endif +#ifdef __FreeBSD__ +VNET_DECLARE(u_int32_t, ticket_altqs_active); +#define V_ticket_altqs_active VNET(ticket_altqs_active) +VNET_DECLARE(u_int32_t, ticket_altqs_inactive); +#define V_ticket_altqs_inactive VNET(ticket_altqs_inactive) +VNET_DECLARE(int, altqs_inactive_open); +#define V_altqs_inactive_open VNET(altqs_inactive_open) +VNET_DECLARE(u_int32_t, ticket_pabuf); +#define V_ticket_pabuf VNET(ticket_pabuf) +VNET_DECLARE(struct pf_altqqueue *, pf_altqs_active); +#define V_pf_altqs_active VNET(pf_altqs_active) +VNET_DECLARE(struct pf_altqqueue *, pf_altqs_inactive); +#define V_pf_altqs_inactive VNET(pf_altqs_inactive) +VNET_DECLARE(struct pf_poolqueue *, pf_pools_active); +#define V_pf_pools_active VNET(pf_pools_active) +VNET_DECLARE(struct pf_poolqueue *, pf_pools_inactive); +#define V_pf_pools_inactive VNET(pf_pools_inactive) +#else extern u_int32_t ticket_altqs_active; extern u_int32_t ticket_altqs_inactive; extern int altqs_inactive_open; @@ -1575,6 +1854,7 @@ extern struct pf_altqqueue *pf_altqs_active; extern struct pf_altqqueue *pf_altqs_inactive; extern struct pf_poolqueue *pf_pools_active; extern struct pf_poolqueue *pf_pools_inactive; +#endif extern int pf_tbladdr_setup(struct pf_ruleset *, struct pf_addr_wrap *); extern void pf_tbladdr_remove(struct pf_addr_wrap *); @@ -1582,49 +1862,82 @@ extern void pf_tbladdr_copyout(struct pf_addr_wrap *); extern void pf_calc_skip_steps(struct pf_rulequeue *); #ifdef __FreeBSD__ #ifdef ALTQ -extern void pf_altq_ifnet_event(struct ifnet *, int); 
+extern void pf_altq_ifnet_event(struct ifnet *, int); #endif -extern uma_zone_t pf_src_tree_pl, pf_rule_pl; -extern uma_zone_t pf_state_pl, pf_altq_pl, pf_pooladdr_pl; -extern uma_zone_t pfr_ktable_pl, pfr_kentry_pl, pfr_kentry_pl2; -extern uma_zone_t pf_cache_pl, pf_cent_pl; -extern uma_zone_t pf_state_scrub_pl; -extern uma_zone_t pfi_addr_pl; +VNET_DECLARE(uma_zone_t, pf_src_tree_pl); +#define V_pf_src_tree_pl VNET(pf_src_tree_pl) +VNET_DECLARE(uma_zone_t, pf_rule_pl); +#define V_pf_rule_pl VNET(pf_rule_pl) +VNET_DECLARE(uma_zone_t, pf_state_pl); +#define V_pf_state_pl VNET(pf_state_pl) +VNET_DECLARE(uma_zone_t, pf_state_key_pl); +#define V_pf_state_key_pl VNET(pf_state_key_pl) +VNET_DECLARE(uma_zone_t, pf_state_item_pl); +#define V_pf_state_item_pl VNET(pf_state_item_pl) +VNET_DECLARE(uma_zone_t, pf_altq_pl); +#define V_pf_altq_pl VNET(pf_altq_pl) +VNET_DECLARE(uma_zone_t, pf_pooladdr_pl); +#define V_pf_pooladdr_pl VNET(pf_pooladdr_pl) +VNET_DECLARE(uma_zone_t, pfr_ktable_pl); +#define V_pfr_ktable_pl VNET(pfr_ktable_pl) +VNET_DECLARE(uma_zone_t, pfr_kentry_pl); +#define V_pfr_kentry_pl VNET(pfr_kentry_pl) +VNET_DECLARE(uma_zone_t, pf_cache_pl); +#define V_pf_cache_pl VNET(pf_cache_pl) +VNET_DECLARE(uma_zone_t, pf_cent_pl); +#define V_pf_cent_pl VNET(pf_cent_pl) +VNET_DECLARE(uma_zone_t, pf_state_scrub_pl); +#define V_pf_state_scrub_pl VNET(pf_state_scrub_pl) +VNET_DECLARE(uma_zone_t, pfi_addr_pl); +#define V_pfi_addr_pl VNET(pfi_addr_pl) #else extern struct pool pf_src_tree_pl, pf_rule_pl; -extern struct pool pf_state_pl, pf_altq_pl, pf_pooladdr_pl; +extern struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl, + pf_altq_pl, pf_pooladdr_pl; extern struct pool pf_state_scrub_pl; #endif extern void pf_purge_thread(void *); #ifdef __FreeBSD__ extern int pf_purge_expired_src_nodes(int); -extern int pf_purge_expired_states(u_int32_t, int); +extern int pf_purge_expired_states(u_int32_t , int); #else extern void pf_purge_expired_src_nodes(int); extern void 
pf_purge_expired_states(u_int32_t); #endif extern void pf_unlink_state(struct pf_state *); extern void pf_free_state(struct pf_state *); -extern int pf_insert_state(struct pfi_kif *, +extern int pf_state_insert(struct pfi_kif *, + struct pf_state_key *, + struct pf_state_key *, struct pf_state *); extern int pf_insert_src_node(struct pf_src_node **, struct pf_rule *, struct pf_addr *, sa_family_t); void pf_src_tree_remove_state(struct pf_state *); extern struct pf_state *pf_find_state_byid(struct pf_state_cmp *); -extern struct pf_state *pf_find_state_all(struct pf_state_cmp *key, - u_int8_t tree, int *more); +extern struct pf_state *pf_find_state_all(struct pf_state_key_cmp *, + u_int, int *); extern void pf_print_state(struct pf_state *); extern void pf_print_flags(u_int8_t); extern u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t, u_int8_t); +#ifdef __FreeBSD__ +VNET_DECLARE(struct ifnet *, sync_ifp); +#define V_sync_ifp VNET(sync_ifp); +VNET_DECLARE(struct pf_rule, pf_default_rule); +#define V_pf_default_rule VNET(pf_default_rule) +#else extern struct ifnet *sync_ifp; extern struct pf_rule pf_default_rule; +#endif extern void pf_addrcpy(struct pf_addr *, struct pf_addr *, u_int8_t); void pf_rm_rule(struct pf_rulequeue *, struct pf_rule *); +#ifndef __FreeBSD__ +struct pf_divert *pf_find_divert(struct mbuf *); +#endif #ifdef INET #ifdef __FreeBSD__ @@ -1656,8 +1969,11 @@ void pf_change_a(void *, u_int16_t *, u_int32_t, u_int8_t); int pflog_packet(struct pfi_kif *, struct mbuf *, sa_family_t, u_int8_t, u_int8_t, struct pf_rule *, struct pf_rule *, struct pf_ruleset *, struct pf_pdesc *); +void pf_send_deferred_syn(struct pf_state *); int pf_match_addr(u_int8_t, struct pf_addr *, struct pf_addr *, struct pf_addr *, sa_family_t); +int pf_match_addr_range(struct pf_addr *, struct pf_addr *, + struct pf_addr *, sa_family_t); int pf_match(u_int8_t, u_int32_t, u_int32_t, u_int32_t); int pf_match_port(u_int8_t, u_int16_t, u_int16_t, u_int16_t); int 
pf_match_uid(u_int8_t, uid_t, uid_t, uid_t); @@ -1682,10 +1998,13 @@ void pf_purge_expired_fragments(void); int pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *); int pf_rtlabel_match(struct pf_addr *, sa_family_t, struct pf_addr_wrap *); #ifdef __FreeBSD__ -int pf_socket_lookup(int, struct pf_pdesc *, struct inpcb *); +int pf_socket_lookup(int, struct pf_pdesc *, struct inpcb *); #else int pf_socket_lookup(int, struct pf_pdesc *); #endif +struct pf_state_key *pf_alloc_state_key(int); +void pf_pkt_addr_changed(struct mbuf *); +int pf_state_key_attach(struct pf_state_key *, struct pf_state *, int); void pfr_initialize(void); int pfr_match_addr(struct pfr_ktable *, struct pf_addr *, sa_family_t); void pfr_update_stats(struct pfr_ktable *, struct pf_addr *, sa_family_t, @@ -1694,7 +2013,7 @@ int pfr_pool_get(struct pfr_ktable *, int *, struct pf_addr *, struct pf_addr **, struct pf_addr **, sa_family_t); void pfr_dynaddr_update(struct pfr_ktable *, struct pfi_dynaddr *); struct pfr_ktable * - pfr_attach_table(struct pf_ruleset *, char *); + pfr_attach_table(struct pf_ruleset *, char *, int); void pfr_detach_table(struct pfr_ktable *); int pfr_clr_tables(struct pfr_table *, int *, int); int pfr_add_tables(struct pfr_table *, int, int *, int); @@ -1723,8 +2042,12 @@ int pfr_ina_commit(struct pfr_table *, u_int32_t, int *, int *, int); int pfr_ina_define(struct pfr_table *, struct pfr_addr *, int, int *, int *, u_int32_t, int); -extern struct pfi_statehead pfi_statehead; +#ifdef __FreeBSD__ +VNET_DECLARE(struct pfi_kif *, pfi_all); +#define V_pfi_all VNET(pfi_all) +#else extern struct pfi_kif *pfi_all; +#endif void pfi_initialize(void); #ifdef __FreeBSD__ @@ -1744,30 +2067,44 @@ int pfi_match_addr(struct pfi_dynaddr *, struct pf_addr *, int pfi_dynaddr_setup(struct pf_addr_wrap *, sa_family_t); void pfi_dynaddr_remove(struct pf_addr_wrap *); void pfi_dynaddr_copyout(struct pf_addr_wrap *); -void pfi_fill_oldstatus(struct pf_status *); -int 
pfi_clr_istats(const char *); +void pfi_update_status(const char *, struct pf_status *); int pfi_get_ifaces(const char *, struct pfi_kif *, int *); int pfi_set_flags(const char *, int); int pfi_clear_flags(const char *, int); +#ifdef __FreeBSD__ +int pf_match_tag(struct mbuf *, struct pf_rule *, int *, + struct pf_mtag *); +#else +int pf_match_tag(struct mbuf *, struct pf_rule *, int *); +#endif u_int16_t pf_tagname2tag(char *); void pf_tag2tagname(u_int16_t, char *); void pf_tag_ref(u_int16_t); void pf_tag_unref(u_int16_t); -int pf_tag_packet(struct mbuf *, struct pf_mtag *, int, int); +#ifdef __FreeBSD__ +int pf_tag_packet(struct mbuf *, int, int, struct pf_mtag *); +#else +int pf_tag_packet(struct mbuf *, int, int); +#endif u_int32_t pf_qname2qid(char *); void pf_qid2qname(u_int32_t, char *); void pf_qid_unref(u_int32_t); -#ifndef __FreeBSD__ -struct pf_mtag *pf_find_mtag(struct mbuf *); -struct pf_mtag *pf_get_mtag(struct mbuf *); -#endif +#ifdef __FreeBSD__ +VNET_DECLARE(struct pf_status, pf_status); +#define V_pf_status VNET(pf_status) +#else extern struct pf_status pf_status; +#endif #ifdef __FreeBSD__ -extern uma_zone_t pf_frent_pl, pf_frag_pl; -extern struct sx pf_consistency_lock; +VNET_DECLARE(uma_zone_t, pf_frent_pl); +#define V_pf_frent_pl VNET(pf_frent_pl) +VNET_DECLARE(uma_zone_t, pf_frag_pl); +#define V_pf_frag_pl VNET(pf_frag_pl) +VNET_DECLARE(struct sx, pf_consistency_lock); +#define V_pf_consistency_lock VNET(pf_consistency_lock) #else extern struct pool pf_frent_pl, pf_frag_pl; extern struct rwlock pf_consistency_lock; @@ -1777,7 +2114,12 @@ struct pf_pool_limit { void *pp; unsigned limit; }; +#ifdef __FreeBSD__ +VNET_DECLARE(struct pf_pool_limit, pf_pool_limits[PF_LIMIT_MAX]); +#define V_pf_pool_limits VNET(pf_pool_limits) +#else extern struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX]; +#endif #ifdef __FreeBSD__ struct pf_frent { @@ -1788,34 +2130,44 @@ struct pf_frent { struct pf_frcache { LIST_ENTRY(pf_frcache) fr_next; - uint16_t fr_off; - 
uint16_t fr_end; + uint16_t fr_off; + uint16_t fr_end; }; struct pf_fragment { RB_ENTRY(pf_fragment) fr_entry; TAILQ_ENTRY(pf_fragment) frag_next; - struct in_addr fr_src; - struct in_addr fr_dst; - u_int8_t fr_p; /* protocol of this fragment */ - u_int8_t fr_flags; /* status flags */ - u_int16_t fr_id; /* fragment id for reassemble */ - u_int16_t fr_max; /* fragment data max */ - u_int32_t fr_timeout; -#define fr_queue fr_u.fru_queue -#define fr_cache fr_u.fru_cache + struct in_addr fr_src; + struct in_addr fr_dst; + u_int8_t fr_p; /* protocol of this fragment */ + u_int8_t fr_flags; /* status flags */ + u_int16_t fr_id; /* fragment id for reassemble */ + u_int16_t fr_max; /* fragment data max */ + u_int32_t fr_timeout; +#define fr_queue fr_u.fru_queue +#define fr_cache fr_u.fru_cache union { - LIST_HEAD(pf_fragq, pf_frent) fru_queue; /* buffering */ - LIST_HEAD(pf_cacheq, pf_frcache) fru_cache; /* non-buf */ + LIST_HEAD(pf_fragq, pf_frent) fru_queue; /* buffering */ + LIST_HEAD(pf_cacheq, pf_frcache) fru_cache; /* non-buf */ } fr_u; }; #endif /* (__FreeBSD__) */ #endif /* _KERNEL */ -extern struct pf_anchor_global pf_anchors; -extern struct pf_anchor pf_main_anchor; +#ifdef __FreeBSD__ +#ifdef _KERNEL +VNET_DECLARE(struct pf_anchor_global, pf_anchors); +#define V_pf_anchors VNET(pf_anchors) +VNET_DECLARE(struct pf_anchor, pf_main_anchor); +#define V_pf_main_anchor VNET(pf_main_anchor) +#define pf_main_ruleset V_pf_main_anchor.ruleset +#endif +#else +extern struct pf_anchor_global pf_anchors; +extern struct pf_anchor pf_main_anchor; #define pf_main_ruleset pf_main_anchor.ruleset +#endif /* these ruleset functions can be linked into userland programs (pfctl) */ int pf_get_ruleset_number(u_int8_t); @@ -1832,7 +2184,6 @@ struct pf_ruleset *pf_find_or_create_ruleset(const char *); void pf_rs_initialize(void); #ifndef __FreeBSD__ -/* ?!? 
*/ #ifdef _KERNEL int pf_anchor_copyout(const struct pf_ruleset *, const struct pf_rule *, struct pfioc_rule *); @@ -1863,4 +2214,31 @@ int pf_osfp_match(struct pf_osfp_enlist *, pf_osfp_t); struct pf_os_fingerprint * pf_osfp_validate(void); +#ifdef _KERNEL +void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t); + +void pf_step_into_anchor(int *, struct pf_ruleset **, int, + struct pf_rule **, struct pf_rule **, int *); +int pf_step_out_of_anchor(int *, struct pf_ruleset **, + int, struct pf_rule **, struct pf_rule **, + int *); + +int pf_map_addr(u_int8_t, struct pf_rule *, + struct pf_addr *, struct pf_addr *, + struct pf_addr *, struct pf_src_node **); +struct pf_rule *pf_get_translation(struct pf_pdesc *, struct mbuf *, + int, int, struct pfi_kif *, struct pf_src_node **, + struct pf_state_key **, struct pf_state_key **, + struct pf_state_key **, struct pf_state_key **, + struct pf_addr *, struct pf_addr *, + u_int16_t, u_int16_t); + +int pf_state_key_setup(struct pf_pdesc *, struct pf_rule *, + struct pf_state_key **, struct pf_state_key **, + struct pf_state_key **, struct pf_state_key **, + struct pf_addr *, struct pf_addr *, + u_int16_t, u_int16_t); +#endif /* _KERNEL */ + + #endif /* _NET_PFVAR_H_ */ -- cgit v1.1