author     glebius <glebius@FreeBSD.org>    2012-09-08 06:41:54 +0000
committer  glebius <glebius@FreeBSD.org>    2012-09-08 06:41:54 +0000
commit     5190d38ee392f405f48ee4edd4912dbe48d19953
tree       7b5b7e7d4dce516742188586df4e95db4183a7da
parent     fb40c86f7f3666ca5e299037097af8acad817a85
Merge the projects/pf/head branch, which has been worked on for the last
six months, into head.  The most significant achievements in the new code:
o Fine-grained locking, and thus much better performance (see the locking
  sketch below).
o Fixes for many problems in pf that were specific to the FreeBSD port.
The new code has far fewer ifdefs and OpenBSDisms, and is thus more
attractive to our developers.
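To make the locking point concrete, here is the per-row state table
locking pattern, lightly adapted from the pfsync_in_clr() hunk further
down in this diff.  All identifiers (V_pf_idhash, V_pf_hashmask,
PF_HASHROW_LOCK(), pf_unlink_state(), PFSTATE_NOSYNC) come from that hunk;
creatorid stands for the pfsync creator ID being cleared, and this is an
excerpted kernel fragment, not standalone code.  States now live in an
array of independently locked hash rows, so a full walk holds one row
lock at a time instead of the old single global pf mutex.

	/* Adapted from pfsync_in_clr() below: one lock per hash row. */
	for (int i = 0; i <= V_pf_hashmask; i++) {
		struct pf_idhash *ih = &V_pf_idhash[i];
		struct pf_state *s;
relock:
		PF_HASHROW_LOCK(ih);
		LIST_FOREACH(s, &ih->states, entry) {
			if (s->creatorid == creatorid) {
				s->state_flags |= PFSTATE_NOSYNC;
				/*
				 * pf_unlink_state() releases the row lock,
				 * hence the rescan of this row.
				 */
				pf_unlink_state(s, PF_ENTER_LOCKED);
				goto relock;
			}
		}
		PF_HASHROW_UNLOCK(ih);
	}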
Those interested in the details can browse the SVN log of the
projects/pf/head branch.  For reference, here is the exact list of
revisions merged:
r232043, r232044, r232062, r232148, r232149, r232150, r232298, r232330,
r232332, r232340, r232386, r232390, r232391, r232605, r232655, r232656,
r232661, r232662, r232663, r232664, r232673, r232691, r233309, r233782,
r233829, r233830, r233834, r233835, r233836, r233865, r233866, r233868,
r233873, r234056, r234096, r234100, r234108, r234175, r234187, r234223,
r234271, r234272, r234282, r234307, r234309, r234382, r234384, r234456,
r234486, r234606, r234640, r234641, r234642, r234644, r234651, r235505,
r235506, r235535, r235605, r235606, r235826, r235991, r235993, r236168,
r236173, r236179, r236180, r236181, r236186, r236223, r236227, r236230,
r236252, r236254, r236298, r236299, r236300, r236301, r236397, r236398,
r236399, r236499, r236512, r236513, r236525, r236526, r236545, r236548,
r236553, r236554, r236556, r236557, r236561, r236570, r236630, r236672,
r236673, r236679, r236706, r236710, r236718, r237154, r237155, r237169,
r237314, r237363, r237364, r237368, r237369, r237376, r237440, r237442,
r237751, r237783, r237784, r237785, r237788, r237791, r238421, r238522,
r238523, r238524, r238525, r239173, r239186, r239644, r239652, r239661,
r239773, r240125, r240130, r240131, r240136, r240186, r240196, r240212.
I'd like to thank the people who participated in early testing:
Tested by: Florian Smeets <flo freebsd.org>
Tested by: Chekaluk Vitaly <artemrts ukr.net>
Tested by: Ben Wilber <ben desync.com>
Tested by: Ian FREISLICH <ianf cloudseed.co.za>
45 files changed, 4591 insertions, 9640 deletions
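The pf.4 hunk below documents two new loader(8) tunables,
net.pf.states_hashsize (default 32768) and net.pf.source_nodes_hashsize
(default 8192).  Both should be powers of 2, can be set from
/boot/loader.conf, and are mirrored by read-only sysctls of the same
names for inspection at runtime.  A minimal userland sketch for reading
them follows; sysctlbyname(3), the show() helper, and the width probing
are illustration only and not part of this commit, since the manual page
hunk does not state the integer type of the sysctls.

#include <sys/types.h>
#include <sys/sysctl.h>

#include <err.h>
#include <stdio.h>

/*
 * Print the current pf(4) hash table sizes.  The integer width of the
 * sysctls is probed at runtime since it is not documented.
 */
static void
show(const char *name)
{
	unsigned int ui;
	unsigned long ul;
	size_t len = 0;

	/* A NULL buffer makes sysctlbyname() report the value size. */
	if (sysctlbyname(name, NULL, &len, NULL, 0) == -1)
		err(1, "%s", name);
	if (len == sizeof(ui)) {
		if (sysctlbyname(name, &ui, &len, NULL, 0) == -1)
			err(1, "%s", name);
		printf("%s: %u\n", name, ui);
	} else {
		len = sizeof(ul);
		if (sysctlbyname(name, &ul, &len, NULL, 0) == -1)
			err(1, "%s", name);
		printf("%s: %lu\n", name, ul);
	}
}

int
main(void)
{
	show("net.pf.states_hashsize");
	show("net.pf.source_nodes_hashsize");
	return (0);
}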
@@ -24,6 +24,10 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 10.x IS SLOW: disable the most expensive debugging functionality run "ln -s 'abort:false,junk:false' /etc/malloc.conf".) +20120908: + The pf(4) packet filter ABI has been changed. pfctl(8) and + snmp_pf module need to be recompiled to work with new kernel. + 20120828: A new ZFS feature flag "com.delphix:empty_bpobj" has been merged to -HEAD. Pools that have empty_bpobj in active state can not be diff --git a/contrib/pf/man/pf.4 b/contrib/pf/man/pf.4 index 936a5a8..635078d 100644 --- a/contrib/pf/man/pf.4 +++ b/contrib/pf/man/pf.4 @@ -28,7 +28,7 @@ .\" .\" $FreeBSD$ .\" -.Dd July 17 2011 +.Dd June 29 2012 .Dt PF 4 .Os .Sh NAME @@ -75,6 +75,25 @@ separated by characters, similar to how file system hierarchies are laid out. The final component of the anchor path is the anchor under which operations will be performed. +.Sh SYSCTL VARIABLES AND LOADER TUNABLES +The following +.Xr loader 8 +tunables are available. +.Bl -tag -width indent +.It Va net.pf.states_hashsize +Size of hash tables that store states. +Should be power of 2. +Default value is 32768. +.It Va net.pf.source_nodes_hashsize +Size of hash table that store source nodes. +Should be power of 2. +Default value is 8192. +.El +.Pp +Read only +.Xr sysctl 8 +variables with matching names are provided to obtain current values +at runtime. .Sh IOCTL INTERFACE .Nm supports the following @@ -351,7 +370,6 @@ struct pf_status { u_int64_t scounters[SCNT_MAX]; u_int64_t pcounters[2][2][3]; u_int64_t bcounters[2][2]; - u_int64_t stateid; u_int32_t running; u_int32_t states; u_int32_t src_nodes; @@ -493,7 +511,7 @@ struct pfioc_limit { }; enum { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS, - PF_LIMIT_TABLES, PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_MAX }; + PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_MAX }; .Ed .It Dv DIOCGETLIMIT Fa "struct pfioc_limit *pl" Get the hard diff --git a/contrib/pf/man/pf.conf.5 b/contrib/pf/man/pf.conf.5 index dfec264..fc86111 100644 --- a/contrib/pf/man/pf.conf.5 +++ b/contrib/pf/man/pf.conf.5 @@ -28,7 +28,7 @@ .\" ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" -.Dd January 31 2009 +.Dd June 29 2012 .Dt PF.CONF 5 .Os .Sh NAME @@ -1421,7 +1421,7 @@ has the socket open where the packet is sourced from or destined to (depending on which socket is local). This is in addition to the normal information logged. .Pp -Due to the problems described in the BUGS section only the first packet +Only the first packet logged via .Ar log (all, user) will have the user credentials logged when using stateful matching. @@ -1479,13 +1479,6 @@ of the following keywords: .Bl -tag -width xxxxxxxxxxxxxx -compact .It Ar any Any address. -.It Ar route Aq Ar label -Any address whose associated route has label -.Aq Ar label . -See -.Xr route 4 -and -.Xr route 8 . .It Ar no-route Any address which is not currently routable. .It Ar urpf-failed @@ -1594,7 +1587,6 @@ pass in proto tcp from any to any port 25 pass in proto tcp from 10.0.0.0/8 port \*(Gt 1024 \e to ! 10.1.2.3 port != ssh pass in proto tcp from any os "OpenBSD" -pass in proto tcp from route "DTAG" .Ed .It Ar all This is equivalent to "from any to any". 
@@ -2949,9 +2941,9 @@ proto-list = ( proto-name | proto-number ) [ [ "," ] proto-list ] hosts = "all" | "from" ( "any" | "no-route" | "urpf-failed" | "self" | host | - "{" host-list "}" | "route" string ) [ port ] [ os ] + "{" host-list "}" ) [ port ] [ os ] "to" ( "any" | "no-route" | "self" | host | - "{" host-list "}" | "route" string ) [ port ] + "{" host-list "}" ) [ port ] ipspec = "any" | host | "{" host-list "}" host = [ "!" ] ( address [ "/" mask-bits ] | "\*(Lt" string "\*(Gt" ) @@ -3048,28 +3040,6 @@ Protocol name database. .It Pa /etc/services Service name database. .El -.Sh BUGS -Due to a lock order reversal (LOR) with the socket layer, the use of the -.Ar group -and -.Ar user -filter parameter in conjuction with a Giant-free netstack -can result in a deadlock. -A workaround is available under the -.Va debug.pfugidhack -sysctl which is automatically enabled when a -.Ar user -/ -.Ar group -rule is added or -.Ar log (user) -is specified. -.Pp -Route labels are not supported by the -.Fx -.Xr route 4 -system. -Rules with a route label do not match any traffic. .Sh SEE ALSO .Xr altq 4 , .Xr carp 4 , @@ -3080,7 +3050,6 @@ Rules with a route label do not match any traffic. .Xr pf 4 , .Xr pflow 4 , .Xr pfsync 4 , -.Xr route 4 , .Xr tcp 4 , .Xr udp 4 , .Xr hosts 5 , @@ -3090,7 +3059,6 @@ Rules with a route label do not match any traffic. .Xr ftp-proxy 8 , .Xr pfctl 8 , .Xr pflogd 8 , -.Xr route 8 .Sh HISTORY The .Nm diff --git a/contrib/pf/pfctl/parse.y b/contrib/pf/pfctl/parse.y index f798cac..99c26c0 100644 --- a/contrib/pf/pfctl/parse.y +++ b/contrib/pf/pfctl/parse.y @@ -159,8 +159,7 @@ enum { PF_STATE_OPT_MAX, PF_STATE_OPT_NOSYNC, PF_STATE_OPT_SRCTRACK, PF_STATE_OPT_MAX_SRC_STATES, PF_STATE_OPT_MAX_SRC_CONN, PF_STATE_OPT_MAX_SRC_CONN_RATE, PF_STATE_OPT_MAX_SRC_NODES, PF_STATE_OPT_OVERLOAD, PF_STATE_OPT_STATELOCK, - PF_STATE_OPT_TIMEOUT, PF_STATE_OPT_SLOPPY, - PF_STATE_OPT_PFLOW }; + PF_STATE_OPT_TIMEOUT, PF_STATE_OPT_SLOPPY, }; enum { PF_SRCTRACK_NONE, PF_SRCTRACK, PF_SRCTRACK_GLOBAL, PF_SRCTRACK_RULE }; @@ -451,7 +450,7 @@ int parseport(char *, struct range *r, int); %token QUEUE PRIORITY QLIMIT RTABLE %token LOAD RULESET_OPTIMIZATION %token STICKYADDRESS MAXSRCSTATES MAXSRCNODES SOURCETRACK GLOBAL RULE -%token MAXSRCCONN MAXSRCCONNRATE OVERLOAD FLUSH SLOPPY PFLOW +%token MAXSRCCONN MAXSRCCONNRATE OVERLOAD FLUSH SLOPPY %token TAGGED TAG IFBOUND FLOATING STATEPOLICY STATEDEFAULTS ROUTE SETTOS %token DIVERTTO DIVERTREPLY %token <v.string> STRING @@ -2081,15 +2080,6 @@ pfrule : action dir logquick interface route af proto fromto } r.rule_flag |= PFRULE_STATESLOPPY; break; - case PF_STATE_OPT_PFLOW: - if (r.rule_flag & PFRULE_PFLOW) { - yyerror("state pflow " - "option: multiple " - "definitions"); - YYERROR; - } - r.rule_flag |= PFRULE_PFLOW; - break; case PF_STATE_OPT_TIMEOUT: if (o->data.timeout.number == PFTM_ADAPTIVE_START || @@ -2909,26 +2899,6 @@ host : STRING { $$->next = NULL; $$->tail = $$; } - | ROUTE STRING { - $$ = calloc(1, sizeof(struct node_host)); - if ($$ == NULL) { - free($2); - err(1, "host: calloc"); - } - $$->addr.type = PF_ADDR_RTLABEL; - if (strlcpy($$->addr.v.rtlabelname, $2, - sizeof($$->addr.v.rtlabelname)) >= - sizeof($$->addr.v.rtlabelname)) { - yyerror("route label too long, max %u chars", - sizeof($$->addr.v.rtlabelname) - 1); - free($2); - free($$); - YYERROR; - } - $$->next = NULL; - $$->tail = $$; - free($2); - } ; number : NUMBER @@ -3597,14 +3567,6 @@ state_opt_item : MAXIMUM NUMBER { $$->next = NULL; $$->tail = $$; } - | PFLOW { - $$ = calloc(1, 
sizeof(struct node_state_opt)); - if ($$ == NULL) - err(1, "state_opt_item: calloc"); - $$->type = PF_STATE_OPT_PFLOW; - $$->next = NULL; - $$->tail = $$; - } | STRING NUMBER { int i; @@ -5320,7 +5282,6 @@ lookup(char *s) { "out", OUT}, { "overload", OVERLOAD}, { "pass", PASS}, - { "pflow", PFLOW}, { "port", PORT}, { "priority", PRIORITY}, { "priq", PRIQ}, diff --git a/contrib/pf/pfctl/pf_print_state.c b/contrib/pf/pfctl/pf_print_state.c index 0698516..d6637b4 100644 --- a/contrib/pf/pfctl/pf_print_state.c +++ b/contrib/pf/pfctl/pf_print_state.c @@ -119,9 +119,6 @@ print_addr(struct pf_addr_wrap *addr, sa_family_t af, int verbose) case PF_ADDR_URPFFAILED: printf("urpf-failed"); return; - case PF_ADDR_RTLABEL: - printf("route \"%s\"", addr->v.rtlabelname); - return; default: printf("?"); return; @@ -339,8 +336,6 @@ print_state(struct pfsync_state *s, int opts) printf(", rule %u", ntohl(s->rule)); if (s->state_flags & PFSTATE_SLOPPY) printf(", sloppy"); - if (s->state_flags & PFSTATE_PFLOW) - printf(", pflow"); if (s->sync_flags & PFSYNC_FLAG_SRCNODE) printf(", source-track"); if (s->sync_flags & PFSYNC_FLAG_NATSRCNODE) diff --git a/contrib/pf/pfctl/pfctl.c b/contrib/pf/pfctl/pfctl.c index 8b07a2b..90a2bb5 100644 --- a/contrib/pf/pfctl/pfctl.c +++ b/contrib/pf/pfctl/pfctl.c @@ -144,7 +144,6 @@ static const struct { { "states", PF_LIMIT_STATES }, { "src-nodes", PF_LIMIT_SRC_NODES }, { "frags", PF_LIMIT_FRAGS }, - { "tables", PF_LIMIT_TABLES }, { "table-entries", PF_LIMIT_TABLE_ENTRIES }, { NULL, 0 } }; @@ -1553,9 +1552,6 @@ pfctl_fopen(const char *name, const char *mode) void pfctl_init_options(struct pfctl *pf) { - int64_t mem; - int mib[2]; - size_t size; pf->timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL; pf->timeout[PFTM_TCP_OPENING] = PFTM_TCP_OPENING_VAL; @@ -1581,21 +1577,8 @@ pfctl_init_options(struct pfctl *pf) pf->limit[PF_LIMIT_STATES] = PFSTATE_HIWAT; pf->limit[PF_LIMIT_FRAGS] = PFFRAG_FRENT_HIWAT; pf->limit[PF_LIMIT_SRC_NODES] = PFSNODE_HIWAT; - pf->limit[PF_LIMIT_TABLES] = PFR_KTABLE_HIWAT; pf->limit[PF_LIMIT_TABLE_ENTRIES] = PFR_KENTRY_HIWAT; - mib[0] = CTL_HW; -#ifdef __FreeBSD__ - mib[1] = HW_PHYSMEM; -#else - mib[1] = HW_PHYSMEM64; -#endif - size = sizeof(mem); - if (sysctl(mib, 2, &mem, &size, NULL, 0) == -1) - err(1, "sysctl"); - if (mem <= 100*1024*1024) - pf->limit[PF_LIMIT_TABLE_ENTRIES] = PFR_KENTRY_HIWAT_SMALL; - pf->debug = PF_DEBUG_URGENT; } diff --git a/contrib/pf/pfctl/pfctl_parser.c b/contrib/pf/pfctl/pfctl_parser.c index d45b9b7..f248995 100644 --- a/contrib/pf/pfctl/pfctl_parser.c +++ b/contrib/pf/pfctl/pfctl_parser.c @@ -955,12 +955,6 @@ print_rule(struct pf_rule *r, const char *anchor_call, int verbose, int numeric) printf("sloppy"); opts = 0; } - if (r->rule_flag & PFRULE_PFLOW) { - if (!opts) - printf(", "); - printf("pflow"); - opts = 0; - } for (i = 0; i < PFTM_MAX; ++i) if (r->timeout[i]) { int j; diff --git a/contrib/pf/pfctl/pfctl_table.c b/contrib/pf/pfctl/pfctl_table.c index 257c014..f3a1efd 100644 --- a/contrib/pf/pfctl/pfctl_table.c +++ b/contrib/pf/pfctl/pfctl_table.c @@ -621,8 +621,7 @@ print_iface(struct pfi_kif *p, int opts) if (!(opts & PF_OPT_VERBOSE2)) return; printf("\tCleared: %s", ctime(&tzero)); - printf("\tReferences: [ States: %-18d Rules: %-18d ]\n", - p->pfik_states, p->pfik_rules); + printf("\tReferences: %-18d\n", p->pfik_rulerefs); for (i = 0; i < 8; i++) { af = (i>>2) & 1; dir = (i>>1) &1; diff --git a/sys/contrib/altq/altq/altq_cbq.c b/sys/contrib/altq/altq/altq_cbq.c index da12cf8..0a33792 100644 --- 
a/sys/contrib/altq/altq/altq_cbq.c +++ b/sys/contrib/altq/altq/altq_cbq.c @@ -271,10 +271,9 @@ cbq_add_altq(struct pf_altq *a) return (ENODEV); /* allocate and initialize cbq_state_t */ - cbqp = malloc(sizeof(cbq_state_t), M_DEVBUF, M_WAITOK); + cbqp = malloc(sizeof(cbq_state_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (cbqp == NULL) return (ENOMEM); - bzero(cbqp, sizeof(cbq_state_t)); CALLOUT_INIT(&cbqp->cbq_callout); cbqp->cbq_qlen = 0; cbqp->ifnp.ifq_ = &ifp->if_snd; /* keep the ifq */ diff --git a/sys/contrib/altq/altq/altq_hfsc.c b/sys/contrib/altq/altq/altq_hfsc.c index bf4f39c..9c91c43 100644 --- a/sys/contrib/altq/altq/altq_hfsc.c +++ b/sys/contrib/altq/altq/altq_hfsc.c @@ -200,10 +200,9 @@ hfsc_add_altq(struct pf_altq *a) if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENODEV); - hif = malloc(sizeof(struct hfsc_if), M_DEVBUF, M_WAITOK); + hif = malloc(sizeof(struct hfsc_if), M_DEVBUF, M_NOWAIT | M_ZERO); if (hif == NULL) return (ENOMEM); - bzero(hif, sizeof(struct hfsc_if)); hif->hif_eligible = ellist_alloc(); if (hif->hif_eligible == NULL) { diff --git a/sys/contrib/altq/altq/altq_priq.c b/sys/contrib/altq/altq/altq_priq.c index 770d4bf..0ec6534 100644 --- a/sys/contrib/altq/altq/altq_priq.c +++ b/sys/contrib/altq/altq/altq_priq.c @@ -132,11 +132,9 @@ priq_add_altq(struct pf_altq *a) if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENODEV); - pif = malloc(sizeof(struct priq_if), - M_DEVBUF, M_WAITOK); + pif = malloc(sizeof(struct priq_if), M_DEVBUF, M_NOWAIT | M_ZERO); if (pif == NULL) return (ENOMEM); - bzero(pif, sizeof(struct priq_if)); pif->pif_bandwidth = a->ifbandwidth; pif->pif_maxpri = -1; pif->pif_ifq = &ifp->if_snd; diff --git a/sys/contrib/altq/altq/altq_subr.c b/sys/contrib/altq/altq/altq_subr.c index f5fe990..2d7ce75 100644 --- a/sys/contrib/altq/altq/altq_subr.c +++ b/sys/contrib/altq/altq/altq_subr.c @@ -401,14 +401,11 @@ tbr_set(ifq, profile) return (0); } - IFQ_UNLOCK(ifq); - tbr = malloc(sizeof(struct tb_regulator), - M_DEVBUF, M_WAITOK); - if (tbr == NULL) { /* can not happen */ + tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_NOWAIT | M_ZERO); + if (tbr == NULL) { IFQ_UNLOCK(ifq); return (ENOMEM); } - bzero(tbr, sizeof(struct tb_regulator)); tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq; tbr->tbr_depth = TBR_SCALE(profile->depth); @@ -420,7 +417,6 @@ tbr_set(ifq, profile) tbr->tbr_last = read_machclk(); tbr->tbr_lastop = ALTDQ_REMOVE; - IFQ_LOCK(ifq); otbr = ifq->altq_tbr; ifq->altq_tbr = tbr; /* set the new tbr */ diff --git a/sys/contrib/pf/net/if_pflog.c b/sys/contrib/pf/net/if_pflog.c index 349930b..20feea2 100644 --- a/sys/contrib/pf/net/if_pflog.c +++ b/sys/contrib/pf/net/if_pflog.c @@ -1,10 +1,10 @@ /* $OpenBSD: if_pflog.c,v 1.26 2007/10/18 21:58:18 mpf Exp $ */ /* * The authors of this code are John Ioannidis (ji@tla.org), - * Angelos D. Keromytis (kermit@csd.uch.gr) and + * Angelos D. Keromytis (kermit@csd.uch.gr) and * Niels Provos (provos@physnet.uni-hamburg.de). * - * This code was written by John Ioannidis for BSD/OS in Athens, Greece, + * This code was written by John Ioannidis for BSD/OS in Athens, Greece, * in November 1995. * * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996, @@ -20,7 +20,7 @@ * Permission to use, copy, and modify this software with or without fee * is hereby granted, provided that this entire notice is included in * all copies of any software which is or includes a copy or - * modification of this software. + * modification of this software. 
* You may use this code under the GNU public license if you so wish. Please * contribute changes back to the authors under this freer than GPL license * so that we may further the use of strong encryption without limitations to @@ -33,61 +33,34 @@ * PURPOSE. */ -#ifdef __FreeBSD__ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + #include "opt_inet.h" #include "opt_inet6.h" #include "opt_bpf.h" #include "opt_pf.h" -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#ifdef DEV_BPF -#define NBPFILTER DEV_BPF -#else -#define NBPFILTER 0 -#endif - -#ifdef DEV_PFLOG -#define NPFLOG DEV_PFLOG -#else -#define NPFLOG 0 -#endif - -#else /* ! __FreeBSD__ */ -#include "bpfilter.h" -#include "pflog.h" -#endif /* __FreeBSD__ */ - #include <sys/param.h> -#include <sys/systm.h> +#include <sys/kernel.h> #include <sys/mbuf.h> +#include <sys/module.h> #include <sys/proc.h> #include <sys/socket.h> -#ifdef __FreeBSD__ -#include <sys/kernel.h> -#include <sys/limits.h> -#include <sys/malloc.h> -#include <sys/module.h> #include <sys/sockio.h> -#else -#include <sys/ioctl.h> -#endif +#include <net/bpf.h> #include <net/if.h> -#ifdef __FreeBSD__ #include <net/if_clone.h> -#endif +#include <net/if_pflog.h> #include <net/if_types.h> -#include <net/route.h> -#include <net/bpf.h> +#include <net/pfvar.h> #if defined(INET) || defined(INET6) #include <netinet/in.h> #endif #ifdef INET #include <netinet/in_var.h> -#include <netinet/in_systm.h> #include <netinet/ip.h> #endif @@ -96,14 +69,9 @@ __FBSDID("$FreeBSD$"); #include <netinet6/nd6.h> #endif /* INET6 */ -#include <net/pfvar.h> -#include <net/if_pflog.h> - -#ifdef __FreeBSD__ #ifdef INET #include <machine/in_cksum.h> #endif /* INET */ -#endif /* __FreeBSD__ */ #define PFLOGMTU (32768 + MHLEN + MLEN) @@ -113,170 +81,82 @@ __FBSDID("$FreeBSD$"); #define DPRINTF(x) #endif -void pflogattach(int); -int pflogoutput(struct ifnet *, struct mbuf *, struct sockaddr *, -#ifdef __FreeBSD__ - struct route *); -#else - struct rtentry *); -#endif -int pflogioctl(struct ifnet *, u_long, caddr_t); -void pflogstart(struct ifnet *); -#ifdef __FreeBSD__ -static int pflog_clone_create(struct if_clone *, int, caddr_t); -static void pflog_clone_destroy(struct ifnet *); -#else -int pflog_clone_create(struct if_clone *, int); -int pflog_clone_destroy(struct ifnet *); -#endif +static int pflogoutput(struct ifnet *, struct mbuf *, struct sockaddr *, + struct route *); +static void pflogattach(int); +static int pflogioctl(struct ifnet *, u_long, caddr_t); +static void pflogstart(struct ifnet *); +static int pflog_clone_create(struct if_clone *, int, caddr_t); +static void pflog_clone_destroy(struct ifnet *); -LIST_HEAD(, pflog_softc) pflogif_list; -#ifdef __FreeBSD__ IFC_SIMPLE_DECLARE(pflog, 1); -#else -struct if_clone pflog_cloner = - IF_CLONE_INITIALIZER("pflog", pflog_clone_create, pflog_clone_destroy); -#endif struct ifnet *pflogifs[PFLOGIFS_MAX]; /* for fast access */ -void +static void pflogattach(int npflog) { int i; - LIST_INIT(&pflogif_list); for (i = 0; i < PFLOGIFS_MAX; i++) pflogifs[i] = NULL; if_clone_attach(&pflog_cloner); } -#ifdef __FreeBSD__ static int pflog_clone_create(struct if_clone *ifc, int unit, caddr_t param) -#else -int -pflog_clone_create(struct if_clone *ifc, int unit) -#endif { struct ifnet *ifp; - struct pflog_softc *pflogif; - int s; if (unit >= PFLOGIFS_MAX) return (EINVAL); - if ((pflogif = malloc(sizeof(*pflogif), - M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL) - return (ENOMEM); - - pflogif->sc_unit = unit; -#ifdef __FreeBSD__ - ifp = pflogif->sc_ifp = 
if_alloc(IFT_PFLOG); + ifp = if_alloc(IFT_PFLOG); if (ifp == NULL) { - free(pflogif, M_DEVBUF); return (ENOSPC); } if_initname(ifp, ifc->ifc_name, unit); -#else - ifp = &pflogif->sc_if; - snprintf(ifp->if_xname, sizeof ifp->if_xname, "pflog%d", unit); -#endif - ifp->if_softc = pflogif; ifp->if_mtu = PFLOGMTU; ifp->if_ioctl = pflogioctl; ifp->if_output = pflogoutput; ifp->if_start = pflogstart; -#ifndef __FreeBSD__ - ifp->if_type = IFT_PFLOG; -#endif ifp->if_snd.ifq_maxlen = ifqmaxlen; ifp->if_hdrlen = PFLOG_HDRLEN; if_attach(ifp); -#ifndef __FreeBSD__ - if_alloc_sadl(ifp); -#endif -#if NBPFILTER > 0 -#ifdef __FreeBSD__ bpfattach(ifp, DLT_PFLOG, PFLOG_HDRLEN); -#else - bpfattach(&pflogif->sc_if.if_bpf, ifp, DLT_PFLOG, PFLOG_HDRLEN); -#endif -#endif - s = splnet(); -#ifdef __FreeBSD__ - /* XXX: Why pf(4) lock?! Better add a pflog lock?! */ - PF_LOCK(); -#endif - LIST_INSERT_HEAD(&pflogif_list, pflogif, sc_list); pflogifs[unit] = ifp; -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - splx(s); return (0); } -#ifdef __FreeBSD__ static void pflog_clone_destroy(struct ifnet *ifp) -#else -int -pflog_clone_destroy(struct ifnet *ifp) -#endif { - struct pflog_softc *pflogif = ifp->if_softc; - int s; + int i; - s = splnet(); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif - pflogifs[pflogif->sc_unit] = NULL; - LIST_REMOVE(pflogif, sc_list); -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - splx(s); + for (i = 0; i < PFLOGIFS_MAX; i++) + if (pflogifs[i] == ifp) + pflogifs[i] = NULL; -#if NBPFILTER > 0 bpfdetach(ifp); -#endif if_detach(ifp); -#ifdef __FreeBSD__ if_free(ifp); -#endif - free(pflogif, M_DEVBUF); -#ifndef __FreeBSD__ - return (0); -#endif } /* * Start output on the pflog interface. */ -void +static void pflogstart(struct ifnet *ifp) { struct mbuf *m; -#ifndef __FreeBSD__ - int s; -#endif for (;;) { -#ifdef __FreeBSD__ IF_LOCK(&ifp->if_snd); _IF_DROP(&ifp->if_snd); _IF_DEQUEUE(&ifp->if_snd, m); IF_UNLOCK(&ifp->if_snd); -#else - s = splnet(); - IF_DROP(&ifp->if_snd); - IF_DEQUEUE(&ifp->if_snd, m); - splx(s); -#endif if (m == NULL) return; @@ -285,35 +165,24 @@ pflogstart(struct ifnet *ifp) } } -int +static int pflogoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, -#ifdef __FreeBSD__ struct route *rt) -#else - struct rtentry *rt) -#endif { m_freem(m); return (0); } /* ARGSUSED */ -int +static int pflogioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { switch (cmd) { case SIOCSIFFLAGS: -#ifdef __FreeBSD__ if (ifp->if_flags & IFF_UP) ifp->if_drv_flags |= IFF_DRV_RUNNING; else ifp->if_drv_flags &= ~IFF_DRV_RUNNING; -#else - if (ifp->if_flags & IFF_UP) - ifp->if_flags |= IFF_RUNNING; - else - ifp->if_flags &= ~IFF_RUNNING; -#endif break; default: return (ENOTTY); @@ -322,12 +191,11 @@ pflogioctl(struct ifnet *ifp, u_long cmd, caddr_t data) return (0); } -int +static int pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, u_int8_t reason, struct pf_rule *rm, struct pf_rule *am, - struct pf_ruleset *ruleset, struct pf_pdesc *pd) + struct pf_ruleset *ruleset, struct pf_pdesc *pd, int lookupsafe) { -#if NBPFILTER > 0 struct ifnet *ifn; struct pfloghdr hdr; @@ -354,23 +222,18 @@ pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, strlcpy(hdr.ruleset, ruleset->anchor->name, sizeof(hdr.ruleset)); } - if (rm->log & PF_LOG_SOCKET_LOOKUP && !pd->lookup.done) -#ifdef __FreeBSD__ - /* - * XXX: This should not happen as we force an early lookup - * via debug.pfugidhack - */ - ; /* empty */ -#else - pd->lookup.done = pf_socket_lookup(dir, pd); -#endif - if 
(pd->lookup.done > 0) { + /* + * XXXGL: we avoid pf_socket_lookup() when we are holding + * state lock, since this leads to unsafe LOR. + * These conditions are very very rare, however. + */ + if (rm->log & PF_LOG_SOCKET_LOOKUP && !pd->lookup.done && lookupsafe) + pd->lookup.done = pf_socket_lookup(dir, pd, m); + if (pd->lookup.done > 0) hdr.uid = pd->lookup.uid; - hdr.pid = pd->lookup.pid; - } else { + else hdr.uid = UID_MAX; - hdr.pid = NO_PID; - } + hdr.pid = NO_PID; hdr.rule_uid = rm->cuid; hdr.rule_pid = rm->cpid; hdr.dir = dir; @@ -387,18 +250,11 @@ pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, ifn->if_opackets++; ifn->if_obytes += m->m_pkthdr.len; -#ifdef __FreeBSD__ BPF_MTAP2(ifn, &hdr, PFLOG_HDRLEN, m); -#else - bpf_mtap_hdr(ifn->if_bpf, (char *)&hdr, PFLOG_HDRLEN, m, - BPF_DIRECTION_OUT); -#endif -#endif return (0); } -#ifdef __FreeBSD__ static int pflog_modevent(module_t mod, int type, void *data) { @@ -407,14 +263,14 @@ pflog_modevent(module_t mod, int type, void *data) switch (type) { case MOD_LOAD: pflogattach(1); - PF_LOCK(); + PF_RULES_WLOCK(); pflog_packet_ptr = pflog_packet; - PF_UNLOCK(); + PF_RULES_WUNLOCK(); break; case MOD_UNLOAD: - PF_LOCK(); + PF_RULES_WLOCK(); pflog_packet_ptr = NULL; - PF_UNLOCK(); + PF_RULES_WUNLOCK(); if_clone_detach(&pflog_cloner); break; default: @@ -432,4 +288,3 @@ static moduledata_t pflog_mod = { "pflog", pflog_modevent, 0 }; DECLARE_MODULE(pflog, pflog_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(pflog, PFLOG_MODVER); MODULE_DEPEND(pflog, pf, PF_MODVER, PF_MODVER, PF_MODVER); -#endif /* __FreeBSD__ */ diff --git a/sys/contrib/pf/net/if_pflog.h b/sys/contrib/pf/net/if_pflog.h index 5f48f6c..0faeb7d 100644 --- a/sys/contrib/pf/net/if_pflog.h +++ b/sys/contrib/pf/net/if_pflog.h @@ -29,16 +29,6 @@ #define PFLOGIFS_MAX 16 -struct pflog_softc { -#ifdef __FreeBSD__ - struct ifnet *sc_ifp; /* the interface pointer */ -#else - struct ifnet sc_if; /* the interface */ -#endif - int sc_unit; - LIST_ENTRY(pflog_softc) sc_list; -}; - #define PFLOG_RULESET_NAME_SIZE 16 struct pfloghdr { @@ -62,40 +52,15 @@ struct pfloghdr { /* minus pad, also used as a signature */ #define PFLOG_REAL_HDRLEN offsetof(struct pfloghdr, pad) -/* XXX remove later when old format logs are no longer needed */ -struct old_pfloghdr { - u_int32_t af; - char ifname[IFNAMSIZ]; - short rnr; - u_short reason; - u_short action; - u_short dir; -}; -#define OLD_PFLOG_HDRLEN sizeof(struct old_pfloghdr) - #ifdef _KERNEL -#ifdef __FreeBSD__ struct pf_rule; struct pf_ruleset; struct pfi_kif; struct pf_pdesc; -#if 0 -typedef int pflog_packet_t(struct pfi_kif *, struct mbuf *, sa_family_t, - u_int8_t, u_int8_t, struct pf_rule *, struct pf_rule *, - struct pf_ruleset *, struct pf_pdesc *); -extern pflog_packet_t *pflog_packet_ptr; -#endif -#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g,h) do { \ +#define PFLOG_PACKET(i,a,b,c,d,e,f,g,h,di) do { \ if (pflog_packet_ptr != NULL) \ - pflog_packet_ptr(i,a,b,c,d,e,f,g,h); \ + pflog_packet_ptr(i,a,b,c,d,e,f,g,h,di); \ } while (0) -#else /* ! 
__FreeBSD__ */ -#if NPFLOG > 0 -#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g,h) pflog_packet(i,a,b,c,d,e,f,g,h) -#else -#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g,h) ((void)0) -#endif /* NPFLOG > 0 */ -#endif #endif /* _KERNEL */ #endif /* _NET_IF_PFLOG_H_ */ diff --git a/sys/contrib/pf/net/if_pflow.h b/sys/contrib/pf/net/if_pflow.h deleted file mode 100644 index 35ccbeb..0000000 --- a/sys/contrib/pf/net/if_pflow.h +++ /dev/null @@ -1,126 +0,0 @@ -/* $OpenBSD: if_pflow.h,v 1.5 2009/02/27 11:09:36 gollo Exp $ */ - -/* - * Copyright (c) 2008 Henning Brauer <henning@openbsd.org> - * Copyright (c) 2008 Joerg Goltermann <jg@osn.de> - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT - * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * - * $FreeBSD$ - */ - -#ifndef _NET_IF_PFLOW_H_ -#define _NET_IF_PFLOW_H_ - -#define PFLOW_ID_LEN sizeof(u_int64_t) - -#define PFLOW_MAXFLOWS 30 -#define PFLOW_VERSION 5 -#define PFLOW_ENGINE_TYPE 42 -#define PFLOW_ENGINE_ID 42 -#define PFLOW_MAXBYTES 0xffffffff -#define PFLOW_TIMEOUT 30 - -struct pflow_flow { - u_int32_t src_ip; - u_int32_t dest_ip; - u_int32_t nexthop_ip; - u_int16_t if_index_in; - u_int16_t if_index_out; - u_int32_t flow_packets; - u_int32_t flow_octets; - u_int32_t flow_start; - u_int32_t flow_finish; - u_int16_t src_port; - u_int16_t dest_port; - u_int8_t pad1; - u_int8_t tcp_flags; - u_int8_t protocol; - u_int8_t tos; - u_int16_t src_as; - u_int16_t dest_as; - u_int8_t src_mask; - u_int8_t dest_mask; - u_int16_t pad2; -} __packed; - -#ifdef _KERNEL - -extern int pflow_ok; - -struct pflow_softc { - struct ifnet sc_if; - struct ifnet *sc_pflow_ifp; - - unsigned int sc_count; - unsigned int sc_maxcount; - u_int64_t sc_gcounter; - struct ip_moptions sc_imo; -#ifdef __FreeBSD__ - struct callout sc_tmo; -#else - struct timeout sc_tmo; -#endif - struct in_addr sc_sender_ip; - u_int16_t sc_sender_port; - struct in_addr sc_receiver_ip; - u_int16_t sc_receiver_port; - struct mbuf *sc_mbuf; /* current cumulative mbuf */ - SLIST_ENTRY(pflow_softc) sc_next; -}; - -extern struct pflow_softc *pflowif; - -#endif /* _KERNEL */ - -struct pflow_header { - u_int16_t version; - u_int16_t count; - u_int32_t uptime_ms; - u_int32_t time_sec; - u_int32_t time_nanosec; - u_int32_t flow_sequence; - u_int8_t engine_type; - u_int8_t engine_id; - u_int8_t reserved1; - u_int8_t reserved2; -} __packed; - -#define PFLOW_HDRLEN sizeof(struct pflow_header) - -struct pflowstats { - u_int64_t pflow_flows; - u_int64_t pflow_packets; - u_int64_t pflow_onomem; - u_int64_t pflow_oerrors; -}; - -/* - * Configuration structure for SIOCSETPFLOW SIOCGETPFLOW - */ -struct pflowreq { - struct in_addr sender_ip; - struct in_addr receiver_ip; - u_int16_t receiver_port; - u_int16_t addrmask; -#define PFLOW_MASK_SRCIP 0x01 -#define PFLOW_MASK_DSTIP 0x02 -#define PFLOW_MASK_DSTPRT 0x04 -}; - -#ifdef _KERNEL -int export_pflow(struct pf_state *); -int pflow_sysctl(int *, 
u_int, void *, size_t *, void *, size_t); -#endif /* _KERNEL */ - -#endif /* _NET_IF_PFLOW_H_ */ diff --git a/sys/contrib/pf/net/if_pfsync.c b/sys/contrib/pf/net/if_pfsync.c index 7da6c2e..28af641 100644 --- a/sys/contrib/pf/net/if_pfsync.c +++ b/sys/contrib/pf/net/if_pfsync.c @@ -54,91 +54,44 @@ * 1.173 - correct expire time processing */ -#ifdef __FreeBSD__ -#include "opt_inet.h" -#include "opt_inet6.h" -#include "opt_pf.h" - #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#define NBPFILTER 1 -#endif /* __FreeBSD__ */ +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_pf.h" #include <sys/param.h> -#include <sys/kernel.h> -#ifdef __FreeBSD__ #include <sys/bus.h> +#include <sys/endian.h> #include <sys/interrupt.h> -#include <sys/priv.h> -#endif -#include <sys/proc.h> -#include <sys/systm.h> -#include <sys/time.h> +#include <sys/kernel.h> +#include <sys/lock.h> #include <sys/mbuf.h> -#include <sys/socket.h> -#ifdef __FreeBSD__ -#include <sys/endian.h> -#include <sys/malloc.h> #include <sys/module.h> -#include <sys/sockio.h> -#include <sys/taskqueue.h> -#include <sys/lock.h> #include <sys/mutex.h> +#include <sys/priv.h> #include <sys/protosw.h> -#else -#include <sys/ioctl.h> -#include <sys/timeout.h> -#endif +#include <sys/socket.h> +#include <sys/sockio.h> #include <sys/sysctl.h> -#ifndef __FreeBSD__ -#include <sys/pool.h> -#endif +#include <net/bpf.h> #include <net/if.h> -#ifdef __FreeBSD__ #include <net/if_clone.h> -#endif #include <net/if_types.h> -#include <net/route.h> -#include <net/bpf.h> -#include <net/netisr.h> -#ifdef __FreeBSD__ -#include <net/vnet.h> -#endif +#include <net/pfvar.h> +#include <net/if_pfsync.h> -#include <netinet/in.h> #include <netinet/if_ether.h> -#include <netinet/tcp.h> -#include <netinet/tcp_seq.h> - -#ifdef INET -#include <netinet/in_systm.h> +#include <netinet/in.h> #include <netinet/in_var.h> #include <netinet/ip.h> -#include <netinet/ip_var.h> -#endif - -#ifdef INET6 -#include <netinet6/nd6.h> -#endif /* INET6 */ - -#ifdef __FreeBSD__ -#include <netinet/ip_carp.h> -#else -#include "carp.h" -#if NCARP > 0 #include <netinet/ip_carp.h> -#endif -#endif - -#include <net/pfvar.h> -#include <net/if_pfsync.h> - -#ifndef __FreeBSD__ -#include "bpfilter.h" -#include "pfsync.h" -#endif +#include <netinet/ip_var.h> +#include <netinet/tcp.h> +#include <netinet/tcp_fsm.h> +#include <netinet/tcp_seq.h> #define PFSYNC_MINPKT ( \ sizeof(struct ip) + \ @@ -152,26 +105,22 @@ struct pfsync_pkt { u_int8_t flags; }; -int pfsync_input_hmac(struct mbuf *, int); - -int pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *, - struct pfsync_state_peer *); - -int pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int); -int pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int); -int pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int); -int pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int); -int pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int); -int pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int); -int pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int); -int pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int); -int pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int); -int pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int); -int pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int); - -int pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int); - -int (*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = { +static int pfsync_upd_tcp(struct pf_state *, 
struct pfsync_state_peer *, + struct pfsync_state_peer *); +static int pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int); + +static int (*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = { pfsync_in_clr, /* PFSYNC_ACT_CLR */ pfsync_in_ins, /* PFSYNC_ACT_INS */ pfsync_in_iack, /* PFSYNC_ACT_INS_ACK */ @@ -194,12 +143,12 @@ struct pfsync_q { }; /* we have one of these for every PFSYNC_S_ */ -int pfsync_out_state(struct pf_state *, struct mbuf *, int); -int pfsync_out_iack(struct pf_state *, struct mbuf *, int); -int pfsync_out_upd_c(struct pf_state *, struct mbuf *, int); -int pfsync_out_del(struct pf_state *, struct mbuf *, int); +static int pfsync_out_state(struct pf_state *, struct mbuf *, int); +static int pfsync_out_iack(struct pf_state *, struct mbuf *, int); +static int pfsync_out_upd_c(struct pf_state *, struct mbuf *, int); +static int pfsync_out_del(struct pf_state *, struct mbuf *, int); -struct pfsync_q pfsync_qs[] = { +static struct pfsync_q pfsync_qs[] = { { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_INS }, { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_UPD }, @@ -207,98 +156,71 @@ struct pfsync_q pfsync_qs[] = { { pfsync_out_del, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C } }; -void pfsync_q_ins(struct pf_state *, int); -void pfsync_q_del(struct pf_state *); +static void pfsync_q_ins(struct pf_state *, int); +static void pfsync_q_del(struct pf_state *); + +static void pfsync_update_state(struct pf_state *); struct pfsync_upd_req_item { TAILQ_ENTRY(pfsync_upd_req_item) ur_entry; struct pfsync_upd_req ur_msg; }; -TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item); struct pfsync_deferral { - TAILQ_ENTRY(pfsync_deferral) pd_entry; - struct pf_state *pd_st; - struct mbuf *pd_m; -#ifdef __FreeBSD__ - struct callout pd_tmo; -#else - struct timeout pd_tmo; -#endif -}; -TAILQ_HEAD(pfsync_deferrals, pfsync_deferral); + struct pfsync_softc *pd_sc; + TAILQ_ENTRY(pfsync_deferral) pd_entry; + u_int pd_refs; + struct callout pd_tmo; -#define PFSYNC_PLSIZE MAX(sizeof(struct pfsync_upd_req_item), \ - sizeof(struct pfsync_deferral)) - -#ifdef notyet -int pfsync_out_tdb(struct tdb *, struct mbuf *, int); -#endif + struct pf_state *pd_st; + struct mbuf *pd_m; +}; struct pfsync_softc { -#ifdef __FreeBSD__ + /* Configuration */ struct ifnet *sc_ifp; -#else - struct ifnet sc_if; -#endif struct ifnet *sc_sync_if; - -#ifdef __FreeBSD__ - uma_zone_t sc_pool; -#else - struct pool sc_pool; -#endif - - struct ip_moptions sc_imo; - - struct in_addr sc_sync_peer; - u_int8_t sc_maxupdates; -#ifdef __FreeBSD__ - int pfsync_sync_ok; -#endif - - struct ip sc_template; - - struct pf_state_queue 
sc_qs[PFSYNC_S_COUNT]; - size_t sc_len; - - struct pfsync_upd_reqs sc_upd_req_list; - - int sc_defer; - struct pfsync_deferrals sc_deferrals; - u_int sc_deferred; - + struct ip_moptions sc_imo; + struct in_addr sc_sync_peer; + uint32_t sc_flags; +#define PFSYNCF_OK 0x00000001 +#define PFSYNCF_DEFER 0x00000002 +#define PFSYNCF_PUSH 0x00000004 + uint8_t sc_maxupdates; + struct ip sc_template; + struct callout sc_tmo; + struct mtx sc_mtx; + + /* Queued data */ + size_t sc_len; + TAILQ_HEAD(, pf_state) sc_qs[PFSYNC_S_COUNT]; + TAILQ_HEAD(, pfsync_upd_req_item) sc_upd_req_list; + TAILQ_HEAD(, pfsync_deferral) sc_deferrals; + u_int sc_deferred; void *sc_plus; - size_t sc_pluslen; - - u_int32_t sc_ureq_sent; - int sc_bulk_tries; -#ifdef __FreeBSD__ - struct callout sc_bulkfail_tmo; -#else - struct timeout sc_bulkfail_tmo; -#endif - - u_int32_t sc_ureq_received; - struct pf_state *sc_bulk_next; - struct pf_state *sc_bulk_last; -#ifdef __FreeBSD__ - struct callout sc_bulk_tmo; -#else - struct timeout sc_bulk_tmo; -#endif + size_t sc_pluslen; + + /* Bulk update info */ + struct mtx sc_bulk_mtx; + uint32_t sc_ureq_sent; + int sc_bulk_tries; + uint32_t sc_ureq_received; + int sc_bulk_hashid; + uint64_t sc_bulk_stateid; + uint32_t sc_bulk_creatorid; + struct callout sc_bulk_tmo; + struct callout sc_bulkfail_tmo; +}; - TAILQ_HEAD(, tdb) sc_tdb_q; +#define PFSYNC_LOCK(sc) mtx_lock(&(sc)->sc_mtx) +#define PFSYNC_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) +#define PFSYNC_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) -#ifdef __FreeBSD__ - struct callout sc_tmo; -#else - struct timeout sc_tmo; -#endif -}; +#define PFSYNC_BLOCK(sc) mtx_lock(&(sc)->sc_bulk_mtx) +#define PFSYNC_BUNLOCK(sc) mtx_unlock(&(sc)->sc_bulk_mtx) +#define PFSYNC_BLOCK_ASSERT(sc) mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED) -#ifdef __FreeBSD__ -static MALLOC_DEFINE(M_PFSYNC, "pfsync", "pfsync data"); +static MALLOC_DEFINE(M_PFSYNC, "pfsync", "pfsync(4) data"); static VNET_DEFINE(struct pfsync_softc *, pfsyncif) = NULL; #define V_pfsyncif VNET(pfsyncif) static VNET_DEFINE(void *, pfsync_swi_cookie) = NULL; @@ -308,14 +230,14 @@ static VNET_DEFINE(struct pfsyncstats, pfsyncstats); static VNET_DEFINE(int, pfsync_carp_adj) = CARP_MAXSKEW; #define V_pfsync_carp_adj VNET(pfsync_carp_adj) +static void pfsync_timeout(void *); +static void pfsync_push(struct pfsync_softc *); static void pfsyncintr(void *); -static int pfsync_multicast_setup(struct pfsync_softc *); +static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *, + void *); static void pfsync_multicast_cleanup(struct pfsync_softc *); static int pfsync_init(void); static void pfsync_uninit(void); -static void pfsync_sendout1(int); - -#define schednetisr(NETISR_PFSYNC) swi_sched(V_pfsync_swi_cookie, 0) SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC"); SYSCTL_VNET_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_RW, @@ -323,85 +245,46 @@ SYSCTL_VNET_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_RW, "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)"); SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_RW, &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment"); -#else -struct pfsync_softc *pfsyncif = NULL; -struct pfsyncstats pfsyncstats; -#define V_pfsyncstats pfsyncstats -#endif -void pfsyncattach(int); -#ifdef __FreeBSD__ -int pfsync_clone_create(struct if_clone *, int, caddr_t); -void pfsync_clone_destroy(struct ifnet *); -#else -int pfsync_clone_create(struct if_clone *, int); -int pfsync_clone_destroy(struct ifnet *); -#endif 
-int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, - struct pf_state_peer *); -void pfsync_update_net_tdb(struct pfsync_tdb *); -int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *, -#ifdef __FreeBSD__ - struct route *); -#else - struct rtentry *); -#endif -int pfsyncioctl(struct ifnet *, u_long, caddr_t); -void pfsyncstart(struct ifnet *); +static int pfsync_clone_create(struct if_clone *, int, caddr_t); +static void pfsync_clone_destroy(struct ifnet *); +static int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, + struct pf_state_peer *); +static int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *, + struct route *); +static int pfsyncioctl(struct ifnet *, u_long, caddr_t); -struct mbuf *pfsync_if_dequeue(struct ifnet *); +static int pfsync_defer(struct pf_state *, struct mbuf *); +static void pfsync_undefer(struct pfsync_deferral *, int); +static void pfsync_undefer_state(struct pf_state *, int); +static void pfsync_defer_tmo(void *); -void pfsync_deferred(struct pf_state *, int); -void pfsync_undefer(struct pfsync_deferral *, int); -void pfsync_defer_tmo(void *); +static void pfsync_request_update(u_int32_t, u_int64_t); +static void pfsync_update_state_req(struct pf_state *); -void pfsync_request_update(u_int32_t, u_int64_t); -void pfsync_update_state_req(struct pf_state *); +static void pfsync_drop(struct pfsync_softc *); +static void pfsync_sendout(int); +static void pfsync_send_plus(void *, size_t); -void pfsync_drop(struct pfsync_softc *); -void pfsync_sendout(void); -void pfsync_send_plus(void *, size_t); -void pfsync_timeout(void *); -void pfsync_tdb_timeout(void *); +static void pfsync_bulk_start(void); +static void pfsync_bulk_status(u_int8_t); +static void pfsync_bulk_update(void *); +static void pfsync_bulk_fail(void *); -void pfsync_bulk_start(void); -void pfsync_bulk_status(u_int8_t); -void pfsync_bulk_update(void *); -void pfsync_bulk_fail(void *); - -#ifdef __FreeBSD__ -/* XXX: ugly */ -#define betoh64 (unsigned long long)be64toh -#define timeout_del callout_stop +#ifdef IPSEC +static void pfsync_update_net_tdb(struct pfsync_tdb *); #endif #define PFSYNC_MAX_BULKTRIES 12 -#ifndef __FreeBSD__ -int pfsync_sync_ok; -#endif -#ifdef __FreeBSD__ VNET_DEFINE(struct ifc_simple_data, pfsync_cloner_data); VNET_DEFINE(struct if_clone, pfsync_cloner); #define V_pfsync_cloner_data VNET(pfsync_cloner_data) #define V_pfsync_cloner VNET(pfsync_cloner) IFC_SIMPLE_DECLARE(pfsync, 1); -#else -struct if_clone pfsync_cloner = - IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy); -#endif -void -pfsyncattach(int npfsync) -{ - if_clone_attach(&pfsync_cloner); -} -int -#ifdef __FreeBSD__ +static int pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) -#else -pfsync_clone_create(struct if_clone *ifc, int unit) -#endif { struct pfsync_softc *sc; struct ifnet *ifp; @@ -410,204 +293,99 @@ pfsync_clone_create(struct if_clone *ifc, int unit) if (unit != 0) return (EINVAL); -#ifdef __FreeBSD__ sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO); - sc->pfsync_sync_ok = 1; -#else - pfsync_sync_ok = 1; - sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_NOWAIT | M_ZERO); -#endif + sc->sc_flags |= PFSYNCF_OK; for (q = 0; q < PFSYNC_S_COUNT; q++) TAILQ_INIT(&sc->sc_qs[q]); -#ifdef __FreeBSD__ - sc->sc_pool = uma_zcreate("pfsync", PFSYNC_PLSIZE, NULL, NULL, NULL, - NULL, UMA_ALIGN_PTR, 0); -#else - pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, 0, 0, "pfsync", NULL); -#endif TAILQ_INIT(&sc->sc_upd_req_list); 
TAILQ_INIT(&sc->sc_deferrals); - sc->sc_deferred = 0; - - TAILQ_INIT(&sc->sc_tdb_q); sc->sc_len = PFSYNC_MINPKT; sc->sc_maxupdates = 128; -#ifndef __FreeBSD__ - sc->sc_imo.imo_membership = (struct in_multi **)malloc( - (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS, - M_WAITOK | M_ZERO); - sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; -#endif - -#ifdef __FreeBSD__ ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC); if (ifp == NULL) { - uma_zdestroy(sc->sc_pool); free(sc, M_PFSYNC); return (ENOSPC); } if_initname(ifp, ifc->ifc_name, unit); -#else - ifp = &sc->sc_if; - snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit); -#endif ifp->if_softc = sc; ifp->if_ioctl = pfsyncioctl; ifp->if_output = pfsyncoutput; - ifp->if_start = pfsyncstart; ifp->if_type = IFT_PFSYNC; ifp->if_snd.ifq_maxlen = ifqmaxlen; ifp->if_hdrlen = sizeof(struct pfsync_header); ifp->if_mtu = ETHERMTU; -#ifdef __FreeBSD__ + mtx_init(&sc->sc_mtx, "pfsync", NULL, MTX_DEF); + mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF); callout_init(&sc->sc_tmo, CALLOUT_MPSAFE); - callout_init_mtx(&sc->sc_bulk_tmo, &pf_task_mtx, 0); - callout_init(&sc->sc_bulkfail_tmo, CALLOUT_MPSAFE); -#else - timeout_set(&sc->sc_tmo, pfsync_timeout, sc); - timeout_set(&sc->sc_bulk_tmo, pfsync_bulk_update, sc); - timeout_set(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc); -#endif + callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0); + callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0); if_attach(ifp); -#ifndef __FreeBSD__ - if_alloc_sadl(ifp); - -#if NCARP > 0 - if_addgroup(ifp, "carp"); -#endif -#endif -#if NBPFILTER > 0 -#ifdef __FreeBSD__ bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN); -#else - bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN); -#endif -#endif -#ifdef __FreeBSD__ V_pfsyncif = sc; -#else - pfsyncif = sc; -#endif return (0); } -#ifdef __FreeBSD__ -void -#else -int -#endif +static void pfsync_clone_destroy(struct ifnet *ifp) { struct pfsync_softc *sc = ifp->if_softc; -#ifdef __FreeBSD__ - PF_LOCK(); -#endif - timeout_del(&sc->sc_bulkfail_tmo); - timeout_del(&sc->sc_bulk_tmo); - timeout_del(&sc->sc_tmo); -#ifdef __FreeBSD__ - PF_UNLOCK(); - if (!sc->pfsync_sync_ok && carp_demote_adj_p) + /* + * At this stage, everything should have already been + * cleared by pfsync_uninit(), and we have only to + * drain callouts. 
+ */ + while (sc->sc_deferred > 0) { + struct pfsync_deferral *pd = TAILQ_FIRST(&sc->sc_deferrals); + + TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); + sc->sc_deferred--; + if (callout_stop(&pd->pd_tmo)) { + pf_release_state(pd->pd_st); + m_freem(pd->pd_m); + free(pd, M_PFSYNC); + } else { + pd->pd_refs++; + callout_drain(&pd->pd_tmo); + free(pd, M_PFSYNC); + } + } + + callout_drain(&sc->sc_tmo); + callout_drain(&sc->sc_bulkfail_tmo); + callout_drain(&sc->sc_bulk_tmo); + + if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy"); -#else -#if NCARP > 0 - if (!pfsync_sync_ok) - carp_group_demote_adj(&sc->sc_if, -1); -#endif -#endif -#if NBPFILTER > 0 bpfdetach(ifp); -#endif if_detach(ifp); pfsync_drop(sc); - while (sc->sc_deferred > 0) - pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0); - -#ifdef __FreeBSD__ - UMA_DESTROY(sc->sc_pool); -#else - pool_destroy(&sc->sc_pool); -#endif -#ifdef __FreeBSD__ if_free(ifp); if (sc->sc_imo.imo_membership) pfsync_multicast_cleanup(sc); + mtx_destroy(&sc->sc_mtx); + mtx_destroy(&sc->sc_bulk_mtx); free(sc, M_PFSYNC); -#else - free(sc->sc_imo.imo_membership, M_IPMOPTS); - free(sc, M_DEVBUF); -#endif -#ifdef __FreeBSD__ V_pfsyncif = NULL; -#else - pfsyncif = NULL; -#endif - -#ifndef __FreeBSD__ - return (0); -#endif -} - -struct mbuf * -pfsync_if_dequeue(struct ifnet *ifp) -{ - struct mbuf *m; -#ifndef __FreeBSD__ - int s; -#endif - -#ifdef __FreeBSD__ - IF_LOCK(&ifp->if_snd); - _IF_DROP(&ifp->if_snd); - _IF_DEQUEUE(&ifp->if_snd, m); - IF_UNLOCK(&ifp->if_snd); -#else - s = splnet(); - IF_DEQUEUE(&ifp->if_snd, m); - splx(s); -#endif - - return (m); } -/* - * Start output on the pfsync interface. - */ -void -pfsyncstart(struct ifnet *ifp) -{ - struct mbuf *m; - - while ((m = pfsync_if_dequeue(ifp)) != NULL) { -#ifndef __FreeBSD__ - IF_DROP(&ifp->if_snd); -#endif - m_freem(m); - } -} - -int +static int pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, struct pf_state_peer *d) { if (s->scrub.scrub_flag && d->scrub == NULL) { -#ifdef __FreeBSD__ - d->scrub = pool_get(&V_pf_state_scrub_pl, PR_NOWAIT | PR_ZERO); -#else - d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO); -#endif + d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO); if (d->scrub == NULL) return (ENOMEM); } @@ -615,99 +393,29 @@ pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, return (0); } -#ifndef __FreeBSD__ -void -pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) -{ - bzero(sp, sizeof(struct pfsync_state)); - - /* copy from state key */ - sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0]; - sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1]; - sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0]; - sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1]; - sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0]; - sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1]; - sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0]; - sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1]; - sp->proto = st->key[PF_SK_WIRE]->proto; - sp->af = st->key[PF_SK_WIRE]->af; - - /* copy from state */ - strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname)); - bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr)); - sp->creation = htonl(time_uptime - st->creation); - sp->expire = pf_state_expires(st); - if (sp->expire <= time_second) - sp->expire = htonl(0); - else - sp->expire = htonl(sp->expire - time_second); - - sp->direction = st->direction; - 
sp->log = st->log; - sp->timeout = st->timeout; - sp->state_flags = st->state_flags; - if (st->src_node) - sp->sync_flags |= PFSYNC_FLAG_SRCNODE; - if (st->nat_src_node) - sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE; - - bcopy(&st->id, &sp->id, sizeof(sp->id)); - sp->creatorid = st->creatorid; - pf_state_peer_hton(&st->src, &sp->src); - pf_state_peer_hton(&st->dst, &sp->dst); - - if (st->rule.ptr == NULL) - sp->rule = htonl(-1); - else - sp->rule = htonl(st->rule.ptr->nr); - if (st->anchor.ptr == NULL) - sp->anchor = htonl(-1); - else - sp->anchor = htonl(st->anchor.ptr->nr); - if (st->nat_rule.ptr == NULL) - sp->nat_rule = htonl(-1); - else - sp->nat_rule = htonl(st->nat_rule.ptr->nr); - - pf_state_counter_hton(st->packets[0], sp->packets[0]); - pf_state_counter_hton(st->packets[1], sp->packets[1]); - pf_state_counter_hton(st->bytes[0], sp->bytes[0]); - pf_state_counter_hton(st->bytes[1], sp->bytes[1]); -} -#endif - -int +static int pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) { + struct pfsync_softc *sc = V_pfsyncif; struct pf_state *st = NULL; struct pf_state_key *skw = NULL, *sks = NULL; struct pf_rule *r = NULL; struct pfi_kif *kif; - int pool_flags; int error; -#ifdef __FreeBSD__ - PF_LOCK_ASSERT(); + PF_RULES_RASSERT(); if (sp->creatorid == 0 && V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) { -#endif - printf("pfsync_state_import: invalid creator id:" - " %08x\n", ntohl(sp->creatorid)); + printf("%s: invalid creator id: %08x\n", __func__, + ntohl(sp->creatorid)); return (EINVAL); } - if ((kif = pfi_kif_get(sp->ifname)) == NULL) { -#ifdef __FreeBSD__ + if ((kif = pfi_kif_find(sp->ifname)) == NULL) { if (V_pf_status.debug >= PF_DEBUG_MISC) -#else - if (pf_status.debug >= PF_DEBUG_MISC) -#endif - printf("pfsync_state_import: " - "unknown interface: %s\n", sp->ifname); + printf("%s: unknown interface: %s\n", __func__, + sp->ifname); if (flags & PFSYNC_SI_IOCTL) return (EINVAL); return (0); /* skip this state */ @@ -723,34 +431,18 @@ pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) r = pf_main_ruleset.rules[ PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; else -#ifdef __FreeBSD__ r = &V_pf_default_rule; -#else - r = &pf_default_rule; -#endif if ((r->max_states && r->states_cur >= r->max_states)) goto cleanup; -#ifdef __FreeBSD__ - if (flags & PFSYNC_SI_IOCTL) - pool_flags = PR_WAITOK | PR_ZERO; - else - pool_flags = PR_NOWAIT | PR_ZERO; - - if ((st = pool_get(&V_pf_state_pl, pool_flags)) == NULL) - goto cleanup; -#else - if (flags & PFSYNC_SI_IOCTL) - pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO; - else - pool_flags = PR_LIMITFAIL | PR_ZERO; - - if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL) + /* + * XXXGL: consider M_WAITOK in ioctl path after. 
+ */ + if ((st = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO)) == NULL) goto cleanup; -#endif - if ((skw = pf_alloc_state_key(pool_flags)) == NULL) + if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL) goto cleanup; if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0], @@ -759,7 +451,8 @@ pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) &sp->key[PF_SK_STACK].addr[1], sp->af) || sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] || sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1]) { - if ((sks = pf_alloc_state_key(pool_flags)) == NULL) + sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT); + if (sks == NULL) goto cleanup; } else sks = skw; @@ -788,17 +481,13 @@ pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) /* copy to state */ bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); st->creation = time_uptime - ntohl(sp->creation); - st->expire = time_second; + st->expire = time_uptime; if (sp->expire) { uint32_t timeout; timeout = r->timeout[sp->timeout]; if (!timeout) -#ifdef __FreeBSD__ timeout = V_pf_default_rule.timeout[sp->timeout]; -#else - timeout = pf_default_rule.timeout[sp->timeout]; -#endif /* sp->expire may have been adaptively scaled by export. */ st->expire -= timeout - ntohl(sp->expire); @@ -809,7 +498,7 @@ pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) st->timeout = sp->timeout; st->state_flags = sp->state_flags; - bcopy(sp->id, &st->id, sizeof(st->id)); + st->id = sp->id; st->creatorid = sp->creatorid; pf_state_peer_ntoh(&sp->src, &st->src); pf_state_peer_ntoh(&sp->dst, &st->dst); @@ -826,8 +515,8 @@ pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) r->states_cur++; r->states_tot++; - if (!ISSET(flags, PFSYNC_SI_IOCTL)) - SET(st->state_flags, PFSTATE_NOSYNC); + if (!(flags & PFSYNC_SI_IOCTL)) + st->state_flags |= PFSTATE_NOSYNC; if ((error = pf_state_insert(kif, skw, sks, st)) != 0) { /* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */ @@ -835,14 +524,15 @@ pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) goto cleanup_state; } - if (!ISSET(flags, PFSYNC_SI_IOCTL)) { - CLR(st->state_flags, PFSTATE_NOSYNC); - if (ISSET(st->state_flags, PFSTATE_ACK)) { + if (!(flags & PFSYNC_SI_IOCTL)) { + st->state_flags &= ~PFSTATE_NOSYNC; + if (st->state_flags & PFSTATE_ACK) { pfsync_q_ins(st, PFSYNC_S_IACK); - schednetisr(NETISR_PFSYNC); + pfsync_push(sc); } } - CLR(st->state_flags, PFSTATE_ACK); + st->state_flags &= ~PFSTATE_ACK; + PF_STATE_UNLOCK(st); return (0); @@ -850,49 +540,26 @@ cleanup: error = ENOMEM; if (skw == sks) sks = NULL; -#ifdef __FreeBSD__ if (skw != NULL) - pool_put(&V_pf_state_key_pl, skw); + uma_zfree(V_pf_state_key_z, skw); if (sks != NULL) - pool_put(&V_pf_state_key_pl, sks); -#else - if (skw != NULL) - pool_put(&pf_state_key_pl, skw); - if (sks != NULL) - pool_put(&pf_state_key_pl, sks); -#endif + uma_zfree(V_pf_state_key_z, sks); -cleanup_state: /* pf_state_insert frees the state keys */ +cleanup_state: /* pf_state_insert() frees the state keys. 
*/ if (st) { -#ifdef __FreeBSD__ if (st->dst.scrub) - pool_put(&V_pf_state_scrub_pl, st->dst.scrub); + uma_zfree(V_pf_state_scrub_z, st->dst.scrub); if (st->src.scrub) - pool_put(&V_pf_state_scrub_pl, st->src.scrub); - pool_put(&V_pf_state_pl, st); -#else - if (st->dst.scrub) - pool_put(&pf_state_scrub_pl, st->dst.scrub); - if (st->src.scrub) - pool_put(&pf_state_scrub_pl, st->src.scrub); - pool_put(&pf_state_pl, st); -#endif + uma_zfree(V_pf_state_scrub_z, st->src.scrub); + uma_zfree(V_pf_state_z, st); } return (error); } -void -#ifdef __FreeBSD__ +static void pfsync_input(struct mbuf *m, __unused int off) -#else -pfsync_input(struct mbuf *m, ...) -#endif { -#ifdef __FreeBSD__ struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif struct pfsync_pkt pkt; struct ip *ip = mtod(m, struct ip *); struct pfsync_header *ph; @@ -900,15 +567,13 @@ pfsync_input(struct mbuf *m, ...) int offset; int rv; + uint16_t count; V_pfsyncstats.pfsyncs_ipackets++; - /* verify that we have a sync interface configured */ -#ifdef __FreeBSD__ - if (!sc || !sc->sc_sync_if || !V_pf_status.running) -#else - if (!sc || !sc->sc_sync_if || !pf_status.running) -#endif + /* Verify that we have a sync interface configured. */ + if (!sc || !sc->sc_sync_if || !V_pf_status.running || + (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) goto done; /* verify that the packet came in on the right interface */ @@ -917,13 +582,8 @@ pfsync_input(struct mbuf *m, ...) goto done; } -#ifdef __FreeBSD__ sc->sc_ifp->if_ipackets++; sc->sc_ifp->if_ibytes += m->m_pkthdr.len; -#else - sc->sc_if.if_ipackets++; - sc->sc_if.if_ibytes += m->m_pkthdr.len; -#endif /* verify that the IP TTL is 255. */ if (ip->ip_ttl != PFSYNC_DFLTTL) { V_pfsyncstats.pfsyncs_badttl++; @@ -951,23 +611,17 @@ pfsync_input(struct mbuf *m, ...) goto done; } -#if 0 - if (pfsync_input_hmac(m, offset) != 0) { - /* XXX stats */ - goto done; - } -#endif - /* Cheaper to grab this now than having to mess with mbufs later */ pkt.ip = ip; pkt.src = ip->ip_src; pkt.flags = 0; -#ifdef __FreeBSD__ + /* + * Trusting pf_chksum during packet processing, as well as seeking + * in interface name tree, require holding PF_RULES_RLOCK(). + */ + PF_RULES_RLOCK(); if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) -#else - if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) -#endif pkt.flags |= PFSYNC_SI_CKSUM; offset += sizeof(*ph); @@ -977,34 +631,34 @@ pfsync_input(struct mbuf *m, ...) 
if (subh.action >= PFSYNC_ACT_MAX) { V_pfsyncstats.pfsyncs_badact++; + PF_RULES_RUNLOCK(); goto done; } - rv = (*pfsync_acts[subh.action])(&pkt, m, offset, - ntohs(subh.count)); - if (rv == -1) + count = ntohs(subh.count); + V_pfsyncstats.pfsyncs_iacts[subh.action] += count; + rv = (*pfsync_acts[subh.action])(&pkt, m, offset, count); + if (rv == -1) { + PF_RULES_RUNLOCK(); return; + } offset += rv; } + PF_RULES_RUNLOCK(); done: m_freem(m); } -int +static int pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct pfsync_clr *clr; struct mbuf *mp; int len = sizeof(*clr) * count; int i, offp; - - struct pf_state *st, *nexts; - struct pf_state_key *sk, *nextsk; - struct pf_state_item *si; u_int32_t creatorid; - int s; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { @@ -1013,64 +667,33 @@ pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) } clr = (struct pfsync_clr *)(mp->m_data + offp); - s = splsoftnet(); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif for (i = 0; i < count; i++) { creatorid = clr[i].creatorid; - if (clr[i].ifname[0] == '\0') { -#ifdef __FreeBSD__ - for (st = RB_MIN(pf_state_tree_id, &V_tree_id); - st; st = nexts) { - nexts = RB_NEXT(pf_state_tree_id, &V_tree_id, st); -#else - for (st = RB_MIN(pf_state_tree_id, &tree_id); - st; st = nexts) { - nexts = RB_NEXT(pf_state_tree_id, &tree_id, st); -#endif - if (st->creatorid == creatorid) { - SET(st->state_flags, PFSTATE_NOSYNC); - pf_unlink_state(st); - } - } - } else { - if (pfi_kif_get(clr[i].ifname) == NULL) - continue; + if (clr[i].ifname[0] != '\0' && + pfi_kif_find(clr[i].ifname) == NULL) + continue; - /* XXX correct? */ -#ifdef __FreeBSD__ - for (sk = RB_MIN(pf_state_tree, &V_pf_statetbl); -#else - for (sk = RB_MIN(pf_state_tree, &pf_statetbl); -#endif - sk; sk = nextsk) { - nextsk = RB_NEXT(pf_state_tree, -#ifdef __FreeBSD__ - &V_pf_statetbl, sk); -#else - &pf_statetbl, sk); -#endif - TAILQ_FOREACH(si, &sk->states, entry) { - if (si->s->creatorid == creatorid) { - SET(si->s->state_flags, - PFSTATE_NOSYNC); - pf_unlink_state(si->s); - } + for (int i = 0; i <= V_pf_hashmask; i++) { + struct pf_idhash *ih = &V_pf_idhash[i]; + struct pf_state *s; +relock: + PF_HASHROW_LOCK(ih); + LIST_FOREACH(s, &ih->states, entry) { + if (s->creatorid == creatorid) { + s->state_flags |= PFSTATE_NOSYNC; + pf_unlink_state(s, PF_ENTER_LOCKED); + goto relock; } } + PF_HASHROW_UNLOCK(ih); } } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - splx(s); return (len); } -int +static int pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct mbuf *mp; @@ -1078,8 +701,6 @@ pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) int len = sizeof(*sp) * count; int i, offp; - int s; - mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; @@ -1087,55 +708,38 @@ pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) } sa = (struct pfsync_state *)(mp->m_data + offp); - s = splsoftnet(); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif for (i = 0; i < count; i++) { sp = &sa[i]; - /* check for invalid values */ + /* Check for invalid values. 
*/ if (sp->timeout >= PFTM_MAX || sp->src.state > PF_TCPS_PROXY_DST || sp->dst.state > PF_TCPS_PROXY_DST || sp->direction > PF_OUT || (sp->af != AF_INET && sp->af != AF_INET6)) { -#ifdef __FreeBSD__ - if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif - printf("pfsync_input: PFSYNC5_ACT_INS: " - "invalid value\n"); - } + if (V_pf_status.debug >= PF_DEBUG_MISC) + printf("%s: invalid value\n", __func__); V_pfsyncstats.pfsyncs_badval++; continue; } - if (pfsync_state_import(sp, pkt->flags) == ENOMEM) { - /* drop out, but process the rest of the actions */ + if (pfsync_state_import(sp, pkt->flags) == ENOMEM) + /* Drop out, but process the rest of the actions. */ break; - } } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - splx(s); return (len); } -int +static int pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct pfsync_ins_ack *ia, *iaa; - struct pf_state_cmp id_key; struct pf_state *st; struct mbuf *mp; int len = count * sizeof(*ia); int offp, i; - int s; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { @@ -1144,27 +748,20 @@ pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) } iaa = (struct pfsync_ins_ack *)(mp->m_data + offp); - s = splsoftnet(); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif for (i = 0; i < count; i++) { ia = &iaa[i]; - bcopy(&ia->id, &id_key.id, sizeof(id_key.id)); - id_key.creatorid = ia->creatorid; - - st = pf_find_state_byid(&id_key); + st = pf_find_state_byid(ia->id, ia->creatorid); if (st == NULL) continue; - if (ISSET(st->state_flags, PFSTATE_ACK)) - pfsync_deferred(st, 0); + if (st->state_flags & PFSTATE_ACK) { + PFSYNC_LOCK(V_pfsyncif); + pfsync_undefer_state(st, 0); + PFSYNC_UNLOCK(V_pfsyncif); + } + PF_STATE_UNLOCK(st); } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - splx(s); /* * XXX this is not yet implemented, but we know the size of the * message so we can skip it. @@ -1173,12 +770,14 @@ pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) return (count * sizeof(struct pfsync_ins_ack)); } -int +static int pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src, struct pfsync_state_peer *dst) { int sfail = 0; + PF_STATE_LOCK_ASSERT(st); + /* * The state should never go backwards except * for syn-proxy states. 
Neither should the @@ -1207,11 +806,11 @@ pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src, return (sfail); } -int +static int pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { + struct pfsync_softc *sc = V_pfsyncif; struct pfsync_state *sa, *sp; - struct pf_state_cmp id_key; struct pf_state_key *sk; struct pf_state *st; int sfail; @@ -1219,7 +818,6 @@ pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) struct mbuf *mp; int len = count * sizeof(*sp); int offp, i; - int s; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { @@ -1228,10 +826,6 @@ pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) } sa = (struct pfsync_state *)(mp->m_data + offp); - s = splsoftnet(); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif for (i = 0; i < count; i++) { sp = &sa[i]; @@ -1239,11 +833,7 @@ pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) if (sp->timeout >= PFTM_MAX || sp->src.state > PF_TCPS_PROXY_DST || sp->dst.state > PF_TCPS_PROXY_DST) { -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif printf("pfsync_input: PFSYNC_ACT_UPD: " "invalid value\n"); } @@ -1251,10 +841,7 @@ pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) continue; } - bcopy(sp->id, &id_key.id, sizeof(id_key.id)); - id_key.creatorid = sp->creatorid; - - st = pf_find_state_byid(&id_key); + st = pf_find_state_byid(sp->id, sp->creatorid); if (st == NULL) { /* insert the update */ if (pfsync_state_import(sp, 0)) @@ -1262,8 +849,11 @@ pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) continue; } - if (ISSET(st->state_flags, PFSTATE_ACK)) - pfsync_deferred(st, 1); + if (st->state_flags & PFSTATE_ACK) { + PFSYNC_LOCK(sc); + pfsync_undefer_state(st, 1); + PFSYNC_UNLOCK(sc); + } sk = st->key[PF_SK_WIRE]; /* XXX right one? */ sfail = 0; @@ -1281,44 +871,40 @@ pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) } if (sfail) { -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif printf("pfsync: %s stale update (%d)" " id: %016llx creatorid: %08x\n", (sfail < 7 ? 
"ignoring" : "partial"), - sfail, betoh64(st->id), + sfail, (unsigned long long)be64toh(st->id), ntohl(st->creatorid)); } V_pfsyncstats.pfsyncs_stale++; pfsync_update_state(st); - schednetisr(NETISR_PFSYNC); + PF_STATE_UNLOCK(st); + PFSYNC_LOCK(sc); + pfsync_push(sc); + PFSYNC_UNLOCK(sc); continue; } pfsync_alloc_scrub_memory(&sp->dst, &st->dst); pf_state_peer_ntoh(&sp->src, &st->src); pf_state_peer_ntoh(&sp->dst, &st->dst); - st->expire = time_second; + st->expire = time_uptime; st->timeout = sp->timeout; st->pfsync_time = time_uptime; + PF_STATE_UNLOCK(st); } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - splx(s); return (len); } -int +static int pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { + struct pfsync_softc *sc = V_pfsyncif; struct pfsync_upd_c *ua, *up; struct pf_state_key *sk; - struct pf_state_cmp id_key; struct pf_state *st; int len = count * sizeof(*up); @@ -1326,7 +912,6 @@ pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) struct mbuf *mp; int offp, i; - int s; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { @@ -1335,10 +920,6 @@ pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) } ua = (struct pfsync_upd_c *)(mp->m_data + offp); - s = splsoftnet(); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif for (i = 0; i < count; i++) { up = &ua[i]; @@ -1346,11 +927,7 @@ pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) if (up->timeout >= PFTM_MAX || up->src.state > PF_TCPS_PROXY_DST || up->dst.state > PF_TCPS_PROXY_DST) { -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif printf("pfsync_input: " "PFSYNC_ACT_UPD_C: " "invalid value\n"); @@ -1359,18 +936,20 @@ pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) continue; } - bcopy(&up->id, &id_key.id, sizeof(id_key.id)); - id_key.creatorid = up->creatorid; - - st = pf_find_state_byid(&id_key); + st = pf_find_state_byid(up->id, up->creatorid); if (st == NULL) { /* We don't have this state. Ask for it. */ - pfsync_request_update(id_key.creatorid, id_key.id); + PFSYNC_LOCK(sc); + pfsync_request_update(up->creatorid, up->id); + PFSYNC_UNLOCK(sc); continue; } - if (ISSET(st->state_flags, PFSTATE_ACK)) - pfsync_deferred(st, 1); + if (st->state_flags & PFSTATE_ACK) { + PFSYNC_LOCK(sc); + pfsync_undefer_state(st, 1); + PFSYNC_UNLOCK(sc); + } sk = st->key[PF_SK_WIRE]; /* XXX right one? 
*/ sfail = 0; @@ -1387,39 +966,35 @@ pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) } if (sfail) { -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif printf("pfsync: ignoring stale update " "(%d) id: %016llx " "creatorid: %08x\n", sfail, - betoh64(st->id), + (unsigned long long)be64toh(st->id), ntohl(st->creatorid)); } V_pfsyncstats.pfsyncs_stale++; pfsync_update_state(st); - schednetisr(NETISR_PFSYNC); + PF_STATE_UNLOCK(st); + PFSYNC_LOCK(sc); + pfsync_push(sc); + PFSYNC_UNLOCK(sc); continue; } pfsync_alloc_scrub_memory(&up->dst, &st->dst); pf_state_peer_ntoh(&up->src, &st->src); pf_state_peer_ntoh(&up->dst, &st->dst); - st->expire = time_second; + st->expire = time_uptime; st->timeout = up->timeout; st->pfsync_time = time_uptime; + PF_STATE_UNLOCK(st); } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - splx(s); return (len); } -int +static int pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct pfsync_upd_req *ur, *ura; @@ -1427,7 +1002,6 @@ pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) int len = count * sizeof(*ur); int i, offp; - struct pf_state_cmp id_key; struct pf_state *st; mp = m_pulldown(m, offset, len, &offp); @@ -1437,46 +1011,38 @@ pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) } ura = (struct pfsync_upd_req *)(mp->m_data + offp); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif for (i = 0; i < count; i++) { ur = &ura[i]; - bcopy(&ur->id, &id_key.id, sizeof(id_key.id)); - id_key.creatorid = ur->creatorid; - - if (id_key.id == 0 && id_key.creatorid == 0) + if (ur->id == 0 && ur->creatorid == 0) pfsync_bulk_start(); else { - st = pf_find_state_byid(&id_key); + st = pf_find_state_byid(ur->id, ur->creatorid); if (st == NULL) { V_pfsyncstats.pfsyncs_badstate++; continue; } - if (ISSET(st->state_flags, PFSTATE_NOSYNC)) + if (st->state_flags & PFSTATE_NOSYNC) { + PF_STATE_UNLOCK(st); continue; + } pfsync_update_state_req(st); + PF_STATE_UNLOCK(st); } } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif return (len); } -int +static int pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct mbuf *mp; struct pfsync_state *sa, *sp; - struct pf_state_cmp id_key; struct pf_state *st; int len = count * sizeof(*sp); int offp, i; - int s; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { @@ -1485,42 +1051,29 @@ pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) } sa = (struct pfsync_state *)(mp->m_data + offp); - s = splsoftnet(); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif for (i = 0; i < count; i++) { sp = &sa[i]; - bcopy(sp->id, &id_key.id, sizeof(id_key.id)); - id_key.creatorid = sp->creatorid; - - st = pf_find_state_byid(&id_key); + st = pf_find_state_byid(sp->id, sp->creatorid); if (st == NULL) { V_pfsyncstats.pfsyncs_badstate++; continue; } - SET(st->state_flags, PFSTATE_NOSYNC); - pf_unlink_state(st); + st->state_flags |= PFSTATE_NOSYNC; + pf_unlink_state(st, PF_ENTER_LOCKED); } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - splx(s); return (len); } -int +static int pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct mbuf *mp; struct pfsync_del_c *sa, *sp; - struct pf_state_cmp id_key; struct pf_state *st; int len = count * sizeof(*sp); int offp, i; - int s; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { @@ -1529,52 +1082,42 @@ pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 
} sa = (struct pfsync_del_c *)(mp->m_data + offp); - s = splsoftnet(); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif for (i = 0; i < count; i++) { sp = &sa[i]; - bcopy(&sp->id, &id_key.id, sizeof(id_key.id)); - id_key.creatorid = sp->creatorid; - - st = pf_find_state_byid(&id_key); + st = pf_find_state_byid(sp->id, sp->creatorid); if (st == NULL) { V_pfsyncstats.pfsyncs_badstate++; continue; } - SET(st->state_flags, PFSTATE_NOSYNC); - pf_unlink_state(st); + st->state_flags |= PFSTATE_NOSYNC; + pf_unlink_state(st, PF_ENTER_LOCKED); } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - splx(s); return (len); } -int +static int pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { -#ifdef __FreeBSD__ struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif struct pfsync_bus *bus; struct mbuf *mp; int len = count * sizeof(*bus); int offp; + PFSYNC_BLOCK(sc); + /* If we're not waiting for a bulk update, who cares. */ - if (sc->sc_ureq_sent == 0) + if (sc->sc_ureq_sent == 0) { + PFSYNC_BUNLOCK(sc); return (len); + } mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { + PFSYNC_BUNLOCK(sc); V_pfsyncstats.pfsyncs_badlen++; return (-1); } @@ -1582,23 +1125,12 @@ pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) switch (bus->status) { case PFSYNC_BUS_START: -#ifdef __FreeBSD__ callout_reset(&sc->sc_bulkfail_tmo, 4 * hz + - V_pf_pool_limits[PF_LIMIT_STATES].limit / + V_pf_limits[PF_LIMIT_STATES].limit / ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) / sizeof(struct pfsync_state)), - pfsync_bulk_fail, V_pfsyncif); -#else - timeout_add(&sc->sc_bulkfail_tmo, 4 * hz + - pf_pool_limits[PF_LIMIT_STATES].limit / - ((sc->sc_if.if_mtu - PFSYNC_MINPKT) / - sizeof(struct pfsync_state))); -#endif -#ifdef __FreeBSD__ + pfsync_bulk_fail, sc); if (V_pf_status.debug >= PF_DEBUG_MISC) -#else - if (pf_status.debug >= PF_DEBUG_MISC) -#endif printf("pfsync: received bulk update start\n"); break; @@ -1608,42 +1140,27 @@ pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) /* that's it, we're happy */ sc->sc_ureq_sent = 0; sc->sc_bulk_tries = 0; - timeout_del(&sc->sc_bulkfail_tmo); -#ifdef __FreeBSD__ - if (!sc->pfsync_sync_ok && carp_demote_adj_p) + callout_stop(&sc->sc_bulkfail_tmo); + if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync bulk done"); - sc->pfsync_sync_ok = 1; -#else -#if NCARP > 0 - if (!pfsync_sync_ok) - carp_group_demote_adj(&sc->sc_if, -1); -#endif - pfsync_sync_ok = 1; -#endif -#ifdef __FreeBSD__ + sc->sc_flags |= PFSYNCF_OK; if (V_pf_status.debug >= PF_DEBUG_MISC) -#else - if (pf_status.debug >= PF_DEBUG_MISC) -#endif printf("pfsync: received valid " "bulk update end\n"); } else { -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) -#else - if (pf_status.debug >= PF_DEBUG_MISC) -#endif printf("pfsync: received invalid " "bulk update end: bad timestamp\n"); } break; } + PFSYNC_BUNLOCK(sc); return (len); } -int +static int pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { int len = count * sizeof(struct pfsync_tdb); @@ -1662,16 +1179,8 @@ pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) } tp = (struct pfsync_tdb *)(mp->m_data + offp); - s = splsoftnet(); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif for (i = 0; i < count; i++) pfsync_update_net_tdb(&tp[i]); -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - splx(s); #endif return (len); @@ -1679,7 +1188,7 @@ pfsync_in_tdb(struct pfsync_pkt *pkt, struct 
mbuf *m, int offset, int count) #if defined(IPSEC) /* Update an in-kernel tdb. Silently fail if no tdb is found. */ -void +static void pfsync_update_net_tdb(struct pfsync_tdb *pt) { struct tdb *tdb; @@ -1688,34 +1197,27 @@ pfsync_update_net_tdb(struct pfsync_tdb *pt) /* check for invalid values */ if (ntohl(pt->spi) <= SPI_RESERVED_MAX || (pt->dst.sa.sa_family != AF_INET && - pt->dst.sa.sa_family != AF_INET6)) + pt->dst.sa.sa_family != AF_INET6)) goto bad; - s = spltdb(); tdb = gettdb(pt->spi, &pt->dst, pt->sproto); if (tdb) { pt->rpl = ntohl(pt->rpl); - pt->cur_bytes = betoh64(pt->cur_bytes); + pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes); /* Neither replay nor byte counter should ever decrease. */ if (pt->rpl < tdb->tdb_rpl || pt->cur_bytes < tdb->tdb_cur_bytes) { - splx(s); goto bad; } tdb->tdb_rpl = pt->rpl; tdb->tdb_cur_bytes = pt->cur_bytes; } - splx(s); return; bad: -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) -#else - if (pf_status.debug >= PF_DEBUG_MISC) -#endif printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " "invalid value\n"); V_pfsyncstats.pfsyncs_badstate++; @@ -1724,7 +1226,7 @@ bad: #endif -int +static int pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { /* check if we are at the right place in the packet */ @@ -1736,7 +1238,7 @@ pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) return (-1); } -int +static int pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { V_pfsyncstats.pfsyncs_badact++; @@ -1745,51 +1247,31 @@ pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) return (-1); } -int +static int pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, -#ifdef __FreeBSD__ struct route *rt) -#else - struct rtentry *rt) -#endif { m_freem(m); return (0); } /* ARGSUSED */ -int +static int pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { -#ifndef __FreeBSD__ - struct proc *p = curproc; -#endif struct pfsync_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; - struct ip_moptions *imo = &sc->sc_imo; struct pfsyncreq pfsyncr; - struct ifnet *sifp; - struct ip *ip; - int s, error; + int error; switch (cmd) { -#if 0 - case SIOCSIFADDR: - case SIOCAIFADDR: - case SIOCSIFDSTADDR: -#endif case SIOCSIFFLAGS: -#ifdef __FreeBSD__ + PFSYNC_LOCK(sc); if (ifp->if_flags & IFF_UP) ifp->if_drv_flags |= IFF_DRV_RUNNING; else ifp->if_drv_flags &= ~IFF_DRV_RUNNING; -#else - if (ifp->if_flags & IFF_UP) - ifp->if_flags |= IFF_RUNNING; - else - ifp->if_flags &= ~IFF_RUNNING; -#endif + PFSYNC_UNLOCK(sc); break; case SIOCSIFMTU: if (!sc->sc_sync_if || @@ -1797,201 +1279,128 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) ifr->ifr_mtu > sc->sc_sync_if->if_mtu) return (EINVAL); if (ifr->ifr_mtu < ifp->if_mtu) { - s = splnet(); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif - pfsync_sendout(); -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - splx(s); + PFSYNC_LOCK(sc); + if (sc->sc_len > PFSYNC_MINPKT) + pfsync_sendout(1); + PFSYNC_UNLOCK(sc); } ifp->if_mtu = ifr->ifr_mtu; break; case SIOCGETPFSYNC: bzero(&pfsyncr, sizeof(pfsyncr)); + PFSYNC_LOCK(sc); if (sc->sc_sync_if) { strlcpy(pfsyncr.pfsyncr_syncdev, sc->sc_sync_if->if_xname, IFNAMSIZ); } pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; - pfsyncr.pfsyncr_defer = sc->sc_defer; + pfsyncr.pfsyncr_defer = (PFSYNCF_DEFER == + (sc->sc_flags & PFSYNCF_DEFER)); + PFSYNC_UNLOCK(sc); return (copyout(&pfsyncr, ifr->ifr_data, 
sizeof(pfsyncr))); case SIOCSETPFSYNC: -#ifdef __FreeBSD__ + { + struct ip_moptions *imo = &sc->sc_imo; + struct ifnet *sifp; + struct ip *ip; + void *mship = NULL; + if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) -#else - if ((error = suser(p, p->p_acflag)) != 0) -#endif return (error); if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr)))) return (error); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif + if (pfsyncr.pfsyncr_maxupdates > 255) + return (EINVAL); + + if (pfsyncr.pfsyncr_syncdev[0] == 0) + sifp = NULL; + else if ((sifp = ifunit_ref(pfsyncr.pfsyncr_syncdev)) == NULL) + return (EINVAL); + + if (pfsyncr.pfsyncr_syncpeer.s_addr == 0 && sifp != NULL) + mship = malloc((sizeof(struct in_multi *) * + IP_MIN_MEMBERSHIPS), M_PFSYNC, M_WAITOK | M_ZERO); + + PFSYNC_LOCK(sc); if (pfsyncr.pfsyncr_syncpeer.s_addr == 0) -#ifdef __FreeBSD__ sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP); -#else - sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP; -#endif else sc->sc_sync_peer.s_addr = pfsyncr.pfsyncr_syncpeer.s_addr; - if (pfsyncr.pfsyncr_maxupdates > 255) -#ifdef __FreeBSD__ - { - PF_UNLOCK(); -#endif - return (EINVAL); -#ifdef __FreeBSD__ - } -#endif sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; - sc->sc_defer = pfsyncr.pfsyncr_defer; + if (pfsyncr.pfsyncr_defer) { + sc->sc_flags |= PFSYNCF_DEFER; + pfsync_defer_ptr = pfsync_defer; + } else { + sc->sc_flags &= ~PFSYNCF_DEFER; + pfsync_defer_ptr = NULL; + } - if (pfsyncr.pfsyncr_syncdev[0] == 0) { + if (sifp == NULL) { + if (sc->sc_sync_if) + if_rele(sc->sc_sync_if); sc->sc_sync_if = NULL; -#ifdef __FreeBSD__ - PF_UNLOCK(); if (imo->imo_membership) pfsync_multicast_cleanup(sc); -#else - if (imo->imo_num_memberships > 0) { - in_delmulti(imo->imo_membership[ - --imo->imo_num_memberships]); - imo->imo_multicast_ifp = NULL; - } -#endif + PFSYNC_UNLOCK(sc); break; } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) - return (EINVAL); - -#ifdef __FreeBSD__ - PF_LOCK(); -#endif - s = splnet(); -#ifdef __FreeBSD__ - if (sifp->if_mtu < sc->sc_ifp->if_mtu || -#else - if (sifp->if_mtu < sc->sc_if.if_mtu || -#endif + if (sc->sc_len > PFSYNC_MINPKT && + (sifp->if_mtu < sc->sc_ifp->if_mtu || (sc->sc_sync_if != NULL && sifp->if_mtu < sc->sc_sync_if->if_mtu) || - sifp->if_mtu < MCLBYTES - sizeof(struct ip)) - pfsync_sendout(); - sc->sc_sync_if = sifp; + sifp->if_mtu < MCLBYTES - sizeof(struct ip))) + pfsync_sendout(1); -#ifdef __FreeBSD__ - if (imo->imo_membership) { - PF_UNLOCK(); + if (imo->imo_membership) pfsync_multicast_cleanup(sc); - PF_LOCK(); - } -#else - if (imo->imo_num_memberships > 0) { - in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); - imo->imo_multicast_ifp = NULL; - } -#endif -#ifdef __FreeBSD__ - if (sc->sc_sync_if && - sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) { - PF_UNLOCK(); - error = pfsync_multicast_setup(sc); - if (error) + if (sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) { + error = pfsync_multicast_setup(sc, sifp, mship); + if (error) { + if_rele(sifp); + free(mship, M_PFSYNC); return (error); - PF_LOCK(); - } -#else - if (sc->sc_sync_if && - sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { - struct in_addr addr; - - if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) { - sc->sc_sync_if = NULL; - splx(s); - return (EADDRNOTAVAIL); } - - addr.s_addr = INADDR_PFSYNC_GROUP; - - if ((imo->imo_membership[0] = - in_addmulti(&addr, sc->sc_sync_if)) == NULL) { - sc->sc_sync_if = NULL; - splx(s); - return (ENOBUFS); - } - 
imo->imo_num_memberships++; - imo->imo_multicast_ifp = sc->sc_sync_if; - imo->imo_multicast_ttl = PFSYNC_DFLTTL; - imo->imo_multicast_loop = 0; } -#endif /* !__FreeBSD__ */ + if (sc->sc_sync_if) + if_rele(sc->sc_sync_if); + sc->sc_sync_if = sifp; ip = &sc->sc_template; bzero(ip, sizeof(*ip)); ip->ip_v = IPVERSION; ip->ip_hl = sizeof(sc->sc_template) >> 2; ip->ip_tos = IPTOS_LOWDELAY; - /* len and id are set later */ -#ifdef __FreeBSD__ + /* len and id are set later. */ ip->ip_off = IP_DF; -#else - ip->ip_off = htons(IP_DF); -#endif ip->ip_ttl = PFSYNC_DFLTTL; ip->ip_p = IPPROTO_PFSYNC; ip->ip_src.s_addr = INADDR_ANY; ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr; - if (sc->sc_sync_if) { - /* Request a full state table update. */ - sc->sc_ureq_sent = time_uptime; -#ifdef __FreeBSD__ - if (sc->pfsync_sync_ok && carp_demote_adj_p) - (*carp_demote_adj_p)(V_pfsync_carp_adj, - "pfsync bulk start"); - sc->pfsync_sync_ok = 0; -#else -#if NCARP > 0 - if (pfsync_sync_ok) - carp_group_demote_adj(&sc->sc_if, 1); -#endif - pfsync_sync_ok = 0; -#endif -#ifdef __FreeBSD__ - if (V_pf_status.debug >= PF_DEBUG_MISC) -#else - if (pf_status.debug >= PF_DEBUG_MISC) -#endif - printf("pfsync: requesting bulk update\n"); -#ifdef __FreeBSD__ - callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, - pfsync_bulk_fail, V_pfsyncif); -#else - timeout_add_sec(&sc->sc_bulkfail_tmo, 5); -#endif - pfsync_request_update(0, 0); - } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - splx(s); + /* Request a full state table update. */ + if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) + (*carp_demote_adj_p)(V_pfsync_carp_adj, + "pfsync bulk start"); + sc->sc_flags &= ~PFSYNCF_OK; + if (V_pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: requesting bulk update\n"); + pfsync_request_update(0, 0); + PFSYNC_UNLOCK(sc); + PFSYNC_BLOCK(sc); + sc->sc_ureq_sent = time_uptime; + callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, + sc); + PFSYNC_BUNLOCK(sc); break; - + } default: return (ENOTTY); } @@ -1999,7 +1408,7 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) return (0); } -int +static int pfsync_out_state(struct pf_state *st, struct mbuf *m, int offset) { struct pfsync_state *sp = (struct pfsync_state *)(m->m_data + offset); @@ -2009,7 +1418,7 @@ pfsync_out_state(struct pf_state *st, struct mbuf *m, int offset) return (sizeof(*sp)); } -int +static int pfsync_out_iack(struct pf_state *st, struct mbuf *m, int offset) { struct pfsync_ins_ack *iack = @@ -2021,7 +1430,7 @@ pfsync_out_iack(struct pf_state *st, struct mbuf *m, int offset) return (sizeof(*iack)); } -int +static int pfsync_out_upd_c(struct pf_state *st, struct mbuf *m, int offset) { struct pfsync_upd_c *up = (struct pfsync_upd_c *)(m->m_data + offset); @@ -2036,7 +1445,7 @@ pfsync_out_upd_c(struct pf_state *st, struct mbuf *m, int offset) return (sizeof(*up)); } -int +static int pfsync_out_del(struct pf_state *st, struct mbuf *m, int offset) { struct pfsync_del_c *dp = (struct pfsync_del_c *)(m->m_data + offset); @@ -2044,139 +1453,71 @@ pfsync_out_del(struct pf_state *st, struct mbuf *m, int offset) dp->id = st->id; dp->creatorid = st->creatorid; - SET(st->state_flags, PFSTATE_NOSYNC); + st->state_flags |= PFSTATE_NOSYNC; return (sizeof(*dp)); } -void +static void pfsync_drop(struct pfsync_softc *sc) { - struct pf_state *st; + struct pf_state *st, *next; struct pfsync_upd_req_item *ur; -#ifdef notyet - struct tdb *t; -#endif int q; for (q = 0; q < PFSYNC_S_COUNT; q++) { if (TAILQ_EMPTY(&sc->sc_qs[q])) continue; - TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) { 
-#ifdef PFSYNC_DEBUG -#ifdef __FreeBSD__ + TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, next) { KASSERT(st->sync_state == q, ("%s: st->sync_state == q", - __FUNCTION__)); -#else - KASSERT(st->sync_state == q); -#endif -#endif + __func__)); st->sync_state = PFSYNC_S_NONE; + pf_release_state(st); } TAILQ_INIT(&sc->sc_qs[q]); } while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); - pool_put(&sc->sc_pool, ur); + free(ur, M_PFSYNC); } sc->sc_plus = NULL; - -#ifdef notyet - if (!TAILQ_EMPTY(&sc->sc_tdb_q)) { - TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) - CLR(t->tdb_flags, TDBF_PFSYNC); - - TAILQ_INIT(&sc->sc_tdb_q); - } -#endif - sc->sc_len = PFSYNC_MINPKT; } -#ifdef __FreeBSD__ -void pfsync_sendout() -{ - pfsync_sendout1(1); -} - static void -pfsync_sendout1(int schedswi) +pfsync_sendout(int schedswi) { struct pfsync_softc *sc = V_pfsyncif; -#else -void -pfsync_sendout(void) -{ - struct pfsync_softc *sc = pfsyncif; -#endif -#if NBPFILTER > 0 -#ifdef __FreeBSD__ struct ifnet *ifp = sc->sc_ifp; -#else - struct ifnet *ifp = &sc->sc_if; -#endif -#endif struct mbuf *m; struct ip *ip; struct pfsync_header *ph; struct pfsync_subheader *subh; - struct pf_state *st; + struct pf_state *st, *next; struct pfsync_upd_req_item *ur; -#ifdef notyet - struct tdb *t; -#endif int offset; int q, count = 0; -#ifdef __FreeBSD__ - PF_LOCK_ASSERT(); -#else - splassert(IPL_NET); -#endif - - if (sc == NULL || sc->sc_len == PFSYNC_MINPKT) - return; + KASSERT(sc != NULL, ("%s: null sc", __func__)); + KASSERT(sc->sc_len > PFSYNC_MINPKT, + ("%s: sc_len %zu", __func__, sc->sc_len)); + PFSYNC_LOCK_ASSERT(sc); -#if NBPFILTER > 0 if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) { -#else - if (sc->sc_sync_if == NULL) { -#endif pfsync_drop(sc); return; } -#ifdef __FreeBSD__ m = m_get2(M_NOWAIT, MT_DATA, M_PKTHDR, max_linkhdr + sc->sc_len); if (m == NULL) { sc->sc_ifp->if_oerrors++; V_pfsyncstats.pfsyncs_onomem++; return; } -#else - MGETHDR(m, M_DONTWAIT, MT_DATA); - if (m == NULL) { - sc->sc_if.if_oerrors++; - pfsyncstats.pfsyncs_onomem++; - pfsync_drop(sc); - return; - } - - if (max_linkhdr + sc->sc_len > MHLEN) { - MCLGETI(m, M_DONTWAIT, NULL, max_linkhdr + sc->sc_len); - if (!ISSET(m->m_flags, M_EXT)) { - m_free(m); - sc->sc_if.if_oerrors++; - pfsyncstats.pfsyncs_onomem++; - pfsync_drop(sc); - return; - } - } -#endif m->m_data += max_linkhdr; m->m_len = m->m_pkthdr.len = sc->sc_len; @@ -2185,11 +1526,7 @@ pfsync_sendout(void) bcopy(&sc->sc_template, ip, sizeof(*ip)); offset = sizeof(*ip); -#ifdef __FreeBSD__ ip->ip_len = m->m_pkthdr.len; -#else - ip->ip_len = htons(m->m_pkthdr.len); -#endif ip->ip_id = htons(ip_randomid()); /* build the pfsync header */ @@ -2199,11 +1536,7 @@ pfsync_sendout(void) ph->version = PFSYNC_VERSION; ph->len = htons(sc->sc_len - sizeof(*ip)); -#ifdef __FreeBSD__ bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); -#else - bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); -#endif /* walk the queues */ for (q = 0; q < PFSYNC_S_COUNT; q++) { @@ -2214,19 +1547,17 @@ pfsync_sendout(void) offset += sizeof(*subh); count = 0; - TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) { -#ifdef PFSYNC_DEBUG -#ifdef __FreeBSD__ + TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, next) { KASSERT(st->sync_state == q, ("%s: st->sync_state == q", - __FUNCTION__)); -#else - KASSERT(st->sync_state == q); -#endif -#endif - + __func__)); + /* + * XXXGL: some of write methods do unlocked reads + * of state data :( + */ offset += 
pfsync_qs[q].write(st, m, offset); st->sync_state = PFSYNC_S_NONE; + pf_release_state(st); count++; } TAILQ_INIT(&sc->sc_qs[q]); @@ -2234,6 +1565,7 @@ pfsync_sendout(void) bzero(subh, sizeof(*subh)); subh->action = pfsync_qs[q].action; subh->count = htons(count); + V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count; } if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) { @@ -2247,15 +1579,14 @@ pfsync_sendout(void) bcopy(&ur->ur_msg, m->m_data + offset, sizeof(ur->ur_msg)); offset += sizeof(ur->ur_msg); - - pool_put(&sc->sc_pool, ur); - + free(ur, M_PFSYNC); count++; } bzero(subh, sizeof(*subh)); subh->action = PFSYNC_ACT_UPD_REQ; subh->count = htons(count); + V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count; } /* has someone built a custom region for us to add? */ @@ -2266,45 +1597,21 @@ pfsync_sendout(void) sc->sc_plus = NULL; } -#ifdef notyet - if (!TAILQ_EMPTY(&sc->sc_tdb_q)) { - subh = (struct pfsync_subheader *)(m->m_data + offset); - offset += sizeof(*subh); - - count = 0; - TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) { - offset += pfsync_out_tdb(t, m, offset); - CLR(t->tdb_flags, TDBF_PFSYNC); - - count++; - } - TAILQ_INIT(&sc->sc_tdb_q); - - bzero(subh, sizeof(*subh)); - subh->action = PFSYNC_ACT_TDB; - subh->count = htons(count); - } -#endif - subh = (struct pfsync_subheader *)(m->m_data + offset); offset += sizeof(*subh); bzero(subh, sizeof(*subh)); subh->action = PFSYNC_ACT_EOF; subh->count = htons(1); + V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++; /* XXX write checksum in EOF here */ /* we're done, let's put it on the wire */ -#if NBPFILTER > 0 if (ifp->if_bpf) { m->m_data += sizeof(*ip); m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip); -#ifdef __FreeBSD__ BPF_MTAP(ifp, m); -#else - bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); -#endif m->m_data -= sizeof(*ip); m->m_len = m->m_pkthdr.len = sc->sc_len; } @@ -2314,9 +1621,7 @@ pfsync_sendout(void) m_freem(m); return; } -#endif -#ifdef __FreeBSD__ sc->sc_ifp->if_opackets++; sc->sc_ifp->if_obytes += m->m_pkthdr.len; sc->sc_len = PFSYNC_MINPKT; @@ -2325,241 +1630,170 @@ pfsync_sendout(void) _IF_ENQUEUE(&sc->sc_ifp->if_snd, m); else { m_freem(m); - sc->sc_ifp->if_snd.ifq_drops++; + sc->sc_ifp->if_snd.ifq_drops++; } if (schedswi) swi_sched(V_pfsync_swi_cookie, 0); -#else - sc->sc_if.if_opackets++; - sc->sc_if.if_obytes += m->m_pkthdr.len; - - if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL) == 0) - pfsyncstats.pfsyncs_opackets++; - else - pfsyncstats.pfsyncs_oerrors++; - - /* start again */ - sc->sc_len = PFSYNC_MINPKT; -#endif } -void +static void pfsync_insert_state(struct pf_state *st) { -#ifdef __FreeBSD__ struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif -#ifdef __FreeBSD__ - PF_LOCK_ASSERT(); -#else - splassert(IPL_SOFTNET); -#endif + if (st->state_flags & PFSTATE_NOSYNC) + return; - if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) || + if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) || st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) { - SET(st->state_flags, PFSTATE_NOSYNC); + st->state_flags |= PFSTATE_NOSYNC; return; } - if (sc == NULL || ISSET(st->state_flags, PFSTATE_NOSYNC)) - return; - -#ifdef PFSYNC_DEBUG -#ifdef __FreeBSD__ KASSERT(st->sync_state == PFSYNC_S_NONE, - ("%s: st->sync_state == PFSYNC_S_NONE", __FUNCTION__)); -#else - KASSERT(st->sync_state == PFSYNC_S_NONE); -#endif -#endif + ("%s: st->sync_state == PFSYNC_S_NONE", __func__)); + PFSYNC_LOCK(sc); if (sc->sc_len == PFSYNC_MINPKT) -#ifdef __FreeBSD__ - callout_reset(&sc->sc_tmo, 1 * hz, 
pfsync_timeout, - V_pfsyncif); -#else - timeout_add_sec(&sc->sc_tmo, 1); -#endif + callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); pfsync_q_ins(st, PFSYNC_S_INS); + PFSYNC_UNLOCK(sc); st->sync_updates = 0; } -int defer = 10; - -int +static int pfsync_defer(struct pf_state *st, struct mbuf *m) { -#ifdef __FreeBSD__ struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif struct pfsync_deferral *pd; -#ifdef __FreeBSD__ - PF_LOCK_ASSERT(); -#else - splassert(IPL_SOFTNET); -#endif + if (m->m_flags & (M_BCAST|M_MCAST)) + return (0); + + PFSYNC_LOCK(sc); - if (!sc->sc_defer || m->m_flags & (M_BCAST|M_MCAST)) + if (sc == NULL || !(sc->sc_ifp->if_flags & IFF_DRV_RUNNING) || + !(sc->sc_flags & PFSYNCF_DEFER)) { + PFSYNC_UNLOCK(sc); return (0); + } - if (sc->sc_deferred >= 128) + if (sc->sc_deferred >= 128) pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0); - pd = pool_get(&sc->sc_pool, M_NOWAIT); + pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT); if (pd == NULL) return (0); sc->sc_deferred++; -#ifdef __FreeBSD__ m->m_flags |= M_SKIP_FIREWALL; -#else - m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; -#endif - SET(st->state_flags, PFSTATE_ACK); + st->state_flags |= PFSTATE_ACK; + pd->pd_sc = sc; + pd->pd_refs = 0; pd->pd_st = st; + pf_ref_state(st); pd->pd_m = m; TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry); -#ifdef __FreeBSD__ - callout_init(&pd->pd_tmo, CALLOUT_MPSAFE); - callout_reset(&pd->pd_tmo, defer, pfsync_defer_tmo, - pd); -#else - timeout_set(&pd->pd_tmo, pfsync_defer_tmo, pd); - timeout_add(&pd->pd_tmo, defer); -#endif + callout_init_mtx(&pd->pd_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); + callout_reset(&pd->pd_tmo, 10, pfsync_defer_tmo, pd); - swi_sched(V_pfsync_swi_cookie, 0); + pfsync_push(sc); return (1); } -void +static void pfsync_undefer(struct pfsync_deferral *pd, int drop) { -#ifdef __FreeBSD__ - struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif - int s; + struct pfsync_softc *sc = pd->pd_sc; + struct mbuf *m = pd->pd_m; + struct pf_state *st = pd->pd_st; -#ifdef __FreeBSD__ - PF_LOCK_ASSERT(); -#else - splassert(IPL_SOFTNET); -#endif + PFSYNC_LOCK_ASSERT(sc); TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); sc->sc_deferred--; + pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ + free(pd, M_PFSYNC); + pf_release_state(st); - CLR(pd->pd_st->state_flags, PFSTATE_ACK); - timeout_del(&pd->pd_tmo); /* bah */ if (drop) - m_freem(pd->pd_m); + m_freem(m); else { - s = splnet(); -#ifdef __FreeBSD__ - /* XXX: use pf_defered?! */ - PF_UNLOCK(); -#endif - ip_output(pd->pd_m, (void *)NULL, (void *)NULL, 0, - (void *)NULL, (void *)NULL); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif - splx(s); + _IF_ENQUEUE(&sc->sc_ifp->if_snd, m); + pfsync_push(sc); } - - pool_put(&sc->sc_pool, pd); } -void +static void pfsync_defer_tmo(void *arg) { -#if defined(__FreeBSD__) && defined(VIMAGE) struct pfsync_deferral *pd = arg; -#endif - int s; + struct pfsync_softc *sc = pd->pd_sc; + struct mbuf *m = pd->pd_m; + struct pf_state *st = pd->pd_st; + + PFSYNC_LOCK_ASSERT(sc); + + CURVNET_SET(m->m_pkthdr.rcvif->if_vnet); + + TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); + sc->sc_deferred--; + pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! 
*/ + if (pd->pd_refs == 0) + free(pd, M_PFSYNC); + PFSYNC_UNLOCK(sc); + + ip_output(m, NULL, NULL, 0, NULL, NULL); + + pf_release_state(st); - s = splsoftnet(); -#ifdef __FreeBSD__ - CURVNET_SET(pd->pd_m->m_pkthdr.rcvif->if_vnet); /* XXX */ - PF_LOCK(); -#endif - pfsync_undefer(arg, 0); -#ifdef __FreeBSD__ - PF_UNLOCK(); CURVNET_RESTORE(); -#endif - splx(s); } -void -pfsync_deferred(struct pf_state *st, int drop) +static void +pfsync_undefer_state(struct pf_state *st, int drop) { -#ifdef __FreeBSD__ struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif struct pfsync_deferral *pd; + PFSYNC_LOCK_ASSERT(sc); + TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) { if (pd->pd_st == st) { - pfsync_undefer(pd, drop); + if (callout_stop(&pd->pd_tmo)) + pfsync_undefer(pd, drop); return; } } - panic("pfsync_send_deferred: unable to find deferred state"); + panic("%s: unable to find deferred state", __func__); } -u_int pfsync_upds = 0; - -void +static void pfsync_update_state(struct pf_state *st) { -#ifdef __FreeBSD__ struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif int sync = 0; -#ifdef __FreeBSD__ - PF_LOCK_ASSERT(); -#else - splassert(IPL_SOFTNET); -#endif + PF_STATE_LOCK_ASSERT(st); + PFSYNC_LOCK(sc); - if (sc == NULL) - return; - - if (ISSET(st->state_flags, PFSTATE_ACK)) - pfsync_deferred(st, 0); - if (ISSET(st->state_flags, PFSTATE_NOSYNC)) { + if (st->state_flags & PFSTATE_ACK) + pfsync_undefer_state(st, 0); + if (st->state_flags & PFSTATE_NOSYNC) { if (st->sync_state != PFSYNC_S_NONE) pfsync_q_del(st); + PFSYNC_UNLOCK(sc); return; } if (sc->sc_len == PFSYNC_MINPKT) -#ifdef __FreeBSD__ - callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, - V_pfsyncif); -#else - timeout_add_sec(&sc->sc_tmo, 1); -#endif + callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); switch (st->sync_state) { case PFSYNC_S_UPD_C: @@ -2582,40 +1816,31 @@ pfsync_update_state(struct pf_state *st) break; default: - panic("pfsync_update_state: unexpected sync state %d", - st->sync_state); + panic("%s: unexpected sync state %d", __func__, st->sync_state); } - if (sync || (time_uptime - st->pfsync_time) < 2) { - pfsync_upds++; - schednetisr(NETISR_PFSYNC); - } + if (sync || (time_uptime - st->pfsync_time) < 2) + pfsync_push(sc); + + PFSYNC_UNLOCK(sc); } -void +static void pfsync_request_update(u_int32_t creatorid, u_int64_t id) { -#ifdef __FreeBSD__ struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif struct pfsync_upd_req_item *item; size_t nlen = sizeof(struct pfsync_upd_req); - int s; - PF_LOCK_ASSERT(); + PFSYNC_LOCK_ASSERT(sc); /* - * this code does nothing to prevent multiple update requests for the + * This code does nothing to prevent multiple update requests for the * same state being generated. 
*/ - - item = pool_get(&sc->sc_pool, PR_NOWAIT); - if (item == NULL) { - /* XXX stats */ - return; - } + item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT); + if (item == NULL) + return; /* XXX stats */ item->ur_msg.id = id; item->ur_msg.creatorid = creatorid; @@ -2623,14 +1848,8 @@ pfsync_request_update(u_int32_t creatorid, u_int64_t id) if (TAILQ_EMPTY(&sc->sc_upd_req_list)) nlen += sizeof(struct pfsync_subheader); -#ifdef __FreeBSD__ if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) { -#else - if (sc->sc_len + nlen > sc->sc_if.if_mtu) { -#endif - s = splnet(); - pfsync_sendout(); - splx(s); + pfsync_sendout(1); nlen = sizeof(struct pfsync_subheader) + sizeof(struct pfsync_upd_req); @@ -2639,26 +1858,21 @@ pfsync_request_update(u_int32_t creatorid, u_int64_t id) TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry); sc->sc_len += nlen; - schednetisr(NETISR_PFSYNC); + pfsync_push(sc); } -void +static void pfsync_update_state_req(struct pf_state *st) { -#ifdef __FreeBSD__ struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif - - PF_LOCK_ASSERT(); - if (sc == NULL) - panic("pfsync_update_state_req: nonexistant instance"); + PF_STATE_LOCK_ASSERT(st); + PFSYNC_LOCK(sc); - if (ISSET(st->state_flags, PFSTATE_NOSYNC)) { + if (st->state_flags & PFSTATE_NOSYNC) { if (st->sync_state != PFSYNC_S_NONE) pfsync_q_del(st); + PFSYNC_UNLOCK(sc); return; } @@ -2668,60 +1882,45 @@ pfsync_update_state_req(struct pf_state *st) pfsync_q_del(st); case PFSYNC_S_NONE: pfsync_q_ins(st, PFSYNC_S_UPD); - schednetisr(NETISR_PFSYNC); - return; + pfsync_push(sc); + break; case PFSYNC_S_INS: case PFSYNC_S_UPD: case PFSYNC_S_DEL: /* we're already handling it */ - return; + break; default: - panic("pfsync_update_state_req: unexpected sync state %d", - st->sync_state); + panic("%s: unexpected sync state %d", __func__, st->sync_state); } + + PFSYNC_UNLOCK(sc); } -void +static void pfsync_delete_state(struct pf_state *st) { -#ifdef __FreeBSD__ struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif -#ifdef __FreeBSD__ - PF_LOCK_ASSERT(); -#else - splassert(IPL_SOFTNET); -#endif - - if (sc == NULL) - return; - - if (ISSET(st->state_flags, PFSTATE_ACK)) - pfsync_deferred(st, 1); - if (ISSET(st->state_flags, PFSTATE_NOSYNC)) { + PFSYNC_LOCK(sc); + if (st->state_flags & PFSTATE_ACK) + pfsync_undefer_state(st, 1); + if (st->state_flags & PFSTATE_NOSYNC) { if (st->sync_state != PFSYNC_S_NONE) pfsync_q_del(st); + PFSYNC_UNLOCK(sc); return; } if (sc->sc_len == PFSYNC_MINPKT) -#ifdef __FreeBSD__ - callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, - V_pfsyncif); -#else - timeout_add_sec(&sc->sc_tmo, 1); -#endif + callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); switch (st->sync_state) { case PFSYNC_S_INS: - /* we never got to tell the world so just forget about it */ + /* We never got to tell the world so just forget about it. 
*/ pfsync_q_del(st); - return; + break; case PFSYNC_S_UPD_C: case PFSYNC_S_UPD: @@ -2731,87 +1930,55 @@ pfsync_delete_state(struct pf_state *st) case PFSYNC_S_NONE: pfsync_q_ins(st, PFSYNC_S_DEL); - return; + break; default: - panic("pfsync_delete_state: unexpected sync state %d", - st->sync_state); + panic("%s: unexpected sync state %d", __func__, st->sync_state); } + PFSYNC_UNLOCK(sc); } -void +static void pfsync_clear_states(u_int32_t creatorid, const char *ifname) { + struct pfsync_softc *sc = V_pfsyncif; struct { struct pfsync_subheader subh; struct pfsync_clr clr; } __packed r; -#ifdef __FreeBSD__ - struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif - -#ifdef __FreeBSD__ - PF_LOCK_ASSERT(); -#else - splassert(IPL_SOFTNET); -#endif - - if (sc == NULL) - return; - bzero(&r, sizeof(r)); r.subh.action = PFSYNC_ACT_CLR; r.subh.count = htons(1); + V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++; strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname)); r.clr.creatorid = creatorid; + PFSYNC_LOCK(sc); pfsync_send_plus(&r, sizeof(r)); + PFSYNC_UNLOCK(sc); } -void +static void pfsync_q_ins(struct pf_state *st, int q) { -#ifdef __FreeBSD__ struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif size_t nlen = pfsync_qs[q].len; - int s; - PF_LOCK_ASSERT(); + PFSYNC_LOCK_ASSERT(sc); -#ifdef __FreeBSD__ KASSERT(st->sync_state == PFSYNC_S_NONE, - ("%s: st->sync_state == PFSYNC_S_NONE", __FUNCTION__)); -#else - KASSERT(st->sync_state == PFSYNC_S_NONE); -#endif + ("%s: st->sync_state == PFSYNC_S_NONE", __func__)); + KASSERT(sc->sc_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu", + sc->sc_len)); -#if 1 || defined(PFSYNC_DEBUG) - if (sc->sc_len < PFSYNC_MINPKT) -#ifdef __FreeBSD__ - panic("pfsync pkt len is too low %zu", sc->sc_len); -#else - panic("pfsync pkt len is too low %d", sc->sc_len); -#endif -#endif if (TAILQ_EMPTY(&sc->sc_qs[q])) nlen += sizeof(struct pfsync_subheader); -#ifdef __FreeBSD__ if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) { -#else - if (sc->sc_len + nlen > sc->sc_if.if_mtu) { -#endif - s = splnet(); - pfsync_sendout(); - splx(s); + pfsync_sendout(1); nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len; } @@ -2819,234 +1986,112 @@ pfsync_q_ins(struct pf_state *st, int q) sc->sc_len += nlen; TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list); st->sync_state = q; + pf_ref_state(st); } -void +static void pfsync_q_del(struct pf_state *st) { -#ifdef __FreeBSD__ struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif int q = st->sync_state; -#ifdef __FreeBSD__ + PFSYNC_LOCK_ASSERT(sc); KASSERT(st->sync_state != PFSYNC_S_NONE, - ("%s: st->sync_state != PFSYNC_S_NONE", __FUNCTION__)); -#else - KASSERT(st->sync_state != PFSYNC_S_NONE); -#endif + ("%s: st->sync_state != PFSYNC_S_NONE", __func__)); sc->sc_len -= pfsync_qs[q].len; TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list); st->sync_state = PFSYNC_S_NONE; + pf_release_state(st); if (TAILQ_EMPTY(&sc->sc_qs[q])) sc->sc_len -= sizeof(struct pfsync_subheader); } -#ifdef notyet -void -pfsync_update_tdb(struct tdb *t, int output) -{ -#ifdef __FreeBSD__ - struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif - size_t nlen = sizeof(struct pfsync_tdb); - int s; - - if (sc == NULL) - return; - - if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) { - if (TAILQ_EMPTY(&sc->sc_tdb_q)) - nlen += sizeof(struct pfsync_subheader); - - if (sc->sc_len + nlen > sc->sc_if.if_mtu) { - s = splnet(); - PF_LOCK(); - pfsync_sendout(); - 
PF_UNLOCK(); - splx(s); - - nlen = sizeof(struct pfsync_subheader) + - sizeof(struct pfsync_tdb); - } - - sc->sc_len += nlen; - TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry); - SET(t->tdb_flags, TDBF_PFSYNC); - t->tdb_updates = 0; - } else { - if (++t->tdb_updates >= sc->sc_maxupdates) - schednetisr(NETISR_PFSYNC); - } - - if (output) - SET(t->tdb_flags, TDBF_PFSYNC_RPL); - else - CLR(t->tdb_flags, TDBF_PFSYNC_RPL); -} - -void -pfsync_delete_tdb(struct tdb *t) -{ -#ifdef __FreeBSD__ - struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif - - if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC)) - return; - - sc->sc_len -= sizeof(struct pfsync_tdb); - TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry); - CLR(t->tdb_flags, TDBF_PFSYNC); - - if (TAILQ_EMPTY(&sc->sc_tdb_q)) - sc->sc_len -= sizeof(struct pfsync_subheader); -} - -int -pfsync_out_tdb(struct tdb *t, struct mbuf *m, int offset) -{ - struct pfsync_tdb *ut = (struct pfsync_tdb *)(m->m_data + offset); - - bzero(ut, sizeof(*ut)); - ut->spi = t->tdb_spi; - bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst)); - /* - * When a failover happens, the master's rpl is probably above - * what we see here (we may be up to a second late), so - * increase it a bit for outbound tdbs to manage most such - * situations. - * - * For now, just add an offset that is likely to be larger - * than the number of packets we can see in one second. The RFC - * just says the next packet must have a higher seq value. - * - * XXX What is a good algorithm for this? We could use - * a rate-determined increase, but to know it, we would have - * to extend struct tdb. - * XXX pt->rpl can wrap over MAXINT, but if so the real tdb - * will soon be replaced anyway. For now, just don't handle - * this edge case. - */ -#define RPL_INCR 16384 - ut->rpl = htonl(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ? 
- RPL_INCR : 0)); - ut->cur_bytes = htobe64(t->tdb_cur_bytes); - ut->sproto = t->tdb_sproto; - - return (sizeof(*ut)); -} -#endif - -void +static void pfsync_bulk_start(void) { -#ifdef __FreeBSD__ struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) -#else - if (pf_status.debug >= PF_DEBUG_MISC) -#endif printf("pfsync: received bulk update request\n"); -#ifdef __FreeBSD__ - PF_LOCK_ASSERT(); - if (TAILQ_EMPTY(&V_state_list)) -#else - if (TAILQ_EMPTY(&state_list)) -#endif - pfsync_bulk_status(PFSYNC_BUS_END); - else { - sc->sc_ureq_received = time_uptime; - if (sc->sc_bulk_next == NULL) -#ifdef __FreeBSD__ - sc->sc_bulk_next = TAILQ_FIRST(&V_state_list); -#else - sc->sc_bulk_next = TAILQ_FIRST(&state_list); -#endif - sc->sc_bulk_last = sc->sc_bulk_next; + PFSYNC_BLOCK(sc); - pfsync_bulk_status(PFSYNC_BUS_START); - callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc); - } + sc->sc_ureq_received = time_uptime; + sc->sc_bulk_hashid = 0; + sc->sc_bulk_stateid = 0; + pfsync_bulk_status(PFSYNC_BUS_START); + callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc); + PFSYNC_BUNLOCK(sc); } -void +static void pfsync_bulk_update(void *arg) { struct pfsync_softc *sc = arg; - struct pf_state *st = sc->sc_bulk_next; - int i = 0; - int s; + struct pf_state *s; + int i, sent = 0; - PF_LOCK_ASSERT(); - - s = splsoftnet(); -#ifdef __FreeBSD__ + PFSYNC_BLOCK_ASSERT(sc); CURVNET_SET(sc->sc_ifp->if_vnet); -#endif - for (;;) { - if (st->sync_state == PFSYNC_S_NONE && - st->timeout < PFTM_MAX && - st->pfsync_time <= sc->sc_ureq_received) { - pfsync_update_state_req(st); - i++; - } - st = TAILQ_NEXT(st, entry_list); - if (st == NULL) -#ifdef __FreeBSD__ - st = TAILQ_FIRST(&V_state_list); -#else - st = TAILQ_FIRST(&state_list); -#endif + /* + * Start with last state from previous invocation. + * It may had gone, in this case start from the + * hash slot. + */ + s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid); - if (st == sc->sc_bulk_last) { - /* we're done */ - sc->sc_bulk_next = NULL; - sc->sc_bulk_last = NULL; - pfsync_bulk_status(PFSYNC_BUS_END); - break; + if (s != NULL) + i = PF_IDHASH(s); + else + i = sc->sc_bulk_hashid; + + for (; i <= V_pf_hashmask; i++) { + struct pf_idhash *ih = &V_pf_idhash[i]; + + if (s != NULL) + PF_HASHROW_ASSERT(ih); + else { + PF_HASHROW_LOCK(ih); + s = LIST_FIRST(&ih->states); } -#ifdef __FreeBSD__ - if (i > 1 && (sc->sc_ifp->if_mtu - sc->sc_len) < -#else - if (i > 1 && (sc->sc_if.if_mtu - sc->sc_len) < -#endif - sizeof(struct pfsync_state)) { - /* we've filled a packet */ - sc->sc_bulk_next = st; -#ifdef __FreeBSD__ - callout_reset(&sc->sc_bulk_tmo, 1, - pfsync_bulk_update, sc); -#else - timeout_add(&sc->sc_bulk_tmo, 1); -#endif - break; + for (; s; s = LIST_NEXT(s, entry)) { + + if (sent > 1 && (sc->sc_ifp->if_mtu - sc->sc_len) < + sizeof(struct pfsync_state)) { + /* We've filled a packet. */ + sc->sc_bulk_hashid = i; + sc->sc_bulk_stateid = s->id; + sc->sc_bulk_creatorid = s->creatorid; + PF_HASHROW_UNLOCK(ih); + callout_reset(&sc->sc_bulk_tmo, 1, + pfsync_bulk_update, sc); + goto full; + } + + if (s->sync_state == PFSYNC_S_NONE && + s->timeout < PFTM_MAX && + s->pfsync_time <= sc->sc_ureq_received) { + PFSYNC_LOCK(sc); + pfsync_update_state_req(s); + PFSYNC_UNLOCK(sc); + sent++; + } } + PF_HASHROW_UNLOCK(ih); } -#ifdef __FreeBSD__ + /* We're done. 
*/ + pfsync_bulk_status(PFSYNC_BUS_END); + +full: CURVNET_RESTORE(); -#endif - splx(s); } -void +static void pfsync_bulk_status(u_int8_t status) { struct { @@ -3054,268 +2099,151 @@ pfsync_bulk_status(u_int8_t status) struct pfsync_bus bus; } __packed r; -#ifdef __FreeBSD__ struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif - - PF_LOCK_ASSERT(); bzero(&r, sizeof(r)); r.subh.action = PFSYNC_ACT_BUS; r.subh.count = htons(1); + V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++; -#ifdef __FreeBSD__ r.bus.creatorid = V_pf_status.hostid; -#else - r.bus.creatorid = pf_status.hostid; -#endif r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received); r.bus.status = status; + PFSYNC_LOCK(sc); pfsync_send_plus(&r, sizeof(r)); + PFSYNC_UNLOCK(sc); } -void +static void pfsync_bulk_fail(void *arg) { struct pfsync_softc *sc = arg; -#ifdef __FreeBSD__ CURVNET_SET(sc->sc_ifp->if_vnet); -#endif + + PFSYNC_BLOCK_ASSERT(sc); if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { /* Try again */ -#ifdef __FreeBSD__ callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, V_pfsyncif); -#else - timeout_add_sec(&sc->sc_bulkfail_tmo, 5); -#endif - PF_LOCK(); + PFSYNC_LOCK(sc); pfsync_request_update(0, 0); - PF_UNLOCK(); + PFSYNC_UNLOCK(sc); } else { - /* Pretend like the transfer was ok */ + /* Pretend like the transfer was ok. */ sc->sc_ureq_sent = 0; sc->sc_bulk_tries = 0; -#ifdef __FreeBSD__ - if (!sc->pfsync_sync_ok && carp_demote_adj_p) + PFSYNC_LOCK(sc); + if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync bulk fail"); - sc->pfsync_sync_ok = 1; -#else -#if NCARP > 0 - if (!pfsync_sync_ok) - carp_group_demote_adj(&sc->sc_if, -1); -#endif - pfsync_sync_ok = 1; -#endif -#ifdef __FreeBSD__ + sc->sc_flags |= PFSYNCF_OK; + PFSYNC_UNLOCK(sc); if (V_pf_status.debug >= PF_DEBUG_MISC) -#else - if (pf_status.debug >= PF_DEBUG_MISC) -#endif printf("pfsync: failed to receive bulk update\n"); } -#ifdef __FreeBSD__ CURVNET_RESTORE(); -#endif } -void +static void pfsync_send_plus(void *plus, size_t pluslen) { -#ifdef __FreeBSD__ struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif - int s; - PF_LOCK_ASSERT(); + PFSYNC_LOCK_ASSERT(sc); -#ifdef __FreeBSD__ - if (sc->sc_len + pluslen > sc->sc_ifp->if_mtu) { -#else - if (sc->sc_len + pluslen > sc->sc_if.if_mtu) { -#endif - s = splnet(); - pfsync_sendout(); - splx(s); - } + if (sc->sc_len + pluslen > sc->sc_ifp->if_mtu) + pfsync_sendout(1); sc->sc_plus = plus; sc->sc_len += (sc->sc_pluslen = pluslen); - s = splnet(); - pfsync_sendout(); - splx(s); -} - -int -pfsync_up(void) -{ -#ifdef __FreeBSD__ - struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif - -#ifdef __FreeBSD__ - if (sc == NULL || !ISSET(sc->sc_ifp->if_flags, IFF_DRV_RUNNING)) -#else - if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING)) -#endif - return (0); - - return (1); -} - -int -pfsync_state_in_use(struct pf_state *st) -{ -#ifdef __FreeBSD__ - struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif - - if (sc == NULL) - return (0); - - if (st->sync_state != PFSYNC_S_NONE || - st == sc->sc_bulk_next || - st == sc->sc_bulk_last) - return (1); - - return (0); + pfsync_sendout(1); } -u_int pfsync_ints; -u_int pfsync_tmos; - -void +static void pfsync_timeout(void *arg) { -#if defined(__FreeBSD__) && defined(VIMAGE) struct pfsync_softc *sc = arg; -#endif - int s; -#ifdef __FreeBSD__ 
CURVNET_SET(sc->sc_ifp->if_vnet); -#endif + PFSYNC_LOCK(sc); + pfsync_push(sc); + PFSYNC_UNLOCK(sc); + CURVNET_RESTORE(); +} - pfsync_tmos++; +static void +pfsync_push(struct pfsync_softc *sc) +{ - s = splnet(); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif - pfsync_sendout(); -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - splx(s); + PFSYNC_LOCK_ASSERT(sc); -#ifdef __FreeBSD__ - CURVNET_RESTORE(); -#endif + sc->sc_flags |= PFSYNCF_PUSH; + swi_sched(V_pfsync_swi_cookie, 0); } -/* this is a softnet/netisr handler */ -void -#ifdef __FreeBSD__ +static void pfsyncintr(void *arg) { struct pfsync_softc *sc = arg; struct mbuf *m, *n; CURVNET_SET(sc->sc_ifp->if_vnet); - pfsync_ints++; - PF_LOCK(); - if (sc->sc_len > PFSYNC_MINPKT) - pfsync_sendout1(0); + PFSYNC_LOCK(sc); + if ((sc->sc_flags & PFSYNCF_PUSH) && sc->sc_len > PFSYNC_MINPKT) { + pfsync_sendout(0); + sc->sc_flags &= ~PFSYNCF_PUSH; + } _IF_DEQUEUE_ALL(&sc->sc_ifp->if_snd, m); - PF_UNLOCK(); + PFSYNC_UNLOCK(sc); for (; m != NULL; m = n) { n = m->m_nextpkt; m->m_nextpkt = NULL; - if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL) - == 0) + + /* + * We distinguish between a deferral packet and our + * own pfsync packet based on M_SKIP_FIREWALL + * flag. This is XXX. + */ + if (m->m_flags & M_SKIP_FIREWALL) + ip_output(m, NULL, NULL, 0, NULL, NULL); + else if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, + NULL) == 0) V_pfsyncstats.pfsyncs_opackets++; else V_pfsyncstats.pfsyncs_oerrors++; } CURVNET_RESTORE(); } -#else -pfsyncintr(void) -{ - int s; - - pfsync_ints++; - - s = splnet(); - pfsync_sendout(); - splx(s); -} -#endif -int -pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) -{ - -#ifdef notyet - /* All sysctl names at this level are terminal. 
*/ - if (namelen != 1) - return (ENOTDIR); - - switch (name[0]) { - case PFSYNCCTL_STATS: - if (newp != NULL) - return (EPERM); - return (sysctl_struct(oldp, oldlenp, newp, newlen, - &V_pfsyncstats, sizeof(V_pfsyncstats))); - } -#endif - return (ENOPROTOOPT); -} - -#ifdef __FreeBSD__ static int -pfsync_multicast_setup(struct pfsync_softc *sc) +pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, void *mship) { struct ip_moptions *imo = &sc->sc_imo; int error; - if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) { - sc->sc_sync_if = NULL; + if (!(ifp->if_flags & IFF_MULTICAST)) return (EADDRNOTAVAIL); - } - imo->imo_membership = (struct in_multi **)malloc( - (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_PFSYNC, - M_WAITOK | M_ZERO); + imo->imo_membership = (struct in_multi **)mship; imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; imo->imo_multicast_vif = -1; - if ((error = in_joingroup(sc->sc_sync_if, &sc->sc_sync_peer, NULL, + if ((error = in_joingroup(ifp, &sc->sc_sync_peer, NULL, &imo->imo_membership[0])) != 0) { - free(imo->imo_membership, M_PFSYNC); + imo->imo_membership = NULL; return (error); } imo->imo_num_memberships++; - imo->imo_multicast_ifp = sc->sc_sync_if; + imo->imo_multicast_ifp = ifp; imo->imo_multicast_ttl = PFSYNC_DFLTTL; imo->imo_multicast_loop = 0; @@ -3377,16 +2305,14 @@ pfsync_init() goto fail; } #endif - PF_LOCK(); + PF_RULES_WLOCK(); pfsync_state_import_ptr = pfsync_state_import; - pfsync_up_ptr = pfsync_up; pfsync_insert_state_ptr = pfsync_insert_state; pfsync_update_state_ptr = pfsync_update_state; pfsync_delete_state_ptr = pfsync_delete_state; pfsync_clear_states_ptr = pfsync_clear_states; - pfsync_state_in_use_ptr = pfsync_state_in_use; pfsync_defer_ptr = pfsync_defer; - PF_UNLOCK(); + PF_RULES_WUNLOCK(); return (0); @@ -3411,24 +2337,22 @@ pfsync_uninit() { VNET_ITERATOR_DECL(vnet_iter); - PF_LOCK(); + PF_RULES_WLOCK(); pfsync_state_import_ptr = NULL; - pfsync_up_ptr = NULL; pfsync_insert_state_ptr = NULL; pfsync_update_state_ptr = NULL; pfsync_delete_state_ptr = NULL; pfsync_clear_states_ptr = NULL; - pfsync_state_in_use_ptr = NULL; pfsync_defer_ptr = NULL; - PF_UNLOCK(); + PF_RULES_WUNLOCK(); ipproto_unregister(IPPROTO_PFSYNC); pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW); VNET_LIST_RLOCK(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); - swi_remove(V_pfsync_swi_cookie); if_clone_detach(&V_pfsync_cloner); + swi_remove(V_pfsync_swi_cookie); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK(); @@ -3471,4 +2395,3 @@ static moduledata_t pfsync_mod = { DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); MODULE_VERSION(pfsync, PFSYNC_MODVER); MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER); -#endif /* __FreeBSD__ */ diff --git a/sys/contrib/pf/net/if_pfsync.h b/sys/contrib/pf/net/if_pfsync.h index 3f34038..5e71dd8 100644 --- a/sys/contrib/pf/net/if_pfsync.h +++ b/sys/contrib/pf/net/if_pfsync.h @@ -182,7 +182,7 @@ struct pfsync_del_c { u_int32_t creatorid; } __packed; -/* +/* * INS_F, DEL_F */ @@ -256,6 +256,9 @@ struct pfsyncstats { u_int64_t pfsyncs_opackets6; /* total output packets, IPv6 */ u_int64_t pfsyncs_onomem; /* no memory for an mbuf */ u_int64_t pfsyncs_oerrors; /* ip output error */ + + u_int64_t pfsyncs_iacts[PFSYNC_ACT_MAX]; + u_int64_t pfsyncs_oacts[PFSYNC_ACT_MAX]; }; /* @@ -268,10 +271,8 @@ struct pfsyncreq { int pfsyncr_defer; }; -#ifdef __FreeBSD__ #define SIOCSETPFSYNC _IOW('i', 247, struct ifreq) #define SIOCGETPFSYNC _IOWR('i', 248, struct ifreq) -#endif #ifdef _KERNEL @@ -288,37 +289,10 @@ 
struct pfsyncreq { #define PFSYNC_S_DEFER 0xfe #define PFSYNC_S_NONE 0xff -#ifdef __FreeBSD__ -void pfsync_input(struct mbuf *, __unused int); -#else -void pfsync_input(struct mbuf *, ...); -#endif -int pfsync_sysctl(int *, u_int, void *, size_t *, - void *, size_t); - #define PFSYNC_SI_IOCTL 0x01 #define PFSYNC_SI_CKSUM 0x02 #define PFSYNC_SI_ACK 0x04 -int pfsync_state_import(struct pfsync_state *, u_int8_t); -#ifndef __FreeBSD__ -void pfsync_state_export(struct pfsync_state *, - struct pf_state *); -#endif - -void pfsync_insert_state(struct pf_state *); -void pfsync_update_state(struct pf_state *); -void pfsync_delete_state(struct pf_state *); -void pfsync_clear_states(u_int32_t, const char *); - -#ifdef notyet -void pfsync_update_tdb(struct tdb *, int); -void pfsync_delete_tdb(struct tdb *); -#endif - -int pfsync_defer(struct pf_state *, struct mbuf *); - -int pfsync_up(void); -int pfsync_state_in_use(struct pf_state *); -#endif + +#endif /* _KERNEL */ #endif /* _NET_IF_PFSYNC_H_ */ diff --git a/sys/contrib/pf/net/pf.c b/sys/contrib/pf/net/pf.c index ac51282..b66d6dd 100644 --- a/sys/contrib/pf/net/pf.c +++ b/sys/contrib/pf/net/pf.c @@ -35,138 +35,77 @@ * */ -#ifdef __FreeBSD__ -#include "opt_inet.h" -#include "opt_inet6.h" - #include <sys/cdefs.h> + __FBSDID("$FreeBSD$"); -#endif -#ifdef __FreeBSD__ +#include "opt_inet.h" +#include "opt_inet6.h" #include "opt_bpf.h" #include "opt_pf.h" -#define NPFSYNC 1 - -#ifdef DEV_PFLOW -#define NPFLOW DEV_PFLOW -#else -#define NPFLOW 0 -#endif - -#else -#include "bpfilter.h" -#include "pflog.h" -#include "pfsync.h" -#include "pflow.h" -#endif - #include <sys/param.h> -#include <sys/systm.h> -#include <sys/mbuf.h> -#include <sys/filio.h> -#include <sys/socket.h> -#include <sys/socketvar.h> -#include <sys/kernel.h> -#include <sys/time.h> -#ifdef __FreeBSD__ -#include <sys/random.h> -#include <sys/sysctl.h> +#include <sys/bus.h> #include <sys/endian.h> -#define betoh64 be64toh -#else -#include <sys/pool.h> -#endif -#include <sys/proc.h> -#ifdef __FreeBSD__ +#include <sys/hash.h> +#include <sys/interrupt.h> +#include <sys/kernel.h> #include <sys/kthread.h> -#include <sys/lock.h> -#include <sys/sx.h> -#else -#include <sys/rwlock.h> -#endif - -#ifdef __FreeBSD__ +#include <sys/limits.h> +#include <sys/mbuf.h> #include <sys/md5.h> -#else -#include <crypto/md5.h> -#endif +#include <sys/random.h> +#include <sys/refcount.h> +#include <sys/socket.h> +#include <sys/sysctl.h> +#include <sys/taskqueue.h> +#include <sys/ucred.h> #include <net/if.h> #include <net/if_types.h> -#include <net/bpf.h> #include <net/route.h> -#ifdef __FreeBSD__ -#ifdef RADIX_MPATH -#include <net/radix_mpath.h> -#endif -#else #include <net/radix_mpath.h> -#endif +#include <net/vnet.h> -#include <netinet/in.h> +#include <net/pfvar.h> +#include <net/pf_mtag.h> +#include <net/if_pflog.h> +#include <net/if_pfsync.h> + +#include <netinet/in_pcb.h> #include <netinet/in_var.h> -#include <netinet/in_systm.h> #include <netinet/ip.h> +#include <netinet/ip_fw.h> +#include <netinet/ip_icmp.h> +#include <netinet/icmp_var.h> #include <netinet/ip_var.h> +#include <netinet/ipfw/ip_fw_private.h> /* XXX: only for DIR_IN/DIR_OUT */ #include <netinet/tcp.h> +#include <netinet/tcp_fsm.h> #include <netinet/tcp_seq.h> -#include <netinet/udp.h> -#include <netinet/ip_icmp.h> -#include <netinet/in_pcb.h> #include <netinet/tcp_timer.h> #include <netinet/tcp_var.h> +#include <netinet/udp.h> #include <netinet/udp_var.h> -#include <netinet/icmp_var.h> -#include <netinet/if_ether.h> -#ifdef __FreeBSD__ -#include 
<netinet/ip_fw.h> -#include <netinet/ipfw/ip_fw_private.h> /* XXX: only for DIR_IN/DIR_OUT */ -#endif - -#ifndef __FreeBSD__ -#include <dev/rndvar.h> -#endif -#include <net/pfvar.h> -#include <net/if_pflog.h> -#include <net/if_pflow.h> -#include <net/if_pfsync.h> #ifdef INET6 #include <netinet/ip6.h> -#include <netinet/in_pcb.h> #include <netinet/icmp6.h> #include <netinet6/nd6.h> -#ifdef __FreeBSD__ #include <netinet6/ip6_var.h> #include <netinet6/in6_pcb.h> -#endif #endif /* INET6 */ -#ifdef __FreeBSD__ #include <machine/in_cksum.h> -#include <sys/limits.h> -#include <sys/ucred.h> #include <security/mac/mac_framework.h> -extern int ip_optcopy(struct ip *, struct ip *); -#endif - -#ifdef __FreeBSD__ #define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x -#else -#define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x -#endif /* * Global variables */ /* state tables */ -#ifdef __FreeBSD__ -VNET_DEFINE(struct pf_state_tree, pf_statetbl); - VNET_DEFINE(struct pf_altqqueue, pf_altqs[2]); VNET_DEFINE(struct pf_palist, pf_pabuf); VNET_DEFINE(struct pf_altqqueue *, pf_altqs_active); @@ -196,221 +135,206 @@ struct pf_anchor_stackframe { VNET_DEFINE(struct pf_anchor_stackframe, pf_anchor_stack[64]); #define V_pf_anchor_stack VNET(pf_anchor_stack) -VNET_DEFINE(uma_zone_t, pf_src_tree_pl); -VNET_DEFINE(uma_zone_t, pf_rule_pl); -VNET_DEFINE(uma_zone_t, pf_pooladdr_pl); -VNET_DEFINE(uma_zone_t, pf_state_pl); -VNET_DEFINE(uma_zone_t, pf_state_key_pl); -VNET_DEFINE(uma_zone_t, pf_state_item_pl); -VNET_DEFINE(uma_zone_t, pf_altq_pl); -#else -struct pf_state_tree pf_statetbl; - -struct pf_altqqueue pf_altqs[2]; -struct pf_palist pf_pabuf; -struct pf_altqqueue *pf_altqs_active; -struct pf_altqqueue *pf_altqs_inactive; -struct pf_status pf_status; +/* + * Queue for pf_intr() sends. + */ +static MALLOC_DEFINE(M_PFTEMP, "pf_temp", "pf(4) temporary allocations"); +struct pf_send_entry { + STAILQ_ENTRY(pf_send_entry) pfse_next; + struct mbuf *pfse_m; + enum { + PFSE_IP, + PFSE_IP6, + PFSE_ICMP, + PFSE_ICMP6, + } pfse_type; + union { + struct route ro; + struct { + int type; + int code; + int mtu; + } icmpopts; + } u; +#define pfse_ro u.ro +#define pfse_icmp_type u.icmpopts.type +#define pfse_icmp_code u.icmpopts.code +#define pfse_icmp_mtu u.icmpopts.mtu +}; -u_int32_t ticket_altqs_active; -u_int32_t ticket_altqs_inactive; -int altqs_inactive_open; -u_int32_t ticket_pabuf; +STAILQ_HEAD(pf_send_head, pf_send_entry); +static VNET_DEFINE(struct pf_send_head, pf_sendqueue); +#define V_pf_sendqueue VNET(pf_sendqueue) -MD5_CTX pf_tcp_secret_ctx; -u_char pf_tcp_secret[16]; -int pf_tcp_secret_init; -int pf_tcp_iss_off; +static struct mtx pf_sendqueue_mtx; +#define PF_SENDQ_LOCK() mtx_lock(&pf_sendqueue_mtx) +#define PF_SENDQ_UNLOCK() mtx_unlock(&pf_sendqueue_mtx) -struct pf_anchor_stackframe { - struct pf_ruleset *rs; - struct pf_rule *r; - struct pf_anchor_node *parent; - struct pf_anchor *child; -} pf_anchor_stack[64]; - -struct pool pf_src_tree_pl, pf_rule_pl, pf_pooladdr_pl; -struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl; -struct pool pf_altq_pl; -#endif +/* + * Queue for pf_flush_task() tasks. 
+ */ +struct pf_flush_entry { + SLIST_ENTRY(pf_flush_entry) next; + struct pf_addr addr; + sa_family_t af; + uint8_t dir; + struct pf_rule *rule; /* never dereferenced */ +}; -void pf_init_threshold(struct pf_threshold *, u_int32_t, +SLIST_HEAD(pf_flush_head, pf_flush_entry); +static VNET_DEFINE(struct pf_flush_head, pf_flushqueue); +#define V_pf_flushqueue VNET(pf_flushqueue) +static VNET_DEFINE(struct task, pf_flushtask); +#define V_pf_flushtask VNET(pf_flushtask) + +static struct mtx pf_flushqueue_mtx; +#define PF_FLUSHQ_LOCK() mtx_lock(&pf_flushqueue_mtx) +#define PF_FLUSHQ_UNLOCK() mtx_unlock(&pf_flushqueue_mtx) + +VNET_DEFINE(struct pf_rulequeue, pf_unlinked_rules); +struct mtx pf_unlnkdrules_mtx; + +static VNET_DEFINE(uma_zone_t, pf_sources_z); +#define V_pf_sources_z VNET(pf_sources_z) +static VNET_DEFINE(uma_zone_t, pf_mtag_z); +#define V_pf_mtag_z VNET(pf_mtag_z) +VNET_DEFINE(uma_zone_t, pf_state_z); +VNET_DEFINE(uma_zone_t, pf_state_key_z); + +VNET_DEFINE(uint64_t, pf_stateid[MAXCPU]); +#define PFID_CPUBITS 8 +#define PFID_CPUSHIFT (sizeof(uint64_t) * NBBY - PFID_CPUBITS) +#define PFID_CPUMASK ((uint64_t)((1 << PFID_CPUBITS) - 1) << PFID_CPUSHIFT) +#define PFID_MAXID (~PFID_CPUMASK) +CTASSERT((1 << PFID_CPUBITS) > MAXCPU); + +static void pf_src_tree_remove_state(struct pf_state *); +static void pf_init_threshold(struct pf_threshold *, u_int32_t, u_int32_t); -void pf_add_threshold(struct pf_threshold *); -int pf_check_threshold(struct pf_threshold *); +static void pf_add_threshold(struct pf_threshold *); +static int pf_check_threshold(struct pf_threshold *); -void pf_change_ap(struct pf_addr *, u_int16_t *, +static void pf_change_ap(struct pf_addr *, u_int16_t *, u_int16_t *, u_int16_t *, struct pf_addr *, u_int16_t, u_int8_t, sa_family_t); -int pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *, +static int pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *, struct tcphdr *, struct pf_state_peer *); -#ifdef INET6 -void pf_change_a6(struct pf_addr *, u_int16_t *, - struct pf_addr *, u_int8_t); -#endif /* INET6 */ -void pf_change_icmp(struct pf_addr *, u_int16_t *, +static void pf_change_icmp(struct pf_addr *, u_int16_t *, struct pf_addr *, struct pf_addr *, u_int16_t, u_int16_t *, u_int16_t *, u_int16_t *, u_int16_t *, u_int8_t, sa_family_t); -#ifdef __FreeBSD__ -void pf_send_tcp(struct mbuf *, +static void pf_send_tcp(struct mbuf *, const struct pf_rule *, sa_family_t, -#else -void pf_send_tcp(const struct pf_rule *, sa_family_t, -#endif const struct pf_addr *, const struct pf_addr *, u_int16_t, u_int16_t, u_int32_t, u_int32_t, u_int8_t, u_int16_t, u_int16_t, u_int8_t, int, - u_int16_t, struct ether_header *, struct ifnet *); + u_int16_t, struct ifnet *); static void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, sa_family_t, struct pf_rule *); -void pf_detach_state(struct pf_state *); -void pf_state_key_detach(struct pf_state *, int); -u_int32_t pf_tcp_iss(struct pf_pdesc *); -int pf_test_rule(struct pf_rule **, struct pf_state **, +static void pf_detach_state(struct pf_state *); +static int pf_state_key_attach(struct pf_state_key *, + struct pf_state_key *, struct pf_state *); +static void pf_state_key_detach(struct pf_state *, int); +static int pf_state_key_ctor(void *, int, void *, int); +static u_int32_t pf_tcp_iss(struct pf_pdesc *); +static int pf_test_rule(struct pf_rule **, struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, - void *, struct pf_pdesc *, struct pf_rule **, -#ifdef __FreeBSD__ - struct pf_ruleset **, struct ifqueue *, - struct inpcb 
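
The PFID_* macros above pack the CPU number into the top PFID_CPUBITS bits of the 64-bit state id and leave the low bits as a per-CPU counter (V_pf_stateid[MAXCPU]); the CTASSERT checks that the CPU field is wide enough for every possible CPU, so each CPU can hand out unique ids without sharing a counter. Below is a minimal userland sketch of that packing arithmetic only; it is not kernel code, and the names, the fixed four-slot counter array and the printf are invented for illustration.

#include <stdint.h>
#include <stdio.h>

/*
 * Userland illustration only: same bit layout as the PFID_* macros above.
 * Top EX_CPUBITS bits carry the CPU number, the rest is a per-CPU counter.
 */
#define EX_CPUBITS      8
#define EX_CPUSHIFT     (sizeof(uint64_t) * 8 - EX_CPUBITS)
#define EX_CPUMASK      ((uint64_t)((1 << EX_CPUBITS) - 1) << EX_CPUSHIFT)
#define EX_MAXID        (~EX_CPUMASK)

static uint64_t ex_counter[4];          /* stands in for V_pf_stateid[MAXCPU] */

static uint64_t
ex_alloc_id(unsigned cpu)
{
        uint64_t id;

        if ((id = ex_counter[cpu]++) == EX_MAXID)       /* counter wrapped */
                ex_counter[cpu] = 1;
        return (id | (uint64_t)cpu << EX_CPUSHIFT);
}

int
main(void)
{
        uint64_t id = ex_alloc_id(3);

        printf("id %#jx -> cpu %ju, counter %ju\n", (uintmax_t)id,
            (uintmax_t)(id >> EX_CPUSHIFT), (uintmax_t)(id & EX_MAXID));
        return (0);
}
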
*); -#else - struct pf_ruleset **, struct ifqueue *); -#endif -static __inline int pf_create_state(struct pf_rule *, struct pf_rule *, + struct pf_pdesc *, struct pf_rule **, + struct pf_ruleset **, struct inpcb *); +static int pf_create_state(struct pf_rule *, struct pf_rule *, struct pf_rule *, struct pf_pdesc *, struct pf_src_node *, struct pf_state_key *, - struct pf_state_key *, struct pf_state_key *, struct pf_state_key *, struct mbuf *, int, u_int16_t, u_int16_t, int *, struct pfi_kif *, struct pf_state **, int, u_int16_t, u_int16_t, int); -int pf_test_fragment(struct pf_rule **, int, +static int pf_test_fragment(struct pf_rule **, int, struct pfi_kif *, struct mbuf *, void *, struct pf_pdesc *, struct pf_rule **, struct pf_ruleset **); -int pf_tcp_track_full(struct pf_state_peer *, +static int pf_tcp_track_full(struct pf_state_peer *, struct pf_state_peer *, struct pf_state **, struct pfi_kif *, struct mbuf *, int, struct pf_pdesc *, u_short *, int *); -int pf_tcp_track_sloppy(struct pf_state_peer *, +static int pf_tcp_track_sloppy(struct pf_state_peer *, struct pf_state_peer *, struct pf_state **, struct pf_pdesc *, u_short *); -int pf_test_state_tcp(struct pf_state **, int, +static int pf_test_state_tcp(struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, u_short *); -int pf_test_state_udp(struct pf_state **, int, +static int pf_test_state_udp(struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *); -int pf_test_state_icmp(struct pf_state **, int, +static int pf_test_state_icmp(struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, u_short *); -int pf_test_state_other(struct pf_state **, int, +static int pf_test_state_other(struct pf_state **, int, struct pfi_kif *, struct mbuf *, struct pf_pdesc *); -void pf_route(struct mbuf **, struct pf_rule *, int, - struct ifnet *, struct pf_state *, - struct pf_pdesc *); -void pf_route6(struct mbuf **, struct pf_rule *, int, - struct ifnet *, struct pf_state *, - struct pf_pdesc *); -#ifndef __FreeBSD__ -int pf_socket_lookup(int, struct pf_pdesc *); -#endif -u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t, +static u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t, sa_family_t); -u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t, +static u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t, sa_family_t); -u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t, +static u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t, int, u_int16_t); -void pf_set_rt_ifp(struct pf_state *, +static void pf_set_rt_ifp(struct pf_state *, struct pf_addr *); -int pf_check_proto_cksum(struct mbuf *, int, int, +static int pf_check_proto_cksum(struct mbuf *, int, int, u_int8_t, sa_family_t); -#ifndef __FreeBSD__ -struct pf_divert *pf_get_divert(struct mbuf *); -#endif -void pf_print_state_parts(struct pf_state *, +static void pf_print_state_parts(struct pf_state *, struct pf_state_key *, struct pf_state_key *); -int pf_addr_wrap_neq(struct pf_addr_wrap *, +static int pf_addr_wrap_neq(struct pf_addr_wrap *, struct pf_addr_wrap *); -int pf_compare_state_keys(struct pf_state_key *, - struct pf_state_key *, struct pfi_kif *, u_int); -#ifdef __FreeBSD__ -struct pf_state *pf_find_state(struct pfi_kif *, - struct pf_state_key_cmp *, u_int, struct mbuf *, - struct pf_mtag *); -#else -struct pf_state *pf_find_state(struct pfi_kif *, - struct pf_state_key_cmp *, u_int, struct mbuf *); -#endif -int pf_src_connlimit(struct pf_state **); -int 
pf_check_congestion(struct ifqueue *); +static struct pf_state *pf_find_state(struct pfi_kif *, + struct pf_state_key_cmp *, u_int); +static int pf_src_connlimit(struct pf_state **); +static void pf_flush_task(void *c, int pending); +static int pf_insert_src_node(struct pf_src_node **, + struct pf_rule *, struct pf_addr *, sa_family_t); +static int pf_purge_expired_states(int); +static void pf_purge_unlinked_rules(void); +static int pf_mtag_init(void *, int, int); +static void pf_mtag_free(struct m_tag *); +#ifdef INET +static void pf_route(struct mbuf **, struct pf_rule *, int, + struct ifnet *, struct pf_state *, + struct pf_pdesc *); +#endif /* INET */ +#ifdef INET6 +static void pf_change_a6(struct pf_addr *, u_int16_t *, + struct pf_addr *, u_int8_t); +static void pf_route6(struct mbuf **, struct pf_rule *, int, + struct ifnet *, struct pf_state *, + struct pf_pdesc *); +#endif /* INET6 */ -#ifdef __FreeBSD__ int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len); VNET_DECLARE(int, pf_end_threads); -VNET_DEFINE(struct pf_pool_limit, pf_pool_limits[PF_LIMIT_MAX]); -#else -extern struct pool pfr_ktable_pl; -extern struct pool pfr_kentry_pl; - -struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = { - { &pf_state_pl, PFSTATE_HIWAT }, - { &pf_src_tree_pl, PFSNODE_HIWAT }, - { &pf_frent_pl, PFFRAG_FRENT_HIWAT }, - { &pfr_ktable_pl, PFR_KTABLE_HIWAT }, - { &pfr_kentry_pl, PFR_KENTRY_HIWAT } -}; -#endif - -#ifdef __FreeBSD__ -#define PPACKET_LOOPED() \ - (pd->pf_mtag->flags & PF_PACKET_LOOPED) +VNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]); -#define PACKET_LOOPED() \ - (pd.pf_mtag->flags & PF_PACKET_LOOPED) +#define PACKET_LOOPED(pd) ((pd)->pf_mtag && \ + (pd)->pf_mtag->flags & PF_PACKET_LOOPED) -#define STATE_LOOKUP(i, k, d, s, m, pt) \ +#define STATE_LOOKUP(i, k, d, s, pd) \ do { \ - s = pf_find_state(i, k, d, m, pt); \ - if (s == NULL || (s)->timeout == PFTM_PURGE) \ + (s) = pf_find_state((i), (k), (d)); \ + if ((s) == NULL || (s)->timeout == PFTM_PURGE) \ return (PF_DROP); \ - if (PPACKET_LOOPED()) \ - return (PF_PASS); \ - if (d == PF_OUT && \ - (((s)->rule.ptr->rt == PF_ROUTETO && \ - (s)->rule.ptr->direction == PF_OUT) || \ - ((s)->rule.ptr->rt == PF_REPLYTO && \ - (s)->rule.ptr->direction == PF_IN)) && \ - (s)->rt_kif != NULL && \ - (s)->rt_kif != i) \ + if (PACKET_LOOPED(pd)) \ return (PF_PASS); \ - } while (0) -#else -#define STATE_LOOKUP(i, k, d, s, m) \ - do { \ - s = pf_find_state(i, k, d, m); \ - if (s == NULL || (s)->timeout == PFTM_PURGE) \ - return (PF_DROP); \ - if (d == PF_OUT && \ + if ((d) == PF_OUT && \ (((s)->rule.ptr->rt == PF_ROUTETO && \ (s)->rule.ptr->direction == PF_OUT) || \ ((s)->rule.ptr->rt == PF_REPLYTO && \ (s)->rule.ptr->direction == PF_IN)) && \ (s)->rt_kif != NULL && \ - (s)->rt_kif != i) \ + (s)->rt_kif != (i)) \ return (PF_PASS); \ } while (0) -#endif -#ifdef __FreeBSD__ #define BOUND_IFACE(r, k) \ ((r)->rule_flag & PFRULE_IFBOUND) ? (k) : V_pfi_all -#else -#define BOUND_IFACE(r, k) \ - ((r)->rule_flag & PFRULE_IFBOUND) ? 
(k) : pfi_all -#endif #define STATE_INC_COUNTERS(s) \ do { \ @@ -435,71 +359,40 @@ struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = { s->rule.ptr->states_cur--; \ } while (0) -static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *); -static __inline int pf_state_compare_key(struct pf_state_key *, - struct pf_state_key *); -static __inline int pf_state_compare_id(struct pf_state *, - struct pf_state *); +static MALLOC_DEFINE(M_PFHASH, "pf_hash", "pf(4) hash header structures"); +VNET_DEFINE(struct pf_keyhash *, pf_keyhash); +VNET_DEFINE(struct pf_idhash *, pf_idhash); +VNET_DEFINE(u_long, pf_hashmask); +VNET_DEFINE(struct pf_srchash *, pf_srchash); +VNET_DEFINE(u_long, pf_srchashmask); -#ifdef __FreeBSD__ -VNET_DEFINE(struct pf_src_tree, tree_src_tracking); +SYSCTL_NODE(_net, OID_AUTO, pf, CTLFLAG_RW, 0, "pf(4)"); -VNET_DEFINE(struct pf_state_tree_id, tree_id); -VNET_DEFINE(struct pf_state_queue, state_list); -#else -struct pf_src_tree tree_src_tracking; +VNET_DEFINE(u_long, pf_hashsize); +#define V_pf_hashsize VNET(pf_hashsize) +SYSCTL_VNET_UINT(_net_pf, OID_AUTO, states_hashsize, CTLFLAG_RDTUN, + &VNET_NAME(pf_hashsize), 0, "Size of pf(4) states hashtable"); -struct pf_state_tree_id tree_id; -struct pf_state_queue state_list; -#endif +VNET_DEFINE(u_long, pf_srchashsize); +#define V_pf_srchashsize VNET(pf_srchashsize) +SYSCTL_VNET_UINT(_net_pf, OID_AUTO, source_nodes_hashsize, CTLFLAG_RDTUN, + &VNET_NAME(pf_srchashsize), 0, "Size of pf(4) source nodes hashtable"); -RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare); -RB_GENERATE(pf_state_tree, pf_state_key, entry, pf_state_compare_key); -RB_GENERATE(pf_state_tree_id, pf_state, - entry_id, pf_state_compare_id); +VNET_DEFINE(void *, pf_swi_cookie); -static __inline int -pf_src_compare(struct pf_src_node *a, struct pf_src_node *b) +VNET_DEFINE(uint32_t, pf_hashseed); +#define V_pf_hashseed VNET(pf_hashseed) + +static __inline uint32_t +pf_hashkey(struct pf_state_key *sk) { - int diff; + uint32_t h; - if (a->rule.ptr > b->rule.ptr) - return (1); - if (a->rule.ptr < b->rule.ptr) - return (-1); - if ((diff = a->af - b->af) != 0) - return (diff); - switch (a->af) { -#ifdef INET - case AF_INET: - if (a->addr.addr32[0] > b->addr.addr32[0]) - return (1); - if (a->addr.addr32[0] < b->addr.addr32[0]) - return (-1); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - if (a->addr.addr32[3] > b->addr.addr32[3]) - return (1); - if (a->addr.addr32[3] < b->addr.addr32[3]) - return (-1); - if (a->addr.addr32[2] > b->addr.addr32[2]) - return (1); - if (a->addr.addr32[2] < b->addr.addr32[2]) - return (-1); - if (a->addr.addr32[1] > b->addr.addr32[1]) - return (1); - if (a->addr.addr32[1] < b->addr.addr32[1]) - return (-1); - if (a->addr.addr32[0] > b->addr.addr32[0]) - return (1); - if (a->addr.addr32[0] < b->addr.addr32[0]) - return (-1); - break; -#endif /* INET6 */ - } - return (0); + h = jenkins_hash32((uint32_t *)sk, + sizeof(struct pf_state_key_cmp)/sizeof(uint32_t), + V_pf_hashseed); + + return (h & V_pf_hashmask); } #ifdef INET6 @@ -522,20 +415,20 @@ pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) } #endif /* INET6 */ -void +static void pf_init_threshold(struct pf_threshold *threshold, u_int32_t limit, u_int32_t seconds) { threshold->limit = limit * PF_THRESHOLD_MULT; threshold->seconds = seconds; threshold->count = 0; - threshold->last = time_second; + threshold->last = time_uptime; } -void +static void pf_add_threshold(struct pf_threshold *threshold) { - u_int32_t t = time_second, diff 
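
pf_hashkey() above hashes only the pf_state_key_cmp prefix of a state key with jenkins_hash32() and masks the result with V_pf_hashmask; because the table size is a power of two, the mask picks a bucket without a division. The following self-contained userland sketch shows the same hash-and-mask bucket selection; the key layout, the FNV-1a stand-in hash and all values are assumptions made for the example (the kernel uses jenkins_hash32() from sys/hash.h and an arc4random() seed).

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Userland illustration only; invented key layout and stand-in hash. */
struct ex_key {
        uint32_t        addr[2];
        uint16_t        port[2];
        uint8_t         proto;
        uint8_t         pad[3];
};

static uint32_t
ex_hash32(const void *buf, size_t len, uint32_t seed)
{
        const uint8_t *p = buf;
        uint32_t h = 2166136261u ^ seed;

        while (len-- > 0) {
                h ^= *p++;
                h *= 16777619u;
        }
        return (h);
}

int
main(void)
{
        struct ex_key k;
        unsigned long hashmask = 32768 - 1;     /* table size is a power of 2 */

        memset(&k, 0, sizeof(k));
        k.addr[0] = 0xc0a80101;         /* 192.168.1.1 */
        k.addr[1] = 0x08080808;         /* 8.8.8.8 */
        k.port[0] = 54321;
        k.port[1] = 53;
        k.proto = 17;                   /* UDP */

        printf("bucket %lu of %lu\n",
            ex_hash32(&k, sizeof(k), 0x5eed) & hashmask, hashmask + 1);
        return (0);
}
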
= t - threshold->last; + u_int32_t t = time_uptime, diff = t - threshold->last; if (diff >= threshold->seconds) threshold->count = 0; @@ -546,17 +439,21 @@ pf_add_threshold(struct pf_threshold *threshold) threshold->last = t; } -int +static int pf_check_threshold(struct pf_threshold *threshold) { return (threshold->count > threshold->limit); } -int +static int pf_src_connlimit(struct pf_state **state) { + struct pfr_addr p; + struct pf_flush_entry *pffe; int bad = 0; + PF_STATE_LOCK_ASSERT(*state); + (*state)->src_node->conn++; (*state)->src.tcp_est = 1; pf_add_threshold(&(*state)->src_node->conn_rate); @@ -564,333 +461,407 @@ pf_src_connlimit(struct pf_state **state) if ((*state)->rule.ptr->max_src_conn && (*state)->rule.ptr->max_src_conn < (*state)->src_node->conn) { -#ifdef __FreeBSD__ V_pf_status.lcounters[LCNT_SRCCONN]++; -#else - pf_status.lcounters[LCNT_SRCCONN]++; -#endif bad++; } if ((*state)->rule.ptr->max_src_conn_rate.limit && pf_check_threshold(&(*state)->src_node->conn_rate)) { -#ifdef __FreeBSD__ V_pf_status.lcounters[LCNT_SRCCONNRATE]++; -#else - pf_status.lcounters[LCNT_SRCCONNRATE]++; -#endif bad++; } if (!bad) return (0); - if ((*state)->rule.ptr->overload_tbl) { - struct pfr_addr p; - u_int32_t killed = 0; + /* Kill this state. */ + (*state)->timeout = PFTM_PURGE; + (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; -#ifdef __FreeBSD__ - V_pf_status.lcounters[LCNT_OVERLOAD_TABLE]++; - if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - pf_status.lcounters[LCNT_OVERLOAD_TABLE]++; - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif - printf("pf_src_connlimit: blocking address "); - pf_print_host(&(*state)->src_node->addr, 0, - (*state)->key[PF_SK_WIRE]->af); - } + if ((*state)->rule.ptr->overload_tbl == NULL) + return (1); + + V_pf_status.lcounters[LCNT_OVERLOAD_TABLE]++; + if (V_pf_status.debug >= PF_DEBUG_MISC) { + printf("%s: blocking address ", __func__); + pf_print_host(&(*state)->src_node->addr, 0, + (*state)->key[PF_SK_WIRE]->af); + printf("\n"); + } - bzero(&p, sizeof(p)); - p.pfra_af = (*state)->key[PF_SK_WIRE]->af; - switch ((*state)->key[PF_SK_WIRE]->af) { + bzero(&p, sizeof(p)); + p.pfra_af = (*state)->key[PF_SK_WIRE]->af; + switch ((*state)->key[PF_SK_WIRE]->af) { #ifdef INET - case AF_INET: - p.pfra_net = 32; - p.pfra_ip4addr = (*state)->src_node->addr.v4; - break; + case AF_INET: + p.pfra_net = 32; + p.pfra_ip4addr = (*state)->src_node->addr.v4; + break; #endif /* INET */ #ifdef INET6 - case AF_INET6: - p.pfra_net = 128; - p.pfra_ip6addr = (*state)->src_node->addr.v6; - break; + case AF_INET6: + p.pfra_net = 128; + p.pfra_ip6addr = (*state)->src_node->addr.v6; + break; #endif /* INET6 */ - } + } - pfr_insert_kentry((*state)->rule.ptr->overload_tbl, - &p, time_second); + pfr_insert_kentry((*state)->rule.ptr->overload_tbl, &p, time_second); - /* kill existing states if that's required. */ - if ((*state)->rule.ptr->flush) { - struct pf_state_key *sk; - struct pf_state *st; + if ((*state)->rule.ptr->flush == 0) + return (1); -#ifdef __FreeBSD__ - V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++; - RB_FOREACH(st, pf_state_tree_id, &V_tree_id) { -#else - pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++; - RB_FOREACH(st, pf_state_tree_id, &tree_id) { -#endif - sk = st->key[PF_SK_WIRE]; - /* - * Kill states from this source. 
(Only those - * from the same rule if PF_FLUSH_GLOBAL is not - * set) - */ - if (sk->af == - (*state)->key[PF_SK_WIRE]->af && - (((*state)->direction == PF_OUT && - PF_AEQ(&(*state)->src_node->addr, - &sk->addr[1], sk->af)) || - ((*state)->direction == PF_IN && - PF_AEQ(&(*state)->src_node->addr, - &sk->addr[0], sk->af))) && - ((*state)->rule.ptr->flush & - PF_FLUSH_GLOBAL || - (*state)->rule.ptr == st->rule.ptr)) { - st->timeout = PFTM_PURGE; - st->src.state = st->dst.state = - TCPS_CLOSED; - killed++; - } + /* Schedule flushing task. */ + pffe = malloc(sizeof(*pffe), M_PFTEMP, M_NOWAIT); + if (pffe == NULL) + return (1); /* too bad :( */ + + bcopy(&(*state)->src_node->addr, &pffe->addr, sizeof(pffe->addr)); + pffe->af = (*state)->key[PF_SK_WIRE]->af; + pffe->dir = (*state)->direction; + if ((*state)->rule.ptr->flush & PF_FLUSH_GLOBAL) + pffe->rule = NULL; + else + pffe->rule = (*state)->rule.ptr; + PF_FLUSHQ_LOCK(); + SLIST_INSERT_HEAD(&V_pf_flushqueue, pffe, next); + PF_FLUSHQ_UNLOCK(); + taskqueue_enqueue(taskqueue_swi, &V_pf_flushtask); + + return (1); +} + +static void +pf_flush_task(void *c, int pending) +{ + struct pf_flush_head queue; + struct pf_flush_entry *pffe, *pffe1; + uint32_t killed = 0; + + PF_FLUSHQ_LOCK(); + queue = *(struct pf_flush_head *)c; + SLIST_INIT((struct pf_flush_head *)c); + PF_FLUSHQ_UNLOCK(); + + V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++; + + for (int i = 0; i <= V_pf_hashmask; i++) { + struct pf_idhash *ih = &V_pf_idhash[i]; + struct pf_state_key *sk; + struct pf_state *s; + + PF_HASHROW_LOCK(ih); + LIST_FOREACH(s, &ih->states, entry) { + sk = s->key[PF_SK_WIRE]; + SLIST_FOREACH(pffe, &queue, next) + if (sk->af == pffe->af && (pffe->rule == NULL || + pffe->rule == s->rule.ptr) && + ((pffe->dir == PF_OUT && + PF_AEQ(&pffe->addr, &sk->addr[1], sk->af)) || + (pffe->dir == PF_IN && + PF_AEQ(&pffe->addr, &sk->addr[0], sk->af)))) { + s->timeout = PFTM_PURGE; + s->src.state = s->dst.state = TCPS_CLOSED; + killed++; } -#ifdef __FreeBSD__ - if (V_pf_status.debug >= PF_DEBUG_MISC) -#else - if (pf_status.debug >= PF_DEBUG_MISC) -#endif - printf(", %u states killed", killed); } -#ifdef __FreeBSD__ - if (V_pf_status.debug >= PF_DEBUG_MISC) -#else - if (pf_status.debug >= PF_DEBUG_MISC) -#endif - printf("\n"); + PF_HASHROW_UNLOCK(ih); } + SLIST_FOREACH_SAFE(pffe, &queue, next, pffe1) + free(pffe, M_PFTEMP); + if (V_pf_status.debug >= PF_DEBUG_MISC) + printf("%s: %u states killed", __func__, killed); +} - /* kill this state */ - (*state)->timeout = PFTM_PURGE; - (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; - return (1); +/* + * Can return locked on failure, so that we can consistently + * allocate and insert a new one. 
+ */ +struct pf_src_node * +pf_find_src_node(struct pf_addr *src, struct pf_rule *rule, sa_family_t af, + int returnlocked) +{ + struct pf_srchash *sh; + struct pf_src_node *n; + + V_pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; + + sh = &V_pf_srchash[pf_hashsrc(src, af)]; + PF_HASHROW_LOCK(sh); + LIST_FOREACH(n, &sh->nodes, entry) + if (n->rule.ptr == rule && n->af == af && + ((af == AF_INET && n->addr.v4.s_addr == src->v4.s_addr) || + (af == AF_INET6 && bcmp(&n->addr, src, sizeof(*src)) == 0))) + break; + if (n != NULL || returnlocked == 0) + PF_HASHROW_UNLOCK(sh); + + return (n); } -int +static int pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule, struct pf_addr *src, sa_family_t af) { - struct pf_src_node k; + + KASSERT((rule->rule_flag & PFRULE_RULESRCTRACK || + rule->rpool.opts & PF_POOL_STICKYADDR), + ("%s for non-tracking rule %p", __func__, rule)); + + if (*sn == NULL) + *sn = pf_find_src_node(src, rule, af, 1); if (*sn == NULL) { - k.af = af; - PF_ACPY(&k.addr, src, af); - if (rule->rule_flag & PFRULE_RULESRCTRACK || - rule->rpool.opts & PF_POOL_STICKYADDR) - k.rule.ptr = rule; - else - k.rule.ptr = NULL; -#ifdef __FreeBSD__ - V_pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; - *sn = RB_FIND(pf_src_tree, &V_tree_src_tracking, &k); -#else - pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; - *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k); -#endif - } - if (*sn == NULL) { + struct pf_srchash *sh = &V_pf_srchash[pf_hashsrc(src, af)]; + + PF_HASHROW_ASSERT(sh); + if (!rule->max_src_nodes || rule->src_nodes < rule->max_src_nodes) -#ifdef __FreeBSD__ - (*sn) = pool_get(&V_pf_src_tree_pl, PR_NOWAIT | PR_ZERO); -#else - (*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT | PR_ZERO); -#endif + (*sn) = uma_zalloc(V_pf_sources_z, M_NOWAIT | M_ZERO); else -#ifdef __FreeBSD__ V_pf_status.lcounters[LCNT_SRCNODES]++; -#else - pf_status.lcounters[LCNT_SRCNODES]++; -#endif - if ((*sn) == NULL) + if ((*sn) == NULL) { + PF_HASHROW_UNLOCK(sh); return (-1); + } pf_init_threshold(&(*sn)->conn_rate, rule->max_src_conn_rate.limit, rule->max_src_conn_rate.seconds); (*sn)->af = af; - if (rule->rule_flag & PFRULE_RULESRCTRACK || - rule->rpool.opts & PF_POOL_STICKYADDR) - (*sn)->rule.ptr = rule; - else - (*sn)->rule.ptr = NULL; + (*sn)->rule.ptr = rule; PF_ACPY(&(*sn)->addr, src, af); - if (RB_INSERT(pf_src_tree, -#ifdef __FreeBSD__ - &V_tree_src_tracking, *sn) != NULL) { - if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - &tree_src_tracking, *sn) != NULL) { - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif - printf("pf: src_tree insert failed: "); - pf_print_host(&(*sn)->addr, 0, af); - printf("\n"); - } -#ifdef __FreeBSD__ - pool_put(&V_pf_src_tree_pl, *sn); -#else - pool_put(&pf_src_tree_pl, *sn); -#endif - return (-1); - } - (*sn)->creation = time_second; + LIST_INSERT_HEAD(&sh->nodes, *sn, entry); + (*sn)->creation = time_uptime; (*sn)->ruletype = rule->action; if ((*sn)->rule.ptr != NULL) (*sn)->rule.ptr->src_nodes++; -#ifdef __FreeBSD__ + PF_HASHROW_UNLOCK(sh); V_pf_status.scounters[SCNT_SRC_NODE_INSERT]++; V_pf_status.src_nodes++; -#else - pf_status.scounters[SCNT_SRC_NODE_INSERT]++; - pf_status.src_nodes++; -#endif } else { if (rule->max_src_states && (*sn)->states >= rule->max_src_states) { -#ifdef __FreeBSD__ V_pf_status.lcounters[LCNT_SRCSTATES]++; -#else - pf_status.lcounters[LCNT_SRCSTATES]++; -#endif return (-1); } } return (0); } -/* state table stuff */ +static void +pf_remove_src_node(struct pf_src_node *src) +{ + struct pf_srchash *sh; + + sh = &V_pf_srchash[pf_hashsrc(&src->addr, 
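
pf_find_src_node() above looks the node up in its source-node hash row and, when called with returnlocked set and nothing is found, returns with the row still locked so that pf_insert_src_node() can allocate and link a new node without a second lookup racing in between. Below is a rough userland sketch of that find-or-insert-under-a-row-lock pattern; the types and names are invented, and a single pthread mutex stands in for the per-row hash lock.

#include <pthread.h>
#include <stdlib.h>

/* Userland illustration only. */
struct ex_node {
        struct ex_node  *next;
        int              key;
};

struct ex_row {
        pthread_mutex_t  lock;
        struct ex_node  *head;
};

/* Returns the node, or NULL with the row lock still held. */
static struct ex_node *
ex_find_locked(struct ex_row *r, int key)
{
        struct ex_node *n;

        pthread_mutex_lock(&r->lock);
        for (n = r->head; n != NULL; n = n->next)
                if (n->key == key) {
                        pthread_mutex_unlock(&r->lock);
                        return (n);
                }
        return (NULL);          /* miss: lock kept, insertion cannot race */
}

static struct ex_node *
ex_find_or_insert(struct ex_row *r, int key)
{
        struct ex_node *n;

        if ((n = ex_find_locked(r, key)) != NULL)
                return (n);
        if ((n = calloc(1, sizeof(*n))) != NULL) {
                n->key = key;
                n->next = r->head;
                r->head = n;
        }
        pthread_mutex_unlock(&r->lock);
        return (n);
}

int
main(void)
{
        struct ex_row r = { PTHREAD_MUTEX_INITIALIZER, NULL };

        ex_find_or_insert(&r, 42);      /* allocates and inserts */
        ex_find_or_insert(&r, 42);      /* finds the existing node */
        return (0);
}
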
src->af)]; + PF_HASHROW_LOCK(sh); + LIST_REMOVE(src, entry); + PF_HASHROW_UNLOCK(sh); +} -static __inline int -pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b) +/* Data storage structures initialization. */ +void +pf_initialize() { - int diff; + struct pf_keyhash *kh; + struct pf_idhash *ih; + struct pf_srchash *sh; + u_int i; + + TUNABLE_ULONG_FETCH("net.pf.states_hashsize", &V_pf_hashsize); + if (V_pf_hashsize == 0 || !powerof2(V_pf_hashsize)) + V_pf_hashsize = PF_HASHSIZ; + TUNABLE_ULONG_FETCH("net.pf.source_nodes_hashsize", &V_pf_srchashsize); + if (V_pf_srchashsize == 0 || !powerof2(V_pf_srchashsize)) + V_pf_srchashsize = PF_HASHSIZ / 4; + + V_pf_hashseed = arc4random(); + + /* States and state keys storage. */ + V_pf_state_z = uma_zcreate("pf states", sizeof(struct pf_state), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + V_pf_limits[PF_LIMIT_STATES].zone = V_pf_state_z; + uma_zone_set_max(V_pf_state_z, PFSTATE_HIWAT); + + V_pf_state_key_z = uma_zcreate("pf state keys", + sizeof(struct pf_state_key), pf_state_key_ctor, NULL, NULL, NULL, + UMA_ALIGN_PTR, 0); + V_pf_keyhash = malloc(V_pf_hashsize * sizeof(struct pf_keyhash), + M_PFHASH, M_WAITOK | M_ZERO); + V_pf_idhash = malloc(V_pf_hashsize * sizeof(struct pf_idhash), + M_PFHASH, M_WAITOK | M_ZERO); + V_pf_hashmask = V_pf_hashsize - 1; + for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= V_pf_hashmask; + i++, kh++, ih++) { + mtx_init(&kh->lock, "pf_keyhash", NULL, MTX_DEF); + mtx_init(&ih->lock, "pf_idhash", NULL, MTX_DEF); + } + + /* Source nodes. */ + V_pf_sources_z = uma_zcreate("pf source nodes", + sizeof(struct pf_src_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, + 0); + V_pf_limits[PF_LIMIT_SRC_NODES].zone = V_pf_sources_z; + uma_zone_set_max(V_pf_sources_z, PFSNODE_HIWAT); + V_pf_srchash = malloc(V_pf_srchashsize * sizeof(struct pf_srchash), + M_PFHASH, M_WAITOK|M_ZERO); + V_pf_srchashmask = V_pf_srchashsize - 1; + for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) + mtx_init(&sh->lock, "pf_srchash", NULL, MTX_DEF); + + /* ALTQ */ + TAILQ_INIT(&V_pf_altqs[0]); + TAILQ_INIT(&V_pf_altqs[1]); + TAILQ_INIT(&V_pf_pabuf); + V_pf_altqs_active = &V_pf_altqs[0]; + V_pf_altqs_inactive = &V_pf_altqs[1]; + + /* Mbuf tags */ + V_pf_mtag_z = uma_zcreate("pf mtags", sizeof(struct m_tag) + + sizeof(struct pf_mtag), NULL, NULL, pf_mtag_init, NULL, + UMA_ALIGN_PTR, 0); + + /* Send & flush queues. */ + STAILQ_INIT(&V_pf_sendqueue); + SLIST_INIT(&V_pf_flushqueue); + TASK_INIT(&V_pf_flushtask, 0, pf_flush_task, &V_pf_flushqueue); + mtx_init(&pf_sendqueue_mtx, "pf send queue", NULL, MTX_DEF); + mtx_init(&pf_flushqueue_mtx, "pf flush queue", NULL, MTX_DEF); + + /* Unlinked, but may be referenced rules. 
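
pf_initialize() above reads the net.pf.states_hashsize and net.pf.source_nodes_hashsize loader tunables with TUNABLE_ULONG_FETCH() and silently falls back to the built-in defaults when the value is zero or not a power of two; the lookup masks are then simply size - 1. A small userland sketch of that sanity check follows; the default value and names are made up for the example, and the power-of-two test is spelled out because it also accepts zero, which is why the explicit zero check above matters.

#include <stdio.h>

/* Userland illustration only; EX_POWEROF2() matches powerof2() in <sys/param.h>. */
#define EX_POWEROF2(x)          ((((x) - 1) & (x)) == 0)
#define EX_DEFAULT_HASHSIZE     32768UL         /* invented default */

static unsigned long
ex_checked_hashsize(unsigned long requested)
{
        if (requested == 0 || !EX_POWEROF2(requested))
                return (EX_DEFAULT_HASHSIZE);
        return (requested);
}

int
main(void)
{
        unsigned long size = ex_checked_hashsize(50000);        /* not 2^n */

        printf("size %lu, mask %#lx\n", size, size - 1);
        return (0);
}
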
*/ + TAILQ_INIT(&V_pf_unlinked_rules); + mtx_init(&pf_unlnkdrules_mtx, "pf unlinked rules", NULL, MTX_DEF); +} - if ((diff = a->proto - b->proto) != 0) - return (diff); - if ((diff = a->af - b->af) != 0) - return (diff); - switch (a->af) { -#ifdef INET - case AF_INET: - if (a->addr[0].addr32[0] > b->addr[0].addr32[0]) - return (1); - if (a->addr[0].addr32[0] < b->addr[0].addr32[0]) - return (-1); - if (a->addr[1].addr32[0] > b->addr[1].addr32[0]) - return (1); - if (a->addr[1].addr32[0] < b->addr[1].addr32[0]) - return (-1); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - if (a->addr[0].addr32[3] > b->addr[0].addr32[3]) - return (1); - if (a->addr[0].addr32[3] < b->addr[0].addr32[3]) - return (-1); - if (a->addr[1].addr32[3] > b->addr[1].addr32[3]) - return (1); - if (a->addr[1].addr32[3] < b->addr[1].addr32[3]) - return (-1); - if (a->addr[0].addr32[2] > b->addr[0].addr32[2]) - return (1); - if (a->addr[0].addr32[2] < b->addr[0].addr32[2]) - return (-1); - if (a->addr[1].addr32[2] > b->addr[1].addr32[2]) - return (1); - if (a->addr[1].addr32[2] < b->addr[1].addr32[2]) - return (-1); - if (a->addr[0].addr32[1] > b->addr[0].addr32[1]) - return (1); - if (a->addr[0].addr32[1] < b->addr[0].addr32[1]) - return (-1); - if (a->addr[1].addr32[1] > b->addr[1].addr32[1]) - return (1); - if (a->addr[1].addr32[1] < b->addr[1].addr32[1]) - return (-1); - if (a->addr[0].addr32[0] > b->addr[0].addr32[0]) - return (1); - if (a->addr[0].addr32[0] < b->addr[0].addr32[0]) - return (-1); - if (a->addr[1].addr32[0] > b->addr[1].addr32[0]) - return (1); - if (a->addr[1].addr32[0] < b->addr[1].addr32[0]) - return (-1); - break; -#endif /* INET6 */ - } +void +pf_cleanup() +{ + struct pf_keyhash *kh; + struct pf_idhash *ih; + struct pf_srchash *sh; + struct pf_send_entry *pfse, *next; + u_int i; + + for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= V_pf_hashmask; + i++, kh++, ih++) { + KASSERT(LIST_EMPTY(&kh->keys), ("%s: key hash not empty", + __func__)); + KASSERT(LIST_EMPTY(&ih->states), ("%s: id hash not empty", + __func__)); + mtx_destroy(&kh->lock); + mtx_destroy(&ih->lock); + } + free(V_pf_keyhash, M_PFHASH); + free(V_pf_idhash, M_PFHASH); + + for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) { + KASSERT(LIST_EMPTY(&sh->nodes), + ("%s: source node hash not empty", __func__)); + mtx_destroy(&sh->lock); + } + free(V_pf_srchash, M_PFHASH); + + STAILQ_FOREACH_SAFE(pfse, &V_pf_sendqueue, pfse_next, next) { + m_freem(pfse->pfse_m); + free(pfse, M_PFTEMP); + } + + mtx_destroy(&pf_sendqueue_mtx); + mtx_destroy(&pf_flushqueue_mtx); + mtx_destroy(&pf_unlnkdrules_mtx); + + uma_zdestroy(V_pf_mtag_z); + uma_zdestroy(V_pf_sources_z); + uma_zdestroy(V_pf_state_z); + uma_zdestroy(V_pf_state_key_z); +} + +static int +pf_mtag_init(void *mem, int size, int how) +{ + struct m_tag *t; - if ((diff = a->port[0] - b->port[0]) != 0) - return (diff); - if ((diff = a->port[1] - b->port[1]) != 0) - return (diff); + t = (struct m_tag *)mem; + t->m_tag_cookie = MTAG_ABI_COMPAT; + t->m_tag_id = PACKET_TAG_PF; + t->m_tag_len = sizeof(struct pf_mtag); + t->m_tag_free = pf_mtag_free; return (0); } -static __inline int -pf_state_compare_id(struct pf_state *a, struct pf_state *b) +static void +pf_mtag_free(struct m_tag *t) { - if (a->id > b->id) - return (1); - if (a->id < b->id) - return (-1); - if (a->creatorid > b->creatorid) - return (1); - if (a->creatorid < b->creatorid) - return (-1); - return (0); + uma_zfree(V_pf_mtag_z, t); } -int -pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int 
idx) +struct pf_mtag * +pf_get_mtag(struct mbuf *m) { - struct pf_state_item *si; - struct pf_state_key *cur; - struct pf_state *olds = NULL; + struct m_tag *mtag; -#ifdef __FreeBSD__ - KASSERT(s->key[idx] == NULL, ("%s: key is null!", __FUNCTION__)); -#else - KASSERT(s->key[idx] == NULL); /* XXX handle this? */ -#endif + if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) != NULL) + return ((struct pf_mtag *)(mtag + 1)); -#ifdef __FreeBSD__ - if ((cur = RB_INSERT(pf_state_tree, &V_pf_statetbl, sk)) != NULL) { -#else - if ((cur = RB_INSERT(pf_state_tree, &pf_statetbl, sk)) != NULL) { -#endif - /* key exists. check for same kif, if none, add to key */ - TAILQ_FOREACH(si, &cur->states, entry) - if (si->s->kif == s->kif && - si->s->direction == s->direction) { + mtag = uma_zalloc(V_pf_mtag_z, M_NOWAIT); + if (mtag == NULL) + return (NULL); + bzero(mtag + 1, sizeof(struct pf_mtag)); + m_tag_prepend(m, mtag); + + return ((struct pf_mtag *)(mtag + 1)); +} + +static int +pf_state_key_attach(struct pf_state_key *skw, struct pf_state_key *sks, + struct pf_state *s) +{ + struct pf_keyhash *kh; + struct pf_state_key *sk, *cur; + struct pf_state *si, *olds = NULL; + int idx; + + KASSERT(s->refs == 0, ("%s: state not pristine", __func__)); + KASSERT(s->key[PF_SK_WIRE] == NULL, ("%s: state has key", __func__)); + KASSERT(s->key[PF_SK_STACK] == NULL, ("%s: state has key", __func__)); + + /* + * First run: start with wire key. + */ + sk = skw; + idx = PF_SK_WIRE; + +keyattach: + kh = &V_pf_keyhash[pf_hashkey(sk)]; + + PF_HASHROW_LOCK(kh); + LIST_FOREACH(cur, &kh->keys, entry) + if (bcmp(cur, sk, sizeof(struct pf_state_key_cmp)) == 0) + break; + + if (cur != NULL) { + /* Key exists. Check for same kif, if none, add to key. */ + TAILQ_FOREACH(si, &cur->states[idx], key_list[idx]) { + struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(si)]; + + PF_HASHROW_LOCK(ih); + if (si->kif == s->kif && + si->direction == s->direction) { if (sk->proto == IPPROTO_TCP && - si->s->src.state >= TCPS_FIN_WAIT_2 && - si->s->dst.state >= TCPS_FIN_WAIT_2) { - si->s->src.state = si->s->dst.state = + si->src.state >= TCPS_FIN_WAIT_2 && + si->dst.state >= TCPS_FIN_WAIT_2) { + si->src.state = si->dst.state = TCPS_CLOSED; - /* unlink late or sks can go away */ - olds = si->s; + /* Unlink later or cur can go away. */ + pf_ref_state(si); + olds = si; } else { -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif printf("pf: %s key attach " "failed on %s: ", (idx == PF_SK_WIRE) ? @@ -902,375 +873,294 @@ pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx) (idx == PF_SK_STACK) ? sk : NULL); printf(", existing: "); - pf_print_state_parts(si->s, + pf_print_state_parts(si, (idx == PF_SK_WIRE) ? sk : NULL, (idx == PF_SK_STACK) ? sk : NULL); printf("\n"); } -#ifdef __FreeBSD__ - pool_put(&V_pf_state_key_pl, sk); -#else - pool_put(&pf_state_key_pl, sk); -#endif + PF_HASHROW_UNLOCK(ih); + PF_HASHROW_UNLOCK(kh); + uma_zfree(V_pf_state_key_z, sk); + if (idx == PF_SK_STACK) + pf_detach_state(s); return (-1); /* collision! 
*/ } } -#ifdef __FreeBSD__ - pool_put(&V_pf_state_key_pl, sk); -#else - pool_put(&pf_state_key_pl, sk); -#endif + PF_HASHROW_UNLOCK(ih); + } + uma_zfree(V_pf_state_key_z, sk); s->key[idx] = cur; - } else + } else { + LIST_INSERT_HEAD(&kh->keys, sk, entry); s->key[idx] = sk; - -#ifdef __FreeBSD__ - if ((si = pool_get(&V_pf_state_item_pl, PR_NOWAIT)) == NULL) { -#else - if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) { -#endif - pf_state_key_detach(s, idx); - return (-1); } - si->s = s; - /* list is sorted, if-bound states before floating */ -#ifdef __FreeBSD__ +stateattach: + /* List is sorted, if-bound states before floating. */ if (s->kif == V_pfi_all) -#else - if (s->kif == pfi_all) -#endif - TAILQ_INSERT_TAIL(&s->key[idx]->states, si, entry); + TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], s, key_list[idx]); else - TAILQ_INSERT_HEAD(&s->key[idx]->states, si, entry); + TAILQ_INSERT_HEAD(&s->key[idx]->states[idx], s, key_list[idx]); + + /* + * Attach done. See how should we (or should not?) + * attach a second key. + */ + if (sks == skw) { + s->key[PF_SK_STACK] = s->key[PF_SK_WIRE]; + idx = PF_SK_STACK; + sks = NULL; + goto stateattach; + } else if (sks != NULL) { + PF_HASHROW_UNLOCK(kh); + if (olds) { + pf_unlink_state(olds, 0); + pf_release_state(olds); + olds = NULL; + } + /* + * Continue attaching with stack key. + */ + sk = sks; + idx = PF_SK_STACK; + sks = NULL; + goto keyattach; + } else + PF_HASHROW_UNLOCK(kh); - if (olds) - pf_unlink_state(olds); + if (olds) { + pf_unlink_state(olds, 0); + pf_release_state(olds); + } + + KASSERT(s->key[PF_SK_WIRE] != NULL && s->key[PF_SK_STACK] != NULL, + ("%s failure", __func__)); return (0); } -void +static void pf_detach_state(struct pf_state *s) { - if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK]) - s->key[PF_SK_WIRE] = NULL; - - if (s->key[PF_SK_STACK] != NULL) - pf_state_key_detach(s, PF_SK_STACK); + struct pf_state_key *sks = s->key[PF_SK_STACK]; + struct pf_keyhash *kh; + + if (sks != NULL) { + kh = &V_pf_keyhash[pf_hashkey(sks)]; + PF_HASHROW_LOCK(kh); + if (s->key[PF_SK_STACK] != NULL) + pf_state_key_detach(s, PF_SK_STACK); + /* + * If both point to same key, then we are done. 
+ */ + if (sks == s->key[PF_SK_WIRE]) { + pf_state_key_detach(s, PF_SK_WIRE); + PF_HASHROW_UNLOCK(kh); + return; + } + PF_HASHROW_UNLOCK(kh); + } - if (s->key[PF_SK_WIRE] != NULL) - pf_state_key_detach(s, PF_SK_WIRE); + if (s->key[PF_SK_WIRE] != NULL) { + kh = &V_pf_keyhash[pf_hashkey(s->key[PF_SK_WIRE])]; + PF_HASHROW_LOCK(kh); + if (s->key[PF_SK_WIRE] != NULL) + pf_state_key_detach(s, PF_SK_WIRE); + PF_HASHROW_UNLOCK(kh); + } } -void +static void pf_state_key_detach(struct pf_state *s, int idx) { - struct pf_state_item *si; + struct pf_state_key *sk = s->key[idx]; +#ifdef INVARIANTS + struct pf_keyhash *kh = &V_pf_keyhash[pf_hashkey(sk)]; - si = TAILQ_FIRST(&s->key[idx]->states); - while (si && si->s != s) - si = TAILQ_NEXT(si, entry); - - if (si) { - TAILQ_REMOVE(&s->key[idx]->states, si, entry); -#ifdef __FreeBSD__ - pool_put(&V_pf_state_item_pl, si); -#else - pool_put(&pf_state_item_pl, si); + PF_HASHROW_ASSERT(kh); #endif - } + TAILQ_REMOVE(&sk->states[idx], s, key_list[idx]); + s->key[idx] = NULL; - if (TAILQ_EMPTY(&s->key[idx]->states)) { -#ifdef __FreeBSD__ - RB_REMOVE(pf_state_tree, &V_pf_statetbl, s->key[idx]); -#else - RB_REMOVE(pf_state_tree, &pf_statetbl, s->key[idx]); -#endif - if (s->key[idx]->reverse) - s->key[idx]->reverse->reverse = NULL; -#ifdef __FreeBSD__ - /* XXX: implement this */ -#else - if (s->key[idx]->inp) - s->key[idx]->inp->inp_pf_sk = NULL; -#endif -#ifdef __FreeBSD__ - pool_put(&V_pf_state_key_pl, s->key[idx]); -#else - pool_put(&pf_state_key_pl, s->key[idx]); -#endif + if (TAILQ_EMPTY(&sk->states[0]) && TAILQ_EMPTY(&sk->states[1])) { + LIST_REMOVE(sk, entry); + uma_zfree(V_pf_state_key_z, sk); } - s->key[idx] = NULL; +} + +static int +pf_state_key_ctor(void *mem, int size, void *arg, int flags) +{ + struct pf_state_key *sk = mem; + + bzero(sk, sizeof(struct pf_state_key_cmp)); + TAILQ_INIT(&sk->states[PF_SK_WIRE]); + TAILQ_INIT(&sk->states[PF_SK_STACK]); + + return (0); } struct pf_state_key * -pf_alloc_state_key(int pool_flags) +pf_state_key_setup(struct pf_pdesc *pd, struct pf_addr *saddr, + struct pf_addr *daddr, u_int16_t sport, u_int16_t dport) { - struct pf_state_key *sk; + struct pf_state_key *sk; -#ifdef __FreeBSD__ - if ((sk = pool_get(&V_pf_state_key_pl, pool_flags)) == NULL) -#else - if ((sk = pool_get(&pf_state_key_pl, pool_flags)) == NULL) -#endif + sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT); + if (sk == NULL) return (NULL); - TAILQ_INIT(&sk->states); + + PF_ACPY(&sk->addr[pd->sidx], saddr, pd->af); + PF_ACPY(&sk->addr[pd->didx], daddr, pd->af); + sk->port[pd->sidx] = sport; + sk->port[pd->didx] = dport; + sk->proto = pd->proto; + sk->af = pd->af; return (sk); } -int -pf_state_key_setup(struct pf_pdesc *pd, struct pf_rule *nr, - struct pf_state_key **skw, struct pf_state_key **sks, - struct pf_state_key **skp, struct pf_state_key **nkp, - struct pf_addr *saddr, struct pf_addr *daddr, - u_int16_t sport, u_int16_t dport) +struct pf_state_key * +pf_state_key_clone(struct pf_state_key *orig) { -#ifdef __FreeBSD__ - KASSERT((*skp == NULL && *nkp == NULL), - ("%s: skp == NULL && nkp == NULL", __FUNCTION__)); -#else - KASSERT((*skp == NULL && *nkp == NULL)); -#endif + struct pf_state_key *sk; - if ((*skp = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) - return (ENOMEM); + sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT); + if (sk == NULL) + return (NULL); - PF_ACPY(&(*skp)->addr[pd->sidx], saddr, pd->af); - PF_ACPY(&(*skp)->addr[pd->didx], daddr, pd->af); - (*skp)->port[pd->sidx] = sport; - (*skp)->port[pd->didx] = dport; - (*skp)->proto = pd->proto; 
- (*skp)->af = pd->af; - - if (nr != NULL) { - if ((*nkp = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) - return (ENOMEM); /* caller must handle cleanup */ - - /* XXX maybe just bcopy and TAILQ_INIT(&(*nkp)->states) */ - PF_ACPY(&(*nkp)->addr[0], &(*skp)->addr[0], pd->af); - PF_ACPY(&(*nkp)->addr[1], &(*skp)->addr[1], pd->af); - (*nkp)->port[0] = (*skp)->port[0]; - (*nkp)->port[1] = (*skp)->port[1]; - (*nkp)->proto = pd->proto; - (*nkp)->af = pd->af; - } else - *nkp = *skp; + bcopy(orig, sk, sizeof(struct pf_state_key_cmp)); - if (pd->dir == PF_IN) { - *skw = *skp; - *sks = *nkp; - } else { - *sks = *skp; - *skw = *nkp; - } - return (0); + return (sk); } - int pf_state_insert(struct pfi_kif *kif, struct pf_state_key *skw, struct pf_state_key *sks, struct pf_state *s) { -#ifndef __FreeBSD__ - splassert(IPL_SOFTNET); -#endif + struct pf_idhash *ih; + struct pf_state *cur; + + KASSERT(TAILQ_EMPTY(&sks->states[0]) && TAILQ_EMPTY(&sks->states[1]), + ("%s: sks not pristine", __func__)); + KASSERT(TAILQ_EMPTY(&skw->states[0]) && TAILQ_EMPTY(&skw->states[1]), + ("%s: skw not pristine", __func__)); + KASSERT(s->refs == 0, ("%s: state not pristine", __func__)); s->kif = kif; - if (skw == sks) { - if (pf_state_key_attach(skw, s, PF_SK_WIRE)) - return (-1); - s->key[PF_SK_STACK] = s->key[PF_SK_WIRE]; - } else { - if (pf_state_key_attach(skw, s, PF_SK_WIRE)) { -#ifdef __FreeBSD__ - pool_put(&V_pf_state_key_pl, sks); -#else - pool_put(&pf_state_key_pl, sks); -#endif - return (-1); - } - if (pf_state_key_attach(sks, s, PF_SK_STACK)) { - pf_state_key_detach(s, PF_SK_WIRE); - return (-1); - } - } + if (pf_state_key_attach(skw, sks, s)) + return (-1); if (s->id == 0 && s->creatorid == 0) { -#ifdef __FreeBSD__ - s->id = htobe64(V_pf_status.stateid++); + /* XXX: should be atomic, but probability of collision low */ + if ((s->id = V_pf_stateid[curcpu]++) == PFID_MAXID) + V_pf_stateid[curcpu] = 1; + s->id |= (uint64_t )curcpu << PFID_CPUSHIFT; + s->id = htobe64(s->id); s->creatorid = V_pf_status.hostid; -#else - s->id = htobe64(pf_status.stateid++); - s->creatorid = pf_status.hostid; -#endif } -#ifdef __FreeBSD__ - if (RB_INSERT(pf_state_tree_id, &V_tree_id, s) != NULL) { + + ih = &V_pf_idhash[PF_IDHASH(s)]; + PF_HASHROW_LOCK(ih); + LIST_FOREACH(cur, &ih->states, entry) + if (cur->id == s->id && cur->creatorid == s->creatorid) + break; + + if (cur != NULL) { + PF_HASHROW_UNLOCK(ih); if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) { - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif printf("pf: state insert failed: " "id: %016llx creatorid: %08x", -#ifdef __FreeBSD__ - (unsigned long long)betoh64(s->id), ntohl(s->creatorid)); -#else - betoh64(s->id), ntohl(s->creatorid)); -#endif + (unsigned long long)be64toh(s->id), + ntohl(s->creatorid)); printf("\n"); } pf_detach_state(s); return (-1); } -#ifdef __FreeBSD__ - TAILQ_INSERT_TAIL(&V_state_list, s, entry_list); + LIST_INSERT_HEAD(&ih->states, s, entry); + /* One for keys, one for ID hash. */ + refcount_init(&s->refs, 2); + V_pf_status.fcounters[FCNT_STATE_INSERT]++; - V_pf_status.states++; -#else - TAILQ_INSERT_TAIL(&state_list, s, entry_list); - pf_status.fcounters[FCNT_STATE_INSERT]++; - pf_status.states++; -#endif - pfi_kif_ref(kif, PFI_KIF_REF_STATE); -#if NPFSYNC > 0 -#ifdef __FreeBSD__ if (pfsync_insert_state_ptr != NULL) pfsync_insert_state_ptr(s); -#else - pfsync_insert_state(s); -#endif -#endif + + /* Returns locked. */ return (0); } +/* + * Find state by ID: returns with locked row on success. 
+ */ struct pf_state * -pf_find_state_byid(struct pf_state_cmp *key) +pf_find_state_byid(uint64_t id, uint32_t creatorid) { -#ifdef __FreeBSD__ + struct pf_idhash *ih; + struct pf_state *s; + V_pf_status.fcounters[FCNT_STATE_SEARCH]++; - return (RB_FIND(pf_state_tree_id, &V_tree_id, (struct pf_state *)key)); -#else - pf_status.fcounters[FCNT_STATE_SEARCH]++; + ih = &V_pf_idhash[(be64toh(id) % (V_pf_hashmask + 1))]; - return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key)); -#endif -} + PF_HASHROW_LOCK(ih); + LIST_FOREACH(s, &ih->states, entry) + if (s->id == id && s->creatorid == creatorid) + break; -/* XXX debug function, intended to be removed one day */ -int -pf_compare_state_keys(struct pf_state_key *a, struct pf_state_key *b, - struct pfi_kif *kif, u_int dir) -{ - /* a (from hdr) and b (new) must be exact opposites of each other */ - if (a->af == b->af && a->proto == b->proto && - PF_AEQ(&a->addr[0], &b->addr[1], a->af) && - PF_AEQ(&a->addr[1], &b->addr[0], a->af) && - a->port[0] == b->port[1] && - a->port[1] == b->port[0]) - return (0); - else { - /* mismatch. must not happen. */ - printf("pf: state key linking mismatch! dir=%s, " - "if=%s, stored af=%u, a0: ", - dir == PF_OUT ? "OUT" : "IN", kif->pfik_name, a->af); - pf_print_host(&a->addr[0], a->port[0], a->af); - printf(", a1: "); - pf_print_host(&a->addr[1], a->port[1], a->af); - printf(", proto=%u", a->proto); - printf(", found af=%u, a0: ", b->af); - pf_print_host(&b->addr[0], b->port[0], b->af); - printf(", a1: "); - pf_print_host(&b->addr[1], b->port[1], b->af); - printf(", proto=%u", b->proto); - printf(".\n"); - return (-1); - } + if (s == NULL) + PF_HASHROW_UNLOCK(ih); + + return (s); } -struct pf_state * -#ifdef __FreeBSD__ -pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir, - struct mbuf *m, struct pf_mtag *pftag) -#else -pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir, - struct mbuf *m) -#endif +/* + * Find state by key. + * Returns with ID hash slot locked on success. 
+ */ +static struct pf_state * +pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir) { + struct pf_keyhash *kh; struct pf_state_key *sk; - struct pf_state_item *si; + struct pf_state *s; + int idx; -#ifdef __FreeBSD__ V_pf_status.fcounters[FCNT_STATE_SEARCH]++; -#else - pf_status.fcounters[FCNT_STATE_SEARCH]++; -#endif -#ifdef __FreeBSD__ - if (dir == PF_OUT && pftag->statekey && - ((struct pf_state_key *)pftag->statekey)->reverse) - sk = ((struct pf_state_key *)pftag->statekey)->reverse; - else { -#ifdef __FreeBSD__ - if ((sk = RB_FIND(pf_state_tree, &V_pf_statetbl, -#else - if ((sk = RB_FIND(pf_state_tree, &pf_statetbl, -#endif - (struct pf_state_key *)key)) == NULL) - return (NULL); - if (dir == PF_OUT && pftag->statekey && - pf_compare_state_keys(pftag->statekey, sk, - kif, dir) == 0) { - ((struct pf_state_key *) - pftag->statekey)->reverse = sk; - sk->reverse = pftag->statekey; - } - } -#else - if (dir == PF_OUT && m->m_pkthdr.pf.statekey && - ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse) - sk = ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse; - else { -#ifdef __FreeBSD__ - if ((sk = RB_FIND(pf_state_tree, &V_pf_statetbl, -#else - if ((sk = RB_FIND(pf_state_tree, &pf_statetbl, -#endif - (struct pf_state_key *)key)) == NULL) - return (NULL); - if (dir == PF_OUT && m->m_pkthdr.pf.statekey && - pf_compare_state_keys(m->m_pkthdr.pf.statekey, sk, - kif, dir) == 0) { - ((struct pf_state_key *) - m->m_pkthdr.pf.statekey)->reverse = sk; - sk->reverse = m->m_pkthdr.pf.statekey; - } + kh = &V_pf_keyhash[pf_hashkey((struct pf_state_key *)key)]; + + PF_HASHROW_LOCK(kh); + LIST_FOREACH(sk, &kh->keys, entry) + if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0) + break; + if (sk == NULL) { + PF_HASHROW_UNLOCK(kh); + return (NULL); } -#endif - if (dir == PF_OUT) -#ifdef __FreeBSD__ - pftag->statekey = NULL; -#else - m->m_pkthdr.pf.statekey = NULL; -#endif + idx = (dir == PF_IN ? PF_SK_WIRE : PF_SK_STACK); - /* list is sorted, if-bound states before floating ones */ - TAILQ_FOREACH(si, &sk->states, entry) -#ifdef __FreeBSD__ - if ((si->s->kif == V_pfi_all || si->s->kif == kif) && -#else - if ((si->s->kif == pfi_all || si->s->kif == kif) && -#endif - sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] : - si->s->key[PF_SK_STACK])) - return (si->s); + /* List is sorted, if-bound states before floating ones. */ + TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) + if (s->kif == V_pfi_all || s->kif == kif) { + PF_STATE_LOCK(s); + PF_HASHROW_UNLOCK(kh); + if (s->timeout == PFTM_UNLINKED) { + /* + * State is being processed + * by pf_unlink_state() in + * an other thread. + */ + PF_STATE_UNLOCK(s); + return (NULL); + } + return (s); + } + PF_HASHROW_UNLOCK(kh); return (NULL); } @@ -1278,115 +1168,178 @@ pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir, struct pf_state * pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more) { + struct pf_keyhash *kh; struct pf_state_key *sk; - struct pf_state_item *si, *ret = NULL; + struct pf_state *s, *ret = NULL; + int idx, inout = 0; -#ifdef __FreeBSD__ V_pf_status.fcounters[FCNT_STATE_SEARCH]++; -#else - pf_status.fcounters[FCNT_STATE_SEARCH]++; -#endif -#ifdef __FreeBSD__ - sk = RB_FIND(pf_state_tree, &V_pf_statetbl, (struct pf_state_key *)key); -#else - sk = RB_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key); -#endif - if (sk != NULL) { - TAILQ_FOREACH(si, &sk->states, entry) - if (dir == PF_INOUT || - (sk == (dir == PF_IN ? 
si->s->key[PF_SK_WIRE] : - si->s->key[PF_SK_STACK]))) { - if (more == NULL) - return (si->s); - - if (ret) - (*more)++; - else - ret = si; - } + kh = &V_pf_keyhash[pf_hashkey((struct pf_state_key *)key)]; + + PF_HASHROW_LOCK(kh); + LIST_FOREACH(sk, &kh->keys, entry) + if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0) + break; + if (sk == NULL) { + PF_HASHROW_UNLOCK(kh); + return (NULL); + } + switch (dir) { + case PF_IN: + idx = PF_SK_WIRE; + break; + case PF_OUT: + idx = PF_SK_STACK; + break; + case PF_INOUT: + idx = PF_SK_WIRE; + inout = 1; + break; + default: + panic("%s: dir %u", __func__, dir); } - return (ret ? ret->s : NULL); +second_run: + TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) { + if (more == NULL) { + PF_HASHROW_UNLOCK(kh); + return (s); + } + + if (ret) + (*more)++; + else + ret = s; + } + if (inout == 1) { + inout = 0; + idx = PF_SK_STACK; + goto second_run; + } + PF_HASHROW_UNLOCK(kh); + + return (ret); } /* END state table stuff */ +static void +pf_send(struct pf_send_entry *pfse) +{ + + PF_SENDQ_LOCK(); + STAILQ_INSERT_TAIL(&V_pf_sendqueue, pfse, pfse_next); + PF_SENDQ_UNLOCK(); + swi_sched(V_pf_swi_cookie, 0); +} + +void +pf_intr(void *v) +{ + struct pf_send_head queue; + struct pf_send_entry *pfse, *next; + + CURVNET_SET((struct vnet *)v); + + PF_SENDQ_LOCK(); + queue = V_pf_sendqueue; + STAILQ_INIT(&V_pf_sendqueue); + PF_SENDQ_UNLOCK(); + + STAILQ_FOREACH_SAFE(pfse, &queue, pfse_next, next) { + switch (pfse->pfse_type) { +#ifdef INET + case PFSE_IP: + ip_output(pfse->pfse_m, NULL, NULL, 0, NULL, NULL); + break; + case PFSE_ICMP: + icmp_error(pfse->pfse_m, pfse->pfse_icmp_type, + pfse->pfse_icmp_code, 0, pfse->pfse_icmp_mtu); + break; +#endif /* INET */ +#ifdef INET6 + case PFSE_IP6: + ip6_output(pfse->pfse_m, NULL, NULL, 0, NULL, NULL, + NULL); + break; + case PFSE_ICMP6: + icmp6_error(pfse->pfse_m, pfse->pfse_icmp_type, + pfse->pfse_icmp_code, pfse->pfse_icmp_mtu); + break; +#endif /* INET6 */ + default: + panic("%s: unknown type", __func__); + } + free(pfse, M_PFTEMP); + } + CURVNET_RESTORE(); +} void pf_purge_thread(void *v) { - int nloops = 0, s; -#ifdef __FreeBSD__ - int locked; -#endif + int fullrun; CURVNET_SET((struct vnet *)v); for (;;) { - tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz); - -#ifdef __FreeBSD__ - sx_slock(&V_pf_consistency_lock); - PF_LOCK(); - locked = 0; + PF_RULES_RLOCK(); + rw_sleep(pf_purge_thread, &pf_rules_lock, 0, "pftm", hz / 10); if (V_pf_end_threads) { - PF_UNLOCK(); - sx_sunlock(&V_pf_consistency_lock); - sx_xlock(&V_pf_consistency_lock); - PF_LOCK(); + /* + * To cleanse up all kifs and rules we need + * two runs: first one clears reference flags, + * then pf_purge_expired_states() doesn't + * raise them, and then second run frees. + */ + PF_RULES_RUNLOCK(); + pf_purge_unlinked_rules(); + pfi_kif_purge(); - pf_purge_expired_states(V_pf_status.states, 1); + /* + * Now purge everything. + */ + pf_purge_expired_states(V_pf_hashmask + 1); pf_purge_expired_fragments(); - pf_purge_expired_src_nodes(1); - V_pf_end_threads++; + pf_purge_expired_src_nodes(); - sx_xunlock(&V_pf_consistency_lock); - PF_UNLOCK(); + /* + * Now all kifs & rules should be unreferenced, + * thus should be successfully freed. + */ + pf_purge_unlinked_rules(); + pfi_kif_purge(); + + /* + * Announce success and exit. 
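/*
 * [Illustrative sketch, not part of the diff.]  pf_send() and pf_intr()
 * above take icmp_error()/ip_output() calls off the filtering path: the
 * hot path only appends a pf_send_entry to V_pf_sendqueue under
 * PF_SENDQ_LOCK and schedules a software interrupt, while pf_intr()
 * later steals the whole queue under the lock and transmits with no pf
 * lock held.  A userland analogue of that hand-off follows; the names
 * (work, sendq, enqueue, drain) are invented for the example.
 */
#include <pthread.h>
#include <stdlib.h>
#include <sys/queue.h>

struct work {
        int                payload;
        STAILQ_ENTRY(work) next;
};

static STAILQ_HEAD(workq, work) sendq = STAILQ_HEAD_INITIALIZER(sendq);
static pthread_mutex_t sendq_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  sendq_cv = PTHREAD_COND_INITIALIZER;

/* Hot path: link the entry and poke the consumer (cf. pf_send()). */
static void
enqueue(struct work *w)
{
        pthread_mutex_lock(&sendq_lock);
        STAILQ_INSERT_TAIL(&sendq, w, next);
        pthread_mutex_unlock(&sendq_lock);
        pthread_cond_signal(&sendq_cv);
}

/* Consumer: grab the whole list under the lock, process it unlocked. */
static void *
drain(void *arg)
{
        struct workq batch;
        struct work *w, *tmp;

        for (;;) {
                pthread_mutex_lock(&sendq_lock);
                while (STAILQ_EMPTY(&sendq))
                        pthread_cond_wait(&sendq_cv, &sendq_lock);
                batch = sendq;          /* cf. queue = V_pf_sendqueue */
                STAILQ_INIT(&sendq);
                pthread_mutex_unlock(&sendq_lock);

                /* batch is only read here, so the head copy is safe. */
                STAILQ_FOREACH_SAFE(w, &batch, next, tmp)
                        free(w);        /* pf_intr() transmits, then frees */
        }
        return (arg);
}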
+ */ + PF_RULES_RLOCK(); + V_pf_end_threads++; + PF_RULES_RUNLOCK(); wakeup(pf_purge_thread); kproc_exit(0); } -#endif - s = splsoftnet(); - - /* process a fraction of the state table every second */ -#ifdef __FreeBSD__ - if (!pf_purge_expired_states(1 + (V_pf_status.states / - V_pf_default_rule.timeout[PFTM_INTERVAL]), 0)) { - PF_UNLOCK(); - sx_sunlock(&V_pf_consistency_lock); - sx_xlock(&V_pf_consistency_lock); - PF_LOCK(); - locked = 1; - - pf_purge_expired_states(1 + (V_pf_status.states / - V_pf_default_rule.timeout[PFTM_INTERVAL]), 1); - } -#else - pf_purge_expired_states(1 + (pf_status.states - / pf_default_rule.timeout[PFTM_INTERVAL])); -#endif + PF_RULES_RUNLOCK(); - /* purge other expired types every PFTM_INTERVAL seconds */ -#ifdef __FreeBSD__ - if (++nloops >= V_pf_default_rule.timeout[PFTM_INTERVAL]) { -#else - if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) { -#endif + /* Process 1/interval fraction of the state table every run. */ + fullrun = pf_purge_expired_states(V_pf_hashmask / + (V_pf_default_rule.timeout[PFTM_INTERVAL] * 10)); + + /* Purge other expired types every PFTM_INTERVAL seconds. */ + if (fullrun) { + /* + * Order is important: + * - states and src nodes reference rules + * - states and rules reference kifs + */ pf_purge_expired_fragments(); - pf_purge_expired_src_nodes(0); - nloops = 0; + pf_purge_expired_src_nodes(); + pf_purge_unlinked_rules(); + pfi_kif_purge(); } - - splx(s); -#ifdef __FreeBSD__ - PF_UNLOCK(); - if (locked) - sx_xunlock(&V_pf_consistency_lock); - else - sx_sunlock(&V_pf_consistency_lock); -#endif } + /* not reached */ CURVNET_RESTORE(); } @@ -1400,117 +1353,59 @@ pf_state_expires(const struct pf_state *state) /* handle all PFTM_* > PFTM_MAX here */ if (state->timeout == PFTM_PURGE) - return (time_second); + return (time_uptime); if (state->timeout == PFTM_UNTIL_PACKET) return (0); -#ifdef __FreeBSD__ KASSERT(state->timeout != PFTM_UNLINKED, ("pf_state_expires: timeout == PFTM_UNLINKED")); - KASSERT((state->timeout < PFTM_MAX), + KASSERT((state->timeout < PFTM_MAX), ("pf_state_expires: timeout > PFTM_MAX")); -#else - KASSERT(state->timeout != PFTM_UNLINKED); - KASSERT(state->timeout < PFTM_MAX); -#endif timeout = state->rule.ptr->timeout[state->timeout]; if (!timeout) -#ifdef __FreeBSD__ timeout = V_pf_default_rule.timeout[state->timeout]; -#else - timeout = pf_default_rule.timeout[state->timeout]; -#endif start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START]; if (start) { end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END]; - states = state->rule.ptr->states_cur; + states = state->rule.ptr->states_cur; /* XXXGL */ } else { -#ifdef __FreeBSD__ start = V_pf_default_rule.timeout[PFTM_ADAPTIVE_START]; end = V_pf_default_rule.timeout[PFTM_ADAPTIVE_END]; states = V_pf_status.states; -#else - start = pf_default_rule.timeout[PFTM_ADAPTIVE_START]; - end = pf_default_rule.timeout[PFTM_ADAPTIVE_END]; - states = pf_status.states; -#endif } if (end && states > start && start < end) { if (states < end) return (state->expire + timeout * (end - states) / (end - start)); else - return (time_second); + return (time_uptime); } return (state->expire + timeout); } -#ifdef __FreeBSD__ -int -pf_purge_expired_src_nodes(int waslocked) -#else void -pf_purge_expired_src_nodes(int waslocked) -#endif +pf_purge_expired_src_nodes() { - struct pf_src_node *cur, *next; - int locked = waslocked; - -#ifdef __FreeBSD__ - for (cur = RB_MIN(pf_src_tree, &V_tree_src_tracking); cur; cur = next) { - next = RB_NEXT(pf_src_tree, &V_tree_src_tracking, cur); -#else - for (cur = 
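/*
 * [Worked example, not part of the diff.]  pf_state_expires() above keeps
 * pf's adaptive timeouts: once the number of states passes adaptive.start
 * the configured timeout shrinks linearly, reaching zero at adaptive.end.
 * The helper and numbers below only illustrate that formula; they are not
 * taken from the patch.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t
adaptive_timeout(uint32_t timeout, uint32_t states, uint32_t start, uint32_t end)
{
        if (end == 0 || states <= start || start >= end)
                return (timeout);       /* scaling not active */
        if (states >= end)
                return (0);             /* expire immediately */
        return (timeout * (end - states) / (end - start));
}

int
main(void)
{
        /* e.g. tcp.established 86400s with adaptive.start 6000, adaptive.end 12000 */
        printf("%u\n", adaptive_timeout(86400, 9000, 6000, 12000)); /* prints 43200 */
        return (0);
}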
RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) { - next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur); -#endif + struct pf_srchash *sh; + struct pf_src_node *cur, *next; + int i; - if (cur->states <= 0 && cur->expire <= time_second) { - if (! locked) { -#ifdef __FreeBSD__ - if (!sx_try_upgrade(&V_pf_consistency_lock)) - return (0); -#else - rw_enter_write(&pf_consistency_lock); -#endif - next = RB_NEXT(pf_src_tree, -#ifdef __FreeBSD__ - &V_tree_src_tracking, cur); -#else - &tree_src_tracking, cur); -#endif - locked = 1; - } - if (cur->rule.ptr != NULL) { + for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) { + PF_HASHROW_LOCK(sh); + LIST_FOREACH_SAFE(cur, &sh->nodes, entry, next) + if (cur->states <= 0 && cur->expire <= time_uptime) { + if (cur->rule.ptr != NULL) cur->rule.ptr->src_nodes--; - if (cur->rule.ptr->states_cur <= 0 && - cur->rule.ptr->max_src_nodes <= 0) - pf_rm_rule(NULL, cur->rule.ptr); - } -#ifdef __FreeBSD__ - RB_REMOVE(pf_src_tree, &V_tree_src_tracking, cur); + LIST_REMOVE(cur, entry); V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; V_pf_status.src_nodes--; - pool_put(&V_pf_src_tree_pl, cur); -#else - RB_REMOVE(pf_src_tree, &tree_src_tracking, cur); - pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; - pf_status.src_nodes--; - pool_put(&pf_src_tree_pl, cur); -#endif - } + uma_zfree(V_pf_sources_z, cur); + } else if (cur->rule.ptr != NULL) + cur->rule.ptr->rule_flag |= PFRULE_REFS; + PF_HASHROW_UNLOCK(sh); } - - if (locked && !waslocked) -#ifdef __FreeBSD__ - { - sx_downgrade(&V_pf_consistency_lock); - } - return (1); -#else - rw_exit_write(&pf_consistency_lock); -#endif } -void +static void pf_src_tree_remove_state(struct pf_state *s) { u_int32_t timeout; @@ -1522,12 +1417,8 @@ pf_src_tree_remove_state(struct pf_state *s) timeout = s->rule.ptr->timeout[PFTM_SRC_NODE]; if (!timeout) timeout = -#ifdef __FreeBSD__ V_pf_default_rule.timeout[PFTM_SRC_NODE]; -#else - pf_default_rule.timeout[PFTM_SRC_NODE]; -#endif - s->src_node->expire = time_second + timeout; + s->src_node->expire = time_uptime + timeout; } } if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) { @@ -1535,226 +1426,167 @@ pf_src_tree_remove_state(struct pf_state *s) timeout = s->rule.ptr->timeout[PFTM_SRC_NODE]; if (!timeout) timeout = -#ifdef __FreeBSD__ V_pf_default_rule.timeout[PFTM_SRC_NODE]; -#else - pf_default_rule.timeout[PFTM_SRC_NODE]; -#endif - s->nat_src_node->expire = time_second + timeout; + s->nat_src_node->expire = time_uptime + timeout; } } s->src_node = s->nat_src_node = NULL; } -/* callers should be at splsoftnet */ -void -pf_unlink_state(struct pf_state *cur) +/* + * Unlink and potentilly free a state. Function may be + * called with ID hash row locked, but always returns + * unlocked, since it needs to go through key hash locking. + */ +int +pf_unlink_state(struct pf_state *s, u_int flags) { -#ifdef __FreeBSD__ - if (cur->local_flags & PFSTATE_EXPIRING) - return; - cur->local_flags |= PFSTATE_EXPIRING; -#else - splassert(IPL_SOFTNET); -#endif + struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(s)]; + + if ((flags & PF_ENTER_LOCKED) == 0) + PF_HASHROW_LOCK(ih); + else + PF_HASHROW_ASSERT(ih); - if (cur->src.state == PF_TCPS_PROXY_DST) { + if (s->timeout == PFTM_UNLINKED) { + /* + * State is being processed + * by pf_unlink_state() in + * an other thread. + */ + PF_HASHROW_UNLOCK(ih); + return (0); /* XXXGL: undefined actually */ + } + + s->timeout = PFTM_UNLINKED; + + if (s->src.state == PF_TCPS_PROXY_DST) { /* XXX wire key the right one? 
*/ -#ifdef __FreeBSD__ - pf_send_tcp(NULL, cur->rule.ptr, cur->key[PF_SK_WIRE]->af, -#else - pf_send_tcp(cur->rule.ptr, cur->key[PF_SK_WIRE]->af, -#endif - &cur->key[PF_SK_WIRE]->addr[1], - &cur->key[PF_SK_WIRE]->addr[0], - cur->key[PF_SK_WIRE]->port[1], - cur->key[PF_SK_WIRE]->port[0], - cur->src.seqhi, cur->src.seqlo + 1, - TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL); - } -#ifdef __FreeBSD__ - RB_REMOVE(pf_state_tree_id, &V_tree_id, cur); -#else - RB_REMOVE(pf_state_tree_id, &tree_id, cur); -#endif -#if NPFLOW > 0 - if (cur->state_flags & PFSTATE_PFLOW) -#ifdef __FreeBSD__ - if (export_pflow_ptr != NULL) - export_pflow_ptr(cur); -#else - export_pflow(cur); -#endif -#endif -#if NPFSYNC > 0 -#ifdef __FreeBSD__ + pf_send_tcp(NULL, s->rule.ptr, s->key[PF_SK_WIRE]->af, + &s->key[PF_SK_WIRE]->addr[1], + &s->key[PF_SK_WIRE]->addr[0], + s->key[PF_SK_WIRE]->port[1], + s->key[PF_SK_WIRE]->port[0], + s->src.seqhi, s->src.seqlo + 1, + TH_RST|TH_ACK, 0, 0, 0, 1, s->tag, NULL); + } + + LIST_REMOVE(s, entry); + pf_src_tree_remove_state(s); + PF_HASHROW_UNLOCK(ih); + if (pfsync_delete_state_ptr != NULL) - pfsync_delete_state_ptr(cur); -#else - pfsync_delete_state(cur); -#endif -#endif - cur->timeout = PFTM_UNLINKED; - pf_src_tree_remove_state(cur); - pf_detach_state(cur); + pfsync_delete_state_ptr(s); + + pf_detach_state(s); + refcount_release(&s->refs); + + return (pf_release_state(s)); } -/* callers should be at splsoftnet and hold the - * write_lock on pf_consistency_lock */ void pf_free_state(struct pf_state *cur) { -#ifndef __FreeBSD__ - splassert(IPL_SOFTNET); -#endif -#if NPFSYNC > 0 -#ifdef __FreeBSD__ - if (pfsync_state_in_use_ptr != NULL && - pfsync_state_in_use_ptr(cur)) -#else - if (pfsync_state_in_use(cur)) -#endif - return; -#endif -#ifdef __FreeBSD__ - KASSERT(cur->timeout == PFTM_UNLINKED, - ("pf_free_state: cur->timeout != PFTM_UNLINKED")); -#else - KASSERT(cur->timeout == PFTM_UNLINKED); -#endif - if (--cur->rule.ptr->states_cur <= 0 && - cur->rule.ptr->src_nodes <= 0) - pf_rm_rule(NULL, cur->rule.ptr); + KASSERT(cur->refs == 0, ("%s: %p has refs", __func__, cur)); + KASSERT(cur->timeout == PFTM_UNLINKED, ("%s: timeout %u", __func__, + cur->timeout)); + --cur->rule.ptr->states_cur; if (cur->nat_rule.ptr != NULL) - if (--cur->nat_rule.ptr->states_cur <= 0 && - cur->nat_rule.ptr->src_nodes <= 0) - pf_rm_rule(NULL, cur->nat_rule.ptr); + --cur->nat_rule.ptr->states_cur; if (cur->anchor.ptr != NULL) - if (--cur->anchor.ptr->states_cur <= 0) - pf_rm_rule(NULL, cur->anchor.ptr); + --cur->anchor.ptr->states_cur; pf_normalize_tcp_cleanup(cur); - pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE); -#ifdef __FreeBSD__ - TAILQ_REMOVE(&V_state_list, cur, entry_list); -#else - TAILQ_REMOVE(&state_list, cur, entry_list); -#endif - if (cur->tag) - pf_tag_unref(cur->tag); -#ifdef __FreeBSD__ - pool_put(&V_pf_state_pl, cur); + uma_zfree(V_pf_state_z, cur); V_pf_status.fcounters[FCNT_STATE_REMOVALS]++; - V_pf_status.states--; -#else - pool_put(&pf_state_pl, cur); - pf_status.fcounters[FCNT_STATE_REMOVALS]++; - pf_status.states--; -#endif } -#ifdef __FreeBSD__ -int -pf_purge_expired_states(u_int32_t maxcheck, int waslocked) -#else -void -pf_purge_expired_states(u_int32_t maxcheck) -#endif +/* + * Called only from pf_purge_thread(), thus serialized. 
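/*
 * [Illustrative sketch, not part of the diff.]  pf_unlink_state() above is
 * now a two-step teardown: the state is marked PFTM_UNLINKED and taken off
 * its lists while the ID-hash row is locked, so concurrent lookups that
 * still reach it can back off, and the memory is only freed once the last
 * reference is released.  A generic userland version of that pattern
 * follows; obj, obj_unlink and obj_release are invented names.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct obj {
        pthread_mutex_t lock;           /* assumed initialized elsewhere */
        atomic_int      refs;
        int             dead;           /* cf. s->timeout == PFTM_UNLINKED */
};

/* Drop one reference; the last holder frees the object. */
static void
obj_release(struct obj *o)
{
        if (atomic_fetch_sub(&o->refs, 1) == 1) {
                pthread_mutex_destroy(&o->lock);
                free(o);
        }
}

/*
 * Mark the object dead while its lock is held, unhook it, then drop the
 * reference the lists held -- outside the lock, like pf_unlink_state().
 * A lookup that locked the object meanwhile must check 'dead' and bail.
 */
static void
obj_unlink(struct obj *o)
{
        pthread_mutex_lock(&o->lock);
        o->dead = 1;
        /* ... remove from hash chains here ... */
        pthread_mutex_unlock(&o->lock);
        obj_release(o);
}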
+ */ +static int +pf_purge_expired_states(int maxcheck) { - static struct pf_state *cur = NULL; - struct pf_state *next; -#ifdef __FreeBSD__ - int locked = waslocked; -#else - int locked = 0; -#endif + static u_int i = 0; - while (maxcheck--) { - /* wrap to start of list when we hit the end */ - if (cur == NULL) { -#ifdef __FreeBSD__ - cur = TAILQ_FIRST(&V_state_list); -#else - cur = TAILQ_FIRST(&state_list); -#endif - if (cur == NULL) - break; /* list empty */ - } + struct pf_idhash *ih; + struct pf_state *s; + int rv = 0; - /* get next state, as cur may get deleted */ - next = TAILQ_NEXT(cur, entry_list); + V_pf_status.states = uma_zone_get_cur(V_pf_state_z); - if (cur->timeout == PFTM_UNLINKED) { - /* free unlinked state */ - if (! locked) { -#ifdef __FreeBSD__ - if (!sx_try_upgrade(&V_pf_consistency_lock)) - return (0); -#else - rw_enter_write(&pf_consistency_lock); -#endif - locked = 1; - } - pf_free_state(cur); - } else if (pf_state_expires(cur) <= time_second) { - /* unlink and free expired state */ - pf_unlink_state(cur); - if (! locked) { -#ifdef __FreeBSD__ - if (!sx_try_upgrade(&V_pf_consistency_lock)) - return (0); -#else - rw_enter_write(&pf_consistency_lock); -#endif - locked = 1; + /* + * Go through hash and unlink states that expire now. + */ + while (maxcheck > 0) { + + /* Wrap to start of hash when we hit the end. */ + if (i > V_pf_hashmask) { + i = 0; + rv = 1; + } + + ih = &V_pf_idhash[i]; +relock: + PF_HASHROW_LOCK(ih); + LIST_FOREACH(s, &ih->states, entry) { + if (pf_state_expires(s) <= time_uptime) { + V_pf_status.states -= + pf_unlink_state(s, PF_ENTER_LOCKED); + goto relock; } - pf_free_state(cur); + s->rule.ptr->rule_flag |= PFRULE_REFS; + if (s->nat_rule.ptr != NULL) + s->nat_rule.ptr->rule_flag |= PFRULE_REFS; + if (s->anchor.ptr != NULL) + s->anchor.ptr->rule_flag |= PFRULE_REFS; + s->kif->pfik_flags |= PFI_IFLAG_REFS; + if (s->rt_kif) + s->rt_kif->pfik_flags |= PFI_IFLAG_REFS; } - cur = next; + PF_HASHROW_UNLOCK(ih); + i++; + maxcheck--; } -#ifdef __FreeBSD__ - if (!waslocked && locked) - sx_downgrade(&V_pf_consistency_lock); - - return (1); -#else - if (locked) - rw_exit_write(&pf_consistency_lock); -#endif -} + V_pf_status.states = uma_zone_get_cur(V_pf_state_z); -int -pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw) -{ - if (aw->type != PF_ADDR_TABLE) - return (0); - if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname, 1)) == NULL) - return (1); - return (0); + return (rv); } -void -pf_tbladdr_remove(struct pf_addr_wrap *aw) +static void +pf_purge_unlinked_rules() { - if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL) - return; - pfr_detach_table(aw->p.tbl); - aw->p.tbl = NULL; -} + struct pf_rulequeue tmpq; + struct pf_rule *r, *r1; -void -pf_tbladdr_copyout(struct pf_addr_wrap *aw) -{ - struct pfr_ktable *kt = aw->p.tbl; + /* + * Do naive mark-and-sweep garbage collecting of old rules. + * Reference flag is raised by pf_purge_expired_states() + * and pf_purge_expired_src_nodes(). + * + * To avoid LOR between PF_UNLNKDRULES_LOCK/PF_RULES_WLOCK, + * use a temporary queue. 
+ */ + TAILQ_INIT(&tmpq); + PF_UNLNKDRULES_LOCK(); + TAILQ_FOREACH_SAFE(r, &V_pf_unlinked_rules, entries, r1) { + if (!(r->rule_flag & PFRULE_REFS)) { + TAILQ_REMOVE(&V_pf_unlinked_rules, r, entries); + TAILQ_INSERT_TAIL(&tmpq, r, entries); + } else + r->rule_flag &= ~PFRULE_REFS; + } + PF_UNLNKDRULES_UNLOCK(); - if (aw->type != PF_ADDR_TABLE || kt == NULL) - return; - if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) - kt = kt->pfrkt_root; - aw->p.tbl = NULL; - aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ? - kt->pfrkt_cnt : -1; + if (!TAILQ_EMPTY(&tmpq)) { + PF_RULES_WLOCK(); + TAILQ_FOREACH_SAFE(r, &tmpq, entries, r1) { + TAILQ_REMOVE(&tmpq, r, entries); + pf_free_rule(r); + } + PF_RULES_WUNLOCK(); + } } void @@ -1826,7 +1658,7 @@ pf_print_state(struct pf_state *s) pf_print_state_parts(s, NULL, NULL); } -void +static void pf_print_state_parts(struct pf_state *s, struct pf_state_key *skwp, struct pf_state_key *sksp) { @@ -1979,7 +1811,7 @@ pf_calc_skip_steps(struct pf_rulequeue *rules) PF_SET_SKIP_STEPS(i); } -int +static int pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) { if (aw1->type != aw2->type) @@ -1999,8 +1831,6 @@ pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) return (0); case PF_ADDR_TABLE: return (aw1->p.tbl != aw2->p.tbl); - case PF_ADDR_RTLABEL: - return (aw1->v.rtlabel != aw2->v.rtlabel); default: printf("invalid address type: %d\n", aw1->type); return (1); @@ -2022,7 +1852,7 @@ pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp) return (l); } -void +static void pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc, struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af) { @@ -2080,7 +1910,7 @@ pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u) } #ifdef INET6 -void +static void pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u) { struct pf_addr ao; @@ -2102,7 +1932,7 @@ pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u) } #endif /* INET6 */ -void +static void pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c, u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af) @@ -2195,16 +2025,12 @@ pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, * Need to modulate the sequence numbers in the TCP SACK option * (credits to Krzysztof Pfaff for report and patch) */ -int +static int pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd, struct tcphdr *th, struct pf_state_peer *dst) { int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen; -#ifdef __FreeBSD__ u_int8_t opts[TCP_MAXOLEN], *opt = opts; -#else - u_int8_t opts[MAX_TCPOPTLEN], *opt = opts; -#endif int copyback = 0, i, olen; struct sackblk sack; @@ -2248,60 +2074,32 @@ pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd, } if (copyback) -#ifdef __FreeBSD__ m_copyback(m, off + sizeof(*th), thoptlen, (caddr_t)opts); -#else - m_copyback(m, off + sizeof(*th), thoptlen, opts); -#endif return (copyback); } -void -#ifdef __FreeBSD__ +static void pf_send_tcp(struct mbuf *replyto, const struct pf_rule *r, sa_family_t af, -#else -pf_send_tcp(const struct pf_rule *r, sa_family_t af, -#endif const struct pf_addr *saddr, const struct pf_addr *daddr, u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, - u_int16_t rtag, struct ether_header *eh, struct 
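/*
 * [Illustrative sketch, not part of the diff.]  pf_purge_unlinked_rules()
 * above does mark-and-sweep on retired rules: the purge passes set
 * PFRULE_REFS on rules still referenced by states or source nodes, and the
 * sweep moves unmarked rules to a private queue under one lock before
 * freeing them under the other, so the two locks are never held together.
 * A self-contained rendition of that sweep, with invented names
 * (rule, unlinked_lock, rules_lock, sweep_unlinked):
 */
#include <pthread.h>
#include <stdlib.h>
#include <sys/queue.h>

struct rule {
        int               marked;       /* cf. PFRULE_REFS */
        TAILQ_ENTRY(rule) entries;
};

TAILQ_HEAD(rulequeue, rule);

static struct rulequeue unlinked = TAILQ_HEAD_INITIALIZER(unlinked);
static pthread_mutex_t unlinked_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t rules_lock = PTHREAD_MUTEX_INITIALIZER;

static void
sweep_unlinked(void)
{
        struct rulequeue tmpq = TAILQ_HEAD_INITIALIZER(tmpq);
        struct rule *r, *r1;

        /* Collect unmarked rules; clear the mark on the rest for next round. */
        pthread_mutex_lock(&unlinked_lock);
        TAILQ_FOREACH_SAFE(r, &unlinked, entries, r1) {
                if (!r->marked) {
                        TAILQ_REMOVE(&unlinked, r, entries);
                        TAILQ_INSERT_TAIL(&tmpq, r, entries);
                } else
                        r->marked = 0;
        }
        pthread_mutex_unlock(&unlinked_lock);

        /* Free the collected rules under the other lock, taken alone. */
        if (!TAILQ_EMPTY(&tmpq)) {
                pthread_mutex_lock(&rules_lock);
                TAILQ_FOREACH_SAFE(r, &tmpq, entries, r1) {
                        TAILQ_REMOVE(&tmpq, r, entries);
                        free(r);
                }
                pthread_mutex_unlock(&rules_lock);
        }
}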
ifnet *ifp) + u_int16_t rtag, struct ifnet *ifp) { + struct pf_send_entry *pfse; struct mbuf *m; int len, tlen; #ifdef INET - struct ip *h; + struct ip *h = NULL; #endif /* INET */ #ifdef INET6 - struct ip6_hdr *h6; + struct ip6_hdr *h6 = NULL; #endif /* INET6 */ struct tcphdr *th; char *opt; -#ifdef __FreeBSD__ struct pf_mtag *pf_mtag; - KASSERT( -#ifdef INET - af == AF_INET -#else - 0 -#endif - || -#ifdef INET6 - af == AF_INET6 -#else - 0 -#endif - , ("Unsupported AF %d", af)); len = 0; th = NULL; -#ifdef INET - h = NULL; -#endif -#ifdef INET6 - h6 = NULL; -#endif -#endif /* __FreeBSD__ */ /* maximum segment size tcp option */ tlen = sizeof(struct tcphdr); @@ -2319,54 +2117,40 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, len = sizeof(struct ip6_hdr) + tlen; break; #endif /* INET6 */ + default: + panic("%s: unsupported af %d", __func__, af); } - /* create outgoing mbuf */ - m = m_gethdr(M_DONTWAIT, MT_HEADER); - if (m == NULL) + /* Allocate outgoing queue entry, mbuf and mbuf tag. */ + pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT); + if (pfse == NULL) return; -#ifdef __FreeBSD__ + m = m_gethdr(M_NOWAIT, MT_HEADER); + if (m == NULL) { + free(pfse, M_PFTEMP); + return; + } #ifdef MAC mac_netinet_firewall_send(m); #endif if ((pf_mtag = pf_get_mtag(m)) == NULL) { + free(pfse, M_PFTEMP); m_freem(m); return; } -#endif if (tag) -#ifdef __FreeBSD__ m->m_flags |= M_SKIP_FIREWALL; pf_mtag->tag = rtag; -#else - m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; - m->m_pkthdr.pf.tag = rtag; -#endif if (r != NULL && r->rtableid >= 0) -#ifdef __FreeBSD__ - { M_SETFIB(m, r->rtableid); - pf_mtag->rtableid = r->rtableid; -#else - m->m_pkthdr.pf.rtableid = r->rtableid; -#endif -#ifdef __FreeBSD__ - } -#endif #ifdef ALTQ if (r != NULL && r->qid) { -#ifdef __FreeBSD__ pf_mtag->qid = r->qid; /* add hints for ecn */ pf_mtag->hdr = mtod(m, struct ip *); -#else - m->m_pkthdr.pf.qid = r->qid; - /* add hints for ecn */ - m->m_pkthdr.pf.hdr = mtod(m, struct ip *); -#endif } #endif /* ALTQ */ m->m_data += max_linkhdr; @@ -2429,53 +2213,12 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, h->ip_v = 4; h->ip_hl = sizeof(*h) >> 2; h->ip_tos = IPTOS_LOWDELAY; -#ifdef __FreeBSD__ h->ip_off = V_path_mtu_discovery ? IP_DF : 0; h->ip_len = len; h->ip_ttl = ttl ? ttl : V_ip_defttl; -#else - h->ip_len = htons(len); - h->ip_off = htons(ip_mtudisc ? IP_DF : 0); - h->ip_ttl = ttl ? ttl : ip_defttl; -#endif h->ip_sum = 0; - if (eh == NULL) { -#ifdef __FreeBSD__ - PF_UNLOCK(); - ip_output(m, (void *)NULL, (void *)NULL, 0, - (void *)NULL, (void *)NULL); - PF_LOCK(); -#else /* ! __FreeBSD__ */ - ip_output(m, (void *)NULL, (void *)NULL, 0, - (void *)NULL, (void *)NULL); -#endif - } else { - struct route ro; - struct rtentry rt; - struct ether_header *e = (void *)ro.ro_dst.sa_data; - if (ifp == NULL) { - m_freem(m); - return; - } - rt.rt_ifp = ifp; - ro.ro_rt = &rt; - ro.ro_dst.sa_len = sizeof(ro.ro_dst); - ro.ro_dst.sa_family = pseudo_AF_HDRCMPLT; - bcopy(eh->ether_dhost, e->ether_shost, ETHER_ADDR_LEN); - bcopy(eh->ether_shost, e->ether_dhost, ETHER_ADDR_LEN); - e->ether_type = eh->ether_type; -#ifdef __FreeBSD__ - PF_UNLOCK(); - /* XXX_IMPORT: later */ - ip_output(m, (void *)NULL, &ro, 0, - (void *)NULL, (void *)NULL); - PF_LOCK(); -#else /* ! 
__FreeBSD__ */ - ip_output(m, (void *)NULL, &ro, IP_ROUTETOETHER, - (void *)NULL, (void *)NULL); -#endif - } + pfse->pfse_type = PFSE_IP; break; #endif /* INET */ #ifdef INET6 @@ -2487,102 +2230,75 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, h6->ip6_vfc |= IPV6_VERSION; h6->ip6_hlim = IPV6_DEFHLIM; -#ifdef __FreeBSD__ - PF_UNLOCK(); - ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); - PF_LOCK(); -#else - ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); -#endif + pfse->pfse_type = PFSE_IP6; break; #endif /* INET6 */ } + pfse->pfse_m = m; + pf_send(pfse); } static void pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, struct pf_rule *r) { - struct mbuf *m0; -#ifdef __FreeBSD__ -#ifdef INET - struct ip *ip; -#endif + struct pf_send_entry *pfse; + struct mbuf *m0; struct pf_mtag *pf_mtag; -#endif -#ifdef __FreeBSD__ - m0 = m_copypacket(m, M_DONTWAIT); - if (m0 == NULL) + /* Allocate outgoing queue entry, mbuf and mbuf tag. */ + pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT); + if (pfse == NULL) return; -#else - if ((m0 = m_copy(m, 0, M_COPYALL)) == NULL) + + if ((m0 = m_copypacket(m, M_NOWAIT)) == NULL) { + free(pfse, M_PFTEMP); return; -#endif + } -#ifdef __FreeBSD__ - if ((pf_mtag = pf_get_mtag(m0)) == NULL) + if ((pf_mtag = pf_get_mtag(m0)) == NULL) { + free(pfse, M_PFTEMP); return; + } /* XXX: revisit */ m0->m_flags |= M_SKIP_FIREWALL; -#else - m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED; -#endif if (r->rtableid >= 0) -#ifdef __FreeBSD__ - { M_SETFIB(m0, r->rtableid); - pf_mtag->rtableid = r->rtableid; -#else - m0->m_pkthdr.pf.rtableid = r->rtableid; -#endif -#ifdef __FreeBSD__ - } -#endif #ifdef ALTQ if (r->qid) { -#ifdef __FreeBSD__ pf_mtag->qid = r->qid; /* add hints for ecn */ pf_mtag->hdr = mtod(m0, struct ip *); -#else - m0->m_pkthdr.pf.qid = r->qid; - /* add hints for ecn */ - m0->m_pkthdr.pf.hdr = mtod(m0, struct ip *); -#endif } #endif /* ALTQ */ switch (af) { #ifdef INET case AF_INET: -#ifdef __FreeBSD__ + { + struct ip *ip; + /* icmp_error() expects host byte ordering */ ip = mtod(m0, struct ip *); NTOHS(ip->ip_len); NTOHS(ip->ip_off); - PF_UNLOCK(); - icmp_error(m0, type, code, 0, 0); - PF_LOCK(); -#else - icmp_error(m0, type, code, 0, 0); -#endif + + pfse->pfse_type = PFSE_ICMP; break; + } #endif /* INET */ #ifdef INET6 case AF_INET6: -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - icmp6_error(m0, type, code, 0); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif + pfse->pfse_type = PFSE_ICMP6; break; #endif /* INET6 */ } + pfse->pfse_m = m0; + pfse->pfse_icmp_type = type; + pfse->pfse_icmp_code = code; + pf_send(pfse); } /* @@ -2669,7 +2385,7 @@ pf_match_addr_range(struct pf_addr *b, struct pf_addr *e, return (1); } -int +static int pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p) { switch (op) { @@ -2704,7 +2420,7 @@ pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p) return (pf_match(op, a1, a2, p)); } -int +static int pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) { if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE) @@ -2712,7 +2428,7 @@ pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) return (pf_match(op, a1, a2, u)); } -int +static int pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) { if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE) @@ -2721,49 +2437,25 @@ pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) } int -#ifdef __FreeBSD__ -pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag, - struct pf_mtag *pf_mtag) -#else -pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag) 
-#endif +pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag, int mtag) { if (*tag == -1) -#ifdef __FreeBSD__ - *tag = pf_mtag->tag; -#else - *tag = m->m_pkthdr.pf.tag; -#endif + *tag = mtag; return ((!r->match_tag_not && r->match_tag == *tag) || (r->match_tag_not && r->match_tag != *tag)); } int -#ifdef __FreeBSD__ -pf_tag_packet(struct mbuf *m, int tag, int rtableid, - struct pf_mtag *pf_mtag) -#else -pf_tag_packet(struct mbuf *m, int tag, int rtableid) -#endif +pf_tag_packet(struct mbuf *m, struct pf_pdesc *pd, int tag) { - if (tag <= 0 && rtableid < 0) - return (0); - if (tag > 0) -#ifdef __FreeBSD__ - pf_mtag->tag = tag; -#else - m->m_pkthdr.pf.tag = tag; -#endif - if (rtableid >= 0) -#ifdef __FreeBSD__ - { - M_SETFIB(m, rtableid); - } -#else - m->m_pkthdr.pf.rtableid = rtableid; -#endif + KASSERT(tag > 0, ("%s: tag %d", __func__, tag)); + + if (pd->pf_mtag == NULL && ((pd->pf_mtag = pf_get_mtag(m)) == NULL)) + return (ENOMEM); + + pd->pf_mtag->tag = tag; return (0); } @@ -2774,26 +2466,19 @@ pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n, { struct pf_anchor_stackframe *f; + PF_RULES_RASSERT(); + (*r)->anchor->match = 0; if (match) *match = 0; -#ifdef __FreeBSD__ if (*depth >= sizeof(V_pf_anchor_stack) / sizeof(V_pf_anchor_stack[0])) { -#else - if (*depth >= sizeof(pf_anchor_stack) / - sizeof(pf_anchor_stack[0])) { -#endif printf("pf_step_into_anchor: stack overflow\n"); *r = TAILQ_NEXT(*r, entries); return; } else if (*depth == 0 && a != NULL) *a = *r; -#ifdef __FreeBSD__ f = V_pf_anchor_stack + (*depth)++; -#else - f = pf_anchor_stack + (*depth)++; -#endif f->rs = *rs; f->r = *r; if ((*r)->anchor_wildcard) { @@ -2819,14 +2504,12 @@ pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n, struct pf_anchor_stackframe *f; int quick = 0; + PF_RULES_RASSERT(); + do { if (*depth <= 0) break; -#ifdef __FreeBSD__ f = V_pf_anchor_stack + *depth - 1; -#else - f = pf_anchor_stack + *depth - 1; -#endif if (f->parent != NULL && f->child != NULL) { if (f->child->match || (match != NULL && *match)) { @@ -2913,35 +2596,15 @@ pf_addr_inc(struct pf_addr *addr, sa_family_t af) #endif /* INET6 */ int -#ifdef __FreeBSD__ -pf_socket_lookup(int direction, struct pf_pdesc *pd, struct inpcb *inp_arg) -#else -pf_socket_lookup(int direction, struct pf_pdesc *pd) -#endif +pf_socket_lookup(int direction, struct pf_pdesc *pd, struct mbuf *m) { struct pf_addr *saddr, *daddr; u_int16_t sport, dport; -#ifdef __FreeBSD__ struct inpcbinfo *pi; -#else - struct inpcbtable *tb; -#endif struct inpcb *inp; - if (pd == NULL) - return (-1); pd->lookup.uid = UID_MAX; pd->lookup.gid = GID_MAX; - pd->lookup.pid = NO_PID; - -#ifdef __FreeBSD__ - if (inp_arg != NULL) { - INP_LOCK_ASSERT(inp_arg); - pd->lookup.uid = inp_arg->inp_cred->cr_uid; - pd->lookup.gid = inp_arg->inp_cred->cr_groups[0]; - return (1); - } -#endif switch (pd->proto) { case IPPROTO_TCP: @@ -2949,22 +2612,14 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd) return (-1); sport = pd->hdr.tcp->th_sport; dport = pd->hdr.tcp->th_dport; -#ifdef __FreeBSD__ pi = &V_tcbinfo; -#else - tb = &tcbtable; -#endif break; case IPPROTO_UDP: if (pd->hdr.udp == NULL) return (-1); sport = pd->hdr.udp->uh_sport; dport = pd->hdr.udp->uh_dport; -#ifdef __FreeBSD__ pi = &V_udbinfo; -#else - tb = &udbtable; -#endif break; default: return (-1); @@ -2984,77 +2639,43 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd) switch (pd->af) { #ifdef INET case AF_INET: -#ifdef __FreeBSD__ - /* - * XXXRW: would be nice if we had an mbuf here so that we - * 
could use in_pcblookup_mbuf(). - */ - inp = in_pcblookup(pi, saddr->v4, sport, daddr->v4, - dport, INPLOOKUP_RLOCKPCB, NULL); + inp = in_pcblookup_mbuf(pi, saddr->v4, sport, daddr->v4, + dport, INPLOOKUP_RLOCKPCB, NULL, m); if (inp == NULL) { - inp = in_pcblookup(pi, saddr->v4, sport, + inp = in_pcblookup_mbuf(pi, saddr->v4, sport, daddr->v4, dport, INPLOOKUP_WILDCARD | - INPLOOKUP_RLOCKPCB, NULL); + INPLOOKUP_RLOCKPCB, NULL, m); if (inp == NULL) return (-1); } -#else - inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport); - if (inp == NULL) { - inp = in_pcblookup_listen(tb, daddr->v4, dport, 0, - NULL); - if (inp == NULL) - return (-1); - } -#endif break; #endif /* INET */ #ifdef INET6 case AF_INET6: -#ifdef __FreeBSD__ - /* - * XXXRW: would be nice if we had an mbuf here so that we - * could use in6_pcblookup_mbuf(). - */ - inp = in6_pcblookup(pi, &saddr->v6, sport, - &daddr->v6, dport, INPLOOKUP_RLOCKPCB, NULL); + inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport, &daddr->v6, + dport, INPLOOKUP_RLOCKPCB, NULL, m); if (inp == NULL) { - inp = in6_pcblookup(pi, &saddr->v6, sport, + inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport, &daddr->v6, dport, INPLOOKUP_WILDCARD | - INPLOOKUP_RLOCKPCB, NULL); - if (inp == NULL) - return (-1); - } -#else - inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6, - dport); - if (inp == NULL) { - inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 0, - NULL); + INPLOOKUP_RLOCKPCB, NULL, m); if (inp == NULL) return (-1); } -#endif break; #endif /* INET6 */ default: return (-1); } -#ifdef __FreeBSD__ INP_RLOCK_ASSERT(inp); pd->lookup.uid = inp->inp_cred->cr_uid; pd->lookup.gid = inp->inp_cred->cr_groups[0]; INP_RUNLOCK(inp); -#else - pd->lookup.uid = inp->inp_socket->so_euid; - pd->lookup.gid = inp->inp_socket->so_egid; - pd->lookup.pid = inp->inp_socket->so_cpid; -#endif + return (1); } -u_int8_t +static u_int8_t pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) { int hlen; @@ -3094,17 +2715,13 @@ pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) return (wscale); } -u_int16_t +static u_int16_t pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) { int hlen; u_int8_t hdr[60]; u_int8_t *opt, optlen; -#ifdef __FreeBSD__ u_int16_t mss = V_tcp_mssdflt; -#else - u_int16_t mss = tcp_mssdflt; -#endif hlen = th_off << 2; /* hlen <= sizeof(hdr) */ if (hlen <= sizeof(struct tcphdr)) @@ -3136,7 +2753,7 @@ pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) return (mss); } -u_int16_t +static u_int16_t pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) { #ifdef INET @@ -3148,13 +2765,8 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) struct route_in6 ro6; #endif /* INET6 */ struct rtentry *rt = NULL; -#ifdef __FreeBSD__ int hlen = 0; u_int16_t mss = V_tcp_mssdflt; -#else - int hlen; - u_int16_t mss = tcp_mssdflt; -#endif switch (af) { #ifdef INET @@ -3165,11 +2777,7 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = addr->v4; -#ifdef __FreeBSD__ in_rtalloc_ign(&ro, 0, rtableid); -#else /* ! 
__FreeBSD__ */ - rtalloc_noclone(&ro, NO_CLONING); -#endif rt = ro.ro_rt; break; #endif /* INET */ @@ -3181,11 +2789,7 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) dst6->sin6_family = AF_INET6; dst6->sin6_len = sizeof(*dst6); dst6->sin6_addr = addr->v6; -#ifdef __FreeBSD__ in6_rtalloc_ign(&ro6, 0, rtableid); -#else /* ! __FreeBSD__ */ - rtalloc_noclone((struct route *)&ro6, NO_CLONING); -#endif rt = ro6.ro_rt; break; #endif /* INET6 */ @@ -3193,11 +2797,7 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) if (rt && rt->rt_ifp) { mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr); -#ifdef __FreeBSD__ mss = max(V_tcp_mssdflt, mss); -#else - mss = max(tcp_mssdflt, mss); -#endif RTFREE(rt); } mss = min(mss, offer); @@ -3205,7 +2805,7 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) return (mss); } -void +static void pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr) { struct pf_rule *r = s->rule.ptr; @@ -3230,13 +2830,12 @@ pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr) } } -u_int32_t +static u_int32_t pf_tcp_iss(struct pf_pdesc *pd) { MD5_CTX ctx; u_int32_t digest[4]; -#ifdef __FreeBSD__ if (V_pf_tcp_secret_init == 0) { read_random(&V_pf_tcp_secret, sizeof(V_pf_tcp_secret)); MD5Init(&V_pf_tcp_secret_ctx); @@ -3246,17 +2845,6 @@ pf_tcp_iss(struct pf_pdesc *pd) } ctx = V_pf_tcp_secret_ctx; -#else - if (pf_tcp_secret_init == 0) { - arc4random_buf(pf_tcp_secret, sizeof(pf_tcp_secret)); - MD5Init(&pf_tcp_secret_ctx); - MD5Update(&pf_tcp_secret_ctx, pf_tcp_secret, - sizeof(pf_tcp_secret)); - pf_tcp_secret_init = 1; - } - - ctx = pf_tcp_secret_ctx; -#endif MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof(u_short)); MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof(u_short)); @@ -3268,36 +2856,26 @@ pf_tcp_iss(struct pf_pdesc *pd) MD5Update(&ctx, (char *)&pd->dst->v4, sizeof(struct in_addr)); } MD5Final((u_char *)digest, &ctx); -#ifdef __FreeBSD__ V_pf_tcp_iss_off += 4096; #define ISN_RANDOM_INCREMENT (4096 - 1) return (digest[0] + (arc4random() & ISN_RANDOM_INCREMENT) + V_pf_tcp_iss_off); #undef ISN_RANDOM_INCREMENT -#else - pf_tcp_iss_off += 4096; - return (digest[0] + tcp_iss + pf_tcp_iss_off); -#endif } -int +static int pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, - struct pfi_kif *kif, struct mbuf *m, int off, void *h, - struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm, -#ifdef __FreeBSD__ - struct ifqueue *ifq, struct inpcb *inp) -#else - struct ifqueue *ifq) -#endif + struct pfi_kif *kif, struct mbuf *m, int off, struct pf_pdesc *pd, + struct pf_rule **am, struct pf_ruleset **rsm, struct inpcb *inp) { struct pf_rule *nr = NULL; - struct pf_addr *saddr = pd->src, *daddr = pd->dst; + struct pf_addr * const saddr = pd->src; + struct pf_addr * const daddr = pd->dst; sa_family_t af = pd->af; struct pf_rule *r, *a = NULL; struct pf_ruleset *ruleset = NULL; struct pf_src_node *nsn = NULL; struct tcphdr *th = pd->hdr.tcp; - struct pf_state_key *skw = NULL, *sks = NULL; struct pf_state_key *sk = NULL, *nk = NULL; u_short reason; int rewrite = 0, hdrlen = 0; @@ -3305,31 +2883,18 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, int asd = 0; int match = 0; int state_icmp = 0; -#ifdef __FreeBSD__ u_int16_t sport = 0, dport = 0; u_int16_t bproto_sum = 0, bip_sum = 0; -#else - u_int16_t sport, dport; - u_int16_t bproto_sum = 0, bip_sum; -#endif u_int8_t icmptype = 0, icmpcode = 0; + PF_RULES_RASSERT(); - 
if (direction == PF_IN && pf_check_congestion(ifq)) { - REASON_SET(&reason, PFRES_CONGEST); - return (PF_DROP); - } - -#ifdef __FreeBSD__ - if (inp != NULL) - pd->lookup.done = pf_socket_lookup(direction, pd, inp); - else if (V_debug_pfugidhack) { - PF_UNLOCK(); - DPFPRINTF(PF_DEBUG_MISC, ("pf: unlocked lookup\n")); - pd->lookup.done = pf_socket_lookup(direction, pd, inp); - PF_LOCK(); + if (inp != NULL) { + INP_LOCK_ASSERT(inp); + pd->lookup.uid = inp->inp_cred->cr_uid; + pd->lookup.gid = inp->inp_cred->cr_groups[0]; + pd->lookup.done = 1; } -#endif switch (pd->proto) { case IPPROTO_TCP: @@ -3383,12 +2948,10 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); /* check packet for BINAT/NAT/RDR */ - if ((nr = pf_get_translation(pd, m, off, direction, kif, &nsn, - &skw, &sks, &sk, &nk, saddr, daddr, sport, dport)) != NULL) { - if (nk == NULL || sk == NULL) { - REASON_SET(&reason, PFRES_MEMORY); - goto cleanup; - } + if ((nr = pf_get_translation(pd, m, off, direction, kif, &nsn, &sk, + &nk, saddr, daddr, sport, dport)) != NULL) { + KASSERT(sk != NULL, ("%s: null sk", __func__)); + KASSERT(nk != NULL, ("%s: null nk", __func__)); if (pd->ip_sum) bip_sum = *pd->ip_sum; @@ -3551,36 +3114,21 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, r = TAILQ_NEXT(r, entries); /* tcp/udp only. uid.op always 0 in other cases */ else if (r->uid.op && (pd->lookup.done || (pd->lookup.done = -#ifdef __FreeBSD__ - pf_socket_lookup(direction, pd, inp), 1)) && -#else - pf_socket_lookup(direction, pd), 1)) && -#endif + pf_socket_lookup(direction, pd, m), 1)) && !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], pd->lookup.uid)) r = TAILQ_NEXT(r, entries); /* tcp/udp only. gid.op always 0 in other cases */ else if (r->gid.op && (pd->lookup.done || (pd->lookup.done = -#ifdef __FreeBSD__ - pf_socket_lookup(direction, pd, inp), 1)) && -#else - pf_socket_lookup(direction, pd), 1)) && -#endif + pf_socket_lookup(direction, pd, m), 1)) && !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], pd->lookup.gid)) r = TAILQ_NEXT(r, entries); else if (r->prob && -#ifdef __FreeBSD__ r->prob <= arc4random()) -#else - r->prob <= arc4random_uniform(UINT_MAX - 1) + 1) -#endif r = TAILQ_NEXT(r, entries); -#ifdef __FreeBSD__ - else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag)) -#else - else if (r->match_tag && !pf_match_tag(m, r, &tag)) -#endif + else if (r->match_tag && !pf_match_tag(m, r, &tag, + pd->pf_mtag ? pd->pf_mtag->tag : 0)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto != IPPROTO_TCP || !pf_osfp_match( @@ -3617,8 +3165,8 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, if (r->log || (nr != NULL && nr->log)) { if (rewrite) m_copyback(m, off, hdrlen, pd->hdr.any); - PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr, - a, ruleset, pd); + PFLOG_PACKET(kif, m, af, direction, reason, r->log ? 
r : nr, a, + ruleset, pd, 1); } if ((r->action == PF_DROP) && @@ -3674,14 +3222,10 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, ack++; if (th->th_flags & TH_FIN) ack++; -#ifdef __FreeBSD__ pf_send_tcp(m, r, af, pd->dst, -#else - pf_send_tcp(r, af, pd->dst, -#endif pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, - r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp); + r->return_ttl, 1, 0, kif->pfik_ifp); } } else if (pd->proto != IPPROTO_ICMP && af == AF_INET && r->return_icmp) @@ -3696,106 +3240,69 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, if (r->action == PF_DROP) goto cleanup; -#ifdef __FreeBSD__ - if (pf_tag_packet(m, tag, rtableid, pd->pf_mtag)) { -#else - if (pf_tag_packet(m, tag, rtableid)) { -#endif + if (tag > 0 && pf_tag_packet(m, pd, tag)) { REASON_SET(&reason, PFRES_MEMORY); goto cleanup; } + if (rtableid >= 0) + M_SETFIB(m, rtableid); if (!state_icmp && (r->keep_state || nr != NULL || (pd->flags & PFDESC_TCP_NORM))) { int action; - action = pf_create_state(r, nr, a, pd, nsn, skw, sks, nk, sk, m, - off, sport, dport, &rewrite, kif, sm, tag, bproto_sum, - bip_sum, hdrlen); + action = pf_create_state(r, nr, a, pd, nsn, nk, sk, m, off, + sport, dport, &rewrite, kif, sm, tag, bproto_sum, bip_sum, + hdrlen); if (action != PF_PASS) return (action); } else { -#ifdef __FreeBSD__ - if (sk != NULL) - pool_put(&V_pf_state_key_pl, sk); - if (nk != NULL) - pool_put(&V_pf_state_key_pl, nk); -#else if (sk != NULL) - pool_put(&pf_state_key_pl, sk); + uma_zfree(V_pf_state_key_z, sk); if (nk != NULL) - pool_put(&pf_state_key_pl, nk); -#endif + uma_zfree(V_pf_state_key_z, nk); } /* copy back packet headers if we performed NAT operations */ if (rewrite) m_copyback(m, off, hdrlen, pd->hdr.any); -#if NPFSYNC > 0 - if (*sm != NULL && !ISSET((*sm)->state_flags, PFSTATE_NOSYNC) && -#ifdef __FreeBSD__ - direction == PF_OUT && pfsync_up_ptr != NULL && pfsync_up_ptr()) { -#else - direction == PF_OUT && pfsync_up()) { -#endif + if (*sm != NULL && !((*sm)->state_flags & PFSTATE_NOSYNC) && + direction == PF_OUT && + pfsync_defer_ptr != NULL && pfsync_defer_ptr(*sm, m)) /* * We want the state created, but we dont * want to send this in case a partner * firewall has to know about it to allow * replies through it. 
*/ -#ifdef __FreeBSD__ - if (pfsync_defer_ptr != NULL && - pfsync_defer_ptr(*sm, m)) -#else - if (pfsync_defer(*sm, m)) -#endif - return (PF_DEFER); - } -#endif + return (PF_DEFER); return (PF_PASS); cleanup: -#ifdef __FreeBSD__ - if (sk != NULL) - pool_put(&V_pf_state_key_pl, sk); - if (nk != NULL) - pool_put(&V_pf_state_key_pl, nk); -#else if (sk != NULL) - pool_put(&pf_state_key_pl, sk); + uma_zfree(V_pf_state_key_z, sk); if (nk != NULL) - pool_put(&pf_state_key_pl, nk); -#endif + uma_zfree(V_pf_state_key_z, nk); return (PF_DROP); } -static __inline int +static int pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, - struct pf_pdesc *pd, struct pf_src_node *nsn, struct pf_state_key *skw, - struct pf_state_key *sks, struct pf_state_key *nk, struct pf_state_key *sk, - struct mbuf *m, int off, u_int16_t sport, u_int16_t dport, int *rewrite, - struct pfi_kif *kif, struct pf_state **sm, int tag, u_int16_t bproto_sum, - u_int16_t bip_sum, int hdrlen) + struct pf_pdesc *pd, struct pf_src_node *nsn, struct pf_state_key *nk, + struct pf_state_key *sk, struct mbuf *m, int off, u_int16_t sport, + u_int16_t dport, int *rewrite, struct pfi_kif *kif, struct pf_state **sm, + int tag, u_int16_t bproto_sum, u_int16_t bip_sum, int hdrlen) { struct pf_state *s = NULL; struct pf_src_node *sn = NULL; struct tcphdr *th = pd->hdr.tcp; -#ifdef __FreeBSD__ u_int16_t mss = V_tcp_mssdflt; -#else - u_int16_t mss = tcp_mssdflt; -#endif u_short reason; /* check maximums */ if (r->max_states && (r->states_cur >= r->max_states)) { -#ifdef __FreeBSD__ V_pf_status.lcounters[LCNT_STATES]++; -#else - pf_status.lcounters[LCNT_STATES]++; -#endif REASON_SET(&reason, PFRES_MAXSTATES); return (PF_DROP); } @@ -3812,11 +3319,7 @@ pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, REASON_SET(&reason, PFRES_SRCLIMIT); goto csfailed; } -#ifdef __FreeBSD__ - s = pool_get(&V_pf_state_pl, PR_NOWAIT | PR_ZERO); -#else - s = pool_get(&pf_state_pl, PR_NOWAIT | PR_ZERO); -#endif + s = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO); if (s == NULL) { REASON_SET(&reason, PFRES_MEMORY); goto csfailed; @@ -3829,8 +3332,6 @@ pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, s->state_flags |= PFSTATE_ALLOWOPTS; if (r->rule_flag & PFRULE_STATESLOPPY) s->state_flags |= PFSTATE_SLOPPY; - if (r->rule_flag & PFRULE_PFLOW) - s->state_flags |= PFSTATE_PFLOW; s->log = r->log & PF_LOG_ALL; s->sync_state = PFSYNC_S_NONE; if (nr != NULL) @@ -3888,8 +3389,8 @@ pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, s->timeout = PFTM_OTHER_FIRST_PACKET; } - s->creation = time_second; - s->expire = time_second; + s->creation = time_uptime; + s->expire = time_uptime; if (sn != NULL) { s->src_node = sn; @@ -3907,11 +3408,7 @@ pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, REASON_SET(&reason, PFRES_MEMORY); pf_src_tree_remove_state(s); STATE_DEC_COUNTERS(s); -#ifdef __FreeBSD__ - pool_put(&V_pf_state_pl, s); -#else - pool_put(&pf_state_pl, s); -#endif + uma_zfree(V_pf_state_z, s); return (PF_DROP); } if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub && @@ -3923,40 +3420,43 @@ pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, pf_normalize_tcp_cleanup(s); pf_src_tree_remove_state(s); STATE_DEC_COUNTERS(s); -#ifdef __FreeBSD__ - pool_put(&V_pf_state_pl, s); -#else - pool_put(&pf_state_pl, s); -#endif + uma_zfree(V_pf_state_z, s); return (PF_DROP); } } s->direction = pd->dir; - if (sk == NULL && pf_state_key_setup(pd, nr, &skw, 
&sks, &sk, &nk, - pd->src, pd->dst, sport, dport)) - goto csfailed; + /* + * sk/nk could already been setup by pf_get_translation(). + */ + if (nr == NULL) { + KASSERT((sk == NULL && nk == NULL), ("%s: nr %p sk %p, nk %p", + __func__, nr, sk, nk)); + sk = pf_state_key_setup(pd, pd->src, pd->dst, sport, dport); + if (sk == NULL) + goto csfailed; + nk = sk; + } else + KASSERT((sk != NULL && nk != NULL), ("%s: nr %p sk %p, nk %p", + __func__, nr, sk, nk)); - if (pf_state_insert(BOUND_IFACE(r, kif), skw, sks, s)) { + /* Swap sk/nk for PF_OUT. */ + if (pf_state_insert(BOUND_IFACE(r, kif), + (pd->dir == PF_IN) ? sk : nk, + (pd->dir == PF_IN) ? nk : sk, s)) { if (pd->proto == IPPROTO_TCP) pf_normalize_tcp_cleanup(s); REASON_SET(&reason, PFRES_STATEINS); pf_src_tree_remove_state(s); STATE_DEC_COUNTERS(s); -#ifdef __FreeBSD__ - pool_put(&V_pf_state_pl, s); -#else - pool_put(&pf_state_pl, s); -#endif + uma_zfree(V_pf_state_z, s); return (PF_DROP); } else *sm = s; pf_set_rt_ifp(s, pd->src); /* needs s->state_key set */ - if (tag > 0) { - pf_tag_ref(tag); + if (tag > 0) s->tag = tag; - } if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && r->keep_state == PF_STATE_SYNPROXY) { s->src.state = PF_TCPS_PROXY_SRC; @@ -3984,13 +3484,9 @@ pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, mss = pf_calc_mss(pd->src, pd->af, rtid, mss); mss = pf_calc_mss(pd->dst, pd->af, rtid, mss); s->src.mss = mss; -#ifdef __FreeBSD__ pf_send_tcp(NULL, r, pd->af, pd->dst, pd->src, th->th_dport, -#else - pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport, -#endif th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, - TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL); + TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL); REASON_SET(&reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } @@ -3998,48 +3494,27 @@ pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, return (PF_PASS); csfailed: -#ifdef __FreeBSD__ if (sk != NULL) - pool_put(&V_pf_state_key_pl, sk); + uma_zfree(V_pf_state_key_z, sk); if (nk != NULL) - pool_put(&V_pf_state_key_pl, nk); -#else - if (sk != NULL) - pool_put(&pf_state_key_pl, sk); - if (nk != NULL) - pool_put(&pf_state_key_pl, nk); -#endif + uma_zfree(V_pf_state_key_z, nk); if (sn != NULL && sn->states == 0 && sn->expire == 0) { -#ifdef __FreeBSD__ - RB_REMOVE(pf_src_tree, &V_tree_src_tracking, sn); + pf_remove_src_node(sn); V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; V_pf_status.src_nodes--; - pool_put(&V_pf_src_tree_pl, sn); -#else - RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); - pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; - pf_status.src_nodes--; - pool_put(&pf_src_tree_pl, sn); -#endif + uma_zfree(V_pf_sources_z, sn); } if (nsn != sn && nsn != NULL && nsn->states == 0 && nsn->expire == 0) { -#ifdef __FreeBSD__ - RB_REMOVE(pf_src_tree, &V_tree_src_tracking, nsn); + pf_remove_src_node(nsn); V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; V_pf_status.src_nodes--; - pool_put(&V_pf_src_tree_pl, nsn); -#else - RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn); - pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; - pf_status.src_nodes--; - pool_put(&pf_src_tree_pl, nsn); -#endif + uma_zfree(V_pf_sources_z, nsn); } return (PF_DROP); } -int +static int pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm) @@ -4052,6 +3527,8 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, int asd = 0; int match = 0; + 
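/*
 * [Illustrative sketch, not part of the diff.]  pf_create_state() above
 * inserts the two state keys in wire/stack orientation: for an inbound
 * packet the key built from the packet on the wire goes into PF_SK_WIRE
 * and the translated key into PF_SK_STACK; for an outbound packet the
 * roles are swapped ("Swap sk/nk for PF_OUT").  The toy program below only
 * demonstrates that orientation; the addresses are made up.
 */
#include <stdio.h>

enum { SK_WIRE, SK_STACK };             /* cf. PF_SK_WIRE / PF_SK_STACK */

static void
orient(int inbound, const char *orig, const char *translated,
    const char *slot[2])
{
        slot[SK_WIRE]  = inbound ? orig : translated;
        slot[SK_STACK] = inbound ? translated : orig;
}

int
main(void)
{
        const char *slot[2];

        /* Outbound connection NATed from 10.0.0.5 to the external address. */
        orient(0, "10.0.0.5:1025 -> 198.51.100.7:80",
            "192.0.2.1:54001 -> 198.51.100.7:80", slot);
        printf("SK_WIRE  = %s\nSK_STACK = %s\n", slot[SK_WIRE], slot[SK_STACK]);
        return (0);
}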
PF_RULES_RASSERT(); + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); while (r != NULL) { r->evaluations++; @@ -4086,11 +3563,8 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, else if (r->prob && r->prob <= (arc4random() % (UINT_MAX - 1) + 1)) r = TAILQ_NEXT(r, entries); -#ifdef __FreeBSD__ - else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag)) -#else - else if (r->match_tag && !pf_match_tag(m, r, &tag)) -#endif + else if (r->match_tag && !pf_match_tag(m, r, &tag, + pd->pf_mtag ? pd->pf_mtag->tag : 0)) r = TAILQ_NEXT(r, entries); else { if (r->anchor == NULL) { @@ -4116,17 +3590,13 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, REASON_SET(&reason, PFRES_MATCH); if (r->log) - PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset, - pd); + PFLOG_PACKET(kif, m, af, direction, reason, r, a, ruleset, pd, + 1); if (r->action != PF_PASS) return (PF_DROP); -#ifdef __FreeBSD__ - if (pf_tag_packet(m, tag, -1, pd->pf_mtag)) { -#else - if (pf_tag_packet(m, tag, -1)) { -#endif + if (tag > 0 && pf_tag_packet(m, pd, tag)) { REASON_SET(&reason, PFRES_MEMORY); return (PF_DROP); } @@ -4134,7 +3604,7 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, return (PF_PASS); } -int +static int pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, struct pf_state **state, struct pfi_kif *kif, struct mbuf *m, int off, struct pf_pdesc *pd, u_short *reason, int *copyback) @@ -4334,7 +3804,7 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, src->state = dst->state = TCPS_TIME_WAIT; /* update expire time */ - (*state)->expire = time_second; + (*state)->expire = time_uptime; if (src->state >= TCPS_FIN_WAIT_2 && dst->state >= TCPS_FIN_WAIT_2) (*state)->timeout = PFTM_TCP_CLOSED; @@ -4381,23 +3851,14 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, * and keep updating the state TTL. */ -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif printf("pf: loose state match: "); pf_print_state(*state); pf_print_flags(th->th_flags); printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, -#ifdef __FreeBSD__ pd->p_len, ackskew, (unsigned long long)(*state)->packets[0], (unsigned long long)(*state)->packets[1], -#else - pd->p_len, ackskew, (*state)->packets[0], - (*state)->packets[1], -#endif pd->dir == PF_IN ? "in" : "out", pd->dir == (*state)->direction ? 
"fwd" : "rev"); } @@ -4436,36 +3897,24 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, (*state)->src.state == TCPS_SYN_SENT) { /* Send RST for state mismatches during handshake */ if (!(th->th_flags & TH_RST)) -#ifdef __FreeBSD__ pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, -#else - pf_send_tcp((*state)->rule.ptr, pd->af, -#endif pd->dst, pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), 0, TH_RST, 0, 0, (*state)->rule.ptr->return_ttl, 1, 0, - pd->eh, kif->pfik_ifp); + kif->pfik_ifp); src->seqlo = 0; src->seqhi = 1; src->max_win = 1; -#ifdef __FreeBSD__ } else if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - } else if (pf_status.debug >= PF_DEBUG_MISC) { -#endif printf("pf: BAD state: "); pf_print_state(*state); pf_print_flags(th->th_flags); printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, pd->p_len, ackskew, -#ifdef __FreeBSD__ (unsigned long long)(*state)->packets[0], (unsigned long long)(*state)->packets[1], -#else - (*state)->packets[0], (*state)->packets[1], -#endif pd->dir == PF_IN ? "in" : "out", pd->dir == (*state)->direction ? "fwd" : "rev"); printf("pf: State failure on: %c %c %c %c | %c %c\n", @@ -4484,7 +3933,7 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, return (PF_PASS); } -int +static int pf_tcp_track_sloppy(struct pf_state_peer *src, struct pf_state_peer *dst, struct pf_state **state, struct pf_pdesc *pd, u_short *reason) { @@ -4536,7 +3985,7 @@ pf_tcp_track_sloppy(struct pf_state_peer *src, struct pf_state_peer *dst, src->state = dst->state = TCPS_TIME_WAIT; /* update expire time */ - (*state)->expire = time_second; + (*state)->expire = time_uptime; if (src->state >= TCPS_FIN_WAIT_2 && dst->state >= TCPS_FIN_WAIT_2) (*state)->timeout = PFTM_TCP_CLOSED; @@ -4555,7 +4004,7 @@ pf_tcp_track_sloppy(struct pf_state_peer *src, struct pf_state_peer *dst, return (PF_PASS); } -int +static int pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason) @@ -4566,6 +4015,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, struct pf_state_peer *src, *dst; struct pf_state_key *sk; + bzero(&key, sizeof(key)); key.af = pd->af; key.proto = IPPROTO_TCP; if (direction == PF_IN) { /* wire side, straight */ @@ -4580,11 +4030,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, key.port[0] = th->th_dport; } -#ifdef __FreeBSD__ - STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); -#else - STATE_LOOKUP(kif, &key, direction, *state, m); -#endif + STATE_LOOKUP(kif, &key, direction, *state, pd); if (direction == (*state)->direction) { src = &(*state)->src; @@ -4606,15 +4052,10 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); } -#ifdef __FreeBSD__ pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst, -#else - pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, -#endif pd->src, th->th_dport, th->th_sport, (*state)->src.seqhi, ntohl(th->th_seq) + 1, - TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, - 0, NULL, NULL); + TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, 0, NULL); REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } else if (!(th->th_flags & TH_ACK) || @@ -4640,15 +4081,11 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, (*state)->src.max_win = MAX(ntohs(th->th_win), 1); if ((*state)->dst.seqhi == 1) 
(*state)->dst.seqhi = htonl(arc4random()); -#ifdef __FreeBSD__ pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, -#else - pf_send_tcp((*state)->rule.ptr, pd->af, -#endif &sk->addr[pd->sidx], &sk->addr[pd->didx], sk->port[pd->sidx], sk->port[pd->didx], (*state)->dst.seqhi, 0, TH_SYN, 0, - (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL); + (*state)->src.mss, 0, 0, (*state)->tag, NULL); REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } else if (((th->th_flags & (TH_SYN|TH_ACK)) != @@ -4659,25 +4096,16 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, } else { (*state)->dst.max_win = MAX(ntohs(th->th_win), 1); (*state)->dst.seqlo = ntohl(th->th_seq); -#ifdef __FreeBSD__ pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst, -#else - pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, -#endif pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), ntohl(th->th_seq) + 1, TH_ACK, (*state)->src.max_win, 0, 0, 0, - (*state)->tag, NULL, NULL); -#ifdef __FreeBSD__ + (*state)->tag, NULL); pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, -#else - pf_send_tcp((*state)->rule.ptr, pd->af, -#endif &sk->addr[pd->sidx], &sk->addr[pd->didx], sk->port[pd->sidx], sk->port[pd->didx], (*state)->src.seqhi + 1, (*state)->src.seqlo + 1, - TH_ACK, (*state)->dst.max_win, 0, 0, 1, - 0, NULL, NULL); + TH_ACK, (*state)->dst.max_win, 0, 0, 1, 0, NULL); (*state)->src.seqdiff = (*state)->dst.seqhi - (*state)->src.seqlo; (*state)->dst.seqdiff = (*state)->src.seqhi - @@ -4697,11 +4125,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) && dst->state >= TCPS_FIN_WAIT_2 && src->state >= TCPS_FIN_WAIT_2) { -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif printf("pf: state reuse "); pf_print_state(*state); pf_print_flags(th->th_flags); @@ -4709,7 +4133,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, } /* XXX make sure it's the same direction ?? 
*/ (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; - pf_unlink_state(*state); + pf_unlink_state(*state, PF_ENTER_LOCKED); *state = NULL; return (PF_DROP); } @@ -4743,16 +4167,12 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, /* Copyback sequence modulation or stateful scrub changes if needed */ if (copyback) -#ifdef __FreeBSD__ m_copyback(m, off, sizeof(*th), (caddr_t)th); -#else - m_copyback(m, off, sizeof(*th), th); -#endif return (PF_PASS); } -int +static int pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd) { @@ -4760,6 +4180,7 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, struct pf_state_key_cmp key; struct udphdr *uh = pd->hdr.udp; + bzero(&key, sizeof(key)); key.af = pd->af; key.proto = IPPROTO_UDP; if (direction == PF_IN) { /* wire side, straight */ @@ -4774,11 +4195,7 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, key.port[0] = uh->uh_dport; } -#ifdef __FreeBSD__ - STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); -#else - STATE_LOOKUP(kif, &key, direction, *state, m); -#endif + STATE_LOOKUP(kif, &key, direction, *state, pd); if (direction == (*state)->direction) { src = &(*state)->src; @@ -4795,7 +4212,7 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, dst->state = PFUDPS_MULTIPLE; /* update expire time */ - (*state)->expire = time_second; + (*state)->expire = time_uptime; if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE) (*state)->timeout = PFTM_UDP_MULTIPLE; else @@ -4816,30 +4233,23 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum, &uh->uh_sum, &nk->addr[pd->didx], nk->port[pd->didx], 1, pd->af); -#ifdef __FreeBSD__ m_copyback(m, off, sizeof(*uh), (caddr_t)uh); -#else - m_copyback(m, off, sizeof(*uh), uh); -#endif } return (PF_PASS); } -int +static int pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason) { struct pf_addr *saddr = pd->src, *daddr = pd->dst; -#ifdef __FreeBSD__ u_int16_t icmpid = 0, *icmpsum; -#else - u_int16_t icmpid, *icmpsum; -#endif u_int8_t icmptype; int state_icmp = 0; struct pf_state_key_cmp key; + bzero(&key, sizeof(key)); switch (pd->proto) { #ifdef INET case IPPROTO_ICMP: @@ -4887,13 +4297,9 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, PF_ACPY(&key.addr[0], pd->dst, key.af); } -#ifdef __FreeBSD__ - STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); -#else - STATE_LOOKUP(kif, &key, direction, *state, m); -#endif + STATE_LOOKUP(kif, &key, direction, *state, pd); - (*state)->expire = time_second; + (*state)->expire = time_uptime; (*state)->timeout = PFTM_ICMP_ERROR_REPLY; /* translate source/destination address, if necessary */ @@ -4926,10 +4332,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, } m_copyback(m, off, ICMP_MINLEN, -#ifdef __FreeBSD__ - (caddr_t) -#endif - pd->hdr.icmp); + (caddr_t )pd->hdr.icmp); break; #endif /* INET */ #ifdef INET6 @@ -4946,12 +4349,8 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, &pd->hdr.icmp6->icmp6_cksum, &nk->addr[pd->didx], 0); - m_copyback(m, off, - sizeof(struct icmp6_hdr), -#ifdef __FreeBSD__ - (caddr_t) -#endif - pd->hdr.icmp6); + m_copyback(m, off, sizeof(struct 
icmp6_hdr), + (caddr_t )pd->hdr.icmp6); break; #endif /* INET6 */ } @@ -4965,9 +4364,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, */ struct pf_pdesc pd2; -#ifdef __FreeBSD__ bzero(&pd2, sizeof pd2); -#endif #ifdef INET struct ip h2; #endif /* INET */ @@ -4975,13 +4372,8 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, struct ip6_hdr h2_6; int terminal = 0; #endif /* INET6 */ -#ifdef __FreeBSD__ int ipoff2 = 0; int off2 = 0; -#else - int ipoff2; - int off2; -#endif pd2.af = pd->af; /* Payload packet is from the opposite direction. */ @@ -5102,11 +4494,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, key.port[pd2.sidx] = th.th_sport; key.port[pd2.didx] = th.th_dport; -#ifdef __FreeBSD__ - STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); -#else - STATE_LOOKUP(kif, &key, direction, *state, m); -#endif + STATE_LOOKUP(kif, &key, direction, *state, pd); if (direction == (*state)->direction) { src = &(*state)->dst; @@ -5132,11 +4520,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, if (!((*state)->state_flags & PFSTATE_SLOPPY) && (!SEQ_GEQ(src->seqhi, seq) || !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) { -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif printf("pf: BAD ICMP %d:%d ", icmptype, pd->hdr.icmp->icmp_code); pf_print_host(pd->src, 0, pd->af); @@ -5149,11 +4533,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, REASON_SET(reason, PFRES_BADSTATE); return (PF_DROP); } else { -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif printf("pf: OK ICMP %d:%d ", icmptype, pd->hdr.icmp->icmp_code); pf_print_host(pd->src, 0, pd->af); @@ -5197,38 +4577,22 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, #ifdef INET case AF_INET: m_copyback(m, off, ICMP_MINLEN, -#ifdef __FreeBSD__ - (caddr_t) -#endif - pd->hdr.icmp); + (caddr_t )pd->hdr.icmp); m_copyback(m, ipoff2, sizeof(h2), -#ifdef __FreeBSD__ - (caddr_t) -#endif - &h2); + (caddr_t )&h2); break; #endif /* INET */ #ifdef INET6 case AF_INET6: m_copyback(m, off, sizeof(struct icmp6_hdr), -#ifdef __FreeBSD__ - (caddr_t) -#endif - pd->hdr.icmp6); + (caddr_t )pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), -#ifdef __FreeBSD__ - (caddr_t) -#endif - &h2_6); + (caddr_t )&h2_6); break; #endif /* INET6 */ } -#ifdef __FreeBSD__ m_copyback(m, off2, 8, (caddr_t)&th); -#else - m_copyback(m, off2, 8, &th); -#endif } return (PF_PASS); @@ -5252,11 +4616,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, key.port[pd2.sidx] = uh.uh_sport; key.port[pd2.didx] = uh.uh_dport; -#ifdef __FreeBSD__ - STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); -#else - STATE_LOOKUP(kif, &key, direction, *state, m); -#endif + STATE_LOOKUP(kif, &key, direction, *state, pd); /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != @@ -5287,38 +4647,21 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, #ifdef INET case AF_INET: m_copyback(m, off, ICMP_MINLEN, -#ifdef __FreeBSD__ - (caddr_t) -#endif - pd->hdr.icmp); -#ifdef __FreeBSD__ + (caddr_t )pd->hdr.icmp); m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); -#else - m_copyback(m, ipoff2, sizeof(h2), &h2); -#endif break; #endif /* INET */ #ifdef INET6 case AF_INET6: 
m_copyback(m, off, sizeof(struct icmp6_hdr), -#ifdef __FreeBSD__ - (caddr_t) -#endif - pd->hdr.icmp6); + (caddr_t )pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), -#ifdef __FreeBSD__ - (caddr_t) -#endif - &h2_6); + (caddr_t )&h2_6); break; #endif /* INET6 */ } -#ifdef __FreeBSD__ m_copyback(m, off2, sizeof(uh), (caddr_t)&uh); -#else - m_copyback(m, off2, sizeof(uh), &uh); -#endif } return (PF_PASS); break; @@ -5341,11 +4684,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); key.port[0] = key.port[1] = iih.icmp_id; -#ifdef __FreeBSD__ - STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); -#else - STATE_LOOKUP(kif, &key, direction, *state, m); -#endif + STATE_LOOKUP(kif, &key, direction, *state, pd); /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != @@ -5372,15 +4711,9 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET); -#ifdef __FreeBSD__ m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp); m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); m_copyback(m, off2, ICMP_MINLEN, (caddr_t)&iih); -#else - m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp); - m_copyback(m, ipoff2, sizeof(h2), &h2); - m_copyback(m, off2, ICMP_MINLEN, &iih); -#endif } return (PF_PASS); break; @@ -5404,11 +4737,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); key.port[0] = key.port[1] = iih.icmp6_id; -#ifdef __FreeBSD__ - STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); -#else - STATE_LOOKUP(kif, &key, direction, *state, m); -#endif + STATE_LOOKUP(kif, &key, direction, *state, pd); /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != @@ -5435,19 +4764,11 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET6); -#ifdef __FreeBSD__ m_copyback(m, off, sizeof(struct icmp6_hdr), (caddr_t)pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6); m_copyback(m, off2, sizeof(struct icmp6_hdr), (caddr_t)&iih); -#else - m_copyback(m, off, sizeof(struct icmp6_hdr), - pd->hdr.icmp6); - m_copyback(m, ipoff2, sizeof(h2_6), &h2_6); - m_copyback(m, off2, sizeof(struct icmp6_hdr), - &iih); -#endif } return (PF_PASS); break; @@ -5460,11 +4781,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); key.port[0] = key.port[1] = 0; -#ifdef __FreeBSD__ - STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); -#else - STATE_LOOKUP(kif, &key, direction, *state, m); -#endif + STATE_LOOKUP(kif, &key, direction, *state, pd); /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != @@ -5490,30 +4807,18 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, switch (pd2.af) { #ifdef INET case AF_INET: -#ifdef __FreeBSD__ m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp); m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); -#else - m_copyback(m, off, ICMP_MINLEN, - pd->hdr.icmp); - m_copyback(m, ipoff2, sizeof(h2), &h2); -#endif break; #endif /* INET */ #ifdef INET6 case AF_INET6: m_copyback(m, off, sizeof(struct icmp6_hdr), -#ifdef __FreeBSD__ - (caddr_t) -#endif - pd->hdr.icmp6); + (caddr_t )pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), -#ifdef __FreeBSD__ - (caddr_t) -#endif - &h2_6); + 
(caddr_t )&h2_6); break; #endif /* INET6 */ } @@ -5525,13 +4830,14 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, } } -int +static int pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, struct pf_pdesc *pd) { struct pf_state_peer *src, *dst; struct pf_state_key_cmp key; + bzero(&key, sizeof(key)); key.af = pd->af; key.proto = pd->proto; if (direction == PF_IN) { @@ -5544,11 +4850,7 @@ pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, key.port[1] = key.port[0] = 0; } -#ifdef __FreeBSD__ - STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); -#else - STATE_LOOKUP(kif, &key, direction, *state, m); -#endif + STATE_LOOKUP(kif, &key, direction, *state, pd); if (direction == (*state)->direction) { src = &(*state)->src; @@ -5565,7 +4867,7 @@ pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, dst->state = PFOTHERS_MULTIPLE; /* update expire time */ - (*state)->expire = time_second; + (*state)->expire = time_uptime; if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE) (*state)->timeout = PFTM_OTHER_MULTIPLE; else @@ -5575,17 +4877,10 @@ pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { struct pf_state_key *nk = (*state)->key[pd->didx]; -#ifdef __FreeBSD__ - KASSERT(nk, ("%s: nk is null", __FUNCTION__)); - KASSERT(pd, ("%s: pd is null", __FUNCTION__)); - KASSERT(pd->src, ("%s: pd->src is null", __FUNCTION__)); - KASSERT(pd->dst, ("%s: pd->dst is null", __FUNCTION__)); -#else - KASSERT(nk); - KASSERT(pd); - KASSERT(pd->src); - KASSERT(pd->dst); -#endif + KASSERT(nk, ("%s: nk is null", __func__)); + KASSERT(pd, ("%s: pd is null", __func__)); + KASSERT(pd->src, ("%s: pd->src is null", __func__)); + KASSERT(pd->dst, ("%s: pd->dst is null", __func__)); switch (pd->af) { #ifdef INET case AF_INET: @@ -5672,21 +4967,13 @@ int pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, int rtableid) { -#ifdef __FreeBSD__ #ifdef RADIX_MPATH struct radix_node_head *rnh; #endif -#endif struct sockaddr_in *dst; int ret = 1; int check_mpath; -#ifndef __FreeBSD__ - extern int ipmultipath; -#endif #ifdef INET6 -#ifndef __FreeBSD__ - extern int ip6_multipath; -#endif struct sockaddr_in6 *dst6; struct route_in6 ro; #else @@ -5697,14 +4984,12 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, struct ifnet *ifp; check_mpath = 0; -#ifdef __FreeBSD__ #ifdef RADIX_MPATH /* XXX: stick to table 0 for now */ rnh = rt_tables_get_rnh(0, af); if (rnh != NULL && rn_mpath_capable(rnh)) check_mpath = 1; #endif -#endif bzero(&ro, sizeof(ro)); switch (af) { case AF_INET: @@ -5712,10 +4997,6 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = addr->v4; -#ifndef __FreeBSD__ - if (ipmultipath) - check_mpath = 1; -#endif break; #ifdef INET6 case AF_INET6: @@ -5729,10 +5010,6 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, dst6->sin6_family = AF_INET6; dst6->sin6_len = sizeof(*dst6); dst6->sin6_addr = addr->v6; -#ifndef __FreeBSD__ - if (ip6_multipath) - check_mpath = 1; -#endif break; #endif /* INET6 */ default: @@ -5743,7 +5020,6 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) goto out; -#ifdef __FreeBSD__ switch (af) { #ifdef INET6 case AF_INET6: 
@@ -5759,9 +5035,6 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, rtalloc_ign((struct route *)&ro, 0); /* No/default FIB. */ break; } -#else /* ! __FreeBSD__ */ - rtalloc_noclone((struct route *)&ro, NO_CLONING); -#endif if (ro.ro_rt != NULL) { /* No interface given, this is a no-route check */ @@ -5778,22 +5051,13 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, rn = (struct radix_node *)ro.ro_rt; do { rt = (struct rtentry *)rn; -#ifndef __FreeBSD__ /* CARPDEV */ - if (rt->rt_ifp->if_type == IFT_CARP) - ifp = rt->rt_ifp->if_carpdev; - else -#endif - ifp = rt->rt_ifp; + ifp = rt->rt_ifp; if (kif->pfik_ifp == ifp) ret = 1; -#ifdef __FreeBSD__ #ifdef RADIX_MPATH rn = rn_mpath_next(rn); #endif -#else - rn = rn_mpath_next(rn, 0); -#endif } while (check_mpath == 1 && rn != NULL && ret == 0); } else ret = 0; @@ -5803,373 +5067,186 @@ out: return (ret); } -int -pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw, - int rtableid) -{ - struct sockaddr_in *dst; -#ifdef INET6 - struct sockaddr_in6 *dst6; - struct route_in6 ro; -#else - struct route ro; -#endif - int ret = 0; - - bzero(&ro, sizeof(ro)); - switch (af) { - case AF_INET: - dst = satosin(&ro.ro_dst); - dst->sin_family = AF_INET; - dst->sin_len = sizeof(*dst); - dst->sin_addr = addr->v4; - break; -#ifdef INET6 - case AF_INET6: - dst6 = (struct sockaddr_in6 *)&ro.ro_dst; - dst6->sin6_family = AF_INET6; - dst6->sin6_len = sizeof(*dst6); - dst6->sin6_addr = addr->v6; - break; -#endif /* INET6 */ - default: - return (0); - } - -#ifdef __FreeBSD__ - switch (af) { -#ifdef INET6 - case AF_INET6: - in6_rtalloc_ign(&ro, 0, rtableid); - break; -#endif #ifdef INET - case AF_INET: - in_rtalloc_ign((struct route *)&ro, 0, rtableid); - break; -#endif - default: - rtalloc_ign((struct route *)&ro, 0); - break; - } -#else /* ! 
__FreeBSD__ */ - rtalloc_noclone((struct route *)&ro, NO_CLONING); -#endif - - if (ro.ro_rt != NULL) { -#ifdef __FreeBSD__ - /* XXX_IMPORT: later */ -#else - if (ro.ro_rt->rt_labelid == aw->v.rtlabel) - ret = 1; -#endif - RTFREE(ro.ro_rt); - } - - return (ret); -} - -#ifdef INET -void +static void pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, struct pf_state *s, struct pf_pdesc *pd) { struct mbuf *m0, *m1; - struct route iproute; - struct route *ro = NULL; - struct sockaddr_in *dst; + struct sockaddr_in dst; struct ip *ip; struct ifnet *ifp = NULL; struct pf_addr naddr; struct pf_src_node *sn = NULL; int error = 0; -#ifdef __FreeBSD__ int sw_csum; -#endif -#ifdef IPSEC - struct m_tag *mtag; -#endif /* IPSEC */ - if (m == NULL || *m == NULL || r == NULL || - (dir != PF_IN && dir != PF_OUT) || oifp == NULL) - panic("pf_route: invalid parameters"); + KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__)); + KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: invalid direction", + __func__)); -#ifdef __FreeBSD__ - if (pd->pf_mtag->routed++ > 3) { -#else - if ((*m)->m_pkthdr.pf.routed++ > 3) { -#endif + if ((pd->pf_mtag == NULL && + ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) || + pd->pf_mtag->routed++ > 3) { m0 = *m; *m = NULL; - goto bad; + goto bad_locked; } if (r->rt == PF_DUPTO) { -#ifdef __FreeBSD__ - if ((m0 = m_dup(*m, M_DONTWAIT)) == NULL) -#else - if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL) -#endif + if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) { + if (s) + PF_STATE_UNLOCK(s); return; + } } else { - if ((r->rt == PF_REPLYTO) == (r->direction == dir)) + if ((r->rt == PF_REPLYTO) == (r->direction == dir)) { + if (s) + PF_STATE_UNLOCK(s); return; + } m0 = *m; } - if (m0->m_len < sizeof(struct ip)) { - DPFPRINTF(PF_DEBUG_URGENT, - ("pf_route: m0->m_len < sizeof(struct ip)\n")); - goto bad; - } - ip = mtod(m0, struct ip *); - ro = &iproute; - bzero((caddr_t)ro, sizeof(*ro)); - dst = satosin(&ro->ro_dst); - dst->sin_family = AF_INET; - dst->sin_len = sizeof(*dst); - dst->sin_addr = ip->ip_dst; + bzero(&dst, sizeof(dst)); + dst.sin_family = AF_INET; + dst.sin_len = sizeof(dst); + dst.sin_addr = ip->ip_dst; if (r->rt == PF_FASTROUTE) { -#ifdef __FreeBSD__ - in_rtalloc_ign(ro, 0, M_GETFIB(m0)); -#else - rtalloc(ro); -#endif - if (ro->ro_rt == 0) { -#ifdef __FreeBSD__ + struct rtentry *rt; + + if (s) + PF_STATE_UNLOCK(s); + rt = rtalloc1_fib(sintosa(&dst), 0, 0, M_GETFIB(m0)); + if (rt == NULL) { + RTFREE_LOCKED(rt); KMOD_IPSTAT_INC(ips_noroute); -#else - ipstat.ips_noroute++; -#endif + error = EHOSTUNREACH; goto bad; } - ifp = ro->ro_rt->rt_ifp; - ro->ro_rt->rt_use++; + ifp = rt->rt_ifp; + rt->rt_rmx.rmx_pksent++; - if (ro->ro_rt->rt_flags & RTF_GATEWAY) - dst = satosin(ro->ro_rt->rt_gateway); + if (rt->rt_flags & RTF_GATEWAY) + bcopy(satosin(rt->rt_gateway), &dst, sizeof(dst)); + RTFREE_LOCKED(rt); } else { if (TAILQ_EMPTY(&r->rpool.list)) { DPFPRINTF(PF_DEBUG_URGENT, - ("pf_route: TAILQ_EMPTY(&r->rpool.list)\n")); - goto bad; + ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__)); + goto bad_locked; } if (s == NULL) { pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src, &naddr, NULL, &sn); if (!PF_AZERO(&naddr, AF_INET)) - dst->sin_addr.s_addr = naddr.v4.s_addr; + dst.sin_addr.s_addr = naddr.v4.s_addr; ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL; } else { if (!PF_AZERO(&s->rt_addr, AF_INET)) - dst->sin_addr.s_addr = + dst.sin_addr.s_addr = s->rt_addr.v4.s_addr; ifp = s->rt_kif ? 
s->rt_kif->pfik_ifp : NULL; + PF_STATE_UNLOCK(s); } } if (ifp == NULL) goto bad; if (oifp != ifp) { -#ifdef __FreeBSD__ - PF_UNLOCK(); - if (pf_test(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) { - PF_LOCK(); - goto bad; - } else if (m0 == NULL) { - PF_LOCK(); - goto done; - } - PF_LOCK(); -#else if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS) goto bad; else if (m0 == NULL) goto done; -#endif if (m0->m_len < sizeof(struct ip)) { DPFPRINTF(PF_DEBUG_URGENT, - ("pf_route: m0->m_len < sizeof(struct ip)\n")); + ("%s: m0->m_len < sizeof(struct ip)\n", __func__)); goto bad; } ip = mtod(m0, struct ip *); } -#ifdef __FreeBSD__ - /* Copied from FreeBSD 5.1-CURRENT ip_output. */ + if (ifp->if_flags & IFF_LOOPBACK) + m0->m_flags |= M_SKIP_FIREWALL; + + /* Back to host byte order. */ + ip->ip_len = ntohs(ip->ip_len); + ip->ip_off = ntohs(ip->ip_off); + + /* Copied from FreeBSD 10.0-CURRENT ip_output. */ m0->m_pkthdr.csum_flags |= CSUM_IP; sw_csum = m0->m_pkthdr.csum_flags & ~ifp->if_hwassist; if (sw_csum & CSUM_DELAY_DATA) { - /* - * XXX: in_delayed_cksum assumes HBO for ip->ip_len (at least) - */ - NTOHS(ip->ip_len); - NTOHS(ip->ip_off); /* XXX: needed? */ in_delayed_cksum(m0); - HTONS(ip->ip_len); - HTONS(ip->ip_off); sw_csum &= ~CSUM_DELAY_DATA; } +#ifdef SCTP + if (sw_csum & CSUM_SCTP) { + sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2)); + sw_csum &= ~CSUM_SCTP; + } +#endif m0->m_pkthdr.csum_flags &= ifp->if_hwassist; - if (ntohs(ip->ip_len) <= ifp->if_mtu || - (ifp->if_hwassist & CSUM_FRAGMENT && - ((ip->ip_off & htons(IP_DF)) == 0))) { - /* - * ip->ip_len = htons(ip->ip_len); - * ip->ip_off = htons(ip->ip_off); - */ - ip->ip_sum = 0; - if (sw_csum & CSUM_DELAY_IP) { - /* From KAME */ - if (ip->ip_v == IPVERSION && - (ip->ip_hl << 2) == sizeof(*ip)) { - ip->ip_sum = in_cksum_hdr(ip); - } else { - ip->ip_sum = in_cksum(m0, ip->ip_hl << 2); - } - } - PF_UNLOCK(); - error = (*ifp->if_output)(ifp, m0, sintosa(dst), ro); - PF_LOCK(); - goto done; - } -#else - /* Copied from ip_output. */ -#ifdef IPSEC /* - * If deferred crypto processing is needed, check that the - * interface supports it. + * If small enough for interface, or the interface will take + * care of the fragmentation for us, we can just send directly. */ - if ((mtag = m_tag_find(m0, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL)) - != NULL && (ifp->if_capabilities & IFCAP_IPSEC) == 0) { - /* Notify IPsec to do its own crypto. */ - ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1)); - goto bad; - } -#endif /* IPSEC */ - - /* Catch routing changes wrt. hardware checksumming for TCP or UDP. 
*/ - if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT) { - if (!(ifp->if_capabilities & IFCAP_CSUM_TCPv4) || - ifp->if_bridge != NULL) { - in_delayed_cksum(m0); - m0->m_pkthdr.csum_flags &= ~M_TCPV4_CSUM_OUT; /* Clr */ - } - } else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT) { - if (!(ifp->if_capabilities & IFCAP_CSUM_UDPv4) || - ifp->if_bridge != NULL) { - in_delayed_cksum(m0); - m0->m_pkthdr.csum_flags &= ~M_UDPV4_CSUM_OUT; /* Clr */ - } - } - - if (ntohs(ip->ip_len) <= ifp->if_mtu) { + if (ip->ip_len <= ifp->if_mtu || + (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 || + ((ip->ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) { + ip->ip_len = htons(ip->ip_len); + ip->ip_off = htons(ip->ip_off); ip->ip_sum = 0; - if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) && - ifp->if_bridge == NULL) { - m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; -#ifdef __FreeBSD__ - KMOD_IPSTAT_INC(ips_outhwcsum); -#else - ipstat.ips_outhwcsum++; -#endif - } else + if (sw_csum & CSUM_DELAY_IP) ip->ip_sum = in_cksum(m0, ip->ip_hl << 2); - /* Update relevant hardware checksum stats for TCP/UDP */ - if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT) - KMOD_TCPSTAT_INC(tcps_outhwcsum); - else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT) - KMOD_UDPSTAT_INC(udps_outhwcsum); - error = (*ifp->if_output)(ifp, m0, sintosa(dst), NULL); + m0->m_flags &= ~(M_PROTOFLAGS); + error = (*ifp->if_output)(ifp, m0, sintosa(&dst), NULL); goto done; } -#endif - /* - * Too large for interface; fragment if possible. - * Must be able to put at least 8 bytes per fragment. - */ - if (ip->ip_off & htons(IP_DF)) { -#ifdef __FreeBSD__ + /* Balk when DF bit is set or the interface didn't support TSO. */ + if ((ip->ip_off & IP_DF) || (m0->m_pkthdr.csum_flags & CSUM_TSO)) { + error = EMSGSIZE; KMOD_IPSTAT_INC(ips_cantfrag); -#else - ipstat.ips_cantfrag++; -#endif if (r->rt != PF_DUPTO) { -#ifdef __FreeBSD__ - /* icmp_error() expects host byte ordering */ - NTOHS(ip->ip_len); - NTOHS(ip->ip_off); - PF_UNLOCK(); icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, ifp->if_mtu); - PF_LOCK(); -#else - icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, - ifp->if_mtu); -#endif goto done; } else goto bad; } - m1 = m0; -#ifdef __FreeBSD__ - /* - * XXX: is cheaper + less error prone than own function - */ - NTOHS(ip->ip_len); - NTOHS(ip->ip_off); error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist, sw_csum); -#else - error = ip_fragment(m0, ifp, ifp->if_mtu); -#endif - if (error) { -#ifndef __FreeBSD__ /* ip_fragment does not do m_freem() on FreeBSD */ - m0 = NULL; -#endif + if (error) goto bad; - } - for (m0 = m1; m0; m0 = m1) { + for (; m0; m0 = m1) { m1 = m0->m_nextpkt; - m0->m_nextpkt = 0; -#ifdef __FreeBSD__ + m0->m_nextpkt = NULL; if (error == 0) { - PF_UNLOCK(); - error = (*ifp->if_output)(ifp, m0, sintosa(dst), - NULL); - PF_LOCK(); + m0->m_flags &= ~(M_PROTOFLAGS); + error = (*ifp->if_output)(ifp, m0, sintosa(&dst), NULL); } else -#else - if (error == 0) - error = (*ifp->if_output)(ifp, m0, sintosa(dst), - NULL); - else -#endif m_freem(m0); } if (error == 0) -#ifdef __FreeBSD__ KMOD_IPSTAT_INC(ips_fragmented); -#else - ipstat.ips_fragmented++; -#endif done: if (r->rt != PF_DUPTO) *m = NULL; - if (ro == &iproute && ro->ro_rt) - RTFREE(ro->ro_rt); return; +bad_locked: + if (s) + PF_STATE_UNLOCK(s); bad: m_freem(m0); goto done; @@ -6177,146 +5254,115 @@ bad: #endif /* INET */ #ifdef INET6 -void +static void pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, struct pf_state *s, struct 
pf_pdesc *pd) { struct mbuf *m0; - struct route_in6 ip6route; - struct route_in6 *ro; - struct sockaddr_in6 *dst; + struct sockaddr_in6 dst; struct ip6_hdr *ip6; struct ifnet *ifp = NULL; struct pf_addr naddr; struct pf_src_node *sn = NULL; - if (m == NULL || *m == NULL || r == NULL || - (dir != PF_IN && dir != PF_OUT) || oifp == NULL) - panic("pf_route6: invalid parameters"); + KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__)); + KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: invalid direction", + __func__)); -#ifdef __FreeBSD__ - if (pd->pf_mtag->routed++ > 3) { -#else - if ((*m)->m_pkthdr.pf.routed++ > 3) { -#endif + if ((pd->pf_mtag == NULL && + ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) || + pd->pf_mtag->routed++ > 3) { m0 = *m; *m = NULL; - goto bad; + goto bad_locked; } if (r->rt == PF_DUPTO) { -#ifdef __FreeBSD__ - if ((m0 = m_dup(*m, M_DONTWAIT)) == NULL) -#else - if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL) -#endif + if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) { + if (s) + PF_STATE_UNLOCK(s); return; + } } else { - if ((r->rt == PF_REPLYTO) == (r->direction == dir)) + if ((r->rt == PF_REPLYTO) == (r->direction == dir)) { + if (s) + PF_STATE_UNLOCK(s); return; + } m0 = *m; } - if (m0->m_len < sizeof(struct ip6_hdr)) { - DPFPRINTF(PF_DEBUG_URGENT, - ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n")); - goto bad; - } ip6 = mtod(m0, struct ip6_hdr *); - ro = &ip6route; - bzero((caddr_t)ro, sizeof(*ro)); - dst = (struct sockaddr_in6 *)&ro->ro_dst; - dst->sin6_family = AF_INET6; - dst->sin6_len = sizeof(*dst); - dst->sin6_addr = ip6->ip6_dst; + bzero(&dst, sizeof(dst)); + dst.sin6_family = AF_INET6; + dst.sin6_len = sizeof(dst); + dst.sin6_addr = ip6->ip6_dst; /* Cheat. XXX why only in the v6 case??? */ if (r->rt == PF_FASTROUTE) { -#ifdef __FreeBSD__ + if (s) + PF_STATE_UNLOCK(s); m0->m_flags |= M_SKIP_FIREWALL; - PF_UNLOCK(); - ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL); -#else - m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED; ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL); -#endif return; } if (TAILQ_EMPTY(&r->rpool.list)) { DPFPRINTF(PF_DEBUG_URGENT, - ("pf_route6: TAILQ_EMPTY(&r->rpool.list)\n")); - goto bad; + ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__)); + goto bad_locked; } if (s == NULL) { pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src, &naddr, NULL, &sn); if (!PF_AZERO(&naddr, AF_INET6)) - PF_ACPY((struct pf_addr *)&dst->sin6_addr, + PF_ACPY((struct pf_addr *)&dst.sin6_addr, &naddr, AF_INET6); ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL; } else { if (!PF_AZERO(&s->rt_addr, AF_INET6)) - PF_ACPY((struct pf_addr *)&dst->sin6_addr, + PF_ACPY((struct pf_addr *)&dst.sin6_addr, &s->rt_addr, AF_INET6); ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; } + + if (s) + PF_STATE_UNLOCK(s); + if (ifp == NULL) goto bad; if (oifp != ifp) { -#ifdef __FreeBSD__ - PF_UNLOCK(); - if (pf_test6(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) { - PF_LOCK(); - goto bad; - } else if (m0 == NULL) { - PF_LOCK(); - goto done; - } - PF_LOCK(); -#else if (pf_test6(PF_OUT, ifp, &m0, NULL) != PF_PASS) goto bad; else if (m0 == NULL) goto done; -#endif if (m0->m_len < sizeof(struct ip6_hdr)) { DPFPRINTF(PF_DEBUG_URGENT, - ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n")); + ("%s: m0->m_len < sizeof(struct ip6_hdr)\n", + __func__)); goto bad; } ip6 = mtod(m0, struct ip6_hdr *); } + if (ifp->if_flags & IFF_LOOPBACK) + m0->m_flags |= M_SKIP_FIREWALL; + /* * If the packet is too large for the outgoing interface, * send back an icmp6 error. 
*/ - if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr)) - dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index); - if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) { -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - nd6_output(ifp, ifp, m0, dst, NULL); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif - } else { + if (IN6_IS_SCOPE_EMBED(&dst.sin6_addr)) + dst.sin6_addr.s6_addr16[1] = htons(ifp->if_index); + if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) + nd6_output(ifp, ifp, m0, &dst, NULL); + else { in6_ifstat_inc(ifp, ifs6_in_toobig); -#ifdef __FreeBSD__ - if (r->rt != PF_DUPTO) { - PF_UNLOCK(); - icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); - PF_LOCK(); - } else -#else if (r->rt != PF_DUPTO) icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); else -#endif goto bad; } @@ -6325,13 +5371,15 @@ done: *m = NULL; return; +bad_locked: + if (s) + PF_STATE_UNLOCK(s); bad: m_freem(m0); goto done; } #endif /* INET6 */ -#ifdef __FreeBSD__ /* * FreeBSD supports cksum offloads for the following drivers. * em(4), fxp(4), ixgb(4), lge(4), ndis(4), nge(4), re(4), @@ -6350,7 +5398,7 @@ bad: * TCP/UDP layer. * Also, set csum_data to 0xffff to force cksum validation. */ -int +static int pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af) { u_int16_t sum = 0; @@ -6370,7 +5418,7 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t a } else { ip = mtod(m, struct ip *); sum = in_pseudo(ip->ip_src.s_addr, - ip->ip_dst.s_addr, htonl((u_short)len + + ip->ip_dst.s_addr, htonl((u_short)len + m->m_pkthdr.csum_data + IPPROTO_TCP)); } sum ^= 0xffff; @@ -6465,246 +5513,72 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t a } return (0); } -#else /* !__FreeBSD__ */ -/* - * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag - * off is the offset where the protocol header starts - * len is the total length of protocol header plus payload - * returns 0 when the checksum is valid, otherwise returns 1. 
- */ -int -pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, - sa_family_t af) -{ - u_int16_t flag_ok, flag_bad; - u_int16_t sum; - - switch (p) { - case IPPROTO_TCP: - flag_ok = M_TCP_CSUM_IN_OK; - flag_bad = M_TCP_CSUM_IN_BAD; - break; - case IPPROTO_UDP: - flag_ok = M_UDP_CSUM_IN_OK; - flag_bad = M_UDP_CSUM_IN_BAD; - break; - case IPPROTO_ICMP: -#ifdef INET6 - case IPPROTO_ICMPV6: -#endif /* INET6 */ - flag_ok = flag_bad = 0; - break; - default: - return (1); - } - if (m->m_pkthdr.csum_flags & flag_ok) - return (0); - if (m->m_pkthdr.csum_flags & flag_bad) - return (1); - if (off < sizeof(struct ip) || len < sizeof(struct udphdr)) - return (1); - if (m->m_pkthdr.len < off + len) - return (1); - switch (af) { -#ifdef INET - case AF_INET: - if (p == IPPROTO_ICMP) { - if (m->m_len < off) - return (1); - m->m_data += off; - m->m_len -= off; - sum = in_cksum(m, len); - m->m_data -= off; - m->m_len += off; - } else { - if (m->m_len < sizeof(struct ip)) - return (1); - sum = in4_cksum(m, p, off, len); - } - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - if (m->m_len < sizeof(struct ip6_hdr)) - return (1); - sum = in6_cksum(m, p, off, len); - break; -#endif /* INET6 */ - default: - return (1); - } - if (sum) { - m->m_pkthdr.csum_flags |= flag_bad; - switch (p) { - case IPPROTO_TCP: - KMOD_TCPSTAT_INC(tcps_rcvbadsum); - break; - case IPPROTO_UDP: - KMOD_UDPSTAT_INC(udps_badsum); - break; -#ifdef INET - case IPPROTO_ICMP: - KMOD_ICMPSTAT_INC(icps_checksum); - break; -#endif -#ifdef INET6 - case IPPROTO_ICMPV6: - KMOD_ICMP6STAT_INC(icp6s_checksum); - break; -#endif /* INET6 */ - } - return (1); - } - m->m_pkthdr.csum_flags |= flag_ok; - return (0); -} -#endif - -#ifndef __FreeBSD__ -struct pf_divert * -pf_find_divert(struct mbuf *m) -{ - struct m_tag *mtag; - - if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) - return (NULL); - - return ((struct pf_divert *)(mtag + 1)); -} - -struct pf_divert * -pf_get_divert(struct mbuf *m) -{ - struct m_tag *mtag; - - if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) { - mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert), - M_NOWAIT); - if (mtag == NULL) - return (NULL); - bzero(mtag + 1, sizeof(struct pf_divert)); - m_tag_prepend(m, mtag); - } - - return ((struct pf_divert *)(mtag + 1)); -} -#endif #ifdef INET int -#ifdef __FreeBSD__ -pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, - struct ether_header *eh, struct inpcb *inp) -#else -pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, - struct ether_header *eh) -#endif +pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) { struct pfi_kif *kif; u_short action, reason = 0, log = 0; struct mbuf *m = *m0; -#ifdef __FreeBSD__ struct ip *h = NULL; struct m_tag *ipfwtag; struct pf_rule *a = NULL, *r = &V_pf_default_rule, *tr, *nr; -#else - struct ip *h; - struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr; -#endif struct pf_state *s = NULL; struct pf_ruleset *ruleset = NULL; struct pf_pdesc pd; int off, dirndx, pqid = 0; -#ifdef __FreeBSD__ - PF_LOCK(); + M_ASSERTPKTHDR(m); + if (!V_pf_status.running) - { - PF_UNLOCK(); return (PF_PASS); - } -#else - if (!pf_status.running) - return (PF_PASS); -#endif memset(&pd, 0, sizeof(pd)); -#ifdef __FreeBSD__ - if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) { - PF_UNLOCK(); - DPFPRINTF(PF_DEBUG_URGENT, - ("pf_test: pf_get_mtag returned NULL\n")); - return (PF_DROP); - } -#endif -#ifndef __FreeBSD__ - if (ifp->if_type == IFT_CARP && ifp->if_carpdev) - kif = (struct pfi_kif 
*)ifp->if_carpdev->if_pf_kif; - else -#endif - kif = (struct pfi_kif *)ifp->if_pf_kif; + + kif = (struct pfi_kif *)ifp->if_pf_kif; if (kif == NULL) { -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif DPFPRINTF(PF_DEBUG_URGENT, ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname)); return (PF_DROP); } if (kif->pfik_flags & PFI_IFLAG_SKIP) -#ifdef __FreeBSD__ - { - PF_UNLOCK(); -#endif return (PF_PASS); -#ifdef __FreeBSD__ - } -#endif -#ifdef __FreeBSD__ - M_ASSERTPKTHDR(m); -#else -#ifdef DIAGNOSTIC - if ((m->m_flags & M_PKTHDR) == 0) - panic("non-M_PKTHDR is passed to pf_test"); -#endif /* DIAGNOSTIC */ -#endif + if (m->m_flags & M_SKIP_FIREWALL) + return (PF_PASS); - if (m->m_pkthdr.len < (int)sizeof(*h)) { + if (m->m_pkthdr.len < (int)sizeof(struct ip)) { action = PF_DROP; REASON_SET(&reason, PFRES_SHORT); log = 1; goto done; } -#ifdef __FreeBSD__ - if (m->m_flags & M_SKIP_FIREWALL) { - PF_UNLOCK(); - return (PF_PASS); - } -#else - if (m->m_pkthdr.pf.flags & PF_TAG_GENERATED) - return (PF_PASS); -#endif - -#ifdef __FreeBSD__ + pd.pf_mtag = pf_find_mtag(m); + + PF_RULES_RLOCK(); + if (ip_divert_ptr != NULL && ((ipfwtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL)) != NULL)) { struct ipfw_rule_ref *rr = (struct ipfw_rule_ref *)(ipfwtag+1); if (rr->info & IPFW_IS_DIVERT && rr->rulenum == 0) { + if (pd.pf_mtag == NULL && + ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { + action = PF_DROP; + goto done; + } pd.pf_mtag->flags |= PF_PACKET_LOOPED; m_tag_delete(m, ipfwtag); } - if (pd.pf_mtag->flags & PF_FASTFWD_OURS_PRESENT) { + if (pd.pf_mtag && pd.pf_mtag->flags & PF_FASTFWD_OURS_PRESENT) { m->m_flags |= M_FASTFWD_OURS; pd.pf_mtag->flags &= ~PF_FASTFWD_OURS_PRESENT; } - } else -#endif - /* We do IP header normalization and packet reassembly here */ - if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) { + } else if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) { + /* We do IP header normalization and packet reassembly here */ action = PF_DROP; goto done; } @@ -6712,7 +5586,7 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, h = mtod(m, struct ip *); off = h->ip_hl << 2; - if (off < (int)sizeof(*h)) { + if (off < (int)sizeof(struct ip)) { action = PF_DROP; REASON_SET(&reason, PFRES_SHORT); log = 1; @@ -6731,7 +5605,6 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, pd.af = AF_INET; pd.tos = h->ip_tos; pd.tot_len = ntohs(h->ip_len); - pd.eh = eh; /* handle fragments that didn't get reassembled by normalization */ if (h->ip_off & htons(IP_MF | IP_OFFMASK)) { @@ -6760,25 +5633,14 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { -#if NPFSYNC > 0 -#ifdef __FreeBSD__ if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); -#else - pfsync_update_state(s); -#endif -#endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) -#ifdef __FreeBSD__ - action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, NULL, inp); -#else - action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, &ipintrq); -#endif + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); break; } @@ -6800,25 +5662,14 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, } action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); if (action == PF_PASS) { -#if NPFSYNC > 0 -#ifdef __FreeBSD__ if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); -#else - pfsync_update_state(s); -#endif -#endif /* NPFSYNC */ r = 
s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) -#ifdef __FreeBSD__ - action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, NULL, inp); -#else - action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, &ipintrq); -#endif + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); break; } @@ -6834,25 +5685,14 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { -#if NPFSYNC > 0 -#ifdef __FreeBSD__ if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); -#else - pfsync_update_state(s); -#endif -#endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) -#ifdef __FreeBSD__ - action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, NULL, inp); -#else - action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, &ipintrq); -#endif + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); break; } @@ -6868,29 +5708,19 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, default: action = pf_test_state_other(&s, dir, kif, m, &pd); if (action == PF_PASS) { -#if NPFSYNC > 0 -#ifdef __FreeBSD__ if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); -#else - pfsync_update_state(s); -#endif -#endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) -#ifdef __FreeBSD__ - action = pf_test_rule(&r, &s, dir, kif, m, off, h, - &pd, &a, &ruleset, NULL, inp); -#else - action = pf_test_rule(&r, &s, dir, kif, m, off, h, - &pd, &a, &ruleset, &ipintrq); -#endif + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); break; } done: + PF_RULES_RUNLOCK(); if (action == PF_PASS && h->ip_hl > 5 && !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) { action = PF_DROP; @@ -6900,23 +5730,20 @@ done: ("pf: dropping packet with ip options\n")); } - if ((s && s->tag) || r->rtableid >= 0) -#ifdef __FreeBSD__ - pf_tag_packet(m, s ? s->tag : 0, r->rtableid, pd.pf_mtag); -#else - pf_tag_packet(m, s ? 
s->tag : 0, r->rtableid); -#endif - - if (dir == PF_IN && s && s->key[PF_SK_STACK]) -#ifdef __FreeBSD__ - pd.pf_mtag->statekey = s->key[PF_SK_STACK]; -#else - m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK]; -#endif + if (s && s->tag > 0 && pf_tag_packet(m, &pd, s->tag)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + } + if (r->rtableid >= 0) + M_SETFIB(m, r->rtableid); #ifdef ALTQ if (action == PF_PASS && r->qid) { -#ifdef __FreeBSD__ + if (pd.pf_mtag == NULL && + ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + } if (pqid || (pd.tos & IPTOS_LOWDELAY)) pd.pf_mtag->qid = r->pqid; else @@ -6924,14 +5751,6 @@ done: /* add hints for ecn */ pd.pf_mtag->hdr = h; -#else - if (pqid || (pd.tos & IPTOS_LOWDELAY)) - m->m_pkthdr.pf.qid = r->pqid; - else - m->m_pkthdr.pf.qid = r->qid; - /* add hints for ecn */ - m->m_pkthdr.pf.hdr = h; -#endif } #endif /* ALTQ */ @@ -6945,35 +5764,37 @@ done: (s->nat_rule.ptr->action == PF_RDR || s->nat_rule.ptr->action == PF_BINAT) && (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) -#ifdef __FreeBSD__ m->m_flags |= M_SKIP_FIREWALL; -#else - m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST; -#endif -#ifdef __FreeBSD__ - if (action == PF_PASS && r->divert.port && - ip_divert_ptr != NULL && !PACKET_LOOPED()) { + if (action == PF_PASS && r->divert.port && ip_divert_ptr != NULL && + !PACKET_LOOPED(&pd)) { ipfwtag = m_tag_alloc(MTAG_IPFW_RULE, 0, - sizeof(struct ipfw_rule_ref), M_NOWAIT | M_ZERO); + sizeof(struct ipfw_rule_ref), M_NOWAIT | M_ZERO); if (ipfwtag != NULL) { ((struct ipfw_rule_ref *)(ipfwtag+1))->info = ntohs(r->divert.port); ((struct ipfw_rule_ref *)(ipfwtag+1))->rulenum = dir; - m_tag_prepend(m, ipfwtag); - - PF_UNLOCK(); + if (s) + PF_STATE_UNLOCK(s); + m_tag_prepend(m, ipfwtag); if (m->m_flags & M_FASTFWD_OURS) { + if (pd.pf_mtag == NULL && + ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + log = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: failed to allocate tag\n")); + } pd.pf_mtag->flags |= PF_FASTFWD_OURS_PRESENT; m->m_flags &= ~M_FASTFWD_OURS; } - - ip_divert_ptr(*m0, - dir == PF_IN ? DIR_IN : DIR_OUT); + ip_divert_ptr(*m0, dir == PF_IN ? DIR_IN : DIR_OUT); *m0 = NULL; + return (action); } else { /* XXX: ipfw has the same behaviour! */ @@ -6984,17 +5805,6 @@ done: ("pf: failed to allocate divert tag\n")); } } -#else - if (dir == PF_IN && action == PF_PASS && r->divert.port) { - struct pf_divert *divert; - - if ((divert = pf_get_divert(m))) { - m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED; - divert->port = r->divert.port; - divert->addr.ipv4 = r->divert.addr.v4; - } - } -#endif if (log) { struct pf_rule *lr; @@ -7004,8 +5814,8 @@ done: lr = s->nat_rule.ptr; else lr = r; - PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, lr, a, ruleset, - &pd); + PFLOG_PACKET(kif, m, AF_INET, dir, reason, lr, a, ruleset, &pd, + (s == NULL)); } kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len; @@ -7038,11 +5848,7 @@ done: } tr = r; nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; -#ifdef __FreeBSD__ if (nr != NULL && r == &V_pf_default_rule) -#else - if (nr != NULL && r == &pf_default_rule) -#endif tr = nr; if (tr->src.addr.type == PF_ADDR_TABLE) pfr_update_stats(tr->src.addr.p.tbl, @@ -7068,96 +5874,53 @@ done: action = PF_PASS; break; default: - /* pf_route can free the mbuf causing *m0 to become NULL */ - if (r->rt) + /* pf_route() returns unlocked. 
*/ + if (r->rt) { pf_route(m0, r, dir, kif->pfik_ifp, s, &pd); + return (action); + } break; } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif + if (s) + PF_STATE_UNLOCK(s); + return (action); } #endif /* INET */ #ifdef INET6 int -#ifdef __FreeBSD__ -pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, - struct ether_header *eh, struct inpcb *inp) -#else -pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, - struct ether_header *eh) -#endif +pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) { struct pfi_kif *kif; u_short action, reason = 0, log = 0; struct mbuf *m = *m0, *n = NULL; -#ifdef __FreeBSD__ struct ip6_hdr *h = NULL; struct pf_rule *a = NULL, *r = &V_pf_default_rule, *tr, *nr; -#else - struct ip6_hdr *h; - struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr; -#endif struct pf_state *s = NULL; struct pf_ruleset *ruleset = NULL; struct pf_pdesc pd; int off, terminal = 0, dirndx, rh_cnt = 0; -#ifdef __FreeBSD__ - PF_LOCK(); - if (!V_pf_status.running) { - PF_UNLOCK(); - return (PF_PASS); - } -#else - if (!pf_status.running) + M_ASSERTPKTHDR(m); + + if (!V_pf_status.running) return (PF_PASS); -#endif memset(&pd, 0, sizeof(pd)); -#ifdef __FreeBSD__ - if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) { - PF_UNLOCK(); - DPFPRINTF(PF_DEBUG_URGENT, - ("pf_test: pf_get_mtag returned NULL\n")); - return (PF_DROP); - } -#endif -#ifndef __FreeBSD__ - if (ifp->if_type == IFT_CARP && ifp->if_carpdev) - kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif; - else -#endif - kif = (struct pfi_kif *)ifp->if_pf_kif; + pd.pf_mtag = pf_find_mtag(m); + if (pd.pf_mtag && pd.pf_mtag->flags & PF_TAG_GENERATED) + return (PF_PASS); + + kif = (struct pfi_kif *)ifp->if_pf_kif; if (kif == NULL) { -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif DPFPRINTF(PF_DEBUG_URGENT, ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname)); return (PF_DROP); } if (kif->pfik_flags & PFI_IFLAG_SKIP) -#ifdef __FreeBSD__ - { - PF_UNLOCK(); -#endif return (PF_PASS); -#ifdef __FreeBSD__ - } -#endif - -#ifdef __FreeBSD__ - M_ASSERTPKTHDR(m); -#else -#ifdef DIAGNOSTIC - if ((m->m_flags & M_PKTHDR) == 0) - panic("non-M_PKTHDR is passed to pf_test6"); -#endif /* DIAGNOSTIC */ -#endif if (m->m_pkthdr.len < (int)sizeof(*h)) { action = PF_DROP; @@ -7166,16 +5929,7 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, goto done; } -#ifdef __FreeBSD__ - if (pd.pf_mtag->flags & PF_TAG_GENERATED) { - PF_UNLOCK(); -#else - if (m->m_pkthdr.pf.flags & PF_TAG_GENERATED) -#endif - return (PF_PASS); -#ifdef __FreeBSD__ - } -#endif + PF_RULES_RLOCK(); /* We do IP header normalization and packet reassembly here */ if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) { @@ -7208,7 +5962,6 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, pd.af = AF_INET6; pd.tos = 0; pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); - pd.eh = eh; off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr); pd.proto = h->ip6_nxt; @@ -7300,25 +6053,14 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { -#if NPFSYNC > 0 -#ifdef __FreeBSD__ if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); -#else - pfsync_update_state(s); -#endif -#endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) -#ifdef __FreeBSD__ - action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, NULL, inp); -#else - action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, &ip6intrq); 
-#endif + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); break; } @@ -7340,25 +6082,14 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, } action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); if (action == PF_PASS) { -#if NPFSYNC > 0 -#ifdef __FreeBSD__ if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); -#else - pfsync_update_state(s); -#endif -#endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) -#ifdef __FreeBSD__ - action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, NULL, inp); -#else - action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, &ip6intrq); -#endif + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); break; } @@ -7381,54 +6112,33 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { -#if NPFSYNC > 0 -#ifdef __FreeBSD__ if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); -#else - pfsync_update_state(s); -#endif -#endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) -#ifdef __FreeBSD__ - action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, NULL, inp); -#else - action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, &ip6intrq); -#endif + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); break; } default: action = pf_test_state_other(&s, dir, kif, m, &pd); if (action == PF_PASS) { -#if NPFSYNC > 0 -#ifdef __FreeBSD__ if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); -#else - pfsync_update_state(s); -#endif -#endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) -#ifdef __FreeBSD__ - action = pf_test_rule(&r, &s, dir, kif, m, off, h, - &pd, &a, &ruleset, NULL, inp); -#else - action = pf_test_rule(&r, &s, dir, kif, m, off, h, - &pd, &a, &ruleset, &ip6intrq); -#endif + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); break; } done: + PF_RULES_RUNLOCK(); if (n != m) { m_freem(n); n = NULL; @@ -7444,37 +6154,26 @@ done: ("pf: dropping packet with dangerous v6 headers\n")); } - if ((s && s->tag) || r->rtableid >= 0) -#ifdef __FreeBSD__ - pf_tag_packet(m, s ? s->tag : 0, r->rtableid, pd.pf_mtag); -#else - pf_tag_packet(m, s ? 
s->tag : 0, r->rtableid); -#endif - - if (dir == PF_IN && s && s->key[PF_SK_STACK]) -#ifdef __FreeBSD__ - pd.pf_mtag->statekey = s->key[PF_SK_STACK]; -#else - m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK]; -#endif + if (s && s->tag > 0 && pf_tag_packet(m, &pd, s->tag)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + } + if (r->rtableid >= 0) + M_SETFIB(m, r->rtableid); #ifdef ALTQ if (action == PF_PASS && r->qid) { -#ifdef __FreeBSD__ + if (pd.pf_mtag == NULL && + ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + } if (pd.tos & IPTOS_LOWDELAY) pd.pf_mtag->qid = r->pqid; else pd.pf_mtag->qid = r->qid; /* add hints for ecn */ pd.pf_mtag->hdr = h; -#else - if (pd.tos & IPTOS_LOWDELAY) - m->m_pkthdr.pf.qid = r->pqid; - else - m->m_pkthdr.pf.qid = r->qid; - /* add hints for ecn */ - m->m_pkthdr.pf.hdr = h; -#endif } #endif /* ALTQ */ @@ -7483,27 +6182,11 @@ done: (s->nat_rule.ptr->action == PF_RDR || s->nat_rule.ptr->action == PF_BINAT) && IN6_IS_ADDR_LOOPBACK(&pd.dst->v6)) -#ifdef __FreeBSD__ m->m_flags |= M_SKIP_FIREWALL; -#else - m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST; -#endif -#ifdef __FreeBSD__ /* XXX: Anybody working on it?! */ if (r->divert.port) printf("pf: divert(9) is not supported for IPv6\n"); -#else - if (dir == PF_IN && action == PF_PASS && r->divert.port) { - struct pf_divert *divert; - - if ((divert = pf_get_divert(m))) { - m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED; - divert->port = r->divert.port; - divert->addr.ipv6 = r->divert.addr.v6; - } - } -#endif if (log) { struct pf_rule *lr; @@ -7513,8 +6196,8 @@ done: lr = s->nat_rule.ptr; else lr = r; - PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, lr, a, ruleset, - &pd); + PFLOG_PACKET(kif, m, AF_INET6, dir, reason, lr, a, ruleset, + &pd, (s == NULL)); } kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len; @@ -7547,11 +6230,7 @@ done: } tr = r; nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; -#ifdef __FreeBSD__ if (nr != NULL && r == &V_pf_default_rule) -#else - if (nr != NULL && r == &pf_default_rule) -#endif tr = nr; if (tr->src.addr.type == PF_ADDR_TABLE) pfr_update_stats(tr->src.addr.p.tbl, @@ -7575,46 +6254,17 @@ done: action = PF_PASS; break; default: - /* pf_route6 can free the mbuf causing *m0 to become NULL */ - if (r->rt) + /* pf_route6() returns unlocked. */ + if (r->rt) { pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd); + return (action); + } break; } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif + if (s) + PF_STATE_UNLOCK(s); + return (action); } #endif /* INET6 */ - -int -pf_check_congestion(struct ifqueue *ifq) -{ -#ifdef __FreeBSD__ - /* XXX_IMPORT: later */ - return (0); -#else - if (ifq->ifq_congestion) - return (1); - else - return (0); -#endif -} - -/* - * must be called whenever any addressing information such as - * address, port, protocol has changed - */ -void -pf_pkt_addr_changed(struct mbuf *m) -{ -#ifdef __FreeBSD__ - struct pf_mtag *pf_tag; - - if ((pf_tag = pf_find_mtag(m)) != NULL) - pf_tag->statekey = NULL; -#else - m->m_pkthdr.pf.statekey = NULL; -#endif -} diff --git a/sys/contrib/pf/net/pf_if.c b/sys/contrib/pf/net/pf_if.c index b4491b8..c010b65 100644 --- a/sys/contrib/pf/net/pf_if.c +++ b/sys/contrib/pf/net/pf_if.c @@ -32,137 +32,90 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ -#if defined(__FreeBSD__) -#include "opt_inet.h" -#include "opt_inet6.h" - #include <sys/cdefs.h> + __FBSDID("$FreeBSD$"); -#endif + +#include "opt_inet.h" +#include "opt_inet6.h" #include <sys/param.h> -#include <sys/systm.h> -#ifdef __FreeBSD__ -#include <sys/malloc.h> -#endif -#include <sys/mbuf.h> -#include <sys/filio.h> -#include <sys/socket.h> -#include <sys/socketvar.h> #include <sys/kernel.h> -#ifndef __FreeBSD__ -#include <sys/device.h> -#endif -#include <sys/time.h> -#ifndef __FreeBSD__ -#include <sys/pool.h> -#endif +#include <sys/socket.h> #include <net/if.h> -#include <net/if_types.h> -#ifdef __FreeBSD__ -#include <net/vnet.h> -#endif - -#include <netinet/in.h> -#include <netinet/in_var.h> -#include <netinet/in_systm.h> -#include <netinet/ip.h> -#include <netinet/ip_var.h> - #include <net/pfvar.h> +#include <net/route.h> -#ifdef INET6 -#include <netinet/ip6.h> -#endif /* INET6 */ - -#ifdef __FreeBSD__ VNET_DEFINE(struct pfi_kif *, pfi_all); -VNET_DEFINE(uma_zone_t, pfi_addr_pl); -VNET_DEFINE(struct pfi_ifhead, pfi_ifs); -#define V_pfi_ifs VNET(pfi_ifs) -VNET_DEFINE(long, pfi_update); -#define V_pfi_update VNET(pfi_update) -VNET_DEFINE(struct pfr_addr *, pfi_buffer); +static VNET_DEFINE(long, pfi_update); +#define V_pfi_update VNET(pfi_update) +#define PFI_BUFFER_MAX 0x10000 + +static VNET_DEFINE(struct pfr_addr *, pfi_buffer); +static VNET_DEFINE(int, pfi_buffer_cnt); +static VNET_DEFINE(int, pfi_buffer_max); #define V_pfi_buffer VNET(pfi_buffer) -VNET_DEFINE(int, pfi_buffer_cnt); #define V_pfi_buffer_cnt VNET(pfi_buffer_cnt) -VNET_DEFINE(int, pfi_buffer_max); #define V_pfi_buffer_max VNET(pfi_buffer_max) -#else -struct pfi_kif *pfi_all = NULL; -struct pool pfi_addr_pl; -struct pfi_ifhead pfi_ifs; -long pfi_update = 1; -struct pfr_addr *pfi_buffer; -int pfi_buffer_cnt; -int pfi_buffer_max; -#endif -#ifdef __FreeBSD__ + eventhandler_tag pfi_attach_cookie; eventhandler_tag pfi_detach_cookie; eventhandler_tag pfi_attach_group_cookie; eventhandler_tag pfi_change_group_cookie; eventhandler_tag pfi_detach_group_cookie; eventhandler_tag pfi_ifaddr_event_cookie; -#endif - -void pfi_kif_update(struct pfi_kif *); -void pfi_dynaddr_update(struct pfi_dynaddr *dyn); -void pfi_table_update(struct pfr_ktable *, struct pfi_kif *, - int, int); -void pfi_kifaddr_update(void *); -void pfi_instance_add(struct ifnet *, int, int); -void pfi_address_add(struct sockaddr *, int, int); -int pfi_if_compare(struct pfi_kif *, struct pfi_kif *); -int pfi_skip_if(const char *, struct pfi_kif *); -int pfi_unmask(void *); -#ifdef __FreeBSD__ -void pfi_attach_ifnet_event(void * __unused, struct ifnet *); -void pfi_detach_ifnet_event(void * __unused, struct ifnet *); -void pfi_attach_group_event(void *, struct ifg_group *); -void pfi_change_group_event(void *, char *); -void pfi_detach_group_event(void *, struct ifg_group *); -void pfi_ifaddr_event(void * __unused, struct ifnet *); -#endif -RB_PROTOTYPE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); -RB_GENERATE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); - -#define PFI_BUFFER_MAX 0x10000 -#define PFI_MTYPE M_IFADDR +static void pfi_attach_ifnet(struct ifnet *); +static void pfi_attach_ifgroup(struct ifg_group *); + +static void pfi_kif_update(struct pfi_kif *); +static void pfi_dynaddr_update(struct pfi_dynaddr *dyn); +static void pfi_table_update(struct pfr_ktable *, struct pfi_kif *, int, + int); +static void pfi_instance_add(struct ifnet *, int, int); +static void pfi_address_add(struct sockaddr *, int, int); +static int pfi_if_compare(struct 
pfi_kif *, struct pfi_kif *); +static int pfi_skip_if(const char *, struct pfi_kif *); +static int pfi_unmask(void *); +static void pfi_attach_ifnet_event(void * __unused, struct ifnet *); +static void pfi_detach_ifnet_event(void * __unused, struct ifnet *); +static void pfi_attach_group_event(void *, struct ifg_group *); +static void pfi_change_group_event(void *, char *); +static void pfi_detach_group_event(void *, struct ifg_group *); +static void pfi_ifaddr_event(void * __unused, struct ifnet *); + +RB_HEAD(pfi_ifhead, pfi_kif); +static RB_PROTOTYPE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); +static RB_GENERATE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); +static VNET_DEFINE(struct pfi_ifhead, pfi_ifs); +#define V_pfi_ifs VNET(pfi_ifs) + +#define PFI_BUFFER_MAX 0x10000 +MALLOC_DEFINE(PFI_MTYPE, "pf_ifnet", "pf(4) interface database"); + +LIST_HEAD(pfi_list, pfi_kif); +static VNET_DEFINE(struct pfi_list, pfi_unlinked_kifs); +#define V_pfi_unlinked_kifs VNET(pfi_unlinked_kifs) +static struct mtx pfi_unlnkdkifs_mtx; void pfi_initialize(void) { -#ifdef __FreeBSD__ - if (V_pfi_all != NULL) /* already initialized */ -#else - if (pfi_all != NULL) /* already initialized */ -#endif - return; + struct ifg_group *ifg; + struct ifnet *ifp; + struct pfi_kif *kif; -#ifndef __FreeBSD__ - pool_init(&V_pfi_addr_pl, sizeof(struct pfi_dynaddr), 0, 0, 0, - "pfiaddrpl", &pool_allocator_nointr); -#endif -#ifdef __FreeBSD__ V_pfi_buffer_max = 64; V_pfi_buffer = malloc(V_pfi_buffer_max * sizeof(*V_pfi_buffer), PFI_MTYPE, M_WAITOK); - if ((V_pfi_all = pfi_kif_get(IFG_ALL)) == NULL) -#else - pfi_buffer_max = 64; - pfi_buffer = malloc(pfi_buffer_max * sizeof(*pfi_buffer), - PFI_MTYPE, M_WAITOK); + mtx_init(&pfi_unlnkdkifs_mtx, "pf unlinked interfaces", NULL, MTX_DEF); - if ((pfi_all = pfi_kif_get(IFG_ALL)) == NULL) -#endif - panic("pfi_kif_get for pfi_all failed"); -#ifdef __FreeBSD__ - struct ifg_group *ifg; - struct ifnet *ifp; + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); + PF_RULES_WLOCK(); + V_pfi_all = pfi_kif_attach(kif, IFG_ALL); + PF_RULES_WUNLOCK(); IFNET_RLOCK(); TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next) @@ -183,144 +136,131 @@ pfi_initialize(void) pfi_detach_group_event, curvnet, EVENTHANDLER_PRI_ANY); pfi_ifaddr_event_cookie = EVENTHANDLER_REGISTER(ifaddr_event, pfi_ifaddr_event, NULL, EVENTHANDLER_PRI_ANY); -#endif } -#ifdef __FreeBSD__ void pfi_cleanup(void) { struct pfi_kif *p; - PF_UNLOCK(); EVENTHANDLER_DEREGISTER(ifnet_arrival_event, pfi_attach_cookie); EVENTHANDLER_DEREGISTER(ifnet_departure_event, pfi_detach_cookie); EVENTHANDLER_DEREGISTER(group_attach_event, pfi_attach_group_cookie); EVENTHANDLER_DEREGISTER(group_change_event, pfi_change_group_cookie); EVENTHANDLER_DEREGISTER(group_detach_event, pfi_detach_group_cookie); EVENTHANDLER_DEREGISTER(ifaddr_event, pfi_ifaddr_event_cookie); - PF_LOCK(); V_pfi_all = NULL; while ((p = RB_MIN(pfi_ifhead, &V_pfi_ifs))) { - if (p->pfik_rules || p->pfik_states) { - printf("pfi_cleanup: dangling refs for %s\n", - p->pfik_name); - } - RB_REMOVE(pfi_ifhead, &V_pfi_ifs, p); free(p, PFI_MTYPE); } + while ((p = LIST_FIRST(&V_pfi_unlinked_kifs))) { + LIST_REMOVE(p, pfik_list); + free(p, PFI_MTYPE); + } + + mtx_destroy(&pfi_unlnkdkifs_mtx); + free(V_pfi_buffer, PFI_MTYPE); } -#endif struct pfi_kif * -pfi_kif_get(const char *kif_name) +pfi_kif_find(const char *kif_name) { - struct pfi_kif *kif; - struct pfi_kif_cmp s; + struct pfi_kif_cmp s; + + PF_RULES_ASSERT(); bzero(&s, sizeof(s)); strlcpy(s.pfik_name, kif_name, sizeof(s.pfik_name)); -#ifdef 
__FreeBSD__ - if ((kif = RB_FIND(pfi_ifhead, &V_pfi_ifs, (struct pfi_kif *)&s)) != NULL) -#else - if ((kif = RB_FIND(pfi_ifhead, &pfi_ifs, (struct pfi_kif *)&s)) != NULL) -#endif - return (kif); - /* create new one */ -#ifdef __FreeBSD__ - if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_NOWAIT | M_ZERO)) == NULL) -#else - if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_DONTWAIT|M_ZERO)) == NULL) -#endif - return (NULL); + return (RB_FIND(pfi_ifhead, &V_pfi_ifs, (struct pfi_kif *)&s)); +} +struct pfi_kif * +pfi_kif_attach(struct pfi_kif *kif, const char *kif_name) +{ + struct pfi_kif *kif1; + + PF_RULES_WASSERT(); + KASSERT(kif != NULL, ("%s: null kif", __func__)); + + kif1 = pfi_kif_find(kif_name); + if (kif1 != NULL) { + free(kif, PFI_MTYPE); + return (kif1); + } + + bzero(kif, sizeof(*kif)); strlcpy(kif->pfik_name, kif_name, sizeof(kif->pfik_name)); -#ifdef __FreeBSD__ /* * It seems that the value of time_second is in unintialzied state * when pf sets interface statistics clear time in boot phase if pf * was statically linked to kernel. Instead of setting the bogus * time value have pfi_get_ifaces handle this case. In - * pfi_get_ifaces it uses boottime.tv_sec if it sees the time is 0. + * pfi_get_ifaces it uses time_second if it sees the time is 0. */ kif->pfik_tzero = time_second > 1 ? time_second : 0; -#else - kif->pfik_tzero = time_second; -#endif TAILQ_INIT(&kif->pfik_dynaddrs); -#ifdef __FreeBSD__ RB_INSERT(pfi_ifhead, &V_pfi_ifs, kif); -#else - RB_INSERT(pfi_ifhead, &pfi_ifs, kif); -#endif return (kif); } void -pfi_kif_ref(struct pfi_kif *kif, enum pfi_kif_refs what) +pfi_kif_ref(struct pfi_kif *kif) { - switch (what) { - case PFI_KIF_REF_RULE: - kif->pfik_rules++; - break; - case PFI_KIF_REF_STATE: - kif->pfik_states++; - break; - default: - panic("pfi_kif_ref with unknown type"); - } + + PF_RULES_WASSERT(); + kif->pfik_rulerefs++; } void -pfi_kif_unref(struct pfi_kif *kif, enum pfi_kif_refs what) +pfi_kif_unref(struct pfi_kif *kif) { - if (kif == NULL) - return; - switch (what) { - case PFI_KIF_REF_NONE: - break; - case PFI_KIF_REF_RULE: - if (kif->pfik_rules <= 0) { - printf("pfi_kif_unref: rules refcount <= 0\n"); - return; - } - kif->pfik_rules--; - break; - case PFI_KIF_REF_STATE: - if (kif->pfik_states <= 0) { - printf("pfi_kif_unref: state refcount <= 0\n"); - return; - } - kif->pfik_states--; - break; - default: - panic("pfi_kif_unref with unknown type"); - } + PF_RULES_WASSERT(); + KASSERT(kif->pfik_rulerefs > 0, ("%s: %p has zero refs", __func__, kif)); -#ifdef __FreeBSD__ - if (kif->pfik_ifp != NULL || kif->pfik_group != NULL || kif == V_pfi_all) -#else - if (kif->pfik_ifp != NULL || kif->pfik_group != NULL || kif == pfi_all) -#endif + kif->pfik_rulerefs--; + + if (kif->pfik_rulerefs > 0) return; - if (kif->pfik_rules || kif->pfik_states) + /* kif referencing an existing ifnet or group should exist. */ + if (kif->pfik_ifp != NULL || kif->pfik_group != NULL || kif == V_pfi_all) return; -#ifdef __FreeBSD__ RB_REMOVE(pfi_ifhead, &V_pfi_ifs, kif); -#else - RB_REMOVE(pfi_ifhead, &pfi_ifs, kif); -#endif - free(kif, PFI_MTYPE); + + kif->pfik_flags |= PFI_IFLAG_REFS; + + mtx_lock(&pfi_unlnkdkifs_mtx); + LIST_INSERT_HEAD(&V_pfi_unlinked_kifs, kif, pfik_list); + mtx_unlock(&pfi_unlnkdkifs_mtx); +} + +void +pfi_kif_purge(void) +{ + struct pfi_kif *kif, *kif1; + + /* + * Do naive mark-and-sweep garbage collecting of old kifs. + * Reference flag is raised by pf_purge_expired_states(). 
+ */ + mtx_lock(&pfi_unlnkdkifs_mtx); + LIST_FOREACH_SAFE(kif, &V_pfi_unlinked_kifs, pfik_list, kif1) { + if (!(kif->pfik_flags & PFI_IFLAG_REFS)) { + LIST_REMOVE(kif, pfik_list); + free(kif, PFI_MTYPE); + } else + kif->pfik_flags &= ~PFI_IFLAG_REFS; + } + mtx_unlock(&pfi_unlnkdkifs_mtx); } int @@ -332,6 +272,7 @@ pfi_kif_match(struct pfi_kif *rule_kif, struct pfi_kif *packet_kif) return (1); if (rule_kif->pfik_group != NULL) + /* XXXGL: locking? */ TAILQ_FOREACH(p, &packet_kif->pfik_ifp->if_groups, ifgl_next) if (p->ifgl_group == rule_kif->pfik_group) return (1); @@ -339,125 +280,38 @@ pfi_kif_match(struct pfi_kif *rule_kif, struct pfi_kif *packet_kif) return (0); } -void +static void pfi_attach_ifnet(struct ifnet *ifp) { - struct pfi_kif *kif; - int s; + struct pfi_kif *kif; + + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); - pfi_initialize(); - s = splsoftnet(); -#ifdef __FreeBSD__ + PF_RULES_WLOCK(); V_pfi_update++; -#else - pfi_update++; -#endif - if ((kif = pfi_kif_get(ifp->if_xname)) == NULL) - panic("pfi_kif_get failed"); + kif = pfi_kif_attach(kif, ifp->if_xname); kif->pfik_ifp = ifp; - ifp->if_pf_kif = (caddr_t)kif; - -#ifndef __FreeBSD__ - if ((kif->pfik_ah_cookie = hook_establish(ifp->if_addrhooks, 1, - pfi_kifaddr_update, kif)) == NULL) - panic("pfi_attach_ifnet: cannot allocate '%s' address hook", - ifp->if_xname); -#endif + ifp->if_pf_kif = kif; pfi_kif_update(kif); - - splx(s); + PF_RULES_WUNLOCK(); } -void -pfi_detach_ifnet(struct ifnet *ifp) -{ - int s; - struct pfi_kif *kif; - - if ((kif = (struct pfi_kif *)ifp->if_pf_kif) == NULL) - return; - - s = splsoftnet(); -#ifdef __FreeBSD__ - V_pfi_update++; -#else - pfi_update++; -#endif -#ifndef __FreeBSD__ - hook_disestablish(ifp->if_addrhooks, kif->pfik_ah_cookie); -#endif - pfi_kif_update(kif); - - kif->pfik_ifp = NULL; - ifp->if_pf_kif = NULL; - pfi_kif_unref(kif, PFI_KIF_REF_NONE); - splx(s); -} - -void +static void pfi_attach_ifgroup(struct ifg_group *ifg) { - struct pfi_kif *kif; - int s; + struct pfi_kif *kif; - pfi_initialize(); - s = splsoftnet(); -#ifdef __FreeBSD__ - V_pfi_update++; -#else - pfi_update++; -#endif - if ((kif = pfi_kif_get(ifg->ifg_group)) == NULL) - panic("pfi_kif_get failed"); - - kif->pfik_group = ifg; - ifg->ifg_pf_kif = (caddr_t)kif; - - splx(s); -} - -void -pfi_detach_ifgroup(struct ifg_group *ifg) -{ - int s; - struct pfi_kif *kif; - - if ((kif = (struct pfi_kif *)ifg->ifg_pf_kif) == NULL) - return; - - s = splsoftnet(); -#ifdef __FreeBSD__ - V_pfi_update++; -#else - pfi_update++; -#endif - - kif->pfik_group = NULL; - ifg->ifg_pf_kif = NULL; - pfi_kif_unref(kif, PFI_KIF_REF_NONE); - splx(s); -} - -void -pfi_group_change(const char *group) -{ - struct pfi_kif *kif; - int s; + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); - s = splsoftnet(); -#ifdef __FreeBSD__ + PF_RULES_WLOCK(); V_pfi_update++; -#else - pfi_update++; -#endif - if ((kif = pfi_kif_get(group)) == NULL) - panic("pfi_kif_get failed"); - - pfi_kif_update(kif); + kif = pfi_kif_attach(kif, ifg->ifg_group); - splx(s); + kif->pfik_group = ifg; + ifg->ifg_pf_kif = kif; + PF_RULES_WUNLOCK(); } int @@ -501,28 +355,27 @@ pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af) struct pfi_dynaddr *dyn; char tblname[PF_TABLE_NAME_SIZE]; struct pf_ruleset *ruleset = NULL; - int s, rv = 0; + struct pfi_kif *kif; + int rv = 0; - if (aw->type != PF_ADDR_DYNIFTL) - return (0); -#ifdef __FreeBSD__ - if ((dyn = pool_get(&V_pfi_addr_pl, PR_NOWAIT | PR_ZERO)) -#else - if ((dyn = pool_get(&pfi_addr_pl, PR_WAITOK | PR_LIMITFAIL | PR_ZERO)) 
-#endif - == NULL) - return (1); + PF_RULES_WASSERT(); + KASSERT(aw->type == PF_ADDR_DYNIFTL, ("%s: type %u", + __func__, aw->type)); + KASSERT(aw->p.dyn == NULL, ("%s: dyn is %p", __func__, aw->p.dyn)); + + if ((dyn = malloc(sizeof(*dyn), PFI_MTYPE, M_NOWAIT | M_ZERO)) == NULL) + return (ENOMEM); + + if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_NOWAIT)) == NULL) { + free(dyn, PFI_MTYPE); + return (ENOMEM); + } - s = splsoftnet(); if (!strcmp(aw->v.ifname, "self")) - dyn->pfid_kif = pfi_kif_get(IFG_ALL); + dyn->pfid_kif = pfi_kif_attach(kif, IFG_ALL); else - dyn->pfid_kif = pfi_kif_get(aw->v.ifname); - if (dyn->pfid_kif == NULL) { - rv = 1; - goto _bad; - } - pfi_kif_ref(dyn->pfid_kif, PFI_KIF_REF_RULE); + dyn->pfid_kif = pfi_kif_attach(kif, aw->v.ifname); + pfi_kif_ref(dyn->pfid_kif); dyn->pfid_net = pfi_unmask(&aw->v.a.mask); if (af == AF_INET && dyn->pfid_net == 32) @@ -540,12 +393,12 @@ pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af) snprintf(tblname + strlen(tblname), sizeof(tblname) - strlen(tblname), "/%d", dyn->pfid_net); if ((ruleset = pf_find_or_create_ruleset(PF_RESERVED_ANCHOR)) == NULL) { - rv = 1; + rv = ENOMEM; goto _bad; } - if ((dyn->pfid_kt = pfr_attach_table(ruleset, tblname, 1)) == NULL) { - rv = 1; + if ((dyn->pfid_kt = pfr_attach_table(ruleset, tblname)) == NULL) { + rv = ENOMEM; goto _bad; } @@ -556,7 +409,7 @@ pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af) TAILQ_INSERT_TAIL(&dyn->pfid_kif->pfik_dynaddrs, dyn, entry); aw->p.dyn = dyn; pfi_kif_update(dyn->pfid_kif); - splx(s); + return (0); _bad: @@ -565,108 +418,92 @@ _bad: if (ruleset != NULL) pf_remove_if_empty_ruleset(ruleset); if (dyn->pfid_kif != NULL) - pfi_kif_unref(dyn->pfid_kif, PFI_KIF_REF_RULE); -#ifdef __FreeBSD__ - pool_put(&V_pfi_addr_pl, dyn); -#else - pool_put(&pfi_addr_pl, dyn); -#endif - splx(s); + pfi_kif_unref(dyn->pfid_kif); + free(dyn, PFI_MTYPE); + return (rv); } -void +static void pfi_kif_update(struct pfi_kif *kif) { struct ifg_list *ifgl; struct pfi_dynaddr *p; + PF_RULES_WASSERT(); + /* update all dynaddr */ TAILQ_FOREACH(p, &kif->pfik_dynaddrs, entry) pfi_dynaddr_update(p); /* again for all groups kif is member of */ - if (kif->pfik_ifp != NULL) + if (kif->pfik_ifp != NULL) { + IF_ADDR_RLOCK(kif->pfik_ifp); TAILQ_FOREACH(ifgl, &kif->pfik_ifp->if_groups, ifgl_next) pfi_kif_update((struct pfi_kif *) ifgl->ifgl_group->ifg_pf_kif); + IF_ADDR_RUNLOCK(kif->pfik_ifp); + } } -void +static void pfi_dynaddr_update(struct pfi_dynaddr *dyn) { struct pfi_kif *kif; struct pfr_ktable *kt; - if (dyn == NULL || dyn->pfid_kif == NULL || dyn->pfid_kt == NULL) - panic("pfi_dynaddr_update"); + PF_RULES_WASSERT(); + KASSERT(dyn && dyn->pfid_kif && dyn->pfid_kt, + ("%s: bad argument", __func__)); kif = dyn->pfid_kif; kt = dyn->pfid_kt; -#ifdef __FreeBSD__ if (kt->pfrkt_larg != V_pfi_update) { -#else - if (kt->pfrkt_larg != pfi_update) { -#endif /* this table needs to be brought up-to-date */ pfi_table_update(kt, kif, dyn->pfid_net, dyn->pfid_iflags); -#ifdef __FreeBSD__ kt->pfrkt_larg = V_pfi_update; -#else - kt->pfrkt_larg = pfi_update; -#endif } pfr_dynaddr_update(kt, dyn); } -void +static void pfi_table_update(struct pfr_ktable *kt, struct pfi_kif *kif, int net, int flags) { int e, size2 = 0; struct ifg_member *ifgm; -#ifdef __FreeBSD__ V_pfi_buffer_cnt = 0; -#else - pfi_buffer_cnt = 0; -#endif if (kif->pfik_ifp != NULL) pfi_instance_add(kif->pfik_ifp, net, flags); - else if (kif->pfik_group != NULL) + else if (kif->pfik_group != NULL) { + IFNET_RLOCK(); TAILQ_FOREACH(ifgm, 
&kif->pfik_group->ifg_members, ifgm_next) pfi_instance_add(ifgm->ifgm_ifp, net, flags); + IFNET_RUNLOCK(); + } -#ifdef __FreeBSD__ if ((e = pfr_set_addrs(&kt->pfrkt_t, V_pfi_buffer, V_pfi_buffer_cnt, &size2, NULL, NULL, NULL, 0, PFR_TFLAG_ALLMASK))) - printf("pfi_table_update: cannot set %d new addresses " - "into table %s: %d\n", V_pfi_buffer_cnt, kt->pfrkt_name, e); -#else - if ((e = pfr_set_addrs(&kt->pfrkt_t, pfi_buffer, pfi_buffer_cnt, &size2, - NULL, NULL, NULL, 0, PFR_TFLAG_ALLMASK))) - printf("pfi_table_update: cannot set %d new addresses " - "into table %s: %d\n", pfi_buffer_cnt, kt->pfrkt_name, e); -#endif + printf("%s: cannot set %d new addresses into table %s: %d\n", + __func__, V_pfi_buffer_cnt, kt->pfrkt_name, e); } -void +static void pfi_instance_add(struct ifnet *ifp, int net, int flags) { struct ifaddr *ia; int got4 = 0, got6 = 0; int net2, af; - if (ifp == NULL) - return; - TAILQ_FOREACH(ia, &ifp->if_addrlist, ifa_list) { + IF_ADDR_RLOCK(ifp); + TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_list) { if (ia->ifa_addr == NULL) continue; af = ia->ifa_addr->sa_family; if (af != AF_INET && af != AF_INET6) continue; -#ifdef __FreeBSD__ /* * XXX: For point-to-point interfaces, (ifname:0) and IPv4, * jump over addresses without a proper route to work @@ -677,7 +514,6 @@ pfi_instance_add(struct ifnet *ifp, int net, int flags) !(ia->ifa_flags & IFA_ROUTE) && (flags & PFI_AFLAG_NOALIAS) && (af == AF_INET)) continue; -#endif if ((flags & PFI_AFLAG_BROADCAST) && af == AF_INET6) continue; if ((flags & PFI_AFLAG_BROADCAST) && @@ -718,67 +554,39 @@ pfi_instance_add(struct ifnet *ifp, int net, int flags) else pfi_address_add(ia->ifa_addr, af, net2); } + IF_ADDR_RUNLOCK(ifp); } -void +static void pfi_address_add(struct sockaddr *sa, int af, int net) { struct pfr_addr *p; int i; -#ifdef __FreeBSD__ if (V_pfi_buffer_cnt >= V_pfi_buffer_max) { int new_max = V_pfi_buffer_max * 2; -#else - if (pfi_buffer_cnt >= pfi_buffer_max) { - int new_max = pfi_buffer_max * 2; -#endif if (new_max > PFI_BUFFER_MAX) { - printf("pfi_address_add: address buffer full (%d/%d)\n", -#ifdef __FreeBSD__ + printf("%s: address buffer full (%d/%d)\n", __func__, V_pfi_buffer_cnt, PFI_BUFFER_MAX); -#else - pfi_buffer_cnt, PFI_BUFFER_MAX); -#endif return; } p = malloc(new_max * sizeof(*V_pfi_buffer), PFI_MTYPE, -#ifdef __FreeBSD__ M_NOWAIT); -#else - M_DONTWAIT); -#endif if (p == NULL) { - printf("pfi_address_add: no memory to grow buffer " -#ifdef __FreeBSD__ - "(%d/%d)\n", V_pfi_buffer_cnt, PFI_BUFFER_MAX); -#else - "(%d/%d)\n", pfi_buffer_cnt, PFI_BUFFER_MAX); -#endif + printf("%s: no memory to grow buffer (%d/%d)\n", + __func__, V_pfi_buffer_cnt, PFI_BUFFER_MAX); return; } -#ifdef __FreeBSD__ memcpy(V_pfi_buffer, p, V_pfi_buffer_cnt * sizeof(*V_pfi_buffer)); /* no need to zero buffer */ free(V_pfi_buffer, PFI_MTYPE); V_pfi_buffer = p; V_pfi_buffer_max = new_max; -#else - memcpy(pfi_buffer, p, pfi_buffer_cnt * sizeof(*pfi_buffer)); - /* no need to zero buffer */ - free(pfi_buffer, PFI_MTYPE); - pfi_buffer = p; - pfi_buffer_max = new_max; -#endif } if (af == AF_INET && net > 32) net = 128; -#ifdef __FreeBSD__ p = V_pfi_buffer + V_pfi_buffer_cnt++; -#else - p = pfi_buffer + pfi_buffer_cnt++; -#endif bzero(p, sizeof(*p)); p->pfra_af = af; p->pfra_net = net; @@ -797,55 +605,31 @@ pfi_address_add(struct sockaddr *sa, int af, int net) } void -pfi_dynaddr_remove(struct pf_addr_wrap *aw) +pfi_dynaddr_remove(struct pfi_dynaddr *dyn) { - int s; - if (aw->type != PF_ADDR_DYNIFTL || aw->p.dyn == NULL || - aw->p.dyn->pfid_kif == NULL || 
aw->p.dyn->pfid_kt == NULL) - return; + KASSERT(dyn->pfid_kif != NULL, ("%s: null pfid_kif", __func__)); + KASSERT(dyn->pfid_kt != NULL, ("%s: null pfid_kt", __func__)); - s = splsoftnet(); - TAILQ_REMOVE(&aw->p.dyn->pfid_kif->pfik_dynaddrs, aw->p.dyn, entry); - pfi_kif_unref(aw->p.dyn->pfid_kif, PFI_KIF_REF_RULE); - aw->p.dyn->pfid_kif = NULL; - pfr_detach_table(aw->p.dyn->pfid_kt); - aw->p.dyn->pfid_kt = NULL; -#ifdef __FreeBSD__ - pool_put(&V_pfi_addr_pl, aw->p.dyn); -#else - pool_put(&pfi_addr_pl, aw->p.dyn); -#endif - aw->p.dyn = NULL; - splx(s); + TAILQ_REMOVE(&dyn->pfid_kif->pfik_dynaddrs, dyn, entry); + pfi_kif_unref(dyn->pfid_kif); + pfr_detach_table(dyn->pfid_kt); + free(dyn, PFI_MTYPE); } void pfi_dynaddr_copyout(struct pf_addr_wrap *aw) { - if (aw->type != PF_ADDR_DYNIFTL || aw->p.dyn == NULL || - aw->p.dyn->pfid_kif == NULL) - return; - aw->p.dyncnt = aw->p.dyn->pfid_acnt4 + aw->p.dyn->pfid_acnt6; -} -void -pfi_kifaddr_update(void *v) -{ - int s; - struct pfi_kif *kif = (struct pfi_kif *)v; + KASSERT(aw->type == PF_ADDR_DYNIFTL, + ("%s: type %u", __func__, aw->type)); - s = splsoftnet(); -#ifdef __FreeBSD__ - V_pfi_update++; -#else - pfi_update++; -#endif - pfi_kif_update(kif); - splx(s); + if (aw->p.dyn == NULL || aw->p.dyn->pfid_kif == NULL) + return; + aw->p.dyncnt = aw->p.dyn->pfid_acnt4 + aw->p.dyn->pfid_acnt6; } -int +static int pfi_if_compare(struct pfi_kif *p, struct pfi_kif *q) { return (strncmp(p->pfik_name, q->pfik_name, IFNAMSIZ)); @@ -858,19 +642,13 @@ pfi_update_status(const char *name, struct pf_status *pfs) struct pfi_kif_cmp key; struct ifg_member p_member, *ifgm; TAILQ_HEAD(, ifg_member) ifg_members; - int i, j, k, s; + int i, j, k; strlcpy(key.pfik_name, name, sizeof(key.pfik_name)); - s = splsoftnet(); -#ifdef __FreeBSD__ p = RB_FIND(pfi_ifhead, &V_pfi_ifs, (struct pfi_kif *)&key); -#else - p = RB_FIND(pfi_ifhead, &pfi_ifs, (struct pfi_kif *)&key); -#endif - if (p == NULL) { - splx(s); + if (p == NULL) return; - } + if (p->pfik_group != NULL) { bcopy(&p->pfik_group->ifg_members, &ifg_members, sizeof(ifg_members)); @@ -906,56 +684,29 @@ pfi_update_status(const char *name, struct pf_status *pfs) p->pfik_bytes[i][j][k]; } } - splx(s); } -int +void pfi_get_ifaces(const char *name, struct pfi_kif *buf, int *size) { struct pfi_kif *p, *nextp; - int s, n = 0; -#ifdef __FreeBSD__ - int error; -#endif + int n = 0; - s = splsoftnet(); -#ifdef __FreeBSD__ for (p = RB_MIN(pfi_ifhead, &V_pfi_ifs); p; p = nextp) { nextp = RB_NEXT(pfi_ifhead, &V_pfi_ifs, p); -#else - for (p = RB_MIN(pfi_ifhead, &pfi_ifs); p; p = nextp) { - nextp = RB_NEXT(pfi_ifhead, &pfi_ifs, p); -#endif if (pfi_skip_if(name, p)) continue; - if (*size > n++) { - if (!p->pfik_tzero) - p->pfik_tzero = time_second; - pfi_kif_ref(p, PFI_KIF_REF_RULE); -#ifdef __FreeBSD__ - PF_COPYOUT(p, buf++, sizeof(*buf), error); - if (error) { -#else - if (copyout(p, buf++, sizeof(*buf))) { -#endif - pfi_kif_unref(p, PFI_KIF_REF_RULE); - splx(s); - return (EFAULT); - } -#ifdef __FreeBSD__ - nextp = RB_NEXT(pfi_ifhead, &V_pfi_ifs, p); -#else - nextp = RB_NEXT(pfi_ifhead, &pfi_ifs, p); -#endif - pfi_kif_unref(p, PFI_KIF_REF_RULE); - } + if (*size <= n++) + break; + if (!p->pfik_tzero) + p->pfik_tzero = time_second; + bcopy(p, buf++, sizeof(*buf)); + nextp = RB_NEXT(pfi_ifhead, &V_pfi_ifs, p); } - splx(s); *size = n; - return (0); } -int +static int pfi_skip_if(const char *filter, struct pfi_kif *p) { int n; @@ -978,19 +729,12 @@ int pfi_set_flags(const char *name, int flags) { struct pfi_kif *p; - int s; - s = splsoftnet(); 
-#ifdef __FreeBSD__ RB_FOREACH(p, pfi_ifhead, &V_pfi_ifs) { -#else - RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { -#endif if (pfi_skip_if(name, p)) continue; p->pfik_flags |= flags; } - splx(s); return (0); } @@ -998,24 +742,17 @@ int pfi_clear_flags(const char *name, int flags) { struct pfi_kif *p; - int s; - s = splsoftnet(); -#ifdef __FreeBSD__ RB_FOREACH(p, pfi_ifhead, &V_pfi_ifs) { -#else - RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { -#endif if (pfi_skip_if(name, p)) continue; p->pfik_flags &= ~flags; } - splx(s); return (0); } /* from pf_print_state.c */ -int +static int pfi_unmask(void *addr) { struct pf_addr *m = addr; @@ -1034,77 +771,89 @@ pfi_unmask(void *addr) return (b); } -#ifdef __FreeBSD__ -void +static void pfi_attach_ifnet_event(void *arg __unused, struct ifnet *ifp) { CURVNET_SET(ifp->if_vnet); - PF_LOCK(); pfi_attach_ifnet(ifp); #ifdef ALTQ + PF_RULES_WLOCK(); pf_altq_ifnet_event(ifp, 0); + PF_RULES_WUNLOCK(); #endif - PF_UNLOCK(); CURVNET_RESTORE(); } -void +static void pfi_detach_ifnet_event(void *arg __unused, struct ifnet *ifp) { + struct pfi_kif *kif = (struct pfi_kif *)ifp->if_pf_kif; CURVNET_SET(ifp->if_vnet); - PF_LOCK(); - pfi_detach_ifnet(ifp); + PF_RULES_WLOCK(); + V_pfi_update++; + pfi_kif_update(kif); + + kif->pfik_ifp = NULL; + ifp->if_pf_kif = NULL; #ifdef ALTQ pf_altq_ifnet_event(ifp, 1); #endif - PF_UNLOCK(); + PF_RULES_WUNLOCK(); CURVNET_RESTORE(); } -void +static void pfi_attach_group_event(void *arg , struct ifg_group *ifg) { CURVNET_SET((struct vnet *)arg); - PF_LOCK(); pfi_attach_ifgroup(ifg); - PF_UNLOCK(); CURVNET_RESTORE(); } -void +static void pfi_change_group_event(void *arg, char *gname) { + struct pfi_kif *kif; + + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); CURVNET_SET((struct vnet *)arg); - PF_LOCK(); - pfi_group_change(gname); - PF_UNLOCK(); + PF_RULES_WLOCK(); + V_pfi_update++; + kif = pfi_kif_attach(kif, gname); + pfi_kif_update(kif); + PF_RULES_WUNLOCK(); CURVNET_RESTORE(); } -void +static void pfi_detach_group_event(void *arg, struct ifg_group *ifg) { + struct pfi_kif *kif = (struct pfi_kif *)ifg->ifg_pf_kif; CURVNET_SET((struct vnet *)arg); - PF_LOCK(); - pfi_detach_ifgroup(ifg); - PF_UNLOCK(); + PF_RULES_WLOCK(); + V_pfi_update++; + + kif->pfik_group = NULL; + ifg->ifg_pf_kif = NULL; + PF_RULES_WUNLOCK(); CURVNET_RESTORE(); } -void +static void pfi_ifaddr_event(void *arg __unused, struct ifnet *ifp) { CURVNET_SET(ifp->if_vnet); - PF_LOCK(); - if (ifp && ifp->if_pf_kif) - pfi_kifaddr_update(ifp->if_pf_kif); - PF_UNLOCK(); + PF_RULES_WLOCK(); + if (ifp && ifp->if_pf_kif) { + V_pfi_update++; + pfi_kif_update(ifp->if_pf_kif); + } + PF_RULES_WUNLOCK(); CURVNET_RESTORE(); } -#endif /* __FreeBSD__ */ diff --git a/sys/contrib/pf/net/pf_ioctl.c b/sys/contrib/pf/net/pf_ioctl.c index 6b5d8f5..032f051 100644 --- a/sys/contrib/pf/net/pf_ioctl.c +++ b/sys/contrib/pf/net/pf_ioctl.c @@ -35,7 +35,6 @@ * */ -#ifdef __FreeBSD__ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); @@ -44,187 +43,116 @@ __FBSDID("$FreeBSD$"); #include "opt_bpf.h" #include "opt_pf.h" -#define NPFSYNC 1 - -#ifdef DEV_PFLOG -#define NPFLOG DEV_PFLOG -#else -#define NPFLOG 0 -#endif - -#else /* !__FreeBSD__ */ -#include "pfsync.h" -#include "pflog.h" -#endif /* __FreeBSD__ */ - #include <sys/param.h> -#include <sys/systm.h> -#include <sys/mbuf.h> -#include <sys/filio.h> +#include <sys/bus.h> +#include <sys/conf.h> +#include <sys/endian.h> #include <sys/fcntl.h> -#include <sys/socket.h> -#include <sys/socketvar.h> -#include <sys/kernel.h> -#include <sys/time.h> -#ifdef __FreeBSD__ 
-#include <sys/ucred.h> +#include <sys/filio.h> +#include <sys/interrupt.h> #include <sys/jail.h> +#include <sys/kernel.h> +#include <sys/kthread.h> +#include <sys/mbuf.h> #include <sys/module.h> -#include <sys/conf.h> #include <sys/proc.h> +#include <sys/smp.h> +#include <sys/socket.h> #include <sys/sysctl.h> -#else -#include <sys/timeout.h> -#include <sys/pool.h> -#endif -#include <sys/proc.h> -#include <sys/malloc.h> -#include <sys/kthread.h> -#ifndef __FreeBSD__ -#include <sys/rwlock.h> -#include <uvm/uvm_extern.h> -#endif +#include <sys/md5.h> +#include <sys/ucred.h> #include <net/if.h> -#include <net/if_types.h> -#ifdef __FreeBSD__ -#include <net/vnet.h> -#endif #include <net/route.h> +#include <net/pfil.h> +#include <net/pfvar.h> +#include <net/if_pfsync.h> +#include <net/if_pflog.h> #include <netinet/in.h> -#include <netinet/in_var.h> -#include <netinet/in_systm.h> #include <netinet/ip.h> #include <netinet/ip_var.h> #include <netinet/ip_icmp.h> -#ifdef __FreeBSD__ -#include <sys/md5.h> -#else -#include <dev/rndvar.h> -#include <crypto/md5.h> -#endif -#include <net/pfvar.h> - -#include <net/if_pfsync.h> - -#if NPFLOG > 0 -#include <net/if_pflog.h> -#endif /* NPFLOG > 0 */ - #ifdef INET6 #include <netinet/ip6.h> -#include <netinet/in_pcb.h> #endif /* INET6 */ #ifdef ALTQ #include <altq/altq.h> #endif -#ifdef __FreeBSD__ -#include <sys/limits.h> -#include <sys/lock.h> -#include <sys/mutex.h> -#include <net/pfil.h> -#endif /* __FreeBSD__ */ - -#ifdef __FreeBSD__ -void init_zone_var(void); -void cleanup_pf_zone(void); -int pfattach(void); -#else -void pfattach(int); -void pf_thread_create(void *); -int pfopen(dev_t, int, int, struct proc *); -int pfclose(dev_t, int, int, struct proc *); -#endif -struct pf_pool *pf_get_pool(char *, u_int32_t, u_int8_t, u_int32_t, +static int pfattach(void); +static struct pf_pool *pf_get_pool(char *, u_int32_t, u_int8_t, u_int32_t, u_int8_t, u_int8_t, u_int8_t); -void pf_mv_pool(struct pf_palist *, struct pf_palist *); -void pf_empty_pool(struct pf_palist *); -#ifdef __FreeBSD__ -int pfioctl(struct cdev *, u_long, caddr_t, int, struct thread *); -#else -int pfioctl(dev_t, u_long, caddr_t, int, struct proc *); -#endif +static void pf_mv_pool(struct pf_palist *, struct pf_palist *); +static void pf_empty_pool(struct pf_palist *); +static int pfioctl(struct cdev *, u_long, caddr_t, int, + struct thread *); #ifdef ALTQ -int pf_begin_altq(u_int32_t *); -int pf_rollback_altq(u_int32_t); -int pf_commit_altq(u_int32_t); -int pf_enable_altq(struct pf_altq *); -int pf_disable_altq(struct pf_altq *); +static int pf_begin_altq(u_int32_t *); +static int pf_rollback_altq(u_int32_t); +static int pf_commit_altq(u_int32_t); +static int pf_enable_altq(struct pf_altq *); +static int pf_disable_altq(struct pf_altq *); +static u_int32_t pf_qname2qid(char *); +static void pf_qid_unref(u_int32_t); #endif /* ALTQ */ -int pf_begin_rules(u_int32_t *, int, const char *); -int pf_rollback_rules(u_int32_t, int, char *); -int pf_setup_pfsync_matching(struct pf_ruleset *); -void pf_hash_rule(MD5_CTX *, struct pf_rule *); -void pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *); -int pf_commit_rules(u_int32_t, int, char *); -int pf_addr_setup(struct pf_ruleset *, +static int pf_begin_rules(u_int32_t *, int, const char *); +static int pf_rollback_rules(u_int32_t, int, char *); +static int pf_setup_pfsync_matching(struct pf_ruleset *); +static void pf_hash_rule(MD5_CTX *, struct pf_rule *); +static void pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *); +static int 
pf_commit_rules(u_int32_t, int, char *); +static int pf_addr_setup(struct pf_ruleset *, struct pf_addr_wrap *, sa_family_t); -void pf_addr_copyout(struct pf_addr_wrap *); - -#define TAGID_MAX 50000 +static void pf_addr_copyout(struct pf_addr_wrap *); -#ifdef __FreeBSD__ -VNET_DEFINE(struct pf_rule, pf_default_rule); -VNET_DEFINE(struct sx, pf_consistency_lock); +VNET_DEFINE(struct pf_rule, pf_default_rule); #ifdef ALTQ static VNET_DEFINE(int, pf_altq_running); #define V_pf_altq_running VNET(pf_altq_running) #endif -TAILQ_HEAD(pf_tags, pf_tagname); +#define TAGID_MAX 50000 +struct pf_tagname { + TAILQ_ENTRY(pf_tagname) entries; + char name[PF_TAG_NAME_SIZE]; + uint16_t tag; + int ref; +}; +TAILQ_HEAD(pf_tags, pf_tagname); #define V_pf_tags VNET(pf_tags) VNET_DEFINE(struct pf_tags, pf_tags); #define V_pf_qids VNET(pf_qids) VNET_DEFINE(struct pf_tags, pf_qids); - -#else /* !__FreeBSD__ */ -struct pf_rule pf_default_rule; -struct rwlock pf_consistency_lock = RWLOCK_INITIALIZER("pfcnslk"); -#ifdef ALTQ -static int pf_altq_running; -#endif - -TAILQ_HEAD(pf_tags, pf_tagname) pf_tags = TAILQ_HEAD_INITIALIZER(pf_tags), - pf_qids = TAILQ_HEAD_INITIALIZER(pf_qids); -#endif /* __FreeBSD__ */ +static MALLOC_DEFINE(M_PFTAG, "pf_tag", "pf(4) tag names"); +static MALLOC_DEFINE(M_PFALTQ, "pf_altq", "pf(4) altq configuration db"); +static MALLOC_DEFINE(M_PFRULE, "pf_rule", "pf(4) rules"); #if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE) #error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE #endif -u_int16_t tagname2tag(struct pf_tags *, char *); -void tag2tagname(struct pf_tags *, u_int16_t, char *); -void tag_unref(struct pf_tags *, u_int16_t); -int pf_rtlabel_add(struct pf_addr_wrap *); -void pf_rtlabel_remove(struct pf_addr_wrap *); -void pf_rtlabel_copyout(struct pf_addr_wrap *); +static u_int16_t tagname2tag(struct pf_tags *, char *); +static u_int16_t pf_tagname2tag(char *); +static void tag_unref(struct pf_tags *, u_int16_t); -#ifdef __FreeBSD__ #define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x -#else -#define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x -#endif -#ifdef __FreeBSD__ struct cdev *pf_dev; - + /* * XXX - These are new and need to be checked when moveing to a new version */ static void pf_clear_states(void); static int pf_clear_tables(void); -static void pf_clear_srcnodes(void); -/* - * XXX - These are new and need to be checked when moveing to a new version - */ - +static void pf_clear_srcnodes(struct pf_src_node *); +static void pf_tbladdr_copyout(struct pf_addr_wrap *); + /* * Wrapper functions for pfil(9) hooks */ @@ -240,7 +168,7 @@ static int pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, static int pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, struct inpcb *inp); #endif - + static int hook_pf(void); static int dehook_pf(void); static int shutdown_pf(void); @@ -256,7 +184,8 @@ static struct cdevsw pf_cdevsw = { static volatile VNET_DEFINE(int, pf_pfil_hooked); #define V_pf_pfil_hooked VNET(pf_pfil_hooked) VNET_DEFINE(int, pf_end_threads); -struct mtx pf_task_mtx; + +struct rwlock pf_rules_lock; /* pfsync */ pfsync_state_import_t *pfsync_state_import_ptr = NULL; @@ -264,126 +193,27 @@ pfsync_insert_state_t *pfsync_insert_state_ptr = NULL; pfsync_update_state_t *pfsync_update_state_ptr = NULL; pfsync_delete_state_t *pfsync_delete_state_ptr = NULL; pfsync_clear_states_t *pfsync_clear_states_ptr = NULL; -pfsync_state_in_use_t *pfsync_state_in_use_ptr = NULL; pfsync_defer_t *pfsync_defer_ptr = NULL; -pfsync_up_t *pfsync_up_ptr = NULL; -/* 
pflow */ -export_pflow_t *export_pflow_ptr = NULL; /* pflog */ pflog_packet_t *pflog_packet_ptr = NULL; -VNET_DEFINE(int, debug_pfugidhack); -SYSCTL_VNET_INT(_debug, OID_AUTO, pfugidhack, CTLFLAG_RW, - &VNET_NAME(debug_pfugidhack), 0, - "Enable/disable pf user/group rules mpsafe hack"); - -static void -init_pf_mutex(void) -{ - - mtx_init(&pf_task_mtx, "pf task mtx", NULL, MTX_DEF); -} - -static void -destroy_pf_mutex(void) -{ - - mtx_destroy(&pf_task_mtx); -} -void -init_zone_var(void) -{ - V_pf_src_tree_pl = V_pf_rule_pl = NULL; - V_pf_state_pl = V_pf_state_key_pl = V_pf_state_item_pl = NULL; - V_pf_altq_pl = V_pf_pooladdr_pl = NULL; - V_pf_frent_pl = V_pf_frag_pl = V_pf_cache_pl = V_pf_cent_pl = NULL; - V_pf_state_scrub_pl = NULL; - V_pfr_ktable_pl = V_pfr_kentry_pl = V_pfr_kcounters_pl = NULL; -} - -void -cleanup_pf_zone(void) -{ - UMA_DESTROY(V_pf_src_tree_pl); - UMA_DESTROY(V_pf_rule_pl); - UMA_DESTROY(V_pf_state_pl); - UMA_DESTROY(V_pf_state_key_pl); - UMA_DESTROY(V_pf_state_item_pl); - UMA_DESTROY(V_pf_altq_pl); - UMA_DESTROY(V_pf_pooladdr_pl); - UMA_DESTROY(V_pf_frent_pl); - UMA_DESTROY(V_pf_frag_pl); - UMA_DESTROY(V_pf_cache_pl); - UMA_DESTROY(V_pf_cent_pl); - UMA_DESTROY(V_pfr_ktable_pl); - UMA_DESTROY(V_pfr_kentry_pl); - UMA_DESTROY(V_pfr_kcounters_pl); - UMA_DESTROY(V_pf_state_scrub_pl); - UMA_DESTROY(V_pfi_addr_pl); -} - -int +static int pfattach(void) { u_int32_t *my_timeout = V_pf_default_rule.timeout; - int error = 1; + int error; - do { - UMA_CREATE(V_pf_src_tree_pl, struct pf_src_node, "pfsrctrpl"); - UMA_CREATE(V_pf_rule_pl, struct pf_rule, "pfrulepl"); - UMA_CREATE(V_pf_state_pl, struct pf_state, "pfstatepl"); - UMA_CREATE(V_pf_state_key_pl, struct pf_state, "pfstatekeypl"); - UMA_CREATE(V_pf_state_item_pl, struct pf_state, "pfstateitempl"); - UMA_CREATE(V_pf_altq_pl, struct pf_altq, "pfaltqpl"); - UMA_CREATE(V_pf_pooladdr_pl, struct pf_pooladdr, "pfpooladdrpl"); - UMA_CREATE(V_pfr_ktable_pl, struct pfr_ktable, "pfrktable"); - UMA_CREATE(V_pfr_kentry_pl, struct pfr_kentry, "pfrkentry"); - UMA_CREATE(V_pfr_kcounters_pl, struct pfr_kcounters, "pfrkcounters"); - UMA_CREATE(V_pf_frent_pl, struct pf_frent, "pffrent"); - UMA_CREATE(V_pf_frag_pl, struct pf_fragment, "pffrag"); - UMA_CREATE(V_pf_cache_pl, struct pf_fragment, "pffrcache"); - UMA_CREATE(V_pf_cent_pl, struct pf_frcache, "pffrcent"); - UMA_CREATE(V_pf_state_scrub_pl, struct pf_state_scrub, - "pfstatescrub"); - UMA_CREATE(V_pfi_addr_pl, struct pfi_dynaddr, "pfiaddrpl"); - error = 0; - } while(0); - if (error) { - cleanup_pf_zone(); - return (error); - } + pf_initialize(); pfr_initialize(); pfi_initialize(); - if ( (error = pf_osfp_initialize()) ) { - cleanup_pf_zone(); - pf_osfp_cleanup(); - return (error); - } + pf_normalize_init(); + + V_pf_limits[PF_LIMIT_STATES].limit = PFSTATE_HIWAT; + V_pf_limits[PF_LIMIT_SRC_NODES].limit = PFSNODE_HIWAT; - V_pf_pool_limits[PF_LIMIT_STATES].pp = V_pf_state_pl; - V_pf_pool_limits[PF_LIMIT_STATES].limit = PFSTATE_HIWAT; - V_pf_pool_limits[PF_LIMIT_SRC_NODES].pp = V_pf_src_tree_pl; - V_pf_pool_limits[PF_LIMIT_SRC_NODES].limit = PFSNODE_HIWAT; - V_pf_pool_limits[PF_LIMIT_FRAGS].pp = V_pf_frent_pl; - V_pf_pool_limits[PF_LIMIT_FRAGS].limit = PFFRAG_FRENT_HIWAT; - V_pf_pool_limits[PF_LIMIT_TABLES].pp = V_pfr_ktable_pl; - V_pf_pool_limits[PF_LIMIT_TABLES].limit = PFR_KTABLE_HIWAT; - V_pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].pp = V_pfr_kentry_pl; - V_pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].limit = PFR_KENTRY_HIWAT; - uma_zone_set_max(V_pf_pool_limits[PF_LIMIT_STATES].pp, - 
V_pf_pool_limits[PF_LIMIT_STATES].limit); - - RB_INIT(&V_tree_src_tracking); RB_INIT(&V_pf_anchors); pf_init_ruleset(&pf_main_ruleset); - TAILQ_INIT(&V_pf_altqs[0]); - TAILQ_INIT(&V_pf_altqs[1]); - TAILQ_INIT(&V_pf_pabuf); - V_pf_altqs_active = &V_pf_altqs[0]; - V_pf_altqs_inactive = &V_pf_altqs[1]; - TAILQ_INIT(&V_state_list); - /* default rule should never be garbage collected */ V_pf_default_rule.entries.tqe_prev = &V_pf_default_rule.entries.tqe_next; V_pf_default_rule.action = PF_PASS; @@ -412,8 +242,6 @@ pfattach(void) my_timeout[PFTM_ADAPTIVE_START] = PFSTATE_ADAPT_START; my_timeout[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END; - pf_normalize_init(); - bzero(&V_pf_status, sizeof(V_pf_status)); V_pf_status.debug = PF_DEBUG_URGENT; @@ -422,119 +250,19 @@ pfattach(void) /* XXX do our best to avoid a conflict */ V_pf_status.hostid = arc4random(); - if (kproc_create(pf_purge_thread, curvnet, NULL, 0, 0, "pfpurge")) - return (ENXIO); - - m_addr_chg_pf_p = pf_pkt_addr_changed; - - return (error); -} -#else /* !__FreeBSD__ */ - -void -pfattach(int num) -{ - u_int32_t *timeout = pf_default_rule.timeout; - - pool_init(&pf_rule_pl, sizeof(struct pf_rule), 0, 0, 0, "pfrulepl", - &pool_allocator_nointr); - pool_init(&pf_src_tree_pl, sizeof(struct pf_src_node), 0, 0, 0, - "pfsrctrpl", NULL); - pool_init(&pf_state_pl, sizeof(struct pf_state), 0, 0, 0, "pfstatepl", - NULL); - pool_init(&pf_state_key_pl, sizeof(struct pf_state_key), 0, 0, 0, - "pfstatekeypl", NULL); - pool_init(&pf_state_item_pl, sizeof(struct pf_state_item), 0, 0, 0, - "pfstateitempl", NULL); - pool_init(&pf_altq_pl, sizeof(struct pf_altq), 0, 0, 0, "pfaltqpl", - &pool_allocator_nointr); - pool_init(&pf_pooladdr_pl, sizeof(struct pf_pooladdr), 0, 0, 0, - "pfpooladdrpl", &pool_allocator_nointr); - pfr_initialize(); - pfi_initialize(); - pf_osfp_initialize(); - - pool_sethardlimit(pf_pool_limits[PF_LIMIT_STATES].pp, - pf_pool_limits[PF_LIMIT_STATES].limit, NULL, 0); - - if (physmem <= atop(100*1024*1024)) - pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].limit = - PFR_KENTRY_HIWAT_SMALL; - - RB_INIT(&tree_src_tracking); - RB_INIT(&pf_anchors); - pf_init_ruleset(&pf_main_ruleset); - TAILQ_INIT(&pf_altqs[0]); - TAILQ_INIT(&pf_altqs[1]); - TAILQ_INIT(&pf_pabuf); - pf_altqs_active = &pf_altqs[0]; - pf_altqs_inactive = &pf_altqs[1]; - TAILQ_INIT(&state_list); - - /* default rule should never be garbage collected */ - pf_default_rule.entries.tqe_prev = &pf_default_rule.entries.tqe_next; - pf_default_rule.action = PF_PASS; - pf_default_rule.nr = -1; - pf_default_rule.rtableid = -1; - - /* initialize default timeouts */ - timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL; - timeout[PFTM_TCP_OPENING] = PFTM_TCP_OPENING_VAL; - timeout[PFTM_TCP_ESTABLISHED] = PFTM_TCP_ESTABLISHED_VAL; - timeout[PFTM_TCP_CLOSING] = PFTM_TCP_CLOSING_VAL; - timeout[PFTM_TCP_FIN_WAIT] = PFTM_TCP_FIN_WAIT_VAL; - timeout[PFTM_TCP_CLOSED] = PFTM_TCP_CLOSED_VAL; - timeout[PFTM_UDP_FIRST_PACKET] = PFTM_UDP_FIRST_PACKET_VAL; - timeout[PFTM_UDP_SINGLE] = PFTM_UDP_SINGLE_VAL; - timeout[PFTM_UDP_MULTIPLE] = PFTM_UDP_MULTIPLE_VAL; - timeout[PFTM_ICMP_FIRST_PACKET] = PFTM_ICMP_FIRST_PACKET_VAL; - timeout[PFTM_ICMP_ERROR_REPLY] = PFTM_ICMP_ERROR_REPLY_VAL; - timeout[PFTM_OTHER_FIRST_PACKET] = PFTM_OTHER_FIRST_PACKET_VAL; - timeout[PFTM_OTHER_SINGLE] = PFTM_OTHER_SINGLE_VAL; - timeout[PFTM_OTHER_MULTIPLE] = PFTM_OTHER_MULTIPLE_VAL; - timeout[PFTM_FRAG] = PFTM_FRAG_VAL; - timeout[PFTM_INTERVAL] = PFTM_INTERVAL_VAL; - timeout[PFTM_SRC_NODE] = PFTM_SRC_NODE_VAL; - timeout[PFTM_TS_DIFF] = 
PFTM_TS_DIFF_VAL; - timeout[PFTM_ADAPTIVE_START] = PFSTATE_ADAPT_START; - timeout[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END; - - pf_normalize_init(); - bzero(&pf_status, sizeof(pf_status)); - pf_status.debug = PF_DEBUG_URGENT; - - /* XXX do our best to avoid a conflict */ - pf_status.hostid = arc4random(); - - /* require process context to purge states, so perform in a thread */ - kthread_create_deferred(pf_thread_create, NULL); -} - -void -pf_thread_create(void *v) -{ - if (kthread_create(pf_purge_thread, NULL, NULL, "pfpurge")) - panic("pfpurge thread"); -} - -int -pfopen(dev_t dev, int flags, int fmt, struct proc *p) -{ - if (minor(dev) >= 1) - return (ENXIO); - return (0); -} + if ((error = kproc_create(pf_purge_thread, curvnet, NULL, 0, 0, + "pf purge")) != 0) + /* XXXGL: leaked all above. */ + return (error); + if ((error = swi_add(NULL, "pf send", pf_intr, curvnet, SWI_NET, + INTR_MPSAFE, &V_pf_swi_cookie)) != 0) + /* XXXGL: leaked all above. */ + return (error); -int -pfclose(dev_t dev, int flags, int fmt, struct proc *p) -{ - if (minor(dev) >= 1) - return (ENXIO); return (0); } -#endif -struct pf_pool * +static struct pf_pool * pf_get_pool(char *anchor, u_int32_t ticket, u_int8_t rule_action, u_int32_t rule_number, u_int8_t r_last, u_int8_t active, u_int8_t check_ticket) @@ -578,7 +306,7 @@ pf_get_pool(char *anchor, u_int32_t ticket, u_int8_t rule_action, return (&rule->rpool); } -void +static void pf_mv_pool(struct pf_palist *poola, struct pf_palist *poolb) { struct pf_pooladdr *mv_pool_pa; @@ -589,80 +317,89 @@ pf_mv_pool(struct pf_palist *poola, struct pf_palist *poolb) } } -void +static void pf_empty_pool(struct pf_palist *poola) { - struct pf_pooladdr *empty_pool_pa; - - while ((empty_pool_pa = TAILQ_FIRST(poola)) != NULL) { - pfi_dynaddr_remove(&empty_pool_pa->addr); - pf_tbladdr_remove(&empty_pool_pa->addr); - pfi_kif_unref(empty_pool_pa->kif, PFI_KIF_REF_RULE); - TAILQ_REMOVE(poola, empty_pool_pa, entries); -#ifdef __FreeBSD__ - pool_put(&V_pf_pooladdr_pl, empty_pool_pa); -#else - pool_put(&pf_pooladdr_pl, empty_pool_pa); -#endif + struct pf_pooladdr *pa; + + while ((pa = TAILQ_FIRST(poola)) != NULL) { + switch (pa->addr.type) { + case PF_ADDR_DYNIFTL: + pfi_dynaddr_remove(pa->addr.p.dyn); + break; + case PF_ADDR_TABLE: + pfr_detach_table(pa->addr.p.tbl); + break; + } + if (pa->kif) + pfi_kif_unref(pa->kif); + TAILQ_REMOVE(poola, pa, entries); + free(pa, M_PFRULE); } } +static void +pf_unlink_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule) +{ + + PF_RULES_WASSERT(); + + TAILQ_REMOVE(rulequeue, rule, entries); + + PF_UNLNKDRULES_LOCK(); + rule->rule_flag |= PFRULE_REFS; + TAILQ_INSERT_TAIL(&V_pf_unlinked_rules, rule, entries); + PF_UNLNKDRULES_UNLOCK(); +} + void -pf_rm_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule) +pf_free_rule(struct pf_rule *rule) { - if (rulequeue != NULL) { - if (rule->states_cur <= 0) { - /* - * XXX - we need to remove the table *before* detaching - * the rule to make sure the table code does not delete - * the anchor under our feet. 
- */ - pf_tbladdr_remove(&rule->src.addr); - pf_tbladdr_remove(&rule->dst.addr); - if (rule->overload_tbl) - pfr_detach_table(rule->overload_tbl); - } - TAILQ_REMOVE(rulequeue, rule, entries); - rule->entries.tqe_prev = NULL; - rule->nr = -1; - } - if (rule->states_cur > 0 || rule->src_nodes > 0 || - rule->entries.tqe_prev != NULL) - return; - pf_tag_unref(rule->tag); - pf_tag_unref(rule->match_tag); + PF_RULES_WASSERT(); + + if (rule->tag) + tag_unref(&V_pf_tags, rule->tag); + if (rule->match_tag) + tag_unref(&V_pf_tags, rule->match_tag); #ifdef ALTQ if (rule->pqid != rule->qid) pf_qid_unref(rule->pqid); pf_qid_unref(rule->qid); #endif - pf_rtlabel_remove(&rule->src.addr); - pf_rtlabel_remove(&rule->dst.addr); - pfi_dynaddr_remove(&rule->src.addr); - pfi_dynaddr_remove(&rule->dst.addr); - if (rulequeue == NULL) { - pf_tbladdr_remove(&rule->src.addr); - pf_tbladdr_remove(&rule->dst.addr); - if (rule->overload_tbl) - pfr_detach_table(rule->overload_tbl); + switch (rule->src.addr.type) { + case PF_ADDR_DYNIFTL: + pfi_dynaddr_remove(rule->src.addr.p.dyn); + break; + case PF_ADDR_TABLE: + pfr_detach_table(rule->src.addr.p.tbl); + break; + } + switch (rule->dst.addr.type) { + case PF_ADDR_DYNIFTL: + pfi_dynaddr_remove(rule->dst.addr.p.dyn); + break; + case PF_ADDR_TABLE: + pfr_detach_table(rule->dst.addr.p.tbl); + break; } - pfi_kif_unref(rule->kif, PFI_KIF_REF_RULE); + if (rule->overload_tbl) + pfr_detach_table(rule->overload_tbl); + if (rule->kif) + pfi_kif_unref(rule->kif); pf_anchor_remove(rule); pf_empty_pool(&rule->rpool.list); -#ifdef __FreeBSD__ - pool_put(&V_pf_rule_pl, rule); -#else - pool_put(&pf_rule_pl, rule); -#endif + free(rule, M_PFRULE); } -u_int16_t +static u_int16_t tagname2tag(struct pf_tags *head, char *tagname) { struct pf_tagname *tag, *p = NULL; u_int16_t new_tagid = 1; + PF_RULES_WASSERT(); + TAILQ_FOREACH(tag, head, entries) if (strcmp(tagname, tag->name) == 0) { tag->ref++; @@ -685,7 +422,7 @@ tagname2tag(struct pf_tags *head, char *tagname) return (0); /* allocate and fill new struct pf_tagname */ - tag = malloc(sizeof(*tag), M_TEMP, M_NOWAIT|M_ZERO); + tag = malloc(sizeof(*tag), M_PFTAG, M_NOWAIT|M_ZERO); if (tag == NULL) return (0); strlcpy(tag->name, tagname, sizeof(tag->name)); @@ -700,207 +437,78 @@ tagname2tag(struct pf_tags *head, char *tagname) return (tag->tag); } -void -tag2tagname(struct pf_tags *head, u_int16_t tagid, char *p) -{ - struct pf_tagname *tag; - - TAILQ_FOREACH(tag, head, entries) - if (tag->tag == tagid) { - strlcpy(p, tag->name, PF_TAG_NAME_SIZE); - return; - } -} - -void +static void tag_unref(struct pf_tags *head, u_int16_t tag) { struct pf_tagname *p, *next; - if (tag == 0) - return; + PF_RULES_WASSERT(); for (p = TAILQ_FIRST(head); p != NULL; p = next) { next = TAILQ_NEXT(p, entries); if (tag == p->tag) { if (--p->ref == 0) { TAILQ_REMOVE(head, p, entries); - free(p, M_TEMP); + free(p, M_PFTAG); } break; } } } -u_int16_t +static u_int16_t pf_tagname2tag(char *tagname) { -#ifdef __FreeBSD__ return (tagname2tag(&V_pf_tags, tagname)); -#else - return (tagname2tag(&pf_tags, tagname)); -#endif -} - -void -pf_tag2tagname(u_int16_t tagid, char *p) -{ -#ifdef __FreeBSD__ - tag2tagname(&V_pf_tags, tagid, p); -#else - tag2tagname(&pf_tags, tagid, p); -#endif -} - -void -pf_tag_ref(u_int16_t tag) -{ - struct pf_tagname *t; - -#ifdef __FreeBSD__ - TAILQ_FOREACH(t, &V_pf_tags, entries) -#else - TAILQ_FOREACH(t, &pf_tags, entries) -#endif - if (t->tag == tag) - break; - if (t != NULL) - t->ref++; -} - -void -pf_tag_unref(u_int16_t tag) -{ -#ifdef 
__FreeBSD__ - tag_unref(&V_pf_tags, tag); -#else - tag_unref(&pf_tags, tag); -#endif -} - -int -pf_rtlabel_add(struct pf_addr_wrap *a) -{ -#ifdef __FreeBSD__ - /* XXX_IMPORT: later */ - return (0); -#else - if (a->type == PF_ADDR_RTLABEL && - (a->v.rtlabel = rtlabel_name2id(a->v.rtlabelname)) == 0) - return (-1); - return (0); -#endif -} - -void -pf_rtlabel_remove(struct pf_addr_wrap *a) -{ -#ifdef __FreeBSD__ - /* XXX_IMPORT: later */ -#else - if (a->type == PF_ADDR_RTLABEL) - rtlabel_unref(a->v.rtlabel); -#endif -} - -void -pf_rtlabel_copyout(struct pf_addr_wrap *a) -{ -#ifdef __FreeBSD__ - /* XXX_IMPORT: later */ - if (a->type == PF_ADDR_RTLABEL && a->v.rtlabel) - strlcpy(a->v.rtlabelname, "?", sizeof(a->v.rtlabelname)); -#else - const char *name; - - if (a->type == PF_ADDR_RTLABEL && a->v.rtlabel) { - if ((name = rtlabel_id2name(a->v.rtlabel)) == NULL) - strlcpy(a->v.rtlabelname, "?", - sizeof(a->v.rtlabelname)); - else - strlcpy(a->v.rtlabelname, name, - sizeof(a->v.rtlabelname)); - } -#endif } #ifdef ALTQ -u_int32_t +static u_int32_t pf_qname2qid(char *qname) { -#ifdef __FreeBSD__ return ((u_int32_t)tagname2tag(&V_pf_qids, qname)); -#else - return ((u_int32_t)tagname2tag(&pf_qids, qname)); -#endif -} - -void -pf_qid2qname(u_int32_t qid, char *p) -{ -#ifdef __FreeBSD__ - tag2tagname(&V_pf_qids, (u_int16_t)qid, p); -#else - tag2tagname(&pf_qids, (u_int16_t)qid, p); -#endif } -void +static void pf_qid_unref(u_int32_t qid) { -#ifdef __FreeBSD__ tag_unref(&V_pf_qids, (u_int16_t)qid); -#else - tag_unref(&pf_qids, (u_int16_t)qid); -#endif } -int +static int pf_begin_altq(u_int32_t *ticket) { struct pf_altq *altq; int error = 0; + PF_RULES_WASSERT(); + /* Purge the old altq list */ -#ifdef __FreeBSD__ while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) { TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries); if (altq->qname[0] == 0 && (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { -#else - while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) { - TAILQ_REMOVE(pf_altqs_inactive, altq, entries); - if (altq->qname[0] == 0) { -#endif /* detach and destroy the discipline */ error = altq_remove(altq); } else pf_qid_unref(altq->qid); -#ifdef __FreeBSD__ - pool_put(&V_pf_altq_pl, altq); -#else - pool_put(&pf_altq_pl, altq); -#endif + free(altq, M_PFALTQ); } if (error) return (error); -#ifdef __FreeBSD__ *ticket = ++V_ticket_altqs_inactive; V_altqs_inactive_open = 1; -#else - *ticket = ++ticket_altqs_inactive; - altqs_inactive_open = 1; -#endif return (0); } -int +static int pf_rollback_altq(u_int32_t ticket) { struct pf_altq *altq; int error = 0; -#ifdef __FreeBSD__ + PF_RULES_WASSERT(); + if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive) return (0); /* Purge the old altq list */ @@ -908,101 +516,54 @@ pf_rollback_altq(u_int32_t ticket) TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries); if (altq->qname[0] == 0 && (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { -#else - if (!altqs_inactive_open || ticket != ticket_altqs_inactive) - return (0); - /* Purge the old altq list */ - while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) { - TAILQ_REMOVE(pf_altqs_inactive, altq, entries); - if (altq->qname[0] == 0) { -#endif /* detach and destroy the discipline */ error = altq_remove(altq); } else pf_qid_unref(altq->qid); -#ifdef __FreeBSD__ - pool_put(&V_pf_altq_pl, altq); -#else - pool_put(&pf_altq_pl, altq); -#endif + free(altq, M_PFALTQ); } -#ifdef __FreeBSD__ V_altqs_inactive_open = 0; -#else - altqs_inactive_open = 0; -#endif return (error); } -int +static int 
pf_commit_altq(u_int32_t ticket) { struct pf_altqqueue *old_altqs; struct pf_altq *altq; - int s, err, error = 0; + int err, error = 0; + + PF_RULES_WASSERT(); -#ifdef __FreeBSD__ if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive) -#else - if (!altqs_inactive_open || ticket != ticket_altqs_inactive) -#endif return (EBUSY); /* swap altqs, keep the old. */ - s = splsoftnet(); -#ifdef __FreeBSD__ old_altqs = V_pf_altqs_active; V_pf_altqs_active = V_pf_altqs_inactive; V_pf_altqs_inactive = old_altqs; V_ticket_altqs_active = V_ticket_altqs_inactive; -#else - old_altqs = pf_altqs_active; - pf_altqs_active = pf_altqs_inactive; - pf_altqs_inactive = old_altqs; - ticket_altqs_active = ticket_altqs_inactive; -#endif /* Attach new disciplines */ -#ifdef __FreeBSD__ TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { if (altq->qname[0] == 0 && (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { -#else - TAILQ_FOREACH(altq, pf_altqs_active, entries) { - if (altq->qname[0] == 0) { -#endif /* attach the discipline */ error = altq_pfattach(altq); -#ifdef __FreeBSD__ if (error == 0 && V_pf_altq_running) -#else - if (error == 0 && pf_altq_running) -#endif error = pf_enable_altq(altq); - if (error != 0) { - splx(s); + if (error != 0) return (error); - } } } /* Purge the old altq list */ -#ifdef __FreeBSD__ while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) { TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries); if (altq->qname[0] == 0 && (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { -#else - while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) { - TAILQ_REMOVE(pf_altqs_inactive, altq, entries); - if (altq->qname[0] == 0) { -#endif /* detach and destroy the discipline */ -#ifdef __FreeBSD__ if (V_pf_altq_running) -#else - if (pf_altq_running) -#endif error = pf_disable_altq(altq); err = altq_pfdetach(altq); if (err != 0 && error == 0) @@ -1012,28 +573,19 @@ pf_commit_altq(u_int32_t ticket) error = err; } else pf_qid_unref(altq->qid); -#ifdef __FreeBSD__ - pool_put(&V_pf_altq_pl, altq); -#else - pool_put(&pf_altq_pl, altq); -#endif + free(altq, M_PFALTQ); } - splx(s); -#ifdef __FreeBSD__ V_altqs_inactive_open = 0; -#else - altqs_inactive_open = 0; -#endif return (error); } -int +static int pf_enable_altq(struct pf_altq *altq) { struct ifnet *ifp; struct tb_profile tb; - int s, error = 0; + int error = 0; if ((ifp = ifunit(altq->ifname)) == NULL) return (EINVAL); @@ -1045,26 +597,18 @@ pf_enable_altq(struct pf_altq *altq) if (error == 0 && ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) { tb.rate = altq->ifbandwidth; tb.depth = altq->tbrsize; - s = splnet(); -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif error = tbr_set(&ifp->if_snd, &tb); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif - splx(s); } return (error); } -int +static int pf_disable_altq(struct pf_altq *altq) { struct ifnet *ifp; struct tb_profile tb; - int s, error; + int error; if ((ifp = ifunit(altq->ifname)) == NULL) return (EINVAL); @@ -1081,21 +625,12 @@ pf_disable_altq(struct pf_altq *altq) if (error == 0) { /* clear tokenbucket regulator */ tb.rate = 0; - s = splnet(); -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif error = tbr_set(&ifp->if_snd, &tb); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif - splx(s); } return (error); } -#ifdef __FreeBSD__ void pf_altq_ifnet_event(struct ifnet *ifp, int remove) { @@ -1105,26 +640,16 @@ pf_altq_ifnet_event(struct ifnet *ifp, int remove) int error = 0; /* Interrupt userland queue modifications */ -#ifdef __FreeBSD__ if (V_altqs_inactive_open) pf_rollback_altq(V_ticket_altqs_inactive); -#else - if 
(altqs_inactive_open) - pf_rollback_altq(ticket_altqs_inactive); -#endif /* Start new altq ruleset */ if (pf_begin_altq(&ticket)) return; /* Copy the current active set */ -#ifdef __FreeBSD__ TAILQ_FOREACH(a1, V_pf_altqs_active, entries) { - a2 = pool_get(&V_pf_altq_pl, PR_NOWAIT); -#else - TAILQ_FOREACH(a1, pf_altqs_active, entries) { - a2 = pool_get(&pf_altq_pl, PR_NOWAIT); -#endif + a2 = malloc(sizeof(*a2), M_PFALTQ, M_NOWAIT); if (a2 == NULL) { error = ENOMEM; break; @@ -1134,19 +659,11 @@ pf_altq_ifnet_event(struct ifnet *ifp, int remove) if (a2->qname[0] != 0) { if ((a2->qid = pf_qname2qid(a2->qname)) == 0) { error = EBUSY; -#ifdef __FreeBSD__ - pool_put(&V_pf_altq_pl, a2); -#else - pool_put(&pf_altq_pl, a2); -#endif + free(a2, M_PFALTQ); break; } a2->altq_disc = NULL; -#ifdef __FreeBSD__ TAILQ_FOREACH(a3, V_pf_altqs_inactive, entries) { -#else - TAILQ_FOREACH(a3, pf_altqs_inactive, entries) { -#endif if (strncmp(a3->ifname, a2->ifname, IFNAMSIZ) == 0 && a3->qname[0] == 0) { a2->altq_disc = a3->altq_disc; @@ -1160,55 +677,42 @@ pf_altq_ifnet_event(struct ifnet *ifp, int remove) (remove && ifp1 == ifp)) { a2->local_flags |= PFALTQ_FLAG_IF_REMOVED; } else { - PF_UNLOCK(); error = altq_add(a2); - PF_LOCK(); -#ifdef __FreeBSD__ if (ticket != V_ticket_altqs_inactive) -#else - if (ticket != ticket_altqs_inactive) -#endif error = EBUSY; if (error) { -#ifdef __FreeBSD__ - pool_put(&V_pf_altq_pl, a2); -#else - pool_put(&pf_altq_pl, a2); -#endif + free(a2, M_PFALTQ); break; } } -#ifdef __FreeBSD__ TAILQ_INSERT_TAIL(V_pf_altqs_inactive, a2, entries); -#else - TAILQ_INSERT_TAIL(pf_altqs_inactive, a2, entries); -#endif } if (error != 0) pf_rollback_altq(ticket); else pf_commit_altq(ticket); - } -#endif +} #endif /* ALTQ */ -int +static int pf_begin_rules(u_int32_t *ticket, int rs_num, const char *anchor) { struct pf_ruleset *rs; struct pf_rule *rule; + PF_RULES_WASSERT(); + if (rs_num < 0 || rs_num >= PF_RULESET_MAX) return (EINVAL); rs = pf_find_or_create_ruleset(anchor); if (rs == NULL) return (EINVAL); while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) { - pf_rm_rule(rs->rules[rs_num].inactive.ptr, rule); + pf_unlink_rule(rs->rules[rs_num].inactive.ptr, rule); rs->rules[rs_num].inactive.rcount--; } *ticket = ++rs->rules[rs_num].inactive.ticket; @@ -1216,12 +720,14 @@ pf_begin_rules(u_int32_t *ticket, int rs_num, const char *anchor) return (0); } -int +static int pf_rollback_rules(u_int32_t ticket, int rs_num, char *anchor) { struct pf_ruleset *rs; struct pf_rule *rule; + PF_RULES_WASSERT(); + if (rs_num < 0 || rs_num >= PF_RULESET_MAX) return (EINVAL); rs = pf_find_ruleset(anchor); @@ -1229,7 +735,7 @@ pf_rollback_rules(u_int32_t ticket, int rs_num, char *anchor) rs->rules[rs_num].inactive.ticket != ticket) return (0); while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) { - pf_rm_rule(rs->rules[rs_num].inactive.ptr, rule); + pf_unlink_rule(rs->rules[rs_num].inactive.ptr, rule); rs->rules[rs_num].inactive.rcount--; } rs->rules[rs_num].inactive.open = 0; @@ -1252,7 +758,7 @@ pf_rollback_rules(u_int32_t ticket, int rs_num, char *anchor) MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int16_t));\ } while (0) -void +static void pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr) { PF_MD5_UPD(pfr, addr.type); @@ -1269,9 +775,6 @@ pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr) PF_MD5_UPD(pfr, addr.v.a.addr.addr32); PF_MD5_UPD(pfr, addr.v.a.mask.addr32); break; - case PF_ADDR_RTLABEL: - PF_MD5_UPD(pfr, addr.v.rtlabelname); - break; } PF_MD5_UPD(pfr, 
port[0]); @@ -1280,7 +783,7 @@ pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr) PF_MD5_UPD(pfr, port_op); } -void +static void pf_hash_rule(MD5_CTX *ctx, struct pf_rule *rule) { u_int16_t x; @@ -1319,15 +822,17 @@ pf_hash_rule(MD5_CTX *ctx, struct pf_rule *rule) PF_MD5_UPD(rule, tos); } -int +static int pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) { struct pf_ruleset *rs; struct pf_rule *rule, **old_array; struct pf_rulequeue *old_rules; - int s, error; + int error; u_int32_t old_rcount; + PF_RULES_WASSERT(); + if (rs_num < 0 || rs_num >= PF_RULESET_MAX) return (EINVAL); rs = pf_find_ruleset(anchor); @@ -1343,7 +848,6 @@ pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) } /* Swap rules, keep the old. */ - s = splsoftnet(); old_rules = rs->rules[rs_num].active.ptr; old_rcount = rs->rules[rs_num].active.rcount; old_array = rs->rules[rs_num].active.ptr_array; @@ -1365,18 +869,18 @@ pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) /* Purge the old rule list. */ while ((rule = TAILQ_FIRST(old_rules)) != NULL) - pf_rm_rule(old_rules, rule); + pf_unlink_rule(old_rules, rule); if (rs->rules[rs_num].inactive.ptr_array) free(rs->rules[rs_num].inactive.ptr_array, M_TEMP); rs->rules[rs_num].inactive.ptr_array = NULL; rs->rules[rs_num].inactive.rcount = 0; rs->rules[rs_num].inactive.open = 0; pf_remove_if_empty_ruleset(rs); - splx(s); + return (0); } -int +static int pf_setup_pfsync_matching(struct pf_ruleset *rs) { MD5_CTX ctx; @@ -1412,55 +916,53 @@ pf_setup_pfsync_matching(struct pf_ruleset *rs) } MD5Final(digest, &ctx); -#ifdef __FreeBSD__ memcpy(V_pf_status.pf_chksum, digest, sizeof(V_pf_status.pf_chksum)); -#else - memcpy(pf_status.pf_chksum, digest, sizeof(pf_status.pf_chksum)); -#endif return (0); } -int +static int pf_addr_setup(struct pf_ruleset *ruleset, struct pf_addr_wrap *addr, sa_family_t af) { - if (pfi_dynaddr_setup(addr, af) || - pf_tbladdr_setup(ruleset, addr)) - return (EINVAL); + int error = 0; - return (0); + switch (addr->type) { + case PF_ADDR_TABLE: + addr->p.tbl = pfr_attach_table(ruleset, addr->v.tblname); + if (addr->p.tbl == NULL) + error = ENOMEM; + break; + case PF_ADDR_DYNIFTL: + error = pfi_dynaddr_setup(addr, af); + break; + } + + return (error); } -void +static void pf_addr_copyout(struct pf_addr_wrap *addr) { - pfi_dynaddr_copyout(addr); - pf_tbladdr_copyout(addr); - pf_rtlabel_copyout(addr); + + switch (addr->type) { + case PF_ADDR_DYNIFTL: + pfi_dynaddr_copyout(addr); + break; + case PF_ADDR_TABLE: + pf_tbladdr_copyout(addr); + break; + } } -int -#ifdef __FreeBSD__ +static int pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td) -#else -pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) -#endif { - struct pf_pooladdr *pa = NULL; - struct pf_pool *pool = NULL; -#ifndef __FreeBSD__ - int s; -#endif int error = 0; CURVNET_SET(TD_TO_VNET(td)); /* XXX keep in sync with switch() below */ -#ifdef __FreeBSD__ if (securelevel_gt(td->td_ucred, 2)) -#else - if (securelevel > 1) -#endif switch (cmd) { case DIOCGETRULES: case DIOCGETRULE: @@ -1496,9 +998,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCGETSRCNODES: case DIOCCLRSRCNODES: case DIOCIGETIFACES: -#ifdef __FreeBSD__ case DIOCGIFSPEED: -#endif case DIOCSETIFFLAG: case DIOCCLRIFFLAG: break; @@ -1538,9 +1038,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCOSFPGET: case DIOCGETSRCNODES: case DIOCIGETIFACES: -#ifdef __FreeBSD__ case DIOCGIFSPEED: -#endif 
break; case DIOCRCLRTABLES: case DIOCRADDTABLES: @@ -1566,85 +1064,51 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) return (EACCES); } - if (flags & FWRITE) -#ifdef __FreeBSD__ - sx_xlock(&V_pf_consistency_lock); - else - sx_slock(&V_pf_consistency_lock); -#else - rw_enter_write(&pf_consistency_lock); - else - rw_enter_read(&pf_consistency_lock); -#endif - -#ifdef __FreeBSD__ - PF_LOCK(); -#else - s = splsoftnet(); -#endif switch (cmd) { - case DIOCSTART: -#ifdef __FreeBSD__ + PF_RULES_WLOCK(); if (V_pf_status.running) -#else - if (pf_status.running) -#endif error = EEXIST; else { -#ifdef __FreeBSD__ - PF_UNLOCK(); + int cpu; + + PF_RULES_WUNLOCK(); error = hook_pf(); - PF_LOCK(); if (error) { DPFPRINTF(PF_DEBUG_MISC, - ("pf: pfil registeration fail\n")); + ("pf: pfil registration failed\n")); break; } + PF_RULES_WLOCK(); V_pf_status.running = 1; V_pf_status.since = time_second; - if (V_pf_status.stateid == 0) { - V_pf_status.stateid = time_second; - V_pf_status.stateid = V_pf_status.stateid << 32; - } -#else - pf_status.running = 1; - pf_status.since = time_second; + CPU_FOREACH(cpu) + V_pf_stateid[cpu] = time_second; - if (pf_status.stateid == 0) { - pf_status.stateid = time_second; - pf_status.stateid = pf_status.stateid << 32; - } -#endif DPFPRINTF(PF_DEBUG_MISC, ("pf: started\n")); } + PF_RULES_WUNLOCK(); break; case DIOCSTOP: -#ifdef __FreeBSD__ + PF_RULES_WLOCK(); if (!V_pf_status.running) error = ENOENT; else { V_pf_status.running = 0; - PF_UNLOCK(); + PF_RULES_WUNLOCK(); error = dehook_pf(); - PF_LOCK(); if (error) { V_pf_status.running = 1; DPFPRINTF(PF_DEBUG_MISC, - ("pf: pfil unregisteration failed\n")); + ("pf: pfil unregistration failed\n")); } + PF_RULES_WLOCK(); V_pf_status.since = time_second; -#else - if (!pf_status.running) - error = ENOENT; - else { - pf_status.running = 0; - pf_status.since = time_second; -#endif DPFPRINTF(PF_DEBUG_MISC, ("pf: stopped\n")); } + PF_RULES_WUNLOCK(); break; case DIOCADDRULE: { @@ -1652,89 +1116,57 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pf_ruleset *ruleset; struct pf_rule *rule, *tail; struct pf_pooladdr *pa; + struct pfi_kif *kif = NULL; int rs_num; - pr->anchor[sizeof(pr->anchor) - 1] = 0; - ruleset = pf_find_ruleset(pr->anchor); - if (ruleset == NULL) { + if (pr->rule.return_icmp >> 8 > ICMP_MAXTYPE) { error = EINVAL; break; } - rs_num = pf_get_ruleset_number(pr->rule.action); - if (rs_num >= PF_RULESET_MAX) { - error = EINVAL; +#ifndef INET + if (pr->rule.af == AF_INET) { + error = EAFNOSUPPORT; break; } - if (pr->rule.return_icmp >> 8 > ICMP_MAXTYPE) { - error = EINVAL; +#endif /* INET */ +#ifndef INET6 + if (pr->rule.af == AF_INET6) { + error = EAFNOSUPPORT; break; } +#endif /* INET6 */ + + rule = malloc(sizeof(*rule), M_PFRULE, M_WAITOK); + bcopy(&pr->rule, rule, sizeof(struct pf_rule)); + if (rule->ifname[0]) + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); + rule->cuid = td->td_ucred->cr_ruid; + rule->cpid = td->td_proc ? 
td->td_proc->p_pid : 0; + TAILQ_INIT(&rule->rpool.list); + +#define ERROUT(x) { error = (x); goto DIOCADDRULE_error; } + + PF_RULES_WLOCK(); + pr->anchor[sizeof(pr->anchor) - 1] = 0; + ruleset = pf_find_ruleset(pr->anchor); + if (ruleset == NULL) + ERROUT(EINVAL); + rs_num = pf_get_ruleset_number(pr->rule.action); + if (rs_num >= PF_RULESET_MAX) + ERROUT(EINVAL); if (pr->ticket != ruleset->rules[rs_num].inactive.ticket) { -#ifdef __FreeBSD__ DPFPRINTF(PF_DEBUG_MISC, ("ticket: %d != [%d]%d\n", pr->ticket, rs_num, ruleset->rules[rs_num].inactive.ticket)); -#endif - error = EBUSY; - break; + ERROUT(EBUSY); } -#ifdef __FreeBSD__ if (pr->pool_ticket != V_ticket_pabuf) { DPFPRINTF(PF_DEBUG_MISC, ("pool_ticket: %d != %d\n", pr->pool_ticket, V_ticket_pabuf)); -#else - if (pr->pool_ticket != ticket_pabuf) { -#endif - error = EBUSY; - break; - } -#ifdef __FreeBSD__ - rule = pool_get(&V_pf_rule_pl, PR_NOWAIT); -#else - rule = pool_get(&pf_rule_pl, PR_WAITOK|PR_LIMITFAIL); -#endif - if (rule == NULL) { - error = ENOMEM; - break; + ERROUT(EBUSY); } - bcopy(&pr->rule, rule, sizeof(struct pf_rule)); -#ifdef __FreeBSD__ - rule->cuid = td->td_ucred->cr_ruid; - rule->cpid = td->td_proc ? td->td_proc->p_pid : 0; -#else - rule->cuid = p->p_cred->p_ruid; - rule->cpid = p->p_pid; -#endif - rule->anchor = NULL; - rule->kif = NULL; - TAILQ_INIT(&rule->rpool.list); - /* initialize refcounting */ - rule->states_cur = 0; - rule->src_nodes = 0; - rule->entries.tqe_prev = NULL; -#ifndef INET - if (rule->af == AF_INET) { -#ifdef __FreeBSD__ - pool_put(&V_pf_rule_pl, rule); -#else - pool_put(&pf_rule_pl, rule); -#endif - error = EAFNOSUPPORT; - break; - } -#endif /* INET */ -#ifndef INET6 - if (rule->af == AF_INET6) { -#ifdef __FreeBSD__ - pool_put(&V_pf_rule_pl, rule); -#else - pool_put(&pf_rule_pl, rule); -#endif - error = EAFNOSUPPORT; - break; - } -#endif /* INET6 */ + tail = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr, pf_rulequeue); if (tail) @@ -1742,24 +1174,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) else rule->nr = 0; if (rule->ifname[0]) { - rule->kif = pfi_kif_get(rule->ifname); - if (rule->kif == NULL) { -#ifdef __FreeBSD__ - pool_put(&V_pf_rule_pl, rule); -#else - pool_put(&pf_rule_pl, rule); -#endif - error = EINVAL; - break; - } - pfi_kif_ref(rule->kif, PFI_KIF_REF_RULE); - } + rule->kif = pfi_kif_attach(kif, rule->ifname); + pfi_kif_ref(rule->kif); + } else + rule->kif = NULL; -#ifdef __FreeBSD__ /* ROUTING */ if (rule->rtableid > 0 && rule->rtableid >= rt_numfibs) -#else - if (rule->rtableid > 0 && !rtable_exists(rule->rtableid)) -#endif error = EBUSY; #ifdef ALTQ @@ -1784,43 +1204,34 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EBUSY; if (rule->rt && !rule->direction) error = EINVAL; -#if NPFLOG > 0 if (!rule->log) rule->logif = 0; if (rule->logif >= PFLOGIFS_MAX) error = EINVAL; -#endif - if (pf_rtlabel_add(&rule->src.addr) || - pf_rtlabel_add(&rule->dst.addr)) - error = EBUSY; if (pf_addr_setup(ruleset, &rule->src.addr, rule->af)) - error = EINVAL; + error = ENOMEM; if (pf_addr_setup(ruleset, &rule->dst.addr, rule->af)) - error = EINVAL; + error = ENOMEM; if (pf_anchor_setup(rule, ruleset, pr->anchor_call)) error = EINVAL; -#ifdef __FreeBSD__ TAILQ_FOREACH(pa, &V_pf_pabuf, entries) -#else - TAILQ_FOREACH(pa, &pf_pabuf, entries) -#endif - if (pf_tbladdr_setup(ruleset, &pa->addr)) - error = EINVAL; + if (pa->addr.type == PF_ADDR_TABLE) { + pa->addr.p.tbl = pfr_attach_table(ruleset, + pa->addr.v.tblname); + if (pa->addr.p.tbl == 
NULL) + error = ENOMEM; + } if (rule->overload_tblname[0]) { if ((rule->overload_tbl = pfr_attach_table(ruleset, - rule->overload_tblname, 0)) == NULL) + rule->overload_tblname)) == NULL) error = EINVAL; else rule->overload_tbl->pfrkt_flags |= PFR_TFLAG_ACTIVE; } -#ifdef __FreeBSD__ pf_mv_pool(&V_pf_pabuf, &rule->rpool.list); -#else - pf_mv_pool(&pf_pabuf, &rule->rpool.list); -#endif if (((((rule->action == PF_NAT) || (rule->action == PF_RDR) || (rule->action == PF_BINAT)) && rule->anchor == NULL) || (rule->rt > PF_FASTROUTE)) && @@ -1828,24 +1239,26 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EINVAL; if (error) { - pf_rm_rule(NULL, rule); + pf_free_rule(rule); + PF_RULES_WUNLOCK(); break; } -#ifdef __FreeBSD__ - if (!V_debug_pfugidhack && (rule->uid.op || rule->gid.op || - rule->log & PF_LOG_SOCKET_LOOKUP)) { - DPFPRINTF(PF_DEBUG_MISC, - ("pf: debug.pfugidhack enabled\n")); - V_debug_pfugidhack = 1; - } -#endif rule->rpool.cur = TAILQ_FIRST(&rule->rpool.list); rule->evaluations = rule->packets[0] = rule->packets[1] = rule->bytes[0] = rule->bytes[1] = 0; TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr, rule, entries); ruleset->rules[rs_num].inactive.rcount++; + PF_RULES_WUNLOCK(); + break; + +#undef ERROUT +DIOCADDRULE_error: + PF_RULES_WUNLOCK(); + free(rule, M_PFRULE); + if (kif) + free(kif, PFI_MTYPE); break; } @@ -1855,14 +1268,17 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pf_rule *tail; int rs_num; + PF_RULES_WLOCK(); pr->anchor[sizeof(pr->anchor) - 1] = 0; ruleset = pf_find_ruleset(pr->anchor); if (ruleset == NULL) { + PF_RULES_WUNLOCK(); error = EINVAL; break; } rs_num = pf_get_ruleset_number(pr->rule.action); if (rs_num >= PF_RULESET_MAX) { + PF_RULES_WUNLOCK(); error = EINVAL; break; } @@ -1873,6 +1289,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) else pr->nr = 0; pr->ticket = ruleset->rules[rs_num].active.ticket; + PF_RULES_WUNLOCK(); break; } @@ -1882,18 +1299,22 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pf_rule *rule; int rs_num, i; + PF_RULES_WLOCK(); pr->anchor[sizeof(pr->anchor) - 1] = 0; ruleset = pf_find_ruleset(pr->anchor); if (ruleset == NULL) { + PF_RULES_WUNLOCK(); error = EINVAL; break; } rs_num = pf_get_ruleset_number(pr->rule.action); if (rs_num >= PF_RULESET_MAX) { + PF_RULES_WUNLOCK(); error = EINVAL; break; } if (pr->ticket != ruleset->rules[rs_num].active.ticket) { + PF_RULES_WUNLOCK(); error = EBUSY; break; } @@ -1901,11 +1322,13 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) while ((rule != NULL) && (rule->nr != pr->nr)) rule = TAILQ_NEXT(rule, entries); if (rule == NULL) { + PF_RULES_WUNLOCK(); error = EBUSY; break; } bcopy(rule, &pr->rule, sizeof(struct pf_rule)); if (pf_anchor_copyout(ruleset, rule, pr)) { + PF_RULES_WUNLOCK(); error = EBUSY; break; } @@ -1924,6 +1347,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) rule->bytes[0] = rule->bytes[1] = 0; rule->states_tot = 0; } + PF_RULES_WUNLOCK(); break; } @@ -1931,116 +1355,80 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pfioc_rule *pcr = (struct pfioc_rule *)addr; struct pf_ruleset *ruleset; struct pf_rule *oldrule = NULL, *newrule = NULL; + struct pfi_kif *kif = NULL; + struct pf_pooladdr *pa; u_int32_t nr = 0; int rs_num; - if (!(pcr->action == PF_CHANGE_REMOVE || - pcr->action == PF_CHANGE_GET_TICKET) && -#ifdef __FreeBSD__ - pcr->pool_ticket != 
V_ticket_pabuf) { -#else - pcr->pool_ticket != ticket_pabuf) { -#endif - error = EBUSY; - break; - } - if (pcr->action < PF_CHANGE_ADD_HEAD || pcr->action > PF_CHANGE_GET_TICKET) { error = EINVAL; break; } - ruleset = pf_find_ruleset(pcr->anchor); - if (ruleset == NULL) { - error = EINVAL; - break; - } - rs_num = pf_get_ruleset_number(pcr->rule.action); - if (rs_num >= PF_RULESET_MAX) { + if (pcr->rule.return_icmp >> 8 > ICMP_MAXTYPE) { error = EINVAL; break; } - if (pcr->action == PF_CHANGE_GET_TICKET) { - pcr->ticket = ++ruleset->rules[rs_num].active.ticket; - break; - } else { - if (pcr->ticket != - ruleset->rules[rs_num].active.ticket) { - error = EINVAL; - break; - } - if (pcr->rule.return_icmp >> 8 > ICMP_MAXTYPE) { - error = EINVAL; - break; - } - } - if (pcr->action != PF_CHANGE_REMOVE) { -#ifdef __FreeBSD__ - newrule = pool_get(&V_pf_rule_pl, PR_NOWAIT); -#else - newrule = pool_get(&pf_rule_pl, PR_WAITOK|PR_LIMITFAIL); -#endif - if (newrule == NULL) { - error = ENOMEM; - break; - } - bcopy(&pcr->rule, newrule, sizeof(struct pf_rule)); -#ifdef __FreeBSD__ - newrule->cuid = td->td_ucred->cr_ruid; - newrule->cpid = td->td_proc ? td->td_proc->p_pid : 0; -#else - newrule->cuid = p->p_cred->p_ruid; - newrule->cpid = p->p_pid; -#endif - TAILQ_INIT(&newrule->rpool.list); - /* initialize refcounting */ - newrule->states_cur = 0; - newrule->entries.tqe_prev = NULL; #ifndef INET - if (newrule->af == AF_INET) { -#ifdef __FreeBSD__ - pool_put(&V_pf_rule_pl, newrule); -#else - pool_put(&pf_rule_pl, newrule); -#endif + if (pcr->rule.af == AF_INET) { error = EAFNOSUPPORT; break; } #endif /* INET */ #ifndef INET6 - if (newrule->af == AF_INET6) { -#ifdef __FreeBSD__ - pool_put(&V_pf_rule_pl, newrule); -#else - pool_put(&pf_rule_pl, newrule); -#endif + if (pcr->rule.af == AF_INET6) { error = EAFNOSUPPORT; break; } #endif /* INET6 */ + newrule = malloc(sizeof(*newrule), M_PFRULE, M_WAITOK); + bcopy(&pcr->rule, newrule, sizeof(struct pf_rule)); + newrule->cuid = td->td_ucred->cr_ruid; + newrule->cpid = td->td_proc ? td->td_proc->p_pid : 0; + TAILQ_INIT(&newrule->rpool.list); + /* Initialize refcounting. 
*/ + newrule->states_cur = 0; + newrule->entries.tqe_prev = NULL; + + if (newrule->ifname[0]) + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); + } + +#define ERROUT(x) { error = (x); goto DIOCCHANGERULE_error; } + + PF_RULES_WLOCK(); + if (!(pcr->action == PF_CHANGE_REMOVE || + pcr->action == PF_CHANGE_GET_TICKET) && + pcr->pool_ticket != V_ticket_pabuf) + ERROUT(EBUSY); + + ruleset = pf_find_ruleset(pcr->anchor); + if (ruleset == NULL) + ERROUT(EINVAL); + + rs_num = pf_get_ruleset_number(pcr->rule.action); + if (rs_num >= PF_RULESET_MAX) + ERROUT(EINVAL); + + if (pcr->action == PF_CHANGE_GET_TICKET) { + pcr->ticket = ++ruleset->rules[rs_num].active.ticket; + ERROUT(0); + } else if (pcr->ticket != + ruleset->rules[rs_num].active.ticket) + ERROUT(EINVAL); + + if (pcr->action != PF_CHANGE_REMOVE) { if (newrule->ifname[0]) { - newrule->kif = pfi_kif_get(newrule->ifname); - if (newrule->kif == NULL) { -#ifdef __FreeBSD__ - pool_put(&V_pf_rule_pl, newrule); -#else - pool_put(&pf_rule_pl, newrule); -#endif - error = EINVAL; - break; - } - pfi_kif_ref(newrule->kif, PFI_KIF_REF_RULE); + newrule->kif = pfi_kif_attach(kif, + newrule->ifname); + pfi_kif_ref(newrule->kif); } else newrule->kif = NULL; if (newrule->rtableid > 0 && -#ifdef __FreeBSD__ /* ROUTING */ newrule->rtableid >= rt_numfibs) -#else - !rtable_exists(newrule->rtableid)) -#endif error = EBUSY; #ifdef ALTQ @@ -2067,32 +1455,28 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EBUSY; if (newrule->rt && !newrule->direction) error = EINVAL; -#if NPFLOG > 0 if (!newrule->log) newrule->logif = 0; if (newrule->logif >= PFLOGIFS_MAX) error = EINVAL; -#endif - if (pf_rtlabel_add(&newrule->src.addr) || - pf_rtlabel_add(&newrule->dst.addr)) - error = EBUSY; if (pf_addr_setup(ruleset, &newrule->src.addr, newrule->af)) - error = EINVAL; + error = ENOMEM; if (pf_addr_setup(ruleset, &newrule->dst.addr, newrule->af)) - error = EINVAL; + error = ENOMEM; if (pf_anchor_setup(newrule, ruleset, pcr->anchor_call)) error = EINVAL; -#ifdef __FreeBSD__ TAILQ_FOREACH(pa, &V_pf_pabuf, entries) -#else - TAILQ_FOREACH(pa, &pf_pabuf, entries) -#endif - if (pf_tbladdr_setup(ruleset, &pa->addr)) - error = EINVAL; + if (pa->addr.type == PF_ADDR_TABLE) { + pa->addr.p.tbl = + pfr_attach_table(ruleset, + pa->addr.v.tblname); + if (pa->addr.p.tbl == NULL) + error = ENOMEM; + } if (newrule->overload_tblname[0]) { if ((newrule->overload_tbl = pfr_attach_table( - ruleset, newrule->overload_tblname, 0)) == + ruleset, newrule->overload_tblname)) == NULL) error = EINVAL; else @@ -2100,11 +1484,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) PFR_TFLAG_ACTIVE; } -#ifdef __FreeBSD__ pf_mv_pool(&V_pf_pabuf, &newrule->rpool.list); -#else - pf_mv_pool(&pf_pabuf, &newrule->rpool.list); -#endif if (((((newrule->action == PF_NAT) || (newrule->action == PF_RDR) || (newrule->action == PF_BINAT) || @@ -2114,30 +1494,17 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EINVAL; if (error) { - pf_rm_rule(NULL, newrule); + pf_free_rule(newrule); + PF_RULES_WUNLOCK(); break; } -#ifdef __FreeBSD__ - if (!V_debug_pfugidhack && (newrule->uid.op || - newrule->gid.op || - newrule->log & PF_LOG_SOCKET_LOOKUP)) { - DPFPRINTF(PF_DEBUG_MISC, - ("pf: debug.pfugidhack enabled\n")); - V_debug_pfugidhack = 1; - } -#endif - newrule->rpool.cur = TAILQ_FIRST(&newrule->rpool.list); newrule->evaluations = 0; newrule->packets[0] = newrule->packets[1] = 0; newrule->bytes[0] = newrule->bytes[1] = 0; } -#ifdef 
__FreeBSD__ pf_empty_pool(&V_pf_pabuf); -#else - pf_empty_pool(&pf_pabuf); -#endif if (pcr->action == PF_CHANGE_ADD_HEAD) oldrule = TAILQ_FIRST( @@ -2152,14 +1519,16 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) oldrule = TAILQ_NEXT(oldrule, entries); if (oldrule == NULL) { if (newrule != NULL) - pf_rm_rule(NULL, newrule); + pf_free_rule(newrule); + PF_RULES_WUNLOCK(); error = EINVAL; break; } } if (pcr->action == PF_CHANGE_REMOVE) { - pf_rm_rule(ruleset->rules[rs_num].active.ptr, oldrule); + pf_unlink_rule(ruleset->rules[rs_num].active.ptr, + oldrule); ruleset->rules[rs_num].active.rcount--; } else { if (oldrule == NULL) @@ -2186,114 +1555,120 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) pf_calc_skip_steps(ruleset->rules[rs_num].active.ptr); pf_remove_if_empty_ruleset(ruleset); + PF_RULES_WUNLOCK(); + break; + +#undef ERROUT +DIOCCHANGERULE_error: + PF_RULES_WUNLOCK(); + if (newrule != NULL) + free(newrule, M_PFRULE); + if (kif != NULL) + free(kif, PFI_MTYPE); break; } case DIOCCLRSTATES: { - struct pf_state *s, *nexts; + struct pf_state *s; struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr; - u_int killed = 0; - -#ifdef __FreeBSD__ - for (s = RB_MIN(pf_state_tree_id, &V_tree_id); s; s = nexts) { - nexts = RB_NEXT(pf_state_tree_id, &V_tree_id, s); -#else - for (s = RB_MIN(pf_state_tree_id, &tree_id); s; s = nexts) { - nexts = RB_NEXT(pf_state_tree_id, &tree_id, s); -#endif - - if (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname, - s->kif->pfik_name)) { -#if NPFSYNC > 0 - /* don't send out individual delete messages */ - SET(s->state_flags, PFSTATE_NOSYNC); -#endif - pf_unlink_state(s); - killed++; - } + u_int i, killed = 0; + + for (i = 0; i <= V_pf_hashmask; i++) { + struct pf_idhash *ih = &V_pf_idhash[i]; + +relock_DIOCCLRSTATES: + PF_HASHROW_LOCK(ih); + LIST_FOREACH(s, &ih->states, entry) + if (!psk->psk_ifname[0] || + !strcmp(psk->psk_ifname, + s->kif->pfik_name)) { + /* + * Don't send out individual + * delete messages. 
+ */ + s->state_flags |= PFSTATE_NOSYNC; + pf_unlink_state(s, PF_ENTER_LOCKED); + killed++; + goto relock_DIOCCLRSTATES; + } + PF_HASHROW_UNLOCK(ih); } psk->psk_killed = killed; -#if NPFSYNC > 0 -#ifdef __FreeBSD__ if (pfsync_clear_states_ptr != NULL) pfsync_clear_states_ptr(V_pf_status.hostid, psk->psk_ifname); -#else - pfsync_clear_states(pf_status.hostid, psk->psk_ifname); -#endif -#endif break; } case DIOCKILLSTATES: { - struct pf_state *s, *nexts; + struct pf_state *s; struct pf_state_key *sk; struct pf_addr *srcaddr, *dstaddr; u_int16_t srcport, dstport; struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr; - u_int killed = 0; + u_int i, killed = 0; if (psk->psk_pfcmp.id) { if (psk->psk_pfcmp.creatorid == 0) -#ifdef __FreeBSD__ psk->psk_pfcmp.creatorid = V_pf_status.hostid; -#else - psk->psk_pfcmp.creatorid = pf_status.hostid; -#endif - if ((s = pf_find_state_byid(&psk->psk_pfcmp))) { - pf_unlink_state(s); + if ((s = pf_find_state_byid(psk->psk_pfcmp.id, + psk->psk_pfcmp.creatorid))) { + pf_unlink_state(s, PF_ENTER_LOCKED); psk->psk_killed = 1; } break; } -#ifdef __FreeBSD__ - for (s = RB_MIN(pf_state_tree_id, &V_tree_id); s; - s = nexts) { - nexts = RB_NEXT(pf_state_tree_id, &V_tree_id, s); -#else - for (s = RB_MIN(pf_state_tree_id, &tree_id); s; - s = nexts) { - nexts = RB_NEXT(pf_state_tree_id, &tree_id, s); -#endif - sk = s->key[PF_SK_WIRE]; - - if (s->direction == PF_OUT) { - srcaddr = &sk->addr[1]; - dstaddr = &sk->addr[0]; - srcport = sk->port[0]; - dstport = sk->port[0]; - } else { - srcaddr = &sk->addr[0]; - dstaddr = &sk->addr[1]; - srcport = sk->port[0]; - dstport = sk->port[0]; - } - if ((!psk->psk_af || sk->af == psk->psk_af) - && (!psk->psk_proto || psk->psk_proto == - sk->proto) && - PF_MATCHA(psk->psk_src.neg, - &psk->psk_src.addr.v.a.addr, - &psk->psk_src.addr.v.a.mask, - srcaddr, sk->af) && - PF_MATCHA(psk->psk_dst.neg, - &psk->psk_dst.addr.v.a.addr, - &psk->psk_dst.addr.v.a.mask, - dstaddr, sk->af) && - (psk->psk_src.port_op == 0 || - pf_match_port(psk->psk_src.port_op, - psk->psk_src.port[0], psk->psk_src.port[1], - srcport)) && - (psk->psk_dst.port_op == 0 || - pf_match_port(psk->psk_dst.port_op, - psk->psk_dst.port[0], psk->psk_dst.port[1], - dstport)) && - (!psk->psk_label[0] || (s->rule.ptr->label[0] && - !strcmp(psk->psk_label, s->rule.ptr->label))) && - (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname, - s->kif->pfik_name))) { - pf_unlink_state(s); - killed++; + for (i = 0; i <= V_pf_hashmask; i++) { + struct pf_idhash *ih = &V_pf_idhash[i]; + +relock_DIOCKILLSTATES: + PF_HASHROW_LOCK(ih); + LIST_FOREACH(s, &ih->states, entry) { + sk = s->key[PF_SK_WIRE]; + if (s->direction == PF_OUT) { + srcaddr = &sk->addr[1]; + dstaddr = &sk->addr[0]; + srcport = sk->port[0]; + dstport = sk->port[0]; + } else { + srcaddr = &sk->addr[0]; + dstaddr = &sk->addr[1]; + srcport = sk->port[0]; + dstport = sk->port[0]; + } + + if ((!psk->psk_af || sk->af == psk->psk_af) + && (!psk->psk_proto || psk->psk_proto == + sk->proto) && + PF_MATCHA(psk->psk_src.neg, + &psk->psk_src.addr.v.a.addr, + &psk->psk_src.addr.v.a.mask, + srcaddr, sk->af) && + PF_MATCHA(psk->psk_dst.neg, + &psk->psk_dst.addr.v.a.addr, + &psk->psk_dst.addr.v.a.mask, + dstaddr, sk->af) && + (psk->psk_src.port_op == 0 || + pf_match_port(psk->psk_src.port_op, + psk->psk_src.port[0], psk->psk_src.port[1], + srcport)) && + (psk->psk_dst.port_op == 0 || + pf_match_port(psk->psk_dst.port_op, + psk->psk_dst.port[0], psk->psk_dst.port[1], + dstport)) && + (!psk->psk_label[0] || + (s->rule.ptr->label[0] && + 
!strcmp(psk->psk_label, + s->rule.ptr->label))) && + (!psk->psk_ifname[0] || + !strcmp(psk->psk_ifname, + s->kif->pfik_name))) { + pf_unlink_state(s, PF_ENTER_LOCKED); + killed++; + goto relock_DIOCKILLSTATES; + } } + PF_HASHROW_UNLOCK(ih); } psk->psk_killed = killed; break; @@ -2308,98 +1683,83 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EINVAL; break; } -#ifdef __FreeBSD__ - if (pfsync_state_import_ptr != NULL) + if (pfsync_state_import_ptr != NULL) { + PF_RULES_RLOCK(); error = pfsync_state_import_ptr(sp, PFSYNC_SI_IOCTL); -#else - error = pfsync_state_import(sp, PFSYNC_SI_IOCTL); -#endif + PF_RULES_RUNLOCK(); + } + error = EOPNOTSUPP; break; } case DIOCGETSTATE: { struct pfioc_state *ps = (struct pfioc_state *)addr; struct pf_state *s; - struct pf_state_cmp id_key; - - bcopy(ps->state.id, &id_key.id, sizeof(id_key.id)); - id_key.creatorid = ps->state.creatorid; - s = pf_find_state_byid(&id_key); + s = pf_find_state_byid(ps->state.id, ps->state.creatorid); if (s == NULL) { error = ENOENT; break; } pfsync_state_export(&ps->state, s); + PF_STATE_UNLOCK(s); break; } case DIOCGETSTATES: { struct pfioc_states *ps = (struct pfioc_states *)addr; - struct pf_state *state; - struct pfsync_state *p, *pstore; - u_int32_t nr = 0; + struct pf_state *s; + struct pfsync_state *pstore, *p; + int i, nr; if (ps->ps_len == 0) { -#ifdef __FreeBSD__ - nr = V_pf_status.states; -#else - nr = pf_status.states; -#endif + nr = uma_zone_get_cur(V_pf_state_z); ps->ps_len = sizeof(struct pfsync_state) * nr; break; } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - pstore = malloc(sizeof(*pstore), M_TEMP, M_WAITOK); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif + p = pstore = malloc(ps->ps_len, M_TEMP, M_WAITOK); + nr = 0; - p = ps->ps_states; + for (i = 0; i <= V_pf_hashmask; i++) { + struct pf_idhash *ih = &V_pf_idhash[i]; -#ifdef __FreeBSD__ - state = TAILQ_FIRST(&V_state_list); -#else - state = TAILQ_FIRST(&state_list); -#endif - while (state) { - if (state->timeout != PFTM_UNLINKED) { - if ((nr+1) * sizeof(*p) > (unsigned)ps->ps_len) - break; - pfsync_state_export(pstore, state); -#ifdef __FreeBSD__ - PF_COPYOUT(pstore, p, sizeof(*p), error); -#else - error = copyout(pstore, p, sizeof(*p)); -#endif - if (error) { - free(pstore, M_TEMP); - goto fail; + PF_HASHROW_LOCK(ih); + LIST_FOREACH(s, &ih->states, entry) { + + if (s->timeout == PFTM_UNLINKED) + continue; + + if ((nr+1) * sizeof(*p) > ps->ps_len) { + PF_HASHROW_UNLOCK(ih); + goto DIOCGETSTATES_full; } + pfsync_state_export(p, s); p++; nr++; } - state = TAILQ_NEXT(state, entry_list); + PF_HASHROW_UNLOCK(ih); + } +DIOCGETSTATES_full: + error = copyout(pstore, ps->ps_states, + sizeof(struct pfsync_state) * nr); + if (error) { + free(pstore, M_TEMP); + break; } - ps->ps_len = sizeof(struct pfsync_state) * nr; - free(pstore, M_TEMP); + break; } case DIOCGETSTATUS: { struct pf_status *s = (struct pf_status *)addr; -#ifdef __FreeBSD__ + PF_RULES_RLOCK(); bcopy(&V_pf_status, s, sizeof(struct pf_status)); -#else - bcopy(&pf_status, s, sizeof(struct pf_status)); -#endif pfi_update_status(s->ifname, s); + PF_RULES_RUNLOCK(); break; } @@ -2407,37 +1767,24 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pfioc_if *pi = (struct pfioc_if *)addr; if (pi->ifname[0] == 0) { -#ifdef __FreeBSD__ bzero(V_pf_status.ifname, IFNAMSIZ); -#else - bzero(pf_status.ifname, IFNAMSIZ); -#endif break; } -#ifdef __FreeBSD__ + PF_RULES_WLOCK(); strlcpy(V_pf_status.ifname, pi->ifname, IFNAMSIZ); -#else - strlcpy(pf_status.ifname, 
pi->ifname, IFNAMSIZ); -#endif + PF_RULES_WUNLOCK(); break; } case DIOCCLRSTATUS: { -#ifdef __FreeBSD__ + PF_RULES_WLOCK(); bzero(V_pf_status.counters, sizeof(V_pf_status.counters)); bzero(V_pf_status.fcounters, sizeof(V_pf_status.fcounters)); bzero(V_pf_status.scounters, sizeof(V_pf_status.scounters)); V_pf_status.since = time_second; if (*V_pf_status.ifname) pfi_update_status(V_pf_status.ifname, NULL); -#else - bzero(pf_status.counters, sizeof(pf_status.counters)); - bzero(pf_status.fcounters, sizeof(pf_status.fcounters)); - bzero(pf_status.scounters, sizeof(pf_status.scounters)); - pf_status.since = time_second; - if (*pf_status.ifname) - pfi_update_status(pf_status.ifname, NULL); -#endif + PF_RULES_WUNLOCK(); break; } @@ -2473,6 +1820,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (m > 1) error = E2BIG; /* more than one state */ else if (state != NULL) { + /* XXXGL: not locked read */ sk = state->key[sidx]; PF_ACPY(&pnl->rsaddr, &sk->addr[sidx], sk->af); pnl->rsport = sk->port[sidx]; @@ -2491,23 +1839,17 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (pt->timeout < 0 || pt->timeout >= PFTM_MAX || pt->seconds < 0) { error = EINVAL; - goto fail; + break; } -#ifdef __FreeBSD__ + PF_RULES_WLOCK(); old = V_pf_default_rule.timeout[pt->timeout]; -#else - old = pf_default_rule.timeout[pt->timeout]; -#endif if (pt->timeout == PFTM_INTERVAL && pt->seconds == 0) pt->seconds = 1; -#ifdef __FreeBSD__ V_pf_default_rule.timeout[pt->timeout] = pt->seconds; -#else - pf_default_rule.timeout[pt->timeout] = pt->seconds; -#endif if (pt->timeout == PFTM_INTERVAL && pt->seconds < old) wakeup(pf_purge_thread); pt->seconds = old; + PF_RULES_WUNLOCK(); break; } @@ -2516,13 +1858,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (pt->timeout < 0 || pt->timeout >= PFTM_MAX) { error = EINVAL; - goto fail; + break; } -#ifdef __FreeBSD__ + PF_RULES_RLOCK(); pt->seconds = V_pf_default_rule.timeout[pt->timeout]; -#else - pt->seconds = pf_default_rule.timeout[pt->timeout]; -#endif + PF_RULES_RUNLOCK(); break; } @@ -2531,13 +1871,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (pl->index < 0 || pl->index >= PF_LIMIT_MAX) { error = EINVAL; - goto fail; + break; } -#ifdef __FreeBSD__ - pl->limit = V_pf_pool_limits[pl->index].limit; -#else - pl->limit = pf_pool_limits[pl->index].limit; -#endif + PF_RULES_RLOCK(); + pl->limit = V_pf_limits[pl->index].limit; + PF_RULES_RUNLOCK(); break; } @@ -2545,41 +1883,27 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pfioc_limit *pl = (struct pfioc_limit *)addr; int old_limit; + PF_RULES_WLOCK(); if (pl->index < 0 || pl->index >= PF_LIMIT_MAX || -#ifdef __FreeBSD__ - V_pf_pool_limits[pl->index].pp == NULL) { -#else - pf_pool_limits[pl->index].pp == NULL) { -#endif + V_pf_limits[pl->index].zone == NULL) { + PF_RULES_WUNLOCK(); error = EINVAL; - goto fail; - } -#ifdef __FreeBSD__ - uma_zone_set_max(V_pf_pool_limits[pl->index].pp, pl->limit); - old_limit = V_pf_pool_limits[pl->index].limit; - V_pf_pool_limits[pl->index].limit = pl->limit; - pl->limit = old_limit; -#else - if (pool_sethardlimit(pf_pool_limits[pl->index].pp, - pl->limit, NULL, 0) != 0) { - error = EBUSY; - goto fail; + break; } - old_limit = pf_pool_limits[pl->index].limit; - pf_pool_limits[pl->index].limit = pl->limit; + uma_zone_set_max(V_pf_limits[pl->index].zone, pl->limit); + old_limit = V_pf_limits[pl->index].limit; + V_pf_limits[pl->index].limit = pl->limit; 
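The DIOCCLRSTATES and DIOCKILLSTATES handlers above replace the single RB-tree walk with a sweep over the state hash: each row is locked individually, and after a state is unlinked the scan of that row restarts from its head (the relock_ labels). A small sketch of that per-row, restart-after-removal traversal, using pthread mutexes and a toy singly linked list in place of the kernel structures; all names are illustrative:

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define HASHSIZE 4

    struct state {
        struct state    *next;
        char             ifname[16];
    };

    struct idhash {
        pthread_mutex_t  lock;
        struct state    *head;
    };

    static struct idhash hashtbl[HASHSIZE];

    static void
    state_insert(unsigned slot, const char *ifname)
    {
        struct state *s = calloc(1, sizeof(*s));

        if (s == NULL)
            exit(1);
        strncpy(s->ifname, ifname, sizeof(s->ifname) - 1);
        s->next = hashtbl[slot % HASHSIZE].head;
        hashtbl[slot % HASHSIZE].head = s;
    }

    static unsigned
    clear_states(const char *ifname)
    {
        unsigned i, killed = 0;

        for (i = 0; i < HASHSIZE; i++) {
            struct idhash *ih = &hashtbl[i];
            struct state *s, **sp;

            pthread_mutex_lock(&ih->lock);      /* like PF_HASHROW_LOCK() */
    relock:
            for (s = ih->head; s != NULL; s = s->next)
                if (ifname[0] == '\0' || strcmp(ifname, s->ifname) == 0) {
                    /* Unlink under the row lock, then rescan the row. */
                    for (sp = &ih->head; *sp != s; sp = &(*sp)->next)
                        ;
                    *sp = s->next;
                    free(s);
                    killed++;
                    goto relock;
                }
            pthread_mutex_unlock(&ih->lock);    /* like PF_HASHROW_UNLOCK() */
        }
        return (killed);
    }

    int
    main(void)
    {
        unsigned i;

        for (i = 0; i < HASHSIZE; i++)
            pthread_mutex_init(&hashtbl[i].lock, NULL);
        state_insert(0, "em0");
        state_insert(1, "em1");
        state_insert(1, "em0");
        printf("killed %u states on em0\n", clear_states("em0"));
        return (0);
    }

Restarting the row after every unlink keeps the loop safe without a _SAFE iterator, and the lock scope never exceeds a single hash row.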
pl->limit = old_limit; -#endif + PF_RULES_WUNLOCK(); break; } case DIOCSETDEBUG: { u_int32_t *level = (u_int32_t *)addr; -#ifdef __FreeBSD__ + PF_RULES_WLOCK(); V_pf_status.debug = *level; -#else - pf_status.debug = *level; -#endif + PF_RULES_WUNLOCK(); break; } @@ -2588,16 +1912,17 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pf_ruleset *ruleset = &pf_main_ruleset; struct pf_rule *rule; + PF_RULES_WLOCK(); TAILQ_FOREACH(rule, ruleset->rules[PF_RULESET_FILTER].active.ptr, entries) { rule->evaluations = 0; rule->packets[0] = rule->packets[1] = 0; rule->bytes[0] = rule->bytes[1] = 0; } + PF_RULES_WUNLOCK(); break; } -#ifdef __FreeBSD__ case DIOCGIFSPEED: { struct pf_ifspeed *psp = (struct pf_ifspeed *)addr; struct pf_ifspeed ps; @@ -2615,32 +1940,24 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EINVAL; break; } -#endif /* __FreeBSD__ */ #ifdef ALTQ case DIOCSTARTALTQ: { struct pf_altq *altq; + PF_RULES_WLOCK(); /* enable all altq interfaces on active list */ -#ifdef __FreeBSD__ TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { if (altq->qname[0] == 0 && (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { -#else - TAILQ_FOREACH(altq, pf_altqs_active, entries) { - if (altq->qname[0] == 0) { -#endif error = pf_enable_altq(altq); if (error != 0) break; } } if (error == 0) -#ifdef __FreeBSD__ V_pf_altq_running = 1; -#else - pf_altq_running = 1; -#endif + PF_RULES_WUNLOCK(); DPFPRINTF(PF_DEBUG_MISC, ("altq: started\n")); break; } @@ -2648,26 +1965,19 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCSTOPALTQ: { struct pf_altq *altq; + PF_RULES_WLOCK(); /* disable all altq interfaces on active list */ -#ifdef __FreeBSD__ TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { if (altq->qname[0] == 0 && (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { -#else - TAILQ_FOREACH(altq, pf_altqs_active, entries) { - if (altq->qname[0] == 0) { -#endif error = pf_disable_altq(altq); if (error != 0) break; } } if (error == 0) -#ifdef __FreeBSD__ V_pf_altq_running = 0; -#else - pf_altq_running = 0; -#endif + PF_RULES_WUNLOCK(); DPFPRINTF(PF_DEBUG_MISC, ("altq: stopped\n")); break; } @@ -2675,28 +1985,19 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCADDALTQ: { struct pfioc_altq *pa = (struct pfioc_altq *)addr; struct pf_altq *altq, *a; + struct ifnet *ifp; + + altq = malloc(sizeof(*altq), M_PFALTQ, M_WAITOK); + bcopy(&pa->altq, altq, sizeof(struct pf_altq)); + altq->local_flags = 0; -#ifdef __FreeBSD__ + PF_RULES_WLOCK(); if (pa->ticket != V_ticket_altqs_inactive) { -#else - if (pa->ticket != ticket_altqs_inactive) { -#endif + PF_RULES_WUNLOCK(); + free(altq, M_PFALTQ); error = EBUSY; break; } -#ifdef __FreeBSD__ - altq = pool_get(&V_pf_altq_pl, PR_NOWAIT); -#else - altq = pool_get(&pf_altq_pl, PR_WAITOK|PR_LIMITFAIL); -#endif - if (altq == NULL) { - error = ENOMEM; - break; - } - bcopy(&pa->altq, altq, sizeof(struct pf_altq)); -#ifdef __FreeBSD__ - altq->local_flags = 0; -#endif /* * if this is for a queue, find the discipline and @@ -2704,20 +2005,13 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) */ if (altq->qname[0] != 0) { if ((altq->qid = pf_qname2qid(altq->qname)) == 0) { + PF_RULES_WUNLOCK(); error = EBUSY; -#ifdef __FreeBSD__ - pool_put(&V_pf_altq_pl, altq); -#else - pool_put(&pf_altq_pl, altq); -#endif + free(altq, M_PFALTQ); break; } altq->altq_disc = NULL; -#ifdef __FreeBSD__ TAILQ_FOREACH(a, V_pf_altqs_inactive, entries) { 
-#else - TAILQ_FOREACH(a, pf_altqs_inactive, entries) { -#endif if (strncmp(a->ifname, altq->ifname, IFNAMSIZ) == 0 && a->qname[0] == 0) { altq->altq_disc = a->altq_disc; @@ -2726,34 +2020,20 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } } -#ifdef __FreeBSD__ - struct ifnet *ifp; - - if ((ifp = ifunit(altq->ifname)) == NULL) { + if ((ifp = ifunit(altq->ifname)) == NULL) altq->local_flags |= PFALTQ_FLAG_IF_REMOVED; - } else { - PF_UNLOCK(); -#endif - error = altq_add(altq); -#ifdef __FreeBSD__ - PF_LOCK(); - } -#endif + else + error = altq_add(altq); + if (error) { -#ifdef __FreeBSD__ - pool_put(&V_pf_altq_pl, altq); -#else - pool_put(&pf_altq_pl, altq); -#endif + PF_RULES_WUNLOCK(); + free(altq, M_PFALTQ); break; } -#ifdef __FreeBSD__ TAILQ_INSERT_TAIL(V_pf_altqs_inactive, altq, entries); -#else - TAILQ_INSERT_TAIL(pf_altqs_inactive, altq, entries); -#endif bcopy(altq, &pa->altq, sizeof(struct pf_altq)); + PF_RULES_WUNLOCK(); break; } @@ -2761,16 +2041,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pfioc_altq *pa = (struct pfioc_altq *)addr; struct pf_altq *altq; + PF_RULES_RLOCK(); pa->nr = 0; -#ifdef __FreeBSD__ TAILQ_FOREACH(altq, V_pf_altqs_active, entries) pa->nr++; pa->ticket = V_ticket_altqs_active; -#else - TAILQ_FOREACH(altq, pf_altqs_active, entries) - pa->nr++; - pa->ticket = ticket_altqs_active; -#endif + PF_RULES_RUNLOCK(); break; } @@ -2779,29 +2055,25 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pf_altq *altq; u_int32_t nr; -#ifdef __FreeBSD__ + PF_RULES_RLOCK(); if (pa->ticket != V_ticket_altqs_active) { -#else - if (pa->ticket != ticket_altqs_active) { -#endif + PF_RULES_RUNLOCK(); error = EBUSY; break; } nr = 0; -#ifdef __FreeBSD__ altq = TAILQ_FIRST(V_pf_altqs_active); -#else - altq = TAILQ_FIRST(pf_altqs_active); -#endif while ((altq != NULL) && (nr < pa->nr)) { altq = TAILQ_NEXT(altq, entries); nr++; } if (altq == NULL) { + PF_RULES_RUNLOCK(); error = EBUSY; break; } bcopy(altq, &pa->altq, sizeof(struct pf_altq)); + PF_RULES_RUNLOCK(); break; } @@ -2816,41 +2088,32 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) u_int32_t nr; int nbytes; -#ifdef __FreeBSD__ + PF_RULES_RLOCK(); if (pq->ticket != V_ticket_altqs_active) { -#else - if (pq->ticket != ticket_altqs_active) { -#endif + PF_RULES_RUNLOCK(); error = EBUSY; break; } nbytes = pq->nbytes; nr = 0; -#ifdef __FreeBSD__ altq = TAILQ_FIRST(V_pf_altqs_active); -#else - altq = TAILQ_FIRST(pf_altqs_active); -#endif while ((altq != NULL) && (nr < pq->nr)) { altq = TAILQ_NEXT(altq, entries); nr++; } if (altq == NULL) { + PF_RULES_RUNLOCK(); error = EBUSY; break; } -#ifdef __FreeBSD__ if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) != 0) { + PF_RULES_RUNLOCK(); error = ENXIO; break; } - PF_UNLOCK(); -#endif + PF_RULES_RUNLOCK(); error = altq_getqstats(altq, pq->buf, &nbytes); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif if (error == 0) { pq->scheduler = altq->scheduler; pq->nbytes = nbytes; @@ -2862,27 +2125,18 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCBEGINADDRS: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; -#ifdef __FreeBSD__ + PF_RULES_WLOCK(); pf_empty_pool(&V_pf_pabuf); pp->ticket = ++V_ticket_pabuf; -#else - pf_empty_pool(&pf_pabuf); - pp->ticket = ++ticket_pabuf; -#endif + PF_RULES_WUNLOCK(); break; } case DIOCADDADDR: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; + struct pf_pooladdr *pa; + struct pfi_kif *kif = NULL; 
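DIOCADDRULE and DIOCADDALTQ above, and DIOCADDADDR just below, share the same shape: memory is allocated with M_WAITOK before the rules lock is taken, and once under the lock any validation failure jumps to an error label that unlocks and frees the preallocated objects (the ERROUT() macros). A compact userland sketch of that preallocate-then-lock pattern under the stated assumptions (pthread mutex for the lock, a fake ticket check, illustrative names):

    #include <errno.h>
    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    static pthread_mutex_t rules_lock = PTHREAD_MUTEX_INITIALIZER;
    static int ticket_pabuf = 42;

    struct pooladdr {
        struct pooladdr *next;
    };

    static struct pooladdr *pabuf_head;

    static int
    add_addr(int ticket)
    {
        struct pooladdr *pa;
        int error = 0;

        /* Anything that may sleep happens before the lock is taken. */
        pa = malloc(sizeof(*pa));
        if (pa == NULL)
            return (ENOMEM);

    #define ERROUT(x) { error = (x); goto error_out; }

        pthread_mutex_lock(&rules_lock);        /* like PF_RULES_WLOCK() */
        if (ticket != ticket_pabuf)
            ERROUT(EBUSY);                      /* stale ticket */
        pa->next = pabuf_head;                  /* hand pa over to the list */
        pabuf_head = pa;
        pthread_mutex_unlock(&rules_lock);
        return (0);

    #undef ERROUT
    error_out:
        pthread_mutex_unlock(&rules_lock);      /* like PF_RULES_WUNLOCK() */
        free(pa);                               /* undo the preallocation */
        return (error);
    }

    int
    main(void)
    {
        printf("good ticket: %d, stale ticket: %d\n", add_addr(42), add_addr(7));
        return (0);
    }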
-#ifdef __FreeBSD__ - if (pp->ticket != V_ticket_pabuf) { -#else - if (pp->ticket != ticket_pabuf) { -#endif - error = EBUSY; - break; - } #ifndef INET if (pp->af == AF_INET) { error = EAFNOSUPPORT; @@ -2901,70 +2155,68 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EINVAL; break; } -#ifdef __FreeBSD__ - pa = pool_get(&V_pf_pooladdr_pl, PR_NOWAIT); -#else - pa = pool_get(&pf_pooladdr_pl, PR_WAITOK|PR_LIMITFAIL); -#endif - if (pa == NULL) { - error = ENOMEM; + pa = malloc(sizeof(*pa), M_PFRULE, M_WAITOK); + bcopy(&pp->addr, pa, sizeof(struct pf_pooladdr)); + if (pa->ifname[0]) + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); + PF_RULES_WLOCK(); + if (pp->ticket != V_ticket_pabuf) { + PF_RULES_WUNLOCK(); + if (pa->ifname[0]) + free(kif, PFI_MTYPE); + free(pa, M_PFRULE); + error = EBUSY; break; } - bcopy(&pp->addr, pa, sizeof(struct pf_pooladdr)); if (pa->ifname[0]) { - pa->kif = pfi_kif_get(pa->ifname); - if (pa->kif == NULL) { -#ifdef __FreeBSD__ - pool_put(&V_pf_pooladdr_pl, pa); -#else - pool_put(&pf_pooladdr_pl, pa); -#endif - error = EINVAL; - break; - } - pfi_kif_ref(pa->kif, PFI_KIF_REF_RULE); - } - if (pfi_dynaddr_setup(&pa->addr, pp->af)) { - pfi_dynaddr_remove(&pa->addr); - pfi_kif_unref(pa->kif, PFI_KIF_REF_RULE); -#ifdef __FreeBSD__ - pool_put(&V_pf_pooladdr_pl, pa); -#else - pool_put(&pf_pooladdr_pl, pa); -#endif - error = EINVAL; + pa->kif = pfi_kif_attach(kif, pa->ifname); + pfi_kif_ref(pa->kif); + } else + pa->kif = NULL; + if (pa->addr.type == PF_ADDR_DYNIFTL && ((error = + pfi_dynaddr_setup(&pa->addr, pp->af)) != 0)) { + if (pa->ifname[0]) + pfi_kif_unref(pa->kif); + PF_RULES_WUNLOCK(); + free(pa, M_PFRULE); break; } -#ifdef __FreeBSD__ TAILQ_INSERT_TAIL(&V_pf_pabuf, pa, entries); -#else - TAILQ_INSERT_TAIL(&pf_pabuf, pa, entries); -#endif + PF_RULES_WUNLOCK(); break; } case DIOCGETADDRS: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; + struct pf_pool *pool; + struct pf_pooladdr *pa; + PF_RULES_RLOCK(); pp->nr = 0; pool = pf_get_pool(pp->anchor, pp->ticket, pp->r_action, pp->r_num, 0, 1, 0); if (pool == NULL) { + PF_RULES_RUNLOCK(); error = EBUSY; break; } TAILQ_FOREACH(pa, &pool->list, entries) pp->nr++; + PF_RULES_RUNLOCK(); break; } case DIOCGETADDR: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; + struct pf_pool *pool; + struct pf_pooladdr *pa; u_int32_t nr = 0; + PF_RULES_RLOCK(); pool = pf_get_pool(pp->anchor, pp->ticket, pp->r_action, pp->r_num, 0, 1, 1); if (pool == NULL) { + PF_RULES_RUNLOCK(); error = EBUSY; break; } @@ -2974,18 +2226,22 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) nr++; } if (pa == NULL) { + PF_RULES_RUNLOCK(); error = EBUSY; break; } bcopy(pa, &pp->addr, sizeof(struct pf_pooladdr)); pf_addr_copyout(&pp->addr.addr); + PF_RULES_RUNLOCK(); break; } case DIOCCHANGEADDR: { struct pfioc_pooladdr *pca = (struct pfioc_pooladdr *)addr; + struct pf_pool *pool; struct pf_pooladdr *oldpa = NULL, *newpa = NULL; struct pf_ruleset *ruleset; + struct pfi_kif *kif = NULL; if (pca->action < PF_CHANGE_ADD_HEAD || pca->action > PF_CHANGE_REMOVE) { @@ -2999,76 +2255,60 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } - ruleset = pf_find_ruleset(pca->anchor); - if (ruleset == NULL) { - error = EBUSY; - break; - } - pool = pf_get_pool(pca->anchor, pca->ticket, pca->r_action, - pca->r_num, pca->r_last, 1, 1); - if (pool == NULL) { - error = EBUSY; - break; - } if (pca->action != PF_CHANGE_REMOVE) { -#ifdef __FreeBSD__ - newpa = 
pool_get(&V_pf_pooladdr_pl, - PR_NOWAIT); -#else - newpa = pool_get(&pf_pooladdr_pl, - PR_WAITOK|PR_LIMITFAIL); -#endif - if (newpa == NULL) { - error = ENOMEM; - break; - } - bcopy(&pca->addr, newpa, sizeof(struct pf_pooladdr)); #ifndef INET if (pca->af == AF_INET) { -#ifdef __FreeBSD__ - pool_put(&V_pf_pooladdr_pl, newpa); -#else - pool_put(&pf_pooladdr_pl, newpa); -#endif error = EAFNOSUPPORT; break; } #endif /* INET */ #ifndef INET6 if (pca->af == AF_INET6) { -#ifdef __FreeBSD__ - pool_put(&V_pf_pooladdr_pl, newpa); -#else - pool_put(&pf_pooladdr_pl, newpa); -#endif error = EAFNOSUPPORT; break; } #endif /* INET6 */ + newpa = malloc(sizeof(*newpa), M_PFRULE, M_WAITOK); + bcopy(&pca->addr, newpa, sizeof(struct pf_pooladdr)); + if (newpa->ifname[0]) + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); + } + +#define ERROUT(x) { error = (x); goto DIOCCHANGEADDR_error; } + PF_RULES_WLOCK(); + ruleset = pf_find_ruleset(pca->anchor); + if (ruleset == NULL) + ERROUT(EBUSY); + + pool = pf_get_pool(pca->anchor, pca->ticket, pca->r_action, + pca->r_num, pca->r_last, 1, 1); + if (pool == NULL) + ERROUT(EBUSY); + + if (pca->action != PF_CHANGE_REMOVE) { if (newpa->ifname[0]) { - newpa->kif = pfi_kif_get(newpa->ifname); - if (newpa->kif == NULL) { -#ifdef __FreeBSD__ - pool_put(&V_pf_pooladdr_pl, newpa); -#else - pool_put(&pf_pooladdr_pl, newpa); -#endif - error = EINVAL; - break; - } - pfi_kif_ref(newpa->kif, PFI_KIF_REF_RULE); + newpa->kif = pfi_kif_attach(kif, newpa->ifname); + pfi_kif_ref(newpa->kif); } else newpa->kif = NULL; - if (pfi_dynaddr_setup(&newpa->addr, pca->af) || - pf_tbladdr_setup(ruleset, &newpa->addr)) { - pfi_dynaddr_remove(&newpa->addr); - pfi_kif_unref(newpa->kif, PFI_KIF_REF_RULE); -#ifdef __FreeBSD__ - pool_put(&V_pf_pooladdr_pl, newpa); -#else - pool_put(&pf_pooladdr_pl, newpa); -#endif - error = EINVAL; + + switch (newpa->addr.type) { + case PF_ADDR_DYNIFTL: + error = pfi_dynaddr_setup(&newpa->addr, + pca->af); + break; + case PF_ADDR_TABLE: + newpa->addr.p.tbl = pfr_attach_table(ruleset, + newpa->addr.v.tblname); + if (newpa->addr.p.tbl == NULL) + error = ENOMEM; + break; + } + if (error) { + if (newpa->kif) + pfi_kif_unref(newpa->kif); + PF_RULES_WUNLOCK(); + free(newpa, M_PFRULE); break; } } @@ -3086,6 +2326,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) i++; } if (oldpa == NULL) { + PF_RULES_WUNLOCK(); error = EINVAL; break; } @@ -3093,14 +2334,17 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (pca->action == PF_CHANGE_REMOVE) { TAILQ_REMOVE(&pool->list, oldpa, entries); - pfi_dynaddr_remove(&oldpa->addr); - pf_tbladdr_remove(&oldpa->addr); - pfi_kif_unref(oldpa->kif, PFI_KIF_REF_RULE); -#ifdef __FreeBSD__ - pool_put(&V_pf_pooladdr_pl, oldpa); -#else - pool_put(&pf_pooladdr_pl, oldpa); -#endif + switch (oldpa->addr.type) { + case PF_ADDR_DYNIFTL: + pfi_dynaddr_remove(oldpa->addr.p.dyn); + break; + case PF_ADDR_TABLE: + pfr_detach_table(oldpa->addr.p.tbl); + break; + } + if (oldpa->kif) + pfi_kif_unref(oldpa->kif); + free(oldpa, M_PFRULE); } else { if (oldpa == NULL) TAILQ_INSERT_TAIL(&pool->list, newpa, entries); @@ -3115,6 +2359,16 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) pool->cur = TAILQ_FIRST(&pool->list); PF_ACPY(&pool->counter, &pool->cur->addr.v.a.addr, pca->af); + PF_RULES_WUNLOCK(); + break; + +#undef ERROUT +DIOCCHANGEADDR_error: + PF_RULES_WUNLOCK(); + if (newpa != NULL) + free(newpa, M_PFRULE); + if (kif != NULL) + free(kif, PFI_MTYPE); break; } @@ -3123,19 +2377,17 
@@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pf_ruleset *ruleset; struct pf_anchor *anchor; + PF_RULES_RLOCK(); pr->path[sizeof(pr->path) - 1] = 0; if ((ruleset = pf_find_ruleset(pr->path)) == NULL) { - error = EINVAL; + PF_RULES_RUNLOCK(); + error = ENOENT; break; } pr->nr = 0; if (ruleset->anchor == NULL) { /* XXX kludge for pf_main_ruleset */ -#ifdef __FreeBSD__ RB_FOREACH(anchor, pf_anchor_global, &V_pf_anchors) -#else - RB_FOREACH(anchor, pf_anchor_global, &pf_anchors) -#endif if (anchor->parent == NULL) pr->nr++; } else { @@ -3143,6 +2395,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) &ruleset->anchor->children) pr->nr++; } + PF_RULES_RUNLOCK(); break; } @@ -3152,19 +2405,17 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pf_anchor *anchor; u_int32_t nr = 0; + PF_RULES_RLOCK(); pr->path[sizeof(pr->path) - 1] = 0; if ((ruleset = pf_find_ruleset(pr->path)) == NULL) { - error = EINVAL; + PF_RULES_RUNLOCK(); + error = ENOENT; break; } pr->name[0] = 0; if (ruleset->anchor == NULL) { /* XXX kludge for pf_main_ruleset */ -#ifdef __FreeBSD__ RB_FOREACH(anchor, pf_anchor_global, &V_pf_anchors) -#else - RB_FOREACH(anchor, pf_anchor_global, &pf_anchors) -#endif if (anchor->parent == NULL && nr++ == pr->nr) { strlcpy(pr->name, anchor->name, sizeof(pr->name)); @@ -3181,6 +2432,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } if (!pr->name[0]) error = EBUSY; + PF_RULES_RUNLOCK(); break; } @@ -3191,81 +2443,149 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = ENODEV; break; } + PF_RULES_WLOCK(); error = pfr_clr_tables(&io->pfrio_table, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); break; } case DIOCRADDTABLES: { struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_table *pfrts; + size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; break; } - error = pfr_add_tables(io->pfrio_buffer, io->pfrio_size, + totlen = io->pfrio_size * sizeof(struct pfr_table); + pfrts = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfrts, totlen); + if (error) { + free(pfrts, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_add_tables(pfrts, io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + free(pfrts, M_TEMP); break; } case DIOCRDELTABLES: { struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_table *pfrts; + size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; break; } - error = pfr_del_tables(io->pfrio_buffer, io->pfrio_size, + totlen = io->pfrio_size * sizeof(struct pfr_table); + pfrts = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfrts, totlen); + if (error) { + free(pfrts, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_del_tables(pfrts, io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + free(pfrts, M_TEMP); break; } case DIOCRGETTABLES: { struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_table *pfrts; + size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; break; } - error = pfr_get_tables(&io->pfrio_table, io->pfrio_buffer, + totlen = io->pfrio_size * sizeof(struct pfr_table); + pfrts = malloc(totlen, M_TEMP, M_WAITOK); + PF_RULES_RLOCK(); + error = pfr_get_tables(&io->pfrio_table, pfrts, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); + 
PF_RULES_RUNLOCK(); + if (error == 0) + error = copyout(pfrts, io->pfrio_buffer, totlen); + free(pfrts, M_TEMP); break; } case DIOCRGETTSTATS: { struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_tstats *pfrtstats; + size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_tstats)) { error = ENODEV; break; } - error = pfr_get_tstats(&io->pfrio_table, io->pfrio_buffer, + totlen = io->pfrio_size * sizeof(struct pfr_tstats); + pfrtstats = malloc(totlen, M_TEMP, M_WAITOK); + PF_RULES_WLOCK(); + error = pfr_get_tstats(&io->pfrio_table, pfrtstats, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + if (error == 0) + error = copyout(pfrtstats, io->pfrio_buffer, totlen); + free(pfrtstats, M_TEMP); break; } case DIOCRCLRTSTATS: { struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_table *pfrts; + size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; break; } - error = pfr_clr_tstats(io->pfrio_buffer, io->pfrio_size, + totlen = io->pfrio_size * sizeof(struct pfr_table); + pfrts = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfrts, totlen); + if (error) { + free(pfrts, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_clr_tstats(pfrts, io->pfrio_size, &io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + free(pfrts, M_TEMP); break; } case DIOCRSETTFLAGS: { struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_table *pfrts; + size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; break; } - error = pfr_set_tflags(io->pfrio_buffer, io->pfrio_size, + totlen = io->pfrio_size * sizeof(struct pfr_table); + pfrts = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfrts, totlen); + if (error) { + free(pfrts, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_set_tflags(pfrts, io->pfrio_size, io->pfrio_setflag, io->pfrio_clrflag, &io->pfrio_nchange, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + free(pfrts, M_TEMP); break; } @@ -3276,332 +2596,398 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = ENODEV; break; } + PF_RULES_WLOCK(); error = pfr_clr_addrs(&io->pfrio_table, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); break; } case DIOCRADDADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_addr *pfras; + size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } - error = pfr_add_addrs(&io->pfrio_table, io->pfrio_buffer, + totlen = io->pfrio_size * sizeof(struct pfr_addr); + pfras = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfras, totlen); + if (error) { + free(pfras, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_add_addrs(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) + error = copyout(pfras, io->pfrio_buffer, totlen); + free(pfras, M_TEMP); break; } case DIOCRDELADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_addr *pfras; + size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } - error = pfr_del_addrs(&io->pfrio_table, io->pfrio_buffer, + totlen = io->pfrio_size * sizeof(struct pfr_addr); + pfras = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfras, totlen); + if (error) { + free(pfras, M_TEMP); + break; + } + 
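The DIOCR* table and address handlers above all follow one template now: compute the total length, allocate a temporary M_TEMP buffer, copyin() the whole user array, take the rules lock only around the pfr_*() call, then copyout() and free the buffer after dropping the lock. A short sketch of that copy-in, operate-under-lock, copy-out sequence; memcpy() stands in for copyin()/copyout(), the table routine is a stub, and the names are illustrative:

    #include <errno.h>
    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static pthread_mutex_t rules_lock = PTHREAD_MUTEX_INITIALIZER;

    struct tbl_entry { char name[32]; };

    /* Stand-in for pfr_add_tables(): runs entirely under the lock. */
    static int
    table_add(const struct tbl_entry *ent, int n, int *nadd)
    {
        (void)ent;
        *nadd = n;                              /* pretend every entry was added */
        return (0);
    }

    static int
    ioctl_add_tables(const struct tbl_entry *ubuf, int size, int *nadd)
    {
        struct tbl_entry *ents;
        size_t totlen;
        int error;

        totlen = (size_t)size * sizeof(struct tbl_entry);
        ents = malloc(totlen);                  /* like malloc(totlen, M_TEMP, M_WAITOK) */
        if (ents == NULL)
            return (ENOMEM);
        memcpy(ents, ubuf, totlen);             /* copyin() */

        pthread_mutex_lock(&rules_lock);        /* PF_RULES_WLOCK() */
        error = table_add(ents, size, nadd);
        pthread_mutex_unlock(&rules_lock);      /* PF_RULES_WUNLOCK() */

        free(ents);
        return (error);
    }

    int
    main(void)
    {
        struct tbl_entry req[2] = { { "badhosts" }, { "goodhosts" } };
        int nadd = 0;
        int error = ioctl_add_tables(req, 2, &nadd);

        printf("error %d, added %d\n", error, nadd);
        return (0);
    }

Doing the user copies outside the lock matters because copyin() and copyout() may fault and sleep, which is not permitted while the rules lock is held.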
PF_RULES_WLOCK(); + error = pfr_del_addrs(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) + error = copyout(pfras, io->pfrio_buffer, totlen); + free(pfras, M_TEMP); break; } case DIOCRSETADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_addr *pfras; + size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } - error = pfr_set_addrs(&io->pfrio_table, io->pfrio_buffer, + totlen = (io->pfrio_size + io->pfrio_size2) * + sizeof(struct pfr_addr); + pfras = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfras, totlen); + if (error) { + free(pfras, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_set_addrs(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_size2, &io->pfrio_nadd, &io->pfrio_ndel, &io->pfrio_nchange, io->pfrio_flags | PFR_FLAG_USERIOCTL, 0); + PF_RULES_WUNLOCK(); + if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) + error = copyout(pfras, io->pfrio_buffer, totlen); + free(pfras, M_TEMP); break; } case DIOCRGETADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_addr *pfras; + size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } - error = pfr_get_addrs(&io->pfrio_table, io->pfrio_buffer, + totlen = io->pfrio_size * sizeof(struct pfr_addr); + pfras = malloc(totlen, M_TEMP, M_WAITOK); + PF_RULES_RLOCK(); + error = pfr_get_addrs(&io->pfrio_table, pfras, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_RUNLOCK(); + if (error == 0) + error = copyout(pfras, io->pfrio_buffer, totlen); + free(pfras, M_TEMP); break; } case DIOCRGETASTATS: { struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_astats *pfrastats; + size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_astats)) { error = ENODEV; break; } - error = pfr_get_astats(&io->pfrio_table, io->pfrio_buffer, + totlen = io->pfrio_size * sizeof(struct pfr_astats); + pfrastats = malloc(totlen, M_TEMP, M_WAITOK); + PF_RULES_RLOCK(); + error = pfr_get_astats(&io->pfrio_table, pfrastats, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_RUNLOCK(); + if (error == 0) + error = copyout(pfrastats, io->pfrio_buffer, totlen); + free(pfrastats, M_TEMP); break; } case DIOCRCLRASTATS: { struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_addr *pfras; + size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } - error = pfr_clr_astats(&io->pfrio_table, io->pfrio_buffer, + totlen = io->pfrio_size * sizeof(struct pfr_addr); + pfras = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfras, totlen); + if (error) { + free(pfras, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_clr_astats(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) + error = copyout(pfras, io->pfrio_buffer, totlen); + free(pfras, M_TEMP); break; } case DIOCRTSTADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_addr *pfras; + size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } - error = pfr_tst_addrs(&io->pfrio_table, io->pfrio_buffer, + totlen = io->pfrio_size * sizeof(struct pfr_addr); + pfras = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfras, totlen); + if (error) { + free(pfras, 
M_TEMP); + break; + } + PF_RULES_RLOCK(); + error = pfr_tst_addrs(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_nmatch, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_RUNLOCK(); + if (error == 0) + error = copyout(pfras, io->pfrio_buffer, totlen); + free(pfras, M_TEMP); break; } case DIOCRINADEFINE: { struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_addr *pfras; + size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } - error = pfr_ina_define(&io->pfrio_table, io->pfrio_buffer, + totlen = io->pfrio_size * sizeof(struct pfr_addr); + pfras = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfras, totlen); + if (error) { + free(pfras, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_ina_define(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_nadd, &io->pfrio_naddr, io->pfrio_ticket, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + free(pfras, M_TEMP); break; } case DIOCOSFPADD: { struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr; + PF_RULES_WLOCK(); error = pf_osfp_add(io); + PF_RULES_WUNLOCK(); break; } case DIOCOSFPGET: { struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr; + PF_RULES_RLOCK(); error = pf_osfp_get(io); + PF_RULES_RUNLOCK(); break; } case DIOCXBEGIN: { struct pfioc_trans *io = (struct pfioc_trans *)addr; - struct pfioc_trans_e *ioe; - struct pfr_table *table; + struct pfioc_trans_e *ioes, *ioe; + size_t totlen; int i; if (io->esize != sizeof(*ioe)) { error = ENODEV; - goto fail; + break; } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - ioe = malloc(sizeof(*ioe), M_TEMP, M_WAITOK); - table = malloc(sizeof(*table), M_TEMP, M_WAITOK); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif - for (i = 0; i < io->size; i++) { -#ifdef __FreeBSD__ - PF_COPYIN(io->array+i, ioe, sizeof(*ioe), error); + totlen = sizeof(struct pfioc_trans_e) * io->size; + ioes = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->array, ioes, totlen); if (error) { -#else - if (copyin(io->array+i, ioe, sizeof(*ioe))) { -#endif - free(table, M_TEMP); - free(ioe, M_TEMP); - error = EFAULT; - goto fail; - } + free(ioes, M_TEMP); + break; + } + PF_RULES_WLOCK(); + for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { switch (ioe->rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: if (ioe->anchor[0]) { - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); error = EINVAL; goto fail; } if ((error = pf_begin_altq(&ioe->ticket))) { - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); goto fail; } break; #endif /* ALTQ */ case PF_RULESET_TABLE: - bzero(table, sizeof(*table)); - strlcpy(table->pfrt_anchor, ioe->anchor, - sizeof(table->pfrt_anchor)); - if ((error = pfr_ina_begin(table, + { + struct pfr_table table; + + bzero(&table, sizeof(table)); + strlcpy(table.pfrt_anchor, ioe->anchor, + sizeof(table.pfrt_anchor)); + if ((error = pfr_ina_begin(&table, &ioe->ticket, NULL, 0))) { - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); goto fail; } break; + } default: if ((error = pf_begin_rules(&ioe->ticket, ioe->rs_num, ioe->anchor))) { - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); goto fail; } break; } -#ifdef __FreeBSD__ - PF_COPYOUT(ioe, io->array+i, sizeof(io->array[i]), - error); - if (error) { -#else - if (copyout(ioe, io->array+i, sizeof(io->array[i]))) { -#endif - free(table, M_TEMP); - free(ioe, M_TEMP); - error = EFAULT; - goto fail; - } } - free(table, M_TEMP); - 
free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + error = copyout(ioes, io->array, totlen); + free(ioes, M_TEMP); break; } case DIOCXROLLBACK: { struct pfioc_trans *io = (struct pfioc_trans *)addr; - struct pfioc_trans_e *ioe; - struct pfr_table *table; + struct pfioc_trans_e *ioe, *ioes; + size_t totlen; int i; if (io->esize != sizeof(*ioe)) { error = ENODEV; - goto fail; + break; } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - ioe = malloc(sizeof(*ioe), M_TEMP, M_WAITOK); - table = malloc(sizeof(*table), M_TEMP, M_WAITOK); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif - for (i = 0; i < io->size; i++) { -#ifdef __FreeBSD__ - PF_COPYIN(io->array+i, ioe, sizeof(*ioe), error); - if (error) { -#else - if (copyin(io->array+i, ioe, sizeof(*ioe))) { -#endif - free(table, M_TEMP); - free(ioe, M_TEMP); - error = EFAULT; - goto fail; - } + totlen = sizeof(struct pfioc_trans_e) * io->size; + ioes = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->array, ioes, totlen); + if (error) { + free(ioes, M_TEMP); + break; + } + PF_RULES_WLOCK(); + for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { switch (ioe->rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: if (ioe->anchor[0]) { - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); error = EINVAL; goto fail; } if ((error = pf_rollback_altq(ioe->ticket))) { - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); goto fail; /* really bad */ } break; #endif /* ALTQ */ case PF_RULESET_TABLE: - bzero(table, sizeof(*table)); - strlcpy(table->pfrt_anchor, ioe->anchor, - sizeof(table->pfrt_anchor)); - if ((error = pfr_ina_rollback(table, + { + struct pfr_table table; + + bzero(&table, sizeof(table)); + strlcpy(table.pfrt_anchor, ioe->anchor, + sizeof(table.pfrt_anchor)); + if ((error = pfr_ina_rollback(&table, ioe->ticket, NULL, 0))) { - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); goto fail; /* really bad */ } break; + } default: if ((error = pf_rollback_rules(ioe->ticket, ioe->rs_num, ioe->anchor))) { - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); goto fail; /* really bad */ } break; } } - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); break; } case DIOCXCOMMIT: { struct pfioc_trans *io = (struct pfioc_trans *)addr; - struct pfioc_trans_e *ioe; - struct pfr_table *table; + struct pfioc_trans_e *ioe, *ioes; struct pf_ruleset *rs; + size_t totlen; int i; if (io->esize != sizeof(*ioe)) { error = ENODEV; - goto fail; + break; } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - ioe = malloc(sizeof(*ioe), M_TEMP, M_WAITOK); - table = malloc(sizeof(*table), M_TEMP, M_WAITOK); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif - /* first makes sure everything will succeed */ - for (i = 0; i < io->size; i++) { -#ifdef __FreeBSD__ - PF_COPYIN(io->array+i, ioe, sizeof(*ioe), error); - if (error) { -#else - if (copyin(io->array+i, ioe, sizeof(*ioe))) { -#endif - free(table, M_TEMP); - free(ioe, M_TEMP); - error = EFAULT; - goto fail; - } + totlen = sizeof(struct pfioc_trans_e) * io->size; + ioes = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->array, ioes, totlen); + if (error) { + free(ioes, M_TEMP); + break; + } + PF_RULES_WLOCK(); + /* First makes sure everything will succeed. 
*/ + for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { switch (ioe->rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: if (ioe->anchor[0]) { - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); error = EINVAL; goto fail; } -#ifdef __FreeBSD__ if (!V_altqs_inactive_open || ioe->ticket != V_ticket_altqs_inactive) { -#else - if (!altqs_inactive_open || ioe->ticket != - ticket_altqs_inactive) { -#endif - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); error = EBUSY; goto fail; } @@ -3611,8 +2997,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) rs = pf_find_ruleset(ioe->anchor); if (rs == NULL || !rs->topen || ioe->ticket != rs->tticket) { - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); error = EBUSY; goto fail; } @@ -3620,8 +3006,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) default: if (ioe->rs_num < 0 || ioe->rs_num >= PF_RULESET_MAX) { - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); error = EINVAL; goto fail; } @@ -3630,175 +3016,141 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) !rs->rules[ioe->rs_num].inactive.open || rs->rules[ioe->rs_num].inactive.ticket != ioe->ticket) { - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); error = EBUSY; goto fail; } break; } } - /* now do the commit - no errors should happen here */ - for (i = 0; i < io->size; i++) { -#ifdef __FreeBSD__ - PF_COPYIN(io->array+i, ioe, sizeof(*ioe), error); - if (error) { -#else - if (copyin(io->array+i, ioe, sizeof(*ioe))) { -#endif - free(table, M_TEMP); - free(ioe, M_TEMP); - error = EFAULT; - goto fail; - } + /* Now do the commit - no errors should happen here. 
*/ + for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { switch (ioe->rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: if ((error = pf_commit_altq(ioe->ticket))) { - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); goto fail; /* really bad */ } break; #endif /* ALTQ */ case PF_RULESET_TABLE: - bzero(table, sizeof(*table)); - strlcpy(table->pfrt_anchor, ioe->anchor, - sizeof(table->pfrt_anchor)); - if ((error = pfr_ina_commit(table, ioe->ticket, - NULL, NULL, 0))) { - free(table, M_TEMP); - free(ioe, M_TEMP); + { + struct pfr_table table; + + bzero(&table, sizeof(table)); + strlcpy(table.pfrt_anchor, ioe->anchor, + sizeof(table.pfrt_anchor)); + if ((error = pfr_ina_commit(&table, + ioe->ticket, NULL, NULL, 0))) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); goto fail; /* really bad */ } break; + } default: if ((error = pf_commit_rules(ioe->ticket, ioe->rs_num, ioe->anchor))) { - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); goto fail; /* really bad */ } break; } } - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); break; } case DIOCGETSRCNODES: { struct pfioc_src_nodes *psn = (struct pfioc_src_nodes *)addr; + struct pf_srchash *sh; struct pf_src_node *n, *p, *pstore; - u_int32_t nr = 0; - int space = psn->psn_len; - - if (space == 0) { -#ifdef __FreeBSD__ - RB_FOREACH(n, pf_src_tree, &V_tree_src_tracking) -#else - RB_FOREACH(n, pf_src_tree, &tree_src_tracking) -#endif - nr++; + uint32_t i, nr = 0; + + if (psn->psn_len == 0) { + for (i = 0, sh = V_pf_srchash; i < V_pf_srchashmask; + i++, sh++) { + PF_HASHROW_LOCK(sh); + LIST_FOREACH(n, &sh->nodes, entry) + nr++; + PF_HASHROW_UNLOCK(sh); + } psn->psn_len = sizeof(struct pf_src_node) * nr; break; } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - pstore = malloc(sizeof(*pstore), M_TEMP, M_WAITOK); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif - p = psn->psn_src_nodes; -#ifdef __FreeBSD__ - RB_FOREACH(n, pf_src_tree, &V_tree_src_tracking) { -#else - RB_FOREACH(n, pf_src_tree, &tree_src_tracking) { -#endif - int secs = time_second, diff; + p = pstore = malloc(psn->psn_len, M_TEMP, M_WAITOK); + for (i = 0, sh = V_pf_srchash; i < V_pf_srchashmask; + i++, sh++) { + PF_HASHROW_LOCK(sh); + LIST_FOREACH(n, &sh->nodes, entry) { + int secs = time_uptime, diff; if ((nr + 1) * sizeof(*p) > (unsigned)psn->psn_len) break; - bcopy(n, pstore, sizeof(*pstore)); + bcopy(n, p, sizeof(struct pf_src_node)); if (n->rule.ptr != NULL) - pstore->rule.nr = n->rule.ptr->nr; - pstore->creation = secs - pstore->creation; - if (pstore->expire > secs) - pstore->expire -= secs; + p->rule.nr = n->rule.ptr->nr; + p->creation = secs - p->creation; + if (p->expire > secs) + p->expire -= secs; else - pstore->expire = 0; + p->expire = 0; - /* adjust the connection rate estimate */ + /* Adjust the connection rate estimate. 
*/ diff = secs - n->conn_rate.last; if (diff >= n->conn_rate.seconds) - pstore->conn_rate.count = 0; + p->conn_rate.count = 0; else - pstore->conn_rate.count -= + p->conn_rate.count -= n->conn_rate.count * diff / n->conn_rate.seconds; - -#ifdef __FreeBSD__ - PF_COPYOUT(pstore, p, sizeof(*p), error); -#else - error = copyout(pstore, p, sizeof(*p)); -#endif - if (error) { - free(pstore, M_TEMP); - goto fail; - } p++; nr++; + } + PF_HASHROW_UNLOCK(sh); + } + error = copyout(pstore, psn->psn_src_nodes, + sizeof(struct pf_src_node) * nr); + if (error) { + free(pstore, M_TEMP); + break; } psn->psn_len = sizeof(struct pf_src_node) * nr; - free(pstore, M_TEMP); break; } case DIOCCLRSRCNODES: { - struct pf_src_node *n; - struct pf_state *state; -#ifdef __FreeBSD__ - RB_FOREACH(state, pf_state_tree_id, &V_tree_id) { -#else - RB_FOREACH(state, pf_state_tree_id, &tree_id) { -#endif - state->src_node = NULL; - state->nat_src_node = NULL; - } -#ifdef __FreeBSD__ - RB_FOREACH(n, pf_src_tree, &V_tree_src_tracking) { -#else - RB_FOREACH(n, pf_src_tree, &tree_src_tracking) { -#endif - n->expire = 1; - n->states = 0; - } - pf_purge_expired_src_nodes(1); -#ifdef __FreeBSD__ + pf_clear_srcnodes(NULL); + pf_purge_expired_src_nodes(); V_pf_status.src_nodes = 0; -#else - pf_status.src_nodes = 0; -#endif break; } case DIOCKILLSRCNODES: { - struct pf_src_node *sn; - struct pf_state *s; struct pfioc_src_node_kill *psnk = (struct pfioc_src_node_kill *)addr; - u_int killed = 0; - -#ifdef __FreeBSD__ - RB_FOREACH(sn, pf_src_tree, &V_tree_src_tracking) { -#else - RB_FOREACH(sn, pf_src_tree, &tree_src_tracking) { -#endif + struct pf_srchash *sh; + struct pf_src_node *sn; + u_int i, killed = 0; + + for (i = 0, sh = V_pf_srchash; i < V_pf_srchashmask; + i++, sh++) { + /* + * XXXGL: we don't ever acquire sources hash lock + * but if we ever do, the below call to pf_clear_srcnodes() + * would lead to a LOR. 
+ */ + PF_HASHROW_LOCK(sh); + LIST_FOREACH(sn, &sh->nodes, entry) if (PF_MATCHA(psnk->psnk_src.neg, &psnk->psnk_src.addr.v.a.addr, &psnk->psnk_src.addr.v.a.mask, @@ -3808,27 +3160,16 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) &psnk->psnk_dst.addr.v.a.mask, &sn->raddr, sn->af)) { /* Handle state to src_node linkage */ - if (sn->states != 0) { - RB_FOREACH(s, pf_state_tree_id, -#ifdef __FreeBSD__ - &V_tree_id) { -#else - &tree_id) { -#endif - if (s->src_node == sn) - s->src_node = NULL; - if (s->nat_src_node == sn) - s->nat_src_node = NULL; - } - sn->states = 0; - } + if (sn->states != 0) + pf_clear_srcnodes(sn); sn->expire = 1; killed++; } + PF_HASHROW_UNLOCK(sh); } if (killed > 0) - pf_purge_expired_src_nodes(1); + pf_purge_expired_src_nodes(); psnk->psnk_killed = killed; break; @@ -3837,47 +3178,56 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCSETHOSTID: { u_int32_t *hostid = (u_int32_t *)addr; -#ifdef __FreeBSD__ + PF_RULES_WLOCK(); if (*hostid == 0) V_pf_status.hostid = arc4random(); else V_pf_status.hostid = *hostid; -#else - if (*hostid == 0) - pf_status.hostid = arc4random(); - else - pf_status.hostid = *hostid; -#endif + PF_RULES_WUNLOCK(); break; } case DIOCOSFPFLUSH: + PF_RULES_WLOCK(); pf_osfp_flush(); + PF_RULES_WUNLOCK(); break; case DIOCIGETIFACES: { struct pfioc_iface *io = (struct pfioc_iface *)addr; + struct pfi_kif *ifstore; + size_t bufsiz; if (io->pfiio_esize != sizeof(struct pfi_kif)) { error = ENODEV; break; } - error = pfi_get_ifaces(io->pfiio_name, io->pfiio_buffer, - &io->pfiio_size); + + bufsiz = io->pfiio_size * sizeof(struct pfi_kif); + ifstore = malloc(bufsiz, M_TEMP, M_WAITOK); + PF_RULES_RLOCK(); + pfi_get_ifaces(io->pfiio_name, ifstore, &io->pfiio_size); + PF_RULES_RUNLOCK(); + error = copyout(ifstore, io->pfiio_buffer, bufsiz); + free(ifstore, M_TEMP); break; } case DIOCSETIFFLAG: { struct pfioc_iface *io = (struct pfioc_iface *)addr; + PF_RULES_WLOCK(); error = pfi_set_flags(io->pfiio_name, io->pfiio_flags); + PF_RULES_WUNLOCK(); break; } case DIOCCLRIFFLAG: { struct pfioc_iface *io = (struct pfioc_iface *)addr; + PF_RULES_WLOCK(); error = pfi_clear_flags(io->pfiio_name, io->pfiio_flags); + PF_RULES_WUNLOCK(); break; } @@ -3886,27 +3236,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } fail: -#ifdef __FreeBSD__ - PF_UNLOCK(); - - if (flags & FWRITE) - sx_xunlock(&V_pf_consistency_lock); - else - sx_sunlock(&V_pf_consistency_lock); -#else - splx(s); - if (flags & FWRITE) - rw_exit_write(&pf_consistency_lock); - else - rw_exit_read(&pf_consistency_lock); -#endif - CURVNET_RESTORE(); return (error); } -#ifdef __FreeBSD__ void pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) { @@ -3927,12 +3261,12 @@ pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) /* copy from state */ strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname)); bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr)); - sp->creation = htonl(time_second - st->creation); + sp->creation = htonl(time_uptime - st->creation); sp->expire = pf_state_expires(st); - if (sp->expire <= time_second) + if (sp->expire <= time_uptime) sp->expire = htonl(0); else - sp->expire = htonl(sp->expire - time_second); + sp->expire = htonl(sp->expire - time_uptime); sp->direction = st->direction; sp->log = st->log; @@ -3943,7 +3277,7 @@ pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) if (st->nat_src_node) sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE; - bcopy(&st->id, 
&sp->id, sizeof(sp->id)); + sp->id = st->id; sp->creatorid = st->creatorid; pf_state_peer_hton(&st->src, &sp->src); pf_state_peer_hton(&st->dst, &sp->dst); @@ -3968,33 +3302,43 @@ pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) } +static void +pf_tbladdr_copyout(struct pf_addr_wrap *aw) +{ + struct pfr_ktable *kt; + + KASSERT(aw->type == PF_ADDR_TABLE, ("%s: type %u", __func__, aw->type)); + + kt = aw->p.tbl; + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) + kt = kt->pfrkt_root; + aw->p.tbl = NULL; + aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ? + kt->pfrkt_cnt : -1; +} + /* * XXX - Check for version missmatch!!! */ static void pf_clear_states(void) { - struct pf_state *state; - -#ifdef __FreeBSD__ - RB_FOREACH(state, pf_state_tree_id, &V_tree_id) { -#else - RB_FOREACH(state, pf_state_tree_id, &tree_id) { -#endif - state->timeout = PFTM_PURGE; -#if NPFSYNC - /* don't send out individual delete messages */ - state->sync_state = PFSTATE_NOSYNC; -#endif - pf_unlink_state(state); + struct pf_state *s; + u_int i; + + for (i = 0; i <= V_pf_hashmask; i++) { + struct pf_idhash *ih = &V_pf_idhash[i]; +relock: + PF_HASHROW_LOCK(ih); + LIST_FOREACH(s, &ih->states, entry) { + s->timeout = PFTM_PURGE; + /* Don't send out individual delete messages. */ + s->sync_state = PFSTATE_NOSYNC; + pf_unlink_state(s, PF_ENTER_LOCKED); + goto relock; + } + PF_HASHROW_UNLOCK(ih); } - -#if 0 /* NPFSYNC */ -/* - * XXX This is called on module unload, we do not want to sync that over? */ - */ - pfsync_clear_states(V_pf_status.hostid, psk->psk_ifname); -#endif } static int @@ -4012,24 +3356,38 @@ pf_clear_tables(void) } static void -pf_clear_srcnodes(void) +pf_clear_srcnodes(struct pf_src_node *n) { - struct pf_src_node *n; - struct pf_state *state; + struct pf_state *s; + int i; -#ifdef __FreeBSD__ - RB_FOREACH(state, pf_state_tree_id, &V_tree_id) { -#else - RB_FOREACH(state, pf_state_tree_id, &tree_id) { -#endif - state->src_node = NULL; - state->nat_src_node = NULL; + for (i = 0; i <= V_pf_hashmask; i++) { + struct pf_idhash *ih = &V_pf_idhash[i]; + + PF_HASHROW_LOCK(ih); + LIST_FOREACH(s, &ih->states, entry) { + if (n == NULL || n == s->src_node) + s->src_node = NULL; + if (n == NULL || n == s->nat_src_node) + s->nat_src_node = NULL; + } + PF_HASHROW_UNLOCK(ih); } -#ifdef __FreeBSD__ - RB_FOREACH(n, pf_src_tree, &V_tree_src_tracking) { -#else - RB_FOREACH(n, pf_src_tree, &tree_src_tracking) { -#endif + + if (n == NULL) { + struct pf_srchash *sh; + + for (i = 0, sh = V_pf_srchash; i < V_pf_srchashmask; + i++, sh++) { + PF_HASHROW_LOCK(sh); + LIST_FOREACH(n, &sh->nodes, entry) { + n->expire = 1; + n->states = 0; + } + PF_HASHROW_UNLOCK(sh); + } + } else { + /* XXX: hash slot should already be locked here. */ n->expire = 1; n->states = 0; } @@ -4047,7 +3405,7 @@ shutdown_pf(void) int error = 0; u_int32_t t[5]; char nn = '\0'; - + V_pf_status.running = 0; do { if ((error = pf_begin_rules(&t[0], PF_RULESET_SCRUB, &nn)) @@ -4058,22 +3416,22 @@ shutdown_pf(void) if ((error = pf_begin_rules(&t[1], PF_RULESET_FILTER, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: FILTER\n")); - break; /* XXX: rollback? */ + break; /* XXX: rollback? */ } if ((error = pf_begin_rules(&t[2], PF_RULESET_NAT, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: NAT\n")); - break; /* XXX: rollback? */ + break; /* XXX: rollback? */ } if ((error = pf_begin_rules(&t[3], PF_RULESET_BINAT, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: BINAT\n")); - break; /* XXX: rollback? 
*/ + break; /* XXX: rollback? */ } if ((error = pf_begin_rules(&t[4], PF_RULESET_RDR, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: RDR\n")); - break; /* XXX: rollback? */ + break; /* XXX: rollback? */ } /* XXX: these should always succeed here */ @@ -4086,17 +3444,17 @@ shutdown_pf(void) if ((error = pf_clear_tables()) != 0) break; - #ifdef ALTQ +#ifdef ALTQ if ((error = pf_begin_altq(&t[0])) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: ALTQ\n")); break; } pf_commit_altq(t[0]); - #endif +#endif pf_clear_states(); - pf_clear_srcnodes(); + pf_clear_srcnodes(NULL); /* status does not use malloced mem so no need to cleanup */ /* fingerprints and interfaces have thier own cleanup code */ @@ -4116,7 +3474,7 @@ pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, * in network stack. OpenBSD's network stack have converted * ip_len/ip_off to host byte order frist as FreeBSD. * Now this is not true anymore , so we should convert back to network - * byte order. + * byte order. */ struct ip *h = NULL; int chk; @@ -4128,7 +3486,7 @@ pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, HTONS(h->ip_off); } CURVNET_SET(ifp->if_vnet); - chk = pf_test(PF_IN, ifp, m, NULL, inp); + chk = pf_test(PF_IN, ifp, m, inp); CURVNET_RESTORE(); if (chk && *m) { m_freem(*m); @@ -4153,7 +3511,7 @@ pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, * in network stack. OpenBSD's network stack have converted * ip_len/ip_off to host byte order frist as FreeBSD. * Now this is not true anymore , so we should convert back to network - * byte order. + * byte order. */ struct ip *h = NULL; int chk; @@ -4170,7 +3528,7 @@ pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, HTONS(h->ip_off); } CURVNET_SET(ifp->if_vnet); - chk = pf_test(PF_OUT, ifp, m, NULL, inp); + chk = pf_test(PF_OUT, ifp, m, inp); CURVNET_RESTORE(); if (chk && *m) { m_freem(*m); @@ -4203,8 +3561,7 @@ pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, * filtering we have change this to lo0 as it is the case in IPv4. */ CURVNET_SET(ifp->if_vnet); - chk = pf_test6(PF_IN, (*m)->m_flags & M_LOOP ? V_loif : ifp, m, - NULL, inp); + chk = pf_test6(PF_IN, (*m)->m_flags & M_LOOP ? 
V_loif : ifp, m, inp); CURVNET_RESTORE(); if (chk && *m) { m_freem(*m); @@ -4231,7 +3588,7 @@ pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, (*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } CURVNET_SET(ifp->if_vnet); - chk = pf_test6(PF_OUT, ifp, m, NULL, inp); + chk = pf_test6(PF_OUT, ifp, m, inp); CURVNET_RESTORE(); if (chk && *m) { m_freem(*m); @@ -4251,10 +3608,8 @@ hook_pf(void) struct pfil_head *pfh_inet6; #endif - PF_UNLOCK_ASSERT(); - if (V_pf_pfil_hooked) - return (0); + return (0); #ifdef INET pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); @@ -4292,8 +3647,6 @@ dehook_pf(void) struct pfil_head *pfh_inet6; #endif - PF_UNLOCK_ASSERT(); - if (V_pf_pfil_hooked == 0) return (0); @@ -4323,6 +3676,8 @@ dehook_pf(void) static int pf_load(void) { + int error; + VNET_ITERATOR_DECL(vnet_iter); VNET_LIST_RLOCK(); @@ -4330,19 +3685,17 @@ pf_load(void) CURVNET_SET(vnet_iter); V_pf_pfil_hooked = 0; V_pf_end_threads = 0; - V_debug_pfugidhack = 0; TAILQ_INIT(&V_pf_tags); TAILQ_INIT(&V_pf_qids); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK(); - init_pf_mutex(); + rw_init(&pf_rules_lock, "pf rulesets"); + pf_dev = make_dev(&pf_cdevsw, 0, 0, 0, 0600, PF_NAME); - init_zone_var(); - sx_init(&V_pf_consistency_lock, "pf_statetbl_lock"); - if (pfattach() < 0) - return (ENOMEM); + if ((error = pfattach()) != 0) + return (error); return (0); } @@ -4352,10 +3705,10 @@ pf_unload(void) { int error = 0; - PF_LOCK(); + PF_RULES_WLOCK(); V_pf_status.running = 0; - PF_UNLOCK(); - m_addr_chg_pf_p = NULL; + PF_RULES_WUNLOCK(); + swi_remove(V_pf_swi_cookie); error = dehook_pf(); if (error) { /* @@ -4366,22 +3719,23 @@ pf_unload(void) printf("%s : pfil unregisteration fail\n", __FUNCTION__); return error; } - PF_LOCK(); + PF_RULES_WLOCK(); shutdown_pf(); V_pf_end_threads = 1; while (V_pf_end_threads < 2) { wakeup_one(pf_purge_thread); - msleep(pf_purge_thread, &pf_task_mtx, 0, "pftmo", hz); + rw_sleep(pf_purge_thread, &pf_rules_lock, 0, "pftmo", 0); } + pf_normalize_cleanup(); pfi_cleanup(); + pfr_cleanup(); pf_osfp_flush(); - pf_osfp_cleanup(); - cleanup_pf_zone(); - PF_UNLOCK(); + pf_cleanup(); + PF_RULES_WUNLOCK(); destroy_dev(pf_dev); - destroy_pf_mutex(); - sx_destroy(&V_pf_consistency_lock); - return error; + rw_destroy(&pf_rules_lock); + + return (error); } static int @@ -4406,9 +3760,10 @@ pf_modevent(module_t mod, int type, void *data) error = EINVAL; break; } - return error; + + return (error); } - + static moduledata_t pf_mod = { "pf", pf_modevent, @@ -4417,4 +3772,3 @@ static moduledata_t pf_mod = { DECLARE_MODULE(pf, pf_mod, SI_SUB_PSEUDO, SI_ORDER_FIRST); MODULE_VERSION(pf, PF_MODVER); -#endif /* __FreeBSD__ */ diff --git a/sys/contrib/pf/net/pf_lb.c b/sys/contrib/pf/net/pf_lb.c index 4adc6f0..5b47852 100644 --- a/sys/contrib/pf/net/pf_lb.c +++ b/sys/contrib/pf/net/pf_lb.c @@ -35,136 +35,31 @@ * */ -#ifdef __FreeBSD__ -#include "opt_inet.h" -#include "opt_inet6.h" - #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#endif -#ifdef __FreeBSD__ -#include "opt_bpf.h" #include "opt_pf.h" - -#ifdef DEV_BPF -#define NBPFILTER DEV_BPF -#else -#define NBPFILTER 0 -#endif - -#ifdef DEV_PFLOG -#define NPFLOG DEV_PFLOG -#else -#define NPFLOG 0 -#endif - -#ifdef DEV_PFSYNC -#define NPFSYNC DEV_PFSYNC -#else -#define NPFSYNC 0 -#endif - -#ifdef DEV_PFLOW -#define NPFLOW DEV_PFLOW -#else -#define NPFLOW 0 -#endif - -#else -#include "bpfilter.h" -#include "pflog.h" -#include "pfsync.h" -#include "pflow.h" -#endif +#include "opt_inet.h" +#include "opt_inet6.h" #include <sys/param.h> -#include 
<sys/systm.h> -#include <sys/mbuf.h> -#include <sys/filio.h> #include <sys/socket.h> -#include <sys/socketvar.h> -#include <sys/kernel.h> -#include <sys/time.h> -#ifdef __FreeBSD__ #include <sys/sysctl.h> -#endif -#ifndef __FreeBSD__ -#include <sys/pool.h> -#endif -#include <sys/proc.h> -#ifdef __FreeBSD__ -#include <sys/kthread.h> -#include <sys/lock.h> -#include <sys/sx.h> -#else -#include <sys/rwlock.h> -#endif - -#ifdef __FreeBSD__ -#include <sys/md5.h> -#else -#include <crypto/md5.h> -#endif #include <net/if.h> -#include <net/if_types.h> -#include <net/bpf.h> -#include <net/route.h> -#include <net/radix_mpath.h> - -#include <netinet/in.h> -#include <netinet/in_var.h> -#include <netinet/in_systm.h> -#include <netinet/ip.h> -#include <netinet/ip_var.h> -#include <netinet/tcp.h> -#include <netinet/tcp_seq.h> -#include <netinet/udp.h> -#include <netinet/ip_icmp.h> -#include <netinet/in_pcb.h> -#include <netinet/tcp_timer.h> -#include <netinet/tcp_var.h> -#include <netinet/udp_var.h> -#include <netinet/icmp_var.h> -#include <netinet/if_ether.h> - -#ifndef __FreeBSD__ -#include <dev/rndvar.h> -#endif #include <net/pfvar.h> #include <net/if_pflog.h> -#include <net/if_pflow.h> - -#if NPFSYNC > 0 -#include <net/if_pfsync.h> -#endif /* NPFSYNC > 0 */ - -#ifdef INET6 -#include <netinet/ip6.h> -#include <netinet/in_pcb.h> -#include <netinet/icmp6.h> -#include <netinet6/nd6.h> -#endif /* INET6 */ - +#include <net/pf_mtag.h> -#ifdef __FreeBSD__ #define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x -#else -#define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x -#endif - -/* - * Global variables - */ -void pf_hash(struct pf_addr *, struct pf_addr *, +static void pf_hash(struct pf_addr *, struct pf_addr *, struct pf_poolhashkey *, sa_family_t); -struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *, +static struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *, int, int, struct pfi_kif *, struct pf_addr *, u_int16_t, struct pf_addr *, u_int16_t, int); -int pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *, +static int pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *, struct pf_addr *, struct pf_addr *, u_int16_t, struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t, struct pf_src_node **); @@ -185,7 +80,7 @@ int pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *, /* * hash function based on bridge_hash in if_bridge.c */ -void +static void pf_hash(struct pf_addr *inaddr, struct pf_addr *hash, struct pf_poolhashkey *key, sa_family_t af) { @@ -226,7 +121,7 @@ pf_hash(struct pf_addr *inaddr, struct pf_addr *hash, } } -struct pf_rule * +static struct pf_rule * pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport, struct pf_addr *daddr, u_int16_t dport, int rs_num) @@ -279,11 +174,8 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, !pf_match_port(dst->port_op, dst->port[0], dst->port[1], dport)) r = r->skip[PF_SKIP_DST_PORT].ptr; -#ifdef __FreeBSD__ - else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag)) -#else - else if (r->match_tag && !pf_match_tag(m, r, &tag)) -#endif + else if (r->match_tag && !pf_match_tag(m, r, &tag, + pd->pf_mtag ? 
pd->pf_mtag->tag : 0)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto != IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m, @@ -304,19 +196,19 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r, NULL, NULL); } -#ifdef __FreeBSD__ - if (pf_tag_packet(m, tag, rtableid, pd->pf_mtag)) -#else - if (pf_tag_packet(m, tag, rtableid)) -#endif + + if (tag > 0 && pf_tag_packet(m, pd, tag)) return (NULL); + if (rtableid >= 0) + M_SETFIB(m, rtableid); + if (rm != NULL && (rm->action == PF_NONAT || rm->action == PF_NORDR || rm->action == PF_NOBINAT)) return (NULL); return (rm); } -int +static int pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport, struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high, @@ -370,20 +262,12 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, high = tmp; } /* low < high */ -#ifdef __FreeBSD__ cut = htonl(arc4random()) % (1 + high - low) + low; -#else - cut = arc4random_uniform(1 + high - low) + low; -#endif /* low <= cut <= high */ for (tmp = cut; tmp <= high; ++(tmp)) { key.port[0] = htons(tmp); if (pf_find_state_all(&key, PF_IN, NULL) == -#ifdef __FreeBSD__ NULL) { -#else - NULL && !in_baddynamic(tmp, proto)) { -#endif *nport = htons(tmp); return (0); } @@ -391,11 +275,7 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, for (tmp = cut - 1; tmp >= low; --(tmp)) { key.port[0] = htons(tmp); if (pf_find_state_all(&key, PF_IN, NULL) == -#ifdef __FreeBSD__ NULL) { -#else - NULL && !in_baddynamic(tmp, proto)) { -#endif *nport = htons(tmp); return (0); } @@ -422,38 +302,17 @@ int pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn) { - unsigned char hash[16]; struct pf_pool *rpool = &r->rpool; - struct pf_addr *raddr = &rpool->cur->addr.v.a.addr; - struct pf_addr *rmask = &rpool->cur->addr.v.a.mask; - struct pf_pooladdr *acur = rpool->cur; - struct pf_src_node k; + struct pf_addr *raddr = NULL, *rmask = NULL; if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR && (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { - k.af = af; - PF_ACPY(&k.addr, saddr, af); - if (r->rule_flag & PFRULE_RULESRCTRACK || - r->rpool.opts & PF_POOL_STICKYADDR) - k.rule.ptr = r; - else - k.rule.ptr = NULL; -#ifdef __FreeBSD__ - V_pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; - *sn = RB_FIND(pf_src_tree, &V_tree_src_tracking, &k); -#else - pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; - *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k); -#endif + *sn = pf_find_src_node(saddr, r, af, 0); if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) { PF_ACPY(naddr, &(*sn)->raddr, af); -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif printf("pf_map_addr: src tracking maps "); - pf_print_host(&k.addr, 0, af); + pf_print_host(saddr, 0, af); printf(" to "); pf_print_host(naddr, 0, af); printf("\n"); @@ -542,31 +401,58 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, } break; case PF_POOL_SRCHASH: + { + unsigned char hash[16]; + pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af); PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af); break; + } case PF_POOL_ROUNDROBIN: + { + struct pf_pooladdr *acur = rpool->cur; + + /* + * XXXGL: in the round-robin case we need to store + * the round-robin 
machine state in the rule, thus + * forwarding thread needs to modify rule. + * + * This is done w/o locking, because performance is assumed + * more important than round-robin precision. + * + * In the simpliest case we just update the "rpool->cur" + * pointer. However, if pool contains tables or dynamic + * addresses, then "tblidx" is also used to store machine + * state. Since "tblidx" is int, concurrent access to it can't + * lead to inconsistence, only to lost of precision. + * + * Things get worse, if table contains not hosts, but + * prefixes. In this case counter also stores machine state, + * and for IPv6 address, counter can't be updated atomically. + * Probably, using round-robin on a table containing IPv6 + * prefixes (or even IPv4) would cause a panic. + */ + if (rpool->cur->addr.type == PF_ADDR_TABLE) { if (!pfr_pool_get(rpool->cur->addr.p.tbl, - &rpool->tblidx, &rpool->counter, - &raddr, &rmask, af)) + &rpool->tblidx, &rpool->counter, af)) goto get_addr; } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, - &rpool->tblidx, &rpool->counter, - &raddr, &rmask, af)) + &rpool->tblidx, &rpool->counter, af)) goto get_addr; } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af)) goto get_addr; try_next: - if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL) + if (TAILQ_NEXT(rpool->cur, entries) == NULL) rpool->cur = TAILQ_FIRST(&rpool->list); + else + rpool->cur = TAILQ_NEXT(rpool->cur, entries); if (rpool->cur->addr.type == PF_ADDR_TABLE) { rpool->tblidx = -1; if (pfr_pool_get(rpool->cur->addr.p.tbl, - &rpool->tblidx, &rpool->counter, - &raddr, &rmask, af)) { + &rpool->tblidx, &rpool->counter, af)) { /* table contains no address of type 'af' */ if (rpool->cur != acur) goto try_next; @@ -575,8 +461,7 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { rpool->tblidx = -1; if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, - &rpool->tblidx, &rpool->counter, - &raddr, &rmask, af)) { + &rpool->tblidx, &rpool->counter, af)) { /* table contains no address of type 'af' */ if (rpool->cur != acur) goto try_next; @@ -594,15 +479,12 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, PF_ACPY(init_addr, naddr, af); PF_AINC(&rpool->counter, af); break; + } } if (*sn != NULL) PF_ACPY(&(*sn)->raddr, naddr, af); -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC && -#else - if (pf_status.debug >= PF_DEBUG_MISC && -#endif (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { printf("pf_map_addr: selected address "); pf_print_host(naddr, 0, af); @@ -615,13 +497,17 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, struct pf_rule * pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, struct pfi_kif *kif, struct pf_src_node **sn, - struct pf_state_key **skw, struct pf_state_key **sks, struct pf_state_key **skp, struct pf_state_key **nkp, struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t sport, u_int16_t dport) { struct pf_rule *r = NULL; + struct pf_addr *naddr; + uint16_t *nport; + PF_RULES_RASSERT(); + KASSERT(*skp == NULL, ("*skp not NULL")); + KASSERT(*nkp == NULL, ("*nkp not NULL")); if (direction == PF_OUT) { r = pf_match_translation(pd, m, off, direction, kif, saddr, @@ -637,157 +523,141 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, saddr, sport, daddr, dport, PF_RULESET_BINAT); } - if (r != NULL) { - struct pf_addr *naddr; - u_int16_t 
*nport; - - if (pf_state_key_setup(pd, r, skw, sks, skp, nkp, - saddr, daddr, sport, dport)) - return r; - - /* XXX We only modify one side for now. */ - naddr = &(*nkp)->addr[1]; - nport = &(*nkp)->port[1]; - - switch (r->action) { - case PF_NONAT: - case PF_NOBINAT: - case PF_NORDR: - return (NULL); - case PF_NAT: - if (pf_get_sport(pd->af, pd->proto, r, saddr, - daddr, dport, naddr, nport, r->rpool.proxy_port[0], - r->rpool.proxy_port[1], sn)) { - DPFPRINTF(PF_DEBUG_MISC, - ("pf: NAT proxy port allocation " - "(%u-%u) failed\n", - r->rpool.proxy_port[0], - r->rpool.proxy_port[1])); - return (NULL); - } - break; - case PF_BINAT: - switch (direction) { - case PF_OUT: - if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){ - switch (pd->af) { + if (r == NULL) + return (NULL); + + switch (r->action) { + case PF_NONAT: + case PF_NOBINAT: + case PF_NORDR: + return (NULL); + } + + *skp = pf_state_key_setup(pd, saddr, daddr, sport, dport); + if (*skp == NULL) + return (NULL); + *nkp = pf_state_key_clone(*skp); + if (*nkp == NULL) { + uma_zfree(V_pf_state_key_z, skp); + *skp = NULL; + return (NULL); + } + + /* XXX We only modify one side for now. */ + naddr = &(*nkp)->addr[1]; + nport = &(*nkp)->port[1]; + + switch (r->action) { + case PF_NAT: + if (pf_get_sport(pd->af, pd->proto, r, saddr, daddr, dport, + naddr, nport, r->rpool.proxy_port[0], + r->rpool.proxy_port[1], sn)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: NAT proxy port allocation (%u-%u) failed\n", + r->rpool.proxy_port[0], r->rpool.proxy_port[1])); + goto notrans; + } + break; + case PF_BINAT: + switch (direction) { + case PF_OUT: + if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){ + switch (pd->af) { #ifdef INET - case AF_INET: - if (r->rpool.cur->addr.p.dyn-> - pfid_acnt4 < 1) - return (NULL); - PF_POOLMASK(naddr, - &r->rpool.cur->addr.p.dyn-> - pfid_addr4, - &r->rpool.cur->addr.p.dyn-> - pfid_mask4, - saddr, AF_INET); - break; + case AF_INET: + if (r->rpool.cur->addr.p.dyn-> + pfid_acnt4 < 1) + goto notrans; + PF_POOLMASK(naddr, + &r->rpool.cur->addr.p.dyn-> + pfid_addr4, + &r->rpool.cur->addr.p.dyn-> + pfid_mask4, saddr, AF_INET); + break; #endif /* INET */ #ifdef INET6 - case AF_INET6: - if (r->rpool.cur->addr.p.dyn-> - pfid_acnt6 < 1) - return (NULL); - PF_POOLMASK(naddr, - &r->rpool.cur->addr.p.dyn-> - pfid_addr6, - &r->rpool.cur->addr.p.dyn-> - pfid_mask6, - saddr, AF_INET6); - break; -#endif /* INET6 */ - } - } else + case AF_INET6: + if (r->rpool.cur->addr.p.dyn-> + pfid_acnt6 < 1) + goto notrans; PF_POOLMASK(naddr, - &r->rpool.cur->addr.v.a.addr, - &r->rpool.cur->addr.v.a.mask, - saddr, pd->af); - break; - case PF_IN: - if (r->src.addr.type == PF_ADDR_DYNIFTL) { - switch (pd->af) { + &r->rpool.cur->addr.p.dyn-> + pfid_addr6, + &r->rpool.cur->addr.p.dyn-> + pfid_mask6, saddr, AF_INET6); + break; +#endif /* INET6 */ + } + } else + PF_POOLMASK(naddr, + &r->rpool.cur->addr.v.a.addr, + &r->rpool.cur->addr.v.a.mask, saddr, + pd->af); + break; + case PF_IN: + if (r->src.addr.type == PF_ADDR_DYNIFTL) { + switch (pd->af) { #ifdef INET - case AF_INET: - if (r->src.addr.p.dyn-> - pfid_acnt4 < 1) - return (NULL); - PF_POOLMASK(naddr, - &r->src.addr.p.dyn-> - pfid_addr4, - &r->src.addr.p.dyn-> - pfid_mask4, - daddr, AF_INET); - break; + case AF_INET: + if (r->src.addr.p.dyn-> pfid_acnt4 < 1) + goto notrans; + PF_POOLMASK(naddr, + &r->src.addr.p.dyn->pfid_addr4, + &r->src.addr.p.dyn->pfid_mask4, + daddr, AF_INET); + break; #endif /* INET */ #ifdef INET6 - case AF_INET6: - if (r->src.addr.p.dyn-> - pfid_acnt6 < 1) - return (NULL); - 
PF_POOLMASK(naddr, - &r->src.addr.p.dyn-> - pfid_addr6, - &r->src.addr.p.dyn-> - pfid_mask6, - daddr, AF_INET6); - break; -#endif /* INET6 */ - } - } else + case AF_INET6: + if (r->src.addr.p.dyn->pfid_acnt6 < 1) + goto notrans; PF_POOLMASK(naddr, - &r->src.addr.v.a.addr, - &r->src.addr.v.a.mask, daddr, - pd->af); - break; - } - break; - case PF_RDR: { - if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn)) - return (NULL); - if ((r->rpool.opts & PF_POOL_TYPEMASK) == - PF_POOL_BITMASK) - PF_POOLMASK(naddr, naddr, - &r->rpool.cur->addr.v.a.mask, daddr, - pd->af); - - if (r->rpool.proxy_port[1]) { - u_int32_t tmp_nport; - - tmp_nport = ((ntohs(dport) - - ntohs(r->dst.port[0])) % - (r->rpool.proxy_port[1] - - r->rpool.proxy_port[0] + 1)) + - r->rpool.proxy_port[0]; - - /* wrap around if necessary */ - if (tmp_nport > 65535) - tmp_nport -= 65535; - *nport = htons((u_int16_t)tmp_nport); - } else if (r->rpool.proxy_port[0]) - *nport = htons(r->rpool.proxy_port[0]); + &r->src.addr.p.dyn->pfid_addr6, + &r->src.addr.p.dyn->pfid_mask6, + daddr, AF_INET6); + break; +#endif /* INET6 */ + } + } else + PF_POOLMASK(naddr, &r->src.addr.v.a.addr, + &r->src.addr.v.a.mask, daddr, pd->af); break; } - default: - return (NULL); - } - /* - * Translation was a NOP. - * Pretend there was no match. - */ - if (!bcmp(*skp, *nkp, sizeof(struct pf_state_key_cmp))) { -#ifdef __FreeBSD__ - pool_put(&V_pf_state_key_pl, *nkp); - pool_put(&V_pf_state_key_pl, *skp); -#else - pool_put(&pf_state_key_pl, *nkp); - pool_put(&pf_state_key_pl, *skp); -#endif - *skw = *sks = *nkp = *skp = NULL; - return (NULL); - } + break; + case PF_RDR: { + if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn)) + goto notrans; + if ((r->rpool.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK) + PF_POOLMASK(naddr, naddr, &r->rpool.cur->addr.v.a.mask, + daddr, pd->af); + + if (r->rpool.proxy_port[1]) { + uint32_t tmp_nport; + + tmp_nport = ((ntohs(dport) - ntohs(r->dst.port[0])) % + (r->rpool.proxy_port[1] - r->rpool.proxy_port[0] + + 1)) + r->rpool.proxy_port[0]; + + /* Wrap around if necessary. */ + if (tmp_nport > 65535) + tmp_nport -= 65535; + *nport = htons((uint16_t)tmp_nport); + } else if (r->rpool.proxy_port[0]) + *nport = htons(r->rpool.proxy_port[0]); + break; + } + default: + panic("%s: unknown action %u", __func__, r->action); } - return (r); -} + /* Return success only if translation really happened. 
*/ + if (bcmp(*skp, *nkp, sizeof(struct pf_state_key_cmp))) + return (r); +notrans: + uma_zfree(V_pf_state_key_z, *nkp); + uma_zfree(V_pf_state_key_z, *skp); + *skp = *nkp = NULL; + + return (NULL); +} diff --git a/sys/contrib/pf/net/pf_mtag.h b/sys/contrib/pf/net/pf_mtag.h index 141a867..baff00a 100644 --- a/sys/contrib/pf/net/pf_mtag.h +++ b/sys/contrib/pf/net/pf_mtag.h @@ -42,17 +42,12 @@ struct pf_mtag { void *hdr; /* saved hdr pos in mbuf, for ECN */ - void *statekey; /* pf stackside statekey */ u_int32_t qid; /* queue id */ - u_int rtableid; /* alternate routing table id */ u_int16_t tag; /* tag id */ u_int8_t flags; u_int8_t routed; }; -static __inline struct pf_mtag *pf_find_mtag(struct mbuf *); -static __inline struct pf_mtag *pf_get_mtag(struct mbuf *); - static __inline struct pf_mtag * pf_find_mtag(struct mbuf *m) { @@ -63,22 +58,5 @@ pf_find_mtag(struct mbuf *m) return ((struct pf_mtag *)(mtag + 1)); } - -static __inline struct pf_mtag * -pf_get_mtag(struct mbuf *m) -{ - struct m_tag *mtag; - - if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) == NULL) { - mtag = m_tag_get(PACKET_TAG_PF, sizeof(struct pf_mtag), - M_NOWAIT); - if (mtag == NULL) - return (NULL); - bzero(mtag + 1, sizeof(struct pf_mtag)); - m_tag_prepend(m, mtag); - } - - return ((struct pf_mtag *)(mtag + 1)); -} #endif /* _KERNEL */ #endif /* _NET_PF_MTAG_H_ */ diff --git a/sys/contrib/pf/net/pf_norm.c b/sys/contrib/pf/net/pf_norm.c index 2b20c85..9063fe8 100644 --- a/sys/contrib/pf/net/pf_norm.c +++ b/sys/contrib/pf/net/pf_norm.c @@ -25,78 +25,56 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifdef __FreeBSD__ -#include "opt_inet.h" -#include "opt_inet6.h" -#include "opt_pf.h" - #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#ifdef DEV_PFLOG -#define NPFLOG DEV_PFLOG -#else -#define NPFLOG 0 -#endif -#else -#include "pflog.h" -#endif +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_pf.h" #include <sys/param.h> -#include <sys/systm.h> +#include <sys/lock.h> #include <sys/mbuf.h> -#include <sys/filio.h> -#include <sys/fcntl.h> +#include <sys/mutex.h> +#include <sys/refcount.h> +#include <sys/rwlock.h> #include <sys/socket.h> -#include <sys/kernel.h> -#include <sys/time.h> -#ifndef __FreeBSD__ -#include <sys/pool.h> -#include <dev/rndvar.h> -#endif #include <net/if.h> -#include <net/if_types.h> -#include <net/bpf.h> -#include <net/route.h> +#include <net/vnet.h> +#include <net/pfvar.h> +#include <net/pf_mtag.h> #include <net/if_pflog.h> #include <netinet/in.h> -#include <netinet/in_var.h> -#include <netinet/in_systm.h> #include <netinet/ip.h> #include <netinet/ip_var.h> #include <netinet/tcp.h> +#include <netinet/tcp_fsm.h> #include <netinet/tcp_seq.h> -#include <netinet/udp.h> -#include <netinet/ip_icmp.h> #ifdef INET6 #include <netinet/ip6.h> #endif /* INET6 */ -#include <net/pfvar.h> - -#ifndef __FreeBSD__ struct pf_frent { LIST_ENTRY(pf_frent) fr_next; - struct ip *fr_ip; - struct mbuf *fr_m; -}; - -struct pf_frcache { - LIST_ENTRY(pf_frcache) fr_next; - uint16_t fr_off; - uint16_t fr_end; + union { + struct { + struct ip *_fr_ip; + struct mbuf *_fr_m; + } _frag; + struct { + uint16_t _fr_off; + uint16_t _fr_end; + } _cache; + } _u; }; -#endif - -#define PFFRAG_SEENLAST 0x0001 /* Seen the last fragment for this */ -#define PFFRAG_NOBUFFER 0x0002 /* Non-buffering fragment cache */ -#define PFFRAG_DROP 0x0004 /* Drop all fragments */ -#define BUFFER_FRAGMENTS(fr) (!((fr)->fr_flags & PFFRAG_NOBUFFER)) +#define fr_ip _u._frag._fr_ip +#define fr_m _u._frag._fr_m +#define 
fr_off _u._cache._fr_off +#define fr_end _u._cache._fr_end -#ifndef __FreeBSD__ struct pf_fragment { RB_ENTRY(pf_fragment) fr_entry; TAILQ_ENTRY(pf_fragment) frag_next; @@ -104,145 +82,104 @@ struct pf_fragment { struct in_addr fr_dst; u_int8_t fr_p; /* protocol of this fragment */ u_int8_t fr_flags; /* status flags */ +#define PFFRAG_SEENLAST 0x0001 /* Seen the last fragment for this */ +#define PFFRAG_NOBUFFER 0x0002 /* Non-buffering fragment cache */ +#define PFFRAG_DROP 0x0004 /* Drop all fragments */ +#define BUFFER_FRAGMENTS(fr) (!((fr)->fr_flags & PFFRAG_NOBUFFER)) u_int16_t fr_id; /* fragment id for reassemble */ u_int16_t fr_max; /* fragment data max */ u_int32_t fr_timeout; -#define fr_queue fr_u.fru_queue -#define fr_cache fr_u.fru_cache - union { - LIST_HEAD(pf_fragq, pf_frent) fru_queue; /* buffering */ - LIST_HEAD(pf_cacheq, pf_frcache) fru_cache; /* non-buf */ - } fr_u; + LIST_HEAD(, pf_frent) fr_queue; }; -#endif -#ifdef __FreeBSD__ +static struct mtx pf_frag_mtx; +#define PF_FRAG_LOCK() mtx_lock(&pf_frag_mtx) +#define PF_FRAG_UNLOCK() mtx_unlock(&pf_frag_mtx) +#define PF_FRAG_ASSERT() mtx_assert(&pf_frag_mtx, MA_OWNED) + +VNET_DEFINE(uma_zone_t, pf_state_scrub_z); /* XXX: shared with pfsync */ + +static VNET_DEFINE(uma_zone_t, pf_frent_z); +#define V_pf_frent_z VNET(pf_frent_z) +static VNET_DEFINE(uma_zone_t, pf_frag_z); +#define V_pf_frag_z VNET(pf_frag_z) + TAILQ_HEAD(pf_fragqueue, pf_fragment); TAILQ_HEAD(pf_cachequeue, pf_fragment); -VNET_DEFINE(struct pf_fragqueue, pf_fragqueue); +static VNET_DEFINE(struct pf_fragqueue, pf_fragqueue); #define V_pf_fragqueue VNET(pf_fragqueue) -VNET_DEFINE(struct pf_cachequeue, pf_cachequeue); +static VNET_DEFINE(struct pf_cachequeue, pf_cachequeue); #define V_pf_cachequeue VNET(pf_cachequeue) -#else -TAILQ_HEAD(pf_fragqueue, pf_fragment) pf_fragqueue; -TAILQ_HEAD(pf_cachequeue, pf_fragment) pf_cachequeue; -#endif - -#ifndef __FreeBSD__ -static __inline int pf_frag_compare(struct pf_fragment *, - struct pf_fragment *); -#else -static int pf_frag_compare(struct pf_fragment *, - struct pf_fragment *); -#endif - -#ifdef __FreeBSD__ RB_HEAD(pf_frag_tree, pf_fragment); -VNET_DEFINE(struct pf_frag_tree, pf_frag_tree); +static VNET_DEFINE(struct pf_frag_tree, pf_frag_tree); #define V_pf_frag_tree VNET(pf_frag_tree) -VNET_DEFINE(struct pf_frag_tree, pf_cache_tree); +static VNET_DEFINE(struct pf_frag_tree, pf_cache_tree); #define V_pf_cache_tree VNET(pf_cache_tree) -#else -RB_HEAD(pf_frag_tree, pf_fragment) pf_frag_tree, pf_cache_tree; -#endif -RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); -RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); +static int pf_frag_compare(struct pf_fragment *, + struct pf_fragment *); +static RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); +static RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); /* Private prototypes */ -void pf_ip2key(struct pf_fragment *, struct ip *); -void pf_remove_fragment(struct pf_fragment *); -void pf_flush_fragments(void); -void pf_free_fragment(struct pf_fragment *); -struct pf_fragment *pf_find_fragment(struct ip *, struct pf_frag_tree *); -struct mbuf *pf_reassemble(struct mbuf **, struct pf_fragment **, - struct pf_frent *, int); -struct mbuf *pf_fragcache(struct mbuf **, struct ip*, - struct pf_fragment **, int, int, int *); -int pf_normalize_tcpopt(struct pf_rule *, struct mbuf *, +static void pf_free_fragment(struct pf_fragment *); +static void pf_remove_fragment(struct pf_fragment *); +static int 
pf_normalize_tcpopt(struct pf_rule *, struct mbuf *, struct tcphdr *, int, sa_family_t); -void pf_scrub_ip(struct mbuf **, u_int32_t, u_int8_t, +#ifdef INET +static void pf_ip2key(struct pf_fragment *, struct ip *); +static void pf_scrub_ip(struct mbuf **, u_int32_t, u_int8_t, u_int8_t); +static void pf_flush_fragments(void); +static struct pf_fragment *pf_find_fragment(struct ip *, struct pf_frag_tree *); +static struct mbuf *pf_reassemble(struct mbuf **, struct pf_fragment **, + struct pf_frent *, int); +static struct mbuf *pf_fragcache(struct mbuf **, struct ip*, + struct pf_fragment **, int, int, int *); +#endif /* INET */ #ifdef INET6 -void pf_scrub_ip6(struct mbuf **, u_int8_t); +static void pf_scrub_ip6(struct mbuf **, u_int8_t); #endif -#ifdef __FreeBSD__ #define DPFPRINTF(x) do { \ if (V_pf_status.debug >= PF_DEBUG_MISC) { \ printf("%s: ", __func__); \ printf x ; \ } \ } while(0) -#else -#define DPFPRINTF(x) do { \ - if (pf_status.debug >= PF_DEBUG_MISC) { \ - printf("%s: ", __func__); \ - printf x ; \ - } \ -} while(0) -#endif - -/* Globals */ -#ifdef __FreeBSD__ -VNET_DEFINE(uma_zone_t, pf_frent_pl); -VNET_DEFINE(uma_zone_t, pf_frag_pl); -VNET_DEFINE(uma_zone_t, pf_cache_pl); -VNET_DEFINE(uma_zone_t, pf_cent_pl); -VNET_DEFINE(uma_zone_t, pf_state_scrub_pl); - -VNET_DEFINE(int, pf_nfrents); -#define V_pf_nfrents VNET(pf_nfrents) -VNET_DEFINE(int, pf_ncache); -#define V_pf_ncache VNET(pf_ncache) -#else -struct pool pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl; -struct pool pf_state_scrub_pl; -int pf_nfrents, pf_ncache; -#endif void pf_normalize_init(void) { -#ifdef __FreeBSD__ - /* - * XXX - * No high water mark support(It's hint not hard limit). - * uma_zone_set_max(pf_frag_pl, PFFRAG_FRAG_HIWAT); - */ - uma_zone_set_max(V_pf_frent_pl, PFFRAG_FRENT_HIWAT); - uma_zone_set_max(V_pf_cache_pl, PFFRAG_FRCACHE_HIWAT); - uma_zone_set_max(V_pf_cent_pl, PFFRAG_FRCENT_HIWAT); -#else - pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent", - NULL); - pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag", - NULL); - pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0, - "pffrcache", NULL); - pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent", - NULL); - pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0, - "pfstscr", NULL); - - pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT); - pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0); - pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0); - pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0); -#endif -#ifdef __FreeBSD__ + V_pf_frag_z = uma_zcreate("pf frags", sizeof(struct pf_fragment), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + V_pf_frent_z = uma_zcreate("pf frag entries", sizeof(struct pf_frent), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + V_pf_state_scrub_z = uma_zcreate("pf state scrubs", + sizeof(struct pf_state_scrub), NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, 0); + + V_pf_limits[PF_LIMIT_FRAGS].zone = V_pf_frent_z; + V_pf_limits[PF_LIMIT_FRAGS].limit = PFFRAG_FRENT_HIWAT; + uma_zone_set_max(V_pf_frent_z, PFFRAG_FRENT_HIWAT); + + mtx_init(&pf_frag_mtx, "pf fragments", NULL, MTX_DEF); + TAILQ_INIT(&V_pf_fragqueue); TAILQ_INIT(&V_pf_cachequeue); -#else - TAILQ_INIT(&pf_fragqueue); - TAILQ_INIT(&pf_cachequeue); -#endif } -#ifdef __FreeBSD__ +void +pf_normalize_cleanup(void) +{ + + uma_zdestroy(V_pf_state_scrub_z); + uma_zdestroy(V_pf_frent_z); + uma_zdestroy(V_pf_frag_z); + + mtx_destroy(&pf_frag_mtx); +} + static int -#else -static 
__inline int -#endif pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b) { int diff; @@ -266,22 +203,13 @@ void pf_purge_expired_fragments(void) { struct pf_fragment *frag; -#ifdef __FreeBSD__ - u_int32_t expire = time_second - + u_int32_t expire = time_uptime - V_pf_default_rule.timeout[PFTM_FRAG]; -#else - u_int32_t expire = time_second - - pf_default_rule.timeout[PFTM_FRAG]; -#endif -#ifdef __FreeBSD__ + PF_FRAG_LOCK(); while ((frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue)) != NULL) { KASSERT((BUFFER_FRAGMENTS(frag)), ("BUFFER_FRAGMENTS(frag) == 0: %s", __FUNCTION__)); -#else - while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) { - KASSERT(BUFFER_FRAGMENTS(frag)); -#endif if (frag->fr_timeout > expire) break; @@ -289,92 +217,56 @@ pf_purge_expired_fragments(void) pf_free_fragment(frag); } -#ifdef __FreeBSD__ while ((frag = TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue)) != NULL) { KASSERT((!BUFFER_FRAGMENTS(frag)), ("BUFFER_FRAGMENTS(frag) != 0: %s", __FUNCTION__)); -#else - while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) { - KASSERT(!BUFFER_FRAGMENTS(frag)); -#endif if (frag->fr_timeout > expire) break; DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag)); pf_free_fragment(frag); -#ifdef __FreeBSD__ KASSERT((TAILQ_EMPTY(&V_pf_cachequeue) || TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue) != frag), ("!(TAILQ_EMPTY() || TAILQ_LAST() == farg): %s", __FUNCTION__)); -#else - KASSERT(TAILQ_EMPTY(&pf_cachequeue) || - TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag); -#endif } + PF_FRAG_UNLOCK(); } +#ifdef INET /* * Try to flush old fragments to make space for new ones */ - -void +static void pf_flush_fragments(void) { - struct pf_fragment *frag; + struct pf_fragment *frag, *cache; int goal; -#ifdef __FreeBSD__ - goal = V_pf_nfrents * 9 / 10; - DPFPRINTF(("trying to free > %d frents\n", - V_pf_nfrents - goal)); - while (goal < V_pf_nfrents) { -#else - goal = pf_nfrents * 9 / 10; - DPFPRINTF(("trying to free > %d frents\n", - pf_nfrents - goal)); - while (goal < pf_nfrents) { -#endif -#ifdef __FreeBSD__ - frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue); -#else - frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue); -#endif - if (frag == NULL) - break; - pf_free_fragment(frag); - } + PF_FRAG_ASSERT(); - -#ifdef __FreeBSD__ - goal = V_pf_ncache * 9 / 10; - DPFPRINTF(("trying to free > %d cache entries\n", - V_pf_ncache - goal)); - while (goal < V_pf_ncache) { -#else - goal = pf_ncache * 9 / 10; - DPFPRINTF(("trying to free > %d cache entries\n", - pf_ncache - goal)); - while (goal < pf_ncache) { -#endif -#ifdef __FreeBSD__ - frag = TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue); -#else - frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue); -#endif - if (frag == NULL) + goal = uma_zone_get_cur(V_pf_frent_z) * 9 / 10; + DPFPRINTF(("trying to free %d frag entriess\n", goal)); + while (goal < uma_zone_get_cur(V_pf_frent_z)) { + frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue); + if (frag) + pf_free_fragment(frag); + cache = TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue); + if (cache) + pf_free_fragment(cache); + if (frag == NULL && cache == NULL) break; - pf_free_fragment(frag); } } +#endif /* INET */ /* Frees the fragments and all associated entries */ - -void +static void pf_free_fragment(struct pf_fragment *frag) { struct pf_frent *frent; - struct pf_frcache *frcache; + + PF_FRAG_ASSERT(); /* Free all fragments */ if (BUFFER_FRAGMENTS(frag)) { @@ -383,43 +275,28 @@ pf_free_fragment(struct pf_fragment *frag) LIST_REMOVE(frent, fr_next); m_freem(frent->fr_m); -#ifdef 
__FreeBSD__ - pool_put(&V_pf_frent_pl, frent); - V_pf_nfrents--; -#else - pool_put(&pf_frent_pl, frent); - pf_nfrents--; -#endif + uma_zfree(V_pf_frent_z, frent); } } else { - for (frcache = LIST_FIRST(&frag->fr_cache); frcache; - frcache = LIST_FIRST(&frag->fr_cache)) { - LIST_REMOVE(frcache, fr_next); - -#ifdef __FreeBSD__ - KASSERT((LIST_EMPTY(&frag->fr_cache) || - LIST_FIRST(&frag->fr_cache)->fr_off > - frcache->fr_end), - ("! (LIST_EMPTY() || LIST_FIRST()->fr_off >" - " frcache->fr_end): %s", __FUNCTION__)); + for (frent = LIST_FIRST(&frag->fr_queue); frent; + frent = LIST_FIRST(&frag->fr_queue)) { + LIST_REMOVE(frent, fr_next); - pool_put(&V_pf_cent_pl, frcache); - V_pf_ncache--; -#else - KASSERT(LIST_EMPTY(&frag->fr_cache) || - LIST_FIRST(&frag->fr_cache)->fr_off > - frcache->fr_end); + KASSERT((LIST_EMPTY(&frag->fr_queue) || + LIST_FIRST(&frag->fr_queue)->fr_off > + frent->fr_end), + ("! (LIST_EMPTY() || LIST_FIRST()->fr_off >" + " frent->fr_end): %s", __func__)); - pool_put(&pf_cent_pl, frcache); - pf_ncache--; -#endif + uma_zfree(V_pf_frent_z, frent); } } pf_remove_fragment(frag); } -void +#ifdef INET +static void pf_ip2key(struct pf_fragment *key, struct ip *ip) { key->fr_p = ip->ip_p; @@ -428,70 +305,55 @@ pf_ip2key(struct pf_fragment *key, struct ip *ip) key->fr_dst.s_addr = ip->ip_dst.s_addr; } -struct pf_fragment * +static struct pf_fragment * pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree) { struct pf_fragment key; struct pf_fragment *frag; + PF_FRAG_ASSERT(); + pf_ip2key(&key, ip); frag = RB_FIND(pf_frag_tree, tree, &key); if (frag != NULL) { /* XXX Are we sure we want to update the timeout? */ - frag->fr_timeout = time_second; + frag->fr_timeout = time_uptime; if (BUFFER_FRAGMENTS(frag)) { -#ifdef __FreeBSD__ TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next); TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next); -#else - TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); - TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next); -#endif } else { -#ifdef __FreeBSD__ TAILQ_REMOVE(&V_pf_cachequeue, frag, frag_next); TAILQ_INSERT_HEAD(&V_pf_cachequeue, frag, frag_next); -#else - TAILQ_REMOVE(&pf_cachequeue, frag, frag_next); - TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next); -#endif } } return (frag); } +#endif /* INET */ /* Removes a fragment from the fragment queue and frees the fragment */ -void +static void pf_remove_fragment(struct pf_fragment *frag) { + + PF_FRAG_ASSERT(); + if (BUFFER_FRAGMENTS(frag)) { -#ifdef __FreeBSD__ RB_REMOVE(pf_frag_tree, &V_pf_frag_tree, frag); TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next); - pool_put(&V_pf_frag_pl, frag); -#else - RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag); - TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); - pool_put(&pf_frag_pl, frag); -#endif + uma_zfree(V_pf_frag_z, frag); } else { -#ifdef __FreeBSD__ RB_REMOVE(pf_frag_tree, &V_pf_cache_tree, frag); TAILQ_REMOVE(&V_pf_cachequeue, frag, frag_next); - pool_put(&V_pf_cache_pl, frag); -#else - RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag); - TAILQ_REMOVE(&pf_cachequeue, frag, frag_next); - pool_put(&pf_cache_pl, frag); -#endif + uma_zfree(V_pf_frag_z, frag); } } +#ifdef INET #define FR_IP_OFF(fr) ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3) -struct mbuf * +static struct mbuf * pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, struct pf_frent *frent, int mff) { @@ -504,12 +366,9 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, u_int16_t ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4; u_int16_t max = ip_len + off; -#ifdef __FreeBSD__ + PF_FRAG_ASSERT(); 
KASSERT((*frag == NULL || BUFFER_FRAGMENTS(*frag)), ("! (*frag == NULL || BUFFER_FRAGMENTS(*frag)): %s", __FUNCTION__)); -#else - KASSERT(*frag == NULL || BUFFER_FRAGMENTS(*frag)); -#endif /* Strip off ip header */ m->m_data += hlen; @@ -517,18 +376,10 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, /* Create a new reassembly queue for this packet */ if (*frag == NULL) { -#ifdef __FreeBSD__ - *frag = pool_get(&V_pf_frag_pl, PR_NOWAIT); -#else - *frag = pool_get(&pf_frag_pl, PR_NOWAIT); -#endif + *frag = uma_zalloc(V_pf_frag_z, M_NOWAIT); if (*frag == NULL) { pf_flush_fragments(); -#ifdef __FreeBSD__ - *frag = pool_get(&V_pf_frag_pl, PR_NOWAIT); -#else - *frag = pool_get(&pf_frag_pl, PR_NOWAIT); -#endif + *frag = uma_zalloc(V_pf_frag_z, M_NOWAIT); if (*frag == NULL) goto drop_fragment; } @@ -539,16 +390,11 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, (*frag)->fr_dst = frent->fr_ip->ip_dst; (*frag)->fr_p = frent->fr_ip->ip_p; (*frag)->fr_id = frent->fr_ip->ip_id; - (*frag)->fr_timeout = time_second; + (*frag)->fr_timeout = time_uptime; LIST_INIT(&(*frag)->fr_queue); -#ifdef __FreeBSD__ RB_INSERT(pf_frag_tree, &V_pf_frag_tree, *frag); TAILQ_INSERT_HEAD(&V_pf_fragqueue, *frag, frag_next); -#else - RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag); - TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next); -#endif /* We do not have a previous fragment */ frep = NULL; @@ -565,12 +411,8 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, frep = frea; } -#ifdef __FreeBSD__ KASSERT((frep != NULL || frea != NULL), ("!(frep != NULL || frea != NULL): %s", __FUNCTION__));; -#else - KASSERT(frep != NULL || frea != NULL); -#endif if (frep != NULL && FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * @@ -613,13 +455,7 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, next = LIST_NEXT(frea, fr_next); m_freem(frea->fr_m); LIST_REMOVE(frea, fr_next); -#ifdef __FreeBSD__ - pool_put(&V_pf_frent_pl, frea); - V_pf_nfrents--; -#else - pool_put(&pf_frent_pl, frea); - pf_nfrents--; -#endif + uma_zfree(V_pf_frent_z, frea); } insert: @@ -660,11 +496,7 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, /* We have all the data */ frent = LIST_FIRST(&(*frag)->fr_queue); -#ifdef __FreeBSD__ KASSERT((frent != NULL), ("frent == NULL: %s", __FUNCTION__)); -#else - KASSERT(frent != NULL); -#endif if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) { DPFPRINTF(("drop: too big: %d\n", off)); pf_free_fragment(*frag); @@ -679,36 +511,20 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, m2 = m->m_next; m->m_next = NULL; m_cat(m, m2); -#ifdef __FreeBSD__ - pool_put(&V_pf_frent_pl, frent); - V_pf_nfrents--; -#else - pool_put(&pf_frent_pl, frent); - pf_nfrents--; -#endif + uma_zfree(V_pf_frent_z, frent); for (frent = next; frent != NULL; frent = next) { next = LIST_NEXT(frent, fr_next); m2 = frent->fr_m; -#ifdef __FreeBSD__ - pool_put(&V_pf_frent_pl, frent); - V_pf_nfrents--; -#else - pool_put(&pf_frent_pl, frent); - pf_nfrents--; -#endif -#ifdef __FreeBSD__ + uma_zfree(V_pf_frent_z, frent); m->m_pkthdr.csum_flags &= m2->m_pkthdr.csum_flags; m->m_pkthdr.csum_data += m2->m_pkthdr.csum_data; -#endif m_cat(m, m2); } -#ifdef __FreeBSD__ while (m->m_pkthdr.csum_data & 0xffff0000) m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) + (m->m_pkthdr.csum_data >> 16); -#endif ip->ip_src = (*frag)->fr_src; ip->ip_dst = (*frag)->fr_dst; @@ -735,71 +551,43 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, drop_fragment: /* Oops - fail safe - 
drop packet */ -#ifdef __FreeBSD__ - pool_put(&V_pf_frent_pl, frent); - V_pf_nfrents--; -#else - pool_put(&pf_frent_pl, frent); - pf_nfrents--; -#endif + uma_zfree(V_pf_frent_z, frent); m_freem(m); return (NULL); } -struct mbuf * +static struct mbuf * pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, int drop, int *nomem) { struct mbuf *m = *m0; - struct pf_frcache *frp, *fra, *cur = NULL; + struct pf_frent *frp, *fra, *cur = NULL; int ip_len = ntohs(h->ip_len) - (h->ip_hl << 2); u_int16_t off = ntohs(h->ip_off) << 3; u_int16_t max = ip_len + off; int hosed = 0; -#ifdef __FreeBSD__ + PF_FRAG_ASSERT(); KASSERT((*frag == NULL || !BUFFER_FRAGMENTS(*frag)), ("!(*frag == NULL || !BUFFER_FRAGMENTS(*frag)): %s", __FUNCTION__)); -#else - KASSERT(*frag == NULL || !BUFFER_FRAGMENTS(*frag)); -#endif /* Create a new range queue for this packet */ if (*frag == NULL) { -#ifdef __FreeBSD__ - *frag = pool_get(&V_pf_cache_pl, PR_NOWAIT); -#else - *frag = pool_get(&pf_cache_pl, PR_NOWAIT); -#endif + *frag = uma_zalloc(V_pf_frag_z, M_NOWAIT); if (*frag == NULL) { pf_flush_fragments(); -#ifdef __FreeBSD__ - *frag = pool_get(&V_pf_cache_pl, PR_NOWAIT); -#else - *frag = pool_get(&pf_cache_pl, PR_NOWAIT); -#endif + *frag = uma_zalloc(V_pf_frag_z, M_NOWAIT); if (*frag == NULL) goto no_mem; } /* Get an entry for the queue */ -#ifdef __FreeBSD__ - cur = pool_get(&V_pf_cent_pl, PR_NOWAIT); - if (cur == NULL) { - pool_put(&V_pf_cache_pl, *frag); -#else - cur = pool_get(&pf_cent_pl, PR_NOWAIT); + cur = uma_zalloc(V_pf_frent_z, M_NOWAIT); if (cur == NULL) { - pool_put(&pf_cache_pl, *frag); -#endif + uma_zfree(V_pf_frag_z, *frag); *frag = NULL; goto no_mem; } -#ifdef __FreeBSD__ - V_pf_ncache++; -#else - pf_ncache++; -#endif (*frag)->fr_flags = PFFRAG_NOBUFFER; (*frag)->fr_max = 0; @@ -807,20 +595,15 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, (*frag)->fr_dst = h->ip_dst; (*frag)->fr_p = h->ip_p; (*frag)->fr_id = h->ip_id; - (*frag)->fr_timeout = time_second; + (*frag)->fr_timeout = time_uptime; cur->fr_off = off; cur->fr_end = max; - LIST_INIT(&(*frag)->fr_cache); - LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next); + LIST_INIT(&(*frag)->fr_queue); + LIST_INSERT_HEAD(&(*frag)->fr_queue, cur, fr_next); -#ifdef __FreeBSD__ RB_INSERT(pf_frag_tree, &V_pf_cache_tree, *frag); TAILQ_INSERT_HEAD(&V_pf_cachequeue, *frag, frag_next); -#else - RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag); - TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next); -#endif DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, max)); @@ -832,18 +615,14 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, * - off contains the real shifted offset. */ frp = NULL; - LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) { + LIST_FOREACH(fra, &(*frag)->fr_queue, fr_next) { if (fra->fr_off > off) break; frp = fra; } -#ifdef __FreeBSD__ KASSERT((frp != NULL || fra != NULL), ("!(frp != NULL || fra != NULL): %s", __FUNCTION__)); -#else - KASSERT(frp != NULL || fra != NULL); -#endif if (frp != NULL) { int precut; @@ -885,24 +664,16 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, * than this mbuf magic. For my next trick, * I'll pull a rabbit out of my laptop. */ -#ifdef __FreeBSD__ - *m0 = m_dup(m, M_DONTWAIT); -#else - *m0 = m_copym2(m, 0, h->ip_hl << 2, M_NOWAIT); -#endif + *m0 = m_dup(m, M_NOWAIT); if (*m0 == NULL) goto no_mem; -#ifdef __FreeBSD__ /* From KAME Project : We have missed this! 
*/ m_adj(*m0, (h->ip_hl << 2) - (*m0)->m_pkthdr.len); - KASSERT(((*m0)->m_next == NULL), - ("(*m0)->m_next != NULL: %s", + KASSERT(((*m0)->m_next == NULL), + ("(*m0)->m_next != NULL: %s", __FUNCTION__)); -#else - KASSERT((*m0)->m_next == NULL); -#endif m_adj(m, precut + (h->ip_hl << 2)); m_cat(*m0, m); m = *m0; @@ -917,15 +688,10 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, h = mtod(m, struct ip *); -#ifdef __FreeBSD__ KASSERT(((int)m->m_len == ntohs(h->ip_len) - precut), ("m->m_len != ntohs(h->ip_len) - precut: %s", __FUNCTION__)); -#else - KASSERT((int)m->m_len == - ntohs(h->ip_len) - precut); -#endif h->ip_off = htons(ntohs(h->ip_off) + (precut >> 3)); h->ip_len = htons(ntohs(h->ip_len) - precut); @@ -939,18 +705,9 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, h->ip_id, -precut, frp->fr_off, frp->fr_end, off, max)); -#ifdef __FreeBSD__ - cur = pool_get(&V_pf_cent_pl, PR_NOWAIT); -#else - cur = pool_get(&pf_cent_pl, PR_NOWAIT); -#endif + cur = uma_zalloc(V_pf_frent_z, M_NOWAIT); if (cur == NULL) goto no_mem; -#ifdef __FreeBSD__ - V_pf_ncache++; -#else - pf_ncache++; -#endif cur->fr_off = off; cur->fr_end = max; @@ -989,14 +746,9 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, m->m_pkthdr.len = plen; } h = mtod(m, struct ip *); -#ifdef __FreeBSD__ KASSERT(((int)m->m_len == ntohs(h->ip_len) - aftercut), ("m->m_len != ntohs(h->ip_len) - aftercut: %s", __FUNCTION__)); -#else - KASSERT((int)m->m_len == - ntohs(h->ip_len) - aftercut); -#endif h->ip_len = htons(ntohs(h->ip_len) - aftercut); } else { hosed++; @@ -1007,18 +759,9 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, h->ip_id, -aftercut, off, max, fra->fr_off, fra->fr_end)); -#ifdef __FreeBSD__ - cur = pool_get(&V_pf_cent_pl, PR_NOWAIT); -#else - cur = pool_get(&pf_cent_pl, PR_NOWAIT); -#endif + cur = uma_zalloc(V_pf_frent_z, M_NOWAIT); if (cur == NULL) goto no_mem; -#ifdef __FreeBSD__ - V_pf_ncache++; -#else - pf_ncache++; -#endif cur->fr_off = off; cur->fr_end = max; @@ -1036,36 +779,20 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, max, fra->fr_off, fra->fr_end)); fra->fr_off = cur->fr_off; LIST_REMOVE(cur, fr_next); -#ifdef __FreeBSD__ - pool_put(&V_pf_cent_pl, cur); - V_pf_ncache--; -#else - pool_put(&pf_cent_pl, cur); - pf_ncache--; -#endif + uma_zfree(V_pf_frent_z, cur); cur = NULL; } else if (frp && fra->fr_off <= frp->fr_end) { /* Need to merge in a modified 'frp' */ -#ifdef __FreeBSD__ KASSERT((cur == NULL), ("cur != NULL: %s", __FUNCTION__)); -#else - KASSERT(cur == NULL); -#endif DPFPRINTF(("fragcache[%d]: adjacent(merge " "%d-%d) %d-%d (%d-%d)\n", h->ip_id, frp->fr_off, frp->fr_end, off, max, fra->fr_off, fra->fr_end)); fra->fr_off = frp->fr_off; LIST_REMOVE(frp, fr_next); -#ifdef __FreeBSD__ - pool_put(&V_pf_cent_pl, frp); - V_pf_ncache--; -#else - pool_put(&pf_cent_pl, frp); - pf_ncache--; -#endif + uma_zfree(V_pf_frent_z, frp); frp = NULL; } @@ -1093,8 +820,8 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, /* Check if we are completely reassembled */ if (((*frag)->fr_flags & PFFRAG_SEENLAST) && - LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 && - LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) { + LIST_FIRST(&(*frag)->fr_queue)->fr_off == 0 && + LIST_FIRST(&(*frag)->fr_queue)->fr_end == (*frag)->fr_max) { /* Remove from fragment queue */ DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id, 
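/*
 * Note: illustrative sketch, not part of this patch.  The fragment-cache
 * path above duplicates the packet with m_dup(9) (now called with the
 * M_NOWAIT flag) and then trims it with m_adj(9) using a negative length,
 * which removes that many bytes from the tail of the chain, leaving only
 * the IP header.  Sketch of the idiom with an illustrative header length:
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>

static struct mbuf *
example_keep_header_only(struct mbuf *m, int hlen)
{
	struct mbuf *copy;

	copy = m_dup(m, M_NOWAIT);	/* deep copy of the whole chain */
	if (copy == NULL)
		return (NULL);
	/* Negative argument: trim (pkthdr.len - hlen) bytes off the tail,
	 * so only the first hlen bytes remain. */
	m_adj(copy, hlen - copy->m_pkthdr.len);
	return (copy);
}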
(*frag)->fr_max)); @@ -1132,7 +859,6 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, return (NULL); } -#ifdef INET int pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, struct pf_pdesc *pd) @@ -1150,6 +876,8 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, int ip_off; int tag = -1; + PF_RULES_RASSERT(); + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); while (r != NULL) { r->evaluations++; @@ -1169,11 +897,8 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, (struct pf_addr *)&h->ip_dst.s_addr, AF_INET, r->dst.neg, NULL, M_GETFIB(m))) r = r->skip[PF_SKIP_DST_ADDR].ptr; -#ifdef __FreeBSD__ - else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag)) -#else - else if (r->match_tag && !pf_match_tag(m, r, &tag)) -#endif + else if (r->match_tag && !pf_match_tag(m, r, &tag, + pd->pf_mtag ? pd->pf_mtag->tag : 0)) r = TAILQ_NEXT(r, entries); else break; @@ -1231,13 +956,10 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, max = fragoff + ip_len; if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) { - /* Fully buffer all of the fragments */ -#ifdef __FreeBSD__ + /* Fully buffer all of the fragments */ + PF_FRAG_LOCK(); frag = pf_find_fragment(h, &V_pf_frag_tree); -#else - frag = pf_find_fragment(h, &pf_frag_tree); -#endif /* Check if we saw the last fragment already */ if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) && @@ -1245,26 +967,19 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, goto bad; /* Get an entry for the fragment queue */ -#ifdef __FreeBSD__ - frent = pool_get(&V_pf_frent_pl, PR_NOWAIT); -#else - frent = pool_get(&pf_frent_pl, PR_NOWAIT); -#endif + frent = uma_zalloc(V_pf_frent_z, M_NOWAIT); if (frent == NULL) { + PF_FRAG_UNLOCK(); REASON_SET(reason, PFRES_MEMORY); return (PF_DROP); } -#ifdef __FreeBSD__ - V_pf_nfrents++; -#else - pf_nfrents++; -#endif frent->fr_ip = h; frent->fr_m = m; /* Might return a completely reassembled mbuf, or NULL */ DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max)); *m0 = m = pf_reassemble(m0, &frag, frent, mff); + PF_FRAG_UNLOCK(); if (m == NULL) return (PF_DROP); @@ -1289,11 +1004,7 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, /* non-buffering fragment cache (drops or masks overlaps) */ int nomem = 0; -#ifdef __FreeBSD__ if (dir == PF_OUT && pd->pf_mtag->flags & PF_TAG_FRAGCACHE) { -#else - if (dir == PF_OUT && m->m_pkthdr.pf.flags & PF_TAG_FRAGCACHE) { -#endif /* * Already passed the fragment cache in the * input direction. If we continued, it would @@ -1302,11 +1013,8 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, goto fragment_pass; } -#ifdef __FreeBSD__ + PF_FRAG_LOCK(); frag = pf_find_fragment(h, &V_pf_cache_tree); -#else - frag = pf_find_fragment(h, &pf_cache_tree); -#endif /* Check if we saw the last fragment already */ if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) && @@ -1318,6 +1026,7 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, *m0 = m = pf_fragcache(m0, h, &frag, mff, (r->rule_flag & PFRULE_FRAGDROP) ? 
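/*
 * Note: illustrative sketch, not part of this patch.  PF_RULES_RASSERT()
 * and PF_FRAG_LOCK()/PF_FRAG_UNLOCK() used above belong to the new
 * fine-grained locking; their definitions are outside these hunks.
 * Assuming the ruleset is protected by an rwlock(9) and the fragment
 * queues by a mutex(9), the assertion/lock pattern would look roughly like:
 */
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>

/* rw_init()/mtx_init() would happen at module load; omitted here. */
static struct rwlock example_rules_lock;
static struct mtx example_frag_mtx;

#define	EXAMPLE_RULES_RASSERT()	rw_assert(&example_rules_lock, RA_RLOCKED)
#define	EXAMPLE_RULES_WASSERT()	rw_assert(&example_rules_lock, RA_WLOCKED)
#define	EXAMPLE_FRAG_LOCK()	mtx_lock(&example_frag_mtx)
#define	EXAMPLE_FRAG_UNLOCK()	mtx_unlock(&example_frag_mtx)
#define	EXAMPLE_FRAG_ASSERT()	mtx_assert(&example_frag_mtx, MA_OWNED)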
1 : 0, &nomem); + PF_FRAG_UNLOCK(); if (m == NULL) { if (nomem) goto no_mem; @@ -1337,11 +1046,7 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, } #endif if (dir == PF_IN) -#ifdef __FreeBSD__ pd->pf_mtag->flags |= PF_TAG_FRAGCACHE; -#else - m->m_pkthdr.pf.flags |= PF_TAG_FRAGCACHE; -#endif if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) goto drop; @@ -1369,25 +1074,30 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, no_mem: REASON_SET(reason, PFRES_MEMORY); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd); + PFLOG_PACKET(kif, m, AF_INET, dir, *reason, r, NULL, NULL, pd, + 1); return (PF_DROP); drop: REASON_SET(reason, PFRES_NORM); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd); + PFLOG_PACKET(kif, m, AF_INET, dir, *reason, r, NULL, NULL, pd, + 1); return (PF_DROP); bad: DPFPRINTF(("dropping bad fragment\n")); /* Free associated fragments */ - if (frag != NULL) + if (frag != NULL) { pf_free_fragment(frag); + PF_FRAG_UNLOCK(); + } REASON_SET(reason, PFRES_FRAG); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd); + PFLOG_PACKET(kif, m, AF_INET, dir, *reason, r, NULL, NULL, pd, + 1); return (PF_DROP); } @@ -1413,6 +1123,8 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, u_int8_t proto; int terminal; + PF_RULES_RASSERT(); + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); while (r != NULL) { r->evaluations++; @@ -1553,19 +1265,22 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, shortpkt: REASON_SET(reason, PFRES_SHORT); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd); + PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd, + 1); return (PF_DROP); drop: REASON_SET(reason, PFRES_NORM); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd); + PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd, + 1); return (PF_DROP); badfrag: REASON_SET(reason, PFRES_FRAG); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd); + PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd, + 1); return (PF_DROP); } #endif /* INET6 */ @@ -1581,6 +1296,8 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, u_int8_t flags; sa_family_t af = pd->af; + PF_RULES_RASSERT(); + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); while (r != NULL) { r->evaluations++; @@ -1674,18 +1391,15 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, /* copy back packet headers if we sanitized */ if (rewrite) -#ifdef __FreeBSD__ m_copyback(m, off, sizeof(*th), (caddr_t)th); -#else - m_copyback(m, off, sizeof(*th), th); -#endif return (PF_PASS); tcp_drop: REASON_SET(&reason, PFRES_NORM); if (rm != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL, pd); + PFLOG_PACKET(kif, m, AF_INET, dir, reason, r, NULL, NULL, pd, + 1); return (PF_DROP); } @@ -1697,19 +1411,12 @@ pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd, u_int8_t hdr[60]; u_int8_t *opt; -#ifdef __FreeBSD__ - KASSERT((src->scrub == NULL), + KASSERT((src->scrub == NULL), ("pf_normalize_tcp_init: src->scrub != NULL")); - src->scrub = pool_get(&V_pf_state_scrub_pl, PR_NOWAIT); -#else - KASSERT(src->scrub == NULL); - - src->scrub = pool_get(&pf_state_scrub_pl, 
PR_NOWAIT); -#endif + src->scrub = uma_zalloc(V_pf_state_scrub_z, M_ZERO | M_NOWAIT); if (src->scrub == NULL) return (1); - bzero(src->scrub, sizeof(*src->scrub)); switch (pd->af) { #ifdef INET @@ -1782,17 +1489,10 @@ pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd, void pf_normalize_tcp_cleanup(struct pf_state *state) { -#ifdef __FreeBSD__ - if (state->src.scrub) - pool_put(&V_pf_state_scrub_pl, state->src.scrub); - if (state->dst.scrub) - pool_put(&V_pf_state_scrub_pl, state->dst.scrub); -#else if (state->src.scrub) - pool_put(&pf_state_scrub_pl, state->src.scrub); + uma_zfree(V_pf_state_scrub_z, state->src.scrub); if (state->dst.scrub) - pool_put(&pf_state_scrub_pl, state->dst.scrub); -#endif + uma_zfree(V_pf_state_scrub_z, state->dst.scrub); /* Someday... flush the TCP segment reassembly descriptors. */ } @@ -1810,12 +1510,8 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, int copyback = 0; int got_ts = 0; -#ifdef __FreeBSD__ - KASSERT((src->scrub || dst->scrub), - ("pf_normalize_tcp_statefull: src->scrub && dst->scrub!")); -#else - KASSERT(src->scrub || dst->scrub); -#endif + KASSERT((src->scrub || dst->scrub), + ("%s: src->scrub && dst->scrub!", __func__)); /* * Enforce the minimum TTL seen for this connection. Negate a common @@ -1870,11 +1566,7 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, if (got_ts) { /* Huh? Multiple timestamps!? */ -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif DPFPRINTF(("multiple TS??")); pf_print_state(state); printf("\n"); @@ -1942,12 +1634,8 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, getmicrouptime(&uptime); if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) && (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE || - time_second - state->creation > TS_MAX_CONN)) { -#ifdef __FreeBSD__ + time_uptime - state->creation > TS_MAX_CONN)) { if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif DPFPRINTF(("src idled out of PAWS\n")); pf_print_state(state); printf("\n"); @@ -1957,11 +1645,7 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, } if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) && uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) { -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif DPFPRINTF(("dst idled out of PAWS\n")); pf_print_state(state); printf("\n"); @@ -1978,7 +1662,7 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, * measurement of RTT (round trip time) and PAWS * (protection against wrapped sequence numbers). PAWS * gives us a set of rules for rejecting packets on - * long fat pipes (packets that were somehow delayed + * long fat pipes (packets that were somehow delayed * in transit longer than the time it took to send the * full TCP sequence space of 4Gb). We can use these * rules and infer a few others that will let us treat @@ -2045,34 +1729,16 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, * this packet. 
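/*
 * Note: illustrative sketch, not part of this patch.  The PAWS idle checks
 * above now compare against time_uptime (seconds since boot, monotonic)
 * instead of time_second (wall clock), so stepping the system clock can no
 * longer prematurely expire or artificially extend state.  Minimal expiry
 * check using the same convention, with a hypothetical state structure:
 */
#include <sys/param.h>
#include <sys/time.h>

struct example_state {
	time_t	creation;	/* recorded as time_uptime at creation */
};

static int
example_state_expired(const struct example_state *st, time_t max_age)
{
	/* Monotonic arithmetic: unaffected by settimeofday()/NTP steps. */
	return (time_uptime - st->creation > max_age);
}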
*/ if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0) -#ifdef __FreeBSD__ ts_fudge = V_pf_default_rule.timeout[PFTM_TS_DIFF]; -#else - ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF]; -#endif - /* Calculate max ticks since the last timestamp */ #define TS_MAXFREQ 1100 /* RFC max TS freq of 1Khz + 10% skew */ #define TS_MICROSECS 1000000 /* microseconds per second */ -#ifdef __FreeBSD__ -#ifndef timersub -#define timersub(tvp, uvp, vvp) \ - do { \ - (vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec; \ - (vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec; \ - if ((vvp)->tv_usec < 0) { \ - (vvp)->tv_sec--; \ - (vvp)->tv_usec += 1000000; \ - } \ - } while (0) -#endif -#endif - timersub(&uptime, &src->scrub->pfss_last, &delta_ts); + delta_ts = uptime; + timevalsub(&delta_ts, &src->scrub->pfss_last); tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ; tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ); - if ((src->state >= TCPS_ESTABLISHED && dst->state >= TCPS_ESTABLISHED) && (SEQ_LT(tsval, dst->scrub->pfss_tsecr) || @@ -2092,7 +1758,6 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, tsval_from_last) ? '1' : ' ', SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ', SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' ')); -#ifdef __FreeBSD__ DPFPRINTF((" tsval: %u tsecr: %u +ticks: %u " "idle: %jus %lums\n", tsval, tsecr, tsval_from_last, @@ -2103,22 +1768,7 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, DPFPRINTF((" dst->tsval: %u tsecr: %u tsval0: %u" "\n", dst->scrub->pfss_tsval, dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0)); -#else - DPFPRINTF((" tsval: %lu tsecr: %lu +ticks: %lu " - "idle: %lus %lums\n", - tsval, tsecr, tsval_from_last, delta_ts.tv_sec, - delta_ts.tv_usec / 1000)); - DPFPRINTF((" src->tsval: %lu tsecr: %lu\n", - src->scrub->pfss_tsval, src->scrub->pfss_tsecr)); - DPFPRINTF((" dst->tsval: %lu tsecr: %lu tsval0: %lu" - "\n", dst->scrub->pfss_tsval, - dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0)); -#endif -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif pf_print_state(state); pf_print_flags(th->th_flags); printf("\n"); @@ -2166,11 +1816,7 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, * Hey! Someone tried to sneak a packet in. Or the * stack changed its RFC1323 behavior?!?! 
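/*
 * Note: illustrative sketch, not part of this patch.  The private timersub()
 * macro is replaced above by the kernel's timevalsub(9), and the bound on
 * how far the peer's TCP timestamp may legitimately have advanced is then
 * derived from the idle time.  Standalone rendering of that arithmetic with
 * a worked example (1100 ticks/s is RFC 1323's 1 kHz ceiling plus 10% skew):
 */
#include <stdint.h>
#include <stdio.h>

#define	EX_TS_MAXFREQ	1100	/* max timestamp ticks per second */
#define	EX_TS_MICROSECS	1000000	/* microseconds per second */

static uint32_t
max_ts_advance(long idle_sec, long idle_usec, long fudge_sec)
{
	uint32_t ticks;

	ticks = (idle_sec + fudge_sec) * EX_TS_MAXFREQ;
	ticks += idle_usec / (EX_TS_MICROSECS / EX_TS_MAXFREQ);
	return (ticks);
}

int
main(void)
{
	/* Idle 2.5 s, no fudge: 2 * 1100 + 500000 / 909 = 2200 + 550 = 2750. */
	printf("%u\n", (unsigned)max_ts_advance(2, 500000, 0));
	return (0);
}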
*/ -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif DPFPRINTF(("Did not receive expected RFC1323 " "timestamp\n")); pf_print_state(state); @@ -2197,11 +1843,7 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, src->scrub->pfss_flags |= PFSS_DATA_TS; else { src->scrub->pfss_flags |= PFSS_DATA_NOTS; -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC && dst->scrub && -#else - if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub && -#endif (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) { /* Don't warn if other host rejected RFC1323 */ DPFPRINTF(("Broken RFC1323 stack did not " @@ -2247,7 +1889,7 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, return (0); } -int +static int pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th, int off, sa_family_t af) { @@ -2255,11 +1897,7 @@ pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th, int thoff; int opt, cnt, optlen = 0; int rewrite = 0; -#ifdef __FreeBSD__ u_char opts[TCP_MAXOLEN]; -#else - u_char opts[MAX_TCPOPTLEN]; -#endif u_char *optp = opts; thoff = th->th_off << 2; @@ -2303,7 +1941,8 @@ pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th, return (rewrite); } -void +#ifdef INET +static void pf_scrub_ip(struct mbuf **m0, u_int32_t flags, u_int8_t min_ttl, u_int8_t tos) { struct mbuf *m = *m0; @@ -2344,9 +1983,10 @@ pf_scrub_ip(struct mbuf **m0, u_int32_t flags, u_int8_t min_ttl, u_int8_t tos) h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0); } } +#endif /* INET */ #ifdef INET6 -void +static void pf_scrub_ip6(struct mbuf **m0, u_int8_t min_ttl) { struct mbuf *m = *m0; diff --git a/sys/contrib/pf/net/pf_osfp.c b/sys/contrib/pf/net/pf_osfp.c index dcd8af7..29d4a40 100644 --- a/sys/contrib/pf/net/pf_osfp.c +++ b/sys/contrib/pf/net/pf_osfp.c @@ -17,23 +17,14 @@ * */ -#ifdef __FreeBSD__ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#endif #include <sys/param.h> +#include <sys/kernel.h> #include <sys/socket.h> -#ifdef _KERNEL -#include <sys/systm.h> -#ifndef __FreeBSD__ -#include <sys/pool.h> -#endif -#endif /* _KERNEL */ -#include <sys/mbuf.h> #include <netinet/in.h> -#include <netinet/in_systm.h> #include <netinet/ip.h> #include <netinet/tcp.h> @@ -41,77 +32,31 @@ __FBSDID("$FreeBSD$"); #include <net/pfvar.h> #include <netinet/ip6.h> -#ifdef _KERNEL #include <netinet6/in6_var.h> -#endif - -#ifdef _KERNEL -#ifdef __FreeBSD__ +static MALLOC_DEFINE(M_PFOSFP, "pf_osfp", "pf(4) operating system fingerprints"); #define DPFPRINTF(format, x...) \ if (V_pf_status.debug >= PF_DEBUG_NOISY) \ printf(format , ##x) -#else -#define DPFPRINTF(format, x...) \ - if (pf_status.debug >= PF_DEBUG_NOISY) \ - printf(format , ##x) -#endif -#ifdef __FreeBSD__ -typedef uma_zone_t pool_t; -#else -typedef struct pool pool_t; -#endif -#else -/* Userland equivalents so we can lend code to tcpdump et al. */ - -#include <arpa/inet.h> -#include <errno.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <netdb.h> -#define pool_t int -#define pool_get(pool, flags) malloc(*(pool)) -#define pool_put(pool, item) free(item) -#define pool_init(pool, size, a, ao, f, m, p) (*(pool)) = (size) - -#ifdef __FreeBSD__ -#define NTOHS(x) (x) = ntohs((u_int16_t)(x)) -#endif - -#ifdef PFDEBUG -#include <sys/stdarg.h> -#define DPFPRINTF(format, x...) fprintf(stderr, format , ##x) -#else -#define DPFPRINTF(format, x...) 
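/*
 * Note: illustrative sketch, not part of this patch.  pf_osfp.c now takes
 * its fingerprint structures from malloc(9) under a dedicated malloc type
 * rather than from pools/zones, presumably because fingerprints are few,
 * long-lived and only touched from the ioctl path.  Minimal sketch of the
 * MALLOC_DEFINE idiom; type and struct names here are made up:
 */
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/malloc.h>

static MALLOC_DEFINE(M_EXAMPLEOSFP, "example_osfp", "example fingerprints");

struct example_entry {
	int	fe_os;
};

static struct example_entry *
example_entry_alloc(void)
{
	/* M_NOWAIT, matching the converted pf_osfp_add() allocations. */
	return (malloc(sizeof(struct example_entry), M_EXAMPLEOSFP, M_NOWAIT));
}

static void
example_entry_free(struct example_entry *e)
{
	free(e, M_EXAMPLEOSFP);
}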
((void)0) -#endif /* PFDEBUG */ -#endif /* _KERNEL */ - - -#ifdef __FreeBSD__ SLIST_HEAD(pf_osfp_list, pf_os_fingerprint); -VNET_DEFINE(struct pf_osfp_list, pf_osfp_list); +static VNET_DEFINE(struct pf_osfp_list, pf_osfp_list) = + SLIST_HEAD_INITIALIZER(); #define V_pf_osfp_list VNET(pf_osfp_list) -VNET_DEFINE(pool_t, pf_osfp_entry_pl); -#define pf_osfp_entry_pl VNET(pf_osfp_entry_pl) -VNET_DEFINE(pool_t, pf_osfp_pl); -#define pf_osfp_pl VNET(pf_osfp_pl) -#else -SLIST_HEAD(pf_osfp_list, pf_os_fingerprint) pf_osfp_list; -pool_t pf_osfp_entry_pl; -pool_t pf_osfp_pl; -#endif -struct pf_os_fingerprint *pf_osfp_find(struct pf_osfp_list *, +static struct pf_osfp_enlist *pf_osfp_fingerprint_hdr(const struct ip *, + const struct ip6_hdr *, + const struct tcphdr *); +static struct pf_os_fingerprint *pf_osfp_find(struct pf_osfp_list *, struct pf_os_fingerprint *, u_int8_t); -struct pf_os_fingerprint *pf_osfp_find_exact(struct pf_osfp_list *, +static struct pf_os_fingerprint *pf_osfp_find_exact(struct pf_osfp_list *, struct pf_os_fingerprint *); -void pf_osfp_insert(struct pf_osfp_list *, +static void pf_osfp_insert(struct pf_osfp_list *, struct pf_os_fingerprint *); +#ifdef PFDEBUG +static struct pf_os_fingerprint *pf_osfp_validate(void); +#endif - -#ifdef _KERNEL /* * Passively fingerprint the OS of the host (IPv4 TCP SYN packets only) * Returns the list of possible OSes. @@ -140,19 +85,14 @@ pf_osfp_fingerprint(struct pf_pdesc *pd, struct mbuf *m, int off, return (pf_osfp_fingerprint_hdr(ip, ip6, (struct tcphdr *)hdr)); } -#endif /* _KERNEL */ -struct pf_osfp_enlist * +static struct pf_osfp_enlist * pf_osfp_fingerprint_hdr(const struct ip *ip, const struct ip6_hdr *ip6, const struct tcphdr *tcp) { struct pf_os_fingerprint fp, *fpresult; int cnt, optlen = 0; const u_int8_t *optp; -#ifdef _KERNEL char srcname[128]; -#else - char srcname[NI_MAXHOST]; -#endif if ((tcp->th_flags & (TH_SYN|TH_ACK)) != TH_SYN) return (NULL); @@ -164,49 +104,21 @@ pf_osfp_fingerprint_hdr(const struct ip *ip, const struct ip6_hdr *ip6, const st memset(&fp, 0, sizeof(fp)); if (ip) { -#ifndef _KERNEL - struct sockaddr_in sin; -#endif - fp.fp_psize = ntohs(ip->ip_len); fp.fp_ttl = ip->ip_ttl; if (ip->ip_off & htons(IP_DF)) fp.fp_flags |= PF_OSFP_DF; -#ifdef _KERNEL strlcpy(srcname, inet_ntoa(ip->ip_src), sizeof(srcname)); -#else - memset(&sin, 0, sizeof(sin)); - sin.sin_family = AF_INET; - sin.sin_len = sizeof(struct sockaddr_in); - sin.sin_addr = ip->ip_src; - (void)getnameinfo((struct sockaddr *)&sin, - sizeof(struct sockaddr_in), srcname, sizeof(srcname), - NULL, 0, NI_NUMERICHOST); -#endif } #ifdef INET6 else if (ip6) { -#ifndef _KERNEL - struct sockaddr_in6 sin6; -#endif - /* jumbo payload? */ fp.fp_psize = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen); fp.fp_ttl = ip6->ip6_hlim; fp.fp_flags |= PF_OSFP_DF; fp.fp_flags |= PF_OSFP_INET6; -#ifdef _KERNEL strlcpy(srcname, ip6_sprintf((struct in6_addr *)&ip6->ip6_src), sizeof(srcname)); -#else - memset(&sin6, 0, sizeof(sin6)); - sin6.sin6_family = AF_INET6; - sin6.sin6_len = sizeof(struct sockaddr_in6); - sin6.sin6_addr = ip6->ip6_src; - (void)getnameinfo((struct sockaddr *)&sin6, - sizeof(struct sockaddr_in6), srcname, sizeof(srcname), - NULL, 0, NI_NUMERICHOST); -#endif } #endif else @@ -284,11 +196,7 @@ pf_osfp_fingerprint_hdr(const struct ip *ip, const struct ip6_hdr *ip6, const st (fp.fp_flags & PF_OSFP_WSCALE_DC) ? 
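/*
 * Note: illustrative sketch, not part of this patch.  The per-vnet globals
 * above follow the standard VIMAGE pattern: VNET_DEFINE() places the
 * variable in the per-vnet storage and the V_ accessor macro resolves it
 * for the current vnet, with a static initializer replacing run-time
 * SLIST_INIT().  Example with a hypothetical list:
 */
#include <sys/param.h>
#include <sys/queue.h>
#include <net/vnet.h>

struct example_item {
	SLIST_ENTRY(example_item) link;
};
SLIST_HEAD(example_list, example_item);

/* One instance of the list head per virtual network stack. */
static VNET_DEFINE(struct example_list, example_items) =
    SLIST_HEAD_INITIALIZER(example_items);
#define	V_example_items	VNET(example_items)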
"*" : "", fp.fp_wscale); -#ifdef __FreeBSD__ if ((fpresult = pf_osfp_find(&V_pf_osfp_list, &fp, -#else - if ((fpresult = pf_osfp_find(&pf_osfp_list, &fp, -#endif PF_OSFP_MAXTTL_OFFSET))) return (&fpresult->fp_oses); return (NULL); @@ -324,52 +232,6 @@ pf_osfp_match(struct pf_osfp_enlist *list, pf_osfp_t os) return (0); } -/* Initialize the OS fingerprint system */ -#ifdef __FreeBSD__ -int -#else -void -#endif -pf_osfp_initialize(void) -{ -#if defined(__FreeBSD__) && defined(_KERNEL) - int error = ENOMEM; - - do { - pf_osfp_entry_pl = pf_osfp_pl = NULL; - UMA_CREATE(pf_osfp_entry_pl, struct pf_osfp_entry, "pfospfen"); - UMA_CREATE(pf_osfp_pl, struct pf_os_fingerprint, "pfosfp"); - error = 0; - } while(0); - - SLIST_INIT(&V_pf_osfp_list); -#else - pool_init(&pf_osfp_entry_pl, sizeof(struct pf_osfp_entry), 0, 0, 0, - "pfosfpen", &pool_allocator_nointr); - pool_init(&pf_osfp_pl, sizeof(struct pf_os_fingerprint), 0, 0, 0, - "pfosfp", &pool_allocator_nointr); - SLIST_INIT(&pf_osfp_list); -#endif - -#ifdef __FreeBSD__ -#ifdef _KERNEL - return (error); -#else - return (0); -#endif -#endif -} - -#if defined(__FreeBSD__) && (_KERNEL) -void -pf_osfp_cleanup(void) -{ - - UMA_DESTROY(pf_osfp_entry_pl); - UMA_DESTROY(pf_osfp_pl); -} -#endif - /* Flush the fingerprint list */ void pf_osfp_flush(void) @@ -377,18 +239,13 @@ pf_osfp_flush(void) struct pf_os_fingerprint *fp; struct pf_osfp_entry *entry; -#ifdef __FreeBSD__ while ((fp = SLIST_FIRST(&V_pf_osfp_list))) { SLIST_REMOVE_HEAD(&V_pf_osfp_list, fp_next); -#else - while ((fp = SLIST_FIRST(&pf_osfp_list))) { - SLIST_REMOVE_HEAD(&pf_osfp_list, fp_next); -#endif while ((entry = SLIST_FIRST(&fp->fp_oses))) { SLIST_REMOVE_HEAD(&fp->fp_oses, fp_entry); - pool_put(&pf_osfp_entry_pl, entry); + free(entry, M_PFOSFP); } - pool_put(&pf_osfp_pl, fp); + free(fp, M_PFOSFP); } } @@ -400,6 +257,8 @@ pf_osfp_add(struct pf_osfp_ioctl *fpioc) struct pf_os_fingerprint *fp, fpadd; struct pf_osfp_entry *entry; + PF_RULES_WASSERT(); + memset(&fpadd, 0, sizeof(fpadd)); fpadd.fp_tcpopts = fpioc->fp_tcpopts; fpadd.fp_wsize = fpioc->fp_wsize; @@ -436,31 +295,18 @@ pf_osfp_add(struct pf_osfp_ioctl *fpioc) fpioc->fp_os.fp_os); #endif -#ifdef __FreeBSD__ if ((fp = pf_osfp_find_exact(&V_pf_osfp_list, &fpadd))) { -#else - if ((fp = pf_osfp_find_exact(&pf_osfp_list, &fpadd))) { -#endif SLIST_FOREACH(entry, &fp->fp_oses, fp_entry) { if (PF_OSFP_ENTRY_EQ(entry, &fpioc->fp_os)) return (EEXIST); } - if ((entry = pool_get(&pf_osfp_entry_pl, -#ifdef __FreeBSD__ - PR_NOWAIT)) == NULL) -#else - PR_WAITOK|PR_LIMITFAIL)) == NULL) -#endif + if ((entry = malloc(sizeof(*entry), M_PFOSFP, M_NOWAIT)) + == NULL) return (ENOMEM); } else { - if ((fp = pool_get(&pf_osfp_pl, -#ifdef __FreeBSD__ - PR_NOWAIT)) == NULL) -#else - PR_WAITOK|PR_LIMITFAIL)) == NULL) -#endif + if ((fp = malloc(sizeof(*fp), M_PFOSFP, M_ZERO | M_NOWAIT)) + == NULL) return (ENOMEM); - memset(fp, 0, sizeof(*fp)); fp->fp_tcpopts = fpioc->fp_tcpopts; fp->fp_wsize = fpioc->fp_wsize; fp->fp_psize = fpioc->fp_psize; @@ -470,20 +316,12 @@ pf_osfp_add(struct pf_osfp_ioctl *fpioc) fp->fp_wscale = fpioc->fp_wscale; fp->fp_ttl = fpioc->fp_ttl; SLIST_INIT(&fp->fp_oses); - if ((entry = pool_get(&pf_osfp_entry_pl, -#ifdef __FreeBSD__ - PR_NOWAIT)) == NULL) { -#else - PR_WAITOK|PR_LIMITFAIL)) == NULL) { -#endif - pool_put(&pf_osfp_pl, fp); + if ((entry = malloc(sizeof(*entry), M_PFOSFP, M_NOWAIT)) + == NULL) { + free(fp, M_PFOSFP); return (ENOMEM); } -#ifdef __FreeBSD__ pf_osfp_insert(&V_pf_osfp_list, fp); -#else - pf_osfp_insert(&pf_osfp_list, 
fp); -#endif } memcpy(entry, &fpioc->fp_os, sizeof(*entry)); @@ -503,7 +341,7 @@ pf_osfp_add(struct pf_osfp_ioctl *fpioc) /* Find a fingerprint in the list */ -struct pf_os_fingerprint * +static struct pf_os_fingerprint * pf_osfp_find(struct pf_osfp_list *list, struct pf_os_fingerprint *find, u_int8_t ttldiff) { @@ -578,7 +416,7 @@ pf_osfp_find(struct pf_osfp_list *list, struct pf_os_fingerprint *find, } /* Find an exact fingerprint in the list */ -struct pf_os_fingerprint * +static struct pf_os_fingerprint * pf_osfp_find_exact(struct pf_osfp_list *list, struct pf_os_fingerprint *find) { struct pf_os_fingerprint *f; @@ -599,7 +437,7 @@ pf_osfp_find_exact(struct pf_osfp_list *list, struct pf_os_fingerprint *find) } /* Insert a fingerprint into the list */ -void +static void pf_osfp_insert(struct pf_osfp_list *list, struct pf_os_fingerprint *ins) { struct pf_os_fingerprint *f, *prev = NULL; @@ -625,11 +463,7 @@ pf_osfp_get(struct pf_osfp_ioctl *fpioc) memset(fpioc, 0, sizeof(*fpioc)); -#ifdef __FreeBSD__ SLIST_FOREACH(fp, &V_pf_osfp_list, fp_next) { -#else - SLIST_FOREACH(fp, &pf_osfp_list, fp_next) { -#endif SLIST_FOREACH(entry, &fp->fp_oses, fp_entry) { if (i++ == num) { fpioc->fp_mss = fp->fp_mss; @@ -650,17 +484,14 @@ pf_osfp_get(struct pf_osfp_ioctl *fpioc) } +#ifdef PFDEBUG /* Validate that each signature is reachable */ -struct pf_os_fingerprint * +static struct pf_os_fingerprint * pf_osfp_validate(void) { struct pf_os_fingerprint *f, *f2, find; -#ifdef __FreeBSD__ SLIST_FOREACH(f, &V_pf_osfp_list, fp_next) { -#else - SLIST_FOREACH(f, &pf_osfp_list, fp_next) { -#endif memcpy(&find, f, sizeof(find)); /* We do a few MSS/th_win percolations to make things unique */ @@ -672,11 +503,7 @@ pf_osfp_validate(void) find.fp_wsize *= (find.fp_mss + 40); else if (f->fp_flags & PF_OSFP_WSIZE_MOD) find.fp_wsize *= 2; -#ifdef __FreeBSD__ if (f != (f2 = pf_osfp_find(&V_pf_osfp_list, &find, 0))) { -#else - if (f != (f2 = pf_osfp_find(&pf_osfp_list, &find, 0))) { -#endif if (f2) printf("Found \"%s %s %s\" instead of " "\"%s %s %s\"\n", @@ -696,3 +523,4 @@ pf_osfp_validate(void) } return (NULL); } +#endif /* PFDEBUG */ diff --git a/sys/contrib/pf/net/pf_ruleset.c b/sys/contrib/pf/net/pf_ruleset.c index ca8667c..77652a6 100644 --- a/sys/contrib/pf/net/pf_ruleset.c +++ b/sys/contrib/pf/net/pf_ruleset.c @@ -35,15 +35,14 @@ * */ -#ifdef __FreeBSD__ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#endif #include <sys/param.h> #include <sys/socket.h> #ifdef _KERNEL # include <sys/systm.h> +# include <sys/refcount.h> #endif /* _KERNEL */ #include <sys/mbuf.h> @@ -61,20 +60,10 @@ __FBSDID("$FreeBSD$"); #ifdef _KERNEL -#ifdef __FreeBSD__ #define DPFPRINTF(format, x...) \ if (V_pf_status.debug >= PF_DEBUG_NOISY) \ printf(format , ##x) -#else -#define DPFPRINTF(format, x...) \ - if (pf_status.debug >= PF_DEBUG_NOISY) \ - printf(format , ##x) -#endif -#ifdef __FreeBSD__ #define rs_malloc(x) malloc(x, M_TEMP, M_NOWAIT|M_ZERO) -#else -#define rs_malloc(x) malloc(x, M_TEMP, M_WAITOK|M_CANFAIL|M_ZERO) -#endif #define rs_free(x) free(x, M_TEMP) #else @@ -96,24 +85,22 @@ __FBSDID("$FreeBSD$"); #endif /* PFDEBUG */ #endif /* _KERNEL */ -#if defined(__FreeBSD__) && !defined(_KERNEL) -#undef V_pf_anchors -#define V_pf_anchors pf_anchors - -#undef pf_main_ruleset -#define pf_main_ruleset pf_main_anchor.ruleset -#endif - -#if defined(__FreeBSD__) && defined(_KERNEL) +#ifdef _KERNEL VNET_DEFINE(struct pf_anchor_global, pf_anchors); VNET_DEFINE(struct pf_anchor, pf_main_anchor); -#else +#else /* ! 
_KERNEL */ struct pf_anchor_global pf_anchors; struct pf_anchor pf_main_anchor; -#endif +#undef V_pf_anchors +#define V_pf_anchors pf_anchors +#undef pf_main_ruleset +#define pf_main_ruleset pf_main_anchor.ruleset +#endif /* _KERNEL */ static __inline int pf_anchor_compare(struct pf_anchor *, struct pf_anchor *); +static struct pf_anchor *pf_find_anchor(const char *); + RB_GENERATE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare); RB_GENERATE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare); @@ -169,7 +156,7 @@ pf_init_ruleset(struct pf_ruleset *ruleset) } } -struct pf_anchor * +static struct pf_anchor * pf_find_anchor(const char *path) { struct pf_anchor *key, *found; @@ -178,11 +165,7 @@ pf_find_anchor(const char *path) if (key == NULL) return (NULL); strlcpy(key->path, path, sizeof(key->path)); -#ifdef __FreeBSD__ found = RB_FIND(pf_anchor_global, &V_pf_anchors, key); -#else - found = RB_FIND(pf_anchor_global, &pf_anchors, key); -#endif rs_free(key); return (found); } @@ -208,11 +191,7 @@ pf_find_or_create_ruleset(const char *path) { char *p, *q, *r; struct pf_ruleset *ruleset; -#ifdef __FreeBSD__ struct pf_anchor *anchor = NULL, *dup, *parent = NULL; -#else - struct pf_anchor *anchor, *dup, *parent = NULL; -#endif if (path[0] == 0) return (&pf_main_ruleset); @@ -263,11 +242,7 @@ pf_find_or_create_ruleset(const char *path) strlcat(anchor->path, "/", sizeof(anchor->path)); } strlcat(anchor->path, anchor->name, sizeof(anchor->path)); -#ifdef __FreeBSD__ if ((dup = RB_INSERT(pf_anchor_global, &V_pf_anchors, anchor)) != -#else - if ((dup = RB_INSERT(pf_anchor_global, &pf_anchors, anchor)) != -#endif NULL) { printf("pf_find_or_create_ruleset: RB_INSERT1 " "'%s' '%s' collides with '%s' '%s'\n", @@ -284,11 +259,7 @@ pf_find_or_create_ruleset(const char *path) "RB_INSERT2 '%s' '%s' collides with " "'%s' '%s'\n", anchor->path, anchor->name, dup->path, dup->name); -#ifdef __FreeBSD__ RB_REMOVE(pf_anchor_global, &V_pf_anchors, -#else - RB_REMOVE(pf_anchor_global, &pf_anchors, -#endif anchor); rs_free(anchor); rs_free(p); @@ -324,11 +295,7 @@ pf_remove_if_empty_ruleset(struct pf_ruleset *ruleset) !TAILQ_EMPTY(ruleset->rules[i].inactive.ptr) || ruleset->rules[i].inactive.open) return; -#ifdef __FreeBSD__ RB_REMOVE(pf_anchor_global, &V_pf_anchors, ruleset->anchor); -#else - RB_REMOVE(pf_anchor_global, &pf_anchors, ruleset->anchor); -#endif if ((parent = ruleset->anchor->parent) != NULL) RB_REMOVE(pf_anchor_node, &parent->children, ruleset->anchor); diff --git a/sys/contrib/pf/net/pf_table.c b/sys/contrib/pf/net/pf_table.c index ea77e31..fa88045 100644 --- a/sys/contrib/pf/net/pf_table.c +++ b/sys/contrib/pf/net/pf_table.c @@ -30,31 +30,24 @@ * */ -#ifdef __FreeBSD__ -#include "opt_inet.h" -#include "opt_inet6.h" - #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#endif + +#include "opt_inet.h" +#include "opt_inet6.h" #include <sys/param.h> -#include <sys/systm.h> -#include <sys/socket.h> -#include <sys/mbuf.h> #include <sys/kernel.h> -#ifdef __FreeBSD__ +#include <sys/lock.h> #include <sys/malloc.h> -#else -#include <sys/pool.h> -#endif +#include <sys/mutex.h> +#include <sys/refcount.h> +#include <sys/rwlock.h> +#include <sys/socket.h> +#include <vm/uma.h> #include <net/if.h> -#include <net/route.h> -#include <netinet/in.h> -#ifndef __FreeBSD__ -#include <netinet/ip_ipsp.h> -#endif +#include <net/vnet.h> #include <net/pfvar.h> #define ACCEPT_FLAGS(flags, oklist) \ @@ -64,53 +57,6 @@ __FBSDID("$FreeBSD$"); return (EINVAL); \ } while (0) -#ifdef __FreeBSD__ -static inline int 
-_copyin(const void *uaddr, void *kaddr, size_t len) -{ - int r; - - PF_UNLOCK(); - r = copyin(uaddr, kaddr, len); - PF_LOCK(); - - return (r); -} - -static inline int -_copyout(const void *uaddr, void *kaddr, size_t len) -{ - int r; - - PF_UNLOCK(); - r = copyout(uaddr, kaddr, len); - PF_LOCK(); - - return (r); -} - -#define COPYIN(from, to, size, flags) \ - ((flags & PFR_FLAG_USERIOCTL) ? \ - _copyin((from), (to), (size)) : \ - (bcopy((from), (to), (size)), 0)) - -#define COPYOUT(from, to, size, flags) \ - ((flags & PFR_FLAG_USERIOCTL) ? \ - _copyout((from), (to), (size)) : \ - (bcopy((from), (to), (size)), 0)) - -#else -#define COPYIN(from, to, size, flags) \ - ((flags & PFR_FLAG_USERIOCTL) ? \ - copyin((from), (to), (size)) : \ - (bcopy((from), (to), (size)), 0)) - -#define COPYOUT(from, to, size, flags) \ - ((flags & PFR_FLAG_USERIOCTL) ? \ - copyout((from), (to), (size)) : \ - (bcopy((from), (to), (size)), 0)) -#endif - #define FILLIN_SIN(sin, addr) \ do { \ (sin).sin_len = sizeof(sin); \ @@ -164,7 +110,6 @@ struct pfr_walktree { struct pfi_dynaddr *pfrw1_dyn; } pfrw_1; int pfrw_free; - int pfrw_flags; }; #define pfrw_addr pfrw_1.pfrw1_addr #define pfrw_astats pfrw_1.pfrw1_astats @@ -175,77 +120,69 @@ struct pfr_walktree { #define senderr(e) do { rv = (e); goto _bad; } while (0) -#ifdef __FreeBSD__ -VNET_DEFINE(uma_zone_t, pfr_ktable_pl); -VNET_DEFINE(uma_zone_t, pfr_kentry_pl); -VNET_DEFINE(uma_zone_t, pfr_kcounters_pl); -VNET_DEFINE(struct sockaddr_in, pfr_sin); -#define V_pfr_sin VNET(pfr_sin) -VNET_DEFINE(struct sockaddr_in6, pfr_sin6); -#define V_pfr_sin6 VNET(pfr_sin6) -VNET_DEFINE(union sockaddr_union, pfr_mask); -#define V_pfr_mask VNET(pfr_mask) -VNET_DEFINE(struct pf_addr, pfr_ffaddr); -#define V_pfr_ffaddr VNET(pfr_ffaddr) -#else -struct pool pfr_ktable_pl; -struct pool pfr_kentry_pl; -struct pool pfr_kcounters_pl; -struct sockaddr_in pfr_sin; -struct sockaddr_in6 pfr_sin6; -union sockaddr_union pfr_mask; -struct pf_addr pfr_ffaddr; -#endif - -void pfr_copyout_addr(struct pfr_addr *, +static MALLOC_DEFINE(M_PFTABLE, "pf_table", "pf(4) tables structures"); +static VNET_DEFINE(uma_zone_t, pfr_kentry_z); +#define V_pfr_kentry_z VNET(pfr_kentry_z) +static VNET_DEFINE(uma_zone_t, pfr_kcounters_z); +#define V_pfr_kcounters_z VNET(pfr_kcounters_z) + +static struct pf_addr pfr_ffaddr = { + .addr32 = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff } +}; + +static void pfr_copyout_addr(struct pfr_addr *, struct pfr_kentry *ke); -int pfr_validate_addr(struct pfr_addr *); -void pfr_enqueue_addrs(struct pfr_ktable *, +static int pfr_validate_addr(struct pfr_addr *); +static void pfr_enqueue_addrs(struct pfr_ktable *, struct pfr_kentryworkq *, int *, int); -void pfr_mark_addrs(struct pfr_ktable *); -struct pfr_kentry *pfr_lookup_addr(struct pfr_ktable *, +static void pfr_mark_addrs(struct pfr_ktable *); +static struct pfr_kentry + *pfr_lookup_addr(struct pfr_ktable *, struct pfr_addr *, int); -struct pfr_kentry *pfr_create_kentry(struct pfr_addr *, int); -void pfr_destroy_kentries(struct pfr_kentryworkq *); -void pfr_destroy_kentry(struct pfr_kentry *); -void pfr_insert_kentries(struct pfr_ktable *, +static struct pfr_kentry *pfr_create_kentry(struct pfr_addr *); +static void pfr_destroy_kentries(struct pfr_kentryworkq *); +static void pfr_destroy_kentry(struct pfr_kentry *); +static void pfr_insert_kentries(struct pfr_ktable *, struct pfr_kentryworkq *, long); -void pfr_remove_kentries(struct pfr_ktable *, +static void pfr_remove_kentries(struct pfr_ktable *, struct pfr_kentryworkq 
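/*
 * Note: illustrative sketch, not part of this patch.  The deleted
 * _copyin()/_copyout() wrappers had to drop and retake the global pf lock
 * around every user-space access, because copyin(9)/copyout(9) can fault
 * and sleep.  The reworked table routines instead operate on plain kernel
 * buffers; the ioctl handler (not shown in this hunk) is expected to stage
 * the whole array outside the lock, roughly along these lines:
 */
#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/systm.h>

/* Hypothetical request shape; pfioc_table carries comparable fields. */
struct example_req {
	void	*buf;		/* user pointer to an array of entries */
	int	 count;		/* number of entries */
};

static int
example_staged_ioctl(struct example_req *req, size_t entsize)
{
	void	*kbuf;
	int	 error;

	/* Copy everything in before any lock is taken
	 * (range/overflow checks omitted for brevity). */
	kbuf = malloc(req->count * entsize, M_TEMP, M_WAITOK);
	error = copyin(req->buf, kbuf, req->count * entsize);
	if (error == 0) {
		/* ...operate on kernel memory while holding the pf locks,
		 * e.g. PF_RULES_WLOCK(); <table op on kbuf>; PF_RULES_WUNLOCK();
		 * ...then copy results back out after dropping them. */
		error = copyout(kbuf, req->buf, req->count * entsize);
	}
	free(kbuf, M_TEMP);
	return (error);
}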
*); -void pfr_clstats_kentries(struct pfr_kentryworkq *, long, +static void pfr_clstats_kentries(struct pfr_kentryworkq *, long, int); -void pfr_reset_feedback(struct pfr_addr *, int, int); -void pfr_prepare_network(union sockaddr_union *, int, int); -int pfr_route_kentry(struct pfr_ktable *, +static void pfr_reset_feedback(struct pfr_addr *, int); +static void pfr_prepare_network(union sockaddr_union *, int, int); +static int pfr_route_kentry(struct pfr_ktable *, struct pfr_kentry *); -int pfr_unroute_kentry(struct pfr_ktable *, +static int pfr_unroute_kentry(struct pfr_ktable *, struct pfr_kentry *); -int pfr_walktree(struct radix_node *, void *); -int pfr_validate_table(struct pfr_table *, int, int); -int pfr_fix_anchor(char *); -void pfr_commit_ktable(struct pfr_ktable *, long); -void pfr_insert_ktables(struct pfr_ktableworkq *); -void pfr_insert_ktable(struct pfr_ktable *); -void pfr_setflags_ktables(struct pfr_ktableworkq *); -void pfr_setflags_ktable(struct pfr_ktable *, int); -void pfr_clstats_ktables(struct pfr_ktableworkq *, long, +static int pfr_walktree(struct radix_node *, void *); +static int pfr_validate_table(struct pfr_table *, int, int); +static int pfr_fix_anchor(char *); +static void pfr_commit_ktable(struct pfr_ktable *, long); +static void pfr_insert_ktables(struct pfr_ktableworkq *); +static void pfr_insert_ktable(struct pfr_ktable *); +static void pfr_setflags_ktables(struct pfr_ktableworkq *); +static void pfr_setflags_ktable(struct pfr_ktable *, int); +static void pfr_clstats_ktables(struct pfr_ktableworkq *, long, int); -void pfr_clstats_ktable(struct pfr_ktable *, long, int); -struct pfr_ktable *pfr_create_ktable(struct pfr_table *, long, int, int); -void pfr_destroy_ktables(struct pfr_ktableworkq *, int); -void pfr_destroy_ktable(struct pfr_ktable *, int); -int pfr_ktable_compare(struct pfr_ktable *, +static void pfr_clstats_ktable(struct pfr_ktable *, long, int); +static struct pfr_ktable + *pfr_create_ktable(struct pfr_table *, long, int); +static void pfr_destroy_ktables(struct pfr_ktableworkq *, int); +static void pfr_destroy_ktable(struct pfr_ktable *, int); +static int pfr_ktable_compare(struct pfr_ktable *, struct pfr_ktable *); -struct pfr_ktable *pfr_lookup_table(struct pfr_table *); -void pfr_clean_node_mask(struct pfr_ktable *, +static struct pfr_ktable + *pfr_lookup_table(struct pfr_table *); +static void pfr_clean_node_mask(struct pfr_ktable *, struct pfr_kentryworkq *); -int pfr_table_count(struct pfr_table *, int); -int pfr_skip_table(struct pfr_table *, +static int pfr_table_count(struct pfr_table *, int); +static int pfr_skip_table(struct pfr_table *, struct pfr_ktable *, int); -struct pfr_kentry *pfr_kentry_byidx(struct pfr_ktable *, int, int); +static struct pfr_kentry + *pfr_kentry_byidx(struct pfr_ktable *, int, int); -RB_PROTOTYPE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare); -RB_GENERATE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare); +static RB_PROTOTYPE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare); +static RB_GENERATE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare); struct pfr_ktablehead pfr_ktables; struct pfr_table pfr_nulltable; @@ -254,28 +191,23 @@ int pfr_ktable_cnt; void pfr_initialize(void) { -#ifndef __FreeBSD__ - pool_init(&pfr_ktable_pl, sizeof(struct pfr_ktable), 0, 0, 0, - "pfrktable", NULL); - pool_init(&pfr_kentry_pl, sizeof(struct pfr_kentry), 0, 0, 0, - "pfrkentry", NULL); - pool_init(&pfr_kcounters_pl, sizeof(struct pfr_kcounters), 0, 0, 0, - "pfrkcounters", NULL); 
- - pfr_sin.sin_len = sizeof(pfr_sin); - pfr_sin.sin_family = AF_INET; - pfr_sin6.sin6_len = sizeof(pfr_sin6); - pfr_sin6.sin6_family = AF_INET6; - - memset(&pfr_ffaddr, 0xff, sizeof(pfr_ffaddr)); -#else - V_pfr_sin.sin_len = sizeof(V_pfr_sin); - V_pfr_sin.sin_family = AF_INET; - V_pfr_sin6.sin6_len = sizeof(V_pfr_sin6); - V_pfr_sin6.sin6_family = AF_INET6; - - memset(&V_pfr_ffaddr, 0xff, sizeof(V_pfr_ffaddr)); -#endif + + V_pfr_kentry_z = uma_zcreate("pf table entries", + sizeof(struct pfr_kentry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, + 0); + V_pfr_kcounters_z = uma_zcreate("pf table counters", + sizeof(struct pfr_kcounters), NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, 0); + V_pf_limits[PF_LIMIT_TABLE_ENTRIES].zone = V_pfr_kentry_z; + V_pf_limits[PF_LIMIT_TABLE_ENTRIES].limit = PFR_KENTRY_HIWAT; +} + +void +pfr_cleanup(void) +{ + + uma_zdestroy(V_pfr_kentry_z); + uma_zdestroy(V_pfr_kcounters_z); } int @@ -283,9 +215,10 @@ pfr_clr_addrs(struct pfr_table *tbl, int *ndel, int flags) { struct pfr_ktable *kt; struct pfr_kentryworkq workq; - int s; - ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY); + PF_RULES_WASSERT(); + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) return (EINVAL); kt = pfr_lookup_table(tbl); @@ -296,16 +229,8 @@ pfr_clr_addrs(struct pfr_table *tbl, int *ndel, int flags) pfr_enqueue_addrs(kt, &workq, ndel, 0); if (!(flags & PFR_FLAG_DUMMY)) { - if (flags & PFR_FLAG_ATOMIC) - s = splsoftnet(); pfr_remove_kentries(kt, &workq); - if (flags & PFR_FLAG_ATOMIC) - splx(s); - if (kt->pfrkt_cnt) { - printf("pfr_clr_addrs: corruption detected (%d).\n", - kt->pfrkt_cnt); - kt->pfrkt_cnt = 0; - } + KASSERT(kt->pfrkt_cnt == 0, ("%s: non-null pfrkt_cnt", __func__)); } return (0); } @@ -317,12 +242,13 @@ pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, struct pfr_ktable *kt, *tmpkt; struct pfr_kentryworkq workq; struct pfr_kentry *p, *q; - struct pfr_addr ad; - int i, rv, s, xadd = 0; + struct pfr_addr *ad; + int i, rv, xadd = 0; long tzero = time_second; - ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY | - PFR_FLAG_FEEDBACK); + PF_RULES_WASSERT(); + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK); if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) return (EINVAL); kt = pfr_lookup_table(tbl); @@ -330,53 +256,42 @@ pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, return (ESRCH); if (kt->pfrkt_flags & PFR_TFLAG_CONST) return (EPERM); - tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0, - !(flags & PFR_FLAG_USERIOCTL)); + tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0); if (tmpkt == NULL) return (ENOMEM); SLIST_INIT(&workq); - for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad), flags)) - senderr(EFAULT); - if (pfr_validate_addr(&ad)) + for (i = 0, ad = addr; i < size; i++, ad++) { + if (pfr_validate_addr(ad)) senderr(EINVAL); - p = pfr_lookup_addr(kt, &ad, 1); - q = pfr_lookup_addr(tmpkt, &ad, 1); + p = pfr_lookup_addr(kt, ad, 1); + q = pfr_lookup_addr(tmpkt, ad, 1); if (flags & PFR_FLAG_FEEDBACK) { if (q != NULL) - ad.pfra_fback = PFR_FB_DUPLICATE; + ad->pfra_fback = PFR_FB_DUPLICATE; else if (p == NULL) - ad.pfra_fback = PFR_FB_ADDED; - else if (p->pfrke_not != ad.pfra_not) - ad.pfra_fback = PFR_FB_CONFLICT; + ad->pfra_fback = PFR_FB_ADDED; + else if (p->pfrke_not != ad->pfra_not) + ad->pfra_fback = PFR_FB_CONFLICT; else - ad.pfra_fback = PFR_FB_NONE; + ad->pfra_fback = PFR_FB_NONE; } if (p == NULL && q == NULL) { - p = pfr_create_kentry(&ad, - !(flags & 
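/*
 * Note: illustrative sketch, not part of this patch.  pfr_initialize()
 * above registers the new table-entry zone and its PFR_KENTRY_HIWAT limit
 * with the generic pf limit array, so "set limit table-entries" can cap it.
 * The enforcement code is not in this hunk; the usual way to cap a UMA
 * zone is uma_zone_set_max(9), sketched here with made-up names:
 */
#include <sys/param.h>
#include <vm/uma.h>

static uma_zone_t example_kentry_zone;

static void
example_apply_limit(u_int limit)
{
	/* Once the cap is hit, M_NOWAIT allocations from the zone return
	 * NULL, which the table code reports to userland as ENOMEM. */
	uma_zone_set_max(example_kentry_zone, limit);
}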
PFR_FLAG_USERIOCTL)); + p = pfr_create_kentry(ad); if (p == NULL) senderr(ENOMEM); if (pfr_route_kentry(tmpkt, p)) { pfr_destroy_kentry(p); - ad.pfra_fback = PFR_FB_NONE; + ad->pfra_fback = PFR_FB_NONE; } else { SLIST_INSERT_HEAD(&workq, p, pfrke_workq); xadd++; } } - if (flags & PFR_FLAG_FEEDBACK) - if (COPYOUT(&ad, addr+i, sizeof(ad), flags)) - senderr(EFAULT); } pfr_clean_node_mask(tmpkt, &workq); - if (!(flags & PFR_FLAG_DUMMY)) { - if (flags & PFR_FLAG_ATOMIC) - s = splsoftnet(); + if (!(flags & PFR_FLAG_DUMMY)) pfr_insert_kentries(kt, &workq, tzero); - if (flags & PFR_FLAG_ATOMIC) - splx(s); - } else + else pfr_destroy_kentries(&workq); if (nadd != NULL) *nadd = xadd; @@ -386,7 +301,7 @@ _bad: pfr_clean_node_mask(tmpkt, &workq); pfr_destroy_kentries(&workq); if (flags & PFR_FLAG_FEEDBACK) - pfr_reset_feedback(addr, size, flags); + pfr_reset_feedback(addr, size); pfr_destroy_ktable(tmpkt, 0); return (rv); } @@ -398,11 +313,12 @@ pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, struct pfr_ktable *kt; struct pfr_kentryworkq workq; struct pfr_kentry *p; - struct pfr_addr ad; - int i, rv, s, xdel = 0, log = 1; + struct pfr_addr *ad; + int i, rv, xdel = 0, log = 1; + + PF_RULES_WASSERT(); - ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY | - PFR_FLAG_FEEDBACK); + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK); if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) return (EINVAL); kt = pfr_lookup_table(tbl); @@ -418,7 +334,7 @@ pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, * * one is O(N) and is better for large 'n' * one is O(n*LOG(N)) and is better for small 'n' - * + * * following code try to decide which one is best. */ for (i = kt->pfrkt_cnt; i > 0; i >>= 1) @@ -428,56 +344,44 @@ pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, pfr_mark_addrs(kt); } else { /* iterate over addresses to delete */ - for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad), flags)) - return (EFAULT); - if (pfr_validate_addr(&ad)) + for (i = 0, ad = addr; i < size; i++, ad++) { + if (pfr_validate_addr(ad)) return (EINVAL); - p = pfr_lookup_addr(kt, &ad, 1); + p = pfr_lookup_addr(kt, ad, 1); if (p != NULL) p->pfrke_mark = 0; } } SLIST_INIT(&workq); - for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad), flags)) - senderr(EFAULT); - if (pfr_validate_addr(&ad)) + for (i = 0, ad = addr; i < size; i++, ad++) { + if (pfr_validate_addr(ad)) senderr(EINVAL); - p = pfr_lookup_addr(kt, &ad, 1); + p = pfr_lookup_addr(kt, ad, 1); if (flags & PFR_FLAG_FEEDBACK) { if (p == NULL) - ad.pfra_fback = PFR_FB_NONE; - else if (p->pfrke_not != ad.pfra_not) - ad.pfra_fback = PFR_FB_CONFLICT; + ad->pfra_fback = PFR_FB_NONE; + else if (p->pfrke_not != ad->pfra_not) + ad->pfra_fback = PFR_FB_CONFLICT; else if (p->pfrke_mark) - ad.pfra_fback = PFR_FB_DUPLICATE; + ad->pfra_fback = PFR_FB_DUPLICATE; else - ad.pfra_fback = PFR_FB_DELETED; + ad->pfra_fback = PFR_FB_DELETED; } - if (p != NULL && p->pfrke_not == ad.pfra_not && + if (p != NULL && p->pfrke_not == ad->pfra_not && !p->pfrke_mark) { p->pfrke_mark = 1; SLIST_INSERT_HEAD(&workq, p, pfrke_workq); xdel++; } - if (flags & PFR_FLAG_FEEDBACK) - if (COPYOUT(&ad, addr+i, sizeof(ad), flags)) - senderr(EFAULT); } - if (!(flags & PFR_FLAG_DUMMY)) { - if (flags & PFR_FLAG_ATOMIC) - s = splsoftnet(); + if (!(flags & PFR_FLAG_DUMMY)) pfr_remove_kentries(kt, &workq); - if (flags & PFR_FLAG_ATOMIC) - splx(s); - } if (ndel != NULL) *ndel = xdel; return (0); _bad: if (flags & 
PFR_FLAG_FEEDBACK) - pfr_reset_feedback(addr, size, flags); + pfr_reset_feedback(addr, size); return (rv); } @@ -490,11 +394,12 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, struct pfr_kentryworkq addq, delq, changeq; struct pfr_kentry *p, *q; struct pfr_addr ad; - int i, rv, s, xadd = 0, xdel = 0, xchange = 0; + int i, rv, xadd = 0, xdel = 0, xchange = 0; long tzero = time_second; - ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY | - PFR_FLAG_FEEDBACK); + PF_RULES_WASSERT(); + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK); if (pfr_validate_table(tbl, ignore_pfrt_flags, flags & PFR_FLAG_USERIOCTL)) return (EINVAL); @@ -503,8 +408,7 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, return (ESRCH); if (kt->pfrkt_flags & PFR_TFLAG_CONST) return (EPERM); - tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0, - !(flags & PFR_FLAG_USERIOCTL)); + tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0); if (tmpkt == NULL) return (ENOMEM); pfr_mark_addrs(kt); @@ -512,8 +416,11 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, SLIST_INIT(&delq); SLIST_INIT(&changeq); for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad), flags)) - senderr(EFAULT); + /* + * XXXGL: undertand pf_if usage of this function + * and make ad a moving pointer + */ + bcopy(addr + i, &ad, sizeof(ad)); if (pfr_validate_addr(&ad)) senderr(EINVAL); ad.pfra_fback = PFR_FB_NONE; @@ -535,8 +442,7 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, ad.pfra_fback = PFR_FB_DUPLICATE; goto _skip; } - p = pfr_create_kentry(&ad, - !(flags & PFR_FLAG_USERIOCTL)); + p = pfr_create_kentry(&ad); if (p == NULL) senderr(ENOMEM); if (pfr_route_kentry(tmpkt, p)) { @@ -550,8 +456,7 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, } _skip: if (flags & PFR_FLAG_FEEDBACK) - if (COPYOUT(&ad, addr+i, sizeof(ad), flags)) - senderr(EFAULT); + bcopy(&ad, addr + i, sizeof(ad)); } pfr_enqueue_addrs(kt, &delq, &xdel, ENQUEUE_UNMARKED_ONLY); if ((flags & PFR_FLAG_FEEDBACK) && *size2) { @@ -563,20 +468,15 @@ _skip: SLIST_FOREACH(p, &delq, pfrke_workq) { pfr_copyout_addr(&ad, p); ad.pfra_fback = PFR_FB_DELETED; - if (COPYOUT(&ad, addr+size+i, sizeof(ad), flags)) - senderr(EFAULT); + bcopy(&ad, addr + size + i, sizeof(ad)); i++; } } pfr_clean_node_mask(tmpkt, &addq); if (!(flags & PFR_FLAG_DUMMY)) { - if (flags & PFR_FLAG_ATOMIC) - s = splsoftnet(); pfr_insert_kentries(kt, &addq, tzero); pfr_remove_kentries(kt, &delq); pfr_clstats_kentries(&changeq, tzero, INVERT_NEG_FLAG); - if (flags & PFR_FLAG_ATOMIC) - splx(s); } else pfr_destroy_kentries(&addq); if (nadd != NULL) @@ -593,7 +493,7 @@ _bad: pfr_clean_node_mask(tmpkt, &addq); pfr_destroy_kentries(&addq); if (flags & PFR_FLAG_FEEDBACK) - pfr_reset_feedback(addr, size, flags); + pfr_reset_feedback(addr, size); pfr_destroy_ktable(tmpkt, 0); return (rv); } @@ -604,9 +504,11 @@ pfr_tst_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, { struct pfr_ktable *kt; struct pfr_kentry *p; - struct pfr_addr ad; + struct pfr_addr *ad; int i, xmatch = 0; + PF_RULES_RASSERT(); + ACCEPT_FLAGS(flags, PFR_FLAG_REPLACE); if (pfr_validate_table(tbl, 0, 0)) return (EINVAL); @@ -614,22 +516,18 @@ pfr_tst_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) return (ESRCH); - for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad), flags)) - return (EFAULT); - if (pfr_validate_addr(&ad)) + for (i = 0, ad = addr; i < 
size; i++, ad++) { + if (pfr_validate_addr(ad)) return (EINVAL); - if (ADDR_NETWORK(&ad)) + if (ADDR_NETWORK(ad)) return (EINVAL); - p = pfr_lookup_addr(kt, &ad, 0); + p = pfr_lookup_addr(kt, ad, 0); if (flags & PFR_FLAG_REPLACE) - pfr_copyout_addr(&ad, p); - ad.pfra_fback = (p == NULL) ? PFR_FB_NONE : + pfr_copyout_addr(ad, p); + ad->pfra_fback = (p == NULL) ? PFR_FB_NONE : (p->pfrke_not ? PFR_FB_NOTMATCH : PFR_FB_MATCH); if (p != NULL && !p->pfrke_not) xmatch++; - if (COPYOUT(&ad, addr+i, sizeof(ad), flags)) - return (EFAULT); } if (nmatch != NULL) *nmatch = xmatch; @@ -644,6 +542,8 @@ pfr_get_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int *size, struct pfr_walktree w; int rv; + PF_RULES_RASSERT(); + ACCEPT_FLAGS(flags, 0); if (pfr_validate_table(tbl, 0, 0)) return (EINVAL); @@ -659,27 +559,16 @@ pfr_get_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int *size, w.pfrw_op = PFRW_GET_ADDRS; w.pfrw_addr = addr; w.pfrw_free = kt->pfrkt_cnt; - w.pfrw_flags = flags; -#ifdef __FreeBSD__ rv = kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w); -#else - rv = rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w); -#endif if (!rv) -#ifdef __FreeBSD__ rv = kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, &w); -#else - rv = rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w); -#endif if (rv) return (rv); - if (w.pfrw_free) { - printf("pfr_get_addrs: corruption detected (%d).\n", - w.pfrw_free); - return (ENOTTY); - } + KASSERT(w.pfrw_free == 0, ("%s: corruption detected (%d)", __func__, + w.pfrw_free)); + *size = kt->pfrkt_cnt; return (0); } @@ -691,11 +580,13 @@ pfr_get_astats(struct pfr_table *tbl, struct pfr_astats *addr, int *size, struct pfr_ktable *kt; struct pfr_walktree w; struct pfr_kentryworkq workq; - int rv, s; + int rv; long tzero = time_second; + PF_RULES_RASSERT(); + /* XXX PFR_FLAG_CLSTATS disabled */ - ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC); + ACCEPT_FLAGS(flags, 0); if (pfr_validate_table(tbl, 0, 0)) return (EINVAL); kt = pfr_lookup_table(tbl); @@ -710,27 +601,14 @@ pfr_get_astats(struct pfr_table *tbl, struct pfr_astats *addr, int *size, w.pfrw_op = PFRW_GET_ASTATS; w.pfrw_astats = addr; w.pfrw_free = kt->pfrkt_cnt; - w.pfrw_flags = flags; - if (flags & PFR_FLAG_ATOMIC) - s = splsoftnet(); -#ifdef __FreeBSD__ rv = kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w); -#else - rv = rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w); -#endif if (!rv) -#ifdef __FreeBSD__ - rv = kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, + rv = kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, &w); -#else - rv = rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w); -#endif if (!rv && (flags & PFR_FLAG_CLSTATS)) { pfr_enqueue_addrs(kt, &workq, NULL, 0); pfr_clstats_kentries(&workq, tzero, 0); } - if (flags & PFR_FLAG_ATOMIC) - splx(s); if (rv) return (rv); @@ -750,28 +628,25 @@ pfr_clr_astats(struct pfr_table *tbl, struct pfr_addr *addr, int size, struct pfr_ktable *kt; struct pfr_kentryworkq workq; struct pfr_kentry *p; - struct pfr_addr ad; - int i, rv, s, xzero = 0; + struct pfr_addr *ad; + int i, rv, xzero = 0; - ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY | - PFR_FLAG_FEEDBACK); + PF_RULES_WASSERT(); + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK); if (pfr_validate_table(tbl, 0, 0)) return (EINVAL); kt = pfr_lookup_table(tbl); if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) return (ESRCH); SLIST_INIT(&workq); - for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad), flags)) - senderr(EFAULT); - if (pfr_validate_addr(&ad)) + for 
(i = 0, ad = addr; i < size; i++, ad++) { + if (pfr_validate_addr(ad)) senderr(EINVAL); - p = pfr_lookup_addr(kt, &ad, 1); + p = pfr_lookup_addr(kt, ad, 1); if (flags & PFR_FLAG_FEEDBACK) { - ad.pfra_fback = (p != NULL) ? + ad->pfra_fback = (p != NULL) ? PFR_FB_CLEARED : PFR_FB_NONE; - if (COPYOUT(&ad, addr+i, sizeof(ad), flags)) - senderr(EFAULT); } if (p != NULL) { SLIST_INSERT_HEAD(&workq, p, pfrke_workq); @@ -779,23 +654,18 @@ pfr_clr_astats(struct pfr_table *tbl, struct pfr_addr *addr, int size, } } - if (!(flags & PFR_FLAG_DUMMY)) { - if (flags & PFR_FLAG_ATOMIC) - s = splsoftnet(); + if (!(flags & PFR_FLAG_DUMMY)) pfr_clstats_kentries(&workq, 0, 0); - if (flags & PFR_FLAG_ATOMIC) - splx(s); - } if (nzero != NULL) *nzero = xzero; return (0); _bad: if (flags & PFR_FLAG_FEEDBACK) - pfr_reset_feedback(addr, size, flags); + pfr_reset_feedback(addr, size); return (rv); } -int +static int pfr_validate_addr(struct pfr_addr *ad) { int i; @@ -829,7 +699,7 @@ pfr_validate_addr(struct pfr_addr *ad) return (0); } -void +static void pfr_enqueue_addrs(struct pfr_ktable *kt, struct pfr_kentryworkq *workq, int *naddr, int sweep) { @@ -840,58 +710,37 @@ pfr_enqueue_addrs(struct pfr_ktable *kt, struct pfr_kentryworkq *workq, w.pfrw_op = sweep ? PFRW_SWEEP : PFRW_ENQUEUE; w.pfrw_workq = workq; if (kt->pfrkt_ip4 != NULL) -#ifdef __FreeBSD__ - if (kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, + if (kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w)) -#else - if (rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w)) -#endif printf("pfr_enqueue_addrs: IPv4 walktree failed.\n"); if (kt->pfrkt_ip6 != NULL) -#ifdef __FreeBSD__ - if (kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, + if (kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, &w)) -#else - if (rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w)) -#endif printf("pfr_enqueue_addrs: IPv6 walktree failed.\n"); if (naddr != NULL) *naddr = w.pfrw_cnt; } -void +static void pfr_mark_addrs(struct pfr_ktable *kt) { struct pfr_walktree w; bzero(&w, sizeof(w)); w.pfrw_op = PFRW_MARK; -#ifdef __FreeBSD__ if (kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w)) -#else - if (rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w)) -#endif printf("pfr_mark_addrs: IPv4 walktree failed.\n"); -#ifdef __FreeBSD__ if (kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, &w)) -#else - if (rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w)) -#endif printf("pfr_mark_addrs: IPv6 walktree failed.\n"); } -struct pfr_kentry * +static struct pfr_kentry * pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact) { union sockaddr_union sa, mask; -#ifdef __FreeBSD__ struct radix_node_head *head = NULL; -#else - struct radix_node_head *head; -#endif struct pfr_kentry *ke; - int s; bzero(&sa, sizeof(sa)); if (ad->pfra_af == AF_INET) { @@ -903,12 +752,7 @@ pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact) } if (ADDR_NETWORK(ad)) { pfr_prepare_network(&mask, ad->pfra_af, ad->pfra_net); - s = splsoftnet(); /* rn_lookup makes use of globals */ -#ifdef __FreeBSD__ - PF_LOCK_ASSERT(); -#endif ke = (struct pfr_kentry *)rn_lookup(&sa, &mask, head); - splx(s); if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; } else { @@ -921,19 +765,12 @@ pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact) return (ke); } -struct pfr_kentry * -pfr_create_kentry(struct pfr_addr *ad, int intr) +static struct pfr_kentry * +pfr_create_kentry(struct pfr_addr *ad) { struct pfr_kentry *ke; -#ifdef __FreeBSD__ - ke = pool_get(&V_pfr_kentry_pl, PR_NOWAIT | 
PR_ZERO); -#else - if (intr) - ke = pool_get(&pfr_kentry_pl, PR_NOWAIT | PR_ZERO); - else - ke = pool_get(&pfr_kentry_pl, PR_WAITOK|PR_ZERO|PR_LIMITFAIL); -#endif + ke = uma_zalloc(V_pfr_kentry_z, M_NOWAIT | M_ZERO); if (ke == NULL) return (NULL); @@ -947,7 +784,7 @@ pfr_create_kentry(struct pfr_addr *ad, int intr) return (ke); } -void +static void pfr_destroy_kentries(struct pfr_kentryworkq *workq) { struct pfr_kentry *p, *q; @@ -958,20 +795,15 @@ pfr_destroy_kentries(struct pfr_kentryworkq *workq) } } -void +static void pfr_destroy_kentry(struct pfr_kentry *ke) { if (ke->pfrke_counters) -#ifdef __FreeBSD__ - pool_put(&V_pfr_kcounters_pl, ke->pfrke_counters); - pool_put(&V_pfr_kentry_pl, ke); -#else - pool_put(&pfr_kcounters_pl, ke->pfrke_counters); - pool_put(&pfr_kentry_pl, ke); -#endif + uma_zfree(V_pfr_kcounters_z, ke->pfrke_counters); + uma_zfree(V_pfr_kentry_z, ke); } -void +static void pfr_insert_kentries(struct pfr_ktable *kt, struct pfr_kentryworkq *workq, long tzero) { @@ -1000,7 +832,7 @@ pfr_insert_kentry(struct pfr_ktable *kt, struct pfr_addr *ad, long tzero) p = pfr_lookup_addr(kt, ad, 1); if (p != NULL) return (0); - p = pfr_create_kentry(ad, 1); + p = pfr_create_kentry(ad); if (p == NULL) return (EINVAL); @@ -1014,7 +846,7 @@ pfr_insert_kentry(struct pfr_ktable *kt, struct pfr_addr *ad, long tzero) return (0); } -void +static void pfr_remove_kentries(struct pfr_ktable *kt, struct pfr_kentryworkq *workq) { @@ -1029,7 +861,7 @@ pfr_remove_kentries(struct pfr_ktable *kt, pfr_destroy_kentries(workq); } -void +static void pfr_clean_node_mask(struct pfr_ktable *kt, struct pfr_kentryworkq *workq) { @@ -1039,45 +871,33 @@ pfr_clean_node_mask(struct pfr_ktable *kt, pfr_unroute_kentry(kt, p); } -void +static void pfr_clstats_kentries(struct pfr_kentryworkq *workq, long tzero, int negchange) { struct pfr_kentry *p; - int s; SLIST_FOREACH(p, workq, pfrke_workq) { - s = splsoftnet(); if (negchange) p->pfrke_not = !p->pfrke_not; if (p->pfrke_counters) { -#ifdef __FreeBSD__ - pool_put(&V_pfr_kcounters_pl, p->pfrke_counters); -#else - pool_put(&pfr_kcounters_pl, p->pfrke_counters); -#endif + uma_zfree(V_pfr_kcounters_z, p->pfrke_counters); p->pfrke_counters = NULL; } - splx(s); p->pfrke_tzero = tzero; } } -void -pfr_reset_feedback(struct pfr_addr *addr, int size, int flags) +static void +pfr_reset_feedback(struct pfr_addr *addr, int size) { - struct pfr_addr ad; + struct pfr_addr *ad; int i; - for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad), flags)) - break; - ad.pfra_fback = PFR_FB_NONE; - if (COPYOUT(&ad, addr+i, sizeof(ad), flags)) - break; - } + for (i = 0, ad = addr; i < size; i++, ad++) + ad->pfra_fback = PFR_FB_NONE; } -void +static void pfr_prepare_network(union sockaddr_union *sa, int af, int net) { int i; @@ -1102,17 +922,12 @@ pfr_prepare_network(union sockaddr_union *sa, int af, int net) } } -int +static int pfr_route_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) { union sockaddr_union mask; struct radix_node *rn; -#ifdef __FreeBSD__ struct radix_node_head *head = NULL; -#else - struct radix_node_head *head; -#endif - int s; bzero(ke->pfrke_node, sizeof(ke->pfrke_node)); if (ke->pfrke_af == AF_INET) @@ -1120,63 +935,32 @@ pfr_route_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) else if (ke->pfrke_af == AF_INET6) head = kt->pfrkt_ip6; - s = splsoftnet(); -#ifdef __FreeBSD__ - PF_LOCK_ASSERT(); -#endif if (KENTRY_NETWORK(ke)) { pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net); -#ifdef __FreeBSD__ rn = rn_addroute(&ke->pfrke_sa, &mask, head, 
ke->pfrke_node); -#else - rn = rn_addroute(&ke->pfrke_sa, &mask, head, ke->pfrke_node, 0); -#endif } else -#ifdef __FreeBSD__ rn = rn_addroute(&ke->pfrke_sa, NULL, head, ke->pfrke_node); -#else - rn = rn_addroute(&ke->pfrke_sa, NULL, head, ke->pfrke_node, 0); -#endif - splx(s); return (rn == NULL ? -1 : 0); } -int +static int pfr_unroute_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) { union sockaddr_union mask; struct radix_node *rn; -#ifdef __FreeBSD__ struct radix_node_head *head = NULL; -#else - struct radix_node_head *head; -#endif - int s; if (ke->pfrke_af == AF_INET) head = kt->pfrkt_ip4; else if (ke->pfrke_af == AF_INET6) head = kt->pfrkt_ip6; - s = splsoftnet(); -#ifdef __FreeBSD__ - PF_LOCK_ASSERT(); -#endif if (KENTRY_NETWORK(ke)) { pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net); -#ifdef __FreeBSD__ rn = rn_delete(&ke->pfrke_sa, &mask, head); -#else - rn = rn_delete(&ke->pfrke_sa, &mask, head, NULL); -#endif } else -#ifdef __FreeBSD__ rn = rn_delete(&ke->pfrke_sa, NULL, head); -#else - rn = rn_delete(&ke->pfrke_sa, NULL, head, NULL); -#endif - splx(s); if (rn == NULL) { printf("pfr_unroute_kentry: delete failed.\n"); @@ -1185,7 +969,7 @@ pfr_unroute_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) return (0); } -void +static void pfr_copyout_addr(struct pfr_addr *ad, struct pfr_kentry *ke) { bzero(ad, sizeof(*ad)); @@ -1200,12 +984,11 @@ pfr_copyout_addr(struct pfr_addr *ad, struct pfr_kentry *ke) ad->pfra_ip6addr = ke->pfrke_sa.sin6.sin6_addr; } -int +static int pfr_walktree(struct radix_node *rn, void *arg) { struct pfr_kentry *ke = (struct pfr_kentry *)rn; struct pfr_walktree *w = arg; - int s, flags = w->pfrw_flags; switch (w->pfrw_op) { case PFRW_MARK: @@ -1221,11 +1004,7 @@ pfr_walktree(struct radix_node *rn, void *arg) break; case PFRW_GET_ADDRS: if (w->pfrw_free-- > 0) { - struct pfr_addr ad; - - pfr_copyout_addr(&ad, ke); - if (copyout(&ad, w->pfrw_addr, sizeof(ad))) - return (EFAULT); + pfr_copyout_addr(w->pfrw_addr, ke); w->pfrw_addr++; } break; @@ -1235,7 +1014,6 @@ pfr_walktree(struct radix_node *rn, void *arg) pfr_copyout_addr(&as.pfras_a, ke); - s = splsoftnet(); if (ke->pfrke_counters) { bcopy(ke->pfrke_counters->pfrkc_packets, as.pfras_packets, sizeof(as.pfras_packets)); @@ -1246,11 +1024,9 @@ pfr_walktree(struct radix_node *rn, void *arg) bzero(as.pfras_bytes, sizeof(as.pfras_bytes)); as.pfras_a.pfra_fback = PFR_FB_NOCOUNT; } - splx(s); as.pfras_tzero = ke->pfrke_tzero; - if (COPYOUT(&as, w->pfrw_astats, sizeof(as), flags)) - return (EFAULT); + bcopy(&as, w->pfrw_astats, sizeof(as)); w->pfrw_astats++; } break; @@ -1263,40 +1039,28 @@ pfr_walktree(struct radix_node *rn, void *arg) } break; case PFRW_DYNADDR_UPDATE: + { + union sockaddr_union pfr_mask; + if (ke->pfrke_af == AF_INET) { if (w->pfrw_dyn->pfid_acnt4++ > 0) break; -#ifdef __FreeBSD__ - pfr_prepare_network(&V_pfr_mask, AF_INET, ke->pfrke_net); -#else pfr_prepare_network(&pfr_mask, AF_INET, ke->pfrke_net); -#endif - w->pfrw_dyn->pfid_addr4 = *SUNION2PF( - &ke->pfrke_sa, AF_INET); - w->pfrw_dyn->pfid_mask4 = *SUNION2PF( -#ifdef __FreeBSD__ - &V_pfr_mask, AF_INET); -#else - &pfr_mask, AF_INET); -#endif + w->pfrw_dyn->pfid_addr4 = *SUNION2PF(&ke->pfrke_sa, + AF_INET); + w->pfrw_dyn->pfid_mask4 = *SUNION2PF(&pfr_mask, + AF_INET); } else if (ke->pfrke_af == AF_INET6){ if (w->pfrw_dyn->pfid_acnt6++ > 0) break; -#ifdef __FreeBSD__ - pfr_prepare_network(&V_pfr_mask, AF_INET6, ke->pfrke_net); -#else pfr_prepare_network(&pfr_mask, AF_INET6, ke->pfrke_net); -#endif - w->pfrw_dyn->pfid_addr6 = 
*SUNION2PF( - &ke->pfrke_sa, AF_INET6); - w->pfrw_dyn->pfid_mask6 = *SUNION2PF( -#ifdef __FreeBSD__ - &V_pfr_mask, AF_INET6); -#else - &pfr_mask, AF_INET6); -#endif + w->pfrw_dyn->pfid_addr6 = *SUNION2PF(&ke->pfrke_sa, + AF_INET6); + w->pfrw_dyn->pfid_mask6 = *SUNION2PF(&pfr_mask, + AF_INET6); } break; + } } return (0); } @@ -1306,10 +1070,9 @@ pfr_clr_tables(struct pfr_table *filter, int *ndel, int flags) { struct pfr_ktableworkq workq; struct pfr_ktable *p; - int s, xdel = 0; + int xdel = 0; - ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY | - PFR_FLAG_ALLRSETS); + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_ALLRSETS); if (pfr_fix_anchor(filter->pfrt_anchor)) return (EINVAL); if (pfr_table_count(filter, flags) < 0) @@ -1327,13 +1090,8 @@ pfr_clr_tables(struct pfr_table *filter, int *ndel, int flags) SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); xdel++; } - if (!(flags & PFR_FLAG_DUMMY)) { - if (flags & PFR_FLAG_ATOMIC) - s = splsoftnet(); + if (!(flags & PFR_FLAG_DUMMY)) pfr_setflags_ktables(&workq); - if (flags & PFR_FLAG_ATOMIC) - splx(s); - } if (ndel != NULL) *ndel = xdel; return (0); @@ -1344,23 +1102,21 @@ pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags) { struct pfr_ktableworkq addq, changeq; struct pfr_ktable *p, *q, *r, key; - int i, rv, s, xadd = 0; + int i, rv, xadd = 0; long tzero = time_second; - ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY); + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); SLIST_INIT(&addq); SLIST_INIT(&changeq); for (i = 0; i < size; i++) { - if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags)) - senderr(EFAULT); + bcopy(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t)); if (pfr_validate_table(&key.pfrkt_t, PFR_TFLAG_USRMASK, flags & PFR_FLAG_USERIOCTL)) senderr(EINVAL); key.pfrkt_flags |= PFR_TFLAG_ACTIVE; p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); if (p == NULL) { - p = pfr_create_ktable(&key.pfrkt_t, tzero, 1, - !(flags & PFR_FLAG_USERIOCTL)); + p = pfr_create_ktable(&key.pfrkt_t, tzero, 1); if (p == NULL) senderr(ENOMEM); SLIST_FOREACH(q, &addq, pfrkt_workq) { @@ -1386,8 +1142,7 @@ pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags) } } key.pfrkt_flags = 0; - r = pfr_create_ktable(&key.pfrkt_t, 0, 1, - !(flags & PFR_FLAG_USERIOCTL)); + r = pfr_create_ktable(&key.pfrkt_t, 0, 1); if (r == NULL) senderr(ENOMEM); SLIST_INSERT_HEAD(&addq, r, pfrkt_workq); @@ -1405,12 +1160,8 @@ _skip: ; } if (!(flags & PFR_FLAG_DUMMY)) { - if (flags & PFR_FLAG_ATOMIC) - s = splsoftnet(); pfr_insert_ktables(&addq); pfr_setflags_ktables(&changeq); - if (flags & PFR_FLAG_ATOMIC) - splx(s); } else pfr_destroy_ktables(&addq, 0); if (nadd != NULL) @@ -1426,13 +1177,12 @@ pfr_del_tables(struct pfr_table *tbl, int size, int *ndel, int flags) { struct pfr_ktableworkq workq; struct pfr_ktable *p, *q, key; - int i, s, xdel = 0; + int i, xdel = 0; - ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY); + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); SLIST_INIT(&workq); for (i = 0; i < size; i++) { - if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags)) - return (EFAULT); + bcopy(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t)); if (pfr_validate_table(&key.pfrkt_t, 0, flags & PFR_FLAG_USERIOCTL)) return (EINVAL); @@ -1449,13 +1199,8 @@ _skip: ; } - if (!(flags & PFR_FLAG_DUMMY)) { - if (flags & PFR_FLAG_ATOMIC) - s = splsoftnet(); + if (!(flags & PFR_FLAG_DUMMY)) pfr_setflags_ktables(&workq); - if (flags & PFR_FLAG_ATOMIC) - splx(s); - } if (ndel != NULL) *ndel = xdel; return (0); @@ -1468,6 +1213,8 @@ pfr_get_tables(struct pfr_table *filter, 
struct pfr_table *tbl, int *size, struct pfr_ktable *p; int n, nn; + PF_RULES_RASSERT(); + ACCEPT_FLAGS(flags, PFR_FLAG_ALLRSETS); if (pfr_fix_anchor(filter->pfrt_anchor)) return (EINVAL); @@ -1483,13 +1230,11 @@ pfr_get_tables(struct pfr_table *filter, struct pfr_table *tbl, int *size, continue; if (n-- <= 0) continue; - if (COPYOUT(&p->pfrkt_t, tbl++, sizeof(*tbl), flags)) - return (EFAULT); - } - if (n) { - printf("pfr_get_tables: corruption detected (%d).\n", n); - return (ENOTTY); + bcopy(&p->pfrkt_t, tbl++, sizeof(*tbl)); } + + KASSERT(n == 0, ("%s: corruption detected (%d)", __func__, n)); + *size = nn; return (0); } @@ -1500,11 +1245,11 @@ pfr_get_tstats(struct pfr_table *filter, struct pfr_tstats *tbl, int *size, { struct pfr_ktable *p; struct pfr_ktableworkq workq; - int s, n, nn; + int n, nn; long tzero = time_second; /* XXX PFR_FLAG_CLSTATS disabled */ - ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_ALLRSETS); + ACCEPT_FLAGS(flags, PFR_FLAG_ALLRSETS); if (pfr_fix_anchor(filter->pfrt_anchor)) return (EINVAL); n = nn = pfr_table_count(filter, flags); @@ -1515,32 +1260,20 @@ pfr_get_tstats(struct pfr_table *filter, struct pfr_tstats *tbl, int *size, return (0); } SLIST_INIT(&workq); - if (flags & PFR_FLAG_ATOMIC) - s = splsoftnet(); RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { if (pfr_skip_table(filter, p, flags)) continue; if (n-- <= 0) continue; - if (!(flags & PFR_FLAG_ATOMIC)) - s = splsoftnet(); - if (COPYOUT(&p->pfrkt_ts, tbl++, sizeof(*tbl), flags)) { - splx(s); - return (EFAULT); - } - if (!(flags & PFR_FLAG_ATOMIC)) - splx(s); + bcopy(&p->pfrkt_ts, tbl++, sizeof(*tbl)); SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); } if (flags & PFR_FLAG_CLSTATS) pfr_clstats_ktables(&workq, tzero, flags & PFR_FLAG_ADDRSTOO); - if (flags & PFR_FLAG_ATOMIC) - splx(s); - if (n) { - printf("pfr_get_tstats: corruption detected (%d).\n", n); - return (ENOTTY); - } + + KASSERT(n == 0, ("%s: corruption detected (%d)", __func__, n)); + *size = nn; return (0); } @@ -1550,15 +1283,13 @@ pfr_clr_tstats(struct pfr_table *tbl, int size, int *nzero, int flags) { struct pfr_ktableworkq workq; struct pfr_ktable *p, key; - int i, s, xzero = 0; + int i, xzero = 0; long tzero = time_second; - ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY | - PFR_FLAG_ADDRSTOO); + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_ADDRSTOO); SLIST_INIT(&workq); for (i = 0; i < size; i++) { - if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags)) - return (EFAULT); + bcopy(tbl + i, &key.pfrkt_t, sizeof(key.pfrkt_t)); if (pfr_validate_table(&key.pfrkt_t, 0, 0)) return (EINVAL); p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); @@ -1567,13 +1298,8 @@ pfr_clr_tstats(struct pfr_table *tbl, int size, int *nzero, int flags) xzero++; } } - if (!(flags & PFR_FLAG_DUMMY)) { - if (flags & PFR_FLAG_ATOMIC) - s = splsoftnet(); + if (!(flags & PFR_FLAG_DUMMY)) pfr_clstats_ktables(&workq, tzero, flags & PFR_FLAG_ADDRSTOO); - if (flags & PFR_FLAG_ATOMIC) - splx(s); - } if (nzero != NULL) *nzero = xzero; return (0); @@ -1585,17 +1311,16 @@ pfr_set_tflags(struct pfr_table *tbl, int size, int setflag, int clrflag, { struct pfr_ktableworkq workq; struct pfr_ktable *p, *q, key; - int i, s, xchange = 0, xdel = 0; + int i, xchange = 0, xdel = 0; - ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY); + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); if ((setflag & ~PFR_TFLAG_USRMASK) || (clrflag & ~PFR_TFLAG_USRMASK) || (setflag & clrflag)) return (EINVAL); SLIST_INIT(&workq); for (i = 0; i < size; i++) { - if (COPYIN(tbl+i, &key.pfrkt_t, 
sizeof(key.pfrkt_t), flags)) - return (EFAULT); + bcopy(tbl + i, &key.pfrkt_t, sizeof(key.pfrkt_t)); if (pfr_validate_table(&key.pfrkt_t, 0, flags & PFR_FLAG_USERIOCTL)) return (EINVAL); @@ -1619,13 +1344,8 @@ pfr_set_tflags(struct pfr_table *tbl, int size, int setflag, int clrflag, _skip: ; } - if (!(flags & PFR_FLAG_DUMMY)) { - if (flags & PFR_FLAG_ATOMIC) - s = splsoftnet(); + if (!(flags & PFR_FLAG_DUMMY)) pfr_setflags_ktables(&workq); - if (flags & PFR_FLAG_ATOMIC) - splx(s); - } if (nchange != NULL) *nchange = xchange; if (ndel != NULL) @@ -1674,10 +1394,12 @@ pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size, struct pfr_kentryworkq addrq; struct pfr_ktable *kt, *rt, *shadow, key; struct pfr_kentry *p; - struct pfr_addr ad; + struct pfr_addr *ad; struct pf_ruleset *rs; int i, rv, xadd = 0, xaddr = 0; + PF_RULES_WASSERT(); + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_ADDRSTOO); if (size && !(flags & PFR_FLAG_ADDRSTOO)) return (EINVAL); @@ -1691,8 +1413,7 @@ pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size, SLIST_INIT(&tableq); kt = RB_FIND(pfr_ktablehead, &pfr_ktables, (struct pfr_ktable *)tbl); if (kt == NULL) { - kt = pfr_create_ktable(tbl, 0, 1, - !(flags & PFR_FLAG_USERIOCTL)); + kt = pfr_create_ktable(tbl, 0, 1); if (kt == NULL) return (ENOMEM); SLIST_INSERT_HEAD(&tableq, kt, pfrkt_workq); @@ -1708,8 +1429,7 @@ pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size, kt->pfrkt_root = rt; goto _skip; } - rt = pfr_create_ktable(&key.pfrkt_t, 0, 1, - !(flags & PFR_FLAG_USERIOCTL)); + rt = pfr_create_ktable(&key.pfrkt_t, 0, 1); if (rt == NULL) { pfr_destroy_ktables(&tableq, 0); return (ENOMEM); @@ -1719,20 +1439,18 @@ pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size, } else if (!(kt->pfrkt_flags & PFR_TFLAG_INACTIVE)) xadd++; _skip: - shadow = pfr_create_ktable(tbl, 0, 0, !(flags & PFR_FLAG_USERIOCTL)); + shadow = pfr_create_ktable(tbl, 0, 0); if (shadow == NULL) { pfr_destroy_ktables(&tableq, 0); return (ENOMEM); } SLIST_INIT(&addrq); - for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad), flags)) - senderr(EFAULT); - if (pfr_validate_addr(&ad)) + for (i = 0, ad = addr; i < size; i++, ad++) { + if (pfr_validate_addr(ad)) senderr(EINVAL); - if (pfr_lookup_addr(shadow, &ad, 1) != NULL) + if (pfr_lookup_addr(shadow, ad, 1) != NULL) continue; - p = pfr_create_kentry(&ad, 0); + p = pfr_create_kentry(ad); if (p == NULL) senderr(ENOMEM); if (pfr_route_kentry(shadow, p)) { @@ -1776,6 +1494,8 @@ pfr_ina_rollback(struct pfr_table *trs, u_int32_t ticket, int *ndel, int flags) struct pf_ruleset *rs; int xdel = 0; + PF_RULES_WASSERT(); + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); rs = pf_find_ruleset(trs->pfrt_anchor); if (rs == NULL || !rs->topen || ticket != rs->tticket) @@ -1806,10 +1526,12 @@ pfr_ina_commit(struct pfr_table *trs, u_int32_t ticket, int *nadd, struct pfr_ktable *p, *q; struct pfr_ktableworkq workq; struct pf_ruleset *rs; - int s, xadd = 0, xchange = 0; + int xadd = 0, xchange = 0; long tzero = time_second; - ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY); + PF_RULES_WASSERT(); + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); rs = pf_find_ruleset(trs->pfrt_anchor); if (rs == NULL || !rs->topen || ticket != rs->tticket) return (EBUSY); @@ -1827,14 +1549,10 @@ pfr_ina_commit(struct pfr_table *trs, u_int32_t ticket, int *nadd, } if (!(flags & PFR_FLAG_DUMMY)) { - if (flags & PFR_FLAG_ATOMIC) - s = splsoftnet(); for (p = SLIST_FIRST(&workq); p != NULL; p = q) { q = SLIST_NEXT(p, pfrkt_workq); 
pfr_commit_ktable(p, tzero); } - if (flags & PFR_FLAG_ATOMIC) - splx(s); rs->topen = 0; pf_remove_if_empty_ruleset(rs); } @@ -1846,12 +1564,14 @@ pfr_ina_commit(struct pfr_table *trs, u_int32_t ticket, int *nadd, return (0); } -void +static void pfr_commit_ktable(struct pfr_ktable *kt, long tzero) { struct pfr_ktable *shadow = kt->pfrkt_shadow; int nflags; + PF_RULES_WASSERT(); + if (shadow->pfrkt_cnt == NO_ADDRESSES) { if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) pfr_clstats_ktable(kt, tzero, 1); @@ -1905,7 +1625,7 @@ pfr_commit_ktable(struct pfr_ktable *kt, long tzero) pfr_setflags_ktable(kt, nflags); } -int +static int pfr_validate_table(struct pfr_table *tbl, int allowedflags, int no_reserved) { int i; @@ -1930,7 +1650,7 @@ pfr_validate_table(struct pfr_table *tbl, int allowedflags, int no_reserved) * Rewrite anchors referenced by tables to remove slashes * and check for validity. */ -int +static int pfr_fix_anchor(char *anchor) { size_t siz = MAXPATHLEN; @@ -1955,11 +1675,13 @@ pfr_fix_anchor(char *anchor) return (0); } -int +static int pfr_table_count(struct pfr_table *filter, int flags) { struct pf_ruleset *rs; + PF_RULES_ASSERT(); + if (flags & PFR_FLAG_ALLRSETS) return (pfr_ktable_cnt); if (filter->pfrt_anchor[0]) { @@ -1969,7 +1691,7 @@ pfr_table_count(struct pfr_table *filter, int flags) return (pf_main_ruleset.tables); } -int +static int pfr_skip_table(struct pfr_table *filter, struct pfr_ktable *kt, int flags) { if (flags & PFR_FLAG_ALLRSETS) @@ -1979,7 +1701,7 @@ pfr_skip_table(struct pfr_table *filter, struct pfr_ktable *kt, int flags) return (0); } -void +static void pfr_insert_ktables(struct pfr_ktableworkq *workq) { struct pfr_ktable *p; @@ -1988,9 +1710,12 @@ pfr_insert_ktables(struct pfr_ktableworkq *workq) pfr_insert_ktable(p); } -void +static void pfr_insert_ktable(struct pfr_ktable *kt) { + + PF_RULES_WASSERT(); + RB_INSERT(pfr_ktablehead, &pfr_ktables, kt); pfr_ktable_cnt++; if (kt->pfrkt_root != NULL) @@ -1999,7 +1724,7 @@ pfr_insert_ktable(struct pfr_ktable *kt) kt->pfrkt_root->pfrkt_flags|PFR_TFLAG_REFDANCHOR); } -void +static void pfr_setflags_ktables(struct pfr_ktableworkq *workq) { struct pfr_ktable *p, *q; @@ -2010,11 +1735,13 @@ pfr_setflags_ktables(struct pfr_ktableworkq *workq) } } -void +static void pfr_setflags_ktable(struct pfr_ktable *kt, int newf) { struct pfr_kentryworkq addrq; + PF_RULES_WASSERT(); + if (!(newf & PFR_TFLAG_REFERENCED) && !(newf & PFR_TFLAG_PERSIST)) newf &= ~PFR_TFLAG_ACTIVE; @@ -2042,7 +1769,7 @@ pfr_setflags_ktable(struct pfr_ktable *kt, int newf) kt->pfrkt_flags = newf; } -void +static void pfr_clstats_ktables(struct pfr_ktableworkq *workq, long tzero, int recurse) { struct pfr_ktable *p; @@ -2051,39 +1778,30 @@ pfr_clstats_ktables(struct pfr_ktableworkq *workq, long tzero, int recurse) pfr_clstats_ktable(p, tzero, recurse); } -void +static void pfr_clstats_ktable(struct pfr_ktable *kt, long tzero, int recurse) { struct pfr_kentryworkq addrq; - int s; if (recurse) { pfr_enqueue_addrs(kt, &addrq, NULL, 0); pfr_clstats_kentries(&addrq, tzero, 0); } - s = splsoftnet(); bzero(kt->pfrkt_packets, sizeof(kt->pfrkt_packets)); bzero(kt->pfrkt_bytes, sizeof(kt->pfrkt_bytes)); kt->pfrkt_match = kt->pfrkt_nomatch = 0; - splx(s); kt->pfrkt_tzero = tzero; } -struct pfr_ktable * -pfr_create_ktable(struct pfr_table *tbl, long tzero, int attachruleset, - int intr) +static struct pfr_ktable * +pfr_create_ktable(struct pfr_table *tbl, long tzero, int attachruleset) { struct pfr_ktable *kt; struct pf_ruleset *rs; -#ifdef __FreeBSD__ - kt = 
pool_get(&V_pfr_ktable_pl, PR_NOWAIT|PR_ZERO); -#else - if (intr) - kt = pool_get(&pfr_ktable_pl, PR_NOWAIT|PR_ZERO|PR_LIMITFAIL); - else - kt = pool_get(&pfr_ktable_pl, PR_WAITOK|PR_ZERO|PR_LIMITFAIL); -#endif + PF_RULES_WASSERT(); + + kt = malloc(sizeof(*kt), M_PFTABLE, M_NOWAIT|M_ZERO); if (kt == NULL) return (NULL); kt->pfrkt_t = *tbl; @@ -2110,7 +1828,7 @@ pfr_create_ktable(struct pfr_table *tbl, long tzero, int attachruleset, return (kt); } -void +static void pfr_destroy_ktables(struct pfr_ktableworkq *workq, int flushaddr) { struct pfr_ktable *p, *q; @@ -2121,7 +1839,7 @@ pfr_destroy_ktables(struct pfr_ktableworkq *workq, int flushaddr) } } -void +static void pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr) { struct pfr_kentryworkq addrq; @@ -2131,7 +1849,6 @@ pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr) pfr_clean_node_mask(kt, &addrq); pfr_destroy_kentries(&addrq); } -#if defined(__FreeBSD__) && (__FreeBSD_version >= 500100) if (kt->pfrkt_ip4 != NULL) { RADIX_NODE_HEAD_DESTROY(kt->pfrkt_ip4); free((caddr_t)kt->pfrkt_ip4, M_RTABLE); @@ -2140,26 +1857,16 @@ pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr) RADIX_NODE_HEAD_DESTROY(kt->pfrkt_ip6); free((caddr_t)kt->pfrkt_ip6, M_RTABLE); } -#else - if (kt->pfrkt_ip4 != NULL) - free((caddr_t)kt->pfrkt_ip4, M_RTABLE); - if (kt->pfrkt_ip6 != NULL) - free((caddr_t)kt->pfrkt_ip6, M_RTABLE); -#endif if (kt->pfrkt_shadow != NULL) pfr_destroy_ktable(kt->pfrkt_shadow, flushaddr); if (kt->pfrkt_rs != NULL) { kt->pfrkt_rs->tables--; pf_remove_if_empty_ruleset(kt->pfrkt_rs); } -#ifdef __FreeBSD__ - pool_put(&V_pfr_ktable_pl, kt); -#else - pool_put(&pfr_ktable_pl, kt); -#endif + free(kt, M_PFTABLE); } -int +static int pfr_ktable_compare(struct pfr_ktable *p, struct pfr_ktable *q) { int d; @@ -2169,7 +1876,7 @@ pfr_ktable_compare(struct pfr_ktable *p, struct pfr_ktable *q) return (strcmp(p->pfrkt_anchor, q->pfrkt_anchor)); } -struct pfr_ktable * +static struct pfr_ktable * pfr_lookup_table(struct pfr_table *tbl) { /* struct pfr_ktable start like a struct pfr_table */ @@ -2183,6 +1890,8 @@ pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af) struct pfr_kentry *ke = NULL; int match; + PF_RULES_RASSERT(); + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) kt = kt->pfrkt_root; if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) @@ -2191,29 +1900,33 @@ pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af) switch (af) { #ifdef INET case AF_INET: -#ifdef __FreeBSD__ - V_pfr_sin.sin_addr.s_addr = a->addr32[0]; - ke = (struct pfr_kentry *)rn_match(&V_pfr_sin, kt->pfrkt_ip4); -#else - pfr_sin.sin_addr.s_addr = a->addr32[0]; - ke = (struct pfr_kentry *)rn_match(&pfr_sin, kt->pfrkt_ip4); -#endif + { + struct sockaddr_in sin; + + bzero(&sin, sizeof(sin)); + sin.sin_len = sizeof(sin); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = a->addr32[0]; + ke = (struct pfr_kentry *)rn_match(&sin, kt->pfrkt_ip4); if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; break; + } #endif /* INET */ #ifdef INET6 case AF_INET6: -#ifdef __FreeBSD__ - bcopy(a, &V_pfr_sin6.sin6_addr, sizeof(V_pfr_sin6.sin6_addr)); - ke = (struct pfr_kentry *)rn_match(&V_pfr_sin6, kt->pfrkt_ip6); -#else - bcopy(a, &pfr_sin6.sin6_addr, sizeof(pfr_sin6.sin6_addr)); - ke = (struct pfr_kentry *)rn_match(&pfr_sin6, kt->pfrkt_ip6); -#endif + { + struct sockaddr_in6 sin6; + + bzero(&sin6, sizeof(sin6)); + sin6.sin6_len = sizeof(sin6); + sin6.sin6_family = AF_INET6; + bcopy(a, &sin6.sin6_addr, sizeof(sin6.sin6_addr)); + ke = (struct 
pfr_kentry *)rn_match(&sin6, kt->pfrkt_ip6); if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; break; + } #endif /* INET6 */ } match = (ke && !ke->pfrke_not); @@ -2238,29 +1951,31 @@ pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af, switch (af) { #ifdef INET case AF_INET: -#ifdef __FreeBSD__ - V_pfr_sin.sin_addr.s_addr = a->addr32[0]; - ke = (struct pfr_kentry *)rn_match(&V_pfr_sin, kt->pfrkt_ip4); -#else - pfr_sin.sin_addr.s_addr = a->addr32[0]; - ke = (struct pfr_kentry *)rn_match(&pfr_sin, kt->pfrkt_ip4); -#endif + { + struct sockaddr_in sin; + + sin.sin_len = sizeof(sin); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = a->addr32[0]; + ke = (struct pfr_kentry *)rn_match(&sin, kt->pfrkt_ip4); if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; break; + } #endif /* INET */ #ifdef INET6 case AF_INET6: -#ifdef __FreeBSD__ - bcopy(a, &V_pfr_sin6.sin6_addr, sizeof(V_pfr_sin6.sin6_addr)); - ke = (struct pfr_kentry *)rn_match(&V_pfr_sin6, kt->pfrkt_ip6); -#else - bcopy(a, &pfr_sin6.sin6_addr, sizeof(pfr_sin6.sin6_addr)); - ke = (struct pfr_kentry *)rn_match(&pfr_sin6, kt->pfrkt_ip6); -#endif + { + struct sockaddr_in6 sin6; + + sin6.sin6_len = sizeof(sin6); + sin6.sin6_family = AF_INET6; + bcopy(a, &sin6.sin6_addr, sizeof(sin6.sin6_addr)); + ke = (struct pfr_kentry *)rn_match(&sin6, kt->pfrkt_ip6); if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; break; + } #endif /* INET6 */ default: ; @@ -2275,12 +1990,8 @@ pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af, if (ke != NULL && op_pass != PFR_OP_XPASS && (kt->pfrkt_flags & PFR_TFLAG_COUNTERS)) { if (ke->pfrke_counters == NULL) -#ifdef __FreeBSD__ - ke->pfrke_counters = pool_get(&V_pfr_kcounters_pl, -#else - ke->pfrke_counters = pool_get(&pfr_kcounters_pl, -#endif - PR_NOWAIT | PR_ZERO); + ke->pfrke_counters = uma_zalloc(V_pfr_kcounters_z, + M_NOWAIT | M_ZERO); if (ke->pfrke_counters != NULL) { ke->pfrke_counters->pfrkc_packets[dir_out][op_pass]++; ke->pfrke_counters->pfrkc_bytes[dir_out][op_pass] += len; @@ -2289,26 +2000,28 @@ pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af, } struct pfr_ktable * -pfr_attach_table(struct pf_ruleset *rs, char *name, int intr) +pfr_attach_table(struct pf_ruleset *rs, char *name) { struct pfr_ktable *kt, *rt; struct pfr_table tbl; struct pf_anchor *ac = rs->anchor; + PF_RULES_WASSERT(); + bzero(&tbl, sizeof(tbl)); strlcpy(tbl.pfrt_name, name, sizeof(tbl.pfrt_name)); if (ac != NULL) strlcpy(tbl.pfrt_anchor, ac->path, sizeof(tbl.pfrt_anchor)); kt = pfr_lookup_table(&tbl); if (kt == NULL) { - kt = pfr_create_ktable(&tbl, time_second, 1, intr); + kt = pfr_create_ktable(&tbl, time_second, 1); if (kt == NULL) return (NULL); if (ac != NULL) { bzero(tbl.pfrt_anchor, sizeof(tbl.pfrt_anchor)); rt = pfr_lookup_table(&tbl); if (rt == NULL) { - rt = pfr_create_ktable(&tbl, 0, 1, intr); + rt = pfr_create_ktable(&tbl, 0, 1); if (rt == NULL) { pfr_destroy_ktable(kt, 0); return (NULL); @@ -2327,38 +2040,36 @@ pfr_attach_table(struct pf_ruleset *rs, char *name, int intr) void pfr_detach_table(struct pfr_ktable *kt) { - if (kt->pfrkt_refcnt[PFR_REFCNT_RULE] <= 0) - printf("pfr_detach_table: refcount = %d.\n", - kt->pfrkt_refcnt[PFR_REFCNT_RULE]); - else if (!--kt->pfrkt_refcnt[PFR_REFCNT_RULE]) + + PF_RULES_WASSERT(); + KASSERT(kt->pfrkt_refcnt[PFR_REFCNT_RULE] > 0, ("%s: refcount %d\n", + __func__, kt->pfrkt_refcnt[PFR_REFCNT_RULE])); + + if (!--kt->pfrkt_refcnt[PFR_REFCNT_RULE]) pfr_setflags_ktable(kt, kt->pfrkt_flags&~PFR_TFLAG_REFERENCED); } int pfr_pool_get(struct pfr_ktable 
*kt, int *pidx, struct pf_addr *counter, - struct pf_addr **raddr, struct pf_addr **rmask, sa_family_t af) + sa_family_t af) { -#ifdef __FreeBSD__ + struct pf_addr *addr, *cur, *mask; + union sockaddr_union uaddr, umask; struct pfr_kentry *ke, *ke2 = NULL; - struct pf_addr *addr = NULL; -#else - struct pfr_kentry *ke, *ke2; - struct pf_addr *addr; -#endif - union sockaddr_union mask; int idx = -1, use_counter = 0; -#ifdef __FreeBSD__ - if (af == AF_INET) - addr = (struct pf_addr *)&V_pfr_sin.sin_addr; - else if (af == AF_INET6) - addr = (struct pf_addr *)&V_pfr_sin6.sin6_addr; -#else - if (af == AF_INET) - addr = (struct pf_addr *)&pfr_sin.sin_addr; - else if (af == AF_INET6) - addr = (struct pf_addr *)&pfr_sin6.sin6_addr; -#endif + switch (af) { + case AF_INET: + uaddr.sin.sin_len = sizeof(struct sockaddr_in); + uaddr.sin.sin_family = AF_INET; + break; + case AF_INET6: + uaddr.sin6.sin6_len = sizeof(struct sockaddr_in6); + uaddr.sin6.sin6_family = AF_INET6; + break; + } + addr = SUNION2PF(&uaddr, af); + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) kt = kt->pfrkt_root; if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) @@ -2377,21 +2088,13 @@ _next_block: kt->pfrkt_nomatch++; return (1); } -#ifdef __FreeBSD__ - pfr_prepare_network(&V_pfr_mask, af, ke->pfrke_net); -#else - pfr_prepare_network(&pfr_mask, af, ke->pfrke_net); -#endif - *raddr = SUNION2PF(&ke->pfrke_sa, af); -#ifdef __FreeBSD__ - *rmask = SUNION2PF(&V_pfr_mask, af); -#else - *rmask = SUNION2PF(&pfr_mask, af); -#endif + pfr_prepare_network(&umask, af, ke->pfrke_net); + cur = SUNION2PF(&ke->pfrke_sa, af); + mask = SUNION2PF(&umask, af); if (use_counter) { /* is supplied address within block? */ - if (!PF_MATCHA(0, *raddr, *rmask, counter, af)) { + if (!PF_MATCHA(0, cur, mask, counter, af)) { /* no, go to next block in table */ idx++; use_counter = 0; @@ -2400,7 +2103,7 @@ _next_block: PF_ACPY(addr, counter, af); } else { /* use first address of block */ - PF_ACPY(addr, *raddr, af); + PF_ACPY(addr, cur, af); } if (!KENTRY_NETWORK(ke)) { @@ -2412,21 +2115,16 @@ _next_block: } for (;;) { /* we don't want to use a nested block */ -#ifdef __FreeBSD__ - if (af == AF_INET) - ke2 = (struct pfr_kentry *)rn_match(&V_pfr_sin, + switch (af) { + case AF_INET: + ke2 = (struct pfr_kentry *)rn_match(&uaddr, kt->pfrkt_ip4); - else if (af == AF_INET6) - ke2 = (struct pfr_kentry *)rn_match(&V_pfr_sin6, - kt->pfrkt_ip6); -#else - if (af == AF_INET) - ke2 = (struct pfr_kentry *)rn_match(&pfr_sin, - kt->pfrkt_ip4); - else if (af == AF_INET6) - ke2 = (struct pfr_kentry *)rn_match(&pfr_sin6, + break; + case AF_INET6: + ke2 = (struct pfr_kentry *)rn_match(&uaddr, kt->pfrkt_ip6); -#endif + break; + } /* no need to check KENTRY_RNF_ROOT() here */ if (ke2 == ke) { /* lookup return the same block - perfect */ @@ -2437,14 +2135,10 @@ _next_block: } /* we need to increase the counter past the nested block */ - pfr_prepare_network(&mask, AF_INET, ke2->pfrke_net); -#ifdef __FreeBSD__ - PF_POOLMASK(addr, addr, SUNION2PF(&mask, af), &V_pfr_ffaddr, af); -#else - PF_POOLMASK(addr, addr, SUNION2PF(&mask, af), &pfr_ffaddr, af); -#endif + pfr_prepare_network(&umask, AF_INET, ke2->pfrke_net); + PF_POOLMASK(addr, addr, SUNION2PF(&umask, af), &pfr_ffaddr, af); PF_AINC(addr, af); - if (!PF_MATCHA(0, *raddr, *rmask, addr, af)) { + if (!PF_MATCHA(0, cur, mask, addr, af)) { /* ok, we reached the end of our main block */ /* go to next block in table */ idx++; @@ -2454,7 +2148,7 @@ _next_block: } } -struct pfr_kentry * +static struct pfr_kentry * 
pfr_kentry_byidx(struct pfr_ktable *kt, int idx, int af) { struct pfr_walktree w; @@ -2466,20 +2160,12 @@ pfr_kentry_byidx(struct pfr_ktable *kt, int idx, int af) switch (af) { #ifdef INET case AF_INET: -#ifdef __FreeBSD__ kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w); -#else - rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w); -#endif return (w.pfrw_kentry); #endif /* INET */ #ifdef INET6 case AF_INET6: -#ifdef __FreeBSD__ kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, &w); -#else - rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w); -#endif return (w.pfrw_kentry); #endif /* INET6 */ default: @@ -2491,26 +2177,15 @@ void pfr_dynaddr_update(struct pfr_ktable *kt, struct pfi_dynaddr *dyn) { struct pfr_walktree w; - int s; bzero(&w, sizeof(w)); w.pfrw_op = PFRW_DYNADDR_UPDATE; w.pfrw_dyn = dyn; - s = splsoftnet(); dyn->pfid_acnt4 = 0; dyn->pfid_acnt6 = 0; if (!dyn->pfid_af || dyn->pfid_af == AF_INET) -#ifdef __FreeBSD__ kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w); -#else - rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w); -#endif if (!dyn->pfid_af || dyn->pfid_af == AF_INET6) -#ifdef __FreeBSD__ kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, &w); -#else - rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w); -#endif - splx(s); } diff --git a/sys/contrib/pf/net/pfvar.h b/sys/contrib/pf/net/pfvar.h index dab70c5..ca4e449 100644 --- a/sys/contrib/pf/net/pfvar.h +++ b/sys/contrib/pf/net/pfvar.h @@ -34,37 +34,14 @@ #define _NET_PFVAR_H_ #include <sys/param.h> -#include <sys/types.h> #include <sys/queue.h> +#include <sys/refcount.h> #include <sys/tree.h> -#ifdef __FreeBSD__ -#include <sys/lock.h> -#include <sys/sx.h> -#else -#include <sys/rwlock.h> -#endif #include <net/radix.h> -#include <net/route.h> -#ifdef __FreeBSD__ -#include <net/if_clone.h> -#include <net/pf_mtag.h> -#include <vm/uma.h> -#else -#include <netinet/ip_ipsp.h> -#endif - -#ifdef __FreeBSD__ #include <netinet/in.h> -#endif - -#include <netinet/tcp_fsm.h> -struct ip; -struct ip6_hdr; -#ifdef __FreeBSD__ -struct inpcb; -#endif +#include <net/pf_mtag.h> #define PF_TCPS_PROXY_SRC ((TCP_NSTATES)+0) #define PF_TCPS_PROXY_DST ((TCP_NSTATES)+1) @@ -126,12 +103,12 @@ enum { PFTM_TCP_FIRST_PACKET, PFTM_TCP_OPENING, PFTM_TCP_ESTABLISHED, enum { PF_NOPFROUTE, PF_FASTROUTE, PF_ROUTETO, PF_DUPTO, PF_REPLYTO }; enum { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS, - PF_LIMIT_TABLES, PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_MAX }; + PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_MAX }; #define PF_POOL_IDMASK 0x0f enum { PF_POOL_NONE, PF_POOL_BITMASK, PF_POOL_RANDOM, PF_POOL_SRCHASH, PF_POOL_ROUNDROBIN }; enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL, - PF_ADDR_TABLE, PF_ADDR_RTLABEL, PF_ADDR_URPFFAILED, + PF_ADDR_TABLE, PF_ADDR_URPFFAILED, PF_ADDR_RANGE }; #define PF_POOL_TYPEMASK 0x0f #define PF_POOL_STICKYADDR 0x20 @@ -173,11 +150,6 @@ struct pf_addr_wrap { } a; char ifname[IFNAMSIZ]; char tblname[PF_TABLE_NAME_SIZE]; -#ifdef __FreeBSD__ -#define RTLABEL_LEN 32 -#endif - char rtlabelname[RTLABEL_LEN]; - u_int32_t rtlabel; } v; union { struct pfi_dynaddr *dyn; @@ -199,7 +171,6 @@ struct pfi_dynaddr { struct pf_addr pfid_mask6; struct pfr_ktable *pfid_kt; struct pfi_kif *pfid_kif; - void *pfid_hook_cookie; int pfid_net; /* mask or 128 */ int pfid_acnt4; /* address count IPv4 */ int pfid_acnt6; /* address count IPv6 */ @@ -210,10 +181,6 @@ struct pfi_dynaddr { /* * Address manipulation macros */ - -#ifdef __FreeBSD__ -#define splsoftnet() splnet() - #define HTONL(x) (x) = htonl((__uint32_t)(x)) #define HTONS(x) (x) = 
htons((__uint16_t)(x)) #define NTOHL(x) (x) = ntohl((__uint32_t)(x)) @@ -221,46 +188,44 @@ struct pfi_dynaddr { #define PF_NAME "pf" -#define PR_NOWAIT M_NOWAIT -#define PR_WAITOK M_WAITOK -#define PR_ZERO M_ZERO -#define pool_get(p, f) uma_zalloc(*(p), (f)) -#define pool_put(p, o) uma_zfree(*(p), (o)) - -#define UMA_CREATE(var, type, desc) \ - var = uma_zcreate(desc, sizeof(type), \ - NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); \ - if (var == NULL) \ - break -#define UMA_DESTROY(var) \ - if (var) \ - uma_zdestroy(var) - -#ifdef __FreeBSD__ -extern struct mtx pf_task_mtx; - -#define PF_LOCK_ASSERT() mtx_assert(&pf_task_mtx, MA_OWNED) -#define PF_UNLOCK_ASSERT() mtx_assert(&pf_task_mtx, MA_NOTOWNED) -#define PF_LOCK() mtx_lock(&pf_task_mtx) -#define PF_UNLOCK() mtx_unlock(&pf_task_mtx) -#else -#define PF_LOCK_ASSERT() -#define PF_UNLOCK_ASSERT() -#define PF_LOCK() -#define PF_UNLOCK() -#endif /* __FreeBSD__ */ - -#define PF_COPYIN(uaddr, kaddr, len, r) do { \ - PF_UNLOCK(); \ - r = copyin((uaddr), (kaddr), (len)); \ - PF_LOCK(); \ -} while(0) +#define PF_HASHROW_ASSERT(h) mtx_assert(&(h)->lock, MA_OWNED) +#define PF_HASHROW_LOCK(h) mtx_lock(&(h)->lock) +#define PF_HASHROW_UNLOCK(h) mtx_unlock(&(h)->lock) -#define PF_COPYOUT(kaddr, uaddr, len, r) do { \ - PF_UNLOCK(); \ - r = copyout((kaddr), (uaddr), (len)); \ - PF_LOCK(); \ -} while(0) +#define PF_STATE_LOCK(s) \ + do { \ + struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH(s)]; \ + PF_HASHROW_LOCK(_ih); \ + } while (0) + +#define PF_STATE_UNLOCK(s) \ + do { \ + struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH((s))]; \ + PF_HASHROW_UNLOCK(_ih); \ + } while (0) + +#ifdef INVARIANTS +#define PF_STATE_LOCK_ASSERT(s) \ + do { \ + struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH(s)]; \ + PF_HASHROW_ASSERT(_ih); \ + } while (0) +#else /* !INVARIANTS */ +#define PF_STATE_LOCK_ASSERT(s) do {} while (0) +#endif /* INVARIANTS */ + +extern struct mtx pf_unlnkdrules_mtx; +#define PF_UNLNKDRULES_LOCK() mtx_lock(&pf_unlnkdrules_mtx) +#define PF_UNLNKDRULES_UNLOCK() mtx_unlock(&pf_unlnkdrules_mtx) + +extern struct rwlock pf_rules_lock; +#define PF_RULES_RLOCK() rw_rlock(&pf_rules_lock) +#define PF_RULES_RUNLOCK() rw_runlock(&pf_rules_lock) +#define PF_RULES_WLOCK() rw_wlock(&pf_rules_lock) +#define PF_RULES_WUNLOCK() rw_wunlock(&pf_rules_lock) +#define PF_RULES_ASSERT() rw_assert(&pf_rules_lock, RA_LOCKED) +#define PF_RULES_RASSERT() rw_assert(&pf_rules_lock, RA_RLOCKED) +#define PF_RULES_WASSERT() rw_assert(&pf_rules_lock, RA_WLOCKED) #define PF_MODVER 1 #define PFLOG_MODVER 1 @@ -272,7 +237,7 @@ extern struct mtx pf_task_mtx; #define PFSYNC_MINVER 1 #define PFSYNC_PREFVER PFSYNC_MODVER #define PFSYNC_MAXVER 1 -#endif /* __FreeBSD__ */ + #ifdef INET #ifndef INET6 #define PF_INET_ONLY @@ -412,8 +377,6 @@ extern struct mtx pf_task_mtx; pf_routable((x), (af), NULL, (rtid))) || \ (((aw)->type == PF_ADDR_URPFFAILED && (ifp) != NULL && \ pf_routable((x), (af), (ifp), (rtid))) || \ - ((aw)->type == PF_ADDR_RTLABEL && \ - !pf_rtlabel_match((x), (af), (aw), (rtid))) || \ ((aw)->type == PF_ADDR_TABLE && \ !pfr_match_addr((aw)->p.tbl, (x), (af))) || \ ((aw)->type == PF_ADDR_DYNIFTL && \ @@ -473,7 +436,6 @@ struct pf_pool { struct pf_addr counter; int tblidx; u_int16_t proxy_port[2]; - u_int8_t port_op; u_int8_t opts; }; @@ -705,6 +667,7 @@ struct pf_rule { #define PFRULE_NOSYNC 0x0010 #define PFRULE_SRCTRACK 0x0020 /* track source states */ #define PFRULE_RULESRCTRACK 0x0040 /* per rule */ +#define PFRULE_REFS 0x0080 /* rule has references */ /* scrub flags */ #define 
PFRULE_NODF 0x0100 @@ -717,7 +680,6 @@ struct pf_rule { /* rule flags again */ #define PFRULE_IFBOUND 0x00010000 /* if-bound */ #define PFRULE_STATESLOPPY 0x00020000 /* sloppy state tracking */ -#define PFRULE_PFLOW 0x00040000 #define PFSTATE_HIWAT 10000 /* default state table size */ #define PFSTATE_ADAPT_START 6000 /* default adaptive timeout start */ @@ -734,7 +696,7 @@ struct pf_threshold { }; struct pf_src_node { - RB_ENTRY(pf_src_node) entry; + LIST_ENTRY(pf_src_node) entry; struct pf_addr addr; struct pf_addr raddr; union pf_rule_ptr rule; @@ -787,9 +749,7 @@ struct pf_state_peer { u_int8_t pad[1]; }; -TAILQ_HEAD(pf_state_queue, pf_state); - -/* keep synced with struct pf_state_key, used in RB_FIND */ +/* Keep synced with struct pf_state_key. */ struct pf_state_key_cmp { struct pf_addr addr[2]; u_int16_t port[2]; @@ -798,13 +758,6 @@ struct pf_state_key_cmp { u_int8_t pad[2]; }; -struct pf_state_item { - TAILQ_ENTRY(pf_state_item) entry; - struct pf_state *s; -}; - -TAILQ_HEAD(pf_statelisthead, pf_state_item); - struct pf_state_key { struct pf_addr addr[2]; u_int16_t port[2]; @@ -812,13 +765,11 @@ struct pf_state_key { u_int8_t proto; u_int8_t pad[2]; - RB_ENTRY(pf_state_key) entry; - struct pf_statelisthead states; - struct pf_state_key *reverse; - struct inpcb *inp; + LIST_ENTRY(pf_state_key) entry; + TAILQ_HEAD(, pf_state) states[2]; }; -/* keep synced with struct pf_state, used in RB_FIND */ +/* Keep synced with struct pf_state. */ struct pf_state_cmp { u_int64_t id; u_int32_t creatorid; @@ -830,17 +781,12 @@ struct pf_state { u_int64_t id; u_int32_t creatorid; u_int8_t direction; -#ifdef __FreeBSD__ - u_int8_t pad[2]; - u_int8_t local_flags; -#define PFSTATE_EXPIRING 0x01 -#else u_int8_t pad[3]; -#endif + u_int refs; TAILQ_ENTRY(pf_state) sync_list; - TAILQ_ENTRY(pf_state) entry_list; - RB_ENTRY(pf_state) entry_id; + TAILQ_ENTRY(pf_state) key_list[2]; + LIST_ENTRY(pf_state) entry; struct pf_state_peer src; struct pf_state_peer dst; union pf_rule_ptr rule; @@ -862,7 +808,7 @@ struct pf_state { u_int8_t state_flags; #define PFSTATE_ALLOWOPTS 0x01 #define PFSTATE_SLOPPY 0x02 -#define PFSTATE_PFLOW 0x04 +/* was PFSTATE_PFLOW 0x04 */ #define PFSTATE_NOSYNC 0x08 #define PFSTATE_ACK 0x10 u_int8_t timeout; @@ -903,7 +849,7 @@ struct pfsync_state_key { }; struct pfsync_state { - u_int32_t id[2]; + u_int64_t id; char ifname[IFNAMSIZ]; struct pfsync_state_key key[2]; struct pfsync_state_peer src; @@ -920,11 +866,7 @@ struct pfsync_state { sa_family_t af; u_int8_t proto; u_int8_t direction; -#ifdef __FreeBSD__ - u_int8_t local_flags; -#define PFSTATE_EXPIRING 0x01 - u_int8_t pad; -#endif + u_int8_t __spare[2]; u_int8_t log; u_int8_t state_flags; u_int8_t timeout; @@ -932,7 +874,6 @@ struct pfsync_state { u_int8_t updates; } __packed; -#ifdef __FreeBSD__ #ifdef _KERNEL /* pfsync */ typedef int pfsync_state_import_t(struct pfsync_state *, u_int8_t); @@ -940,50 +881,28 @@ typedef void pfsync_insert_state_t(struct pf_state *); typedef void pfsync_update_state_t(struct pf_state *); typedef void pfsync_delete_state_t(struct pf_state *); typedef void pfsync_clear_states_t(u_int32_t, const char *); -typedef int pfsync_state_in_use_t(struct pf_state *); typedef int pfsync_defer_t(struct pf_state *, struct mbuf *); -typedef int pfsync_up_t(void); extern pfsync_state_import_t *pfsync_state_import_ptr; extern pfsync_insert_state_t *pfsync_insert_state_ptr; extern pfsync_update_state_t *pfsync_update_state_ptr; extern pfsync_delete_state_t *pfsync_delete_state_ptr; extern pfsync_clear_states_t 
*pfsync_clear_states_ptr; -extern pfsync_state_in_use_t *pfsync_state_in_use_ptr; extern pfsync_defer_t *pfsync_defer_ptr; -extern pfsync_up_t *pfsync_up_ptr; void pfsync_state_export(struct pfsync_state *, struct pf_state *); -/* pflow */ -typedef int export_pflow_t(struct pf_state *); - -extern export_pflow_t *export_pflow_ptr; - /* pflog */ struct pf_ruleset; struct pf_pdesc; typedef int pflog_packet_t(struct pfi_kif *, struct mbuf *, sa_family_t, u_int8_t, u_int8_t, struct pf_rule *, struct pf_rule *, - struct pf_ruleset *, struct pf_pdesc *); - + struct pf_ruleset *, struct pf_pdesc *, int); extern pflog_packet_t *pflog_packet_ptr; -/* pf uid hack */ -VNET_DECLARE(int, debug_pfugidhack); -#define V_debug_pfugidhack VNET(debug_pfugidhack) - #define V_pf_end_threads VNET(pf_end_threads) -#endif - -/* Macros to set/clear/test flags. */ -#ifdef _KERNEL -#define SET(t, f) ((t) |= (f)) -#define CLR(t, f) ((t) &= ~(f)) -#define ISSET(t, f) ((t) & (f)) -#endif -#endif +#endif /* _KERNEL */ #define PFSYNC_FLAG_SRCNODE 0x04 #define PFSYNC_FLAG_NATSRCNODE 0x08 @@ -1085,9 +1004,20 @@ RB_PROTOTYPE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare); #define PFR_TFLAG_REFDANCHOR 0x00000020 #define PFR_TFLAG_COUNTERS 0x00000040 /* Adjust masks below when adding flags. */ -#define PFR_TFLAG_USRMASK 0x00000043 -#define PFR_TFLAG_SETMASK 0x0000003C -#define PFR_TFLAG_ALLMASK 0x0000007F +#define PFR_TFLAG_USRMASK (PFR_TFLAG_PERSIST | \ + PFR_TFLAG_CONST | \ + PFR_TFLAG_COUNTERS) +#define PFR_TFLAG_SETMASK (PFR_TFLAG_ACTIVE | \ + PFR_TFLAG_INACTIVE | \ + PFR_TFLAG_REFERENCED | \ + PFR_TFLAG_REFDANCHOR) +#define PFR_TFLAG_ALLMASK (PFR_TFLAG_PERSIST | \ + PFR_TFLAG_CONST | \ + PFR_TFLAG_ACTIVE | \ + PFR_TFLAG_INACTIVE | \ + PFR_TFLAG_REFERENCED | \ + PFR_TFLAG_REFDANCHOR | \ + PFR_TFLAG_COUNTERS) struct pfr_table { char pfrt_anchor[MAXPATHLEN]; @@ -1158,22 +1088,13 @@ struct pfr_kentry { struct radix_node pfrke_node[2]; union sockaddr_union pfrke_sa; SLIST_ENTRY(pfr_kentry) pfrke_workq; - union { - - struct pfr_kcounters *pfrke_counters; -#if 0 - struct pfr_kroute *pfrke_route; -#endif - } u; + struct pfr_kcounters *pfrke_counters; long pfrke_tzero; u_int8_t pfrke_af; u_int8_t pfrke_net; u_int8_t pfrke_not; u_int8_t pfrke_mark; }; -#define pfrke_counters u.pfrke_counters -#define pfrke_route u.pfrke_route - SLIST_HEAD(pfr_ktableworkq, pfr_ktable); RB_HEAD(pfr_ktablehead, pfr_ktable); @@ -1202,25 +1123,6 @@ struct pfr_ktable { #define pfrkt_nomatch pfrkt_ts.pfrts_nomatch #define pfrkt_tzero pfrkt_ts.pfrts_tzero -RB_HEAD(pf_state_tree, pf_state_key); -RB_PROTOTYPE(pf_state_tree, pf_state_key, entry, pf_state_compare_key); - -RB_HEAD(pf_state_tree_ext_gwy, pf_state_key); -RB_PROTOTYPE(pf_state_tree_ext_gwy, pf_state_key, - entry_ext_gwy, pf_state_compare_ext_gwy); - -RB_HEAD(pfi_ifhead, pfi_kif); - -/* state tables */ -#ifdef __FreeBSD__ -#ifdef _KERNEL -VNET_DECLARE(struct pf_state_tree, pf_statetbl); -#define V_pf_statetbl VNET(pf_statetbl) -#endif -#else -extern struct pf_state_tree pf_statetbl; -#endif - /* keep synced with pfi_kif, used in RB_FIND */ struct pfi_kif_cmp { char pfik_name[IFNAMSIZ]; @@ -1228,25 +1130,23 @@ struct pfi_kif_cmp { struct pfi_kif { char pfik_name[IFNAMSIZ]; - RB_ENTRY(pfi_kif) pfik_tree; + union { + RB_ENTRY(pfi_kif) _pfik_tree; + LIST_ENTRY(pfi_kif) _pfik_list; + } _pfik_glue; +#define pfik_tree _pfik_glue._pfik_tree +#define pfik_list _pfik_glue._pfik_list u_int64_t pfik_packets[2][2][2]; u_int64_t pfik_bytes[2][2][2]; u_int32_t pfik_tzero; - int pfik_flags; - void 
*pfik_ah_cookie; + u_int pfik_flags; struct ifnet *pfik_ifp; struct ifg_group *pfik_group; - int pfik_states; - int pfik_rules; + u_int pfik_rulerefs; TAILQ_HEAD(, pfi_dynaddr) pfik_dynaddrs; }; -enum pfi_kif_refs { - PFI_KIF_REF_NONE, - PFI_KIF_REF_STATE, - PFI_KIF_REF_RULE -}; - +#define PFI_IFLAG_REFS 0x0001 /* has state references */ #define PFI_IFLAG_SKIP 0x0100 /* skip filtering on interface */ struct pf_pdesc { @@ -1254,7 +1154,6 @@ struct pf_pdesc { int done; uid_t uid; gid_t gid; - pid_t pid; } lookup; u_int64_t tot_len; /* Make Mickey money */ union { @@ -1268,15 +1167,11 @@ struct pf_pdesc { } hdr; struct pf_rule *nat_rule; /* nat/rdr rule applied to packet */ - struct ether_header - *eh; struct pf_addr *src; /* src address */ struct pf_addr *dst; /* dst address */ u_int16_t *sport; u_int16_t *dport; -#ifdef __FreeBSD__ struct pf_mtag *pf_mtag; -#endif u_int32_t p_len; /* total length of payload */ @@ -1400,7 +1295,6 @@ struct pf_pdesc { *(a) = (x); \ } while (0) -#ifdef __FreeBSD__ #define REASON_SET(a, x) \ do { \ if ((a) != NULL) \ @@ -1408,15 +1302,6 @@ struct pf_pdesc { if (x < PFRES_MAX) \ V_pf_status.counters[x]++; \ } while (0) -#else -#define REASON_SET(a, x) \ - do { \ - if ((a) != NULL) \ - *(a) = (x); \ - if (x < PFRES_MAX) \ - pf_status.counters[x]++; \ - } while (0) -#endif struct pf_status { u_int64_t counters[PFRES_MAX]; @@ -1425,7 +1310,6 @@ struct pf_status { u_int64_t scounters[SCNT_MAX]; u_int64_t pcounters[2][2][3]; u_int64_t bcounters[2][2]; - u_int64_t stateid; u_int32_t running; u_int32_t states; u_int32_t src_nodes; @@ -1485,10 +1369,9 @@ struct pf_altq { u_int32_t parent_qid; /* parent queue id */ u_int32_t bandwidth; /* queue bandwidth */ u_int8_t priority; /* priority */ -#ifdef __FreeBSD__ u_int8_t local_flags; /* dynamic interface */ #define PFALTQ_FLAG_IF_REMOVED 0x01 -#endif + u_int16_t qlimit; /* queue size limit */ u_int16_t flags; /* misc flags */ union { @@ -1500,13 +1383,6 @@ struct pf_altq { u_int32_t qid; /* return value */ }; -struct pf_tagname { - TAILQ_ENTRY(pf_tagname) entries; - char name[PF_TAG_NAME_SIZE]; - u_int16_t tag; - int ref; -}; - struct pf_divert { union { struct in_addr ipv4; @@ -1516,13 +1392,7 @@ struct pf_divert { }; #define PFFRAG_FRENT_HIWAT 5000 /* Number of fragment entries */ -#define PFFRAG_FRAG_HIWAT 1000 /* Number of fragmented packets */ -#define PFFRAG_FRCENT_HIWAT 50000 /* Number of fragment cache entries */ -#define PFFRAG_FRCACHE_HIWAT 10000 /* Number of fragment descriptors */ - -#define PFR_KTABLE_HIWAT 1000 /* Number of tables */ #define PFR_KENTRY_HIWAT 200000 /* Number of table entries */ -#define PFR_KENTRY_HIWAT_SMALL 100000 /* Number of table entries (tiny hosts) */ /* * ioctl parameter structures @@ -1653,7 +1523,7 @@ struct pfioc_trans { } *array; }; -#define PFR_FLAG_ATOMIC 0x00000001 +#define PFR_FLAG_ATOMIC 0x00000001 /* unused */ #define PFR_FLAG_DUMMY 0x00000002 #define PFR_FLAG_FEEDBACK 0x00000004 #define PFR_FLAG_CLSTATS 0x00000008 @@ -1765,56 +1635,54 @@ struct pfioc_iface { #define DIOCSETIFFLAG _IOWR('D', 89, struct pfioc_iface) #define DIOCCLRIFFLAG _IOWR('D', 90, struct pfioc_iface) #define DIOCKILLSRCNODES _IOWR('D', 91, struct pfioc_src_node_kill) -#ifdef __FreeBSD__ struct pf_ifspeed { char ifname[IFNAMSIZ]; u_int32_t baudrate; }; #define DIOCGIFSPEED _IOWR('D', 92, struct pf_ifspeed) -#endif #ifdef _KERNEL -RB_HEAD(pf_src_tree, pf_src_node); -RB_PROTOTYPE(pf_src_tree, pf_src_node, entry, pf_src_compare); -#ifdef __FreeBSD__ -VNET_DECLARE(struct pf_src_tree, tree_src_tracking); 
-#define V_tree_src_tracking VNET(tree_src_tracking) -#else -extern struct pf_src_tree tree_src_tracking; -#endif +struct pf_srchash { + LIST_HEAD(, pf_src_node) nodes; + struct mtx lock; +}; -RB_HEAD(pf_state_tree_id, pf_state); -RB_PROTOTYPE(pf_state_tree_id, pf_state, - entry_id, pf_state_compare_id); -#ifdef __FreeBSD__ -VNET_DECLARE(struct pf_state_tree_id, tree_id); -#define V_tree_id VNET(tree_id) -VNET_DECLARE(struct pf_state_queue, state_list); -#define V_state_list VNET(state_list) -#else -extern struct pf_state_tree_id tree_id; -extern struct pf_state_queue state_list; -#endif +struct pf_keyhash { + LIST_HEAD(, pf_state_key) keys; + struct mtx lock; +}; + +struct pf_idhash { + LIST_HEAD(, pf_state) states; + struct mtx lock; +}; + +#define PF_HASHSIZ (32768) +VNET_DECLARE(struct pf_keyhash *, pf_keyhash); +VNET_DECLARE(struct pf_idhash *, pf_idhash); +VNET_DECLARE(u_long, pf_hashmask); +#define V_pf_keyhash VNET(pf_keyhash) +#define V_pf_idhash VNET(pf_idhash) +#define V_pf_hashmask VNET(pf_hashmask) +VNET_DECLARE(struct pf_srchash *, pf_srchash); +VNET_DECLARE(u_long, pf_srchashmask); +#define V_pf_srchash VNET(pf_srchash) +#define V_pf_srchashmask VNET(pf_srchashmask) + +#define PF_IDHASH(s) (be64toh((s)->id) % (V_pf_hashmask + 1)) + +VNET_DECLARE(void *, pf_swi_cookie); +#define V_pf_swi_cookie VNET(pf_swi_cookie) + +VNET_DECLARE(uint64_t, pf_stateid[MAXCPU]); +#define V_pf_stateid VNET(pf_stateid) -TAILQ_HEAD(pf_poolqueue, pf_pool); -#ifdef __FreeBSD__ -VNET_DECLARE(struct pf_poolqueue, pf_pools[2]); -#define V_pf_pools VNET(pf_pools) -#else -extern struct pf_poolqueue pf_pools[2]; -#endif TAILQ_HEAD(pf_altqqueue, pf_altq); -#ifdef __FreeBSD__ VNET_DECLARE(struct pf_altqqueue, pf_altqs[2]); #define V_pf_altqs VNET(pf_altqs) VNET_DECLARE(struct pf_palist, pf_pabuf); #define V_pf_pabuf VNET(pf_pabuf) -#else -extern struct pf_altqqueue pf_altqs[2]; -extern struct pf_palist pf_pabuf; -#endif -#ifdef __FreeBSD__ VNET_DECLARE(u_int32_t, ticket_altqs_active); #define V_ticket_altqs_active VNET(ticket_altqs_active) VNET_DECLARE(u_int32_t, ticket_altqs_inactive); @@ -1827,147 +1695,120 @@ VNET_DECLARE(struct pf_altqqueue *, pf_altqs_active); #define V_pf_altqs_active VNET(pf_altqs_active) VNET_DECLARE(struct pf_altqqueue *, pf_altqs_inactive); #define V_pf_altqs_inactive VNET(pf_altqs_inactive) -VNET_DECLARE(struct pf_poolqueue *, pf_pools_active); -#define V_pf_pools_active VNET(pf_pools_active) -VNET_DECLARE(struct pf_poolqueue *, pf_pools_inactive); -#define V_pf_pools_inactive VNET(pf_pools_inactive) -#else -extern u_int32_t ticket_altqs_active; -extern u_int32_t ticket_altqs_inactive; -extern int altqs_inactive_open; -extern u_int32_t ticket_pabuf; -extern struct pf_altqqueue *pf_altqs_active; -extern struct pf_altqqueue *pf_altqs_inactive; -extern struct pf_poolqueue *pf_pools_active; -extern struct pf_poolqueue *pf_pools_inactive; -#endif -extern int pf_tbladdr_setup(struct pf_ruleset *, - struct pf_addr_wrap *); -extern void pf_tbladdr_remove(struct pf_addr_wrap *); -extern void pf_tbladdr_copyout(struct pf_addr_wrap *); + +VNET_DECLARE(struct pf_rulequeue, pf_unlinked_rules); +#define V_pf_unlinked_rules VNET(pf_unlinked_rules) + +void pf_initialize(void); +void pf_cleanup(void); + +struct pf_mtag *pf_get_mtag(struct mbuf *); + extern void pf_calc_skip_steps(struct pf_rulequeue *); -#ifdef __FreeBSD__ #ifdef ALTQ extern void pf_altq_ifnet_event(struct ifnet *, int); #endif -VNET_DECLARE(uma_zone_t, pf_src_tree_pl); -#define V_pf_src_tree_pl VNET(pf_src_tree_pl) 
-VNET_DECLARE(uma_zone_t, pf_rule_pl); -#define V_pf_rule_pl VNET(pf_rule_pl) -VNET_DECLARE(uma_zone_t, pf_state_pl); -#define V_pf_state_pl VNET(pf_state_pl) -VNET_DECLARE(uma_zone_t, pf_state_key_pl); -#define V_pf_state_key_pl VNET(pf_state_key_pl) -VNET_DECLARE(uma_zone_t, pf_state_item_pl); -#define V_pf_state_item_pl VNET(pf_state_item_pl) -VNET_DECLARE(uma_zone_t, pf_altq_pl); -#define V_pf_altq_pl VNET(pf_altq_pl) -VNET_DECLARE(uma_zone_t, pf_pooladdr_pl); -#define V_pf_pooladdr_pl VNET(pf_pooladdr_pl) -VNET_DECLARE(uma_zone_t, pfr_ktable_pl); -#define V_pfr_ktable_pl VNET(pfr_ktable_pl) -VNET_DECLARE(uma_zone_t, pfr_kentry_pl); -#define V_pfr_kentry_pl VNET(pfr_kentry_pl) -VNET_DECLARE(uma_zone_t, pfr_kcounters_pl); -#define V_pfr_kcounters_pl VNET(pfr_kcounters_pl) -VNET_DECLARE(uma_zone_t, pf_cache_pl); -#define V_pf_cache_pl VNET(pf_cache_pl) -VNET_DECLARE(uma_zone_t, pf_cent_pl); -#define V_pf_cent_pl VNET(pf_cent_pl) -VNET_DECLARE(uma_zone_t, pf_state_scrub_pl); -#define V_pf_state_scrub_pl VNET(pf_state_scrub_pl) -VNET_DECLARE(uma_zone_t, pfi_addr_pl); -#define V_pfi_addr_pl VNET(pfi_addr_pl) -#else -extern struct pool pf_src_tree_pl, pf_rule_pl; -extern struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl, - pf_altq_pl, pf_pooladdr_pl; -extern struct pool pf_state_scrub_pl; -#endif +VNET_DECLARE(uma_zone_t, pf_state_z); +#define V_pf_state_z VNET(pf_state_z) +VNET_DECLARE(uma_zone_t, pf_state_key_z); +#define V_pf_state_key_z VNET(pf_state_key_z) +VNET_DECLARE(uma_zone_t, pf_state_scrub_z); +#define V_pf_state_scrub_z VNET(pf_state_scrub_z) + extern void pf_purge_thread(void *); -#ifdef __FreeBSD__ -extern int pf_purge_expired_src_nodes(int); -extern int pf_purge_expired_states(u_int32_t , int); -#else -extern void pf_purge_expired_src_nodes(int); -extern void pf_purge_expired_states(u_int32_t); -#endif -extern void pf_unlink_state(struct pf_state *); -extern void pf_free_state(struct pf_state *); +extern void pf_intr(void *); +extern void pf_purge_expired_src_nodes(void); + +extern int pf_unlink_state(struct pf_state *, u_int); +#define PF_ENTER_LOCKED 0x00000001 +#define PF_RETURN_LOCKED 0x00000002 extern int pf_state_insert(struct pfi_kif *, struct pf_state_key *, struct pf_state_key *, struct pf_state *); -extern int pf_insert_src_node(struct pf_src_node **, - struct pf_rule *, struct pf_addr *, - sa_family_t); -void pf_src_tree_remove_state(struct pf_state *); -extern struct pf_state *pf_find_state_byid(struct pf_state_cmp *); +extern void pf_free_state(struct pf_state *); + +static __inline u_int +pf_hashsrc(struct pf_addr *addr, sa_family_t af) +{ + u_int h; + +#define ADDR_HASH(a) ((a) ^ ((a) >> 16)) + + switch (af) { + case AF_INET: + h = ADDR_HASH(addr->v4.s_addr); + break; + case AF_INET6: + h = ADDR_HASH(addr->v6.__u6_addr.__u6_addr32[3]); + default: + panic("%s: unknown address family %u", __func__, af); + } +#undef ADDR_HASH + + return (h & V_pf_srchashmask); +} + +static __inline void +pf_ref_state(struct pf_state *s) +{ + + refcount_acquire(&s->refs); +} + +static __inline int +pf_release_state(struct pf_state *s) +{ + + if (refcount_release(&s->refs)) { + pf_free_state(s); + return (1); + } else + return (0); +} + +extern struct pf_state *pf_find_state_byid(uint64_t, uint32_t); extern struct pf_state *pf_find_state_all(struct pf_state_key_cmp *, u_int, int *); +struct pf_src_node *pf_find_src_node(struct pf_addr *, struct pf_rule *, + sa_family_t, int); extern void pf_print_state(struct pf_state *); extern void pf_print_flags(u_int8_t); extern 
u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t, u_int8_t); -#ifdef __FreeBSD__ VNET_DECLARE(struct ifnet *, sync_ifp); #define V_sync_ifp VNET(sync_ifp); VNET_DECLARE(struct pf_rule, pf_default_rule); #define V_pf_default_rule VNET(pf_default_rule) -#else -extern struct ifnet *sync_ifp; -extern struct pf_rule pf_default_rule; -#endif extern void pf_addrcpy(struct pf_addr *, struct pf_addr *, u_int8_t); -void pf_rm_rule(struct pf_rulequeue *, - struct pf_rule *); -#ifndef __FreeBSD__ -struct pf_divert *pf_find_divert(struct mbuf *); -#endif +void pf_free_rule(struct pf_rule *); #ifdef INET -#ifdef __FreeBSD__ -int pf_test(int, struct ifnet *, struct mbuf **, struct ether_header *, - struct inpcb *); -#else -int pf_test(int, struct ifnet *, struct mbuf **, struct ether_header *); -#endif +int pf_test(int, struct ifnet *, struct mbuf **, struct inpcb *); #endif /* INET */ #ifdef INET6 -#ifdef __FreeBSD__ -int pf_test6(int, struct ifnet *, struct mbuf **, struct ether_header *, - struct inpcb *); -#else -int pf_test6(int, struct ifnet *, struct mbuf **, struct ether_header *); -#endif +int pf_test6(int, struct ifnet *, struct mbuf **, struct inpcb *); void pf_poolmask(struct pf_addr *, struct pf_addr*, struct pf_addr *, struct pf_addr *, u_int8_t); void pf_addr_inc(struct pf_addr *, sa_family_t); #endif /* INET6 */ -#ifdef __FreeBSD__ u_int32_t pf_new_isn(struct pf_state *); -#endif void *pf_pull_hdr(struct mbuf *, int, void *, int, u_short *, u_short *, sa_family_t); void pf_change_a(void *, u_int16_t *, u_int32_t, u_int8_t); -int pflog_packet(struct pfi_kif *, struct mbuf *, sa_family_t, u_int8_t, - u_int8_t, struct pf_rule *, struct pf_rule *, struct pf_ruleset *, - struct pf_pdesc *); void pf_send_deferred_syn(struct pf_state *); int pf_match_addr(u_int8_t, struct pf_addr *, struct pf_addr *, struct pf_addr *, sa_family_t); int pf_match_addr_range(struct pf_addr *, struct pf_addr *, struct pf_addr *, sa_family_t); -int pf_match(u_int8_t, u_int32_t, u_int32_t, u_int32_t); int pf_match_port(u_int8_t, u_int16_t, u_int16_t, u_int16_t); -int pf_match_uid(u_int8_t, uid_t, uid_t, uid_t); -int pf_match_gid(u_int8_t, gid_t, gid_t, gid_t); void pf_normalize_init(void); +void pf_normalize_cleanup(void); int pf_normalize_ip(struct mbuf **, int, struct pfi_kif *, u_short *, struct pf_pdesc *); int pf_normalize_ip6(struct mbuf **, int, struct pfi_kif *, u_short *, @@ -1985,25 +1826,17 @@ u_int32_t void pf_purge_expired_fragments(void); int pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *, int); -int pf_rtlabel_match(struct pf_addr *, sa_family_t, struct pf_addr_wrap *, - int); -#ifdef __FreeBSD__ -int pf_socket_lookup(int, struct pf_pdesc *, struct inpcb *); -#else -int pf_socket_lookup(int, struct pf_pdesc *); -#endif +int pf_socket_lookup(int, struct pf_pdesc *, struct mbuf *); struct pf_state_key *pf_alloc_state_key(int); -void pf_pkt_addr_changed(struct mbuf *); -int pf_state_key_attach(struct pf_state_key *, struct pf_state *, int); void pfr_initialize(void); +void pfr_cleanup(void); int pfr_match_addr(struct pfr_ktable *, struct pf_addr *, sa_family_t); void pfr_update_stats(struct pfr_ktable *, struct pf_addr *, sa_family_t, u_int64_t, int, int, int); -int pfr_pool_get(struct pfr_ktable *, int *, struct pf_addr *, - struct pf_addr **, struct pf_addr **, sa_family_t); +int pfr_pool_get(struct pfr_ktable *, int *, struct pf_addr *, sa_family_t); void pfr_dynaddr_update(struct pfr_ktable *, struct pfi_dynaddr *); struct pfr_ktable * - pfr_attach_table(struct pf_ruleset *, 
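/*
 * Illustrative sketch: the slimmed-down pf_test() signature above (no
 * struct ether_header, inpcb passed straight through) matches a pfil(9)
 * hook of this vintage.  The wrapper name and the registration shown in
 * the trailing comment are assumptions about the glue code, which is not
 * part of the hunks shown here.
 */
static int
pf_check_in_sketch(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
    struct inpcb *inp)
{
	int chk;

	chk = pf_test(PF_IN, ifp, m, inp);
	if (chk && *m != NULL) {
		m_freem(*m);
		*m = NULL;
	}
	return (chk);
}
/* Registration would look roughly like:
 *	pfil_add_hook(pf_check_in_sketch, NULL, PFIL_IN | PFIL_WAITOK,
 *	    pfil_head_get(PFIL_TYPE_AF, AF_INET));
 */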
char *, int); + pfr_attach_table(struct pf_ruleset *, char *); void pfr_detach_table(struct pfr_ktable *); int pfr_clr_tables(struct pfr_table *, int *, int); int pfr_add_tables(struct pfr_table *, int, int *, int); @@ -2032,120 +1865,44 @@ int pfr_ina_commit(struct pfr_table *, u_int32_t, int *, int *, int); int pfr_ina_define(struct pfr_table *, struct pfr_addr *, int, int *, int *, u_int32_t, int); -#ifdef __FreeBSD__ +MALLOC_DECLARE(PFI_MTYPE); VNET_DECLARE(struct pfi_kif *, pfi_all); #define V_pfi_all VNET(pfi_all) -#else -extern struct pfi_kif *pfi_all; -#endif void pfi_initialize(void); -#ifdef __FreeBSD__ void pfi_cleanup(void); -#endif -struct pfi_kif *pfi_kif_get(const char *); -void pfi_kif_ref(struct pfi_kif *, enum pfi_kif_refs); -void pfi_kif_unref(struct pfi_kif *, enum pfi_kif_refs); +void pfi_kif_ref(struct pfi_kif *); +void pfi_kif_unref(struct pfi_kif *); +struct pfi_kif *pfi_kif_find(const char *); +struct pfi_kif *pfi_kif_attach(struct pfi_kif *, const char *); int pfi_kif_match(struct pfi_kif *, struct pfi_kif *); -void pfi_attach_ifnet(struct ifnet *); -void pfi_detach_ifnet(struct ifnet *); -void pfi_attach_ifgroup(struct ifg_group *); -void pfi_detach_ifgroup(struct ifg_group *); -void pfi_group_change(const char *); +void pfi_kif_purge(void); int pfi_match_addr(struct pfi_dynaddr *, struct pf_addr *, sa_family_t); int pfi_dynaddr_setup(struct pf_addr_wrap *, sa_family_t); -void pfi_dynaddr_remove(struct pf_addr_wrap *); +void pfi_dynaddr_remove(struct pfi_dynaddr *); void pfi_dynaddr_copyout(struct pf_addr_wrap *); void pfi_update_status(const char *, struct pf_status *); -int pfi_get_ifaces(const char *, struct pfi_kif *, int *); +void pfi_get_ifaces(const char *, struct pfi_kif *, int *); int pfi_set_flags(const char *, int); int pfi_clear_flags(const char *, int); -#ifdef __FreeBSD__ -int pf_match_tag(struct mbuf *, struct pf_rule *, int *, - struct pf_mtag *); -#else -int pf_match_tag(struct mbuf *, struct pf_rule *, int *); -#endif -u_int16_t pf_tagname2tag(char *); -void pf_tag2tagname(u_int16_t, char *); -void pf_tag_ref(u_int16_t); -void pf_tag_unref(u_int16_t); -#ifdef __FreeBSD__ -int pf_tag_packet(struct mbuf *, int, int, struct pf_mtag *); -#else -int pf_tag_packet(struct mbuf *, int, int); -#endif -u_int32_t pf_qname2qid(char *); +int pf_match_tag(struct mbuf *, struct pf_rule *, int *, int); +int pf_tag_packet(struct mbuf *, struct pf_pdesc *, int); void pf_qid2qname(u_int32_t, char *); -void pf_qid_unref(u_int32_t); -#ifdef __FreeBSD__ VNET_DECLARE(struct pf_status, pf_status); #define V_pf_status VNET(pf_status) -#else -extern struct pf_status pf_status; -#endif -#ifdef __FreeBSD__ -VNET_DECLARE(uma_zone_t, pf_frent_pl); -#define V_pf_frent_pl VNET(pf_frent_pl) -VNET_DECLARE(uma_zone_t, pf_frag_pl); -#define V_pf_frag_pl VNET(pf_frag_pl) -VNET_DECLARE(struct sx, pf_consistency_lock); -#define V_pf_consistency_lock VNET(pf_consistency_lock) -#else -extern struct pool pf_frent_pl, pf_frag_pl; -extern struct rwlock pf_consistency_lock; -#endif - -struct pf_pool_limit { - void *pp; - unsigned limit; -}; -#ifdef __FreeBSD__ -VNET_DECLARE(struct pf_pool_limit, pf_pool_limits[PF_LIMIT_MAX]); -#define V_pf_pool_limits VNET(pf_pool_limits) -#else -extern struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX]; -#endif - -#ifdef __FreeBSD__ -struct pf_frent { - LIST_ENTRY(pf_frent) fr_next; - struct ip *fr_ip; - struct mbuf *fr_m; -}; - -struct pf_frcache { - LIST_ENTRY(pf_frcache) fr_next; - uint16_t fr_off; - uint16_t fr_end; -}; - -struct pf_fragment { - 
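/*
 * Illustrative sketch (assumed usage, not shown in the hunks): with
 * pfi_kif_get() split into pfi_kif_find()/pfi_kif_attach(), a caller can
 * preallocate the pfi_kif with PFI_MTYPE so the attach step itself need
 * not sleep; pfi_kif_attach() links the new object or returns an already
 * existing one for that name.  The helper name and locking comments are
 * assumptions.
 */
static struct pfi_kif *
pf_kif_get_sketch(const char *ifname)
{
	struct pfi_kif *kif;

	kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK | M_ZERO);
	/* ... acquire pf's rule/ioctl lock ... */
	kif = pfi_kif_attach(kif, ifname);
	pfi_kif_ref(kif);
	/* ... release the lock; kif stays referenced ... */
	return (kif);
}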
RB_ENTRY(pf_fragment) fr_entry; - TAILQ_ENTRY(pf_fragment) frag_next; - struct in_addr fr_src; - struct in_addr fr_dst; - u_int8_t fr_p; /* protocol of this fragment */ - u_int8_t fr_flags; /* status flags */ - u_int16_t fr_id; /* fragment id for reassemble */ - u_int16_t fr_max; /* fragment data max */ - u_int32_t fr_timeout; -#define fr_queue fr_u.fru_queue -#define fr_cache fr_u.fru_cache - union { - LIST_HEAD(pf_fragq, pf_frent) fru_queue; /* buffering */ - LIST_HEAD(pf_cacheq, pf_frcache) fru_cache; /* non-buf */ - } fr_u; +struct pf_limit { + uma_zone_t zone; + u_int limit; }; -#endif /* (__FreeBSD__) */ +VNET_DECLARE(struct pf_limit, pf_limits[PF_LIMIT_MAX]); +#define V_pf_limits VNET(pf_limits) #endif /* _KERNEL */ -#ifdef __FreeBSD__ #ifdef _KERNEL VNET_DECLARE(struct pf_anchor_global, pf_anchors); #define V_pf_anchors VNET(pf_anchors) @@ -2153,11 +1910,6 @@ VNET_DECLARE(struct pf_anchor, pf_main_anchor); #define V_pf_main_anchor VNET(pf_main_anchor) #define pf_main_ruleset V_pf_main_anchor.ruleset #endif -#else -extern struct pf_anchor_global pf_anchors; -extern struct pf_anchor pf_main_anchor; -#define pf_main_ruleset pf_main_anchor.ruleset -#endif /* these ruleset functions can be linked into userland programs (pfctl) */ int pf_get_ruleset_number(u_int8_t); @@ -2168,20 +1920,10 @@ int pf_anchor_copyout(const struct pf_ruleset *, const struct pf_rule *, struct pfioc_rule *); void pf_anchor_remove(struct pf_rule *); void pf_remove_if_empty_ruleset(struct pf_ruleset *); -struct pf_anchor *pf_find_anchor(const char *); struct pf_ruleset *pf_find_ruleset(const char *); struct pf_ruleset *pf_find_or_create_ruleset(const char *); void pf_rs_initialize(void); -#ifndef __FreeBSD__ -#ifdef _KERNEL -int pf_anchor_copyout(const struct pf_ruleset *, - const struct pf_rule *, struct pfioc_rule *); -void pf_anchor_remove(struct pf_rule *); - -#endif /* _KERNEL */ -#endif - /* The fingerprint functions can be linked into userland programs (tcpdump) */ int pf_osfp_add(struct pf_osfp_ioctl *); #ifdef _KERNEL @@ -2189,20 +1931,9 @@ struct pf_osfp_enlist * pf_osfp_fingerprint(struct pf_pdesc *, struct mbuf *, int, const struct tcphdr *); #endif /* _KERNEL */ -struct pf_osfp_enlist * - pf_osfp_fingerprint_hdr(const struct ip *, const struct ip6_hdr *, - const struct tcphdr *); void pf_osfp_flush(void); int pf_osfp_get(struct pf_osfp_ioctl *); -#ifdef __FreeBSD__ -int pf_osfp_initialize(void); -void pf_osfp_cleanup(void); -#else -void pf_osfp_initialize(void); -#endif int pf_osfp_match(struct pf_osfp_enlist *, pf_osfp_t); -struct pf_os_fingerprint * - pf_osfp_validate(void); #ifdef _KERNEL void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t); @@ -2219,16 +1950,12 @@ int pf_map_addr(u_int8_t, struct pf_rule *, struct pf_rule *pf_get_translation(struct pf_pdesc *, struct mbuf *, int, int, struct pfi_kif *, struct pf_src_node **, struct pf_state_key **, struct pf_state_key **, - struct pf_state_key **, struct pf_state_key **, struct pf_addr *, struct pf_addr *, u_int16_t, u_int16_t); -int pf_state_key_setup(struct pf_pdesc *, struct pf_rule *, - struct pf_state_key **, struct pf_state_key **, - struct pf_state_key **, struct pf_state_key **, - struct pf_addr *, struct pf_addr *, - u_int16_t, u_int16_t); +struct pf_state_key *pf_state_key_setup(struct pf_pdesc *, struct pf_addr *, + struct pf_addr *, u_int16_t, u_int16_t); +struct pf_state_key *pf_state_key_clone(struct pf_state_key *); #endif /* _KERNEL */ - #endif /* _NET_PFVAR_H_ */ diff --git a/sys/modules/pf/Makefile b/sys/modules/pf/Makefile 
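/*
 * Illustrative sketch: the new struct pf_limit above pairs a UMA zone with
 * its configured limit, so a DIOCSETLIMIT-style request can simply cap the
 * zone.  The helper name is an assumption; only V_pf_limits, PF_LIMIT_MAX
 * and uma_zone_set_max(9) are taken as given.
 */
static int
pf_set_limit_sketch(int index, u_int limit)
{
	if (index < 0 || index >= PF_LIMIT_MAX ||
	    V_pf_limits[index].zone == NULL)
		return (EINVAL);
	uma_zone_set_max(V_pf_limits[index].zone, limit);
	V_pf_limits[index].limit = limit;
	return (0);
}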
index d4b01da..2f941e1 100644 --- a/sys/modules/pf/Makefile +++ b/sys/modules/pf/Makefile @@ -7,8 +7,8 @@ KMOD= pf SRCS= pf.c pf_if.c pf_lb.c pf_osfp.c pf_ioctl.c pf_norm.c pf_table.c \ - pf_ruleset.c \ - in4_cksum.c \ + pf_ruleset.c in4_cksum.c \ + bus_if.h device_if.h \ opt_pf.h opt_inet.h opt_inet6.h opt_bpf.h opt_global.h CFLAGS+= -I${.CURDIR}/../../contrib/pf @@ -33,7 +33,6 @@ opt_pf.h: echo "#define DEV_PF 1" > ${.TARGET} echo "#define DEV_PFLOG 1" >> ${.TARGET} echo "#define DEV_PFSYNC 1" >> ${.TARGET} - echo "#define DEV_PFLOW 1" >> ${.TARGET} .if defined(VIMAGE) opt_global.h: diff --git a/sys/net/if.c b/sys/net/if.c index bf332db..6a2812e 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -1084,6 +1084,7 @@ if_addgroup(struct ifnet *ifp, const char *groupname) struct ifg_list *ifgl; struct ifg_group *ifg = NULL; struct ifg_member *ifgm; + int new = 0; if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' && groupname[strlen(groupname) - 1] <= '9') @@ -1124,8 +1125,8 @@ if_addgroup(struct ifnet *ifp, const char *groupname) strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group)); ifg->ifg_refcnt = 0; TAILQ_INIT(&ifg->ifg_members); - EVENTHANDLER_INVOKE(group_attach_event, ifg); TAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next); + new = 1; } ifg->ifg_refcnt++; @@ -1139,6 +1140,8 @@ if_addgroup(struct ifnet *ifp, const char *groupname) IFNET_WUNLOCK(); + if (new) + EVENTHANDLER_INVOKE(group_attach_event, ifg); EVENTHANDLER_INVOKE(group_change_event, groupname); return (0); @@ -1177,10 +1180,11 @@ if_delgroup(struct ifnet *ifp, const char *groupname) if (--ifgl->ifgl_group->ifg_refcnt == 0) { TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next); + IFNET_WUNLOCK(); EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group); free(ifgl->ifgl_group, M_TEMP); - } - IFNET_WUNLOCK(); + } else + IFNET_WUNLOCK(); free(ifgl, M_TEMP); @@ -1221,11 +1225,12 @@ if_delgroups(struct ifnet *ifp) if (--ifgl->ifgl_group->ifg_refcnt == 0) { TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next); + IFNET_WUNLOCK(); EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group); free(ifgl->ifgl_group, M_TEMP); - } - IFNET_WUNLOCK(); + } else + IFNET_WUNLOCK(); free(ifgl, M_TEMP); diff --git a/sys/netinet/in_gif.c b/sys/netinet/in_gif.c index 22f35df..6c60390 100644 --- a/sys/netinet/in_gif.c +++ b/sys/netinet/in_gif.c @@ -256,8 +256,6 @@ in_gif_output(struct ifnet *ifp, int family, struct mbuf *m) #endif } - m_addr_changed(m); - error = ip_output(m, NULL, &sc->gif_ro, 0, NULL, NULL); if (!(GIF2IFP(sc)->if_flags & IFF_LINK0) && diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c index 3260197..6451cb6 100644 --- a/sys/netinet/ip_icmp.c +++ b/sys/netinet/ip_icmp.c @@ -675,8 +675,6 @@ icmp_reflect(struct mbuf *m) goto done; /* Ip_output() will check for broadcast */ } - m_addr_changed(m); - t = ip->ip_dst; ip->ip_dst = ip->ip_src; diff --git a/sys/netinet/ipfw/ip_fw2.c b/sys/netinet/ipfw/ip_fw2.c index 18a9c5a..0dfab1f 100644 --- a/sys/netinet/ipfw/ip_fw2.c +++ b/sys/netinet/ipfw/ip_fw2.c @@ -1698,20 +1698,30 @@ do { \ case O_ALTQ: { struct pf_mtag *at; + struct m_tag *mtag; ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd; + /* + * ALTQ uses mbuf tags from another + * packet filtering system - pf(4). + * We allocate a tag in its format + * and fill it in, pretending to be pf(4). 
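/*
 * Illustrative sketch of the pattern the if_addgroup()/if_delgroup() hunks
 * above adopt: decide under IFNET_WLOCK() whether an event needs to be
 * raised, but invoke the event handlers only after IFNET_WUNLOCK(),
 * presumably so handlers (such as pf's interface-group tracking) may sleep
 * or take their own locks.  "ifg" and "groupname" stand for the variables
 * used in the surrounding if_addgroup() code.
 */
	int new_group = 0;

	IFNET_WLOCK();
	/* ... find or create the group; set new_group if it was created ... */
	IFNET_WUNLOCK();

	if (new_group)
		EVENTHANDLER_INVOKE(group_attach_event, ifg);
	EVENTHANDLER_INVOKE(group_change_event, groupname);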
+ */ match = 1; at = pf_find_mtag(m); if (at != NULL && at->qid != 0) break; - at = pf_get_mtag(m); - if (at == NULL) { + mtag = m_tag_get(PACKET_TAG_PF, + sizeof(struct pf_mtag), M_NOWAIT | M_ZERO); + if (mtag == NULL) { /* * Let the packet fall back to the * default ALTQ. */ break; } + m_tag_prepend(m, mtag); + at = (struct pf_mtag *)(mtag + 1); at->qid = altq->qid; at->hdr = ip; break; diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index d2b841a..e48b666 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -100,9 +100,6 @@ void (*ip_divert_ptr)(struct mbuf *, int); int (*ng_ipfw_input_p)(struct mbuf **, int, struct ip_fw_args *, int); -/* Hook for telling pf that the destination address changed */ -void (*m_addr_chg_pf_p)(struct mbuf *m); - #ifdef INET /* * Hooks for multicast routing. They all default to NULL, so leave them not diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 9d35e0a..4cc2e69 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -544,7 +544,6 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, m_freem(m->m_next); m->m_next = NULL; m->m_data = (caddr_t)ipgen; - m_addr_changed(m); /* m_len is set later */ tlen = 0; #define xchg(a,b,type) { type t; t=a; a=b; b=t; } diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c index a1e19d9..202dc05 100644 --- a/sys/netinet6/icmp6.c +++ b/sys/netinet6/icmp6.c @@ -1177,8 +1177,6 @@ icmp6_notify_error(struct mbuf **mp, int off, int icmp6len, int code) ip6cp.ip6c_src = &icmp6src; ip6cp.ip6c_nxt = nxt; - m_addr_changed(m); - if (icmp6type == ICMP6_PACKET_TOO_BIG) { notifymtu = ntohl(icmp6->icmp6_mtu); ip6cp.ip6c_cmdarg = (void *)¬ifymtu; @@ -2298,8 +2296,6 @@ icmp6_reflect(struct mbuf *m, size_t off) m->m_flags &= ~(M_BCAST|M_MCAST); - m_addr_changed(m); - ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL); if (outif) icmp6_ifoutstat_inc(outif, type, code); diff --git a/sys/netinet6/in6_gif.c b/sys/netinet6/in6_gif.c index c329e11..573287c 100644 --- a/sys/netinet6/in6_gif.c +++ b/sys/netinet6/in6_gif.c @@ -264,8 +264,6 @@ in6_gif_output(struct ifnet *ifp, #endif } - m_addr_changed(m); - #ifdef IPV6_MINMTU /* * force fragmentation to minimum MTU, to avoid path MTU discovery. diff --git a/sys/netipsec/ipsec_input.c b/sys/netipsec/ipsec_input.c index 8b53bf4..a004aef 100644 --- a/sys/netipsec/ipsec_input.c +++ b/sys/netipsec/ipsec_input.c @@ -473,8 +473,6 @@ ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav, key_sa_recordxfer(sav, m); /* record data transfer */ - m_addr_changed(m); - #ifdef DEV_ENC encif->if_ipackets++; encif->if_ibytes += m->m_pkthdr.len; diff --git a/sys/netipsec/ipsec_output.c b/sys/netipsec/ipsec_output.c index 1a1b646..b03e4b6 100644 --- a/sys/netipsec/ipsec_output.c +++ b/sys/netipsec/ipsec_output.c @@ -190,8 +190,6 @@ ipsec_process_done(struct mbuf *m, struct ipsecrequest *isr) } key_sa_recordxfer(sav, m); /* record data transfer */ - m_addr_changed(m); - /* * We're done with IPsec processing, transmit the packet using the * appropriate network protocol (IP or IPv6). SPD lookup will be diff --git a/sys/netipsec/xform_ipip.c b/sys/netipsec/xform_ipip.c index 0d5fdb4..c6f6379 100644 --- a/sys/netipsec/xform_ipip.c +++ b/sys/netipsec/xform_ipip.c @@ -392,8 +392,6 @@ _ipip_input(struct mbuf *m, int iphlen, struct ifnet *gifp) panic("%s: bogus ip version %u", __func__, v>>4); } - m_addr_changed(m); - if (netisr_queue(isr, m)) { /* (0) on success. 
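/*
 * Illustrative sketch of what the O_ALTQ hunk above open-codes: find an
 * existing PACKET_TAG_PF tag on the mbuf, or allocate one laid out as
 * pf(4) expects and prepend it, so ALTQ can later read the qid.  The
 * helper name is an assumption.
 */
static struct pf_mtag *
ipfw_altq_pf_mtag_sketch(struct mbuf *m)
{
	struct m_tag *mtag;

	mtag = m_tag_find(m, PACKET_TAG_PF, NULL);
	if (mtag == NULL) {
		mtag = m_tag_get(PACKET_TAG_PF, sizeof(struct pf_mtag),
		    M_NOWAIT | M_ZERO);
		if (mtag == NULL)
			return (NULL);	/* caller falls back to default queue */
		m_tag_prepend(m, mtag);
	}
	return ((struct pf_mtag *)(mtag + 1));
}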
*/ V_ipipstat.ipips_qfull++; DPRINTF(("%s: packet dropped because of full queue\n", diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index 38adfaf..a26d5ca 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -749,16 +749,6 @@ m_last(struct mbuf *m) return (m); } -extern void (*m_addr_chg_pf_p)(struct mbuf *m); - -static __inline void -m_addr_changed(struct mbuf *m) -{ - - if (m_addr_chg_pf_p) - m_addr_chg_pf_p(m); -} - /* * mbuf, cluster, and external object allocation macros (for compatibility * purposes). @@ -998,7 +988,7 @@ struct mbuf *m_unshare(struct mbuf *, int how); #define PACKET_TAG_DIVERT 17 /* divert info */ #define PACKET_TAG_IPFORWARD 18 /* ipforward info */ #define PACKET_TAG_MACLABEL (19 | MTAG_PERSISTENT) /* MAC label */ -#define PACKET_TAG_PF 21 /* PF + ALTQ information */ +#define PACKET_TAG_PF (21 | MTAG_PERSISTENT) /* PF/ALTQ information */ #define PACKET_TAG_RTSOCKFAM 25 /* rtsock sa family */ #define PACKET_TAG_IPOPTIONS 27 /* Saved IP options */ #define PACKET_TAG_CARP 28 /* CARP info */ diff --git a/sys/sys/param.h b/sys/sys/param.h index a3bf897..a079980 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -58,7 +58,7 @@ * in the range 5 to 9. */ #undef __FreeBSD_version -#define __FreeBSD_version 1000017 /* Master, propagated to newvers */ +#define __FreeBSD_version 1000018 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, diff --git a/usr.bin/netstat/if.c b/usr.bin/netstat/if.c index c08fbf0..854844f 100644 --- a/usr.bin/netstat/if.c +++ b/usr.bin/netstat/if.c @@ -81,6 +81,32 @@ static void catchalarm(int); static char addr_buf[NI_MAXHOST]; /* for getnameinfo() */ #endif +static const char* pfsyncacts[] = { + /* PFSYNC_ACT_CLR */ "clear all request", + /* PFSYNC_ACT_INS */ "state insert", + /* PFSYNC_ACT_INS_ACK */ "state inserted ack", + /* PFSYNC_ACT_UPD */ "state update", + /* PFSYNC_ACT_UPD_C */ "compressed state update", + /* PFSYNC_ACT_UPD_REQ */ "uncompressed state request", + /* PFSYNC_ACT_DEL */ "state delete", + /* PFSYNC_ACT_DEL_C */ "compressed state delete", + /* PFSYNC_ACT_INS_F */ "fragment insert", + /* PFSYNC_ACT_DEL_F */ "fragment delete", + /* PFSYNC_ACT_BUS */ "bulk update mark", + /* PFSYNC_ACT_TDB */ "TDB replay counter update", + /* PFSYNC_ACT_EOF */ "end of frame mark", +}; + +static void +pfsync_acts_stats(const char *fmt, uint64_t *a) +{ + int i; + + for (i = 0; i < PFSYNC_ACT_MAX; i++, a++) + if (*a || sflag <= 1) + printf(fmt, *a, pfsyncacts[i], plural(*a)); +} + /* * Dump pfsync statistics structure. 
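/*
 * Illustrative sketch of what adding MTAG_PERSISTENT to PACKET_TAG_PF in
 * the mbuf.h hunk above changes: m_tag_delete_nonpersistent() strips
 * ordinary tags but keeps persistent ones, so pf's tag (qid, tag value,
 * etc.) now survives points in the stack that flush per-packet metadata.
 */
static void
pf_mtag_persistence_sketch(struct mbuf *m)
{
	struct m_tag *t;

	t = m_tag_get(PACKET_TAG_PF, sizeof(struct pf_mtag),
	    M_NOWAIT | M_ZERO);
	if (t == NULL)
		return;
	m_tag_prepend(m, t);

	m_tag_delete_nonpersistent(m);
	/* The PACKET_TAG_PF tag is still attached here, because its type
	 * value now carries MTAG_PERSISTENT. */
}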
*/ @@ -106,11 +132,11 @@ pfsync_stats(u_long off, const char *name, int af1 __unused, int proto __unused) #define p(f, m) if (pfsyncstat.f || sflag <= 1) \ printf(m, (uintmax_t)pfsyncstat.f, plural(pfsyncstat.f)) -#define p2(f, m) if (pfsyncstat.f || sflag <= 1) \ - printf(m, (uintmax_t)pfsyncstat.f) p(pfsyncs_ipackets, "\t%ju packet%s received (IPv4)\n"); p(pfsyncs_ipackets6, "\t%ju packet%s received (IPv6)\n"); + pfsync_acts_stats("\t %ju %s%s received\n", + &pfsyncstat.pfsyncs_iacts[0]); p(pfsyncs_badif, "\t\t%ju packet%s discarded for bad interface\n"); p(pfsyncs_badttl, "\t\t%ju packet%s discarded for bad ttl\n"); p(pfsyncs_hdrops, "\t\t%ju packet%s shorter than header\n"); @@ -123,10 +149,11 @@ pfsync_stats(u_long off, const char *name, int af1 __unused, int proto __unused) p(pfsyncs_badstate, "\t\t%ju failed state lookup/insert%s\n"); p(pfsyncs_opackets, "\t%ju packet%s sent (IPv4)\n"); p(pfsyncs_opackets6, "\t%ju packet%s sent (IPv6)\n"); - p2(pfsyncs_onomem, "\t\t%ju send failed due to mbuf memory error\n"); - p2(pfsyncs_oerrors, "\t\t%ju send error\n"); + pfsync_acts_stats("\t %ju %s%s sent\n", + &pfsyncstat.pfsyncs_oacts[0]); + p(pfsyncs_onomem, "\t\t%ju failure%s due to mbuf memory error\n"); + p(pfsyncs_oerrors, "\t\t%ju send error%s\n"); #undef p -#undef p2 } /* diff --git a/usr.sbin/bsnmpd/modules/snmp_pf/BEGEMOT-PF-MIB.txt b/usr.sbin/bsnmpd/modules/snmp_pf/BEGEMOT-PF-MIB.txt index 0b90bb2..d2b247f 100644 --- a/usr.sbin/bsnmpd/modules/snmp_pf/BEGEMOT-PF-MIB.txt +++ b/usr.sbin/bsnmpd/modules/snmp_pf/BEGEMOT-PF-MIB.txt @@ -585,7 +585,7 @@ PfInterfacesIfEntry ::= SEQUENCE { pfInterfacesIfDescr OCTET STRING, pfInterfacesIfType INTEGER, pfInterfacesIfTZero TimeTicks, - pfInterfacesIfRefsState Unsigned32, + pfInterfacesIfRefsState Null, pfInterfacesIfRefsRule Unsigned32, pfInterfacesIf4BytesInPass Counter64, pfInterfacesIf4BytesInBlock Counter64, diff --git a/usr.sbin/bsnmpd/modules/snmp_pf/pf_snmp.c b/usr.sbin/bsnmpd/modules/snmp_pf/pf_snmp.c index bc4bc35..1048ffe 100644 --- a/usr.sbin/bsnmpd/modules/snmp_pf/pf_snmp.c +++ b/usr.sbin/bsnmpd/modules/snmp_pf/pf_snmp.c @@ -586,11 +586,8 @@ pf_iftable(struct snmp_context __unused *ctx, struct snmp_value *val, val->v.uint32 = (time(NULL) - e->pfi.pfik_tzero) * 100; break; - case LEAF_pfInterfacesIfRefsState: - val->v.uint32 = e->pfi.pfik_states; - break; case LEAF_pfInterfacesIfRefsRule: - val->v.uint32 = e->pfi.pfik_rules; + val->v.uint32 = e->pfi.pfik_rulerefs; break; case LEAF_pfInterfacesIf4BytesInPass: val->v.counter64 = diff --git a/usr.sbin/bsnmpd/modules/snmp_pf/pf_tree.def b/usr.sbin/bsnmpd/modules/snmp_pf/pf_tree.def index 7b791b3..1dfa14c 100644 --- a/usr.sbin/bsnmpd/modules/snmp_pf/pf_tree.def +++ b/usr.sbin/bsnmpd/modules/snmp_pf/pf_tree.def @@ -108,7 +108,7 @@ (2 pfInterfacesIfDescr OCTETSTRING GET) (3 pfInterfacesIfType ENUM ( 0 group 1 instance 2 detached ) GET) (4 pfInterfacesIfTZero TIMETICKS GET) - (5 pfInterfacesIfRefsState UNSIGNED32 GET) + (5 pfInterfacesIfRefsState NULL GET) (6 pfInterfacesIfRefsRule UNSIGNED32 GET) (7 pfInterfacesIf4BytesInPass COUNTER64 GET) (8 pfInterfacesIf4BytesInBlock COUNTER64 GET) |