diff options
45 files changed, 4591 insertions, 9640 deletions
@@ -24,6 +24,10 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 10.x IS SLOW: disable the most expensive debugging functionality run "ln -s 'abort:false,junk:false' /etc/malloc.conf".) +20120908: + The pf(4) packet filter ABI has been changed. pfctl(8) and + the snmp_pf module need to be recompiled to work with the new kernel. + 20120828: A new ZFS feature flag "com.delphix:empty_bpobj" has been merged to -HEAD. Pools that have empty_bpobj in active state can not be diff --git a/contrib/pf/man/pf.4 b/contrib/pf/man/pf.4 index 936a5a8..635078d 100644 --- a/contrib/pf/man/pf.4 +++ b/contrib/pf/man/pf.4 @@ -28,7 +28,7 @@ .\" .\" $FreeBSD$ .\" -.Dd July 17 2011 +.Dd June 29 2012 .Dt PF 4 .Os .Sh NAME @@ -75,6 +75,25 @@ separated by characters, similar to how file system hierarchies are laid out. The final component of the anchor path is the anchor under which operations will be performed. +.Sh SYSCTL VARIABLES AND LOADER TUNABLES +The following +.Xr loader 8 +tunables are available. +.Bl -tag -width indent +.It Va net.pf.states_hashsize +Size of hash tables that store states. +Should be a power of 2. +Default value is 32768. +.It Va net.pf.source_nodes_hashsize +Size of hash table that stores source nodes. +Should be a power of 2. +Default value is 8192. +.El +.Pp +Read-only +.Xr sysctl 8 +variables with matching names are provided to obtain current values +at runtime. 
.Sh IOCTL INTERFACE .Nm supports the following @@ -351,7 +370,6 @@ struct pf_status { u_int64_t scounters[SCNT_MAX]; u_int64_t pcounters[2][2][3]; u_int64_t bcounters[2][2]; - u_int64_t stateid; u_int32_t running; u_int32_t states; u_int32_t src_nodes; @@ -493,7 +511,7 @@ struct pfioc_limit { }; enum { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS, - PF_LIMIT_TABLES, PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_MAX }; + PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_MAX }; .Ed .It Dv DIOCGETLIMIT Fa "struct pfioc_limit *pl" Get the hard diff --git a/contrib/pf/man/pf.conf.5 b/contrib/pf/man/pf.conf.5 index dfec264..fc86111 100644 --- a/contrib/pf/man/pf.conf.5 +++ b/contrib/pf/man/pf.conf.5 @@ -28,7 +28,7 @@ .\" ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" -.Dd January 31 2009 +.Dd June 29 2012 .Dt PF.CONF 5 .Os .Sh NAME @@ -1421,7 +1421,7 @@ has the socket open where the packet is sourced from or destined to (depending on which socket is local). This is in addition to the normal information logged. .Pp -Due to the problems described in the BUGS section only the first packet +Only the first packet logged via .Ar log (all, user) will have the user credentials logged when using stateful matching. @@ -1479,13 +1479,6 @@ of the following keywords: .Bl -tag -width xxxxxxxxxxxxxx -compact .It Ar any Any address. -.It Ar route Aq Ar label -Any address whose associated route has label -.Aq Ar label . -See -.Xr route 4 -and -.Xr route 8 . .It Ar no-route Any address which is not currently routable. .It Ar urpf-failed @@ -1594,7 +1587,6 @@ pass in proto tcp from any to any port 25 pass in proto tcp from 10.0.0.0/8 port \*(Gt 1024 \e to ! 10.1.2.3 port != ssh pass in proto tcp from any os "OpenBSD" -pass in proto tcp from route "DTAG" .Ed .It Ar all This is equivalent to "from any to any". 
@@ -2949,9 +2941,9 @@ proto-list = ( proto-name | proto-number ) [ [ "," ] proto-list ] hosts = "all" | "from" ( "any" | "no-route" | "urpf-failed" | "self" | host | - "{" host-list "}" | "route" string ) [ port ] [ os ] + "{" host-list "}" ) [ port ] [ os ] "to" ( "any" | "no-route" | "self" | host | - "{" host-list "}" | "route" string ) [ port ] + "{" host-list "}" ) [ port ] ipspec = "any" | host | "{" host-list "}" host = [ "!" ] ( address [ "/" mask-bits ] | "\*(Lt" string "\*(Gt" ) @@ -3048,28 +3040,6 @@ Protocol name database. .It Pa /etc/services Service name database. .El -.Sh BUGS -Due to a lock order reversal (LOR) with the socket layer, the use of the -.Ar group -and -.Ar user -filter parameter in conjuction with a Giant-free netstack -can result in a deadlock. -A workaround is available under the -.Va debug.pfugidhack -sysctl which is automatically enabled when a -.Ar user -/ -.Ar group -rule is added or -.Ar log (user) -is specified. -.Pp -Route labels are not supported by the -.Fx -.Xr route 4 -system. -Rules with a route label do not match any traffic. .Sh SEE ALSO .Xr altq 4 , .Xr carp 4 , @@ -3080,7 +3050,6 @@ Rules with a route label do not match any traffic. .Xr pf 4 , .Xr pflow 4 , .Xr pfsync 4 , -.Xr route 4 , .Xr tcp 4 , .Xr udp 4 , .Xr hosts 5 , @@ -3090,7 +3059,6 @@ Rules with a route label do not match any traffic. 
.Xr ftp-proxy 8 , .Xr pfctl 8 , .Xr pflogd 8 , -.Xr route 8 .Sh HISTORY The .Nm diff --git a/contrib/pf/pfctl/parse.y b/contrib/pf/pfctl/parse.y index f798cac..99c26c0 100644 --- a/contrib/pf/pfctl/parse.y +++ b/contrib/pf/pfctl/parse.y @@ -159,8 +159,7 @@ enum { PF_STATE_OPT_MAX, PF_STATE_OPT_NOSYNC, PF_STATE_OPT_SRCTRACK, PF_STATE_OPT_MAX_SRC_STATES, PF_STATE_OPT_MAX_SRC_CONN, PF_STATE_OPT_MAX_SRC_CONN_RATE, PF_STATE_OPT_MAX_SRC_NODES, PF_STATE_OPT_OVERLOAD, PF_STATE_OPT_STATELOCK, - PF_STATE_OPT_TIMEOUT, PF_STATE_OPT_SLOPPY, - PF_STATE_OPT_PFLOW }; + PF_STATE_OPT_TIMEOUT, PF_STATE_OPT_SLOPPY, }; enum { PF_SRCTRACK_NONE, PF_SRCTRACK, PF_SRCTRACK_GLOBAL, PF_SRCTRACK_RULE }; @@ -451,7 +450,7 @@ int parseport(char *, struct range *r, int); %token QUEUE PRIORITY QLIMIT RTABLE %token LOAD RULESET_OPTIMIZATION %token STICKYADDRESS MAXSRCSTATES MAXSRCNODES SOURCETRACK GLOBAL RULE -%token MAXSRCCONN MAXSRCCONNRATE OVERLOAD FLUSH SLOPPY PFLOW +%token MAXSRCCONN MAXSRCCONNRATE OVERLOAD FLUSH SLOPPY %token TAGGED TAG IFBOUND FLOATING STATEPOLICY STATEDEFAULTS ROUTE SETTOS %token DIVERTTO DIVERTREPLY %token <v.string> STRING @@ -2081,15 +2080,6 @@ pfrule : action dir logquick interface route af proto fromto } r.rule_flag |= PFRULE_STATESLOPPY; break; - case PF_STATE_OPT_PFLOW: - if (r.rule_flag & PFRULE_PFLOW) { - yyerror("state pflow " - "option: multiple " - "definitions"); - YYERROR; - } - r.rule_flag |= PFRULE_PFLOW; - break; case PF_STATE_OPT_TIMEOUT: if (o->data.timeout.number == PFTM_ADAPTIVE_START || @@ -2909,26 +2899,6 @@ host : STRING { $$->next = NULL; $$->tail = $$; } - | ROUTE STRING { - $$ = calloc(1, sizeof(struct node_host)); - if ($$ == NULL) { - free($2); - err(1, "host: calloc"); - } - $$->addr.type = PF_ADDR_RTLABEL; - if (strlcpy($$->addr.v.rtlabelname, $2, - sizeof($$->addr.v.rtlabelname)) >= - sizeof($$->addr.v.rtlabelname)) { - yyerror("route label too long, max %u chars", - sizeof($$->addr.v.rtlabelname) - 1); - free($2); - free($$); - YYERROR; - } - 
$$->next = NULL; - $$->tail = $$; - free($2); - } ; number : NUMBER @@ -3597,14 +3567,6 @@ state_opt_item : MAXIMUM NUMBER { $$->next = NULL; $$->tail = $$; } - | PFLOW { - $$ = calloc(1, sizeof(struct node_state_opt)); - if ($$ == NULL) - err(1, "state_opt_item: calloc"); - $$->type = PF_STATE_OPT_PFLOW; - $$->next = NULL; - $$->tail = $$; - } | STRING NUMBER { int i; @@ -5320,7 +5282,6 @@ lookup(char *s) { "out", OUT}, { "overload", OVERLOAD}, { "pass", PASS}, - { "pflow", PFLOW}, { "port", PORT}, { "priority", PRIORITY}, { "priq", PRIQ}, diff --git a/contrib/pf/pfctl/pf_print_state.c b/contrib/pf/pfctl/pf_print_state.c index 0698516..d6637b4 100644 --- a/contrib/pf/pfctl/pf_print_state.c +++ b/contrib/pf/pfctl/pf_print_state.c @@ -119,9 +119,6 @@ print_addr(struct pf_addr_wrap *addr, sa_family_t af, int verbose) case PF_ADDR_URPFFAILED: printf("urpf-failed"); return; - case PF_ADDR_RTLABEL: - printf("route \"%s\"", addr->v.rtlabelname); - return; default: printf("?"); return; @@ -339,8 +336,6 @@ print_state(struct pfsync_state *s, int opts) printf(", rule %u", ntohl(s->rule)); if (s->state_flags & PFSTATE_SLOPPY) printf(", sloppy"); - if (s->state_flags & PFSTATE_PFLOW) - printf(", pflow"); if (s->sync_flags & PFSYNC_FLAG_SRCNODE) printf(", source-track"); if (s->sync_flags & PFSYNC_FLAG_NATSRCNODE) diff --git a/contrib/pf/pfctl/pfctl.c b/contrib/pf/pfctl/pfctl.c index 8b07a2b..90a2bb5 100644 --- a/contrib/pf/pfctl/pfctl.c +++ b/contrib/pf/pfctl/pfctl.c @@ -144,7 +144,6 @@ static const struct { { "states", PF_LIMIT_STATES }, { "src-nodes", PF_LIMIT_SRC_NODES }, { "frags", PF_LIMIT_FRAGS }, - { "tables", PF_LIMIT_TABLES }, { "table-entries", PF_LIMIT_TABLE_ENTRIES }, { NULL, 0 } }; @@ -1553,9 +1552,6 @@ pfctl_fopen(const char *name, const char *mode) void pfctl_init_options(struct pfctl *pf) { - int64_t mem; - int mib[2]; - size_t size; pf->timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL; pf->timeout[PFTM_TCP_OPENING] = PFTM_TCP_OPENING_VAL; @@ 
-1581,21 +1577,8 @@ pfctl_init_options(struct pfctl *pf) pf->limit[PF_LIMIT_STATES] = PFSTATE_HIWAT; pf->limit[PF_LIMIT_FRAGS] = PFFRAG_FRENT_HIWAT; pf->limit[PF_LIMIT_SRC_NODES] = PFSNODE_HIWAT; - pf->limit[PF_LIMIT_TABLES] = PFR_KTABLE_HIWAT; pf->limit[PF_LIMIT_TABLE_ENTRIES] = PFR_KENTRY_HIWAT; - mib[0] = CTL_HW; -#ifdef __FreeBSD__ - mib[1] = HW_PHYSMEM; -#else - mib[1] = HW_PHYSMEM64; -#endif - size = sizeof(mem); - if (sysctl(mib, 2, &mem, &size, NULL, 0) == -1) - err(1, "sysctl"); - if (mem <= 100*1024*1024) - pf->limit[PF_LIMIT_TABLE_ENTRIES] = PFR_KENTRY_HIWAT_SMALL; - pf->debug = PF_DEBUG_URGENT; } diff --git a/contrib/pf/pfctl/pfctl_parser.c b/contrib/pf/pfctl/pfctl_parser.c index d45b9b7..f248995 100644 --- a/contrib/pf/pfctl/pfctl_parser.c +++ b/contrib/pf/pfctl/pfctl_parser.c @@ -955,12 +955,6 @@ print_rule(struct pf_rule *r, const char *anchor_call, int verbose, int numeric) printf("sloppy"); opts = 0; } - if (r->rule_flag & PFRULE_PFLOW) { - if (!opts) - printf(", "); - printf("pflow"); - opts = 0; - } for (i = 0; i < PFTM_MAX; ++i) if (r->timeout[i]) { int j; diff --git a/contrib/pf/pfctl/pfctl_table.c b/contrib/pf/pfctl/pfctl_table.c index 257c014..f3a1efd 100644 --- a/contrib/pf/pfctl/pfctl_table.c +++ b/contrib/pf/pfctl/pfctl_table.c @@ -621,8 +621,7 @@ print_iface(struct pfi_kif *p, int opts) if (!(opts & PF_OPT_VERBOSE2)) return; printf("\tCleared: %s", ctime(&tzero)); - printf("\tReferences: [ States: %-18d Rules: %-18d ]\n", - p->pfik_states, p->pfik_rules); + printf("\tReferences: %-18d\n", p->pfik_rulerefs); for (i = 0; i < 8; i++) { af = (i>>2) & 1; dir = (i>>1) &1; diff --git a/sys/contrib/altq/altq/altq_cbq.c b/sys/contrib/altq/altq/altq_cbq.c index da12cf8..0a33792 100644 --- a/sys/contrib/altq/altq/altq_cbq.c +++ b/sys/contrib/altq/altq/altq_cbq.c @@ -271,10 +271,9 @@ cbq_add_altq(struct pf_altq *a) return (ENODEV); /* allocate and initialize cbq_state_t */ - cbqp = malloc(sizeof(cbq_state_t), M_DEVBUF, M_WAITOK); + cbqp = 
malloc(sizeof(cbq_state_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (cbqp == NULL) return (ENOMEM); - bzero(cbqp, sizeof(cbq_state_t)); CALLOUT_INIT(&cbqp->cbq_callout); cbqp->cbq_qlen = 0; cbqp->ifnp.ifq_ = &ifp->if_snd; /* keep the ifq */ diff --git a/sys/contrib/altq/altq/altq_hfsc.c b/sys/contrib/altq/altq/altq_hfsc.c index bf4f39c..9c91c43 100644 --- a/sys/contrib/altq/altq/altq_hfsc.c +++ b/sys/contrib/altq/altq/altq_hfsc.c @@ -200,10 +200,9 @@ hfsc_add_altq(struct pf_altq *a) if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENODEV); - hif = malloc(sizeof(struct hfsc_if), M_DEVBUF, M_WAITOK); + hif = malloc(sizeof(struct hfsc_if), M_DEVBUF, M_NOWAIT | M_ZERO); if (hif == NULL) return (ENOMEM); - bzero(hif, sizeof(struct hfsc_if)); hif->hif_eligible = ellist_alloc(); if (hif->hif_eligible == NULL) { diff --git a/sys/contrib/altq/altq/altq_priq.c b/sys/contrib/altq/altq/altq_priq.c index 770d4bf..0ec6534 100644 --- a/sys/contrib/altq/altq/altq_priq.c +++ b/sys/contrib/altq/altq/altq_priq.c @@ -132,11 +132,9 @@ priq_add_altq(struct pf_altq *a) if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENODEV); - pif = malloc(sizeof(struct priq_if), - M_DEVBUF, M_WAITOK); + pif = malloc(sizeof(struct priq_if), M_DEVBUF, M_NOWAIT | M_ZERO); if (pif == NULL) return (ENOMEM); - bzero(pif, sizeof(struct priq_if)); pif->pif_bandwidth = a->ifbandwidth; pif->pif_maxpri = -1; pif->pif_ifq = &ifp->if_snd; diff --git a/sys/contrib/altq/altq/altq_subr.c b/sys/contrib/altq/altq/altq_subr.c index f5fe990..2d7ce75 100644 --- a/sys/contrib/altq/altq/altq_subr.c +++ b/sys/contrib/altq/altq/altq_subr.c @@ -401,14 +401,11 @@ tbr_set(ifq, profile) return (0); } - IFQ_UNLOCK(ifq); - tbr = malloc(sizeof(struct tb_regulator), - M_DEVBUF, M_WAITOK); - if (tbr == NULL) { /* can not happen */ + tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_NOWAIT | M_ZERO); + if (tbr == NULL) { IFQ_UNLOCK(ifq); return (ENOMEM); } - bzero(tbr, sizeof(struct tb_regulator)); tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / 
machclk_freq; tbr->tbr_depth = TBR_SCALE(profile->depth); @@ -420,7 +417,6 @@ tbr_set(ifq, profile) tbr->tbr_last = read_machclk(); tbr->tbr_lastop = ALTDQ_REMOVE; - IFQ_LOCK(ifq); otbr = ifq->altq_tbr; ifq->altq_tbr = tbr; /* set the new tbr */ diff --git a/sys/contrib/pf/net/if_pflog.c b/sys/contrib/pf/net/if_pflog.c index 349930b..20feea2 100644 --- a/sys/contrib/pf/net/if_pflog.c +++ b/sys/contrib/pf/net/if_pflog.c @@ -1,10 +1,10 @@ /* $OpenBSD: if_pflog.c,v 1.26 2007/10/18 21:58:18 mpf Exp $ */ /* * The authors of this code are John Ioannidis (ji@tla.org), - * Angelos D. Keromytis (kermit@csd.uch.gr) and + * Angelos D. Keromytis (kermit@csd.uch.gr) and * Niels Provos (provos@physnet.uni-hamburg.de). * - * This code was written by John Ioannidis for BSD/OS in Athens, Greece, + * This code was written by John Ioannidis for BSD/OS in Athens, Greece, * in November 1995. * * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996, @@ -20,7 +20,7 @@ * Permission to use, copy, and modify this software with or without fee * is hereby granted, provided that this entire notice is included in * all copies of any software which is or includes a copy or - * modification of this software. + * modification of this software. * You may use this code under the GNU public license if you so wish. Please * contribute changes back to the authors under this freer than GPL license * so that we may further the use of strong encryption without limitations to @@ -33,61 +33,34 @@ * PURPOSE. */ -#ifdef __FreeBSD__ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + #include "opt_inet.h" #include "opt_inet6.h" #include "opt_bpf.h" #include "opt_pf.h" -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#ifdef DEV_BPF -#define NBPFILTER DEV_BPF -#else -#define NBPFILTER 0 -#endif - -#ifdef DEV_PFLOG -#define NPFLOG DEV_PFLOG -#else -#define NPFLOG 0 -#endif - -#else /* ! 
__FreeBSD__ */ -#include "bpfilter.h" -#include "pflog.h" -#endif /* __FreeBSD__ */ - #include <sys/param.h> -#include <sys/systm.h> +#include <sys/kernel.h> #include <sys/mbuf.h> +#include <sys/module.h> #include <sys/proc.h> #include <sys/socket.h> -#ifdef __FreeBSD__ -#include <sys/kernel.h> -#include <sys/limits.h> -#include <sys/malloc.h> -#include <sys/module.h> #include <sys/sockio.h> -#else -#include <sys/ioctl.h> -#endif +#include <net/bpf.h> #include <net/if.h> -#ifdef __FreeBSD__ #include <net/if_clone.h> -#endif +#include <net/if_pflog.h> #include <net/if_types.h> -#include <net/route.h> -#include <net/bpf.h> +#include <net/pfvar.h> #if defined(INET) || defined(INET6) #include <netinet/in.h> #endif #ifdef INET #include <netinet/in_var.h> -#include <netinet/in_systm.h> #include <netinet/ip.h> #endif @@ -96,14 +69,9 @@ __FBSDID("$FreeBSD$"); #include <netinet6/nd6.h> #endif /* INET6 */ -#include <net/pfvar.h> -#include <net/if_pflog.h> - -#ifdef __FreeBSD__ #ifdef INET #include <machine/in_cksum.h> #endif /* INET */ -#endif /* __FreeBSD__ */ #define PFLOGMTU (32768 + MHLEN + MLEN) @@ -113,170 +81,82 @@ __FBSDID("$FreeBSD$"); #define DPRINTF(x) #endif -void pflogattach(int); -int pflogoutput(struct ifnet *, struct mbuf *, struct sockaddr *, -#ifdef __FreeBSD__ - struct route *); -#else - struct rtentry *); -#endif -int pflogioctl(struct ifnet *, u_long, caddr_t); -void pflogstart(struct ifnet *); -#ifdef __FreeBSD__ -static int pflog_clone_create(struct if_clone *, int, caddr_t); -static void pflog_clone_destroy(struct ifnet *); -#else -int pflog_clone_create(struct if_clone *, int); -int pflog_clone_destroy(struct ifnet *); -#endif +static int pflogoutput(struct ifnet *, struct mbuf *, struct sockaddr *, + struct route *); +static void pflogattach(int); +static int pflogioctl(struct ifnet *, u_long, caddr_t); +static void pflogstart(struct ifnet *); +static int pflog_clone_create(struct if_clone *, int, caddr_t); +static void pflog_clone_destroy(struct 
ifnet *); -LIST_HEAD(, pflog_softc) pflogif_list; -#ifdef __FreeBSD__ IFC_SIMPLE_DECLARE(pflog, 1); -#else -struct if_clone pflog_cloner = - IF_CLONE_INITIALIZER("pflog", pflog_clone_create, pflog_clone_destroy); -#endif struct ifnet *pflogifs[PFLOGIFS_MAX]; /* for fast access */ -void +static void pflogattach(int npflog) { int i; - LIST_INIT(&pflogif_list); for (i = 0; i < PFLOGIFS_MAX; i++) pflogifs[i] = NULL; if_clone_attach(&pflog_cloner); } -#ifdef __FreeBSD__ static int pflog_clone_create(struct if_clone *ifc, int unit, caddr_t param) -#else -int -pflog_clone_create(struct if_clone *ifc, int unit) -#endif { struct ifnet *ifp; - struct pflog_softc *pflogif; - int s; if (unit >= PFLOGIFS_MAX) return (EINVAL); - if ((pflogif = malloc(sizeof(*pflogif), - M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL) - return (ENOMEM); - - pflogif->sc_unit = unit; -#ifdef __FreeBSD__ - ifp = pflogif->sc_ifp = if_alloc(IFT_PFLOG); + ifp = if_alloc(IFT_PFLOG); if (ifp == NULL) { - free(pflogif, M_DEVBUF); return (ENOSPC); } if_initname(ifp, ifc->ifc_name, unit); -#else - ifp = &pflogif->sc_if; - snprintf(ifp->if_xname, sizeof ifp->if_xname, "pflog%d", unit); -#endif - ifp->if_softc = pflogif; ifp->if_mtu = PFLOGMTU; ifp->if_ioctl = pflogioctl; ifp->if_output = pflogoutput; ifp->if_start = pflogstart; -#ifndef __FreeBSD__ - ifp->if_type = IFT_PFLOG; -#endif ifp->if_snd.ifq_maxlen = ifqmaxlen; ifp->if_hdrlen = PFLOG_HDRLEN; if_attach(ifp); -#ifndef __FreeBSD__ - if_alloc_sadl(ifp); -#endif -#if NBPFILTER > 0 -#ifdef __FreeBSD__ bpfattach(ifp, DLT_PFLOG, PFLOG_HDRLEN); -#else - bpfattach(&pflogif->sc_if.if_bpf, ifp, DLT_PFLOG, PFLOG_HDRLEN); -#endif -#endif - s = splnet(); -#ifdef __FreeBSD__ - /* XXX: Why pf(4) lock?! Better add a pflog lock?! 
*/ - PF_LOCK(); -#endif - LIST_INSERT_HEAD(&pflogif_list, pflogif, sc_list); pflogifs[unit] = ifp; -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - splx(s); return (0); } -#ifdef __FreeBSD__ static void pflog_clone_destroy(struct ifnet *ifp) -#else -int -pflog_clone_destroy(struct ifnet *ifp) -#endif { - struct pflog_softc *pflogif = ifp->if_softc; - int s; + int i; - s = splnet(); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif - pflogifs[pflogif->sc_unit] = NULL; - LIST_REMOVE(pflogif, sc_list); -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - splx(s); + for (i = 0; i < PFLOGIFS_MAX; i++) + if (pflogifs[i] == ifp) + pflogifs[i] = NULL; -#if NBPFILTER > 0 bpfdetach(ifp); -#endif if_detach(ifp); -#ifdef __FreeBSD__ if_free(ifp); -#endif - free(pflogif, M_DEVBUF); -#ifndef __FreeBSD__ - return (0); -#endif } /* * Start output on the pflog interface. */ -void +static void pflogstart(struct ifnet *ifp) { struct mbuf *m; -#ifndef __FreeBSD__ - int s; -#endif for (;;) { -#ifdef __FreeBSD__ IF_LOCK(&ifp->if_snd); _IF_DROP(&ifp->if_snd); _IF_DEQUEUE(&ifp->if_snd, m); IF_UNLOCK(&ifp->if_snd); -#else - s = splnet(); - IF_DROP(&ifp->if_snd); - IF_DEQUEUE(&ifp->if_snd, m); - splx(s); -#endif if (m == NULL) return; @@ -285,35 +165,24 @@ pflogstart(struct ifnet *ifp) } } -int +static int pflogoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, -#ifdef __FreeBSD__ struct route *rt) -#else - struct rtentry *rt) -#endif { m_freem(m); return (0); } /* ARGSUSED */ -int +static int pflogioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { switch (cmd) { case SIOCSIFFLAGS: -#ifdef __FreeBSD__ if (ifp->if_flags & IFF_UP) ifp->if_drv_flags |= IFF_DRV_RUNNING; else ifp->if_drv_flags &= ~IFF_DRV_RUNNING; -#else - if (ifp->if_flags & IFF_UP) - ifp->if_flags |= IFF_RUNNING; - else - ifp->if_flags &= ~IFF_RUNNING; -#endif break; default: return (ENOTTY); @@ -322,12 +191,11 @@ pflogioctl(struct ifnet *ifp, u_long cmd, caddr_t data) return (0); } -int +static int pflog_packet(struct pfi_kif *kif, 
struct mbuf *m, sa_family_t af, u_int8_t dir, u_int8_t reason, struct pf_rule *rm, struct pf_rule *am, - struct pf_ruleset *ruleset, struct pf_pdesc *pd) + struct pf_ruleset *ruleset, struct pf_pdesc *pd, int lookupsafe) { -#if NBPFILTER > 0 struct ifnet *ifn; struct pfloghdr hdr; @@ -354,23 +222,18 @@ pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, strlcpy(hdr.ruleset, ruleset->anchor->name, sizeof(hdr.ruleset)); } - if (rm->log & PF_LOG_SOCKET_LOOKUP && !pd->lookup.done) -#ifdef __FreeBSD__ - /* - * XXX: This should not happen as we force an early lookup - * via debug.pfugidhack - */ - ; /* empty */ -#else - pd->lookup.done = pf_socket_lookup(dir, pd); -#endif - if (pd->lookup.done > 0) { + /* + * XXXGL: we avoid pf_socket_lookup() when we are holding + * state lock, since this leads to unsafe LOR. + * These conditions are very very rare, however. + */ + if (rm->log & PF_LOG_SOCKET_LOOKUP && !pd->lookup.done && lookupsafe) + pd->lookup.done = pf_socket_lookup(dir, pd, m); + if (pd->lookup.done > 0) hdr.uid = pd->lookup.uid; - hdr.pid = pd->lookup.pid; - } else { + else hdr.uid = UID_MAX; - hdr.pid = NO_PID; - } + hdr.pid = NO_PID; hdr.rule_uid = rm->cuid; hdr.rule_pid = rm->cpid; hdr.dir = dir; @@ -387,18 +250,11 @@ pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, ifn->if_opackets++; ifn->if_obytes += m->m_pkthdr.len; -#ifdef __FreeBSD__ BPF_MTAP2(ifn, &hdr, PFLOG_HDRLEN, m); -#else - bpf_mtap_hdr(ifn->if_bpf, (char *)&hdr, PFLOG_HDRLEN, m, - BPF_DIRECTION_OUT); -#endif -#endif return (0); } -#ifdef __FreeBSD__ static int pflog_modevent(module_t mod, int type, void *data) { @@ -407,14 +263,14 @@ pflog_modevent(module_t mod, int type, void *data) switch (type) { case MOD_LOAD: pflogattach(1); - PF_LOCK(); + PF_RULES_WLOCK(); pflog_packet_ptr = pflog_packet; - PF_UNLOCK(); + PF_RULES_WUNLOCK(); break; case MOD_UNLOAD: - PF_LOCK(); + PF_RULES_WLOCK(); pflog_packet_ptr = NULL; - PF_UNLOCK(); + 
PF_RULES_WUNLOCK(); if_clone_detach(&pflog_cloner); break; default: @@ -432,4 +288,3 @@ static moduledata_t pflog_mod = { "pflog", pflog_modevent, 0 }; DECLARE_MODULE(pflog, pflog_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(pflog, PFLOG_MODVER); MODULE_DEPEND(pflog, pf, PF_MODVER, PF_MODVER, PF_MODVER); -#endif /* __FreeBSD__ */ diff --git a/sys/contrib/pf/net/if_pflog.h b/sys/contrib/pf/net/if_pflog.h index 5f48f6c..0faeb7d 100644 --- a/sys/contrib/pf/net/if_pflog.h +++ b/sys/contrib/pf/net/if_pflog.h @@ -29,16 +29,6 @@ #define PFLOGIFS_MAX 16 -struct pflog_softc { -#ifdef __FreeBSD__ - struct ifnet *sc_ifp; /* the interface pointer */ -#else - struct ifnet sc_if; /* the interface */ -#endif - int sc_unit; - LIST_ENTRY(pflog_softc) sc_list; -}; - #define PFLOG_RULESET_NAME_SIZE 16 struct pfloghdr { @@ -62,40 +52,15 @@ struct pfloghdr { /* minus pad, also used as a signature */ #define PFLOG_REAL_HDRLEN offsetof(struct pfloghdr, pad) -/* XXX remove later when old format logs are no longer needed */ -struct old_pfloghdr { - u_int32_t af; - char ifname[IFNAMSIZ]; - short rnr; - u_short reason; - u_short action; - u_short dir; -}; -#define OLD_PFLOG_HDRLEN sizeof(struct old_pfloghdr) - #ifdef _KERNEL -#ifdef __FreeBSD__ struct pf_rule; struct pf_ruleset; struct pfi_kif; struct pf_pdesc; -#if 0 -typedef int pflog_packet_t(struct pfi_kif *, struct mbuf *, sa_family_t, - u_int8_t, u_int8_t, struct pf_rule *, struct pf_rule *, - struct pf_ruleset *, struct pf_pdesc *); -extern pflog_packet_t *pflog_packet_ptr; -#endif -#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g,h) do { \ +#define PFLOG_PACKET(i,a,b,c,d,e,f,g,h,di) do { \ if (pflog_packet_ptr != NULL) \ - pflog_packet_ptr(i,a,b,c,d,e,f,g,h); \ + pflog_packet_ptr(i,a,b,c,d,e,f,g,h,di); \ } while (0) -#else /* ! 
__FreeBSD__ */ -#if NPFLOG > 0 -#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g,h) pflog_packet(i,a,b,c,d,e,f,g,h) -#else -#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g,h) ((void)0) -#endif /* NPFLOG > 0 */ -#endif #endif /* _KERNEL */ #endif /* _NET_IF_PFLOG_H_ */ diff --git a/sys/contrib/pf/net/if_pflow.h b/sys/contrib/pf/net/if_pflow.h deleted file mode 100644 index 35ccbeb..0000000 --- a/sys/contrib/pf/net/if_pflow.h +++ /dev/null @@ -1,126 +0,0 @@ -/* $OpenBSD: if_pflow.h,v 1.5 2009/02/27 11:09:36 gollo Exp $ */ - -/* - * Copyright (c) 2008 Henning Brauer <henning@openbsd.org> - * Copyright (c) 2008 Joerg Goltermann <jg@osn.de> - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT - * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
- * - * $FreeBSD$ - */ - -#ifndef _NET_IF_PFLOW_H_ -#define _NET_IF_PFLOW_H_ - -#define PFLOW_ID_LEN sizeof(u_int64_t) - -#define PFLOW_MAXFLOWS 30 -#define PFLOW_VERSION 5 -#define PFLOW_ENGINE_TYPE 42 -#define PFLOW_ENGINE_ID 42 -#define PFLOW_MAXBYTES 0xffffffff -#define PFLOW_TIMEOUT 30 - -struct pflow_flow { - u_int32_t src_ip; - u_int32_t dest_ip; - u_int32_t nexthop_ip; - u_int16_t if_index_in; - u_int16_t if_index_out; - u_int32_t flow_packets; - u_int32_t flow_octets; - u_int32_t flow_start; - u_int32_t flow_finish; - u_int16_t src_port; - u_int16_t dest_port; - u_int8_t pad1; - u_int8_t tcp_flags; - u_int8_t protocol; - u_int8_t tos; - u_int16_t src_as; - u_int16_t dest_as; - u_int8_t src_mask; - u_int8_t dest_mask; - u_int16_t pad2; -} __packed; - -#ifdef _KERNEL - -extern int pflow_ok; - -struct pflow_softc { - struct ifnet sc_if; - struct ifnet *sc_pflow_ifp; - - unsigned int sc_count; - unsigned int sc_maxcount; - u_int64_t sc_gcounter; - struct ip_moptions sc_imo; -#ifdef __FreeBSD__ - struct callout sc_tmo; -#else - struct timeout sc_tmo; -#endif - struct in_addr sc_sender_ip; - u_int16_t sc_sender_port; - struct in_addr sc_receiver_ip; - u_int16_t sc_receiver_port; - struct mbuf *sc_mbuf; /* current cumulative mbuf */ - SLIST_ENTRY(pflow_softc) sc_next; -}; - -extern struct pflow_softc *pflowif; - -#endif /* _KERNEL */ - -struct pflow_header { - u_int16_t version; - u_int16_t count; - u_int32_t uptime_ms; - u_int32_t time_sec; - u_int32_t time_nanosec; - u_int32_t flow_sequence; - u_int8_t engine_type; - u_int8_t engine_id; - u_int8_t reserved1; - u_int8_t reserved2; -} __packed; - -#define PFLOW_HDRLEN sizeof(struct pflow_header) - -struct pflowstats { - u_int64_t pflow_flows; - u_int64_t pflow_packets; - u_int64_t pflow_onomem; - u_int64_t pflow_oerrors; -}; - -/* - * Configuration structure for SIOCSETPFLOW SIOCGETPFLOW - */ -struct pflowreq { - struct in_addr sender_ip; - struct in_addr receiver_ip; - u_int16_t receiver_port; - u_int16_t 
addrmask; -#define PFLOW_MASK_SRCIP 0x01 -#define PFLOW_MASK_DSTIP 0x02 -#define PFLOW_MASK_DSTPRT 0x04 -}; - -#ifdef _KERNEL -int export_pflow(struct pf_state *); -int pflow_sysctl(int *, u_int, void *, size_t *, void *, size_t); -#endif /* _KERNEL */ - -#endif /* _NET_IF_PFLOW_H_ */ diff --git a/sys/contrib/pf/net/if_pfsync.c b/sys/contrib/pf/net/if_pfsync.c index 7da6c2e..28af641 100644 --- a/sys/contrib/pf/net/if_pfsync.c +++ b/sys/contrib/pf/net/if_pfsync.c @@ -54,91 +54,44 @@ * 1.173 - correct expire time processing */ -#ifdef __FreeBSD__ -#include "opt_inet.h" -#include "opt_inet6.h" -#include "opt_pf.h" - #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#define NBPFILTER 1 -#endif /* __FreeBSD__ */ +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_pf.h" #include <sys/param.h> -#include <sys/kernel.h> -#ifdef __FreeBSD__ #include <sys/bus.h> +#include <sys/endian.h> #include <sys/interrupt.h> -#include <sys/priv.h> -#endif -#include <sys/proc.h> -#include <sys/systm.h> -#include <sys/time.h> +#include <sys/kernel.h> +#include <sys/lock.h> #include <sys/mbuf.h> -#include <sys/socket.h> -#ifdef __FreeBSD__ -#include <sys/endian.h> -#include <sys/malloc.h> #include <sys/module.h> -#include <sys/sockio.h> -#include <sys/taskqueue.h> -#include <sys/lock.h> #include <sys/mutex.h> +#include <sys/priv.h> #include <sys/protosw.h> -#else -#include <sys/ioctl.h> -#include <sys/timeout.h> -#endif +#include <sys/socket.h> +#include <sys/sockio.h> #include <sys/sysctl.h> -#ifndef __FreeBSD__ -#include <sys/pool.h> -#endif +#include <net/bpf.h> #include <net/if.h> -#ifdef __FreeBSD__ #include <net/if_clone.h> -#endif #include <net/if_types.h> -#include <net/route.h> -#include <net/bpf.h> -#include <net/netisr.h> -#ifdef __FreeBSD__ -#include <net/vnet.h> -#endif +#include <net/pfvar.h> +#include <net/if_pfsync.h> -#include <netinet/in.h> #include <netinet/if_ether.h> -#include <netinet/tcp.h> -#include <netinet/tcp_seq.h> - -#ifdef INET -#include 
<netinet/in_systm.h> +#include <netinet/in.h> #include <netinet/in_var.h> #include <netinet/ip.h> -#include <netinet/ip_var.h> -#endif - -#ifdef INET6 -#include <netinet6/nd6.h> -#endif /* INET6 */ - -#ifdef __FreeBSD__ -#include <netinet/ip_carp.h> -#else -#include "carp.h" -#if NCARP > 0 #include <netinet/ip_carp.h> -#endif -#endif - -#include <net/pfvar.h> -#include <net/if_pfsync.h> - -#ifndef __FreeBSD__ -#include "bpfilter.h" -#include "pfsync.h" -#endif +#include <netinet/ip_var.h> +#include <netinet/tcp.h> +#include <netinet/tcp_fsm.h> +#include <netinet/tcp_seq.h> #define PFSYNC_MINPKT ( \ sizeof(struct ip) + \ @@ -152,26 +105,22 @@ struct pfsync_pkt { u_int8_t flags; }; -int pfsync_input_hmac(struct mbuf *, int); - -int pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *, - struct pfsync_state_peer *); - -int pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int); -int pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int); -int pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int); -int pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int); -int pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int); -int pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int); -int pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int); -int pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int); -int pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int); -int pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int); -int pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int); - -int pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int); - -int (*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = { +static int pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *, + struct pfsync_state_peer *); +static int pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_iack(struct 
pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int); +static int pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int); + +static int (*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = { pfsync_in_clr, /* PFSYNC_ACT_CLR */ pfsync_in_ins, /* PFSYNC_ACT_INS */ pfsync_in_iack, /* PFSYNC_ACT_INS_ACK */ @@ -194,12 +143,12 @@ struct pfsync_q { }; /* we have one of these for every PFSYNC_S_ */ -int pfsync_out_state(struct pf_state *, struct mbuf *, int); -int pfsync_out_iack(struct pf_state *, struct mbuf *, int); -int pfsync_out_upd_c(struct pf_state *, struct mbuf *, int); -int pfsync_out_del(struct pf_state *, struct mbuf *, int); +static int pfsync_out_state(struct pf_state *, struct mbuf *, int); +static int pfsync_out_iack(struct pf_state *, struct mbuf *, int); +static int pfsync_out_upd_c(struct pf_state *, struct mbuf *, int); +static int pfsync_out_del(struct pf_state *, struct mbuf *, int); -struct pfsync_q pfsync_qs[] = { +static struct pfsync_q pfsync_qs[] = { { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_INS }, { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_UPD }, @@ -207,98 +156,71 @@ struct pfsync_q pfsync_qs[] = { { pfsync_out_del, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C } }; -void pfsync_q_ins(struct pf_state *, int); -void pfsync_q_del(struct pf_state *); +static 
void pfsync_q_ins(struct pf_state *, int); +static void pfsync_q_del(struct pf_state *); + +static void pfsync_update_state(struct pf_state *); struct pfsync_upd_req_item { TAILQ_ENTRY(pfsync_upd_req_item) ur_entry; struct pfsync_upd_req ur_msg; }; -TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item); struct pfsync_deferral { - TAILQ_ENTRY(pfsync_deferral) pd_entry; - struct pf_state *pd_st; - struct mbuf *pd_m; -#ifdef __FreeBSD__ - struct callout pd_tmo; -#else - struct timeout pd_tmo; -#endif -}; -TAILQ_HEAD(pfsync_deferrals, pfsync_deferral); + struct pfsync_softc *pd_sc; + TAILQ_ENTRY(pfsync_deferral) pd_entry; + u_int pd_refs; + struct callout pd_tmo; -#define PFSYNC_PLSIZE MAX(sizeof(struct pfsync_upd_req_item), \ - sizeof(struct pfsync_deferral)) - -#ifdef notyet -int pfsync_out_tdb(struct tdb *, struct mbuf *, int); -#endif + struct pf_state *pd_st; + struct mbuf *pd_m; +}; struct pfsync_softc { -#ifdef __FreeBSD__ + /* Configuration */ struct ifnet *sc_ifp; -#else - struct ifnet sc_if; -#endif struct ifnet *sc_sync_if; - -#ifdef __FreeBSD__ - uma_zone_t sc_pool; -#else - struct pool sc_pool; -#endif - - struct ip_moptions sc_imo; - - struct in_addr sc_sync_peer; - u_int8_t sc_maxupdates; -#ifdef __FreeBSD__ - int pfsync_sync_ok; -#endif - - struct ip sc_template; - - struct pf_state_queue sc_qs[PFSYNC_S_COUNT]; - size_t sc_len; - - struct pfsync_upd_reqs sc_upd_req_list; - - int sc_defer; - struct pfsync_deferrals sc_deferrals; - u_int sc_deferred; - + struct ip_moptions sc_imo; + struct in_addr sc_sync_peer; + uint32_t sc_flags; +#define PFSYNCF_OK 0x00000001 +#define PFSYNCF_DEFER 0x00000002 +#define PFSYNCF_PUSH 0x00000004 + uint8_t sc_maxupdates; + struct ip sc_template; + struct callout sc_tmo; + struct mtx sc_mtx; + + /* Queued data */ + size_t sc_len; + TAILQ_HEAD(, pf_state) sc_qs[PFSYNC_S_COUNT]; + TAILQ_HEAD(, pfsync_upd_req_item) sc_upd_req_list; + TAILQ_HEAD(, pfsync_deferral) sc_deferrals; + u_int sc_deferred; void *sc_plus; - size_t sc_pluslen; 
- - u_int32_t sc_ureq_sent; - int sc_bulk_tries; -#ifdef __FreeBSD__ - struct callout sc_bulkfail_tmo; -#else - struct timeout sc_bulkfail_tmo; -#endif - - u_int32_t sc_ureq_received; - struct pf_state *sc_bulk_next; - struct pf_state *sc_bulk_last; -#ifdef __FreeBSD__ - struct callout sc_bulk_tmo; -#else - struct timeout sc_bulk_tmo; -#endif + size_t sc_pluslen; + + /* Bulk update info */ + struct mtx sc_bulk_mtx; + uint32_t sc_ureq_sent; + int sc_bulk_tries; + uint32_t sc_ureq_received; + int sc_bulk_hashid; + uint64_t sc_bulk_stateid; + uint32_t sc_bulk_creatorid; + struct callout sc_bulk_tmo; + struct callout sc_bulkfail_tmo; +}; - TAILQ_HEAD(, tdb) sc_tdb_q; +#define PFSYNC_LOCK(sc) mtx_lock(&(sc)->sc_mtx) +#define PFSYNC_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) +#define PFSYNC_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) -#ifdef __FreeBSD__ - struct callout sc_tmo; -#else - struct timeout sc_tmo; -#endif -}; +#define PFSYNC_BLOCK(sc) mtx_lock(&(sc)->sc_bulk_mtx) +#define PFSYNC_BUNLOCK(sc) mtx_unlock(&(sc)->sc_bulk_mtx) +#define PFSYNC_BLOCK_ASSERT(sc) mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED) -#ifdef __FreeBSD__ -static MALLOC_DEFINE(M_PFSYNC, "pfsync", "pfsync data"); +static MALLOC_DEFINE(M_PFSYNC, "pfsync", "pfsync(4) data"); static VNET_DEFINE(struct pfsync_softc *, pfsyncif) = NULL; #define V_pfsyncif VNET(pfsyncif) static VNET_DEFINE(void *, pfsync_swi_cookie) = NULL; @@ -308,14 +230,14 @@ static VNET_DEFINE(struct pfsyncstats, pfsyncstats); static VNET_DEFINE(int, pfsync_carp_adj) = CARP_MAXSKEW; #define V_pfsync_carp_adj VNET(pfsync_carp_adj) +static void pfsync_timeout(void *); +static void pfsync_push(struct pfsync_softc *); static void pfsyncintr(void *); -static int pfsync_multicast_setup(struct pfsync_softc *); +static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *, + void *); static void pfsync_multicast_cleanup(struct pfsync_softc *); static int pfsync_init(void); static void pfsync_uninit(void); -static void 
pfsync_sendout1(int); - -#define schednetisr(NETISR_PFSYNC) swi_sched(V_pfsync_swi_cookie, 0) SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC"); SYSCTL_VNET_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_RW, @@ -323,85 +245,46 @@ SYSCTL_VNET_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_RW, "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)"); SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_RW, &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment"); -#else -struct pfsync_softc *pfsyncif = NULL; -struct pfsyncstats pfsyncstats; -#define V_pfsyncstats pfsyncstats -#endif -void pfsyncattach(int); -#ifdef __FreeBSD__ -int pfsync_clone_create(struct if_clone *, int, caddr_t); -void pfsync_clone_destroy(struct ifnet *); -#else -int pfsync_clone_create(struct if_clone *, int); -int pfsync_clone_destroy(struct ifnet *); -#endif -int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, - struct pf_state_peer *); -void pfsync_update_net_tdb(struct pfsync_tdb *); -int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *, -#ifdef __FreeBSD__ - struct route *); -#else - struct rtentry *); -#endif -int pfsyncioctl(struct ifnet *, u_long, caddr_t); -void pfsyncstart(struct ifnet *); +static int pfsync_clone_create(struct if_clone *, int, caddr_t); +static void pfsync_clone_destroy(struct ifnet *); +static int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, + struct pf_state_peer *); +static int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *, + struct route *); +static int pfsyncioctl(struct ifnet *, u_long, caddr_t); -struct mbuf *pfsync_if_dequeue(struct ifnet *); +static int pfsync_defer(struct pf_state *, struct mbuf *); +static void pfsync_undefer(struct pfsync_deferral *, int); +static void pfsync_undefer_state(struct pf_state *, int); +static void pfsync_defer_tmo(void *); -void pfsync_deferred(struct pf_state *, int); -void pfsync_undefer(struct pfsync_deferral *, int); -void 
pfsync_defer_tmo(void *); +static void pfsync_request_update(u_int32_t, u_int64_t); +static void pfsync_update_state_req(struct pf_state *); -void pfsync_request_update(u_int32_t, u_int64_t); -void pfsync_update_state_req(struct pf_state *); +static void pfsync_drop(struct pfsync_softc *); +static void pfsync_sendout(int); +static void pfsync_send_plus(void *, size_t); -void pfsync_drop(struct pfsync_softc *); -void pfsync_sendout(void); -void pfsync_send_plus(void *, size_t); -void pfsync_timeout(void *); -void pfsync_tdb_timeout(void *); +static void pfsync_bulk_start(void); +static void pfsync_bulk_status(u_int8_t); +static void pfsync_bulk_update(void *); +static void pfsync_bulk_fail(void *); -void pfsync_bulk_start(void); -void pfsync_bulk_status(u_int8_t); -void pfsync_bulk_update(void *); -void pfsync_bulk_fail(void *); - -#ifdef __FreeBSD__ -/* XXX: ugly */ -#define betoh64 (unsigned long long)be64toh -#define timeout_del callout_stop +#ifdef IPSEC +static void pfsync_update_net_tdb(struct pfsync_tdb *); #endif #define PFSYNC_MAX_BULKTRIES 12 -#ifndef __FreeBSD__ -int pfsync_sync_ok; -#endif -#ifdef __FreeBSD__ VNET_DEFINE(struct ifc_simple_data, pfsync_cloner_data); VNET_DEFINE(struct if_clone, pfsync_cloner); #define V_pfsync_cloner_data VNET(pfsync_cloner_data) #define V_pfsync_cloner VNET(pfsync_cloner) IFC_SIMPLE_DECLARE(pfsync, 1); -#else -struct if_clone pfsync_cloner = - IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy); -#endif -void -pfsyncattach(int npfsync) -{ - if_clone_attach(&pfsync_cloner); -} -int -#ifdef __FreeBSD__ +static int pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) -#else -pfsync_clone_create(struct if_clone *ifc, int unit) -#endif { struct pfsync_softc *sc; struct ifnet *ifp; @@ -410,204 +293,99 @@ pfsync_clone_create(struct if_clone *ifc, int unit) if (unit != 0) return (EINVAL); -#ifdef __FreeBSD__ sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO); - 
sc->pfsync_sync_ok = 1; -#else - pfsync_sync_ok = 1; - sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_NOWAIT | M_ZERO); -#endif + sc->sc_flags |= PFSYNCF_OK; for (q = 0; q < PFSYNC_S_COUNT; q++) TAILQ_INIT(&sc->sc_qs[q]); -#ifdef __FreeBSD__ - sc->sc_pool = uma_zcreate("pfsync", PFSYNC_PLSIZE, NULL, NULL, NULL, - NULL, UMA_ALIGN_PTR, 0); -#else - pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, 0, 0, "pfsync", NULL); -#endif TAILQ_INIT(&sc->sc_upd_req_list); TAILQ_INIT(&sc->sc_deferrals); - sc->sc_deferred = 0; - - TAILQ_INIT(&sc->sc_tdb_q); sc->sc_len = PFSYNC_MINPKT; sc->sc_maxupdates = 128; -#ifndef __FreeBSD__ - sc->sc_imo.imo_membership = (struct in_multi **)malloc( - (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS, - M_WAITOK | M_ZERO); - sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; -#endif - -#ifdef __FreeBSD__ ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC); if (ifp == NULL) { - uma_zdestroy(sc->sc_pool); free(sc, M_PFSYNC); return (ENOSPC); } if_initname(ifp, ifc->ifc_name, unit); -#else - ifp = &sc->sc_if; - snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit); -#endif ifp->if_softc = sc; ifp->if_ioctl = pfsyncioctl; ifp->if_output = pfsyncoutput; - ifp->if_start = pfsyncstart; ifp->if_type = IFT_PFSYNC; ifp->if_snd.ifq_maxlen = ifqmaxlen; ifp->if_hdrlen = sizeof(struct pfsync_header); ifp->if_mtu = ETHERMTU; -#ifdef __FreeBSD__ + mtx_init(&sc->sc_mtx, "pfsync", NULL, MTX_DEF); + mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF); callout_init(&sc->sc_tmo, CALLOUT_MPSAFE); - callout_init_mtx(&sc->sc_bulk_tmo, &pf_task_mtx, 0); - callout_init(&sc->sc_bulkfail_tmo, CALLOUT_MPSAFE); -#else - timeout_set(&sc->sc_tmo, pfsync_timeout, sc); - timeout_set(&sc->sc_bulk_tmo, pfsync_bulk_update, sc); - timeout_set(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc); -#endif + callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0); + callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0); if_attach(ifp); -#ifndef __FreeBSD__ - if_alloc_sadl(ifp); - 
-#if NCARP > 0 - if_addgroup(ifp, "carp"); -#endif -#endif -#if NBPFILTER > 0 -#ifdef __FreeBSD__ bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN); -#else - bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN); -#endif -#endif -#ifdef __FreeBSD__ V_pfsyncif = sc; -#else - pfsyncif = sc; -#endif return (0); } -#ifdef __FreeBSD__ -void -#else -int -#endif +static void pfsync_clone_destroy(struct ifnet *ifp) { struct pfsync_softc *sc = ifp->if_softc; -#ifdef __FreeBSD__ - PF_LOCK(); -#endif - timeout_del(&sc->sc_bulkfail_tmo); - timeout_del(&sc->sc_bulk_tmo); - timeout_del(&sc->sc_tmo); -#ifdef __FreeBSD__ - PF_UNLOCK(); - if (!sc->pfsync_sync_ok && carp_demote_adj_p) + /* + * At this stage, everything should have already been + * cleared by pfsync_uninit(), and we have only to + * drain callouts. + */ + while (sc->sc_deferred > 0) { + struct pfsync_deferral *pd = TAILQ_FIRST(&sc->sc_deferrals); + + TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); + sc->sc_deferred--; + if (callout_stop(&pd->pd_tmo)) { + pf_release_state(pd->pd_st); + m_freem(pd->pd_m); + free(pd, M_PFSYNC); + } else { + pd->pd_refs++; + callout_drain(&pd->pd_tmo); + free(pd, M_PFSYNC); + } + } + + callout_drain(&sc->sc_tmo); + callout_drain(&sc->sc_bulkfail_tmo); + callout_drain(&sc->sc_bulk_tmo); + + if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy"); -#else -#if NCARP > 0 - if (!pfsync_sync_ok) - carp_group_demote_adj(&sc->sc_if, -1); -#endif -#endif -#if NBPFILTER > 0 bpfdetach(ifp); -#endif if_detach(ifp); pfsync_drop(sc); - while (sc->sc_deferred > 0) - pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0); - -#ifdef __FreeBSD__ - UMA_DESTROY(sc->sc_pool); -#else - pool_destroy(&sc->sc_pool); -#endif -#ifdef __FreeBSD__ if_free(ifp); if (sc->sc_imo.imo_membership) pfsync_multicast_cleanup(sc); + mtx_destroy(&sc->sc_mtx); + mtx_destroy(&sc->sc_bulk_mtx); free(sc, M_PFSYNC); -#else - free(sc->sc_imo.imo_membership, M_IPMOPTS); - free(sc, 
M_DEVBUF); -#endif -#ifdef __FreeBSD__ V_pfsyncif = NULL; -#else - pfsyncif = NULL; -#endif - -#ifndef __FreeBSD__ - return (0); -#endif -} - -struct mbuf * -pfsync_if_dequeue(struct ifnet *ifp) -{ - struct mbuf *m; -#ifndef __FreeBSD__ - int s; -#endif - -#ifdef __FreeBSD__ - IF_LOCK(&ifp->if_snd); - _IF_DROP(&ifp->if_snd); - _IF_DEQUEUE(&ifp->if_snd, m); - IF_UNLOCK(&ifp->if_snd); -#else - s = splnet(); - IF_DEQUEUE(&ifp->if_snd, m); - splx(s); -#endif - - return (m); } -/* - * Start output on the pfsync interface. - */ -void -pfsyncstart(struct ifnet *ifp) -{ - struct mbuf *m; - - while ((m = pfsync_if_dequeue(ifp)) != NULL) { -#ifndef __FreeBSD__ - IF_DROP(&ifp->if_snd); -#endif - m_freem(m); - } -} - -int +static int pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, struct pf_state_peer *d) { if (s->scrub.scrub_flag && d->scrub == NULL) { -#ifdef __FreeBSD__ - d->scrub = pool_get(&V_pf_state_scrub_pl, PR_NOWAIT | PR_ZERO); -#else - d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO); -#endif + d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO); if (d->scrub == NULL) return (ENOMEM); } @@ -615,99 +393,29 @@ pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, return (0); } -#ifndef __FreeBSD__ -void -pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) -{ - bzero(sp, sizeof(struct pfsync_state)); - - /* copy from state key */ - sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0]; - sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1]; - sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0]; - sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1]; - sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0]; - sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1]; - sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0]; - sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1]; - sp->proto = st->key[PF_SK_WIRE]->proto; - sp->af = st->key[PF_SK_WIRE]->af; - - /* copy from state */ 
- strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname)); - bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr)); - sp->creation = htonl(time_uptime - st->creation); - sp->expire = pf_state_expires(st); - if (sp->expire <= time_second) - sp->expire = htonl(0); - else - sp->expire = htonl(sp->expire - time_second); - - sp->direction = st->direction; - sp->log = st->log; - sp->timeout = st->timeout; - sp->state_flags = st->state_flags; - if (st->src_node) - sp->sync_flags |= PFSYNC_FLAG_SRCNODE; - if (st->nat_src_node) - sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE; - - bcopy(&st->id, &sp->id, sizeof(sp->id)); - sp->creatorid = st->creatorid; - pf_state_peer_hton(&st->src, &sp->src); - pf_state_peer_hton(&st->dst, &sp->dst); - - if (st->rule.ptr == NULL) - sp->rule = htonl(-1); - else - sp->rule = htonl(st->rule.ptr->nr); - if (st->anchor.ptr == NULL) - sp->anchor = htonl(-1); - else - sp->anchor = htonl(st->anchor.ptr->nr); - if (st->nat_rule.ptr == NULL) - sp->nat_rule = htonl(-1); - else - sp->nat_rule = htonl(st->nat_rule.ptr->nr); - - pf_state_counter_hton(st->packets[0], sp->packets[0]); - pf_state_counter_hton(st->packets[1], sp->packets[1]); - pf_state_counter_hton(st->bytes[0], sp->bytes[0]); - pf_state_counter_hton(st->bytes[1], sp->bytes[1]); -} -#endif - -int +static int pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) { + struct pfsync_softc *sc = V_pfsyncif; struct pf_state *st = NULL; struct pf_state_key *skw = NULL, *sks = NULL; struct pf_rule *r = NULL; struct pfi_kif *kif; - int pool_flags; int error; -#ifdef __FreeBSD__ - PF_LOCK_ASSERT(); + PF_RULES_RASSERT(); if (sp->creatorid == 0 && V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) { -#endif - printf("pfsync_state_import: invalid creator id:" - " %08x\n", ntohl(sp->creatorid)); + printf("%s: invalid creator id: %08x\n", __func__, + ntohl(sp->creatorid)); return (EINVAL); } - if ((kif = pfi_kif_get(sp->ifname)) == NULL) { 
-#ifdef __FreeBSD__ + if ((kif = pfi_kif_find(sp->ifname)) == NULL) { if (V_pf_status.debug >= PF_DEBUG_MISC) -#else - if (pf_status.debug >= PF_DEBUG_MISC) -#endif - printf("pfsync_state_import: " - "unknown interface: %s\n", sp->ifname); + printf("%s: unknown interface: %s\n", __func__, + sp->ifname); if (flags & PFSYNC_SI_IOCTL) return (EINVAL); return (0); /* skip this state */ @@ -723,34 +431,18 @@ pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) r = pf_main_ruleset.rules[ PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; else -#ifdef __FreeBSD__ r = &V_pf_default_rule; -#else - r = &pf_default_rule; -#endif if ((r->max_states && r->states_cur >= r->max_states)) goto cleanup; -#ifdef __FreeBSD__ - if (flags & PFSYNC_SI_IOCTL) - pool_flags = PR_WAITOK | PR_ZERO; - else - pool_flags = PR_NOWAIT | PR_ZERO; - - if ((st = pool_get(&V_pf_state_pl, pool_flags)) == NULL) - goto cleanup; -#else - if (flags & PFSYNC_SI_IOCTL) - pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO; - else - pool_flags = PR_LIMITFAIL | PR_ZERO; - - if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL) + /* + * XXXGL: consider M_WAITOK in ioctl path after. 
+ */ + if ((st = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO)) == NULL) goto cleanup; -#endif - if ((skw = pf_alloc_state_key(pool_flags)) == NULL) + if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL) goto cleanup; if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0], @@ -759,7 +451,8 @@ pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) &sp->key[PF_SK_STACK].addr[1], sp->af) || sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] || sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1]) { - if ((sks = pf_alloc_state_key(pool_flags)) == NULL) + sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT); + if (sks == NULL) goto cleanup; } else sks = skw; @@ -788,17 +481,13 @@ pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) /* copy to state */ bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); st->creation = time_uptime - ntohl(sp->creation); - st->expire = time_second; + st->expire = time_uptime; if (sp->expire) { uint32_t timeout; timeout = r->timeout[sp->timeout]; if (!timeout) -#ifdef __FreeBSD__ timeout = V_pf_default_rule.timeout[sp->timeout]; -#else - timeout = pf_default_rule.timeout[sp->timeout]; -#endif /* sp->expire may have been adaptively scaled by export. 
*/ st->expire -= timeout - ntohl(sp->expire); @@ -809,7 +498,7 @@ pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) st->timeout = sp->timeout; st->state_flags = sp->state_flags; - bcopy(sp->id, &st->id, sizeof(st->id)); + st->id = sp->id; st->creatorid = sp->creatorid; pf_state_peer_ntoh(&sp->src, &st->src); pf_state_peer_ntoh(&sp->dst, &st->dst); @@ -826,8 +515,8 @@ pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) r->states_cur++; r->states_tot++; - if (!ISSET(flags, PFSYNC_SI_IOCTL)) - SET(st->state_flags, PFSTATE_NOSYNC); + if (!(flags & PFSYNC_SI_IOCTL)) + st->state_flags |= PFSTATE_NOSYNC; if ((error = pf_state_insert(kif, skw, sks, st)) != 0) { /* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */ @@ -835,14 +524,15 @@ pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) goto cleanup_state; } - if (!ISSET(flags, PFSYNC_SI_IOCTL)) { - CLR(st->state_flags, PFSTATE_NOSYNC); - if (ISSET(st->state_flags, PFSTATE_ACK)) { + if (!(flags & PFSYNC_SI_IOCTL)) { + st->state_flags &= ~PFSTATE_NOSYNC; + if (st->state_flags & PFSTATE_ACK) { pfsync_q_ins(st, PFSYNC_S_IACK); - schednetisr(NETISR_PFSYNC); + pfsync_push(sc); } } - CLR(st->state_flags, PFSTATE_ACK); + st->state_flags &= ~PFSTATE_ACK; + PF_STATE_UNLOCK(st); return (0); @@ -850,49 +540,26 @@ cleanup: error = ENOMEM; if (skw == sks) sks = NULL; -#ifdef __FreeBSD__ if (skw != NULL) - pool_put(&V_pf_state_key_pl, skw); + uma_zfree(V_pf_state_key_z, skw); if (sks != NULL) - pool_put(&V_pf_state_key_pl, sks); -#else - if (skw != NULL) - pool_put(&pf_state_key_pl, skw); - if (sks != NULL) - pool_put(&pf_state_key_pl, sks); -#endif + uma_zfree(V_pf_state_key_z, sks); -cleanup_state: /* pf_state_insert frees the state keys */ +cleanup_state: /* pf_state_insert() frees the state keys. 
*/ if (st) { -#ifdef __FreeBSD__ if (st->dst.scrub) - pool_put(&V_pf_state_scrub_pl, st->dst.scrub); + uma_zfree(V_pf_state_scrub_z, st->dst.scrub); if (st->src.scrub) - pool_put(&V_pf_state_scrub_pl, st->src.scrub); - pool_put(&V_pf_state_pl, st); -#else - if (st->dst.scrub) - pool_put(&pf_state_scrub_pl, st->dst.scrub); - if (st->src.scrub) - pool_put(&pf_state_scrub_pl, st->src.scrub); - pool_put(&pf_state_pl, st); -#endif + uma_zfree(V_pf_state_scrub_z, st->src.scrub); + uma_zfree(V_pf_state_z, st); } return (error); } -void -#ifdef __FreeBSD__ +static void pfsync_input(struct mbuf *m, __unused int off) -#else -pfsync_input(struct mbuf *m, ...) -#endif { -#ifdef __FreeBSD__ struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif struct pfsync_pkt pkt; struct ip *ip = mtod(m, struct ip *); struct pfsync_header *ph; @@ -900,15 +567,13 @@ pfsync_input(struct mbuf *m, ...) int offset; int rv; + uint16_t count; V_pfsyncstats.pfsyncs_ipackets++; - /* verify that we have a sync interface configured */ -#ifdef __FreeBSD__ - if (!sc || !sc->sc_sync_if || !V_pf_status.running) -#else - if (!sc || !sc->sc_sync_if || !pf_status.running) -#endif + /* Verify that we have a sync interface configured. */ + if (!sc || !sc->sc_sync_if || !V_pf_status.running || + (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) goto done; /* verify that the packet came in on the right interface */ @@ -917,13 +582,8 @@ pfsync_input(struct mbuf *m, ...) goto done; } -#ifdef __FreeBSD__ sc->sc_ifp->if_ipackets++; sc->sc_ifp->if_ibytes += m->m_pkthdr.len; -#else - sc->sc_if.if_ipackets++; - sc->sc_if.if_ibytes += m->m_pkthdr.len; -#endif /* verify that the IP TTL is 255. */ if (ip->ip_ttl != PFSYNC_DFLTTL) { V_pfsyncstats.pfsyncs_badttl++; @@ -951,23 +611,17 @@ pfsync_input(struct mbuf *m, ...) 
goto done; } -#if 0 - if (pfsync_input_hmac(m, offset) != 0) { - /* XXX stats */ - goto done; - } -#endif - /* Cheaper to grab this now than having to mess with mbufs later */ pkt.ip = ip; pkt.src = ip->ip_src; pkt.flags = 0; -#ifdef __FreeBSD__ + /* + * Trusting pf_chksum during packet processing, as well as seeking + * in interface name tree, require holding PF_RULES_RLOCK(). + */ + PF_RULES_RLOCK(); if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) -#else - if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) -#endif pkt.flags |= PFSYNC_SI_CKSUM; offset += sizeof(*ph); @@ -977,34 +631,34 @@ pfsync_input(struct mbuf *m, ...) if (subh.action >= PFSYNC_ACT_MAX) { V_pfsyncstats.pfsyncs_badact++; + PF_RULES_RUNLOCK(); goto done; } - rv = (*pfsync_acts[subh.action])(&pkt, m, offset, - ntohs(subh.count)); - if (rv == -1) + count = ntohs(subh.count); + V_pfsyncstats.pfsyncs_iacts[subh.action] += count; + rv = (*pfsync_acts[subh.action])(&pkt, m, offset, count); + if (rv == -1) { + PF_RULES_RUNLOCK(); return; + } offset += rv; } + PF_RULES_RUNLOCK(); done: m_freem(m); } -int +static int pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct pfsync_clr *clr; struct mbuf *mp; int len = sizeof(*clr) * count; int i, offp; - - struct pf_state *st, *nexts; - struct pf_state_key *sk, *nextsk; - struct pf_state_item *si; u_int32_t creatorid; - int s; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { @@ -1013,64 +667,33 @@ pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) } clr = (struct pfsync_clr *)(mp->m_data + offp); - s = splsoftnet(); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif for (i = 0; i < count; i++) { creatorid = clr[i].creatorid; - if (clr[i].ifname[0] == '\0') { -#ifdef __FreeBSD__ - for (st = RB_MIN(pf_state_tree_id, &V_tree_id); - st; st = nexts) { - nexts = RB_NEXT(pf_state_tree_id, &V_tree_id, st); -#else - for (st = RB_MIN(pf_state_tree_id, &tree_id); - st; st = 
nexts) { - nexts = RB_NEXT(pf_state_tree_id, &tree_id, st); -#endif - if (st->creatorid == creatorid) { - SET(st->state_flags, PFSTATE_NOSYNC); - pf_unlink_state(st); - } - } - } else { - if (pfi_kif_get(clr[i].ifname) == NULL) - continue; + if (clr[i].ifname[0] != '\0' && + pfi_kif_find(clr[i].ifname) == NULL) + continue; - /* XXX correct? */ -#ifdef __FreeBSD__ - for (sk = RB_MIN(pf_state_tree, &V_pf_statetbl); -#else - for (sk = RB_MIN(pf_state_tree, &pf_statetbl); -#endif - sk; sk = nextsk) { - nextsk = RB_NEXT(pf_state_tree, -#ifdef __FreeBSD__ - &V_pf_statetbl, sk); -#else - &pf_statetbl, sk); -#endif - TAILQ_FOREACH(si, &sk->states, entry) { - if (si->s->creatorid == creatorid) { - SET(si->s->state_flags, - PFSTATE_NOSYNC); - pf_unlink_state(si->s); - } + for (int i = 0; i <= V_pf_hashmask; i++) { + struct pf_idhash *ih = &V_pf_idhash[i]; + struct pf_state *s; +relock: + PF_HASHROW_LOCK(ih); + LIST_FOREACH(s, &ih->states, entry) { + if (s->creatorid == creatorid) { + s->state_flags |= PFSTATE_NOSYNC; + pf_unlink_state(s, PF_ENTER_LOCKED); + goto relock; } } + PF_HASHROW_UNLOCK(ih); } } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - splx(s); return (len); } -int +static int pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct mbuf *mp; @@ -1078,8 +701,6 @@ pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) int len = sizeof(*sp) * count; int i, offp; - int s; - mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; @@ -1087,55 +708,38 @@ pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) } sa = (struct pfsync_state *)(mp->m_data + offp); - s = splsoftnet(); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif for (i = 0; i < count; i++) { sp = &sa[i]; - /* check for invalid values */ + /* Check for invalid values. 
*/ if (sp->timeout >= PFTM_MAX || sp->src.state > PF_TCPS_PROXY_DST || sp->dst.state > PF_TCPS_PROXY_DST || sp->direction > PF_OUT || (sp->af != AF_INET && sp->af != AF_INET6)) { -#ifdef __FreeBSD__ - if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif - printf("pfsync_input: PFSYNC5_ACT_INS: " - "invalid value\n"); - } + if (V_pf_status.debug >= PF_DEBUG_MISC) + printf("%s: invalid value\n", __func__); V_pfsyncstats.pfsyncs_badval++; continue; } - if (pfsync_state_import(sp, pkt->flags) == ENOMEM) { - /* drop out, but process the rest of the actions */ + if (pfsync_state_import(sp, pkt->flags) == ENOMEM) + /* Drop out, but process the rest of the actions. */ break; - } } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - splx(s); return (len); } -int +static int pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct pfsync_ins_ack *ia, *iaa; - struct pf_state_cmp id_key; struct pf_state *st; struct mbuf *mp; int len = count * sizeof(*ia); int offp, i; - int s; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { @@ -1144,27 +748,20 @@ pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) } iaa = (struct pfsync_ins_ack *)(mp->m_data + offp); - s = splsoftnet(); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif for (i = 0; i < count; i++) { ia = &iaa[i]; - bcopy(&ia->id, &id_key.id, sizeof(id_key.id)); - id_key.creatorid = ia->creatorid; - - st = pf_find_state_byid(&id_key); + st = pf_find_state_byid(ia->id, ia->creatorid); if (st == NULL) continue; - if (ISSET(st->state_flags, PFSTATE_ACK)) - pfsync_deferred(st, 0); + if (st->state_flags & PFSTATE_ACK) { + PFSYNC_LOCK(V_pfsyncif); + pfsync_undefer_state(st, 0); + PFSYNC_UNLOCK(V_pfsyncif); + } + PF_STATE_UNLOCK(st); } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - splx(s); /* * XXX this is not yet implemented, but we know the size of the * message so we can skip it. 
@@ -1173,12 +770,14 @@ pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) return (count * sizeof(struct pfsync_ins_ack)); } -int +static int pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src, struct pfsync_state_peer *dst) { int sfail = 0; + PF_STATE_LOCK_ASSERT(st); + /* * The state should never go backwards except * for syn-proxy states. Neither should the @@ -1207,11 +806,11 @@ pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src, return (sfail); } -int +static int pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { + struct pfsync_softc *sc = V_pfsyncif; struct pfsync_state *sa, *sp; - struct pf_state_cmp id_key; struct pf_state_key *sk; struct pf_state *st; int sfail; @@ -1219,7 +818,6 @@ pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) struct mbuf *mp; int len = count * sizeof(*sp); int offp, i; - int s; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { @@ -1228,10 +826,6 @@ pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) } sa = (struct pfsync_state *)(mp->m_data + offp); - s = splsoftnet(); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif for (i = 0; i < count; i++) { sp = &sa[i]; @@ -1239,11 +833,7 @@ pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) if (sp->timeout >= PFTM_MAX || sp->src.state > PF_TCPS_PROXY_DST || sp->dst.state > PF_TCPS_PROXY_DST) { -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif printf("pfsync_input: PFSYNC_ACT_UPD: " "invalid value\n"); } @@ -1251,10 +841,7 @@ pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) continue; } - bcopy(sp->id, &id_key.id, sizeof(id_key.id)); - id_key.creatorid = sp->creatorid; - - st = pf_find_state_byid(&id_key); + st = pf_find_state_byid(sp->id, sp->creatorid); if (st == NULL) { /* insert the update */ if (pfsync_state_import(sp, 0)) @@ -1262,8 
+849,11 @@ pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) continue; } - if (ISSET(st->state_flags, PFSTATE_ACK)) - pfsync_deferred(st, 1); + if (st->state_flags & PFSTATE_ACK) { + PFSYNC_LOCK(sc); + pfsync_undefer_state(st, 1); + PFSYNC_UNLOCK(sc); + } sk = st->key[PF_SK_WIRE]; /* XXX right one? */ sfail = 0; @@ -1281,44 +871,40 @@ pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) } if (sfail) { -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif printf("pfsync: %s stale update (%d)" " id: %016llx creatorid: %08x\n", (sfail < 7 ? "ignoring" : "partial"), - sfail, betoh64(st->id), + sfail, (unsigned long long)be64toh(st->id), ntohl(st->creatorid)); } V_pfsyncstats.pfsyncs_stale++; pfsync_update_state(st); - schednetisr(NETISR_PFSYNC); + PF_STATE_UNLOCK(st); + PFSYNC_LOCK(sc); + pfsync_push(sc); + PFSYNC_UNLOCK(sc); continue; } pfsync_alloc_scrub_memory(&sp->dst, &st->dst); pf_state_peer_ntoh(&sp->src, &st->src); pf_state_peer_ntoh(&sp->dst, &st->dst); - st->expire = time_second; + st->expire = time_uptime; st->timeout = sp->timeout; st->pfsync_time = time_uptime; + PF_STATE_UNLOCK(st); } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - splx(s); return (len); } -int +static int pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { + struct pfsync_softc *sc = V_pfsyncif; struct pfsync_upd_c *ua, *up; struct pf_state_key *sk; - struct pf_state_cmp id_key; struct pf_state *st; int len = count * sizeof(*up); @@ -1326,7 +912,6 @@ pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) struct mbuf *mp; int offp, i; - int s; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { @@ -1335,10 +920,6 @@ pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) } ua = (struct pfsync_upd_c *)(mp->m_data + offp); - s = splsoftnet(); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif for (i = 0; i < 
count; i++) { up = &ua[i]; @@ -1346,11 +927,7 @@ pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) if (up->timeout >= PFTM_MAX || up->src.state > PF_TCPS_PROXY_DST || up->dst.state > PF_TCPS_PROXY_DST) { -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif printf("pfsync_input: " "PFSYNC_ACT_UPD_C: " "invalid value\n"); @@ -1359,18 +936,20 @@ pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) continue; } - bcopy(&up->id, &id_key.id, sizeof(id_key.id)); - id_key.creatorid = up->creatorid; - - st = pf_find_state_byid(&id_key); + st = pf_find_state_byid(up->id, up->creatorid); if (st == NULL) { /* We don't have this state. Ask for it. */ - pfsync_request_update(id_key.creatorid, id_key.id); + PFSYNC_LOCK(sc); + pfsync_request_update(up->creatorid, up->id); + PFSYNC_UNLOCK(sc); continue; } - if (ISSET(st->state_flags, PFSTATE_ACK)) - pfsync_deferred(st, 1); + if (st->state_flags & PFSTATE_ACK) { + PFSYNC_LOCK(sc); + pfsync_undefer_state(st, 1); + PFSYNC_UNLOCK(sc); + } sk = st->key[PF_SK_WIRE]; /* XXX right one? 
*/ sfail = 0; @@ -1387,39 +966,35 @@ pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) } if (sfail) { -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif printf("pfsync: ignoring stale update " "(%d) id: %016llx " "creatorid: %08x\n", sfail, - betoh64(st->id), + (unsigned long long)be64toh(st->id), ntohl(st->creatorid)); } V_pfsyncstats.pfsyncs_stale++; pfsync_update_state(st); - schednetisr(NETISR_PFSYNC); + PF_STATE_UNLOCK(st); + PFSYNC_LOCK(sc); + pfsync_push(sc); + PFSYNC_UNLOCK(sc); continue; } pfsync_alloc_scrub_memory(&up->dst, &st->dst); pf_state_peer_ntoh(&up->src, &st->src); pf_state_peer_ntoh(&up->dst, &st->dst); - st->expire = time_second; + st->expire = time_uptime; st->timeout = up->timeout; st->pfsync_time = time_uptime; + PF_STATE_UNLOCK(st); } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - splx(s); return (len); } -int +static int pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct pfsync_upd_req *ur, *ura; @@ -1427,7 +1002,6 @@ pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) int len = count * sizeof(*ur); int i, offp; - struct pf_state_cmp id_key; struct pf_state *st; mp = m_pulldown(m, offset, len, &offp); @@ -1437,46 +1011,38 @@ pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) } ura = (struct pfsync_upd_req *)(mp->m_data + offp); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif for (i = 0; i < count; i++) { ur = &ura[i]; - bcopy(&ur->id, &id_key.id, sizeof(id_key.id)); - id_key.creatorid = ur->creatorid; - - if (id_key.id == 0 && id_key.creatorid == 0) + if (ur->id == 0 && ur->creatorid == 0) pfsync_bulk_start(); else { - st = pf_find_state_byid(&id_key); + st = pf_find_state_byid(ur->id, ur->creatorid); if (st == NULL) { V_pfsyncstats.pfsyncs_badstate++; continue; } - if (ISSET(st->state_flags, PFSTATE_NOSYNC)) + if (st->state_flags & PFSTATE_NOSYNC) { + 
PF_STATE_UNLOCK(st); continue; + } pfsync_update_state_req(st); + PF_STATE_UNLOCK(st); } } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif return (len); } -int +static int pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct mbuf *mp; struct pfsync_state *sa, *sp; - struct pf_state_cmp id_key; struct pf_state *st; int len = count * sizeof(*sp); int offp, i; - int s; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { @@ -1485,42 +1051,29 @@ pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) } sa = (struct pfsync_state *)(mp->m_data + offp); - s = splsoftnet(); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif for (i = 0; i < count; i++) { sp = &sa[i]; - bcopy(sp->id, &id_key.id, sizeof(id_key.id)); - id_key.creatorid = sp->creatorid; - - st = pf_find_state_byid(&id_key); + st = pf_find_state_byid(sp->id, sp->creatorid); if (st == NULL) { V_pfsyncstats.pfsyncs_badstate++; continue; } - SET(st->state_flags, PFSTATE_NOSYNC); - pf_unlink_state(st); + st->state_flags |= PFSTATE_NOSYNC; + pf_unlink_state(st, PF_ENTER_LOCKED); } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - splx(s); return (len); } -int +static int pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct mbuf *mp; struct pfsync_del_c *sa, *sp; - struct pf_state_cmp id_key; struct pf_state *st; int len = count * sizeof(*sp); int offp, i; - int s; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { @@ -1529,52 +1082,42 @@ pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) } sa = (struct pfsync_del_c *)(mp->m_data + offp); - s = splsoftnet(); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif for (i = 0; i < count; i++) { sp = &sa[i]; - bcopy(&sp->id, &id_key.id, sizeof(id_key.id)); - id_key.creatorid = sp->creatorid; - - st = pf_find_state_byid(&id_key); + st = pf_find_state_byid(sp->id, sp->creatorid); if (st == NULL) { V_pfsyncstats.pfsyncs_badstate++; continue; } - SET(st->state_flags, 
PFSTATE_NOSYNC); - pf_unlink_state(st); + st->state_flags |= PFSTATE_NOSYNC; + pf_unlink_state(st, PF_ENTER_LOCKED); } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - splx(s); return (len); } -int +static int pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { -#ifdef __FreeBSD__ struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif struct pfsync_bus *bus; struct mbuf *mp; int len = count * sizeof(*bus); int offp; + PFSYNC_BLOCK(sc); + /* If we're not waiting for a bulk update, who cares. */ - if (sc->sc_ureq_sent == 0) + if (sc->sc_ureq_sent == 0) { + PFSYNC_BUNLOCK(sc); return (len); + } mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { + PFSYNC_BUNLOCK(sc); V_pfsyncstats.pfsyncs_badlen++; return (-1); } @@ -1582,23 +1125,12 @@ pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) switch (bus->status) { case PFSYNC_BUS_START: -#ifdef __FreeBSD__ callout_reset(&sc->sc_bulkfail_tmo, 4 * hz + - V_pf_pool_limits[PF_LIMIT_STATES].limit / + V_pf_limits[PF_LIMIT_STATES].limit / ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) / sizeof(struct pfsync_state)), - pfsync_bulk_fail, V_pfsyncif); -#else - timeout_add(&sc->sc_bulkfail_tmo, 4 * hz + - pf_pool_limits[PF_LIMIT_STATES].limit / - ((sc->sc_if.if_mtu - PFSYNC_MINPKT) / - sizeof(struct pfsync_state))); -#endif -#ifdef __FreeBSD__ + pfsync_bulk_fail, sc); if (V_pf_status.debug >= PF_DEBUG_MISC) -#else - if (pf_status.debug >= PF_DEBUG_MISC) -#endif printf("pfsync: received bulk update start\n"); break; @@ -1608,42 +1140,27 @@ pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) /* that's it, we're happy */ sc->sc_ureq_sent = 0; sc->sc_bulk_tries = 0; - timeout_del(&sc->sc_bulkfail_tmo); -#ifdef __FreeBSD__ - if (!sc->pfsync_sync_ok && carp_demote_adj_p) + callout_stop(&sc->sc_bulkfail_tmo); + if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync bulk done"); - 
sc->pfsync_sync_ok = 1; -#else -#if NCARP > 0 - if (!pfsync_sync_ok) - carp_group_demote_adj(&sc->sc_if, -1); -#endif - pfsync_sync_ok = 1; -#endif -#ifdef __FreeBSD__ + sc->sc_flags |= PFSYNCF_OK; if (V_pf_status.debug >= PF_DEBUG_MISC) -#else - if (pf_status.debug >= PF_DEBUG_MISC) -#endif printf("pfsync: received valid " "bulk update end\n"); } else { -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) -#else - if (pf_status.debug >= PF_DEBUG_MISC) -#endif printf("pfsync: received invalid " "bulk update end: bad timestamp\n"); } break; } + PFSYNC_BUNLOCK(sc); return (len); } -int +static int pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { int len = count * sizeof(struct pfsync_tdb); @@ -1662,16 +1179,8 @@ pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) } tp = (struct pfsync_tdb *)(mp->m_data + offp); - s = splsoftnet(); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif for (i = 0; i < count; i++) pfsync_update_net_tdb(&tp[i]); -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - splx(s); #endif return (len); @@ -1679,7 +1188,7 @@ pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) #if defined(IPSEC) /* Update an in-kernel tdb. Silently fail if no tdb is found. */ -void +static void pfsync_update_net_tdb(struct pfsync_tdb *pt) { struct tdb *tdb; @@ -1688,34 +1197,27 @@ pfsync_update_net_tdb(struct pfsync_tdb *pt) /* check for invalid values */ if (ntohl(pt->spi) <= SPI_RESERVED_MAX || (pt->dst.sa.sa_family != AF_INET && - pt->dst.sa.sa_family != AF_INET6)) + pt->dst.sa.sa_family != AF_INET6)) goto bad; - s = spltdb(); tdb = gettdb(pt->spi, &pt->dst, pt->sproto); if (tdb) { pt->rpl = ntohl(pt->rpl); - pt->cur_bytes = betoh64(pt->cur_bytes); + pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes); /* Neither replay nor byte counter should ever decrease. 
*/ if (pt->rpl < tdb->tdb_rpl || pt->cur_bytes < tdb->tdb_cur_bytes) { - splx(s); goto bad; } tdb->tdb_rpl = pt->rpl; tdb->tdb_cur_bytes = pt->cur_bytes; } - splx(s); return; bad: -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) -#else - if (pf_status.debug >= PF_DEBUG_MISC) -#endif printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " "invalid value\n"); V_pfsyncstats.pfsyncs_badstate++; @@ -1724,7 +1226,7 @@ bad: #endif -int +static int pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { /* check if we are at the right place in the packet */ @@ -1736,7 +1238,7 @@ pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) return (-1); } -int +static int pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { V_pfsyncstats.pfsyncs_badact++; @@ -1745,51 +1247,31 @@ pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) return (-1); } -int +static int pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, -#ifdef __FreeBSD__ struct route *rt) -#else - struct rtentry *rt) -#endif { m_freem(m); return (0); } /* ARGSUSED */ -int +static int pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { -#ifndef __FreeBSD__ - struct proc *p = curproc; -#endif struct pfsync_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; - struct ip_moptions *imo = &sc->sc_imo; struct pfsyncreq pfsyncr; - struct ifnet *sifp; - struct ip *ip; - int s, error; + int error; switch (cmd) { -#if 0 - case SIOCSIFADDR: - case SIOCAIFADDR: - case SIOCSIFDSTADDR: -#endif case SIOCSIFFLAGS: -#ifdef __FreeBSD__ + PFSYNC_LOCK(sc); if (ifp->if_flags & IFF_UP) ifp->if_drv_flags |= IFF_DRV_RUNNING; else ifp->if_drv_flags &= ~IFF_DRV_RUNNING; -#else - if (ifp->if_flags & IFF_UP) - ifp->if_flags |= IFF_RUNNING; - else - ifp->if_flags &= ~IFF_RUNNING; -#endif + PFSYNC_UNLOCK(sc); break; case SIOCSIFMTU: if (!sc->sc_sync_if || @@ -1797,201 +1279,128 @@ pfsyncioctl(struct ifnet *ifp, 
u_long cmd, caddr_t data) ifr->ifr_mtu > sc->sc_sync_if->if_mtu) return (EINVAL); if (ifr->ifr_mtu < ifp->if_mtu) { - s = splnet(); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif - pfsync_sendout(); -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - splx(s); + PFSYNC_LOCK(sc); + if (sc->sc_len > PFSYNC_MINPKT) + pfsync_sendout(1); + PFSYNC_UNLOCK(sc); } ifp->if_mtu = ifr->ifr_mtu; break; case SIOCGETPFSYNC: bzero(&pfsyncr, sizeof(pfsyncr)); + PFSYNC_LOCK(sc); if (sc->sc_sync_if) { strlcpy(pfsyncr.pfsyncr_syncdev, sc->sc_sync_if->if_xname, IFNAMSIZ); } pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; - pfsyncr.pfsyncr_defer = sc->sc_defer; + pfsyncr.pfsyncr_defer = (PFSYNCF_DEFER == + (sc->sc_flags & PFSYNCF_DEFER)); + PFSYNC_UNLOCK(sc); return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))); case SIOCSETPFSYNC: -#ifdef __FreeBSD__ + { + struct ip_moptions *imo = &sc->sc_imo; + struct ifnet *sifp; + struct ip *ip; + void *mship = NULL; + if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) -#else - if ((error = suser(p, p->p_acflag)) != 0) -#endif return (error); if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr)))) return (error); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif + if (pfsyncr.pfsyncr_maxupdates > 255) + return (EINVAL); + + if (pfsyncr.pfsyncr_syncdev[0] == 0) + sifp = NULL; + else if ((sifp = ifunit_ref(pfsyncr.pfsyncr_syncdev)) == NULL) + return (EINVAL); + + if (pfsyncr.pfsyncr_syncpeer.s_addr == 0 && sifp != NULL) + mship = malloc((sizeof(struct in_multi *) * + IP_MIN_MEMBERSHIPS), M_PFSYNC, M_WAITOK | M_ZERO); + + PFSYNC_LOCK(sc); if (pfsyncr.pfsyncr_syncpeer.s_addr == 0) -#ifdef __FreeBSD__ sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP); -#else - sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP; -#endif else sc->sc_sync_peer.s_addr = pfsyncr.pfsyncr_syncpeer.s_addr; - if (pfsyncr.pfsyncr_maxupdates > 255) -#ifdef __FreeBSD__ - { - PF_UNLOCK(); -#endif - return (EINVAL); -#ifdef __FreeBSD__ - } 
-#endif sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; - sc->sc_defer = pfsyncr.pfsyncr_defer; + if (pfsyncr.pfsyncr_defer) { + sc->sc_flags |= PFSYNCF_DEFER; + pfsync_defer_ptr = pfsync_defer; + } else { + sc->sc_flags &= ~PFSYNCF_DEFER; + pfsync_defer_ptr = NULL; + } - if (pfsyncr.pfsyncr_syncdev[0] == 0) { + if (sifp == NULL) { + if (sc->sc_sync_if) + if_rele(sc->sc_sync_if); sc->sc_sync_if = NULL; -#ifdef __FreeBSD__ - PF_UNLOCK(); if (imo->imo_membership) pfsync_multicast_cleanup(sc); -#else - if (imo->imo_num_memberships > 0) { - in_delmulti(imo->imo_membership[ - --imo->imo_num_memberships]); - imo->imo_multicast_ifp = NULL; - } -#endif + PFSYNC_UNLOCK(sc); break; } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) - return (EINVAL); - -#ifdef __FreeBSD__ - PF_LOCK(); -#endif - s = splnet(); -#ifdef __FreeBSD__ - if (sifp->if_mtu < sc->sc_ifp->if_mtu || -#else - if (sifp->if_mtu < sc->sc_if.if_mtu || -#endif + if (sc->sc_len > PFSYNC_MINPKT && + (sifp->if_mtu < sc->sc_ifp->if_mtu || (sc->sc_sync_if != NULL && sifp->if_mtu < sc->sc_sync_if->if_mtu) || - sifp->if_mtu < MCLBYTES - sizeof(struct ip)) - pfsync_sendout(); - sc->sc_sync_if = sifp; + sifp->if_mtu < MCLBYTES - sizeof(struct ip))) + pfsync_sendout(1); -#ifdef __FreeBSD__ - if (imo->imo_membership) { - PF_UNLOCK(); + if (imo->imo_membership) pfsync_multicast_cleanup(sc); - PF_LOCK(); - } -#else - if (imo->imo_num_memberships > 0) { - in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); - imo->imo_multicast_ifp = NULL; - } -#endif -#ifdef __FreeBSD__ - if (sc->sc_sync_if && - sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) { - PF_UNLOCK(); - error = pfsync_multicast_setup(sc); - if (error) + if (sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) { + error = pfsync_multicast_setup(sc, sifp, mship); + if (error) { + if_rele(sifp); + free(mship, M_PFSYNC); return (error); - PF_LOCK(); - } -#else - if (sc->sc_sync_if && - 
sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { - struct in_addr addr; - - if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) { - sc->sc_sync_if = NULL; - splx(s); - return (EADDRNOTAVAIL); } - - addr.s_addr = INADDR_PFSYNC_GROUP; - - if ((imo->imo_membership[0] = - in_addmulti(&addr, sc->sc_sync_if)) == NULL) { - sc->sc_sync_if = NULL; - splx(s); - return (ENOBUFS); - } - imo->imo_num_memberships++; - imo->imo_multicast_ifp = sc->sc_sync_if; - imo->imo_multicast_ttl = PFSYNC_DFLTTL; - imo->imo_multicast_loop = 0; } -#endif /* !__FreeBSD__ */ + if (sc->sc_sync_if) + if_rele(sc->sc_sync_if); + sc->sc_sync_if = sifp; ip = &sc->sc_template; bzero(ip, sizeof(*ip)); ip->ip_v = IPVERSION; ip->ip_hl = sizeof(sc->sc_template) >> 2; ip->ip_tos = IPTOS_LOWDELAY; - /* len and id are set later */ -#ifdef __FreeBSD__ + /* len and id are set later. */ ip->ip_off = IP_DF; -#else - ip->ip_off = htons(IP_DF); -#endif ip->ip_ttl = PFSYNC_DFLTTL; ip->ip_p = IPPROTO_PFSYNC; ip->ip_src.s_addr = INADDR_ANY; ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr; - if (sc->sc_sync_if) { - /* Request a full state table update. */ - sc->sc_ureq_sent = time_uptime; -#ifdef __FreeBSD__ - if (sc->pfsync_sync_ok && carp_demote_adj_p) - (*carp_demote_adj_p)(V_pfsync_carp_adj, - "pfsync bulk start"); - sc->pfsync_sync_ok = 0; -#else -#if NCARP > 0 - if (pfsync_sync_ok) - carp_group_demote_adj(&sc->sc_if, 1); -#endif - pfsync_sync_ok = 0; -#endif -#ifdef __FreeBSD__ - if (V_pf_status.debug >= PF_DEBUG_MISC) -#else - if (pf_status.debug >= PF_DEBUG_MISC) -#endif - printf("pfsync: requesting bulk update\n"); -#ifdef __FreeBSD__ - callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, - pfsync_bulk_fail, V_pfsyncif); -#else - timeout_add_sec(&sc->sc_bulkfail_tmo, 5); -#endif - pfsync_request_update(0, 0); - } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - splx(s); + /* Request a full state table update. 
*/ + if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) + (*carp_demote_adj_p)(V_pfsync_carp_adj, + "pfsync bulk start"); + sc->sc_flags &= ~PFSYNCF_OK; + if (V_pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: requesting bulk update\n"); + pfsync_request_update(0, 0); + PFSYNC_UNLOCK(sc); + PFSYNC_BLOCK(sc); + sc->sc_ureq_sent = time_uptime; + callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, + sc); + PFSYNC_BUNLOCK(sc); break; - + } default: return (ENOTTY); } @@ -1999,7 +1408,7 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) return (0); } -int +static int pfsync_out_state(struct pf_state *st, struct mbuf *m, int offset) { struct pfsync_state *sp = (struct pfsync_state *)(m->m_data + offset); @@ -2009,7 +1418,7 @@ pfsync_out_state(struct pf_state *st, struct mbuf *m, int offset) return (sizeof(*sp)); } -int +static int pfsync_out_iack(struct pf_state *st, struct mbuf *m, int offset) { struct pfsync_ins_ack *iack = @@ -2021,7 +1430,7 @@ pfsync_out_iack(struct pf_state *st, struct mbuf *m, int offset) return (sizeof(*iack)); } -int +static int pfsync_out_upd_c(struct pf_state *st, struct mbuf *m, int offset) { struct pfsync_upd_c *up = (struct pfsync_upd_c *)(m->m_data + offset); @@ -2036,7 +1445,7 @@ pfsync_out_upd_c(struct pf_state *st, struct mbuf *m, int offset) return (sizeof(*up)); } -int +static int pfsync_out_del(struct pf_state *st, struct mbuf *m, int offset) { struct pfsync_del_c *dp = (struct pfsync_del_c *)(m->m_data + offset); @@ -2044,139 +1453,71 @@ pfsync_out_del(struct pf_state *st, struct mbuf *m, int offset) dp->id = st->id; dp->creatorid = st->creatorid; - SET(st->state_flags, PFSTATE_NOSYNC); + st->state_flags |= PFSTATE_NOSYNC; return (sizeof(*dp)); } -void +static void pfsync_drop(struct pfsync_softc *sc) { - struct pf_state *st; + struct pf_state *st, *next; struct pfsync_upd_req_item *ur; -#ifdef notyet - struct tdb *t; -#endif int q; for (q = 0; q < PFSYNC_S_COUNT; q++) { if (TAILQ_EMPTY(&sc->sc_qs[q])) 
continue; - TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) { -#ifdef PFSYNC_DEBUG -#ifdef __FreeBSD__ + TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, next) { KASSERT(st->sync_state == q, ("%s: st->sync_state == q", - __FUNCTION__)); -#else - KASSERT(st->sync_state == q); -#endif -#endif + __func__)); st->sync_state = PFSYNC_S_NONE; + pf_release_state(st); } TAILQ_INIT(&sc->sc_qs[q]); } while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); - pool_put(&sc->sc_pool, ur); + free(ur, M_PFSYNC); } sc->sc_plus = NULL; - -#ifdef notyet - if (!TAILQ_EMPTY(&sc->sc_tdb_q)) { - TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) - CLR(t->tdb_flags, TDBF_PFSYNC); - - TAILQ_INIT(&sc->sc_tdb_q); - } -#endif - sc->sc_len = PFSYNC_MINPKT; } -#ifdef __FreeBSD__ -void pfsync_sendout() -{ - pfsync_sendout1(1); -} - static void -pfsync_sendout1(int schedswi) +pfsync_sendout(int schedswi) { struct pfsync_softc *sc = V_pfsyncif; -#else -void -pfsync_sendout(void) -{ - struct pfsync_softc *sc = pfsyncif; -#endif -#if NBPFILTER > 0 -#ifdef __FreeBSD__ struct ifnet *ifp = sc->sc_ifp; -#else - struct ifnet *ifp = &sc->sc_if; -#endif -#endif struct mbuf *m; struct ip *ip; struct pfsync_header *ph; struct pfsync_subheader *subh; - struct pf_state *st; + struct pf_state *st, *next; struct pfsync_upd_req_item *ur; -#ifdef notyet - struct tdb *t; -#endif int offset; int q, count = 0; -#ifdef __FreeBSD__ - PF_LOCK_ASSERT(); -#else - splassert(IPL_NET); -#endif - - if (sc == NULL || sc->sc_len == PFSYNC_MINPKT) - return; + KASSERT(sc != NULL, ("%s: null sc", __func__)); + KASSERT(sc->sc_len > PFSYNC_MINPKT, + ("%s: sc_len %zu", __func__, sc->sc_len)); + PFSYNC_LOCK_ASSERT(sc); -#if NBPFILTER > 0 if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) { -#else - if (sc->sc_sync_if == NULL) { -#endif pfsync_drop(sc); return; } -#ifdef __FreeBSD__ m = m_get2(M_NOWAIT, MT_DATA, M_PKTHDR, max_linkhdr + sc->sc_len); if (m == NULL) { sc->sc_ifp->if_oerrors++; 
V_pfsyncstats.pfsyncs_onomem++; return; } -#else - MGETHDR(m, M_DONTWAIT, MT_DATA); - if (m == NULL) { - sc->sc_if.if_oerrors++; - pfsyncstats.pfsyncs_onomem++; - pfsync_drop(sc); - return; - } - - if (max_linkhdr + sc->sc_len > MHLEN) { - MCLGETI(m, M_DONTWAIT, NULL, max_linkhdr + sc->sc_len); - if (!ISSET(m->m_flags, M_EXT)) { - m_free(m); - sc->sc_if.if_oerrors++; - pfsyncstats.pfsyncs_onomem++; - pfsync_drop(sc); - return; - } - } -#endif m->m_data += max_linkhdr; m->m_len = m->m_pkthdr.len = sc->sc_len; @@ -2185,11 +1526,7 @@ pfsync_sendout(void) bcopy(&sc->sc_template, ip, sizeof(*ip)); offset = sizeof(*ip); -#ifdef __FreeBSD__ ip->ip_len = m->m_pkthdr.len; -#else - ip->ip_len = htons(m->m_pkthdr.len); -#endif ip->ip_id = htons(ip_randomid()); /* build the pfsync header */ @@ -2199,11 +1536,7 @@ pfsync_sendout(void) ph->version = PFSYNC_VERSION; ph->len = htons(sc->sc_len - sizeof(*ip)); -#ifdef __FreeBSD__ bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); -#else - bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); -#endif /* walk the queues */ for (q = 0; q < PFSYNC_S_COUNT; q++) { @@ -2214,19 +1547,17 @@ pfsync_sendout(void) offset += sizeof(*subh); count = 0; - TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) { -#ifdef PFSYNC_DEBUG -#ifdef __FreeBSD__ + TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, next) { KASSERT(st->sync_state == q, ("%s: st->sync_state == q", - __FUNCTION__)); -#else - KASSERT(st->sync_state == q); -#endif -#endif - + __func__)); + /* + * XXXGL: some of write methods do unlocked reads + * of state data :( + */ offset += pfsync_qs[q].write(st, m, offset); st->sync_state = PFSYNC_S_NONE; + pf_release_state(st); count++; } TAILQ_INIT(&sc->sc_qs[q]); @@ -2234,6 +1565,7 @@ pfsync_sendout(void) bzero(subh, sizeof(*subh)); subh->action = pfsync_qs[q].action; subh->count = htons(count); + V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count; } if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) { @@ -2247,15 +1579,14 @@ 
pfsync_sendout(void) bcopy(&ur->ur_msg, m->m_data + offset, sizeof(ur->ur_msg)); offset += sizeof(ur->ur_msg); - - pool_put(&sc->sc_pool, ur); - + free(ur, M_PFSYNC); count++; } bzero(subh, sizeof(*subh)); subh->action = PFSYNC_ACT_UPD_REQ; subh->count = htons(count); + V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count; } /* has someone built a custom region for us to add? */ @@ -2266,45 +1597,21 @@ pfsync_sendout(void) sc->sc_plus = NULL; } -#ifdef notyet - if (!TAILQ_EMPTY(&sc->sc_tdb_q)) { - subh = (struct pfsync_subheader *)(m->m_data + offset); - offset += sizeof(*subh); - - count = 0; - TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) { - offset += pfsync_out_tdb(t, m, offset); - CLR(t->tdb_flags, TDBF_PFSYNC); - - count++; - } - TAILQ_INIT(&sc->sc_tdb_q); - - bzero(subh, sizeof(*subh)); - subh->action = PFSYNC_ACT_TDB; - subh->count = htons(count); - } -#endif - subh = (struct pfsync_subheader *)(m->m_data + offset); offset += sizeof(*subh); bzero(subh, sizeof(*subh)); subh->action = PFSYNC_ACT_EOF; subh->count = htons(1); + V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++; /* XXX write checksum in EOF here */ /* we're done, let's put it on the wire */ -#if NBPFILTER > 0 if (ifp->if_bpf) { m->m_data += sizeof(*ip); m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip); -#ifdef __FreeBSD__ BPF_MTAP(ifp, m); -#else - bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); -#endif m->m_data -= sizeof(*ip); m->m_len = m->m_pkthdr.len = sc->sc_len; } @@ -2314,9 +1621,7 @@ pfsync_sendout(void) m_freem(m); return; } -#endif -#ifdef __FreeBSD__ sc->sc_ifp->if_opackets++; sc->sc_ifp->if_obytes += m->m_pkthdr.len; sc->sc_len = PFSYNC_MINPKT; @@ -2325,241 +1630,170 @@ pfsync_sendout(void) _IF_ENQUEUE(&sc->sc_ifp->if_snd, m); else { m_freem(m); - sc->sc_ifp->if_snd.ifq_drops++; + sc->sc_ifp->if_snd.ifq_drops++; } if (schedswi) swi_sched(V_pfsync_swi_cookie, 0); -#else - sc->sc_if.if_opackets++; - sc->sc_if.if_obytes += m->m_pkthdr.len; - - if (ip_output(m, NULL, NULL, 
IP_RAWOUTPUT, &sc->sc_imo, NULL) == 0) - pfsyncstats.pfsyncs_opackets++; - else - pfsyncstats.pfsyncs_oerrors++; - - /* start again */ - sc->sc_len = PFSYNC_MINPKT; -#endif } -void +static void pfsync_insert_state(struct pf_state *st) { -#ifdef __FreeBSD__ struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif -#ifdef __FreeBSD__ - PF_LOCK_ASSERT(); -#else - splassert(IPL_SOFTNET); -#endif + if (st->state_flags & PFSTATE_NOSYNC) + return; - if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) || + if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) || st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) { - SET(st->state_flags, PFSTATE_NOSYNC); + st->state_flags |= PFSTATE_NOSYNC; return; } - if (sc == NULL || ISSET(st->state_flags, PFSTATE_NOSYNC)) - return; - -#ifdef PFSYNC_DEBUG -#ifdef __FreeBSD__ KASSERT(st->sync_state == PFSYNC_S_NONE, - ("%s: st->sync_state == PFSYNC_S_NONE", __FUNCTION__)); -#else - KASSERT(st->sync_state == PFSYNC_S_NONE); -#endif -#endif + ("%s: st->sync_state == PFSYNC_S_NONE", __func__)); + PFSYNC_LOCK(sc); if (sc->sc_len == PFSYNC_MINPKT) -#ifdef __FreeBSD__ - callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, - V_pfsyncif); -#else - timeout_add_sec(&sc->sc_tmo, 1); -#endif + callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); pfsync_q_ins(st, PFSYNC_S_INS); + PFSYNC_UNLOCK(sc); st->sync_updates = 0; } -int defer = 10; - -int +static int pfsync_defer(struct pf_state *st, struct mbuf *m) { -#ifdef __FreeBSD__ struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif struct pfsync_deferral *pd; -#ifdef __FreeBSD__ - PF_LOCK_ASSERT(); -#else - splassert(IPL_SOFTNET); -#endif + if (m->m_flags & (M_BCAST|M_MCAST)) + return (0); + + PFSYNC_LOCK(sc); - if (!sc->sc_defer || m->m_flags & (M_BCAST|M_MCAST)) + if (sc == NULL || !(sc->sc_ifp->if_flags & IFF_DRV_RUNNING) || + !(sc->sc_flags & PFSYNCF_DEFER)) { + PFSYNC_UNLOCK(sc); return (0); + } - if (sc->sc_deferred >= 128) + if 
(sc->sc_deferred >= 128) pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0); - pd = pool_get(&sc->sc_pool, M_NOWAIT); + pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT); if (pd == NULL) return (0); sc->sc_deferred++; -#ifdef __FreeBSD__ m->m_flags |= M_SKIP_FIREWALL; -#else - m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; -#endif - SET(st->state_flags, PFSTATE_ACK); + st->state_flags |= PFSTATE_ACK; + pd->pd_sc = sc; + pd->pd_refs = 0; pd->pd_st = st; + pf_ref_state(st); pd->pd_m = m; TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry); -#ifdef __FreeBSD__ - callout_init(&pd->pd_tmo, CALLOUT_MPSAFE); - callout_reset(&pd->pd_tmo, defer, pfsync_defer_tmo, - pd); -#else - timeout_set(&pd->pd_tmo, pfsync_defer_tmo, pd); - timeout_add(&pd->pd_tmo, defer); -#endif + callout_init_mtx(&pd->pd_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); + callout_reset(&pd->pd_tmo, 10, pfsync_defer_tmo, pd); - swi_sched(V_pfsync_swi_cookie, 0); + pfsync_push(sc); return (1); } -void +static void pfsync_undefer(struct pfsync_deferral *pd, int drop) { -#ifdef __FreeBSD__ - struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif - int s; + struct pfsync_softc *sc = pd->pd_sc; + struct mbuf *m = pd->pd_m; + struct pf_state *st = pd->pd_st; -#ifdef __FreeBSD__ - PF_LOCK_ASSERT(); -#else - splassert(IPL_SOFTNET); -#endif + PFSYNC_LOCK_ASSERT(sc); TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); sc->sc_deferred--; + pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ + free(pd, M_PFSYNC); + pf_release_state(st); - CLR(pd->pd_st->state_flags, PFSTATE_ACK); - timeout_del(&pd->pd_tmo); /* bah */ if (drop) - m_freem(pd->pd_m); + m_freem(m); else { - s = splnet(); -#ifdef __FreeBSD__ - /* XXX: use pf_defered?! 
*/ - PF_UNLOCK(); -#endif - ip_output(pd->pd_m, (void *)NULL, (void *)NULL, 0, - (void *)NULL, (void *)NULL); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif - splx(s); + _IF_ENQUEUE(&sc->sc_ifp->if_snd, m); + pfsync_push(sc); } - - pool_put(&sc->sc_pool, pd); } -void +static void pfsync_defer_tmo(void *arg) { -#if defined(__FreeBSD__) && defined(VIMAGE) struct pfsync_deferral *pd = arg; -#endif - int s; + struct pfsync_softc *sc = pd->pd_sc; + struct mbuf *m = pd->pd_m; + struct pf_state *st = pd->pd_st; + + PFSYNC_LOCK_ASSERT(sc); + + CURVNET_SET(m->m_pkthdr.rcvif->if_vnet); + + TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); + sc->sc_deferred--; + pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ + if (pd->pd_refs == 0) + free(pd, M_PFSYNC); + PFSYNC_UNLOCK(sc); + + ip_output(m, NULL, NULL, 0, NULL, NULL); + + pf_release_state(st); - s = splsoftnet(); -#ifdef __FreeBSD__ - CURVNET_SET(pd->pd_m->m_pkthdr.rcvif->if_vnet); /* XXX */ - PF_LOCK(); -#endif - pfsync_undefer(arg, 0); -#ifdef __FreeBSD__ - PF_UNLOCK(); CURVNET_RESTORE(); -#endif - splx(s); } -void -pfsync_deferred(struct pf_state *st, int drop) +static void +pfsync_undefer_state(struct pf_state *st, int drop) { -#ifdef __FreeBSD__ struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif struct pfsync_deferral *pd; + PFSYNC_LOCK_ASSERT(sc); + TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) { if (pd->pd_st == st) { - pfsync_undefer(pd, drop); + if (callout_stop(&pd->pd_tmo)) + pfsync_undefer(pd, drop); return; } } - panic("pfsync_send_deferred: unable to find deferred state"); + panic("%s: unable to find deferred state", __func__); } -u_int pfsync_upds = 0; - -void +static void pfsync_update_state(struct pf_state *st) { -#ifdef __FreeBSD__ struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif int sync = 0; -#ifdef __FreeBSD__ - PF_LOCK_ASSERT(); -#else - splassert(IPL_SOFTNET); -#endif + PF_STATE_LOCK_ASSERT(st); + PFSYNC_LOCK(sc); - 
if (sc == NULL) - return; - - if (ISSET(st->state_flags, PFSTATE_ACK)) - pfsync_deferred(st, 0); - if (ISSET(st->state_flags, PFSTATE_NOSYNC)) { + if (st->state_flags & PFSTATE_ACK) + pfsync_undefer_state(st, 0); + if (st->state_flags & PFSTATE_NOSYNC) { if (st->sync_state != PFSYNC_S_NONE) pfsync_q_del(st); + PFSYNC_UNLOCK(sc); return; } if (sc->sc_len == PFSYNC_MINPKT) -#ifdef __FreeBSD__ - callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, - V_pfsyncif); -#else - timeout_add_sec(&sc->sc_tmo, 1); -#endif + callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); switch (st->sync_state) { case PFSYNC_S_UPD_C: @@ -2582,40 +1816,31 @@ pfsync_update_state(struct pf_state *st) break; default: - panic("pfsync_update_state: unexpected sync state %d", - st->sync_state); + panic("%s: unexpected sync state %d", __func__, st->sync_state); } - if (sync || (time_uptime - st->pfsync_time) < 2) { - pfsync_upds++; - schednetisr(NETISR_PFSYNC); - } + if (sync || (time_uptime - st->pfsync_time) < 2) + pfsync_push(sc); + + PFSYNC_UNLOCK(sc); } -void +static void pfsync_request_update(u_int32_t creatorid, u_int64_t id) { -#ifdef __FreeBSD__ struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif struct pfsync_upd_req_item *item; size_t nlen = sizeof(struct pfsync_upd_req); - int s; - PF_LOCK_ASSERT(); + PFSYNC_LOCK_ASSERT(sc); /* - * this code does nothing to prevent multiple update requests for the + * This code does nothing to prevent multiple update requests for the * same state being generated. 
*/ - - item = pool_get(&sc->sc_pool, PR_NOWAIT); - if (item == NULL) { - /* XXX stats */ - return; - } + item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT); + if (item == NULL) + return; /* XXX stats */ item->ur_msg.id = id; item->ur_msg.creatorid = creatorid; @@ -2623,14 +1848,8 @@ pfsync_request_update(u_int32_t creatorid, u_int64_t id) if (TAILQ_EMPTY(&sc->sc_upd_req_list)) nlen += sizeof(struct pfsync_subheader); -#ifdef __FreeBSD__ if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) { -#else - if (sc->sc_len + nlen > sc->sc_if.if_mtu) { -#endif - s = splnet(); - pfsync_sendout(); - splx(s); + pfsync_sendout(1); nlen = sizeof(struct pfsync_subheader) + sizeof(struct pfsync_upd_req); @@ -2639,26 +1858,21 @@ pfsync_request_update(u_int32_t creatorid, u_int64_t id) TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry); sc->sc_len += nlen; - schednetisr(NETISR_PFSYNC); + pfsync_push(sc); } -void +static void pfsync_update_state_req(struct pf_state *st) { -#ifdef __FreeBSD__ struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif - - PF_LOCK_ASSERT(); - if (sc == NULL) - panic("pfsync_update_state_req: nonexistant instance"); + PF_STATE_LOCK_ASSERT(st); + PFSYNC_LOCK(sc); - if (ISSET(st->state_flags, PFSTATE_NOSYNC)) { + if (st->state_flags & PFSTATE_NOSYNC) { if (st->sync_state != PFSYNC_S_NONE) pfsync_q_del(st); + PFSYNC_UNLOCK(sc); return; } @@ -2668,60 +1882,45 @@ pfsync_update_state_req(struct pf_state *st) pfsync_q_del(st); case PFSYNC_S_NONE: pfsync_q_ins(st, PFSYNC_S_UPD); - schednetisr(NETISR_PFSYNC); - return; + pfsync_push(sc); + break; case PFSYNC_S_INS: case PFSYNC_S_UPD: case PFSYNC_S_DEL: /* we're already handling it */ - return; + break; default: - panic("pfsync_update_state_req: unexpected sync state %d", - st->sync_state); + panic("%s: unexpected sync state %d", __func__, st->sync_state); } + + PFSYNC_UNLOCK(sc); } -void +static void pfsync_delete_state(struct pf_state *st) { -#ifdef __FreeBSD__ struct pfsync_softc *sc = 
V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif -#ifdef __FreeBSD__ - PF_LOCK_ASSERT(); -#else - splassert(IPL_SOFTNET); -#endif - - if (sc == NULL) - return; - - if (ISSET(st->state_flags, PFSTATE_ACK)) - pfsync_deferred(st, 1); - if (ISSET(st->state_flags, PFSTATE_NOSYNC)) { + PFSYNC_LOCK(sc); + if (st->state_flags & PFSTATE_ACK) + pfsync_undefer_state(st, 1); + if (st->state_flags & PFSTATE_NOSYNC) { if (st->sync_state != PFSYNC_S_NONE) pfsync_q_del(st); + PFSYNC_UNLOCK(sc); return; } if (sc->sc_len == PFSYNC_MINPKT) -#ifdef __FreeBSD__ - callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, - V_pfsyncif); -#else - timeout_add_sec(&sc->sc_tmo, 1); -#endif + callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); switch (st->sync_state) { case PFSYNC_S_INS: - /* we never got to tell the world so just forget about it */ + /* We never got to tell the world so just forget about it. */ pfsync_q_del(st); - return; + break; case PFSYNC_S_UPD_C: case PFSYNC_S_UPD: @@ -2731,87 +1930,55 @@ pfsync_delete_state(struct pf_state *st) case PFSYNC_S_NONE: pfsync_q_ins(st, PFSYNC_S_DEL); - return; + break; default: - panic("pfsync_delete_state: unexpected sync state %d", - st->sync_state); + panic("%s: unexpected sync state %d", __func__, st->sync_state); } + PFSYNC_UNLOCK(sc); } -void +static void pfsync_clear_states(u_int32_t creatorid, const char *ifname) { + struct pfsync_softc *sc = V_pfsyncif; struct { struct pfsync_subheader subh; struct pfsync_clr clr; } __packed r; -#ifdef __FreeBSD__ - struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif - -#ifdef __FreeBSD__ - PF_LOCK_ASSERT(); -#else - splassert(IPL_SOFTNET); -#endif - - if (sc == NULL) - return; - bzero(&r, sizeof(r)); r.subh.action = PFSYNC_ACT_CLR; r.subh.count = htons(1); + V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++; strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname)); r.clr.creatorid = creatorid; + PFSYNC_LOCK(sc); pfsync_send_plus(&r, sizeof(r)); + 
PFSYNC_UNLOCK(sc); } -void +static void pfsync_q_ins(struct pf_state *st, int q) { -#ifdef __FreeBSD__ struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif size_t nlen = pfsync_qs[q].len; - int s; - PF_LOCK_ASSERT(); + PFSYNC_LOCK_ASSERT(sc); -#ifdef __FreeBSD__ KASSERT(st->sync_state == PFSYNC_S_NONE, - ("%s: st->sync_state == PFSYNC_S_NONE", __FUNCTION__)); -#else - KASSERT(st->sync_state == PFSYNC_S_NONE); -#endif + ("%s: st->sync_state == PFSYNC_S_NONE", __func__)); + KASSERT(sc->sc_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu", + sc->sc_len)); -#if 1 || defined(PFSYNC_DEBUG) - if (sc->sc_len < PFSYNC_MINPKT) -#ifdef __FreeBSD__ - panic("pfsync pkt len is too low %zu", sc->sc_len); -#else - panic("pfsync pkt len is too low %d", sc->sc_len); -#endif -#endif if (TAILQ_EMPTY(&sc->sc_qs[q])) nlen += sizeof(struct pfsync_subheader); -#ifdef __FreeBSD__ if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) { -#else - if (sc->sc_len + nlen > sc->sc_if.if_mtu) { -#endif - s = splnet(); - pfsync_sendout(); - splx(s); + pfsync_sendout(1); nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len; } @@ -2819,234 +1986,112 @@ pfsync_q_ins(struct pf_state *st, int q) sc->sc_len += nlen; TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list); st->sync_state = q; + pf_ref_state(st); } -void +static void pfsync_q_del(struct pf_state *st) { -#ifdef __FreeBSD__ struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif int q = st->sync_state; -#ifdef __FreeBSD__ + PFSYNC_LOCK_ASSERT(sc); KASSERT(st->sync_state != PFSYNC_S_NONE, - ("%s: st->sync_state != PFSYNC_S_NONE", __FUNCTION__)); -#else - KASSERT(st->sync_state != PFSYNC_S_NONE); -#endif + ("%s: st->sync_state != PFSYNC_S_NONE", __func__)); sc->sc_len -= pfsync_qs[q].len; TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list); st->sync_state = PFSYNC_S_NONE; + pf_release_state(st); if (TAILQ_EMPTY(&sc->sc_qs[q])) sc->sc_len -= sizeof(struct pfsync_subheader); } -#ifdef 
notyet -void -pfsync_update_tdb(struct tdb *t, int output) -{ -#ifdef __FreeBSD__ - struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif - size_t nlen = sizeof(struct pfsync_tdb); - int s; - - if (sc == NULL) - return; - - if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) { - if (TAILQ_EMPTY(&sc->sc_tdb_q)) - nlen += sizeof(struct pfsync_subheader); - - if (sc->sc_len + nlen > sc->sc_if.if_mtu) { - s = splnet(); - PF_LOCK(); - pfsync_sendout(); - PF_UNLOCK(); - splx(s); - - nlen = sizeof(struct pfsync_subheader) + - sizeof(struct pfsync_tdb); - } - - sc->sc_len += nlen; - TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry); - SET(t->tdb_flags, TDBF_PFSYNC); - t->tdb_updates = 0; - } else { - if (++t->tdb_updates >= sc->sc_maxupdates) - schednetisr(NETISR_PFSYNC); - } - - if (output) - SET(t->tdb_flags, TDBF_PFSYNC_RPL); - else - CLR(t->tdb_flags, TDBF_PFSYNC_RPL); -} - -void -pfsync_delete_tdb(struct tdb *t) -{ -#ifdef __FreeBSD__ - struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif - - if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC)) - return; - - sc->sc_len -= sizeof(struct pfsync_tdb); - TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry); - CLR(t->tdb_flags, TDBF_PFSYNC); - - if (TAILQ_EMPTY(&sc->sc_tdb_q)) - sc->sc_len -= sizeof(struct pfsync_subheader); -} - -int -pfsync_out_tdb(struct tdb *t, struct mbuf *m, int offset) -{ - struct pfsync_tdb *ut = (struct pfsync_tdb *)(m->m_data + offset); - - bzero(ut, sizeof(*ut)); - ut->spi = t->tdb_spi; - bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst)); - /* - * When a failover happens, the master's rpl is probably above - * what we see here (we may be up to a second late), so - * increase it a bit for outbound tdbs to manage most such - * situations. - * - * For now, just add an offset that is likely to be larger - * than the number of packets we can see in one second. The RFC - * just says the next packet must have a higher seq value. 
- * - * XXX What is a good algorithm for this? We could use - * a rate-determined increase, but to know it, we would have - * to extend struct tdb. - * XXX pt->rpl can wrap over MAXINT, but if so the real tdb - * will soon be replaced anyway. For now, just don't handle - * this edge case. - */ -#define RPL_INCR 16384 - ut->rpl = htonl(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ? - RPL_INCR : 0)); - ut->cur_bytes = htobe64(t->tdb_cur_bytes); - ut->sproto = t->tdb_sproto; - - return (sizeof(*ut)); -} -#endif - -void +static void pfsync_bulk_start(void) { -#ifdef __FreeBSD__ struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) -#else - if (pf_status.debug >= PF_DEBUG_MISC) -#endif printf("pfsync: received bulk update request\n"); -#ifdef __FreeBSD__ - PF_LOCK_ASSERT(); - if (TAILQ_EMPTY(&V_state_list)) -#else - if (TAILQ_EMPTY(&state_list)) -#endif - pfsync_bulk_status(PFSYNC_BUS_END); - else { - sc->sc_ureq_received = time_uptime; - if (sc->sc_bulk_next == NULL) -#ifdef __FreeBSD__ - sc->sc_bulk_next = TAILQ_FIRST(&V_state_list); -#else - sc->sc_bulk_next = TAILQ_FIRST(&state_list); -#endif - sc->sc_bulk_last = sc->sc_bulk_next; + PFSYNC_BLOCK(sc); - pfsync_bulk_status(PFSYNC_BUS_START); - callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc); - } + sc->sc_ureq_received = time_uptime; + sc->sc_bulk_hashid = 0; + sc->sc_bulk_stateid = 0; + pfsync_bulk_status(PFSYNC_BUS_START); + callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc); + PFSYNC_BUNLOCK(sc); } -void +static void pfsync_bulk_update(void *arg) { struct pfsync_softc *sc = arg; - struct pf_state *st = sc->sc_bulk_next; - int i = 0; - int s; + struct pf_state *s; + int i, sent = 0; - PF_LOCK_ASSERT(); - - s = splsoftnet(); -#ifdef __FreeBSD__ + PFSYNC_BLOCK_ASSERT(sc); CURVNET_SET(sc->sc_ifp->if_vnet); -#endif - for (;;) { - if (st->sync_state == PFSYNC_S_NONE && - st->timeout < PFTM_MAX && - 
st->pfsync_time <= sc->sc_ureq_received) { - pfsync_update_state_req(st); - i++; - } - st = TAILQ_NEXT(st, entry_list); - if (st == NULL) -#ifdef __FreeBSD__ - st = TAILQ_FIRST(&V_state_list); -#else - st = TAILQ_FIRST(&state_list); -#endif + /* + * Start with last state from previous invocation. + * It may had gone, in this case start from the + * hash slot. + */ + s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid); - if (st == sc->sc_bulk_last) { - /* we're done */ - sc->sc_bulk_next = NULL; - sc->sc_bulk_last = NULL; - pfsync_bulk_status(PFSYNC_BUS_END); - break; + if (s != NULL) + i = PF_IDHASH(s); + else + i = sc->sc_bulk_hashid; + + for (; i <= V_pf_hashmask; i++) { + struct pf_idhash *ih = &V_pf_idhash[i]; + + if (s != NULL) + PF_HASHROW_ASSERT(ih); + else { + PF_HASHROW_LOCK(ih); + s = LIST_FIRST(&ih->states); } -#ifdef __FreeBSD__ - if (i > 1 && (sc->sc_ifp->if_mtu - sc->sc_len) < -#else - if (i > 1 && (sc->sc_if.if_mtu - sc->sc_len) < -#endif - sizeof(struct pfsync_state)) { - /* we've filled a packet */ - sc->sc_bulk_next = st; -#ifdef __FreeBSD__ - callout_reset(&sc->sc_bulk_tmo, 1, - pfsync_bulk_update, sc); -#else - timeout_add(&sc->sc_bulk_tmo, 1); -#endif - break; + for (; s; s = LIST_NEXT(s, entry)) { + + if (sent > 1 && (sc->sc_ifp->if_mtu - sc->sc_len) < + sizeof(struct pfsync_state)) { + /* We've filled a packet. */ + sc->sc_bulk_hashid = i; + sc->sc_bulk_stateid = s->id; + sc->sc_bulk_creatorid = s->creatorid; + PF_HASHROW_UNLOCK(ih); + callout_reset(&sc->sc_bulk_tmo, 1, + pfsync_bulk_update, sc); + goto full; + } + + if (s->sync_state == PFSYNC_S_NONE && + s->timeout < PFTM_MAX && + s->pfsync_time <= sc->sc_ureq_received) { + PFSYNC_LOCK(sc); + pfsync_update_state_req(s); + PFSYNC_UNLOCK(sc); + sent++; + } } + PF_HASHROW_UNLOCK(ih); } -#ifdef __FreeBSD__ + /* We're done. 
*/ + pfsync_bulk_status(PFSYNC_BUS_END); + +full: CURVNET_RESTORE(); -#endif - splx(s); } -void +static void pfsync_bulk_status(u_int8_t status) { struct { @@ -3054,268 +2099,151 @@ pfsync_bulk_status(u_int8_t status) struct pfsync_bus bus; } __packed r; -#ifdef __FreeBSD__ struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif - - PF_LOCK_ASSERT(); bzero(&r, sizeof(r)); r.subh.action = PFSYNC_ACT_BUS; r.subh.count = htons(1); + V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++; -#ifdef __FreeBSD__ r.bus.creatorid = V_pf_status.hostid; -#else - r.bus.creatorid = pf_status.hostid; -#endif r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received); r.bus.status = status; + PFSYNC_LOCK(sc); pfsync_send_plus(&r, sizeof(r)); + PFSYNC_UNLOCK(sc); } -void +static void pfsync_bulk_fail(void *arg) { struct pfsync_softc *sc = arg; -#ifdef __FreeBSD__ CURVNET_SET(sc->sc_ifp->if_vnet); -#endif + + PFSYNC_BLOCK_ASSERT(sc); if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { /* Try again */ -#ifdef __FreeBSD__ callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, V_pfsyncif); -#else - timeout_add_sec(&sc->sc_bulkfail_tmo, 5); -#endif - PF_LOCK(); + PFSYNC_LOCK(sc); pfsync_request_update(0, 0); - PF_UNLOCK(); + PFSYNC_UNLOCK(sc); } else { - /* Pretend like the transfer was ok */ + /* Pretend like the transfer was ok. 
*/ sc->sc_ureq_sent = 0; sc->sc_bulk_tries = 0; -#ifdef __FreeBSD__ - if (!sc->pfsync_sync_ok && carp_demote_adj_p) + PFSYNC_LOCK(sc); + if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync bulk fail"); - sc->pfsync_sync_ok = 1; -#else -#if NCARP > 0 - if (!pfsync_sync_ok) - carp_group_demote_adj(&sc->sc_if, -1); -#endif - pfsync_sync_ok = 1; -#endif -#ifdef __FreeBSD__ + sc->sc_flags |= PFSYNCF_OK; + PFSYNC_UNLOCK(sc); if (V_pf_status.debug >= PF_DEBUG_MISC) -#else - if (pf_status.debug >= PF_DEBUG_MISC) -#endif printf("pfsync: failed to receive bulk update\n"); } -#ifdef __FreeBSD__ CURVNET_RESTORE(); -#endif } -void +static void pfsync_send_plus(void *plus, size_t pluslen) { -#ifdef __FreeBSD__ struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif - int s; - PF_LOCK_ASSERT(); + PFSYNC_LOCK_ASSERT(sc); -#ifdef __FreeBSD__ - if (sc->sc_len + pluslen > sc->sc_ifp->if_mtu) { -#else - if (sc->sc_len + pluslen > sc->sc_if.if_mtu) { -#endif - s = splnet(); - pfsync_sendout(); - splx(s); - } + if (sc->sc_len + pluslen > sc->sc_ifp->if_mtu) + pfsync_sendout(1); sc->sc_plus = plus; sc->sc_len += (sc->sc_pluslen = pluslen); - s = splnet(); - pfsync_sendout(); - splx(s); -} - -int -pfsync_up(void) -{ -#ifdef __FreeBSD__ - struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif - -#ifdef __FreeBSD__ - if (sc == NULL || !ISSET(sc->sc_ifp->if_flags, IFF_DRV_RUNNING)) -#else - if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING)) -#endif - return (0); - - return (1); -} - -int -pfsync_state_in_use(struct pf_state *st) -{ -#ifdef __FreeBSD__ - struct pfsync_softc *sc = V_pfsyncif; -#else - struct pfsync_softc *sc = pfsyncif; -#endif - - if (sc == NULL) - return (0); - - if (st->sync_state != PFSYNC_S_NONE || - st == sc->sc_bulk_next || - st == sc->sc_bulk_last) - return (1); - - return (0); + pfsync_sendout(1); } -u_int pfsync_ints; -u_int pfsync_tmos; 
- -void +static void pfsync_timeout(void *arg) { -#if defined(__FreeBSD__) && defined(VIMAGE) struct pfsync_softc *sc = arg; -#endif - int s; -#ifdef __FreeBSD__ CURVNET_SET(sc->sc_ifp->if_vnet); -#endif + PFSYNC_LOCK(sc); + pfsync_push(sc); + PFSYNC_UNLOCK(sc); + CURVNET_RESTORE(); +} - pfsync_tmos++; +static void +pfsync_push(struct pfsync_softc *sc) +{ - s = splnet(); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif - pfsync_sendout(); -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - splx(s); + PFSYNC_LOCK_ASSERT(sc); -#ifdef __FreeBSD__ - CURVNET_RESTORE(); -#endif + sc->sc_flags |= PFSYNCF_PUSH; + swi_sched(V_pfsync_swi_cookie, 0); } -/* this is a softnet/netisr handler */ -void -#ifdef __FreeBSD__ +static void pfsyncintr(void *arg) { struct pfsync_softc *sc = arg; struct mbuf *m, *n; CURVNET_SET(sc->sc_ifp->if_vnet); - pfsync_ints++; - PF_LOCK(); - if (sc->sc_len > PFSYNC_MINPKT) - pfsync_sendout1(0); + PFSYNC_LOCK(sc); + if ((sc->sc_flags & PFSYNCF_PUSH) && sc->sc_len > PFSYNC_MINPKT) { + pfsync_sendout(0); + sc->sc_flags &= ~PFSYNCF_PUSH; + } _IF_DEQUEUE_ALL(&sc->sc_ifp->if_snd, m); - PF_UNLOCK(); + PFSYNC_UNLOCK(sc); for (; m != NULL; m = n) { n = m->m_nextpkt; m->m_nextpkt = NULL; - if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL) - == 0) + + /* + * We distinguish between a deferral packet and our + * own pfsync packet based on M_SKIP_FIREWALL + * flag. This is XXX. + */ + if (m->m_flags & M_SKIP_FIREWALL) + ip_output(m, NULL, NULL, 0, NULL, NULL); + else if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, + NULL) == 0) V_pfsyncstats.pfsyncs_opackets++; else V_pfsyncstats.pfsyncs_oerrors++; } CURVNET_RESTORE(); } -#else -pfsyncintr(void) -{ - int s; - - pfsync_ints++; - - s = splnet(); - pfsync_sendout(); - splx(s); -} -#endif -int -pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) -{ - -#ifdef notyet - /* All sysctl names at this level are terminal. 
*/ - if (namelen != 1) - return (ENOTDIR); - - switch (name[0]) { - case PFSYNCCTL_STATS: - if (newp != NULL) - return (EPERM); - return (sysctl_struct(oldp, oldlenp, newp, newlen, - &V_pfsyncstats, sizeof(V_pfsyncstats))); - } -#endif - return (ENOPROTOOPT); -} - -#ifdef __FreeBSD__ static int -pfsync_multicast_setup(struct pfsync_softc *sc) +pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, void *mship) { struct ip_moptions *imo = &sc->sc_imo; int error; - if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) { - sc->sc_sync_if = NULL; + if (!(ifp->if_flags & IFF_MULTICAST)) return (EADDRNOTAVAIL); - } - imo->imo_membership = (struct in_multi **)malloc( - (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_PFSYNC, - M_WAITOK | M_ZERO); + imo->imo_membership = (struct in_multi **)mship; imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; imo->imo_multicast_vif = -1; - if ((error = in_joingroup(sc->sc_sync_if, &sc->sc_sync_peer, NULL, + if ((error = in_joingroup(ifp, &sc->sc_sync_peer, NULL, &imo->imo_membership[0])) != 0) { - free(imo->imo_membership, M_PFSYNC); + imo->imo_membership = NULL; return (error); } imo->imo_num_memberships++; - imo->imo_multicast_ifp = sc->sc_sync_if; + imo->imo_multicast_ifp = ifp; imo->imo_multicast_ttl = PFSYNC_DFLTTL; imo->imo_multicast_loop = 0; @@ -3377,16 +2305,14 @@ pfsync_init() goto fail; } #endif - PF_LOCK(); + PF_RULES_WLOCK(); pfsync_state_import_ptr = pfsync_state_import; - pfsync_up_ptr = pfsync_up; pfsync_insert_state_ptr = pfsync_insert_state; pfsync_update_state_ptr = pfsync_update_state; pfsync_delete_state_ptr = pfsync_delete_state; pfsync_clear_states_ptr = pfsync_clear_states; - pfsync_state_in_use_ptr = pfsync_state_in_use; pfsync_defer_ptr = pfsync_defer; - PF_UNLOCK(); + PF_RULES_WUNLOCK(); return (0); @@ -3411,24 +2337,22 @@ pfsync_uninit() { VNET_ITERATOR_DECL(vnet_iter); - PF_LOCK(); + PF_RULES_WLOCK(); pfsync_state_import_ptr = NULL; - pfsync_up_ptr = NULL; pfsync_insert_state_ptr = NULL; 
pfsync_update_state_ptr = NULL; pfsync_delete_state_ptr = NULL; pfsync_clear_states_ptr = NULL; - pfsync_state_in_use_ptr = NULL; pfsync_defer_ptr = NULL; - PF_UNLOCK(); + PF_RULES_WUNLOCK(); ipproto_unregister(IPPROTO_PFSYNC); pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW); VNET_LIST_RLOCK(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); - swi_remove(V_pfsync_swi_cookie); if_clone_detach(&V_pfsync_cloner); + swi_remove(V_pfsync_swi_cookie); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK(); @@ -3471,4 +2395,3 @@ static moduledata_t pfsync_mod = { DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); MODULE_VERSION(pfsync, PFSYNC_MODVER); MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER); -#endif /* __FreeBSD__ */ diff --git a/sys/contrib/pf/net/if_pfsync.h b/sys/contrib/pf/net/if_pfsync.h index 3f34038..5e71dd8 100644 --- a/sys/contrib/pf/net/if_pfsync.h +++ b/sys/contrib/pf/net/if_pfsync.h @@ -182,7 +182,7 @@ struct pfsync_del_c { u_int32_t creatorid; } __packed; -/* +/* * INS_F, DEL_F */ @@ -256,6 +256,9 @@ struct pfsyncstats { u_int64_t pfsyncs_opackets6; /* total output packets, IPv6 */ u_int64_t pfsyncs_onomem; /* no memory for an mbuf */ u_int64_t pfsyncs_oerrors; /* ip output error */ + + u_int64_t pfsyncs_iacts[PFSYNC_ACT_MAX]; + u_int64_t pfsyncs_oacts[PFSYNC_ACT_MAX]; }; /* @@ -268,10 +271,8 @@ struct pfsyncreq { int pfsyncr_defer; }; -#ifdef __FreeBSD__ #define SIOCSETPFSYNC _IOW('i', 247, struct ifreq) #define SIOCGETPFSYNC _IOWR('i', 248, struct ifreq) -#endif #ifdef _KERNEL @@ -288,37 +289,10 @@ struct pfsyncreq { #define PFSYNC_S_DEFER 0xfe #define PFSYNC_S_NONE 0xff -#ifdef __FreeBSD__ -void pfsync_input(struct mbuf *, __unused int); -#else -void pfsync_input(struct mbuf *, ...); -#endif -int pfsync_sysctl(int *, u_int, void *, size_t *, - void *, size_t); - #define PFSYNC_SI_IOCTL 0x01 #define PFSYNC_SI_CKSUM 0x02 #define PFSYNC_SI_ACK 0x04 -int pfsync_state_import(struct pfsync_state *, u_int8_t); -#ifndef __FreeBSD__ 
-void pfsync_state_export(struct pfsync_state *, - struct pf_state *); -#endif - -void pfsync_insert_state(struct pf_state *); -void pfsync_update_state(struct pf_state *); -void pfsync_delete_state(struct pf_state *); -void pfsync_clear_states(u_int32_t, const char *); - -#ifdef notyet -void pfsync_update_tdb(struct tdb *, int); -void pfsync_delete_tdb(struct tdb *); -#endif - -int pfsync_defer(struct pf_state *, struct mbuf *); - -int pfsync_up(void); -int pfsync_state_in_use(struct pf_state *); -#endif + +#endif /* _KERNEL */ #endif /* _NET_IF_PFSYNC_H_ */ diff --git a/sys/contrib/pf/net/pf.c b/sys/contrib/pf/net/pf.c index ac51282..b66d6dd 100644 --- a/sys/contrib/pf/net/pf.c +++ b/sys/contrib/pf/net/pf.c @@ -35,138 +35,77 @@ * */ -#ifdef __FreeBSD__ -#include "opt_inet.h" -#include "opt_inet6.h" - #include <sys/cdefs.h> + __FBSDID("$FreeBSD$"); -#endif -#ifdef __FreeBSD__ +#include "opt_inet.h" +#include "opt_inet6.h" #include "opt_bpf.h" #include "opt_pf.h" -#define NPFSYNC 1 - -#ifdef DEV_PFLOW -#define NPFLOW DEV_PFLOW -#else -#define NPFLOW 0 -#endif - -#else -#include "bpfilter.h" -#include "pflog.h" -#include "pfsync.h" -#include "pflow.h" -#endif - #include <sys/param.h> -#include <sys/systm.h> -#include <sys/mbuf.h> -#include <sys/filio.h> -#include <sys/socket.h> -#include <sys/socketvar.h> -#include <sys/kernel.h> -#include <sys/time.h> -#ifdef __FreeBSD__ -#include <sys/random.h> -#include <sys/sysctl.h> +#include <sys/bus.h> #include <sys/endian.h> -#define betoh64 be64toh -#else -#include <sys/pool.h> -#endif -#include <sys/proc.h> -#ifdef __FreeBSD__ +#include <sys/hash.h> +#include <sys/interrupt.h> +#include <sys/kernel.h> #include <sys/kthread.h> -#include <sys/lock.h> -#include <sys/sx.h> -#else -#include <sys/rwlock.h> -#endif - -#ifdef __FreeBSD__ +#include <sys/limits.h> +#include <sys/mbuf.h> #include <sys/md5.h> -#else -#include <crypto/md5.h> -#endif +#include <sys/random.h> +#include <sys/refcount.h> +#include <sys/socket.h> +#include 
<sys/sysctl.h> +#include <sys/taskqueue.h> +#include <sys/ucred.h> #include <net/if.h> #include <net/if_types.h> -#include <net/bpf.h> #include <net/route.h> -#ifdef __FreeBSD__ -#ifdef RADIX_MPATH -#include <net/radix_mpath.h> -#endif -#else #include <net/radix_mpath.h> -#endif +#include <net/vnet.h> -#include <netinet/in.h> +#include <net/pfvar.h> +#include <net/pf_mtag.h> +#include <net/if_pflog.h> +#include <net/if_pfsync.h> + +#include <netinet/in_pcb.h> #include <netinet/in_var.h> -#include <netinet/in_systm.h> #include <netinet/ip.h> +#include <netinet/ip_fw.h> +#include <netinet/ip_icmp.h> +#include <netinet/icmp_var.h> #include <netinet/ip_var.h> +#include <netinet/ipfw/ip_fw_private.h> /* XXX: only for DIR_IN/DIR_OUT */ #include <netinet/tcp.h> +#include <netinet/tcp_fsm.h> #include <netinet/tcp_seq.h> -#include <netinet/udp.h> -#include <netinet/ip_icmp.h> -#include <netinet/in_pcb.h> #include <netinet/tcp_timer.h> #include <netinet/tcp_var.h> +#include <netinet/udp.h> #include <netinet/udp_var.h> -#include <netinet/icmp_var.h> -#include <netinet/if_ether.h> -#ifdef __FreeBSD__ -#include <netinet/ip_fw.h> -#include <netinet/ipfw/ip_fw_private.h> /* XXX: only for DIR_IN/DIR_OUT */ -#endif - -#ifndef __FreeBSD__ -#include <dev/rndvar.h> -#endif -#include <net/pfvar.h> -#include <net/if_pflog.h> -#include <net/if_pflow.h> -#include <net/if_pfsync.h> #ifdef INET6 #include <netinet/ip6.h> -#include <netinet/in_pcb.h> #include <netinet/icmp6.h> #include <netinet6/nd6.h> -#ifdef __FreeBSD__ #include <netinet6/ip6_var.h> #include <netinet6/in6_pcb.h> -#endif #endif /* INET6 */ -#ifdef __FreeBSD__ #include <machine/in_cksum.h> -#include <sys/limits.h> -#include <sys/ucred.h> #include <security/mac/mac_framework.h> -extern int ip_optcopy(struct ip *, struct ip *); -#endif - -#ifdef __FreeBSD__ #define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x -#else -#define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x -#endif /* * Global variables */ /* state 
tables */ -#ifdef __FreeBSD__ -VNET_DEFINE(struct pf_state_tree, pf_statetbl); - VNET_DEFINE(struct pf_altqqueue, pf_altqs[2]); VNET_DEFINE(struct pf_palist, pf_pabuf); VNET_DEFINE(struct pf_altqqueue *, pf_altqs_active); @@ -196,221 +135,206 @@ struct pf_anchor_stackframe { VNET_DEFINE(struct pf_anchor_stackframe, pf_anchor_stack[64]); #define V_pf_anchor_stack VNET(pf_anchor_stack) -VNET_DEFINE(uma_zone_t, pf_src_tree_pl); -VNET_DEFINE(uma_zone_t, pf_rule_pl); -VNET_DEFINE(uma_zone_t, pf_pooladdr_pl); -VNET_DEFINE(uma_zone_t, pf_state_pl); -VNET_DEFINE(uma_zone_t, pf_state_key_pl); -VNET_DEFINE(uma_zone_t, pf_state_item_pl); -VNET_DEFINE(uma_zone_t, pf_altq_pl); -#else -struct pf_state_tree pf_statetbl; - -struct pf_altqqueue pf_altqs[2]; -struct pf_palist pf_pabuf; -struct pf_altqqueue *pf_altqs_active; -struct pf_altqqueue *pf_altqs_inactive; -struct pf_status pf_status; +/* + * Queue for pf_intr() sends. + */ +static MALLOC_DEFINE(M_PFTEMP, "pf_temp", "pf(4) temporary allocations"); +struct pf_send_entry { + STAILQ_ENTRY(pf_send_entry) pfse_next; + struct mbuf *pfse_m; + enum { + PFSE_IP, + PFSE_IP6, + PFSE_ICMP, + PFSE_ICMP6, + } pfse_type; + union { + struct route ro; + struct { + int type; + int code; + int mtu; + } icmpopts; + } u; +#define pfse_ro u.ro +#define pfse_icmp_type u.icmpopts.type +#define pfse_icmp_code u.icmpopts.code +#define pfse_icmp_mtu u.icmpopts.mtu +}; -u_int32_t ticket_altqs_active; -u_int32_t ticket_altqs_inactive; -int altqs_inactive_open; -u_int32_t ticket_pabuf; +STAILQ_HEAD(pf_send_head, pf_send_entry); +static VNET_DEFINE(struct pf_send_head, pf_sendqueue); +#define V_pf_sendqueue VNET(pf_sendqueue) -MD5_CTX pf_tcp_secret_ctx; -u_char pf_tcp_secret[16]; -int pf_tcp_secret_init; -int pf_tcp_iss_off; +static struct mtx pf_sendqueue_mtx; +#define PF_SENDQ_LOCK() mtx_lock(&pf_sendqueue_mtx) +#define PF_SENDQ_UNLOCK() mtx_unlock(&pf_sendqueue_mtx) -struct pf_anchor_stackframe { - struct pf_ruleset *rs; - struct pf_rule *r; - struct 
pf_anchor_node *parent; - struct pf_anchor *child; -} pf_anchor_stack[64]; - -struct pool pf_src_tree_pl, pf_rule_pl, pf_pooladdr_pl; -struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl; -struct pool pf_altq_pl; -#endif +/* + * Queue for pf_flush_task() tasks. + */ +struct pf_flush_entry { + SLIST_ENTRY(pf_flush_entry) next; + struct pf_addr addr; + sa_family_t af; + uint8_t dir; + struct pf_rule *rule; /* never dereferenced */ +}; -void pf_init_threshold(struct pf_threshold *, u_int32_t, +SLIST_HEAD(pf_flush_head, pf_flush_entry); +static VNET_DEFINE(struct pf_flush_head, pf_flushqueue); +#define V_pf_flushqueue VNET(pf_flushqueue) +static VNET_DEFINE(struct task, pf_flushtask); +#define V_pf_flushtask VNET(pf_flushtask) + +static struct mtx pf_flushqueue_mtx; +#define PF_FLUSHQ_LOCK() mtx_lock(&pf_flushqueue_mtx) +#define PF_FLUSHQ_UNLOCK() mtx_unlock(&pf_flushqueue_mtx) + +VNET_DEFINE(struct pf_rulequeue, pf_unlinked_rules); +struct mtx pf_unlnkdrules_mtx; + +static VNET_DEFINE(uma_zone_t, pf_sources_z); +#define V_pf_sources_z VNET(pf_sources_z) +static VNET_DEFINE(uma_zone_t, pf_mtag_z); +#define V_pf_mtag_z VNET(pf_mtag_z) +VNET_DEFINE(uma_zone_t, pf_state_z); +VNET_DEFINE(uma_zone_t, pf_state_key_z); + +VNET_DEFINE(uint64_t, pf_stateid[MAXCPU]); +#define PFID_CPUBITS 8 +#define PFID_CPUSHIFT (sizeof(uint64_t) * NBBY - PFID_CPUBITS) +#define PFID_CPUMASK ((uint64_t)((1 << PFID_CPUBITS) - 1) << PFID_CPUSHIFT) +#define PFID_MAXID (~PFID_CPUMASK) +CTASSERT((1 << PFID_CPUBITS) > MAXCPU); + +static void pf_src_tree_remove_state(struct pf_state *); +static void pf_init_threshold(struct pf_threshold *, u_int32_t, u_int32_t); -void pf_add_threshold(struct pf_threshold *); -int pf_check_threshold(struct pf_threshold *); +static void pf_add_threshold(struct pf_threshold *); +static int pf_check_threshold(struct pf_threshold *); -void pf_change_ap(struct pf_addr *, u_int16_t *, +static void pf_change_ap(struct pf_addr *, u_int16_t *, u_int16_t *, u_int16_t *, 
struct pf_addr *, u_int16_t, u_int8_t, sa_family_t); -int pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *, +static int pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *, struct tcphdr *, struct pf_state_peer *); -#ifdef INET6 -void pf_change_a6(struct pf_addr *, u_int16_t *, - struct pf_addr *, u_int8_t); -#endif /* INET6 */ -void pf_change_icmp(struct pf_addr *, u_int16_t *, +static void pf_change_icmp(struct pf_addr *, u_int16_t *, struct pf_addr *, struct pf_addr *, u_int16_t, u_int16_t *, u_int16_t *, u_int16_t *, u_int16_t *, u_int8_t, sa_family_t); -#ifdef __FreeBSD__ -void pf_send_tcp(struct mbuf *, +static void pf_send_tcp(struct mbuf *, const struct pf_rule *, sa_family_t, -#else -void pf_send_tcp(const struct pf_rule *, sa_family_t, -#endif const struct pf_addr *, const struct pf_addr *, u_int16_t, u_int16_t, u_int32_t, u_int32_t, u_int8_t, u_int16_t, u_int16_t, u_int8_t, int, - u_int16_t, struct ether_header *, struct ifnet *); + u_int16_t, struct ifnet *); static void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, sa_family_t, struct pf_rule *); -void pf_detach_state(struct pf_state *); -void pf_state_key_detach(struct pf_state *, int); -u_int32_t pf_tcp_iss(struct pf_pdesc *); -int pf_test_rule(struct pf_rule **, struct pf_state **, +static void pf_detach_state(struct pf_state *); +static int pf_state_key_attach(struct pf_state_key *, + struct pf_state_key *, struct pf_state *); +static void pf_state_key_detach(struct pf_state *, int); +static int pf_state_key_ctor(void *, int, void *, int); +static u_int32_t pf_tcp_iss(struct pf_pdesc *); +static int pf_test_rule(struct pf_rule **, struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, - void *, struct pf_pdesc *, struct pf_rule **, -#ifdef __FreeBSD__ - struct pf_ruleset **, struct ifqueue *, - struct inpcb *); -#else - struct pf_ruleset **, struct ifqueue *); -#endif -static __inline int pf_create_state(struct pf_rule *, struct pf_rule *, + struct pf_pdesc *, struct pf_rule **, 
+ struct pf_ruleset **, struct inpcb *); +static int pf_create_state(struct pf_rule *, struct pf_rule *, struct pf_rule *, struct pf_pdesc *, struct pf_src_node *, struct pf_state_key *, - struct pf_state_key *, struct pf_state_key *, struct pf_state_key *, struct mbuf *, int, u_int16_t, u_int16_t, int *, struct pfi_kif *, struct pf_state **, int, u_int16_t, u_int16_t, int); -int pf_test_fragment(struct pf_rule **, int, +static int pf_test_fragment(struct pf_rule **, int, struct pfi_kif *, struct mbuf *, void *, struct pf_pdesc *, struct pf_rule **, struct pf_ruleset **); -int pf_tcp_track_full(struct pf_state_peer *, +static int pf_tcp_track_full(struct pf_state_peer *, struct pf_state_peer *, struct pf_state **, struct pfi_kif *, struct mbuf *, int, struct pf_pdesc *, u_short *, int *); -int pf_tcp_track_sloppy(struct pf_state_peer *, +static int pf_tcp_track_sloppy(struct pf_state_peer *, struct pf_state_peer *, struct pf_state **, struct pf_pdesc *, u_short *); -int pf_test_state_tcp(struct pf_state **, int, +static int pf_test_state_tcp(struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, u_short *); -int pf_test_state_udp(struct pf_state **, int, +static int pf_test_state_udp(struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *); -int pf_test_state_icmp(struct pf_state **, int, +static int pf_test_state_icmp(struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, u_short *); -int pf_test_state_other(struct pf_state **, int, +static int pf_test_state_other(struct pf_state **, int, struct pfi_kif *, struct mbuf *, struct pf_pdesc *); -void pf_route(struct mbuf **, struct pf_rule *, int, - struct ifnet *, struct pf_state *, - struct pf_pdesc *); -void pf_route6(struct mbuf **, struct pf_rule *, int, - struct ifnet *, struct pf_state *, - struct pf_pdesc *); -#ifndef __FreeBSD__ -int pf_socket_lookup(int, struct pf_pdesc *); -#endif -u_int8_t 
pf_get_wscale(struct mbuf *, int, u_int16_t, +static u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t, sa_family_t); -u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t, +static u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t, sa_family_t); -u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t, +static u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t, int, u_int16_t); -void pf_set_rt_ifp(struct pf_state *, +static void pf_set_rt_ifp(struct pf_state *, struct pf_addr *); -int pf_check_proto_cksum(struct mbuf *, int, int, +static int pf_check_proto_cksum(struct mbuf *, int, int, u_int8_t, sa_family_t); -#ifndef __FreeBSD__ -struct pf_divert *pf_get_divert(struct mbuf *); -#endif -void pf_print_state_parts(struct pf_state *, +static void pf_print_state_parts(struct pf_state *, struct pf_state_key *, struct pf_state_key *); -int pf_addr_wrap_neq(struct pf_addr_wrap *, +static int pf_addr_wrap_neq(struct pf_addr_wrap *, struct pf_addr_wrap *); -int pf_compare_state_keys(struct pf_state_key *, - struct pf_state_key *, struct pfi_kif *, u_int); -#ifdef __FreeBSD__ -struct pf_state *pf_find_state(struct pfi_kif *, - struct pf_state_key_cmp *, u_int, struct mbuf *, - struct pf_mtag *); -#else -struct pf_state *pf_find_state(struct pfi_kif *, - struct pf_state_key_cmp *, u_int, struct mbuf *); -#endif -int pf_src_connlimit(struct pf_state **); -int pf_check_congestion(struct ifqueue *); +static struct pf_state *pf_find_state(struct pfi_kif *, + struct pf_state_key_cmp *, u_int); +static int pf_src_connlimit(struct pf_state **); +static void pf_flush_task(void *c, int pending); +static int pf_insert_src_node(struct pf_src_node **, + struct pf_rule *, struct pf_addr *, sa_family_t); +static int pf_purge_expired_states(int); +static void pf_purge_unlinked_rules(void); +static int pf_mtag_init(void *, int, int); +static void pf_mtag_free(struct m_tag *); +#ifdef INET +static void pf_route(struct mbuf **, struct pf_rule *, int, + struct ifnet *, struct pf_state *, + 
struct pf_pdesc *); +#endif /* INET */ +#ifdef INET6 +static void pf_change_a6(struct pf_addr *, u_int16_t *, + struct pf_addr *, u_int8_t); +static void pf_route6(struct mbuf **, struct pf_rule *, int, + struct ifnet *, struct pf_state *, + struct pf_pdesc *); +#endif /* INET6 */ -#ifdef __FreeBSD__ int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len); VNET_DECLARE(int, pf_end_threads); -VNET_DEFINE(struct pf_pool_limit, pf_pool_limits[PF_LIMIT_MAX]); -#else -extern struct pool pfr_ktable_pl; -extern struct pool pfr_kentry_pl; - -struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = { - { &pf_state_pl, PFSTATE_HIWAT }, - { &pf_src_tree_pl, PFSNODE_HIWAT }, - { &pf_frent_pl, PFFRAG_FRENT_HIWAT }, - { &pfr_ktable_pl, PFR_KTABLE_HIWAT }, - { &pfr_kentry_pl, PFR_KENTRY_HIWAT } -}; -#endif - -#ifdef __FreeBSD__ -#define PPACKET_LOOPED() \ - (pd->pf_mtag->flags & PF_PACKET_LOOPED) +VNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]); -#define PACKET_LOOPED() \ - (pd.pf_mtag->flags & PF_PACKET_LOOPED) +#define PACKET_LOOPED(pd) ((pd)->pf_mtag && \ + (pd)->pf_mtag->flags & PF_PACKET_LOOPED) -#define STATE_LOOKUP(i, k, d, s, m, pt) \ +#define STATE_LOOKUP(i, k, d, s, pd) \ do { \ - s = pf_find_state(i, k, d, m, pt); \ - if (s == NULL || (s)->timeout == PFTM_PURGE) \ + (s) = pf_find_state((i), (k), (d)); \ + if ((s) == NULL || (s)->timeout == PFTM_PURGE) \ return (PF_DROP); \ - if (PPACKET_LOOPED()) \ - return (PF_PASS); \ - if (d == PF_OUT && \ - (((s)->rule.ptr->rt == PF_ROUTETO && \ - (s)->rule.ptr->direction == PF_OUT) || \ - ((s)->rule.ptr->rt == PF_REPLYTO && \ - (s)->rule.ptr->direction == PF_IN)) && \ - (s)->rt_kif != NULL && \ - (s)->rt_kif != i) \ + if (PACKET_LOOPED(pd)) \ return (PF_PASS); \ - } while (0) -#else -#define STATE_LOOKUP(i, k, d, s, m) \ - do { \ - s = pf_find_state(i, k, d, m); \ - if (s == NULL || (s)->timeout == PFTM_PURGE) \ - return (PF_DROP); \ - if (d == PF_OUT && \ + if ((d) == PF_OUT && \ (((s)->rule.ptr->rt == PF_ROUTETO && \ 
(s)->rule.ptr->direction == PF_OUT) || \ ((s)->rule.ptr->rt == PF_REPLYTO && \ (s)->rule.ptr->direction == PF_IN)) && \ (s)->rt_kif != NULL && \ - (s)->rt_kif != i) \ + (s)->rt_kif != (i)) \ return (PF_PASS); \ } while (0) -#endif -#ifdef __FreeBSD__ #define BOUND_IFACE(r, k) \ ((r)->rule_flag & PFRULE_IFBOUND) ? (k) : V_pfi_all -#else -#define BOUND_IFACE(r, k) \ - ((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all -#endif #define STATE_INC_COUNTERS(s) \ do { \ @@ -435,71 +359,40 @@ struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = { s->rule.ptr->states_cur--; \ } while (0) -static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *); -static __inline int pf_state_compare_key(struct pf_state_key *, - struct pf_state_key *); -static __inline int pf_state_compare_id(struct pf_state *, - struct pf_state *); +static MALLOC_DEFINE(M_PFHASH, "pf_hash", "pf(4) hash header structures"); +VNET_DEFINE(struct pf_keyhash *, pf_keyhash); +VNET_DEFINE(struct pf_idhash *, pf_idhash); +VNET_DEFINE(u_long, pf_hashmask); +VNET_DEFINE(struct pf_srchash *, pf_srchash); +VNET_DEFINE(u_long, pf_srchashmask); -#ifdef __FreeBSD__ -VNET_DEFINE(struct pf_src_tree, tree_src_tracking); +SYSCTL_NODE(_net, OID_AUTO, pf, CTLFLAG_RW, 0, "pf(4)"); -VNET_DEFINE(struct pf_state_tree_id, tree_id); -VNET_DEFINE(struct pf_state_queue, state_list); -#else -struct pf_src_tree tree_src_tracking; +VNET_DEFINE(u_long, pf_hashsize); +#define V_pf_hashsize VNET(pf_hashsize) +SYSCTL_VNET_UINT(_net_pf, OID_AUTO, states_hashsize, CTLFLAG_RDTUN, + &VNET_NAME(pf_hashsize), 0, "Size of pf(4) states hashtable"); -struct pf_state_tree_id tree_id; -struct pf_state_queue state_list; -#endif +VNET_DEFINE(u_long, pf_srchashsize); +#define V_pf_srchashsize VNET(pf_srchashsize) +SYSCTL_VNET_UINT(_net_pf, OID_AUTO, source_nodes_hashsize, CTLFLAG_RDTUN, + &VNET_NAME(pf_srchashsize), 0, "Size of pf(4) source nodes hashtable"); -RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare); 
-RB_GENERATE(pf_state_tree, pf_state_key, entry, pf_state_compare_key); -RB_GENERATE(pf_state_tree_id, pf_state, - entry_id, pf_state_compare_id); +VNET_DEFINE(void *, pf_swi_cookie); -static __inline int -pf_src_compare(struct pf_src_node *a, struct pf_src_node *b) +VNET_DEFINE(uint32_t, pf_hashseed); +#define V_pf_hashseed VNET(pf_hashseed) + +static __inline uint32_t +pf_hashkey(struct pf_state_key *sk) { - int diff; + uint32_t h; - if (a->rule.ptr > b->rule.ptr) - return (1); - if (a->rule.ptr < b->rule.ptr) - return (-1); - if ((diff = a->af - b->af) != 0) - return (diff); - switch (a->af) { -#ifdef INET - case AF_INET: - if (a->addr.addr32[0] > b->addr.addr32[0]) - return (1); - if (a->addr.addr32[0] < b->addr.addr32[0]) - return (-1); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - if (a->addr.addr32[3] > b->addr.addr32[3]) - return (1); - if (a->addr.addr32[3] < b->addr.addr32[3]) - return (-1); - if (a->addr.addr32[2] > b->addr.addr32[2]) - return (1); - if (a->addr.addr32[2] < b->addr.addr32[2]) - return (-1); - if (a->addr.addr32[1] > b->addr.addr32[1]) - return (1); - if (a->addr.addr32[1] < b->addr.addr32[1]) - return (-1); - if (a->addr.addr32[0] > b->addr.addr32[0]) - return (1); - if (a->addr.addr32[0] < b->addr.addr32[0]) - return (-1); - break; -#endif /* INET6 */ - } - return (0); + h = jenkins_hash32((uint32_t *)sk, + sizeof(struct pf_state_key_cmp)/sizeof(uint32_t), + V_pf_hashseed); + + return (h & V_pf_hashmask); } #ifdef INET6 @@ -522,20 +415,20 @@ pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) } #endif /* INET6 */ -void +static void pf_init_threshold(struct pf_threshold *threshold, u_int32_t limit, u_int32_t seconds) { threshold->limit = limit * PF_THRESHOLD_MULT; threshold->seconds = seconds; threshold->count = 0; - threshold->last = time_second; + threshold->last = time_uptime; } -void +static void pf_add_threshold(struct pf_threshold *threshold) { - u_int32_t t = time_second, diff = t - threshold->last; 
+ u_int32_t t = time_uptime, diff = t - threshold->last; if (diff >= threshold->seconds) threshold->count = 0; @@ -546,17 +439,21 @@ pf_add_threshold(struct pf_threshold *threshold) threshold->last = t; } -int +static int pf_check_threshold(struct pf_threshold *threshold) { return (threshold->count > threshold->limit); } -int +static int pf_src_connlimit(struct pf_state **state) { + struct pfr_addr p; + struct pf_flush_entry *pffe; int bad = 0; + PF_STATE_LOCK_ASSERT(*state); + (*state)->src_node->conn++; (*state)->src.tcp_est = 1; pf_add_threshold(&(*state)->src_node->conn_rate); @@ -564,333 +461,407 @@ pf_src_connlimit(struct pf_state **state) if ((*state)->rule.ptr->max_src_conn && (*state)->rule.ptr->max_src_conn < (*state)->src_node->conn) { -#ifdef __FreeBSD__ V_pf_status.lcounters[LCNT_SRCCONN]++; -#else - pf_status.lcounters[LCNT_SRCCONN]++; -#endif bad++; } if ((*state)->rule.ptr->max_src_conn_rate.limit && pf_check_threshold(&(*state)->src_node->conn_rate)) { -#ifdef __FreeBSD__ V_pf_status.lcounters[LCNT_SRCCONNRATE]++; -#else - pf_status.lcounters[LCNT_SRCCONNRATE]++; -#endif bad++; } if (!bad) return (0); - if ((*state)->rule.ptr->overload_tbl) { - struct pfr_addr p; - u_int32_t killed = 0; + /* Kill this state. 
*/ + (*state)->timeout = PFTM_PURGE; + (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; -#ifdef __FreeBSD__ - V_pf_status.lcounters[LCNT_OVERLOAD_TABLE]++; - if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - pf_status.lcounters[LCNT_OVERLOAD_TABLE]++; - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif - printf("pf_src_connlimit: blocking address "); - pf_print_host(&(*state)->src_node->addr, 0, - (*state)->key[PF_SK_WIRE]->af); - } + if ((*state)->rule.ptr->overload_tbl == NULL) + return (1); + + V_pf_status.lcounters[LCNT_OVERLOAD_TABLE]++; + if (V_pf_status.debug >= PF_DEBUG_MISC) { + printf("%s: blocking address ", __func__); + pf_print_host(&(*state)->src_node->addr, 0, + (*state)->key[PF_SK_WIRE]->af); + printf("\n"); + } - bzero(&p, sizeof(p)); - p.pfra_af = (*state)->key[PF_SK_WIRE]->af; - switch ((*state)->key[PF_SK_WIRE]->af) { + bzero(&p, sizeof(p)); + p.pfra_af = (*state)->key[PF_SK_WIRE]->af; + switch ((*state)->key[PF_SK_WIRE]->af) { #ifdef INET - case AF_INET: - p.pfra_net = 32; - p.pfra_ip4addr = (*state)->src_node->addr.v4; - break; + case AF_INET: + p.pfra_net = 32; + p.pfra_ip4addr = (*state)->src_node->addr.v4; + break; #endif /* INET */ #ifdef INET6 - case AF_INET6: - p.pfra_net = 128; - p.pfra_ip6addr = (*state)->src_node->addr.v6; - break; + case AF_INET6: + p.pfra_net = 128; + p.pfra_ip6addr = (*state)->src_node->addr.v6; + break; #endif /* INET6 */ - } + } - pfr_insert_kentry((*state)->rule.ptr->overload_tbl, - &p, time_second); + pfr_insert_kentry((*state)->rule.ptr->overload_tbl, &p, time_second); - /* kill existing states if that's required. 
*/ - if ((*state)->rule.ptr->flush) { - struct pf_state_key *sk; - struct pf_state *st; + if ((*state)->rule.ptr->flush == 0) + return (1); -#ifdef __FreeBSD__ - V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++; - RB_FOREACH(st, pf_state_tree_id, &V_tree_id) { -#else - pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++; - RB_FOREACH(st, pf_state_tree_id, &tree_id) { -#endif - sk = st->key[PF_SK_WIRE]; - /* - * Kill states from this source. (Only those - * from the same rule if PF_FLUSH_GLOBAL is not - * set) - */ - if (sk->af == - (*state)->key[PF_SK_WIRE]->af && - (((*state)->direction == PF_OUT && - PF_AEQ(&(*state)->src_node->addr, - &sk->addr[1], sk->af)) || - ((*state)->direction == PF_IN && - PF_AEQ(&(*state)->src_node->addr, - &sk->addr[0], sk->af))) && - ((*state)->rule.ptr->flush & - PF_FLUSH_GLOBAL || - (*state)->rule.ptr == st->rule.ptr)) { - st->timeout = PFTM_PURGE; - st->src.state = st->dst.state = - TCPS_CLOSED; - killed++; - } + /* Schedule flushing task. */ + pffe = malloc(sizeof(*pffe), M_PFTEMP, M_NOWAIT); + if (pffe == NULL) + return (1); /* too bad :( */ + + bcopy(&(*state)->src_node->addr, &pffe->addr, sizeof(pffe->addr)); + pffe->af = (*state)->key[PF_SK_WIRE]->af; + pffe->dir = (*state)->direction; + if ((*state)->rule.ptr->flush & PF_FLUSH_GLOBAL) + pffe->rule = NULL; + else + pffe->rule = (*state)->rule.ptr; + PF_FLUSHQ_LOCK(); + SLIST_INSERT_HEAD(&V_pf_flushqueue, pffe, next); + PF_FLUSHQ_UNLOCK(); + taskqueue_enqueue(taskqueue_swi, &V_pf_flushtask); + + return (1); +} + +static void +pf_flush_task(void *c, int pending) +{ + struct pf_flush_head queue; + struct pf_flush_entry *pffe, *pffe1; + uint32_t killed = 0; + + PF_FLUSHQ_LOCK(); + queue = *(struct pf_flush_head *)c; + SLIST_INIT((struct pf_flush_head *)c); + PF_FLUSHQ_UNLOCK(); + + V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++; + + for (int i = 0; i <= V_pf_hashmask; i++) { + struct pf_idhash *ih = &V_pf_idhash[i]; + struct pf_state_key *sk; + struct pf_state *s; + + PF_HASHROW_LOCK(ih); + 
LIST_FOREACH(s, &ih->states, entry) { + sk = s->key[PF_SK_WIRE]; + SLIST_FOREACH(pffe, &queue, next) + if (sk->af == pffe->af && (pffe->rule == NULL || + pffe->rule == s->rule.ptr) && + ((pffe->dir == PF_OUT && + PF_AEQ(&pffe->addr, &sk->addr[1], sk->af)) || + (pffe->dir == PF_IN && + PF_AEQ(&pffe->addr, &sk->addr[0], sk->af)))) { + s->timeout = PFTM_PURGE; + s->src.state = s->dst.state = TCPS_CLOSED; + killed++; } -#ifdef __FreeBSD__ - if (V_pf_status.debug >= PF_DEBUG_MISC) -#else - if (pf_status.debug >= PF_DEBUG_MISC) -#endif - printf(", %u states killed", killed); } -#ifdef __FreeBSD__ - if (V_pf_status.debug >= PF_DEBUG_MISC) -#else - if (pf_status.debug >= PF_DEBUG_MISC) -#endif - printf("\n"); + PF_HASHROW_UNLOCK(ih); } + SLIST_FOREACH_SAFE(pffe, &queue, next, pffe1) + free(pffe, M_PFTEMP); + if (V_pf_status.debug >= PF_DEBUG_MISC) + printf("%s: %u states killed", __func__, killed); +} - /* kill this state */ - (*state)->timeout = PFTM_PURGE; - (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; - return (1); +/* + * Can return locked on failure, so that we can consistently + * allocate and insert a new one. 
+ */ +struct pf_src_node * +pf_find_src_node(struct pf_addr *src, struct pf_rule *rule, sa_family_t af, + int returnlocked) +{ + struct pf_srchash *sh; + struct pf_src_node *n; + + V_pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; + + sh = &V_pf_srchash[pf_hashsrc(src, af)]; + PF_HASHROW_LOCK(sh); + LIST_FOREACH(n, &sh->nodes, entry) + if (n->rule.ptr == rule && n->af == af && + ((af == AF_INET && n->addr.v4.s_addr == src->v4.s_addr) || + (af == AF_INET6 && bcmp(&n->addr, src, sizeof(*src)) == 0))) + break; + if (n != NULL || returnlocked == 0) + PF_HASHROW_UNLOCK(sh); + + return (n); } -int +static int pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule, struct pf_addr *src, sa_family_t af) { - struct pf_src_node k; + + KASSERT((rule->rule_flag & PFRULE_RULESRCTRACK || + rule->rpool.opts & PF_POOL_STICKYADDR), + ("%s for non-tracking rule %p", __func__, rule)); + + if (*sn == NULL) + *sn = pf_find_src_node(src, rule, af, 1); if (*sn == NULL) { - k.af = af; - PF_ACPY(&k.addr, src, af); - if (rule->rule_flag & PFRULE_RULESRCTRACK || - rule->rpool.opts & PF_POOL_STICKYADDR) - k.rule.ptr = rule; - else - k.rule.ptr = NULL; -#ifdef __FreeBSD__ - V_pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; - *sn = RB_FIND(pf_src_tree, &V_tree_src_tracking, &k); -#else - pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; - *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k); -#endif - } - if (*sn == NULL) { + struct pf_srchash *sh = &V_pf_srchash[pf_hashsrc(src, af)]; + + PF_HASHROW_ASSERT(sh); + if (!rule->max_src_nodes || rule->src_nodes < rule->max_src_nodes) -#ifdef __FreeBSD__ - (*sn) = pool_get(&V_pf_src_tree_pl, PR_NOWAIT | PR_ZERO); -#else - (*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT | PR_ZERO); -#endif + (*sn) = uma_zalloc(V_pf_sources_z, M_NOWAIT | M_ZERO); else -#ifdef __FreeBSD__ V_pf_status.lcounters[LCNT_SRCNODES]++; -#else - pf_status.lcounters[LCNT_SRCNODES]++; -#endif - if ((*sn) == NULL) + if ((*sn) == NULL) { + PF_HASHROW_UNLOCK(sh); return (-1); + } 
pf_init_threshold(&(*sn)->conn_rate, rule->max_src_conn_rate.limit, rule->max_src_conn_rate.seconds); (*sn)->af = af; - if (rule->rule_flag & PFRULE_RULESRCTRACK || - rule->rpool.opts & PF_POOL_STICKYADDR) - (*sn)->rule.ptr = rule; - else - (*sn)->rule.ptr = NULL; + (*sn)->rule.ptr = rule; PF_ACPY(&(*sn)->addr, src, af); - if (RB_INSERT(pf_src_tree, -#ifdef __FreeBSD__ - &V_tree_src_tracking, *sn) != NULL) { - if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - &tree_src_tracking, *sn) != NULL) { - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif - printf("pf: src_tree insert failed: "); - pf_print_host(&(*sn)->addr, 0, af); - printf("\n"); - } -#ifdef __FreeBSD__ - pool_put(&V_pf_src_tree_pl, *sn); -#else - pool_put(&pf_src_tree_pl, *sn); -#endif - return (-1); - } - (*sn)->creation = time_second; + LIST_INSERT_HEAD(&sh->nodes, *sn, entry); + (*sn)->creation = time_uptime; (*sn)->ruletype = rule->action; if ((*sn)->rule.ptr != NULL) (*sn)->rule.ptr->src_nodes++; -#ifdef __FreeBSD__ + PF_HASHROW_UNLOCK(sh); V_pf_status.scounters[SCNT_SRC_NODE_INSERT]++; V_pf_status.src_nodes++; -#else - pf_status.scounters[SCNT_SRC_NODE_INSERT]++; - pf_status.src_nodes++; -#endif } else { if (rule->max_src_states && (*sn)->states >= rule->max_src_states) { -#ifdef __FreeBSD__ V_pf_status.lcounters[LCNT_SRCSTATES]++; -#else - pf_status.lcounters[LCNT_SRCSTATES]++; -#endif return (-1); } } return (0); } -/* state table stuff */ +static void +pf_remove_src_node(struct pf_src_node *src) +{ + struct pf_srchash *sh; + + sh = &V_pf_srchash[pf_hashsrc(&src->addr, src->af)]; + PF_HASHROW_LOCK(sh); + LIST_REMOVE(src, entry); + PF_HASHROW_UNLOCK(sh); +} -static __inline int -pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b) +/* Data storage structures initialization. 
*/ +void +pf_initialize() { - int diff; + struct pf_keyhash *kh; + struct pf_idhash *ih; + struct pf_srchash *sh; + u_int i; + + TUNABLE_ULONG_FETCH("net.pf.states_hashsize", &V_pf_hashsize); + if (V_pf_hashsize == 0 || !powerof2(V_pf_hashsize)) + V_pf_hashsize = PF_HASHSIZ; + TUNABLE_ULONG_FETCH("net.pf.source_nodes_hashsize", &V_pf_srchashsize); + if (V_pf_srchashsize == 0 || !powerof2(V_pf_srchashsize)) + V_pf_srchashsize = PF_HASHSIZ / 4; + + V_pf_hashseed = arc4random(); + + /* States and state keys storage. */ + V_pf_state_z = uma_zcreate("pf states", sizeof(struct pf_state), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + V_pf_limits[PF_LIMIT_STATES].zone = V_pf_state_z; + uma_zone_set_max(V_pf_state_z, PFSTATE_HIWAT); + + V_pf_state_key_z = uma_zcreate("pf state keys", + sizeof(struct pf_state_key), pf_state_key_ctor, NULL, NULL, NULL, + UMA_ALIGN_PTR, 0); + V_pf_keyhash = malloc(V_pf_hashsize * sizeof(struct pf_keyhash), + M_PFHASH, M_WAITOK | M_ZERO); + V_pf_idhash = malloc(V_pf_hashsize * sizeof(struct pf_idhash), + M_PFHASH, M_WAITOK | M_ZERO); + V_pf_hashmask = V_pf_hashsize - 1; + for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= V_pf_hashmask; + i++, kh++, ih++) { + mtx_init(&kh->lock, "pf_keyhash", NULL, MTX_DEF); + mtx_init(&ih->lock, "pf_idhash", NULL, MTX_DEF); + } + + /* Source nodes. 
*/ + V_pf_sources_z = uma_zcreate("pf source nodes", + sizeof(struct pf_src_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, + 0); + V_pf_limits[PF_LIMIT_SRC_NODES].zone = V_pf_sources_z; + uma_zone_set_max(V_pf_sources_z, PFSNODE_HIWAT); + V_pf_srchash = malloc(V_pf_srchashsize * sizeof(struct pf_srchash), + M_PFHASH, M_WAITOK|M_ZERO); + V_pf_srchashmask = V_pf_srchashsize - 1; + for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) + mtx_init(&sh->lock, "pf_srchash", NULL, MTX_DEF); + + /* ALTQ */ + TAILQ_INIT(&V_pf_altqs[0]); + TAILQ_INIT(&V_pf_altqs[1]); + TAILQ_INIT(&V_pf_pabuf); + V_pf_altqs_active = &V_pf_altqs[0]; + V_pf_altqs_inactive = &V_pf_altqs[1]; + + /* Mbuf tags */ + V_pf_mtag_z = uma_zcreate("pf mtags", sizeof(struct m_tag) + + sizeof(struct pf_mtag), NULL, NULL, pf_mtag_init, NULL, + UMA_ALIGN_PTR, 0); + + /* Send & flush queues. */ + STAILQ_INIT(&V_pf_sendqueue); + SLIST_INIT(&V_pf_flushqueue); + TASK_INIT(&V_pf_flushtask, 0, pf_flush_task, &V_pf_flushqueue); + mtx_init(&pf_sendqueue_mtx, "pf send queue", NULL, MTX_DEF); + mtx_init(&pf_flushqueue_mtx, "pf flush queue", NULL, MTX_DEF); + + /* Unlinked, but may be referenced rules. 
*/ + TAILQ_INIT(&V_pf_unlinked_rules); + mtx_init(&pf_unlnkdrules_mtx, "pf unlinked rules", NULL, MTX_DEF); +} - if ((diff = a->proto - b->proto) != 0) - return (diff); - if ((diff = a->af - b->af) != 0) - return (diff); - switch (a->af) { -#ifdef INET - case AF_INET: - if (a->addr[0].addr32[0] > b->addr[0].addr32[0]) - return (1); - if (a->addr[0].addr32[0] < b->addr[0].addr32[0]) - return (-1); - if (a->addr[1].addr32[0] > b->addr[1].addr32[0]) - return (1); - if (a->addr[1].addr32[0] < b->addr[1].addr32[0]) - return (-1); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - if (a->addr[0].addr32[3] > b->addr[0].addr32[3]) - return (1); - if (a->addr[0].addr32[3] < b->addr[0].addr32[3]) - return (-1); - if (a->addr[1].addr32[3] > b->addr[1].addr32[3]) - return (1); - if (a->addr[1].addr32[3] < b->addr[1].addr32[3]) - return (-1); - if (a->addr[0].addr32[2] > b->addr[0].addr32[2]) - return (1); - if (a->addr[0].addr32[2] < b->addr[0].addr32[2]) - return (-1); - if (a->addr[1].addr32[2] > b->addr[1].addr32[2]) - return (1); - if (a->addr[1].addr32[2] < b->addr[1].addr32[2]) - return (-1); - if (a->addr[0].addr32[1] > b->addr[0].addr32[1]) - return (1); - if (a->addr[0].addr32[1] < b->addr[0].addr32[1]) - return (-1); - if (a->addr[1].addr32[1] > b->addr[1].addr32[1]) - return (1); - if (a->addr[1].addr32[1] < b->addr[1].addr32[1]) - return (-1); - if (a->addr[0].addr32[0] > b->addr[0].addr32[0]) - return (1); - if (a->addr[0].addr32[0] < b->addr[0].addr32[0]) - return (-1); - if (a->addr[1].addr32[0] > b->addr[1].addr32[0]) - return (1); - if (a->addr[1].addr32[0] < b->addr[1].addr32[0]) - return (-1); - break; -#endif /* INET6 */ - } +void +pf_cleanup() +{ + struct pf_keyhash *kh; + struct pf_idhash *ih; + struct pf_srchash *sh; + struct pf_send_entry *pfse, *next; + u_int i; + + for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= V_pf_hashmask; + i++, kh++, ih++) { + KASSERT(LIST_EMPTY(&kh->keys), ("%s: key hash not empty", + __func__)); + 
KASSERT(LIST_EMPTY(&ih->states), ("%s: id hash not empty", + __func__)); + mtx_destroy(&kh->lock); + mtx_destroy(&ih->lock); + } + free(V_pf_keyhash, M_PFHASH); + free(V_pf_idhash, M_PFHASH); + + for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) { + KASSERT(LIST_EMPTY(&sh->nodes), + ("%s: source node hash not empty", __func__)); + mtx_destroy(&sh->lock); + } + free(V_pf_srchash, M_PFHASH); + + STAILQ_FOREACH_SAFE(pfse, &V_pf_sendqueue, pfse_next, next) { + m_freem(pfse->pfse_m); + free(pfse, M_PFTEMP); + } + + mtx_destroy(&pf_sendqueue_mtx); + mtx_destroy(&pf_flushqueue_mtx); + mtx_destroy(&pf_unlnkdrules_mtx); + + uma_zdestroy(V_pf_mtag_z); + uma_zdestroy(V_pf_sources_z); + uma_zdestroy(V_pf_state_z); + uma_zdestroy(V_pf_state_key_z); +} + +static int +pf_mtag_init(void *mem, int size, int how) +{ + struct m_tag *t; - if ((diff = a->port[0] - b->port[0]) != 0) - return (diff); - if ((diff = a->port[1] - b->port[1]) != 0) - return (diff); + t = (struct m_tag *)mem; + t->m_tag_cookie = MTAG_ABI_COMPAT; + t->m_tag_id = PACKET_TAG_PF; + t->m_tag_len = sizeof(struct pf_mtag); + t->m_tag_free = pf_mtag_free; return (0); } -static __inline int -pf_state_compare_id(struct pf_state *a, struct pf_state *b) +static void +pf_mtag_free(struct m_tag *t) { - if (a->id > b->id) - return (1); - if (a->id < b->id) - return (-1); - if (a->creatorid > b->creatorid) - return (1); - if (a->creatorid < b->creatorid) - return (-1); - return (0); + uma_zfree(V_pf_mtag_z, t); } -int -pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx) +struct pf_mtag * +pf_get_mtag(struct mbuf *m) { - struct pf_state_item *si; - struct pf_state_key *cur; - struct pf_state *olds = NULL; + struct m_tag *mtag; -#ifdef __FreeBSD__ - KASSERT(s->key[idx] == NULL, ("%s: key is null!", __FUNCTION__)); -#else - KASSERT(s->key[idx] == NULL); /* XXX handle this? 
*/ -#endif + if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) != NULL) + return ((struct pf_mtag *)(mtag + 1)); -#ifdef __FreeBSD__ - if ((cur = RB_INSERT(pf_state_tree, &V_pf_statetbl, sk)) != NULL) { -#else - if ((cur = RB_INSERT(pf_state_tree, &pf_statetbl, sk)) != NULL) { -#endif - /* key exists. check for same kif, if none, add to key */ - TAILQ_FOREACH(si, &cur->states, entry) - if (si->s->kif == s->kif && - si->s->direction == s->direction) { + mtag = uma_zalloc(V_pf_mtag_z, M_NOWAIT); + if (mtag == NULL) + return (NULL); + bzero(mtag + 1, sizeof(struct pf_mtag)); + m_tag_prepend(m, mtag); + + return ((struct pf_mtag *)(mtag + 1)); +} + +static int +pf_state_key_attach(struct pf_state_key *skw, struct pf_state_key *sks, + struct pf_state *s) +{ + struct pf_keyhash *kh; + struct pf_state_key *sk, *cur; + struct pf_state *si, *olds = NULL; + int idx; + + KASSERT(s->refs == 0, ("%s: state not pristine", __func__)); + KASSERT(s->key[PF_SK_WIRE] == NULL, ("%s: state has key", __func__)); + KASSERT(s->key[PF_SK_STACK] == NULL, ("%s: state has key", __func__)); + + /* + * First run: start with wire key. + */ + sk = skw; + idx = PF_SK_WIRE; + +keyattach: + kh = &V_pf_keyhash[pf_hashkey(sk)]; + + PF_HASHROW_LOCK(kh); + LIST_FOREACH(cur, &kh->keys, entry) + if (bcmp(cur, sk, sizeof(struct pf_state_key_cmp)) == 0) + break; + + if (cur != NULL) { + /* Key exists. Check for same kif, if none, add to key. */ + TAILQ_FOREACH(si, &cur->states[idx], key_list[idx]) { + struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(si)]; + + PF_HASHROW_LOCK(ih); + if (si->kif == s->kif && + si->direction == s->direction) { if (sk->proto == IPPROTO_TCP && - si->s->src.state >= TCPS_FIN_WAIT_2 && - si->s->dst.state >= TCPS_FIN_WAIT_2) { - si->s->src.state = si->s->dst.state = + si->src.state >= TCPS_FIN_WAIT_2 && + si->dst.state >= TCPS_FIN_WAIT_2) { + si->src.state = si->dst.state = TCPS_CLOSED; - /* unlink late or sks can go away */ - olds = si->s; + /* Unlink later or cur can go away. 
*/ + pf_ref_state(si); + olds = si; } else { -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif printf("pf: %s key attach " "failed on %s: ", (idx == PF_SK_WIRE) ? @@ -902,375 +873,294 @@ pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx) (idx == PF_SK_STACK) ? sk : NULL); printf(", existing: "); - pf_print_state_parts(si->s, + pf_print_state_parts(si, (idx == PF_SK_WIRE) ? sk : NULL, (idx == PF_SK_STACK) ? sk : NULL); printf("\n"); } -#ifdef __FreeBSD__ - pool_put(&V_pf_state_key_pl, sk); -#else - pool_put(&pf_state_key_pl, sk); -#endif + PF_HASHROW_UNLOCK(ih); + PF_HASHROW_UNLOCK(kh); + uma_zfree(V_pf_state_key_z, sk); + if (idx == PF_SK_STACK) + pf_detach_state(s); return (-1); /* collision! */ } } -#ifdef __FreeBSD__ - pool_put(&V_pf_state_key_pl, sk); -#else - pool_put(&pf_state_key_pl, sk); -#endif + PF_HASHROW_UNLOCK(ih); + } + uma_zfree(V_pf_state_key_z, sk); s->key[idx] = cur; - } else + } else { + LIST_INSERT_HEAD(&kh->keys, sk, entry); s->key[idx] = sk; - -#ifdef __FreeBSD__ - if ((si = pool_get(&V_pf_state_item_pl, PR_NOWAIT)) == NULL) { -#else - if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) { -#endif - pf_state_key_detach(s, idx); - return (-1); } - si->s = s; - /* list is sorted, if-bound states before floating */ -#ifdef __FreeBSD__ +stateattach: + /* List is sorted, if-bound states before floating. */ if (s->kif == V_pfi_all) -#else - if (s->kif == pfi_all) -#endif - TAILQ_INSERT_TAIL(&s->key[idx]->states, si, entry); + TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], s, key_list[idx]); else - TAILQ_INSERT_HEAD(&s->key[idx]->states, si, entry); + TAILQ_INSERT_HEAD(&s->key[idx]->states[idx], s, key_list[idx]); + + /* + * Attach done. See how should we (or should not?) + * attach a second key. 
+ */ + if (sks == skw) { + s->key[PF_SK_STACK] = s->key[PF_SK_WIRE]; + idx = PF_SK_STACK; + sks = NULL; + goto stateattach; + } else if (sks != NULL) { + PF_HASHROW_UNLOCK(kh); + if (olds) { + pf_unlink_state(olds, 0); + pf_release_state(olds); + olds = NULL; + } + /* + * Continue attaching with stack key. + */ + sk = sks; + idx = PF_SK_STACK; + sks = NULL; + goto keyattach; + } else + PF_HASHROW_UNLOCK(kh); - if (olds) - pf_unlink_state(olds); + if (olds) { + pf_unlink_state(olds, 0); + pf_release_state(olds); + } + + KASSERT(s->key[PF_SK_WIRE] != NULL && s->key[PF_SK_STACK] != NULL, + ("%s failure", __func__)); return (0); } -void +static void pf_detach_state(struct pf_state *s) { - if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK]) - s->key[PF_SK_WIRE] = NULL; - - if (s->key[PF_SK_STACK] != NULL) - pf_state_key_detach(s, PF_SK_STACK); + struct pf_state_key *sks = s->key[PF_SK_STACK]; + struct pf_keyhash *kh; + + if (sks != NULL) { + kh = &V_pf_keyhash[pf_hashkey(sks)]; + PF_HASHROW_LOCK(kh); + if (s->key[PF_SK_STACK] != NULL) + pf_state_key_detach(s, PF_SK_STACK); + /* + * If both point to same key, then we are done. 
+ */ + if (sks == s->key[PF_SK_WIRE]) { + pf_state_key_detach(s, PF_SK_WIRE); + PF_HASHROW_UNLOCK(kh); + return; + } + PF_HASHROW_UNLOCK(kh); + } - if (s->key[PF_SK_WIRE] != NULL) - pf_state_key_detach(s, PF_SK_WIRE); + if (s->key[PF_SK_WIRE] != NULL) { + kh = &V_pf_keyhash[pf_hashkey(s->key[PF_SK_WIRE])]; + PF_HASHROW_LOCK(kh); + if (s->key[PF_SK_WIRE] != NULL) + pf_state_key_detach(s, PF_SK_WIRE); + PF_HASHROW_UNLOCK(kh); + } } -void +static void pf_state_key_detach(struct pf_state *s, int idx) { - struct pf_state_item *si; + struct pf_state_key *sk = s->key[idx]; +#ifdef INVARIANTS + struct pf_keyhash *kh = &V_pf_keyhash[pf_hashkey(sk)]; - si = TAILQ_FIRST(&s->key[idx]->states); - while (si && si->s != s) - si = TAILQ_NEXT(si, entry); - - if (si) { - TAILQ_REMOVE(&s->key[idx]->states, si, entry); -#ifdef __FreeBSD__ - pool_put(&V_pf_state_item_pl, si); -#else - pool_put(&pf_state_item_pl, si); + PF_HASHROW_ASSERT(kh); #endif - } + TAILQ_REMOVE(&sk->states[idx], s, key_list[idx]); + s->key[idx] = NULL; - if (TAILQ_EMPTY(&s->key[idx]->states)) { -#ifdef __FreeBSD__ - RB_REMOVE(pf_state_tree, &V_pf_statetbl, s->key[idx]); -#else - RB_REMOVE(pf_state_tree, &pf_statetbl, s->key[idx]); -#endif - if (s->key[idx]->reverse) - s->key[idx]->reverse->reverse = NULL; -#ifdef __FreeBSD__ - /* XXX: implement this */ -#else - if (s->key[idx]->inp) - s->key[idx]->inp->inp_pf_sk = NULL; -#endif -#ifdef __FreeBSD__ - pool_put(&V_pf_state_key_pl, s->key[idx]); -#else - pool_put(&pf_state_key_pl, s->key[idx]); -#endif + if (TAILQ_EMPTY(&sk->states[0]) && TAILQ_EMPTY(&sk->states[1])) { + LIST_REMOVE(sk, entry); + uma_zfree(V_pf_state_key_z, sk); } - s->key[idx] = NULL; +} + +static int +pf_state_key_ctor(void *mem, int size, void *arg, int flags) +{ + struct pf_state_key *sk = mem; + + bzero(sk, sizeof(struct pf_state_key_cmp)); + TAILQ_INIT(&sk->states[PF_SK_WIRE]); + TAILQ_INIT(&sk->states[PF_SK_STACK]); + + return (0); } struct pf_state_key * -pf_alloc_state_key(int pool_flags) 
+pf_state_key_setup(struct pf_pdesc *pd, struct pf_addr *saddr, + struct pf_addr *daddr, u_int16_t sport, u_int16_t dport) { - struct pf_state_key *sk; + struct pf_state_key *sk; -#ifdef __FreeBSD__ - if ((sk = pool_get(&V_pf_state_key_pl, pool_flags)) == NULL) -#else - if ((sk = pool_get(&pf_state_key_pl, pool_flags)) == NULL) -#endif + sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT); + if (sk == NULL) return (NULL); - TAILQ_INIT(&sk->states); + + PF_ACPY(&sk->addr[pd->sidx], saddr, pd->af); + PF_ACPY(&sk->addr[pd->didx], daddr, pd->af); + sk->port[pd->sidx] = sport; + sk->port[pd->didx] = dport; + sk->proto = pd->proto; + sk->af = pd->af; return (sk); } -int -pf_state_key_setup(struct pf_pdesc *pd, struct pf_rule *nr, - struct pf_state_key **skw, struct pf_state_key **sks, - struct pf_state_key **skp, struct pf_state_key **nkp, - struct pf_addr *saddr, struct pf_addr *daddr, - u_int16_t sport, u_int16_t dport) +struct pf_state_key * +pf_state_key_clone(struct pf_state_key *orig) { -#ifdef __FreeBSD__ - KASSERT((*skp == NULL && *nkp == NULL), - ("%s: skp == NULL && nkp == NULL", __FUNCTION__)); -#else - KASSERT((*skp == NULL && *nkp == NULL)); -#endif + struct pf_state_key *sk; - if ((*skp = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) - return (ENOMEM); + sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT); + if (sk == NULL) + return (NULL); - PF_ACPY(&(*skp)->addr[pd->sidx], saddr, pd->af); - PF_ACPY(&(*skp)->addr[pd->didx], daddr, pd->af); - (*skp)->port[pd->sidx] = sport; - (*skp)->port[pd->didx] = dport; - (*skp)->proto = pd->proto; - (*skp)->af = pd->af; - - if (nr != NULL) { - if ((*nkp = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) - return (ENOMEM); /* caller must handle cleanup */ - - /* XXX maybe just bcopy and TAILQ_INIT(&(*nkp)->states) */ - PF_ACPY(&(*nkp)->addr[0], &(*skp)->addr[0], pd->af); - PF_ACPY(&(*nkp)->addr[1], &(*skp)->addr[1], pd->af); - (*nkp)->port[0] = (*skp)->port[0]; - (*nkp)->port[1] = (*skp)->port[1]; - (*nkp)->proto = pd->proto; - 
(*nkp)->af = pd->af; - } else - *nkp = *skp; + bcopy(orig, sk, sizeof(struct pf_state_key_cmp)); - if (pd->dir == PF_IN) { - *skw = *skp; - *sks = *nkp; - } else { - *sks = *skp; - *skw = *nkp; - } - return (0); + return (sk); } - int pf_state_insert(struct pfi_kif *kif, struct pf_state_key *skw, struct pf_state_key *sks, struct pf_state *s) { -#ifndef __FreeBSD__ - splassert(IPL_SOFTNET); -#endif + struct pf_idhash *ih; + struct pf_state *cur; + + KASSERT(TAILQ_EMPTY(&sks->states[0]) && TAILQ_EMPTY(&sks->states[1]), + ("%s: sks not pristine", __func__)); + KASSERT(TAILQ_EMPTY(&skw->states[0]) && TAILQ_EMPTY(&skw->states[1]), + ("%s: skw not pristine", __func__)); + KASSERT(s->refs == 0, ("%s: state not pristine", __func__)); s->kif = kif; - if (skw == sks) { - if (pf_state_key_attach(skw, s, PF_SK_WIRE)) - return (-1); - s->key[PF_SK_STACK] = s->key[PF_SK_WIRE]; - } else { - if (pf_state_key_attach(skw, s, PF_SK_WIRE)) { -#ifdef __FreeBSD__ - pool_put(&V_pf_state_key_pl, sks); -#else - pool_put(&pf_state_key_pl, sks); -#endif - return (-1); - } - if (pf_state_key_attach(sks, s, PF_SK_STACK)) { - pf_state_key_detach(s, PF_SK_WIRE); - return (-1); - } - } + if (pf_state_key_attach(skw, sks, s)) + return (-1); if (s->id == 0 && s->creatorid == 0) { -#ifdef __FreeBSD__ - s->id = htobe64(V_pf_status.stateid++); + /* XXX: should be atomic, but probability of collision low */ + if ((s->id = V_pf_stateid[curcpu]++) == PFID_MAXID) + V_pf_stateid[curcpu] = 1; + s->id |= (uint64_t )curcpu << PFID_CPUSHIFT; + s->id = htobe64(s->id); s->creatorid = V_pf_status.hostid; -#else - s->id = htobe64(pf_status.stateid++); - s->creatorid = pf_status.hostid; -#endif } -#ifdef __FreeBSD__ - if (RB_INSERT(pf_state_tree_id, &V_tree_id, s) != NULL) { + + ih = &V_pf_idhash[PF_IDHASH(s)]; + PF_HASHROW_LOCK(ih); + LIST_FOREACH(cur, &ih->states, entry) + if (cur->id == s->id && cur->creatorid == s->creatorid) + break; + + if (cur != NULL) { + PF_HASHROW_UNLOCK(ih); if (V_pf_status.debug >= 
PF_DEBUG_MISC) { -#else - if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) { - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif printf("pf: state insert failed: " "id: %016llx creatorid: %08x", -#ifdef __FreeBSD__ - (unsigned long long)betoh64(s->id), ntohl(s->creatorid)); -#else - betoh64(s->id), ntohl(s->creatorid)); -#endif + (unsigned long long)be64toh(s->id), + ntohl(s->creatorid)); printf("\n"); } pf_detach_state(s); return (-1); } -#ifdef __FreeBSD__ - TAILQ_INSERT_TAIL(&V_state_list, s, entry_list); + LIST_INSERT_HEAD(&ih->states, s, entry); + /* One for keys, one for ID hash. */ + refcount_init(&s->refs, 2); + V_pf_status.fcounters[FCNT_STATE_INSERT]++; - V_pf_status.states++; -#else - TAILQ_INSERT_TAIL(&state_list, s, entry_list); - pf_status.fcounters[FCNT_STATE_INSERT]++; - pf_status.states++; -#endif - pfi_kif_ref(kif, PFI_KIF_REF_STATE); -#if NPFSYNC > 0 -#ifdef __FreeBSD__ if (pfsync_insert_state_ptr != NULL) pfsync_insert_state_ptr(s); -#else - pfsync_insert_state(s); -#endif -#endif + + /* Returns locked. */ return (0); } +/* + * Find state by ID: returns with locked row on success. 
+ */ struct pf_state * -pf_find_state_byid(struct pf_state_cmp *key) +pf_find_state_byid(uint64_t id, uint32_t creatorid) { -#ifdef __FreeBSD__ + struct pf_idhash *ih; + struct pf_state *s; + V_pf_status.fcounters[FCNT_STATE_SEARCH]++; - return (RB_FIND(pf_state_tree_id, &V_tree_id, (struct pf_state *)key)); -#else - pf_status.fcounters[FCNT_STATE_SEARCH]++; + ih = &V_pf_idhash[(be64toh(id) % (V_pf_hashmask + 1))]; - return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key)); -#endif -} + PF_HASHROW_LOCK(ih); + LIST_FOREACH(s, &ih->states, entry) + if (s->id == id && s->creatorid == creatorid) + break; -/* XXX debug function, intended to be removed one day */ -int -pf_compare_state_keys(struct pf_state_key *a, struct pf_state_key *b, - struct pfi_kif *kif, u_int dir) -{ - /* a (from hdr) and b (new) must be exact opposites of each other */ - if (a->af == b->af && a->proto == b->proto && - PF_AEQ(&a->addr[0], &b->addr[1], a->af) && - PF_AEQ(&a->addr[1], &b->addr[0], a->af) && - a->port[0] == b->port[1] && - a->port[1] == b->port[0]) - return (0); - else { - /* mismatch. must not happen. */ - printf("pf: state key linking mismatch! dir=%s, " - "if=%s, stored af=%u, a0: ", - dir == PF_OUT ? "OUT" : "IN", kif->pfik_name, a->af); - pf_print_host(&a->addr[0], a->port[0], a->af); - printf(", a1: "); - pf_print_host(&a->addr[1], a->port[1], a->af); - printf(", proto=%u", a->proto); - printf(", found af=%u, a0: ", b->af); - pf_print_host(&b->addr[0], b->port[0], b->af); - printf(", a1: "); - pf_print_host(&b->addr[1], b->port[1], b->af); - printf(", proto=%u", b->proto); - printf(".\n"); - return (-1); - } + if (s == NULL) + PF_HASHROW_UNLOCK(ih); + + return (s); } -struct pf_state * -#ifdef __FreeBSD__ -pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir, - struct mbuf *m, struct pf_mtag *pftag) -#else -pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir, - struct mbuf *m) -#endif +/* + * Find state by key. 
+ * Returns with ID hash slot locked on success. + */ +static struct pf_state * +pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir) { + struct pf_keyhash *kh; struct pf_state_key *sk; - struct pf_state_item *si; + struct pf_state *s; + int idx; -#ifdef __FreeBSD__ V_pf_status.fcounters[FCNT_STATE_SEARCH]++; -#else - pf_status.fcounters[FCNT_STATE_SEARCH]++; -#endif -#ifdef __FreeBSD__ - if (dir == PF_OUT && pftag->statekey && - ((struct pf_state_key *)pftag->statekey)->reverse) - sk = ((struct pf_state_key *)pftag->statekey)->reverse; - else { -#ifdef __FreeBSD__ - if ((sk = RB_FIND(pf_state_tree, &V_pf_statetbl, -#else - if ((sk = RB_FIND(pf_state_tree, &pf_statetbl, -#endif - (struct pf_state_key *)key)) == NULL) - return (NULL); - if (dir == PF_OUT && pftag->statekey && - pf_compare_state_keys(pftag->statekey, sk, - kif, dir) == 0) { - ((struct pf_state_key *) - pftag->statekey)->reverse = sk; - sk->reverse = pftag->statekey; - } - } -#else - if (dir == PF_OUT && m->m_pkthdr.pf.statekey && - ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse) - sk = ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse; - else { -#ifdef __FreeBSD__ - if ((sk = RB_FIND(pf_state_tree, &V_pf_statetbl, -#else - if ((sk = RB_FIND(pf_state_tree, &pf_statetbl, -#endif - (struct pf_state_key *)key)) == NULL) - return (NULL); - if (dir == PF_OUT && m->m_pkthdr.pf.statekey && - pf_compare_state_keys(m->m_pkthdr.pf.statekey, sk, - kif, dir) == 0) { - ((struct pf_state_key *) - m->m_pkthdr.pf.statekey)->reverse = sk; - sk->reverse = m->m_pkthdr.pf.statekey; - } + kh = &V_pf_keyhash[pf_hashkey((struct pf_state_key *)key)]; + + PF_HASHROW_LOCK(kh); + LIST_FOREACH(sk, &kh->keys, entry) + if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0) + break; + if (sk == NULL) { + PF_HASHROW_UNLOCK(kh); + return (NULL); } -#endif - if (dir == PF_OUT) -#ifdef __FreeBSD__ - pftag->statekey = NULL; -#else - m->m_pkthdr.pf.statekey = NULL; -#endif + idx = (dir == PF_IN 
? PF_SK_WIRE : PF_SK_STACK); - /* list is sorted, if-bound states before floating ones */ - TAILQ_FOREACH(si, &sk->states, entry) -#ifdef __FreeBSD__ - if ((si->s->kif == V_pfi_all || si->s->kif == kif) && -#else - if ((si->s->kif == pfi_all || si->s->kif == kif) && -#endif - sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] : - si->s->key[PF_SK_STACK])) - return (si->s); + /* List is sorted, if-bound states before floating ones. */ + TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) + if (s->kif == V_pfi_all || s->kif == kif) { + PF_STATE_LOCK(s); + PF_HASHROW_UNLOCK(kh); + if (s->timeout == PFTM_UNLINKED) { + /* + * State is being processed + * by pf_unlink_state() in + * an other thread. + */ + PF_STATE_UNLOCK(s); + return (NULL); + } + return (s); + } + PF_HASHROW_UNLOCK(kh); return (NULL); } @@ -1278,115 +1168,178 @@ pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir, struct pf_state * pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more) { + struct pf_keyhash *kh; struct pf_state_key *sk; - struct pf_state_item *si, *ret = NULL; + struct pf_state *s, *ret = NULL; + int idx, inout = 0; -#ifdef __FreeBSD__ V_pf_status.fcounters[FCNT_STATE_SEARCH]++; -#else - pf_status.fcounters[FCNT_STATE_SEARCH]++; -#endif -#ifdef __FreeBSD__ - sk = RB_FIND(pf_state_tree, &V_pf_statetbl, (struct pf_state_key *)key); -#else - sk = RB_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key); -#endif - if (sk != NULL) { - TAILQ_FOREACH(si, &sk->states, entry) - if (dir == PF_INOUT || - (sk == (dir == PF_IN ? 
si->s->key[PF_SK_WIRE] : - si->s->key[PF_SK_STACK]))) { - if (more == NULL) - return (si->s); - - if (ret) - (*more)++; - else - ret = si; - } + kh = &V_pf_keyhash[pf_hashkey((struct pf_state_key *)key)]; + + PF_HASHROW_LOCK(kh); + LIST_FOREACH(sk, &kh->keys, entry) + if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0) + break; + if (sk == NULL) { + PF_HASHROW_UNLOCK(kh); + return (NULL); + } + switch (dir) { + case PF_IN: + idx = PF_SK_WIRE; + break; + case PF_OUT: + idx = PF_SK_STACK; + break; + case PF_INOUT: + idx = PF_SK_WIRE; + inout = 1; + break; + default: + panic("%s: dir %u", __func__, dir); } - return (ret ? ret->s : NULL); +second_run: + TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) { + if (more == NULL) { + PF_HASHROW_UNLOCK(kh); + return (s); + } + + if (ret) + (*more)++; + else + ret = s; + } + if (inout == 1) { + inout = 0; + idx = PF_SK_STACK; + goto second_run; + } + PF_HASHROW_UNLOCK(kh); + + return (ret); } /* END state table stuff */ +static void +pf_send(struct pf_send_entry *pfse) +{ + + PF_SENDQ_LOCK(); + STAILQ_INSERT_TAIL(&V_pf_sendqueue, pfse, pfse_next); + PF_SENDQ_UNLOCK(); + swi_sched(V_pf_swi_cookie, 0); +} + +void +pf_intr(void *v) +{ + struct pf_send_head queue; + struct pf_send_entry *pfse, *next; + + CURVNET_SET((struct vnet *)v); + + PF_SENDQ_LOCK(); + queue = V_pf_sendqueue; + STAILQ_INIT(&V_pf_sendqueue); + PF_SENDQ_UNLOCK(); + + STAILQ_FOREACH_SAFE(pfse, &queue, pfse_next, next) { + switch (pfse->pfse_type) { +#ifdef INET + case PFSE_IP: + ip_output(pfse->pfse_m, NULL, NULL, 0, NULL, NULL); + break; + case PFSE_ICMP: + icmp_error(pfse->pfse_m, pfse->pfse_icmp_type, + pfse->pfse_icmp_code, 0, pfse->pfse_icmp_mtu); + break; +#endif /* INET */ +#ifdef INET6 + case PFSE_IP6: + ip6_output(pfse->pfse_m, NULL, NULL, 0, NULL, NULL, + NULL); + break; + case PFSE_ICMP6: + icmp6_error(pfse->pfse_m, pfse->pfse_icmp_type, + pfse->pfse_icmp_code, pfse->pfse_icmp_mtu); + break; +#endif /* INET6 */ + default: + panic("%s: unknown 
type", __func__); + } + free(pfse, M_PFTEMP); + } + CURVNET_RESTORE(); +} void pf_purge_thread(void *v) { - int nloops = 0, s; -#ifdef __FreeBSD__ - int locked; -#endif + int fullrun; CURVNET_SET((struct vnet *)v); for (;;) { - tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz); - -#ifdef __FreeBSD__ - sx_slock(&V_pf_consistency_lock); - PF_LOCK(); - locked = 0; + PF_RULES_RLOCK(); + rw_sleep(pf_purge_thread, &pf_rules_lock, 0, "pftm", hz / 10); if (V_pf_end_threads) { - PF_UNLOCK(); - sx_sunlock(&V_pf_consistency_lock); - sx_xlock(&V_pf_consistency_lock); - PF_LOCK(); + /* + * To cleanse up all kifs and rules we need + * two runs: first one clears reference flags, + * then pf_purge_expired_states() doesn't + * raise them, and then second run frees. + */ + PF_RULES_RUNLOCK(); + pf_purge_unlinked_rules(); + pfi_kif_purge(); - pf_purge_expired_states(V_pf_status.states, 1); + /* + * Now purge everything. + */ + pf_purge_expired_states(V_pf_hashmask + 1); pf_purge_expired_fragments(); - pf_purge_expired_src_nodes(1); - V_pf_end_threads++; + pf_purge_expired_src_nodes(); - sx_xunlock(&V_pf_consistency_lock); - PF_UNLOCK(); + /* + * Now all kifs & rules should be unreferenced, + * thus should be successfully freed. + */ + pf_purge_unlinked_rules(); + pfi_kif_purge(); + + /* + * Announce success and exit. 
+ */ + PF_RULES_RLOCK(); + V_pf_end_threads++; + PF_RULES_RUNLOCK(); wakeup(pf_purge_thread); kproc_exit(0); } -#endif - s = splsoftnet(); - - /* process a fraction of the state table every second */ -#ifdef __FreeBSD__ - if (!pf_purge_expired_states(1 + (V_pf_status.states / - V_pf_default_rule.timeout[PFTM_INTERVAL]), 0)) { - PF_UNLOCK(); - sx_sunlock(&V_pf_consistency_lock); - sx_xlock(&V_pf_consistency_lock); - PF_LOCK(); - locked = 1; - - pf_purge_expired_states(1 + (V_pf_status.states / - V_pf_default_rule.timeout[PFTM_INTERVAL]), 1); - } -#else - pf_purge_expired_states(1 + (pf_status.states - / pf_default_rule.timeout[PFTM_INTERVAL])); -#endif + PF_RULES_RUNLOCK(); - /* purge other expired types every PFTM_INTERVAL seconds */ -#ifdef __FreeBSD__ - if (++nloops >= V_pf_default_rule.timeout[PFTM_INTERVAL]) { -#else - if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) { -#endif + /* Process 1/interval fraction of the state table every run. */ + fullrun = pf_purge_expired_states(V_pf_hashmask / + (V_pf_default_rule.timeout[PFTM_INTERVAL] * 10)); + + /* Purge other expired types every PFTM_INTERVAL seconds. 
*/ + if (fullrun) { + /* + * Order is important: + * - states and src nodes reference rules + * - states and rules reference kifs + */ pf_purge_expired_fragments(); - pf_purge_expired_src_nodes(0); - nloops = 0; + pf_purge_expired_src_nodes(); + pf_purge_unlinked_rules(); + pfi_kif_purge(); } - - splx(s); -#ifdef __FreeBSD__ - PF_UNLOCK(); - if (locked) - sx_xunlock(&V_pf_consistency_lock); - else - sx_sunlock(&V_pf_consistency_lock); -#endif } + /* not reached */ CURVNET_RESTORE(); } @@ -1400,117 +1353,59 @@ pf_state_expires(const struct pf_state *state) /* handle all PFTM_* > PFTM_MAX here */ if (state->timeout == PFTM_PURGE) - return (time_second); + return (time_uptime); if (state->timeout == PFTM_UNTIL_PACKET) return (0); -#ifdef __FreeBSD__ KASSERT(state->timeout != PFTM_UNLINKED, ("pf_state_expires: timeout == PFTM_UNLINKED")); - KASSERT((state->timeout < PFTM_MAX), + KASSERT((state->timeout < PFTM_MAX), ("pf_state_expires: timeout > PFTM_MAX")); -#else - KASSERT(state->timeout != PFTM_UNLINKED); - KASSERT(state->timeout < PFTM_MAX); -#endif timeout = state->rule.ptr->timeout[state->timeout]; if (!timeout) -#ifdef __FreeBSD__ timeout = V_pf_default_rule.timeout[state->timeout]; -#else - timeout = pf_default_rule.timeout[state->timeout]; -#endif start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START]; if (start) { end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END]; - states = state->rule.ptr->states_cur; + states = state->rule.ptr->states_cur; /* XXXGL */ } else { -#ifdef __FreeBSD__ start = V_pf_default_rule.timeout[PFTM_ADAPTIVE_START]; end = V_pf_default_rule.timeout[PFTM_ADAPTIVE_END]; states = V_pf_status.states; -#else - start = pf_default_rule.timeout[PFTM_ADAPTIVE_START]; - end = pf_default_rule.timeout[PFTM_ADAPTIVE_END]; - states = pf_status.states; -#endif } if (end && states > start && start < end) { if (states < end) return (state->expire + timeout * (end - states) / (end - start)); else - return (time_second); + return (time_uptime); } return 
(state->expire + timeout); } -#ifdef __FreeBSD__ -int -pf_purge_expired_src_nodes(int waslocked) -#else void -pf_purge_expired_src_nodes(int waslocked) -#endif +pf_purge_expired_src_nodes() { - struct pf_src_node *cur, *next; - int locked = waslocked; - -#ifdef __FreeBSD__ - for (cur = RB_MIN(pf_src_tree, &V_tree_src_tracking); cur; cur = next) { - next = RB_NEXT(pf_src_tree, &V_tree_src_tracking, cur); -#else - for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) { - next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur); -#endif + struct pf_srchash *sh; + struct pf_src_node *cur, *next; + int i; - if (cur->states <= 0 && cur->expire <= time_second) { - if (! locked) { -#ifdef __FreeBSD__ - if (!sx_try_upgrade(&V_pf_consistency_lock)) - return (0); -#else - rw_enter_write(&pf_consistency_lock); -#endif - next = RB_NEXT(pf_src_tree, -#ifdef __FreeBSD__ - &V_tree_src_tracking, cur); -#else - &tree_src_tracking, cur); -#endif - locked = 1; - } - if (cur->rule.ptr != NULL) { + for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) { + PF_HASHROW_LOCK(sh); + LIST_FOREACH_SAFE(cur, &sh->nodes, entry, next) + if (cur->states <= 0 && cur->expire <= time_uptime) { + if (cur->rule.ptr != NULL) cur->rule.ptr->src_nodes--; - if (cur->rule.ptr->states_cur <= 0 && - cur->rule.ptr->max_src_nodes <= 0) - pf_rm_rule(NULL, cur->rule.ptr); - } -#ifdef __FreeBSD__ - RB_REMOVE(pf_src_tree, &V_tree_src_tracking, cur); + LIST_REMOVE(cur, entry); V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; V_pf_status.src_nodes--; - pool_put(&V_pf_src_tree_pl, cur); -#else - RB_REMOVE(pf_src_tree, &tree_src_tracking, cur); - pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; - pf_status.src_nodes--; - pool_put(&pf_src_tree_pl, cur); -#endif - } + uma_zfree(V_pf_sources_z, cur); + } else if (cur->rule.ptr != NULL) + cur->rule.ptr->rule_flag |= PFRULE_REFS; + PF_HASHROW_UNLOCK(sh); } - - if (locked && !waslocked) -#ifdef __FreeBSD__ - { - sx_downgrade(&V_pf_consistency_lock); - } - 
return (1); -#else - rw_exit_write(&pf_consistency_lock); -#endif } -void +static void pf_src_tree_remove_state(struct pf_state *s) { u_int32_t timeout; @@ -1522,12 +1417,8 @@ pf_src_tree_remove_state(struct pf_state *s) timeout = s->rule.ptr->timeout[PFTM_SRC_NODE]; if (!timeout) timeout = -#ifdef __FreeBSD__ V_pf_default_rule.timeout[PFTM_SRC_NODE]; -#else - pf_default_rule.timeout[PFTM_SRC_NODE]; -#endif - s->src_node->expire = time_second + timeout; + s->src_node->expire = time_uptime + timeout; } } if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) { @@ -1535,226 +1426,167 @@ pf_src_tree_remove_state(struct pf_state *s) timeout = s->rule.ptr->timeout[PFTM_SRC_NODE]; if (!timeout) timeout = -#ifdef __FreeBSD__ V_pf_default_rule.timeout[PFTM_SRC_NODE]; -#else - pf_default_rule.timeout[PFTM_SRC_NODE]; -#endif - s->nat_src_node->expire = time_second + timeout; + s->nat_src_node->expire = time_uptime + timeout; } } s->src_node = s->nat_src_node = NULL; } -/* callers should be at splsoftnet */ -void -pf_unlink_state(struct pf_state *cur) +/* + * Unlink and potentilly free a state. Function may be + * called with ID hash row locked, but always returns + * unlocked, since it needs to go through key hash locking. + */ +int +pf_unlink_state(struct pf_state *s, u_int flags) { -#ifdef __FreeBSD__ - if (cur->local_flags & PFSTATE_EXPIRING) - return; - cur->local_flags |= PFSTATE_EXPIRING; -#else - splassert(IPL_SOFTNET); -#endif + struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(s)]; + + if ((flags & PF_ENTER_LOCKED) == 0) + PF_HASHROW_LOCK(ih); + else + PF_HASHROW_ASSERT(ih); - if (cur->src.state == PF_TCPS_PROXY_DST) { + if (s->timeout == PFTM_UNLINKED) { + /* + * State is being processed + * by pf_unlink_state() in + * an other thread. + */ + PF_HASHROW_UNLOCK(ih); + return (0); /* XXXGL: undefined actually */ + } + + s->timeout = PFTM_UNLINKED; + + if (s->src.state == PF_TCPS_PROXY_DST) { /* XXX wire key the right one? 
*/ -#ifdef __FreeBSD__ - pf_send_tcp(NULL, cur->rule.ptr, cur->key[PF_SK_WIRE]->af, -#else - pf_send_tcp(cur->rule.ptr, cur->key[PF_SK_WIRE]->af, -#endif - &cur->key[PF_SK_WIRE]->addr[1], - &cur->key[PF_SK_WIRE]->addr[0], - cur->key[PF_SK_WIRE]->port[1], - cur->key[PF_SK_WIRE]->port[0], - cur->src.seqhi, cur->src.seqlo + 1, - TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL); - } -#ifdef __FreeBSD__ - RB_REMOVE(pf_state_tree_id, &V_tree_id, cur); -#else - RB_REMOVE(pf_state_tree_id, &tree_id, cur); -#endif -#if NPFLOW > 0 - if (cur->state_flags & PFSTATE_PFLOW) -#ifdef __FreeBSD__ - if (export_pflow_ptr != NULL) - export_pflow_ptr(cur); -#else - export_pflow(cur); -#endif -#endif -#if NPFSYNC > 0 -#ifdef __FreeBSD__ + pf_send_tcp(NULL, s->rule.ptr, s->key[PF_SK_WIRE]->af, + &s->key[PF_SK_WIRE]->addr[1], + &s->key[PF_SK_WIRE]->addr[0], + s->key[PF_SK_WIRE]->port[1], + s->key[PF_SK_WIRE]->port[0], + s->src.seqhi, s->src.seqlo + 1, + TH_RST|TH_ACK, 0, 0, 0, 1, s->tag, NULL); + } + + LIST_REMOVE(s, entry); + pf_src_tree_remove_state(s); + PF_HASHROW_UNLOCK(ih); + if (pfsync_delete_state_ptr != NULL) - pfsync_delete_state_ptr(cur); -#else - pfsync_delete_state(cur); -#endif -#endif - cur->timeout = PFTM_UNLINKED; - pf_src_tree_remove_state(cur); - pf_detach_state(cur); + pfsync_delete_state_ptr(s); + + pf_detach_state(s); + refcount_release(&s->refs); + + return (pf_release_state(s)); } -/* callers should be at splsoftnet and hold the - * write_lock on pf_consistency_lock */ void pf_free_state(struct pf_state *cur) { -#ifndef __FreeBSD__ - splassert(IPL_SOFTNET); -#endif -#if NPFSYNC > 0 -#ifdef __FreeBSD__ - if (pfsync_state_in_use_ptr != NULL && - pfsync_state_in_use_ptr(cur)) -#else - if (pfsync_state_in_use(cur)) -#endif - return; -#endif -#ifdef __FreeBSD__ - KASSERT(cur->timeout == PFTM_UNLINKED, - ("pf_free_state: cur->timeout != PFTM_UNLINKED")); -#else - KASSERT(cur->timeout == PFTM_UNLINKED); -#endif - if (--cur->rule.ptr->states_cur <= 0 && - 
cur->rule.ptr->src_nodes <= 0) - pf_rm_rule(NULL, cur->rule.ptr); + KASSERT(cur->refs == 0, ("%s: %p has refs", __func__, cur)); + KASSERT(cur->timeout == PFTM_UNLINKED, ("%s: timeout %u", __func__, + cur->timeout)); + --cur->rule.ptr->states_cur; if (cur->nat_rule.ptr != NULL) - if (--cur->nat_rule.ptr->states_cur <= 0 && - cur->nat_rule.ptr->src_nodes <= 0) - pf_rm_rule(NULL, cur->nat_rule.ptr); + --cur->nat_rule.ptr->states_cur; if (cur->anchor.ptr != NULL) - if (--cur->anchor.ptr->states_cur <= 0) - pf_rm_rule(NULL, cur->anchor.ptr); + --cur->anchor.ptr->states_cur; pf_normalize_tcp_cleanup(cur); - pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE); -#ifdef __FreeBSD__ - TAILQ_REMOVE(&V_state_list, cur, entry_list); -#else - TAILQ_REMOVE(&state_list, cur, entry_list); -#endif - if (cur->tag) - pf_tag_unref(cur->tag); -#ifdef __FreeBSD__ - pool_put(&V_pf_state_pl, cur); + uma_zfree(V_pf_state_z, cur); V_pf_status.fcounters[FCNT_STATE_REMOVALS]++; - V_pf_status.states--; -#else - pool_put(&pf_state_pl, cur); - pf_status.fcounters[FCNT_STATE_REMOVALS]++; - pf_status.states--; -#endif } -#ifdef __FreeBSD__ -int -pf_purge_expired_states(u_int32_t maxcheck, int waslocked) -#else -void -pf_purge_expired_states(u_int32_t maxcheck) -#endif +/* + * Called only from pf_purge_thread(), thus serialized. 
+ */ +static int +pf_purge_expired_states(int maxcheck) { - static struct pf_state *cur = NULL; - struct pf_state *next; -#ifdef __FreeBSD__ - int locked = waslocked; -#else - int locked = 0; -#endif + static u_int i = 0; - while (maxcheck--) { - /* wrap to start of list when we hit the end */ - if (cur == NULL) { -#ifdef __FreeBSD__ - cur = TAILQ_FIRST(&V_state_list); -#else - cur = TAILQ_FIRST(&state_list); -#endif - if (cur == NULL) - break; /* list empty */ - } + struct pf_idhash *ih; + struct pf_state *s; + int rv = 0; - /* get next state, as cur may get deleted */ - next = TAILQ_NEXT(cur, entry_list); + V_pf_status.states = uma_zone_get_cur(V_pf_state_z); - if (cur->timeout == PFTM_UNLINKED) { - /* free unlinked state */ - if (! locked) { -#ifdef __FreeBSD__ - if (!sx_try_upgrade(&V_pf_consistency_lock)) - return (0); -#else - rw_enter_write(&pf_consistency_lock); -#endif - locked = 1; - } - pf_free_state(cur); - } else if (pf_state_expires(cur) <= time_second) { - /* unlink and free expired state */ - pf_unlink_state(cur); - if (! locked) { -#ifdef __FreeBSD__ - if (!sx_try_upgrade(&V_pf_consistency_lock)) - return (0); -#else - rw_enter_write(&pf_consistency_lock); -#endif - locked = 1; + /* + * Go through hash and unlink states that expire now. + */ + while (maxcheck > 0) { + + /* Wrap to start of hash when we hit the end. 
*/ + if (i > V_pf_hashmask) { + i = 0; + rv = 1; + } + + ih = &V_pf_idhash[i]; +relock: + PF_HASHROW_LOCK(ih); + LIST_FOREACH(s, &ih->states, entry) { + if (pf_state_expires(s) <= time_uptime) { + V_pf_status.states -= + pf_unlink_state(s, PF_ENTER_LOCKED); + goto relock; } - pf_free_state(cur); + s->rule.ptr->rule_flag |= PFRULE_REFS; + if (s->nat_rule.ptr != NULL) + s->nat_rule.ptr->rule_flag |= PFRULE_REFS; + if (s->anchor.ptr != NULL) + s->anchor.ptr->rule_flag |= PFRULE_REFS; + s->kif->pfik_flags |= PFI_IFLAG_REFS; + if (s->rt_kif) + s->rt_kif->pfik_flags |= PFI_IFLAG_REFS; } - cur = next; + PF_HASHROW_UNLOCK(ih); + i++; + maxcheck--; } -#ifdef __FreeBSD__ - if (!waslocked && locked) - sx_downgrade(&V_pf_consistency_lock); - - return (1); -#else - if (locked) - rw_exit_write(&pf_consistency_lock); -#endif -} + V_pf_status.states = uma_zone_get_cur(V_pf_state_z); -int -pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw) -{ - if (aw->type != PF_ADDR_TABLE) - return (0); - if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname, 1)) == NULL) - return (1); - return (0); + return (rv); } -void -pf_tbladdr_remove(struct pf_addr_wrap *aw) +static void +pf_purge_unlinked_rules() { - if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL) - return; - pfr_detach_table(aw->p.tbl); - aw->p.tbl = NULL; -} + struct pf_rulequeue tmpq; + struct pf_rule *r, *r1; -void -pf_tbladdr_copyout(struct pf_addr_wrap *aw) -{ - struct pfr_ktable *kt = aw->p.tbl; + /* + * Do naive mark-and-sweep garbage collecting of old rules. + * Reference flag is raised by pf_purge_expired_states() + * and pf_purge_expired_src_nodes(). + * + * To avoid LOR between PF_UNLNKDRULES_LOCK/PF_RULES_WLOCK, + * use a temporary queue. 
+ */ + TAILQ_INIT(&tmpq); + PF_UNLNKDRULES_LOCK(); + TAILQ_FOREACH_SAFE(r, &V_pf_unlinked_rules, entries, r1) { + if (!(r->rule_flag & PFRULE_REFS)) { + TAILQ_REMOVE(&V_pf_unlinked_rules, r, entries); + TAILQ_INSERT_TAIL(&tmpq, r, entries); + } else + r->rule_flag &= ~PFRULE_REFS; + } + PF_UNLNKDRULES_UNLOCK(); - if (aw->type != PF_ADDR_TABLE || kt == NULL) - return; - if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) - kt = kt->pfrkt_root; - aw->p.tbl = NULL; - aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ? - kt->pfrkt_cnt : -1; + if (!TAILQ_EMPTY(&tmpq)) { + PF_RULES_WLOCK(); + TAILQ_FOREACH_SAFE(r, &tmpq, entries, r1) { + TAILQ_REMOVE(&tmpq, r, entries); + pf_free_rule(r); + } + PF_RULES_WUNLOCK(); + } } void @@ -1826,7 +1658,7 @@ pf_print_state(struct pf_state *s) pf_print_state_parts(s, NULL, NULL); } -void +static void pf_print_state_parts(struct pf_state *s, struct pf_state_key *skwp, struct pf_state_key *sksp) { @@ -1979,7 +1811,7 @@ pf_calc_skip_steps(struct pf_rulequeue *rules) PF_SET_SKIP_STEPS(i); } -int +static int pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) { if (aw1->type != aw2->type) @@ -1999,8 +1831,6 @@ pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) return (0); case PF_ADDR_TABLE: return (aw1->p.tbl != aw2->p.tbl); - case PF_ADDR_RTLABEL: - return (aw1->v.rtlabel != aw2->v.rtlabel); default: printf("invalid address type: %d\n", aw1->type); return (1); @@ -2022,7 +1852,7 @@ pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp) return (l); } -void +static void pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc, struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af) { @@ -2080,7 +1910,7 @@ pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u) } #ifdef INET6 -void +static void pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u) { struct pf_addr ao; @@ -2102,7 +1932,7 @@ pf_change_a6(struct 
pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u) } #endif /* INET6 */ -void +static void pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c, u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af) @@ -2195,16 +2025,12 @@ pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, * Need to modulate the sequence numbers in the TCP SACK option * (credits to Krzysztof Pfaff for report and patch) */ -int +static int pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd, struct tcphdr *th, struct pf_state_peer *dst) { int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen; -#ifdef __FreeBSD__ u_int8_t opts[TCP_MAXOLEN], *opt = opts; -#else - u_int8_t opts[MAX_TCPOPTLEN], *opt = opts; -#endif int copyback = 0, i, olen; struct sackblk sack; @@ -2248,60 +2074,32 @@ pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd, } if (copyback) -#ifdef __FreeBSD__ m_copyback(m, off + sizeof(*th), thoptlen, (caddr_t)opts); -#else - m_copyback(m, off + sizeof(*th), thoptlen, opts); -#endif return (copyback); } -void -#ifdef __FreeBSD__ +static void pf_send_tcp(struct mbuf *replyto, const struct pf_rule *r, sa_family_t af, -#else -pf_send_tcp(const struct pf_rule *r, sa_family_t af, -#endif const struct pf_addr *saddr, const struct pf_addr *daddr, u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, - u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp) + u_int16_t rtag, struct ifnet *ifp) { + struct pf_send_entry *pfse; struct mbuf *m; int len, tlen; #ifdef INET - struct ip *h; + struct ip *h = NULL; #endif /* INET */ #ifdef INET6 - struct ip6_hdr *h6; + struct ip6_hdr *h6 = NULL; #endif /* INET6 */ struct tcphdr *th; char *opt; -#ifdef __FreeBSD__ struct pf_mtag *pf_mtag; - KASSERT( -#ifdef INET - af == AF_INET -#else - 0 -#endif - || -#ifdef INET6 - af == AF_INET6 -#else - 0 
-#endif - , ("Unsupported AF %d", af)); len = 0; th = NULL; -#ifdef INET - h = NULL; -#endif -#ifdef INET6 - h6 = NULL; -#endif -#endif /* __FreeBSD__ */ /* maximum segment size tcp option */ tlen = sizeof(struct tcphdr); @@ -2319,54 +2117,40 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, len = sizeof(struct ip6_hdr) + tlen; break; #endif /* INET6 */ + default: + panic("%s: unsupported af %d", __func__, af); } - /* create outgoing mbuf */ - m = m_gethdr(M_DONTWAIT, MT_HEADER); - if (m == NULL) + /* Allocate outgoing queue entry, mbuf and mbuf tag. */ + pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT); + if (pfse == NULL) return; -#ifdef __FreeBSD__ + m = m_gethdr(M_NOWAIT, MT_HEADER); + if (m == NULL) { + free(pfse, M_PFTEMP); + return; + } #ifdef MAC mac_netinet_firewall_send(m); #endif if ((pf_mtag = pf_get_mtag(m)) == NULL) { + free(pfse, M_PFTEMP); m_freem(m); return; } -#endif if (tag) -#ifdef __FreeBSD__ m->m_flags |= M_SKIP_FIREWALL; pf_mtag->tag = rtag; -#else - m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; - m->m_pkthdr.pf.tag = rtag; -#endif if (r != NULL && r->rtableid >= 0) -#ifdef __FreeBSD__ - { M_SETFIB(m, r->rtableid); - pf_mtag->rtableid = r->rtableid; -#else - m->m_pkthdr.pf.rtableid = r->rtableid; -#endif -#ifdef __FreeBSD__ - } -#endif #ifdef ALTQ if (r != NULL && r->qid) { -#ifdef __FreeBSD__ pf_mtag->qid = r->qid; /* add hints for ecn */ pf_mtag->hdr = mtod(m, struct ip *); -#else - m->m_pkthdr.pf.qid = r->qid; - /* add hints for ecn */ - m->m_pkthdr.pf.hdr = mtod(m, struct ip *); -#endif } #endif /* ALTQ */ m->m_data += max_linkhdr; @@ -2429,53 +2213,12 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, h->ip_v = 4; h->ip_hl = sizeof(*h) >> 2; h->ip_tos = IPTOS_LOWDELAY; -#ifdef __FreeBSD__ h->ip_off = V_path_mtu_discovery ? IP_DF : 0; h->ip_len = len; h->ip_ttl = ttl ? ttl : V_ip_defttl; -#else - h->ip_len = htons(len); - h->ip_off = htons(ip_mtudisc ? IP_DF : 0); - h->ip_ttl = ttl ? 
ttl : ip_defttl; -#endif h->ip_sum = 0; - if (eh == NULL) { -#ifdef __FreeBSD__ - PF_UNLOCK(); - ip_output(m, (void *)NULL, (void *)NULL, 0, - (void *)NULL, (void *)NULL); - PF_LOCK(); -#else /* ! __FreeBSD__ */ - ip_output(m, (void *)NULL, (void *)NULL, 0, - (void *)NULL, (void *)NULL); -#endif - } else { - struct route ro; - struct rtentry rt; - struct ether_header *e = (void *)ro.ro_dst.sa_data; - if (ifp == NULL) { - m_freem(m); - return; - } - rt.rt_ifp = ifp; - ro.ro_rt = &rt; - ro.ro_dst.sa_len = sizeof(ro.ro_dst); - ro.ro_dst.sa_family = pseudo_AF_HDRCMPLT; - bcopy(eh->ether_dhost, e->ether_shost, ETHER_ADDR_LEN); - bcopy(eh->ether_shost, e->ether_dhost, ETHER_ADDR_LEN); - e->ether_type = eh->ether_type; -#ifdef __FreeBSD__ - PF_UNLOCK(); - /* XXX_IMPORT: later */ - ip_output(m, (void *)NULL, &ro, 0, - (void *)NULL, (void *)NULL); - PF_LOCK(); -#else /* ! __FreeBSD__ */ - ip_output(m, (void *)NULL, &ro, IP_ROUTETOETHER, - (void *)NULL, (void *)NULL); -#endif - } + pfse->pfse_type = PFSE_IP; break; #endif /* INET */ #ifdef INET6 @@ -2487,102 +2230,75 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, h6->ip6_vfc |= IPV6_VERSION; h6->ip6_hlim = IPV6_DEFHLIM; -#ifdef __FreeBSD__ - PF_UNLOCK(); - ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); - PF_LOCK(); -#else - ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); -#endif + pfse->pfse_type = PFSE_IP6; break; #endif /* INET6 */ } + pfse->pfse_m = m; + pf_send(pfse); } static void pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, struct pf_rule *r) { - struct mbuf *m0; -#ifdef __FreeBSD__ -#ifdef INET - struct ip *ip; -#endif + struct pf_send_entry *pfse; + struct mbuf *m0; struct pf_mtag *pf_mtag; -#endif -#ifdef __FreeBSD__ - m0 = m_copypacket(m, M_DONTWAIT); - if (m0 == NULL) + /* Allocate outgoing queue entry, mbuf and mbuf tag. 
*/ + pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT); + if (pfse == NULL) return; -#else - if ((m0 = m_copy(m, 0, M_COPYALL)) == NULL) + + if ((m0 = m_copypacket(m, M_NOWAIT)) == NULL) { + free(pfse, M_PFTEMP); return; -#endif + } -#ifdef __FreeBSD__ - if ((pf_mtag = pf_get_mtag(m0)) == NULL) + if ((pf_mtag = pf_get_mtag(m0)) == NULL) { + free(pfse, M_PFTEMP); return; + } /* XXX: revisit */ m0->m_flags |= M_SKIP_FIREWALL; -#else - m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED; -#endif if (r->rtableid >= 0) -#ifdef __FreeBSD__ - { M_SETFIB(m0, r->rtableid); - pf_mtag->rtableid = r->rtableid; -#else - m0->m_pkthdr.pf.rtableid = r->rtableid; -#endif -#ifdef __FreeBSD__ - } -#endif #ifdef ALTQ if (r->qid) { -#ifdef __FreeBSD__ pf_mtag->qid = r->qid; /* add hints for ecn */ pf_mtag->hdr = mtod(m0, struct ip *); -#else - m0->m_pkthdr.pf.qid = r->qid; - /* add hints for ecn */ - m0->m_pkthdr.pf.hdr = mtod(m0, struct ip *); -#endif } #endif /* ALTQ */ switch (af) { #ifdef INET case AF_INET: -#ifdef __FreeBSD__ + { + struct ip *ip; + /* icmp_error() expects host byte ordering */ ip = mtod(m0, struct ip *); NTOHS(ip->ip_len); NTOHS(ip->ip_off); - PF_UNLOCK(); - icmp_error(m0, type, code, 0, 0); - PF_LOCK(); -#else - icmp_error(m0, type, code, 0, 0); -#endif + + pfse->pfse_type = PFSE_ICMP; break; + } #endif /* INET */ #ifdef INET6 case AF_INET6: -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - icmp6_error(m0, type, code, 0); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif + pfse->pfse_type = PFSE_ICMP6; break; #endif /* INET6 */ } + pfse->pfse_m = m0; + pfse->pfse_icmp_type = type; + pfse->pfse_icmp_code = code; + pf_send(pfse); } /* @@ -2669,7 +2385,7 @@ pf_match_addr_range(struct pf_addr *b, struct pf_addr *e, return (1); } -int +static int pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p) { switch (op) { @@ -2704,7 +2420,7 @@ pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p) return (pf_match(op, a1, a2, p)); } -int +static int pf_match_uid(u_int8_t op, 
uid_t a1, uid_t a2, uid_t u) { if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE) @@ -2712,7 +2428,7 @@ pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) return (pf_match(op, a1, a2, u)); } -int +static int pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) { if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE) @@ -2721,49 +2437,25 @@ pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) } int -#ifdef __FreeBSD__ -pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag, - struct pf_mtag *pf_mtag) -#else -pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag) -#endif +pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag, int mtag) { if (*tag == -1) -#ifdef __FreeBSD__ - *tag = pf_mtag->tag; -#else - *tag = m->m_pkthdr.pf.tag; -#endif + *tag = mtag; return ((!r->match_tag_not && r->match_tag == *tag) || (r->match_tag_not && r->match_tag != *tag)); } int -#ifdef __FreeBSD__ -pf_tag_packet(struct mbuf *m, int tag, int rtableid, - struct pf_mtag *pf_mtag) -#else -pf_tag_packet(struct mbuf *m, int tag, int rtableid) -#endif +pf_tag_packet(struct mbuf *m, struct pf_pdesc *pd, int tag) { - if (tag <= 0 && rtableid < 0) - return (0); - if (tag > 0) -#ifdef __FreeBSD__ - pf_mtag->tag = tag; -#else - m->m_pkthdr.pf.tag = tag; -#endif - if (rtableid >= 0) -#ifdef __FreeBSD__ - { - M_SETFIB(m, rtableid); - } -#else - m->m_pkthdr.pf.rtableid = rtableid; -#endif + KASSERT(tag > 0, ("%s: tag %d", __func__, tag)); + + if (pd->pf_mtag == NULL && ((pd->pf_mtag = pf_get_mtag(m)) == NULL)) + return (ENOMEM); + + pd->pf_mtag->tag = tag; return (0); } @@ -2774,26 +2466,19 @@ pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n, { struct pf_anchor_stackframe *f; + PF_RULES_RASSERT(); + (*r)->anchor->match = 0; if (match) *match = 0; -#ifdef __FreeBSD__ if (*depth >= sizeof(V_pf_anchor_stack) / sizeof(V_pf_anchor_stack[0])) { -#else - if (*depth >= sizeof(pf_anchor_stack) / - sizeof(pf_anchor_stack[0])) { -#endif printf("pf_step_into_anchor: stack 
overflow\n"); *r = TAILQ_NEXT(*r, entries); return; } else if (*depth == 0 && a != NULL) *a = *r; -#ifdef __FreeBSD__ f = V_pf_anchor_stack + (*depth)++; -#else - f = pf_anchor_stack + (*depth)++; -#endif f->rs = *rs; f->r = *r; if ((*r)->anchor_wildcard) { @@ -2819,14 +2504,12 @@ pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n, struct pf_anchor_stackframe *f; int quick = 0; + PF_RULES_RASSERT(); + do { if (*depth <= 0) break; -#ifdef __FreeBSD__ f = V_pf_anchor_stack + *depth - 1; -#else - f = pf_anchor_stack + *depth - 1; -#endif if (f->parent != NULL && f->child != NULL) { if (f->child->match || (match != NULL && *match)) { @@ -2913,35 +2596,15 @@ pf_addr_inc(struct pf_addr *addr, sa_family_t af) #endif /* INET6 */ int -#ifdef __FreeBSD__ -pf_socket_lookup(int direction, struct pf_pdesc *pd, struct inpcb *inp_arg) -#else -pf_socket_lookup(int direction, struct pf_pdesc *pd) -#endif +pf_socket_lookup(int direction, struct pf_pdesc *pd, struct mbuf *m) { struct pf_addr *saddr, *daddr; u_int16_t sport, dport; -#ifdef __FreeBSD__ struct inpcbinfo *pi; -#else - struct inpcbtable *tb; -#endif struct inpcb *inp; - if (pd == NULL) - return (-1); pd->lookup.uid = UID_MAX; pd->lookup.gid = GID_MAX; - pd->lookup.pid = NO_PID; - -#ifdef __FreeBSD__ - if (inp_arg != NULL) { - INP_LOCK_ASSERT(inp_arg); - pd->lookup.uid = inp_arg->inp_cred->cr_uid; - pd->lookup.gid = inp_arg->inp_cred->cr_groups[0]; - return (1); - } -#endif switch (pd->proto) { case IPPROTO_TCP: @@ -2949,22 +2612,14 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd) return (-1); sport = pd->hdr.tcp->th_sport; dport = pd->hdr.tcp->th_dport; -#ifdef __FreeBSD__ pi = &V_tcbinfo; -#else - tb = &tcbtable; -#endif break; case IPPROTO_UDP: if (pd->hdr.udp == NULL) return (-1); sport = pd->hdr.udp->uh_sport; dport = pd->hdr.udp->uh_dport; -#ifdef __FreeBSD__ pi = &V_udbinfo; -#else - tb = &udbtable; -#endif break; default: return (-1); @@ -2984,77 +2639,43 @@ pf_socket_lookup(int direction, 
struct pf_pdesc *pd) switch (pd->af) { #ifdef INET case AF_INET: -#ifdef __FreeBSD__ - /* - * XXXRW: would be nice if we had an mbuf here so that we - * could use in_pcblookup_mbuf(). - */ - inp = in_pcblookup(pi, saddr->v4, sport, daddr->v4, - dport, INPLOOKUP_RLOCKPCB, NULL); + inp = in_pcblookup_mbuf(pi, saddr->v4, sport, daddr->v4, + dport, INPLOOKUP_RLOCKPCB, NULL, m); if (inp == NULL) { - inp = in_pcblookup(pi, saddr->v4, sport, + inp = in_pcblookup_mbuf(pi, saddr->v4, sport, daddr->v4, dport, INPLOOKUP_WILDCARD | - INPLOOKUP_RLOCKPCB, NULL); + INPLOOKUP_RLOCKPCB, NULL, m); if (inp == NULL) return (-1); } -#else - inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport); - if (inp == NULL) { - inp = in_pcblookup_listen(tb, daddr->v4, dport, 0, - NULL); - if (inp == NULL) - return (-1); - } -#endif break; #endif /* INET */ #ifdef INET6 case AF_INET6: -#ifdef __FreeBSD__ - /* - * XXXRW: would be nice if we had an mbuf here so that we - * could use in6_pcblookup_mbuf(). - */ - inp = in6_pcblookup(pi, &saddr->v6, sport, - &daddr->v6, dport, INPLOOKUP_RLOCKPCB, NULL); + inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport, &daddr->v6, + dport, INPLOOKUP_RLOCKPCB, NULL, m); if (inp == NULL) { - inp = in6_pcblookup(pi, &saddr->v6, sport, + inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport, &daddr->v6, dport, INPLOOKUP_WILDCARD | - INPLOOKUP_RLOCKPCB, NULL); - if (inp == NULL) - return (-1); - } -#else - inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6, - dport); - if (inp == NULL) { - inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 0, - NULL); + INPLOOKUP_RLOCKPCB, NULL, m); if (inp == NULL) return (-1); } -#endif break; #endif /* INET6 */ default: return (-1); } -#ifdef __FreeBSD__ INP_RLOCK_ASSERT(inp); pd->lookup.uid = inp->inp_cred->cr_uid; pd->lookup.gid = inp->inp_cred->cr_groups[0]; INP_RUNLOCK(inp); -#else - pd->lookup.uid = inp->inp_socket->so_euid; - pd->lookup.gid = inp->inp_socket->so_egid; - pd->lookup.pid = inp->inp_socket->so_cpid; -#endif + 
return (1); } -u_int8_t +static u_int8_t pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) { int hlen; @@ -3094,17 +2715,13 @@ pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) return (wscale); } -u_int16_t +static u_int16_t pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) { int hlen; u_int8_t hdr[60]; u_int8_t *opt, optlen; -#ifdef __FreeBSD__ u_int16_t mss = V_tcp_mssdflt; -#else - u_int16_t mss = tcp_mssdflt; -#endif hlen = th_off << 2; /* hlen <= sizeof(hdr) */ if (hlen <= sizeof(struct tcphdr)) @@ -3136,7 +2753,7 @@ pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) return (mss); } -u_int16_t +static u_int16_t pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) { #ifdef INET @@ -3148,13 +2765,8 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) struct route_in6 ro6; #endif /* INET6 */ struct rtentry *rt = NULL; -#ifdef __FreeBSD__ int hlen = 0; u_int16_t mss = V_tcp_mssdflt; -#else - int hlen; - u_int16_t mss = tcp_mssdflt; -#endif switch (af) { #ifdef INET @@ -3165,11 +2777,7 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = addr->v4; -#ifdef __FreeBSD__ in_rtalloc_ign(&ro, 0, rtableid); -#else /* ! __FreeBSD__ */ - rtalloc_noclone(&ro, NO_CLONING); -#endif rt = ro.ro_rt; break; #endif /* INET */ @@ -3181,11 +2789,7 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) dst6->sin6_family = AF_INET6; dst6->sin6_len = sizeof(*dst6); dst6->sin6_addr = addr->v6; -#ifdef __FreeBSD__ in6_rtalloc_ign(&ro6, 0, rtableid); -#else /* ! 
__FreeBSD__ */ - rtalloc_noclone((struct route *)&ro6, NO_CLONING); -#endif rt = ro6.ro_rt; break; #endif /* INET6 */ @@ -3193,11 +2797,7 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) if (rt && rt->rt_ifp) { mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr); -#ifdef __FreeBSD__ mss = max(V_tcp_mssdflt, mss); -#else - mss = max(tcp_mssdflt, mss); -#endif RTFREE(rt); } mss = min(mss, offer); @@ -3205,7 +2805,7 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) return (mss); } -void +static void pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr) { struct pf_rule *r = s->rule.ptr; @@ -3230,13 +2830,12 @@ pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr) } } -u_int32_t +static u_int32_t pf_tcp_iss(struct pf_pdesc *pd) { MD5_CTX ctx; u_int32_t digest[4]; -#ifdef __FreeBSD__ if (V_pf_tcp_secret_init == 0) { read_random(&V_pf_tcp_secret, sizeof(V_pf_tcp_secret)); MD5Init(&V_pf_tcp_secret_ctx); @@ -3246,17 +2845,6 @@ pf_tcp_iss(struct pf_pdesc *pd) } ctx = V_pf_tcp_secret_ctx; -#else - if (pf_tcp_secret_init == 0) { - arc4random_buf(pf_tcp_secret, sizeof(pf_tcp_secret)); - MD5Init(&pf_tcp_secret_ctx); - MD5Update(&pf_tcp_secret_ctx, pf_tcp_secret, - sizeof(pf_tcp_secret)); - pf_tcp_secret_init = 1; - } - - ctx = pf_tcp_secret_ctx; -#endif MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof(u_short)); MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof(u_short)); @@ -3268,36 +2856,26 @@ pf_tcp_iss(struct pf_pdesc *pd) MD5Update(&ctx, (char *)&pd->dst->v4, sizeof(struct in_addr)); } MD5Final((u_char *)digest, &ctx); -#ifdef __FreeBSD__ V_pf_tcp_iss_off += 4096; #define ISN_RANDOM_INCREMENT (4096 - 1) return (digest[0] + (arc4random() & ISN_RANDOM_INCREMENT) + V_pf_tcp_iss_off); #undef ISN_RANDOM_INCREMENT -#else - pf_tcp_iss_off += 4096; - return (digest[0] + tcp_iss + pf_tcp_iss_off); -#endif } -int +static int pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, 
- struct pfi_kif *kif, struct mbuf *m, int off, void *h, - struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm, -#ifdef __FreeBSD__ - struct ifqueue *ifq, struct inpcb *inp) -#else - struct ifqueue *ifq) -#endif + struct pfi_kif *kif, struct mbuf *m, int off, struct pf_pdesc *pd, + struct pf_rule **am, struct pf_ruleset **rsm, struct inpcb *inp) { struct pf_rule *nr = NULL; - struct pf_addr *saddr = pd->src, *daddr = pd->dst; + struct pf_addr * const saddr = pd->src; + struct pf_addr * const daddr = pd->dst; sa_family_t af = pd->af; struct pf_rule *r, *a = NULL; struct pf_ruleset *ruleset = NULL; struct pf_src_node *nsn = NULL; struct tcphdr *th = pd->hdr.tcp; - struct pf_state_key *skw = NULL, *sks = NULL; struct pf_state_key *sk = NULL, *nk = NULL; u_short reason; int rewrite = 0, hdrlen = 0; @@ -3305,31 +2883,18 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, int asd = 0; int match = 0; int state_icmp = 0; -#ifdef __FreeBSD__ u_int16_t sport = 0, dport = 0; u_int16_t bproto_sum = 0, bip_sum = 0; -#else - u_int16_t sport, dport; - u_int16_t bproto_sum = 0, bip_sum; -#endif u_int8_t icmptype = 0, icmpcode = 0; + PF_RULES_RASSERT(); - if (direction == PF_IN && pf_check_congestion(ifq)) { - REASON_SET(&reason, PFRES_CONGEST); - return (PF_DROP); - } - -#ifdef __FreeBSD__ - if (inp != NULL) - pd->lookup.done = pf_socket_lookup(direction, pd, inp); - else if (V_debug_pfugidhack) { - PF_UNLOCK(); - DPFPRINTF(PF_DEBUG_MISC, ("pf: unlocked lookup\n")); - pd->lookup.done = pf_socket_lookup(direction, pd, inp); - PF_LOCK(); + if (inp != NULL) { + INP_LOCK_ASSERT(inp); + pd->lookup.uid = inp->inp_cred->cr_uid; + pd->lookup.gid = inp->inp_cred->cr_groups[0]; + pd->lookup.done = 1; } -#endif switch (pd->proto) { case IPPROTO_TCP: @@ -3383,12 +2948,10 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); /* check packet for BINAT/NAT/RDR */ - if 
((nr = pf_get_translation(pd, m, off, direction, kif, &nsn, - &skw, &sks, &sk, &nk, saddr, daddr, sport, dport)) != NULL) { - if (nk == NULL || sk == NULL) { - REASON_SET(&reason, PFRES_MEMORY); - goto cleanup; - } + if ((nr = pf_get_translation(pd, m, off, direction, kif, &nsn, &sk, + &nk, saddr, daddr, sport, dport)) != NULL) { + KASSERT(sk != NULL, ("%s: null sk", __func__)); + KASSERT(nk != NULL, ("%s: null nk", __func__)); if (pd->ip_sum) bip_sum = *pd->ip_sum; @@ -3551,36 +3114,21 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, r = TAILQ_NEXT(r, entries); /* tcp/udp only. uid.op always 0 in other cases */ else if (r->uid.op && (pd->lookup.done || (pd->lookup.done = -#ifdef __FreeBSD__ - pf_socket_lookup(direction, pd, inp), 1)) && -#else - pf_socket_lookup(direction, pd), 1)) && -#endif + pf_socket_lookup(direction, pd, m), 1)) && !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], pd->lookup.uid)) r = TAILQ_NEXT(r, entries); /* tcp/udp only. gid.op always 0 in other cases */ else if (r->gid.op && (pd->lookup.done || (pd->lookup.done = -#ifdef __FreeBSD__ - pf_socket_lookup(direction, pd, inp), 1)) && -#else - pf_socket_lookup(direction, pd), 1)) && -#endif + pf_socket_lookup(direction, pd, m), 1)) && !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], pd->lookup.gid)) r = TAILQ_NEXT(r, entries); else if (r->prob && -#ifdef __FreeBSD__ r->prob <= arc4random()) -#else - r->prob <= arc4random_uniform(UINT_MAX - 1) + 1) -#endif r = TAILQ_NEXT(r, entries); -#ifdef __FreeBSD__ - else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag)) -#else - else if (r->match_tag && !pf_match_tag(m, r, &tag)) -#endif + else if (r->match_tag && !pf_match_tag(m, r, &tag, + pd->pf_mtag ? 
pd->pf_mtag->tag : 0)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto != IPPROTO_TCP || !pf_osfp_match( @@ -3617,8 +3165,8 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, if (r->log || (nr != NULL && nr->log)) { if (rewrite) m_copyback(m, off, hdrlen, pd->hdr.any); - PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr, - a, ruleset, pd); + PFLOG_PACKET(kif, m, af, direction, reason, r->log ? r : nr, a, + ruleset, pd, 1); } if ((r->action == PF_DROP) && @@ -3674,14 +3222,10 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, ack++; if (th->th_flags & TH_FIN) ack++; -#ifdef __FreeBSD__ pf_send_tcp(m, r, af, pd->dst, -#else - pf_send_tcp(r, af, pd->dst, -#endif pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, - r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp); + r->return_ttl, 1, 0, kif->pfik_ifp); } } else if (pd->proto != IPPROTO_ICMP && af == AF_INET && r->return_icmp) @@ -3696,106 +3240,69 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, if (r->action == PF_DROP) goto cleanup; -#ifdef __FreeBSD__ - if (pf_tag_packet(m, tag, rtableid, pd->pf_mtag)) { -#else - if (pf_tag_packet(m, tag, rtableid)) { -#endif + if (tag > 0 && pf_tag_packet(m, pd, tag)) { REASON_SET(&reason, PFRES_MEMORY); goto cleanup; } + if (rtableid >= 0) + M_SETFIB(m, rtableid); if (!state_icmp && (r->keep_state || nr != NULL || (pd->flags & PFDESC_TCP_NORM))) { int action; - action = pf_create_state(r, nr, a, pd, nsn, skw, sks, nk, sk, m, - off, sport, dport, &rewrite, kif, sm, tag, bproto_sum, - bip_sum, hdrlen); + action = pf_create_state(r, nr, a, pd, nsn, nk, sk, m, off, + sport, dport, &rewrite, kif, sm, tag, bproto_sum, bip_sum, + hdrlen); if (action != PF_PASS) return (action); } else { -#ifdef __FreeBSD__ - if (sk != NULL) - pool_put(&V_pf_state_key_pl, sk); - if (nk != NULL) - pool_put(&V_pf_state_key_pl, nk); -#else if (sk != NULL) - 
pool_put(&pf_state_key_pl, sk); + uma_zfree(V_pf_state_key_z, sk); if (nk != NULL) - pool_put(&pf_state_key_pl, nk); -#endif + uma_zfree(V_pf_state_key_z, nk); } /* copy back packet headers if we performed NAT operations */ if (rewrite) m_copyback(m, off, hdrlen, pd->hdr.any); -#if NPFSYNC > 0 - if (*sm != NULL && !ISSET((*sm)->state_flags, PFSTATE_NOSYNC) && -#ifdef __FreeBSD__ - direction == PF_OUT && pfsync_up_ptr != NULL && pfsync_up_ptr()) { -#else - direction == PF_OUT && pfsync_up()) { -#endif + if (*sm != NULL && !((*sm)->state_flags & PFSTATE_NOSYNC) && + direction == PF_OUT && + pfsync_defer_ptr != NULL && pfsync_defer_ptr(*sm, m)) /* * We want the state created, but we dont * want to send this in case a partner * firewall has to know about it to allow * replies through it. */ -#ifdef __FreeBSD__ - if (pfsync_defer_ptr != NULL && - pfsync_defer_ptr(*sm, m)) -#else - if (pfsync_defer(*sm, m)) -#endif - return (PF_DEFER); - } -#endif + return (PF_DEFER); return (PF_PASS); cleanup: -#ifdef __FreeBSD__ - if (sk != NULL) - pool_put(&V_pf_state_key_pl, sk); - if (nk != NULL) - pool_put(&V_pf_state_key_pl, nk); -#else if (sk != NULL) - pool_put(&pf_state_key_pl, sk); + uma_zfree(V_pf_state_key_z, sk); if (nk != NULL) - pool_put(&pf_state_key_pl, nk); -#endif + uma_zfree(V_pf_state_key_z, nk); return (PF_DROP); } -static __inline int +static int pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, - struct pf_pdesc *pd, struct pf_src_node *nsn, struct pf_state_key *skw, - struct pf_state_key *sks, struct pf_state_key *nk, struct pf_state_key *sk, - struct mbuf *m, int off, u_int16_t sport, u_int16_t dport, int *rewrite, - struct pfi_kif *kif, struct pf_state **sm, int tag, u_int16_t bproto_sum, - u_int16_t bip_sum, int hdrlen) + struct pf_pdesc *pd, struct pf_src_node *nsn, struct pf_state_key *nk, + struct pf_state_key *sk, struct mbuf *m, int off, u_int16_t sport, + u_int16_t dport, int *rewrite, struct pfi_kif *kif, struct pf_state **sm, + 
int tag, u_int16_t bproto_sum, u_int16_t bip_sum, int hdrlen) { struct pf_state *s = NULL; struct pf_src_node *sn = NULL; struct tcphdr *th = pd->hdr.tcp; -#ifdef __FreeBSD__ u_int16_t mss = V_tcp_mssdflt; -#else - u_int16_t mss = tcp_mssdflt; -#endif u_short reason; /* check maximums */ if (r->max_states && (r->states_cur >= r->max_states)) { -#ifdef __FreeBSD__ V_pf_status.lcounters[LCNT_STATES]++; -#else - pf_status.lcounters[LCNT_STATES]++; -#endif REASON_SET(&reason, PFRES_MAXSTATES); return (PF_DROP); } @@ -3812,11 +3319,7 @@ pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, REASON_SET(&reason, PFRES_SRCLIMIT); goto csfailed; } -#ifdef __FreeBSD__ - s = pool_get(&V_pf_state_pl, PR_NOWAIT | PR_ZERO); -#else - s = pool_get(&pf_state_pl, PR_NOWAIT | PR_ZERO); -#endif + s = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO); if (s == NULL) { REASON_SET(&reason, PFRES_MEMORY); goto csfailed; @@ -3829,8 +3332,6 @@ pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, s->state_flags |= PFSTATE_ALLOWOPTS; if (r->rule_flag & PFRULE_STATESLOPPY) s->state_flags |= PFSTATE_SLOPPY; - if (r->rule_flag & PFRULE_PFLOW) - s->state_flags |= PFSTATE_PFLOW; s->log = r->log & PF_LOG_ALL; s->sync_state = PFSYNC_S_NONE; if (nr != NULL) @@ -3888,8 +3389,8 @@ pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, s->timeout = PFTM_OTHER_FIRST_PACKET; } - s->creation = time_second; - s->expire = time_second; + s->creation = time_uptime; + s->expire = time_uptime; if (sn != NULL) { s->src_node = sn; @@ -3907,11 +3408,7 @@ pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, REASON_SET(&reason, PFRES_MEMORY); pf_src_tree_remove_state(s); STATE_DEC_COUNTERS(s); -#ifdef __FreeBSD__ - pool_put(&V_pf_state_pl, s); -#else - pool_put(&pf_state_pl, s); -#endif + uma_zfree(V_pf_state_z, s); return (PF_DROP); } if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub && @@ -3923,40 +3420,43 @@ pf_create_state(struct pf_rule 
*r, struct pf_rule *nr, struct pf_rule *a, pf_normalize_tcp_cleanup(s); pf_src_tree_remove_state(s); STATE_DEC_COUNTERS(s); -#ifdef __FreeBSD__ - pool_put(&V_pf_state_pl, s); -#else - pool_put(&pf_state_pl, s); -#endif + uma_zfree(V_pf_state_z, s); return (PF_DROP); } } s->direction = pd->dir; - if (sk == NULL && pf_state_key_setup(pd, nr, &skw, &sks, &sk, &nk, - pd->src, pd->dst, sport, dport)) - goto csfailed; + /* + * sk/nk could already been setup by pf_get_translation(). + */ + if (nr == NULL) { + KASSERT((sk == NULL && nk == NULL), ("%s: nr %p sk %p, nk %p", + __func__, nr, sk, nk)); + sk = pf_state_key_setup(pd, pd->src, pd->dst, sport, dport); + if (sk == NULL) + goto csfailed; + nk = sk; + } else + KASSERT((sk != NULL && nk != NULL), ("%s: nr %p sk %p, nk %p", + __func__, nr, sk, nk)); - if (pf_state_insert(BOUND_IFACE(r, kif), skw, sks, s)) { + /* Swap sk/nk for PF_OUT. */ + if (pf_state_insert(BOUND_IFACE(r, kif), + (pd->dir == PF_IN) ? sk : nk, + (pd->dir == PF_IN) ? nk : sk, s)) { if (pd->proto == IPPROTO_TCP) pf_normalize_tcp_cleanup(s); REASON_SET(&reason, PFRES_STATEINS); pf_src_tree_remove_state(s); STATE_DEC_COUNTERS(s); -#ifdef __FreeBSD__ - pool_put(&V_pf_state_pl, s); -#else - pool_put(&pf_state_pl, s); -#endif + uma_zfree(V_pf_state_z, s); return (PF_DROP); } else *sm = s; pf_set_rt_ifp(s, pd->src); /* needs s->state_key set */ - if (tag > 0) { - pf_tag_ref(tag); + if (tag > 0) s->tag = tag; - } if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && r->keep_state == PF_STATE_SYNPROXY) { s->src.state = PF_TCPS_PROXY_SRC; @@ -3984,13 +3484,9 @@ pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, mss = pf_calc_mss(pd->src, pd->af, rtid, mss); mss = pf_calc_mss(pd->dst, pd->af, rtid, mss); s->src.mss = mss; -#ifdef __FreeBSD__ pf_send_tcp(NULL, r, pd->af, pd->dst, pd->src, th->th_dport, -#else - pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport, -#endif th->th_sport, s->src.seqhi, ntohl(th->th_seq) 
+ 1, - TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL); + TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL); REASON_SET(&reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } @@ -3998,48 +3494,27 @@ pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, return (PF_PASS); csfailed: -#ifdef __FreeBSD__ if (sk != NULL) - pool_put(&V_pf_state_key_pl, sk); + uma_zfree(V_pf_state_key_z, sk); if (nk != NULL) - pool_put(&V_pf_state_key_pl, nk); -#else - if (sk != NULL) - pool_put(&pf_state_key_pl, sk); - if (nk != NULL) - pool_put(&pf_state_key_pl, nk); -#endif + uma_zfree(V_pf_state_key_z, nk); if (sn != NULL && sn->states == 0 && sn->expire == 0) { -#ifdef __FreeBSD__ - RB_REMOVE(pf_src_tree, &V_tree_src_tracking, sn); + pf_remove_src_node(sn); V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; V_pf_status.src_nodes--; - pool_put(&V_pf_src_tree_pl, sn); -#else - RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); - pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; - pf_status.src_nodes--; - pool_put(&pf_src_tree_pl, sn); -#endif + uma_zfree(V_pf_sources_z, sn); } if (nsn != sn && nsn != NULL && nsn->states == 0 && nsn->expire == 0) { -#ifdef __FreeBSD__ - RB_REMOVE(pf_src_tree, &V_tree_src_tracking, nsn); + pf_remove_src_node(nsn); V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; V_pf_status.src_nodes--; - pool_put(&V_pf_src_tree_pl, nsn); -#else - RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn); - pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; - pf_status.src_nodes--; - pool_put(&pf_src_tree_pl, nsn); -#endif + uma_zfree(V_pf_sources_z, nsn); } return (PF_DROP); } -int +static int pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm) @@ -4052,6 +3527,8 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, int asd = 0; int match = 0; + PF_RULES_RASSERT(); + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); 
while (r != NULL) { r->evaluations++; @@ -4086,11 +3563,8 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, else if (r->prob && r->prob <= (arc4random() % (UINT_MAX - 1) + 1)) r = TAILQ_NEXT(r, entries); -#ifdef __FreeBSD__ - else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag)) -#else - else if (r->match_tag && !pf_match_tag(m, r, &tag)) -#endif + else if (r->match_tag && !pf_match_tag(m, r, &tag, + pd->pf_mtag ? pd->pf_mtag->tag : 0)) r = TAILQ_NEXT(r, entries); else { if (r->anchor == NULL) { @@ -4116,17 +3590,13 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, REASON_SET(&reason, PFRES_MATCH); if (r->log) - PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset, - pd); + PFLOG_PACKET(kif, m, af, direction, reason, r, a, ruleset, pd, + 1); if (r->action != PF_PASS) return (PF_DROP); -#ifdef __FreeBSD__ - if (pf_tag_packet(m, tag, -1, pd->pf_mtag)) { -#else - if (pf_tag_packet(m, tag, -1)) { -#endif + if (tag > 0 && pf_tag_packet(m, pd, tag)) { REASON_SET(&reason, PFRES_MEMORY); return (PF_DROP); } @@ -4134,7 +3604,7 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, return (PF_PASS); } -int +static int pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, struct pf_state **state, struct pfi_kif *kif, struct mbuf *m, int off, struct pf_pdesc *pd, u_short *reason, int *copyback) @@ -4334,7 +3804,7 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, src->state = dst->state = TCPS_TIME_WAIT; /* update expire time */ - (*state)->expire = time_second; + (*state)->expire = time_uptime; if (src->state >= TCPS_FIN_WAIT_2 && dst->state >= TCPS_FIN_WAIT_2) (*state)->timeout = PFTM_TCP_CLOSED; @@ -4381,23 +3851,14 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, * and keep updating the state TTL. 
*/ -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif printf("pf: loose state match: "); pf_print_state(*state); pf_print_flags(th->th_flags); printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, -#ifdef __FreeBSD__ pd->p_len, ackskew, (unsigned long long)(*state)->packets[0], (unsigned long long)(*state)->packets[1], -#else - pd->p_len, ackskew, (*state)->packets[0], - (*state)->packets[1], -#endif pd->dir == PF_IN ? "in" : "out", pd->dir == (*state)->direction ? "fwd" : "rev"); } @@ -4436,36 +3897,24 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, (*state)->src.state == TCPS_SYN_SENT) { /* Send RST for state mismatches during handshake */ if (!(th->th_flags & TH_RST)) -#ifdef __FreeBSD__ pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, -#else - pf_send_tcp((*state)->rule.ptr, pd->af, -#endif pd->dst, pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), 0, TH_RST, 0, 0, (*state)->rule.ptr->return_ttl, 1, 0, - pd->eh, kif->pfik_ifp); + kif->pfik_ifp); src->seqlo = 0; src->seqhi = 1; src->max_win = 1; -#ifdef __FreeBSD__ } else if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - } else if (pf_status.debug >= PF_DEBUG_MISC) { -#endif printf("pf: BAD state: "); pf_print_state(*state); pf_print_flags(th->th_flags); printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, pd->p_len, ackskew, -#ifdef __FreeBSD__ (unsigned long long)(*state)->packets[0], (unsigned long long)(*state)->packets[1], -#else - (*state)->packets[0], (*state)->packets[1], -#endif pd->dir == PF_IN ? "in" : "out", pd->dir == (*state)->direction ? 
"fwd" : "rev"); printf("pf: State failure on: %c %c %c %c | %c %c\n", @@ -4484,7 +3933,7 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, return (PF_PASS); } -int +static int pf_tcp_track_sloppy(struct pf_state_peer *src, struct pf_state_peer *dst, struct pf_state **state, struct pf_pdesc *pd, u_short *reason) { @@ -4536,7 +3985,7 @@ pf_tcp_track_sloppy(struct pf_state_peer *src, struct pf_state_peer *dst, src->state = dst->state = TCPS_TIME_WAIT; /* update expire time */ - (*state)->expire = time_second; + (*state)->expire = time_uptime; if (src->state >= TCPS_FIN_WAIT_2 && dst->state >= TCPS_FIN_WAIT_2) (*state)->timeout = PFTM_TCP_CLOSED; @@ -4555,7 +4004,7 @@ pf_tcp_track_sloppy(struct pf_state_peer *src, struct pf_state_peer *dst, return (PF_PASS); } -int +static int pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason) @@ -4566,6 +4015,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, struct pf_state_peer *src, *dst; struct pf_state_key *sk; + bzero(&key, sizeof(key)); key.af = pd->af; key.proto = IPPROTO_TCP; if (direction == PF_IN) { /* wire side, straight */ @@ -4580,11 +4030,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, key.port[0] = th->th_dport; } -#ifdef __FreeBSD__ - STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); -#else - STATE_LOOKUP(kif, &key, direction, *state, m); -#endif + STATE_LOOKUP(kif, &key, direction, *state, pd); if (direction == (*state)->direction) { src = &(*state)->src; @@ -4606,15 +4052,10 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); } -#ifdef __FreeBSD__ pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst, -#else - pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, -#endif pd->src, th->th_dport, th->th_sport, (*state)->src.seqhi, 
ntohl(th->th_seq) + 1, - TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, - 0, NULL, NULL); + TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, 0, NULL); REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } else if (!(th->th_flags & TH_ACK) || @@ -4640,15 +4081,11 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, (*state)->src.max_win = MAX(ntohs(th->th_win), 1); if ((*state)->dst.seqhi == 1) (*state)->dst.seqhi = htonl(arc4random()); -#ifdef __FreeBSD__ pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, -#else - pf_send_tcp((*state)->rule.ptr, pd->af, -#endif &sk->addr[pd->sidx], &sk->addr[pd->didx], sk->port[pd->sidx], sk->port[pd->didx], (*state)->dst.seqhi, 0, TH_SYN, 0, - (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL); + (*state)->src.mss, 0, 0, (*state)->tag, NULL); REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } else if (((th->th_flags & (TH_SYN|TH_ACK)) != @@ -4659,25 +4096,16 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, } else { (*state)->dst.max_win = MAX(ntohs(th->th_win), 1); (*state)->dst.seqlo = ntohl(th->th_seq); -#ifdef __FreeBSD__ pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst, -#else - pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, -#endif pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), ntohl(th->th_seq) + 1, TH_ACK, (*state)->src.max_win, 0, 0, 0, - (*state)->tag, NULL, NULL); -#ifdef __FreeBSD__ + (*state)->tag, NULL); pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, -#else - pf_send_tcp((*state)->rule.ptr, pd->af, -#endif &sk->addr[pd->sidx], &sk->addr[pd->didx], sk->port[pd->sidx], sk->port[pd->didx], (*state)->src.seqhi + 1, (*state)->src.seqlo + 1, - TH_ACK, (*state)->dst.max_win, 0, 0, 1, - 0, NULL, NULL); + TH_ACK, (*state)->dst.max_win, 0, 0, 1, 0, NULL); (*state)->src.seqdiff = (*state)->dst.seqhi - (*state)->src.seqlo; (*state)->dst.seqdiff = (*state)->src.seqhi - @@ -4697,11 +4125,7 @@ pf_test_state_tcp(struct pf_state **state, int 
direction, struct pfi_kif *kif, if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) && dst->state >= TCPS_FIN_WAIT_2 && src->state >= TCPS_FIN_WAIT_2) { -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif printf("pf: state reuse "); pf_print_state(*state); pf_print_flags(th->th_flags); @@ -4709,7 +4133,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, } /* XXX make sure it's the same direction ?? */ (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; - pf_unlink_state(*state); + pf_unlink_state(*state, PF_ENTER_LOCKED); *state = NULL; return (PF_DROP); } @@ -4743,16 +4167,12 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, /* Copyback sequence modulation or stateful scrub changes if needed */ if (copyback) -#ifdef __FreeBSD__ m_copyback(m, off, sizeof(*th), (caddr_t)th); -#else - m_copyback(m, off, sizeof(*th), th); -#endif return (PF_PASS); } -int +static int pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd) { @@ -4760,6 +4180,7 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, struct pf_state_key_cmp key; struct udphdr *uh = pd->hdr.udp; + bzero(&key, sizeof(key)); key.af = pd->af; key.proto = IPPROTO_UDP; if (direction == PF_IN) { /* wire side, straight */ @@ -4774,11 +4195,7 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, key.port[0] = uh->uh_dport; } -#ifdef __FreeBSD__ - STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); -#else - STATE_LOOKUP(kif, &key, direction, *state, m); -#endif + STATE_LOOKUP(kif, &key, direction, *state, pd); if (direction == (*state)->direction) { src = &(*state)->src; @@ -4795,7 +4212,7 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, dst->state = PFUDPS_MULTIPLE; /* update expire time */ - (*state)->expire = 
time_second; + (*state)->expire = time_uptime; if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE) (*state)->timeout = PFTM_UDP_MULTIPLE; else @@ -4816,30 +4233,23 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum, &uh->uh_sum, &nk->addr[pd->didx], nk->port[pd->didx], 1, pd->af); -#ifdef __FreeBSD__ m_copyback(m, off, sizeof(*uh), (caddr_t)uh); -#else - m_copyback(m, off, sizeof(*uh), uh); -#endif } return (PF_PASS); } -int +static int pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason) { struct pf_addr *saddr = pd->src, *daddr = pd->dst; -#ifdef __FreeBSD__ u_int16_t icmpid = 0, *icmpsum; -#else - u_int16_t icmpid, *icmpsum; -#endif u_int8_t icmptype; int state_icmp = 0; struct pf_state_key_cmp key; + bzero(&key, sizeof(key)); switch (pd->proto) { #ifdef INET case IPPROTO_ICMP: @@ -4887,13 +4297,9 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, PF_ACPY(&key.addr[0], pd->dst, key.af); } -#ifdef __FreeBSD__ - STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); -#else - STATE_LOOKUP(kif, &key, direction, *state, m); -#endif + STATE_LOOKUP(kif, &key, direction, *state, pd); - (*state)->expire = time_second; + (*state)->expire = time_uptime; (*state)->timeout = PFTM_ICMP_ERROR_REPLY; /* translate source/destination address, if necessary */ @@ -4926,10 +4332,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, } m_copyback(m, off, ICMP_MINLEN, -#ifdef __FreeBSD__ - (caddr_t) -#endif - pd->hdr.icmp); + (caddr_t )pd->hdr.icmp); break; #endif /* INET */ #ifdef INET6 @@ -4946,12 +4349,8 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, &pd->hdr.icmp6->icmp6_cksum, &nk->addr[pd->didx], 0); - m_copyback(m, off, - sizeof(struct icmp6_hdr), -#ifdef __FreeBSD__ - (caddr_t) -#endif 
- pd->hdr.icmp6); + m_copyback(m, off, sizeof(struct icmp6_hdr), + (caddr_t )pd->hdr.icmp6); break; #endif /* INET6 */ } @@ -4965,9 +4364,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, */ struct pf_pdesc pd2; -#ifdef __FreeBSD__ bzero(&pd2, sizeof pd2); -#endif #ifdef INET struct ip h2; #endif /* INET */ @@ -4975,13 +4372,8 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, struct ip6_hdr h2_6; int terminal = 0; #endif /* INET6 */ -#ifdef __FreeBSD__ int ipoff2 = 0; int off2 = 0; -#else - int ipoff2; - int off2; -#endif pd2.af = pd->af; /* Payload packet is from the opposite direction. */ @@ -5102,11 +4494,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, key.port[pd2.sidx] = th.th_sport; key.port[pd2.didx] = th.th_dport; -#ifdef __FreeBSD__ - STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); -#else - STATE_LOOKUP(kif, &key, direction, *state, m); -#endif + STATE_LOOKUP(kif, &key, direction, *state, pd); if (direction == (*state)->direction) { src = &(*state)->dst; @@ -5132,11 +4520,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, if (!((*state)->state_flags & PFSTATE_SLOPPY) && (!SEQ_GEQ(src->seqhi, seq) || !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) { -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif printf("pf: BAD ICMP %d:%d ", icmptype, pd->hdr.icmp->icmp_code); pf_print_host(pd->src, 0, pd->af); @@ -5149,11 +4533,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, REASON_SET(reason, PFRES_BADSTATE); return (PF_DROP); } else { -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif printf("pf: OK ICMP %d:%d ", icmptype, pd->hdr.icmp->icmp_code); pf_print_host(pd->src, 0, pd->af); @@ -5197,38 +4577,22 @@ pf_test_state_icmp(struct pf_state 
**state, int direction, struct pfi_kif *kif, #ifdef INET case AF_INET: m_copyback(m, off, ICMP_MINLEN, -#ifdef __FreeBSD__ - (caddr_t) -#endif - pd->hdr.icmp); + (caddr_t )pd->hdr.icmp); m_copyback(m, ipoff2, sizeof(h2), -#ifdef __FreeBSD__ - (caddr_t) -#endif - &h2); + (caddr_t )&h2); break; #endif /* INET */ #ifdef INET6 case AF_INET6: m_copyback(m, off, sizeof(struct icmp6_hdr), -#ifdef __FreeBSD__ - (caddr_t) -#endif - pd->hdr.icmp6); + (caddr_t )pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), -#ifdef __FreeBSD__ - (caddr_t) -#endif - &h2_6); + (caddr_t )&h2_6); break; #endif /* INET6 */ } -#ifdef __FreeBSD__ m_copyback(m, off2, 8, (caddr_t)&th); -#else - m_copyback(m, off2, 8, &th); -#endif } return (PF_PASS); @@ -5252,11 +4616,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, key.port[pd2.sidx] = uh.uh_sport; key.port[pd2.didx] = uh.uh_dport; -#ifdef __FreeBSD__ - STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); -#else - STATE_LOOKUP(kif, &key, direction, *state, m); -#endif + STATE_LOOKUP(kif, &key, direction, *state, pd); /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != @@ -5287,38 +4647,21 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, #ifdef INET case AF_INET: m_copyback(m, off, ICMP_MINLEN, -#ifdef __FreeBSD__ - (caddr_t) -#endif - pd->hdr.icmp); -#ifdef __FreeBSD__ + (caddr_t )pd->hdr.icmp); m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); -#else - m_copyback(m, ipoff2, sizeof(h2), &h2); -#endif break; #endif /* INET */ #ifdef INET6 case AF_INET6: m_copyback(m, off, sizeof(struct icmp6_hdr), -#ifdef __FreeBSD__ - (caddr_t) -#endif - pd->hdr.icmp6); + (caddr_t )pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), -#ifdef __FreeBSD__ - (caddr_t) -#endif - &h2_6); + (caddr_t )&h2_6); break; #endif /* INET6 */ } -#ifdef __FreeBSD__ m_copyback(m, off2, sizeof(uh), (caddr_t)&uh); -#else - m_copyback(m, off2, sizeof(uh), &uh); -#endif } 
return (PF_PASS); break; @@ -5341,11 +4684,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); key.port[0] = key.port[1] = iih.icmp_id; -#ifdef __FreeBSD__ - STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); -#else - STATE_LOOKUP(kif, &key, direction, *state, m); -#endif + STATE_LOOKUP(kif, &key, direction, *state, pd); /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != @@ -5372,15 +4711,9 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET); -#ifdef __FreeBSD__ m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp); m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); m_copyback(m, off2, ICMP_MINLEN, (caddr_t)&iih); -#else - m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp); - m_copyback(m, ipoff2, sizeof(h2), &h2); - m_copyback(m, off2, ICMP_MINLEN, &iih); -#endif } return (PF_PASS); break; @@ -5404,11 +4737,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); key.port[0] = key.port[1] = iih.icmp6_id; -#ifdef __FreeBSD__ - STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); -#else - STATE_LOOKUP(kif, &key, direction, *state, m); -#endif + STATE_LOOKUP(kif, &key, direction, *state, pd); /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != @@ -5435,19 +4764,11 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET6); -#ifdef __FreeBSD__ m_copyback(m, off, sizeof(struct icmp6_hdr), (caddr_t)pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6); m_copyback(m, off2, sizeof(struct icmp6_hdr), (caddr_t)&iih); -#else - m_copyback(m, off, sizeof(struct icmp6_hdr), - pd->hdr.icmp6); - m_copyback(m, ipoff2, sizeof(h2_6), &h2_6); - m_copyback(m, off2, sizeof(struct icmp6_hdr), 
- &iih); -#endif } return (PF_PASS); break; @@ -5460,11 +4781,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); key.port[0] = key.port[1] = 0; -#ifdef __FreeBSD__ - STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); -#else - STATE_LOOKUP(kif, &key, direction, *state, m); -#endif + STATE_LOOKUP(kif, &key, direction, *state, pd); /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != @@ -5490,30 +4807,18 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, switch (pd2.af) { #ifdef INET case AF_INET: -#ifdef __FreeBSD__ m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp); m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); -#else - m_copyback(m, off, ICMP_MINLEN, - pd->hdr.icmp); - m_copyback(m, ipoff2, sizeof(h2), &h2); -#endif break; #endif /* INET */ #ifdef INET6 case AF_INET6: m_copyback(m, off, sizeof(struct icmp6_hdr), -#ifdef __FreeBSD__ - (caddr_t) -#endif - pd->hdr.icmp6); + (caddr_t )pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), -#ifdef __FreeBSD__ - (caddr_t) -#endif - &h2_6); + (caddr_t )&h2_6); break; #endif /* INET6 */ } @@ -5525,13 +4830,14 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, } } -int +static int pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, struct pf_pdesc *pd) { struct pf_state_peer *src, *dst; struct pf_state_key_cmp key; + bzero(&key, sizeof(key)); key.af = pd->af; key.proto = pd->proto; if (direction == PF_IN) { @@ -5544,11 +4850,7 @@ pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, key.port[1] = key.port[0] = 0; } -#ifdef __FreeBSD__ - STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag); -#else - STATE_LOOKUP(kif, &key, direction, *state, m); -#endif + STATE_LOOKUP(kif, &key, direction, *state, pd); if (direction == (*state)->direction) { src = 
&(*state)->src; @@ -5565,7 +4867,7 @@ pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, dst->state = PFOTHERS_MULTIPLE; /* update expire time */ - (*state)->expire = time_second; + (*state)->expire = time_uptime; if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE) (*state)->timeout = PFTM_OTHER_MULTIPLE; else @@ -5575,17 +4877,10 @@ pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { struct pf_state_key *nk = (*state)->key[pd->didx]; -#ifdef __FreeBSD__ - KASSERT(nk, ("%s: nk is null", __FUNCTION__)); - KASSERT(pd, ("%s: pd is null", __FUNCTION__)); - KASSERT(pd->src, ("%s: pd->src is null", __FUNCTION__)); - KASSERT(pd->dst, ("%s: pd->dst is null", __FUNCTION__)); -#else - KASSERT(nk); - KASSERT(pd); - KASSERT(pd->src); - KASSERT(pd->dst); -#endif + KASSERT(nk, ("%s: nk is null", __func__)); + KASSERT(pd, ("%s: pd is null", __func__)); + KASSERT(pd->src, ("%s: pd->src is null", __func__)); + KASSERT(pd->dst, ("%s: pd->dst is null", __func__)); switch (pd->af) { #ifdef INET case AF_INET: @@ -5672,21 +4967,13 @@ int pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, int rtableid) { -#ifdef __FreeBSD__ #ifdef RADIX_MPATH struct radix_node_head *rnh; #endif -#endif struct sockaddr_in *dst; int ret = 1; int check_mpath; -#ifndef __FreeBSD__ - extern int ipmultipath; -#endif #ifdef INET6 -#ifndef __FreeBSD__ - extern int ip6_multipath; -#endif struct sockaddr_in6 *dst6; struct route_in6 ro; #else @@ -5697,14 +4984,12 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, struct ifnet *ifp; check_mpath = 0; -#ifdef __FreeBSD__ #ifdef RADIX_MPATH /* XXX: stick to table 0 for now */ rnh = rt_tables_get_rnh(0, af); if (rnh != NULL && rn_mpath_capable(rnh)) check_mpath = 1; #endif -#endif bzero(&ro, sizeof(ro)); switch (af) { case AF_INET: @@ -5712,10 +4997,6 @@ pf_routable(struct pf_addr *addr, 
sa_family_t af, struct pfi_kif *kif, dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = addr->v4; -#ifndef __FreeBSD__ - if (ipmultipath) - check_mpath = 1; -#endif break; #ifdef INET6 case AF_INET6: @@ -5729,10 +5010,6 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, dst6->sin6_family = AF_INET6; dst6->sin6_len = sizeof(*dst6); dst6->sin6_addr = addr->v6; -#ifndef __FreeBSD__ - if (ip6_multipath) - check_mpath = 1; -#endif break; #endif /* INET6 */ default: @@ -5743,7 +5020,6 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) goto out; -#ifdef __FreeBSD__ switch (af) { #ifdef INET6 case AF_INET6: @@ -5759,9 +5035,6 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, rtalloc_ign((struct route *)&ro, 0); /* No/default FIB. */ break; } -#else /* ! __FreeBSD__ */ - rtalloc_noclone((struct route *)&ro, NO_CLONING); -#endif if (ro.ro_rt != NULL) { /* No interface given, this is a no-route check */ @@ -5778,22 +5051,13 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, rn = (struct radix_node *)ro.ro_rt; do { rt = (struct rtentry *)rn; -#ifndef __FreeBSD__ /* CARPDEV */ - if (rt->rt_ifp->if_type == IFT_CARP) - ifp = rt->rt_ifp->if_carpdev; - else -#endif - ifp = rt->rt_ifp; + ifp = rt->rt_ifp; if (kif->pfik_ifp == ifp) ret = 1; -#ifdef __FreeBSD__ #ifdef RADIX_MPATH rn = rn_mpath_next(rn); #endif -#else - rn = rn_mpath_next(rn, 0); -#endif } while (check_mpath == 1 && rn != NULL && ret == 0); } else ret = 0; @@ -5803,373 +5067,186 @@ out: return (ret); } -int -pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw, - int rtableid) -{ - struct sockaddr_in *dst; -#ifdef INET6 - struct sockaddr_in6 *dst6; - struct route_in6 ro; -#else - struct route ro; -#endif - int ret = 0; - - bzero(&ro, sizeof(ro)); - switch (af) { - case AF_INET: - dst = satosin(&ro.ro_dst); - dst->sin_family = 
AF_INET; - dst->sin_len = sizeof(*dst); - dst->sin_addr = addr->v4; - break; -#ifdef INET6 - case AF_INET6: - dst6 = (struct sockaddr_in6 *)&ro.ro_dst; - dst6->sin6_family = AF_INET6; - dst6->sin6_len = sizeof(*dst6); - dst6->sin6_addr = addr->v6; - break; -#endif /* INET6 */ - default: - return (0); - } - -#ifdef __FreeBSD__ - switch (af) { -#ifdef INET6 - case AF_INET6: - in6_rtalloc_ign(&ro, 0, rtableid); - break; -#endif #ifdef INET - case AF_INET: - in_rtalloc_ign((struct route *)&ro, 0, rtableid); - break; -#endif - default: - rtalloc_ign((struct route *)&ro, 0); - break; - } -#else /* ! __FreeBSD__ */ - rtalloc_noclone((struct route *)&ro, NO_CLONING); -#endif - - if (ro.ro_rt != NULL) { -#ifdef __FreeBSD__ - /* XXX_IMPORT: later */ -#else - if (ro.ro_rt->rt_labelid == aw->v.rtlabel) - ret = 1; -#endif - RTFREE(ro.ro_rt); - } - - return (ret); -} - -#ifdef INET -void +static void pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, struct pf_state *s, struct pf_pdesc *pd) { struct mbuf *m0, *m1; - struct route iproute; - struct route *ro = NULL; - struct sockaddr_in *dst; + struct sockaddr_in dst; struct ip *ip; struct ifnet *ifp = NULL; struct pf_addr naddr; struct pf_src_node *sn = NULL; int error = 0; -#ifdef __FreeBSD__ int sw_csum; -#endif -#ifdef IPSEC - struct m_tag *mtag; -#endif /* IPSEC */ - if (m == NULL || *m == NULL || r == NULL || - (dir != PF_IN && dir != PF_OUT) || oifp == NULL) - panic("pf_route: invalid parameters"); + KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__)); + KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: invalid direction", + __func__)); -#ifdef __FreeBSD__ - if (pd->pf_mtag->routed++ > 3) { -#else - if ((*m)->m_pkthdr.pf.routed++ > 3) { -#endif + if ((pd->pf_mtag == NULL && + ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) || + pd->pf_mtag->routed++ > 3) { m0 = *m; *m = NULL; - goto bad; + goto bad_locked; } if (r->rt == PF_DUPTO) { -#ifdef __FreeBSD__ - if ((m0 = m_dup(*m, M_DONTWAIT)) == NULL) 
-#else - if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL) -#endif + if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) { + if (s) + PF_STATE_UNLOCK(s); return; + } } else { - if ((r->rt == PF_REPLYTO) == (r->direction == dir)) + if ((r->rt == PF_REPLYTO) == (r->direction == dir)) { + if (s) + PF_STATE_UNLOCK(s); return; + } m0 = *m; } - if (m0->m_len < sizeof(struct ip)) { - DPFPRINTF(PF_DEBUG_URGENT, - ("pf_route: m0->m_len < sizeof(struct ip)\n")); - goto bad; - } - ip = mtod(m0, struct ip *); - ro = &iproute; - bzero((caddr_t)ro, sizeof(*ro)); - dst = satosin(&ro->ro_dst); - dst->sin_family = AF_INET; - dst->sin_len = sizeof(*dst); - dst->sin_addr = ip->ip_dst; + bzero(&dst, sizeof(dst)); + dst.sin_family = AF_INET; + dst.sin_len = sizeof(dst); + dst.sin_addr = ip->ip_dst; if (r->rt == PF_FASTROUTE) { -#ifdef __FreeBSD__ - in_rtalloc_ign(ro, 0, M_GETFIB(m0)); -#else - rtalloc(ro); -#endif - if (ro->ro_rt == 0) { -#ifdef __FreeBSD__ + struct rtentry *rt; + + if (s) + PF_STATE_UNLOCK(s); + rt = rtalloc1_fib(sintosa(&dst), 0, 0, M_GETFIB(m0)); + if (rt == NULL) { + RTFREE_LOCKED(rt); KMOD_IPSTAT_INC(ips_noroute); -#else - ipstat.ips_noroute++; -#endif + error = EHOSTUNREACH; goto bad; } - ifp = ro->ro_rt->rt_ifp; - ro->ro_rt->rt_use++; + ifp = rt->rt_ifp; + rt->rt_rmx.rmx_pksent++; - if (ro->ro_rt->rt_flags & RTF_GATEWAY) - dst = satosin(ro->ro_rt->rt_gateway); + if (rt->rt_flags & RTF_GATEWAY) + bcopy(satosin(rt->rt_gateway), &dst, sizeof(dst)); + RTFREE_LOCKED(rt); } else { if (TAILQ_EMPTY(&r->rpool.list)) { DPFPRINTF(PF_DEBUG_URGENT, - ("pf_route: TAILQ_EMPTY(&r->rpool.list)\n")); - goto bad; + ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__)); + goto bad_locked; } if (s == NULL) { pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src, &naddr, NULL, &sn); if (!PF_AZERO(&naddr, AF_INET)) - dst->sin_addr.s_addr = naddr.v4.s_addr; + dst.sin_addr.s_addr = naddr.v4.s_addr; ifp = r->rpool.cur->kif ? 
r->rpool.cur->kif->pfik_ifp : NULL; } else { if (!PF_AZERO(&s->rt_addr, AF_INET)) - dst->sin_addr.s_addr = + dst.sin_addr.s_addr = s->rt_addr.v4.s_addr; ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; + PF_STATE_UNLOCK(s); } } if (ifp == NULL) goto bad; if (oifp != ifp) { -#ifdef __FreeBSD__ - PF_UNLOCK(); - if (pf_test(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) { - PF_LOCK(); - goto bad; - } else if (m0 == NULL) { - PF_LOCK(); - goto done; - } - PF_LOCK(); -#else if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS) goto bad; else if (m0 == NULL) goto done; -#endif if (m0->m_len < sizeof(struct ip)) { DPFPRINTF(PF_DEBUG_URGENT, - ("pf_route: m0->m_len < sizeof(struct ip)\n")); + ("%s: m0->m_len < sizeof(struct ip)\n", __func__)); goto bad; } ip = mtod(m0, struct ip *); } -#ifdef __FreeBSD__ - /* Copied from FreeBSD 5.1-CURRENT ip_output. */ + if (ifp->if_flags & IFF_LOOPBACK) + m0->m_flags |= M_SKIP_FIREWALL; + + /* Back to host byte order. */ + ip->ip_len = ntohs(ip->ip_len); + ip->ip_off = ntohs(ip->ip_off); + + /* Copied from FreeBSD 10.0-CURRENT ip_output. */ m0->m_pkthdr.csum_flags |= CSUM_IP; sw_csum = m0->m_pkthdr.csum_flags & ~ifp->if_hwassist; if (sw_csum & CSUM_DELAY_DATA) { - /* - * XXX: in_delayed_cksum assumes HBO for ip->ip_len (at least) - */ - NTOHS(ip->ip_len); - NTOHS(ip->ip_off); /* XXX: needed? 
*/ in_delayed_cksum(m0); - HTONS(ip->ip_len); - HTONS(ip->ip_off); sw_csum &= ~CSUM_DELAY_DATA; } +#ifdef SCTP + if (sw_csum & CSUM_SCTP) { + sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2)); + sw_csum &= ~CSUM_SCTP; + } +#endif m0->m_pkthdr.csum_flags &= ifp->if_hwassist; - if (ntohs(ip->ip_len) <= ifp->if_mtu || - (ifp->if_hwassist & CSUM_FRAGMENT && - ((ip->ip_off & htons(IP_DF)) == 0))) { - /* - * ip->ip_len = htons(ip->ip_len); - * ip->ip_off = htons(ip->ip_off); - */ - ip->ip_sum = 0; - if (sw_csum & CSUM_DELAY_IP) { - /* From KAME */ - if (ip->ip_v == IPVERSION && - (ip->ip_hl << 2) == sizeof(*ip)) { - ip->ip_sum = in_cksum_hdr(ip); - } else { - ip->ip_sum = in_cksum(m0, ip->ip_hl << 2); - } - } - PF_UNLOCK(); - error = (*ifp->if_output)(ifp, m0, sintosa(dst), ro); - PF_LOCK(); - goto done; - } -#else - /* Copied from ip_output. */ -#ifdef IPSEC /* - * If deferred crypto processing is needed, check that the - * interface supports it. + * If small enough for interface, or the interface will take + * care of the fragmentation for us, we can just send directly. */ - if ((mtag = m_tag_find(m0, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL)) - != NULL && (ifp->if_capabilities & IFCAP_IPSEC) == 0) { - /* Notify IPsec to do its own crypto. */ - ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1)); - goto bad; - } -#endif /* IPSEC */ - - /* Catch routing changes wrt. hardware checksumming for TCP or UDP. 
*/ - if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT) { - if (!(ifp->if_capabilities & IFCAP_CSUM_TCPv4) || - ifp->if_bridge != NULL) { - in_delayed_cksum(m0); - m0->m_pkthdr.csum_flags &= ~M_TCPV4_CSUM_OUT; /* Clr */ - } - } else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT) { - if (!(ifp->if_capabilities & IFCAP_CSUM_UDPv4) || - ifp->if_bridge != NULL) { - in_delayed_cksum(m0); - m0->m_pkthdr.csum_flags &= ~M_UDPV4_CSUM_OUT; /* Clr */ - } - } - - if (ntohs(ip->ip_len) <= ifp->if_mtu) { + if (ip->ip_len <= ifp->if_mtu || + (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 || + ((ip->ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) { + ip->ip_len = htons(ip->ip_len); + ip->ip_off = htons(ip->ip_off); ip->ip_sum = 0; - if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) && - ifp->if_bridge == NULL) { - m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; -#ifdef __FreeBSD__ - KMOD_IPSTAT_INC(ips_outhwcsum); -#else - ipstat.ips_outhwcsum++; -#endif - } else + if (sw_csum & CSUM_DELAY_IP) ip->ip_sum = in_cksum(m0, ip->ip_hl << 2); - /* Update relevant hardware checksum stats for TCP/UDP */ - if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT) - KMOD_TCPSTAT_INC(tcps_outhwcsum); - else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT) - KMOD_UDPSTAT_INC(udps_outhwcsum); - error = (*ifp->if_output)(ifp, m0, sintosa(dst), NULL); + m0->m_flags &= ~(M_PROTOFLAGS); + error = (*ifp->if_output)(ifp, m0, sintosa(&dst), NULL); goto done; } -#endif - /* - * Too large for interface; fragment if possible. - * Must be able to put at least 8 bytes per fragment. - */ - if (ip->ip_off & htons(IP_DF)) { -#ifdef __FreeBSD__ + /* Balk when DF bit is set or the interface didn't support TSO. 
*/ + if ((ip->ip_off & IP_DF) || (m0->m_pkthdr.csum_flags & CSUM_TSO)) { + error = EMSGSIZE; KMOD_IPSTAT_INC(ips_cantfrag); -#else - ipstat.ips_cantfrag++; -#endif if (r->rt != PF_DUPTO) { -#ifdef __FreeBSD__ - /* icmp_error() expects host byte ordering */ - NTOHS(ip->ip_len); - NTOHS(ip->ip_off); - PF_UNLOCK(); icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, ifp->if_mtu); - PF_LOCK(); -#else - icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, - ifp->if_mtu); -#endif goto done; } else goto bad; } - m1 = m0; -#ifdef __FreeBSD__ - /* - * XXX: is cheaper + less error prone than own function - */ - NTOHS(ip->ip_len); - NTOHS(ip->ip_off); error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist, sw_csum); -#else - error = ip_fragment(m0, ifp, ifp->if_mtu); -#endif - if (error) { -#ifndef __FreeBSD__ /* ip_fragment does not do m_freem() on FreeBSD */ - m0 = NULL; -#endif + if (error) goto bad; - } - for (m0 = m1; m0; m0 = m1) { + for (; m0; m0 = m1) { m1 = m0->m_nextpkt; - m0->m_nextpkt = 0; -#ifdef __FreeBSD__ + m0->m_nextpkt = NULL; if (error == 0) { - PF_UNLOCK(); - error = (*ifp->if_output)(ifp, m0, sintosa(dst), - NULL); - PF_LOCK(); + m0->m_flags &= ~(M_PROTOFLAGS); + error = (*ifp->if_output)(ifp, m0, sintosa(&dst), NULL); } else -#else - if (error == 0) - error = (*ifp->if_output)(ifp, m0, sintosa(dst), - NULL); - else -#endif m_freem(m0); } if (error == 0) -#ifdef __FreeBSD__ KMOD_IPSTAT_INC(ips_fragmented); -#else - ipstat.ips_fragmented++; -#endif done: if (r->rt != PF_DUPTO) *m = NULL; - if (ro == &iproute && ro->ro_rt) - RTFREE(ro->ro_rt); return; +bad_locked: + if (s) + PF_STATE_UNLOCK(s); bad: m_freem(m0); goto done; @@ -6177,146 +5254,115 @@ bad: #endif /* INET */ #ifdef INET6 -void +static void pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, struct pf_state *s, struct pf_pdesc *pd) { struct mbuf *m0; - struct route_in6 ip6route; - struct route_in6 *ro; - struct sockaddr_in6 *dst; + struct sockaddr_in6 dst; struct 
ip6_hdr *ip6; struct ifnet *ifp = NULL; struct pf_addr naddr; struct pf_src_node *sn = NULL; - if (m == NULL || *m == NULL || r == NULL || - (dir != PF_IN && dir != PF_OUT) || oifp == NULL) - panic("pf_route6: invalid parameters"); + KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__)); + KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: invalid direction", + __func__)); -#ifdef __FreeBSD__ - if (pd->pf_mtag->routed++ > 3) { -#else - if ((*m)->m_pkthdr.pf.routed++ > 3) { -#endif + if ((pd->pf_mtag == NULL && + ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) || + pd->pf_mtag->routed++ > 3) { m0 = *m; *m = NULL; - goto bad; + goto bad_locked; } if (r->rt == PF_DUPTO) { -#ifdef __FreeBSD__ - if ((m0 = m_dup(*m, M_DONTWAIT)) == NULL) -#else - if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL) -#endif + if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) { + if (s) + PF_STATE_UNLOCK(s); return; + } } else { - if ((r->rt == PF_REPLYTO) == (r->direction == dir)) + if ((r->rt == PF_REPLYTO) == (r->direction == dir)) { + if (s) + PF_STATE_UNLOCK(s); return; + } m0 = *m; } - if (m0->m_len < sizeof(struct ip6_hdr)) { - DPFPRINTF(PF_DEBUG_URGENT, - ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n")); - goto bad; - } ip6 = mtod(m0, struct ip6_hdr *); - ro = &ip6route; - bzero((caddr_t)ro, sizeof(*ro)); - dst = (struct sockaddr_in6 *)&ro->ro_dst; - dst->sin6_family = AF_INET6; - dst->sin6_len = sizeof(*dst); - dst->sin6_addr = ip6->ip6_dst; + bzero(&dst, sizeof(dst)); + dst.sin6_family = AF_INET6; + dst.sin6_len = sizeof(dst); + dst.sin6_addr = ip6->ip6_dst; /* Cheat. XXX why only in the v6 case??? 
*/ if (r->rt == PF_FASTROUTE) { -#ifdef __FreeBSD__ + if (s) + PF_STATE_UNLOCK(s); m0->m_flags |= M_SKIP_FIREWALL; - PF_UNLOCK(); - ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL); -#else - m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED; ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL); -#endif return; } if (TAILQ_EMPTY(&r->rpool.list)) { DPFPRINTF(PF_DEBUG_URGENT, - ("pf_route6: TAILQ_EMPTY(&r->rpool.list)\n")); - goto bad; + ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__)); + goto bad_locked; } if (s == NULL) { pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src, &naddr, NULL, &sn); if (!PF_AZERO(&naddr, AF_INET6)) - PF_ACPY((struct pf_addr *)&dst->sin6_addr, + PF_ACPY((struct pf_addr *)&dst.sin6_addr, &naddr, AF_INET6); ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL; } else { if (!PF_AZERO(&s->rt_addr, AF_INET6)) - PF_ACPY((struct pf_addr *)&dst->sin6_addr, + PF_ACPY((struct pf_addr *)&dst.sin6_addr, &s->rt_addr, AF_INET6); ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; } + + if (s) + PF_STATE_UNLOCK(s); + if (ifp == NULL) goto bad; if (oifp != ifp) { -#ifdef __FreeBSD__ - PF_UNLOCK(); - if (pf_test6(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) { - PF_LOCK(); - goto bad; - } else if (m0 == NULL) { - PF_LOCK(); - goto done; - } - PF_LOCK(); -#else if (pf_test6(PF_OUT, ifp, &m0, NULL) != PF_PASS) goto bad; else if (m0 == NULL) goto done; -#endif if (m0->m_len < sizeof(struct ip6_hdr)) { DPFPRINTF(PF_DEBUG_URGENT, - ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n")); + ("%s: m0->m_len < sizeof(struct ip6_hdr)\n", + __func__)); goto bad; } ip6 = mtod(m0, struct ip6_hdr *); } + if (ifp->if_flags & IFF_LOOPBACK) + m0->m_flags |= M_SKIP_FIREWALL; + /* * If the packet is too large for the outgoing interface, * send back an icmp6 error. 
*/ - if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr)) - dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index); - if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) { -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - nd6_output(ifp, ifp, m0, dst, NULL); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif - } else { + if (IN6_IS_SCOPE_EMBED(&dst.sin6_addr)) + dst.sin6_addr.s6_addr16[1] = htons(ifp->if_index); + if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) + nd6_output(ifp, ifp, m0, &dst, NULL); + else { in6_ifstat_inc(ifp, ifs6_in_toobig); -#ifdef __FreeBSD__ - if (r->rt != PF_DUPTO) { - PF_UNLOCK(); - icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); - PF_LOCK(); - } else -#else if (r->rt != PF_DUPTO) icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); else -#endif goto bad; } @@ -6325,13 +5371,15 @@ done: *m = NULL; return; +bad_locked: + if (s) + PF_STATE_UNLOCK(s); bad: m_freem(m0); goto done; } #endif /* INET6 */ -#ifdef __FreeBSD__ /* * FreeBSD supports cksum offloads for the following drivers. * em(4), fxp(4), ixgb(4), lge(4), ndis(4), nge(4), re(4), @@ -6350,7 +5398,7 @@ bad: * TCP/UDP layer. * Also, set csum_data to 0xffff to force cksum validation. */ -int +static int pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af) { u_int16_t sum = 0; @@ -6370,7 +5418,7 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t a } else { ip = mtod(m, struct ip *); sum = in_pseudo(ip->ip_src.s_addr, - ip->ip_dst.s_addr, htonl((u_short)len + + ip->ip_dst.s_addr, htonl((u_short)len + m->m_pkthdr.csum_data + IPPROTO_TCP)); } sum ^= 0xffff; @@ -6465,246 +5513,72 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t a } return (0); } -#else /* !__FreeBSD__ */ -/* - * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag - * off is the offset where the protocol header starts - * len is the total length of protocol header plus payload - * returns 0 when the checksum is valid, otherwise returns 1. 
- */ -int -pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, - sa_family_t af) -{ - u_int16_t flag_ok, flag_bad; - u_int16_t sum; - - switch (p) { - case IPPROTO_TCP: - flag_ok = M_TCP_CSUM_IN_OK; - flag_bad = M_TCP_CSUM_IN_BAD; - break; - case IPPROTO_UDP: - flag_ok = M_UDP_CSUM_IN_OK; - flag_bad = M_UDP_CSUM_IN_BAD; - break; - case IPPROTO_ICMP: -#ifdef INET6 - case IPPROTO_ICMPV6: -#endif /* INET6 */ - flag_ok = flag_bad = 0; - break; - default: - return (1); - } - if (m->m_pkthdr.csum_flags & flag_ok) - return (0); - if (m->m_pkthdr.csum_flags & flag_bad) - return (1); - if (off < sizeof(struct ip) || len < sizeof(struct udphdr)) - return (1); - if (m->m_pkthdr.len < off + len) - return (1); - switch (af) { -#ifdef INET - case AF_INET: - if (p == IPPROTO_ICMP) { - if (m->m_len < off) - return (1); - m->m_data += off; - m->m_len -= off; - sum = in_cksum(m, len); - m->m_data -= off; - m->m_len += off; - } else { - if (m->m_len < sizeof(struct ip)) - return (1); - sum = in4_cksum(m, p, off, len); - } - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - if (m->m_len < sizeof(struct ip6_hdr)) - return (1); - sum = in6_cksum(m, p, off, len); - break; -#endif /* INET6 */ - default: - return (1); - } - if (sum) { - m->m_pkthdr.csum_flags |= flag_bad; - switch (p) { - case IPPROTO_TCP: - KMOD_TCPSTAT_INC(tcps_rcvbadsum); - break; - case IPPROTO_UDP: - KMOD_UDPSTAT_INC(udps_badsum); - break; -#ifdef INET - case IPPROTO_ICMP: - KMOD_ICMPSTAT_INC(icps_checksum); - break; -#endif -#ifdef INET6 - case IPPROTO_ICMPV6: - KMOD_ICMP6STAT_INC(icp6s_checksum); - break; -#endif /* INET6 */ - } - return (1); - } - m->m_pkthdr.csum_flags |= flag_ok; - return (0); -} -#endif - -#ifndef __FreeBSD__ -struct pf_divert * -pf_find_divert(struct mbuf *m) -{ - struct m_tag *mtag; - - if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) - return (NULL); - - return ((struct pf_divert *)(mtag + 1)); -} - -struct pf_divert * -pf_get_divert(struct mbuf *m) -{ - 
struct m_tag *mtag; - - if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) { - mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert), - M_NOWAIT); - if (mtag == NULL) - return (NULL); - bzero(mtag + 1, sizeof(struct pf_divert)); - m_tag_prepend(m, mtag); - } - - return ((struct pf_divert *)(mtag + 1)); -} -#endif #ifdef INET int -#ifdef __FreeBSD__ -pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, - struct ether_header *eh, struct inpcb *inp) -#else -pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, - struct ether_header *eh) -#endif +pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) { struct pfi_kif *kif; u_short action, reason = 0, log = 0; struct mbuf *m = *m0; -#ifdef __FreeBSD__ struct ip *h = NULL; struct m_tag *ipfwtag; struct pf_rule *a = NULL, *r = &V_pf_default_rule, *tr, *nr; -#else - struct ip *h; - struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr; -#endif struct pf_state *s = NULL; struct pf_ruleset *ruleset = NULL; struct pf_pdesc pd; int off, dirndx, pqid = 0; -#ifdef __FreeBSD__ - PF_LOCK(); + M_ASSERTPKTHDR(m); + if (!V_pf_status.running) - { - PF_UNLOCK(); return (PF_PASS); - } -#else - if (!pf_status.running) - return (PF_PASS); -#endif memset(&pd, 0, sizeof(pd)); -#ifdef __FreeBSD__ - if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) { - PF_UNLOCK(); - DPFPRINTF(PF_DEBUG_URGENT, - ("pf_test: pf_get_mtag returned NULL\n")); - return (PF_DROP); - } -#endif -#ifndef __FreeBSD__ - if (ifp->if_type == IFT_CARP && ifp->if_carpdev) - kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif; - else -#endif - kif = (struct pfi_kif *)ifp->if_pf_kif; + + kif = (struct pfi_kif *)ifp->if_pf_kif; if (kif == NULL) { -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif DPFPRINTF(PF_DEBUG_URGENT, ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname)); return (PF_DROP); } if (kif->pfik_flags & PFI_IFLAG_SKIP) -#ifdef __FreeBSD__ - { - PF_UNLOCK(); -#endif return (PF_PASS); -#ifdef __FreeBSD__ - } -#endif -#ifdef 
__FreeBSD__ - M_ASSERTPKTHDR(m); -#else -#ifdef DIAGNOSTIC - if ((m->m_flags & M_PKTHDR) == 0) - panic("non-M_PKTHDR is passed to pf_test"); -#endif /* DIAGNOSTIC */ -#endif + if (m->m_flags & M_SKIP_FIREWALL) + return (PF_PASS); - if (m->m_pkthdr.len < (int)sizeof(*h)) { + if (m->m_pkthdr.len < (int)sizeof(struct ip)) { action = PF_DROP; REASON_SET(&reason, PFRES_SHORT); log = 1; goto done; } -#ifdef __FreeBSD__ - if (m->m_flags & M_SKIP_FIREWALL) { - PF_UNLOCK(); - return (PF_PASS); - } -#else - if (m->m_pkthdr.pf.flags & PF_TAG_GENERATED) - return (PF_PASS); -#endif - -#ifdef __FreeBSD__ + pd.pf_mtag = pf_find_mtag(m); + + PF_RULES_RLOCK(); + if (ip_divert_ptr != NULL && ((ipfwtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL)) != NULL)) { struct ipfw_rule_ref *rr = (struct ipfw_rule_ref *)(ipfwtag+1); if (rr->info & IPFW_IS_DIVERT && rr->rulenum == 0) { + if (pd.pf_mtag == NULL && + ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { + action = PF_DROP; + goto done; + } pd.pf_mtag->flags |= PF_PACKET_LOOPED; m_tag_delete(m, ipfwtag); } - if (pd.pf_mtag->flags & PF_FASTFWD_OURS_PRESENT) { + if (pd.pf_mtag && pd.pf_mtag->flags & PF_FASTFWD_OURS_PRESENT) { m->m_flags |= M_FASTFWD_OURS; pd.pf_mtag->flags &= ~PF_FASTFWD_OURS_PRESENT; } - } else -#endif - /* We do IP header normalization and packet reassembly here */ - if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) { + } else if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) { + /* We do IP header normalization and packet reassembly here */ action = PF_DROP; goto done; } @@ -6712,7 +5586,7 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, h = mtod(m, struct ip *); off = h->ip_hl << 2; - if (off < (int)sizeof(*h)) { + if (off < (int)sizeof(struct ip)) { action = PF_DROP; REASON_SET(&reason, PFRES_SHORT); log = 1; @@ -6731,7 +5605,6 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, pd.af = AF_INET; pd.tos = h->ip_tos; pd.tot_len = ntohs(h->ip_len); - pd.eh = eh; /* handle fragments that didn't 
get reassembled by normalization */ if (h->ip_off & htons(IP_MF | IP_OFFMASK)) { @@ -6760,25 +5633,14 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { -#if NPFSYNC > 0 -#ifdef __FreeBSD__ if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); -#else - pfsync_update_state(s); -#endif -#endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) -#ifdef __FreeBSD__ - action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, NULL, inp); -#else - action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, &ipintrq); -#endif + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); break; } @@ -6800,25 +5662,14 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, } action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); if (action == PF_PASS) { -#if NPFSYNC > 0 -#ifdef __FreeBSD__ if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); -#else - pfsync_update_state(s); -#endif -#endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) -#ifdef __FreeBSD__ - action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, NULL, inp); -#else - action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, &ipintrq); -#endif + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); break; } @@ -6834,25 +5685,14 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { -#if NPFSYNC > 0 -#ifdef __FreeBSD__ if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); -#else - pfsync_update_state(s); -#endif -#endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) -#ifdef __FreeBSD__ - action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, NULL, inp); -#else - 
action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, &ipintrq); -#endif + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); break; } @@ -6868,29 +5708,19 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, default: action = pf_test_state_other(&s, dir, kif, m, &pd); if (action == PF_PASS) { -#if NPFSYNC > 0 -#ifdef __FreeBSD__ if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); -#else - pfsync_update_state(s); -#endif -#endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) -#ifdef __FreeBSD__ - action = pf_test_rule(&r, &s, dir, kif, m, off, h, - &pd, &a, &ruleset, NULL, inp); -#else - action = pf_test_rule(&r, &s, dir, kif, m, off, h, - &pd, &a, &ruleset, &ipintrq); -#endif + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); break; } done: + PF_RULES_RUNLOCK(); if (action == PF_PASS && h->ip_hl > 5 && !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) { action = PF_DROP; @@ -6900,23 +5730,20 @@ done: ("pf: dropping packet with ip options\n")); } - if ((s && s->tag) || r->rtableid >= 0) -#ifdef __FreeBSD__ - pf_tag_packet(m, s ? s->tag : 0, r->rtableid, pd.pf_mtag); -#else - pf_tag_packet(m, s ? 
s->tag : 0, r->rtableid); -#endif - - if (dir == PF_IN && s && s->key[PF_SK_STACK]) -#ifdef __FreeBSD__ - pd.pf_mtag->statekey = s->key[PF_SK_STACK]; -#else - m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK]; -#endif + if (s && s->tag > 0 && pf_tag_packet(m, &pd, s->tag)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + } + if (r->rtableid >= 0) + M_SETFIB(m, r->rtableid); #ifdef ALTQ if (action == PF_PASS && r->qid) { -#ifdef __FreeBSD__ + if (pd.pf_mtag == NULL && + ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + } if (pqid || (pd.tos & IPTOS_LOWDELAY)) pd.pf_mtag->qid = r->pqid; else @@ -6924,14 +5751,6 @@ done: /* add hints for ecn */ pd.pf_mtag->hdr = h; -#else - if (pqid || (pd.tos & IPTOS_LOWDELAY)) - m->m_pkthdr.pf.qid = r->pqid; - else - m->m_pkthdr.pf.qid = r->qid; - /* add hints for ecn */ - m->m_pkthdr.pf.hdr = h; -#endif } #endif /* ALTQ */ @@ -6945,35 +5764,37 @@ done: (s->nat_rule.ptr->action == PF_RDR || s->nat_rule.ptr->action == PF_BINAT) && (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) -#ifdef __FreeBSD__ m->m_flags |= M_SKIP_FIREWALL; -#else - m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST; -#endif -#ifdef __FreeBSD__ - if (action == PF_PASS && r->divert.port && - ip_divert_ptr != NULL && !PACKET_LOOPED()) { + if (action == PF_PASS && r->divert.port && ip_divert_ptr != NULL && + !PACKET_LOOPED(&pd)) { ipfwtag = m_tag_alloc(MTAG_IPFW_RULE, 0, - sizeof(struct ipfw_rule_ref), M_NOWAIT | M_ZERO); + sizeof(struct ipfw_rule_ref), M_NOWAIT | M_ZERO); if (ipfwtag != NULL) { ((struct ipfw_rule_ref *)(ipfwtag+1))->info = ntohs(r->divert.port); ((struct ipfw_rule_ref *)(ipfwtag+1))->rulenum = dir; - m_tag_prepend(m, ipfwtag); - - PF_UNLOCK(); + if (s) + PF_STATE_UNLOCK(s); + m_tag_prepend(m, ipfwtag); if (m->m_flags & M_FASTFWD_OURS) { + if (pd.pf_mtag == NULL && + ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + log = 
1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: failed to allocate tag\n")); + } pd.pf_mtag->flags |= PF_FASTFWD_OURS_PRESENT; m->m_flags &= ~M_FASTFWD_OURS; } - - ip_divert_ptr(*m0, - dir == PF_IN ? DIR_IN : DIR_OUT); + ip_divert_ptr(*m0, dir == PF_IN ? DIR_IN : DIR_OUT); *m0 = NULL; + return (action); } else { /* XXX: ipfw has the same behaviour! */ @@ -6984,17 +5805,6 @@ done: ("pf: failed to allocate divert tag\n")); } } -#else - if (dir == PF_IN && action == PF_PASS && r->divert.port) { - struct pf_divert *divert; - - if ((divert = pf_get_divert(m))) { - m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED; - divert->port = r->divert.port; - divert->addr.ipv4 = r->divert.addr.v4; - } - } -#endif if (log) { struct pf_rule *lr; @@ -7004,8 +5814,8 @@ done: lr = s->nat_rule.ptr; else lr = r; - PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, lr, a, ruleset, - &pd); + PFLOG_PACKET(kif, m, AF_INET, dir, reason, lr, a, ruleset, &pd, + (s == NULL)); } kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len; @@ -7038,11 +5848,7 @@ done: } tr = r; nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; -#ifdef __FreeBSD__ if (nr != NULL && r == &V_pf_default_rule) -#else - if (nr != NULL && r == &pf_default_rule) -#endif tr = nr; if (tr->src.addr.type == PF_ADDR_TABLE) pfr_update_stats(tr->src.addr.p.tbl, @@ -7068,96 +5874,53 @@ done: action = PF_PASS; break; default: - /* pf_route can free the mbuf causing *m0 to become NULL */ - if (r->rt) + /* pf_route() returns unlocked. 
*/ + if (r->rt) { pf_route(m0, r, dir, kif->pfik_ifp, s, &pd); + return (action); + } break; } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif + if (s) + PF_STATE_UNLOCK(s); + return (action); } #endif /* INET */ #ifdef INET6 int -#ifdef __FreeBSD__ -pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, - struct ether_header *eh, struct inpcb *inp) -#else -pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, - struct ether_header *eh) -#endif +pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) { struct pfi_kif *kif; u_short action, reason = 0, log = 0; struct mbuf *m = *m0, *n = NULL; -#ifdef __FreeBSD__ struct ip6_hdr *h = NULL; struct pf_rule *a = NULL, *r = &V_pf_default_rule, *tr, *nr; -#else - struct ip6_hdr *h; - struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr; -#endif struct pf_state *s = NULL; struct pf_ruleset *ruleset = NULL; struct pf_pdesc pd; int off, terminal = 0, dirndx, rh_cnt = 0; -#ifdef __FreeBSD__ - PF_LOCK(); - if (!V_pf_status.running) { - PF_UNLOCK(); - return (PF_PASS); - } -#else - if (!pf_status.running) + M_ASSERTPKTHDR(m); + + if (!V_pf_status.running) return (PF_PASS); -#endif memset(&pd, 0, sizeof(pd)); -#ifdef __FreeBSD__ - if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) { - PF_UNLOCK(); - DPFPRINTF(PF_DEBUG_URGENT, - ("pf_test: pf_get_mtag returned NULL\n")); - return (PF_DROP); - } -#endif -#ifndef __FreeBSD__ - if (ifp->if_type == IFT_CARP && ifp->if_carpdev) - kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif; - else -#endif - kif = (struct pfi_kif *)ifp->if_pf_kif; + pd.pf_mtag = pf_find_mtag(m); + if (pd.pf_mtag && pd.pf_mtag->flags & PF_TAG_GENERATED) + return (PF_PASS); + + kif = (struct pfi_kif *)ifp->if_pf_kif; if (kif == NULL) { -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif DPFPRINTF(PF_DEBUG_URGENT, ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname)); return (PF_DROP); } if (kif->pfik_flags & PFI_IFLAG_SKIP) -#ifdef __FreeBSD__ - { - PF_UNLOCK(); -#endif return (PF_PASS); -#ifdef __FreeBSD__ - 
} -#endif - -#ifdef __FreeBSD__ - M_ASSERTPKTHDR(m); -#else -#ifdef DIAGNOSTIC - if ((m->m_flags & M_PKTHDR) == 0) - panic("non-M_PKTHDR is passed to pf_test6"); -#endif /* DIAGNOSTIC */ -#endif if (m->m_pkthdr.len < (int)sizeof(*h)) { action = PF_DROP; @@ -7166,16 +5929,7 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, goto done; } -#ifdef __FreeBSD__ - if (pd.pf_mtag->flags & PF_TAG_GENERATED) { - PF_UNLOCK(); -#else - if (m->m_pkthdr.pf.flags & PF_TAG_GENERATED) -#endif - return (PF_PASS); -#ifdef __FreeBSD__ - } -#endif + PF_RULES_RLOCK(); /* We do IP header normalization and packet reassembly here */ if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) { @@ -7208,7 +5962,6 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, pd.af = AF_INET6; pd.tos = 0; pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); - pd.eh = eh; off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr); pd.proto = h->ip6_nxt; @@ -7300,25 +6053,14 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { -#if NPFSYNC > 0 -#ifdef __FreeBSD__ if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); -#else - pfsync_update_state(s); -#endif -#endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) -#ifdef __FreeBSD__ - action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, NULL, inp); -#else - action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, &ip6intrq); -#endif + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); break; } @@ -7340,25 +6082,14 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, } action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); if (action == PF_PASS) { -#if NPFSYNC > 0 -#ifdef __FreeBSD__ if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); -#else - pfsync_update_state(s); -#endif -#endif /* NPFSYNC */ r = s->rule.ptr; a = 
s->anchor.ptr; log = s->log; } else if (s == NULL) -#ifdef __FreeBSD__ - action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, NULL, inp); -#else - action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, &ip6intrq); -#endif + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); break; } @@ -7381,54 +6112,33 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { -#if NPFSYNC > 0 -#ifdef __FreeBSD__ if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); -#else - pfsync_update_state(s); -#endif -#endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) -#ifdef __FreeBSD__ - action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, NULL, inp); -#else - action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, &ip6intrq); -#endif + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); break; } default: action = pf_test_state_other(&s, dir, kif, m, &pd); if (action == PF_PASS) { -#if NPFSYNC > 0 -#ifdef __FreeBSD__ if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); -#else - pfsync_update_state(s); -#endif -#endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) -#ifdef __FreeBSD__ - action = pf_test_rule(&r, &s, dir, kif, m, off, h, - &pd, &a, &ruleset, NULL, inp); -#else - action = pf_test_rule(&r, &s, dir, kif, m, off, h, - &pd, &a, &ruleset, &ip6intrq); -#endif + action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, + &a, &ruleset, inp); break; } done: + PF_RULES_RUNLOCK(); if (n != m) { m_freem(n); n = NULL; @@ -7444,37 +6154,26 @@ done: ("pf: dropping packet with dangerous v6 headers\n")); } - if ((s && s->tag) || r->rtableid >= 0) -#ifdef __FreeBSD__ - pf_tag_packet(m, s ? s->tag : 0, r->rtableid, pd.pf_mtag); -#else - pf_tag_packet(m, s ? 
s->tag : 0, r->rtableid); -#endif - - if (dir == PF_IN && s && s->key[PF_SK_STACK]) -#ifdef __FreeBSD__ - pd.pf_mtag->statekey = s->key[PF_SK_STACK]; -#else - m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK]; -#endif + if (s && s->tag > 0 && pf_tag_packet(m, &pd, s->tag)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + } + if (r->rtableid >= 0) + M_SETFIB(m, r->rtableid); #ifdef ALTQ if (action == PF_PASS && r->qid) { -#ifdef __FreeBSD__ + if (pd.pf_mtag == NULL && + ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + } if (pd.tos & IPTOS_LOWDELAY) pd.pf_mtag->qid = r->pqid; else pd.pf_mtag->qid = r->qid; /* add hints for ecn */ pd.pf_mtag->hdr = h; -#else - if (pd.tos & IPTOS_LOWDELAY) - m->m_pkthdr.pf.qid = r->pqid; - else - m->m_pkthdr.pf.qid = r->qid; - /* add hints for ecn */ - m->m_pkthdr.pf.hdr = h; -#endif } #endif /* ALTQ */ @@ -7483,27 +6182,11 @@ done: (s->nat_rule.ptr->action == PF_RDR || s->nat_rule.ptr->action == PF_BINAT) && IN6_IS_ADDR_LOOPBACK(&pd.dst->v6)) -#ifdef __FreeBSD__ m->m_flags |= M_SKIP_FIREWALL; -#else - m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST; -#endif -#ifdef __FreeBSD__ /* XXX: Anybody working on it?! */ if (r->divert.port) printf("pf: divert(9) is not supported for IPv6\n"); -#else - if (dir == PF_IN && action == PF_PASS && r->divert.port) { - struct pf_divert *divert; - - if ((divert = pf_get_divert(m))) { - m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED; - divert->port = r->divert.port; - divert->addr.ipv6 = r->divert.addr.v6; - } - } -#endif if (log) { struct pf_rule *lr; @@ -7513,8 +6196,8 @@ done: lr = s->nat_rule.ptr; else lr = r; - PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, lr, a, ruleset, - &pd); + PFLOG_PACKET(kif, m, AF_INET6, dir, reason, lr, a, ruleset, + &pd, (s == NULL)); } kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len; @@ -7547,11 +6230,7 @@ done: } tr = r; nr = (s != NULL) ? 
s->nat_rule.ptr : pd.nat_rule; -#ifdef __FreeBSD__ if (nr != NULL && r == &V_pf_default_rule) -#else - if (nr != NULL && r == &pf_default_rule) -#endif tr = nr; if (tr->src.addr.type == PF_ADDR_TABLE) pfr_update_stats(tr->src.addr.p.tbl, @@ -7575,46 +6254,17 @@ done: action = PF_PASS; break; default: - /* pf_route6 can free the mbuf causing *m0 to become NULL */ - if (r->rt) + /* pf_route6() returns unlocked. */ + if (r->rt) { pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd); + return (action); + } break; } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif + if (s) + PF_STATE_UNLOCK(s); + return (action); } #endif /* INET6 */ - -int -pf_check_congestion(struct ifqueue *ifq) -{ -#ifdef __FreeBSD__ - /* XXX_IMPORT: later */ - return (0); -#else - if (ifq->ifq_congestion) - return (1); - else - return (0); -#endif -} - -/* - * must be called whenever any addressing information such as - * address, port, protocol has changed - */ -void -pf_pkt_addr_changed(struct mbuf *m) -{ -#ifdef __FreeBSD__ - struct pf_mtag *pf_tag; - - if ((pf_tag = pf_find_mtag(m)) != NULL) - pf_tag->statekey = NULL; -#else - m->m_pkthdr.pf.statekey = NULL; -#endif -} diff --git a/sys/contrib/pf/net/pf_if.c b/sys/contrib/pf/net/pf_if.c index b4491b8..c010b65 100644 --- a/sys/contrib/pf/net/pf_if.c +++ b/sys/contrib/pf/net/pf_if.c @@ -32,137 +32,90 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ -#if defined(__FreeBSD__) -#include "opt_inet.h" -#include "opt_inet6.h" - #include <sys/cdefs.h> + __FBSDID("$FreeBSD$"); -#endif + +#include "opt_inet.h" +#include "opt_inet6.h" #include <sys/param.h> -#include <sys/systm.h> -#ifdef __FreeBSD__ -#include <sys/malloc.h> -#endif -#include <sys/mbuf.h> -#include <sys/filio.h> -#include <sys/socket.h> -#include <sys/socketvar.h> #include <sys/kernel.h> -#ifndef __FreeBSD__ -#include <sys/device.h> -#endif -#include <sys/time.h> -#ifndef __FreeBSD__ -#include <sys/pool.h> -#endif +#include <sys/socket.h> #include <net/if.h> -#include <net/if_types.h> -#ifdef __FreeBSD__ -#include <net/vnet.h> -#endif - -#include <netinet/in.h> -#include <netinet/in_var.h> -#include <netinet/in_systm.h> -#include <netinet/ip.h> -#include <netinet/ip_var.h> - #include <net/pfvar.h> +#include <net/route.h> -#ifdef INET6 -#include <netinet/ip6.h> -#endif /* INET6 */ - -#ifdef __FreeBSD__ VNET_DEFINE(struct pfi_kif *, pfi_all); -VNET_DEFINE(uma_zone_t, pfi_addr_pl); -VNET_DEFINE(struct pfi_ifhead, pfi_ifs); -#define V_pfi_ifs VNET(pfi_ifs) -VNET_DEFINE(long, pfi_update); -#define V_pfi_update VNET(pfi_update) -VNET_DEFINE(struct pfr_addr *, pfi_buffer); +static VNET_DEFINE(long, pfi_update); +#define V_pfi_update VNET(pfi_update) +#define PFI_BUFFER_MAX 0x10000 + +static VNET_DEFINE(struct pfr_addr *, pfi_buffer); +static VNET_DEFINE(int, pfi_buffer_cnt); +static VNET_DEFINE(int, pfi_buffer_max); #define V_pfi_buffer VNET(pfi_buffer) -VNET_DEFINE(int, pfi_buffer_cnt); #define V_pfi_buffer_cnt VNET(pfi_buffer_cnt) -VNET_DEFINE(int, pfi_buffer_max); #define V_pfi_buffer_max VNET(pfi_buffer_max) -#else -struct pfi_kif *pfi_all = NULL; -struct pool pfi_addr_pl; -struct pfi_ifhead pfi_ifs; -long pfi_update = 1; -struct pfr_addr *pfi_buffer; -int pfi_buffer_cnt; -int pfi_buffer_max; -#endif -#ifdef __FreeBSD__ + eventhandler_tag pfi_attach_cookie; eventhandler_tag pfi_detach_cookie; eventhandler_tag pfi_attach_group_cookie; eventhandler_tag 
pfi_change_group_cookie; eventhandler_tag pfi_detach_group_cookie; eventhandler_tag pfi_ifaddr_event_cookie; -#endif - -void pfi_kif_update(struct pfi_kif *); -void pfi_dynaddr_update(struct pfi_dynaddr *dyn); -void pfi_table_update(struct pfr_ktable *, struct pfi_kif *, - int, int); -void pfi_kifaddr_update(void *); -void pfi_instance_add(struct ifnet *, int, int); -void pfi_address_add(struct sockaddr *, int, int); -int pfi_if_compare(struct pfi_kif *, struct pfi_kif *); -int pfi_skip_if(const char *, struct pfi_kif *); -int pfi_unmask(void *); -#ifdef __FreeBSD__ -void pfi_attach_ifnet_event(void * __unused, struct ifnet *); -void pfi_detach_ifnet_event(void * __unused, struct ifnet *); -void pfi_attach_group_event(void *, struct ifg_group *); -void pfi_change_group_event(void *, char *); -void pfi_detach_group_event(void *, struct ifg_group *); -void pfi_ifaddr_event(void * __unused, struct ifnet *); -#endif -RB_PROTOTYPE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); -RB_GENERATE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); - -#define PFI_BUFFER_MAX 0x10000 -#define PFI_MTYPE M_IFADDR +static void pfi_attach_ifnet(struct ifnet *); +static void pfi_attach_ifgroup(struct ifg_group *); + +static void pfi_kif_update(struct pfi_kif *); +static void pfi_dynaddr_update(struct pfi_dynaddr *dyn); +static void pfi_table_update(struct pfr_ktable *, struct pfi_kif *, int, + int); +static void pfi_instance_add(struct ifnet *, int, int); +static void pfi_address_add(struct sockaddr *, int, int); +static int pfi_if_compare(struct pfi_kif *, struct pfi_kif *); +static int pfi_skip_if(const char *, struct pfi_kif *); +static int pfi_unmask(void *); +static void pfi_attach_ifnet_event(void * __unused, struct ifnet *); +static void pfi_detach_ifnet_event(void * __unused, struct ifnet *); +static void pfi_attach_group_event(void *, struct ifg_group *); +static void pfi_change_group_event(void *, char *); +static void pfi_detach_group_event(void *, struct ifg_group *); 
+static void pfi_ifaddr_event(void * __unused, struct ifnet *); + +RB_HEAD(pfi_ifhead, pfi_kif); +static RB_PROTOTYPE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); +static RB_GENERATE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); +static VNET_DEFINE(struct pfi_ifhead, pfi_ifs); +#define V_pfi_ifs VNET(pfi_ifs) + +#define PFI_BUFFER_MAX 0x10000 +MALLOC_DEFINE(PFI_MTYPE, "pf_ifnet", "pf(4) interface database"); + +LIST_HEAD(pfi_list, pfi_kif); +static VNET_DEFINE(struct pfi_list, pfi_unlinked_kifs); +#define V_pfi_unlinked_kifs VNET(pfi_unlinked_kifs) +static struct mtx pfi_unlnkdkifs_mtx; void pfi_initialize(void) { -#ifdef __FreeBSD__ - if (V_pfi_all != NULL) /* already initialized */ -#else - if (pfi_all != NULL) /* already initialized */ -#endif - return; + struct ifg_group *ifg; + struct ifnet *ifp; + struct pfi_kif *kif; -#ifndef __FreeBSD__ - pool_init(&V_pfi_addr_pl, sizeof(struct pfi_dynaddr), 0, 0, 0, - "pfiaddrpl", &pool_allocator_nointr); -#endif -#ifdef __FreeBSD__ V_pfi_buffer_max = 64; V_pfi_buffer = malloc(V_pfi_buffer_max * sizeof(*V_pfi_buffer), PFI_MTYPE, M_WAITOK); - if ((V_pfi_all = pfi_kif_get(IFG_ALL)) == NULL) -#else - pfi_buffer_max = 64; - pfi_buffer = malloc(pfi_buffer_max * sizeof(*pfi_buffer), - PFI_MTYPE, M_WAITOK); + mtx_init(&pfi_unlnkdkifs_mtx, "pf unlinked interfaces", NULL, MTX_DEF); - if ((pfi_all = pfi_kif_get(IFG_ALL)) == NULL) -#endif - panic("pfi_kif_get for pfi_all failed"); -#ifdef __FreeBSD__ - struct ifg_group *ifg; - struct ifnet *ifp; + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); + PF_RULES_WLOCK(); + V_pfi_all = pfi_kif_attach(kif, IFG_ALL); + PF_RULES_WUNLOCK(); IFNET_RLOCK(); TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next) @@ -183,144 +136,131 @@ pfi_initialize(void) pfi_detach_group_event, curvnet, EVENTHANDLER_PRI_ANY); pfi_ifaddr_event_cookie = EVENTHANDLER_REGISTER(ifaddr_event, pfi_ifaddr_event, NULL, EVENTHANDLER_PRI_ANY); -#endif } -#ifdef __FreeBSD__ void pfi_cleanup(void) { struct pfi_kif *p; - 
PF_UNLOCK(); EVENTHANDLER_DEREGISTER(ifnet_arrival_event, pfi_attach_cookie); EVENTHANDLER_DEREGISTER(ifnet_departure_event, pfi_detach_cookie); EVENTHANDLER_DEREGISTER(group_attach_event, pfi_attach_group_cookie); EVENTHANDLER_DEREGISTER(group_change_event, pfi_change_group_cookie); EVENTHANDLER_DEREGISTER(group_detach_event, pfi_detach_group_cookie); EVENTHANDLER_DEREGISTER(ifaddr_event, pfi_ifaddr_event_cookie); - PF_LOCK(); V_pfi_all = NULL; while ((p = RB_MIN(pfi_ifhead, &V_pfi_ifs))) { - if (p->pfik_rules || p->pfik_states) { - printf("pfi_cleanup: dangling refs for %s\n", - p->pfik_name); - } - RB_REMOVE(pfi_ifhead, &V_pfi_ifs, p); free(p, PFI_MTYPE); } + while ((p = LIST_FIRST(&V_pfi_unlinked_kifs))) { + LIST_REMOVE(p, pfik_list); + free(p, PFI_MTYPE); + } + + mtx_destroy(&pfi_unlnkdkifs_mtx); + free(V_pfi_buffer, PFI_MTYPE); } -#endif struct pfi_kif * -pfi_kif_get(const char *kif_name) +pfi_kif_find(const char *kif_name) { - struct pfi_kif *kif; - struct pfi_kif_cmp s; + struct pfi_kif_cmp s; + + PF_RULES_ASSERT(); bzero(&s, sizeof(s)); strlcpy(s.pfik_name, kif_name, sizeof(s.pfik_name)); -#ifdef __FreeBSD__ - if ((kif = RB_FIND(pfi_ifhead, &V_pfi_ifs, (struct pfi_kif *)&s)) != NULL) -#else - if ((kif = RB_FIND(pfi_ifhead, &pfi_ifs, (struct pfi_kif *)&s)) != NULL) -#endif - return (kif); - /* create new one */ -#ifdef __FreeBSD__ - if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_NOWAIT | M_ZERO)) == NULL) -#else - if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_DONTWAIT|M_ZERO)) == NULL) -#endif - return (NULL); + return (RB_FIND(pfi_ifhead, &V_pfi_ifs, (struct pfi_kif *)&s)); +} +struct pfi_kif * +pfi_kif_attach(struct pfi_kif *kif, const char *kif_name) +{ + struct pfi_kif *kif1; + + PF_RULES_WASSERT(); + KASSERT(kif != NULL, ("%s: null kif", __func__)); + + kif1 = pfi_kif_find(kif_name); + if (kif1 != NULL) { + free(kif, PFI_MTYPE); + return (kif1); + } + + bzero(kif, sizeof(*kif)); strlcpy(kif->pfik_name, kif_name, sizeof(kif->pfik_name)); -#ifdef 
__FreeBSD__ /* * It seems that the value of time_second is in unintialzied state * when pf sets interface statistics clear time in boot phase if pf * was statically linked to kernel. Instead of setting the bogus * time value have pfi_get_ifaces handle this case. In - * pfi_get_ifaces it uses boottime.tv_sec if it sees the time is 0. + * pfi_get_ifaces it uses time_second if it sees the time is 0. */ kif->pfik_tzero = time_second > 1 ? time_second : 0; -#else - kif->pfik_tzero = time_second; -#endif TAILQ_INIT(&kif->pfik_dynaddrs); -#ifdef __FreeBSD__ RB_INSERT(pfi_ifhead, &V_pfi_ifs, kif); -#else - RB_INSERT(pfi_ifhead, &pfi_ifs, kif); -#endif return (kif); } void -pfi_kif_ref(struct pfi_kif *kif, enum pfi_kif_refs what) +pfi_kif_ref(struct pfi_kif *kif) { - switch (what) { - case PFI_KIF_REF_RULE: - kif->pfik_rules++; - break; - case PFI_KIF_REF_STATE: - kif->pfik_states++; - break; - default: - panic("pfi_kif_ref with unknown type"); - } + + PF_RULES_WASSERT(); + kif->pfik_rulerefs++; } void -pfi_kif_unref(struct pfi_kif *kif, enum pfi_kif_refs what) +pfi_kif_unref(struct pfi_kif *kif) { - if (kif == NULL) - return; - switch (what) { - case PFI_KIF_REF_NONE: - break; - case PFI_KIF_REF_RULE: - if (kif->pfik_rules <= 0) { - printf("pfi_kif_unref: rules refcount <= 0\n"); - return; - } - kif->pfik_rules--; - break; - case PFI_KIF_REF_STATE: - if (kif->pfik_states <= 0) { - printf("pfi_kif_unref: state refcount <= 0\n"); - return; - } - kif->pfik_states--; - break; - default: - panic("pfi_kif_unref with unknown type"); - } + PF_RULES_WASSERT(); + KASSERT(kif->pfik_rulerefs > 0, ("%s: %p has zero refs", __func__, kif)); -#ifdef __FreeBSD__ - if (kif->pfik_ifp != NULL || kif->pfik_group != NULL || kif == V_pfi_all) -#else - if (kif->pfik_ifp != NULL || kif->pfik_group != NULL || kif == pfi_all) -#endif + kif->pfik_rulerefs--; + + if (kif->pfik_rulerefs > 0) return; - if (kif->pfik_rules || kif->pfik_states) + /* kif referencing an existing ifnet or group should exist. 
*/ + if (kif->pfik_ifp != NULL || kif->pfik_group != NULL || kif == V_pfi_all) return; -#ifdef __FreeBSD__ RB_REMOVE(pfi_ifhead, &V_pfi_ifs, kif); -#else - RB_REMOVE(pfi_ifhead, &pfi_ifs, kif); -#endif - free(kif, PFI_MTYPE); + + kif->pfik_flags |= PFI_IFLAG_REFS; + + mtx_lock(&pfi_unlnkdkifs_mtx); + LIST_INSERT_HEAD(&V_pfi_unlinked_kifs, kif, pfik_list); + mtx_unlock(&pfi_unlnkdkifs_mtx); +} + +void +pfi_kif_purge(void) +{ + struct pfi_kif *kif, *kif1; + + /* + * Do naive mark-and-sweep garbage collecting of old kifs. + * Reference flag is raised by pf_purge_expired_states(). + */ + mtx_lock(&pfi_unlnkdkifs_mtx); + LIST_FOREACH_SAFE(kif, &V_pfi_unlinked_kifs, pfik_list, kif1) { + if (!(kif->pfik_flags & PFI_IFLAG_REFS)) { + LIST_REMOVE(kif, pfik_list); + free(kif, PFI_MTYPE); + } else + kif->pfik_flags &= ~PFI_IFLAG_REFS; + } + mtx_unlock(&pfi_unlnkdkifs_mtx); } int @@ -332,6 +272,7 @@ pfi_kif_match(struct pfi_kif *rule_kif, struct pfi_kif *packet_kif) return (1); if (rule_kif->pfik_group != NULL) + /* XXXGL: locking? 
*/ TAILQ_FOREACH(p, &packet_kif->pfik_ifp->if_groups, ifgl_next) if (p->ifgl_group == rule_kif->pfik_group) return (1); @@ -339,125 +280,38 @@ pfi_kif_match(struct pfi_kif *rule_kif, struct pfi_kif *packet_kif) return (0); } -void +static void pfi_attach_ifnet(struct ifnet *ifp) { - struct pfi_kif *kif; - int s; + struct pfi_kif *kif; + + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); - pfi_initialize(); - s = splsoftnet(); -#ifdef __FreeBSD__ + PF_RULES_WLOCK(); V_pfi_update++; -#else - pfi_update++; -#endif - if ((kif = pfi_kif_get(ifp->if_xname)) == NULL) - panic("pfi_kif_get failed"); + kif = pfi_kif_attach(kif, ifp->if_xname); kif->pfik_ifp = ifp; - ifp->if_pf_kif = (caddr_t)kif; - -#ifndef __FreeBSD__ - if ((kif->pfik_ah_cookie = hook_establish(ifp->if_addrhooks, 1, - pfi_kifaddr_update, kif)) == NULL) - panic("pfi_attach_ifnet: cannot allocate '%s' address hook", - ifp->if_xname); -#endif + ifp->if_pf_kif = kif; pfi_kif_update(kif); - - splx(s); + PF_RULES_WUNLOCK(); } -void -pfi_detach_ifnet(struct ifnet *ifp) -{ - int s; - struct pfi_kif *kif; - - if ((kif = (struct pfi_kif *)ifp->if_pf_kif) == NULL) - return; - - s = splsoftnet(); -#ifdef __FreeBSD__ - V_pfi_update++; -#else - pfi_update++; -#endif -#ifndef __FreeBSD__ - hook_disestablish(ifp->if_addrhooks, kif->pfik_ah_cookie); -#endif - pfi_kif_update(kif); - - kif->pfik_ifp = NULL; - ifp->if_pf_kif = NULL; - pfi_kif_unref(kif, PFI_KIF_REF_NONE); - splx(s); -} - -void +static void pfi_attach_ifgroup(struct ifg_group *ifg) { - struct pfi_kif *kif; - int s; + struct pfi_kif *kif; - pfi_initialize(); - s = splsoftnet(); -#ifdef __FreeBSD__ - V_pfi_update++; -#else - pfi_update++; -#endif - if ((kif = pfi_kif_get(ifg->ifg_group)) == NULL) - panic("pfi_kif_get failed"); - - kif->pfik_group = ifg; - ifg->ifg_pf_kif = (caddr_t)kif; - - splx(s); -} - -void -pfi_detach_ifgroup(struct ifg_group *ifg) -{ - int s; - struct pfi_kif *kif; - - if ((kif = (struct pfi_kif *)ifg->ifg_pf_kif) == NULL) - return; - - s = 
splsoftnet(); -#ifdef __FreeBSD__ - V_pfi_update++; -#else - pfi_update++; -#endif - - kif->pfik_group = NULL; - ifg->ifg_pf_kif = NULL; - pfi_kif_unref(kif, PFI_KIF_REF_NONE); - splx(s); -} - -void -pfi_group_change(const char *group) -{ - struct pfi_kif *kif; - int s; + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); - s = splsoftnet(); -#ifdef __FreeBSD__ + PF_RULES_WLOCK(); V_pfi_update++; -#else - pfi_update++; -#endif - if ((kif = pfi_kif_get(group)) == NULL) - panic("pfi_kif_get failed"); - - pfi_kif_update(kif); + kif = pfi_kif_attach(kif, ifg->ifg_group); - splx(s); + kif->pfik_group = ifg; + ifg->ifg_pf_kif = kif; + PF_RULES_WUNLOCK(); } int @@ -501,28 +355,27 @@ pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af) struct pfi_dynaddr *dyn; char tblname[PF_TABLE_NAME_SIZE]; struct pf_ruleset *ruleset = NULL; - int s, rv = 0; + struct pfi_kif *kif; + int rv = 0; - if (aw->type != PF_ADDR_DYNIFTL) - return (0); -#ifdef __FreeBSD__ - if ((dyn = pool_get(&V_pfi_addr_pl, PR_NOWAIT | PR_ZERO)) -#else - if ((dyn = pool_get(&pfi_addr_pl, PR_WAITOK | PR_LIMITFAIL | PR_ZERO)) -#endif - == NULL) - return (1); + PF_RULES_WASSERT(); + KASSERT(aw->type == PF_ADDR_DYNIFTL, ("%s: type %u", + __func__, aw->type)); + KASSERT(aw->p.dyn == NULL, ("%s: dyn is %p", __func__, aw->p.dyn)); + + if ((dyn = malloc(sizeof(*dyn), PFI_MTYPE, M_NOWAIT | M_ZERO)) == NULL) + return (ENOMEM); + + if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_NOWAIT)) == NULL) { + free(dyn, PFI_MTYPE); + return (ENOMEM); + } - s = splsoftnet(); if (!strcmp(aw->v.ifname, "self")) - dyn->pfid_kif = pfi_kif_get(IFG_ALL); + dyn->pfid_kif = pfi_kif_attach(kif, IFG_ALL); else - dyn->pfid_kif = pfi_kif_get(aw->v.ifname); - if (dyn->pfid_kif == NULL) { - rv = 1; - goto _bad; - } - pfi_kif_ref(dyn->pfid_kif, PFI_KIF_REF_RULE); + dyn->pfid_kif = pfi_kif_attach(kif, aw->v.ifname); + pfi_kif_ref(dyn->pfid_kif); dyn->pfid_net = pfi_unmask(&aw->v.a.mask); if (af == AF_INET && dyn->pfid_net == 32) @@ -540,12 +393,12 
@@ pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af) snprintf(tblname + strlen(tblname), sizeof(tblname) - strlen(tblname), "/%d", dyn->pfid_net); if ((ruleset = pf_find_or_create_ruleset(PF_RESERVED_ANCHOR)) == NULL) { - rv = 1; + rv = ENOMEM; goto _bad; } - if ((dyn->pfid_kt = pfr_attach_table(ruleset, tblname, 1)) == NULL) { - rv = 1; + if ((dyn->pfid_kt = pfr_attach_table(ruleset, tblname)) == NULL) { + rv = ENOMEM; goto _bad; } @@ -556,7 +409,7 @@ pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af) TAILQ_INSERT_TAIL(&dyn->pfid_kif->pfik_dynaddrs, dyn, entry); aw->p.dyn = dyn; pfi_kif_update(dyn->pfid_kif); - splx(s); + return (0); _bad: @@ -565,108 +418,92 @@ _bad: if (ruleset != NULL) pf_remove_if_empty_ruleset(ruleset); if (dyn->pfid_kif != NULL) - pfi_kif_unref(dyn->pfid_kif, PFI_KIF_REF_RULE); -#ifdef __FreeBSD__ - pool_put(&V_pfi_addr_pl, dyn); -#else - pool_put(&pfi_addr_pl, dyn); -#endif - splx(s); + pfi_kif_unref(dyn->pfid_kif); + free(dyn, PFI_MTYPE); + return (rv); } -void +static void pfi_kif_update(struct pfi_kif *kif) { struct ifg_list *ifgl; struct pfi_dynaddr *p; + PF_RULES_WASSERT(); + /* update all dynaddr */ TAILQ_FOREACH(p, &kif->pfik_dynaddrs, entry) pfi_dynaddr_update(p); /* again for all groups kif is member of */ - if (kif->pfik_ifp != NULL) + if (kif->pfik_ifp != NULL) { + IF_ADDR_RLOCK(kif->pfik_ifp); TAILQ_FOREACH(ifgl, &kif->pfik_ifp->if_groups, ifgl_next) pfi_kif_update((struct pfi_kif *) ifgl->ifgl_group->ifg_pf_kif); + IF_ADDR_RUNLOCK(kif->pfik_ifp); + } } -void +static void pfi_dynaddr_update(struct pfi_dynaddr *dyn) { struct pfi_kif *kif; struct pfr_ktable *kt; - if (dyn == NULL || dyn->pfid_kif == NULL || dyn->pfid_kt == NULL) - panic("pfi_dynaddr_update"); + PF_RULES_WASSERT(); + KASSERT(dyn && dyn->pfid_kif && dyn->pfid_kt, + ("%s: bad argument", __func__)); kif = dyn->pfid_kif; kt = dyn->pfid_kt; -#ifdef __FreeBSD__ if (kt->pfrkt_larg != V_pfi_update) { -#else - if (kt->pfrkt_larg != pfi_update) { -#endif /* 
this table needs to be brought up-to-date */ pfi_table_update(kt, kif, dyn->pfid_net, dyn->pfid_iflags); -#ifdef __FreeBSD__ kt->pfrkt_larg = V_pfi_update; -#else - kt->pfrkt_larg = pfi_update; -#endif } pfr_dynaddr_update(kt, dyn); } -void +static void pfi_table_update(struct pfr_ktable *kt, struct pfi_kif *kif, int net, int flags) { int e, size2 = 0; struct ifg_member *ifgm; -#ifdef __FreeBSD__ V_pfi_buffer_cnt = 0; -#else - pfi_buffer_cnt = 0; -#endif if (kif->pfik_ifp != NULL) pfi_instance_add(kif->pfik_ifp, net, flags); - else if (kif->pfik_group != NULL) + else if (kif->pfik_group != NULL) { + IFNET_RLOCK(); TAILQ_FOREACH(ifgm, &kif->pfik_group->ifg_members, ifgm_next) pfi_instance_add(ifgm->ifgm_ifp, net, flags); + IFNET_RUNLOCK(); + } -#ifdef __FreeBSD__ if ((e = pfr_set_addrs(&kt->pfrkt_t, V_pfi_buffer, V_pfi_buffer_cnt, &size2, NULL, NULL, NULL, 0, PFR_TFLAG_ALLMASK))) - printf("pfi_table_update: cannot set %d new addresses " - "into table %s: %d\n", V_pfi_buffer_cnt, kt->pfrkt_name, e); -#else - if ((e = pfr_set_addrs(&kt->pfrkt_t, pfi_buffer, pfi_buffer_cnt, &size2, - NULL, NULL, NULL, 0, PFR_TFLAG_ALLMASK))) - printf("pfi_table_update: cannot set %d new addresses " - "into table %s: %d\n", pfi_buffer_cnt, kt->pfrkt_name, e); -#endif + printf("%s: cannot set %d new addresses into table %s: %d\n", + __func__, V_pfi_buffer_cnt, kt->pfrkt_name, e); } -void +static void pfi_instance_add(struct ifnet *ifp, int net, int flags) { struct ifaddr *ia; int got4 = 0, got6 = 0; int net2, af; - if (ifp == NULL) - return; - TAILQ_FOREACH(ia, &ifp->if_addrlist, ifa_list) { + IF_ADDR_RLOCK(ifp); + TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_list) { if (ia->ifa_addr == NULL) continue; af = ia->ifa_addr->sa_family; if (af != AF_INET && af != AF_INET6) continue; -#ifdef __FreeBSD__ /* * XXX: For point-to-point interfaces, (ifname:0) and IPv4, * jump over addresses without a proper route to work @@ -677,7 +514,6 @@ pfi_instance_add(struct ifnet *ifp, int net, int flags) 
!(ia->ifa_flags & IFA_ROUTE) && (flags & PFI_AFLAG_NOALIAS) && (af == AF_INET)) continue; -#endif if ((flags & PFI_AFLAG_BROADCAST) && af == AF_INET6) continue; if ((flags & PFI_AFLAG_BROADCAST) && @@ -718,67 +554,39 @@ pfi_instance_add(struct ifnet *ifp, int net, int flags) else pfi_address_add(ia->ifa_addr, af, net2); } + IF_ADDR_RUNLOCK(ifp); } -void +static void pfi_address_add(struct sockaddr *sa, int af, int net) { struct pfr_addr *p; int i; -#ifdef __FreeBSD__ if (V_pfi_buffer_cnt >= V_pfi_buffer_max) { int new_max = V_pfi_buffer_max * 2; -#else - if (pfi_buffer_cnt >= pfi_buffer_max) { - int new_max = pfi_buffer_max * 2; -#endif if (new_max > PFI_BUFFER_MAX) { - printf("pfi_address_add: address buffer full (%d/%d)\n", -#ifdef __FreeBSD__ + printf("%s: address buffer full (%d/%d)\n", __func__, V_pfi_buffer_cnt, PFI_BUFFER_MAX); -#else - pfi_buffer_cnt, PFI_BUFFER_MAX); -#endif return; } p = malloc(new_max * sizeof(*V_pfi_buffer), PFI_MTYPE, -#ifdef __FreeBSD__ M_NOWAIT); -#else - M_DONTWAIT); -#endif if (p == NULL) { - printf("pfi_address_add: no memory to grow buffer " -#ifdef __FreeBSD__ - "(%d/%d)\n", V_pfi_buffer_cnt, PFI_BUFFER_MAX); -#else - "(%d/%d)\n", pfi_buffer_cnt, PFI_BUFFER_MAX); -#endif + printf("%s: no memory to grow buffer (%d/%d)\n", + __func__, V_pfi_buffer_cnt, PFI_BUFFER_MAX); return; } -#ifdef __FreeBSD__ memcpy(V_pfi_buffer, p, V_pfi_buffer_cnt * sizeof(*V_pfi_buffer)); /* no need to zero buffer */ free(V_pfi_buffer, PFI_MTYPE); V_pfi_buffer = p; V_pfi_buffer_max = new_max; -#else - memcpy(pfi_buffer, p, pfi_buffer_cnt * sizeof(*pfi_buffer)); - /* no need to zero buffer */ - free(pfi_buffer, PFI_MTYPE); - pfi_buffer = p; - pfi_buffer_max = new_max; -#endif } if (af == AF_INET && net > 32) net = 128; -#ifdef __FreeBSD__ p = V_pfi_buffer + V_pfi_buffer_cnt++; -#else - p = pfi_buffer + pfi_buffer_cnt++; -#endif bzero(p, sizeof(*p)); p->pfra_af = af; p->pfra_net = net; @@ -797,55 +605,31 @@ pfi_address_add(struct sockaddr *sa, int af, int 
net) } void -pfi_dynaddr_remove(struct pf_addr_wrap *aw) +pfi_dynaddr_remove(struct pfi_dynaddr *dyn) { - int s; - if (aw->type != PF_ADDR_DYNIFTL || aw->p.dyn == NULL || - aw->p.dyn->pfid_kif == NULL || aw->p.dyn->pfid_kt == NULL) - return; + KASSERT(dyn->pfid_kif != NULL, ("%s: null pfid_kif", __func__)); + KASSERT(dyn->pfid_kt != NULL, ("%s: null pfid_kt", __func__)); - s = splsoftnet(); - TAILQ_REMOVE(&aw->p.dyn->pfid_kif->pfik_dynaddrs, aw->p.dyn, entry); - pfi_kif_unref(aw->p.dyn->pfid_kif, PFI_KIF_REF_RULE); - aw->p.dyn->pfid_kif = NULL; - pfr_detach_table(aw->p.dyn->pfid_kt); - aw->p.dyn->pfid_kt = NULL; -#ifdef __FreeBSD__ - pool_put(&V_pfi_addr_pl, aw->p.dyn); -#else - pool_put(&pfi_addr_pl, aw->p.dyn); -#endif - aw->p.dyn = NULL; - splx(s); + TAILQ_REMOVE(&dyn->pfid_kif->pfik_dynaddrs, dyn, entry); + pfi_kif_unref(dyn->pfid_kif); + pfr_detach_table(dyn->pfid_kt); + free(dyn, PFI_MTYPE); } void pfi_dynaddr_copyout(struct pf_addr_wrap *aw) { - if (aw->type != PF_ADDR_DYNIFTL || aw->p.dyn == NULL || - aw->p.dyn->pfid_kif == NULL) - return; - aw->p.dyncnt = aw->p.dyn->pfid_acnt4 + aw->p.dyn->pfid_acnt6; -} -void -pfi_kifaddr_update(void *v) -{ - int s; - struct pfi_kif *kif = (struct pfi_kif *)v; + KASSERT(aw->type == PF_ADDR_DYNIFTL, + ("%s: type %u", __func__, aw->type)); - s = splsoftnet(); -#ifdef __FreeBSD__ - V_pfi_update++; -#else - pfi_update++; -#endif - pfi_kif_update(kif); - splx(s); + if (aw->p.dyn == NULL || aw->p.dyn->pfid_kif == NULL) + return; + aw->p.dyncnt = aw->p.dyn->pfid_acnt4 + aw->p.dyn->pfid_acnt6; } -int +static int pfi_if_compare(struct pfi_kif *p, struct pfi_kif *q) { return (strncmp(p->pfik_name, q->pfik_name, IFNAMSIZ)); @@ -858,19 +642,13 @@ pfi_update_status(const char *name, struct pf_status *pfs) struct pfi_kif_cmp key; struct ifg_member p_member, *ifgm; TAILQ_HEAD(, ifg_member) ifg_members; - int i, j, k, s; + int i, j, k; strlcpy(key.pfik_name, name, sizeof(key.pfik_name)); - s = splsoftnet(); -#ifdef __FreeBSD__ p = 
RB_FIND(pfi_ifhead, &V_pfi_ifs, (struct pfi_kif *)&key); -#else - p = RB_FIND(pfi_ifhead, &pfi_ifs, (struct pfi_kif *)&key); -#endif - if (p == NULL) { - splx(s); + if (p == NULL) return; - } + if (p->pfik_group != NULL) { bcopy(&p->pfik_group->ifg_members, &ifg_members, sizeof(ifg_members)); @@ -906,56 +684,29 @@ pfi_update_status(const char *name, struct pf_status *pfs) p->pfik_bytes[i][j][k]; } } - splx(s); } -int +void pfi_get_ifaces(const char *name, struct pfi_kif *buf, int *size) { struct pfi_kif *p, *nextp; - int s, n = 0; -#ifdef __FreeBSD__ - int error; -#endif + int n = 0; - s = splsoftnet(); -#ifdef __FreeBSD__ for (p = RB_MIN(pfi_ifhead, &V_pfi_ifs); p; p = nextp) { nextp = RB_NEXT(pfi_ifhead, &V_pfi_ifs, p); -#else - for (p = RB_MIN(pfi_ifhead, &pfi_ifs); p; p = nextp) { - nextp = RB_NEXT(pfi_ifhead, &pfi_ifs, p); -#endif if (pfi_skip_if(name, p)) continue; - if (*size > n++) { - if (!p->pfik_tzero) - p->pfik_tzero = time_second; - pfi_kif_ref(p, PFI_KIF_REF_RULE); -#ifdef __FreeBSD__ - PF_COPYOUT(p, buf++, sizeof(*buf), error); - if (error) { -#else - if (copyout(p, buf++, sizeof(*buf))) { -#endif - pfi_kif_unref(p, PFI_KIF_REF_RULE); - splx(s); - return (EFAULT); - } -#ifdef __FreeBSD__ - nextp = RB_NEXT(pfi_ifhead, &V_pfi_ifs, p); -#else - nextp = RB_NEXT(pfi_ifhead, &pfi_ifs, p); -#endif - pfi_kif_unref(p, PFI_KIF_REF_RULE); - } + if (*size <= n++) + break; + if (!p->pfik_tzero) + p->pfik_tzero = time_second; + bcopy(p, buf++, sizeof(*buf)); + nextp = RB_NEXT(pfi_ifhead, &V_pfi_ifs, p); } - splx(s); *size = n; - return (0); } -int +static int pfi_skip_if(const char *filter, struct pfi_kif *p) { int n; @@ -978,19 +729,12 @@ int pfi_set_flags(const char *name, int flags) { struct pfi_kif *p; - int s; - s = splsoftnet(); -#ifdef __FreeBSD__ RB_FOREACH(p, pfi_ifhead, &V_pfi_ifs) { -#else - RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { -#endif if (pfi_skip_if(name, p)) continue; p->pfik_flags |= flags; } - splx(s); return (0); } @@ -998,24 +742,17 @@ int 
pfi_clear_flags(const char *name, int flags) { struct pfi_kif *p; - int s; - s = splsoftnet(); -#ifdef __FreeBSD__ RB_FOREACH(p, pfi_ifhead, &V_pfi_ifs) { -#else - RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { -#endif if (pfi_skip_if(name, p)) continue; p->pfik_flags &= ~flags; } - splx(s); return (0); } /* from pf_print_state.c */ -int +static int pfi_unmask(void *addr) { struct pf_addr *m = addr; @@ -1034,77 +771,89 @@ pfi_unmask(void *addr) return (b); } -#ifdef __FreeBSD__ -void +static void pfi_attach_ifnet_event(void *arg __unused, struct ifnet *ifp) { CURVNET_SET(ifp->if_vnet); - PF_LOCK(); pfi_attach_ifnet(ifp); #ifdef ALTQ + PF_RULES_WLOCK(); pf_altq_ifnet_event(ifp, 0); + PF_RULES_WUNLOCK(); #endif - PF_UNLOCK(); CURVNET_RESTORE(); } -void +static void pfi_detach_ifnet_event(void *arg __unused, struct ifnet *ifp) { + struct pfi_kif *kif = (struct pfi_kif *)ifp->if_pf_kif; CURVNET_SET(ifp->if_vnet); - PF_LOCK(); - pfi_detach_ifnet(ifp); + PF_RULES_WLOCK(); + V_pfi_update++; + pfi_kif_update(kif); + + kif->pfik_ifp = NULL; + ifp->if_pf_kif = NULL; #ifdef ALTQ pf_altq_ifnet_event(ifp, 1); #endif - PF_UNLOCK(); + PF_RULES_WUNLOCK(); CURVNET_RESTORE(); } -void +static void pfi_attach_group_event(void *arg , struct ifg_group *ifg) { CURVNET_SET((struct vnet *)arg); - PF_LOCK(); pfi_attach_ifgroup(ifg); - PF_UNLOCK(); CURVNET_RESTORE(); } -void +static void pfi_change_group_event(void *arg, char *gname) { + struct pfi_kif *kif; + + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); CURVNET_SET((struct vnet *)arg); - PF_LOCK(); - pfi_group_change(gname); - PF_UNLOCK(); + PF_RULES_WLOCK(); + V_pfi_update++; + kif = pfi_kif_attach(kif, gname); + pfi_kif_update(kif); + PF_RULES_WUNLOCK(); CURVNET_RESTORE(); } -void +static void pfi_detach_group_event(void *arg, struct ifg_group *ifg) { + struct pfi_kif *kif = (struct pfi_kif *)ifg->ifg_pf_kif; CURVNET_SET((struct vnet *)arg); - PF_LOCK(); - pfi_detach_ifgroup(ifg); - PF_UNLOCK(); + PF_RULES_WLOCK(); + V_pfi_update++; + + 
kif->pfik_group = NULL; + ifg->ifg_pf_kif = NULL; + PF_RULES_WUNLOCK(); CURVNET_RESTORE(); } -void +static void pfi_ifaddr_event(void *arg __unused, struct ifnet *ifp) { CURVNET_SET(ifp->if_vnet); - PF_LOCK(); - if (ifp && ifp->if_pf_kif) - pfi_kifaddr_update(ifp->if_pf_kif); - PF_UNLOCK(); + PF_RULES_WLOCK(); + if (ifp && ifp->if_pf_kif) { + V_pfi_update++; + pfi_kif_update(ifp->if_pf_kif); + } + PF_RULES_WUNLOCK(); CURVNET_RESTORE(); } -#endif /* __FreeBSD__ */ diff --git a/sys/contrib/pf/net/pf_ioctl.c b/sys/contrib/pf/net/pf_ioctl.c index 6b5d8f5..032f051 100644 --- a/sys/contrib/pf/net/pf_ioctl.c +++ b/sys/contrib/pf/net/pf_ioctl.c @@ -35,7 +35,6 @@ * */ -#ifdef __FreeBSD__ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); @@ -44,187 +43,116 @@ __FBSDID("$FreeBSD$"); #include "opt_bpf.h" #include "opt_pf.h" -#define NPFSYNC 1 - -#ifdef DEV_PFLOG -#define NPFLOG DEV_PFLOG -#else -#define NPFLOG 0 -#endif - -#else /* !__FreeBSD__ */ -#include "pfsync.h" -#include "pflog.h" -#endif /* __FreeBSD__ */ - #include <sys/param.h> -#include <sys/systm.h> -#include <sys/mbuf.h> -#include <sys/filio.h> +#include <sys/bus.h> +#include <sys/conf.h> +#include <sys/endian.h> #include <sys/fcntl.h> -#include <sys/socket.h> -#include <sys/socketvar.h> -#include <sys/kernel.h> -#include <sys/time.h> -#ifdef __FreeBSD__ -#include <sys/ucred.h> +#include <sys/filio.h> +#include <sys/interrupt.h> #include <sys/jail.h> +#include <sys/kernel.h> +#include <sys/kthread.h> +#include <sys/mbuf.h> #include <sys/module.h> -#include <sys/conf.h> #include <sys/proc.h> +#include <sys/smp.h> +#include <sys/socket.h> #include <sys/sysctl.h> -#else -#include <sys/timeout.h> -#include <sys/pool.h> -#endif -#include <sys/proc.h> -#include <sys/malloc.h> -#include <sys/kthread.h> -#ifndef __FreeBSD__ -#include <sys/rwlock.h> -#include <uvm/uvm_extern.h> -#endif +#include <sys/md5.h> +#include <sys/ucred.h> #include <net/if.h> -#include <net/if_types.h> -#ifdef __FreeBSD__ -#include <net/vnet.h> -#endif 
#include <net/route.h> +#include <net/pfil.h> +#include <net/pfvar.h> +#include <net/if_pfsync.h> +#include <net/if_pflog.h> #include <netinet/in.h> -#include <netinet/in_var.h> -#include <netinet/in_systm.h> #include <netinet/ip.h> #include <netinet/ip_var.h> #include <netinet/ip_icmp.h> -#ifdef __FreeBSD__ -#include <sys/md5.h> -#else -#include <dev/rndvar.h> -#include <crypto/md5.h> -#endif -#include <net/pfvar.h> - -#include <net/if_pfsync.h> - -#if NPFLOG > 0 -#include <net/if_pflog.h> -#endif /* NPFLOG > 0 */ - #ifdef INET6 #include <netinet/ip6.h> -#include <netinet/in_pcb.h> #endif /* INET6 */ #ifdef ALTQ #include <altq/altq.h> #endif -#ifdef __FreeBSD__ -#include <sys/limits.h> -#include <sys/lock.h> -#include <sys/mutex.h> -#include <net/pfil.h> -#endif /* __FreeBSD__ */ - -#ifdef __FreeBSD__ -void init_zone_var(void); -void cleanup_pf_zone(void); -int pfattach(void); -#else -void pfattach(int); -void pf_thread_create(void *); -int pfopen(dev_t, int, int, struct proc *); -int pfclose(dev_t, int, int, struct proc *); -#endif -struct pf_pool *pf_get_pool(char *, u_int32_t, u_int8_t, u_int32_t, +static int pfattach(void); +static struct pf_pool *pf_get_pool(char *, u_int32_t, u_int8_t, u_int32_t, u_int8_t, u_int8_t, u_int8_t); -void pf_mv_pool(struct pf_palist *, struct pf_palist *); -void pf_empty_pool(struct pf_palist *); -#ifdef __FreeBSD__ -int pfioctl(struct cdev *, u_long, caddr_t, int, struct thread *); -#else -int pfioctl(dev_t, u_long, caddr_t, int, struct proc *); -#endif +static void pf_mv_pool(struct pf_palist *, struct pf_palist *); +static void pf_empty_pool(struct pf_palist *); +static int pfioctl(struct cdev *, u_long, caddr_t, int, + struct thread *); #ifdef ALTQ -int pf_begin_altq(u_int32_t *); -int pf_rollback_altq(u_int32_t); -int pf_commit_altq(u_int32_t); -int pf_enable_altq(struct pf_altq *); -int pf_disable_altq(struct pf_altq *); +static int pf_begin_altq(u_int32_t *); +static int pf_rollback_altq(u_int32_t); +static int 
pf_commit_altq(u_int32_t); +static int pf_enable_altq(struct pf_altq *); +static int pf_disable_altq(struct pf_altq *); +static u_int32_t pf_qname2qid(char *); +static void pf_qid_unref(u_int32_t); #endif /* ALTQ */ -int pf_begin_rules(u_int32_t *, int, const char *); -int pf_rollback_rules(u_int32_t, int, char *); -int pf_setup_pfsync_matching(struct pf_ruleset *); -void pf_hash_rule(MD5_CTX *, struct pf_rule *); -void pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *); -int pf_commit_rules(u_int32_t, int, char *); -int pf_addr_setup(struct pf_ruleset *, +static int pf_begin_rules(u_int32_t *, int, const char *); +static int pf_rollback_rules(u_int32_t, int, char *); +static int pf_setup_pfsync_matching(struct pf_ruleset *); +static void pf_hash_rule(MD5_CTX *, struct pf_rule *); +static void pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *); +static int pf_commit_rules(u_int32_t, int, char *); +static int pf_addr_setup(struct pf_ruleset *, struct pf_addr_wrap *, sa_family_t); -void pf_addr_copyout(struct pf_addr_wrap *); - -#define TAGID_MAX 50000 +static void pf_addr_copyout(struct pf_addr_wrap *); -#ifdef __FreeBSD__ -VNET_DEFINE(struct pf_rule, pf_default_rule); -VNET_DEFINE(struct sx, pf_consistency_lock); +VNET_DEFINE(struct pf_rule, pf_default_rule); #ifdef ALTQ static VNET_DEFINE(int, pf_altq_running); #define V_pf_altq_running VNET(pf_altq_running) #endif -TAILQ_HEAD(pf_tags, pf_tagname); +#define TAGID_MAX 50000 +struct pf_tagname { + TAILQ_ENTRY(pf_tagname) entries; + char name[PF_TAG_NAME_SIZE]; + uint16_t tag; + int ref; +}; +TAILQ_HEAD(pf_tags, pf_tagname); #define V_pf_tags VNET(pf_tags) VNET_DEFINE(struct pf_tags, pf_tags); #define V_pf_qids VNET(pf_qids) VNET_DEFINE(struct pf_tags, pf_qids); - -#else /* !__FreeBSD__ */ -struct pf_rule pf_default_rule; -struct rwlock pf_consistency_lock = RWLOCK_INITIALIZER("pfcnslk"); -#ifdef ALTQ -static int pf_altq_running; -#endif - -TAILQ_HEAD(pf_tags, pf_tagname) pf_tags = TAILQ_HEAD_INITIALIZER(pf_tags), 
- pf_qids = TAILQ_HEAD_INITIALIZER(pf_qids); -#endif /* __FreeBSD__ */ +static MALLOC_DEFINE(M_PFTAG, "pf_tag", "pf(4) tag names"); +static MALLOC_DEFINE(M_PFALTQ, "pf_altq", "pf(4) altq configuration db"); +static MALLOC_DEFINE(M_PFRULE, "pf_rule", "pf(4) rules"); #if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE) #error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE #endif -u_int16_t tagname2tag(struct pf_tags *, char *); -void tag2tagname(struct pf_tags *, u_int16_t, char *); -void tag_unref(struct pf_tags *, u_int16_t); -int pf_rtlabel_add(struct pf_addr_wrap *); -void pf_rtlabel_remove(struct pf_addr_wrap *); -void pf_rtlabel_copyout(struct pf_addr_wrap *); +static u_int16_t tagname2tag(struct pf_tags *, char *); +static u_int16_t pf_tagname2tag(char *); +static void tag_unref(struct pf_tags *, u_int16_t); -#ifdef __FreeBSD__ #define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x -#else -#define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x -#endif -#ifdef __FreeBSD__ struct cdev *pf_dev; - + /* * XXX - These are new and need to be checked when moveing to a new version */ static void pf_clear_states(void); static int pf_clear_tables(void); -static void pf_clear_srcnodes(void); -/* - * XXX - These are new and need to be checked when moveing to a new version - */ - +static void pf_clear_srcnodes(struct pf_src_node *); +static void pf_tbladdr_copyout(struct pf_addr_wrap *); + /* * Wrapper functions for pfil(9) hooks */ @@ -240,7 +168,7 @@ static int pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, static int pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, struct inpcb *inp); #endif - + static int hook_pf(void); static int dehook_pf(void); static int shutdown_pf(void); @@ -256,7 +184,8 @@ static struct cdevsw pf_cdevsw = { static volatile VNET_DEFINE(int, pf_pfil_hooked); #define V_pf_pfil_hooked VNET(pf_pfil_hooked) VNET_DEFINE(int, pf_end_threads); -struct mtx pf_task_mtx; + +struct rwlock pf_rules_lock; /* pfsync */ 
pfsync_state_import_t *pfsync_state_import_ptr = NULL; @@ -264,126 +193,27 @@ pfsync_insert_state_t *pfsync_insert_state_ptr = NULL; pfsync_update_state_t *pfsync_update_state_ptr = NULL; pfsync_delete_state_t *pfsync_delete_state_ptr = NULL; pfsync_clear_states_t *pfsync_clear_states_ptr = NULL; -pfsync_state_in_use_t *pfsync_state_in_use_ptr = NULL; pfsync_defer_t *pfsync_defer_ptr = NULL; -pfsync_up_t *pfsync_up_ptr = NULL; -/* pflow */ -export_pflow_t *export_pflow_ptr = NULL; /* pflog */ pflog_packet_t *pflog_packet_ptr = NULL; -VNET_DEFINE(int, debug_pfugidhack); -SYSCTL_VNET_INT(_debug, OID_AUTO, pfugidhack, CTLFLAG_RW, - &VNET_NAME(debug_pfugidhack), 0, - "Enable/disable pf user/group rules mpsafe hack"); - -static void -init_pf_mutex(void) -{ - - mtx_init(&pf_task_mtx, "pf task mtx", NULL, MTX_DEF); -} - -static void -destroy_pf_mutex(void) -{ - - mtx_destroy(&pf_task_mtx); -} -void -init_zone_var(void) -{ - V_pf_src_tree_pl = V_pf_rule_pl = NULL; - V_pf_state_pl = V_pf_state_key_pl = V_pf_state_item_pl = NULL; - V_pf_altq_pl = V_pf_pooladdr_pl = NULL; - V_pf_frent_pl = V_pf_frag_pl = V_pf_cache_pl = V_pf_cent_pl = NULL; - V_pf_state_scrub_pl = NULL; - V_pfr_ktable_pl = V_pfr_kentry_pl = V_pfr_kcounters_pl = NULL; -} - -void -cleanup_pf_zone(void) -{ - UMA_DESTROY(V_pf_src_tree_pl); - UMA_DESTROY(V_pf_rule_pl); - UMA_DESTROY(V_pf_state_pl); - UMA_DESTROY(V_pf_state_key_pl); - UMA_DESTROY(V_pf_state_item_pl); - UMA_DESTROY(V_pf_altq_pl); - UMA_DESTROY(V_pf_pooladdr_pl); - UMA_DESTROY(V_pf_frent_pl); - UMA_DESTROY(V_pf_frag_pl); - UMA_DESTROY(V_pf_cache_pl); - UMA_DESTROY(V_pf_cent_pl); - UMA_DESTROY(V_pfr_ktable_pl); - UMA_DESTROY(V_pfr_kentry_pl); - UMA_DESTROY(V_pfr_kcounters_pl); - UMA_DESTROY(V_pf_state_scrub_pl); - UMA_DESTROY(V_pfi_addr_pl); -} - -int +static int pfattach(void) { u_int32_t *my_timeout = V_pf_default_rule.timeout; - int error = 1; + int error; - do { - UMA_CREATE(V_pf_src_tree_pl, struct pf_src_node, "pfsrctrpl"); - 
UMA_CREATE(V_pf_rule_pl, struct pf_rule, "pfrulepl"); - UMA_CREATE(V_pf_state_pl, struct pf_state, "pfstatepl"); - UMA_CREATE(V_pf_state_key_pl, struct pf_state, "pfstatekeypl"); - UMA_CREATE(V_pf_state_item_pl, struct pf_state, "pfstateitempl"); - UMA_CREATE(V_pf_altq_pl, struct pf_altq, "pfaltqpl"); - UMA_CREATE(V_pf_pooladdr_pl, struct pf_pooladdr, "pfpooladdrpl"); - UMA_CREATE(V_pfr_ktable_pl, struct pfr_ktable, "pfrktable"); - UMA_CREATE(V_pfr_kentry_pl, struct pfr_kentry, "pfrkentry"); - UMA_CREATE(V_pfr_kcounters_pl, struct pfr_kcounters, "pfrkcounters"); - UMA_CREATE(V_pf_frent_pl, struct pf_frent, "pffrent"); - UMA_CREATE(V_pf_frag_pl, struct pf_fragment, "pffrag"); - UMA_CREATE(V_pf_cache_pl, struct pf_fragment, "pffrcache"); - UMA_CREATE(V_pf_cent_pl, struct pf_frcache, "pffrcent"); - UMA_CREATE(V_pf_state_scrub_pl, struct pf_state_scrub, - "pfstatescrub"); - UMA_CREATE(V_pfi_addr_pl, struct pfi_dynaddr, "pfiaddrpl"); - error = 0; - } while(0); - if (error) { - cleanup_pf_zone(); - return (error); - } + pf_initialize(); pfr_initialize(); pfi_initialize(); - if ( (error = pf_osfp_initialize()) ) { - cleanup_pf_zone(); - pf_osfp_cleanup(); - return (error); - } + pf_normalize_init(); + + V_pf_limits[PF_LIMIT_STATES].limit = PFSTATE_HIWAT; + V_pf_limits[PF_LIMIT_SRC_NODES].limit = PFSNODE_HIWAT; - V_pf_pool_limits[PF_LIMIT_STATES].pp = V_pf_state_pl; - V_pf_pool_limits[PF_LIMIT_STATES].limit = PFSTATE_HIWAT; - V_pf_pool_limits[PF_LIMIT_SRC_NODES].pp = V_pf_src_tree_pl; - V_pf_pool_limits[PF_LIMIT_SRC_NODES].limit = PFSNODE_HIWAT; - V_pf_pool_limits[PF_LIMIT_FRAGS].pp = V_pf_frent_pl; - V_pf_pool_limits[PF_LIMIT_FRAGS].limit = PFFRAG_FRENT_HIWAT; - V_pf_pool_limits[PF_LIMIT_TABLES].pp = V_pfr_ktable_pl; - V_pf_pool_limits[PF_LIMIT_TABLES].limit = PFR_KTABLE_HIWAT; - V_pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].pp = V_pfr_kentry_pl; - V_pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].limit = PFR_KENTRY_HIWAT; - uma_zone_set_max(V_pf_pool_limits[PF_LIMIT_STATES].pp, - 
V_pf_pool_limits[PF_LIMIT_STATES].limit); - - RB_INIT(&V_tree_src_tracking); RB_INIT(&V_pf_anchors); pf_init_ruleset(&pf_main_ruleset); - TAILQ_INIT(&V_pf_altqs[0]); - TAILQ_INIT(&V_pf_altqs[1]); - TAILQ_INIT(&V_pf_pabuf); - V_pf_altqs_active = &V_pf_altqs[0]; - V_pf_altqs_inactive = &V_pf_altqs[1]; - TAILQ_INIT(&V_state_list); - /* default rule should never be garbage collected */ V_pf_default_rule.entries.tqe_prev = &V_pf_default_rule.entries.tqe_next; V_pf_default_rule.action = PF_PASS; @@ -412,8 +242,6 @@ pfattach(void) my_timeout[PFTM_ADAPTIVE_START] = PFSTATE_ADAPT_START; my_timeout[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END; - pf_normalize_init(); - bzero(&V_pf_status, sizeof(V_pf_status)); V_pf_status.debug = PF_DEBUG_URGENT; @@ -422,119 +250,19 @@ pfattach(void) /* XXX do our best to avoid a conflict */ V_pf_status.hostid = arc4random(); - if (kproc_create(pf_purge_thread, curvnet, NULL, 0, 0, "pfpurge")) - return (ENXIO); - - m_addr_chg_pf_p = pf_pkt_addr_changed; - - return (error); -} -#else /* !__FreeBSD__ */ - -void -pfattach(int num) -{ - u_int32_t *timeout = pf_default_rule.timeout; - - pool_init(&pf_rule_pl, sizeof(struct pf_rule), 0, 0, 0, "pfrulepl", - &pool_allocator_nointr); - pool_init(&pf_src_tree_pl, sizeof(struct pf_src_node), 0, 0, 0, - "pfsrctrpl", NULL); - pool_init(&pf_state_pl, sizeof(struct pf_state), 0, 0, 0, "pfstatepl", - NULL); - pool_init(&pf_state_key_pl, sizeof(struct pf_state_key), 0, 0, 0, - "pfstatekeypl", NULL); - pool_init(&pf_state_item_pl, sizeof(struct pf_state_item), 0, 0, 0, - "pfstateitempl", NULL); - pool_init(&pf_altq_pl, sizeof(struct pf_altq), 0, 0, 0, "pfaltqpl", - &pool_allocator_nointr); - pool_init(&pf_pooladdr_pl, sizeof(struct pf_pooladdr), 0, 0, 0, - "pfpooladdrpl", &pool_allocator_nointr); - pfr_initialize(); - pfi_initialize(); - pf_osfp_initialize(); - - pool_sethardlimit(pf_pool_limits[PF_LIMIT_STATES].pp, - pf_pool_limits[PF_LIMIT_STATES].limit, NULL, 0); - - if (physmem <= atop(100*1024*1024)) - 
pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].limit = - PFR_KENTRY_HIWAT_SMALL; - - RB_INIT(&tree_src_tracking); - RB_INIT(&pf_anchors); - pf_init_ruleset(&pf_main_ruleset); - TAILQ_INIT(&pf_altqs[0]); - TAILQ_INIT(&pf_altqs[1]); - TAILQ_INIT(&pf_pabuf); - pf_altqs_active = &pf_altqs[0]; - pf_altqs_inactive = &pf_altqs[1]; - TAILQ_INIT(&state_list); - - /* default rule should never be garbage collected */ - pf_default_rule.entries.tqe_prev = &pf_default_rule.entries.tqe_next; - pf_default_rule.action = PF_PASS; - pf_default_rule.nr = -1; - pf_default_rule.rtableid = -1; - - /* initialize default timeouts */ - timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL; - timeout[PFTM_TCP_OPENING] = PFTM_TCP_OPENING_VAL; - timeout[PFTM_TCP_ESTABLISHED] = PFTM_TCP_ESTABLISHED_VAL; - timeout[PFTM_TCP_CLOSING] = PFTM_TCP_CLOSING_VAL; - timeout[PFTM_TCP_FIN_WAIT] = PFTM_TCP_FIN_WAIT_VAL; - timeout[PFTM_TCP_CLOSED] = PFTM_TCP_CLOSED_VAL; - timeout[PFTM_UDP_FIRST_PACKET] = PFTM_UDP_FIRST_PACKET_VAL; - timeout[PFTM_UDP_SINGLE] = PFTM_UDP_SINGLE_VAL; - timeout[PFTM_UDP_MULTIPLE] = PFTM_UDP_MULTIPLE_VAL; - timeout[PFTM_ICMP_FIRST_PACKET] = PFTM_ICMP_FIRST_PACKET_VAL; - timeout[PFTM_ICMP_ERROR_REPLY] = PFTM_ICMP_ERROR_REPLY_VAL; - timeout[PFTM_OTHER_FIRST_PACKET] = PFTM_OTHER_FIRST_PACKET_VAL; - timeout[PFTM_OTHER_SINGLE] = PFTM_OTHER_SINGLE_VAL; - timeout[PFTM_OTHER_MULTIPLE] = PFTM_OTHER_MULTIPLE_VAL; - timeout[PFTM_FRAG] = PFTM_FRAG_VAL; - timeout[PFTM_INTERVAL] = PFTM_INTERVAL_VAL; - timeout[PFTM_SRC_NODE] = PFTM_SRC_NODE_VAL; - timeout[PFTM_TS_DIFF] = PFTM_TS_DIFF_VAL; - timeout[PFTM_ADAPTIVE_START] = PFSTATE_ADAPT_START; - timeout[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END; - - pf_normalize_init(); - bzero(&pf_status, sizeof(pf_status)); - pf_status.debug = PF_DEBUG_URGENT; - - /* XXX do our best to avoid a conflict */ - pf_status.hostid = arc4random(); - - /* require process context to purge states, so perform in a thread */ - kthread_create_deferred(pf_thread_create, NULL); -} - 
-void -pf_thread_create(void *v) -{ - if (kthread_create(pf_purge_thread, NULL, NULL, "pfpurge")) - panic("pfpurge thread"); -} - -int -pfopen(dev_t dev, int flags, int fmt, struct proc *p) -{ - if (minor(dev) >= 1) - return (ENXIO); - return (0); -} + if ((error = kproc_create(pf_purge_thread, curvnet, NULL, 0, 0, + "pf purge")) != 0) + /* XXXGL: leaked all above. */ + return (error); + if ((error = swi_add(NULL, "pf send", pf_intr, curvnet, SWI_NET, + INTR_MPSAFE, &V_pf_swi_cookie)) != 0) + /* XXXGL: leaked all above. */ + return (error); -int -pfclose(dev_t dev, int flags, int fmt, struct proc *p) -{ - if (minor(dev) >= 1) - return (ENXIO); return (0); } -#endif -struct pf_pool * +static struct pf_pool * pf_get_pool(char *anchor, u_int32_t ticket, u_int8_t rule_action, u_int32_t rule_number, u_int8_t r_last, u_int8_t active, u_int8_t check_ticket) @@ -578,7 +306,7 @@ pf_get_pool(char *anchor, u_int32_t ticket, u_int8_t rule_action, return (&rule->rpool); } -void +static void pf_mv_pool(struct pf_palist *poola, struct pf_palist *poolb) { struct pf_pooladdr *mv_pool_pa; @@ -589,80 +317,89 @@ pf_mv_pool(struct pf_palist *poola, struct pf_palist *poolb) } } -void +static void pf_empty_pool(struct pf_palist *poola) { - struct pf_pooladdr *empty_pool_pa; - - while ((empty_pool_pa = TAILQ_FIRST(poola)) != NULL) { - pfi_dynaddr_remove(&empty_pool_pa->addr); - pf_tbladdr_remove(&empty_pool_pa->addr); - pfi_kif_unref(empty_pool_pa->kif, PFI_KIF_REF_RULE); - TAILQ_REMOVE(poola, empty_pool_pa, entries); -#ifdef __FreeBSD__ - pool_put(&V_pf_pooladdr_pl, empty_pool_pa); -#else - pool_put(&pf_pooladdr_pl, empty_pool_pa); -#endif + struct pf_pooladdr *pa; + + while ((pa = TAILQ_FIRST(poola)) != NULL) { + switch (pa->addr.type) { + case PF_ADDR_DYNIFTL: + pfi_dynaddr_remove(pa->addr.p.dyn); + break; + case PF_ADDR_TABLE: + pfr_detach_table(pa->addr.p.tbl); + break; + } + if (pa->kif) + pfi_kif_unref(pa->kif); + TAILQ_REMOVE(poola, pa, entries); + free(pa, M_PFRULE); } } +static 
void +pf_unlink_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule) +{ + + PF_RULES_WASSERT(); + + TAILQ_REMOVE(rulequeue, rule, entries); + + PF_UNLNKDRULES_LOCK(); + rule->rule_flag |= PFRULE_REFS; + TAILQ_INSERT_TAIL(&V_pf_unlinked_rules, rule, entries); + PF_UNLNKDRULES_UNLOCK(); +} + void -pf_rm_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule) +pf_free_rule(struct pf_rule *rule) { - if (rulequeue != NULL) { - if (rule->states_cur <= 0) { - /* - * XXX - we need to remove the table *before* detaching - * the rule to make sure the table code does not delete - * the anchor under our feet. - */ - pf_tbladdr_remove(&rule->src.addr); - pf_tbladdr_remove(&rule->dst.addr); - if (rule->overload_tbl) - pfr_detach_table(rule->overload_tbl); - } - TAILQ_REMOVE(rulequeue, rule, entries); - rule->entries.tqe_prev = NULL; - rule->nr = -1; - } - if (rule->states_cur > 0 || rule->src_nodes > 0 || - rule->entries.tqe_prev != NULL) - return; - pf_tag_unref(rule->tag); - pf_tag_unref(rule->match_tag); + PF_RULES_WASSERT(); + + if (rule->tag) + tag_unref(&V_pf_tags, rule->tag); + if (rule->match_tag) + tag_unref(&V_pf_tags, rule->match_tag); #ifdef ALTQ if (rule->pqid != rule->qid) pf_qid_unref(rule->pqid); pf_qid_unref(rule->qid); #endif - pf_rtlabel_remove(&rule->src.addr); - pf_rtlabel_remove(&rule->dst.addr); - pfi_dynaddr_remove(&rule->src.addr); - pfi_dynaddr_remove(&rule->dst.addr); - if (rulequeue == NULL) { - pf_tbladdr_remove(&rule->src.addr); - pf_tbladdr_remove(&rule->dst.addr); - if (rule->overload_tbl) - pfr_detach_table(rule->overload_tbl); + switch (rule->src.addr.type) { + case PF_ADDR_DYNIFTL: + pfi_dynaddr_remove(rule->src.addr.p.dyn); + break; + case PF_ADDR_TABLE: + pfr_detach_table(rule->src.addr.p.tbl); + break; + } + switch (rule->dst.addr.type) { + case PF_ADDR_DYNIFTL: + pfi_dynaddr_remove(rule->dst.addr.p.dyn); + break; + case PF_ADDR_TABLE: + pfr_detach_table(rule->dst.addr.p.tbl); + break; } - pfi_kif_unref(rule->kif, PFI_KIF_REF_RULE); 
+ if (rule->overload_tbl) + pfr_detach_table(rule->overload_tbl); + if (rule->kif) + pfi_kif_unref(rule->kif); pf_anchor_remove(rule); pf_empty_pool(&rule->rpool.list); -#ifdef __FreeBSD__ - pool_put(&V_pf_rule_pl, rule); -#else - pool_put(&pf_rule_pl, rule); -#endif + free(rule, M_PFRULE); } -u_int16_t +static u_int16_t tagname2tag(struct pf_tags *head, char *tagname) { struct pf_tagname *tag, *p = NULL; u_int16_t new_tagid = 1; + PF_RULES_WASSERT(); + TAILQ_FOREACH(tag, head, entries) if (strcmp(tagname, tag->name) == 0) { tag->ref++; @@ -685,7 +422,7 @@ tagname2tag(struct pf_tags *head, char *tagname) return (0); /* allocate and fill new struct pf_tagname */ - tag = malloc(sizeof(*tag), M_TEMP, M_NOWAIT|M_ZERO); + tag = malloc(sizeof(*tag), M_PFTAG, M_NOWAIT|M_ZERO); if (tag == NULL) return (0); strlcpy(tag->name, tagname, sizeof(tag->name)); @@ -700,207 +437,78 @@ tagname2tag(struct pf_tags *head, char *tagname) return (tag->tag); } -void -tag2tagname(struct pf_tags *head, u_int16_t tagid, char *p) -{ - struct pf_tagname *tag; - - TAILQ_FOREACH(tag, head, entries) - if (tag->tag == tagid) { - strlcpy(p, tag->name, PF_TAG_NAME_SIZE); - return; - } -} - -void +static void tag_unref(struct pf_tags *head, u_int16_t tag) { struct pf_tagname *p, *next; - if (tag == 0) - return; + PF_RULES_WASSERT(); for (p = TAILQ_FIRST(head); p != NULL; p = next) { next = TAILQ_NEXT(p, entries); if (tag == p->tag) { if (--p->ref == 0) { TAILQ_REMOVE(head, p, entries); - free(p, M_TEMP); + free(p, M_PFTAG); } break; } } } -u_int16_t +static u_int16_t pf_tagname2tag(char *tagname) { -#ifdef __FreeBSD__ return (tagname2tag(&V_pf_tags, tagname)); -#else - return (tagname2tag(&pf_tags, tagname)); -#endif -} - -void -pf_tag2tagname(u_int16_t tagid, char *p) -{ -#ifdef __FreeBSD__ - tag2tagname(&V_pf_tags, tagid, p); -#else - tag2tagname(&pf_tags, tagid, p); -#endif -} - -void -pf_tag_ref(u_int16_t tag) -{ - struct pf_tagname *t; - -#ifdef __FreeBSD__ - TAILQ_FOREACH(t, &V_pf_tags, 
entries) -#else - TAILQ_FOREACH(t, &pf_tags, entries) -#endif - if (t->tag == tag) - break; - if (t != NULL) - t->ref++; -} - -void -pf_tag_unref(u_int16_t tag) -{ -#ifdef __FreeBSD__ - tag_unref(&V_pf_tags, tag); -#else - tag_unref(&pf_tags, tag); -#endif -} - -int -pf_rtlabel_add(struct pf_addr_wrap *a) -{ -#ifdef __FreeBSD__ - /* XXX_IMPORT: later */ - return (0); -#else - if (a->type == PF_ADDR_RTLABEL && - (a->v.rtlabel = rtlabel_name2id(a->v.rtlabelname)) == 0) - return (-1); - return (0); -#endif -} - -void -pf_rtlabel_remove(struct pf_addr_wrap *a) -{ -#ifdef __FreeBSD__ - /* XXX_IMPORT: later */ -#else - if (a->type == PF_ADDR_RTLABEL) - rtlabel_unref(a->v.rtlabel); -#endif -} - -void -pf_rtlabel_copyout(struct pf_addr_wrap *a) -{ -#ifdef __FreeBSD__ - /* XXX_IMPORT: later */ - if (a->type == PF_ADDR_RTLABEL && a->v.rtlabel) - strlcpy(a->v.rtlabelname, "?", sizeof(a->v.rtlabelname)); -#else - const char *name; - - if (a->type == PF_ADDR_RTLABEL && a->v.rtlabel) { - if ((name = rtlabel_id2name(a->v.rtlabel)) == NULL) - strlcpy(a->v.rtlabelname, "?", - sizeof(a->v.rtlabelname)); - else - strlcpy(a->v.rtlabelname, name, - sizeof(a->v.rtlabelname)); - } -#endif } #ifdef ALTQ -u_int32_t +static u_int32_t pf_qname2qid(char *qname) { -#ifdef __FreeBSD__ return ((u_int32_t)tagname2tag(&V_pf_qids, qname)); -#else - return ((u_int32_t)tagname2tag(&pf_qids, qname)); -#endif -} - -void -pf_qid2qname(u_int32_t qid, char *p) -{ -#ifdef __FreeBSD__ - tag2tagname(&V_pf_qids, (u_int16_t)qid, p); -#else - tag2tagname(&pf_qids, (u_int16_t)qid, p); -#endif } -void +static void pf_qid_unref(u_int32_t qid) { -#ifdef __FreeBSD__ tag_unref(&V_pf_qids, (u_int16_t)qid); -#else - tag_unref(&pf_qids, (u_int16_t)qid); -#endif } -int +static int pf_begin_altq(u_int32_t *ticket) { struct pf_altq *altq; int error = 0; + PF_RULES_WASSERT(); + /* Purge the old altq list */ -#ifdef __FreeBSD__ while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) { TAILQ_REMOVE(V_pf_altqs_inactive, 
altq, entries); if (altq->qname[0] == 0 && (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { -#else - while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) { - TAILQ_REMOVE(pf_altqs_inactive, altq, entries); - if (altq->qname[0] == 0) { -#endif /* detach and destroy the discipline */ error = altq_remove(altq); } else pf_qid_unref(altq->qid); -#ifdef __FreeBSD__ - pool_put(&V_pf_altq_pl, altq); -#else - pool_put(&pf_altq_pl, altq); -#endif + free(altq, M_PFALTQ); } if (error) return (error); -#ifdef __FreeBSD__ *ticket = ++V_ticket_altqs_inactive; V_altqs_inactive_open = 1; -#else - *ticket = ++ticket_altqs_inactive; - altqs_inactive_open = 1; -#endif return (0); } -int +static int pf_rollback_altq(u_int32_t ticket) { struct pf_altq *altq; int error = 0; -#ifdef __FreeBSD__ + PF_RULES_WASSERT(); + if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive) return (0); /* Purge the old altq list */ @@ -908,101 +516,54 @@ pf_rollback_altq(u_int32_t ticket) TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries); if (altq->qname[0] == 0 && (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { -#else - if (!altqs_inactive_open || ticket != ticket_altqs_inactive) - return (0); - /* Purge the old altq list */ - while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) { - TAILQ_REMOVE(pf_altqs_inactive, altq, entries); - if (altq->qname[0] == 0) { -#endif /* detach and destroy the discipline */ error = altq_remove(altq); } else pf_qid_unref(altq->qid); -#ifdef __FreeBSD__ - pool_put(&V_pf_altq_pl, altq); -#else - pool_put(&pf_altq_pl, altq); -#endif + free(altq, M_PFALTQ); } -#ifdef __FreeBSD__ V_altqs_inactive_open = 0; -#else - altqs_inactive_open = 0; -#endif return (error); } -int +static int pf_commit_altq(u_int32_t ticket) { struct pf_altqqueue *old_altqs; struct pf_altq *altq; - int s, err, error = 0; + int err, error = 0; + + PF_RULES_WASSERT(); -#ifdef __FreeBSD__ if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive) -#else - if (!altqs_inactive_open 
|| ticket != ticket_altqs_inactive) -#endif return (EBUSY); /* swap altqs, keep the old. */ - s = splsoftnet(); -#ifdef __FreeBSD__ old_altqs = V_pf_altqs_active; V_pf_altqs_active = V_pf_altqs_inactive; V_pf_altqs_inactive = old_altqs; V_ticket_altqs_active = V_ticket_altqs_inactive; -#else - old_altqs = pf_altqs_active; - pf_altqs_active = pf_altqs_inactive; - pf_altqs_inactive = old_altqs; - ticket_altqs_active = ticket_altqs_inactive; -#endif /* Attach new disciplines */ -#ifdef __FreeBSD__ TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { if (altq->qname[0] == 0 && (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { -#else - TAILQ_FOREACH(altq, pf_altqs_active, entries) { - if (altq->qname[0] == 0) { -#endif /* attach the discipline */ error = altq_pfattach(altq); -#ifdef __FreeBSD__ if (error == 0 && V_pf_altq_running) -#else - if (error == 0 && pf_altq_running) -#endif error = pf_enable_altq(altq); - if (error != 0) { - splx(s); + if (error != 0) return (error); - } } } /* Purge the old altq list */ -#ifdef __FreeBSD__ while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) { TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries); if (altq->qname[0] == 0 && (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { -#else - while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) { - TAILQ_REMOVE(pf_altqs_inactive, altq, entries); - if (altq->qname[0] == 0) { -#endif /* detach and destroy the discipline */ -#ifdef __FreeBSD__ if (V_pf_altq_running) -#else - if (pf_altq_running) -#endif error = pf_disable_altq(altq); err = altq_pfdetach(altq); if (err != 0 && error == 0) @@ -1012,28 +573,19 @@ pf_commit_altq(u_int32_t ticket) error = err; } else pf_qid_unref(altq->qid); -#ifdef __FreeBSD__ - pool_put(&V_pf_altq_pl, altq); -#else - pool_put(&pf_altq_pl, altq); -#endif + free(altq, M_PFALTQ); } - splx(s); -#ifdef __FreeBSD__ V_altqs_inactive_open = 0; -#else - altqs_inactive_open = 0; -#endif return (error); } -int +static int pf_enable_altq(struct pf_altq *altq) { 
struct ifnet *ifp; struct tb_profile tb; - int s, error = 0; + int error = 0; if ((ifp = ifunit(altq->ifname)) == NULL) return (EINVAL); @@ -1045,26 +597,18 @@ pf_enable_altq(struct pf_altq *altq) if (error == 0 && ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) { tb.rate = altq->ifbandwidth; tb.depth = altq->tbrsize; - s = splnet(); -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif error = tbr_set(&ifp->if_snd, &tb); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif - splx(s); } return (error); } -int +static int pf_disable_altq(struct pf_altq *altq) { struct ifnet *ifp; struct tb_profile tb; - int s, error; + int error; if ((ifp = ifunit(altq->ifname)) == NULL) return (EINVAL); @@ -1081,21 +625,12 @@ pf_disable_altq(struct pf_altq *altq) if (error == 0) { /* clear tokenbucket regulator */ tb.rate = 0; - s = splnet(); -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif error = tbr_set(&ifp->if_snd, &tb); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif - splx(s); } return (error); } -#ifdef __FreeBSD__ void pf_altq_ifnet_event(struct ifnet *ifp, int remove) { @@ -1105,26 +640,16 @@ pf_altq_ifnet_event(struct ifnet *ifp, int remove) int error = 0; /* Interrupt userland queue modifications */ -#ifdef __FreeBSD__ if (V_altqs_inactive_open) pf_rollback_altq(V_ticket_altqs_inactive); -#else - if (altqs_inactive_open) - pf_rollback_altq(ticket_altqs_inactive); -#endif /* Start new altq ruleset */ if (pf_begin_altq(&ticket)) return; /* Copy the current active set */ -#ifdef __FreeBSD__ TAILQ_FOREACH(a1, V_pf_altqs_active, entries) { - a2 = pool_get(&V_pf_altq_pl, PR_NOWAIT); -#else - TAILQ_FOREACH(a1, pf_altqs_active, entries) { - a2 = pool_get(&pf_altq_pl, PR_NOWAIT); -#endif + a2 = malloc(sizeof(*a2), M_PFALTQ, M_NOWAIT); if (a2 == NULL) { error = ENOMEM; break; @@ -1134,19 +659,11 @@ pf_altq_ifnet_event(struct ifnet *ifp, int remove) if (a2->qname[0] != 0) { if ((a2->qid = pf_qname2qid(a2->qname)) == 0) { error = EBUSY; -#ifdef __FreeBSD__ - pool_put(&V_pf_altq_pl, a2); -#else - pool_put(&pf_altq_pl, 
a2); -#endif + free(a2, M_PFALTQ); break; } a2->altq_disc = NULL; -#ifdef __FreeBSD__ TAILQ_FOREACH(a3, V_pf_altqs_inactive, entries) { -#else - TAILQ_FOREACH(a3, pf_altqs_inactive, entries) { -#endif if (strncmp(a3->ifname, a2->ifname, IFNAMSIZ) == 0 && a3->qname[0] == 0) { a2->altq_disc = a3->altq_disc; @@ -1160,55 +677,42 @@ pf_altq_ifnet_event(struct ifnet *ifp, int remove) (remove && ifp1 == ifp)) { a2->local_flags |= PFALTQ_FLAG_IF_REMOVED; } else { - PF_UNLOCK(); error = altq_add(a2); - PF_LOCK(); -#ifdef __FreeBSD__ if (ticket != V_ticket_altqs_inactive) -#else - if (ticket != ticket_altqs_inactive) -#endif error = EBUSY; if (error) { -#ifdef __FreeBSD__ - pool_put(&V_pf_altq_pl, a2); -#else - pool_put(&pf_altq_pl, a2); -#endif + free(a2, M_PFALTQ); break; } } -#ifdef __FreeBSD__ TAILQ_INSERT_TAIL(V_pf_altqs_inactive, a2, entries); -#else - TAILQ_INSERT_TAIL(pf_altqs_inactive, a2, entries); -#endif } if (error != 0) pf_rollback_altq(ticket); else pf_commit_altq(ticket); - } -#endif +} #endif /* ALTQ */ -int +static int pf_begin_rules(u_int32_t *ticket, int rs_num, const char *anchor) { struct pf_ruleset *rs; struct pf_rule *rule; + PF_RULES_WASSERT(); + if (rs_num < 0 || rs_num >= PF_RULESET_MAX) return (EINVAL); rs = pf_find_or_create_ruleset(anchor); if (rs == NULL) return (EINVAL); while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) { - pf_rm_rule(rs->rules[rs_num].inactive.ptr, rule); + pf_unlink_rule(rs->rules[rs_num].inactive.ptr, rule); rs->rules[rs_num].inactive.rcount--; } *ticket = ++rs->rules[rs_num].inactive.ticket; @@ -1216,12 +720,14 @@ pf_begin_rules(u_int32_t *ticket, int rs_num, const char *anchor) return (0); } -int +static int pf_rollback_rules(u_int32_t ticket, int rs_num, char *anchor) { struct pf_ruleset *rs; struct pf_rule *rule; + PF_RULES_WASSERT(); + if (rs_num < 0 || rs_num >= PF_RULESET_MAX) return (EINVAL); rs = pf_find_ruleset(anchor); @@ -1229,7 +735,7 @@ pf_rollback_rules(u_int32_t ticket, int rs_num, char 
*anchor) rs->rules[rs_num].inactive.ticket != ticket) return (0); while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) { - pf_rm_rule(rs->rules[rs_num].inactive.ptr, rule); + pf_unlink_rule(rs->rules[rs_num].inactive.ptr, rule); rs->rules[rs_num].inactive.rcount--; } rs->rules[rs_num].inactive.open = 0; @@ -1252,7 +758,7 @@ pf_rollback_rules(u_int32_t ticket, int rs_num, char *anchor) MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int16_t));\ } while (0) -void +static void pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr) { PF_MD5_UPD(pfr, addr.type); @@ -1269,9 +775,6 @@ pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr) PF_MD5_UPD(pfr, addr.v.a.addr.addr32); PF_MD5_UPD(pfr, addr.v.a.mask.addr32); break; - case PF_ADDR_RTLABEL: - PF_MD5_UPD(pfr, addr.v.rtlabelname); - break; } PF_MD5_UPD(pfr, port[0]); @@ -1280,7 +783,7 @@ pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr) PF_MD5_UPD(pfr, port_op); } -void +static void pf_hash_rule(MD5_CTX *ctx, struct pf_rule *rule) { u_int16_t x; @@ -1319,15 +822,17 @@ pf_hash_rule(MD5_CTX *ctx, struct pf_rule *rule) PF_MD5_UPD(rule, tos); } -int +static int pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) { struct pf_ruleset *rs; struct pf_rule *rule, **old_array; struct pf_rulequeue *old_rules; - int s, error; + int error; u_int32_t old_rcount; + PF_RULES_WASSERT(); + if (rs_num < 0 || rs_num >= PF_RULESET_MAX) return (EINVAL); rs = pf_find_ruleset(anchor); @@ -1343,7 +848,6 @@ pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) } /* Swap rules, keep the old. */ - s = splsoftnet(); old_rules = rs->rules[rs_num].active.ptr; old_rcount = rs->rules[rs_num].active.rcount; old_array = rs->rules[rs_num].active.ptr_array; @@ -1365,18 +869,18 @@ pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) /* Purge the old rule list. 
*/ while ((rule = TAILQ_FIRST(old_rules)) != NULL) - pf_rm_rule(old_rules, rule); + pf_unlink_rule(old_rules, rule); if (rs->rules[rs_num].inactive.ptr_array) free(rs->rules[rs_num].inactive.ptr_array, M_TEMP); rs->rules[rs_num].inactive.ptr_array = NULL; rs->rules[rs_num].inactive.rcount = 0; rs->rules[rs_num].inactive.open = 0; pf_remove_if_empty_ruleset(rs); - splx(s); + return (0); } -int +static int pf_setup_pfsync_matching(struct pf_ruleset *rs) { MD5_CTX ctx; @@ -1412,55 +916,53 @@ pf_setup_pfsync_matching(struct pf_ruleset *rs) } MD5Final(digest, &ctx); -#ifdef __FreeBSD__ memcpy(V_pf_status.pf_chksum, digest, sizeof(V_pf_status.pf_chksum)); -#else - memcpy(pf_status.pf_chksum, digest, sizeof(pf_status.pf_chksum)); -#endif return (0); } -int +static int pf_addr_setup(struct pf_ruleset *ruleset, struct pf_addr_wrap *addr, sa_family_t af) { - if (pfi_dynaddr_setup(addr, af) || - pf_tbladdr_setup(ruleset, addr)) - return (EINVAL); + int error = 0; - return (0); + switch (addr->type) { + case PF_ADDR_TABLE: + addr->p.tbl = pfr_attach_table(ruleset, addr->v.tblname); + if (addr->p.tbl == NULL) + error = ENOMEM; + break; + case PF_ADDR_DYNIFTL: + error = pfi_dynaddr_setup(addr, af); + break; + } + + return (error); } -void +static void pf_addr_copyout(struct pf_addr_wrap *addr) { - pfi_dynaddr_copyout(addr); - pf_tbladdr_copyout(addr); - pf_rtlabel_copyout(addr); + + switch (addr->type) { + case PF_ADDR_DYNIFTL: + pfi_dynaddr_copyout(addr); + break; + case PF_ADDR_TABLE: + pf_tbladdr_copyout(addr); + break; + } } -int -#ifdef __FreeBSD__ +static int pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td) -#else -pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) -#endif { - struct pf_pooladdr *pa = NULL; - struct pf_pool *pool = NULL; -#ifndef __FreeBSD__ - int s; -#endif int error = 0; CURVNET_SET(TD_TO_VNET(td)); /* XXX keep in sync with switch() below */ -#ifdef __FreeBSD__ if (securelevel_gt(td->td_ucred, 2)) 
-#else - if (securelevel > 1) -#endif switch (cmd) { case DIOCGETRULES: case DIOCGETRULE: @@ -1496,9 +998,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCGETSRCNODES: case DIOCCLRSRCNODES: case DIOCIGETIFACES: -#ifdef __FreeBSD__ case DIOCGIFSPEED: -#endif case DIOCSETIFFLAG: case DIOCCLRIFFLAG: break; @@ -1538,9 +1038,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCOSFPGET: case DIOCGETSRCNODES: case DIOCIGETIFACES: -#ifdef __FreeBSD__ case DIOCGIFSPEED: -#endif break; case DIOCRCLRTABLES: case DIOCRADDTABLES: @@ -1566,85 +1064,51 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) return (EACCES); } - if (flags & FWRITE) -#ifdef __FreeBSD__ - sx_xlock(&V_pf_consistency_lock); - else - sx_slock(&V_pf_consistency_lock); -#else - rw_enter_write(&pf_consistency_lock); - else - rw_enter_read(&pf_consistency_lock); -#endif - -#ifdef __FreeBSD__ - PF_LOCK(); -#else - s = splsoftnet(); -#endif switch (cmd) { - case DIOCSTART: -#ifdef __FreeBSD__ + PF_RULES_WLOCK(); if (V_pf_status.running) -#else - if (pf_status.running) -#endif error = EEXIST; else { -#ifdef __FreeBSD__ - PF_UNLOCK(); + int cpu; + + PF_RULES_WUNLOCK(); error = hook_pf(); - PF_LOCK(); if (error) { DPFPRINTF(PF_DEBUG_MISC, - ("pf: pfil registeration fail\n")); + ("pf: pfil registration failed\n")); break; } + PF_RULES_WLOCK(); V_pf_status.running = 1; V_pf_status.since = time_second; - if (V_pf_status.stateid == 0) { - V_pf_status.stateid = time_second; - V_pf_status.stateid = V_pf_status.stateid << 32; - } -#else - pf_status.running = 1; - pf_status.since = time_second; + CPU_FOREACH(cpu) + V_pf_stateid[cpu] = time_second; - if (pf_status.stateid == 0) { - pf_status.stateid = time_second; - pf_status.stateid = pf_status.stateid << 32; - } -#endif DPFPRINTF(PF_DEBUG_MISC, ("pf: started\n")); } + PF_RULES_WUNLOCK(); break; case DIOCSTOP: -#ifdef __FreeBSD__ + PF_RULES_WLOCK(); if (!V_pf_status.running) error 
= ENOENT; else { V_pf_status.running = 0; - PF_UNLOCK(); + PF_RULES_WUNLOCK(); error = dehook_pf(); - PF_LOCK(); if (error) { V_pf_status.running = 1; DPFPRINTF(PF_DEBUG_MISC, - ("pf: pfil unregisteration failed\n")); + ("pf: pfil unregistration failed\n")); } + PF_RULES_WLOCK(); V_pf_status.since = time_second; -#else - if (!pf_status.running) - error = ENOENT; - else { - pf_status.running = 0; - pf_status.since = time_second; -#endif DPFPRINTF(PF_DEBUG_MISC, ("pf: stopped\n")); } + PF_RULES_WUNLOCK(); break; case DIOCADDRULE: { @@ -1652,89 +1116,57 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pf_ruleset *ruleset; struct pf_rule *rule, *tail; struct pf_pooladdr *pa; + struct pfi_kif *kif = NULL; int rs_num; - pr->anchor[sizeof(pr->anchor) - 1] = 0; - ruleset = pf_find_ruleset(pr->anchor); - if (ruleset == NULL) { + if (pr->rule.return_icmp >> 8 > ICMP_MAXTYPE) { error = EINVAL; break; } - rs_num = pf_get_ruleset_number(pr->rule.action); - if (rs_num >= PF_RULESET_MAX) { - error = EINVAL; +#ifndef INET + if (pr->rule.af == AF_INET) { + error = EAFNOSUPPORT; break; } - if (pr->rule.return_icmp >> 8 > ICMP_MAXTYPE) { - error = EINVAL; +#endif /* INET */ +#ifndef INET6 + if (pr->rule.af == AF_INET6) { + error = EAFNOSUPPORT; break; } +#endif /* INET6 */ + + rule = malloc(sizeof(*rule), M_PFRULE, M_WAITOK); + bcopy(&pr->rule, rule, sizeof(struct pf_rule)); + if (rule->ifname[0]) + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); + rule->cuid = td->td_ucred->cr_ruid; + rule->cpid = td->td_proc ? 
td->td_proc->p_pid : 0; + TAILQ_INIT(&rule->rpool.list); + +#define ERROUT(x) { error = (x); goto DIOCADDRULE_error; } + + PF_RULES_WLOCK(); + pr->anchor[sizeof(pr->anchor) - 1] = 0; + ruleset = pf_find_ruleset(pr->anchor); + if (ruleset == NULL) + ERROUT(EINVAL); + rs_num = pf_get_ruleset_number(pr->rule.action); + if (rs_num >= PF_RULESET_MAX) + ERROUT(EINVAL); if (pr->ticket != ruleset->rules[rs_num].inactive.ticket) { -#ifdef __FreeBSD__ DPFPRINTF(PF_DEBUG_MISC, ("ticket: %d != [%d]%d\n", pr->ticket, rs_num, ruleset->rules[rs_num].inactive.ticket)); -#endif - error = EBUSY; - break; + ERROUT(EBUSY); } -#ifdef __FreeBSD__ if (pr->pool_ticket != V_ticket_pabuf) { DPFPRINTF(PF_DEBUG_MISC, ("pool_ticket: %d != %d\n", pr->pool_ticket, V_ticket_pabuf)); -#else - if (pr->pool_ticket != ticket_pabuf) { -#endif - error = EBUSY; - break; - } -#ifdef __FreeBSD__ - rule = pool_get(&V_pf_rule_pl, PR_NOWAIT); -#else - rule = pool_get(&pf_rule_pl, PR_WAITOK|PR_LIMITFAIL); -#endif - if (rule == NULL) { - error = ENOMEM; - break; + ERROUT(EBUSY); } - bcopy(&pr->rule, rule, sizeof(struct pf_rule)); -#ifdef __FreeBSD__ - rule->cuid = td->td_ucred->cr_ruid; - rule->cpid = td->td_proc ? 
td->td_proc->p_pid : 0; -#else - rule->cuid = p->p_cred->p_ruid; - rule->cpid = p->p_pid; -#endif - rule->anchor = NULL; - rule->kif = NULL; - TAILQ_INIT(&rule->rpool.list); - /* initialize refcounting */ - rule->states_cur = 0; - rule->src_nodes = 0; - rule->entries.tqe_prev = NULL; -#ifndef INET - if (rule->af == AF_INET) { -#ifdef __FreeBSD__ - pool_put(&V_pf_rule_pl, rule); -#else - pool_put(&pf_rule_pl, rule); -#endif - error = EAFNOSUPPORT; - break; - } -#endif /* INET */ -#ifndef INET6 - if (rule->af == AF_INET6) { -#ifdef __FreeBSD__ - pool_put(&V_pf_rule_pl, rule); -#else - pool_put(&pf_rule_pl, rule); -#endif - error = EAFNOSUPPORT; - break; - } -#endif /* INET6 */ + tail = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr, pf_rulequeue); if (tail) @@ -1742,24 +1174,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) else rule->nr = 0; if (rule->ifname[0]) { - rule->kif = pfi_kif_get(rule->ifname); - if (rule->kif == NULL) { -#ifdef __FreeBSD__ - pool_put(&V_pf_rule_pl, rule); -#else - pool_put(&pf_rule_pl, rule); -#endif - error = EINVAL; - break; - } - pfi_kif_ref(rule->kif, PFI_KIF_REF_RULE); - } + rule->kif = pfi_kif_attach(kif, rule->ifname); + pfi_kif_ref(rule->kif); + } else + rule->kif = NULL; -#ifdef __FreeBSD__ /* ROUTING */ if (rule->rtableid > 0 && rule->rtableid >= rt_numfibs) -#else - if (rule->rtableid > 0 && !rtable_exists(rule->rtableid)) -#endif error = EBUSY; #ifdef ALTQ @@ -1784,43 +1204,34 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EBUSY; if (rule->rt && !rule->direction) error = EINVAL; -#if NPFLOG > 0 if (!rule->log) rule->logif = 0; if (rule->logif >= PFLOGIFS_MAX) error = EINVAL; -#endif - if (pf_rtlabel_add(&rule->src.addr) || - pf_rtlabel_add(&rule->dst.addr)) - error = EBUSY; if (pf_addr_setup(ruleset, &rule->src.addr, rule->af)) - error = EINVAL; + error = ENOMEM; if (pf_addr_setup(ruleset, &rule->dst.addr, rule->af)) - error = EINVAL; + error = ENOMEM; if 
(pf_anchor_setup(rule, ruleset, pr->anchor_call)) error = EINVAL; -#ifdef __FreeBSD__ TAILQ_FOREACH(pa, &V_pf_pabuf, entries) -#else - TAILQ_FOREACH(pa, &pf_pabuf, entries) -#endif - if (pf_tbladdr_setup(ruleset, &pa->addr)) - error = EINVAL; + if (pa->addr.type == PF_ADDR_TABLE) { + pa->addr.p.tbl = pfr_attach_table(ruleset, + pa->addr.v.tblname); + if (pa->addr.p.tbl == NULL) + error = ENOMEM; + } if (rule->overload_tblname[0]) { if ((rule->overload_tbl = pfr_attach_table(ruleset, - rule->overload_tblname, 0)) == NULL) + rule->overload_tblname)) == NULL) error = EINVAL; else rule->overload_tbl->pfrkt_flags |= PFR_TFLAG_ACTIVE; } -#ifdef __FreeBSD__ pf_mv_pool(&V_pf_pabuf, &rule->rpool.list); -#else - pf_mv_pool(&pf_pabuf, &rule->rpool.list); -#endif if (((((rule->action == PF_NAT) || (rule->action == PF_RDR) || (rule->action == PF_BINAT)) && rule->anchor == NULL) || (rule->rt > PF_FASTROUTE)) && @@ -1828,24 +1239,26 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EINVAL; if (error) { - pf_rm_rule(NULL, rule); + pf_free_rule(rule); + PF_RULES_WUNLOCK(); break; } -#ifdef __FreeBSD__ - if (!V_debug_pfugidhack && (rule->uid.op || rule->gid.op || - rule->log & PF_LOG_SOCKET_LOOKUP)) { - DPFPRINTF(PF_DEBUG_MISC, - ("pf: debug.pfugidhack enabled\n")); - V_debug_pfugidhack = 1; - } -#endif rule->rpool.cur = TAILQ_FIRST(&rule->rpool.list); rule->evaluations = rule->packets[0] = rule->packets[1] = rule->bytes[0] = rule->bytes[1] = 0; TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr, rule, entries); ruleset->rules[rs_num].inactive.rcount++; + PF_RULES_WUNLOCK(); + break; + +#undef ERROUT +DIOCADDRULE_error: + PF_RULES_WUNLOCK(); + free(rule, M_PFRULE); + if (kif) + free(kif, PFI_MTYPE); break; } @@ -1855,14 +1268,17 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pf_rule *tail; int rs_num; + PF_RULES_WLOCK(); pr->anchor[sizeof(pr->anchor) - 1] = 0; ruleset = pf_find_ruleset(pr->anchor); if (ruleset == 
NULL) { + PF_RULES_WUNLOCK(); error = EINVAL; break; } rs_num = pf_get_ruleset_number(pr->rule.action); if (rs_num >= PF_RULESET_MAX) { + PF_RULES_WUNLOCK(); error = EINVAL; break; } @@ -1873,6 +1289,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) else pr->nr = 0; pr->ticket = ruleset->rules[rs_num].active.ticket; + PF_RULES_WUNLOCK(); break; } @@ -1882,18 +1299,22 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pf_rule *rule; int rs_num, i; + PF_RULES_WLOCK(); pr->anchor[sizeof(pr->anchor) - 1] = 0; ruleset = pf_find_ruleset(pr->anchor); if (ruleset == NULL) { + PF_RULES_WUNLOCK(); error = EINVAL; break; } rs_num = pf_get_ruleset_number(pr->rule.action); if (rs_num >= PF_RULESET_MAX) { + PF_RULES_WUNLOCK(); error = EINVAL; break; } if (pr->ticket != ruleset->rules[rs_num].active.ticket) { + PF_RULES_WUNLOCK(); error = EBUSY; break; } @@ -1901,11 +1322,13 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) while ((rule != NULL) && (rule->nr != pr->nr)) rule = TAILQ_NEXT(rule, entries); if (rule == NULL) { + PF_RULES_WUNLOCK(); error = EBUSY; break; } bcopy(rule, &pr->rule, sizeof(struct pf_rule)); if (pf_anchor_copyout(ruleset, rule, pr)) { + PF_RULES_WUNLOCK(); error = EBUSY; break; } @@ -1924,6 +1347,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) rule->bytes[0] = rule->bytes[1] = 0; rule->states_tot = 0; } + PF_RULES_WUNLOCK(); break; } @@ -1931,116 +1355,80 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pfioc_rule *pcr = (struct pfioc_rule *)addr; struct pf_ruleset *ruleset; struct pf_rule *oldrule = NULL, *newrule = NULL; + struct pfi_kif *kif = NULL; + struct pf_pooladdr *pa; u_int32_t nr = 0; int rs_num; - if (!(pcr->action == PF_CHANGE_REMOVE || - pcr->action == PF_CHANGE_GET_TICKET) && -#ifdef __FreeBSD__ - pcr->pool_ticket != V_ticket_pabuf) { -#else - pcr->pool_ticket != ticket_pabuf) { -#endif - error = 
EBUSY; - break; - } - if (pcr->action < PF_CHANGE_ADD_HEAD || pcr->action > PF_CHANGE_GET_TICKET) { error = EINVAL; break; } - ruleset = pf_find_ruleset(pcr->anchor); - if (ruleset == NULL) { - error = EINVAL; - break; - } - rs_num = pf_get_ruleset_number(pcr->rule.action); - if (rs_num >= PF_RULESET_MAX) { + if (pcr->rule.return_icmp >> 8 > ICMP_MAXTYPE) { error = EINVAL; break; } - if (pcr->action == PF_CHANGE_GET_TICKET) { - pcr->ticket = ++ruleset->rules[rs_num].active.ticket; - break; - } else { - if (pcr->ticket != - ruleset->rules[rs_num].active.ticket) { - error = EINVAL; - break; - } - if (pcr->rule.return_icmp >> 8 > ICMP_MAXTYPE) { - error = EINVAL; - break; - } - } - if (pcr->action != PF_CHANGE_REMOVE) { -#ifdef __FreeBSD__ - newrule = pool_get(&V_pf_rule_pl, PR_NOWAIT); -#else - newrule = pool_get(&pf_rule_pl, PR_WAITOK|PR_LIMITFAIL); -#endif - if (newrule == NULL) { - error = ENOMEM; - break; - } - bcopy(&pcr->rule, newrule, sizeof(struct pf_rule)); -#ifdef __FreeBSD__ - newrule->cuid = td->td_ucred->cr_ruid; - newrule->cpid = td->td_proc ? td->td_proc->p_pid : 0; -#else - newrule->cuid = p->p_cred->p_ruid; - newrule->cpid = p->p_pid; -#endif - TAILQ_INIT(&newrule->rpool.list); - /* initialize refcounting */ - newrule->states_cur = 0; - newrule->entries.tqe_prev = NULL; #ifndef INET - if (newrule->af == AF_INET) { -#ifdef __FreeBSD__ - pool_put(&V_pf_rule_pl, newrule); -#else - pool_put(&pf_rule_pl, newrule); -#endif + if (pcr->rule.af == AF_INET) { error = EAFNOSUPPORT; break; } #endif /* INET */ #ifndef INET6 - if (newrule->af == AF_INET6) { -#ifdef __FreeBSD__ - pool_put(&V_pf_rule_pl, newrule); -#else - pool_put(&pf_rule_pl, newrule); -#endif + if (pcr->rule.af == AF_INET6) { error = EAFNOSUPPORT; break; } #endif /* INET6 */ + newrule = malloc(sizeof(*newrule), M_PFRULE, M_WAITOK); + bcopy(&pcr->rule, newrule, sizeof(struct pf_rule)); + newrule->cuid = td->td_ucred->cr_ruid; + newrule->cpid = td->td_proc ? 
td->td_proc->p_pid : 0; + TAILQ_INIT(&newrule->rpool.list); + /* Initialize refcounting. */ + newrule->states_cur = 0; + newrule->entries.tqe_prev = NULL; + + if (newrule->ifname[0]) + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); + } + +#define ERROUT(x) { error = (x); goto DIOCCHANGERULE_error; } + + PF_RULES_WLOCK(); + if (!(pcr->action == PF_CHANGE_REMOVE || + pcr->action == PF_CHANGE_GET_TICKET) && + pcr->pool_ticket != V_ticket_pabuf) + ERROUT(EBUSY); + + ruleset = pf_find_ruleset(pcr->anchor); + if (ruleset == NULL) + ERROUT(EINVAL); + + rs_num = pf_get_ruleset_number(pcr->rule.action); + if (rs_num >= PF_RULESET_MAX) + ERROUT(EINVAL); + + if (pcr->action == PF_CHANGE_GET_TICKET) { + pcr->ticket = ++ruleset->rules[rs_num].active.ticket; + ERROUT(0); + } else if (pcr->ticket != + ruleset->rules[rs_num].active.ticket) + ERROUT(EINVAL); + + if (pcr->action != PF_CHANGE_REMOVE) { if (newrule->ifname[0]) { - newrule->kif = pfi_kif_get(newrule->ifname); - if (newrule->kif == NULL) { -#ifdef __FreeBSD__ - pool_put(&V_pf_rule_pl, newrule); -#else - pool_put(&pf_rule_pl, newrule); -#endif - error = EINVAL; - break; - } - pfi_kif_ref(newrule->kif, PFI_KIF_REF_RULE); + newrule->kif = pfi_kif_attach(kif, + newrule->ifname); + pfi_kif_ref(newrule->kif); } else newrule->kif = NULL; if (newrule->rtableid > 0 && -#ifdef __FreeBSD__ /* ROUTING */ newrule->rtableid >= rt_numfibs) -#else - !rtable_exists(newrule->rtableid)) -#endif error = EBUSY; #ifdef ALTQ @@ -2067,32 +1455,28 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EBUSY; if (newrule->rt && !newrule->direction) error = EINVAL; -#if NPFLOG > 0 if (!newrule->log) newrule->logif = 0; if (newrule->logif >= PFLOGIFS_MAX) error = EINVAL; -#endif - if (pf_rtlabel_add(&newrule->src.addr) || - pf_rtlabel_add(&newrule->dst.addr)) - error = EBUSY; if (pf_addr_setup(ruleset, &newrule->src.addr, newrule->af)) - error = EINVAL; + error = ENOMEM; if (pf_addr_setup(ruleset, &newrule->dst.addr, 
newrule->af)) - error = EINVAL; + error = ENOMEM; if (pf_anchor_setup(newrule, ruleset, pcr->anchor_call)) error = EINVAL; -#ifdef __FreeBSD__ TAILQ_FOREACH(pa, &V_pf_pabuf, entries) -#else - TAILQ_FOREACH(pa, &pf_pabuf, entries) -#endif - if (pf_tbladdr_setup(ruleset, &pa->addr)) - error = EINVAL; + if (pa->addr.type == PF_ADDR_TABLE) { + pa->addr.p.tbl = + pfr_attach_table(ruleset, + pa->addr.v.tblname); + if (pa->addr.p.tbl == NULL) + error = ENOMEM; + } if (newrule->overload_tblname[0]) { if ((newrule->overload_tbl = pfr_attach_table( - ruleset, newrule->overload_tblname, 0)) == + ruleset, newrule->overload_tblname)) == NULL) error = EINVAL; else @@ -2100,11 +1484,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) PFR_TFLAG_ACTIVE; } -#ifdef __FreeBSD__ pf_mv_pool(&V_pf_pabuf, &newrule->rpool.list); -#else - pf_mv_pool(&pf_pabuf, &newrule->rpool.list); -#endif if (((((newrule->action == PF_NAT) || (newrule->action == PF_RDR) || (newrule->action == PF_BINAT) || @@ -2114,30 +1494,17 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EINVAL; if (error) { - pf_rm_rule(NULL, newrule); + pf_free_rule(newrule); + PF_RULES_WUNLOCK(); break; } -#ifdef __FreeBSD__ - if (!V_debug_pfugidhack && (newrule->uid.op || - newrule->gid.op || - newrule->log & PF_LOG_SOCKET_LOOKUP)) { - DPFPRINTF(PF_DEBUG_MISC, - ("pf: debug.pfugidhack enabled\n")); - V_debug_pfugidhack = 1; - } -#endif - newrule->rpool.cur = TAILQ_FIRST(&newrule->rpool.list); newrule->evaluations = 0; newrule->packets[0] = newrule->packets[1] = 0; newrule->bytes[0] = newrule->bytes[1] = 0; } -#ifdef __FreeBSD__ pf_empty_pool(&V_pf_pabuf); -#else - pf_empty_pool(&pf_pabuf); -#endif if (pcr->action == PF_CHANGE_ADD_HEAD) oldrule = TAILQ_FIRST( @@ -2152,14 +1519,16 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) oldrule = TAILQ_NEXT(oldrule, entries); if (oldrule == NULL) { if (newrule != NULL) - pf_rm_rule(NULL, newrule); + 
pf_free_rule(newrule); + PF_RULES_WUNLOCK(); error = EINVAL; break; } } if (pcr->action == PF_CHANGE_REMOVE) { - pf_rm_rule(ruleset->rules[rs_num].active.ptr, oldrule); + pf_unlink_rule(ruleset->rules[rs_num].active.ptr, + oldrule); ruleset->rules[rs_num].active.rcount--; } else { if (oldrule == NULL) @@ -2186,114 +1555,120 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) pf_calc_skip_steps(ruleset->rules[rs_num].active.ptr); pf_remove_if_empty_ruleset(ruleset); + PF_RULES_WUNLOCK(); + break; + +#undef ERROUT +DIOCCHANGERULE_error: + PF_RULES_WUNLOCK(); + if (newrule != NULL) + free(newrule, M_PFRULE); + if (kif != NULL) + free(kif, PFI_MTYPE); break; } case DIOCCLRSTATES: { - struct pf_state *s, *nexts; + struct pf_state *s; struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr; - u_int killed = 0; - -#ifdef __FreeBSD__ - for (s = RB_MIN(pf_state_tree_id, &V_tree_id); s; s = nexts) { - nexts = RB_NEXT(pf_state_tree_id, &V_tree_id, s); -#else - for (s = RB_MIN(pf_state_tree_id, &tree_id); s; s = nexts) { - nexts = RB_NEXT(pf_state_tree_id, &tree_id, s); -#endif - - if (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname, - s->kif->pfik_name)) { -#if NPFSYNC > 0 - /* don't send out individual delete messages */ - SET(s->state_flags, PFSTATE_NOSYNC); -#endif - pf_unlink_state(s); - killed++; - } + u_int i, killed = 0; + + for (i = 0; i <= V_pf_hashmask; i++) { + struct pf_idhash *ih = &V_pf_idhash[i]; + +relock_DIOCCLRSTATES: + PF_HASHROW_LOCK(ih); + LIST_FOREACH(s, &ih->states, entry) + if (!psk->psk_ifname[0] || + !strcmp(psk->psk_ifname, + s->kif->pfik_name)) { + /* + * Don't send out individual + * delete messages. 
+ */ + s->state_flags |= PFSTATE_NOSYNC; + pf_unlink_state(s, PF_ENTER_LOCKED); + killed++; + goto relock_DIOCCLRSTATES; + } + PF_HASHROW_UNLOCK(ih); } psk->psk_killed = killed; -#if NPFSYNC > 0 -#ifdef __FreeBSD__ if (pfsync_clear_states_ptr != NULL) pfsync_clear_states_ptr(V_pf_status.hostid, psk->psk_ifname); -#else - pfsync_clear_states(pf_status.hostid, psk->psk_ifname); -#endif -#endif break; } case DIOCKILLSTATES: { - struct pf_state *s, *nexts; + struct pf_state *s; struct pf_state_key *sk; struct pf_addr *srcaddr, *dstaddr; u_int16_t srcport, dstport; struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr; - u_int killed = 0; + u_int i, killed = 0; if (psk->psk_pfcmp.id) { if (psk->psk_pfcmp.creatorid == 0) -#ifdef __FreeBSD__ psk->psk_pfcmp.creatorid = V_pf_status.hostid; -#else - psk->psk_pfcmp.creatorid = pf_status.hostid; -#endif - if ((s = pf_find_state_byid(&psk->psk_pfcmp))) { - pf_unlink_state(s); + if ((s = pf_find_state_byid(psk->psk_pfcmp.id, + psk->psk_pfcmp.creatorid))) { + pf_unlink_state(s, PF_ENTER_LOCKED); psk->psk_killed = 1; } break; } -#ifdef __FreeBSD__ - for (s = RB_MIN(pf_state_tree_id, &V_tree_id); s; - s = nexts) { - nexts = RB_NEXT(pf_state_tree_id, &V_tree_id, s); -#else - for (s = RB_MIN(pf_state_tree_id, &tree_id); s; - s = nexts) { - nexts = RB_NEXT(pf_state_tree_id, &tree_id, s); -#endif - sk = s->key[PF_SK_WIRE]; - - if (s->direction == PF_OUT) { - srcaddr = &sk->addr[1]; - dstaddr = &sk->addr[0]; - srcport = sk->port[0]; - dstport = sk->port[0]; - } else { - srcaddr = &sk->addr[0]; - dstaddr = &sk->addr[1]; - srcport = sk->port[0]; - dstport = sk->port[0]; - } - if ((!psk->psk_af || sk->af == psk->psk_af) - && (!psk->psk_proto || psk->psk_proto == - sk->proto) && - PF_MATCHA(psk->psk_src.neg, - &psk->psk_src.addr.v.a.addr, - &psk->psk_src.addr.v.a.mask, - srcaddr, sk->af) && - PF_MATCHA(psk->psk_dst.neg, - &psk->psk_dst.addr.v.a.addr, - &psk->psk_dst.addr.v.a.mask, - dstaddr, sk->af) && - (psk->psk_src.port_op == 0 
|| - pf_match_port(psk->psk_src.port_op, - psk->psk_src.port[0], psk->psk_src.port[1], - srcport)) && - (psk->psk_dst.port_op == 0 || - pf_match_port(psk->psk_dst.port_op, - psk->psk_dst.port[0], psk->psk_dst.port[1], - dstport)) && - (!psk->psk_label[0] || (s->rule.ptr->label[0] && - !strcmp(psk->psk_label, s->rule.ptr->label))) && - (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname, - s->kif->pfik_name))) { - pf_unlink_state(s); - killed++; + for (i = 0; i <= V_pf_hashmask; i++) { + struct pf_idhash *ih = &V_pf_idhash[i]; + +relock_DIOCKILLSTATES: + PF_HASHROW_LOCK(ih); + LIST_FOREACH(s, &ih->states, entry) { + sk = s->key[PF_SK_WIRE]; + if (s->direction == PF_OUT) { + srcaddr = &sk->addr[1]; + dstaddr = &sk->addr[0]; + srcport = sk->port[0]; + dstport = sk->port[0]; + } else { + srcaddr = &sk->addr[0]; + dstaddr = &sk->addr[1]; + srcport = sk->port[0]; + dstport = sk->port[0]; + } + + if ((!psk->psk_af || sk->af == psk->psk_af) + && (!psk->psk_proto || psk->psk_proto == + sk->proto) && + PF_MATCHA(psk->psk_src.neg, + &psk->psk_src.addr.v.a.addr, + &psk->psk_src.addr.v.a.mask, + srcaddr, sk->af) && + PF_MATCHA(psk->psk_dst.neg, + &psk->psk_dst.addr.v.a.addr, + &psk->psk_dst.addr.v.a.mask, + dstaddr, sk->af) && + (psk->psk_src.port_op == 0 || + pf_match_port(psk->psk_src.port_op, + psk->psk_src.port[0], psk->psk_src.port[1], + srcport)) && + (psk->psk_dst.port_op == 0 || + pf_match_port(psk->psk_dst.port_op, + psk->psk_dst.port[0], psk->psk_dst.port[1], + dstport)) && + (!psk->psk_label[0] || + (s->rule.ptr->label[0] && + !strcmp(psk->psk_label, + s->rule.ptr->label))) && + (!psk->psk_ifname[0] || + !strcmp(psk->psk_ifname, + s->kif->pfik_name))) { + pf_unlink_state(s, PF_ENTER_LOCKED); + killed++; + goto relock_DIOCKILLSTATES; + } } + PF_HASHROW_UNLOCK(ih); } psk->psk_killed = killed; break; @@ -2308,98 +1683,83 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EINVAL; break; } -#ifdef __FreeBSD__ - if (pfsync_state_import_ptr != 
NULL) + if (pfsync_state_import_ptr != NULL) { + PF_RULES_RLOCK(); error = pfsync_state_import_ptr(sp, PFSYNC_SI_IOCTL); -#else - error = pfsync_state_import(sp, PFSYNC_SI_IOCTL); -#endif + PF_RULES_RUNLOCK(); + } + error = EOPNOTSUPP; break; } case DIOCGETSTATE: { struct pfioc_state *ps = (struct pfioc_state *)addr; struct pf_state *s; - struct pf_state_cmp id_key; - - bcopy(ps->state.id, &id_key.id, sizeof(id_key.id)); - id_key.creatorid = ps->state.creatorid; - s = pf_find_state_byid(&id_key); + s = pf_find_state_byid(ps->state.id, ps->state.creatorid); if (s == NULL) { error = ENOENT; break; } pfsync_state_export(&ps->state, s); + PF_STATE_UNLOCK(s); break; } case DIOCGETSTATES: { struct pfioc_states *ps = (struct pfioc_states *)addr; - struct pf_state *state; - struct pfsync_state *p, *pstore; - u_int32_t nr = 0; + struct pf_state *s; + struct pfsync_state *pstore, *p; + int i, nr; if (ps->ps_len == 0) { -#ifdef __FreeBSD__ - nr = V_pf_status.states; -#else - nr = pf_status.states; -#endif + nr = uma_zone_get_cur(V_pf_state_z); ps->ps_len = sizeof(struct pfsync_state) * nr; break; } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - pstore = malloc(sizeof(*pstore), M_TEMP, M_WAITOK); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif + p = pstore = malloc(ps->ps_len, M_TEMP, M_WAITOK); + nr = 0; - p = ps->ps_states; + for (i = 0; i <= V_pf_hashmask; i++) { + struct pf_idhash *ih = &V_pf_idhash[i]; -#ifdef __FreeBSD__ - state = TAILQ_FIRST(&V_state_list); -#else - state = TAILQ_FIRST(&state_list); -#endif - while (state) { - if (state->timeout != PFTM_UNLINKED) { - if ((nr+1) * sizeof(*p) > (unsigned)ps->ps_len) - break; - pfsync_state_export(pstore, state); -#ifdef __FreeBSD__ - PF_COPYOUT(pstore, p, sizeof(*p), error); -#else - error = copyout(pstore, p, sizeof(*p)); -#endif - if (error) { - free(pstore, M_TEMP); - goto fail; + PF_HASHROW_LOCK(ih); + LIST_FOREACH(s, &ih->states, entry) { + + if (s->timeout == PFTM_UNLINKED) + continue; + + if ((nr+1) * sizeof(*p) > 
ps->ps_len) { + PF_HASHROW_UNLOCK(ih); + goto DIOCGETSTATES_full; } + pfsync_state_export(p, s); p++; nr++; } - state = TAILQ_NEXT(state, entry_list); + PF_HASHROW_UNLOCK(ih); + } +DIOCGETSTATES_full: + error = copyout(pstore, ps->ps_states, + sizeof(struct pfsync_state) * nr); + if (error) { + free(pstore, M_TEMP); + break; } - ps->ps_len = sizeof(struct pfsync_state) * nr; - free(pstore, M_TEMP); + break; } case DIOCGETSTATUS: { struct pf_status *s = (struct pf_status *)addr; -#ifdef __FreeBSD__ + PF_RULES_RLOCK(); bcopy(&V_pf_status, s, sizeof(struct pf_status)); -#else - bcopy(&pf_status, s, sizeof(struct pf_status)); -#endif pfi_update_status(s->ifname, s); + PF_RULES_RUNLOCK(); break; } @@ -2407,37 +1767,24 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pfioc_if *pi = (struct pfioc_if *)addr; if (pi->ifname[0] == 0) { -#ifdef __FreeBSD__ bzero(V_pf_status.ifname, IFNAMSIZ); -#else - bzero(pf_status.ifname, IFNAMSIZ); -#endif break; } -#ifdef __FreeBSD__ + PF_RULES_WLOCK(); strlcpy(V_pf_status.ifname, pi->ifname, IFNAMSIZ); -#else - strlcpy(pf_status.ifname, pi->ifname, IFNAMSIZ); -#endif + PF_RULES_WUNLOCK(); break; } case DIOCCLRSTATUS: { -#ifdef __FreeBSD__ + PF_RULES_WLOCK(); bzero(V_pf_status.counters, sizeof(V_pf_status.counters)); bzero(V_pf_status.fcounters, sizeof(V_pf_status.fcounters)); bzero(V_pf_status.scounters, sizeof(V_pf_status.scounters)); V_pf_status.since = time_second; if (*V_pf_status.ifname) pfi_update_status(V_pf_status.ifname, NULL); -#else - bzero(pf_status.counters, sizeof(pf_status.counters)); - bzero(pf_status.fcounters, sizeof(pf_status.fcounters)); - bzero(pf_status.scounters, sizeof(pf_status.scounters)); - pf_status.since = time_second; - if (*pf_status.ifname) - pfi_update_status(pf_status.ifname, NULL); -#endif + PF_RULES_WUNLOCK(); break; } @@ -2473,6 +1820,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (m > 1) error = E2BIG; /* more than one state */ else 
if (state != NULL) { + /* XXXGL: not locked read */ sk = state->key[sidx]; PF_ACPY(&pnl->rsaddr, &sk->addr[sidx], sk->af); pnl->rsport = sk->port[sidx]; @@ -2491,23 +1839,17 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (pt->timeout < 0 || pt->timeout >= PFTM_MAX || pt->seconds < 0) { error = EINVAL; - goto fail; + break; } -#ifdef __FreeBSD__ + PF_RULES_WLOCK(); old = V_pf_default_rule.timeout[pt->timeout]; -#else - old = pf_default_rule.timeout[pt->timeout]; -#endif if (pt->timeout == PFTM_INTERVAL && pt->seconds == 0) pt->seconds = 1; -#ifdef __FreeBSD__ V_pf_default_rule.timeout[pt->timeout] = pt->seconds; -#else - pf_default_rule.timeout[pt->timeout] = pt->seconds; -#endif if (pt->timeout == PFTM_INTERVAL && pt->seconds < old) wakeup(pf_purge_thread); pt->seconds = old; + PF_RULES_WUNLOCK(); break; } @@ -2516,13 +1858,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (pt->timeout < 0 || pt->timeout >= PFTM_MAX) { error = EINVAL; - goto fail; + break; } -#ifdef __FreeBSD__ + PF_RULES_RLOCK(); pt->seconds = V_pf_default_rule.timeout[pt->timeout]; -#else - pt->seconds = pf_default_rule.timeout[pt->timeout]; -#endif + PF_RULES_RUNLOCK(); break; } @@ -2531,13 +1871,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (pl->index < 0 || pl->index >= PF_LIMIT_MAX) { error = EINVAL; - goto fail; + break; } -#ifdef __FreeBSD__ - pl->limit = V_pf_pool_limits[pl->index].limit; -#else - pl->limit = pf_pool_limits[pl->index].limit; -#endif + PF_RULES_RLOCK(); + pl->limit = V_pf_limits[pl->index].limit; + PF_RULES_RUNLOCK(); break; } @@ -2545,41 +1883,27 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pfioc_limit *pl = (struct pfioc_limit *)addr; int old_limit; + PF_RULES_WLOCK(); if (pl->index < 0 || pl->index >= PF_LIMIT_MAX || -#ifdef __FreeBSD__ - V_pf_pool_limits[pl->index].pp == NULL) { -#else - pf_pool_limits[pl->index].pp == NULL) { -#endif + 
V_pf_limits[pl->index].zone == NULL) { + PF_RULES_WUNLOCK(); error = EINVAL; - goto fail; - } -#ifdef __FreeBSD__ - uma_zone_set_max(V_pf_pool_limits[pl->index].pp, pl->limit); - old_limit = V_pf_pool_limits[pl->index].limit; - V_pf_pool_limits[pl->index].limit = pl->limit; - pl->limit = old_limit; -#else - if (pool_sethardlimit(pf_pool_limits[pl->index].pp, - pl->limit, NULL, 0) != 0) { - error = EBUSY; - goto fail; + break; } - old_limit = pf_pool_limits[pl->index].limit; - pf_pool_limits[pl->index].limit = pl->limit; + uma_zone_set_max(V_pf_limits[pl->index].zone, pl->limit); + old_limit = V_pf_limits[pl->index].limit; + V_pf_limits[pl->index].limit = pl->limit; pl->limit = old_limit; -#endif + PF_RULES_WUNLOCK(); break; } case DIOCSETDEBUG: { u_int32_t *level = (u_int32_t *)addr; -#ifdef __FreeBSD__ + PF_RULES_WLOCK(); V_pf_status.debug = *level; -#else - pf_status.debug = *level; -#endif + PF_RULES_WUNLOCK(); break; } @@ -2588,16 +1912,17 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pf_ruleset *ruleset = &pf_main_ruleset; struct pf_rule *rule; + PF_RULES_WLOCK(); TAILQ_FOREACH(rule, ruleset->rules[PF_RULESET_FILTER].active.ptr, entries) { rule->evaluations = 0; rule->packets[0] = rule->packets[1] = 0; rule->bytes[0] = rule->bytes[1] = 0; } + PF_RULES_WUNLOCK(); break; } -#ifdef __FreeBSD__ case DIOCGIFSPEED: { struct pf_ifspeed *psp = (struct pf_ifspeed *)addr; struct pf_ifspeed ps; @@ -2615,32 +1940,24 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EINVAL; break; } -#endif /* __FreeBSD__ */ #ifdef ALTQ case DIOCSTARTALTQ: { struct pf_altq *altq; + PF_RULES_WLOCK(); /* enable all altq interfaces on active list */ -#ifdef __FreeBSD__ TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { if (altq->qname[0] == 0 && (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { -#else - TAILQ_FOREACH(altq, pf_altqs_active, entries) { - if (altq->qname[0] == 0) { -#endif error = pf_enable_altq(altq); if 
(error != 0) break; } } if (error == 0) -#ifdef __FreeBSD__ V_pf_altq_running = 1; -#else - pf_altq_running = 1; -#endif + PF_RULES_WUNLOCK(); DPFPRINTF(PF_DEBUG_MISC, ("altq: started\n")); break; } @@ -2648,26 +1965,19 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCSTOPALTQ: { struct pf_altq *altq; + PF_RULES_WLOCK(); /* disable all altq interfaces on active list */ -#ifdef __FreeBSD__ TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { if (altq->qname[0] == 0 && (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { -#else - TAILQ_FOREACH(altq, pf_altqs_active, entries) { - if (altq->qname[0] == 0) { -#endif error = pf_disable_altq(altq); if (error != 0) break; } } if (error == 0) -#ifdef __FreeBSD__ V_pf_altq_running = 0; -#else - pf_altq_running = 0; -#endif + PF_RULES_WUNLOCK(); DPFPRINTF(PF_DEBUG_MISC, ("altq: stopped\n")); break; } @@ -2675,28 +1985,19 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCADDALTQ: { struct pfioc_altq *pa = (struct pfioc_altq *)addr; struct pf_altq *altq, *a; + struct ifnet *ifp; + + altq = malloc(sizeof(*altq), M_PFALTQ, M_WAITOK); + bcopy(&pa->altq, altq, sizeof(struct pf_altq)); + altq->local_flags = 0; -#ifdef __FreeBSD__ + PF_RULES_WLOCK(); if (pa->ticket != V_ticket_altqs_inactive) { -#else - if (pa->ticket != ticket_altqs_inactive) { -#endif + PF_RULES_WUNLOCK(); + free(altq, M_PFALTQ); error = EBUSY; break; } -#ifdef __FreeBSD__ - altq = pool_get(&V_pf_altq_pl, PR_NOWAIT); -#else - altq = pool_get(&pf_altq_pl, PR_WAITOK|PR_LIMITFAIL); -#endif - if (altq == NULL) { - error = ENOMEM; - break; - } - bcopy(&pa->altq, altq, sizeof(struct pf_altq)); -#ifdef __FreeBSD__ - altq->local_flags = 0; -#endif /* * if this is for a queue, find the discipline and @@ -2704,20 +2005,13 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) */ if (altq->qname[0] != 0) { if ((altq->qid = pf_qname2qid(altq->qname)) == 0) { + PF_RULES_WUNLOCK(); error = 
EBUSY; -#ifdef __FreeBSD__ - pool_put(&V_pf_altq_pl, altq); -#else - pool_put(&pf_altq_pl, altq); -#endif + free(altq, M_PFALTQ); break; } altq->altq_disc = NULL; -#ifdef __FreeBSD__ TAILQ_FOREACH(a, V_pf_altqs_inactive, entries) { -#else - TAILQ_FOREACH(a, pf_altqs_inactive, entries) { -#endif if (strncmp(a->ifname, altq->ifname, IFNAMSIZ) == 0 && a->qname[0] == 0) { altq->altq_disc = a->altq_disc; @@ -2726,34 +2020,20 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } } -#ifdef __FreeBSD__ - struct ifnet *ifp; - - if ((ifp = ifunit(altq->ifname)) == NULL) { + if ((ifp = ifunit(altq->ifname)) == NULL) altq->local_flags |= PFALTQ_FLAG_IF_REMOVED; - } else { - PF_UNLOCK(); -#endif - error = altq_add(altq); -#ifdef __FreeBSD__ - PF_LOCK(); - } -#endif + else + error = altq_add(altq); + if (error) { -#ifdef __FreeBSD__ - pool_put(&V_pf_altq_pl, altq); -#else - pool_put(&pf_altq_pl, altq); -#endif + PF_RULES_WUNLOCK(); + free(altq, M_PFALTQ); break; } -#ifdef __FreeBSD__ TAILQ_INSERT_TAIL(V_pf_altqs_inactive, altq, entries); -#else - TAILQ_INSERT_TAIL(pf_altqs_inactive, altq, entries); -#endif bcopy(altq, &pa->altq, sizeof(struct pf_altq)); + PF_RULES_WUNLOCK(); break; } @@ -2761,16 +2041,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pfioc_altq *pa = (struct pfioc_altq *)addr; struct pf_altq *altq; + PF_RULES_RLOCK(); pa->nr = 0; -#ifdef __FreeBSD__ TAILQ_FOREACH(altq, V_pf_altqs_active, entries) pa->nr++; pa->ticket = V_ticket_altqs_active; -#else - TAILQ_FOREACH(altq, pf_altqs_active, entries) - pa->nr++; - pa->ticket = ticket_altqs_active; -#endif + PF_RULES_RUNLOCK(); break; } @@ -2779,29 +2055,25 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pf_altq *altq; u_int32_t nr; -#ifdef __FreeBSD__ + PF_RULES_RLOCK(); if (pa->ticket != V_ticket_altqs_active) { -#else - if (pa->ticket != ticket_altqs_active) { -#endif + PF_RULES_RUNLOCK(); error = EBUSY; break; } nr = 0; 
-#ifdef __FreeBSD__ altq = TAILQ_FIRST(V_pf_altqs_active); -#else - altq = TAILQ_FIRST(pf_altqs_active); -#endif while ((altq != NULL) && (nr < pa->nr)) { altq = TAILQ_NEXT(altq, entries); nr++; } if (altq == NULL) { + PF_RULES_RUNLOCK(); error = EBUSY; break; } bcopy(altq, &pa->altq, sizeof(struct pf_altq)); + PF_RULES_RUNLOCK(); break; } @@ -2816,41 +2088,32 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) u_int32_t nr; int nbytes; -#ifdef __FreeBSD__ + PF_RULES_RLOCK(); if (pq->ticket != V_ticket_altqs_active) { -#else - if (pq->ticket != ticket_altqs_active) { -#endif + PF_RULES_RUNLOCK(); error = EBUSY; break; } nbytes = pq->nbytes; nr = 0; -#ifdef __FreeBSD__ altq = TAILQ_FIRST(V_pf_altqs_active); -#else - altq = TAILQ_FIRST(pf_altqs_active); -#endif while ((altq != NULL) && (nr < pq->nr)) { altq = TAILQ_NEXT(altq, entries); nr++; } if (altq == NULL) { + PF_RULES_RUNLOCK(); error = EBUSY; break; } -#ifdef __FreeBSD__ if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) != 0) { + PF_RULES_RUNLOCK(); error = ENXIO; break; } - PF_UNLOCK(); -#endif + PF_RULES_RUNLOCK(); error = altq_getqstats(altq, pq->buf, &nbytes); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif if (error == 0) { pq->scheduler = altq->scheduler; pq->nbytes = nbytes; @@ -2862,27 +2125,18 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCBEGINADDRS: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; -#ifdef __FreeBSD__ + PF_RULES_WLOCK(); pf_empty_pool(&V_pf_pabuf); pp->ticket = ++V_ticket_pabuf; -#else - pf_empty_pool(&pf_pabuf); - pp->ticket = ++ticket_pabuf; -#endif + PF_RULES_WUNLOCK(); break; } case DIOCADDADDR: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; + struct pf_pooladdr *pa; + struct pfi_kif *kif = NULL; -#ifdef __FreeBSD__ - if (pp->ticket != V_ticket_pabuf) { -#else - if (pp->ticket != ticket_pabuf) { -#endif - error = EBUSY; - break; - } #ifndef INET if (pp->af == AF_INET) { error = EAFNOSUPPORT; @@ -2901,70 
+2155,68 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EINVAL; break; } -#ifdef __FreeBSD__ - pa = pool_get(&V_pf_pooladdr_pl, PR_NOWAIT); -#else - pa = pool_get(&pf_pooladdr_pl, PR_WAITOK|PR_LIMITFAIL); -#endif - if (pa == NULL) { - error = ENOMEM; + pa = malloc(sizeof(*pa), M_PFRULE, M_WAITOK); + bcopy(&pp->addr, pa, sizeof(struct pf_pooladdr)); + if (pa->ifname[0]) + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); + PF_RULES_WLOCK(); + if (pp->ticket != V_ticket_pabuf) { + PF_RULES_WUNLOCK(); + if (pa->ifname[0]) + free(kif, PFI_MTYPE); + free(pa, M_PFRULE); + error = EBUSY; break; } - bcopy(&pp->addr, pa, sizeof(struct pf_pooladdr)); if (pa->ifname[0]) { - pa->kif = pfi_kif_get(pa->ifname); - if (pa->kif == NULL) { -#ifdef __FreeBSD__ - pool_put(&V_pf_pooladdr_pl, pa); -#else - pool_put(&pf_pooladdr_pl, pa); -#endif - error = EINVAL; - break; - } - pfi_kif_ref(pa->kif, PFI_KIF_REF_RULE); - } - if (pfi_dynaddr_setup(&pa->addr, pp->af)) { - pfi_dynaddr_remove(&pa->addr); - pfi_kif_unref(pa->kif, PFI_KIF_REF_RULE); -#ifdef __FreeBSD__ - pool_put(&V_pf_pooladdr_pl, pa); -#else - pool_put(&pf_pooladdr_pl, pa); -#endif - error = EINVAL; + pa->kif = pfi_kif_attach(kif, pa->ifname); + pfi_kif_ref(pa->kif); + } else + pa->kif = NULL; + if (pa->addr.type == PF_ADDR_DYNIFTL && ((error = + pfi_dynaddr_setup(&pa->addr, pp->af)) != 0)) { + if (pa->ifname[0]) + pfi_kif_unref(pa->kif); + PF_RULES_WUNLOCK(); + free(pa, M_PFRULE); break; } -#ifdef __FreeBSD__ TAILQ_INSERT_TAIL(&V_pf_pabuf, pa, entries); -#else - TAILQ_INSERT_TAIL(&pf_pabuf, pa, entries); -#endif + PF_RULES_WUNLOCK(); break; } case DIOCGETADDRS: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; + struct pf_pool *pool; + struct pf_pooladdr *pa; + PF_RULES_RLOCK(); pp->nr = 0; pool = pf_get_pool(pp->anchor, pp->ticket, pp->r_action, pp->r_num, 0, 1, 0); if (pool == NULL) { + PF_RULES_RUNLOCK(); error = EBUSY; break; } TAILQ_FOREACH(pa, &pool->list, entries) pp->nr++; 
+ PF_RULES_RUNLOCK(); break; } case DIOCGETADDR: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; + struct pf_pool *pool; + struct pf_pooladdr *pa; u_int32_t nr = 0; + PF_RULES_RLOCK(); pool = pf_get_pool(pp->anchor, pp->ticket, pp->r_action, pp->r_num, 0, 1, 1); if (pool == NULL) { + PF_RULES_RUNLOCK(); error = EBUSY; break; } @@ -2974,18 +2226,22 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) nr++; } if (pa == NULL) { + PF_RULES_RUNLOCK(); error = EBUSY; break; } bcopy(pa, &pp->addr, sizeof(struct pf_pooladdr)); pf_addr_copyout(&pp->addr.addr); + PF_RULES_RUNLOCK(); break; } case DIOCCHANGEADDR: { struct pfioc_pooladdr *pca = (struct pfioc_pooladdr *)addr; + struct pf_pool *pool; struct pf_pooladdr *oldpa = NULL, *newpa = NULL; struct pf_ruleset *ruleset; + struct pfi_kif *kif = NULL; if (pca->action < PF_CHANGE_ADD_HEAD || pca->action > PF_CHANGE_REMOVE) { @@ -2999,76 +2255,60 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } - ruleset = pf_find_ruleset(pca->anchor); - if (ruleset == NULL) { - error = EBUSY; - break; - } - pool = pf_get_pool(pca->anchor, pca->ticket, pca->r_action, - pca->r_num, pca->r_last, 1, 1); - if (pool == NULL) { - error = EBUSY; - break; - } if (pca->action != PF_CHANGE_REMOVE) { -#ifdef __FreeBSD__ - newpa = pool_get(&V_pf_pooladdr_pl, - PR_NOWAIT); -#else - newpa = pool_get(&pf_pooladdr_pl, - PR_WAITOK|PR_LIMITFAIL); -#endif - if (newpa == NULL) { - error = ENOMEM; - break; - } - bcopy(&pca->addr, newpa, sizeof(struct pf_pooladdr)); #ifndef INET if (pca->af == AF_INET) { -#ifdef __FreeBSD__ - pool_put(&V_pf_pooladdr_pl, newpa); -#else - pool_put(&pf_pooladdr_pl, newpa); -#endif error = EAFNOSUPPORT; break; } #endif /* INET */ #ifndef INET6 if (pca->af == AF_INET6) { -#ifdef __FreeBSD__ - pool_put(&V_pf_pooladdr_pl, newpa); -#else - pool_put(&pf_pooladdr_pl, newpa); -#endif error = EAFNOSUPPORT; break; } #endif /* INET6 */ + newpa = malloc(sizeof(*newpa), 
M_PFRULE, M_WAITOK); + bcopy(&pca->addr, newpa, sizeof(struct pf_pooladdr)); + if (newpa->ifname[0]) + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); + } + +#define ERROUT(x) { error = (x); goto DIOCCHANGEADDR_error; } + PF_RULES_WLOCK(); + ruleset = pf_find_ruleset(pca->anchor); + if (ruleset == NULL) + ERROUT(EBUSY); + + pool = pf_get_pool(pca->anchor, pca->ticket, pca->r_action, + pca->r_num, pca->r_last, 1, 1); + if (pool == NULL) + ERROUT(EBUSY); + + if (pca->action != PF_CHANGE_REMOVE) { if (newpa->ifname[0]) { - newpa->kif = pfi_kif_get(newpa->ifname); - if (newpa->kif == NULL) { -#ifdef __FreeBSD__ - pool_put(&V_pf_pooladdr_pl, newpa); -#else - pool_put(&pf_pooladdr_pl, newpa); -#endif - error = EINVAL; - break; - } - pfi_kif_ref(newpa->kif, PFI_KIF_REF_RULE); + newpa->kif = pfi_kif_attach(kif, newpa->ifname); + pfi_kif_ref(newpa->kif); } else newpa->kif = NULL; - if (pfi_dynaddr_setup(&newpa->addr, pca->af) || - pf_tbladdr_setup(ruleset, &newpa->addr)) { - pfi_dynaddr_remove(&newpa->addr); - pfi_kif_unref(newpa->kif, PFI_KIF_REF_RULE); -#ifdef __FreeBSD__ - pool_put(&V_pf_pooladdr_pl, newpa); -#else - pool_put(&pf_pooladdr_pl, newpa); -#endif - error = EINVAL; + + switch (newpa->addr.type) { + case PF_ADDR_DYNIFTL: + error = pfi_dynaddr_setup(&newpa->addr, + pca->af); + break; + case PF_ADDR_TABLE: + newpa->addr.p.tbl = pfr_attach_table(ruleset, + newpa->addr.v.tblname); + if (newpa->addr.p.tbl == NULL) + error = ENOMEM; + break; + } + if (error) { + if (newpa->kif) + pfi_kif_unref(newpa->kif); + PF_RULES_WUNLOCK(); + free(newpa, M_PFRULE); break; } } @@ -3086,6 +2326,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) i++; } if (oldpa == NULL) { + PF_RULES_WUNLOCK(); error = EINVAL; break; } @@ -3093,14 +2334,17 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (pca->action == PF_CHANGE_REMOVE) { TAILQ_REMOVE(&pool->list, oldpa, entries); - pfi_dynaddr_remove(&oldpa->addr); - 
pf_tbladdr_remove(&oldpa->addr); - pfi_kif_unref(oldpa->kif, PFI_KIF_REF_RULE); -#ifdef __FreeBSD__ - pool_put(&V_pf_pooladdr_pl, oldpa); -#else - pool_put(&pf_pooladdr_pl, oldpa); -#endif + switch (oldpa->addr.type) { + case PF_ADDR_DYNIFTL: + pfi_dynaddr_remove(oldpa->addr.p.dyn); + break; + case PF_ADDR_TABLE: + pfr_detach_table(oldpa->addr.p.tbl); + break; + } + if (oldpa->kif) + pfi_kif_unref(oldpa->kif); + free(oldpa, M_PFRULE); } else { if (oldpa == NULL) TAILQ_INSERT_TAIL(&pool->list, newpa, entries); @@ -3115,6 +2359,16 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) pool->cur = TAILQ_FIRST(&pool->list); PF_ACPY(&pool->counter, &pool->cur->addr.v.a.addr, pca->af); + PF_RULES_WUNLOCK(); + break; + +#undef ERROUT +DIOCCHANGEADDR_error: + PF_RULES_WUNLOCK(); + if (newpa != NULL) + free(newpa, M_PFRULE); + if (kif != NULL) + free(kif, PFI_MTYPE); break; } @@ -3123,19 +2377,17 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pf_ruleset *ruleset; struct pf_anchor *anchor; + PF_RULES_RLOCK(); pr->path[sizeof(pr->path) - 1] = 0; if ((ruleset = pf_find_ruleset(pr->path)) == NULL) { - error = EINVAL; + PF_RULES_RUNLOCK(); + error = ENOENT; break; } pr->nr = 0; if (ruleset->anchor == NULL) { /* XXX kludge for pf_main_ruleset */ -#ifdef __FreeBSD__ RB_FOREACH(anchor, pf_anchor_global, &V_pf_anchors) -#else - RB_FOREACH(anchor, pf_anchor_global, &pf_anchors) -#endif if (anchor->parent == NULL) pr->nr++; } else { @@ -3143,6 +2395,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) &ruleset->anchor->children) pr->nr++; } + PF_RULES_RUNLOCK(); break; } @@ -3152,19 +2405,17 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pf_anchor *anchor; u_int32_t nr = 0; + PF_RULES_RLOCK(); pr->path[sizeof(pr->path) - 1] = 0; if ((ruleset = pf_find_ruleset(pr->path)) == NULL) { - error = EINVAL; + PF_RULES_RUNLOCK(); + error = ENOENT; break; } pr->name[0] = 0; if 
(ruleset->anchor == NULL) { /* XXX kludge for pf_main_ruleset */ -#ifdef __FreeBSD__ RB_FOREACH(anchor, pf_anchor_global, &V_pf_anchors) -#else - RB_FOREACH(anchor, pf_anchor_global, &pf_anchors) -#endif if (anchor->parent == NULL && nr++ == pr->nr) { strlcpy(pr->name, anchor->name, sizeof(pr->name)); @@ -3181,6 +2432,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } if (!pr->name[0]) error = EBUSY; + PF_RULES_RUNLOCK(); break; } @@ -3191,81 +2443,149 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = ENODEV; break; } + PF_RULES_WLOCK(); error = pfr_clr_tables(&io->pfrio_table, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); break; } case DIOCRADDTABLES: { struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_table *pfrts; + size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; break; } - error = pfr_add_tables(io->pfrio_buffer, io->pfrio_size, + totlen = io->pfrio_size * sizeof(struct pfr_table); + pfrts = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfrts, totlen); + if (error) { + free(pfrts, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_add_tables(pfrts, io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + free(pfrts, M_TEMP); break; } case DIOCRDELTABLES: { struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_table *pfrts; + size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; break; } - error = pfr_del_tables(io->pfrio_buffer, io->pfrio_size, + totlen = io->pfrio_size * sizeof(struct pfr_table); + pfrts = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfrts, totlen); + if (error) { + free(pfrts, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_del_tables(pfrts, io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + free(pfrts, M_TEMP); break; } 
case DIOCRGETTABLES: { struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_table *pfrts; + size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; break; } - error = pfr_get_tables(&io->pfrio_table, io->pfrio_buffer, + totlen = io->pfrio_size * sizeof(struct pfr_table); + pfrts = malloc(totlen, M_TEMP, M_WAITOK); + PF_RULES_RLOCK(); + error = pfr_get_tables(&io->pfrio_table, pfrts, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_RUNLOCK(); + if (error == 0) + error = copyout(pfrts, io->pfrio_buffer, totlen); + free(pfrts, M_TEMP); break; } case DIOCRGETTSTATS: { struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_tstats *pfrtstats; + size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_tstats)) { error = ENODEV; break; } - error = pfr_get_tstats(&io->pfrio_table, io->pfrio_buffer, + totlen = io->pfrio_size * sizeof(struct pfr_tstats); + pfrtstats = malloc(totlen, M_TEMP, M_WAITOK); + PF_RULES_WLOCK(); + error = pfr_get_tstats(&io->pfrio_table, pfrtstats, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + if (error == 0) + error = copyout(pfrtstats, io->pfrio_buffer, totlen); + free(pfrtstats, M_TEMP); break; } case DIOCRCLRTSTATS: { struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_table *pfrts; + size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; break; } - error = pfr_clr_tstats(io->pfrio_buffer, io->pfrio_size, + totlen = io->pfrio_size * sizeof(struct pfr_table); + pfrts = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfrts, totlen); + if (error) { + free(pfrts, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_clr_tstats(pfrts, io->pfrio_size, &io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + free(pfrts, M_TEMP); break; } case DIOCRSETTFLAGS: { struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_table *pfrts; + size_t totlen; if 
(io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; break; } - error = pfr_set_tflags(io->pfrio_buffer, io->pfrio_size, + totlen = io->pfrio_size * sizeof(struct pfr_table); + pfrts = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfrts, totlen); + if (error) { + free(pfrts, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_set_tflags(pfrts, io->pfrio_size, io->pfrio_setflag, io->pfrio_clrflag, &io->pfrio_nchange, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + free(pfrts, M_TEMP); break; } @@ -3276,332 +2596,398 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = ENODEV; break; } + PF_RULES_WLOCK(); error = pfr_clr_addrs(&io->pfrio_table, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); break; } case DIOCRADDADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_addr *pfras; + size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } - error = pfr_add_addrs(&io->pfrio_table, io->pfrio_buffer, + totlen = io->pfrio_size * sizeof(struct pfr_addr); + pfras = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfras, totlen); + if (error) { + free(pfras, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_add_addrs(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) + error = copyout(pfras, io->pfrio_buffer, totlen); + free(pfras, M_TEMP); break; } case DIOCRDELADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_addr *pfras; + size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } - error = pfr_del_addrs(&io->pfrio_table, io->pfrio_buffer, + totlen = io->pfrio_size * sizeof(struct pfr_addr); + pfras = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfras, totlen); + if 
(error) { + free(pfras, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_del_addrs(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) + error = copyout(pfras, io->pfrio_buffer, totlen); + free(pfras, M_TEMP); break; } case DIOCRSETADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_addr *pfras; + size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } - error = pfr_set_addrs(&io->pfrio_table, io->pfrio_buffer, + totlen = (io->pfrio_size + io->pfrio_size2) * + sizeof(struct pfr_addr); + pfras = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfras, totlen); + if (error) { + free(pfras, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_set_addrs(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_size2, &io->pfrio_nadd, &io->pfrio_ndel, &io->pfrio_nchange, io->pfrio_flags | PFR_FLAG_USERIOCTL, 0); + PF_RULES_WUNLOCK(); + if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) + error = copyout(pfras, io->pfrio_buffer, totlen); + free(pfras, M_TEMP); break; } case DIOCRGETADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_addr *pfras; + size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } - error = pfr_get_addrs(&io->pfrio_table, io->pfrio_buffer, + totlen = io->pfrio_size * sizeof(struct pfr_addr); + pfras = malloc(totlen, M_TEMP, M_WAITOK); + PF_RULES_RLOCK(); + error = pfr_get_addrs(&io->pfrio_table, pfras, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_RUNLOCK(); + if (error == 0) + error = copyout(pfras, io->pfrio_buffer, totlen); + free(pfras, M_TEMP); break; } case DIOCRGETASTATS: { struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_astats *pfrastats; + size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_astats)) { error = ENODEV; break; } - error = 
pfr_get_astats(&io->pfrio_table, io->pfrio_buffer, + totlen = io->pfrio_size * sizeof(struct pfr_astats); + pfrastats = malloc(totlen, M_TEMP, M_WAITOK); + PF_RULES_RLOCK(); + error = pfr_get_astats(&io->pfrio_table, pfrastats, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_RUNLOCK(); + if (error == 0) + error = copyout(pfrastats, io->pfrio_buffer, totlen); + free(pfrastats, M_TEMP); break; } case DIOCRCLRASTATS: { struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_addr *pfras; + size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } - error = pfr_clr_astats(&io->pfrio_table, io->pfrio_buffer, + totlen = io->pfrio_size * sizeof(struct pfr_addr); + pfras = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfras, totlen); + if (error) { + free(pfras, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_clr_astats(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) + error = copyout(pfras, io->pfrio_buffer, totlen); + free(pfras, M_TEMP); break; } case DIOCRTSTADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_addr *pfras; + size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } - error = pfr_tst_addrs(&io->pfrio_table, io->pfrio_buffer, + totlen = io->pfrio_size * sizeof(struct pfr_addr); + pfras = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfras, totlen); + if (error) { + free(pfras, M_TEMP); + break; + } + PF_RULES_RLOCK(); + error = pfr_tst_addrs(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_nmatch, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_RUNLOCK(); + if (error == 0) + error = copyout(pfras, io->pfrio_buffer, totlen); + free(pfras, M_TEMP); break; } case DIOCRINADEFINE: { struct pfioc_table *io = (struct pfioc_table *)addr; + struct pfr_addr *pfras; + 
size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } - error = pfr_ina_define(&io->pfrio_table, io->pfrio_buffer, + totlen = io->pfrio_size * sizeof(struct pfr_addr); + pfras = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->pfrio_buffer, pfras, totlen); + if (error) { + free(pfras, M_TEMP); + break; + } + PF_RULES_WLOCK(); + error = pfr_ina_define(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_nadd, &io->pfrio_naddr, io->pfrio_ticket, io->pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_WUNLOCK(); + free(pfras, M_TEMP); break; } case DIOCOSFPADD: { struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr; + PF_RULES_WLOCK(); error = pf_osfp_add(io); + PF_RULES_WUNLOCK(); break; } case DIOCOSFPGET: { struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr; + PF_RULES_RLOCK(); error = pf_osfp_get(io); + PF_RULES_RUNLOCK(); break; } case DIOCXBEGIN: { struct pfioc_trans *io = (struct pfioc_trans *)addr; - struct pfioc_trans_e *ioe; - struct pfr_table *table; + struct pfioc_trans_e *ioes, *ioe; + size_t totlen; int i; if (io->esize != sizeof(*ioe)) { error = ENODEV; - goto fail; + break; } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - ioe = malloc(sizeof(*ioe), M_TEMP, M_WAITOK); - table = malloc(sizeof(*table), M_TEMP, M_WAITOK); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif - for (i = 0; i < io->size; i++) { -#ifdef __FreeBSD__ - PF_COPYIN(io->array+i, ioe, sizeof(*ioe), error); + totlen = sizeof(struct pfioc_trans_e) * io->size; + ioes = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->array, ioes, totlen); if (error) { -#else - if (copyin(io->array+i, ioe, sizeof(*ioe))) { -#endif - free(table, M_TEMP); - free(ioe, M_TEMP); - error = EFAULT; - goto fail; - } + free(ioes, M_TEMP); + break; + } + PF_RULES_WLOCK(); + for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { switch (ioe->rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: if (ioe->anchor[0]) { - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, 
M_TEMP); error = EINVAL; goto fail; } if ((error = pf_begin_altq(&ioe->ticket))) { - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); goto fail; } break; #endif /* ALTQ */ case PF_RULESET_TABLE: - bzero(table, sizeof(*table)); - strlcpy(table->pfrt_anchor, ioe->anchor, - sizeof(table->pfrt_anchor)); - if ((error = pfr_ina_begin(table, + { + struct pfr_table table; + + bzero(&table, sizeof(table)); + strlcpy(table.pfrt_anchor, ioe->anchor, + sizeof(table.pfrt_anchor)); + if ((error = pfr_ina_begin(&table, &ioe->ticket, NULL, 0))) { - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); goto fail; } break; + } default: if ((error = pf_begin_rules(&ioe->ticket, ioe->rs_num, ioe->anchor))) { - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); goto fail; } break; } -#ifdef __FreeBSD__ - PF_COPYOUT(ioe, io->array+i, sizeof(io->array[i]), - error); - if (error) { -#else - if (copyout(ioe, io->array+i, sizeof(io->array[i]))) { -#endif - free(table, M_TEMP); - free(ioe, M_TEMP); - error = EFAULT; - goto fail; - } } - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + error = copyout(ioes, io->array, totlen); + free(ioes, M_TEMP); break; } case DIOCXROLLBACK: { struct pfioc_trans *io = (struct pfioc_trans *)addr; - struct pfioc_trans_e *ioe; - struct pfr_table *table; + struct pfioc_trans_e *ioe, *ioes; + size_t totlen; int i; if (io->esize != sizeof(*ioe)) { error = ENODEV; - goto fail; + break; } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - ioe = malloc(sizeof(*ioe), M_TEMP, M_WAITOK); - table = malloc(sizeof(*table), M_TEMP, M_WAITOK); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif - for (i = 0; i < io->size; i++) { -#ifdef __FreeBSD__ - PF_COPYIN(io->array+i, ioe, sizeof(*ioe), error); - if (error) { -#else - if (copyin(io->array+i, ioe, sizeof(*ioe))) { -#endif - free(table, M_TEMP); - free(ioe, M_TEMP); - error = EFAULT; - goto fail; - } + totlen = 
sizeof(struct pfioc_trans_e) * io->size; + ioes = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->array, ioes, totlen); + if (error) { + free(ioes, M_TEMP); + break; + } + PF_RULES_WLOCK(); + for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { switch (ioe->rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: if (ioe->anchor[0]) { - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); error = EINVAL; goto fail; } if ((error = pf_rollback_altq(ioe->ticket))) { - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); goto fail; /* really bad */ } break; #endif /* ALTQ */ case PF_RULESET_TABLE: - bzero(table, sizeof(*table)); - strlcpy(table->pfrt_anchor, ioe->anchor, - sizeof(table->pfrt_anchor)); - if ((error = pfr_ina_rollback(table, + { + struct pfr_table table; + + bzero(&table, sizeof(table)); + strlcpy(table.pfrt_anchor, ioe->anchor, + sizeof(table.pfrt_anchor)); + if ((error = pfr_ina_rollback(&table, ioe->ticket, NULL, 0))) { - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); goto fail; /* really bad */ } break; + } default: if ((error = pf_rollback_rules(ioe->ticket, ioe->rs_num, ioe->anchor))) { - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); goto fail; /* really bad */ } break; } } - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); break; } case DIOCXCOMMIT: { struct pfioc_trans *io = (struct pfioc_trans *)addr; - struct pfioc_trans_e *ioe; - struct pfr_table *table; + struct pfioc_trans_e *ioe, *ioes; struct pf_ruleset *rs; + size_t totlen; int i; if (io->esize != sizeof(*ioe)) { error = ENODEV; - goto fail; + break; } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - ioe = malloc(sizeof(*ioe), M_TEMP, M_WAITOK); - table = malloc(sizeof(*table), M_TEMP, M_WAITOK); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif - /* first makes sure everything will succeed */ - for (i = 0; i < io->size; i++) { 
-#ifdef __FreeBSD__ - PF_COPYIN(io->array+i, ioe, sizeof(*ioe), error); - if (error) { -#else - if (copyin(io->array+i, ioe, sizeof(*ioe))) { -#endif - free(table, M_TEMP); - free(ioe, M_TEMP); - error = EFAULT; - goto fail; - } + totlen = sizeof(struct pfioc_trans_e) * io->size; + ioes = malloc(totlen, M_TEMP, M_WAITOK); + error = copyin(io->array, ioes, totlen); + if (error) { + free(ioes, M_TEMP); + break; + } + PF_RULES_WLOCK(); + /* First makes sure everything will succeed. */ + for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { switch (ioe->rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: if (ioe->anchor[0]) { - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); error = EINVAL; goto fail; } -#ifdef __FreeBSD__ if (!V_altqs_inactive_open || ioe->ticket != V_ticket_altqs_inactive) { -#else - if (!altqs_inactive_open || ioe->ticket != - ticket_altqs_inactive) { -#endif - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); error = EBUSY; goto fail; } @@ -3611,8 +2997,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) rs = pf_find_ruleset(ioe->anchor); if (rs == NULL || !rs->topen || ioe->ticket != rs->tticket) { - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); error = EBUSY; goto fail; } @@ -3620,8 +3006,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) default: if (ioe->rs_num < 0 || ioe->rs_num >= PF_RULESET_MAX) { - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); error = EINVAL; goto fail; } @@ -3630,175 +3016,141 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) !rs->rules[ioe->rs_num].inactive.open || rs->rules[ioe->rs_num].inactive.ticket != ioe->ticket) { - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); error = EBUSY; goto fail; } break; } } - /* now do the commit - no errors should happen here */ - 
for (i = 0; i < io->size; i++) { -#ifdef __FreeBSD__ - PF_COPYIN(io->array+i, ioe, sizeof(*ioe), error); - if (error) { -#else - if (copyin(io->array+i, ioe, sizeof(*ioe))) { -#endif - free(table, M_TEMP); - free(ioe, M_TEMP); - error = EFAULT; - goto fail; - } + /* Now do the commit - no errors should happen here. */ + for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { switch (ioe->rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: if ((error = pf_commit_altq(ioe->ticket))) { - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); goto fail; /* really bad */ } break; #endif /* ALTQ */ case PF_RULESET_TABLE: - bzero(table, sizeof(*table)); - strlcpy(table->pfrt_anchor, ioe->anchor, - sizeof(table->pfrt_anchor)); - if ((error = pfr_ina_commit(table, ioe->ticket, - NULL, NULL, 0))) { - free(table, M_TEMP); - free(ioe, M_TEMP); + { + struct pfr_table table; + + bzero(&table, sizeof(table)); + strlcpy(table.pfrt_anchor, ioe->anchor, + sizeof(table.pfrt_anchor)); + if ((error = pfr_ina_commit(&table, + ioe->ticket, NULL, NULL, 0))) { + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); goto fail; /* really bad */ } break; + } default: if ((error = pf_commit_rules(ioe->ticket, ioe->rs_num, ioe->anchor))) { - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); goto fail; /* really bad */ } break; } } - free(table, M_TEMP); - free(ioe, M_TEMP); + PF_RULES_WUNLOCK(); + free(ioes, M_TEMP); break; } case DIOCGETSRCNODES: { struct pfioc_src_nodes *psn = (struct pfioc_src_nodes *)addr; + struct pf_srchash *sh; struct pf_src_node *n, *p, *pstore; - u_int32_t nr = 0; - int space = psn->psn_len; - - if (space == 0) { -#ifdef __FreeBSD__ - RB_FOREACH(n, pf_src_tree, &V_tree_src_tracking) -#else - RB_FOREACH(n, pf_src_tree, &tree_src_tracking) -#endif - nr++; + uint32_t i, nr = 0; + + if (psn->psn_len == 0) { + for (i = 0, sh = V_pf_srchash; i < V_pf_srchashmask; + i++, sh++) { + PF_HASHROW_LOCK(sh); + LIST_FOREACH(n, &sh->nodes, 
entry) + nr++; + PF_HASHROW_UNLOCK(sh); + } psn->psn_len = sizeof(struct pf_src_node) * nr; break; } -#ifdef __FreeBSD__ - PF_UNLOCK(); -#endif - pstore = malloc(sizeof(*pstore), M_TEMP, M_WAITOK); -#ifdef __FreeBSD__ - PF_LOCK(); -#endif - p = psn->psn_src_nodes; -#ifdef __FreeBSD__ - RB_FOREACH(n, pf_src_tree, &V_tree_src_tracking) { -#else - RB_FOREACH(n, pf_src_tree, &tree_src_tracking) { -#endif - int secs = time_second, diff; + p = pstore = malloc(psn->psn_len, M_TEMP, M_WAITOK); + for (i = 0, sh = V_pf_srchash; i < V_pf_srchashmask; + i++, sh++) { + PF_HASHROW_LOCK(sh); + LIST_FOREACH(n, &sh->nodes, entry) { + int secs = time_uptime, diff; if ((nr + 1) * sizeof(*p) > (unsigned)psn->psn_len) break; - bcopy(n, pstore, sizeof(*pstore)); + bcopy(n, p, sizeof(struct pf_src_node)); if (n->rule.ptr != NULL) - pstore->rule.nr = n->rule.ptr->nr; - pstore->creation = secs - pstore->creation; - if (pstore->expire > secs) - pstore->expire -= secs; + p->rule.nr = n->rule.ptr->nr; + p->creation = secs - p->creation; + if (p->expire > secs) + p->expire -= secs; else - pstore->expire = 0; + p->expire = 0; - /* adjust the connection rate estimate */ + /* Adjust the connection rate estimate. 
*/ diff = secs - n->conn_rate.last; if (diff >= n->conn_rate.seconds) - pstore->conn_rate.count = 0; + p->conn_rate.count = 0; else - pstore->conn_rate.count -= + p->conn_rate.count -= n->conn_rate.count * diff / n->conn_rate.seconds; - -#ifdef __FreeBSD__ - PF_COPYOUT(pstore, p, sizeof(*p), error); -#else - error = copyout(pstore, p, sizeof(*p)); -#endif - if (error) { - free(pstore, M_TEMP); - goto fail; - } p++; nr++; + } + PF_HASHROW_UNLOCK(sh); + } + error = copyout(pstore, psn->psn_src_nodes, + sizeof(struct pf_src_node) * nr); + if (error) { + free(pstore, M_TEMP); + break; } psn->psn_len = sizeof(struct pf_src_node) * nr; - free(pstore, M_TEMP); break; } case DIOCCLRSRCNODES: { - struct pf_src_node *n; - struct pf_state *state; -#ifdef __FreeBSD__ - RB_FOREACH(state, pf_state_tree_id, &V_tree_id) { -#else - RB_FOREACH(state, pf_state_tree_id, &tree_id) { -#endif - state->src_node = NULL; - state->nat_src_node = NULL; - } -#ifdef __FreeBSD__ - RB_FOREACH(n, pf_src_tree, &V_tree_src_tracking) { -#else - RB_FOREACH(n, pf_src_tree, &tree_src_tracking) { -#endif - n->expire = 1; - n->states = 0; - } - pf_purge_expired_src_nodes(1); -#ifdef __FreeBSD__ + pf_clear_srcnodes(NULL); + pf_purge_expired_src_nodes(); V_pf_status.src_nodes = 0; -#else - pf_status.src_nodes = 0; -#endif break; } case DIOCKILLSRCNODES: { - struct pf_src_node *sn; - struct pf_state *s; struct pfioc_src_node_kill *psnk = (struct pfioc_src_node_kill *)addr; - u_int killed = 0; - -#ifdef __FreeBSD__ - RB_FOREACH(sn, pf_src_tree, &V_tree_src_tracking) { -#else - RB_FOREACH(sn, pf_src_tree, &tree_src_tracking) { -#endif + struct pf_srchash *sh; + struct pf_src_node *sn; + u_int i, killed = 0; + + for (i = 0, sh = V_pf_srchash; i < V_pf_srchashmask; + i++, sh++) { + /* + * XXXGL: we don't ever acquire sources hash lock + * but if we ever do, the below call to pf_clear_srcnodes() + * would lead to a LOR. 
+ */ + PF_HASHROW_LOCK(sh); + LIST_FOREACH(sn, &sh->nodes, entry) if (PF_MATCHA(psnk->psnk_src.neg, &psnk->psnk_src.addr.v.a.addr, &psnk->psnk_src.addr.v.a.mask, @@ -3808,27 +3160,16 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) &psnk->psnk_dst.addr.v.a.mask, &sn->raddr, sn->af)) { /* Handle state to src_node linkage */ - if (sn->states != 0) { - RB_FOREACH(s, pf_state_tree_id, -#ifdef __FreeBSD__ - &V_tree_id) { -#else - &tree_id) { -#endif - if (s->src_node == sn) - s->src_node = NULL; - if (s->nat_src_node == sn) - s->nat_src_node = NULL; - } - sn->states = 0; - } + if (sn->states != 0) + pf_clear_srcnodes(sn); sn->expire = 1; killed++; } + PF_HASHROW_UNLOCK(sh); } if (killed > 0) - pf_purge_expired_src_nodes(1); + pf_purge_expired_src_nodes(); psnk->psnk_killed = killed; break; @@ -3837,47 +3178,56 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCSETHOSTID: { u_int32_t *hostid = (u_int32_t *)addr; -#ifdef __FreeBSD__ + PF_RULES_WLOCK(); if (*hostid == 0) V_pf_status.hostid = arc4random(); else V_pf_status.hostid = *hostid; -#else - if (*hostid == 0) - pf_status.hostid = arc4random(); - else - pf_status.hostid = *hostid; -#endif + PF_RULES_WUNLOCK(); break; } case DIOCOSFPFLUSH: + PF_RULES_WLOCK(); pf_osfp_flush(); + PF_RULES_WUNLOCK(); break; case DIOCIGETIFACES: { struct pfioc_iface *io = (struct pfioc_iface *)addr; + struct pfi_kif *ifstore; + size_t bufsiz; if (io->pfiio_esize != sizeof(struct pfi_kif)) { error = ENODEV; break; } - error = pfi_get_ifaces(io->pfiio_name, io->pfiio_buffer, - &io->pfiio_size); + + bufsiz = io->pfiio_size * sizeof(struct pfi_kif); + ifstore = malloc(bufsiz, M_TEMP, M_WAITOK); + PF_RULES_RLOCK(); + pfi_get_ifaces(io->pfiio_name, ifstore, &io->pfiio_size); + PF_RULES_RUNLOCK(); + error = copyout(ifstore, io->pfiio_buffer, bufsiz); + free(ifstore, M_TEMP); break; } case DIOCSETIFFLAG: { struct pfioc_iface *io = (struct pfioc_iface *)addr; + PF_RULES_WLOCK(); error = 
pfi_set_flags(io->pfiio_name, io->pfiio_flags); + PF_RULES_WUNLOCK(); break; } case DIOCCLRIFFLAG: { struct pfioc_iface *io = (struct pfioc_iface *)addr; + PF_RULES_WLOCK(); error = pfi_clear_flags(io->pfiio_name, io->pfiio_flags); + PF_RULES_WUNLOCK(); break; } @@ -3886,27 +3236,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } fail: -#ifdef __FreeBSD__ - PF_UNLOCK(); - - if (flags & FWRITE) - sx_xunlock(&V_pf_consistency_lock); - else - sx_sunlock(&V_pf_consistency_lock); -#else - splx(s); - if (flags & FWRITE) - rw_exit_write(&pf_consistency_lock); - else - rw_exit_read(&pf_consistency_lock); -#endif - CURVNET_RESTORE(); return (error); } -#ifdef __FreeBSD__ void pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) { @@ -3927,12 +3261,12 @@ pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) /* copy from state */ strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname)); bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr)); - sp->creation = htonl(time_second - st->creation); + sp->creation = htonl(time_uptime - st->creation); sp->expire = pf_state_expires(st); - if (sp->expire <= time_second) + if (sp->expire <= time_uptime) sp->expire = htonl(0); else - sp->expire = htonl(sp->expire - time_second); + sp->expire = htonl(sp->expire - time_uptime); sp->direction = st->direction; sp->log = st->log; @@ -3943,7 +3277,7 @@ pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) if (st->nat_src_node) sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE; - bcopy(&st->id, &sp->id, sizeof(sp->id)); + sp->id = st->id; sp->creatorid = st->creatorid; pf_state_peer_hton(&st->src, &sp->src); pf_state_peer_hton(&st->dst, &sp->dst); @@ -3968,33 +3302,43 @@ pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) } +static void +pf_tbladdr_copyout(struct pf_addr_wrap *aw) +{ + struct pfr_ktable *kt; + + KASSERT(aw->type == PF_ADDR_TABLE, ("%s: type %u", __func__, aw->type)); + + kt = aw->p.tbl; + if 
(!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) + kt = kt->pfrkt_root; + aw->p.tbl = NULL; + aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ? + kt->pfrkt_cnt : -1; +} + /* * XXX - Check for version missmatch!!! */ static void pf_clear_states(void) { - struct pf_state *state; - -#ifdef __FreeBSD__ - RB_FOREACH(state, pf_state_tree_id, &V_tree_id) { -#else - RB_FOREACH(state, pf_state_tree_id, &tree_id) { -#endif - state->timeout = PFTM_PURGE; -#if NPFSYNC - /* don't send out individual delete messages */ - state->sync_state = PFSTATE_NOSYNC; -#endif - pf_unlink_state(state); + struct pf_state *s; + u_int i; + + for (i = 0; i <= V_pf_hashmask; i++) { + struct pf_idhash *ih = &V_pf_idhash[i]; +relock: + PF_HASHROW_LOCK(ih); + LIST_FOREACH(s, &ih->states, entry) { + s->timeout = PFTM_PURGE; + /* Don't send out individual delete messages. */ + s->sync_state = PFSTATE_NOSYNC; + pf_unlink_state(s, PF_ENTER_LOCKED); + goto relock; + } + PF_HASHROW_UNLOCK(ih); } - -#if 0 /* NPFSYNC */ -/* - * XXX This is called on module unload, we do not want to sync that over? 
*/ - */ - pfsync_clear_states(V_pf_status.hostid, psk->psk_ifname); -#endif } static int @@ -4012,24 +3356,38 @@ pf_clear_tables(void) } static void -pf_clear_srcnodes(void) +pf_clear_srcnodes(struct pf_src_node *n) { - struct pf_src_node *n; - struct pf_state *state; + struct pf_state *s; + int i; -#ifdef __FreeBSD__ - RB_FOREACH(state, pf_state_tree_id, &V_tree_id) { -#else - RB_FOREACH(state, pf_state_tree_id, &tree_id) { -#endif - state->src_node = NULL; - state->nat_src_node = NULL; + for (i = 0; i <= V_pf_hashmask; i++) { + struct pf_idhash *ih = &V_pf_idhash[i]; + + PF_HASHROW_LOCK(ih); + LIST_FOREACH(s, &ih->states, entry) { + if (n == NULL || n == s->src_node) + s->src_node = NULL; + if (n == NULL || n == s->nat_src_node) + s->nat_src_node = NULL; + } + PF_HASHROW_UNLOCK(ih); } -#ifdef __FreeBSD__ - RB_FOREACH(n, pf_src_tree, &V_tree_src_tracking) { -#else - RB_FOREACH(n, pf_src_tree, &tree_src_tracking) { -#endif + + if (n == NULL) { + struct pf_srchash *sh; + + for (i = 0, sh = V_pf_srchash; i < V_pf_srchashmask; + i++, sh++) { + PF_HASHROW_LOCK(sh); + LIST_FOREACH(n, &sh->nodes, entry) { + n->expire = 1; + n->states = 0; + } + PF_HASHROW_UNLOCK(sh); + } + } else { + /* XXX: hash slot should already be locked here. */ n->expire = 1; n->states = 0; } @@ -4047,7 +3405,7 @@ shutdown_pf(void) int error = 0; u_int32_t t[5]; char nn = '\0'; - + V_pf_status.running = 0; do { if ((error = pf_begin_rules(&t[0], PF_RULESET_SCRUB, &nn)) @@ -4058,22 +3416,22 @@ shutdown_pf(void) if ((error = pf_begin_rules(&t[1], PF_RULESET_FILTER, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: FILTER\n")); - break; /* XXX: rollback? */ + break; /* XXX: rollback? */ } if ((error = pf_begin_rules(&t[2], PF_RULESET_NAT, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: NAT\n")); - break; /* XXX: rollback? */ + break; /* XXX: rollback? 
*/ } if ((error = pf_begin_rules(&t[3], PF_RULESET_BINAT, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: BINAT\n")); - break; /* XXX: rollback? */ + break; /* XXX: rollback? */ } if ((error = pf_begin_rules(&t[4], PF_RULESET_RDR, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: RDR\n")); - break; /* XXX: rollback? */ + break; /* XXX: rollback? */ } /* XXX: these should always succeed here */ @@ -4086,17 +3444,17 @@ shutdown_pf(void) if ((error = pf_clear_tables()) != 0) break; - #ifdef ALTQ +#ifdef ALTQ if ((error = pf_begin_altq(&t[0])) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: ALTQ\n")); break; } pf_commit_altq(t[0]); - #endif +#endif pf_clear_states(); - pf_clear_srcnodes(); + pf_clear_srcnodes(NULL); /* status does not use malloced mem so no need to cleanup */ /* fingerprints and interfaces have thier own cleanup code */ @@ -4116,7 +3474,7 @@ pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, * in network stack. OpenBSD's network stack have converted * ip_len/ip_off to host byte order frist as FreeBSD. * Now this is not true anymore , so we should convert back to network - * byte order. + * byte order. */ struct ip *h = NULL; int chk; @@ -4128,7 +3486,7 @@ pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, HTONS(h->ip_off); } CURVNET_SET(ifp->if_vnet); - chk = pf_test(PF_IN, ifp, m, NULL, inp); + chk = pf_test(PF_IN, ifp, m, inp); CURVNET_RESTORE(); if (chk && *m) { m_freem(*m); @@ -4153,7 +3511,7 @@ pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, * in network stack. OpenBSD's network stack have converted * ip_len/ip_off to host byte order frist as FreeBSD. * Now this is not true anymore , so we should convert back to network - * byte order. + * byte order. 
*/ struct ip *h = NULL; int chk; @@ -4170,7 +3528,7 @@ pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, HTONS(h->ip_off); } CURVNET_SET(ifp->if_vnet); - chk = pf_test(PF_OUT, ifp, m, NULL, inp); + chk = pf_test(PF_OUT, ifp, m, inp); CURVNET_RESTORE(); if (chk && *m) { m_freem(*m); @@ -4203,8 +3561,7 @@ pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, * filtering we have change this to lo0 as it is the case in IPv4. */ CURVNET_SET(ifp->if_vnet); - chk = pf_test6(PF_IN, (*m)->m_flags & M_LOOP ? V_loif : ifp, m, - NULL, inp); + chk = pf_test6(PF_IN, (*m)->m_flags & M_LOOP ? V_loif : ifp, m, inp); CURVNET_RESTORE(); if (chk && *m) { m_freem(*m); @@ -4231,7 +3588,7 @@ pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, (*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } CURVNET_SET(ifp->if_vnet); - chk = pf_test6(PF_OUT, ifp, m, NULL, inp); + chk = pf_test6(PF_OUT, ifp, m, inp); CURVNET_RESTORE(); if (chk && *m) { m_freem(*m); @@ -4251,10 +3608,8 @@ hook_pf(void) struct pfil_head *pfh_inet6; #endif - PF_UNLOCK_ASSERT(); - if (V_pf_pfil_hooked) - return (0); + return (0); #ifdef INET pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); @@ -4292,8 +3647,6 @@ dehook_pf(void) struct pfil_head *pfh_inet6; #endif - PF_UNLOCK_ASSERT(); - if (V_pf_pfil_hooked == 0) return (0); @@ -4323,6 +3676,8 @@ dehook_pf(void) static int pf_load(void) { + int error; + VNET_ITERATOR_DECL(vnet_iter); VNET_LIST_RLOCK(); @@ -4330,19 +3685,17 @@ pf_load(void) CURVNET_SET(vnet_iter); V_pf_pfil_hooked = 0; V_pf_end_threads = 0; - V_debug_pfugidhack = 0; TAILQ_INIT(&V_pf_tags); TAILQ_INIT(&V_pf_qids); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK(); - init_pf_mutex(); + rw_init(&pf_rules_lock, "pf rulesets"); + pf_dev = make_dev(&pf_cdevsw, 0, 0, 0, 0600, PF_NAME); - init_zone_var(); - sx_init(&V_pf_consistency_lock, "pf_statetbl_lock"); - if (pfattach() < 0) - return (ENOMEM); + if ((error = pfattach()) != 0) + return (error); return (0); } @@ -4352,10 
+3705,10 @@ pf_unload(void) { int error = 0; - PF_LOCK(); + PF_RULES_WLOCK(); V_pf_status.running = 0; - PF_UNLOCK(); - m_addr_chg_pf_p = NULL; + PF_RULES_WUNLOCK(); + swi_remove(V_pf_swi_cookie); error = dehook_pf(); if (error) { /* @@ -4366,22 +3719,23 @@ pf_unload(void) printf("%s : pfil unregisteration fail\n", __FUNCTION__); return error; } - PF_LOCK(); + PF_RULES_WLOCK(); shutdown_pf(); V_pf_end_threads = 1; while (V_pf_end_threads < 2) { wakeup_one(pf_purge_thread); - msleep(pf_purge_thread, &pf_task_mtx, 0, "pftmo", hz); + rw_sleep(pf_purge_thread, &pf_rules_lock, 0, "pftmo", 0); } + pf_normalize_cleanup(); pfi_cleanup(); + pfr_cleanup(); pf_osfp_flush(); - pf_osfp_cleanup(); - cleanup_pf_zone(); - PF_UNLOCK(); + pf_cleanup(); + PF_RULES_WUNLOCK(); destroy_dev(pf_dev); - destroy_pf_mutex(); - sx_destroy(&V_pf_consistency_lock); - return error; + rw_destroy(&pf_rules_lock); + + return (error); } static int @@ -4406,9 +3760,10 @@ pf_modevent(module_t mod, int type, void *data) error = EINVAL; break; } - return error; + + return (error); } - + static moduledata_t pf_mod = { "pf", pf_modevent, @@ -4417,4 +3772,3 @@ static moduledata_t pf_mod = { DECLARE_MODULE(pf, pf_mod, SI_SUB_PSEUDO, SI_ORDER_FIRST); MODULE_VERSION(pf, PF_MODVER); -#endif /* __FreeBSD__ */ diff --git a/sys/contrib/pf/net/pf_lb.c b/sys/contrib/pf/net/pf_lb.c index 4adc6f0..5b47852 100644 --- a/sys/contrib/pf/net/pf_lb.c +++ b/sys/contrib/pf/net/pf_lb.c @@ -35,136 +35,31 @@ * */ -#ifdef __FreeBSD__ -#include "opt_inet.h" -#include "opt_inet6.h" - #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#endif -#ifdef __FreeBSD__ -#include "opt_bpf.h" #include "opt_pf.h" - -#ifdef DEV_BPF -#define NBPFILTER DEV_BPF -#else -#define NBPFILTER 0 -#endif - -#ifdef DEV_PFLOG -#define NPFLOG DEV_PFLOG -#else -#define NPFLOG 0 -#endif - -#ifdef DEV_PFSYNC -#define NPFSYNC DEV_PFSYNC -#else -#define NPFSYNC 0 -#endif - -#ifdef DEV_PFLOW -#define NPFLOW DEV_PFLOW -#else -#define NPFLOW 0 -#endif - -#else 
-#include "bpfilter.h" -#include "pflog.h" -#include "pfsync.h" -#include "pflow.h" -#endif +#include "opt_inet.h" +#include "opt_inet6.h" #include <sys/param.h> -#include <sys/systm.h> -#include <sys/mbuf.h> -#include <sys/filio.h> #include <sys/socket.h> -#include <sys/socketvar.h> -#include <sys/kernel.h> -#include <sys/time.h> -#ifdef __FreeBSD__ #include <sys/sysctl.h> -#endif -#ifndef __FreeBSD__ -#include <sys/pool.h> -#endif -#include <sys/proc.h> -#ifdef __FreeBSD__ -#include <sys/kthread.h> -#include <sys/lock.h> -#include <sys/sx.h> -#else -#include <sys/rwlock.h> -#endif - -#ifdef __FreeBSD__ -#include <sys/md5.h> -#else -#include <crypto/md5.h> -#endif #include <net/if.h> -#include <net/if_types.h> -#include <net/bpf.h> -#include <net/route.h> -#include <net/radix_mpath.h> - -#include <netinet/in.h> -#include <netinet/in_var.h> -#include <netinet/in_systm.h> -#include <netinet/ip.h> -#include <netinet/ip_var.h> -#include <netinet/tcp.h> -#include <netinet/tcp_seq.h> -#include <netinet/udp.h> -#include <netinet/ip_icmp.h> -#include <netinet/in_pcb.h> -#include <netinet/tcp_timer.h> -#include <netinet/tcp_var.h> -#include <netinet/udp_var.h> -#include <netinet/icmp_var.h> -#include <netinet/if_ether.h> - -#ifndef __FreeBSD__ -#include <dev/rndvar.h> -#endif #include <net/pfvar.h> #include <net/if_pflog.h> -#include <net/if_pflow.h> - -#if NPFSYNC > 0 -#include <net/if_pfsync.h> -#endif /* NPFSYNC > 0 */ - -#ifdef INET6 -#include <netinet/ip6.h> -#include <netinet/in_pcb.h> -#include <netinet/icmp6.h> -#include <netinet6/nd6.h> -#endif /* INET6 */ - +#include <net/pf_mtag.h> -#ifdef __FreeBSD__ #define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x -#else -#define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x -#endif - -/* - * Global variables - */ -void pf_hash(struct pf_addr *, struct pf_addr *, +static void pf_hash(struct pf_addr *, struct pf_addr *, struct pf_poolhashkey *, sa_family_t); -struct pf_rule *pf_match_translation(struct 
pf_pdesc *, struct mbuf *, +static struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *, int, int, struct pfi_kif *, struct pf_addr *, u_int16_t, struct pf_addr *, u_int16_t, int); -int pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *, +static int pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *, struct pf_addr *, struct pf_addr *, u_int16_t, struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t, struct pf_src_node **); @@ -185,7 +80,7 @@ int pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *, /* * hash function based on bridge_hash in if_bridge.c */ -void +static void pf_hash(struct pf_addr *inaddr, struct pf_addr *hash, struct pf_poolhashkey *key, sa_family_t af) { @@ -226,7 +121,7 @@ pf_hash(struct pf_addr *inaddr, struct pf_addr *hash, } } -struct pf_rule * +static struct pf_rule * pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport, struct pf_addr *daddr, u_int16_t dport, int rs_num) @@ -279,11 +174,8 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, !pf_match_port(dst->port_op, dst->port[0], dst->port[1], dport)) r = r->skip[PF_SKIP_DST_PORT].ptr; -#ifdef __FreeBSD__ - else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag)) -#else - else if (r->match_tag && !pf_match_tag(m, r, &tag)) -#endif + else if (r->match_tag && !pf_match_tag(m, r, &tag, + pd->pf_mtag ? 
pd->pf_mtag->tag : 0)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto != IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m, @@ -304,19 +196,19 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r, NULL, NULL); } -#ifdef __FreeBSD__ - if (pf_tag_packet(m, tag, rtableid, pd->pf_mtag)) -#else - if (pf_tag_packet(m, tag, rtableid)) -#endif + + if (tag > 0 && pf_tag_packet(m, pd, tag)) return (NULL); + if (rtableid >= 0) + M_SETFIB(m, rtableid); + if (rm != NULL && (rm->action == PF_NONAT || rm->action == PF_NORDR || rm->action == PF_NOBINAT)) return (NULL); return (rm); } -int +static int pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport, struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high, @@ -370,20 +262,12 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, high = tmp; } /* low < high */ -#ifdef __FreeBSD__ cut = htonl(arc4random()) % (1 + high - low) + low; -#else - cut = arc4random_uniform(1 + high - low) + low; -#endif /* low <= cut <= high */ for (tmp = cut; tmp <= high; ++(tmp)) { key.port[0] = htons(tmp); if (pf_find_state_all(&key, PF_IN, NULL) == -#ifdef __FreeBSD__ NULL) { -#else - NULL && !in_baddynamic(tmp, proto)) { -#endif *nport = htons(tmp); return (0); } @@ -391,11 +275,7 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, for (tmp = cut - 1; tmp >= low; --(tmp)) { key.port[0] = htons(tmp); if (pf_find_state_all(&key, PF_IN, NULL) == -#ifdef __FreeBSD__ NULL) { -#else - NULL && !in_baddynamic(tmp, proto)) { -#endif *nport = htons(tmp); return (0); } @@ -422,38 +302,17 @@ int pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn) { - unsigned char hash[16]; struct pf_pool *rpool = &r->rpool; - struct pf_addr *raddr = 
&rpool->cur->addr.v.a.addr; - struct pf_addr *rmask = &rpool->cur->addr.v.a.mask; - struct pf_pooladdr *acur = rpool->cur; - struct pf_src_node k; + struct pf_addr *raddr = NULL, *rmask = NULL; if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR && (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { - k.af = af; - PF_ACPY(&k.addr, saddr, af); - if (r->rule_flag & PFRULE_RULESRCTRACK || - r->rpool.opts & PF_POOL_STICKYADDR) - k.rule.ptr = r; - else - k.rule.ptr = NULL; -#ifdef __FreeBSD__ - V_pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; - *sn = RB_FIND(pf_src_tree, &V_tree_src_tracking, &k); -#else - pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; - *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k); -#endif + *sn = pf_find_src_node(saddr, r, af, 0); if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) { PF_ACPY(naddr, &(*sn)->raddr, af); -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif printf("pf_map_addr: src tracking maps "); - pf_print_host(&k.addr, 0, af); + pf_print_host(saddr, 0, af); printf(" to "); pf_print_host(naddr, 0, af); printf("\n"); @@ -542,31 +401,58 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, } break; case PF_POOL_SRCHASH: + { + unsigned char hash[16]; + pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af); PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af); break; + } case PF_POOL_ROUNDROBIN: + { + struct pf_pooladdr *acur = rpool->cur; + + /* + * XXXGL: in the round-robin case we need to store + * the round-robin machine state in the rule, thus + * forwarding thread needs to modify rule. + * + * This is done w/o locking, because performance is assumed + * more important than round-robin precision. + * + * In the simpliest case we just update the "rpool->cur" + * pointer. However, if pool contains tables or dynamic + * addresses, then "tblidx" is also used to store machine + * state. 
Since "tblidx" is int, concurrent access to it can't + * lead to inconsistence, only to lost of precision. + * + * Things get worse, if table contains not hosts, but + * prefixes. In this case counter also stores machine state, + * and for IPv6 address, counter can't be updated atomically. + * Probably, using round-robin on a table containing IPv6 + * prefixes (or even IPv4) would cause a panic. + */ + if (rpool->cur->addr.type == PF_ADDR_TABLE) { if (!pfr_pool_get(rpool->cur->addr.p.tbl, - &rpool->tblidx, &rpool->counter, - &raddr, &rmask, af)) + &rpool->tblidx, &rpool->counter, af)) goto get_addr; } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, - &rpool->tblidx, &rpool->counter, - &raddr, &rmask, af)) + &rpool->tblidx, &rpool->counter, af)) goto get_addr; } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af)) goto get_addr; try_next: - if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL) + if (TAILQ_NEXT(rpool->cur, entries) == NULL) rpool->cur = TAILQ_FIRST(&rpool->list); + else + rpool->cur = TAILQ_NEXT(rpool->cur, entries); if (rpool->cur->addr.type == PF_ADDR_TABLE) { rpool->tblidx = -1; if (pfr_pool_get(rpool->cur->addr.p.tbl, - &rpool->tblidx, &rpool->counter, - &raddr, &rmask, af)) { + &rpool->tblidx, &rpool->counter, af)) { /* table contains no address of type 'af' */ if (rpool->cur != acur) goto try_next; @@ -575,8 +461,7 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { rpool->tblidx = -1; if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, - &rpool->tblidx, &rpool->counter, - &raddr, &rmask, af)) { + &rpool->tblidx, &rpool->counter, af)) { /* table contains no address of type 'af' */ if (rpool->cur != acur) goto try_next; @@ -594,15 +479,12 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, PF_ACPY(init_addr, naddr, af); PF_AINC(&rpool->counter, af); break; + } } if (*sn != NULL) 
PF_ACPY(&(*sn)->raddr, naddr, af); -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC && -#else - if (pf_status.debug >= PF_DEBUG_MISC && -#endif (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { printf("pf_map_addr: selected address "); pf_print_host(naddr, 0, af); @@ -615,13 +497,17 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, struct pf_rule * pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, struct pfi_kif *kif, struct pf_src_node **sn, - struct pf_state_key **skw, struct pf_state_key **sks, struct pf_state_key **skp, struct pf_state_key **nkp, struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t sport, u_int16_t dport) { struct pf_rule *r = NULL; + struct pf_addr *naddr; + uint16_t *nport; + PF_RULES_RASSERT(); + KASSERT(*skp == NULL, ("*skp not NULL")); + KASSERT(*nkp == NULL, ("*nkp not NULL")); if (direction == PF_OUT) { r = pf_match_translation(pd, m, off, direction, kif, saddr, @@ -637,157 +523,141 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, saddr, sport, daddr, dport, PF_RULESET_BINAT); } - if (r != NULL) { - struct pf_addr *naddr; - u_int16_t *nport; - - if (pf_state_key_setup(pd, r, skw, sks, skp, nkp, - saddr, daddr, sport, dport)) - return r; - - /* XXX We only modify one side for now. 
*/ - naddr = &(*nkp)->addr[1]; - nport = &(*nkp)->port[1]; - - switch (r->action) { - case PF_NONAT: - case PF_NOBINAT: - case PF_NORDR: - return (NULL); - case PF_NAT: - if (pf_get_sport(pd->af, pd->proto, r, saddr, - daddr, dport, naddr, nport, r->rpool.proxy_port[0], - r->rpool.proxy_port[1], sn)) { - DPFPRINTF(PF_DEBUG_MISC, - ("pf: NAT proxy port allocation " - "(%u-%u) failed\n", - r->rpool.proxy_port[0], - r->rpool.proxy_port[1])); - return (NULL); - } - break; - case PF_BINAT: - switch (direction) { - case PF_OUT: - if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){ - switch (pd->af) { + if (r == NULL) + return (NULL); + + switch (r->action) { + case PF_NONAT: + case PF_NOBINAT: + case PF_NORDR: + return (NULL); + } + + *skp = pf_state_key_setup(pd, saddr, daddr, sport, dport); + if (*skp == NULL) + return (NULL); + *nkp = pf_state_key_clone(*skp); + if (*nkp == NULL) { + uma_zfree(V_pf_state_key_z, skp); + *skp = NULL; + return (NULL); + } + + /* XXX We only modify one side for now. 
*/ + naddr = &(*nkp)->addr[1]; + nport = &(*nkp)->port[1]; + + switch (r->action) { + case PF_NAT: + if (pf_get_sport(pd->af, pd->proto, r, saddr, daddr, dport, + naddr, nport, r->rpool.proxy_port[0], + r->rpool.proxy_port[1], sn)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: NAT proxy port allocation (%u-%u) failed\n", + r->rpool.proxy_port[0], r->rpool.proxy_port[1])); + goto notrans; + } + break; + case PF_BINAT: + switch (direction) { + case PF_OUT: + if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){ + switch (pd->af) { #ifdef INET - case AF_INET: - if (r->rpool.cur->addr.p.dyn-> - pfid_acnt4 < 1) - return (NULL); - PF_POOLMASK(naddr, - &r->rpool.cur->addr.p.dyn-> - pfid_addr4, - &r->rpool.cur->addr.p.dyn-> - pfid_mask4, - saddr, AF_INET); - break; + case AF_INET: + if (r->rpool.cur->addr.p.dyn-> + pfid_acnt4 < 1) + goto notrans; + PF_POOLMASK(naddr, + &r->rpool.cur->addr.p.dyn-> + pfid_addr4, + &r->rpool.cur->addr.p.dyn-> + pfid_mask4, saddr, AF_INET); + break; #endif /* INET */ #ifdef INET6 - case AF_INET6: - if (r->rpool.cur->addr.p.dyn-> - pfid_acnt6 < 1) - return (NULL); - PF_POOLMASK(naddr, - &r->rpool.cur->addr.p.dyn-> - pfid_addr6, - &r->rpool.cur->addr.p.dyn-> - pfid_mask6, - saddr, AF_INET6); - break; -#endif /* INET6 */ - } - } else + case AF_INET6: + if (r->rpool.cur->addr.p.dyn-> + pfid_acnt6 < 1) + goto notrans; PF_POOLMASK(naddr, - &r->rpool.cur->addr.v.a.addr, - &r->rpool.cur->addr.v.a.mask, - saddr, pd->af); - break; - case PF_IN: - if (r->src.addr.type == PF_ADDR_DYNIFTL) { - switch (pd->af) { + &r->rpool.cur->addr.p.dyn-> + pfid_addr6, + &r->rpool.cur->addr.p.dyn-> + pfid_mask6, saddr, AF_INET6); + break; +#endif /* INET6 */ + } + } else + PF_POOLMASK(naddr, + &r->rpool.cur->addr.v.a.addr, + &r->rpool.cur->addr.v.a.mask, saddr, + pd->af); + break; + case PF_IN: + if (r->src.addr.type == PF_ADDR_DYNIFTL) { + switch (pd->af) { #ifdef INET - case AF_INET: - if (r->src.addr.p.dyn-> - pfid_acnt4 < 1) - return (NULL); - PF_POOLMASK(naddr, - 
&r->src.addr.p.dyn-> - pfid_addr4, - &r->src.addr.p.dyn-> - pfid_mask4, - daddr, AF_INET); - break; + case AF_INET: + if (r->src.addr.p.dyn-> pfid_acnt4 < 1) + goto notrans; + PF_POOLMASK(naddr, + &r->src.addr.p.dyn->pfid_addr4, + &r->src.addr.p.dyn->pfid_mask4, + daddr, AF_INET); + break; #endif /* INET */ #ifdef INET6 - case AF_INET6: - if (r->src.addr.p.dyn-> - pfid_acnt6 < 1) - return (NULL); - PF_POOLMASK(naddr, - &r->src.addr.p.dyn-> - pfid_addr6, - &r->src.addr.p.dyn-> - pfid_mask6, - daddr, AF_INET6); - break; -#endif /* INET6 */ - } - } else + case AF_INET6: + if (r->src.addr.p.dyn->pfid_acnt6 < 1) + goto notrans; PF_POOLMASK(naddr, - &r->src.addr.v.a.addr, - &r->src.addr.v.a.mask, daddr, - pd->af); - break; - } - break; - case PF_RDR: { - if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn)) - return (NULL); - if ((r->rpool.opts & PF_POOL_TYPEMASK) == - PF_POOL_BITMASK) - PF_POOLMASK(naddr, naddr, - &r->rpool.cur->addr.v.a.mask, daddr, - pd->af); - - if (r->rpool.proxy_port[1]) { - u_int32_t tmp_nport; - - tmp_nport = ((ntohs(dport) - - ntohs(r->dst.port[0])) % - (r->rpool.proxy_port[1] - - r->rpool.proxy_port[0] + 1)) + - r->rpool.proxy_port[0]; - - /* wrap around if necessary */ - if (tmp_nport > 65535) - tmp_nport -= 65535; - *nport = htons((u_int16_t)tmp_nport); - } else if (r->rpool.proxy_port[0]) - *nport = htons(r->rpool.proxy_port[0]); + &r->src.addr.p.dyn->pfid_addr6, + &r->src.addr.p.dyn->pfid_mask6, + daddr, AF_INET6); + break; +#endif /* INET6 */ + } + } else + PF_POOLMASK(naddr, &r->src.addr.v.a.addr, + &r->src.addr.v.a.mask, daddr, pd->af); break; } - default: - return (NULL); - } - /* - * Translation was a NOP. - * Pretend there was no match. 
- */ - if (!bcmp(*skp, *nkp, sizeof(struct pf_state_key_cmp))) { -#ifdef __FreeBSD__ - pool_put(&V_pf_state_key_pl, *nkp); - pool_put(&V_pf_state_key_pl, *skp); -#else - pool_put(&pf_state_key_pl, *nkp); - pool_put(&pf_state_key_pl, *skp); -#endif - *skw = *sks = *nkp = *skp = NULL; - return (NULL); - } + break; + case PF_RDR: { + if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn)) + goto notrans; + if ((r->rpool.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK) + PF_POOLMASK(naddr, naddr, &r->rpool.cur->addr.v.a.mask, + daddr, pd->af); + + if (r->rpool.proxy_port[1]) { + uint32_t tmp_nport; + + tmp_nport = ((ntohs(dport) - ntohs(r->dst.port[0])) % + (r->rpool.proxy_port[1] - r->rpool.proxy_port[0] + + 1)) + r->rpool.proxy_port[0]; + + /* Wrap around if necessary. */ + if (tmp_nport > 65535) + tmp_nport -= 65535; + *nport = htons((uint16_t)tmp_nport); + } else if (r->rpool.proxy_port[0]) + *nport = htons(r->rpool.proxy_port[0]); + break; + } + default: + panic("%s: unknown action %u", __func__, r->action); } - return (r); -} + /* Return success only if translation really happened. 
*/ + if (bcmp(*skp, *nkp, sizeof(struct pf_state_key_cmp))) + return (r); +notrans: + uma_zfree(V_pf_state_key_z, *nkp); + uma_zfree(V_pf_state_key_z, *skp); + *skp = *nkp = NULL; + + return (NULL); +} diff --git a/sys/contrib/pf/net/pf_mtag.h b/sys/contrib/pf/net/pf_mtag.h index 141a867..baff00a 100644 --- a/sys/contrib/pf/net/pf_mtag.h +++ b/sys/contrib/pf/net/pf_mtag.h @@ -42,17 +42,12 @@ struct pf_mtag { void *hdr; /* saved hdr pos in mbuf, for ECN */ - void *statekey; /* pf stackside statekey */ u_int32_t qid; /* queue id */ - u_int rtableid; /* alternate routing table id */ u_int16_t tag; /* tag id */ u_int8_t flags; u_int8_t routed; }; -static __inline struct pf_mtag *pf_find_mtag(struct mbuf *); -static __inline struct pf_mtag *pf_get_mtag(struct mbuf *); - static __inline struct pf_mtag * pf_find_mtag(struct mbuf *m) { @@ -63,22 +58,5 @@ pf_find_mtag(struct mbuf *m) return ((struct pf_mtag *)(mtag + 1)); } - -static __inline struct pf_mtag * -pf_get_mtag(struct mbuf *m) -{ - struct m_tag *mtag; - - if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) == NULL) { - mtag = m_tag_get(PACKET_TAG_PF, sizeof(struct pf_mtag), - M_NOWAIT); - if (mtag == NULL) - return (NULL); - bzero(mtag + 1, sizeof(struct pf_mtag)); - m_tag_prepend(m, mtag); - } - - return ((struct pf_mtag *)(mtag + 1)); -} #endif /* _KERNEL */ #endif /* _NET_PF_MTAG_H_ */ diff --git a/sys/contrib/pf/net/pf_norm.c b/sys/contrib/pf/net/pf_norm.c index 2b20c85..9063fe8 100644 --- a/sys/contrib/pf/net/pf_norm.c +++ b/sys/contrib/pf/net/pf_norm.c @@ -25,78 +25,56 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#ifdef __FreeBSD__ -#include "opt_inet.h" -#include "opt_inet6.h" -#include "opt_pf.h" - #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#ifdef DEV_PFLOG -#define NPFLOG DEV_PFLOG -#else -#define NPFLOG 0 -#endif -#else -#include "pflog.h" -#endif +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_pf.h" #include <sys/param.h> -#include <sys/systm.h> +#include <sys/lock.h> #include <sys/mbuf.h> -#include <sys/filio.h> -#include <sys/fcntl.h> +#include <sys/mutex.h> +#include <sys/refcount.h> +#include <sys/rwlock.h> #include <sys/socket.h> -#include <sys/kernel.h> -#include <sys/time.h> -#ifndef __FreeBSD__ -#include <sys/pool.h> -#include <dev/rndvar.h> -#endif #include <net/if.h> -#include <net/if_types.h> -#include <net/bpf.h> -#include <net/route.h> +#include <net/vnet.h> +#include <net/pfvar.h> +#include <net/pf_mtag.h> #include <net/if_pflog.h> #include <netinet/in.h> -#include <netinet/in_var.h> -#include <netinet/in_systm.h> #include <netinet/ip.h> #include <netinet/ip_var.h> #include <netinet/tcp.h> +#include <netinet/tcp_fsm.h> #include <netinet/tcp_seq.h> -#include <netinet/udp.h> -#include <netinet/ip_icmp.h> #ifdef INET6 #include <netinet/ip6.h> #endif /* INET6 */ -#include <net/pfvar.h> - -#ifndef __FreeBSD__ struct pf_frent { LIST_ENTRY(pf_frent) fr_next; - struct ip *fr_ip; - struct mbuf *fr_m; -}; - -struct pf_frcache { - LIST_ENTRY(pf_frcache) fr_next; - uint16_t fr_off; - uint16_t fr_end; + union { + struct { + struct ip *_fr_ip; + struct mbuf *_fr_m; + } _frag; + struct { + uint16_t _fr_off; + uint16_t _fr_end; + } _cache; + } _u; }; -#endif - -#define PFFRAG_SEENLAST 0x0001 /* Seen the last fragment for this */ -#define PFFRAG_NOBUFFER 0x0002 /* Non-buffering fragment cache */ -#define PFFRAG_DROP 0x0004 /* Drop all fragments */ -#define BUFFER_FRAGMENTS(fr) (!((fr)->fr_flags & PFFRAG_NOBUFFER)) +#define fr_ip _u._frag._fr_ip +#define fr_m _u._frag._fr_m +#define fr_off _u._cache._fr_off +#define fr_end _u._cache._fr_end -#ifndef 
__FreeBSD__ struct pf_fragment { RB_ENTRY(pf_fragment) fr_entry; TAILQ_ENTRY(pf_fragment) frag_next; @@ -104,145 +82,104 @@ struct pf_fragment { struct in_addr fr_dst; u_int8_t fr_p; /* protocol of this fragment */ u_int8_t fr_flags; /* status flags */ +#define PFFRAG_SEENLAST 0x0001 /* Seen the last fragment for this */ +#define PFFRAG_NOBUFFER 0x0002 /* Non-buffering fragment cache */ +#define PFFRAG_DROP 0x0004 /* Drop all fragments */ +#define BUFFER_FRAGMENTS(fr) (!((fr)->fr_flags & PFFRAG_NOBUFFER)) u_int16_t fr_id; /* fragment id for reassemble */ u_int16_t fr_max; /* fragment data max */ u_int32_t fr_timeout; -#define fr_queue fr_u.fru_queue -#define fr_cache fr_u.fru_cache - union { - LIST_HEAD(pf_fragq, pf_frent) fru_queue; /* buffering */ - LIST_HEAD(pf_cacheq, pf_frcache) fru_cache; /* non-buf */ - } fr_u; + LIST_HEAD(, pf_frent) fr_queue; }; -#endif -#ifdef __FreeBSD__ +static struct mtx pf_frag_mtx; +#define PF_FRAG_LOCK() mtx_lock(&pf_frag_mtx) +#define PF_FRAG_UNLOCK() mtx_unlock(&pf_frag_mtx) +#define PF_FRAG_ASSERT() mtx_assert(&pf_frag_mtx, MA_OWNED) + +VNET_DEFINE(uma_zone_t, pf_state_scrub_z); /* XXX: shared with pfsync */ + +static VNET_DEFINE(uma_zone_t, pf_frent_z); +#define V_pf_frent_z VNET(pf_frent_z) +static VNET_DEFINE(uma_zone_t, pf_frag_z); +#define V_pf_frag_z VNET(pf_frag_z) + TAILQ_HEAD(pf_fragqueue, pf_fragment); TAILQ_HEAD(pf_cachequeue, pf_fragment); -VNET_DEFINE(struct pf_fragqueue, pf_fragqueue); +static VNET_DEFINE(struct pf_fragqueue, pf_fragqueue); #define V_pf_fragqueue VNET(pf_fragqueue) -VNET_DEFINE(struct pf_cachequeue, pf_cachequeue); +static VNET_DEFINE(struct pf_cachequeue, pf_cachequeue); #define V_pf_cachequeue VNET(pf_cachequeue) -#else -TAILQ_HEAD(pf_fragqueue, pf_fragment) pf_fragqueue; -TAILQ_HEAD(pf_cachequeue, pf_fragment) pf_cachequeue; -#endif - -#ifndef __FreeBSD__ -static __inline int pf_frag_compare(struct pf_fragment *, - struct pf_fragment *); -#else -static int pf_frag_compare(struct pf_fragment *, - 
struct pf_fragment *); -#endif - -#ifdef __FreeBSD__ RB_HEAD(pf_frag_tree, pf_fragment); -VNET_DEFINE(struct pf_frag_tree, pf_frag_tree); +static VNET_DEFINE(struct pf_frag_tree, pf_frag_tree); #define V_pf_frag_tree VNET(pf_frag_tree) -VNET_DEFINE(struct pf_frag_tree, pf_cache_tree); +static VNET_DEFINE(struct pf_frag_tree, pf_cache_tree); #define V_pf_cache_tree VNET(pf_cache_tree) -#else -RB_HEAD(pf_frag_tree, pf_fragment) pf_frag_tree, pf_cache_tree; -#endif -RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); -RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); +static int pf_frag_compare(struct pf_fragment *, + struct pf_fragment *); +static RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); +static RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); /* Private prototypes */ -void pf_ip2key(struct pf_fragment *, struct ip *); -void pf_remove_fragment(struct pf_fragment *); -void pf_flush_fragments(void); -void pf_free_fragment(struct pf_fragment *); -struct pf_fragment *pf_find_fragment(struct ip *, struct pf_frag_tree *); -struct mbuf *pf_reassemble(struct mbuf **, struct pf_fragment **, - struct pf_frent *, int); -struct mbuf *pf_fragcache(struct mbuf **, struct ip*, - struct pf_fragment **, int, int, int *); -int pf_normalize_tcpopt(struct pf_rule *, struct mbuf *, +static void pf_free_fragment(struct pf_fragment *); +static void pf_remove_fragment(struct pf_fragment *); +static int pf_normalize_tcpopt(struct pf_rule *, struct mbuf *, struct tcphdr *, int, sa_family_t); -void pf_scrub_ip(struct mbuf **, u_int32_t, u_int8_t, +#ifdef INET +static void pf_ip2key(struct pf_fragment *, struct ip *); +static void pf_scrub_ip(struct mbuf **, u_int32_t, u_int8_t, u_int8_t); +static void pf_flush_fragments(void); +static struct pf_fragment *pf_find_fragment(struct ip *, struct pf_frag_tree *); +static struct mbuf *pf_reassemble(struct mbuf **, struct pf_fragment **, + struct pf_frent *, int); +static 
struct mbuf *pf_fragcache(struct mbuf **, struct ip*, + struct pf_fragment **, int, int, int *); +#endif /* INET */ #ifdef INET6 -void pf_scrub_ip6(struct mbuf **, u_int8_t); +static void pf_scrub_ip6(struct mbuf **, u_int8_t); #endif -#ifdef __FreeBSD__ #define DPFPRINTF(x) do { \ if (V_pf_status.debug >= PF_DEBUG_MISC) { \ printf("%s: ", __func__); \ printf x ; \ } \ } while(0) -#else -#define DPFPRINTF(x) do { \ - if (pf_status.debug >= PF_DEBUG_MISC) { \ - printf("%s: ", __func__); \ - printf x ; \ - } \ -} while(0) -#endif - -/* Globals */ -#ifdef __FreeBSD__ -VNET_DEFINE(uma_zone_t, pf_frent_pl); -VNET_DEFINE(uma_zone_t, pf_frag_pl); -VNET_DEFINE(uma_zone_t, pf_cache_pl); -VNET_DEFINE(uma_zone_t, pf_cent_pl); -VNET_DEFINE(uma_zone_t, pf_state_scrub_pl); - -VNET_DEFINE(int, pf_nfrents); -#define V_pf_nfrents VNET(pf_nfrents) -VNET_DEFINE(int, pf_ncache); -#define V_pf_ncache VNET(pf_ncache) -#else -struct pool pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl; -struct pool pf_state_scrub_pl; -int pf_nfrents, pf_ncache; -#endif void pf_normalize_init(void) { -#ifdef __FreeBSD__ - /* - * XXX - * No high water mark support(It's hint not hard limit). 
- * uma_zone_set_max(pf_frag_pl, PFFRAG_FRAG_HIWAT); - */ - uma_zone_set_max(V_pf_frent_pl, PFFRAG_FRENT_HIWAT); - uma_zone_set_max(V_pf_cache_pl, PFFRAG_FRCACHE_HIWAT); - uma_zone_set_max(V_pf_cent_pl, PFFRAG_FRCENT_HIWAT); -#else - pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent", - NULL); - pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag", - NULL); - pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0, - "pffrcache", NULL); - pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent", - NULL); - pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0, - "pfstscr", NULL); - - pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT); - pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0); - pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0); - pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0); -#endif -#ifdef __FreeBSD__ + V_pf_frag_z = uma_zcreate("pf frags", sizeof(struct pf_fragment), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + V_pf_frent_z = uma_zcreate("pf frag entries", sizeof(struct pf_frent), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + V_pf_state_scrub_z = uma_zcreate("pf state scrubs", + sizeof(struct pf_state_scrub), NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, 0); + + V_pf_limits[PF_LIMIT_FRAGS].zone = V_pf_frent_z; + V_pf_limits[PF_LIMIT_FRAGS].limit = PFFRAG_FRENT_HIWAT; + uma_zone_set_max(V_pf_frent_z, PFFRAG_FRENT_HIWAT); + + mtx_init(&pf_frag_mtx, "pf fragments", NULL, MTX_DEF); + TAILQ_INIT(&V_pf_fragqueue); TAILQ_INIT(&V_pf_cachequeue); -#else - TAILQ_INIT(&pf_fragqueue); - TAILQ_INIT(&pf_cachequeue); -#endif } -#ifdef __FreeBSD__ +void +pf_normalize_cleanup(void) +{ + + uma_zdestroy(V_pf_state_scrub_z); + uma_zdestroy(V_pf_frent_z); + uma_zdestroy(V_pf_frag_z); + + mtx_destroy(&pf_frag_mtx); +} + static int -#else -static __inline int -#endif pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b) { int diff; @@ -266,22 +203,13 @@ void 
pf_purge_expired_fragments(void) { struct pf_fragment *frag; -#ifdef __FreeBSD__ - u_int32_t expire = time_second - + u_int32_t expire = time_uptime - V_pf_default_rule.timeout[PFTM_FRAG]; -#else - u_int32_t expire = time_second - - pf_default_rule.timeout[PFTM_FRAG]; -#endif -#ifdef __FreeBSD__ + PF_FRAG_LOCK(); while ((frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue)) != NULL) { KASSERT((BUFFER_FRAGMENTS(frag)), ("BUFFER_FRAGMENTS(frag) == 0: %s", __FUNCTION__)); -#else - while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) { - KASSERT(BUFFER_FRAGMENTS(frag)); -#endif if (frag->fr_timeout > expire) break; @@ -289,92 +217,56 @@ pf_purge_expired_fragments(void) pf_free_fragment(frag); } -#ifdef __FreeBSD__ while ((frag = TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue)) != NULL) { KASSERT((!BUFFER_FRAGMENTS(frag)), ("BUFFER_FRAGMENTS(frag) != 0: %s", __FUNCTION__)); -#else - while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) { - KASSERT(!BUFFER_FRAGMENTS(frag)); -#endif if (frag->fr_timeout > expire) break; DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag)); pf_free_fragment(frag); -#ifdef __FreeBSD__ KASSERT((TAILQ_EMPTY(&V_pf_cachequeue) || TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue) != frag), ("!(TAILQ_EMPTY() || TAILQ_LAST() == farg): %s", __FUNCTION__)); -#else - KASSERT(TAILQ_EMPTY(&pf_cachequeue) || - TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag); -#endif } + PF_FRAG_UNLOCK(); } +#ifdef INET /* * Try to flush old fragments to make space for new ones */ - -void +static void pf_flush_fragments(void) { - struct pf_fragment *frag; + struct pf_fragment *frag, *cache; int goal; -#ifdef __FreeBSD__ - goal = V_pf_nfrents * 9 / 10; - DPFPRINTF(("trying to free > %d frents\n", - V_pf_nfrents - goal)); - while (goal < V_pf_nfrents) { -#else - goal = pf_nfrents * 9 / 10; - DPFPRINTF(("trying to free > %d frents\n", - pf_nfrents - goal)); - while (goal < pf_nfrents) { -#endif -#ifdef __FreeBSD__ - frag = TAILQ_LAST(&V_pf_fragqueue, 
pf_fragqueue); -#else - frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue); -#endif - if (frag == NULL) - break; - pf_free_fragment(frag); - } + PF_FRAG_ASSERT(); - -#ifdef __FreeBSD__ - goal = V_pf_ncache * 9 / 10; - DPFPRINTF(("trying to free > %d cache entries\n", - V_pf_ncache - goal)); - while (goal < V_pf_ncache) { -#else - goal = pf_ncache * 9 / 10; - DPFPRINTF(("trying to free > %d cache entries\n", - pf_ncache - goal)); - while (goal < pf_ncache) { -#endif -#ifdef __FreeBSD__ - frag = TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue); -#else - frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue); -#endif - if (frag == NULL) + goal = uma_zone_get_cur(V_pf_frent_z) * 9 / 10; + DPFPRINTF(("trying to free %d frag entriess\n", goal)); + while (goal < uma_zone_get_cur(V_pf_frent_z)) { + frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue); + if (frag) + pf_free_fragment(frag); + cache = TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue); + if (cache) + pf_free_fragment(cache); + if (frag == NULL && cache == NULL) break; - pf_free_fragment(frag); } } +#endif /* INET */ /* Frees the fragments and all associated entries */ - -void +static void pf_free_fragment(struct pf_fragment *frag) { struct pf_frent *frent; - struct pf_frcache *frcache; + + PF_FRAG_ASSERT(); /* Free all fragments */ if (BUFFER_FRAGMENTS(frag)) { @@ -383,43 +275,28 @@ pf_free_fragment(struct pf_fragment *frag) LIST_REMOVE(frent, fr_next); m_freem(frent->fr_m); -#ifdef __FreeBSD__ - pool_put(&V_pf_frent_pl, frent); - V_pf_nfrents--; -#else - pool_put(&pf_frent_pl, frent); - pf_nfrents--; -#endif + uma_zfree(V_pf_frent_z, frent); } } else { - for (frcache = LIST_FIRST(&frag->fr_cache); frcache; - frcache = LIST_FIRST(&frag->fr_cache)) { - LIST_REMOVE(frcache, fr_next); - -#ifdef __FreeBSD__ - KASSERT((LIST_EMPTY(&frag->fr_cache) || - LIST_FIRST(&frag->fr_cache)->fr_off > - frcache->fr_end), - ("! 
(LIST_EMPTY() || LIST_FIRST()->fr_off >" - " frcache->fr_end): %s", __FUNCTION__)); + for (frent = LIST_FIRST(&frag->fr_queue); frent; + frent = LIST_FIRST(&frag->fr_queue)) { + LIST_REMOVE(frent, fr_next); - pool_put(&V_pf_cent_pl, frcache); - V_pf_ncache--; -#else - KASSERT(LIST_EMPTY(&frag->fr_cache) || - LIST_FIRST(&frag->fr_cache)->fr_off > - frcache->fr_end); + KASSERT((LIST_EMPTY(&frag->fr_queue) || + LIST_FIRST(&frag->fr_queue)->fr_off > + frent->fr_end), + ("! (LIST_EMPTY() || LIST_FIRST()->fr_off >" + " frent->fr_end): %s", __func__)); - pool_put(&pf_cent_pl, frcache); - pf_ncache--; -#endif + uma_zfree(V_pf_frent_z, frent); } } pf_remove_fragment(frag); } -void +#ifdef INET +static void pf_ip2key(struct pf_fragment *key, struct ip *ip) { key->fr_p = ip->ip_p; @@ -428,70 +305,55 @@ pf_ip2key(struct pf_fragment *key, struct ip *ip) key->fr_dst.s_addr = ip->ip_dst.s_addr; } -struct pf_fragment * +static struct pf_fragment * pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree) { struct pf_fragment key; struct pf_fragment *frag; + PF_FRAG_ASSERT(); + pf_ip2key(&key, ip); frag = RB_FIND(pf_frag_tree, tree, &key); if (frag != NULL) { /* XXX Are we sure we want to update the timeout? 
*/ - frag->fr_timeout = time_second; + frag->fr_timeout = time_uptime; if (BUFFER_FRAGMENTS(frag)) { -#ifdef __FreeBSD__ TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next); TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next); -#else - TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); - TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next); -#endif } else { -#ifdef __FreeBSD__ TAILQ_REMOVE(&V_pf_cachequeue, frag, frag_next); TAILQ_INSERT_HEAD(&V_pf_cachequeue, frag, frag_next); -#else - TAILQ_REMOVE(&pf_cachequeue, frag, frag_next); - TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next); -#endif } } return (frag); } +#endif /* INET */ /* Removes a fragment from the fragment queue and frees the fragment */ -void +static void pf_remove_fragment(struct pf_fragment *frag) { + + PF_FRAG_ASSERT(); + if (BUFFER_FRAGMENTS(frag)) { -#ifdef __FreeBSD__ RB_REMOVE(pf_frag_tree, &V_pf_frag_tree, frag); TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next); - pool_put(&V_pf_frag_pl, frag); -#else - RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag); - TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); - pool_put(&pf_frag_pl, frag); -#endif + uma_zfree(V_pf_frag_z, frag); } else { -#ifdef __FreeBSD__ RB_REMOVE(pf_frag_tree, &V_pf_cache_tree, frag); TAILQ_REMOVE(&V_pf_cachequeue, frag, frag_next); - pool_put(&V_pf_cache_pl, frag); -#else - RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag); - TAILQ_REMOVE(&pf_cachequeue, frag, frag_next); - pool_put(&pf_cache_pl, frag); -#endif + uma_zfree(V_pf_frag_z, frag); } } +#ifdef INET #define FR_IP_OFF(fr) ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3) -struct mbuf * +static struct mbuf * pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, struct pf_frent *frent, int mff) { @@ -504,12 +366,9 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, u_int16_t ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4; u_int16_t max = ip_len + off; -#ifdef __FreeBSD__ + PF_FRAG_ASSERT(); KASSERT((*frag == NULL || BUFFER_FRAGMENTS(*frag)), ("! 
(*frag == NULL || BUFFER_FRAGMENTS(*frag)): %s", __FUNCTION__)); -#else - KASSERT(*frag == NULL || BUFFER_FRAGMENTS(*frag)); -#endif /* Strip off ip header */ m->m_data += hlen; @@ -517,18 +376,10 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, /* Create a new reassembly queue for this packet */ if (*frag == NULL) { -#ifdef __FreeBSD__ - *frag = pool_get(&V_pf_frag_pl, PR_NOWAIT); -#else - *frag = pool_get(&pf_frag_pl, PR_NOWAIT); -#endif + *frag = uma_zalloc(V_pf_frag_z, M_NOWAIT); if (*frag == NULL) { pf_flush_fragments(); -#ifdef __FreeBSD__ - *frag = pool_get(&V_pf_frag_pl, PR_NOWAIT); -#else - *frag = pool_get(&pf_frag_pl, PR_NOWAIT); -#endif + *frag = uma_zalloc(V_pf_frag_z, M_NOWAIT); if (*frag == NULL) goto drop_fragment; } @@ -539,16 +390,11 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, (*frag)->fr_dst = frent->fr_ip->ip_dst; (*frag)->fr_p = frent->fr_ip->ip_p; (*frag)->fr_id = frent->fr_ip->ip_id; - (*frag)->fr_timeout = time_second; + (*frag)->fr_timeout = time_uptime; LIST_INIT(&(*frag)->fr_queue); -#ifdef __FreeBSD__ RB_INSERT(pf_frag_tree, &V_pf_frag_tree, *frag); TAILQ_INSERT_HEAD(&V_pf_fragqueue, *frag, frag_next); -#else - RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag); - TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next); -#endif /* We do not have a previous fragment */ frep = NULL; @@ -565,12 +411,8 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, frep = frea; } -#ifdef __FreeBSD__ KASSERT((frep != NULL || frea != NULL), ("!(frep != NULL || frea != NULL): %s", __FUNCTION__));; -#else - KASSERT(frep != NULL || frea != NULL); -#endif if (frep != NULL && FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * @@ -613,13 +455,7 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, next = LIST_NEXT(frea, fr_next); m_freem(frea->fr_m); LIST_REMOVE(frea, fr_next); -#ifdef __FreeBSD__ - pool_put(&V_pf_frent_pl, frea); - V_pf_nfrents--; -#else - pool_put(&pf_frent_pl, frea); - pf_nfrents--; 
-#endif + uma_zfree(V_pf_frent_z, frea); } insert: @@ -660,11 +496,7 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, /* We have all the data */ frent = LIST_FIRST(&(*frag)->fr_queue); -#ifdef __FreeBSD__ KASSERT((frent != NULL), ("frent == NULL: %s", __FUNCTION__)); -#else - KASSERT(frent != NULL); -#endif if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) { DPFPRINTF(("drop: too big: %d\n", off)); pf_free_fragment(*frag); @@ -679,36 +511,20 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, m2 = m->m_next; m->m_next = NULL; m_cat(m, m2); -#ifdef __FreeBSD__ - pool_put(&V_pf_frent_pl, frent); - V_pf_nfrents--; -#else - pool_put(&pf_frent_pl, frent); - pf_nfrents--; -#endif + uma_zfree(V_pf_frent_z, frent); for (frent = next; frent != NULL; frent = next) { next = LIST_NEXT(frent, fr_next); m2 = frent->fr_m; -#ifdef __FreeBSD__ - pool_put(&V_pf_frent_pl, frent); - V_pf_nfrents--; -#else - pool_put(&pf_frent_pl, frent); - pf_nfrents--; -#endif -#ifdef __FreeBSD__ + uma_zfree(V_pf_frent_z, frent); m->m_pkthdr.csum_flags &= m2->m_pkthdr.csum_flags; m->m_pkthdr.csum_data += m2->m_pkthdr.csum_data; -#endif m_cat(m, m2); } -#ifdef __FreeBSD__ while (m->m_pkthdr.csum_data & 0xffff0000) m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) + (m->m_pkthdr.csum_data >> 16); -#endif ip->ip_src = (*frag)->fr_src; ip->ip_dst = (*frag)->fr_dst; @@ -735,71 +551,43 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, drop_fragment: /* Oops - fail safe - drop packet */ -#ifdef __FreeBSD__ - pool_put(&V_pf_frent_pl, frent); - V_pf_nfrents--; -#else - pool_put(&pf_frent_pl, frent); - pf_nfrents--; -#endif + uma_zfree(V_pf_frent_z, frent); m_freem(m); return (NULL); } -struct mbuf * +static struct mbuf * pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, int drop, int *nomem) { struct mbuf *m = *m0; - struct pf_frcache *frp, *fra, *cur = NULL; + struct pf_frent *frp, *fra, *cur = NULL; int ip_len = ntohs(h->ip_len) - 
(h->ip_hl << 2); u_int16_t off = ntohs(h->ip_off) << 3; u_int16_t max = ip_len + off; int hosed = 0; -#ifdef __FreeBSD__ + PF_FRAG_ASSERT(); KASSERT((*frag == NULL || !BUFFER_FRAGMENTS(*frag)), ("!(*frag == NULL || !BUFFER_FRAGMENTS(*frag)): %s", __FUNCTION__)); -#else - KASSERT(*frag == NULL || !BUFFER_FRAGMENTS(*frag)); -#endif /* Create a new range queue for this packet */ if (*frag == NULL) { -#ifdef __FreeBSD__ - *frag = pool_get(&V_pf_cache_pl, PR_NOWAIT); -#else - *frag = pool_get(&pf_cache_pl, PR_NOWAIT); -#endif + *frag = uma_zalloc(V_pf_frag_z, M_NOWAIT); if (*frag == NULL) { pf_flush_fragments(); -#ifdef __FreeBSD__ - *frag = pool_get(&V_pf_cache_pl, PR_NOWAIT); -#else - *frag = pool_get(&pf_cache_pl, PR_NOWAIT); -#endif + *frag = uma_zalloc(V_pf_frag_z, M_NOWAIT); if (*frag == NULL) goto no_mem; } /* Get an entry for the queue */ -#ifdef __FreeBSD__ - cur = pool_get(&V_pf_cent_pl, PR_NOWAIT); - if (cur == NULL) { - pool_put(&V_pf_cache_pl, *frag); -#else - cur = pool_get(&pf_cent_pl, PR_NOWAIT); + cur = uma_zalloc(V_pf_frent_z, M_NOWAIT); if (cur == NULL) { - pool_put(&pf_cache_pl, *frag); -#endif + uma_zfree(V_pf_frag_z, *frag); *frag = NULL; goto no_mem; } -#ifdef __FreeBSD__ - V_pf_ncache++; -#else - pf_ncache++; -#endif (*frag)->fr_flags = PFFRAG_NOBUFFER; (*frag)->fr_max = 0; @@ -807,20 +595,15 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, (*frag)->fr_dst = h->ip_dst; (*frag)->fr_p = h->ip_p; (*frag)->fr_id = h->ip_id; - (*frag)->fr_timeout = time_second; + (*frag)->fr_timeout = time_uptime; cur->fr_off = off; cur->fr_end = max; - LIST_INIT(&(*frag)->fr_cache); - LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next); + LIST_INIT(&(*frag)->fr_queue); + LIST_INSERT_HEAD(&(*frag)->fr_queue, cur, fr_next); -#ifdef __FreeBSD__ RB_INSERT(pf_frag_tree, &V_pf_cache_tree, *frag); TAILQ_INSERT_HEAD(&V_pf_cachequeue, *frag, frag_next); -#else - RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag); - 
TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next); -#endif DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, max)); @@ -832,18 +615,14 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, * - off contains the real shifted offset. */ frp = NULL; - LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) { + LIST_FOREACH(fra, &(*frag)->fr_queue, fr_next) { if (fra->fr_off > off) break; frp = fra; } -#ifdef __FreeBSD__ KASSERT((frp != NULL || fra != NULL), ("!(frp != NULL || fra != NULL): %s", __FUNCTION__)); -#else - KASSERT(frp != NULL || fra != NULL); -#endif if (frp != NULL) { int precut; @@ -885,24 +664,16 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, * than this mbuf magic. For my next trick, * I'll pull a rabbit out of my laptop. */ -#ifdef __FreeBSD__ - *m0 = m_dup(m, M_DONTWAIT); -#else - *m0 = m_copym2(m, 0, h->ip_hl << 2, M_NOWAIT); -#endif + *m0 = m_dup(m, M_NOWAIT); if (*m0 == NULL) goto no_mem; -#ifdef __FreeBSD__ /* From KAME Project : We have missed this! 
*/ m_adj(*m0, (h->ip_hl << 2) - (*m0)->m_pkthdr.len); - KASSERT(((*m0)->m_next == NULL), - ("(*m0)->m_next != NULL: %s", + KASSERT(((*m0)->m_next == NULL), + ("(*m0)->m_next != NULL: %s", __FUNCTION__)); -#else - KASSERT((*m0)->m_next == NULL); -#endif m_adj(m, precut + (h->ip_hl << 2)); m_cat(*m0, m); m = *m0; @@ -917,15 +688,10 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, h = mtod(m, struct ip *); -#ifdef __FreeBSD__ KASSERT(((int)m->m_len == ntohs(h->ip_len) - precut), ("m->m_len != ntohs(h->ip_len) - precut: %s", __FUNCTION__)); -#else - KASSERT((int)m->m_len == - ntohs(h->ip_len) - precut); -#endif h->ip_off = htons(ntohs(h->ip_off) + (precut >> 3)); h->ip_len = htons(ntohs(h->ip_len) - precut); @@ -939,18 +705,9 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, h->ip_id, -precut, frp->fr_off, frp->fr_end, off, max)); -#ifdef __FreeBSD__ - cur = pool_get(&V_pf_cent_pl, PR_NOWAIT); -#else - cur = pool_get(&pf_cent_pl, PR_NOWAIT); -#endif + cur = uma_zalloc(V_pf_frent_z, M_NOWAIT); if (cur == NULL) goto no_mem; -#ifdef __FreeBSD__ - V_pf_ncache++; -#else - pf_ncache++; -#endif cur->fr_off = off; cur->fr_end = max; @@ -989,14 +746,9 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, m->m_pkthdr.len = plen; } h = mtod(m, struct ip *); -#ifdef __FreeBSD__ KASSERT(((int)m->m_len == ntohs(h->ip_len) - aftercut), ("m->m_len != ntohs(h->ip_len) - aftercut: %s", __FUNCTION__)); -#else - KASSERT((int)m->m_len == - ntohs(h->ip_len) - aftercut); -#endif h->ip_len = htons(ntohs(h->ip_len) - aftercut); } else { hosed++; @@ -1007,18 +759,9 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, h->ip_id, -aftercut, off, max, fra->fr_off, fra->fr_end)); -#ifdef __FreeBSD__ - cur = pool_get(&V_pf_cent_pl, PR_NOWAIT); -#else - cur = pool_get(&pf_cent_pl, PR_NOWAIT); -#endif + cur = uma_zalloc(V_pf_frent_z, M_NOWAIT); if (cur == NULL) goto no_mem; 
-#ifdef __FreeBSD__ - V_pf_ncache++; -#else - pf_ncache++; -#endif cur->fr_off = off; cur->fr_end = max; @@ -1036,36 +779,20 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, max, fra->fr_off, fra->fr_end)); fra->fr_off = cur->fr_off; LIST_REMOVE(cur, fr_next); -#ifdef __FreeBSD__ - pool_put(&V_pf_cent_pl, cur); - V_pf_ncache--; -#else - pool_put(&pf_cent_pl, cur); - pf_ncache--; -#endif + uma_zfree(V_pf_frent_z, cur); cur = NULL; } else if (frp && fra->fr_off <= frp->fr_end) { /* Need to merge in a modified 'frp' */ -#ifdef __FreeBSD__ KASSERT((cur == NULL), ("cur != NULL: %s", __FUNCTION__)); -#else - KASSERT(cur == NULL); -#endif DPFPRINTF(("fragcache[%d]: adjacent(merge " "%d-%d) %d-%d (%d-%d)\n", h->ip_id, frp->fr_off, frp->fr_end, off, max, fra->fr_off, fra->fr_end)); fra->fr_off = frp->fr_off; LIST_REMOVE(frp, fr_next); -#ifdef __FreeBSD__ - pool_put(&V_pf_cent_pl, frp); - V_pf_ncache--; -#else - pool_put(&pf_cent_pl, frp); - pf_ncache--; -#endif + uma_zfree(V_pf_frent_z, frp); frp = NULL; } @@ -1093,8 +820,8 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, /* Check if we are completely reassembled */ if (((*frag)->fr_flags & PFFRAG_SEENLAST) && - LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 && - LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) { + LIST_FIRST(&(*frag)->fr_queue)->fr_off == 0 && + LIST_FIRST(&(*frag)->fr_queue)->fr_end == (*frag)->fr_max) { /* Remove from fragment queue */ DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id, (*frag)->fr_max)); @@ -1132,7 +859,6 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, return (NULL); } -#ifdef INET int pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, struct pf_pdesc *pd) @@ -1150,6 +876,8 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, int ip_off; int tag = -1; + PF_RULES_RASSERT(); + r = 
TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); while (r != NULL) { r->evaluations++; @@ -1169,11 +897,8 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, (struct pf_addr *)&h->ip_dst.s_addr, AF_INET, r->dst.neg, NULL, M_GETFIB(m))) r = r->skip[PF_SKIP_DST_ADDR].ptr; -#ifdef __FreeBSD__ - else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag)) -#else - else if (r->match_tag && !pf_match_tag(m, r, &tag)) -#endif + else if (r->match_tag && !pf_match_tag(m, r, &tag, + pd->pf_mtag ? pd->pf_mtag->tag : 0)) r = TAILQ_NEXT(r, entries); else break; @@ -1231,13 +956,10 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, max = fragoff + ip_len; if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) { - /* Fully buffer all of the fragments */ -#ifdef __FreeBSD__ + /* Fully buffer all of the fragments */ + PF_FRAG_LOCK(); frag = pf_find_fragment(h, &V_pf_frag_tree); -#else - frag = pf_find_fragment(h, &pf_frag_tree); -#endif /* Check if we saw the last fragment already */ if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) && @@ -1245,26 +967,19 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, goto bad; /* Get an entry for the fragment queue */ -#ifdef __FreeBSD__ - frent = pool_get(&V_pf_frent_pl, PR_NOWAIT); -#else - frent = pool_get(&pf_frent_pl, PR_NOWAIT); -#endif + frent = uma_zalloc(V_pf_frent_z, M_NOWAIT); if (frent == NULL) { + PF_FRAG_UNLOCK(); REASON_SET(reason, PFRES_MEMORY); return (PF_DROP); } -#ifdef __FreeBSD__ - V_pf_nfrents++; -#else - pf_nfrents++; -#endif frent->fr_ip = h; frent->fr_m = m; /* Might return a completely reassembled mbuf, or NULL */ DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max)); *m0 = m = pf_reassemble(m0, &frag, frent, mff); + PF_FRAG_UNLOCK(); if (m == NULL) return (PF_DROP); @@ -1289,11 +1004,7 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, /* 
non-buffering fragment cache (drops or masks overlaps) */ int nomem = 0; -#ifdef __FreeBSD__ if (dir == PF_OUT && pd->pf_mtag->flags & PF_TAG_FRAGCACHE) { -#else - if (dir == PF_OUT && m->m_pkthdr.pf.flags & PF_TAG_FRAGCACHE) { -#endif /* * Already passed the fragment cache in the * input direction. If we continued, it would @@ -1302,11 +1013,8 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, goto fragment_pass; } -#ifdef __FreeBSD__ + PF_FRAG_LOCK(); frag = pf_find_fragment(h, &V_pf_cache_tree); -#else - frag = pf_find_fragment(h, &pf_cache_tree); -#endif /* Check if we saw the last fragment already */ if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) && @@ -1318,6 +1026,7 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, *m0 = m = pf_fragcache(m0, h, &frag, mff, (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem); + PF_FRAG_UNLOCK(); if (m == NULL) { if (nomem) goto no_mem; @@ -1337,11 +1046,7 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, } #endif if (dir == PF_IN) -#ifdef __FreeBSD__ pd->pf_mtag->flags |= PF_TAG_FRAGCACHE; -#else - m->m_pkthdr.pf.flags |= PF_TAG_FRAGCACHE; -#endif if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) goto drop; @@ -1369,25 +1074,30 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, no_mem: REASON_SET(reason, PFRES_MEMORY); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd); + PFLOG_PACKET(kif, m, AF_INET, dir, *reason, r, NULL, NULL, pd, + 1); return (PF_DROP); drop: REASON_SET(reason, PFRES_NORM); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd); + PFLOG_PACKET(kif, m, AF_INET, dir, *reason, r, NULL, NULL, pd, + 1); return (PF_DROP); bad: DPFPRINTF(("dropping bad fragment\n")); /* Free associated fragments */ - if (frag != NULL) + if (frag != NULL) { pf_free_fragment(frag); + PF_FRAG_UNLOCK(); + } 
REASON_SET(reason, PFRES_FRAG); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd); + PFLOG_PACKET(kif, m, AF_INET, dir, *reason, r, NULL, NULL, pd, + 1); return (PF_DROP); } @@ -1413,6 +1123,8 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, u_int8_t proto; int terminal; + PF_RULES_RASSERT(); + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); while (r != NULL) { r->evaluations++; @@ -1553,19 +1265,22 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, shortpkt: REASON_SET(reason, PFRES_SHORT); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd); + PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd, + 1); return (PF_DROP); drop: REASON_SET(reason, PFRES_NORM); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd); + PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd, + 1); return (PF_DROP); badfrag: REASON_SET(reason, PFRES_FRAG); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd); + PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd, + 1); return (PF_DROP); } #endif /* INET6 */ @@ -1581,6 +1296,8 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, u_int8_t flags; sa_family_t af = pd->af; + PF_RULES_RASSERT(); + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); while (r != NULL) { r->evaluations++; @@ -1674,18 +1391,15 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, /* copy back packet headers if we sanitized */ if (rewrite) -#ifdef __FreeBSD__ m_copyback(m, off, sizeof(*th), (caddr_t)th); -#else - m_copyback(m, off, sizeof(*th), th); -#endif return (PF_PASS); tcp_drop: REASON_SET(&reason, PFRES_NORM); if (rm != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL, pd); + PFLOG_PACKET(kif, m, AF_INET, dir, reason, 
r, NULL, NULL, pd, + 1); return (PF_DROP); } @@ -1697,19 +1411,12 @@ pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd, u_int8_t hdr[60]; u_int8_t *opt; -#ifdef __FreeBSD__ - KASSERT((src->scrub == NULL), + KASSERT((src->scrub == NULL), ("pf_normalize_tcp_init: src->scrub != NULL")); - src->scrub = pool_get(&V_pf_state_scrub_pl, PR_NOWAIT); -#else - KASSERT(src->scrub == NULL); - - src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT); -#endif + src->scrub = uma_zalloc(V_pf_state_scrub_z, M_ZERO | M_NOWAIT); if (src->scrub == NULL) return (1); - bzero(src->scrub, sizeof(*src->scrub)); switch (pd->af) { #ifdef INET @@ -1782,17 +1489,10 @@ pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd, void pf_normalize_tcp_cleanup(struct pf_state *state) { -#ifdef __FreeBSD__ - if (state->src.scrub) - pool_put(&V_pf_state_scrub_pl, state->src.scrub); - if (state->dst.scrub) - pool_put(&V_pf_state_scrub_pl, state->dst.scrub); -#else if (state->src.scrub) - pool_put(&pf_state_scrub_pl, state->src.scrub); + uma_zfree(V_pf_state_scrub_z, state->src.scrub); if (state->dst.scrub) - pool_put(&pf_state_scrub_pl, state->dst.scrub); -#endif + uma_zfree(V_pf_state_scrub_z, state->dst.scrub); /* Someday... flush the TCP segment reassembly descriptors. */ } @@ -1810,12 +1510,8 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, int copyback = 0; int got_ts = 0; -#ifdef __FreeBSD__ - KASSERT((src->scrub || dst->scrub), - ("pf_normalize_tcp_statefull: src->scrub && dst->scrub!")); -#else - KASSERT(src->scrub || dst->scrub); -#endif + KASSERT((src->scrub || dst->scrub), + ("%s: src->scrub && dst->scrub!", __func__)); /* * Enforce the minimum TTL seen for this connection. Negate a common @@ -1870,11 +1566,7 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, if (got_ts) { /* Huh? Multiple timestamps!? 
*/ -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif DPFPRINTF(("multiple TS??")); pf_print_state(state); printf("\n"); @@ -1942,12 +1634,8 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, getmicrouptime(&uptime); if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) && (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE || - time_second - state->creation > TS_MAX_CONN)) { -#ifdef __FreeBSD__ + time_uptime - state->creation > TS_MAX_CONN)) { if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif DPFPRINTF(("src idled out of PAWS\n")); pf_print_state(state); printf("\n"); @@ -1957,11 +1645,7 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, } if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) && uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) { -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif DPFPRINTF(("dst idled out of PAWS\n")); pf_print_state(state); printf("\n"); @@ -1978,7 +1662,7 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, * measurement of RTT (round trip time) and PAWS * (protection against wrapped sequence numbers). PAWS * gives us a set of rules for rejecting packets on - * long fat pipes (packets that were somehow delayed + * long fat pipes (packets that were somehow delayed * in transit longer than the time it took to send the * full TCP sequence space of 4Gb). We can use these * rules and infer a few others that will let us treat @@ -2045,34 +1729,16 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, * this packet. 
*/ if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0) -#ifdef __FreeBSD__ ts_fudge = V_pf_default_rule.timeout[PFTM_TS_DIFF]; -#else - ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF]; -#endif - /* Calculate max ticks since the last timestamp */ #define TS_MAXFREQ 1100 /* RFC max TS freq of 1Khz + 10% skew */ #define TS_MICROSECS 1000000 /* microseconds per second */ -#ifdef __FreeBSD__ -#ifndef timersub -#define timersub(tvp, uvp, vvp) \ - do { \ - (vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec; \ - (vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec; \ - if ((vvp)->tv_usec < 0) { \ - (vvp)->tv_sec--; \ - (vvp)->tv_usec += 1000000; \ - } \ - } while (0) -#endif -#endif - timersub(&uptime, &src->scrub->pfss_last, &delta_ts); + delta_ts = uptime; + timevalsub(&delta_ts, &src->scrub->pfss_last); tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ; tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ); - if ((src->state >= TCPS_ESTABLISHED && dst->state >= TCPS_ESTABLISHED) && (SEQ_LT(tsval, dst->scrub->pfss_tsecr) || @@ -2092,7 +1758,6 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, tsval_from_last) ? '1' : ' ', SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ', SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? 
'3' : ' ')); -#ifdef __FreeBSD__ DPFPRINTF((" tsval: %u tsecr: %u +ticks: %u " "idle: %jus %lums\n", tsval, tsecr, tsval_from_last, @@ -2103,22 +1768,7 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, DPFPRINTF((" dst->tsval: %u tsecr: %u tsval0: %u" "\n", dst->scrub->pfss_tsval, dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0)); -#else - DPFPRINTF((" tsval: %lu tsecr: %lu +ticks: %lu " - "idle: %lus %lums\n", - tsval, tsecr, tsval_from_last, delta_ts.tv_sec, - delta_ts.tv_usec / 1000)); - DPFPRINTF((" src->tsval: %lu tsecr: %lu\n", - src->scrub->pfss_tsval, src->scrub->pfss_tsecr)); - DPFPRINTF((" dst->tsval: %lu tsecr: %lu tsval0: %lu" - "\n", dst->scrub->pfss_tsval, - dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0)); -#endif -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif pf_print_state(state); pf_print_flags(th->th_flags); printf("\n"); @@ -2166,11 +1816,7 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, * Hey! Someone tried to sneak a packet in. Or the * stack changed its RFC1323 behavior?!?! 
*/ -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC) { -#else - if (pf_status.debug >= PF_DEBUG_MISC) { -#endif DPFPRINTF(("Did not receive expected RFC1323 " "timestamp\n")); pf_print_state(state); @@ -2197,11 +1843,7 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, src->scrub->pfss_flags |= PFSS_DATA_TS; else { src->scrub->pfss_flags |= PFSS_DATA_NOTS; -#ifdef __FreeBSD__ if (V_pf_status.debug >= PF_DEBUG_MISC && dst->scrub && -#else - if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub && -#endif (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) { /* Don't warn if other host rejected RFC1323 */ DPFPRINTF(("Broken RFC1323 stack did not " @@ -2247,7 +1889,7 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, return (0); } -int +static int pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th, int off, sa_family_t af) { @@ -2255,11 +1897,7 @@ pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th, int thoff; int opt, cnt, optlen = 0; int rewrite = 0; -#ifdef __FreeBSD__ u_char opts[TCP_MAXOLEN]; -#else - u_char opts[MAX_TCPOPTLEN]; -#endif u_char *optp = opts; thoff = th->th_off << 2; @@ -2303,7 +1941,8 @@ pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th, return (rewrite); } -void +#ifdef INET +static void pf_scrub_ip(struct mbuf **m0, u_int32_t flags, u_int8_t min_ttl, u_int8_t tos) { struct mbuf *m = *m0; @@ -2344,9 +1983,10 @@ pf_scrub_ip(struct mbuf **m0, u_int32_t flags, u_int8_t min_ttl, u_int8_t tos) h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0); } } +#endif /* INET */ #ifdef INET6 -void +static void pf_scrub_ip6(struct mbuf **m0, u_int8_t min_ttl) { struct mbuf *m = *m0; diff --git a/sys/contrib/pf/net/pf_osfp.c b/sys/contrib/pf/net/pf_osfp.c index dcd8af7..29d4a40 100644 --- a/sys/contrib/pf/net/pf_osfp.c +++ b/sys/contrib/pf/net/pf_osfp.c @@ -17,23 +17,14 @@ * */ -#ifdef __FreeBSD__ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); 
-#endif #include <sys/param.h> +#include <sys/kernel.h> #include <sys/socket.h> -#ifdef _KERNEL -#include <sys/systm.h> -#ifndef __FreeBSD__ -#include <sys/pool.h> -#endif -#endif /* _KERNEL */ -#include <sys/mbuf.h> #include <netinet/in.h> -#include <netinet/in_systm.h> #include <netinet/ip.h> #include <netinet/tcp.h> @@ -41,77 +32,31 @@ __FBSDID("$FreeBSD$"); #include <net/pfvar.h> #include <netinet/ip6.h> -#ifdef _KERNEL #include <netinet6/in6_var.h> -#endif - -#ifdef _KERNEL -#ifdef __FreeBSD__ +static MALLOC_DEFINE(M_PFOSFP, "pf_osfp", "pf(4) operating system fingerprints"); #define DPFPRINTF(format, x...) \ if (V_pf_status.debug >= PF_DEBUG_NOISY) \ printf(format , ##x) -#else -#define DPFPRINTF(format, x...) \ - if (pf_status.debug >= PF_DEBUG_NOISY) \ - printf(format , ##x) -#endif -#ifdef __FreeBSD__ -typedef uma_zone_t pool_t; -#else -typedef struct pool pool_t; -#endif -#else -/* Userland equivalents so we can lend code to tcpdump et al. */ - -#include <arpa/inet.h> -#include <errno.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <netdb.h> -#define pool_t int -#define pool_get(pool, flags) malloc(*(pool)) -#define pool_put(pool, item) free(item) -#define pool_init(pool, size, a, ao, f, m, p) (*(pool)) = (size) - -#ifdef __FreeBSD__ -#define NTOHS(x) (x) = ntohs((u_int16_t)(x)) -#endif - -#ifdef PFDEBUG -#include <sys/stdarg.h> -#define DPFPRINTF(format, x...) fprintf(stderr, format , ##x) -#else -#define DPFPRINTF(format, x...) 
((void)0) -#endif /* PFDEBUG */ -#endif /* _KERNEL */ - - -#ifdef __FreeBSD__ SLIST_HEAD(pf_osfp_list, pf_os_fingerprint); -VNET_DEFINE(struct pf_osfp_list, pf_osfp_list); +static VNET_DEFINE(struct pf_osfp_list, pf_osfp_list) = + SLIST_HEAD_INITIALIZER(); #define V_pf_osfp_list VNET(pf_osfp_list) -VNET_DEFINE(pool_t, pf_osfp_entry_pl); -#define pf_osfp_entry_pl VNET(pf_osfp_entry_pl) -VNET_DEFINE(pool_t, pf_osfp_pl); -#define pf_osfp_pl VNET(pf_osfp_pl) -#else -SLIST_HEAD(pf_osfp_list, pf_os_fingerprint) pf_osfp_list; -pool_t pf_osfp_entry_pl; -pool_t pf_osfp_pl; -#endif -struct pf_os_fingerprint *pf_osfp_find(struct pf_osfp_list *, +static struct pf_osfp_enlist *pf_osfp_fingerprint_hdr(const struct ip *, + const struct ip6_hdr *, + const struct tcphdr *); +static struct pf_os_fingerprint *pf_osfp_find(struct pf_osfp_list *, struct pf_os_fingerprint *, u_int8_t); -struct pf_os_fingerprint *pf_osfp_find_exact(struct pf_osfp_list *, +static struct pf_os_fingerprint *pf_osfp_find_exact(struct pf_osfp_list *, struct pf_os_fingerprint *); -void pf_osfp_insert(struct pf_osfp_list *, +static void pf_osfp_insert(struct pf_osfp_list *, struct pf_os_fingerprint *); +#ifdef PFDEBUG +static struct pf_os_fingerprint *pf_osfp_validate(void); +#endif - -#ifdef _KERNEL /* * Passively fingerprint the OS of the host (IPv4 TCP SYN packets only) * Returns the list of possible OSes. 
@@ -140,19 +85,14 @@ pf_osfp_fingerprint(struct pf_pdesc *pd, struct mbuf *m, int off, return (pf_osfp_fingerprint_hdr(ip, ip6, (struct tcphdr *)hdr)); } -#endif /* _KERNEL */ -struct pf_osfp_enlist * +static struct pf_osfp_enlist * pf_osfp_fingerprint_hdr(const struct ip *ip, const struct ip6_hdr *ip6, const struct tcphdr *tcp) { struct pf_os_fingerprint fp, *fpresult; int cnt, optlen = 0; const u_int8_t *optp; -#ifdef _KERNEL char srcname[128]; -#else - char srcname[NI_MAXHOST]; -#endif if ((tcp->th_flags & (TH_SYN|TH_ACK)) != TH_SYN) return (NULL); @@ -164,49 +104,21 @@ pf_osfp_fingerprint_hdr(const struct ip *ip, const struct ip6_hdr *ip6, const st memset(&fp, 0, sizeof(fp)); if (ip) { -#ifndef _KERNEL - struct sockaddr_in sin; -#endif - fp.fp_psize = ntohs(ip->ip_len); fp.fp_ttl = ip->ip_ttl; if (ip->ip_off & htons(IP_DF)) fp.fp_flags |= PF_OSFP_DF; -#ifdef _KERNEL strlcpy(srcname, inet_ntoa(ip->ip_src), sizeof(srcname)); -#else - memset(&sin, 0, sizeof(sin)); - sin.sin_family = AF_INET; - sin.sin_len = sizeof(struct sockaddr_in); - sin.sin_addr = ip->ip_src; - (void)getnameinfo((struct sockaddr *)&sin, - sizeof(struct sockaddr_in), srcname, sizeof(srcname), - NULL, 0, NI_NUMERICHOST); -#endif } #ifdef INET6 else if (ip6) { -#ifndef _KERNEL - struct sockaddr_in6 sin6; -#endif - /* jumbo payload? 
*/ fp.fp_psize = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen); fp.fp_ttl = ip6->ip6_hlim; fp.fp_flags |= PF_OSFP_DF; fp.fp_flags |= PF_OSFP_INET6; -#ifdef _KERNEL strlcpy(srcname, ip6_sprintf((struct in6_addr *)&ip6->ip6_src), sizeof(srcname)); -#else - memset(&sin6, 0, sizeof(sin6)); - sin6.sin6_family = AF_INET6; - sin6.sin6_len = sizeof(struct sockaddr_in6); - sin6.sin6_addr = ip6->ip6_src; - (void)getnameinfo((struct sockaddr *)&sin6, - sizeof(struct sockaddr_in6), srcname, sizeof(srcname), - NULL, 0, NI_NUMERICHOST); -#endif } #endif else @@ -284,11 +196,7 @@ pf_osfp_fingerprint_hdr(const struct ip *ip, const struct ip6_hdr *ip6, const st (fp.fp_flags & PF_OSFP_WSCALE_DC) ? "*" : "", fp.fp_wscale); -#ifdef __FreeBSD__ if ((fpresult = pf_osfp_find(&V_pf_osfp_list, &fp, -#else - if ((fpresult = pf_osfp_find(&pf_osfp_list, &fp, -#endif PF_OSFP_MAXTTL_OFFSET))) return (&fpresult->fp_oses); return (NULL); @@ -324,52 +232,6 @@ pf_osfp_match(struct pf_osfp_enlist *list, pf_osfp_t os) return (0); } -/* Initialize the OS fingerprint system */ -#ifdef __FreeBSD__ -int -#else -void -#endif -pf_osfp_initialize(void) -{ -#if defined(__FreeBSD__) && defined(_KERNEL) - int error = ENOMEM; - - do { - pf_osfp_entry_pl = pf_osfp_pl = NULL; - UMA_CREATE(pf_osfp_entry_pl, struct pf_osfp_entry, "pfospfen"); - UMA_CREATE(pf_osfp_pl, struct pf_os_fingerprint, "pfosfp"); - error = 0; - } while(0); - - SLIST_INIT(&V_pf_osfp_list); -#else - pool_init(&pf_osfp_entry_pl, sizeof(struct pf_osfp_entry), 0, 0, 0, - "pfosfpen", &pool_allocator_nointr); - pool_init(&pf_osfp_pl, sizeof(struct pf_os_fingerprint), 0, 0, 0, - "pfosfp", &pool_allocator_nointr); - SLIST_INIT(&pf_osfp_list); -#endif - -#ifdef __FreeBSD__ -#ifdef _KERNEL - return (error); -#else - return (0); -#endif -#endif -} - -#if defined(__FreeBSD__) && (_KERNEL) -void -pf_osfp_cleanup(void) -{ - - UMA_DESTROY(pf_osfp_entry_pl); - UMA_DESTROY(pf_osfp_pl); -} -#endif - /* Flush the fingerprint list */ void pf_osfp_flush(void) @@ 
-377,18 +239,13 @@ pf_osfp_flush(void) struct pf_os_fingerprint *fp; struct pf_osfp_entry *entry; -#ifdef __FreeBSD__ while ((fp = SLIST_FIRST(&V_pf_osfp_list))) { SLIST_REMOVE_HEAD(&V_pf_osfp_list, fp_next); -#else - while ((fp = SLIST_FIRST(&pf_osfp_list))) { - SLIST_REMOVE_HEAD(&pf_osfp_list, fp_next); -#endif while ((entry = SLIST_FIRST(&fp->fp_oses))) { SLIST_REMOVE_HEAD(&fp->fp_oses, fp_entry); - pool_put(&pf_osfp_entry_pl, entry); + free(entry, M_PFOSFP); } - pool_put(&pf_osfp_pl, fp); + free(fp, M_PFOSFP); } } @@ -400,6 +257,8 @@ pf_osfp_add(struct pf_osfp_ioctl *fpioc) struct pf_os_fingerprint *fp, fpadd; struct pf_osfp_entry *entry; + PF_RULES_WASSERT(); + memset(&fpadd, 0, sizeof(fpadd)); fpadd.fp_tcpopts = fpioc->fp_tcpopts; fpadd.fp_wsize = fpioc->fp_wsize; @@ -436,31 +295,18 @@ pf_osfp_add(struct pf_osfp_ioctl *fpioc) fpioc->fp_os.fp_os); #endif -#ifdef __FreeBSD__ if ((fp = pf_osfp_find_exact(&V_pf_osfp_list, &fpadd))) { -#else - if ((fp = pf_osfp_find_exact(&pf_osfp_list, &fpadd))) { -#endif SLIST_FOREACH(entry, &fp->fp_oses, fp_entry) { if (PF_OSFP_ENTRY_EQ(entry, &fpioc->fp_os)) return (EEXIST); } - if ((entry = pool_get(&pf_osfp_entry_pl, -#ifdef __FreeBSD__ - PR_NOWAIT)) == NULL) -#else - PR_WAITOK|PR_LIMITFAIL)) == NULL) -#endif + if ((entry = malloc(sizeof(*entry), M_PFOSFP, M_NOWAIT)) + == NULL) return (ENOMEM); } else { - if ((fp = pool_get(&pf_osfp_pl, -#ifdef __FreeBSD__ - PR_NOWAIT)) == NULL) -#else - PR_WAITOK|PR_LIMITFAIL)) == NULL) -#endif + if ((fp = malloc(sizeof(*fp), M_PFOSFP, M_ZERO | M_NOWAIT)) + == NULL) return (ENOMEM); - memset(fp, 0, sizeof(*fp)); fp->fp_tcpopts = fpioc->fp_tcpopts; fp->fp_wsize = fpioc->fp_wsize; fp->fp_psize = fpioc->fp_psize; @@ -470,20 +316,12 @@ pf_osfp_add(struct pf_osfp_ioctl *fpioc) fp->fp_wscale = fpioc->fp_wscale; fp->fp_ttl = fpioc->fp_ttl; SLIST_INIT(&fp->fp_oses); - if ((entry = pool_get(&pf_osfp_entry_pl, -#ifdef __FreeBSD__ - PR_NOWAIT)) == NULL) { -#else - PR_WAITOK|PR_LIMITFAIL)) == NULL) { 
-#endif - pool_put(&pf_osfp_pl, fp); + if ((entry = malloc(sizeof(*entry), M_PFOSFP, M_NOWAIT)) + == NULL) { + free(fp, M_PFOSFP); return (ENOMEM); } -#ifdef __FreeBSD__ pf_osfp_insert(&V_pf_osfp_list, fp); -#else - pf_osfp_insert(&pf_osfp_list, fp); -#endif } memcpy(entry, &fpioc->fp_os, sizeof(*entry)); @@ -503,7 +341,7 @@ pf_osfp_add(struct pf_osfp_ioctl *fpioc) /* Find a fingerprint in the list */ -struct pf_os_fingerprint * +static struct pf_os_fingerprint * pf_osfp_find(struct pf_osfp_list *list, struct pf_os_fingerprint *find, u_int8_t ttldiff) { @@ -578,7 +416,7 @@ pf_osfp_find(struct pf_osfp_list *list, struct pf_os_fingerprint *find, } /* Find an exact fingerprint in the list */ -struct pf_os_fingerprint * +static struct pf_os_fingerprint * pf_osfp_find_exact(struct pf_osfp_list *list, struct pf_os_fingerprint *find) { struct pf_os_fingerprint *f; @@ -599,7 +437,7 @@ pf_osfp_find_exact(struct pf_osfp_list *list, struct pf_os_fingerprint *find) } /* Insert a fingerprint into the list */ -void +static void pf_osfp_insert(struct pf_osfp_list *list, struct pf_os_fingerprint *ins) { struct pf_os_fingerprint *f, *prev = NULL; @@ -625,11 +463,7 @@ pf_osfp_get(struct pf_osfp_ioctl *fpioc) memset(fpioc, 0, sizeof(*fpioc)); -#ifdef __FreeBSD__ SLIST_FOREACH(fp, &V_pf_osfp_list, fp_next) { -#else - SLIST_FOREACH(fp, &pf_osfp_list, fp_next) { -#endif SLIST_FOREACH(entry, &fp->fp_oses, fp_entry) { if (i++ == num) { fpioc->fp_mss = fp->fp_mss; @@ -650,17 +484,14 @@ pf_osfp_get(struct pf_osfp_ioctl *fpioc) } +#ifdef PFDEBUG /* Validate that each signature is reachable */ -struct pf_os_fingerprint * +static struct pf_os_fingerprint * pf_osfp_validate(void) { struct pf_os_fingerprint *f, *f2, find; -#ifdef __FreeBSD__ SLIST_FOREACH(f, &V_pf_osfp_list, fp_next) { -#else - SLIST_FOREACH(f, &pf_osfp_list, fp_next) { -#endif memcpy(&find, f, sizeof(find)); /* We do a few MSS/th_win percolations to make things unique */ @@ -672,11 +503,7 @@ pf_osfp_validate(void) find.fp_wsize 
*= (find.fp_mss + 40); else if (f->fp_flags & PF_OSFP_WSIZE_MOD) find.fp_wsize *= 2; -#ifdef __FreeBSD__ if (f != (f2 = pf_osfp_find(&V_pf_osfp_list, &find, 0))) { -#else - if (f != (f2 = pf_osfp_find(&pf_osfp_list, &find, 0))) { -#endif if (f2) printf("Found \"%s %s %s\" instead of " "\"%s %s %s\"\n", @@ -696,3 +523,4 @@ pf_osfp_validate(void) } return (NULL); } +#endif /* PFDEBUG */ diff --git a/sys/contrib/pf/net/pf_ruleset.c b/sys/contrib/pf/net/pf_ruleset.c index ca8667c..77652a6 100644 --- a/sys/contrib/pf/net/pf_ruleset.c +++ b/sys/contrib/pf/net/pf_ruleset.c @@ -35,15 +35,14 @@ * */ -#ifdef __FreeBSD__ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#endif #include <sys/param.h> #include <sys/socket.h> #ifdef _KERNEL # include <sys/systm.h> +# include <sys/refcount.h> #endif /* _KERNEL */ #include <sys/mbuf.h> @@ -61,20 +60,10 @@ __FBSDID("$FreeBSD$"); #ifdef _KERNEL -#ifdef __FreeBSD__ #define DPFPRINTF(format, x...) \ if (V_pf_status.debug >= PF_DEBUG_NOISY) \ printf(format , ##x) -#else -#define DPFPRINTF(format, x...) \ - if (pf_status.debug >= PF_DEBUG_NOISY) \ - printf(format , ##x) -#endif -#ifdef __FreeBSD__ #define rs_malloc(x) malloc(x, M_TEMP, M_NOWAIT|M_ZERO) -#else -#define rs_malloc(x) malloc(x, M_TEMP, M_WAITOK|M_CANFAIL|M_ZERO) -#endif #define rs_free(x) free(x, M_TEMP) #else @@ -96,24 +85,22 @@ __FBSDID("$FreeBSD$"); #endif /* PFDEBUG */ #endif /* _KERNEL */ -#if defined(__FreeBSD__) && !defined(_KERNEL) -#undef V_pf_anchors -#define V_pf_anchors pf_anchors - -#undef pf_main_ruleset -#define pf_main_ruleset pf_main_anchor.ruleset -#endif - -#if defined(__FreeBSD__) && defined(_KERNEL) +#ifdef _KERNEL VNET_DEFINE(struct pf_anchor_global, pf_anchors); VNET_DEFINE(struct pf_anchor, pf_main_anchor); -#else +#else /* ! 
_KERNEL */ struct pf_anchor_global pf_anchors; struct pf_anchor pf_main_anchor; -#endif +#undef V_pf_anchors +#define V_pf_anchors pf_anchors +#undef pf_main_ruleset +#define pf_main_ruleset pf_main_anchor.ruleset +#endif /* _KERNEL */ static __inline int pf_anchor_compare(struct pf_anchor *, struct pf_anchor *); +static struct pf_anchor *pf_find_anchor(const char *); + RB_GENERATE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare); RB_GENERATE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare); @@ -169,7 +156,7 @@ pf_init_ruleset(struct pf_ruleset *ruleset) } } -struct pf_anchor * +static struct pf_anchor * pf_find_anchor(const char *path) { struct pf_anchor *key, *found; @@ -178,11 +165,7 @@ pf_find_anchor(const char *path) if (key == NULL) return (NULL); strlcpy(key->path, path, sizeof(key->path)); -#ifdef __FreeBSD__ found = RB_FIND(pf_anchor_global, &V_pf_anchors, key); -#else - found = RB_FIND(pf_anchor_global, &pf_anchors, key); -#endif rs_free(key); return (found); } @@ -208,11 +191,7 @@ pf_find_or_create_ruleset(const char *path) { char *p, *q, *r; struct pf_ruleset *ruleset; -#ifdef __FreeBSD__ struct pf_anchor *anchor = NULL, *dup, *parent = NULL; -#else - struct pf_anchor *anchor, *dup, *parent = NULL; -#endif if (path[0] == 0) return (&pf_main_ruleset); @@ -263,11 +242,7 @@ pf_find_or_create_ruleset(const char *path) strlcat(anchor->path, "/", sizeof(anchor->path)); } strlcat(anchor->path, anchor->name, sizeof(anchor->path)); -#ifdef __FreeBSD__ if ((dup = RB_INSERT(pf_anchor_global, &V_pf_anchors, anchor)) != -#else - if ((dup = RB_INSERT(pf_anchor_global, &pf_anchors, anchor)) != -#endif NULL) { printf("pf_find_or_create_ruleset: RB_INSERT1 " "'%s' '%s' collides with '%s' '%s'\n", @@ -284,11 +259,7 @@ pf_find_or_create_ruleset(const char *path) "RB_INSERT2 '%s' '%s' collides with " "'%s' '%s'\n", anchor->path, anchor->name, dup->path, dup->name); -#ifdef __FreeBSD__ RB_REMOVE(pf_anchor_global, &V_pf_anchors, -#else - 
RB_REMOVE(pf_anchor_global, &pf_anchors, -#endif anchor); rs_free(anchor); rs_free(p); @@ -324,11 +295,7 @@ pf_remove_if_empty_ruleset(struct pf_ruleset *ruleset) !TAILQ_EMPTY(ruleset->rules[i].inactive.ptr) || ruleset->rules[i].inactive.open) return; -#ifdef __FreeBSD__ RB_REMOVE(pf_anchor_global, &V_pf_anchors, ruleset->anchor); -#else - RB_REMOVE(pf_anchor_global, &pf_anchors, ruleset->anchor); -#endif if ((parent = ruleset->anchor->parent) != NULL) RB_REMOVE(pf_anchor_node, &parent->children, ruleset->anchor); diff --git a/sys/contrib/pf/net/pf_table.c b/sys/contrib/pf/net/pf_table.c index ea77e31..fa88045 100644 --- a/sys/contrib/pf/net/pf_table.c +++ b/sys/contrib/pf/net/pf_table.c @@ -30,31 +30,24 @@ * */ -#ifdef __FreeBSD__ -#include "opt_inet.h" -#include "opt_inet6.h" - #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#endif + +#include "opt_inet.h" +#include "opt_inet6.h" #include <sys/param.h> -#include <sys/systm.h> -#include <sys/socket.h> -#include <sys/mbuf.h> #include <sys/kernel.h> -#ifdef __FreeBSD__ +#include <sys/lock.h> #include <sys/malloc.h> -#else -#include <sys/pool.h> -#endif +#include <sys/mutex.h> +#include <sys/refcount.h> +#include <sys/rwlock.h> +#include <sys/socket.h> +#include <vm/uma.h> #include <net/if.h> -#include <net/route.h> -#include <netinet/in.h> -#ifndef __FreeBSD__ -#include <netinet/ip_ipsp.h> -#endif +#include <net/vnet.h> #include <net/pfvar.h> #define ACCEPT_FLAGS(flags, oklist) \ @@ -64,53 +57,6 @@ __FBSDID("$FreeBSD$"); return (EINVAL); \ } while (0) -#ifdef __FreeBSD__ -static inline int -_copyin(const void *uaddr, void *kaddr, size_t len) -{ - int r; - - PF_UNLOCK(); - r = copyin(uaddr, kaddr, len); - PF_LOCK(); - - return (r); -} - -static inline int -_copyout(const void *uaddr, void *kaddr, size_t len) -{ - int r; - - PF_UNLOCK(); - r = copyout(uaddr, kaddr, len); - PF_LOCK(); - - return (r); -} - -#define COPYIN(from, to, size, flags) \ - ((flags & PFR_FLAG_USERIOCTL) ? 
\ - _copyin((from), (to), (size)) : \ - (bcopy((from), (to), (size)), 0)) - -#define COPYOUT(from, to, size, flags) \ - ((flags & PFR_FLAG_USERIOCTL) ? \ - _copyout((from), (to), (size)) : \ - (bcopy((from), (to), (size)), 0)) - -#else -#define COPYIN(from, to, size, flags) \ - ((flags & PFR_FLAG_USERIOCTL) ? \ - copyin((from), (to), (size)) : \ - (bcopy((from), (to), (size)), 0)) - -#define COPYOUT(from, to, size, flags) \ - ((flags & PFR_FLAG_USERIOCTL) ? \ - copyout((from), (to), (size)) : \ - (bcopy((from), (to), (size)), 0)) -#endif - #define FILLIN_SIN(sin, addr) \ do { \ (sin).sin_len = sizeof(sin); \ @@ -164,7 +110,6 @@ struct pfr_walktree { struct pfi_dynaddr *pfrw1_dyn; } pfrw_1; int pfrw_free; - int pfrw_flags; }; #define pfrw_addr pfrw_1.pfrw1_addr #define pfrw_astats pfrw_1.pfrw1_astats @@ -175,77 +120,69 @@ struct pfr_walktree { #define senderr(e) do { rv = (e); goto _bad; } while (0) -#ifdef __FreeBSD__ -VNET_DEFINE(uma_zone_t, pfr_ktable_pl); -VNET_DEFINE(uma_zone_t, pfr_kentry_pl); -VNET_DEFINE(uma_zone_t, pfr_kcounters_pl); -VNET_DEFINE(struct sockaddr_in, pfr_sin); -#define V_pfr_sin VNET(pfr_sin) -VNET_DEFINE(struct sockaddr_in6, pfr_sin6); -#define V_pfr_sin6 VNET(pfr_sin6) -VNET_DEFINE(union sockaddr_union, pfr_mask); -#define V_pfr_mask VNET(pfr_mask) -VNET_DEFINE(struct pf_addr, pfr_ffaddr); -#define V_pfr_ffaddr VNET(pfr_ffaddr) -#else -struct pool pfr_ktable_pl; -struct pool pfr_kentry_pl; -struct pool pfr_kcounters_pl; -struct sockaddr_in pfr_sin; -struct sockaddr_in6 pfr_sin6; -union sockaddr_union pfr_mask; -struct pf_addr pfr_ffaddr; -#endif - -void pfr_copyout_addr(struct pfr_addr *, +static MALLOC_DEFINE(M_PFTABLE, "pf_table", "pf(4) tables structures"); +static VNET_DEFINE(uma_zone_t, pfr_kentry_z); +#define V_pfr_kentry_z VNET(pfr_kentry_z) +static VNET_DEFINE(uma_zone_t, pfr_kcounters_z); +#define V_pfr_kcounters_z VNET(pfr_kcounters_z) + +static struct pf_addr pfr_ffaddr = { + .addr32 = { 0xffffffff, 0xffffffff, 0xffffffff, 
0xffffffff } +}; + +static void pfr_copyout_addr(struct pfr_addr *, struct pfr_kentry *ke); -int pfr_validate_addr(struct pfr_addr *); -void pfr_enqueue_addrs(struct pfr_ktable *, +static int pfr_validate_addr(struct pfr_addr *); +static void pfr_enqueue_addrs(struct pfr_ktable *, struct pfr_kentryworkq *, int *, int); -void pfr_mark_addrs(struct pfr_ktable *); -struct pfr_kentry *pfr_lookup_addr(struct pfr_ktable *, +static void pfr_mark_addrs(struct pfr_ktable *); +static struct pfr_kentry + *pfr_lookup_addr(struct pfr_ktable *, struct pfr_addr *, int); -struct pfr_kentry *pfr_create_kentry(struct pfr_addr *, int); -void pfr_destroy_kentries(struct pfr_kentryworkq *); -void pfr_destroy_kentry(struct pfr_kentry *); -void pfr_insert_kentries(struct pfr_ktable *, +static struct pfr_kentry *pfr_create_kentry(struct pfr_addr *); +static void pfr_destroy_kentries(struct pfr_kentryworkq *); +static void pfr_destroy_kentry(struct pfr_kentry *); +static void pfr_insert_kentries(struct pfr_ktable *, struct pfr_kentryworkq *, long); -void pfr_remove_kentries(struct pfr_ktable *, +static void pfr_remove_kentries(struct pfr_ktable *, struct pfr_kentryworkq *); -void pfr_clstats_kentries(struct pfr_kentryworkq *, long, +static void pfr_clstats_kentries(struct pfr_kentryworkq *, long, int); -void pfr_reset_feedback(struct pfr_addr *, int, int); -void pfr_prepare_network(union sockaddr_union *, int, int); -int pfr_route_kentry(struct pfr_ktable *, +static void pfr_reset_feedback(struct pfr_addr *, int); +static void pfr_prepare_network(union sockaddr_union *, int, int); +static int pfr_route_kentry(struct pfr_ktable *, struct pfr_kentry *); -int pfr_unroute_kentry(struct pfr_ktable *, +static int pfr_unroute_kentry(struct pfr_ktable *, struct pfr_kentry *); -int pfr_walktree(struct radix_node *, void *); -int pfr_validate_table(struct pfr_table *, int, int); -int pfr_fix_anchor(char *); -void pfr_commit_ktable(struct pfr_ktable *, long); -void pfr_insert_ktables(struct 
pfr_ktableworkq *); -void pfr_insert_ktable(struct pfr_ktable *); -void pfr_setflags_ktables(struct pfr_ktableworkq *); -void pfr_setflags_ktable(struct pfr_ktable *, int); -void pfr_clstats_ktables(struct pfr_ktableworkq *, long, +static int pfr_walktree(struct radix_node *, void *); +static int pfr_validate_table(struct pfr_table *, int, int); +static int pfr_fix_anchor(char *); +static void pfr_commit_ktable(struct pfr_ktable *, long); +static void pfr_insert_ktables(struct pfr_ktableworkq *); +static void pfr_insert_ktable(struct pfr_ktable *); +static void pfr_setflags_ktables(struct pfr_ktableworkq *); +static void pfr_setflags_ktable(struct pfr_ktable *, int); +static void pfr_clstats_ktables(struct pfr_ktableworkq *, long, int); -void pfr_clstats_ktable(struct pfr_ktable *, long, int); -struct pfr_ktable *pfr_create_ktable(struct pfr_table *, long, int, int); -void pfr_destroy_ktables(struct pfr_ktableworkq *, int); -void pfr_destroy_ktable(struct pfr_ktable *, int); -int pfr_ktable_compare(struct pfr_ktable *, +static void pfr_clstats_ktable(struct pfr_ktable *, long, int); +static struct pfr_ktable + *pfr_create_ktable(struct pfr_table *, long, int); +static void pfr_destroy_ktables(struct pfr_ktableworkq *, int); +static void pfr_destroy_ktable(struct pfr_ktable *, int); +static int pfr_ktable_compare(struct pfr_ktable *, struct pfr_ktable *); -struct pfr_ktable *pfr_lookup_table(struct pfr_table *); -void pfr_clean_node_mask(struct pfr_ktable *, +static struct pfr_ktable + *pfr_lookup_table(struct pfr_table *); +static void pfr_clean_node_mask(struct pfr_ktable *, struct pfr_kentryworkq *); -int pfr_table_count(struct pfr_table *, int); -int pfr_skip_table(struct pfr_table *, +static int pfr_table_count(struct pfr_table *, int); +static int pfr_skip_table(struct pfr_table *, struct pfr_ktable *, int); -struct pfr_kentry *pfr_kentry_byidx(struct pfr_ktable *, int, int); +static struct pfr_kentry + *pfr_kentry_byidx(struct pfr_ktable *, int, int); 
-RB_PROTOTYPE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare); -RB_GENERATE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare); +static RB_PROTOTYPE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare); +static RB_GENERATE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare); struct pfr_ktablehead pfr_ktables; struct pfr_table pfr_nulltable; @@ -254,28 +191,23 @@ int pfr_ktable_cnt; void pfr_initialize(void) { -#ifndef __FreeBSD__ - pool_init(&pfr_ktable_pl, sizeof(struct pfr_ktable), 0, 0, 0, - "pfrktable", NULL); - pool_init(&pfr_kentry_pl, sizeof(struct pfr_kentry), 0, 0, 0, - "pfrkentry", NULL); - pool_init(&pfr_kcounters_pl, sizeof(struct pfr_kcounters), 0, 0, 0, - "pfrkcounters", NULL); - - pfr_sin.sin_len = sizeof(pfr_sin); - pfr_sin.sin_family = AF_INET; - pfr_sin6.sin6_len = sizeof(pfr_sin6); - pfr_sin6.sin6_family = AF_INET6; - - memset(&pfr_ffaddr, 0xff, sizeof(pfr_ffaddr)); -#else - V_pfr_sin.sin_len = sizeof(V_pfr_sin); - V_pfr_sin.sin_family = AF_INET; - V_pfr_sin6.sin6_len = sizeof(V_pfr_sin6); - V_pfr_sin6.sin6_family = AF_INET6; - - memset(&V_pfr_ffaddr, 0xff, sizeof(V_pfr_ffaddr)); -#endif + + V_pfr_kentry_z = uma_zcreate("pf table entries", + sizeof(struct pfr_kentry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, + 0); + V_pfr_kcounters_z = uma_zcreate("pf table counters", + sizeof(struct pfr_kcounters), NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, 0); + V_pf_limits[PF_LIMIT_TABLE_ENTRIES].zone = V_pfr_kentry_z; + V_pf_limits[PF_LIMIT_TABLE_ENTRIES].limit = PFR_KENTRY_HIWAT; +} + +void +pfr_cleanup(void) +{ + + uma_zdestroy(V_pfr_kentry_z); + uma_zdestroy(V_pfr_kcounters_z); } int @@ -283,9 +215,10 @@ pfr_clr_addrs(struct pfr_table *tbl, int *ndel, int flags) { struct pfr_ktable *kt; struct pfr_kentryworkq workq; - int s; - ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY); + PF_RULES_WASSERT(); + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) return (EINVAL); kt = 
pfr_lookup_table(tbl); @@ -296,16 +229,8 @@ pfr_clr_addrs(struct pfr_table *tbl, int *ndel, int flags) pfr_enqueue_addrs(kt, &workq, ndel, 0); if (!(flags & PFR_FLAG_DUMMY)) { - if (flags & PFR_FLAG_ATOMIC) - s = splsoftnet(); pfr_remove_kentries(kt, &workq); - if (flags & PFR_FLAG_ATOMIC) - splx(s); - if (kt->pfrkt_cnt) { - printf("pfr_clr_addrs: corruption detected (%d).\n", - kt->pfrkt_cnt); - kt->pfrkt_cnt = 0; - } + KASSERT(kt->pfrkt_cnt == 0, ("%s: non-null pfrkt_cnt", __func__)); } return (0); } @@ -317,12 +242,13 @@ pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, struct pfr_ktable *kt, *tmpkt; struct pfr_kentryworkq workq; struct pfr_kentry *p, *q; - struct pfr_addr ad; - int i, rv, s, xadd = 0; + struct pfr_addr *ad; + int i, rv, xadd = 0; long tzero = time_second; - ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY | - PFR_FLAG_FEEDBACK); + PF_RULES_WASSERT(); + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK); if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) return (EINVAL); kt = pfr_lookup_table(tbl); @@ -330,53 +256,42 @@ pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, return (ESRCH); if (kt->pfrkt_flags & PFR_TFLAG_CONST) return (EPERM); - tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0, - !(flags & PFR_FLAG_USERIOCTL)); + tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0); if (tmpkt == NULL) return (ENOMEM); SLIST_INIT(&workq); - for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad), flags)) - senderr(EFAULT); - if (pfr_validate_addr(&ad)) + for (i = 0, ad = addr; i < size; i++, ad++) { + if (pfr_validate_addr(ad)) senderr(EINVAL); - p = pfr_lookup_addr(kt, &ad, 1); - q = pfr_lookup_addr(tmpkt, &ad, 1); + p = pfr_lookup_addr(kt, ad, 1); + q = pfr_lookup_addr(tmpkt, ad, 1); if (flags & PFR_FLAG_FEEDBACK) { if (q != NULL) - ad.pfra_fback = PFR_FB_DUPLICATE; + ad->pfra_fback = PFR_FB_DUPLICATE; else if (p == NULL) - ad.pfra_fback = PFR_FB_ADDED; - else if (p->pfrke_not != 
ad.pfra_not) - ad.pfra_fback = PFR_FB_CONFLICT; + ad->pfra_fback = PFR_FB_ADDED; + else if (p->pfrke_not != ad->pfra_not) + ad->pfra_fback = PFR_FB_CONFLICT; else - ad.pfra_fback = PFR_FB_NONE; + ad->pfra_fback = PFR_FB_NONE; } if (p == NULL && q == NULL) { - p = pfr_create_kentry(&ad, - !(flags & PFR_FLAG_USERIOCTL)); + p = pfr_create_kentry(ad); if (p == NULL) senderr(ENOMEM); if (pfr_route_kentry(tmpkt, p)) { pfr_destroy_kentry(p); - ad.pfra_fback = PFR_FB_NONE; + ad->pfra_fback = PFR_FB_NONE; } else { SLIST_INSERT_HEAD(&workq, p, pfrke_workq); xadd++; } } - if (flags & PFR_FLAG_FEEDBACK) - if (COPYOUT(&ad, addr+i, sizeof(ad), flags)) - senderr(EFAULT); } pfr_clean_node_mask(tmpkt, &workq); - if (!(flags & PFR_FLAG_DUMMY)) { - if (flags & PFR_FLAG_ATOMIC) - s = splsoftnet(); + if (!(flags & PFR_FLAG_DUMMY)) pfr_insert_kentries(kt, &workq, tzero); - if (flags & PFR_FLAG_ATOMIC) - splx(s); - } else + else pfr_destroy_kentries(&workq); if (nadd != NULL) *nadd = xadd; @@ -386,7 +301,7 @@ _bad: pfr_clean_node_mask(tmpkt, &workq); pfr_destroy_kentries(&workq); if (flags & PFR_FLAG_FEEDBACK) - pfr_reset_feedback(addr, size, flags); + pfr_reset_feedback(addr, size); pfr_destroy_ktable(tmpkt, 0); return (rv); } @@ -398,11 +313,12 @@ pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, struct pfr_ktable *kt; struct pfr_kentryworkq workq; struct pfr_kentry *p; - struct pfr_addr ad; - int i, rv, s, xdel = 0, log = 1; + struct pfr_addr *ad; + int i, rv, xdel = 0, log = 1; + + PF_RULES_WASSERT(); - ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY | - PFR_FLAG_FEEDBACK); + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK); if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) return (EINVAL); kt = pfr_lookup_table(tbl); @@ -418,7 +334,7 @@ pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, * * one is O(N) and is better for large 'n' * one is O(n*LOG(N)) and is better for small 'n' - * + * * following code try to decide which 
one is best. */ for (i = kt->pfrkt_cnt; i > 0; i >>= 1) @@ -428,56 +344,44 @@ pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, pfr_mark_addrs(kt); } else { /* iterate over addresses to delete */ - for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad), flags)) - return (EFAULT); - if (pfr_validate_addr(&ad)) + for (i = 0, ad = addr; i < size; i++, ad++) { + if (pfr_validate_addr(ad)) return (EINVAL); - p = pfr_lookup_addr(kt, &ad, 1); + p = pfr_lookup_addr(kt, ad, 1); if (p != NULL) p->pfrke_mark = 0; } } SLIST_INIT(&workq); - for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad), flags)) - senderr(EFAULT); - if (pfr_validate_addr(&ad)) + for (i = 0, ad = addr; i < size; i++, ad++) { + if (pfr_validate_addr(ad)) senderr(EINVAL); - p = pfr_lookup_addr(kt, &ad, 1); + p = pfr_lookup_addr(kt, ad, 1); if (flags & PFR_FLAG_FEEDBACK) { if (p == NULL) - ad.pfra_fback = PFR_FB_NONE; - else if (p->pfrke_not != ad.pfra_not) - ad.pfra_fback = PFR_FB_CONFLICT; + ad->pfra_fback = PFR_FB_NONE; + else if (p->pfrke_not != ad->pfra_not) + ad->pfra_fback = PFR_FB_CONFLICT; else if (p->pfrke_mark) - ad.pfra_fback = PFR_FB_DUPLICATE; + ad->pfra_fback = PFR_FB_DUPLICATE; else - ad.pfra_fback = PFR_FB_DELETED; + ad->pfra_fback = PFR_FB_DELETED; } - if (p != NULL && p->pfrke_not == ad.pfra_not && + if (p != NULL && p->pfrke_not == ad->pfra_not && !p->pfrke_mark) { p->pfrke_mark = 1; SLIST_INSERT_HEAD(&workq, p, pfrke_workq); xdel++; } - if (flags & PFR_FLAG_FEEDBACK) - if (COPYOUT(&ad, addr+i, sizeof(ad), flags)) - senderr(EFAULT); } - if (!(flags & PFR_FLAG_DUMMY)) { - if (flags & PFR_FLAG_ATOMIC) - s = splsoftnet(); + if (!(flags & PFR_FLAG_DUMMY)) pfr_remove_kentries(kt, &workq); - if (flags & PFR_FLAG_ATOMIC) - splx(s); - } if (ndel != NULL) *ndel = xdel; return (0); _bad: if (flags & PFR_FLAG_FEEDBACK) - pfr_reset_feedback(addr, size, flags); + pfr_reset_feedback(addr, size); return (rv); } @@ -490,11 +394,12 @@ pfr_set_addrs(struct 
pfr_table *tbl, struct pfr_addr *addr, int size, struct pfr_kentryworkq addq, delq, changeq; struct pfr_kentry *p, *q; struct pfr_addr ad; - int i, rv, s, xadd = 0, xdel = 0, xchange = 0; + int i, rv, xadd = 0, xdel = 0, xchange = 0; long tzero = time_second; - ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY | - PFR_FLAG_FEEDBACK); + PF_RULES_WASSERT(); + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK); if (pfr_validate_table(tbl, ignore_pfrt_flags, flags & PFR_FLAG_USERIOCTL)) return (EINVAL); @@ -503,8 +408,7 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, return (ESRCH); if (kt->pfrkt_flags & PFR_TFLAG_CONST) return (EPERM); - tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0, - !(flags & PFR_FLAG_USERIOCTL)); + tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0); if (tmpkt == NULL) return (ENOMEM); pfr_mark_addrs(kt); @@ -512,8 +416,11 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, SLIST_INIT(&delq); SLIST_INIT(&changeq); for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad), flags)) - senderr(EFAULT); + /* + * XXXGL: undertand pf_if usage of this function + * and make ad a moving pointer + */ + bcopy(addr + i, &ad, sizeof(ad)); if (pfr_validate_addr(&ad)) senderr(EINVAL); ad.pfra_fback = PFR_FB_NONE; @@ -535,8 +442,7 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, ad.pfra_fback = PFR_FB_DUPLICATE; goto _skip; } - p = pfr_create_kentry(&ad, - !(flags & PFR_FLAG_USERIOCTL)); + p = pfr_create_kentry(&ad); if (p == NULL) senderr(ENOMEM); if (pfr_route_kentry(tmpkt, p)) { @@ -550,8 +456,7 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, } _skip: if (flags & PFR_FLAG_FEEDBACK) - if (COPYOUT(&ad, addr+i, sizeof(ad), flags)) - senderr(EFAULT); + bcopy(&ad, addr + i, sizeof(ad)); } pfr_enqueue_addrs(kt, &delq, &xdel, ENQUEUE_UNMARKED_ONLY); if ((flags & PFR_FLAG_FEEDBACK) && *size2) { @@ -563,20 +468,15 @@ _skip: SLIST_FOREACH(p, &delq, pfrke_workq) { 
pfr_copyout_addr(&ad, p); ad.pfra_fback = PFR_FB_DELETED; - if (COPYOUT(&ad, addr+size+i, sizeof(ad), flags)) - senderr(EFAULT); + bcopy(&ad, addr + size + i, sizeof(ad)); i++; } } pfr_clean_node_mask(tmpkt, &addq); if (!(flags & PFR_FLAG_DUMMY)) { - if (flags & PFR_FLAG_ATOMIC) - s = splsoftnet(); pfr_insert_kentries(kt, &addq, tzero); pfr_remove_kentries(kt, &delq); pfr_clstats_kentries(&changeq, tzero, INVERT_NEG_FLAG); - if (flags & PFR_FLAG_ATOMIC) - splx(s); } else pfr_destroy_kentries(&addq); if (nadd != NULL) @@ -593,7 +493,7 @@ _bad: pfr_clean_node_mask(tmpkt, &addq); pfr_destroy_kentries(&addq); if (flags & PFR_FLAG_FEEDBACK) - pfr_reset_feedback(addr, size, flags); + pfr_reset_feedback(addr, size); pfr_destroy_ktable(tmpkt, 0); return (rv); } @@ -604,9 +504,11 @@ pfr_tst_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, { struct pfr_ktable *kt; struct pfr_kentry *p; - struct pfr_addr ad; + struct pfr_addr *ad; int i, xmatch = 0; + PF_RULES_RASSERT(); + ACCEPT_FLAGS(flags, PFR_FLAG_REPLACE); if (pfr_validate_table(tbl, 0, 0)) return (EINVAL); @@ -614,22 +516,18 @@ pfr_tst_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) return (ESRCH); - for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad), flags)) - return (EFAULT); - if (pfr_validate_addr(&ad)) + for (i = 0, ad = addr; i < size; i++, ad++) { + if (pfr_validate_addr(ad)) return (EINVAL); - if (ADDR_NETWORK(&ad)) + if (ADDR_NETWORK(ad)) return (EINVAL); - p = pfr_lookup_addr(kt, &ad, 0); + p = pfr_lookup_addr(kt, ad, 0); if (flags & PFR_FLAG_REPLACE) - pfr_copyout_addr(&ad, p); - ad.pfra_fback = (p == NULL) ? PFR_FB_NONE : + pfr_copyout_addr(ad, p); + ad->pfra_fback = (p == NULL) ? PFR_FB_NONE : (p->pfrke_not ? 
PFR_FB_NOTMATCH : PFR_FB_MATCH); if (p != NULL && !p->pfrke_not) xmatch++; - if (COPYOUT(&ad, addr+i, sizeof(ad), flags)) - return (EFAULT); } if (nmatch != NULL) *nmatch = xmatch; @@ -644,6 +542,8 @@ pfr_get_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int *size, struct pfr_walktree w; int rv; + PF_RULES_RASSERT(); + ACCEPT_FLAGS(flags, 0); if (pfr_validate_table(tbl, 0, 0)) return (EINVAL); @@ -659,27 +559,16 @@ pfr_get_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int *size, w.pfrw_op = PFRW_GET_ADDRS; w.pfrw_addr = addr; w.pfrw_free = kt->pfrkt_cnt; - w.pfrw_flags = flags; -#ifdef __FreeBSD__ rv = kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w); -#else - rv = rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w); -#endif if (!rv) -#ifdef __FreeBSD__ rv = kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, &w); -#else - rv = rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w); -#endif if (rv) return (rv); - if (w.pfrw_free) { - printf("pfr_get_addrs: corruption detected (%d).\n", - w.pfrw_free); - return (ENOTTY); - } + KASSERT(w.pfrw_free == 0, ("%s: corruption detected (%d)", __func__, + w.pfrw_free)); + *size = kt->pfrkt_cnt; return (0); } @@ -691,11 +580,13 @@ pfr_get_astats(struct pfr_table *tbl, struct pfr_astats *addr, int *size, struct pfr_ktable *kt; struct pfr_walktree w; struct pfr_kentryworkq workq; - int rv, s; + int rv; long tzero = time_second; + PF_RULES_RASSERT(); + /* XXX PFR_FLAG_CLSTATS disabled */ - ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC); + ACCEPT_FLAGS(flags, 0); if (pfr_validate_table(tbl, 0, 0)) return (EINVAL); kt = pfr_lookup_table(tbl); @@ -710,27 +601,14 @@ pfr_get_astats(struct pfr_table *tbl, struct pfr_astats *addr, int *size, w.pfrw_op = PFRW_GET_ASTATS; w.pfrw_astats = addr; w.pfrw_free = kt->pfrkt_cnt; - w.pfrw_flags = flags; - if (flags & PFR_FLAG_ATOMIC) - s = splsoftnet(); -#ifdef __FreeBSD__ rv = kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w); -#else - rv = rn_walktree(kt->pfrkt_ip4, pfr_walktree, 
&w); -#endif if (!rv) -#ifdef __FreeBSD__ - rv = kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, + rv = kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, &w); -#else - rv = rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w); -#endif if (!rv && (flags & PFR_FLAG_CLSTATS)) { pfr_enqueue_addrs(kt, &workq, NULL, 0); pfr_clstats_kentries(&workq, tzero, 0); } - if (flags & PFR_FLAG_ATOMIC) - splx(s); if (rv) return (rv); @@ -750,28 +628,25 @@ pfr_clr_astats(struct pfr_table *tbl, struct pfr_addr *addr, int size, struct pfr_ktable *kt; struct pfr_kentryworkq workq; struct pfr_kentry *p; - struct pfr_addr ad; - int i, rv, s, xzero = 0; + struct pfr_addr *ad; + int i, rv, xzero = 0; - ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY | - PFR_FLAG_FEEDBACK); + PF_RULES_WASSERT(); + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK); if (pfr_validate_table(tbl, 0, 0)) return (EINVAL); kt = pfr_lookup_table(tbl); if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) return (ESRCH); SLIST_INIT(&workq); - for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad), flags)) - senderr(EFAULT); - if (pfr_validate_addr(&ad)) + for (i = 0, ad = addr; i < size; i++, ad++) { + if (pfr_validate_addr(ad)) senderr(EINVAL); - p = pfr_lookup_addr(kt, &ad, 1); + p = pfr_lookup_addr(kt, ad, 1); if (flags & PFR_FLAG_FEEDBACK) { - ad.pfra_fback = (p != NULL) ? + ad->pfra_fback = (p != NULL) ? 
PFR_FB_CLEARED : PFR_FB_NONE; - if (COPYOUT(&ad, addr+i, sizeof(ad), flags)) - senderr(EFAULT); } if (p != NULL) { SLIST_INSERT_HEAD(&workq, p, pfrke_workq); @@ -779,23 +654,18 @@ pfr_clr_astats(struct pfr_table *tbl, struct pfr_addr *addr, int size, } } - if (!(flags & PFR_FLAG_DUMMY)) { - if (flags & PFR_FLAG_ATOMIC) - s = splsoftnet(); + if (!(flags & PFR_FLAG_DUMMY)) pfr_clstats_kentries(&workq, 0, 0); - if (flags & PFR_FLAG_ATOMIC) - splx(s); - } if (nzero != NULL) *nzero = xzero; return (0); _bad: if (flags & PFR_FLAG_FEEDBACK) - pfr_reset_feedback(addr, size, flags); + pfr_reset_feedback(addr, size); return (rv); } -int +static int pfr_validate_addr(struct pfr_addr *ad) { int i; @@ -829,7 +699,7 @@ pfr_validate_addr(struct pfr_addr *ad) return (0); } -void +static void pfr_enqueue_addrs(struct pfr_ktable *kt, struct pfr_kentryworkq *workq, int *naddr, int sweep) { @@ -840,58 +710,37 @@ pfr_enqueue_addrs(struct pfr_ktable *kt, struct pfr_kentryworkq *workq, w.pfrw_op = sweep ? PFRW_SWEEP : PFRW_ENQUEUE; w.pfrw_workq = workq; if (kt->pfrkt_ip4 != NULL) -#ifdef __FreeBSD__ - if (kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, + if (kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w)) -#else - if (rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w)) -#endif printf("pfr_enqueue_addrs: IPv4 walktree failed.\n"); if (kt->pfrkt_ip6 != NULL) -#ifdef __FreeBSD__ - if (kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, + if (kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, &w)) -#else - if (rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w)) -#endif printf("pfr_enqueue_addrs: IPv6 walktree failed.\n"); if (naddr != NULL) *naddr = w.pfrw_cnt; } -void +static void pfr_mark_addrs(struct pfr_ktable *kt) { struct pfr_walktree w; bzero(&w, sizeof(w)); w.pfrw_op = PFRW_MARK; -#ifdef __FreeBSD__ if (kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w)) -#else - if (rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w)) -#endif printf("pfr_mark_addrs: IPv4 
walktree failed.\n"); -#ifdef __FreeBSD__ if (kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, &w)) -#else - if (rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w)) -#endif printf("pfr_mark_addrs: IPv6 walktree failed.\n"); } -struct pfr_kentry * +static struct pfr_kentry * pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact) { union sockaddr_union sa, mask; -#ifdef __FreeBSD__ struct radix_node_head *head = NULL; -#else - struct radix_node_head *head; -#endif struct pfr_kentry *ke; - int s; bzero(&sa, sizeof(sa)); if (ad->pfra_af == AF_INET) { @@ -903,12 +752,7 @@ pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact) } if (ADDR_NETWORK(ad)) { pfr_prepare_network(&mask, ad->pfra_af, ad->pfra_net); - s = splsoftnet(); /* rn_lookup makes use of globals */ -#ifdef __FreeBSD__ - PF_LOCK_ASSERT(); -#endif ke = (struct pfr_kentry *)rn_lookup(&sa, &mask, head); - splx(s); if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; } else { @@ -921,19 +765,12 @@ pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact) return (ke); } -struct pfr_kentry * -pfr_create_kentry(struct pfr_addr *ad, int intr) +static struct pfr_kentry * +pfr_create_kentry(struct pfr_addr *ad) { struct pfr_kentry *ke; -#ifdef __FreeBSD__ - ke = pool_get(&V_pfr_kentry_pl, PR_NOWAIT | PR_ZERO); -#else - if (intr) - ke = pool_get(&pfr_kentry_pl, PR_NOWAIT | PR_ZERO); - else - ke = pool_get(&pfr_kentry_pl, PR_WAITOK|PR_ZERO|PR_LIMITFAIL); -#endif + ke = uma_zalloc(V_pfr_kentry_z, M_NOWAIT | M_ZERO); if (ke == NULL) return (NULL); @@ -947,7 +784,7 @@ pfr_create_kentry(struct pfr_addr *ad, int intr) return (ke); } -void +static void pfr_destroy_kentries(struct pfr_kentryworkq *workq) { struct pfr_kentry *p, *q; @@ -958,20 +795,15 @@ pfr_destroy_kentries(struct pfr_kentryworkq *workq) } } -void +static void pfr_destroy_kentry(struct pfr_kentry *ke) { if (ke->pfrke_counters) -#ifdef __FreeBSD__ - pool_put(&V_pfr_kcounters_pl, ke->pfrke_counters); - 
pool_put(&V_pfr_kentry_pl, ke); -#else - pool_put(&pfr_kcounters_pl, ke->pfrke_counters); - pool_put(&pfr_kentry_pl, ke); -#endif + uma_zfree(V_pfr_kcounters_z, ke->pfrke_counters); + uma_zfree(V_pfr_kentry_z, ke); } -void +static void pfr_insert_kentries(struct pfr_ktable *kt, struct pfr_kentryworkq *workq, long tzero) { @@ -1000,7 +832,7 @@ pfr_insert_kentry(struct pfr_ktable *kt, struct pfr_addr *ad, long tzero) p = pfr_lookup_addr(kt, ad, 1); if (p != NULL) return (0); - p = pfr_create_kentry(ad, 1); + p = pfr_create_kentry(ad); if (p == NULL) return (EINVAL); @@ -1014,7 +846,7 @@ pfr_insert_kentry(struct pfr_ktable *kt, struct pfr_addr *ad, long tzero) return (0); } -void +static void pfr_remove_kentries(struct pfr_ktable *kt, struct pfr_kentryworkq *workq) { @@ -1029,7 +861,7 @@ pfr_remove_kentries(struct pfr_ktable *kt, pfr_destroy_kentries(workq); } -void +static void pfr_clean_node_mask(struct pfr_ktable *kt, struct pfr_kentryworkq *workq) { @@ -1039,45 +871,33 @@ pfr_clean_node_mask(struct pfr_ktable *kt, pfr_unroute_kentry(kt, p); } -void +static void pfr_clstats_kentries(struct pfr_kentryworkq *workq, long tzero, int negchange) { struct pfr_kentry *p; - int s; SLIST_FOREACH(p, workq, pfrke_workq) { - s = splsoftnet(); if (negchange) p->pfrke_not = !p->pfrke_not; if (p->pfrke_counters) { -#ifdef __FreeBSD__ - pool_put(&V_pfr_kcounters_pl, p->pfrke_counters); -#else - pool_put(&pfr_kcounters_pl, p->pfrke_counters); -#endif + uma_zfree(V_pfr_kcounters_z, p->pfrke_counters); p->pfrke_counters = NULL; } - splx(s); p->pfrke_tzero = tzero; } } -void -pfr_reset_feedback(struct pfr_addr *addr, int size, int flags) +static void +pfr_reset_feedback(struct pfr_addr *addr, int size) { - struct pfr_addr ad; + struct pfr_addr *ad; int i; - for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad), flags)) - break; - ad.pfra_fback = PFR_FB_NONE; - if (COPYOUT(&ad, addr+i, sizeof(ad), flags)) - break; - } + for (i = 0, ad = addr; i < size; i++, ad++) + 
ad->pfra_fback = PFR_FB_NONE; } -void +static void pfr_prepare_network(union sockaddr_union *sa, int af, int net) { int i; @@ -1102,17 +922,12 @@ pfr_prepare_network(union sockaddr_union *sa, int af, int net) } } -int +static int pfr_route_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) { union sockaddr_union mask; struct radix_node *rn; -#ifdef __FreeBSD__ struct radix_node_head *head = NULL; -#else - struct radix_node_head *head; -#endif - int s; bzero(ke->pfrke_node, sizeof(ke->pfrke_node)); if (ke->pfrke_af == AF_INET) @@ -1120,63 +935,32 @@ pfr_route_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) else if (ke->pfrke_af == AF_INET6) head = kt->pfrkt_ip6; - s = splsoftnet(); -#ifdef __FreeBSD__ - PF_LOCK_ASSERT(); -#endif if (KENTRY_NETWORK(ke)) { pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net); -#ifdef __FreeBSD__ rn = rn_addroute(&ke->pfrke_sa, &mask, head, ke->pfrke_node); -#else - rn = rn_addroute(&ke->pfrke_sa, &mask, head, ke->pfrke_node, 0); -#endif } else -#ifdef __FreeBSD__ rn = rn_addroute(&ke->pfrke_sa, NULL, head, ke->pfrke_node); -#else - rn = rn_addroute(&ke->pfrke_sa, NULL, head, ke->pfrke_node, 0); -#endif - splx(s); return (rn == NULL ? 
-1 : 0); } -int +static int pfr_unroute_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) { union sockaddr_union mask; struct radix_node *rn; -#ifdef __FreeBSD__ struct radix_node_head *head = NULL; -#else - struct radix_node_head *head; -#endif - int s; if (ke->pfrke_af == AF_INET) head = kt->pfrkt_ip4; else if (ke->pfrke_af == AF_INET6) head = kt->pfrkt_ip6; - s = splsoftnet(); -#ifdef __FreeBSD__ - PF_LOCK_ASSERT(); -#endif if (KENTRY_NETWORK(ke)) { pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net); -#ifdef __FreeBSD__ rn = rn_delete(&ke->pfrke_sa, &mask, head); -#else - rn = rn_delete(&ke->pfrke_sa, &mask, head, NULL); -#endif } else -#ifdef __FreeBSD__ rn = rn_delete(&ke->pfrke_sa, NULL, head); -#else - rn = rn_delete(&ke->pfrke_sa, NULL, head, NULL); -#endif - splx(s); if (rn == NULL) { printf("pfr_unroute_kentry: delete failed.\n"); @@ -1185,7 +969,7 @@ pfr_unroute_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) return (0); } -void +static void pfr_copyout_addr(struct pfr_addr *ad, struct pfr_kentry *ke) { bzero(ad, sizeof(*ad)); @@ -1200,12 +984,11 @@ pfr_copyout_addr(struct pfr_addr *ad, struct pfr_kentry *ke) ad->pfra_ip6addr = ke->pfrke_sa.sin6.sin6_addr; } -int +static int pfr_walktree(struct radix_node *rn, void *arg) { struct pfr_kentry *ke = (struct pfr_kentry *)rn; struct pfr_walktree *w = arg; - int s, flags = w->pfrw_flags; switch (w->pfrw_op) { case PFRW_MARK: @@ -1221,11 +1004,7 @@ pfr_walktree(struct radix_node *rn, void *arg) break; case PFRW_GET_ADDRS: if (w->pfrw_free-- > 0) { - struct pfr_addr ad; - - pfr_copyout_addr(&ad, ke); - if (copyout(&ad, w->pfrw_addr, sizeof(ad))) - return (EFAULT); + pfr_copyout_addr(w->pfrw_addr, ke); w->pfrw_addr++; } break; @@ -1235,7 +1014,6 @@ pfr_walktree(struct radix_node *rn, void *arg) pfr_copyout_addr(&as.pfras_a, ke); - s = splsoftnet(); if (ke->pfrke_counters) { bcopy(ke->pfrke_counters->pfrkc_packets, as.pfras_packets, sizeof(as.pfras_packets)); @@ -1246,11 +1024,9 @@ pfr_walktree(struct 
radix_node *rn, void *arg) bzero(as.pfras_bytes, sizeof(as.pfras_bytes)); as.pfras_a.pfra_fback = PFR_FB_NOCOUNT; } - splx(s); as.pfras_tzero = ke->pfrke_tzero; - if (COPYOUT(&as, w->pfrw_astats, sizeof(as), flags)) - return (EFAULT); + bcopy(&as, w->pfrw_astats, sizeof(as)); w->pfrw_astats++; } break; @@ -1263,40 +1039,28 @@ pfr_walktree(struct radix_node *rn, void *arg) } break; case PFRW_DYNADDR_UPDATE: + { + union sockaddr_union pfr_mask; + if (ke->pfrke_af == AF_INET) { if (w->pfrw_dyn->pfid_acnt4++ > 0) break; -#ifdef __FreeBSD__ - pfr_prepare_network(&V_pfr_mask, AF_INET, ke->pfrke_net); -#else pfr_prepare_network(&pfr_mask, AF_INET, ke->pfrke_net); -#endif - w->pfrw_dyn->pfid_addr4 = *SUNION2PF( - &ke->pfrke_sa, AF_INET); - w->pfrw_dyn->pfid_mask4 = *SUNION2PF( -#ifdef __FreeBSD__ - &V_pfr_mask, AF_INET); -#else - &pfr_mask, AF_INET); -#endif + w->pfrw_dyn->pfid_addr4 = *SUNION2PF(&ke->pfrke_sa, + AF_INET); + w->pfrw_dyn->pfid_mask4 = *SUNION2PF(&pfr_mask, + AF_INET); } else if (ke->pfrke_af == AF_INET6){ if (w->pfrw_dyn->pfid_acnt6++ > 0) break; -#ifdef __FreeBSD__ - pfr_prepare_network(&V_pfr_mask, AF_INET6, ke->pfrke_net); -#else pfr_prepare_network(&pfr_mask, AF_INET6, ke->pfrke_net); -#endif - w->pfrw_dyn->pfid_addr6 = *SUNION2PF( - &ke->pfrke_sa, AF_INET6); - w->pfrw_dyn->pfid_mask6 = *SUNION2PF( -#ifdef __FreeBSD__ - &V_pfr_mask, AF_INET6); -#else - &pfr_mask, AF_INET6); -#endif + w->pfrw_dyn->pfid_addr6 = *SUNION2PF(&ke->pfrke_sa, + AF_INET6); + w->pfrw_dyn->pfid_mask6 = *SUNION2PF(&pfr_mask, + AF_INET6); } break; + } } return (0); } @@ -1306,10 +1070,9 @@ pfr_clr_tables(struct pfr_table *filter, int *ndel, int flags) { struct pfr_ktableworkq workq; struct pfr_ktable *p; - int s, xdel = 0; + int xdel = 0; - ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY | - PFR_FLAG_ALLRSETS); + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_ALLRSETS); if (pfr_fix_anchor(filter->pfrt_anchor)) return (EINVAL); if (pfr_table_count(filter, flags) < 0) @@ -1327,13 
+1090,8 @@ pfr_clr_tables(struct pfr_table *filter, int *ndel, int flags) SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); xdel++; } - if (!(flags & PFR_FLAG_DUMMY)) { - if (flags & PFR_FLAG_ATOMIC) - s = splsoftnet(); + if (!(flags & PFR_FLAG_DUMMY)) pfr_setflags_ktables(&workq); - if (flags & PFR_FLAG_ATOMIC) - splx(s); - } if (ndel != NULL) *ndel = xdel; return (0); @@ -1344,23 +1102,21 @@ pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags) { struct pfr_ktableworkq addq, changeq; struct pfr_ktable *p, *q, *r, key; - int i, rv, s, xadd = 0; + int i, rv, xadd = 0; long tzero = time_second; - ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY); + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); SLIST_INIT(&addq); SLIST_INIT(&changeq); for (i = 0; i < size; i++) { - if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags)) - senderr(EFAULT); + bcopy(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t)); if (pfr_validate_table(&key.pfrkt_t, PFR_TFLAG_USRMASK, flags & PFR_FLAG_USERIOCTL)) senderr(EINVAL); key.pfrkt_flags |= PFR_TFLAG_ACTIVE; p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); if (p == NULL) { - p = pfr_create_ktable(&key.pfrkt_t, tzero, 1, - !(flags & PFR_FLAG_USERIOCTL)); + p = pfr_create_ktable(&key.pfrkt_t, tzero, 1); if (p == NULL) senderr(ENOMEM); SLIST_FOREACH(q, &addq, pfrkt_workq) { @@ -1386,8 +1142,7 @@ pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags) } } key.pfrkt_flags = 0; - r = pfr_create_ktable(&key.pfrkt_t, 0, 1, - !(flags & PFR_FLAG_USERIOCTL)); + r = pfr_create_ktable(&key.pfrkt_t, 0, 1); if (r == NULL) senderr(ENOMEM); SLIST_INSERT_HEAD(&addq, r, pfrkt_workq); @@ -1405,12 +1160,8 @@ _skip: ; } if (!(flags & PFR_FLAG_DUMMY)) { - if (flags & PFR_FLAG_ATOMIC) - s = splsoftnet(); pfr_insert_ktables(&addq); pfr_setflags_ktables(&changeq); - if (flags & PFR_FLAG_ATOMIC) - splx(s); } else pfr_destroy_ktables(&addq, 0); if (nadd != NULL) @@ -1426,13 +1177,12 @@ pfr_del_tables(struct pfr_table *tbl, int size, int *ndel, int flags) 
{ struct pfr_ktableworkq workq; struct pfr_ktable *p, *q, key; - int i, s, xdel = 0; + int i, xdel = 0; - ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY); + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); SLIST_INIT(&workq); for (i = 0; i < size; i++) { - if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags)) - return (EFAULT); + bcopy(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t)); if (pfr_validate_table(&key.pfrkt_t, 0, flags & PFR_FLAG_USERIOCTL)) return (EINVAL); @@ -1449,13 +1199,8 @@ _skip: ; } - if (!(flags & PFR_FLAG_DUMMY)) { - if (flags & PFR_FLAG_ATOMIC) - s = splsoftnet(); + if (!(flags & PFR_FLAG_DUMMY)) pfr_setflags_ktables(&workq); - if (flags & PFR_FLAG_ATOMIC) - splx(s); - } if (ndel != NULL) *ndel = xdel; return (0); @@ -1468,6 +1213,8 @@ pfr_get_tables(struct pfr_table *filter, struct pfr_table *tbl, int *size, struct pfr_ktable *p; int n, nn; + PF_RULES_RASSERT(); + ACCEPT_FLAGS(flags, PFR_FLAG_ALLRSETS); if (pfr_fix_anchor(filter->pfrt_anchor)) return (EINVAL); @@ -1483,13 +1230,11 @@ pfr_get_tables(struct pfr_table *filter, struct pfr_table *tbl, int *size, continue; if (n-- <= 0) continue; - if (COPYOUT(&p->pfrkt_t, tbl++, sizeof(*tbl), flags)) - return (EFAULT); - } - if (n) { - printf("pfr_get_tables: corruption detected (%d).\n", n); - return (ENOTTY); + bcopy(&p->pfrkt_t, tbl++, sizeof(*tbl)); } + + KASSERT(n == 0, ("%s: corruption detected (%d)", __func__, n)); + *size = nn; return (0); } @@ -1500,11 +1245,11 @@ pfr_get_tstats(struct pfr_table *filter, struct pfr_tstats *tbl, int *size, { struct pfr_ktable *p; struct pfr_ktableworkq workq; - int s, n, nn; + int n, nn; long tzero = time_second; /* XXX PFR_FLAG_CLSTATS disabled */ - ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_ALLRSETS); + ACCEPT_FLAGS(flags, PFR_FLAG_ALLRSETS); if (pfr_fix_anchor(filter->pfrt_anchor)) return (EINVAL); n = nn = pfr_table_count(filter, flags); @@ -1515,32 +1260,20 @@ pfr_get_tstats(struct pfr_table *filter, struct pfr_tstats *tbl, int *size, return (0); } 
SLIST_INIT(&workq); - if (flags & PFR_FLAG_ATOMIC) - s = splsoftnet(); RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { if (pfr_skip_table(filter, p, flags)) continue; if (n-- <= 0) continue; - if (!(flags & PFR_FLAG_ATOMIC)) - s = splsoftnet(); - if (COPYOUT(&p->pfrkt_ts, tbl++, sizeof(*tbl), flags)) { - splx(s); - return (EFAULT); - } - if (!(flags & PFR_FLAG_ATOMIC)) - splx(s); + bcopy(&p->pfrkt_ts, tbl++, sizeof(*tbl)); SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); } if (flags & PFR_FLAG_CLSTATS) pfr_clstats_ktables(&workq, tzero, flags & PFR_FLAG_ADDRSTOO); - if (flags & PFR_FLAG_ATOMIC) - splx(s); - if (n) { - printf("pfr_get_tstats: corruption detected (%d).\n", n); - return (ENOTTY); - } + + KASSERT(n == 0, ("%s: corruption detected (%d)", __func__, n)); + *size = nn; return (0); } @@ -1550,15 +1283,13 @@ pfr_clr_tstats(struct pfr_table *tbl, int size, int *nzero, int flags) { struct pfr_ktableworkq workq; struct pfr_ktable *p, key; - int i, s, xzero = 0; + int i, xzero = 0; long tzero = time_second; - ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY | - PFR_FLAG_ADDRSTOO); + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_ADDRSTOO); SLIST_INIT(&workq); for (i = 0; i < size; i++) { - if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags)) - return (EFAULT); + bcopy(tbl + i, &key.pfrkt_t, sizeof(key.pfrkt_t)); if (pfr_validate_table(&key.pfrkt_t, 0, 0)) return (EINVAL); p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); @@ -1567,13 +1298,8 @@ pfr_clr_tstats(struct pfr_table *tbl, int size, int *nzero, int flags) xzero++; } } - if (!(flags & PFR_FLAG_DUMMY)) { - if (flags & PFR_FLAG_ATOMIC) - s = splsoftnet(); + if (!(flags & PFR_FLAG_DUMMY)) pfr_clstats_ktables(&workq, tzero, flags & PFR_FLAG_ADDRSTOO); - if (flags & PFR_FLAG_ATOMIC) - splx(s); - } if (nzero != NULL) *nzero = xzero; return (0); @@ -1585,17 +1311,16 @@ pfr_set_tflags(struct pfr_table *tbl, int size, int setflag, int clrflag, { struct pfr_ktableworkq workq; struct pfr_ktable *p, *q, key; - int 
i, s, xchange = 0, xdel = 0; + int i, xchange = 0, xdel = 0; - ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY); + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); if ((setflag & ~PFR_TFLAG_USRMASK) || (clrflag & ~PFR_TFLAG_USRMASK) || (setflag & clrflag)) return (EINVAL); SLIST_INIT(&workq); for (i = 0; i < size; i++) { - if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags)) - return (EFAULT); + bcopy(tbl + i, &key.pfrkt_t, sizeof(key.pfrkt_t)); if (pfr_validate_table(&key.pfrkt_t, 0, flags & PFR_FLAG_USERIOCTL)) return (EINVAL); @@ -1619,13 +1344,8 @@ pfr_set_tflags(struct pfr_table *tbl, int size, int setflag, int clrflag, _skip: ; } - if (!(flags & PFR_FLAG_DUMMY)) { - if (flags & PFR_FLAG_ATOMIC) - s = splsoftnet(); + if (!(flags & PFR_FLAG_DUMMY)) pfr_setflags_ktables(&workq); - if (flags & PFR_FLAG_ATOMIC) - splx(s); - } if (nchange != NULL) *nchange = xchange; if (ndel != NULL) @@ -1674,10 +1394,12 @@ pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size, struct pfr_kentryworkq addrq; struct pfr_ktable *kt, *rt, *shadow, key; struct pfr_kentry *p; - struct pfr_addr ad; + struct pfr_addr *ad; struct pf_ruleset *rs; int i, rv, xadd = 0, xaddr = 0; + PF_RULES_WASSERT(); + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_ADDRSTOO); if (size && !(flags & PFR_FLAG_ADDRSTOO)) return (EINVAL); @@ -1691,8 +1413,7 @@ pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size, SLIST_INIT(&tableq); kt = RB_FIND(pfr_ktablehead, &pfr_ktables, (struct pfr_ktable *)tbl); if (kt == NULL) { - kt = pfr_create_ktable(tbl, 0, 1, - !(flags & PFR_FLAG_USERIOCTL)); + kt = pfr_create_ktable(tbl, 0, 1); if (kt == NULL) return (ENOMEM); SLIST_INSERT_HEAD(&tableq, kt, pfrkt_workq); @@ -1708,8 +1429,7 @@ pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size, kt->pfrkt_root = rt; goto _skip; } - rt = pfr_create_ktable(&key.pfrkt_t, 0, 1, - !(flags & PFR_FLAG_USERIOCTL)); + rt = pfr_create_ktable(&key.pfrkt_t, 0, 1); if (rt == NULL) { 
pfr_destroy_ktables(&tableq, 0); return (ENOMEM); @@ -1719,20 +1439,18 @@ pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size, } else if (!(kt->pfrkt_flags & PFR_TFLAG_INACTIVE)) xadd++; _skip: - shadow = pfr_create_ktable(tbl, 0, 0, !(flags & PFR_FLAG_USERIOCTL)); + shadow = pfr_create_ktable(tbl, 0, 0); if (shadow == NULL) { pfr_destroy_ktables(&tableq, 0); return (ENOMEM); } SLIST_INIT(&addrq); - for (i = 0; i < size; i++) { - if (COPYIN(addr+i, &ad, sizeof(ad), flags)) - senderr(EFAULT); - if (pfr_validate_addr(&ad)) + for (i = 0, ad = addr; i < size; i++, ad++) { + if (pfr_validate_addr(ad)) senderr(EINVAL); - if (pfr_lookup_addr(shadow, &ad, 1) != NULL) + if (pfr_lookup_addr(shadow, ad, 1) != NULL) continue; - p = pfr_create_kentry(&ad, 0); + p = pfr_create_kentry(ad); if (p == NULL) senderr(ENOMEM); if (pfr_route_kentry(shadow, p)) { @@ -1776,6 +1494,8 @@ pfr_ina_rollback(struct pfr_table *trs, u_int32_t ticket, int *ndel, int flags) struct pf_ruleset *rs; int xdel = 0; + PF_RULES_WASSERT(); + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); rs = pf_find_ruleset(trs->pfrt_anchor); if (rs == NULL || !rs->topen || ticket != rs->tticket) @@ -1806,10 +1526,12 @@ pfr_ina_commit(struct pfr_table *trs, u_int32_t ticket, int *nadd, struct pfr_ktable *p, *q; struct pfr_ktableworkq workq; struct pf_ruleset *rs; - int s, xadd = 0, xchange = 0; + int xadd = 0, xchange = 0; long tzero = time_second; - ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY); + PF_RULES_WASSERT(); + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); rs = pf_find_ruleset(trs->pfrt_anchor); if (rs == NULL || !rs->topen || ticket != rs->tticket) return (EBUSY); @@ -1827,14 +1549,10 @@ pfr_ina_commit(struct pfr_table *trs, u_int32_t ticket, int *nadd, } if (!(flags & PFR_FLAG_DUMMY)) { - if (flags & PFR_FLAG_ATOMIC) - s = splsoftnet(); for (p = SLIST_FIRST(&workq); p != NULL; p = q) { q = SLIST_NEXT(p, pfrkt_workq); pfr_commit_ktable(p, tzero); } - if (flags & PFR_FLAG_ATOMIC) - splx(s); rs->topen = 
0; pf_remove_if_empty_ruleset(rs); } @@ -1846,12 +1564,14 @@ pfr_ina_commit(struct pfr_table *trs, u_int32_t ticket, int *nadd, return (0); } -void +static void pfr_commit_ktable(struct pfr_ktable *kt, long tzero) { struct pfr_ktable *shadow = kt->pfrkt_shadow; int nflags; + PF_RULES_WASSERT(); + if (shadow->pfrkt_cnt == NO_ADDRESSES) { if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) pfr_clstats_ktable(kt, tzero, 1); @@ -1905,7 +1625,7 @@ pfr_commit_ktable(struct pfr_ktable *kt, long tzero) pfr_setflags_ktable(kt, nflags); } -int +static int pfr_validate_table(struct pfr_table *tbl, int allowedflags, int no_reserved) { int i; @@ -1930,7 +1650,7 @@ pfr_validate_table(struct pfr_table *tbl, int allowedflags, int no_reserved) * Rewrite anchors referenced by tables to remove slashes * and check for validity. */ -int +static int pfr_fix_anchor(char *anchor) { size_t siz = MAXPATHLEN; @@ -1955,11 +1675,13 @@ pfr_fix_anchor(char *anchor) return (0); } -int +static int pfr_table_count(struct pfr_table *filter, int flags) { struct pf_ruleset *rs; + PF_RULES_ASSERT(); + if (flags & PFR_FLAG_ALLRSETS) return (pfr_ktable_cnt); if (filter->pfrt_anchor[0]) { @@ -1969,7 +1691,7 @@ pfr_table_count(struct pfr_table *filter, int flags) return (pf_main_ruleset.tables); } -int +static int pfr_skip_table(struct pfr_table *filter, struct pfr_ktable *kt, int flags) { if (flags & PFR_FLAG_ALLRSETS) @@ -1979,7 +1701,7 @@ pfr_skip_table(struct pfr_table *filter, struct pfr_ktable *kt, int flags) return (0); } -void +static void pfr_insert_ktables(struct pfr_ktableworkq *workq) { struct pfr_ktable *p; @@ -1988,9 +1710,12 @@ pfr_insert_ktables(struct pfr_ktableworkq *workq) pfr_insert_ktable(p); } -void +static void pfr_insert_ktable(struct pfr_ktable *kt) { + + PF_RULES_WASSERT(); + RB_INSERT(pfr_ktablehead, &pfr_ktables, kt); pfr_ktable_cnt++; if (kt->pfrkt_root != NULL) @@ -1999,7 +1724,7 @@ pfr_insert_ktable(struct pfr_ktable *kt) kt->pfrkt_root->pfrkt_flags|PFR_TFLAG_REFDANCHOR); } -void 
+static void pfr_setflags_ktables(struct pfr_ktableworkq *workq) { struct pfr_ktable *p, *q; @@ -2010,11 +1735,13 @@ pfr_setflags_ktables(struct pfr_ktableworkq *workq) } } -void +static void pfr_setflags_ktable(struct pfr_ktable *kt, int newf) { struct pfr_kentryworkq addrq; + PF_RULES_WASSERT(); + if (!(newf & PFR_TFLAG_REFERENCED) && !(newf & PFR_TFLAG_PERSIST)) newf &= ~PFR_TFLAG_ACTIVE; @@ -2042,7 +1769,7 @@ pfr_setflags_ktable(struct pfr_ktable *kt, int newf) kt->pfrkt_flags = newf; } -void +static void pfr_clstats_ktables(struct pfr_ktableworkq *workq, long tzero, int recurse) { struct pfr_ktable *p; @@ -2051,39 +1778,30 @@ pfr_clstats_ktables(struct pfr_ktableworkq *workq, long tzero, int recurse) pfr_clstats_ktable(p, tzero, recurse); } -void +static void pfr_clstats_ktable(struct pfr_ktable *kt, long tzero, int recurse) { struct pfr_kentryworkq addrq; - int s; if (recurse) { pfr_enqueue_addrs(kt, &addrq, NULL, 0); pfr_clstats_kentries(&addrq, tzero, 0); } - s = splsoftnet(); bzero(kt->pfrkt_packets, sizeof(kt->pfrkt_packets)); bzero(kt->pfrkt_bytes, sizeof(kt->pfrkt_bytes)); kt->pfrkt_match = kt->pfrkt_nomatch = 0; - splx(s); kt->pfrkt_tzero = tzero; } -struct pfr_ktable * -pfr_create_ktable(struct pfr_table *tbl, long tzero, int attachruleset, - int intr) +static struct pfr_ktable * +pfr_create_ktable(struct pfr_table *tbl, long tzero, int attachruleset) { struct pfr_ktable *kt; struct pf_ruleset *rs; -#ifdef __FreeBSD__ - kt = pool_get(&V_pfr_ktable_pl, PR_NOWAIT|PR_ZERO); -#else - if (intr) - kt = pool_get(&pfr_ktable_pl, PR_NOWAIT|PR_ZERO|PR_LIMITFAIL); - else - kt = pool_get(&pfr_ktable_pl, PR_WAITOK|PR_ZERO|PR_LIMITFAIL); -#endif + PF_RULES_WASSERT(); + + kt = malloc(sizeof(*kt), M_PFTABLE, M_NOWAIT|M_ZERO); if (kt == NULL) return (NULL); kt->pfrkt_t = *tbl; @@ -2110,7 +1828,7 @@ pfr_create_ktable(struct pfr_table *tbl, long tzero, int attachruleset, return (kt); } -void +static void pfr_destroy_ktables(struct pfr_ktableworkq *workq, int flushaddr) 
{ struct pfr_ktable *p, *q; @@ -2121,7 +1839,7 @@ pfr_destroy_ktables(struct pfr_ktableworkq *workq, int flushaddr) } } -void +static void pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr) { struct pfr_kentryworkq addrq; @@ -2131,7 +1849,6 @@ pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr) pfr_clean_node_mask(kt, &addrq); pfr_destroy_kentries(&addrq); } -#if defined(__FreeBSD__) && (__FreeBSD_version >= 500100) if (kt->pfrkt_ip4 != NULL) { RADIX_NODE_HEAD_DESTROY(kt->pfrkt_ip4); free((caddr_t)kt->pfrkt_ip4, M_RTABLE); @@ -2140,26 +1857,16 @@ pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr) RADIX_NODE_HEAD_DESTROY(kt->pfrkt_ip6); free((caddr_t)kt->pfrkt_ip6, M_RTABLE); } -#else - if (kt->pfrkt_ip4 != NULL) - free((caddr_t)kt->pfrkt_ip4, M_RTABLE); - if (kt->pfrkt_ip6 != NULL) - free((caddr_t)kt->pfrkt_ip6, M_RTABLE); -#endif if (kt->pfrkt_shadow != NULL) pfr_destroy_ktable(kt->pfrkt_shadow, flushaddr); if (kt->pfrkt_rs != NULL) { kt->pfrkt_rs->tables--; pf_remove_if_empty_ruleset(kt->pfrkt_rs); } -#ifdef __FreeBSD__ - pool_put(&V_pfr_ktable_pl, kt); -#else - pool_put(&pfr_ktable_pl, kt); -#endif + free(kt, M_PFTABLE); } -int +static int pfr_ktable_compare(struct pfr_ktable *p, struct pfr_ktable *q) { int d; @@ -2169,7 +1876,7 @@ pfr_ktable_compare(struct pfr_ktable *p, struct pfr_ktable *q) return (strcmp(p->pfrkt_anchor, q->pfrkt_anchor)); } -struct pfr_ktable * +static struct pfr_ktable * pfr_lookup_table(struct pfr_table *tbl) { /* struct pfr_ktable start like a struct pfr_table */ @@ -2183,6 +1890,8 @@ pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af) struct pfr_kentry *ke = NULL; int match; + PF_RULES_RASSERT(); + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) kt = kt->pfrkt_root; if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) @@ -2191,29 +1900,33 @@ pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af) switch (af) { #ifdef INET case AF_INET: -#ifdef __FreeBSD__ - 
V_pfr_sin.sin_addr.s_addr = a->addr32[0]; - ke = (struct pfr_kentry *)rn_match(&V_pfr_sin, kt->pfrkt_ip4); -#else - pfr_sin.sin_addr.s_addr = a->addr32[0]; - ke = (struct pfr_kentry *)rn_match(&pfr_sin, kt->pfrkt_ip4); -#endif + { + struct sockaddr_in sin; + + bzero(&sin, sizeof(sin)); + sin.sin_len = sizeof(sin); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = a->addr32[0]; + ke = (struct pfr_kentry *)rn_match(&sin, kt->pfrkt_ip4); if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; break; + } #endif /* INET */ #ifdef INET6 case AF_INET6: -#ifdef __FreeBSD__ - bcopy(a, &V_pfr_sin6.sin6_addr, sizeof(V_pfr_sin6.sin6_addr)); - ke = (struct pfr_kentry *)rn_match(&V_pfr_sin6, kt->pfrkt_ip6); -#else - bcopy(a, &pfr_sin6.sin6_addr, sizeof(pfr_sin6.sin6_addr)); - ke = (struct pfr_kentry *)rn_match(&pfr_sin6, kt->pfrkt_ip6); -#endif + { + struct sockaddr_in6 sin6; + + bzero(&sin6, sizeof(sin6)); + sin6.sin6_len = sizeof(sin6); + sin6.sin6_family = AF_INET6; + bcopy(a, &sin6.sin6_addr, sizeof(sin6.sin6_addr)); + ke = (struct pfr_kentry *)rn_match(&sin6, kt->pfrkt_ip6); if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; break; + } #endif /* INET6 */ } match = (ke && !ke->pfrke_not); @@ -2238,29 +1951,31 @@ pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af, switch (af) { #ifdef INET case AF_INET: -#ifdef __FreeBSD__ - V_pfr_sin.sin_addr.s_addr = a->addr32[0]; - ke = (struct pfr_kentry *)rn_match(&V_pfr_sin, kt->pfrkt_ip4); -#else - pfr_sin.sin_addr.s_addr = a->addr32[0]; - ke = (struct pfr_kentry *)rn_match(&pfr_sin, kt->pfrkt_ip4); -#endif + { + struct sockaddr_in sin; + + sin.sin_len = sizeof(sin); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = a->addr32[0]; + ke = (struct pfr_kentry *)rn_match(&sin, kt->pfrkt_ip4); if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; break; + } #endif /* INET */ #ifdef INET6 case AF_INET6: -#ifdef __FreeBSD__ - bcopy(a, &V_pfr_sin6.sin6_addr, sizeof(V_pfr_sin6.sin6_addr)); - ke = (struct pfr_kentry *)rn_match(&V_pfr_sin6, 
kt->pfrkt_ip6); -#else - bcopy(a, &pfr_sin6.sin6_addr, sizeof(pfr_sin6.sin6_addr)); - ke = (struct pfr_kentry *)rn_match(&pfr_sin6, kt->pfrkt_ip6); -#endif + { + struct sockaddr_in6 sin6; + + sin6.sin6_len = sizeof(sin6); + sin6.sin6_family = AF_INET6; + bcopy(a, &sin6.sin6_addr, sizeof(sin6.sin6_addr)); + ke = (struct pfr_kentry *)rn_match(&sin6, kt->pfrkt_ip6); if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; break; + } #endif /* INET6 */ default: ; @@ -2275,12 +1990,8 @@ pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af, if (ke != NULL && op_pass != PFR_OP_XPASS && (kt->pfrkt_flags & PFR_TFLAG_COUNTERS)) { if (ke->pfrke_counters == NULL) -#ifdef __FreeBSD__ - ke->pfrke_counters = pool_get(&V_pfr_kcounters_pl, -#else - ke->pfrke_counters = pool_get(&pfr_kcounters_pl, -#endif - PR_NOWAIT | PR_ZERO); + ke->pfrke_counters = uma_zalloc(V_pfr_kcounters_z, + M_NOWAIT | M_ZERO); if (ke->pfrke_counters != NULL) { ke->pfrke_counters->pfrkc_packets[dir_out][op_pass]++; ke->pfrke_counters->pfrkc_bytes[dir_out][op_pass] += len; @@ -2289,26 +2000,28 @@ pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af, } struct pfr_ktable * -pfr_attach_table(struct pf_ruleset *rs, char *name, int intr) +pfr_attach_table(struct pf_ruleset *rs, char *name) { struct pfr_ktable *kt, *rt; struct pfr_table tbl; struct pf_anchor *ac = rs->anchor; + PF_RULES_WASSERT(); + bzero(&tbl, sizeof(tbl)); strlcpy(tbl.pfrt_name, name, sizeof(tbl.pfrt_name)); if (ac != NULL) strlcpy(tbl.pfrt_anchor, ac->path, sizeof(tbl.pfrt_anchor)); kt = pfr_lookup_table(&tbl); if (kt == NULL) { - kt = pfr_create_ktable(&tbl, time_second, 1, intr); + kt = pfr_create_ktable(&tbl, time_second, 1); if (kt == NULL) return (NULL); if (ac != NULL) { bzero(tbl.pfrt_anchor, sizeof(tbl.pfrt_anchor)); rt = pfr_lookup_table(&tbl); if (rt == NULL) { - rt = pfr_create_ktable(&tbl, 0, 1, intr); + rt = pfr_create_ktable(&tbl, 0, 1); if (rt == NULL) { pfr_destroy_ktable(kt, 0); return (NULL); @@ 
-2327,38 +2040,36 @@ pfr_attach_table(struct pf_ruleset *rs, char *name, int intr) void pfr_detach_table(struct pfr_ktable *kt) { - if (kt->pfrkt_refcnt[PFR_REFCNT_RULE] <= 0) - printf("pfr_detach_table: refcount = %d.\n", - kt->pfrkt_refcnt[PFR_REFCNT_RULE]); - else if (!--kt->pfrkt_refcnt[PFR_REFCNT_RULE]) + + PF_RULES_WASSERT(); + KASSERT(kt->pfrkt_refcnt[PFR_REFCNT_RULE] > 0, ("%s: refcount %d\n", + __func__, kt->pfrkt_refcnt[PFR_REFCNT_RULE])); + + if (!--kt->pfrkt_refcnt[PFR_REFCNT_RULE]) pfr_setflags_ktable(kt, kt->pfrkt_flags&~PFR_TFLAG_REFERENCED); } int pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter, - struct pf_addr **raddr, struct pf_addr **rmask, sa_family_t af) + sa_family_t af) { -#ifdef __FreeBSD__ + struct pf_addr *addr, *cur, *mask; + union sockaddr_union uaddr, umask; struct pfr_kentry *ke, *ke2 = NULL; - struct pf_addr *addr = NULL; -#else - struct pfr_kentry *ke, *ke2; - struct pf_addr *addr; -#endif - union sockaddr_union mask; int idx = -1, use_counter = 0; -#ifdef __FreeBSD__ - if (af == AF_INET) - addr = (struct pf_addr *)&V_pfr_sin.sin_addr; - else if (af == AF_INET6) - addr = (struct pf_addr *)&V_pfr_sin6.sin6_addr; -#else - if (af == AF_INET) - addr = (struct pf_addr *)&pfr_sin.sin_addr; - else if (af == AF_INET6) - addr = (struct pf_addr *)&pfr_sin6.sin6_addr; -#endif + switch (af) { + case AF_INET: + uaddr.sin.sin_len = sizeof(struct sockaddr_in); + uaddr.sin.sin_family = AF_INET; + break; + case AF_INET6: + uaddr.sin6.sin6_len = sizeof(struct sockaddr_in6); + uaddr.sin6.sin6_family = AF_INET6; + break; + } + addr = SUNION2PF(&uaddr, af); + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) kt = kt->pfrkt_root; if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) @@ -2377,21 +2088,13 @@ _next_block: kt->pfrkt_nomatch++; return (1); } -#ifdef __FreeBSD__ - pfr_prepare_network(&V_pfr_mask, af, ke->pfrke_net); -#else - pfr_prepare_network(&pfr_mask, af, ke->pfrke_net); -#endif - *raddr = 
SUNION2PF(&ke->pfrke_sa, af); -#ifdef __FreeBSD__ - *rmask = SUNION2PF(&V_pfr_mask, af); -#else - *rmask = SUNION2PF(&pfr_mask, af); -#endif + pfr_prepare_network(&umask, af, ke->pfrke_net); + cur = SUNION2PF(&ke->pfrke_sa, af); + mask = SUNION2PF(&umask, af); if (use_counter) { /* is supplied address within block? */ - if (!PF_MATCHA(0, *raddr, *rmask, counter, af)) { + if (!PF_MATCHA(0, cur, mask, counter, af)) { /* no, go to next block in table */ idx++; use_counter = 0; @@ -2400,7 +2103,7 @@ _next_block: PF_ACPY(addr, counter, af); } else { /* use first address of block */ - PF_ACPY(addr, *raddr, af); + PF_ACPY(addr, cur, af); } if (!KENTRY_NETWORK(ke)) { @@ -2412,21 +2115,16 @@ _next_block: } for (;;) { /* we don't want to use a nested block */ -#ifdef __FreeBSD__ - if (af == AF_INET) - ke2 = (struct pfr_kentry *)rn_match(&V_pfr_sin, + switch (af) { + case AF_INET: + ke2 = (struct pfr_kentry *)rn_match(&uaddr, kt->pfrkt_ip4); - else if (af == AF_INET6) - ke2 = (struct pfr_kentry *)rn_match(&V_pfr_sin6, - kt->pfrkt_ip6); -#else - if (af == AF_INET) - ke2 = (struct pfr_kentry *)rn_match(&pfr_sin, - kt->pfrkt_ip4); - else if (af == AF_INET6) - ke2 = (struct pfr_kentry *)rn_match(&pfr_sin6, + break; + case AF_INET6: + ke2 = (struct pfr_kentry *)rn_match(&uaddr, kt->pfrkt_ip6); -#endif + break; + } /* no need to check KENTRY_RNF_ROOT() here */ if (ke2 == ke) { /* lookup return the same block - perfect */ @@ -2437,14 +2135,10 @@ _next_block: } /* we need to increase the counter past the nested block */ - pfr_prepare_network(&mask, AF_INET, ke2->pfrke_net); -#ifdef __FreeBSD__ - PF_POOLMASK(addr, addr, SUNION2PF(&mask, af), &V_pfr_ffaddr, af); -#else - PF_POOLMASK(addr, addr, SUNION2PF(&mask, af), &pfr_ffaddr, af); -#endif + pfr_prepare_network(&umask, AF_INET, ke2->pfrke_net); + PF_POOLMASK(addr, addr, SUNION2PF(&umask, af), &pfr_ffaddr, af); PF_AINC(addr, af); - if (!PF_MATCHA(0, *raddr, *rmask, addr, af)) { + if (!PF_MATCHA(0, cur, mask, addr, af)) { /* ok, we 
reached the end of our main block */ /* go to next block in table */ idx++; @@ -2454,7 +2148,7 @@ _next_block: } } -struct pfr_kentry * +static struct pfr_kentry * pfr_kentry_byidx(struct pfr_ktable *kt, int idx, int af) { struct pfr_walktree w; @@ -2466,20 +2160,12 @@ pfr_kentry_byidx(struct pfr_ktable *kt, int idx, int af) switch (af) { #ifdef INET case AF_INET: -#ifdef __FreeBSD__ kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w); -#else - rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w); -#endif return (w.pfrw_kentry); #endif /* INET */ #ifdef INET6 case AF_INET6: -#ifdef __FreeBSD__ kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, &w); -#else - rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w); -#endif return (w.pfrw_kentry); #endif /* INET6 */ default: @@ -2491,26 +2177,15 @@ void pfr_dynaddr_update(struct pfr_ktable *kt, struct pfi_dynaddr *dyn) { struct pfr_walktree w; - int s; bzero(&w, sizeof(w)); w.pfrw_op = PFRW_DYNADDR_UPDATE; w.pfrw_dyn = dyn; - s = splsoftnet(); dyn->pfid_acnt4 = 0; dyn->pfid_acnt6 = 0; if (!dyn->pfid_af || dyn->pfid_af == AF_INET) -#ifdef __FreeBSD__ kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w); -#else - rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w); -#endif if (!dyn->pfid_af || dyn->pfid_af == AF_INET6) -#ifdef __FreeBSD__ kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, &w); -#else - rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w); -#endif - splx(s); } diff --git a/sys/contrib/pf/net/pfvar.h b/sys/contrib/pf/net/pfvar.h index dab70c5..ca4e449 100644 --- a/sys/contrib/pf/net/pfvar.h +++ b/sys/contrib/pf/net/pfvar.h @@ -34,37 +34,14 @@ #define _NET_PFVAR_H_ #include <sys/param.h> -#include <sys/types.h> #include <sys/queue.h> +#include <sys/refcount.h> #include <sys/tree.h> -#ifdef __FreeBSD__ -#include <sys/lock.h> -#include <sys/sx.h> -#else -#include <sys/rwlock.h> -#endif #include <net/radix.h> -#include <net/route.h> -#ifdef __FreeBSD__ -#include <net/if_clone.h> -#include <net/pf_mtag.h> -#include 
<vm/uma.h> -#else -#include <netinet/ip_ipsp.h> -#endif - -#ifdef __FreeBSD__ #include <netinet/in.h> -#endif - -#include <netinet/tcp_fsm.h> -struct ip; -struct ip6_hdr; -#ifdef __FreeBSD__ -struct inpcb; -#endif +#include <net/pf_mtag.h> #define PF_TCPS_PROXY_SRC ((TCP_NSTATES)+0) #define PF_TCPS_PROXY_DST ((TCP_NSTATES)+1) @@ -126,12 +103,12 @@ enum { PFTM_TCP_FIRST_PACKET, PFTM_TCP_OPENING, PFTM_TCP_ESTABLISHED, enum { PF_NOPFROUTE, PF_FASTROUTE, PF_ROUTETO, PF_DUPTO, PF_REPLYTO }; enum { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS, - PF_LIMIT_TABLES, PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_MAX }; + PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_MAX }; #define PF_POOL_IDMASK 0x0f enum { PF_POOL_NONE, PF_POOL_BITMASK, PF_POOL_RANDOM, PF_POOL_SRCHASH, PF_POOL_ROUNDROBIN }; enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL, - PF_ADDR_TABLE, PF_ADDR_RTLABEL, PF_ADDR_URPFFAILED, + PF_ADDR_TABLE, PF_ADDR_URPFFAILED, PF_ADDR_RANGE }; #define PF_POOL_TYPEMASK 0x0f #define PF_POOL_STICKYADDR 0x20 @@ -173,11 +150,6 @@ struct pf_addr_wrap { } a; char ifname[IFNAMSIZ]; char tblname[PF_TABLE_NAME_SIZE]; -#ifdef __FreeBSD__ -#define RTLABEL_LEN 32 -#endif - char rtlabelname[RTLABEL_LEN]; - u_int32_t rtlabel; } v; union { struct pfi_dynaddr *dyn; @@ -199,7 +171,6 @@ struct pfi_dynaddr { struct pf_addr pfid_mask6; struct pfr_ktable *pfid_kt; struct pfi_kif *pfid_kif; - void *pfid_hook_cookie; int pfid_net; /* mask or 128 */ int pfid_acnt4; /* address count IPv4 */ int pfid_acnt6; /* address count IPv6 */ @@ -210,10 +181,6 @@ struct pfi_dynaddr { /* * Address manipulation macros */ - -#ifdef __FreeBSD__ -#define splsoftnet() splnet() - #define HTONL(x) (x) = htonl((__uint32_t)(x)) #define HTONS(x) (x) = htons((__uint16_t)(x)) #define NTOHL(x) (x) = ntohl((__uint32_t)(x)) @@ -221,46 +188,44 @@ struct pfi_dynaddr { #define PF_NAME "pf" -#define PR_NOWAIT M_NOWAIT -#define PR_WAITOK M_WAITOK -#define PR_ZERO M_ZERO -#define pool_get(p, f) uma_zalloc(*(p), (f)) -#define pool_put(p, o) 
uma_zfree(*(p), (o)) - -#define UMA_CREATE(var, type, desc) \ - var = uma_zcreate(desc, sizeof(type), \ - NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); \ - if (var == NULL) \ - break -#define UMA_DESTROY(var) \ - if (var) \ - uma_zdestroy(var) - -#ifdef __FreeBSD__ -extern struct mtx pf_task_mtx; - -#define PF_LOCK_ASSERT() mtx_assert(&pf_task_mtx, MA_OWNED) -#define PF_UNLOCK_ASSERT() mtx_assert(&pf_task_mtx, MA_NOTOWNED) -#define PF_LOCK() mtx_lock(&pf_task_mtx) -#define PF_UNLOCK() mtx_unlock(&pf_task_mtx) -#else -#define PF_LOCK_ASSERT() -#define PF_UNLOCK_ASSERT() -#define PF_LOCK() -#define PF_UNLOCK() -#endif /* __FreeBSD__ */ - -#define PF_COPYIN(uaddr, kaddr, len, r) do { \ - PF_UNLOCK(); \ - r = copyin((uaddr), (kaddr), (len)); \ - PF_LOCK(); \ -} while(0) +#define PF_HASHROW_ASSERT(h) mtx_assert(&(h)->lock, MA_OWNED) +#define PF_HASHROW_LOCK(h) mtx_lock(&(h)->lock) +#define PF_HASHROW_UNLOCK(h) mtx_unlock(&(h)->lock) -#define PF_COPYOUT(kaddr, uaddr, len, r) do { \ - PF_UNLOCK(); \ - r = copyout((kaddr), (uaddr), (len)); \ - PF_LOCK(); \ -} while(0) +#define PF_STATE_LOCK(s) \ + do { \ + struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH(s)]; \ + PF_HASHROW_LOCK(_ih); \ + } while (0) + +#define PF_STATE_UNLOCK(s) \ + do { \ + struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH((s))]; \ + PF_HASHROW_UNLOCK(_ih); \ + } while (0) + +#ifdef INVARIANTS +#define PF_STATE_LOCK_ASSERT(s) \ + do { \ + struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH(s)]; \ + PF_HASHROW_ASSERT(_ih); \ + } while (0) +#else /* !INVARIANTS */ +#define PF_STATE_LOCK_ASSERT(s) do {} while (0) +#endif /* INVARIANTS */ + +extern struct mtx pf_unlnkdrules_mtx; +#define PF_UNLNKDRULES_LOCK() mtx_lock(&pf_unlnkdrules_mtx) +#define PF_UNLNKDRULES_UNLOCK() mtx_unlock(&pf_unlnkdrules_mtx) + +extern struct rwlock pf_rules_lock; +#define PF_RULES_RLOCK() rw_rlock(&pf_rules_lock) +#define PF_RULES_RUNLOCK() rw_runlock(&pf_rules_lock) +#define PF_RULES_WLOCK() rw_wlock(&pf_rules_lock) +#define 
PF_RULES_WUNLOCK() rw_wunlock(&pf_rules_lock) +#define PF_RULES_ASSERT() rw_assert(&pf_rules_lock, RA_LOCKED) +#define PF_RULES_RASSERT() rw_assert(&pf_rules_lock, RA_RLOCKED) +#define PF_RULES_WASSERT() rw_assert(&pf_rules_lock, RA_WLOCKED) #define PF_MODVER 1 #define PFLOG_MODVER 1 @@ -272,7 +237,7 @@ extern struct mtx pf_task_mtx; #define PFSYNC_MINVER 1 #define PFSYNC_PREFVER PFSYNC_MODVER #define PFSYNC_MAXVER 1 -#endif /* __FreeBSD__ */ + #ifdef INET #ifndef INET6 #define PF_INET_ONLY @@ -412,8 +377,6 @@ extern struct mtx pf_task_mtx; pf_routable((x), (af), NULL, (rtid))) || \ (((aw)->type == PF_ADDR_URPFFAILED && (ifp) != NULL && \ pf_routable((x), (af), (ifp), (rtid))) || \ - ((aw)->type == PF_ADDR_RTLABEL && \ - !pf_rtlabel_match((x), (af), (aw), (rtid))) || \ ((aw)->type == PF_ADDR_TABLE && \ !pfr_match_addr((aw)->p.tbl, (x), (af))) || \ ((aw)->type == PF_ADDR_DYNIFTL && \ @@ -473,7 +436,6 @@ struct pf_pool { struct pf_addr counter; int tblidx; u_int16_t proxy_port[2]; - u_int8_t port_op; u_int8_t opts; }; @@ -705,6 +667,7 @@ struct pf_rule { #define PFRULE_NOSYNC 0x0010 #define PFRULE_SRCTRACK 0x0020 /* track source states */ #define PFRULE_RULESRCTRACK 0x0040 /* per rule */ +#define PFRULE_REFS 0x0080 /* rule has references */ /* scrub flags */ #define PFRULE_NODF 0x0100 @@ -717,7 +680,6 @@ struct pf_rule { /* rule flags again */ #define PFRULE_IFBOUND 0x00010000 /* if-bound */ #define PFRULE_STATESLOPPY 0x00020000 /* sloppy state tracking */ -#define PFRULE_PFLOW 0x00040000 #define PFSTATE_HIWAT 10000 /* default state table size */ #define PFSTATE_ADAPT_START 6000 /* default adaptive timeout start */ @@ -734,7 +696,7 @@ struct pf_threshold { }; struct pf_src_node { - RB_ENTRY(pf_src_node) entry; + LIST_ENTRY(pf_src_node) entry; struct pf_addr addr; struct pf_addr raddr; union pf_rule_ptr rule; @@ -787,9 +749,7 @@ struct pf_state_peer { u_int8_t pad[1]; }; -TAILQ_HEAD(pf_state_queue, pf_state); - -/* keep synced with struct pf_state_key, used in RB_FIND 
*/ +/* Keep synced with struct pf_state_key. */ struct pf_state_key_cmp { struct pf_addr addr[2]; u_int16_t port[2]; @@ -798,13 +758,6 @@ struct pf_state_key_cmp { u_int8_t pad[2]; }; -struct pf_state_item { - TAILQ_ENTRY(pf_state_item) entry; - struct pf_state *s; -}; - -TAILQ_HEAD(pf_statelisthead, pf_state_item); - struct pf_state_key { struct pf_addr addr[2]; u_int16_t port[2]; @@ -812,13 +765,11 @@ struct pf_state_key { u_int8_t proto; u_int8_t pad[2]; - RB_ENTRY(pf_state_key) entry; - struct pf_statelisthead states; - struct pf_state_key *reverse; - struct inpcb *inp; + LIST_ENTRY(pf_state_key) entry; + TAILQ_HEAD(, pf_state) states[2]; }; -/* keep synced with struct pf_state, used in RB_FIND */ +/* Keep synced with struct pf_state. */ struct pf_state_cmp { u_int64_t id; u_int32_t creatorid; @@ -830,17 +781,12 @@ struct pf_state { u_int64_t id; u_int32_t creatorid; u_int8_t direction; -#ifdef __FreeBSD__ - u_int8_t pad[2]; - u_int8_t local_flags; -#define PFSTATE_EXPIRING 0x01 -#else u_int8_t pad[3]; -#endif + u_int refs; TAILQ_ENTRY(pf_state) sync_list; - TAILQ_ENTRY(pf_state) entry_list; - RB_ENTRY(pf_state) entry_id; + TAILQ_ENTRY(pf_state) key_list[2]; + LIST_ENTRY(pf_state) entry; struct pf_state_peer src; struct pf_state_peer dst; union pf_rule_ptr rule; @@ -862,7 +808,7 @@ struct pf_state { u_int8_t state_flags; #define PFSTATE_ALLOWOPTS 0x01 #define PFSTATE_SLOPPY 0x02 -#define PFSTATE_PFLOW 0x04 +/* was PFSTATE_PFLOW 0x04 */ #define PFSTATE_NOSYNC 0x08 #define PFSTATE_ACK 0x10 u_int8_t timeout; @@ -903,7 +849,7 @@ struct pfsync_state_key { }; struct pfsync_state { - u_int32_t id[2]; + u_int64_t id; char ifname[IFNAMSIZ]; struct pfsync_state_key key[2]; struct pfsync_state_peer src; @@ -920,11 +866,7 @@ struct pfsync_state { sa_family_t af; u_int8_t proto; u_int8_t direction; -#ifdef __FreeBSD__ - u_int8_t local_flags; -#define PFSTATE_EXPIRING 0x01 - u_int8_t pad; -#endif + u_int8_t __spare[2]; u_int8_t log; u_int8_t state_flags; u_int8_t timeout; @@ 
-932,7 +874,6 @@ struct pfsync_state { u_int8_t updates; } __packed; -#ifdef __FreeBSD__ #ifdef _KERNEL /* pfsync */ typedef int pfsync_state_import_t(struct pfsync_state *, u_int8_t); @@ -940,50 +881,28 @@ typedef void pfsync_insert_state_t(struct pf_state *); typedef void pfsync_update_state_t(struct pf_state *); typedef void pfsync_delete_state_t(struct pf_state *); typedef void pfsync_clear_states_t(u_int32_t, const char *); -typedef int pfsync_state_in_use_t(struct pf_state *); typedef int pfsync_defer_t(struct pf_state *, struct mbuf *); -typedef int pfsync_up_t(void); extern pfsync_state_import_t *pfsync_state_import_ptr; extern pfsync_insert_state_t *pfsync_insert_state_ptr; extern pfsync_update_state_t *pfsync_update_state_ptr; extern pfsync_delete_state_t *pfsync_delete_state_ptr; extern pfsync_clear_states_t *pfsync_clear_states_ptr; -extern pfsync_state_in_use_t *pfsync_state_in_use_ptr; extern pfsync_defer_t *pfsync_defer_ptr; -extern pfsync_up_t *pfsync_up_ptr; void pfsync_state_export(struct pfsync_state *, struct pf_state *); -/* pflow */ -typedef int export_pflow_t(struct pf_state *); - -extern export_pflow_t *export_pflow_ptr; - /* pflog */ struct pf_ruleset; struct pf_pdesc; typedef int pflog_packet_t(struct pfi_kif *, struct mbuf *, sa_family_t, u_int8_t, u_int8_t, struct pf_rule *, struct pf_rule *, - struct pf_ruleset *, struct pf_pdesc *); - + struct pf_ruleset *, struct pf_pdesc *, int); extern pflog_packet_t *pflog_packet_ptr; -/* pf uid hack */ -VNET_DECLARE(int, debug_pfugidhack); -#define V_debug_pfugidhack VNET(debug_pfugidhack) - #define V_pf_end_threads VNET(pf_end_threads) -#endif - -/* Macros to set/clear/test flags. 
*/ -#ifdef _KERNEL -#define SET(t, f) ((t) |= (f)) -#define CLR(t, f) ((t) &= ~(f)) -#define ISSET(t, f) ((t) & (f)) -#endif -#endif +#endif /* _KERNEL */ #define PFSYNC_FLAG_SRCNODE 0x04 #define PFSYNC_FLAG_NATSRCNODE 0x08 @@ -1085,9 +1004,20 @@ RB_PROTOTYPE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare); #define PFR_TFLAG_REFDANCHOR 0x00000020 #define PFR_TFLAG_COUNTERS 0x00000040 /* Adjust masks below when adding flags. */ -#define PFR_TFLAG_USRMASK 0x00000043 -#define PFR_TFLAG_SETMASK 0x0000003C -#define PFR_TFLAG_ALLMASK 0x0000007F +#define PFR_TFLAG_USRMASK (PFR_TFLAG_PERSIST | \ + PFR_TFLAG_CONST | \ + PFR_TFLAG_COUNTERS) +#define PFR_TFLAG_SETMASK (PFR_TFLAG_ACTIVE | \ + PFR_TFLAG_INACTIVE | \ + PFR_TFLAG_REFERENCED | \ + PFR_TFLAG_REFDANCHOR) +#define PFR_TFLAG_ALLMASK (PFR_TFLAG_PERSIST | \ + PFR_TFLAG_CONST | \ + PFR_TFLAG_ACTIVE | \ + PFR_TFLAG_INACTIVE | \ + PFR_TFLAG_REFERENCED | \ + PFR_TFLAG_REFDANCHOR | \ + PFR_TFLAG_COUNTERS) struct pfr_table { char pfrt_anchor[MAXPATHLEN]; @@ -1158,22 +1088,13 @@ struct pfr_kentry { struct radix_node pfrke_node[2]; union sockaddr_union pfrke_sa; SLIST_ENTRY(pfr_kentry) pfrke_workq; - union { - - struct pfr_kcounters *pfrke_counters; -#if 0 - struct pfr_kroute *pfrke_route; -#endif - } u; + struct pfr_kcounters *pfrke_counters; long pfrke_tzero; u_int8_t pfrke_af; u_int8_t pfrke_net; u_int8_t pfrke_not; u_int8_t pfrke_mark; }; -#define pfrke_counters u.pfrke_counters -#define pfrke_route u.pfrke_route - SLIST_HEAD(pfr_ktableworkq, pfr_ktable); RB_HEAD(pfr_ktablehead, pfr_ktable); @@ -1202,25 +1123,6 @@ struct pfr_ktable { #define pfrkt_nomatch pfrkt_ts.pfrts_nomatch #define pfrkt_tzero pfrkt_ts.pfrts_tzero -RB_HEAD(pf_state_tree, pf_state_key); -RB_PROTOTYPE(pf_state_tree, pf_state_key, entry, pf_state_compare_key); - -RB_HEAD(pf_state_tree_ext_gwy, pf_state_key); -RB_PROTOTYPE(pf_state_tree_ext_gwy, pf_state_key, - entry_ext_gwy, pf_state_compare_ext_gwy); - -RB_HEAD(pfi_ifhead, pfi_kif); - -/* state 
tables */ -#ifdef __FreeBSD__ -#ifdef _KERNEL -VNET_DECLARE(struct pf_state_tree, pf_statetbl); -#define V_pf_statetbl VNET(pf_statetbl) -#endif -#else -extern struct pf_state_tree pf_statetbl; -#endif - /* keep synced with pfi_kif, used in RB_FIND */ struct pfi_kif_cmp { char pfik_name[IFNAMSIZ]; @@ -1228,25 +1130,23 @@ struct pfi_kif_cmp { struct pfi_kif { char pfik_name[IFNAMSIZ]; - RB_ENTRY(pfi_kif) pfik_tree; + union { + RB_ENTRY(pfi_kif) _pfik_tree; + LIST_ENTRY(pfi_kif) _pfik_list; + } _pfik_glue; +#define pfik_tree _pfik_glue._pfik_tree +#define pfik_list _pfik_glue._pfik_list u_int64_t pfik_packets[2][2][2]; u_int64_t pfik_bytes[2][2][2]; u_int32_t pfik_tzero; - int pfik_flags; - void *pfik_ah_cookie; + u_int pfik_flags; struct ifnet *pfik_ifp; struct ifg_group *pfik_group; - int pfik_states; - int pfik_rules; + u_int pfik_rulerefs; TAILQ_HEAD(, pfi_dynaddr) pfik_dynaddrs; }; -enum pfi_kif_refs { - PFI_KIF_REF_NONE, - PFI_KIF_REF_STATE, - PFI_KIF_REF_RULE -}; - +#define PFI_IFLAG_REFS 0x0001 /* has state references */ #define PFI_IFLAG_SKIP 0x0100 /* skip filtering on interface */ struct pf_pdesc { @@ -1254,7 +1154,6 @@ struct pf_pdesc { int done; uid_t uid; gid_t gid; - pid_t pid; } lookup; u_int64_t tot_len; /* Make Mickey money */ union { @@ -1268,15 +1167,11 @@ struct pf_pdesc { } hdr; struct pf_rule *nat_rule; /* nat/rdr rule applied to packet */ - struct ether_header - *eh; struct pf_addr *src; /* src address */ struct pf_addr *dst; /* dst address */ u_int16_t *sport; u_int16_t *dport; -#ifdef __FreeBSD__ struct pf_mtag *pf_mtag; -#endif u_int32_t p_len; /* total length of payload */ @@ -1400,7 +1295,6 @@ struct pf_pdesc { *(a) = (x); \ } while (0) -#ifdef __FreeBSD__ #define REASON_SET(a, x) \ do { \ if ((a) != NULL) \ @@ -1408,15 +1302,6 @@ struct pf_pdesc { if (x < PFRES_MAX) \ V_pf_status.counters[x]++; \ } while (0) -#else -#define REASON_SET(a, x) \ - do { \ - if ((a) != NULL) \ - *(a) = (x); \ - if (x < PFRES_MAX) \ - pf_status.counters[x]++; 
\ - } while (0) -#endif struct pf_status { u_int64_t counters[PFRES_MAX]; @@ -1425,7 +1310,6 @@ struct pf_status { u_int64_t scounters[SCNT_MAX]; u_int64_t pcounters[2][2][3]; u_int64_t bcounters[2][2]; - u_int64_t stateid; u_int32_t running; u_int32_t states; u_int32_t src_nodes; @@ -1485,10 +1369,9 @@ struct pf_altq { u_int32_t parent_qid; /* parent queue id */ u_int32_t bandwidth; /* queue bandwidth */ u_int8_t priority; /* priority */ -#ifdef __FreeBSD__ u_int8_t local_flags; /* dynamic interface */ #define PFALTQ_FLAG_IF_REMOVED 0x01 -#endif + u_int16_t qlimit; /* queue size limit */ u_int16_t flags; /* misc flags */ union { @@ -1500,13 +1383,6 @@ struct pf_altq { u_int32_t qid; /* return value */ }; -struct pf_tagname { - TAILQ_ENTRY(pf_tagname) entries; - char name[PF_TAG_NAME_SIZE]; - u_int16_t tag; - int ref; -}; - struct pf_divert { union { struct in_addr ipv4; @@ -1516,13 +1392,7 @@ struct pf_divert { }; #define PFFRAG_FRENT_HIWAT 5000 /* Number of fragment entries */ -#define PFFRAG_FRAG_HIWAT 1000 /* Number of fragmented packets */ -#define PFFRAG_FRCENT_HIWAT 50000 /* Number of fragment cache entries */ -#define PFFRAG_FRCACHE_HIWAT 10000 /* Number of fragment descriptors */ - -#define PFR_KTABLE_HIWAT 1000 /* Number of tables */ #define PFR_KENTRY_HIWAT 200000 /* Number of table entries */ -#define PFR_KENTRY_HIWAT_SMALL 100000 /* Number of table entries (tiny hosts) */ /* * ioctl parameter structures @@ -1653,7 +1523,7 @@ struct pfioc_trans { } *array; }; -#define PFR_FLAG_ATOMIC 0x00000001 +#define PFR_FLAG_ATOMIC 0x00000001 /* unused */ #define PFR_FLAG_DUMMY 0x00000002 #define PFR_FLAG_FEEDBACK 0x00000004 #define PFR_FLAG_CLSTATS 0x00000008 @@ -1765,56 +1635,54 @@ struct pfioc_iface { #define DIOCSETIFFLAG _IOWR('D', 89, struct pfioc_iface) #define DIOCCLRIFFLAG _IOWR('D', 90, struct pfioc_iface) #define DIOCKILLSRCNODES _IOWR('D', 91, struct pfioc_src_node_kill) -#ifdef __FreeBSD__ struct pf_ifspeed { char ifname[IFNAMSIZ]; u_int32_t baudrate; 
}; #define DIOCGIFSPEED _IOWR('D', 92, struct pf_ifspeed) -#endif #ifdef _KERNEL -RB_HEAD(pf_src_tree, pf_src_node); -RB_PROTOTYPE(pf_src_tree, pf_src_node, entry, pf_src_compare); -#ifdef __FreeBSD__ -VNET_DECLARE(struct pf_src_tree, tree_src_tracking); -#define V_tree_src_tracking VNET(tree_src_tracking) -#else -extern struct pf_src_tree tree_src_tracking; -#endif +struct pf_srchash { + LIST_HEAD(, pf_src_node) nodes; + struct mtx lock; +}; -RB_HEAD(pf_state_tree_id, pf_state); -RB_PROTOTYPE(pf_state_tree_id, pf_state, - entry_id, pf_state_compare_id); -#ifdef __FreeBSD__ -VNET_DECLARE(struct pf_state_tree_id, tree_id); -#define V_tree_id VNET(tree_id) -VNET_DECLARE(struct pf_state_queue, state_list); -#define V_state_list VNET(state_list) -#else -extern struct pf_state_tree_id tree_id; -extern struct pf_state_queue state_list; -#endif +struct pf_keyhash { + LIST_HEAD(, pf_state_key) keys; + struct mtx lock; +}; + +struct pf_idhash { + LIST_HEAD(, pf_state) states; + struct mtx lock; +}; + +#define PF_HASHSIZ (32768) +VNET_DECLARE(struct pf_keyhash *, pf_keyhash); +VNET_DECLARE(struct pf_idhash *, pf_idhash); +VNET_DECLARE(u_long, pf_hashmask); +#define V_pf_keyhash VNET(pf_keyhash) +#define V_pf_idhash VNET(pf_idhash) +#define V_pf_hashmask VNET(pf_hashmask) +VNET_DECLARE(struct pf_srchash *, pf_srchash); +VNET_DECLARE(u_long, pf_srchashmask); +#define V_pf_srchash VNET(pf_srchash) +#define V_pf_srchashmask VNET(pf_srchashmask) + +#define PF_IDHASH(s) (be64toh((s)->id) % (V_pf_hashmask + 1)) + +VNET_DECLARE(void *, pf_swi_cookie); +#define V_pf_swi_cookie VNET(pf_swi_cookie) + +VNET_DECLARE(uint64_t, pf_stateid[MAXCPU]); +#define V_pf_stateid VNET(pf_stateid) -TAILQ_HEAD(pf_poolqueue, pf_pool); -#ifdef __FreeBSD__ -VNET_DECLARE(struct pf_poolqueue, pf_pools[2]); -#define V_pf_pools VNET(pf_pools) -#else -extern struct pf_poolqueue pf_pools[2]; -#endif TAILQ_HEAD(pf_altqqueue, pf_altq); -#ifdef __FreeBSD__ VNET_DECLARE(struct pf_altqqueue, pf_altqs[2]); #define 
V_pf_altqs VNET(pf_altqs) VNET_DECLARE(struct pf_palist, pf_pabuf); #define V_pf_pabuf VNET(pf_pabuf) -#else -extern struct pf_altqqueue pf_altqs[2]; -extern struct pf_palist pf_pabuf; -#endif -#ifdef __FreeBSD__ VNET_DECLARE(u_int32_t, ticket_altqs_active); #define V_ticket_altqs_active VNET(ticket_altqs_active) VNET_DECLARE(u_int32_t, ticket_altqs_inactive); @@ -1827,147 +1695,120 @@ VNET_DECLARE(struct pf_altqqueue *, pf_altqs_active); #define V_pf_altqs_active VNET(pf_altqs_active) VNET_DECLARE(struct pf_altqqueue *, pf_altqs_inactive); #define V_pf_altqs_inactive VNET(pf_altqs_inactive) -VNET_DECLARE(struct pf_poolqueue *, pf_pools_active); -#define V_pf_pools_active VNET(pf_pools_active) -VNET_DECLARE(struct pf_poolqueue *, pf_pools_inactive); -#define V_pf_pools_inactive VNET(pf_pools_inactive) -#else -extern u_int32_t ticket_altqs_active; -extern u_int32_t ticket_altqs_inactive; -extern int altqs_inactive_open; -extern u_int32_t ticket_pabuf; -extern struct pf_altqqueue *pf_altqs_active; -extern struct pf_altqqueue *pf_altqs_inactive; -extern struct pf_poolqueue *pf_pools_active; -extern struct pf_poolqueue *pf_pools_inactive; -#endif -extern int pf_tbladdr_setup(struct pf_ruleset *, - struct pf_addr_wrap *); -extern void pf_tbladdr_remove(struct pf_addr_wrap *); -extern void pf_tbladdr_copyout(struct pf_addr_wrap *); + +VNET_DECLARE(struct pf_rulequeue, pf_unlinked_rules); +#define V_pf_unlinked_rules VNET(pf_unlinked_rules) + +void pf_initialize(void); +void pf_cleanup(void); + +struct pf_mtag *pf_get_mtag(struct mbuf *); + extern void pf_calc_skip_steps(struct pf_rulequeue *); -#ifdef __FreeBSD__ #ifdef ALTQ extern void pf_altq_ifnet_event(struct ifnet *, int); #endif -VNET_DECLARE(uma_zone_t, pf_src_tree_pl); -#define V_pf_src_tree_pl VNET(pf_src_tree_pl) -VNET_DECLARE(uma_zone_t, pf_rule_pl); -#define V_pf_rule_pl VNET(pf_rule_pl) -VNET_DECLARE(uma_zone_t, pf_state_pl); -#define V_pf_state_pl VNET(pf_state_pl) -VNET_DECLARE(uma_zone_t, 
pf_state_key_pl); -#define V_pf_state_key_pl VNET(pf_state_key_pl) -VNET_DECLARE(uma_zone_t, pf_state_item_pl); -#define V_pf_state_item_pl VNET(pf_state_item_pl) -VNET_DECLARE(uma_zone_t, pf_altq_pl); -#define V_pf_altq_pl VNET(pf_altq_pl) -VNET_DECLARE(uma_zone_t, pf_pooladdr_pl); -#define V_pf_pooladdr_pl VNET(pf_pooladdr_pl) -VNET_DECLARE(uma_zone_t, pfr_ktable_pl); -#define V_pfr_ktable_pl VNET(pfr_ktable_pl) -VNET_DECLARE(uma_zone_t, pfr_kentry_pl); -#define V_pfr_kentry_pl VNET(pfr_kentry_pl) -VNET_DECLARE(uma_zone_t, pfr_kcounters_pl); -#define V_pfr_kcounters_pl VNET(pfr_kcounters_pl) -VNET_DECLARE(uma_zone_t, pf_cache_pl); -#define V_pf_cache_pl VNET(pf_cache_pl) -VNET_DECLARE(uma_zone_t, pf_cent_pl); -#define V_pf_cent_pl VNET(pf_cent_pl) -VNET_DECLARE(uma_zone_t, pf_state_scrub_pl); -#define V_pf_state_scrub_pl VNET(pf_state_scrub_pl) -VNET_DECLARE(uma_zone_t, pfi_addr_pl); -#define V_pfi_addr_pl VNET(pfi_addr_pl) -#else -extern struct pool pf_src_tree_pl, pf_rule_pl; -extern struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl, - pf_altq_pl, pf_pooladdr_pl; -extern struct pool pf_state_scrub_pl; -#endif +VNET_DECLARE(uma_zone_t, pf_state_z); +#define V_pf_state_z VNET(pf_state_z) +VNET_DECLARE(uma_zone_t, pf_state_key_z); +#define V_pf_state_key_z VNET(pf_state_key_z) +VNET_DECLARE(uma_zone_t, pf_state_scrub_z); +#define V_pf_state_scrub_z VNET(pf_state_scrub_z) + extern void pf_purge_thread(void *); -#ifdef __FreeBSD__ -extern int pf_purge_expired_src_nodes(int); -extern int pf_purge_expired_states(u_int32_t , int); -#else -extern void pf_purge_expired_src_nodes(int); -extern void pf_purge_expired_states(u_int32_t); -#endif -extern void pf_unlink_state(struct pf_state *); -extern void pf_free_state(struct pf_state *); +extern void pf_intr(void *); +extern void pf_purge_expired_src_nodes(void); + +extern int pf_unlink_state(struct pf_state *, u_int); +#define PF_ENTER_LOCKED 0x00000001 +#define PF_RETURN_LOCKED 0x00000002 extern int 
pf_state_insert(struct pfi_kif *, struct pf_state_key *, struct pf_state_key *, struct pf_state *); -extern int pf_insert_src_node(struct pf_src_node **, - struct pf_rule *, struct pf_addr *, - sa_family_t); -void pf_src_tree_remove_state(struct pf_state *); -extern struct pf_state *pf_find_state_byid(struct pf_state_cmp *); +extern void pf_free_state(struct pf_state *); + +static __inline u_int +pf_hashsrc(struct pf_addr *addr, sa_family_t af) +{ + u_int h; + +#define ADDR_HASH(a) ((a) ^ ((a) >> 16)) + + switch (af) { + case AF_INET: + h = ADDR_HASH(addr->v4.s_addr); + break; + case AF_INET6: + h = ADDR_HASH(addr->v6.__u6_addr.__u6_addr32[3]); + default: + panic("%s: unknown address family %u", __func__, af); + } +#undef ADDR_HASH + + return (h & V_pf_srchashmask); +} + +static __inline void +pf_ref_state(struct pf_state *s) +{ + + refcount_acquire(&s->refs); +} + +static __inline int +pf_release_state(struct pf_state *s) +{ + + if (refcount_release(&s->refs)) { + pf_free_state(s); + return (1); + } else + return (0); +} + +extern struct pf_state *pf_find_state_byid(uint64_t, uint32_t); extern struct pf_state *pf_find_state_all(struct pf_state_key_cmp *, u_int, int *); +struct pf_src_node *pf_find_src_node(struct pf_addr *, struct pf_rule *, + sa_family_t, int); extern void pf_print_state(struct pf_state *); extern void pf_print_flags(u_int8_t); extern u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t, u_int8_t); -#ifdef __FreeBSD__ VNET_DECLARE(struct ifnet *, sync_ifp); #define V_sync_ifp VNET(sync_ifp); VNET_DECLARE(struct pf_rule, pf_default_rule); #define V_pf_default_rule VNET(pf_default_rule) -#else -extern struct ifnet *sync_ifp; -extern struct pf_rule pf_default_rule; -#endif extern void pf_addrcpy(struct pf_addr *, struct pf_addr *, u_int8_t); -void pf_rm_rule(struct pf_rulequeue *, - struct pf_rule *); -#ifndef __FreeBSD__ -struct pf_divert *pf_find_divert(struct mbuf *); -#endif +void pf_free_rule(struct pf_rule *); #ifdef INET -#ifdef 
__FreeBSD__ -int pf_test(int, struct ifnet *, struct mbuf **, struct ether_header *, - struct inpcb *); -#else -int pf_test(int, struct ifnet *, struct mbuf **, struct ether_header *); -#endif +int pf_test(int, struct ifnet *, struct mbuf **, struct inpcb *); #endif /* INET */ #ifdef INET6 -#ifdef __FreeBSD__ -int pf_test6(int, struct ifnet *, struct mbuf **, struct ether_header *, - struct inpcb *); -#else -int pf_test6(int, struct ifnet *, struct mbuf **, struct ether_header *); -#endif +int pf_test6(int, struct ifnet *, struct mbuf **, struct inpcb *); void pf_poolmask(struct pf_addr *, struct pf_addr*, struct pf_addr *, struct pf_addr *, u_int8_t); void pf_addr_inc(struct pf_addr *, sa_family_t); #endif /* INET6 */ -#ifdef __FreeBSD__ u_int32_t pf_new_isn(struct pf_state *); -#endif void *pf_pull_hdr(struct mbuf *, int, void *, int, u_short *, u_short *, sa_family_t); void pf_change_a(void *, u_int16_t *, u_int32_t, u_int8_t); -int pflog_packet(struct pfi_kif *, struct mbuf *, sa_family_t, u_int8_t, - u_int8_t, struct pf_rule *, struct pf_rule *, struct pf_ruleset *, - struct pf_pdesc *); void pf_send_deferred_syn(struct pf_state *); int pf_match_addr(u_int8_t, struct pf_addr *, struct pf_addr *, struct pf_addr *, sa_family_t); int pf_match_addr_range(struct pf_addr *, struct pf_addr *, struct pf_addr *, sa_family_t); -int pf_match(u_int8_t, u_int32_t, u_int32_t, u_int32_t); int pf_match_port(u_int8_t, u_int16_t, u_int16_t, u_int16_t); -int pf_match_uid(u_int8_t, uid_t, uid_t, uid_t); -int pf_match_gid(u_int8_t, gid_t, gid_t, gid_t); void pf_normalize_init(void); +void pf_normalize_cleanup(void); int pf_normalize_ip(struct mbuf **, int, struct pfi_kif *, u_short *, struct pf_pdesc *); int pf_normalize_ip6(struct mbuf **, int, struct pfi_kif *, u_short *, @@ -1985,25 +1826,17 @@ u_int32_t void pf_purge_expired_fragments(void); int pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *, int); -int pf_rtlabel_match(struct pf_addr *, sa_family_t, struct 
pf_addr_wrap *, - int); -#ifdef __FreeBSD__ -int pf_socket_lookup(int, struct pf_pdesc *, struct inpcb *); -#else -int pf_socket_lookup(int, struct pf_pdesc *); -#endif +int pf_socket_lookup(int, struct pf_pdesc *, struct mbuf *); struct pf_state_key *pf_alloc_state_key(int); -void pf_pkt_addr_changed(struct mbuf *); -int pf_state_key_attach(struct pf_state_key *, struct pf_state *, int); void pfr_initialize(void); +void pfr_cleanup(void); int pfr_match_addr(struct pfr_ktable *, struct pf_addr *, sa_family_t); void pfr_update_stats(struct pfr_ktable *, struct pf_addr *, sa_family_t, u_int64_t, int, int, int); -int pfr_pool_get(struct pfr_ktable *, int *, struct pf_addr *, - struct pf_addr **, struct pf_addr **, sa_family_t); +int pfr_pool_get(struct pfr_ktable *, int *, struct pf_addr *, sa_family_t); void pfr_dynaddr_update(struct pfr_ktable *, struct pfi_dynaddr *); struct pfr_ktable * - pfr_attach_table(struct pf_ruleset *, char *, int); + pfr_attach_table(struct pf_ruleset *, char *); void pfr_detach_table(struct pfr_ktable *); int pfr_clr_tables(struct pfr_table *, int *, int); int pfr_add_tables(struct pfr_table *, int, int *, int); @@ -2032,120 +1865,44 @@ int pfr_ina_commit(struct pfr_table *, u_int32_t, int *, int *, int); int pfr_ina_define(struct pfr_table *, struct pfr_addr *, int, int *, int *, u_int32_t, int); -#ifdef __FreeBSD__ +MALLOC_DECLARE(PFI_MTYPE); VNET_DECLARE(struct pfi_kif *, pfi_all); #define V_pfi_all VNET(pfi_all) -#else -extern struct pfi_kif *pfi_all; -#endif void pfi_initialize(void); -#ifdef __FreeBSD__ void pfi_cleanup(void); -#endif -struct pfi_kif *pfi_kif_get(const char *); -void pfi_kif_ref(struct pfi_kif *, enum pfi_kif_refs); -void pfi_kif_unref(struct pfi_kif *, enum pfi_kif_refs); +void pfi_kif_ref(struct pfi_kif *); +void pfi_kif_unref(struct pfi_kif *); +struct pfi_kif *pfi_kif_find(const char *); +struct pfi_kif *pfi_kif_attach(struct pfi_kif *, const char *); int pfi_kif_match(struct pfi_kif *, struct pfi_kif *); -void 
pfi_attach_ifnet(struct ifnet *); -void pfi_detach_ifnet(struct ifnet *); -void pfi_attach_ifgroup(struct ifg_group *); -void pfi_detach_ifgroup(struct ifg_group *); -void pfi_group_change(const char *); +void pfi_kif_purge(void); int pfi_match_addr(struct pfi_dynaddr *, struct pf_addr *, sa_family_t); int pfi_dynaddr_setup(struct pf_addr_wrap *, sa_family_t); -void pfi_dynaddr_remove(struct pf_addr_wrap *); +void pfi_dynaddr_remove(struct pfi_dynaddr *); void pfi_dynaddr_copyout(struct pf_addr_wrap *); void pfi_update_status(const char *, struct pf_status *); -int pfi_get_ifaces(const char *, struct pfi_kif *, int *); +void pfi_get_ifaces(const char *, struct pfi_kif *, int *); int pfi_set_flags(const char *, int); int pfi_clear_flags(const char *, int); -#ifdef __FreeBSD__ -int pf_match_tag(struct mbuf *, struct pf_rule *, int *, - struct pf_mtag *); -#else -int pf_match_tag(struct mbuf *, struct pf_rule *, int *); -#endif -u_int16_t pf_tagname2tag(char *); -void pf_tag2tagname(u_int16_t, char *); -void pf_tag_ref(u_int16_t); -void pf_tag_unref(u_int16_t); -#ifdef __FreeBSD__ -int pf_tag_packet(struct mbuf *, int, int, struct pf_mtag *); -#else -int pf_tag_packet(struct mbuf *, int, int); -#endif -u_int32_t pf_qname2qid(char *); +int pf_match_tag(struct mbuf *, struct pf_rule *, int *, int); +int pf_tag_packet(struct mbuf *, struct pf_pdesc *, int); void pf_qid2qname(u_int32_t, char *); -void pf_qid_unref(u_int32_t); -#ifdef __FreeBSD__ VNET_DECLARE(struct pf_status, pf_status); #define V_pf_status VNET(pf_status) -#else -extern struct pf_status pf_status; -#endif -#ifdef __FreeBSD__ -VNET_DECLARE(uma_zone_t, pf_frent_pl); -#define V_pf_frent_pl VNET(pf_frent_pl) -VNET_DECLARE(uma_zone_t, pf_frag_pl); -#define V_pf_frag_pl VNET(pf_frag_pl) -VNET_DECLARE(struct sx, pf_consistency_lock); -#define V_pf_consistency_lock VNET(pf_consistency_lock) -#else -extern struct pool pf_frent_pl, pf_frag_pl; -extern struct rwlock pf_consistency_lock; -#endif - -struct 
pf_pool_limit { - void *pp; - unsigned limit; -}; -#ifdef __FreeBSD__ -VNET_DECLARE(struct pf_pool_limit, pf_pool_limits[PF_LIMIT_MAX]); -#define V_pf_pool_limits VNET(pf_pool_limits) -#else -extern struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX]; -#endif - -#ifdef __FreeBSD__ -struct pf_frent { - LIST_ENTRY(pf_frent) fr_next; - struct ip *fr_ip; - struct mbuf *fr_m; -}; - -struct pf_frcache { - LIST_ENTRY(pf_frcache) fr_next; - uint16_t fr_off; - uint16_t fr_end; -}; - -struct pf_fragment { - RB_ENTRY(pf_fragment) fr_entry; - TAILQ_ENTRY(pf_fragment) frag_next; - struct in_addr fr_src; - struct in_addr fr_dst; - u_int8_t fr_p; /* protocol of this fragment */ - u_int8_t fr_flags; /* status flags */ - u_int16_t fr_id; /* fragment id for reassemble */ - u_int16_t fr_max; /* fragment data max */ - u_int32_t fr_timeout; -#define fr_queue fr_u.fru_queue -#define fr_cache fr_u.fru_cache - union { - LIST_HEAD(pf_fragq, pf_frent) fru_queue; /* buffering */ - LIST_HEAD(pf_cacheq, pf_frcache) fru_cache; /* non-buf */ - } fr_u; +struct pf_limit { + uma_zone_t zone; + u_int limit; }; -#endif /* (__FreeBSD__) */ +VNET_DECLARE(struct pf_limit, pf_limits[PF_LIMIT_MAX]); +#define V_pf_limits VNET(pf_limits) #endif /* _KERNEL */ -#ifdef __FreeBSD__ #ifdef _KERNEL VNET_DECLARE(struct pf_anchor_global, pf_anchors); #define V_pf_anchors VNET(pf_anchors) @@ -2153,11 +1910,6 @@ VNET_DECLARE(struct pf_anchor, pf_main_anchor); #define V_pf_main_anchor VNET(pf_main_anchor) #define pf_main_ruleset V_pf_main_anchor.ruleset #endif -#else -extern struct pf_anchor_global pf_anchors; -extern struct pf_anchor pf_main_anchor; -#define pf_main_ruleset pf_main_anchor.ruleset -#endif /* these ruleset functions can be linked into userland programs (pfctl) */ int pf_get_ruleset_number(u_int8_t); @@ -2168,20 +1920,10 @@ int pf_anchor_copyout(const struct pf_ruleset *, const struct pf_rule *, struct pfioc_rule *); void pf_anchor_remove(struct pf_rule *); void pf_remove_if_empty_ruleset(struct 
pf_ruleset *); -struct pf_anchor *pf_find_anchor(const char *); struct pf_ruleset *pf_find_ruleset(const char *); struct pf_ruleset *pf_find_or_create_ruleset(const char *); void pf_rs_initialize(void); -#ifndef __FreeBSD__ -#ifdef _KERNEL -int pf_anchor_copyout(const struct pf_ruleset *, - const struct pf_rule *, struct pfioc_rule *); -void pf_anchor_remove(struct pf_rule *); - -#endif /* _KERNEL */ -#endif - /* The fingerprint functions can be linked into userland programs (tcpdump) */ int pf_osfp_add(struct pf_osfp_ioctl *); #ifdef _KERNEL @@ -2189,20 +1931,9 @@ struct pf_osfp_enlist * pf_osfp_fingerprint(struct pf_pdesc *, struct mbuf *, int, const struct tcphdr *); #endif /* _KERNEL */ -struct pf_osfp_enlist * - pf_osfp_fingerprint_hdr(const struct ip *, const struct ip6_hdr *, - const struct tcphdr *); void pf_osfp_flush(void); int pf_osfp_get(struct pf_osfp_ioctl *); -#ifdef __FreeBSD__ -int pf_osfp_initialize(void); -void pf_osfp_cleanup(void); -#else -void pf_osfp_initialize(void); -#endif int pf_osfp_match(struct pf_osfp_enlist *, pf_osfp_t); -struct pf_os_fingerprint * - pf_osfp_validate(void); #ifdef _KERNEL void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t); @@ -2219,16 +1950,12 @@ int pf_map_addr(u_int8_t, struct pf_rule *, struct pf_rule *pf_get_translation(struct pf_pdesc *, struct mbuf *, int, int, struct pfi_kif *, struct pf_src_node **, struct pf_state_key **, struct pf_state_key **, - struct pf_state_key **, struct pf_state_key **, struct pf_addr *, struct pf_addr *, u_int16_t, u_int16_t); -int pf_state_key_setup(struct pf_pdesc *, struct pf_rule *, - struct pf_state_key **, struct pf_state_key **, - struct pf_state_key **, struct pf_state_key **, - struct pf_addr *, struct pf_addr *, - u_int16_t, u_int16_t); +struct pf_state_key *pf_state_key_setup(struct pf_pdesc *, struct pf_addr *, + struct pf_addr *, u_int16_t, u_int16_t); +struct pf_state_key *pf_state_key_clone(struct pf_state_key *); #endif /* _KERNEL */ - #endif /* _NET_PFVAR_H_ 
*/ diff --git a/sys/modules/pf/Makefile b/sys/modules/pf/Makefile index d4b01da..2f941e1 100644 --- a/sys/modules/pf/Makefile +++ b/sys/modules/pf/Makefile @@ -7,8 +7,8 @@ KMOD= pf SRCS= pf.c pf_if.c pf_lb.c pf_osfp.c pf_ioctl.c pf_norm.c pf_table.c \ - pf_ruleset.c \ - in4_cksum.c \ + pf_ruleset.c in4_cksum.c \ + bus_if.h device_if.h \ opt_pf.h opt_inet.h opt_inet6.h opt_bpf.h opt_global.h CFLAGS+= -I${.CURDIR}/../../contrib/pf @@ -33,7 +33,6 @@ opt_pf.h: echo "#define DEV_PF 1" > ${.TARGET} echo "#define DEV_PFLOG 1" >> ${.TARGET} echo "#define DEV_PFSYNC 1" >> ${.TARGET} - echo "#define DEV_PFLOW 1" >> ${.TARGET} .if defined(VIMAGE) opt_global.h: diff --git a/sys/net/if.c b/sys/net/if.c index bf332db..6a2812e 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -1084,6 +1084,7 @@ if_addgroup(struct ifnet *ifp, const char *groupname) struct ifg_list *ifgl; struct ifg_group *ifg = NULL; struct ifg_member *ifgm; + int new = 0; if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' && groupname[strlen(groupname) - 1] <= '9') @@ -1124,8 +1125,8 @@ if_addgroup(struct ifnet *ifp, const char *groupname) strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group)); ifg->ifg_refcnt = 0; TAILQ_INIT(&ifg->ifg_members); - EVENTHANDLER_INVOKE(group_attach_event, ifg); TAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next); + new = 1; } ifg->ifg_refcnt++; @@ -1139,6 +1140,8 @@ if_addgroup(struct ifnet *ifp, const char *groupname) IFNET_WUNLOCK(); + if (new) + EVENTHANDLER_INVOKE(group_attach_event, ifg); EVENTHANDLER_INVOKE(group_change_event, groupname); return (0); @@ -1177,10 +1180,11 @@ if_delgroup(struct ifnet *ifp, const char *groupname) if (--ifgl->ifgl_group->ifg_refcnt == 0) { TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next); + IFNET_WUNLOCK(); EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group); free(ifgl->ifgl_group, M_TEMP); - } - IFNET_WUNLOCK(); + } else + IFNET_WUNLOCK(); free(ifgl, M_TEMP); @@ -1221,11 +1225,12 @@ if_delgroups(struct ifnet *ifp) if 
(--ifgl->ifgl_group->ifg_refcnt == 0) { TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next); + IFNET_WUNLOCK(); EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group); free(ifgl->ifgl_group, M_TEMP); - } - IFNET_WUNLOCK(); + } else + IFNET_WUNLOCK(); free(ifgl, M_TEMP); diff --git a/sys/netinet/in_gif.c b/sys/netinet/in_gif.c index 22f35df..6c60390 100644 --- a/sys/netinet/in_gif.c +++ b/sys/netinet/in_gif.c @@ -256,8 +256,6 @@ in_gif_output(struct ifnet *ifp, int family, struct mbuf *m) #endif } - m_addr_changed(m); - error = ip_output(m, NULL, &sc->gif_ro, 0, NULL, NULL); if (!(GIF2IFP(sc)->if_flags & IFF_LINK0) && diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c index 3260197..6451cb6 100644 --- a/sys/netinet/ip_icmp.c +++ b/sys/netinet/ip_icmp.c @@ -675,8 +675,6 @@ icmp_reflect(struct mbuf *m) goto done; /* Ip_output() will check for broadcast */ } - m_addr_changed(m); - t = ip->ip_dst; ip->ip_dst = ip->ip_src; diff --git a/sys/netinet/ipfw/ip_fw2.c b/sys/netinet/ipfw/ip_fw2.c index 18a9c5a..0dfab1f 100644 --- a/sys/netinet/ipfw/ip_fw2.c +++ b/sys/netinet/ipfw/ip_fw2.c @@ -1698,20 +1698,30 @@ do { \ case O_ALTQ: { struct pf_mtag *at; + struct m_tag *mtag; ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd; + /* + * ALTQ uses mbuf tags from another + * packet filtering system - pf(4). + * We allocate a tag in its format + * and fill it in, pretending to be pf(4). + */ match = 1; at = pf_find_mtag(m); if (at != NULL && at->qid != 0) break; - at = pf_get_mtag(m); - if (at == NULL) { + mtag = m_tag_get(PACKET_TAG_PF, + sizeof(struct pf_mtag), M_NOWAIT | M_ZERO); + if (mtag == NULL) { /* * Let the packet fall back to the * default ALTQ. 
*/ break; } + m_tag_prepend(m, mtag); + at = (struct pf_mtag *)(mtag + 1); at->qid = altq->qid; at->hdr = ip; break; diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index d2b841a..e48b666 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -100,9 +100,6 @@ void (*ip_divert_ptr)(struct mbuf *, int); int (*ng_ipfw_input_p)(struct mbuf **, int, struct ip_fw_args *, int); -/* Hook for telling pf that the destination address changed */ -void (*m_addr_chg_pf_p)(struct mbuf *m); - #ifdef INET /* * Hooks for multicast routing. They all default to NULL, so leave them not diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 9d35e0a..4cc2e69 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -544,7 +544,6 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, m_freem(m->m_next); m->m_next = NULL; m->m_data = (caddr_t)ipgen; - m_addr_changed(m); /* m_len is set later */ tlen = 0; #define xchg(a,b,type) { type t; t=a; a=b; b=t; } diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c index a1e19d9..202dc05 100644 --- a/sys/netinet6/icmp6.c +++ b/sys/netinet6/icmp6.c @@ -1177,8 +1177,6 @@ icmp6_notify_error(struct mbuf **mp, int off, int icmp6len, int code) ip6cp.ip6c_src = &icmp6src; ip6cp.ip6c_nxt = nxt; - m_addr_changed(m); - if (icmp6type == ICMP6_PACKET_TOO_BIG) { notifymtu = ntohl(icmp6->icmp6_mtu); ip6cp.ip6c_cmdarg = (void *)&notifymtu; @@ -2298,8 +2296,6 @@ icmp6_reflect(struct mbuf *m, size_t off) m->m_flags &= ~(M_BCAST|M_MCAST); - m_addr_changed(m); - ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL); if (outif) icmp6_ifoutstat_inc(outif, type, code); diff --git a/sys/netinet6/in6_gif.c b/sys/netinet6/in6_gif.c index c329e11..573287c 100644 --- a/sys/netinet6/in6_gif.c +++ b/sys/netinet6/in6_gif.c @@ -264,8 +264,6 @@ in6_gif_output(struct ifnet *ifp, #endif } - m_addr_changed(m); - #ifdef IPV6_MINMTU /* * force fragmentation to minimum MTU, to avoid path MTU discovery. 
diff --git a/sys/netipsec/ipsec_input.c b/sys/netipsec/ipsec_input.c index 8b53bf4..a004aef 100644 --- a/sys/netipsec/ipsec_input.c +++ b/sys/netipsec/ipsec_input.c @@ -473,8 +473,6 @@ ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav, key_sa_recordxfer(sav, m); /* record data transfer */ - m_addr_changed(m); - #ifdef DEV_ENC encif->if_ipackets++; encif->if_ibytes += m->m_pkthdr.len; diff --git a/sys/netipsec/ipsec_output.c b/sys/netipsec/ipsec_output.c index 1a1b646..b03e4b6 100644 --- a/sys/netipsec/ipsec_output.c +++ b/sys/netipsec/ipsec_output.c @@ -190,8 +190,6 @@ ipsec_process_done(struct mbuf *m, struct ipsecrequest *isr) } key_sa_recordxfer(sav, m); /* record data transfer */ - m_addr_changed(m); - /* * We're done with IPsec processing, transmit the packet using the * appropriate network protocol (IP or IPv6). SPD lookup will be diff --git a/sys/netipsec/xform_ipip.c b/sys/netipsec/xform_ipip.c index 0d5fdb4..c6f6379 100644 --- a/sys/netipsec/xform_ipip.c +++ b/sys/netipsec/xform_ipip.c @@ -392,8 +392,6 @@ _ipip_input(struct mbuf *m, int iphlen, struct ifnet *gifp) panic("%s: bogus ip version %u", __func__, v>>4); } - m_addr_changed(m); - if (netisr_queue(isr, m)) { /* (0) on success. */ V_ipipstat.ipips_qfull++; DPRINTF(("%s: packet dropped because of full queue\n", diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index 38adfaf..a26d5ca 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -749,16 +749,6 @@ m_last(struct mbuf *m) return (m); } -extern void (*m_addr_chg_pf_p)(struct mbuf *m); - -static __inline void -m_addr_changed(struct mbuf *m) -{ - - if (m_addr_chg_pf_p) - m_addr_chg_pf_p(m); -} - /* * mbuf, cluster, and external object allocation macros (for compatibility * purposes). 
@@ -998,7 +988,7 @@ struct mbuf *m_unshare(struct mbuf *, int how); #define PACKET_TAG_DIVERT 17 /* divert info */ #define PACKET_TAG_IPFORWARD 18 /* ipforward info */ #define PACKET_TAG_MACLABEL (19 | MTAG_PERSISTENT) /* MAC label */ -#define PACKET_TAG_PF 21 /* PF + ALTQ information */ +#define PACKET_TAG_PF (21 | MTAG_PERSISTENT) /* PF/ALTQ information */ #define PACKET_TAG_RTSOCKFAM 25 /* rtsock sa family */ #define PACKET_TAG_IPOPTIONS 27 /* Saved IP options */ #define PACKET_TAG_CARP 28 /* CARP info */ diff --git a/sys/sys/param.h b/sys/sys/param.h index a3bf897..a079980 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -58,7 +58,7 @@ * in the range 5 to 9. */ #undef __FreeBSD_version -#define __FreeBSD_version 1000017 /* Master, propagated to newvers */ +#define __FreeBSD_version 1000018 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, diff --git a/usr.bin/netstat/if.c b/usr.bin/netstat/if.c index c08fbf0..854844f 100644 --- a/usr.bin/netstat/if.c +++ b/usr.bin/netstat/if.c @@ -81,6 +81,32 @@ static void catchalarm(int); static char addr_buf[NI_MAXHOST]; /* for getnameinfo() */ #endif +static const char* pfsyncacts[] = { + /* PFSYNC_ACT_CLR */ "clear all request", + /* PFSYNC_ACT_INS */ "state insert", + /* PFSYNC_ACT_INS_ACK */ "state inserted ack", + /* PFSYNC_ACT_UPD */ "state update", + /* PFSYNC_ACT_UPD_C */ "compressed state update", + /* PFSYNC_ACT_UPD_REQ */ "uncompressed state request", + /* PFSYNC_ACT_DEL */ "state delete", + /* PFSYNC_ACT_DEL_C */ "compressed state delete", + /* PFSYNC_ACT_INS_F */ "fragment insert", + /* PFSYNC_ACT_DEL_F */ "fragment delete", + /* PFSYNC_ACT_BUS */ "bulk update mark", + /* PFSYNC_ACT_TDB */ "TDB replay counter update", + /* PFSYNC_ACT_EOF */ "end of frame mark", +}; + +static void +pfsync_acts_stats(const char *fmt, uint64_t *a) +{ + int i; + + for (i = 0; i < PFSYNC_ACT_MAX; i++, a++) + if (*a || sflag <= 1) + printf(fmt, *a, pfsyncacts[i], 
plural(*a)); +} + /* * Dump pfsync statistics structure. */ @@ -106,11 +132,11 @@ pfsync_stats(u_long off, const char *name, int af1 __unused, int proto __unused) #define p(f, m) if (pfsyncstat.f || sflag <= 1) \ printf(m, (uintmax_t)pfsyncstat.f, plural(pfsyncstat.f)) -#define p2(f, m) if (pfsyncstat.f || sflag <= 1) \ - printf(m, (uintmax_t)pfsyncstat.f) p(pfsyncs_ipackets, "\t%ju packet%s received (IPv4)\n"); p(pfsyncs_ipackets6, "\t%ju packet%s received (IPv6)\n"); + pfsync_acts_stats("\t %ju %s%s received\n", + &pfsyncstat.pfsyncs_iacts[0]); p(pfsyncs_badif, "\t\t%ju packet%s discarded for bad interface\n"); p(pfsyncs_badttl, "\t\t%ju packet%s discarded for bad ttl\n"); p(pfsyncs_hdrops, "\t\t%ju packet%s shorter than header\n"); @@ -123,10 +149,11 @@ pfsync_stats(u_long off, const char *name, int af1 __unused, int proto __unused) p(pfsyncs_badstate, "\t\t%ju failed state lookup/insert%s\n"); p(pfsyncs_opackets, "\t%ju packet%s sent (IPv4)\n"); p(pfsyncs_opackets6, "\t%ju packet%s sent (IPv6)\n"); - p2(pfsyncs_onomem, "\t\t%ju send failed due to mbuf memory error\n"); - p2(pfsyncs_oerrors, "\t\t%ju send error\n"); + pfsync_acts_stats("\t %ju %s%s sent\n", + &pfsyncstat.pfsyncs_oacts[0]); + p(pfsyncs_onomem, "\t\t%ju failure%s due to mbuf memory error\n"); + p(pfsyncs_oerrors, "\t\t%ju send error%s\n"); #undef p -#undef p2 } /* diff --git a/usr.sbin/bsnmpd/modules/snmp_pf/BEGEMOT-PF-MIB.txt b/usr.sbin/bsnmpd/modules/snmp_pf/BEGEMOT-PF-MIB.txt index 0b90bb2..d2b247f 100644 --- a/usr.sbin/bsnmpd/modules/snmp_pf/BEGEMOT-PF-MIB.txt +++ b/usr.sbin/bsnmpd/modules/snmp_pf/BEGEMOT-PF-MIB.txt @@ -585,7 +585,7 @@ PfInterfacesIfEntry ::= SEQUENCE { pfInterfacesIfDescr OCTET STRING, pfInterfacesIfType INTEGER, pfInterfacesIfTZero TimeTicks, - pfInterfacesIfRefsState Unsigned32, + pfInterfacesIfRefsState Null, pfInterfacesIfRefsRule Unsigned32, pfInterfacesIf4BytesInPass Counter64, pfInterfacesIf4BytesInBlock Counter64, diff --git a/usr.sbin/bsnmpd/modules/snmp_pf/pf_snmp.c 
b/usr.sbin/bsnmpd/modules/snmp_pf/pf_snmp.c index bc4bc35..1048ffe 100644 --- a/usr.sbin/bsnmpd/modules/snmp_pf/pf_snmp.c +++ b/usr.sbin/bsnmpd/modules/snmp_pf/pf_snmp.c @@ -586,11 +586,8 @@ pf_iftable(struct snmp_context __unused *ctx, struct snmp_value *val, val->v.uint32 = (time(NULL) - e->pfi.pfik_tzero) * 100; break; - case LEAF_pfInterfacesIfRefsState: - val->v.uint32 = e->pfi.pfik_states; - break; case LEAF_pfInterfacesIfRefsRule: - val->v.uint32 = e->pfi.pfik_rules; + val->v.uint32 = e->pfi.pfik_rulerefs; break; case LEAF_pfInterfacesIf4BytesInPass: val->v.counter64 = diff --git a/usr.sbin/bsnmpd/modules/snmp_pf/pf_tree.def b/usr.sbin/bsnmpd/modules/snmp_pf/pf_tree.def index 7b791b3..1dfa14c 100644 --- a/usr.sbin/bsnmpd/modules/snmp_pf/pf_tree.def +++ b/usr.sbin/bsnmpd/modules/snmp_pf/pf_tree.def @@ -108,7 +108,7 @@ (2 pfInterfacesIfDescr OCTETSTRING GET) (3 pfInterfacesIfType ENUM ( 0 group 1 instance 2 detached ) GET) (4 pfInterfacesIfTZero TIMETICKS GET) - (5 pfInterfacesIfRefsState UNSIGNED32 GET) + (5 pfInterfacesIfRefsState NULL GET) (6 pfInterfacesIfRefsRule UNSIGNED32 GET) (7 pfInterfacesIf4BytesInPass COUNTER64 GET) (8 pfInterfacesIf4BytesInBlock COUNTER64 GET) |