From dc6ae5fc92df6a060834e8dee2fee112a4586afc Mon Sep 17 00:00:00 2001
From: mlaier
Date: Tue, 3 Jul 2007 12:06:01 +0000
Subject: Import pf from OpenBSD 4.1

---
 sys/contrib/pf/net/if_pflog.c   |  136 +++--
 sys/contrib/pf/net/if_pflog.h   |   16 +-
 sys/contrib/pf/net/if_pfsync.c  |  520 +++++++++++++----
 sys/contrib/pf/net/if_pfsync.h  |   69 ++-
 sys/contrib/pf/net/pf.c         | 1204 ++++++++++++++++++++++++---------------
 sys/contrib/pf/net/pf_if.c      |  695 ++++++++++------------
 sys/contrib/pf/net/pf_ioctl.c   | 1002 +++++++++++++++++---------------
 sys/contrib/pf/net/pf_norm.c    |  143 +++--
 sys/contrib/pf/net/pf_osfp.c    |   94 ++-
 sys/contrib/pf/net/pf_ruleset.c |  415 ++++++++++++++
 sys/contrib/pf/net/pf_table.c   |   10 +-
 sys/contrib/pf/net/pfvar.h      |  359 +++++++-----
 12 files changed, 2973 insertions(+), 1690 deletions(-)
 create mode 100644 sys/contrib/pf/net/pf_ruleset.c

(limited to 'sys/contrib')

diff --git a/sys/contrib/pf/net/if_pflog.c b/sys/contrib/pf/net/if_pflog.c
index 41e1e65..561a2f6 100644
--- a/sys/contrib/pf/net/if_pflog.c
+++ b/sys/contrib/pf/net/if_pflog.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: if_pflog.c,v 1.12 2004/05/19 17:50:51 dhartmei Exp $	*/
+/*	$OpenBSD: if_pflog.c,v 1.22 2006/12/15 09:31:20 otto Exp $	*/
 /*
  * The authors of this code are John Ioannidis (ji@tla.org),
  * Angelos D. Keromytis (kermit@csd.uch.gr) and
@@ -39,6 +39,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -72,44 +73,90 @@
 #define DPRINTF(x)
 #endif
 
-struct pflog_softc pflogif[NPFLOG];
-
 void	pflogattach(int);
 int	pflogoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
 	    struct rtentry *);
 int	pflogioctl(struct ifnet *, u_long, caddr_t);
-void	pflogrtrequest(int, struct rtentry *, struct sockaddr *);
 void	pflogstart(struct ifnet *);
+int	pflog_clone_create(struct if_clone *, int);
+int	pflog_clone_destroy(struct ifnet *);
+
+LIST_HEAD(, pflog_softc)	pflogif_list;
+struct if_clone	pflog_cloner =
+    IF_CLONE_INITIALIZER("pflog", pflog_clone_create, pflog_clone_destroy);
+
+struct ifnet	*pflogifs[PFLOGIFS_MAX];	/* for fast access */
 
 extern int ifqmaxlen;
 
 void
 pflogattach(int npflog)
 {
+	int	i;
+	LIST_INIT(&pflogif_list);
+	for (i = 0; i < PFLOGIFS_MAX; i++)
+		pflogifs[i] = NULL;
+	(void) pflog_clone_create(&pflog_cloner, 0);
+	if_clone_attach(&pflog_cloner);
+}
+
+int
+pflog_clone_create(struct if_clone *ifc, int unit)
+{
 	struct ifnet *ifp;
-	int i;
-
-	bzero(pflogif, sizeof(pflogif));
-
-	for (i = 0; i < NPFLOG; i++) {
-		ifp = &pflogif[i].sc_if;
-		snprintf(ifp->if_xname, sizeof ifp->if_xname, "pflog%d", i);
-		ifp->if_softc = &pflogif[i];
-		ifp->if_mtu = PFLOGMTU;
-		ifp->if_ioctl = pflogioctl;
-		ifp->if_output = pflogoutput;
-		ifp->if_start = pflogstart;
-		ifp->if_type = IFT_PFLOG;
-		ifp->if_snd.ifq_maxlen = ifqmaxlen;
-		ifp->if_hdrlen = PFLOG_HDRLEN;
-		if_attach(ifp);
-		if_alloc_sadl(ifp);
+	struct pflog_softc *pflogif;
+	int s;
+
+	if (unit >= PFLOGIFS_MAX)
+		return (EINVAL);
+
+	if ((pflogif = malloc(sizeof(*pflogif), M_DEVBUF, M_NOWAIT)) == NULL)
+		return (ENOMEM);
+	bzero(pflogif, sizeof(*pflogif));
+
+	pflogif->sc_unit = unit;
+	ifp = &pflogif->sc_if;
+	snprintf(ifp->if_xname, sizeof ifp->if_xname, "pflog%d", unit);
+	ifp->if_softc = pflogif;
+	ifp->if_mtu = PFLOGMTU;
+	ifp->if_ioctl = pflogioctl;
+	ifp->if_output = pflogoutput;
+	ifp->if_start = pflogstart;
+	ifp->if_type = IFT_PFLOG;
+	ifp->if_snd.ifq_maxlen = ifqmaxlen;
+	ifp->if_hdrlen = PFLOG_HDRLEN;
+	if_attach(ifp);
+	if_alloc_sadl(ifp);
 #if NBPFILTER > 0
-		bpfattach(&pflogif[i].sc_if.if_bpf, ifp, DLT_PFLOG,
-		    PFLOG_HDRLEN);
+
bpfattach(&pflogif->sc_if.if_bpf, ifp, DLT_PFLOG, PFLOG_HDRLEN); #endif - } + + s = splnet(); + LIST_INSERT_HEAD(&pflogif_list, pflogif, sc_list); + pflogifs[unit] = ifp; + splx(s); + + return (0); +} + +int +pflog_clone_destroy(struct ifnet *ifp) +{ + struct pflog_softc *pflogif = ifp->if_softc; + int s; + + s = splnet(); + pflogifs[pflogif->sc_unit] = NULL; + LIST_REMOVE(pflogif, sc_list); + splx(s); + +#if NBPFILTER > 0 + bpfdetach(ifp); +#endif + if_detach(ifp); + free(pflogif, M_DEVBUF); + return (0); } /* @@ -122,7 +169,7 @@ pflogstart(struct ifnet *ifp) int s; for (;;) { - s = splimp(); + s = splnet(); IF_DROP(&ifp->if_snd); IF_DEQUEUE(&ifp->if_snd, m); splx(s); @@ -143,14 +190,6 @@ pflogoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, } /* ARGSUSED */ -void -pflogrtrequest(int cmd, struct rtentry *rt, struct sockaddr *sa) -{ - if (rt) - rt->rt_rmx.rmx_mtu = PFLOGMTU; -} - -/* ARGSUSED */ int pflogioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { @@ -174,16 +213,18 @@ pflogioctl(struct ifnet *ifp, u_long cmd, caddr_t data) int pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, u_int8_t reason, struct pf_rule *rm, struct pf_rule *am, - struct pf_ruleset *ruleset) + struct pf_ruleset *ruleset, struct pf_pdesc *pd) { #if NBPFILTER > 0 struct ifnet *ifn; struct pfloghdr hdr; - struct mbuf m1; - if (kif == NULL || m == NULL || rm == NULL) + if (kif == NULL || m == NULL || rm == NULL || pd == NULL) return (-1); + if ((ifn = pflogifs[rm->logif]) == NULL || !ifn->if_bpf) + return (0); + bzero(&hdr, sizeof(hdr)); hdr.length = PFLOG_REAL_HDRLEN; hdr.af = af; @@ -201,6 +242,17 @@ pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, strlcpy(hdr.ruleset, ruleset->anchor->name, sizeof(hdr.ruleset)); } + if (rm->log & PF_LOG_SOCKET_LOOKUP && !pd->lookup.done) + pd->lookup.done = pf_socket_lookup(dir, pd); + if (pd->lookup.done > 0) { + hdr.uid = pd->lookup.uid; + hdr.pid = pd->lookup.pid; + } else { + hdr.uid = UID_MAX; + hdr.pid = NO_PID; + } + hdr.rule_uid = rm->cuid; + hdr.rule_pid = rm->cpid; hdr.dir = dir; #ifdef INET @@ -213,14 +265,10 @@ pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, } #endif /* INET */ - m1.m_next = m; - m1.m_len = PFLOG_HDRLEN; - m1.m_data = (char *) &hdr; - - ifn = &(pflogif[0].sc_if); - - if (ifn->if_bpf) - bpf_mtap(ifn->if_bpf, &m1); + ifn->if_opackets++; + ifn->if_obytes += m->m_pkthdr.len; + bpf_mtap_hdr(ifn->if_bpf, (char *)&hdr, PFLOG_HDRLEN, m, + BPF_DIRECTION_OUT); #endif return (0); diff --git a/sys/contrib/pf/net/if_pflog.h b/sys/contrib/pf/net/if_pflog.h index 7a43b10..e9e0b01 100644 --- a/sys/contrib/pf/net/if_pflog.h +++ b/sys/contrib/pf/net/if_pflog.h @@ -1,4 +1,4 @@ -/* $OpenBSD: if_pflog.h,v 1.11 2004/05/19 17:50:51 dhartmei Exp $ */ +/* $OpenBSD: if_pflog.h,v 1.14 2006/10/25 11:27:01 henning Exp $ */ /* * Copyright 2001 Niels Provos * All rights reserved. 
@@ -27,8 +27,12 @@ #ifndef _NET_IF_PFLOG_H_ #define _NET_IF_PFLOG_H_ +#define PFLOGIFS_MAX 16 + struct pflog_softc { - struct ifnet sc_if; /* the interface */ + struct ifnet sc_if; /* the interface */ + int sc_unit; + LIST_ENTRY(pflog_softc) sc_list; }; #define PFLOG_RULESET_NAME_SIZE 16 @@ -42,6 +46,10 @@ struct pfloghdr { char ruleset[PFLOG_RULESET_NAME_SIZE]; u_int32_t rulenr; u_int32_t subrulenr; + uid_t uid; + pid_t pid; + uid_t rule_uid; + pid_t rule_pid; u_int8_t dir; u_int8_t pad[3]; }; @@ -64,9 +72,9 @@ struct old_pfloghdr { #ifdef _KERNEL #if NPFLOG > 0 -#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g) pflog_packet(i,a,b,c,d,e,f,g) +#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g,h) pflog_packet(i,a,b,c,d,e,f,g,h) #else -#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g) ((void)0) +#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g,h) ((void)0) #endif /* NPFLOG > 0 */ #endif /* _KERNEL */ #endif /* _NET_IF_PFLOG_H_ */ diff --git a/sys/contrib/pf/net/if_pfsync.c b/sys/contrib/pf/net/if_pfsync.c index d0f56d0..1106339 100644 --- a/sys/contrib/pf/net/if_pfsync.c +++ b/sys/contrib/pf/net/if_pfsync.c @@ -1,4 +1,4 @@ -/* $OpenBSD: if_pfsync.c,v 1.46 2005/02/20 15:58:38 mcbride Exp $ */ +/* $OpenBSD: if_pfsync.c,v 1.73 2006/11/16 13:13:38 henning Exp $ */ /* * Copyright (c) 2002 Michael Shalayeff @@ -26,8 +26,6 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -#include "bpfilter.h" -#include "pfsync.h" #include #include @@ -43,11 +41,12 @@ #include #include #include +#include +#include #include #include #ifdef INET -#include #include #include #include @@ -55,20 +54,20 @@ #endif #ifdef INET6 -#ifndef INET -#include -#endif #include #endif /* INET6 */ #include "carp.h" #if NCARP > 0 -extern int carp_suppress_preempt; +#include #endif #include #include +#include "bpfilter.h" +#include "pfsync.h" + #define PFSYNC_MINMTU \ (sizeof(struct pfsync_header) + sizeof(struct pf_state)) @@ -79,12 +78,17 @@ int pfsyncdebug; #define DPRINTF(x) #endif -struct pfsync_softc pfsyncif; -struct pfsyncstats pfsyncstats; +struct pfsync_softc *pfsyncif = NULL; +struct pfsyncstats pfsyncstats; void pfsyncattach(int); +int pfsync_clone_create(struct if_clone *, int); +int pfsync_clone_destroy(struct ifnet *); void pfsync_setmtu(struct pfsync_softc *, int); -int pfsync_insert_net_state(struct pfsync_state *); +int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, + struct pf_state_peer *); +int pfsync_insert_net_state(struct pfsync_state *, u_int8_t); +void pfsync_update_net_tdb(struct pfsync_tdb *); int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); int pfsyncioctl(struct ifnet *, u_long, caddr_t); @@ -93,7 +97,10 @@ void pfsyncstart(struct ifnet *); struct mbuf *pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **); int pfsync_request_update(struct pfsync_state_upd *, struct in_addr *); int pfsync_sendout(struct pfsync_softc *); +int pfsync_tdb_sendout(struct pfsync_softc *); +int pfsync_sendout_mbuf(struct pfsync_softc *, struct mbuf *); void pfsync_timeout(void *); +void pfsync_tdb_timeout(void *); void pfsync_send_bus(struct pfsync_softc *, u_int8_t); void pfsync_bulk_update(void *); void pfsync_bulkfail(void *); @@ -101,41 +108,77 @@ void pfsync_bulkfail(void *); int pfsync_sync_ok; extern int ifqmaxlen; +struct if_clone pfsync_cloner = + IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy); + void pfsyncattach(int npfsync) { + if_clone_attach(&pfsync_cloner); +} +int +pfsync_clone_create(struct if_clone *ifc, int unit) +{ struct ifnet *ifp; + if (unit != 0) + return (EINVAL); + pfsync_sync_ok 
= 1; - bzero(&pfsyncif, sizeof(pfsyncif)); - pfsyncif.sc_mbuf = NULL; - pfsyncif.sc_mbuf_net = NULL; - pfsyncif.sc_statep.s = NULL; - pfsyncif.sc_statep_net.s = NULL; - pfsyncif.sc_maxupdates = 128; - pfsyncif.sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP; - pfsyncif.sc_sendaddr.s_addr = INADDR_PFSYNC_GROUP; - pfsyncif.sc_ureq_received = 0; - pfsyncif.sc_ureq_sent = 0; - ifp = &pfsyncif.sc_if; - strlcpy(ifp->if_xname, "pfsync0", sizeof ifp->if_xname); - ifp->if_softc = &pfsyncif; + if ((pfsyncif = malloc(sizeof(*pfsyncif), M_DEVBUF, M_NOWAIT)) == NULL) + return (ENOMEM); + bzero(pfsyncif, sizeof(*pfsyncif)); + pfsyncif->sc_mbuf = NULL; + pfsyncif->sc_mbuf_net = NULL; + pfsyncif->sc_mbuf_tdb = NULL; + pfsyncif->sc_statep.s = NULL; + pfsyncif->sc_statep_net.s = NULL; + pfsyncif->sc_statep_tdb.t = NULL; + pfsyncif->sc_maxupdates = 128; + pfsyncif->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP; + pfsyncif->sc_sendaddr.s_addr = INADDR_PFSYNC_GROUP; + pfsyncif->sc_ureq_received = 0; + pfsyncif->sc_ureq_sent = 0; + pfsyncif->sc_bulk_send_next = NULL; + pfsyncif->sc_bulk_terminator = NULL; + ifp = &pfsyncif->sc_if; + snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit); + ifp->if_softc = pfsyncif; ifp->if_ioctl = pfsyncioctl; ifp->if_output = pfsyncoutput; ifp->if_start = pfsyncstart; ifp->if_type = IFT_PFSYNC; ifp->if_snd.ifq_maxlen = ifqmaxlen; ifp->if_hdrlen = PFSYNC_HDRLEN; - pfsync_setmtu(&pfsyncif, MCLBYTES); - timeout_set(&pfsyncif.sc_tmo, pfsync_timeout, &pfsyncif); - timeout_set(&pfsyncif.sc_bulk_tmo, pfsync_bulk_update, &pfsyncif); - timeout_set(&pfsyncif.sc_bulkfail_tmo, pfsync_bulkfail, &pfsyncif); + pfsync_setmtu(pfsyncif, ETHERMTU); + timeout_set(&pfsyncif->sc_tmo, pfsync_timeout, pfsyncif); + timeout_set(&pfsyncif->sc_tdb_tmo, pfsync_tdb_timeout, pfsyncif); + timeout_set(&pfsyncif->sc_bulk_tmo, pfsync_bulk_update, pfsyncif); + timeout_set(&pfsyncif->sc_bulkfail_tmo, pfsync_bulkfail, pfsyncif); if_attach(ifp); if_alloc_sadl(ifp); +#if NCARP > 0 + if_addgroup(ifp, "carp"); +#endif + +#if NBPFILTER > 0 + bpfattach(&pfsyncif->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN); +#endif + + return (0); +} + +int +pfsync_clone_destroy(struct ifnet *ifp) +{ #if NBPFILTER > 0 - bpfattach(&pfsyncif.sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN); + bpfdetach(ifp); #endif + if_detach(ifp); + free(pfsyncif, M_DEVBUF); + pfsyncif = NULL; + return (0); } /* @@ -148,7 +191,7 @@ pfsyncstart(struct ifnet *ifp) int s; for (;;) { - s = splimp(); + s = splnet(); IF_DROP(&ifp->if_snd); IF_DEQUEUE(&ifp->if_snd, m); splx(s); @@ -161,7 +204,21 @@ pfsyncstart(struct ifnet *ifp) } int -pfsync_insert_net_state(struct pfsync_state *sp) +pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, + struct pf_state_peer *d) +{ + if (s->scrub.scrub_flag && d->scrub == NULL) { + d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT); + if (d->scrub == NULL) + return (ENOMEM); + bzero(d->scrub, sizeof(*d->scrub)); + } + + return (0); +} + +int +pfsync_insert_net_state(struct pfsync_state *sp, u_int8_t chksum_flag) { struct pf_state *st = NULL; struct pf_rule *r = NULL; @@ -173,7 +230,7 @@ pfsync_insert_net_state(struct pfsync_state *sp) return (EINVAL); } - kif = pfi_lookup_create(sp->ifname); + kif = pfi_kif_get(sp->ifname); if (kif == NULL) { if (pf_status.debug >= PF_DEBUG_MISC) printf("pfsync_insert_net_state: " @@ -183,19 +240,33 @@ pfsync_insert_net_state(struct pfsync_state *sp) } /* - * Just use the default rule until we have infrastructure to find the - * best matching rule. 
+ * If the ruleset checksums match, it's safe to associate the state + * with the rule of that number. */ - r = &pf_default_rule; + if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && chksum_flag) + r = pf_main_ruleset.rules[ + PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; + else + r = &pf_default_rule; if (!r->max_states || r->states < r->max_states) st = pool_get(&pf_state_pl, PR_NOWAIT); if (st == NULL) { - pfi_maybe_destroy(kif); + pfi_kif_unref(kif, PFI_KIF_REF_NONE); return (ENOMEM); } bzero(st, sizeof(*st)); + /* allocate memory for scrub info */ + if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || + pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) { + pfi_kif_unref(kif, PFI_KIF_REF_NONE); + if (st->src.scrub) + pool_put(&pf_state_scrub_pl, st->src.scrub); + pool_put(&pf_state_pl, st); + return (ENOMEM); + } + st->rule.ptr = r; /* XXX get pointers to nat_rule and anchor */ @@ -225,11 +296,14 @@ pfsync_insert_net_state(struct pfsync_state *sp) st->creatorid = sp->creatorid; st->sync_flags = PFSTATE_FROMSYNC; - if (pf_insert_state(kif, st)) { - pfi_maybe_destroy(kif); + pfi_kif_unref(kif, PFI_KIF_REF_NONE); /* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */ r->states--; + if (st->dst.scrub) + pool_put(&pf_state_scrub_pl, st->dst.scrub); + if (st->src.scrub) + pool_put(&pf_state_scrub_pl, st->src.scrub); pool_put(&pf_state_pl, st); return (EINVAL); } @@ -242,22 +316,25 @@ pfsync_input(struct mbuf *m, ...) { struct ip *ip = mtod(m, struct ip *); struct pfsync_header *ph; - struct pfsync_softc *sc = &pfsyncif; - struct pf_state *st, key; + struct pfsync_softc *sc = pfsyncif; + struct pf_state *st; + struct pf_state_cmp key; struct pfsync_state *sp; struct pfsync_state_upd *up; struct pfsync_state_del *dp; struct pfsync_state_clr *cp; struct pfsync_state_upd_req *rup; struct pfsync_state_bus *bus; + struct pfsync_tdb *pt; struct in_addr src; struct mbuf *mp; int iplen, action, error, i, s, count, offp, sfail, stale = 0; + u_int8_t chksum_flag = 0; pfsyncstats.pfsyncs_ipackets++; /* verify that we have a sync interface configured */ - if (!sc->sc_sync_ifp || !pf_status.running) + if (!sc || !sc->sc_sync_ifp || !pf_status.running) goto done; /* verify that the packet came in on the right interface */ @@ -306,6 +383,9 @@ pfsync_input(struct mbuf *m, ...) /* Cheaper to grab this now than having to mess with mbufs later */ src = ip->ip_src; + if (!bcmp(&ph->pf_chksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) + chksum_flag++; + switch (action) { case PFSYNC_ACT_CLR: { struct pf_state *nexts; @@ -323,28 +403,24 @@ pfsync_input(struct mbuf *m, ...) 
if (cp->ifname[0] == '\0') { for (st = RB_MIN(pf_state_tree_id, &tree_id); st; st = nexts) { - nexts = RB_NEXT(pf_state_tree_id, &tree_id, st); + nexts = RB_NEXT(pf_state_tree_id, &tree_id, st); if (st->creatorid == creatorid) { - st->timeout = PFTM_PURGE; - pf_purge_expired_state(st); + st->sync_flags |= PFSTATE_FROMSYNC; + pf_unlink_state(st); } } } else { - kif = pfi_lookup_if(cp->ifname); - if (kif == NULL) { - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync_input: PFSYNC_ACT_CLR " - "bad interface: %s\n", cp->ifname); + if ((kif = pfi_kif_get(cp->ifname)) == NULL) { splx(s); - goto done; + return; } for (st = RB_MIN(pf_state_tree_lan_ext, &kif->pfik_lan_ext); st; st = nexts) { nexts = RB_NEXT(pf_state_tree_lan_ext, &kif->pfik_lan_ext, st); if (st->creatorid == creatorid) { - st->timeout = PFTM_PURGE; - pf_purge_expired_state(st); + st->sync_flags |= PFSTATE_FROMSYNC; + pf_unlink_state(st); } } } @@ -375,7 +451,8 @@ pfsync_input(struct mbuf *m, ...) continue; } - if ((error = pfsync_insert_net_state(sp))) { + if ((error = pfsync_insert_net_state(sp, + chksum_flag))) { if (error == ENOMEM) { splx(s); goto done; @@ -414,7 +491,7 @@ pfsync_input(struct mbuf *m, ...) st = pf_find_state_byid(&key); if (st == NULL) { /* insert the update */ - if (pfsync_insert_net_state(sp)) + if (pfsync_insert_net_state(sp, chksum_flag)) pfsyncstats.pfsyncs_badstate++; continue; } @@ -453,7 +530,7 @@ pfsync_input(struct mbuf *m, ...) */ if (st->src.state > sp->src.state) sfail = 5; - else if ( st->dst.state > sp->dst.state) + else if (st->dst.state > sp->dst.state) sfail = 6; } if (sfail) { @@ -478,6 +555,7 @@ pfsync_input(struct mbuf *m, ...) } continue; } + pfsync_alloc_scrub_memory(&sp->dst, &st->dst); pf_state_peer_ntoh(&sp->src, &st->src); pf_state_peer_ntoh(&sp->dst, &st->dst); st->expire = ntohl(sp->expire) + time_second; @@ -509,9 +587,8 @@ pfsync_input(struct mbuf *m, ...) pfsyncstats.pfsyncs_badstate++; continue; } - st->timeout = PFTM_PURGE; st->sync_flags |= PFSTATE_FROMSYNC; - pf_purge_expired_state(st); + pf_unlink_state(st); } splx(s); break; @@ -604,6 +681,7 @@ pfsync_input(struct mbuf *m, ...) PFSYNC_FLAG_STALE); continue; } + pfsync_alloc_scrub_memory(&up->dst, &st->dst); pf_state_peer_ntoh(&up->src, &st->src); pf_state_peer_ntoh(&up->dst, &st->dst); st->expire = ntohl(up->expire) + time_second; @@ -632,9 +710,8 @@ pfsync_input(struct mbuf *m, ...) pfsyncstats.pfsyncs_badstate++; continue; } - st->timeout = PFTM_PURGE; st->sync_flags |= PFSTATE_FROMSYNC; - pf_purge_expired_state(st); + pf_unlink_state(st); } splx(s); break; @@ -660,6 +737,10 @@ pfsync_input(struct mbuf *m, ...) if (key.id == 0 && key.creatorid == 0) { sc->sc_ureq_received = time_uptime; + if (sc->sc_bulk_send_next == NULL) + sc->sc_bulk_send_next = + TAILQ_FIRST(&state_list); + sc->sc_bulk_terminator = sc->sc_bulk_send_next; if (pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: received " "bulk update request\n"); @@ -709,7 +790,7 @@ pfsync_input(struct mbuf *m, ...) timeout_del(&sc->sc_bulkfail_tmo); #if NCARP > 0 if (!pfsync_sync_ok) - carp_suppress_preempt--; + carp_group_demote_adj(&sc->sc_if, -1); #endif pfsync_sync_ok = 1; if (pf_status.debug >= PF_DEBUG_MISC) @@ -723,6 +804,18 @@ pfsync_input(struct mbuf *m, ...) 
break; } break; + case PFSYNC_ACT_TDB_UPD: + if ((mp = m_pulldown(m, iplen + sizeof(*ph), + count * sizeof(*pt), &offp)) == NULL) { + pfsyncstats.pfsyncs_badlen++; + return; + } + s = splsoftnet(); + for (i = 0, pt = (struct pfsync_tdb *)(mp->m_data + offp); + i < count; i++, pt++) + pfsync_update_net_tdb(pt); + splx(s); + break; } done: @@ -862,7 +955,7 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) sc->sc_ureq_sent = time_uptime; #if NCARP > 0 if (pfsync_sync_ok) - carp_suppress_preempt++; + carp_group_demote_adj(&sc->sc_if, 1); #endif pfsync_sync_ok = 0; if (pf_status.debug >= PF_DEBUG_MISC) @@ -938,6 +1031,10 @@ pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp) len = sizeof(struct pfsync_header) + sizeof(struct pfsync_state_bus); break; + case PFSYNC_ACT_TDB_UPD: + len = (sc->sc_maxcount * sizeof(struct pfsync_tdb)) + + sizeof(struct pfsync_header); + break; default: len = (sc->sc_maxcount * sizeof(struct pfsync_state)) + sizeof(struct pfsync_header); @@ -962,17 +1059,23 @@ pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp) h->af = 0; h->count = 0; h->action = action; + if (action != PFSYNC_ACT_TDB_UPD) + bcopy(&pf_status.pf_chksum, &h->pf_chksum, + PF_MD5_DIGEST_LENGTH); *sp = (void *)((char *)h + PFSYNC_HDRLEN); - timeout_add(&sc->sc_tmo, hz); + if (action == PFSYNC_ACT_TDB_UPD) + timeout_add(&sc->sc_tdb_tmo, hz); + else + timeout_add(&sc->sc_tmo, hz); return (m); } int pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags) { - struct ifnet *ifp = &pfsyncif.sc_if; - struct pfsync_softc *sc = ifp->if_softc; + struct ifnet *ifp = NULL; + struct pfsync_softc *sc = pfsyncif; struct pfsync_header *h, *h_net; struct pfsync_state *sp = NULL; struct pfsync_state_upd *up = NULL; @@ -982,6 +1085,10 @@ pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags) int s, ret = 0; u_int8_t i = 255, newaction = 0; + if (sc == NULL) + return (0); + ifp = &sc->sc_if; + /* * If a packet falls in the forest and there's nobody around to * hear, does it make a sound? 
@@ -1044,8 +1151,6 @@ pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags) secs = time_second; st->pfsync_time = time_uptime; - TAILQ_REMOVE(&state_updates, st, u.s.entry_updates); - TAILQ_INSERT_TAIL(&state_updates, st, u.s.entry_updates); if (sp == NULL) { /* not a "duplicate" update */ @@ -1067,10 +1172,10 @@ pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags) bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr)); sp->creation = htonl(secs - st->creation); - sp->packets[0] = htonl(st->packets[0]); - sp->packets[1] = htonl(st->packets[1]); - sp->bytes[0] = htonl(st->bytes[0]); - sp->bytes[1] = htonl(st->bytes[1]); + pf_state_counter_hton(st->packets[0], sp->packets[0]); + pf_state_counter_hton(st->packets[1], sp->packets[1]); + pf_state_counter_hton(st->bytes[0], sp->bytes[0]); + pf_state_counter_hton(st->bytes[1], sp->bytes[1]); if ((r = st->rule.ptr) == NULL) sp->rule = htonl(-1); else @@ -1169,12 +1274,16 @@ pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags) int pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src) { - struct ifnet *ifp = &pfsyncif.sc_if; + struct ifnet *ifp = NULL; struct pfsync_header *h; - struct pfsync_softc *sc = ifp->if_softc; + struct pfsync_softc *sc = pfsyncif; struct pfsync_state_upd_req *rup; int ret = 0; + if (sc == NULL) + return (0); + + ifp = &sc->sc_if; if (sc->sc_mbuf == NULL) { if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ, (void *)&sc->sc_statep.s)) == NULL) @@ -1211,11 +1320,15 @@ pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src) int pfsync_clear_states(u_int32_t creatorid, char *ifname) { - struct ifnet *ifp = &pfsyncif.sc_if; - struct pfsync_softc *sc = ifp->if_softc; + struct ifnet *ifp = NULL; + struct pfsync_softc *sc = pfsyncif; struct pfsync_state_clr *cp; int s, ret; + if (sc == NULL) + return (0); + + ifp = &sc->sc_if; s = splnet(); if (sc->sc_mbuf != NULL) pfsync_sendout(sc); @@ -1246,6 +1359,17 @@ pfsync_timeout(void *v) splx(s); } +void +pfsync_tdb_timeout(void *v) +{ + struct pfsync_softc *sc = v; + int s; + + s = splnet(); + pfsync_tdb_sendout(sc); + splx(s); +} + /* This must be called in splnet() */ void pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status) @@ -1282,28 +1406,39 @@ pfsync_bulk_update(void *v) * Grab at most PFSYNC_BULKPACKETS worth of states which have not * been sent since the latest request was made. 
*/ - while ((state = TAILQ_FIRST(&state_updates)) != NULL && - ++i < (sc->sc_maxcount * PFSYNC_BULKPACKETS)) { - if (state->pfsync_time > sc->sc_ureq_received) { - /* we're done */ - pfsync_send_bus(sc, PFSYNC_BUS_END); - sc->sc_ureq_received = 0; - timeout_del(&sc->sc_bulk_tmo); - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync: bulk update complete\n"); - break; - } else { - /* send an update and move to end of list */ - if (!state->sync_flags) + state = sc->sc_bulk_send_next; + if (state) + do { + /* send state update if syncable and not already sent */ + if (!state->sync_flags + && state->timeout < PFTM_MAX + && state->pfsync_time <= sc->sc_ureq_received) { pfsync_pack_state(PFSYNC_ACT_UPD, state, 0); - state->pfsync_time = time_uptime; - TAILQ_REMOVE(&state_updates, state, u.s.entry_updates); - TAILQ_INSERT_TAIL(&state_updates, state, - u.s.entry_updates); + i++; + } - /* look again for more in a bit */ - timeout_add(&sc->sc_bulk_tmo, 1); - } + /* figure next state to send */ + state = TAILQ_NEXT(state, u.s.entry_list); + + /* wrap to start of list if we hit the end */ + if (!state) + state = TAILQ_FIRST(&state_list); + } while (i < sc->sc_maxcount * PFSYNC_BULKPACKETS && + state != sc->sc_bulk_terminator); + + if (!state || state == sc->sc_bulk_terminator) { + /* we're done */ + pfsync_send_bus(sc, PFSYNC_BUS_END); + sc->sc_ureq_received = 0; + sc->sc_bulk_send_next = NULL; + sc->sc_bulk_terminator = NULL; + timeout_del(&sc->sc_bulk_tmo); + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: bulk update complete\n"); + } else { + /* look again for more in a bit */ + timeout_add(&sc->sc_bulk_tmo, 1); + sc->sc_bulk_send_next = state; } if (sc->sc_mbuf != NULL) pfsync_sendout(sc); @@ -1334,7 +1469,7 @@ pfsync_bulkfail(void *v) sc->sc_bulk_tries = 0; #if NCARP > 0 if (!pfsync_sync_ok) - carp_suppress_preempt--; + carp_group_demote_adj(&sc->sc_if, -1); #endif pfsync_sync_ok = 1; if (pf_status.debug >= PF_DEBUG_MISC) @@ -1346,10 +1481,11 @@ pfsync_bulkfail(void *v) /* This must be called in splnet() */ int -pfsync_sendout(sc) - struct pfsync_softc *sc; +pfsync_sendout(struct pfsync_softc *sc) { +#if NBPFILTER > 0 struct ifnet *ifp = &sc->sc_if; +#endif struct mbuf *m; timeout_del(&sc->sc_tmo); @@ -1362,7 +1498,7 @@ pfsync_sendout(sc) #if NBPFILTER > 0 if (ifp->if_bpf) - bpf_mtap(ifp->if_bpf, m); + bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); #endif if (sc->sc_mbuf_net) { @@ -1372,10 +1508,41 @@ pfsync_sendout(sc) sc->sc_statep_net.s = NULL; } - if (sc->sc_sync_ifp || sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) { - struct ip *ip; - struct sockaddr sa; + return pfsync_sendout_mbuf(sc, m); +} + +int +pfsync_tdb_sendout(struct pfsync_softc *sc) +{ +#if NBPFILTER > 0 + struct ifnet *ifp = &sc->sc_if; +#endif + struct mbuf *m; + + timeout_del(&sc->sc_tdb_tmo); + + if (sc->sc_mbuf_tdb == NULL) + return (0); + m = sc->sc_mbuf_tdb; + sc->sc_mbuf_tdb = NULL; + sc->sc_statep_tdb.t = NULL; + +#if NBPFILTER > 0 + if (ifp->if_bpf) + bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); +#endif + + return pfsync_sendout_mbuf(sc, m); +} +int +pfsync_sendout_mbuf(struct pfsync_softc *sc, struct mbuf *m) +{ + struct sockaddr sa; + struct ip *ip; + + if (sc->sc_sync_ifp || + sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) { M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); if (m == NULL) { pfsyncstats.pfsyncs_onomem++; @@ -1409,3 +1576,154 @@ pfsync_sendout(sc) return (0); } + +/* Update an in-kernel tdb. Silently fail if no tdb is found. 
*/ +void +pfsync_update_net_tdb(struct pfsync_tdb *pt) +{ + struct tdb *tdb; + int s; + + /* check for invalid values */ + if (ntohl(pt->spi) <= SPI_RESERVED_MAX || + (pt->dst.sa.sa_family != AF_INET && + pt->dst.sa.sa_family != AF_INET6)) + goto bad; + + s = spltdb(); + tdb = gettdb(pt->spi, &pt->dst, pt->sproto); + if (tdb) { + pt->rpl = ntohl(pt->rpl); + pt->cur_bytes = betoh64(pt->cur_bytes); + + /* Neither replay nor byte counter should ever decrease. */ + if (pt->rpl < tdb->tdb_rpl || + pt->cur_bytes < tdb->tdb_cur_bytes) { + splx(s); + goto bad; + } + + tdb->tdb_rpl = pt->rpl; + tdb->tdb_cur_bytes = pt->cur_bytes; + } + splx(s); + return; + + bad: + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " + "invalid value\n"); + pfsyncstats.pfsyncs_badstate++; + return; +} + +/* One of our local tdbs have been updated, need to sync rpl with others */ +int +pfsync_update_tdb(struct tdb *tdb, int output) +{ + struct ifnet *ifp = NULL; + struct pfsync_softc *sc = pfsyncif; + struct pfsync_header *h; + struct pfsync_tdb *pt = NULL; + int s, i, ret; + + if (sc == NULL) + return (0); + + ifp = &sc->sc_if; + if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL && + sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { + /* Don't leave any stale pfsync packets hanging around. */ + if (sc->sc_mbuf_tdb != NULL) { + m_freem(sc->sc_mbuf_tdb); + sc->sc_mbuf_tdb = NULL; + sc->sc_statep_tdb.t = NULL; + } + return (0); + } + + s = splnet(); + if (sc->sc_mbuf_tdb == NULL) { + if ((sc->sc_mbuf_tdb = pfsync_get_mbuf(sc, PFSYNC_ACT_TDB_UPD, + (void *)&sc->sc_statep_tdb.t)) == NULL) { + splx(s); + return (ENOMEM); + } + h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *); + } else { + h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *); + if (h->action != PFSYNC_ACT_TDB_UPD) { + /* + * XXX will never happen as long as there's + * only one "TDB action". + */ + pfsync_tdb_sendout(sc); + sc->sc_mbuf_tdb = pfsync_get_mbuf(sc, + PFSYNC_ACT_TDB_UPD, (void *)&sc->sc_statep_tdb.t); + if (sc->sc_mbuf_tdb == NULL) { + splx(s); + return (ENOMEM); + } + h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *); + } else if (sc->sc_maxupdates) { + /* + * If it's an update, look in the packet to see if + * we already have an update for the state. + */ + struct pfsync_tdb *u = + (void *)((char *)h + PFSYNC_HDRLEN); + + for (i = 0; !pt && i < h->count; i++) { + if (tdb->tdb_spi == u->spi && + tdb->tdb_sproto == u->sproto && + !bcmp(&tdb->tdb_dst, &u->dst, + SA_LEN(&u->dst.sa))) { + pt = u; + pt->updates++; + } + u++; + } + } + } + + if (pt == NULL) { + /* not a "duplicate" update */ + pt = sc->sc_statep_tdb.t++; + sc->sc_mbuf_tdb->m_pkthdr.len = + sc->sc_mbuf_tdb->m_len += sizeof(struct pfsync_tdb); + h->count++; + bzero(pt, sizeof(*pt)); + + pt->spi = tdb->tdb_spi; + memcpy(&pt->dst, &tdb->tdb_dst, sizeof pt->dst); + pt->sproto = tdb->tdb_sproto; + } + + /* + * When a failover happens, the master's rpl is probably above + * what we see here (we may be up to a second late), so + * increase it a bit for outbound tdbs to manage most such + * situations. + * + * For now, just add an offset that is likely to be larger + * than the number of packets we can see in one second. The RFC + * just says the next packet must have a higher seq value. + * + * XXX What is a good algorithm for this? We could use + * a rate-determined increase, but to know it, we would have + * to extend struct tdb. + * XXX pt->rpl can wrap over MAXINT, but if so the real tdb + * will soon be replaced anyway. 
For now, just don't handle + * this edge case. + */ +#define RPL_INCR 16384 + pt->rpl = htonl(tdb->tdb_rpl + (output ? RPL_INCR : 0)); + pt->cur_bytes = htobe64(tdb->tdb_cur_bytes); + + if (h->count == sc->sc_maxcount || + (sc->sc_maxupdates && (pt->updates >= sc->sc_maxupdates))) + ret = pfsync_tdb_sendout(sc); + + splx(s); + return (ret); +} diff --git a/sys/contrib/pf/net/if_pfsync.h b/sys/contrib/pf/net/if_pfsync.h index ddd049a..5ed465e 100644 --- a/sys/contrib/pf/net/if_pfsync.h +++ b/sys/contrib/pf/net/if_pfsync.h @@ -1,4 +1,4 @@ -/* $OpenBSD: if_pfsync.h,v 1.19 2005/01/20 17:47:38 mcbride Exp $ */ +/* $OpenBSD: if_pfsync.h,v 1.30 2006/10/31 14:49:01 henning Exp $ */ /* * Copyright (c) 2001 Michael Shalayeff @@ -35,6 +35,7 @@ struct pfsync_state_scrub { u_int16_t pfss_flags; u_int8_t pfss_ttl; /* stashed TTL */ +#define PFSYNC_SCRUB_FLAG_VALID 0x01 u_int8_t scrub_flag; u_int32_t pfss_ts_mod; /* timestamp modulation */ } __packed; @@ -54,8 +55,7 @@ struct pfsync_state_peer { u_int16_t mss; /* Maximum segment size option */ u_int8_t state; /* active state level */ u_int8_t wscale; /* window scaling factor */ - u_int8_t scrub_flag; - u_int8_t pad[5]; + u_int8_t pad[6]; } __packed; struct pfsync_state { @@ -72,8 +72,8 @@ struct pfsync_state { u_int32_t nat_rule; u_int32_t creation; u_int32_t expire; - u_int32_t packets[2]; - u_int32_t bytes[2]; + u_int32_t packets[2][2]; + u_int32_t bytes[2][2]; u_int32_t creatorid; sa_family_t af; u_int8_t proto; @@ -88,6 +88,16 @@ struct pfsync_state { #define PFSYNC_FLAG_COMPRESS 0x01 #define PFSYNC_FLAG_STALE 0x02 +struct pfsync_tdb { + u_int32_t spi; + union sockaddr_union dst; + u_int32_t rpl; + u_int64_t cur_bytes; + u_int8_t sproto; + u_int8_t updates; + u_int8_t pad[2]; +} __packed; + struct pfsync_state_upd { u_int32_t id[2]; struct pfsync_state_peer src; @@ -143,6 +153,10 @@ union sc_statep { struct pfsync_state_upd_req *r; }; +union sc_tdb_statep { + struct pfsync_tdb *t; +}; + extern int pfsync_sync_ok; struct pfsync_softc { @@ -151,26 +165,33 @@ struct pfsync_softc { struct ip_moptions sc_imo; struct timeout sc_tmo; + struct timeout sc_tdb_tmo; struct timeout sc_bulk_tmo; struct timeout sc_bulkfail_tmo; struct in_addr sc_sync_peer; struct in_addr sc_sendaddr; struct mbuf *sc_mbuf; /* current cumulative mbuf */ struct mbuf *sc_mbuf_net; /* current cumulative mbuf */ + struct mbuf *sc_mbuf_tdb; /* dito for TDB updates */ union sc_statep sc_statep; union sc_statep sc_statep_net; + union sc_tdb_statep sc_statep_tdb; u_int32_t sc_ureq_received; u_int32_t sc_ureq_sent; + struct pf_state *sc_bulk_send_next; + struct pf_state *sc_bulk_terminator; int sc_bulk_tries; int sc_maxcount; /* number of states in mtu */ int sc_maxupdates; /* number of updates/state */ }; + +extern struct pfsync_softc *pfsyncif; #endif struct pfsync_header { u_int8_t version; -#define PFSYNC_VERSION 2 +#define PFSYNC_VERSION 3 u_int8_t af; u_int8_t action; #define PFSYNC_ACT_CLR 0 /* clear all states */ @@ -183,8 +204,10 @@ struct pfsync_header { #define PFSYNC_ACT_DEL_F 7 /* delete fragments */ #define PFSYNC_ACT_UREQ 8 /* request "uncompressed" state */ #define PFSYNC_ACT_BUS 9 /* Bulk Update Status */ -#define PFSYNC_ACT_MAX 10 +#define PFSYNC_ACT_TDB_UPD 10 /* TDB replay counter update */ +#define PFSYNC_ACT_MAX 11 u_int8_t count; + u_int8_t pf_chksum[PF_MD5_DIGEST_LENGTH]; } __packed; #define PFSYNC_BULKPACKETS 1 /* # of packets per timeout */ @@ -193,7 +216,7 @@ struct pfsync_header { #define PFSYNC_ACTIONS \ "CLR ST", "INS ST", "UPD ST", "DEL ST", \ "UPD ST COMP", 
"DEL ST COMP", "INS FR", "DEL FR", \ - "UPD REQ", "BLK UPD STAT" + "UPD REQ", "BLK UPD STAT", "TDB UPD" #define PFSYNC_DFLTTL 255 @@ -236,6 +259,13 @@ struct pfsyncreq { (d)->mss = htons((s)->mss); \ (d)->state = (s)->state; \ (d)->wscale = (s)->wscale; \ + if ((s)->scrub) { \ + (d)->scrub.pfss_flags = \ + htons((s)->scrub->pfss_flags & PFSS_TIMESTAMP); \ + (d)->scrub.pfss_ttl = (s)->scrub->pfss_ttl; \ + (d)->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);\ + (d)->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID; \ + } \ } while (0) #define pf_state_peer_ntoh(s,d) do { \ @@ -246,6 +276,13 @@ struct pfsyncreq { (d)->mss = ntohs((s)->mss); \ (d)->state = (s)->state; \ (d)->wscale = (s)->wscale; \ + if ((s)->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID && \ + (d)->scrub != NULL) { \ + (d)->scrub->pfss_flags = \ + ntohs((s)->scrub.pfss_flags) & PFSS_TIMESTAMP; \ + (d)->scrub->pfss_ttl = (s)->scrub.pfss_ttl; \ + (d)->scrub->pfss_ts_mod = ntohl((s)->scrub.pfss_ts_mod);\ + } \ } while (0) #define pf_state_host_hton(s,d) do { \ @@ -258,6 +295,17 @@ struct pfsyncreq { (d)->port = (s)->port; \ } while (0) +#define pf_state_counter_hton(s,d) do { \ + d[0] = htonl((s>>32)&0xffffffff); \ + d[1] = htonl(s&0xffffffff); \ +} while (0) + +#define pf_state_counter_ntoh(s,d) do { \ + d = ntohl(s[0]); \ + d = d<<32; \ + d += ntohl(s[1]); \ +} while (0) + #ifdef _KERNEL void pfsync_input(struct mbuf *, ...); int pfsync_clear_states(u_int32_t, char *); @@ -267,7 +315,8 @@ int pfsync_pack_state(u_int8_t, struct pf_state *, int); (st->proto == IPPROTO_PFSYNC)) \ st->sync_flags |= PFSTATE_NOSYNC; \ else if (!st->sync_flags) \ - pfsync_pack_state(PFSYNC_ACT_INS, (st), 1); \ + pfsync_pack_state(PFSYNC_ACT_INS, (st), \ + PFSYNC_FLAG_COMPRESS); \ st->sync_flags &= ~PFSTATE_FROMSYNC; \ } while (0) #define pfsync_update_state(st) do { \ @@ -280,8 +329,8 @@ int pfsync_pack_state(u_int8_t, struct pf_state *, int); if (!st->sync_flags) \ pfsync_pack_state(PFSYNC_ACT_DEL, (st), \ PFSYNC_FLAG_COMPRESS); \ - st->sync_flags &= ~PFSTATE_FROMSYNC; \ } while (0) +int pfsync_update_tdb(struct tdb *, int); #endif #endif /* _NET_IF_PFSYNC_H_ */ diff --git a/sys/contrib/pf/net/pf.c b/sys/contrib/pf/net/pf.c index 340b988f..793dc34 100644 --- a/sys/contrib/pf/net/pf.c +++ b/sys/contrib/pf/net/pf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf.c,v 1.483 2005/03/15 17:38:43 dhartmei Exp $ */ +/* $OpenBSD: pf.c,v 1.527 2007/02/22 15:23:23 pyr Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -48,11 +48,14 @@ #include #include #include +#include +#include #include #include #include #include +#include #include #include @@ -92,8 +95,6 @@ * Global variables */ -struct pf_anchor_global pf_anchors; -struct pf_ruleset pf_main_ruleset; struct pf_altqqueue pf_altqs[2]; struct pf_palist pf_pabuf; struct pf_altqqueue *pf_altqs_active; @@ -105,8 +106,6 @@ u_int32_t ticket_altqs_inactive; int altqs_inactive_open; u_int32_t ticket_pabuf; -struct timeout pf_expire_to; /* expire timeout */ - struct pf_anchor_stackframe { struct pf_ruleset *rs; struct pf_rule *r; @@ -127,6 +126,8 @@ int pf_check_threshold(struct pf_threshold *); void pf_change_ap(struct pf_addr *, u_int16_t *, u_int16_t *, u_int16_t *, struct pf_addr *, u_int16_t, u_int8_t, sa_family_t); +int pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *, + struct tcphdr *, struct pf_state_peer *); #ifdef INET6 void pf_change_a6(struct pf_addr *, u_int16_t *, struct pf_addr *, u_int8_t); @@ -139,7 +140,7 @@ void pf_send_tcp(const struct pf_rule *, sa_family_t, const struct pf_addr *, const struct pf_addr *, 
u_int16_t, u_int16_t, u_int32_t, u_int32_t, u_int8_t, u_int16_t, u_int16_t, u_int8_t, int, - struct ether_header *, struct ifnet *); + u_int16_t, struct ether_header *, struct ifnet *); void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, sa_family_t, struct pf_rule *); struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *, @@ -182,9 +183,11 @@ int pf_test_state_icmp(struct pf_state **, int, void *, struct pf_pdesc *, u_short *); int pf_test_state_other(struct pf_state **, int, struct pfi_kif *, struct pf_pdesc *); -struct pf_tag *pf_get_tag(struct mbuf *); int pf_match_tag(struct mbuf *, struct pf_rule *, - struct pf_tag **, int *); + struct pf_mtag *, int *); +int pf_step_out_of_anchor(int *, struct pf_ruleset **, + int, struct pf_rule **, struct pf_rule **, + int *); void pf_hash(struct pf_addr *, struct pf_addr *, struct pf_poolhashkey *, sa_family_t); int pf_map_addr(u_int8_t, struct pf_rule *, @@ -195,11 +198,12 @@ int pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *, struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t, struct pf_src_node **); void pf_route(struct mbuf **, struct pf_rule *, int, - struct ifnet *, struct pf_state *); + struct ifnet *, struct pf_state *, + struct pf_pdesc *); void pf_route6(struct mbuf **, struct pf_rule *, int, - struct ifnet *, struct pf_state *); -int pf_socket_lookup(uid_t *, gid_t *, - int, struct pf_pdesc *); + struct ifnet *, struct pf_state *, + struct pf_pdesc *); +int pf_socket_lookup(int, struct pf_pdesc *); u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t, sa_family_t); u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t, @@ -212,16 +216,20 @@ int pf_check_proto_cksum(struct mbuf *, int, int, u_int8_t, sa_family_t); int pf_addr_wrap_neq(struct pf_addr_wrap *, struct pf_addr_wrap *); -static int pf_add_mbuf_tag(struct mbuf *, u_int); struct pf_state *pf_find_state_recurse(struct pfi_kif *, - struct pf_state *, u_int8_t); + struct pf_state_cmp *, u_int8_t); int pf_src_connlimit(struct pf_state **); int pf_check_congestion(struct ifqueue *); +extern struct pool pfr_ktable_pl; +extern struct pool pfr_kentry_pl; + struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = { { &pf_state_pl, PFSTATE_HIWAT }, { &pf_src_tree_pl, PFSNODE_HIWAT }, - { &pf_frent_pl, PFFRAG_FRENT_HIWAT } + { &pf_frent_pl, PFFRAG_FRENT_HIWAT }, + { &pfr_ktable_pl, PFR_KTABLE_HIWAT }, + { &pfr_kentry_pl, PFR_KENTRY_HIWAT } }; #define STATE_LOOKUP() \ @@ -252,9 +260,8 @@ struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = { (s)->lan.addr.addr32[3] != (s)->gwy.addr.addr32[3])) || \ (s)->lan.port != (s)->gwy.port -#define BOUND_IFACE(r, k) (((r)->rule_flag & PFRULE_IFBOUND) ? (k) : \ - ((r)->rule_flag & PFRULE_GRBOUND) ? (k)->pfik_parent : \ - (k)->pfik_parent->pfik_parent) +#define BOUND_IFACE(r, k) \ + ((r)->rule_flag & PFRULE_IFBOUND) ? 
(k) : pfi_all #define STATE_INC_COUNTERS(s) \ do { \ @@ -281,12 +288,11 @@ static __inline int pf_state_compare_ext_gwy(struct pf_state *, struct pf_state *); static __inline int pf_state_compare_id(struct pf_state *, struct pf_state *); -static __inline int pf_anchor_compare(struct pf_anchor *, struct pf_anchor *); struct pf_src_tree tree_src_tracking; struct pf_state_tree_id tree_id; -struct pf_state_queue state_updates; +struct pf_state_queue state_list; RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare); RB_GENERATE(pf_state_tree_lan_ext, pf_state, @@ -295,8 +301,6 @@ RB_GENERATE(pf_state_tree_ext_gwy, pf_state, u.s.entry_ext_gwy, pf_state_compare_ext_gwy); RB_GENERATE(pf_state_tree_id, pf_state, u.s.entry_id, pf_state_compare_id); -RB_GENERATE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare); -RB_GENERATE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare); static __inline int pf_src_compare(struct pf_src_node *a, struct pf_src_node *b) @@ -493,14 +497,6 @@ pf_state_compare_id(struct pf_state *a, struct pf_state *b) return (0); } -static __inline int -pf_anchor_compare(struct pf_anchor *a, struct pf_anchor *b) -{ - int c = strcmp(a->path, b->path); - - return (c ? (c < 0 ? -1 : 1) : 0); -} - #ifdef INET6 void pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) @@ -522,14 +518,14 @@ pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) #endif /* INET6 */ struct pf_state * -pf_find_state_byid(struct pf_state *key) +pf_find_state_byid(struct pf_state_cmp *key) { pf_status.fcounters[FCNT_STATE_SEARCH]++; - return (RB_FIND(pf_state_tree_id, &tree_id, key)); + return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key)); } struct pf_state * -pf_find_state_recurse(struct pfi_kif *kif, struct pf_state *key, u_int8_t tree) +pf_find_state_recurse(struct pfi_kif *kif, struct pf_state_cmp *key, u_int8_t tree) { struct pf_state *s; @@ -537,20 +533,20 @@ pf_find_state_recurse(struct pfi_kif *kif, struct pf_state *key, u_int8_t tree) switch (tree) { case PF_LAN_EXT: - for (; kif != NULL; kif = kif->pfik_parent) { - s = RB_FIND(pf_state_tree_lan_ext, - &kif->pfik_lan_ext, key); - if (s != NULL) - return (s); - } + if ((s = RB_FIND(pf_state_tree_lan_ext, &kif->pfik_lan_ext, + (struct pf_state *)key)) != NULL) + return (s); + if ((s = RB_FIND(pf_state_tree_lan_ext, &pfi_all->pfik_lan_ext, + (struct pf_state *)key)) != NULL) + return (s); return (NULL); case PF_EXT_GWY: - for (; kif != NULL; kif = kif->pfik_parent) { - s = RB_FIND(pf_state_tree_ext_gwy, - &kif->pfik_ext_gwy, key); - if (s != NULL) - return (s); - } + if ((s = RB_FIND(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, + (struct pf_state *)key)) != NULL) + return (s); + if ((s = RB_FIND(pf_state_tree_ext_gwy, &pfi_all->pfik_ext_gwy, + (struct pf_state *)key)) != NULL) + return (s); return (NULL); default: panic("pf_find_state_recurse"); @@ -558,7 +554,7 @@ pf_find_state_recurse(struct pfi_kif *kif, struct pf_state *key, u_int8_t tree) } struct pf_state * -pf_find_state_all(struct pf_state *key, u_int8_t tree, int *more) +pf_find_state_all(struct pf_state_cmp *key, u_int8_t tree, int *more) { struct pf_state *s, *ss = NULL; struct pfi_kif *kif; @@ -569,7 +565,7 @@ pf_find_state_all(struct pf_state *key, u_int8_t tree, int *more) case PF_LAN_EXT: TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) { s = RB_FIND(pf_state_tree_lan_ext, - &kif->pfik_lan_ext, key); + &kif->pfik_lan_ext, (struct pf_state *)key); if (s == NULL) continue; if (more == NULL) @@ -581,7 +577,7 @@ 
pf_find_state_all(struct pf_state *key, u_int8_t tree, int *more) case PF_EXT_GWY: TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) { s = RB_FIND(pf_state_tree_ext_gwy, - &kif->pfik_ext_gwy, key); + &kif->pfik_ext_gwy, (struct pf_state *)key); if (s == NULL) continue; if (more == NULL) @@ -632,6 +628,7 @@ pf_src_connlimit(struct pf_state **state) int bad = 0; (*state)->src_node->conn++; + (*state)->src.tcp_est = 1; pf_add_threshold(&(*state)->src_node->conn_rate); if ((*state)->rule.ptr->max_src_conn && @@ -845,11 +842,10 @@ pf_insert_state(struct pfi_kif *kif, struct pf_state *state) RB_REMOVE(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state); return (-1); } - TAILQ_INSERT_HEAD(&state_updates, state, u.s.entry_updates); - + TAILQ_INSERT_TAIL(&state_list, state, u.s.entry_list); pf_status.fcounters[FCNT_STATE_INSERT]++; pf_status.states++; - pfi_attach_state(kif); + pfi_kif_ref(kif, PFI_KIF_REF_STATE); #if NPFSYNC pfsync_insert_state(state); #endif @@ -857,18 +853,28 @@ pf_insert_state(struct pfi_kif *kif, struct pf_state *state) } void -pf_purge_timeout(void *arg) +pf_purge_thread(void *v) { - struct timeout *to = arg; - int s; + int nloops = 0, s; + + for (;;) { + tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz); + + s = splsoftnet(); - s = splsoftnet(); - pf_purge_expired_states(); - pf_purge_expired_fragments(); - pf_purge_expired_src_nodes(); - splx(s); + /* process a fraction of the state table every second */ + pf_purge_expired_states(1 + (pf_status.states + / pf_default_rule.timeout[PFTM_INTERVAL])); - timeout_add(to, pf_default_rule.timeout[PFTM_INTERVAL] * hz); + /* purge other expired types every PFTM_INTERVAL seconds */ + if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) { + pf_purge_expired_fragments(); + pf_purge_expired_src_nodes(0); + nloops = 0; + } + + splx(s); + } } u_int32_t @@ -884,6 +890,7 @@ pf_state_expires(const struct pf_state *state) return (time_second); if (state->timeout == PFTM_UNTIL_PACKET) return (0); + KASSERT(state->timeout != PFTM_UNLINKED); KASSERT(state->timeout < PFTM_MAX); timeout = state->rule.ptr->timeout[state->timeout]; if (!timeout) @@ -908,14 +915,21 @@ pf_state_expires(const struct pf_state *state) } void -pf_purge_expired_src_nodes(void) +pf_purge_expired_src_nodes(int waslocked) { struct pf_src_node *cur, *next; + int locked = waslocked; for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) { next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur); if (cur->states <= 0 && cur->expire <= time_second) { + if (! 
locked) { + rw_enter_write(&pf_consistency_lock); + next = RB_NEXT(pf_src_tree, + &tree_src_tracking, cur); + locked = 1; + } if (cur->rule.ptr != NULL) { cur->rule.ptr->src_nodes--; if (cur->rule.ptr->states <= 0 && @@ -928,6 +942,9 @@ pf_purge_expired_src_nodes(void) pool_put(&pf_src_tree_pl, cur); } } + + if (locked && !waslocked) + rw_exit_write(&pf_consistency_lock); } void @@ -937,8 +954,7 @@ pf_src_tree_remove_state(struct pf_state *s) if (s->src_node != NULL) { if (s->proto == IPPROTO_TCP) { - if (s->src.state == PF_TCPS_PROXY_DST || - s->timeout >= PFTM_TCP_ESTABLISHED) + if (s->src.tcp_est) --s->src_node->conn; } if (--s->src_node->states <= 0) { @@ -961,24 +977,42 @@ pf_src_tree_remove_state(struct pf_state *s) s->src_node = s->nat_src_node = NULL; } +/* callers should be at splsoftnet */ void -pf_purge_expired_state(struct pf_state *cur) +pf_unlink_state(struct pf_state *cur) { - if (cur->src.state == PF_TCPS_PROXY_DST) + if (cur->src.state == PF_TCPS_PROXY_DST) { pf_send_tcp(cur->rule.ptr, cur->af, &cur->ext.addr, &cur->lan.addr, cur->ext.port, cur->lan.port, cur->src.seqhi, cur->src.seqlo + 1, - TH_RST|TH_ACK, 0, 0, 0, 1, NULL, NULL); + TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL); + } RB_REMOVE(pf_state_tree_ext_gwy, &cur->u.s.kif->pfik_ext_gwy, cur); RB_REMOVE(pf_state_tree_lan_ext, &cur->u.s.kif->pfik_lan_ext, cur); RB_REMOVE(pf_state_tree_id, &tree_id, cur); #if NPFSYNC - pfsync_delete_state(cur); + if (cur->creatorid == pf_status.hostid) + pfsync_delete_state(cur); #endif + cur->timeout = PFTM_UNLINKED; pf_src_tree_remove_state(cur); +} + +/* callers should be at splsoftnet and hold the + * write_lock on pf_consistency_lock */ +void +pf_free_state(struct pf_state *cur) +{ +#if NPFSYNC + if (pfsyncif != NULL && + (pfsyncif->sc_bulk_send_next == cur || + pfsyncif->sc_bulk_terminator == cur)) + return; +#endif + KASSERT(cur->timeout == PFTM_UNLINKED); if (--cur->rule.ptr->states <= 0 && cur->rule.ptr->src_nodes <= 0) pf_rm_rule(NULL, cur->rule.ptr); @@ -990,8 +1024,8 @@ pf_purge_expired_state(struct pf_state *cur) if (--cur->anchor.ptr->states <= 0) pf_rm_rule(NULL, cur->anchor.ptr); pf_normalize_tcp_cleanup(cur); - pfi_detach_state(cur->u.s.kif); - TAILQ_REMOVE(&state_updates, cur, u.s.entry_updates); + pfi_kif_unref(cur->u.s.kif, PFI_KIF_REF_STATE); + TAILQ_REMOVE(&state_list, cur, u.s.entry_list); if (cur->tag) pf_tag_unref(cur->tag); pool_put(&pf_state_pl, cur); @@ -1000,16 +1034,44 @@ pf_purge_expired_state(struct pf_state *cur) } void -pf_purge_expired_states(void) +pf_purge_expired_states(u_int32_t maxcheck) { - struct pf_state *cur, *next; - - for (cur = RB_MIN(pf_state_tree_id, &tree_id); - cur; cur = next) { - next = RB_NEXT(pf_state_tree_id, &tree_id, cur); - if (pf_state_expires(cur) <= time_second) - pf_purge_expired_state(cur); + static struct pf_state *cur = NULL; + struct pf_state *next; + int locked = 0; + + while (maxcheck--) { + /* wrap to start of list when we hit the end */ + if (cur == NULL) { + cur = TAILQ_FIRST(&state_list); + if (cur == NULL) + break; /* list empty */ + } + + /* get next state, as cur may get deleted */ + next = TAILQ_NEXT(cur, u.s.entry_list); + + if (cur->timeout == PFTM_UNLINKED) { + /* free unlinked state */ + if (! locked) { + rw_enter_write(&pf_consistency_lock); + locked = 1; + } + pf_free_state(cur); + } else if (pf_state_expires(cur) <= time_second) { + /* unlink and free expired state */ + pf_unlink_state(cur); + if (! 
locked) { + rw_enter_write(&pf_consistency_lock); + locked = 1; + } + pf_free_state(cur); + } + cur = next; } + + if (locked) + rw_exit_write(&pf_consistency_lock); } int @@ -1235,9 +1297,12 @@ pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) case PF_ADDR_DYNIFTL: return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt); case PF_ADDR_NOROUTE: + case PF_ADDR_URPFFAILED: return (0); case PF_ADDR_TABLE: return (aw1->p.tbl != aw2->p.tbl); + case PF_ADDR_RTLABEL: + return (aw1->v.rtlabel != aw2->v.rtlabel); default: printf("invalid address type: %d\n", aw1->type); return (1); @@ -1424,12 +1489,70 @@ pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, } } + +/* + * Need to modulate the sequence numbers in the TCP SACK option + * (credits to Krzysztof Pfaff for report and patch) + */ +int +pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd, + struct tcphdr *th, struct pf_state_peer *dst) +{ + int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen; + u_int8_t opts[MAX_TCPOPTLEN], *opt = opts; + int copyback = 0, i, olen; + struct sackblk sack; + +#define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2) + if (hlen < TCPOLEN_SACKLEN || + !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af)) + return 0; + + while (hlen >= TCPOLEN_SACKLEN) { + olen = opt[1]; + switch (*opt) { + case TCPOPT_EOL: /* FALLTHROUGH */ + case TCPOPT_NOP: + opt++; + hlen--; + break; + case TCPOPT_SACK: + if (olen > hlen) + olen = hlen; + if (olen >= TCPOLEN_SACKLEN) { + for (i = 2; i + TCPOLEN_SACK <= olen; + i += TCPOLEN_SACK) { + memcpy(&sack, &opt[i], sizeof(sack)); + pf_change_a(&sack.start, &th->th_sum, + htonl(ntohl(sack.start) - + dst->seqdiff), 0); + pf_change_a(&sack.end, &th->th_sum, + htonl(ntohl(sack.end) - + dst->seqdiff), 0); + memcpy(&opt[i], &sack, sizeof(sack)); + } + copyback = 1; + } + /* FALLTHROUGH */ + default: + if (olen < 2) + olen = 2; + hlen -= olen; + opt += olen; + } + } + + if (copyback) + m_copyback(m, off + sizeof(*th), thoptlen, opts); + return (copyback); +} + void pf_send_tcp(const struct pf_rule *r, sa_family_t af, const struct pf_addr *saddr, const struct pf_addr *daddr, u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, - struct ether_header *eh, struct ifnet *ifp) + u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp) { struct mbuf *m; int len, tlen; @@ -1440,7 +1563,8 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, struct ip6_hdr *h6; #endif /* INET6 */ struct tcphdr *th; - char *opt; + char *opt; + struct pf_mtag *pf_mtag; /* maximum segment size tcp option */ tlen = sizeof(struct tcphdr); @@ -1464,30 +1588,24 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, m = m_gethdr(M_DONTWAIT, MT_HEADER); if (m == NULL) return; - if (tag) { - struct m_tag *mtag; - - mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT); - if (mtag == NULL) { - m_freem(m); - return; - } - m_tag_prepend(m, mtag); + if ((pf_mtag = pf_get_mtag(m)) == NULL) { + m_freem(m); + return; } + if (tag) + pf_mtag->flags |= PF_TAG_GENERATED; + + pf_mtag->tag = rtag; + + if (r != NULL && r->rtableid >= 0) + pf_mtag->rtableid = r->rtableid; + #ifdef ALTQ if (r != NULL && r->qid) { - struct m_tag *mtag; - struct altq_tag *atag; - - mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT); - if (mtag != NULL) { - atag = (struct altq_tag *)(mtag + 1); - atag->qid = r->qid; - /* add hints for ecn */ - atag->af = af; - atag->hdr = mtod(m, struct ip *); - m_tag_prepend(m, mtag); - 
} + pf_mtag->qid = r->qid; + /* add hints for ecn */ + pf_mtag->af = af; + pf_mtag->hdr = mtod(m, struct ip *); } #endif /* ALTQ */ m->m_data += max_linkhdr; @@ -1597,39 +1715,31 @@ void pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, struct pf_rule *r) { - struct m_tag *mtag; + struct pf_mtag *pf_mtag; struct mbuf *m0; - mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT); - if (mtag == NULL) - return; m0 = m_copy(m, 0, M_COPYALL); - if (m0 == NULL) { - m_tag_free(mtag); + + if ((pf_mtag = pf_get_mtag(m0)) == NULL) return; - } - m_tag_prepend(m0, mtag); + pf_mtag->flags |= PF_TAG_GENERATED; + + if (r->rtableid >= 0) + pf_mtag->rtableid = r->rtableid; #ifdef ALTQ if (r->qid) { - struct altq_tag *atag; - - mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT); - if (mtag != NULL) { - atag = (struct altq_tag *)(mtag + 1); - atag->qid = r->qid; - /* add hints for ecn */ - atag->af = af; - atag->hdr = mtod(m0, struct ip *); - m_tag_prepend(m0, mtag); - } + pf_mtag->qid = r->qid; + /* add hints for ecn */ + pf_mtag->af = af; + pf_mtag->hdr = mtod(m0, struct ip *); } #endif /* ALTQ */ switch (af) { #ifdef INET case AF_INET: - icmp_error(m0, type, code, 0, (void *)NULL); + icmp_error(m0, type, code, 0, 0); break; #endif /* INET */ #ifdef INET6 @@ -1737,58 +1847,71 @@ pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) return (pf_match(op, a1, a2, g)); } -struct pf_tag * -pf_get_tag(struct mbuf *m) +struct pf_mtag * +pf_find_mtag(struct mbuf *m) { struct m_tag *mtag; - if ((mtag = m_tag_find(m, PACKET_TAG_PF_TAG, NULL)) != NULL) - return ((struct pf_tag *)(mtag + 1)); - else + if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) == NULL) return (NULL); + + return ((struct pf_mtag *)(mtag + 1)); } -int -pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_tag **pftag, int *tag) +struct pf_mtag * +pf_get_mtag(struct mbuf *m) { - if (*tag == -1) { /* find mbuf tag */ - *pftag = pf_get_tag(m); - if (*pftag != NULL) - *tag = (*pftag)->tag; - else - *tag = 0; + struct m_tag *mtag; + + if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) == NULL) { + mtag = m_tag_get(PACKET_TAG_PF, sizeof(struct pf_mtag), + M_NOWAIT); + if (mtag == NULL) + return (NULL); + bzero(mtag + 1, sizeof(struct pf_mtag)); + m_tag_prepend(m, mtag); } + return ((struct pf_mtag *)(mtag + 1)); +} + +int +pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_mtag *pf_mtag, + int *tag) +{ + if (*tag == -1) + *tag = pf_mtag->tag; + return ((!r->match_tag_not && r->match_tag == *tag) || (r->match_tag_not && r->match_tag != *tag)); } int -pf_tag_packet(struct mbuf *m, struct pf_tag *pftag, int tag) +pf_tag_packet(struct mbuf *m, struct pf_mtag *pf_mtag, int tag, int rtableid) { - struct m_tag *mtag; - - if (tag <= 0) + if (tag <= 0 && rtableid < 0) return (0); - if (pftag == NULL) { - mtag = m_tag_get(PACKET_TAG_PF_TAG, sizeof(*pftag), M_NOWAIT); - if (mtag == NULL) + if (pf_mtag == NULL) + if ((pf_mtag = pf_get_mtag(m)) == NULL) return (1); - ((struct pf_tag *)(mtag + 1))->tag = tag; - m_tag_prepend(m, mtag); - } else - pftag->tag = tag; + if (tag > 0) + pf_mtag->tag = tag; + if (rtableid >= 0) + pf_mtag->rtableid = rtableid; return (0); } static void pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n, - struct pf_rule **r, struct pf_rule **a) + struct pf_rule **r, struct pf_rule **a, int *match) { struct pf_anchor_stackframe *f; + (*r)->anchor->match = 0; + if (match) + *match = 0; if (*depth >= sizeof(pf_anchor_stack) / sizeof(pf_anchor_stack[0])) { printf("pf_step_into_anchor: stack 
overflow\n"); @@ -1815,17 +1938,23 @@ pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n, *r = TAILQ_FIRST((*rs)->rules[n].active.ptr); } -static void +int pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n, - struct pf_rule **r, struct pf_rule **a) + struct pf_rule **r, struct pf_rule **a, int *match) { struct pf_anchor_stackframe *f; + int quick = 0; do { if (*depth <= 0) break; f = pf_anchor_stack + *depth - 1; if (f->parent != NULL && f->child != NULL) { + if (f->child->match || + (match != NULL && *match)) { + f->r->anchor->match = 1; + *match = 0; + } f->child = RB_NEXT(pf_anchor_node, f->parent, f->child); if (f->child != NULL) { *rs = &f->child->ruleset; @@ -1840,8 +1969,12 @@ pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n, if (*depth == 0 && a != NULL) *a = NULL; *rs = f->rs; + if (f->r->anchor->match || (match != NULL && *match)) + quick = f->r->quick; *r = TAILQ_NEXT(f->r, entries); } while (*r == NULL); + + return (quick); } #ifdef INET6 @@ -2145,7 +2278,7 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high, struct pf_src_node **sn) { - struct pf_state key; + struct pf_state_cmp key; struct pf_addr init_addr; u_int16_t cut; @@ -2237,8 +2370,8 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, { struct pf_rule *r, *rm = NULL; struct pf_ruleset *ruleset = NULL; - struct pf_tag *pftag = NULL; int tag = -1; + int rtableid = -1; int asd = 0; r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr); @@ -2256,8 +2389,7 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, } r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != direction) r = r->skip[PF_SKIP_DIR].ptr; @@ -2265,7 +2397,8 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, src->neg)) + else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, + src->neg, kif)) r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR : PF_SKIP_DST_ADDR].ptr; else if (src->port_op && !pf_match_port(src->port_op, @@ -2273,15 +2406,16 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, r = r->skip[src == &r->src ? 
PF_SKIP_SRC_PORT : PF_SKIP_DST_PORT].ptr; else if (dst != NULL && - PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg)) + PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; - else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af, 0)) + else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af, + 0, NULL)) r = TAILQ_NEXT(r, entries); else if (dst != NULL && dst->port_op && !pf_match_port(dst->port_op, dst->port[0], dst->port[1], dport)) r = r->skip[PF_SKIP_DST_PORT].ptr; - else if (r->match_tag && !pf_match_tag(m, r, &pftag, &tag)) + else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto != IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m, @@ -2290,15 +2424,19 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, else { if (r->tag) tag = r->tag; + if (r->rtableid >= 0) + rtableid = r->rtableid; if (r->anchor == NULL) { rm = r; } else - pf_step_into_anchor(&asd, &ruleset, rs_num, &r, NULL); + pf_step_into_anchor(&asd, &ruleset, rs_num, + &r, NULL, NULL); } if (r == NULL) - pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r, NULL); + pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r, + NULL, NULL); } - if (pf_tag_packet(m, pftag, tag)) + if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) return (NULL); if (rm != NULL && (rm->action == PF_NONAT || rm->action == PF_NORDR || rm->action == PF_NOBINAT)) @@ -2458,28 +2596,35 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, } int -pf_socket_lookup(uid_t *uid, gid_t *gid, int direction, struct pf_pdesc *pd) +pf_socket_lookup(int direction, struct pf_pdesc *pd) { struct pf_addr *saddr, *daddr; u_int16_t sport, dport; struct inpcbtable *tb; struct inpcb *inp; - *uid = UID_MAX; - *gid = GID_MAX; + if (pd == NULL) + return (-1); + pd->lookup.uid = UID_MAX; + pd->lookup.gid = GID_MAX; + pd->lookup.pid = NO_PID; switch (pd->proto) { case IPPROTO_TCP: + if (pd->hdr.tcp == NULL) + return (-1); sport = pd->hdr.tcp->th_sport; dport = pd->hdr.tcp->th_dport; tb = &tcbtable; break; case IPPROTO_UDP: + if (pd->hdr.udp == NULL) + return (-1); sport = pd->hdr.udp->uh_sport; dport = pd->hdr.udp->uh_dport; tb = &udbtable; break; default: - return (0); + return (-1); } if (direction == PF_IN) { saddr = pd->src; @@ -2500,7 +2645,7 @@ pf_socket_lookup(uid_t *uid, gid_t *gid, int direction, struct pf_pdesc *pd) if (inp == NULL) { inp = in_pcblookup_listen(tb, daddr->v4, dport, 0); if (inp == NULL) - return (0); + return (-1); } break; #endif /* INET */ @@ -2511,16 +2656,17 @@ pf_socket_lookup(uid_t *uid, gid_t *gid, int direction, struct pf_pdesc *pd) if (inp == NULL) { inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 0); if (inp == NULL) - return (0); + return (-1); } break; #endif /* INET6 */ default: - return (0); + return (-1); } - *uid = inp->inp_socket->so_euid; - *gid = inp->inp_socket->so_egid; + pd->lookup.uid = inp->inp_socket->so_euid; + pd->lookup.gid = inp->inp_socket->so_egid; + pd->lookup.pid = inp->inp_socket->so_cpid; return (1); } @@ -2691,18 +2837,15 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, struct tcphdr *th = pd->hdr.tcp; u_int16_t bport, nport = 0; sa_family_t af = pd->af; - int lookup = -1; - uid_t uid; - gid_t gid; struct pf_rule *r, *a = NULL; struct pf_ruleset *ruleset = NULL; struct pf_src_node *nsn = NULL; u_short reason; int rewrite = 0; - struct pf_tag *pftag = NULL; - int tag = -1; + int tag = -1, rtableid = -1; 
u_int16_t mss = tcp_mssdflt; int asd = 0; + int match = 0; if (pf_check_congestion(ifq)) { REASON_SET(&reason, PFRES_CONGEST); @@ -2743,8 +2886,7 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != direction) r = r->skip[PF_SKIP_DIR].ptr; @@ -2752,35 +2894,37 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != IPPROTO_TCP) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.neg)) + else if (PF_MISMATCHAW(&r->src.addr, saddr, af, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (r->src.port_op && !pf_match_port(r->src.port_op, r->src.port[0], r->src.port[1], th->th_sport)) r = r->skip[PF_SKIP_SRC_PORT].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.neg)) + else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->dst.port_op && !pf_match_port(r->dst.port_op, r->dst.port[0], r->dst.port[1], th->th_dport)) r = r->skip[PF_SKIP_DST_PORT].ptr; - else if (r->tos && !(r->tos & pd->tos)) + else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); else if (r->rule_flag & PFRULE_FRAGMENT) r = TAILQ_NEXT(r, entries); else if ((r->flagset & th->th_flags) != r->flags) r = TAILQ_NEXT(r, entries); - else if (r->uid.op && (lookup != -1 || (lookup = - pf_socket_lookup(&uid, &gid, direction, pd), 1)) && + else if (r->uid.op && (pd->lookup.done || (pd->lookup.done = + pf_socket_lookup(direction, pd), 1)) && !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], - uid)) + pd->lookup.uid)) r = TAILQ_NEXT(r, entries); - else if (r->gid.op && (lookup != -1 || (lookup = - pf_socket_lookup(&uid, &gid, direction, pd), 1)) && + else if (r->gid.op && (pd->lookup.done || (pd->lookup.done = + pf_socket_lookup(direction, pd), 1)) && !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], - gid)) + pd->lookup.gid)) r = TAILQ_NEXT(r, entries); else if (r->prob && r->prob <= arc4random()) r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, &pftag, &tag)) + else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match( pf_osfp_fingerprint(pd, m, off, th), r->os_fingerprint)) @@ -2788,7 +2932,10 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, else { if (r->tag) tag = r->tag; + if (r->rtableid >= 0) + rtableid = r->rtableid; if (r->anchor == NULL) { + match = 1; *rm = r; *am = a; *rsm = ruleset; @@ -2797,11 +2944,11 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, r = TAILQ_NEXT(r, entries); } else pf_step_into_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a); + PF_RULESET_FILTER, &r, &a, &match); } - if (r == NULL) - pf_step_out_of_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a); + if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match)) + break; } r = *rm; a = *am; @@ -2809,10 +2956,11 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, REASON_SET(&reason, PFRES_MATCH); - if (r->log) { + if (r->log || (nr != NULL && nr->natpass && nr->log)) { if (rewrite) m_copyback(m, off, sizeof(*th), th); - PFLOG_PACKET(kif, h, m, af, direction, 
reason, r, a, ruleset); + PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr, + a, ruleset, pd); } if ((r->action == PF_DROP) && @@ -2843,7 +2991,7 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, pf_send_tcp(r, af, pd->dst, pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, - r->return_ttl, 1, pd->eh, kif->pfik_ifp); + r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp); } else if ((af == AF_INET) && r->return_icmp) pf_send_icmp(m, r->return_icmp >> 8, r->return_icmp & 255, af, r); @@ -2855,7 +3003,7 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, if (r->action == PF_DROP) return (PF_DROP); - if (pf_tag_packet(m, pftag, tag)) { + if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) { REASON_SET(&reason, PFRES_MEMORY); return (PF_DROP); } @@ -2875,7 +3023,7 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, REASON_SET(&reason, PFRES_MAXSTATES); goto cleanup; } - /* src node for flter rule */ + /* src node for filter rule */ if ((r->rule_flag & PFRULE_SRCTRACK || r->rpool.opts & PF_POOL_STICKYADDR) && pf_insert_src_node(&sn, r, saddr, af) != 0) { @@ -2915,7 +3063,9 @@ cleanup: s->anchor.ptr = a; STATE_INC_COUNTERS(s); s->allow_opts = r->allow_opts; - s->log = r->log & 2; + s->log = r->log & PF_LOG_ALL; + if (nr != NULL) + s->log |= nr->log & PF_LOG_ALL; s->proto = IPPROTO_TCP; s->direction = direction; s->af = af; @@ -2950,7 +3100,8 @@ cleanup: if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && r->keep_state == PF_STATE_MODULATE) { /* Generate sequence number modulator */ - while ((s->src.seqdiff = htonl(arc4random())) == 0) + while ((s->src.seqdiff = + tcp_rndiss_next() - s->src.seqlo) == 0) ; pf_change_a(&th->th_seq, &th->th_sum, htonl(s->src.seqlo + s->src.seqdiff), 0); @@ -3043,7 +3194,7 @@ cleanup: s->src.mss = mss; pf_send_tcp(r, af, daddr, saddr, th->th_dport, th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, - TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, NULL, NULL); + TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL); REASON_SET(&reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } @@ -3067,17 +3218,14 @@ pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, struct udphdr *uh = pd->hdr.udp; u_int16_t bport, nport = 0; sa_family_t af = pd->af; - int lookup = -1; - uid_t uid; - gid_t gid; struct pf_rule *r, *a = NULL; struct pf_ruleset *ruleset = NULL; struct pf_src_node *nsn = NULL; u_short reason; int rewrite = 0; - struct pf_tag *pftag = NULL; - int tag = -1; + int tag = -1, rtableid = -1; int asd = 0; + int match = 0; if (pf_check_congestion(ifq)) { REASON_SET(&reason, PFRES_CONGEST); @@ -3118,8 +3266,7 @@ pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != direction) r = r->skip[PF_SKIP_DIR].ptr; @@ -3127,40 +3274,45 @@ pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != IPPROTO_UDP) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.neg)) + else if (PF_MISMATCHAW(&r->src.addr, saddr, af, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (r->src.port_op && !pf_match_port(r->src.port_op, r->src.port[0], r->src.port[1], uh->uh_sport)) r = r->skip[PF_SKIP_SRC_PORT].ptr; - else if 
(PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.neg)) + else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->dst.port_op && !pf_match_port(r->dst.port_op, r->dst.port[0], r->dst.port[1], uh->uh_dport)) r = r->skip[PF_SKIP_DST_PORT].ptr; - else if (r->tos && !(r->tos & pd->tos)) + else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); else if (r->rule_flag & PFRULE_FRAGMENT) r = TAILQ_NEXT(r, entries); - else if (r->uid.op && (lookup != -1 || (lookup = - pf_socket_lookup(&uid, &gid, direction, pd), 1)) && + else if (r->uid.op && (pd->lookup.done || (pd->lookup.done = + pf_socket_lookup(direction, pd), 1)) && !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], - uid)) + pd->lookup.uid)) r = TAILQ_NEXT(r, entries); - else if (r->gid.op && (lookup != -1 || (lookup = - pf_socket_lookup(&uid, &gid, direction, pd), 1)) && + else if (r->gid.op && (pd->lookup.done || (pd->lookup.done = + pf_socket_lookup(direction, pd), 1)) && !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], - gid)) + pd->lookup.gid)) r = TAILQ_NEXT(r, entries); else if (r->prob && r->prob <= arc4random()) r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, &pftag, &tag)) + else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY) r = TAILQ_NEXT(r, entries); else { if (r->tag) tag = r->tag; + if (r->rtableid >= 0) + rtableid = r->rtableid; if (r->anchor == NULL) { + match = 1; *rm = r; *am = a; *rsm = ruleset; @@ -3169,11 +3321,11 @@ pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, r = TAILQ_NEXT(r, entries); } else pf_step_into_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a); + PF_RULESET_FILTER, &r, &a, &match); } - if (r == NULL) - pf_step_out_of_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a); + if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match)) + break; } r = *rm; a = *am; @@ -3181,10 +3333,11 @@ pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, REASON_SET(&reason, PFRES_MATCH); - if (r->log) { + if (r->log || (nr != NULL && nr->natpass && nr->log)) { if (rewrite) m_copyback(m, off, sizeof(*uh), uh); - PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset); + PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? 
r : nr, + a, ruleset, pd); } if ((r->action == PF_DROP) && @@ -3213,7 +3366,7 @@ pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, if (r->action == PF_DROP) return (PF_DROP); - if (pf_tag_packet(m, pftag, tag)) { + if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) { REASON_SET(&reason, PFRES_MEMORY); return (PF_DROP); } @@ -3229,7 +3382,7 @@ pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, REASON_SET(&reason, PFRES_MAXSTATES); goto cleanup; } - /* src node for flter rule */ + /* src node for filter rule */ if ((r->rule_flag & PFRULE_SRCTRACK || r->rpool.opts & PF_POOL_STICKYADDR) && pf_insert_src_node(&sn, r, saddr, af) != 0) { @@ -3269,7 +3422,9 @@ cleanup: s->anchor.ptr = a; STATE_INC_COUNTERS(s); s->allow_opts = r->allow_opts; - s->log = r->log & 2; + s->log = r->log & PF_LOG_ALL; + if (nr != NULL) + s->log |= nr->log & PF_LOG_ALL; s->proto = IPPROTO_UDP; s->direction = direction; s->af = af; @@ -3350,12 +3505,12 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, sa_family_t af = pd->af; u_int8_t icmptype, icmpcode; int state_icmp = 0; - struct pf_tag *pftag = NULL; - int tag = -1; + int tag = -1, rtableid = -1; #ifdef INET6 int rewrite = 0; #endif /* INET6 */ int asd = 0; + int match = 0; if (pf_check_congestion(ifq)) { REASON_SET(&reason, PFRES_CONGEST); @@ -3454,8 +3609,7 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != direction) r = r->skip[PF_SKIP_DIR].ptr; @@ -3463,28 +3617,33 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.neg)) + else if (PF_MISMATCHAW(&r->src.addr, saddr, af, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.neg)) + else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->type && r->type != icmptype + 1) r = TAILQ_NEXT(r, entries); else if (r->code && r->code != icmpcode + 1) r = TAILQ_NEXT(r, entries); - else if (r->tos && !(r->tos & pd->tos)) + else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); else if (r->rule_flag & PFRULE_FRAGMENT) r = TAILQ_NEXT(r, entries); else if (r->prob && r->prob <= arc4random()) r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, &pftag, &tag)) + else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY) r = TAILQ_NEXT(r, entries); else { if (r->tag) tag = r->tag; + if (r->rtableid >= 0) + rtableid = r->rtableid; if (r->anchor == NULL) { + match = 1; *rm = r; *am = a; *rsm = ruleset; @@ -3493,11 +3652,11 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, r = TAILQ_NEXT(r, entries); } else pf_step_into_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a); + PF_RULESET_FILTER, &r, &a, &match); } - if (r == NULL) - pf_step_out_of_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a); + if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match)) + break; } r = *rm; a = *am; @@ -3505,19 +3664,20 @@ pf_test_icmp(struct pf_rule **rm, struct 
pf_state **sm, int direction, REASON_SET(&reason, PFRES_MATCH); - if (r->log) { + if (r->log || (nr != NULL && nr->natpass && nr->log)) { #ifdef INET6 if (rewrite) m_copyback(m, off, sizeof(struct icmp6_hdr), pd->hdr.icmp6); #endif /* INET6 */ - PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset); + PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr, + a, ruleset, pd); } if (r->action != PF_PASS) return (PF_DROP); - if (pf_tag_packet(m, pftag, tag)) { + if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) { REASON_SET(&reason, PFRES_MEMORY); return (PF_DROP); } @@ -3533,7 +3693,7 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, REASON_SET(&reason, PFRES_MAXSTATES); goto cleanup; } - /* src node for flter rule */ + /* src node for filter rule */ if ((r->rule_flag & PFRULE_SRCTRACK || r->rpool.opts & PF_POOL_STICKYADDR) && pf_insert_src_node(&sn, r, saddr, af) != 0) { @@ -3573,7 +3733,9 @@ cleanup: s->anchor.ptr = a; STATE_INC_COUNTERS(s); s->allow_opts = r->allow_opts; - s->log = r->log & 2; + s->log = r->log & PF_LOG_ALL; + if (nr != NULL) + s->log |= nr->log & PF_LOG_ALL; s->proto = pd->proto; s->direction = direction; s->af = af; @@ -3651,9 +3813,9 @@ pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, struct pf_addr *saddr = pd->src, *daddr = pd->dst; sa_family_t af = pd->af; u_short reason; - struct pf_tag *pftag = NULL; - int tag = -1; + int tag = -1, rtableid = -1; int asd = 0; + int match = 0; if (pf_check_congestion(ifq)) { REASON_SET(&reason, PFRES_CONGEST); @@ -3710,8 +3872,7 @@ pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != direction) r = r->skip[PF_SKIP_DIR].ptr; @@ -3719,24 +3880,29 @@ pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.neg)) + else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.neg)) + else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; - else if (r->tos && !(r->tos & pd->tos)) + else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); else if (r->rule_flag & PFRULE_FRAGMENT) r = TAILQ_NEXT(r, entries); else if (r->prob && r->prob <= arc4random()) r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, &pftag, &tag)) + else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY) r = TAILQ_NEXT(r, entries); else { if (r->tag) tag = r->tag; + if (r->rtableid >= 0) + rtableid = r->rtableid; if (r->anchor == NULL) { + match = 1; *rm = r; *am = a; *rsm = ruleset; @@ -3745,11 +3911,11 @@ pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, r = TAILQ_NEXT(r, entries); } else pf_step_into_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a); + PF_RULESET_FILTER, &r, &a, &match); } - if (r == NULL) - pf_step_out_of_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a); + if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match)) + break; } r = *rm; 
a = *am; @@ -3757,8 +3923,9 @@ pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, REASON_SET(&reason, PFRES_MATCH); - if (r->log) - PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset); + if (r->log || (nr != NULL && nr->natpass && nr->log)) + PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr, + a, ruleset, pd); if ((r->action == PF_DROP) && ((r->rule_flag & PFRULE_RETURNICMP) || @@ -3797,7 +3964,7 @@ pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, if (r->action != PF_PASS) return (PF_DROP); - if (pf_tag_packet(m, pftag, tag)) { + if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) { REASON_SET(&reason, PFRES_MEMORY); return (PF_DROP); } @@ -3813,7 +3980,7 @@ pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, REASON_SET(&reason, PFRES_MAXSTATES); goto cleanup; } - /* src node for flter rule */ + /* src node for filter rule */ if ((r->rule_flag & PFRULE_SRCTRACK || r->rpool.opts & PF_POOL_STICKYADDR) && pf_insert_src_node(&sn, r, saddr, af) != 0) { @@ -3853,7 +4020,9 @@ cleanup: s->anchor.ptr = a; STATE_INC_COUNTERS(s); s->allow_opts = r->allow_opts; - s->log = r->log & 2; + s->log = r->log & PF_LOG_ALL; + if (nr != NULL) + s->log |= nr->log & PF_LOG_ALL; s->proto = pd->proto; s->direction = direction; s->af = af; @@ -3913,15 +4082,14 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, struct pf_ruleset *ruleset = NULL; sa_family_t af = pd->af; u_short reason; - struct pf_tag *pftag = NULL; int tag = -1; int asd = 0; + int match = 0; r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != direction) r = r->skip[PF_SKIP_DIR].ptr; @@ -3929,11 +4097,13 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.neg)) + else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.neg)) + else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; - else if (r->tos && !(r->tos & pd->tos)) + else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); else if (r->src.port_op || r->dst.port_op || r->flagset || r->type || r->code || @@ -3941,10 +4111,11 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, r = TAILQ_NEXT(r, entries); else if (r->prob && r->prob <= arc4random()) r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, &pftag, &tag)) + else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) r = TAILQ_NEXT(r, entries); else { if (r->anchor == NULL) { + match = 1; *rm = r; *am = a; *rsm = ruleset; @@ -3953,11 +4124,11 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, r = TAILQ_NEXT(r, entries); } else pf_step_into_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a); + PF_RULESET_FILTER, &r, &a, &match); } - if (r == NULL) - pf_step_out_of_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a); + if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match)) + break; } r = *rm; a = *am; @@ 
-3966,12 +4137,13 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, REASON_SET(&reason, PFRES_MATCH); if (r->log) - PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset); + PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset, + pd); if (r->action != PF_PASS) return (PF_DROP); - if (pf_tag_packet(m, pftag, tag)) { + if (pf_tag_packet(m, pd->pf_mtag, tag, -1)) { REASON_SET(&reason, PFRES_MEMORY); return (PF_DROP); } @@ -3984,7 +4156,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason) { - struct pf_state key; + struct pf_state_cmp key; struct tcphdr *th = pd->hdr.tcp; u_int16_t win = ntohs(th->th_win); u_int32_t ack, end, seq, orig_seq; @@ -4031,7 +4203,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, pd->src, th->th_dport, th->th_sport, (*state)->src.seqhi, ntohl(th->th_seq) + 1, TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, - NULL, NULL); + 0, NULL, NULL); REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } else if (!(th->th_flags & TH_ACK) || @@ -4069,7 +4241,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr, &dst->addr, src->port, dst->port, (*state)->dst.seqhi, 0, TH_SYN, 0, - (*state)->src.mss, 0, 0, NULL, NULL); + (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL); REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } else if (((th->th_flags & (TH_SYN|TH_ACK)) != @@ -4084,20 +4256,20 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), ntohl(th->th_seq) + 1, TH_ACK, (*state)->src.max_win, 0, 0, 0, - NULL, NULL); + (*state)->tag, NULL, NULL); pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr, &dst->addr, src->port, dst->port, (*state)->src.seqhi + 1, (*state)->src.seqlo + 1, TH_ACK, (*state)->dst.max_win, 0, 0, 1, - NULL, NULL); + 0, NULL, NULL); (*state)->src.seqdiff = (*state)->dst.seqhi - (*state)->src.seqlo; (*state)->dst.seqdiff = (*state)->src.seqhi - (*state)->dst.seqlo; (*state)->src.seqhi = (*state)->src.seqlo + - (*state)->src.max_win; - (*state)->dst.seqhi = (*state)->dst.seqlo + (*state)->dst.max_win; + (*state)->dst.seqhi = (*state)->dst.seqlo + + (*state)->src.max_win; (*state)->src.wscale = (*state)->dst.wscale = 0; (*state)->src.state = (*state)->dst.state = TCPS_ESTABLISHED; @@ -4132,7 +4304,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, /* Deferred generation of sequence number modulator */ if (dst->seqdiff && !src->seqdiff) { - while ((src->seqdiff = htonl(arc4random())) == 0) + while ((src->seqdiff = tcp_rndiss_next() - seq) == 0) ; ack = ntohl(th->th_ack) - dst->seqdiff; pf_change_a(&th->th_seq, &th->th_sum, htonl(seq + @@ -4221,6 +4393,25 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, ackskew = dst->seqlo - ack; + + /* + * Need to demodulate the sequence numbers in any TCP SACK options + * (Selective ACK). We could optionally validate the SACK values + * against the current ACK window, either forwards or backwards, but + * I'm not confident that SACK has been implemented properly + * everywhere. It wouldn't surprise me if several stacks accidently + * SACK too far backwards of previously ACKed data. 
There really aren't + * any security implications of bad SACKing unless the target stack + * doesn't validate the option length correctly. Someone trying to + * spoof into a TCP connection won't bother blindly sending SACK + * options anyway. + */ + if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) { + if (pf_modulate_sack(m, off, pd, th, dst)) + copyback = 1; + } + + #define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */ if (SEQ_GEQ(src->seqhi, end) && /* Last octet inside other's window space */ @@ -4231,8 +4422,8 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, (ackskew <= (MAXACKWINDOW << sws)) && /* Acking not more than one window forward */ ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo || - (pd->flags & PFDESC_IP_REAS) == 0)) { - /* Require an exact sequence match on resets when possible */ + (orig_seq == src->seqlo + 1) || (pd->flags & PFDESC_IP_REAS) == 0)) { + /* Require an exact/+1 sequence match on resets when possible */ if (dst->scrub || src->scrub) { if (pf_normalize_tcp_stateful(m, off, pd, reason, th, @@ -4278,8 +4469,8 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, if (src->state >= TCPS_FIN_WAIT_2 && dst->state >= TCPS_FIN_WAIT_2) (*state)->timeout = PFTM_TCP_CLOSED; - else if (src->state >= TCPS_FIN_WAIT_2 || - dst->state >= TCPS_FIN_WAIT_2) + else if (src->state >= TCPS_CLOSING && + dst->state >= TCPS_CLOSING) (*state)->timeout = PFTM_TCP_FIN_WAIT; else if (src->state < TCPS_ESTABLISHED || dst->state < TCPS_ESTABLISHED) @@ -4325,9 +4516,10 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, printf("pf: loose state match: "); pf_print_state(*state); pf_print_flags(th->th_flags); - printf(" seq=%u ack=%u len=%u ackskew=%d pkts=%d:%d\n", - seq, ack, pd->p_len, ackskew, - (*state)->packets[0], (*state)->packets[1]); + printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " + "pkts=%llu:%llu\n", seq, orig_seq, ack, pd->p_len, + ackskew, (*state)->packets[0], + (*state)->packets[1]); } if (dst->scrub || src->scrub) { @@ -4368,7 +4560,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, pd->dst, pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), 0, TH_RST, 0, 0, - (*state)->rule.ptr->return_ttl, 1, + (*state)->rule.ptr->return_ttl, 1, 0, pd->eh, kif->pfik_ifp); src->seqlo = 0; src->seqhi = 1; @@ -4377,8 +4569,9 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, printf("pf: BAD state: "); pf_print_state(*state); pf_print_flags(th->th_flags); - printf(" seq=%u ack=%u len=%u ackskew=%d pkts=%d:%d " - "dir=%s,%s\n", seq, ack, pd->p_len, ackskew, + printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " + "pkts=%llu:%llu dir=%s,%s\n", + seq, orig_seq, ack, pd->p_len, ackskew, (*state)->packets[0], (*state)->packets[1], direction == PF_IN ? "in" : "out", direction == (*state)->direction ? 
"fwd" : "rev"); @@ -4421,7 +4614,7 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd) { struct pf_state_peer *src, *dst; - struct pf_state key; + struct pf_state_cmp key; struct udphdr *uh = pd->hdr.udp; key.af = pd->af; @@ -4485,6 +4678,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, u_int16_t icmpid, *icmpsum; u_int8_t icmptype; int state_icmp = 0; + struct pf_state_cmp key; switch (pd->proto) { #ifdef INET @@ -4522,8 +4716,6 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, * ICMP query/reply message not related to a TCP/UDP packet. * Search for an ICMP state. */ - struct pf_state key; - key.af = pd->af; key.proto = pd->proto; if (direction == PF_IN) { @@ -4715,7 +4907,6 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, case IPPROTO_TCP: { struct tcphdr th; u_int32_t seq; - struct pf_state key; struct pf_state_peer *src, *dst; u_int8_t dws; int copyback = 0; @@ -4832,7 +5023,6 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, } case IPPROTO_UDP: { struct udphdr uh; - struct pf_state key; if (!pf_pull_hdr(m, off2, &uh, sizeof(uh), NULL, reason, pd2.af)) { @@ -4899,7 +5089,6 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, #ifdef INET case IPPROTO_ICMP: { struct icmp iih; - struct pf_state key; if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN, NULL, reason, pd2.af)) { @@ -4951,7 +5140,6 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, #ifdef INET6 case IPPROTO_ICMPV6: { struct icmp6_hdr iih; - struct pf_state key; if (!pf_pull_hdr(m, off2, &iih, sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) { @@ -5003,8 +5191,6 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, } #endif /* INET6 */ default: { - struct pf_state key; - key.af = pd2.af; key.proto = pd2.proto; if (direction == PF_IN) { @@ -5067,7 +5253,7 @@ pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, struct pf_pdesc *pd) { struct pf_state_peer *src, *dst; - struct pf_state key; + struct pf_state_cmp key; key.af = pd->af; key.proto = pd->proto; @@ -5195,16 +5381,24 @@ pf_pull_hdr(struct mbuf *m, int off, void *p, int len, } int -pf_routable(struct pf_addr *addr, sa_family_t af) +pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif) { struct sockaddr_in *dst; + int ret = 1; + int check_mpath; + extern int ipmultipath; #ifdef INET6 + extern int ip6_multipath; struct sockaddr_in6 *dst6; struct route_in6 ro; #else struct route ro; #endif + struct radix_node *rn; + struct rtentry *rt; + struct ifnet *ifp; + check_mpath = 0; bzero(&ro, sizeof(ro)); switch (af) { case AF_INET: @@ -5212,6 +5406,8 @@ pf_routable(struct pf_addr *addr, sa_family_t af) dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = addr->v4; + if (ipmultipath) + check_mpath = 1; break; #ifdef INET6 case AF_INET6: @@ -5219,20 +5415,50 @@ pf_routable(struct pf_addr *addr, sa_family_t af) dst6->sin6_family = AF_INET6; dst6->sin6_len = sizeof(*dst6); dst6->sin6_addr = addr->v6; + if (ip6_multipath) + check_mpath = 1; break; #endif /* INET6 */ default: return (0); } + /* Skip checks for ipsec interfaces */ + if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) + goto out; + rtalloc_noclone((struct route *)&ro, NO_CLONING); if (ro.ro_rt != NULL) { - RTFREE(ro.ro_rt); - return (1); - } + /* No interface 
given, this is a no-route check */ + if (kif == NULL) + goto out; + + if (kif->pfik_ifp == NULL) { + ret = 0; + goto out; + } + + /* Perform uRPF check if passed input interface */ + ret = 0; + rn = (struct radix_node *)ro.ro_rt; + do { + rt = (struct rtentry *)rn; + if (rt->rt_ifp->if_type == IFT_CARP) + ifp = rt->rt_ifp->if_carpdev; + else + ifp = rt->rt_ifp; - return (0); + if (kif->pfik_ifp == ifp) + ret = 1; + rn = rn_mpath_next(rn); + } while (check_mpath == 1 && rn != NULL && ret == 0); + } else + ret = 0; +out: + if (ro.ro_rt != NULL) + RTFREE(ro.ro_rt); + return (ret); } int @@ -5281,39 +5507,29 @@ pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw) #ifdef INET void pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, - struct pf_state *s) + struct pf_state *s, struct pf_pdesc *pd) { struct mbuf *m0, *m1; - struct m_tag *mtag; struct route iproute; - struct route *ro; + struct route *ro = NULL; struct sockaddr_in *dst; struct ip *ip; struct ifnet *ifp = NULL; struct pf_addr naddr; struct pf_src_node *sn = NULL; int error = 0; +#ifdef IPSEC + struct m_tag *mtag; +#endif /* IPSEC */ if (m == NULL || *m == NULL || r == NULL || (dir != PF_IN && dir != PF_OUT) || oifp == NULL) panic("pf_route: invalid parameters"); - if ((mtag = m_tag_find(*m, PACKET_TAG_PF_ROUTED, NULL)) == NULL) { - if ((mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 1, M_NOWAIT)) == - NULL) { - m0 = *m; - *m = NULL; - goto bad; - } - *(char *)(mtag + 1) = 1; - m_tag_prepend(*m, mtag); - } else { - if (*(char *)(mtag + 1) > 3) { - m0 = *m; - *m = NULL; - goto bad; - } - (*(char *)(mtag + 1))++; + if (pd->pf_mtag->routed++ > 3) { + m0 = *m; + *m = NULL; + goto bad; } if (r->rt == PF_DUPTO) { @@ -5403,33 +5619,33 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, #endif /* IPSEC */ /* Catch routing changes wrt. hardware checksumming for TCP or UDP. 
*/ - if (m0->m_pkthdr.csum & M_TCPV4_CSUM_OUT) { + if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT) { if (!(ifp->if_capabilities & IFCAP_CSUM_TCPv4) || ifp->if_bridge != NULL) { in_delayed_cksum(m0); - m0->m_pkthdr.csum &= ~M_TCPV4_CSUM_OUT; /* Clear */ + m0->m_pkthdr.csum_flags &= ~M_TCPV4_CSUM_OUT; /* Clear */ } - } else if (m0->m_pkthdr.csum & M_UDPV4_CSUM_OUT) { + } else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT) { if (!(ifp->if_capabilities & IFCAP_CSUM_UDPv4) || ifp->if_bridge != NULL) { in_delayed_cksum(m0); - m0->m_pkthdr.csum &= ~M_UDPV4_CSUM_OUT; /* Clear */ + m0->m_pkthdr.csum_flags &= ~M_UDPV4_CSUM_OUT; /* Clear */ } } if (ntohs(ip->ip_len) <= ifp->if_mtu) { if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) && ifp->if_bridge == NULL) { - m0->m_pkthdr.csum |= M_IPV4_CSUM_OUT; + m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; ipstat.ips_outhwcsum++; } else { ip->ip_sum = 0; ip->ip_sum = in_cksum(m0, ip->ip_hl << 2); } /* Update relevant hardware checksum stats for TCP/UDP */ - if (m0->m_pkthdr.csum & M_TCPV4_CSUM_OUT) + if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT) tcpstat.tcps_outhwcsum++; - else if (m0->m_pkthdr.csum & M_UDPV4_CSUM_OUT) + else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT) udpstat.udps_outhwcsum++; error = (*ifp->if_output)(ifp, m0, sintosa(dst), NULL); goto done; @@ -5443,7 +5659,7 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, ipstat.ips_cantfrag++; if (r->rt != PF_DUPTO) { icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, - ifp); + ifp->if_mtu); goto done; } else goto bad; @@ -5485,10 +5701,9 @@ bad: #ifdef INET6 void pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, - struct pf_state *s) + struct pf_state *s, struct pf_pdesc *pd) { struct mbuf *m0; - struct m_tag *mtag; struct route_in6 ip6route; struct route_in6 *ro; struct sockaddr_in6 *dst; @@ -5502,22 +5717,10 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, (dir != PF_IN && dir != PF_OUT) || oifp == NULL) panic("pf_route6: invalid parameters"); - if ((mtag = m_tag_find(*m, PACKET_TAG_PF_ROUTED, NULL)) == NULL) { - if ((mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 1, M_NOWAIT)) == - NULL) { - m0 = *m; - *m = NULL; - goto bad; - } - *(char *)(mtag + 1) = 1; - m_tag_prepend(*m, mtag); - } else { - if (*(char *)(mtag + 1) > 3) { - m0 = *m; - *m = NULL; - goto bad; - } - (*(char *)(mtag + 1))++; + if (pd->pf_mtag->routed++ > 3) { + m0 = *m; + *m = NULL; + goto bad; } if (r->rt == PF_DUPTO) { @@ -5543,12 +5746,9 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, dst->sin6_len = sizeof(*dst); dst->sin6_addr = ip6->ip6_dst; - /* Cheat. */ + /* Cheat. XXX why only in the v6 case??? */ if (r->rt == PF_FASTROUTE) { - mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT); - if (mtag == NULL) - goto bad; - m_tag_prepend(m0, mtag); + pd->pf_mtag->flags |= PF_TAG_GENERATED; ip6_output(m0, NULL, NULL, 0, NULL, NULL); return; } @@ -5591,7 +5791,7 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, * If the packet is too large for the outgoing interface, * send back an icmp6 error. 
*/ - if (IN6_IS_ADDR_LINKLOCAL(&dst->sin6_addr)) + if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr)) dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index); if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) { error = nd6_output(ifp, ifp, m0, dst, NULL); @@ -5646,9 +5846,9 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, default: return (1); } - if (m->m_pkthdr.csum & flag_ok) + if (m->m_pkthdr.csum_flags & flag_ok) return (0); - if (m->m_pkthdr.csum & flag_bad) + if (m->m_pkthdr.csum_flags & flag_bad) return (1); if (off < sizeof(struct ip) || len < sizeof(struct udphdr)) return (1); @@ -5683,7 +5883,7 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, return (1); } if (sum) { - m->m_pkthdr.csum |= flag_bad; + m->m_pkthdr.csum_flags |= flag_bad; switch (p) { case IPPROTO_TCP: tcpstat.tcps_rcvbadsum++; @@ -5702,21 +5902,7 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, } return (1); } - m->m_pkthdr.csum |= flag_ok; - return (0); -} - -static int -pf_add_mbuf_tag(struct mbuf *m, u_int tag) -{ - struct m_tag *mtag; - - if (m_tag_find(m, tag, NULL) != NULL) - return (0); - mtag = m_tag_get(tag, 0, M_NOWAIT); - if (mtag == NULL) - return (1); - m_tag_prepend(m, mtag); + m->m_pkthdr.csum_flags |= flag_ok; return (0); } @@ -5735,14 +5921,22 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct pf_pdesc pd; int off, dirndx, pqid = 0; - if (!pf_status.running || - (m_tag_find(m, PACKET_TAG_PF_GENERATED, NULL) != NULL)) + if (!pf_status.running) + return (PF_PASS); + + memset(&pd, 0, sizeof(pd)); + if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_test: pf_get_mtag returned NULL\n")); + return (PF_DROP); + } + if (pd.pf_mtag->flags & PF_TAG_GENERATED) return (PF_PASS); if (ifp->if_type == IFT_CARP && ifp->if_carpdev) ifp = ifp->if_carpdev; - kif = pfi_index2kif[ifp->if_index]; + kif = (struct pfi_kif *)ifp->if_pf_kif; if (kif == NULL) { DPFPRINTF(PF_DEBUG_URGENT, ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname)); @@ -5756,7 +5950,6 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, panic("non-M_PKTHDR is passed to pf_test"); #endif /* DIAGNOSTIC */ - memset(&pd, 0, sizeof(pd)); if (m->m_pkthdr.len < (int)sizeof(*h)) { action = PF_DROP; REASON_SET(&reason, PFRES_SHORT); @@ -5810,6 +6003,7 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, } if (dir == PF_IN && pf_check_proto_cksum(m, off, ntohs(h->ip_len) - off, IPPROTO_TCP, AF_INET)) { + REASON_SET(&reason, PFRES_PROTCKSUM); action = PF_DROP; goto done; } @@ -5846,12 +6040,14 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m, off, ntohs(h->ip_len) - off, IPPROTO_UDP, AF_INET)) { action = PF_DROP; + REASON_SET(&reason, PFRES_PROTCKSUM); goto done; } if (uh.uh_dport == 0 || ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || ntohs(uh.uh_ulen) < sizeof(struct udphdr)) { action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); goto done; } action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); @@ -5880,6 +6076,7 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, if (dir == PF_IN && pf_check_proto_cksum(m, off, ntohs(h->ip_len) - off, IPPROTO_ICMP, AF_INET)) { action = PF_DROP; + REASON_SET(&reason, PFRES_PROTCKSUM); goto done; } action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, @@ -5922,26 +6119,18 @@ done: ("pf: dropping packet with ip options\n")); } - if (s && s->tag) - pf_tag_packet(m, pf_get_tag(m), s->tag); + if ((s && s->tag) || r->rtableid) + pf_tag_packet(m, pd.pf_mtag, s ? 
s->tag : 0, r->rtableid); #ifdef ALTQ if (action == PF_PASS && r->qid) { - struct m_tag *mtag; - struct altq_tag *atag; - - mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT); - if (mtag != NULL) { - atag = (struct altq_tag *)(mtag + 1); - if (pqid || pd.tos == IPTOS_LOWDELAY) - atag->qid = r->pqid; - else - atag->qid = r->qid; - /* add hints for ecn */ - atag->af = AF_INET; - atag->hdr = h; - m_tag_prepend(m, mtag); - } + if (pqid || (pd.tos & IPTOS_LOWDELAY)) + pd.pf_mtag->qid = r->pqid; + else + pd.pf_mtag->qid = r->qid; + /* add hints for ecn */ + pd.pf_mtag->af = AF_INET; + pd.pf_mtag->hdr = h; } #endif /* ALTQ */ @@ -5954,41 +6143,48 @@ done: pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && (s->nat_rule.ptr->action == PF_RDR || s->nat_rule.ptr->action == PF_BINAT) && - (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET && - pf_add_mbuf_tag(m, PACKET_TAG_PF_TRANSLATE_LOCALHOST)) { - action = PF_DROP; - REASON_SET(&reason, PFRES_MEMORY); - } + (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) + pd.pf_mtag->flags |= PF_TAG_TRANSLATE_LOCALHOST; + + if (log) { + struct pf_rule *lr; - if (log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, a, ruleset); + if (s != NULL && s->nat_rule.ptr != NULL && + s->nat_rule.ptr->log & PF_LOG_ALL) + lr = s->nat_rule.ptr; + else + lr = r; + PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, lr, a, ruleset, + &pd); + } kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len; kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++; if (action == PF_PASS || r->action == PF_DROP) { - r->packets++; - r->bytes += pd.tot_len; + dirndx = (dir == PF_OUT); + r->packets[dirndx]++; + r->bytes[dirndx] += pd.tot_len; if (a != NULL) { - a->packets++; - a->bytes += pd.tot_len; + a->packets[dirndx]++; + a->bytes[dirndx] += pd.tot_len; } if (s != NULL) { - dirndx = (dir == s->direction) ? 0 : 1; - s->packets[dirndx]++; - s->bytes[dirndx] += pd.tot_len; if (s->nat_rule.ptr != NULL) { - s->nat_rule.ptr->packets++; - s->nat_rule.ptr->bytes += pd.tot_len; + s->nat_rule.ptr->packets[dirndx]++; + s->nat_rule.ptr->bytes[dirndx] += pd.tot_len; } if (s->src_node != NULL) { - s->src_node->packets++; - s->src_node->bytes += pd.tot_len; + s->src_node->packets[dirndx]++; + s->src_node->bytes[dirndx] += pd.tot_len; } if (s->nat_src_node != NULL) { - s->nat_src_node->packets++; - s->nat_src_node->bytes += pd.tot_len; + s->nat_src_node->packets[dirndx]++; + s->nat_src_node->bytes[dirndx] += pd.tot_len; } + dirndx = (dir == s->direction) ? 0 : 1; + s->packets[dirndx]++; + s->bytes[dirndx] += pd.tot_len; } tr = r; nr = (s != NULL) ? 
s->nat_rule.ptr : pd.nat_rule; @@ -6033,7 +6229,7 @@ done: action = PF_PASS; } else if (r->rt) /* pf_route can free the mbuf causing *m0 to become NULL */ - pf_route(m0, r, dir, ifp, s); + pf_route(m0, r, dir, ifp, s, &pd); return (action); } @@ -6046,7 +6242,7 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, { struct pfi_kif *kif; u_short action, reason = 0, log = 0; - struct mbuf *m = *m0; + struct mbuf *m = *m0, *n = NULL; struct ip6_hdr *h; struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr; struct pf_state *s = NULL; @@ -6054,14 +6250,22 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct pf_pdesc pd; int off, terminal = 0, dirndx; - if (!pf_status.running || - (m_tag_find(m, PACKET_TAG_PF_GENERATED, NULL) != NULL)) + if (!pf_status.running) + return (PF_PASS); + + memset(&pd, 0, sizeof(pd)); + if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_test6: pf_get_mtag returned NULL\n")); + return (PF_DROP); + } + if (pd.pf_mtag->flags & PF_TAG_GENERATED) return (PF_PASS); if (ifp->if_type == IFT_CARP && ifp->if_carpdev) ifp = ifp->if_carpdev; - kif = pfi_index2kif[ifp->if_index]; + kif = (struct pfi_kif *)ifp->if_pf_kif; if (kif == NULL) { DPFPRINTF(PF_DEBUG_URGENT, ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname)); @@ -6075,7 +6279,6 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, panic("non-M_PKTHDR is passed to pf_test6"); #endif /* DIAGNOSTIC */ - memset(&pd, 0, sizeof(pd)); if (m->m_pkthdr.len < (int)sizeof(*h)) { action = PF_DROP; REASON_SET(&reason, PFRES_SHORT); @@ -6091,6 +6294,18 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, m = *m0; h = mtod(m, struct ip6_hdr *); +#if 1 + /* + * we do not support jumbogram yet. if we keep going, zero ip6_plen + * will do something bad, so drop the packet for now. + */ + if (htons(h->ip6_plen) == 0) { + action = PF_DROP; + REASON_SET(&reason, PFRES_NORM); /*XXX*/ + goto done; + } +#endif + pd.src = (struct pf_addr *)&h->ip6_src; pd.dst = (struct pf_addr *)&h->ip6_dst; PF_ACPY(&pd.baddr, dir == PF_OUT ? 
pd.src : pd.dst, AF_INET6); @@ -6110,9 +6325,67 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, if (action == PF_DROP) REASON_SET(&reason, PFRES_FRAG); goto done; + case IPPROTO_ROUTING: { + struct ip6_rthdr rthdr; + struct ip6_rthdr0 rthdr0; + struct in6_addr finaldst; + struct ip6_hdr *ip6; + + if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL, + &reason, pd.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: IPv6 short rthdr\n")); + action = PF_DROP; + log = 1; + goto done; + } + if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) { + if (!pf_pull_hdr(m, off, &rthdr0, + sizeof(rthdr0), NULL, &reason, pd.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: IPv6 short rthdr0\n")); + action = PF_DROP; + log = 1; + goto done; + } + if (rthdr0.ip6r0_segleft != 0) { + if (!pf_pull_hdr(m, off + + sizeof(rthdr0) + + rthdr0.ip6r0_len * 8 - + sizeof(finaldst), &finaldst, + sizeof(finaldst), NULL, + &reason, pd.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: IPv6 short rthdr0\n")); + action = PF_DROP; + log = 1; + goto done; + } + + n = m_copym(m, 0, M_COPYALL, M_DONTWAIT); + if (!n) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: mbuf shortage\n")); + action = PF_DROP; + log = 1; + goto done; + } + n = m_pullup(n, sizeof(struct ip6_hdr)); + if (!n) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: mbuf shortage\n")); + action = PF_DROP; + log = 1; + goto done; + } + ip6 = mtod(n, struct ip6_hdr *); + ip6->ip6_dst = finaldst; + } + } + /* FALLTHROUGH */ + } case IPPROTO_AH: case IPPROTO_HOPOPTS: - case IPPROTO_ROUTING: case IPPROTO_DSTOPTS: { /* get next header and header length */ struct ip6_ext opt6; @@ -6139,6 +6412,10 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, } } while (!terminal); + /* if there's no routing header, use unmodified mbuf for checksumming */ + if (!n) + n = m; + switch (pd.proto) { case IPPROTO_TCP: { @@ -6150,7 +6427,7 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, log = action != PF_PASS; goto done; } - if (dir == PF_IN && pf_check_proto_cksum(m, off, + if (dir == PF_IN && pf_check_proto_cksum(n, off, ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)), IPPROTO_TCP, AF_INET6)) { action = PF_DROP; @@ -6185,7 +6462,7 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, log = action != PF_PASS; goto done; } - if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m, + if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(n, off, ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)), IPPROTO_UDP, AF_INET6)) { action = PF_DROP; @@ -6196,6 +6473,7 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || ntohs(uh.uh_ulen) < sizeof(struct udphdr)) { action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); goto done; } action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); @@ -6221,7 +6499,7 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, log = action != PF_PASS; goto done; } - if (dir == PF_IN && pf_check_proto_cksum(m, off, + if (dir == PF_IN && pf_check_proto_cksum(n, off, ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)), IPPROTO_ICMPV6, AF_INET6)) { action = PF_DROP; @@ -6259,28 +6537,25 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, } done: - /* XXX handle IPv6 options, if not allowed. not implemented. */ + if (n != m) { + m_freem(n); + n = NULL; + } + + /* XXX handle IPv6 options, if not allowed. not implemented. */ - if (s && s->tag) - pf_tag_packet(m, pf_get_tag(m), s->tag); + if ((s && s->tag) || r->rtableid) + pf_tag_packet(m, pd.pf_mtag, s ? 
s->tag : 0, r->rtableid); #ifdef ALTQ if (action == PF_PASS && r->qid) { - struct m_tag *mtag; - struct altq_tag *atag; - - mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT); - if (mtag != NULL) { - atag = (struct altq_tag *)(mtag + 1); - if (pd.tos == IPTOS_LOWDELAY) - atag->qid = r->pqid; - else - atag->qid = r->qid; - /* add hints for ecn */ - atag->af = AF_INET6; - atag->hdr = h; - m_tag_prepend(m, mtag); - } + if (pd.tos & IPTOS_LOWDELAY) + pd.pf_mtag->qid = r->pqid; + else + pd.pf_mtag->qid = r->qid; + /* add hints for ecn */ + pd.pf_mtag->af = AF_INET6; + pd.pf_mtag->hdr = h; } #endif /* ALTQ */ @@ -6288,41 +6563,48 @@ done: pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && (s->nat_rule.ptr->action == PF_RDR || s->nat_rule.ptr->action == PF_BINAT) && - IN6_IS_ADDR_LOOPBACK(&pd.dst->v6) && - pf_add_mbuf_tag(m, PACKET_TAG_PF_TRANSLATE_LOCALHOST)) { - action = PF_DROP; - REASON_SET(&reason, PFRES_MEMORY); - } + IN6_IS_ADDR_LOOPBACK(&pd.dst->v6)) + pd.pf_mtag->flags |= PF_TAG_TRANSLATE_LOCALHOST; - if (log) - PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, r, a, ruleset); + if (log) { + struct pf_rule *lr; + + if (s != NULL && s->nat_rule.ptr != NULL && + s->nat_rule.ptr->log & PF_LOG_ALL) + lr = s->nat_rule.ptr; + else + lr = r; + PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, lr, a, ruleset, + &pd); + } kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len; kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++; if (action == PF_PASS || r->action == PF_DROP) { - r->packets++; - r->bytes += pd.tot_len; + dirndx = (dir == PF_OUT); + r->packets[dirndx]++; + r->bytes[dirndx] += pd.tot_len; if (a != NULL) { - a->packets++; - a->bytes += pd.tot_len; + a->packets[dirndx]++; + a->bytes[dirndx] += pd.tot_len; } if (s != NULL) { - dirndx = (dir == s->direction) ? 0 : 1; - s->packets[dirndx]++; - s->bytes[dirndx] += pd.tot_len; if (s->nat_rule.ptr != NULL) { - s->nat_rule.ptr->packets++; - s->nat_rule.ptr->bytes += pd.tot_len; + s->nat_rule.ptr->packets[dirndx]++; + s->nat_rule.ptr->bytes[dirndx] += pd.tot_len; } if (s->src_node != NULL) { - s->src_node->packets++; - s->src_node->bytes += pd.tot_len; + s->src_node->packets[dirndx]++; + s->src_node->bytes[dirndx] += pd.tot_len; } if (s->nat_src_node != NULL) { - s->nat_src_node->packets++; - s->nat_src_node->bytes += pd.tot_len; + s->nat_src_node->packets[dirndx]++; + s->nat_src_node->bytes[dirndx] += pd.tot_len; } + dirndx = (dir == s->direction) ? 0 : 1; + s->packets[dirndx]++; + s->bytes[dirndx] += pd.tot_len; } tr = r; nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; @@ -6367,7 +6649,7 @@ done: action = PF_PASS; } else if (r->rt) /* pf_route6 can free the mbuf causing *m0 to become NULL */ - pf_route6(m0, r, dir, ifp, s); + pf_route6(m0, r, dir, ifp, s, &pd); return (action); } diff --git a/sys/contrib/pf/net/pf_if.c b/sys/contrib/pf/net/pf_if.c index 26ce0ee..6a15a89 100644 --- a/sys/contrib/pf/net/pf_if.c +++ b/sys/contrib/pf/net/pf_if.c @@ -1,6 +1,8 @@ -/* $OpenBSD: pf_if.c,v 1.23 2004/12/22 17:17:55 dhartmei Exp $ */ +/* $OpenBSD: pf_if.c,v 1.46 2006/12/13 09:01:59 itojun Exp $ */ /* + * Copyright 2005 Henning Brauer + * Copyright 2005 Ryan McBride * Copyright (c) 2001 Daniel Hartmeier * Copyright (c) 2003 Cedric Berger * All rights reserved. 
@@ -55,40 +57,25 @@ #include #endif /* INET6 */ -#define ACCEPT_FLAGS(oklist) \ - do { \ - if ((flags & ~(oklist)) & \ - PFI_FLAG_ALLMASK) \ - return (EINVAL); \ - } while (0) - -#define senderr(e) do { rv = (e); goto _bad; } while (0) - -struct pfi_kif **pfi_index2kif; -struct pfi_kif *pfi_self; -int pfi_indexlim; -struct pfi_ifhead pfi_ifs; +struct pfi_kif *pfi_all = NULL; struct pfi_statehead pfi_statehead; -int pfi_ifcnt; struct pool pfi_addr_pl; +struct pfi_ifhead pfi_ifs; long pfi_update = 1; struct pfr_addr *pfi_buffer; int pfi_buffer_cnt; int pfi_buffer_max; -void pfi_dynaddr_update(void *); -void pfi_kifaddr_update(void *); +void pfi_kif_update(struct pfi_kif *); +void pfi_dynaddr_update(struct pfi_dynaddr *dyn); void pfi_table_update(struct pfr_ktable *, struct pfi_kif *, int, int); +void pfi_kifaddr_update(void *); void pfi_instance_add(struct ifnet *, int, int); void pfi_address_add(struct sockaddr *, int, int); int pfi_if_compare(struct pfi_kif *, struct pfi_kif *); -struct pfi_kif *pfi_if_create(const char *, struct pfi_kif *, int); -void pfi_copy_group(char *, const char *, int); -void pfi_newgroup(const char *, int); -int pfi_skip_if(const char *, struct pfi_kif *, int); +int pfi_skip_if(const char *, struct pfi_kif *); int pfi_unmask(void *); -void pfi_dohooks(struct pfi_kif *); RB_PROTOTYPE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); RB_GENERATE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); @@ -99,7 +86,7 @@ RB_GENERATE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); void pfi_initialize(void) { - if (pfi_self != NULL) /* already initialized */ + if (pfi_all != NULL) /* already initialized */ return; TAILQ_INIT(&pfi_statehead); @@ -108,173 +95,235 @@ pfi_initialize(void) pfi_buffer_max = 64; pfi_buffer = malloc(pfi_buffer_max * sizeof(*pfi_buffer), PFI_MTYPE, M_WAITOK); - pfi_self = pfi_if_create("self", NULL, PFI_IFLAG_GROUP); + + if ((pfi_all = pfi_kif_get(IFG_ALL)) == NULL) + panic("pfi_kif_get for pfi_all failed"); +} + +struct pfi_kif * +pfi_kif_get(const char *kif_name) +{ + struct pfi_kif *kif; + struct pfi_kif_cmp s; + + bzero(&s, sizeof(s)); + strlcpy(s.pfik_name, kif_name, sizeof(s.pfik_name)); + if ((kif = RB_FIND(pfi_ifhead, &pfi_ifs, (struct pfi_kif *)&s)) != NULL) + return (kif); + + /* create new one */ + if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_DONTWAIT)) == NULL) + return (NULL); + + bzero(kif, sizeof(*kif)); + strlcpy(kif->pfik_name, kif_name, sizeof(kif->pfik_name)); + kif->pfik_tzero = time_second; + TAILQ_INIT(&kif->pfik_dynaddrs); + + RB_INSERT(pfi_ifhead, &pfi_ifs, kif); + return (kif); } void -pfi_attach_clone(struct if_clone *ifc) +pfi_kif_ref(struct pfi_kif *kif, enum pfi_kif_refs what) { - pfi_initialize(); - pfi_newgroup(ifc->ifc_name, PFI_IFLAG_CLONABLE); + switch (what) { + case PFI_KIF_REF_RULE: + kif->pfik_rules++; + break; + case PFI_KIF_REF_STATE: + if (!kif->pfik_states++) + TAILQ_INSERT_TAIL(&pfi_statehead, kif, pfik_w_states); + break; + default: + panic("pfi_kif_ref with unknown type"); + } +} + +void +pfi_kif_unref(struct pfi_kif *kif, enum pfi_kif_refs what) +{ + if (kif == NULL) + return; + + switch (what) { + case PFI_KIF_REF_NONE: + break; + case PFI_KIF_REF_RULE: + if (kif->pfik_rules <= 0) { + printf("pfi_kif_unref: rules refcount <= 0\n"); + return; + } + kif->pfik_rules--; + break; + case PFI_KIF_REF_STATE: + if (kif->pfik_states <= 0) { + printf("pfi_kif_unref: state refcount <= 0\n"); + return; + } + if (!--kif->pfik_states) + TAILQ_REMOVE(&pfi_statehead, kif, pfik_w_states); + break; + default: + 
panic("pfi_kif_unref with unknown type"); + } + + if (kif->pfik_ifp != NULL || kif->pfik_group != NULL || kif == pfi_all) + return; + + if (kif->pfik_rules || kif->pfik_states) + return; + + RB_REMOVE(pfi_ifhead, &pfi_ifs, kif); + free(kif, PFI_MTYPE); +} + +int +pfi_kif_match(struct pfi_kif *rule_kif, struct pfi_kif *packet_kif) +{ + struct ifg_list *p; + + if (rule_kif == NULL || rule_kif == packet_kif) + return (1); + + if (rule_kif->pfik_group != NULL) + TAILQ_FOREACH(p, &packet_kif->pfik_ifp->if_groups, ifgl_next) + if (p->ifgl_group == rule_kif->pfik_group) + return (1); + + return (0); } void pfi_attach_ifnet(struct ifnet *ifp) { - struct pfi_kif *p, *q, key; - int s; + struct pfi_kif *kif; + int s; pfi_initialize(); s = splsoftnet(); pfi_update++; - if (ifp->if_index >= pfi_indexlim) { - /* - * grow pfi_index2kif, similar to ifindex2ifnet code in if.c - */ - size_t m, n, oldlim; - struct pfi_kif **mp, **np; - - oldlim = pfi_indexlim; - if (pfi_indexlim == 0) - pfi_indexlim = 64; - while (ifp->if_index >= pfi_indexlim) - pfi_indexlim <<= 1; - - m = oldlim * sizeof(struct pfi_kif *); - mp = pfi_index2kif; - n = pfi_indexlim * sizeof(struct pfi_kif *); - np = malloc(n, PFI_MTYPE, M_DONTWAIT); - if (np == NULL) - panic("pfi_attach_ifnet: " - "cannot allocate translation table"); - bzero(np, n); - if (mp != NULL) - bcopy(mp, np, m); - pfi_index2kif = np; - if (mp != NULL) - free(mp, PFI_MTYPE); - } + if ((kif = pfi_kif_get(ifp->if_xname)) == NULL) + panic("pfi_kif_get failed"); + + kif->pfik_ifp = ifp; + ifp->if_pf_kif = (caddr_t)kif; + + if ((kif->pfik_ah_cookie = hook_establish(ifp->if_addrhooks, 1, + pfi_kifaddr_update, kif)) == NULL) + panic("pfi_attach_ifnet: cannot allocate '%s' address hook", + ifp->if_xname); + + pfi_kif_update(kif); - strlcpy(key.pfik_name, ifp->if_xname, sizeof(key.pfik_name)); - p = RB_FIND(pfi_ifhead, &pfi_ifs, &key); - if (p == NULL) { - /* add group */ - pfi_copy_group(key.pfik_name, ifp->if_xname, - sizeof(key.pfik_name)); - q = RB_FIND(pfi_ifhead, &pfi_ifs, &key); - if (q == NULL) - q = pfi_if_create(key.pfik_name, pfi_self, PFI_IFLAG_GROUP); - if (q == NULL) - panic("pfi_attach_ifnet: " - "cannot allocate '%s' group", key.pfik_name); - - /* add interface */ - p = pfi_if_create(ifp->if_xname, q, PFI_IFLAG_INSTANCE); - if (p == NULL) - panic("pfi_attach_ifnet: " - "cannot allocate '%s' interface", ifp->if_xname); - } else - q = p->pfik_parent; - p->pfik_ifp = ifp; - p->pfik_flags |= PFI_IFLAG_ATTACHED; - p->pfik_ah_cookie = - hook_establish(ifp->if_addrhooks, 1, pfi_kifaddr_update, p); - pfi_index2kif[ifp->if_index] = p; - pfi_dohooks(p); splx(s); } void pfi_detach_ifnet(struct ifnet *ifp) { - struct pfi_kif *p, *q, key; - int s; + int s; + struct pfi_kif *kif; - strlcpy(key.pfik_name, ifp->if_xname, sizeof(key.pfik_name)); + if ((kif = (struct pfi_kif *)ifp->if_pf_kif) == NULL) + return; s = splsoftnet(); pfi_update++; - p = RB_FIND(pfi_ifhead, &pfi_ifs, &key); - if (p == NULL) { - printf("pfi_detach_ifnet: cannot find %s", ifp->if_xname); - splx(s); - return; - } - hook_disestablish(p->pfik_ifp->if_addrhooks, p->pfik_ah_cookie); - q = p->pfik_parent; - p->pfik_ifp = NULL; - p->pfik_flags &= ~PFI_IFLAG_ATTACHED; - pfi_index2kif[ifp->if_index] = NULL; - pfi_dohooks(p); - pfi_maybe_destroy(p); + hook_disestablish(ifp->if_addrhooks, kif->pfik_ah_cookie); + pfi_kif_update(kif); + + kif->pfik_ifp = NULL; + ifp->if_pf_kif = NULL; + pfi_kif_unref(kif, PFI_KIF_REF_NONE); splx(s); } -struct pfi_kif * -pfi_lookup_create(const char *name) +void 
+pfi_attach_ifgroup(struct ifg_group *ifg) { - struct pfi_kif *p, *q, key; + struct pfi_kif *kif; int s; + pfi_initialize(); s = splsoftnet(); - p = pfi_lookup_if(name); - if (p == NULL) { - pfi_copy_group(key.pfik_name, name, sizeof(key.pfik_name)); - q = pfi_lookup_if(key.pfik_name); - if (q == NULL) { - pfi_newgroup(key.pfik_name, PFI_IFLAG_DYNAMIC); - q = pfi_lookup_if(key.pfik_name); - } - p = pfi_lookup_if(name); - if (p == NULL && q != NULL) - p = pfi_if_create(name, q, PFI_IFLAG_INSTANCE); - } - splx(s); - return (p); -} + pfi_update++; + if ((kif = pfi_kif_get(ifg->ifg_group)) == NULL) + panic("pfi_kif_get failed"); -struct pfi_kif * -pfi_attach_rule(const char *name) -{ - struct pfi_kif *p; + kif->pfik_group = ifg; + ifg->ifg_pf_kif = (caddr_t)kif; - p = pfi_lookup_create(name); - if (p != NULL) - p->pfik_rules++; - return (p); + splx(s); } void -pfi_detach_rule(struct pfi_kif *p) +pfi_detach_ifgroup(struct ifg_group *ifg) { - if (p == NULL) + int s; + struct pfi_kif *kif; + + if ((kif = (struct pfi_kif *)ifg->ifg_pf_kif) == NULL) return; - if (p->pfik_rules > 0) - p->pfik_rules--; - else - printf("pfi_detach_rule: reference count at 0\n"); - pfi_maybe_destroy(p); + + s = splsoftnet(); + pfi_update++; + + kif->pfik_group = NULL; + ifg->ifg_pf_kif = NULL; + pfi_kif_unref(kif, PFI_KIF_REF_NONE); + splx(s); } void -pfi_attach_state(struct pfi_kif *p) +pfi_group_change(const char *group) { - if (!p->pfik_states++) - TAILQ_INSERT_TAIL(&pfi_statehead, p, pfik_w_states); + struct pfi_kif *kif; + int s; + + s = splsoftnet(); + pfi_update++; + if ((kif = pfi_kif_get(group)) == NULL) + panic("pfi_kif_get failed"); + + pfi_kif_update(kif); + + splx(s); } -void -pfi_detach_state(struct pfi_kif *p) +int +pfi_match_addr(struct pfi_dynaddr *dyn, struct pf_addr *a, sa_family_t af) { - if (p == NULL) - return; - if (p->pfik_states <= 0) { - printf("pfi_detach_state: reference count <= 0\n"); - return; + switch (af) { +#ifdef INET + case AF_INET: + switch (dyn->pfid_acnt4) { + case 0: + return (0); + case 1: + return (PF_MATCHA(0, &dyn->pfid_addr4, + &dyn->pfid_mask4, a, AF_INET)); + default: + return (pfr_match_addr(dyn->pfid_kt, a, AF_INET)); + } + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + switch (dyn->pfid_acnt6) { + case 0: + return (0); + case 1: + return (PF_MATCHA(0, &dyn->pfid_addr6, + &dyn->pfid_mask6, a, AF_INET6)); + default: + return (pfr_match_addr(dyn->pfid_kt, a, AF_INET6)); + } + break; +#endif /* INET6 */ + default: + return (0); } - if (!--p->pfik_states) - TAILQ_REMOVE(&pfi_statehead, p, pfik_w_states); - pfi_maybe_destroy(p); } int @@ -287,15 +336,20 @@ pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af) if (aw->type != PF_ADDR_DYNIFTL) return (0); - dyn = pool_get(&pfi_addr_pl, PR_NOWAIT); - if (dyn == NULL) + if ((dyn = pool_get(&pfi_addr_pl, PR_NOWAIT)) == NULL) return (1); bzero(dyn, sizeof(*dyn)); s = splsoftnet(); - dyn->pfid_kif = pfi_attach_rule(aw->v.ifname); - if (dyn->pfid_kif == NULL) - senderr(1); + if (!strcmp(aw->v.ifname, "self")) + dyn->pfid_kif = pfi_kif_get(IFG_ALL); + else + dyn->pfid_kif = pfi_kif_get(aw->v.ifname); + if (dyn->pfid_kif == NULL) { + rv = 1; + goto _bad; + } + pfi_kif_ref(dyn->pfid_kif, PFI_KIF_REF_RULE); dyn->pfid_net = pfi_unmask(&aw->v.a.mask); if (af == AF_INET && dyn->pfid_net == 32) @@ -312,24 +366,23 @@ pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af) if (dyn->pfid_net != 128) snprintf(tblname + strlen(tblname), sizeof(tblname) - strlen(tblname), "/%d", dyn->pfid_net); - ruleset = 
pf_find_or_create_ruleset(PF_RESERVED_ANCHOR); - if (ruleset == NULL) - senderr(1); + if ((ruleset = pf_find_or_create_ruleset(PF_RESERVED_ANCHOR)) == NULL) { + rv = 1; + goto _bad; + } - dyn->pfid_kt = pfr_attach_table(ruleset, tblname); - if (dyn->pfid_kt == NULL) - senderr(1); + if ((dyn->pfid_kt = pfr_attach_table(ruleset, tblname)) == NULL) { + rv = 1; + goto _bad; + } dyn->pfid_kt->pfrkt_flags |= PFR_TFLAG_ACTIVE; dyn->pfid_iflags = aw->iflags; dyn->pfid_af = af; - dyn->pfid_hook_cookie = hook_establish(dyn->pfid_kif->pfik_ah_head, 1, - pfi_dynaddr_update, dyn); - if (dyn->pfid_hook_cookie == NULL) - senderr(1); + TAILQ_INSERT_TAIL(&dyn->pfid_kif->pfik_dynaddrs, dyn, entry); aw->p.dyn = dyn; - pfi_dynaddr_update(aw->p.dyn); + pfi_kif_update(dyn->pfid_kif); splx(s); return (0); @@ -339,16 +392,32 @@ _bad: if (ruleset != NULL) pf_remove_if_empty_ruleset(ruleset); if (dyn->pfid_kif != NULL) - pfi_detach_rule(dyn->pfid_kif); + pfi_kif_unref(dyn->pfid_kif, PFI_KIF_REF_RULE); pool_put(&pfi_addr_pl, dyn); splx(s); return (rv); } void -pfi_dynaddr_update(void *p) +pfi_kif_update(struct pfi_kif *kif) +{ + struct ifg_list *ifgl; + struct pfi_dynaddr *p; + + /* update all dynaddr */ + TAILQ_FOREACH(p, &kif->pfik_dynaddrs, entry) + pfi_dynaddr_update(p); + + /* again for all groups kif is member of */ + if (kif->pfik_ifp != NULL) + TAILQ_FOREACH(ifgl, &kif->pfik_ifp->if_groups, ifgl_next) + pfi_kif_update((struct pfi_kif *) + ifgl->ifgl_group->ifg_pf_kif); +} + +void +pfi_dynaddr_update(struct pfi_dynaddr *dyn) { - struct pfi_dynaddr *dyn = (struct pfi_dynaddr *)p; struct pfi_kif *kif; struct pfr_ktable *kt; @@ -357,6 +426,7 @@ pfi_dynaddr_update(void *p) kif = dyn->pfid_kif; kt = dyn->pfid_kt; + if (kt->pfrkt_larg != pfi_update) { /* this table needs to be brought up-to-date */ pfi_table_update(kt, kif, dyn->pfid_net, dyn->pfid_iflags); @@ -369,28 +439,18 @@ void pfi_table_update(struct pfr_ktable *kt, struct pfi_kif *kif, int net, int flags) { int e, size2 = 0; - struct pfi_kif *p; - struct pfr_table t; + struct ifg_member *ifgm; - if ((kif->pfik_flags & PFI_IFLAG_INSTANCE) && kif->pfik_ifp == NULL) { - pfr_clr_addrs(&kt->pfrkt_t, NULL, 0); - return; - } pfi_buffer_cnt = 0; - if ((kif->pfik_flags & PFI_IFLAG_INSTANCE)) + + if (kif->pfik_ifp != NULL) pfi_instance_add(kif->pfik_ifp, net, flags); - else if (strcmp(kif->pfik_name, "self")) { - TAILQ_FOREACH(p, &kif->pfik_grouphead, pfik_instances) - pfi_instance_add(p->pfik_ifp, net, flags); - } else { - RB_FOREACH(p, pfi_ifhead, &pfi_ifs) - if (p->pfik_flags & PFI_IFLAG_INSTANCE) - pfi_instance_add(p->pfik_ifp, net, flags); - } - t = kt->pfrkt_t; - t.pfrt_flags = 0; - if ((e = pfr_set_addrs(&t, pfi_buffer, pfi_buffer_cnt, &size2, - NULL, NULL, NULL, 0))) + else if (kif->pfik_group != NULL) + TAILQ_FOREACH(ifgm, &kif->pfik_group->ifg_members, ifgm_next) + pfi_instance_add(ifgm->ifgm_ifp, net, flags); + + if ((e = pfr_set_addrs(&kt->pfrkt_t, pfi_buffer, pfi_buffer_cnt, &size2, + NULL, NULL, NULL, 0, PFR_TFLAG_ALLMASK))) printf("pfi_table_update: cannot set %d new addresses " "into table %s: %d\n", pfi_buffer_cnt, kt->pfrkt_name, e); } @@ -434,13 +494,12 @@ pfi_instance_add(struct ifnet *ifp, int net, int flags) got6 = 1; net2 = net; if (net2 == 128 && (flags & PFI_AFLAG_NETWORK)) { - if (af == AF_INET) { + if (af == AF_INET) net2 = pfi_unmask(&((struct sockaddr_in *) ia->ifa_netmask)->sin_addr); - } else if (af == AF_INET6) { + else if (af == AF_INET6) net2 = pfi_unmask(&((struct sockaddr_in6 *) ia->ifa_netmask)->sin6_addr); - } } if (af == AF_INET 
&& net2 > 32) net2 = 32; @@ -488,9 +547,9 @@ pfi_address_add(struct sockaddr *sa, int af, int net) p->pfra_net = net; if (af == AF_INET) p->pfra_ip4addr = ((struct sockaddr_in *)sa)->sin_addr; - if (af == AF_INET6) { + else if (af == AF_INET6) { p->pfra_ip6addr = ((struct sockaddr_in6 *)sa)->sin6_addr; - if (IN6_IS_ADDR_LINKLOCAL(&p->pfra_ip6addr)) + if (IN6_IS_SCOPE_EMBED(&p->pfra_ip6addr)) p->pfra_ip6addr.s6_addr16[1] = 0; } /* mask network address bits */ @@ -510,9 +569,8 @@ pfi_dynaddr_remove(struct pf_addr_wrap *aw) return; s = splsoftnet(); - hook_disestablish(aw->p.dyn->pfid_kif->pfik_ah_head, - aw->p.dyn->pfid_hook_cookie); - pfi_detach_rule(aw->p.dyn->pfid_kif); + TAILQ_REMOVE(&aw->p.dyn->pfid_kif->pfik_dynaddrs, aw->p.dyn, entry); + pfi_kif_unref(aw->p.dyn->pfid_kif, PFI_KIF_REF_RULE); aw->p.dyn->pfid_kif = NULL; pfr_detach_table(aw->p.dyn->pfid_kt); aw->p.dyn->pfid_kt = NULL; @@ -533,11 +591,12 @@ pfi_dynaddr_copyout(struct pf_addr_wrap *aw) void pfi_kifaddr_update(void *v) { - int s; + int s; + struct pfi_kif *kif = (struct pfi_kif *)v; s = splsoftnet(); pfi_update++; - pfi_dohooks(v); + pfi_kif_update(kif); splx(s); } @@ -547,107 +606,16 @@ pfi_if_compare(struct pfi_kif *p, struct pfi_kif *q) return (strncmp(p->pfik_name, q->pfik_name, IFNAMSIZ)); } -struct pfi_kif * -pfi_if_create(const char *name, struct pfi_kif *q, int flags) -{ - struct pfi_kif *p; - - p = malloc(sizeof(*p), PFI_MTYPE, M_DONTWAIT); - if (p == NULL) - return (NULL); - bzero(p, sizeof(*p)); - p->pfik_ah_head = malloc(sizeof(*p->pfik_ah_head), PFI_MTYPE, - M_DONTWAIT); - if (p->pfik_ah_head == NULL) { - free(p, PFI_MTYPE); - return (NULL); - } - bzero(p->pfik_ah_head, sizeof(*p->pfik_ah_head)); - TAILQ_INIT(p->pfik_ah_head); - TAILQ_INIT(&p->pfik_grouphead); - strlcpy(p->pfik_name, name, sizeof(p->pfik_name)); - RB_INIT(&p->pfik_lan_ext); - RB_INIT(&p->pfik_ext_gwy); - p->pfik_flags = flags; - p->pfik_parent = q; - p->pfik_tzero = time_second; - - RB_INSERT(pfi_ifhead, &pfi_ifs, p); - if (q != NULL) { - q->pfik_addcnt++; - TAILQ_INSERT_TAIL(&q->pfik_grouphead, p, pfik_instances); - } - pfi_ifcnt++; - return (p); -} - -int -pfi_maybe_destroy(struct pfi_kif *p) -{ - int i, j, k, s; - struct pfi_kif *q = p->pfik_parent; - - if ((p->pfik_flags & (PFI_IFLAG_ATTACHED | PFI_IFLAG_GROUP)) || - p->pfik_rules > 0 || p->pfik_states > 0) - return (0); - - s = splsoftnet(); - if (q != NULL) { - for (i = 0; i < 2; i++) - for (j = 0; j < 2; j++) - for (k = 0; k < 2; k++) { - q->pfik_bytes[i][j][k] += - p->pfik_bytes[i][j][k]; - q->pfik_packets[i][j][k] += - p->pfik_packets[i][j][k]; - } - q->pfik_delcnt++; - TAILQ_REMOVE(&q->pfik_grouphead, p, pfik_instances); - } - pfi_ifcnt--; - RB_REMOVE(pfi_ifhead, &pfi_ifs, p); - splx(s); - - free(p->pfik_ah_head, PFI_MTYPE); - free(p, PFI_MTYPE); - return (1); -} - -void -pfi_copy_group(char *p, const char *q, int m) -{ - while (m > 1 && *q && !(*q >= '0' && *q <= '9')) { - *p++ = *q++; - m--; - } - if (m > 0) - *p++ = '\0'; -} - -void -pfi_newgroup(const char *name, int flags) -{ - struct pfi_kif *p; - - p = pfi_lookup_if(name); - if (p == NULL) - p = pfi_if_create(name, pfi_self, PFI_IFLAG_GROUP); - if (p == NULL) { - printf("pfi_newgroup: cannot allocate '%s' group", name); - return; - } - p->pfik_flags |= flags; -} - void pfi_fill_oldstatus(struct pf_status *pfs) { - struct pfi_kif *p, key; - int i, j, k, s; + struct pfi_kif *p; + struct pfi_kif_cmp key; + int i, j, k, s; strlcpy(key.pfik_name, pfs->ifname, sizeof(key.pfik_name)); s = splsoftnet(); - p = RB_FIND(pfi_ifhead, 
&pfi_ifs, &key); + p = RB_FIND(pfi_ifhead, &pfi_ifs, (struct pfi_kif *)&key); if (p == NULL) { splx(s); return; @@ -666,84 +634,46 @@ pfi_fill_oldstatus(struct pf_status *pfs) } int -pfi_clr_istats(const char *name, int *nzero, int flags) +pfi_clr_istats(const char *name) { struct pfi_kif *p; - int n = 0, s; - long tzero = time_second; + int s; - ACCEPT_FLAGS(PFI_FLAG_GROUP|PFI_FLAG_INSTANCE); s = splsoftnet(); RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { - if (pfi_skip_if(name, p, flags)) + if (pfi_skip_if(name, p)) continue; bzero(p->pfik_packets, sizeof(p->pfik_packets)); bzero(p->pfik_bytes, sizeof(p->pfik_bytes)); - p->pfik_tzero = tzero; - n++; - } - splx(s); - if (nzero != NULL) - *nzero = n; - return (0); -} - -int -pfi_set_flags(const char *name, int flags) -{ - struct pfi_kif *p; - int s; - - if (flags & ~PFI_IFLAG_SETABLE_MASK) - return (EINVAL); - - s = splsoftnet(); - RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { - if (pfi_skip_if(name, p, PFI_FLAG_GROUP|PFI_FLAG_INSTANCE)) - continue; - p->pfik_flags |= flags; + p->pfik_tzero = time_second; } splx(s); - return (0); -} -int -pfi_clear_flags(const char *name, int flags) -{ - struct pfi_kif *p; - int s; - - if (flags & ~PFI_IFLAG_SETABLE_MASK) - return (EINVAL); - - s = splsoftnet(); - RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { - if (pfi_skip_if(name, p, PFI_FLAG_GROUP|PFI_FLAG_INSTANCE)) - continue; - p->pfik_flags &= ~flags; - } - splx(s); return (0); } int -pfi_get_ifaces(const char *name, struct pfi_if *buf, int *size, int flags) +pfi_get_ifaces(const char *name, struct pfi_kif *buf, int *size) { - struct pfi_kif *p; + struct pfi_kif *p, *nextp; int s, n = 0; - ACCEPT_FLAGS(PFI_FLAG_GROUP|PFI_FLAG_INSTANCE); s = splsoftnet(); - RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { - if (pfi_skip_if(name, p, flags)) + for (p = RB_MIN(pfi_ifhead, &pfi_ifs); p; p = nextp) { + nextp = RB_NEXT(pfi_ifhead, &pfi_ifs, p); + if (pfi_skip_if(name, p)) continue; if (*size > n++) { if (!p->pfik_tzero) p->pfik_tzero = time_second; + pfi_kif_ref(p, PFI_KIF_REF_RULE); if (copyout(p, buf++, sizeof(*buf))) { + pfi_kif_unref(p, PFI_KIF_REF_RULE); splx(s); return (EFAULT); } + nextp = RB_NEXT(pfi_ifhead, &pfi_ifs, p); + pfi_kif_unref(p, PFI_KIF_REF_RULE); } } splx(s); @@ -751,25 +681,11 @@ pfi_get_ifaces(const char *name, struct pfi_if *buf, int *size, int flags) return (0); } -struct pfi_kif * -pfi_lookup_if(const char *name) -{ - struct pfi_kif *p, key; - - strlcpy(key.pfik_name, name, sizeof(key.pfik_name)); - p = RB_FIND(pfi_ifhead, &pfi_ifs, &key); - return (p); -} - int -pfi_skip_if(const char *filter, struct pfi_kif *p, int f) +pfi_skip_if(const char *filter, struct pfi_kif *p) { int n; - if ((p->pfik_flags & PFI_IFLAG_GROUP) && !(f & PFI_FLAG_GROUP)) - return (1); - if ((p->pfik_flags & PFI_IFLAG_INSTANCE) && !(f & PFI_FLAG_INSTANCE)) - return (1); if (filter == NULL || !*filter) return (0); if (!strcmp(p->pfik_name, filter)) @@ -784,6 +700,38 @@ pfi_skip_if(const char *filter, struct pfi_kif *p, int f) return (p->pfik_name[n] < '0' || p->pfik_name[n] > '9'); } +int +pfi_set_flags(const char *name, int flags) +{ + struct pfi_kif *p; + int s; + + s = splsoftnet(); + RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { + if (pfi_skip_if(name, p)) + continue; + p->pfik_flags |= flags; + } + splx(s); + return (0); +} + +int +pfi_clear_flags(const char *name, int flags) +{ + struct pfi_kif *p; + int s; + + s = splsoftnet(); + RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { + if (pfi_skip_if(name, p)) + continue; + p->pfik_flags &= ~flags; + } + splx(s); + return (0); +} + /* from pf_print_state.c */ 
int pfi_unmask(void *addr) @@ -804,44 +752,3 @@ pfi_unmask(void *addr) return (b); } -void -pfi_dohooks(struct pfi_kif *p) -{ - for (; p != NULL; p = p->pfik_parent) - dohooks(p->pfik_ah_head, 0); -} - -int -pfi_match_addr(struct pfi_dynaddr *dyn, struct pf_addr *a, sa_family_t af) -{ - switch (af) { -#ifdef INET - case AF_INET: - switch (dyn->pfid_acnt4) { - case 0: - return (0); - case 1: - return (PF_MATCHA(0, &dyn->pfid_addr4, - &dyn->pfid_mask4, a, AF_INET)); - default: - return (pfr_match_addr(dyn->pfid_kt, a, AF_INET)); - } - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - switch (dyn->pfid_acnt6) { - case 0: - return (0); - case 1: - return (PF_MATCHA(0, &dyn->pfid_addr6, - &dyn->pfid_mask6, a, AF_INET6)); - default: - return (pfr_match_addr(dyn->pfid_kt, a, AF_INET6)); - } - break; -#endif /* INET6 */ - default: - return (0); - } -} diff --git a/sys/contrib/pf/net/pf_ioctl.c b/sys/contrib/pf/net/pf_ioctl.c index d4cb3c7..5694717 100644 --- a/sys/contrib/pf/net/pf_ioctl.c +++ b/sys/contrib/pf/net/pf_ioctl.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_ioctl.c,v 1.139 2005/03/03 07:13:39 dhartmei Exp $ */ +/* $OpenBSD: pf_ioctl.c,v 1.175 2007/02/26 22:47:43 deraadt Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -48,7 +48,11 @@ #include #include #include +#include #include +#include +#include +#include #include #include @@ -62,12 +66,17 @@ #include #include +#include #include #if NPFSYNC > 0 #include #endif /* NPFSYNC > 0 */ +#if NPFLOG > 0 +#include +#endif /* NPFLOG > 0 */ + #ifdef INET6 #include #include @@ -78,17 +87,11 @@ #endif void pfattach(int); +void pf_thread_create(void *); int pfopen(dev_t, int, int, struct proc *); int pfclose(dev_t, int, int, struct proc *); struct pf_pool *pf_get_pool(char *, u_int32_t, u_int8_t, u_int32_t, u_int8_t, u_int8_t, u_int8_t); -int pf_get_ruleset_number(u_int8_t); -void pf_init_ruleset(struct pf_ruleset *); -int pf_anchor_setup(struct pf_rule *, - const struct pf_ruleset *, const char *); -int pf_anchor_copyout(const struct pf_ruleset *, - const struct pf_rule *, struct pfioc_rule *); -void pf_anchor_remove(struct pf_rule *); void pf_mv_pool(struct pf_palist *, struct pf_palist *); void pf_empty_pool(struct pf_palist *); @@ -102,11 +105,13 @@ int pf_disable_altq(struct pf_altq *); #endif /* ALTQ */ int pf_begin_rules(u_int32_t *, int, const char *); int pf_rollback_rules(u_int32_t, int, char *); +int pf_setup_pfsync_matching(struct pf_ruleset *); +void pf_hash_rule(MD5_CTX *, struct pf_rule *); +void pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *); int pf_commit_rules(u_int32_t, int, char *); -extern struct timeout pf_expire_to; - struct pf_rule pf_default_rule; +struct rwlock pf_consistency_lock = RWLOCK_INITIALIZER; #ifdef ALTQ static int pf_altq_running; #endif @@ -118,9 +123,9 @@ TAILQ_HEAD(pf_tags, pf_tagname) pf_tags = TAILQ_HEAD_INITIALIZER(pf_tags), #if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE) #error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE #endif -static u_int16_t tagname2tag(struct pf_tags *, char *); -static void tag2tagname(struct pf_tags *, u_int16_t, char *); -static void tag_unref(struct pf_tags *, u_int16_t); +u_int16_t tagname2tag(struct pf_tags *, char *); +void tag2tagname(struct pf_tags *, u_int16_t, char *); +void tag_unref(struct pf_tags *, u_int16_t); int pf_rtlabel_add(struct pf_addr_wrap *); void pf_rtlabel_remove(struct pf_addr_wrap *); void pf_rtlabel_copyout(struct pf_addr_wrap *); @@ -149,6 +154,10 @@ pfattach(int num) pool_sethardlimit(pf_pool_limits[PF_LIMIT_STATES].pp, 
pf_pool_limits[PF_LIMIT_STATES].limit, NULL, 0); + if (ctob(physmem) <= 100*1024*1024) + pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].limit = + PFR_KENTRY_HIWAT_SMALL; + RB_INIT(&tree_src_tracking); RB_INIT(&pf_anchors); pf_init_ruleset(&pf_main_ruleset); @@ -157,12 +166,13 @@ pfattach(int num) TAILQ_INIT(&pf_pabuf); pf_altqs_active = &pf_altqs[0]; pf_altqs_inactive = &pf_altqs[1]; - TAILQ_INIT(&state_updates); + TAILQ_INIT(&state_list); /* default rule should never be garbage collected */ pf_default_rule.entries.tqe_prev = &pf_default_rule.entries.tqe_next; pf_default_rule.action = PF_PASS; pf_default_rule.nr = -1; + pf_default_rule.rtableid = -1; /* initialize default timeouts */ timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL; @@ -183,9 +193,8 @@ pfattach(int num) timeout[PFTM_INTERVAL] = PFTM_INTERVAL_VAL; timeout[PFTM_SRC_NODE] = PFTM_SRC_NODE_VAL; timeout[PFTM_TS_DIFF] = PFTM_TS_DIFF_VAL; - - timeout_set(&pf_expire_to, pf_purge_timeout, &pf_expire_to); - timeout_add(&pf_expire_to, timeout[PFTM_INTERVAL] * hz); + timeout[PFTM_ADAPTIVE_START] = PFSTATE_ADAPT_START; + timeout[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END; pf_normalize_init(); bzero(&pf_status, sizeof(pf_status)); @@ -193,6 +202,16 @@ pfattach(int num) /* XXX do our best to avoid a conflict */ pf_status.hostid = arc4random(); + + /* require process context to purge states, so perform in a thread */ + kthread_create_deferred(pf_thread_create, NULL); +} + +void +pf_thread_create(void *v) +{ + if (kthread_create(pf_purge_thread, NULL, NULL, "pfpurge")) + panic("pfpurge thread"); } int @@ -255,292 +274,6 @@ pf_get_pool(char *anchor, u_int32_t ticket, u_int8_t rule_action, return (&rule->rpool); } -int -pf_get_ruleset_number(u_int8_t action) -{ - switch (action) { - case PF_SCRUB: - case PF_NOSCRUB: - return (PF_RULESET_SCRUB); - break; - case PF_PASS: - case PF_DROP: - return (PF_RULESET_FILTER); - break; - case PF_NAT: - case PF_NONAT: - return (PF_RULESET_NAT); - break; - case PF_BINAT: - case PF_NOBINAT: - return (PF_RULESET_BINAT); - break; - case PF_RDR: - case PF_NORDR: - return (PF_RULESET_RDR); - break; - default: - return (PF_RULESET_MAX); - break; - } -} - -void -pf_init_ruleset(struct pf_ruleset *ruleset) -{ - int i; - - memset(ruleset, 0, sizeof(struct pf_ruleset)); - for (i = 0; i < PF_RULESET_MAX; i++) { - TAILQ_INIT(&ruleset->rules[i].queues[0]); - TAILQ_INIT(&ruleset->rules[i].queues[1]); - ruleset->rules[i].active.ptr = &ruleset->rules[i].queues[0]; - ruleset->rules[i].inactive.ptr = &ruleset->rules[i].queues[1]; - } -} - -struct pf_anchor * -pf_find_anchor(const char *path) -{ - static struct pf_anchor key; - - memset(&key, 0, sizeof(key)); - strlcpy(key.path, path, sizeof(key.path)); - return (RB_FIND(pf_anchor_global, &pf_anchors, &key)); -} - -struct pf_ruleset * -pf_find_ruleset(const char *path) -{ - struct pf_anchor *anchor; - - while (*path == '/') - path++; - if (!*path) - return (&pf_main_ruleset); - anchor = pf_find_anchor(path); - if (anchor == NULL) - return (NULL); - else - return (&anchor->ruleset); -} - -struct pf_ruleset * -pf_find_or_create_ruleset(const char *path) -{ - static char p[MAXPATHLEN]; - char *q, *r; - struct pf_ruleset *ruleset; - struct pf_anchor *anchor, *dup, *parent = NULL; - - while (*path == '/') - path++; - ruleset = pf_find_ruleset(path); - if (ruleset != NULL) - return (ruleset); - strlcpy(p, path, sizeof(p)); - while (parent == NULL && (q = strrchr(p, '/')) != NULL) { - *q = 0; - if ((ruleset = pf_find_ruleset(p)) != NULL) { - parent = ruleset->anchor; - break; - } - } 
- if (q == NULL) - q = p; - else - q++; - strlcpy(p, path, sizeof(p)); - if (!*q) - return (NULL); - while ((r = strchr(q, '/')) != NULL || *q) { - if (r != NULL) - *r = 0; - if (!*q || strlen(q) >= PF_ANCHOR_NAME_SIZE || - (parent != NULL && strlen(parent->path) >= - MAXPATHLEN - PF_ANCHOR_NAME_SIZE - 1)) - return (NULL); - anchor = (struct pf_anchor *)malloc(sizeof(*anchor), M_TEMP, - M_NOWAIT); - if (anchor == NULL) - return (NULL); - memset(anchor, 0, sizeof(*anchor)); - RB_INIT(&anchor->children); - strlcpy(anchor->name, q, sizeof(anchor->name)); - if (parent != NULL) { - strlcpy(anchor->path, parent->path, - sizeof(anchor->path)); - strlcat(anchor->path, "/", sizeof(anchor->path)); - } - strlcat(anchor->path, anchor->name, sizeof(anchor->path)); - if ((dup = RB_INSERT(pf_anchor_global, &pf_anchors, anchor)) != - NULL) { - printf("pf_find_or_create_ruleset: RB_INSERT1 " - "'%s' '%s' collides with '%s' '%s'\n", - anchor->path, anchor->name, dup->path, dup->name); - free(anchor, M_TEMP); - return (NULL); - } - if (parent != NULL) { - anchor->parent = parent; - if ((dup = RB_INSERT(pf_anchor_node, &parent->children, - anchor)) != NULL) { - printf("pf_find_or_create_ruleset: " - "RB_INSERT2 '%s' '%s' collides with " - "'%s' '%s'\n", anchor->path, anchor->name, - dup->path, dup->name); - RB_REMOVE(pf_anchor_global, &pf_anchors, - anchor); - free(anchor, M_TEMP); - return (NULL); - } - } - pf_init_ruleset(&anchor->ruleset); - anchor->ruleset.anchor = anchor; - parent = anchor; - if (r != NULL) - q = r + 1; - else - *q = 0; - } - return (&anchor->ruleset); -} - -void -pf_remove_if_empty_ruleset(struct pf_ruleset *ruleset) -{ - struct pf_anchor *parent; - int i; - - while (ruleset != NULL) { - if (ruleset == &pf_main_ruleset || ruleset->anchor == NULL || - !RB_EMPTY(&ruleset->anchor->children) || - ruleset->anchor->refcnt > 0 || ruleset->tables > 0 || - ruleset->topen) - return; - for (i = 0; i < PF_RULESET_MAX; ++i) - if (!TAILQ_EMPTY(ruleset->rules[i].active.ptr) || - !TAILQ_EMPTY(ruleset->rules[i].inactive.ptr) || - ruleset->rules[i].inactive.open) - return; - RB_REMOVE(pf_anchor_global, &pf_anchors, ruleset->anchor); - if ((parent = ruleset->anchor->parent) != NULL) - RB_REMOVE(pf_anchor_node, &parent->children, - ruleset->anchor); - free(ruleset->anchor, M_TEMP); - if (parent == NULL) - return; - ruleset = &parent->ruleset; - } -} - -int -pf_anchor_setup(struct pf_rule *r, const struct pf_ruleset *s, - const char *name) -{ - static char *p, path[MAXPATHLEN]; - struct pf_ruleset *ruleset; - - r->anchor = NULL; - r->anchor_relative = 0; - r->anchor_wildcard = 0; - if (!name[0]) - return (0); - if (name[0] == '/') - strlcpy(path, name + 1, sizeof(path)); - else { - /* relative path */ - r->anchor_relative = 1; - if (s->anchor == NULL || !s->anchor->path[0]) - path[0] = 0; - else - strlcpy(path, s->anchor->path, sizeof(path)); - while (name[0] == '.' && name[1] == '.' && name[2] == '/') { - if (!path[0]) { - printf("pf_anchor_setup: .. 
beyond root\n"); - return (1); - } - if ((p = strrchr(path, '/')) != NULL) - *p = 0; - else - path[0] = 0; - r->anchor_relative++; - name += 3; - } - if (path[0]) - strlcat(path, "/", sizeof(path)); - strlcat(path, name, sizeof(path)); - } - if ((p = strrchr(path, '/')) != NULL && !strcmp(p, "/*")) { - r->anchor_wildcard = 1; - *p = 0; - } - ruleset = pf_find_or_create_ruleset(path); - if (ruleset == NULL || ruleset->anchor == NULL) { - printf("pf_anchor_setup: ruleset\n"); - return (1); - } - r->anchor = ruleset->anchor; - r->anchor->refcnt++; - return (0); -} - -int -pf_anchor_copyout(const struct pf_ruleset *rs, const struct pf_rule *r, - struct pfioc_rule *pr) -{ - pr->anchor_call[0] = 0; - if (r->anchor == NULL) - return (0); - if (!r->anchor_relative) { - strlcpy(pr->anchor_call, "/", sizeof(pr->anchor_call)); - strlcat(pr->anchor_call, r->anchor->path, - sizeof(pr->anchor_call)); - } else { - char a[MAXPATHLEN], b[MAXPATHLEN], *p; - int i; - - if (rs->anchor == NULL) - a[0] = 0; - else - strlcpy(a, rs->anchor->path, sizeof(a)); - strlcpy(b, r->anchor->path, sizeof(b)); - for (i = 1; i < r->anchor_relative; ++i) { - if ((p = strrchr(a, '/')) == NULL) - p = a; - *p = 0; - strlcat(pr->anchor_call, "../", - sizeof(pr->anchor_call)); - } - if (strncmp(a, b, strlen(a))) { - printf("pf_anchor_copyout: '%s' '%s'\n", a, b); - return (1); - } - if (strlen(b) > strlen(a)) - strlcat(pr->anchor_call, b + (a[0] ? strlen(a) + 1 : 0), - sizeof(pr->anchor_call)); - } - if (r->anchor_wildcard) - strlcat(pr->anchor_call, pr->anchor_call[0] ? "/*" : "*", - sizeof(pr->anchor_call)); - return (0); -} - -void -pf_anchor_remove(struct pf_rule *r) -{ - if (r->anchor == NULL) - return; - if (r->anchor->refcnt <= 0) { - printf("pf_anchor_remove: broken refcount"); - r->anchor = NULL; - return; - } - if (!--r->anchor->refcnt) - pf_remove_if_empty_ruleset(&r->anchor->ruleset); - r->anchor = NULL; -} - void pf_mv_pool(struct pf_palist *poola, struct pf_palist *poolb) { @@ -560,7 +293,7 @@ pf_empty_pool(struct pf_palist *poola) while ((empty_pool_pa = TAILQ_FIRST(poola)) != NULL) { pfi_dynaddr_remove(&empty_pool_pa->addr); pf_tbladdr_remove(&empty_pool_pa->addr); - pfi_detach_rule(empty_pool_pa->kif); + pfi_kif_unref(empty_pool_pa->kif, PFI_KIF_REF_RULE); TAILQ_REMOVE(poola, empty_pool_pa, entries); pool_put(&pf_pooladdr_pl, empty_pool_pa); } @@ -606,13 +339,13 @@ pf_rm_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule) if (rule->overload_tbl) pfr_detach_table(rule->overload_tbl); } - pfi_detach_rule(rule->kif); + pfi_kif_unref(rule->kif, PFI_KIF_REF_RULE); pf_anchor_remove(rule); pf_empty_pool(&rule->rpool.list); pool_put(&pf_rule_pl, rule); } -static u_int16_t +u_int16_t tagname2tag(struct pf_tags *head, char *tagname) { struct pf_tagname *tag, *p = NULL; @@ -657,7 +390,7 @@ tagname2tag(struct pf_tags *head, char *tagname) return (tag->tag); } -static void +void tag2tagname(struct pf_tags *head, u_int16_t tagid, char *p) { struct pf_tagname *tag; @@ -669,7 +402,7 @@ tag2tagname(struct pf_tags *head, u_int16_t tagid, char *p) } } -static void +void tag_unref(struct pf_tags *head, u_int16_t tag) { struct pf_tagname *p, *next; @@ -698,7 +431,7 @@ pf_tagname2tag(char *tagname) void pf_tag2tagname(u_int16_t tagid, char *p) { - return (tag2tagname(&pf_tags, tagid, p)); + tag2tagname(&pf_tags, tagid, p); } void @@ -716,7 +449,7 @@ pf_tag_ref(u_int16_t tag) void pf_tag_unref(u_int16_t tag) { - return (tag_unref(&pf_tags, tag)); + tag_unref(&pf_tags, tag); } int @@ -760,13 +493,13 @@ pf_qname2qid(char *qname) 
void pf_qid2qname(u_int32_t qid, char *p) { - return (tag2tagname(&pf_qids, (u_int16_t)qid, p)); + tag2tagname(&pf_qids, (u_int16_t)qid, p); } void pf_qid_unref(u_int32_t qid) { - return (tag_unref(&pf_qids, (u_int16_t)qid)); + tag_unref(&pf_qids, (u_int16_t)qid); } int @@ -885,7 +618,7 @@ pf_enable_altq(struct pf_altq *altq) if (error == 0 && ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) { tb.rate = altq->ifbandwidth; tb.depth = altq->tbrsize; - s = splimp(); + s = splnet(); error = tbr_set(&ifp->if_snd, &tb); splx(s); } @@ -915,7 +648,7 @@ pf_disable_altq(struct pf_altq *altq) if (error == 0) { /* clear tokenbucket regulator */ tb.rate = 0; - s = splimp(); + s = splnet(); error = tbr_set(&ifp->if_snd, &tb); splx(s); } @@ -935,8 +668,10 @@ pf_begin_rules(u_int32_t *ticket, int rs_num, const char *anchor) rs = pf_find_or_create_ruleset(anchor); if (rs == NULL) return (EINVAL); - while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) + while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) { pf_rm_rule(rs->rules[rs_num].inactive.ptr, rule); + rs->rules[rs_num].inactive.rcount--; + } *ticket = ++rs->rules[rs_num].inactive.ticket; rs->rules[rs_num].inactive.open = 1; return (0); @@ -954,19 +689,105 @@ pf_rollback_rules(u_int32_t ticket, int rs_num, char *anchor) if (rs == NULL || !rs->rules[rs_num].inactive.open || rs->rules[rs_num].inactive.ticket != ticket) return (0); - while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) + while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) { pf_rm_rule(rs->rules[rs_num].inactive.ptr, rule); + rs->rules[rs_num].inactive.rcount--; + } rs->rules[rs_num].inactive.open = 0; return (0); } +#define PF_MD5_UPD(st, elm) \ + MD5Update(ctx, (u_int8_t *) &(st)->elm, sizeof((st)->elm)) + +#define PF_MD5_UPD_STR(st, elm) \ + MD5Update(ctx, (u_int8_t *) (st)->elm, strlen((st)->elm)) + +#define PF_MD5_UPD_HTONL(st, elm, stor) do { \ + (stor) = htonl((st)->elm); \ + MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int32_t));\ +} while (0) + +#define PF_MD5_UPD_HTONS(st, elm, stor) do { \ + (stor) = htons((st)->elm); \ + MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int16_t));\ +} while (0) + +void +pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr) +{ + PF_MD5_UPD(pfr, addr.type); + switch (pfr->addr.type) { + case PF_ADDR_DYNIFTL: + PF_MD5_UPD(pfr, addr.v.ifname); + PF_MD5_UPD(pfr, addr.iflags); + break; + case PF_ADDR_TABLE: + PF_MD5_UPD(pfr, addr.v.tblname); + break; + case PF_ADDR_ADDRMASK: + /* XXX ignore af? */ + PF_MD5_UPD(pfr, addr.v.a.addr.addr32); + PF_MD5_UPD(pfr, addr.v.a.mask.addr32); + break; + case PF_ADDR_RTLABEL: + PF_MD5_UPD(pfr, addr.v.rtlabelname); + break; + } + + PF_MD5_UPD(pfr, port[0]); + PF_MD5_UPD(pfr, port[1]); + PF_MD5_UPD(pfr, neg); + PF_MD5_UPD(pfr, port_op); +} + +void +pf_hash_rule(MD5_CTX *ctx, struct pf_rule *rule) +{ + u_int16_t x; + u_int32_t y; + + pf_hash_rule_addr(ctx, &rule->src); + pf_hash_rule_addr(ctx, &rule->dst); + PF_MD5_UPD_STR(rule, label); + PF_MD5_UPD_STR(rule, ifname); + PF_MD5_UPD_STR(rule, match_tagname); + PF_MD5_UPD_HTONS(rule, match_tag, x); /* dup? 
*/ + PF_MD5_UPD_HTONL(rule, os_fingerprint, y); + PF_MD5_UPD_HTONL(rule, prob, y); + PF_MD5_UPD_HTONL(rule, uid.uid[0], y); + PF_MD5_UPD_HTONL(rule, uid.uid[1], y); + PF_MD5_UPD(rule, uid.op); + PF_MD5_UPD_HTONL(rule, gid.gid[0], y); + PF_MD5_UPD_HTONL(rule, gid.gid[1], y); + PF_MD5_UPD(rule, gid.op); + PF_MD5_UPD_HTONL(rule, rule_flag, y); + PF_MD5_UPD(rule, action); + PF_MD5_UPD(rule, direction); + PF_MD5_UPD(rule, af); + PF_MD5_UPD(rule, quick); + PF_MD5_UPD(rule, ifnot); + PF_MD5_UPD(rule, match_tag_not); + PF_MD5_UPD(rule, natpass); + PF_MD5_UPD(rule, keep_state); + PF_MD5_UPD(rule, proto); + PF_MD5_UPD(rule, type); + PF_MD5_UPD(rule, code); + PF_MD5_UPD(rule, flags); + PF_MD5_UPD(rule, flagset); + PF_MD5_UPD(rule, allow_opts); + PF_MD5_UPD(rule, rt); + PF_MD5_UPD(rule, tos); +} + int pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) { struct pf_ruleset *rs; - struct pf_rule *rule; + struct pf_rule *rule, **old_array; struct pf_rulequeue *old_rules; - int s; + int s, error; + u_int32_t old_rcount; if (rs_num < 0 || rs_num >= PF_RULESET_MAX) return (EINVAL); @@ -975,19 +796,41 @@ pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) ticket != rs->rules[rs_num].inactive.ticket) return (EBUSY); + /* Calculate checksum for the main ruleset */ + if (rs == &pf_main_ruleset) { + error = pf_setup_pfsync_matching(rs); + if (error != 0) + return (error); + } + /* Swap rules, keep the old. */ s = splsoftnet(); old_rules = rs->rules[rs_num].active.ptr; + old_rcount = rs->rules[rs_num].active.rcount; + old_array = rs->rules[rs_num].active.ptr_array; + rs->rules[rs_num].active.ptr = rs->rules[rs_num].inactive.ptr; + rs->rules[rs_num].active.ptr_array = + rs->rules[rs_num].inactive.ptr_array; + rs->rules[rs_num].active.rcount = + rs->rules[rs_num].inactive.rcount; rs->rules[rs_num].inactive.ptr = old_rules; + rs->rules[rs_num].inactive.ptr_array = old_array; + rs->rules[rs_num].inactive.rcount = old_rcount; + rs->rules[rs_num].active.ticket = rs->rules[rs_num].inactive.ticket; pf_calc_skip_steps(rs->rules[rs_num].active.ptr); + /* Purge the old rule list. */ while ((rule = TAILQ_FIRST(old_rules)) != NULL) pf_rm_rule(old_rules, rule); + if (rs->rules[rs_num].inactive.ptr_array) + free(rs->rules[rs_num].inactive.ptr_array, M_TEMP); + rs->rules[rs_num].inactive.ptr_array = NULL; + rs->rules[rs_num].inactive.rcount = 0; rs->rules[rs_num].inactive.open = 0; pf_remove_if_empty_ruleset(rs); splx(s); @@ -995,6 +838,46 @@ pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) } int +pf_setup_pfsync_matching(struct pf_ruleset *rs) +{ + MD5_CTX ctx; + struct pf_rule *rule; + int rs_cnt; + u_int8_t digest[PF_MD5_DIGEST_LENGTH]; + + MD5Init(&ctx); + for (rs_cnt = 0; rs_cnt < PF_RULESET_MAX; rs_cnt++) { + /* XXX PF_RULESET_SCRUB as well? 
*/ + if (rs_cnt == PF_RULESET_SCRUB) + continue; + + if (rs->rules[rs_cnt].inactive.ptr_array) + free(rs->rules[rs_cnt].inactive.ptr_array, M_TEMP); + rs->rules[rs_cnt].inactive.ptr_array = NULL; + + if (rs->rules[rs_cnt].inactive.rcount) { + rs->rules[rs_cnt].inactive.ptr_array = + malloc(sizeof(caddr_t) * + rs->rules[rs_cnt].inactive.rcount, + M_TEMP, M_NOWAIT); + + if (!rs->rules[rs_cnt].inactive.ptr_array) + return (ENOMEM); + } + + TAILQ_FOREACH(rule, rs->rules[rs_cnt].inactive.ptr, + entries) { + pf_hash_rule(&ctx, rule); + (rs->rules[rs_cnt].inactive.ptr_array)[rule->nr] = rule; + } + } + + MD5Final(digest, &ctx); + memcpy(pf_status.pf_chksum, digest, sizeof(pf_status.pf_chksum)); + return (0); +} + +int pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) { struct pf_pooladdr *pa = NULL; @@ -1039,7 +922,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCGETSRCNODES: case DIOCCLRSRCNODES: case DIOCIGETIFACES: - case DIOCICLRISTATS: case DIOCSETIFFLAG: case DIOCCLRIFFLAG: break; @@ -1058,7 +940,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (!(flags & FWRITE)) switch (cmd) { case DIOCGETRULES: - case DIOCGETRULE: case DIOCGETADDRS: case DIOCGETADDR: case DIOCGETSTATE: @@ -1071,6 +952,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCGETQSTATS: case DIOCGETRULESETS: case DIOCGETRULESET: + case DIOCNATLOOK: case DIOCRGETTABLES: case DIOCRGETTSTATS: case DIOCRGETADDRS: @@ -1090,13 +972,24 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCRSETADDRS: case DIOCRSETTFLAGS: if (((struct pfioc_table *)addr)->pfrio_flags & - PFR_FLAG_DUMMY) + PFR_FLAG_DUMMY) { + flags |= FWRITE; /* need write lock for dummy */ break; /* dummy operation ok */ + } return (EACCES); + case DIOCGETRULE: + if (((struct pfioc_rule *)addr)->action == PF_GET_CLR_CNTR) + return (EACCES); + break; default: return (EACCES); } + if (flags & FWRITE) + rw_enter_write(&pf_consistency_lock); + else + rw_enter_read(&pf_consistency_lock); + s = splsoftnet(); switch (cmd) { @@ -1160,6 +1053,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } bcopy(&pr->rule, rule, sizeof(struct pf_rule)); + rule->cuid = p->p_cred->p_ruid; + rule->cpid = p->p_pid; rule->anchor = NULL; rule->kif = NULL; TAILQ_INIT(&rule->rpool.list); @@ -1188,14 +1083,18 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) else rule->nr = 0; if (rule->ifname[0]) { - rule->kif = pfi_attach_rule(rule->ifname); + rule->kif = pfi_kif_get(rule->ifname); if (rule->kif == NULL) { pool_put(&pf_rule_pl, rule); error = EINVAL; break; } + pfi_kif_ref(rule->kif, PFI_KIF_REF_RULE); } + if (rule->rtableid > 0 && !rtable_exists(rule->rtableid)) + error = EBUSY; + #ifdef ALTQ /* set queue IDs */ if (rule->qname[0] != 0) { @@ -1218,6 +1117,10 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EBUSY; if (rule->rt && !rule->direction) error = EINVAL; +#if NPFLOG > 0 + if (rule->logif >= PFLOGIFS_MAX) + error = EINVAL; +#endif if (pf_rtlabel_add(&rule->src.addr) || pf_rtlabel_add(&rule->dst.addr)) error = EBUSY; @@ -1256,9 +1159,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } rule->rpool.cur = TAILQ_FIRST(&rule->rpool.list); - rule->evaluations = rule->packets = rule->bytes = 0; + rule->evaluations = rule->packets[0] = rule->packets[1] = + rule->bytes[0] = rule->bytes[1] = 0; 
TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr, rule, entries); + ruleset->rules[rs_num].inactive.rcount++; break; } @@ -1334,6 +1239,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) else pr->rule.skip[i].nr = rule->skip[i].ptr->nr; + + if (pr->action == PF_GET_CLR_CNTR) { + rule->evaluations = 0; + rule->packets[0] = rule->packets[1] = 0; + rule->bytes[0] = rule->bytes[1] = 0; + } break; } @@ -1389,6 +1300,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } bcopy(&pcr->rule, newrule, sizeof(struct pf_rule)); + newrule->cuid = p->p_cred->p_ruid; + newrule->cpid = p->p_pid; TAILQ_INIT(&newrule->rpool.list); /* initialize refcounting */ newrule->states = 0; @@ -1408,15 +1321,20 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } #endif /* INET6 */ if (newrule->ifname[0]) { - newrule->kif = pfi_attach_rule(newrule->ifname); + newrule->kif = pfi_kif_get(newrule->ifname); if (newrule->kif == NULL) { pool_put(&pf_rule_pl, newrule); error = EINVAL; break; } + pfi_kif_ref(newrule->kif, PFI_KIF_REF_RULE); } else newrule->kif = NULL; + if (newrule->rtableid > 0 && + !rtable_exists(newrule->rtableid)) + error = EBUSY; + #ifdef ALTQ /* set queue IDs */ if (newrule->qname[0] != 0) { @@ -1473,7 +1391,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) (newrule->action == PF_RDR) || (newrule->action == PF_BINAT) || (newrule->rt > PF_FASTROUTE)) && - !pcr->anchor[0])) && + !newrule->anchor)) && (TAILQ_FIRST(&newrule->rpool.list) == NULL)) error = EINVAL; @@ -1482,8 +1400,9 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } newrule->rpool.cur = TAILQ_FIRST(&newrule->rpool.list); - newrule->evaluations = newrule->packets = 0; - newrule->bytes = 0; + newrule->evaluations = 0; + newrule->packets[0] = newrule->packets[1] = 0; + newrule->bytes[0] = newrule->bytes[1] = 0; } pf_empty_pool(&pf_pabuf); @@ -1506,9 +1425,10 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } } - if (pcr->action == PF_CHANGE_REMOVE) + if (pcr->action == PF_CHANGE_REMOVE) { pf_rm_rule(ruleset->rules[rs_num].active.ptr, oldrule); - else { + ruleset->rules[rs_num].active.rcount--; + } else { if (oldrule == NULL) TAILQ_INSERT_TAIL( ruleset->rules[rs_num].active.ptr, @@ -1520,6 +1440,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) TAILQ_INSERT_AFTER( ruleset->rules[rs_num].active.ptr, oldrule, newrule, entries); + ruleset->rules[rs_num].active.rcount++; } nr = 0; @@ -1536,23 +1457,24 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } case DIOCCLRSTATES: { - struct pf_state *state; + struct pf_state *state, *nexts; struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr; int killed = 0; - RB_FOREACH(state, pf_state_tree_id, &tree_id) { + for (state = RB_MIN(pf_state_tree_id, &tree_id); state; + state = nexts) { + nexts = RB_NEXT(pf_state_tree_id, &tree_id, state); + if (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname, state->u.s.kif->pfik_name)) { - state->timeout = PFTM_PURGE; #if NPFSYNC /* don't send out individual delete messages */ state->sync_flags = PFSTATE_NOSYNC; #endif + pf_unlink_state(state); killed++; } } - pf_purge_expired_states(); - pf_status.states = 0; psk->psk_af = killed; #if NPFSYNC pfsync_clear_states(pf_status.hostid, psk->psk_ifname); @@ -1561,37 +1483,52 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } case DIOCKILLSTATES: { - struct pf_state *state; + 
struct pf_state *state, *nexts; + struct pf_state_host *src, *dst; struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr; int killed = 0; - RB_FOREACH(state, pf_state_tree_id, &tree_id) { + for (state = RB_MIN(pf_state_tree_id, &tree_id); state; + state = nexts) { + nexts = RB_NEXT(pf_state_tree_id, &tree_id, state); + + if (state->direction == PF_OUT) { + src = &state->lan; + dst = &state->ext; + } else { + src = &state->ext; + dst = &state->lan; + } if ((!psk->psk_af || state->af == psk->psk_af) && (!psk->psk_proto || psk->psk_proto == state->proto) && PF_MATCHA(psk->psk_src.neg, &psk->psk_src.addr.v.a.addr, &psk->psk_src.addr.v.a.mask, - &state->lan.addr, state->af) && + &src->addr, state->af) && PF_MATCHA(psk->psk_dst.neg, &psk->psk_dst.addr.v.a.addr, &psk->psk_dst.addr.v.a.mask, - &state->ext.addr, state->af) && + &dst->addr, state->af) && (psk->psk_src.port_op == 0 || pf_match_port(psk->psk_src.port_op, psk->psk_src.port[0], psk->psk_src.port[1], - state->lan.port)) && + src->port)) && (psk->psk_dst.port_op == 0 || pf_match_port(psk->psk_dst.port_op, psk->psk_dst.port[0], psk->psk_dst.port[1], - state->ext.port)) && + dst->port)) && (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname, state->u.s.kif->pfik_name))) { - state->timeout = PFTM_PURGE; +#if NPFSYNC > 0 + /* send immediate delete of state */ + pfsync_delete_state(state); + state->sync_flags |= PFSTATE_NOSYNC; +#endif + pf_unlink_state(state); killed++; } } - pf_purge_expired_states(); psk->psk_af = killed; break; } @@ -1611,7 +1548,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = ENOMEM; break; } - kif = pfi_lookup_create(ps->state.u.ifname); + kif = pfi_kif_get(ps->state.u.ifname); if (kif == NULL) { pool_put(&pf_state_pl, state); error = ENOENT; @@ -1629,7 +1566,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) state->bytes[0] = state->bytes[1] = 0; if (pf_insert_state(kif, state)) { - pfi_maybe_destroy(kif); + pfi_kif_unref(kif, PFI_KIF_REF_NONE); pool_put(&pf_state_pl, state); error = ENOMEM; } @@ -1640,6 +1577,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pfioc_state *ps = (struct pfioc_state *)addr; struct pf_state *state; u_int32_t nr; + int secs; nr = 0; RB_FOREACH(state, pf_state_tree_id, &tree_id) { @@ -1651,15 +1589,19 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EBUSY; break; } - bcopy(state, &ps->state, sizeof(struct pf_state)); + secs = time_second; + bcopy(state, &ps->state, sizeof(ps->state)); + strlcpy(ps->state.u.ifname, state->u.s.kif->pfik_name, + sizeof(ps->state.u.ifname)); ps->state.rule.nr = state->rule.ptr->nr; ps->state.nat_rule.nr = (state->nat_rule.ptr == NULL) ? -1 : state->nat_rule.ptr->nr; ps->state.anchor.nr = (state->anchor.ptr == NULL) ? 
-1 : state->anchor.ptr->nr; + ps->state.creation = secs - ps->state.creation; ps->state.expire = pf_state_expires(state); - if (ps->state.expire > time_second) - ps->state.expire -= time_second; + if (ps->state.expire > secs) + ps->state.expire -= secs; else ps->state.expire = 0; break; @@ -1668,48 +1610,57 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCGETSTATES: { struct pfioc_states *ps = (struct pfioc_states *)addr; struct pf_state *state; - struct pf_state *p, pstore; - struct pfi_kif *kif; + struct pf_state *p, *pstore; u_int32_t nr = 0; int space = ps->ps_len; if (space == 0) { - TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) - nr += kif->pfik_states; + nr = pf_status.states; ps->ps_len = sizeof(struct pf_state) * nr; break; } + pstore = malloc(sizeof(*pstore), M_TEMP, M_WAITOK); + p = ps->ps_states; - TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) - RB_FOREACH(state, pf_state_tree_ext_gwy, - &kif->pfik_ext_gwy) { + + state = TAILQ_FIRST(&state_list); + while (state) { + if (state->timeout != PFTM_UNLINKED) { int secs = time_second; if ((nr+1) * sizeof(*p) > (unsigned)ps->ps_len) break; - bcopy(state, &pstore, sizeof(pstore)); - strlcpy(pstore.u.ifname, kif->pfik_name, - sizeof(pstore.u.ifname)); - pstore.rule.nr = state->rule.ptr->nr; - pstore.nat_rule.nr = (state->nat_rule.ptr == + bcopy(state, pstore, sizeof(*pstore)); + strlcpy(pstore->u.ifname, + state->u.s.kif->pfik_name, + sizeof(pstore->u.ifname)); + pstore->rule.nr = state->rule.ptr->nr; + pstore->nat_rule.nr = (state->nat_rule.ptr == NULL) ? -1 : state->nat_rule.ptr->nr; - pstore.anchor.nr = (state->anchor.ptr == + pstore->anchor.nr = (state->anchor.ptr == NULL) ? -1 : state->anchor.ptr->nr; - pstore.creation = secs - pstore.creation; - pstore.expire = pf_state_expires(state); - if (pstore.expire > secs) - pstore.expire -= secs; + pstore->creation = secs - pstore->creation; + pstore->expire = pf_state_expires(state); + if (pstore->expire > secs) + pstore->expire -= secs; else - pstore.expire = 0; - error = copyout(&pstore, p, sizeof(*p)); - if (error) + pstore->expire = 0; + error = copyout(pstore, p, sizeof(*p)); + if (error) { + free(pstore, M_TEMP); goto fail; + } p++; nr++; } + state = TAILQ_NEXT(state, u.s.entry_list); + } + ps->ps_len = sizeof(struct pf_state) * nr; + + free(pstore, M_TEMP); break; } @@ -1739,16 +1690,16 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) bzero(pf_status.counters, sizeof(pf_status.counters)); bzero(pf_status.fcounters, sizeof(pf_status.fcounters)); bzero(pf_status.scounters, sizeof(pf_status.scounters)); + pf_status.since = time_second; if (*pf_status.ifname) - pfi_clr_istats(pf_status.ifname, NULL, - PFI_FLAG_INSTANCE); + pfi_clr_istats(pf_status.ifname); break; } case DIOCNATLOOK: { struct pfioc_natlook *pnl = (struct pfioc_natlook *)addr; struct pf_state *state; - struct pf_state key; + struct pf_state_cmp key; int m = 0, direction = pnl->direction; key.af = pnl->af; @@ -1757,7 +1708,9 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (!pnl->proto || PF_AZERO(&pnl->saddr, pnl->af) || PF_AZERO(&pnl->daddr, pnl->af) || - !pnl->dport || !pnl->sport) + ((pnl->proto == IPPROTO_TCP || + pnl->proto == IPPROTO_UDP) && + (!pnl->dport || !pnl->sport))) error = EINVAL; else { /* @@ -1813,7 +1766,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) goto fail; } old = pf_default_rule.timeout[pt->timeout]; + if (pt->timeout == PFTM_INTERVAL && pt->seconds == 0) + pt->seconds = 
1; pf_default_rule.timeout[pt->timeout] = pt->seconds; + if (pt->timeout == PFTM_INTERVAL && pt->seconds < old) + wakeup(pf_purge_thread); pt->seconds = old; break; } @@ -1868,13 +1825,16 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } case DIOCCLRRULECTRS: { + /* obsoleted by DIOCGETRULE with action=PF_GET_CLR_CNTR */ struct pf_ruleset *ruleset = &pf_main_ruleset; struct pf_rule *rule; TAILQ_FOREACH(rule, - ruleset->rules[PF_RULESET_FILTER].active.ptr, entries) - rule->evaluations = rule->packets = - rule->bytes = 0; + ruleset->rules[PF_RULESET_FILTER].active.ptr, entries) { + rule->evaluations = 0; + rule->packets[0] = rule->packets[1] = 0; + rule->bytes[0] = rule->bytes[1] = 0; + } break; } @@ -2067,16 +2027,17 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } bcopy(&pp->addr, pa, sizeof(struct pf_pooladdr)); if (pa->ifname[0]) { - pa->kif = pfi_attach_rule(pa->ifname); + pa->kif = pfi_kif_get(pa->ifname); if (pa->kif == NULL) { pool_put(&pf_pooladdr_pl, pa); error = EINVAL; break; } + pfi_kif_ref(pa->kif, PFI_KIF_REF_RULE); } if (pfi_dynaddr_setup(&pa->addr, pp->af)) { pfi_dynaddr_remove(&pa->addr); - pfi_detach_rule(pa->kif); + pfi_kif_unref(pa->kif, PFI_KIF_REF_RULE); pool_put(&pf_pooladdr_pl, pa); error = EINVAL; break; @@ -2176,18 +2137,19 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } #endif /* INET6 */ if (newpa->ifname[0]) { - newpa->kif = pfi_attach_rule(newpa->ifname); + newpa->kif = pfi_kif_get(newpa->ifname); if (newpa->kif == NULL) { pool_put(&pf_pooladdr_pl, newpa); error = EINVAL; break; } + pfi_kif_ref(newpa->kif, PFI_KIF_REF_RULE); } else newpa->kif = NULL; if (pfi_dynaddr_setup(&newpa->addr, pca->af) || pf_tbladdr_setup(ruleset, &newpa->addr)) { pfi_dynaddr_remove(&newpa->addr); - pfi_detach_rule(newpa->kif); + pfi_kif_unref(newpa->kif, PFI_KIF_REF_RULE); pool_put(&pf_pooladdr_pl, newpa); error = EINVAL; break; @@ -2216,7 +2178,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) TAILQ_REMOVE(&pool->list, oldpa, entries); pfi_dynaddr_remove(&oldpa->addr); pf_tbladdr_remove(&oldpa->addr); - pfi_detach_rule(oldpa->kif); + pfi_kif_unref(oldpa->kif, PFI_KIF_REF_RULE); pool_put(&pf_pooladdr_pl, oldpa); } else { if (oldpa == NULL) @@ -2426,7 +2388,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = pfr_set_addrs(&io->pfrio_table, io->pfrio_buffer, io->pfrio_size, &io->pfrio_size2, &io->pfrio_nadd, &io->pfrio_ndel, &io->pfrio_nchange, io->pfrio_flags | - PFR_FLAG_USERIOCTL); + PFR_FLAG_USERIOCTL, 0); break; } @@ -2506,150 +2468,203 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } case DIOCXBEGIN: { - struct pfioc_trans *io = (struct pfioc_trans *) - addr; - static struct pfioc_trans_e ioe; - static struct pfr_table table; - int i; + struct pfioc_trans *io = (struct pfioc_trans *)addr; + struct pfioc_trans_e *ioe; + struct pfr_table *table; + int i; - if (io->esize != sizeof(ioe)) { + if (io->esize != sizeof(*ioe)) { error = ENODEV; goto fail; } + ioe = (struct pfioc_trans_e *)malloc(sizeof(*ioe), + M_TEMP, M_WAITOK); + table = (struct pfr_table *)malloc(sizeof(*table), + M_TEMP, M_WAITOK); for (i = 0; i < io->size; i++) { - if (copyin(io->array+i, &ioe, sizeof(ioe))) { + if (copyin(io->array+i, ioe, sizeof(*ioe))) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EFAULT; goto fail; } - switch (ioe.rs_num) { + switch (ioe->rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: - if (ioe.anchor[0]) { + if 
(ioe->anchor[0]) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EINVAL; goto fail; } - if ((error = pf_begin_altq(&ioe.ticket))) + if ((error = pf_begin_altq(&ioe->ticket))) { + free(table, M_TEMP); + free(ioe, M_TEMP); goto fail; + } break; #endif /* ALTQ */ case PF_RULESET_TABLE: - bzero(&table, sizeof(table)); - strlcpy(table.pfrt_anchor, ioe.anchor, - sizeof(table.pfrt_anchor)); - if ((error = pfr_ina_begin(&table, - &ioe.ticket, NULL, 0))) + bzero(table, sizeof(*table)); + strlcpy(table->pfrt_anchor, ioe->anchor, + sizeof(table->pfrt_anchor)); + if ((error = pfr_ina_begin(table, + &ioe->ticket, NULL, 0))) { + free(table, M_TEMP); + free(ioe, M_TEMP); goto fail; + } break; default: - if ((error = pf_begin_rules(&ioe.ticket, - ioe.rs_num, ioe.anchor))) + if ((error = pf_begin_rules(&ioe->ticket, + ioe->rs_num, ioe->anchor))) { + free(table, M_TEMP); + free(ioe, M_TEMP); goto fail; + } break; } - if (copyout(&ioe, io->array+i, sizeof(io->array[i]))) { + if (copyout(ioe, io->array+i, sizeof(io->array[i]))) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EFAULT; goto fail; } } + free(table, M_TEMP); + free(ioe, M_TEMP); break; } case DIOCXROLLBACK: { - struct pfioc_trans *io = (struct pfioc_trans *) - addr; - static struct pfioc_trans_e ioe; - static struct pfr_table table; - int i; + struct pfioc_trans *io = (struct pfioc_trans *)addr; + struct pfioc_trans_e *ioe; + struct pfr_table *table; + int i; - if (io->esize != sizeof(ioe)) { + if (io->esize != sizeof(*ioe)) { error = ENODEV; goto fail; } + ioe = (struct pfioc_trans_e *)malloc(sizeof(*ioe), + M_TEMP, M_WAITOK); + table = (struct pfr_table *)malloc(sizeof(*table), + M_TEMP, M_WAITOK); for (i = 0; i < io->size; i++) { - if (copyin(io->array+i, &ioe, sizeof(ioe))) { + if (copyin(io->array+i, ioe, sizeof(*ioe))) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EFAULT; goto fail; } - switch (ioe.rs_num) { + switch (ioe->rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: - if (ioe.anchor[0]) { + if (ioe->anchor[0]) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EINVAL; goto fail; } - if ((error = pf_rollback_altq(ioe.ticket))) + if ((error = pf_rollback_altq(ioe->ticket))) { + free(table, M_TEMP); + free(ioe, M_TEMP); goto fail; /* really bad */ + } break; #endif /* ALTQ */ case PF_RULESET_TABLE: - bzero(&table, sizeof(table)); - strlcpy(table.pfrt_anchor, ioe.anchor, - sizeof(table.pfrt_anchor)); - if ((error = pfr_ina_rollback(&table, - ioe.ticket, NULL, 0))) + bzero(table, sizeof(*table)); + strlcpy(table->pfrt_anchor, ioe->anchor, + sizeof(table->pfrt_anchor)); + if ((error = pfr_ina_rollback(table, + ioe->ticket, NULL, 0))) { + free(table, M_TEMP); + free(ioe, M_TEMP); goto fail; /* really bad */ + } break; default: - if ((error = pf_rollback_rules(ioe.ticket, - ioe.rs_num, ioe.anchor))) + if ((error = pf_rollback_rules(ioe->ticket, + ioe->rs_num, ioe->anchor))) { + free(table, M_TEMP); + free(ioe, M_TEMP); goto fail; /* really bad */ + } break; } } + free(table, M_TEMP); + free(ioe, M_TEMP); break; } case DIOCXCOMMIT: { - struct pfioc_trans *io = (struct pfioc_trans *) - addr; - static struct pfioc_trans_e ioe; - static struct pfr_table table; - struct pf_ruleset *rs; - int i; - - if (io->esize != sizeof(ioe)) { + struct pfioc_trans *io = (struct pfioc_trans *)addr; + struct pfioc_trans_e *ioe; + struct pfr_table *table; + struct pf_ruleset *rs; + int i; + + if (io->esize != sizeof(*ioe)) { error = ENODEV; goto fail; } + ioe = (struct pfioc_trans_e *)malloc(sizeof(*ioe), + M_TEMP, M_WAITOK); + table = (struct 
pfr_table *)malloc(sizeof(*table), + M_TEMP, M_WAITOK); /* first makes sure everything will succeed */ for (i = 0; i < io->size; i++) { - if (copyin(io->array+i, &ioe, sizeof(ioe))) { + if (copyin(io->array+i, ioe, sizeof(*ioe))) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EFAULT; goto fail; } - switch (ioe.rs_num) { + switch (ioe->rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: - if (ioe.anchor[0]) { + if (ioe->anchor[0]) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EINVAL; goto fail; } - if (!altqs_inactive_open || ioe.ticket != + if (!altqs_inactive_open || ioe->ticket != ticket_altqs_inactive) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EBUSY; goto fail; } break; #endif /* ALTQ */ case PF_RULESET_TABLE: - rs = pf_find_ruleset(ioe.anchor); - if (rs == NULL || !rs->topen || ioe.ticket != + rs = pf_find_ruleset(ioe->anchor); + if (rs == NULL || !rs->topen || ioe->ticket != rs->tticket) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EBUSY; goto fail; } break; default: - if (ioe.rs_num < 0 || ioe.rs_num >= + if (ioe->rs_num < 0 || ioe->rs_num >= PF_RULESET_MAX) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EINVAL; goto fail; } - rs = pf_find_ruleset(ioe.anchor); + rs = pf_find_ruleset(ioe->anchor); if (rs == NULL || - !rs->rules[ioe.rs_num].inactive.open || - rs->rules[ioe.rs_num].inactive.ticket != - ioe.ticket) { + !rs->rules[ioe->rs_num].inactive.open || + rs->rules[ioe->rs_num].inactive.ticket != + ioe->ticket) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EBUSY; goto fail; } @@ -2658,39 +2673,51 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } /* now do the commit - no errors should happen here */ for (i = 0; i < io->size; i++) { - if (copyin(io->array+i, &ioe, sizeof(ioe))) { + if (copyin(io->array+i, ioe, sizeof(*ioe))) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EFAULT; goto fail; } - switch (ioe.rs_num) { + switch (ioe->rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: - if ((error = pf_commit_altq(ioe.ticket))) + if ((error = pf_commit_altq(ioe->ticket))) { + free(table, M_TEMP); + free(ioe, M_TEMP); goto fail; /* really bad */ + } break; #endif /* ALTQ */ case PF_RULESET_TABLE: - bzero(&table, sizeof(table)); - strlcpy(table.pfrt_anchor, ioe.anchor, - sizeof(table.pfrt_anchor)); - if ((error = pfr_ina_commit(&table, ioe.ticket, - NULL, NULL, 0))) + bzero(table, sizeof(*table)); + strlcpy(table->pfrt_anchor, ioe->anchor, + sizeof(table->pfrt_anchor)); + if ((error = pfr_ina_commit(table, ioe->ticket, + NULL, NULL, 0))) { + free(table, M_TEMP); + free(ioe, M_TEMP); goto fail; /* really bad */ + } break; default: - if ((error = pf_commit_rules(ioe.ticket, - ioe.rs_num, ioe.anchor))) + if ((error = pf_commit_rules(ioe->ticket, + ioe->rs_num, ioe->anchor))) { + free(table, M_TEMP); + free(ioe, M_TEMP); goto fail; /* really bad */ + } break; } } + free(table, M_TEMP); + free(ioe, M_TEMP); break; } case DIOCGETSRCNODES: { struct pfioc_src_nodes *psn = (struct pfioc_src_nodes *)addr; - struct pf_src_node *n; - struct pf_src_node *p, pstore; + struct pf_src_node *n, *p, *pstore; u_int32_t nr = 0; int space = psn->psn_len; @@ -2701,6 +2728,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } + pstore = malloc(sizeof(*pstore), M_TEMP, M_WAITOK); + p = psn->psn_src_nodes; RB_FOREACH(n, pf_src_tree, &tree_src_tracking) { int secs = time_second, diff; @@ -2708,31 +2737,35 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if ((nr + 1) * sizeof(*p) > 
(unsigned)psn->psn_len) break; - bcopy(n, &pstore, sizeof(pstore)); + bcopy(n, pstore, sizeof(*pstore)); if (n->rule.ptr != NULL) - pstore.rule.nr = n->rule.ptr->nr; - pstore.creation = secs - pstore.creation; - if (pstore.expire > secs) - pstore.expire -= secs; + pstore->rule.nr = n->rule.ptr->nr; + pstore->creation = secs - pstore->creation; + if (pstore->expire > secs) + pstore->expire -= secs; else - pstore.expire = 0; + pstore->expire = 0; /* adjust the connection rate estimate */ diff = secs - n->conn_rate.last; if (diff >= n->conn_rate.seconds) - pstore.conn_rate.count = 0; + pstore->conn_rate.count = 0; else - pstore.conn_rate.count -= + pstore->conn_rate.count -= n->conn_rate.count * diff / n->conn_rate.seconds; - error = copyout(&pstore, p, sizeof(*p)); - if (error) + error = copyout(pstore, p, sizeof(*p)); + if (error) { + free(pstore, M_TEMP); goto fail; + } p++; nr++; } psn->psn_len = sizeof(struct pf_src_node) * nr; + + free(pstore, M_TEMP); break; } @@ -2748,11 +2781,50 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) n->expire = 1; n->states = 0; } - pf_purge_expired_src_nodes(); + pf_purge_expired_src_nodes(1); pf_status.src_nodes = 0; break; } + case DIOCKILLSRCNODES: { + struct pf_src_node *sn; + struct pf_state *s; + struct pfioc_src_node_kill *psnk = \ + (struct pfioc_src_node_kill *) addr; + int killed = 0; + + RB_FOREACH(sn, pf_src_tree, &tree_src_tracking) { + if (PF_MATCHA(psnk->psnk_src.neg, \ + &psnk->psnk_src.addr.v.a.addr, \ + &psnk->psnk_src.addr.v.a.mask, \ + &sn->addr, sn->af) && + PF_MATCHA(psnk->psnk_dst.neg, \ + &psnk->psnk_dst.addr.v.a.addr, \ + &psnk->psnk_dst.addr.v.a.mask, \ + &sn->raddr, sn->af)) { + /* Handle state to src_node linkage */ + if (sn->states != 0) { + RB_FOREACH(s, pf_state_tree_id, + &tree_id) { + if (s->src_node == sn) + s->src_node = NULL; + if (s->nat_src_node == sn) + s->nat_src_node = NULL; + } + sn->states = 0; + } + sn->expire = 1; + killed++; + } + } + + if (killed > 0) + pf_purge_expired_src_nodes(1); + + psnk->psnk_af = killed; + break; + } + case DIOCSETHOSTID: { u_int32_t *hostid = (u_int32_t *)addr; @@ -2770,20 +2842,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCIGETIFACES: { struct pfioc_iface *io = (struct pfioc_iface *)addr; - if (io->pfiio_esize != sizeof(struct pfi_if)) { + if (io->pfiio_esize != sizeof(struct pfi_kif)) { error = ENODEV; break; } error = pfi_get_ifaces(io->pfiio_name, io->pfiio_buffer, - &io->pfiio_size, io->pfiio_flags); - break; - } - - case DIOCICLRISTATS: { - struct pfioc_iface *io = (struct pfioc_iface *)addr; - - error = pfi_clr_istats(io->pfiio_name, &io->pfiio_nzero, - io->pfiio_flags); + &io->pfiio_size); break; } @@ -2807,5 +2871,9 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } fail: splx(s); + if (flags & FWRITE) + rw_exit_write(&pf_consistency_lock); + else + rw_exit_read(&pf_consistency_lock); return (error); } diff --git a/sys/contrib/pf/net/pf_norm.c b/sys/contrib/pf/net/pf_norm.c index 3c3dbc0..df339ae 100644 --- a/sys/contrib/pf/net/pf_norm.c +++ b/sys/contrib/pf/net/pf_norm.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_norm.c,v 1.97 2004/09/21 16:59:12 aaron Exp $ */ +/* $OpenBSD: pf_norm.c,v 1.107 2006/04/16 00:59:52 pascoe Exp $ */ /* * Copyright 2001 Niels Provos @@ -412,7 +412,7 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, break; } - /* This fragment is completely overlapped, loose it */ + /* This fragment is completely overlapped, lose it */ next = LIST_NEXT(frea, fr_next); 
m_freem(frea->fr_m); LIST_REMOVE(frea, fr_next); @@ -704,7 +704,7 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, } else { hosed++; } - } else { + } else if (frp == NULL) { /* There is a gap between fragments */ DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n", h->ip_id, -aftercut, off, max, fra->fr_off, @@ -831,8 +831,7 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != dir) r = r->skip[PF_SKIP_DIR].ptr; @@ -841,19 +840,23 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, else if (r->proto && r->proto != h->ip_p) r = r->skip[PF_SKIP_PROTO].ptr; else if (PF_MISMATCHAW(&r->src.addr, - (struct pf_addr *)&h->ip_src.s_addr, AF_INET, r->src.neg)) + (struct pf_addr *)&h->ip_src.s_addr, AF_INET, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (PF_MISMATCHAW(&r->dst.addr, - (struct pf_addr *)&h->ip_dst.s_addr, AF_INET, r->dst.neg)) + (struct pf_addr *)&h->ip_dst.s_addr, AF_INET, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else break; } - if (r == NULL) + if (r == NULL || r->action == PF_NOSCRUB) return (PF_PASS); - else - r->packets++; + else { + r->packets[dir == PF_OUT]++; + r->bytes[dir == PF_OUT] += pd->tot_len; + } /* Check for illegal packets */ if (hlen < (int)sizeof(struct ip)) @@ -863,8 +866,12 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, goto drop; /* Clear IP_DF if the rule uses the no-df option */ - if (r->rule_flag & PFRULE_NODF) + if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) { + u_int16_t ip_off = h->ip_off; + h->ip_off &= htons(~IP_DF); + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0); + } /* We will need other tests here */ if (!fragoff && !mff) @@ -922,6 +929,18 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, if (m == NULL) return (PF_DROP); + /* use mtag from concatenated mbuf chain */ + pd->pf_mtag = pf_find_mtag(m); +#ifdef DIAGNOSTIC + if (pd->pf_mtag == NULL) { + printf("%s: pf_find_mtag returned NULL(1)\n", __func__); + if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) { + m_freem(m); + *m0 = NULL; + goto no_mem; + } + } +#endif if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) goto drop; @@ -930,15 +949,13 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, /* non-buffering fragment cache (drops or masks overlaps) */ int nomem = 0; - if (dir == PF_OUT) { - if (m_tag_find(m, PACKET_TAG_PF_FRAGCACHE, NULL) != - NULL) { - /* Already passed the fragment cache in the - * input direction. If we continued, it would - * appear to be a dup and would be dropped. - */ - goto fragment_pass; - } + if (dir == PF_OUT && pd->pf_mtag->flags & PF_TAG_FRAGCACHE) { + /* + * Already passed the fragment cache in the + * input direction. If we continued, it would + * appear to be a dup and would be dropped. 
+ */ + goto fragment_pass; } frag = pf_find_fragment(h, &pf_cache_tree); @@ -959,14 +976,21 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, goto drop; } - if (dir == PF_IN) { - struct m_tag *mtag; - - mtag = m_tag_get(PACKET_TAG_PF_FRAGCACHE, 0, M_NOWAIT); - if (mtag == NULL) + /* use mtag from copied and trimmed mbuf chain */ + pd->pf_mtag = pf_find_mtag(m); +#ifdef DIAGNOSTIC + if (pd->pf_mtag == NULL) { + printf("%s: pf_find_mtag returned NULL(2)\n", __func__); + if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) { + m_freem(m); + *m0 = NULL; goto no_mem; - m_tag_prepend(m, mtag); + } } +#endif + if (dir == PF_IN) + pd->pf_mtag->flags |= PF_TAG_FRAGCACHE; + if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) goto drop; goto fragment_pass; @@ -974,11 +998,20 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, no_fragment: /* At this point, only IP_DF is allowed in ip_off */ - h->ip_off &= htons(IP_DF); + if (h->ip_off & ~htons(IP_DF)) { + u_int16_t ip_off = h->ip_off; + + h->ip_off &= htons(IP_DF); + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0); + } /* Enforce a minimum ttl, may cause endless packet loops */ - if (r->min_ttl && h->ip_ttl < r->min_ttl) + if (r->min_ttl && h->ip_ttl < r->min_ttl) { + u_int16_t ip_ttl = h->ip_ttl; + h->ip_ttl = r->min_ttl; + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0); + } if (r->rule_flag & PFRULE_RANDOMID) { u_int16_t ip_id = h->ip_id; @@ -993,8 +1026,12 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, fragment_pass: /* Enforce a minimum ttl, may cause endless packet loops */ - if (r->min_ttl && h->ip_ttl < r->min_ttl) + if (r->min_ttl && h->ip_ttl < r->min_ttl) { + u_int16_t ip_ttl = h->ip_ttl; + h->ip_ttl = r->min_ttl; + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0); + } if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) pd->flags |= PFDESC_IP_REAS; return (PF_PASS); @@ -1002,13 +1039,13 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, no_mem: REASON_SET(reason, PFRES_MEMORY); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL); + PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd); return (PF_DROP); drop: REASON_SET(reason, PFRES_NORM); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL); + PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd); return (PF_DROP); bad: @@ -1020,7 +1057,7 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, REASON_SET(reason, PFRES_FRAG); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL); + PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd); return (PF_DROP); } @@ -1048,8 +1085,7 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != dir) r = r->skip[PF_SKIP_DIR].ptr; @@ -1060,19 +1096,23 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, r = r->skip[PF_SKIP_PROTO].ptr; #endif else if (PF_MISMATCHAW(&r->src.addr, - (struct pf_addr *)&h->ip6_src, AF_INET6, r->src.neg)) + (struct pf_addr *)&h->ip6_src, AF_INET6, + r->src.neg, 
kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (PF_MISMATCHAW(&r->dst.addr, - (struct pf_addr *)&h->ip6_dst, AF_INET6, r->dst.neg)) + (struct pf_addr *)&h->ip6_dst, AF_INET6, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else break; } - if (r == NULL) + if (r == NULL || r->action == PF_NOSCRUB) return (PF_PASS); - else - r->packets++; + else { + r->packets[dir == PF_OUT]++; + r->bytes[dir == PF_OUT] += pd->tot_len; + } /* Check for illegal packets */ if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len) @@ -1184,19 +1224,19 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, shortpkt: REASON_SET(reason, PFRES_SHORT); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL); + PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd); return (PF_DROP); drop: REASON_SET(reason, PFRES_NORM); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL); + PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd); return (PF_DROP); badfrag: REASON_SET(reason, PFRES_FRAG); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL); + PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd); return (PF_DROP); } #endif /* INET6 */ @@ -1215,8 +1255,7 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != dir) r = r->skip[PF_SKIP_DIR].ptr; @@ -1224,12 +1263,14 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.neg)) + else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (r->src.port_op && !pf_match_port(r->src.port_op, r->src.port[0], r->src.port[1], th->th_sport)) r = r->skip[PF_SKIP_SRC_PORT].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.neg)) + else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->dst.port_op && !pf_match_port(r->dst.port_op, r->dst.port[0], r->dst.port[1], th->th_dport)) @@ -1246,8 +1287,10 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, if (rm == NULL || rm->action == PF_NOSCRUB) return (PF_PASS); - else - r->packets++; + else { + r->packets[dir == PF_OUT]++; + r->bytes[dir == PF_OUT] += pd->tot_len; + } if (rm->rule_flag & PFRULE_REASSEMBLE_TCP) pd->flags |= PFDESC_TCP_NORM; @@ -1309,7 +1352,7 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, tcp_drop: REASON_SET(&reason, PFRES_NORM); if (rm != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL); + PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL, pd); return (PF_DROP); } @@ -1743,7 +1786,7 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, * timestamps. And require all data packets to contain a timestamp * if the first does. PAWS implicitly requires that all data packets be * timestamped. 
But I think there are middle-man devices that hijack - * TCP streams immedietly after the 3whs and don't timestamp their + * TCP streams immediately after the 3whs and don't timestamp their * packets (seen in a WWW accelerator or cache). */ if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags & diff --git a/sys/contrib/pf/net/pf_osfp.c b/sys/contrib/pf/net/pf_osfp.c index b2adcf2..15dca8e 100644 --- a/sys/contrib/pf/net/pf_osfp.c +++ b/sys/contrib/pf/net/pf_osfp.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_osfp.c,v 1.10 2004/04/09 19:30:41 frantzen Exp $ */ +/* $OpenBSD: pf_osfp.c,v 1.12 2006/12/13 18:14:10 itojun Exp $ */ /* * Copyright (c) 2003 Mike Frantzen @@ -32,9 +32,10 @@ #include #include -#ifdef INET6 #include -#endif /* INET6 */ +#ifdef _KERNEL +#include +#endif #ifdef _KERNEL @@ -51,6 +52,7 @@ typedef struct pool pool_t; # include # include # include +# include # define pool_t int # define pool_get(pool, flags) malloc(*(pool)) # define pool_put(pool, item) free(item) @@ -87,38 +89,96 @@ pf_osfp_fingerprint(struct pf_pdesc *pd, struct mbuf *m, int off, const struct tcphdr *tcp) { struct ip *ip; + struct ip6_hdr *ip6; char hdr[60]; - /* XXX don't have a fingerprint database for IPv6 :-( */ - if (pd->af != PF_INET || pd->proto != IPPROTO_TCP || (tcp->th_off << 2) - < sizeof(*tcp)) + if ((pd->af != PF_INET && pd->af != PF_INET6) || + pd->proto != IPPROTO_TCP || (tcp->th_off << 2) < sizeof(*tcp)) return (NULL); - ip = mtod(m, struct ip *); - if (!pf_pull_hdr(m, off, hdr, tcp->th_off << 2, NULL, NULL, pd->af)) - return (NULL); + if (pd->af == PF_INET) { + ip = mtod(m, struct ip *); + ip6 = (struct ip6_hdr *)NULL; + } else { + ip = (struct ip *)NULL; + ip6 = mtod(m, struct ip6_hdr *); + } + if (!pf_pull_hdr(m, off, hdr, tcp->th_off << 2, NULL, NULL, + pd->af)) return (NULL); - return (pf_osfp_fingerprint_hdr(ip, (struct tcphdr *)hdr)); + return (pf_osfp_fingerprint_hdr(ip, ip6, (struct tcphdr *)hdr)); } #endif /* _KERNEL */ struct pf_osfp_enlist * -pf_osfp_fingerprint_hdr(const struct ip *ip, const struct tcphdr *tcp) +pf_osfp_fingerprint_hdr(const struct ip *ip, const struct ip6_hdr *ip6, const struct tcphdr *tcp) { struct pf_os_fingerprint fp, *fpresult; int cnt, optlen = 0; const u_int8_t *optp; +#ifdef _KERNEL + char srcname[128]; +#else + char srcname[NI_MAXHOST]; +#endif - if ((tcp->th_flags & (TH_SYN|TH_ACK)) != TH_SYN || (ip->ip_off & - htons(IP_OFFMASK))) + if ((tcp->th_flags & (TH_SYN|TH_ACK)) != TH_SYN) return (NULL); + if (ip) { + if ((ip->ip_off & htons(IP_OFFMASK)) != 0) + return (NULL); + } memset(&fp, 0, sizeof(fp)); - fp.fp_psize = ntohs(ip->ip_len); - fp.fp_ttl = ip->ip_ttl; - if (ip->ip_off & htons(IP_DF)) + if (ip) { +#ifndef _KERNEL + struct sockaddr_in sin; +#endif + + fp.fp_psize = ntohs(ip->ip_len); + fp.fp_ttl = ip->ip_ttl; + if (ip->ip_off & htons(IP_DF)) + fp.fp_flags |= PF_OSFP_DF; +#ifdef _KERNEL + strlcpy(srcname, inet_ntoa(ip->ip_src), sizeof(srcname)); +#else + memset(&sin, 0, sizeof(sin)); + sin.sin_family = AF_INET; + sin.sin_len = sizeof(struct sockaddr_in); + sin.sin_addr = ip->ip_src; + (void)getnameinfo((struct sockaddr *)&sin, + sizeof(struct sockaddr_in), srcname, sizeof(srcname), + NULL, 0, NI_NUMERICHOST); +#endif + } +#ifdef INET6 + else if (ip6) { +#ifndef _KERNEL + struct sockaddr_in6 sin6; +#endif + + /* jumbo payload? 
*/ + fp.fp_psize = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen); + fp.fp_ttl = ip6->ip6_hlim; fp.fp_flags |= PF_OSFP_DF; + fp.fp_flags |= PF_OSFP_INET6; +#ifdef _KERNEL + strlcpy(srcname, ip6_sprintf((struct in6_addr *)&ip6->ip6_src), + sizeof(srcname)); +#else + memset(&sin6, 0, sizeof(sin6)); + sin6.sin6_family = AF_INET6; + sin6.sin6_len = sizeof(struct sockaddr_in6); + sin6.sin6_addr = ip6->ip6_src; + (void)getnameinfo((struct sockaddr *)&sin6, + sizeof(struct sockaddr_in6), srcname, sizeof(srcname), + NULL, 0, NI_NUMERICHOST); +#endif + } +#endif + else + return (NULL); fp.fp_wsize = ntohs(tcp->th_win); @@ -181,7 +241,7 @@ pf_osfp_fingerprint_hdr(const struct ip *ip, const struct tcphdr *tcp) DPFPRINTF("fingerprinted %s:%d %d:%d:%d:%d:%llx (%d) " "(TS=%s,M=%s%d,W=%s%d)\n", - inet_ntoa(ip->ip_src), ntohs(tcp->th_sport), + srcname, ntohs(tcp->th_sport), fp.fp_wsize, fp.fp_ttl, (fp.fp_flags & PF_OSFP_DF) != 0, fp.fp_psize, (long long int)fp.fp_tcpopts, fp.fp_optcnt, (fp.fp_flags & PF_OSFP_TS0) ? "0" : "", diff --git a/sys/contrib/pf/net/pf_ruleset.c b/sys/contrib/pf/net/pf_ruleset.c new file mode 100644 index 0000000..0db7b1a --- /dev/null +++ b/sys/contrib/pf/net/pf_ruleset.c @@ -0,0 +1,415 @@ +/* $OpenBSD: pf_ruleset.c,v 1.1 2006/10/27 13:56:51 mcbride Exp $ */ + +/* + * Copyright (c) 2001 Daniel Hartmeier + * Copyright (c) 2002,2003 Henning Brauer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Effort sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F30602-01-2-0537. + * + */ + +#include +#include +#ifdef _KERNEL +# include +#endif /* _KERNEL */ +#include + +#include +#include +#include +#include + +#include +#include + +#ifdef INET6 +#include +#endif /* INET6 */ + + +#ifdef _KERNEL +# define DPFPRINTF(format, x...) \ + if (pf_status.debug >= PF_DEBUG_NOISY) \ + printf(format , ##x) +#define rs_malloc(x) malloc(x, M_TEMP, M_WAITOK) +#define rs_free(x) free(x, M_TEMP) + +#else +/* Userland equivalents so we can lend code to pfctl et al. 
*/ + +# include +# include +# include +# include +# include +# define rs_malloc(x) malloc(x) +# define rs_free(x) free(x) + +# ifdef PFDEBUG +# include +# define DPFPRINTF(format, x...) fprintf(stderr, format , ##x) +# else +# define DPFPRINTF(format, x...) ((void)0) +# endif /* PFDEBUG */ +#endif /* _KERNEL */ + + +struct pf_anchor_global pf_anchors; +struct pf_anchor pf_main_anchor; + +int pf_get_ruleset_number(u_int8_t); +void pf_init_ruleset(struct pf_ruleset *); +int pf_anchor_setup(struct pf_rule *, + const struct pf_ruleset *, const char *); +int pf_anchor_copyout(const struct pf_ruleset *, + const struct pf_rule *, struct pfioc_rule *); +void pf_anchor_remove(struct pf_rule *); + +static __inline int pf_anchor_compare(struct pf_anchor *, struct pf_anchor *); + +RB_GENERATE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare); +RB_GENERATE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare); + +static __inline int +pf_anchor_compare(struct pf_anchor *a, struct pf_anchor *b) +{ + int c = strcmp(a->path, b->path); + + return (c ? (c < 0 ? -1 : 1) : 0); +} + +int +pf_get_ruleset_number(u_int8_t action) +{ + switch (action) { + case PF_SCRUB: + case PF_NOSCRUB: + return (PF_RULESET_SCRUB); + break; + case PF_PASS: + case PF_DROP: + return (PF_RULESET_FILTER); + break; + case PF_NAT: + case PF_NONAT: + return (PF_RULESET_NAT); + break; + case PF_BINAT: + case PF_NOBINAT: + return (PF_RULESET_BINAT); + break; + case PF_RDR: + case PF_NORDR: + return (PF_RULESET_RDR); + break; + default: + return (PF_RULESET_MAX); + break; + } +} + +void +pf_init_ruleset(struct pf_ruleset *ruleset) +{ + int i; + + memset(ruleset, 0, sizeof(struct pf_ruleset)); + for (i = 0; i < PF_RULESET_MAX; i++) { + TAILQ_INIT(&ruleset->rules[i].queues[0]); + TAILQ_INIT(&ruleset->rules[i].queues[1]); + ruleset->rules[i].active.ptr = &ruleset->rules[i].queues[0]; + ruleset->rules[i].inactive.ptr = &ruleset->rules[i].queues[1]; + } +} + +struct pf_anchor * +pf_find_anchor(const char *path) +{ + struct pf_anchor *key, *found; + + key = (struct pf_anchor *)rs_malloc(sizeof(*key)); + memset(key, 0, sizeof(*key)); + strlcpy(key->path, path, sizeof(key->path)); + found = RB_FIND(pf_anchor_global, &pf_anchors, key); + rs_free(key); + return (found); +} + +struct pf_ruleset * +pf_find_ruleset(const char *path) +{ + struct pf_anchor *anchor; + + while (*path == '/') + path++; + if (!*path) + return (&pf_main_ruleset); + anchor = pf_find_anchor(path); + if (anchor == NULL) + return (NULL); + else + return (&anchor->ruleset); +} + +struct pf_ruleset * +pf_find_or_create_ruleset(const char *path) +{ + char *p, *q, *r; + struct pf_ruleset *ruleset; + struct pf_anchor *anchor, *dup, *parent = NULL; + + if (path[0] == 0) + return (&pf_main_ruleset); + while (*path == '/') + path++; + ruleset = pf_find_ruleset(path); + if (ruleset != NULL) + return (ruleset); + p = (char *)rs_malloc(MAXPATHLEN); + bzero(p, MAXPATHLEN); + strlcpy(p, path, MAXPATHLEN); + while (parent == NULL && (q = strrchr(p, '/')) != NULL) { + *q = 0; + if ((ruleset = pf_find_ruleset(p)) != NULL) { + parent = ruleset->anchor; + break; + } + } + if (q == NULL) + q = p; + else + q++; + strlcpy(p, path, MAXPATHLEN); + if (!*q) { + rs_free(p); + return (NULL); + } + while ((r = strchr(q, '/')) != NULL || *q) { + if (r != NULL) + *r = 0; + if (!*q || strlen(q) >= PF_ANCHOR_NAME_SIZE || + (parent != NULL && strlen(parent->path) >= + MAXPATHLEN - PF_ANCHOR_NAME_SIZE - 1)) { + rs_free(p); + return (NULL); + } + anchor = (struct pf_anchor 
*)rs_malloc(sizeof(*anchor)); + if (anchor == NULL) { + rs_free(p); + return (NULL); + } + memset(anchor, 0, sizeof(*anchor)); + RB_INIT(&anchor->children); + strlcpy(anchor->name, q, sizeof(anchor->name)); + if (parent != NULL) { + strlcpy(anchor->path, parent->path, + sizeof(anchor->path)); + strlcat(anchor->path, "/", sizeof(anchor->path)); + } + strlcat(anchor->path, anchor->name, sizeof(anchor->path)); + if ((dup = RB_INSERT(pf_anchor_global, &pf_anchors, anchor)) != + NULL) { + printf("pf_find_or_create_ruleset: RB_INSERT1 " + "'%s' '%s' collides with '%s' '%s'\n", + anchor->path, anchor->name, dup->path, dup->name); + rs_free(anchor); + rs_free(p); + return (NULL); + } + if (parent != NULL) { + anchor->parent = parent; + if ((dup = RB_INSERT(pf_anchor_node, &parent->children, + anchor)) != NULL) { + printf("pf_find_or_create_ruleset: " + "RB_INSERT2 '%s' '%s' collides with " + "'%s' '%s'\n", anchor->path, anchor->name, + dup->path, dup->name); + RB_REMOVE(pf_anchor_global, &pf_anchors, + anchor); + rs_free(anchor); + rs_free(p); + return (NULL); + } + } + pf_init_ruleset(&anchor->ruleset); + anchor->ruleset.anchor = anchor; + parent = anchor; + if (r != NULL) + q = r + 1; + else + *q = 0; + } + rs_free(p); + return (&anchor->ruleset); +} + +void +pf_remove_if_empty_ruleset(struct pf_ruleset *ruleset) +{ + struct pf_anchor *parent; + int i; + + while (ruleset != NULL) { + if (ruleset == &pf_main_ruleset || ruleset->anchor == NULL || + !RB_EMPTY(&ruleset->anchor->children) || + ruleset->anchor->refcnt > 0 || ruleset->tables > 0 || + ruleset->topen) + return; + for (i = 0; i < PF_RULESET_MAX; ++i) + if (!TAILQ_EMPTY(ruleset->rules[i].active.ptr) || + !TAILQ_EMPTY(ruleset->rules[i].inactive.ptr) || + ruleset->rules[i].inactive.open) + return; + RB_REMOVE(pf_anchor_global, &pf_anchors, ruleset->anchor); + if ((parent = ruleset->anchor->parent) != NULL) + RB_REMOVE(pf_anchor_node, &parent->children, + ruleset->anchor); + rs_free(ruleset->anchor); + if (parent == NULL) + return; + ruleset = &parent->ruleset; + } +} + +int +pf_anchor_setup(struct pf_rule *r, const struct pf_ruleset *s, + const char *name) +{ + char *p, *path; + struct pf_ruleset *ruleset; + + r->anchor = NULL; + r->anchor_relative = 0; + r->anchor_wildcard = 0; + if (!name[0]) + return (0); + path = (char *)rs_malloc(MAXPATHLEN); + bzero(path, MAXPATHLEN); + if (name[0] == '/') + strlcpy(path, name + 1, MAXPATHLEN); + else { + /* relative path */ + r->anchor_relative = 1; + if (s->anchor == NULL || !s->anchor->path[0]) + path[0] = 0; + else + strlcpy(path, s->anchor->path, MAXPATHLEN); + while (name[0] == '.' && name[1] == '.' && name[2] == '/') { + if (!path[0]) { + printf("pf_anchor_setup: .. 
beyond root\n"); + rs_free(path); + return (1); + } + if ((p = strrchr(path, '/')) != NULL) + *p = 0; + else + path[0] = 0; + r->anchor_relative++; + name += 3; + } + if (path[0]) + strlcat(path, "/", MAXPATHLEN); + strlcat(path, name, MAXPATHLEN); + } + if ((p = strrchr(path, '/')) != NULL && !strcmp(p, "/*")) { + r->anchor_wildcard = 1; + *p = 0; + } + ruleset = pf_find_or_create_ruleset(path); + rs_free(path); + if (ruleset == NULL || ruleset->anchor == NULL) { + printf("pf_anchor_setup: ruleset\n"); + return (1); + } + r->anchor = ruleset->anchor; + r->anchor->refcnt++; + return (0); +} + +int +pf_anchor_copyout(const struct pf_ruleset *rs, const struct pf_rule *r, + struct pfioc_rule *pr) +{ + pr->anchor_call[0] = 0; + if (r->anchor == NULL) + return (0); + if (!r->anchor_relative) { + strlcpy(pr->anchor_call, "/", sizeof(pr->anchor_call)); + strlcat(pr->anchor_call, r->anchor->path, + sizeof(pr->anchor_call)); + } else { + char *a, *p; + int i; + + a = (char *)rs_malloc(MAXPATHLEN); + bzero(a, MAXPATHLEN); + if (rs->anchor == NULL) + a[0] = 0; + else + strlcpy(a, rs->anchor->path, MAXPATHLEN); + for (i = 1; i < r->anchor_relative; ++i) { + if ((p = strrchr(a, '/')) == NULL) + p = a; + *p = 0; + strlcat(pr->anchor_call, "../", + sizeof(pr->anchor_call)); + } + if (strncmp(a, r->anchor->path, strlen(a))) { + printf("pf_anchor_copyout: '%s' '%s'\n", a, + r->anchor->path); + rs_free(a); + return (1); + } + if (strlen(r->anchor->path) > strlen(a)) + strlcat(pr->anchor_call, r->anchor->path + (a[0] ? + strlen(a) + 1 : 0), sizeof(pr->anchor_call)); + rs_free(a); + } + if (r->anchor_wildcard) + strlcat(pr->anchor_call, pr->anchor_call[0] ? "/*" : "*", + sizeof(pr->anchor_call)); + return (0); +} + +void +pf_anchor_remove(struct pf_rule *r) +{ + if (r->anchor == NULL) + return; + if (r->anchor->refcnt <= 0) { + printf("pf_anchor_remove: broken refcount\n"); + r->anchor = NULL; + return; + } + if (!--r->anchor->refcnt) + pf_remove_if_empty_ruleset(&r->anchor->ruleset); + r->anchor = NULL; +} diff --git a/sys/contrib/pf/net/pf_table.c b/sys/contrib/pf/net/pf_table.c index 621809a..a79ed37 100644 --- a/sys/contrib/pf/net/pf_table.c +++ b/sys/contrib/pf/net/pf_table.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_table.c,v 1.62 2004/12/07 18:02:04 mcbride Exp $ */ +/* $OpenBSD: pf_table.c,v 1.68 2006/05/02 10:08:45 dhartmei Exp $ */ /* * Copyright (c) 2002 Cedric Berger @@ -404,7 +404,8 @@ _bad: int pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, - int *size2, int *nadd, int *ndel, int *nchange, int flags) + int *size2, int *nadd, int *ndel, int *nchange, int flags, + u_int32_t ignore_pfrt_flags) { struct pfr_ktable *kt, *tmpkt; struct pfr_kentryworkq addq, delq, changeq; @@ -414,7 +415,8 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, long tzero = time_second; ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK); - if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) + if (pfr_validate_table(tbl, ignore_pfrt_flags, flags & + PFR_FLAG_USERIOCTL)) return (EINVAL); kt = pfr_lookup_table(tbl); if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) @@ -2047,7 +2049,7 @@ pfr_attach_table(struct pf_ruleset *rs, char *name) bzero(&tbl, sizeof(tbl)); strlcpy(tbl.pfrt_name, name, sizeof(tbl.pfrt_name)); if (ac != NULL) - strlcpy(tbl.pfrt_anchor, ac->name, sizeof(tbl.pfrt_anchor)); + strlcpy(tbl.pfrt_anchor, ac->path, sizeof(tbl.pfrt_anchor)); kt = pfr_lookup_table(&tbl); if (kt == NULL) { kt = pfr_create_ktable(&tbl, time_second, 1); diff --git 
a/sys/contrib/pf/net/pfvar.h b/sys/contrib/pf/net/pfvar.h index d27d01d..d650f79 100644 --- a/sys/contrib/pf/net/pfvar.h +++ b/sys/contrib/pf/net/pfvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pfvar.h,v 1.213 2005/03/03 07:13:39 dhartmei Exp $ */ +/* $OpenBSD: pfvar.h,v 1.244 2007/02/23 21:31:51 deraadt Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -44,10 +45,18 @@ #include struct ip; +struct ip6_hdr; #define PF_TCPS_PROXY_SRC ((TCP_NSTATES)+0) #define PF_TCPS_PROXY_DST ((TCP_NSTATES)+1) +#define PF_MD5_DIGEST_LENGTH 16 +#ifdef MD5_DIGEST_LENGTH +#if PF_MD5_DIGEST_LENGTH != MD5_DIGEST_LENGTH +#error +#endif +#endif + enum { PF_INOUT, PF_IN, PF_OUT }; enum { PF_LAN_EXT, PF_EXT_GWY, PF_ID }; enum { PF_PASS, PF_DROP, PF_SCRUB, PF_NOSCRUB, PF_NAT, PF_NONAT, @@ -60,6 +69,8 @@ enum { PF_DEBUG_NONE, PF_DEBUG_URGENT, PF_DEBUG_MISC, PF_DEBUG_NOISY }; enum { PF_CHANGE_NONE, PF_CHANGE_ADD_HEAD, PF_CHANGE_ADD_TAIL, PF_CHANGE_ADD_BEFORE, PF_CHANGE_ADD_AFTER, PF_CHANGE_REMOVE, PF_CHANGE_GET_TICKET }; +enum { PF_GET_NONE, PF_GET_CLR_CNTR }; + /* * Note about PFTM_*: real indices into pf_rule.timeout[] come before * PFTM_MAX, special cases afterwards. See pf_state_expires(). @@ -71,7 +82,8 @@ enum { PFTM_TCP_FIRST_PACKET, PFTM_TCP_OPENING, PFTM_TCP_ESTABLISHED, PFTM_OTHER_FIRST_PACKET, PFTM_OTHER_SINGLE, PFTM_OTHER_MULTIPLE, PFTM_FRAG, PFTM_INTERVAL, PFTM_ADAPTIVE_START, PFTM_ADAPTIVE_END, PFTM_SRC_NODE, - PFTM_TS_DIFF, PFTM_MAX, PFTM_PURGE, PFTM_UNTIL_PACKET }; + PFTM_TS_DIFF, PFTM_MAX, PFTM_PURGE, PFTM_UNLINKED, + PFTM_UNTIL_PACKET }; /* PFTM default values */ #define PFTM_TCP_FIRST_PACKET_VAL 120 /* First TCP packet */ @@ -94,17 +106,22 @@ enum { PFTM_TCP_FIRST_PACKET, PFTM_TCP_OPENING, PFTM_TCP_ESTABLISHED, #define PFTM_TS_DIFF_VAL 30 /* Allowed TS diff */ enum { PF_NOPFROUTE, PF_FASTROUTE, PF_ROUTETO, PF_DUPTO, PF_REPLYTO }; -enum { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS, PF_LIMIT_MAX }; +enum { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS, + PF_LIMIT_TABLES, PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_MAX }; #define PF_POOL_IDMASK 0x0f enum { PF_POOL_NONE, PF_POOL_BITMASK, PF_POOL_RANDOM, PF_POOL_SRCHASH, PF_POOL_ROUNDROBIN }; enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL, - PF_ADDR_TABLE, PF_ADDR_RTLABEL }; + PF_ADDR_TABLE, PF_ADDR_RTLABEL, PF_ADDR_URPFFAILED }; #define PF_POOL_TYPEMASK 0x0f #define PF_POOL_STICKYADDR 0x20 #define PF_WSCALE_FLAG 0x80 #define PF_WSCALE_MASK 0x0f +#define PF_LOG 0x01 +#define PF_LOG_ALL 0x02 +#define PF_LOG_SOCKET_LOOKUP 0x04 + struct pf_addr { union { struct in_addr v4; @@ -152,18 +169,19 @@ struct pf_addr_wrap { #ifdef _KERNEL struct pfi_dynaddr { - struct pf_addr pfid_addr4; - struct pf_addr pfid_mask4; - struct pf_addr pfid_addr6; - struct pf_addr pfid_mask6; - struct pfr_ktable *pfid_kt; - struct pfi_kif *pfid_kif; - void *pfid_hook_cookie; - int pfid_net; /* optional mask, or 128 */ - int pfid_acnt4; /* address count, IPv4 */ - int pfid_acnt6; /* address count, IPv6 */ - sa_family_t pfid_af; /* rule address family */ - u_int8_t pfid_iflags; /* PFI_AFLAG_* */ + TAILQ_ENTRY(pfi_dynaddr) entry; + struct pf_addr pfid_addr4; + struct pf_addr pfid_mask4; + struct pf_addr pfid_addr6; + struct pf_addr pfid_mask6; + struct pfr_ktable *pfid_kt; + struct pfi_kif *pfid_kif; + void *pfid_hook_cookie; + int pfid_net; /* mask or 128 */ + int pfid_acnt4; /* address count IPv4 */ + int pfid_acnt6; /* address count IPv6 */ + sa_family_t pfid_af; /* rule af */ + u_int8_t pfid_iflags; /* 
PFI_AFLAG_* */ }; /* @@ -299,23 +317,26 @@ struct pfi_dynaddr { #endif /* PF_INET6_ONLY */ #endif /* PF_INET_INET6 */ -#define PF_MISMATCHAW(aw, x, af, neg) \ - ( \ - (((aw)->type == PF_ADDR_NOROUTE && \ - pf_routable((x), (af))) || \ - ((aw)->type == PF_ADDR_RTLABEL && \ - !pf_rtlabel_match((x), (af), (aw))) || \ - ((aw)->type == PF_ADDR_TABLE && \ - !pfr_match_addr((aw)->p.tbl, (x), (af))) || \ - ((aw)->type == PF_ADDR_DYNIFTL && \ - !pfi_match_addr((aw)->p.dyn, (x), (af))) || \ - ((aw)->type == PF_ADDR_ADDRMASK && \ - !PF_AZERO(&(aw)->v.a.mask, (af)) && \ - !PF_MATCHA(0, &(aw)->v.a.addr, \ - &(aw)->v.a.mask, (x), (af)))) != \ - (neg) \ +#define PF_MISMATCHAW(aw, x, af, neg, ifp) \ + ( \ + (((aw)->type == PF_ADDR_NOROUTE && \ + pf_routable((x), (af), NULL)) || \ + (((aw)->type == PF_ADDR_URPFFAILED && (ifp) != NULL && \ + pf_routable((x), (af), (ifp))) || \ + ((aw)->type == PF_ADDR_RTLABEL && \ + !pf_rtlabel_match((x), (af), (aw))) || \ + ((aw)->type == PF_ADDR_TABLE && \ + !pfr_match_addr((aw)->p.tbl, (x), (af))) || \ + ((aw)->type == PF_ADDR_DYNIFTL && \ + !pfi_match_addr((aw)->p.dyn, (x), (af))) || \ + ((aw)->type == PF_ADDR_ADDRMASK && \ + !PF_AZERO(&(aw)->v.a.mask, (af)) && \ + !PF_MATCHA(0, &(aw)->v.a.addr, \ + &(aw)->v.a.mask, (x), (af))))) != \ + (neg) \ ) + struct pf_rule_uid { uid_t uid[2]; u_int8_t op; @@ -433,6 +454,7 @@ struct pf_os_fingerprint { #define PF_OSFP_MSS_DC 0x0800 /* TCP MSS dont-care */ #define PF_OSFP_DF 0x1000 /* IPv4 don't fragment bit */ #define PF_OSFP_TS0 0x2000 /* Zero timestamp */ +#define PF_OSFP_INET6 0x4000 /* IPv6 */ u_int8_t fp_optcnt; /* TCP option count */ u_int8_t fp_wscale; /* TCP window scaling */ u_int8_t fp_ttl; /* IPv4 TTL */ @@ -488,11 +510,11 @@ struct pf_rule { union pf_rule_ptr skip[PF_SKIP_COUNT]; #define PF_RULE_LABEL_SIZE 64 char label[PF_RULE_LABEL_SIZE]; -#define PF_QNAME_SIZE 16 +#define PF_QNAME_SIZE 64 char ifname[IFNAMSIZ]; char qname[PF_QNAME_SIZE]; char pqname[PF_QNAME_SIZE]; -#define PF_TAG_NAME_SIZE 16 +#define PF_TAG_NAME_SIZE 64 char tagname[PF_TAG_NAME_SIZE]; char match_tagname[PF_TAG_NAME_SIZE]; @@ -502,8 +524,8 @@ struct pf_rule { struct pf_pool rpool; u_int64_t evaluations; - u_int64_t packets; - u_int64_t bytes; + u_int64_t packets[2]; + u_int64_t bytes[2]; struct pfi_kif *kif; struct pf_anchor *anchor; @@ -511,6 +533,7 @@ struct pf_rule { pf_osfp_t os_fingerprint; + int rtableid; u_int32_t timeout[PFTM_MAX]; u_int32_t states; u_int32_t max_states; @@ -527,6 +550,8 @@ struct pf_rule { u_int32_t rt_listid; u_int32_t nr; u_int32_t prob; + uid_t cuid; + pid_t cpid; u_int16_t return_icmp; u_int16_t return_icmp6; @@ -541,6 +566,7 @@ struct pf_rule { u_int8_t action; u_int8_t direction; u_int8_t log; + u_int8_t logif; u_int8_t quick; u_int8_t ifnot; u_int8_t match_tag_not; @@ -588,9 +614,10 @@ struct pf_rule { /* rule flags again */ #define PFRULE_IFBOUND 0x00010000 /* if-bound */ -#define PFRULE_GRBOUND 0x00020000 /* group-bound */ #define PFSTATE_HIWAT 10000 /* default state table size */ +#define PFSTATE_ADAPT_START 6000 /* default adaptive timeout start */ +#define PFSTATE_ADAPT_END 12000 /* default adaptive timeout end */ struct pf_threshold { @@ -608,8 +635,8 @@ struct pf_src_node { struct pf_addr raddr; union pf_rule_ptr rule; struct pfi_kif *kif; - u_int32_t bytes; - u_int32_t packets; + u_int64_t bytes[2]; + u_int64_t packets[2]; u_int32_t states; u_int32_t conn; struct pf_threshold conn_rate; @@ -651,26 +678,53 @@ struct pf_state_peer { u_int8_t state; /* active state level */ u_int8_t wscale; /* window scaling 
factor */ u_int16_t mss; /* Maximum segment size option */ + u_int8_t tcp_est; /* Did we reach TCPS_ESTABLISHED */ struct pf_state_scrub *scrub; /* state is scrubbed */ + u_int8_t pad[3]; }; TAILQ_HEAD(pf_state_queue, pf_state); +/* keep synced with struct pf_state, used in RB_FIND */ +struct pf_state_cmp { + u_int64_t id; + u_int32_t creatorid; + struct pf_state_host lan; + struct pf_state_host gwy; + struct pf_state_host ext; + sa_family_t af; + u_int8_t proto; + u_int8_t direction; + u_int8_t pad; +}; + struct pf_state { u_int64_t id; + u_int32_t creatorid; + struct pf_state_host lan; + struct pf_state_host gwy; + struct pf_state_host ext; + sa_family_t af; + u_int8_t proto; + u_int8_t direction; + u_int8_t pad; + u_int8_t log; + u_int8_t allow_opts; + u_int8_t timeout; + u_int8_t sync_flags; +#define PFSTATE_NOSYNC 0x01 +#define PFSTATE_FROMSYNC 0x02 +#define PFSTATE_STALE 0x04 union { struct { RB_ENTRY(pf_state) entry_lan_ext; RB_ENTRY(pf_state) entry_ext_gwy; RB_ENTRY(pf_state) entry_id; - TAILQ_ENTRY(pf_state) entry_updates; + TAILQ_ENTRY(pf_state) entry_list; struct pfi_kif *kif; } s; char ifname[IFNAMSIZ]; } u; - struct pf_state_host lan; - struct pf_state_host gwy; - struct pf_state_host ext; struct pf_state_peer src; struct pf_state_peer dst; union pf_rule_ptr rule; @@ -680,24 +734,12 @@ struct pf_state { struct pfi_kif *rt_kif; struct pf_src_node *src_node; struct pf_src_node *nat_src_node; + u_int64_t packets[2]; + u_int64_t bytes[2]; u_int32_t creation; u_int32_t expire; u_int32_t pfsync_time; - u_int32_t packets[2]; - u_int32_t bytes[2]; - u_int32_t creatorid; u_int16_t tag; - sa_family_t af; - u_int8_t proto; - u_int8_t direction; - u_int8_t log; - u_int8_t allow_opts; - u_int8_t timeout; - u_int8_t sync_flags; -#define PFSTATE_NOSYNC 0x01 -#define PFSTATE_FROMSYNC 0x02 -#define PFSTATE_STALE 0x04 - u_int8_t pad; }; TAILQ_HEAD(pf_rulequeue, pf_rule); @@ -709,6 +751,8 @@ struct pf_ruleset { struct pf_rulequeue queues[2]; struct { struct pf_rulequeue *ptr; + struct pf_rule **ptr_array; + u_int32_t rcount; u_int32_t ticket; int open; } active, inactive; @@ -730,6 +774,7 @@ struct pf_anchor { char path[MAXPATHLEN]; struct pf_ruleset ruleset; int refcnt; /* anchor rules */ + int match; }; RB_PROTOTYPE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare); RB_PROTOTYPE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare); @@ -846,55 +891,47 @@ RB_HEAD(pf_state_tree_ext_gwy, pf_state); RB_PROTOTYPE(pf_state_tree_ext_gwy, pf_state, u.s.entry_ext_gwy, pf_state_compare_ext_gwy); -struct pfi_if { - char pfif_name[IFNAMSIZ]; - u_int64_t pfif_packets[2][2][2]; - u_int64_t pfif_bytes[2][2][2]; - u_int64_t pfif_addcnt; - u_int64_t pfif_delcnt; - long pfif_tzero; - int pfif_states; - int pfif_rules; - int pfif_flags; -}; - -TAILQ_HEAD(pfi_grouphead, pfi_kif); TAILQ_HEAD(pfi_statehead, pfi_kif); RB_HEAD(pfi_ifhead, pfi_kif); + +/* keep synced with pfi_kif, used in RB_FIND */ +struct pfi_kif_cmp { + char pfik_name[IFNAMSIZ]; +}; + struct pfi_kif { - struct pfi_if pfik_if; + char pfik_name[IFNAMSIZ]; RB_ENTRY(pfi_kif) pfik_tree; + u_int64_t pfik_packets[2][2][2]; + u_int64_t pfik_bytes[2][2][2]; + u_int32_t pfik_tzero; + int pfik_flags; struct pf_state_tree_lan_ext pfik_lan_ext; struct pf_state_tree_ext_gwy pfik_ext_gwy; - struct pfi_grouphead pfik_grouphead; - TAILQ_ENTRY(pfi_kif) pfik_instances; TAILQ_ENTRY(pfi_kif) pfik_w_states; - struct hook_desc_head *pfik_ah_head; void *pfik_ah_cookie; - struct pfi_kif *pfik_parent; struct ifnet *pfik_ifp; + struct ifg_group *pfik_group; 
int pfik_states; int pfik_rules; + TAILQ_HEAD(, pfi_dynaddr) pfik_dynaddrs; +}; + +enum pfi_kif_refs { + PFI_KIF_REF_NONE, + PFI_KIF_REF_STATE, + PFI_KIF_REF_RULE }; -#define pfik_name pfik_if.pfif_name -#define pfik_packets pfik_if.pfif_packets -#define pfik_bytes pfik_if.pfif_bytes -#define pfik_tzero pfik_if.pfif_tzero -#define pfik_flags pfik_if.pfif_flags -#define pfik_addcnt pfik_if.pfif_addcnt -#define pfik_delcnt pfik_if.pfif_delcnt -#define pfik_states pfik_if.pfif_states -#define pfik_rules pfik_if.pfif_rules - -#define PFI_IFLAG_GROUP 0x0001 /* group of interfaces */ -#define PFI_IFLAG_INSTANCE 0x0002 /* single instance */ -#define PFI_IFLAG_CLONABLE 0x0010 /* clonable group */ -#define PFI_IFLAG_DYNAMIC 0x0020 /* dynamic group */ -#define PFI_IFLAG_ATTACHED 0x0040 /* interface attached */ + #define PFI_IFLAG_SKIP 0x0100 /* skip filtering on interface */ -#define PFI_IFLAG_SETABLE_MASK 0x0100 /* setable via DIOC{SET,CLR}IFFLAG */ struct pf_pdesc { + struct { + int done; + uid_t uid; + gid_t gid; + pid_t pid; + } lookup; u_int64_t tot_len; /* Make Mickey money */ union { struct tcphdr *tcp; @@ -912,6 +949,7 @@ struct pf_pdesc { struct pf_addr *dst; struct ether_header *eh; + struct pf_mtag *pf_mtag; u_int16_t *ip_sum; u_int32_t p_len; /* total length of payload */ u_int16_t flags; /* Let SCRUB trigger behavior in @@ -1052,6 +1090,7 @@ struct pf_status { u_int32_t debug; u_int32_t hostid; char ifname[IFNAMSIZ]; + u_int8_t pf_chksum[PF_MD5_DIGEST_LENGTH]; }; struct cbq_opts { @@ -1114,6 +1153,20 @@ struct pf_altq { u_int32_t qid; /* return value */ }; +#define PF_TAG_GENERATED 0x01 +#define PF_TAG_FRAGCACHE 0x02 +#define PF_TAG_TRANSLATE_LOCALHOST 0x04 + +struct pf_mtag { + void *hdr; /* saved hdr pos in mbuf, for ECN */ + u_int rtableid; /* alternate routing table id */ + u_int32_t qid; /* queue id */ + u_int16_t tag; /* tag id */ + u_int8_t flags; + u_int8_t routed; + sa_family_t af; /* for ECN */ +}; + struct pf_tag { u_int16_t tag; /* tag id */ }; @@ -1130,6 +1183,10 @@ struct pf_tagname { #define PFFRAG_FRCENT_HIWAT 50000 /* Number of fragment cache entries */ #define PFFRAG_FRCACHE_HIWAT 10000 /* Number of fragment descriptors */ +#define PFR_KTABLE_HIWAT 1000 /* Number of tables */ +#define PFR_KENTRY_HIWAT 200000 /* Number of table entries */ +#define PFR_KENTRY_HIWAT_SMALL 100000 /* Number of table entries (tiny hosts) */ + /* * ioctl parameter structures */ @@ -1175,6 +1232,13 @@ struct pfioc_state { struct pf_state state; }; +struct pfioc_src_node_kill { + /* XXX returns the number of src nodes killed in psnk_af */ + sa_family_t psnk_af; + struct pf_rule_addr psnk_src; + struct pf_rule_addr psnk_dst; +}; + struct pfioc_state_kill { /* XXX returns the number of states killed in psk_af */ sa_family_t psk_af; @@ -1282,11 +1346,6 @@ struct pfioc_table { #define pfrio_setflag pfrio_size2 #define pfrio_clrflag pfrio_nadd - -#define PFI_FLAG_GROUP 0x0001 /* gets groups of interfaces */ -#define PFI_FLAG_INSTANCE 0x0002 /* gets single interfaces */ -#define PFI_FLAG_ALLMASK 0x0003 - struct pfioc_iface { char pfiio_name[IFNAMSIZ]; void *pfiio_buffer; @@ -1365,9 +1424,9 @@ struct pfioc_iface { #define DIOCCLRSRCNODES _IO('D', 85) #define DIOCSETHOSTID _IOWR('D', 86, u_int32_t) #define DIOCIGETIFACES _IOWR('D', 87, struct pfioc_iface) -#define DIOCICLRISTATS _IOWR('D', 88, struct pfioc_iface) #define DIOCSETIFFLAG _IOWR('D', 89, struct pfioc_iface) #define DIOCCLRIFFLAG _IOWR('D', 90, struct pfioc_iface) +#define DIOCKILLSRCNODES _IOWR('D', 91, struct pfioc_src_node_kill) #ifdef 
_KERNEL RB_HEAD(pf_src_tree, pf_src_node); @@ -1378,16 +1437,13 @@ RB_HEAD(pf_state_tree_id, pf_state); RB_PROTOTYPE(pf_state_tree_id, pf_state, entry_id, pf_state_compare_id); extern struct pf_state_tree_id tree_id; -extern struct pf_state_queue state_updates; +extern struct pf_state_queue state_list; -extern struct pf_anchor_global pf_anchors; -extern struct pf_ruleset pf_main_ruleset; TAILQ_HEAD(pf_poolqueue, pf_pool); extern struct pf_poolqueue pf_pools[2]; TAILQ_HEAD(pf_altqqueue, pf_altq); extern struct pf_altqqueue pf_altqs[2]; extern struct pf_palist pf_pabuf; -extern struct pfi_kif **pfi_index2kif; extern u_int32_t ticket_altqs_active; extern u_int32_t ticket_altqs_inactive; @@ -1405,26 +1461,22 @@ extern void pf_calc_skip_steps(struct pf_rulequeue *); extern struct pool pf_src_tree_pl, pf_rule_pl; extern struct pool pf_state_pl, pf_altq_pl, pf_pooladdr_pl; extern struct pool pf_state_scrub_pl; -extern void pf_purge_timeout(void *); -extern void pf_purge_expired_src_nodes(void); -extern void pf_purge_expired_states(void); -extern void pf_purge_expired_state(struct pf_state *); +extern void pf_purge_thread(void *); +extern void pf_purge_expired_src_nodes(int); +extern void pf_purge_expired_states(u_int32_t); +extern void pf_unlink_state(struct pf_state *); +extern void pf_free_state(struct pf_state *); extern int pf_insert_state(struct pfi_kif *, struct pf_state *); extern int pf_insert_src_node(struct pf_src_node **, struct pf_rule *, struct pf_addr *, sa_family_t); void pf_src_tree_remove_state(struct pf_state *); -extern struct pf_state *pf_find_state_byid(struct pf_state *); -extern struct pf_state *pf_find_state_all(struct pf_state *key, +extern struct pf_state *pf_find_state_byid(struct pf_state_cmp *); +extern struct pf_state *pf_find_state_all(struct pf_state_cmp *key, u_int8_t tree, int *more); extern void pf_print_state(struct pf_state *); extern void pf_print_flags(u_int8_t); -extern struct pf_anchor *pf_find_anchor(const char *); -extern struct pf_ruleset *pf_find_ruleset(const char *); -extern struct pf_ruleset *pf_find_or_create_ruleset(const char *); -extern void pf_remove_if_empty_ruleset( - struct pf_ruleset *); extern u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t, u_int8_t); @@ -1450,7 +1502,8 @@ void *pf_pull_hdr(struct mbuf *, int, void *, int, u_short *, u_short *, sa_family_t); void pf_change_a(void *, u_int16_t *, u_int32_t, u_int8_t); int pflog_packet(struct pfi_kif *, struct mbuf *, sa_family_t, u_int8_t, - u_int8_t, struct pf_rule *, struct pf_rule *, struct pf_ruleset *); + u_int8_t, struct pf_rule *, struct pf_rule *, struct pf_ruleset *, + struct pf_pdesc *); int pf_match_addr(u_int8_t, struct pf_addr *, struct pf_addr *, struct pf_addr *, sa_family_t); int pf_match(u_int8_t, u_int32_t, u_int32_t, u_int32_t); @@ -1474,8 +1527,9 @@ int pf_normalize_tcp_stateful(struct mbuf *, int, struct pf_pdesc *, u_int32_t pf_state_expires(const struct pf_state *); void pf_purge_expired_fragments(void); -int pf_routable(struct pf_addr *addr, sa_family_t af); +int pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *); int pf_rtlabel_match(struct pf_addr *, sa_family_t, struct pf_addr_wrap *); +int pf_socket_lookup(int, struct pf_pdesc *); void pfr_initialize(void); int pfr_match_addr(struct pfr_ktable *, struct pf_addr *, sa_family_t); void pfr_update_stats(struct pfr_ktable *, struct pf_addr *, sa_family_t, @@ -1500,7 +1554,7 @@ int pfr_add_addrs(struct pfr_table *, struct pfr_addr *, int, int *, int pfr_del_addrs(struct pfr_table *, struct 
pfr_addr *, int, int *, int); int pfr_set_addrs(struct pfr_table *, struct pfr_addr *, int, int *, - int *, int *, int *, int); + int *, int *, int *, int, u_int32_t); int pfr_get_addrs(struct pfr_table *, struct pfr_addr *, int *, int); int pfr_get_astats(struct pfr_table *, struct pfr_astats *, int *, int); int pfr_clr_astats(struct pfr_table *, struct pfr_addr *, int, int *, @@ -1513,41 +1567,44 @@ int pfr_ina_commit(struct pfr_table *, u_int32_t, int *, int *, int); int pfr_ina_define(struct pfr_table *, struct pfr_addr *, int, int *, int *, u_int32_t, int); +extern struct pfi_statehead pfi_statehead; +extern struct pfi_kif *pfi_all; + void pfi_initialize(void); -void pfi_attach_clone(struct if_clone *); +struct pfi_kif *pfi_kif_get(const char *); +void pfi_kif_ref(struct pfi_kif *, enum pfi_kif_refs); +void pfi_kif_unref(struct pfi_kif *, enum pfi_kif_refs); +int pfi_kif_match(struct pfi_kif *, struct pfi_kif *); void pfi_attach_ifnet(struct ifnet *); void pfi_detach_ifnet(struct ifnet *); -struct pfi_kif *pfi_lookup_create(const char *); -struct pfi_kif *pfi_lookup_if(const char *); -int pfi_maybe_destroy(struct pfi_kif *); -struct pfi_kif *pfi_attach_rule(const char *); -void pfi_detach_rule(struct pfi_kif *); -void pfi_attach_state(struct pfi_kif *); -void pfi_detach_state(struct pfi_kif *); +void pfi_attach_ifgroup(struct ifg_group *); +void pfi_detach_ifgroup(struct ifg_group *); +void pfi_group_change(const char *); +int pfi_match_addr(struct pfi_dynaddr *, struct pf_addr *, + sa_family_t); int pfi_dynaddr_setup(struct pf_addr_wrap *, sa_family_t); -void pfi_dynaddr_copyout(struct pf_addr_wrap *); void pfi_dynaddr_remove(struct pf_addr_wrap *); +void pfi_dynaddr_copyout(struct pf_addr_wrap *); void pfi_fill_oldstatus(struct pf_status *); -int pfi_clr_istats(const char *, int *, int); -int pfi_get_ifaces(const char *, struct pfi_if *, int *, int); +int pfi_clr_istats(const char *); +int pfi_get_ifaces(const char *, struct pfi_kif *, int *); int pfi_set_flags(const char *, int); int pfi_clear_flags(const char *, int); -int pfi_match_addr(struct pfi_dynaddr *, struct pf_addr *, - sa_family_t); - -extern struct pfi_statehead pfi_statehead; -u_int16_t pf_tagname2tag(char *); -void pf_tag2tagname(u_int16_t, char *); -void pf_tag_ref(u_int16_t); -void pf_tag_unref(u_int16_t); -int pf_tag_packet(struct mbuf *, struct pf_tag *, int); -u_int32_t pf_qname2qid(char *); -void pf_qid2qname(u_int32_t, char *); -void pf_qid_unref(u_int32_t); +u_int16_t pf_tagname2tag(char *); +void pf_tag2tagname(u_int16_t, char *); +void pf_tag_ref(u_int16_t); +void pf_tag_unref(u_int16_t); +int pf_tag_packet(struct mbuf *, struct pf_mtag *, int, int); +u_int32_t pf_qname2qid(char *); +void pf_qid2qname(u_int32_t, char *); +void pf_qid_unref(u_int32_t); +struct pf_mtag *pf_find_mtag(struct mbuf *); +struct pf_mtag *pf_get_mtag(struct mbuf *); extern struct pf_status pf_status; extern struct pool pf_frent_pl, pf_frag_pl; +extern struct rwlock pf_consistency_lock; struct pf_pool_limit { void *pp; @@ -1557,6 +1614,31 @@ extern struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX]; #endif /* _KERNEL */ +extern struct pf_anchor_global pf_anchors; +extern struct pf_anchor pf_main_anchor; +#define pf_main_ruleset pf_main_anchor.ruleset + +/* these ruleset functions can be linked into userland programs (pfctl) */ +int pf_get_ruleset_number(u_int8_t); +void pf_init_ruleset(struct pf_ruleset *); +int pf_anchor_setup(struct pf_rule *, + const struct pf_ruleset *, const char *); +int pf_anchor_copyout(const struct pf_ruleset 
*, + const struct pf_rule *, struct pfioc_rule *); +void pf_anchor_remove(struct pf_rule *); +void pf_remove_if_empty_ruleset(struct pf_ruleset *); +struct pf_anchor *pf_find_anchor(const char *); +struct pf_ruleset *pf_find_ruleset(const char *); +struct pf_ruleset *pf_find_or_create_ruleset(const char *); +void pf_rs_initialize(void); + +#ifdef _KERNEL +int pf_anchor_copyout(const struct pf_ruleset *, + const struct pf_rule *, struct pfioc_rule *); +void pf_anchor_remove(struct pf_rule *); + +#endif /* _KERNEL */ + /* The fingerprint functions can be linked into userland programs (tcpdump) */ int pf_osfp_add(struct pf_osfp_ioctl *); #ifdef _KERNEL @@ -1565,7 +1647,8 @@ struct pf_osfp_enlist * const struct tcphdr *); #endif /* _KERNEL */ struct pf_osfp_enlist * - pf_osfp_fingerprint_hdr(const struct ip *, const struct tcphdr *); + pf_osfp_fingerprint_hdr(const struct ip *, const struct ip6_hdr *, + const struct tcphdr *); void pf_osfp_flush(void); int pf_osfp_get(struct pf_osfp_ioctl *); void pf_osfp_initialize(void); -- cgit v1.1
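Usage note (illustration only, not part of the imported diff): the DIOCXBEGIN/DIOCXROLLBACK/DIOCXCOMMIT cases above now stage each userland struct pfioc_trans_e through malloc'd M_TEMP buffers instead of the old static ones, but the transaction interface a caller sees is unchanged. The sketch below shows roughly how a userland tool drives one begin/commit cycle on the main filter ruleset; it assumes the usual pfctl-style headers and the /dev/pf control device, elides the rule loading that would normally happen against the returned ticket, and trims error handling to err(3).

/*
 * Hypothetical illustration, not part of the OpenBSD 4.1 import:
 * a single-element DIOCXBEGIN/DIOCXCOMMIT transaction against the
 * main filter ruleset, i.e. the interface serviced by the reworked
 * pfioctl() cases above.
 */
#include <sys/param.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/ioctl.h>

#include <net/if.h>
#include <netinet/in.h>
#include <net/pfvar.h>

#include <err.h>
#include <fcntl.h>
#include <string.h>

int
main(void)
{
	struct pfioc_trans	 io;
	struct pfioc_trans_e	 ioe;
	int			 dev;

	if ((dev = open("/dev/pf", O_RDWR)) == -1)
		err(1, "open(/dev/pf)");

	memset(&io, 0, sizeof(io));
	memset(&ioe, 0, sizeof(ioe));
	ioe.rs_num = PF_RULESET_FILTER;	/* empty ioe.anchor selects the main ruleset */
	io.size = 1;			/* one transaction element */
	io.esize = sizeof(ioe);		/* pfioctl() rejects a size mismatch with ENODEV */
	io.array = &ioe;

	/* the kernel copies the element back out with a ticket for the inactive ruleset */
	if (ioctl(dev, DIOCXBEGIN, &io) == -1)
		err(1, "DIOCXBEGIN");

	/* ... rules would be loaded here against ioe.ticket, as pfctl does ... */

	/* commit swaps the inactive ruleset in; with no rules added this flushes the filter rules */
	if (ioctl(dev, DIOCXCOMMIT, &io) == -1)
		err(1, "DIOCXCOMMIT");

	return (0);
}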