diff options
Diffstat (limited to 'sys/netpfil')
-rw-r--r-- | sys/netpfil/ipfw/ip_dn_io.c | 1 | ||||
-rw-r--r-- | sys/netpfil/ipfw/ip_dummynet.c | 3 | ||||
-rw-r--r-- | sys/netpfil/ipfw/ip_fw2.c | 278 | ||||
-rw-r--r-- | sys/netpfil/ipfw/ip_fw_dynamic.c | 23 | ||||
-rw-r--r-- | sys/netpfil/ipfw/ip_fw_nat.c | 85 | ||||
-rw-r--r-- | sys/netpfil/ipfw/ip_fw_pfil.c | 22 | ||||
-rw-r--r-- | sys/netpfil/ipfw/ip_fw_private.h | 57 | ||||
-rw-r--r-- | sys/netpfil/ipfw/ip_fw_sockopt.c | 330 | ||||
-rw-r--r-- | sys/netpfil/ipfw/ip_fw_table.c | 240 | ||||
-rw-r--r-- | sys/netpfil/pf/if_pflog.c | 6 | ||||
-rw-r--r-- | sys/netpfil/pf/if_pfsync.c | 17 | ||||
-rw-r--r-- | sys/netpfil/pf/pf.c | 922 | ||||
-rw-r--r-- | sys/netpfil/pf/pf.h | 7 | ||||
-rw-r--r-- | sys/netpfil/pf/pf_altq.h | 22 | ||||
-rw-r--r-- | sys/netpfil/pf/pf_ioctl.c | 84 | ||||
-rw-r--r-- | sys/netpfil/pf/pf_mtag.h | 1 | ||||
-rw-r--r-- | sys/netpfil/pf/pf_norm.c | 1 | ||||
-rw-r--r-- | sys/netpfil/pf/pf_ruleset.c | 1 |
18 files changed, 1790 insertions, 310 deletions
diff --git a/sys/netpfil/ipfw/ip_dn_io.c b/sys/netpfil/ipfw/ip_dn_io.c index fb75198..a67cf0a 100644 --- a/sys/netpfil/ipfw/ip_dn_io.c +++ b/sys/netpfil/ipfw/ip_dn_io.c @@ -651,6 +651,7 @@ dummynet_send(struct mbuf *m) * to carry reinject info. */ dst = pkt->dn_dir; + pkt->rule.info |= IPFW_IS_DUMMYNET; ifp = pkt->ifp; tag->m_tag_cookie = MTAG_IPFW_RULE; tag->m_tag_id = 0; diff --git a/sys/netpfil/ipfw/ip_dummynet.c b/sys/netpfil/ipfw/ip_dummynet.c index 3a12120..57216be 100644 --- a/sys/netpfil/ipfw/ip_dummynet.c +++ b/sys/netpfil/ipfw/ip_dummynet.c @@ -127,7 +127,7 @@ ipdn_bound_var(int *v, int dflt, int lo, int hi, const char *msg) op = "Clamp"; } else return *v; - if (op && msg) + if (op && msg && bootverbose) printf("%s %s to %d (was %d)\n", op, msg, *v, oldv); return *v; } @@ -2297,7 +2297,6 @@ static moduledata_t dummynet_mod = { #define DN_SI_SUB SI_SUB_PROTO_IFATTACHDOMAIN #define DN_MODEV_ORD (SI_ORDER_ANY - 128) /* after ipfw */ DECLARE_MODULE(dummynet, dummynet_mod, DN_SI_SUB, DN_MODEV_ORD); -MODULE_DEPEND(dummynet, ipfw, 2, 2, 2); MODULE_VERSION(dummynet, 3); /* diff --git a/sys/netpfil/ipfw/ip_fw2.c b/sys/netpfil/ipfw/ip_fw2.c index 1a5b699..c07546b 100644 --- a/sys/netpfil/ipfw/ip_fw2.c +++ b/sys/netpfil/ipfw/ip_fw2.c @@ -140,8 +140,7 @@ VNET_DEFINE(int, fw_verbose); VNET_DEFINE(u_int64_t, norule_counter); VNET_DEFINE(int, verbose_limit); -/* layer3_chain contains the list of rules for layer 3 */ -VNET_DEFINE(struct ip_fw_chain, layer3_chain); +VNET_DEFINE(struct ip_fw_contextes, ip_fw_contexts); VNET_DEFINE(int, ipfw_nat_ready) = 0; @@ -182,9 +181,6 @@ SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, default_to_accept, CTLFLAG_RDTUN, "Make the default rule accept all packets."); TUNABLE_INT("net.inet.ip.fw.default_to_accept", &default_to_accept); TUNABLE_INT("net.inet.ip.fw.tables_max", (int *)&default_fw_tables); -SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, static_count, - CTLFLAG_RD, &VNET_NAME(layer3_chain.n_rules), 0, - "Number of static rules"); #ifdef 
INET6 SYSCTL_DECL(_net_inet6_ip6); @@ -358,8 +354,8 @@ iface_match(struct ifnet *ifp, ipfw_insn_if *cmd, struct ip_fw_chain *chain, uin /* Check by name or by IP address */ if (cmd->name[0] != '\0') { /* match by name */ if (cmd->name[0] == '\1') /* use tablearg to match */ - return ipfw_lookup_table_extended(chain, cmd->p.glob, - ifp->if_xname, tablearg, IPFW_TABLE_INTERFACE); + return (ipfw_lookup_table_extended(chain, cmd->p.glob, + ifp->if_xname, tablearg, IPFW_TABLE_INTERFACE, NULL) != NULL); /* Check name */ if (cmd->p.glob) { if (fnmatch(cmd->name, ifp->if_xname, 0) == 0) @@ -904,6 +900,9 @@ ipfw_chk(struct ip_fw_args *args) */ struct ifnet *oif = args->oif; + if (V_ip_fw_contexts.chain[oif->if_ispare[0]] == NULL) + return (IP_FW_PASS); + int f_pos = 0; /* index of current rule in the array */ int retval = 0; @@ -954,7 +953,14 @@ ipfw_chk(struct ip_fw_args *args) */ int dyn_dir = MATCH_UNKNOWN; ipfw_dyn_rule *q = NULL; - struct ip_fw_chain *chain = &V_layer3_chain; + void *tblent = NULL, *tblent2 = NULL; + + /* XXX: WARNING - The chain is accessed unlocked here. + * There is a potential race here with context handling. + * The chain pointer will get destroyed and a NULL + * pointer dereference can happen! + */ + struct ip_fw_chain *chain = V_ip_fw_contexts.chain[oif->if_ispare[0]]; /* * We store in ulp a pointer to the upper layer protocol header. @@ -1288,6 +1294,8 @@ do { \ continue; skip_or = 0; + tblent = NULL; + tblent2 = NULL; for (l = f->cmd_len, cmd = f->cmd ; l > 0 ; l -= cmdlen, cmd += cmdlen) { int match; @@ -1402,7 +1410,7 @@ do { \ break; case O_IN: /* "out" is "not in" */ - match = (oif == NULL); + match = (args->dir == DIR_IN); break; case O_LAYER2: @@ -1438,11 +1446,18 @@ do { \ case O_IP_SRC_LOOKUP: case O_IP_DST_LOOKUP: if (is_ipv4) { + struct ether_addr *ea = NULL; + uint32_t key = (cmd->opcode == O_IP_DST_LOOKUP) ? 
dst_ip.s_addr : src_ip.s_addr; uint32_t v = 0; + if (args->eh) { + ea = (struct ether_addr*)((cmd->opcode == O_IP_DST_LOOKUP) ? + args->eh->ether_dhost : + args->eh->ether_shost); + } if (cmdlen > F_INSN_SIZE(ipfw_insn_u32)) { /* generic lookup. The key must be * in 32bit big-endian format. @@ -1484,22 +1499,37 @@ do { \ } else break; } - match = ipfw_lookup_table(chain, - cmd->arg1, key, &v); - if (!match) + tblent2 = ipfw_lookup_table(chain, + cmd->arg1, key, &v, ea); + if (tblent2 == NULL) { + match = 0; break; + } else + match = 1; if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) match = ((ipfw_insn_u32 *)cmd)->d[0] == v; - else + if (match) tablearg = v; } else if (is_ipv6) { + struct ether_addr *ea = NULL; uint32_t v = 0; + + if (args->eh) { + ea = (struct ether_addr*)((cmd->opcode == O_IP_DST_LOOKUP) ? + args->eh->ether_dhost : + args->eh->ether_shost); + } void *pkey = (cmd->opcode == O_IP_DST_LOOKUP) ? &args->f_id.dst_ip6: &args->f_id.src_ip6; - match = ipfw_lookup_table_extended(chain, + tblent = ipfw_lookup_table_extended(chain, cmd->arg1, pkey, &v, - IPFW_TABLE_CIDR); + IPFW_TABLE_CIDR, ea); + if (tblent == NULL) { + match = 0; + break; + } else + match = 1; if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) match = ((ipfw_insn_u32 *)cmd)->d[0] == v; if (match) @@ -2314,8 +2344,7 @@ do { \ break; case O_FORWARD_IP: - if (args->eh) /* not valid on layer2 pkts */ - break; + if (!args->eh) {/* not valid on layer2 pkts */ if (q == NULL || q->rule != f || dyn_dir == MATCH_FORWARD) { struct sockaddr_in *sa; @@ -2330,6 +2359,48 @@ do { \ args->next_hop = sa; } } + } else if (args->eh) { + struct m_tag *fwd_tag; + struct sockaddr_in *sa; + u_short sum; + + /* + * Checksum correct? 
(from ip_fastfwd.c) + */ + if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) + sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); + else { + if (hlen == sizeof(struct ip)) + sum = in_cksum_hdr(ip); + else + sum = in_cksum(m, hlen); + } + if (sum) { + IPSTAT_INC(ips_badsum); + retval = IP_FW_DENY; + break; + } + + /* + * Remember that we have checked the IP header and found it valid. + */ + m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); + + sa = &(((ipfw_insn_sa *)cmd)->sa); + fwd_tag = m_tag_get(PACKET_TAG_IPFORWARD, + sizeof(struct sockaddr_in), M_NOWAIT); + if (fwd_tag == NULL) + retval = IP_FW_DENY; + else { + bcopy(sa, (fwd_tag+1), sizeof(struct sockaddr_in)); + m_tag_prepend(m, fwd_tag); + + if (in_localip(sa->sin_addr)) + m->m_flags |= M_FASTFWD_OURS; + m->m_flags |= M_IP_NEXTHOP; + } + } + retval = IP_FW_PASS; l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ @@ -2337,8 +2408,7 @@ do { \ #ifdef INET6 case O_FORWARD_IP6: - if (args->eh) /* not valid on layer2 pkts */ - break; + if (!args->eh) {/* not valid on layer2 pkts */ if (q == NULL || q->rule != f || dyn_dir == MATCH_FORWARD) { struct sockaddr_in6 *sin6; @@ -2346,6 +2416,24 @@ do { \ sin6 = &(((ipfw_insn_sa6 *)cmd)->sa); args->next_hop6 = sin6; } + } else if (args->eh) { + struct m_tag *fwd_tag; + struct sockaddr_in6 *sin6; + + sin6 = &(((ipfw_insn_sa6 *)cmd)->sa); + fwd_tag = m_tag_get(PACKET_TAG_IPFORWARD, + sizeof(struct sockaddr_in6), M_NOWAIT); + if (fwd_tag == NULL) + retval = IP_FW_DENY; + else { + bcopy(sin6, (fwd_tag+1), sizeof(struct sockaddr_in6)); + m_tag_prepend(m, fwd_tag); + + if (in6_localip(&sin6->sin6_addr)) + m->m_flags |= M_FASTFWD_OURS; + m->m_flags |= M_IP6_NEXTHOP; + } + } retval = IP_FW_PASS; l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ @@ -2417,7 +2505,7 @@ do { \ set_match(args, f_pos, chain); /* Check if this is 'global' nat rule */ if (cmd->arg1 == 0) { - retval = ipfw_nat_ptr(args, NULL, m); + retval = ipfw_nat_ptr(args, NULL, m, chain); 
break; } t = ((ipfw_insn_nat *)cmd)->nat; @@ -2432,7 +2520,7 @@ do { \ if (cmd->arg1 != IP_FW_TABLEARG) ((ipfw_insn_nat *)cmd)->nat = t; } - retval = ipfw_nat_ptr(args, t, m); + retval = ipfw_nat_ptr(args, t, m, chain); break; case O_REASS: { @@ -2502,6 +2590,10 @@ do { \ struct ip_fw *rule = chain->map[f_pos]; /* Update statistics */ IPFW_INC_RULE_COUNTER(rule, pktlen); + if (tblent != NULL) + ipfw_count_table_xentry_stats(tblent, pktlen); + if (tblent2 != NULL) + ipfw_count_table_entry_stats(tblent2, pktlen); } else { retval = IP_FW_DENY; printf("ipfw: ouch!, skip past end of rules, denying packet\n"); @@ -2536,7 +2628,9 @@ sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS) if ((error != 0) || (req->newptr == NULL)) return (error); - return (ipfw_resize_tables(&V_layer3_chain, ntables)); + for (int i = 1; i < IP_FW_MAXCTX; i++) + error += ipfw_resize_tables(V_ip_fw_contexts.chain[i], ntables); + return (error); } #endif /* @@ -2614,11 +2708,6 @@ ipfw_destroy(void) static int vnet_ipfw_init(const void *unused) { - int error; - struct ip_fw *rule = NULL; - struct ip_fw_chain *chain; - - chain = &V_layer3_chain; /* First set up some values that are compile time options */ V_autoinc_step = 100; /* bounded to 1..1000 in add_rule() */ @@ -2629,10 +2718,55 @@ vnet_ipfw_init(const void *unused) #ifdef IPFIREWALL_VERBOSE_LIMIT V_verbose_limit = IPFIREWALL_VERBOSE_LIMIT; #endif + + for (int i = 0; i < IP_FW_MAXCTX; i++) + V_ip_fw_contexts.chain[i] = NULL; + + IPFW_CTX_LOCK_INIT(); + + V_ip_fw_contexts.ifnet_arrival = EVENTHANDLER_REGISTER(ifnet_arrival_event, + ipfw_attach_ifnet_event, NULL, EVENTHANDLER_PRI_ANY); + + ipfw_dyn_init(); + + /* First set up some values that are compile time options */ + V_ipfw_vnet_ready = 1; /* Open for business */ + + /* + * Hook the sockopt handler and pfil hooks for ipv4 and ipv6. + * Even if the latter two fail we still keep the module alive + * because the sockopt and layer2 paths are still useful. 
+ * ipfw[6]_hook return 0 on success, ENOENT on failure, + * so we can ignore the exact return value and just set a flag. + * + * Note that V_fw[6]_enable are manipulated by a SYSCTL_PROC so + * changes in the underlying (per-vnet) variables trigger + * immediate hook()/unhook() calls. + * In layer2 we have the same behaviour, except that V_ether_ipfw + * is checked on each packet because there are no pfil hooks. + */ + V_ip_fw_ctl_ptr = ipfw_ctl; + return ipfw_attach_hooks(1); +} + +int +ipfw_context_init(int index) +{ + struct ip_fw_chain *chain; + struct ip_fw *rule = NULL; + + if (index >= IP_FW_MAXCTX) + return (-1); + + TAILQ_INIT(&V_ip_fw_contexts.iflist[index]); + + chain = V_ip_fw_contexts.chain[index]; + + IPFW_LOCK_INIT(chain); + #ifdef IPFIREWALL_NAT LIST_INIT(&chain->nat); #endif - /* insert the default rule and create the initial map */ chain->n_rules = 1; chain->static_len = sizeof(struct ip_fw); @@ -2642,13 +2776,7 @@ vnet_ipfw_init(const void *unused) /* Set initial number of tables */ V_fw_tables_max = default_fw_tables; - error = ipfw_init_tables(chain); - if (error) { - printf("ipfw2: setting up tables failed\n"); - free(chain->map, M_IPFW); - free(rule, M_IPFW); - return (ENOSPC); - } + ipfw_init_tables(chain); /* fill and insert the default rule */ rule->act_ofs = 0; @@ -2660,28 +2788,13 @@ vnet_ipfw_init(const void *unused) chain->default_rule = chain->map[0] = rule; chain->id = rule->id = 1; - IPFW_LOCK_INIT(chain); - ipfw_dyn_init(chain); - - /* First set up some values that are compile time options */ - V_ipfw_vnet_ready = 1; /* Open for business */ + /* + * This can potentially be done on first dynamic rule + * being added to chain. + */ + resize_dynamic_table(chain, V_curr_dyn_buckets); - /* - * Hook the sockopt handler and pfil hooks for ipv4 and ipv6. - * Even if the latter two fail we still keep the module alive - * because the sockopt and layer2 paths are still useful. 
- * ipfw[6]_hook return 0 on success, ENOENT on failure, - * so we can ignore the exact return value and just set a flag. - * - * Note that V_fw[6]_enable are manipulated by a SYSCTL_PROC so - * changes in the underlying (per-vnet) variables trigger - * immediate hook()/unhook() calls. - * In layer2 we have the same behaviour, except that V_ether_ipfw - * is checked on each packet because there are no pfil hooks. - */ - V_ip_fw_ctl_ptr = ipfw_ctl; - error = ipfw_attach_hooks(1); - return (error); + return (0); } /* @@ -2690,11 +2803,9 @@ vnet_ipfw_init(const void *unused) static int vnet_ipfw_uninit(const void *unused) { - struct ip_fw *reap, *rule; - struct ip_fw_chain *chain = &V_layer3_chain; - int i; V_ipfw_vnet_ready = 0; /* tell new callers to go away */ + /* * disconnect from ipv4, ipv6, layer2 and sockopt. * Then grab, release and grab again the WLOCK so we make @@ -2702,12 +2813,51 @@ vnet_ipfw_uninit(const void *unused) */ (void)ipfw_attach_hooks(0 /* detach */); V_ip_fw_ctl_ptr = NULL; - IPFW_UH_WLOCK(chain); - IPFW_UH_WUNLOCK(chain); ipfw_dyn_uninit(0); /* run the callout_drain */ + IPFW_CTX_WLOCK(); + EVENTHANDLER_DEREGISTER(ifnet_arrival_event, V_ip_fw_contexts.ifnet_arrival); + for (int i = 0; i < IP_FW_MAXCTX; i++) { + ipfw_context_uninit(i); + } + IPFW_CTX_WUNLOCK(); + IPFW_CTX_LOCK_DESTROY(); + + ipfw_dyn_uninit(1); /* free the remaining parts */ + + return (0); +} + +int +ipfw_context_uninit(int index) +{ + struct ip_fw_chain *chain; + struct ip_fw_ctx_iflist *ifl; + struct ip_fw *reap, *rule; + struct ifnet *ifp; + int i; + + if (index >= IP_FW_MAXCTX) + return (-1); + + chain = V_ip_fw_contexts.chain[index]; + if (chain == NULL) + return (0); + + while (!TAILQ_EMPTY(&V_ip_fw_contexts.iflist[index])) { + ifl = TAILQ_FIRST(&V_ip_fw_contexts.iflist[index]); + TAILQ_REMOVE(&V_ip_fw_contexts.iflist[index], ifl, entry); + ifp = ifunit(ifl->ifname); + if (ifp != NULL) + ifp->if_ispare[0] = 0; + free(ifl, M_IPFW); + } + IPFW_UH_WLOCK(chain); + 
IPFW_UH_WUNLOCK(chain); + IPFW_UH_WLOCK(chain); + ipfw_destroy_tables(chain); reap = NULL; IPFW_WLOCK(chain); @@ -2723,8 +2873,10 @@ vnet_ipfw_uninit(const void *unused) if (reap != NULL) ipfw_reap_rules(reap); IPFW_LOCK_DESTROY(chain); - ipfw_dyn_uninit(1); /* free the remaining parts */ - return 0; + + free(chain, M_IPFW); + + return (0); } /* diff --git a/sys/netpfil/ipfw/ip_fw_dynamic.c b/sys/netpfil/ipfw/ip_fw_dynamic.c index ad957e9..2f802fc 100644 --- a/sys/netpfil/ipfw/ip_fw_dynamic.c +++ b/sys/netpfil/ipfw/ip_fw_dynamic.c @@ -121,11 +121,9 @@ struct ipfw_dyn_bucket { */ static VNET_DEFINE(struct ipfw_dyn_bucket *, ipfw_dyn_v); static VNET_DEFINE(u_int32_t, dyn_buckets_max); -static VNET_DEFINE(u_int32_t, curr_dyn_buckets); static VNET_DEFINE(struct callout, ipfw_timeout); #define V_ipfw_dyn_v VNET(ipfw_dyn_v) #define V_dyn_buckets_max VNET(dyn_buckets_max) -#define V_curr_dyn_buckets VNET(curr_dyn_buckets) #define V_ipfw_timeout VNET(ipfw_timeout) static VNET_DEFINE(uma_zone_t, ipfw_dyn_rule_zone); @@ -181,6 +179,8 @@ static VNET_DEFINE(u_int32_t, dyn_max); /* max # of dynamic rules */ static int last_log; /* Log ratelimiting */ +VNET_DEFINE(u_int32_t, curr_dyn_buckets); + static void ipfw_dyn_tick(void *vnetx); static void check_dyn_rules(struct ip_fw_chain *, struct ip_fw *, int, int, int); @@ -472,7 +472,7 @@ ipfw_dyn_unlock(ipfw_dyn_rule *q) IPFW_BUCK_UNLOCK(q->bucket); } -static int +int resize_dynamic_table(struct ip_fw_chain *chain, int nbuckets) { int i, k, nbuckets_old; @@ -975,7 +975,6 @@ ipfw_dyn_send_ka(struct mbuf **mtailp, ipfw_dyn_rule *q) static void ipfw_dyn_tick(void * vnetx) { - struct ip_fw_chain *chain; int check_ka = 0; #ifdef VIMAGE struct vnet *vp = vnetx; @@ -983,7 +982,6 @@ ipfw_dyn_tick(void * vnetx) CURVNET_SET(vp); - chain = &V_layer3_chain; /* Run keepalive checks every keepalive_period iff ka is enabled */ if ((V_dyn_keepalive_last + V_dyn_keepalive_period <= time_uptime) && @@ -992,7 +990,12 @@ ipfw_dyn_tick(void * vnetx) 
check_ka = 1; } - check_dyn_rules(chain, NULL, RESVD_SET, check_ka, 1); + IPFW_CTX_RLOCK(); + for (int i = 1; i < IP_FW_MAXCTX; i++) { + if (V_ip_fw_contexts.chain[i] != NULL) + check_dyn_rules(V_ip_fw_contexts.chain[i], NULL, RESVD_SET, check_ka, 1); + } + IPFW_CTX_RUNLOCK(); callout_reset_on(&V_ipfw_timeout, hz, ipfw_dyn_tick, vnetx, 0); @@ -1308,7 +1311,7 @@ ipfw_expire_dyn_rules(struct ip_fw_chain *chain, struct ip_fw *rule, int set) } void -ipfw_dyn_init(struct ip_fw_chain *chain) +ipfw_dyn_init() { V_ipfw_dyn_v = NULL; @@ -1337,12 +1340,6 @@ ipfw_dyn_init(struct ip_fw_chain *chain) uma_zone_set_max(V_ipfw_dyn_rule_zone, V_dyn_max); callout_init(&V_ipfw_timeout, CALLOUT_MPSAFE); - - /* - * This can potentially be done on first dynamic rule - * being added to chain. - */ - resize_dynamic_table(chain, V_curr_dyn_buckets); } void diff --git a/sys/netpfil/ipfw/ip_fw_nat.c b/sys/netpfil/ipfw/ip_fw_nat.c index 0fb4534..627603d 100644 --- a/sys/netpfil/ipfw/ip_fw_nat.c +++ b/sys/netpfil/ipfw/ip_fw_nat.c @@ -64,26 +64,33 @@ ifaddr_change(void *arg __unused, struct ifnet *ifp) KASSERT(curvnet == ifp->if_vnet, ("curvnet(%p) differs from iface vnet(%p)", curvnet, ifp->if_vnet)); - chain = &V_layer3_chain; - IPFW_WLOCK(chain); - /* Check every nat entry... */ - LIST_FOREACH(ptr, &chain->nat, _next) { - /* ...using nic 'ifp->if_xname' as dynamic alias address. */ - if (strncmp(ptr->if_name, ifp->if_xname, IF_NAMESIZE) != 0) + + IPFW_CTX_RLOCK(); + for (int i = 1; i < IP_FW_MAXCTX; i++) { + chain = V_ip_fw_contexts.chain[i]; + if (chain == NULL) continue; - if_addr_rlock(ifp); - TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - if (ifa->ifa_addr == NULL) - continue; - if (ifa->ifa_addr->sa_family != AF_INET) + IPFW_WLOCK(chain); + /* Check every nat entry... */ + LIST_FOREACH(ptr, &chain->nat, _next) { + /* ...using nic 'ifp->if_xname' as dynamic alias address. 
*/ + if (strncmp(ptr->if_name, ifp->if_xname, IF_NAMESIZE) != 0) continue; - ptr->ip = ((struct sockaddr_in *) - (ifa->ifa_addr))->sin_addr; - LibAliasSetAddress(ptr->lib, ptr->ip); + if_addr_rlock(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr == NULL) + continue; + if (ifa->ifa_addr->sa_family != AF_INET) + continue; + ptr->ip = ((struct sockaddr_in *) + (ifa->ifa_addr))->sin_addr; + LibAliasSetAddress(ptr->lib, ptr->ip); + } + if_addr_runlock(ifp); } - if_addr_runlock(ifp); + IPFW_WUNLOCK(chain); } - IPFW_WUNLOCK(chain); + IPFW_CTX_RUNLOCK(); } /* @@ -206,18 +213,18 @@ add_redir_spool_cfg(char *buf, struct cfg_nat *ptr) /* * ipfw_nat - perform mbuf header translation. * - * Note V_layer3_chain has to be locked while calling ipfw_nat() in + * Note *chain has to be locked while calling ipfw_nat() in * 'global' operation mode (t == NULL). * */ static int -ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m) +ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m, + struct ip_fw_chain *chain) { struct mbuf *mcl; struct ip *ip; /* XXX - libalias duct tape */ int ldt, retval, found; - struct ip_fw_chain *chain; char *c; ldt = 0; @@ -276,7 +283,6 @@ ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m) } found = 0; - chain = &V_layer3_chain; IPFW_RLOCK_ASSERT(chain); /* Check every nat entry... 
*/ LIST_FOREACH(t, &chain->nat, _next) { @@ -391,11 +397,10 @@ lookup_nat(struct nat_list *l, int nat_id) } static int -ipfw_nat_cfg(struct sockopt *sopt) +ipfw_nat_cfg(struct sockopt *sopt, struct ip_fw_chain *chain) { struct cfg_nat *cfg, *ptr; char *buf; - struct ip_fw_chain *chain = &V_layer3_chain; size_t len; int gencnt, error = 0; @@ -468,10 +473,9 @@ out: } static int -ipfw_nat_del(struct sockopt *sopt) +ipfw_nat_del(struct sockopt *sopt, struct ip_fw_chain *chain) { struct cfg_nat *ptr; - struct ip_fw_chain *chain = &V_layer3_chain; int i; sooptcopyin(sopt, &i, sizeof i, sizeof i); @@ -492,9 +496,8 @@ ipfw_nat_del(struct sockopt *sopt) } static int -ipfw_nat_get_cfg(struct sockopt *sopt) +ipfw_nat_get_cfg(struct sockopt *sopt, struct ip_fw_chain *chain) { - struct ip_fw_chain *chain = &V_layer3_chain; struct cfg_nat *n; struct cfg_redir *r; struct cfg_spool *s; @@ -552,14 +555,11 @@ retry: } static int -ipfw_nat_get_log(struct sockopt *sopt) +ipfw_nat_get_log(struct sockopt *sopt, struct ip_fw_chain *chain) { uint8_t *data; struct cfg_nat *ptr; int i, size; - struct ip_fw_chain *chain; - - chain = &V_layer3_chain; IPFW_RLOCK(chain); /* one pass to count, one to copy the data */ @@ -604,17 +604,22 @@ vnet_ipfw_nat_uninit(const void *arg __unused) struct cfg_nat *ptr, *ptr_temp; struct ip_fw_chain *chain; - chain = &V_layer3_chain; - IPFW_WLOCK(chain); - LIST_FOREACH_SAFE(ptr, &chain->nat, _next, ptr_temp) { - LIST_REMOVE(ptr, _next); - del_redir_spool_cfg(ptr, &ptr->redir_chain); - LibAliasUninit(ptr->lib); - free(ptr, M_IPFW); + IPFW_CTX_RLOCK(); + for (int i = 1; i < IP_FW_MAXCTX; i++) { + chain = V_ip_fw_contexts.chain[i]; + IPFW_WLOCK(chain); + LIST_FOREACH_SAFE(ptr, &chain->nat, _next, ptr_temp) { + LIST_REMOVE(ptr, _next); + del_redir_spool_cfg(ptr, &ptr->redir_chain); + LibAliasUninit(ptr->lib); + free(ptr, M_IPFW); + } + flush_nat_ptrs(chain, -1 /* flush all */); + V_ipfw_nat_ready = 0; + IPFW_WUNLOCK(chain); } - flush_nat_ptrs(chain, -1 /* flush 
all */); - V_ipfw_nat_ready = 0; - IPFW_WUNLOCK(chain); + IPFW_CTX_RUNLOCK(); + return (0); } diff --git a/sys/netpfil/ipfw/ip_fw_pfil.c b/sys/netpfil/ipfw/ip_fw_pfil.c index 2bcd1dd..bf225b8 100644 --- a/sys/netpfil/ipfw/ip_fw_pfil.c +++ b/sys/netpfil/ipfw/ip_fw_pfil.c @@ -143,8 +143,9 @@ again: } args.m = *m0; - args.oif = dir == DIR_OUT ? ifp : NULL; + args.oif = ifp; args.inp = inp; + args.dir = dir; ipfw = ipfw_chk(&args); *m0 = args.m; @@ -314,9 +315,8 @@ ipfw_check_frame(void *arg, struct mbuf **m0, struct ifnet *dst, int dir, /* XXX can we free it after use ? */ mtag->m_tag_id = PACKET_TAG_NONE; r = (struct ipfw_rule_ref *)(mtag + 1); - if (r->info & IPFW_ONEPASS) - return (0); - args.rule = *r; + m_tag_delete(*m0, mtag); + return (0); } /* I need some amt of data to be contiguous */ @@ -333,12 +333,15 @@ ipfw_check_frame(void *arg, struct mbuf **m0, struct ifnet *dst, int dir, save_eh = *eh; /* save copy for restore below */ m_adj(m, ETHER_HDR_LEN); /* strip ethernet header */ + dir = dir == PFIL_IN ? DIR_IN : DIR_OUT; + args.m = m; /* the packet we are looking at */ - args.oif = dir == PFIL_OUT ? dst: NULL; /* destination, if any */ + args.oif = dst; /* destination, if any */ args.next_hop = NULL; /* we do not support forward yet */ args.next_hop6 = NULL; /* we do not support forward yet */ args.eh = &save_eh; /* MAC header for bridged/MAC packets */ args.inp = NULL; /* used by ipfw uid/gid/jail rules */ + args.dir = dir; /* pfSense addition */ i = ipfw_chk(&args); m = args.m; if (m != NULL) { @@ -369,13 +372,12 @@ ipfw_check_frame(void *arg, struct mbuf **m0, struct ifnet *dst, int dir, case IP_FW_DUMMYNET: ret = EACCES; - int dir; if (ip_dn_io_ptr == NULL) break; /* i.e. drop */ *m0 = NULL; - dir = PROTO_LAYER2 | (dst ? DIR_OUT : DIR_IN); + dir = PROTO_LAYER2 | dir; ip_dn_io_ptr(&m, dir, &args); return 0; @@ -499,7 +501,11 @@ ipfw_hook(int onoff, int pf) hook_func = (pf == AF_LINK) ? ipfw_check_frame : ipfw_check_packet; - (void) (onoff ? 
pfil_add_hook : pfil_remove_hook) + if (onoff) + (void) pfil_add_named_hook + (hook_func, NULL, "ipfw", PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh); + else + (void) pfil_remove_hook (hook_func, NULL, PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh); return 0; diff --git a/sys/netpfil/ipfw/ip_fw_private.h b/sys/netpfil/ipfw/ip_fw_private.h index e4a2f31..4f4cf93 100644 --- a/sys/netpfil/ipfw/ip_fw_private.h +++ b/sys/netpfil/ipfw/ip_fw_private.h @@ -101,6 +101,7 @@ struct ip_fw_args { struct ipfw_flow_id f_id; /* grabbed from IP header */ //uint32_t cookie; /* a cookie depending on rule action */ + uint32_t dir; /* direction */ struct inpcb *inp; struct _ip6dn_args dummypar; /* dummynet->ip6_output */ @@ -170,6 +171,9 @@ enum { /* result for matching dynamic rules */ MATCH_UNKNOWN, }; +VNET_DECLARE(u_int32_t, curr_dyn_buckets); +#define V_curr_dyn_buckets VNET(curr_dyn_buckets) + /* * The lock for dynamic rules is only used once outside the file, * and only to release the result of lookup_dyn_rule(). @@ -178,6 +182,7 @@ enum { /* result for matching dynamic rules */ struct ip_fw_chain; void ipfw_expire_dyn_rules(struct ip_fw_chain *, struct ip_fw *, int); void ipfw_dyn_unlock(ipfw_dyn_rule *q); +int resize_dynamic_table(struct ip_fw_chain *, int); struct tcphdr; struct mbuf *ipfw_send_pkt(struct mbuf *, struct ipfw_flow_id *, @@ -189,7 +194,7 @@ ipfw_dyn_rule *ipfw_lookup_dyn_rule(struct ipfw_flow_id *pkt, void ipfw_remove_dyn_children(struct ip_fw *rule); void ipfw_get_dynamic(struct ip_fw_chain *chain, char **bp, const char *ep); -void ipfw_dyn_init(struct ip_fw_chain *); /* per-vnet initialization */ +void ipfw_dyn_init(void); /* per-vnet initialization */ void ipfw_dyn_uninit(int); /* per-vnet deinitialization */ int ipfw_dyn_len(void); @@ -200,9 +205,6 @@ VNET_DECLARE(int, fw_one_pass); VNET_DECLARE(int, fw_verbose); #define V_fw_verbose VNET(fw_verbose) -VNET_DECLARE(struct ip_fw_chain, layer3_chain); -#define V_layer3_chain VNET(layer3_chain) - VNET_DECLARE(u_int32_t, 
set_disable); #define V_set_disable VNET(set_disable) @@ -236,6 +238,33 @@ struct ip_fw_chain { #endif }; +struct ip_fw_ctx_iflist { + TAILQ_ENTRY(ip_fw_ctx_iflist) entry; + char ifname[IFNAMSIZ]; +}; + +#define IP_FW_MAXCTX 4096 +struct ip_fw_contextes { + struct ip_fw_chain *chain[IP_FW_MAXCTX]; /* Arrays of contextes */ + TAILQ_HEAD(, ip_fw_ctx_iflist) iflist[IP_FW_MAXCTX]; + struct rwlock rwctx; + eventhandler_tag ifnet_arrival; +}; + +VNET_DECLARE(struct ip_fw_contextes, ip_fw_contexts); +#define V_ip_fw_contexts VNET(ip_fw_contexts) + +#define IPFW_CTX_LOCK_INIT() rw_init(&V_ip_fw_contexts.rwctx, "IPFW context") +#define IPFW_CTX_LOCK_DESTROY() rw_destroy(&V_ip_fw_contexts.rwctx) +#define IPFW_CTX_WLOCK() rw_wlock(&V_ip_fw_contexts.rwctx) +#define IPFW_CTX_WUNLOCK() rw_wunlock(&V_ip_fw_contexts.rwctx) +#define IPFW_CTX_RLOCK() rw_rlock(&V_ip_fw_contexts.rwctx) +#define IPFW_CTX_RUNLOCK() rw_runlock(&V_ip_fw_contexts.rwctx) + +void ipfw_attach_ifnet_event(void *, struct ifnet *); +int ipfw_context_init(int); +int ipfw_context_uninit(int); + struct sockopt; /* used by tcp_var.h */ /* Macro for working with various counters */ @@ -303,16 +332,21 @@ int ipfw_chk(struct ip_fw_args *args); void ipfw_reap_rules(struct ip_fw *head); /* In ip_fw_table.c */ +struct ether_addr; struct radix_node; -int ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, - uint32_t *val); -int ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, - uint32_t *val, int type); +void *ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, + uint32_t *val, struct ether_addr *); +void *ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, + uint32_t *val, int type, struct ether_addr *); +void ipfw_count_table_entry_stats(void *, int); +void ipfw_count_table_xentry_stats(void *, int); +int ipfw_zero_table_xentry_stats(struct ip_fw_chain *, ipfw_table_xentry *); +int ipfw_lookup_table_xentry(struct ip_fw_chain *, 
ipfw_table_xentry *); int ipfw_init_tables(struct ip_fw_chain *ch); void ipfw_destroy_tables(struct ip_fw_chain *ch); int ipfw_flush_table(struct ip_fw_chain *ch, uint16_t tbl); int ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, - uint8_t plen, uint8_t mlen, uint8_t type, uint32_t value); + uint8_t plen, uint8_t mlen, uint8_t type, u_int64_t mac_addr, uint32_t value); int ipfw_del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, uint8_t plen, uint8_t mlen, uint8_t type); int ipfw_count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt); @@ -326,8 +360,9 @@ int ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables); extern struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int); -typedef int ipfw_nat_t(struct ip_fw_args *, struct cfg_nat *, struct mbuf *); -typedef int ipfw_nat_cfg_t(struct sockopt *); +typedef int ipfw_nat_t(struct ip_fw_args *, struct cfg_nat *, struct mbuf *, + struct ip_fw_chain *); +typedef int ipfw_nat_cfg_t(struct sockopt *, struct ip_fw_chain *); VNET_DECLARE(int, ipfw_nat_ready); #define V_ipfw_nat_ready VNET(ipfw_nat_ready) diff --git a/sys/netpfil/ipfw/ip_fw_sockopt.c b/sys/netpfil/ipfw/ip_fw_sockopt.c index 3c342f7..2776e4c 100644 --- a/sys/netpfil/ipfw/ip_fw_sockopt.c +++ b/sys/netpfil/ipfw/ip_fw_sockopt.c @@ -736,8 +736,8 @@ check_ipfw_struct(struct ip_fw *rule, int size) if (!IPFW_NAT_LOADED) return EINVAL; if (cmdlen != F_INSN_SIZE(ipfw_insn_nat)) - goto bad_size; - goto check_action; + goto bad_size; + goto check_action; case O_FORWARD_MAC: /* XXX not implemented yet */ case O_CHECK_STATE: case O_COUNT: @@ -943,12 +943,15 @@ ipfw_ctl(struct sockopt *sopt) #define RULE_MAXSIZE (256*sizeof(u_int32_t)) int error; size_t size, len, valsize; + struct ifnet *ifp; struct ip_fw *buf, *rule; - struct ip_fw_chain *chain; + static struct ip_fw_chain *chain; + struct ip_fw_ctx_iflist *tmpifl, *tmpifl2 = NULL; + ip_fw3_opheader *op3 = NULL; u_int32_t rulenum[2]; uint32_t opt; char 
xbuf[128]; - ip_fw3_opheader *op3 = NULL; + char *ifname; error = priv_check(sopt->sopt_td, PRIV_NETINET_IPFW); if (error) @@ -965,7 +968,6 @@ ipfw_ctl(struct sockopt *sopt) return (error); } - chain = &V_layer3_chain; error = 0; /* Save original valsize before it is altered via sooptcopyin() */ @@ -980,9 +982,238 @@ ipfw_ctl(struct sockopt *sopt) return (error); op3 = (ip_fw3_opheader *)xbuf; opt = op3->opcode; + + if (op3->ctxid >= IP_FW_MAXCTX) + return (EINVAL); + + if (opt != IP_FW_CTX_GET && opt != IP_FW_CTX_ADD) { + if (op3->ctxid == 0) + return (ENOENT); + + IPFW_CTX_RLOCK(); + chain = V_ip_fw_contexts.chain[op3->ctxid]; + IPFW_CTX_RUNLOCK(); + if (chain == NULL) + return (ENOENT); + } } + /* Verification needed to avoid problems */ switch (opt) { + case IP_FW_CTX_GET: + case IP_FW_CTX_ADD: + case IP_FW_CTX_DEL: + break; + default: + if (chain == NULL) + return (EINVAL); + /* NOTREACHED */ + } + + switch (opt) { + case IP_FW_CTX_ADD: + IPFW_CTX_WLOCK(); + if (V_ip_fw_contexts.chain[op3->ctxid] != NULL) { + IPFW_CTX_WUNLOCK(); + return (EEXIST); + } + + chain = malloc(sizeof(struct ip_fw_chain), M_IPFW, M_WAITOK | M_ZERO); + TAILQ_INIT(&V_ip_fw_contexts.iflist[op3->ctxid]); + V_ip_fw_contexts.chain[op3->ctxid] = chain; + ipfw_context_init(op3->ctxid); /* XXX: error checking */ + IPFW_CTX_WUNLOCK(); + break; + + case IP_FW_CTX_DEL: + IPFW_CTX_WLOCK(); + if (V_ip_fw_contexts.chain[op3->ctxid] == NULL) { + IPFW_CTX_WUNLOCK(); + return (ENOENT); + } + + ipfw_context_uninit(op3->ctxid); + V_ip_fw_contexts.chain[op3->ctxid] = NULL; + IPFW_CTX_WUNLOCK(); + break; + + case IP_FW_CTX_GET: + { + int i, n, len = 0, want; + char *bufout, *tmpbuf; + + sopt->sopt_valsize = valsize; + + IPFW_CTX_RLOCK(); + for (i = 1; i < IP_FW_MAXCTX; i++) { + if (op3->ctxid > 0 && op3->ctxid != i) + continue; + if (op3->ctxid > 0 && op3->ctxid < i) + break; + + if (V_ip_fw_contexts.chain[i] == NULL) + continue; + + /* Calculate number of bytes for the integer */ + n = i; + while (n > 0) 
{ + n /= 10; + len++; + } + TAILQ_FOREACH(tmpifl, &V_ip_fw_contexts.iflist[i], entry) { + len += strlen(tmpifl->ifname) + 1; + } + len += 3; // newline, :, space + } + IPFW_CTX_RUNLOCK(); + + if (len > sopt->sopt_valsize) { + sopt->sopt_valsize = len; + break; + } + + bufout = malloc(len, M_TEMP, M_WAITOK | M_ZERO); + if (bufout == NULL) + break; + + /* Record our size for later checks */ + want = len; + len = 0; + IPFW_CTX_RLOCK(); + /* Recalculate length to detect if smth changed */ + for (i = 1; i < IP_FW_MAXCTX; i++) { + if (op3->ctxid > 0 && op3->ctxid != i) + continue; + if (op3->ctxid > 0 && op3->ctxid < i) + break; + + if (V_ip_fw_contexts.chain[i] == NULL) + continue; + + /* Calculate number of bytes for the integer */ + n = i; + while (n > 0) { + n /= 10; + len++; + } + TAILQ_FOREACH(tmpifl, &V_ip_fw_contexts.iflist[i], entry) { + len += strlen(tmpifl->ifname) + 1; + } + len += 3; // newline, :, space + } + + if (want >= len) { + tmpbuf = bufout; + for (i = 1; i < IP_FW_MAXCTX; i++) { + if (op3->ctxid > 0 && op3->ctxid != i) + continue; + if (op3->ctxid > 0 && op3->ctxid < i) + break; + + if (V_ip_fw_contexts.chain[i] == NULL) + continue; + + sprintf(tmpbuf, "%d: ", i); + tmpbuf += strlen(tmpbuf); + TAILQ_FOREACH(tmpifl, &V_ip_fw_contexts.iflist[i], entry) { + sprintf(tmpbuf, "%s,", tmpifl->ifname); + tmpbuf += strlen(tmpifl->ifname) + 1; + } + sprintf(tmpbuf, "\n"); + tmpbuf++; + } + } + IPFW_CTX_RUNLOCK(); + + if (want >= len) + error = sooptcopyout(sopt, bufout, len); + else + len = 0; + free(bufout, M_TEMP); + } + break; + + case IP_FW_CTX_SET: + /* XXX: Maybe not use this option at all? 
*/ + IPFW_CTX_RLOCK(); + if (V_ip_fw_contexts.chain[op3->ctxid] == NULL) + error = ENOENT; + else + chain = V_ip_fw_contexts.chain[op3->ctxid]; + IPFW_CTX_RUNLOCK(); + break; + + case IP_FW_CTX_ADDMEMBER: + { + int i; + + ifname = (char *)(op3 + 1); + ifp = ifunit(ifname); + if (ifp == NULL) + return (ENOENT); + + tmpifl = malloc(sizeof(*tmpifl), M_IPFW, M_WAITOK | M_ZERO); + + IPFW_CTX_WLOCK(); + if (V_ip_fw_contexts.chain[op3->ctxid] == NULL) { + IPFW_CTX_WUNLOCK(); + free(tmpifl, M_IPFW); + return (ENOENT); + } + + for (i = 1; i < IP_FW_MAXCTX; i++) { + if (V_ip_fw_contexts.chain[i] == NULL) + continue; + + TAILQ_FOREACH(tmpifl2, &V_ip_fw_contexts.iflist[i], entry) { + if (strlen(tmpifl2->ifname) != strlen(ifname)) + continue; + if (!strcmp(tmpifl2->ifname, ifname)) + goto ctxifacefound; + } + } +ctxifacefound: + if (tmpifl2 != NULL) { + IPFW_CTX_WUNLOCK(); + free(tmpifl, M_IPFW); + return (EEXIST); + } + + strlcpy(tmpifl->ifname, ifname, IFNAMSIZ); + TAILQ_INSERT_HEAD(&V_ip_fw_contexts.iflist[op3->ctxid], tmpifl, entry); + ifp->if_ispare[0] = op3->ctxid; + IPFW_CTX_WUNLOCK(); + } + break; + + case IP_FW_CTX_DELMEMBER: + IPFW_CTX_WLOCK(); + if (V_ip_fw_contexts.chain[op3->ctxid] == NULL) { + IPFW_CTX_WUNLOCK(); + return (ENOENT); + } + + ifname = (char *)(op3 + 1); + TAILQ_FOREACH(tmpifl2, &V_ip_fw_contexts.iflist[op3->ctxid], entry) { + if (strlen(tmpifl2->ifname) != strlen(ifname)) + continue; + if (!strcmp(tmpifl2->ifname, ifname)) + break; + } + if (tmpifl2 == NULL) { + IPFW_CTX_WUNLOCK(); + return (ENOENT); + } + + TAILQ_REMOVE(&V_ip_fw_contexts.iflist[op3->ctxid], tmpifl2, entry); + IPFW_CTX_WUNLOCK(); + free(tmpifl2, M_IPFW); + + ifp = ifunit(ifname); + if (ifp != NULL) + ifp->if_ispare[0] = 0; + break; + case IP_FW_GET: /* * pass up a copy of the current rules. 
Static rules @@ -1124,7 +1355,7 @@ ipfw_ctl(struct sockopt *sopt) break; error = ipfw_add_table_entry(chain, ent.tbl, &ent.addr, sizeof(ent.addr), ent.masklen, - IPFW_TABLE_CIDR, ent.value); + IPFW_TABLE_CIDR, ent.mac_addr, ent.value); } break; @@ -1157,12 +1388,12 @@ ipfw_ctl(struct sockopt *sopt) error = EINVAL; break; } - + len = xent->len - offsetof(ipfw_table_xentry, k); error = (opt == IP_FW_TABLE_XADD) ? ipfw_add_table_entry(chain, xent->tbl, &xent->k, - len, xent->masklen, xent->type, xent->value) : + len, xent->masklen, xent->type, xent->mac_addr, xent->value) : ipfw_del_table_entry(chain, xent->tbl, &xent->k, len, xent->masklen, xent->type); } @@ -1245,6 +1476,54 @@ ipfw_ctl(struct sockopt *sopt) } break; + case IP_FW_TABLE_XZEROENTRY: /* IP_FW3 */ + { + ipfw_table_xentry *xent = (ipfw_table_xentry *)(op3 + 1); + + /* Check minimum header size */ + if (IP_FW3_OPLENGTH(sopt) < offsetof(ipfw_table_xentry, k)) { + error = EINVAL; + break; + } + + /* Check if len field is valid */ + if (xent->len > sizeof(ipfw_table_xentry)) { + error = EINVAL; + break; + } + + error = ipfw_zero_table_xentry_stats(chain, xent); + if (!error) { + xent->timestamp += boottime.tv_sec; + error = sooptcopyout(sopt, xent, sizeof(*xent)); + } + } + break; + + case IP_FW_TABLE_XLISTENTRY: /* IP_FW3 */ + { + ipfw_table_xentry *xent = (ipfw_table_xentry *)(op3 + 1); + + /* Check minimum header size */ + if (IP_FW3_OPLENGTH(sopt) < offsetof(ipfw_table_xentry, k)) { + error = EINVAL; + break; + } + + /* Check if len field is valid */ + if (xent->len > sizeof(ipfw_table_xentry)) { + error = EINVAL; + break; + } + + error = ipfw_lookup_table_xentry(chain, xent); + if (!error) { + xent->timestamp += boottime.tv_sec; + error = sooptcopyout(sopt, xent, sizeof(*xent)); + } + } + break; + case IP_FW_TABLE_XLIST: /* IP_FW3 */ { ipfw_xtable *tbl; @@ -1284,7 +1563,7 @@ ipfw_ctl(struct sockopt *sopt) /*--- NAT operations are protected by the IPFW_LOCK ---*/ case IP_FW_NAT_CFG: if (IPFW_NAT_LOADED) - 
error = ipfw_nat_cfg_ptr(sopt); + error = ipfw_nat_cfg_ptr(sopt, chain); else { printf("IP_FW_NAT_CFG: %s\n", "ipfw_nat not present, please load it"); @@ -1294,7 +1573,7 @@ ipfw_ctl(struct sockopt *sopt) case IP_FW_NAT_DEL: if (IPFW_NAT_LOADED) - error = ipfw_nat_del_ptr(sopt); + error = ipfw_nat_del_ptr(sopt, chain); else { printf("IP_FW_NAT_DEL: %s\n", "ipfw_nat not present, please load it"); @@ -1304,7 +1583,7 @@ ipfw_ctl(struct sockopt *sopt) case IP_FW_NAT_GET_CONFIG: if (IPFW_NAT_LOADED) - error = ipfw_nat_get_cfg_ptr(sopt); + error = ipfw_nat_get_cfg_ptr(sopt, chain); else { printf("IP_FW_NAT_GET_CFG: %s\n", "ipfw_nat not present, please load it"); @@ -1314,7 +1593,7 @@ ipfw_ctl(struct sockopt *sopt) case IP_FW_NAT_GET_LOG: if (IPFW_NAT_LOADED) - error = ipfw_nat_get_log_ptr(sopt); + error = ipfw_nat_get_log_ptr(sopt, chain); else { printf("IP_FW_NAT_GET_LOG: %s\n", "ipfw_nat not present, please load it"); @@ -1331,6 +1610,33 @@ ipfw_ctl(struct sockopt *sopt) #undef RULE_MAXSIZE } +void +ipfw_attach_ifnet_event(void *arg __unused, struct ifnet *ifp) +{ + struct ip_fw_ctx_iflist *tmpifl; + + CURVNET_SET(ifp->if_vnet); + + IPFW_CTX_RLOCK(); + for (int i = 1; i < IP_FW_MAXCTX; i++) { + if (V_ip_fw_contexts.chain[i] == NULL) + continue; + TAILQ_FOREACH(tmpifl, &V_ip_fw_contexts.iflist[i], entry) { + if (strlen(tmpifl->ifname) != strlen(ifp->if_xname)) + continue; + if (!strcmp(tmpifl->ifname, ifp->if_xname)) { + printf("Restoring context for interface %s to %d\n", ifp->if_xname, i); + ifp->if_ispare[0] = i; + goto ifctxdone; + break; + } + } + } +ifctxdone: + IPFW_CTX_RUNLOCK(); + + CURVNET_RESTORE(); +} #define RULE_MAXSIZE (256*sizeof(u_int32_t)) diff --git a/sys/netpfil/ipfw/ip_fw_table.c b/sys/netpfil/ipfw/ip_fw_table.c index 760a10c..689596f 100644 --- a/sys/netpfil/ipfw/ip_fw_table.c +++ b/sys/netpfil/ipfw/ip_fw_table.c @@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$"); #include <net/route.h> #include <net/vnet.h> +#include <net/ethernet.h> #include <netinet/in.h> 
#include <netinet/ip_var.h> /* struct ipfw_rule_ref */ #include <netinet/ip_fw.h> @@ -74,7 +75,11 @@ static MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables"); struct table_entry { struct radix_node rn[2]; struct sockaddr_in addr, mask; + u_int64_t mac_addr; u_int32_t value; + u_int32_t timestamp; + u_int64_t bytes; + u_int64_t packets; }; struct xaddr_iface { @@ -97,7 +102,11 @@ struct table_xentry { #endif struct xaddr_iface ifmask; } m; + u_int64_t mac_addr; u_int32_t value; + u_int32_t timestamp; + u_int64_t bytes; + u_int64_t packets; }; /* @@ -137,7 +146,7 @@ ipv6_writemask(struct in6_addr *addr6, uint8_t mask) int ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, - uint8_t plen, uint8_t mlen, uint8_t type, uint32_t value) + uint8_t plen, uint8_t mlen, uint8_t type, u_int64_t mac_addr, uint32_t value) { struct radix_node_head *rnh, **rnh_ptr; struct table_entry *ent; @@ -161,6 +170,7 @@ ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, return (EINVAL); ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO); ent->value = value; + ent->mac_addr = mac_addr; /* Set 'total' structure length */ KEY_LEN(ent->addr) = KEY_LEN_INET; KEY_LEN(ent->mask) = KEY_LEN_INET; @@ -182,6 +192,7 @@ ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, return (EINVAL); xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO); xent->value = value; + xent->mac_addr = mac_addr; /* Set 'total' structure length */ KEY_LEN(xent->a.addr6) = KEY_LEN_INET6; KEY_LEN(xent->m.mask6) = KEY_LEN_INET6; @@ -281,6 +292,28 @@ ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, IPFW_WUNLOCK(ch); if (rn == NULL) { + if (type == IPFW_TABLE_CIDR) { + /* Just update if any new value needed */ + if (plen == sizeof(in_addr_t)) { + ent = (struct table_entry *)(rnh->rnh_matchaddr(addr_ptr, rnh)); + if (ent != NULL) { + if (ent->mac_addr) { + if (!bcmp(&mac_addr, &ent->mac_addr, ETHER_ADDR_LEN)) + ent->value = value; + } 
else + ent->value = value; + } + } else { + xent = (struct table_xentry *)(rnh->rnh_matchaddr(addr_ptr, rnh)); + if (xent != NULL) { + if (xent->mac_addr) { + if (!bcmp(&mac_addr, &xent->mac_addr, ETHER_ADDR_LEN)) + xent->value = value; + } else + xent->value = value; + } + } + } free(ent_ptr, M_IPFW_TBL); return (EEXIST); } @@ -530,31 +563,194 @@ ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables) return (0); } -int +void * ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, - uint32_t *val) + uint32_t *val, struct ether_addr *ea) { struct radix_node_head *rnh; struct table_entry *ent; struct sockaddr_in sa; if (tbl >= V_fw_tables_max) - return (0); + return (NULL); if ((rnh = ch->tables[tbl]) == NULL) - return (0); + return (NULL); KEY_LEN(sa) = KEY_LEN_INET; sa.sin_addr.s_addr = addr; ent = (struct table_entry *)(rnh->rnh_matchaddr(&sa, rnh)); if (ent != NULL) { - *val = ent->value; - return (1); + if (ea && ent->mac_addr) { + if (bcmp((u_char *)&ent->mac_addr, ea->octet, ETHER_ADDR_LEN) != 0) + ent = NULL; + } + if (ent != NULL) { + *val = ent->value; + return (ent); + } } - return (0); + return (NULL); +} + +void +ipfw_count_table_entry_stats(void *arg, int pktlen) +{ + struct table_entry *xent = arg; + + xent->packets++; + xent->bytes += pktlen; + xent->timestamp = time_uptime; +} + +void +ipfw_count_table_xentry_stats(void *arg, int pktlen) +{ + ipfw_table_xentry *xent= arg; + + xent->packets++; + xent->bytes += pktlen; + xent->timestamp = time_uptime; } int +ipfw_zero_table_xentry_stats(struct ip_fw_chain *ch, ipfw_table_xentry *arg) +{ + struct radix_node_head *rnh; + struct table_xentry *xent; + struct sockaddr_in6 sa6; + struct xaddr_iface iface; + + if (arg->tbl >= V_fw_tables_max) + return (EINVAL); + if (ch->tables[arg->tbl] != NULL) + rnh = ch->tables[arg->tbl]; + else if (ch->xtables[arg->tbl] != NULL) + rnh = ch->xtables[arg->tbl]; + else + return (EINVAL); + + switch (arg->type) { + case IPFW_TABLE_CIDR: + if 
(ch->tables[arg->tbl] != NULL) { + /* XXX: Maybe better by FreeBSD 11!! */ + struct sockaddr_in sa; + struct table_entry *ent; + + KEY_LEN(sa) = KEY_LEN_INET; + sa.sin_addr.s_addr = *((in_addr_t *)&arg->k.addr6); + ent = (struct table_entry *)(rnh->rnh_matchaddr(&sa, rnh)); + if (ent == NULL) + return (EINVAL); + + arg->bytes = 0; + arg->packets = 0; + arg->value = ent->value; + arg->timestamp = time_uptime; + + return (0); + } else { + KEY_LEN(sa6) = KEY_LEN_INET6; + memcpy(&sa6.sin6_addr, &arg->k.addr6, sizeof(struct in6_addr)); + xent = (struct table_xentry *)(rnh->rnh_matchaddr(&sa6, rnh)); + } + break; + + case IPFW_TABLE_INTERFACE: + KEY_LEN(iface) = KEY_LEN_IFACE + + strlcpy(iface.ifname, arg->k.iface, IF_NAMESIZE) + 1; + /* Assume direct match */ + /* FIXME: Add interface pattern matching */ + xent = (struct table_xentry *)(rnh->rnh_lookup(&iface, NULL, rnh)); + break; + + default: + return (EINVAL); + } + + if (xent != NULL) { + xent->bytes = 0; + xent->packets = 0; + xent->timestamp = time_uptime; + + return (0); + } + return (EINVAL); +} + +int +ipfw_lookup_table_xentry(struct ip_fw_chain *ch, ipfw_table_xentry *arg) +{ + struct radix_node_head *rnh; + struct table_xentry *xent; + + if (arg->tbl >= V_fw_tables_max) + return (EINVAL); + if (ch->tables[arg->tbl] != NULL) + rnh = ch->tables[arg->tbl]; + else if (ch->xtables[arg->tbl] != NULL) + rnh = ch->xtables[arg->tbl]; + else + return (EINVAL); + + switch (arg->type) { + case IPFW_TABLE_CIDR: + { + if (ch->tables[arg->tbl] != NULL) { + /* XXX: Maybe better by FreeBSD 11!! 
*/ + struct sockaddr_in sa; + struct table_entry *ent; + + KEY_LEN(sa) = KEY_LEN_INET; + sa.sin_addr.s_addr = *((in_addr_t *)&arg->k.addr6); + ent = (struct table_entry *)(rnh->rnh_matchaddr(&sa, rnh)); + if (ent == NULL) + return (EINVAL); + + arg->bytes = ent->bytes; + arg->packets = ent->packets; + arg->value = ent->value; + arg->timestamp = ent->timestamp; + arg->mac_addr = ent->mac_addr; + return (0); + } else { + struct sockaddr_in6 sa6; + KEY_LEN(sa6) = KEY_LEN_INET6; + memcpy(&sa6.sin6_addr, &arg->k.addr6, sizeof(struct in6_addr)); + xent = (struct table_xentry *)(rnh->rnh_matchaddr(&sa6, rnh)); + } + } + break; + + case IPFW_TABLE_INTERFACE: + { + struct xaddr_iface iface; + + KEY_LEN(iface) = KEY_LEN_IFACE + + strlcpy(iface.ifname, arg->k.iface, IF_NAMESIZE) + 1; + /* Assume direct match */ + /* FIXME: Add interface pattern matching */ + xent = (struct table_xentry *)(rnh->rnh_lookup(&iface, NULL, rnh)); + } + break; + + default: + return (0); + } + + if (xent != NULL) { + arg->bytes = xent->bytes; + arg->packets = xent->packets; + arg->value = xent->value; + arg->timestamp = xent->timestamp; + arg->mac_addr = xent->mac_addr; + + return (0); + } + return (EINVAL); +} + +void * ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, - uint32_t *val, int type) + uint32_t *val, int type, struct ether_addr *ea) { struct radix_node_head *rnh; struct table_xentry *xent; @@ -562,15 +758,21 @@ ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, struct xaddr_iface iface; if (tbl >= V_fw_tables_max) - return (0); + return (NULL); if ((rnh = ch->xtables[tbl]) == NULL) - return (0); + return (NULL); switch (type) { case IPFW_TABLE_CIDR: KEY_LEN(sa6) = KEY_LEN_INET6; memcpy(&sa6.sin6_addr, paddr, sizeof(struct in6_addr)); xent = (struct table_xentry *)(rnh->rnh_matchaddr(&sa6, rnh)); + if (xent != NULL) { + if (ea && xent->mac_addr) { + if (bcmp((u_char *)&xent->mac_addr, ea->octet, ETHER_ADDR_LEN) != 0) + xent = NULL; + 
} + } break; case IPFW_TABLE_INTERFACE: @@ -582,14 +784,14 @@ ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, break; default: - return (0); + return (NULL); } if (xent != NULL) { *val = xent->value; - return (1); + return (xent); } - return (0); + return (NULL); } static int @@ -696,9 +898,13 @@ dump_table_xentry_base(struct radix_node *rn, void *arg) else xent->masklen = 33 - ffs(ntohl(n->mask.sin_addr.s_addr)); /* Save IPv4 address as deprecated IPv6 compatible */ - xent->k.addr6.s6_addr32[3] = n->addr.sin_addr.s_addr; + xent->k.addr6.s6_addr32[0] = n->addr.sin_addr.s_addr; xent->flags = IPFW_TCF_INET; xent->value = n->value; + xent->bytes = n->bytes; + xent->packets = n->packets; + xent->timestamp = n->timestamp; + xent->mac_addr = n->mac_addr; tbl->cnt++; return (0); } @@ -742,6 +948,10 @@ dump_table_xentry_extended(struct radix_node *rn, void *arg) } xent->value = n->value; + xent->bytes = n->bytes; + xent->packets = n->packets; + xent->timestamp = n->timestamp; + xent->mac_addr = n->mac_addr; tbl->cnt++; return (0); } diff --git a/sys/netpfil/pf/if_pflog.c b/sys/netpfil/pf/if_pflog.c index 1efd5e2..5c22806 100644 --- a/sys/netpfil/pf/if_pflog.c +++ b/sys/netpfil/pf/if_pflog.c @@ -209,7 +209,7 @@ pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, return (0); bzero(&hdr, sizeof(hdr)); - hdr.length = PFLOG_REAL_HDRLEN; + hdr.length = PFLOG_HDRLEN; hdr.af = af; hdr.action = rm->action; hdr.reason = reason; @@ -218,13 +218,16 @@ pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, if (am == NULL) { hdr.rulenr = htonl(rm->nr); hdr.subrulenr = 1; + hdr.ridentifier = rm->cuid; } else { hdr.rulenr = htonl(am->nr); hdr.subrulenr = htonl(rm->nr); + hdr.ridentifier = rm->cuid; if (ruleset != NULL && ruleset->anchor != NULL) strlcpy(hdr.ruleset, ruleset->anchor->name, sizeof(hdr.ruleset)); } +#ifdef PF_USER_INFO /* * XXXGL: we avoid pf_socket_lookup() when we are holding * state lock, 
since this leads to unsafe LOR. @@ -239,6 +242,7 @@ pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, hdr.pid = NO_PID; hdr.rule_uid = rm->cuid; hdr.rule_pid = rm->cpid; +#endif hdr.dir = dir; #ifdef INET diff --git a/sys/netpfil/pf/if_pfsync.c b/sys/netpfil/pf/if_pfsync.c index 90e6f8f..7936c07 100644 --- a/sys/netpfil/pf/if_pfsync.c +++ b/sys/netpfil/pf/if_pfsync.c @@ -185,9 +185,6 @@ struct pfsync_softc { struct ip_moptions sc_imo; struct in_addr sc_sync_peer; uint32_t sc_flags; -#define PFSYNCF_OK 0x00000001 -#define PFSYNCF_DEFER 0x00000002 -#define PFSYNCF_PUSH 0x00000004 uint8_t sc_maxupdates; struct ip sc_template; struct callout sc_tmo; @@ -365,7 +362,7 @@ pfsync_clone_destroy(struct ifnet *ifp) callout_drain(&sc->sc_bulkfail_tmo); callout_drain(&sc->sc_bulk_tmo); - if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) + if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p && V_pfsync_carp_adj > 0) (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy"); bpfdetach(ifp); if_detach(ifp); @@ -1150,7 +1147,7 @@ pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) sc->sc_ureq_sent = 0; sc->sc_bulk_tries = 0; callout_stop(&sc->sc_bulkfail_tmo); - if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) + if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p && V_pfsync_carp_adj > 0) (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync bulk done"); sc->sc_flags |= PFSYNCF_OK; @@ -1308,8 +1305,7 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) } pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; - pfsyncr.pfsyncr_defer = (PFSYNCF_DEFER == - (sc->sc_flags & PFSYNCF_DEFER)); + pfsyncr.pfsyncr_defer = sc->sc_flags; PFSYNC_UNLOCK(sc); return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))); @@ -1401,7 +1397,7 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr; /* Request a full state table update. 
*/ - if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) + if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p && V_pfsync_carp_adj > 0) (*carp_demote_adj_p)(V_pfsync_carp_adj, "pfsync bulk start"); sc->sc_flags &= ~PFSYNCF_OK; @@ -1631,6 +1627,7 @@ pfsync_sendout(int schedswi) sc->sc_ifp->if_obytes += m->m_pkthdr.len; sc->sc_len = PFSYNC_MINPKT; + /* XXX: SHould not drop voluntarily update packets! */ if (!_IF_QFULL(&sc->sc_ifp->if_snd)) _IF_ENQUEUE(&sc->sc_ifp->if_snd, m); else { @@ -1776,7 +1773,7 @@ pfsync_undefer_state(struct pf_state *st, int drop) } } - panic("%s: unable to find deferred state", __func__); + printf("%s: unable to find deferred state", __func__); } static void @@ -2144,7 +2141,7 @@ pfsync_bulk_fail(void *arg) sc->sc_ureq_sent = 0; sc->sc_bulk_tries = 0; PFSYNC_LOCK(sc); - if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) + if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p && V_pfsync_carp_adj > 0) (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync bulk fail"); sc->sc_flags |= PFSYNCF_OK; diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index 48da880..ebda220 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -62,6 +62,8 @@ __FBSDID("$FreeBSD$"); #include <net/if.h> #include <net/if_types.h> +#include <net/ethernet.h> +#include <net/if_vlan_var.h> #include <net/route.h> #include <net/radix_mpath.h> #include <net/vnet.h> @@ -86,6 +88,9 @@ __FBSDID("$FreeBSD$"); #include <netinet/udp_var.h> #include <netpfil/ipfw/ip_fw_private.h> /* XXX: only for DIR_IN/DIR_OUT */ +#include <netinet/ip_fw.h> +#include <netinet/ip_dummynet.h> +#include <netinet/ip_divert.h> #ifdef INET6 #include <netinet/ip6.h> @@ -226,6 +231,8 @@ static int pf_state_key_attach(struct pf_state_key *, static void pf_state_key_detach(struct pf_state *, int); static int pf_state_key_ctor(void *, int, void *, int); static u_int32_t pf_tcp_iss(struct pf_pdesc *); +void pf_rule_to_actions(struct pf_rule *, + struct pf_rule_actions *); static int 
pf_test_rule(struct pf_rule **, struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, struct pf_pdesc *, struct pf_rule **, @@ -258,7 +265,8 @@ static int pf_test_state_icmp(struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, u_short *); static int pf_test_state_other(struct pf_state **, int, - struct pfi_kif *, struct mbuf *, struct pf_pdesc *); + struct pfi_kif *, struct mbuf *, int, + struct pf_pdesc *); static u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t, sa_family_t); static u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t, @@ -281,6 +289,10 @@ static u_int pf_purge_expired_states(u_int, int); static void pf_purge_unlinked_rules(void); static int pf_mtag_uminit(void *, int, int); static void pf_mtag_free(struct m_tag *); +static void pf_packet_redo_nat(struct mbuf *, struct pf_pdesc *, + int, struct pf_state *, int); +static void pf_packet_undo_nat(struct mbuf *, struct pf_pdesc *, + int, struct pf_state *, int); #ifdef INET static void pf_route(struct mbuf **, struct pf_rule *, int, struct ifnet *, struct pf_state *, @@ -300,28 +312,36 @@ VNET_DECLARE(int, pf_end_threads); VNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]); -#define PACKET_LOOPED(pd) ((pd)->pf_mtag && \ - (pd)->pf_mtag->flags & PF_PACKET_LOOPED) +#define PACKET_LOOPED(mtag) ((mtag)->flags & PF_PACKET_LOOPED) + +#define PF_DIVERT_MAXPACKETS_REACHED() \ +do { \ + if (r->spare2 && \ + s->packets[dir == PF_OUT] > r->spare2) \ + /* fake that divert already happened */ \ + pd.pf_mtag->flags |= PF_PACKET_LOOPED; \ +} while(0) #define STATE_LOOKUP(i, k, d, s, pd) \ do { \ (s) = pf_find_state((i), (k), (d)); \ if ((s) == NULL) \ return (PF_DROP); \ - if (PACKET_LOOPED(pd)) \ + if (PACKET_LOOPED(pd->pf_mtag)) { \ + if ((s)->key[PF_SK_WIRE] != (s)->key[PF_SK_STACK]) { \ + pf_packet_redo_nat(m, pd, off, s, direction); \ + } \ return (PF_PASS); \ + } \ if ((d) == PF_OUT && \ (((s)->rule.ptr->rt == PF_ROUTETO && \ - (s)->rule.ptr->direction == 
PF_OUT) || \ - ((s)->rule.ptr->rt == PF_REPLYTO && \ - (s)->rule.ptr->direction == PF_IN)) && \ + (s)->rule.ptr->direction == PF_OUT)) && \ (s)->rt_kif != NULL && \ (s)->rt_kif != (i)) \ return (PF_PASS); \ } while (0) -#define BOUND_IFACE(r, k) \ - ((r)->rule_flag & PFRULE_IFBOUND) ? (k) : V_pfi_all +#define BOUND_IFACE(r, k) k #define STATE_INC_COUNTERS(s) \ do { \ @@ -407,6 +427,160 @@ pf_addr_cmp(struct pf_addr *a, struct pf_addr *b, sa_family_t af) return (0); } +static void +pf_packet_undo_nat(struct mbuf *m, struct pf_pdesc *pd, int off, + struct pf_state *state, int direction) +{ + struct pf_state_key *nk; + + if (state == NULL || state->nat_rule.ptr == NULL) + return; + + if (state->nat_rule.ptr->action == PF_RDR || + state->nat_rule.ptr->action == PF_BINAT) + nk = (state)->key[PF_SK_WIRE]; + else + nk = (state)->key[PF_SK_STACK]; + + switch (pd->proto) { + case IPPROTO_TCP: { + struct tcphdr *th = pd->hdr.tcp; + + if (direction == PF_OUT) { + pf_change_ap(m, pd->src, &th->th_sport, pd->ip_sum, + &th->th_sum, &nk->addr[pd->sidx], + nk->port[pd->sidx], 0, pd->af); + } else { + pf_change_ap(m, pd->dst, &th->th_dport, pd->ip_sum, + &th->th_sum, &nk->addr[pd->didx], + nk->port[pd->didx], 0, pd->af); + } + m_copyback(m, off, sizeof(*th), (caddr_t)th); + } + break; + case IPPROTO_UDP: { + struct udphdr *uh = pd->hdr.udp; + + if (direction == PF_OUT) { + pf_change_ap(m, pd->src, &uh->uh_sport, pd->ip_sum, + &uh->uh_sum, &nk->addr[pd->sidx], + nk->port[pd->sidx], 1, pd->af); + } else { + pf_change_ap(m, pd->dst, &uh->uh_dport, pd->ip_sum, + &uh->uh_sum, &nk->addr[pd->didx], + nk->port[pd->didx], 1, pd->af); + } + m_copyback(m, off, sizeof(*uh), (caddr_t)uh); + } + break; + /* case IPPROTO_ICMP: */ + /* XXX: If we want to do this for icmp is probably wrong!?! 
*/ + /* break; */ + default: + if (direction == PF_OUT) { + switch (pd->af) { + case AF_INET: + pf_change_a(&pd->src->v4.s_addr, + pd->ip_sum, nk->addr[pd->sidx].v4.s_addr, + 0); + break; + case AF_INET6: + PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af); + break; + } + } else { + switch (pd->af) { + case AF_INET: + pf_change_a(&pd->dst->v4.s_addr, + pd->ip_sum, nk->addr[pd->didx].v4.s_addr, + 0); + break; + case AF_INET6: + PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af); + break; + } + } + break; + } +} + +static void +pf_packet_redo_nat(struct mbuf *m, struct pf_pdesc *pd, int off, + struct pf_state *state, int direction) +{ + struct pf_state_key *nk; + + if (state == NULL || state->nat_rule.ptr == NULL) + return; + + if (state->nat_rule.ptr->action == PF_RDR || + state->nat_rule.ptr->action == PF_BINAT) + nk = (state)->key[PF_SK_STACK]; + else + nk = (state)->key[PF_SK_WIRE]; + + switch (pd->proto) { + case IPPROTO_TCP: { + struct tcphdr *th = pd->hdr.tcp; + + if (direction == PF_OUT) { + pf_change_ap(m, pd->src, &th->th_sport, pd->ip_sum, + &th->th_sum, &nk->addr[pd->sidx], + nk->port[pd->sidx], 0, pd->af); + } else { + pf_change_ap(m, pd->dst, &th->th_dport, pd->ip_sum, + &th->th_sum, &nk->addr[pd->didx], + nk->port[pd->didx], 0, pd->af); + } + m_copyback(m, off, sizeof(*th), (caddr_t)th); + } + break; + case IPPROTO_UDP: { + struct udphdr *uh = pd->hdr.udp; + + if (direction == PF_OUT) { + pf_change_ap(m, pd->src, &uh->uh_sport, pd->ip_sum, + &uh->uh_sum, &nk->addr[pd->sidx], + nk->port[pd->sidx], 1, pd->af); + } else { + pf_change_ap(m, pd->dst, &uh->uh_dport, pd->ip_sum, + &uh->uh_sum, &nk->addr[pd->didx], + nk->port[pd->didx], 1, pd->af); + } + m_copyback(m, off, sizeof(*uh), (caddr_t)uh); + } + break; + /* case IPPROTO_ICMP: */ + /* XXX: If we want to do this for icmp is probably wrong!?! 
*/ + /* break; */ + default: + if (direction == PF_OUT) { + switch (pd->af) { + case AF_INET: + pf_change_a(&pd->src->v4.s_addr, + pd->ip_sum, nk->addr[pd->sidx].v4.s_addr, + 0); + break; + case AF_INET6: + PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af); + break; + } + } else { + switch (pd->af) { + case AF_INET: + pf_change_a(&pd->dst->v4.s_addr, + pd->ip_sum, nk->addr[pd->didx].v4.s_addr, + 0); + break; + case AF_INET6: + PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af); + break; + } + } + break; + } +} + static __inline uint32_t pf_hashkey(struct pf_state_key *sk) { @@ -440,6 +614,20 @@ pf_hashsrc(struct pf_addr *addr, sa_family_t af) return (h & pf_srchashmask); } +#ifdef ALTQ +static int +pf_state_hash(struct pf_state *s) +{ + u_int32_t hv = (intptr_t)s / sizeof(*s); + + hv ^= crc32(&s->src, sizeof(s->src)); + hv ^= crc32(&s->dst, sizeof(s->dst)); + if (hv == 0) + hv = 1; + return (hv); +} +#endif + #ifdef INET6 void pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) @@ -1285,7 +1473,7 @@ pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir) /* List is sorted, if-bound states before floating ones. 
*/ TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) - if (s->kif == V_pfi_all || s->kif == kif) { + { PF_STATE_LOCK(s); PF_HASHROW_UNLOCK(kh); if (s->timeout >= PFTM_MAX) { @@ -1971,9 +2159,9 @@ pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) switch (aw1->type) { case PF_ADDR_ADDRMASK: case PF_ADDR_RANGE: - if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0)) + if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6)) return (1); - if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0)) + if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6)) return (1); return (0); case PF_ADDR_DYNIFTL: @@ -2440,6 +2628,26 @@ pf_send_tcp(struct mbuf *replyto, const struct pf_rule *r, sa_family_t af, pf_send(pfse); } +int +pf_ieee8021q_setpcp(struct mbuf *m, struct pf_rule *r) +{ + struct m_tag *mtag; + + KASSERT(r->ieee8021q_pcp.setpcp & SETPCP_VALID, + ("%s with invalid setpcp", __func__)); + + mtag = m_tag_locate(m, MTAG_8021Q, MTAG_8021Q_PCP_OUT, NULL); + if (mtag == NULL) { + mtag = m_tag_alloc(MTAG_8021Q, MTAG_8021Q_PCP_OUT, + sizeof(uint8_t), M_NOWAIT); + if (mtag == NULL) + return (ENOMEM); + m_tag_prepend(m, mtag); + } + *(uint8_t *)(mtag + 1) = (r->ieee8021q_pcp.setpcp & SETPCP_PCP_MASK); + return (0); +} + static void pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, struct pf_rule *r) @@ -2613,6 +2821,37 @@ pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p) return (pf_match(op, a1, a2, p)); } +int +pf_match_ieee8021q_pcp(u_int8_t op, u_int8_t pcp1, u_int8_t pcp2, + struct mbuf *m) +{ + struct m_tag *mtag; + uint8_t mpcp; + + /* + * Packets without 802.1q headers are treated as having a PCP of 0 + * (best effort). 
+ */ + mtag = m_tag_locate(m, MTAG_8021Q, MTAG_8021Q_PCP_IN, NULL); + if (mtag != NULL) + mpcp = *(uint8_t *)(mtag + 1); + else + mpcp = IEEE8021Q_PCP_BE; + + /* + * 802.1q uses a non-traditional ordering, in which 1 < 0, allowing + * default 0-tagged ("best effort") traffic to take precedence over + * 1-tagged ("background") traffic. Renumber both PCP arguments + * before making a comparison so that we can use boring arithmetic + * operators. + */ + pcp1 = ((pcp1 == 0) ? 1 : ((pcp1 == 1) ? 0 : pcp1)); + pcp2 = ((pcp2 == 0) ? 1 : ((pcp2 == 1) ? 0 : pcp2)); + mpcp = ((mpcp == 0) ? 1 : ((mpcp == 1) ? 0 : mpcp)); + return (pf_match(op, pcp1, pcp2, mpcp)); +} + +#ifdef PF_USER_INFO static int pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) { @@ -2628,6 +2867,7 @@ pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) return (0); return (pf_match(op, a1, a2, g)); } +#endif int pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag, int mtag) @@ -2821,6 +3061,22 @@ pf_addr_inc(struct pf_addr *addr, sa_family_t af) } #endif /* INET6 */ +void +pf_rule_to_actions(struct pf_rule *r, struct pf_rule_actions *a) +{ + if (r->qid) + a->qid = r->qid; + if (r->pqid) + a->pqid = r->pqid; + if (r->pdnpipe) + a->pdnpipe = r->pdnpipe; + if (r->dnpipe) + a->dnpipe = r->dnpipe; + if (r->free_flags & PFRULE_DN_IS_PIPE) + a->flags |= PFRULE_DN_IS_PIPE; +} + +#ifdef PF_USER_INFO int pf_socket_lookup(int direction, struct pf_pdesc *pd, struct mbuf *m) { @@ -2900,6 +3156,7 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd, struct mbuf *m) return (1); } +#endif static u_int8_t pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) @@ -3091,12 +3348,14 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, PF_RULES_RASSERT(); +#ifdef PF_USER_INFO if (inp != NULL) { INP_LOCK_ASSERT(inp); pd->lookup.uid = inp->inp_cred->cr_uid; pd->lookup.gid = inp->inp_cred->cr_groups[0]; pd->lookup.done = 1; } +#endif switch (pd->proto) { case IPPROTO_TCP: @@ 
-3307,7 +3566,11 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, /* icmp only. type always 0 in other cases */ else if (r->code && r->code != icmpcode + 1) r = TAILQ_NEXT(r, entries); - else if (r->tos && !(r->tos == pd->tos)) + else if ((r->rule_flag & PFRULE_TOS) && r->tos && + !(r->tos == pd->tos)) + r = TAILQ_NEXT(r, entries); + else if ((r->rule_flag & PFRULE_DSCP) && r->tos && + !(r->tos == (pd->tos & DSCP_MASK))) r = TAILQ_NEXT(r, entries); else if (r->rule_flag & PFRULE_FRAGMENT) r = TAILQ_NEXT(r, entries); @@ -3315,6 +3578,7 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, (r->flagset & th->th_flags) != r->flags) r = TAILQ_NEXT(r, entries); /* tcp/udp only. uid.op always 0 in other cases */ +#ifdef PF_USER_INFO else if (r->uid.op && (pd->lookup.done || (pd->lookup.done = pf_socket_lookup(direction, pd, m), 1)) && !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], @@ -3326,6 +3590,11 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], pd->lookup.gid)) r = TAILQ_NEXT(r, entries); +#endif + else if (r->ieee8021q_pcp.op && + !pf_match_ieee8021q_pcp(r->ieee8021q_pcp.op, + r->ieee8021q_pcp.pcp[0], r->ieee8021q_pcp.pcp[1], m)) + r = TAILQ_NEXT(r, entries); else if (r->prob && r->prob <= arc4random()) r = TAILQ_NEXT(r, entries); @@ -3343,10 +3612,20 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, if (r->rtableid >= 0) rtableid = r->rtableid; if (r->anchor == NULL) { - match = 1; - *rm = r; - *am = a; - *rsm = ruleset; + if (r->action == PF_MATCH) { + r->packets[direction == PF_OUT]++; + r->bytes[direction == PF_OUT] += pd->tot_len; + pf_rule_to_actions(r, &pd->act); + if (r->log) + PFLOG_PACKET(kif, m, af, + direction, PFRES_MATCH, r, + a, ruleset, pd, 1); + } else { + match = 1; + *rm = r; + *am = a; + *rsm = ruleset; + } if ((*rm)->quick) break; r = TAILQ_NEXT(r, entries); @@ -3365,6 +3644,9 @@ 
pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, REASON_SET(&reason, PFRES_MATCH); + /* apply actions for last matching pass/block rule */ + pf_rule_to_actions(r, &pd->act); + if (r->log || (nr != NULL && nr->log)) { if (rewrite) m_copyback(m, off, hdrlen, pd->hdr.any); @@ -3538,6 +3820,11 @@ pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, s->state_flags |= PFSTATE_SLOPPY; s->log = r->log & PF_LOG_ALL; s->sync_state = PFSYNC_S_NONE; + s->qid = pd->act.qid; + s->pqid = pd->act.pqid; + s->pdnpipe = pd->act.pdnpipe; + s->dnpipe = pd->act.dnpipe; + s->state_flags |= pd->act.flags; if (nr != NULL) s->log |= nr->log & PF_LOG_ALL; switch (pd->proto) { @@ -3776,6 +4063,9 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); + else if ((r->rule_flag & PFRULE_DSCP) && r->tos && + !(r->tos == (pd->tos & DSCP_MASK))) + r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY) r = TAILQ_NEXT(r, entries); else if (pd->proto == IPPROTO_UDP && @@ -3788,6 +4078,10 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, pd->proto == IPPROTO_ICMPV6) && (r->type || r->code)) r = TAILQ_NEXT(r, entries); + else if (r->ieee8021q_pcp.op && + !pf_match_ieee8021q_pcp(r->ieee8021q_pcp.op, + r->ieee8021q_pcp.pcp[0], r->ieee8021q_pcp.pcp[1], m)) + r = TAILQ_NEXT(r, entries); else if (r->prob && r->prob <= (arc4random() % (UINT_MAX - 1) + 1)) r = TAILQ_NEXT(r, entries); @@ -3796,10 +4090,20 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, r = TAILQ_NEXT(r, entries); else { if (r->anchor == NULL) { - match = 1; - *rm = r; - *am = a; - *rsm = ruleset; + if (r->action == PF_MATCH) { + r->packets[direction == PF_OUT]++; + r->bytes[direction == PF_OUT] += pd->tot_len; + pf_rule_to_actions(r, &pd->act); + if (r->log) + PFLOG_PACKET(kif, m, af, + direction, 
PFRES_MATCH, r, + a, ruleset, pd, 1); + } else { + match = 1; + *rm = r; + *am = a; + *rsm = ruleset; + } if ((*rm)->quick) break; r = TAILQ_NEXT(r, entries); @@ -3818,6 +4122,9 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, REASON_SET(&reason, PFRES_MATCH); + /* apply actions for last matching pass/block rule */ + pf_rule_to_actions(r, &pd->act); + if (r->log) PFLOG_PACKET(kif, m, af, direction, reason, r, a, ruleset, pd, 1); @@ -5061,7 +5368,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, static int pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, - struct mbuf *m, struct pf_pdesc *pd) + struct mbuf *m, int off, struct pf_pdesc *pd) { struct pf_state_peer *src, *dst; struct pf_state_key_cmp key; @@ -5339,6 +5646,12 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, ip = mtod(m0, struct ip *); + if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { + if (s) + PF_STATE_UNLOCK(s); + return; + } + bzero(&dst, sizeof(dst)); dst.sin_family = AF_INET; dst.sin_len = sizeof(dst); @@ -5386,7 +5699,71 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, if (ifp == NULL) goto bad; - if (oifp != ifp) { + else if (r->rt == PF_REPLYTO || (r->rt == PF_ROUTETO && ifp->if_type == IFT_ENC)) { + /* XXX: Copied from ifaof_ifpforaddr() since it mostly will not return NULL! 
*/ + struct sockaddr_in inaddr; + struct sockaddr *addr; + struct ifaddr *ifa; + char *cp, *cp2, *cp3; + char *cplim; + + inaddr.sin_addr = ip->ip_dst; + inaddr.sin_family = AF_INET; + inaddr.sin_len = sizeof(inaddr); + inaddr.sin_port = 0; + addr = (struct sockaddr *)&inaddr; + + IF_ADDR_RLOCK(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET) + continue; + if (ifa->ifa_netmask == 0) { + if ((bcmp(addr, ifa->ifa_addr, addr->sa_len) == 0) || + (ifa->ifa_dstaddr && + (bcmp(addr, ifa->ifa_dstaddr, addr->sa_len) == 0))) { + IF_ADDR_RUNLOCK(ifp); + return; + } + continue; + } + if (ifp->if_flags & IFF_POINTOPOINT) { + if (bcmp(addr, ifa->ifa_dstaddr, addr->sa_len) == 0) { + IF_ADDR_RUNLOCK(ifp); + return; + } + } else { + cp = addr->sa_data; + cp2 = ifa->ifa_addr->sa_data; + cp3 = ifa->ifa_netmask->sa_data; + cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; + for (; cp3 < cplim; cp3++) + if ((*cp++ ^ *cp2++) & *cp3) + break; + if (cp3 == cplim) { + IF_ADDR_RUNLOCK(ifp); + return; + } + } + } + IF_ADDR_RUNLOCK(ifp); + } + else if (r->rt == PF_ROUTETO && r->direction == dir && in_localip(ip->ip_dst)) + return; + + if (s != NULL && r->rt == PF_REPLYTO) { + /* + * Send it out since it came from state recorded ifp(rt_addr). + * Routing table lookup might not have chosen the correct interface! + */ + } else if (oifp != ifp) { + if (in_broadcast(ip->ip_dst, oifp)) /* XXX: LOCKING of address list?!
*/ + return; + + if (s && r->rt == PF_ROUTETO && pd->nat_rule != NULL && + r->direction == PF_OUT && r->direction == dir && pd->pf_mtag->routed < 2) { + pf_packet_undo_nat(m0, pd, ntohs(ip->ip_off), s, dir); + } + if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS) goto bad; else if (m0 == NULL) @@ -5440,6 +5817,9 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, error = EMSGSIZE; KMOD_IPSTAT_INC(ips_cantfrag); if (r->rt != PF_DUPTO) { + if (s && pd->nat_rule != NULL) + pf_packet_undo_nat(m0, pd, ntohs(ip->ip_off), s, dir); + icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, ifp->if_mtu); goto done; @@ -5519,6 +5899,12 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, ip6 = mtod(m0, struct ip6_hdr *); + if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) { + if (s) + PF_STATE_UNLOCK(s); + return; + } + bzero(&dst, sizeof(dst)); dst.sin6_family = AF_INET6; dst.sin6_len = sizeof(dst); @@ -5558,9 +5944,71 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, if (ifp == NULL) goto bad; + else if (r->rt == PF_REPLYTO) { + /* XXX: Copied from ifaof_ifpforaddr() since it mostly will not return NULL! 
*/ + struct sockaddr_in6 inaddr6; + struct sockaddr *addr; + struct ifaddr *ifa; + char *cp, *cp2, *cp3; + char *cplim; + + inaddr6.sin6_addr = ip6->ip6_dst; + inaddr6.sin6_family = AF_INET6; + inaddr6.sin6_len = sizeof(inaddr6); + inaddr6.sin6_port = 0; + inaddr6.sin6_flowinfo = 0; + addr = (struct sockaddr *)&inaddr6; + + IF_ADDR_RLOCK(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + if (ifa->ifa_netmask == 0) { + if ((bcmp(addr, ifa->ifa_addr, addr->sa_len) == 0) || + (ifa->ifa_dstaddr && + (bcmp(addr, ifa->ifa_dstaddr, addr->sa_len) == 0))) { + IF_ADDR_RUNLOCK(ifp); + return; + } + continue; + } + if (ifp->if_flags & IFF_POINTOPOINT) { + if (bcmp(addr, ifa->ifa_dstaddr, addr->sa_len) == 0) { + IF_ADDR_RUNLOCK(ifp); + return; + } + } else { + cp = addr->sa_data; + cp2 = ifa->ifa_addr->sa_data; + cp3 = ifa->ifa_netmask->sa_data; + cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; + for (; cp3 < cplim; cp3++) + if ((*cp++ ^ *cp2++) & *cp3) + break; + if (cp3 == cplim) { + IF_ADDR_RUNLOCK(ifp); + return; + } + } + } + IF_ADDR_RUNLOCK(ifp); + } else if (r->rt == PF_ROUTETO && r->direction == dir && in6_localaddr(&ip6->ip6_dst)) + return; + + if (s != NULL && r->rt == PF_REPLYTO) { + /* + * Send it out since it came from state recorded ifp(rt_addr). + * Routing table lookup might not have chosen the correct interface!
+ */ + } else if (oifp != ifp) { + + if (s && r->rt == PF_ROUTETO && pd->nat_rule != NULL && + r->direction == PF_OUT && r->direction == dir && pd->pf_mtag->routed < 2) { + int ip_off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr); + pf_packet_undo_nat(m0, pd, ip_off, s, dir); + } - if (oifp != ifp) { - if (pf_test6(PF_FWD, ifp, &m0, NULL) != PF_PASS) + if (pf_test6(PF_OUT, ifp, &m0, NULL) != PF_PASS) goto bad; else if (m0 == NULL) goto done; @@ -5593,9 +6041,12 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, nd6_output(ifp, ifp, m0, &dst, NULL); else { in6_ifstat_inc(ifp, ifs6_in_toobig); - if (r->rt != PF_DUPTO) + if (r->rt != PF_DUPTO) { + if (s && pd->nat_rule != NULL) + pf_packet_undo_nat(m0, pd, ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr), s, dir); + icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); - else + } else goto bad; } @@ -5761,7 +6212,11 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) struct pf_state *s = NULL; struct pf_ruleset *ruleset = NULL; struct pf_pdesc pd; - int off, dirndx, pqid = 0; + int off = 0, dirndx, pqid = 0; + int loopedfrom = 0; + u_int16_t divertcookie = 0; + u_int8_t divflags = 0; + struct ip_fw_args dnflow; M_ASSERTPKTHDR(m); @@ -5783,26 +6238,33 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) if (m->m_flags & M_SKIP_FIREWALL) return (PF_PASS); - pd.pf_mtag = pf_find_mtag(m); + pd.pf_mtag = pf_get_mtag(m); + if (pd.pf_mtag == NULL) { + REASON_SET(&reason, PFRES_MEMORY); + log = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: dropping packet due to failed memory allocation for tags\n")); + return PF_DROP; + } PF_RULES_RLOCK(); - if (ip_divert_ptr != NULL && + if ((ip_divert_ptr != NULL || ip_dn_io_ptr != NULL) && ((ipfwtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL)) != NULL)) { struct ipfw_rule_ref *rr = (struct ipfw_rule_ref *)(ipfwtag+1); - if (rr->info & IPFW_IS_DIVERT && rr->rulenum == 0) { - if (pd.pf_mtag == NULL && - 
((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { - action = PF_DROP; - goto done; - } - pd.pf_mtag->flags |= PF_PACKET_LOOPED; - m_tag_delete(m, ipfwtag); + pd.pf_mtag->flags |= PF_PACKET_LOOPED; + if (rr->info & IPFW_IS_DUMMYNET) + loopedfrom = 1; + if (rr->info & IPFW_IS_DIVERT) { + divertcookie = rr->rulenum; + divflags = (u_int8_t)(divertcookie >> 8); + divertcookie &= ~PFSTATE_DIVERT_MASK; } if (pd.pf_mtag && pd.pf_mtag->flags & PF_FASTFWD_OURS_PRESENT) { m->m_flags |= M_FASTFWD_OURS; pd.pf_mtag->flags &= ~PF_FASTFWD_OURS_PRESENT; } + m_tag_delete(m, ipfwtag); } else if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) { /* We do IP header normalization and packet reassembly here */ action = PF_DROP; @@ -5845,6 +6307,10 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) struct tcphdr th; pd.hdr.tcp = &th; + dnflow.f_id._flags = th.th_flags; + dnflow.f_id.dst_port = ntohs(th.th_dport); + dnflow.f_id.src_port = ntohs(th.th_sport); + if (!pf_pull_hdr(m, off, &th, sizeof(th), &action, &reason, AF_INET)) { log = action != PF_PASS; @@ -5859,8 +6325,6 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { - if (pfsync_update_state_ptr != NULL) - pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; @@ -5874,6 +6338,9 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) struct udphdr uh; pd.hdr.udp = &uh; + dnflow.f_id.dst_port = ntohs(uh.uh_dport); + dnflow.f_id.src_port = ntohs(uh.uh_sport); + if (!pf_pull_hdr(m, off, &uh, sizeof(uh), &action, &reason, AF_INET)) { log = action != PF_PASS; @@ -5888,8 +6355,6 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) } action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); if (action == PF_PASS) { - if (pfsync_update_state_ptr != NULL) - pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; 
@@ -5911,8 +6376,6 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { - if (pfsync_update_state_ptr != NULL) - pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; @@ -5932,10 +6395,8 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) #endif default: - action = pf_test_state_other(&s, dir, kif, m, &pd); + action = pf_test_state_other(&s, dir, kif, m, off, &pd); if (action == PF_PASS) { - if (pfsync_update_state_ptr != NULL) - pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; @@ -5956,6 +6417,17 @@ done: ("pf: dropping packet with ip options\n")); } + if (s) { + PF_DIVERT_MAXPACKETS_REACHED(); + + if (divflags) { + s->divert_cookie = divertcookie; + s->local_flags |= divflags; + } else { + divertcookie = s->divert_cookie; + divflags = s->local_flags; + } + } if (s && s->tag > 0 && pf_tag_packet(m, &pd, s->tag)) { action = PF_DROP; REASON_SET(&reason, PFRES_MEMORY); @@ -5963,23 +6435,160 @@ done: if (r->rtableid >= 0) M_SETFIB(m, r->rtableid); + if ((r->ieee8021q_pcp.setpcp & SETPCP_VALID) && + pf_ieee8021q_setpcp(m, r)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + log = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: failed to allocate 802.1q mtag\n")); + } + #ifdef ALTQ - if (action == PF_PASS && r->qid) { - if (pd.pf_mtag == NULL && - ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { - action = PF_DROP; - REASON_SET(&reason, PFRES_MEMORY); + if (s && s->qid) { + pd.act.pqid = s->pqid; + pd.act.qid = s->qid; + } else if (r->qid) { + pd.act.pqid = r->pqid; + pd.act.qid = r->qid; + } + if (action == PF_PASS && pd.act.qid) { + if (s != NULL) + pd.pf_mtag->qid_hash = pf_state_hash(s); + if (pqid || (pd.tos & IPTOS_LOWDELAY)) + pd.pf_mtag->qid = pd.act.pqid; + else + pd.pf_mtag->qid = pd.act.qid; + /* Add hints for ecn. 
*/ + pd.pf_mtag->hdr = h; + } +#endif /* ALTQ */ + + if (divflags & PFSTATE_DIVERT_TAG) + pd.pf_mtag->tag = divertcookie; + else if (divflags & PFSTATE_DIVERT_ALTQ) + pd.pf_mtag->qid = divertcookie; + else if (divflags & PFSTATE_DIVERT_ACTION) { + struct pf_rule *dlr; + action = PF_DROP; + if (s) + pf_unlink_state(s, PF_ENTER_LOCKED); + REASON_SET(&reason, PFRES_DIVERT); + log = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: changing action to with overload from divert.\n")); + dlr = r; + PFLOG_PACKET(kif, m, AF_INET, dir, reason, dlr, a, + ruleset, &pd, (s == NULL)); + m_freem(*m0); + *m0 = NULL; + /* NOTE: Fake this to avoid divert giving errors to the application. */ + return (PF_PASS); + } + + if (divflags & PFSTATE_DIVERT_DNCOOKIE) { + pd.act.dnpipe = divertcookie; + pd.act.pdnpipe = divertcookie; + pd.act.flags |= PFRULE_DN_IS_PIPE; + } else if (s && (s->dnpipe || s->pdnpipe)) { + pd.act.dnpipe = s->dnpipe; + pd.act.pdnpipe = s->pdnpipe; + pd.act.flags = s->state_flags; + } else if (r->dnpipe || r->pdnpipe) { + pd.act.dnpipe = r->dnpipe; + pd.act.dnpipe = r->pdnpipe; + pd.act.flags = r->free_flags; + } + + if (pd.act.dnpipe && ip_dn_io_ptr != NULL && loopedfrom != 1) { + if (dir != r->direction && pd.act.pdnpipe) { + dnflow.rule.info = pd.act.pdnpipe; + } else if (dir == r->direction) { + dnflow.rule.info = pd.act.dnpipe; + } else + goto continueprocessing; + + if (pd.act.flags & PFRULE_DN_IS_PIPE) + dnflow.rule.info |= IPFW_IS_PIPE; + dnflow.f_id.addr_type = 4; /* IPv4 type */ + dnflow.f_id.proto = pd.proto; + if (dir == PF_OUT && s != NULL && s->nat_rule.ptr != NULL && + s->nat_rule.ptr->action == PF_NAT) + dnflow.f_id.src_ip = + ntohl(s->key[(s->direction == PF_IN)]-> + addr[(s->direction == PF_OUT)].v4.s_addr); + else + dnflow.f_id.src_ip = ntohl(h->ip_src.s_addr); + dnflow.f_id.dst_ip = ntohl(h->ip_dst.s_addr); + dnflow.f_id.extra = dnflow.rule.info; + + if (m->m_flags & M_FASTFWD_OURS) { + pd.pf_mtag->flags |= PF_FASTFWD_OURS_PRESENT; + m->m_flags &= 
~M_FASTFWD_OURS; + } + + if (s != NULL && s->nat_rule.ptr) + pf_packet_undo_nat(m, &pd, off, s, dir); + + ip_dn_io_ptr(m0, + (dir == PF_IN) ? DIR_IN : DIR_OUT, + &dnflow); + /* This is dummynet fast io processing */ + if (*m0 != NULL) { + m_tag_delete(*m0, m_tag_first(*m0)); + pd.pf_mtag->flags &= ~PF_PACKET_LOOPED; + if (s != NULL && s->nat_rule.ptr) + pf_packet_redo_nat(m, &pd, off, s, dir); } else { - if (pqid || (pd.tos & IPTOS_LOWDELAY)) - pd.pf_mtag->qid = r->pqid; - else - pd.pf_mtag->qid = r->qid; - /* Add hints for ecn. */ - pd.pf_mtag->hdr = h; + *m0 = NULL; + if (s) + PF_STATE_UNLOCK(s); + return (action); } + } +continueprocessing: + + if (action == PF_PASS && r->divert.port && ip_divert_ptr != NULL && + !PACKET_LOOPED(&pd)) { + if (!r->spare2 || + (s && s->packets[dir == PF_OUT] <= r->spare2)) { + ipfwtag = m_tag_alloc(MTAG_IPFW_RULE, 0, + sizeof(struct ipfw_rule_ref), M_NOWAIT | M_ZERO); + if (ipfwtag != NULL) { + ((struct ipfw_rule_ref *)(ipfwtag+1))->info = + ntohs(r->divert.port); + ((struct ipfw_rule_ref *)(ipfwtag+1))->rulenum = dir; + + if (s) + PF_STATE_UNLOCK(s); + + m_tag_prepend(m, ipfwtag); + if (m->m_flags & M_FASTFWD_OURS) { + if (pd.pf_mtag == NULL && + ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + log = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: failed to allocate tag\n")); + } + pd.pf_mtag->flags |= PF_FASTFWD_OURS_PRESENT; + m->m_flags &= ~M_FASTFWD_OURS; + } + ip_divert_ptr(*m0, dir == PF_IN ? DIR_IN : DIR_OUT); + *m0 = NULL; + return (action); + } else { + /* XXX: ipfw has the same behaviour! 
*/ + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + log = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: failed to allocate divert tag\n")); + } + } } -#endif /* ALTQ */ /* * connections redirected to loopback should not match sockets @@ -5990,50 +6599,17 @@ done: pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && (s->nat_rule.ptr->action == PF_RDR || s->nat_rule.ptr->action == PF_BINAT) && - (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) + (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { m->m_flags |= M_SKIP_FIREWALL; - if (action == PF_PASS && r->divert.port && ip_divert_ptr != NULL && - !PACKET_LOOPED(&pd)) { + if (PACKET_LOOPED(pd.pf_mtag) && !loopedfrom) + m->m_flags |= M_FASTFWD_OURS; + } - ipfwtag = m_tag_alloc(MTAG_IPFW_RULE, 0, - sizeof(struct ipfw_rule_ref), M_NOWAIT | M_ZERO); - if (ipfwtag != NULL) { - ((struct ipfw_rule_ref *)(ipfwtag+1))->info = - ntohs(r->divert.port); - ((struct ipfw_rule_ref *)(ipfwtag+1))->rulenum = dir; + pd.pf_mtag->flags &= ~PF_PACKET_LOOPED; - if (s) - PF_STATE_UNLOCK(s); - - m_tag_prepend(m, ipfwtag); - if (m->m_flags & M_FASTFWD_OURS) { - if (pd.pf_mtag == NULL && - ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { - action = PF_DROP; - REASON_SET(&reason, PFRES_MEMORY); - log = 1; - DPFPRINTF(PF_DEBUG_MISC, - ("pf: failed to allocate tag\n")); - } else { - pd.pf_mtag->flags |= - PF_FASTFWD_OURS_PRESENT; - m->m_flags &= ~M_FASTFWD_OURS; - } - } - ip_divert_ptr(*m0, dir == PF_IN ? DIR_IN : DIR_OUT); - *m0 = NULL; - - return (action); - } else { - /* XXX: ipfw has the same behaviour! 
*/ - action = PF_DROP; - REASON_SET(&reason, PFRES_MEMORY); - log = 1; - DPFPRINTF(PF_DEBUG_MISC, - ("pf: failed to allocate divert tag\n")); - } - } + if (action == PF_PASS && s != NULL && pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); if (log) { struct pf_rule *lr; @@ -6134,7 +6710,10 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) struct pf_state *s = NULL; struct pf_ruleset *ruleset = NULL; struct pf_pdesc pd; - int off, terminal = 0, dirndx, rh_cnt = 0; + int off = 0, terminal = 0, dirndx, rh_cnt = 0; + int loopedfrom = 0; + struct m_tag *dn_tag; + struct ip_fw_args dnflow; int fwdir = dir; M_ASSERTPKTHDR(m); @@ -6145,18 +6724,27 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) * We do need to be careful about bridges. If the * net.link.bridge.pfil_bridge sysctl is set we can be filtering on a * bridge, so if the input interface is a bridge member and the output - * interface is its bridge we're not actually forwarding but bridging. + * interface is its bridge or a member of the same bridge we're not + * actually forwarding but bridging. 
*/ - if (dir == PF_OUT && m->m_pkthdr.rcvif && ifp != m->m_pkthdr.rcvif - && (m->m_pkthdr.rcvif->if_bridge == NULL - || m->m_pkthdr.rcvif->if_bridge != ifp->if_softc)) + if (dir == PF_OUT && m->m_pkthdr.rcvif && ifp != m->m_pkthdr.rcvif && + (m->m_pkthdr.rcvif->if_bridge == NULL || + (m->m_pkthdr.rcvif->if_bridge != ifp->if_softc && + m->m_pkthdr.rcvif->if_bridge != ifp->if_bridge))) fwdir = PF_FWD; if (!V_pf_status.running) return (PF_PASS); memset(&pd, 0, sizeof(pd)); - pd.pf_mtag = pf_find_mtag(m); + pd.pf_mtag = pf_get_mtag(m); + if (pd.pf_mtag == NULL) { + REASON_SET(&reason, PFRES_MEMORY); + log = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: dropping packet due to failed memory allocation for tags\n")); + return PF_DROP; + } if (pd.pf_mtag && pd.pf_mtag->flags & PF_TAG_GENERATED) return (PF_PASS); @@ -6175,8 +6763,20 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) PF_RULES_RLOCK(); + if (((ip_dn_io_ptr != NULL) || (ip_divert_ptr != NULL)) && + ((dn_tag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL)) != NULL)) { + struct ipfw_rule_ref *rr = (struct ipfw_rule_ref *)(dn_tag+1); + pd.pf_mtag->flags |= PF_PACKET_LOOPED; + if (rr->info & IPFW_IS_DUMMYNET) + loopedfrom = 1; + if (pd.pf_mtag->flags & PF_FASTFWD_OURS_PRESENT) { + m->m_flags |= M_FASTFWD_OURS; + pd.pf_mtag->flags &= ~PF_FASTFWD_OURS_PRESENT; + } + m_tag_delete(m, dn_tag); + } /* We do IP header normalization and packet reassembly here */ - if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) { + else if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) { action = PF_DROP; goto done; } @@ -6285,6 +6885,10 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) struct tcphdr th; pd.hdr.tcp = &th; + dnflow.f_id._flags = th.th_flags; + dnflow.f_id.dst_port = th.th_dport; + dnflow.f_id.src_port = th.th_sport; + if (!pf_pull_hdr(m, off, &th, sizeof(th), &action, &reason, AF_INET6)) { log = action != PF_PASS; @@ -6297,8 +6901,6 @@ pf_test6(int dir, struct 
ifnet *ifp, struct mbuf **m0, struct inpcb *inp) action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { - if (pfsync_update_state_ptr != NULL) - pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; @@ -6312,6 +6914,9 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) struct udphdr uh; pd.hdr.udp = &uh; + dnflow.f_id.dst_port = uh.uh_dport; + dnflow.f_id.src_port = uh.uh_sport; + if (!pf_pull_hdr(m, off, &uh, sizeof(uh), &action, &reason, AF_INET6)) { log = action != PF_PASS; @@ -6326,8 +6931,6 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) } action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); if (action == PF_PASS) { - if (pfsync_update_state_ptr != NULL) - pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; @@ -6356,8 +6959,6 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { - if (pfsync_update_state_ptr != NULL) - pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; @@ -6368,10 +6969,8 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) } default: - action = pf_test_state_other(&s, dir, kif, m, &pd); + action = pf_test_state_other(&s, dir, kif, m, off, &pd); if (action == PF_PASS) { - if (pfsync_update_state_ptr != NULL) - pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; @@ -6405,23 +7004,90 @@ done: if (r->rtableid >= 0) M_SETFIB(m, r->rtableid); + if ((r->ieee8021q_pcp.setpcp & SETPCP_VALID) && + pf_ieee8021q_setpcp(m, r)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + log = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: failed to allocate 802.1q mtag\n")); + } + #ifdef ALTQ - if (action == PF_PASS && r->qid) { - if (pd.pf_mtag == NULL && - ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { - action = PF_DROP; - 
REASON_SET(&reason, PFRES_MEMORY); - } else { - if (pd.tos & IPTOS_LOWDELAY) - pd.pf_mtag->qid = r->pqid; - else - pd.pf_mtag->qid = r->qid; - /* Add hints for ecn. */ - pd.pf_mtag->hdr = h; - } + if (s && s->qid) { + pd.act.pqid = s->pqid; + pd.act.qid = s->qid; + } else if (r->qid) { + pd.act.pqid = r->pqid; + pd.act.qid = r->qid; + } + if (action == PF_PASS && pd.act.qid) { + if (s != NULL) + pd.pf_mtag->qid_hash = pf_state_hash(s); + if (pd.tos & IPTOS_LOWDELAY) + pd.pf_mtag->qid = pd.act.pqid; + else + pd.pf_mtag->qid = pd.act.qid; + /* Add hints for ecn. */ + pd.pf_mtag->hdr = h; } #endif /* ALTQ */ + if (s && (s->dnpipe || s->pdnpipe)) { + pd.act.dnpipe = s->dnpipe; + pd.act.pdnpipe = s->pdnpipe; + pd.act.flags = s->state_flags; + } else if (r->dnpipe || r->pdnpipe) { + pd.act.dnpipe = r->dnpipe; + pd.act.dnpipe = r->pdnpipe; + pd.act.flags = r->free_flags; + } + if ((pd.act.dnpipe || pd.act.pdnpipe) && ip_dn_io_ptr != NULL && loopedfrom != 1) { + if (dir != r->direction && pd.act.pdnpipe) { + dnflow.rule.info = pd.act.pdnpipe; + } else if (dir == r->direction && pd.act.dnpipe) { + dnflow.rule.info = pd.act.dnpipe; + } else + goto continueprocessing6; + + if (pd.act.flags & PFRULE_DN_IS_PIPE) + dnflow.rule.info |= IPFW_IS_PIPE; + dnflow.f_id.addr_type = 6; /* IPv6 type */ + dnflow.f_id.proto = pd.proto; + dnflow.f_id.src_ip = 0; + dnflow.f_id.dst_ip = 0; + if (dir == PF_OUT && s != NULL && s->nat_rule.ptr != NULL && + s->nat_rule.ptr->action == PF_NAT) + dnflow.f_id.src_ip6 = s->key[(s->direction == PF_IN)]->addr[0].v6; + else + dnflow.f_id.src_ip6 = h->ip6_src; + dnflow.f_id.dst_ip6 = h->ip6_dst; + + if (s != NULL && s->nat_rule.ptr) + pf_packet_undo_nat(m, &pd, off, s, dir); + + ip_dn_io_ptr(m0, + ((dir == PF_IN) ?
DIR_IN : DIR_OUT) | PROTO_IPV6, + &dnflow); + /* This is dummynet fast io processing */ + if (*m0 != NULL) { + m_tag_delete(*m0, m_tag_first(*m0)); + pd.pf_mtag->flags &= ~PF_PACKET_LOOPED; + if (s != NULL && s->nat_rule.ptr) + pf_packet_redo_nat(m, &pd, off, s, dir); + } else { + *m0 = NULL; + if (s) + PF_STATE_UNLOCK(s); + return (action); + } + } else + pd.pf_mtag->flags &= ~PF_PACKET_LOOPED; +continueprocessing6: + + if (action == PF_PASS && s != NULL && pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); + if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP || pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && (s->nat_rule.ptr->action == PF_RDR || diff --git a/sys/netpfil/pf/pf.h b/sys/netpfil/pf/pf.h index 3a5d2aa..5351786 100644 --- a/sys/netpfil/pf/pf.h +++ b/sys/netpfil/pf/pf.h @@ -45,7 +45,8 @@ enum { PF_INOUT, PF_IN, PF_OUT, PF_FWD }; enum { PF_PASS, PF_DROP, PF_SCRUB, PF_NOSCRUB, PF_NAT, PF_NONAT, - PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP, PF_DEFER }; + PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP, PF_DEFER, + PF_MATCH }; enum { PF_RULESET_SCRUB, PF_RULESET_FILTER, PF_RULESET_NAT, PF_RULESET_BINAT, PF_RULESET_RDR, PF_RULESET_MAX }; enum { PF_OP_NONE, PF_OP_IRG, PF_OP_EQ, PF_OP_NE, PF_OP_LT, @@ -125,7 +126,8 @@ enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL, #define PFRES_MAXSTATES 12 /* State limit */ #define PFRES_SRCLIMIT 13 /* Source node/conn limit */ #define PFRES_SYNPROXY 14 /* SYN proxy */ -#define PFRES_MAX 15 /* total+1 */ +#define PFRES_DIVERT 15 /* Divert override */ +#define PFRES_MAX 16 /* total+1 */ #define PFRES_NAMES { \ "match", \ @@ -143,6 +145,7 @@ enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL, "state-limit", \ "src-limit", \ "synproxy", \ + "divert", \ NULL \ } diff --git a/sys/netpfil/pf/pf_altq.h b/sys/netpfil/pf/pf_altq.h index eda0965..3efd4ff 100644 --- a/sys/netpfil/pf/pf_altq.h +++ b/sys/netpfil/pf/pf_altq.h @@ -45,6 +45,12 @@ struct 
cbq_opts { int flags; }; +struct codel_opts { + u_int target; + u_int interval; + int ecn; +}; + struct priq_opts { int flags; }; @@ -65,6 +71,20 @@ struct hfsc_opts { int flags; }; +/* + * XXX this needs some work + */ +struct fairq_opts { + u_int nbuckets; + u_int hogs_m1; + int flags; + + /* link sharing service curve */ + u_int lssc_m1; + u_int lssc_d; + u_int lssc_m2; +}; + struct pf_altq { char ifname[IFNAMSIZ]; @@ -89,8 +109,10 @@ struct pf_altq { uint16_t flags; /* misc flags */ union { struct cbq_opts cbq_opts; + struct codel_opts codel_opts; struct priq_opts priq_opts; struct hfsc_opts hfsc_opts; + struct fairq_opts fairq_opts; } pq_u; uint32_t qid; /* return value */ diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c index e90a8fa..b00952c 100644 --- a/sys/netpfil/pf/pf_ioctl.c +++ b/sys/netpfil/pf/pf_ioctl.c @@ -1004,6 +1004,8 @@ pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td case DIOCCLRRULECTRS: case DIOCGETLIMIT: case DIOCGETALTQS: + case DIOCGETNAMEDALTQ: + case DIOCGETNAMEDTAG: case DIOCGETALTQ: case DIOCGETQSTATS: case DIOCGETRULESETS: @@ -1050,6 +1052,8 @@ pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td case DIOCGETTIMEOUT: case DIOCGETLIMIT: case DIOCGETALTQS: + case DIOCGETNAMEDALTQ: + case DIOCGETNAMEDTAG: case DIOCGETALTQ: case DIOCGETQSTATS: case DIOCGETRULESETS: @@ -1164,7 +1168,9 @@ pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td rule->states_cur = counter_u64_alloc(M_WAITOK); rule->states_tot = counter_u64_alloc(M_WAITOK); rule->src_nodes = counter_u64_alloc(M_WAITOK); +#ifdef PF_USER_INFO rule->cuid = td->td_ucred->cr_ruid; +#endif rule->cpid = td->td_proc ? 
td->td_proc->p_pid : 0; TAILQ_INIT(&rule->rpool.list); @@ -1190,7 +1196,6 @@ pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td V_ticket_pabuf)); ERROUT(EBUSY); } - tail = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr, pf_rulequeue); if (tail) @@ -1269,8 +1274,29 @@ pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td } rule->rpool.cur = TAILQ_FIRST(&rule->rpool.list); +#ifndef PF_USER_INFO + if (rule->cuid) { + tail = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); + while ((tail != NULL) && (tail->cuid != rule->cuid)) + tail = TAILQ_NEXT(tail, entries); + if (tail != NULL) { + rule->evaluations = tail->evaluations; + rule->packets[0] = tail->packets[0]; + rule->packets[1] = tail->packets[1]; + rule->bytes[0] = tail->bytes[0]; + rule->bytes[1] = tail->bytes[1]; + } else { + rule->evaluations = rule->packets[0] = rule->packets[1] = + rule->bytes[0] = rule->bytes[1] = 0; + } + } else { + rule->evaluations = rule->packets[0] = rule->packets[1] = + rule->bytes[0] = rule->bytes[1] = 0; + } +#else rule->evaluations = rule->packets[0] = rule->packets[1] = rule->bytes[0] = rule->bytes[1] = 0; +#endif TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr, rule, entries); ruleset->rules[rs_num].inactive.rcount++; @@ -1420,7 +1446,9 @@ DIOCADDRULE_error: newrule->states_cur = counter_u64_alloc(M_WAITOK); newrule->states_tot = counter_u64_alloc(M_WAITOK); newrule->src_nodes = counter_u64_alloc(M_WAITOK); +#ifdef PF_USER_INFO newrule->cuid = td->td_ucred->cr_ruid; +#endif newrule->cpid = td->td_proc ? 
td->td_proc->p_pid : 0; TAILQ_INIT(&newrule->rpool.list); } @@ -1707,6 +1735,30 @@ relock_DIOCKILLSTATES: break; } + case DIOCKILLSCHEDULE: { + struct pf_state *state; + struct pfioc_schedule_kill *psk = (struct pfioc_schedule_kill *)addr; + int killed = 0; + u_int i; + + for (i = 0; i <= pf_hashmask; i++) { + struct pf_idhash *ih = &V_pf_idhash[i]; + +relock_DIOCKILLSCHEDULE: + PF_HASHROW_LOCK(ih); + LIST_FOREACH(state, &ih->states, entry) { + if (!strcmp(psk->schedule, state->rule.ptr->schedule)) { + pf_unlink_state(state, PF_ENTER_LOCKED); + killed++; + goto relock_DIOCKILLSCHEDULE; + } + } + PF_HASHROW_UNLOCK(ih); + } + psk->numberkilled = killed; + break; + } + case DIOCADDSTATE: { struct pfioc_state *ps = (struct pfioc_state *)addr; struct pfsync_state *sp = &ps->state; @@ -2097,6 +2149,16 @@ DIOCGETSTATES_full: break; } + case DIOCGETNAMEDALTQ: { + struct pfioc_ruleset *pa = (struct pfioc_ruleset *)addr; + + if (pa->name[0]) { + pa->nr = pf_qname2qid(pa->name); + pf_qid_unref(pa->nr); + } + break; + } + case DIOCGETALTQS: { struct pfioc_altq *pa = (struct pfioc_altq *)addr; struct pf_altq *altq; @@ -2182,6 +2244,16 @@ DIOCGETSTATES_full: } #endif /* ALTQ */ + case DIOCGETNAMEDTAG: { + /* Little abuse. 
*/ + struct pfioc_ruleset *pa = (struct pfioc_ruleset *)addr; + + if (pa->name[0]) + pa->nr = pf_tagname2tag(pa->name); + + break; + } + case DIOCBEGINADDRS: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; @@ -3629,8 +3701,8 @@ hook_pf(void) pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); if (pfh_inet == NULL) return (ESRCH); /* XXX */ - pfil_add_hook(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet); - pfil_add_hook(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet); + pfil_add_named_hook(pf_check_in, NULL, "pf", PFIL_IN | PFIL_WAITOK, pfh_inet); + pfil_add_named_hook(pf_check_out, NULL, "pf", PFIL_OUT | PFIL_WAITOK, pfh_inet); #endif #ifdef INET6 pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); @@ -3643,8 +3715,10 @@ hook_pf(void) #endif return (ESRCH); /* XXX */ } - pfil_add_hook(pf_check6_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet6); - pfil_add_hook(pf_check6_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet6); + pfil_add_named_hook(pf_check6_in, NULL, "pf", PFIL_IN | PFIL_WAITOK, + pfh_inet6); + pfil_add_named_hook(pf_check6_out, NULL, "pf", PFIL_OUT | PFIL_WAITOK, + pfh_inet6); #endif V_pf_pfil_hooked = 1; diff --git a/sys/netpfil/pf/pf_mtag.h b/sys/netpfil/pf/pf_mtag.h index 3aacb2e..fd8554a 100644 --- a/sys/netpfil/pf/pf_mtag.h +++ b/sys/netpfil/pf/pf_mtag.h @@ -44,6 +44,7 @@ struct pf_mtag { void *hdr; /* saved hdr pos in mbuf, for ECN */ u_int32_t qid; /* queue id */ + u_int32_t qid_hash; /* queue hashid used by WFQ like algos */ u_int16_t tag; /* tag id */ u_int8_t flags; u_int8_t routed; diff --git a/sys/netpfil/pf/pf_norm.c b/sys/netpfil/pf/pf_norm.c index 4f9a499..b925960 100644 --- a/sys/netpfil/pf/pf_norm.c +++ b/sys/netpfil/pf/pf_norm.c @@ -1151,6 +1151,7 @@ pf_refragment6(struct ifnet *ifp, struct mbuf **m0, struct m_tag *mtag) for (t = m; m; m = t) { t = m->m_nextpkt; m->m_nextpkt = NULL; + m->m_pkthdr.rcvif = ifp; m->m_flags |= M_SKIP_FIREWALL; memset(&pd, 0, sizeof(pd)); pd.pf_mtag = pf_find_mtag(m); diff --git 
a/sys/netpfil/pf/pf_ruleset.c b/sys/netpfil/pf/pf_ruleset.c index 384e42b..2274359 100644 --- a/sys/netpfil/pf/pf_ruleset.c +++ b/sys/netpfil/pf/pf_ruleset.c @@ -120,6 +120,7 @@ pf_get_ruleset_number(u_int8_t action) return (PF_RULESET_SCRUB); break; case PF_PASS: + case PF_MATCH: case PF_DROP: return (PF_RULESET_FILTER); break; |